diff --git "a/robot_action_model/trainer_state.json" "b/robot_action_model/trainer_state.json" new file mode 100644--- /dev/null +++ "b/robot_action_model/trainer_state.json" @@ -0,0 +1,126016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.345547235940736, + "global_step": 21000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.307855626326964e-08, + "loss": 13.2394, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.0615711252653928e-07, + "loss": 13.5785, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.5923566878980893e-07, + "loss": 13.0081, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 2.1231422505307855e-07, + "loss": 13.1462, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 2.6539278131634817e-07, + "loss": 13.423, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 3.1847133757961787e-07, + "loss": 13.3561, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 3.715498938428875e-07, + "loss": 12.9398, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 4.246284501061571e-07, + "loss": 13.0864, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 4.777070063694267e-07, + "loss": 13.0811, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 5.307855626326963e-07, + "loss": 13.042, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 5.83864118895966e-07, + "loss": 12.8726, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 6.369426751592357e-07, + "loss": 12.9285, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 6.900212314225053e-07, + "loss": 12.974, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 7.43099787685775e-07, + "loss": 12.7314, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 7.961783439490446e-07, + "loss": 12.7048, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 8.492569002123142e-07, + "loss": 12.6257, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 9.023354564755839e-07, + "loss": 12.6064, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 9.554140127388535e-07, + "loss": 12.2679, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 1.008492569002123e-06, + "loss": 12.1374, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 1.0615711252653927e-06, + "loss": 12.3263, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.1146496815286625e-06, + "loss": 11.9696, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 1.167728237791932e-06, + "loss": 11.955, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 1.2208067940552019e-06, + "loss": 11.5576, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 1.2738853503184715e-06, + "loss": 11.6582, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 1.326963906581741e-06, + "loss": 11.271, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 1.3800424628450107e-06, + "loss": 11.1264, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 1.4331210191082802e-06, + "loss": 10.9406, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 1.48619957537155e-06, + "loss": 10.7237, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 1.5392781316348196e-06, + "loss": 10.6955, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 1.5923566878980892e-06, + "loss": 10.6187, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.6454352441613588e-06, + "loss": 10.3437, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 1.6985138004246284e-06, + "loss": 10.1739, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 1.7515923566878982e-06, + "loss": 10.1383, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 1.8046709129511678e-06, + "loss": 10.2044, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 1.8577494692144374e-06, + "loss": 9.7015, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 1.910828025477707e-06, + "loss": 9.2676, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 1.963906581740977e-06, + "loss": 9.0955, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 2.016985138004246e-06, + "loss": 8.7729, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 2.070063694267516e-06, + "loss": 8.7713, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 2.1231422505307854e-06, + "loss": 8.4284, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 2.176220806794055e-06, + "loss": 8.1487, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 2.229299363057325e-06, + "loss": 8.0746, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 2.2823779193205943e-06, + "loss": 7.6863, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 2.335456475583864e-06, + "loss": 7.5029, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 2.388535031847134e-06, + "loss": 7.3109, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 2.4416135881104038e-06, + "loss": 7.0102, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 2.494692144373673e-06, + "loss": 6.9272, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 2.547770700636943e-06, + "loss": 6.5848, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 2.6008492569002127e-06, + "loss": 6.7954, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 2.653927813163482e-06, + "loss": 5.9796, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 2.707006369426752e-06, + "loss": 5.8593, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 2.7600849256900213e-06, + "loss": 5.4405, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 2.813163481953291e-06, + "loss": 5.3938, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 2.8662420382165605e-06, + "loss": 5.3757, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 2.9193205944798303e-06, + "loss": 4.9917, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 2.9723991507431e-06, + "loss": 4.6684, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 3.0254777070063695e-06, + "loss": 4.424, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 3.0785562632696393e-06, + "loss": 4.2579, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 3.1316348195329087e-06, + "loss": 4.2904, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 3.1847133757961785e-06, + "loss": 4.2257, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 3.2377919320594483e-06, + "loss": 4.1382, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 3.2908704883227177e-06, + "loss": 3.9079, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 3.3439490445859875e-06, + "loss": 3.8785, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 3.397027600849257e-06, + "loss": 3.7329, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 3.4501061571125266e-06, + "loss": 3.8806, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 3.5031847133757964e-06, + "loss": 3.5959, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 3.556263269639066e-06, + "loss": 3.8628, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 3.6093418259023356e-06, + "loss": 3.4981, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 3.662420382165605e-06, + "loss": 3.6243, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 3.715498938428875e-06, + "loss": 3.5661, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 3.7685774946921446e-06, + "loss": 3.2206, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 3.821656050955414e-06, + "loss": 3.3048, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 3.874734607218684e-06, + "loss": 3.1822, + "step": 73 + }, + { + "epoch": 0.01, + "learning_rate": 3.927813163481954e-06, + "loss": 3.3145, + "step": 74 + }, + { + "epoch": 0.01, + "learning_rate": 3.9808917197452226e-06, + "loss": 3.2493, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 4.033970276008492e-06, + "loss": 3.2409, + "step": 76 + }, + { + "epoch": 0.01, + "learning_rate": 4.087048832271762e-06, + "loss": 3.0861, + "step": 77 + }, + { + "epoch": 0.01, + "learning_rate": 4.140127388535032e-06, + "loss": 2.884, + "step": 78 + }, + { + "epoch": 0.01, + "learning_rate": 4.193205944798302e-06, + "loss": 2.9362, + "step": 79 + }, + { + "epoch": 0.01, + "learning_rate": 4.246284501061571e-06, + "loss": 2.8918, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.2993630573248405e-06, + "loss": 2.8552, + "step": 81 + }, + { + "epoch": 0.01, + "learning_rate": 4.35244161358811e-06, + "loss": 2.7369, + "step": 82 + }, + { + "epoch": 0.01, + "learning_rate": 4.40552016985138e-06, + "loss": 2.6513, + "step": 83 + }, + { + "epoch": 0.01, + "learning_rate": 4.45859872611465e-06, + "loss": 2.9522, + "step": 84 + }, + { + "epoch": 0.01, + "learning_rate": 4.511677282377919e-06, + "loss": 2.6039, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 4.564755838641189e-06, + "loss": 2.6981, + "step": 86 + }, + { + "epoch": 0.01, + "learning_rate": 4.6178343949044585e-06, + "loss": 2.6402, + "step": 87 + }, + { + "epoch": 0.01, + "learning_rate": 4.670912951167728e-06, + "loss": 2.3836, + "step": 88 + }, + { + "epoch": 0.01, + "learning_rate": 4.723991507430998e-06, + "loss": 2.4552, + "step": 89 + }, + { + "epoch": 0.01, + "learning_rate": 4.777070063694268e-06, + "loss": 2.5614, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.830148619957538e-06, + "loss": 2.6077, + "step": 91 + }, + { + "epoch": 0.01, + "learning_rate": 4.8832271762208075e-06, + "loss": 2.2563, + "step": 92 + }, + { + "epoch": 0.01, + "learning_rate": 4.936305732484077e-06, + "loss": 2.3505, + "step": 93 + }, + { + "epoch": 0.01, + "learning_rate": 4.989384288747346e-06, + "loss": 2.3402, + "step": 94 + }, + { + "epoch": 0.02, + "learning_rate": 5.042462845010616e-06, + "loss": 2.0426, + "step": 95 + }, + { + "epoch": 0.02, + "learning_rate": 5.095541401273886e-06, + "loss": 2.3866, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 5.148619957537156e-06, + "loss": 2.1927, + "step": 97 + }, + { + "epoch": 0.02, + "learning_rate": 5.2016985138004255e-06, + "loss": 2.1204, + "step": 98 + }, + { + "epoch": 0.02, + "learning_rate": 5.2547770700636944e-06, + "loss": 2.2482, + "step": 99 + }, + { + "epoch": 0.02, + "learning_rate": 5.307855626326964e-06, + "loss": 2.4146, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 5.360934182590234e-06, + "loss": 2.3019, + "step": 101 + }, + { + "epoch": 0.02, + "learning_rate": 5.414012738853504e-06, + "loss": 2.3379, + "step": 102 + }, + { + "epoch": 0.02, + "learning_rate": 5.467091295116774e-06, + "loss": 2.2618, + "step": 103 + }, + { + "epoch": 0.02, + "learning_rate": 5.520169851380043e-06, + "loss": 2.0462, + "step": 104 + }, + { + "epoch": 0.02, + "learning_rate": 5.573248407643312e-06, + "loss": 2.2584, + "step": 105 + }, + { + "epoch": 0.02, + "learning_rate": 5.626326963906582e-06, + "loss": 1.9958, + "step": 106 + }, + { + "epoch": 0.02, + "learning_rate": 5.679405520169852e-06, + "loss": 2.1784, + "step": 107 + }, + { + "epoch": 0.02, + "learning_rate": 5.732484076433121e-06, + "loss": 2.0382, + "step": 108 + }, + { + "epoch": 0.02, + "learning_rate": 5.785562632696391e-06, + "loss": 1.976, + "step": 109 + }, + { + "epoch": 0.02, + "learning_rate": 5.838641188959661e-06, + "loss": 1.9094, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 5.89171974522293e-06, + "loss": 2.0613, + "step": 111 + }, + { + "epoch": 0.02, + "learning_rate": 5.9447983014862e-06, + "loss": 1.9439, + "step": 112 + }, + { + "epoch": 0.02, + "learning_rate": 5.997876857749469e-06, + "loss": 1.9312, + "step": 113 + }, + { + "epoch": 0.02, + "learning_rate": 6.050955414012739e-06, + "loss": 2.1243, + "step": 114 + }, + { + "epoch": 0.02, + "learning_rate": 6.104033970276009e-06, + "loss": 1.9707, + "step": 115 + }, + { + "epoch": 0.02, + "learning_rate": 6.1571125265392786e-06, + "loss": 1.9748, + "step": 116 + }, + { + "epoch": 0.02, + "learning_rate": 6.210191082802548e-06, + "loss": 1.9749, + "step": 117 + }, + { + "epoch": 0.02, + "learning_rate": 6.263269639065817e-06, + "loss": 1.979, + "step": 118 + }, + { + "epoch": 0.02, + "learning_rate": 6.316348195329087e-06, + "loss": 1.8732, + "step": 119 + }, + { + "epoch": 0.02, + "learning_rate": 6.369426751592357e-06, + "loss": 1.9548, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 6.422505307855627e-06, + "loss": 1.9318, + "step": 121 + }, + { + "epoch": 0.02, + "learning_rate": 6.4755838641188965e-06, + "loss": 1.9402, + "step": 122 + }, + { + "epoch": 0.02, + "learning_rate": 6.5286624203821655e-06, + "loss": 1.8525, + "step": 123 + }, + { + "epoch": 0.02, + "learning_rate": 6.581740976645435e-06, + "loss": 1.8138, + "step": 124 + }, + { + "epoch": 0.02, + "learning_rate": 6.634819532908705e-06, + "loss": 1.972, + "step": 125 + }, + { + "epoch": 0.02, + "learning_rate": 6.687898089171975e-06, + "loss": 1.9255, + "step": 126 + }, + { + "epoch": 0.02, + "learning_rate": 6.740976645435245e-06, + "loss": 1.7673, + "step": 127 + }, + { + "epoch": 0.02, + "learning_rate": 6.794055201698514e-06, + "loss": 1.8791, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 6.8471337579617835e-06, + "loss": 1.8743, + "step": 129 + }, + { + "epoch": 0.02, + "learning_rate": 6.900212314225053e-06, + "loss": 2.1206, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 6.953290870488323e-06, + "loss": 1.9329, + "step": 131 + }, + { + "epoch": 0.02, + "learning_rate": 7.006369426751593e-06, + "loss": 1.9081, + "step": 132 + }, + { + "epoch": 0.02, + "learning_rate": 7.059447983014862e-06, + "loss": 2.0504, + "step": 133 + }, + { + "epoch": 0.02, + "learning_rate": 7.112526539278132e-06, + "loss": 1.9007, + "step": 134 + }, + { + "epoch": 0.02, + "learning_rate": 7.1656050955414014e-06, + "loss": 1.9854, + "step": 135 + }, + { + "epoch": 0.02, + "learning_rate": 7.218683651804671e-06, + "loss": 1.9312, + "step": 136 + }, + { + "epoch": 0.02, + "learning_rate": 7.271762208067941e-06, + "loss": 1.7299, + "step": 137 + }, + { + "epoch": 0.02, + "learning_rate": 7.32484076433121e-06, + "loss": 1.8772, + "step": 138 + }, + { + "epoch": 0.02, + "learning_rate": 7.37791932059448e-06, + "loss": 1.9035, + "step": 139 + }, + { + "epoch": 0.02, + "learning_rate": 7.43099787685775e-06, + "loss": 1.8971, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 7.484076433121019e-06, + "loss": 1.8346, + "step": 141 + }, + { + "epoch": 0.02, + "learning_rate": 7.537154989384289e-06, + "loss": 1.8914, + "step": 142 + }, + { + "epoch": 0.02, + "learning_rate": 7.590233545647558e-06, + "loss": 1.9371, + "step": 143 + }, + { + "epoch": 0.02, + "learning_rate": 7.643312101910828e-06, + "loss": 1.8659, + "step": 144 + }, + { + "epoch": 0.02, + "learning_rate": 7.696390658174098e-06, + "loss": 1.8345, + "step": 145 + }, + { + "epoch": 0.02, + "learning_rate": 7.749469214437368e-06, + "loss": 1.8419, + "step": 146 + }, + { + "epoch": 0.02, + "learning_rate": 7.802547770700637e-06, + "loss": 1.8226, + "step": 147 + }, + { + "epoch": 0.02, + "learning_rate": 7.855626326963907e-06, + "loss": 1.8649, + "step": 148 + }, + { + "epoch": 0.02, + "learning_rate": 7.908704883227177e-06, + "loss": 1.7414, + "step": 149 + }, + { + "epoch": 0.02, + "learning_rate": 7.961783439490445e-06, + "loss": 1.7607, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 8.014861995753715e-06, + "loss": 1.9958, + "step": 151 + }, + { + "epoch": 0.02, + "learning_rate": 8.067940552016985e-06, + "loss": 1.9977, + "step": 152 + }, + { + "epoch": 0.02, + "learning_rate": 8.121019108280255e-06, + "loss": 1.948, + "step": 153 + }, + { + "epoch": 0.02, + "learning_rate": 8.174097664543524e-06, + "loss": 1.8801, + "step": 154 + }, + { + "epoch": 0.02, + "learning_rate": 8.227176220806794e-06, + "loss": 1.7973, + "step": 155 + }, + { + "epoch": 0.02, + "learning_rate": 8.280254777070064e-06, + "loss": 1.762, + "step": 156 + }, + { + "epoch": 0.03, + "learning_rate": 8.333333333333334e-06, + "loss": 1.8774, + "step": 157 + }, + { + "epoch": 0.03, + "learning_rate": 8.386411889596604e-06, + "loss": 1.7778, + "step": 158 + }, + { + "epoch": 0.03, + "learning_rate": 8.439490445859873e-06, + "loss": 1.7982, + "step": 159 + }, + { + "epoch": 0.03, + "learning_rate": 8.492569002123141e-06, + "loss": 1.876, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 8.545647558386411e-06, + "loss": 1.7986, + "step": 161 + }, + { + "epoch": 0.03, + "learning_rate": 8.598726114649681e-06, + "loss": 1.8512, + "step": 162 + }, + { + "epoch": 0.03, + "learning_rate": 8.651804670912951e-06, + "loss": 1.7415, + "step": 163 + }, + { + "epoch": 0.03, + "learning_rate": 8.70488322717622e-06, + "loss": 1.9235, + "step": 164 + }, + { + "epoch": 0.03, + "learning_rate": 8.75796178343949e-06, + "loss": 1.8396, + "step": 165 + }, + { + "epoch": 0.03, + "learning_rate": 8.81104033970276e-06, + "loss": 1.8685, + "step": 166 + }, + { + "epoch": 0.03, + "learning_rate": 8.86411889596603e-06, + "loss": 1.8122, + "step": 167 + }, + { + "epoch": 0.03, + "learning_rate": 8.9171974522293e-06, + "loss": 1.7921, + "step": 168 + }, + { + "epoch": 0.03, + "learning_rate": 8.970276008492568e-06, + "loss": 1.7168, + "step": 169 + }, + { + "epoch": 0.03, + "learning_rate": 9.023354564755838e-06, + "loss": 1.8416, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 9.076433121019108e-06, + "loss": 1.7362, + "step": 171 + }, + { + "epoch": 0.03, + "learning_rate": 9.129511677282377e-06, + "loss": 1.8464, + "step": 172 + }, + { + "epoch": 0.03, + "learning_rate": 9.182590233545647e-06, + "loss": 1.8438, + "step": 173 + }, + { + "epoch": 0.03, + "learning_rate": 9.235668789808917e-06, + "loss": 1.7913, + "step": 174 + }, + { + "epoch": 0.03, + "learning_rate": 9.288747346072187e-06, + "loss": 1.7962, + "step": 175 + }, + { + "epoch": 0.03, + "learning_rate": 9.341825902335457e-06, + "loss": 1.7934, + "step": 176 + }, + { + "epoch": 0.03, + "learning_rate": 9.394904458598726e-06, + "loss": 1.7505, + "step": 177 + }, + { + "epoch": 0.03, + "learning_rate": 9.447983014861996e-06, + "loss": 1.8372, + "step": 178 + }, + { + "epoch": 0.03, + "learning_rate": 9.501061571125266e-06, + "loss": 1.6862, + "step": 179 + }, + { + "epoch": 0.03, + "learning_rate": 9.554140127388536e-06, + "loss": 1.7439, + "step": 180 + }, + { + "epoch": 0.03, + "learning_rate": 9.607218683651806e-06, + "loss": 1.8254, + "step": 181 + }, + { + "epoch": 0.03, + "learning_rate": 9.660297239915075e-06, + "loss": 1.7703, + "step": 182 + }, + { + "epoch": 0.03, + "learning_rate": 9.713375796178345e-06, + "loss": 1.7435, + "step": 183 + }, + { + "epoch": 0.03, + "learning_rate": 9.766454352441615e-06, + "loss": 1.7457, + "step": 184 + }, + { + "epoch": 0.03, + "learning_rate": 9.819532908704885e-06, + "loss": 1.7817, + "step": 185 + }, + { + "epoch": 0.03, + "learning_rate": 9.872611464968155e-06, + "loss": 1.838, + "step": 186 + }, + { + "epoch": 0.03, + "learning_rate": 9.925690021231423e-06, + "loss": 1.7542, + "step": 187 + }, + { + "epoch": 0.03, + "learning_rate": 9.978768577494693e-06, + "loss": 1.8811, + "step": 188 + }, + { + "epoch": 0.03, + "learning_rate": 1.0031847133757962e-05, + "loss": 1.7568, + "step": 189 + }, + { + "epoch": 0.03, + "learning_rate": 1.0084925690021232e-05, + "loss": 1.7377, + "step": 190 + }, + { + "epoch": 0.03, + "learning_rate": 1.0138004246284502e-05, + "loss": 1.9874, + "step": 191 + }, + { + "epoch": 0.03, + "learning_rate": 1.0191082802547772e-05, + "loss": 1.8294, + "step": 192 + }, + { + "epoch": 0.03, + "learning_rate": 1.0244161358811042e-05, + "loss": 1.7966, + "step": 193 + }, + { + "epoch": 0.03, + "learning_rate": 1.0297239915074311e-05, + "loss": 1.7603, + "step": 194 + }, + { + "epoch": 0.03, + "learning_rate": 1.0350318471337581e-05, + "loss": 1.8469, + "step": 195 + }, + { + "epoch": 0.03, + "learning_rate": 1.0403397027600851e-05, + "loss": 1.848, + "step": 196 + }, + { + "epoch": 0.03, + "learning_rate": 1.0456475583864119e-05, + "loss": 1.7503, + "step": 197 + }, + { + "epoch": 0.03, + "learning_rate": 1.0509554140127389e-05, + "loss": 1.7525, + "step": 198 + }, + { + "epoch": 0.03, + "learning_rate": 1.0562632696390659e-05, + "loss": 1.7769, + "step": 199 + }, + { + "epoch": 0.03, + "learning_rate": 1.0615711252653929e-05, + "loss": 1.6932, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 1.0668789808917198e-05, + "loss": 1.8658, + "step": 201 + }, + { + "epoch": 0.03, + "learning_rate": 1.0721868365180468e-05, + "loss": 1.8308, + "step": 202 + }, + { + "epoch": 0.03, + "learning_rate": 1.0774946921443738e-05, + "loss": 1.7774, + "step": 203 + }, + { + "epoch": 0.03, + "learning_rate": 1.0828025477707008e-05, + "loss": 1.8035, + "step": 204 + }, + { + "epoch": 0.03, + "learning_rate": 1.0881104033970278e-05, + "loss": 1.7462, + "step": 205 + }, + { + "epoch": 0.03, + "learning_rate": 1.0934182590233547e-05, + "loss": 1.8383, + "step": 206 + }, + { + "epoch": 0.03, + "learning_rate": 1.0987261146496815e-05, + "loss": 1.8283, + "step": 207 + }, + { + "epoch": 0.03, + "learning_rate": 1.1040339702760085e-05, + "loss": 1.804, + "step": 208 + }, + { + "epoch": 0.03, + "learning_rate": 1.1093418259023355e-05, + "loss": 1.7737, + "step": 209 + }, + { + "epoch": 0.03, + "learning_rate": 1.1146496815286625e-05, + "loss": 1.7962, + "step": 210 + }, + { + "epoch": 0.03, + "learning_rate": 1.1199575371549895e-05, + "loss": 1.7451, + "step": 211 + }, + { + "epoch": 0.03, + "learning_rate": 1.1252653927813164e-05, + "loss": 1.7099, + "step": 212 + }, + { + "epoch": 0.03, + "learning_rate": 1.1305732484076434e-05, + "loss": 1.7736, + "step": 213 + }, + { + "epoch": 0.03, + "learning_rate": 1.1358811040339704e-05, + "loss": 1.8268, + "step": 214 + }, + { + "epoch": 0.03, + "learning_rate": 1.1411889596602974e-05, + "loss": 1.7221, + "step": 215 + }, + { + "epoch": 0.03, + "learning_rate": 1.1464968152866242e-05, + "loss": 1.707, + "step": 216 + }, + { + "epoch": 0.03, + "learning_rate": 1.1518046709129512e-05, + "loss": 1.7096, + "step": 217 + }, + { + "epoch": 0.03, + "learning_rate": 1.1571125265392782e-05, + "loss": 1.7667, + "step": 218 + }, + { + "epoch": 0.03, + "learning_rate": 1.1624203821656051e-05, + "loss": 1.8853, + "step": 219 + }, + { + "epoch": 0.04, + "learning_rate": 1.1677282377919321e-05, + "loss": 1.7017, + "step": 220 + }, + { + "epoch": 0.04, + "learning_rate": 1.1730360934182591e-05, + "loss": 1.7002, + "step": 221 + }, + { + "epoch": 0.04, + "learning_rate": 1.178343949044586e-05, + "loss": 1.7738, + "step": 222 + }, + { + "epoch": 0.04, + "learning_rate": 1.183651804670913e-05, + "loss": 1.7321, + "step": 223 + }, + { + "epoch": 0.04, + "learning_rate": 1.18895966029724e-05, + "loss": 1.7405, + "step": 224 + }, + { + "epoch": 0.04, + "learning_rate": 1.194267515923567e-05, + "loss": 1.7042, + "step": 225 + }, + { + "epoch": 0.04, + "learning_rate": 1.1995753715498938e-05, + "loss": 1.757, + "step": 226 + }, + { + "epoch": 0.04, + "learning_rate": 1.2048832271762208e-05, + "loss": 1.824, + "step": 227 + }, + { + "epoch": 0.04, + "learning_rate": 1.2101910828025478e-05, + "loss": 1.7749, + "step": 228 + }, + { + "epoch": 0.04, + "learning_rate": 1.2154989384288748e-05, + "loss": 1.6916, + "step": 229 + }, + { + "epoch": 0.04, + "learning_rate": 1.2208067940552018e-05, + "loss": 1.8248, + "step": 230 + }, + { + "epoch": 0.04, + "learning_rate": 1.2261146496815287e-05, + "loss": 1.747, + "step": 231 + }, + { + "epoch": 0.04, + "learning_rate": 1.2314225053078557e-05, + "loss": 1.7767, + "step": 232 + }, + { + "epoch": 0.04, + "learning_rate": 1.2367303609341827e-05, + "loss": 1.8638, + "step": 233 + }, + { + "epoch": 0.04, + "learning_rate": 1.2420382165605097e-05, + "loss": 1.883, + "step": 234 + }, + { + "epoch": 0.04, + "learning_rate": 1.2473460721868367e-05, + "loss": 1.695, + "step": 235 + }, + { + "epoch": 0.04, + "learning_rate": 1.2526539278131635e-05, + "loss": 1.7912, + "step": 236 + }, + { + "epoch": 0.04, + "learning_rate": 1.2579617834394904e-05, + "loss": 1.7668, + "step": 237 + }, + { + "epoch": 0.04, + "learning_rate": 1.2632696390658174e-05, + "loss": 1.7254, + "step": 238 + }, + { + "epoch": 0.04, + "learning_rate": 1.2685774946921444e-05, + "loss": 1.8065, + "step": 239 + }, + { + "epoch": 0.04, + "learning_rate": 1.2738853503184714e-05, + "loss": 1.7407, + "step": 240 + }, + { + "epoch": 0.04, + "learning_rate": 1.2791932059447984e-05, + "loss": 1.8269, + "step": 241 + }, + { + "epoch": 0.04, + "learning_rate": 1.2845010615711253e-05, + "loss": 1.6442, + "step": 242 + }, + { + "epoch": 0.04, + "learning_rate": 1.2898089171974523e-05, + "loss": 1.76, + "step": 243 + }, + { + "epoch": 0.04, + "learning_rate": 1.2951167728237793e-05, + "loss": 1.8382, + "step": 244 + }, + { + "epoch": 0.04, + "learning_rate": 1.3004246284501063e-05, + "loss": 1.754, + "step": 245 + }, + { + "epoch": 0.04, + "learning_rate": 1.3057324840764331e-05, + "loss": 1.7527, + "step": 246 + }, + { + "epoch": 0.04, + "learning_rate": 1.31104033970276e-05, + "loss": 1.7334, + "step": 247 + }, + { + "epoch": 0.04, + "learning_rate": 1.316348195329087e-05, + "loss": 1.7709, + "step": 248 + }, + { + "epoch": 0.04, + "learning_rate": 1.321656050955414e-05, + "loss": 1.7049, + "step": 249 + }, + { + "epoch": 0.04, + "learning_rate": 1.326963906581741e-05, + "loss": 1.7254, + "step": 250 + }, + { + "epoch": 0.04, + "learning_rate": 1.332271762208068e-05, + "loss": 1.7452, + "step": 251 + }, + { + "epoch": 0.04, + "learning_rate": 1.337579617834395e-05, + "loss": 1.7457, + "step": 252 + }, + { + "epoch": 0.04, + "learning_rate": 1.342887473460722e-05, + "loss": 1.8252, + "step": 253 + }, + { + "epoch": 0.04, + "learning_rate": 1.348195329087049e-05, + "loss": 1.7939, + "step": 254 + }, + { + "epoch": 0.04, + "learning_rate": 1.353503184713376e-05, + "loss": 1.779, + "step": 255 + }, + { + "epoch": 0.04, + "learning_rate": 1.3588110403397027e-05, + "loss": 1.6789, + "step": 256 + }, + { + "epoch": 0.04, + "learning_rate": 1.3641188959660297e-05, + "loss": 1.8051, + "step": 257 + }, + { + "epoch": 0.04, + "learning_rate": 1.3694267515923567e-05, + "loss": 1.7489, + "step": 258 + }, + { + "epoch": 0.04, + "learning_rate": 1.3747346072186837e-05, + "loss": 1.7917, + "step": 259 + }, + { + "epoch": 0.04, + "learning_rate": 1.3800424628450107e-05, + "loss": 1.7999, + "step": 260 + }, + { + "epoch": 0.04, + "learning_rate": 1.3853503184713376e-05, + "loss": 1.7932, + "step": 261 + }, + { + "epoch": 0.04, + "learning_rate": 1.3906581740976646e-05, + "loss": 1.7785, + "step": 262 + }, + { + "epoch": 0.04, + "learning_rate": 1.3959660297239916e-05, + "loss": 1.8581, + "step": 263 + }, + { + "epoch": 0.04, + "learning_rate": 1.4012738853503186e-05, + "loss": 1.6873, + "step": 264 + }, + { + "epoch": 0.04, + "learning_rate": 1.4065817409766454e-05, + "loss": 1.7676, + "step": 265 + }, + { + "epoch": 0.04, + "learning_rate": 1.4118895966029724e-05, + "loss": 1.7536, + "step": 266 + }, + { + "epoch": 0.04, + "learning_rate": 1.4171974522292993e-05, + "loss": 1.7727, + "step": 267 + }, + { + "epoch": 0.04, + "learning_rate": 1.4225053078556263e-05, + "loss": 1.8111, + "step": 268 + }, + { + "epoch": 0.04, + "learning_rate": 1.4278131634819533e-05, + "loss": 1.7551, + "step": 269 + }, + { + "epoch": 0.04, + "learning_rate": 1.4331210191082803e-05, + "loss": 1.7625, + "step": 270 + }, + { + "epoch": 0.04, + "learning_rate": 1.4384288747346073e-05, + "loss": 1.7789, + "step": 271 + }, + { + "epoch": 0.04, + "learning_rate": 1.4437367303609342e-05, + "loss": 1.7994, + "step": 272 + }, + { + "epoch": 0.04, + "learning_rate": 1.4490445859872612e-05, + "loss": 1.7413, + "step": 273 + }, + { + "epoch": 0.04, + "learning_rate": 1.4543524416135882e-05, + "loss": 1.8493, + "step": 274 + }, + { + "epoch": 0.04, + "learning_rate": 1.459660297239915e-05, + "loss": 1.7341, + "step": 275 + }, + { + "epoch": 0.04, + "learning_rate": 1.464968152866242e-05, + "loss": 1.6797, + "step": 276 + }, + { + "epoch": 0.04, + "learning_rate": 1.470276008492569e-05, + "loss": 1.7534, + "step": 277 + }, + { + "epoch": 0.04, + "learning_rate": 1.475583864118896e-05, + "loss": 1.6369, + "step": 278 + }, + { + "epoch": 0.04, + "learning_rate": 1.480891719745223e-05, + "loss": 1.7259, + "step": 279 + }, + { + "epoch": 0.04, + "learning_rate": 1.48619957537155e-05, + "loss": 1.8071, + "step": 280 + }, + { + "epoch": 0.04, + "learning_rate": 1.4915074309978769e-05, + "loss": 1.6766, + "step": 281 + }, + { + "epoch": 0.04, + "learning_rate": 1.4968152866242039e-05, + "loss": 1.7492, + "step": 282 + }, + { + "epoch": 0.05, + "learning_rate": 1.5021231422505309e-05, + "loss": 1.8395, + "step": 283 + }, + { + "epoch": 0.05, + "learning_rate": 1.5074309978768578e-05, + "loss": 1.7494, + "step": 284 + }, + { + "epoch": 0.05, + "learning_rate": 1.5127388535031847e-05, + "loss": 1.8256, + "step": 285 + }, + { + "epoch": 0.05, + "learning_rate": 1.5180467091295116e-05, + "loss": 1.7491, + "step": 286 + }, + { + "epoch": 0.05, + "learning_rate": 1.5233545647558386e-05, + "loss": 1.7623, + "step": 287 + }, + { + "epoch": 0.05, + "learning_rate": 1.5286624203821656e-05, + "loss": 1.7713, + "step": 288 + }, + { + "epoch": 0.05, + "learning_rate": 1.5339702760084927e-05, + "loss": 1.7782, + "step": 289 + }, + { + "epoch": 0.05, + "learning_rate": 1.5392781316348196e-05, + "loss": 1.7375, + "step": 290 + }, + { + "epoch": 0.05, + "learning_rate": 1.5445859872611464e-05, + "loss": 1.8366, + "step": 291 + }, + { + "epoch": 0.05, + "learning_rate": 1.5498938428874735e-05, + "loss": 1.7118, + "step": 292 + }, + { + "epoch": 0.05, + "learning_rate": 1.5552016985138003e-05, + "loss": 1.8163, + "step": 293 + }, + { + "epoch": 0.05, + "learning_rate": 1.5605095541401275e-05, + "loss": 1.8422, + "step": 294 + }, + { + "epoch": 0.05, + "learning_rate": 1.5658174097664543e-05, + "loss": 1.7868, + "step": 295 + }, + { + "epoch": 0.05, + "learning_rate": 1.5711252653927814e-05, + "loss": 1.7494, + "step": 296 + }, + { + "epoch": 0.05, + "learning_rate": 1.5764331210191083e-05, + "loss": 1.78, + "step": 297 + }, + { + "epoch": 0.05, + "learning_rate": 1.5817409766454354e-05, + "loss": 1.7398, + "step": 298 + }, + { + "epoch": 0.05, + "learning_rate": 1.5870488322717622e-05, + "loss": 1.7319, + "step": 299 + }, + { + "epoch": 0.05, + "learning_rate": 1.592356687898089e-05, + "loss": 1.7439, + "step": 300 + }, + { + "epoch": 0.05, + "learning_rate": 1.5976645435244162e-05, + "loss": 1.6106, + "step": 301 + }, + { + "epoch": 0.05, + "learning_rate": 1.602972399150743e-05, + "loss": 1.7283, + "step": 302 + }, + { + "epoch": 0.05, + "learning_rate": 1.60828025477707e-05, + "loss": 1.8201, + "step": 303 + }, + { + "epoch": 0.05, + "learning_rate": 1.613588110403397e-05, + "loss": 1.8263, + "step": 304 + }, + { + "epoch": 0.05, + "learning_rate": 1.618895966029724e-05, + "loss": 1.76, + "step": 305 + }, + { + "epoch": 0.05, + "learning_rate": 1.624203821656051e-05, + "loss": 1.8246, + "step": 306 + }, + { + "epoch": 0.05, + "learning_rate": 1.629511677282378e-05, + "loss": 1.7459, + "step": 307 + }, + { + "epoch": 0.05, + "learning_rate": 1.634819532908705e-05, + "loss": 1.7105, + "step": 308 + }, + { + "epoch": 0.05, + "learning_rate": 1.6401273885350317e-05, + "loss": 1.6788, + "step": 309 + }, + { + "epoch": 0.05, + "learning_rate": 1.6454352441613588e-05, + "loss": 1.7235, + "step": 310 + }, + { + "epoch": 0.05, + "learning_rate": 1.6507430997876856e-05, + "loss": 1.7301, + "step": 311 + }, + { + "epoch": 0.05, + "learning_rate": 1.6560509554140128e-05, + "loss": 1.7603, + "step": 312 + }, + { + "epoch": 0.05, + "learning_rate": 1.6613588110403396e-05, + "loss": 1.785, + "step": 313 + }, + { + "epoch": 0.05, + "learning_rate": 1.6666666666666667e-05, + "loss": 1.7839, + "step": 314 + }, + { + "epoch": 0.05, + "learning_rate": 1.6719745222929936e-05, + "loss": 1.8071, + "step": 315 + }, + { + "epoch": 0.05, + "learning_rate": 1.6772823779193207e-05, + "loss": 1.7206, + "step": 316 + }, + { + "epoch": 0.05, + "learning_rate": 1.6825902335456475e-05, + "loss": 1.7958, + "step": 317 + }, + { + "epoch": 0.05, + "learning_rate": 1.6878980891719747e-05, + "loss": 1.7571, + "step": 318 + }, + { + "epoch": 0.05, + "learning_rate": 1.6932059447983015e-05, + "loss": 1.7329, + "step": 319 + }, + { + "epoch": 0.05, + "learning_rate": 1.6985138004246283e-05, + "loss": 1.7394, + "step": 320 + }, + { + "epoch": 0.05, + "learning_rate": 1.7038216560509554e-05, + "loss": 1.7653, + "step": 321 + }, + { + "epoch": 0.05, + "learning_rate": 1.7091295116772823e-05, + "loss": 1.6213, + "step": 322 + }, + { + "epoch": 0.05, + "learning_rate": 1.7144373673036094e-05, + "loss": 1.7928, + "step": 323 + }, + { + "epoch": 0.05, + "learning_rate": 1.7197452229299362e-05, + "loss": 1.7673, + "step": 324 + }, + { + "epoch": 0.05, + "learning_rate": 1.7250530785562634e-05, + "loss": 1.6208, + "step": 325 + }, + { + "epoch": 0.05, + "learning_rate": 1.7303609341825902e-05, + "loss": 1.7639, + "step": 326 + }, + { + "epoch": 0.05, + "learning_rate": 1.7356687898089173e-05, + "loss": 1.8074, + "step": 327 + }, + { + "epoch": 0.05, + "learning_rate": 1.740976645435244e-05, + "loss": 1.766, + "step": 328 + }, + { + "epoch": 0.05, + "learning_rate": 1.746284501061571e-05, + "loss": 1.7995, + "step": 329 + }, + { + "epoch": 0.05, + "learning_rate": 1.751592356687898e-05, + "loss": 1.7793, + "step": 330 + }, + { + "epoch": 0.05, + "learning_rate": 1.756900212314225e-05, + "loss": 1.7708, + "step": 331 + }, + { + "epoch": 0.05, + "learning_rate": 1.762208067940552e-05, + "loss": 1.7271, + "step": 332 + }, + { + "epoch": 0.05, + "learning_rate": 1.767515923566879e-05, + "loss": 1.7913, + "step": 333 + }, + { + "epoch": 0.05, + "learning_rate": 1.772823779193206e-05, + "loss": 1.7497, + "step": 334 + }, + { + "epoch": 0.05, + "learning_rate": 1.7781316348195328e-05, + "loss": 1.7389, + "step": 335 + }, + { + "epoch": 0.05, + "learning_rate": 1.78343949044586e-05, + "loss": 1.7852, + "step": 336 + }, + { + "epoch": 0.05, + "learning_rate": 1.7887473460721868e-05, + "loss": 1.7337, + "step": 337 + }, + { + "epoch": 0.05, + "learning_rate": 1.7940552016985136e-05, + "loss": 1.7797, + "step": 338 + }, + { + "epoch": 0.05, + "learning_rate": 1.7993630573248407e-05, + "loss": 1.7064, + "step": 339 + }, + { + "epoch": 0.05, + "learning_rate": 1.8046709129511676e-05, + "loss": 1.6687, + "step": 340 + }, + { + "epoch": 0.05, + "learning_rate": 1.8099787685774947e-05, + "loss": 1.6851, + "step": 341 + }, + { + "epoch": 0.05, + "learning_rate": 1.8152866242038215e-05, + "loss": 1.7716, + "step": 342 + }, + { + "epoch": 0.05, + "learning_rate": 1.8205944798301487e-05, + "loss": 1.7418, + "step": 343 + }, + { + "epoch": 0.05, + "learning_rate": 1.8259023354564755e-05, + "loss": 1.7734, + "step": 344 + }, + { + "epoch": 0.05, + "learning_rate": 1.8312101910828026e-05, + "loss": 1.7266, + "step": 345 + }, + { + "epoch": 0.06, + "learning_rate": 1.8365180467091294e-05, + "loss": 1.7366, + "step": 346 + }, + { + "epoch": 0.06, + "learning_rate": 1.8418259023354566e-05, + "loss": 1.8191, + "step": 347 + }, + { + "epoch": 0.06, + "learning_rate": 1.8471337579617834e-05, + "loss": 1.7777, + "step": 348 + }, + { + "epoch": 0.06, + "learning_rate": 1.8524416135881102e-05, + "loss": 1.6773, + "step": 349 + }, + { + "epoch": 0.06, + "learning_rate": 1.8577494692144374e-05, + "loss": 1.7571, + "step": 350 + }, + { + "epoch": 0.06, + "learning_rate": 1.8630573248407642e-05, + "loss": 1.7991, + "step": 351 + }, + { + "epoch": 0.06, + "learning_rate": 1.8683651804670913e-05, + "loss": 1.6566, + "step": 352 + }, + { + "epoch": 0.06, + "learning_rate": 1.873673036093418e-05, + "loss": 1.7523, + "step": 353 + }, + { + "epoch": 0.06, + "learning_rate": 1.8789808917197453e-05, + "loss": 1.7112, + "step": 354 + }, + { + "epoch": 0.06, + "learning_rate": 1.8842887473460724e-05, + "loss": 1.7251, + "step": 355 + }, + { + "epoch": 0.06, + "learning_rate": 1.8895966029723992e-05, + "loss": 1.6962, + "step": 356 + }, + { + "epoch": 0.06, + "learning_rate": 1.8949044585987264e-05, + "loss": 1.7735, + "step": 357 + }, + { + "epoch": 0.06, + "learning_rate": 1.9002123142250532e-05, + "loss": 1.6943, + "step": 358 + }, + { + "epoch": 0.06, + "learning_rate": 1.9055201698513804e-05, + "loss": 1.6965, + "step": 359 + }, + { + "epoch": 0.06, + "learning_rate": 1.910828025477707e-05, + "loss": 1.7586, + "step": 360 + }, + { + "epoch": 0.06, + "learning_rate": 1.9161358811040343e-05, + "loss": 1.7877, + "step": 361 + }, + { + "epoch": 0.06, + "learning_rate": 1.921443736730361e-05, + "loss": 1.7255, + "step": 362 + }, + { + "epoch": 0.06, + "learning_rate": 1.9267515923566883e-05, + "loss": 1.8001, + "step": 363 + }, + { + "epoch": 0.06, + "learning_rate": 1.932059447983015e-05, + "loss": 1.7364, + "step": 364 + }, + { + "epoch": 0.06, + "learning_rate": 1.937367303609342e-05, + "loss": 1.7547, + "step": 365 + }, + { + "epoch": 0.06, + "learning_rate": 1.942675159235669e-05, + "loss": 1.7793, + "step": 366 + }, + { + "epoch": 0.06, + "learning_rate": 1.947983014861996e-05, + "loss": 1.7245, + "step": 367 + }, + { + "epoch": 0.06, + "learning_rate": 1.953290870488323e-05, + "loss": 1.8604, + "step": 368 + }, + { + "epoch": 0.06, + "learning_rate": 1.9585987261146498e-05, + "loss": 1.7412, + "step": 369 + }, + { + "epoch": 0.06, + "learning_rate": 1.963906581740977e-05, + "loss": 1.742, + "step": 370 + }, + { + "epoch": 0.06, + "learning_rate": 1.9692144373673038e-05, + "loss": 1.7016, + "step": 371 + }, + { + "epoch": 0.06, + "learning_rate": 1.974522292993631e-05, + "loss": 1.8565, + "step": 372 + }, + { + "epoch": 0.06, + "learning_rate": 1.9798301486199577e-05, + "loss": 1.7629, + "step": 373 + }, + { + "epoch": 0.06, + "learning_rate": 1.9851380042462846e-05, + "loss": 1.7308, + "step": 374 + }, + { + "epoch": 0.06, + "learning_rate": 1.9904458598726117e-05, + "loss": 1.7234, + "step": 375 + }, + { + "epoch": 0.06, + "learning_rate": 1.9957537154989385e-05, + "loss": 1.69, + "step": 376 + }, + { + "epoch": 0.06, + "learning_rate": 2.0010615711252657e-05, + "loss": 1.6764, + "step": 377 + }, + { + "epoch": 0.06, + "learning_rate": 2.0063694267515925e-05, + "loss": 1.7348, + "step": 378 + }, + { + "epoch": 0.06, + "learning_rate": 2.0116772823779196e-05, + "loss": 1.7891, + "step": 379 + }, + { + "epoch": 0.06, + "learning_rate": 2.0169851380042464e-05, + "loss": 1.6461, + "step": 380 + }, + { + "epoch": 0.06, + "learning_rate": 2.0222929936305736e-05, + "loss": 1.7279, + "step": 381 + }, + { + "epoch": 0.06, + "learning_rate": 2.0276008492569004e-05, + "loss": 1.7295, + "step": 382 + }, + { + "epoch": 0.06, + "learning_rate": 2.0329087048832275e-05, + "loss": 1.725, + "step": 383 + }, + { + "epoch": 0.06, + "learning_rate": 2.0382165605095544e-05, + "loss": 1.7318, + "step": 384 + }, + { + "epoch": 0.06, + "learning_rate": 2.043524416135881e-05, + "loss": 1.7745, + "step": 385 + }, + { + "epoch": 0.06, + "learning_rate": 2.0488322717622083e-05, + "loss": 1.6553, + "step": 386 + }, + { + "epoch": 0.06, + "learning_rate": 2.054140127388535e-05, + "loss": 1.7428, + "step": 387 + }, + { + "epoch": 0.06, + "learning_rate": 2.0594479830148623e-05, + "loss": 1.764, + "step": 388 + }, + { + "epoch": 0.06, + "learning_rate": 2.064755838641189e-05, + "loss": 1.7641, + "step": 389 + }, + { + "epoch": 0.06, + "learning_rate": 2.0700636942675162e-05, + "loss": 1.72, + "step": 390 + }, + { + "epoch": 0.06, + "learning_rate": 2.075371549893843e-05, + "loss": 1.7725, + "step": 391 + }, + { + "epoch": 0.06, + "learning_rate": 2.0806794055201702e-05, + "loss": 1.755, + "step": 392 + }, + { + "epoch": 0.06, + "learning_rate": 2.085987261146497e-05, + "loss": 1.843, + "step": 393 + }, + { + "epoch": 0.06, + "learning_rate": 2.0912951167728238e-05, + "loss": 1.6745, + "step": 394 + }, + { + "epoch": 0.06, + "learning_rate": 2.096602972399151e-05, + "loss": 1.8095, + "step": 395 + }, + { + "epoch": 0.06, + "learning_rate": 2.1019108280254778e-05, + "loss": 1.7815, + "step": 396 + }, + { + "epoch": 0.06, + "learning_rate": 2.107218683651805e-05, + "loss": 1.6943, + "step": 397 + }, + { + "epoch": 0.06, + "learning_rate": 2.1125265392781317e-05, + "loss": 1.7335, + "step": 398 + }, + { + "epoch": 0.06, + "learning_rate": 2.117834394904459e-05, + "loss": 1.7863, + "step": 399 + }, + { + "epoch": 0.06, + "learning_rate": 2.1231422505307857e-05, + "loss": 1.7079, + "step": 400 + }, + { + "epoch": 0.06, + "learning_rate": 2.128450106157113e-05, + "loss": 1.928, + "step": 401 + }, + { + "epoch": 0.06, + "learning_rate": 2.1337579617834397e-05, + "loss": 1.7933, + "step": 402 + }, + { + "epoch": 0.06, + "learning_rate": 2.1390658174097665e-05, + "loss": 1.7624, + "step": 403 + }, + { + "epoch": 0.06, + "learning_rate": 2.1443736730360936e-05, + "loss": 1.7532, + "step": 404 + }, + { + "epoch": 0.06, + "learning_rate": 2.1496815286624204e-05, + "loss": 1.7005, + "step": 405 + }, + { + "epoch": 0.06, + "learning_rate": 2.1549893842887476e-05, + "loss": 1.7096, + "step": 406 + }, + { + "epoch": 0.06, + "learning_rate": 2.1602972399150744e-05, + "loss": 1.7014, + "step": 407 + }, + { + "epoch": 0.06, + "learning_rate": 2.1656050955414015e-05, + "loss": 1.7562, + "step": 408 + }, + { + "epoch": 0.07, + "learning_rate": 2.1709129511677284e-05, + "loss": 1.7051, + "step": 409 + }, + { + "epoch": 0.07, + "learning_rate": 2.1762208067940555e-05, + "loss": 1.7441, + "step": 410 + }, + { + "epoch": 0.07, + "learning_rate": 2.1815286624203823e-05, + "loss": 1.7231, + "step": 411 + }, + { + "epoch": 0.07, + "learning_rate": 2.1868365180467095e-05, + "loss": 1.6898, + "step": 412 + }, + { + "epoch": 0.07, + "learning_rate": 2.1921443736730363e-05, + "loss": 1.6999, + "step": 413 + }, + { + "epoch": 0.07, + "learning_rate": 2.197452229299363e-05, + "loss": 1.659, + "step": 414 + }, + { + "epoch": 0.07, + "learning_rate": 2.2027600849256902e-05, + "loss": 1.73, + "step": 415 + }, + { + "epoch": 0.07, + "learning_rate": 2.208067940552017e-05, + "loss": 1.7718, + "step": 416 + }, + { + "epoch": 0.07, + "learning_rate": 2.2133757961783442e-05, + "loss": 1.7391, + "step": 417 + }, + { + "epoch": 0.07, + "learning_rate": 2.218683651804671e-05, + "loss": 1.6849, + "step": 418 + }, + { + "epoch": 0.07, + "learning_rate": 2.223991507430998e-05, + "loss": 1.6969, + "step": 419 + }, + { + "epoch": 0.07, + "learning_rate": 2.229299363057325e-05, + "loss": 1.7341, + "step": 420 + }, + { + "epoch": 0.07, + "learning_rate": 2.234607218683652e-05, + "loss": 1.6623, + "step": 421 + }, + { + "epoch": 0.07, + "learning_rate": 2.239915074309979e-05, + "loss": 1.7084, + "step": 422 + }, + { + "epoch": 0.07, + "learning_rate": 2.2452229299363057e-05, + "loss": 1.7769, + "step": 423 + }, + { + "epoch": 0.07, + "learning_rate": 2.250530785562633e-05, + "loss": 1.7378, + "step": 424 + }, + { + "epoch": 0.07, + "learning_rate": 2.2558386411889597e-05, + "loss": 1.7355, + "step": 425 + }, + { + "epoch": 0.07, + "learning_rate": 2.261146496815287e-05, + "loss": 1.8237, + "step": 426 + }, + { + "epoch": 0.07, + "learning_rate": 2.2664543524416137e-05, + "loss": 1.6815, + "step": 427 + }, + { + "epoch": 0.07, + "learning_rate": 2.2717622080679408e-05, + "loss": 1.7585, + "step": 428 + }, + { + "epoch": 0.07, + "learning_rate": 2.2770700636942676e-05, + "loss": 1.764, + "step": 429 + }, + { + "epoch": 0.07, + "learning_rate": 2.2823779193205948e-05, + "loss": 1.7687, + "step": 430 + }, + { + "epoch": 0.07, + "learning_rate": 2.2876857749469216e-05, + "loss": 1.6967, + "step": 431 + }, + { + "epoch": 0.07, + "learning_rate": 2.2929936305732484e-05, + "loss": 1.658, + "step": 432 + }, + { + "epoch": 0.07, + "learning_rate": 2.2983014861995755e-05, + "loss": 1.7407, + "step": 433 + }, + { + "epoch": 0.07, + "learning_rate": 2.3036093418259024e-05, + "loss": 1.7635, + "step": 434 + }, + { + "epoch": 0.07, + "learning_rate": 2.3089171974522295e-05, + "loss": 1.7269, + "step": 435 + }, + { + "epoch": 0.07, + "learning_rate": 2.3142250530785563e-05, + "loss": 1.7232, + "step": 436 + }, + { + "epoch": 0.07, + "learning_rate": 2.3195329087048835e-05, + "loss": 1.7296, + "step": 437 + }, + { + "epoch": 0.07, + "learning_rate": 2.3248407643312103e-05, + "loss": 1.773, + "step": 438 + }, + { + "epoch": 0.07, + "learning_rate": 2.3301486199575374e-05, + "loss": 1.7638, + "step": 439 + }, + { + "epoch": 0.07, + "learning_rate": 2.3354564755838642e-05, + "loss": 1.7348, + "step": 440 + }, + { + "epoch": 0.07, + "learning_rate": 2.3407643312101914e-05, + "loss": 1.7816, + "step": 441 + }, + { + "epoch": 0.07, + "learning_rate": 2.3460721868365182e-05, + "loss": 1.6746, + "step": 442 + }, + { + "epoch": 0.07, + "learning_rate": 2.351380042462845e-05, + "loss": 1.7239, + "step": 443 + }, + { + "epoch": 0.07, + "learning_rate": 2.356687898089172e-05, + "loss": 1.8, + "step": 444 + }, + { + "epoch": 0.07, + "learning_rate": 2.361995753715499e-05, + "loss": 1.7302, + "step": 445 + }, + { + "epoch": 0.07, + "learning_rate": 2.367303609341826e-05, + "loss": 1.7168, + "step": 446 + }, + { + "epoch": 0.07, + "learning_rate": 2.372611464968153e-05, + "loss": 1.7488, + "step": 447 + }, + { + "epoch": 0.07, + "learning_rate": 2.37791932059448e-05, + "loss": 1.7816, + "step": 448 + }, + { + "epoch": 0.07, + "learning_rate": 2.383227176220807e-05, + "loss": 1.7422, + "step": 449 + }, + { + "epoch": 0.07, + "learning_rate": 2.388535031847134e-05, + "loss": 1.7034, + "step": 450 + }, + { + "epoch": 0.07, + "learning_rate": 2.393842887473461e-05, + "loss": 1.6768, + "step": 451 + }, + { + "epoch": 0.07, + "learning_rate": 2.3991507430997877e-05, + "loss": 1.7316, + "step": 452 + }, + { + "epoch": 0.07, + "learning_rate": 2.4044585987261148e-05, + "loss": 1.7459, + "step": 453 + }, + { + "epoch": 0.07, + "learning_rate": 2.4097664543524416e-05, + "loss": 1.7408, + "step": 454 + }, + { + "epoch": 0.07, + "learning_rate": 2.4150743099787688e-05, + "loss": 1.7735, + "step": 455 + }, + { + "epoch": 0.07, + "learning_rate": 2.4203821656050956e-05, + "loss": 1.7786, + "step": 456 + }, + { + "epoch": 0.07, + "learning_rate": 2.4256900212314227e-05, + "loss": 1.7398, + "step": 457 + }, + { + "epoch": 0.07, + "learning_rate": 2.4309978768577495e-05, + "loss": 1.6686, + "step": 458 + }, + { + "epoch": 0.07, + "learning_rate": 2.4363057324840767e-05, + "loss": 1.7383, + "step": 459 + }, + { + "epoch": 0.07, + "learning_rate": 2.4416135881104035e-05, + "loss": 1.6986, + "step": 460 + }, + { + "epoch": 0.07, + "learning_rate": 2.4469214437367307e-05, + "loss": 1.7243, + "step": 461 + }, + { + "epoch": 0.07, + "learning_rate": 2.4522292993630575e-05, + "loss": 1.7103, + "step": 462 + }, + { + "epoch": 0.07, + "learning_rate": 2.4575371549893843e-05, + "loss": 1.6854, + "step": 463 + }, + { + "epoch": 0.07, + "learning_rate": 2.4628450106157114e-05, + "loss": 1.7729, + "step": 464 + }, + { + "epoch": 0.07, + "learning_rate": 2.4681528662420382e-05, + "loss": 1.7461, + "step": 465 + }, + { + "epoch": 0.07, + "learning_rate": 2.4734607218683654e-05, + "loss": 1.7199, + "step": 466 + }, + { + "epoch": 0.07, + "learning_rate": 2.4787685774946922e-05, + "loss": 1.6926, + "step": 467 + }, + { + "epoch": 0.07, + "learning_rate": 2.4840764331210193e-05, + "loss": 1.6162, + "step": 468 + }, + { + "epoch": 0.07, + "learning_rate": 2.489384288747346e-05, + "loss": 1.6949, + "step": 469 + }, + { + "epoch": 0.07, + "learning_rate": 2.4946921443736733e-05, + "loss": 1.6942, + "step": 470 + }, + { + "epoch": 0.08, + "learning_rate": 2.5e-05, + "loss": 1.7611, + "step": 471 + }, + { + "epoch": 0.08, + "learning_rate": 2.505307855626327e-05, + "loss": 1.7071, + "step": 472 + }, + { + "epoch": 0.08, + "learning_rate": 2.510615711252654e-05, + "loss": 1.7811, + "step": 473 + }, + { + "epoch": 0.08, + "learning_rate": 2.515923566878981e-05, + "loss": 1.6702, + "step": 474 + }, + { + "epoch": 0.08, + "learning_rate": 2.521231422505308e-05, + "loss": 1.785, + "step": 475 + }, + { + "epoch": 0.08, + "learning_rate": 2.526539278131635e-05, + "loss": 1.78, + "step": 476 + }, + { + "epoch": 0.08, + "learning_rate": 2.531847133757962e-05, + "loss": 1.6166, + "step": 477 + }, + { + "epoch": 0.08, + "learning_rate": 2.5371549893842888e-05, + "loss": 1.7741, + "step": 478 + }, + { + "epoch": 0.08, + "learning_rate": 2.542462845010616e-05, + "loss": 1.7162, + "step": 479 + }, + { + "epoch": 0.08, + "learning_rate": 2.5477707006369428e-05, + "loss": 1.7183, + "step": 480 + }, + { + "epoch": 0.08, + "learning_rate": 2.5530785562632696e-05, + "loss": 1.7235, + "step": 481 + }, + { + "epoch": 0.08, + "learning_rate": 2.5583864118895967e-05, + "loss": 1.643, + "step": 482 + }, + { + "epoch": 0.08, + "learning_rate": 2.5636942675159235e-05, + "loss": 1.7505, + "step": 483 + }, + { + "epoch": 0.08, + "learning_rate": 2.5690021231422507e-05, + "loss": 1.7042, + "step": 484 + }, + { + "epoch": 0.08, + "learning_rate": 2.5743099787685775e-05, + "loss": 1.7562, + "step": 485 + }, + { + "epoch": 0.08, + "learning_rate": 2.5796178343949047e-05, + "loss": 1.6997, + "step": 486 + }, + { + "epoch": 0.08, + "learning_rate": 2.5849256900212315e-05, + "loss": 1.7107, + "step": 487 + }, + { + "epoch": 0.08, + "learning_rate": 2.5902335456475586e-05, + "loss": 1.7249, + "step": 488 + }, + { + "epoch": 0.08, + "learning_rate": 2.5955414012738854e-05, + "loss": 1.7123, + "step": 489 + }, + { + "epoch": 0.08, + "learning_rate": 2.6008492569002126e-05, + "loss": 1.7532, + "step": 490 + }, + { + "epoch": 0.08, + "learning_rate": 2.6061571125265394e-05, + "loss": 1.7075, + "step": 491 + }, + { + "epoch": 0.08, + "learning_rate": 2.6114649681528662e-05, + "loss": 1.7439, + "step": 492 + }, + { + "epoch": 0.08, + "learning_rate": 2.6167728237791933e-05, + "loss": 1.7153, + "step": 493 + }, + { + "epoch": 0.08, + "learning_rate": 2.62208067940552e-05, + "loss": 1.8087, + "step": 494 + }, + { + "epoch": 0.08, + "learning_rate": 2.6273885350318473e-05, + "loss": 1.6498, + "step": 495 + }, + { + "epoch": 0.08, + "learning_rate": 2.632696390658174e-05, + "loss": 1.7434, + "step": 496 + }, + { + "epoch": 0.08, + "learning_rate": 2.6380042462845013e-05, + "loss": 1.7606, + "step": 497 + }, + { + "epoch": 0.08, + "learning_rate": 2.643312101910828e-05, + "loss": 1.6246, + "step": 498 + }, + { + "epoch": 0.08, + "learning_rate": 2.6486199575371552e-05, + "loss": 1.739, + "step": 499 + }, + { + "epoch": 0.08, + "learning_rate": 2.653927813163482e-05, + "loss": 1.7452, + "step": 500 + }, + { + "epoch": 0.08, + "learning_rate": 2.659235668789809e-05, + "loss": 1.7566, + "step": 501 + }, + { + "epoch": 0.08, + "learning_rate": 2.664543524416136e-05, + "loss": 1.7023, + "step": 502 + }, + { + "epoch": 0.08, + "learning_rate": 2.6698513800424628e-05, + "loss": 1.6305, + "step": 503 + }, + { + "epoch": 0.08, + "learning_rate": 2.67515923566879e-05, + "loss": 1.7754, + "step": 504 + }, + { + "epoch": 0.08, + "learning_rate": 2.6804670912951168e-05, + "loss": 1.6847, + "step": 505 + }, + { + "epoch": 0.08, + "learning_rate": 2.685774946921444e-05, + "loss": 1.7802, + "step": 506 + }, + { + "epoch": 0.08, + "learning_rate": 2.6910828025477707e-05, + "loss": 1.7356, + "step": 507 + }, + { + "epoch": 0.08, + "learning_rate": 2.696390658174098e-05, + "loss": 1.7267, + "step": 508 + }, + { + "epoch": 0.08, + "learning_rate": 2.7016985138004247e-05, + "loss": 1.7084, + "step": 509 + }, + { + "epoch": 0.08, + "learning_rate": 2.707006369426752e-05, + "loss": 1.7519, + "step": 510 + }, + { + "epoch": 0.08, + "learning_rate": 2.7123142250530787e-05, + "loss": 1.7402, + "step": 511 + }, + { + "epoch": 0.08, + "learning_rate": 2.7176220806794055e-05, + "loss": 1.7067, + "step": 512 + }, + { + "epoch": 0.08, + "learning_rate": 2.7229299363057326e-05, + "loss": 1.6315, + "step": 513 + }, + { + "epoch": 0.08, + "learning_rate": 2.7282377919320594e-05, + "loss": 1.6718, + "step": 514 + }, + { + "epoch": 0.08, + "learning_rate": 2.7335456475583866e-05, + "loss": 1.6731, + "step": 515 + }, + { + "epoch": 0.08, + "learning_rate": 2.7388535031847134e-05, + "loss": 1.6517, + "step": 516 + }, + { + "epoch": 0.08, + "learning_rate": 2.7441613588110405e-05, + "loss": 1.7357, + "step": 517 + }, + { + "epoch": 0.08, + "learning_rate": 2.7494692144373673e-05, + "loss": 1.7597, + "step": 518 + }, + { + "epoch": 0.08, + "learning_rate": 2.7547770700636945e-05, + "loss": 1.6275, + "step": 519 + }, + { + "epoch": 0.08, + "learning_rate": 2.7600849256900213e-05, + "loss": 1.7389, + "step": 520 + }, + { + "epoch": 0.08, + "learning_rate": 2.765392781316348e-05, + "loss": 1.6022, + "step": 521 + }, + { + "epoch": 0.08, + "learning_rate": 2.7707006369426753e-05, + "loss": 1.7589, + "step": 522 + }, + { + "epoch": 0.08, + "learning_rate": 2.776008492569002e-05, + "loss": 1.6912, + "step": 523 + }, + { + "epoch": 0.08, + "learning_rate": 2.7813163481953292e-05, + "loss": 1.6904, + "step": 524 + }, + { + "epoch": 0.08, + "learning_rate": 2.786624203821656e-05, + "loss": 1.746, + "step": 525 + }, + { + "epoch": 0.08, + "learning_rate": 2.7919320594479832e-05, + "loss": 1.6886, + "step": 526 + }, + { + "epoch": 0.08, + "learning_rate": 2.79723991507431e-05, + "loss": 1.7287, + "step": 527 + }, + { + "epoch": 0.08, + "learning_rate": 2.802547770700637e-05, + "loss": 1.6906, + "step": 528 + }, + { + "epoch": 0.08, + "learning_rate": 2.807855626326964e-05, + "loss": 1.6573, + "step": 529 + }, + { + "epoch": 0.08, + "learning_rate": 2.8131634819532908e-05, + "loss": 1.9117, + "step": 530 + }, + { + "epoch": 0.08, + "learning_rate": 2.818471337579618e-05, + "loss": 1.624, + "step": 531 + }, + { + "epoch": 0.08, + "learning_rate": 2.8237791932059447e-05, + "loss": 1.6885, + "step": 532 + }, + { + "epoch": 0.08, + "learning_rate": 2.829087048832272e-05, + "loss": 1.6795, + "step": 533 + }, + { + "epoch": 0.09, + "learning_rate": 2.8343949044585987e-05, + "loss": 1.6886, + "step": 534 + }, + { + "epoch": 0.09, + "learning_rate": 2.839702760084926e-05, + "loss": 1.7045, + "step": 535 + }, + { + "epoch": 0.09, + "learning_rate": 2.8450106157112527e-05, + "loss": 1.6994, + "step": 536 + }, + { + "epoch": 0.09, + "learning_rate": 2.8503184713375798e-05, + "loss": 1.7574, + "step": 537 + }, + { + "epoch": 0.09, + "learning_rate": 2.8556263269639066e-05, + "loss": 1.7544, + "step": 538 + }, + { + "epoch": 0.09, + "learning_rate": 2.8609341825902338e-05, + "loss": 1.7086, + "step": 539 + }, + { + "epoch": 0.09, + "learning_rate": 2.8662420382165606e-05, + "loss": 1.7088, + "step": 540 + }, + { + "epoch": 0.09, + "learning_rate": 2.8715498938428874e-05, + "loss": 1.7203, + "step": 541 + }, + { + "epoch": 0.09, + "learning_rate": 2.8768577494692145e-05, + "loss": 1.5918, + "step": 542 + }, + { + "epoch": 0.09, + "learning_rate": 2.8821656050955413e-05, + "loss": 1.7634, + "step": 543 + }, + { + "epoch": 0.09, + "learning_rate": 2.8874734607218685e-05, + "loss": 1.6718, + "step": 544 + }, + { + "epoch": 0.09, + "learning_rate": 2.8927813163481953e-05, + "loss": 1.6391, + "step": 545 + }, + { + "epoch": 0.09, + "learning_rate": 2.8980891719745225e-05, + "loss": 1.6752, + "step": 546 + }, + { + "epoch": 0.09, + "learning_rate": 2.9033970276008493e-05, + "loss": 1.7159, + "step": 547 + }, + { + "epoch": 0.09, + "learning_rate": 2.9087048832271764e-05, + "loss": 1.6554, + "step": 548 + }, + { + "epoch": 0.09, + "learning_rate": 2.9140127388535032e-05, + "loss": 1.7115, + "step": 549 + }, + { + "epoch": 0.09, + "learning_rate": 2.91932059447983e-05, + "loss": 1.7797, + "step": 550 + }, + { + "epoch": 0.09, + "learning_rate": 2.9246284501061572e-05, + "loss": 1.6947, + "step": 551 + }, + { + "epoch": 0.09, + "learning_rate": 2.929936305732484e-05, + "loss": 1.6709, + "step": 552 + }, + { + "epoch": 0.09, + "learning_rate": 2.935244161358811e-05, + "loss": 1.7502, + "step": 553 + }, + { + "epoch": 0.09, + "learning_rate": 2.940552016985138e-05, + "loss": 1.771, + "step": 554 + }, + { + "epoch": 0.09, + "learning_rate": 2.945859872611465e-05, + "loss": 1.7101, + "step": 555 + }, + { + "epoch": 0.09, + "learning_rate": 2.951167728237792e-05, + "loss": 1.6721, + "step": 556 + }, + { + "epoch": 0.09, + "learning_rate": 2.956475583864119e-05, + "loss": 1.8008, + "step": 557 + }, + { + "epoch": 0.09, + "learning_rate": 2.961783439490446e-05, + "loss": 1.702, + "step": 558 + }, + { + "epoch": 0.09, + "learning_rate": 2.9670912951167727e-05, + "loss": 1.7524, + "step": 559 + }, + { + "epoch": 0.09, + "learning_rate": 2.9723991507431e-05, + "loss": 1.6954, + "step": 560 + }, + { + "epoch": 0.09, + "learning_rate": 2.9777070063694267e-05, + "loss": 1.7373, + "step": 561 + }, + { + "epoch": 0.09, + "learning_rate": 2.9830148619957538e-05, + "loss": 1.7402, + "step": 562 + }, + { + "epoch": 0.09, + "learning_rate": 2.9883227176220806e-05, + "loss": 1.6732, + "step": 563 + }, + { + "epoch": 0.09, + "learning_rate": 2.9936305732484078e-05, + "loss": 1.7241, + "step": 564 + }, + { + "epoch": 0.09, + "learning_rate": 2.9989384288747346e-05, + "loss": 1.7487, + "step": 565 + }, + { + "epoch": 0.09, + "learning_rate": 3.0042462845010617e-05, + "loss": 1.7186, + "step": 566 + }, + { + "epoch": 0.09, + "learning_rate": 3.0095541401273885e-05, + "loss": 1.6738, + "step": 567 + }, + { + "epoch": 0.09, + "learning_rate": 3.0148619957537157e-05, + "loss": 1.7535, + "step": 568 + }, + { + "epoch": 0.09, + "learning_rate": 3.0201698513800425e-05, + "loss": 1.7228, + "step": 569 + }, + { + "epoch": 0.09, + "learning_rate": 3.0254777070063693e-05, + "loss": 1.714, + "step": 570 + }, + { + "epoch": 0.09, + "learning_rate": 3.0307855626326965e-05, + "loss": 1.706, + "step": 571 + }, + { + "epoch": 0.09, + "learning_rate": 3.0360934182590233e-05, + "loss": 1.6977, + "step": 572 + }, + { + "epoch": 0.09, + "learning_rate": 3.0414012738853504e-05, + "loss": 1.7574, + "step": 573 + }, + { + "epoch": 0.09, + "learning_rate": 3.0467091295116772e-05, + "loss": 1.677, + "step": 574 + }, + { + "epoch": 0.09, + "learning_rate": 3.0520169851380044e-05, + "loss": 1.6087, + "step": 575 + }, + { + "epoch": 0.09, + "learning_rate": 3.057324840764331e-05, + "loss": 1.7672, + "step": 576 + }, + { + "epoch": 0.09, + "learning_rate": 3.062632696390658e-05, + "loss": 1.7668, + "step": 577 + }, + { + "epoch": 0.09, + "learning_rate": 3.0679405520169855e-05, + "loss": 1.6475, + "step": 578 + }, + { + "epoch": 0.09, + "learning_rate": 3.073248407643312e-05, + "loss": 1.688, + "step": 579 + }, + { + "epoch": 0.09, + "learning_rate": 3.078556263269639e-05, + "loss": 1.7369, + "step": 580 + }, + { + "epoch": 0.09, + "learning_rate": 3.083864118895966e-05, + "loss": 1.7315, + "step": 581 + }, + { + "epoch": 0.09, + "learning_rate": 3.089171974522293e-05, + "loss": 1.7317, + "step": 582 + }, + { + "epoch": 0.09, + "learning_rate": 3.09447983014862e-05, + "loss": 1.7168, + "step": 583 + }, + { + "epoch": 0.09, + "learning_rate": 3.099787685774947e-05, + "loss": 1.7728, + "step": 584 + }, + { + "epoch": 0.09, + "learning_rate": 3.105095541401274e-05, + "loss": 1.7395, + "step": 585 + }, + { + "epoch": 0.09, + "learning_rate": 3.1104033970276007e-05, + "loss": 1.6962, + "step": 586 + }, + { + "epoch": 0.09, + "learning_rate": 3.115711252653928e-05, + "loss": 1.7178, + "step": 587 + }, + { + "epoch": 0.09, + "learning_rate": 3.121019108280255e-05, + "loss": 1.6498, + "step": 588 + }, + { + "epoch": 0.09, + "learning_rate": 3.126326963906582e-05, + "loss": 1.7007, + "step": 589 + }, + { + "epoch": 0.09, + "learning_rate": 3.1316348195329086e-05, + "loss": 1.6954, + "step": 590 + }, + { + "epoch": 0.09, + "learning_rate": 3.1369426751592354e-05, + "loss": 1.7347, + "step": 591 + }, + { + "epoch": 0.09, + "learning_rate": 3.142250530785563e-05, + "loss": 1.6871, + "step": 592 + }, + { + "epoch": 0.09, + "learning_rate": 3.14755838641189e-05, + "loss": 1.7315, + "step": 593 + }, + { + "epoch": 0.09, + "learning_rate": 3.1528662420382165e-05, + "loss": 1.6435, + "step": 594 + }, + { + "epoch": 0.09, + "learning_rate": 3.158174097664543e-05, + "loss": 1.7452, + "step": 595 + }, + { + "epoch": 0.09, + "learning_rate": 3.163481953290871e-05, + "loss": 1.6703, + "step": 596 + }, + { + "epoch": 0.1, + "learning_rate": 3.1687898089171976e-05, + "loss": 1.7151, + "step": 597 + }, + { + "epoch": 0.1, + "learning_rate": 3.1740976645435244e-05, + "loss": 1.6608, + "step": 598 + }, + { + "epoch": 0.1, + "learning_rate": 3.179405520169851e-05, + "loss": 1.6194, + "step": 599 + }, + { + "epoch": 0.1, + "learning_rate": 3.184713375796178e-05, + "loss": 1.6481, + "step": 600 + }, + { + "epoch": 0.1, + "learning_rate": 3.1900212314225055e-05, + "loss": 1.7341, + "step": 601 + }, + { + "epoch": 0.1, + "learning_rate": 3.1953290870488323e-05, + "loss": 1.7129, + "step": 602 + }, + { + "epoch": 0.1, + "learning_rate": 3.200636942675159e-05, + "loss": 1.7071, + "step": 603 + }, + { + "epoch": 0.1, + "learning_rate": 3.205944798301486e-05, + "loss": 1.7723, + "step": 604 + }, + { + "epoch": 0.1, + "learning_rate": 3.2112526539278135e-05, + "loss": 1.6893, + "step": 605 + }, + { + "epoch": 0.1, + "learning_rate": 3.21656050955414e-05, + "loss": 1.618, + "step": 606 + }, + { + "epoch": 0.1, + "learning_rate": 3.221868365180467e-05, + "loss": 1.6517, + "step": 607 + }, + { + "epoch": 0.1, + "learning_rate": 3.227176220806794e-05, + "loss": 1.6617, + "step": 608 + }, + { + "epoch": 0.1, + "learning_rate": 3.232484076433121e-05, + "loss": 1.7133, + "step": 609 + }, + { + "epoch": 0.1, + "learning_rate": 3.237791932059448e-05, + "loss": 1.615, + "step": 610 + }, + { + "epoch": 0.1, + "learning_rate": 3.243099787685775e-05, + "loss": 1.7107, + "step": 611 + }, + { + "epoch": 0.1, + "learning_rate": 3.248407643312102e-05, + "loss": 1.685, + "step": 612 + }, + { + "epoch": 0.1, + "learning_rate": 3.2537154989384286e-05, + "loss": 1.7484, + "step": 613 + }, + { + "epoch": 0.1, + "learning_rate": 3.259023354564756e-05, + "loss": 1.6393, + "step": 614 + }, + { + "epoch": 0.1, + "learning_rate": 3.264331210191083e-05, + "loss": 1.6688, + "step": 615 + }, + { + "epoch": 0.1, + "learning_rate": 3.26963906581741e-05, + "loss": 1.6684, + "step": 616 + }, + { + "epoch": 0.1, + "learning_rate": 3.2749469214437365e-05, + "loss": 1.656, + "step": 617 + }, + { + "epoch": 0.1, + "learning_rate": 3.2802547770700634e-05, + "loss": 1.6195, + "step": 618 + }, + { + "epoch": 0.1, + "learning_rate": 3.285562632696391e-05, + "loss": 1.6412, + "step": 619 + }, + { + "epoch": 0.1, + "learning_rate": 3.2908704883227177e-05, + "loss": 1.6654, + "step": 620 + }, + { + "epoch": 0.1, + "learning_rate": 3.2961783439490445e-05, + "loss": 1.688, + "step": 621 + }, + { + "epoch": 0.1, + "learning_rate": 3.301486199575371e-05, + "loss": 1.7699, + "step": 622 + }, + { + "epoch": 0.1, + "learning_rate": 3.306794055201699e-05, + "loss": 1.6645, + "step": 623 + }, + { + "epoch": 0.1, + "learning_rate": 3.3121019108280256e-05, + "loss": 1.6668, + "step": 624 + }, + { + "epoch": 0.1, + "learning_rate": 3.3174097664543524e-05, + "loss": 1.7279, + "step": 625 + }, + { + "epoch": 0.1, + "learning_rate": 3.322717622080679e-05, + "loss": 1.7345, + "step": 626 + }, + { + "epoch": 0.1, + "learning_rate": 3.328025477707007e-05, + "loss": 1.7143, + "step": 627 + }, + { + "epoch": 0.1, + "learning_rate": 3.3333333333333335e-05, + "loss": 1.761, + "step": 628 + }, + { + "epoch": 0.1, + "learning_rate": 3.33864118895966e-05, + "loss": 1.699, + "step": 629 + }, + { + "epoch": 0.1, + "learning_rate": 3.343949044585987e-05, + "loss": 1.635, + "step": 630 + }, + { + "epoch": 0.1, + "learning_rate": 3.349256900212314e-05, + "loss": 1.7247, + "step": 631 + }, + { + "epoch": 0.1, + "learning_rate": 3.3545647558386414e-05, + "loss": 1.6674, + "step": 632 + }, + { + "epoch": 0.1, + "learning_rate": 3.359872611464968e-05, + "loss": 1.7209, + "step": 633 + }, + { + "epoch": 0.1, + "learning_rate": 3.365180467091295e-05, + "loss": 1.7156, + "step": 634 + }, + { + "epoch": 0.1, + "learning_rate": 3.370488322717622e-05, + "loss": 1.668, + "step": 635 + }, + { + "epoch": 0.1, + "learning_rate": 3.375796178343949e-05, + "loss": 1.6838, + "step": 636 + }, + { + "epoch": 0.1, + "learning_rate": 3.381104033970276e-05, + "loss": 1.6752, + "step": 637 + }, + { + "epoch": 0.1, + "learning_rate": 3.386411889596603e-05, + "loss": 1.6031, + "step": 638 + }, + { + "epoch": 0.1, + "learning_rate": 3.39171974522293e-05, + "loss": 1.7016, + "step": 639 + }, + { + "epoch": 0.1, + "learning_rate": 3.3970276008492566e-05, + "loss": 1.6352, + "step": 640 + }, + { + "epoch": 0.1, + "learning_rate": 3.402335456475584e-05, + "loss": 1.7107, + "step": 641 + }, + { + "epoch": 0.1, + "learning_rate": 3.407643312101911e-05, + "loss": 1.7099, + "step": 642 + }, + { + "epoch": 0.1, + "learning_rate": 3.412951167728238e-05, + "loss": 1.6599, + "step": 643 + }, + { + "epoch": 0.1, + "learning_rate": 3.4182590233545645e-05, + "loss": 1.6673, + "step": 644 + }, + { + "epoch": 0.1, + "learning_rate": 3.423566878980892e-05, + "loss": 1.666, + "step": 645 + }, + { + "epoch": 0.1, + "learning_rate": 3.428874734607219e-05, + "loss": 1.7171, + "step": 646 + }, + { + "epoch": 0.1, + "learning_rate": 3.4341825902335456e-05, + "loss": 1.7051, + "step": 647 + }, + { + "epoch": 0.1, + "learning_rate": 3.4394904458598724e-05, + "loss": 1.5711, + "step": 648 + }, + { + "epoch": 0.1, + "learning_rate": 3.444798301486199e-05, + "loss": 1.6917, + "step": 649 + }, + { + "epoch": 0.1, + "learning_rate": 3.450106157112527e-05, + "loss": 1.7677, + "step": 650 + }, + { + "epoch": 0.1, + "learning_rate": 3.4554140127388535e-05, + "loss": 1.618, + "step": 651 + }, + { + "epoch": 0.1, + "learning_rate": 3.4607218683651803e-05, + "loss": 1.6195, + "step": 652 + }, + { + "epoch": 0.1, + "learning_rate": 3.466029723991507e-05, + "loss": 1.6533, + "step": 653 + }, + { + "epoch": 0.1, + "learning_rate": 3.4713375796178346e-05, + "loss": 1.5926, + "step": 654 + }, + { + "epoch": 0.1, + "learning_rate": 3.4766454352441615e-05, + "loss": 1.6068, + "step": 655 + }, + { + "epoch": 0.1, + "learning_rate": 3.481953290870488e-05, + "loss": 1.678, + "step": 656 + }, + { + "epoch": 0.1, + "learning_rate": 3.487261146496815e-05, + "loss": 1.6347, + "step": 657 + }, + { + "epoch": 0.1, + "learning_rate": 3.492569002123142e-05, + "loss": 1.6373, + "step": 658 + }, + { + "epoch": 0.1, + "learning_rate": 3.4978768577494694e-05, + "loss": 1.6661, + "step": 659 + }, + { + "epoch": 0.11, + "learning_rate": 3.503184713375796e-05, + "loss": 1.6954, + "step": 660 + }, + { + "epoch": 0.11, + "learning_rate": 3.508492569002123e-05, + "loss": 1.6806, + "step": 661 + }, + { + "epoch": 0.11, + "learning_rate": 3.51380042462845e-05, + "loss": 1.7214, + "step": 662 + }, + { + "epoch": 0.11, + "learning_rate": 3.519108280254777e-05, + "loss": 1.6921, + "step": 663 + }, + { + "epoch": 0.11, + "learning_rate": 3.524416135881104e-05, + "loss": 1.6434, + "step": 664 + }, + { + "epoch": 0.11, + "learning_rate": 3.529723991507431e-05, + "loss": 1.666, + "step": 665 + }, + { + "epoch": 0.11, + "learning_rate": 3.535031847133758e-05, + "loss": 1.5945, + "step": 666 + }, + { + "epoch": 0.11, + "learning_rate": 3.5403397027600845e-05, + "loss": 1.6778, + "step": 667 + }, + { + "epoch": 0.11, + "learning_rate": 3.545647558386412e-05, + "loss": 1.7031, + "step": 668 + }, + { + "epoch": 0.11, + "learning_rate": 3.550955414012739e-05, + "loss": 1.6798, + "step": 669 + }, + { + "epoch": 0.11, + "learning_rate": 3.5562632696390657e-05, + "loss": 1.6707, + "step": 670 + }, + { + "epoch": 0.11, + "learning_rate": 3.5615711252653925e-05, + "loss": 1.6282, + "step": 671 + }, + { + "epoch": 0.11, + "learning_rate": 3.56687898089172e-05, + "loss": 1.6953, + "step": 672 + }, + { + "epoch": 0.11, + "learning_rate": 3.572186836518047e-05, + "loss": 1.6288, + "step": 673 + }, + { + "epoch": 0.11, + "learning_rate": 3.5774946921443736e-05, + "loss": 1.6373, + "step": 674 + }, + { + "epoch": 0.11, + "learning_rate": 3.5828025477707004e-05, + "loss": 1.6108, + "step": 675 + }, + { + "epoch": 0.11, + "learning_rate": 3.588110403397027e-05, + "loss": 1.6638, + "step": 676 + }, + { + "epoch": 0.11, + "learning_rate": 3.593418259023355e-05, + "loss": 1.5918, + "step": 677 + }, + { + "epoch": 0.11, + "learning_rate": 3.5987261146496815e-05, + "loss": 1.6317, + "step": 678 + }, + { + "epoch": 0.11, + "learning_rate": 3.604033970276008e-05, + "loss": 1.7241, + "step": 679 + }, + { + "epoch": 0.11, + "learning_rate": 3.609341825902335e-05, + "loss": 1.7456, + "step": 680 + }, + { + "epoch": 0.11, + "learning_rate": 3.6146496815286626e-05, + "loss": 1.6568, + "step": 681 + }, + { + "epoch": 0.11, + "learning_rate": 3.6199575371549894e-05, + "loss": 1.6791, + "step": 682 + }, + { + "epoch": 0.11, + "learning_rate": 3.625265392781316e-05, + "loss": 1.6846, + "step": 683 + }, + { + "epoch": 0.11, + "learning_rate": 3.630573248407643e-05, + "loss": 1.6788, + "step": 684 + }, + { + "epoch": 0.11, + "learning_rate": 3.6358811040339705e-05, + "loss": 1.6797, + "step": 685 + }, + { + "epoch": 0.11, + "learning_rate": 3.641188959660297e-05, + "loss": 1.7185, + "step": 686 + }, + { + "epoch": 0.11, + "learning_rate": 3.646496815286624e-05, + "loss": 1.6563, + "step": 687 + }, + { + "epoch": 0.11, + "learning_rate": 3.651804670912951e-05, + "loss": 1.6662, + "step": 688 + }, + { + "epoch": 0.11, + "learning_rate": 3.657112526539278e-05, + "loss": 1.6534, + "step": 689 + }, + { + "epoch": 0.11, + "learning_rate": 3.662420382165605e-05, + "loss": 1.7525, + "step": 690 + }, + { + "epoch": 0.11, + "learning_rate": 3.667728237791932e-05, + "loss": 1.5653, + "step": 691 + }, + { + "epoch": 0.11, + "learning_rate": 3.673036093418259e-05, + "loss": 1.6767, + "step": 692 + }, + { + "epoch": 0.11, + "learning_rate": 3.678343949044586e-05, + "loss": 1.6519, + "step": 693 + }, + { + "epoch": 0.11, + "learning_rate": 3.683651804670913e-05, + "loss": 1.5879, + "step": 694 + }, + { + "epoch": 0.11, + "learning_rate": 3.68895966029724e-05, + "loss": 1.6358, + "step": 695 + }, + { + "epoch": 0.11, + "learning_rate": 3.694267515923567e-05, + "loss": 1.6195, + "step": 696 + }, + { + "epoch": 0.11, + "learning_rate": 3.6995753715498936e-05, + "loss": 1.5995, + "step": 697 + }, + { + "epoch": 0.11, + "learning_rate": 3.7048832271762204e-05, + "loss": 1.6749, + "step": 698 + }, + { + "epoch": 0.11, + "learning_rate": 3.710191082802548e-05, + "loss": 1.6506, + "step": 699 + }, + { + "epoch": 0.11, + "learning_rate": 3.715498938428875e-05, + "loss": 1.6787, + "step": 700 + }, + { + "epoch": 0.11, + "learning_rate": 3.7208067940552015e-05, + "loss": 1.6947, + "step": 701 + }, + { + "epoch": 0.11, + "learning_rate": 3.7261146496815283e-05, + "loss": 1.6927, + "step": 702 + }, + { + "epoch": 0.11, + "learning_rate": 3.731422505307856e-05, + "loss": 1.5945, + "step": 703 + }, + { + "epoch": 0.11, + "learning_rate": 3.7367303609341826e-05, + "loss": 1.6209, + "step": 704 + }, + { + "epoch": 0.11, + "learning_rate": 3.7420382165605095e-05, + "loss": 1.6782, + "step": 705 + }, + { + "epoch": 0.11, + "learning_rate": 3.747346072186836e-05, + "loss": 1.7465, + "step": 706 + }, + { + "epoch": 0.11, + "learning_rate": 3.752653927813164e-05, + "loss": 1.7504, + "step": 707 + }, + { + "epoch": 0.11, + "learning_rate": 3.7579617834394906e-05, + "loss": 1.6585, + "step": 708 + }, + { + "epoch": 0.11, + "learning_rate": 3.763269639065818e-05, + "loss": 1.6572, + "step": 709 + }, + { + "epoch": 0.11, + "learning_rate": 3.768577494692145e-05, + "loss": 1.7366, + "step": 710 + }, + { + "epoch": 0.11, + "learning_rate": 3.773885350318472e-05, + "loss": 1.7074, + "step": 711 + }, + { + "epoch": 0.11, + "learning_rate": 3.7791932059447985e-05, + "loss": 1.6489, + "step": 712 + }, + { + "epoch": 0.11, + "learning_rate": 3.784501061571126e-05, + "loss": 1.6636, + "step": 713 + }, + { + "epoch": 0.11, + "learning_rate": 3.789808917197453e-05, + "loss": 1.6607, + "step": 714 + }, + { + "epoch": 0.11, + "learning_rate": 3.7951167728237796e-05, + "loss": 1.6365, + "step": 715 + }, + { + "epoch": 0.11, + "learning_rate": 3.8004246284501064e-05, + "loss": 1.7146, + "step": 716 + }, + { + "epoch": 0.11, + "learning_rate": 3.805732484076434e-05, + "loss": 1.6575, + "step": 717 + }, + { + "epoch": 0.11, + "learning_rate": 3.811040339702761e-05, + "loss": 1.6855, + "step": 718 + }, + { + "epoch": 0.11, + "learning_rate": 3.8163481953290875e-05, + "loss": 1.7229, + "step": 719 + }, + { + "epoch": 0.11, + "learning_rate": 3.821656050955414e-05, + "loss": 1.7244, + "step": 720 + }, + { + "epoch": 0.11, + "learning_rate": 3.826963906581741e-05, + "loss": 1.7057, + "step": 721 + }, + { + "epoch": 0.12, + "learning_rate": 3.8322717622080686e-05, + "loss": 1.7036, + "step": 722 + }, + { + "epoch": 0.12, + "learning_rate": 3.8375796178343954e-05, + "loss": 1.6633, + "step": 723 + }, + { + "epoch": 0.12, + "learning_rate": 3.842887473460722e-05, + "loss": 1.6632, + "step": 724 + }, + { + "epoch": 0.12, + "learning_rate": 3.848195329087049e-05, + "loss": 1.6657, + "step": 725 + }, + { + "epoch": 0.12, + "learning_rate": 3.8535031847133766e-05, + "loss": 1.5941, + "step": 726 + }, + { + "epoch": 0.12, + "learning_rate": 3.8588110403397034e-05, + "loss": 1.6687, + "step": 727 + }, + { + "epoch": 0.12, + "learning_rate": 3.86411889596603e-05, + "loss": 1.6804, + "step": 728 + }, + { + "epoch": 0.12, + "learning_rate": 3.869426751592357e-05, + "loss": 1.6402, + "step": 729 + }, + { + "epoch": 0.12, + "learning_rate": 3.874734607218684e-05, + "loss": 1.6608, + "step": 730 + }, + { + "epoch": 0.12, + "learning_rate": 3.880042462845011e-05, + "loss": 1.6169, + "step": 731 + }, + { + "epoch": 0.12, + "learning_rate": 3.885350318471338e-05, + "loss": 1.575, + "step": 732 + }, + { + "epoch": 0.12, + "learning_rate": 3.890658174097665e-05, + "loss": 1.6014, + "step": 733 + }, + { + "epoch": 0.12, + "learning_rate": 3.895966029723992e-05, + "loss": 1.6929, + "step": 734 + }, + { + "epoch": 0.12, + "learning_rate": 3.901273885350319e-05, + "loss": 1.7285, + "step": 735 + }, + { + "epoch": 0.12, + "learning_rate": 3.906581740976646e-05, + "loss": 1.6514, + "step": 736 + }, + { + "epoch": 0.12, + "learning_rate": 3.911889596602973e-05, + "loss": 1.6786, + "step": 737 + }, + { + "epoch": 0.12, + "learning_rate": 3.9171974522292996e-05, + "loss": 1.7326, + "step": 738 + }, + { + "epoch": 0.12, + "learning_rate": 3.9225053078556264e-05, + "loss": 1.548, + "step": 739 + }, + { + "epoch": 0.12, + "learning_rate": 3.927813163481954e-05, + "loss": 1.6687, + "step": 740 + }, + { + "epoch": 0.12, + "learning_rate": 3.933121019108281e-05, + "loss": 1.656, + "step": 741 + }, + { + "epoch": 0.12, + "learning_rate": 3.9384288747346076e-05, + "loss": 1.5577, + "step": 742 + }, + { + "epoch": 0.12, + "learning_rate": 3.9437367303609344e-05, + "loss": 1.773, + "step": 743 + }, + { + "epoch": 0.12, + "learning_rate": 3.949044585987262e-05, + "loss": 1.6667, + "step": 744 + }, + { + "epoch": 0.12, + "learning_rate": 3.954352441613589e-05, + "loss": 1.7739, + "step": 745 + }, + { + "epoch": 0.12, + "learning_rate": 3.9596602972399155e-05, + "loss": 1.6042, + "step": 746 + }, + { + "epoch": 0.12, + "learning_rate": 3.964968152866242e-05, + "loss": 1.6756, + "step": 747 + }, + { + "epoch": 0.12, + "learning_rate": 3.970276008492569e-05, + "loss": 1.6155, + "step": 748 + }, + { + "epoch": 0.12, + "learning_rate": 3.9755838641188966e-05, + "loss": 1.5683, + "step": 749 + }, + { + "epoch": 0.12, + "learning_rate": 3.9808917197452234e-05, + "loss": 1.7452, + "step": 750 + }, + { + "epoch": 0.12, + "learning_rate": 3.98619957537155e-05, + "loss": 1.6494, + "step": 751 + }, + { + "epoch": 0.12, + "learning_rate": 3.991507430997877e-05, + "loss": 1.6514, + "step": 752 + }, + { + "epoch": 0.12, + "learning_rate": 3.9968152866242045e-05, + "loss": 1.6452, + "step": 753 + }, + { + "epoch": 0.12, + "learning_rate": 4.002123142250531e-05, + "loss": 1.6043, + "step": 754 + }, + { + "epoch": 0.12, + "learning_rate": 4.007430997876858e-05, + "loss": 1.6459, + "step": 755 + }, + { + "epoch": 0.12, + "learning_rate": 4.012738853503185e-05, + "loss": 1.6508, + "step": 756 + }, + { + "epoch": 0.12, + "learning_rate": 4.018046709129512e-05, + "loss": 1.6801, + "step": 757 + }, + { + "epoch": 0.12, + "learning_rate": 4.023354564755839e-05, + "loss": 1.7403, + "step": 758 + }, + { + "epoch": 0.12, + "learning_rate": 4.028662420382166e-05, + "loss": 1.6588, + "step": 759 + }, + { + "epoch": 0.12, + "learning_rate": 4.033970276008493e-05, + "loss": 1.5769, + "step": 760 + }, + { + "epoch": 0.12, + "learning_rate": 4.03927813163482e-05, + "loss": 1.6742, + "step": 761 + }, + { + "epoch": 0.12, + "learning_rate": 4.044585987261147e-05, + "loss": 1.6325, + "step": 762 + }, + { + "epoch": 0.12, + "learning_rate": 4.049893842887474e-05, + "loss": 1.6096, + "step": 763 + }, + { + "epoch": 0.12, + "learning_rate": 4.055201698513801e-05, + "loss": 1.5629, + "step": 764 + }, + { + "epoch": 0.12, + "learning_rate": 4.0605095541401276e-05, + "loss": 1.64, + "step": 765 + }, + { + "epoch": 0.12, + "learning_rate": 4.065817409766455e-05, + "loss": 1.6503, + "step": 766 + }, + { + "epoch": 0.12, + "learning_rate": 4.071125265392782e-05, + "loss": 1.6496, + "step": 767 + }, + { + "epoch": 0.12, + "learning_rate": 4.076433121019109e-05, + "loss": 1.6312, + "step": 768 + }, + { + "epoch": 0.12, + "learning_rate": 4.0817409766454355e-05, + "loss": 1.5552, + "step": 769 + }, + { + "epoch": 0.12, + "learning_rate": 4.087048832271762e-05, + "loss": 1.6384, + "step": 770 + }, + { + "epoch": 0.12, + "learning_rate": 4.09235668789809e-05, + "loss": 1.5973, + "step": 771 + }, + { + "epoch": 0.12, + "learning_rate": 4.0976645435244166e-05, + "loss": 1.6632, + "step": 772 + }, + { + "epoch": 0.12, + "learning_rate": 4.1029723991507434e-05, + "loss": 1.6343, + "step": 773 + }, + { + "epoch": 0.12, + "learning_rate": 4.10828025477707e-05, + "loss": 1.5579, + "step": 774 + }, + { + "epoch": 0.12, + "learning_rate": 4.113588110403398e-05, + "loss": 1.6177, + "step": 775 + }, + { + "epoch": 0.12, + "learning_rate": 4.1188959660297246e-05, + "loss": 1.6325, + "step": 776 + }, + { + "epoch": 0.12, + "learning_rate": 4.1242038216560514e-05, + "loss": 1.7332, + "step": 777 + }, + { + "epoch": 0.12, + "learning_rate": 4.129511677282378e-05, + "loss": 1.6566, + "step": 778 + }, + { + "epoch": 0.12, + "learning_rate": 4.134819532908705e-05, + "loss": 1.6449, + "step": 779 + }, + { + "epoch": 0.12, + "learning_rate": 4.1401273885350325e-05, + "loss": 1.6508, + "step": 780 + }, + { + "epoch": 0.12, + "learning_rate": 4.145435244161359e-05, + "loss": 1.5946, + "step": 781 + }, + { + "epoch": 0.12, + "learning_rate": 4.150743099787686e-05, + "loss": 1.68, + "step": 782 + }, + { + "epoch": 0.12, + "learning_rate": 4.156050955414013e-05, + "loss": 1.6786, + "step": 783 + }, + { + "epoch": 0.12, + "learning_rate": 4.1613588110403404e-05, + "loss": 1.5788, + "step": 784 + }, + { + "epoch": 0.13, + "learning_rate": 4.166666666666667e-05, + "loss": 1.6602, + "step": 785 + }, + { + "epoch": 0.13, + "learning_rate": 4.171974522292994e-05, + "loss": 1.6504, + "step": 786 + }, + { + "epoch": 0.13, + "learning_rate": 4.177282377919321e-05, + "loss": 1.6854, + "step": 787 + }, + { + "epoch": 0.13, + "learning_rate": 4.1825902335456476e-05, + "loss": 1.6348, + "step": 788 + }, + { + "epoch": 0.13, + "learning_rate": 4.187898089171975e-05, + "loss": 1.6337, + "step": 789 + }, + { + "epoch": 0.13, + "learning_rate": 4.193205944798302e-05, + "loss": 1.6265, + "step": 790 + }, + { + "epoch": 0.13, + "learning_rate": 4.198513800424629e-05, + "loss": 1.6532, + "step": 791 + }, + { + "epoch": 0.13, + "learning_rate": 4.2038216560509556e-05, + "loss": 1.6641, + "step": 792 + }, + { + "epoch": 0.13, + "learning_rate": 4.209129511677283e-05, + "loss": 1.6276, + "step": 793 + }, + { + "epoch": 0.13, + "learning_rate": 4.21443736730361e-05, + "loss": 1.7302, + "step": 794 + }, + { + "epoch": 0.13, + "learning_rate": 4.219745222929937e-05, + "loss": 1.751, + "step": 795 + }, + { + "epoch": 0.13, + "learning_rate": 4.2250530785562635e-05, + "loss": 1.5596, + "step": 796 + }, + { + "epoch": 0.13, + "learning_rate": 4.23036093418259e-05, + "loss": 1.653, + "step": 797 + }, + { + "epoch": 0.13, + "learning_rate": 4.235668789808918e-05, + "loss": 1.6084, + "step": 798 + }, + { + "epoch": 0.13, + "learning_rate": 4.2409766454352446e-05, + "loss": 1.6591, + "step": 799 + }, + { + "epoch": 0.13, + "learning_rate": 4.2462845010615714e-05, + "loss": 1.5998, + "step": 800 + }, + { + "epoch": 0.13, + "learning_rate": 4.251592356687898e-05, + "loss": 1.6234, + "step": 801 + }, + { + "epoch": 0.13, + "learning_rate": 4.256900212314226e-05, + "loss": 1.6553, + "step": 802 + }, + { + "epoch": 0.13, + "learning_rate": 4.2622080679405525e-05, + "loss": 1.6825, + "step": 803 + }, + { + "epoch": 0.13, + "learning_rate": 4.267515923566879e-05, + "loss": 1.6316, + "step": 804 + }, + { + "epoch": 0.13, + "learning_rate": 4.272823779193206e-05, + "loss": 1.67, + "step": 805 + }, + { + "epoch": 0.13, + "learning_rate": 4.278131634819533e-05, + "loss": 1.6949, + "step": 806 + }, + { + "epoch": 0.13, + "learning_rate": 4.2834394904458604e-05, + "loss": 1.6623, + "step": 807 + }, + { + "epoch": 0.13, + "learning_rate": 4.288747346072187e-05, + "loss": 1.6049, + "step": 808 + }, + { + "epoch": 0.13, + "learning_rate": 4.294055201698514e-05, + "loss": 1.6604, + "step": 809 + }, + { + "epoch": 0.13, + "learning_rate": 4.299363057324841e-05, + "loss": 1.6201, + "step": 810 + }, + { + "epoch": 0.13, + "learning_rate": 4.3046709129511684e-05, + "loss": 1.6842, + "step": 811 + }, + { + "epoch": 0.13, + "learning_rate": 4.309978768577495e-05, + "loss": 1.6376, + "step": 812 + }, + { + "epoch": 0.13, + "learning_rate": 4.315286624203822e-05, + "loss": 1.6865, + "step": 813 + }, + { + "epoch": 0.13, + "learning_rate": 4.320594479830149e-05, + "loss": 1.6749, + "step": 814 + }, + { + "epoch": 0.13, + "learning_rate": 4.325902335456476e-05, + "loss": 1.6262, + "step": 815 + }, + { + "epoch": 0.13, + "learning_rate": 4.331210191082803e-05, + "loss": 1.6252, + "step": 816 + }, + { + "epoch": 0.13, + "learning_rate": 4.33651804670913e-05, + "loss": 1.5819, + "step": 817 + }, + { + "epoch": 0.13, + "learning_rate": 4.341825902335457e-05, + "loss": 1.6955, + "step": 818 + }, + { + "epoch": 0.13, + "learning_rate": 4.3471337579617835e-05, + "loss": 1.6526, + "step": 819 + }, + { + "epoch": 0.13, + "learning_rate": 4.352441613588111e-05, + "loss": 1.7002, + "step": 820 + }, + { + "epoch": 0.13, + "learning_rate": 4.357749469214438e-05, + "loss": 1.6948, + "step": 821 + }, + { + "epoch": 0.13, + "learning_rate": 4.3630573248407646e-05, + "loss": 1.639, + "step": 822 + }, + { + "epoch": 0.13, + "learning_rate": 4.3683651804670914e-05, + "loss": 1.5983, + "step": 823 + }, + { + "epoch": 0.13, + "learning_rate": 4.373673036093419e-05, + "loss": 1.6815, + "step": 824 + }, + { + "epoch": 0.13, + "learning_rate": 4.378980891719746e-05, + "loss": 1.5906, + "step": 825 + }, + { + "epoch": 0.13, + "learning_rate": 4.3842887473460726e-05, + "loss": 1.7111, + "step": 826 + }, + { + "epoch": 0.13, + "learning_rate": 4.3895966029723994e-05, + "loss": 1.672, + "step": 827 + }, + { + "epoch": 0.13, + "learning_rate": 4.394904458598726e-05, + "loss": 1.617, + "step": 828 + }, + { + "epoch": 0.13, + "learning_rate": 4.400212314225054e-05, + "loss": 1.5561, + "step": 829 + }, + { + "epoch": 0.13, + "learning_rate": 4.4055201698513805e-05, + "loss": 1.6724, + "step": 830 + }, + { + "epoch": 0.13, + "learning_rate": 4.410828025477707e-05, + "loss": 1.5813, + "step": 831 + }, + { + "epoch": 0.13, + "learning_rate": 4.416135881104034e-05, + "loss": 1.644, + "step": 832 + }, + { + "epoch": 0.13, + "learning_rate": 4.4214437367303616e-05, + "loss": 1.6467, + "step": 833 + }, + { + "epoch": 0.13, + "learning_rate": 4.4267515923566884e-05, + "loss": 1.6423, + "step": 834 + }, + { + "epoch": 0.13, + "learning_rate": 4.432059447983015e-05, + "loss": 1.6905, + "step": 835 + }, + { + "epoch": 0.13, + "learning_rate": 4.437367303609342e-05, + "loss": 1.6641, + "step": 836 + }, + { + "epoch": 0.13, + "learning_rate": 4.442675159235669e-05, + "loss": 1.5294, + "step": 837 + }, + { + "epoch": 0.13, + "learning_rate": 4.447983014861996e-05, + "loss": 1.5904, + "step": 838 + }, + { + "epoch": 0.13, + "learning_rate": 4.453290870488323e-05, + "loss": 1.6389, + "step": 839 + }, + { + "epoch": 0.13, + "learning_rate": 4.45859872611465e-05, + "loss": 1.6144, + "step": 840 + }, + { + "epoch": 0.13, + "learning_rate": 4.463906581740977e-05, + "loss": 1.6291, + "step": 841 + }, + { + "epoch": 0.13, + "learning_rate": 4.469214437367304e-05, + "loss": 1.6914, + "step": 842 + }, + { + "epoch": 0.13, + "learning_rate": 4.474522292993631e-05, + "loss": 1.6888, + "step": 843 + }, + { + "epoch": 0.13, + "learning_rate": 4.479830148619958e-05, + "loss": 1.622, + "step": 844 + }, + { + "epoch": 0.13, + "learning_rate": 4.485138004246285e-05, + "loss": 1.6619, + "step": 845 + }, + { + "epoch": 0.13, + "learning_rate": 4.4904458598726115e-05, + "loss": 1.6371, + "step": 846 + }, + { + "epoch": 0.13, + "learning_rate": 4.495753715498939e-05, + "loss": 1.6328, + "step": 847 + }, + { + "epoch": 0.14, + "learning_rate": 4.501061571125266e-05, + "loss": 1.6428, + "step": 848 + }, + { + "epoch": 0.14, + "learning_rate": 4.5063694267515926e-05, + "loss": 1.7071, + "step": 849 + }, + { + "epoch": 0.14, + "learning_rate": 4.5116772823779194e-05, + "loss": 1.6692, + "step": 850 + }, + { + "epoch": 0.14, + "learning_rate": 4.516985138004247e-05, + "loss": 1.7193, + "step": 851 + }, + { + "epoch": 0.14, + "learning_rate": 4.522292993630574e-05, + "loss": 1.6279, + "step": 852 + }, + { + "epoch": 0.14, + "learning_rate": 4.5276008492569005e-05, + "loss": 1.6452, + "step": 853 + }, + { + "epoch": 0.14, + "learning_rate": 4.532908704883227e-05, + "loss": 1.6885, + "step": 854 + }, + { + "epoch": 0.14, + "learning_rate": 4.538216560509554e-05, + "loss": 1.7035, + "step": 855 + }, + { + "epoch": 0.14, + "learning_rate": 4.5435244161358816e-05, + "loss": 1.5905, + "step": 856 + }, + { + "epoch": 0.14, + "learning_rate": 4.5488322717622084e-05, + "loss": 1.5783, + "step": 857 + }, + { + "epoch": 0.14, + "learning_rate": 4.554140127388535e-05, + "loss": 1.6213, + "step": 858 + }, + { + "epoch": 0.14, + "learning_rate": 4.559447983014862e-05, + "loss": 1.5328, + "step": 859 + }, + { + "epoch": 0.14, + "learning_rate": 4.5647558386411895e-05, + "loss": 1.6983, + "step": 860 + }, + { + "epoch": 0.14, + "learning_rate": 4.5700636942675164e-05, + "loss": 1.6273, + "step": 861 + }, + { + "epoch": 0.14, + "learning_rate": 4.575371549893843e-05, + "loss": 1.6346, + "step": 862 + }, + { + "epoch": 0.14, + "learning_rate": 4.58067940552017e-05, + "loss": 1.6169, + "step": 863 + }, + { + "epoch": 0.14, + "learning_rate": 4.585987261146497e-05, + "loss": 1.676, + "step": 864 + }, + { + "epoch": 0.14, + "learning_rate": 4.591295116772824e-05, + "loss": 1.6648, + "step": 865 + }, + { + "epoch": 0.14, + "learning_rate": 4.596602972399151e-05, + "loss": 1.7119, + "step": 866 + }, + { + "epoch": 0.14, + "learning_rate": 4.601910828025478e-05, + "loss": 1.6602, + "step": 867 + }, + { + "epoch": 0.14, + "learning_rate": 4.607218683651805e-05, + "loss": 1.5983, + "step": 868 + }, + { + "epoch": 0.14, + "learning_rate": 4.612526539278132e-05, + "loss": 1.632, + "step": 869 + }, + { + "epoch": 0.14, + "learning_rate": 4.617834394904459e-05, + "loss": 1.6303, + "step": 870 + }, + { + "epoch": 0.14, + "learning_rate": 4.623142250530786e-05, + "loss": 1.686, + "step": 871 + }, + { + "epoch": 0.14, + "learning_rate": 4.6284501061571126e-05, + "loss": 1.5966, + "step": 872 + }, + { + "epoch": 0.14, + "learning_rate": 4.63375796178344e-05, + "loss": 1.6562, + "step": 873 + }, + { + "epoch": 0.14, + "learning_rate": 4.639065817409767e-05, + "loss": 1.594, + "step": 874 + }, + { + "epoch": 0.14, + "learning_rate": 4.644373673036094e-05, + "loss": 1.4919, + "step": 875 + }, + { + "epoch": 0.14, + "learning_rate": 4.6496815286624206e-05, + "loss": 1.5465, + "step": 876 + }, + { + "epoch": 0.14, + "learning_rate": 4.6549893842887474e-05, + "loss": 1.583, + "step": 877 + }, + { + "epoch": 0.14, + "learning_rate": 4.660297239915075e-05, + "loss": 1.5359, + "step": 878 + }, + { + "epoch": 0.14, + "learning_rate": 4.665605095541402e-05, + "loss": 1.6689, + "step": 879 + }, + { + "epoch": 0.14, + "learning_rate": 4.6709129511677285e-05, + "loss": 1.7151, + "step": 880 + }, + { + "epoch": 0.14, + "learning_rate": 4.676220806794055e-05, + "loss": 1.6798, + "step": 881 + }, + { + "epoch": 0.14, + "learning_rate": 4.681528662420383e-05, + "loss": 1.6214, + "step": 882 + }, + { + "epoch": 0.14, + "learning_rate": 4.6868365180467096e-05, + "loss": 1.5905, + "step": 883 + }, + { + "epoch": 0.14, + "learning_rate": 4.6921443736730364e-05, + "loss": 1.6619, + "step": 884 + }, + { + "epoch": 0.14, + "learning_rate": 4.697452229299363e-05, + "loss": 1.7149, + "step": 885 + }, + { + "epoch": 0.14, + "learning_rate": 4.70276008492569e-05, + "loss": 1.5615, + "step": 886 + }, + { + "epoch": 0.14, + "learning_rate": 4.7080679405520175e-05, + "loss": 1.6493, + "step": 887 + }, + { + "epoch": 0.14, + "learning_rate": 4.713375796178344e-05, + "loss": 1.6701, + "step": 888 + }, + { + "epoch": 0.14, + "learning_rate": 4.718683651804671e-05, + "loss": 1.6115, + "step": 889 + }, + { + "epoch": 0.14, + "learning_rate": 4.723991507430998e-05, + "loss": 1.6462, + "step": 890 + }, + { + "epoch": 0.14, + "learning_rate": 4.7292993630573254e-05, + "loss": 1.6582, + "step": 891 + }, + { + "epoch": 0.14, + "learning_rate": 4.734607218683652e-05, + "loss": 1.6493, + "step": 892 + }, + { + "epoch": 0.14, + "learning_rate": 4.739915074309979e-05, + "loss": 1.6076, + "step": 893 + }, + { + "epoch": 0.14, + "learning_rate": 4.745222929936306e-05, + "loss": 1.6527, + "step": 894 + }, + { + "epoch": 0.14, + "learning_rate": 4.750530785562633e-05, + "loss": 1.617, + "step": 895 + }, + { + "epoch": 0.14, + "learning_rate": 4.75583864118896e-05, + "loss": 1.6289, + "step": 896 + }, + { + "epoch": 0.14, + "learning_rate": 4.761146496815287e-05, + "loss": 1.7097, + "step": 897 + }, + { + "epoch": 0.14, + "learning_rate": 4.766454352441614e-05, + "loss": 1.6998, + "step": 898 + }, + { + "epoch": 0.14, + "learning_rate": 4.7717622080679406e-05, + "loss": 1.7042, + "step": 899 + }, + { + "epoch": 0.14, + "learning_rate": 4.777070063694268e-05, + "loss": 1.5545, + "step": 900 + }, + { + "epoch": 0.14, + "learning_rate": 4.782377919320595e-05, + "loss": 1.6985, + "step": 901 + }, + { + "epoch": 0.14, + "learning_rate": 4.787685774946922e-05, + "loss": 1.5871, + "step": 902 + }, + { + "epoch": 0.14, + "learning_rate": 4.7929936305732485e-05, + "loss": 1.7256, + "step": 903 + }, + { + "epoch": 0.14, + "learning_rate": 4.798301486199575e-05, + "loss": 1.689, + "step": 904 + }, + { + "epoch": 0.14, + "learning_rate": 4.803609341825903e-05, + "loss": 1.6779, + "step": 905 + }, + { + "epoch": 0.14, + "learning_rate": 4.8089171974522296e-05, + "loss": 1.6828, + "step": 906 + }, + { + "epoch": 0.14, + "learning_rate": 4.8142250530785564e-05, + "loss": 1.5648, + "step": 907 + }, + { + "epoch": 0.14, + "learning_rate": 4.819532908704883e-05, + "loss": 1.5995, + "step": 908 + }, + { + "epoch": 0.14, + "learning_rate": 4.824840764331211e-05, + "loss": 1.6543, + "step": 909 + }, + { + "epoch": 0.14, + "learning_rate": 4.8301486199575375e-05, + "loss": 1.6913, + "step": 910 + }, + { + "epoch": 0.15, + "learning_rate": 4.8354564755838644e-05, + "loss": 1.6514, + "step": 911 + }, + { + "epoch": 0.15, + "learning_rate": 4.840764331210191e-05, + "loss": 1.6079, + "step": 912 + }, + { + "epoch": 0.15, + "learning_rate": 4.846072186836518e-05, + "loss": 1.6073, + "step": 913 + }, + { + "epoch": 0.15, + "learning_rate": 4.8513800424628455e-05, + "loss": 1.7448, + "step": 914 + }, + { + "epoch": 0.15, + "learning_rate": 4.856687898089172e-05, + "loss": 1.622, + "step": 915 + }, + { + "epoch": 0.15, + "learning_rate": 4.861995753715499e-05, + "loss": 1.673, + "step": 916 + }, + { + "epoch": 0.15, + "learning_rate": 4.867303609341826e-05, + "loss": 1.5909, + "step": 917 + }, + { + "epoch": 0.15, + "learning_rate": 4.8726114649681534e-05, + "loss": 1.6288, + "step": 918 + }, + { + "epoch": 0.15, + "learning_rate": 4.87791932059448e-05, + "loss": 1.6406, + "step": 919 + }, + { + "epoch": 0.15, + "learning_rate": 4.883227176220807e-05, + "loss": 1.5574, + "step": 920 + }, + { + "epoch": 0.15, + "learning_rate": 4.888535031847134e-05, + "loss": 1.6892, + "step": 921 + }, + { + "epoch": 0.15, + "learning_rate": 4.893842887473461e-05, + "loss": 1.6007, + "step": 922 + }, + { + "epoch": 0.15, + "learning_rate": 4.899150743099788e-05, + "loss": 1.547, + "step": 923 + }, + { + "epoch": 0.15, + "learning_rate": 4.904458598726115e-05, + "loss": 1.5884, + "step": 924 + }, + { + "epoch": 0.15, + "learning_rate": 4.909766454352442e-05, + "loss": 1.721, + "step": 925 + }, + { + "epoch": 0.15, + "learning_rate": 4.9150743099787686e-05, + "loss": 1.6706, + "step": 926 + }, + { + "epoch": 0.15, + "learning_rate": 4.920382165605096e-05, + "loss": 1.6067, + "step": 927 + }, + { + "epoch": 0.15, + "learning_rate": 4.925690021231423e-05, + "loss": 1.6711, + "step": 928 + }, + { + "epoch": 0.15, + "learning_rate": 4.93099787685775e-05, + "loss": 1.682, + "step": 929 + }, + { + "epoch": 0.15, + "learning_rate": 4.9363057324840765e-05, + "loss": 1.7421, + "step": 930 + }, + { + "epoch": 0.15, + "learning_rate": 4.941613588110404e-05, + "loss": 1.6859, + "step": 931 + }, + { + "epoch": 0.15, + "learning_rate": 4.946921443736731e-05, + "loss": 1.5722, + "step": 932 + }, + { + "epoch": 0.15, + "learning_rate": 4.9522292993630576e-05, + "loss": 1.5793, + "step": 933 + }, + { + "epoch": 0.15, + "learning_rate": 4.9575371549893844e-05, + "loss": 1.5855, + "step": 934 + }, + { + "epoch": 0.15, + "learning_rate": 4.962845010615711e-05, + "loss": 1.6373, + "step": 935 + }, + { + "epoch": 0.15, + "learning_rate": 4.968152866242039e-05, + "loss": 1.6299, + "step": 936 + }, + { + "epoch": 0.15, + "learning_rate": 4.9734607218683655e-05, + "loss": 1.6066, + "step": 937 + }, + { + "epoch": 0.15, + "learning_rate": 4.978768577494692e-05, + "loss": 1.6022, + "step": 938 + }, + { + "epoch": 0.15, + "learning_rate": 4.984076433121019e-05, + "loss": 1.5861, + "step": 939 + }, + { + "epoch": 0.15, + "learning_rate": 4.9893842887473466e-05, + "loss": 1.6395, + "step": 940 + }, + { + "epoch": 0.15, + "learning_rate": 4.9946921443736734e-05, + "loss": 1.5928, + "step": 941 + }, + { + "epoch": 0.15, + "learning_rate": 5e-05, + "loss": 1.6496, + "step": 942 + }, + { + "epoch": 0.15, + "learning_rate": 4.999999986688259e-05, + "loss": 1.6897, + "step": 943 + }, + { + "epoch": 0.15, + "learning_rate": 4.999999946753036e-05, + "loss": 1.5608, + "step": 944 + }, + { + "epoch": 0.15, + "learning_rate": 4.9999998801943314e-05, + "loss": 1.7186, + "step": 945 + }, + { + "epoch": 0.15, + "learning_rate": 4.9999997870121453e-05, + "loss": 1.6578, + "step": 946 + }, + { + "epoch": 0.15, + "learning_rate": 4.99999966720648e-05, + "loss": 1.6882, + "step": 947 + }, + { + "epoch": 0.15, + "learning_rate": 4.9999995207773355e-05, + "loss": 1.6426, + "step": 948 + }, + { + "epoch": 0.15, + "learning_rate": 4.999999347724714e-05, + "loss": 1.6533, + "step": 949 + }, + { + "epoch": 0.15, + "learning_rate": 4.999999148048617e-05, + "loss": 1.6673, + "step": 950 + }, + { + "epoch": 0.15, + "learning_rate": 4.9999989217490474e-05, + "loss": 1.6079, + "step": 951 + }, + { + "epoch": 0.15, + "learning_rate": 4.999998668826006e-05, + "loss": 1.6514, + "step": 952 + }, + { + "epoch": 0.15, + "learning_rate": 4.9999983892794975e-05, + "loss": 1.6747, + "step": 953 + }, + { + "epoch": 0.15, + "learning_rate": 4.999998083109524e-05, + "loss": 1.6675, + "step": 954 + }, + { + "epoch": 0.15, + "learning_rate": 4.999997750316089e-05, + "loss": 1.6473, + "step": 955 + }, + { + "epoch": 0.15, + "learning_rate": 4.9999973908991946e-05, + "loss": 1.6065, + "step": 956 + }, + { + "epoch": 0.15, + "learning_rate": 4.999997004858846e-05, + "loss": 1.6709, + "step": 957 + }, + { + "epoch": 0.15, + "learning_rate": 4.9999965921950484e-05, + "loss": 1.6223, + "step": 958 + }, + { + "epoch": 0.15, + "learning_rate": 4.999996152907804e-05, + "loss": 1.6117, + "step": 959 + }, + { + "epoch": 0.15, + "learning_rate": 4.999995686997118e-05, + "loss": 1.6262, + "step": 960 + }, + { + "epoch": 0.15, + "learning_rate": 4.999995194462996e-05, + "loss": 1.6044, + "step": 961 + }, + { + "epoch": 0.15, + "learning_rate": 4.9999946753054426e-05, + "loss": 1.6547, + "step": 962 + }, + { + "epoch": 0.15, + "learning_rate": 4.9999941295244644e-05, + "loss": 1.6825, + "step": 963 + }, + { + "epoch": 0.15, + "learning_rate": 4.9999935571200664e-05, + "loss": 1.6275, + "step": 964 + }, + { + "epoch": 0.15, + "learning_rate": 4.9999929580922546e-05, + "loss": 1.6356, + "step": 965 + }, + { + "epoch": 0.15, + "learning_rate": 4.999992332441036e-05, + "loss": 1.5831, + "step": 966 + }, + { + "epoch": 0.15, + "learning_rate": 4.999991680166416e-05, + "loss": 1.5974, + "step": 967 + }, + { + "epoch": 0.15, + "learning_rate": 4.999991001268402e-05, + "loss": 1.5889, + "step": 968 + }, + { + "epoch": 0.15, + "learning_rate": 4.999990295747003e-05, + "loss": 1.5693, + "step": 969 + }, + { + "epoch": 0.15, + "learning_rate": 4.999989563602224e-05, + "loss": 1.7083, + "step": 970 + }, + { + "epoch": 0.15, + "learning_rate": 4.999988804834075e-05, + "loss": 1.6652, + "step": 971 + }, + { + "epoch": 0.15, + "learning_rate": 4.999988019442563e-05, + "loss": 1.6234, + "step": 972 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999872074276955e-05, + "loss": 1.6539, + "step": 973 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999863687894824e-05, + "loss": 1.6353, + "step": 974 + }, + { + "epoch": 0.16, + "learning_rate": 4.999985503527932e-05, + "loss": 1.7224, + "step": 975 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999846116430534e-05, + "loss": 1.684, + "step": 976 + }, + { + "epoch": 0.16, + "learning_rate": 4.999983693134858e-05, + "loss": 1.5518, + "step": 977 + }, + { + "epoch": 0.16, + "learning_rate": 4.999982748003353e-05, + "loss": 1.6447, + "step": 978 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999817762485495e-05, + "loss": 1.5948, + "step": 979 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999807778704576e-05, + "loss": 1.661, + "step": 980 + }, + { + "epoch": 0.16, + "learning_rate": 4.999979752869089e-05, + "loss": 1.5777, + "step": 981 + }, + { + "epoch": 0.16, + "learning_rate": 4.999978701244453e-05, + "loss": 1.6604, + "step": 982 + }, + { + "epoch": 0.16, + "learning_rate": 4.999977622996562e-05, + "loss": 1.5567, + "step": 983 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999765181254276e-05, + "loss": 1.6162, + "step": 984 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999753866310604e-05, + "loss": 1.6566, + "step": 985 + }, + { + "epoch": 0.16, + "learning_rate": 4.999974228513473e-05, + "loss": 1.6091, + "step": 986 + }, + { + "epoch": 0.16, + "learning_rate": 4.999973043772678e-05, + "loss": 1.6782, + "step": 987 + }, + { + "epoch": 0.16, + "learning_rate": 4.999971832408688e-05, + "loss": 1.5445, + "step": 988 + }, + { + "epoch": 0.16, + "learning_rate": 4.999970594421516e-05, + "loss": 1.6519, + "step": 989 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999693298111744e-05, + "loss": 1.6732, + "step": 990 + }, + { + "epoch": 0.16, + "learning_rate": 4.999968038577677e-05, + "loss": 1.6306, + "step": 991 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999667207210386e-05, + "loss": 1.5674, + "step": 992 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999653762412716e-05, + "loss": 1.6281, + "step": 993 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999640051383916e-05, + "loss": 1.6926, + "step": 994 + }, + { + "epoch": 0.16, + "learning_rate": 4.999962607412413e-05, + "loss": 1.6102, + "step": 995 + }, + { + "epoch": 0.16, + "learning_rate": 4.99996118306335e-05, + "loss": 1.541, + "step": 996 + }, + { + "epoch": 0.16, + "learning_rate": 4.999959732091218e-05, + "loss": 1.6947, + "step": 997 + }, + { + "epoch": 0.16, + "learning_rate": 4.999958254496033e-05, + "loss": 1.6895, + "step": 998 + }, + { + "epoch": 0.16, + "learning_rate": 4.99995675027781e-05, + "loss": 1.6594, + "step": 999 + }, + { + "epoch": 0.16, + "learning_rate": 4.999955219436565e-05, + "loss": 1.7354, + "step": 1000 + }, + { + "epoch": 0.16, + "learning_rate": 4.999953661972315e-05, + "loss": 1.6363, + "step": 1001 + }, + { + "epoch": 0.16, + "learning_rate": 4.999952077885077e-05, + "loss": 1.6459, + "step": 1002 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999504671748666e-05, + "loss": 1.6498, + "step": 1003 + }, + { + "epoch": 0.16, + "learning_rate": 4.999948829841701e-05, + "loss": 1.5847, + "step": 1004 + }, + { + "epoch": 0.16, + "learning_rate": 4.999947165885599e-05, + "loss": 1.7089, + "step": 1005 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999454753065764e-05, + "loss": 1.5749, + "step": 1006 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999437581046525e-05, + "loss": 1.6296, + "step": 1007 + }, + { + "epoch": 0.16, + "learning_rate": 4.999942014279846e-05, + "loss": 1.6629, + "step": 1008 + }, + { + "epoch": 0.16, + "learning_rate": 4.999940243832175e-05, + "loss": 1.5811, + "step": 1009 + }, + { + "epoch": 0.16, + "learning_rate": 4.999938446761657e-05, + "loss": 1.6635, + "step": 1010 + }, + { + "epoch": 0.16, + "learning_rate": 4.999936623068313e-05, + "loss": 1.6341, + "step": 1011 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999347727521616e-05, + "loss": 1.6314, + "step": 1012 + }, + { + "epoch": 0.16, + "learning_rate": 4.999932895813223e-05, + "loss": 1.565, + "step": 1013 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999309922515155e-05, + "loss": 1.5913, + "step": 1014 + }, + { + "epoch": 0.16, + "learning_rate": 4.999929062067062e-05, + "loss": 1.5819, + "step": 1015 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999271052598816e-05, + "loss": 1.6286, + "step": 1016 + }, + { + "epoch": 0.16, + "learning_rate": 4.999925121829995e-05, + "loss": 1.6307, + "step": 1017 + }, + { + "epoch": 0.16, + "learning_rate": 4.999923111777424e-05, + "loss": 1.6262, + "step": 1018 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999210751021886e-05, + "loss": 1.5698, + "step": 1019 + }, + { + "epoch": 0.16, + "learning_rate": 4.999919011804313e-05, + "loss": 1.7123, + "step": 1020 + }, + { + "epoch": 0.16, + "learning_rate": 4.999916921883816e-05, + "loss": 1.6878, + "step": 1021 + }, + { + "epoch": 0.16, + "learning_rate": 4.999914805340722e-05, + "loss": 1.5606, + "step": 1022 + }, + { + "epoch": 0.16, + "learning_rate": 4.999912662175053e-05, + "loss": 1.6193, + "step": 1023 + }, + { + "epoch": 0.16, + "learning_rate": 4.999910492386833e-05, + "loss": 1.5977, + "step": 1024 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999082959760826e-05, + "loss": 1.6263, + "step": 1025 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999060729428275e-05, + "loss": 1.5559, + "step": 1026 + }, + { + "epoch": 0.16, + "learning_rate": 4.9999038232870896e-05, + "loss": 1.5983, + "step": 1027 + }, + { + "epoch": 0.16, + "learning_rate": 4.999901547008895e-05, + "loss": 1.5759, + "step": 1028 + }, + { + "epoch": 0.16, + "learning_rate": 4.999899244108266e-05, + "loss": 1.5514, + "step": 1029 + }, + { + "epoch": 0.16, + "learning_rate": 4.999896914585227e-05, + "loss": 1.6188, + "step": 1030 + }, + { + "epoch": 0.16, + "learning_rate": 4.999894558439805e-05, + "loss": 1.6395, + "step": 1031 + }, + { + "epoch": 0.16, + "learning_rate": 4.999892175672023e-05, + "loss": 1.6279, + "step": 1032 + }, + { + "epoch": 0.16, + "learning_rate": 4.999889766281907e-05, + "loss": 1.6735, + "step": 1033 + }, + { + "epoch": 0.16, + "learning_rate": 4.9998873302694826e-05, + "loss": 1.6564, + "step": 1034 + }, + { + "epoch": 0.16, + "learning_rate": 4.9998848676347756e-05, + "loss": 1.5908, + "step": 1035 + }, + { + "epoch": 0.17, + "learning_rate": 4.999882378377813e-05, + "loss": 1.571, + "step": 1036 + }, + { + "epoch": 0.17, + "learning_rate": 4.99987986249862e-05, + "loss": 1.6665, + "step": 1037 + }, + { + "epoch": 0.17, + "learning_rate": 4.999877319997225e-05, + "loss": 1.599, + "step": 1038 + }, + { + "epoch": 0.17, + "learning_rate": 4.999874750873654e-05, + "loss": 1.6727, + "step": 1039 + }, + { + "epoch": 0.17, + "learning_rate": 4.999872155127935e-05, + "loss": 1.5708, + "step": 1040 + }, + { + "epoch": 0.17, + "learning_rate": 4.999869532760095e-05, + "loss": 1.6338, + "step": 1041 + }, + { + "epoch": 0.17, + "learning_rate": 4.999866883770162e-05, + "loss": 1.6357, + "step": 1042 + }, + { + "epoch": 0.17, + "learning_rate": 4.999864208158164e-05, + "loss": 1.5489, + "step": 1043 + }, + { + "epoch": 0.17, + "learning_rate": 4.99986150592413e-05, + "loss": 1.5578, + "step": 1044 + }, + { + "epoch": 0.17, + "learning_rate": 4.999858777068089e-05, + "loss": 1.6962, + "step": 1045 + }, + { + "epoch": 0.17, + "learning_rate": 4.9998560215900706e-05, + "loss": 1.6064, + "step": 1046 + }, + { + "epoch": 0.17, + "learning_rate": 4.9998532394901024e-05, + "loss": 1.6674, + "step": 1047 + }, + { + "epoch": 0.17, + "learning_rate": 4.9998504307682147e-05, + "loss": 1.5959, + "step": 1048 + }, + { + "epoch": 0.17, + "learning_rate": 4.999847595424438e-05, + "loss": 1.641, + "step": 1049 + }, + { + "epoch": 0.17, + "learning_rate": 4.999844733458802e-05, + "loss": 1.6225, + "step": 1050 + }, + { + "epoch": 0.17, + "learning_rate": 4.999841844871337e-05, + "loss": 1.628, + "step": 1051 + }, + { + "epoch": 0.17, + "learning_rate": 4.999838929662074e-05, + "loss": 1.5731, + "step": 1052 + }, + { + "epoch": 0.17, + "learning_rate": 4.9998359878310445e-05, + "loss": 1.681, + "step": 1053 + }, + { + "epoch": 0.17, + "learning_rate": 4.9998330193782796e-05, + "loss": 1.5954, + "step": 1054 + }, + { + "epoch": 0.17, + "learning_rate": 4.99983002430381e-05, + "loss": 1.5389, + "step": 1055 + }, + { + "epoch": 0.17, + "learning_rate": 4.9998270026076685e-05, + "loss": 1.5232, + "step": 1056 + }, + { + "epoch": 0.17, + "learning_rate": 4.999823954289888e-05, + "loss": 1.6373, + "step": 1057 + }, + { + "epoch": 0.17, + "learning_rate": 4.999820879350499e-05, + "loss": 1.6521, + "step": 1058 + }, + { + "epoch": 0.17, + "learning_rate": 4.999817777789536e-05, + "loss": 1.6187, + "step": 1059 + }, + { + "epoch": 0.17, + "learning_rate": 4.999814649607031e-05, + "loss": 1.5383, + "step": 1060 + }, + { + "epoch": 0.17, + "learning_rate": 4.999811494803017e-05, + "loss": 1.6322, + "step": 1061 + }, + { + "epoch": 0.17, + "learning_rate": 4.99980831337753e-05, + "loss": 1.6159, + "step": 1062 + }, + { + "epoch": 0.17, + "learning_rate": 4.9998051053306003e-05, + "loss": 1.5783, + "step": 1063 + }, + { + "epoch": 0.17, + "learning_rate": 4.9998018706622654e-05, + "loss": 1.6267, + "step": 1064 + }, + { + "epoch": 0.17, + "learning_rate": 4.999798609372557e-05, + "loss": 1.5875, + "step": 1065 + }, + { + "epoch": 0.17, + "learning_rate": 4.999795321461512e-05, + "loss": 1.6695, + "step": 1066 + }, + { + "epoch": 0.17, + "learning_rate": 4.999792006929164e-05, + "loss": 1.621, + "step": 1067 + }, + { + "epoch": 0.17, + "learning_rate": 4.9997886657755486e-05, + "loss": 1.6694, + "step": 1068 + }, + { + "epoch": 0.17, + "learning_rate": 4.999785298000702e-05, + "loss": 1.6315, + "step": 1069 + }, + { + "epoch": 0.17, + "learning_rate": 4.9997819036046595e-05, + "loss": 1.5939, + "step": 1070 + }, + { + "epoch": 0.17, + "learning_rate": 4.999778482587457e-05, + "loss": 1.5834, + "step": 1071 + }, + { + "epoch": 0.17, + "learning_rate": 4.9997750349491324e-05, + "loss": 1.6603, + "step": 1072 + }, + { + "epoch": 0.17, + "learning_rate": 4.99977156068972e-05, + "loss": 1.5668, + "step": 1073 + }, + { + "epoch": 0.17, + "learning_rate": 4.999768059809259e-05, + "loss": 1.6648, + "step": 1074 + }, + { + "epoch": 0.17, + "learning_rate": 4.9997645323077855e-05, + "loss": 1.6295, + "step": 1075 + }, + { + "epoch": 0.17, + "learning_rate": 4.999760978185337e-05, + "loss": 1.6549, + "step": 1076 + }, + { + "epoch": 0.17, + "learning_rate": 4.999757397441952e-05, + "loss": 1.6004, + "step": 1077 + }, + { + "epoch": 0.17, + "learning_rate": 4.999753790077669e-05, + "loss": 1.6048, + "step": 1078 + }, + { + "epoch": 0.17, + "learning_rate": 4.9997501560925255e-05, + "loss": 1.5022, + "step": 1079 + }, + { + "epoch": 0.17, + "learning_rate": 4.99974649548656e-05, + "loss": 1.553, + "step": 1080 + }, + { + "epoch": 0.17, + "learning_rate": 4.999742808259812e-05, + "loss": 1.6843, + "step": 1081 + }, + { + "epoch": 0.17, + "learning_rate": 4.9997390944123214e-05, + "loss": 1.5793, + "step": 1082 + }, + { + "epoch": 0.17, + "learning_rate": 4.999735353944126e-05, + "loss": 1.7546, + "step": 1083 + }, + { + "epoch": 0.17, + "learning_rate": 4.999731586855268e-05, + "loss": 1.6006, + "step": 1084 + }, + { + "epoch": 0.17, + "learning_rate": 4.999727793145785e-05, + "loss": 1.6024, + "step": 1085 + }, + { + "epoch": 0.17, + "learning_rate": 4.999723972815719e-05, + "loss": 1.7511, + "step": 1086 + }, + { + "epoch": 0.17, + "learning_rate": 4.9997201258651105e-05, + "loss": 1.6299, + "step": 1087 + }, + { + "epoch": 0.17, + "learning_rate": 4.999716252294001e-05, + "loss": 1.5952, + "step": 1088 + }, + { + "epoch": 0.17, + "learning_rate": 4.99971235210243e-05, + "loss": 1.5775, + "step": 1089 + }, + { + "epoch": 0.17, + "learning_rate": 4.99970842529044e-05, + "loss": 1.6345, + "step": 1090 + }, + { + "epoch": 0.17, + "learning_rate": 4.999704471858073e-05, + "loss": 1.6713, + "step": 1091 + }, + { + "epoch": 0.17, + "learning_rate": 4.999700491805371e-05, + "loss": 1.5788, + "step": 1092 + }, + { + "epoch": 0.17, + "learning_rate": 4.999696485132377e-05, + "loss": 1.6198, + "step": 1093 + }, + { + "epoch": 0.17, + "learning_rate": 4.999692451839133e-05, + "loss": 1.5681, + "step": 1094 + }, + { + "epoch": 0.17, + "learning_rate": 4.9996883919256806e-05, + "loss": 1.6197, + "step": 1095 + }, + { + "epoch": 0.17, + "learning_rate": 4.999684305392066e-05, + "loss": 1.5995, + "step": 1096 + }, + { + "epoch": 0.17, + "learning_rate": 4.999680192238331e-05, + "loss": 1.6146, + "step": 1097 + }, + { + "epoch": 0.17, + "learning_rate": 4.999676052464519e-05, + "loss": 1.6732, + "step": 1098 + }, + { + "epoch": 0.18, + "learning_rate": 4.999671886070674e-05, + "loss": 1.6541, + "step": 1099 + }, + { + "epoch": 0.18, + "learning_rate": 4.999667693056842e-05, + "loss": 1.6044, + "step": 1100 + }, + { + "epoch": 0.18, + "learning_rate": 4.9996634734230664e-05, + "loss": 1.5225, + "step": 1101 + }, + { + "epoch": 0.18, + "learning_rate": 4.999659227169392e-05, + "loss": 1.6052, + "step": 1102 + }, + { + "epoch": 0.18, + "learning_rate": 4.999654954295865e-05, + "loss": 1.6567, + "step": 1103 + }, + { + "epoch": 0.18, + "learning_rate": 4.99965065480253e-05, + "loss": 1.5832, + "step": 1104 + }, + { + "epoch": 0.18, + "learning_rate": 4.9996463286894335e-05, + "loss": 1.5614, + "step": 1105 + }, + { + "epoch": 0.18, + "learning_rate": 4.9996419759566204e-05, + "loss": 1.6605, + "step": 1106 + }, + { + "epoch": 0.18, + "learning_rate": 4.999637596604139e-05, + "loss": 1.5666, + "step": 1107 + }, + { + "epoch": 0.18, + "learning_rate": 4.999633190632034e-05, + "loss": 1.6734, + "step": 1108 + }, + { + "epoch": 0.18, + "learning_rate": 4.999628758040353e-05, + "loss": 1.5461, + "step": 1109 + }, + { + "epoch": 0.18, + "learning_rate": 4.999624298829143e-05, + "loss": 1.6312, + "step": 1110 + }, + { + "epoch": 0.18, + "learning_rate": 4.9996198129984526e-05, + "loss": 1.6459, + "step": 1111 + }, + { + "epoch": 0.18, + "learning_rate": 4.9996153005483284e-05, + "loss": 1.6297, + "step": 1112 + }, + { + "epoch": 0.18, + "learning_rate": 4.999610761478818e-05, + "loss": 1.6803, + "step": 1113 + }, + { + "epoch": 0.18, + "learning_rate": 4.999606195789972e-05, + "loss": 1.5429, + "step": 1114 + }, + { + "epoch": 0.18, + "learning_rate": 4.9996016034818364e-05, + "loss": 1.5696, + "step": 1115 + }, + { + "epoch": 0.18, + "learning_rate": 4.9995969845544625e-05, + "loss": 1.636, + "step": 1116 + }, + { + "epoch": 0.18, + "learning_rate": 4.999592339007897e-05, + "loss": 1.6106, + "step": 1117 + }, + { + "epoch": 0.18, + "learning_rate": 4.999587666842191e-05, + "loss": 1.5904, + "step": 1118 + }, + { + "epoch": 0.18, + "learning_rate": 4.9995829680573944e-05, + "loss": 1.583, + "step": 1119 + }, + { + "epoch": 0.18, + "learning_rate": 4.999578242653556e-05, + "loss": 1.6345, + "step": 1120 + }, + { + "epoch": 0.18, + "learning_rate": 4.999573490630727e-05, + "loss": 1.5639, + "step": 1121 + }, + { + "epoch": 0.18, + "learning_rate": 4.9995687119889576e-05, + "loss": 1.6425, + "step": 1122 + }, + { + "epoch": 0.18, + "learning_rate": 4.9995639067283e-05, + "loss": 1.5586, + "step": 1123 + }, + { + "epoch": 0.18, + "learning_rate": 4.9995590748488035e-05, + "loss": 1.6422, + "step": 1124 + }, + { + "epoch": 0.18, + "learning_rate": 4.99955421635052e-05, + "loss": 1.5998, + "step": 1125 + }, + { + "epoch": 0.18, + "learning_rate": 4.999549331233503e-05, + "loss": 1.6615, + "step": 1126 + }, + { + "epoch": 0.18, + "learning_rate": 4.999544419497802e-05, + "loss": 1.6211, + "step": 1127 + }, + { + "epoch": 0.18, + "learning_rate": 4.99953948114347e-05, + "loss": 1.6153, + "step": 1128 + }, + { + "epoch": 0.18, + "learning_rate": 4.999534516170561e-05, + "loss": 1.6644, + "step": 1129 + }, + { + "epoch": 0.18, + "learning_rate": 4.9995295245791274e-05, + "loss": 1.5111, + "step": 1130 + }, + { + "epoch": 0.18, + "learning_rate": 4.999524506369221e-05, + "loss": 1.6353, + "step": 1131 + }, + { + "epoch": 0.18, + "learning_rate": 4.999519461540896e-05, + "loss": 1.6166, + "step": 1132 + }, + { + "epoch": 0.18, + "learning_rate": 4.9995143900942064e-05, + "loss": 1.5671, + "step": 1133 + }, + { + "epoch": 0.18, + "learning_rate": 4.9995092920292064e-05, + "loss": 1.6156, + "step": 1134 + }, + { + "epoch": 0.18, + "learning_rate": 4.9995041673459495e-05, + "loss": 1.6298, + "step": 1135 + }, + { + "epoch": 0.18, + "learning_rate": 4.999499016044491e-05, + "loss": 1.5876, + "step": 1136 + }, + { + "epoch": 0.18, + "learning_rate": 4.9994938381248854e-05, + "loss": 1.5248, + "step": 1137 + }, + { + "epoch": 0.18, + "learning_rate": 4.9994886335871874e-05, + "loss": 1.5703, + "step": 1138 + }, + { + "epoch": 0.18, + "learning_rate": 4.999483402431453e-05, + "loss": 1.5588, + "step": 1139 + }, + { + "epoch": 0.18, + "learning_rate": 4.9994781446577385e-05, + "loss": 1.6364, + "step": 1140 + }, + { + "epoch": 0.18, + "learning_rate": 4.999472860266099e-05, + "loss": 1.5515, + "step": 1141 + }, + { + "epoch": 0.18, + "learning_rate": 4.999467549256591e-05, + "loss": 1.5938, + "step": 1142 + }, + { + "epoch": 0.18, + "learning_rate": 4.999462211629271e-05, + "loss": 1.6405, + "step": 1143 + }, + { + "epoch": 0.18, + "learning_rate": 4.9994568473841964e-05, + "loss": 1.5685, + "step": 1144 + }, + { + "epoch": 0.18, + "learning_rate": 4.9994514565214226e-05, + "loss": 1.6342, + "step": 1145 + }, + { + "epoch": 0.18, + "learning_rate": 4.999446039041009e-05, + "loss": 1.6262, + "step": 1146 + }, + { + "epoch": 0.18, + "learning_rate": 4.999440594943012e-05, + "loss": 1.6683, + "step": 1147 + }, + { + "epoch": 0.18, + "learning_rate": 4.999435124227491e-05, + "loss": 1.5899, + "step": 1148 + }, + { + "epoch": 0.18, + "learning_rate": 4.999429626894503e-05, + "loss": 1.626, + "step": 1149 + }, + { + "epoch": 0.18, + "learning_rate": 4.999424102944107e-05, + "loss": 1.6026, + "step": 1150 + }, + { + "epoch": 0.18, + "learning_rate": 4.999418552376362e-05, + "loss": 1.5562, + "step": 1151 + }, + { + "epoch": 0.18, + "learning_rate": 4.9994129751913264e-05, + "loss": 1.6389, + "step": 1152 + }, + { + "epoch": 0.18, + "learning_rate": 4.99940737138906e-05, + "loss": 1.6244, + "step": 1153 + }, + { + "epoch": 0.18, + "learning_rate": 4.9994017409696226e-05, + "loss": 1.6773, + "step": 1154 + }, + { + "epoch": 0.18, + "learning_rate": 4.999396083933075e-05, + "loss": 1.5495, + "step": 1155 + }, + { + "epoch": 0.18, + "learning_rate": 4.999390400279475e-05, + "loss": 1.7222, + "step": 1156 + }, + { + "epoch": 0.18, + "learning_rate": 4.999384690008885e-05, + "loss": 1.6356, + "step": 1157 + }, + { + "epoch": 0.18, + "learning_rate": 4.9993789531213656e-05, + "loss": 1.5318, + "step": 1158 + }, + { + "epoch": 0.18, + "learning_rate": 4.999373189616978e-05, + "loss": 1.5819, + "step": 1159 + }, + { + "epoch": 0.18, + "learning_rate": 4.9993673994957834e-05, + "loss": 1.5838, + "step": 1160 + }, + { + "epoch": 0.18, + "learning_rate": 4.999361582757843e-05, + "loss": 1.6922, + "step": 1161 + }, + { + "epoch": 0.19, + "learning_rate": 4.9993557394032196e-05, + "loss": 1.5868, + "step": 1162 + }, + { + "epoch": 0.19, + "learning_rate": 4.9993498694319754e-05, + "loss": 1.6193, + "step": 1163 + }, + { + "epoch": 0.19, + "learning_rate": 4.9993439728441716e-05, + "loss": 1.6935, + "step": 1164 + }, + { + "epoch": 0.19, + "learning_rate": 4.999338049639872e-05, + "loss": 1.7002, + "step": 1165 + }, + { + "epoch": 0.19, + "learning_rate": 4.9993320998191394e-05, + "loss": 1.6387, + "step": 1166 + }, + { + "epoch": 0.19, + "learning_rate": 4.999326123382038e-05, + "loss": 1.534, + "step": 1167 + }, + { + "epoch": 0.19, + "learning_rate": 4.99932012032863e-05, + "loss": 1.5505, + "step": 1168 + }, + { + "epoch": 0.19, + "learning_rate": 4.999314090658981e-05, + "loss": 1.6548, + "step": 1169 + }, + { + "epoch": 0.19, + "learning_rate": 4.999308034373153e-05, + "loss": 1.6073, + "step": 1170 + }, + { + "epoch": 0.19, + "learning_rate": 4.9993019514712126e-05, + "loss": 1.7097, + "step": 1171 + }, + { + "epoch": 0.19, + "learning_rate": 4.9992958419532247e-05, + "loss": 1.5011, + "step": 1172 + }, + { + "epoch": 0.19, + "learning_rate": 4.9992897058192524e-05, + "loss": 1.601, + "step": 1173 + }, + { + "epoch": 0.19, + "learning_rate": 4.999283543069362e-05, + "loss": 1.6228, + "step": 1174 + }, + { + "epoch": 0.19, + "learning_rate": 4.9992773537036195e-05, + "loss": 1.5894, + "step": 1175 + }, + { + "epoch": 0.19, + "learning_rate": 4.99927113772209e-05, + "loss": 1.5596, + "step": 1176 + }, + { + "epoch": 0.19, + "learning_rate": 4.99926489512484e-05, + "loss": 1.5736, + "step": 1177 + }, + { + "epoch": 0.19, + "learning_rate": 4.999258625911938e-05, + "loss": 1.5897, + "step": 1178 + }, + { + "epoch": 0.19, + "learning_rate": 4.9992523300834474e-05, + "loss": 1.6323, + "step": 1179 + }, + { + "epoch": 0.19, + "learning_rate": 4.999246007639437e-05, + "loss": 1.7217, + "step": 1180 + }, + { + "epoch": 0.19, + "learning_rate": 4.999239658579974e-05, + "loss": 1.4924, + "step": 1181 + }, + { + "epoch": 0.19, + "learning_rate": 4.999233282905127e-05, + "loss": 1.6405, + "step": 1182 + }, + { + "epoch": 0.19, + "learning_rate": 4.999226880614962e-05, + "loss": 1.6633, + "step": 1183 + }, + { + "epoch": 0.19, + "learning_rate": 4.999220451709548e-05, + "loss": 1.5926, + "step": 1184 + }, + { + "epoch": 0.19, + "learning_rate": 4.999213996188953e-05, + "loss": 1.5635, + "step": 1185 + }, + { + "epoch": 0.19, + "learning_rate": 4.9992075140532466e-05, + "loss": 1.5985, + "step": 1186 + }, + { + "epoch": 0.19, + "learning_rate": 4.999201005302499e-05, + "loss": 1.5394, + "step": 1187 + }, + { + "epoch": 0.19, + "learning_rate": 4.9991944699367754e-05, + "loss": 1.5914, + "step": 1188 + }, + { + "epoch": 0.19, + "learning_rate": 4.99918790795615e-05, + "loss": 1.6612, + "step": 1189 + }, + { + "epoch": 0.19, + "learning_rate": 4.99918131936069e-05, + "loss": 1.6192, + "step": 1190 + }, + { + "epoch": 0.19, + "learning_rate": 4.9991747041504655e-05, + "loss": 1.7092, + "step": 1191 + }, + { + "epoch": 0.19, + "learning_rate": 4.9991680623255485e-05, + "loss": 1.6522, + "step": 1192 + }, + { + "epoch": 0.19, + "learning_rate": 4.9991613938860094e-05, + "loss": 1.517, + "step": 1193 + }, + { + "epoch": 0.19, + "learning_rate": 4.999154698831918e-05, + "loss": 1.5238, + "step": 1194 + }, + { + "epoch": 0.19, + "learning_rate": 4.999147977163346e-05, + "loss": 1.6851, + "step": 1195 + }, + { + "epoch": 0.19, + "learning_rate": 4.9991412288803666e-05, + "loss": 1.6009, + "step": 1196 + }, + { + "epoch": 0.19, + "learning_rate": 4.9991344539830486e-05, + "loss": 1.6052, + "step": 1197 + }, + { + "epoch": 0.19, + "learning_rate": 4.999127652471467e-05, + "loss": 1.6008, + "step": 1198 + }, + { + "epoch": 0.19, + "learning_rate": 4.999120824345693e-05, + "loss": 1.5627, + "step": 1199 + }, + { + "epoch": 0.19, + "learning_rate": 4.9991139696058e-05, + "loss": 1.6124, + "step": 1200 + }, + { + "epoch": 0.19, + "learning_rate": 4.99910708825186e-05, + "loss": 1.5857, + "step": 1201 + }, + { + "epoch": 0.19, + "learning_rate": 4.999100180283946e-05, + "loss": 1.6319, + "step": 1202 + }, + { + "epoch": 0.19, + "learning_rate": 4.999093245702133e-05, + "loss": 1.5621, + "step": 1203 + }, + { + "epoch": 0.19, + "learning_rate": 4.999086284506494e-05, + "loss": 1.6334, + "step": 1204 + }, + { + "epoch": 0.19, + "learning_rate": 4.9990792966971045e-05, + "loss": 1.6033, + "step": 1205 + }, + { + "epoch": 0.19, + "learning_rate": 4.999072282274036e-05, + "loss": 1.6982, + "step": 1206 + }, + { + "epoch": 0.19, + "learning_rate": 4.9990652412373665e-05, + "loss": 1.5569, + "step": 1207 + }, + { + "epoch": 0.19, + "learning_rate": 4.999058173587168e-05, + "loss": 1.57, + "step": 1208 + }, + { + "epoch": 0.19, + "learning_rate": 4.999051079323518e-05, + "loss": 1.5979, + "step": 1209 + }, + { + "epoch": 0.19, + "learning_rate": 4.999043958446491e-05, + "loss": 1.6519, + "step": 1210 + }, + { + "epoch": 0.19, + "learning_rate": 4.999036810956163e-05, + "loss": 1.6625, + "step": 1211 + }, + { + "epoch": 0.19, + "learning_rate": 4.99902963685261e-05, + "loss": 1.5818, + "step": 1212 + }, + { + "epoch": 0.19, + "learning_rate": 4.999022436135909e-05, + "loss": 1.5845, + "step": 1213 + }, + { + "epoch": 0.19, + "learning_rate": 4.999015208806136e-05, + "loss": 1.5544, + "step": 1214 + }, + { + "epoch": 0.19, + "learning_rate": 4.9990079548633686e-05, + "loss": 1.5887, + "step": 1215 + }, + { + "epoch": 0.19, + "learning_rate": 4.999000674307683e-05, + "loss": 1.5933, + "step": 1216 + }, + { + "epoch": 0.19, + "learning_rate": 4.9989933671391576e-05, + "loss": 1.5923, + "step": 1217 + }, + { + "epoch": 0.19, + "learning_rate": 4.9989860333578696e-05, + "loss": 1.6532, + "step": 1218 + }, + { + "epoch": 0.19, + "learning_rate": 4.998978672963899e-05, + "loss": 1.5238, + "step": 1219 + }, + { + "epoch": 0.19, + "learning_rate": 4.9989712859573215e-05, + "loss": 1.6016, + "step": 1220 + }, + { + "epoch": 0.19, + "learning_rate": 4.9989638723382174e-05, + "loss": 1.6493, + "step": 1221 + }, + { + "epoch": 0.19, + "learning_rate": 4.998956432106664e-05, + "loss": 1.5693, + "step": 1222 + }, + { + "epoch": 0.19, + "learning_rate": 4.998948965262743e-05, + "loss": 1.612, + "step": 1223 + }, + { + "epoch": 0.19, + "learning_rate": 4.998941471806533e-05, + "loss": 1.6732, + "step": 1224 + }, + { + "epoch": 0.2, + "learning_rate": 4.998933951738112e-05, + "loss": 1.6226, + "step": 1225 + }, + { + "epoch": 0.2, + "learning_rate": 4.9989264050575627e-05, + "loss": 1.5863, + "step": 1226 + }, + { + "epoch": 0.2, + "learning_rate": 4.998918831764964e-05, + "loss": 1.5684, + "step": 1227 + }, + { + "epoch": 0.2, + "learning_rate": 4.998911231860397e-05, + "loss": 1.549, + "step": 1228 + }, + { + "epoch": 0.2, + "learning_rate": 4.998903605343942e-05, + "loss": 1.6421, + "step": 1229 + }, + { + "epoch": 0.2, + "learning_rate": 4.9988959522156814e-05, + "loss": 1.6148, + "step": 1230 + }, + { + "epoch": 0.2, + "learning_rate": 4.998888272475696e-05, + "loss": 1.5608, + "step": 1231 + }, + { + "epoch": 0.2, + "learning_rate": 4.998880566124067e-05, + "loss": 1.6176, + "step": 1232 + }, + { + "epoch": 0.2, + "learning_rate": 4.998872833160878e-05, + "loss": 1.5552, + "step": 1233 + }, + { + "epoch": 0.2, + "learning_rate": 4.99886507358621e-05, + "loss": 1.598, + "step": 1234 + }, + { + "epoch": 0.2, + "learning_rate": 4.9988572874001454e-05, + "loss": 1.5962, + "step": 1235 + }, + { + "epoch": 0.2, + "learning_rate": 4.998849474602768e-05, + "loss": 1.6732, + "step": 1236 + }, + { + "epoch": 0.2, + "learning_rate": 4.9988416351941615e-05, + "loss": 1.5698, + "step": 1237 + }, + { + "epoch": 0.2, + "learning_rate": 4.998833769174408e-05, + "loss": 1.563, + "step": 1238 + }, + { + "epoch": 0.2, + "learning_rate": 4.998825876543593e-05, + "loss": 1.6091, + "step": 1239 + }, + { + "epoch": 0.2, + "learning_rate": 4.9988179573017985e-05, + "loss": 1.5528, + "step": 1240 + }, + { + "epoch": 0.2, + "learning_rate": 4.9988100114491096e-05, + "loss": 1.6399, + "step": 1241 + }, + { + "epoch": 0.2, + "learning_rate": 4.9988020389856114e-05, + "loss": 1.5469, + "step": 1242 + }, + { + "epoch": 0.2, + "learning_rate": 4.9987940399113894e-05, + "loss": 1.6065, + "step": 1243 + }, + { + "epoch": 0.2, + "learning_rate": 4.998786014226527e-05, + "loss": 1.683, + "step": 1244 + }, + { + "epoch": 0.2, + "learning_rate": 4.9987779619311106e-05, + "loss": 1.6682, + "step": 1245 + }, + { + "epoch": 0.2, + "learning_rate": 4.998769883025226e-05, + "loss": 1.5985, + "step": 1246 + }, + { + "epoch": 0.2, + "learning_rate": 4.99876177750896e-05, + "loss": 1.5341, + "step": 1247 + }, + { + "epoch": 0.2, + "learning_rate": 4.998753645382398e-05, + "loss": 1.5715, + "step": 1248 + }, + { + "epoch": 0.2, + "learning_rate": 4.998745486645625e-05, + "loss": 1.601, + "step": 1249 + }, + { + "epoch": 0.2, + "learning_rate": 4.9987373012987314e-05, + "loss": 1.5323, + "step": 1250 + }, + { + "epoch": 0.2, + "learning_rate": 4.998729089341802e-05, + "loss": 1.6123, + "step": 1251 + }, + { + "epoch": 0.2, + "learning_rate": 4.998720850774925e-05, + "loss": 1.6463, + "step": 1252 + }, + { + "epoch": 0.2, + "learning_rate": 4.9987125855981874e-05, + "loss": 1.5015, + "step": 1253 + }, + { + "epoch": 0.2, + "learning_rate": 4.998704293811678e-05, + "loss": 1.6104, + "step": 1254 + }, + { + "epoch": 0.2, + "learning_rate": 4.998695975415485e-05, + "loss": 1.5546, + "step": 1255 + }, + { + "epoch": 0.2, + "learning_rate": 4.9986876304096966e-05, + "loss": 1.6368, + "step": 1256 + }, + { + "epoch": 0.2, + "learning_rate": 4.998679258794402e-05, + "loss": 1.6148, + "step": 1257 + }, + { + "epoch": 0.2, + "learning_rate": 4.9986708605696905e-05, + "loss": 1.5793, + "step": 1258 + }, + { + "epoch": 0.2, + "learning_rate": 4.998662435735652e-05, + "loss": 1.7033, + "step": 1259 + }, + { + "epoch": 0.2, + "learning_rate": 4.998653984292374e-05, + "loss": 1.5479, + "step": 1260 + }, + { + "epoch": 0.2, + "learning_rate": 4.998645506239949e-05, + "loss": 1.542, + "step": 1261 + }, + { + "epoch": 0.2, + "learning_rate": 4.998637001578466e-05, + "loss": 1.6403, + "step": 1262 + }, + { + "epoch": 0.2, + "learning_rate": 4.998628470308017e-05, + "loss": 1.5576, + "step": 1263 + }, + { + "epoch": 0.2, + "learning_rate": 4.99861991242869e-05, + "loss": 1.5767, + "step": 1264 + }, + { + "epoch": 0.2, + "learning_rate": 4.9986113279405786e-05, + "loss": 1.5549, + "step": 1265 + }, + { + "epoch": 0.2, + "learning_rate": 4.998602716843773e-05, + "loss": 1.6682, + "step": 1266 + }, + { + "epoch": 0.2, + "learning_rate": 4.998594079138367e-05, + "loss": 1.6455, + "step": 1267 + }, + { + "epoch": 0.2, + "learning_rate": 4.9985854148244494e-05, + "loss": 1.5145, + "step": 1268 + }, + { + "epoch": 0.2, + "learning_rate": 4.9985767239021146e-05, + "loss": 1.5891, + "step": 1269 + }, + { + "epoch": 0.2, + "learning_rate": 4.998568006371455e-05, + "loss": 1.6187, + "step": 1270 + }, + { + "epoch": 0.2, + "learning_rate": 4.998559262232562e-05, + "loss": 1.6104, + "step": 1271 + }, + { + "epoch": 0.2, + "learning_rate": 4.998550491485531e-05, + "loss": 1.6524, + "step": 1272 + }, + { + "epoch": 0.2, + "learning_rate": 4.998541694130453e-05, + "loss": 1.5435, + "step": 1273 + }, + { + "epoch": 0.2, + "learning_rate": 4.998532870167424e-05, + "loss": 1.6014, + "step": 1274 + }, + { + "epoch": 0.2, + "learning_rate": 4.998524019596536e-05, + "loss": 1.6035, + "step": 1275 + }, + { + "epoch": 0.2, + "learning_rate": 4.998515142417884e-05, + "loss": 1.5596, + "step": 1276 + }, + { + "epoch": 0.2, + "learning_rate": 4.998506238631563e-05, + "loss": 1.6439, + "step": 1277 + }, + { + "epoch": 0.2, + "learning_rate": 4.998497308237668e-05, + "loss": 1.6366, + "step": 1278 + }, + { + "epoch": 0.2, + "learning_rate": 4.998488351236293e-05, + "loss": 1.5285, + "step": 1279 + }, + { + "epoch": 0.2, + "learning_rate": 4.998479367627534e-05, + "loss": 1.6112, + "step": 1280 + }, + { + "epoch": 0.2, + "learning_rate": 4.9984703574114863e-05, + "loss": 1.5378, + "step": 1281 + }, + { + "epoch": 0.2, + "learning_rate": 4.998461320588246e-05, + "loss": 1.4846, + "step": 1282 + }, + { + "epoch": 0.2, + "learning_rate": 4.99845225715791e-05, + "loss": 1.5814, + "step": 1283 + }, + { + "epoch": 0.2, + "learning_rate": 4.9984431671205747e-05, + "loss": 1.6221, + "step": 1284 + }, + { + "epoch": 0.2, + "learning_rate": 4.998434050476336e-05, + "loss": 1.5925, + "step": 1285 + }, + { + "epoch": 0.2, + "learning_rate": 4.998424907225291e-05, + "loss": 1.6229, + "step": 1286 + }, + { + "epoch": 0.21, + "learning_rate": 4.998415737367538e-05, + "loss": 1.5622, + "step": 1287 + }, + { + "epoch": 0.21, + "learning_rate": 4.998406540903174e-05, + "loss": 1.5941, + "step": 1288 + }, + { + "epoch": 0.21, + "learning_rate": 4.998397317832298e-05, + "loss": 1.5723, + "step": 1289 + }, + { + "epoch": 0.21, + "learning_rate": 4.9983880681550065e-05, + "loss": 1.5894, + "step": 1290 + }, + { + "epoch": 0.21, + "learning_rate": 4.998378791871399e-05, + "loss": 1.6195, + "step": 1291 + }, + { + "epoch": 0.21, + "learning_rate": 4.9983694889815745e-05, + "loss": 1.5589, + "step": 1292 + }, + { + "epoch": 0.21, + "learning_rate": 4.9983601594856314e-05, + "loss": 1.6174, + "step": 1293 + }, + { + "epoch": 0.21, + "learning_rate": 4.99835080338367e-05, + "loss": 1.5964, + "step": 1294 + }, + { + "epoch": 0.21, + "learning_rate": 4.998341420675788e-05, + "loss": 1.5243, + "step": 1295 + }, + { + "epoch": 0.21, + "learning_rate": 4.998332011362088e-05, + "loss": 1.5378, + "step": 1296 + }, + { + "epoch": 0.21, + "learning_rate": 4.998322575442669e-05, + "loss": 1.627, + "step": 1297 + }, + { + "epoch": 0.21, + "learning_rate": 4.998313112917631e-05, + "loss": 1.5654, + "step": 1298 + }, + { + "epoch": 0.21, + "learning_rate": 4.998303623787074e-05, + "loss": 1.5811, + "step": 1299 + }, + { + "epoch": 0.21, + "learning_rate": 4.9982941080511015e-05, + "loss": 1.5704, + "step": 1300 + }, + { + "epoch": 0.21, + "learning_rate": 4.998284565709813e-05, + "loss": 1.6062, + "step": 1301 + }, + { + "epoch": 0.21, + "learning_rate": 4.9982749967633104e-05, + "loss": 1.6045, + "step": 1302 + }, + { + "epoch": 0.21, + "learning_rate": 4.998265401211696e-05, + "loss": 1.6317, + "step": 1303 + }, + { + "epoch": 0.21, + "learning_rate": 4.9982557790550724e-05, + "loss": 1.5313, + "step": 1304 + }, + { + "epoch": 0.21, + "learning_rate": 4.99824613029354e-05, + "loss": 1.5834, + "step": 1305 + }, + { + "epoch": 0.21, + "learning_rate": 4.9982364549272046e-05, + "loss": 1.5358, + "step": 1306 + }, + { + "epoch": 0.21, + "learning_rate": 4.998226752956167e-05, + "loss": 1.6296, + "step": 1307 + }, + { + "epoch": 0.21, + "learning_rate": 4.998217024380532e-05, + "loss": 1.6665, + "step": 1308 + }, + { + "epoch": 0.21, + "learning_rate": 4.998207269200401e-05, + "loss": 1.6588, + "step": 1309 + }, + { + "epoch": 0.21, + "learning_rate": 4.99819748741588e-05, + "loss": 1.5905, + "step": 1310 + }, + { + "epoch": 0.21, + "learning_rate": 4.998187679027072e-05, + "loss": 1.5906, + "step": 1311 + }, + { + "epoch": 0.21, + "learning_rate": 4.998177844034083e-05, + "loss": 1.6717, + "step": 1312 + }, + { + "epoch": 0.21, + "learning_rate": 4.998167982437015e-05, + "loss": 1.5995, + "step": 1313 + }, + { + "epoch": 0.21, + "learning_rate": 4.9981580942359764e-05, + "loss": 1.5129, + "step": 1314 + }, + { + "epoch": 0.21, + "learning_rate": 4.99814817943107e-05, + "loss": 1.6252, + "step": 1315 + }, + { + "epoch": 0.21, + "learning_rate": 4.9981382380224016e-05, + "loss": 1.634, + "step": 1316 + }, + { + "epoch": 0.21, + "learning_rate": 4.9981282700100784e-05, + "loss": 1.6123, + "step": 1317 + }, + { + "epoch": 0.21, + "learning_rate": 4.9981182753942054e-05, + "loss": 1.6246, + "step": 1318 + }, + { + "epoch": 0.21, + "learning_rate": 4.998108254174889e-05, + "loss": 1.5902, + "step": 1319 + }, + { + "epoch": 0.21, + "learning_rate": 4.9980982063522374e-05, + "loss": 1.5207, + "step": 1320 + }, + { + "epoch": 0.21, + "learning_rate": 4.998088131926356e-05, + "loss": 1.6447, + "step": 1321 + }, + { + "epoch": 0.21, + "learning_rate": 4.998078030897352e-05, + "loss": 1.6123, + "step": 1322 + }, + { + "epoch": 0.21, + "learning_rate": 4.998067903265334e-05, + "loss": 1.6169, + "step": 1323 + }, + { + "epoch": 0.21, + "learning_rate": 4.9980577490304104e-05, + "loss": 1.5057, + "step": 1324 + }, + { + "epoch": 0.21, + "learning_rate": 4.998047568192688e-05, + "loss": 1.6145, + "step": 1325 + }, + { + "epoch": 0.21, + "learning_rate": 4.9980373607522746e-05, + "loss": 1.5117, + "step": 1326 + }, + { + "epoch": 0.21, + "learning_rate": 4.9980271267092806e-05, + "loss": 1.5782, + "step": 1327 + }, + { + "epoch": 0.21, + "learning_rate": 4.998016866063814e-05, + "loss": 1.613, + "step": 1328 + }, + { + "epoch": 0.21, + "learning_rate": 4.9980065788159845e-05, + "loss": 1.5676, + "step": 1329 + }, + { + "epoch": 0.21, + "learning_rate": 4.997996264965902e-05, + "loss": 1.5935, + "step": 1330 + }, + { + "epoch": 0.21, + "learning_rate": 4.997985924513675e-05, + "loss": 1.5859, + "step": 1331 + }, + { + "epoch": 0.21, + "learning_rate": 4.9979755574594153e-05, + "loss": 1.5363, + "step": 1332 + }, + { + "epoch": 0.21, + "learning_rate": 4.997965163803232e-05, + "loss": 1.585, + "step": 1333 + }, + { + "epoch": 0.21, + "learning_rate": 4.997954743545236e-05, + "loss": 1.5596, + "step": 1334 + }, + { + "epoch": 0.21, + "learning_rate": 4.997944296685539e-05, + "loss": 1.5492, + "step": 1335 + }, + { + "epoch": 0.21, + "learning_rate": 4.997933823224252e-05, + "loss": 1.6442, + "step": 1336 + }, + { + "epoch": 0.21, + "learning_rate": 4.997923323161486e-05, + "loss": 1.6706, + "step": 1337 + }, + { + "epoch": 0.21, + "learning_rate": 4.997912796497353e-05, + "loss": 1.6211, + "step": 1338 + }, + { + "epoch": 0.21, + "learning_rate": 4.997902243231966e-05, + "loss": 1.5939, + "step": 1339 + }, + { + "epoch": 0.21, + "learning_rate": 4.997891663365435e-05, + "loss": 1.5975, + "step": 1340 + }, + { + "epoch": 0.21, + "learning_rate": 4.9978810568978754e-05, + "loss": 1.5957, + "step": 1341 + }, + { + "epoch": 0.21, + "learning_rate": 4.997870423829399e-05, + "loss": 1.5249, + "step": 1342 + }, + { + "epoch": 0.21, + "learning_rate": 4.997859764160119e-05, + "loss": 1.6746, + "step": 1343 + }, + { + "epoch": 0.21, + "learning_rate": 4.997849077890149e-05, + "loss": 1.5031, + "step": 1344 + }, + { + "epoch": 0.21, + "learning_rate": 4.9978383650196025e-05, + "loss": 1.6232, + "step": 1345 + }, + { + "epoch": 0.21, + "learning_rate": 4.9978276255485937e-05, + "loss": 1.6124, + "step": 1346 + }, + { + "epoch": 0.21, + "learning_rate": 4.997816859477238e-05, + "loss": 1.6144, + "step": 1347 + }, + { + "epoch": 0.21, + "learning_rate": 4.9978060668056484e-05, + "loss": 1.6711, + "step": 1348 + }, + { + "epoch": 0.21, + "learning_rate": 4.997795247533942e-05, + "loss": 1.5285, + "step": 1349 + }, + { + "epoch": 0.22, + "learning_rate": 4.997784401662231e-05, + "loss": 1.5074, + "step": 1350 + }, + { + "epoch": 0.22, + "learning_rate": 4.9977735291906334e-05, + "loss": 1.605, + "step": 1351 + }, + { + "epoch": 0.22, + "learning_rate": 4.9977626301192636e-05, + "loss": 1.5539, + "step": 1352 + }, + { + "epoch": 0.22, + "learning_rate": 4.997751704448239e-05, + "loss": 1.5838, + "step": 1353 + }, + { + "epoch": 0.22, + "learning_rate": 4.997740752177675e-05, + "loss": 1.6221, + "step": 1354 + }, + { + "epoch": 0.22, + "learning_rate": 4.997729773307688e-05, + "loss": 1.5671, + "step": 1355 + }, + { + "epoch": 0.22, + "learning_rate": 4.9977187678383955e-05, + "loss": 1.6057, + "step": 1356 + }, + { + "epoch": 0.22, + "learning_rate": 4.997707735769915e-05, + "loss": 1.5788, + "step": 1357 + }, + { + "epoch": 0.22, + "learning_rate": 4.9976966771023626e-05, + "loss": 1.6197, + "step": 1358 + }, + { + "epoch": 0.22, + "learning_rate": 4.997685591835858e-05, + "loss": 1.6018, + "step": 1359 + }, + { + "epoch": 0.22, + "learning_rate": 4.997674479970518e-05, + "loss": 1.5736, + "step": 1360 + }, + { + "epoch": 0.22, + "learning_rate": 4.99766334150646e-05, + "loss": 1.543, + "step": 1361 + }, + { + "epoch": 0.22, + "learning_rate": 4.997652176443804e-05, + "loss": 1.5866, + "step": 1362 + }, + { + "epoch": 0.22, + "learning_rate": 4.99764098478267e-05, + "loss": 1.614, + "step": 1363 + }, + { + "epoch": 0.22, + "learning_rate": 4.9976297665231755e-05, + "loss": 1.5349, + "step": 1364 + }, + { + "epoch": 0.22, + "learning_rate": 4.9976185216654396e-05, + "loss": 1.5439, + "step": 1365 + }, + { + "epoch": 0.22, + "learning_rate": 4.9976072502095836e-05, + "loss": 1.6392, + "step": 1366 + }, + { + "epoch": 0.22, + "learning_rate": 4.9975959521557256e-05, + "loss": 1.5394, + "step": 1367 + }, + { + "epoch": 0.22, + "learning_rate": 4.997584627503988e-05, + "loss": 1.6814, + "step": 1368 + }, + { + "epoch": 0.22, + "learning_rate": 4.9975732762544905e-05, + "loss": 1.6371, + "step": 1369 + }, + { + "epoch": 0.22, + "learning_rate": 4.997561898407353e-05, + "loss": 1.5917, + "step": 1370 + }, + { + "epoch": 0.22, + "learning_rate": 4.997550493962699e-05, + "loss": 1.4918, + "step": 1371 + }, + { + "epoch": 0.22, + "learning_rate": 4.9975390629206474e-05, + "loss": 1.5907, + "step": 1372 + }, + { + "epoch": 0.22, + "learning_rate": 4.997527605281321e-05, + "loss": 1.5495, + "step": 1373 + }, + { + "epoch": 0.22, + "learning_rate": 4.997516121044843e-05, + "loss": 1.5909, + "step": 1374 + }, + { + "epoch": 0.22, + "learning_rate": 4.997504610211334e-05, + "loss": 1.5852, + "step": 1375 + }, + { + "epoch": 0.22, + "learning_rate": 4.997493072780917e-05, + "loss": 1.5967, + "step": 1376 + }, + { + "epoch": 0.22, + "learning_rate": 4.997481508753715e-05, + "loss": 1.6089, + "step": 1377 + }, + { + "epoch": 0.22, + "learning_rate": 4.997469918129851e-05, + "loss": 1.4776, + "step": 1378 + }, + { + "epoch": 0.22, + "learning_rate": 4.997458300909449e-05, + "loss": 1.6026, + "step": 1379 + }, + { + "epoch": 0.22, + "learning_rate": 4.997446657092632e-05, + "loss": 1.5414, + "step": 1380 + }, + { + "epoch": 0.22, + "learning_rate": 4.997434986679525e-05, + "loss": 1.624, + "step": 1381 + }, + { + "epoch": 0.22, + "learning_rate": 4.997423289670251e-05, + "loss": 1.607, + "step": 1382 + }, + { + "epoch": 0.22, + "learning_rate": 4.997411566064936e-05, + "loss": 1.5079, + "step": 1383 + }, + { + "epoch": 0.22, + "learning_rate": 4.997399815863703e-05, + "loss": 1.5604, + "step": 1384 + }, + { + "epoch": 0.22, + "learning_rate": 4.9973880390666786e-05, + "loss": 1.5528, + "step": 1385 + }, + { + "epoch": 0.22, + "learning_rate": 4.997376235673988e-05, + "loss": 1.5641, + "step": 1386 + }, + { + "epoch": 0.22, + "learning_rate": 4.997364405685756e-05, + "loss": 1.5646, + "step": 1387 + }, + { + "epoch": 0.22, + "learning_rate": 4.9973525491021103e-05, + "loss": 1.6356, + "step": 1388 + }, + { + "epoch": 0.22, + "learning_rate": 4.997340665923175e-05, + "loss": 1.5415, + "step": 1389 + }, + { + "epoch": 0.22, + "learning_rate": 4.997328756149079e-05, + "loss": 1.5461, + "step": 1390 + }, + { + "epoch": 0.22, + "learning_rate": 4.997316819779947e-05, + "loss": 1.6126, + "step": 1391 + }, + { + "epoch": 0.22, + "learning_rate": 4.9973048568159074e-05, + "loss": 1.6397, + "step": 1392 + }, + { + "epoch": 0.22, + "learning_rate": 4.9972928672570866e-05, + "loss": 1.6278, + "step": 1393 + }, + { + "epoch": 0.22, + "learning_rate": 4.997280851103614e-05, + "loss": 1.6078, + "step": 1394 + }, + { + "epoch": 0.22, + "learning_rate": 4.997268808355616e-05, + "loss": 1.6455, + "step": 1395 + }, + { + "epoch": 0.22, + "learning_rate": 4.997256739013221e-05, + "loss": 1.5928, + "step": 1396 + }, + { + "epoch": 0.22, + "learning_rate": 4.997244643076558e-05, + "loss": 1.6446, + "step": 1397 + }, + { + "epoch": 0.22, + "learning_rate": 4.997232520545756e-05, + "loss": 1.607, + "step": 1398 + }, + { + "epoch": 0.22, + "learning_rate": 4.997220371420943e-05, + "loss": 1.6353, + "step": 1399 + }, + { + "epoch": 0.22, + "learning_rate": 4.9972081957022497e-05, + "loss": 1.5877, + "step": 1400 + }, + { + "epoch": 0.22, + "learning_rate": 4.997195993389805e-05, + "loss": 1.5467, + "step": 1401 + }, + { + "epoch": 0.22, + "learning_rate": 4.99718376448374e-05, + "loss": 1.5352, + "step": 1402 + }, + { + "epoch": 0.22, + "learning_rate": 4.9971715089841826e-05, + "loss": 1.6382, + "step": 1403 + }, + { + "epoch": 0.22, + "learning_rate": 4.997159226891265e-05, + "loss": 1.6328, + "step": 1404 + }, + { + "epoch": 0.22, + "learning_rate": 4.997146918205118e-05, + "loss": 1.6789, + "step": 1405 + }, + { + "epoch": 0.22, + "learning_rate": 4.9971345829258723e-05, + "loss": 1.5754, + "step": 1406 + }, + { + "epoch": 0.22, + "learning_rate": 4.997122221053659e-05, + "loss": 1.6615, + "step": 1407 + }, + { + "epoch": 0.22, + "learning_rate": 4.997109832588611e-05, + "loss": 1.6994, + "step": 1408 + }, + { + "epoch": 0.22, + "learning_rate": 4.9970974175308595e-05, + "loss": 1.5404, + "step": 1409 + }, + { + "epoch": 0.22, + "learning_rate": 4.997084975880535e-05, + "loss": 1.5964, + "step": 1410 + }, + { + "epoch": 0.22, + "learning_rate": 4.997072507637772e-05, + "loss": 1.6341, + "step": 1411 + }, + { + "epoch": 0.22, + "learning_rate": 4.997060012802704e-05, + "loss": 1.6252, + "step": 1412 + }, + { + "epoch": 0.23, + "learning_rate": 4.9970474913754614e-05, + "loss": 1.5648, + "step": 1413 + }, + { + "epoch": 0.23, + "learning_rate": 4.99703494335618e-05, + "loss": 1.6418, + "step": 1414 + }, + { + "epoch": 0.23, + "learning_rate": 4.997022368744992e-05, + "loss": 1.5887, + "step": 1415 + }, + { + "epoch": 0.23, + "learning_rate": 4.997009767542032e-05, + "loss": 1.6233, + "step": 1416 + }, + { + "epoch": 0.23, + "learning_rate": 4.9969971397474336e-05, + "loss": 1.605, + "step": 1417 + }, + { + "epoch": 0.23, + "learning_rate": 4.996984485361332e-05, + "loss": 1.5995, + "step": 1418 + }, + { + "epoch": 0.23, + "learning_rate": 4.996971804383861e-05, + "loss": 1.5545, + "step": 1419 + }, + { + "epoch": 0.23, + "learning_rate": 4.996959096815157e-05, + "loss": 1.5845, + "step": 1420 + }, + { + "epoch": 0.23, + "learning_rate": 4.996946362655354e-05, + "loss": 1.5611, + "step": 1421 + }, + { + "epoch": 0.23, + "learning_rate": 4.996933601904589e-05, + "loss": 1.6341, + "step": 1422 + }, + { + "epoch": 0.23, + "learning_rate": 4.996920814562996e-05, + "loss": 1.6001, + "step": 1423 + }, + { + "epoch": 0.23, + "learning_rate": 4.9969080006307124e-05, + "loss": 1.5878, + "step": 1424 + }, + { + "epoch": 0.23, + "learning_rate": 4.996895160107875e-05, + "loss": 1.6013, + "step": 1425 + }, + { + "epoch": 0.23, + "learning_rate": 4.99688229299462e-05, + "loss": 1.5466, + "step": 1426 + }, + { + "epoch": 0.23, + "learning_rate": 4.996869399291084e-05, + "loss": 1.6014, + "step": 1427 + }, + { + "epoch": 0.23, + "learning_rate": 4.9968564789974046e-05, + "loss": 1.6418, + "step": 1428 + }, + { + "epoch": 0.23, + "learning_rate": 4.9968435321137204e-05, + "loss": 1.6236, + "step": 1429 + }, + { + "epoch": 0.23, + "learning_rate": 4.996830558640168e-05, + "loss": 1.6153, + "step": 1430 + }, + { + "epoch": 0.23, + "learning_rate": 4.996817558576886e-05, + "loss": 1.4568, + "step": 1431 + }, + { + "epoch": 0.23, + "learning_rate": 4.996804531924013e-05, + "loss": 1.5487, + "step": 1432 + }, + { + "epoch": 0.23, + "learning_rate": 4.996791478681688e-05, + "loss": 1.663, + "step": 1433 + }, + { + "epoch": 0.23, + "learning_rate": 4.996778398850049e-05, + "loss": 1.6363, + "step": 1434 + }, + { + "epoch": 0.23, + "learning_rate": 4.9967652924292355e-05, + "loss": 1.5676, + "step": 1435 + }, + { + "epoch": 0.23, + "learning_rate": 4.996752159419389e-05, + "loss": 1.5738, + "step": 1436 + }, + { + "epoch": 0.23, + "learning_rate": 4.9967389998206457e-05, + "loss": 1.5297, + "step": 1437 + }, + { + "epoch": 0.23, + "learning_rate": 4.9967258136331495e-05, + "loss": 1.5625, + "step": 1438 + }, + { + "epoch": 0.23, + "learning_rate": 4.9967126008570384e-05, + "loss": 1.6757, + "step": 1439 + }, + { + "epoch": 0.23, + "learning_rate": 4.996699361492454e-05, + "loss": 1.5897, + "step": 1440 + }, + { + "epoch": 0.23, + "learning_rate": 4.996686095539538e-05, + "loss": 1.5471, + "step": 1441 + }, + { + "epoch": 0.23, + "learning_rate": 4.99667280299843e-05, + "loss": 1.5899, + "step": 1442 + }, + { + "epoch": 0.23, + "learning_rate": 4.996659483869273e-05, + "loss": 1.5831, + "step": 1443 + }, + { + "epoch": 0.23, + "learning_rate": 4.996646138152207e-05, + "loss": 1.59, + "step": 1444 + }, + { + "epoch": 0.23, + "learning_rate": 4.996632765847377e-05, + "loss": 1.5871, + "step": 1445 + }, + { + "epoch": 0.23, + "learning_rate": 4.9966193669549225e-05, + "loss": 1.5835, + "step": 1446 + }, + { + "epoch": 0.23, + "learning_rate": 4.9966059414749876e-05, + "loss": 1.59, + "step": 1447 + }, + { + "epoch": 0.23, + "learning_rate": 4.996592489407715e-05, + "loss": 1.5496, + "step": 1448 + }, + { + "epoch": 0.23, + "learning_rate": 4.9965790107532494e-05, + "loss": 1.5596, + "step": 1449 + }, + { + "epoch": 0.23, + "learning_rate": 4.996565505511732e-05, + "loss": 1.5891, + "step": 1450 + }, + { + "epoch": 0.23, + "learning_rate": 4.996551973683308e-05, + "loss": 1.6479, + "step": 1451 + }, + { + "epoch": 0.23, + "learning_rate": 4.9965384152681216e-05, + "loss": 1.4931, + "step": 1452 + }, + { + "epoch": 0.23, + "learning_rate": 4.996524830266316e-05, + "loss": 1.587, + "step": 1453 + }, + { + "epoch": 0.23, + "learning_rate": 4.9965112186780374e-05, + "loss": 1.5587, + "step": 1454 + }, + { + "epoch": 0.23, + "learning_rate": 4.996497580503429e-05, + "loss": 1.5409, + "step": 1455 + }, + { + "epoch": 0.23, + "learning_rate": 4.996483915742637e-05, + "loss": 1.5674, + "step": 1456 + }, + { + "epoch": 0.23, + "learning_rate": 4.9964702243958084e-05, + "loss": 1.6137, + "step": 1457 + }, + { + "epoch": 0.23, + "learning_rate": 4.996456506463086e-05, + "loss": 1.5948, + "step": 1458 + }, + { + "epoch": 0.23, + "learning_rate": 4.9964427619446185e-05, + "loss": 1.6226, + "step": 1459 + }, + { + "epoch": 0.23, + "learning_rate": 4.99642899084055e-05, + "loss": 1.6522, + "step": 1460 + }, + { + "epoch": 0.23, + "learning_rate": 4.996415193151029e-05, + "loss": 1.5382, + "step": 1461 + }, + { + "epoch": 0.23, + "learning_rate": 4.996401368876202e-05, + "loss": 1.5297, + "step": 1462 + }, + { + "epoch": 0.23, + "learning_rate": 4.996387518016217e-05, + "loss": 1.6098, + "step": 1463 + }, + { + "epoch": 0.23, + "learning_rate": 4.996373640571219e-05, + "loss": 1.5696, + "step": 1464 + }, + { + "epoch": 0.23, + "learning_rate": 4.996359736541359e-05, + "loss": 1.6559, + "step": 1465 + }, + { + "epoch": 0.23, + "learning_rate": 4.996345805926781e-05, + "loss": 1.6141, + "step": 1466 + }, + { + "epoch": 0.23, + "learning_rate": 4.996331848727637e-05, + "loss": 1.52, + "step": 1467 + }, + { + "epoch": 0.23, + "learning_rate": 4.996317864944074e-05, + "loss": 1.5948, + "step": 1468 + }, + { + "epoch": 0.23, + "learning_rate": 4.9963038545762416e-05, + "loss": 1.5472, + "step": 1469 + }, + { + "epoch": 0.23, + "learning_rate": 4.996289817624289e-05, + "loss": 1.5787, + "step": 1470 + }, + { + "epoch": 0.23, + "learning_rate": 4.9962757540883654e-05, + "loss": 1.6151, + "step": 1471 + }, + { + "epoch": 0.23, + "learning_rate": 4.99626166396862e-05, + "loss": 1.5837, + "step": 1472 + }, + { + "epoch": 0.23, + "learning_rate": 4.9962475472652035e-05, + "loss": 1.5537, + "step": 1473 + }, + { + "epoch": 0.23, + "learning_rate": 4.9962334039782665e-05, + "loss": 1.4961, + "step": 1474 + }, + { + "epoch": 0.23, + "learning_rate": 4.996219234107959e-05, + "loss": 1.5734, + "step": 1475 + }, + { + "epoch": 0.24, + "learning_rate": 4.996205037654432e-05, + "loss": 1.6559, + "step": 1476 + }, + { + "epoch": 0.24, + "learning_rate": 4.996190814617837e-05, + "loss": 1.6164, + "step": 1477 + }, + { + "epoch": 0.24, + "learning_rate": 4.996176564998325e-05, + "loss": 1.4802, + "step": 1478 + }, + { + "epoch": 0.24, + "learning_rate": 4.996162288796049e-05, + "loss": 1.6826, + "step": 1479 + }, + { + "epoch": 0.24, + "learning_rate": 4.996147986011159e-05, + "loss": 1.5429, + "step": 1480 + }, + { + "epoch": 0.24, + "learning_rate": 4.996133656643809e-05, + "loss": 1.6439, + "step": 1481 + }, + { + "epoch": 0.24, + "learning_rate": 4.996119300694151e-05, + "loss": 1.6654, + "step": 1482 + }, + { + "epoch": 0.24, + "learning_rate": 4.996104918162338e-05, + "loss": 1.5422, + "step": 1483 + }, + { + "epoch": 0.24, + "learning_rate": 4.996090509048522e-05, + "loss": 1.6221, + "step": 1484 + }, + { + "epoch": 0.24, + "learning_rate": 4.9960760733528576e-05, + "loss": 1.5944, + "step": 1485 + }, + { + "epoch": 0.24, + "learning_rate": 4.9960616110755e-05, + "loss": 1.6133, + "step": 1486 + }, + { + "epoch": 0.24, + "learning_rate": 4.9960471222166006e-05, + "loss": 1.5431, + "step": 1487 + }, + { + "epoch": 0.24, + "learning_rate": 4.996032606776314e-05, + "loss": 1.6096, + "step": 1488 + }, + { + "epoch": 0.24, + "learning_rate": 4.996018064754797e-05, + "loss": 1.6349, + "step": 1489 + }, + { + "epoch": 0.24, + "learning_rate": 4.9960034961522016e-05, + "loss": 1.5386, + "step": 1490 + }, + { + "epoch": 0.24, + "learning_rate": 4.995988900968686e-05, + "loss": 1.606, + "step": 1491 + }, + { + "epoch": 0.24, + "learning_rate": 4.9959742792044026e-05, + "loss": 1.5594, + "step": 1492 + }, + { + "epoch": 0.24, + "learning_rate": 4.995959630859509e-05, + "loss": 1.5453, + "step": 1493 + }, + { + "epoch": 0.24, + "learning_rate": 4.9959449559341604e-05, + "loss": 1.598, + "step": 1494 + }, + { + "epoch": 0.24, + "learning_rate": 4.995930254428514e-05, + "loss": 1.537, + "step": 1495 + }, + { + "epoch": 0.24, + "learning_rate": 4.995915526342725e-05, + "loss": 1.5215, + "step": 1496 + }, + { + "epoch": 0.24, + "learning_rate": 4.9959007716769505e-05, + "loss": 1.5409, + "step": 1497 + }, + { + "epoch": 0.24, + "learning_rate": 4.995885990431349e-05, + "loss": 1.5933, + "step": 1498 + }, + { + "epoch": 0.24, + "learning_rate": 4.995871182606077e-05, + "loss": 1.5762, + "step": 1499 + }, + { + "epoch": 0.24, + "learning_rate": 4.995856348201291e-05, + "loss": 1.5785, + "step": 1500 + }, + { + "epoch": 0.24, + "learning_rate": 4.9958414872171514e-05, + "loss": 1.5821, + "step": 1501 + }, + { + "epoch": 0.24, + "learning_rate": 4.995826599653814e-05, + "loss": 1.6081, + "step": 1502 + }, + { + "epoch": 0.24, + "learning_rate": 4.99581168551144e-05, + "loss": 1.5752, + "step": 1503 + }, + { + "epoch": 0.24, + "learning_rate": 4.995796744790186e-05, + "loss": 1.5794, + "step": 1504 + }, + { + "epoch": 0.24, + "learning_rate": 4.9957817774902126e-05, + "loss": 1.5044, + "step": 1505 + }, + { + "epoch": 0.24, + "learning_rate": 4.995766783611678e-05, + "loss": 1.5604, + "step": 1506 + }, + { + "epoch": 0.24, + "learning_rate": 4.9957517631547424e-05, + "loss": 1.5973, + "step": 1507 + }, + { + "epoch": 0.24, + "learning_rate": 4.9957367161195656e-05, + "loss": 1.4991, + "step": 1508 + }, + { + "epoch": 0.24, + "learning_rate": 4.995721642506308e-05, + "loss": 1.545, + "step": 1509 + }, + { + "epoch": 0.24, + "learning_rate": 4.995706542315131e-05, + "loss": 1.5324, + "step": 1510 + }, + { + "epoch": 0.24, + "learning_rate": 4.9956914155461935e-05, + "loss": 1.583, + "step": 1511 + }, + { + "epoch": 0.24, + "learning_rate": 4.995676262199658e-05, + "loss": 1.5482, + "step": 1512 + }, + { + "epoch": 0.24, + "learning_rate": 4.9956610822756843e-05, + "loss": 1.5441, + "step": 1513 + }, + { + "epoch": 0.24, + "learning_rate": 4.9956458757744365e-05, + "loss": 1.6172, + "step": 1514 + }, + { + "epoch": 0.24, + "learning_rate": 4.995630642696075e-05, + "loss": 1.6468, + "step": 1515 + }, + { + "epoch": 0.24, + "learning_rate": 4.995615383040763e-05, + "loss": 1.6056, + "step": 1516 + }, + { + "epoch": 0.24, + "learning_rate": 4.995600096808661e-05, + "loss": 1.6096, + "step": 1517 + }, + { + "epoch": 0.24, + "learning_rate": 4.995584783999934e-05, + "loss": 1.5591, + "step": 1518 + }, + { + "epoch": 0.24, + "learning_rate": 4.9955694446147436e-05, + "loss": 1.6283, + "step": 1519 + }, + { + "epoch": 0.24, + "learning_rate": 4.995554078653254e-05, + "loss": 1.5679, + "step": 1520 + }, + { + "epoch": 0.24, + "learning_rate": 4.995538686115628e-05, + "loss": 1.5956, + "step": 1521 + }, + { + "epoch": 0.24, + "learning_rate": 4.99552326700203e-05, + "loss": 1.5847, + "step": 1522 + }, + { + "epoch": 0.24, + "learning_rate": 4.995507821312625e-05, + "loss": 1.5905, + "step": 1523 + }, + { + "epoch": 0.24, + "learning_rate": 4.995492349047577e-05, + "loss": 1.5215, + "step": 1524 + }, + { + "epoch": 0.24, + "learning_rate": 4.9954768502070496e-05, + "loss": 1.6338, + "step": 1525 + }, + { + "epoch": 0.24, + "learning_rate": 4.99546132479121e-05, + "loss": 1.5194, + "step": 1526 + }, + { + "epoch": 0.24, + "learning_rate": 4.995445772800221e-05, + "loss": 1.5093, + "step": 1527 + }, + { + "epoch": 0.24, + "learning_rate": 4.9954301942342504e-05, + "loss": 1.62, + "step": 1528 + }, + { + "epoch": 0.24, + "learning_rate": 4.995414589093463e-05, + "loss": 1.5658, + "step": 1529 + }, + { + "epoch": 0.24, + "learning_rate": 4.995398957378025e-05, + "loss": 1.68, + "step": 1530 + }, + { + "epoch": 0.24, + "learning_rate": 4.995383299088103e-05, + "loss": 1.6146, + "step": 1531 + }, + { + "epoch": 0.24, + "learning_rate": 4.995367614223865e-05, + "loss": 1.6113, + "step": 1532 + }, + { + "epoch": 0.24, + "learning_rate": 4.9953519027854754e-05, + "loss": 1.5803, + "step": 1533 + }, + { + "epoch": 0.24, + "learning_rate": 4.9953361647731037e-05, + "loss": 1.5521, + "step": 1534 + }, + { + "epoch": 0.24, + "learning_rate": 4.995320400186917e-05, + "loss": 1.5905, + "step": 1535 + }, + { + "epoch": 0.24, + "learning_rate": 4.995304609027083e-05, + "loss": 1.5903, + "step": 1536 + }, + { + "epoch": 0.24, + "learning_rate": 4.995288791293769e-05, + "loss": 1.6543, + "step": 1537 + }, + { + "epoch": 0.25, + "learning_rate": 4.995272946987145e-05, + "loss": 1.6263, + "step": 1538 + }, + { + "epoch": 0.25, + "learning_rate": 4.995257076107379e-05, + "loss": 1.5687, + "step": 1539 + }, + { + "epoch": 0.25, + "learning_rate": 4.9952411786546395e-05, + "loss": 1.567, + "step": 1540 + }, + { + "epoch": 0.25, + "learning_rate": 4.995225254629097e-05, + "loss": 1.5735, + "step": 1541 + }, + { + "epoch": 0.25, + "learning_rate": 4.99520930403092e-05, + "loss": 1.5993, + "step": 1542 + }, + { + "epoch": 0.25, + "learning_rate": 4.995193326860279e-05, + "loss": 1.4458, + "step": 1543 + }, + { + "epoch": 0.25, + "learning_rate": 4.995177323117344e-05, + "loss": 1.5634, + "step": 1544 + }, + { + "epoch": 0.25, + "learning_rate": 4.9951612928022854e-05, + "loss": 1.6608, + "step": 1545 + }, + { + "epoch": 0.25, + "learning_rate": 4.995145235915274e-05, + "loss": 1.6298, + "step": 1546 + }, + { + "epoch": 0.25, + "learning_rate": 4.99512915245648e-05, + "loss": 1.6441, + "step": 1547 + }, + { + "epoch": 0.25, + "learning_rate": 4.9951130424260764e-05, + "loss": 1.541, + "step": 1548 + }, + { + "epoch": 0.25, + "learning_rate": 4.995096905824233e-05, + "loss": 1.5465, + "step": 1549 + }, + { + "epoch": 0.25, + "learning_rate": 4.9950807426511225e-05, + "loss": 1.5513, + "step": 1550 + }, + { + "epoch": 0.25, + "learning_rate": 4.9950645529069165e-05, + "loss": 1.531, + "step": 1551 + }, + { + "epoch": 0.25, + "learning_rate": 4.995048336591789e-05, + "loss": 1.6093, + "step": 1552 + }, + { + "epoch": 0.25, + "learning_rate": 4.99503209370591e-05, + "loss": 1.6099, + "step": 1553 + }, + { + "epoch": 0.25, + "learning_rate": 4.995015824249455e-05, + "loss": 1.628, + "step": 1554 + }, + { + "epoch": 0.25, + "learning_rate": 4.994999528222596e-05, + "loss": 1.5411, + "step": 1555 + }, + { + "epoch": 0.25, + "learning_rate": 4.9949832056255065e-05, + "loss": 1.5661, + "step": 1556 + }, + { + "epoch": 0.25, + "learning_rate": 4.994966856458361e-05, + "loss": 1.5127, + "step": 1557 + }, + { + "epoch": 0.25, + "learning_rate": 4.994950480721333e-05, + "loss": 1.5995, + "step": 1558 + }, + { + "epoch": 0.25, + "learning_rate": 4.994934078414597e-05, + "loss": 1.5666, + "step": 1559 + }, + { + "epoch": 0.25, + "learning_rate": 4.9949176495383276e-05, + "loss": 1.5375, + "step": 1560 + }, + { + "epoch": 0.25, + "learning_rate": 4.9949011940927006e-05, + "loss": 1.6444, + "step": 1561 + }, + { + "epoch": 0.25, + "learning_rate": 4.9948847120778896e-05, + "loss": 1.6275, + "step": 1562 + }, + { + "epoch": 0.25, + "learning_rate": 4.994868203494072e-05, + "loss": 1.6109, + "step": 1563 + }, + { + "epoch": 0.25, + "learning_rate": 4.9948516683414224e-05, + "loss": 1.5342, + "step": 1564 + }, + { + "epoch": 0.25, + "learning_rate": 4.994835106620117e-05, + "loss": 1.5827, + "step": 1565 + }, + { + "epoch": 0.25, + "learning_rate": 4.994818518330333e-05, + "loss": 1.5873, + "step": 1566 + }, + { + "epoch": 0.25, + "learning_rate": 4.994801903472246e-05, + "loss": 1.5908, + "step": 1567 + }, + { + "epoch": 0.25, + "learning_rate": 4.9947852620460326e-05, + "loss": 1.5568, + "step": 1568 + }, + { + "epoch": 0.25, + "learning_rate": 4.9947685940518715e-05, + "loss": 1.5424, + "step": 1569 + }, + { + "epoch": 0.25, + "learning_rate": 4.99475189948994e-05, + "loss": 1.5654, + "step": 1570 + }, + { + "epoch": 0.25, + "learning_rate": 4.994735178360415e-05, + "loss": 1.5563, + "step": 1571 + }, + { + "epoch": 0.25, + "learning_rate": 4.994718430663474e-05, + "loss": 1.5737, + "step": 1572 + }, + { + "epoch": 0.25, + "learning_rate": 4.9947016563992976e-05, + "loss": 1.5917, + "step": 1573 + }, + { + "epoch": 0.25, + "learning_rate": 4.994684855568062e-05, + "loss": 1.6063, + "step": 1574 + }, + { + "epoch": 0.25, + "learning_rate": 4.994668028169949e-05, + "loss": 1.5323, + "step": 1575 + }, + { + "epoch": 0.25, + "learning_rate": 4.994651174205134e-05, + "loss": 1.6348, + "step": 1576 + }, + { + "epoch": 0.25, + "learning_rate": 4.9946342936738e-05, + "loss": 1.6397, + "step": 1577 + }, + { + "epoch": 0.25, + "learning_rate": 4.9946173865761245e-05, + "loss": 1.5384, + "step": 1578 + }, + { + "epoch": 0.25, + "learning_rate": 4.9946004529122885e-05, + "loss": 1.506, + "step": 1579 + }, + { + "epoch": 0.25, + "learning_rate": 4.994583492682472e-05, + "loss": 1.6094, + "step": 1580 + }, + { + "epoch": 0.25, + "learning_rate": 4.994566505886856e-05, + "loss": 1.5542, + "step": 1581 + }, + { + "epoch": 0.25, + "learning_rate": 4.994549492525622e-05, + "loss": 1.5944, + "step": 1582 + }, + { + "epoch": 0.25, + "learning_rate": 4.994532452598949e-05, + "loss": 1.5473, + "step": 1583 + }, + { + "epoch": 0.25, + "learning_rate": 4.994515386107021e-05, + "loss": 1.5554, + "step": 1584 + }, + { + "epoch": 0.25, + "learning_rate": 4.9944982930500185e-05, + "loss": 1.5718, + "step": 1585 + }, + { + "epoch": 0.25, + "learning_rate": 4.994481173428123e-05, + "loss": 1.5928, + "step": 1586 + }, + { + "epoch": 0.25, + "learning_rate": 4.994464027241518e-05, + "loss": 1.6273, + "step": 1587 + }, + { + "epoch": 0.25, + "learning_rate": 4.9944468544903846e-05, + "loss": 1.5383, + "step": 1588 + }, + { + "epoch": 0.25, + "learning_rate": 4.994429655174907e-05, + "loss": 1.5892, + "step": 1589 + }, + { + "epoch": 0.25, + "learning_rate": 4.994412429295269e-05, + "loss": 1.5277, + "step": 1590 + }, + { + "epoch": 0.25, + "learning_rate": 4.994395176851652e-05, + "loss": 1.6086, + "step": 1591 + }, + { + "epoch": 0.25, + "learning_rate": 4.9943778978442405e-05, + "loss": 1.5081, + "step": 1592 + }, + { + "epoch": 0.25, + "learning_rate": 4.9943605922732195e-05, + "loss": 1.5789, + "step": 1593 + }, + { + "epoch": 0.25, + "learning_rate": 4.9943432601387715e-05, + "loss": 1.5667, + "step": 1594 + }, + { + "epoch": 0.25, + "learning_rate": 4.994325901441084e-05, + "loss": 1.5078, + "step": 1595 + }, + { + "epoch": 0.25, + "learning_rate": 4.994308516180338e-05, + "loss": 1.6467, + "step": 1596 + }, + { + "epoch": 0.25, + "learning_rate": 4.994291104356722e-05, + "loss": 1.6152, + "step": 1597 + }, + { + "epoch": 0.25, + "learning_rate": 4.9942736659704195e-05, + "loss": 1.5965, + "step": 1598 + }, + { + "epoch": 0.25, + "learning_rate": 4.994256201021617e-05, + "loss": 1.5866, + "step": 1599 + }, + { + "epoch": 0.25, + "learning_rate": 4.9942387095105004e-05, + "loss": 1.626, + "step": 1600 + }, + { + "epoch": 0.26, + "learning_rate": 4.9942211914372556e-05, + "loss": 1.5536, + "step": 1601 + }, + { + "epoch": 0.26, + "learning_rate": 4.994203646802069e-05, + "loss": 1.6201, + "step": 1602 + }, + { + "epoch": 0.26, + "learning_rate": 4.994186075605128e-05, + "loss": 1.5251, + "step": 1603 + }, + { + "epoch": 0.26, + "learning_rate": 4.9941684778466206e-05, + "loss": 1.499, + "step": 1604 + }, + { + "epoch": 0.26, + "learning_rate": 4.994150853526732e-05, + "loss": 1.6022, + "step": 1605 + }, + { + "epoch": 0.26, + "learning_rate": 4.994133202645651e-05, + "loss": 1.5863, + "step": 1606 + }, + { + "epoch": 0.26, + "learning_rate": 4.994115525203566e-05, + "loss": 1.6115, + "step": 1607 + }, + { + "epoch": 0.26, + "learning_rate": 4.994097821200665e-05, + "loss": 1.6167, + "step": 1608 + }, + { + "epoch": 0.26, + "learning_rate": 4.994080090637137e-05, + "loss": 1.624, + "step": 1609 + }, + { + "epoch": 0.26, + "learning_rate": 4.9940623335131686e-05, + "loss": 1.6694, + "step": 1610 + }, + { + "epoch": 0.26, + "learning_rate": 4.994044549828951e-05, + "loss": 1.551, + "step": 1611 + }, + { + "epoch": 0.26, + "learning_rate": 4.994026739584674e-05, + "loss": 1.5576, + "step": 1612 + }, + { + "epoch": 0.26, + "learning_rate": 4.994008902780526e-05, + "loss": 1.627, + "step": 1613 + }, + { + "epoch": 0.26, + "learning_rate": 4.993991039416697e-05, + "loss": 1.5467, + "step": 1614 + }, + { + "epoch": 0.26, + "learning_rate": 4.9939731494933776e-05, + "loss": 1.5397, + "step": 1615 + }, + { + "epoch": 0.26, + "learning_rate": 4.9939552330107585e-05, + "loss": 1.5972, + "step": 1616 + }, + { + "epoch": 0.26, + "learning_rate": 4.9939372899690304e-05, + "loss": 1.5021, + "step": 1617 + }, + { + "epoch": 0.26, + "learning_rate": 4.993919320368384e-05, + "loss": 1.5827, + "step": 1618 + }, + { + "epoch": 0.26, + "learning_rate": 4.9939013242090114e-05, + "loss": 1.5978, + "step": 1619 + }, + { + "epoch": 0.26, + "learning_rate": 4.993883301491103e-05, + "loss": 1.6192, + "step": 1620 + }, + { + "epoch": 0.26, + "learning_rate": 4.993865252214852e-05, + "loss": 1.5773, + "step": 1621 + }, + { + "epoch": 0.26, + "learning_rate": 4.99384717638045e-05, + "loss": 1.5751, + "step": 1622 + }, + { + "epoch": 0.26, + "learning_rate": 4.99382907398809e-05, + "loss": 1.592, + "step": 1623 + }, + { + "epoch": 0.26, + "learning_rate": 4.993810945037964e-05, + "loss": 1.6641, + "step": 1624 + }, + { + "epoch": 0.26, + "learning_rate": 4.993792789530266e-05, + "loss": 1.5728, + "step": 1625 + }, + { + "epoch": 0.26, + "learning_rate": 4.993774607465188e-05, + "loss": 1.5928, + "step": 1626 + }, + { + "epoch": 0.26, + "learning_rate": 4.993756398842925e-05, + "loss": 1.5747, + "step": 1627 + }, + { + "epoch": 0.26, + "learning_rate": 4.993738163663671e-05, + "loss": 1.6022, + "step": 1628 + }, + { + "epoch": 0.26, + "learning_rate": 4.9937199019276184e-05, + "loss": 1.5327, + "step": 1629 + }, + { + "epoch": 0.26, + "learning_rate": 4.993701613634963e-05, + "loss": 1.5142, + "step": 1630 + }, + { + "epoch": 0.26, + "learning_rate": 4.993683298785899e-05, + "loss": 1.5659, + "step": 1631 + }, + { + "epoch": 0.26, + "learning_rate": 4.993664957380623e-05, + "loss": 1.5375, + "step": 1632 + }, + { + "epoch": 0.26, + "learning_rate": 4.9936465894193285e-05, + "loss": 1.5902, + "step": 1633 + }, + { + "epoch": 0.26, + "learning_rate": 4.993628194902211e-05, + "loss": 1.5757, + "step": 1634 + }, + { + "epoch": 0.26, + "learning_rate": 4.993609773829468e-05, + "loss": 1.5472, + "step": 1635 + }, + { + "epoch": 0.26, + "learning_rate": 4.9935913262012945e-05, + "loss": 1.554, + "step": 1636 + }, + { + "epoch": 0.26, + "learning_rate": 4.9935728520178884e-05, + "loss": 1.5, + "step": 1637 + }, + { + "epoch": 0.26, + "learning_rate": 4.993554351279444e-05, + "loss": 1.5165, + "step": 1638 + }, + { + "epoch": 0.26, + "learning_rate": 4.99353582398616e-05, + "loss": 1.5723, + "step": 1639 + }, + { + "epoch": 0.26, + "learning_rate": 4.993517270138233e-05, + "loss": 1.6063, + "step": 1640 + }, + { + "epoch": 0.26, + "learning_rate": 4.9934986897358614e-05, + "loss": 1.5801, + "step": 1641 + }, + { + "epoch": 0.26, + "learning_rate": 4.9934800827792426e-05, + "loss": 1.6071, + "step": 1642 + }, + { + "epoch": 0.26, + "learning_rate": 4.993461449268575e-05, + "loss": 1.5933, + "step": 1643 + }, + { + "epoch": 0.26, + "learning_rate": 4.993442789204056e-05, + "loss": 1.5858, + "step": 1644 + }, + { + "epoch": 0.26, + "learning_rate": 4.993424102585885e-05, + "loss": 1.548, + "step": 1645 + }, + { + "epoch": 0.26, + "learning_rate": 4.993405389414262e-05, + "loss": 1.5757, + "step": 1646 + }, + { + "epoch": 0.26, + "learning_rate": 4.993386649689385e-05, + "loss": 1.6398, + "step": 1647 + }, + { + "epoch": 0.26, + "learning_rate": 4.993367883411454e-05, + "loss": 1.5163, + "step": 1648 + }, + { + "epoch": 0.26, + "learning_rate": 4.993349090580669e-05, + "loss": 1.4922, + "step": 1649 + }, + { + "epoch": 0.26, + "learning_rate": 4.9933302711972294e-05, + "loss": 1.5768, + "step": 1650 + }, + { + "epoch": 0.26, + "learning_rate": 4.993311425261337e-05, + "loss": 1.6556, + "step": 1651 + }, + { + "epoch": 0.26, + "learning_rate": 4.9932925527731903e-05, + "loss": 1.5638, + "step": 1652 + }, + { + "epoch": 0.26, + "learning_rate": 4.993273653732993e-05, + "loss": 1.5374, + "step": 1653 + }, + { + "epoch": 0.26, + "learning_rate": 4.9932547281409436e-05, + "loss": 1.5662, + "step": 1654 + }, + { + "epoch": 0.26, + "learning_rate": 4.993235775997246e-05, + "loss": 1.5516, + "step": 1655 + }, + { + "epoch": 0.26, + "learning_rate": 4.993216797302101e-05, + "loss": 1.6056, + "step": 1656 + }, + { + "epoch": 0.26, + "learning_rate": 4.993197792055711e-05, + "loss": 1.5755, + "step": 1657 + }, + { + "epoch": 0.26, + "learning_rate": 4.993178760258278e-05, + "loss": 1.672, + "step": 1658 + }, + { + "epoch": 0.26, + "learning_rate": 4.993159701910004e-05, + "loss": 1.5877, + "step": 1659 + }, + { + "epoch": 0.26, + "learning_rate": 4.993140617011094e-05, + "loss": 1.6058, + "step": 1660 + }, + { + "epoch": 0.26, + "learning_rate": 4.99312150556175e-05, + "loss": 1.5786, + "step": 1661 + }, + { + "epoch": 0.26, + "learning_rate": 4.9931023675621746e-05, + "loss": 1.5194, + "step": 1662 + }, + { + "epoch": 0.26, + "learning_rate": 4.993083203012573e-05, + "loss": 1.5602, + "step": 1663 + }, + { + "epoch": 0.27, + "learning_rate": 4.9930640119131486e-05, + "loss": 1.5448, + "step": 1664 + }, + { + "epoch": 0.27, + "learning_rate": 4.993044794264107e-05, + "loss": 1.6776, + "step": 1665 + }, + { + "epoch": 0.27, + "learning_rate": 4.993025550065651e-05, + "loss": 1.5241, + "step": 1666 + }, + { + "epoch": 0.27, + "learning_rate": 4.993006279317987e-05, + "loss": 1.6127, + "step": 1667 + }, + { + "epoch": 0.27, + "learning_rate": 4.9929869820213194e-05, + "loss": 1.6335, + "step": 1668 + }, + { + "epoch": 0.27, + "learning_rate": 4.992967658175854e-05, + "loss": 1.5563, + "step": 1669 + }, + { + "epoch": 0.27, + "learning_rate": 4.9929483077817964e-05, + "loss": 1.5567, + "step": 1670 + }, + { + "epoch": 0.27, + "learning_rate": 4.9929289308393535e-05, + "loss": 1.5104, + "step": 1671 + }, + { + "epoch": 0.27, + "learning_rate": 4.9929095273487303e-05, + "loss": 1.5553, + "step": 1672 + }, + { + "epoch": 0.27, + "learning_rate": 4.992890097310134e-05, + "loss": 1.57, + "step": 1673 + }, + { + "epoch": 0.27, + "learning_rate": 4.9928706407237725e-05, + "loss": 1.5715, + "step": 1674 + }, + { + "epoch": 0.27, + "learning_rate": 4.992851157589851e-05, + "loss": 1.55, + "step": 1675 + }, + { + "epoch": 0.27, + "learning_rate": 4.992831647908579e-05, + "loss": 1.4816, + "step": 1676 + }, + { + "epoch": 0.27, + "learning_rate": 4.9928121116801627e-05, + "loss": 1.641, + "step": 1677 + }, + { + "epoch": 0.27, + "learning_rate": 4.992792548904811e-05, + "loss": 1.6208, + "step": 1678 + }, + { + "epoch": 0.27, + "learning_rate": 4.992772959582733e-05, + "loss": 1.5712, + "step": 1679 + }, + { + "epoch": 0.27, + "learning_rate": 4.992753343714135e-05, + "loss": 1.5972, + "step": 1680 + }, + { + "epoch": 0.27, + "learning_rate": 4.992733701299228e-05, + "loss": 1.5222, + "step": 1681 + }, + { + "epoch": 0.27, + "learning_rate": 4.9927140323382205e-05, + "loss": 1.5775, + "step": 1682 + }, + { + "epoch": 0.27, + "learning_rate": 4.992694336831322e-05, + "loss": 1.5518, + "step": 1683 + }, + { + "epoch": 0.27, + "learning_rate": 4.992674614778742e-05, + "loss": 1.518, + "step": 1684 + }, + { + "epoch": 0.27, + "learning_rate": 4.99265486618069e-05, + "loss": 1.4474, + "step": 1685 + }, + { + "epoch": 0.27, + "learning_rate": 4.992635091037378e-05, + "loss": 1.5236, + "step": 1686 + }, + { + "epoch": 0.27, + "learning_rate": 4.9926152893490155e-05, + "loss": 1.6394, + "step": 1687 + }, + { + "epoch": 0.27, + "learning_rate": 4.9925954611158134e-05, + "loss": 1.5407, + "step": 1688 + }, + { + "epoch": 0.27, + "learning_rate": 4.9925756063379825e-05, + "loss": 1.6057, + "step": 1689 + }, + { + "epoch": 0.27, + "learning_rate": 4.992555725015735e-05, + "loss": 1.5933, + "step": 1690 + }, + { + "epoch": 0.27, + "learning_rate": 4.992535817149282e-05, + "loss": 1.5014, + "step": 1691 + }, + { + "epoch": 0.27, + "learning_rate": 4.992515882738836e-05, + "loss": 1.5658, + "step": 1692 + }, + { + "epoch": 0.27, + "learning_rate": 4.992495921784609e-05, + "loss": 1.598, + "step": 1693 + }, + { + "epoch": 0.27, + "learning_rate": 4.9924759342868144e-05, + "loss": 1.6056, + "step": 1694 + }, + { + "epoch": 0.27, + "learning_rate": 4.992455920245663e-05, + "loss": 1.5578, + "step": 1695 + }, + { + "epoch": 0.27, + "learning_rate": 4.99243587966137e-05, + "loss": 1.6493, + "step": 1696 + }, + { + "epoch": 0.27, + "learning_rate": 4.9924158125341487e-05, + "loss": 1.6237, + "step": 1697 + }, + { + "epoch": 0.27, + "learning_rate": 4.992395718864211e-05, + "loss": 1.6115, + "step": 1698 + }, + { + "epoch": 0.27, + "learning_rate": 4.992375598651773e-05, + "loss": 1.6192, + "step": 1699 + }, + { + "epoch": 0.27, + "learning_rate": 4.992355451897047e-05, + "loss": 1.5902, + "step": 1700 + }, + { + "epoch": 0.27, + "learning_rate": 4.9923352786002495e-05, + "loss": 1.5126, + "step": 1701 + }, + { + "epoch": 0.27, + "learning_rate": 4.9923150787615944e-05, + "loss": 1.6219, + "step": 1702 + }, + { + "epoch": 0.27, + "learning_rate": 4.992294852381296e-05, + "loss": 1.6127, + "step": 1703 + }, + { + "epoch": 0.27, + "learning_rate": 4.9922745994595714e-05, + "loss": 1.4968, + "step": 1704 + }, + { + "epoch": 0.27, + "learning_rate": 4.992254319996635e-05, + "loss": 1.6156, + "step": 1705 + }, + { + "epoch": 0.27, + "learning_rate": 4.9922340139927035e-05, + "loss": 1.4843, + "step": 1706 + }, + { + "epoch": 0.27, + "learning_rate": 4.9922136814479926e-05, + "loss": 1.5695, + "step": 1707 + }, + { + "epoch": 0.27, + "learning_rate": 4.992193322362719e-05, + "loss": 1.5525, + "step": 1708 + }, + { + "epoch": 0.27, + "learning_rate": 4.9921729367370996e-05, + "loss": 1.6223, + "step": 1709 + }, + { + "epoch": 0.27, + "learning_rate": 4.992152524571352e-05, + "loss": 1.5591, + "step": 1710 + }, + { + "epoch": 0.27, + "learning_rate": 4.9921320858656917e-05, + "loss": 1.5153, + "step": 1711 + }, + { + "epoch": 0.27, + "learning_rate": 4.992111620620339e-05, + "loss": 1.5442, + "step": 1712 + }, + { + "epoch": 0.27, + "learning_rate": 4.9920911288355096e-05, + "loss": 1.6123, + "step": 1713 + }, + { + "epoch": 0.27, + "learning_rate": 4.9920706105114235e-05, + "loss": 1.5414, + "step": 1714 + }, + { + "epoch": 0.27, + "learning_rate": 4.9920500656482976e-05, + "loss": 1.5105, + "step": 1715 + }, + { + "epoch": 0.27, + "learning_rate": 4.992029494246352e-05, + "loss": 1.6019, + "step": 1716 + }, + { + "epoch": 0.27, + "learning_rate": 4.9920088963058054e-05, + "loss": 1.632, + "step": 1717 + }, + { + "epoch": 0.27, + "learning_rate": 4.991988271826876e-05, + "loss": 1.6229, + "step": 1718 + }, + { + "epoch": 0.27, + "learning_rate": 4.991967620809785e-05, + "loss": 1.6511, + "step": 1719 + }, + { + "epoch": 0.27, + "learning_rate": 4.991946943254752e-05, + "loss": 1.5524, + "step": 1720 + }, + { + "epoch": 0.27, + "learning_rate": 4.991926239161997e-05, + "loss": 1.6662, + "step": 1721 + }, + { + "epoch": 0.27, + "learning_rate": 4.99190550853174e-05, + "loss": 1.5812, + "step": 1722 + }, + { + "epoch": 0.27, + "learning_rate": 4.991884751364203e-05, + "loss": 1.6107, + "step": 1723 + }, + { + "epoch": 0.27, + "learning_rate": 4.9918639676596054e-05, + "loss": 1.5487, + "step": 1724 + }, + { + "epoch": 0.27, + "learning_rate": 4.99184315741817e-05, + "loss": 1.5926, + "step": 1725 + }, + { + "epoch": 0.27, + "learning_rate": 4.991822320640117e-05, + "loss": 1.6449, + "step": 1726 + }, + { + "epoch": 0.28, + "learning_rate": 4.9918014573256697e-05, + "loss": 1.4919, + "step": 1727 + }, + { + "epoch": 0.28, + "learning_rate": 4.99178056747505e-05, + "loss": 1.544, + "step": 1728 + }, + { + "epoch": 0.28, + "learning_rate": 4.991759651088479e-05, + "loss": 1.5918, + "step": 1729 + }, + { + "epoch": 0.28, + "learning_rate": 4.991738708166182e-05, + "loss": 1.5973, + "step": 1730 + }, + { + "epoch": 0.28, + "learning_rate": 4.991717738708379e-05, + "loss": 1.5972, + "step": 1731 + }, + { + "epoch": 0.28, + "learning_rate": 4.9916967427152955e-05, + "loss": 1.6276, + "step": 1732 + }, + { + "epoch": 0.28, + "learning_rate": 4.9916757201871545e-05, + "loss": 1.6011, + "step": 1733 + }, + { + "epoch": 0.28, + "learning_rate": 4.99165467112418e-05, + "loss": 1.543, + "step": 1734 + }, + { + "epoch": 0.28, + "learning_rate": 4.991633595526596e-05, + "loss": 1.5599, + "step": 1735 + }, + { + "epoch": 0.28, + "learning_rate": 4.991612493394626e-05, + "loss": 1.5785, + "step": 1736 + }, + { + "epoch": 0.28, + "learning_rate": 4.991591364728496e-05, + "loss": 1.5943, + "step": 1737 + }, + { + "epoch": 0.28, + "learning_rate": 4.9915702095284314e-05, + "loss": 1.6055, + "step": 1738 + }, + { + "epoch": 0.28, + "learning_rate": 4.991549027794656e-05, + "loss": 1.603, + "step": 1739 + }, + { + "epoch": 0.28, + "learning_rate": 4.9915278195273965e-05, + "loss": 1.5718, + "step": 1740 + }, + { + "epoch": 0.28, + "learning_rate": 4.9915065847268785e-05, + "loss": 1.5487, + "step": 1741 + }, + { + "epoch": 0.28, + "learning_rate": 4.991485323393328e-05, + "loss": 1.5947, + "step": 1742 + }, + { + "epoch": 0.28, + "learning_rate": 4.99146403552697e-05, + "loss": 1.5383, + "step": 1743 + }, + { + "epoch": 0.28, + "learning_rate": 4.9914427211280346e-05, + "loss": 1.5804, + "step": 1744 + }, + { + "epoch": 0.28, + "learning_rate": 4.991421380196746e-05, + "loss": 1.5571, + "step": 1745 + }, + { + "epoch": 0.28, + "learning_rate": 4.991400012733332e-05, + "loss": 1.5741, + "step": 1746 + }, + { + "epoch": 0.28, + "learning_rate": 4.9913786187380207e-05, + "loss": 1.5593, + "step": 1747 + }, + { + "epoch": 0.28, + "learning_rate": 4.99135719821104e-05, + "loss": 1.6089, + "step": 1748 + }, + { + "epoch": 0.28, + "learning_rate": 4.991335751152617e-05, + "loss": 1.5537, + "step": 1749 + }, + { + "epoch": 0.28, + "learning_rate": 4.9913142775629814e-05, + "loss": 1.6154, + "step": 1750 + }, + { + "epoch": 0.28, + "learning_rate": 4.991292777442361e-05, + "loss": 1.6174, + "step": 1751 + }, + { + "epoch": 0.28, + "learning_rate": 4.9912712507909856e-05, + "loss": 1.5881, + "step": 1752 + }, + { + "epoch": 0.28, + "learning_rate": 4.991249697609083e-05, + "loss": 1.5273, + "step": 1753 + }, + { + "epoch": 0.28, + "learning_rate": 4.9912281178968834e-05, + "loss": 1.5973, + "step": 1754 + }, + { + "epoch": 0.28, + "learning_rate": 4.991206511654617e-05, + "loss": 1.5697, + "step": 1755 + }, + { + "epoch": 0.28, + "learning_rate": 4.9911848788825145e-05, + "loss": 1.5783, + "step": 1756 + }, + { + "epoch": 0.28, + "learning_rate": 4.991163219580804e-05, + "loss": 1.5127, + "step": 1757 + }, + { + "epoch": 0.28, + "learning_rate": 4.9911415337497194e-05, + "loss": 1.5417, + "step": 1758 + }, + { + "epoch": 0.28, + "learning_rate": 4.991119821389489e-05, + "loss": 1.5549, + "step": 1759 + }, + { + "epoch": 0.28, + "learning_rate": 4.991098082500345e-05, + "loss": 1.5518, + "step": 1760 + }, + { + "epoch": 0.28, + "learning_rate": 4.991076317082519e-05, + "loss": 1.53, + "step": 1761 + }, + { + "epoch": 0.28, + "learning_rate": 4.991054525136242e-05, + "loss": 1.6367, + "step": 1762 + }, + { + "epoch": 0.28, + "learning_rate": 4.991032706661747e-05, + "loss": 1.604, + "step": 1763 + }, + { + "epoch": 0.28, + "learning_rate": 4.9910108616592664e-05, + "loss": 1.569, + "step": 1764 + }, + { + "epoch": 0.28, + "learning_rate": 4.990988990129032e-05, + "loss": 1.5744, + "step": 1765 + }, + { + "epoch": 0.28, + "learning_rate": 4.990967092071277e-05, + "loss": 1.5526, + "step": 1766 + }, + { + "epoch": 0.28, + "learning_rate": 4.9909451674862354e-05, + "loss": 1.5759, + "step": 1767 + }, + { + "epoch": 0.28, + "learning_rate": 4.990923216374139e-05, + "loss": 1.555, + "step": 1768 + }, + { + "epoch": 0.28, + "learning_rate": 4.990901238735224e-05, + "loss": 1.6006, + "step": 1769 + }, + { + "epoch": 0.28, + "learning_rate": 4.990879234569722e-05, + "loss": 1.6084, + "step": 1770 + }, + { + "epoch": 0.28, + "learning_rate": 4.990857203877869e-05, + "loss": 1.5577, + "step": 1771 + }, + { + "epoch": 0.28, + "learning_rate": 4.990835146659899e-05, + "loss": 1.5775, + "step": 1772 + }, + { + "epoch": 0.28, + "learning_rate": 4.990813062916047e-05, + "loss": 1.5441, + "step": 1773 + }, + { + "epoch": 0.28, + "learning_rate": 4.990790952646547e-05, + "loss": 1.5368, + "step": 1774 + }, + { + "epoch": 0.28, + "learning_rate": 4.9907688158516365e-05, + "loss": 1.5506, + "step": 1775 + }, + { + "epoch": 0.28, + "learning_rate": 4.9907466525315505e-05, + "loss": 1.5449, + "step": 1776 + }, + { + "epoch": 0.28, + "learning_rate": 4.990724462686524e-05, + "loss": 1.5946, + "step": 1777 + }, + { + "epoch": 0.28, + "learning_rate": 4.9907022463167945e-05, + "loss": 1.5671, + "step": 1778 + }, + { + "epoch": 0.28, + "learning_rate": 4.990680003422599e-05, + "loss": 1.6369, + "step": 1779 + }, + { + "epoch": 0.28, + "learning_rate": 4.990657734004173e-05, + "loss": 1.6216, + "step": 1780 + }, + { + "epoch": 0.28, + "learning_rate": 4.9906354380617536e-05, + "loss": 1.5824, + "step": 1781 + }, + { + "epoch": 0.28, + "learning_rate": 4.990613115595579e-05, + "loss": 1.5501, + "step": 1782 + }, + { + "epoch": 0.28, + "learning_rate": 4.9905907666058875e-05, + "loss": 1.5763, + "step": 1783 + }, + { + "epoch": 0.28, + "learning_rate": 4.990568391092916e-05, + "loss": 1.6227, + "step": 1784 + }, + { + "epoch": 0.28, + "learning_rate": 4.990545989056904e-05, + "loss": 1.5576, + "step": 1785 + }, + { + "epoch": 0.28, + "learning_rate": 4.990523560498088e-05, + "loss": 1.6115, + "step": 1786 + }, + { + "epoch": 0.28, + "learning_rate": 4.990501105416709e-05, + "loss": 1.6053, + "step": 1787 + }, + { + "epoch": 0.28, + "learning_rate": 4.990478623813005e-05, + "loss": 1.6048, + "step": 1788 + }, + { + "epoch": 0.29, + "learning_rate": 4.990456115687215e-05, + "loss": 1.6046, + "step": 1789 + }, + { + "epoch": 0.29, + "learning_rate": 4.990433581039581e-05, + "loss": 1.6226, + "step": 1790 + }, + { + "epoch": 0.29, + "learning_rate": 4.99041101987034e-05, + "loss": 1.5574, + "step": 1791 + }, + { + "epoch": 0.29, + "learning_rate": 4.9903884321797335e-05, + "loss": 1.5949, + "step": 1792 + }, + { + "epoch": 0.29, + "learning_rate": 4.990365817968003e-05, + "loss": 1.5922, + "step": 1793 + }, + { + "epoch": 0.29, + "learning_rate": 4.990343177235388e-05, + "loss": 1.4946, + "step": 1794 + }, + { + "epoch": 0.29, + "learning_rate": 4.99032050998213e-05, + "loss": 1.6104, + "step": 1795 + }, + { + "epoch": 0.29, + "learning_rate": 4.990297816208471e-05, + "loss": 1.5938, + "step": 1796 + }, + { + "epoch": 0.29, + "learning_rate": 4.9902750959146515e-05, + "loss": 1.5456, + "step": 1797 + }, + { + "epoch": 0.29, + "learning_rate": 4.9902523491009145e-05, + "loss": 1.5705, + "step": 1798 + }, + { + "epoch": 0.29, + "learning_rate": 4.9902295757675015e-05, + "loss": 1.5584, + "step": 1799 + }, + { + "epoch": 0.29, + "learning_rate": 4.990206775914655e-05, + "loss": 1.6138, + "step": 1800 + }, + { + "epoch": 0.29, + "learning_rate": 4.9901839495426193e-05, + "loss": 1.5169, + "step": 1801 + }, + { + "epoch": 0.29, + "learning_rate": 4.990161096651636e-05, + "loss": 1.5718, + "step": 1802 + }, + { + "epoch": 0.29, + "learning_rate": 4.9901382172419486e-05, + "loss": 1.5379, + "step": 1803 + }, + { + "epoch": 0.29, + "learning_rate": 4.990115311313801e-05, + "loss": 1.5985, + "step": 1804 + }, + { + "epoch": 0.29, + "learning_rate": 4.9900923788674374e-05, + "loss": 1.5783, + "step": 1805 + }, + { + "epoch": 0.29, + "learning_rate": 4.9900694199031016e-05, + "loss": 1.5487, + "step": 1806 + }, + { + "epoch": 0.29, + "learning_rate": 4.990046434421039e-05, + "loss": 1.5858, + "step": 1807 + }, + { + "epoch": 0.29, + "learning_rate": 4.990023422421492e-05, + "loss": 1.55, + "step": 1808 + }, + { + "epoch": 0.29, + "learning_rate": 4.990000383904708e-05, + "loss": 1.5693, + "step": 1809 + }, + { + "epoch": 0.29, + "learning_rate": 4.989977318870932e-05, + "loss": 1.527, + "step": 1810 + }, + { + "epoch": 0.29, + "learning_rate": 4.989954227320409e-05, + "loss": 1.6597, + "step": 1811 + }, + { + "epoch": 0.29, + "learning_rate": 4.989931109253385e-05, + "loss": 1.6115, + "step": 1812 + }, + { + "epoch": 0.29, + "learning_rate": 4.9899079646701064e-05, + "loss": 1.6447, + "step": 1813 + }, + { + "epoch": 0.29, + "learning_rate": 4.9898847935708196e-05, + "loss": 1.5669, + "step": 1814 + }, + { + "epoch": 0.29, + "learning_rate": 4.9898615959557715e-05, + "loss": 1.5005, + "step": 1815 + }, + { + "epoch": 0.29, + "learning_rate": 4.989838371825209e-05, + "loss": 1.515, + "step": 1816 + }, + { + "epoch": 0.29, + "learning_rate": 4.98981512117938e-05, + "loss": 1.5581, + "step": 1817 + }, + { + "epoch": 0.29, + "learning_rate": 4.989791844018531e-05, + "loss": 1.534, + "step": 1818 + }, + { + "epoch": 0.29, + "learning_rate": 4.98976854034291e-05, + "loss": 1.5842, + "step": 1819 + }, + { + "epoch": 0.29, + "learning_rate": 4.989745210152766e-05, + "loss": 1.6236, + "step": 1820 + }, + { + "epoch": 0.29, + "learning_rate": 4.989721853448348e-05, + "loss": 1.5282, + "step": 1821 + }, + { + "epoch": 0.29, + "learning_rate": 4.989698470229902e-05, + "loss": 1.5674, + "step": 1822 + }, + { + "epoch": 0.29, + "learning_rate": 4.9896750604976806e-05, + "loss": 1.572, + "step": 1823 + }, + { + "epoch": 0.29, + "learning_rate": 4.9896516242519305e-05, + "loss": 1.5687, + "step": 1824 + }, + { + "epoch": 0.29, + "learning_rate": 4.989628161492902e-05, + "loss": 1.5666, + "step": 1825 + }, + { + "epoch": 0.29, + "learning_rate": 4.989604672220846e-05, + "loss": 1.5155, + "step": 1826 + }, + { + "epoch": 0.29, + "learning_rate": 4.9895811564360103e-05, + "loss": 1.5591, + "step": 1827 + }, + { + "epoch": 0.29, + "learning_rate": 4.989557614138647e-05, + "loss": 1.5881, + "step": 1828 + }, + { + "epoch": 0.29, + "learning_rate": 4.9895340453290074e-05, + "loss": 1.6097, + "step": 1829 + }, + { + "epoch": 0.29, + "learning_rate": 4.9895104500073406e-05, + "loss": 1.5175, + "step": 1830 + }, + { + "epoch": 0.29, + "learning_rate": 4.9894868281738996e-05, + "loss": 1.5688, + "step": 1831 + }, + { + "epoch": 0.29, + "learning_rate": 4.989463179828936e-05, + "loss": 1.6016, + "step": 1832 + }, + { + "epoch": 0.29, + "learning_rate": 4.9894395049726995e-05, + "loss": 1.5806, + "step": 1833 + }, + { + "epoch": 0.29, + "learning_rate": 4.9894158036054445e-05, + "loss": 1.5302, + "step": 1834 + }, + { + "epoch": 0.29, + "learning_rate": 4.989392075727423e-05, + "loss": 1.5932, + "step": 1835 + }, + { + "epoch": 0.29, + "learning_rate": 4.9893683213388856e-05, + "loss": 1.6397, + "step": 1836 + }, + { + "epoch": 0.29, + "learning_rate": 4.989344540440089e-05, + "loss": 1.5038, + "step": 1837 + }, + { + "epoch": 0.29, + "learning_rate": 4.989320733031282e-05, + "loss": 1.5173, + "step": 1838 + }, + { + "epoch": 0.29, + "learning_rate": 4.989296899112722e-05, + "loss": 1.5576, + "step": 1839 + }, + { + "epoch": 0.29, + "learning_rate": 4.9892730386846614e-05, + "loss": 1.6143, + "step": 1840 + }, + { + "epoch": 0.29, + "learning_rate": 4.989249151747354e-05, + "loss": 1.5702, + "step": 1841 + }, + { + "epoch": 0.29, + "learning_rate": 4.9892252383010544e-05, + "loss": 1.571, + "step": 1842 + }, + { + "epoch": 0.29, + "learning_rate": 4.989201298346017e-05, + "loss": 1.5899, + "step": 1843 + }, + { + "epoch": 0.29, + "learning_rate": 4.989177331882497e-05, + "loss": 1.6332, + "step": 1844 + }, + { + "epoch": 0.29, + "learning_rate": 4.98915333891075e-05, + "loss": 1.5762, + "step": 1845 + }, + { + "epoch": 0.29, + "learning_rate": 4.989129319431031e-05, + "loss": 1.6087, + "step": 1846 + }, + { + "epoch": 0.29, + "learning_rate": 4.989105273443595e-05, + "loss": 1.5419, + "step": 1847 + }, + { + "epoch": 0.29, + "learning_rate": 4.9890812009487006e-05, + "loss": 1.5385, + "step": 1848 + }, + { + "epoch": 0.29, + "learning_rate": 4.989057101946602e-05, + "loss": 1.5068, + "step": 1849 + }, + { + "epoch": 0.29, + "learning_rate": 4.9890329764375554e-05, + "loss": 1.5015, + "step": 1850 + }, + { + "epoch": 0.29, + "learning_rate": 4.9890088244218205e-05, + "loss": 1.5503, + "step": 1851 + }, + { + "epoch": 0.3, + "learning_rate": 4.988984645899651e-05, + "loss": 1.5297, + "step": 1852 + }, + { + "epoch": 0.3, + "learning_rate": 4.988960440871308e-05, + "loss": 1.6086, + "step": 1853 + }, + { + "epoch": 0.3, + "learning_rate": 4.9889362093370454e-05, + "loss": 1.5262, + "step": 1854 + }, + { + "epoch": 0.3, + "learning_rate": 4.988911951297124e-05, + "loss": 1.5445, + "step": 1855 + }, + { + "epoch": 0.3, + "learning_rate": 4.988887666751801e-05, + "loss": 1.5855, + "step": 1856 + }, + { + "epoch": 0.3, + "learning_rate": 4.9888633557013365e-05, + "loss": 1.5462, + "step": 1857 + }, + { + "epoch": 0.3, + "learning_rate": 4.9888390181459876e-05, + "loss": 1.5219, + "step": 1858 + }, + { + "epoch": 0.3, + "learning_rate": 4.988814654086015e-05, + "loss": 1.5952, + "step": 1859 + }, + { + "epoch": 0.3, + "learning_rate": 4.988790263521676e-05, + "loss": 1.5205, + "step": 1860 + }, + { + "epoch": 0.3, + "learning_rate": 4.988765846453232e-05, + "loss": 1.5575, + "step": 1861 + }, + { + "epoch": 0.3, + "learning_rate": 4.988741402880943e-05, + "loss": 1.5284, + "step": 1862 + }, + { + "epoch": 0.3, + "learning_rate": 4.988716932805069e-05, + "loss": 1.5562, + "step": 1863 + }, + { + "epoch": 0.3, + "learning_rate": 4.988692436225871e-05, + "loss": 1.5355, + "step": 1864 + }, + { + "epoch": 0.3, + "learning_rate": 4.988667913143609e-05, + "loss": 1.6123, + "step": 1865 + }, + { + "epoch": 0.3, + "learning_rate": 4.9886433635585444e-05, + "loss": 1.6079, + "step": 1866 + }, + { + "epoch": 0.3, + "learning_rate": 4.98861878747094e-05, + "loss": 1.5506, + "step": 1867 + }, + { + "epoch": 0.3, + "learning_rate": 4.9885941848810546e-05, + "loss": 1.5863, + "step": 1868 + }, + { + "epoch": 0.3, + "learning_rate": 4.988569555789153e-05, + "loss": 1.5487, + "step": 1869 + }, + { + "epoch": 0.3, + "learning_rate": 4.988544900195496e-05, + "loss": 1.578, + "step": 1870 + }, + { + "epoch": 0.3, + "learning_rate": 4.9885202181003474e-05, + "loss": 1.5895, + "step": 1871 + }, + { + "epoch": 0.3, + "learning_rate": 4.988495509503969e-05, + "loss": 1.5093, + "step": 1872 + }, + { + "epoch": 0.3, + "learning_rate": 4.9884707744066246e-05, + "loss": 1.5679, + "step": 1873 + }, + { + "epoch": 0.3, + "learning_rate": 4.9884460128085765e-05, + "loss": 1.5054, + "step": 1874 + }, + { + "epoch": 0.3, + "learning_rate": 4.9884212247100895e-05, + "loss": 1.6426, + "step": 1875 + }, + { + "epoch": 0.3, + "learning_rate": 4.9883964101114274e-05, + "loss": 1.6153, + "step": 1876 + }, + { + "epoch": 0.3, + "learning_rate": 4.988371569012854e-05, + "loss": 1.6016, + "step": 1877 + }, + { + "epoch": 0.3, + "learning_rate": 4.988346701414634e-05, + "loss": 1.6128, + "step": 1878 + }, + { + "epoch": 0.3, + "learning_rate": 4.9883218073170324e-05, + "loss": 1.6249, + "step": 1879 + }, + { + "epoch": 0.3, + "learning_rate": 4.988296886720314e-05, + "loss": 1.5633, + "step": 1880 + }, + { + "epoch": 0.3, + "learning_rate": 4.9882719396247455e-05, + "loss": 1.584, + "step": 1881 + }, + { + "epoch": 0.3, + "learning_rate": 4.988246966030591e-05, + "loss": 1.5326, + "step": 1882 + }, + { + "epoch": 0.3, + "learning_rate": 4.988221965938117e-05, + "loss": 1.5697, + "step": 1883 + }, + { + "epoch": 0.3, + "learning_rate": 4.988196939347589e-05, + "loss": 1.5698, + "step": 1884 + }, + { + "epoch": 0.3, + "learning_rate": 4.988171886259275e-05, + "loss": 1.5213, + "step": 1885 + }, + { + "epoch": 0.3, + "learning_rate": 4.9881468066734405e-05, + "loss": 1.6296, + "step": 1886 + }, + { + "epoch": 0.3, + "learning_rate": 4.9881217005903534e-05, + "loss": 1.6003, + "step": 1887 + }, + { + "epoch": 0.3, + "learning_rate": 4.98809656801028e-05, + "loss": 1.6009, + "step": 1888 + }, + { + "epoch": 0.3, + "learning_rate": 4.98807140893349e-05, + "loss": 1.5629, + "step": 1889 + }, + { + "epoch": 0.3, + "learning_rate": 4.988046223360249e-05, + "loss": 1.5951, + "step": 1890 + }, + { + "epoch": 0.3, + "learning_rate": 4.988021011290826e-05, + "loss": 1.5202, + "step": 1891 + }, + { + "epoch": 0.3, + "learning_rate": 4.987995772725491e-05, + "loss": 1.4981, + "step": 1892 + }, + { + "epoch": 0.3, + "learning_rate": 4.9879705076645114e-05, + "loss": 1.523, + "step": 1893 + }, + { + "epoch": 0.3, + "learning_rate": 4.987945216108156e-05, + "loss": 1.5759, + "step": 1894 + }, + { + "epoch": 0.3, + "learning_rate": 4.987919898056694e-05, + "loss": 1.575, + "step": 1895 + }, + { + "epoch": 0.3, + "learning_rate": 4.987894553510396e-05, + "loss": 1.5239, + "step": 1896 + }, + { + "epoch": 0.3, + "learning_rate": 4.987869182469531e-05, + "loss": 1.5469, + "step": 1897 + }, + { + "epoch": 0.3, + "learning_rate": 4.987843784934371e-05, + "loss": 1.6668, + "step": 1898 + }, + { + "epoch": 0.3, + "learning_rate": 4.987818360905184e-05, + "loss": 1.464, + "step": 1899 + }, + { + "epoch": 0.3, + "learning_rate": 4.987792910382242e-05, + "loss": 1.5851, + "step": 1900 + }, + { + "epoch": 0.3, + "learning_rate": 4.987767433365815e-05, + "loss": 1.5928, + "step": 1901 + }, + { + "epoch": 0.3, + "learning_rate": 4.987741929856177e-05, + "loss": 1.5679, + "step": 1902 + }, + { + "epoch": 0.3, + "learning_rate": 4.987716399853597e-05, + "loss": 1.5692, + "step": 1903 + }, + { + "epoch": 0.3, + "learning_rate": 4.987690843358347e-05, + "loss": 1.5474, + "step": 1904 + }, + { + "epoch": 0.3, + "learning_rate": 4.9876652603707004e-05, + "loss": 1.5309, + "step": 1905 + }, + { + "epoch": 0.3, + "learning_rate": 4.987639650890929e-05, + "loss": 1.5635, + "step": 1906 + }, + { + "epoch": 0.3, + "learning_rate": 4.9876140149193054e-05, + "loss": 1.6348, + "step": 1907 + }, + { + "epoch": 0.3, + "learning_rate": 4.987588352456103e-05, + "loss": 1.5378, + "step": 1908 + }, + { + "epoch": 0.3, + "learning_rate": 4.987562663501595e-05, + "loss": 1.5119, + "step": 1909 + }, + { + "epoch": 0.3, + "learning_rate": 4.987536948056055e-05, + "loss": 1.5915, + "step": 1910 + }, + { + "epoch": 0.3, + "learning_rate": 4.987511206119757e-05, + "loss": 1.4871, + "step": 1911 + }, + { + "epoch": 0.3, + "learning_rate": 4.9874854376929736e-05, + "loss": 1.612, + "step": 1912 + }, + { + "epoch": 0.3, + "learning_rate": 4.987459642775981e-05, + "loss": 1.5581, + "step": 1913 + }, + { + "epoch": 0.3, + "learning_rate": 4.9874338213690544e-05, + "loss": 1.5773, + "step": 1914 + }, + { + "epoch": 0.31, + "learning_rate": 4.9874079734724666e-05, + "loss": 1.5728, + "step": 1915 + }, + { + "epoch": 0.31, + "learning_rate": 4.987382099086495e-05, + "loss": 1.5796, + "step": 1916 + }, + { + "epoch": 0.31, + "learning_rate": 4.987356198211413e-05, + "loss": 1.5565, + "step": 1917 + }, + { + "epoch": 0.31, + "learning_rate": 4.987330270847498e-05, + "loss": 1.5474, + "step": 1918 + }, + { + "epoch": 0.31, + "learning_rate": 4.987304316995026e-05, + "loss": 1.5721, + "step": 1919 + }, + { + "epoch": 0.31, + "learning_rate": 4.987278336654273e-05, + "loss": 1.5643, + "step": 1920 + }, + { + "epoch": 0.31, + "learning_rate": 4.987252329825516e-05, + "loss": 1.6115, + "step": 1921 + }, + { + "epoch": 0.31, + "learning_rate": 4.9872262965090315e-05, + "loss": 1.6183, + "step": 1922 + }, + { + "epoch": 0.31, + "learning_rate": 4.987200236705097e-05, + "loss": 1.5371, + "step": 1923 + }, + { + "epoch": 0.31, + "learning_rate": 4.98717415041399e-05, + "loss": 1.5765, + "step": 1924 + }, + { + "epoch": 0.31, + "learning_rate": 4.987148037635988e-05, + "loss": 1.5549, + "step": 1925 + }, + { + "epoch": 0.31, + "learning_rate": 4.9871218983713694e-05, + "loss": 1.4963, + "step": 1926 + }, + { + "epoch": 0.31, + "learning_rate": 4.987095732620414e-05, + "loss": 1.6348, + "step": 1927 + }, + { + "epoch": 0.31, + "learning_rate": 4.987069540383397e-05, + "loss": 1.5913, + "step": 1928 + }, + { + "epoch": 0.31, + "learning_rate": 4.9870433216606e-05, + "loss": 1.5423, + "step": 1929 + }, + { + "epoch": 0.31, + "learning_rate": 4.9870170764523014e-05, + "loss": 1.5137, + "step": 1930 + }, + { + "epoch": 0.31, + "learning_rate": 4.986990804758782e-05, + "loss": 1.5256, + "step": 1931 + }, + { + "epoch": 0.31, + "learning_rate": 4.9869645065803195e-05, + "loss": 1.5343, + "step": 1932 + }, + { + "epoch": 0.31, + "learning_rate": 4.986938181917195e-05, + "loss": 1.529, + "step": 1933 + }, + { + "epoch": 0.31, + "learning_rate": 4.9869118307696884e-05, + "loss": 1.5781, + "step": 1934 + }, + { + "epoch": 0.31, + "learning_rate": 4.986885453138081e-05, + "loss": 1.5503, + "step": 1935 + }, + { + "epoch": 0.31, + "learning_rate": 4.986859049022653e-05, + "loss": 1.6009, + "step": 1936 + }, + { + "epoch": 0.31, + "learning_rate": 4.9868326184236865e-05, + "loss": 1.5365, + "step": 1937 + }, + { + "epoch": 0.31, + "learning_rate": 4.986806161341462e-05, + "loss": 1.499, + "step": 1938 + }, + { + "epoch": 0.31, + "learning_rate": 4.986779677776262e-05, + "loss": 1.5145, + "step": 1939 + }, + { + "epoch": 0.31, + "learning_rate": 4.986753167728368e-05, + "loss": 1.5695, + "step": 1940 + }, + { + "epoch": 0.31, + "learning_rate": 4.986726631198062e-05, + "loss": 1.6001, + "step": 1941 + }, + { + "epoch": 0.31, + "learning_rate": 4.986700068185628e-05, + "loss": 1.5938, + "step": 1942 + }, + { + "epoch": 0.31, + "learning_rate": 4.986673478691347e-05, + "loss": 1.5853, + "step": 1943 + }, + { + "epoch": 0.31, + "learning_rate": 4.986646862715504e-05, + "loss": 1.5075, + "step": 1944 + }, + { + "epoch": 0.31, + "learning_rate": 4.986620220258381e-05, + "loss": 1.6068, + "step": 1945 + }, + { + "epoch": 0.31, + "learning_rate": 4.986593551320263e-05, + "loss": 1.529, + "step": 1946 + }, + { + "epoch": 0.31, + "learning_rate": 4.986566855901433e-05, + "loss": 1.549, + "step": 1947 + }, + { + "epoch": 0.31, + "learning_rate": 4.9865401340021754e-05, + "loss": 1.5568, + "step": 1948 + }, + { + "epoch": 0.31, + "learning_rate": 4.9865133856227756e-05, + "loss": 1.5586, + "step": 1949 + }, + { + "epoch": 0.31, + "learning_rate": 4.9864866107635165e-05, + "loss": 1.619, + "step": 1950 + }, + { + "epoch": 0.31, + "learning_rate": 4.9864598094246854e-05, + "loss": 1.6364, + "step": 1951 + }, + { + "epoch": 0.31, + "learning_rate": 4.986432981606567e-05, + "loss": 1.4709, + "step": 1952 + }, + { + "epoch": 0.31, + "learning_rate": 4.986406127309447e-05, + "loss": 1.5687, + "step": 1953 + }, + { + "epoch": 0.31, + "learning_rate": 4.986379246533611e-05, + "loss": 1.4914, + "step": 1954 + }, + { + "epoch": 0.31, + "learning_rate": 4.9863523392793454e-05, + "loss": 1.5519, + "step": 1955 + }, + { + "epoch": 0.31, + "learning_rate": 4.986325405546938e-05, + "loss": 1.6434, + "step": 1956 + }, + { + "epoch": 0.31, + "learning_rate": 4.986298445336674e-05, + "loss": 1.5343, + "step": 1957 + }, + { + "epoch": 0.31, + "learning_rate": 4.98627145864884e-05, + "loss": 1.568, + "step": 1958 + }, + { + "epoch": 0.31, + "learning_rate": 4.986244445483725e-05, + "loss": 1.618, + "step": 1959 + }, + { + "epoch": 0.31, + "learning_rate": 4.986217405841616e-05, + "loss": 1.5692, + "step": 1960 + }, + { + "epoch": 0.31, + "learning_rate": 4.986190339722801e-05, + "loss": 1.5703, + "step": 1961 + }, + { + "epoch": 0.31, + "learning_rate": 4.9861632471275686e-05, + "loss": 1.5563, + "step": 1962 + }, + { + "epoch": 0.31, + "learning_rate": 4.9861361280562076e-05, + "loss": 1.5529, + "step": 1963 + }, + { + "epoch": 0.31, + "learning_rate": 4.986108982509005e-05, + "loss": 1.5505, + "step": 1964 + }, + { + "epoch": 0.31, + "learning_rate": 4.9860818104862514e-05, + "loss": 1.6082, + "step": 1965 + }, + { + "epoch": 0.31, + "learning_rate": 4.986054611988237e-05, + "loss": 1.5741, + "step": 1966 + }, + { + "epoch": 0.31, + "learning_rate": 4.986027387015249e-05, + "loss": 1.5838, + "step": 1967 + }, + { + "epoch": 0.31, + "learning_rate": 4.986000135567579e-05, + "loss": 1.6063, + "step": 1968 + }, + { + "epoch": 0.31, + "learning_rate": 4.985972857645517e-05, + "loss": 1.6125, + "step": 1969 + }, + { + "epoch": 0.31, + "learning_rate": 4.985945553249353e-05, + "loss": 1.662, + "step": 1970 + }, + { + "epoch": 0.31, + "learning_rate": 4.9859182223793785e-05, + "loss": 1.5918, + "step": 1971 + }, + { + "epoch": 0.31, + "learning_rate": 4.9858908650358836e-05, + "loss": 1.5905, + "step": 1972 + }, + { + "epoch": 0.31, + "learning_rate": 4.9858634812191606e-05, + "loss": 1.5702, + "step": 1973 + }, + { + "epoch": 0.31, + "learning_rate": 4.985836070929501e-05, + "loss": 1.5547, + "step": 1974 + }, + { + "epoch": 0.31, + "learning_rate": 4.985808634167196e-05, + "loss": 1.5677, + "step": 1975 + }, + { + "epoch": 0.31, + "learning_rate": 4.9857811709325386e-05, + "loss": 1.5612, + "step": 1976 + }, + { + "epoch": 0.31, + "learning_rate": 4.9857536812258206e-05, + "loss": 1.653, + "step": 1977 + }, + { + "epoch": 0.32, + "learning_rate": 4.9857261650473356e-05, + "loss": 1.6093, + "step": 1978 + }, + { + "epoch": 0.32, + "learning_rate": 4.985698622397375e-05, + "loss": 1.5353, + "step": 1979 + }, + { + "epoch": 0.32, + "learning_rate": 4.985671053276234e-05, + "loss": 1.5199, + "step": 1980 + }, + { + "epoch": 0.32, + "learning_rate": 4.9856434576842047e-05, + "loss": 1.5084, + "step": 1981 + }, + { + "epoch": 0.32, + "learning_rate": 4.985615835621582e-05, + "loss": 1.6298, + "step": 1982 + }, + { + "epoch": 0.32, + "learning_rate": 4.98558818708866e-05, + "loss": 1.5344, + "step": 1983 + }, + { + "epoch": 0.32, + "learning_rate": 4.985560512085732e-05, + "loss": 1.5277, + "step": 1984 + }, + { + "epoch": 0.32, + "learning_rate": 4.985532810613095e-05, + "loss": 1.5651, + "step": 1985 + }, + { + "epoch": 0.32, + "learning_rate": 4.985505082671042e-05, + "loss": 1.4719, + "step": 1986 + }, + { + "epoch": 0.32, + "learning_rate": 4.9854773282598674e-05, + "loss": 1.6055, + "step": 1987 + }, + { + "epoch": 0.32, + "learning_rate": 4.98544954737987e-05, + "loss": 1.6445, + "step": 1988 + }, + { + "epoch": 0.32, + "learning_rate": 4.985421740031343e-05, + "loss": 1.5269, + "step": 1989 + }, + { + "epoch": 0.32, + "learning_rate": 4.985393906214584e-05, + "loss": 1.6496, + "step": 1990 + }, + { + "epoch": 0.32, + "learning_rate": 4.985366045929888e-05, + "loss": 1.5464, + "step": 1991 + }, + { + "epoch": 0.32, + "learning_rate": 4.985338159177553e-05, + "loss": 1.6411, + "step": 1992 + }, + { + "epoch": 0.32, + "learning_rate": 4.9853102459578757e-05, + "loss": 1.5482, + "step": 1993 + }, + { + "epoch": 0.32, + "learning_rate": 4.985282306271153e-05, + "loss": 1.5049, + "step": 1994 + }, + { + "epoch": 0.32, + "learning_rate": 4.985254340117683e-05, + "loss": 1.5627, + "step": 1995 + }, + { + "epoch": 0.32, + "learning_rate": 4.9852263474977624e-05, + "loss": 1.5557, + "step": 1996 + }, + { + "epoch": 0.32, + "learning_rate": 4.98519832841169e-05, + "loss": 1.606, + "step": 1997 + }, + { + "epoch": 0.32, + "learning_rate": 4.985170282859765e-05, + "loss": 1.5925, + "step": 1998 + }, + { + "epoch": 0.32, + "learning_rate": 4.985142210842285e-05, + "loss": 1.4961, + "step": 1999 + }, + { + "epoch": 0.32, + "learning_rate": 4.985114112359548e-05, + "loss": 1.5754, + "step": 2000 + }, + { + "epoch": 0.32, + "learning_rate": 4.985085987411856e-05, + "loss": 1.5619, + "step": 2001 + }, + { + "epoch": 0.32, + "learning_rate": 4.985057835999506e-05, + "loss": 1.4947, + "step": 2002 + }, + { + "epoch": 0.32, + "learning_rate": 4.985029658122799e-05, + "loss": 1.5248, + "step": 2003 + }, + { + "epoch": 0.32, + "learning_rate": 4.9850014537820355e-05, + "loss": 1.5788, + "step": 2004 + }, + { + "epoch": 0.32, + "learning_rate": 4.984973222977515e-05, + "loss": 1.5114, + "step": 2005 + }, + { + "epoch": 0.32, + "learning_rate": 4.9849449657095375e-05, + "loss": 1.5827, + "step": 2006 + }, + { + "epoch": 0.32, + "learning_rate": 4.984916681978406e-05, + "loss": 1.5926, + "step": 2007 + }, + { + "epoch": 0.32, + "learning_rate": 4.984888371784419e-05, + "loss": 1.5169, + "step": 2008 + }, + { + "epoch": 0.32, + "learning_rate": 4.984860035127881e-05, + "loss": 1.596, + "step": 2009 + }, + { + "epoch": 0.32, + "learning_rate": 4.984831672009091e-05, + "loss": 1.5233, + "step": 2010 + }, + { + "epoch": 0.32, + "learning_rate": 4.984803282428353e-05, + "loss": 1.5459, + "step": 2011 + }, + { + "epoch": 0.32, + "learning_rate": 4.984774866385968e-05, + "loss": 1.5893, + "step": 2012 + }, + { + "epoch": 0.32, + "learning_rate": 4.984746423882239e-05, + "loss": 1.5934, + "step": 2013 + }, + { + "epoch": 0.32, + "learning_rate": 4.98471795491747e-05, + "loss": 1.6183, + "step": 2014 + }, + { + "epoch": 0.32, + "learning_rate": 4.984689459491963e-05, + "loss": 1.6519, + "step": 2015 + }, + { + "epoch": 0.32, + "learning_rate": 4.9846609376060215e-05, + "loss": 1.603, + "step": 2016 + }, + { + "epoch": 0.32, + "learning_rate": 4.98463238925995e-05, + "loss": 1.5204, + "step": 2017 + }, + { + "epoch": 0.32, + "learning_rate": 4.9846038144540515e-05, + "loss": 1.6336, + "step": 2018 + }, + { + "epoch": 0.32, + "learning_rate": 4.984575213188631e-05, + "loss": 1.5589, + "step": 2019 + }, + { + "epoch": 0.32, + "learning_rate": 4.9845465854639927e-05, + "loss": 1.5417, + "step": 2020 + }, + { + "epoch": 0.32, + "learning_rate": 4.984517931280442e-05, + "loss": 1.5692, + "step": 2021 + }, + { + "epoch": 0.32, + "learning_rate": 4.984489250638283e-05, + "loss": 1.5102, + "step": 2022 + }, + { + "epoch": 0.32, + "learning_rate": 4.9844605435378236e-05, + "loss": 1.5028, + "step": 2023 + }, + { + "epoch": 0.32, + "learning_rate": 4.9844318099793663e-05, + "loss": 1.5577, + "step": 2024 + }, + { + "epoch": 0.32, + "learning_rate": 4.984403049963219e-05, + "loss": 1.5171, + "step": 2025 + }, + { + "epoch": 0.32, + "learning_rate": 4.984374263489687e-05, + "loss": 1.5714, + "step": 2026 + }, + { + "epoch": 0.32, + "learning_rate": 4.9843454505590773e-05, + "loss": 1.5368, + "step": 2027 + }, + { + "epoch": 0.32, + "learning_rate": 4.9843166111716976e-05, + "loss": 1.5285, + "step": 2028 + }, + { + "epoch": 0.32, + "learning_rate": 4.984287745327854e-05, + "loss": 1.5181, + "step": 2029 + }, + { + "epoch": 0.32, + "learning_rate": 4.984258853027854e-05, + "loss": 1.5585, + "step": 2030 + }, + { + "epoch": 0.32, + "learning_rate": 4.9842299342720053e-05, + "loss": 1.501, + "step": 2031 + }, + { + "epoch": 0.32, + "learning_rate": 4.9842009890606165e-05, + "loss": 1.5616, + "step": 2032 + }, + { + "epoch": 0.32, + "learning_rate": 4.984172017393995e-05, + "loss": 1.5461, + "step": 2033 + }, + { + "epoch": 0.32, + "learning_rate": 4.98414301927245e-05, + "loss": 1.6413, + "step": 2034 + }, + { + "epoch": 0.32, + "learning_rate": 4.98411399469629e-05, + "loss": 1.5896, + "step": 2035 + }, + { + "epoch": 0.32, + "learning_rate": 4.9840849436658244e-05, + "loss": 1.5915, + "step": 2036 + }, + { + "epoch": 0.32, + "learning_rate": 4.984055866181361e-05, + "loss": 1.515, + "step": 2037 + }, + { + "epoch": 0.32, + "learning_rate": 4.984026762243212e-05, + "loss": 1.5333, + "step": 2038 + }, + { + "epoch": 0.32, + "learning_rate": 4.983997631851686e-05, + "loss": 1.5279, + "step": 2039 + }, + { + "epoch": 0.32, + "learning_rate": 4.9839684750070924e-05, + "loss": 1.5562, + "step": 2040 + }, + { + "epoch": 0.33, + "learning_rate": 4.983939291709743e-05, + "loss": 1.5838, + "step": 2041 + }, + { + "epoch": 0.33, + "learning_rate": 4.983910081959948e-05, + "loss": 1.6348, + "step": 2042 + }, + { + "epoch": 0.33, + "learning_rate": 4.983880845758019e-05, + "loss": 1.5741, + "step": 2043 + }, + { + "epoch": 0.33, + "learning_rate": 4.9838515831042675e-05, + "loss": 1.5715, + "step": 2044 + }, + { + "epoch": 0.33, + "learning_rate": 4.983822293999004e-05, + "loss": 1.5374, + "step": 2045 + }, + { + "epoch": 0.33, + "learning_rate": 4.983792978442541e-05, + "loss": 1.5116, + "step": 2046 + }, + { + "epoch": 0.33, + "learning_rate": 4.983763636435191e-05, + "loss": 1.4818, + "step": 2047 + }, + { + "epoch": 0.33, + "learning_rate": 4.983734267977266e-05, + "loss": 1.5729, + "step": 2048 + }, + { + "epoch": 0.33, + "learning_rate": 4.9837048730690785e-05, + "loss": 1.6604, + "step": 2049 + }, + { + "epoch": 0.33, + "learning_rate": 4.983675451710943e-05, + "loss": 1.5007, + "step": 2050 + }, + { + "epoch": 0.33, + "learning_rate": 4.9836460039031705e-05, + "loss": 1.5523, + "step": 2051 + }, + { + "epoch": 0.33, + "learning_rate": 4.983616529646077e-05, + "loss": 1.5843, + "step": 2052 + }, + { + "epoch": 0.33, + "learning_rate": 4.9835870289399756e-05, + "loss": 1.5606, + "step": 2053 + }, + { + "epoch": 0.33, + "learning_rate": 4.9835575017851795e-05, + "loss": 1.5641, + "step": 2054 + }, + { + "epoch": 0.33, + "learning_rate": 4.983527948182004e-05, + "loss": 1.5327, + "step": 2055 + }, + { + "epoch": 0.33, + "learning_rate": 4.983498368130764e-05, + "loss": 1.5814, + "step": 2056 + }, + { + "epoch": 0.33, + "learning_rate": 4.9834687616317724e-05, + "loss": 1.5604, + "step": 2057 + }, + { + "epoch": 0.33, + "learning_rate": 4.9834391286853475e-05, + "loss": 1.4442, + "step": 2058 + }, + { + "epoch": 0.33, + "learning_rate": 4.983409469291804e-05, + "loss": 1.5977, + "step": 2059 + }, + { + "epoch": 0.33, + "learning_rate": 4.9833797834514575e-05, + "loss": 1.6483, + "step": 2060 + }, + { + "epoch": 0.33, + "learning_rate": 4.983350071164623e-05, + "loss": 1.6006, + "step": 2061 + }, + { + "epoch": 0.33, + "learning_rate": 4.983320332431618e-05, + "loss": 1.5645, + "step": 2062 + }, + { + "epoch": 0.33, + "learning_rate": 4.9832905672527594e-05, + "loss": 1.6629, + "step": 2063 + }, + { + "epoch": 0.33, + "learning_rate": 4.983260775628365e-05, + "loss": 1.5357, + "step": 2064 + }, + { + "epoch": 0.33, + "learning_rate": 4.9832309575587496e-05, + "loss": 1.5527, + "step": 2065 + }, + { + "epoch": 0.33, + "learning_rate": 4.9832011130442325e-05, + "loss": 1.5168, + "step": 2066 + }, + { + "epoch": 0.33, + "learning_rate": 4.983171242085131e-05, + "loss": 1.5781, + "step": 2067 + }, + { + "epoch": 0.33, + "learning_rate": 4.983141344681763e-05, + "loss": 1.5493, + "step": 2068 + }, + { + "epoch": 0.33, + "learning_rate": 4.983111420834448e-05, + "loss": 1.5955, + "step": 2069 + }, + { + "epoch": 0.33, + "learning_rate": 4.9830814705435035e-05, + "loss": 1.5487, + "step": 2070 + }, + { + "epoch": 0.33, + "learning_rate": 4.9830514938092494e-05, + "loss": 1.5851, + "step": 2071 + }, + { + "epoch": 0.33, + "learning_rate": 4.983021490632004e-05, + "loss": 1.5355, + "step": 2072 + }, + { + "epoch": 0.33, + "learning_rate": 4.982991461012087e-05, + "loss": 1.5625, + "step": 2073 + }, + { + "epoch": 0.33, + "learning_rate": 4.9829614049498186e-05, + "loss": 1.5986, + "step": 2074 + }, + { + "epoch": 0.33, + "learning_rate": 4.982931322445519e-05, + "loss": 1.5332, + "step": 2075 + }, + { + "epoch": 0.33, + "learning_rate": 4.9829012134995076e-05, + "loss": 1.5747, + "step": 2076 + }, + { + "epoch": 0.33, + "learning_rate": 4.982871078112106e-05, + "loss": 1.5615, + "step": 2077 + }, + { + "epoch": 0.33, + "learning_rate": 4.982840916283634e-05, + "loss": 1.6279, + "step": 2078 + }, + { + "epoch": 0.33, + "learning_rate": 4.982810728014415e-05, + "loss": 1.5825, + "step": 2079 + }, + { + "epoch": 0.33, + "learning_rate": 4.9827805133047684e-05, + "loss": 1.5529, + "step": 2080 + }, + { + "epoch": 0.33, + "learning_rate": 4.982750272155017e-05, + "loss": 1.5619, + "step": 2081 + }, + { + "epoch": 0.33, + "learning_rate": 4.982720004565482e-05, + "loss": 1.5475, + "step": 2082 + }, + { + "epoch": 0.33, + "learning_rate": 4.982689710536487e-05, + "loss": 1.6112, + "step": 2083 + }, + { + "epoch": 0.33, + "learning_rate": 4.982659390068353e-05, + "loss": 1.5047, + "step": 2084 + }, + { + "epoch": 0.33, + "learning_rate": 4.9826290431614034e-05, + "loss": 1.5379, + "step": 2085 + }, + { + "epoch": 0.33, + "learning_rate": 4.982598669815963e-05, + "loss": 1.6242, + "step": 2086 + }, + { + "epoch": 0.33, + "learning_rate": 4.982568270032353e-05, + "loss": 1.5514, + "step": 2087 + }, + { + "epoch": 0.33, + "learning_rate": 4.982537843810899e-05, + "loss": 1.5641, + "step": 2088 + }, + { + "epoch": 0.33, + "learning_rate": 4.982507391151924e-05, + "loss": 1.5833, + "step": 2089 + }, + { + "epoch": 0.33, + "learning_rate": 4.9824769120557515e-05, + "loss": 1.5998, + "step": 2090 + }, + { + "epoch": 0.33, + "learning_rate": 4.982446406522708e-05, + "loss": 1.544, + "step": 2091 + }, + { + "epoch": 0.33, + "learning_rate": 4.982415874553117e-05, + "loss": 1.5121, + "step": 2092 + }, + { + "epoch": 0.33, + "learning_rate": 4.982385316147304e-05, + "loss": 1.5531, + "step": 2093 + }, + { + "epoch": 0.33, + "learning_rate": 4.982354731305595e-05, + "loss": 1.5793, + "step": 2094 + }, + { + "epoch": 0.33, + "learning_rate": 4.982324120028314e-05, + "loss": 1.5814, + "step": 2095 + }, + { + "epoch": 0.33, + "learning_rate": 4.9822934823157894e-05, + "loss": 1.5527, + "step": 2096 + }, + { + "epoch": 0.33, + "learning_rate": 4.982262818168346e-05, + "loss": 1.5731, + "step": 2097 + }, + { + "epoch": 0.33, + "learning_rate": 4.98223212758631e-05, + "loss": 1.6286, + "step": 2098 + }, + { + "epoch": 0.33, + "learning_rate": 4.98220141057001e-05, + "loss": 1.5567, + "step": 2099 + }, + { + "epoch": 0.33, + "learning_rate": 4.982170667119771e-05, + "loss": 1.6187, + "step": 2100 + }, + { + "epoch": 0.33, + "learning_rate": 4.9821398972359215e-05, + "loss": 1.578, + "step": 2101 + }, + { + "epoch": 0.33, + "learning_rate": 4.9821091009187894e-05, + "loss": 1.619, + "step": 2102 + }, + { + "epoch": 0.34, + "learning_rate": 4.982078278168701e-05, + "loss": 1.5404, + "step": 2103 + }, + { + "epoch": 0.34, + "learning_rate": 4.9820474289859876e-05, + "loss": 1.6558, + "step": 2104 + }, + { + "epoch": 0.34, + "learning_rate": 4.982016553370975e-05, + "loss": 1.5744, + "step": 2105 + }, + { + "epoch": 0.34, + "learning_rate": 4.9819856513239936e-05, + "loss": 1.467, + "step": 2106 + }, + { + "epoch": 0.34, + "learning_rate": 4.981954722845371e-05, + "loss": 1.5807, + "step": 2107 + }, + { + "epoch": 0.34, + "learning_rate": 4.981923767935438e-05, + "loss": 1.5156, + "step": 2108 + }, + { + "epoch": 0.34, + "learning_rate": 4.981892786594524e-05, + "loss": 1.6139, + "step": 2109 + }, + { + "epoch": 0.34, + "learning_rate": 4.981861778822958e-05, + "loss": 1.6066, + "step": 2110 + }, + { + "epoch": 0.34, + "learning_rate": 4.981830744621071e-05, + "loss": 1.5765, + "step": 2111 + }, + { + "epoch": 0.34, + "learning_rate": 4.9817996839891937e-05, + "loss": 1.5422, + "step": 2112 + }, + { + "epoch": 0.34, + "learning_rate": 4.981768596927656e-05, + "loss": 1.5788, + "step": 2113 + }, + { + "epoch": 0.34, + "learning_rate": 4.981737483436789e-05, + "loss": 1.5347, + "step": 2114 + }, + { + "epoch": 0.34, + "learning_rate": 4.9817063435169255e-05, + "loss": 1.5776, + "step": 2115 + }, + { + "epoch": 0.34, + "learning_rate": 4.981675177168396e-05, + "loss": 1.5941, + "step": 2116 + }, + { + "epoch": 0.34, + "learning_rate": 4.981643984391532e-05, + "loss": 1.4957, + "step": 2117 + }, + { + "epoch": 0.34, + "learning_rate": 4.981612765186667e-05, + "loss": 1.5352, + "step": 2118 + }, + { + "epoch": 0.34, + "learning_rate": 4.9815815195541316e-05, + "loss": 1.5262, + "step": 2119 + }, + { + "epoch": 0.34, + "learning_rate": 4.9815502474942605e-05, + "loss": 1.5361, + "step": 2120 + }, + { + "epoch": 0.34, + "learning_rate": 4.9815189490073856e-05, + "loss": 1.6029, + "step": 2121 + }, + { + "epoch": 0.34, + "learning_rate": 4.9814876240938404e-05, + "loss": 1.5916, + "step": 2122 + }, + { + "epoch": 0.34, + "learning_rate": 4.981456272753958e-05, + "loss": 1.5916, + "step": 2123 + }, + { + "epoch": 0.34, + "learning_rate": 4.981424894988074e-05, + "loss": 1.5503, + "step": 2124 + }, + { + "epoch": 0.34, + "learning_rate": 4.9813934907965205e-05, + "loss": 1.5912, + "step": 2125 + }, + { + "epoch": 0.34, + "learning_rate": 4.981362060179633e-05, + "loss": 1.5366, + "step": 2126 + }, + { + "epoch": 0.34, + "learning_rate": 4.981330603137746e-05, + "loss": 1.5257, + "step": 2127 + }, + { + "epoch": 0.34, + "learning_rate": 4.9812991196711944e-05, + "loss": 1.5718, + "step": 2128 + }, + { + "epoch": 0.34, + "learning_rate": 4.9812676097803146e-05, + "loss": 1.5765, + "step": 2129 + }, + { + "epoch": 0.34, + "learning_rate": 4.98123607346544e-05, + "loss": 1.5392, + "step": 2130 + }, + { + "epoch": 0.34, + "learning_rate": 4.981204510726908e-05, + "loss": 1.537, + "step": 2131 + }, + { + "epoch": 0.34, + "learning_rate": 4.9811729215650545e-05, + "loss": 1.5081, + "step": 2132 + }, + { + "epoch": 0.34, + "learning_rate": 4.981141305980216e-05, + "loss": 1.5267, + "step": 2133 + }, + { + "epoch": 0.34, + "learning_rate": 4.981109663972728e-05, + "loss": 1.5234, + "step": 2134 + }, + { + "epoch": 0.34, + "learning_rate": 4.981077995542929e-05, + "loss": 1.6201, + "step": 2135 + }, + { + "epoch": 0.34, + "learning_rate": 4.981046300691156e-05, + "loss": 1.5041, + "step": 2136 + }, + { + "epoch": 0.34, + "learning_rate": 4.981014579417746e-05, + "loss": 1.5255, + "step": 2137 + }, + { + "epoch": 0.34, + "learning_rate": 4.9809828317230355e-05, + "loss": 1.5311, + "step": 2138 + }, + { + "epoch": 0.34, + "learning_rate": 4.980951057607366e-05, + "loss": 1.5295, + "step": 2139 + }, + { + "epoch": 0.34, + "learning_rate": 4.980919257071073e-05, + "loss": 1.5798, + "step": 2140 + }, + { + "epoch": 0.34, + "learning_rate": 4.9808874301144965e-05, + "loss": 1.6021, + "step": 2141 + }, + { + "epoch": 0.34, + "learning_rate": 4.980855576737975e-05, + "loss": 1.6465, + "step": 2142 + }, + { + "epoch": 0.34, + "learning_rate": 4.9808236969418475e-05, + "loss": 1.5339, + "step": 2143 + }, + { + "epoch": 0.34, + "learning_rate": 4.980791790726454e-05, + "loss": 1.5604, + "step": 2144 + }, + { + "epoch": 0.34, + "learning_rate": 4.980759858092135e-05, + "loss": 1.5262, + "step": 2145 + }, + { + "epoch": 0.34, + "learning_rate": 4.9807278990392284e-05, + "loss": 1.5754, + "step": 2146 + }, + { + "epoch": 0.34, + "learning_rate": 4.9806959135680753e-05, + "loss": 1.4919, + "step": 2147 + }, + { + "epoch": 0.34, + "learning_rate": 4.980663901679018e-05, + "loss": 1.5449, + "step": 2148 + }, + { + "epoch": 0.34, + "learning_rate": 4.9806318633723956e-05, + "loss": 1.5202, + "step": 2149 + }, + { + "epoch": 0.34, + "learning_rate": 4.98059979864855e-05, + "loss": 1.5625, + "step": 2150 + }, + { + "epoch": 0.34, + "learning_rate": 4.980567707507822e-05, + "loss": 1.6237, + "step": 2151 + }, + { + "epoch": 0.34, + "learning_rate": 4.980535589950555e-05, + "loss": 1.5526, + "step": 2152 + }, + { + "epoch": 0.34, + "learning_rate": 4.9805034459770895e-05, + "loss": 1.5381, + "step": 2153 + }, + { + "epoch": 0.34, + "learning_rate": 4.9804712755877674e-05, + "loss": 1.6294, + "step": 2154 + }, + { + "epoch": 0.34, + "learning_rate": 4.980439078782933e-05, + "loss": 1.4889, + "step": 2155 + }, + { + "epoch": 0.34, + "learning_rate": 4.980406855562928e-05, + "loss": 1.501, + "step": 2156 + }, + { + "epoch": 0.34, + "learning_rate": 4.980374605928097e-05, + "loss": 1.5512, + "step": 2157 + }, + { + "epoch": 0.34, + "learning_rate": 4.980342329878781e-05, + "loss": 1.5811, + "step": 2158 + }, + { + "epoch": 0.34, + "learning_rate": 4.980310027415325e-05, + "loss": 1.5606, + "step": 2159 + }, + { + "epoch": 0.34, + "learning_rate": 4.980277698538074e-05, + "loss": 1.5329, + "step": 2160 + }, + { + "epoch": 0.34, + "learning_rate": 4.9802453432473706e-05, + "loss": 1.6675, + "step": 2161 + }, + { + "epoch": 0.34, + "learning_rate": 4.98021296154356e-05, + "loss": 1.5697, + "step": 2162 + }, + { + "epoch": 0.34, + "learning_rate": 4.9801805534269875e-05, + "loss": 1.6119, + "step": 2163 + }, + { + "epoch": 0.34, + "learning_rate": 4.9801481188979985e-05, + "loss": 1.541, + "step": 2164 + }, + { + "epoch": 0.34, + "learning_rate": 4.9801156579569365e-05, + "loss": 1.5301, + "step": 2165 + }, + { + "epoch": 0.35, + "learning_rate": 4.980083170604149e-05, + "loss": 1.6203, + "step": 2166 + }, + { + "epoch": 0.35, + "learning_rate": 4.9800506568399814e-05, + "loss": 1.5653, + "step": 2167 + }, + { + "epoch": 0.35, + "learning_rate": 4.9800181166647796e-05, + "loss": 1.4551, + "step": 2168 + }, + { + "epoch": 0.35, + "learning_rate": 4.979985550078891e-05, + "loss": 1.6159, + "step": 2169 + }, + { + "epoch": 0.35, + "learning_rate": 4.979952957082662e-05, + "loss": 1.5435, + "step": 2170 + }, + { + "epoch": 0.35, + "learning_rate": 4.979920337676439e-05, + "loss": 1.5904, + "step": 2171 + }, + { + "epoch": 0.35, + "learning_rate": 4.979887691860571e-05, + "loss": 1.5149, + "step": 2172 + }, + { + "epoch": 0.35, + "learning_rate": 4.9798550196354046e-05, + "loss": 1.5433, + "step": 2173 + }, + { + "epoch": 0.35, + "learning_rate": 4.979822321001287e-05, + "loss": 1.5874, + "step": 2174 + }, + { + "epoch": 0.35, + "learning_rate": 4.9797895959585675e-05, + "loss": 1.5248, + "step": 2175 + }, + { + "epoch": 0.35, + "learning_rate": 4.979756844507594e-05, + "loss": 1.4982, + "step": 2176 + }, + { + "epoch": 0.35, + "learning_rate": 4.9797240666487164e-05, + "loss": 1.5879, + "step": 2177 + }, + { + "epoch": 0.35, + "learning_rate": 4.979691262382282e-05, + "loss": 1.6005, + "step": 2178 + }, + { + "epoch": 0.35, + "learning_rate": 4.979658431708642e-05, + "loss": 1.5834, + "step": 2179 + }, + { + "epoch": 0.35, + "learning_rate": 4.979625574628145e-05, + "loss": 1.5948, + "step": 2180 + }, + { + "epoch": 0.35, + "learning_rate": 4.979592691141141e-05, + "loss": 1.6069, + "step": 2181 + }, + { + "epoch": 0.35, + "learning_rate": 4.9795597812479797e-05, + "loss": 1.6394, + "step": 2182 + }, + { + "epoch": 0.35, + "learning_rate": 4.979526844949013e-05, + "loss": 1.568, + "step": 2183 + }, + { + "epoch": 0.35, + "learning_rate": 4.9794938822445895e-05, + "loss": 1.5794, + "step": 2184 + }, + { + "epoch": 0.35, + "learning_rate": 4.9794608931350626e-05, + "loss": 1.603, + "step": 2185 + }, + { + "epoch": 0.35, + "learning_rate": 4.979427877620782e-05, + "loss": 1.5549, + "step": 2186 + }, + { + "epoch": 0.35, + "learning_rate": 4.9793948357021e-05, + "loss": 1.5625, + "step": 2187 + }, + { + "epoch": 0.35, + "learning_rate": 4.979361767379368e-05, + "loss": 1.5378, + "step": 2188 + }, + { + "epoch": 0.35, + "learning_rate": 4.979328672652939e-05, + "loss": 1.5873, + "step": 2189 + }, + { + "epoch": 0.35, + "learning_rate": 4.979295551523164e-05, + "loss": 1.5616, + "step": 2190 + }, + { + "epoch": 0.35, + "learning_rate": 4.979262403990398e-05, + "loss": 1.5227, + "step": 2191 + }, + { + "epoch": 0.35, + "learning_rate": 4.9792292300549915e-05, + "loss": 1.5845, + "step": 2192 + }, + { + "epoch": 0.35, + "learning_rate": 4.979196029717299e-05, + "loss": 1.5493, + "step": 2193 + }, + { + "epoch": 0.35, + "learning_rate": 4.979162802977674e-05, + "loss": 1.5208, + "step": 2194 + }, + { + "epoch": 0.35, + "learning_rate": 4.97912954983647e-05, + "loss": 1.5741, + "step": 2195 + }, + { + "epoch": 0.35, + "learning_rate": 4.979096270294042e-05, + "loss": 1.6113, + "step": 2196 + }, + { + "epoch": 0.35, + "learning_rate": 4.9790629643507434e-05, + "loss": 1.5156, + "step": 2197 + }, + { + "epoch": 0.35, + "learning_rate": 4.9790296320069294e-05, + "loss": 1.5501, + "step": 2198 + }, + { + "epoch": 0.35, + "learning_rate": 4.9789962732629545e-05, + "loss": 1.5468, + "step": 2199 + }, + { + "epoch": 0.35, + "learning_rate": 4.978962888119175e-05, + "loss": 1.5929, + "step": 2200 + }, + { + "epoch": 0.35, + "learning_rate": 4.9789294765759456e-05, + "loss": 1.6066, + "step": 2201 + }, + { + "epoch": 0.35, + "learning_rate": 4.9788960386336224e-05, + "loss": 1.6345, + "step": 2202 + }, + { + "epoch": 0.35, + "learning_rate": 4.978862574292561e-05, + "loss": 1.5936, + "step": 2203 + }, + { + "epoch": 0.35, + "learning_rate": 4.978829083553118e-05, + "loss": 1.5669, + "step": 2204 + }, + { + "epoch": 0.35, + "learning_rate": 4.97879556641565e-05, + "loss": 1.5878, + "step": 2205 + }, + { + "epoch": 0.35, + "learning_rate": 4.978762022880515e-05, + "loss": 1.6045, + "step": 2206 + }, + { + "epoch": 0.35, + "learning_rate": 4.9787284529480695e-05, + "loss": 1.5627, + "step": 2207 + }, + { + "epoch": 0.35, + "learning_rate": 4.97869485661867e-05, + "loss": 1.5818, + "step": 2208 + }, + { + "epoch": 0.35, + "learning_rate": 4.978661233892675e-05, + "loss": 1.5547, + "step": 2209 + }, + { + "epoch": 0.35, + "learning_rate": 4.9786275847704434e-05, + "loss": 1.6066, + "step": 2210 + }, + { + "epoch": 0.35, + "learning_rate": 4.978593909252333e-05, + "loss": 1.6055, + "step": 2211 + }, + { + "epoch": 0.35, + "learning_rate": 4.9785602073387014e-05, + "loss": 1.537, + "step": 2212 + }, + { + "epoch": 0.35, + "learning_rate": 4.978526479029909e-05, + "loss": 1.5192, + "step": 2213 + }, + { + "epoch": 0.35, + "learning_rate": 4.978492724326314e-05, + "loss": 1.5433, + "step": 2214 + }, + { + "epoch": 0.35, + "learning_rate": 4.9784589432282764e-05, + "loss": 1.5702, + "step": 2215 + }, + { + "epoch": 0.35, + "learning_rate": 4.978425135736156e-05, + "loss": 1.5726, + "step": 2216 + }, + { + "epoch": 0.35, + "learning_rate": 4.978391301850312e-05, + "loss": 1.6179, + "step": 2217 + }, + { + "epoch": 0.35, + "learning_rate": 4.9783574415711056e-05, + "loss": 1.5798, + "step": 2218 + }, + { + "epoch": 0.35, + "learning_rate": 4.978323554898897e-05, + "loss": 1.541, + "step": 2219 + }, + { + "epoch": 0.35, + "learning_rate": 4.9782896418340475e-05, + "loss": 1.5649, + "step": 2220 + }, + { + "epoch": 0.35, + "learning_rate": 4.978255702376917e-05, + "loss": 1.577, + "step": 2221 + }, + { + "epoch": 0.35, + "learning_rate": 4.978221736527869e-05, + "loss": 1.5516, + "step": 2222 + }, + { + "epoch": 0.35, + "learning_rate": 4.9781877442872636e-05, + "loss": 1.6066, + "step": 2223 + }, + { + "epoch": 0.35, + "learning_rate": 4.978153725655463e-05, + "loss": 1.5246, + "step": 2224 + }, + { + "epoch": 0.35, + "learning_rate": 4.9781196806328304e-05, + "loss": 1.5728, + "step": 2225 + }, + { + "epoch": 0.35, + "learning_rate": 4.978085609219727e-05, + "loss": 1.5311, + "step": 2226 + }, + { + "epoch": 0.35, + "learning_rate": 4.978051511416517e-05, + "loss": 1.5716, + "step": 2227 + }, + { + "epoch": 0.35, + "learning_rate": 4.978017387223563e-05, + "loss": 1.6053, + "step": 2228 + }, + { + "epoch": 0.36, + "learning_rate": 4.977983236641227e-05, + "loss": 1.4961, + "step": 2229 + }, + { + "epoch": 0.36, + "learning_rate": 4.977949059669875e-05, + "loss": 1.5274, + "step": 2230 + }, + { + "epoch": 0.36, + "learning_rate": 4.97791485630987e-05, + "loss": 1.6004, + "step": 2231 + }, + { + "epoch": 0.36, + "learning_rate": 4.977880626561575e-05, + "loss": 1.6664, + "step": 2232 + }, + { + "epoch": 0.36, + "learning_rate": 4.977846370425357e-05, + "loss": 1.5544, + "step": 2233 + }, + { + "epoch": 0.36, + "learning_rate": 4.977812087901579e-05, + "loss": 1.5444, + "step": 2234 + }, + { + "epoch": 0.36, + "learning_rate": 4.9777777789906064e-05, + "loss": 1.5685, + "step": 2235 + }, + { + "epoch": 0.36, + "learning_rate": 4.977743443692805e-05, + "loss": 1.57, + "step": 2236 + }, + { + "epoch": 0.36, + "learning_rate": 4.97770908200854e-05, + "loss": 1.514, + "step": 2237 + }, + { + "epoch": 0.36, + "learning_rate": 4.977674693938178e-05, + "loss": 1.5519, + "step": 2238 + }, + { + "epoch": 0.36, + "learning_rate": 4.977640279482084e-05, + "loss": 1.5785, + "step": 2239 + }, + { + "epoch": 0.36, + "learning_rate": 4.977605838640626e-05, + "loss": 1.6035, + "step": 2240 + }, + { + "epoch": 0.36, + "learning_rate": 4.977571371414169e-05, + "loss": 1.5322, + "step": 2241 + }, + { + "epoch": 0.36, + "learning_rate": 4.9775368778030815e-05, + "loss": 1.5791, + "step": 2242 + }, + { + "epoch": 0.36, + "learning_rate": 4.977502357807731e-05, + "loss": 1.5705, + "step": 2243 + }, + { + "epoch": 0.36, + "learning_rate": 4.9774678114284846e-05, + "loss": 1.5949, + "step": 2244 + }, + { + "epoch": 0.36, + "learning_rate": 4.97743323866571e-05, + "loss": 1.5604, + "step": 2245 + }, + { + "epoch": 0.36, + "learning_rate": 4.9773986395197746e-05, + "loss": 1.5824, + "step": 2246 + }, + { + "epoch": 0.36, + "learning_rate": 4.977364013991048e-05, + "loss": 1.5218, + "step": 2247 + }, + { + "epoch": 0.36, + "learning_rate": 4.9773293620798986e-05, + "loss": 1.4997, + "step": 2248 + }, + { + "epoch": 0.36, + "learning_rate": 4.9772946837866965e-05, + "loss": 1.5633, + "step": 2249 + }, + { + "epoch": 0.36, + "learning_rate": 4.977259979111809e-05, + "loss": 1.5885, + "step": 2250 + }, + { + "epoch": 0.36, + "learning_rate": 4.977225248055607e-05, + "loss": 1.6475, + "step": 2251 + }, + { + "epoch": 0.36, + "learning_rate": 4.977190490618461e-05, + "loss": 1.6014, + "step": 2252 + }, + { + "epoch": 0.36, + "learning_rate": 4.977155706800739e-05, + "loss": 1.5143, + "step": 2253 + }, + { + "epoch": 0.36, + "learning_rate": 4.9771208966028124e-05, + "loss": 1.5299, + "step": 2254 + }, + { + "epoch": 0.36, + "learning_rate": 4.9770860600250534e-05, + "loss": 1.5545, + "step": 2255 + }, + { + "epoch": 0.36, + "learning_rate": 4.9770511970678315e-05, + "loss": 1.5182, + "step": 2256 + }, + { + "epoch": 0.36, + "learning_rate": 4.9770163077315174e-05, + "loss": 1.5697, + "step": 2257 + }, + { + "epoch": 0.36, + "learning_rate": 4.9769813920164845e-05, + "loss": 1.5593, + "step": 2258 + }, + { + "epoch": 0.36, + "learning_rate": 4.976946449923102e-05, + "loss": 1.5347, + "step": 2259 + }, + { + "epoch": 0.36, + "learning_rate": 4.976911481451745e-05, + "loss": 1.5319, + "step": 2260 + }, + { + "epoch": 0.36, + "learning_rate": 4.976876486602784e-05, + "loss": 1.5379, + "step": 2261 + }, + { + "epoch": 0.36, + "learning_rate": 4.976841465376592e-05, + "loss": 1.6384, + "step": 2262 + }, + { + "epoch": 0.36, + "learning_rate": 4.976806417773543e-05, + "loss": 1.5832, + "step": 2263 + }, + { + "epoch": 0.36, + "learning_rate": 4.9767713437940086e-05, + "loss": 1.5259, + "step": 2264 + }, + { + "epoch": 0.36, + "learning_rate": 4.976736243438363e-05, + "loss": 1.5633, + "step": 2265 + }, + { + "epoch": 0.36, + "learning_rate": 4.97670111670698e-05, + "loss": 1.5624, + "step": 2266 + }, + { + "epoch": 0.36, + "learning_rate": 4.9766659636002346e-05, + "loss": 1.5618, + "step": 2267 + }, + { + "epoch": 0.36, + "learning_rate": 4.976630784118499e-05, + "loss": 1.5594, + "step": 2268 + }, + { + "epoch": 0.36, + "learning_rate": 4.97659557826215e-05, + "loss": 1.5563, + "step": 2269 + }, + { + "epoch": 0.36, + "learning_rate": 4.9765603460315615e-05, + "loss": 1.5125, + "step": 2270 + }, + { + "epoch": 0.36, + "learning_rate": 4.9765250874271093e-05, + "loss": 1.5754, + "step": 2271 + }, + { + "epoch": 0.36, + "learning_rate": 4.976489802449168e-05, + "loss": 1.6237, + "step": 2272 + }, + { + "epoch": 0.36, + "learning_rate": 4.976454491098114e-05, + "loss": 1.5908, + "step": 2273 + }, + { + "epoch": 0.36, + "learning_rate": 4.9764191533743225e-05, + "loss": 1.5288, + "step": 2274 + }, + { + "epoch": 0.36, + "learning_rate": 4.976383789278171e-05, + "loss": 1.6018, + "step": 2275 + }, + { + "epoch": 0.36, + "learning_rate": 4.9763483988100354e-05, + "loss": 1.5085, + "step": 2276 + }, + { + "epoch": 0.36, + "learning_rate": 4.976312981970294e-05, + "loss": 1.5573, + "step": 2277 + }, + { + "epoch": 0.36, + "learning_rate": 4.9762775387593206e-05, + "loss": 1.5374, + "step": 2278 + }, + { + "epoch": 0.36, + "learning_rate": 4.9762420691774964e-05, + "loss": 1.5695, + "step": 2279 + }, + { + "epoch": 0.36, + "learning_rate": 4.9762065732251973e-05, + "loss": 1.5521, + "step": 2280 + }, + { + "epoch": 0.36, + "learning_rate": 4.9761710509028016e-05, + "loss": 1.5399, + "step": 2281 + }, + { + "epoch": 0.36, + "learning_rate": 4.976135502210687e-05, + "loss": 1.5697, + "step": 2282 + }, + { + "epoch": 0.36, + "learning_rate": 4.9760999271492326e-05, + "loss": 1.559, + "step": 2283 + }, + { + "epoch": 0.36, + "learning_rate": 4.9760643257188175e-05, + "loss": 1.5965, + "step": 2284 + }, + { + "epoch": 0.36, + "learning_rate": 4.976028697919821e-05, + "loss": 1.5412, + "step": 2285 + }, + { + "epoch": 0.36, + "learning_rate": 4.9759930437526215e-05, + "loss": 1.5816, + "step": 2286 + }, + { + "epoch": 0.36, + "learning_rate": 4.9759573632175996e-05, + "loss": 1.5549, + "step": 2287 + }, + { + "epoch": 0.36, + "learning_rate": 4.9759216563151346e-05, + "loss": 1.527, + "step": 2288 + }, + { + "epoch": 0.36, + "learning_rate": 4.9758859230456066e-05, + "loss": 1.5579, + "step": 2289 + }, + { + "epoch": 0.36, + "learning_rate": 4.975850163409398e-05, + "loss": 1.556, + "step": 2290 + }, + { + "epoch": 0.36, + "learning_rate": 4.975814377406888e-05, + "loss": 1.5864, + "step": 2291 + }, + { + "epoch": 0.37, + "learning_rate": 4.975778565038457e-05, + "loss": 1.601, + "step": 2292 + }, + { + "epoch": 0.37, + "learning_rate": 4.975742726304489e-05, + "loss": 1.6148, + "step": 2293 + }, + { + "epoch": 0.37, + "learning_rate": 4.9757068612053625e-05, + "loss": 1.6011, + "step": 2294 + }, + { + "epoch": 0.37, + "learning_rate": 4.9756709697414615e-05, + "loss": 1.596, + "step": 2295 + }, + { + "epoch": 0.37, + "learning_rate": 4.975635051913168e-05, + "loss": 1.5, + "step": 2296 + }, + { + "epoch": 0.37, + "learning_rate": 4.9755991077208644e-05, + "loss": 1.59, + "step": 2297 + }, + { + "epoch": 0.37, + "learning_rate": 4.9755631371649326e-05, + "loss": 1.6227, + "step": 2298 + }, + { + "epoch": 0.37, + "learning_rate": 4.975527140245756e-05, + "loss": 1.5291, + "step": 2299 + }, + { + "epoch": 0.37, + "learning_rate": 4.975491116963719e-05, + "loss": 1.543, + "step": 2300 + }, + { + "epoch": 0.37, + "learning_rate": 4.975455067319205e-05, + "loss": 1.5407, + "step": 2301 + }, + { + "epoch": 0.37, + "learning_rate": 4.975418991312596e-05, + "loss": 1.5277, + "step": 2302 + }, + { + "epoch": 0.37, + "learning_rate": 4.9753828889442784e-05, + "loss": 1.5042, + "step": 2303 + }, + { + "epoch": 0.37, + "learning_rate": 4.975346760214636e-05, + "loss": 1.5444, + "step": 2304 + }, + { + "epoch": 0.37, + "learning_rate": 4.9753106051240526e-05, + "loss": 1.5057, + "step": 2305 + }, + { + "epoch": 0.37, + "learning_rate": 4.975274423672916e-05, + "loss": 1.5557, + "step": 2306 + }, + { + "epoch": 0.37, + "learning_rate": 4.975238215861607e-05, + "loss": 1.5247, + "step": 2307 + }, + { + "epoch": 0.37, + "learning_rate": 4.975201981690516e-05, + "loss": 1.5848, + "step": 2308 + }, + { + "epoch": 0.37, + "learning_rate": 4.975165721160026e-05, + "loss": 1.5947, + "step": 2309 + }, + { + "epoch": 0.37, + "learning_rate": 4.9751294342705224e-05, + "loss": 1.5469, + "step": 2310 + }, + { + "epoch": 0.37, + "learning_rate": 4.975093121022394e-05, + "loss": 1.5252, + "step": 2311 + }, + { + "epoch": 0.37, + "learning_rate": 4.9750567814160263e-05, + "loss": 1.5535, + "step": 2312 + }, + { + "epoch": 0.37, + "learning_rate": 4.9750204154518076e-05, + "loss": 1.5677, + "step": 2313 + }, + { + "epoch": 0.37, + "learning_rate": 4.974984023130123e-05, + "loss": 1.6056, + "step": 2314 + }, + { + "epoch": 0.37, + "learning_rate": 4.974947604451362e-05, + "loss": 1.5993, + "step": 2315 + }, + { + "epoch": 0.37, + "learning_rate": 4.9749111594159106e-05, + "loss": 1.5487, + "step": 2316 + }, + { + "epoch": 0.37, + "learning_rate": 4.974874688024158e-05, + "loss": 1.5674, + "step": 2317 + }, + { + "epoch": 0.37, + "learning_rate": 4.974838190276493e-05, + "loss": 1.4691, + "step": 2318 + }, + { + "epoch": 0.37, + "learning_rate": 4.974801666173304e-05, + "loss": 1.5744, + "step": 2319 + }, + { + "epoch": 0.37, + "learning_rate": 4.974765115714979e-05, + "loss": 1.5562, + "step": 2320 + }, + { + "epoch": 0.37, + "learning_rate": 4.974728538901908e-05, + "loss": 1.5064, + "step": 2321 + }, + { + "epoch": 0.37, + "learning_rate": 4.974691935734481e-05, + "loss": 1.506, + "step": 2322 + }, + { + "epoch": 0.37, + "learning_rate": 4.974655306213088e-05, + "loss": 1.5138, + "step": 2323 + }, + { + "epoch": 0.37, + "learning_rate": 4.9746186503381174e-05, + "loss": 1.5907, + "step": 2324 + }, + { + "epoch": 0.37, + "learning_rate": 4.9745819681099606e-05, + "loss": 1.5527, + "step": 2325 + }, + { + "epoch": 0.37, + "learning_rate": 4.974545259529008e-05, + "loss": 1.5682, + "step": 2326 + }, + { + "epoch": 0.37, + "learning_rate": 4.974508524595651e-05, + "loss": 1.533, + "step": 2327 + }, + { + "epoch": 0.37, + "learning_rate": 4.974471763310281e-05, + "loss": 1.5138, + "step": 2328 + }, + { + "epoch": 0.37, + "learning_rate": 4.9744349756732884e-05, + "loss": 1.5811, + "step": 2329 + }, + { + "epoch": 0.37, + "learning_rate": 4.974398161685066e-05, + "loss": 1.5772, + "step": 2330 + }, + { + "epoch": 0.37, + "learning_rate": 4.974361321346005e-05, + "loss": 1.6348, + "step": 2331 + }, + { + "epoch": 0.37, + "learning_rate": 4.974324454656498e-05, + "loss": 1.5257, + "step": 2332 + }, + { + "epoch": 0.37, + "learning_rate": 4.974287561616938e-05, + "loss": 1.536, + "step": 2333 + }, + { + "epoch": 0.37, + "learning_rate": 4.974250642227718e-05, + "loss": 1.6019, + "step": 2334 + }, + { + "epoch": 0.37, + "learning_rate": 4.974213696489231e-05, + "loss": 1.5726, + "step": 2335 + }, + { + "epoch": 0.37, + "learning_rate": 4.974176724401869e-05, + "loss": 1.5412, + "step": 2336 + }, + { + "epoch": 0.37, + "learning_rate": 4.9741397259660284e-05, + "loss": 1.6188, + "step": 2337 + }, + { + "epoch": 0.37, + "learning_rate": 4.9741027011821006e-05, + "loss": 1.5739, + "step": 2338 + }, + { + "epoch": 0.37, + "learning_rate": 4.9740656500504826e-05, + "loss": 1.57, + "step": 2339 + }, + { + "epoch": 0.37, + "learning_rate": 4.9740285725715665e-05, + "loss": 1.5506, + "step": 2340 + }, + { + "epoch": 0.37, + "learning_rate": 4.9739914687457476e-05, + "loss": 1.5475, + "step": 2341 + }, + { + "epoch": 0.37, + "learning_rate": 4.973954338573422e-05, + "loss": 1.5796, + "step": 2342 + }, + { + "epoch": 0.37, + "learning_rate": 4.9739171820549846e-05, + "loss": 1.6305, + "step": 2343 + }, + { + "epoch": 0.37, + "learning_rate": 4.9738799991908313e-05, + "loss": 1.5718, + "step": 2344 + }, + { + "epoch": 0.37, + "learning_rate": 4.973842789981358e-05, + "loss": 1.4912, + "step": 2345 + }, + { + "epoch": 0.37, + "learning_rate": 4.9738055544269614e-05, + "loss": 1.4889, + "step": 2346 + }, + { + "epoch": 0.37, + "learning_rate": 4.9737682925280374e-05, + "loss": 1.5925, + "step": 2347 + }, + { + "epoch": 0.37, + "learning_rate": 4.9737310042849814e-05, + "loss": 1.5695, + "step": 2348 + }, + { + "epoch": 0.37, + "learning_rate": 4.973693689698194e-05, + "loss": 1.5226, + "step": 2349 + }, + { + "epoch": 0.37, + "learning_rate": 4.9736563487680696e-05, + "loss": 1.5153, + "step": 2350 + }, + { + "epoch": 0.37, + "learning_rate": 4.973618981495007e-05, + "loss": 1.5796, + "step": 2351 + }, + { + "epoch": 0.37, + "learning_rate": 4.973581587879405e-05, + "loss": 1.6146, + "step": 2352 + }, + { + "epoch": 0.37, + "learning_rate": 4.9735441679216595e-05, + "loss": 1.5235, + "step": 2353 + }, + { + "epoch": 0.38, + "learning_rate": 4.973506721622171e-05, + "loss": 1.55, + "step": 2354 + }, + { + "epoch": 0.38, + "learning_rate": 4.973469248981337e-05, + "loss": 1.5789, + "step": 2355 + }, + { + "epoch": 0.38, + "learning_rate": 4.9734317499995575e-05, + "loss": 1.5204, + "step": 2356 + }, + { + "epoch": 0.38, + "learning_rate": 4.973394224677231e-05, + "loss": 1.5137, + "step": 2357 + }, + { + "epoch": 0.38, + "learning_rate": 4.973356673014759e-05, + "loss": 1.6133, + "step": 2358 + }, + { + "epoch": 0.38, + "learning_rate": 4.9733190950125394e-05, + "loss": 1.5347, + "step": 2359 + }, + { + "epoch": 0.38, + "learning_rate": 4.973281490670973e-05, + "loss": 1.4959, + "step": 2360 + }, + { + "epoch": 0.38, + "learning_rate": 4.9732438599904596e-05, + "loss": 1.5702, + "step": 2361 + }, + { + "epoch": 0.38, + "learning_rate": 4.973206202971402e-05, + "loss": 1.6179, + "step": 2362 + }, + { + "epoch": 0.38, + "learning_rate": 4.973168519614199e-05, + "loss": 1.5719, + "step": 2363 + }, + { + "epoch": 0.38, + "learning_rate": 4.973130809919252e-05, + "loss": 1.6253, + "step": 2364 + }, + { + "epoch": 0.38, + "learning_rate": 4.973093073886965e-05, + "loss": 1.5916, + "step": 2365 + }, + { + "epoch": 0.38, + "learning_rate": 4.973055311517737e-05, + "loss": 1.5303, + "step": 2366 + }, + { + "epoch": 0.38, + "learning_rate": 4.973017522811973e-05, + "loss": 1.5843, + "step": 2367 + }, + { + "epoch": 0.38, + "learning_rate": 4.9729797077700715e-05, + "loss": 1.5515, + "step": 2368 + }, + { + "epoch": 0.38, + "learning_rate": 4.972941866392439e-05, + "loss": 1.5278, + "step": 2369 + }, + { + "epoch": 0.38, + "learning_rate": 4.9729039986794764e-05, + "loss": 1.5744, + "step": 2370 + }, + { + "epoch": 0.38, + "learning_rate": 4.972866104631588e-05, + "loss": 1.5728, + "step": 2371 + }, + { + "epoch": 0.38, + "learning_rate": 4.9728281842491764e-05, + "loss": 1.5114, + "step": 2372 + }, + { + "epoch": 0.38, + "learning_rate": 4.972790237532646e-05, + "loss": 1.5326, + "step": 2373 + }, + { + "epoch": 0.38, + "learning_rate": 4.972752264482401e-05, + "loss": 1.5462, + "step": 2374 + }, + { + "epoch": 0.38, + "learning_rate": 4.9727142650988454e-05, + "loss": 1.4865, + "step": 2375 + }, + { + "epoch": 0.38, + "learning_rate": 4.9726762393823845e-05, + "loss": 1.5383, + "step": 2376 + }, + { + "epoch": 0.38, + "learning_rate": 4.972638187333423e-05, + "loss": 1.5754, + "step": 2377 + }, + { + "epoch": 0.38, + "learning_rate": 4.972600108952366e-05, + "loss": 1.5539, + "step": 2378 + }, + { + "epoch": 0.38, + "learning_rate": 4.972562004239618e-05, + "loss": 1.6265, + "step": 2379 + }, + { + "epoch": 0.38, + "learning_rate": 4.972523873195586e-05, + "loss": 1.5829, + "step": 2380 + }, + { + "epoch": 0.38, + "learning_rate": 4.972485715820676e-05, + "loss": 1.5877, + "step": 2381 + }, + { + "epoch": 0.38, + "learning_rate": 4.9724475321152955e-05, + "loss": 1.5125, + "step": 2382 + }, + { + "epoch": 0.38, + "learning_rate": 4.9724093220798485e-05, + "loss": 1.5812, + "step": 2383 + }, + { + "epoch": 0.38, + "learning_rate": 4.972371085714743e-05, + "loss": 1.5539, + "step": 2384 + }, + { + "epoch": 0.38, + "learning_rate": 4.972332823020387e-05, + "loss": 1.4987, + "step": 2385 + }, + { + "epoch": 0.38, + "learning_rate": 4.972294533997188e-05, + "loss": 1.4867, + "step": 2386 + }, + { + "epoch": 0.38, + "learning_rate": 4.9722562186455526e-05, + "loss": 1.4632, + "step": 2387 + }, + { + "epoch": 0.38, + "learning_rate": 4.972217876965889e-05, + "loss": 1.5985, + "step": 2388 + }, + { + "epoch": 0.38, + "learning_rate": 4.972179508958606e-05, + "loss": 1.5479, + "step": 2389 + }, + { + "epoch": 0.38, + "learning_rate": 4.972141114624113e-05, + "loss": 1.5622, + "step": 2390 + }, + { + "epoch": 0.38, + "learning_rate": 4.972102693962818e-05, + "loss": 1.5619, + "step": 2391 + }, + { + "epoch": 0.38, + "learning_rate": 4.9720642469751294e-05, + "loss": 1.5367, + "step": 2392 + }, + { + "epoch": 0.38, + "learning_rate": 4.9720257736614574e-05, + "loss": 1.5426, + "step": 2393 + }, + { + "epoch": 0.38, + "learning_rate": 4.9719872740222114e-05, + "loss": 1.5705, + "step": 2394 + }, + { + "epoch": 0.38, + "learning_rate": 4.971948748057803e-05, + "loss": 1.5929, + "step": 2395 + }, + { + "epoch": 0.38, + "learning_rate": 4.97191019576864e-05, + "loss": 1.5687, + "step": 2396 + }, + { + "epoch": 0.38, + "learning_rate": 4.971871617155135e-05, + "loss": 1.6174, + "step": 2397 + }, + { + "epoch": 0.38, + "learning_rate": 4.971833012217698e-05, + "loss": 1.5633, + "step": 2398 + }, + { + "epoch": 0.38, + "learning_rate": 4.971794380956739e-05, + "loss": 1.6029, + "step": 2399 + }, + { + "epoch": 0.38, + "learning_rate": 4.9717557233726716e-05, + "loss": 1.6021, + "step": 2400 + }, + { + "epoch": 0.38, + "learning_rate": 4.971717039465906e-05, + "loss": 1.5304, + "step": 2401 + }, + { + "epoch": 0.38, + "learning_rate": 4.9716783292368545e-05, + "loss": 1.516, + "step": 2402 + }, + { + "epoch": 0.38, + "learning_rate": 4.9716395926859294e-05, + "loss": 1.5916, + "step": 2403 + }, + { + "epoch": 0.38, + "learning_rate": 4.971600829813543e-05, + "loss": 1.5187, + "step": 2404 + }, + { + "epoch": 0.38, + "learning_rate": 4.971562040620109e-05, + "loss": 1.5636, + "step": 2405 + }, + { + "epoch": 0.38, + "learning_rate": 4.97152322510604e-05, + "loss": 1.4987, + "step": 2406 + }, + { + "epoch": 0.38, + "learning_rate": 4.971484383271748e-05, + "loss": 1.6297, + "step": 2407 + }, + { + "epoch": 0.38, + "learning_rate": 4.971445515117648e-05, + "loss": 1.5553, + "step": 2408 + }, + { + "epoch": 0.38, + "learning_rate": 4.971406620644154e-05, + "loss": 1.495, + "step": 2409 + }, + { + "epoch": 0.38, + "learning_rate": 4.97136769985168e-05, + "loss": 1.527, + "step": 2410 + }, + { + "epoch": 0.38, + "learning_rate": 4.971328752740641e-05, + "loss": 1.5539, + "step": 2411 + }, + { + "epoch": 0.38, + "learning_rate": 4.97128977931145e-05, + "loss": 1.5233, + "step": 2412 + }, + { + "epoch": 0.38, + "learning_rate": 4.9712507795645235e-05, + "loss": 1.519, + "step": 2413 + }, + { + "epoch": 0.38, + "learning_rate": 4.971211753500277e-05, + "loss": 1.648, + "step": 2414 + }, + { + "epoch": 0.38, + "learning_rate": 4.971172701119125e-05, + "loss": 1.4483, + "step": 2415 + }, + { + "epoch": 0.38, + "learning_rate": 4.9711336224214844e-05, + "loss": 1.5173, + "step": 2416 + }, + { + "epoch": 0.39, + "learning_rate": 4.971094517407771e-05, + "loss": 1.4766, + "step": 2417 + }, + { + "epoch": 0.39, + "learning_rate": 4.971055386078401e-05, + "loss": 1.5622, + "step": 2418 + }, + { + "epoch": 0.39, + "learning_rate": 4.9710162284337915e-05, + "loss": 1.6351, + "step": 2419 + }, + { + "epoch": 0.39, + "learning_rate": 4.9709770444743595e-05, + "loss": 1.5147, + "step": 2420 + }, + { + "epoch": 0.39, + "learning_rate": 4.970937834200522e-05, + "loss": 1.4544, + "step": 2421 + }, + { + "epoch": 0.39, + "learning_rate": 4.970898597612696e-05, + "loss": 1.5996, + "step": 2422 + }, + { + "epoch": 0.39, + "learning_rate": 4.970859334711301e-05, + "loss": 1.5778, + "step": 2423 + }, + { + "epoch": 0.39, + "learning_rate": 4.9708200454967545e-05, + "loss": 1.5379, + "step": 2424 + }, + { + "epoch": 0.39, + "learning_rate": 4.970780729969474e-05, + "loss": 1.5129, + "step": 2425 + }, + { + "epoch": 0.39, + "learning_rate": 4.970741388129879e-05, + "loss": 1.5441, + "step": 2426 + }, + { + "epoch": 0.39, + "learning_rate": 4.970702019978388e-05, + "loss": 1.4994, + "step": 2427 + }, + { + "epoch": 0.39, + "learning_rate": 4.97066262551542e-05, + "loss": 1.537, + "step": 2428 + }, + { + "epoch": 0.39, + "learning_rate": 4.970623204741396e-05, + "loss": 1.5853, + "step": 2429 + }, + { + "epoch": 0.39, + "learning_rate": 4.970583757656734e-05, + "loss": 1.5781, + "step": 2430 + }, + { + "epoch": 0.39, + "learning_rate": 4.970544284261855e-05, + "loss": 1.6284, + "step": 2431 + }, + { + "epoch": 0.39, + "learning_rate": 4.97050478455718e-05, + "loss": 1.57, + "step": 2432 + }, + { + "epoch": 0.39, + "learning_rate": 4.970465258543129e-05, + "loss": 1.5607, + "step": 2433 + }, + { + "epoch": 0.39, + "learning_rate": 4.970425706220122e-05, + "loss": 1.5319, + "step": 2434 + }, + { + "epoch": 0.39, + "learning_rate": 4.970386127588582e-05, + "loss": 1.5521, + "step": 2435 + }, + { + "epoch": 0.39, + "learning_rate": 4.9703465226489285e-05, + "loss": 1.5651, + "step": 2436 + }, + { + "epoch": 0.39, + "learning_rate": 4.9703068914015846e-05, + "loss": 1.5501, + "step": 2437 + }, + { + "epoch": 0.39, + "learning_rate": 4.970267233846972e-05, + "loss": 1.5918, + "step": 2438 + }, + { + "epoch": 0.39, + "learning_rate": 4.970227549985514e-05, + "loss": 1.5633, + "step": 2439 + }, + { + "epoch": 0.39, + "learning_rate": 4.9701878398176317e-05, + "loss": 1.6304, + "step": 2440 + }, + { + "epoch": 0.39, + "learning_rate": 4.970148103343748e-05, + "loss": 1.5487, + "step": 2441 + }, + { + "epoch": 0.39, + "learning_rate": 4.970108340564288e-05, + "loss": 1.5392, + "step": 2442 + }, + { + "epoch": 0.39, + "learning_rate": 4.9700685514796724e-05, + "loss": 1.5716, + "step": 2443 + }, + { + "epoch": 0.39, + "learning_rate": 4.970028736090328e-05, + "loss": 1.53, + "step": 2444 + }, + { + "epoch": 0.39, + "learning_rate": 4.9699888943966756e-05, + "loss": 1.513, + "step": 2445 + }, + { + "epoch": 0.39, + "learning_rate": 4.969949026399141e-05, + "loss": 1.5443, + "step": 2446 + }, + { + "epoch": 0.39, + "learning_rate": 4.96990913209815e-05, + "loss": 1.543, + "step": 2447 + }, + { + "epoch": 0.39, + "learning_rate": 4.969869211494125e-05, + "loss": 1.5335, + "step": 2448 + }, + { + "epoch": 0.39, + "learning_rate": 4.969829264587493e-05, + "loss": 1.5239, + "step": 2449 + }, + { + "epoch": 0.39, + "learning_rate": 4.969789291378679e-05, + "loss": 1.582, + "step": 2450 + }, + { + "epoch": 0.39, + "learning_rate": 4.969749291868108e-05, + "loss": 1.5446, + "step": 2451 + }, + { + "epoch": 0.39, + "learning_rate": 4.969709266056207e-05, + "loss": 1.6165, + "step": 2452 + }, + { + "epoch": 0.39, + "learning_rate": 4.9696692139434016e-05, + "loss": 1.6323, + "step": 2453 + }, + { + "epoch": 0.39, + "learning_rate": 4.9696291355301186e-05, + "loss": 1.5448, + "step": 2454 + }, + { + "epoch": 0.39, + "learning_rate": 4.9695890308167844e-05, + "loss": 1.5296, + "step": 2455 + }, + { + "epoch": 0.39, + "learning_rate": 4.969548899803826e-05, + "loss": 1.5383, + "step": 2456 + }, + { + "epoch": 0.39, + "learning_rate": 4.9695087424916716e-05, + "loss": 1.5138, + "step": 2457 + }, + { + "epoch": 0.39, + "learning_rate": 4.9694685588807475e-05, + "loss": 1.512, + "step": 2458 + }, + { + "epoch": 0.39, + "learning_rate": 4.969428348971484e-05, + "loss": 1.5474, + "step": 2459 + }, + { + "epoch": 0.39, + "learning_rate": 4.969388112764307e-05, + "loss": 1.5933, + "step": 2460 + }, + { + "epoch": 0.39, + "learning_rate": 4.969347850259646e-05, + "loss": 1.5619, + "step": 2461 + }, + { + "epoch": 0.39, + "learning_rate": 4.969307561457929e-05, + "loss": 1.5431, + "step": 2462 + }, + { + "epoch": 0.39, + "learning_rate": 4.969267246359586e-05, + "loss": 1.5352, + "step": 2463 + }, + { + "epoch": 0.39, + "learning_rate": 4.969226904965046e-05, + "loss": 1.5853, + "step": 2464 + }, + { + "epoch": 0.39, + "learning_rate": 4.969186537274738e-05, + "loss": 1.4808, + "step": 2465 + }, + { + "epoch": 0.39, + "learning_rate": 4.9691461432890936e-05, + "loss": 1.5063, + "step": 2466 + }, + { + "epoch": 0.39, + "learning_rate": 4.969105723008541e-05, + "loss": 1.6042, + "step": 2467 + }, + { + "epoch": 0.39, + "learning_rate": 4.969065276433512e-05, + "loss": 1.5179, + "step": 2468 + }, + { + "epoch": 0.39, + "learning_rate": 4.969024803564436e-05, + "loss": 1.6146, + "step": 2469 + }, + { + "epoch": 0.39, + "learning_rate": 4.968984304401746e-05, + "loss": 1.5643, + "step": 2470 + }, + { + "epoch": 0.39, + "learning_rate": 4.9689437789458715e-05, + "loss": 1.5147, + "step": 2471 + }, + { + "epoch": 0.39, + "learning_rate": 4.9689032271972446e-05, + "loss": 1.5407, + "step": 2472 + }, + { + "epoch": 0.39, + "learning_rate": 4.9688626491562976e-05, + "loss": 1.655, + "step": 2473 + }, + { + "epoch": 0.39, + "learning_rate": 4.9688220448234614e-05, + "loss": 1.5907, + "step": 2474 + }, + { + "epoch": 0.39, + "learning_rate": 4.96878141419917e-05, + "loss": 1.5579, + "step": 2475 + }, + { + "epoch": 0.39, + "learning_rate": 4.968740757283855e-05, + "loss": 1.5588, + "step": 2476 + }, + { + "epoch": 0.39, + "learning_rate": 4.96870007407795e-05, + "loss": 1.5329, + "step": 2477 + }, + { + "epoch": 0.39, + "learning_rate": 4.968659364581888e-05, + "loss": 1.5864, + "step": 2478 + }, + { + "epoch": 0.39, + "learning_rate": 4.968618628796102e-05, + "loss": 1.6384, + "step": 2479 + }, + { + "epoch": 0.4, + "learning_rate": 4.968577866721027e-05, + "loss": 1.5041, + "step": 2480 + }, + { + "epoch": 0.4, + "learning_rate": 4.9685370783570964e-05, + "loss": 1.5768, + "step": 2481 + }, + { + "epoch": 0.4, + "learning_rate": 4.968496263704744e-05, + "loss": 1.5925, + "step": 2482 + }, + { + "epoch": 0.4, + "learning_rate": 4.968455422764406e-05, + "loss": 1.5578, + "step": 2483 + }, + { + "epoch": 0.4, + "learning_rate": 4.968414555536516e-05, + "loss": 1.5674, + "step": 2484 + }, + { + "epoch": 0.4, + "learning_rate": 4.968373662021509e-05, + "loss": 1.5565, + "step": 2485 + }, + { + "epoch": 0.4, + "learning_rate": 4.9683327422198226e-05, + "loss": 1.5433, + "step": 2486 + }, + { + "epoch": 0.4, + "learning_rate": 4.968291796131889e-05, + "loss": 1.5814, + "step": 2487 + }, + { + "epoch": 0.4, + "learning_rate": 4.968250823758148e-05, + "loss": 1.5145, + "step": 2488 + }, + { + "epoch": 0.4, + "learning_rate": 4.968209825099033e-05, + "loss": 1.6273, + "step": 2489 + }, + { + "epoch": 0.4, + "learning_rate": 4.968168800154982e-05, + "loss": 1.561, + "step": 2490 + }, + { + "epoch": 0.4, + "learning_rate": 4.968127748926433e-05, + "loss": 1.652, + "step": 2491 + }, + { + "epoch": 0.4, + "learning_rate": 4.968086671413821e-05, + "loss": 1.5985, + "step": 2492 + }, + { + "epoch": 0.4, + "learning_rate": 4.968045567617584e-05, + "loss": 1.5698, + "step": 2493 + }, + { + "epoch": 0.4, + "learning_rate": 4.968004437538161e-05, + "loss": 1.5443, + "step": 2494 + }, + { + "epoch": 0.4, + "learning_rate": 4.967963281175988e-05, + "loss": 1.5584, + "step": 2495 + }, + { + "epoch": 0.4, + "learning_rate": 4.967922098531506e-05, + "loss": 1.4725, + "step": 2496 + }, + { + "epoch": 0.4, + "learning_rate": 4.96788088960515e-05, + "loss": 1.5832, + "step": 2497 + }, + { + "epoch": 0.4, + "learning_rate": 4.967839654397362e-05, + "loss": 1.5933, + "step": 2498 + }, + { + "epoch": 0.4, + "learning_rate": 4.9677983929085796e-05, + "loss": 1.6271, + "step": 2499 + }, + { + "epoch": 0.4, + "learning_rate": 4.9677571051392424e-05, + "loss": 1.4832, + "step": 2500 + }, + { + "epoch": 0.4, + "learning_rate": 4.967715791089791e-05, + "loss": 1.5286, + "step": 2501 + }, + { + "epoch": 0.4, + "learning_rate": 4.9676744507606644e-05, + "loss": 1.5, + "step": 2502 + }, + { + "epoch": 0.4, + "learning_rate": 4.9676330841523024e-05, + "loss": 1.5472, + "step": 2503 + }, + { + "epoch": 0.4, + "learning_rate": 4.9675916912651465e-05, + "loss": 1.5264, + "step": 2504 + }, + { + "epoch": 0.4, + "learning_rate": 4.967550272099638e-05, + "loss": 1.6138, + "step": 2505 + }, + { + "epoch": 0.4, + "learning_rate": 4.967508826656216e-05, + "loss": 1.5308, + "step": 2506 + }, + { + "epoch": 0.4, + "learning_rate": 4.967467354935323e-05, + "loss": 1.5925, + "step": 2507 + }, + { + "epoch": 0.4, + "learning_rate": 4.9674258569374014e-05, + "loss": 1.5197, + "step": 2508 + }, + { + "epoch": 0.4, + "learning_rate": 4.9673843326628925e-05, + "loss": 1.5871, + "step": 2509 + }, + { + "epoch": 0.4, + "learning_rate": 4.9673427821122384e-05, + "loss": 1.5383, + "step": 2510 + }, + { + "epoch": 0.4, + "learning_rate": 4.9673012052858816e-05, + "loss": 1.5972, + "step": 2511 + }, + { + "epoch": 0.4, + "learning_rate": 4.967259602184264e-05, + "loss": 1.5739, + "step": 2512 + }, + { + "epoch": 0.4, + "learning_rate": 4.967217972807831e-05, + "loss": 1.576, + "step": 2513 + }, + { + "epoch": 0.4, + "learning_rate": 4.967176317157024e-05, + "loss": 1.5978, + "step": 2514 + }, + { + "epoch": 0.4, + "learning_rate": 4.967134635232286e-05, + "loss": 1.5498, + "step": 2515 + }, + { + "epoch": 0.4, + "learning_rate": 4.9670929270340626e-05, + "loss": 1.6174, + "step": 2516 + }, + { + "epoch": 0.4, + "learning_rate": 4.9670511925627984e-05, + "loss": 1.5547, + "step": 2517 + }, + { + "epoch": 0.4, + "learning_rate": 4.967009431818935e-05, + "loss": 1.5645, + "step": 2518 + }, + { + "epoch": 0.4, + "learning_rate": 4.96696764480292e-05, + "loss": 1.5641, + "step": 2519 + }, + { + "epoch": 0.4, + "learning_rate": 4.966925831515198e-05, + "loss": 1.6047, + "step": 2520 + }, + { + "epoch": 0.4, + "learning_rate": 4.966883991956212e-05, + "loss": 1.5778, + "step": 2521 + }, + { + "epoch": 0.4, + "learning_rate": 4.96684212612641e-05, + "loss": 1.4686, + "step": 2522 + }, + { + "epoch": 0.4, + "learning_rate": 4.966800234026237e-05, + "loss": 1.6092, + "step": 2523 + }, + { + "epoch": 0.4, + "learning_rate": 4.9667583156561395e-05, + "loss": 1.5682, + "step": 2524 + }, + { + "epoch": 0.4, + "learning_rate": 4.9667163710165626e-05, + "loss": 1.5702, + "step": 2525 + }, + { + "epoch": 0.4, + "learning_rate": 4.9666744001079544e-05, + "loss": 1.554, + "step": 2526 + }, + { + "epoch": 0.4, + "learning_rate": 4.966632402930761e-05, + "loss": 1.6021, + "step": 2527 + }, + { + "epoch": 0.4, + "learning_rate": 4.9665903794854305e-05, + "loss": 1.6117, + "step": 2528 + }, + { + "epoch": 0.4, + "learning_rate": 4.96654832977241e-05, + "loss": 1.5103, + "step": 2529 + }, + { + "epoch": 0.4, + "learning_rate": 4.966506253792148e-05, + "loss": 1.4199, + "step": 2530 + }, + { + "epoch": 0.4, + "learning_rate": 4.9664641515450907e-05, + "loss": 1.5194, + "step": 2531 + }, + { + "epoch": 0.4, + "learning_rate": 4.9664220230316884e-05, + "loss": 1.523, + "step": 2532 + }, + { + "epoch": 0.4, + "learning_rate": 4.966379868252388e-05, + "loss": 1.6364, + "step": 2533 + }, + { + "epoch": 0.4, + "learning_rate": 4.96633768720764e-05, + "loss": 1.5907, + "step": 2534 + }, + { + "epoch": 0.4, + "learning_rate": 4.966295479897893e-05, + "loss": 1.5555, + "step": 2535 + }, + { + "epoch": 0.4, + "learning_rate": 4.9662532463235965e-05, + "loss": 1.5571, + "step": 2536 + }, + { + "epoch": 0.4, + "learning_rate": 4.9662109864852004e-05, + "loss": 1.5513, + "step": 2537 + }, + { + "epoch": 0.4, + "learning_rate": 4.9661687003831534e-05, + "loss": 1.6139, + "step": 2538 + }, + { + "epoch": 0.4, + "learning_rate": 4.9661263880179085e-05, + "loss": 1.4961, + "step": 2539 + }, + { + "epoch": 0.4, + "learning_rate": 4.966084049389914e-05, + "loss": 1.481, + "step": 2540 + }, + { + "epoch": 0.4, + "learning_rate": 4.966041684499621e-05, + "loss": 1.5234, + "step": 2541 + }, + { + "epoch": 0.4, + "learning_rate": 4.965999293347482e-05, + "loss": 1.5703, + "step": 2542 + }, + { + "epoch": 0.41, + "learning_rate": 4.965956875933948e-05, + "loss": 1.5409, + "step": 2543 + }, + { + "epoch": 0.41, + "learning_rate": 4.9659144322594694e-05, + "loss": 1.4945, + "step": 2544 + }, + { + "epoch": 0.41, + "learning_rate": 4.9658719623245e-05, + "loss": 1.5664, + "step": 2545 + }, + { + "epoch": 0.41, + "learning_rate": 4.9658294661294914e-05, + "loss": 1.5415, + "step": 2546 + }, + { + "epoch": 0.41, + "learning_rate": 4.965786943674896e-05, + "loss": 1.5202, + "step": 2547 + }, + { + "epoch": 0.41, + "learning_rate": 4.965744394961166e-05, + "loss": 1.6548, + "step": 2548 + }, + { + "epoch": 0.41, + "learning_rate": 4.965701819988755e-05, + "loss": 1.533, + "step": 2549 + }, + { + "epoch": 0.41, + "learning_rate": 4.965659218758117e-05, + "loss": 1.5178, + "step": 2550 + }, + { + "epoch": 0.41, + "learning_rate": 4.9656165912697054e-05, + "loss": 1.5425, + "step": 2551 + }, + { + "epoch": 0.41, + "learning_rate": 4.965573937523974e-05, + "loss": 1.5734, + "step": 2552 + }, + { + "epoch": 0.41, + "learning_rate": 4.965531257521378e-05, + "loss": 1.6209, + "step": 2553 + }, + { + "epoch": 0.41, + "learning_rate": 4.965488551262369e-05, + "loss": 1.5594, + "step": 2554 + }, + { + "epoch": 0.41, + "learning_rate": 4.965445818747405e-05, + "loss": 1.5348, + "step": 2555 + }, + { + "epoch": 0.41, + "learning_rate": 4.96540305997694e-05, + "loss": 1.5527, + "step": 2556 + }, + { + "epoch": 0.41, + "learning_rate": 4.965360274951428e-05, + "loss": 1.5726, + "step": 2557 + }, + { + "epoch": 0.41, + "learning_rate": 4.9653174636713274e-05, + "loss": 1.5786, + "step": 2558 + }, + { + "epoch": 0.41, + "learning_rate": 4.9652746261370914e-05, + "loss": 1.5417, + "step": 2559 + }, + { + "epoch": 0.41, + "learning_rate": 4.9652317623491776e-05, + "loss": 1.5667, + "step": 2560 + }, + { + "epoch": 0.41, + "learning_rate": 4.965188872308042e-05, + "loss": 1.4981, + "step": 2561 + }, + { + "epoch": 0.41, + "learning_rate": 4.965145956014142e-05, + "loss": 1.4808, + "step": 2562 + }, + { + "epoch": 0.41, + "learning_rate": 4.965103013467934e-05, + "loss": 1.5615, + "step": 2563 + }, + { + "epoch": 0.41, + "learning_rate": 4.965060044669876e-05, + "loss": 1.5158, + "step": 2564 + }, + { + "epoch": 0.41, + "learning_rate": 4.965017049620424e-05, + "loss": 1.5879, + "step": 2565 + }, + { + "epoch": 0.41, + "learning_rate": 4.964974028320038e-05, + "loss": 1.523, + "step": 2566 + }, + { + "epoch": 0.41, + "learning_rate": 4.964930980769175e-05, + "loss": 1.5423, + "step": 2567 + }, + { + "epoch": 0.41, + "learning_rate": 4.964887906968293e-05, + "loss": 1.6413, + "step": 2568 + }, + { + "epoch": 0.41, + "learning_rate": 4.964844806917852e-05, + "loss": 1.6304, + "step": 2569 + }, + { + "epoch": 0.41, + "learning_rate": 4.9648016806183096e-05, + "loss": 1.5604, + "step": 2570 + }, + { + "epoch": 0.41, + "learning_rate": 4.964758528070126e-05, + "loss": 1.5975, + "step": 2571 + }, + { + "epoch": 0.41, + "learning_rate": 4.96471534927376e-05, + "loss": 1.5532, + "step": 2572 + }, + { + "epoch": 0.41, + "learning_rate": 4.964672144229672e-05, + "loss": 1.5373, + "step": 2573 + }, + { + "epoch": 0.41, + "learning_rate": 4.964628912938323e-05, + "loss": 1.5441, + "step": 2574 + }, + { + "epoch": 0.41, + "learning_rate": 4.9645856554001714e-05, + "loss": 1.4878, + "step": 2575 + }, + { + "epoch": 0.41, + "learning_rate": 4.964542371615679e-05, + "loss": 1.5622, + "step": 2576 + }, + { + "epoch": 0.41, + "learning_rate": 4.9644990615853074e-05, + "loss": 1.6068, + "step": 2577 + }, + { + "epoch": 0.41, + "learning_rate": 4.964455725309516e-05, + "loss": 1.5557, + "step": 2578 + }, + { + "epoch": 0.41, + "learning_rate": 4.964412362788767e-05, + "loss": 1.6232, + "step": 2579 + }, + { + "epoch": 0.41, + "learning_rate": 4.9643689740235235e-05, + "loss": 1.5487, + "step": 2580 + }, + { + "epoch": 0.41, + "learning_rate": 4.964325559014247e-05, + "loss": 1.5562, + "step": 2581 + }, + { + "epoch": 0.41, + "learning_rate": 4.964282117761398e-05, + "loss": 1.5575, + "step": 2582 + }, + { + "epoch": 0.41, + "learning_rate": 4.9642386502654416e-05, + "loss": 1.5477, + "step": 2583 + }, + { + "epoch": 0.41, + "learning_rate": 4.964195156526839e-05, + "loss": 1.6159, + "step": 2584 + }, + { + "epoch": 0.41, + "learning_rate": 4.9641516365460543e-05, + "loss": 1.5627, + "step": 2585 + }, + { + "epoch": 0.41, + "learning_rate": 4.964108090323551e-05, + "loss": 1.5153, + "step": 2586 + }, + { + "epoch": 0.41, + "learning_rate": 4.9640645178597924e-05, + "loss": 1.5309, + "step": 2587 + }, + { + "epoch": 0.41, + "learning_rate": 4.9640209191552426e-05, + "loss": 1.5298, + "step": 2588 + }, + { + "epoch": 0.41, + "learning_rate": 4.9639772942103656e-05, + "loss": 1.495, + "step": 2589 + }, + { + "epoch": 0.41, + "learning_rate": 4.963933643025627e-05, + "loss": 1.5791, + "step": 2590 + }, + { + "epoch": 0.41, + "learning_rate": 4.96388996560149e-05, + "loss": 1.5697, + "step": 2591 + }, + { + "epoch": 0.41, + "learning_rate": 4.9638462619384215e-05, + "loss": 1.5524, + "step": 2592 + }, + { + "epoch": 0.41, + "learning_rate": 4.9638025320368863e-05, + "loss": 1.5575, + "step": 2593 + }, + { + "epoch": 0.41, + "learning_rate": 4.963758775897349e-05, + "loss": 1.5024, + "step": 2594 + }, + { + "epoch": 0.41, + "learning_rate": 4.9637149935202775e-05, + "loss": 1.5181, + "step": 2595 + }, + { + "epoch": 0.41, + "learning_rate": 4.963671184906137e-05, + "loss": 1.5389, + "step": 2596 + }, + { + "epoch": 0.41, + "learning_rate": 4.9636273500553934e-05, + "loss": 1.5332, + "step": 2597 + }, + { + "epoch": 0.41, + "learning_rate": 4.963583488968515e-05, + "loss": 1.5409, + "step": 2598 + }, + { + "epoch": 0.41, + "learning_rate": 4.963539601645968e-05, + "loss": 1.5508, + "step": 2599 + }, + { + "epoch": 0.41, + "learning_rate": 4.9634956880882194e-05, + "loss": 1.5349, + "step": 2600 + }, + { + "epoch": 0.41, + "learning_rate": 4.963451748295738e-05, + "loss": 1.5793, + "step": 2601 + }, + { + "epoch": 0.41, + "learning_rate": 4.963407782268991e-05, + "loss": 1.4815, + "step": 2602 + }, + { + "epoch": 0.41, + "learning_rate": 4.9633637900084464e-05, + "loss": 1.5187, + "step": 2603 + }, + { + "epoch": 0.41, + "learning_rate": 4.9633197715145726e-05, + "loss": 1.5687, + "step": 2604 + }, + { + "epoch": 0.42, + "learning_rate": 4.963275726787839e-05, + "loss": 1.5557, + "step": 2605 + }, + { + "epoch": 0.42, + "learning_rate": 4.963231655828715e-05, + "loss": 1.572, + "step": 2606 + }, + { + "epoch": 0.42, + "learning_rate": 4.9631875586376684e-05, + "loss": 1.522, + "step": 2607 + }, + { + "epoch": 0.42, + "learning_rate": 4.96314343521517e-05, + "loss": 1.6156, + "step": 2608 + }, + { + "epoch": 0.42, + "learning_rate": 4.96309928556169e-05, + "loss": 1.9898, + "step": 2609 + }, + { + "epoch": 0.42, + "learning_rate": 4.9630551096776975e-05, + "loss": 1.5677, + "step": 2610 + }, + { + "epoch": 0.42, + "learning_rate": 4.963010907563663e-05, + "loss": 1.5572, + "step": 2611 + }, + { + "epoch": 0.42, + "learning_rate": 4.962966679220059e-05, + "loss": 1.5062, + "step": 2612 + }, + { + "epoch": 0.42, + "learning_rate": 4.962922424647354e-05, + "loss": 1.5807, + "step": 2613 + }, + { + "epoch": 0.42, + "learning_rate": 4.962878143846021e-05, + "loss": 1.4979, + "step": 2614 + }, + { + "epoch": 0.42, + "learning_rate": 4.962833836816531e-05, + "loss": 1.5184, + "step": 2615 + }, + { + "epoch": 0.42, + "learning_rate": 4.962789503559355e-05, + "loss": 1.5698, + "step": 2616 + }, + { + "epoch": 0.42, + "learning_rate": 4.9627451440749675e-05, + "loss": 1.5461, + "step": 2617 + }, + { + "epoch": 0.42, + "learning_rate": 4.9627007583638386e-05, + "loss": 1.5436, + "step": 2618 + }, + { + "epoch": 0.42, + "learning_rate": 4.962656346426441e-05, + "loss": 1.5841, + "step": 2619 + }, + { + "epoch": 0.42, + "learning_rate": 4.96261190826325e-05, + "loss": 1.5434, + "step": 2620 + }, + { + "epoch": 0.42, + "learning_rate": 4.962567443874736e-05, + "loss": 1.5966, + "step": 2621 + }, + { + "epoch": 0.42, + "learning_rate": 4.962522953261374e-05, + "loss": 1.6105, + "step": 2622 + }, + { + "epoch": 0.42, + "learning_rate": 4.962478436423638e-05, + "loss": 1.5888, + "step": 2623 + }, + { + "epoch": 0.42, + "learning_rate": 4.962433893362002e-05, + "loss": 1.4936, + "step": 2624 + }, + { + "epoch": 0.42, + "learning_rate": 4.962389324076939e-05, + "loss": 1.5052, + "step": 2625 + }, + { + "epoch": 0.42, + "learning_rate": 4.962344728568925e-05, + "loss": 1.5164, + "step": 2626 + }, + { + "epoch": 0.42, + "learning_rate": 4.962300106838435e-05, + "loss": 1.5235, + "step": 2627 + }, + { + "epoch": 0.42, + "learning_rate": 4.9622554588859435e-05, + "loss": 1.5549, + "step": 2628 + }, + { + "epoch": 0.42, + "learning_rate": 4.9622107847119256e-05, + "loss": 1.5468, + "step": 2629 + }, + { + "epoch": 0.42, + "learning_rate": 4.962166084316859e-05, + "loss": 1.5491, + "step": 2630 + }, + { + "epoch": 0.42, + "learning_rate": 4.962121357701217e-05, + "loss": 1.5473, + "step": 2631 + }, + { + "epoch": 0.42, + "learning_rate": 4.962076604865479e-05, + "loss": 1.5462, + "step": 2632 + }, + { + "epoch": 0.42, + "learning_rate": 4.9620318258101184e-05, + "loss": 1.4351, + "step": 2633 + }, + { + "epoch": 0.42, + "learning_rate": 4.9619870205356145e-05, + "loss": 1.5033, + "step": 2634 + }, + { + "epoch": 0.42, + "learning_rate": 4.961942189042443e-05, + "loss": 1.5975, + "step": 2635 + }, + { + "epoch": 0.42, + "learning_rate": 4.9618973313310826e-05, + "loss": 1.5166, + "step": 2636 + }, + { + "epoch": 0.42, + "learning_rate": 4.9618524474020086e-05, + "loss": 1.6133, + "step": 2637 + }, + { + "epoch": 0.42, + "learning_rate": 4.961807537255703e-05, + "loss": 1.5156, + "step": 2638 + }, + { + "epoch": 0.42, + "learning_rate": 4.96176260089264e-05, + "loss": 1.5955, + "step": 2639 + }, + { + "epoch": 0.42, + "learning_rate": 4.9617176383133e-05, + "loss": 1.5573, + "step": 2640 + }, + { + "epoch": 0.42, + "learning_rate": 4.961672649518162e-05, + "loss": 1.5318, + "step": 2641 + }, + { + "epoch": 0.42, + "learning_rate": 4.961627634507705e-05, + "loss": 1.5419, + "step": 2642 + }, + { + "epoch": 0.42, + "learning_rate": 4.9615825932824085e-05, + "loss": 1.557, + "step": 2643 + }, + { + "epoch": 0.42, + "learning_rate": 4.961537525842751e-05, + "loss": 1.5804, + "step": 2644 + }, + { + "epoch": 0.42, + "learning_rate": 4.961492432189214e-05, + "loss": 1.4831, + "step": 2645 + }, + { + "epoch": 0.42, + "learning_rate": 4.961447312322277e-05, + "loss": 1.5137, + "step": 2646 + }, + { + "epoch": 0.42, + "learning_rate": 4.96140216624242e-05, + "loss": 1.535, + "step": 2647 + }, + { + "epoch": 0.42, + "learning_rate": 4.961356993950125e-05, + "loss": 1.5917, + "step": 2648 + }, + { + "epoch": 0.42, + "learning_rate": 4.961311795445871e-05, + "loss": 1.5328, + "step": 2649 + }, + { + "epoch": 0.42, + "learning_rate": 4.961266570730142e-05, + "loss": 1.5181, + "step": 2650 + }, + { + "epoch": 0.42, + "learning_rate": 4.961221319803418e-05, + "loss": 1.5667, + "step": 2651 + }, + { + "epoch": 0.42, + "learning_rate": 4.96117604266618e-05, + "loss": 1.5008, + "step": 2652 + }, + { + "epoch": 0.42, + "learning_rate": 4.961130739318913e-05, + "loss": 1.5905, + "step": 2653 + }, + { + "epoch": 0.42, + "learning_rate": 4.961085409762097e-05, + "loss": 1.5479, + "step": 2654 + }, + { + "epoch": 0.42, + "learning_rate": 4.961040053996216e-05, + "loss": 1.5456, + "step": 2655 + }, + { + "epoch": 0.42, + "learning_rate": 4.960994672021752e-05, + "loss": 1.5646, + "step": 2656 + }, + { + "epoch": 0.42, + "learning_rate": 4.96094926383919e-05, + "loss": 1.6162, + "step": 2657 + }, + { + "epoch": 0.42, + "learning_rate": 4.960903829449011e-05, + "loss": 1.5791, + "step": 2658 + }, + { + "epoch": 0.42, + "learning_rate": 4.9608583688517004e-05, + "loss": 1.5594, + "step": 2659 + }, + { + "epoch": 0.42, + "learning_rate": 4.960812882047743e-05, + "loss": 1.6097, + "step": 2660 + }, + { + "epoch": 0.42, + "learning_rate": 4.960767369037622e-05, + "loss": 1.578, + "step": 2661 + }, + { + "epoch": 0.42, + "learning_rate": 4.960721829821823e-05, + "loss": 1.5521, + "step": 2662 + }, + { + "epoch": 0.42, + "learning_rate": 4.9606762644008295e-05, + "loss": 1.5031, + "step": 2663 + }, + { + "epoch": 0.42, + "learning_rate": 4.9606306727751284e-05, + "loss": 1.5926, + "step": 2664 + }, + { + "epoch": 0.42, + "learning_rate": 4.960585054945204e-05, + "loss": 1.4943, + "step": 2665 + }, + { + "epoch": 0.42, + "learning_rate": 4.960539410911543e-05, + "loss": 1.562, + "step": 2666 + }, + { + "epoch": 0.42, + "learning_rate": 4.960493740674631e-05, + "loss": 1.5786, + "step": 2667 + }, + { + "epoch": 0.43, + "learning_rate": 4.960448044234954e-05, + "loss": 1.4926, + "step": 2668 + }, + { + "epoch": 0.43, + "learning_rate": 4.9604023215929994e-05, + "loss": 1.543, + "step": 2669 + }, + { + "epoch": 0.43, + "learning_rate": 4.960356572749254e-05, + "loss": 1.5389, + "step": 2670 + }, + { + "epoch": 0.43, + "learning_rate": 4.9603107977042054e-05, + "loss": 1.5764, + "step": 2671 + }, + { + "epoch": 0.43, + "learning_rate": 4.96026499645834e-05, + "loss": 1.5505, + "step": 2672 + }, + { + "epoch": 0.43, + "learning_rate": 4.9602191690121455e-05, + "loss": 1.5723, + "step": 2673 + }, + { + "epoch": 0.43, + "learning_rate": 4.9601733153661114e-05, + "loss": 1.5754, + "step": 2674 + }, + { + "epoch": 0.43, + "learning_rate": 4.960127435520724e-05, + "loss": 1.583, + "step": 2675 + }, + { + "epoch": 0.43, + "learning_rate": 4.960081529476474e-05, + "loss": 1.5466, + "step": 2676 + }, + { + "epoch": 0.43, + "learning_rate": 4.9600355972338486e-05, + "loss": 1.5879, + "step": 2677 + }, + { + "epoch": 0.43, + "learning_rate": 4.959989638793338e-05, + "loss": 1.4751, + "step": 2678 + }, + { + "epoch": 0.43, + "learning_rate": 4.959943654155431e-05, + "loss": 1.5272, + "step": 2679 + }, + { + "epoch": 0.43, + "learning_rate": 4.9598976433206176e-05, + "loss": 1.5319, + "step": 2680 + }, + { + "epoch": 0.43, + "learning_rate": 4.9598516062893884e-05, + "loss": 1.5359, + "step": 2681 + }, + { + "epoch": 0.43, + "learning_rate": 4.959805543062233e-05, + "loss": 1.5993, + "step": 2682 + }, + { + "epoch": 0.43, + "learning_rate": 4.9597594536396405e-05, + "loss": 1.5625, + "step": 2683 + }, + { + "epoch": 0.43, + "learning_rate": 4.959713338022104e-05, + "loss": 1.528, + "step": 2684 + }, + { + "epoch": 0.43, + "learning_rate": 4.959667196210114e-05, + "loss": 1.5205, + "step": 2685 + }, + { + "epoch": 0.43, + "learning_rate": 4.9596210282041614e-05, + "loss": 1.5824, + "step": 2686 + }, + { + "epoch": 0.43, + "learning_rate": 4.959574834004739e-05, + "loss": 1.4889, + "step": 2687 + }, + { + "epoch": 0.43, + "learning_rate": 4.959528613612336e-05, + "loss": 1.5181, + "step": 2688 + }, + { + "epoch": 0.43, + "learning_rate": 4.959482367027448e-05, + "loss": 1.4784, + "step": 2689 + }, + { + "epoch": 0.43, + "learning_rate": 4.959436094250566e-05, + "loss": 1.5873, + "step": 2690 + }, + { + "epoch": 0.43, + "learning_rate": 4.9593897952821824e-05, + "loss": 1.5498, + "step": 2691 + }, + { + "epoch": 0.43, + "learning_rate": 4.9593434701227906e-05, + "loss": 1.5412, + "step": 2692 + }, + { + "epoch": 0.43, + "learning_rate": 4.959297118772883e-05, + "loss": 1.5277, + "step": 2693 + }, + { + "epoch": 0.43, + "learning_rate": 4.9592507412329556e-05, + "loss": 1.5251, + "step": 2694 + }, + { + "epoch": 0.43, + "learning_rate": 4.9592043375035005e-05, + "loss": 1.5796, + "step": 2695 + }, + { + "epoch": 0.43, + "learning_rate": 4.959157907585012e-05, + "loss": 1.572, + "step": 2696 + }, + { + "epoch": 0.43, + "learning_rate": 4.959111451477985e-05, + "loss": 1.572, + "step": 2697 + }, + { + "epoch": 0.43, + "learning_rate": 4.959064969182913e-05, + "loss": 1.5558, + "step": 2698 + }, + { + "epoch": 0.43, + "learning_rate": 4.9590184607002934e-05, + "loss": 1.5137, + "step": 2699 + }, + { + "epoch": 0.43, + "learning_rate": 4.958971926030619e-05, + "loss": 1.4932, + "step": 2700 + }, + { + "epoch": 0.43, + "learning_rate": 4.958925365174387e-05, + "loss": 1.508, + "step": 2701 + }, + { + "epoch": 0.43, + "learning_rate": 4.9588787781320924e-05, + "loss": 1.5536, + "step": 2702 + }, + { + "epoch": 0.43, + "learning_rate": 4.958832164904232e-05, + "loss": 1.5977, + "step": 2703 + }, + { + "epoch": 0.43, + "learning_rate": 4.958785525491302e-05, + "loss": 1.5845, + "step": 2704 + }, + { + "epoch": 0.43, + "learning_rate": 4.9587388598937986e-05, + "loss": 1.5349, + "step": 2705 + }, + { + "epoch": 0.43, + "learning_rate": 4.958692168112219e-05, + "loss": 1.5321, + "step": 2706 + }, + { + "epoch": 0.43, + "learning_rate": 4.958645450147061e-05, + "loss": 1.6263, + "step": 2707 + }, + { + "epoch": 0.43, + "learning_rate": 4.958598705998821e-05, + "loss": 1.5038, + "step": 2708 + }, + { + "epoch": 0.43, + "learning_rate": 4.958551935667998e-05, + "loss": 1.5199, + "step": 2709 + }, + { + "epoch": 0.43, + "learning_rate": 4.9585051391550893e-05, + "loss": 1.6587, + "step": 2710 + }, + { + "epoch": 0.43, + "learning_rate": 4.958458316460593e-05, + "loss": 1.5739, + "step": 2711 + }, + { + "epoch": 0.43, + "learning_rate": 4.9584114675850086e-05, + "loss": 1.5425, + "step": 2712 + }, + { + "epoch": 0.43, + "learning_rate": 4.958364592528835e-05, + "loss": 1.5129, + "step": 2713 + }, + { + "epoch": 0.43, + "learning_rate": 4.95831769129257e-05, + "loss": 1.5298, + "step": 2714 + }, + { + "epoch": 0.43, + "learning_rate": 4.9582707638767144e-05, + "loss": 1.5609, + "step": 2715 + }, + { + "epoch": 0.43, + "learning_rate": 4.958223810281768e-05, + "loss": 1.5676, + "step": 2716 + }, + { + "epoch": 0.43, + "learning_rate": 4.958176830508231e-05, + "loss": 1.5633, + "step": 2717 + }, + { + "epoch": 0.43, + "learning_rate": 4.958129824556602e-05, + "loss": 1.5589, + "step": 2718 + }, + { + "epoch": 0.43, + "learning_rate": 4.958082792427383e-05, + "loss": 1.5212, + "step": 2719 + }, + { + "epoch": 0.43, + "learning_rate": 4.9580357341210756e-05, + "loss": 1.5166, + "step": 2720 + }, + { + "epoch": 0.43, + "learning_rate": 4.957988649638179e-05, + "loss": 1.5192, + "step": 2721 + }, + { + "epoch": 0.43, + "learning_rate": 4.9579415389791954e-05, + "loss": 1.5581, + "step": 2722 + }, + { + "epoch": 0.43, + "learning_rate": 4.957894402144627e-05, + "loss": 1.5956, + "step": 2723 + }, + { + "epoch": 0.43, + "learning_rate": 4.957847239134976e-05, + "loss": 1.5537, + "step": 2724 + }, + { + "epoch": 0.43, + "learning_rate": 4.957800049950743e-05, + "loss": 1.4813, + "step": 2725 + }, + { + "epoch": 0.43, + "learning_rate": 4.9577528345924326e-05, + "loss": 1.5077, + "step": 2726 + }, + { + "epoch": 0.43, + "learning_rate": 4.957705593060546e-05, + "loss": 1.5355, + "step": 2727 + }, + { + "epoch": 0.43, + "learning_rate": 4.9576583253555866e-05, + "loss": 1.5326, + "step": 2728 + }, + { + "epoch": 0.43, + "learning_rate": 4.9576110314780586e-05, + "loss": 1.5275, + "step": 2729 + }, + { + "epoch": 0.43, + "learning_rate": 4.9575637114284655e-05, + "loss": 1.4572, + "step": 2730 + }, + { + "epoch": 0.44, + "learning_rate": 4.9575163652073107e-05, + "loss": 1.5225, + "step": 2731 + }, + { + "epoch": 0.44, + "learning_rate": 4.957468992815098e-05, + "loss": 1.5187, + "step": 2732 + }, + { + "epoch": 0.44, + "learning_rate": 4.957421594252334e-05, + "loss": 1.5396, + "step": 2733 + }, + { + "epoch": 0.44, + "learning_rate": 4.9573741695195205e-05, + "loss": 1.5388, + "step": 2734 + }, + { + "epoch": 0.44, + "learning_rate": 4.9573267186171647e-05, + "loss": 1.6019, + "step": 2735 + }, + { + "epoch": 0.44, + "learning_rate": 4.9572792415457706e-05, + "loss": 1.5495, + "step": 2736 + }, + { + "epoch": 0.44, + "learning_rate": 4.957231738305845e-05, + "loss": 1.5354, + "step": 2737 + }, + { + "epoch": 0.44, + "learning_rate": 4.9571842088978934e-05, + "loss": 1.5702, + "step": 2738 + }, + { + "epoch": 0.44, + "learning_rate": 4.957136653322421e-05, + "loss": 1.5243, + "step": 2739 + }, + { + "epoch": 0.44, + "learning_rate": 4.957089071579936e-05, + "loss": 1.5311, + "step": 2740 + }, + { + "epoch": 0.44, + "learning_rate": 4.9570414636709436e-05, + "loss": 1.5881, + "step": 2741 + }, + { + "epoch": 0.44, + "learning_rate": 4.9569938295959504e-05, + "loss": 1.5562, + "step": 2742 + }, + { + "epoch": 0.44, + "learning_rate": 4.956946169355466e-05, + "loss": 1.5653, + "step": 2743 + }, + { + "epoch": 0.44, + "learning_rate": 4.956898482949996e-05, + "loss": 1.6301, + "step": 2744 + }, + { + "epoch": 0.44, + "learning_rate": 4.9568507703800486e-05, + "loss": 1.5785, + "step": 2745 + }, + { + "epoch": 0.44, + "learning_rate": 4.956803031646132e-05, + "loss": 1.5625, + "step": 2746 + }, + { + "epoch": 0.44, + "learning_rate": 4.9567552667487555e-05, + "loss": 1.583, + "step": 2747 + }, + { + "epoch": 0.44, + "learning_rate": 4.9567074756884265e-05, + "loss": 1.5199, + "step": 2748 + }, + { + "epoch": 0.44, + "learning_rate": 4.9566596584656546e-05, + "loss": 1.5666, + "step": 2749 + }, + { + "epoch": 0.44, + "learning_rate": 4.956611815080948e-05, + "loss": 1.62, + "step": 2750 + }, + { + "epoch": 0.44, + "learning_rate": 4.9565639455348174e-05, + "loss": 1.5584, + "step": 2751 + }, + { + "epoch": 0.44, + "learning_rate": 4.956516049827772e-05, + "loss": 1.5843, + "step": 2752 + }, + { + "epoch": 0.44, + "learning_rate": 4.956468127960322e-05, + "loss": 1.5184, + "step": 2753 + }, + { + "epoch": 0.44, + "learning_rate": 4.956420179932979e-05, + "loss": 1.5217, + "step": 2754 + }, + { + "epoch": 0.44, + "learning_rate": 4.956372205746251e-05, + "loss": 1.5731, + "step": 2755 + }, + { + "epoch": 0.44, + "learning_rate": 4.956324205400651e-05, + "loss": 1.5542, + "step": 2756 + }, + { + "epoch": 0.44, + "learning_rate": 4.956276178896689e-05, + "loss": 1.5565, + "step": 2757 + }, + { + "epoch": 0.44, + "learning_rate": 4.956228126234878e-05, + "loss": 1.5996, + "step": 2758 + }, + { + "epoch": 0.44, + "learning_rate": 4.956180047415728e-05, + "loss": 1.551, + "step": 2759 + }, + { + "epoch": 0.44, + "learning_rate": 4.956131942439751e-05, + "loss": 1.5794, + "step": 2760 + }, + { + "epoch": 0.44, + "learning_rate": 4.956083811307461e-05, + "loss": 1.5513, + "step": 2761 + }, + { + "epoch": 0.44, + "learning_rate": 4.9560356540193684e-05, + "loss": 1.5562, + "step": 2762 + }, + { + "epoch": 0.44, + "learning_rate": 4.955987470575988e-05, + "loss": 1.5223, + "step": 2763 + }, + { + "epoch": 0.44, + "learning_rate": 4.9559392609778324e-05, + "loss": 1.5166, + "step": 2764 + }, + { + "epoch": 0.44, + "learning_rate": 4.955891025225414e-05, + "loss": 1.6125, + "step": 2765 + }, + { + "epoch": 0.44, + "learning_rate": 4.9558427633192483e-05, + "loss": 1.5174, + "step": 2766 + }, + { + "epoch": 0.44, + "learning_rate": 4.955794475259847e-05, + "loss": 1.5378, + "step": 2767 + }, + { + "epoch": 0.44, + "learning_rate": 4.955746161047726e-05, + "loss": 1.575, + "step": 2768 + }, + { + "epoch": 0.44, + "learning_rate": 4.9556978206833994e-05, + "loss": 1.5589, + "step": 2769 + }, + { + "epoch": 0.44, + "learning_rate": 4.955649454167381e-05, + "loss": 1.6569, + "step": 2770 + }, + { + "epoch": 0.44, + "learning_rate": 4.955601061500188e-05, + "loss": 1.5379, + "step": 2771 + }, + { + "epoch": 0.44, + "learning_rate": 4.955552642682334e-05, + "loss": 1.5488, + "step": 2772 + }, + { + "epoch": 0.44, + "learning_rate": 4.955504197714335e-05, + "loss": 1.4933, + "step": 2773 + }, + { + "epoch": 0.44, + "learning_rate": 4.955455726596707e-05, + "loss": 1.55, + "step": 2774 + }, + { + "epoch": 0.44, + "learning_rate": 4.955407229329966e-05, + "loss": 1.5602, + "step": 2775 + }, + { + "epoch": 0.44, + "learning_rate": 4.9553587059146295e-05, + "loss": 1.5749, + "step": 2776 + }, + { + "epoch": 0.44, + "learning_rate": 4.955310156351213e-05, + "loss": 1.4847, + "step": 2777 + }, + { + "epoch": 0.44, + "learning_rate": 4.955261580640234e-05, + "loss": 1.5596, + "step": 2778 + }, + { + "epoch": 0.44, + "learning_rate": 4.95521297878221e-05, + "loss": 1.5583, + "step": 2779 + }, + { + "epoch": 0.44, + "learning_rate": 4.9551643507776586e-05, + "loss": 1.4634, + "step": 2780 + }, + { + "epoch": 0.44, + "learning_rate": 4.9551156966270975e-05, + "loss": 1.5184, + "step": 2781 + }, + { + "epoch": 0.44, + "learning_rate": 4.955067016331044e-05, + "loss": 1.5765, + "step": 2782 + }, + { + "epoch": 0.44, + "learning_rate": 4.955018309890017e-05, + "loss": 1.5672, + "step": 2783 + }, + { + "epoch": 0.44, + "learning_rate": 4.954969577304537e-05, + "loss": 1.5747, + "step": 2784 + }, + { + "epoch": 0.44, + "learning_rate": 4.95492081857512e-05, + "loss": 1.54, + "step": 2785 + }, + { + "epoch": 0.44, + "learning_rate": 4.954872033702287e-05, + "loss": 1.5523, + "step": 2786 + }, + { + "epoch": 0.44, + "learning_rate": 4.954823222686558e-05, + "loss": 1.516, + "step": 2787 + }, + { + "epoch": 0.44, + "learning_rate": 4.954774385528451e-05, + "loss": 1.5871, + "step": 2788 + }, + { + "epoch": 0.44, + "learning_rate": 4.954725522228488e-05, + "loss": 1.5523, + "step": 2789 + }, + { + "epoch": 0.44, + "learning_rate": 4.954676632787188e-05, + "loss": 1.5654, + "step": 2790 + }, + { + "epoch": 0.44, + "learning_rate": 4.954627717205073e-05, + "loss": 1.4933, + "step": 2791 + }, + { + "epoch": 0.44, + "learning_rate": 4.954578775482662e-05, + "loss": 1.5301, + "step": 2792 + }, + { + "epoch": 0.44, + "learning_rate": 4.954529807620477e-05, + "loss": 1.5443, + "step": 2793 + }, + { + "epoch": 0.45, + "learning_rate": 4.954480813619041e-05, + "loss": 1.4714, + "step": 2794 + }, + { + "epoch": 0.45, + "learning_rate": 4.954431793478874e-05, + "loss": 1.5539, + "step": 2795 + }, + { + "epoch": 0.45, + "learning_rate": 4.9543827472004975e-05, + "loss": 1.5881, + "step": 2796 + }, + { + "epoch": 0.45, + "learning_rate": 4.954333674784436e-05, + "loss": 1.533, + "step": 2797 + }, + { + "epoch": 0.45, + "learning_rate": 4.954284576231211e-05, + "loss": 1.5576, + "step": 2798 + }, + { + "epoch": 0.45, + "learning_rate": 4.9542354515413446e-05, + "loss": 1.5285, + "step": 2799 + }, + { + "epoch": 0.45, + "learning_rate": 4.954186300715361e-05, + "loss": 1.4932, + "step": 2800 + }, + { + "epoch": 0.45, + "learning_rate": 4.9541371237537834e-05, + "loss": 1.5428, + "step": 2801 + }, + { + "epoch": 0.45, + "learning_rate": 4.954087920657135e-05, + "loss": 1.5604, + "step": 2802 + }, + { + "epoch": 0.45, + "learning_rate": 4.9540386914259404e-05, + "loss": 1.5778, + "step": 2803 + }, + { + "epoch": 0.45, + "learning_rate": 4.953989436060723e-05, + "loss": 1.5999, + "step": 2804 + }, + { + "epoch": 0.45, + "learning_rate": 4.953940154562009e-05, + "loss": 1.5428, + "step": 2805 + }, + { + "epoch": 0.45, + "learning_rate": 4.9538908469303215e-05, + "loss": 1.5448, + "step": 2806 + }, + { + "epoch": 0.45, + "learning_rate": 4.953841513166187e-05, + "loss": 1.5178, + "step": 2807 + }, + { + "epoch": 0.45, + "learning_rate": 4.9537921532701294e-05, + "loss": 1.5065, + "step": 2808 + }, + { + "epoch": 0.45, + "learning_rate": 4.953742767242675e-05, + "loss": 1.5176, + "step": 2809 + }, + { + "epoch": 0.45, + "learning_rate": 4.95369335508435e-05, + "loss": 1.5435, + "step": 2810 + }, + { + "epoch": 0.45, + "learning_rate": 4.9536439167956803e-05, + "loss": 1.5687, + "step": 2811 + }, + { + "epoch": 0.45, + "learning_rate": 4.953594452377194e-05, + "loss": 1.5902, + "step": 2812 + }, + { + "epoch": 0.45, + "learning_rate": 4.953544961829415e-05, + "loss": 1.4671, + "step": 2813 + }, + { + "epoch": 0.45, + "learning_rate": 4.9534954451528714e-05, + "loss": 1.5462, + "step": 2814 + }, + { + "epoch": 0.45, + "learning_rate": 4.953445902348092e-05, + "loss": 1.6021, + "step": 2815 + }, + { + "epoch": 0.45, + "learning_rate": 4.953396333415602e-05, + "loss": 1.5718, + "step": 2816 + }, + { + "epoch": 0.45, + "learning_rate": 4.953346738355932e-05, + "loss": 1.5837, + "step": 2817 + }, + { + "epoch": 0.45, + "learning_rate": 4.9532971171696074e-05, + "loss": 1.5737, + "step": 2818 + }, + { + "epoch": 0.45, + "learning_rate": 4.953247469857159e-05, + "loss": 1.5275, + "step": 2819 + }, + { + "epoch": 0.45, + "learning_rate": 4.953197796419113e-05, + "loss": 1.4904, + "step": 2820 + }, + { + "epoch": 0.45, + "learning_rate": 4.9531480968560014e-05, + "loss": 1.5622, + "step": 2821 + }, + { + "epoch": 0.45, + "learning_rate": 4.953098371168351e-05, + "loss": 1.6126, + "step": 2822 + }, + { + "epoch": 0.45, + "learning_rate": 4.953048619356693e-05, + "loss": 1.5005, + "step": 2823 + }, + { + "epoch": 0.45, + "learning_rate": 4.952998841421555e-05, + "loss": 1.5127, + "step": 2824 + }, + { + "epoch": 0.45, + "learning_rate": 4.95294903736347e-05, + "loss": 1.5433, + "step": 2825 + }, + { + "epoch": 0.45, + "learning_rate": 4.9528992071829666e-05, + "loss": 1.5462, + "step": 2826 + }, + { + "epoch": 0.45, + "learning_rate": 4.9528493508805765e-05, + "loss": 1.6175, + "step": 2827 + }, + { + "epoch": 0.45, + "learning_rate": 4.952799468456829e-05, + "loss": 1.5726, + "step": 2828 + }, + { + "epoch": 0.45, + "learning_rate": 4.952749559912256e-05, + "loss": 1.5498, + "step": 2829 + }, + { + "epoch": 0.45, + "learning_rate": 4.95269962524739e-05, + "loss": 1.6356, + "step": 2830 + }, + { + "epoch": 0.45, + "learning_rate": 4.952649664462762e-05, + "loss": 1.5545, + "step": 2831 + }, + { + "epoch": 0.45, + "learning_rate": 4.952599677558904e-05, + "loss": 1.5375, + "step": 2832 + }, + { + "epoch": 0.45, + "learning_rate": 4.9525496645363484e-05, + "loss": 1.5542, + "step": 2833 + }, + { + "epoch": 0.45, + "learning_rate": 4.952499625395628e-05, + "loss": 1.6095, + "step": 2834 + }, + { + "epoch": 0.45, + "learning_rate": 4.9524495601372755e-05, + "loss": 1.5803, + "step": 2835 + }, + { + "epoch": 0.45, + "learning_rate": 4.952399468761824e-05, + "loss": 1.5389, + "step": 2836 + }, + { + "epoch": 0.45, + "learning_rate": 4.9523493512698074e-05, + "loss": 1.5794, + "step": 2837 + }, + { + "epoch": 0.45, + "learning_rate": 4.9522992076617584e-05, + "loss": 1.536, + "step": 2838 + }, + { + "epoch": 0.45, + "learning_rate": 4.9522490379382125e-05, + "loss": 1.5632, + "step": 2839 + }, + { + "epoch": 0.45, + "learning_rate": 4.952198842099702e-05, + "loss": 1.5267, + "step": 2840 + }, + { + "epoch": 0.45, + "learning_rate": 4.952148620146764e-05, + "loss": 1.5415, + "step": 2841 + }, + { + "epoch": 0.45, + "learning_rate": 4.9520983720799304e-05, + "loss": 1.5799, + "step": 2842 + }, + { + "epoch": 0.45, + "learning_rate": 4.9520480978997394e-05, + "loss": 1.4915, + "step": 2843 + }, + { + "epoch": 0.45, + "learning_rate": 4.951997797606724e-05, + "loss": 1.5529, + "step": 2844 + }, + { + "epoch": 0.45, + "learning_rate": 4.95194747120142e-05, + "loss": 1.5729, + "step": 2845 + }, + { + "epoch": 0.45, + "learning_rate": 4.9518971186843656e-05, + "loss": 1.5627, + "step": 2846 + }, + { + "epoch": 0.45, + "learning_rate": 4.951846740056094e-05, + "loss": 1.5977, + "step": 2847 + }, + { + "epoch": 0.45, + "learning_rate": 4.9517963353171446e-05, + "loss": 1.5736, + "step": 2848 + }, + { + "epoch": 0.45, + "learning_rate": 4.9517459044680516e-05, + "loss": 1.5715, + "step": 2849 + }, + { + "epoch": 0.45, + "learning_rate": 4.951695447509354e-05, + "loss": 1.5036, + "step": 2850 + }, + { + "epoch": 0.45, + "learning_rate": 4.9516449644415876e-05, + "loss": 1.5308, + "step": 2851 + }, + { + "epoch": 0.45, + "learning_rate": 4.951594455265291e-05, + "loss": 1.6149, + "step": 2852 + }, + { + "epoch": 0.45, + "learning_rate": 4.951543919981002e-05, + "loss": 1.5171, + "step": 2853 + }, + { + "epoch": 0.45, + "learning_rate": 4.951493358589259e-05, + "loss": 1.5119, + "step": 2854 + }, + { + "epoch": 0.45, + "learning_rate": 4.9514427710906e-05, + "loss": 1.5072, + "step": 2855 + }, + { + "epoch": 0.45, + "learning_rate": 4.951392157485563e-05, + "loss": 1.4671, + "step": 2856 + }, + { + "epoch": 0.46, + "learning_rate": 4.951341517774688e-05, + "loss": 1.5109, + "step": 2857 + }, + { + "epoch": 0.46, + "learning_rate": 4.951290851958514e-05, + "loss": 1.5534, + "step": 2858 + }, + { + "epoch": 0.46, + "learning_rate": 4.9512401600375814e-05, + "loss": 1.5487, + "step": 2859 + }, + { + "epoch": 0.46, + "learning_rate": 4.9511894420124296e-05, + "loss": 1.5306, + "step": 2860 + }, + { + "epoch": 0.46, + "learning_rate": 4.951138697883597e-05, + "loss": 1.5116, + "step": 2861 + }, + { + "epoch": 0.46, + "learning_rate": 4.951087927651627e-05, + "loss": 1.5776, + "step": 2862 + }, + { + "epoch": 0.46, + "learning_rate": 4.9510371313170564e-05, + "loss": 1.5402, + "step": 2863 + }, + { + "epoch": 0.46, + "learning_rate": 4.950986308880431e-05, + "loss": 1.5391, + "step": 2864 + }, + { + "epoch": 0.46, + "learning_rate": 4.9509354603422875e-05, + "loss": 1.6019, + "step": 2865 + }, + { + "epoch": 0.46, + "learning_rate": 4.9508845857031696e-05, + "loss": 1.5996, + "step": 2866 + }, + { + "epoch": 0.46, + "learning_rate": 4.950833684963619e-05, + "loss": 1.5612, + "step": 2867 + }, + { + "epoch": 0.46, + "learning_rate": 4.950782758124178e-05, + "loss": 1.5448, + "step": 2868 + }, + { + "epoch": 0.46, + "learning_rate": 4.950731805185389e-05, + "loss": 1.5195, + "step": 2869 + }, + { + "epoch": 0.46, + "learning_rate": 4.950680826147793e-05, + "loss": 1.53, + "step": 2870 + }, + { + "epoch": 0.46, + "learning_rate": 4.950629821011935e-05, + "loss": 1.4981, + "step": 2871 + }, + { + "epoch": 0.46, + "learning_rate": 4.950578789778357e-05, + "loss": 1.5659, + "step": 2872 + }, + { + "epoch": 0.46, + "learning_rate": 4.950527732447602e-05, + "loss": 1.5939, + "step": 2873 + }, + { + "epoch": 0.46, + "learning_rate": 4.9504766490202145e-05, + "loss": 1.5482, + "step": 2874 + }, + { + "epoch": 0.46, + "learning_rate": 4.9504255394967394e-05, + "loss": 1.6113, + "step": 2875 + }, + { + "epoch": 0.46, + "learning_rate": 4.950374403877719e-05, + "loss": 1.5358, + "step": 2876 + }, + { + "epoch": 0.46, + "learning_rate": 4.9503232421636994e-05, + "loss": 1.5635, + "step": 2877 + }, + { + "epoch": 0.46, + "learning_rate": 4.950272054355225e-05, + "loss": 1.5137, + "step": 2878 + }, + { + "epoch": 0.46, + "learning_rate": 4.9502208404528404e-05, + "loss": 1.5225, + "step": 2879 + }, + { + "epoch": 0.46, + "learning_rate": 4.950169600457092e-05, + "loss": 1.5746, + "step": 2880 + }, + { + "epoch": 0.46, + "learning_rate": 4.9501183343685244e-05, + "loss": 1.5046, + "step": 2881 + }, + { + "epoch": 0.46, + "learning_rate": 4.950067042187684e-05, + "loss": 1.5293, + "step": 2882 + }, + { + "epoch": 0.46, + "learning_rate": 4.950015723915117e-05, + "loss": 1.5731, + "step": 2883 + }, + { + "epoch": 0.46, + "learning_rate": 4.9499643795513693e-05, + "loss": 1.5037, + "step": 2884 + }, + { + "epoch": 0.46, + "learning_rate": 4.94991300909699e-05, + "loss": 1.5254, + "step": 2885 + }, + { + "epoch": 0.46, + "learning_rate": 4.949861612552523e-05, + "loss": 1.6104, + "step": 2886 + }, + { + "epoch": 0.46, + "learning_rate": 4.949810189918518e-05, + "loss": 1.5801, + "step": 2887 + }, + { + "epoch": 0.46, + "learning_rate": 4.949758741195522e-05, + "loss": 1.5754, + "step": 2888 + }, + { + "epoch": 0.46, + "learning_rate": 4.949707266384083e-05, + "loss": 1.6393, + "step": 2889 + }, + { + "epoch": 0.46, + "learning_rate": 4.949655765484747e-05, + "loss": 1.5706, + "step": 2890 + }, + { + "epoch": 0.46, + "learning_rate": 4.949604238498066e-05, + "loss": 1.513, + "step": 2891 + }, + { + "epoch": 0.46, + "learning_rate": 4.9495526854245865e-05, + "loss": 1.605, + "step": 2892 + }, + { + "epoch": 0.46, + "learning_rate": 4.9495011062648586e-05, + "loss": 1.557, + "step": 2893 + }, + { + "epoch": 0.46, + "learning_rate": 4.949449501019431e-05, + "loss": 1.5493, + "step": 2894 + }, + { + "epoch": 0.46, + "learning_rate": 4.949397869688853e-05, + "loss": 1.4779, + "step": 2895 + }, + { + "epoch": 0.46, + "learning_rate": 4.949346212273675e-05, + "loss": 1.64, + "step": 2896 + }, + { + "epoch": 0.46, + "learning_rate": 4.9492945287744456e-05, + "loss": 1.5334, + "step": 2897 + }, + { + "epoch": 0.46, + "learning_rate": 4.949242819191718e-05, + "loss": 1.5825, + "step": 2898 + }, + { + "epoch": 0.46, + "learning_rate": 4.94919108352604e-05, + "loss": 1.5464, + "step": 2899 + }, + { + "epoch": 0.46, + "learning_rate": 4.949139321777965e-05, + "loss": 1.4857, + "step": 2900 + }, + { + "epoch": 0.46, + "learning_rate": 4.949087533948043e-05, + "loss": 1.5457, + "step": 2901 + }, + { + "epoch": 0.46, + "learning_rate": 4.949035720036825e-05, + "loss": 1.5114, + "step": 2902 + }, + { + "epoch": 0.46, + "learning_rate": 4.948983880044864e-05, + "loss": 1.6188, + "step": 2903 + }, + { + "epoch": 0.46, + "learning_rate": 4.948932013972712e-05, + "loss": 1.5156, + "step": 2904 + }, + { + "epoch": 0.46, + "learning_rate": 4.94888012182092e-05, + "loss": 1.5622, + "step": 2905 + }, + { + "epoch": 0.46, + "learning_rate": 4.948828203590041e-05, + "loss": 1.5233, + "step": 2906 + }, + { + "epoch": 0.46, + "learning_rate": 4.9487762592806295e-05, + "loss": 1.4922, + "step": 2907 + }, + { + "epoch": 0.46, + "learning_rate": 4.948724288893237e-05, + "loss": 1.5371, + "step": 2908 + }, + { + "epoch": 0.46, + "learning_rate": 4.948672292428418e-05, + "loss": 1.5282, + "step": 2909 + }, + { + "epoch": 0.46, + "learning_rate": 4.9486202698867247e-05, + "loss": 1.5306, + "step": 2910 + }, + { + "epoch": 0.46, + "learning_rate": 4.948568221268713e-05, + "loss": 1.5005, + "step": 2911 + }, + { + "epoch": 0.46, + "learning_rate": 4.948516146574936e-05, + "loss": 1.5785, + "step": 2912 + }, + { + "epoch": 0.46, + "learning_rate": 4.948464045805949e-05, + "loss": 1.5819, + "step": 2913 + }, + { + "epoch": 0.46, + "learning_rate": 4.948411918962306e-05, + "loss": 1.5387, + "step": 2914 + }, + { + "epoch": 0.46, + "learning_rate": 4.9483597660445616e-05, + "loss": 1.5483, + "step": 2915 + }, + { + "epoch": 0.46, + "learning_rate": 4.948307587053274e-05, + "loss": 1.5581, + "step": 2916 + }, + { + "epoch": 0.46, + "learning_rate": 4.948255381988996e-05, + "loss": 1.5889, + "step": 2917 + }, + { + "epoch": 0.46, + "learning_rate": 4.948203150852284e-05, + "loss": 1.5181, + "step": 2918 + }, + { + "epoch": 0.47, + "learning_rate": 4.948150893643695e-05, + "loss": 1.5236, + "step": 2919 + }, + { + "epoch": 0.47, + "learning_rate": 4.948098610363786e-05, + "loss": 1.5114, + "step": 2920 + }, + { + "epoch": 0.47, + "learning_rate": 4.948046301013113e-05, + "loss": 1.5477, + "step": 2921 + }, + { + "epoch": 0.47, + "learning_rate": 4.9479939655922325e-05, + "loss": 1.5894, + "step": 2922 + }, + { + "epoch": 0.47, + "learning_rate": 4.9479416041017025e-05, + "loss": 1.5789, + "step": 2923 + }, + { + "epoch": 0.47, + "learning_rate": 4.947889216542081e-05, + "loss": 1.5571, + "step": 2924 + }, + { + "epoch": 0.47, + "learning_rate": 4.947836802913926e-05, + "loss": 1.4854, + "step": 2925 + }, + { + "epoch": 0.47, + "learning_rate": 4.947784363217794e-05, + "loss": 1.5584, + "step": 2926 + }, + { + "epoch": 0.47, + "learning_rate": 4.947731897454245e-05, + "loss": 1.5409, + "step": 2927 + }, + { + "epoch": 0.47, + "learning_rate": 4.947679405623838e-05, + "loss": 1.4893, + "step": 2928 + }, + { + "epoch": 0.47, + "learning_rate": 4.9476268877271304e-05, + "loss": 1.5599, + "step": 2929 + }, + { + "epoch": 0.47, + "learning_rate": 4.9475743437646835e-05, + "loss": 1.5674, + "step": 2930 + }, + { + "epoch": 0.47, + "learning_rate": 4.947521773737055e-05, + "loss": 1.53, + "step": 2931 + }, + { + "epoch": 0.47, + "learning_rate": 4.9474691776448057e-05, + "loss": 1.5202, + "step": 2932 + }, + { + "epoch": 0.47, + "learning_rate": 4.9474165554884954e-05, + "loss": 1.4593, + "step": 2933 + }, + { + "epoch": 0.47, + "learning_rate": 4.947363907268685e-05, + "loss": 1.5964, + "step": 2934 + }, + { + "epoch": 0.47, + "learning_rate": 4.947311232985935e-05, + "loss": 1.5259, + "step": 2935 + }, + { + "epoch": 0.47, + "learning_rate": 4.947258532640805e-05, + "loss": 1.5593, + "step": 2936 + }, + { + "epoch": 0.47, + "learning_rate": 4.947205806233859e-05, + "loss": 1.5344, + "step": 2937 + }, + { + "epoch": 0.47, + "learning_rate": 4.947153053765656e-05, + "loss": 1.5098, + "step": 2938 + }, + { + "epoch": 0.47, + "learning_rate": 4.947100275236759e-05, + "loss": 1.4828, + "step": 2939 + }, + { + "epoch": 0.47, + "learning_rate": 4.947047470647729e-05, + "loss": 1.5335, + "step": 2940 + }, + { + "epoch": 0.47, + "learning_rate": 4.9469946399991304e-05, + "loss": 1.516, + "step": 2941 + }, + { + "epoch": 0.47, + "learning_rate": 4.946941783291523e-05, + "loss": 1.5536, + "step": 2942 + }, + { + "epoch": 0.47, + "learning_rate": 4.946888900525473e-05, + "loss": 1.5339, + "step": 2943 + }, + { + "epoch": 0.47, + "learning_rate": 4.94683599170154e-05, + "loss": 1.5445, + "step": 2944 + }, + { + "epoch": 0.47, + "learning_rate": 4.94678305682029e-05, + "loss": 1.5917, + "step": 2945 + }, + { + "epoch": 0.47, + "learning_rate": 4.946730095882286e-05, + "loss": 1.5192, + "step": 2946 + }, + { + "epoch": 0.47, + "learning_rate": 4.9466771088880915e-05, + "loss": 1.567, + "step": 2947 + }, + { + "epoch": 0.47, + "learning_rate": 4.946624095838271e-05, + "loss": 1.5689, + "step": 2948 + }, + { + "epoch": 0.47, + "learning_rate": 4.9465710567333903e-05, + "loss": 1.4878, + "step": 2949 + }, + { + "epoch": 0.47, + "learning_rate": 4.9465179915740124e-05, + "loss": 1.5065, + "step": 2950 + }, + { + "epoch": 0.47, + "learning_rate": 4.9464649003607034e-05, + "loss": 1.5941, + "step": 2951 + }, + { + "epoch": 0.47, + "learning_rate": 4.946411783094028e-05, + "loss": 1.5514, + "step": 2952 + }, + { + "epoch": 0.47, + "learning_rate": 4.946358639774553e-05, + "loss": 1.5052, + "step": 2953 + }, + { + "epoch": 0.47, + "learning_rate": 4.9463054704028434e-05, + "loss": 1.5319, + "step": 2954 + }, + { + "epoch": 0.47, + "learning_rate": 4.946252274979466e-05, + "loss": 1.5815, + "step": 2955 + }, + { + "epoch": 0.47, + "learning_rate": 4.946199053504988e-05, + "loss": 1.5636, + "step": 2956 + }, + { + "epoch": 0.47, + "learning_rate": 4.946145805979973e-05, + "loss": 1.5843, + "step": 2957 + }, + { + "epoch": 0.47, + "learning_rate": 4.946092532404991e-05, + "loss": 1.6244, + "step": 2958 + }, + { + "epoch": 0.47, + "learning_rate": 4.946039232780609e-05, + "loss": 1.4971, + "step": 2959 + }, + { + "epoch": 0.47, + "learning_rate": 4.9459859071073943e-05, + "loss": 1.5078, + "step": 2960 + }, + { + "epoch": 0.47, + "learning_rate": 4.945932555385915e-05, + "loss": 1.549, + "step": 2961 + }, + { + "epoch": 0.47, + "learning_rate": 4.9458791776167376e-05, + "loss": 1.486, + "step": 2962 + }, + { + "epoch": 0.47, + "learning_rate": 4.945825773800433e-05, + "loss": 1.5448, + "step": 2963 + }, + { + "epoch": 0.47, + "learning_rate": 4.945772343937569e-05, + "loss": 1.5524, + "step": 2964 + }, + { + "epoch": 0.47, + "learning_rate": 4.945718888028714e-05, + "loss": 1.5422, + "step": 2965 + }, + { + "epoch": 0.47, + "learning_rate": 4.9456654060744376e-05, + "loss": 1.6579, + "step": 2966 + }, + { + "epoch": 0.47, + "learning_rate": 4.9456118980753095e-05, + "loss": 1.6528, + "step": 2967 + }, + { + "epoch": 0.47, + "learning_rate": 4.945558364031899e-05, + "loss": 1.515, + "step": 2968 + }, + { + "epoch": 0.47, + "learning_rate": 4.945504803944777e-05, + "loss": 1.6287, + "step": 2969 + }, + { + "epoch": 0.47, + "learning_rate": 4.945451217814514e-05, + "loss": 1.5269, + "step": 2970 + }, + { + "epoch": 0.47, + "learning_rate": 4.94539760564168e-05, + "loss": 1.6467, + "step": 2971 + }, + { + "epoch": 0.47, + "learning_rate": 4.945343967426846e-05, + "loss": 1.5311, + "step": 2972 + }, + { + "epoch": 0.47, + "learning_rate": 4.9452903031705836e-05, + "loss": 1.5243, + "step": 2973 + }, + { + "epoch": 0.47, + "learning_rate": 4.945236612873464e-05, + "loss": 1.6227, + "step": 2974 + }, + { + "epoch": 0.47, + "learning_rate": 4.945182896536058e-05, + "loss": 1.6099, + "step": 2975 + }, + { + "epoch": 0.47, + "learning_rate": 4.94512915415894e-05, + "loss": 1.5785, + "step": 2976 + }, + { + "epoch": 0.47, + "learning_rate": 4.94507538574268e-05, + "loss": 1.5677, + "step": 2977 + }, + { + "epoch": 0.47, + "learning_rate": 4.9450215912878514e-05, + "loss": 1.5143, + "step": 2978 + }, + { + "epoch": 0.47, + "learning_rate": 4.944967770795028e-05, + "loss": 1.5047, + "step": 2979 + }, + { + "epoch": 0.47, + "learning_rate": 4.944913924264782e-05, + "loss": 1.6481, + "step": 2980 + }, + { + "epoch": 0.47, + "learning_rate": 4.9448600516976876e-05, + "loss": 1.5485, + "step": 2981 + }, + { + "epoch": 0.48, + "learning_rate": 4.9448061530943174e-05, + "loss": 1.528, + "step": 2982 + }, + { + "epoch": 0.48, + "learning_rate": 4.944752228455246e-05, + "loss": 1.5925, + "step": 2983 + }, + { + "epoch": 0.48, + "learning_rate": 4.944698277781048e-05, + "loss": 1.5921, + "step": 2984 + }, + { + "epoch": 0.48, + "learning_rate": 4.944644301072297e-05, + "loss": 1.5596, + "step": 2985 + }, + { + "epoch": 0.48, + "learning_rate": 4.9445902983295686e-05, + "loss": 1.4956, + "step": 2986 + }, + { + "epoch": 0.48, + "learning_rate": 4.944536269553438e-05, + "loss": 1.4772, + "step": 2987 + }, + { + "epoch": 0.48, + "learning_rate": 4.944482214744479e-05, + "loss": 1.5282, + "step": 2988 + }, + { + "epoch": 0.48, + "learning_rate": 4.9444281339032694e-05, + "loss": 1.605, + "step": 2989 + }, + { + "epoch": 0.48, + "learning_rate": 4.9443740270303845e-05, + "loss": 1.5466, + "step": 2990 + }, + { + "epoch": 0.48, + "learning_rate": 4.9443198941264e-05, + "loss": 1.4836, + "step": 2991 + }, + { + "epoch": 0.48, + "learning_rate": 4.944265735191892e-05, + "loss": 1.5233, + "step": 2992 + }, + { + "epoch": 0.48, + "learning_rate": 4.944211550227439e-05, + "loss": 1.5384, + "step": 2993 + }, + { + "epoch": 0.48, + "learning_rate": 4.944157339233616e-05, + "loss": 1.6087, + "step": 2994 + }, + { + "epoch": 0.48, + "learning_rate": 4.944103102211002e-05, + "loss": 1.4491, + "step": 2995 + }, + { + "epoch": 0.48, + "learning_rate": 4.944048839160173e-05, + "loss": 1.4919, + "step": 2996 + }, + { + "epoch": 0.48, + "learning_rate": 4.9439945500817084e-05, + "loss": 1.6125, + "step": 2997 + }, + { + "epoch": 0.48, + "learning_rate": 4.943940234976185e-05, + "loss": 1.6213, + "step": 2998 + }, + { + "epoch": 0.48, + "learning_rate": 4.9438858938441826e-05, + "loss": 1.4899, + "step": 2999 + }, + { + "epoch": 0.48, + "learning_rate": 4.943831526686278e-05, + "loss": 1.5334, + "step": 3000 + }, + { + "epoch": 0.48, + "learning_rate": 4.9437771335030516e-05, + "loss": 1.5995, + "step": 3001 + }, + { + "epoch": 0.48, + "learning_rate": 4.943722714295083e-05, + "loss": 1.5641, + "step": 3002 + }, + { + "epoch": 0.48, + "learning_rate": 4.943668269062951e-05, + "loss": 1.5112, + "step": 3003 + }, + { + "epoch": 0.48, + "learning_rate": 4.943613797807235e-05, + "loss": 1.5915, + "step": 3004 + }, + { + "epoch": 0.48, + "learning_rate": 4.9435593005285166e-05, + "loss": 1.5394, + "step": 3005 + }, + { + "epoch": 0.48, + "learning_rate": 4.9435047772273744e-05, + "loss": 1.516, + "step": 3006 + }, + { + "epoch": 0.48, + "learning_rate": 4.9434502279043906e-05, + "loss": 1.5799, + "step": 3007 + }, + { + "epoch": 0.48, + "learning_rate": 4.943395652560144e-05, + "loss": 1.592, + "step": 3008 + }, + { + "epoch": 0.48, + "learning_rate": 4.943341051195218e-05, + "loss": 1.4963, + "step": 3009 + }, + { + "epoch": 0.48, + "learning_rate": 4.943286423810193e-05, + "loss": 1.5319, + "step": 3010 + }, + { + "epoch": 0.48, + "learning_rate": 4.943231770405651e-05, + "loss": 1.5599, + "step": 3011 + }, + { + "epoch": 0.48, + "learning_rate": 4.943177090982174e-05, + "loss": 1.5448, + "step": 3012 + }, + { + "epoch": 0.48, + "learning_rate": 4.943122385540344e-05, + "loss": 1.5381, + "step": 3013 + }, + { + "epoch": 0.48, + "learning_rate": 4.943067654080744e-05, + "loss": 1.5269, + "step": 3014 + }, + { + "epoch": 0.48, + "learning_rate": 4.943012896603957e-05, + "loss": 1.5871, + "step": 3015 + }, + { + "epoch": 0.48, + "learning_rate": 4.942958113110565e-05, + "loss": 1.4961, + "step": 3016 + }, + { + "epoch": 0.48, + "learning_rate": 4.942903303601153e-05, + "loss": 1.5335, + "step": 3017 + }, + { + "epoch": 0.48, + "learning_rate": 4.942848468076304e-05, + "loss": 1.5373, + "step": 3018 + }, + { + "epoch": 0.48, + "learning_rate": 4.942793606536602e-05, + "loss": 1.63, + "step": 3019 + }, + { + "epoch": 0.48, + "learning_rate": 4.942738718982631e-05, + "loss": 1.471, + "step": 3020 + }, + { + "epoch": 0.48, + "learning_rate": 4.942683805414975e-05, + "loss": 1.492, + "step": 3021 + }, + { + "epoch": 0.48, + "learning_rate": 4.94262886583422e-05, + "loss": 1.5731, + "step": 3022 + }, + { + "epoch": 0.48, + "learning_rate": 4.942573900240951e-05, + "loss": 1.5588, + "step": 3023 + }, + { + "epoch": 0.48, + "learning_rate": 4.942518908635752e-05, + "loss": 1.5161, + "step": 3024 + }, + { + "epoch": 0.48, + "learning_rate": 4.94246389101921e-05, + "loss": 1.5322, + "step": 3025 + }, + { + "epoch": 0.48, + "learning_rate": 4.942408847391911e-05, + "loss": 1.5565, + "step": 3026 + }, + { + "epoch": 0.48, + "learning_rate": 4.942353777754439e-05, + "loss": 1.5584, + "step": 3027 + }, + { + "epoch": 0.48, + "learning_rate": 4.942298682107383e-05, + "loss": 1.4992, + "step": 3028 + }, + { + "epoch": 0.48, + "learning_rate": 4.9422435604513285e-05, + "loss": 1.5078, + "step": 3029 + }, + { + "epoch": 0.48, + "learning_rate": 4.9421884127868634e-05, + "loss": 1.5763, + "step": 3030 + }, + { + "epoch": 0.48, + "learning_rate": 4.942133239114574e-05, + "loss": 1.4943, + "step": 3031 + }, + { + "epoch": 0.48, + "learning_rate": 4.9420780394350485e-05, + "loss": 1.5389, + "step": 3032 + }, + { + "epoch": 0.48, + "learning_rate": 4.942022813748874e-05, + "loss": 1.5179, + "step": 3033 + }, + { + "epoch": 0.48, + "learning_rate": 4.94196756205664e-05, + "loss": 1.5251, + "step": 3034 + }, + { + "epoch": 0.48, + "learning_rate": 4.941912284358933e-05, + "loss": 1.5807, + "step": 3035 + }, + { + "epoch": 0.48, + "learning_rate": 4.941856980656343e-05, + "loss": 1.5371, + "step": 3036 + }, + { + "epoch": 0.48, + "learning_rate": 4.941801650949459e-05, + "loss": 1.5275, + "step": 3037 + }, + { + "epoch": 0.48, + "learning_rate": 4.9417462952388696e-05, + "loss": 1.4925, + "step": 3038 + }, + { + "epoch": 0.48, + "learning_rate": 4.941690913525164e-05, + "loss": 1.568, + "step": 3039 + }, + { + "epoch": 0.48, + "learning_rate": 4.941635505808934e-05, + "loss": 1.5073, + "step": 3040 + }, + { + "epoch": 0.48, + "learning_rate": 4.941580072090768e-05, + "loss": 1.5537, + "step": 3041 + }, + { + "epoch": 0.48, + "learning_rate": 4.941524612371256e-05, + "loss": 1.634, + "step": 3042 + }, + { + "epoch": 0.48, + "learning_rate": 4.9414691266509886e-05, + "loss": 1.5747, + "step": 3043 + }, + { + "epoch": 0.48, + "learning_rate": 4.941413614930558e-05, + "loss": 1.5309, + "step": 3044 + }, + { + "epoch": 0.49, + "learning_rate": 4.941358077210554e-05, + "loss": 1.5385, + "step": 3045 + }, + { + "epoch": 0.49, + "learning_rate": 4.941302513491569e-05, + "loss": 1.5391, + "step": 3046 + }, + { + "epoch": 0.49, + "learning_rate": 4.9412469237741945e-05, + "loss": 1.5316, + "step": 3047 + }, + { + "epoch": 0.49, + "learning_rate": 4.941191308059022e-05, + "loss": 1.5949, + "step": 3048 + }, + { + "epoch": 0.49, + "learning_rate": 4.9411356663466446e-05, + "loss": 1.4953, + "step": 3049 + }, + { + "epoch": 0.49, + "learning_rate": 4.9410799986376544e-05, + "loss": 1.5632, + "step": 3050 + }, + { + "epoch": 0.49, + "learning_rate": 4.941024304932644e-05, + "loss": 1.5233, + "step": 3051 + }, + { + "epoch": 0.49, + "learning_rate": 4.9409685852322066e-05, + "loss": 1.5643, + "step": 3052 + }, + { + "epoch": 0.49, + "learning_rate": 4.940912839536936e-05, + "loss": 1.5946, + "step": 3053 + }, + { + "epoch": 0.49, + "learning_rate": 4.9408570678474256e-05, + "loss": 1.5506, + "step": 3054 + }, + { + "epoch": 0.49, + "learning_rate": 4.9408012701642694e-05, + "loss": 1.589, + "step": 3055 + }, + { + "epoch": 0.49, + "learning_rate": 4.9407454464880614e-05, + "loss": 1.5275, + "step": 3056 + }, + { + "epoch": 0.49, + "learning_rate": 4.940689596819396e-05, + "loss": 1.5417, + "step": 3057 + }, + { + "epoch": 0.49, + "learning_rate": 4.9406337211588684e-05, + "loss": 1.4852, + "step": 3058 + }, + { + "epoch": 0.49, + "learning_rate": 4.940577819507073e-05, + "loss": 1.555, + "step": 3059 + }, + { + "epoch": 0.49, + "learning_rate": 4.940521891864606e-05, + "loss": 1.5592, + "step": 3060 + }, + { + "epoch": 0.49, + "learning_rate": 4.9404659382320626e-05, + "loss": 1.5072, + "step": 3061 + }, + { + "epoch": 0.49, + "learning_rate": 4.940409958610038e-05, + "loss": 1.5047, + "step": 3062 + }, + { + "epoch": 0.49, + "learning_rate": 4.94035395299913e-05, + "loss": 1.682, + "step": 3063 + }, + { + "epoch": 0.49, + "learning_rate": 4.9402979213999334e-05, + "loss": 1.5703, + "step": 3064 + }, + { + "epoch": 0.49, + "learning_rate": 4.9402418638130454e-05, + "loss": 1.5519, + "step": 3065 + }, + { + "epoch": 0.49, + "learning_rate": 4.9401857802390626e-05, + "loss": 1.5856, + "step": 3066 + }, + { + "epoch": 0.49, + "learning_rate": 4.940129670678584e-05, + "loss": 1.5277, + "step": 3067 + }, + { + "epoch": 0.49, + "learning_rate": 4.9400735351322045e-05, + "loss": 1.5817, + "step": 3068 + }, + { + "epoch": 0.49, + "learning_rate": 4.940017373600524e-05, + "loss": 1.5239, + "step": 3069 + }, + { + "epoch": 0.49, + "learning_rate": 4.939961186084139e-05, + "loss": 1.4945, + "step": 3070 + }, + { + "epoch": 0.49, + "learning_rate": 4.93990497258365e-05, + "loss": 1.5252, + "step": 3071 + }, + { + "epoch": 0.49, + "learning_rate": 4.939848733099654e-05, + "loss": 1.5508, + "step": 3072 + }, + { + "epoch": 0.49, + "learning_rate": 4.939792467632749e-05, + "loss": 1.5497, + "step": 3073 + }, + { + "epoch": 0.49, + "learning_rate": 4.939736176183537e-05, + "loss": 1.5711, + "step": 3074 + }, + { + "epoch": 0.49, + "learning_rate": 4.939679858752615e-05, + "loss": 1.6273, + "step": 3075 + }, + { + "epoch": 0.49, + "learning_rate": 4.939623515340585e-05, + "loss": 1.5247, + "step": 3076 + }, + { + "epoch": 0.49, + "learning_rate": 4.939567145948044e-05, + "loss": 1.61, + "step": 3077 + }, + { + "epoch": 0.49, + "learning_rate": 4.939510750575594e-05, + "loss": 1.5923, + "step": 3078 + }, + { + "epoch": 0.49, + "learning_rate": 4.939454329223837e-05, + "loss": 1.5042, + "step": 3079 + }, + { + "epoch": 0.49, + "learning_rate": 4.9393978818933715e-05, + "loss": 1.5107, + "step": 3080 + }, + { + "epoch": 0.49, + "learning_rate": 4.9393414085848e-05, + "loss": 1.5334, + "step": 3081 + }, + { + "epoch": 0.49, + "learning_rate": 4.939284909298722e-05, + "loss": 1.4847, + "step": 3082 + }, + { + "epoch": 0.49, + "learning_rate": 4.9392283840357425e-05, + "loss": 1.5309, + "step": 3083 + }, + { + "epoch": 0.49, + "learning_rate": 4.939171832796461e-05, + "loss": 1.5612, + "step": 3084 + }, + { + "epoch": 0.49, + "learning_rate": 4.93911525558148e-05, + "loss": 1.6147, + "step": 3085 + }, + { + "epoch": 0.49, + "learning_rate": 4.939058652391403e-05, + "loss": 1.5409, + "step": 3086 + }, + { + "epoch": 0.49, + "learning_rate": 4.939002023226832e-05, + "loss": 1.5173, + "step": 3087 + }, + { + "epoch": 0.49, + "learning_rate": 4.93894536808837e-05, + "loss": 1.4253, + "step": 3088 + }, + { + "epoch": 0.49, + "learning_rate": 4.9388886869766206e-05, + "loss": 1.5747, + "step": 3089 + }, + { + "epoch": 0.49, + "learning_rate": 4.938831979892187e-05, + "loss": 1.5654, + "step": 3090 + }, + { + "epoch": 0.49, + "learning_rate": 4.9387752468356735e-05, + "loss": 1.4834, + "step": 3091 + }, + { + "epoch": 0.49, + "learning_rate": 4.9387184878076844e-05, + "loss": 1.5111, + "step": 3092 + }, + { + "epoch": 0.49, + "learning_rate": 4.938661702808824e-05, + "loss": 1.5716, + "step": 3093 + }, + { + "epoch": 0.49, + "learning_rate": 4.938604891839697e-05, + "loss": 1.5483, + "step": 3094 + }, + { + "epoch": 0.49, + "learning_rate": 4.938548054900909e-05, + "loss": 1.4801, + "step": 3095 + }, + { + "epoch": 0.49, + "learning_rate": 4.9384911919930634e-05, + "loss": 1.6065, + "step": 3096 + }, + { + "epoch": 0.49, + "learning_rate": 4.938434303116768e-05, + "loss": 1.5907, + "step": 3097 + }, + { + "epoch": 0.49, + "learning_rate": 4.938377388272628e-05, + "loss": 1.5405, + "step": 3098 + }, + { + "epoch": 0.49, + "learning_rate": 4.9383204474612485e-05, + "loss": 1.5041, + "step": 3099 + }, + { + "epoch": 0.49, + "learning_rate": 4.938263480683237e-05, + "loss": 1.4689, + "step": 3100 + }, + { + "epoch": 0.49, + "learning_rate": 4.9382064879391995e-05, + "loss": 1.492, + "step": 3101 + }, + { + "epoch": 0.49, + "learning_rate": 4.938149469229743e-05, + "loss": 1.5776, + "step": 3102 + }, + { + "epoch": 0.49, + "learning_rate": 4.938092424555475e-05, + "loss": 1.5322, + "step": 3103 + }, + { + "epoch": 0.49, + "learning_rate": 4.938035353917003e-05, + "loss": 1.5305, + "step": 3104 + }, + { + "epoch": 0.49, + "learning_rate": 4.9379782573149345e-05, + "loss": 1.577, + "step": 3105 + }, + { + "epoch": 0.49, + "learning_rate": 4.9379211347498777e-05, + "loss": 1.5705, + "step": 3106 + }, + { + "epoch": 0.49, + "learning_rate": 4.937863986222441e-05, + "loss": 1.5228, + "step": 3107 + }, + { + "epoch": 0.5, + "learning_rate": 4.937806811733233e-05, + "loss": 1.6008, + "step": 3108 + }, + { + "epoch": 0.5, + "learning_rate": 4.9377496112828626e-05, + "loss": 1.5101, + "step": 3109 + }, + { + "epoch": 0.5, + "learning_rate": 4.9376923848719384e-05, + "loss": 1.6022, + "step": 3110 + }, + { + "epoch": 0.5, + "learning_rate": 4.9376351325010706e-05, + "loss": 1.5526, + "step": 3111 + }, + { + "epoch": 0.5, + "learning_rate": 4.937577854170868e-05, + "loss": 1.5655, + "step": 3112 + }, + { + "epoch": 0.5, + "learning_rate": 4.937520549881942e-05, + "loss": 1.5208, + "step": 3113 + }, + { + "epoch": 0.5, + "learning_rate": 4.9374632196349014e-05, + "loss": 1.5347, + "step": 3114 + }, + { + "epoch": 0.5, + "learning_rate": 4.9374058634303574e-05, + "loss": 1.5218, + "step": 3115 + }, + { + "epoch": 0.5, + "learning_rate": 4.937348481268921e-05, + "loss": 1.5298, + "step": 3116 + }, + { + "epoch": 0.5, + "learning_rate": 4.9372910731512036e-05, + "loss": 1.5246, + "step": 3117 + }, + { + "epoch": 0.5, + "learning_rate": 4.937233639077815e-05, + "loss": 1.5145, + "step": 3118 + }, + { + "epoch": 0.5, + "learning_rate": 4.937176179049368e-05, + "loss": 1.5807, + "step": 3119 + }, + { + "epoch": 0.5, + "learning_rate": 4.9371186930664746e-05, + "loss": 1.5191, + "step": 3120 + }, + { + "epoch": 0.5, + "learning_rate": 4.937061181129746e-05, + "loss": 1.5337, + "step": 3121 + }, + { + "epoch": 0.5, + "learning_rate": 4.937003643239796e-05, + "loss": 1.5448, + "step": 3122 + }, + { + "epoch": 0.5, + "learning_rate": 4.936946079397237e-05, + "loss": 1.514, + "step": 3123 + }, + { + "epoch": 0.5, + "learning_rate": 4.9368884896026814e-05, + "loss": 1.5651, + "step": 3124 + }, + { + "epoch": 0.5, + "learning_rate": 4.936830873856743e-05, + "loss": 1.5356, + "step": 3125 + }, + { + "epoch": 0.5, + "learning_rate": 4.936773232160035e-05, + "loss": 1.5117, + "step": 3126 + }, + { + "epoch": 0.5, + "learning_rate": 4.936715564513172e-05, + "loss": 1.572, + "step": 3127 + }, + { + "epoch": 0.5, + "learning_rate": 4.936657870916767e-05, + "loss": 1.457, + "step": 3128 + }, + { + "epoch": 0.5, + "learning_rate": 4.936600151371435e-05, + "loss": 1.5536, + "step": 3129 + }, + { + "epoch": 0.5, + "learning_rate": 4.936542405877791e-05, + "loss": 1.5415, + "step": 3130 + }, + { + "epoch": 0.5, + "learning_rate": 4.93648463443645e-05, + "loss": 1.5711, + "step": 3131 + }, + { + "epoch": 0.5, + "learning_rate": 4.936426837048026e-05, + "loss": 1.4845, + "step": 3132 + }, + { + "epoch": 0.5, + "learning_rate": 4.936369013713136e-05, + "loss": 1.5702, + "step": 3133 + }, + { + "epoch": 0.5, + "learning_rate": 4.936311164432395e-05, + "loss": 1.5781, + "step": 3134 + }, + { + "epoch": 0.5, + "learning_rate": 4.9362532892064195e-05, + "loss": 1.5711, + "step": 3135 + }, + { + "epoch": 0.5, + "learning_rate": 4.936195388035825e-05, + "loss": 1.6061, + "step": 3136 + }, + { + "epoch": 0.5, + "learning_rate": 4.936137460921229e-05, + "loss": 1.5296, + "step": 3137 + }, + { + "epoch": 0.5, + "learning_rate": 4.9360795078632484e-05, + "loss": 1.6034, + "step": 3138 + }, + { + "epoch": 0.5, + "learning_rate": 4.936021528862499e-05, + "loss": 1.5647, + "step": 3139 + }, + { + "epoch": 0.5, + "learning_rate": 4.9359635239196e-05, + "loss": 1.6123, + "step": 3140 + }, + { + "epoch": 0.5, + "learning_rate": 4.9359054930351676e-05, + "loss": 1.5085, + "step": 3141 + }, + { + "epoch": 0.5, + "learning_rate": 4.935847436209822e-05, + "loss": 1.4588, + "step": 3142 + }, + { + "epoch": 0.5, + "learning_rate": 4.935789353444179e-05, + "loss": 1.5319, + "step": 3143 + }, + { + "epoch": 0.5, + "learning_rate": 4.935731244738858e-05, + "loss": 1.5721, + "step": 3144 + }, + { + "epoch": 0.5, + "learning_rate": 4.9356731100944785e-05, + "loss": 1.555, + "step": 3145 + }, + { + "epoch": 0.5, + "learning_rate": 4.9356149495116596e-05, + "loss": 1.5208, + "step": 3146 + }, + { + "epoch": 0.5, + "learning_rate": 4.9355567629910195e-05, + "loss": 1.5457, + "step": 3147 + }, + { + "epoch": 0.5, + "learning_rate": 4.935498550533178e-05, + "loss": 1.6217, + "step": 3148 + }, + { + "epoch": 0.5, + "learning_rate": 4.935440312138757e-05, + "loss": 1.5643, + "step": 3149 + }, + { + "epoch": 0.5, + "learning_rate": 4.935382047808375e-05, + "loss": 1.5597, + "step": 3150 + }, + { + "epoch": 0.5, + "learning_rate": 4.9353237575426515e-05, + "loss": 1.5127, + "step": 3151 + }, + { + "epoch": 0.5, + "learning_rate": 4.93526544134221e-05, + "loss": 1.6064, + "step": 3152 + }, + { + "epoch": 0.5, + "learning_rate": 4.935207099207669e-05, + "loss": 1.52, + "step": 3153 + }, + { + "epoch": 0.5, + "learning_rate": 4.935148731139651e-05, + "loss": 1.5316, + "step": 3154 + }, + { + "epoch": 0.5, + "learning_rate": 4.935090337138777e-05, + "loss": 1.4985, + "step": 3155 + }, + { + "epoch": 0.5, + "learning_rate": 4.9350319172056704e-05, + "loss": 1.4912, + "step": 3156 + }, + { + "epoch": 0.5, + "learning_rate": 4.9349734713409515e-05, + "loss": 1.5837, + "step": 3157 + }, + { + "epoch": 0.5, + "learning_rate": 4.934914999545244e-05, + "loss": 1.5553, + "step": 3158 + }, + { + "epoch": 0.5, + "learning_rate": 4.9348565018191694e-05, + "loss": 1.5827, + "step": 3159 + }, + { + "epoch": 0.5, + "learning_rate": 4.934797978163351e-05, + "loss": 1.5697, + "step": 3160 + }, + { + "epoch": 0.5, + "learning_rate": 4.934739428578413e-05, + "loss": 1.5343, + "step": 3161 + }, + { + "epoch": 0.5, + "learning_rate": 4.9346808530649785e-05, + "loss": 1.5822, + "step": 3162 + }, + { + "epoch": 0.5, + "learning_rate": 4.9346222516236704e-05, + "loss": 1.5962, + "step": 3163 + }, + { + "epoch": 0.5, + "learning_rate": 4.934563624255114e-05, + "loss": 1.5734, + "step": 3164 + }, + { + "epoch": 0.5, + "learning_rate": 4.934504970959933e-05, + "loss": 1.5505, + "step": 3165 + }, + { + "epoch": 0.5, + "learning_rate": 4.9344462917387515e-05, + "loss": 1.5039, + "step": 3166 + }, + { + "epoch": 0.5, + "learning_rate": 4.934387586592195e-05, + "loss": 1.6201, + "step": 3167 + }, + { + "epoch": 0.5, + "learning_rate": 4.9343288555208894e-05, + "loss": 1.5734, + "step": 3168 + }, + { + "epoch": 0.5, + "learning_rate": 4.9342700985254593e-05, + "loss": 1.512, + "step": 3169 + }, + { + "epoch": 0.51, + "learning_rate": 4.93421131560653e-05, + "loss": 1.4844, + "step": 3170 + }, + { + "epoch": 0.51, + "learning_rate": 4.934152506764728e-05, + "loss": 1.5876, + "step": 3171 + }, + { + "epoch": 0.51, + "learning_rate": 4.93409367200068e-05, + "loss": 1.5075, + "step": 3172 + }, + { + "epoch": 0.51, + "learning_rate": 4.934034811315012e-05, + "loss": 1.5026, + "step": 3173 + }, + { + "epoch": 0.51, + "learning_rate": 4.933975924708351e-05, + "loss": 1.5563, + "step": 3174 + }, + { + "epoch": 0.51, + "learning_rate": 4.933917012181324e-05, + "loss": 1.6165, + "step": 3175 + }, + { + "epoch": 0.51, + "learning_rate": 4.933858073734558e-05, + "loss": 1.4744, + "step": 3176 + }, + { + "epoch": 0.51, + "learning_rate": 4.933799109368683e-05, + "loss": 1.4831, + "step": 3177 + }, + { + "epoch": 0.51, + "learning_rate": 4.9337401190843234e-05, + "loss": 1.5067, + "step": 3178 + }, + { + "epoch": 0.51, + "learning_rate": 4.93368110288211e-05, + "loss": 1.5534, + "step": 3179 + }, + { + "epoch": 0.51, + "learning_rate": 4.933622060762669e-05, + "loss": 1.5524, + "step": 3180 + }, + { + "epoch": 0.51, + "learning_rate": 4.933562992726632e-05, + "loss": 1.5438, + "step": 3181 + }, + { + "epoch": 0.51, + "learning_rate": 4.933503898774626e-05, + "loss": 1.5913, + "step": 3182 + }, + { + "epoch": 0.51, + "learning_rate": 4.9334447789072815e-05, + "loss": 1.5376, + "step": 3183 + }, + { + "epoch": 0.51, + "learning_rate": 4.9333856331252256e-05, + "loss": 1.556, + "step": 3184 + }, + { + "epoch": 0.51, + "learning_rate": 4.933326461429092e-05, + "loss": 1.4886, + "step": 3185 + }, + { + "epoch": 0.51, + "learning_rate": 4.933267263819508e-05, + "loss": 1.5861, + "step": 3186 + }, + { + "epoch": 0.51, + "learning_rate": 4.933208040297105e-05, + "loss": 1.5684, + "step": 3187 + }, + { + "epoch": 0.51, + "learning_rate": 4.933148790862514e-05, + "loss": 1.4678, + "step": 3188 + }, + { + "epoch": 0.51, + "learning_rate": 4.933089515516366e-05, + "loss": 1.4561, + "step": 3189 + }, + { + "epoch": 0.51, + "learning_rate": 4.933030214259291e-05, + "loss": 1.5365, + "step": 3190 + }, + { + "epoch": 0.51, + "learning_rate": 4.9329708870919214e-05, + "loss": 1.5915, + "step": 3191 + }, + { + "epoch": 0.51, + "learning_rate": 4.932911534014889e-05, + "loss": 1.5259, + "step": 3192 + }, + { + "epoch": 0.51, + "learning_rate": 4.932852155028826e-05, + "loss": 1.5527, + "step": 3193 + }, + { + "epoch": 0.51, + "learning_rate": 4.932792750134364e-05, + "loss": 1.4826, + "step": 3194 + }, + { + "epoch": 0.51, + "learning_rate": 4.932733319332137e-05, + "loss": 1.5042, + "step": 3195 + }, + { + "epoch": 0.51, + "learning_rate": 4.932673862622776e-05, + "loss": 1.5804, + "step": 3196 + }, + { + "epoch": 0.51, + "learning_rate": 4.932614380006917e-05, + "loss": 1.6123, + "step": 3197 + }, + { + "epoch": 0.51, + "learning_rate": 4.93255487148519e-05, + "loss": 1.5335, + "step": 3198 + }, + { + "epoch": 0.51, + "learning_rate": 4.932495337058232e-05, + "loss": 1.5566, + "step": 3199 + }, + { + "epoch": 0.51, + "learning_rate": 4.9324357767266744e-05, + "loss": 1.591, + "step": 3200 + }, + { + "epoch": 0.51, + "learning_rate": 4.9323761904911525e-05, + "loss": 1.5785, + "step": 3201 + }, + { + "epoch": 0.51, + "learning_rate": 4.932316578352302e-05, + "loss": 1.5614, + "step": 3202 + }, + { + "epoch": 0.51, + "learning_rate": 4.9322569403107557e-05, + "loss": 1.5316, + "step": 3203 + }, + { + "epoch": 0.51, + "learning_rate": 4.93219727636715e-05, + "loss": 1.5249, + "step": 3204 + }, + { + "epoch": 0.51, + "learning_rate": 4.9321375865221194e-05, + "loss": 1.5256, + "step": 3205 + }, + { + "epoch": 0.51, + "learning_rate": 4.932077870776301e-05, + "loss": 1.5495, + "step": 3206 + }, + { + "epoch": 0.51, + "learning_rate": 4.932018129130329e-05, + "loss": 1.4862, + "step": 3207 + }, + { + "epoch": 0.51, + "learning_rate": 4.9319583615848404e-05, + "loss": 1.575, + "step": 3208 + }, + { + "epoch": 0.51, + "learning_rate": 4.931898568140472e-05, + "loss": 1.5194, + "step": 3209 + }, + { + "epoch": 0.51, + "learning_rate": 4.93183874879786e-05, + "loss": 1.4727, + "step": 3210 + }, + { + "epoch": 0.51, + "learning_rate": 4.931778903557642e-05, + "loss": 1.5576, + "step": 3211 + }, + { + "epoch": 0.51, + "learning_rate": 4.931719032420455e-05, + "loss": 1.606, + "step": 3212 + }, + { + "epoch": 0.51, + "learning_rate": 4.931659135386937e-05, + "loss": 1.4697, + "step": 3213 + }, + { + "epoch": 0.51, + "learning_rate": 4.931599212457725e-05, + "loss": 1.5524, + "step": 3214 + }, + { + "epoch": 0.51, + "learning_rate": 4.9315392636334576e-05, + "loss": 1.5697, + "step": 3215 + }, + { + "epoch": 0.51, + "learning_rate": 4.9314792889147746e-05, + "loss": 1.506, + "step": 3216 + }, + { + "epoch": 0.51, + "learning_rate": 4.9314192883023114e-05, + "loss": 1.5832, + "step": 3217 + }, + { + "epoch": 0.51, + "learning_rate": 4.93135926179671e-05, + "loss": 1.4943, + "step": 3218 + }, + { + "epoch": 0.51, + "learning_rate": 4.931299209398609e-05, + "loss": 1.5181, + "step": 3219 + }, + { + "epoch": 0.51, + "learning_rate": 4.9312391311086464e-05, + "loss": 1.5677, + "step": 3220 + }, + { + "epoch": 0.51, + "learning_rate": 4.931179026927464e-05, + "loss": 1.5065, + "step": 3221 + }, + { + "epoch": 0.51, + "learning_rate": 4.9311188968557e-05, + "loss": 1.5946, + "step": 3222 + }, + { + "epoch": 0.51, + "learning_rate": 4.9310587408939965e-05, + "loss": 1.5283, + "step": 3223 + }, + { + "epoch": 0.51, + "learning_rate": 4.930998559042993e-05, + "loss": 1.5734, + "step": 3224 + }, + { + "epoch": 0.51, + "learning_rate": 4.9309383513033314e-05, + "loss": 1.4858, + "step": 3225 + }, + { + "epoch": 0.51, + "learning_rate": 4.930878117675651e-05, + "loss": 1.5241, + "step": 3226 + }, + { + "epoch": 0.51, + "learning_rate": 4.9308178581605954e-05, + "loss": 1.5176, + "step": 3227 + }, + { + "epoch": 0.51, + "learning_rate": 4.9307575727588054e-05, + "loss": 1.5879, + "step": 3228 + }, + { + "epoch": 0.51, + "learning_rate": 4.930697261470922e-05, + "loss": 1.5383, + "step": 3229 + }, + { + "epoch": 0.51, + "learning_rate": 4.9306369242975895e-05, + "loss": 1.5239, + "step": 3230 + }, + { + "epoch": 0.51, + "learning_rate": 4.9305765612394496e-05, + "loss": 1.5884, + "step": 3231 + }, + { + "epoch": 0.51, + "learning_rate": 4.9305161722971443e-05, + "loss": 1.5493, + "step": 3232 + }, + { + "epoch": 0.52, + "learning_rate": 4.9304557574713174e-05, + "loss": 1.508, + "step": 3233 + }, + { + "epoch": 0.52, + "learning_rate": 4.9303953167626125e-05, + "loss": 1.5389, + "step": 3234 + }, + { + "epoch": 0.52, + "learning_rate": 4.930334850171673e-05, + "loss": 1.5324, + "step": 3235 + }, + { + "epoch": 0.52, + "learning_rate": 4.9302743576991426e-05, + "loss": 1.5356, + "step": 3236 + }, + { + "epoch": 0.52, + "learning_rate": 4.9302138393456664e-05, + "loss": 1.5086, + "step": 3237 + }, + { + "epoch": 0.52, + "learning_rate": 4.930153295111888e-05, + "loss": 1.6035, + "step": 3238 + }, + { + "epoch": 0.52, + "learning_rate": 4.930092724998452e-05, + "loss": 1.5526, + "step": 3239 + }, + { + "epoch": 0.52, + "learning_rate": 4.930032129006004e-05, + "loss": 1.5079, + "step": 3240 + }, + { + "epoch": 0.52, + "learning_rate": 4.9299715071351886e-05, + "loss": 1.5472, + "step": 3241 + }, + { + "epoch": 0.52, + "learning_rate": 4.929910859386653e-05, + "loss": 1.6065, + "step": 3242 + }, + { + "epoch": 0.52, + "learning_rate": 4.9298501857610415e-05, + "loss": 1.4963, + "step": 3243 + }, + { + "epoch": 0.52, + "learning_rate": 4.929789486259e-05, + "loss": 1.5744, + "step": 3244 + }, + { + "epoch": 0.52, + "learning_rate": 4.929728760881176e-05, + "loss": 1.5539, + "step": 3245 + }, + { + "epoch": 0.52, + "learning_rate": 4.9296680096282164e-05, + "loss": 1.5637, + "step": 3246 + }, + { + "epoch": 0.52, + "learning_rate": 4.929607232500767e-05, + "loss": 1.5693, + "step": 3247 + }, + { + "epoch": 0.52, + "learning_rate": 4.9295464294994766e-05, + "loss": 1.5028, + "step": 3248 + }, + { + "epoch": 0.52, + "learning_rate": 4.92948560062499e-05, + "loss": 1.5038, + "step": 3249 + }, + { + "epoch": 0.52, + "learning_rate": 4.929424745877959e-05, + "loss": 1.5127, + "step": 3250 + }, + { + "epoch": 0.52, + "learning_rate": 4.9293638652590275e-05, + "loss": 1.5521, + "step": 3251 + }, + { + "epoch": 0.52, + "learning_rate": 4.9293029587688464e-05, + "loss": 1.5788, + "step": 3252 + }, + { + "epoch": 0.52, + "learning_rate": 4.929242026408064e-05, + "loss": 1.5494, + "step": 3253 + }, + { + "epoch": 0.52, + "learning_rate": 4.929181068177328e-05, + "loss": 1.5894, + "step": 3254 + }, + { + "epoch": 0.52, + "learning_rate": 4.9291200840772886e-05, + "loss": 1.4835, + "step": 3255 + }, + { + "epoch": 0.52, + "learning_rate": 4.9290590741085956e-05, + "loss": 1.5099, + "step": 3256 + }, + { + "epoch": 0.52, + "learning_rate": 4.928998038271898e-05, + "loss": 1.5438, + "step": 3257 + }, + { + "epoch": 0.52, + "learning_rate": 4.928936976567846e-05, + "loss": 1.5629, + "step": 3258 + }, + { + "epoch": 0.52, + "learning_rate": 4.92887588899709e-05, + "loss": 1.5545, + "step": 3259 + }, + { + "epoch": 0.52, + "learning_rate": 4.9288147755602797e-05, + "loss": 1.528, + "step": 3260 + }, + { + "epoch": 0.52, + "learning_rate": 4.928753636258067e-05, + "loss": 1.554, + "step": 3261 + }, + { + "epoch": 0.52, + "learning_rate": 4.928692471091103e-05, + "loss": 1.6149, + "step": 3262 + }, + { + "epoch": 0.52, + "learning_rate": 4.928631280060038e-05, + "loss": 1.5253, + "step": 3263 + }, + { + "epoch": 0.52, + "learning_rate": 4.928570063165525e-05, + "loss": 1.5462, + "step": 3264 + }, + { + "epoch": 0.52, + "learning_rate": 4.928508820408215e-05, + "loss": 1.5628, + "step": 3265 + }, + { + "epoch": 0.52, + "learning_rate": 4.92844755178876e-05, + "loss": 1.5413, + "step": 3266 + }, + { + "epoch": 0.52, + "learning_rate": 4.928386257307813e-05, + "loss": 1.5331, + "step": 3267 + }, + { + "epoch": 0.52, + "learning_rate": 4.928324936966027e-05, + "loss": 1.5317, + "step": 3268 + }, + { + "epoch": 0.52, + "learning_rate": 4.928263590764055e-05, + "loss": 1.5262, + "step": 3269 + }, + { + "epoch": 0.52, + "learning_rate": 4.928202218702549e-05, + "loss": 1.4823, + "step": 3270 + }, + { + "epoch": 0.52, + "learning_rate": 4.928140820782164e-05, + "loss": 1.5044, + "step": 3271 + }, + { + "epoch": 0.52, + "learning_rate": 4.9280793970035547e-05, + "loss": 1.5615, + "step": 3272 + }, + { + "epoch": 0.52, + "learning_rate": 4.9280179473673725e-05, + "loss": 1.5212, + "step": 3273 + }, + { + "epoch": 0.52, + "learning_rate": 4.927956471874273e-05, + "loss": 1.5176, + "step": 3274 + }, + { + "epoch": 0.52, + "learning_rate": 4.927894970524912e-05, + "loss": 1.4966, + "step": 3275 + }, + { + "epoch": 0.52, + "learning_rate": 4.9278334433199434e-05, + "loss": 1.5288, + "step": 3276 + }, + { + "epoch": 0.52, + "learning_rate": 4.9277718902600226e-05, + "loss": 1.5407, + "step": 3277 + }, + { + "epoch": 0.52, + "learning_rate": 4.927710311345805e-05, + "loss": 1.5596, + "step": 3278 + }, + { + "epoch": 0.52, + "learning_rate": 4.927648706577947e-05, + "loss": 1.5859, + "step": 3279 + }, + { + "epoch": 0.52, + "learning_rate": 4.927587075957103e-05, + "loss": 1.5842, + "step": 3280 + }, + { + "epoch": 0.52, + "learning_rate": 4.9275254194839316e-05, + "loss": 1.5288, + "step": 3281 + }, + { + "epoch": 0.52, + "learning_rate": 4.927463737159088e-05, + "loss": 1.5083, + "step": 3282 + }, + { + "epoch": 0.52, + "learning_rate": 4.927402028983229e-05, + "loss": 1.5016, + "step": 3283 + }, + { + "epoch": 0.52, + "learning_rate": 4.927340294957012e-05, + "loss": 1.5469, + "step": 3284 + }, + { + "epoch": 0.52, + "learning_rate": 4.9272785350810946e-05, + "loss": 1.5672, + "step": 3285 + }, + { + "epoch": 0.52, + "learning_rate": 4.927216749356135e-05, + "loss": 1.5225, + "step": 3286 + }, + { + "epoch": 0.52, + "learning_rate": 4.9271549377827894e-05, + "loss": 1.5324, + "step": 3287 + }, + { + "epoch": 0.52, + "learning_rate": 4.9270931003617184e-05, + "loss": 1.5908, + "step": 3288 + }, + { + "epoch": 0.52, + "learning_rate": 4.9270312370935786e-05, + "loss": 1.6198, + "step": 3289 + }, + { + "epoch": 0.52, + "learning_rate": 4.926969347979031e-05, + "loss": 1.5907, + "step": 3290 + }, + { + "epoch": 0.52, + "learning_rate": 4.926907433018732e-05, + "loss": 1.5921, + "step": 3291 + }, + { + "epoch": 0.52, + "learning_rate": 4.926845492213343e-05, + "loss": 1.5155, + "step": 3292 + }, + { + "epoch": 0.52, + "learning_rate": 4.926783525563522e-05, + "loss": 1.4963, + "step": 3293 + }, + { + "epoch": 0.52, + "learning_rate": 4.9267215330699304e-05, + "loss": 1.5269, + "step": 3294 + }, + { + "epoch": 0.52, + "learning_rate": 4.926659514733228e-05, + "loss": 1.5114, + "step": 3295 + }, + { + "epoch": 0.53, + "learning_rate": 4.926597470554074e-05, + "loss": 1.6582, + "step": 3296 + }, + { + "epoch": 0.53, + "learning_rate": 4.926535400533132e-05, + "loss": 1.5723, + "step": 3297 + }, + { + "epoch": 0.53, + "learning_rate": 4.92647330467106e-05, + "loss": 1.5674, + "step": 3298 + }, + { + "epoch": 0.53, + "learning_rate": 4.92641118296852e-05, + "loss": 1.5788, + "step": 3299 + }, + { + "epoch": 0.53, + "learning_rate": 4.9263490354261745e-05, + "loss": 1.5514, + "step": 3300 + }, + { + "epoch": 0.53, + "learning_rate": 4.926286862044685e-05, + "loss": 1.625, + "step": 3301 + }, + { + "epoch": 0.53, + "learning_rate": 4.926224662824713e-05, + "loss": 1.5451, + "step": 3302 + }, + { + "epoch": 0.53, + "learning_rate": 4.926162437766922e-05, + "loss": 1.5627, + "step": 3303 + }, + { + "epoch": 0.53, + "learning_rate": 4.926100186871974e-05, + "loss": 1.5384, + "step": 3304 + }, + { + "epoch": 0.53, + "learning_rate": 4.926037910140531e-05, + "loss": 1.5534, + "step": 3305 + }, + { + "epoch": 0.53, + "learning_rate": 4.925975607573258e-05, + "loss": 1.5513, + "step": 3306 + }, + { + "epoch": 0.53, + "learning_rate": 4.9259132791708175e-05, + "loss": 1.5731, + "step": 3307 + }, + { + "epoch": 0.53, + "learning_rate": 4.9258509249338724e-05, + "loss": 1.529, + "step": 3308 + }, + { + "epoch": 0.53, + "learning_rate": 4.925788544863089e-05, + "loss": 1.4688, + "step": 3309 + }, + { + "epoch": 0.53, + "learning_rate": 4.92572613895913e-05, + "loss": 1.4945, + "step": 3310 + }, + { + "epoch": 0.53, + "learning_rate": 4.9256637072226594e-05, + "loss": 1.492, + "step": 3311 + }, + { + "epoch": 0.53, + "learning_rate": 4.925601249654345e-05, + "loss": 1.5124, + "step": 3312 + }, + { + "epoch": 0.53, + "learning_rate": 4.925538766254848e-05, + "loss": 1.5085, + "step": 3313 + }, + { + "epoch": 0.53, + "learning_rate": 4.925476257024836e-05, + "loss": 1.5173, + "step": 3314 + }, + { + "epoch": 0.53, + "learning_rate": 4.925413721964975e-05, + "loss": 1.5592, + "step": 3315 + }, + { + "epoch": 0.53, + "learning_rate": 4.925351161075931e-05, + "loss": 1.507, + "step": 3316 + }, + { + "epoch": 0.53, + "learning_rate": 4.925288574358368e-05, + "loss": 1.4984, + "step": 3317 + }, + { + "epoch": 0.53, + "learning_rate": 4.925225961812955e-05, + "loss": 1.6012, + "step": 3318 + }, + { + "epoch": 0.53, + "learning_rate": 4.925163323440358e-05, + "loss": 1.52, + "step": 3319 + }, + { + "epoch": 0.53, + "learning_rate": 4.925100659241243e-05, + "loss": 1.5968, + "step": 3320 + }, + { + "epoch": 0.53, + "learning_rate": 4.925037969216279e-05, + "loss": 1.4461, + "step": 3321 + }, + { + "epoch": 0.53, + "learning_rate": 4.924975253366133e-05, + "loss": 1.5614, + "step": 3322 + }, + { + "epoch": 0.53, + "learning_rate": 4.924912511691473e-05, + "loss": 1.5441, + "step": 3323 + }, + { + "epoch": 0.53, + "learning_rate": 4.924849744192966e-05, + "loss": 1.6478, + "step": 3324 + }, + { + "epoch": 0.53, + "learning_rate": 4.924786950871283e-05, + "loss": 1.5563, + "step": 3325 + }, + { + "epoch": 0.53, + "learning_rate": 4.92472413172709e-05, + "loss": 1.6322, + "step": 3326 + }, + { + "epoch": 0.53, + "learning_rate": 4.924661286761057e-05, + "loss": 1.4868, + "step": 3327 + }, + { + "epoch": 0.53, + "learning_rate": 4.9245984159738534e-05, + "loss": 1.4828, + "step": 3328 + }, + { + "epoch": 0.53, + "learning_rate": 4.924535519366149e-05, + "loss": 1.6361, + "step": 3329 + }, + { + "epoch": 0.53, + "learning_rate": 4.924472596938613e-05, + "loss": 1.5874, + "step": 3330 + }, + { + "epoch": 0.53, + "learning_rate": 4.924409648691916e-05, + "loss": 1.5594, + "step": 3331 + }, + { + "epoch": 0.53, + "learning_rate": 4.924346674626728e-05, + "loss": 1.5415, + "step": 3332 + }, + { + "epoch": 0.53, + "learning_rate": 4.92428367474372e-05, + "loss": 1.5247, + "step": 3333 + }, + { + "epoch": 0.53, + "learning_rate": 4.9242206490435625e-05, + "loss": 1.5037, + "step": 3334 + }, + { + "epoch": 0.53, + "learning_rate": 4.9241575975269275e-05, + "loss": 1.5267, + "step": 3335 + }, + { + "epoch": 0.53, + "learning_rate": 4.9240945201944846e-05, + "loss": 1.5939, + "step": 3336 + }, + { + "epoch": 0.53, + "learning_rate": 4.924031417046907e-05, + "loss": 1.547, + "step": 3337 + }, + { + "epoch": 0.53, + "learning_rate": 4.923968288084867e-05, + "loss": 1.52, + "step": 3338 + }, + { + "epoch": 0.53, + "learning_rate": 4.923905133309036e-05, + "loss": 1.5458, + "step": 3339 + }, + { + "epoch": 0.53, + "learning_rate": 4.9238419527200873e-05, + "loss": 1.5132, + "step": 3340 + }, + { + "epoch": 0.53, + "learning_rate": 4.923778746318693e-05, + "loss": 1.4404, + "step": 3341 + }, + { + "epoch": 0.53, + "learning_rate": 4.923715514105527e-05, + "loss": 1.5262, + "step": 3342 + }, + { + "epoch": 0.53, + "learning_rate": 4.923652256081262e-05, + "loss": 1.6297, + "step": 3343 + }, + { + "epoch": 0.53, + "learning_rate": 4.9235889722465725e-05, + "loss": 1.599, + "step": 3344 + }, + { + "epoch": 0.53, + "learning_rate": 4.923525662602131e-05, + "loss": 1.5557, + "step": 3345 + }, + { + "epoch": 0.53, + "learning_rate": 4.923462327148612e-05, + "loss": 1.5073, + "step": 3346 + }, + { + "epoch": 0.53, + "learning_rate": 4.923398965886692e-05, + "loss": 1.5552, + "step": 3347 + }, + { + "epoch": 0.53, + "learning_rate": 4.923335578817043e-05, + "loss": 1.4857, + "step": 3348 + }, + { + "epoch": 0.53, + "learning_rate": 4.9232721659403424e-05, + "loss": 1.5343, + "step": 3349 + }, + { + "epoch": 0.53, + "learning_rate": 4.923208727257264e-05, + "loss": 1.4774, + "step": 3350 + }, + { + "epoch": 0.53, + "learning_rate": 4.9231452627684834e-05, + "loss": 1.506, + "step": 3351 + }, + { + "epoch": 0.53, + "learning_rate": 4.923081772474678e-05, + "loss": 1.5103, + "step": 3352 + }, + { + "epoch": 0.53, + "learning_rate": 4.923018256376521e-05, + "loss": 1.5023, + "step": 3353 + }, + { + "epoch": 0.53, + "learning_rate": 4.922954714474692e-05, + "loss": 1.5645, + "step": 3354 + }, + { + "epoch": 0.53, + "learning_rate": 4.922891146769867e-05, + "loss": 1.5062, + "step": 3355 + }, + { + "epoch": 0.53, + "learning_rate": 4.922827553262721e-05, + "loss": 1.5674, + "step": 3356 + }, + { + "epoch": 0.53, + "learning_rate": 4.922763933953932e-05, + "loss": 1.5088, + "step": 3357 + }, + { + "epoch": 0.53, + "learning_rate": 4.922700288844179e-05, + "loss": 1.4331, + "step": 3358 + }, + { + "epoch": 0.54, + "learning_rate": 4.9226366179341385e-05, + "loss": 1.571, + "step": 3359 + }, + { + "epoch": 0.54, + "learning_rate": 4.922572921224489e-05, + "loss": 1.5384, + "step": 3360 + }, + { + "epoch": 0.54, + "learning_rate": 4.922509198715909e-05, + "loss": 1.6045, + "step": 3361 + }, + { + "epoch": 0.54, + "learning_rate": 4.9224454504090764e-05, + "loss": 1.6408, + "step": 3362 + }, + { + "epoch": 0.54, + "learning_rate": 4.92238167630467e-05, + "loss": 1.4928, + "step": 3363 + }, + { + "epoch": 0.54, + "learning_rate": 4.922317876403369e-05, + "loss": 1.5474, + "step": 3364 + }, + { + "epoch": 0.54, + "learning_rate": 4.922254050705855e-05, + "loss": 1.5715, + "step": 3365 + }, + { + "epoch": 0.54, + "learning_rate": 4.922190199212805e-05, + "loss": 1.5085, + "step": 3366 + }, + { + "epoch": 0.54, + "learning_rate": 4.9221263219249e-05, + "loss": 1.5037, + "step": 3367 + }, + { + "epoch": 0.54, + "learning_rate": 4.92206241884282e-05, + "loss": 1.507, + "step": 3368 + }, + { + "epoch": 0.54, + "learning_rate": 4.9219984899672456e-05, + "loss": 1.5129, + "step": 3369 + }, + { + "epoch": 0.54, + "learning_rate": 4.9219345352988575e-05, + "loss": 1.5298, + "step": 3370 + }, + { + "epoch": 0.54, + "learning_rate": 4.9218705548383385e-05, + "loss": 1.6235, + "step": 3371 + }, + { + "epoch": 0.54, + "learning_rate": 4.921806548586367e-05, + "loss": 1.5369, + "step": 3372 + }, + { + "epoch": 0.54, + "learning_rate": 4.921742516543626e-05, + "loss": 1.5625, + "step": 3373 + }, + { + "epoch": 0.54, + "learning_rate": 4.921678458710798e-05, + "loss": 1.5526, + "step": 3374 + }, + { + "epoch": 0.54, + "learning_rate": 4.921614375088564e-05, + "loss": 1.5098, + "step": 3375 + }, + { + "epoch": 0.54, + "learning_rate": 4.921550265677608e-05, + "loss": 1.4622, + "step": 3376 + }, + { + "epoch": 0.54, + "learning_rate": 4.921486130478612e-05, + "loss": 1.4994, + "step": 3377 + }, + { + "epoch": 0.54, + "learning_rate": 4.921421969492258e-05, + "loss": 1.5166, + "step": 3378 + }, + { + "epoch": 0.54, + "learning_rate": 4.92135778271923e-05, + "loss": 1.5304, + "step": 3379 + }, + { + "epoch": 0.54, + "learning_rate": 4.921293570160213e-05, + "loss": 1.535, + "step": 3380 + }, + { + "epoch": 0.54, + "learning_rate": 4.921229331815888e-05, + "loss": 1.5261, + "step": 3381 + }, + { + "epoch": 0.54, + "learning_rate": 4.9211650676869414e-05, + "loss": 1.6126, + "step": 3382 + }, + { + "epoch": 0.54, + "learning_rate": 4.9211007777740556e-05, + "loss": 1.4762, + "step": 3383 + }, + { + "epoch": 0.54, + "learning_rate": 4.921036462077918e-05, + "loss": 1.5283, + "step": 3384 + }, + { + "epoch": 0.54, + "learning_rate": 4.920972120599211e-05, + "loss": 1.612, + "step": 3385 + }, + { + "epoch": 0.54, + "learning_rate": 4.920907753338621e-05, + "loss": 1.4819, + "step": 3386 + }, + { + "epoch": 0.54, + "learning_rate": 4.920843360296833e-05, + "loss": 1.4483, + "step": 3387 + }, + { + "epoch": 0.54, + "learning_rate": 4.920778941474533e-05, + "loss": 1.5046, + "step": 3388 + }, + { + "epoch": 0.54, + "learning_rate": 4.9207144968724076e-05, + "loss": 1.5072, + "step": 3389 + }, + { + "epoch": 0.54, + "learning_rate": 4.9206500264911415e-05, + "loss": 1.4483, + "step": 3390 + }, + { + "epoch": 0.54, + "learning_rate": 4.920585530331423e-05, + "loss": 1.5073, + "step": 3391 + }, + { + "epoch": 0.54, + "learning_rate": 4.9205210083939374e-05, + "loss": 1.5892, + "step": 3392 + }, + { + "epoch": 0.54, + "learning_rate": 4.920456460679373e-05, + "loss": 1.5374, + "step": 3393 + }, + { + "epoch": 0.54, + "learning_rate": 4.920391887188417e-05, + "loss": 1.5423, + "step": 3394 + }, + { + "epoch": 0.54, + "learning_rate": 4.9203272879217564e-05, + "loss": 1.5308, + "step": 3395 + }, + { + "epoch": 0.54, + "learning_rate": 4.92026266288008e-05, + "loss": 1.6107, + "step": 3396 + }, + { + "epoch": 0.54, + "learning_rate": 4.9201980120640754e-05, + "loss": 1.4393, + "step": 3397 + }, + { + "epoch": 0.54, + "learning_rate": 4.920133335474432e-05, + "loss": 1.5793, + "step": 3398 + }, + { + "epoch": 0.54, + "learning_rate": 4.920068633111836e-05, + "loss": 1.5435, + "step": 3399 + }, + { + "epoch": 0.54, + "learning_rate": 4.9200039049769796e-05, + "loss": 1.5313, + "step": 3400 + }, + { + "epoch": 0.54, + "learning_rate": 4.9199391510705516e-05, + "loss": 1.4945, + "step": 3401 + }, + { + "epoch": 0.54, + "learning_rate": 4.91987437139324e-05, + "loss": 1.589, + "step": 3402 + }, + { + "epoch": 0.54, + "learning_rate": 4.919809565945736e-05, + "loss": 1.4881, + "step": 3403 + }, + { + "epoch": 0.54, + "learning_rate": 4.9197447347287275e-05, + "loss": 1.479, + "step": 3404 + }, + { + "epoch": 0.54, + "learning_rate": 4.919679877742908e-05, + "loss": 1.4608, + "step": 3405 + }, + { + "epoch": 0.54, + "learning_rate": 4.919614994988967e-05, + "loss": 1.4741, + "step": 3406 + }, + { + "epoch": 0.54, + "learning_rate": 4.9195500864675946e-05, + "loss": 1.5188, + "step": 3407 + }, + { + "epoch": 0.54, + "learning_rate": 4.919485152179484e-05, + "loss": 1.4735, + "step": 3408 + }, + { + "epoch": 0.54, + "learning_rate": 4.919420192125324e-05, + "loss": 1.5625, + "step": 3409 + }, + { + "epoch": 0.54, + "learning_rate": 4.919355206305808e-05, + "loss": 1.5125, + "step": 3410 + }, + { + "epoch": 0.54, + "learning_rate": 4.919290194721629e-05, + "loss": 1.6034, + "step": 3411 + }, + { + "epoch": 0.54, + "learning_rate": 4.919225157373477e-05, + "loss": 1.5791, + "step": 3412 + }, + { + "epoch": 0.54, + "learning_rate": 4.9191600942620465e-05, + "loss": 1.5412, + "step": 3413 + }, + { + "epoch": 0.54, + "learning_rate": 4.9190950053880295e-05, + "loss": 1.5335, + "step": 3414 + }, + { + "epoch": 0.54, + "learning_rate": 4.919029890752119e-05, + "loss": 1.5023, + "step": 3415 + }, + { + "epoch": 0.54, + "learning_rate": 4.91896475035501e-05, + "loss": 1.4815, + "step": 3416 + }, + { + "epoch": 0.54, + "learning_rate": 4.918899584197394e-05, + "loss": 1.5173, + "step": 3417 + }, + { + "epoch": 0.54, + "learning_rate": 4.9188343922799654e-05, + "loss": 1.5485, + "step": 3418 + }, + { + "epoch": 0.54, + "learning_rate": 4.91876917460342e-05, + "loss": 1.5425, + "step": 3419 + }, + { + "epoch": 0.54, + "learning_rate": 4.9187039311684504e-05, + "loss": 1.5384, + "step": 3420 + }, + { + "epoch": 0.55, + "learning_rate": 4.918638661975754e-05, + "loss": 1.5319, + "step": 3421 + }, + { + "epoch": 0.55, + "learning_rate": 4.918573367026022e-05, + "loss": 1.4564, + "step": 3422 + }, + { + "epoch": 0.55, + "learning_rate": 4.918508046319954e-05, + "loss": 1.6089, + "step": 3423 + }, + { + "epoch": 0.55, + "learning_rate": 4.9184426998582436e-05, + "loss": 1.4982, + "step": 3424 + }, + { + "epoch": 0.55, + "learning_rate": 4.9183773276415854e-05, + "loss": 1.5503, + "step": 3425 + }, + { + "epoch": 0.55, + "learning_rate": 4.918311929670678e-05, + "loss": 1.5054, + "step": 3426 + }, + { + "epoch": 0.55, + "learning_rate": 4.9182465059462156e-05, + "loss": 1.5584, + "step": 3427 + }, + { + "epoch": 0.55, + "learning_rate": 4.9181810564688966e-05, + "loss": 1.5595, + "step": 3428 + }, + { + "epoch": 0.55, + "learning_rate": 4.918115581239418e-05, + "loss": 1.5015, + "step": 3429 + }, + { + "epoch": 0.55, + "learning_rate": 4.918050080258476e-05, + "loss": 1.5267, + "step": 3430 + }, + { + "epoch": 0.55, + "learning_rate": 4.917984553526769e-05, + "loss": 1.5426, + "step": 3431 + }, + { + "epoch": 0.55, + "learning_rate": 4.917919001044994e-05, + "loss": 1.5684, + "step": 3432 + }, + { + "epoch": 0.55, + "learning_rate": 4.91785342281385e-05, + "loss": 1.5381, + "step": 3433 + }, + { + "epoch": 0.55, + "learning_rate": 4.9177878188340346e-05, + "loss": 1.5814, + "step": 3434 + }, + { + "epoch": 0.55, + "learning_rate": 4.917722189106248e-05, + "loss": 1.5304, + "step": 3435 + }, + { + "epoch": 0.55, + "learning_rate": 4.917656533631187e-05, + "loss": 1.4631, + "step": 3436 + }, + { + "epoch": 0.55, + "learning_rate": 4.917590852409551e-05, + "loss": 1.5413, + "step": 3437 + }, + { + "epoch": 0.55, + "learning_rate": 4.9175251454420406e-05, + "loss": 1.5506, + "step": 3438 + }, + { + "epoch": 0.55, + "learning_rate": 4.917459412729355e-05, + "loss": 1.5275, + "step": 3439 + }, + { + "epoch": 0.55, + "learning_rate": 4.917393654272195e-05, + "loss": 1.4969, + "step": 3440 + }, + { + "epoch": 0.55, + "learning_rate": 4.91732787007126e-05, + "loss": 1.5723, + "step": 3441 + }, + { + "epoch": 0.55, + "learning_rate": 4.917262060127251e-05, + "loss": 1.5217, + "step": 3442 + }, + { + "epoch": 0.55, + "learning_rate": 4.917196224440868e-05, + "loss": 1.5122, + "step": 3443 + }, + { + "epoch": 0.55, + "learning_rate": 4.917130363012813e-05, + "loss": 1.5356, + "step": 3444 + }, + { + "epoch": 0.55, + "learning_rate": 4.9170644758437875e-05, + "loss": 1.5356, + "step": 3445 + }, + { + "epoch": 0.55, + "learning_rate": 4.916998562934493e-05, + "loss": 1.5568, + "step": 3446 + }, + { + "epoch": 0.55, + "learning_rate": 4.91693262428563e-05, + "loss": 1.5573, + "step": 3447 + }, + { + "epoch": 0.55, + "learning_rate": 4.916866659897903e-05, + "loss": 1.486, + "step": 3448 + }, + { + "epoch": 0.55, + "learning_rate": 4.9168006697720125e-05, + "loss": 1.5049, + "step": 3449 + }, + { + "epoch": 0.55, + "learning_rate": 4.916734653908662e-05, + "loss": 1.5322, + "step": 3450 + }, + { + "epoch": 0.55, + "learning_rate": 4.9166686123085556e-05, + "loss": 1.5798, + "step": 3451 + }, + { + "epoch": 0.55, + "learning_rate": 4.916602544972395e-05, + "loss": 1.5855, + "step": 3452 + }, + { + "epoch": 0.55, + "learning_rate": 4.916536451900885e-05, + "loss": 1.533, + "step": 3453 + }, + { + "epoch": 0.55, + "learning_rate": 4.9164703330947294e-05, + "loss": 1.4865, + "step": 3454 + }, + { + "epoch": 0.55, + "learning_rate": 4.9164041885546307e-05, + "loss": 1.5885, + "step": 3455 + }, + { + "epoch": 0.55, + "learning_rate": 4.916338018281295e-05, + "loss": 1.542, + "step": 3456 + }, + { + "epoch": 0.55, + "learning_rate": 4.9162718222754255e-05, + "loss": 1.52, + "step": 3457 + }, + { + "epoch": 0.55, + "learning_rate": 4.916205600537729e-05, + "loss": 1.5832, + "step": 3458 + }, + { + "epoch": 0.55, + "learning_rate": 4.91613935306891e-05, + "loss": 1.5547, + "step": 3459 + }, + { + "epoch": 0.55, + "learning_rate": 4.916073079869673e-05, + "loss": 1.5356, + "step": 3460 + }, + { + "epoch": 0.55, + "learning_rate": 4.916006780940725e-05, + "loss": 1.5171, + "step": 3461 + }, + { + "epoch": 0.55, + "learning_rate": 4.915940456282772e-05, + "loss": 1.5783, + "step": 3462 + }, + { + "epoch": 0.55, + "learning_rate": 4.915874105896519e-05, + "loss": 1.5773, + "step": 3463 + }, + { + "epoch": 0.55, + "learning_rate": 4.9158077297826735e-05, + "loss": 1.5609, + "step": 3464 + }, + { + "epoch": 0.55, + "learning_rate": 4.9157413279419425e-05, + "loss": 1.5033, + "step": 3465 + }, + { + "epoch": 0.55, + "learning_rate": 4.915674900375033e-05, + "loss": 1.5369, + "step": 3466 + }, + { + "epoch": 0.55, + "learning_rate": 4.915608447082652e-05, + "loss": 1.5828, + "step": 3467 + }, + { + "epoch": 0.55, + "learning_rate": 4.915541968065509e-05, + "loss": 1.5776, + "step": 3468 + }, + { + "epoch": 0.55, + "learning_rate": 4.915475463324309e-05, + "loss": 1.4852, + "step": 3469 + }, + { + "epoch": 0.55, + "learning_rate": 4.915408932859763e-05, + "loss": 1.5244, + "step": 3470 + }, + { + "epoch": 0.55, + "learning_rate": 4.915342376672577e-05, + "loss": 1.5239, + "step": 3471 + }, + { + "epoch": 0.55, + "learning_rate": 4.915275794763462e-05, + "loss": 1.5402, + "step": 3472 + }, + { + "epoch": 0.55, + "learning_rate": 4.915209187133126e-05, + "loss": 1.6081, + "step": 3473 + }, + { + "epoch": 0.55, + "learning_rate": 4.9151425537822784e-05, + "loss": 1.5529, + "step": 3474 + }, + { + "epoch": 0.55, + "learning_rate": 4.9150758947116296e-05, + "loss": 1.498, + "step": 3475 + }, + { + "epoch": 0.55, + "learning_rate": 4.915009209921888e-05, + "loss": 1.5186, + "step": 3476 + }, + { + "epoch": 0.55, + "learning_rate": 4.914942499413765e-05, + "loss": 1.5313, + "step": 3477 + }, + { + "epoch": 0.55, + "learning_rate": 4.9148757631879696e-05, + "loss": 1.5418, + "step": 3478 + }, + { + "epoch": 0.55, + "learning_rate": 4.9148090012452134e-05, + "loss": 1.5272, + "step": 3479 + }, + { + "epoch": 0.55, + "learning_rate": 4.914742213586208e-05, + "loss": 1.5469, + "step": 3480 + }, + { + "epoch": 0.55, + "learning_rate": 4.914675400211664e-05, + "loss": 1.4826, + "step": 3481 + }, + { + "epoch": 0.55, + "learning_rate": 4.914608561122293e-05, + "loss": 1.5008, + "step": 3482 + }, + { + "epoch": 0.55, + "learning_rate": 4.914541696318807e-05, + "loss": 1.5101, + "step": 3483 + }, + { + "epoch": 0.56, + "learning_rate": 4.914474805801918e-05, + "loss": 1.4445, + "step": 3484 + }, + { + "epoch": 0.56, + "learning_rate": 4.9144078895723365e-05, + "loss": 1.4878, + "step": 3485 + }, + { + "epoch": 0.56, + "learning_rate": 4.914340947630779e-05, + "loss": 1.5212, + "step": 3486 + }, + { + "epoch": 0.56, + "learning_rate": 4.914273979977955e-05, + "loss": 1.5343, + "step": 3487 + }, + { + "epoch": 0.56, + "learning_rate": 4.914206986614579e-05, + "loss": 1.4775, + "step": 3488 + }, + { + "epoch": 0.56, + "learning_rate": 4.914139967541364e-05, + "loss": 1.5422, + "step": 3489 + }, + { + "epoch": 0.56, + "learning_rate": 4.9140729227590246e-05, + "loss": 1.5081, + "step": 3490 + }, + { + "epoch": 0.56, + "learning_rate": 4.914005852268274e-05, + "loss": 1.4535, + "step": 3491 + }, + { + "epoch": 0.56, + "learning_rate": 4.913938756069826e-05, + "loss": 1.4565, + "step": 3492 + }, + { + "epoch": 0.56, + "learning_rate": 4.913871634164397e-05, + "loss": 1.4896, + "step": 3493 + }, + { + "epoch": 0.56, + "learning_rate": 4.9138044865527e-05, + "loss": 1.5285, + "step": 3494 + }, + { + "epoch": 0.56, + "learning_rate": 4.91373731323545e-05, + "loss": 1.4751, + "step": 3495 + }, + { + "epoch": 0.56, + "learning_rate": 4.913670114213364e-05, + "loss": 1.4396, + "step": 3496 + }, + { + "epoch": 0.56, + "learning_rate": 4.913602889487157e-05, + "loss": 1.5817, + "step": 3497 + }, + { + "epoch": 0.56, + "learning_rate": 4.913535639057544e-05, + "loss": 1.5483, + "step": 3498 + }, + { + "epoch": 0.56, + "learning_rate": 4.913468362925241e-05, + "loss": 1.627, + "step": 3499 + }, + { + "epoch": 0.56, + "learning_rate": 4.913401061090966e-05, + "loss": 1.4071, + "step": 3500 + }, + { + "epoch": 0.56, + "learning_rate": 4.9133337335554346e-05, + "loss": 1.4924, + "step": 3501 + }, + { + "epoch": 0.56, + "learning_rate": 4.913266380319364e-05, + "loss": 1.6407, + "step": 3502 + }, + { + "epoch": 0.56, + "learning_rate": 4.913199001383472e-05, + "loss": 1.5736, + "step": 3503 + }, + { + "epoch": 0.56, + "learning_rate": 4.913131596748476e-05, + "loss": 1.5745, + "step": 3504 + }, + { + "epoch": 0.56, + "learning_rate": 4.913064166415093e-05, + "loss": 1.5022, + "step": 3505 + }, + { + "epoch": 0.56, + "learning_rate": 4.9129967103840414e-05, + "loss": 1.5531, + "step": 3506 + }, + { + "epoch": 0.56, + "learning_rate": 4.91292922865604e-05, + "loss": 1.535, + "step": 3507 + }, + { + "epoch": 0.56, + "learning_rate": 4.9128617212318074e-05, + "loss": 1.5233, + "step": 3508 + }, + { + "epoch": 0.56, + "learning_rate": 4.912794188112063e-05, + "loss": 1.4761, + "step": 3509 + }, + { + "epoch": 0.56, + "learning_rate": 4.912726629297524e-05, + "loss": 1.5236, + "step": 3510 + }, + { + "epoch": 0.56, + "learning_rate": 4.912659044788912e-05, + "loss": 1.5457, + "step": 3511 + }, + { + "epoch": 0.56, + "learning_rate": 4.912591434586946e-05, + "loss": 1.5179, + "step": 3512 + }, + { + "epoch": 0.56, + "learning_rate": 4.912523798692346e-05, + "loss": 1.5516, + "step": 3513 + }, + { + "epoch": 0.56, + "learning_rate": 4.912456137105832e-05, + "loss": 1.5246, + "step": 3514 + }, + { + "epoch": 0.56, + "learning_rate": 4.912388449828124e-05, + "loss": 1.5552, + "step": 3515 + }, + { + "epoch": 0.56, + "learning_rate": 4.912320736859944e-05, + "loss": 1.6052, + "step": 3516 + }, + { + "epoch": 0.56, + "learning_rate": 4.912252998202014e-05, + "loss": 1.5368, + "step": 3517 + }, + { + "epoch": 0.56, + "learning_rate": 4.912185233855053e-05, + "loss": 1.4419, + "step": 3518 + }, + { + "epoch": 0.56, + "learning_rate": 4.9121174438197834e-05, + "loss": 1.5285, + "step": 3519 + }, + { + "epoch": 0.56, + "learning_rate": 4.9120496280969286e-05, + "loss": 1.4937, + "step": 3520 + }, + { + "epoch": 0.56, + "learning_rate": 4.911981786687209e-05, + "loss": 1.4883, + "step": 3521 + }, + { + "epoch": 0.56, + "learning_rate": 4.911913919591347e-05, + "loss": 1.5597, + "step": 3522 + }, + { + "epoch": 0.56, + "learning_rate": 4.911846026810068e-05, + "loss": 1.4826, + "step": 3523 + }, + { + "epoch": 0.56, + "learning_rate": 4.9117781083440905e-05, + "loss": 1.451, + "step": 3524 + }, + { + "epoch": 0.56, + "learning_rate": 4.911710164194142e-05, + "loss": 1.5635, + "step": 3525 + }, + { + "epoch": 0.56, + "learning_rate": 4.911642194360944e-05, + "loss": 1.5793, + "step": 3526 + }, + { + "epoch": 0.56, + "learning_rate": 4.911574198845221e-05, + "loss": 1.5038, + "step": 3527 + }, + { + "epoch": 0.56, + "learning_rate": 4.9115061776476966e-05, + "loss": 1.5381, + "step": 3528 + }, + { + "epoch": 0.56, + "learning_rate": 4.911438130769096e-05, + "loss": 1.4762, + "step": 3529 + }, + { + "epoch": 0.56, + "learning_rate": 4.911370058210143e-05, + "loss": 1.516, + "step": 3530 + }, + { + "epoch": 0.56, + "learning_rate": 4.9113019599715624e-05, + "loss": 1.5093, + "step": 3531 + }, + { + "epoch": 0.56, + "learning_rate": 4.911233836054081e-05, + "loss": 1.5037, + "step": 3532 + }, + { + "epoch": 0.56, + "learning_rate": 4.911165686458422e-05, + "loss": 1.5702, + "step": 3533 + }, + { + "epoch": 0.56, + "learning_rate": 4.911097511185313e-05, + "loss": 1.5169, + "step": 3534 + }, + { + "epoch": 0.56, + "learning_rate": 4.9110293102354794e-05, + "loss": 1.6195, + "step": 3535 + }, + { + "epoch": 0.56, + "learning_rate": 4.9109610836096473e-05, + "loss": 1.5609, + "step": 3536 + }, + { + "epoch": 0.56, + "learning_rate": 4.910892831308543e-05, + "loss": 1.5077, + "step": 3537 + }, + { + "epoch": 0.56, + "learning_rate": 4.910824553332894e-05, + "loss": 1.4558, + "step": 3538 + }, + { + "epoch": 0.56, + "learning_rate": 4.910756249683427e-05, + "loss": 1.4668, + "step": 3539 + }, + { + "epoch": 0.56, + "learning_rate": 4.9106879203608694e-05, + "loss": 1.4867, + "step": 3540 + }, + { + "epoch": 0.56, + "learning_rate": 4.910619565365949e-05, + "loss": 1.6364, + "step": 3541 + }, + { + "epoch": 0.56, + "learning_rate": 4.9105511846993946e-05, + "loss": 1.5658, + "step": 3542 + }, + { + "epoch": 0.56, + "learning_rate": 4.910482778361932e-05, + "loss": 1.5007, + "step": 3543 + }, + { + "epoch": 0.56, + "learning_rate": 4.910414346354293e-05, + "loss": 1.5755, + "step": 3544 + }, + { + "epoch": 0.56, + "learning_rate": 4.9103458886772033e-05, + "loss": 1.5286, + "step": 3545 + }, + { + "epoch": 0.56, + "learning_rate": 4.910277405331394e-05, + "loss": 1.5584, + "step": 3546 + }, + { + "epoch": 0.57, + "learning_rate": 4.9102088963175936e-05, + "loss": 1.4797, + "step": 3547 + }, + { + "epoch": 0.57, + "learning_rate": 4.9101403616365315e-05, + "loss": 1.5579, + "step": 3548 + }, + { + "epoch": 0.57, + "learning_rate": 4.910071801288938e-05, + "loss": 1.5496, + "step": 3549 + }, + { + "epoch": 0.57, + "learning_rate": 4.910003215275543e-05, + "loss": 1.5431, + "step": 3550 + }, + { + "epoch": 0.57, + "learning_rate": 4.9099346035970764e-05, + "loss": 1.5327, + "step": 3551 + }, + { + "epoch": 0.57, + "learning_rate": 4.909865966254269e-05, + "loss": 1.5407, + "step": 3552 + }, + { + "epoch": 0.57, + "learning_rate": 4.909797303247853e-05, + "loss": 1.5658, + "step": 3553 + }, + { + "epoch": 0.57, + "learning_rate": 4.9097286145785595e-05, + "loss": 1.5438, + "step": 3554 + }, + { + "epoch": 0.57, + "learning_rate": 4.909659900247118e-05, + "loss": 1.5815, + "step": 3555 + }, + { + "epoch": 0.57, + "learning_rate": 4.9095911602542615e-05, + "loss": 1.5632, + "step": 3556 + }, + { + "epoch": 0.57, + "learning_rate": 4.909522394600722e-05, + "loss": 1.526, + "step": 3557 + }, + { + "epoch": 0.57, + "learning_rate": 4.9094536032872326e-05, + "loss": 1.4515, + "step": 3558 + }, + { + "epoch": 0.57, + "learning_rate": 4.909384786314525e-05, + "loss": 1.5732, + "step": 3559 + }, + { + "epoch": 0.57, + "learning_rate": 4.9093159436833316e-05, + "loss": 1.4987, + "step": 3560 + }, + { + "epoch": 0.57, + "learning_rate": 4.9092470753943866e-05, + "loss": 1.53, + "step": 3561 + }, + { + "epoch": 0.57, + "learning_rate": 4.909178181448423e-05, + "loss": 1.4792, + "step": 3562 + }, + { + "epoch": 0.57, + "learning_rate": 4.909109261846174e-05, + "loss": 1.5168, + "step": 3563 + }, + { + "epoch": 0.57, + "learning_rate": 4.909040316588374e-05, + "loss": 1.4813, + "step": 3564 + }, + { + "epoch": 0.57, + "learning_rate": 4.908971345675758e-05, + "loss": 1.486, + "step": 3565 + }, + { + "epoch": 0.57, + "learning_rate": 4.908902349109059e-05, + "loss": 1.4875, + "step": 3566 + }, + { + "epoch": 0.57, + "learning_rate": 4.908833326889012e-05, + "loss": 1.5088, + "step": 3567 + }, + { + "epoch": 0.57, + "learning_rate": 4.908764279016354e-05, + "loss": 1.4977, + "step": 3568 + }, + { + "epoch": 0.57, + "learning_rate": 4.908695205491817e-05, + "loss": 1.5703, + "step": 3569 + }, + { + "epoch": 0.57, + "learning_rate": 4.9086261063161395e-05, + "loss": 1.5921, + "step": 3570 + }, + { + "epoch": 0.57, + "learning_rate": 4.9085569814900554e-05, + "loss": 1.5264, + "step": 3571 + }, + { + "epoch": 0.57, + "learning_rate": 4.9084878310143026e-05, + "loss": 1.5073, + "step": 3572 + }, + { + "epoch": 0.57, + "learning_rate": 4.908418654889617e-05, + "loss": 1.5233, + "step": 3573 + }, + { + "epoch": 0.57, + "learning_rate": 4.908349453116734e-05, + "loss": 1.6053, + "step": 3574 + }, + { + "epoch": 0.57, + "learning_rate": 4.9082802256963913e-05, + "loss": 1.5371, + "step": 3575 + }, + { + "epoch": 0.57, + "learning_rate": 4.908210972629327e-05, + "loss": 1.5112, + "step": 3576 + }, + { + "epoch": 0.57, + "learning_rate": 4.908141693916278e-05, + "loss": 1.547, + "step": 3577 + }, + { + "epoch": 0.57, + "learning_rate": 4.9080723895579816e-05, + "loss": 1.5636, + "step": 3578 + }, + { + "epoch": 0.57, + "learning_rate": 4.9080030595551765e-05, + "loss": 1.5768, + "step": 3579 + }, + { + "epoch": 0.57, + "learning_rate": 4.907933703908601e-05, + "loss": 1.5443, + "step": 3580 + }, + { + "epoch": 0.57, + "learning_rate": 4.907864322618992e-05, + "loss": 1.5518, + "step": 3581 + }, + { + "epoch": 0.57, + "learning_rate": 4.907794915687091e-05, + "loss": 1.554, + "step": 3582 + }, + { + "epoch": 0.57, + "learning_rate": 4.907725483113636e-05, + "loss": 1.5697, + "step": 3583 + }, + { + "epoch": 0.57, + "learning_rate": 4.9076560248993664e-05, + "loss": 1.5334, + "step": 3584 + }, + { + "epoch": 0.57, + "learning_rate": 4.907586541045022e-05, + "loss": 1.5524, + "step": 3585 + }, + { + "epoch": 0.57, + "learning_rate": 4.907517031551342e-05, + "loss": 1.5532, + "step": 3586 + }, + { + "epoch": 0.57, + "learning_rate": 4.907447496419067e-05, + "loss": 1.5763, + "step": 3587 + }, + { + "epoch": 0.57, + "learning_rate": 4.907377935648938e-05, + "loss": 1.5687, + "step": 3588 + }, + { + "epoch": 0.57, + "learning_rate": 4.907308349241696e-05, + "loss": 1.5075, + "step": 3589 + }, + { + "epoch": 0.57, + "learning_rate": 4.907238737198081e-05, + "loss": 1.5633, + "step": 3590 + }, + { + "epoch": 0.57, + "learning_rate": 4.907169099518836e-05, + "loss": 1.5519, + "step": 3591 + }, + { + "epoch": 0.57, + "learning_rate": 4.907099436204701e-05, + "loss": 1.4784, + "step": 3592 + }, + { + "epoch": 0.57, + "learning_rate": 4.9070297472564176e-05, + "loss": 1.5081, + "step": 3593 + }, + { + "epoch": 0.57, + "learning_rate": 4.906960032674729e-05, + "loss": 1.5361, + "step": 3594 + }, + { + "epoch": 0.57, + "learning_rate": 4.9068902924603777e-05, + "loss": 1.5127, + "step": 3595 + }, + { + "epoch": 0.57, + "learning_rate": 4.9068205266141065e-05, + "loss": 1.5282, + "step": 3596 + }, + { + "epoch": 0.57, + "learning_rate": 4.906750735136657e-05, + "loss": 1.5213, + "step": 3597 + }, + { + "epoch": 0.57, + "learning_rate": 4.9066809180287745e-05, + "loss": 1.5578, + "step": 3598 + }, + { + "epoch": 0.57, + "learning_rate": 4.9066110752911996e-05, + "loss": 1.4959, + "step": 3599 + }, + { + "epoch": 0.57, + "learning_rate": 4.90654120692468e-05, + "loss": 1.5269, + "step": 3600 + }, + { + "epoch": 0.57, + "learning_rate": 4.906471312929955e-05, + "loss": 1.518, + "step": 3601 + }, + { + "epoch": 0.57, + "learning_rate": 4.906401393307774e-05, + "loss": 1.4965, + "step": 3602 + }, + { + "epoch": 0.57, + "learning_rate": 4.906331448058878e-05, + "loss": 1.5217, + "step": 3603 + }, + { + "epoch": 0.57, + "learning_rate": 4.906261477184013e-05, + "loss": 1.5037, + "step": 3604 + }, + { + "epoch": 0.57, + "learning_rate": 4.906191480683924e-05, + "loss": 1.5015, + "step": 3605 + }, + { + "epoch": 0.57, + "learning_rate": 4.9061214585593574e-05, + "loss": 1.5508, + "step": 3606 + }, + { + "epoch": 0.57, + "learning_rate": 4.906051410811057e-05, + "loss": 1.5519, + "step": 3607 + }, + { + "epoch": 0.57, + "learning_rate": 4.90598133743977e-05, + "loss": 1.4463, + "step": 3608 + }, + { + "epoch": 0.57, + "learning_rate": 4.905911238446243e-05, + "loss": 1.5931, + "step": 3609 + }, + { + "epoch": 0.58, + "learning_rate": 4.905841113831222e-05, + "loss": 1.5731, + "step": 3610 + }, + { + "epoch": 0.58, + "learning_rate": 4.9057709635954526e-05, + "loss": 1.5576, + "step": 3611 + }, + { + "epoch": 0.58, + "learning_rate": 4.905700787739684e-05, + "loss": 1.4689, + "step": 3612 + }, + { + "epoch": 0.58, + "learning_rate": 4.905630586264663e-05, + "loss": 1.5083, + "step": 3613 + }, + { + "epoch": 0.58, + "learning_rate": 4.9055603591711365e-05, + "loss": 1.5374, + "step": 3614 + }, + { + "epoch": 0.58, + "learning_rate": 4.905490106459853e-05, + "loss": 1.5783, + "step": 3615 + }, + { + "epoch": 0.58, + "learning_rate": 4.905419828131559e-05, + "loss": 1.5194, + "step": 3616 + }, + { + "epoch": 0.58, + "learning_rate": 4.905349524187005e-05, + "loss": 1.5881, + "step": 3617 + }, + { + "epoch": 0.58, + "learning_rate": 4.9052791946269384e-05, + "loss": 1.5259, + "step": 3618 + }, + { + "epoch": 0.58, + "learning_rate": 4.905208839452109e-05, + "loss": 1.596, + "step": 3619 + }, + { + "epoch": 0.58, + "learning_rate": 4.9051384586632656e-05, + "loss": 1.5996, + "step": 3620 + }, + { + "epoch": 0.58, + "learning_rate": 4.9050680522611576e-05, + "loss": 1.5202, + "step": 3621 + }, + { + "epoch": 0.58, + "learning_rate": 4.904997620246535e-05, + "loss": 1.5249, + "step": 3622 + }, + { + "epoch": 0.58, + "learning_rate": 4.904927162620148e-05, + "loss": 1.5275, + "step": 3623 + }, + { + "epoch": 0.58, + "learning_rate": 4.904856679382747e-05, + "loss": 1.5439, + "step": 3624 + }, + { + "epoch": 0.58, + "learning_rate": 4.9047861705350825e-05, + "loss": 1.5007, + "step": 3625 + }, + { + "epoch": 0.58, + "learning_rate": 4.9047156360779054e-05, + "loss": 1.5148, + "step": 3626 + }, + { + "epoch": 0.58, + "learning_rate": 4.904645076011966e-05, + "loss": 1.5177, + "step": 3627 + }, + { + "epoch": 0.58, + "learning_rate": 4.9045744903380165e-05, + "loss": 1.5874, + "step": 3628 + }, + { + "epoch": 0.58, + "learning_rate": 4.904503879056809e-05, + "loss": 1.5011, + "step": 3629 + }, + { + "epoch": 0.58, + "learning_rate": 4.904433242169095e-05, + "loss": 1.5446, + "step": 3630 + }, + { + "epoch": 0.58, + "learning_rate": 4.9043625796756264e-05, + "loss": 1.5475, + "step": 3631 + }, + { + "epoch": 0.58, + "learning_rate": 4.904291891577156e-05, + "loss": 1.5788, + "step": 3632 + }, + { + "epoch": 0.58, + "learning_rate": 4.904221177874437e-05, + "loss": 1.5381, + "step": 3633 + }, + { + "epoch": 0.58, + "learning_rate": 4.904150438568222e-05, + "loss": 1.4889, + "step": 3634 + }, + { + "epoch": 0.58, + "learning_rate": 4.904079673659264e-05, + "loss": 1.5122, + "step": 3635 + }, + { + "epoch": 0.58, + "learning_rate": 4.9040088831483176e-05, + "loss": 1.5029, + "step": 3636 + }, + { + "epoch": 0.58, + "learning_rate": 4.903938067036136e-05, + "loss": 1.5249, + "step": 3637 + }, + { + "epoch": 0.58, + "learning_rate": 4.903867225323473e-05, + "loss": 1.5259, + "step": 3638 + }, + { + "epoch": 0.58, + "learning_rate": 4.903796358011084e-05, + "loss": 1.5146, + "step": 3639 + }, + { + "epoch": 0.58, + "learning_rate": 4.9037254650997234e-05, + "loss": 1.481, + "step": 3640 + }, + { + "epoch": 0.58, + "learning_rate": 4.9036545465901454e-05, + "loss": 1.4909, + "step": 3641 + }, + { + "epoch": 0.58, + "learning_rate": 4.903583602483106e-05, + "loss": 1.5145, + "step": 3642 + }, + { + "epoch": 0.58, + "learning_rate": 4.9035126327793604e-05, + "loss": 1.487, + "step": 3643 + }, + { + "epoch": 0.58, + "learning_rate": 4.9034416374796646e-05, + "loss": 1.5736, + "step": 3644 + }, + { + "epoch": 0.58, + "learning_rate": 4.903370616584774e-05, + "loss": 1.5568, + "step": 3645 + }, + { + "epoch": 0.58, + "learning_rate": 4.9032995700954454e-05, + "loss": 1.5689, + "step": 3646 + }, + { + "epoch": 0.58, + "learning_rate": 4.9032284980124366e-05, + "loss": 1.4702, + "step": 3647 + }, + { + "epoch": 0.58, + "learning_rate": 4.903157400336502e-05, + "loss": 1.599, + "step": 3648 + }, + { + "epoch": 0.58, + "learning_rate": 4.903086277068401e-05, + "loss": 1.5005, + "step": 3649 + }, + { + "epoch": 0.58, + "learning_rate": 4.9030151282088896e-05, + "loss": 1.4967, + "step": 3650 + }, + { + "epoch": 0.58, + "learning_rate": 4.902943953758726e-05, + "loss": 1.5596, + "step": 3651 + }, + { + "epoch": 0.58, + "learning_rate": 4.902872753718668e-05, + "loss": 1.5575, + "step": 3652 + }, + { + "epoch": 0.58, + "learning_rate": 4.9028015280894746e-05, + "loss": 1.5407, + "step": 3653 + }, + { + "epoch": 0.58, + "learning_rate": 4.902730276871904e-05, + "loss": 1.5049, + "step": 3654 + }, + { + "epoch": 0.58, + "learning_rate": 4.902659000066714e-05, + "loss": 1.4945, + "step": 3655 + }, + { + "epoch": 0.58, + "learning_rate": 4.902587697674664e-05, + "loss": 1.5104, + "step": 3656 + }, + { + "epoch": 0.58, + "learning_rate": 4.902516369696515e-05, + "loss": 1.5731, + "step": 3657 + }, + { + "epoch": 0.58, + "learning_rate": 4.902445016133024e-05, + "loss": 1.5798, + "step": 3658 + }, + { + "epoch": 0.58, + "learning_rate": 4.9023736369849525e-05, + "loss": 1.5604, + "step": 3659 + }, + { + "epoch": 0.58, + "learning_rate": 4.90230223225306e-05, + "loss": 1.499, + "step": 3660 + }, + { + "epoch": 0.58, + "learning_rate": 4.902230801938108e-05, + "loss": 1.528, + "step": 3661 + }, + { + "epoch": 0.58, + "learning_rate": 4.902159346040855e-05, + "loss": 1.5479, + "step": 3662 + }, + { + "epoch": 0.58, + "learning_rate": 4.902087864562065e-05, + "loss": 1.5635, + "step": 3663 + }, + { + "epoch": 0.58, + "learning_rate": 4.9020163575024966e-05, + "loss": 1.5122, + "step": 3664 + }, + { + "epoch": 0.58, + "learning_rate": 4.9019448248629125e-05, + "loss": 1.5026, + "step": 3665 + }, + { + "epoch": 0.58, + "learning_rate": 4.9018732666440745e-05, + "loss": 1.499, + "step": 3666 + }, + { + "epoch": 0.58, + "learning_rate": 4.9018016828467436e-05, + "loss": 1.5628, + "step": 3667 + }, + { + "epoch": 0.58, + "learning_rate": 4.901730073471684e-05, + "loss": 1.625, + "step": 3668 + }, + { + "epoch": 0.58, + "learning_rate": 4.9016584385196565e-05, + "loss": 1.5666, + "step": 3669 + }, + { + "epoch": 0.58, + "learning_rate": 4.9015867779914256e-05, + "loss": 1.5856, + "step": 3670 + }, + { + "epoch": 0.58, + "learning_rate": 4.901515091887753e-05, + "loss": 1.527, + "step": 3671 + }, + { + "epoch": 0.58, + "learning_rate": 4.901443380209402e-05, + "loss": 1.57, + "step": 3672 + }, + { + "epoch": 0.59, + "learning_rate": 4.9013716429571375e-05, + "loss": 1.5477, + "step": 3673 + }, + { + "epoch": 0.59, + "learning_rate": 4.901299880131723e-05, + "loss": 1.6076, + "step": 3674 + }, + { + "epoch": 0.59, + "learning_rate": 4.9012280917339233e-05, + "loss": 1.5762, + "step": 3675 + }, + { + "epoch": 0.59, + "learning_rate": 4.901156277764502e-05, + "loss": 1.5275, + "step": 3676 + }, + { + "epoch": 0.59, + "learning_rate": 4.901084438224223e-05, + "loss": 1.5246, + "step": 3677 + }, + { + "epoch": 0.59, + "learning_rate": 4.9010125731138534e-05, + "loss": 1.5127, + "step": 3678 + }, + { + "epoch": 0.59, + "learning_rate": 4.9009406824341573e-05, + "loss": 1.5055, + "step": 3679 + }, + { + "epoch": 0.59, + "learning_rate": 4.9008687661859004e-05, + "loss": 1.5688, + "step": 3680 + }, + { + "epoch": 0.59, + "learning_rate": 4.900796824369849e-05, + "loss": 1.5653, + "step": 3681 + }, + { + "epoch": 0.59, + "learning_rate": 4.900724856986769e-05, + "loss": 1.5505, + "step": 3682 + }, + { + "epoch": 0.59, + "learning_rate": 4.900652864037427e-05, + "loss": 1.5671, + "step": 3683 + }, + { + "epoch": 0.59, + "learning_rate": 4.900580845522589e-05, + "loss": 1.5549, + "step": 3684 + }, + { + "epoch": 0.59, + "learning_rate": 4.9005088014430226e-05, + "loss": 1.4974, + "step": 3685 + }, + { + "epoch": 0.59, + "learning_rate": 4.900436731799495e-05, + "loss": 1.5356, + "step": 3686 + }, + { + "epoch": 0.59, + "learning_rate": 4.900364636592774e-05, + "loss": 1.6123, + "step": 3687 + }, + { + "epoch": 0.59, + "learning_rate": 4.900292515823626e-05, + "loss": 1.4766, + "step": 3688 + }, + { + "epoch": 0.59, + "learning_rate": 4.90022036949282e-05, + "loss": 1.5166, + "step": 3689 + }, + { + "epoch": 0.59, + "learning_rate": 4.900148197601124e-05, + "loss": 1.4969, + "step": 3690 + }, + { + "epoch": 0.59, + "learning_rate": 4.9000760001493086e-05, + "loss": 1.5326, + "step": 3691 + }, + { + "epoch": 0.59, + "learning_rate": 4.900003777138139e-05, + "loss": 1.4714, + "step": 3692 + }, + { + "epoch": 0.59, + "learning_rate": 4.8999315285683875e-05, + "loss": 1.508, + "step": 3693 + }, + { + "epoch": 0.59, + "learning_rate": 4.8998592544408214e-05, + "loss": 1.5877, + "step": 3694 + }, + { + "epoch": 0.59, + "learning_rate": 4.8997869547562113e-05, + "loss": 1.5791, + "step": 3695 + }, + { + "epoch": 0.59, + "learning_rate": 4.899714629515328e-05, + "loss": 1.4924, + "step": 3696 + }, + { + "epoch": 0.59, + "learning_rate": 4.89964227871894e-05, + "loss": 1.4684, + "step": 3697 + }, + { + "epoch": 0.59, + "learning_rate": 4.899569902367819e-05, + "loss": 1.5729, + "step": 3698 + }, + { + "epoch": 0.59, + "learning_rate": 4.8994975004627354e-05, + "loss": 1.4771, + "step": 3699 + }, + { + "epoch": 0.59, + "learning_rate": 4.8994250730044595e-05, + "loss": 1.5278, + "step": 3700 + }, + { + "epoch": 0.59, + "learning_rate": 4.899352619993764e-05, + "loss": 1.596, + "step": 3701 + }, + { + "epoch": 0.59, + "learning_rate": 4.899280141431419e-05, + "loss": 1.5552, + "step": 3702 + }, + { + "epoch": 0.59, + "learning_rate": 4.899207637318198e-05, + "loss": 1.5262, + "step": 3703 + }, + { + "epoch": 0.59, + "learning_rate": 4.899135107654872e-05, + "loss": 1.4888, + "step": 3704 + }, + { + "epoch": 0.59, + "learning_rate": 4.8990625524422126e-05, + "loss": 1.4795, + "step": 3705 + }, + { + "epoch": 0.59, + "learning_rate": 4.8989899716809953e-05, + "loss": 1.5199, + "step": 3706 + }, + { + "epoch": 0.59, + "learning_rate": 4.89891736537199e-05, + "loss": 1.584, + "step": 3707 + }, + { + "epoch": 0.59, + "learning_rate": 4.898844733515972e-05, + "loss": 1.5526, + "step": 3708 + }, + { + "epoch": 0.59, + "learning_rate": 4.898772076113713e-05, + "loss": 1.5049, + "step": 3709 + }, + { + "epoch": 0.59, + "learning_rate": 4.8986993931659885e-05, + "loss": 1.4777, + "step": 3710 + }, + { + "epoch": 0.59, + "learning_rate": 4.8986266846735716e-05, + "loss": 1.4601, + "step": 3711 + }, + { + "epoch": 0.59, + "learning_rate": 4.898553950637237e-05, + "loss": 1.5967, + "step": 3712 + }, + { + "epoch": 0.59, + "learning_rate": 4.8984811910577585e-05, + "loss": 1.5301, + "step": 3713 + }, + { + "epoch": 0.59, + "learning_rate": 4.8984084059359114e-05, + "loss": 1.5218, + "step": 3714 + }, + { + "epoch": 0.59, + "learning_rate": 4.8983355952724715e-05, + "loss": 1.5768, + "step": 3715 + }, + { + "epoch": 0.59, + "learning_rate": 4.898262759068213e-05, + "loss": 1.5508, + "step": 3716 + }, + { + "epoch": 0.59, + "learning_rate": 4.898189897323913e-05, + "loss": 1.5547, + "step": 3717 + }, + { + "epoch": 0.59, + "learning_rate": 4.898117010040345e-05, + "loss": 1.4857, + "step": 3718 + }, + { + "epoch": 0.59, + "learning_rate": 4.8980440972182884e-05, + "loss": 1.6054, + "step": 3719 + }, + { + "epoch": 0.59, + "learning_rate": 4.897971158858517e-05, + "loss": 1.4764, + "step": 3720 + }, + { + "epoch": 0.59, + "learning_rate": 4.8978981949618084e-05, + "loss": 1.5772, + "step": 3721 + }, + { + "epoch": 0.59, + "learning_rate": 4.897825205528941e-05, + "loss": 1.5488, + "step": 3722 + }, + { + "epoch": 0.59, + "learning_rate": 4.89775219056069e-05, + "loss": 1.5646, + "step": 3723 + }, + { + "epoch": 0.59, + "learning_rate": 4.897679150057834e-05, + "loss": 1.5472, + "step": 3724 + }, + { + "epoch": 0.59, + "learning_rate": 4.897606084021151e-05, + "loss": 1.5029, + "step": 3725 + }, + { + "epoch": 0.59, + "learning_rate": 4.897532992451419e-05, + "loss": 1.5233, + "step": 3726 + }, + { + "epoch": 0.59, + "learning_rate": 4.897459875349416e-05, + "loss": 1.5334, + "step": 3727 + }, + { + "epoch": 0.59, + "learning_rate": 4.8973867327159203e-05, + "loss": 1.5514, + "step": 3728 + }, + { + "epoch": 0.59, + "learning_rate": 4.8973135645517124e-05, + "loss": 1.5116, + "step": 3729 + }, + { + "epoch": 0.59, + "learning_rate": 4.8972403708575695e-05, + "loss": 1.4754, + "step": 3730 + }, + { + "epoch": 0.59, + "learning_rate": 4.897167151634273e-05, + "loss": 1.6024, + "step": 3731 + }, + { + "epoch": 0.59, + "learning_rate": 4.897093906882602e-05, + "loss": 1.5443, + "step": 3732 + }, + { + "epoch": 0.59, + "learning_rate": 4.8970206366033346e-05, + "loss": 1.4927, + "step": 3733 + }, + { + "epoch": 0.59, + "learning_rate": 4.896947340797253e-05, + "loss": 1.5171, + "step": 3734 + }, + { + "epoch": 0.6, + "learning_rate": 4.8968740194651384e-05, + "loss": 1.5589, + "step": 3735 + }, + { + "epoch": 0.6, + "learning_rate": 4.8968006726077705e-05, + "loss": 1.4964, + "step": 3736 + }, + { + "epoch": 0.6, + "learning_rate": 4.89672730022593e-05, + "loss": 1.5396, + "step": 3737 + }, + { + "epoch": 0.6, + "learning_rate": 4.896653902320399e-05, + "loss": 1.541, + "step": 3738 + }, + { + "epoch": 0.6, + "learning_rate": 4.896580478891959e-05, + "loss": 1.4969, + "step": 3739 + }, + { + "epoch": 0.6, + "learning_rate": 4.8965070299413924e-05, + "loss": 1.5552, + "step": 3740 + }, + { + "epoch": 0.6, + "learning_rate": 4.896433555469481e-05, + "loss": 1.4842, + "step": 3741 + }, + { + "epoch": 0.6, + "learning_rate": 4.896360055477006e-05, + "loss": 1.576, + "step": 3742 + }, + { + "epoch": 0.6, + "learning_rate": 4.896286529964752e-05, + "loss": 1.4826, + "step": 3743 + }, + { + "epoch": 0.6, + "learning_rate": 4.896212978933501e-05, + "loss": 1.5063, + "step": 3744 + }, + { + "epoch": 0.6, + "learning_rate": 4.896139402384037e-05, + "loss": 1.4447, + "step": 3745 + }, + { + "epoch": 0.6, + "learning_rate": 4.8960658003171426e-05, + "loss": 1.4608, + "step": 3746 + }, + { + "epoch": 0.6, + "learning_rate": 4.895992172733602e-05, + "loss": 1.5682, + "step": 3747 + }, + { + "epoch": 0.6, + "learning_rate": 4.8959185196342e-05, + "loss": 1.4439, + "step": 3748 + }, + { + "epoch": 0.6, + "learning_rate": 4.89584484101972e-05, + "loss": 1.5207, + "step": 3749 + }, + { + "epoch": 0.6, + "learning_rate": 4.8957711368909476e-05, + "loss": 1.5742, + "step": 3750 + }, + { + "epoch": 0.6, + "learning_rate": 4.895697407248666e-05, + "loss": 1.6318, + "step": 3751 + }, + { + "epoch": 0.6, + "learning_rate": 4.895623652093663e-05, + "loss": 1.564, + "step": 3752 + }, + { + "epoch": 0.6, + "learning_rate": 4.895549871426721e-05, + "loss": 1.5671, + "step": 3753 + }, + { + "epoch": 0.6, + "learning_rate": 4.8954760652486276e-05, + "loss": 1.5283, + "step": 3754 + }, + { + "epoch": 0.6, + "learning_rate": 4.895402233560169e-05, + "loss": 1.5057, + "step": 3755 + }, + { + "epoch": 0.6, + "learning_rate": 4.895328376362131e-05, + "loss": 1.9413, + "step": 3756 + }, + { + "epoch": 0.6, + "learning_rate": 4.895254493655299e-05, + "loss": 1.5426, + "step": 3757 + }, + { + "epoch": 0.6, + "learning_rate": 4.895180585440462e-05, + "loss": 1.5291, + "step": 3758 + }, + { + "epoch": 0.6, + "learning_rate": 4.8951066517184054e-05, + "loss": 1.494, + "step": 3759 + }, + { + "epoch": 0.6, + "learning_rate": 4.895032692489917e-05, + "loss": 1.4373, + "step": 3760 + }, + { + "epoch": 0.6, + "learning_rate": 4.894958707755785e-05, + "loss": 1.537, + "step": 3761 + }, + { + "epoch": 0.6, + "learning_rate": 4.894884697516796e-05, + "loss": 1.4955, + "step": 3762 + }, + { + "epoch": 0.6, + "learning_rate": 4.894810661773739e-05, + "loss": 1.4699, + "step": 3763 + }, + { + "epoch": 0.6, + "learning_rate": 4.894736600527402e-05, + "loss": 1.529, + "step": 3764 + }, + { + "epoch": 0.6, + "learning_rate": 4.894662513778575e-05, + "loss": 1.4813, + "step": 3765 + }, + { + "epoch": 0.6, + "learning_rate": 4.894588401528046e-05, + "loss": 1.4899, + "step": 3766 + }, + { + "epoch": 0.6, + "learning_rate": 4.894514263776604e-05, + "loss": 1.5233, + "step": 3767 + }, + { + "epoch": 0.6, + "learning_rate": 4.8944401005250386e-05, + "loss": 1.4976, + "step": 3768 + }, + { + "epoch": 0.6, + "learning_rate": 4.89436591177414e-05, + "loss": 1.5991, + "step": 3769 + }, + { + "epoch": 0.6, + "learning_rate": 4.894291697524699e-05, + "loss": 1.5964, + "step": 3770 + }, + { + "epoch": 0.6, + "learning_rate": 4.8942174577775033e-05, + "loss": 1.5226, + "step": 3771 + }, + { + "epoch": 0.6, + "learning_rate": 4.894143192533347e-05, + "loss": 1.5244, + "step": 3772 + }, + { + "epoch": 0.6, + "learning_rate": 4.8940689017930184e-05, + "loss": 1.5658, + "step": 3773 + }, + { + "epoch": 0.6, + "learning_rate": 4.893994585557309e-05, + "loss": 1.4845, + "step": 3774 + }, + { + "epoch": 0.6, + "learning_rate": 4.893920243827011e-05, + "loss": 1.5226, + "step": 3775 + }, + { + "epoch": 0.6, + "learning_rate": 4.893845876602916e-05, + "loss": 1.5409, + "step": 3776 + }, + { + "epoch": 0.6, + "learning_rate": 4.893771483885816e-05, + "loss": 1.5251, + "step": 3777 + }, + { + "epoch": 0.6, + "learning_rate": 4.893697065676502e-05, + "loss": 1.5404, + "step": 3778 + }, + { + "epoch": 0.6, + "learning_rate": 4.893622621975769e-05, + "loss": 1.5757, + "step": 3779 + }, + { + "epoch": 0.6, + "learning_rate": 4.893548152784407e-05, + "loss": 1.5132, + "step": 3780 + }, + { + "epoch": 0.6, + "learning_rate": 4.893473658103211e-05, + "loss": 1.4901, + "step": 3781 + }, + { + "epoch": 0.6, + "learning_rate": 4.893399137932974e-05, + "loss": 1.4383, + "step": 3782 + }, + { + "epoch": 0.6, + "learning_rate": 4.893324592274488e-05, + "loss": 1.5187, + "step": 3783 + }, + { + "epoch": 0.6, + "learning_rate": 4.893250021128549e-05, + "loss": 1.5495, + "step": 3784 + }, + { + "epoch": 0.6, + "learning_rate": 4.89317542449595e-05, + "loss": 1.4907, + "step": 3785 + }, + { + "epoch": 0.6, + "learning_rate": 4.893100802377486e-05, + "loss": 1.4455, + "step": 3786 + }, + { + "epoch": 0.6, + "learning_rate": 4.893026154773951e-05, + "loss": 1.5607, + "step": 3787 + }, + { + "epoch": 0.6, + "learning_rate": 4.892951481686141e-05, + "loss": 1.5197, + "step": 3788 + }, + { + "epoch": 0.6, + "learning_rate": 4.892876783114849e-05, + "loss": 1.5448, + "step": 3789 + }, + { + "epoch": 0.6, + "learning_rate": 4.8928020590608736e-05, + "loss": 1.592, + "step": 3790 + }, + { + "epoch": 0.6, + "learning_rate": 4.892727309525008e-05, + "loss": 1.488, + "step": 3791 + }, + { + "epoch": 0.6, + "learning_rate": 4.89265253450805e-05, + "loss": 1.5946, + "step": 3792 + }, + { + "epoch": 0.6, + "learning_rate": 4.892577734010794e-05, + "loss": 1.5457, + "step": 3793 + }, + { + "epoch": 0.6, + "learning_rate": 4.8925029080340387e-05, + "loss": 1.5646, + "step": 3794 + }, + { + "epoch": 0.6, + "learning_rate": 4.8924280565785795e-05, + "loss": 1.5015, + "step": 3795 + }, + { + "epoch": 0.6, + "learning_rate": 4.8923531796452136e-05, + "loss": 1.5846, + "step": 3796 + }, + { + "epoch": 0.6, + "learning_rate": 4.89227827723474e-05, + "loss": 1.5275, + "step": 3797 + }, + { + "epoch": 0.61, + "learning_rate": 4.892203349347954e-05, + "loss": 1.512, + "step": 3798 + }, + { + "epoch": 0.61, + "learning_rate": 4.892128395985655e-05, + "loss": 1.5496, + "step": 3799 + }, + { + "epoch": 0.61, + "learning_rate": 4.892053417148641e-05, + "loss": 1.557, + "step": 3800 + }, + { + "epoch": 0.61, + "learning_rate": 4.89197841283771e-05, + "loss": 1.5052, + "step": 3801 + }, + { + "epoch": 0.61, + "learning_rate": 4.891903383053662e-05, + "loss": 1.4925, + "step": 3802 + }, + { + "epoch": 0.61, + "learning_rate": 4.8918283277972946e-05, + "loss": 1.5101, + "step": 3803 + }, + { + "epoch": 0.61, + "learning_rate": 4.891753247069407e-05, + "loss": 1.5472, + "step": 3804 + }, + { + "epoch": 0.61, + "learning_rate": 4.8916781408708e-05, + "loss": 1.5252, + "step": 3805 + }, + { + "epoch": 0.61, + "learning_rate": 4.891603009202272e-05, + "loss": 1.6042, + "step": 3806 + }, + { + "epoch": 0.61, + "learning_rate": 4.891527852064626e-05, + "loss": 1.5399, + "step": 3807 + }, + { + "epoch": 0.61, + "learning_rate": 4.8914526694586585e-05, + "loss": 1.5218, + "step": 3808 + }, + { + "epoch": 0.61, + "learning_rate": 4.8913774613851724e-05, + "loss": 1.5111, + "step": 3809 + }, + { + "epoch": 0.61, + "learning_rate": 4.8913022278449685e-05, + "loss": 1.5182, + "step": 3810 + }, + { + "epoch": 0.61, + "learning_rate": 4.8912269688388465e-05, + "loss": 1.4613, + "step": 3811 + }, + { + "epoch": 0.61, + "learning_rate": 4.8911516843676106e-05, + "loss": 1.5348, + "step": 3812 + }, + { + "epoch": 0.61, + "learning_rate": 4.89107637443206e-05, + "loss": 1.5042, + "step": 3813 + }, + { + "epoch": 0.61, + "learning_rate": 4.891001039032998e-05, + "loss": 1.4671, + "step": 3814 + }, + { + "epoch": 0.61, + "learning_rate": 4.890925678171226e-05, + "loss": 1.5498, + "step": 3815 + }, + { + "epoch": 0.61, + "learning_rate": 4.890850291847549e-05, + "loss": 1.6016, + "step": 3816 + }, + { + "epoch": 0.61, + "learning_rate": 4.8907748800627655e-05, + "loss": 1.5426, + "step": 3817 + }, + { + "epoch": 0.61, + "learning_rate": 4.890699442817682e-05, + "loss": 1.5985, + "step": 3818 + }, + { + "epoch": 0.61, + "learning_rate": 4.890623980113101e-05, + "loss": 1.6034, + "step": 3819 + }, + { + "epoch": 0.61, + "learning_rate": 4.890548491949826e-05, + "loss": 1.4862, + "step": 3820 + }, + { + "epoch": 0.61, + "learning_rate": 4.890472978328661e-05, + "loss": 1.505, + "step": 3821 + }, + { + "epoch": 0.61, + "learning_rate": 4.890397439250409e-05, + "loss": 1.5529, + "step": 3822 + }, + { + "epoch": 0.61, + "learning_rate": 4.890321874715877e-05, + "loss": 1.5656, + "step": 3823 + }, + { + "epoch": 0.61, + "learning_rate": 4.8902462847258676e-05, + "loss": 1.4989, + "step": 3824 + }, + { + "epoch": 0.61, + "learning_rate": 4.890170669281186e-05, + "loss": 1.4657, + "step": 3825 + }, + { + "epoch": 0.61, + "learning_rate": 4.890095028382638e-05, + "loss": 1.5234, + "step": 3826 + }, + { + "epoch": 0.61, + "learning_rate": 4.89001936203103e-05, + "loss": 1.4915, + "step": 3827 + }, + { + "epoch": 0.61, + "learning_rate": 4.889943670227166e-05, + "loss": 1.5166, + "step": 3828 + }, + { + "epoch": 0.61, + "learning_rate": 4.8898679529718524e-05, + "loss": 1.4709, + "step": 3829 + }, + { + "epoch": 0.61, + "learning_rate": 4.889792210265897e-05, + "loss": 1.543, + "step": 3830 + }, + { + "epoch": 0.61, + "learning_rate": 4.889716442110105e-05, + "loss": 1.5259, + "step": 3831 + }, + { + "epoch": 0.61, + "learning_rate": 4.889640648505284e-05, + "loss": 1.5303, + "step": 3832 + }, + { + "epoch": 0.61, + "learning_rate": 4.889564829452241e-05, + "loss": 1.5933, + "step": 3833 + }, + { + "epoch": 0.61, + "learning_rate": 4.8894889849517825e-05, + "loss": 1.4928, + "step": 3834 + }, + { + "epoch": 0.61, + "learning_rate": 4.889413115004717e-05, + "loss": 1.5049, + "step": 3835 + }, + { + "epoch": 0.61, + "learning_rate": 4.889337219611854e-05, + "loss": 1.5876, + "step": 3836 + }, + { + "epoch": 0.61, + "learning_rate": 4.889261298773998e-05, + "loss": 1.5618, + "step": 3837 + }, + { + "epoch": 0.61, + "learning_rate": 4.8891853524919616e-05, + "loss": 1.5241, + "step": 3838 + }, + { + "epoch": 0.61, + "learning_rate": 4.889109380766551e-05, + "loss": 1.5356, + "step": 3839 + }, + { + "epoch": 0.61, + "learning_rate": 4.889033383598576e-05, + "loss": 1.5374, + "step": 3840 + }, + { + "epoch": 0.61, + "learning_rate": 4.8889573609888455e-05, + "loss": 1.492, + "step": 3841 + }, + { + "epoch": 0.61, + "learning_rate": 4.88888131293817e-05, + "loss": 1.5487, + "step": 3842 + }, + { + "epoch": 0.61, + "learning_rate": 4.888805239447358e-05, + "loss": 1.5065, + "step": 3843 + }, + { + "epoch": 0.61, + "learning_rate": 4.888729140517222e-05, + "loss": 1.5798, + "step": 3844 + }, + { + "epoch": 0.61, + "learning_rate": 4.88865301614857e-05, + "loss": 1.509, + "step": 3845 + }, + { + "epoch": 0.61, + "learning_rate": 4.8885768663422135e-05, + "loss": 1.5275, + "step": 3846 + }, + { + "epoch": 0.61, + "learning_rate": 4.8885006910989634e-05, + "loss": 1.5825, + "step": 3847 + }, + { + "epoch": 0.61, + "learning_rate": 4.888424490419631e-05, + "loss": 1.5107, + "step": 3848 + }, + { + "epoch": 0.61, + "learning_rate": 4.888348264305028e-05, + "loss": 1.4799, + "step": 3849 + }, + { + "epoch": 0.61, + "learning_rate": 4.888272012755966e-05, + "loss": 1.479, + "step": 3850 + }, + { + "epoch": 0.61, + "learning_rate": 4.8881957357732574e-05, + "loss": 1.5342, + "step": 3851 + }, + { + "epoch": 0.61, + "learning_rate": 4.888119433357713e-05, + "loss": 1.4829, + "step": 3852 + }, + { + "epoch": 0.61, + "learning_rate": 4.888043105510148e-05, + "loss": 1.4683, + "step": 3853 + }, + { + "epoch": 0.61, + "learning_rate": 4.8879667522313735e-05, + "loss": 1.535, + "step": 3854 + }, + { + "epoch": 0.61, + "learning_rate": 4.8878903735222015e-05, + "loss": 1.4865, + "step": 3855 + }, + { + "epoch": 0.61, + "learning_rate": 4.887813969383448e-05, + "loss": 1.5467, + "step": 3856 + }, + { + "epoch": 0.61, + "learning_rate": 4.887737539815925e-05, + "loss": 1.5882, + "step": 3857 + }, + { + "epoch": 0.61, + "learning_rate": 4.887661084820446e-05, + "loss": 1.5941, + "step": 3858 + }, + { + "epoch": 0.61, + "learning_rate": 4.887584604397828e-05, + "loss": 1.5026, + "step": 3859 + }, + { + "epoch": 0.61, + "learning_rate": 4.887508098548882e-05, + "loss": 1.5426, + "step": 3860 + }, + { + "epoch": 0.62, + "learning_rate": 4.8874315672744245e-05, + "loss": 1.5721, + "step": 3861 + }, + { + "epoch": 0.62, + "learning_rate": 4.887355010575271e-05, + "loss": 1.5534, + "step": 3862 + }, + { + "epoch": 0.62, + "learning_rate": 4.887278428452235e-05, + "loss": 1.4777, + "step": 3863 + }, + { + "epoch": 0.62, + "learning_rate": 4.887201820906133e-05, + "loss": 1.5052, + "step": 3864 + }, + { + "epoch": 0.62, + "learning_rate": 4.8871251879377815e-05, + "loss": 1.5081, + "step": 3865 + }, + { + "epoch": 0.62, + "learning_rate": 4.887048529547996e-05, + "loss": 1.5112, + "step": 3866 + }, + { + "epoch": 0.62, + "learning_rate": 4.886971845737593e-05, + "loss": 1.5454, + "step": 3867 + }, + { + "epoch": 0.62, + "learning_rate": 4.886895136507389e-05, + "loss": 1.4842, + "step": 3868 + }, + { + "epoch": 0.62, + "learning_rate": 4.886818401858201e-05, + "loss": 1.5584, + "step": 3869 + }, + { + "epoch": 0.62, + "learning_rate": 4.8867416417908454e-05, + "loss": 1.4878, + "step": 3870 + }, + { + "epoch": 0.62, + "learning_rate": 4.8866648563061406e-05, + "loss": 1.5029, + "step": 3871 + }, + { + "epoch": 0.62, + "learning_rate": 4.886588045404904e-05, + "loss": 1.5628, + "step": 3872 + }, + { + "epoch": 0.62, + "learning_rate": 4.886511209087954e-05, + "loss": 1.5412, + "step": 3873 + }, + { + "epoch": 0.62, + "learning_rate": 4.886434347356108e-05, + "loss": 1.5404, + "step": 3874 + }, + { + "epoch": 0.62, + "learning_rate": 4.886357460210186e-05, + "loss": 1.515, + "step": 3875 + }, + { + "epoch": 0.62, + "learning_rate": 4.8862805476510056e-05, + "loss": 1.4847, + "step": 3876 + }, + { + "epoch": 0.62, + "learning_rate": 4.886203609679385e-05, + "loss": 1.5081, + "step": 3877 + }, + { + "epoch": 0.62, + "learning_rate": 4.8861266462961456e-05, + "loss": 1.4751, + "step": 3878 + }, + { + "epoch": 0.62, + "learning_rate": 4.8860496575021056e-05, + "loss": 1.5042, + "step": 3879 + }, + { + "epoch": 0.62, + "learning_rate": 4.8859726432980854e-05, + "loss": 1.4967, + "step": 3880 + }, + { + "epoch": 0.62, + "learning_rate": 4.885895603684906e-05, + "loss": 1.6007, + "step": 3881 + }, + { + "epoch": 0.62, + "learning_rate": 4.885818538663386e-05, + "loss": 1.6216, + "step": 3882 + }, + { + "epoch": 0.62, + "learning_rate": 4.885741448234348e-05, + "loss": 1.492, + "step": 3883 + }, + { + "epoch": 0.62, + "learning_rate": 4.8856643323986104e-05, + "loss": 1.5594, + "step": 3884 + }, + { + "epoch": 0.62, + "learning_rate": 4.8855871911569975e-05, + "loss": 1.5343, + "step": 3885 + }, + { + "epoch": 0.62, + "learning_rate": 4.885510024510329e-05, + "loss": 1.4914, + "step": 3886 + }, + { + "epoch": 0.62, + "learning_rate": 4.885432832459427e-05, + "loss": 1.5658, + "step": 3887 + }, + { + "epoch": 0.62, + "learning_rate": 4.885355615005113e-05, + "loss": 1.4899, + "step": 3888 + }, + { + "epoch": 0.62, + "learning_rate": 4.88527837214821e-05, + "loss": 1.5067, + "step": 3889 + }, + { + "epoch": 0.62, + "learning_rate": 4.885201103889541e-05, + "loss": 1.4762, + "step": 3890 + }, + { + "epoch": 0.62, + "learning_rate": 4.885123810229928e-05, + "loss": 1.5532, + "step": 3891 + }, + { + "epoch": 0.62, + "learning_rate": 4.8850464911701945e-05, + "loss": 1.5339, + "step": 3892 + }, + { + "epoch": 0.62, + "learning_rate": 4.8849691467111636e-05, + "loss": 1.5524, + "step": 3893 + }, + { + "epoch": 0.62, + "learning_rate": 4.8848917768536594e-05, + "loss": 1.5542, + "step": 3894 + }, + { + "epoch": 0.62, + "learning_rate": 4.884814381598506e-05, + "loss": 1.494, + "step": 3895 + }, + { + "epoch": 0.62, + "learning_rate": 4.884736960946527e-05, + "loss": 1.4797, + "step": 3896 + }, + { + "epoch": 0.62, + "learning_rate": 4.884659514898547e-05, + "loss": 1.5002, + "step": 3897 + }, + { + "epoch": 0.62, + "learning_rate": 4.884582043455391e-05, + "loss": 1.509, + "step": 3898 + }, + { + "epoch": 0.62, + "learning_rate": 4.8845045466178854e-05, + "loss": 1.5226, + "step": 3899 + }, + { + "epoch": 0.62, + "learning_rate": 4.8844270243868525e-05, + "loss": 1.5021, + "step": 3900 + }, + { + "epoch": 0.62, + "learning_rate": 4.8843494767631196e-05, + "loss": 1.5342, + "step": 3901 + }, + { + "epoch": 0.62, + "learning_rate": 4.884271903747513e-05, + "loss": 1.4839, + "step": 3902 + }, + { + "epoch": 0.62, + "learning_rate": 4.8841943053408566e-05, + "loss": 1.4673, + "step": 3903 + }, + { + "epoch": 0.62, + "learning_rate": 4.88411668154398e-05, + "loss": 1.499, + "step": 3904 + }, + { + "epoch": 0.62, + "learning_rate": 4.884039032357707e-05, + "loss": 1.4642, + "step": 3905 + }, + { + "epoch": 0.62, + "learning_rate": 4.8839613577828666e-05, + "loss": 1.5308, + "step": 3906 + }, + { + "epoch": 0.62, + "learning_rate": 4.883883657820284e-05, + "loss": 1.4486, + "step": 3907 + }, + { + "epoch": 0.62, + "learning_rate": 4.883805932470789e-05, + "loss": 1.5535, + "step": 3908 + }, + { + "epoch": 0.62, + "learning_rate": 4.8837281817352075e-05, + "loss": 1.5373, + "step": 3909 + }, + { + "epoch": 0.62, + "learning_rate": 4.883650405614368e-05, + "loss": 1.5137, + "step": 3910 + }, + { + "epoch": 0.62, + "learning_rate": 4.883572604109099e-05, + "loss": 1.5236, + "step": 3911 + }, + { + "epoch": 0.62, + "learning_rate": 4.883494777220228e-05, + "loss": 1.5861, + "step": 3912 + }, + { + "epoch": 0.62, + "learning_rate": 4.883416924948586e-05, + "loss": 1.4795, + "step": 3913 + }, + { + "epoch": 0.62, + "learning_rate": 4.883339047295e-05, + "loss": 1.4852, + "step": 3914 + }, + { + "epoch": 0.62, + "learning_rate": 4.8832611442603007e-05, + "loss": 1.6292, + "step": 3915 + }, + { + "epoch": 0.62, + "learning_rate": 4.883183215845316e-05, + "loss": 1.5487, + "step": 3916 + }, + { + "epoch": 0.62, + "learning_rate": 4.8831052620508774e-05, + "loss": 1.5871, + "step": 3917 + }, + { + "epoch": 0.62, + "learning_rate": 4.883027282877815e-05, + "loss": 1.5169, + "step": 3918 + }, + { + "epoch": 0.62, + "learning_rate": 4.882949278326959e-05, + "loss": 1.5112, + "step": 3919 + }, + { + "epoch": 0.62, + "learning_rate": 4.882871248399139e-05, + "loss": 1.562, + "step": 3920 + }, + { + "epoch": 0.62, + "learning_rate": 4.8827931930951875e-05, + "loss": 1.5482, + "step": 3921 + }, + { + "epoch": 0.62, + "learning_rate": 4.8827151124159355e-05, + "loss": 1.5509, + "step": 3922 + }, + { + "epoch": 0.62, + "learning_rate": 4.882637006362214e-05, + "loss": 1.5251, + "step": 3923 + }, + { + "epoch": 0.63, + "learning_rate": 4.8825588749348537e-05, + "loss": 1.5965, + "step": 3924 + }, + { + "epoch": 0.63, + "learning_rate": 4.882480718134689e-05, + "loss": 1.5957, + "step": 3925 + }, + { + "epoch": 0.63, + "learning_rate": 4.882402535962551e-05, + "loss": 1.5558, + "step": 3926 + }, + { + "epoch": 0.63, + "learning_rate": 4.882324328419272e-05, + "loss": 1.5239, + "step": 3927 + }, + { + "epoch": 0.63, + "learning_rate": 4.8822460955056854e-05, + "loss": 1.5824, + "step": 3928 + }, + { + "epoch": 0.63, + "learning_rate": 4.8821678372226244e-05, + "loss": 1.5697, + "step": 3929 + }, + { + "epoch": 0.63, + "learning_rate": 4.882089553570922e-05, + "loss": 1.5153, + "step": 3930 + }, + { + "epoch": 0.63, + "learning_rate": 4.882011244551412e-05, + "loss": 1.5418, + "step": 3931 + }, + { + "epoch": 0.63, + "learning_rate": 4.881932910164929e-05, + "loss": 1.5457, + "step": 3932 + }, + { + "epoch": 0.63, + "learning_rate": 4.881854550412306e-05, + "loss": 1.5781, + "step": 3933 + }, + { + "epoch": 0.63, + "learning_rate": 4.8817761652943776e-05, + "loss": 1.5047, + "step": 3934 + }, + { + "epoch": 0.63, + "learning_rate": 4.88169775481198e-05, + "loss": 1.5015, + "step": 3935 + }, + { + "epoch": 0.63, + "learning_rate": 4.881619318965946e-05, + "loss": 1.542, + "step": 3936 + }, + { + "epoch": 0.63, + "learning_rate": 4.8815408577571135e-05, + "loss": 1.4874, + "step": 3937 + }, + { + "epoch": 0.63, + "learning_rate": 4.881462371186317e-05, + "loss": 1.5763, + "step": 3938 + }, + { + "epoch": 0.63, + "learning_rate": 4.881383859254391e-05, + "loss": 1.5594, + "step": 3939 + }, + { + "epoch": 0.63, + "learning_rate": 4.881305321962173e-05, + "loss": 1.5054, + "step": 3940 + }, + { + "epoch": 0.63, + "learning_rate": 4.881226759310499e-05, + "loss": 1.5564, + "step": 3941 + }, + { + "epoch": 0.63, + "learning_rate": 4.881148171300206e-05, + "loss": 1.5246, + "step": 3942 + }, + { + "epoch": 0.63, + "learning_rate": 4.88106955793213e-05, + "loss": 1.6084, + "step": 3943 + }, + { + "epoch": 0.63, + "learning_rate": 4.88099091920711e-05, + "loss": 1.5837, + "step": 3944 + }, + { + "epoch": 0.63, + "learning_rate": 4.880912255125981e-05, + "loss": 1.5579, + "step": 3945 + }, + { + "epoch": 0.63, + "learning_rate": 4.880833565689582e-05, + "loss": 1.4906, + "step": 3946 + }, + { + "epoch": 0.63, + "learning_rate": 4.8807548508987513e-05, + "loss": 1.5155, + "step": 3947 + }, + { + "epoch": 0.63, + "learning_rate": 4.8806761107543276e-05, + "loss": 1.5169, + "step": 3948 + }, + { + "epoch": 0.63, + "learning_rate": 4.880597345257147e-05, + "loss": 1.5744, + "step": 3949 + }, + { + "epoch": 0.63, + "learning_rate": 4.880518554408051e-05, + "loss": 1.4331, + "step": 3950 + }, + { + "epoch": 0.63, + "learning_rate": 4.880439738207878e-05, + "loss": 1.542, + "step": 3951 + }, + { + "epoch": 0.63, + "learning_rate": 4.880360896657466e-05, + "loss": 1.5596, + "step": 3952 + }, + { + "epoch": 0.63, + "learning_rate": 4.880282029757657e-05, + "loss": 1.5555, + "step": 3953 + }, + { + "epoch": 0.63, + "learning_rate": 4.8802031375092884e-05, + "loss": 1.5591, + "step": 3954 + }, + { + "epoch": 0.63, + "learning_rate": 4.8801242199132016e-05, + "loss": 1.5184, + "step": 3955 + }, + { + "epoch": 0.63, + "learning_rate": 4.880045276970237e-05, + "loss": 1.5851, + "step": 3956 + }, + { + "epoch": 0.63, + "learning_rate": 4.879966308681235e-05, + "loss": 1.4842, + "step": 3957 + }, + { + "epoch": 0.63, + "learning_rate": 4.879887315047037e-05, + "loss": 1.5355, + "step": 3958 + }, + { + "epoch": 0.63, + "learning_rate": 4.879808296068484e-05, + "loss": 1.5625, + "step": 3959 + }, + { + "epoch": 0.63, + "learning_rate": 4.879729251746417e-05, + "loss": 1.5876, + "step": 3960 + }, + { + "epoch": 0.63, + "learning_rate": 4.8796501820816785e-05, + "loss": 1.5068, + "step": 3961 + }, + { + "epoch": 0.63, + "learning_rate": 4.87957108707511e-05, + "loss": 1.5026, + "step": 3962 + }, + { + "epoch": 0.63, + "learning_rate": 4.879491966727555e-05, + "loss": 1.5304, + "step": 3963 + }, + { + "epoch": 0.63, + "learning_rate": 4.879412821039854e-05, + "loss": 1.4899, + "step": 3964 + }, + { + "epoch": 0.63, + "learning_rate": 4.8793336500128514e-05, + "loss": 1.5477, + "step": 3965 + }, + { + "epoch": 0.63, + "learning_rate": 4.8792544536473905e-05, + "loss": 1.4964, + "step": 3966 + }, + { + "epoch": 0.63, + "learning_rate": 4.879175231944314e-05, + "loss": 1.5646, + "step": 3967 + }, + { + "epoch": 0.63, + "learning_rate": 4.879095984904466e-05, + "loss": 1.5612, + "step": 3968 + }, + { + "epoch": 0.63, + "learning_rate": 4.879016712528689e-05, + "loss": 1.4902, + "step": 3969 + }, + { + "epoch": 0.63, + "learning_rate": 4.878937414817829e-05, + "loss": 1.5195, + "step": 3970 + }, + { + "epoch": 0.63, + "learning_rate": 4.87885809177273e-05, + "loss": 1.6009, + "step": 3971 + }, + { + "epoch": 0.63, + "learning_rate": 4.878778743394237e-05, + "loss": 1.6139, + "step": 3972 + }, + { + "epoch": 0.63, + "learning_rate": 4.8786993696831935e-05, + "loss": 1.5495, + "step": 3973 + }, + { + "epoch": 0.63, + "learning_rate": 4.878619970640446e-05, + "loss": 1.5828, + "step": 3974 + }, + { + "epoch": 0.63, + "learning_rate": 4.8785405462668406e-05, + "loss": 1.6001, + "step": 3975 + }, + { + "epoch": 0.63, + "learning_rate": 4.878461096563222e-05, + "loss": 1.4653, + "step": 3976 + }, + { + "epoch": 0.63, + "learning_rate": 4.8783816215304366e-05, + "loss": 1.4507, + "step": 3977 + }, + { + "epoch": 0.63, + "learning_rate": 4.8783021211693315e-05, + "loss": 1.548, + "step": 3978 + }, + { + "epoch": 0.63, + "learning_rate": 4.878222595480751e-05, + "loss": 1.5243, + "step": 3979 + }, + { + "epoch": 0.63, + "learning_rate": 4.878143044465546e-05, + "loss": 1.5549, + "step": 3980 + }, + { + "epoch": 0.63, + "learning_rate": 4.87806346812456e-05, + "loss": 1.4784, + "step": 3981 + }, + { + "epoch": 0.63, + "learning_rate": 4.877983866458642e-05, + "loss": 1.5203, + "step": 3982 + }, + { + "epoch": 0.63, + "learning_rate": 4.87790423946864e-05, + "loss": 1.5814, + "step": 3983 + }, + { + "epoch": 0.63, + "learning_rate": 4.877824587155401e-05, + "loss": 1.5519, + "step": 3984 + }, + { + "epoch": 0.63, + "learning_rate": 4.877744909519775e-05, + "loss": 1.4888, + "step": 3985 + }, + { + "epoch": 0.64, + "learning_rate": 4.877665206562607e-05, + "loss": 1.5711, + "step": 3986 + }, + { + "epoch": 0.64, + "learning_rate": 4.87758547828475e-05, + "loss": 1.5267, + "step": 3987 + }, + { + "epoch": 0.64, + "learning_rate": 4.8775057246870506e-05, + "loss": 1.4779, + "step": 3988 + }, + { + "epoch": 0.64, + "learning_rate": 4.877425945770358e-05, + "loss": 1.521, + "step": 3989 + }, + { + "epoch": 0.64, + "learning_rate": 4.877346141535523e-05, + "loss": 1.4797, + "step": 3990 + }, + { + "epoch": 0.64, + "learning_rate": 4.877266311983395e-05, + "loss": 1.568, + "step": 3991 + }, + { + "epoch": 0.64, + "learning_rate": 4.877186457114824e-05, + "loss": 1.5164, + "step": 3992 + }, + { + "epoch": 0.64, + "learning_rate": 4.8771065769306604e-05, + "loss": 1.5744, + "step": 3993 + }, + { + "epoch": 0.64, + "learning_rate": 4.8770266714317545e-05, + "loss": 1.5396, + "step": 3994 + }, + { + "epoch": 0.64, + "learning_rate": 4.8769467406189586e-05, + "loss": 1.5744, + "step": 3995 + }, + { + "epoch": 0.64, + "learning_rate": 4.8768667844931224e-05, + "loss": 1.5661, + "step": 3996 + }, + { + "epoch": 0.64, + "learning_rate": 4.876786803055098e-05, + "loss": 1.5155, + "step": 3997 + }, + { + "epoch": 0.64, + "learning_rate": 4.876706796305737e-05, + "loss": 1.5671, + "step": 3998 + }, + { + "epoch": 0.64, + "learning_rate": 4.8766267642458916e-05, + "loss": 1.5632, + "step": 3999 + }, + { + "epoch": 0.64, + "learning_rate": 4.876546706876415e-05, + "loss": 1.5239, + "step": 4000 + }, + { + "epoch": 0.64, + "learning_rate": 4.876466624198157e-05, + "loss": 1.5474, + "step": 4001 + }, + { + "epoch": 0.64, + "learning_rate": 4.8763865162119734e-05, + "loss": 1.5078, + "step": 4002 + }, + { + "epoch": 0.64, + "learning_rate": 4.876306382918716e-05, + "loss": 1.5075, + "step": 4003 + }, + { + "epoch": 0.64, + "learning_rate": 4.8762262243192383e-05, + "loss": 1.4699, + "step": 4004 + }, + { + "epoch": 0.64, + "learning_rate": 4.8761460404143944e-05, + "loss": 1.5314, + "step": 4005 + }, + { + "epoch": 0.64, + "learning_rate": 4.876065831205037e-05, + "loss": 1.5378, + "step": 4006 + }, + { + "epoch": 0.64, + "learning_rate": 4.875985596692021e-05, + "loss": 1.5495, + "step": 4007 + }, + { + "epoch": 0.64, + "learning_rate": 4.875905336876201e-05, + "loss": 1.522, + "step": 4008 + }, + { + "epoch": 0.64, + "learning_rate": 4.875825051758433e-05, + "loss": 1.5067, + "step": 4009 + }, + { + "epoch": 0.64, + "learning_rate": 4.875744741339569e-05, + "loss": 1.5026, + "step": 4010 + }, + { + "epoch": 0.64, + "learning_rate": 4.875664405620466e-05, + "loss": 1.488, + "step": 4011 + }, + { + "epoch": 0.64, + "learning_rate": 4.8755840446019796e-05, + "loss": 1.526, + "step": 4012 + }, + { + "epoch": 0.64, + "learning_rate": 4.8755036582849654e-05, + "loss": 1.4969, + "step": 4013 + }, + { + "epoch": 0.64, + "learning_rate": 4.8754232466702796e-05, + "loss": 1.5164, + "step": 4014 + }, + { + "epoch": 0.64, + "learning_rate": 4.875342809758778e-05, + "loss": 1.5015, + "step": 4015 + }, + { + "epoch": 0.64, + "learning_rate": 4.8752623475513185e-05, + "loss": 1.4676, + "step": 4016 + }, + { + "epoch": 0.64, + "learning_rate": 4.875181860048757e-05, + "loss": 1.5571, + "step": 4017 + }, + { + "epoch": 0.64, + "learning_rate": 4.875101347251949e-05, + "loss": 1.5274, + "step": 4018 + }, + { + "epoch": 0.64, + "learning_rate": 4.875020809161755e-05, + "loss": 1.5752, + "step": 4019 + }, + { + "epoch": 0.64, + "learning_rate": 4.874940245779031e-05, + "loss": 1.4937, + "step": 4020 + }, + { + "epoch": 0.64, + "learning_rate": 4.874859657104635e-05, + "loss": 1.5977, + "step": 4021 + }, + { + "epoch": 0.64, + "learning_rate": 4.874779043139426e-05, + "loss": 1.485, + "step": 4022 + }, + { + "epoch": 0.64, + "learning_rate": 4.8746984038842616e-05, + "loss": 1.4777, + "step": 4023 + }, + { + "epoch": 0.64, + "learning_rate": 4.874617739340001e-05, + "loss": 1.5399, + "step": 4024 + }, + { + "epoch": 0.64, + "learning_rate": 4.874537049507503e-05, + "loss": 1.5394, + "step": 4025 + }, + { + "epoch": 0.64, + "learning_rate": 4.8744563343876274e-05, + "loss": 1.5567, + "step": 4026 + }, + { + "epoch": 0.64, + "learning_rate": 4.874375593981233e-05, + "loss": 1.4922, + "step": 4027 + }, + { + "epoch": 0.64, + "learning_rate": 4.87429482828918e-05, + "loss": 1.5562, + "step": 4028 + }, + { + "epoch": 0.64, + "learning_rate": 4.874214037312329e-05, + "loss": 1.5306, + "step": 4029 + }, + { + "epoch": 0.64, + "learning_rate": 4.8741332210515404e-05, + "loss": 1.4714, + "step": 4030 + }, + { + "epoch": 0.64, + "learning_rate": 4.8740523795076734e-05, + "loss": 1.5142, + "step": 4031 + }, + { + "epoch": 0.64, + "learning_rate": 4.87397151268159e-05, + "loss": 1.589, + "step": 4032 + }, + { + "epoch": 0.64, + "learning_rate": 4.873890620574151e-05, + "loss": 1.5156, + "step": 4033 + }, + { + "epoch": 0.64, + "learning_rate": 4.8738097031862186e-05, + "loss": 1.5282, + "step": 4034 + }, + { + "epoch": 0.64, + "learning_rate": 4.873728760518654e-05, + "loss": 1.4683, + "step": 4035 + }, + { + "epoch": 0.64, + "learning_rate": 4.873647792572319e-05, + "loss": 1.5127, + "step": 4036 + }, + { + "epoch": 0.64, + "learning_rate": 4.873566799348076e-05, + "loss": 1.5197, + "step": 4037 + }, + { + "epoch": 0.64, + "learning_rate": 4.8734857808467885e-05, + "loss": 1.5519, + "step": 4038 + }, + { + "epoch": 0.64, + "learning_rate": 4.8734047370693175e-05, + "loss": 1.5565, + "step": 4039 + }, + { + "epoch": 0.64, + "learning_rate": 4.8733236680165276e-05, + "loss": 1.5272, + "step": 4040 + }, + { + "epoch": 0.64, + "learning_rate": 4.873242573689281e-05, + "loss": 1.5635, + "step": 4041 + }, + { + "epoch": 0.64, + "learning_rate": 4.873161454088442e-05, + "loss": 1.465, + "step": 4042 + }, + { + "epoch": 0.64, + "learning_rate": 4.873080309214874e-05, + "loss": 1.4719, + "step": 4043 + }, + { + "epoch": 0.64, + "learning_rate": 4.872999139069442e-05, + "loss": 1.4572, + "step": 4044 + }, + { + "epoch": 0.64, + "learning_rate": 4.8729179436530095e-05, + "loss": 1.55, + "step": 4045 + }, + { + "epoch": 0.64, + "learning_rate": 4.872836722966442e-05, + "loss": 1.5511, + "step": 4046 + }, + { + "epoch": 0.64, + "learning_rate": 4.872755477010603e-05, + "loss": 1.5396, + "step": 4047 + }, + { + "epoch": 0.64, + "learning_rate": 4.8726742057863596e-05, + "loss": 1.5389, + "step": 4048 + }, + { + "epoch": 0.65, + "learning_rate": 4.872592909294576e-05, + "loss": 1.5859, + "step": 4049 + }, + { + "epoch": 0.65, + "learning_rate": 4.8725115875361185e-05, + "loss": 1.5239, + "step": 4050 + }, + { + "epoch": 0.65, + "learning_rate": 4.872430240511853e-05, + "loss": 1.5199, + "step": 4051 + }, + { + "epoch": 0.65, + "learning_rate": 4.8723488682226454e-05, + "loss": 1.4943, + "step": 4052 + }, + { + "epoch": 0.65, + "learning_rate": 4.8722674706693624e-05, + "loss": 1.4429, + "step": 4053 + }, + { + "epoch": 0.65, + "learning_rate": 4.8721860478528725e-05, + "loss": 1.4871, + "step": 4054 + }, + { + "epoch": 0.65, + "learning_rate": 4.87210459977404e-05, + "loss": 1.5039, + "step": 4055 + }, + { + "epoch": 0.65, + "learning_rate": 4.8720231264337346e-05, + "loss": 1.4855, + "step": 4056 + }, + { + "epoch": 0.65, + "learning_rate": 4.871941627832822e-05, + "loss": 1.536, + "step": 4057 + }, + { + "epoch": 0.65, + "learning_rate": 4.8718601039721726e-05, + "loss": 1.6058, + "step": 4058 + }, + { + "epoch": 0.65, + "learning_rate": 4.871778554852652e-05, + "loss": 1.5112, + "step": 4059 + }, + { + "epoch": 0.65, + "learning_rate": 4.8716969804751295e-05, + "loss": 1.5379, + "step": 4060 + }, + { + "epoch": 0.65, + "learning_rate": 4.871615380840474e-05, + "loss": 1.4569, + "step": 4061 + }, + { + "epoch": 0.65, + "learning_rate": 4.871533755949556e-05, + "loss": 1.5506, + "step": 4062 + }, + { + "epoch": 0.65, + "learning_rate": 4.8714521058032416e-05, + "loss": 1.5316, + "step": 4063 + }, + { + "epoch": 0.65, + "learning_rate": 4.871370430402403e-05, + "loss": 1.5628, + "step": 4064 + }, + { + "epoch": 0.65, + "learning_rate": 4.871288729747908e-05, + "loss": 1.5882, + "step": 4065 + }, + { + "epoch": 0.65, + "learning_rate": 4.8712070038406285e-05, + "loss": 1.5542, + "step": 4066 + }, + { + "epoch": 0.65, + "learning_rate": 4.871125252681434e-05, + "loss": 1.5311, + "step": 4067 + }, + { + "epoch": 0.65, + "learning_rate": 4.8710434762711956e-05, + "loss": 1.535, + "step": 4068 + }, + { + "epoch": 0.65, + "learning_rate": 4.870961674610782e-05, + "loss": 1.4681, + "step": 4069 + }, + { + "epoch": 0.65, + "learning_rate": 4.8708798477010675e-05, + "loss": 1.5182, + "step": 4070 + }, + { + "epoch": 0.65, + "learning_rate": 4.8707979955429215e-05, + "loss": 1.5098, + "step": 4071 + }, + { + "epoch": 0.65, + "learning_rate": 4.870716118137216e-05, + "loss": 1.4816, + "step": 4072 + }, + { + "epoch": 0.65, + "learning_rate": 4.870634215484824e-05, + "loss": 1.5314, + "step": 4073 + }, + { + "epoch": 0.65, + "learning_rate": 4.870552287586616e-05, + "loss": 1.5695, + "step": 4074 + }, + { + "epoch": 0.65, + "learning_rate": 4.8704703344434655e-05, + "loss": 1.5264, + "step": 4075 + }, + { + "epoch": 0.65, + "learning_rate": 4.8703883560562454e-05, + "loss": 1.5843, + "step": 4076 + }, + { + "epoch": 0.65, + "learning_rate": 4.870306352425827e-05, + "loss": 1.5859, + "step": 4077 + }, + { + "epoch": 0.65, + "learning_rate": 4.870224323553087e-05, + "loss": 1.5319, + "step": 4078 + }, + { + "epoch": 0.65, + "learning_rate": 4.8701422694388965e-05, + "loss": 1.5508, + "step": 4079 + }, + { + "epoch": 0.65, + "learning_rate": 4.870060190084129e-05, + "loss": 1.528, + "step": 4080 + }, + { + "epoch": 0.65, + "learning_rate": 4.86997808548966e-05, + "loss": 1.5272, + "step": 4081 + }, + { + "epoch": 0.65, + "learning_rate": 4.869895955656363e-05, + "loss": 1.5369, + "step": 4082 + }, + { + "epoch": 0.65, + "learning_rate": 4.8698138005851125e-05, + "loss": 1.5506, + "step": 4083 + }, + { + "epoch": 0.65, + "learning_rate": 4.869731620276784e-05, + "loss": 1.5041, + "step": 4084 + }, + { + "epoch": 0.65, + "learning_rate": 4.869649414732253e-05, + "loss": 1.5687, + "step": 4085 + }, + { + "epoch": 0.65, + "learning_rate": 4.8695671839523935e-05, + "loss": 1.599, + "step": 4086 + }, + { + "epoch": 0.65, + "learning_rate": 4.869484927938083e-05, + "loss": 1.508, + "step": 4087 + }, + { + "epoch": 0.65, + "learning_rate": 4.8694026466901965e-05, + "loss": 1.571, + "step": 4088 + }, + { + "epoch": 0.65, + "learning_rate": 4.86932034020961e-05, + "loss": 1.5238, + "step": 4089 + }, + { + "epoch": 0.65, + "learning_rate": 4.8692380084972e-05, + "loss": 1.4635, + "step": 4090 + }, + { + "epoch": 0.65, + "learning_rate": 4.869155651553844e-05, + "loss": 1.5415, + "step": 4091 + }, + { + "epoch": 0.65, + "learning_rate": 4.8690732693804184e-05, + "loss": 1.4766, + "step": 4092 + }, + { + "epoch": 0.65, + "learning_rate": 4.868990861977801e-05, + "loss": 1.5352, + "step": 4093 + }, + { + "epoch": 0.65, + "learning_rate": 4.868908429346869e-05, + "loss": 1.4985, + "step": 4094 + }, + { + "epoch": 0.65, + "learning_rate": 4.868825971488501e-05, + "loss": 1.5466, + "step": 4095 + }, + { + "epoch": 0.65, + "learning_rate": 4.8687434884035734e-05, + "loss": 1.5086, + "step": 4096 + }, + { + "epoch": 0.65, + "learning_rate": 4.868660980092966e-05, + "loss": 1.5293, + "step": 4097 + }, + { + "epoch": 0.65, + "learning_rate": 4.868578446557558e-05, + "loss": 1.5353, + "step": 4098 + }, + { + "epoch": 0.65, + "learning_rate": 4.8684958877982275e-05, + "loss": 1.5031, + "step": 4099 + }, + { + "epoch": 0.65, + "learning_rate": 4.868413303815853e-05, + "loss": 1.4658, + "step": 4100 + }, + { + "epoch": 0.65, + "learning_rate": 4.868330694611315e-05, + "loss": 1.513, + "step": 4101 + }, + { + "epoch": 0.65, + "learning_rate": 4.8682480601854936e-05, + "loss": 1.5062, + "step": 4102 + }, + { + "epoch": 0.65, + "learning_rate": 4.868165400539267e-05, + "loss": 1.5415, + "step": 4103 + }, + { + "epoch": 0.65, + "learning_rate": 4.868082715673518e-05, + "loss": 1.5521, + "step": 4104 + }, + { + "epoch": 0.65, + "learning_rate": 4.8680000055891254e-05, + "loss": 1.5194, + "step": 4105 + }, + { + "epoch": 0.65, + "learning_rate": 4.867917270286969e-05, + "loss": 1.523, + "step": 4106 + }, + { + "epoch": 0.65, + "learning_rate": 4.8678345097679325e-05, + "loss": 1.5508, + "step": 4107 + }, + { + "epoch": 0.65, + "learning_rate": 4.8677517240328966e-05, + "loss": 1.5677, + "step": 4108 + }, + { + "epoch": 0.65, + "learning_rate": 4.8676689130827414e-05, + "loss": 1.5238, + "step": 4109 + }, + { + "epoch": 0.65, + "learning_rate": 4.867586076918349e-05, + "loss": 1.4714, + "step": 4110 + }, + { + "epoch": 0.65, + "learning_rate": 4.8675032155406034e-05, + "loss": 1.5708, + "step": 4111 + }, + { + "epoch": 0.66, + "learning_rate": 4.8674203289503864e-05, + "loss": 1.4715, + "step": 4112 + }, + { + "epoch": 0.66, + "learning_rate": 4.867337417148579e-05, + "loss": 1.4544, + "step": 4113 + }, + { + "epoch": 0.66, + "learning_rate": 4.867254480136065e-05, + "loss": 1.4855, + "step": 4114 + }, + { + "epoch": 0.66, + "learning_rate": 4.8671715179137295e-05, + "loss": 1.5086, + "step": 4115 + }, + { + "epoch": 0.66, + "learning_rate": 4.867088530482454e-05, + "loss": 1.4915, + "step": 4116 + }, + { + "epoch": 0.66, + "learning_rate": 4.867005517843122e-05, + "loss": 1.4658, + "step": 4117 + }, + { + "epoch": 0.66, + "learning_rate": 4.866922479996619e-05, + "loss": 1.5085, + "step": 4118 + }, + { + "epoch": 0.66, + "learning_rate": 4.866839416943828e-05, + "loss": 1.4974, + "step": 4119 + }, + { + "epoch": 0.66, + "learning_rate": 4.866756328685635e-05, + "loss": 1.4792, + "step": 4120 + }, + { + "epoch": 0.66, + "learning_rate": 4.8666732152229236e-05, + "loss": 1.4898, + "step": 4121 + }, + { + "epoch": 0.66, + "learning_rate": 4.8665900765565784e-05, + "loss": 1.5037, + "step": 4122 + }, + { + "epoch": 0.66, + "learning_rate": 4.8665069126874874e-05, + "loss": 1.5181, + "step": 4123 + }, + { + "epoch": 0.66, + "learning_rate": 4.866423723616533e-05, + "loss": 1.508, + "step": 4124 + }, + { + "epoch": 0.66, + "learning_rate": 4.8663405093446036e-05, + "loss": 1.5362, + "step": 4125 + }, + { + "epoch": 0.66, + "learning_rate": 4.866257269872584e-05, + "loss": 1.5721, + "step": 4126 + }, + { + "epoch": 0.66, + "learning_rate": 4.866174005201361e-05, + "loss": 1.542, + "step": 4127 + }, + { + "epoch": 0.66, + "learning_rate": 4.8660907153318214e-05, + "loss": 1.4837, + "step": 4128 + }, + { + "epoch": 0.66, + "learning_rate": 4.8660074002648524e-05, + "loss": 1.5056, + "step": 4129 + }, + { + "epoch": 0.66, + "learning_rate": 4.865924060001341e-05, + "loss": 1.5636, + "step": 4130 + }, + { + "epoch": 0.66, + "learning_rate": 4.8658406945421744e-05, + "loss": 1.4995, + "step": 4131 + }, + { + "epoch": 0.66, + "learning_rate": 4.865757303888241e-05, + "loss": 1.5319, + "step": 4132 + }, + { + "epoch": 0.66, + "learning_rate": 4.8656738880404285e-05, + "loss": 1.5168, + "step": 4133 + }, + { + "epoch": 0.66, + "learning_rate": 4.865590446999626e-05, + "loss": 1.486, + "step": 4134 + }, + { + "epoch": 0.66, + "learning_rate": 4.865506980766721e-05, + "loss": 1.5037, + "step": 4135 + }, + { + "epoch": 0.66, + "learning_rate": 4.865423489342602e-05, + "loss": 1.5132, + "step": 4136 + }, + { + "epoch": 0.66, + "learning_rate": 4.86533997272816e-05, + "loss": 1.4873, + "step": 4137 + }, + { + "epoch": 0.66, + "learning_rate": 4.865256430924283e-05, + "loss": 1.5408, + "step": 4138 + }, + { + "epoch": 0.66, + "learning_rate": 4.8651728639318605e-05, + "loss": 1.534, + "step": 4139 + }, + { + "epoch": 0.66, + "learning_rate": 4.865089271751783e-05, + "loss": 1.6043, + "step": 4140 + }, + { + "epoch": 0.66, + "learning_rate": 4.8650056543849406e-05, + "loss": 1.5321, + "step": 4141 + }, + { + "epoch": 0.66, + "learning_rate": 4.8649220118322246e-05, + "loss": 1.5715, + "step": 4142 + }, + { + "epoch": 0.66, + "learning_rate": 4.864838344094525e-05, + "loss": 1.5047, + "step": 4143 + }, + { + "epoch": 0.66, + "learning_rate": 4.8647546511727315e-05, + "loss": 1.5011, + "step": 4144 + }, + { + "epoch": 0.66, + "learning_rate": 4.8646709330677374e-05, + "loss": 1.4707, + "step": 4145 + }, + { + "epoch": 0.66, + "learning_rate": 4.864587189780434e-05, + "loss": 1.5697, + "step": 4146 + }, + { + "epoch": 0.66, + "learning_rate": 4.864503421311713e-05, + "loss": 1.5361, + "step": 4147 + }, + { + "epoch": 0.66, + "learning_rate": 4.864419627662464e-05, + "loss": 1.6011, + "step": 4148 + }, + { + "epoch": 0.66, + "learning_rate": 4.864335808833583e-05, + "loss": 1.5086, + "step": 4149 + }, + { + "epoch": 0.66, + "learning_rate": 4.86425196482596e-05, + "loss": 1.4709, + "step": 4150 + }, + { + "epoch": 0.66, + "learning_rate": 4.86416809564049e-05, + "loss": 1.5288, + "step": 4151 + }, + { + "epoch": 0.66, + "learning_rate": 4.8640842012780655e-05, + "loss": 1.5143, + "step": 4152 + }, + { + "epoch": 0.66, + "learning_rate": 4.864000281739578e-05, + "loss": 1.5481, + "step": 4153 + }, + { + "epoch": 0.66, + "learning_rate": 4.8639163370259235e-05, + "loss": 1.4644, + "step": 4154 + }, + { + "epoch": 0.66, + "learning_rate": 4.863832367137995e-05, + "loss": 1.5303, + "step": 4155 + }, + { + "epoch": 0.66, + "learning_rate": 4.8637483720766874e-05, + "loss": 1.5567, + "step": 4156 + }, + { + "epoch": 0.66, + "learning_rate": 4.863664351842894e-05, + "loss": 1.5401, + "step": 4157 + }, + { + "epoch": 0.66, + "learning_rate": 4.863580306437511e-05, + "loss": 1.4818, + "step": 4158 + }, + { + "epoch": 0.66, + "learning_rate": 4.863496235861432e-05, + "loss": 1.5391, + "step": 4159 + }, + { + "epoch": 0.66, + "learning_rate": 4.8634121401155536e-05, + "loss": 1.5537, + "step": 4160 + }, + { + "epoch": 0.66, + "learning_rate": 4.8633280192007705e-05, + "loss": 1.5365, + "step": 4161 + }, + { + "epoch": 0.66, + "learning_rate": 4.863243873117979e-05, + "loss": 1.4709, + "step": 4162 + }, + { + "epoch": 0.66, + "learning_rate": 4.8631597018680745e-05, + "loss": 1.5355, + "step": 4163 + }, + { + "epoch": 0.66, + "learning_rate": 4.8630755054519546e-05, + "loss": 1.5096, + "step": 4164 + }, + { + "epoch": 0.66, + "learning_rate": 4.862991283870515e-05, + "loss": 1.5601, + "step": 4165 + }, + { + "epoch": 0.66, + "learning_rate": 4.862907037124652e-05, + "loss": 1.5762, + "step": 4166 + }, + { + "epoch": 0.66, + "learning_rate": 4.8628227652152645e-05, + "loss": 1.478, + "step": 4167 + }, + { + "epoch": 0.66, + "learning_rate": 4.862738468143249e-05, + "loss": 1.4937, + "step": 4168 + }, + { + "epoch": 0.66, + "learning_rate": 4.8626541459095034e-05, + "loss": 1.5241, + "step": 4169 + }, + { + "epoch": 0.66, + "learning_rate": 4.862569798514925e-05, + "loss": 1.5845, + "step": 4170 + }, + { + "epoch": 0.66, + "learning_rate": 4.862485425960412e-05, + "loss": 1.4966, + "step": 4171 + }, + { + "epoch": 0.66, + "learning_rate": 4.862401028246865e-05, + "loss": 1.4766, + "step": 4172 + }, + { + "epoch": 0.66, + "learning_rate": 4.862316605375181e-05, + "loss": 1.4979, + "step": 4173 + }, + { + "epoch": 0.66, + "learning_rate": 4.862232157346259e-05, + "loss": 1.5057, + "step": 4174 + }, + { + "epoch": 0.67, + "learning_rate": 4.862147684160999e-05, + "loss": 1.519, + "step": 4175 + }, + { + "epoch": 0.67, + "learning_rate": 4.862063185820299e-05, + "loss": 1.4937, + "step": 4176 + }, + { + "epoch": 0.67, + "learning_rate": 4.861978662325062e-05, + "loss": 1.4691, + "step": 4177 + }, + { + "epoch": 0.67, + "learning_rate": 4.861894113676185e-05, + "loss": 1.5796, + "step": 4178 + }, + { + "epoch": 0.67, + "learning_rate": 4.86180953987457e-05, + "loss": 1.5654, + "step": 4179 + }, + { + "epoch": 0.67, + "learning_rate": 4.861724940921117e-05, + "loss": 1.5238, + "step": 4180 + }, + { + "epoch": 0.67, + "learning_rate": 4.861640316816727e-05, + "loss": 1.5316, + "step": 4181 + }, + { + "epoch": 0.67, + "learning_rate": 4.861555667562302e-05, + "loss": 1.4953, + "step": 4182 + }, + { + "epoch": 0.67, + "learning_rate": 4.8614709931587434e-05, + "loss": 1.5046, + "step": 4183 + }, + { + "epoch": 0.67, + "learning_rate": 4.861386293606951e-05, + "loss": 1.5301, + "step": 4184 + }, + { + "epoch": 0.67, + "learning_rate": 4.861301568907829e-05, + "loss": 1.5203, + "step": 4185 + }, + { + "epoch": 0.67, + "learning_rate": 4.861216819062279e-05, + "loss": 1.5475, + "step": 4186 + }, + { + "epoch": 0.67, + "learning_rate": 4.861132044071203e-05, + "loss": 1.5708, + "step": 4187 + }, + { + "epoch": 0.67, + "learning_rate": 4.861047243935505e-05, + "loss": 1.5033, + "step": 4188 + }, + { + "epoch": 0.67, + "learning_rate": 4.8609624186560865e-05, + "loss": 1.527, + "step": 4189 + }, + { + "epoch": 0.67, + "learning_rate": 4.860877568233852e-05, + "loss": 1.5238, + "step": 4190 + }, + { + "epoch": 0.67, + "learning_rate": 4.8607926926697043e-05, + "loss": 1.5155, + "step": 4191 + }, + { + "epoch": 0.67, + "learning_rate": 4.860707791964548e-05, + "loss": 1.5228, + "step": 4192 + }, + { + "epoch": 0.67, + "learning_rate": 4.860622866119287e-05, + "loss": 1.5528, + "step": 4193 + }, + { + "epoch": 0.67, + "learning_rate": 4.860537915134825e-05, + "loss": 1.4663, + "step": 4194 + }, + { + "epoch": 0.67, + "learning_rate": 4.860452939012068e-05, + "loss": 1.5003, + "step": 4195 + }, + { + "epoch": 0.67, + "learning_rate": 4.86036793775192e-05, + "loss": 1.5461, + "step": 4196 + }, + { + "epoch": 0.67, + "learning_rate": 4.8602829113552865e-05, + "loss": 1.4741, + "step": 4197 + }, + { + "epoch": 0.67, + "learning_rate": 4.8601978598230734e-05, + "loss": 1.5143, + "step": 4198 + }, + { + "epoch": 0.67, + "learning_rate": 4.8601127831561855e-05, + "loss": 1.4998, + "step": 4199 + }, + { + "epoch": 0.67, + "learning_rate": 4.8600276813555295e-05, + "loss": 1.54, + "step": 4200 + }, + { + "epoch": 0.67, + "learning_rate": 4.8599425544220115e-05, + "loss": 1.4855, + "step": 4201 + }, + { + "epoch": 0.67, + "learning_rate": 4.859857402356538e-05, + "loss": 1.5313, + "step": 4202 + }, + { + "epoch": 0.67, + "learning_rate": 4.859772225160016e-05, + "loss": 1.4884, + "step": 4203 + }, + { + "epoch": 0.67, + "learning_rate": 4.8596870228333527e-05, + "loss": 1.5422, + "step": 4204 + }, + { + "epoch": 0.67, + "learning_rate": 4.859601795377454e-05, + "loss": 1.5345, + "step": 4205 + }, + { + "epoch": 0.67, + "learning_rate": 4.8595165427932296e-05, + "loss": 1.5231, + "step": 4206 + }, + { + "epoch": 0.67, + "learning_rate": 4.8594312650815864e-05, + "loss": 1.5452, + "step": 4207 + }, + { + "epoch": 0.67, + "learning_rate": 4.859345962243434e-05, + "loss": 1.5536, + "step": 4208 + }, + { + "epoch": 0.67, + "learning_rate": 4.859260634279678e-05, + "loss": 1.5439, + "step": 4209 + }, + { + "epoch": 0.67, + "learning_rate": 4.8591752811912285e-05, + "loss": 1.471, + "step": 4210 + }, + { + "epoch": 0.67, + "learning_rate": 4.8590899029789945e-05, + "loss": 1.5348, + "step": 4211 + }, + { + "epoch": 0.67, + "learning_rate": 4.8590044996438854e-05, + "loss": 1.5649, + "step": 4212 + }, + { + "epoch": 0.67, + "learning_rate": 4.858919071186811e-05, + "loss": 1.519, + "step": 4213 + }, + { + "epoch": 0.67, + "learning_rate": 4.85883361760868e-05, + "loss": 1.4787, + "step": 4214 + }, + { + "epoch": 0.67, + "learning_rate": 4.858748138910404e-05, + "loss": 1.6068, + "step": 4215 + }, + { + "epoch": 0.67, + "learning_rate": 4.858662635092892e-05, + "loss": 1.4681, + "step": 4216 + }, + { + "epoch": 0.67, + "learning_rate": 4.8585771061570546e-05, + "loss": 1.4894, + "step": 4217 + }, + { + "epoch": 0.67, + "learning_rate": 4.858491552103803e-05, + "loss": 1.5189, + "step": 4218 + }, + { + "epoch": 0.67, + "learning_rate": 4.858405972934048e-05, + "loss": 1.5205, + "step": 4219 + }, + { + "epoch": 0.67, + "learning_rate": 4.858320368648702e-05, + "loss": 1.6156, + "step": 4220 + }, + { + "epoch": 0.67, + "learning_rate": 4.8582347392486756e-05, + "loss": 1.5264, + "step": 4221 + }, + { + "epoch": 0.67, + "learning_rate": 4.858149084734881e-05, + "loss": 1.5868, + "step": 4222 + }, + { + "epoch": 0.67, + "learning_rate": 4.8580634051082294e-05, + "loss": 1.4784, + "step": 4223 + }, + { + "epoch": 0.67, + "learning_rate": 4.857977700369635e-05, + "loss": 1.5309, + "step": 4224 + }, + { + "epoch": 0.67, + "learning_rate": 4.85789197052001e-05, + "loss": 1.6204, + "step": 4225 + }, + { + "epoch": 0.67, + "learning_rate": 4.857806215560266e-05, + "loss": 1.5179, + "step": 4226 + }, + { + "epoch": 0.67, + "learning_rate": 4.857720435491317e-05, + "loss": 1.4694, + "step": 4227 + }, + { + "epoch": 0.67, + "learning_rate": 4.857634630314078e-05, + "loss": 1.5164, + "step": 4228 + }, + { + "epoch": 0.67, + "learning_rate": 4.857548800029461e-05, + "loss": 1.5799, + "step": 4229 + }, + { + "epoch": 0.67, + "learning_rate": 4.8574629446383815e-05, + "loss": 1.5308, + "step": 4230 + }, + { + "epoch": 0.67, + "learning_rate": 4.857377064141751e-05, + "loss": 1.4837, + "step": 4231 + }, + { + "epoch": 0.67, + "learning_rate": 4.857291158540488e-05, + "loss": 1.5171, + "step": 4232 + }, + { + "epoch": 0.67, + "learning_rate": 4.857205227835504e-05, + "loss": 1.4688, + "step": 4233 + }, + { + "epoch": 0.67, + "learning_rate": 4.857119272027716e-05, + "loss": 1.5094, + "step": 4234 + }, + { + "epoch": 0.67, + "learning_rate": 4.8570332911180384e-05, + "loss": 1.4676, + "step": 4235 + }, + { + "epoch": 0.67, + "learning_rate": 4.856947285107387e-05, + "loss": 1.5285, + "step": 4236 + }, + { + "epoch": 0.68, + "learning_rate": 4.856861253996679e-05, + "loss": 1.4684, + "step": 4237 + }, + { + "epoch": 0.68, + "learning_rate": 4.8567751977868284e-05, + "loss": 1.5225, + "step": 4238 + }, + { + "epoch": 0.68, + "learning_rate": 4.856689116478753e-05, + "loss": 1.5428, + "step": 4239 + }, + { + "epoch": 0.68, + "learning_rate": 4.8566030100733696e-05, + "loss": 1.5431, + "step": 4240 + }, + { + "epoch": 0.68, + "learning_rate": 4.8565168785715954e-05, + "loss": 1.4974, + "step": 4241 + }, + { + "epoch": 0.68, + "learning_rate": 4.8564307219743465e-05, + "loss": 1.5575, + "step": 4242 + }, + { + "epoch": 0.68, + "learning_rate": 4.856344540282541e-05, + "loss": 1.569, + "step": 4243 + }, + { + "epoch": 0.68, + "learning_rate": 4.8562583334970965e-05, + "loss": 1.4583, + "step": 4244 + }, + { + "epoch": 0.68, + "learning_rate": 4.856172101618932e-05, + "loss": 1.5332, + "step": 4245 + }, + { + "epoch": 0.68, + "learning_rate": 4.856085844648964e-05, + "loss": 1.4775, + "step": 4246 + }, + { + "epoch": 0.68, + "learning_rate": 4.8559995625881124e-05, + "loss": 1.4881, + "step": 4247 + }, + { + "epoch": 0.68, + "learning_rate": 4.855913255437297e-05, + "loss": 1.5239, + "step": 4248 + }, + { + "epoch": 0.68, + "learning_rate": 4.8558269231974346e-05, + "loss": 1.5081, + "step": 4249 + }, + { + "epoch": 0.68, + "learning_rate": 4.855740565869446e-05, + "loss": 1.5081, + "step": 4250 + }, + { + "epoch": 0.68, + "learning_rate": 4.8556541834542505e-05, + "loss": 1.5335, + "step": 4251 + }, + { + "epoch": 0.68, + "learning_rate": 4.8555677759527685e-05, + "loss": 1.5563, + "step": 4252 + }, + { + "epoch": 0.68, + "learning_rate": 4.85548134336592e-05, + "loss": 1.506, + "step": 4253 + }, + { + "epoch": 0.68, + "learning_rate": 4.855394885694624e-05, + "loss": 1.5625, + "step": 4254 + }, + { + "epoch": 0.68, + "learning_rate": 4.855308402939803e-05, + "loss": 1.5361, + "step": 4255 + }, + { + "epoch": 0.68, + "learning_rate": 4.855221895102379e-05, + "loss": 1.5938, + "step": 4256 + }, + { + "epoch": 0.68, + "learning_rate": 4.855135362183269e-05, + "loss": 1.5106, + "step": 4257 + }, + { + "epoch": 0.68, + "learning_rate": 4.8550488041834e-05, + "loss": 1.5273, + "step": 4258 + }, + { + "epoch": 0.68, + "learning_rate": 4.854962221103689e-05, + "loss": 1.5078, + "step": 4259 + }, + { + "epoch": 0.68, + "learning_rate": 4.854875612945061e-05, + "loss": 1.5205, + "step": 4260 + }, + { + "epoch": 0.68, + "learning_rate": 4.854788979708437e-05, + "loss": 1.514, + "step": 4261 + }, + { + "epoch": 0.68, + "learning_rate": 4.8547023213947394e-05, + "loss": 1.4661, + "step": 4262 + }, + { + "epoch": 0.68, + "learning_rate": 4.854615638004893e-05, + "loss": 1.5153, + "step": 4263 + }, + { + "epoch": 0.68, + "learning_rate": 4.854528929539819e-05, + "loss": 1.4546, + "step": 4264 + }, + { + "epoch": 0.68, + "learning_rate": 4.854442196000441e-05, + "loss": 1.5094, + "step": 4265 + }, + { + "epoch": 0.68, + "learning_rate": 4.8543554373876834e-05, + "loss": 1.5088, + "step": 4266 + }, + { + "epoch": 0.68, + "learning_rate": 4.8542686537024696e-05, + "loss": 1.5379, + "step": 4267 + }, + { + "epoch": 0.68, + "learning_rate": 4.854181844945724e-05, + "loss": 1.4334, + "step": 4268 + }, + { + "epoch": 0.68, + "learning_rate": 4.854095011118371e-05, + "loss": 1.5161, + "step": 4269 + }, + { + "epoch": 0.68, + "learning_rate": 4.854008152221336e-05, + "loss": 1.474, + "step": 4270 + }, + { + "epoch": 0.68, + "learning_rate": 4.853921268255542e-05, + "loss": 1.5262, + "step": 4271 + }, + { + "epoch": 0.68, + "learning_rate": 4.853834359221917e-05, + "loss": 1.6439, + "step": 4272 + }, + { + "epoch": 0.68, + "learning_rate": 4.853747425121384e-05, + "loss": 1.5002, + "step": 4273 + }, + { + "epoch": 0.68, + "learning_rate": 4.8536604659548706e-05, + "loss": 1.5558, + "step": 4274 + }, + { + "epoch": 0.68, + "learning_rate": 4.853573481723302e-05, + "loss": 1.5225, + "step": 4275 + }, + { + "epoch": 0.68, + "learning_rate": 4.8534864724276043e-05, + "loss": 1.5085, + "step": 4276 + }, + { + "epoch": 0.68, + "learning_rate": 4.8533994380687056e-05, + "loss": 1.5402, + "step": 4277 + }, + { + "epoch": 0.68, + "learning_rate": 4.85331237864753e-05, + "loss": 1.5863, + "step": 4278 + }, + { + "epoch": 0.68, + "learning_rate": 4.853225294165008e-05, + "loss": 1.5838, + "step": 4279 + }, + { + "epoch": 0.68, + "learning_rate": 4.853138184622064e-05, + "loss": 1.5228, + "step": 4280 + }, + { + "epoch": 0.68, + "learning_rate": 4.853051050019628e-05, + "loss": 1.5008, + "step": 4281 + }, + { + "epoch": 0.68, + "learning_rate": 4.852963890358626e-05, + "loss": 1.3898, + "step": 4282 + }, + { + "epoch": 0.68, + "learning_rate": 4.852876705639987e-05, + "loss": 1.5335, + "step": 4283 + }, + { + "epoch": 0.68, + "learning_rate": 4.85278949586464e-05, + "loss": 1.458, + "step": 4284 + }, + { + "epoch": 0.68, + "learning_rate": 4.852702261033514e-05, + "loss": 1.5754, + "step": 4285 + }, + { + "epoch": 0.68, + "learning_rate": 4.8526150011475365e-05, + "loss": 1.4736, + "step": 4286 + }, + { + "epoch": 0.68, + "learning_rate": 4.852527716207638e-05, + "loss": 1.4883, + "step": 4287 + }, + { + "epoch": 0.68, + "learning_rate": 4.852440406214747e-05, + "loss": 1.5249, + "step": 4288 + }, + { + "epoch": 0.68, + "learning_rate": 4.852353071169794e-05, + "loss": 1.4995, + "step": 4289 + }, + { + "epoch": 0.68, + "learning_rate": 4.8522657110737094e-05, + "loss": 1.5155, + "step": 4290 + }, + { + "epoch": 0.68, + "learning_rate": 4.8521783259274224e-05, + "loss": 1.527, + "step": 4291 + }, + { + "epoch": 0.68, + "learning_rate": 4.852090915731864e-05, + "loss": 1.4769, + "step": 4292 + }, + { + "epoch": 0.68, + "learning_rate": 4.852003480487966e-05, + "loss": 1.4959, + "step": 4293 + }, + { + "epoch": 0.68, + "learning_rate": 4.851916020196658e-05, + "loss": 1.5215, + "step": 4294 + }, + { + "epoch": 0.68, + "learning_rate": 4.851828534858874e-05, + "loss": 1.4878, + "step": 4295 + }, + { + "epoch": 0.68, + "learning_rate": 4.8517410244755427e-05, + "loss": 1.5562, + "step": 4296 + }, + { + "epoch": 0.68, + "learning_rate": 4.8516534890475975e-05, + "loss": 1.5534, + "step": 4297 + }, + { + "epoch": 0.68, + "learning_rate": 4.8515659285759705e-05, + "loss": 1.5041, + "step": 4298 + }, + { + "epoch": 0.68, + "learning_rate": 4.851478343061594e-05, + "loss": 1.5002, + "step": 4299 + }, + { + "epoch": 0.69, + "learning_rate": 4.851390732505401e-05, + "loss": 1.5558, + "step": 4300 + }, + { + "epoch": 0.69, + "learning_rate": 4.8513030969083236e-05, + "loss": 1.528, + "step": 4301 + }, + { + "epoch": 0.69, + "learning_rate": 4.851215436271296e-05, + "loss": 1.5445, + "step": 4302 + }, + { + "epoch": 0.69, + "learning_rate": 4.851127750595251e-05, + "loss": 1.4697, + "step": 4303 + }, + { + "epoch": 0.69, + "learning_rate": 4.851040039881124e-05, + "loss": 1.4419, + "step": 4304 + }, + { + "epoch": 0.69, + "learning_rate": 4.850952304129847e-05, + "loss": 1.544, + "step": 4305 + }, + { + "epoch": 0.69, + "learning_rate": 4.850864543342356e-05, + "loss": 1.4828, + "step": 4306 + }, + { + "epoch": 0.69, + "learning_rate": 4.850776757519584e-05, + "loss": 1.4694, + "step": 4307 + }, + { + "epoch": 0.69, + "learning_rate": 4.8506889466624675e-05, + "loss": 1.4645, + "step": 4308 + }, + { + "epoch": 0.69, + "learning_rate": 4.8506011107719405e-05, + "loss": 1.4868, + "step": 4309 + }, + { + "epoch": 0.69, + "learning_rate": 4.8505132498489384e-05, + "loss": 1.5264, + "step": 4310 + }, + { + "epoch": 0.69, + "learning_rate": 4.8504253638943976e-05, + "loss": 1.4785, + "step": 4311 + }, + { + "epoch": 0.69, + "learning_rate": 4.850337452909254e-05, + "loss": 1.5832, + "step": 4312 + }, + { + "epoch": 0.69, + "learning_rate": 4.8502495168944426e-05, + "loss": 1.4884, + "step": 4313 + }, + { + "epoch": 0.69, + "learning_rate": 4.850161555850902e-05, + "loss": 1.5156, + "step": 4314 + }, + { + "epoch": 0.69, + "learning_rate": 4.8500735697795666e-05, + "loss": 1.4979, + "step": 4315 + }, + { + "epoch": 0.69, + "learning_rate": 4.849985558681375e-05, + "loss": 1.4933, + "step": 4316 + }, + { + "epoch": 0.69, + "learning_rate": 4.849897522557263e-05, + "loss": 1.4315, + "step": 4317 + }, + { + "epoch": 0.69, + "learning_rate": 4.84980946140817e-05, + "loss": 1.5073, + "step": 4318 + }, + { + "epoch": 0.69, + "learning_rate": 4.8497213752350326e-05, + "loss": 1.5911, + "step": 4319 + }, + { + "epoch": 0.69, + "learning_rate": 4.849633264038789e-05, + "loss": 1.5693, + "step": 4320 + }, + { + "epoch": 0.69, + "learning_rate": 4.849545127820377e-05, + "loss": 1.5521, + "step": 4321 + }, + { + "epoch": 0.69, + "learning_rate": 4.8494569665807365e-05, + "loss": 1.474, + "step": 4322 + }, + { + "epoch": 0.69, + "learning_rate": 4.849368780320806e-05, + "loss": 1.5218, + "step": 4323 + }, + { + "epoch": 0.69, + "learning_rate": 4.8492805690415245e-05, + "loss": 1.4604, + "step": 4324 + }, + { + "epoch": 0.69, + "learning_rate": 4.8491923327438304e-05, + "loss": 1.5031, + "step": 4325 + }, + { + "epoch": 0.69, + "learning_rate": 4.849104071428664e-05, + "loss": 1.5034, + "step": 4326 + }, + { + "epoch": 0.69, + "learning_rate": 4.8490157850969666e-05, + "loss": 1.5566, + "step": 4327 + }, + { + "epoch": 0.69, + "learning_rate": 4.848927473749676e-05, + "loss": 1.4743, + "step": 4328 + }, + { + "epoch": 0.69, + "learning_rate": 4.8488391373877345e-05, + "loss": 1.5562, + "step": 4329 + }, + { + "epoch": 0.69, + "learning_rate": 4.848750776012082e-05, + "loss": 1.5376, + "step": 4330 + }, + { + "epoch": 0.69, + "learning_rate": 4.84866238962366e-05, + "loss": 1.5446, + "step": 4331 + }, + { + "epoch": 0.69, + "learning_rate": 4.848573978223409e-05, + "loss": 1.5418, + "step": 4332 + }, + { + "epoch": 0.69, + "learning_rate": 4.848485541812271e-05, + "loss": 1.5036, + "step": 4333 + }, + { + "epoch": 0.69, + "learning_rate": 4.8483970803911886e-05, + "loss": 1.4798, + "step": 4334 + }, + { + "epoch": 0.69, + "learning_rate": 4.848308593961102e-05, + "loss": 1.5145, + "step": 4335 + }, + { + "epoch": 0.69, + "learning_rate": 4.848220082522955e-05, + "loss": 1.5799, + "step": 4336 + }, + { + "epoch": 0.69, + "learning_rate": 4.84813154607769e-05, + "loss": 1.57, + "step": 4337 + }, + { + "epoch": 0.69, + "learning_rate": 4.8480429846262496e-05, + "loss": 1.5506, + "step": 4338 + }, + { + "epoch": 0.69, + "learning_rate": 4.847954398169576e-05, + "loss": 1.4782, + "step": 4339 + }, + { + "epoch": 0.69, + "learning_rate": 4.847865786708615e-05, + "loss": 1.5466, + "step": 4340 + }, + { + "epoch": 0.69, + "learning_rate": 4.8477771502443084e-05, + "loss": 1.5462, + "step": 4341 + }, + { + "epoch": 0.69, + "learning_rate": 4.8476884887776e-05, + "loss": 1.5355, + "step": 4342 + }, + { + "epoch": 0.69, + "learning_rate": 4.847599802309435e-05, + "loss": 1.5565, + "step": 4343 + }, + { + "epoch": 0.69, + "learning_rate": 4.8475110908407566e-05, + "loss": 1.5347, + "step": 4344 + }, + { + "epoch": 0.69, + "learning_rate": 4.8474223543725106e-05, + "loss": 1.5433, + "step": 4345 + }, + { + "epoch": 0.69, + "learning_rate": 4.847333592905642e-05, + "loss": 1.5345, + "step": 4346 + }, + { + "epoch": 0.69, + "learning_rate": 4.847244806441096e-05, + "loss": 1.5087, + "step": 4347 + }, + { + "epoch": 0.69, + "learning_rate": 4.8471559949798176e-05, + "loss": 1.4896, + "step": 4348 + }, + { + "epoch": 0.69, + "learning_rate": 4.847067158522752e-05, + "loss": 1.5855, + "step": 4349 + }, + { + "epoch": 0.69, + "learning_rate": 4.8469782970708464e-05, + "loss": 1.5597, + "step": 4350 + }, + { + "epoch": 0.69, + "learning_rate": 4.846889410625048e-05, + "loss": 1.5046, + "step": 4351 + }, + { + "epoch": 0.69, + "learning_rate": 4.846800499186301e-05, + "loss": 1.4219, + "step": 4352 + }, + { + "epoch": 0.69, + "learning_rate": 4.846711562755554e-05, + "loss": 1.5503, + "step": 4353 + }, + { + "epoch": 0.69, + "learning_rate": 4.8466226013337535e-05, + "loss": 1.4572, + "step": 4354 + }, + { + "epoch": 0.69, + "learning_rate": 4.846533614921847e-05, + "loss": 1.5252, + "step": 4355 + }, + { + "epoch": 0.69, + "learning_rate": 4.846444603520782e-05, + "loss": 1.5291, + "step": 4356 + }, + { + "epoch": 0.69, + "learning_rate": 4.8463555671315066e-05, + "loss": 1.568, + "step": 4357 + }, + { + "epoch": 0.69, + "learning_rate": 4.846266505754969e-05, + "loss": 1.501, + "step": 4358 + }, + { + "epoch": 0.69, + "learning_rate": 4.846177419392117e-05, + "loss": 1.523, + "step": 4359 + }, + { + "epoch": 0.69, + "learning_rate": 4.846088308043901e-05, + "loss": 1.5164, + "step": 4360 + }, + { + "epoch": 0.69, + "learning_rate": 4.845999171711268e-05, + "loss": 1.5467, + "step": 4361 + }, + { + "epoch": 0.69, + "learning_rate": 4.845910010395168e-05, + "loss": 1.5887, + "step": 4362 + }, + { + "epoch": 0.7, + "learning_rate": 4.845820824096551e-05, + "loss": 1.5181, + "step": 4363 + }, + { + "epoch": 0.7, + "learning_rate": 4.8457316128163666e-05, + "loss": 1.4601, + "step": 4364 + }, + { + "epoch": 0.7, + "learning_rate": 4.845642376555565e-05, + "loss": 1.562, + "step": 4365 + }, + { + "epoch": 0.7, + "learning_rate": 4.8455531153150945e-05, + "loss": 1.4754, + "step": 4366 + }, + { + "epoch": 0.7, + "learning_rate": 4.845463829095909e-05, + "loss": 1.5218, + "step": 4367 + }, + { + "epoch": 0.7, + "learning_rate": 4.845374517898957e-05, + "loss": 1.5171, + "step": 4368 + }, + { + "epoch": 0.7, + "learning_rate": 4.84528518172519e-05, + "loss": 1.5028, + "step": 4369 + }, + { + "epoch": 0.7, + "learning_rate": 4.845195820575561e-05, + "loss": 1.5291, + "step": 4370 + }, + { + "epoch": 0.7, + "learning_rate": 4.845106434451019e-05, + "loss": 1.5571, + "step": 4371 + }, + { + "epoch": 0.7, + "learning_rate": 4.845017023352517e-05, + "loss": 1.4512, + "step": 4372 + }, + { + "epoch": 0.7, + "learning_rate": 4.8449275872810086e-05, + "loss": 1.4778, + "step": 4373 + }, + { + "epoch": 0.7, + "learning_rate": 4.844838126237444e-05, + "loss": 1.4842, + "step": 4374 + }, + { + "epoch": 0.7, + "learning_rate": 4.844748640222778e-05, + "loss": 1.495, + "step": 4375 + }, + { + "epoch": 0.7, + "learning_rate": 4.844659129237962e-05, + "loss": 1.5109, + "step": 4376 + }, + { + "epoch": 0.7, + "learning_rate": 4.844569593283949e-05, + "loss": 1.5415, + "step": 4377 + }, + { + "epoch": 0.7, + "learning_rate": 4.844480032361694e-05, + "loss": 1.4984, + "step": 4378 + }, + { + "epoch": 0.7, + "learning_rate": 4.84439044647215e-05, + "loss": 1.5007, + "step": 4379 + }, + { + "epoch": 0.7, + "learning_rate": 4.844300835616271e-05, + "loss": 1.4915, + "step": 4380 + }, + { + "epoch": 0.7, + "learning_rate": 4.8442111997950114e-05, + "loss": 1.5711, + "step": 4381 + }, + { + "epoch": 0.7, + "learning_rate": 4.844121539009325e-05, + "loss": 1.5324, + "step": 4382 + }, + { + "epoch": 0.7, + "learning_rate": 4.8440318532601687e-05, + "loss": 1.4565, + "step": 4383 + }, + { + "epoch": 0.7, + "learning_rate": 4.843942142548495e-05, + "loss": 1.4954, + "step": 4384 + }, + { + "epoch": 0.7, + "learning_rate": 4.8438524068752613e-05, + "loss": 1.4439, + "step": 4385 + }, + { + "epoch": 0.7, + "learning_rate": 4.843762646241422e-05, + "loss": 1.5422, + "step": 4386 + }, + { + "epoch": 0.7, + "learning_rate": 4.8436728606479344e-05, + "loss": 1.561, + "step": 4387 + }, + { + "epoch": 0.7, + "learning_rate": 4.843583050095752e-05, + "loss": 1.5729, + "step": 4388 + }, + { + "epoch": 0.7, + "learning_rate": 4.843493214585835e-05, + "loss": 1.5498, + "step": 4389 + }, + { + "epoch": 0.7, + "learning_rate": 4.843403354119137e-05, + "loss": 1.5418, + "step": 4390 + }, + { + "epoch": 0.7, + "learning_rate": 4.843313468696616e-05, + "loss": 1.43, + "step": 4391 + }, + { + "epoch": 0.7, + "learning_rate": 4.843223558319229e-05, + "loss": 1.5389, + "step": 4392 + }, + { + "epoch": 0.7, + "learning_rate": 4.8431336229879345e-05, + "loss": 1.492, + "step": 4393 + }, + { + "epoch": 0.7, + "learning_rate": 4.8430436627036876e-05, + "loss": 1.5519, + "step": 4394 + }, + { + "epoch": 0.7, + "learning_rate": 4.84295367746745e-05, + "loss": 1.5322, + "step": 4395 + }, + { + "epoch": 0.7, + "learning_rate": 4.842863667280178e-05, + "loss": 1.4759, + "step": 4396 + }, + { + "epoch": 0.7, + "learning_rate": 4.84277363214283e-05, + "loss": 1.4852, + "step": 4397 + }, + { + "epoch": 0.7, + "learning_rate": 4.842683572056365e-05, + "loss": 1.5381, + "step": 4398 + }, + { + "epoch": 0.7, + "learning_rate": 4.842593487021742e-05, + "loss": 1.4842, + "step": 4399 + }, + { + "epoch": 0.7, + "learning_rate": 4.842503377039921e-05, + "loss": 1.5269, + "step": 4400 + }, + { + "epoch": 0.7, + "learning_rate": 4.8424132421118615e-05, + "loss": 1.518, + "step": 4401 + }, + { + "epoch": 0.7, + "learning_rate": 4.8423230822385227e-05, + "loss": 1.6738, + "step": 4402 + }, + { + "epoch": 0.7, + "learning_rate": 4.842232897420865e-05, + "loss": 1.5667, + "step": 4403 + }, + { + "epoch": 0.7, + "learning_rate": 4.842142687659849e-05, + "loss": 1.5275, + "step": 4404 + }, + { + "epoch": 0.7, + "learning_rate": 4.842052452956436e-05, + "loss": 1.5791, + "step": 4405 + }, + { + "epoch": 0.7, + "learning_rate": 4.841962193311585e-05, + "loss": 1.5845, + "step": 4406 + }, + { + "epoch": 0.7, + "learning_rate": 4.84187190872626e-05, + "loss": 1.6177, + "step": 4407 + }, + { + "epoch": 0.7, + "learning_rate": 4.84178159920142e-05, + "loss": 1.6394, + "step": 4408 + }, + { + "epoch": 0.7, + "learning_rate": 4.8416912647380275e-05, + "loss": 1.5094, + "step": 4409 + }, + { + "epoch": 0.7, + "learning_rate": 4.8416009053370445e-05, + "loss": 1.5812, + "step": 4410 + }, + { + "epoch": 0.7, + "learning_rate": 4.841510520999435e-05, + "loss": 1.5791, + "step": 4411 + }, + { + "epoch": 0.7, + "learning_rate": 4.8414201117261585e-05, + "loss": 1.5853, + "step": 4412 + }, + { + "epoch": 0.7, + "learning_rate": 4.84132967751818e-05, + "loss": 1.563, + "step": 4413 + }, + { + "epoch": 0.7, + "learning_rate": 4.841239218376462e-05, + "loss": 1.5596, + "step": 4414 + }, + { + "epoch": 0.7, + "learning_rate": 4.841148734301967e-05, + "loss": 1.5936, + "step": 4415 + }, + { + "epoch": 0.7, + "learning_rate": 4.84105822529566e-05, + "loss": 1.4618, + "step": 4416 + }, + { + "epoch": 0.7, + "learning_rate": 4.8409676913585044e-05, + "loss": 1.4958, + "step": 4417 + }, + { + "epoch": 0.7, + "learning_rate": 4.840877132491464e-05, + "loss": 1.5373, + "step": 4418 + }, + { + "epoch": 0.7, + "learning_rate": 4.8407865486955025e-05, + "loss": 1.5719, + "step": 4419 + }, + { + "epoch": 0.7, + "learning_rate": 4.840695939971586e-05, + "loss": 1.5589, + "step": 4420 + }, + { + "epoch": 0.7, + "learning_rate": 4.840605306320679e-05, + "loss": 1.4588, + "step": 4421 + }, + { + "epoch": 0.7, + "learning_rate": 4.840514647743746e-05, + "loss": 1.4704, + "step": 4422 + }, + { + "epoch": 0.7, + "learning_rate": 4.840423964241753e-05, + "loss": 1.6159, + "step": 4423 + }, + { + "epoch": 0.7, + "learning_rate": 4.840333255815666e-05, + "loss": 1.5842, + "step": 4424 + }, + { + "epoch": 0.7, + "learning_rate": 4.84024252246645e-05, + "loss": 1.5477, + "step": 4425 + }, + { + "epoch": 0.71, + "learning_rate": 4.840151764195073e-05, + "loss": 1.4948, + "step": 4426 + }, + { + "epoch": 0.71, + "learning_rate": 4.8400609810025e-05, + "loss": 1.5189, + "step": 4427 + }, + { + "epoch": 0.71, + "learning_rate": 4.8399701728896984e-05, + "loss": 1.5054, + "step": 4428 + }, + { + "epoch": 0.71, + "learning_rate": 4.839879339857635e-05, + "loss": 1.5004, + "step": 4429 + }, + { + "epoch": 0.71, + "learning_rate": 4.839788481907277e-05, + "loss": 1.5715, + "step": 4430 + }, + { + "epoch": 0.71, + "learning_rate": 4.839697599039592e-05, + "loss": 1.5964, + "step": 4431 + }, + { + "epoch": 0.71, + "learning_rate": 4.839606691255548e-05, + "loss": 1.557, + "step": 4432 + }, + { + "epoch": 0.71, + "learning_rate": 4.839515758556114e-05, + "loss": 1.516, + "step": 4433 + }, + { + "epoch": 0.71, + "learning_rate": 4.8394248009422574e-05, + "loss": 1.5445, + "step": 4434 + }, + { + "epoch": 0.71, + "learning_rate": 4.839333818414946e-05, + "loss": 1.653, + "step": 4435 + }, + { + "epoch": 0.71, + "learning_rate": 4.83924281097515e-05, + "loss": 1.5366, + "step": 4436 + }, + { + "epoch": 0.71, + "learning_rate": 4.839151778623839e-05, + "loss": 1.4832, + "step": 4437 + }, + { + "epoch": 0.71, + "learning_rate": 4.839060721361981e-05, + "loss": 1.5031, + "step": 4438 + }, + { + "epoch": 0.71, + "learning_rate": 4.8389696391905466e-05, + "loss": 1.5724, + "step": 4439 + }, + { + "epoch": 0.71, + "learning_rate": 4.838878532110506e-05, + "loss": 1.5654, + "step": 4440 + }, + { + "epoch": 0.71, + "learning_rate": 4.838787400122828e-05, + "loss": 1.535, + "step": 4441 + }, + { + "epoch": 0.71, + "learning_rate": 4.838696243228484e-05, + "loss": 1.5293, + "step": 4442 + }, + { + "epoch": 0.71, + "learning_rate": 4.838605061428446e-05, + "loss": 1.5682, + "step": 4443 + }, + { + "epoch": 0.71, + "learning_rate": 4.838513854723683e-05, + "loss": 1.529, + "step": 4444 + }, + { + "epoch": 0.71, + "learning_rate": 4.838422623115168e-05, + "loss": 1.4668, + "step": 4445 + }, + { + "epoch": 0.71, + "learning_rate": 4.838331366603871e-05, + "loss": 1.4919, + "step": 4446 + }, + { + "epoch": 0.71, + "learning_rate": 4.838240085190765e-05, + "loss": 1.5216, + "step": 4447 + }, + { + "epoch": 0.71, + "learning_rate": 4.838148778876822e-05, + "loss": 1.5376, + "step": 4448 + }, + { + "epoch": 0.71, + "learning_rate": 4.838057447663013e-05, + "loss": 1.5169, + "step": 4449 + }, + { + "epoch": 0.71, + "learning_rate": 4.837966091550313e-05, + "loss": 1.4946, + "step": 4450 + }, + { + "epoch": 0.71, + "learning_rate": 4.8378747105396926e-05, + "loss": 1.5267, + "step": 4451 + }, + { + "epoch": 0.71, + "learning_rate": 4.8377833046321264e-05, + "loss": 1.5391, + "step": 4452 + }, + { + "epoch": 0.71, + "learning_rate": 4.837691873828587e-05, + "loss": 1.5402, + "step": 4453 + }, + { + "epoch": 0.71, + "learning_rate": 4.837600418130048e-05, + "loss": 1.5065, + "step": 4454 + }, + { + "epoch": 0.71, + "learning_rate": 4.837508937537484e-05, + "loss": 1.5137, + "step": 4455 + }, + { + "epoch": 0.71, + "learning_rate": 4.8374174320518694e-05, + "loss": 1.4821, + "step": 4456 + }, + { + "epoch": 0.71, + "learning_rate": 4.837325901674178e-05, + "loss": 1.4694, + "step": 4457 + }, + { + "epoch": 0.71, + "learning_rate": 4.8372343464053846e-05, + "loss": 1.4906, + "step": 4458 + }, + { + "epoch": 0.71, + "learning_rate": 4.837142766246465e-05, + "loss": 1.4963, + "step": 4459 + }, + { + "epoch": 0.71, + "learning_rate": 4.8370511611983924e-05, + "loss": 1.5915, + "step": 4460 + }, + { + "epoch": 0.71, + "learning_rate": 4.8369595312621455e-05, + "loss": 1.485, + "step": 4461 + }, + { + "epoch": 0.71, + "learning_rate": 4.836867876438698e-05, + "loss": 1.5199, + "step": 4462 + }, + { + "epoch": 0.71, + "learning_rate": 4.8367761967290255e-05, + "loss": 1.5423, + "step": 4463 + }, + { + "epoch": 0.71, + "learning_rate": 4.836684492134106e-05, + "loss": 1.5643, + "step": 4464 + }, + { + "epoch": 0.71, + "learning_rate": 4.8365927626549144e-05, + "loss": 1.5015, + "step": 4465 + }, + { + "epoch": 0.71, + "learning_rate": 4.836501008292429e-05, + "loss": 1.5361, + "step": 4466 + }, + { + "epoch": 0.71, + "learning_rate": 4.8364092290476266e-05, + "loss": 1.4954, + "step": 4467 + }, + { + "epoch": 0.71, + "learning_rate": 4.836317424921484e-05, + "loss": 1.549, + "step": 4468 + }, + { + "epoch": 0.71, + "learning_rate": 4.836225595914979e-05, + "loss": 1.4873, + "step": 4469 + }, + { + "epoch": 0.71, + "learning_rate": 4.8361337420290906e-05, + "loss": 1.5431, + "step": 4470 + }, + { + "epoch": 0.71, + "learning_rate": 4.836041863264796e-05, + "loss": 1.5168, + "step": 4471 + }, + { + "epoch": 0.71, + "learning_rate": 4.8359499596230734e-05, + "loss": 1.5104, + "step": 4472 + }, + { + "epoch": 0.71, + "learning_rate": 4.835858031104902e-05, + "loss": 1.5186, + "step": 4473 + }, + { + "epoch": 0.71, + "learning_rate": 4.835766077711261e-05, + "loss": 1.4958, + "step": 4474 + }, + { + "epoch": 0.71, + "learning_rate": 4.8356740994431297e-05, + "loss": 1.613, + "step": 4475 + }, + { + "epoch": 0.71, + "learning_rate": 4.835582096301486e-05, + "loss": 1.4795, + "step": 4476 + }, + { + "epoch": 0.71, + "learning_rate": 4.835490068287312e-05, + "loss": 1.4596, + "step": 4477 + }, + { + "epoch": 0.71, + "learning_rate": 4.835398015401586e-05, + "loss": 1.4974, + "step": 4478 + }, + { + "epoch": 0.71, + "learning_rate": 4.8353059376452895e-05, + "loss": 1.4434, + "step": 4479 + }, + { + "epoch": 0.71, + "learning_rate": 4.835213835019402e-05, + "loss": 1.5464, + "step": 4480 + }, + { + "epoch": 0.71, + "learning_rate": 4.835121707524906e-05, + "loss": 1.4945, + "step": 4481 + }, + { + "epoch": 0.71, + "learning_rate": 4.83502955516278e-05, + "loss": 1.5384, + "step": 4482 + }, + { + "epoch": 0.71, + "learning_rate": 4.834937377934008e-05, + "loss": 1.4897, + "step": 4483 + }, + { + "epoch": 0.71, + "learning_rate": 4.83484517583957e-05, + "loss": 1.5977, + "step": 4484 + }, + { + "epoch": 0.71, + "learning_rate": 4.834752948880449e-05, + "loss": 1.5239, + "step": 4485 + }, + { + "epoch": 0.71, + "learning_rate": 4.834660697057626e-05, + "loss": 1.4759, + "step": 4486 + }, + { + "epoch": 0.71, + "learning_rate": 4.834568420372084e-05, + "loss": 1.5213, + "step": 4487 + }, + { + "epoch": 0.71, + "learning_rate": 4.8344761188248056e-05, + "loss": 1.6038, + "step": 4488 + }, + { + "epoch": 0.72, + "learning_rate": 4.8343837924167736e-05, + "loss": 1.5535, + "step": 4489 + }, + { + "epoch": 0.72, + "learning_rate": 4.834291441148972e-05, + "loss": 1.533, + "step": 4490 + }, + { + "epoch": 0.72, + "learning_rate": 4.834199065022384e-05, + "loss": 1.5181, + "step": 4491 + }, + { + "epoch": 0.72, + "learning_rate": 4.834106664037992e-05, + "loss": 1.4798, + "step": 4492 + }, + { + "epoch": 0.72, + "learning_rate": 4.834014238196782e-05, + "loss": 1.5846, + "step": 4493 + }, + { + "epoch": 0.72, + "learning_rate": 4.833921787499737e-05, + "loss": 1.5392, + "step": 4494 + }, + { + "epoch": 0.72, + "learning_rate": 4.833829311947842e-05, + "loss": 1.6034, + "step": 4495 + }, + { + "epoch": 0.72, + "learning_rate": 4.833736811542082e-05, + "loss": 1.563, + "step": 4496 + }, + { + "epoch": 0.72, + "learning_rate": 4.8336442862834416e-05, + "loss": 1.5093, + "step": 4497 + }, + { + "epoch": 0.72, + "learning_rate": 4.833551736172906e-05, + "loss": 1.5243, + "step": 4498 + }, + { + "epoch": 0.72, + "learning_rate": 4.833459161211462e-05, + "loss": 1.5194, + "step": 4499 + }, + { + "epoch": 0.72, + "learning_rate": 4.8333665614000935e-05, + "loss": 1.5711, + "step": 4500 + }, + { + "epoch": 0.72, + "learning_rate": 4.8332739367397883e-05, + "loss": 1.4564, + "step": 4501 + }, + { + "epoch": 0.72, + "learning_rate": 4.8331812872315325e-05, + "loss": 1.523, + "step": 4502 + }, + { + "epoch": 0.72, + "learning_rate": 4.833088612876312e-05, + "loss": 1.5137, + "step": 4503 + }, + { + "epoch": 0.72, + "learning_rate": 4.8329959136751146e-05, + "loss": 1.5077, + "step": 4504 + }, + { + "epoch": 0.72, + "learning_rate": 4.8329031896289264e-05, + "loss": 1.495, + "step": 4505 + }, + { + "epoch": 0.72, + "learning_rate": 4.832810440738737e-05, + "loss": 1.5067, + "step": 4506 + }, + { + "epoch": 0.72, + "learning_rate": 4.832717667005532e-05, + "loss": 1.4738, + "step": 4507 + }, + { + "epoch": 0.72, + "learning_rate": 4.832624868430299e-05, + "loss": 1.5663, + "step": 4508 + }, + { + "epoch": 0.72, + "learning_rate": 4.832532045014029e-05, + "loss": 1.5111, + "step": 4509 + }, + { + "epoch": 0.72, + "learning_rate": 4.8324391967577075e-05, + "loss": 1.5549, + "step": 4510 + }, + { + "epoch": 0.72, + "learning_rate": 4.832346323662326e-05, + "loss": 1.5736, + "step": 4511 + }, + { + "epoch": 0.72, + "learning_rate": 4.832253425728871e-05, + "loss": 1.5111, + "step": 4512 + }, + { + "epoch": 0.72, + "learning_rate": 4.8321605029583326e-05, + "loss": 1.5488, + "step": 4513 + }, + { + "epoch": 0.72, + "learning_rate": 4.832067555351702e-05, + "loss": 1.5028, + "step": 4514 + }, + { + "epoch": 0.72, + "learning_rate": 4.8319745829099675e-05, + "loss": 1.5278, + "step": 4515 + }, + { + "epoch": 0.72, + "learning_rate": 4.8318815856341185e-05, + "loss": 1.5469, + "step": 4516 + }, + { + "epoch": 0.72, + "learning_rate": 4.831788563525147e-05, + "loss": 1.5824, + "step": 4517 + }, + { + "epoch": 0.72, + "learning_rate": 4.8316955165840434e-05, + "loss": 1.5272, + "step": 4518 + }, + { + "epoch": 0.72, + "learning_rate": 4.831602444811797e-05, + "loss": 1.5208, + "step": 4519 + }, + { + "epoch": 0.72, + "learning_rate": 4.8315093482094015e-05, + "loss": 1.578, + "step": 4520 + }, + { + "epoch": 0.72, + "learning_rate": 4.831416226777846e-05, + "loss": 1.4445, + "step": 4521 + }, + { + "epoch": 0.72, + "learning_rate": 4.831323080518124e-05, + "loss": 1.5122, + "step": 4522 + }, + { + "epoch": 0.72, + "learning_rate": 4.831229909431225e-05, + "loss": 1.5285, + "step": 4523 + }, + { + "epoch": 0.72, + "learning_rate": 4.831136713518144e-05, + "loss": 1.5195, + "step": 4524 + }, + { + "epoch": 0.72, + "learning_rate": 4.831043492779872e-05, + "loss": 1.4948, + "step": 4525 + }, + { + "epoch": 0.72, + "learning_rate": 4.8309502472174025e-05, + "loss": 1.5031, + "step": 4526 + }, + { + "epoch": 0.72, + "learning_rate": 4.8308569768317276e-05, + "loss": 1.5476, + "step": 4527 + }, + { + "epoch": 0.72, + "learning_rate": 4.830763681623841e-05, + "loss": 1.4653, + "step": 4528 + }, + { + "epoch": 0.72, + "learning_rate": 4.8306703615947354e-05, + "loss": 1.5955, + "step": 4529 + }, + { + "epoch": 0.72, + "learning_rate": 4.830577016745407e-05, + "loss": 1.52, + "step": 4530 + }, + { + "epoch": 0.72, + "learning_rate": 4.8304836470768475e-05, + "loss": 1.5186, + "step": 4531 + }, + { + "epoch": 0.72, + "learning_rate": 4.830390252590052e-05, + "loss": 1.4741, + "step": 4532 + }, + { + "epoch": 0.72, + "learning_rate": 4.830296833286016e-05, + "loss": 1.4842, + "step": 4533 + }, + { + "epoch": 0.72, + "learning_rate": 4.8302033891657316e-05, + "loss": 1.4997, + "step": 4534 + }, + { + "epoch": 0.72, + "learning_rate": 4.830109920230197e-05, + "loss": 1.4891, + "step": 4535 + }, + { + "epoch": 0.72, + "learning_rate": 4.830016426480406e-05, + "loss": 1.5161, + "step": 4536 + }, + { + "epoch": 0.72, + "learning_rate": 4.829922907917355e-05, + "loss": 1.5117, + "step": 4537 + }, + { + "epoch": 0.72, + "learning_rate": 4.8298293645420395e-05, + "loss": 1.5479, + "step": 4538 + }, + { + "epoch": 0.72, + "learning_rate": 4.829735796355456e-05, + "loss": 1.4237, + "step": 4539 + }, + { + "epoch": 0.72, + "learning_rate": 4.8296422033586e-05, + "loss": 1.513, + "step": 4540 + }, + { + "epoch": 0.72, + "learning_rate": 4.8295485855524694e-05, + "loss": 1.505, + "step": 4541 + }, + { + "epoch": 0.72, + "learning_rate": 4.829454942938061e-05, + "loss": 1.5203, + "step": 4542 + }, + { + "epoch": 0.72, + "learning_rate": 4.829361275516371e-05, + "loss": 1.4536, + "step": 4543 + }, + { + "epoch": 0.72, + "learning_rate": 4.829267583288398e-05, + "loss": 1.4603, + "step": 4544 + }, + { + "epoch": 0.72, + "learning_rate": 4.829173866255139e-05, + "loss": 1.5192, + "step": 4545 + }, + { + "epoch": 0.72, + "learning_rate": 4.829080124417593e-05, + "loss": 1.5508, + "step": 4546 + }, + { + "epoch": 0.72, + "learning_rate": 4.828986357776757e-05, + "loss": 1.4626, + "step": 4547 + }, + { + "epoch": 0.72, + "learning_rate": 4.82889256633363e-05, + "loss": 1.5726, + "step": 4548 + }, + { + "epoch": 0.72, + "learning_rate": 4.828798750089212e-05, + "loss": 1.4154, + "step": 4549 + }, + { + "epoch": 0.72, + "learning_rate": 4.828704909044501e-05, + "loss": 1.6354, + "step": 4550 + }, + { + "epoch": 0.73, + "learning_rate": 4.828611043200496e-05, + "loss": 1.4585, + "step": 4551 + }, + { + "epoch": 0.73, + "learning_rate": 4.8285171525581976e-05, + "loss": 1.4679, + "step": 4552 + }, + { + "epoch": 0.73, + "learning_rate": 4.828423237118605e-05, + "loss": 1.4562, + "step": 4553 + }, + { + "epoch": 0.73, + "learning_rate": 4.828329296882719e-05, + "loss": 1.5021, + "step": 4554 + }, + { + "epoch": 0.73, + "learning_rate": 4.828235331851539e-05, + "loss": 1.4985, + "step": 4555 + }, + { + "epoch": 0.73, + "learning_rate": 4.8281413420260666e-05, + "loss": 1.5651, + "step": 4556 + }, + { + "epoch": 0.73, + "learning_rate": 4.828047327407302e-05, + "loss": 1.4971, + "step": 4557 + }, + { + "epoch": 0.73, + "learning_rate": 4.827953287996246e-05, + "loss": 1.5905, + "step": 4558 + }, + { + "epoch": 0.73, + "learning_rate": 4.827859223793902e-05, + "loss": 1.5389, + "step": 4559 + }, + { + "epoch": 0.73, + "learning_rate": 4.827765134801271e-05, + "loss": 1.4749, + "step": 4560 + }, + { + "epoch": 0.73, + "learning_rate": 4.8276710210193535e-05, + "loss": 1.5234, + "step": 4561 + }, + { + "epoch": 0.73, + "learning_rate": 4.827576882449153e-05, + "loss": 1.4966, + "step": 4562 + }, + { + "epoch": 0.73, + "learning_rate": 4.8274827190916715e-05, + "loss": 1.5166, + "step": 4563 + }, + { + "epoch": 0.73, + "learning_rate": 4.8273885309479125e-05, + "loss": 1.5132, + "step": 4564 + }, + { + "epoch": 0.73, + "learning_rate": 4.827294318018879e-05, + "loss": 1.4461, + "step": 4565 + }, + { + "epoch": 0.73, + "learning_rate": 4.8272000803055737e-05, + "loss": 1.5156, + "step": 4566 + }, + { + "epoch": 0.73, + "learning_rate": 4.827105817809e-05, + "loss": 1.4727, + "step": 4567 + }, + { + "epoch": 0.73, + "learning_rate": 4.827011530530162e-05, + "loss": 1.5594, + "step": 4568 + }, + { + "epoch": 0.73, + "learning_rate": 4.826917218470065e-05, + "loss": 1.5076, + "step": 4569 + }, + { + "epoch": 0.73, + "learning_rate": 4.826822881629711e-05, + "loss": 1.5288, + "step": 4570 + }, + { + "epoch": 0.73, + "learning_rate": 4.826728520010107e-05, + "loss": 1.519, + "step": 4571 + }, + { + "epoch": 0.73, + "learning_rate": 4.826634133612257e-05, + "loss": 1.4797, + "step": 4572 + }, + { + "epoch": 0.73, + "learning_rate": 4.826539722437165e-05, + "loss": 1.5244, + "step": 4573 + }, + { + "epoch": 0.73, + "learning_rate": 4.826445286485838e-05, + "loss": 1.5552, + "step": 4574 + }, + { + "epoch": 0.73, + "learning_rate": 4.8263508257592814e-05, + "loss": 1.4927, + "step": 4575 + }, + { + "epoch": 0.73, + "learning_rate": 4.8262563402585e-05, + "loss": 1.5498, + "step": 4576 + }, + { + "epoch": 0.73, + "learning_rate": 4.8261618299845016e-05, + "loss": 1.4839, + "step": 4577 + }, + { + "epoch": 0.73, + "learning_rate": 4.826067294938292e-05, + "loss": 1.4559, + "step": 4578 + }, + { + "epoch": 0.73, + "learning_rate": 4.8259727351208776e-05, + "loss": 1.5653, + "step": 4579 + }, + { + "epoch": 0.73, + "learning_rate": 4.825878150533266e-05, + "loss": 1.4779, + "step": 4580 + }, + { + "epoch": 0.73, + "learning_rate": 4.8257835411764643e-05, + "loss": 1.5941, + "step": 4581 + }, + { + "epoch": 0.73, + "learning_rate": 4.825688907051479e-05, + "loss": 1.5462, + "step": 4582 + }, + { + "epoch": 0.73, + "learning_rate": 4.8255942481593194e-05, + "loss": 1.5101, + "step": 4583 + }, + { + "epoch": 0.73, + "learning_rate": 4.825499564500993e-05, + "loss": 1.4829, + "step": 4584 + }, + { + "epoch": 0.73, + "learning_rate": 4.825404856077508e-05, + "loss": 1.5107, + "step": 4585 + }, + { + "epoch": 0.73, + "learning_rate": 4.825310122889873e-05, + "loss": 1.5226, + "step": 4586 + }, + { + "epoch": 0.73, + "learning_rate": 4.825215364939097e-05, + "loss": 1.5277, + "step": 4587 + }, + { + "epoch": 0.73, + "learning_rate": 4.825120582226189e-05, + "loss": 1.527, + "step": 4588 + }, + { + "epoch": 0.73, + "learning_rate": 4.825025774752159e-05, + "loss": 1.5295, + "step": 4589 + }, + { + "epoch": 0.73, + "learning_rate": 4.824930942518016e-05, + "loss": 1.4884, + "step": 4590 + }, + { + "epoch": 0.73, + "learning_rate": 4.824836085524769e-05, + "loss": 1.5736, + "step": 4591 + }, + { + "epoch": 0.73, + "learning_rate": 4.8247412037734296e-05, + "loss": 1.4668, + "step": 4592 + }, + { + "epoch": 0.73, + "learning_rate": 4.824646297265008e-05, + "loss": 1.5088, + "step": 4593 + }, + { + "epoch": 0.73, + "learning_rate": 4.824551366000514e-05, + "loss": 1.5706, + "step": 4594 + }, + { + "epoch": 0.73, + "learning_rate": 4.8244564099809596e-05, + "loss": 1.4862, + "step": 4595 + }, + { + "epoch": 0.73, + "learning_rate": 4.824361429207356e-05, + "loss": 1.509, + "step": 4596 + }, + { + "epoch": 0.73, + "learning_rate": 4.8242664236807135e-05, + "loss": 1.5422, + "step": 4597 + }, + { + "epoch": 0.73, + "learning_rate": 4.824171393402046e-05, + "loss": 1.5781, + "step": 4598 + }, + { + "epoch": 0.73, + "learning_rate": 4.824076338372363e-05, + "loss": 1.5197, + "step": 4599 + }, + { + "epoch": 0.73, + "learning_rate": 4.823981258592678e-05, + "loss": 1.5073, + "step": 4600 + }, + { + "epoch": 0.73, + "learning_rate": 4.823886154064004e-05, + "loss": 1.4902, + "step": 4601 + }, + { + "epoch": 0.73, + "learning_rate": 4.823791024787353e-05, + "loss": 1.4731, + "step": 4602 + }, + { + "epoch": 0.73, + "learning_rate": 4.823695870763739e-05, + "loss": 1.4894, + "step": 4603 + }, + { + "epoch": 0.73, + "learning_rate": 4.823600691994174e-05, + "loss": 1.5684, + "step": 4604 + }, + { + "epoch": 0.73, + "learning_rate": 4.823505488479673e-05, + "loss": 1.5073, + "step": 4605 + }, + { + "epoch": 0.73, + "learning_rate": 4.8234102602212485e-05, + "loss": 1.4626, + "step": 4606 + }, + { + "epoch": 0.73, + "learning_rate": 4.8233150072199165e-05, + "loss": 1.5329, + "step": 4607 + }, + { + "epoch": 0.73, + "learning_rate": 4.823219729476689e-05, + "loss": 1.5156, + "step": 4608 + }, + { + "epoch": 0.73, + "learning_rate": 4.8231244269925826e-05, + "loss": 1.4491, + "step": 4609 + }, + { + "epoch": 0.73, + "learning_rate": 4.823029099768611e-05, + "loss": 1.4924, + "step": 4610 + }, + { + "epoch": 0.73, + "learning_rate": 4.82293374780579e-05, + "loss": 1.5176, + "step": 4611 + }, + { + "epoch": 0.73, + "learning_rate": 4.822838371105134e-05, + "loss": 1.5395, + "step": 4612 + }, + { + "epoch": 0.73, + "learning_rate": 4.822742969667661e-05, + "loss": 1.4718, + "step": 4613 + }, + { + "epoch": 0.74, + "learning_rate": 4.8226475434943844e-05, + "loss": 1.4953, + "step": 4614 + }, + { + "epoch": 0.74, + "learning_rate": 4.822552092586322e-05, + "loss": 1.5067, + "step": 4615 + }, + { + "epoch": 0.74, + "learning_rate": 4.82245661694449e-05, + "loss": 1.5288, + "step": 4616 + }, + { + "epoch": 0.74, + "learning_rate": 4.822361116569905e-05, + "loss": 1.5321, + "step": 4617 + }, + { + "epoch": 0.74, + "learning_rate": 4.822265591463584e-05, + "loss": 1.479, + "step": 4618 + }, + { + "epoch": 0.74, + "learning_rate": 4.8221700416265444e-05, + "loss": 1.4665, + "step": 4619 + }, + { + "epoch": 0.74, + "learning_rate": 4.822074467059804e-05, + "loss": 1.7119, + "step": 4620 + }, + { + "epoch": 0.74, + "learning_rate": 4.8219788677643796e-05, + "loss": 1.5133, + "step": 4621 + }, + { + "epoch": 0.74, + "learning_rate": 4.82188324374129e-05, + "loss": 1.4587, + "step": 4622 + }, + { + "epoch": 0.74, + "learning_rate": 4.821787594991554e-05, + "loss": 1.6393, + "step": 4623 + }, + { + "epoch": 0.74, + "learning_rate": 4.8216919215161894e-05, + "loss": 1.5221, + "step": 4624 + }, + { + "epoch": 0.74, + "learning_rate": 4.821596223316215e-05, + "loss": 1.5314, + "step": 4625 + }, + { + "epoch": 0.74, + "learning_rate": 4.821500500392651e-05, + "loss": 1.5682, + "step": 4626 + }, + { + "epoch": 0.74, + "learning_rate": 4.821404752746516e-05, + "loss": 1.5615, + "step": 4627 + }, + { + "epoch": 0.74, + "learning_rate": 4.8213089803788294e-05, + "loss": 1.5622, + "step": 4628 + }, + { + "epoch": 0.74, + "learning_rate": 4.8212131832906115e-05, + "loss": 1.5648, + "step": 4629 + }, + { + "epoch": 0.74, + "learning_rate": 4.821117361482883e-05, + "loss": 1.4982, + "step": 4630 + }, + { + "epoch": 0.74, + "learning_rate": 4.8210215149566634e-05, + "loss": 1.5195, + "step": 4631 + }, + { + "epoch": 0.74, + "learning_rate": 4.8209256437129735e-05, + "loss": 1.4549, + "step": 4632 + }, + { + "epoch": 0.74, + "learning_rate": 4.820829747752835e-05, + "loss": 1.5326, + "step": 4633 + }, + { + "epoch": 0.74, + "learning_rate": 4.820733827077268e-05, + "loss": 1.5325, + "step": 4634 + }, + { + "epoch": 0.74, + "learning_rate": 4.8206378816872956e-05, + "loss": 1.5461, + "step": 4635 + }, + { + "epoch": 0.74, + "learning_rate": 4.820541911583938e-05, + "loss": 1.5049, + "step": 4636 + }, + { + "epoch": 0.74, + "learning_rate": 4.820445916768219e-05, + "loss": 1.5839, + "step": 4637 + }, + { + "epoch": 0.74, + "learning_rate": 4.8203498972411585e-05, + "loss": 1.5876, + "step": 4638 + }, + { + "epoch": 0.74, + "learning_rate": 4.820253853003781e-05, + "loss": 1.5612, + "step": 4639 + }, + { + "epoch": 0.74, + "learning_rate": 4.8201577840571075e-05, + "loss": 1.6125, + "step": 4640 + }, + { + "epoch": 0.74, + "learning_rate": 4.820061690402163e-05, + "loss": 1.5329, + "step": 4641 + }, + { + "epoch": 0.74, + "learning_rate": 4.819965572039971e-05, + "loss": 1.4556, + "step": 4642 + }, + { + "epoch": 0.74, + "learning_rate": 4.819869428971553e-05, + "loss": 1.5304, + "step": 4643 + }, + { + "epoch": 0.74, + "learning_rate": 4.819773261197934e-05, + "loss": 1.4801, + "step": 4644 + }, + { + "epoch": 0.74, + "learning_rate": 4.8196770687201385e-05, + "loss": 1.5272, + "step": 4645 + }, + { + "epoch": 0.74, + "learning_rate": 4.819580851539191e-05, + "loss": 1.5638, + "step": 4646 + }, + { + "epoch": 0.74, + "learning_rate": 4.819484609656114e-05, + "loss": 1.5103, + "step": 4647 + }, + { + "epoch": 0.74, + "learning_rate": 4.819388343071936e-05, + "loss": 1.5601, + "step": 4648 + }, + { + "epoch": 0.74, + "learning_rate": 4.8192920517876796e-05, + "loss": 1.5337, + "step": 4649 + }, + { + "epoch": 0.74, + "learning_rate": 4.8191957358043704e-05, + "loss": 1.4842, + "step": 4650 + }, + { + "epoch": 0.74, + "learning_rate": 4.8190993951230354e-05, + "loss": 1.5347, + "step": 4651 + }, + { + "epoch": 0.74, + "learning_rate": 4.819003029744698e-05, + "loss": 1.4863, + "step": 4652 + }, + { + "epoch": 0.74, + "learning_rate": 4.8189066396703886e-05, + "loss": 1.4821, + "step": 4653 + }, + { + "epoch": 0.74, + "learning_rate": 4.81881022490113e-05, + "loss": 1.513, + "step": 4654 + }, + { + "epoch": 0.74, + "learning_rate": 4.818713785437951e-05, + "loss": 1.4771, + "step": 4655 + }, + { + "epoch": 0.74, + "learning_rate": 4.818617321281877e-05, + "loss": 1.5286, + "step": 4656 + }, + { + "epoch": 0.74, + "learning_rate": 4.8185208324339374e-05, + "loss": 1.5031, + "step": 4657 + }, + { + "epoch": 0.74, + "learning_rate": 4.8184243188951586e-05, + "loss": 1.5633, + "step": 4658 + }, + { + "epoch": 0.74, + "learning_rate": 4.8183277806665675e-05, + "loss": 1.5706, + "step": 4659 + }, + { + "epoch": 0.74, + "learning_rate": 4.8182312177491926e-05, + "loss": 1.4969, + "step": 4660 + }, + { + "epoch": 0.74, + "learning_rate": 4.818134630144063e-05, + "loss": 1.4196, + "step": 4661 + }, + { + "epoch": 0.74, + "learning_rate": 4.8180380178522076e-05, + "loss": 1.5599, + "step": 4662 + }, + { + "epoch": 0.74, + "learning_rate": 4.817941380874654e-05, + "loss": 1.5112, + "step": 4663 + }, + { + "epoch": 0.74, + "learning_rate": 4.8178447192124326e-05, + "loss": 1.5099, + "step": 4664 + }, + { + "epoch": 0.74, + "learning_rate": 4.8177480328665713e-05, + "loss": 1.5186, + "step": 4665 + }, + { + "epoch": 0.74, + "learning_rate": 4.8176513218381015e-05, + "loss": 1.5236, + "step": 4666 + }, + { + "epoch": 0.74, + "learning_rate": 4.817554586128051e-05, + "loss": 1.4842, + "step": 4667 + }, + { + "epoch": 0.74, + "learning_rate": 4.817457825737452e-05, + "loss": 1.5098, + "step": 4668 + }, + { + "epoch": 0.74, + "learning_rate": 4.817361040667335e-05, + "loss": 1.4766, + "step": 4669 + }, + { + "epoch": 0.74, + "learning_rate": 4.817264230918729e-05, + "loss": 1.4943, + "step": 4670 + }, + { + "epoch": 0.74, + "learning_rate": 4.817167396492665e-05, + "loss": 1.3714, + "step": 4671 + }, + { + "epoch": 0.74, + "learning_rate": 4.817070537390176e-05, + "loss": 1.5306, + "step": 4672 + }, + { + "epoch": 0.74, + "learning_rate": 4.816973653612292e-05, + "loss": 1.4579, + "step": 4673 + }, + { + "epoch": 0.74, + "learning_rate": 4.8168767451600456e-05, + "loss": 1.5773, + "step": 4674 + }, + { + "epoch": 0.74, + "learning_rate": 4.8167798120344684e-05, + "loss": 1.4963, + "step": 4675 + }, + { + "epoch": 0.74, + "learning_rate": 4.816682854236593e-05, + "loss": 1.5386, + "step": 4676 + }, + { + "epoch": 0.75, + "learning_rate": 4.816585871767451e-05, + "loss": 1.529, + "step": 4677 + }, + { + "epoch": 0.75, + "learning_rate": 4.8164888646280756e-05, + "loss": 1.5573, + "step": 4678 + }, + { + "epoch": 0.75, + "learning_rate": 4.8163918328195015e-05, + "loss": 1.5397, + "step": 4679 + }, + { + "epoch": 0.75, + "learning_rate": 4.81629477634276e-05, + "loss": 1.5124, + "step": 4680 + }, + { + "epoch": 0.75, + "learning_rate": 4.816197695198885e-05, + "loss": 1.5025, + "step": 4681 + }, + { + "epoch": 0.75, + "learning_rate": 4.816100589388911e-05, + "loss": 1.499, + "step": 4682 + }, + { + "epoch": 0.75, + "learning_rate": 4.816003458913872e-05, + "loss": 1.4823, + "step": 4683 + }, + { + "epoch": 0.75, + "learning_rate": 4.815906303774802e-05, + "loss": 1.4722, + "step": 4684 + }, + { + "epoch": 0.75, + "learning_rate": 4.8158091239727364e-05, + "loss": 1.5848, + "step": 4685 + }, + { + "epoch": 0.75, + "learning_rate": 4.8157119195087096e-05, + "loss": 1.5309, + "step": 4686 + }, + { + "epoch": 0.75, + "learning_rate": 4.815614690383756e-05, + "loss": 1.5658, + "step": 4687 + }, + { + "epoch": 0.75, + "learning_rate": 4.8155174365989125e-05, + "loss": 1.5347, + "step": 4688 + }, + { + "epoch": 0.75, + "learning_rate": 4.815420158155214e-05, + "loss": 1.5374, + "step": 4689 + }, + { + "epoch": 0.75, + "learning_rate": 4.815322855053697e-05, + "loss": 1.5132, + "step": 4690 + }, + { + "epoch": 0.75, + "learning_rate": 4.815225527295396e-05, + "loss": 1.5474, + "step": 4691 + }, + { + "epoch": 0.75, + "learning_rate": 4.8151281748813496e-05, + "loss": 1.4692, + "step": 4692 + }, + { + "epoch": 0.75, + "learning_rate": 4.815030797812593e-05, + "loss": 1.5007, + "step": 4693 + }, + { + "epoch": 0.75, + "learning_rate": 4.8149333960901647e-05, + "loss": 1.5285, + "step": 4694 + }, + { + "epoch": 0.75, + "learning_rate": 4.8148359697151005e-05, + "loss": 1.495, + "step": 4695 + }, + { + "epoch": 0.75, + "learning_rate": 4.8147385186884394e-05, + "loss": 1.5646, + "step": 4696 + }, + { + "epoch": 0.75, + "learning_rate": 4.8146410430112175e-05, + "loss": 1.5228, + "step": 4697 + }, + { + "epoch": 0.75, + "learning_rate": 4.814543542684474e-05, + "loss": 1.5327, + "step": 4698 + }, + { + "epoch": 0.75, + "learning_rate": 4.8144460177092475e-05, + "loss": 1.4665, + "step": 4699 + }, + { + "epoch": 0.75, + "learning_rate": 4.814348468086576e-05, + "loss": 1.5205, + "step": 4700 + }, + { + "epoch": 0.75, + "learning_rate": 4.814250893817498e-05, + "loss": 1.4992, + "step": 4701 + }, + { + "epoch": 0.75, + "learning_rate": 4.814153294903053e-05, + "loss": 1.5313, + "step": 4702 + }, + { + "epoch": 0.75, + "learning_rate": 4.8140556713442805e-05, + "loss": 1.5306, + "step": 4703 + }, + { + "epoch": 0.75, + "learning_rate": 4.8139580231422196e-05, + "loss": 1.445, + "step": 4704 + }, + { + "epoch": 0.75, + "learning_rate": 4.813860350297911e-05, + "loss": 1.5444, + "step": 4705 + }, + { + "epoch": 0.75, + "learning_rate": 4.813762652812394e-05, + "loss": 1.4997, + "step": 4706 + }, + { + "epoch": 0.75, + "learning_rate": 4.81366493068671e-05, + "loss": 1.4922, + "step": 4707 + }, + { + "epoch": 0.75, + "learning_rate": 4.813567183921899e-05, + "loss": 1.514, + "step": 4708 + }, + { + "epoch": 0.75, + "learning_rate": 4.813469412519002e-05, + "loss": 1.5225, + "step": 4709 + }, + { + "epoch": 0.75, + "learning_rate": 4.8133716164790606e-05, + "loss": 1.5197, + "step": 4710 + }, + { + "epoch": 0.75, + "learning_rate": 4.813273795803116e-05, + "loss": 1.4883, + "step": 4711 + }, + { + "epoch": 0.75, + "learning_rate": 4.813175950492209e-05, + "loss": 1.4369, + "step": 4712 + }, + { + "epoch": 0.75, + "learning_rate": 4.8130780805473827e-05, + "loss": 1.5378, + "step": 4713 + }, + { + "epoch": 0.75, + "learning_rate": 4.81298018596968e-05, + "loss": 1.5929, + "step": 4714 + }, + { + "epoch": 0.75, + "learning_rate": 4.8128822667601424e-05, + "loss": 1.585, + "step": 4715 + }, + { + "epoch": 0.75, + "learning_rate": 4.8127843229198125e-05, + "loss": 1.5667, + "step": 4716 + }, + { + "epoch": 0.75, + "learning_rate": 4.8126863544497336e-05, + "loss": 1.5041, + "step": 4717 + }, + { + "epoch": 0.75, + "learning_rate": 4.8125883613509494e-05, + "loss": 1.5002, + "step": 4718 + }, + { + "epoch": 0.75, + "learning_rate": 4.8124903436245026e-05, + "loss": 1.5843, + "step": 4719 + }, + { + "epoch": 0.75, + "learning_rate": 4.812392301271438e-05, + "loss": 1.5443, + "step": 4720 + }, + { + "epoch": 0.75, + "learning_rate": 4.8122942342928e-05, + "loss": 1.5917, + "step": 4721 + }, + { + "epoch": 0.75, + "learning_rate": 4.812196142689631e-05, + "loss": 1.5169, + "step": 4722 + }, + { + "epoch": 0.75, + "learning_rate": 4.812098026462978e-05, + "loss": 1.5475, + "step": 4723 + }, + { + "epoch": 0.75, + "learning_rate": 4.811999885613884e-05, + "loss": 1.5563, + "step": 4724 + }, + { + "epoch": 0.75, + "learning_rate": 4.8119017201433946e-05, + "loss": 1.5073, + "step": 4725 + }, + { + "epoch": 0.75, + "learning_rate": 4.811803530052556e-05, + "loss": 1.4772, + "step": 4726 + }, + { + "epoch": 0.75, + "learning_rate": 4.8117053153424127e-05, + "loss": 1.5591, + "step": 4727 + }, + { + "epoch": 0.75, + "learning_rate": 4.811607076014012e-05, + "loss": 1.5199, + "step": 4728 + }, + { + "epoch": 0.75, + "learning_rate": 4.811508812068399e-05, + "loss": 1.5371, + "step": 4729 + }, + { + "epoch": 0.75, + "learning_rate": 4.8114105235066206e-05, + "loss": 1.4795, + "step": 4730 + }, + { + "epoch": 0.75, + "learning_rate": 4.811312210329724e-05, + "loss": 1.5171, + "step": 4731 + }, + { + "epoch": 0.75, + "learning_rate": 4.8112138725387544e-05, + "loss": 1.5044, + "step": 4732 + }, + { + "epoch": 0.75, + "learning_rate": 4.8111155101347605e-05, + "loss": 1.4401, + "step": 4733 + }, + { + "epoch": 0.75, + "learning_rate": 4.81101712311879e-05, + "loss": 1.4889, + "step": 4734 + }, + { + "epoch": 0.75, + "learning_rate": 4.81091871149189e-05, + "loss": 1.478, + "step": 4735 + }, + { + "epoch": 0.75, + "learning_rate": 4.810820275255109e-05, + "loss": 1.4661, + "step": 4736 + }, + { + "epoch": 0.75, + "learning_rate": 4.810721814409494e-05, + "loss": 1.5267, + "step": 4737 + }, + { + "epoch": 0.75, + "learning_rate": 4.810623328956096e-05, + "loss": 1.4995, + "step": 4738 + }, + { + "epoch": 0.75, + "learning_rate": 4.810524818895962e-05, + "loss": 1.5054, + "step": 4739 + }, + { + "epoch": 0.76, + "learning_rate": 4.81042628423014e-05, + "loss": 1.4855, + "step": 4740 + }, + { + "epoch": 0.76, + "learning_rate": 4.8103277249596826e-05, + "loss": 1.4535, + "step": 4741 + }, + { + "epoch": 0.76, + "learning_rate": 4.810229141085637e-05, + "loss": 1.4943, + "step": 4742 + }, + { + "epoch": 0.76, + "learning_rate": 4.810130532609053e-05, + "loss": 1.6147, + "step": 4743 + }, + { + "epoch": 0.76, + "learning_rate": 4.8100318995309826e-05, + "loss": 1.465, + "step": 4744 + }, + { + "epoch": 0.76, + "learning_rate": 4.8099332418524735e-05, + "loss": 1.5049, + "step": 4745 + }, + { + "epoch": 0.76, + "learning_rate": 4.8098345595745787e-05, + "loss": 1.5111, + "step": 4746 + }, + { + "epoch": 0.76, + "learning_rate": 4.8097358526983476e-05, + "loss": 1.5711, + "step": 4747 + }, + { + "epoch": 0.76, + "learning_rate": 4.8096371212248327e-05, + "loss": 1.5104, + "step": 4748 + }, + { + "epoch": 0.76, + "learning_rate": 4.809538365155084e-05, + "loss": 1.5135, + "step": 4749 + }, + { + "epoch": 0.76, + "learning_rate": 4.809439584490154e-05, + "loss": 1.5701, + "step": 4750 + }, + { + "epoch": 0.76, + "learning_rate": 4.809340779231094e-05, + "loss": 1.4779, + "step": 4751 + }, + { + "epoch": 0.76, + "learning_rate": 4.809241949378958e-05, + "loss": 1.519, + "step": 4752 + }, + { + "epoch": 0.76, + "learning_rate": 4.809143094934796e-05, + "loss": 1.4201, + "step": 4753 + }, + { + "epoch": 0.76, + "learning_rate": 4.809044215899663e-05, + "loss": 1.4707, + "step": 4754 + }, + { + "epoch": 0.76, + "learning_rate": 4.80894531227461e-05, + "loss": 1.5018, + "step": 4755 + }, + { + "epoch": 0.76, + "learning_rate": 4.808846384060691e-05, + "loss": 1.5592, + "step": 4756 + }, + { + "epoch": 0.76, + "learning_rate": 4.808747431258961e-05, + "loss": 1.5527, + "step": 4757 + }, + { + "epoch": 0.76, + "learning_rate": 4.808648453870472e-05, + "loss": 1.4746, + "step": 4758 + }, + { + "epoch": 0.76, + "learning_rate": 4.8085494518962785e-05, + "loss": 1.5838, + "step": 4759 + }, + { + "epoch": 0.76, + "learning_rate": 4.808450425337435e-05, + "loss": 1.4852, + "step": 4760 + }, + { + "epoch": 0.76, + "learning_rate": 4.808351374194996e-05, + "loss": 1.4945, + "step": 4761 + }, + { + "epoch": 0.76, + "learning_rate": 4.8082522984700154e-05, + "loss": 1.5713, + "step": 4762 + }, + { + "epoch": 0.76, + "learning_rate": 4.8081531981635504e-05, + "loss": 1.5267, + "step": 4763 + }, + { + "epoch": 0.76, + "learning_rate": 4.808054073276655e-05, + "loss": 1.4849, + "step": 4764 + }, + { + "epoch": 0.76, + "learning_rate": 4.807954923810384e-05, + "loss": 1.4811, + "step": 4765 + }, + { + "epoch": 0.76, + "learning_rate": 4.807855749765795e-05, + "loss": 1.5194, + "step": 4766 + }, + { + "epoch": 0.76, + "learning_rate": 4.8077565511439435e-05, + "loss": 1.5042, + "step": 4767 + }, + { + "epoch": 0.76, + "learning_rate": 4.8076573279458855e-05, + "loss": 1.4998, + "step": 4768 + }, + { + "epoch": 0.76, + "learning_rate": 4.8075580801726784e-05, + "loss": 1.5781, + "step": 4769 + }, + { + "epoch": 0.76, + "learning_rate": 4.807458807825378e-05, + "loss": 1.48, + "step": 4770 + }, + { + "epoch": 0.76, + "learning_rate": 4.807359510905043e-05, + "loss": 1.472, + "step": 4771 + }, + { + "epoch": 0.76, + "learning_rate": 4.8072601894127295e-05, + "loss": 1.4046, + "step": 4772 + }, + { + "epoch": 0.76, + "learning_rate": 4.807160843349496e-05, + "loss": 1.4855, + "step": 4773 + }, + { + "epoch": 0.76, + "learning_rate": 4.8070614727164e-05, + "loss": 1.4808, + "step": 4774 + }, + { + "epoch": 0.76, + "learning_rate": 4.8069620775144996e-05, + "loss": 1.4528, + "step": 4775 + }, + { + "epoch": 0.76, + "learning_rate": 4.8068626577448544e-05, + "loss": 1.4871, + "step": 4776 + }, + { + "epoch": 0.76, + "learning_rate": 4.806763213408522e-05, + "loss": 1.5623, + "step": 4777 + }, + { + "epoch": 0.76, + "learning_rate": 4.806663744506562e-05, + "loss": 1.5645, + "step": 4778 + }, + { + "epoch": 0.76, + "learning_rate": 4.806564251040034e-05, + "loss": 1.5832, + "step": 4779 + }, + { + "epoch": 0.76, + "learning_rate": 4.8064647330099964e-05, + "loss": 1.4893, + "step": 4780 + }, + { + "epoch": 0.76, + "learning_rate": 4.80636519041751e-05, + "loss": 1.5648, + "step": 4781 + }, + { + "epoch": 0.76, + "learning_rate": 4.806265623263634e-05, + "loss": 1.4844, + "step": 4782 + }, + { + "epoch": 0.76, + "learning_rate": 4.80616603154943e-05, + "loss": 1.5503, + "step": 4783 + }, + { + "epoch": 0.76, + "learning_rate": 4.806066415275958e-05, + "loss": 1.4383, + "step": 4784 + }, + { + "epoch": 0.76, + "learning_rate": 4.805966774444278e-05, + "loss": 1.534, + "step": 4785 + }, + { + "epoch": 0.76, + "learning_rate": 4.805867109055452e-05, + "loss": 1.5225, + "step": 4786 + }, + { + "epoch": 0.76, + "learning_rate": 4.805767419110542e-05, + "loss": 1.5534, + "step": 4787 + }, + { + "epoch": 0.76, + "learning_rate": 4.805667704610608e-05, + "loss": 1.5313, + "step": 4788 + }, + { + "epoch": 0.76, + "learning_rate": 4.8055679655567123e-05, + "loss": 1.4334, + "step": 4789 + }, + { + "epoch": 0.76, + "learning_rate": 4.805468201949919e-05, + "loss": 1.584, + "step": 4790 + }, + { + "epoch": 0.76, + "learning_rate": 4.805368413791288e-05, + "loss": 1.5164, + "step": 4791 + }, + { + "epoch": 0.76, + "learning_rate": 4.805268601081883e-05, + "loss": 1.5459, + "step": 4792 + }, + { + "epoch": 0.76, + "learning_rate": 4.8051687638227674e-05, + "loss": 1.5116, + "step": 4793 + }, + { + "epoch": 0.76, + "learning_rate": 4.805068902015003e-05, + "loss": 1.4972, + "step": 4794 + }, + { + "epoch": 0.76, + "learning_rate": 4.804969015659655e-05, + "loss": 1.4536, + "step": 4795 + }, + { + "epoch": 0.76, + "learning_rate": 4.804869104757787e-05, + "loss": 1.5244, + "step": 4796 + }, + { + "epoch": 0.76, + "learning_rate": 4.804769169310462e-05, + "loss": 1.4878, + "step": 4797 + }, + { + "epoch": 0.76, + "learning_rate": 4.804669209318744e-05, + "loss": 1.507, + "step": 4798 + }, + { + "epoch": 0.76, + "learning_rate": 4.804569224783698e-05, + "loss": 1.5223, + "step": 4799 + }, + { + "epoch": 0.76, + "learning_rate": 4.80446921570639e-05, + "loss": 1.5452, + "step": 4800 + }, + { + "epoch": 0.76, + "learning_rate": 4.804369182087883e-05, + "loss": 1.5036, + "step": 4801 + }, + { + "epoch": 0.77, + "learning_rate": 4.8042691239292434e-05, + "loss": 1.5799, + "step": 4802 + }, + { + "epoch": 0.77, + "learning_rate": 4.804169041231537e-05, + "loss": 1.5153, + "step": 4803 + }, + { + "epoch": 0.77, + "learning_rate": 4.804068933995829e-05, + "loss": 1.4985, + "step": 4804 + }, + { + "epoch": 0.77, + "learning_rate": 4.803968802223186e-05, + "loss": 1.4769, + "step": 4805 + }, + { + "epoch": 0.77, + "learning_rate": 4.803868645914673e-05, + "loss": 1.5173, + "step": 4806 + }, + { + "epoch": 0.77, + "learning_rate": 4.8037684650713585e-05, + "loss": 1.4644, + "step": 4807 + }, + { + "epoch": 0.77, + "learning_rate": 4.8036682596943086e-05, + "loss": 1.5181, + "step": 4808 + }, + { + "epoch": 0.77, + "learning_rate": 4.803568029784589e-05, + "loss": 1.4489, + "step": 4809 + }, + { + "epoch": 0.77, + "learning_rate": 4.80346777534327e-05, + "loss": 1.556, + "step": 4810 + }, + { + "epoch": 0.77, + "learning_rate": 4.803367496371417e-05, + "loss": 1.4603, + "step": 4811 + }, + { + "epoch": 0.77, + "learning_rate": 4.803267192870098e-05, + "loss": 1.488, + "step": 4812 + }, + { + "epoch": 0.77, + "learning_rate": 4.8031668648403825e-05, + "loss": 1.5501, + "step": 4813 + }, + { + "epoch": 0.77, + "learning_rate": 4.803066512283338e-05, + "loss": 1.4956, + "step": 4814 + }, + { + "epoch": 0.77, + "learning_rate": 4.8029661352000335e-05, + "loss": 1.4997, + "step": 4815 + }, + { + "epoch": 0.77, + "learning_rate": 4.8028657335915376e-05, + "loss": 1.5129, + "step": 4816 + }, + { + "epoch": 0.77, + "learning_rate": 4.80276530745892e-05, + "loss": 1.5052, + "step": 4817 + }, + { + "epoch": 0.77, + "learning_rate": 4.8026648568032495e-05, + "loss": 1.4925, + "step": 4818 + }, + { + "epoch": 0.77, + "learning_rate": 4.802564381625597e-05, + "loss": 1.5369, + "step": 4819 + }, + { + "epoch": 0.77, + "learning_rate": 4.802463881927031e-05, + "loss": 1.4797, + "step": 4820 + }, + { + "epoch": 0.77, + "learning_rate": 4.802363357708623e-05, + "loss": 1.4909, + "step": 4821 + }, + { + "epoch": 0.77, + "learning_rate": 4.802262808971443e-05, + "loss": 1.4891, + "step": 4822 + }, + { + "epoch": 0.77, + "learning_rate": 4.802162235716562e-05, + "loss": 1.5915, + "step": 4823 + }, + { + "epoch": 0.77, + "learning_rate": 4.80206163794505e-05, + "loss": 1.5226, + "step": 4824 + }, + { + "epoch": 0.77, + "learning_rate": 4.80196101565798e-05, + "loss": 1.4746, + "step": 4825 + }, + { + "epoch": 0.77, + "learning_rate": 4.8018603688564226e-05, + "loss": 1.5573, + "step": 4826 + }, + { + "epoch": 0.77, + "learning_rate": 4.8017596975414494e-05, + "loss": 1.4569, + "step": 4827 + }, + { + "epoch": 0.77, + "learning_rate": 4.801659001714133e-05, + "loss": 1.5977, + "step": 4828 + }, + { + "epoch": 0.77, + "learning_rate": 4.8015582813755454e-05, + "loss": 1.492, + "step": 4829 + }, + { + "epoch": 0.77, + "learning_rate": 4.80145753652676e-05, + "loss": 1.3973, + "step": 4830 + }, + { + "epoch": 0.77, + "learning_rate": 4.801356767168849e-05, + "loss": 1.5646, + "step": 4831 + }, + { + "epoch": 0.77, + "learning_rate": 4.801255973302885e-05, + "loss": 1.4606, + "step": 4832 + }, + { + "epoch": 0.77, + "learning_rate": 4.801155154929943e-05, + "loss": 1.5233, + "step": 4833 + }, + { + "epoch": 0.77, + "learning_rate": 4.801054312051094e-05, + "loss": 1.5306, + "step": 4834 + }, + { + "epoch": 0.77, + "learning_rate": 4.800953444667415e-05, + "loss": 1.4823, + "step": 4835 + }, + { + "epoch": 0.77, + "learning_rate": 4.800852552779978e-05, + "loss": 1.5475, + "step": 4836 + }, + { + "epoch": 0.77, + "learning_rate": 4.800751636389859e-05, + "loss": 1.5156, + "step": 4837 + }, + { + "epoch": 0.77, + "learning_rate": 4.800650695498132e-05, + "loss": 1.5181, + "step": 4838 + }, + { + "epoch": 0.77, + "learning_rate": 4.800549730105871e-05, + "loss": 1.495, + "step": 4839 + }, + { + "epoch": 0.77, + "learning_rate": 4.8004487402141525e-05, + "loss": 1.4658, + "step": 4840 + }, + { + "epoch": 0.77, + "learning_rate": 4.800347725824052e-05, + "loss": 1.5054, + "step": 4841 + }, + { + "epoch": 0.77, + "learning_rate": 4.8002466869366435e-05, + "loss": 1.5295, + "step": 4842 + }, + { + "epoch": 0.77, + "learning_rate": 4.800145623553005e-05, + "loss": 1.4959, + "step": 4843 + }, + { + "epoch": 0.77, + "learning_rate": 4.800044535674213e-05, + "loss": 1.4639, + "step": 4844 + }, + { + "epoch": 0.77, + "learning_rate": 4.799943423301342e-05, + "loss": 1.5142, + "step": 4845 + }, + { + "epoch": 0.77, + "learning_rate": 4.7998422864354694e-05, + "loss": 1.4855, + "step": 4846 + }, + { + "epoch": 0.77, + "learning_rate": 4.7997411250776734e-05, + "loss": 1.5596, + "step": 4847 + }, + { + "epoch": 0.77, + "learning_rate": 4.799639939229031e-05, + "loss": 1.5166, + "step": 4848 + }, + { + "epoch": 0.77, + "learning_rate": 4.799538728890619e-05, + "loss": 1.4906, + "step": 4849 + }, + { + "epoch": 0.77, + "learning_rate": 4.7994374940635155e-05, + "loss": 1.502, + "step": 4850 + }, + { + "epoch": 0.77, + "learning_rate": 4.799336234748799e-05, + "loss": 1.4762, + "step": 4851 + }, + { + "epoch": 0.77, + "learning_rate": 4.7992349509475476e-05, + "loss": 1.4723, + "step": 4852 + }, + { + "epoch": 0.77, + "learning_rate": 4.799133642660839e-05, + "loss": 1.5588, + "step": 4853 + }, + { + "epoch": 0.77, + "learning_rate": 4.799032309889754e-05, + "loss": 1.4777, + "step": 4854 + }, + { + "epoch": 0.77, + "learning_rate": 4.7989309526353696e-05, + "loss": 1.4631, + "step": 4855 + }, + { + "epoch": 0.77, + "learning_rate": 4.7988295708987674e-05, + "loss": 1.4518, + "step": 4856 + }, + { + "epoch": 0.77, + "learning_rate": 4.798728164681026e-05, + "loss": 1.498, + "step": 4857 + }, + { + "epoch": 0.77, + "learning_rate": 4.798626733983225e-05, + "loss": 1.5143, + "step": 4858 + }, + { + "epoch": 0.77, + "learning_rate": 4.7985252788064436e-05, + "loss": 1.472, + "step": 4859 + }, + { + "epoch": 0.77, + "learning_rate": 4.798423799151765e-05, + "loss": 1.5318, + "step": 4860 + }, + { + "epoch": 0.77, + "learning_rate": 4.798322295020268e-05, + "loss": 1.4857, + "step": 4861 + }, + { + "epoch": 0.77, + "learning_rate": 4.798220766413033e-05, + "loss": 1.5392, + "step": 4862 + }, + { + "epoch": 0.77, + "learning_rate": 4.7981192133311424e-05, + "loss": 1.5438, + "step": 4863 + }, + { + "epoch": 0.77, + "learning_rate": 4.798017635775678e-05, + "loss": 1.547, + "step": 4864 + }, + { + "epoch": 0.78, + "learning_rate": 4.7979160337477205e-05, + "loss": 1.4821, + "step": 4865 + }, + { + "epoch": 0.78, + "learning_rate": 4.797814407248353e-05, + "loss": 1.5208, + "step": 4866 + }, + { + "epoch": 0.78, + "learning_rate": 4.7977127562786565e-05, + "loss": 1.5005, + "step": 4867 + }, + { + "epoch": 0.78, + "learning_rate": 4.797611080839714e-05, + "loss": 1.5067, + "step": 4868 + }, + { + "epoch": 0.78, + "learning_rate": 4.797509380932609e-05, + "loss": 1.5404, + "step": 4869 + }, + { + "epoch": 0.78, + "learning_rate": 4.7974076565584234e-05, + "loss": 1.5161, + "step": 4870 + }, + { + "epoch": 0.78, + "learning_rate": 4.797305907718241e-05, + "loss": 1.4795, + "step": 4871 + }, + { + "epoch": 0.78, + "learning_rate": 4.797204134413146e-05, + "loss": 1.5104, + "step": 4872 + }, + { + "epoch": 0.78, + "learning_rate": 4.7971023366442216e-05, + "loss": 1.4668, + "step": 4873 + }, + { + "epoch": 0.78, + "learning_rate": 4.797000514412552e-05, + "loss": 1.4683, + "step": 4874 + }, + { + "epoch": 0.78, + "learning_rate": 4.7968986677192205e-05, + "loss": 1.5111, + "step": 4875 + }, + { + "epoch": 0.78, + "learning_rate": 4.796796796565313e-05, + "loss": 1.5511, + "step": 4876 + }, + { + "epoch": 0.78, + "learning_rate": 4.796694900951914e-05, + "loss": 1.4935, + "step": 4877 + }, + { + "epoch": 0.78, + "learning_rate": 4.79659298088011e-05, + "loss": 1.4456, + "step": 4878 + }, + { + "epoch": 0.78, + "learning_rate": 4.7964910363509833e-05, + "loss": 1.5905, + "step": 4879 + }, + { + "epoch": 0.78, + "learning_rate": 4.796389067365622e-05, + "loss": 1.513, + "step": 4880 + }, + { + "epoch": 0.78, + "learning_rate": 4.7962870739251114e-05, + "loss": 1.4108, + "step": 4881 + }, + { + "epoch": 0.78, + "learning_rate": 4.796185056030538e-05, + "loss": 1.5177, + "step": 4882 + }, + { + "epoch": 0.78, + "learning_rate": 4.796083013682987e-05, + "loss": 1.4622, + "step": 4883 + }, + { + "epoch": 0.78, + "learning_rate": 4.795980946883546e-05, + "loss": 1.4455, + "step": 4884 + }, + { + "epoch": 0.78, + "learning_rate": 4.795878855633302e-05, + "loss": 1.5841, + "step": 4885 + }, + { + "epoch": 0.78, + "learning_rate": 4.795776739933342e-05, + "loss": 1.5334, + "step": 4886 + }, + { + "epoch": 0.78, + "learning_rate": 4.795674599784754e-05, + "loss": 1.4785, + "step": 4887 + }, + { + "epoch": 0.78, + "learning_rate": 4.795572435188624e-05, + "loss": 1.4878, + "step": 4888 + }, + { + "epoch": 0.78, + "learning_rate": 4.795470246146043e-05, + "loss": 1.5013, + "step": 4889 + }, + { + "epoch": 0.78, + "learning_rate": 4.7953680326580966e-05, + "loss": 1.5077, + "step": 4890 + }, + { + "epoch": 0.78, + "learning_rate": 4.795265794725874e-05, + "loss": 1.4886, + "step": 4891 + }, + { + "epoch": 0.78, + "learning_rate": 4.795163532350464e-05, + "loss": 1.481, + "step": 4892 + }, + { + "epoch": 0.78, + "learning_rate": 4.795061245532956e-05, + "loss": 1.4551, + "step": 4893 + }, + { + "epoch": 0.78, + "learning_rate": 4.79495893427444e-05, + "loss": 1.4784, + "step": 4894 + }, + { + "epoch": 0.78, + "learning_rate": 4.794856598576003e-05, + "loss": 1.4696, + "step": 4895 + }, + { + "epoch": 0.78, + "learning_rate": 4.7947542384387376e-05, + "loss": 1.4797, + "step": 4896 + }, + { + "epoch": 0.78, + "learning_rate": 4.7946518538637326e-05, + "loss": 1.5384, + "step": 4897 + }, + { + "epoch": 0.78, + "learning_rate": 4.7945494448520785e-05, + "loss": 1.4626, + "step": 4898 + }, + { + "epoch": 0.78, + "learning_rate": 4.7944470114048656e-05, + "loss": 1.5483, + "step": 4899 + }, + { + "epoch": 0.78, + "learning_rate": 4.7943445535231855e-05, + "loss": 1.4896, + "step": 4900 + }, + { + "epoch": 0.78, + "learning_rate": 4.7942420712081285e-05, + "loss": 1.4938, + "step": 4901 + }, + { + "epoch": 0.78, + "learning_rate": 4.794139564460786e-05, + "loss": 1.4886, + "step": 4902 + }, + { + "epoch": 0.78, + "learning_rate": 4.7940370332822505e-05, + "loss": 1.4831, + "step": 4903 + }, + { + "epoch": 0.78, + "learning_rate": 4.793934477673613e-05, + "loss": 1.4571, + "step": 4904 + }, + { + "epoch": 0.78, + "learning_rate": 4.7938318976359656e-05, + "loss": 1.5145, + "step": 4905 + }, + { + "epoch": 0.78, + "learning_rate": 4.793729293170402e-05, + "loss": 1.4375, + "step": 4906 + }, + { + "epoch": 0.78, + "learning_rate": 4.793626664278014e-05, + "loss": 1.5283, + "step": 4907 + }, + { + "epoch": 0.78, + "learning_rate": 4.793524010959893e-05, + "loss": 1.5402, + "step": 4908 + }, + { + "epoch": 0.78, + "learning_rate": 4.793421333217135e-05, + "loss": 1.4465, + "step": 4909 + }, + { + "epoch": 0.78, + "learning_rate": 4.7933186310508325e-05, + "loss": 1.534, + "step": 4910 + }, + { + "epoch": 0.78, + "learning_rate": 4.7932159044620784e-05, + "loss": 1.5532, + "step": 4911 + }, + { + "epoch": 0.78, + "learning_rate": 4.793113153451967e-05, + "loss": 1.5402, + "step": 4912 + }, + { + "epoch": 0.78, + "learning_rate": 4.793010378021593e-05, + "loss": 1.5413, + "step": 4913 + }, + { + "epoch": 0.78, + "learning_rate": 4.79290757817205e-05, + "loss": 1.5062, + "step": 4914 + }, + { + "epoch": 0.78, + "learning_rate": 4.792804753904434e-05, + "loss": 1.5436, + "step": 4915 + }, + { + "epoch": 0.78, + "learning_rate": 4.7927019052198394e-05, + "loss": 1.5042, + "step": 4916 + }, + { + "epoch": 0.78, + "learning_rate": 4.7925990321193614e-05, + "loss": 1.5518, + "step": 4917 + }, + { + "epoch": 0.78, + "learning_rate": 4.792496134604095e-05, + "loss": 1.5643, + "step": 4918 + }, + { + "epoch": 0.78, + "learning_rate": 4.792393212675137e-05, + "loss": 1.5138, + "step": 4919 + }, + { + "epoch": 0.78, + "learning_rate": 4.7922902663335835e-05, + "loss": 1.5068, + "step": 4920 + }, + { + "epoch": 0.78, + "learning_rate": 4.792187295580529e-05, + "loss": 1.4666, + "step": 4921 + }, + { + "epoch": 0.78, + "learning_rate": 4.7920843004170724e-05, + "loss": 1.5072, + "step": 4922 + }, + { + "epoch": 0.78, + "learning_rate": 4.7919812808443096e-05, + "loss": 1.4811, + "step": 4923 + }, + { + "epoch": 0.78, + "learning_rate": 4.791878236863338e-05, + "loss": 1.5697, + "step": 4924 + }, + { + "epoch": 0.78, + "learning_rate": 4.791775168475253e-05, + "loss": 1.5332, + "step": 4925 + }, + { + "epoch": 0.78, + "learning_rate": 4.791672075681155e-05, + "loss": 1.5516, + "step": 4926 + }, + { + "epoch": 0.78, + "learning_rate": 4.791568958482141e-05, + "loss": 1.4998, + "step": 4927 + }, + { + "epoch": 0.79, + "learning_rate": 4.791465816879308e-05, + "loss": 1.4992, + "step": 4928 + }, + { + "epoch": 0.79, + "learning_rate": 4.791362650873756e-05, + "loss": 1.4525, + "step": 4929 + }, + { + "epoch": 0.79, + "learning_rate": 4.791259460466583e-05, + "loss": 1.542, + "step": 4930 + }, + { + "epoch": 0.79, + "learning_rate": 4.791156245658887e-05, + "loss": 1.6008, + "step": 4931 + }, + { + "epoch": 0.79, + "learning_rate": 4.791053006451768e-05, + "loss": 1.5202, + "step": 4932 + }, + { + "epoch": 0.79, + "learning_rate": 4.790949742846326e-05, + "loss": 1.5158, + "step": 4933 + }, + { + "epoch": 0.79, + "learning_rate": 4.79084645484366e-05, + "loss": 1.5438, + "step": 4934 + }, + { + "epoch": 0.79, + "learning_rate": 4.7907431424448704e-05, + "loss": 1.5085, + "step": 4935 + }, + { + "epoch": 0.79, + "learning_rate": 4.790639805651056e-05, + "loss": 1.519, + "step": 4936 + }, + { + "epoch": 0.79, + "learning_rate": 4.790536444463319e-05, + "loss": 1.4989, + "step": 4937 + }, + { + "epoch": 0.79, + "learning_rate": 4.7904330588827595e-05, + "loss": 1.5146, + "step": 4938 + }, + { + "epoch": 0.79, + "learning_rate": 4.7903296489104784e-05, + "loss": 1.5581, + "step": 4939 + }, + { + "epoch": 0.79, + "learning_rate": 4.7902262145475774e-05, + "loss": 1.5666, + "step": 4940 + }, + { + "epoch": 0.79, + "learning_rate": 4.790122755795157e-05, + "loss": 1.4735, + "step": 4941 + }, + { + "epoch": 0.79, + "learning_rate": 4.79001927265432e-05, + "loss": 1.5288, + "step": 4942 + }, + { + "epoch": 0.79, + "learning_rate": 4.789915765126168e-05, + "loss": 1.55, + "step": 4943 + }, + { + "epoch": 0.79, + "learning_rate": 4.7898122332118036e-05, + "loss": 1.4881, + "step": 4944 + }, + { + "epoch": 0.79, + "learning_rate": 4.7897086769123285e-05, + "loss": 1.4841, + "step": 4945 + }, + { + "epoch": 0.79, + "learning_rate": 4.789605096228846e-05, + "loss": 1.4697, + "step": 4946 + }, + { + "epoch": 0.79, + "learning_rate": 4.78950149116246e-05, + "loss": 1.4704, + "step": 4947 + }, + { + "epoch": 0.79, + "learning_rate": 4.789397861714273e-05, + "loss": 1.4933, + "step": 4948 + }, + { + "epoch": 0.79, + "learning_rate": 4.789294207885388e-05, + "loss": 1.5174, + "step": 4949 + }, + { + "epoch": 0.79, + "learning_rate": 4.78919052967691e-05, + "loss": 1.4941, + "step": 4950 + }, + { + "epoch": 0.79, + "learning_rate": 4.7890868270899424e-05, + "loss": 1.5911, + "step": 4951 + }, + { + "epoch": 0.79, + "learning_rate": 4.78898310012559e-05, + "loss": 1.4964, + "step": 4952 + }, + { + "epoch": 0.79, + "learning_rate": 4.7888793487849584e-05, + "loss": 1.4736, + "step": 4953 + }, + { + "epoch": 0.79, + "learning_rate": 4.78877557306915e-05, + "loss": 1.5252, + "step": 4954 + }, + { + "epoch": 0.79, + "learning_rate": 4.788671772979272e-05, + "loss": 1.4787, + "step": 4955 + }, + { + "epoch": 0.79, + "learning_rate": 4.788567948516428e-05, + "loss": 1.4535, + "step": 4956 + }, + { + "epoch": 0.79, + "learning_rate": 4.788464099681726e-05, + "loss": 1.5163, + "step": 4957 + }, + { + "epoch": 0.79, + "learning_rate": 4.78836022647627e-05, + "loss": 1.436, + "step": 4958 + }, + { + "epoch": 0.79, + "learning_rate": 4.7882563289011674e-05, + "loss": 1.4372, + "step": 4959 + }, + { + "epoch": 0.79, + "learning_rate": 4.788152406957524e-05, + "loss": 1.4753, + "step": 4960 + }, + { + "epoch": 0.79, + "learning_rate": 4.7880484606464456e-05, + "loss": 1.5031, + "step": 4961 + }, + { + "epoch": 0.79, + "learning_rate": 4.787944489969042e-05, + "loss": 1.4887, + "step": 4962 + }, + { + "epoch": 0.79, + "learning_rate": 4.787840494926417e-05, + "loss": 1.5423, + "step": 4963 + }, + { + "epoch": 0.79, + "learning_rate": 4.787736475519681e-05, + "loss": 1.5806, + "step": 4964 + }, + { + "epoch": 0.79, + "learning_rate": 4.787632431749939e-05, + "loss": 1.5109, + "step": 4965 + }, + { + "epoch": 0.79, + "learning_rate": 4.787528363618301e-05, + "loss": 1.48, + "step": 4966 + }, + { + "epoch": 0.79, + "learning_rate": 4.7874242711258754e-05, + "loss": 1.5444, + "step": 4967 + }, + { + "epoch": 0.79, + "learning_rate": 4.78732015427377e-05, + "loss": 1.5112, + "step": 4968 + }, + { + "epoch": 0.79, + "learning_rate": 4.7872160130630935e-05, + "loss": 1.5617, + "step": 4969 + }, + { + "epoch": 0.79, + "learning_rate": 4.787111847494955e-05, + "loss": 1.514, + "step": 4970 + }, + { + "epoch": 0.79, + "learning_rate": 4.7870076575704634e-05, + "loss": 1.443, + "step": 4971 + }, + { + "epoch": 0.79, + "learning_rate": 4.786903443290729e-05, + "loss": 1.5111, + "step": 4972 + }, + { + "epoch": 0.79, + "learning_rate": 4.786799204656862e-05, + "loss": 1.5241, + "step": 4973 + }, + { + "epoch": 0.79, + "learning_rate": 4.786694941669971e-05, + "loss": 1.4948, + "step": 4974 + }, + { + "epoch": 0.79, + "learning_rate": 4.786590654331167e-05, + "loss": 1.5308, + "step": 4975 + }, + { + "epoch": 0.79, + "learning_rate": 4.786486342641561e-05, + "loss": 1.473, + "step": 4976 + }, + { + "epoch": 0.79, + "learning_rate": 4.7863820066022636e-05, + "loss": 1.4886, + "step": 4977 + }, + { + "epoch": 0.79, + "learning_rate": 4.7862776462143866e-05, + "loss": 1.5887, + "step": 4978 + }, + { + "epoch": 0.79, + "learning_rate": 4.78617326147904e-05, + "loss": 1.5226, + "step": 4979 + }, + { + "epoch": 0.79, + "learning_rate": 4.786068852397336e-05, + "loss": 1.4932, + "step": 4980 + }, + { + "epoch": 0.79, + "learning_rate": 4.785964418970387e-05, + "loss": 1.5135, + "step": 4981 + }, + { + "epoch": 0.79, + "learning_rate": 4.785859961199305e-05, + "loss": 1.4292, + "step": 4982 + }, + { + "epoch": 0.79, + "learning_rate": 4.7857554790852025e-05, + "loss": 1.5055, + "step": 4983 + }, + { + "epoch": 0.79, + "learning_rate": 4.785650972629191e-05, + "loss": 1.5382, + "step": 4984 + }, + { + "epoch": 0.79, + "learning_rate": 4.7855464418323834e-05, + "loss": 1.5199, + "step": 4985 + }, + { + "epoch": 0.79, + "learning_rate": 4.785441886695895e-05, + "loss": 1.5664, + "step": 4986 + }, + { + "epoch": 0.79, + "learning_rate": 4.7853373072208384e-05, + "loss": 1.5535, + "step": 4987 + }, + { + "epoch": 0.79, + "learning_rate": 4.785232703408327e-05, + "loss": 1.5817, + "step": 4988 + }, + { + "epoch": 0.79, + "learning_rate": 4.785128075259474e-05, + "loss": 1.4959, + "step": 4989 + }, + { + "epoch": 0.79, + "learning_rate": 4.7850234227753943e-05, + "loss": 1.5065, + "step": 4990 + }, + { + "epoch": 0.8, + "learning_rate": 4.784918745957203e-05, + "loss": 1.508, + "step": 4991 + }, + { + "epoch": 0.8, + "learning_rate": 4.784814044806014e-05, + "loss": 1.5311, + "step": 4992 + }, + { + "epoch": 0.8, + "learning_rate": 4.784709319322942e-05, + "loss": 1.5182, + "step": 4993 + }, + { + "epoch": 0.8, + "learning_rate": 4.784604569509104e-05, + "loss": 1.5579, + "step": 4994 + }, + { + "epoch": 0.8, + "learning_rate": 4.784499795365613e-05, + "loss": 1.4562, + "step": 4995 + }, + { + "epoch": 0.8, + "learning_rate": 4.784394996893587e-05, + "loss": 1.5008, + "step": 4996 + }, + { + "epoch": 0.8, + "learning_rate": 4.784290174094141e-05, + "loss": 1.4797, + "step": 4997 + }, + { + "epoch": 0.8, + "learning_rate": 4.784185326968391e-05, + "loss": 1.4551, + "step": 4998 + }, + { + "epoch": 0.8, + "learning_rate": 4.7840804555174554e-05, + "loss": 1.4889, + "step": 4999 + }, + { + "epoch": 0.8, + "learning_rate": 4.783975559742449e-05, + "loss": 1.5228, + "step": 5000 + }, + { + "epoch": 0.8, + "learning_rate": 4.783870639644489e-05, + "loss": 1.5005, + "step": 5001 + }, + { + "epoch": 0.8, + "learning_rate": 4.783765695224693e-05, + "loss": 1.5949, + "step": 5002 + }, + { + "epoch": 0.8, + "learning_rate": 4.783660726484179e-05, + "loss": 1.4723, + "step": 5003 + }, + { + "epoch": 0.8, + "learning_rate": 4.7835557334240653e-05, + "loss": 1.5461, + "step": 5004 + }, + { + "epoch": 0.8, + "learning_rate": 4.7834507160454695e-05, + "loss": 1.4691, + "step": 5005 + }, + { + "epoch": 0.8, + "learning_rate": 4.78334567434951e-05, + "loss": 1.4517, + "step": 5006 + }, + { + "epoch": 0.8, + "learning_rate": 4.783240608337304e-05, + "loss": 1.4873, + "step": 5007 + }, + { + "epoch": 0.8, + "learning_rate": 4.783135518009973e-05, + "loss": 1.4956, + "step": 5008 + }, + { + "epoch": 0.8, + "learning_rate": 4.7830304033686344e-05, + "loss": 1.4994, + "step": 5009 + }, + { + "epoch": 0.8, + "learning_rate": 4.782925264414408e-05, + "loss": 1.4487, + "step": 5010 + }, + { + "epoch": 0.8, + "learning_rate": 4.782820101148414e-05, + "loss": 1.4815, + "step": 5011 + }, + { + "epoch": 0.8, + "learning_rate": 4.782714913571771e-05, + "loss": 1.5013, + "step": 5012 + }, + { + "epoch": 0.8, + "learning_rate": 4.782609701685601e-05, + "loss": 1.5177, + "step": 5013 + }, + { + "epoch": 0.8, + "learning_rate": 4.782504465491023e-05, + "loss": 1.5356, + "step": 5014 + }, + { + "epoch": 0.8, + "learning_rate": 4.782399204989159e-05, + "loss": 1.4816, + "step": 5015 + }, + { + "epoch": 0.8, + "learning_rate": 4.782293920181128e-05, + "loss": 1.4981, + "step": 5016 + }, + { + "epoch": 0.8, + "learning_rate": 4.782188611068053e-05, + "loss": 1.5454, + "step": 5017 + }, + { + "epoch": 0.8, + "learning_rate": 4.782083277651054e-05, + "loss": 1.5026, + "step": 5018 + }, + { + "epoch": 0.8, + "learning_rate": 4.781977919931254e-05, + "loss": 1.4321, + "step": 5019 + }, + { + "epoch": 0.8, + "learning_rate": 4.781872537909775e-05, + "loss": 1.5194, + "step": 5020 + }, + { + "epoch": 0.8, + "learning_rate": 4.7817671315877385e-05, + "loss": 1.4719, + "step": 5021 + }, + { + "epoch": 0.8, + "learning_rate": 4.781661700966267e-05, + "loss": 1.5181, + "step": 5022 + }, + { + "epoch": 0.8, + "learning_rate": 4.781556246046485e-05, + "loss": 1.4282, + "step": 5023 + }, + { + "epoch": 0.8, + "learning_rate": 4.7814507668295125e-05, + "loss": 1.4452, + "step": 5024 + }, + { + "epoch": 0.8, + "learning_rate": 4.781345263316476e-05, + "loss": 1.4864, + "step": 5025 + }, + { + "epoch": 0.8, + "learning_rate": 4.781239735508496e-05, + "loss": 1.4972, + "step": 5026 + }, + { + "epoch": 0.8, + "learning_rate": 4.781134183406698e-05, + "loss": 1.4961, + "step": 5027 + }, + { + "epoch": 0.8, + "learning_rate": 4.781028607012207e-05, + "loss": 1.478, + "step": 5028 + }, + { + "epoch": 0.8, + "learning_rate": 4.7809230063261454e-05, + "loss": 1.57, + "step": 5029 + }, + { + "epoch": 0.8, + "learning_rate": 4.780817381349638e-05, + "loss": 1.4805, + "step": 5030 + }, + { + "epoch": 0.8, + "learning_rate": 4.7807117320838115e-05, + "loss": 1.5552, + "step": 5031 + }, + { + "epoch": 0.8, + "learning_rate": 4.780606058529788e-05, + "loss": 1.5353, + "step": 5032 + }, + { + "epoch": 0.8, + "learning_rate": 4.780500360688696e-05, + "loss": 1.4904, + "step": 5033 + }, + { + "epoch": 0.8, + "learning_rate": 4.780394638561659e-05, + "loss": 1.4749, + "step": 5034 + }, + { + "epoch": 0.8, + "learning_rate": 4.780288892149804e-05, + "loss": 1.5488, + "step": 5035 + }, + { + "epoch": 0.8, + "learning_rate": 4.780183121454257e-05, + "loss": 1.5055, + "step": 5036 + }, + { + "epoch": 0.8, + "learning_rate": 4.7800773264761436e-05, + "loss": 1.576, + "step": 5037 + }, + { + "epoch": 0.8, + "learning_rate": 4.779971507216591e-05, + "loss": 1.4333, + "step": 5038 + }, + { + "epoch": 0.8, + "learning_rate": 4.779865663676726e-05, + "loss": 1.5002, + "step": 5039 + }, + { + "epoch": 0.8, + "learning_rate": 4.7797597958576754e-05, + "loss": 1.5125, + "step": 5040 + }, + { + "epoch": 0.8, + "learning_rate": 4.7796539037605677e-05, + "loss": 1.4788, + "step": 5041 + }, + { + "epoch": 0.8, + "learning_rate": 4.7795479873865293e-05, + "loss": 1.4924, + "step": 5042 + }, + { + "epoch": 0.8, + "learning_rate": 4.7794420467366894e-05, + "loss": 1.6165, + "step": 5043 + }, + { + "epoch": 0.8, + "learning_rate": 4.779336081812176e-05, + "loss": 1.4671, + "step": 5044 + }, + { + "epoch": 0.8, + "learning_rate": 4.779230092614116e-05, + "loss": 1.5002, + "step": 5045 + }, + { + "epoch": 0.8, + "learning_rate": 4.7791240791436395e-05, + "loss": 1.4715, + "step": 5046 + }, + { + "epoch": 0.8, + "learning_rate": 4.779018041401876e-05, + "loss": 1.4746, + "step": 5047 + }, + { + "epoch": 0.8, + "learning_rate": 4.778911979389953e-05, + "loss": 1.4294, + "step": 5048 + }, + { + "epoch": 0.8, + "learning_rate": 4.778805893109002e-05, + "loss": 1.528, + "step": 5049 + }, + { + "epoch": 0.8, + "learning_rate": 4.778699782560151e-05, + "loss": 1.5763, + "step": 5050 + }, + { + "epoch": 0.8, + "learning_rate": 4.778593647744531e-05, + "loss": 1.5815, + "step": 5051 + }, + { + "epoch": 0.8, + "learning_rate": 4.778487488663272e-05, + "loss": 1.4504, + "step": 5052 + }, + { + "epoch": 0.81, + "learning_rate": 4.7783813053175045e-05, + "loss": 1.4819, + "step": 5053 + }, + { + "epoch": 0.81, + "learning_rate": 4.778275097708359e-05, + "loss": 1.4409, + "step": 5054 + }, + { + "epoch": 0.81, + "learning_rate": 4.778168865836967e-05, + "loss": 1.4377, + "step": 5055 + }, + { + "epoch": 0.81, + "learning_rate": 4.7780626097044604e-05, + "loss": 1.4626, + "step": 5056 + }, + { + "epoch": 0.81, + "learning_rate": 4.77795632931197e-05, + "loss": 1.5223, + "step": 5057 + }, + { + "epoch": 0.81, + "learning_rate": 4.7778500246606273e-05, + "loss": 1.4994, + "step": 5058 + }, + { + "epoch": 0.81, + "learning_rate": 4.777743695751564e-05, + "loss": 1.5099, + "step": 5059 + }, + { + "epoch": 0.81, + "learning_rate": 4.777637342585914e-05, + "loss": 1.5213, + "step": 5060 + }, + { + "epoch": 0.81, + "learning_rate": 4.7775309651648096e-05, + "loss": 1.4751, + "step": 5061 + }, + { + "epoch": 0.81, + "learning_rate": 4.777424563489382e-05, + "loss": 1.5226, + "step": 5062 + }, + { + "epoch": 0.81, + "learning_rate": 4.777318137560767e-05, + "loss": 1.5503, + "step": 5063 + }, + { + "epoch": 0.81, + "learning_rate": 4.777211687380095e-05, + "loss": 1.5392, + "step": 5064 + }, + { + "epoch": 0.81, + "learning_rate": 4.7771052129485015e-05, + "loss": 1.4769, + "step": 5065 + }, + { + "epoch": 0.81, + "learning_rate": 4.7769987142671203e-05, + "loss": 1.5295, + "step": 5066 + }, + { + "epoch": 0.81, + "learning_rate": 4.776892191337085e-05, + "loss": 1.5311, + "step": 5067 + }, + { + "epoch": 0.81, + "learning_rate": 4.7767856441595294e-05, + "loss": 1.508, + "step": 5068 + }, + { + "epoch": 0.81, + "learning_rate": 4.77667907273559e-05, + "loss": 1.52, + "step": 5069 + }, + { + "epoch": 0.81, + "learning_rate": 4.7765724770664e-05, + "loss": 1.5967, + "step": 5070 + }, + { + "epoch": 0.81, + "learning_rate": 4.776465857153095e-05, + "loss": 1.5579, + "step": 5071 + }, + { + "epoch": 0.81, + "learning_rate": 4.7763592129968115e-05, + "loss": 1.5514, + "step": 5072 + }, + { + "epoch": 0.81, + "learning_rate": 4.7762525445986845e-05, + "loss": 1.529, + "step": 5073 + }, + { + "epoch": 0.81, + "learning_rate": 4.776145851959849e-05, + "loss": 1.4764, + "step": 5074 + }, + { + "epoch": 0.81, + "learning_rate": 4.776039135081443e-05, + "loss": 1.5225, + "step": 5075 + }, + { + "epoch": 0.81, + "learning_rate": 4.775932393964602e-05, + "loss": 1.4596, + "step": 5076 + }, + { + "epoch": 0.81, + "learning_rate": 4.775825628610462e-05, + "loss": 1.4342, + "step": 5077 + }, + { + "epoch": 0.81, + "learning_rate": 4.7757188390201615e-05, + "loss": 1.536, + "step": 5078 + }, + { + "epoch": 0.81, + "learning_rate": 4.7756120251948354e-05, + "loss": 1.4461, + "step": 5079 + }, + { + "epoch": 0.81, + "learning_rate": 4.775505187135624e-05, + "loss": 1.5285, + "step": 5080 + }, + { + "epoch": 0.81, + "learning_rate": 4.775398324843664e-05, + "loss": 1.4946, + "step": 5081 + }, + { + "epoch": 0.81, + "learning_rate": 4.775291438320093e-05, + "loss": 1.5461, + "step": 5082 + }, + { + "epoch": 0.81, + "learning_rate": 4.77518452756605e-05, + "loss": 1.5506, + "step": 5083 + }, + { + "epoch": 0.81, + "learning_rate": 4.775077592582673e-05, + "loss": 1.5653, + "step": 5084 + }, + { + "epoch": 0.81, + "learning_rate": 4.7749706333710996e-05, + "loss": 1.592, + "step": 5085 + }, + { + "epoch": 0.81, + "learning_rate": 4.774863649932472e-05, + "loss": 1.5532, + "step": 5086 + }, + { + "epoch": 0.81, + "learning_rate": 4.774756642267926e-05, + "loss": 1.5023, + "step": 5087 + }, + { + "epoch": 0.81, + "learning_rate": 4.7746496103786036e-05, + "loss": 1.5636, + "step": 5088 + }, + { + "epoch": 0.81, + "learning_rate": 4.774542554265644e-05, + "loss": 1.4985, + "step": 5089 + }, + { + "epoch": 0.81, + "learning_rate": 4.7744354739301866e-05, + "loss": 1.5277, + "step": 5090 + }, + { + "epoch": 0.81, + "learning_rate": 4.774328369373373e-05, + "loss": 1.5124, + "step": 5091 + }, + { + "epoch": 0.81, + "learning_rate": 4.774221240596342e-05, + "loss": 1.4878, + "step": 5092 + }, + { + "epoch": 0.81, + "learning_rate": 4.774114087600237e-05, + "loss": 1.4942, + "step": 5093 + }, + { + "epoch": 0.81, + "learning_rate": 4.774006910386196e-05, + "loss": 1.5143, + "step": 5094 + }, + { + "epoch": 0.81, + "learning_rate": 4.7738997089553626e-05, + "loss": 1.467, + "step": 5095 + }, + { + "epoch": 0.81, + "learning_rate": 4.7737924833088774e-05, + "loss": 1.5138, + "step": 5096 + }, + { + "epoch": 0.81, + "learning_rate": 4.773685233447883e-05, + "loss": 1.5127, + "step": 5097 + }, + { + "epoch": 0.81, + "learning_rate": 4.773577959373522e-05, + "loss": 1.4801, + "step": 5098 + }, + { + "epoch": 0.81, + "learning_rate": 4.773470661086935e-05, + "loss": 1.5234, + "step": 5099 + }, + { + "epoch": 0.81, + "learning_rate": 4.773363338589266e-05, + "loss": 1.4927, + "step": 5100 + }, + { + "epoch": 0.81, + "learning_rate": 4.773255991881658e-05, + "loss": 1.4774, + "step": 5101 + }, + { + "epoch": 0.81, + "learning_rate": 4.7731486209652544e-05, + "loss": 1.4733, + "step": 5102 + }, + { + "epoch": 0.81, + "learning_rate": 4.773041225841197e-05, + "loss": 1.4696, + "step": 5103 + }, + { + "epoch": 0.81, + "learning_rate": 4.772933806510631e-05, + "loss": 1.5729, + "step": 5104 + }, + { + "epoch": 0.81, + "learning_rate": 4.7728263629746994e-05, + "loss": 1.501, + "step": 5105 + }, + { + "epoch": 0.81, + "learning_rate": 4.772718895234547e-05, + "loss": 1.5363, + "step": 5106 + }, + { + "epoch": 0.81, + "learning_rate": 4.772611403291318e-05, + "loss": 1.5382, + "step": 5107 + }, + { + "epoch": 0.81, + "learning_rate": 4.772503887146158e-05, + "loss": 1.4406, + "step": 5108 + }, + { + "epoch": 0.81, + "learning_rate": 4.7723963468002107e-05, + "loss": 1.5708, + "step": 5109 + }, + { + "epoch": 0.81, + "learning_rate": 4.772288782254622e-05, + "loss": 1.5158, + "step": 5110 + }, + { + "epoch": 0.81, + "learning_rate": 4.7721811935105384e-05, + "loss": 1.4658, + "step": 5111 + }, + { + "epoch": 0.81, + "learning_rate": 4.772073580569103e-05, + "loss": 1.5252, + "step": 5112 + }, + { + "epoch": 0.81, + "learning_rate": 4.7719659434314644e-05, + "loss": 1.471, + "step": 5113 + }, + { + "epoch": 0.81, + "learning_rate": 4.771858282098768e-05, + "loss": 1.548, + "step": 5114 + }, + { + "epoch": 0.81, + "learning_rate": 4.771750596572159e-05, + "loss": 1.528, + "step": 5115 + }, + { + "epoch": 0.82, + "learning_rate": 4.7716428868527865e-05, + "loss": 1.5184, + "step": 5116 + }, + { + "epoch": 0.82, + "learning_rate": 4.7715351529417954e-05, + "loss": 1.4972, + "step": 5117 + }, + { + "epoch": 0.82, + "learning_rate": 4.771427394840334e-05, + "loss": 1.4893, + "step": 5118 + }, + { + "epoch": 0.82, + "learning_rate": 4.7713196125495505e-05, + "loss": 1.4727, + "step": 5119 + }, + { + "epoch": 0.82, + "learning_rate": 4.7712118060705926e-05, + "loss": 1.487, + "step": 5120 + }, + { + "epoch": 0.82, + "learning_rate": 4.7711039754046075e-05, + "loss": 1.4707, + "step": 5121 + }, + { + "epoch": 0.82, + "learning_rate": 4.770996120552743e-05, + "loss": 1.502, + "step": 5122 + }, + { + "epoch": 0.82, + "learning_rate": 4.7708882415161494e-05, + "loss": 1.5479, + "step": 5123 + }, + { + "epoch": 0.82, + "learning_rate": 4.770780338295974e-05, + "loss": 1.5423, + "step": 5124 + }, + { + "epoch": 0.82, + "learning_rate": 4.7706724108933676e-05, + "loss": 1.4526, + "step": 5125 + }, + { + "epoch": 0.82, + "learning_rate": 4.770564459309478e-05, + "loss": 1.4907, + "step": 5126 + }, + { + "epoch": 0.82, + "learning_rate": 4.7704564835454555e-05, + "loss": 1.4824, + "step": 5127 + }, + { + "epoch": 0.82, + "learning_rate": 4.770348483602449e-05, + "loss": 1.4852, + "step": 5128 + }, + { + "epoch": 0.82, + "learning_rate": 4.770240459481611e-05, + "loss": 1.4832, + "step": 5129 + }, + { + "epoch": 0.82, + "learning_rate": 4.77013241118409e-05, + "loss": 1.4246, + "step": 5130 + }, + { + "epoch": 0.82, + "learning_rate": 4.7700243387110366e-05, + "loss": 1.4844, + "step": 5131 + }, + { + "epoch": 0.82, + "learning_rate": 4.769916242063602e-05, + "loss": 1.4779, + "step": 5132 + }, + { + "epoch": 0.82, + "learning_rate": 4.769808121242938e-05, + "loss": 1.4601, + "step": 5133 + }, + { + "epoch": 0.82, + "learning_rate": 4.769699976250195e-05, + "loss": 1.4821, + "step": 5134 + }, + { + "epoch": 0.82, + "learning_rate": 4.7695918070865264e-05, + "loss": 1.5329, + "step": 5135 + }, + { + "epoch": 0.82, + "learning_rate": 4.7694836137530816e-05, + "loss": 1.4997, + "step": 5136 + }, + { + "epoch": 0.82, + "learning_rate": 4.769375396251015e-05, + "loss": 1.4634, + "step": 5137 + }, + { + "epoch": 0.82, + "learning_rate": 4.769267154581478e-05, + "loss": 1.4929, + "step": 5138 + }, + { + "epoch": 0.82, + "learning_rate": 4.7691588887456236e-05, + "loss": 1.4797, + "step": 5139 + }, + { + "epoch": 0.82, + "learning_rate": 4.769050598744605e-05, + "loss": 1.4831, + "step": 5140 + }, + { + "epoch": 0.82, + "learning_rate": 4.768942284579574e-05, + "loss": 1.5954, + "step": 5141 + }, + { + "epoch": 0.82, + "learning_rate": 4.7688339462516854e-05, + "loss": 1.4741, + "step": 5142 + }, + { + "epoch": 0.82, + "learning_rate": 4.768725583762094e-05, + "loss": 1.4565, + "step": 5143 + }, + { + "epoch": 0.82, + "learning_rate": 4.768617197111951e-05, + "loss": 1.5407, + "step": 5144 + }, + { + "epoch": 0.82, + "learning_rate": 4.768508786302413e-05, + "loss": 1.5622, + "step": 5145 + }, + { + "epoch": 0.82, + "learning_rate": 4.768400351334633e-05, + "loss": 1.5104, + "step": 5146 + }, + { + "epoch": 0.82, + "learning_rate": 4.768291892209768e-05, + "loss": 1.563, + "step": 5147 + }, + { + "epoch": 0.82, + "learning_rate": 4.76818340892897e-05, + "loss": 1.4795, + "step": 5148 + }, + { + "epoch": 0.82, + "learning_rate": 4.7680749014933956e-05, + "loss": 1.5933, + "step": 5149 + }, + { + "epoch": 0.82, + "learning_rate": 4.767966369904201e-05, + "loss": 1.5352, + "step": 5150 + }, + { + "epoch": 0.82, + "learning_rate": 4.767857814162542e-05, + "loss": 1.5324, + "step": 5151 + }, + { + "epoch": 0.82, + "learning_rate": 4.767749234269574e-05, + "loss": 1.5098, + "step": 5152 + }, + { + "epoch": 0.82, + "learning_rate": 4.767640630226452e-05, + "loss": 1.5231, + "step": 5153 + }, + { + "epoch": 0.82, + "learning_rate": 4.7675320020343355e-05, + "loss": 1.4774, + "step": 5154 + }, + { + "epoch": 0.82, + "learning_rate": 4.767423349694379e-05, + "loss": 1.4792, + "step": 5155 + }, + { + "epoch": 0.82, + "learning_rate": 4.767314673207741e-05, + "loss": 1.4763, + "step": 5156 + }, + { + "epoch": 0.82, + "learning_rate": 4.767205972575578e-05, + "loss": 1.4554, + "step": 5157 + }, + { + "epoch": 0.82, + "learning_rate": 4.7670972477990485e-05, + "loss": 1.4982, + "step": 5158 + }, + { + "epoch": 0.82, + "learning_rate": 4.766988498879309e-05, + "loss": 1.535, + "step": 5159 + }, + { + "epoch": 0.82, + "learning_rate": 4.766879725817518e-05, + "loss": 1.541, + "step": 5160 + }, + { + "epoch": 0.82, + "learning_rate": 4.7667709286148345e-05, + "loss": 1.4969, + "step": 5161 + }, + { + "epoch": 0.82, + "learning_rate": 4.766662107272417e-05, + "loss": 1.4727, + "step": 5162 + }, + { + "epoch": 0.82, + "learning_rate": 4.7665532617914235e-05, + "loss": 1.4404, + "step": 5163 + }, + { + "epoch": 0.82, + "learning_rate": 4.766444392173015e-05, + "loss": 1.5303, + "step": 5164 + }, + { + "epoch": 0.82, + "learning_rate": 4.766335498418349e-05, + "loss": 1.4696, + "step": 5165 + }, + { + "epoch": 0.82, + "learning_rate": 4.7662265805285855e-05, + "loss": 1.5391, + "step": 5166 + }, + { + "epoch": 0.82, + "learning_rate": 4.7661176385048857e-05, + "loss": 1.4808, + "step": 5167 + }, + { + "epoch": 0.82, + "learning_rate": 4.766008672348408e-05, + "loss": 1.4277, + "step": 5168 + }, + { + "epoch": 0.82, + "learning_rate": 4.765899682060314e-05, + "loss": 1.4878, + "step": 5169 + }, + { + "epoch": 0.82, + "learning_rate": 4.765790667641764e-05, + "loss": 1.5145, + "step": 5170 + }, + { + "epoch": 0.82, + "learning_rate": 4.765681629093919e-05, + "loss": 1.5551, + "step": 5171 + }, + { + "epoch": 0.82, + "learning_rate": 4.7655725664179405e-05, + "loss": 1.4198, + "step": 5172 + }, + { + "epoch": 0.82, + "learning_rate": 4.765463479614989e-05, + "loss": 1.4427, + "step": 5173 + }, + { + "epoch": 0.82, + "learning_rate": 4.765354368686228e-05, + "loss": 1.4313, + "step": 5174 + }, + { + "epoch": 0.82, + "learning_rate": 4.7652452336328174e-05, + "loss": 1.5278, + "step": 5175 + }, + { + "epoch": 0.82, + "learning_rate": 4.7651360744559206e-05, + "loss": 1.5399, + "step": 5176 + }, + { + "epoch": 0.82, + "learning_rate": 4.7650268911567e-05, + "loss": 1.4439, + "step": 5177 + }, + { + "epoch": 0.82, + "learning_rate": 4.764917683736318e-05, + "loss": 1.4479, + "step": 5178 + }, + { + "epoch": 0.83, + "learning_rate": 4.7648084521959366e-05, + "loss": 1.5736, + "step": 5179 + }, + { + "epoch": 0.83, + "learning_rate": 4.764699196536721e-05, + "loss": 1.5233, + "step": 5180 + }, + { + "epoch": 0.83, + "learning_rate": 4.764589916759834e-05, + "loss": 1.4616, + "step": 5181 + }, + { + "epoch": 0.83, + "learning_rate": 4.764480612866439e-05, + "loss": 1.4963, + "step": 5182 + }, + { + "epoch": 0.83, + "learning_rate": 4.764371284857701e-05, + "loss": 1.4994, + "step": 5183 + }, + { + "epoch": 0.83, + "learning_rate": 4.764261932734783e-05, + "loss": 1.4785, + "step": 5184 + }, + { + "epoch": 0.83, + "learning_rate": 4.76415255649885e-05, + "loss": 1.5555, + "step": 5185 + }, + { + "epoch": 0.83, + "learning_rate": 4.7640431561510674e-05, + "loss": 1.5211, + "step": 5186 + }, + { + "epoch": 0.83, + "learning_rate": 4.7639337316925994e-05, + "loss": 1.4995, + "step": 5187 + }, + { + "epoch": 0.83, + "learning_rate": 4.763824283124612e-05, + "loss": 1.548, + "step": 5188 + }, + { + "epoch": 0.83, + "learning_rate": 4.7637148104482696e-05, + "loss": 1.5999, + "step": 5189 + }, + { + "epoch": 0.83, + "learning_rate": 4.763605313664739e-05, + "loss": 1.5107, + "step": 5190 + }, + { + "epoch": 0.83, + "learning_rate": 4.763495792775186e-05, + "loss": 1.472, + "step": 5191 + }, + { + "epoch": 0.83, + "learning_rate": 4.763386247780777e-05, + "loss": 1.4907, + "step": 5192 + }, + { + "epoch": 0.83, + "learning_rate": 4.763276678682679e-05, + "loss": 1.5804, + "step": 5193 + }, + { + "epoch": 0.83, + "learning_rate": 4.7631670854820586e-05, + "loss": 1.5173, + "step": 5194 + }, + { + "epoch": 0.83, + "learning_rate": 4.763057468180082e-05, + "loss": 1.4915, + "step": 5195 + }, + { + "epoch": 0.83, + "learning_rate": 4.762947826777918e-05, + "loss": 1.4855, + "step": 5196 + }, + { + "epoch": 0.83, + "learning_rate": 4.762838161276734e-05, + "loss": 1.5026, + "step": 5197 + }, + { + "epoch": 0.83, + "learning_rate": 4.762728471677696e-05, + "loss": 1.4476, + "step": 5198 + }, + { + "epoch": 0.83, + "learning_rate": 4.7626187579819734e-05, + "loss": 1.5091, + "step": 5199 + }, + { + "epoch": 0.83, + "learning_rate": 4.7625090201907364e-05, + "loss": 1.4447, + "step": 5200 + }, + { + "epoch": 0.83, + "learning_rate": 4.7623992583051505e-05, + "loss": 1.472, + "step": 5201 + }, + { + "epoch": 0.83, + "learning_rate": 4.762289472326387e-05, + "loss": 1.5181, + "step": 5202 + }, + { + "epoch": 0.83, + "learning_rate": 4.762179662255614e-05, + "loss": 1.5369, + "step": 5203 + }, + { + "epoch": 0.83, + "learning_rate": 4.762069828094e-05, + "loss": 1.4639, + "step": 5204 + }, + { + "epoch": 0.83, + "learning_rate": 4.761959969842717e-05, + "loss": 1.4336, + "step": 5205 + }, + { + "epoch": 0.83, + "learning_rate": 4.761850087502932e-05, + "loss": 1.4409, + "step": 5206 + }, + { + "epoch": 0.83, + "learning_rate": 4.761740181075818e-05, + "loss": 1.5527, + "step": 5207 + }, + { + "epoch": 0.83, + "learning_rate": 4.7616302505625434e-05, + "loss": 1.5628, + "step": 5208 + }, + { + "epoch": 0.83, + "learning_rate": 4.76152029596428e-05, + "loss": 1.5405, + "step": 5209 + }, + { + "epoch": 0.83, + "learning_rate": 4.7614103172821986e-05, + "loss": 1.4653, + "step": 5210 + }, + { + "epoch": 0.83, + "learning_rate": 4.7613003145174706e-05, + "loss": 1.4396, + "step": 5211 + }, + { + "epoch": 0.83, + "learning_rate": 4.7611902876712666e-05, + "loss": 1.4626, + "step": 5212 + }, + { + "epoch": 0.83, + "learning_rate": 4.7610802367447596e-05, + "loss": 1.4779, + "step": 5213 + }, + { + "epoch": 0.83, + "learning_rate": 4.76097016173912e-05, + "loss": 1.4857, + "step": 5214 + }, + { + "epoch": 0.83, + "learning_rate": 4.760860062655521e-05, + "loss": 1.514, + "step": 5215 + }, + { + "epoch": 0.83, + "learning_rate": 4.760749939495135e-05, + "loss": 1.5205, + "step": 5216 + }, + { + "epoch": 0.83, + "learning_rate": 4.760639792259134e-05, + "loss": 1.4971, + "step": 5217 + }, + { + "epoch": 0.83, + "learning_rate": 4.760529620948693e-05, + "loss": 1.4147, + "step": 5218 + }, + { + "epoch": 0.83, + "learning_rate": 4.760419425564983e-05, + "loss": 1.4771, + "step": 5219 + }, + { + "epoch": 0.83, + "learning_rate": 4.760309206109179e-05, + "loss": 1.5142, + "step": 5220 + }, + { + "epoch": 0.83, + "learning_rate": 4.760198962582454e-05, + "loss": 1.5038, + "step": 5221 + }, + { + "epoch": 0.83, + "learning_rate": 4.760088694985982e-05, + "loss": 1.4596, + "step": 5222 + }, + { + "epoch": 0.83, + "learning_rate": 4.759978403320937e-05, + "loss": 1.506, + "step": 5223 + }, + { + "epoch": 0.83, + "learning_rate": 4.759868087588496e-05, + "loss": 1.5073, + "step": 5224 + }, + { + "epoch": 0.83, + "learning_rate": 4.75975774778983e-05, + "loss": 1.4172, + "step": 5225 + }, + { + "epoch": 0.83, + "learning_rate": 4.7596473839261166e-05, + "loss": 1.4816, + "step": 5226 + }, + { + "epoch": 0.83, + "learning_rate": 4.75953699599853e-05, + "loss": 1.498, + "step": 5227 + }, + { + "epoch": 0.83, + "learning_rate": 4.759426584008247e-05, + "loss": 1.527, + "step": 5228 + }, + { + "epoch": 0.83, + "learning_rate": 4.759316147956442e-05, + "loss": 1.5477, + "step": 5229 + }, + { + "epoch": 0.83, + "learning_rate": 4.759205687844293e-05, + "loss": 1.4606, + "step": 5230 + }, + { + "epoch": 0.83, + "learning_rate": 4.759095203672973e-05, + "loss": 1.5254, + "step": 5231 + }, + { + "epoch": 0.83, + "learning_rate": 4.758984695443662e-05, + "loss": 1.5124, + "step": 5232 + }, + { + "epoch": 0.83, + "learning_rate": 4.7588741631575344e-05, + "loss": 1.4823, + "step": 5233 + }, + { + "epoch": 0.83, + "learning_rate": 4.7587636068157695e-05, + "loss": 1.4741, + "step": 5234 + }, + { + "epoch": 0.83, + "learning_rate": 4.758653026419543e-05, + "loss": 1.485, + "step": 5235 + }, + { + "epoch": 0.83, + "learning_rate": 4.758542421970033e-05, + "loss": 1.5576, + "step": 5236 + }, + { + "epoch": 0.83, + "learning_rate": 4.758431793468417e-05, + "loss": 1.4681, + "step": 5237 + }, + { + "epoch": 0.83, + "learning_rate": 4.758321140915874e-05, + "loss": 1.4168, + "step": 5238 + }, + { + "epoch": 0.83, + "learning_rate": 4.758210464313582e-05, + "loss": 1.5187, + "step": 5239 + }, + { + "epoch": 0.83, + "learning_rate": 4.758099763662719e-05, + "loss": 1.5111, + "step": 5240 + }, + { + "epoch": 0.83, + "learning_rate": 4.757989038964466e-05, + "loss": 1.5524, + "step": 5241 + }, + { + "epoch": 0.84, + "learning_rate": 4.757878290219999e-05, + "loss": 1.4499, + "step": 5242 + }, + { + "epoch": 0.84, + "learning_rate": 4.757767517430499e-05, + "loss": 1.4621, + "step": 5243 + }, + { + "epoch": 0.84, + "learning_rate": 4.757656720597146e-05, + "loss": 1.5077, + "step": 5244 + }, + { + "epoch": 0.84, + "learning_rate": 4.7575458997211195e-05, + "loss": 1.5398, + "step": 5245 + }, + { + "epoch": 0.84, + "learning_rate": 4.7574350548036e-05, + "loss": 1.5522, + "step": 5246 + }, + { + "epoch": 0.84, + "learning_rate": 4.757324185845768e-05, + "loss": 1.4466, + "step": 5247 + }, + { + "epoch": 0.84, + "learning_rate": 4.757213292848803e-05, + "loss": 1.5138, + "step": 5248 + }, + { + "epoch": 0.84, + "learning_rate": 4.757102375813888e-05, + "loss": 1.5433, + "step": 5249 + }, + { + "epoch": 0.84, + "learning_rate": 4.756991434742202e-05, + "loss": 1.5448, + "step": 5250 + }, + { + "epoch": 0.84, + "learning_rate": 4.756880469634928e-05, + "loss": 1.5148, + "step": 5251 + }, + { + "epoch": 0.84, + "learning_rate": 4.756769480493247e-05, + "loss": 1.4878, + "step": 5252 + }, + { + "epoch": 0.84, + "learning_rate": 4.7566584673183415e-05, + "loss": 1.5109, + "step": 5253 + }, + { + "epoch": 0.84, + "learning_rate": 4.7565474301113924e-05, + "loss": 1.5466, + "step": 5254 + }, + { + "epoch": 0.84, + "learning_rate": 4.756436368873584e-05, + "loss": 1.5801, + "step": 5255 + }, + { + "epoch": 0.84, + "learning_rate": 4.756325283606098e-05, + "loss": 1.4432, + "step": 5256 + }, + { + "epoch": 0.84, + "learning_rate": 4.756214174310117e-05, + "loss": 1.497, + "step": 5257 + }, + { + "epoch": 0.84, + "learning_rate": 4.7561030409868255e-05, + "loss": 1.4631, + "step": 5258 + }, + { + "epoch": 0.84, + "learning_rate": 4.755991883637406e-05, + "loss": 1.5114, + "step": 5259 + }, + { + "epoch": 0.84, + "learning_rate": 4.755880702263043e-05, + "loss": 1.4867, + "step": 5260 + }, + { + "epoch": 0.84, + "learning_rate": 4.75576949686492e-05, + "loss": 1.5064, + "step": 5261 + }, + { + "epoch": 0.84, + "learning_rate": 4.7556582674442214e-05, + "loss": 1.4652, + "step": 5262 + }, + { + "epoch": 0.84, + "learning_rate": 4.7555470140021316e-05, + "loss": 1.5778, + "step": 5263 + }, + { + "epoch": 0.84, + "learning_rate": 4.755435736539835e-05, + "loss": 1.5508, + "step": 5264 + }, + { + "epoch": 0.84, + "learning_rate": 4.755324435058518e-05, + "loss": 1.5155, + "step": 5265 + }, + { + "epoch": 0.84, + "learning_rate": 4.7552131095593645e-05, + "loss": 1.5083, + "step": 5266 + }, + { + "epoch": 0.84, + "learning_rate": 4.75510176004356e-05, + "loss": 1.4601, + "step": 5267 + }, + { + "epoch": 0.84, + "learning_rate": 4.754990386512291e-05, + "loss": 1.5335, + "step": 5268 + }, + { + "epoch": 0.84, + "learning_rate": 4.754878988966745e-05, + "loss": 1.4243, + "step": 5269 + }, + { + "epoch": 0.84, + "learning_rate": 4.754767567408105e-05, + "loss": 1.5158, + "step": 5270 + }, + { + "epoch": 0.84, + "learning_rate": 4.75465612183756e-05, + "loss": 1.4814, + "step": 5271 + }, + { + "epoch": 0.84, + "learning_rate": 4.754544652256296e-05, + "loss": 1.4596, + "step": 5272 + }, + { + "epoch": 0.84, + "learning_rate": 4.7544331586655e-05, + "loss": 1.5464, + "step": 5273 + }, + { + "epoch": 0.84, + "learning_rate": 4.75432164106636e-05, + "loss": 1.5163, + "step": 5274 + }, + { + "epoch": 0.84, + "learning_rate": 4.7542100994600635e-05, + "loss": 1.4688, + "step": 5275 + }, + { + "epoch": 0.84, + "learning_rate": 4.7540985338477974e-05, + "loss": 1.5361, + "step": 5276 + }, + { + "epoch": 0.84, + "learning_rate": 4.7539869442307504e-05, + "loss": 1.554, + "step": 5277 + }, + { + "epoch": 0.84, + "learning_rate": 4.753875330610111e-05, + "loss": 1.5915, + "step": 5278 + }, + { + "epoch": 0.84, + "learning_rate": 4.753763692987068e-05, + "loss": 1.5218, + "step": 5279 + }, + { + "epoch": 0.84, + "learning_rate": 4.7536520313628094e-05, + "loss": 1.5428, + "step": 5280 + }, + { + "epoch": 0.84, + "learning_rate": 4.753540345738525e-05, + "loss": 1.5413, + "step": 5281 + }, + { + "epoch": 0.84, + "learning_rate": 4.753428636115404e-05, + "loss": 1.486, + "step": 5282 + }, + { + "epoch": 0.84, + "learning_rate": 4.753316902494637e-05, + "loss": 1.4676, + "step": 5283 + }, + { + "epoch": 0.84, + "learning_rate": 4.753205144877412e-05, + "loss": 1.4639, + "step": 5284 + }, + { + "epoch": 0.84, + "learning_rate": 4.75309336326492e-05, + "loss": 1.4531, + "step": 5285 + }, + { + "epoch": 0.84, + "learning_rate": 4.752981557658353e-05, + "loss": 1.4875, + "step": 5286 + }, + { + "epoch": 0.84, + "learning_rate": 4.7528697280588984e-05, + "loss": 1.6038, + "step": 5287 + }, + { + "epoch": 0.84, + "learning_rate": 4.7527578744677495e-05, + "loss": 1.4707, + "step": 5288 + }, + { + "epoch": 0.84, + "learning_rate": 4.752645996886098e-05, + "loss": 1.5656, + "step": 5289 + }, + { + "epoch": 0.84, + "learning_rate": 4.752534095315133e-05, + "loss": 1.4635, + "step": 5290 + }, + { + "epoch": 0.84, + "learning_rate": 4.752422169756048e-05, + "loss": 1.512, + "step": 5291 + }, + { + "epoch": 0.84, + "learning_rate": 4.7523102202100345e-05, + "loss": 1.5162, + "step": 5292 + }, + { + "epoch": 0.84, + "learning_rate": 4.752198246678284e-05, + "loss": 1.4663, + "step": 5293 + }, + { + "epoch": 0.84, + "learning_rate": 4.75208624916199e-05, + "loss": 1.487, + "step": 5294 + }, + { + "epoch": 0.84, + "learning_rate": 4.7519742276623444e-05, + "loss": 1.5016, + "step": 5295 + }, + { + "epoch": 0.84, + "learning_rate": 4.7518621821805406e-05, + "loss": 1.4997, + "step": 5296 + }, + { + "epoch": 0.84, + "learning_rate": 4.7517501127177724e-05, + "loss": 1.5627, + "step": 5297 + }, + { + "epoch": 0.84, + "learning_rate": 4.751638019275232e-05, + "loss": 1.4172, + "step": 5298 + }, + { + "epoch": 0.84, + "learning_rate": 4.7515259018541135e-05, + "loss": 1.5168, + "step": 5299 + }, + { + "epoch": 0.84, + "learning_rate": 4.7514137604556106e-05, + "loss": 1.4924, + "step": 5300 + }, + { + "epoch": 0.84, + "learning_rate": 4.751301595080919e-05, + "loss": 1.4758, + "step": 5301 + }, + { + "epoch": 0.84, + "learning_rate": 4.751189405731232e-05, + "loss": 1.534, + "step": 5302 + }, + { + "epoch": 0.84, + "learning_rate": 4.7510771924077444e-05, + "loss": 1.4282, + "step": 5303 + }, + { + "epoch": 0.84, + "learning_rate": 4.7509649551116516e-05, + "loss": 1.4911, + "step": 5304 + }, + { + "epoch": 0.85, + "learning_rate": 4.7508526938441475e-05, + "loss": 1.4183, + "step": 5305 + }, + { + "epoch": 0.85, + "learning_rate": 4.7507404086064297e-05, + "loss": 1.4894, + "step": 5306 + }, + { + "epoch": 0.85, + "learning_rate": 4.750628099399693e-05, + "loss": 1.4505, + "step": 5307 + }, + { + "epoch": 0.85, + "learning_rate": 4.7505157662251344e-05, + "loss": 1.5146, + "step": 5308 + }, + { + "epoch": 0.85, + "learning_rate": 4.750403409083948e-05, + "loss": 1.5837, + "step": 5309 + }, + { + "epoch": 0.85, + "learning_rate": 4.750291027977332e-05, + "loss": 1.5829, + "step": 5310 + }, + { + "epoch": 0.85, + "learning_rate": 4.7501786229064816e-05, + "loss": 1.5073, + "step": 5311 + }, + { + "epoch": 0.85, + "learning_rate": 4.750066193872596e-05, + "loss": 1.4985, + "step": 5312 + }, + { + "epoch": 0.85, + "learning_rate": 4.749953740876871e-05, + "loss": 1.4896, + "step": 5313 + }, + { + "epoch": 0.85, + "learning_rate": 4.749841263920505e-05, + "loss": 1.45, + "step": 5314 + }, + { + "epoch": 0.85, + "learning_rate": 4.749728763004696e-05, + "loss": 1.5034, + "step": 5315 + }, + { + "epoch": 0.85, + "learning_rate": 4.7496162381306407e-05, + "loss": 1.599, + "step": 5316 + }, + { + "epoch": 0.85, + "learning_rate": 4.7495036892995385e-05, + "loss": 1.4976, + "step": 5317 + }, + { + "epoch": 0.85, + "learning_rate": 4.7493911165125874e-05, + "loss": 1.4443, + "step": 5318 + }, + { + "epoch": 0.85, + "learning_rate": 4.749278519770987e-05, + "loss": 1.4989, + "step": 5319 + }, + { + "epoch": 0.85, + "learning_rate": 4.749165899075936e-05, + "loss": 1.458, + "step": 5320 + }, + { + "epoch": 0.85, + "learning_rate": 4.749053254428634e-05, + "loss": 1.5238, + "step": 5321 + }, + { + "epoch": 0.85, + "learning_rate": 4.7489405858302795e-05, + "loss": 1.5249, + "step": 5322 + }, + { + "epoch": 0.85, + "learning_rate": 4.7488278932820726e-05, + "loss": 1.4684, + "step": 5323 + }, + { + "epoch": 0.85, + "learning_rate": 4.748715176785215e-05, + "loss": 1.5501, + "step": 5324 + }, + { + "epoch": 0.85, + "learning_rate": 4.7486024363409055e-05, + "loss": 1.4712, + "step": 5325 + }, + { + "epoch": 0.85, + "learning_rate": 4.7484896719503456e-05, + "loss": 1.5176, + "step": 5326 + }, + { + "epoch": 0.85, + "learning_rate": 4.7483768836147354e-05, + "loss": 1.568, + "step": 5327 + }, + { + "epoch": 0.85, + "learning_rate": 4.7482640713352765e-05, + "loss": 1.4707, + "step": 5328 + }, + { + "epoch": 0.85, + "learning_rate": 4.748151235113171e-05, + "loss": 1.5329, + "step": 5329 + }, + { + "epoch": 0.85, + "learning_rate": 4.748038374949618e-05, + "loss": 1.4774, + "step": 5330 + }, + { + "epoch": 0.85, + "learning_rate": 4.7479254908458224e-05, + "loss": 1.5008, + "step": 5331 + }, + { + "epoch": 0.85, + "learning_rate": 4.747812582802985e-05, + "loss": 1.5392, + "step": 5332 + }, + { + "epoch": 0.85, + "learning_rate": 4.7476996508223084e-05, + "loss": 1.472, + "step": 5333 + }, + { + "epoch": 0.85, + "learning_rate": 4.7475866949049945e-05, + "loss": 1.5679, + "step": 5334 + }, + { + "epoch": 0.85, + "learning_rate": 4.7474737150522474e-05, + "loss": 1.4891, + "step": 5335 + }, + { + "epoch": 0.85, + "learning_rate": 4.74736071126527e-05, + "loss": 1.5177, + "step": 5336 + }, + { + "epoch": 0.85, + "learning_rate": 4.747247683545264e-05, + "loss": 1.5252, + "step": 5337 + }, + { + "epoch": 0.85, + "learning_rate": 4.747134631893436e-05, + "loss": 1.4977, + "step": 5338 + }, + { + "epoch": 0.85, + "learning_rate": 4.7470215563109874e-05, + "loss": 1.4819, + "step": 5339 + }, + { + "epoch": 0.85, + "learning_rate": 4.7469084567991237e-05, + "loss": 1.4964, + "step": 5340 + }, + { + "epoch": 0.85, + "learning_rate": 4.74679533335905e-05, + "loss": 1.459, + "step": 5341 + }, + { + "epoch": 0.85, + "learning_rate": 4.746682185991969e-05, + "loss": 1.4889, + "step": 5342 + }, + { + "epoch": 0.85, + "learning_rate": 4.7465690146990873e-05, + "loss": 1.4653, + "step": 5343 + }, + { + "epoch": 0.85, + "learning_rate": 4.746455819481609e-05, + "loss": 1.4424, + "step": 5344 + }, + { + "epoch": 0.85, + "learning_rate": 4.7463426003407405e-05, + "loss": 1.4964, + "step": 5345 + }, + { + "epoch": 0.85, + "learning_rate": 4.746229357277686e-05, + "loss": 1.5513, + "step": 5346 + }, + { + "epoch": 0.85, + "learning_rate": 4.746116090293654e-05, + "loss": 1.5182, + "step": 5347 + }, + { + "epoch": 0.85, + "learning_rate": 4.746002799389849e-05, + "loss": 1.5046, + "step": 5348 + }, + { + "epoch": 0.85, + "learning_rate": 4.745889484567477e-05, + "loss": 1.5033, + "step": 5349 + }, + { + "epoch": 0.85, + "learning_rate": 4.7457761458277464e-05, + "loss": 1.4979, + "step": 5350 + }, + { + "epoch": 0.85, + "learning_rate": 4.745662783171863e-05, + "loss": 1.5171, + "step": 5351 + }, + { + "epoch": 0.85, + "learning_rate": 4.745549396601034e-05, + "loss": 1.4933, + "step": 5352 + }, + { + "epoch": 0.85, + "learning_rate": 4.7454359861164674e-05, + "loss": 1.5815, + "step": 5353 + }, + { + "epoch": 0.85, + "learning_rate": 4.745322551719371e-05, + "loss": 1.4702, + "step": 5354 + }, + { + "epoch": 0.85, + "learning_rate": 4.7452090934109525e-05, + "loss": 1.5436, + "step": 5355 + }, + { + "epoch": 0.85, + "learning_rate": 4.7450956111924206e-05, + "loss": 1.5658, + "step": 5356 + }, + { + "epoch": 0.85, + "learning_rate": 4.7449821050649824e-05, + "loss": 1.5272, + "step": 5357 + }, + { + "epoch": 0.85, + "learning_rate": 4.744868575029848e-05, + "loss": 1.5226, + "step": 5358 + }, + { + "epoch": 0.85, + "learning_rate": 4.7447550210882275e-05, + "loss": 1.4652, + "step": 5359 + }, + { + "epoch": 0.85, + "learning_rate": 4.744641443241328e-05, + "loss": 1.5107, + "step": 5360 + }, + { + "epoch": 0.85, + "learning_rate": 4.7445278414903596e-05, + "loss": 1.4613, + "step": 5361 + }, + { + "epoch": 0.85, + "learning_rate": 4.744414215836532e-05, + "loss": 1.4671, + "step": 5362 + }, + { + "epoch": 0.85, + "learning_rate": 4.7443005662810566e-05, + "loss": 1.4935, + "step": 5363 + }, + { + "epoch": 0.85, + "learning_rate": 4.744186892825142e-05, + "loss": 1.4902, + "step": 5364 + }, + { + "epoch": 0.85, + "learning_rate": 4.74407319547e-05, + "loss": 1.4881, + "step": 5365 + }, + { + "epoch": 0.85, + "learning_rate": 4.7439594742168414e-05, + "loss": 1.4961, + "step": 5366 + }, + { + "epoch": 0.86, + "learning_rate": 4.743845729066876e-05, + "loss": 1.5257, + "step": 5367 + }, + { + "epoch": 0.86, + "learning_rate": 4.7437319600213156e-05, + "loss": 1.5122, + "step": 5368 + }, + { + "epoch": 0.86, + "learning_rate": 4.7436181670813726e-05, + "loss": 1.4823, + "step": 5369 + }, + { + "epoch": 0.86, + "learning_rate": 4.743504350248258e-05, + "loss": 1.4684, + "step": 5370 + }, + { + "epoch": 0.86, + "learning_rate": 4.743390509523183e-05, + "loss": 1.5444, + "step": 5371 + }, + { + "epoch": 0.86, + "learning_rate": 4.743276644907363e-05, + "loss": 1.5, + "step": 5372 + }, + { + "epoch": 0.86, + "learning_rate": 4.743162756402008e-05, + "loss": 1.5008, + "step": 5373 + }, + { + "epoch": 0.86, + "learning_rate": 4.7430488440083306e-05, + "loss": 1.6105, + "step": 5374 + }, + { + "epoch": 0.86, + "learning_rate": 4.742934907727546e-05, + "loss": 1.4028, + "step": 5375 + }, + { + "epoch": 0.86, + "learning_rate": 4.742820947560866e-05, + "loss": 1.5469, + "step": 5376 + }, + { + "epoch": 0.86, + "learning_rate": 4.7427069635095045e-05, + "loss": 1.4408, + "step": 5377 + }, + { + "epoch": 0.86, + "learning_rate": 4.742592955574675e-05, + "loss": 1.5062, + "step": 5378 + }, + { + "epoch": 0.86, + "learning_rate": 4.742478923757593e-05, + "loss": 1.5877, + "step": 5379 + }, + { + "epoch": 0.86, + "learning_rate": 4.742364868059471e-05, + "loss": 1.5422, + "step": 5380 + }, + { + "epoch": 0.86, + "learning_rate": 4.7422507884815246e-05, + "loss": 1.5524, + "step": 5381 + }, + { + "epoch": 0.86, + "learning_rate": 4.7421366850249694e-05, + "loss": 1.5431, + "step": 5382 + }, + { + "epoch": 0.86, + "learning_rate": 4.742022557691019e-05, + "loss": 1.4577, + "step": 5383 + }, + { + "epoch": 0.86, + "learning_rate": 4.7419084064808894e-05, + "loss": 1.4806, + "step": 5384 + }, + { + "epoch": 0.86, + "learning_rate": 4.7417942313957966e-05, + "loss": 1.5623, + "step": 5385 + }, + { + "epoch": 0.86, + "learning_rate": 4.741680032436956e-05, + "loss": 1.5392, + "step": 5386 + }, + { + "epoch": 0.86, + "learning_rate": 4.7415658096055846e-05, + "loss": 1.4899, + "step": 5387 + }, + { + "epoch": 0.86, + "learning_rate": 4.741451562902898e-05, + "loss": 1.4598, + "step": 5388 + }, + { + "epoch": 0.86, + "learning_rate": 4.741337292330113e-05, + "loss": 1.5557, + "step": 5389 + }, + { + "epoch": 0.86, + "learning_rate": 4.7412229978884473e-05, + "loss": 1.5327, + "step": 5390 + }, + { + "epoch": 0.86, + "learning_rate": 4.741108679579116e-05, + "loss": 1.5739, + "step": 5391 + }, + { + "epoch": 0.86, + "learning_rate": 4.7409943374033384e-05, + "loss": 1.5736, + "step": 5392 + }, + { + "epoch": 0.86, + "learning_rate": 4.7408799713623316e-05, + "loss": 1.5238, + "step": 5393 + }, + { + "epoch": 0.86, + "learning_rate": 4.740765581457314e-05, + "loss": 1.4644, + "step": 5394 + }, + { + "epoch": 0.86, + "learning_rate": 4.7406511676895034e-05, + "loss": 1.4967, + "step": 5395 + }, + { + "epoch": 0.86, + "learning_rate": 4.740536730060118e-05, + "loss": 1.4595, + "step": 5396 + }, + { + "epoch": 0.86, + "learning_rate": 4.740422268570376e-05, + "loss": 1.4373, + "step": 5397 + }, + { + "epoch": 0.86, + "learning_rate": 4.7403077832214984e-05, + "loss": 1.536, + "step": 5398 + }, + { + "epoch": 0.86, + "learning_rate": 4.7401932740147024e-05, + "loss": 1.4774, + "step": 5399 + }, + { + "epoch": 0.86, + "learning_rate": 4.740078740951207e-05, + "loss": 1.4681, + "step": 5400 + }, + { + "epoch": 0.86, + "learning_rate": 4.7399641840322344e-05, + "loss": 1.4948, + "step": 5401 + }, + { + "epoch": 0.86, + "learning_rate": 4.7398496032590026e-05, + "loss": 1.5607, + "step": 5402 + }, + { + "epoch": 0.86, + "learning_rate": 4.739734998632732e-05, + "loss": 1.5129, + "step": 5403 + }, + { + "epoch": 0.86, + "learning_rate": 4.7396203701546446e-05, + "loss": 1.5534, + "step": 5404 + }, + { + "epoch": 0.86, + "learning_rate": 4.7395057178259593e-05, + "loss": 1.4785, + "step": 5405 + }, + { + "epoch": 0.86, + "learning_rate": 4.739391041647898e-05, + "loss": 1.4987, + "step": 5406 + }, + { + "epoch": 0.86, + "learning_rate": 4.7392763416216815e-05, + "loss": 1.5052, + "step": 5407 + }, + { + "epoch": 0.86, + "learning_rate": 4.739161617748531e-05, + "loss": 1.4979, + "step": 5408 + }, + { + "epoch": 0.86, + "learning_rate": 4.7390468700296685e-05, + "loss": 1.4072, + "step": 5409 + }, + { + "epoch": 0.86, + "learning_rate": 4.738932098466317e-05, + "loss": 1.498, + "step": 5410 + }, + { + "epoch": 0.86, + "learning_rate": 4.738817303059698e-05, + "loss": 1.481, + "step": 5411 + }, + { + "epoch": 0.86, + "learning_rate": 4.738702483811034e-05, + "loss": 1.4951, + "step": 5412 + }, + { + "epoch": 0.86, + "learning_rate": 4.738587640721547e-05, + "loss": 1.5011, + "step": 5413 + }, + { + "epoch": 0.86, + "learning_rate": 4.738472773792461e-05, + "loss": 1.5412, + "step": 5414 + }, + { + "epoch": 0.86, + "learning_rate": 4.7383578830249983e-05, + "loss": 1.502, + "step": 5415 + }, + { + "epoch": 0.86, + "learning_rate": 4.738242968420384e-05, + "loss": 1.5111, + "step": 5416 + }, + { + "epoch": 0.86, + "learning_rate": 4.738128029979841e-05, + "loss": 1.4668, + "step": 5417 + }, + { + "epoch": 0.86, + "learning_rate": 4.738013067704593e-05, + "loss": 1.5226, + "step": 5418 + }, + { + "epoch": 0.86, + "learning_rate": 4.737898081595864e-05, + "loss": 1.4858, + "step": 5419 + }, + { + "epoch": 0.86, + "learning_rate": 4.7377830716548803e-05, + "loss": 1.5013, + "step": 5420 + }, + { + "epoch": 0.86, + "learning_rate": 4.737668037882865e-05, + "loss": 1.4819, + "step": 5421 + }, + { + "epoch": 0.86, + "learning_rate": 4.7375529802810434e-05, + "loss": 1.5423, + "step": 5422 + }, + { + "epoch": 0.86, + "learning_rate": 4.737437898850641e-05, + "loss": 1.5174, + "step": 5423 + }, + { + "epoch": 0.86, + "learning_rate": 4.7373227935928834e-05, + "loss": 1.5487, + "step": 5424 + }, + { + "epoch": 0.86, + "learning_rate": 4.737207664508997e-05, + "loss": 1.5145, + "step": 5425 + }, + { + "epoch": 0.86, + "learning_rate": 4.737092511600206e-05, + "loss": 1.5021, + "step": 5426 + }, + { + "epoch": 0.86, + "learning_rate": 4.736977334867739e-05, + "loss": 1.5555, + "step": 5427 + }, + { + "epoch": 0.86, + "learning_rate": 4.7368621343128205e-05, + "loss": 1.5148, + "step": 5428 + }, + { + "epoch": 0.86, + "learning_rate": 4.736746909936679e-05, + "loss": 1.4787, + "step": 5429 + }, + { + "epoch": 0.87, + "learning_rate": 4.73663166174054e-05, + "loss": 1.5246, + "step": 5430 + }, + { + "epoch": 0.87, + "learning_rate": 4.7365163897256326e-05, + "loss": 1.4919, + "step": 5431 + }, + { + "epoch": 0.87, + "learning_rate": 4.736401093893184e-05, + "loss": 1.5132, + "step": 5432 + }, + { + "epoch": 0.87, + "learning_rate": 4.736285774244421e-05, + "loss": 1.5838, + "step": 5433 + }, + { + "epoch": 0.87, + "learning_rate": 4.736170430780572e-05, + "loss": 1.5187, + "step": 5434 + }, + { + "epoch": 0.87, + "learning_rate": 4.736055063502865e-05, + "loss": 1.4911, + "step": 5435 + }, + { + "epoch": 0.87, + "learning_rate": 4.735939672412529e-05, + "loss": 1.4276, + "step": 5436 + }, + { + "epoch": 0.87, + "learning_rate": 4.735824257510794e-05, + "loss": 1.4948, + "step": 5437 + }, + { + "epoch": 0.87, + "learning_rate": 4.7357088187988874e-05, + "loss": 1.512, + "step": 5438 + }, + { + "epoch": 0.87, + "learning_rate": 4.7355933562780395e-05, + "loss": 1.5011, + "step": 5439 + }, + { + "epoch": 0.87, + "learning_rate": 4.735477869949479e-05, + "loss": 1.543, + "step": 5440 + }, + { + "epoch": 0.87, + "learning_rate": 4.7353623598144374e-05, + "loss": 1.4691, + "step": 5441 + }, + { + "epoch": 0.87, + "learning_rate": 4.735246825874144e-05, + "loss": 1.4747, + "step": 5442 + }, + { + "epoch": 0.87, + "learning_rate": 4.7351312681298274e-05, + "loss": 1.5589, + "step": 5443 + }, + { + "epoch": 0.87, + "learning_rate": 4.735015686582721e-05, + "loss": 1.4772, + "step": 5444 + }, + { + "epoch": 0.87, + "learning_rate": 4.734900081234054e-05, + "loss": 1.5575, + "step": 5445 + }, + { + "epoch": 0.87, + "learning_rate": 4.734784452085058e-05, + "loss": 1.4792, + "step": 5446 + }, + { + "epoch": 0.87, + "learning_rate": 4.734668799136965e-05, + "loss": 1.4839, + "step": 5447 + }, + { + "epoch": 0.87, + "learning_rate": 4.734553122391005e-05, + "loss": 1.4744, + "step": 5448 + }, + { + "epoch": 0.87, + "learning_rate": 4.734437421848411e-05, + "loss": 1.5158, + "step": 5449 + }, + { + "epoch": 0.87, + "learning_rate": 4.7343216975104154e-05, + "loss": 1.4769, + "step": 5450 + }, + { + "epoch": 0.87, + "learning_rate": 4.73420594937825e-05, + "loss": 1.4883, + "step": 5451 + }, + { + "epoch": 0.87, + "learning_rate": 4.734090177453148e-05, + "loss": 1.5692, + "step": 5452 + }, + { + "epoch": 0.87, + "learning_rate": 4.733974381736342e-05, + "loss": 1.5514, + "step": 5453 + }, + { + "epoch": 0.87, + "learning_rate": 4.7338585622290644e-05, + "loss": 1.5233, + "step": 5454 + }, + { + "epoch": 0.87, + "learning_rate": 4.73374271893255e-05, + "loss": 1.5098, + "step": 5455 + }, + { + "epoch": 0.87, + "learning_rate": 4.7336268518480314e-05, + "loss": 1.4561, + "step": 5456 + }, + { + "epoch": 0.87, + "learning_rate": 4.7335109609767436e-05, + "loss": 1.4801, + "step": 5457 + }, + { + "epoch": 0.87, + "learning_rate": 4.7333950463199196e-05, + "loss": 1.4025, + "step": 5458 + }, + { + "epoch": 0.87, + "learning_rate": 4.733279107878794e-05, + "loss": 1.4299, + "step": 5459 + }, + { + "epoch": 0.87, + "learning_rate": 4.733163145654602e-05, + "loss": 1.4891, + "step": 5460 + }, + { + "epoch": 0.87, + "learning_rate": 4.733047159648578e-05, + "loss": 1.5155, + "step": 5461 + }, + { + "epoch": 0.87, + "learning_rate": 4.7329311498619576e-05, + "loss": 1.5461, + "step": 5462 + }, + { + "epoch": 0.87, + "learning_rate": 4.7328151162959765e-05, + "loss": 1.4434, + "step": 5463 + }, + { + "epoch": 0.87, + "learning_rate": 4.732699058951869e-05, + "loss": 1.5514, + "step": 5464 + }, + { + "epoch": 0.87, + "learning_rate": 4.7325829778308725e-05, + "loss": 1.4696, + "step": 5465 + }, + { + "epoch": 0.87, + "learning_rate": 4.7324668729342236e-05, + "loss": 1.5068, + "step": 5466 + }, + { + "epoch": 0.87, + "learning_rate": 4.732350744263157e-05, + "loss": 1.485, + "step": 5467 + }, + { + "epoch": 0.87, + "learning_rate": 4.7322345918189104e-05, + "loss": 1.4501, + "step": 5468 + }, + { + "epoch": 0.87, + "learning_rate": 4.732118415602721e-05, + "loss": 1.449, + "step": 5469 + }, + { + "epoch": 0.87, + "learning_rate": 4.732002215615825e-05, + "loss": 1.5438, + "step": 5470 + }, + { + "epoch": 0.87, + "learning_rate": 4.731885991859461e-05, + "loss": 1.5046, + "step": 5471 + }, + { + "epoch": 0.87, + "learning_rate": 4.731769744334866e-05, + "loss": 1.4644, + "step": 5472 + }, + { + "epoch": 0.87, + "learning_rate": 4.7316534730432783e-05, + "loss": 1.5135, + "step": 5473 + }, + { + "epoch": 0.87, + "learning_rate": 4.731537177985936e-05, + "loss": 1.4715, + "step": 5474 + }, + { + "epoch": 0.87, + "learning_rate": 4.7314208591640774e-05, + "loss": 1.4657, + "step": 5475 + }, + { + "epoch": 0.87, + "learning_rate": 4.731304516578941e-05, + "loss": 1.4777, + "step": 5476 + }, + { + "epoch": 0.87, + "learning_rate": 4.731188150231767e-05, + "loss": 1.5013, + "step": 5477 + }, + { + "epoch": 0.87, + "learning_rate": 4.731071760123793e-05, + "loss": 1.4992, + "step": 5478 + }, + { + "epoch": 0.87, + "learning_rate": 4.7309553462562595e-05, + "loss": 1.502, + "step": 5479 + }, + { + "epoch": 0.87, + "learning_rate": 4.7308389086304064e-05, + "loss": 1.4816, + "step": 5480 + }, + { + "epoch": 0.87, + "learning_rate": 4.7307224472474726e-05, + "loss": 1.4844, + "step": 5481 + }, + { + "epoch": 0.87, + "learning_rate": 4.7306059621086995e-05, + "loss": 1.4733, + "step": 5482 + }, + { + "epoch": 0.87, + "learning_rate": 4.730489453215327e-05, + "loss": 1.5103, + "step": 5483 + }, + { + "epoch": 0.87, + "learning_rate": 4.7303729205685964e-05, + "loss": 1.4645, + "step": 5484 + }, + { + "epoch": 0.87, + "learning_rate": 4.730256364169747e-05, + "loss": 1.5029, + "step": 5485 + }, + { + "epoch": 0.87, + "learning_rate": 4.730139784020023e-05, + "loss": 1.5313, + "step": 5486 + }, + { + "epoch": 0.87, + "learning_rate": 4.730023180120663e-05, + "loss": 1.5485, + "step": 5487 + }, + { + "epoch": 0.87, + "learning_rate": 4.729906552472911e-05, + "loss": 1.5189, + "step": 5488 + }, + { + "epoch": 0.87, + "learning_rate": 4.729789901078008e-05, + "loss": 1.4585, + "step": 5489 + }, + { + "epoch": 0.87, + "learning_rate": 4.7296732259371956e-05, + "loss": 1.4355, + "step": 5490 + }, + { + "epoch": 0.87, + "learning_rate": 4.7295565270517176e-05, + "loss": 1.4766, + "step": 5491 + }, + { + "epoch": 0.87, + "learning_rate": 4.729439804422815e-05, + "loss": 1.5365, + "step": 5492 + }, + { + "epoch": 0.88, + "learning_rate": 4.7293230580517336e-05, + "loss": 1.5581, + "step": 5493 + }, + { + "epoch": 0.88, + "learning_rate": 4.729206287939714e-05, + "loss": 1.4704, + "step": 5494 + }, + { + "epoch": 0.88, + "learning_rate": 4.729089494088001e-05, + "loss": 1.6011, + "step": 5495 + }, + { + "epoch": 0.88, + "learning_rate": 4.728972676497838e-05, + "loss": 1.4785, + "step": 5496 + }, + { + "epoch": 0.88, + "learning_rate": 4.728855835170471e-05, + "loss": 1.429, + "step": 5497 + }, + { + "epoch": 0.88, + "learning_rate": 4.7287389701071404e-05, + "loss": 1.4736, + "step": 5498 + }, + { + "epoch": 0.88, + "learning_rate": 4.728622081309093e-05, + "loss": 1.48, + "step": 5499 + }, + { + "epoch": 0.88, + "learning_rate": 4.728505168777574e-05, + "loss": 1.4665, + "step": 5500 + }, + { + "epoch": 0.88, + "learning_rate": 4.7283882325138286e-05, + "loss": 1.5241, + "step": 5501 + }, + { + "epoch": 0.88, + "learning_rate": 4.728271272519101e-05, + "loss": 1.4808, + "step": 5502 + }, + { + "epoch": 0.88, + "learning_rate": 4.7281542887946366e-05, + "loss": 1.4928, + "step": 5503 + }, + { + "epoch": 0.88, + "learning_rate": 4.7280372813416824e-05, + "loss": 1.4691, + "step": 5504 + }, + { + "epoch": 0.88, + "learning_rate": 4.7279202501614834e-05, + "loss": 1.5493, + "step": 5505 + }, + { + "epoch": 0.88, + "learning_rate": 4.7278031952552865e-05, + "loss": 1.4653, + "step": 5506 + }, + { + "epoch": 0.88, + "learning_rate": 4.7276861166243383e-05, + "loss": 1.4533, + "step": 5507 + }, + { + "epoch": 0.88, + "learning_rate": 4.7275690142698846e-05, + "loss": 1.5094, + "step": 5508 + }, + { + "epoch": 0.88, + "learning_rate": 4.727451888193174e-05, + "loss": 1.5392, + "step": 5509 + }, + { + "epoch": 0.88, + "learning_rate": 4.7273347383954535e-05, + "loss": 1.5457, + "step": 5510 + }, + { + "epoch": 0.88, + "learning_rate": 4.727217564877969e-05, + "loss": 1.5324, + "step": 5511 + }, + { + "epoch": 0.88, + "learning_rate": 4.727100367641971e-05, + "loss": 1.5207, + "step": 5512 + }, + { + "epoch": 0.88, + "learning_rate": 4.726983146688705e-05, + "loss": 1.508, + "step": 5513 + }, + { + "epoch": 0.88, + "learning_rate": 4.726865902019422e-05, + "loss": 1.4964, + "step": 5514 + }, + { + "epoch": 0.88, + "learning_rate": 4.7267486336353676e-05, + "loss": 1.472, + "step": 5515 + }, + { + "epoch": 0.88, + "learning_rate": 4.7266313415377924e-05, + "loss": 1.4953, + "step": 5516 + }, + { + "epoch": 0.88, + "learning_rate": 4.726514025727946e-05, + "loss": 1.5379, + "step": 5517 + }, + { + "epoch": 0.88, + "learning_rate": 4.726396686207077e-05, + "loss": 1.4756, + "step": 5518 + }, + { + "epoch": 0.88, + "learning_rate": 4.7262793229764346e-05, + "loss": 1.5299, + "step": 5519 + }, + { + "epoch": 0.88, + "learning_rate": 4.726161936037269e-05, + "loss": 1.4652, + "step": 5520 + }, + { + "epoch": 0.88, + "learning_rate": 4.7260445253908306e-05, + "loss": 1.5098, + "step": 5521 + }, + { + "epoch": 0.88, + "learning_rate": 4.725927091038369e-05, + "loss": 1.4352, + "step": 5522 + }, + { + "epoch": 0.88, + "learning_rate": 4.725809632981135e-05, + "loss": 1.5435, + "step": 5523 + }, + { + "epoch": 0.88, + "learning_rate": 4.72569215122038e-05, + "loss": 1.5521, + "step": 5524 + }, + { + "epoch": 0.88, + "learning_rate": 4.725574645757355e-05, + "loss": 1.5252, + "step": 5525 + }, + { + "epoch": 0.88, + "learning_rate": 4.725457116593312e-05, + "loss": 1.543, + "step": 5526 + }, + { + "epoch": 0.88, + "learning_rate": 4.7253395637295e-05, + "loss": 1.4893, + "step": 5527 + }, + { + "epoch": 0.88, + "learning_rate": 4.7252219871671747e-05, + "loss": 1.521, + "step": 5528 + }, + { + "epoch": 0.88, + "learning_rate": 4.7251043869075844e-05, + "loss": 1.4629, + "step": 5529 + }, + { + "epoch": 0.88, + "learning_rate": 4.724986762951984e-05, + "loss": 1.5436, + "step": 5530 + }, + { + "epoch": 0.88, + "learning_rate": 4.7248691153016254e-05, + "loss": 1.5091, + "step": 5531 + }, + { + "epoch": 0.88, + "learning_rate": 4.724751443957762e-05, + "loss": 1.5549, + "step": 5532 + }, + { + "epoch": 0.88, + "learning_rate": 4.724633748921646e-05, + "loss": 1.4544, + "step": 5533 + }, + { + "epoch": 0.88, + "learning_rate": 4.7245160301945314e-05, + "loss": 1.48, + "step": 5534 + }, + { + "epoch": 0.88, + "learning_rate": 4.724398287777671e-05, + "loss": 1.4194, + "step": 5535 + }, + { + "epoch": 0.88, + "learning_rate": 4.724280521672319e-05, + "loss": 1.5268, + "step": 5536 + }, + { + "epoch": 0.88, + "learning_rate": 4.724162731879731e-05, + "loss": 1.4512, + "step": 5537 + }, + { + "epoch": 0.88, + "learning_rate": 4.7240449184011596e-05, + "loss": 1.5024, + "step": 5538 + }, + { + "epoch": 0.88, + "learning_rate": 4.72392708123786e-05, + "loss": 1.5208, + "step": 5539 + }, + { + "epoch": 0.88, + "learning_rate": 4.723809220391088e-05, + "loss": 1.5067, + "step": 5540 + }, + { + "epoch": 0.88, + "learning_rate": 4.7236913358620974e-05, + "loss": 1.5227, + "step": 5541 + }, + { + "epoch": 0.88, + "learning_rate": 4.723573427652143e-05, + "loss": 1.5602, + "step": 5542 + }, + { + "epoch": 0.88, + "learning_rate": 4.723455495762483e-05, + "loss": 1.6009, + "step": 5543 + }, + { + "epoch": 0.88, + "learning_rate": 4.7233375401943716e-05, + "loss": 1.4339, + "step": 5544 + }, + { + "epoch": 0.88, + "learning_rate": 4.7232195609490646e-05, + "loss": 1.5226, + "step": 5545 + }, + { + "epoch": 0.88, + "learning_rate": 4.723101558027819e-05, + "loss": 1.5392, + "step": 5546 + }, + { + "epoch": 0.88, + "learning_rate": 4.7229835314318926e-05, + "loss": 1.4753, + "step": 5547 + }, + { + "epoch": 0.88, + "learning_rate": 4.72286548116254e-05, + "loss": 1.5098, + "step": 5548 + }, + { + "epoch": 0.88, + "learning_rate": 4.7227474072210206e-05, + "loss": 1.4824, + "step": 5549 + }, + { + "epoch": 0.88, + "learning_rate": 4.72262930960859e-05, + "loss": 1.5402, + "step": 5550 + }, + { + "epoch": 0.88, + "learning_rate": 4.722511188326507e-05, + "loss": 1.5352, + "step": 5551 + }, + { + "epoch": 0.88, + "learning_rate": 4.722393043376029e-05, + "loss": 1.4958, + "step": 5552 + }, + { + "epoch": 0.88, + "learning_rate": 4.7222748747584146e-05, + "loss": 1.4683, + "step": 5553 + }, + { + "epoch": 0.88, + "learning_rate": 4.7221566824749214e-05, + "loss": 1.4326, + "step": 5554 + }, + { + "epoch": 0.88, + "learning_rate": 4.722038466526809e-05, + "loss": 1.5047, + "step": 5555 + }, + { + "epoch": 0.89, + "learning_rate": 4.721920226915336e-05, + "loss": 1.4222, + "step": 5556 + }, + { + "epoch": 0.89, + "learning_rate": 4.7218019636417615e-05, + "loss": 1.5443, + "step": 5557 + }, + { + "epoch": 0.89, + "learning_rate": 4.721683676707345e-05, + "loss": 1.5519, + "step": 5558 + }, + { + "epoch": 0.89, + "learning_rate": 4.721565366113346e-05, + "loss": 1.4717, + "step": 5559 + }, + { + "epoch": 0.89, + "learning_rate": 4.721447031861025e-05, + "loss": 1.4227, + "step": 5560 + }, + { + "epoch": 0.89, + "learning_rate": 4.721328673951642e-05, + "loss": 1.4948, + "step": 5561 + }, + { + "epoch": 0.89, + "learning_rate": 4.721210292386456e-05, + "loss": 1.5124, + "step": 5562 + }, + { + "epoch": 0.89, + "learning_rate": 4.7210918871667296e-05, + "loss": 1.4611, + "step": 5563 + }, + { + "epoch": 0.89, + "learning_rate": 4.720973458293723e-05, + "loss": 1.4855, + "step": 5564 + }, + { + "epoch": 0.89, + "learning_rate": 4.720855005768697e-05, + "loss": 1.507, + "step": 5565 + }, + { + "epoch": 0.89, + "learning_rate": 4.720736529592914e-05, + "loss": 1.4979, + "step": 5566 + }, + { + "epoch": 0.89, + "learning_rate": 4.720618029767636e-05, + "loss": 1.5155, + "step": 5567 + }, + { + "epoch": 0.89, + "learning_rate": 4.7204995062941235e-05, + "loss": 1.5247, + "step": 5568 + }, + { + "epoch": 0.89, + "learning_rate": 4.720380959173639e-05, + "loss": 1.432, + "step": 5569 + }, + { + "epoch": 0.89, + "learning_rate": 4.720262388407446e-05, + "loss": 1.4192, + "step": 5570 + }, + { + "epoch": 0.89, + "learning_rate": 4.720143793996806e-05, + "loss": 1.4831, + "step": 5571 + }, + { + "epoch": 0.89, + "learning_rate": 4.720025175942983e-05, + "loss": 1.4911, + "step": 5572 + }, + { + "epoch": 0.89, + "learning_rate": 4.7199065342472386e-05, + "loss": 1.4556, + "step": 5573 + }, + { + "epoch": 0.89, + "learning_rate": 4.7197878689108375e-05, + "loss": 1.5163, + "step": 5574 + }, + { + "epoch": 0.89, + "learning_rate": 4.7196691799350444e-05, + "loss": 1.5501, + "step": 5575 + }, + { + "epoch": 0.89, + "learning_rate": 4.719550467321121e-05, + "loss": 1.4552, + "step": 5576 + }, + { + "epoch": 0.89, + "learning_rate": 4.7194317310703336e-05, + "loss": 1.5251, + "step": 5577 + }, + { + "epoch": 0.89, + "learning_rate": 4.719312971183945e-05, + "loss": 1.481, + "step": 5578 + }, + { + "epoch": 0.89, + "learning_rate": 4.719194187663221e-05, + "loss": 1.4349, + "step": 5579 + }, + { + "epoch": 0.89, + "learning_rate": 4.719075380509426e-05, + "loss": 1.5163, + "step": 5580 + }, + { + "epoch": 0.89, + "learning_rate": 4.7189565497238254e-05, + "loss": 1.4468, + "step": 5581 + }, + { + "epoch": 0.89, + "learning_rate": 4.718837695307685e-05, + "loss": 1.478, + "step": 5582 + }, + { + "epoch": 0.89, + "learning_rate": 4.71871881726227e-05, + "loss": 1.4294, + "step": 5583 + }, + { + "epoch": 0.89, + "learning_rate": 4.7185999155888475e-05, + "loss": 1.4715, + "step": 5584 + }, + { + "epoch": 0.89, + "learning_rate": 4.7184809902886815e-05, + "loss": 1.438, + "step": 5585 + }, + { + "epoch": 0.89, + "learning_rate": 4.71836204136304e-05, + "loss": 1.4974, + "step": 5586 + }, + { + "epoch": 0.89, + "learning_rate": 4.7182430688131904e-05, + "loss": 1.4297, + "step": 5587 + }, + { + "epoch": 0.89, + "learning_rate": 4.718124072640398e-05, + "loss": 1.4671, + "step": 5588 + }, + { + "epoch": 0.89, + "learning_rate": 4.718005052845931e-05, + "loss": 1.5449, + "step": 5589 + }, + { + "epoch": 0.89, + "learning_rate": 4.717886009431057e-05, + "loss": 1.563, + "step": 5590 + }, + { + "epoch": 0.89, + "learning_rate": 4.717766942397044e-05, + "loss": 1.5179, + "step": 5591 + }, + { + "epoch": 0.89, + "learning_rate": 4.717647851745159e-05, + "loss": 1.4847, + "step": 5592 + }, + { + "epoch": 0.89, + "learning_rate": 4.7175287374766705e-05, + "loss": 1.4595, + "step": 5593 + }, + { + "epoch": 0.89, + "learning_rate": 4.717409599592848e-05, + "loss": 1.408, + "step": 5594 + }, + { + "epoch": 0.89, + "learning_rate": 4.7172904380949586e-05, + "loss": 1.4079, + "step": 5595 + }, + { + "epoch": 0.89, + "learning_rate": 4.7171712529842724e-05, + "loss": 1.4627, + "step": 5596 + }, + { + "epoch": 0.89, + "learning_rate": 4.717052044262059e-05, + "loss": 1.4348, + "step": 5597 + }, + { + "epoch": 0.89, + "learning_rate": 4.716932811929587e-05, + "loss": 1.5435, + "step": 5598 + }, + { + "epoch": 0.89, + "learning_rate": 4.7168135559881266e-05, + "loss": 1.4307, + "step": 5599 + }, + { + "epoch": 0.89, + "learning_rate": 4.716694276438947e-05, + "loss": 1.5334, + "step": 5600 + }, + { + "epoch": 0.89, + "learning_rate": 4.7165749732833196e-05, + "loss": 1.5036, + "step": 5601 + }, + { + "epoch": 0.89, + "learning_rate": 4.716455646522515e-05, + "loss": 1.509, + "step": 5602 + }, + { + "epoch": 0.89, + "learning_rate": 4.7163362961578025e-05, + "loss": 1.471, + "step": 5603 + }, + { + "epoch": 0.89, + "learning_rate": 4.716216922190454e-05, + "loss": 1.5011, + "step": 5604 + }, + { + "epoch": 0.89, + "learning_rate": 4.716097524621742e-05, + "loss": 1.4154, + "step": 5605 + }, + { + "epoch": 0.89, + "learning_rate": 4.715978103452936e-05, + "loss": 1.4504, + "step": 5606 + }, + { + "epoch": 0.89, + "learning_rate": 4.7158586586853084e-05, + "loss": 1.5098, + "step": 5607 + }, + { + "epoch": 0.89, + "learning_rate": 4.7157391903201314e-05, + "loss": 1.4248, + "step": 5608 + }, + { + "epoch": 0.89, + "learning_rate": 4.715619698358678e-05, + "loss": 1.4159, + "step": 5609 + }, + { + "epoch": 0.89, + "learning_rate": 4.715500182802218e-05, + "loss": 1.4904, + "step": 5610 + }, + { + "epoch": 0.89, + "learning_rate": 4.715380643652028e-05, + "loss": 1.4455, + "step": 5611 + }, + { + "epoch": 0.89, + "learning_rate": 4.7152610809093786e-05, + "loss": 1.5025, + "step": 5612 + }, + { + "epoch": 0.89, + "learning_rate": 4.715141494575543e-05, + "loss": 1.4136, + "step": 5613 + }, + { + "epoch": 0.89, + "learning_rate": 4.715021884651797e-05, + "loss": 1.4673, + "step": 5614 + }, + { + "epoch": 0.89, + "learning_rate": 4.7149022511394115e-05, + "loss": 1.5072, + "step": 5615 + }, + { + "epoch": 0.89, + "learning_rate": 4.7147825940396616e-05, + "loss": 1.4406, + "step": 5616 + }, + { + "epoch": 0.89, + "learning_rate": 4.714662913353822e-05, + "loss": 1.4046, + "step": 5617 + }, + { + "epoch": 0.9, + "learning_rate": 4.7145432090831665e-05, + "loss": 1.5228, + "step": 5618 + }, + { + "epoch": 0.9, + "learning_rate": 4.7144234812289704e-05, + "loss": 1.4618, + "step": 5619 + }, + { + "epoch": 0.9, + "learning_rate": 4.714303729792509e-05, + "loss": 1.3828, + "step": 5620 + }, + { + "epoch": 0.9, + "learning_rate": 4.714183954775057e-05, + "loss": 1.4271, + "step": 5621 + }, + { + "epoch": 0.9, + "learning_rate": 4.71406415617789e-05, + "loss": 1.3853, + "step": 5622 + }, + { + "epoch": 0.9, + "learning_rate": 4.7139443340022846e-05, + "loss": 1.4645, + "step": 5623 + }, + { + "epoch": 0.9, + "learning_rate": 4.7138244882495156e-05, + "loss": 1.4136, + "step": 5624 + }, + { + "epoch": 0.9, + "learning_rate": 4.71370461892086e-05, + "loss": 1.4818, + "step": 5625 + }, + { + "epoch": 0.9, + "learning_rate": 4.713584726017594e-05, + "loss": 1.4393, + "step": 5626 + }, + { + "epoch": 0.9, + "learning_rate": 4.713464809540995e-05, + "loss": 1.4337, + "step": 5627 + }, + { + "epoch": 0.9, + "learning_rate": 4.713344869492339e-05, + "loss": 1.4898, + "step": 5628 + }, + { + "epoch": 0.9, + "learning_rate": 4.7132249058729046e-05, + "loss": 1.5083, + "step": 5629 + }, + { + "epoch": 0.9, + "learning_rate": 4.713104918683968e-05, + "loss": 1.5169, + "step": 5630 + }, + { + "epoch": 0.9, + "learning_rate": 4.712984907926808e-05, + "loss": 1.4435, + "step": 5631 + }, + { + "epoch": 0.9, + "learning_rate": 4.712864873602702e-05, + "loss": 1.5203, + "step": 5632 + }, + { + "epoch": 0.9, + "learning_rate": 4.7127448157129284e-05, + "loss": 1.4834, + "step": 5633 + }, + { + "epoch": 0.9, + "learning_rate": 4.7126247342587664e-05, + "loss": 1.4489, + "step": 5634 + }, + { + "epoch": 0.9, + "learning_rate": 4.712504629241494e-05, + "loss": 1.4688, + "step": 5635 + }, + { + "epoch": 0.9, + "learning_rate": 4.7123845006623914e-05, + "loss": 1.4426, + "step": 5636 + }, + { + "epoch": 0.9, + "learning_rate": 4.7122643485227357e-05, + "loss": 1.4331, + "step": 5637 + }, + { + "epoch": 0.9, + "learning_rate": 4.712144172823808e-05, + "loss": 1.4728, + "step": 5638 + }, + { + "epoch": 0.9, + "learning_rate": 4.7120239735668894e-05, + "loss": 1.3875, + "step": 5639 + }, + { + "epoch": 0.9, + "learning_rate": 4.711903750753257e-05, + "loss": 1.4829, + "step": 5640 + }, + { + "epoch": 0.9, + "learning_rate": 4.7117835043841924e-05, + "loss": 1.4919, + "step": 5641 + }, + { + "epoch": 0.9, + "learning_rate": 4.711663234460977e-05, + "loss": 1.5717, + "step": 5642 + }, + { + "epoch": 0.9, + "learning_rate": 4.71154294098489e-05, + "loss": 1.4492, + "step": 5643 + }, + { + "epoch": 0.9, + "learning_rate": 4.7114226239572144e-05, + "loss": 1.4893, + "step": 5644 + }, + { + "epoch": 0.9, + "learning_rate": 4.71130228337923e-05, + "loss": 1.4243, + "step": 5645 + }, + { + "epoch": 0.9, + "learning_rate": 4.711181919252219e-05, + "loss": 1.4481, + "step": 5646 + }, + { + "epoch": 0.9, + "learning_rate": 4.7110615315774625e-05, + "loss": 1.402, + "step": 5647 + }, + { + "epoch": 0.9, + "learning_rate": 4.710941120356243e-05, + "loss": 1.4943, + "step": 5648 + }, + { + "epoch": 0.9, + "learning_rate": 4.7108206855898435e-05, + "loss": 1.387, + "step": 5649 + }, + { + "epoch": 0.9, + "learning_rate": 4.710700227279545e-05, + "loss": 1.4006, + "step": 5650 + }, + { + "epoch": 0.9, + "learning_rate": 4.710579745426632e-05, + "loss": 1.446, + "step": 5651 + }, + { + "epoch": 0.9, + "learning_rate": 4.7104592400323866e-05, + "loss": 1.4779, + "step": 5652 + }, + { + "epoch": 0.9, + "learning_rate": 4.7103387110980925e-05, + "loss": 1.563, + "step": 5653 + }, + { + "epoch": 0.9, + "learning_rate": 4.710218158625033e-05, + "loss": 1.4289, + "step": 5654 + }, + { + "epoch": 0.9, + "learning_rate": 4.7100975826144925e-05, + "loss": 1.4408, + "step": 5655 + }, + { + "epoch": 0.9, + "learning_rate": 4.709976983067753e-05, + "loss": 1.4482, + "step": 5656 + }, + { + "epoch": 0.9, + "learning_rate": 4.709856359986101e-05, + "loss": 1.4491, + "step": 5657 + }, + { + "epoch": 0.9, + "learning_rate": 4.7097357133708206e-05, + "loss": 1.4277, + "step": 5658 + }, + { + "epoch": 0.9, + "learning_rate": 4.709615043223197e-05, + "loss": 1.4499, + "step": 5659 + }, + { + "epoch": 0.9, + "learning_rate": 4.709494349544514e-05, + "loss": 1.5632, + "step": 5660 + }, + { + "epoch": 0.9, + "learning_rate": 4.709373632336058e-05, + "loss": 1.4448, + "step": 5661 + }, + { + "epoch": 0.9, + "learning_rate": 4.709252891599113e-05, + "loss": 1.3996, + "step": 5662 + }, + { + "epoch": 0.9, + "learning_rate": 4.7091321273349673e-05, + "loss": 1.4315, + "step": 5663 + }, + { + "epoch": 0.9, + "learning_rate": 4.7090113395449055e-05, + "loss": 1.4793, + "step": 5664 + }, + { + "epoch": 0.9, + "learning_rate": 4.708890528230214e-05, + "loss": 1.48, + "step": 5665 + }, + { + "epoch": 0.9, + "learning_rate": 4.7087696933921786e-05, + "loss": 1.4528, + "step": 5666 + }, + { + "epoch": 0.9, + "learning_rate": 4.708648835032088e-05, + "loss": 1.4019, + "step": 5667 + }, + { + "epoch": 0.9, + "learning_rate": 4.708527953151227e-05, + "loss": 1.417, + "step": 5668 + }, + { + "epoch": 0.9, + "learning_rate": 4.708407047750885e-05, + "loss": 1.4468, + "step": 5669 + }, + { + "epoch": 0.9, + "learning_rate": 4.708286118832349e-05, + "loss": 1.396, + "step": 5670 + }, + { + "epoch": 0.9, + "learning_rate": 4.708165166396906e-05, + "loss": 1.4504, + "step": 5671 + }, + { + "epoch": 0.9, + "learning_rate": 4.708044190445844e-05, + "loss": 1.4564, + "step": 5672 + }, + { + "epoch": 0.9, + "learning_rate": 4.707923190980453e-05, + "loss": 1.4139, + "step": 5673 + }, + { + "epoch": 0.9, + "learning_rate": 4.70780216800202e-05, + "loss": 1.4792, + "step": 5674 + }, + { + "epoch": 0.9, + "learning_rate": 4.707681121511834e-05, + "loss": 1.4909, + "step": 5675 + }, + { + "epoch": 0.9, + "learning_rate": 4.707560051511185e-05, + "loss": 1.4186, + "step": 5676 + }, + { + "epoch": 0.9, + "learning_rate": 4.707438958001361e-05, + "loss": 1.4326, + "step": 5677 + }, + { + "epoch": 0.9, + "learning_rate": 4.707317840983653e-05, + "loss": 1.4662, + "step": 5678 + }, + { + "epoch": 0.9, + "learning_rate": 4.7071967004593496e-05, + "loss": 1.403, + "step": 5679 + }, + { + "epoch": 0.9, + "learning_rate": 4.707075536429741e-05, + "loss": 1.4051, + "step": 5680 + }, + { + "epoch": 0.91, + "learning_rate": 4.7069543488961185e-05, + "loss": 1.3688, + "step": 5681 + }, + { + "epoch": 0.91, + "learning_rate": 4.706833137859773e-05, + "loss": 1.4419, + "step": 5682 + }, + { + "epoch": 0.91, + "learning_rate": 4.706711903321993e-05, + "loss": 1.3457, + "step": 5683 + }, + { + "epoch": 0.91, + "learning_rate": 4.7065906452840716e-05, + "loss": 1.3964, + "step": 5684 + }, + { + "epoch": 0.91, + "learning_rate": 4.706469363747299e-05, + "loss": 1.389, + "step": 5685 + }, + { + "epoch": 0.91, + "learning_rate": 4.706348058712967e-05, + "loss": 1.4262, + "step": 5686 + }, + { + "epoch": 0.91, + "learning_rate": 4.706226730182368e-05, + "loss": 1.4253, + "step": 5687 + }, + { + "epoch": 0.91, + "learning_rate": 4.7061053781567946e-05, + "loss": 1.4212, + "step": 5688 + }, + { + "epoch": 0.91, + "learning_rate": 4.7059840026375375e-05, + "loss": 1.4758, + "step": 5689 + }, + { + "epoch": 0.91, + "learning_rate": 4.70586260362589e-05, + "loss": 1.3609, + "step": 5690 + }, + { + "epoch": 0.91, + "learning_rate": 4.705741181123145e-05, + "loss": 1.4823, + "step": 5691 + }, + { + "epoch": 0.91, + "learning_rate": 4.705619735130595e-05, + "loss": 1.4461, + "step": 5692 + }, + { + "epoch": 0.91, + "learning_rate": 4.705498265649535e-05, + "loss": 1.4302, + "step": 5693 + }, + { + "epoch": 0.91, + "learning_rate": 4.705376772681257e-05, + "loss": 1.4225, + "step": 5694 + }, + { + "epoch": 0.91, + "learning_rate": 4.705255256227056e-05, + "loss": 1.3999, + "step": 5695 + }, + { + "epoch": 0.91, + "learning_rate": 4.7051337162882236e-05, + "loss": 1.3289, + "step": 5696 + }, + { + "epoch": 0.91, + "learning_rate": 4.7050121528660574e-05, + "loss": 1.4202, + "step": 5697 + }, + { + "epoch": 0.91, + "learning_rate": 4.70489056596185e-05, + "loss": 1.4137, + "step": 5698 + }, + { + "epoch": 0.91, + "learning_rate": 4.704768955576897e-05, + "loss": 1.3739, + "step": 5699 + }, + { + "epoch": 0.91, + "learning_rate": 4.7046473217124926e-05, + "loss": 1.4006, + "step": 5700 + }, + { + "epoch": 0.91, + "learning_rate": 4.704525664369933e-05, + "loss": 1.3293, + "step": 5701 + }, + { + "epoch": 0.91, + "learning_rate": 4.704403983550513e-05, + "loss": 1.4373, + "step": 5702 + }, + { + "epoch": 0.91, + "learning_rate": 4.7042822792555295e-05, + "loss": 1.4002, + "step": 5703 + }, + { + "epoch": 0.91, + "learning_rate": 4.704160551486278e-05, + "loss": 1.4396, + "step": 5704 + }, + { + "epoch": 0.91, + "learning_rate": 4.704038800244055e-05, + "loss": 1.3298, + "step": 5705 + }, + { + "epoch": 0.91, + "learning_rate": 4.7039170255301564e-05, + "loss": 1.5239, + "step": 5706 + }, + { + "epoch": 0.91, + "learning_rate": 4.7037952273458795e-05, + "loss": 1.3968, + "step": 5707 + }, + { + "epoch": 0.91, + "learning_rate": 4.703673405692521e-05, + "loss": 1.3229, + "step": 5708 + }, + { + "epoch": 0.91, + "learning_rate": 4.70355156057138e-05, + "loss": 1.3734, + "step": 5709 + }, + { + "epoch": 0.91, + "learning_rate": 4.7034296919837516e-05, + "loss": 1.4261, + "step": 5710 + }, + { + "epoch": 0.91, + "learning_rate": 4.703307799930935e-05, + "loss": 1.3875, + "step": 5711 + }, + { + "epoch": 0.91, + "learning_rate": 4.703185884414227e-05, + "loss": 1.3465, + "step": 5712 + }, + { + "epoch": 0.91, + "learning_rate": 4.7030639454349276e-05, + "loss": 1.3658, + "step": 5713 + }, + { + "epoch": 0.91, + "learning_rate": 4.702941982994335e-05, + "loss": 1.3937, + "step": 5714 + }, + { + "epoch": 0.91, + "learning_rate": 4.702819997093748e-05, + "loss": 1.3682, + "step": 5715 + }, + { + "epoch": 0.91, + "learning_rate": 4.7026979877344656e-05, + "loss": 1.3714, + "step": 5716 + }, + { + "epoch": 0.91, + "learning_rate": 4.7025759549177864e-05, + "loss": 1.3392, + "step": 5717 + }, + { + "epoch": 0.91, + "learning_rate": 4.70245389864501e-05, + "loss": 1.4125, + "step": 5718 + }, + { + "epoch": 0.91, + "learning_rate": 4.702331818917437e-05, + "loss": 1.3488, + "step": 5719 + }, + { + "epoch": 0.91, + "learning_rate": 4.7022097157363675e-05, + "loss": 1.4461, + "step": 5720 + }, + { + "epoch": 0.91, + "learning_rate": 4.702087589103101e-05, + "loss": 1.5228, + "step": 5721 + }, + { + "epoch": 0.91, + "learning_rate": 4.7019654390189394e-05, + "loss": 1.3936, + "step": 5722 + }, + { + "epoch": 0.91, + "learning_rate": 4.701843265485182e-05, + "loss": 1.436, + "step": 5723 + }, + { + "epoch": 0.91, + "learning_rate": 4.701721068503132e-05, + "loss": 1.4202, + "step": 5724 + }, + { + "epoch": 0.91, + "learning_rate": 4.701598848074088e-05, + "loss": 1.464, + "step": 5725 + }, + { + "epoch": 0.91, + "learning_rate": 4.701476604199353e-05, + "loss": 1.3809, + "step": 5726 + }, + { + "epoch": 0.91, + "learning_rate": 4.701354336880228e-05, + "loss": 1.4302, + "step": 5727 + }, + { + "epoch": 0.91, + "learning_rate": 4.701232046118017e-05, + "loss": 1.3916, + "step": 5728 + }, + { + "epoch": 0.91, + "learning_rate": 4.701109731914021e-05, + "loss": 1.4079, + "step": 5729 + }, + { + "epoch": 0.91, + "learning_rate": 4.700987394269542e-05, + "loss": 1.3545, + "step": 5730 + }, + { + "epoch": 0.91, + "learning_rate": 4.700865033185884e-05, + "loss": 1.3969, + "step": 5731 + }, + { + "epoch": 0.91, + "learning_rate": 4.70074264866435e-05, + "loss": 1.4346, + "step": 5732 + }, + { + "epoch": 0.91, + "learning_rate": 4.7006202407062414e-05, + "loss": 1.3149, + "step": 5733 + }, + { + "epoch": 0.91, + "learning_rate": 4.700497809312865e-05, + "loss": 1.3984, + "step": 5734 + }, + { + "epoch": 0.91, + "learning_rate": 4.700375354485522e-05, + "loss": 1.3551, + "step": 5735 + }, + { + "epoch": 0.91, + "learning_rate": 4.7002528762255174e-05, + "loss": 1.3877, + "step": 5736 + }, + { + "epoch": 0.91, + "learning_rate": 4.7001303745341555e-05, + "loss": 1.4051, + "step": 5737 + }, + { + "epoch": 0.91, + "learning_rate": 4.700007849412741e-05, + "loss": 1.3219, + "step": 5738 + }, + { + "epoch": 0.91, + "learning_rate": 4.699885300862579e-05, + "loss": 1.4549, + "step": 5739 + }, + { + "epoch": 0.91, + "learning_rate": 4.6997627288849734e-05, + "loss": 1.4333, + "step": 5740 + }, + { + "epoch": 0.91, + "learning_rate": 4.69964013348123e-05, + "loss": 1.3828, + "step": 5741 + }, + { + "epoch": 0.91, + "learning_rate": 4.699517514652655e-05, + "loss": 1.3921, + "step": 5742 + }, + { + "epoch": 0.91, + "learning_rate": 4.699394872400554e-05, + "loss": 1.4209, + "step": 5743 + }, + { + "epoch": 0.92, + "learning_rate": 4.699272206726233e-05, + "loss": 1.4027, + "step": 5744 + }, + { + "epoch": 0.92, + "learning_rate": 4.699149517630998e-05, + "loss": 1.3687, + "step": 5745 + }, + { + "epoch": 0.92, + "learning_rate": 4.699026805116156e-05, + "loss": 1.4009, + "step": 5746 + }, + { + "epoch": 0.92, + "learning_rate": 4.6989040691830134e-05, + "loss": 1.4356, + "step": 5747 + }, + { + "epoch": 0.92, + "learning_rate": 4.698781309832877e-05, + "loss": 1.3651, + "step": 5748 + }, + { + "epoch": 0.92, + "learning_rate": 4.6986585270670544e-05, + "loss": 1.2827, + "step": 5749 + }, + { + "epoch": 0.92, + "learning_rate": 4.698535720886854e-05, + "loss": 1.3696, + "step": 5750 + }, + { + "epoch": 0.92, + "learning_rate": 4.698412891293583e-05, + "loss": 1.3714, + "step": 5751 + }, + { + "epoch": 0.92, + "learning_rate": 4.6982900382885496e-05, + "loss": 1.3376, + "step": 5752 + }, + { + "epoch": 0.92, + "learning_rate": 4.698167161873062e-05, + "loss": 1.4528, + "step": 5753 + }, + { + "epoch": 0.92, + "learning_rate": 4.698044262048428e-05, + "loss": 1.3789, + "step": 5754 + }, + { + "epoch": 0.92, + "learning_rate": 4.697921338815957e-05, + "loss": 1.3148, + "step": 5755 + }, + { + "epoch": 0.92, + "learning_rate": 4.697798392176958e-05, + "loss": 1.3425, + "step": 5756 + }, + { + "epoch": 0.92, + "learning_rate": 4.6976754221327413e-05, + "loss": 1.3711, + "step": 5757 + }, + { + "epoch": 0.92, + "learning_rate": 4.697552428684615e-05, + "loss": 1.3812, + "step": 5758 + }, + { + "epoch": 0.92, + "learning_rate": 4.697429411833889e-05, + "loss": 1.3175, + "step": 5759 + }, + { + "epoch": 0.92, + "learning_rate": 4.6973063715818746e-05, + "loss": 1.3884, + "step": 5760 + }, + { + "epoch": 0.92, + "learning_rate": 4.697183307929882e-05, + "loss": 1.3886, + "step": 5761 + }, + { + "epoch": 0.92, + "learning_rate": 4.6970602208792205e-05, + "loss": 1.3191, + "step": 5762 + }, + { + "epoch": 0.92, + "learning_rate": 4.696937110431201e-05, + "loss": 1.3317, + "step": 5763 + }, + { + "epoch": 0.92, + "learning_rate": 4.696813976587136e-05, + "loss": 1.3682, + "step": 5764 + }, + { + "epoch": 0.92, + "learning_rate": 4.6966908193483346e-05, + "loss": 1.3874, + "step": 5765 + }, + { + "epoch": 0.92, + "learning_rate": 4.6965676387161106e-05, + "loss": 1.4191, + "step": 5766 + }, + { + "epoch": 0.92, + "learning_rate": 4.696444434691774e-05, + "loss": 1.3595, + "step": 5767 + }, + { + "epoch": 0.92, + "learning_rate": 4.696321207276638e-05, + "loss": 1.3262, + "step": 5768 + }, + { + "epoch": 0.92, + "learning_rate": 4.696197956472015e-05, + "loss": 1.3815, + "step": 5769 + }, + { + "epoch": 0.92, + "learning_rate": 4.696074682279217e-05, + "loss": 1.3389, + "step": 5770 + }, + { + "epoch": 0.92, + "learning_rate": 4.695951384699557e-05, + "loss": 1.339, + "step": 5771 + }, + { + "epoch": 0.92, + "learning_rate": 4.6958280637343476e-05, + "loss": 1.4476, + "step": 5772 + }, + { + "epoch": 0.92, + "learning_rate": 4.6957047193849026e-05, + "loss": 1.3841, + "step": 5773 + }, + { + "epoch": 0.92, + "learning_rate": 4.695581351652535e-05, + "loss": 1.297, + "step": 5774 + }, + { + "epoch": 0.92, + "learning_rate": 4.6954579605385596e-05, + "loss": 1.4061, + "step": 5775 + }, + { + "epoch": 0.92, + "learning_rate": 4.6953345460442895e-05, + "loss": 1.3503, + "step": 5776 + }, + { + "epoch": 0.92, + "learning_rate": 4.695211108171039e-05, + "loss": 1.3384, + "step": 5777 + }, + { + "epoch": 0.92, + "learning_rate": 4.6950876469201235e-05, + "loss": 1.3289, + "step": 5778 + }, + { + "epoch": 0.92, + "learning_rate": 4.694964162292857e-05, + "loss": 1.348, + "step": 5779 + }, + { + "epoch": 0.92, + "learning_rate": 4.6948406542905546e-05, + "loss": 1.3187, + "step": 5780 + }, + { + "epoch": 0.92, + "learning_rate": 4.694717122914532e-05, + "loss": 1.3114, + "step": 5781 + }, + { + "epoch": 0.92, + "learning_rate": 4.694593568166104e-05, + "loss": 1.3105, + "step": 5782 + }, + { + "epoch": 0.92, + "learning_rate": 4.6944699900465875e-05, + "loss": 1.4136, + "step": 5783 + }, + { + "epoch": 0.92, + "learning_rate": 4.694346388557298e-05, + "loss": 1.3071, + "step": 5784 + }, + { + "epoch": 0.92, + "learning_rate": 4.6942227636995515e-05, + "loss": 1.3792, + "step": 5785 + }, + { + "epoch": 0.92, + "learning_rate": 4.694099115474665e-05, + "loss": 1.305, + "step": 5786 + }, + { + "epoch": 0.92, + "learning_rate": 4.693975443883954e-05, + "loss": 1.3211, + "step": 5787 + }, + { + "epoch": 0.92, + "learning_rate": 4.6938517489287374e-05, + "loss": 1.3862, + "step": 5788 + }, + { + "epoch": 0.92, + "learning_rate": 4.693728030610331e-05, + "loss": 1.3512, + "step": 5789 + }, + { + "epoch": 0.92, + "learning_rate": 4.6936042889300536e-05, + "loss": 1.3464, + "step": 5790 + }, + { + "epoch": 0.92, + "learning_rate": 4.6934805238892216e-05, + "loss": 1.3628, + "step": 5791 + }, + { + "epoch": 0.92, + "learning_rate": 4.693356735489154e-05, + "loss": 1.3361, + "step": 5792 + }, + { + "epoch": 0.92, + "learning_rate": 4.6932329237311687e-05, + "loss": 1.3703, + "step": 5793 + }, + { + "epoch": 0.92, + "learning_rate": 4.6931090886165844e-05, + "loss": 1.4186, + "step": 5794 + }, + { + "epoch": 0.92, + "learning_rate": 4.69298523014672e-05, + "loss": 1.4116, + "step": 5795 + }, + { + "epoch": 0.92, + "learning_rate": 4.692861348322894e-05, + "loss": 1.4067, + "step": 5796 + }, + { + "epoch": 0.92, + "learning_rate": 4.6927374431464254e-05, + "loss": 1.3192, + "step": 5797 + }, + { + "epoch": 0.92, + "learning_rate": 4.6926135146186354e-05, + "loss": 1.3654, + "step": 5798 + }, + { + "epoch": 0.92, + "learning_rate": 4.692489562740842e-05, + "loss": 1.3257, + "step": 5799 + }, + { + "epoch": 0.92, + "learning_rate": 4.692365587514366e-05, + "loss": 1.4163, + "step": 5800 + }, + { + "epoch": 0.92, + "learning_rate": 4.692241588940527e-05, + "loss": 1.3641, + "step": 5801 + }, + { + "epoch": 0.92, + "learning_rate": 4.692117567020648e-05, + "loss": 1.3717, + "step": 5802 + }, + { + "epoch": 0.92, + "learning_rate": 4.6919935217560453e-05, + "loss": 1.2826, + "step": 5803 + }, + { + "epoch": 0.92, + "learning_rate": 4.691869453148044e-05, + "loss": 1.3765, + "step": 5804 + }, + { + "epoch": 0.92, + "learning_rate": 4.691745361197963e-05, + "loss": 1.3394, + "step": 5805 + }, + { + "epoch": 0.92, + "learning_rate": 4.691621245907124e-05, + "loss": 1.267, + "step": 5806 + }, + { + "epoch": 0.93, + "learning_rate": 4.69149710727685e-05, + "loss": 1.3953, + "step": 5807 + }, + { + "epoch": 0.93, + "learning_rate": 4.691372945308463e-05, + "loss": 1.2802, + "step": 5808 + }, + { + "epoch": 0.93, + "learning_rate": 4.6912487600032834e-05, + "loss": 1.3962, + "step": 5809 + }, + { + "epoch": 0.93, + "learning_rate": 4.6911245513626355e-05, + "loss": 1.3591, + "step": 5810 + }, + { + "epoch": 0.93, + "learning_rate": 4.6910003193878406e-05, + "loss": 1.3571, + "step": 5811 + }, + { + "epoch": 0.93, + "learning_rate": 4.690876064080223e-05, + "loss": 1.325, + "step": 5812 + }, + { + "epoch": 0.93, + "learning_rate": 4.6907517854411056e-05, + "loss": 1.369, + "step": 5813 + }, + { + "epoch": 0.93, + "learning_rate": 4.6906274834718116e-05, + "loss": 1.3228, + "step": 5814 + }, + { + "epoch": 0.93, + "learning_rate": 4.690503158173665e-05, + "loss": 1.4548, + "step": 5815 + }, + { + "epoch": 0.93, + "learning_rate": 4.6903788095479895e-05, + "loss": 1.3296, + "step": 5816 + }, + { + "epoch": 0.93, + "learning_rate": 4.69025443759611e-05, + "loss": 1.3265, + "step": 5817 + }, + { + "epoch": 0.93, + "learning_rate": 4.6901300423193494e-05, + "loss": 1.3398, + "step": 5818 + }, + { + "epoch": 0.93, + "learning_rate": 4.6900056237190335e-05, + "loss": 1.3162, + "step": 5819 + }, + { + "epoch": 0.93, + "learning_rate": 4.689881181796488e-05, + "loss": 1.4002, + "step": 5820 + }, + { + "epoch": 0.93, + "learning_rate": 4.6897567165530376e-05, + "loss": 1.3894, + "step": 5821 + }, + { + "epoch": 0.93, + "learning_rate": 4.689632227990007e-05, + "loss": 1.2912, + "step": 5822 + }, + { + "epoch": 0.93, + "learning_rate": 4.689507716108723e-05, + "loss": 1.2762, + "step": 5823 + }, + { + "epoch": 0.93, + "learning_rate": 4.6893831809105115e-05, + "loss": 1.3597, + "step": 5824 + }, + { + "epoch": 0.93, + "learning_rate": 4.689258622396697e-05, + "loss": 1.2793, + "step": 5825 + }, + { + "epoch": 0.93, + "learning_rate": 4.689134040568608e-05, + "loss": 1.2917, + "step": 5826 + }, + { + "epoch": 0.93, + "learning_rate": 4.689009435427571e-05, + "loss": 1.3216, + "step": 5827 + }, + { + "epoch": 0.93, + "learning_rate": 4.688884806974913e-05, + "loss": 1.3763, + "step": 5828 + }, + { + "epoch": 0.93, + "learning_rate": 4.6887601552119586e-05, + "loss": 1.3063, + "step": 5829 + }, + { + "epoch": 0.93, + "learning_rate": 4.688635480140039e-05, + "loss": 1.2384, + "step": 5830 + }, + { + "epoch": 0.93, + "learning_rate": 4.68851078176048e-05, + "loss": 1.2993, + "step": 5831 + }, + { + "epoch": 0.93, + "learning_rate": 4.6883860600746094e-05, + "loss": 1.2866, + "step": 5832 + }, + { + "epoch": 0.93, + "learning_rate": 4.688261315083756e-05, + "loss": 1.3066, + "step": 5833 + }, + { + "epoch": 0.93, + "learning_rate": 4.688136546789249e-05, + "loss": 1.3154, + "step": 5834 + }, + { + "epoch": 0.93, + "learning_rate": 4.688011755192415e-05, + "loss": 1.3167, + "step": 5835 + }, + { + "epoch": 0.93, + "learning_rate": 4.687886940294585e-05, + "loss": 1.3508, + "step": 5836 + }, + { + "epoch": 0.93, + "learning_rate": 4.6877621020970866e-05, + "loss": 1.3267, + "step": 5837 + }, + { + "epoch": 0.93, + "learning_rate": 4.6876372406012505e-05, + "loss": 1.3272, + "step": 5838 + }, + { + "epoch": 0.93, + "learning_rate": 4.687512355808406e-05, + "loss": 1.2977, + "step": 5839 + }, + { + "epoch": 0.93, + "learning_rate": 4.6873874477198834e-05, + "loss": 1.2181, + "step": 5840 + }, + { + "epoch": 0.93, + "learning_rate": 4.687262516337012e-05, + "loss": 1.2914, + "step": 5841 + }, + { + "epoch": 0.93, + "learning_rate": 4.6871375616611225e-05, + "loss": 1.2634, + "step": 5842 + }, + { + "epoch": 0.93, + "learning_rate": 4.687012583693546e-05, + "loss": 1.2759, + "step": 5843 + }, + { + "epoch": 0.93, + "learning_rate": 4.6868875824356126e-05, + "loss": 1.3285, + "step": 5844 + }, + { + "epoch": 0.93, + "learning_rate": 4.6867625578886555e-05, + "loss": 1.3641, + "step": 5845 + }, + { + "epoch": 0.93, + "learning_rate": 4.686637510054004e-05, + "loss": 1.3021, + "step": 5846 + }, + { + "epoch": 0.93, + "learning_rate": 4.6865124389329904e-05, + "loss": 1.2529, + "step": 5847 + }, + { + "epoch": 0.93, + "learning_rate": 4.6863873445269476e-05, + "loss": 1.2969, + "step": 5848 + }, + { + "epoch": 0.93, + "learning_rate": 4.6862622268372057e-05, + "loss": 1.403, + "step": 5849 + }, + { + "epoch": 0.93, + "learning_rate": 4.6861370858650996e-05, + "loss": 1.3763, + "step": 5850 + }, + { + "epoch": 0.93, + "learning_rate": 4.6860119216119603e-05, + "loss": 1.2716, + "step": 5851 + }, + { + "epoch": 0.93, + "learning_rate": 4.6858867340791214e-05, + "loss": 1.3455, + "step": 5852 + }, + { + "epoch": 0.93, + "learning_rate": 4.6857615232679156e-05, + "loss": 1.3329, + "step": 5853 + }, + { + "epoch": 0.93, + "learning_rate": 4.6856362891796765e-05, + "loss": 1.418, + "step": 5854 + }, + { + "epoch": 0.93, + "learning_rate": 4.685511031815738e-05, + "loss": 1.2892, + "step": 5855 + }, + { + "epoch": 0.93, + "learning_rate": 4.6853857511774337e-05, + "loss": 1.3418, + "step": 5856 + }, + { + "epoch": 0.93, + "learning_rate": 4.685260447266098e-05, + "loss": 1.3698, + "step": 5857 + }, + { + "epoch": 0.93, + "learning_rate": 4.6851351200830654e-05, + "loss": 1.3407, + "step": 5858 + }, + { + "epoch": 0.93, + "learning_rate": 4.6850097696296704e-05, + "loss": 1.3052, + "step": 5859 + }, + { + "epoch": 0.93, + "learning_rate": 4.684884395907248e-05, + "loss": 1.3591, + "step": 5860 + }, + { + "epoch": 0.93, + "learning_rate": 4.684758998917133e-05, + "loss": 1.3682, + "step": 5861 + }, + { + "epoch": 0.93, + "learning_rate": 4.684633578660661e-05, + "loss": 1.4043, + "step": 5862 + }, + { + "epoch": 0.93, + "learning_rate": 4.684508135139168e-05, + "loss": 1.3927, + "step": 5863 + }, + { + "epoch": 0.93, + "learning_rate": 4.684382668353989e-05, + "loss": 1.3289, + "step": 5864 + }, + { + "epoch": 0.93, + "learning_rate": 4.684257178306462e-05, + "loss": 1.2951, + "step": 5865 + }, + { + "epoch": 0.93, + "learning_rate": 4.684131664997921e-05, + "loss": 1.383, + "step": 5866 + }, + { + "epoch": 0.93, + "learning_rate": 4.6840061284297046e-05, + "loss": 1.3116, + "step": 5867 + }, + { + "epoch": 0.93, + "learning_rate": 4.6838805686031476e-05, + "loss": 1.2904, + "step": 5868 + }, + { + "epoch": 0.94, + "learning_rate": 4.683754985519589e-05, + "loss": 1.2882, + "step": 5869 + }, + { + "epoch": 0.94, + "learning_rate": 4.6836293791803667e-05, + "loss": 1.3065, + "step": 5870 + }, + { + "epoch": 0.94, + "learning_rate": 4.683503749586816e-05, + "loss": 1.2881, + "step": 5871 + }, + { + "epoch": 0.94, + "learning_rate": 4.6833780967402764e-05, + "loss": 1.3488, + "step": 5872 + }, + { + "epoch": 0.94, + "learning_rate": 4.683252420642086e-05, + "loss": 1.2354, + "step": 5873 + }, + { + "epoch": 0.94, + "learning_rate": 4.683126721293582e-05, + "loss": 1.2932, + "step": 5874 + }, + { + "epoch": 0.94, + "learning_rate": 4.6830009986961044e-05, + "loss": 1.3247, + "step": 5875 + }, + { + "epoch": 0.94, + "learning_rate": 4.682875252850991e-05, + "loss": 1.3203, + "step": 5876 + }, + { + "epoch": 0.94, + "learning_rate": 4.682749483759582e-05, + "loss": 1.3178, + "step": 5877 + }, + { + "epoch": 0.94, + "learning_rate": 4.682623691423216e-05, + "loss": 1.4146, + "step": 5878 + }, + { + "epoch": 0.94, + "learning_rate": 4.6824978758432325e-05, + "loss": 1.2929, + "step": 5879 + }, + { + "epoch": 0.94, + "learning_rate": 4.6823720370209714e-05, + "loss": 1.2211, + "step": 5880 + }, + { + "epoch": 0.94, + "learning_rate": 4.682246174957774e-05, + "loss": 1.2743, + "step": 5881 + }, + { + "epoch": 0.94, + "learning_rate": 4.6821202896549786e-05, + "loss": 1.3677, + "step": 5882 + }, + { + "epoch": 0.94, + "learning_rate": 4.6819943811139275e-05, + "loss": 1.2726, + "step": 5883 + }, + { + "epoch": 0.94, + "learning_rate": 4.681868449335961e-05, + "loss": 1.356, + "step": 5884 + }, + { + "epoch": 0.94, + "learning_rate": 4.681742494322421e-05, + "loss": 1.2702, + "step": 5885 + }, + { + "epoch": 0.94, + "learning_rate": 4.681616516074647e-05, + "loss": 1.3024, + "step": 5886 + }, + { + "epoch": 0.94, + "learning_rate": 4.681490514593981e-05, + "loss": 1.4111, + "step": 5887 + }, + { + "epoch": 0.94, + "learning_rate": 4.6813644898817664e-05, + "loss": 1.2993, + "step": 5888 + }, + { + "epoch": 0.94, + "learning_rate": 4.681238441939344e-05, + "loss": 1.3566, + "step": 5889 + }, + { + "epoch": 0.94, + "learning_rate": 4.681112370768057e-05, + "loss": 1.3436, + "step": 5890 + }, + { + "epoch": 0.94, + "learning_rate": 4.680986276369247e-05, + "loss": 1.2598, + "step": 5891 + }, + { + "epoch": 0.94, + "learning_rate": 4.680860158744257e-05, + "loss": 1.341, + "step": 5892 + }, + { + "epoch": 0.94, + "learning_rate": 4.680734017894431e-05, + "loss": 1.3096, + "step": 5893 + }, + { + "epoch": 0.94, + "learning_rate": 4.680607853821111e-05, + "loss": 1.3298, + "step": 5894 + }, + { + "epoch": 0.94, + "learning_rate": 4.680481666525641e-05, + "loss": 1.355, + "step": 5895 + }, + { + "epoch": 0.94, + "learning_rate": 4.680355456009366e-05, + "loss": 1.3301, + "step": 5896 + }, + { + "epoch": 0.94, + "learning_rate": 4.680229222273629e-05, + "loss": 1.2705, + "step": 5897 + }, + { + "epoch": 0.94, + "learning_rate": 4.680102965319774e-05, + "loss": 1.3089, + "step": 5898 + }, + { + "epoch": 0.94, + "learning_rate": 4.679976685149146e-05, + "loss": 1.3283, + "step": 5899 + }, + { + "epoch": 0.94, + "learning_rate": 4.67985038176309e-05, + "loss": 1.2666, + "step": 5900 + }, + { + "epoch": 0.94, + "learning_rate": 4.6797240551629505e-05, + "loss": 1.3503, + "step": 5901 + }, + { + "epoch": 0.94, + "learning_rate": 4.6795977053500736e-05, + "loss": 1.3197, + "step": 5902 + }, + { + "epoch": 0.94, + "learning_rate": 4.6794713323258046e-05, + "loss": 1.2736, + "step": 5903 + }, + { + "epoch": 0.94, + "learning_rate": 4.679344936091488e-05, + "loss": 1.3835, + "step": 5904 + }, + { + "epoch": 0.94, + "learning_rate": 4.679218516648471e-05, + "loss": 1.2787, + "step": 5905 + }, + { + "epoch": 0.94, + "learning_rate": 4.6790920739981014e-05, + "loss": 1.3447, + "step": 5906 + }, + { + "epoch": 0.94, + "learning_rate": 4.678965608141723e-05, + "loss": 1.2588, + "step": 5907 + }, + { + "epoch": 0.94, + "learning_rate": 4.678839119080684e-05, + "loss": 1.1911, + "step": 5908 + }, + { + "epoch": 0.94, + "learning_rate": 4.678712606816331e-05, + "loss": 1.2661, + "step": 5909 + }, + { + "epoch": 0.94, + "learning_rate": 4.678586071350012e-05, + "loss": 1.2059, + "step": 5910 + }, + { + "epoch": 0.94, + "learning_rate": 4.6784595126830735e-05, + "loss": 1.265, + "step": 5911 + }, + { + "epoch": 0.94, + "learning_rate": 4.678332930816865e-05, + "loss": 1.2767, + "step": 5912 + }, + { + "epoch": 0.94, + "learning_rate": 4.6782063257527316e-05, + "loss": 1.3171, + "step": 5913 + }, + { + "epoch": 0.94, + "learning_rate": 4.678079697492024e-05, + "loss": 1.2494, + "step": 5914 + }, + { + "epoch": 0.94, + "learning_rate": 4.677953046036089e-05, + "loss": 1.3089, + "step": 5915 + }, + { + "epoch": 0.94, + "learning_rate": 4.677826371386278e-05, + "loss": 1.2713, + "step": 5916 + }, + { + "epoch": 0.94, + "learning_rate": 4.677699673543937e-05, + "loss": 1.2297, + "step": 5917 + }, + { + "epoch": 0.94, + "learning_rate": 4.677572952510417e-05, + "loss": 1.3338, + "step": 5918 + }, + { + "epoch": 0.94, + "learning_rate": 4.6774462082870675e-05, + "loss": 1.3324, + "step": 5919 + }, + { + "epoch": 0.94, + "learning_rate": 4.6773194408752376e-05, + "loss": 1.3969, + "step": 5920 + }, + { + "epoch": 0.94, + "learning_rate": 4.677192650276277e-05, + "loss": 1.2731, + "step": 5921 + }, + { + "epoch": 0.94, + "learning_rate": 4.677065836491537e-05, + "loss": 1.2788, + "step": 5922 + }, + { + "epoch": 0.94, + "learning_rate": 4.6769389995223675e-05, + "loss": 1.2307, + "step": 5923 + }, + { + "epoch": 0.94, + "learning_rate": 4.676812139370119e-05, + "loss": 1.363, + "step": 5924 + }, + { + "epoch": 0.94, + "learning_rate": 4.676685256036143e-05, + "loss": 1.3372, + "step": 5925 + }, + { + "epoch": 0.94, + "learning_rate": 4.676558349521791e-05, + "loss": 1.2842, + "step": 5926 + }, + { + "epoch": 0.94, + "learning_rate": 4.676431419828414e-05, + "loss": 1.1863, + "step": 5927 + }, + { + "epoch": 0.94, + "learning_rate": 4.676304466957363e-05, + "loss": 1.2769, + "step": 5928 + }, + { + "epoch": 0.94, + "learning_rate": 4.676177490909991e-05, + "loss": 1.2733, + "step": 5929 + }, + { + "epoch": 0.94, + "learning_rate": 4.6760504916876504e-05, + "loss": 1.2852, + "step": 5930 + }, + { + "epoch": 0.94, + "learning_rate": 4.675923469291692e-05, + "loss": 1.2397, + "step": 5931 + }, + { + "epoch": 0.95, + "learning_rate": 4.675796423723471e-05, + "loss": 1.3263, + "step": 5932 + }, + { + "epoch": 0.95, + "learning_rate": 4.675669354984338e-05, + "loss": 1.3281, + "step": 5933 + }, + { + "epoch": 0.95, + "learning_rate": 4.675542263075648e-05, + "loss": 1.2218, + "step": 5934 + }, + { + "epoch": 0.95, + "learning_rate": 4.675415147998753e-05, + "loss": 1.2308, + "step": 5935 + }, + { + "epoch": 0.95, + "learning_rate": 4.675288009755008e-05, + "loss": 1.3031, + "step": 5936 + }, + { + "epoch": 0.95, + "learning_rate": 4.6751608483457665e-05, + "loss": 1.2868, + "step": 5937 + }, + { + "epoch": 0.95, + "learning_rate": 4.675033663772382e-05, + "loss": 1.3309, + "step": 5938 + }, + { + "epoch": 0.95, + "learning_rate": 4.67490645603621e-05, + "loss": 1.2471, + "step": 5939 + }, + { + "epoch": 0.95, + "learning_rate": 4.6747792251386034e-05, + "loss": 1.2917, + "step": 5940 + }, + { + "epoch": 0.95, + "learning_rate": 4.67465197108092e-05, + "loss": 1.2312, + "step": 5941 + }, + { + "epoch": 0.95, + "learning_rate": 4.6745246938645124e-05, + "loss": 1.2194, + "step": 5942 + }, + { + "epoch": 0.95, + "learning_rate": 4.674397393490737e-05, + "loss": 1.1947, + "step": 5943 + }, + { + "epoch": 0.95, + "learning_rate": 4.67427006996095e-05, + "loss": 1.2949, + "step": 5944 + }, + { + "epoch": 0.95, + "learning_rate": 4.674142723276507e-05, + "loss": 1.3626, + "step": 5945 + }, + { + "epoch": 0.95, + "learning_rate": 4.674015353438763e-05, + "loss": 1.3465, + "step": 5946 + }, + { + "epoch": 0.95, + "learning_rate": 4.673887960449076e-05, + "loss": 1.3533, + "step": 5947 + }, + { + "epoch": 0.95, + "learning_rate": 4.6737605443088015e-05, + "loss": 1.2321, + "step": 5948 + }, + { + "epoch": 0.95, + "learning_rate": 4.673633105019298e-05, + "loss": 1.348, + "step": 5949 + }, + { + "epoch": 0.95, + "learning_rate": 4.67350564258192e-05, + "loss": 1.3136, + "step": 5950 + }, + { + "epoch": 0.95, + "learning_rate": 4.673378156998027e-05, + "loss": 1.215, + "step": 5951 + }, + { + "epoch": 0.95, + "learning_rate": 4.673250648268978e-05, + "loss": 1.2319, + "step": 5952 + }, + { + "epoch": 0.95, + "learning_rate": 4.673123116396127e-05, + "loss": 1.2183, + "step": 5953 + }, + { + "epoch": 0.95, + "learning_rate": 4.672995561380834e-05, + "loss": 1.2752, + "step": 5954 + }, + { + "epoch": 0.95, + "learning_rate": 4.672867983224458e-05, + "loss": 1.2453, + "step": 5955 + }, + { + "epoch": 0.95, + "learning_rate": 4.6727403819283576e-05, + "loss": 1.276, + "step": 5956 + }, + { + "epoch": 0.95, + "learning_rate": 4.672612757493891e-05, + "loss": 1.381, + "step": 5957 + }, + { + "epoch": 0.95, + "learning_rate": 4.6724851099224173e-05, + "loss": 1.3885, + "step": 5958 + }, + { + "epoch": 0.95, + "learning_rate": 4.672357439215297e-05, + "loss": 1.27, + "step": 5959 + }, + { + "epoch": 0.95, + "learning_rate": 4.672229745373888e-05, + "loss": 1.3138, + "step": 5960 + }, + { + "epoch": 0.95, + "learning_rate": 4.672102028399551e-05, + "loss": 1.2688, + "step": 5961 + }, + { + "epoch": 0.95, + "learning_rate": 4.6719742882936466e-05, + "loss": 1.3132, + "step": 5962 + }, + { + "epoch": 0.95, + "learning_rate": 4.671846525057534e-05, + "loss": 1.3231, + "step": 5963 + }, + { + "epoch": 0.95, + "learning_rate": 4.6717187386925746e-05, + "loss": 1.2113, + "step": 5964 + }, + { + "epoch": 0.95, + "learning_rate": 4.6715909292001296e-05, + "loss": 1.3901, + "step": 5965 + }, + { + "epoch": 0.95, + "learning_rate": 4.671463096581559e-05, + "loss": 1.2606, + "step": 5966 + }, + { + "epoch": 0.95, + "learning_rate": 4.671335240838225e-05, + "loss": 1.2992, + "step": 5967 + }, + { + "epoch": 0.95, + "learning_rate": 4.6712073619714894e-05, + "loss": 1.2882, + "step": 5968 + }, + { + "epoch": 0.95, + "learning_rate": 4.671079459982714e-05, + "loss": 1.2511, + "step": 5969 + }, + { + "epoch": 0.95, + "learning_rate": 4.670951534873259e-05, + "loss": 1.3428, + "step": 5970 + }, + { + "epoch": 0.95, + "learning_rate": 4.6708235866444886e-05, + "loss": 1.2028, + "step": 5971 + }, + { + "epoch": 0.95, + "learning_rate": 4.670695615297765e-05, + "loss": 1.2596, + "step": 5972 + }, + { + "epoch": 0.95, + "learning_rate": 4.6705676208344515e-05, + "loss": 1.1964, + "step": 5973 + }, + { + "epoch": 0.95, + "learning_rate": 4.67043960325591e-05, + "loss": 1.2661, + "step": 5974 + }, + { + "epoch": 0.95, + "learning_rate": 4.6703115625635046e-05, + "loss": 1.3242, + "step": 5975 + }, + { + "epoch": 0.95, + "learning_rate": 4.670183498758599e-05, + "loss": 1.2622, + "step": 5976 + }, + { + "epoch": 0.95, + "learning_rate": 4.6700554118425566e-05, + "loss": 1.2326, + "step": 5977 + }, + { + "epoch": 0.95, + "learning_rate": 4.669927301816741e-05, + "loss": 1.2883, + "step": 5978 + }, + { + "epoch": 0.95, + "learning_rate": 4.669799168682518e-05, + "loss": 1.265, + "step": 5979 + }, + { + "epoch": 0.95, + "learning_rate": 4.66967101244125e-05, + "loss": 1.2754, + "step": 5980 + }, + { + "epoch": 0.95, + "learning_rate": 4.669542833094303e-05, + "loss": 1.2507, + "step": 5981 + }, + { + "epoch": 0.95, + "learning_rate": 4.669414630643043e-05, + "loss": 1.2983, + "step": 5982 + }, + { + "epoch": 0.95, + "learning_rate": 4.669286405088834e-05, + "loss": 1.3037, + "step": 5983 + }, + { + "epoch": 0.95, + "learning_rate": 4.669158156433041e-05, + "loss": 1.3299, + "step": 5984 + }, + { + "epoch": 0.95, + "learning_rate": 4.6690298846770317e-05, + "loss": 1.2075, + "step": 5985 + }, + { + "epoch": 0.95, + "learning_rate": 4.66890158982217e-05, + "loss": 1.2612, + "step": 5986 + }, + { + "epoch": 0.95, + "learning_rate": 4.668773271869824e-05, + "loss": 1.2344, + "step": 5987 + }, + { + "epoch": 0.95, + "learning_rate": 4.668644930821359e-05, + "loss": 1.2044, + "step": 5988 + }, + { + "epoch": 0.95, + "learning_rate": 4.668516566678142e-05, + "loss": 1.2435, + "step": 5989 + }, + { + "epoch": 0.95, + "learning_rate": 4.668388179441539e-05, + "loss": 1.2306, + "step": 5990 + }, + { + "epoch": 0.95, + "learning_rate": 4.6682597691129206e-05, + "loss": 1.2362, + "step": 5991 + }, + { + "epoch": 0.95, + "learning_rate": 4.6681313356936505e-05, + "loss": 1.3164, + "step": 5992 + }, + { + "epoch": 0.95, + "learning_rate": 4.6680028791850984e-05, + "loss": 1.262, + "step": 5993 + }, + { + "epoch": 0.95, + "learning_rate": 4.667874399588632e-05, + "loss": 1.2264, + "step": 5994 + }, + { + "epoch": 0.96, + "learning_rate": 4.6677458969056195e-05, + "loss": 1.2712, + "step": 5995 + }, + { + "epoch": 0.96, + "learning_rate": 4.667617371137429e-05, + "loss": 1.3271, + "step": 5996 + }, + { + "epoch": 0.96, + "learning_rate": 4.66748882228543e-05, + "loss": 1.2396, + "step": 5997 + }, + { + "epoch": 0.96, + "learning_rate": 4.66736025035099e-05, + "loss": 1.2474, + "step": 5998 + }, + { + "epoch": 0.96, + "learning_rate": 4.6672316553354794e-05, + "loss": 1.271, + "step": 5999 + }, + { + "epoch": 0.96, + "learning_rate": 4.667103037240268e-05, + "loss": 1.2158, + "step": 6000 + }, + { + "epoch": 0.96, + "learning_rate": 4.6669743960667244e-05, + "loss": 1.2266, + "step": 6001 + }, + { + "epoch": 0.96, + "learning_rate": 4.6668457318162195e-05, + "loss": 1.2363, + "step": 6002 + }, + { + "epoch": 0.96, + "learning_rate": 4.666717044490123e-05, + "loss": 1.2118, + "step": 6003 + }, + { + "epoch": 0.96, + "learning_rate": 4.666588334089805e-05, + "loss": 1.2811, + "step": 6004 + }, + { + "epoch": 0.96, + "learning_rate": 4.666459600616637e-05, + "loss": 1.2433, + "step": 6005 + }, + { + "epoch": 0.96, + "learning_rate": 4.666330844071989e-05, + "loss": 1.1961, + "step": 6006 + }, + { + "epoch": 0.96, + "learning_rate": 4.6662020644572336e-05, + "loss": 1.2285, + "step": 6007 + }, + { + "epoch": 0.96, + "learning_rate": 4.666073261773741e-05, + "loss": 1.2749, + "step": 6008 + }, + { + "epoch": 0.96, + "learning_rate": 4.6659444360228824e-05, + "loss": 1.2778, + "step": 6009 + }, + { + "epoch": 0.96, + "learning_rate": 4.6658155872060315e-05, + "loss": 1.2393, + "step": 6010 + }, + { + "epoch": 0.96, + "learning_rate": 4.6656867153245594e-05, + "loss": 1.2814, + "step": 6011 + }, + { + "epoch": 0.96, + "learning_rate": 4.6655578203798384e-05, + "loss": 1.2319, + "step": 6012 + }, + { + "epoch": 0.96, + "learning_rate": 4.6654289023732415e-05, + "loss": 1.2083, + "step": 6013 + }, + { + "epoch": 0.96, + "learning_rate": 4.665299961306141e-05, + "loss": 1.3473, + "step": 6014 + }, + { + "epoch": 0.96, + "learning_rate": 4.665170997179911e-05, + "loss": 1.194, + "step": 6015 + }, + { + "epoch": 0.96, + "learning_rate": 4.6650420099959244e-05, + "loss": 1.3436, + "step": 6016 + }, + { + "epoch": 0.96, + "learning_rate": 4.664912999755555e-05, + "loss": 1.1849, + "step": 6017 + }, + { + "epoch": 0.96, + "learning_rate": 4.664783966460175e-05, + "loss": 1.3162, + "step": 6018 + }, + { + "epoch": 0.96, + "learning_rate": 4.664654910111162e-05, + "loss": 1.2749, + "step": 6019 + }, + { + "epoch": 0.96, + "learning_rate": 4.6645258307098873e-05, + "loss": 1.2749, + "step": 6020 + }, + { + "epoch": 0.96, + "learning_rate": 4.664396728257727e-05, + "loss": 1.25, + "step": 6021 + }, + { + "epoch": 0.96, + "learning_rate": 4.6642676027560556e-05, + "loss": 1.1877, + "step": 6022 + }, + { + "epoch": 0.96, + "learning_rate": 4.664138454206248e-05, + "loss": 1.2594, + "step": 6023 + }, + { + "epoch": 0.96, + "learning_rate": 4.66400928260968e-05, + "loss": 1.2795, + "step": 6024 + }, + { + "epoch": 0.96, + "learning_rate": 4.663880087967727e-05, + "loss": 1.2876, + "step": 6025 + }, + { + "epoch": 0.96, + "learning_rate": 4.663750870281764e-05, + "loss": 1.1813, + "step": 6026 + }, + { + "epoch": 0.96, + "learning_rate": 4.663621629553169e-05, + "loss": 1.2329, + "step": 6027 + }, + { + "epoch": 0.96, + "learning_rate": 4.663492365783316e-05, + "loss": 1.1997, + "step": 6028 + }, + { + "epoch": 0.96, + "learning_rate": 4.663363078973584e-05, + "loss": 1.3302, + "step": 6029 + }, + { + "epoch": 0.96, + "learning_rate": 4.6632337691253483e-05, + "loss": 1.2386, + "step": 6030 + }, + { + "epoch": 0.96, + "learning_rate": 4.6631044362399857e-05, + "loss": 1.2507, + "step": 6031 + }, + { + "epoch": 0.96, + "learning_rate": 4.662975080318874e-05, + "loss": 1.1849, + "step": 6032 + }, + { + "epoch": 0.96, + "learning_rate": 4.662845701363392e-05, + "loss": 1.2357, + "step": 6033 + }, + { + "epoch": 0.96, + "learning_rate": 4.662716299374915e-05, + "loss": 1.172, + "step": 6034 + }, + { + "epoch": 0.96, + "learning_rate": 4.662586874354823e-05, + "loss": 1.3276, + "step": 6035 + }, + { + "epoch": 0.96, + "learning_rate": 4.662457426304494e-05, + "loss": 1.1639, + "step": 6036 + }, + { + "epoch": 0.96, + "learning_rate": 4.662327955225306e-05, + "loss": 1.1524, + "step": 6037 + }, + { + "epoch": 0.96, + "learning_rate": 4.6621984611186386e-05, + "loss": 1.2746, + "step": 6038 + }, + { + "epoch": 0.96, + "learning_rate": 4.66206894398587e-05, + "loss": 1.2954, + "step": 6039 + }, + { + "epoch": 0.96, + "learning_rate": 4.6619394038283795e-05, + "loss": 1.2954, + "step": 6040 + }, + { + "epoch": 0.96, + "learning_rate": 4.6618098406475466e-05, + "loss": 1.2795, + "step": 6041 + }, + { + "epoch": 0.96, + "learning_rate": 4.6616802544447526e-05, + "loss": 1.2035, + "step": 6042 + }, + { + "epoch": 0.96, + "learning_rate": 4.661550645221375e-05, + "loss": 1.1828, + "step": 6043 + }, + { + "epoch": 0.96, + "learning_rate": 4.6614210129787964e-05, + "loss": 1.2127, + "step": 6044 + }, + { + "epoch": 0.96, + "learning_rate": 4.661291357718395e-05, + "loss": 1.2338, + "step": 6045 + }, + { + "epoch": 0.96, + "learning_rate": 4.6611616794415535e-05, + "loss": 1.2472, + "step": 6046 + }, + { + "epoch": 0.96, + "learning_rate": 4.6610319781496526e-05, + "loss": 1.2412, + "step": 6047 + }, + { + "epoch": 0.96, + "learning_rate": 4.6609022538440725e-05, + "loss": 1.2043, + "step": 6048 + }, + { + "epoch": 0.96, + "learning_rate": 4.660772506526196e-05, + "loss": 1.3566, + "step": 6049 + }, + { + "epoch": 0.96, + "learning_rate": 4.660642736197402e-05, + "loss": 1.3283, + "step": 6050 + }, + { + "epoch": 0.96, + "learning_rate": 4.660512942859077e-05, + "loss": 1.1706, + "step": 6051 + }, + { + "epoch": 0.96, + "learning_rate": 4.6603831265126e-05, + "loss": 1.2131, + "step": 6052 + }, + { + "epoch": 0.96, + "learning_rate": 4.660253287159355e-05, + "loss": 1.0871, + "step": 6053 + }, + { + "epoch": 0.96, + "learning_rate": 4.660123424800723e-05, + "loss": 1.1784, + "step": 6054 + }, + { + "epoch": 0.96, + "learning_rate": 4.659993539438088e-05, + "loss": 1.1395, + "step": 6055 + }, + { + "epoch": 0.96, + "learning_rate": 4.659863631072834e-05, + "loss": 1.2254, + "step": 6056 + }, + { + "epoch": 0.96, + "learning_rate": 4.659733699706343e-05, + "loss": 1.2412, + "step": 6057 + }, + { + "epoch": 0.97, + "learning_rate": 4.65960374534e-05, + "loss": 1.1807, + "step": 6058 + }, + { + "epoch": 0.97, + "learning_rate": 4.659473767975187e-05, + "loss": 1.2038, + "step": 6059 + }, + { + "epoch": 0.97, + "learning_rate": 4.65934376761329e-05, + "loss": 1.2311, + "step": 6060 + }, + { + "epoch": 0.97, + "learning_rate": 4.659213744255693e-05, + "loss": 1.342, + "step": 6061 + }, + { + "epoch": 0.97, + "learning_rate": 4.659083697903781e-05, + "loss": 1.1755, + "step": 6062 + }, + { + "epoch": 0.97, + "learning_rate": 4.658953628558937e-05, + "loss": 1.1914, + "step": 6063 + }, + { + "epoch": 0.97, + "learning_rate": 4.658823536222549e-05, + "loss": 1.2512, + "step": 6064 + }, + { + "epoch": 0.97, + "learning_rate": 4.6586934208960005e-05, + "loss": 1.2821, + "step": 6065 + }, + { + "epoch": 0.97, + "learning_rate": 4.658563282580678e-05, + "loss": 1.1899, + "step": 6066 + }, + { + "epoch": 0.97, + "learning_rate": 4.658433121277966e-05, + "loss": 1.1642, + "step": 6067 + }, + { + "epoch": 0.97, + "learning_rate": 4.658302936989253e-05, + "loss": 1.2176, + "step": 6068 + }, + { + "epoch": 0.97, + "learning_rate": 4.6581727297159225e-05, + "loss": 1.1169, + "step": 6069 + }, + { + "epoch": 0.97, + "learning_rate": 4.658042499459364e-05, + "loss": 1.1761, + "step": 6070 + }, + { + "epoch": 0.97, + "learning_rate": 4.6579122462209626e-05, + "loss": 1.2026, + "step": 6071 + }, + { + "epoch": 0.97, + "learning_rate": 4.657781970002106e-05, + "loss": 1.2375, + "step": 6072 + }, + { + "epoch": 0.97, + "learning_rate": 4.6576516708041814e-05, + "loss": 1.242, + "step": 6073 + }, + { + "epoch": 0.97, + "learning_rate": 4.6575213486285765e-05, + "loss": 1.173, + "step": 6074 + }, + { + "epoch": 0.97, + "learning_rate": 4.6573910034766785e-05, + "loss": 1.2852, + "step": 6075 + }, + { + "epoch": 0.97, + "learning_rate": 4.657260635349877e-05, + "loss": 1.2072, + "step": 6076 + }, + { + "epoch": 0.97, + "learning_rate": 4.657130244249559e-05, + "loss": 1.2573, + "step": 6077 + }, + { + "epoch": 0.97, + "learning_rate": 4.656999830177113e-05, + "loss": 1.1598, + "step": 6078 + }, + { + "epoch": 0.97, + "learning_rate": 4.656869393133929e-05, + "loss": 1.221, + "step": 6079 + }, + { + "epoch": 0.97, + "learning_rate": 4.656738933121396e-05, + "loss": 1.2619, + "step": 6080 + }, + { + "epoch": 0.97, + "learning_rate": 4.656608450140902e-05, + "loss": 1.3192, + "step": 6081 + }, + { + "epoch": 0.97, + "learning_rate": 4.656477944193837e-05, + "loss": 1.215, + "step": 6082 + }, + { + "epoch": 0.97, + "learning_rate": 4.656347415281592e-05, + "loss": 1.2026, + "step": 6083 + }, + { + "epoch": 0.97, + "learning_rate": 4.656216863405556e-05, + "loss": 1.2132, + "step": 6084 + }, + { + "epoch": 0.97, + "learning_rate": 4.656086288567119e-05, + "loss": 1.2367, + "step": 6085 + }, + { + "epoch": 0.97, + "learning_rate": 4.655955690767672e-05, + "loss": 1.2352, + "step": 6086 + }, + { + "epoch": 0.97, + "learning_rate": 4.655825070008606e-05, + "loss": 1.1722, + "step": 6087 + }, + { + "epoch": 0.97, + "learning_rate": 4.655694426291311e-05, + "loss": 1.2184, + "step": 6088 + }, + { + "epoch": 0.97, + "learning_rate": 4.655563759617181e-05, + "loss": 1.2126, + "step": 6089 + }, + { + "epoch": 0.97, + "learning_rate": 4.655433069987605e-05, + "loss": 1.245, + "step": 6090 + }, + { + "epoch": 0.97, + "learning_rate": 4.655302357403975e-05, + "loss": 1.2311, + "step": 6091 + }, + { + "epoch": 0.97, + "learning_rate": 4.655171621867683e-05, + "loss": 1.2861, + "step": 6092 + }, + { + "epoch": 0.97, + "learning_rate": 4.655040863380122e-05, + "loss": 1.2682, + "step": 6093 + }, + { + "epoch": 0.97, + "learning_rate": 4.654910081942684e-05, + "loss": 1.2813, + "step": 6094 + }, + { + "epoch": 0.97, + "learning_rate": 4.6547792775567625e-05, + "loss": 1.271, + "step": 6095 + }, + { + "epoch": 0.97, + "learning_rate": 4.6546484502237495e-05, + "loss": 1.2985, + "step": 6096 + }, + { + "epoch": 0.97, + "learning_rate": 4.654517599945039e-05, + "loss": 1.1542, + "step": 6097 + }, + { + "epoch": 0.97, + "learning_rate": 4.654386726722023e-05, + "loss": 1.2538, + "step": 6098 + }, + { + "epoch": 0.97, + "learning_rate": 4.654255830556097e-05, + "loss": 1.3236, + "step": 6099 + }, + { + "epoch": 0.97, + "learning_rate": 4.654124911448655e-05, + "loss": 1.3154, + "step": 6100 + }, + { + "epoch": 0.97, + "learning_rate": 4.6539939694010894e-05, + "loss": 1.2868, + "step": 6101 + }, + { + "epoch": 0.97, + "learning_rate": 4.6538630044147964e-05, + "loss": 1.1755, + "step": 6102 + }, + { + "epoch": 0.97, + "learning_rate": 4.65373201649117e-05, + "loss": 1.2427, + "step": 6103 + }, + { + "epoch": 0.97, + "learning_rate": 4.6536010056316046e-05, + "loss": 1.3027, + "step": 6104 + }, + { + "epoch": 0.97, + "learning_rate": 4.653469971837496e-05, + "loss": 1.223, + "step": 6105 + }, + { + "epoch": 0.97, + "learning_rate": 4.6533389151102405e-05, + "loss": 1.2458, + "step": 6106 + }, + { + "epoch": 0.97, + "learning_rate": 4.653207835451232e-05, + "loss": 1.2262, + "step": 6107 + }, + { + "epoch": 0.97, + "learning_rate": 4.653076732861868e-05, + "loss": 1.2134, + "step": 6108 + }, + { + "epoch": 0.97, + "learning_rate": 4.652945607343544e-05, + "loss": 1.2065, + "step": 6109 + }, + { + "epoch": 0.97, + "learning_rate": 4.652814458897656e-05, + "loss": 1.214, + "step": 6110 + }, + { + "epoch": 0.97, + "learning_rate": 4.652683287525601e-05, + "loss": 1.1734, + "step": 6111 + }, + { + "epoch": 0.97, + "learning_rate": 4.652552093228776e-05, + "loss": 1.1932, + "step": 6112 + }, + { + "epoch": 0.97, + "learning_rate": 4.6524208760085774e-05, + "loss": 1.1777, + "step": 6113 + }, + { + "epoch": 0.97, + "learning_rate": 4.6522896358664044e-05, + "loss": 1.304, + "step": 6114 + }, + { + "epoch": 0.97, + "learning_rate": 4.6521583728036526e-05, + "loss": 1.2159, + "step": 6115 + }, + { + "epoch": 0.97, + "learning_rate": 4.652027086821721e-05, + "loss": 1.2344, + "step": 6116 + }, + { + "epoch": 0.97, + "learning_rate": 4.6518957779220074e-05, + "loss": 1.2835, + "step": 6117 + }, + { + "epoch": 0.97, + "learning_rate": 4.6517644461059105e-05, + "loss": 1.228, + "step": 6118 + }, + { + "epoch": 0.97, + "learning_rate": 4.651633091374829e-05, + "loss": 1.2217, + "step": 6119 + }, + { + "epoch": 0.97, + "learning_rate": 4.65150171373016e-05, + "loss": 1.1467, + "step": 6120 + }, + { + "epoch": 0.98, + "learning_rate": 4.651370313173305e-05, + "loss": 1.271, + "step": 6121 + }, + { + "epoch": 0.98, + "learning_rate": 4.651238889705662e-05, + "loss": 1.2427, + "step": 6122 + }, + { + "epoch": 0.98, + "learning_rate": 4.6511074433286306e-05, + "loss": 1.1735, + "step": 6123 + }, + { + "epoch": 0.98, + "learning_rate": 4.6509759740436115e-05, + "loss": 1.2036, + "step": 6124 + }, + { + "epoch": 0.98, + "learning_rate": 4.650844481852003e-05, + "loss": 1.1504, + "step": 6125 + }, + { + "epoch": 0.98, + "learning_rate": 4.650712966755209e-05, + "loss": 1.1597, + "step": 6126 + }, + { + "epoch": 0.98, + "learning_rate": 4.650581428754625e-05, + "loss": 1.2616, + "step": 6127 + }, + { + "epoch": 0.98, + "learning_rate": 4.650449867851656e-05, + "loss": 1.1754, + "step": 6128 + }, + { + "epoch": 0.98, + "learning_rate": 4.650318284047701e-05, + "loss": 1.1976, + "step": 6129 + }, + { + "epoch": 0.98, + "learning_rate": 4.650186677344162e-05, + "loss": 1.1491, + "step": 6130 + }, + { + "epoch": 0.98, + "learning_rate": 4.65005504774244e-05, + "loss": 1.2271, + "step": 6131 + }, + { + "epoch": 0.98, + "learning_rate": 4.649923395243937e-05, + "loss": 1.2661, + "step": 6132 + }, + { + "epoch": 0.98, + "learning_rate": 4.649791719850055e-05, + "loss": 1.1701, + "step": 6133 + }, + { + "epoch": 0.98, + "learning_rate": 4.6496600215621965e-05, + "loss": 1.2145, + "step": 6134 + }, + { + "epoch": 0.98, + "learning_rate": 4.6495283003817634e-05, + "loss": 1.2306, + "step": 6135 + }, + { + "epoch": 0.98, + "learning_rate": 4.64939655631016e-05, + "loss": 1.2283, + "step": 6136 + }, + { + "epoch": 0.98, + "learning_rate": 4.649264789348787e-05, + "loss": 1.2013, + "step": 6137 + }, + { + "epoch": 0.98, + "learning_rate": 4.64913299949905e-05, + "loss": 1.2196, + "step": 6138 + }, + { + "epoch": 0.98, + "learning_rate": 4.6490011867623506e-05, + "loss": 1.2184, + "step": 6139 + }, + { + "epoch": 0.98, + "learning_rate": 4.648869351140094e-05, + "loss": 1.1372, + "step": 6140 + }, + { + "epoch": 0.98, + "learning_rate": 4.648737492633683e-05, + "loss": 1.2087, + "step": 6141 + }, + { + "epoch": 0.98, + "learning_rate": 4.648605611244522e-05, + "loss": 1.1958, + "step": 6142 + }, + { + "epoch": 0.98, + "learning_rate": 4.648473706974016e-05, + "loss": 1.2393, + "step": 6143 + }, + { + "epoch": 0.98, + "learning_rate": 4.648341779823569e-05, + "loss": 1.2315, + "step": 6144 + }, + { + "epoch": 0.98, + "learning_rate": 4.6482098297945867e-05, + "loss": 1.1345, + "step": 6145 + }, + { + "epoch": 0.98, + "learning_rate": 4.648077856888473e-05, + "loss": 1.1392, + "step": 6146 + }, + { + "epoch": 0.98, + "learning_rate": 4.647945861106635e-05, + "loss": 1.155, + "step": 6147 + }, + { + "epoch": 0.98, + "learning_rate": 4.647813842450478e-05, + "loss": 1.17, + "step": 6148 + }, + { + "epoch": 0.98, + "learning_rate": 4.6476818009214076e-05, + "loss": 1.2142, + "step": 6149 + }, + { + "epoch": 0.98, + "learning_rate": 4.647549736520829e-05, + "loss": 1.1628, + "step": 6150 + }, + { + "epoch": 0.98, + "learning_rate": 4.64741764925015e-05, + "loss": 1.2489, + "step": 6151 + }, + { + "epoch": 0.98, + "learning_rate": 4.647285539110777e-05, + "loss": 1.1862, + "step": 6152 + }, + { + "epoch": 0.98, + "learning_rate": 4.647153406104117e-05, + "loss": 1.2469, + "step": 6153 + }, + { + "epoch": 0.98, + "learning_rate": 4.647021250231576e-05, + "loss": 1.2801, + "step": 6154 + }, + { + "epoch": 0.98, + "learning_rate": 4.6468890714945624e-05, + "loss": 1.2033, + "step": 6155 + }, + { + "epoch": 0.98, + "learning_rate": 4.646756869894483e-05, + "loss": 1.207, + "step": 6156 + }, + { + "epoch": 0.98, + "learning_rate": 4.6466246454327465e-05, + "loss": 1.187, + "step": 6157 + }, + { + "epoch": 0.98, + "learning_rate": 4.6464923981107616e-05, + "loss": 1.2415, + "step": 6158 + }, + { + "epoch": 0.98, + "learning_rate": 4.646360127929935e-05, + "loss": 1.214, + "step": 6159 + }, + { + "epoch": 0.98, + "learning_rate": 4.646227834891677e-05, + "loss": 1.2062, + "step": 6160 + }, + { + "epoch": 0.98, + "learning_rate": 4.646095518997396e-05, + "loss": 1.1977, + "step": 6161 + }, + { + "epoch": 0.98, + "learning_rate": 4.645963180248498e-05, + "loss": 1.1761, + "step": 6162 + }, + { + "epoch": 0.98, + "learning_rate": 4.6458308186463974e-05, + "loss": 1.2296, + "step": 6163 + }, + { + "epoch": 0.98, + "learning_rate": 4.6456984341925004e-05, + "loss": 1.229, + "step": 6164 + }, + { + "epoch": 0.98, + "learning_rate": 4.6455660268882187e-05, + "loss": 1.1779, + "step": 6165 + }, + { + "epoch": 0.98, + "learning_rate": 4.64543359673496e-05, + "loss": 1.1942, + "step": 6166 + }, + { + "epoch": 0.98, + "learning_rate": 4.645301143734137e-05, + "loss": 1.1981, + "step": 6167 + }, + { + "epoch": 0.98, + "learning_rate": 4.645168667887159e-05, + "loss": 1.2212, + "step": 6168 + }, + { + "epoch": 0.98, + "learning_rate": 4.645036169195437e-05, + "loss": 1.1194, + "step": 6169 + }, + { + "epoch": 0.98, + "learning_rate": 4.644903647660382e-05, + "loss": 1.1791, + "step": 6170 + }, + { + "epoch": 0.98, + "learning_rate": 4.644771103283405e-05, + "loss": 1.1743, + "step": 6171 + }, + { + "epoch": 0.98, + "learning_rate": 4.6446385360659184e-05, + "loss": 1.2157, + "step": 6172 + }, + { + "epoch": 0.98, + "learning_rate": 4.6445059460093336e-05, + "loss": 1.1216, + "step": 6173 + }, + { + "epoch": 0.98, + "learning_rate": 4.644373333115062e-05, + "loss": 1.2469, + "step": 6174 + }, + { + "epoch": 0.98, + "learning_rate": 4.644240697384517e-05, + "loss": 1.2384, + "step": 6175 + }, + { + "epoch": 0.98, + "learning_rate": 4.64410803881911e-05, + "loss": 1.182, + "step": 6176 + }, + { + "epoch": 0.98, + "learning_rate": 4.643975357420254e-05, + "loss": 1.2208, + "step": 6177 + }, + { + "epoch": 0.98, + "learning_rate": 4.643842653189362e-05, + "loss": 1.1195, + "step": 6178 + }, + { + "epoch": 0.98, + "learning_rate": 4.643709926127847e-05, + "loss": 1.1139, + "step": 6179 + }, + { + "epoch": 0.98, + "learning_rate": 4.643577176237123e-05, + "loss": 1.163, + "step": 6180 + }, + { + "epoch": 0.98, + "learning_rate": 4.6434444035186044e-05, + "loss": 1.1724, + "step": 6181 + }, + { + "epoch": 0.98, + "learning_rate": 4.6433116079737023e-05, + "loss": 1.0762, + "step": 6182 + }, + { + "epoch": 0.99, + "learning_rate": 4.643178789603834e-05, + "loss": 1.1766, + "step": 6183 + }, + { + "epoch": 0.99, + "learning_rate": 4.6430459484104134e-05, + "loss": 1.2337, + "step": 6184 + }, + { + "epoch": 0.99, + "learning_rate": 4.642913084394853e-05, + "loss": 1.2982, + "step": 6185 + }, + { + "epoch": 0.99, + "learning_rate": 4.642780197558571e-05, + "loss": 1.2845, + "step": 6186 + }, + { + "epoch": 0.99, + "learning_rate": 4.642647287902979e-05, + "loss": 1.1263, + "step": 6187 + }, + { + "epoch": 0.99, + "learning_rate": 4.6425143554294954e-05, + "loss": 1.2589, + "step": 6188 + }, + { + "epoch": 0.99, + "learning_rate": 4.642381400139535e-05, + "loss": 1.3027, + "step": 6189 + }, + { + "epoch": 0.99, + "learning_rate": 4.642248422034512e-05, + "loss": 1.1719, + "step": 6190 + }, + { + "epoch": 0.99, + "learning_rate": 4.6421154211158446e-05, + "loss": 1.2171, + "step": 6191 + }, + { + "epoch": 0.99, + "learning_rate": 4.641982397384949e-05, + "loss": 1.1878, + "step": 6192 + }, + { + "epoch": 0.99, + "learning_rate": 4.6418493508432404e-05, + "loss": 1.2384, + "step": 6193 + }, + { + "epoch": 0.99, + "learning_rate": 4.641716281492137e-05, + "loss": 1.1231, + "step": 6194 + }, + { + "epoch": 0.99, + "learning_rate": 4.641583189333055e-05, + "loss": 1.1863, + "step": 6195 + }, + { + "epoch": 0.99, + "learning_rate": 4.6414500743674125e-05, + "loss": 1.2764, + "step": 6196 + }, + { + "epoch": 0.99, + "learning_rate": 4.641316936596627e-05, + "loss": 1.2422, + "step": 6197 + }, + { + "epoch": 0.99, + "learning_rate": 4.641183776022117e-05, + "loss": 1.2083, + "step": 6198 + }, + { + "epoch": 0.99, + "learning_rate": 4.6410505926452985e-05, + "loss": 1.1849, + "step": 6199 + }, + { + "epoch": 0.99, + "learning_rate": 4.6409173864675914e-05, + "loss": 1.1436, + "step": 6200 + }, + { + "epoch": 0.99, + "learning_rate": 4.640784157490413e-05, + "loss": 1.1353, + "step": 6201 + }, + { + "epoch": 0.99, + "learning_rate": 4.640650905715184e-05, + "loss": 1.1813, + "step": 6202 + }, + { + "epoch": 0.99, + "learning_rate": 4.640517631143323e-05, + "loss": 1.1541, + "step": 6203 + }, + { + "epoch": 0.99, + "learning_rate": 4.6403843337762474e-05, + "loss": 1.1338, + "step": 6204 + }, + { + "epoch": 0.99, + "learning_rate": 4.640251013615379e-05, + "loss": 1.2017, + "step": 6205 + }, + { + "epoch": 0.99, + "learning_rate": 4.6401176706621366e-05, + "loss": 1.269, + "step": 6206 + }, + { + "epoch": 0.99, + "learning_rate": 4.6399843049179404e-05, + "loss": 1.2202, + "step": 6207 + }, + { + "epoch": 0.99, + "learning_rate": 4.639850916384211e-05, + "loss": 1.2489, + "step": 6208 + }, + { + "epoch": 0.99, + "learning_rate": 4.639717505062368e-05, + "loss": 1.2583, + "step": 6209 + }, + { + "epoch": 0.99, + "learning_rate": 4.639584070953832e-05, + "loss": 1.1375, + "step": 6210 + }, + { + "epoch": 0.99, + "learning_rate": 4.6394506140600247e-05, + "loss": 1.1885, + "step": 6211 + }, + { + "epoch": 0.99, + "learning_rate": 4.639317134382368e-05, + "loss": 1.1827, + "step": 6212 + }, + { + "epoch": 0.99, + "learning_rate": 4.639183631922283e-05, + "loss": 1.2072, + "step": 6213 + }, + { + "epoch": 0.99, + "learning_rate": 4.63905010668119e-05, + "loss": 1.2205, + "step": 6214 + }, + { + "epoch": 0.99, + "learning_rate": 4.638916558660512e-05, + "loss": 1.0991, + "step": 6215 + }, + { + "epoch": 0.99, + "learning_rate": 4.6387829878616715e-05, + "loss": 1.1841, + "step": 6216 + }, + { + "epoch": 0.99, + "learning_rate": 4.638649394286091e-05, + "loss": 1.1686, + "step": 6217 + }, + { + "epoch": 0.99, + "learning_rate": 4.6385157779351925e-05, + "loss": 1.1387, + "step": 6218 + }, + { + "epoch": 0.99, + "learning_rate": 4.638382138810399e-05, + "loss": 1.1986, + "step": 6219 + }, + { + "epoch": 0.99, + "learning_rate": 4.638248476913135e-05, + "loss": 1.1897, + "step": 6220 + }, + { + "epoch": 0.99, + "learning_rate": 4.638114792244822e-05, + "loss": 1.2031, + "step": 6221 + }, + { + "epoch": 0.99, + "learning_rate": 4.637981084806885e-05, + "loss": 1.1374, + "step": 6222 + }, + { + "epoch": 0.99, + "learning_rate": 4.637847354600747e-05, + "loss": 1.2224, + "step": 6223 + }, + { + "epoch": 0.99, + "learning_rate": 4.6377136016278325e-05, + "loss": 1.1182, + "step": 6224 + }, + { + "epoch": 0.99, + "learning_rate": 4.6375798258895666e-05, + "loss": 1.209, + "step": 6225 + }, + { + "epoch": 0.99, + "learning_rate": 4.637446027387373e-05, + "loss": 1.2065, + "step": 6226 + }, + { + "epoch": 0.99, + "learning_rate": 4.637312206122677e-05, + "loss": 1.2306, + "step": 6227 + }, + { + "epoch": 0.99, + "learning_rate": 4.637178362096903e-05, + "loss": 1.1229, + "step": 6228 + }, + { + "epoch": 0.99, + "learning_rate": 4.637044495311478e-05, + "loss": 1.2044, + "step": 6229 + }, + { + "epoch": 0.99, + "learning_rate": 4.636910605767826e-05, + "loss": 1.1816, + "step": 6230 + }, + { + "epoch": 0.99, + "learning_rate": 4.6367766934673726e-05, + "loss": 1.119, + "step": 6231 + }, + { + "epoch": 0.99, + "learning_rate": 4.6366427584115456e-05, + "loss": 1.1746, + "step": 6232 + }, + { + "epoch": 0.99, + "learning_rate": 4.636508800601771e-05, + "loss": 1.1748, + "step": 6233 + }, + { + "epoch": 0.99, + "learning_rate": 4.6363748200394744e-05, + "loss": 1.1873, + "step": 6234 + }, + { + "epoch": 0.99, + "learning_rate": 4.6362408167260815e-05, + "loss": 1.1271, + "step": 6235 + }, + { + "epoch": 0.99, + "learning_rate": 4.636106790663023e-05, + "loss": 1.1094, + "step": 6236 + }, + { + "epoch": 0.99, + "learning_rate": 4.635972741851723e-05, + "loss": 1.2, + "step": 6237 + }, + { + "epoch": 0.99, + "learning_rate": 4.63583867029361e-05, + "loss": 1.1515, + "step": 6238 + }, + { + "epoch": 0.99, + "learning_rate": 4.635704575990112e-05, + "loss": 1.1658, + "step": 6239 + }, + { + "epoch": 0.99, + "learning_rate": 4.635570458942657e-05, + "loss": 1.2153, + "step": 6240 + }, + { + "epoch": 0.99, + "learning_rate": 4.635436319152673e-05, + "loss": 1.1226, + "step": 6241 + }, + { + "epoch": 0.99, + "learning_rate": 4.635302156621588e-05, + "loss": 1.2008, + "step": 6242 + }, + { + "epoch": 0.99, + "learning_rate": 4.635167971350832e-05, + "loss": 1.111, + "step": 6243 + }, + { + "epoch": 0.99, + "learning_rate": 4.6350337633418336e-05, + "loss": 1.1612, + "step": 6244 + }, + { + "epoch": 0.99, + "learning_rate": 4.634899532596022e-05, + "loss": 1.251, + "step": 6245 + }, + { + "epoch": 1.0, + "learning_rate": 4.634765279114827e-05, + "loss": 1.2358, + "step": 6246 + }, + { + "epoch": 1.0, + "learning_rate": 4.6346310028996765e-05, + "loss": 1.1506, + "step": 6247 + }, + { + "epoch": 1.0, + "learning_rate": 4.6344967039520024e-05, + "loss": 1.1771, + "step": 6248 + }, + { + "epoch": 1.0, + "learning_rate": 4.634362382273234e-05, + "loss": 1.2118, + "step": 6249 + }, + { + "epoch": 1.0, + "learning_rate": 4.6342280378648024e-05, + "loss": 1.2598, + "step": 6250 + }, + { + "epoch": 1.0, + "learning_rate": 4.634093670728138e-05, + "loss": 1.1768, + "step": 6251 + }, + { + "epoch": 1.0, + "learning_rate": 4.6339592808646706e-05, + "loss": 1.3053, + "step": 6252 + }, + { + "epoch": 1.0, + "learning_rate": 4.6338248682758335e-05, + "loss": 1.2277, + "step": 6253 + }, + { + "epoch": 1.0, + "learning_rate": 4.6336904329630574e-05, + "loss": 1.1865, + "step": 6254 + }, + { + "epoch": 1.0, + "learning_rate": 4.633555974927773e-05, + "loss": 1.2203, + "step": 6255 + }, + { + "epoch": 1.0, + "learning_rate": 4.633421494171413e-05, + "loss": 1.2598, + "step": 6256 + }, + { + "epoch": 1.0, + "learning_rate": 4.633286990695408e-05, + "loss": 1.1648, + "step": 6257 + }, + { + "epoch": 1.0, + "learning_rate": 4.633152464501193e-05, + "loss": 1.1565, + "step": 6258 + }, + { + "epoch": 1.0, + "learning_rate": 4.633017915590199e-05, + "loss": 1.1944, + "step": 6259 + }, + { + "epoch": 1.0, + "learning_rate": 4.63288334396386e-05, + "loss": 1.2444, + "step": 6260 + }, + { + "epoch": 1.0, + "learning_rate": 4.6327487496236066e-05, + "loss": 1.1421, + "step": 6261 + }, + { + "epoch": 1.0, + "learning_rate": 4.632614132570875e-05, + "loss": 1.1722, + "step": 6262 + }, + { + "epoch": 1.0, + "learning_rate": 4.632479492807097e-05, + "loss": 1.1364, + "step": 6263 + }, + { + "epoch": 1.0, + "learning_rate": 4.632344830333708e-05, + "loss": 1.2007, + "step": 6264 + }, + { + "epoch": 1.0, + "learning_rate": 4.6322101451521407e-05, + "loss": 1.1842, + "step": 6265 + }, + { + "epoch": 1.0, + "learning_rate": 4.63207543726383e-05, + "loss": 1.1987, + "step": 6266 + }, + { + "epoch": 1.0, + "learning_rate": 4.63194070667021e-05, + "loss": 1.2505, + "step": 6267 + }, + { + "epoch": 1.0, + "learning_rate": 4.631805953372716e-05, + "loss": 1.204, + "step": 6268 + }, + { + "epoch": 1.0, + "learning_rate": 4.631671177372782e-05, + "loss": 1.1621, + "step": 6269 + }, + { + "epoch": 1.0, + "learning_rate": 4.6315363786718454e-05, + "loss": 1.2342, + "step": 6270 + }, + { + "epoch": 1.0, + "learning_rate": 4.63140155727134e-05, + "loss": 1.2057, + "step": 6271 + }, + { + "epoch": 1.0, + "learning_rate": 4.631266713172702e-05, + "loss": 1.1341, + "step": 6272 + }, + { + "epoch": 1.0, + "learning_rate": 4.631131846377367e-05, + "loss": 1.2516, + "step": 6273 + }, + { + "epoch": 1.0, + "learning_rate": 4.630996956886772e-05, + "loss": 1.181, + "step": 6274 + }, + { + "epoch": 1.0, + "learning_rate": 4.630862044702353e-05, + "loss": 1.1846, + "step": 6275 + }, + { + "epoch": 1.0, + "learning_rate": 4.630727109825548e-05, + "loss": 1.2079, + "step": 6276 + }, + { + "epoch": 1.0, + "learning_rate": 4.630592152257791e-05, + "loss": 1.1916, + "step": 6277 + }, + { + "epoch": 1.0, + "learning_rate": 4.630457172000522e-05, + "loss": 1.1146, + "step": 6278 + }, + { + "epoch": 1.0, + "learning_rate": 4.630322169055178e-05, + "loss": 1.1294, + "step": 6279 + }, + { + "epoch": 1.0, + "learning_rate": 4.630187143423196e-05, + "loss": 1.1717, + "step": 6280 + }, + { + "epoch": 1.0, + "learning_rate": 4.6300520951060136e-05, + "loss": 1.1213, + "step": 6281 + }, + { + "epoch": 1.0, + "learning_rate": 4.6299170241050706e-05, + "loss": 1.092, + "step": 6282 + }, + { + "epoch": 1.0, + "learning_rate": 4.629781930421804e-05, + "loss": 1.106, + "step": 6283 + }, + { + "epoch": 1.0, + "learning_rate": 4.629646814057652e-05, + "loss": 1.1348, + "step": 6284 + }, + { + "epoch": 1.0, + "learning_rate": 4.629511675014056e-05, + "loss": 1.061, + "step": 6285 + }, + { + "epoch": 1.0, + "learning_rate": 4.629376513292452e-05, + "loss": 1.1469, + "step": 6286 + }, + { + "epoch": 1.0, + "learning_rate": 4.629241328894281e-05, + "loss": 1.1733, + "step": 6287 + }, + { + "epoch": 1.0, + "learning_rate": 4.629106121820984e-05, + "loss": 1.1987, + "step": 6288 + }, + { + "epoch": 1.0, + "learning_rate": 4.6289708920739986e-05, + "loss": 1.145, + "step": 6289 + }, + { + "epoch": 1.0, + "learning_rate": 4.628835639654765e-05, + "loss": 1.1115, + "step": 6290 + }, + { + "epoch": 1.0, + "learning_rate": 4.6287003645647246e-05, + "loss": 1.1647, + "step": 6291 + }, + { + "epoch": 1.0, + "learning_rate": 4.6285650668053184e-05, + "loss": 1.2146, + "step": 6292 + }, + { + "epoch": 1.0, + "learning_rate": 4.628429746377986e-05, + "loss": 1.1629, + "step": 6293 + }, + { + "epoch": 1.0, + "learning_rate": 4.6282944032841694e-05, + "loss": 1.1793, + "step": 6294 + }, + { + "epoch": 1.0, + "learning_rate": 4.6281590375253085e-05, + "loss": 1.1833, + "step": 6295 + }, + { + "epoch": 1.0, + "learning_rate": 4.6280236491028465e-05, + "loss": 1.2292, + "step": 6296 + }, + { + "epoch": 1.0, + "learning_rate": 4.627888238018225e-05, + "loss": 1.099, + "step": 6297 + }, + { + "epoch": 1.0, + "learning_rate": 4.627752804272885e-05, + "loss": 1.1115, + "step": 6298 + }, + { + "epoch": 1.0, + "learning_rate": 4.62761734786827e-05, + "loss": 1.1647, + "step": 6299 + }, + { + "epoch": 1.0, + "learning_rate": 4.627481868805822e-05, + "loss": 1.2163, + "step": 6300 + }, + { + "epoch": 1.0, + "learning_rate": 4.627346367086983e-05, + "loss": 1.0788, + "step": 6301 + }, + { + "epoch": 1.0, + "learning_rate": 4.627210842713197e-05, + "loss": 1.1846, + "step": 6302 + }, + { + "epoch": 1.0, + "learning_rate": 4.6270752956859076e-05, + "loss": 1.1587, + "step": 6303 + }, + { + "epoch": 1.0, + "learning_rate": 4.626939726006557e-05, + "loss": 1.1896, + "step": 6304 + }, + { + "epoch": 1.0, + "learning_rate": 4.62680413367659e-05, + "loss": 1.1769, + "step": 6305 + }, + { + "epoch": 1.0, + "learning_rate": 4.62666851869745e-05, + "loss": 1.1051, + "step": 6306 + }, + { + "epoch": 1.0, + "learning_rate": 4.626532881070581e-05, + "loss": 1.2173, + "step": 6307 + }, + { + "epoch": 1.0, + "learning_rate": 4.626397220797428e-05, + "loss": 1.0825, + "step": 6308 + }, + { + "epoch": 1.01, + "learning_rate": 4.626261537879436e-05, + "loss": 1.2119, + "step": 6309 + }, + { + "epoch": 1.01, + "learning_rate": 4.6261258323180504e-05, + "loss": 1.1582, + "step": 6310 + }, + { + "epoch": 1.01, + "learning_rate": 4.6259901041147136e-05, + "loss": 1.1818, + "step": 6311 + }, + { + "epoch": 1.01, + "learning_rate": 4.625854353270874e-05, + "loss": 1.2525, + "step": 6312 + }, + { + "epoch": 1.01, + "learning_rate": 4.625718579787976e-05, + "loss": 1.1803, + "step": 6313 + }, + { + "epoch": 1.01, + "learning_rate": 4.625582783667466e-05, + "loss": 1.1574, + "step": 6314 + }, + { + "epoch": 1.01, + "learning_rate": 4.625446964910789e-05, + "loss": 1.1953, + "step": 6315 + }, + { + "epoch": 1.01, + "learning_rate": 4.625311123519393e-05, + "loss": 1.1953, + "step": 6316 + }, + { + "epoch": 1.01, + "learning_rate": 4.6251752594947226e-05, + "loss": 1.1352, + "step": 6317 + }, + { + "epoch": 1.01, + "learning_rate": 4.6250393728382274e-05, + "loss": 1.0667, + "step": 6318 + }, + { + "epoch": 1.01, + "learning_rate": 4.624903463551352e-05, + "loss": 1.2266, + "step": 6319 + }, + { + "epoch": 1.01, + "learning_rate": 4.6247675316355455e-05, + "loss": 1.2461, + "step": 6320 + }, + { + "epoch": 1.01, + "learning_rate": 4.624631577092254e-05, + "loss": 1.1701, + "step": 6321 + }, + { + "epoch": 1.01, + "learning_rate": 4.624495599922927e-05, + "loss": 1.1296, + "step": 6322 + }, + { + "epoch": 1.01, + "learning_rate": 4.6243596001290114e-05, + "loss": 1.22, + "step": 6323 + }, + { + "epoch": 1.01, + "learning_rate": 4.624223577711956e-05, + "loss": 1.1025, + "step": 6324 + }, + { + "epoch": 1.01, + "learning_rate": 4.624087532673208e-05, + "loss": 1.151, + "step": 6325 + }, + { + "epoch": 1.01, + "learning_rate": 4.623951465014219e-05, + "loss": 1.1046, + "step": 6326 + }, + { + "epoch": 1.01, + "learning_rate": 4.623815374736435e-05, + "loss": 1.211, + "step": 6327 + }, + { + "epoch": 1.01, + "learning_rate": 4.623679261841307e-05, + "loss": 1.3438, + "step": 6328 + }, + { + "epoch": 1.01, + "learning_rate": 4.623543126330284e-05, + "loss": 1.2149, + "step": 6329 + }, + { + "epoch": 1.01, + "learning_rate": 4.623406968204816e-05, + "loss": 1.2276, + "step": 6330 + }, + { + "epoch": 1.01, + "learning_rate": 4.6232707874663536e-05, + "loss": 1.1623, + "step": 6331 + }, + { + "epoch": 1.01, + "learning_rate": 4.6231345841163465e-05, + "loss": 1.075, + "step": 6332 + }, + { + "epoch": 1.01, + "learning_rate": 4.6229983581562443e-05, + "loss": 1.2073, + "step": 6333 + }, + { + "epoch": 1.01, + "learning_rate": 4.622862109587499e-05, + "loss": 1.1732, + "step": 6334 + }, + { + "epoch": 1.01, + "learning_rate": 4.622725838411561e-05, + "loss": 1.222, + "step": 6335 + }, + { + "epoch": 1.01, + "learning_rate": 4.6225895446298814e-05, + "loss": 1.2415, + "step": 6336 + }, + { + "epoch": 1.01, + "learning_rate": 4.6224532282439116e-05, + "loss": 1.1794, + "step": 6337 + }, + { + "epoch": 1.01, + "learning_rate": 4.622316889255104e-05, + "loss": 1.1309, + "step": 6338 + }, + { + "epoch": 1.01, + "learning_rate": 4.622180527664909e-05, + "loss": 1.2078, + "step": 6339 + }, + { + "epoch": 1.01, + "learning_rate": 4.6220441434747806e-05, + "loss": 1.1005, + "step": 6340 + }, + { + "epoch": 1.01, + "learning_rate": 4.621907736686171e-05, + "loss": 1.1201, + "step": 6341 + }, + { + "epoch": 1.01, + "learning_rate": 4.621771307300531e-05, + "loss": 1.1642, + "step": 6342 + }, + { + "epoch": 1.01, + "learning_rate": 4.621634855319315e-05, + "loss": 1.2231, + "step": 6343 + }, + { + "epoch": 1.01, + "learning_rate": 4.621498380743976e-05, + "loss": 1.2407, + "step": 6344 + }, + { + "epoch": 1.01, + "learning_rate": 4.6213618835759674e-05, + "loss": 1.1857, + "step": 6345 + }, + { + "epoch": 1.01, + "learning_rate": 4.6212253638167426e-05, + "loss": 1.2101, + "step": 6346 + }, + { + "epoch": 1.01, + "learning_rate": 4.621088821467756e-05, + "loss": 1.1331, + "step": 6347 + }, + { + "epoch": 1.01, + "learning_rate": 4.6209522565304604e-05, + "loss": 1.1141, + "step": 6348 + }, + { + "epoch": 1.01, + "learning_rate": 4.620815669006311e-05, + "loss": 1.1042, + "step": 6349 + }, + { + "epoch": 1.01, + "learning_rate": 4.620679058896763e-05, + "loss": 1.0973, + "step": 6350 + }, + { + "epoch": 1.01, + "learning_rate": 4.62054242620327e-05, + "loss": 1.2165, + "step": 6351 + }, + { + "epoch": 1.01, + "learning_rate": 4.6204057709272873e-05, + "loss": 1.1047, + "step": 6352 + }, + { + "epoch": 1.01, + "learning_rate": 4.620269093070271e-05, + "loss": 1.2261, + "step": 6353 + }, + { + "epoch": 1.01, + "learning_rate": 4.6201323926336756e-05, + "loss": 1.1074, + "step": 6354 + }, + { + "epoch": 1.01, + "learning_rate": 4.619995669618958e-05, + "loss": 1.1658, + "step": 6355 + }, + { + "epoch": 1.01, + "learning_rate": 4.619858924027573e-05, + "loss": 1.1865, + "step": 6356 + }, + { + "epoch": 1.01, + "learning_rate": 4.619722155860978e-05, + "loss": 1.1587, + "step": 6357 + }, + { + "epoch": 1.01, + "learning_rate": 4.619585365120629e-05, + "loss": 1.1751, + "step": 6358 + }, + { + "epoch": 1.01, + "learning_rate": 4.619448551807982e-05, + "loss": 1.136, + "step": 6359 + }, + { + "epoch": 1.01, + "learning_rate": 4.619311715924495e-05, + "loss": 1.1597, + "step": 6360 + }, + { + "epoch": 1.01, + "learning_rate": 4.619174857471625e-05, + "loss": 1.17, + "step": 6361 + }, + { + "epoch": 1.01, + "learning_rate": 4.6190379764508296e-05, + "loss": 1.1693, + "step": 6362 + }, + { + "epoch": 1.01, + "learning_rate": 4.618901072863566e-05, + "loss": 1.1812, + "step": 6363 + }, + { + "epoch": 1.01, + "learning_rate": 4.618764146711292e-05, + "loss": 1.1341, + "step": 6364 + }, + { + "epoch": 1.01, + "learning_rate": 4.6186271979954666e-05, + "loss": 1.1504, + "step": 6365 + }, + { + "epoch": 1.01, + "learning_rate": 4.618490226717548e-05, + "loss": 1.1438, + "step": 6366 + }, + { + "epoch": 1.01, + "learning_rate": 4.6183532328789937e-05, + "loss": 1.1235, + "step": 6367 + }, + { + "epoch": 1.01, + "learning_rate": 4.618216216481265e-05, + "loss": 1.2138, + "step": 6368 + }, + { + "epoch": 1.01, + "learning_rate": 4.618079177525818e-05, + "loss": 1.1727, + "step": 6369 + }, + { + "epoch": 1.01, + "learning_rate": 4.6179421160141145e-05, + "loss": 1.1517, + "step": 6370 + }, + { + "epoch": 1.01, + "learning_rate": 4.6178050319476136e-05, + "loss": 1.16, + "step": 6371 + }, + { + "epoch": 1.02, + "learning_rate": 4.617667925327774e-05, + "loss": 1.1048, + "step": 6372 + }, + { + "epoch": 1.02, + "learning_rate": 4.6175307961560574e-05, + "loss": 1.2108, + "step": 6373 + }, + { + "epoch": 1.02, + "learning_rate": 4.6173936444339234e-05, + "loss": 1.1001, + "step": 6374 + }, + { + "epoch": 1.02, + "learning_rate": 4.6172564701628324e-05, + "loss": 1.208, + "step": 6375 + }, + { + "epoch": 1.02, + "learning_rate": 4.6171192733442455e-05, + "loss": 1.2, + "step": 6376 + }, + { + "epoch": 1.02, + "learning_rate": 4.6169820539796236e-05, + "loss": 1.1322, + "step": 6377 + }, + { + "epoch": 1.02, + "learning_rate": 4.6168448120704277e-05, + "loss": 1.1411, + "step": 6378 + }, + { + "epoch": 1.02, + "learning_rate": 4.61670754761812e-05, + "loss": 1.1584, + "step": 6379 + }, + { + "epoch": 1.02, + "learning_rate": 4.616570260624162e-05, + "loss": 1.0811, + "step": 6380 + }, + { + "epoch": 1.02, + "learning_rate": 4.616432951090016e-05, + "loss": 1.2131, + "step": 6381 + }, + { + "epoch": 1.02, + "learning_rate": 4.616295619017144e-05, + "loss": 1.1745, + "step": 6382 + }, + { + "epoch": 1.02, + "learning_rate": 4.616158264407008e-05, + "loss": 1.1143, + "step": 6383 + }, + { + "epoch": 1.02, + "learning_rate": 4.6160208872610714e-05, + "loss": 1.2281, + "step": 6384 + }, + { + "epoch": 1.02, + "learning_rate": 4.615883487580798e-05, + "loss": 1.197, + "step": 6385 + }, + { + "epoch": 1.02, + "learning_rate": 4.6157460653676485e-05, + "loss": 1.2484, + "step": 6386 + }, + { + "epoch": 1.02, + "learning_rate": 4.6156086206230885e-05, + "loss": 1.1056, + "step": 6387 + }, + { + "epoch": 1.02, + "learning_rate": 4.6154711533485816e-05, + "loss": 1.1644, + "step": 6388 + }, + { + "epoch": 1.02, + "learning_rate": 4.6153336635455904e-05, + "loss": 1.147, + "step": 6389 + }, + { + "epoch": 1.02, + "learning_rate": 4.61519615121558e-05, + "loss": 1.272, + "step": 6390 + }, + { + "epoch": 1.02, + "learning_rate": 4.615058616360016e-05, + "loss": 1.1889, + "step": 6391 + }, + { + "epoch": 1.02, + "learning_rate": 4.6149210589803606e-05, + "loss": 1.1467, + "step": 6392 + }, + { + "epoch": 1.02, + "learning_rate": 4.61478347907808e-05, + "loss": 1.1296, + "step": 6393 + }, + { + "epoch": 1.02, + "learning_rate": 4.6146458766546394e-05, + "loss": 1.1849, + "step": 6394 + }, + { + "epoch": 1.02, + "learning_rate": 4.614508251711505e-05, + "loss": 1.2209, + "step": 6395 + }, + { + "epoch": 1.02, + "learning_rate": 4.61437060425014e-05, + "loss": 1.1017, + "step": 6396 + }, + { + "epoch": 1.02, + "learning_rate": 4.614232934272013e-05, + "loss": 1.2139, + "step": 6397 + }, + { + "epoch": 1.02, + "learning_rate": 4.6140952417785875e-05, + "loss": 1.1642, + "step": 6398 + }, + { + "epoch": 1.02, + "learning_rate": 4.613957526771332e-05, + "loss": 1.1919, + "step": 6399 + }, + { + "epoch": 1.02, + "learning_rate": 4.613819789251712e-05, + "loss": 1.2054, + "step": 6400 + }, + { + "epoch": 1.02, + "learning_rate": 4.613682029221195e-05, + "loss": 1.0975, + "step": 6401 + }, + { + "epoch": 1.02, + "learning_rate": 4.6135442466812475e-05, + "loss": 1.1534, + "step": 6402 + }, + { + "epoch": 1.02, + "learning_rate": 4.613406441633337e-05, + "loss": 1.1353, + "step": 6403 + }, + { + "epoch": 1.02, + "learning_rate": 4.6132686140789305e-05, + "loss": 1.1136, + "step": 6404 + }, + { + "epoch": 1.02, + "learning_rate": 4.6131307640194965e-05, + "loss": 1.0786, + "step": 6405 + }, + { + "epoch": 1.02, + "learning_rate": 4.612992891456503e-05, + "loss": 1.153, + "step": 6406 + }, + { + "epoch": 1.02, + "learning_rate": 4.6128549963914175e-05, + "loss": 1.1252, + "step": 6407 + }, + { + "epoch": 1.02, + "learning_rate": 4.612717078825709e-05, + "loss": 1.1401, + "step": 6408 + }, + { + "epoch": 1.02, + "learning_rate": 4.612579138760848e-05, + "loss": 1.2767, + "step": 6409 + }, + { + "epoch": 1.02, + "learning_rate": 4.612441176198299e-05, + "loss": 1.2022, + "step": 6410 + }, + { + "epoch": 1.02, + "learning_rate": 4.612303191139536e-05, + "loss": 1.1338, + "step": 6411 + }, + { + "epoch": 1.02, + "learning_rate": 4.612165183586026e-05, + "loss": 1.2342, + "step": 6412 + }, + { + "epoch": 1.02, + "learning_rate": 4.612027153539238e-05, + "loss": 1.2028, + "step": 6413 + }, + { + "epoch": 1.02, + "learning_rate": 4.6118891010006446e-05, + "loss": 1.1077, + "step": 6414 + }, + { + "epoch": 1.02, + "learning_rate": 4.6117510259717135e-05, + "loss": 1.151, + "step": 6415 + }, + { + "epoch": 1.02, + "learning_rate": 4.6116129284539167e-05, + "loss": 1.1206, + "step": 6416 + }, + { + "epoch": 1.02, + "learning_rate": 4.611474808448724e-05, + "loss": 1.1429, + "step": 6417 + }, + { + "epoch": 1.02, + "learning_rate": 4.6113366659576064e-05, + "loss": 1.0888, + "step": 6418 + }, + { + "epoch": 1.02, + "learning_rate": 4.6111985009820345e-05, + "loss": 1.1063, + "step": 6419 + }, + { + "epoch": 1.02, + "learning_rate": 4.611060313523481e-05, + "loss": 1.2179, + "step": 6420 + }, + { + "epoch": 1.02, + "learning_rate": 4.610922103583417e-05, + "loss": 1.1489, + "step": 6421 + }, + { + "epoch": 1.02, + "learning_rate": 4.6107838711633135e-05, + "loss": 1.096, + "step": 6422 + }, + { + "epoch": 1.02, + "learning_rate": 4.6106456162646436e-05, + "loss": 1.1128, + "step": 6423 + }, + { + "epoch": 1.02, + "learning_rate": 4.610507338888879e-05, + "loss": 1.179, + "step": 6424 + }, + { + "epoch": 1.02, + "learning_rate": 4.610369039037493e-05, + "loss": 1.1211, + "step": 6425 + }, + { + "epoch": 1.02, + "learning_rate": 4.610230716711959e-05, + "loss": 1.123, + "step": 6426 + }, + { + "epoch": 1.02, + "learning_rate": 4.6100923719137465e-05, + "loss": 1.1291, + "step": 6427 + }, + { + "epoch": 1.02, + "learning_rate": 4.609954004644333e-05, + "loss": 1.1234, + "step": 6428 + }, + { + "epoch": 1.02, + "learning_rate": 4.6098156149051905e-05, + "loss": 1.2354, + "step": 6429 + }, + { + "epoch": 1.02, + "learning_rate": 4.609677202697792e-05, + "loss": 1.1248, + "step": 6430 + }, + { + "epoch": 1.02, + "learning_rate": 4.609538768023612e-05, + "loss": 1.1611, + "step": 6431 + }, + { + "epoch": 1.02, + "learning_rate": 4.609400310884125e-05, + "loss": 1.1914, + "step": 6432 + }, + { + "epoch": 1.02, + "learning_rate": 4.609261831280806e-05, + "loss": 1.0996, + "step": 6433 + }, + { + "epoch": 1.03, + "learning_rate": 4.609123329215128e-05, + "loss": 1.1147, + "step": 6434 + }, + { + "epoch": 1.03, + "learning_rate": 4.608984804688568e-05, + "loss": 1.2095, + "step": 6435 + }, + { + "epoch": 1.03, + "learning_rate": 4.608846257702599e-05, + "loss": 1.1715, + "step": 6436 + }, + { + "epoch": 1.03, + "learning_rate": 4.608707688258699e-05, + "loss": 1.0675, + "step": 6437 + }, + { + "epoch": 1.03, + "learning_rate": 4.608569096358341e-05, + "loss": 1.0695, + "step": 6438 + }, + { + "epoch": 1.03, + "learning_rate": 4.608430482003003e-05, + "loss": 1.1034, + "step": 6439 + }, + { + "epoch": 1.03, + "learning_rate": 4.6082918451941607e-05, + "loss": 1.1365, + "step": 6440 + }, + { + "epoch": 1.03, + "learning_rate": 4.6081531859332896e-05, + "loss": 1.1182, + "step": 6441 + }, + { + "epoch": 1.03, + "learning_rate": 4.608014504221867e-05, + "loss": 1.0879, + "step": 6442 + }, + { + "epoch": 1.03, + "learning_rate": 4.607875800061371e-05, + "loss": 1.164, + "step": 6443 + }, + { + "epoch": 1.03, + "learning_rate": 4.607737073453276e-05, + "loss": 1.1268, + "step": 6444 + }, + { + "epoch": 1.03, + "learning_rate": 4.6075983243990616e-05, + "loss": 1.2206, + "step": 6445 + }, + { + "epoch": 1.03, + "learning_rate": 4.607459552900204e-05, + "loss": 1.1869, + "step": 6446 + }, + { + "epoch": 1.03, + "learning_rate": 4.607320758958182e-05, + "loss": 1.1221, + "step": 6447 + }, + { + "epoch": 1.03, + "learning_rate": 4.607181942574473e-05, + "loss": 1.1354, + "step": 6448 + }, + { + "epoch": 1.03, + "learning_rate": 4.607043103750556e-05, + "loss": 1.1517, + "step": 6449 + }, + { + "epoch": 1.03, + "learning_rate": 4.606904242487909e-05, + "loss": 1.1044, + "step": 6450 + }, + { + "epoch": 1.03, + "learning_rate": 4.606765358788011e-05, + "loss": 1.1763, + "step": 6451 + }, + { + "epoch": 1.03, + "learning_rate": 4.60662645265234e-05, + "loss": 1.2041, + "step": 6452 + }, + { + "epoch": 1.03, + "learning_rate": 4.606487524082378e-05, + "loss": 1.158, + "step": 6453 + }, + { + "epoch": 1.03, + "learning_rate": 4.606348573079602e-05, + "loss": 1.1071, + "step": 6454 + }, + { + "epoch": 1.03, + "learning_rate": 4.6062095996454926e-05, + "loss": 1.1704, + "step": 6455 + }, + { + "epoch": 1.03, + "learning_rate": 4.606070603781529e-05, + "loss": 1.0963, + "step": 6456 + }, + { + "epoch": 1.03, + "learning_rate": 4.6059315854891925e-05, + "loss": 1.1193, + "step": 6457 + }, + { + "epoch": 1.03, + "learning_rate": 4.605792544769964e-05, + "loss": 1.105, + "step": 6458 + }, + { + "epoch": 1.03, + "learning_rate": 4.605653481625322e-05, + "loss": 1.1414, + "step": 6459 + }, + { + "epoch": 1.03, + "learning_rate": 4.605514396056749e-05, + "loss": 1.0104, + "step": 6460 + }, + { + "epoch": 1.03, + "learning_rate": 4.605375288065727e-05, + "loss": 1.1191, + "step": 6461 + }, + { + "epoch": 1.03, + "learning_rate": 4.605236157653736e-05, + "loss": 1.1032, + "step": 6462 + }, + { + "epoch": 1.03, + "learning_rate": 4.6050970048222576e-05, + "loss": 1.0978, + "step": 6463 + }, + { + "epoch": 1.03, + "learning_rate": 4.6049578295727744e-05, + "loss": 1.121, + "step": 6464 + }, + { + "epoch": 1.03, + "learning_rate": 4.6048186319067674e-05, + "loss": 1.1078, + "step": 6465 + }, + { + "epoch": 1.03, + "learning_rate": 4.6046794118257215e-05, + "loss": 1.0623, + "step": 6466 + }, + { + "epoch": 1.03, + "learning_rate": 4.6045401693311166e-05, + "loss": 1.1308, + "step": 6467 + }, + { + "epoch": 1.03, + "learning_rate": 4.6044009044244366e-05, + "loss": 1.2447, + "step": 6468 + }, + { + "epoch": 1.03, + "learning_rate": 4.604261617107165e-05, + "loss": 1.2501, + "step": 6469 + }, + { + "epoch": 1.03, + "learning_rate": 4.604122307380784e-05, + "loss": 1.2866, + "step": 6470 + }, + { + "epoch": 1.03, + "learning_rate": 4.603982975246779e-05, + "loss": 1.102, + "step": 6471 + }, + { + "epoch": 1.03, + "learning_rate": 4.603843620706631e-05, + "loss": 1.255, + "step": 6472 + }, + { + "epoch": 1.03, + "learning_rate": 4.603704243761827e-05, + "loss": 1.181, + "step": 6473 + }, + { + "epoch": 1.03, + "learning_rate": 4.60356484441385e-05, + "loss": 1.1147, + "step": 6474 + }, + { + "epoch": 1.03, + "learning_rate": 4.603425422664184e-05, + "loss": 1.3288, + "step": 6475 + }, + { + "epoch": 1.03, + "learning_rate": 4.603285978514314e-05, + "loss": 1.1409, + "step": 6476 + }, + { + "epoch": 1.03, + "learning_rate": 4.603146511965727e-05, + "loss": 1.1697, + "step": 6477 + }, + { + "epoch": 1.03, + "learning_rate": 4.603007023019905e-05, + "loss": 1.114, + "step": 6478 + }, + { + "epoch": 1.03, + "learning_rate": 4.6028675116783346e-05, + "loss": 1.1711, + "step": 6479 + }, + { + "epoch": 1.03, + "learning_rate": 4.602727977942503e-05, + "loss": 1.1165, + "step": 6480 + }, + { + "epoch": 1.03, + "learning_rate": 4.602588421813894e-05, + "loss": 1.1757, + "step": 6481 + }, + { + "epoch": 1.03, + "learning_rate": 4.602448843293996e-05, + "loss": 1.1875, + "step": 6482 + }, + { + "epoch": 1.03, + "learning_rate": 4.6023092423842935e-05, + "loss": 1.1309, + "step": 6483 + }, + { + "epoch": 1.03, + "learning_rate": 4.602169619086274e-05, + "loss": 1.1462, + "step": 6484 + }, + { + "epoch": 1.03, + "learning_rate": 4.602029973401424e-05, + "loss": 1.1073, + "step": 6485 + }, + { + "epoch": 1.03, + "learning_rate": 4.601890305331231e-05, + "loss": 1.1401, + "step": 6486 + }, + { + "epoch": 1.03, + "learning_rate": 4.6017506148771824e-05, + "loss": 1.1144, + "step": 6487 + }, + { + "epoch": 1.03, + "learning_rate": 4.601610902040767e-05, + "loss": 1.1126, + "step": 6488 + }, + { + "epoch": 1.03, + "learning_rate": 4.60147116682347e-05, + "loss": 1.0761, + "step": 6489 + }, + { + "epoch": 1.03, + "learning_rate": 4.6013314092267814e-05, + "loss": 1.065, + "step": 6490 + }, + { + "epoch": 1.03, + "learning_rate": 4.601191629252189e-05, + "loss": 1.1333, + "step": 6491 + }, + { + "epoch": 1.03, + "learning_rate": 4.6010518269011815e-05, + "loss": 1.1389, + "step": 6492 + }, + { + "epoch": 1.03, + "learning_rate": 4.600912002175247e-05, + "loss": 1.0881, + "step": 6493 + }, + { + "epoch": 1.03, + "learning_rate": 4.600772155075876e-05, + "loss": 1.1191, + "step": 6494 + }, + { + "epoch": 1.03, + "learning_rate": 4.600632285604556e-05, + "loss": 1.1952, + "step": 6495 + }, + { + "epoch": 1.03, + "learning_rate": 4.600492393762778e-05, + "loss": 1.2059, + "step": 6496 + }, + { + "epoch": 1.04, + "learning_rate": 4.600352479552032e-05, + "loss": 1.2006, + "step": 6497 + }, + { + "epoch": 1.04, + "learning_rate": 4.6002125429738066e-05, + "loss": 1.1676, + "step": 6498 + }, + { + "epoch": 1.04, + "learning_rate": 4.600072584029592e-05, + "loss": 1.2075, + "step": 6499 + }, + { + "epoch": 1.04, + "learning_rate": 4.599932602720881e-05, + "loss": 1.1729, + "step": 6500 + }, + { + "epoch": 1.04, + "learning_rate": 4.5997925990491615e-05, + "loss": 1.1365, + "step": 6501 + }, + { + "epoch": 1.04, + "learning_rate": 4.5996525730159254e-05, + "loss": 1.1161, + "step": 6502 + }, + { + "epoch": 1.04, + "learning_rate": 4.599512524622665e-05, + "loss": 1.0994, + "step": 6503 + }, + { + "epoch": 1.04, + "learning_rate": 4.5993724538708704e-05, + "loss": 1.1532, + "step": 6504 + }, + { + "epoch": 1.04, + "learning_rate": 4.599232360762034e-05, + "loss": 1.1548, + "step": 6505 + }, + { + "epoch": 1.04, + "learning_rate": 4.599092245297647e-05, + "loss": 1.1603, + "step": 6506 + }, + { + "epoch": 1.04, + "learning_rate": 4.598952107479202e-05, + "loss": 1.0815, + "step": 6507 + }, + { + "epoch": 1.04, + "learning_rate": 4.5988119473081914e-05, + "loss": 1.1138, + "step": 6508 + }, + { + "epoch": 1.04, + "learning_rate": 4.5986717647861076e-05, + "loss": 1.1022, + "step": 6509 + }, + { + "epoch": 1.04, + "learning_rate": 4.598531559914444e-05, + "loss": 1.0879, + "step": 6510 + }, + { + "epoch": 1.04, + "learning_rate": 4.598391332694694e-05, + "loss": 1.1461, + "step": 6511 + }, + { + "epoch": 1.04, + "learning_rate": 4.598251083128349e-05, + "loss": 1.1286, + "step": 6512 + }, + { + "epoch": 1.04, + "learning_rate": 4.5981108112169044e-05, + "loss": 1.2135, + "step": 6513 + }, + { + "epoch": 1.04, + "learning_rate": 4.5979705169618534e-05, + "loss": 1.1355, + "step": 6514 + }, + { + "epoch": 1.04, + "learning_rate": 4.59783020036469e-05, + "loss": 1.2052, + "step": 6515 + }, + { + "epoch": 1.04, + "learning_rate": 4.597689861426909e-05, + "loss": 1.1667, + "step": 6516 + }, + { + "epoch": 1.04, + "learning_rate": 4.597549500150005e-05, + "loss": 1.1912, + "step": 6517 + }, + { + "epoch": 1.04, + "learning_rate": 4.597409116535471e-05, + "loss": 1.1882, + "step": 6518 + }, + { + "epoch": 1.04, + "learning_rate": 4.597268710584804e-05, + "loss": 1.0461, + "step": 6519 + }, + { + "epoch": 1.04, + "learning_rate": 4.597128282299498e-05, + "loss": 1.147, + "step": 6520 + }, + { + "epoch": 1.04, + "learning_rate": 4.59698783168105e-05, + "loss": 1.1453, + "step": 6521 + }, + { + "epoch": 1.04, + "learning_rate": 4.596847358730954e-05, + "loss": 1.0948, + "step": 6522 + }, + { + "epoch": 1.04, + "learning_rate": 4.596706863450706e-05, + "loss": 1.0628, + "step": 6523 + }, + { + "epoch": 1.04, + "learning_rate": 4.596566345841804e-05, + "loss": 1.066, + "step": 6524 + }, + { + "epoch": 1.04, + "learning_rate": 4.596425805905742e-05, + "loss": 1.1867, + "step": 6525 + }, + { + "epoch": 1.04, + "learning_rate": 4.596285243644019e-05, + "loss": 1.174, + "step": 6526 + }, + { + "epoch": 1.04, + "learning_rate": 4.596144659058131e-05, + "loss": 1.1427, + "step": 6527 + }, + { + "epoch": 1.04, + "learning_rate": 4.5960040521495746e-05, + "loss": 1.102, + "step": 6528 + }, + { + "epoch": 1.04, + "learning_rate": 4.595863422919847e-05, + "loss": 1.1073, + "step": 6529 + }, + { + "epoch": 1.04, + "learning_rate": 4.5957227713704474e-05, + "loss": 1.1219, + "step": 6530 + }, + { + "epoch": 1.04, + "learning_rate": 4.595582097502872e-05, + "loss": 1.2093, + "step": 6531 + }, + { + "epoch": 1.04, + "learning_rate": 4.5954414013186196e-05, + "loss": 1.2235, + "step": 6532 + }, + { + "epoch": 1.04, + "learning_rate": 4.595300682819189e-05, + "loss": 1.1068, + "step": 6533 + }, + { + "epoch": 1.04, + "learning_rate": 4.5951599420060775e-05, + "loss": 1.146, + "step": 6534 + }, + { + "epoch": 1.04, + "learning_rate": 4.595019178880784e-05, + "loss": 1.1558, + "step": 6535 + }, + { + "epoch": 1.04, + "learning_rate": 4.594878393444809e-05, + "loss": 1.083, + "step": 6536 + }, + { + "epoch": 1.04, + "learning_rate": 4.594737585699652e-05, + "loss": 1.1657, + "step": 6537 + }, + { + "epoch": 1.04, + "learning_rate": 4.59459675564681e-05, + "loss": 1.1535, + "step": 6538 + }, + { + "epoch": 1.04, + "learning_rate": 4.594455903287784e-05, + "loss": 1.1472, + "step": 6539 + }, + { + "epoch": 1.04, + "learning_rate": 4.594315028624075e-05, + "loss": 1.1746, + "step": 6540 + }, + { + "epoch": 1.04, + "learning_rate": 4.594174131657183e-05, + "loss": 1.1334, + "step": 6541 + }, + { + "epoch": 1.04, + "learning_rate": 4.594033212388606e-05, + "loss": 1.0865, + "step": 6542 + }, + { + "epoch": 1.04, + "learning_rate": 4.593892270819848e-05, + "loss": 1.1344, + "step": 6543 + }, + { + "epoch": 1.04, + "learning_rate": 4.5937513069524086e-05, + "loss": 1.1173, + "step": 6544 + }, + { + "epoch": 1.04, + "learning_rate": 4.593610320787789e-05, + "loss": 1.115, + "step": 6545 + }, + { + "epoch": 1.04, + "learning_rate": 4.59346931232749e-05, + "loss": 1.1315, + "step": 6546 + }, + { + "epoch": 1.04, + "learning_rate": 4.593328281573014e-05, + "loss": 1.099, + "step": 6547 + }, + { + "epoch": 1.04, + "learning_rate": 4.593187228525862e-05, + "loss": 1.1825, + "step": 6548 + }, + { + "epoch": 1.04, + "learning_rate": 4.593046153187538e-05, + "loss": 1.0951, + "step": 6549 + }, + { + "epoch": 1.04, + "learning_rate": 4.592905055559543e-05, + "loss": 1.1681, + "step": 6550 + }, + { + "epoch": 1.04, + "learning_rate": 4.592763935643379e-05, + "loss": 1.0511, + "step": 6551 + }, + { + "epoch": 1.04, + "learning_rate": 4.592622793440551e-05, + "loss": 1.195, + "step": 6552 + }, + { + "epoch": 1.04, + "learning_rate": 4.5924816289525594e-05, + "loss": 1.1576, + "step": 6553 + }, + { + "epoch": 1.04, + "learning_rate": 4.592340442180909e-05, + "loss": 1.0983, + "step": 6554 + }, + { + "epoch": 1.04, + "learning_rate": 4.592199233127104e-05, + "loss": 1.1895, + "step": 6555 + }, + { + "epoch": 1.04, + "learning_rate": 4.5920580017926464e-05, + "loss": 1.1258, + "step": 6556 + }, + { + "epoch": 1.04, + "learning_rate": 4.591916748179042e-05, + "loss": 1.1585, + "step": 6557 + }, + { + "epoch": 1.04, + "learning_rate": 4.591775472287794e-05, + "loss": 1.1549, + "step": 6558 + }, + { + "epoch": 1.04, + "learning_rate": 4.591634174120406e-05, + "loss": 1.1823, + "step": 6559 + }, + { + "epoch": 1.05, + "learning_rate": 4.5914928536783854e-05, + "loss": 1.068, + "step": 6560 + }, + { + "epoch": 1.05, + "learning_rate": 4.5913515109632356e-05, + "loss": 1.1266, + "step": 6561 + }, + { + "epoch": 1.05, + "learning_rate": 4.591210145976461e-05, + "loss": 1.1919, + "step": 6562 + }, + { + "epoch": 1.05, + "learning_rate": 4.591068758719569e-05, + "loss": 1.0623, + "step": 6563 + }, + { + "epoch": 1.05, + "learning_rate": 4.590927349194063e-05, + "loss": 1.1912, + "step": 6564 + }, + { + "epoch": 1.05, + "learning_rate": 4.590785917401451e-05, + "loss": 1.1208, + "step": 6565 + }, + { + "epoch": 1.05, + "learning_rate": 4.5906444633432385e-05, + "loss": 1.0731, + "step": 6566 + }, + { + "epoch": 1.05, + "learning_rate": 4.5905029870209315e-05, + "loss": 1.1343, + "step": 6567 + }, + { + "epoch": 1.05, + "learning_rate": 4.590361488436037e-05, + "loss": 1.1091, + "step": 6568 + }, + { + "epoch": 1.05, + "learning_rate": 4.5902199675900606e-05, + "loss": 1.1009, + "step": 6569 + }, + { + "epoch": 1.05, + "learning_rate": 4.590078424484512e-05, + "loss": 1.1759, + "step": 6570 + }, + { + "epoch": 1.05, + "learning_rate": 4.589936859120896e-05, + "loss": 1.1117, + "step": 6571 + }, + { + "epoch": 1.05, + "learning_rate": 4.589795271500722e-05, + "loss": 1.1344, + "step": 6572 + }, + { + "epoch": 1.05, + "learning_rate": 4.589653661625497e-05, + "loss": 1.189, + "step": 6573 + }, + { + "epoch": 1.05, + "learning_rate": 4.58951202949673e-05, + "loss": 1.2453, + "step": 6574 + }, + { + "epoch": 1.05, + "learning_rate": 4.5893703751159266e-05, + "loss": 1.1683, + "step": 6575 + }, + { + "epoch": 1.05, + "learning_rate": 4.589228698484599e-05, + "loss": 1.1336, + "step": 6576 + }, + { + "epoch": 1.05, + "learning_rate": 4.589086999604253e-05, + "loss": 1.1571, + "step": 6577 + }, + { + "epoch": 1.05, + "learning_rate": 4.588945278476399e-05, + "loss": 1.1921, + "step": 6578 + }, + { + "epoch": 1.05, + "learning_rate": 4.588803535102546e-05, + "loss": 1.1012, + "step": 6579 + }, + { + "epoch": 1.05, + "learning_rate": 4.588661769484204e-05, + "loss": 1.0545, + "step": 6580 + }, + { + "epoch": 1.05, + "learning_rate": 4.588519981622882e-05, + "loss": 1.162, + "step": 6581 + }, + { + "epoch": 1.05, + "learning_rate": 4.58837817152009e-05, + "loss": 1.1442, + "step": 6582 + }, + { + "epoch": 1.05, + "learning_rate": 4.5882363391773384e-05, + "loss": 1.0712, + "step": 6583 + }, + { + "epoch": 1.05, + "learning_rate": 4.588094484596137e-05, + "loss": 1.0937, + "step": 6584 + }, + { + "epoch": 1.05, + "learning_rate": 4.587952607777998e-05, + "loss": 1.1701, + "step": 6585 + }, + { + "epoch": 1.05, + "learning_rate": 4.58781070872443e-05, + "loss": 1.1179, + "step": 6586 + }, + { + "epoch": 1.05, + "learning_rate": 4.587668787436947e-05, + "loss": 1.1028, + "step": 6587 + }, + { + "epoch": 1.05, + "learning_rate": 4.587526843917058e-05, + "loss": 1.1664, + "step": 6588 + }, + { + "epoch": 1.05, + "learning_rate": 4.5873848781662765e-05, + "loss": 1.0864, + "step": 6589 + }, + { + "epoch": 1.05, + "learning_rate": 4.587242890186112e-05, + "loss": 1.1577, + "step": 6590 + }, + { + "epoch": 1.05, + "learning_rate": 4.587100879978079e-05, + "loss": 1.1213, + "step": 6591 + }, + { + "epoch": 1.05, + "learning_rate": 4.586958847543688e-05, + "loss": 1.139, + "step": 6592 + }, + { + "epoch": 1.05, + "learning_rate": 4.586816792884453e-05, + "loss": 1.0731, + "step": 6593 + }, + { + "epoch": 1.05, + "learning_rate": 4.586674716001885e-05, + "loss": 1.0466, + "step": 6594 + }, + { + "epoch": 1.05, + "learning_rate": 4.586532616897499e-05, + "loss": 1.1162, + "step": 6595 + }, + { + "epoch": 1.05, + "learning_rate": 4.5863904955728075e-05, + "loss": 1.0718, + "step": 6596 + }, + { + "epoch": 1.05, + "learning_rate": 4.586248352029323e-05, + "loss": 1.1117, + "step": 6597 + }, + { + "epoch": 1.05, + "learning_rate": 4.586106186268561e-05, + "loss": 1.0098, + "step": 6598 + }, + { + "epoch": 1.05, + "learning_rate": 4.5859639982920346e-05, + "loss": 1.1049, + "step": 6599 + }, + { + "epoch": 1.05, + "learning_rate": 4.5858217881012576e-05, + "loss": 1.1702, + "step": 6600 + }, + { + "epoch": 1.05, + "learning_rate": 4.585679555697745e-05, + "loss": 1.0909, + "step": 6601 + }, + { + "epoch": 1.05, + "learning_rate": 4.585537301083012e-05, + "loss": 1.2122, + "step": 6602 + }, + { + "epoch": 1.05, + "learning_rate": 4.585395024258572e-05, + "loss": 1.0958, + "step": 6603 + }, + { + "epoch": 1.05, + "learning_rate": 4.585252725225941e-05, + "loss": 1.0596, + "step": 6604 + }, + { + "epoch": 1.05, + "learning_rate": 4.5851104039866354e-05, + "loss": 1.2583, + "step": 6605 + }, + { + "epoch": 1.05, + "learning_rate": 4.584968060542169e-05, + "loss": 1.1377, + "step": 6606 + }, + { + "epoch": 1.05, + "learning_rate": 4.584825694894059e-05, + "loss": 1.0913, + "step": 6607 + }, + { + "epoch": 1.05, + "learning_rate": 4.584683307043821e-05, + "loss": 1.058, + "step": 6608 + }, + { + "epoch": 1.05, + "learning_rate": 4.584540896992972e-05, + "loss": 1.1374, + "step": 6609 + }, + { + "epoch": 1.05, + "learning_rate": 4.584398464743027e-05, + "loss": 1.0229, + "step": 6610 + }, + { + "epoch": 1.05, + "learning_rate": 4.584256010295504e-05, + "loss": 1.0894, + "step": 6611 + }, + { + "epoch": 1.05, + "learning_rate": 4.58411353365192e-05, + "loss": 1.0427, + "step": 6612 + }, + { + "epoch": 1.05, + "learning_rate": 4.583971034813793e-05, + "loss": 1.0139, + "step": 6613 + }, + { + "epoch": 1.05, + "learning_rate": 4.5838285137826386e-05, + "loss": 1.1265, + "step": 6614 + }, + { + "epoch": 1.05, + "learning_rate": 4.5836859705599766e-05, + "loss": 1.106, + "step": 6615 + }, + { + "epoch": 1.05, + "learning_rate": 4.583543405147323e-05, + "loss": 1.1479, + "step": 6616 + }, + { + "epoch": 1.05, + "learning_rate": 4.583400817546197e-05, + "loss": 1.0339, + "step": 6617 + }, + { + "epoch": 1.05, + "learning_rate": 4.583258207758119e-05, + "loss": 0.998, + "step": 6618 + }, + { + "epoch": 1.05, + "learning_rate": 4.583115575784604e-05, + "loss": 1.1121, + "step": 6619 + }, + { + "epoch": 1.05, + "learning_rate": 4.5829729216271735e-05, + "loss": 1.147, + "step": 6620 + }, + { + "epoch": 1.05, + "learning_rate": 4.582830245287346e-05, + "loss": 1.1759, + "step": 6621 + }, + { + "epoch": 1.05, + "learning_rate": 4.582687546766641e-05, + "loss": 0.98, + "step": 6622 + }, + { + "epoch": 1.06, + "learning_rate": 4.582544826066578e-05, + "loss": 1.0601, + "step": 6623 + }, + { + "epoch": 1.06, + "learning_rate": 4.582402083188676e-05, + "loss": 1.0968, + "step": 6624 + }, + { + "epoch": 1.06, + "learning_rate": 4.5822593181344575e-05, + "loss": 1.0686, + "step": 6625 + }, + { + "epoch": 1.06, + "learning_rate": 4.58211653090544e-05, + "loss": 1.1077, + "step": 6626 + }, + { + "epoch": 1.06, + "learning_rate": 4.581973721503147e-05, + "loss": 1.1398, + "step": 6627 + }, + { + "epoch": 1.06, + "learning_rate": 4.5818308899290974e-05, + "loss": 1.1934, + "step": 6628 + }, + { + "epoch": 1.06, + "learning_rate": 4.581688036184812e-05, + "loss": 1.1044, + "step": 6629 + }, + { + "epoch": 1.06, + "learning_rate": 4.581545160271814e-05, + "loss": 1.1935, + "step": 6630 + }, + { + "epoch": 1.06, + "learning_rate": 4.581402262191623e-05, + "loss": 1.1544, + "step": 6631 + }, + { + "epoch": 1.06, + "learning_rate": 4.581259341945762e-05, + "loss": 1.0132, + "step": 6632 + }, + { + "epoch": 1.06, + "learning_rate": 4.581116399535752e-05, + "loss": 1.0858, + "step": 6633 + }, + { + "epoch": 1.06, + "learning_rate": 4.580973434963116e-05, + "loss": 1.1458, + "step": 6634 + }, + { + "epoch": 1.06, + "learning_rate": 4.580830448229376e-05, + "loss": 1.0623, + "step": 6635 + }, + { + "epoch": 1.06, + "learning_rate": 4.5806874393360555e-05, + "loss": 1.1649, + "step": 6636 + }, + { + "epoch": 1.06, + "learning_rate": 4.580544408284677e-05, + "loss": 1.2349, + "step": 6637 + }, + { + "epoch": 1.06, + "learning_rate": 4.580401355076764e-05, + "loss": 1.1365, + "step": 6638 + }, + { + "epoch": 1.06, + "learning_rate": 4.5802582797138395e-05, + "loss": 1.0702, + "step": 6639 + }, + { + "epoch": 1.06, + "learning_rate": 4.580115182197427e-05, + "loss": 1.1053, + "step": 6640 + }, + { + "epoch": 1.06, + "learning_rate": 4.5799720625290505e-05, + "loss": 1.1834, + "step": 6641 + }, + { + "epoch": 1.06, + "learning_rate": 4.579828920710234e-05, + "loss": 1.1393, + "step": 6642 + }, + { + "epoch": 1.06, + "learning_rate": 4.579685756742503e-05, + "loss": 1.1026, + "step": 6643 + }, + { + "epoch": 1.06, + "learning_rate": 4.579542570627381e-05, + "loss": 1.1709, + "step": 6644 + }, + { + "epoch": 1.06, + "learning_rate": 4.579399362366393e-05, + "loss": 1.179, + "step": 6645 + }, + { + "epoch": 1.06, + "learning_rate": 4.579256131961064e-05, + "loss": 1.1975, + "step": 6646 + }, + { + "epoch": 1.06, + "learning_rate": 4.57911287941292e-05, + "loss": 1.065, + "step": 6647 + }, + { + "epoch": 1.06, + "learning_rate": 4.578969604723486e-05, + "loss": 1.1574, + "step": 6648 + }, + { + "epoch": 1.06, + "learning_rate": 4.578826307894288e-05, + "loss": 1.06, + "step": 6649 + }, + { + "epoch": 1.06, + "learning_rate": 4.5786829889268523e-05, + "loss": 1.1091, + "step": 6650 + }, + { + "epoch": 1.06, + "learning_rate": 4.5785396478227036e-05, + "loss": 1.104, + "step": 6651 + }, + { + "epoch": 1.06, + "learning_rate": 4.578396284583371e-05, + "loss": 1.1554, + "step": 6652 + }, + { + "epoch": 1.06, + "learning_rate": 4.5782528992103786e-05, + "loss": 1.099, + "step": 6653 + }, + { + "epoch": 1.06, + "learning_rate": 4.5781094917052544e-05, + "loss": 1.2183, + "step": 6654 + }, + { + "epoch": 1.06, + "learning_rate": 4.577966062069527e-05, + "loss": 1.0902, + "step": 6655 + }, + { + "epoch": 1.06, + "learning_rate": 4.5778226103047216e-05, + "loss": 1.0763, + "step": 6656 + }, + { + "epoch": 1.06, + "learning_rate": 4.577679136412367e-05, + "loss": 1.0574, + "step": 6657 + }, + { + "epoch": 1.06, + "learning_rate": 4.577535640393992e-05, + "loss": 1.0845, + "step": 6658 + }, + { + "epoch": 1.06, + "learning_rate": 4.577392122251122e-05, + "loss": 1.2161, + "step": 6659 + }, + { + "epoch": 1.06, + "learning_rate": 4.577248581985288e-05, + "loss": 1.0702, + "step": 6660 + }, + { + "epoch": 1.06, + "learning_rate": 4.577105019598018e-05, + "loss": 1.1717, + "step": 6661 + }, + { + "epoch": 1.06, + "learning_rate": 4.5769614350908404e-05, + "loss": 1.0825, + "step": 6662 + }, + { + "epoch": 1.06, + "learning_rate": 4.576817828465285e-05, + "loss": 1.0968, + "step": 6663 + }, + { + "epoch": 1.06, + "learning_rate": 4.5766741997228805e-05, + "loss": 1.1091, + "step": 6664 + }, + { + "epoch": 1.06, + "learning_rate": 4.576530548865155e-05, + "loss": 1.0406, + "step": 6665 + }, + { + "epoch": 1.06, + "learning_rate": 4.5763868758936415e-05, + "loss": 1.1086, + "step": 6666 + }, + { + "epoch": 1.06, + "learning_rate": 4.576243180809867e-05, + "loss": 1.057, + "step": 6667 + }, + { + "epoch": 1.06, + "learning_rate": 4.576099463615364e-05, + "loss": 1.1321, + "step": 6668 + }, + { + "epoch": 1.06, + "learning_rate": 4.575955724311663e-05, + "loss": 1.0732, + "step": 6669 + }, + { + "epoch": 1.06, + "learning_rate": 4.575811962900293e-05, + "loss": 1.1224, + "step": 6670 + }, + { + "epoch": 1.06, + "learning_rate": 4.575668179382785e-05, + "loss": 1.0581, + "step": 6671 + }, + { + "epoch": 1.06, + "learning_rate": 4.575524373760672e-05, + "loss": 1.1146, + "step": 6672 + }, + { + "epoch": 1.06, + "learning_rate": 4.575380546035485e-05, + "loss": 1.0705, + "step": 6673 + }, + { + "epoch": 1.06, + "learning_rate": 4.575236696208754e-05, + "loss": 1.0793, + "step": 6674 + }, + { + "epoch": 1.06, + "learning_rate": 4.575092824282013e-05, + "loss": 1.0352, + "step": 6675 + }, + { + "epoch": 1.06, + "learning_rate": 4.574948930256793e-05, + "loss": 1.1006, + "step": 6676 + }, + { + "epoch": 1.06, + "learning_rate": 4.574805014134626e-05, + "loss": 1.1122, + "step": 6677 + }, + { + "epoch": 1.06, + "learning_rate": 4.574661075917046e-05, + "loss": 1.1236, + "step": 6678 + }, + { + "epoch": 1.06, + "learning_rate": 4.5745171156055844e-05, + "loss": 1.1619, + "step": 6679 + }, + { + "epoch": 1.06, + "learning_rate": 4.574373133201776e-05, + "loss": 1.0216, + "step": 6680 + }, + { + "epoch": 1.06, + "learning_rate": 4.574229128707152e-05, + "loss": 1.1541, + "step": 6681 + }, + { + "epoch": 1.06, + "learning_rate": 4.574085102123248e-05, + "loss": 1.0233, + "step": 6682 + }, + { + "epoch": 1.06, + "learning_rate": 4.5739410534515966e-05, + "loss": 1.0944, + "step": 6683 + }, + { + "epoch": 1.06, + "learning_rate": 4.5737969826937324e-05, + "loss": 1.1032, + "step": 6684 + }, + { + "epoch": 1.06, + "learning_rate": 4.5736528898511884e-05, + "loss": 1.1053, + "step": 6685 + }, + { + "epoch": 1.07, + "learning_rate": 4.5735087749255e-05, + "loss": 1.0209, + "step": 6686 + }, + { + "epoch": 1.07, + "learning_rate": 4.573364637918203e-05, + "loss": 1.1524, + "step": 6687 + }, + { + "epoch": 1.07, + "learning_rate": 4.573220478830831e-05, + "loss": 1.1027, + "step": 6688 + }, + { + "epoch": 1.07, + "learning_rate": 4.57307629766492e-05, + "loss": 1.094, + "step": 6689 + }, + { + "epoch": 1.07, + "learning_rate": 4.572932094422004e-05, + "loss": 1.0859, + "step": 6690 + }, + { + "epoch": 1.07, + "learning_rate": 4.572787869103621e-05, + "loss": 1.0777, + "step": 6691 + }, + { + "epoch": 1.07, + "learning_rate": 4.5726436217113045e-05, + "loss": 1.1641, + "step": 6692 + }, + { + "epoch": 1.07, + "learning_rate": 4.572499352246592e-05, + "loss": 1.1806, + "step": 6693 + }, + { + "epoch": 1.07, + "learning_rate": 4.57235506071102e-05, + "loss": 1.0214, + "step": 6694 + }, + { + "epoch": 1.07, + "learning_rate": 4.5722107471061246e-05, + "loss": 1.1597, + "step": 6695 + }, + { + "epoch": 1.07, + "learning_rate": 4.5720664114334424e-05, + "loss": 1.1737, + "step": 6696 + }, + { + "epoch": 1.07, + "learning_rate": 4.571922053694512e-05, + "loss": 1.1128, + "step": 6697 + }, + { + "epoch": 1.07, + "learning_rate": 4.571777673890869e-05, + "loss": 1.1895, + "step": 6698 + }, + { + "epoch": 1.07, + "learning_rate": 4.571633272024051e-05, + "loss": 1.0747, + "step": 6699 + }, + { + "epoch": 1.07, + "learning_rate": 4.571488848095597e-05, + "loss": 1.0742, + "step": 6700 + }, + { + "epoch": 1.07, + "learning_rate": 4.571344402107045e-05, + "loss": 1.1388, + "step": 6701 + }, + { + "epoch": 1.07, + "learning_rate": 4.571199934059931e-05, + "loss": 1.1016, + "step": 6702 + }, + { + "epoch": 1.07, + "learning_rate": 4.571055443955796e-05, + "loss": 1.0879, + "step": 6703 + }, + { + "epoch": 1.07, + "learning_rate": 4.5709109317961786e-05, + "loss": 1.1144, + "step": 6704 + }, + { + "epoch": 1.07, + "learning_rate": 4.5707663975826165e-05, + "loss": 1.061, + "step": 6705 + }, + { + "epoch": 1.07, + "learning_rate": 4.570621841316649e-05, + "loss": 1.1142, + "step": 6706 + }, + { + "epoch": 1.07, + "learning_rate": 4.570477262999816e-05, + "loss": 1.0972, + "step": 6707 + }, + { + "epoch": 1.07, + "learning_rate": 4.5703326626336586e-05, + "loss": 1.1532, + "step": 6708 + }, + { + "epoch": 1.07, + "learning_rate": 4.570188040219714e-05, + "loss": 1.2085, + "step": 6709 + }, + { + "epoch": 1.07, + "learning_rate": 4.5700433957595234e-05, + "loss": 1.0962, + "step": 6710 + }, + { + "epoch": 1.07, + "learning_rate": 4.569898729254628e-05, + "loss": 1.1296, + "step": 6711 + }, + { + "epoch": 1.07, + "learning_rate": 4.5697540407065674e-05, + "loss": 1.1743, + "step": 6712 + }, + { + "epoch": 1.07, + "learning_rate": 4.569609330116884e-05, + "loss": 1.1026, + "step": 6713 + }, + { + "epoch": 1.07, + "learning_rate": 4.569464597487116e-05, + "loss": 1.1192, + "step": 6714 + }, + { + "epoch": 1.07, + "learning_rate": 4.5693198428188077e-05, + "loss": 1.0553, + "step": 6715 + }, + { + "epoch": 1.07, + "learning_rate": 4.569175066113499e-05, + "loss": 1.1649, + "step": 6716 + }, + { + "epoch": 1.07, + "learning_rate": 4.5690302673727313e-05, + "loss": 1.159, + "step": 6717 + }, + { + "epoch": 1.07, + "learning_rate": 4.568885446598049e-05, + "loss": 1.0996, + "step": 6718 + }, + { + "epoch": 1.07, + "learning_rate": 4.568740603790992e-05, + "loss": 1.0905, + "step": 6719 + }, + { + "epoch": 1.07, + "learning_rate": 4.568595738953103e-05, + "loss": 1.1593, + "step": 6720 + }, + { + "epoch": 1.07, + "learning_rate": 4.5684508520859256e-05, + "loss": 1.0569, + "step": 6721 + }, + { + "epoch": 1.07, + "learning_rate": 4.5683059431910026e-05, + "loss": 1.098, + "step": 6722 + }, + { + "epoch": 1.07, + "learning_rate": 4.568161012269877e-05, + "loss": 1.1535, + "step": 6723 + }, + { + "epoch": 1.07, + "learning_rate": 4.568016059324092e-05, + "loss": 1.1059, + "step": 6724 + }, + { + "epoch": 1.07, + "learning_rate": 4.5678710843551914e-05, + "loss": 1.1196, + "step": 6725 + }, + { + "epoch": 1.07, + "learning_rate": 4.567726087364719e-05, + "loss": 1.1111, + "step": 6726 + }, + { + "epoch": 1.07, + "learning_rate": 4.56758106835422e-05, + "loss": 1.0807, + "step": 6727 + }, + { + "epoch": 1.07, + "learning_rate": 4.567436027325237e-05, + "loss": 1.1361, + "step": 6728 + }, + { + "epoch": 1.07, + "learning_rate": 4.567290964279316e-05, + "loss": 1.1884, + "step": 6729 + }, + { + "epoch": 1.07, + "learning_rate": 4.567145879218001e-05, + "loss": 1.1128, + "step": 6730 + }, + { + "epoch": 1.07, + "learning_rate": 4.567000772142838e-05, + "loss": 1.171, + "step": 6731 + }, + { + "epoch": 1.07, + "learning_rate": 4.566855643055371e-05, + "loss": 1.1382, + "step": 6732 + }, + { + "epoch": 1.07, + "learning_rate": 4.5667104919571466e-05, + "loss": 1.0147, + "step": 6733 + }, + { + "epoch": 1.07, + "learning_rate": 4.5665653188497096e-05, + "loss": 1.081, + "step": 6734 + }, + { + "epoch": 1.07, + "learning_rate": 4.566420123734607e-05, + "loss": 1.0664, + "step": 6735 + }, + { + "epoch": 1.07, + "learning_rate": 4.5662749066133845e-05, + "loss": 1.0882, + "step": 6736 + }, + { + "epoch": 1.07, + "learning_rate": 4.566129667487589e-05, + "loss": 1.0823, + "step": 6737 + }, + { + "epoch": 1.07, + "learning_rate": 4.565984406358767e-05, + "loss": 1.0294, + "step": 6738 + }, + { + "epoch": 1.07, + "learning_rate": 4.565839123228465e-05, + "loss": 1.0594, + "step": 6739 + }, + { + "epoch": 1.07, + "learning_rate": 4.565693818098231e-05, + "loss": 1.0475, + "step": 6740 + }, + { + "epoch": 1.07, + "learning_rate": 4.565548490969611e-05, + "loss": 1.1784, + "step": 6741 + }, + { + "epoch": 1.07, + "learning_rate": 4.5654031418441545e-05, + "loss": 1.1423, + "step": 6742 + }, + { + "epoch": 1.07, + "learning_rate": 4.5652577707234084e-05, + "loss": 1.105, + "step": 6743 + }, + { + "epoch": 1.07, + "learning_rate": 4.565112377608921e-05, + "loss": 1.1034, + "step": 6744 + }, + { + "epoch": 1.07, + "learning_rate": 4.56496696250224e-05, + "loss": 1.0971, + "step": 6745 + }, + { + "epoch": 1.07, + "learning_rate": 4.564821525404915e-05, + "loss": 1.1483, + "step": 6746 + }, + { + "epoch": 1.07, + "learning_rate": 4.564676066318494e-05, + "loss": 1.0925, + "step": 6747 + }, + { + "epoch": 1.08, + "learning_rate": 4.5645305852445266e-05, + "loss": 1.1117, + "step": 6748 + }, + { + "epoch": 1.08, + "learning_rate": 4.564385082184561e-05, + "loss": 1.0649, + "step": 6749 + }, + { + "epoch": 1.08, + "learning_rate": 4.564239557140149e-05, + "loss": 1.0614, + "step": 6750 + }, + { + "epoch": 1.08, + "learning_rate": 4.564094010112838e-05, + "loss": 1.0866, + "step": 6751 + }, + { + "epoch": 1.08, + "learning_rate": 4.563948441104179e-05, + "loss": 1.086, + "step": 6752 + }, + { + "epoch": 1.08, + "learning_rate": 4.5638028501157224e-05, + "loss": 1.1819, + "step": 6753 + }, + { + "epoch": 1.08, + "learning_rate": 4.563657237149019e-05, + "loss": 1.0518, + "step": 6754 + }, + { + "epoch": 1.08, + "learning_rate": 4.563511602205618e-05, + "loss": 1.0395, + "step": 6755 + }, + { + "epoch": 1.08, + "learning_rate": 4.5633659452870717e-05, + "loss": 1.2299, + "step": 6756 + }, + { + "epoch": 1.08, + "learning_rate": 4.563220266394931e-05, + "loss": 1.2229, + "step": 6757 + }, + { + "epoch": 1.08, + "learning_rate": 4.563074565530746e-05, + "loss": 1.0503, + "step": 6758 + }, + { + "epoch": 1.08, + "learning_rate": 4.56292884269607e-05, + "loss": 1.2074, + "step": 6759 + }, + { + "epoch": 1.08, + "learning_rate": 4.562783097892455e-05, + "loss": 1.1008, + "step": 6760 + }, + { + "epoch": 1.08, + "learning_rate": 4.5626373311214515e-05, + "loss": 1.1765, + "step": 6761 + }, + { + "epoch": 1.08, + "learning_rate": 4.5624915423846124e-05, + "loss": 1.0459, + "step": 6762 + }, + { + "epoch": 1.08, + "learning_rate": 4.5623457316834914e-05, + "loss": 1.0938, + "step": 6763 + }, + { + "epoch": 1.08, + "learning_rate": 4.56219989901964e-05, + "loss": 1.1751, + "step": 6764 + }, + { + "epoch": 1.08, + "learning_rate": 4.562054044394612e-05, + "loss": 1.0734, + "step": 6765 + }, + { + "epoch": 1.08, + "learning_rate": 4.56190816780996e-05, + "loss": 1.1475, + "step": 6766 + }, + { + "epoch": 1.08, + "learning_rate": 4.561762269267238e-05, + "loss": 1.0257, + "step": 6767 + }, + { + "epoch": 1.08, + "learning_rate": 4.561616348767999e-05, + "loss": 1.118, + "step": 6768 + }, + { + "epoch": 1.08, + "learning_rate": 4.561470406313798e-05, + "loss": 1.1418, + "step": 6769 + }, + { + "epoch": 1.08, + "learning_rate": 4.561324441906189e-05, + "loss": 1.1024, + "step": 6770 + }, + { + "epoch": 1.08, + "learning_rate": 4.561178455546726e-05, + "loss": 1.1309, + "step": 6771 + }, + { + "epoch": 1.08, + "learning_rate": 4.561032447236964e-05, + "loss": 1.0575, + "step": 6772 + }, + { + "epoch": 1.08, + "learning_rate": 4.5608864169784574e-05, + "loss": 1.0799, + "step": 6773 + }, + { + "epoch": 1.08, + "learning_rate": 4.560740364772761e-05, + "loss": 1.1204, + "step": 6774 + }, + { + "epoch": 1.08, + "learning_rate": 4.560594290621432e-05, + "loss": 1.1523, + "step": 6775 + }, + { + "epoch": 1.08, + "learning_rate": 4.5604481945260236e-05, + "loss": 1.0871, + "step": 6776 + }, + { + "epoch": 1.08, + "learning_rate": 4.560302076488094e-05, + "loss": 1.0871, + "step": 6777 + }, + { + "epoch": 1.08, + "learning_rate": 4.560155936509197e-05, + "loss": 1.1302, + "step": 6778 + }, + { + "epoch": 1.08, + "learning_rate": 4.560009774590891e-05, + "loss": 1.0155, + "step": 6779 + }, + { + "epoch": 1.08, + "learning_rate": 4.5598635907347314e-05, + "loss": 1.0656, + "step": 6780 + }, + { + "epoch": 1.08, + "learning_rate": 4.559717384942275e-05, + "loss": 1.1349, + "step": 6781 + }, + { + "epoch": 1.08, + "learning_rate": 4.559571157215079e-05, + "loss": 1.0815, + "step": 6782 + }, + { + "epoch": 1.08, + "learning_rate": 4.5594249075547e-05, + "loss": 1.0977, + "step": 6783 + }, + { + "epoch": 1.08, + "learning_rate": 4.559278635962697e-05, + "loss": 0.988, + "step": 6784 + }, + { + "epoch": 1.08, + "learning_rate": 4.559132342440626e-05, + "loss": 1.1525, + "step": 6785 + }, + { + "epoch": 1.08, + "learning_rate": 4.558986026990046e-05, + "loss": 1.0483, + "step": 6786 + }, + { + "epoch": 1.08, + "learning_rate": 4.558839689612515e-05, + "loss": 1.1423, + "step": 6787 + }, + { + "epoch": 1.08, + "learning_rate": 4.558693330309591e-05, + "loss": 0.9985, + "step": 6788 + }, + { + "epoch": 1.08, + "learning_rate": 4.558546949082833e-05, + "loss": 1.1728, + "step": 6789 + }, + { + "epoch": 1.08, + "learning_rate": 4.5584005459337995e-05, + "loss": 1.1227, + "step": 6790 + }, + { + "epoch": 1.08, + "learning_rate": 4.55825412086405e-05, + "loss": 1.0981, + "step": 6791 + }, + { + "epoch": 1.08, + "learning_rate": 4.5581076738751444e-05, + "loss": 1.0737, + "step": 6792 + }, + { + "epoch": 1.08, + "learning_rate": 4.557961204968641e-05, + "loss": 1.0835, + "step": 6793 + }, + { + "epoch": 1.08, + "learning_rate": 4.5578147141461006e-05, + "loss": 1.083, + "step": 6794 + }, + { + "epoch": 1.08, + "learning_rate": 4.5576682014090824e-05, + "loss": 1.03, + "step": 6795 + }, + { + "epoch": 1.08, + "learning_rate": 4.557521666759147e-05, + "loss": 1.0824, + "step": 6796 + }, + { + "epoch": 1.08, + "learning_rate": 4.5573751101978545e-05, + "loss": 1.1033, + "step": 6797 + }, + { + "epoch": 1.08, + "learning_rate": 4.557228531726767e-05, + "loss": 1.023, + "step": 6798 + }, + { + "epoch": 1.08, + "learning_rate": 4.5570819313474445e-05, + "loss": 1.0732, + "step": 6799 + }, + { + "epoch": 1.08, + "learning_rate": 4.556935309061448e-05, + "loss": 1.0929, + "step": 6800 + }, + { + "epoch": 1.08, + "learning_rate": 4.55678866487034e-05, + "loss": 1.1804, + "step": 6801 + }, + { + "epoch": 1.08, + "learning_rate": 4.55664199877568e-05, + "loss": 1.0457, + "step": 6802 + }, + { + "epoch": 1.08, + "learning_rate": 4.556495310779032e-05, + "loss": 1.1765, + "step": 6803 + }, + { + "epoch": 1.08, + "learning_rate": 4.556348600881958e-05, + "loss": 1.023, + "step": 6804 + }, + { + "epoch": 1.08, + "learning_rate": 4.5562018690860195e-05, + "loss": 1.0896, + "step": 6805 + }, + { + "epoch": 1.08, + "learning_rate": 4.55605511539278e-05, + "loss": 1.019, + "step": 6806 + }, + { + "epoch": 1.08, + "learning_rate": 4.5559083398038e-05, + "loss": 1.1094, + "step": 6807 + }, + { + "epoch": 1.08, + "learning_rate": 4.555761542320646e-05, + "loss": 1.0963, + "step": 6808 + }, + { + "epoch": 1.08, + "learning_rate": 4.5556147229448795e-05, + "loss": 1.0458, + "step": 6809 + }, + { + "epoch": 1.08, + "learning_rate": 4.5554678816780636e-05, + "loss": 1.0125, + "step": 6810 + }, + { + "epoch": 1.09, + "learning_rate": 4.555321018521763e-05, + "loss": 1.093, + "step": 6811 + }, + { + "epoch": 1.09, + "learning_rate": 4.555174133477542e-05, + "loss": 1.1909, + "step": 6812 + }, + { + "epoch": 1.09, + "learning_rate": 4.5550272265469626e-05, + "loss": 1.1089, + "step": 6813 + }, + { + "epoch": 1.09, + "learning_rate": 4.554880297731593e-05, + "loss": 1.1151, + "step": 6814 + }, + { + "epoch": 1.09, + "learning_rate": 4.5547333470329945e-05, + "loss": 0.9906, + "step": 6815 + }, + { + "epoch": 1.09, + "learning_rate": 4.554586374452733e-05, + "loss": 1.0876, + "step": 6816 + }, + { + "epoch": 1.09, + "learning_rate": 4.5544393799923744e-05, + "loss": 1.1986, + "step": 6817 + }, + { + "epoch": 1.09, + "learning_rate": 4.554292363653484e-05, + "loss": 1.0299, + "step": 6818 + }, + { + "epoch": 1.09, + "learning_rate": 4.554145325437627e-05, + "loss": 1.0369, + "step": 6819 + }, + { + "epoch": 1.09, + "learning_rate": 4.5539982653463684e-05, + "loss": 1.0861, + "step": 6820 + }, + { + "epoch": 1.09, + "learning_rate": 4.553851183381277e-05, + "loss": 1.0413, + "step": 6821 + }, + { + "epoch": 1.09, + "learning_rate": 4.553704079543916e-05, + "loss": 1.0595, + "step": 6822 + }, + { + "epoch": 1.09, + "learning_rate": 4.5535569538358536e-05, + "loss": 1.1673, + "step": 6823 + }, + { + "epoch": 1.09, + "learning_rate": 4.553409806258657e-05, + "loss": 1.066, + "step": 6824 + }, + { + "epoch": 1.09, + "learning_rate": 4.553262636813892e-05, + "loss": 1.0655, + "step": 6825 + }, + { + "epoch": 1.09, + "learning_rate": 4.553115445503127e-05, + "loss": 1.0837, + "step": 6826 + }, + { + "epoch": 1.09, + "learning_rate": 4.552968232327929e-05, + "loss": 1.0698, + "step": 6827 + }, + { + "epoch": 1.09, + "learning_rate": 4.552820997289865e-05, + "loss": 1.0638, + "step": 6828 + }, + { + "epoch": 1.09, + "learning_rate": 4.552673740390504e-05, + "loss": 1.0529, + "step": 6829 + }, + { + "epoch": 1.09, + "learning_rate": 4.5525264616314146e-05, + "loss": 1.1742, + "step": 6830 + }, + { + "epoch": 1.09, + "learning_rate": 4.552379161014164e-05, + "loss": 1.0702, + "step": 6831 + }, + { + "epoch": 1.09, + "learning_rate": 4.5522318385403215e-05, + "loss": 1.1351, + "step": 6832 + }, + { + "epoch": 1.09, + "learning_rate": 4.552084494211455e-05, + "loss": 1.0236, + "step": 6833 + }, + { + "epoch": 1.09, + "learning_rate": 4.551937128029136e-05, + "loss": 1.1283, + "step": 6834 + }, + { + "epoch": 1.09, + "learning_rate": 4.551789739994931e-05, + "loss": 1.1707, + "step": 6835 + }, + { + "epoch": 1.09, + "learning_rate": 4.551642330110412e-05, + "loss": 1.0755, + "step": 6836 + }, + { + "epoch": 1.09, + "learning_rate": 4.5514948983771466e-05, + "loss": 1.1138, + "step": 6837 + }, + { + "epoch": 1.09, + "learning_rate": 4.551347444796706e-05, + "loss": 1.0718, + "step": 6838 + }, + { + "epoch": 1.09, + "learning_rate": 4.551199969370662e-05, + "loss": 1.0558, + "step": 6839 + }, + { + "epoch": 1.09, + "learning_rate": 4.551052472100582e-05, + "loss": 1.1325, + "step": 6840 + }, + { + "epoch": 1.09, + "learning_rate": 4.55090495298804e-05, + "loss": 1.1166, + "step": 6841 + }, + { + "epoch": 1.09, + "learning_rate": 4.550757412034604e-05, + "loss": 1.1174, + "step": 6842 + }, + { + "epoch": 1.09, + "learning_rate": 4.550609849241847e-05, + "loss": 1.1243, + "step": 6843 + }, + { + "epoch": 1.09, + "learning_rate": 4.55046226461134e-05, + "loss": 1.1589, + "step": 6844 + }, + { + "epoch": 1.09, + "learning_rate": 4.550314658144655e-05, + "loss": 1.2494, + "step": 6845 + }, + { + "epoch": 1.09, + "learning_rate": 4.550167029843364e-05, + "loss": 1.0377, + "step": 6846 + }, + { + "epoch": 1.09, + "learning_rate": 4.550019379709038e-05, + "loss": 1.0556, + "step": 6847 + }, + { + "epoch": 1.09, + "learning_rate": 4.549871707743251e-05, + "loss": 1.1125, + "step": 6848 + }, + { + "epoch": 1.09, + "learning_rate": 4.549724013947575e-05, + "loss": 1.0394, + "step": 6849 + }, + { + "epoch": 1.09, + "learning_rate": 4.5495762983235827e-05, + "loss": 1.1534, + "step": 6850 + }, + { + "epoch": 1.09, + "learning_rate": 4.549428560872846e-05, + "loss": 1.0655, + "step": 6851 + }, + { + "epoch": 1.09, + "learning_rate": 4.5492808015969405e-05, + "loss": 1.0503, + "step": 6852 + }, + { + "epoch": 1.09, + "learning_rate": 4.549133020497437e-05, + "loss": 1.0624, + "step": 6853 + }, + { + "epoch": 1.09, + "learning_rate": 4.548985217575912e-05, + "loss": 1.1185, + "step": 6854 + }, + { + "epoch": 1.09, + "learning_rate": 4.548837392833939e-05, + "loss": 1.075, + "step": 6855 + }, + { + "epoch": 1.09, + "learning_rate": 4.548689546273091e-05, + "loss": 1.0985, + "step": 6856 + }, + { + "epoch": 1.09, + "learning_rate": 4.548541677894943e-05, + "loss": 1.0642, + "step": 6857 + }, + { + "epoch": 1.09, + "learning_rate": 4.54839378770107e-05, + "loss": 1.0713, + "step": 6858 + }, + { + "epoch": 1.09, + "learning_rate": 4.548245875693047e-05, + "loss": 1.0819, + "step": 6859 + }, + { + "epoch": 1.09, + "learning_rate": 4.5480979418724486e-05, + "loss": 1.1446, + "step": 6860 + }, + { + "epoch": 1.09, + "learning_rate": 4.54794998624085e-05, + "loss": 1.0101, + "step": 6861 + }, + { + "epoch": 1.09, + "learning_rate": 4.547802008799828e-05, + "loss": 1.0387, + "step": 6862 + }, + { + "epoch": 1.09, + "learning_rate": 4.5476540095509585e-05, + "loss": 1.1335, + "step": 6863 + }, + { + "epoch": 1.09, + "learning_rate": 4.547505988495816e-05, + "loss": 0.9934, + "step": 6864 + }, + { + "epoch": 1.09, + "learning_rate": 4.5473579456359783e-05, + "loss": 1.0927, + "step": 6865 + }, + { + "epoch": 1.09, + "learning_rate": 4.5472098809730215e-05, + "loss": 0.9716, + "step": 6866 + }, + { + "epoch": 1.09, + "learning_rate": 4.547061794508522e-05, + "loss": 1.0912, + "step": 6867 + }, + { + "epoch": 1.09, + "learning_rate": 4.546913686244058e-05, + "loss": 1.1646, + "step": 6868 + }, + { + "epoch": 1.09, + "learning_rate": 4.5467655561812046e-05, + "loss": 1.0243, + "step": 6869 + }, + { + "epoch": 1.09, + "learning_rate": 4.5466174043215415e-05, + "loss": 1.1138, + "step": 6870 + }, + { + "epoch": 1.09, + "learning_rate": 4.546469230666645e-05, + "loss": 1.0313, + "step": 6871 + }, + { + "epoch": 1.09, + "learning_rate": 4.546321035218094e-05, + "loss": 1.047, + "step": 6872 + }, + { + "epoch": 1.09, + "learning_rate": 4.546172817977467e-05, + "loss": 1.0817, + "step": 6873 + }, + { + "epoch": 1.1, + "learning_rate": 4.546024578946341e-05, + "loss": 1.077, + "step": 6874 + }, + { + "epoch": 1.1, + "learning_rate": 4.5458763181262955e-05, + "loss": 1.05, + "step": 6875 + }, + { + "epoch": 1.1, + "learning_rate": 4.545728035518909e-05, + "loss": 1.0841, + "step": 6876 + }, + { + "epoch": 1.1, + "learning_rate": 4.5455797311257616e-05, + "loss": 1.0372, + "step": 6877 + }, + { + "epoch": 1.1, + "learning_rate": 4.5454314049484316e-05, + "loss": 1.1287, + "step": 6878 + }, + { + "epoch": 1.1, + "learning_rate": 4.545283056988499e-05, + "loss": 1.0306, + "step": 6879 + }, + { + "epoch": 1.1, + "learning_rate": 4.5451346872475434e-05, + "loss": 1.1196, + "step": 6880 + }, + { + "epoch": 1.1, + "learning_rate": 4.544986295727145e-05, + "loss": 1.0552, + "step": 6881 + }, + { + "epoch": 1.1, + "learning_rate": 4.544837882428884e-05, + "loss": 1.0813, + "step": 6882 + }, + { + "epoch": 1.1, + "learning_rate": 4.544689447354341e-05, + "loss": 1.1256, + "step": 6883 + }, + { + "epoch": 1.1, + "learning_rate": 4.544540990505097e-05, + "loss": 1.1689, + "step": 6884 + }, + { + "epoch": 1.1, + "learning_rate": 4.5443925118827334e-05, + "loss": 1.1214, + "step": 6885 + }, + { + "epoch": 1.1, + "learning_rate": 4.5442440114888296e-05, + "loss": 1.1331, + "step": 6886 + }, + { + "epoch": 1.1, + "learning_rate": 4.544095489324969e-05, + "loss": 1.0941, + "step": 6887 + }, + { + "epoch": 1.1, + "learning_rate": 4.543946945392732e-05, + "loss": 1.0711, + "step": 6888 + }, + { + "epoch": 1.1, + "learning_rate": 4.5437983796937014e-05, + "loss": 1.1472, + "step": 6889 + }, + { + "epoch": 1.1, + "learning_rate": 4.543649792229458e-05, + "loss": 1.0676, + "step": 6890 + }, + { + "epoch": 1.1, + "learning_rate": 4.543501183001585e-05, + "loss": 1.1444, + "step": 6891 + }, + { + "epoch": 1.1, + "learning_rate": 4.543352552011666e-05, + "loss": 1.0724, + "step": 6892 + }, + { + "epoch": 1.1, + "learning_rate": 4.5432038992612824e-05, + "loss": 1.0718, + "step": 6893 + }, + { + "epoch": 1.1, + "learning_rate": 4.543055224752018e-05, + "loss": 1.1056, + "step": 6894 + }, + { + "epoch": 1.1, + "learning_rate": 4.542906528485456e-05, + "loss": 0.992, + "step": 6895 + }, + { + "epoch": 1.1, + "learning_rate": 4.542757810463178e-05, + "loss": 1.0372, + "step": 6896 + }, + { + "epoch": 1.1, + "learning_rate": 4.542609070686771e-05, + "loss": 1.0767, + "step": 6897 + }, + { + "epoch": 1.1, + "learning_rate": 4.542460309157818e-05, + "loss": 1.0566, + "step": 6898 + }, + { + "epoch": 1.1, + "learning_rate": 4.542311525877902e-05, + "loss": 1.0111, + "step": 6899 + }, + { + "epoch": 1.1, + "learning_rate": 4.542162720848608e-05, + "loss": 1.0395, + "step": 6900 + }, + { + "epoch": 1.1, + "learning_rate": 4.5420138940715204e-05, + "loss": 0.9849, + "step": 6901 + }, + { + "epoch": 1.1, + "learning_rate": 4.541865045548226e-05, + "loss": 1.13, + "step": 6902 + }, + { + "epoch": 1.1, + "learning_rate": 4.541716175280307e-05, + "loss": 1.0783, + "step": 6903 + }, + { + "epoch": 1.1, + "learning_rate": 4.541567283269351e-05, + "loss": 1.1191, + "step": 6904 + }, + { + "epoch": 1.1, + "learning_rate": 4.541418369516943e-05, + "loss": 1.1003, + "step": 6905 + }, + { + "epoch": 1.1, + "learning_rate": 4.5412694340246694e-05, + "loss": 1.087, + "step": 6906 + }, + { + "epoch": 1.1, + "learning_rate": 4.5411204767941145e-05, + "loss": 1.1143, + "step": 6907 + }, + { + "epoch": 1.1, + "learning_rate": 4.540971497826866e-05, + "loss": 1.0855, + "step": 6908 + }, + { + "epoch": 1.1, + "learning_rate": 4.540822497124511e-05, + "loss": 1.0882, + "step": 6909 + }, + { + "epoch": 1.1, + "learning_rate": 4.540673474688634e-05, + "loss": 1.0778, + "step": 6910 + }, + { + "epoch": 1.1, + "learning_rate": 4.540524430520824e-05, + "loss": 1.0745, + "step": 6911 + }, + { + "epoch": 1.1, + "learning_rate": 4.5403753646226676e-05, + "loss": 1.1367, + "step": 6912 + }, + { + "epoch": 1.1, + "learning_rate": 4.540226276995753e-05, + "loss": 1.0374, + "step": 6913 + }, + { + "epoch": 1.1, + "learning_rate": 4.540077167641667e-05, + "loss": 1.1299, + "step": 6914 + }, + { + "epoch": 1.1, + "learning_rate": 4.5399280365619965e-05, + "loss": 1.0236, + "step": 6915 + }, + { + "epoch": 1.1, + "learning_rate": 4.539778883758332e-05, + "loss": 1.1743, + "step": 6916 + }, + { + "epoch": 1.1, + "learning_rate": 4.53962970923226e-05, + "loss": 1.0357, + "step": 6917 + }, + { + "epoch": 1.1, + "learning_rate": 4.539480512985371e-05, + "loss": 1.1455, + "step": 6918 + }, + { + "epoch": 1.1, + "learning_rate": 4.539331295019252e-05, + "loss": 1.0506, + "step": 6919 + }, + { + "epoch": 1.1, + "learning_rate": 4.5391820553354925e-05, + "loss": 1.0255, + "step": 6920 + }, + { + "epoch": 1.1, + "learning_rate": 4.5390327939356827e-05, + "loss": 1.1348, + "step": 6921 + }, + { + "epoch": 1.1, + "learning_rate": 4.5388835108214114e-05, + "loss": 1.1243, + "step": 6922 + }, + { + "epoch": 1.1, + "learning_rate": 4.5387342059942685e-05, + "loss": 1.1603, + "step": 6923 + }, + { + "epoch": 1.1, + "learning_rate": 4.5385848794558436e-05, + "loss": 1.0441, + "step": 6924 + }, + { + "epoch": 1.1, + "learning_rate": 4.5384355312077284e-05, + "loss": 1.0506, + "step": 6925 + }, + { + "epoch": 1.1, + "learning_rate": 4.538286161251513e-05, + "loss": 1.0341, + "step": 6926 + }, + { + "epoch": 1.1, + "learning_rate": 4.5381367695887855e-05, + "loss": 1.0243, + "step": 6927 + }, + { + "epoch": 1.1, + "learning_rate": 4.537987356221139e-05, + "loss": 1.0627, + "step": 6928 + }, + { + "epoch": 1.1, + "learning_rate": 4.5378379211501655e-05, + "loss": 1.0076, + "step": 6929 + }, + { + "epoch": 1.1, + "learning_rate": 4.5376884643774556e-05, + "loss": 1.0992, + "step": 6930 + }, + { + "epoch": 1.1, + "learning_rate": 4.537538985904599e-05, + "loss": 1.0702, + "step": 6931 + }, + { + "epoch": 1.1, + "learning_rate": 4.537389485733191e-05, + "loss": 1.0209, + "step": 6932 + }, + { + "epoch": 1.1, + "learning_rate": 4.537239963864821e-05, + "loss": 1.0067, + "step": 6933 + }, + { + "epoch": 1.1, + "learning_rate": 4.537090420301082e-05, + "loss": 1.1221, + "step": 6934 + }, + { + "epoch": 1.1, + "learning_rate": 4.536940855043567e-05, + "loss": 1.091, + "step": 6935 + }, + { + "epoch": 1.1, + "learning_rate": 4.536791268093869e-05, + "loss": 1.0678, + "step": 6936 + }, + { + "epoch": 1.11, + "learning_rate": 4.53664165945358e-05, + "loss": 1.1232, + "step": 6937 + }, + { + "epoch": 1.11, + "learning_rate": 4.536492029124294e-05, + "loss": 0.9357, + "step": 6938 + }, + { + "epoch": 1.11, + "learning_rate": 4.536342377107604e-05, + "loss": 1.0851, + "step": 6939 + }, + { + "epoch": 1.11, + "learning_rate": 4.536192703405105e-05, + "loss": 1.0156, + "step": 6940 + }, + { + "epoch": 1.11, + "learning_rate": 4.536043008018389e-05, + "loss": 1.0885, + "step": 6941 + }, + { + "epoch": 1.11, + "learning_rate": 4.535893290949051e-05, + "loss": 1.1479, + "step": 6942 + }, + { + "epoch": 1.11, + "learning_rate": 4.5357435521986855e-05, + "loss": 1.1214, + "step": 6943 + }, + { + "epoch": 1.11, + "learning_rate": 4.5355937917688877e-05, + "loss": 1.0964, + "step": 6944 + }, + { + "epoch": 1.11, + "learning_rate": 4.5354440096612514e-05, + "loss": 1.1117, + "step": 6945 + }, + { + "epoch": 1.11, + "learning_rate": 4.5352942058773725e-05, + "loss": 1.106, + "step": 6946 + }, + { + "epoch": 1.11, + "learning_rate": 4.5351443804188454e-05, + "loss": 1.0152, + "step": 6947 + }, + { + "epoch": 1.11, + "learning_rate": 4.5349945332872667e-05, + "loss": 1.0835, + "step": 6948 + }, + { + "epoch": 1.11, + "learning_rate": 4.5348446644842314e-05, + "loss": 1.0158, + "step": 6949 + }, + { + "epoch": 1.11, + "learning_rate": 4.5346947740113354e-05, + "loss": 1.0216, + "step": 6950 + }, + { + "epoch": 1.11, + "learning_rate": 4.5345448618701765e-05, + "loss": 1.0586, + "step": 6951 + }, + { + "epoch": 1.11, + "learning_rate": 4.534394928062349e-05, + "loss": 1.0878, + "step": 6952 + }, + { + "epoch": 1.11, + "learning_rate": 4.534244972589451e-05, + "loss": 1.0829, + "step": 6953 + }, + { + "epoch": 1.11, + "learning_rate": 4.534094995453079e-05, + "loss": 1.0911, + "step": 6954 + }, + { + "epoch": 1.11, + "learning_rate": 4.533944996654831e-05, + "loss": 1.0908, + "step": 6955 + }, + { + "epoch": 1.11, + "learning_rate": 4.533794976196303e-05, + "loss": 1.0293, + "step": 6956 + }, + { + "epoch": 1.11, + "learning_rate": 4.533644934079093e-05, + "loss": 1.0187, + "step": 6957 + }, + { + "epoch": 1.11, + "learning_rate": 4.533494870304799e-05, + "loss": 1.0246, + "step": 6958 + }, + { + "epoch": 1.11, + "learning_rate": 4.533344784875019e-05, + "loss": 1.0117, + "step": 6959 + }, + { + "epoch": 1.11, + "learning_rate": 4.5331946777913525e-05, + "loss": 1.0479, + "step": 6960 + }, + { + "epoch": 1.11, + "learning_rate": 4.5330445490553967e-05, + "loss": 0.9695, + "step": 6961 + }, + { + "epoch": 1.11, + "learning_rate": 4.532894398668751e-05, + "loss": 1.012, + "step": 6962 + }, + { + "epoch": 1.11, + "learning_rate": 4.5327442266330134e-05, + "loss": 1.085, + "step": 6963 + }, + { + "epoch": 1.11, + "learning_rate": 4.532594032949784e-05, + "loss": 1.1744, + "step": 6964 + }, + { + "epoch": 1.11, + "learning_rate": 4.5324438176206624e-05, + "loss": 1.1852, + "step": 6965 + }, + { + "epoch": 1.11, + "learning_rate": 4.532293580647248e-05, + "loss": 1.1115, + "step": 6966 + }, + { + "epoch": 1.11, + "learning_rate": 4.532143322031141e-05, + "loss": 1.0413, + "step": 6967 + }, + { + "epoch": 1.11, + "learning_rate": 4.531993041773941e-05, + "loss": 1.0749, + "step": 6968 + }, + { + "epoch": 1.11, + "learning_rate": 4.531842739877249e-05, + "loss": 1.1073, + "step": 6969 + }, + { + "epoch": 1.11, + "learning_rate": 4.531692416342666e-05, + "loss": 1.1038, + "step": 6970 + }, + { + "epoch": 1.11, + "learning_rate": 4.5315420711717914e-05, + "loss": 1.0914, + "step": 6971 + }, + { + "epoch": 1.11, + "learning_rate": 4.531391704366227e-05, + "loss": 1.1176, + "step": 6972 + }, + { + "epoch": 1.11, + "learning_rate": 4.531241315927575e-05, + "loss": 1.0513, + "step": 6973 + }, + { + "epoch": 1.11, + "learning_rate": 4.531090905857436e-05, + "loss": 1.0538, + "step": 6974 + }, + { + "epoch": 1.11, + "learning_rate": 4.5309404741574114e-05, + "loss": 1.1516, + "step": 6975 + }, + { + "epoch": 1.11, + "learning_rate": 4.530790020829104e-05, + "loss": 1.0935, + "step": 6976 + }, + { + "epoch": 1.11, + "learning_rate": 4.5306395458741155e-05, + "loss": 1.1694, + "step": 6977 + }, + { + "epoch": 1.11, + "learning_rate": 4.530489049294049e-05, + "loss": 1.0793, + "step": 6978 + }, + { + "epoch": 1.11, + "learning_rate": 4.530338531090507e-05, + "loss": 1.1183, + "step": 6979 + }, + { + "epoch": 1.11, + "learning_rate": 4.5301879912650926e-05, + "loss": 1.0695, + "step": 6980 + }, + { + "epoch": 1.11, + "learning_rate": 4.530037429819408e-05, + "loss": 1.0807, + "step": 6981 + }, + { + "epoch": 1.11, + "learning_rate": 4.529886846755058e-05, + "loss": 1.0731, + "step": 6982 + }, + { + "epoch": 1.11, + "learning_rate": 4.5297362420736444e-05, + "loss": 1.1438, + "step": 6983 + }, + { + "epoch": 1.11, + "learning_rate": 4.529585615776773e-05, + "loss": 1.0426, + "step": 6984 + }, + { + "epoch": 1.11, + "learning_rate": 4.529434967866046e-05, + "loss": 1.0802, + "step": 6985 + }, + { + "epoch": 1.11, + "learning_rate": 4.52928429834307e-05, + "loss": 0.974, + "step": 6986 + }, + { + "epoch": 1.11, + "learning_rate": 4.529133607209447e-05, + "loss": 1.082, + "step": 6987 + }, + { + "epoch": 1.11, + "learning_rate": 4.528982894466784e-05, + "loss": 1.0518, + "step": 6988 + }, + { + "epoch": 1.11, + "learning_rate": 4.5288321601166844e-05, + "loss": 1.103, + "step": 6989 + }, + { + "epoch": 1.11, + "learning_rate": 4.528681404160754e-05, + "loss": 1.1676, + "step": 6990 + }, + { + "epoch": 1.11, + "learning_rate": 4.528530626600599e-05, + "loss": 1.0697, + "step": 6991 + }, + { + "epoch": 1.11, + "learning_rate": 4.5283798274378234e-05, + "loss": 1.1316, + "step": 6992 + }, + { + "epoch": 1.11, + "learning_rate": 4.528229006674035e-05, + "loss": 1.0344, + "step": 6993 + }, + { + "epoch": 1.11, + "learning_rate": 4.5280781643108386e-05, + "loss": 1.1147, + "step": 6994 + }, + { + "epoch": 1.11, + "learning_rate": 4.5279273003498414e-05, + "loss": 1.0676, + "step": 6995 + }, + { + "epoch": 1.11, + "learning_rate": 4.527776414792649e-05, + "loss": 1.0106, + "step": 6996 + }, + { + "epoch": 1.11, + "learning_rate": 4.527625507640869e-05, + "loss": 1.1328, + "step": 6997 + }, + { + "epoch": 1.11, + "learning_rate": 4.527474578896108e-05, + "loss": 1.1195, + "step": 6998 + }, + { + "epoch": 1.12, + "learning_rate": 4.527323628559975e-05, + "loss": 1.1611, + "step": 6999 + }, + { + "epoch": 1.12, + "learning_rate": 4.527172656634075e-05, + "loss": 1.0661, + "step": 7000 + }, + { + "epoch": 1.12, + "learning_rate": 4.5270216631200166e-05, + "loss": 1.082, + "step": 7001 + }, + { + "epoch": 1.12, + "learning_rate": 4.526870648019409e-05, + "loss": 1.1837, + "step": 7002 + }, + { + "epoch": 1.12, + "learning_rate": 4.5267196113338594e-05, + "loss": 1.1375, + "step": 7003 + }, + { + "epoch": 1.12, + "learning_rate": 4.5265685530649765e-05, + "loss": 1.0362, + "step": 7004 + }, + { + "epoch": 1.12, + "learning_rate": 4.5264174732143684e-05, + "loss": 1.1167, + "step": 7005 + }, + { + "epoch": 1.12, + "learning_rate": 4.526266371783645e-05, + "loss": 1.075, + "step": 7006 + }, + { + "epoch": 1.12, + "learning_rate": 4.5261152487744144e-05, + "loss": 1.0437, + "step": 7007 + }, + { + "epoch": 1.12, + "learning_rate": 4.5259641041882874e-05, + "loss": 1.0361, + "step": 7008 + }, + { + "epoch": 1.12, + "learning_rate": 4.525812938026872e-05, + "loss": 1.0169, + "step": 7009 + }, + { + "epoch": 1.12, + "learning_rate": 4.5256617502917785e-05, + "loss": 1.0894, + "step": 7010 + }, + { + "epoch": 1.12, + "learning_rate": 4.5255105409846175e-05, + "loss": 1.0183, + "step": 7011 + }, + { + "epoch": 1.12, + "learning_rate": 4.525359310106999e-05, + "loss": 1.0864, + "step": 7012 + }, + { + "epoch": 1.12, + "learning_rate": 4.5252080576605324e-05, + "loss": 1.0687, + "step": 7013 + }, + { + "epoch": 1.12, + "learning_rate": 4.525056783646831e-05, + "loss": 1.0287, + "step": 7014 + }, + { + "epoch": 1.12, + "learning_rate": 4.524905488067504e-05, + "loss": 0.9806, + "step": 7015 + }, + { + "epoch": 1.12, + "learning_rate": 4.524754170924163e-05, + "loss": 1.0658, + "step": 7016 + }, + { + "epoch": 1.12, + "learning_rate": 4.5246028322184184e-05, + "loss": 1.15, + "step": 7017 + }, + { + "epoch": 1.12, + "learning_rate": 4.524451471951884e-05, + "loss": 0.9984, + "step": 7018 + }, + { + "epoch": 1.12, + "learning_rate": 4.524300090126169e-05, + "loss": 1.0763, + "step": 7019 + }, + { + "epoch": 1.12, + "learning_rate": 4.524148686742888e-05, + "loss": 0.9967, + "step": 7020 + }, + { + "epoch": 1.12, + "learning_rate": 4.523997261803652e-05, + "loss": 1.1485, + "step": 7021 + }, + { + "epoch": 1.12, + "learning_rate": 4.523845815310075e-05, + "loss": 1.0257, + "step": 7022 + }, + { + "epoch": 1.12, + "learning_rate": 4.5236943472637674e-05, + "loss": 1.1313, + "step": 7023 + }, + { + "epoch": 1.12, + "learning_rate": 4.5235428576663446e-05, + "loss": 1.1577, + "step": 7024 + }, + { + "epoch": 1.12, + "learning_rate": 4.523391346519419e-05, + "loss": 1.0921, + "step": 7025 + }, + { + "epoch": 1.12, + "learning_rate": 4.523239813824603e-05, + "loss": 1.0438, + "step": 7026 + }, + { + "epoch": 1.12, + "learning_rate": 4.523088259583512e-05, + "loss": 1.0264, + "step": 7027 + }, + { + "epoch": 1.12, + "learning_rate": 4.5229366837977594e-05, + "loss": 0.9759, + "step": 7028 + }, + { + "epoch": 1.12, + "learning_rate": 4.5227850864689584e-05, + "loss": 1.1213, + "step": 7029 + }, + { + "epoch": 1.12, + "learning_rate": 4.5226334675987245e-05, + "loss": 1.0901, + "step": 7030 + }, + { + "epoch": 1.12, + "learning_rate": 4.522481827188673e-05, + "loss": 1.0706, + "step": 7031 + }, + { + "epoch": 1.12, + "learning_rate": 4.522330165240418e-05, + "loss": 0.956, + "step": 7032 + }, + { + "epoch": 1.12, + "learning_rate": 4.522178481755574e-05, + "loss": 1.1418, + "step": 7033 + }, + { + "epoch": 1.12, + "learning_rate": 4.522026776735756e-05, + "loss": 1.0996, + "step": 7034 + }, + { + "epoch": 1.12, + "learning_rate": 4.521875050182581e-05, + "loss": 1.0413, + "step": 7035 + }, + { + "epoch": 1.12, + "learning_rate": 4.521723302097665e-05, + "loss": 1.017, + "step": 7036 + }, + { + "epoch": 1.12, + "learning_rate": 4.521571532482623e-05, + "loss": 1.0451, + "step": 7037 + }, + { + "epoch": 1.12, + "learning_rate": 4.521419741339071e-05, + "loss": 1.1082, + "step": 7038 + }, + { + "epoch": 1.12, + "learning_rate": 4.521267928668627e-05, + "loss": 1.0588, + "step": 7039 + }, + { + "epoch": 1.12, + "learning_rate": 4.521116094472905e-05, + "loss": 1.1209, + "step": 7040 + }, + { + "epoch": 1.12, + "learning_rate": 4.520964238753524e-05, + "loss": 1.1055, + "step": 7041 + }, + { + "epoch": 1.12, + "learning_rate": 4.520812361512102e-05, + "loss": 1.0715, + "step": 7042 + }, + { + "epoch": 1.12, + "learning_rate": 4.5206604627502546e-05, + "loss": 1.1222, + "step": 7043 + }, + { + "epoch": 1.12, + "learning_rate": 4.5205085424696e-05, + "loss": 1.022, + "step": 7044 + }, + { + "epoch": 1.12, + "learning_rate": 4.520356600671757e-05, + "loss": 0.9652, + "step": 7045 + }, + { + "epoch": 1.12, + "learning_rate": 4.520204637358342e-05, + "loss": 1.0944, + "step": 7046 + }, + { + "epoch": 1.12, + "learning_rate": 4.520052652530974e-05, + "loss": 1.0959, + "step": 7047 + }, + { + "epoch": 1.12, + "learning_rate": 4.5199006461912716e-05, + "loss": 1.062, + "step": 7048 + }, + { + "epoch": 1.12, + "learning_rate": 4.5197486183408536e-05, + "loss": 1.1106, + "step": 7049 + }, + { + "epoch": 1.12, + "learning_rate": 4.519596568981339e-05, + "loss": 1.1357, + "step": 7050 + }, + { + "epoch": 1.12, + "learning_rate": 4.5194444981143466e-05, + "loss": 1.0495, + "step": 7051 + }, + { + "epoch": 1.12, + "learning_rate": 4.5192924057414974e-05, + "loss": 1.1122, + "step": 7052 + }, + { + "epoch": 1.12, + "learning_rate": 4.51914029186441e-05, + "loss": 1.1532, + "step": 7053 + }, + { + "epoch": 1.12, + "learning_rate": 4.518988156484704e-05, + "loss": 1.0698, + "step": 7054 + }, + { + "epoch": 1.12, + "learning_rate": 4.518835999604e-05, + "loss": 1.0863, + "step": 7055 + }, + { + "epoch": 1.12, + "learning_rate": 4.518683821223918e-05, + "loss": 1.0819, + "step": 7056 + }, + { + "epoch": 1.12, + "learning_rate": 4.5185316213460786e-05, + "loss": 1.069, + "step": 7057 + }, + { + "epoch": 1.12, + "learning_rate": 4.518379399972104e-05, + "loss": 1.1063, + "step": 7058 + }, + { + "epoch": 1.12, + "learning_rate": 4.518227157103613e-05, + "loss": 1.1339, + "step": 7059 + }, + { + "epoch": 1.12, + "learning_rate": 4.518074892742229e-05, + "loss": 1.1009, + "step": 7060 + }, + { + "epoch": 1.12, + "learning_rate": 4.5179226068895716e-05, + "loss": 1.1071, + "step": 7061 + }, + { + "epoch": 1.13, + "learning_rate": 4.517770299547264e-05, + "loss": 1.1063, + "step": 7062 + }, + { + "epoch": 1.13, + "learning_rate": 4.517617970716927e-05, + "loss": 1.1071, + "step": 7063 + }, + { + "epoch": 1.13, + "learning_rate": 4.5174656204001854e-05, + "loss": 1.1057, + "step": 7064 + }, + { + "epoch": 1.13, + "learning_rate": 4.517313248598658e-05, + "loss": 1.0553, + "step": 7065 + }, + { + "epoch": 1.13, + "learning_rate": 4.51716085531397e-05, + "loss": 1.1047, + "step": 7066 + }, + { + "epoch": 1.13, + "learning_rate": 4.517008440547744e-05, + "loss": 1.116, + "step": 7067 + }, + { + "epoch": 1.13, + "learning_rate": 4.5168560043016016e-05, + "loss": 1.0374, + "step": 7068 + }, + { + "epoch": 1.13, + "learning_rate": 4.516703546577168e-05, + "loss": 1.0827, + "step": 7069 + }, + { + "epoch": 1.13, + "learning_rate": 4.5165510673760656e-05, + "loss": 1.0773, + "step": 7070 + }, + { + "epoch": 1.13, + "learning_rate": 4.516398566699919e-05, + "loss": 1.0778, + "step": 7071 + }, + { + "epoch": 1.13, + "learning_rate": 4.5162460445503515e-05, + "loss": 1.0526, + "step": 7072 + }, + { + "epoch": 1.13, + "learning_rate": 4.5160935009289884e-05, + "loss": 1.0418, + "step": 7073 + }, + { + "epoch": 1.13, + "learning_rate": 4.515940935837453e-05, + "loss": 0.9492, + "step": 7074 + }, + { + "epoch": 1.13, + "learning_rate": 4.51578834927737e-05, + "loss": 0.9565, + "step": 7075 + }, + { + "epoch": 1.13, + "learning_rate": 4.515635741250366e-05, + "loss": 1.0781, + "step": 7076 + }, + { + "epoch": 1.13, + "learning_rate": 4.515483111758064e-05, + "loss": 1.0225, + "step": 7077 + }, + { + "epoch": 1.13, + "learning_rate": 4.515330460802091e-05, + "loss": 1.0681, + "step": 7078 + }, + { + "epoch": 1.13, + "learning_rate": 4.5151777883840724e-05, + "loss": 1.185, + "step": 7079 + }, + { + "epoch": 1.13, + "learning_rate": 4.515025094505633e-05, + "loss": 1.0736, + "step": 7080 + }, + { + "epoch": 1.13, + "learning_rate": 4.5148723791684006e-05, + "loss": 1.0531, + "step": 7081 + }, + { + "epoch": 1.13, + "learning_rate": 4.514719642374e-05, + "loss": 1.0719, + "step": 7082 + }, + { + "epoch": 1.13, + "learning_rate": 4.5145668841240587e-05, + "loss": 1.0762, + "step": 7083 + }, + { + "epoch": 1.13, + "learning_rate": 4.5144141044202034e-05, + "loss": 1.0378, + "step": 7084 + }, + { + "epoch": 1.13, + "learning_rate": 4.5142613032640604e-05, + "loss": 1.1046, + "step": 7085 + }, + { + "epoch": 1.13, + "learning_rate": 4.514108480657258e-05, + "loss": 1.0302, + "step": 7086 + }, + { + "epoch": 1.13, + "learning_rate": 4.513955636601422e-05, + "loss": 1.0044, + "step": 7087 + }, + { + "epoch": 1.13, + "learning_rate": 4.513802771098182e-05, + "loss": 1.044, + "step": 7088 + }, + { + "epoch": 1.13, + "learning_rate": 4.513649884149165e-05, + "loss": 0.996, + "step": 7089 + }, + { + "epoch": 1.13, + "learning_rate": 4.5134969757559985e-05, + "loss": 1.1592, + "step": 7090 + }, + { + "epoch": 1.13, + "learning_rate": 4.5133440459203124e-05, + "loss": 1.0582, + "step": 7091 + }, + { + "epoch": 1.13, + "learning_rate": 4.513191094643735e-05, + "loss": 1.04, + "step": 7092 + }, + { + "epoch": 1.13, + "learning_rate": 4.513038121927894e-05, + "loss": 1.0596, + "step": 7093 + }, + { + "epoch": 1.13, + "learning_rate": 4.512885127774419e-05, + "loss": 0.9698, + "step": 7094 + }, + { + "epoch": 1.13, + "learning_rate": 4.51273211218494e-05, + "loss": 1.0324, + "step": 7095 + }, + { + "epoch": 1.13, + "learning_rate": 4.512579075161085e-05, + "loss": 1.0606, + "step": 7096 + }, + { + "epoch": 1.13, + "learning_rate": 4.512426016704485e-05, + "loss": 1.085, + "step": 7097 + }, + { + "epoch": 1.13, + "learning_rate": 4.51227293681677e-05, + "loss": 1.1076, + "step": 7098 + }, + { + "epoch": 1.13, + "learning_rate": 4.512119835499571e-05, + "loss": 1.0184, + "step": 7099 + }, + { + "epoch": 1.13, + "learning_rate": 4.5119667127545155e-05, + "loss": 0.9925, + "step": 7100 + }, + { + "epoch": 1.13, + "learning_rate": 4.511813568583237e-05, + "loss": 1.0441, + "step": 7101 + }, + { + "epoch": 1.13, + "learning_rate": 4.511660402987366e-05, + "loss": 1.1041, + "step": 7102 + }, + { + "epoch": 1.13, + "learning_rate": 4.5115072159685315e-05, + "loss": 1.0713, + "step": 7103 + }, + { + "epoch": 1.13, + "learning_rate": 4.511354007528367e-05, + "loss": 1.0533, + "step": 7104 + }, + { + "epoch": 1.13, + "learning_rate": 4.511200777668504e-05, + "loss": 1.129, + "step": 7105 + }, + { + "epoch": 1.13, + "learning_rate": 4.5110475263905725e-05, + "loss": 1.0061, + "step": 7106 + }, + { + "epoch": 1.13, + "learning_rate": 4.510894253696207e-05, + "loss": 1.0759, + "step": 7107 + }, + { + "epoch": 1.13, + "learning_rate": 4.510740959587038e-05, + "loss": 1.0487, + "step": 7108 + }, + { + "epoch": 1.13, + "learning_rate": 4.510587644064699e-05, + "loss": 0.9531, + "step": 7109 + }, + { + "epoch": 1.13, + "learning_rate": 4.510434307130821e-05, + "loss": 1.0601, + "step": 7110 + }, + { + "epoch": 1.13, + "learning_rate": 4.510280948787039e-05, + "loss": 0.9943, + "step": 7111 + }, + { + "epoch": 1.13, + "learning_rate": 4.5101275690349856e-05, + "loss": 1.0951, + "step": 7112 + }, + { + "epoch": 1.13, + "learning_rate": 4.5099741678762944e-05, + "loss": 1.047, + "step": 7113 + }, + { + "epoch": 1.13, + "learning_rate": 4.5098207453125974e-05, + "loss": 0.9847, + "step": 7114 + }, + { + "epoch": 1.13, + "learning_rate": 4.5096673013455306e-05, + "loss": 1.0214, + "step": 7115 + }, + { + "epoch": 1.13, + "learning_rate": 4.509513835976726e-05, + "loss": 1.0361, + "step": 7116 + }, + { + "epoch": 1.13, + "learning_rate": 4.509360349207821e-05, + "loss": 1.0531, + "step": 7117 + }, + { + "epoch": 1.13, + "learning_rate": 4.5092068410404465e-05, + "loss": 1.0347, + "step": 7118 + }, + { + "epoch": 1.13, + "learning_rate": 4.5090533114762395e-05, + "loss": 1.0444, + "step": 7119 + }, + { + "epoch": 1.13, + "learning_rate": 4.508899760516834e-05, + "loss": 1.1092, + "step": 7120 + }, + { + "epoch": 1.13, + "learning_rate": 4.5087461881638666e-05, + "loss": 1.0259, + "step": 7121 + }, + { + "epoch": 1.13, + "learning_rate": 4.508592594418971e-05, + "loss": 1.0761, + "step": 7122 + }, + { + "epoch": 1.13, + "learning_rate": 4.508438979283784e-05, + "loss": 1.0945, + "step": 7123 + }, + { + "epoch": 1.13, + "learning_rate": 4.5082853427599416e-05, + "loss": 1.1079, + "step": 7124 + }, + { + "epoch": 1.14, + "learning_rate": 4.5081316848490795e-05, + "loss": 1.0619, + "step": 7125 + }, + { + "epoch": 1.14, + "learning_rate": 4.5079780055528336e-05, + "loss": 1.092, + "step": 7126 + }, + { + "epoch": 1.14, + "learning_rate": 4.507824304872841e-05, + "loss": 1.044, + "step": 7127 + }, + { + "epoch": 1.14, + "learning_rate": 4.5076705828107394e-05, + "loss": 1.1378, + "step": 7128 + }, + { + "epoch": 1.14, + "learning_rate": 4.5075168393681646e-05, + "loss": 1.0615, + "step": 7129 + }, + { + "epoch": 1.14, + "learning_rate": 4.507363074546754e-05, + "loss": 1.0178, + "step": 7130 + }, + { + "epoch": 1.14, + "learning_rate": 4.5072092883481456e-05, + "loss": 0.983, + "step": 7131 + }, + { + "epoch": 1.14, + "learning_rate": 4.507055480773976e-05, + "loss": 1.0718, + "step": 7132 + }, + { + "epoch": 1.14, + "learning_rate": 4.506901651825885e-05, + "loss": 1.0134, + "step": 7133 + }, + { + "epoch": 1.14, + "learning_rate": 4.50674780150551e-05, + "loss": 1.1199, + "step": 7134 + }, + { + "epoch": 1.14, + "learning_rate": 4.506593929814489e-05, + "loss": 1.1637, + "step": 7135 + }, + { + "epoch": 1.14, + "learning_rate": 4.50644003675446e-05, + "loss": 1.1012, + "step": 7136 + }, + { + "epoch": 1.14, + "learning_rate": 4.506286122327063e-05, + "loss": 1.1021, + "step": 7137 + }, + { + "epoch": 1.14, + "learning_rate": 4.506132186533938e-05, + "loss": 1.1624, + "step": 7138 + }, + { + "epoch": 1.14, + "learning_rate": 4.5059782293767224e-05, + "loss": 1.0983, + "step": 7139 + }, + { + "epoch": 1.14, + "learning_rate": 4.5058242508570565e-05, + "loss": 1.0885, + "step": 7140 + }, + { + "epoch": 1.14, + "learning_rate": 4.5056702509765804e-05, + "loss": 0.9801, + "step": 7141 + }, + { + "epoch": 1.14, + "learning_rate": 4.505516229736934e-05, + "loss": 1.0796, + "step": 7142 + }, + { + "epoch": 1.14, + "learning_rate": 4.505362187139756e-05, + "loss": 0.9667, + "step": 7143 + }, + { + "epoch": 1.14, + "learning_rate": 4.50520812318669e-05, + "loss": 0.9674, + "step": 7144 + }, + { + "epoch": 1.14, + "learning_rate": 4.505054037879373e-05, + "loss": 1.0671, + "step": 7145 + }, + { + "epoch": 1.14, + "learning_rate": 4.504899931219449e-05, + "loss": 1.1996, + "step": 7146 + }, + { + "epoch": 1.14, + "learning_rate": 4.504745803208558e-05, + "loss": 1.0718, + "step": 7147 + }, + { + "epoch": 1.14, + "learning_rate": 4.504591653848341e-05, + "loss": 1.0308, + "step": 7148 + }, + { + "epoch": 1.14, + "learning_rate": 4.50443748314044e-05, + "loss": 0.9739, + "step": 7149 + }, + { + "epoch": 1.14, + "learning_rate": 4.504283291086496e-05, + "loss": 1.0153, + "step": 7150 + }, + { + "epoch": 1.14, + "learning_rate": 4.504129077688153e-05, + "loss": 1.1005, + "step": 7151 + }, + { + "epoch": 1.14, + "learning_rate": 4.5039748429470505e-05, + "loss": 1.1258, + "step": 7152 + }, + { + "epoch": 1.14, + "learning_rate": 4.503820586864834e-05, + "loss": 1.0374, + "step": 7153 + }, + { + "epoch": 1.14, + "learning_rate": 4.503666309443145e-05, + "loss": 1.0181, + "step": 7154 + }, + { + "epoch": 1.14, + "learning_rate": 4.5035120106836245e-05, + "loss": 0.9963, + "step": 7155 + }, + { + "epoch": 1.14, + "learning_rate": 4.5033576905879195e-05, + "loss": 1.0654, + "step": 7156 + }, + { + "epoch": 1.14, + "learning_rate": 4.50320334915767e-05, + "loss": 1.0604, + "step": 7157 + }, + { + "epoch": 1.14, + "learning_rate": 4.503048986394521e-05, + "loss": 0.9733, + "step": 7158 + }, + { + "epoch": 1.14, + "learning_rate": 4.502894602300117e-05, + "loss": 1.0494, + "step": 7159 + }, + { + "epoch": 1.14, + "learning_rate": 4.502740196876102e-05, + "loss": 1.0181, + "step": 7160 + }, + { + "epoch": 1.14, + "learning_rate": 4.5025857701241183e-05, + "loss": 1.0426, + "step": 7161 + }, + { + "epoch": 1.14, + "learning_rate": 4.502431322045812e-05, + "loss": 1.0488, + "step": 7162 + }, + { + "epoch": 1.14, + "learning_rate": 4.502276852642829e-05, + "loss": 1.0248, + "step": 7163 + }, + { + "epoch": 1.14, + "learning_rate": 4.5021223619168126e-05, + "loss": 1.0558, + "step": 7164 + }, + { + "epoch": 1.14, + "learning_rate": 4.5019678498694095e-05, + "loss": 0.9361, + "step": 7165 + }, + { + "epoch": 1.14, + "learning_rate": 4.501813316502263e-05, + "loss": 1.0089, + "step": 7166 + }, + { + "epoch": 1.14, + "learning_rate": 4.5016587618170205e-05, + "loss": 1.054, + "step": 7167 + }, + { + "epoch": 1.14, + "learning_rate": 4.501504185815327e-05, + "loss": 1.0184, + "step": 7168 + }, + { + "epoch": 1.14, + "learning_rate": 4.50134958849883e-05, + "loss": 1.019, + "step": 7169 + }, + { + "epoch": 1.14, + "learning_rate": 4.501194969869174e-05, + "loss": 1.113, + "step": 7170 + }, + { + "epoch": 1.14, + "learning_rate": 4.501040329928007e-05, + "loss": 1.014, + "step": 7171 + }, + { + "epoch": 1.14, + "learning_rate": 4.500885668676975e-05, + "loss": 1.0155, + "step": 7172 + }, + { + "epoch": 1.14, + "learning_rate": 4.500730986117725e-05, + "loss": 1.0733, + "step": 7173 + }, + { + "epoch": 1.14, + "learning_rate": 4.500576282251906e-05, + "loss": 0.9713, + "step": 7174 + }, + { + "epoch": 1.14, + "learning_rate": 4.500421557081163e-05, + "loss": 1.0129, + "step": 7175 + }, + { + "epoch": 1.14, + "learning_rate": 4.500266810607146e-05, + "loss": 1.1197, + "step": 7176 + }, + { + "epoch": 1.14, + "learning_rate": 4.5001120428315e-05, + "loss": 1.0252, + "step": 7177 + }, + { + "epoch": 1.14, + "learning_rate": 4.4999572537558766e-05, + "loss": 1.0766, + "step": 7178 + }, + { + "epoch": 1.14, + "learning_rate": 4.499802443381922e-05, + "loss": 1.1893, + "step": 7179 + }, + { + "epoch": 1.14, + "learning_rate": 4.4996476117112865e-05, + "loss": 1.068, + "step": 7180 + }, + { + "epoch": 1.14, + "learning_rate": 4.499492758745616e-05, + "loss": 1.0619, + "step": 7181 + }, + { + "epoch": 1.14, + "learning_rate": 4.499337884486563e-05, + "loss": 1.1108, + "step": 7182 + }, + { + "epoch": 1.14, + "learning_rate": 4.499182988935775e-05, + "loss": 0.9903, + "step": 7183 + }, + { + "epoch": 1.14, + "learning_rate": 4.4990280720949024e-05, + "loss": 1.2133, + "step": 7184 + }, + { + "epoch": 1.14, + "learning_rate": 4.4988731339655946e-05, + "loss": 1.0964, + "step": 7185 + }, + { + "epoch": 1.14, + "learning_rate": 4.4987181745495e-05, + "loss": 1.1293, + "step": 7186 + }, + { + "epoch": 1.14, + "learning_rate": 4.498563193848272e-05, + "loss": 1.0725, + "step": 7187 + }, + { + "epoch": 1.15, + "learning_rate": 4.498408191863558e-05, + "loss": 1.1119, + "step": 7188 + }, + { + "epoch": 1.15, + "learning_rate": 4.498253168597011e-05, + "loss": 0.9985, + "step": 7189 + }, + { + "epoch": 1.15, + "learning_rate": 4.4980981240502805e-05, + "loss": 1.0389, + "step": 7190 + }, + { + "epoch": 1.15, + "learning_rate": 4.4979430582250184e-05, + "loss": 1.1086, + "step": 7191 + }, + { + "epoch": 1.15, + "learning_rate": 4.497787971122874e-05, + "loss": 1.0432, + "step": 7192 + }, + { + "epoch": 1.15, + "learning_rate": 4.4976328627455026e-05, + "loss": 1.0535, + "step": 7193 + }, + { + "epoch": 1.15, + "learning_rate": 4.4974777330945536e-05, + "loss": 1.0583, + "step": 7194 + }, + { + "epoch": 1.15, + "learning_rate": 4.497322582171679e-05, + "loss": 1.0152, + "step": 7195 + }, + { + "epoch": 1.15, + "learning_rate": 4.497167409978532e-05, + "loss": 1.0426, + "step": 7196 + }, + { + "epoch": 1.15, + "learning_rate": 4.4970122165167635e-05, + "loss": 1.0736, + "step": 7197 + }, + { + "epoch": 1.15, + "learning_rate": 4.496857001788028e-05, + "loss": 1.0424, + "step": 7198 + }, + { + "epoch": 1.15, + "learning_rate": 4.4967017657939786e-05, + "loss": 1.0805, + "step": 7199 + }, + { + "epoch": 1.15, + "learning_rate": 4.4965465085362665e-05, + "loss": 1.0342, + "step": 7200 + }, + { + "epoch": 1.15, + "learning_rate": 4.496391230016547e-05, + "loss": 0.9769, + "step": 7201 + }, + { + "epoch": 1.15, + "learning_rate": 4.496235930236472e-05, + "loss": 1.1004, + "step": 7202 + }, + { + "epoch": 1.15, + "learning_rate": 4.496080609197697e-05, + "loss": 1.0806, + "step": 7203 + }, + { + "epoch": 1.15, + "learning_rate": 4.495925266901876e-05, + "loss": 0.9984, + "step": 7204 + }, + { + "epoch": 1.15, + "learning_rate": 4.495769903350662e-05, + "loss": 1.0, + "step": 7205 + }, + { + "epoch": 1.15, + "learning_rate": 4.4956145185457096e-05, + "loss": 1.0858, + "step": 7206 + }, + { + "epoch": 1.15, + "learning_rate": 4.495459112488675e-05, + "loss": 1.0375, + "step": 7207 + }, + { + "epoch": 1.15, + "learning_rate": 4.495303685181211e-05, + "loss": 1.0366, + "step": 7208 + }, + { + "epoch": 1.15, + "learning_rate": 4.495148236624975e-05, + "loss": 1.1218, + "step": 7209 + }, + { + "epoch": 1.15, + "learning_rate": 4.494992766821622e-05, + "loss": 1.0179, + "step": 7210 + }, + { + "epoch": 1.15, + "learning_rate": 4.494837275772806e-05, + "loss": 1.0685, + "step": 7211 + }, + { + "epoch": 1.15, + "learning_rate": 4.494681763480185e-05, + "loss": 1.2041, + "step": 7212 + }, + { + "epoch": 1.15, + "learning_rate": 4.494526229945415e-05, + "loss": 1.0278, + "step": 7213 + }, + { + "epoch": 1.15, + "learning_rate": 4.494370675170151e-05, + "loss": 1.0632, + "step": 7214 + }, + { + "epoch": 1.15, + "learning_rate": 4.494215099156049e-05, + "loss": 0.9911, + "step": 7215 + }, + { + "epoch": 1.15, + "learning_rate": 4.4940595019047685e-05, + "loss": 0.9845, + "step": 7216 + }, + { + "epoch": 1.15, + "learning_rate": 4.493903883417964e-05, + "loss": 0.988, + "step": 7217 + }, + { + "epoch": 1.15, + "learning_rate": 4.493748243697294e-05, + "loss": 1.0578, + "step": 7218 + }, + { + "epoch": 1.15, + "learning_rate": 4.4935925827444144e-05, + "loss": 0.9752, + "step": 7219 + }, + { + "epoch": 1.15, + "learning_rate": 4.493436900560986e-05, + "loss": 1.0833, + "step": 7220 + }, + { + "epoch": 1.15, + "learning_rate": 4.4932811971486646e-05, + "loss": 1.1007, + "step": 7221 + }, + { + "epoch": 1.15, + "learning_rate": 4.493125472509108e-05, + "loss": 1.0649, + "step": 7222 + }, + { + "epoch": 1.15, + "learning_rate": 4.492969726643975e-05, + "loss": 1.0198, + "step": 7223 + }, + { + "epoch": 1.15, + "learning_rate": 4.4928139595549246e-05, + "loss": 0.9957, + "step": 7224 + }, + { + "epoch": 1.15, + "learning_rate": 4.492658171243616e-05, + "loss": 1.1302, + "step": 7225 + }, + { + "epoch": 1.15, + "learning_rate": 4.4925023617117066e-05, + "loss": 1.0334, + "step": 7226 + }, + { + "epoch": 1.15, + "learning_rate": 4.492346530960857e-05, + "loss": 1.0622, + "step": 7227 + }, + { + "epoch": 1.15, + "learning_rate": 4.4921906789927264e-05, + "loss": 0.9725, + "step": 7228 + }, + { + "epoch": 1.15, + "learning_rate": 4.4920348058089745e-05, + "loss": 0.9958, + "step": 7229 + }, + { + "epoch": 1.15, + "learning_rate": 4.491878911411262e-05, + "loss": 1.0267, + "step": 7230 + }, + { + "epoch": 1.15, + "learning_rate": 4.491722995801248e-05, + "loss": 1.0387, + "step": 7231 + }, + { + "epoch": 1.15, + "learning_rate": 4.491567058980593e-05, + "loss": 1.1043, + "step": 7232 + }, + { + "epoch": 1.15, + "learning_rate": 4.4914111009509586e-05, + "loss": 1.0688, + "step": 7233 + }, + { + "epoch": 1.15, + "learning_rate": 4.491255121714004e-05, + "loss": 1.0446, + "step": 7234 + }, + { + "epoch": 1.15, + "learning_rate": 4.491099121271393e-05, + "loss": 0.9841, + "step": 7235 + }, + { + "epoch": 1.15, + "learning_rate": 4.490943099624784e-05, + "loss": 1.029, + "step": 7236 + }, + { + "epoch": 1.15, + "learning_rate": 4.4907870567758394e-05, + "loss": 1.072, + "step": 7237 + }, + { + "epoch": 1.15, + "learning_rate": 4.490630992726221e-05, + "loss": 1.0263, + "step": 7238 + }, + { + "epoch": 1.15, + "learning_rate": 4.490474907477592e-05, + "loss": 1.0853, + "step": 7239 + }, + { + "epoch": 1.15, + "learning_rate": 4.490318801031613e-05, + "loss": 1.0079, + "step": 7240 + }, + { + "epoch": 1.15, + "learning_rate": 4.490162673389948e-05, + "loss": 1.035, + "step": 7241 + }, + { + "epoch": 1.15, + "learning_rate": 4.4900065245542585e-05, + "loss": 1.054, + "step": 7242 + }, + { + "epoch": 1.15, + "learning_rate": 4.489850354526207e-05, + "loss": 1.0203, + "step": 7243 + }, + { + "epoch": 1.15, + "learning_rate": 4.4896941633074586e-05, + "loss": 0.9881, + "step": 7244 + }, + { + "epoch": 1.15, + "learning_rate": 4.4895379508996734e-05, + "loss": 1.0724, + "step": 7245 + }, + { + "epoch": 1.15, + "learning_rate": 4.4893817173045186e-05, + "loss": 0.9758, + "step": 7246 + }, + { + "epoch": 1.15, + "learning_rate": 4.4892254625236555e-05, + "loss": 1.077, + "step": 7247 + }, + { + "epoch": 1.15, + "learning_rate": 4.489069186558749e-05, + "loss": 0.9466, + "step": 7248 + }, + { + "epoch": 1.15, + "learning_rate": 4.4889128894114644e-05, + "loss": 1.1043, + "step": 7249 + }, + { + "epoch": 1.16, + "learning_rate": 4.4887565710834635e-05, + "loss": 1.0918, + "step": 7250 + }, + { + "epoch": 1.16, + "learning_rate": 4.4886002315764134e-05, + "loss": 0.9827, + "step": 7251 + }, + { + "epoch": 1.16, + "learning_rate": 4.488443870891977e-05, + "loss": 1.0845, + "step": 7252 + }, + { + "epoch": 1.16, + "learning_rate": 4.4882874890318214e-05, + "loss": 1.1258, + "step": 7253 + }, + { + "epoch": 1.16, + "learning_rate": 4.488131085997611e-05, + "loss": 1.0589, + "step": 7254 + }, + { + "epoch": 1.16, + "learning_rate": 4.4879746617910124e-05, + "loss": 1.0194, + "step": 7255 + }, + { + "epoch": 1.16, + "learning_rate": 4.48781821641369e-05, + "loss": 0.9857, + "step": 7256 + }, + { + "epoch": 1.16, + "learning_rate": 4.48766174986731e-05, + "loss": 1.0758, + "step": 7257 + }, + { + "epoch": 1.16, + "learning_rate": 4.48750526215354e-05, + "loss": 1.0897, + "step": 7258 + }, + { + "epoch": 1.16, + "learning_rate": 4.4873487532740445e-05, + "loss": 1.0303, + "step": 7259 + }, + { + "epoch": 1.16, + "learning_rate": 4.487192223230491e-05, + "loss": 1.0133, + "step": 7260 + }, + { + "epoch": 1.16, + "learning_rate": 4.4870356720245474e-05, + "loss": 1.0391, + "step": 7261 + }, + { + "epoch": 1.16, + "learning_rate": 4.4868790996578815e-05, + "loss": 1.0293, + "step": 7262 + }, + { + "epoch": 1.16, + "learning_rate": 4.4867225061321585e-05, + "loss": 1.1346, + "step": 7263 + }, + { + "epoch": 1.16, + "learning_rate": 4.486565891449046e-05, + "loss": 0.9919, + "step": 7264 + }, + { + "epoch": 1.16, + "learning_rate": 4.486409255610214e-05, + "loss": 1.1473, + "step": 7265 + }, + { + "epoch": 1.16, + "learning_rate": 4.4862525986173296e-05, + "loss": 1.0286, + "step": 7266 + }, + { + "epoch": 1.16, + "learning_rate": 4.4860959204720597e-05, + "loss": 1.0638, + "step": 7267 + }, + { + "epoch": 1.16, + "learning_rate": 4.4859392211760755e-05, + "loss": 1.0336, + "step": 7268 + }, + { + "epoch": 1.16, + "learning_rate": 4.4857825007310434e-05, + "loss": 1.0777, + "step": 7269 + }, + { + "epoch": 1.16, + "learning_rate": 4.485625759138633e-05, + "loss": 0.9272, + "step": 7270 + }, + { + "epoch": 1.16, + "learning_rate": 4.485468996400514e-05, + "loss": 1.1052, + "step": 7271 + }, + { + "epoch": 1.16, + "learning_rate": 4.485312212518356e-05, + "loss": 1.1121, + "step": 7272 + }, + { + "epoch": 1.16, + "learning_rate": 4.485155407493828e-05, + "loss": 0.9402, + "step": 7273 + }, + { + "epoch": 1.16, + "learning_rate": 4.4849985813286e-05, + "loss": 0.9552, + "step": 7274 + }, + { + "epoch": 1.16, + "learning_rate": 4.484841734024343e-05, + "loss": 1.0195, + "step": 7275 + }, + { + "epoch": 1.16, + "learning_rate": 4.484684865582726e-05, + "loss": 1.0508, + "step": 7276 + }, + { + "epoch": 1.16, + "learning_rate": 4.4845279760054195e-05, + "loss": 1.035, + "step": 7277 + }, + { + "epoch": 1.16, + "learning_rate": 4.484371065294096e-05, + "loss": 1.0777, + "step": 7278 + }, + { + "epoch": 1.16, + "learning_rate": 4.484214133450424e-05, + "loss": 1.0413, + "step": 7279 + }, + { + "epoch": 1.16, + "learning_rate": 4.484057180476077e-05, + "loss": 1.0505, + "step": 7280 + }, + { + "epoch": 1.16, + "learning_rate": 4.4839002063727257e-05, + "loss": 1.0158, + "step": 7281 + }, + { + "epoch": 1.16, + "learning_rate": 4.483743211142042e-05, + "loss": 1.1022, + "step": 7282 + }, + { + "epoch": 1.16, + "learning_rate": 4.483586194785697e-05, + "loss": 1.0515, + "step": 7283 + }, + { + "epoch": 1.16, + "learning_rate": 4.4834291573053636e-05, + "loss": 1.0584, + "step": 7284 + }, + { + "epoch": 1.16, + "learning_rate": 4.4832720987027134e-05, + "loss": 1.0892, + "step": 7285 + }, + { + "epoch": 1.16, + "learning_rate": 4.483115018979419e-05, + "loss": 1.0827, + "step": 7286 + }, + { + "epoch": 1.16, + "learning_rate": 4.482957918137154e-05, + "loss": 1.0518, + "step": 7287 + }, + { + "epoch": 1.16, + "learning_rate": 4.482800796177591e-05, + "loss": 1.0518, + "step": 7288 + }, + { + "epoch": 1.16, + "learning_rate": 4.4826436531024043e-05, + "loss": 1.0961, + "step": 7289 + }, + { + "epoch": 1.16, + "learning_rate": 4.482486488913265e-05, + "loss": 1.0595, + "step": 7290 + }, + { + "epoch": 1.16, + "learning_rate": 4.4823293036118483e-05, + "loss": 0.9928, + "step": 7291 + }, + { + "epoch": 1.16, + "learning_rate": 4.482172097199829e-05, + "loss": 1.1017, + "step": 7292 + }, + { + "epoch": 1.16, + "learning_rate": 4.48201486967888e-05, + "loss": 1.027, + "step": 7293 + }, + { + "epoch": 1.16, + "learning_rate": 4.4818576210506755e-05, + "loss": 1.054, + "step": 7294 + }, + { + "epoch": 1.16, + "learning_rate": 4.481700351316891e-05, + "loss": 0.9947, + "step": 7295 + }, + { + "epoch": 1.16, + "learning_rate": 4.481543060479201e-05, + "loss": 1.0364, + "step": 7296 + }, + { + "epoch": 1.16, + "learning_rate": 4.48138574853928e-05, + "loss": 1.067, + "step": 7297 + }, + { + "epoch": 1.16, + "learning_rate": 4.481228415498803e-05, + "loss": 0.9529, + "step": 7298 + }, + { + "epoch": 1.16, + "learning_rate": 4.481071061359448e-05, + "loss": 1.0475, + "step": 7299 + }, + { + "epoch": 1.16, + "learning_rate": 4.480913686122887e-05, + "loss": 1.0492, + "step": 7300 + }, + { + "epoch": 1.16, + "learning_rate": 4.480756289790799e-05, + "loss": 1.0389, + "step": 7301 + }, + { + "epoch": 1.16, + "learning_rate": 4.480598872364858e-05, + "loss": 1.0241, + "step": 7302 + }, + { + "epoch": 1.16, + "learning_rate": 4.480441433846743e-05, + "loss": 1.0264, + "step": 7303 + }, + { + "epoch": 1.16, + "learning_rate": 4.4802839742381285e-05, + "loss": 1.0772, + "step": 7304 + }, + { + "epoch": 1.16, + "learning_rate": 4.480126493540692e-05, + "loss": 1.0006, + "step": 7305 + }, + { + "epoch": 1.16, + "learning_rate": 4.4799689917561106e-05, + "loss": 1.0201, + "step": 7306 + }, + { + "epoch": 1.16, + "learning_rate": 4.479811468886061e-05, + "loss": 1.0656, + "step": 7307 + }, + { + "epoch": 1.16, + "learning_rate": 4.4796539249322214e-05, + "loss": 1.1144, + "step": 7308 + }, + { + "epoch": 1.16, + "learning_rate": 4.47949635989627e-05, + "loss": 1.0649, + "step": 7309 + }, + { + "epoch": 1.16, + "learning_rate": 4.479338773779883e-05, + "loss": 1.0083, + "step": 7310 + }, + { + "epoch": 1.16, + "learning_rate": 4.4791811665847406e-05, + "loss": 1.0187, + "step": 7311 + }, + { + "epoch": 1.16, + "learning_rate": 4.47902353831252e-05, + "loss": 1.0741, + "step": 7312 + }, + { + "epoch": 1.17, + "learning_rate": 4.4788658889649e-05, + "loss": 1.1071, + "step": 7313 + }, + { + "epoch": 1.17, + "learning_rate": 4.4787082185435605e-05, + "loss": 0.9842, + "step": 7314 + }, + { + "epoch": 1.17, + "learning_rate": 4.4785505270501795e-05, + "loss": 1.0084, + "step": 7315 + }, + { + "epoch": 1.17, + "learning_rate": 4.4783928144864365e-05, + "loss": 1.0416, + "step": 7316 + }, + { + "epoch": 1.17, + "learning_rate": 4.478235080854011e-05, + "loss": 1.0835, + "step": 7317 + }, + { + "epoch": 1.17, + "learning_rate": 4.4780773261545827e-05, + "loss": 0.9828, + "step": 7318 + }, + { + "epoch": 1.17, + "learning_rate": 4.477919550389832e-05, + "loss": 0.9272, + "step": 7319 + }, + { + "epoch": 1.17, + "learning_rate": 4.477761753561439e-05, + "loss": 0.9448, + "step": 7320 + }, + { + "epoch": 1.17, + "learning_rate": 4.477603935671084e-05, + "loss": 1.05, + "step": 7321 + }, + { + "epoch": 1.17, + "learning_rate": 4.477446096720447e-05, + "loss": 1.007, + "step": 7322 + }, + { + "epoch": 1.17, + "learning_rate": 4.4772882367112106e-05, + "loss": 1.0614, + "step": 7323 + }, + { + "epoch": 1.17, + "learning_rate": 4.477130355645055e-05, + "loss": 1.0859, + "step": 7324 + }, + { + "epoch": 1.17, + "learning_rate": 4.4769724535236614e-05, + "loss": 1.0389, + "step": 7325 + }, + { + "epoch": 1.17, + "learning_rate": 4.4768145303487106e-05, + "loss": 1.0245, + "step": 7326 + }, + { + "epoch": 1.17, + "learning_rate": 4.4766565861218855e-05, + "loss": 0.9943, + "step": 7327 + }, + { + "epoch": 1.17, + "learning_rate": 4.476498620844869e-05, + "loss": 1.026, + "step": 7328 + }, + { + "epoch": 1.17, + "learning_rate": 4.4763406345193406e-05, + "loss": 0.9595, + "step": 7329 + }, + { + "epoch": 1.17, + "learning_rate": 4.4761826271469845e-05, + "loss": 1.1523, + "step": 7330 + }, + { + "epoch": 1.17, + "learning_rate": 4.4760245987294836e-05, + "loss": 0.961, + "step": 7331 + }, + { + "epoch": 1.17, + "learning_rate": 4.4758665492685204e-05, + "loss": 1.0767, + "step": 7332 + }, + { + "epoch": 1.17, + "learning_rate": 4.4757084787657776e-05, + "loss": 1.0698, + "step": 7333 + }, + { + "epoch": 1.17, + "learning_rate": 4.4755503872229396e-05, + "loss": 1.1033, + "step": 7334 + }, + { + "epoch": 1.17, + "learning_rate": 4.47539227464169e-05, + "loss": 1.0107, + "step": 7335 + }, + { + "epoch": 1.17, + "learning_rate": 4.475234141023711e-05, + "loss": 1.0657, + "step": 7336 + }, + { + "epoch": 1.17, + "learning_rate": 4.475075986370687e-05, + "loss": 1.0687, + "step": 7337 + }, + { + "epoch": 1.17, + "learning_rate": 4.474917810684304e-05, + "loss": 0.9849, + "step": 7338 + }, + { + "epoch": 1.17, + "learning_rate": 4.474759613966245e-05, + "loss": 1.0485, + "step": 7339 + }, + { + "epoch": 1.17, + "learning_rate": 4.4746013962181944e-05, + "loss": 1.0563, + "step": 7340 + }, + { + "epoch": 1.17, + "learning_rate": 4.474443157441839e-05, + "loss": 1.0441, + "step": 7341 + }, + { + "epoch": 1.17, + "learning_rate": 4.474284897638861e-05, + "loss": 1.0182, + "step": 7342 + }, + { + "epoch": 1.17, + "learning_rate": 4.474126616810949e-05, + "loss": 1.0197, + "step": 7343 + }, + { + "epoch": 1.17, + "learning_rate": 4.473968314959786e-05, + "loss": 0.9626, + "step": 7344 + }, + { + "epoch": 1.17, + "learning_rate": 4.473809992087059e-05, + "loss": 0.9985, + "step": 7345 + }, + { + "epoch": 1.17, + "learning_rate": 4.473651648194455e-05, + "loss": 1.001, + "step": 7346 + }, + { + "epoch": 1.17, + "learning_rate": 4.4734932832836585e-05, + "loss": 1.0544, + "step": 7347 + }, + { + "epoch": 1.17, + "learning_rate": 4.473334897356356e-05, + "loss": 0.975, + "step": 7348 + }, + { + "epoch": 1.17, + "learning_rate": 4.4731764904142354e-05, + "loss": 1.0083, + "step": 7349 + }, + { + "epoch": 1.17, + "learning_rate": 4.4730180624589836e-05, + "loss": 1.0465, + "step": 7350 + }, + { + "epoch": 1.17, + "learning_rate": 4.472859613492286e-05, + "loss": 0.9972, + "step": 7351 + }, + { + "epoch": 1.17, + "learning_rate": 4.472701143515833e-05, + "loss": 1.0873, + "step": 7352 + }, + { + "epoch": 1.17, + "learning_rate": 4.47254265253131e-05, + "loss": 1.0175, + "step": 7353 + }, + { + "epoch": 1.17, + "learning_rate": 4.472384140540404e-05, + "loss": 0.976, + "step": 7354 + }, + { + "epoch": 1.17, + "learning_rate": 4.4722256075448054e-05, + "loss": 0.9876, + "step": 7355 + }, + { + "epoch": 1.17, + "learning_rate": 4.472067053546202e-05, + "loss": 1.0006, + "step": 7356 + }, + { + "epoch": 1.17, + "learning_rate": 4.4719084785462806e-05, + "loss": 1.0613, + "step": 7357 + }, + { + "epoch": 1.17, + "learning_rate": 4.4717498825467313e-05, + "loss": 1.0392, + "step": 7358 + }, + { + "epoch": 1.17, + "learning_rate": 4.4715912655492434e-05, + "loss": 1.062, + "step": 7359 + }, + { + "epoch": 1.17, + "learning_rate": 4.4714326275555055e-05, + "loss": 0.9984, + "step": 7360 + }, + { + "epoch": 1.17, + "learning_rate": 4.471273968567206e-05, + "loss": 0.9345, + "step": 7361 + }, + { + "epoch": 1.17, + "learning_rate": 4.4711152885860363e-05, + "loss": 0.9727, + "step": 7362 + }, + { + "epoch": 1.17, + "learning_rate": 4.470956587613686e-05, + "loss": 1.0213, + "step": 7363 + }, + { + "epoch": 1.17, + "learning_rate": 4.470797865651843e-05, + "loss": 1.0684, + "step": 7364 + }, + { + "epoch": 1.17, + "learning_rate": 4.4706391227022006e-05, + "loss": 1.022, + "step": 7365 + }, + { + "epoch": 1.17, + "learning_rate": 4.470480358766448e-05, + "loss": 0.9303, + "step": 7366 + }, + { + "epoch": 1.17, + "learning_rate": 4.470321573846275e-05, + "loss": 1.0122, + "step": 7367 + }, + { + "epoch": 1.17, + "learning_rate": 4.4701627679433736e-05, + "loss": 1.0139, + "step": 7368 + }, + { + "epoch": 1.17, + "learning_rate": 4.470003941059435e-05, + "loss": 1.0042, + "step": 7369 + }, + { + "epoch": 1.17, + "learning_rate": 4.469845093196151e-05, + "loss": 1.0615, + "step": 7370 + }, + { + "epoch": 1.17, + "learning_rate": 4.469686224355213e-05, + "loss": 1.0014, + "step": 7371 + }, + { + "epoch": 1.17, + "learning_rate": 4.469527334538312e-05, + "loss": 1.0597, + "step": 7372 + }, + { + "epoch": 1.17, + "learning_rate": 4.46936842374714e-05, + "loss": 0.9936, + "step": 7373 + }, + { + "epoch": 1.17, + "learning_rate": 4.46920949198339e-05, + "loss": 1.0137, + "step": 7374 + }, + { + "epoch": 1.17, + "learning_rate": 4.4690505392487545e-05, + "loss": 0.9955, + "step": 7375 + }, + { + "epoch": 1.18, + "learning_rate": 4.468891565544927e-05, + "loss": 1.1194, + "step": 7376 + }, + { + "epoch": 1.18, + "learning_rate": 4.4687325708736e-05, + "loss": 1.0754, + "step": 7377 + }, + { + "epoch": 1.18, + "learning_rate": 4.4685735552364646e-05, + "loss": 1.1458, + "step": 7378 + }, + { + "epoch": 1.18, + "learning_rate": 4.4684145186352166e-05, + "loss": 0.9954, + "step": 7379 + }, + { + "epoch": 1.18, + "learning_rate": 4.46825546107155e-05, + "loss": 0.9914, + "step": 7380 + }, + { + "epoch": 1.18, + "learning_rate": 4.468096382547157e-05, + "loss": 0.8726, + "step": 7381 + }, + { + "epoch": 1.18, + "learning_rate": 4.4679372830637325e-05, + "loss": 1.0894, + "step": 7382 + }, + { + "epoch": 1.18, + "learning_rate": 4.46777816262297e-05, + "loss": 1.018, + "step": 7383 + }, + { + "epoch": 1.18, + "learning_rate": 4.4676190212265654e-05, + "loss": 0.9699, + "step": 7384 + }, + { + "epoch": 1.18, + "learning_rate": 4.467459858876213e-05, + "loss": 1.0263, + "step": 7385 + }, + { + "epoch": 1.18, + "learning_rate": 4.467300675573608e-05, + "loss": 0.9359, + "step": 7386 + }, + { + "epoch": 1.18, + "learning_rate": 4.467141471320444e-05, + "loss": 1.1257, + "step": 7387 + }, + { + "epoch": 1.18, + "learning_rate": 4.466982246118418e-05, + "loss": 0.994, + "step": 7388 + }, + { + "epoch": 1.18, + "learning_rate": 4.466822999969226e-05, + "loss": 1.0124, + "step": 7389 + }, + { + "epoch": 1.18, + "learning_rate": 4.466663732874562e-05, + "loss": 1.1152, + "step": 7390 + }, + { + "epoch": 1.18, + "learning_rate": 4.466504444836124e-05, + "loss": 0.9858, + "step": 7391 + }, + { + "epoch": 1.18, + "learning_rate": 4.4663451358556076e-05, + "loss": 0.9739, + "step": 7392 + }, + { + "epoch": 1.18, + "learning_rate": 4.4661858059347084e-05, + "loss": 0.9367, + "step": 7393 + }, + { + "epoch": 1.18, + "learning_rate": 4.466026455075125e-05, + "loss": 1.0029, + "step": 7394 + }, + { + "epoch": 1.18, + "learning_rate": 4.465867083278553e-05, + "loss": 0.963, + "step": 7395 + }, + { + "epoch": 1.18, + "learning_rate": 4.46570769054669e-05, + "loss": 0.973, + "step": 7396 + }, + { + "epoch": 1.18, + "learning_rate": 4.465548276881234e-05, + "loss": 1.0395, + "step": 7397 + }, + { + "epoch": 1.18, + "learning_rate": 4.465388842283882e-05, + "loss": 0.9176, + "step": 7398 + }, + { + "epoch": 1.18, + "learning_rate": 4.465229386756332e-05, + "loss": 1.0268, + "step": 7399 + }, + { + "epoch": 1.18, + "learning_rate": 4.465069910300282e-05, + "loss": 0.9921, + "step": 7400 + }, + { + "epoch": 1.18, + "learning_rate": 4.46491041291743e-05, + "loss": 0.972, + "step": 7401 + }, + { + "epoch": 1.18, + "learning_rate": 4.464750894609476e-05, + "loss": 1.0651, + "step": 7402 + }, + { + "epoch": 1.18, + "learning_rate": 4.464591355378117e-05, + "loss": 1.0331, + "step": 7403 + }, + { + "epoch": 1.18, + "learning_rate": 4.4644317952250535e-05, + "loss": 0.9979, + "step": 7404 + }, + { + "epoch": 1.18, + "learning_rate": 4.464272214151984e-05, + "loss": 1.0468, + "step": 7405 + }, + { + "epoch": 1.18, + "learning_rate": 4.4641126121606074e-05, + "loss": 0.9727, + "step": 7406 + }, + { + "epoch": 1.18, + "learning_rate": 4.4639529892526244e-05, + "loss": 1.0122, + "step": 7407 + }, + { + "epoch": 1.18, + "learning_rate": 4.4637933454297345e-05, + "loss": 1.0921, + "step": 7408 + }, + { + "epoch": 1.18, + "learning_rate": 4.4636336806936376e-05, + "loss": 1.0008, + "step": 7409 + }, + { + "epoch": 1.18, + "learning_rate": 4.4634739950460334e-05, + "loss": 1.0552, + "step": 7410 + }, + { + "epoch": 1.18, + "learning_rate": 4.463314288488624e-05, + "loss": 1.0584, + "step": 7411 + }, + { + "epoch": 1.18, + "learning_rate": 4.463154561023109e-05, + "loss": 1.0379, + "step": 7412 + }, + { + "epoch": 1.18, + "learning_rate": 4.46299481265119e-05, + "loss": 0.9142, + "step": 7413 + }, + { + "epoch": 1.18, + "learning_rate": 4.462835043374568e-05, + "loss": 0.9477, + "step": 7414 + }, + { + "epoch": 1.18, + "learning_rate": 4.4626752531949444e-05, + "loss": 0.9914, + "step": 7415 + }, + { + "epoch": 1.18, + "learning_rate": 4.462515442114021e-05, + "loss": 0.9967, + "step": 7416 + }, + { + "epoch": 1.18, + "learning_rate": 4.4623556101334994e-05, + "loss": 1.0472, + "step": 7417 + }, + { + "epoch": 1.18, + "learning_rate": 4.462195757255082e-05, + "loss": 0.9579, + "step": 7418 + }, + { + "epoch": 1.18, + "learning_rate": 4.462035883480471e-05, + "loss": 1.0051, + "step": 7419 + }, + { + "epoch": 1.18, + "learning_rate": 4.4618759888113694e-05, + "loss": 1.039, + "step": 7420 + }, + { + "epoch": 1.18, + "learning_rate": 4.461716073249479e-05, + "loss": 1.0127, + "step": 7421 + }, + { + "epoch": 1.18, + "learning_rate": 4.4615561367965044e-05, + "loss": 1.0986, + "step": 7422 + }, + { + "epoch": 1.18, + "learning_rate": 4.461396179454147e-05, + "loss": 0.9917, + "step": 7423 + }, + { + "epoch": 1.18, + "learning_rate": 4.461236201224111e-05, + "loss": 0.9518, + "step": 7424 + }, + { + "epoch": 1.18, + "learning_rate": 4.461076202108101e-05, + "loss": 1.0686, + "step": 7425 + }, + { + "epoch": 1.18, + "learning_rate": 4.460916182107819e-05, + "loss": 1.0758, + "step": 7426 + }, + { + "epoch": 1.18, + "learning_rate": 4.460756141224971e-05, + "loss": 0.9758, + "step": 7427 + }, + { + "epoch": 1.18, + "learning_rate": 4.46059607946126e-05, + "loss": 1.0224, + "step": 7428 + }, + { + "epoch": 1.18, + "learning_rate": 4.460435996818391e-05, + "loss": 1.0327, + "step": 7429 + }, + { + "epoch": 1.18, + "learning_rate": 4.4602758932980694e-05, + "loss": 1.0079, + "step": 7430 + }, + { + "epoch": 1.18, + "learning_rate": 4.460115768901999e-05, + "loss": 0.9867, + "step": 7431 + }, + { + "epoch": 1.18, + "learning_rate": 4.4599556236318864e-05, + "loss": 1.0792, + "step": 7432 + }, + { + "epoch": 1.18, + "learning_rate": 4.459795457489435e-05, + "loss": 0.9876, + "step": 7433 + }, + { + "epoch": 1.18, + "learning_rate": 4.459635270476353e-05, + "loss": 0.9883, + "step": 7434 + }, + { + "epoch": 1.18, + "learning_rate": 4.459475062594345e-05, + "loss": 1.0682, + "step": 7435 + }, + { + "epoch": 1.18, + "learning_rate": 4.4593148338451174e-05, + "loss": 0.979, + "step": 7436 + }, + { + "epoch": 1.18, + "learning_rate": 4.459154584230376e-05, + "loss": 1.0295, + "step": 7437 + }, + { + "epoch": 1.18, + "learning_rate": 4.4589943137518275e-05, + "loss": 1.0249, + "step": 7438 + }, + { + "epoch": 1.19, + "learning_rate": 4.458834022411179e-05, + "loss": 1.0081, + "step": 7439 + }, + { + "epoch": 1.19, + "learning_rate": 4.458673710210137e-05, + "loss": 1.0511, + "step": 7440 + }, + { + "epoch": 1.19, + "learning_rate": 4.4585133771504095e-05, + "loss": 1.0638, + "step": 7441 + }, + { + "epoch": 1.19, + "learning_rate": 4.458353023233704e-05, + "loss": 0.9528, + "step": 7442 + }, + { + "epoch": 1.19, + "learning_rate": 4.4581926484617275e-05, + "loss": 1.045, + "step": 7443 + }, + { + "epoch": 1.19, + "learning_rate": 4.458032252836188e-05, + "loss": 1.0709, + "step": 7444 + }, + { + "epoch": 1.19, + "learning_rate": 4.4578718363587934e-05, + "loss": 0.9736, + "step": 7445 + }, + { + "epoch": 1.19, + "learning_rate": 4.457711399031253e-05, + "loss": 0.9794, + "step": 7446 + }, + { + "epoch": 1.19, + "learning_rate": 4.457550940855275e-05, + "loss": 0.9851, + "step": 7447 + }, + { + "epoch": 1.19, + "learning_rate": 4.457390461832567e-05, + "loss": 1.0104, + "step": 7448 + }, + { + "epoch": 1.19, + "learning_rate": 4.457229961964839e-05, + "loss": 0.9788, + "step": 7449 + }, + { + "epoch": 1.19, + "learning_rate": 4.457069441253801e-05, + "loss": 0.9382, + "step": 7450 + }, + { + "epoch": 1.19, + "learning_rate": 4.456908899701161e-05, + "loss": 1.1463, + "step": 7451 + }, + { + "epoch": 1.19, + "learning_rate": 4.45674833730863e-05, + "loss": 0.9967, + "step": 7452 + }, + { + "epoch": 1.19, + "learning_rate": 4.456587754077917e-05, + "loss": 0.9736, + "step": 7453 + }, + { + "epoch": 1.19, + "learning_rate": 4.456427150010731e-05, + "loss": 1.0566, + "step": 7454 + }, + { + "epoch": 1.19, + "learning_rate": 4.456266525108786e-05, + "loss": 0.9757, + "step": 7455 + }, + { + "epoch": 1.19, + "learning_rate": 4.456105879373787e-05, + "loss": 1.0107, + "step": 7456 + }, + { + "epoch": 1.19, + "learning_rate": 4.455945212807451e-05, + "loss": 0.9768, + "step": 7457 + }, + { + "epoch": 1.19, + "learning_rate": 4.455784525411484e-05, + "loss": 1.0127, + "step": 7458 + }, + { + "epoch": 1.19, + "learning_rate": 4.4556238171876005e-05, + "loss": 0.9591, + "step": 7459 + }, + { + "epoch": 1.19, + "learning_rate": 4.4554630881375096e-05, + "loss": 1.0085, + "step": 7460 + }, + { + "epoch": 1.19, + "learning_rate": 4.4553023382629246e-05, + "loss": 0.9989, + "step": 7461 + }, + { + "epoch": 1.19, + "learning_rate": 4.4551415675655566e-05, + "loss": 1.0501, + "step": 7462 + }, + { + "epoch": 1.19, + "learning_rate": 4.454980776047118e-05, + "loss": 1.0772, + "step": 7463 + }, + { + "epoch": 1.19, + "learning_rate": 4.4548199637093214e-05, + "loss": 0.9958, + "step": 7464 + }, + { + "epoch": 1.19, + "learning_rate": 4.4546591305538784e-05, + "loss": 1.0355, + "step": 7465 + }, + { + "epoch": 1.19, + "learning_rate": 4.4544982765825024e-05, + "loss": 1.0492, + "step": 7466 + }, + { + "epoch": 1.19, + "learning_rate": 4.4543374017969065e-05, + "loss": 0.9448, + "step": 7467 + }, + { + "epoch": 1.19, + "learning_rate": 4.454176506198804e-05, + "loss": 0.9837, + "step": 7468 + }, + { + "epoch": 1.19, + "learning_rate": 4.454015589789907e-05, + "loss": 1.0592, + "step": 7469 + }, + { + "epoch": 1.19, + "learning_rate": 4.453854652571931e-05, + "loss": 1.0124, + "step": 7470 + }, + { + "epoch": 1.19, + "learning_rate": 4.45369369454659e-05, + "loss": 1.0964, + "step": 7471 + }, + { + "epoch": 1.19, + "learning_rate": 4.4535327157155966e-05, + "loss": 1.0129, + "step": 7472 + }, + { + "epoch": 1.19, + "learning_rate": 4.4533717160806654e-05, + "loss": 1.0426, + "step": 7473 + }, + { + "epoch": 1.19, + "learning_rate": 4.453210695643512e-05, + "loss": 0.9865, + "step": 7474 + }, + { + "epoch": 1.19, + "learning_rate": 4.4530496544058504e-05, + "loss": 1.0107, + "step": 7475 + }, + { + "epoch": 1.19, + "learning_rate": 4.4528885923693954e-05, + "loss": 1.019, + "step": 7476 + }, + { + "epoch": 1.19, + "learning_rate": 4.4527275095358625e-05, + "loss": 1.0199, + "step": 7477 + }, + { + "epoch": 1.19, + "learning_rate": 4.452566405906967e-05, + "loss": 0.9788, + "step": 7478 + }, + { + "epoch": 1.19, + "learning_rate": 4.452405281484425e-05, + "loss": 1.033, + "step": 7479 + }, + { + "epoch": 1.19, + "learning_rate": 4.4522441362699526e-05, + "loss": 1.0105, + "step": 7480 + }, + { + "epoch": 1.19, + "learning_rate": 4.452082970265265e-05, + "loss": 0.961, + "step": 7481 + }, + { + "epoch": 1.19, + "learning_rate": 4.451921783472079e-05, + "loss": 1.0299, + "step": 7482 + }, + { + "epoch": 1.19, + "learning_rate": 4.4517605758921106e-05, + "loss": 0.9888, + "step": 7483 + }, + { + "epoch": 1.19, + "learning_rate": 4.4515993475270776e-05, + "loss": 0.9983, + "step": 7484 + }, + { + "epoch": 1.19, + "learning_rate": 4.451438098378696e-05, + "loss": 1.0221, + "step": 7485 + }, + { + "epoch": 1.19, + "learning_rate": 4.451276828448683e-05, + "loss": 1.0449, + "step": 7486 + }, + { + "epoch": 1.19, + "learning_rate": 4.451115537738757e-05, + "loss": 0.9823, + "step": 7487 + }, + { + "epoch": 1.19, + "learning_rate": 4.450954226250635e-05, + "loss": 1.0188, + "step": 7488 + }, + { + "epoch": 1.19, + "learning_rate": 4.450792893986035e-05, + "loss": 1.0719, + "step": 7489 + }, + { + "epoch": 1.19, + "learning_rate": 4.450631540946675e-05, + "loss": 1.0688, + "step": 7490 + }, + { + "epoch": 1.19, + "learning_rate": 4.450470167134273e-05, + "loss": 1.0453, + "step": 7491 + }, + { + "epoch": 1.19, + "learning_rate": 4.450308772550548e-05, + "loss": 1.036, + "step": 7492 + }, + { + "epoch": 1.19, + "learning_rate": 4.450147357197219e-05, + "loss": 1.018, + "step": 7493 + }, + { + "epoch": 1.19, + "learning_rate": 4.4499859210760044e-05, + "loss": 0.9793, + "step": 7494 + }, + { + "epoch": 1.19, + "learning_rate": 4.449824464188623e-05, + "loss": 1.0857, + "step": 7495 + }, + { + "epoch": 1.19, + "learning_rate": 4.449662986536796e-05, + "loss": 0.9854, + "step": 7496 + }, + { + "epoch": 1.19, + "learning_rate": 4.4495014881222414e-05, + "loss": 1.0304, + "step": 7497 + }, + { + "epoch": 1.19, + "learning_rate": 4.4493399689466795e-05, + "loss": 1.0182, + "step": 7498 + }, + { + "epoch": 1.19, + "learning_rate": 4.44917842901183e-05, + "loss": 0.93, + "step": 7499 + }, + { + "epoch": 1.19, + "learning_rate": 4.4490168683194145e-05, + "loss": 0.9384, + "step": 7500 + }, + { + "epoch": 1.19, + "learning_rate": 4.448855286871152e-05, + "loss": 1.0645, + "step": 7501 + }, + { + "epoch": 1.2, + "learning_rate": 4.448693684668763e-05, + "loss": 1.02, + "step": 7502 + }, + { + "epoch": 1.2, + "learning_rate": 4.448532061713971e-05, + "loss": 0.9673, + "step": 7503 + }, + { + "epoch": 1.2, + "learning_rate": 4.4483704180084944e-05, + "loss": 0.9734, + "step": 7504 + }, + { + "epoch": 1.2, + "learning_rate": 4.448208753554056e-05, + "loss": 0.9891, + "step": 7505 + }, + { + "epoch": 1.2, + "learning_rate": 4.448047068352378e-05, + "loss": 1.0526, + "step": 7506 + }, + { + "epoch": 1.2, + "learning_rate": 4.4478853624051805e-05, + "loss": 0.9753, + "step": 7507 + }, + { + "epoch": 1.2, + "learning_rate": 4.447723635714187e-05, + "loss": 1.0055, + "step": 7508 + }, + { + "epoch": 1.2, + "learning_rate": 4.4475618882811186e-05, + "loss": 0.9648, + "step": 7509 + }, + { + "epoch": 1.2, + "learning_rate": 4.4474001201076986e-05, + "loss": 0.9476, + "step": 7510 + }, + { + "epoch": 1.2, + "learning_rate": 4.44723833119565e-05, + "loss": 1.0099, + "step": 7511 + }, + { + "epoch": 1.2, + "learning_rate": 4.4470765215466946e-05, + "loss": 0.9621, + "step": 7512 + }, + { + "epoch": 1.2, + "learning_rate": 4.446914691162557e-05, + "loss": 0.9924, + "step": 7513 + }, + { + "epoch": 1.2, + "learning_rate": 4.44675284004496e-05, + "loss": 0.9668, + "step": 7514 + }, + { + "epoch": 1.2, + "learning_rate": 4.446590968195628e-05, + "loss": 1.1059, + "step": 7515 + }, + { + "epoch": 1.2, + "learning_rate": 4.4464290756162826e-05, + "loss": 1.1115, + "step": 7516 + }, + { + "epoch": 1.2, + "learning_rate": 4.4462671623086494e-05, + "loss": 0.9802, + "step": 7517 + }, + { + "epoch": 1.2, + "learning_rate": 4.446105228274453e-05, + "loss": 0.9891, + "step": 7518 + }, + { + "epoch": 1.2, + "learning_rate": 4.445943273515417e-05, + "loss": 1.0706, + "step": 7519 + }, + { + "epoch": 1.2, + "learning_rate": 4.445781298033267e-05, + "loss": 0.9744, + "step": 7520 + }, + { + "epoch": 1.2, + "learning_rate": 4.445619301829728e-05, + "loss": 0.9797, + "step": 7521 + }, + { + "epoch": 1.2, + "learning_rate": 4.445457284906523e-05, + "loss": 1.0083, + "step": 7522 + }, + { + "epoch": 1.2, + "learning_rate": 4.44529524726538e-05, + "loss": 1.0221, + "step": 7523 + }, + { + "epoch": 1.2, + "learning_rate": 4.4451331889080236e-05, + "loss": 1.0599, + "step": 7524 + }, + { + "epoch": 1.2, + "learning_rate": 4.44497110983618e-05, + "loss": 0.9176, + "step": 7525 + }, + { + "epoch": 1.2, + "learning_rate": 4.4448090100515745e-05, + "loss": 0.9707, + "step": 7526 + }, + { + "epoch": 1.2, + "learning_rate": 4.444646889555934e-05, + "loss": 1.0102, + "step": 7527 + }, + { + "epoch": 1.2, + "learning_rate": 4.4444847483509846e-05, + "loss": 0.9928, + "step": 7528 + }, + { + "epoch": 1.2, + "learning_rate": 4.444322586438453e-05, + "loss": 1.052, + "step": 7529 + }, + { + "epoch": 1.2, + "learning_rate": 4.444160403820066e-05, + "loss": 1.0541, + "step": 7530 + }, + { + "epoch": 1.2, + "learning_rate": 4.4439982004975525e-05, + "loss": 1.0508, + "step": 7531 + }, + { + "epoch": 1.2, + "learning_rate": 4.443835976472637e-05, + "loss": 1.0061, + "step": 7532 + }, + { + "epoch": 1.2, + "learning_rate": 4.443673731747049e-05, + "loss": 0.9941, + "step": 7533 + }, + { + "epoch": 1.2, + "learning_rate": 4.4435114663225154e-05, + "loss": 0.9862, + "step": 7534 + }, + { + "epoch": 1.2, + "learning_rate": 4.443349180200764e-05, + "loss": 1.018, + "step": 7535 + }, + { + "epoch": 1.2, + "learning_rate": 4.4431868733835244e-05, + "loss": 0.9395, + "step": 7536 + }, + { + "epoch": 1.2, + "learning_rate": 4.443024545872525e-05, + "loss": 1.0714, + "step": 7537 + }, + { + "epoch": 1.2, + "learning_rate": 4.442862197669493e-05, + "loss": 1.095, + "step": 7538 + }, + { + "epoch": 1.2, + "learning_rate": 4.442699828776158e-05, + "loss": 1.0435, + "step": 7539 + }, + { + "epoch": 1.2, + "learning_rate": 4.4425374391942506e-05, + "loss": 0.9404, + "step": 7540 + }, + { + "epoch": 1.2, + "learning_rate": 4.442375028925497e-05, + "loss": 1.0466, + "step": 7541 + }, + { + "epoch": 1.2, + "learning_rate": 4.44221259797163e-05, + "loss": 0.939, + "step": 7542 + }, + { + "epoch": 1.2, + "learning_rate": 4.4420501463343775e-05, + "loss": 0.9943, + "step": 7543 + }, + { + "epoch": 1.2, + "learning_rate": 4.44188767401547e-05, + "loss": 0.9859, + "step": 7544 + }, + { + "epoch": 1.2, + "learning_rate": 4.441725181016637e-05, + "loss": 1.0892, + "step": 7545 + }, + { + "epoch": 1.2, + "learning_rate": 4.4415626673396106e-05, + "loss": 0.996, + "step": 7546 + }, + { + "epoch": 1.2, + "learning_rate": 4.441400132986121e-05, + "loss": 0.9329, + "step": 7547 + }, + { + "epoch": 1.2, + "learning_rate": 4.441237577957898e-05, + "loss": 1.0256, + "step": 7548 + }, + { + "epoch": 1.2, + "learning_rate": 4.441075002256673e-05, + "loss": 1.0722, + "step": 7549 + }, + { + "epoch": 1.2, + "learning_rate": 4.440912405884178e-05, + "loss": 1.0269, + "step": 7550 + }, + { + "epoch": 1.2, + "learning_rate": 4.440749788842143e-05, + "loss": 0.9738, + "step": 7551 + }, + { + "epoch": 1.2, + "learning_rate": 4.440587151132303e-05, + "loss": 1.0435, + "step": 7552 + }, + { + "epoch": 1.2, + "learning_rate": 4.440424492756387e-05, + "loss": 1.0392, + "step": 7553 + }, + { + "epoch": 1.2, + "learning_rate": 4.440261813716128e-05, + "loss": 0.974, + "step": 7554 + }, + { + "epoch": 1.2, + "learning_rate": 4.4400991140132595e-05, + "loss": 0.9731, + "step": 7555 + }, + { + "epoch": 1.2, + "learning_rate": 4.439936393649513e-05, + "loss": 1.0109, + "step": 7556 + }, + { + "epoch": 1.2, + "learning_rate": 4.4397736526266214e-05, + "loss": 1.0225, + "step": 7557 + }, + { + "epoch": 1.2, + "learning_rate": 4.439610890946318e-05, + "loss": 0.9765, + "step": 7558 + }, + { + "epoch": 1.2, + "learning_rate": 4.439448108610337e-05, + "loss": 1.0199, + "step": 7559 + }, + { + "epoch": 1.2, + "learning_rate": 4.4392853056204097e-05, + "loss": 0.998, + "step": 7560 + }, + { + "epoch": 1.2, + "learning_rate": 4.439122481978272e-05, + "loss": 0.9006, + "step": 7561 + }, + { + "epoch": 1.2, + "learning_rate": 4.4389596376856575e-05, + "loss": 0.9845, + "step": 7562 + }, + { + "epoch": 1.2, + "learning_rate": 4.4387967727442994e-05, + "loss": 0.9312, + "step": 7563 + }, + { + "epoch": 1.21, + "learning_rate": 4.438633887155933e-05, + "loss": 0.9462, + "step": 7564 + }, + { + "epoch": 1.21, + "learning_rate": 4.438470980922292e-05, + "loss": 1.0376, + "step": 7565 + }, + { + "epoch": 1.21, + "learning_rate": 4.438308054045113e-05, + "loss": 1.003, + "step": 7566 + }, + { + "epoch": 1.21, + "learning_rate": 4.43814510652613e-05, + "loss": 1.0013, + "step": 7567 + }, + { + "epoch": 1.21, + "learning_rate": 4.4379821383670786e-05, + "loss": 0.89, + "step": 7568 + }, + { + "epoch": 1.21, + "learning_rate": 4.437819149569693e-05, + "loss": 0.9818, + "step": 7569 + }, + { + "epoch": 1.21, + "learning_rate": 4.43765614013571e-05, + "loss": 0.9395, + "step": 7570 + }, + { + "epoch": 1.21, + "learning_rate": 4.437493110066866e-05, + "loss": 0.9696, + "step": 7571 + }, + { + "epoch": 1.21, + "learning_rate": 4.437330059364896e-05, + "loss": 1.0316, + "step": 7572 + }, + { + "epoch": 1.21, + "learning_rate": 4.4371669880315375e-05, + "loss": 0.9887, + "step": 7573 + }, + { + "epoch": 1.21, + "learning_rate": 4.437003896068527e-05, + "loss": 0.899, + "step": 7574 + }, + { + "epoch": 1.21, + "learning_rate": 4.436840783477601e-05, + "loss": 1.0098, + "step": 7575 + }, + { + "epoch": 1.21, + "learning_rate": 4.4366776502604954e-05, + "loss": 1.0643, + "step": 7576 + }, + { + "epoch": 1.21, + "learning_rate": 4.4365144964189496e-05, + "loss": 1.0112, + "step": 7577 + }, + { + "epoch": 1.21, + "learning_rate": 4.4363513219546995e-05, + "loss": 1.0487, + "step": 7578 + }, + { + "epoch": 1.21, + "learning_rate": 4.436188126869484e-05, + "loss": 1.0094, + "step": 7579 + }, + { + "epoch": 1.21, + "learning_rate": 4.43602491116504e-05, + "loss": 0.9948, + "step": 7580 + }, + { + "epoch": 1.21, + "learning_rate": 4.435861674843106e-05, + "loss": 0.9713, + "step": 7581 + }, + { + "epoch": 1.21, + "learning_rate": 4.435698417905421e-05, + "loss": 1.0517, + "step": 7582 + }, + { + "epoch": 1.21, + "learning_rate": 4.435535140353723e-05, + "loss": 1.039, + "step": 7583 + }, + { + "epoch": 1.21, + "learning_rate": 4.435371842189751e-05, + "loss": 1.029, + "step": 7584 + }, + { + "epoch": 1.21, + "learning_rate": 4.435208523415243e-05, + "loss": 0.9747, + "step": 7585 + }, + { + "epoch": 1.21, + "learning_rate": 4.43504518403194e-05, + "loss": 0.9766, + "step": 7586 + }, + { + "epoch": 1.21, + "learning_rate": 4.43488182404158e-05, + "loss": 0.95, + "step": 7587 + }, + { + "epoch": 1.21, + "learning_rate": 4.434718443445903e-05, + "loss": 1.0856, + "step": 7588 + }, + { + "epoch": 1.21, + "learning_rate": 4.43455504224665e-05, + "loss": 0.9842, + "step": 7589 + }, + { + "epoch": 1.21, + "learning_rate": 4.43439162044556e-05, + "loss": 0.9292, + "step": 7590 + }, + { + "epoch": 1.21, + "learning_rate": 4.434228178044373e-05, + "loss": 0.9969, + "step": 7591 + }, + { + "epoch": 1.21, + "learning_rate": 4.4340647150448314e-05, + "loss": 1.0205, + "step": 7592 + }, + { + "epoch": 1.21, + "learning_rate": 4.433901231448674e-05, + "loss": 1.0133, + "step": 7593 + }, + { + "epoch": 1.21, + "learning_rate": 4.433737727257643e-05, + "loss": 0.9725, + "step": 7594 + }, + { + "epoch": 1.21, + "learning_rate": 4.433574202473478e-05, + "loss": 0.9395, + "step": 7595 + }, + { + "epoch": 1.21, + "learning_rate": 4.433410657097923e-05, + "loss": 0.9827, + "step": 7596 + }, + { + "epoch": 1.21, + "learning_rate": 4.433247091132719e-05, + "loss": 0.9585, + "step": 7597 + }, + { + "epoch": 1.21, + "learning_rate": 4.433083504579605e-05, + "loss": 0.9839, + "step": 7598 + }, + { + "epoch": 1.21, + "learning_rate": 4.432919897440326e-05, + "loss": 1.0327, + "step": 7599 + }, + { + "epoch": 1.21, + "learning_rate": 4.4327562697166244e-05, + "loss": 1.0291, + "step": 7600 + }, + { + "epoch": 1.21, + "learning_rate": 4.432592621410242e-05, + "loss": 1.1207, + "step": 7601 + }, + { + "epoch": 1.21, + "learning_rate": 4.432428952522921e-05, + "loss": 1.0091, + "step": 7602 + }, + { + "epoch": 1.21, + "learning_rate": 4.4322652630564046e-05, + "loss": 0.9742, + "step": 7603 + }, + { + "epoch": 1.21, + "learning_rate": 4.4321015530124365e-05, + "loss": 0.9612, + "step": 7604 + }, + { + "epoch": 1.21, + "learning_rate": 4.43193782239276e-05, + "loss": 0.9934, + "step": 7605 + }, + { + "epoch": 1.21, + "learning_rate": 4.431774071199118e-05, + "loss": 1.0094, + "step": 7606 + }, + { + "epoch": 1.21, + "learning_rate": 4.431610299433255e-05, + "loss": 1.0229, + "step": 7607 + }, + { + "epoch": 1.21, + "learning_rate": 4.431446507096916e-05, + "loss": 1.0251, + "step": 7608 + }, + { + "epoch": 1.21, + "learning_rate": 4.431282694191844e-05, + "loss": 1.0142, + "step": 7609 + }, + { + "epoch": 1.21, + "learning_rate": 4.4311188607197836e-05, + "loss": 1.068, + "step": 7610 + }, + { + "epoch": 1.21, + "learning_rate": 4.43095500668248e-05, + "loss": 1.0098, + "step": 7611 + }, + { + "epoch": 1.21, + "learning_rate": 4.430791132081678e-05, + "loss": 1.0338, + "step": 7612 + }, + { + "epoch": 1.21, + "learning_rate": 4.430627236919121e-05, + "loss": 0.9907, + "step": 7613 + }, + { + "epoch": 1.21, + "learning_rate": 4.4304633211965576e-05, + "loss": 0.9967, + "step": 7614 + }, + { + "epoch": 1.21, + "learning_rate": 4.430299384915731e-05, + "loss": 0.9478, + "step": 7615 + }, + { + "epoch": 1.21, + "learning_rate": 4.430135428078389e-05, + "loss": 0.9553, + "step": 7616 + }, + { + "epoch": 1.21, + "learning_rate": 4.429971450686276e-05, + "loss": 0.9849, + "step": 7617 + }, + { + "epoch": 1.21, + "learning_rate": 4.4298074527411384e-05, + "loss": 0.9817, + "step": 7618 + }, + { + "epoch": 1.21, + "learning_rate": 4.429643434244723e-05, + "loss": 0.9363, + "step": 7619 + }, + { + "epoch": 1.21, + "learning_rate": 4.429479395198777e-05, + "loss": 0.9403, + "step": 7620 + }, + { + "epoch": 1.21, + "learning_rate": 4.429315335605047e-05, + "loss": 0.9701, + "step": 7621 + }, + { + "epoch": 1.21, + "learning_rate": 4.42915125546528e-05, + "loss": 0.9859, + "step": 7622 + }, + { + "epoch": 1.21, + "learning_rate": 4.428987154781223e-05, + "loss": 1.0054, + "step": 7623 + }, + { + "epoch": 1.21, + "learning_rate": 4.428823033554623e-05, + "loss": 0.9683, + "step": 7624 + }, + { + "epoch": 1.21, + "learning_rate": 4.428658891787231e-05, + "loss": 0.9119, + "step": 7625 + }, + { + "epoch": 1.21, + "learning_rate": 4.428494729480791e-05, + "loss": 0.971, + "step": 7626 + }, + { + "epoch": 1.22, + "learning_rate": 4.428330546637053e-05, + "loss": 0.9656, + "step": 7627 + }, + { + "epoch": 1.22, + "learning_rate": 4.428166343257766e-05, + "loss": 0.9456, + "step": 7628 + }, + { + "epoch": 1.22, + "learning_rate": 4.4280021193446774e-05, + "loss": 1.0749, + "step": 7629 + }, + { + "epoch": 1.22, + "learning_rate": 4.4278378748995375e-05, + "loss": 1.0123, + "step": 7630 + }, + { + "epoch": 1.22, + "learning_rate": 4.427673609924094e-05, + "loss": 1.0151, + "step": 7631 + }, + { + "epoch": 1.22, + "learning_rate": 4.4275093244200974e-05, + "loss": 0.954, + "step": 7632 + }, + { + "epoch": 1.22, + "learning_rate": 4.427345018389296e-05, + "loss": 0.9543, + "step": 7633 + }, + { + "epoch": 1.22, + "learning_rate": 4.4271806918334414e-05, + "loss": 1.0641, + "step": 7634 + }, + { + "epoch": 1.22, + "learning_rate": 4.4270163447542814e-05, + "loss": 0.8915, + "step": 7635 + }, + { + "epoch": 1.22, + "learning_rate": 4.426851977153568e-05, + "loss": 1.0212, + "step": 7636 + }, + { + "epoch": 1.22, + "learning_rate": 4.4266875890330506e-05, + "loss": 1.1001, + "step": 7637 + }, + { + "epoch": 1.22, + "learning_rate": 4.42652318039448e-05, + "loss": 1.001, + "step": 7638 + }, + { + "epoch": 1.22, + "learning_rate": 4.426358751239607e-05, + "loss": 0.8862, + "step": 7639 + }, + { + "epoch": 1.22, + "learning_rate": 4.426194301570183e-05, + "loss": 0.924, + "step": 7640 + }, + { + "epoch": 1.22, + "learning_rate": 4.426029831387959e-05, + "loss": 1.0257, + "step": 7641 + }, + { + "epoch": 1.22, + "learning_rate": 4.425865340694687e-05, + "loss": 0.9058, + "step": 7642 + }, + { + "epoch": 1.22, + "learning_rate": 4.4257008294921173e-05, + "loss": 0.9676, + "step": 7643 + }, + { + "epoch": 1.22, + "learning_rate": 4.4255362977820044e-05, + "loss": 1.0357, + "step": 7644 + }, + { + "epoch": 1.22, + "learning_rate": 4.425371745566098e-05, + "loss": 1.0462, + "step": 7645 + }, + { + "epoch": 1.22, + "learning_rate": 4.425207172846151e-05, + "loss": 1.0098, + "step": 7646 + }, + { + "epoch": 1.22, + "learning_rate": 4.4250425796239174e-05, + "loss": 1.0732, + "step": 7647 + }, + { + "epoch": 1.22, + "learning_rate": 4.424877965901149e-05, + "loss": 1.03, + "step": 7648 + }, + { + "epoch": 1.22, + "learning_rate": 4.424713331679598e-05, + "loss": 1.0346, + "step": 7649 + }, + { + "epoch": 1.22, + "learning_rate": 4.4245486769610196e-05, + "loss": 1.0078, + "step": 7650 + }, + { + "epoch": 1.22, + "learning_rate": 4.424384001747166e-05, + "loss": 1.0091, + "step": 7651 + }, + { + "epoch": 1.22, + "learning_rate": 4.424219306039791e-05, + "loss": 0.9299, + "step": 7652 + }, + { + "epoch": 1.22, + "learning_rate": 4.4240545898406486e-05, + "loss": 1.0086, + "step": 7653 + }, + { + "epoch": 1.22, + "learning_rate": 4.4238898531514925e-05, + "loss": 1.008, + "step": 7654 + }, + { + "epoch": 1.22, + "learning_rate": 4.423725095974077e-05, + "loss": 1.0544, + "step": 7655 + }, + { + "epoch": 1.22, + "learning_rate": 4.423560318310158e-05, + "loss": 1.0887, + "step": 7656 + }, + { + "epoch": 1.22, + "learning_rate": 4.4233955201614896e-05, + "loss": 0.9285, + "step": 7657 + }, + { + "epoch": 1.22, + "learning_rate": 4.423230701529827e-05, + "loss": 1.0149, + "step": 7658 + }, + { + "epoch": 1.22, + "learning_rate": 4.423065862416924e-05, + "loss": 0.9153, + "step": 7659 + }, + { + "epoch": 1.22, + "learning_rate": 4.422901002824538e-05, + "loss": 1.0133, + "step": 7660 + }, + { + "epoch": 1.22, + "learning_rate": 4.422736122754423e-05, + "loss": 0.9694, + "step": 7661 + }, + { + "epoch": 1.22, + "learning_rate": 4.422571222208336e-05, + "loss": 1.0174, + "step": 7662 + }, + { + "epoch": 1.22, + "learning_rate": 4.422406301188032e-05, + "loss": 0.9405, + "step": 7663 + }, + { + "epoch": 1.22, + "learning_rate": 4.422241359695269e-05, + "loss": 1.0633, + "step": 7664 + }, + { + "epoch": 1.22, + "learning_rate": 4.4220763977318026e-05, + "loss": 0.9471, + "step": 7665 + }, + { + "epoch": 1.22, + "learning_rate": 4.4219114152993886e-05, + "loss": 0.9601, + "step": 7666 + }, + { + "epoch": 1.22, + "learning_rate": 4.4217464123997854e-05, + "loss": 0.9889, + "step": 7667 + }, + { + "epoch": 1.22, + "learning_rate": 4.4215813890347495e-05, + "loss": 0.926, + "step": 7668 + }, + { + "epoch": 1.22, + "learning_rate": 4.4214163452060386e-05, + "loss": 1.0076, + "step": 7669 + }, + { + "epoch": 1.22, + "learning_rate": 4.421251280915409e-05, + "loss": 1.0003, + "step": 7670 + }, + { + "epoch": 1.22, + "learning_rate": 4.421086196164621e-05, + "loss": 0.8953, + "step": 7671 + }, + { + "epoch": 1.22, + "learning_rate": 4.420921090955431e-05, + "loss": 1.0133, + "step": 7672 + }, + { + "epoch": 1.22, + "learning_rate": 4.420755965289598e-05, + "loss": 0.9593, + "step": 7673 + }, + { + "epoch": 1.22, + "learning_rate": 4.420590819168879e-05, + "loss": 0.9033, + "step": 7674 + }, + { + "epoch": 1.22, + "learning_rate": 4.420425652595035e-05, + "loss": 0.9806, + "step": 7675 + }, + { + "epoch": 1.22, + "learning_rate": 4.4202604655698234e-05, + "loss": 1.0207, + "step": 7676 + }, + { + "epoch": 1.22, + "learning_rate": 4.420095258095004e-05, + "loss": 0.8814, + "step": 7677 + }, + { + "epoch": 1.22, + "learning_rate": 4.419930030172334e-05, + "loss": 0.9959, + "step": 7678 + }, + { + "epoch": 1.22, + "learning_rate": 4.419764781803577e-05, + "loss": 0.9596, + "step": 7679 + }, + { + "epoch": 1.22, + "learning_rate": 4.419599512990489e-05, + "loss": 0.9951, + "step": 7680 + }, + { + "epoch": 1.22, + "learning_rate": 4.419434223734833e-05, + "loss": 1.0194, + "step": 7681 + }, + { + "epoch": 1.22, + "learning_rate": 4.419268914038367e-05, + "loss": 0.9587, + "step": 7682 + }, + { + "epoch": 1.22, + "learning_rate": 4.419103583902852e-05, + "loss": 1.0178, + "step": 7683 + }, + { + "epoch": 1.22, + "learning_rate": 4.41893823333005e-05, + "loss": 0.9264, + "step": 7684 + }, + { + "epoch": 1.22, + "learning_rate": 4.41877286232172e-05, + "loss": 0.939, + "step": 7685 + }, + { + "epoch": 1.22, + "learning_rate": 4.418607470879623e-05, + "loss": 0.9857, + "step": 7686 + }, + { + "epoch": 1.22, + "learning_rate": 4.418442059005523e-05, + "loss": 0.9791, + "step": 7687 + }, + { + "epoch": 1.22, + "learning_rate": 4.418276626701179e-05, + "loss": 0.884, + "step": 7688 + }, + { + "epoch": 1.22, + "learning_rate": 4.4181111739683535e-05, + "loss": 0.9405, + "step": 7689 + }, + { + "epoch": 1.23, + "learning_rate": 4.417945700808809e-05, + "loss": 0.9675, + "step": 7690 + }, + { + "epoch": 1.23, + "learning_rate": 4.417780207224307e-05, + "loss": 0.9233, + "step": 7691 + }, + { + "epoch": 1.23, + "learning_rate": 4.41761469321661e-05, + "loss": 0.9346, + "step": 7692 + }, + { + "epoch": 1.23, + "learning_rate": 4.4174491587874805e-05, + "loss": 0.9691, + "step": 7693 + }, + { + "epoch": 1.23, + "learning_rate": 4.4172836039386825e-05, + "loss": 0.9164, + "step": 7694 + }, + { + "epoch": 1.23, + "learning_rate": 4.417118028671977e-05, + "loss": 1.0636, + "step": 7695 + }, + { + "epoch": 1.23, + "learning_rate": 4.416952432989129e-05, + "loss": 0.998, + "step": 7696 + }, + { + "epoch": 1.23, + "learning_rate": 4.416786816891901e-05, + "loss": 1.0449, + "step": 7697 + }, + { + "epoch": 1.23, + "learning_rate": 4.416621180382057e-05, + "loss": 1.0012, + "step": 7698 + }, + { + "epoch": 1.23, + "learning_rate": 4.4164555234613616e-05, + "loss": 1.019, + "step": 7699 + }, + { + "epoch": 1.23, + "learning_rate": 4.4162898461315785e-05, + "loss": 1.0141, + "step": 7700 + }, + { + "epoch": 1.23, + "learning_rate": 4.416124148394471e-05, + "loss": 0.9823, + "step": 7701 + }, + { + "epoch": 1.23, + "learning_rate": 4.4159584302518054e-05, + "loss": 1.0021, + "step": 7702 + }, + { + "epoch": 1.23, + "learning_rate": 4.415792691705345e-05, + "loss": 0.9498, + "step": 7703 + }, + { + "epoch": 1.23, + "learning_rate": 4.4156269327568564e-05, + "loss": 1.0033, + "step": 7704 + }, + { + "epoch": 1.23, + "learning_rate": 4.4154611534081035e-05, + "loss": 1.0199, + "step": 7705 + }, + { + "epoch": 1.23, + "learning_rate": 4.4152953536608525e-05, + "loss": 1.0633, + "step": 7706 + }, + { + "epoch": 1.23, + "learning_rate": 4.4151295335168686e-05, + "loss": 0.9969, + "step": 7707 + }, + { + "epoch": 1.23, + "learning_rate": 4.414963692977918e-05, + "loss": 1.0042, + "step": 7708 + }, + { + "epoch": 1.23, + "learning_rate": 4.414797832045767e-05, + "loss": 0.9336, + "step": 7709 + }, + { + "epoch": 1.23, + "learning_rate": 4.4146319507221814e-05, + "loss": 0.9417, + "step": 7710 + }, + { + "epoch": 1.23, + "learning_rate": 4.4144660490089274e-05, + "loss": 1.0005, + "step": 7711 + }, + { + "epoch": 1.23, + "learning_rate": 4.414300126907772e-05, + "loss": 0.9399, + "step": 7712 + }, + { + "epoch": 1.23, + "learning_rate": 4.4141341844204836e-05, + "loss": 1.1086, + "step": 7713 + }, + { + "epoch": 1.23, + "learning_rate": 4.4139682215488276e-05, + "loss": 0.9835, + "step": 7714 + }, + { + "epoch": 1.23, + "learning_rate": 4.413802238294572e-05, + "loss": 1.0832, + "step": 7715 + }, + { + "epoch": 1.23, + "learning_rate": 4.413636234659485e-05, + "loss": 1.028, + "step": 7716 + }, + { + "epoch": 1.23, + "learning_rate": 4.413470210645333e-05, + "loss": 0.9815, + "step": 7717 + }, + { + "epoch": 1.23, + "learning_rate": 4.4133041662538856e-05, + "loss": 1.0212, + "step": 7718 + }, + { + "epoch": 1.23, + "learning_rate": 4.41313810148691e-05, + "loss": 1.0024, + "step": 7719 + }, + { + "epoch": 1.23, + "learning_rate": 4.412972016346176e-05, + "loss": 0.9295, + "step": 7720 + }, + { + "epoch": 1.23, + "learning_rate": 4.412805910833451e-05, + "loss": 1.0128, + "step": 7721 + }, + { + "epoch": 1.23, + "learning_rate": 4.412639784950503e-05, + "loss": 0.9451, + "step": 7722 + }, + { + "epoch": 1.23, + "learning_rate": 4.4124736386991044e-05, + "loss": 1.035, + "step": 7723 + }, + { + "epoch": 1.23, + "learning_rate": 4.412307472081022e-05, + "loss": 0.9076, + "step": 7724 + }, + { + "epoch": 1.23, + "learning_rate": 4.4121412850980257e-05, + "loss": 1.0169, + "step": 7725 + }, + { + "epoch": 1.23, + "learning_rate": 4.411975077751885e-05, + "loss": 0.9532, + "step": 7726 + }, + { + "epoch": 1.23, + "learning_rate": 4.411808850044372e-05, + "loss": 0.9467, + "step": 7727 + }, + { + "epoch": 1.23, + "learning_rate": 4.411642601977255e-05, + "loss": 0.9938, + "step": 7728 + }, + { + "epoch": 1.23, + "learning_rate": 4.4114763335523044e-05, + "loss": 0.9589, + "step": 7729 + }, + { + "epoch": 1.23, + "learning_rate": 4.4113100447712915e-05, + "loss": 1.0469, + "step": 7730 + }, + { + "epoch": 1.23, + "learning_rate": 4.411143735635987e-05, + "loss": 0.9518, + "step": 7731 + }, + { + "epoch": 1.23, + "learning_rate": 4.410977406148162e-05, + "loss": 1.0231, + "step": 7732 + }, + { + "epoch": 1.23, + "learning_rate": 4.4108110563095875e-05, + "loss": 0.9781, + "step": 7733 + }, + { + "epoch": 1.23, + "learning_rate": 4.4106446861220364e-05, + "loss": 0.9486, + "step": 7734 + }, + { + "epoch": 1.23, + "learning_rate": 4.410478295587278e-05, + "loss": 1.0605, + "step": 7735 + }, + { + "epoch": 1.23, + "learning_rate": 4.410311884707087e-05, + "loss": 1.1125, + "step": 7736 + }, + { + "epoch": 1.23, + "learning_rate": 4.410145453483233e-05, + "loss": 0.9582, + "step": 7737 + }, + { + "epoch": 1.23, + "learning_rate": 4.40997900191749e-05, + "loss": 1.0974, + "step": 7738 + }, + { + "epoch": 1.23, + "learning_rate": 4.40981253001163e-05, + "loss": 0.9849, + "step": 7739 + }, + { + "epoch": 1.23, + "learning_rate": 4.409646037767426e-05, + "loss": 0.9873, + "step": 7740 + }, + { + "epoch": 1.23, + "learning_rate": 4.409479525186652e-05, + "loss": 0.9647, + "step": 7741 + }, + { + "epoch": 1.23, + "learning_rate": 4.409312992271079e-05, + "loss": 0.9454, + "step": 7742 + }, + { + "epoch": 1.23, + "learning_rate": 4.409146439022484e-05, + "loss": 0.9688, + "step": 7743 + }, + { + "epoch": 1.23, + "learning_rate": 4.408979865442636e-05, + "loss": 0.9849, + "step": 7744 + }, + { + "epoch": 1.23, + "learning_rate": 4.4088132715333124e-05, + "loss": 0.9525, + "step": 7745 + }, + { + "epoch": 1.23, + "learning_rate": 4.408646657296287e-05, + "loss": 0.9373, + "step": 7746 + }, + { + "epoch": 1.23, + "learning_rate": 4.4084800227333325e-05, + "loss": 1.023, + "step": 7747 + }, + { + "epoch": 1.23, + "learning_rate": 4.4083133678462246e-05, + "loss": 0.9544, + "step": 7748 + }, + { + "epoch": 1.23, + "learning_rate": 4.408146692636738e-05, + "loss": 0.9553, + "step": 7749 + }, + { + "epoch": 1.23, + "learning_rate": 4.407979997106648e-05, + "loss": 0.9269, + "step": 7750 + }, + { + "epoch": 1.23, + "learning_rate": 4.407813281257729e-05, + "loss": 0.9474, + "step": 7751 + }, + { + "epoch": 1.23, + "learning_rate": 4.4076465450917564e-05, + "loss": 1.0186, + "step": 7752 + }, + { + "epoch": 1.24, + "learning_rate": 4.407479788610507e-05, + "loss": 1.0065, + "step": 7753 + }, + { + "epoch": 1.24, + "learning_rate": 4.407313011815756e-05, + "loss": 0.9281, + "step": 7754 + }, + { + "epoch": 1.24, + "learning_rate": 4.407146214709278e-05, + "loss": 0.9987, + "step": 7755 + }, + { + "epoch": 1.24, + "learning_rate": 4.406979397292852e-05, + "loss": 0.9746, + "step": 7756 + }, + { + "epoch": 1.24, + "learning_rate": 4.406812559568253e-05, + "loss": 0.9134, + "step": 7757 + }, + { + "epoch": 1.24, + "learning_rate": 4.4066457015372575e-05, + "loss": 1.0095, + "step": 7758 + }, + { + "epoch": 1.24, + "learning_rate": 4.406478823201643e-05, + "loss": 0.9227, + "step": 7759 + }, + { + "epoch": 1.24, + "learning_rate": 4.406311924563187e-05, + "loss": 0.9931, + "step": 7760 + }, + { + "epoch": 1.24, + "learning_rate": 4.406145005623665e-05, + "loss": 1.0029, + "step": 7761 + }, + { + "epoch": 1.24, + "learning_rate": 4.4059780663848574e-05, + "loss": 1.0393, + "step": 7762 + }, + { + "epoch": 1.24, + "learning_rate": 4.40581110684854e-05, + "loss": 0.9505, + "step": 7763 + }, + { + "epoch": 1.24, + "learning_rate": 4.405644127016492e-05, + "loss": 0.967, + "step": 7764 + }, + { + "epoch": 1.24, + "learning_rate": 4.405477126890489e-05, + "loss": 1.0394, + "step": 7765 + }, + { + "epoch": 1.24, + "learning_rate": 4.405310106472313e-05, + "loss": 0.9146, + "step": 7766 + }, + { + "epoch": 1.24, + "learning_rate": 4.405143065763741e-05, + "loss": 0.9333, + "step": 7767 + }, + { + "epoch": 1.24, + "learning_rate": 4.404976004766552e-05, + "loss": 1.0365, + "step": 7768 + }, + { + "epoch": 1.24, + "learning_rate": 4.4048089234825254e-05, + "loss": 0.9667, + "step": 7769 + }, + { + "epoch": 1.24, + "learning_rate": 4.404641821913439e-05, + "loss": 1.0075, + "step": 7770 + }, + { + "epoch": 1.24, + "learning_rate": 4.404474700061074e-05, + "loss": 0.9224, + "step": 7771 + }, + { + "epoch": 1.24, + "learning_rate": 4.404307557927211e-05, + "loss": 0.9713, + "step": 7772 + }, + { + "epoch": 1.24, + "learning_rate": 4.4041403955136276e-05, + "loss": 1.0044, + "step": 7773 + }, + { + "epoch": 1.24, + "learning_rate": 4.403973212822105e-05, + "loss": 0.9291, + "step": 7774 + }, + { + "epoch": 1.24, + "learning_rate": 4.403806009854423e-05, + "loss": 1.0054, + "step": 7775 + }, + { + "epoch": 1.24, + "learning_rate": 4.403638786612364e-05, + "loss": 0.9974, + "step": 7776 + }, + { + "epoch": 1.24, + "learning_rate": 4.403471543097707e-05, + "loss": 0.9535, + "step": 7777 + }, + { + "epoch": 1.24, + "learning_rate": 4.4033042793122334e-05, + "loss": 1.0037, + "step": 7778 + }, + { + "epoch": 1.24, + "learning_rate": 4.403136995257725e-05, + "loss": 0.9465, + "step": 7779 + }, + { + "epoch": 1.24, + "learning_rate": 4.4029696909359636e-05, + "loss": 0.9614, + "step": 7780 + }, + { + "epoch": 1.24, + "learning_rate": 4.40280236634873e-05, + "loss": 0.9587, + "step": 7781 + }, + { + "epoch": 1.24, + "learning_rate": 4.4026350214978066e-05, + "loss": 0.9882, + "step": 7782 + }, + { + "epoch": 1.24, + "learning_rate": 4.402467656384974e-05, + "loss": 0.9559, + "step": 7783 + }, + { + "epoch": 1.24, + "learning_rate": 4.4023002710120175e-05, + "loss": 0.9938, + "step": 7784 + }, + { + "epoch": 1.24, + "learning_rate": 4.4021328653807176e-05, + "loss": 0.983, + "step": 7785 + }, + { + "epoch": 1.24, + "learning_rate": 4.401965439492857e-05, + "loss": 0.9489, + "step": 7786 + }, + { + "epoch": 1.24, + "learning_rate": 4.40179799335022e-05, + "loss": 1.0029, + "step": 7787 + }, + { + "epoch": 1.24, + "learning_rate": 4.401630526954588e-05, + "loss": 0.9957, + "step": 7788 + }, + { + "epoch": 1.24, + "learning_rate": 4.401463040307746e-05, + "loss": 0.9283, + "step": 7789 + }, + { + "epoch": 1.24, + "learning_rate": 4.401295533411477e-05, + "loss": 1.0246, + "step": 7790 + }, + { + "epoch": 1.24, + "learning_rate": 4.4011280062675656e-05, + "loss": 0.9571, + "step": 7791 + }, + { + "epoch": 1.24, + "learning_rate": 4.400960458877794e-05, + "loss": 1.0037, + "step": 7792 + }, + { + "epoch": 1.24, + "learning_rate": 4.4007928912439475e-05, + "loss": 0.9529, + "step": 7793 + }, + { + "epoch": 1.24, + "learning_rate": 4.4006253033678115e-05, + "loss": 0.9181, + "step": 7794 + }, + { + "epoch": 1.24, + "learning_rate": 4.4004576952511687e-05, + "loss": 0.9442, + "step": 7795 + }, + { + "epoch": 1.24, + "learning_rate": 4.400290066895807e-05, + "loss": 0.9322, + "step": 7796 + }, + { + "epoch": 1.24, + "learning_rate": 4.4001224183035086e-05, + "loss": 0.9925, + "step": 7797 + }, + { + "epoch": 1.24, + "learning_rate": 4.3999547494760604e-05, + "loss": 0.9163, + "step": 7798 + }, + { + "epoch": 1.24, + "learning_rate": 4.3997870604152476e-05, + "loss": 1.068, + "step": 7799 + }, + { + "epoch": 1.24, + "learning_rate": 4.3996193511228556e-05, + "loss": 0.9313, + "step": 7800 + }, + { + "epoch": 1.24, + "learning_rate": 4.399451621600671e-05, + "loss": 0.9701, + "step": 7801 + }, + { + "epoch": 1.24, + "learning_rate": 4.3992838718504805e-05, + "loss": 0.9831, + "step": 7802 + }, + { + "epoch": 1.24, + "learning_rate": 4.399116101874069e-05, + "loss": 0.9705, + "step": 7803 + }, + { + "epoch": 1.24, + "learning_rate": 4.398948311673224e-05, + "loss": 0.9392, + "step": 7804 + }, + { + "epoch": 1.24, + "learning_rate": 4.398780501249733e-05, + "loss": 0.9829, + "step": 7805 + }, + { + "epoch": 1.24, + "learning_rate": 4.398612670605382e-05, + "loss": 0.9523, + "step": 7806 + }, + { + "epoch": 1.24, + "learning_rate": 4.398444819741959e-05, + "loss": 0.9429, + "step": 7807 + }, + { + "epoch": 1.24, + "learning_rate": 4.398276948661251e-05, + "loss": 1.0219, + "step": 7808 + }, + { + "epoch": 1.24, + "learning_rate": 4.3981090573650466e-05, + "loss": 0.9721, + "step": 7809 + }, + { + "epoch": 1.24, + "learning_rate": 4.397941145855132e-05, + "loss": 1.0001, + "step": 7810 + }, + { + "epoch": 1.24, + "learning_rate": 4.3977732141332975e-05, + "loss": 0.9189, + "step": 7811 + }, + { + "epoch": 1.24, + "learning_rate": 4.3976052622013294e-05, + "loss": 1.0675, + "step": 7812 + }, + { + "epoch": 1.24, + "learning_rate": 4.397437290061018e-05, + "loss": 0.937, + "step": 7813 + }, + { + "epoch": 1.24, + "learning_rate": 4.3972692977141516e-05, + "loss": 1.001, + "step": 7814 + }, + { + "epoch": 1.25, + "learning_rate": 4.397101285162518e-05, + "loss": 1.0008, + "step": 7815 + }, + { + "epoch": 1.25, + "learning_rate": 4.3969332524079085e-05, + "loss": 0.9767, + "step": 7816 + }, + { + "epoch": 1.25, + "learning_rate": 4.396765199452111e-05, + "loss": 1.0033, + "step": 7817 + }, + { + "epoch": 1.25, + "learning_rate": 4.3965971262969164e-05, + "loss": 0.9623, + "step": 7818 + }, + { + "epoch": 1.25, + "learning_rate": 4.396429032944113e-05, + "loss": 0.9991, + "step": 7819 + }, + { + "epoch": 1.25, + "learning_rate": 4.3962609193954915e-05, + "loss": 0.918, + "step": 7820 + }, + { + "epoch": 1.25, + "learning_rate": 4.396092785652844e-05, + "loss": 0.9229, + "step": 7821 + }, + { + "epoch": 1.25, + "learning_rate": 4.395924631717958e-05, + "loss": 0.9804, + "step": 7822 + }, + { + "epoch": 1.25, + "learning_rate": 4.3957564575926265e-05, + "loss": 0.9342, + "step": 7823 + }, + { + "epoch": 1.25, + "learning_rate": 4.395588263278639e-05, + "loss": 0.9658, + "step": 7824 + }, + { + "epoch": 1.25, + "learning_rate": 4.395420048777789e-05, + "loss": 0.9574, + "step": 7825 + }, + { + "epoch": 1.25, + "learning_rate": 4.395251814091864e-05, + "loss": 0.9194, + "step": 7826 + }, + { + "epoch": 1.25, + "learning_rate": 4.3950835592226594e-05, + "loss": 0.9035, + "step": 7827 + }, + { + "epoch": 1.25, + "learning_rate": 4.394915284171964e-05, + "loss": 0.9671, + "step": 7828 + }, + { + "epoch": 1.25, + "learning_rate": 4.3947469889415726e-05, + "loss": 0.9929, + "step": 7829 + }, + { + "epoch": 1.25, + "learning_rate": 4.394578673533276e-05, + "loss": 1.1329, + "step": 7830 + }, + { + "epoch": 1.25, + "learning_rate": 4.3944103379488656e-05, + "loss": 0.9608, + "step": 7831 + }, + { + "epoch": 1.25, + "learning_rate": 4.394241982190136e-05, + "loss": 0.9186, + "step": 7832 + }, + { + "epoch": 1.25, + "learning_rate": 4.394073606258879e-05, + "loss": 0.921, + "step": 7833 + }, + { + "epoch": 1.25, + "learning_rate": 4.393905210156889e-05, + "loss": 0.9766, + "step": 7834 + }, + { + "epoch": 1.25, + "learning_rate": 4.393736793885957e-05, + "loss": 1.0627, + "step": 7835 + }, + { + "epoch": 1.25, + "learning_rate": 4.3935683574478785e-05, + "loss": 0.9669, + "step": 7836 + }, + { + "epoch": 1.25, + "learning_rate": 4.393399900844447e-05, + "loss": 0.9597, + "step": 7837 + }, + { + "epoch": 1.25, + "learning_rate": 4.3932314240774554e-05, + "loss": 1.022, + "step": 7838 + }, + { + "epoch": 1.25, + "learning_rate": 4.393062927148699e-05, + "loss": 0.9668, + "step": 7839 + }, + { + "epoch": 1.25, + "learning_rate": 4.392894410059971e-05, + "loss": 0.9063, + "step": 7840 + }, + { + "epoch": 1.25, + "learning_rate": 4.392725872813068e-05, + "loss": 0.9487, + "step": 7841 + }, + { + "epoch": 1.25, + "learning_rate": 4.392557315409782e-05, + "loss": 1.0209, + "step": 7842 + }, + { + "epoch": 1.25, + "learning_rate": 4.3923887378519115e-05, + "loss": 0.9513, + "step": 7843 + }, + { + "epoch": 1.25, + "learning_rate": 4.3922201401412485e-05, + "loss": 0.9695, + "step": 7844 + }, + { + "epoch": 1.25, + "learning_rate": 4.39205152227959e-05, + "loss": 1.0311, + "step": 7845 + }, + { + "epoch": 1.25, + "learning_rate": 4.391882884268731e-05, + "loss": 0.9929, + "step": 7846 + }, + { + "epoch": 1.25, + "learning_rate": 4.3917142261104684e-05, + "loss": 0.9463, + "step": 7847 + }, + { + "epoch": 1.25, + "learning_rate": 4.391545547806598e-05, + "loss": 0.9683, + "step": 7848 + }, + { + "epoch": 1.25, + "learning_rate": 4.391376849358916e-05, + "loss": 0.9601, + "step": 7849 + }, + { + "epoch": 1.25, + "learning_rate": 4.391208130769218e-05, + "loss": 1.0612, + "step": 7850 + }, + { + "epoch": 1.25, + "learning_rate": 4.391039392039302e-05, + "loss": 1.0409, + "step": 7851 + }, + { + "epoch": 1.25, + "learning_rate": 4.390870633170965e-05, + "loss": 0.9625, + "step": 7852 + }, + { + "epoch": 1.25, + "learning_rate": 4.3907018541660034e-05, + "loss": 1.0767, + "step": 7853 + }, + { + "epoch": 1.25, + "learning_rate": 4.390533055026215e-05, + "loss": 0.9789, + "step": 7854 + }, + { + "epoch": 1.25, + "learning_rate": 4.390364235753397e-05, + "loss": 0.9509, + "step": 7855 + }, + { + "epoch": 1.25, + "learning_rate": 4.390195396349348e-05, + "loss": 1.0966, + "step": 7856 + }, + { + "epoch": 1.25, + "learning_rate": 4.390026536815866e-05, + "loss": 1.0051, + "step": 7857 + }, + { + "epoch": 1.25, + "learning_rate": 4.389857657154747e-05, + "loss": 0.944, + "step": 7858 + }, + { + "epoch": 1.25, + "learning_rate": 4.389688757367793e-05, + "loss": 0.9895, + "step": 7859 + }, + { + "epoch": 1.25, + "learning_rate": 4.3895198374568006e-05, + "loss": 0.9333, + "step": 7860 + }, + { + "epoch": 1.25, + "learning_rate": 4.3893508974235684e-05, + "loss": 0.961, + "step": 7861 + }, + { + "epoch": 1.25, + "learning_rate": 4.389181937269897e-05, + "loss": 0.9816, + "step": 7862 + }, + { + "epoch": 1.25, + "learning_rate": 4.389012956997585e-05, + "loss": 1.0831, + "step": 7863 + }, + { + "epoch": 1.25, + "learning_rate": 4.3888439566084314e-05, + "loss": 0.9517, + "step": 7864 + }, + { + "epoch": 1.25, + "learning_rate": 4.388674936104237e-05, + "loss": 0.9956, + "step": 7865 + }, + { + "epoch": 1.25, + "learning_rate": 4.388505895486801e-05, + "loss": 1.0011, + "step": 7866 + }, + { + "epoch": 1.25, + "learning_rate": 4.388336834757924e-05, + "loss": 0.9536, + "step": 7867 + }, + { + "epoch": 1.25, + "learning_rate": 4.388167753919405e-05, + "loss": 0.9757, + "step": 7868 + }, + { + "epoch": 1.25, + "learning_rate": 4.387998652973046e-05, + "loss": 0.9535, + "step": 7869 + }, + { + "epoch": 1.25, + "learning_rate": 4.387829531920648e-05, + "loss": 0.9192, + "step": 7870 + }, + { + "epoch": 1.25, + "learning_rate": 4.387660390764012e-05, + "loss": 1.0247, + "step": 7871 + }, + { + "epoch": 1.25, + "learning_rate": 4.387491229504939e-05, + "loss": 0.9775, + "step": 7872 + }, + { + "epoch": 1.25, + "learning_rate": 4.3873220481452294e-05, + "loss": 0.9738, + "step": 7873 + }, + { + "epoch": 1.25, + "learning_rate": 4.387152846686686e-05, + "loss": 0.939, + "step": 7874 + }, + { + "epoch": 1.25, + "learning_rate": 4.386983625131111e-05, + "loss": 0.9858, + "step": 7875 + }, + { + "epoch": 1.25, + "learning_rate": 4.386814383480306e-05, + "loss": 1.0276, + "step": 7876 + }, + { + "epoch": 1.25, + "learning_rate": 4.3866451217360726e-05, + "loss": 0.9842, + "step": 7877 + }, + { + "epoch": 1.26, + "learning_rate": 4.3864758399002146e-05, + "loss": 0.9652, + "step": 7878 + }, + { + "epoch": 1.26, + "learning_rate": 4.3863065379745336e-05, + "loss": 0.972, + "step": 7879 + }, + { + "epoch": 1.26, + "learning_rate": 4.3861372159608346e-05, + "loss": 0.9806, + "step": 7880 + }, + { + "epoch": 1.26, + "learning_rate": 4.385967873860918e-05, + "loss": 0.9045, + "step": 7881 + }, + { + "epoch": 1.26, + "learning_rate": 4.385798511676589e-05, + "loss": 0.9732, + "step": 7882 + }, + { + "epoch": 1.26, + "learning_rate": 4.3856291294096505e-05, + "loss": 0.9784, + "step": 7883 + }, + { + "epoch": 1.26, + "learning_rate": 4.385459727061907e-05, + "loss": 0.898, + "step": 7884 + }, + { + "epoch": 1.26, + "learning_rate": 4.3852903046351625e-05, + "loss": 0.9438, + "step": 7885 + }, + { + "epoch": 1.26, + "learning_rate": 4.38512086213122e-05, + "loss": 0.9953, + "step": 7886 + }, + { + "epoch": 1.26, + "learning_rate": 4.3849513995518856e-05, + "loss": 1.0687, + "step": 7887 + }, + { + "epoch": 1.26, + "learning_rate": 4.384781916898962e-05, + "loss": 0.9476, + "step": 7888 + }, + { + "epoch": 1.26, + "learning_rate": 4.384612414174256e-05, + "loss": 0.8811, + "step": 7889 + }, + { + "epoch": 1.26, + "learning_rate": 4.384442891379572e-05, + "loss": 0.9624, + "step": 7890 + }, + { + "epoch": 1.26, + "learning_rate": 4.384273348516715e-05, + "loss": 1.0217, + "step": 7891 + }, + { + "epoch": 1.26, + "learning_rate": 4.384103785587491e-05, + "loss": 0.9064, + "step": 7892 + }, + { + "epoch": 1.26, + "learning_rate": 4.383934202593706e-05, + "loss": 1.0131, + "step": 7893 + }, + { + "epoch": 1.26, + "learning_rate": 4.3837645995371644e-05, + "loss": 0.8924, + "step": 7894 + }, + { + "epoch": 1.26, + "learning_rate": 4.383594976419674e-05, + "loss": 1.0315, + "step": 7895 + }, + { + "epoch": 1.26, + "learning_rate": 4.3834253332430406e-05, + "loss": 0.9032, + "step": 7896 + }, + { + "epoch": 1.26, + "learning_rate": 4.383255670009071e-05, + "loss": 0.9788, + "step": 7897 + }, + { + "epoch": 1.26, + "learning_rate": 4.383085986719572e-05, + "loss": 1.0322, + "step": 7898 + }, + { + "epoch": 1.26, + "learning_rate": 4.3829162833763505e-05, + "loss": 0.9286, + "step": 7899 + }, + { + "epoch": 1.26, + "learning_rate": 4.382746559981214e-05, + "loss": 0.9836, + "step": 7900 + }, + { + "epoch": 1.26, + "learning_rate": 4.382576816535969e-05, + "loss": 0.971, + "step": 7901 + }, + { + "epoch": 1.26, + "learning_rate": 4.3824070530424246e-05, + "loss": 0.9463, + "step": 7902 + }, + { + "epoch": 1.26, + "learning_rate": 4.382237269502387e-05, + "loss": 0.9338, + "step": 7903 + }, + { + "epoch": 1.26, + "learning_rate": 4.382067465917665e-05, + "loss": 0.8869, + "step": 7904 + }, + { + "epoch": 1.26, + "learning_rate": 4.3818976422900683e-05, + "loss": 0.9181, + "step": 7905 + }, + { + "epoch": 1.26, + "learning_rate": 4.381727798621404e-05, + "loss": 1.0411, + "step": 7906 + }, + { + "epoch": 1.26, + "learning_rate": 4.381557934913481e-05, + "loss": 0.9478, + "step": 7907 + }, + { + "epoch": 1.26, + "learning_rate": 4.381388051168108e-05, + "loss": 1.0055, + "step": 7908 + }, + { + "epoch": 1.26, + "learning_rate": 4.381218147387094e-05, + "loss": 0.9163, + "step": 7909 + }, + { + "epoch": 1.26, + "learning_rate": 4.3810482235722494e-05, + "loss": 0.9843, + "step": 7910 + }, + { + "epoch": 1.26, + "learning_rate": 4.3808782797253835e-05, + "loss": 0.9762, + "step": 7911 + }, + { + "epoch": 1.26, + "learning_rate": 4.380708315848305e-05, + "loss": 1.0173, + "step": 7912 + }, + { + "epoch": 1.26, + "learning_rate": 4.380538331942826e-05, + "loss": 0.9043, + "step": 7913 + }, + { + "epoch": 1.26, + "learning_rate": 4.3803683280107543e-05, + "loss": 0.9943, + "step": 7914 + }, + { + "epoch": 1.26, + "learning_rate": 4.380198304053902e-05, + "loss": 0.9733, + "step": 7915 + }, + { + "epoch": 1.26, + "learning_rate": 4.3800282600740797e-05, + "loss": 0.9243, + "step": 7916 + }, + { + "epoch": 1.26, + "learning_rate": 4.379858196073097e-05, + "loss": 0.9231, + "step": 7917 + }, + { + "epoch": 1.26, + "learning_rate": 4.379688112052767e-05, + "loss": 1.0068, + "step": 7918 + }, + { + "epoch": 1.26, + "learning_rate": 4.3795180080148984e-05, + "loss": 1.012, + "step": 7919 + }, + { + "epoch": 1.26, + "learning_rate": 4.379347883961305e-05, + "loss": 1.0387, + "step": 7920 + }, + { + "epoch": 1.26, + "learning_rate": 4.379177739893797e-05, + "loss": 0.9967, + "step": 7921 + }, + { + "epoch": 1.26, + "learning_rate": 4.379007575814187e-05, + "loss": 0.9409, + "step": 7922 + }, + { + "epoch": 1.26, + "learning_rate": 4.378837391724287e-05, + "loss": 1.0055, + "step": 7923 + }, + { + "epoch": 1.26, + "learning_rate": 4.37866718762591e-05, + "loss": 1.0138, + "step": 7924 + }, + { + "epoch": 1.26, + "learning_rate": 4.3784969635208686e-05, + "loss": 0.9004, + "step": 7925 + }, + { + "epoch": 1.26, + "learning_rate": 4.378326719410973e-05, + "loss": 0.9533, + "step": 7926 + }, + { + "epoch": 1.26, + "learning_rate": 4.37815645529804e-05, + "loss": 0.8719, + "step": 7927 + }, + { + "epoch": 1.26, + "learning_rate": 4.37798617118388e-05, + "loss": 0.9868, + "step": 7928 + }, + { + "epoch": 1.26, + "learning_rate": 4.377815867070308e-05, + "loss": 0.9784, + "step": 7929 + }, + { + "epoch": 1.26, + "learning_rate": 4.3776455429591374e-05, + "loss": 0.9757, + "step": 7930 + }, + { + "epoch": 1.26, + "learning_rate": 4.377475198852181e-05, + "loss": 0.9744, + "step": 7931 + }, + { + "epoch": 1.26, + "learning_rate": 4.377304834751253e-05, + "loss": 0.916, + "step": 7932 + }, + { + "epoch": 1.26, + "learning_rate": 4.37713445065817e-05, + "loss": 0.9415, + "step": 7933 + }, + { + "epoch": 1.26, + "learning_rate": 4.3769640465747434e-05, + "loss": 0.9783, + "step": 7934 + }, + { + "epoch": 1.26, + "learning_rate": 4.37679362250279e-05, + "loss": 0.9429, + "step": 7935 + }, + { + "epoch": 1.26, + "learning_rate": 4.376623178444124e-05, + "loss": 0.9127, + "step": 7936 + }, + { + "epoch": 1.26, + "learning_rate": 4.3764527144005594e-05, + "loss": 0.9622, + "step": 7937 + }, + { + "epoch": 1.26, + "learning_rate": 4.376282230373913e-05, + "loss": 0.9374, + "step": 7938 + }, + { + "epoch": 1.26, + "learning_rate": 4.376111726366e-05, + "loss": 0.927, + "step": 7939 + }, + { + "epoch": 1.26, + "learning_rate": 4.3759412023786365e-05, + "loss": 0.8859, + "step": 7940 + }, + { + "epoch": 1.27, + "learning_rate": 4.375770658413638e-05, + "loss": 0.9954, + "step": 7941 + }, + { + "epoch": 1.27, + "learning_rate": 4.375600094472821e-05, + "loss": 0.9317, + "step": 7942 + }, + { + "epoch": 1.27, + "learning_rate": 4.375429510558001e-05, + "loss": 0.9732, + "step": 7943 + }, + { + "epoch": 1.27, + "learning_rate": 4.375258906670996e-05, + "loss": 0.9761, + "step": 7944 + }, + { + "epoch": 1.27, + "learning_rate": 4.375088282813622e-05, + "loss": 0.9634, + "step": 7945 + }, + { + "epoch": 1.27, + "learning_rate": 4.374917638987696e-05, + "loss": 0.8433, + "step": 7946 + }, + { + "epoch": 1.27, + "learning_rate": 4.374746975195035e-05, + "loss": 1.0291, + "step": 7947 + }, + { + "epoch": 1.27, + "learning_rate": 4.374576291437458e-05, + "loss": 1.0103, + "step": 7948 + }, + { + "epoch": 1.27, + "learning_rate": 4.374405587716781e-05, + "loss": 0.9836, + "step": 7949 + }, + { + "epoch": 1.27, + "learning_rate": 4.374234864034822e-05, + "loss": 0.9691, + "step": 7950 + }, + { + "epoch": 1.27, + "learning_rate": 4.374064120393401e-05, + "loss": 0.9866, + "step": 7951 + }, + { + "epoch": 1.27, + "learning_rate": 4.373893356794333e-05, + "loss": 0.974, + "step": 7952 + }, + { + "epoch": 1.27, + "learning_rate": 4.373722573239439e-05, + "loss": 1.0153, + "step": 7953 + }, + { + "epoch": 1.27, + "learning_rate": 4.373551769730538e-05, + "loss": 0.9735, + "step": 7954 + }, + { + "epoch": 1.27, + "learning_rate": 4.3733809462694474e-05, + "loss": 0.9327, + "step": 7955 + }, + { + "epoch": 1.27, + "learning_rate": 4.373210102857988e-05, + "loss": 0.9543, + "step": 7956 + }, + { + "epoch": 1.27, + "learning_rate": 4.3730392394979766e-05, + "loss": 0.8862, + "step": 7957 + }, + { + "epoch": 1.27, + "learning_rate": 4.372868356191236e-05, + "loss": 0.9677, + "step": 7958 + }, + { + "epoch": 1.27, + "learning_rate": 4.3726974529395835e-05, + "loss": 0.9912, + "step": 7959 + }, + { + "epoch": 1.27, + "learning_rate": 4.372526529744841e-05, + "loss": 0.9605, + "step": 7960 + }, + { + "epoch": 1.27, + "learning_rate": 4.3723555866088274e-05, + "loss": 0.9939, + "step": 7961 + }, + { + "epoch": 1.27, + "learning_rate": 4.372184623533363e-05, + "loss": 0.935, + "step": 7962 + }, + { + "epoch": 1.27, + "learning_rate": 4.372013640520269e-05, + "loss": 0.8727, + "step": 7963 + }, + { + "epoch": 1.27, + "learning_rate": 4.371842637571367e-05, + "loss": 0.9457, + "step": 7964 + }, + { + "epoch": 1.27, + "learning_rate": 4.371671614688477e-05, + "loss": 0.9483, + "step": 7965 + }, + { + "epoch": 1.27, + "learning_rate": 4.3715005718734206e-05, + "loss": 0.9272, + "step": 7966 + }, + { + "epoch": 1.27, + "learning_rate": 4.3713295091280194e-05, + "loss": 0.9264, + "step": 7967 + }, + { + "epoch": 1.27, + "learning_rate": 4.3711584264540945e-05, + "loss": 0.9795, + "step": 7968 + }, + { + "epoch": 1.27, + "learning_rate": 4.370987323853469e-05, + "loss": 0.9498, + "step": 7969 + }, + { + "epoch": 1.27, + "learning_rate": 4.370816201327964e-05, + "loss": 0.9626, + "step": 7970 + }, + { + "epoch": 1.27, + "learning_rate": 4.3706450588794024e-05, + "loss": 1.0161, + "step": 7971 + }, + { + "epoch": 1.27, + "learning_rate": 4.370473896509607e-05, + "loss": 0.8694, + "step": 7972 + }, + { + "epoch": 1.27, + "learning_rate": 4.3703027142203996e-05, + "loss": 0.9781, + "step": 7973 + }, + { + "epoch": 1.27, + "learning_rate": 4.370131512013604e-05, + "loss": 0.9016, + "step": 7974 + }, + { + "epoch": 1.27, + "learning_rate": 4.369960289891043e-05, + "loss": 0.9904, + "step": 7975 + }, + { + "epoch": 1.27, + "learning_rate": 4.3697890478545405e-05, + "loss": 0.9086, + "step": 7976 + }, + { + "epoch": 1.27, + "learning_rate": 4.3696177859059196e-05, + "loss": 0.8932, + "step": 7977 + }, + { + "epoch": 1.27, + "learning_rate": 4.3694465040470046e-05, + "loss": 0.905, + "step": 7978 + }, + { + "epoch": 1.27, + "learning_rate": 4.369275202279619e-05, + "loss": 0.9191, + "step": 7979 + }, + { + "epoch": 1.27, + "learning_rate": 4.369103880605588e-05, + "loss": 0.9452, + "step": 7980 + }, + { + "epoch": 1.27, + "learning_rate": 4.368932539026735e-05, + "loss": 0.9778, + "step": 7981 + }, + { + "epoch": 1.27, + "learning_rate": 4.368761177544885e-05, + "loss": 1.0086, + "step": 7982 + }, + { + "epoch": 1.27, + "learning_rate": 4.368589796161863e-05, + "loss": 0.9945, + "step": 7983 + }, + { + "epoch": 1.27, + "learning_rate": 4.368418394879494e-05, + "loss": 0.9894, + "step": 7984 + }, + { + "epoch": 1.27, + "learning_rate": 4.368246973699604e-05, + "loss": 0.9691, + "step": 7985 + }, + { + "epoch": 1.27, + "learning_rate": 4.3680755326240175e-05, + "loss": 1.0115, + "step": 7986 + }, + { + "epoch": 1.27, + "learning_rate": 4.367904071654561e-05, + "loss": 0.9386, + "step": 7987 + }, + { + "epoch": 1.27, + "learning_rate": 4.36773259079306e-05, + "loss": 0.9096, + "step": 7988 + }, + { + "epoch": 1.27, + "learning_rate": 4.3675610900413404e-05, + "loss": 0.9211, + "step": 7989 + }, + { + "epoch": 1.27, + "learning_rate": 4.36738956940123e-05, + "loss": 0.8557, + "step": 7990 + }, + { + "epoch": 1.27, + "learning_rate": 4.367218028874553e-05, + "loss": 0.9679, + "step": 7991 + }, + { + "epoch": 1.27, + "learning_rate": 4.367046468463139e-05, + "loss": 0.9086, + "step": 7992 + }, + { + "epoch": 1.27, + "learning_rate": 4.366874888168813e-05, + "loss": 0.9573, + "step": 7993 + }, + { + "epoch": 1.27, + "learning_rate": 4.366703287993403e-05, + "loss": 0.9064, + "step": 7994 + }, + { + "epoch": 1.27, + "learning_rate": 4.366531667938736e-05, + "loss": 0.9017, + "step": 7995 + }, + { + "epoch": 1.27, + "learning_rate": 4.366360028006641e-05, + "loss": 0.8993, + "step": 7996 + }, + { + "epoch": 1.27, + "learning_rate": 4.3661883681989434e-05, + "loss": 0.9487, + "step": 7997 + }, + { + "epoch": 1.27, + "learning_rate": 4.366016688517474e-05, + "loss": 0.9886, + "step": 7998 + }, + { + "epoch": 1.27, + "learning_rate": 4.3658449889640584e-05, + "loss": 0.9961, + "step": 7999 + }, + { + "epoch": 1.27, + "learning_rate": 4.365673269540527e-05, + "loss": 0.9519, + "step": 8000 + }, + { + "epoch": 1.27, + "learning_rate": 4.3655015302487076e-05, + "loss": 0.9756, + "step": 8001 + }, + { + "epoch": 1.27, + "learning_rate": 4.3653297710904306e-05, + "loss": 0.9522, + "step": 8002 + }, + { + "epoch": 1.27, + "learning_rate": 4.365157992067523e-05, + "loss": 0.9035, + "step": 8003 + }, + { + "epoch": 1.28, + "learning_rate": 4.364986193181815e-05, + "loss": 0.9128, + "step": 8004 + }, + { + "epoch": 1.28, + "learning_rate": 4.364814374435137e-05, + "loss": 0.9149, + "step": 8005 + }, + { + "epoch": 1.28, + "learning_rate": 4.364642535829318e-05, + "loss": 1.0081, + "step": 8006 + }, + { + "epoch": 1.28, + "learning_rate": 4.364470677366188e-05, + "loss": 0.9566, + "step": 8007 + }, + { + "epoch": 1.28, + "learning_rate": 4.364298799047577e-05, + "loss": 0.9158, + "step": 8008 + }, + { + "epoch": 1.28, + "learning_rate": 4.364126900875316e-05, + "loss": 0.8327, + "step": 8009 + }, + { + "epoch": 1.28, + "learning_rate": 4.3639549828512343e-05, + "loss": 0.9701, + "step": 8010 + }, + { + "epoch": 1.28, + "learning_rate": 4.363783044977164e-05, + "loss": 0.9102, + "step": 8011 + }, + { + "epoch": 1.28, + "learning_rate": 4.363611087254936e-05, + "loss": 0.9726, + "step": 8012 + }, + { + "epoch": 1.28, + "learning_rate": 4.363439109686383e-05, + "loss": 1.0319, + "step": 8013 + }, + { + "epoch": 1.28, + "learning_rate": 4.3632671122733326e-05, + "loss": 0.966, + "step": 8014 + }, + { + "epoch": 1.28, + "learning_rate": 4.363095095017619e-05, + "loss": 0.8945, + "step": 8015 + }, + { + "epoch": 1.28, + "learning_rate": 4.3629230579210746e-05, + "loss": 1.005, + "step": 8016 + }, + { + "epoch": 1.28, + "learning_rate": 4.36275100098553e-05, + "loss": 0.9246, + "step": 8017 + }, + { + "epoch": 1.28, + "learning_rate": 4.362578924212818e-05, + "loss": 1.021, + "step": 8018 + }, + { + "epoch": 1.28, + "learning_rate": 4.362406827604771e-05, + "loss": 0.9082, + "step": 8019 + }, + { + "epoch": 1.28, + "learning_rate": 4.362234711163223e-05, + "loss": 0.9455, + "step": 8020 + }, + { + "epoch": 1.28, + "learning_rate": 4.3620625748900056e-05, + "loss": 0.9258, + "step": 8021 + }, + { + "epoch": 1.28, + "learning_rate": 4.361890418786952e-05, + "loss": 0.907, + "step": 8022 + }, + { + "epoch": 1.28, + "learning_rate": 4.361718242855896e-05, + "loss": 1.0048, + "step": 8023 + }, + { + "epoch": 1.28, + "learning_rate": 4.361546047098671e-05, + "loss": 0.969, + "step": 8024 + }, + { + "epoch": 1.28, + "learning_rate": 4.36137383151711e-05, + "loss": 0.892, + "step": 8025 + }, + { + "epoch": 1.28, + "learning_rate": 4.361201596113049e-05, + "loss": 0.9811, + "step": 8026 + }, + { + "epoch": 1.28, + "learning_rate": 4.3610293408883206e-05, + "loss": 0.9801, + "step": 8027 + }, + { + "epoch": 1.28, + "learning_rate": 4.3608570658447586e-05, + "loss": 0.9616, + "step": 8028 + }, + { + "epoch": 1.28, + "learning_rate": 4.3606847709842e-05, + "loss": 0.9736, + "step": 8029 + }, + { + "epoch": 1.28, + "learning_rate": 4.3605124563084775e-05, + "loss": 0.9014, + "step": 8030 + }, + { + "epoch": 1.28, + "learning_rate": 4.360340121819427e-05, + "loss": 0.9556, + "step": 8031 + }, + { + "epoch": 1.28, + "learning_rate": 4.360167767518884e-05, + "loss": 0.9093, + "step": 8032 + }, + { + "epoch": 1.28, + "learning_rate": 4.3599953934086834e-05, + "loss": 0.8938, + "step": 8033 + }, + { + "epoch": 1.28, + "learning_rate": 4.35982299949066e-05, + "loss": 0.9501, + "step": 8034 + }, + { + "epoch": 1.28, + "learning_rate": 4.3596505857666526e-05, + "loss": 0.9113, + "step": 8035 + }, + { + "epoch": 1.28, + "learning_rate": 4.3594781522384944e-05, + "loss": 0.9405, + "step": 8036 + }, + { + "epoch": 1.28, + "learning_rate": 4.359305698908023e-05, + "loss": 0.9626, + "step": 8037 + }, + { + "epoch": 1.28, + "learning_rate": 4.3591332257770745e-05, + "loss": 0.9284, + "step": 8038 + }, + { + "epoch": 1.28, + "learning_rate": 4.358960732847486e-05, + "loss": 1.0175, + "step": 8039 + }, + { + "epoch": 1.28, + "learning_rate": 4.358788220121095e-05, + "loss": 0.9441, + "step": 8040 + }, + { + "epoch": 1.28, + "learning_rate": 4.358615687599737e-05, + "loss": 0.9425, + "step": 8041 + }, + { + "epoch": 1.28, + "learning_rate": 4.3584431352852507e-05, + "loss": 0.9872, + "step": 8042 + }, + { + "epoch": 1.28, + "learning_rate": 4.358270563179473e-05, + "loss": 1.0086, + "step": 8043 + }, + { + "epoch": 1.28, + "learning_rate": 4.358097971284242e-05, + "loss": 0.9059, + "step": 8044 + }, + { + "epoch": 1.28, + "learning_rate": 4.357925359601396e-05, + "loss": 0.8715, + "step": 8045 + }, + { + "epoch": 1.28, + "learning_rate": 4.357752728132774e-05, + "loss": 0.9319, + "step": 8046 + }, + { + "epoch": 1.28, + "learning_rate": 4.357580076880211e-05, + "loss": 1.0006, + "step": 8047 + }, + { + "epoch": 1.28, + "learning_rate": 4.3574074058455494e-05, + "loss": 0.9591, + "step": 8048 + }, + { + "epoch": 1.28, + "learning_rate": 4.3572347150306266e-05, + "loss": 0.9836, + "step": 8049 + }, + { + "epoch": 1.28, + "learning_rate": 4.357062004437281e-05, + "loss": 0.9128, + "step": 8050 + }, + { + "epoch": 1.28, + "learning_rate": 4.356889274067353e-05, + "loss": 0.9595, + "step": 8051 + }, + { + "epoch": 1.28, + "learning_rate": 4.3567165239226813e-05, + "loss": 0.9392, + "step": 8052 + }, + { + "epoch": 1.28, + "learning_rate": 4.356543754005106e-05, + "loss": 0.9457, + "step": 8053 + }, + { + "epoch": 1.28, + "learning_rate": 4.356370964316466e-05, + "loss": 0.9552, + "step": 8054 + }, + { + "epoch": 1.28, + "learning_rate": 4.3561981548586034e-05, + "loss": 0.855, + "step": 8055 + }, + { + "epoch": 1.28, + "learning_rate": 4.356025325633357e-05, + "loss": 0.953, + "step": 8056 + }, + { + "epoch": 1.28, + "learning_rate": 4.355852476642568e-05, + "loss": 1.0409, + "step": 8057 + }, + { + "epoch": 1.28, + "learning_rate": 4.355679607888077e-05, + "loss": 0.9349, + "step": 8058 + }, + { + "epoch": 1.28, + "learning_rate": 4.355506719371724e-05, + "loss": 0.9057, + "step": 8059 + }, + { + "epoch": 1.28, + "learning_rate": 4.355333811095351e-05, + "loss": 1.0042, + "step": 8060 + }, + { + "epoch": 1.28, + "learning_rate": 4.3551608830607996e-05, + "loss": 0.9768, + "step": 8061 + }, + { + "epoch": 1.28, + "learning_rate": 4.3549879352699116e-05, + "loss": 0.9453, + "step": 8062 + }, + { + "epoch": 1.28, + "learning_rate": 4.354814967724528e-05, + "loss": 0.8971, + "step": 8063 + }, + { + "epoch": 1.28, + "learning_rate": 4.3546419804264904e-05, + "loss": 0.9385, + "step": 8064 + }, + { + "epoch": 1.28, + "learning_rate": 4.354468973377642e-05, + "loss": 0.961, + "step": 8065 + }, + { + "epoch": 1.29, + "learning_rate": 4.354295946579825e-05, + "loss": 0.9891, + "step": 8066 + }, + { + "epoch": 1.29, + "learning_rate": 4.3541229000348824e-05, + "loss": 1.004, + "step": 8067 + }, + { + "epoch": 1.29, + "learning_rate": 4.353949833744656e-05, + "loss": 1.0298, + "step": 8068 + }, + { + "epoch": 1.29, + "learning_rate": 4.35377674771099e-05, + "loss": 0.9902, + "step": 8069 + }, + { + "epoch": 1.29, + "learning_rate": 4.353603641935726e-05, + "loss": 0.9735, + "step": 8070 + }, + { + "epoch": 1.29, + "learning_rate": 4.3534305164207096e-05, + "loss": 0.8975, + "step": 8071 + }, + { + "epoch": 1.29, + "learning_rate": 4.353257371167783e-05, + "loss": 0.9226, + "step": 8072 + }, + { + "epoch": 1.29, + "learning_rate": 4.353084206178791e-05, + "loss": 1.0033, + "step": 8073 + }, + { + "epoch": 1.29, + "learning_rate": 4.352911021455577e-05, + "loss": 0.8856, + "step": 8074 + }, + { + "epoch": 1.29, + "learning_rate": 4.352737816999986e-05, + "loss": 0.8476, + "step": 8075 + }, + { + "epoch": 1.29, + "learning_rate": 4.352564592813861e-05, + "loss": 1.0293, + "step": 8076 + }, + { + "epoch": 1.29, + "learning_rate": 4.352391348899048e-05, + "loss": 0.9005, + "step": 8077 + }, + { + "epoch": 1.29, + "learning_rate": 4.3522180852573925e-05, + "loss": 0.9432, + "step": 8078 + }, + { + "epoch": 1.29, + "learning_rate": 4.352044801890738e-05, + "loss": 1.0044, + "step": 8079 + }, + { + "epoch": 1.29, + "learning_rate": 4.3518714988009315e-05, + "loss": 0.9789, + "step": 8080 + }, + { + "epoch": 1.29, + "learning_rate": 4.351698175989817e-05, + "loss": 0.9083, + "step": 8081 + }, + { + "epoch": 1.29, + "learning_rate": 4.351524833459242e-05, + "loss": 1.0052, + "step": 8082 + }, + { + "epoch": 1.29, + "learning_rate": 4.3513514712110513e-05, + "loss": 1.0058, + "step": 8083 + }, + { + "epoch": 1.29, + "learning_rate": 4.351178089247091e-05, + "loss": 0.9399, + "step": 8084 + }, + { + "epoch": 1.29, + "learning_rate": 4.351004687569209e-05, + "loss": 0.8907, + "step": 8085 + }, + { + "epoch": 1.29, + "learning_rate": 4.35083126617925e-05, + "loss": 0.9584, + "step": 8086 + }, + { + "epoch": 1.29, + "learning_rate": 4.350657825079062e-05, + "loss": 1.0338, + "step": 8087 + }, + { + "epoch": 1.29, + "learning_rate": 4.350484364270492e-05, + "loss": 0.9736, + "step": 8088 + }, + { + "epoch": 1.29, + "learning_rate": 4.3503108837553865e-05, + "loss": 0.9995, + "step": 8089 + }, + { + "epoch": 1.29, + "learning_rate": 4.350137383535593e-05, + "loss": 1.058, + "step": 8090 + }, + { + "epoch": 1.29, + "learning_rate": 4.34996386361296e-05, + "loss": 0.9708, + "step": 8091 + }, + { + "epoch": 1.29, + "learning_rate": 4.3497903239893355e-05, + "loss": 0.9574, + "step": 8092 + }, + { + "epoch": 1.29, + "learning_rate": 4.3496167646665666e-05, + "loss": 1.0255, + "step": 8093 + }, + { + "epoch": 1.29, + "learning_rate": 4.3494431856465025e-05, + "loss": 0.949, + "step": 8094 + }, + { + "epoch": 1.29, + "learning_rate": 4.3492695869309904e-05, + "loss": 0.9405, + "step": 8095 + }, + { + "epoch": 1.29, + "learning_rate": 4.3490959685218804e-05, + "loss": 0.9124, + "step": 8096 + }, + { + "epoch": 1.29, + "learning_rate": 4.3489223304210216e-05, + "loss": 0.9562, + "step": 8097 + }, + { + "epoch": 1.29, + "learning_rate": 4.348748672630262e-05, + "loss": 0.8862, + "step": 8098 + }, + { + "epoch": 1.29, + "learning_rate": 4.348574995151451e-05, + "loss": 0.9269, + "step": 8099 + }, + { + "epoch": 1.29, + "learning_rate": 4.348401297986439e-05, + "loss": 0.9188, + "step": 8100 + }, + { + "epoch": 1.29, + "learning_rate": 4.348227581137075e-05, + "loss": 0.9843, + "step": 8101 + }, + { + "epoch": 1.29, + "learning_rate": 4.34805384460521e-05, + "loss": 0.9531, + "step": 8102 + }, + { + "epoch": 1.29, + "learning_rate": 4.347880088392693e-05, + "loss": 0.9553, + "step": 8103 + }, + { + "epoch": 1.29, + "learning_rate": 4.347706312501375e-05, + "loss": 0.9229, + "step": 8104 + }, + { + "epoch": 1.29, + "learning_rate": 4.347532516933107e-05, + "loss": 0.9051, + "step": 8105 + }, + { + "epoch": 1.29, + "learning_rate": 4.347358701689739e-05, + "loss": 0.9775, + "step": 8106 + }, + { + "epoch": 1.29, + "learning_rate": 4.347184866773123e-05, + "loss": 0.9766, + "step": 8107 + }, + { + "epoch": 1.29, + "learning_rate": 4.3470110121851087e-05, + "loss": 0.9289, + "step": 8108 + }, + { + "epoch": 1.29, + "learning_rate": 4.346837137927549e-05, + "loss": 0.9694, + "step": 8109 + }, + { + "epoch": 1.29, + "learning_rate": 4.3466632440022946e-05, + "loss": 0.8281, + "step": 8110 + }, + { + "epoch": 1.29, + "learning_rate": 4.346489330411198e-05, + "loss": 0.8929, + "step": 8111 + }, + { + "epoch": 1.29, + "learning_rate": 4.346315397156111e-05, + "loss": 0.9596, + "step": 8112 + }, + { + "epoch": 1.29, + "learning_rate": 4.3461414442388864e-05, + "loss": 0.9842, + "step": 8113 + }, + { + "epoch": 1.29, + "learning_rate": 4.3459674716613755e-05, + "loss": 0.9694, + "step": 8114 + }, + { + "epoch": 1.29, + "learning_rate": 4.345793479425433e-05, + "loss": 0.9657, + "step": 8115 + }, + { + "epoch": 1.29, + "learning_rate": 4.3456194675329085e-05, + "loss": 0.9019, + "step": 8116 + }, + { + "epoch": 1.29, + "learning_rate": 4.345445435985659e-05, + "loss": 0.893, + "step": 8117 + }, + { + "epoch": 1.29, + "learning_rate": 4.3452713847855356e-05, + "loss": 0.9637, + "step": 8118 + }, + { + "epoch": 1.29, + "learning_rate": 4.345097313934392e-05, + "loss": 1.0271, + "step": 8119 + }, + { + "epoch": 1.29, + "learning_rate": 4.344923223434082e-05, + "loss": 0.8337, + "step": 8120 + }, + { + "epoch": 1.29, + "learning_rate": 4.34474911328646e-05, + "loss": 0.8689, + "step": 8121 + }, + { + "epoch": 1.29, + "learning_rate": 4.3445749834933804e-05, + "loss": 0.9428, + "step": 8122 + }, + { + "epoch": 1.29, + "learning_rate": 4.344400834056696e-05, + "loss": 0.9578, + "step": 8123 + }, + { + "epoch": 1.29, + "learning_rate": 4.344226664978263e-05, + "loss": 0.9494, + "step": 8124 + }, + { + "epoch": 1.29, + "learning_rate": 4.344052476259935e-05, + "loss": 0.9654, + "step": 8125 + }, + { + "epoch": 1.29, + "learning_rate": 4.34387826790357e-05, + "loss": 0.9642, + "step": 8126 + }, + { + "epoch": 1.29, + "learning_rate": 4.343704039911018e-05, + "loss": 0.9355, + "step": 8127 + }, + { + "epoch": 1.29, + "learning_rate": 4.343529792284139e-05, + "loss": 0.8617, + "step": 8128 + }, + { + "epoch": 1.3, + "learning_rate": 4.343355525024787e-05, + "loss": 1.0171, + "step": 8129 + }, + { + "epoch": 1.3, + "learning_rate": 4.343181238134817e-05, + "loss": 0.9536, + "step": 8130 + }, + { + "epoch": 1.3, + "learning_rate": 4.343006931616086e-05, + "loss": 0.9136, + "step": 8131 + }, + { + "epoch": 1.3, + "learning_rate": 4.34283260547045e-05, + "loss": 0.9076, + "step": 8132 + }, + { + "epoch": 1.3, + "learning_rate": 4.3426582596997664e-05, + "loss": 1.0147, + "step": 8133 + }, + { + "epoch": 1.3, + "learning_rate": 4.3424838943058906e-05, + "loss": 0.9355, + "step": 8134 + }, + { + "epoch": 1.3, + "learning_rate": 4.3423095092906796e-05, + "loss": 0.9906, + "step": 8135 + }, + { + "epoch": 1.3, + "learning_rate": 4.3421351046559914e-05, + "loss": 0.9323, + "step": 8136 + }, + { + "epoch": 1.3, + "learning_rate": 4.341960680403682e-05, + "loss": 1.0297, + "step": 8137 + }, + { + "epoch": 1.3, + "learning_rate": 4.34178623653561e-05, + "loss": 0.9037, + "step": 8138 + }, + { + "epoch": 1.3, + "learning_rate": 4.3416117730536324e-05, + "loss": 0.9416, + "step": 8139 + }, + { + "epoch": 1.3, + "learning_rate": 4.341437289959608e-05, + "loss": 0.9086, + "step": 8140 + }, + { + "epoch": 1.3, + "learning_rate": 4.341262787255395e-05, + "loss": 0.9276, + "step": 8141 + }, + { + "epoch": 1.3, + "learning_rate": 4.3410882649428494e-05, + "loss": 0.9613, + "step": 8142 + }, + { + "epoch": 1.3, + "learning_rate": 4.3409137230238326e-05, + "loss": 0.9508, + "step": 8143 + }, + { + "epoch": 1.3, + "learning_rate": 4.340739161500202e-05, + "loss": 0.9661, + "step": 8144 + }, + { + "epoch": 1.3, + "learning_rate": 4.340564580373818e-05, + "loss": 0.9694, + "step": 8145 + }, + { + "epoch": 1.3, + "learning_rate": 4.3403899796465375e-05, + "loss": 0.9667, + "step": 8146 + }, + { + "epoch": 1.3, + "learning_rate": 4.340215359320221e-05, + "loss": 0.9631, + "step": 8147 + }, + { + "epoch": 1.3, + "learning_rate": 4.340040719396728e-05, + "loss": 0.957, + "step": 8148 + }, + { + "epoch": 1.3, + "learning_rate": 4.33986605987792e-05, + "loss": 0.9328, + "step": 8149 + }, + { + "epoch": 1.3, + "learning_rate": 4.339691380765654e-05, + "loss": 0.9574, + "step": 8150 + }, + { + "epoch": 1.3, + "learning_rate": 4.339516682061792e-05, + "loss": 0.983, + "step": 8151 + }, + { + "epoch": 1.3, + "learning_rate": 4.339341963768194e-05, + "loss": 1.02, + "step": 8152 + }, + { + "epoch": 1.3, + "learning_rate": 4.339167225886721e-05, + "loss": 0.976, + "step": 8153 + }, + { + "epoch": 1.3, + "learning_rate": 4.338992468419234e-05, + "loss": 1.0601, + "step": 8154 + }, + { + "epoch": 1.3, + "learning_rate": 4.3388176913675934e-05, + "loss": 0.9274, + "step": 8155 + }, + { + "epoch": 1.3, + "learning_rate": 4.33864289473366e-05, + "loss": 0.9896, + "step": 8156 + }, + { + "epoch": 1.3, + "learning_rate": 4.338468078519298e-05, + "loss": 0.9146, + "step": 8157 + }, + { + "epoch": 1.3, + "learning_rate": 4.338293242726365e-05, + "loss": 1.0124, + "step": 8158 + }, + { + "epoch": 1.3, + "learning_rate": 4.3381183873567255e-05, + "loss": 0.9242, + "step": 8159 + }, + { + "epoch": 1.3, + "learning_rate": 4.337943512412242e-05, + "loss": 0.9724, + "step": 8160 + }, + { + "epoch": 1.3, + "learning_rate": 4.337768617894775e-05, + "loss": 0.9561, + "step": 8161 + }, + { + "epoch": 1.3, + "learning_rate": 4.337593703806189e-05, + "loss": 0.9902, + "step": 8162 + }, + { + "epoch": 1.3, + "learning_rate": 4.337418770148345e-05, + "loss": 0.9471, + "step": 8163 + }, + { + "epoch": 1.3, + "learning_rate": 4.3372438169231066e-05, + "loss": 0.9171, + "step": 8164 + }, + { + "epoch": 1.3, + "learning_rate": 4.337068844132337e-05, + "loss": 0.9087, + "step": 8165 + }, + { + "epoch": 1.3, + "learning_rate": 4.336893851777899e-05, + "loss": 0.9093, + "step": 8166 + }, + { + "epoch": 1.3, + "learning_rate": 4.336718839861658e-05, + "loss": 0.9187, + "step": 8167 + }, + { + "epoch": 1.3, + "learning_rate": 4.336543808385475e-05, + "loss": 0.9299, + "step": 8168 + }, + { + "epoch": 1.3, + "learning_rate": 4.336368757351217e-05, + "loss": 0.9681, + "step": 8169 + }, + { + "epoch": 1.3, + "learning_rate": 4.336193686760745e-05, + "loss": 0.9106, + "step": 8170 + }, + { + "epoch": 1.3, + "learning_rate": 4.336018596615926e-05, + "loss": 0.9278, + "step": 8171 + }, + { + "epoch": 1.3, + "learning_rate": 4.3358434869186235e-05, + "loss": 0.9443, + "step": 8172 + }, + { + "epoch": 1.3, + "learning_rate": 4.335668357670702e-05, + "loss": 0.8945, + "step": 8173 + }, + { + "epoch": 1.3, + "learning_rate": 4.3354932088740276e-05, + "loss": 0.9273, + "step": 8174 + }, + { + "epoch": 1.3, + "learning_rate": 4.3353180405304644e-05, + "loss": 0.9522, + "step": 8175 + }, + { + "epoch": 1.3, + "learning_rate": 4.335142852641879e-05, + "loss": 0.8862, + "step": 8176 + }, + { + "epoch": 1.3, + "learning_rate": 4.334967645210136e-05, + "loss": 0.9408, + "step": 8177 + }, + { + "epoch": 1.3, + "learning_rate": 4.3347924182371016e-05, + "loss": 0.8958, + "step": 8178 + }, + { + "epoch": 1.3, + "learning_rate": 4.334617171724641e-05, + "loss": 0.9055, + "step": 8179 + }, + { + "epoch": 1.3, + "learning_rate": 4.3344419056746223e-05, + "loss": 0.8957, + "step": 8180 + }, + { + "epoch": 1.3, + "learning_rate": 4.3342666200889105e-05, + "loss": 0.8698, + "step": 8181 + }, + { + "epoch": 1.3, + "learning_rate": 4.3340913149693726e-05, + "loss": 0.8955, + "step": 8182 + }, + { + "epoch": 1.3, + "learning_rate": 4.333915990317877e-05, + "loss": 0.9618, + "step": 8183 + }, + { + "epoch": 1.3, + "learning_rate": 4.333740646136288e-05, + "loss": 0.9483, + "step": 8184 + }, + { + "epoch": 1.3, + "learning_rate": 4.333565282426476e-05, + "loss": 0.9542, + "step": 8185 + }, + { + "epoch": 1.3, + "learning_rate": 4.3333898991903055e-05, + "loss": 0.9547, + "step": 8186 + }, + { + "epoch": 1.3, + "learning_rate": 4.3332144964296465e-05, + "loss": 0.9746, + "step": 8187 + }, + { + "epoch": 1.3, + "learning_rate": 4.3330390741463654e-05, + "loss": 0.984, + "step": 8188 + }, + { + "epoch": 1.3, + "learning_rate": 4.332863632342332e-05, + "loss": 0.9561, + "step": 8189 + }, + { + "epoch": 1.3, + "learning_rate": 4.3326881710194126e-05, + "loss": 1.0054, + "step": 8190 + }, + { + "epoch": 1.3, + "learning_rate": 4.332512690179478e-05, + "loss": 0.9105, + "step": 8191 + }, + { + "epoch": 1.31, + "learning_rate": 4.332337189824395e-05, + "loss": 0.9362, + "step": 8192 + }, + { + "epoch": 1.31, + "learning_rate": 4.332161669956033e-05, + "loss": 0.8981, + "step": 8193 + }, + { + "epoch": 1.31, + "learning_rate": 4.331986130576262e-05, + "loss": 0.9317, + "step": 8194 + }, + { + "epoch": 1.31, + "learning_rate": 4.3318105716869504e-05, + "loss": 0.8585, + "step": 8195 + }, + { + "epoch": 1.31, + "learning_rate": 4.331634993289969e-05, + "loss": 0.9226, + "step": 8196 + }, + { + "epoch": 1.31, + "learning_rate": 4.331459395387187e-05, + "loss": 0.9422, + "step": 8197 + }, + { + "epoch": 1.31, + "learning_rate": 4.3312837779804746e-05, + "loss": 0.9627, + "step": 8198 + }, + { + "epoch": 1.31, + "learning_rate": 4.3311081410717014e-05, + "loss": 0.9175, + "step": 8199 + }, + { + "epoch": 1.31, + "learning_rate": 4.3309324846627376e-05, + "loss": 0.8912, + "step": 8200 + }, + { + "epoch": 1.31, + "learning_rate": 4.330756808755455e-05, + "loss": 0.9729, + "step": 8201 + }, + { + "epoch": 1.31, + "learning_rate": 4.330581113351724e-05, + "loss": 0.9312, + "step": 8202 + }, + { + "epoch": 1.31, + "learning_rate": 4.330405398453415e-05, + "loss": 0.9611, + "step": 8203 + }, + { + "epoch": 1.31, + "learning_rate": 4.3302296640624004e-05, + "loss": 0.8766, + "step": 8204 + }, + { + "epoch": 1.31, + "learning_rate": 4.33005391018055e-05, + "loss": 0.9617, + "step": 8205 + }, + { + "epoch": 1.31, + "learning_rate": 4.329878136809738e-05, + "loss": 0.9019, + "step": 8206 + }, + { + "epoch": 1.31, + "learning_rate": 4.3297023439518334e-05, + "loss": 1.0267, + "step": 8207 + }, + { + "epoch": 1.31, + "learning_rate": 4.3295265316087105e-05, + "loss": 0.9231, + "step": 8208 + }, + { + "epoch": 1.31, + "learning_rate": 4.329350699782241e-05, + "loss": 0.9029, + "step": 8209 + }, + { + "epoch": 1.31, + "learning_rate": 4.3291748484742965e-05, + "loss": 0.9362, + "step": 8210 + }, + { + "epoch": 1.31, + "learning_rate": 4.328998977686751e-05, + "loss": 1.0409, + "step": 8211 + }, + { + "epoch": 1.31, + "learning_rate": 4.328823087421476e-05, + "loss": 0.9229, + "step": 8212 + }, + { + "epoch": 1.31, + "learning_rate": 4.328647177680346e-05, + "loss": 1.0246, + "step": 8213 + }, + { + "epoch": 1.31, + "learning_rate": 4.328471248465233e-05, + "loss": 0.8857, + "step": 8214 + }, + { + "epoch": 1.31, + "learning_rate": 4.328295299778013e-05, + "loss": 0.9465, + "step": 8215 + }, + { + "epoch": 1.31, + "learning_rate": 4.3281193316205574e-05, + "loss": 0.9278, + "step": 8216 + }, + { + "epoch": 1.31, + "learning_rate": 4.327943343994739e-05, + "loss": 0.8972, + "step": 8217 + }, + { + "epoch": 1.31, + "learning_rate": 4.327767336902436e-05, + "loss": 0.917, + "step": 8218 + }, + { + "epoch": 1.31, + "learning_rate": 4.3275913103455194e-05, + "loss": 0.9549, + "step": 8219 + }, + { + "epoch": 1.31, + "learning_rate": 4.327415264325865e-05, + "loss": 0.8752, + "step": 8220 + }, + { + "epoch": 1.31, + "learning_rate": 4.327239198845348e-05, + "loss": 1.0422, + "step": 8221 + }, + { + "epoch": 1.31, + "learning_rate": 4.327063113905842e-05, + "loss": 0.9299, + "step": 8222 + }, + { + "epoch": 1.31, + "learning_rate": 4.326887009509224e-05, + "loss": 0.9709, + "step": 8223 + }, + { + "epoch": 1.31, + "learning_rate": 4.326710885657368e-05, + "loss": 1.0094, + "step": 8224 + }, + { + "epoch": 1.31, + "learning_rate": 4.32653474235215e-05, + "loss": 0.9573, + "step": 8225 + }, + { + "epoch": 1.31, + "learning_rate": 4.326358579595446e-05, + "loss": 1.0038, + "step": 8226 + }, + { + "epoch": 1.31, + "learning_rate": 4.326182397389132e-05, + "loss": 0.9332, + "step": 8227 + }, + { + "epoch": 1.31, + "learning_rate": 4.326006195735084e-05, + "loss": 0.8968, + "step": 8228 + }, + { + "epoch": 1.31, + "learning_rate": 4.3258299746351795e-05, + "loss": 0.9942, + "step": 8229 + }, + { + "epoch": 1.31, + "learning_rate": 4.325653734091293e-05, + "loss": 0.8206, + "step": 8230 + }, + { + "epoch": 1.31, + "learning_rate": 4.3254774741053025e-05, + "loss": 0.8643, + "step": 8231 + }, + { + "epoch": 1.31, + "learning_rate": 4.325301194679087e-05, + "loss": 1.0005, + "step": 8232 + }, + { + "epoch": 1.31, + "learning_rate": 4.32512489581452e-05, + "loss": 0.9115, + "step": 8233 + }, + { + "epoch": 1.31, + "learning_rate": 4.324948577513482e-05, + "loss": 0.9515, + "step": 8234 + }, + { + "epoch": 1.31, + "learning_rate": 4.324772239777849e-05, + "loss": 0.8836, + "step": 8235 + }, + { + "epoch": 1.31, + "learning_rate": 4.3245958826095e-05, + "loss": 0.9877, + "step": 8236 + }, + { + "epoch": 1.31, + "learning_rate": 4.324419506010312e-05, + "loss": 0.9574, + "step": 8237 + }, + { + "epoch": 1.31, + "learning_rate": 4.324243109982164e-05, + "loss": 0.9037, + "step": 8238 + }, + { + "epoch": 1.31, + "learning_rate": 4.324066694526935e-05, + "loss": 0.9111, + "step": 8239 + }, + { + "epoch": 1.31, + "learning_rate": 4.3238902596465025e-05, + "loss": 0.9431, + "step": 8240 + }, + { + "epoch": 1.31, + "learning_rate": 4.323713805342747e-05, + "loss": 0.9485, + "step": 8241 + }, + { + "epoch": 1.31, + "learning_rate": 4.323537331617545e-05, + "loss": 0.9121, + "step": 8242 + }, + { + "epoch": 1.31, + "learning_rate": 4.323360838472779e-05, + "loss": 0.9234, + "step": 8243 + }, + { + "epoch": 1.31, + "learning_rate": 4.3231843259103265e-05, + "loss": 0.9598, + "step": 8244 + }, + { + "epoch": 1.31, + "learning_rate": 4.323007793932068e-05, + "loss": 0.9199, + "step": 8245 + }, + { + "epoch": 1.31, + "learning_rate": 4.322831242539883e-05, + "loss": 0.9513, + "step": 8246 + }, + { + "epoch": 1.31, + "learning_rate": 4.322654671735652e-05, + "loss": 0.9511, + "step": 8247 + }, + { + "epoch": 1.31, + "learning_rate": 4.322478081521256e-05, + "loss": 0.8563, + "step": 8248 + }, + { + "epoch": 1.31, + "learning_rate": 4.3223014718985744e-05, + "loss": 0.9555, + "step": 8249 + }, + { + "epoch": 1.31, + "learning_rate": 4.3221248428694884e-05, + "loss": 0.9443, + "step": 8250 + }, + { + "epoch": 1.31, + "learning_rate": 4.3219481944358784e-05, + "loss": 0.8917, + "step": 8251 + }, + { + "epoch": 1.31, + "learning_rate": 4.321771526599627e-05, + "loss": 0.8949, + "step": 8252 + }, + { + "epoch": 1.31, + "learning_rate": 4.321594839362615e-05, + "loss": 0.8947, + "step": 8253 + }, + { + "epoch": 1.31, + "learning_rate": 4.321418132726723e-05, + "loss": 0.9255, + "step": 8254 + }, + { + "epoch": 1.32, + "learning_rate": 4.321241406693834e-05, + "loss": 0.9837, + "step": 8255 + }, + { + "epoch": 1.32, + "learning_rate": 4.321064661265831e-05, + "loss": 0.9221, + "step": 8256 + }, + { + "epoch": 1.32, + "learning_rate": 4.3208878964445936e-05, + "loss": 0.9173, + "step": 8257 + }, + { + "epoch": 1.32, + "learning_rate": 4.320711112232007e-05, + "loss": 0.9905, + "step": 8258 + }, + { + "epoch": 1.32, + "learning_rate": 4.320534308629951e-05, + "loss": 0.8543, + "step": 8259 + }, + { + "epoch": 1.32, + "learning_rate": 4.320357485640309e-05, + "loss": 0.9514, + "step": 8260 + }, + { + "epoch": 1.32, + "learning_rate": 4.320180643264967e-05, + "loss": 0.9945, + "step": 8261 + }, + { + "epoch": 1.32, + "learning_rate": 4.320003781505806e-05, + "loss": 0.8411, + "step": 8262 + }, + { + "epoch": 1.32, + "learning_rate": 4.3198269003647085e-05, + "loss": 0.9963, + "step": 8263 + }, + { + "epoch": 1.32, + "learning_rate": 4.3196499998435604e-05, + "loss": 1.0034, + "step": 8264 + }, + { + "epoch": 1.32, + "learning_rate": 4.319473079944244e-05, + "loss": 0.9242, + "step": 8265 + }, + { + "epoch": 1.32, + "learning_rate": 4.319296140668644e-05, + "loss": 1.0025, + "step": 8266 + }, + { + "epoch": 1.32, + "learning_rate": 4.319119182018645e-05, + "loss": 1.0594, + "step": 8267 + }, + { + "epoch": 1.32, + "learning_rate": 4.3189422039961304e-05, + "loss": 0.9137, + "step": 8268 + }, + { + "epoch": 1.32, + "learning_rate": 4.318765206602986e-05, + "loss": 0.9432, + "step": 8269 + }, + { + "epoch": 1.32, + "learning_rate": 4.318588189841096e-05, + "loss": 1.0043, + "step": 8270 + }, + { + "epoch": 1.32, + "learning_rate": 4.318411153712347e-05, + "loss": 0.9742, + "step": 8271 + }, + { + "epoch": 1.32, + "learning_rate": 4.318234098218622e-05, + "loss": 0.9256, + "step": 8272 + }, + { + "epoch": 1.32, + "learning_rate": 4.3180570233618086e-05, + "loss": 0.8392, + "step": 8273 + }, + { + "epoch": 1.32, + "learning_rate": 4.317879929143791e-05, + "loss": 0.8299, + "step": 8274 + }, + { + "epoch": 1.32, + "learning_rate": 4.3177028155664564e-05, + "loss": 0.964, + "step": 8275 + }, + { + "epoch": 1.32, + "learning_rate": 4.31752568263169e-05, + "loss": 0.9389, + "step": 8276 + }, + { + "epoch": 1.32, + "learning_rate": 4.317348530341378e-05, + "loss": 1.0553, + "step": 8277 + }, + { + "epoch": 1.32, + "learning_rate": 4.317171358697408e-05, + "loss": 1.0019, + "step": 8278 + }, + { + "epoch": 1.32, + "learning_rate": 4.316994167701667e-05, + "loss": 0.8952, + "step": 8279 + }, + { + "epoch": 1.32, + "learning_rate": 4.3168169573560405e-05, + "loss": 0.9366, + "step": 8280 + }, + { + "epoch": 1.32, + "learning_rate": 4.316639727662416e-05, + "loss": 0.9833, + "step": 8281 + }, + { + "epoch": 1.32, + "learning_rate": 4.316462478622682e-05, + "loss": 0.9281, + "step": 8282 + }, + { + "epoch": 1.32, + "learning_rate": 4.316285210238725e-05, + "loss": 0.9419, + "step": 8283 + }, + { + "epoch": 1.32, + "learning_rate": 4.3161079225124334e-05, + "loss": 0.9106, + "step": 8284 + }, + { + "epoch": 1.32, + "learning_rate": 4.3159306154456954e-05, + "loss": 0.9129, + "step": 8285 + }, + { + "epoch": 1.32, + "learning_rate": 4.3157532890403984e-05, + "loss": 0.9533, + "step": 8286 + }, + { + "epoch": 1.32, + "learning_rate": 4.315575943298431e-05, + "loss": 0.8684, + "step": 8287 + }, + { + "epoch": 1.32, + "learning_rate": 4.315398578221682e-05, + "loss": 0.9111, + "step": 8288 + }, + { + "epoch": 1.32, + "learning_rate": 4.315221193812041e-05, + "loss": 0.868, + "step": 8289 + }, + { + "epoch": 1.32, + "learning_rate": 4.315043790071396e-05, + "loss": 0.91, + "step": 8290 + }, + { + "epoch": 1.32, + "learning_rate": 4.3148663670016364e-05, + "loss": 0.914, + "step": 8291 + }, + { + "epoch": 1.32, + "learning_rate": 4.314688924604652e-05, + "loss": 0.8366, + "step": 8292 + }, + { + "epoch": 1.32, + "learning_rate": 4.314511462882333e-05, + "loss": 0.9441, + "step": 8293 + }, + { + "epoch": 1.32, + "learning_rate": 4.314333981836567e-05, + "loss": 0.9111, + "step": 8294 + }, + { + "epoch": 1.32, + "learning_rate": 4.3141564814692466e-05, + "loss": 0.8462, + "step": 8295 + }, + { + "epoch": 1.32, + "learning_rate": 4.3139789617822606e-05, + "loss": 0.8513, + "step": 8296 + }, + { + "epoch": 1.32, + "learning_rate": 4.3138014227775004e-05, + "loss": 0.9413, + "step": 8297 + }, + { + "epoch": 1.32, + "learning_rate": 4.313623864456856e-05, + "loss": 1.0055, + "step": 8298 + }, + { + "epoch": 1.32, + "learning_rate": 4.313446286822219e-05, + "loss": 0.9054, + "step": 8299 + }, + { + "epoch": 1.32, + "learning_rate": 4.3132686898754795e-05, + "loss": 0.9191, + "step": 8300 + }, + { + "epoch": 1.32, + "learning_rate": 4.3130910736185294e-05, + "loss": 0.8474, + "step": 8301 + }, + { + "epoch": 1.32, + "learning_rate": 4.31291343805326e-05, + "loss": 0.8681, + "step": 8302 + }, + { + "epoch": 1.32, + "learning_rate": 4.3127357831815626e-05, + "loss": 0.8979, + "step": 8303 + }, + { + "epoch": 1.32, + "learning_rate": 4.3125581090053305e-05, + "loss": 0.9049, + "step": 8304 + }, + { + "epoch": 1.32, + "learning_rate": 4.312380415526455e-05, + "loss": 0.9767, + "step": 8305 + }, + { + "epoch": 1.32, + "learning_rate": 4.312202702746828e-05, + "loss": 0.917, + "step": 8306 + }, + { + "epoch": 1.32, + "learning_rate": 4.3120249706683425e-05, + "loss": 0.9181, + "step": 8307 + }, + { + "epoch": 1.32, + "learning_rate": 4.311847219292892e-05, + "loss": 0.9272, + "step": 8308 + }, + { + "epoch": 1.32, + "learning_rate": 4.3116694486223675e-05, + "loss": 0.9572, + "step": 8309 + }, + { + "epoch": 1.32, + "learning_rate": 4.311491658658663e-05, + "loss": 0.8752, + "step": 8310 + }, + { + "epoch": 1.32, + "learning_rate": 4.311313849403673e-05, + "loss": 0.9628, + "step": 8311 + }, + { + "epoch": 1.32, + "learning_rate": 4.31113602085929e-05, + "loss": 0.8747, + "step": 8312 + }, + { + "epoch": 1.32, + "learning_rate": 4.3109581730274076e-05, + "loss": 0.8192, + "step": 8313 + }, + { + "epoch": 1.32, + "learning_rate": 4.3107803059099206e-05, + "loss": 0.9024, + "step": 8314 + }, + { + "epoch": 1.32, + "learning_rate": 4.310602419508722e-05, + "loss": 0.8569, + "step": 8315 + }, + { + "epoch": 1.32, + "learning_rate": 4.3104245138257084e-05, + "loss": 0.9915, + "step": 8316 + }, + { + "epoch": 1.32, + "learning_rate": 4.310246588862772e-05, + "loss": 1.0549, + "step": 8317 + }, + { + "epoch": 1.33, + "learning_rate": 4.3100686446218075e-05, + "loss": 0.9105, + "step": 8318 + }, + { + "epoch": 1.33, + "learning_rate": 4.3098906811047116e-05, + "loss": 0.937, + "step": 8319 + }, + { + "epoch": 1.33, + "learning_rate": 4.309712698313379e-05, + "loss": 0.924, + "step": 8320 + }, + { + "epoch": 1.33, + "learning_rate": 4.309534696249705e-05, + "loss": 0.9244, + "step": 8321 + }, + { + "epoch": 1.33, + "learning_rate": 4.309356674915585e-05, + "loss": 1.0246, + "step": 8322 + }, + { + "epoch": 1.33, + "learning_rate": 4.3091786343129145e-05, + "loss": 0.8946, + "step": 8323 + }, + { + "epoch": 1.33, + "learning_rate": 4.30900057444359e-05, + "loss": 0.9613, + "step": 8324 + }, + { + "epoch": 1.33, + "learning_rate": 4.3088224953095076e-05, + "loss": 1.0093, + "step": 8325 + }, + { + "epoch": 1.33, + "learning_rate": 4.308644396912563e-05, + "loss": 0.9408, + "step": 8326 + }, + { + "epoch": 1.33, + "learning_rate": 4.308466279254655e-05, + "loss": 0.972, + "step": 8327 + }, + { + "epoch": 1.33, + "learning_rate": 4.308288142337679e-05, + "loss": 0.9633, + "step": 8328 + }, + { + "epoch": 1.33, + "learning_rate": 4.3081099861635315e-05, + "loss": 0.9022, + "step": 8329 + }, + { + "epoch": 1.33, + "learning_rate": 4.3079318107341106e-05, + "loss": 0.9574, + "step": 8330 + }, + { + "epoch": 1.33, + "learning_rate": 4.307753616051313e-05, + "loss": 0.9425, + "step": 8331 + }, + { + "epoch": 1.33, + "learning_rate": 4.3075754021170375e-05, + "loss": 0.9476, + "step": 8332 + }, + { + "epoch": 1.33, + "learning_rate": 4.307397168933181e-05, + "loss": 0.9144, + "step": 8333 + }, + { + "epoch": 1.33, + "learning_rate": 4.307218916501643e-05, + "loss": 0.9143, + "step": 8334 + }, + { + "epoch": 1.33, + "learning_rate": 4.30704064482432e-05, + "loss": 0.9762, + "step": 8335 + }, + { + "epoch": 1.33, + "learning_rate": 4.306862353903111e-05, + "loss": 0.892, + "step": 8336 + }, + { + "epoch": 1.33, + "learning_rate": 4.3066840437399156e-05, + "loss": 0.8926, + "step": 8337 + }, + { + "epoch": 1.33, + "learning_rate": 4.3065057143366314e-05, + "loss": 1.0117, + "step": 8338 + }, + { + "epoch": 1.33, + "learning_rate": 4.306327365695158e-05, + "loss": 0.9606, + "step": 8339 + }, + { + "epoch": 1.33, + "learning_rate": 4.3061489978173953e-05, + "loss": 0.9896, + "step": 8340 + }, + { + "epoch": 1.33, + "learning_rate": 4.305970610705242e-05, + "loss": 0.9461, + "step": 8341 + }, + { + "epoch": 1.33, + "learning_rate": 4.305792204360598e-05, + "loss": 0.966, + "step": 8342 + }, + { + "epoch": 1.33, + "learning_rate": 4.305613778785363e-05, + "loss": 0.9481, + "step": 8343 + }, + { + "epoch": 1.33, + "learning_rate": 4.305435333981438e-05, + "loss": 1.0136, + "step": 8344 + }, + { + "epoch": 1.33, + "learning_rate": 4.305256869950723e-05, + "loss": 0.9205, + "step": 8345 + }, + { + "epoch": 1.33, + "learning_rate": 4.305078386695118e-05, + "loss": 0.8635, + "step": 8346 + }, + { + "epoch": 1.33, + "learning_rate": 4.304899884216525e-05, + "loss": 0.8994, + "step": 8347 + }, + { + "epoch": 1.33, + "learning_rate": 4.304721362516843e-05, + "loss": 0.9108, + "step": 8348 + }, + { + "epoch": 1.33, + "learning_rate": 4.3045428215979743e-05, + "loss": 0.8927, + "step": 8349 + }, + { + "epoch": 1.33, + "learning_rate": 4.30436426146182e-05, + "loss": 0.9142, + "step": 8350 + }, + { + "epoch": 1.33, + "learning_rate": 4.304185682110282e-05, + "loss": 0.9039, + "step": 8351 + }, + { + "epoch": 1.33, + "learning_rate": 4.304007083545262e-05, + "loss": 0.9346, + "step": 8352 + }, + { + "epoch": 1.33, + "learning_rate": 4.303828465768661e-05, + "loss": 0.8486, + "step": 8353 + }, + { + "epoch": 1.33, + "learning_rate": 4.303649828782383e-05, + "loss": 0.9176, + "step": 8354 + }, + { + "epoch": 1.33, + "learning_rate": 4.303471172588329e-05, + "loss": 1.0529, + "step": 8355 + }, + { + "epoch": 1.33, + "learning_rate": 4.3032924971884016e-05, + "loss": 0.8666, + "step": 8356 + }, + { + "epoch": 1.33, + "learning_rate": 4.303113802584504e-05, + "loss": 0.9023, + "step": 8357 + }, + { + "epoch": 1.33, + "learning_rate": 4.302935088778539e-05, + "loss": 0.9593, + "step": 8358 + }, + { + "epoch": 1.33, + "learning_rate": 4.302756355772411e-05, + "loss": 0.8999, + "step": 8359 + }, + { + "epoch": 1.33, + "learning_rate": 4.302577603568021e-05, + "loss": 0.9023, + "step": 8360 + }, + { + "epoch": 1.33, + "learning_rate": 4.3023988321672736e-05, + "loss": 0.9156, + "step": 8361 + }, + { + "epoch": 1.33, + "learning_rate": 4.302220041572074e-05, + "loss": 0.8856, + "step": 8362 + }, + { + "epoch": 1.33, + "learning_rate": 4.302041231784325e-05, + "loss": 1.0034, + "step": 8363 + }, + { + "epoch": 1.33, + "learning_rate": 4.301862402805931e-05, + "loss": 0.9416, + "step": 8364 + }, + { + "epoch": 1.33, + "learning_rate": 4.3016835546387954e-05, + "loss": 0.8901, + "step": 8365 + }, + { + "epoch": 1.33, + "learning_rate": 4.301504687284824e-05, + "loss": 0.8492, + "step": 8366 + }, + { + "epoch": 1.33, + "learning_rate": 4.3013258007459225e-05, + "loss": 0.9208, + "step": 8367 + }, + { + "epoch": 1.33, + "learning_rate": 4.301146895023993e-05, + "loss": 0.9414, + "step": 8368 + }, + { + "epoch": 1.33, + "learning_rate": 4.300967970120944e-05, + "loss": 0.8831, + "step": 8369 + }, + { + "epoch": 1.33, + "learning_rate": 4.3007890260386796e-05, + "loss": 0.9076, + "step": 8370 + }, + { + "epoch": 1.33, + "learning_rate": 4.300610062779105e-05, + "loss": 0.8705, + "step": 8371 + }, + { + "epoch": 1.33, + "learning_rate": 4.300431080344126e-05, + "loss": 0.8997, + "step": 8372 + }, + { + "epoch": 1.33, + "learning_rate": 4.300252078735649e-05, + "loss": 1.0745, + "step": 8373 + }, + { + "epoch": 1.33, + "learning_rate": 4.300073057955582e-05, + "loss": 0.9672, + "step": 8374 + }, + { + "epoch": 1.33, + "learning_rate": 4.2998940180058275e-05, + "loss": 0.9268, + "step": 8375 + }, + { + "epoch": 1.33, + "learning_rate": 4.2997149588882956e-05, + "loss": 0.9631, + "step": 8376 + }, + { + "epoch": 1.33, + "learning_rate": 4.2995358806048916e-05, + "loss": 0.9221, + "step": 8377 + }, + { + "epoch": 1.33, + "learning_rate": 4.299356783157523e-05, + "loss": 0.9821, + "step": 8378 + }, + { + "epoch": 1.33, + "learning_rate": 4.2991776665480974e-05, + "loss": 0.9028, + "step": 8379 + }, + { + "epoch": 1.34, + "learning_rate": 4.298998530778523e-05, + "loss": 0.9072, + "step": 8380 + }, + { + "epoch": 1.34, + "learning_rate": 4.2988193758507044e-05, + "loss": 0.934, + "step": 8381 + }, + { + "epoch": 1.34, + "learning_rate": 4.298640201766553e-05, + "loss": 0.9166, + "step": 8382 + }, + { + "epoch": 1.34, + "learning_rate": 4.298461008527975e-05, + "loss": 0.861, + "step": 8383 + }, + { + "epoch": 1.34, + "learning_rate": 4.298281796136879e-05, + "loss": 0.8905, + "step": 8384 + }, + { + "epoch": 1.34, + "learning_rate": 4.298102564595173e-05, + "loss": 0.8931, + "step": 8385 + }, + { + "epoch": 1.34, + "learning_rate": 4.297923313904768e-05, + "loss": 1.0154, + "step": 8386 + }, + { + "epoch": 1.34, + "learning_rate": 4.2977440440675697e-05, + "loss": 0.8946, + "step": 8387 + }, + { + "epoch": 1.34, + "learning_rate": 4.297564755085489e-05, + "loss": 0.932, + "step": 8388 + }, + { + "epoch": 1.34, + "learning_rate": 4.2973854469604356e-05, + "loss": 0.8878, + "step": 8389 + }, + { + "epoch": 1.34, + "learning_rate": 4.2972061196943185e-05, + "loss": 0.8851, + "step": 8390 + }, + { + "epoch": 1.34, + "learning_rate": 4.297026773289047e-05, + "loss": 0.8407, + "step": 8391 + }, + { + "epoch": 1.34, + "learning_rate": 4.296847407746531e-05, + "loss": 0.9628, + "step": 8392 + }, + { + "epoch": 1.34, + "learning_rate": 4.2966680230686816e-05, + "loss": 0.9346, + "step": 8393 + }, + { + "epoch": 1.34, + "learning_rate": 4.296488619257408e-05, + "loss": 0.9391, + "step": 8394 + }, + { + "epoch": 1.34, + "learning_rate": 4.296309196314622e-05, + "loss": 1.0248, + "step": 8395 + }, + { + "epoch": 1.34, + "learning_rate": 4.296129754242233e-05, + "loss": 0.9707, + "step": 8396 + }, + { + "epoch": 1.34, + "learning_rate": 4.2959502930421525e-05, + "loss": 0.9584, + "step": 8397 + }, + { + "epoch": 1.34, + "learning_rate": 4.295770812716292e-05, + "loss": 0.9477, + "step": 8398 + }, + { + "epoch": 1.34, + "learning_rate": 4.295591313266563e-05, + "loss": 0.9137, + "step": 8399 + }, + { + "epoch": 1.34, + "learning_rate": 4.2954117946948766e-05, + "loss": 0.9714, + "step": 8400 + }, + { + "epoch": 1.34, + "learning_rate": 4.2952322570031434e-05, + "loss": 0.929, + "step": 8401 + }, + { + "epoch": 1.34, + "learning_rate": 4.2950527001932775e-05, + "loss": 0.9624, + "step": 8402 + }, + { + "epoch": 1.34, + "learning_rate": 4.29487312426719e-05, + "loss": 0.968, + "step": 8403 + }, + { + "epoch": 1.34, + "learning_rate": 4.294693529226794e-05, + "loss": 0.984, + "step": 8404 + }, + { + "epoch": 1.34, + "learning_rate": 4.294513915074001e-05, + "loss": 0.8555, + "step": 8405 + }, + { + "epoch": 1.34, + "learning_rate": 4.294334281810724e-05, + "loss": 0.9341, + "step": 8406 + }, + { + "epoch": 1.34, + "learning_rate": 4.294154629438877e-05, + "loss": 0.894, + "step": 8407 + }, + { + "epoch": 1.34, + "learning_rate": 4.293974957960372e-05, + "loss": 0.9486, + "step": 8408 + }, + { + "epoch": 1.34, + "learning_rate": 4.293795267377122e-05, + "loss": 0.9254, + "step": 8409 + }, + { + "epoch": 1.34, + "learning_rate": 4.293615557691043e-05, + "loss": 0.9113, + "step": 8410 + }, + { + "epoch": 1.34, + "learning_rate": 4.293435828904046e-05, + "loss": 0.8652, + "step": 8411 + }, + { + "epoch": 1.34, + "learning_rate": 4.293256081018047e-05, + "loss": 0.9564, + "step": 8412 + }, + { + "epoch": 1.34, + "learning_rate": 4.293076314034959e-05, + "loss": 0.9467, + "step": 8413 + }, + { + "epoch": 1.34, + "learning_rate": 4.292896527956697e-05, + "loss": 0.9157, + "step": 8414 + }, + { + "epoch": 1.34, + "learning_rate": 4.2927167227851755e-05, + "loss": 0.9445, + "step": 8415 + }, + { + "epoch": 1.34, + "learning_rate": 4.292536898522309e-05, + "loss": 0.9139, + "step": 8416 + }, + { + "epoch": 1.34, + "learning_rate": 4.292357055170013e-05, + "loss": 1.0317, + "step": 8417 + }, + { + "epoch": 1.34, + "learning_rate": 4.292177192730203e-05, + "loss": 0.8706, + "step": 8418 + }, + { + "epoch": 1.34, + "learning_rate": 4.291997311204794e-05, + "loss": 0.9059, + "step": 8419 + }, + { + "epoch": 1.34, + "learning_rate": 4.291817410595701e-05, + "loss": 0.9196, + "step": 8420 + }, + { + "epoch": 1.34, + "learning_rate": 4.291637490904841e-05, + "loss": 0.9392, + "step": 8421 + }, + { + "epoch": 1.34, + "learning_rate": 4.291457552134128e-05, + "loss": 0.9486, + "step": 8422 + }, + { + "epoch": 1.34, + "learning_rate": 4.291277594285481e-05, + "loss": 0.9485, + "step": 8423 + }, + { + "epoch": 1.34, + "learning_rate": 4.2910976173608155e-05, + "loss": 0.9805, + "step": 8424 + }, + { + "epoch": 1.34, + "learning_rate": 4.290917621362047e-05, + "loss": 0.9134, + "step": 8425 + }, + { + "epoch": 1.34, + "learning_rate": 4.290737606291093e-05, + "loss": 1.0128, + "step": 8426 + }, + { + "epoch": 1.34, + "learning_rate": 4.290557572149871e-05, + "loss": 1.0002, + "step": 8427 + }, + { + "epoch": 1.34, + "learning_rate": 4.2903775189402976e-05, + "loss": 0.8947, + "step": 8428 + }, + { + "epoch": 1.34, + "learning_rate": 4.290197446664291e-05, + "loss": 0.9679, + "step": 8429 + }, + { + "epoch": 1.34, + "learning_rate": 4.2900173553237685e-05, + "loss": 0.9139, + "step": 8430 + }, + { + "epoch": 1.34, + "learning_rate": 4.2898372449206474e-05, + "loss": 0.9421, + "step": 8431 + }, + { + "epoch": 1.34, + "learning_rate": 4.289657115456847e-05, + "loss": 0.9102, + "step": 8432 + }, + { + "epoch": 1.34, + "learning_rate": 4.2894769669342846e-05, + "loss": 0.9508, + "step": 8433 + }, + { + "epoch": 1.34, + "learning_rate": 4.2892967993548784e-05, + "loss": 0.9246, + "step": 8434 + }, + { + "epoch": 1.34, + "learning_rate": 4.2891166127205484e-05, + "loss": 0.9761, + "step": 8435 + }, + { + "epoch": 1.34, + "learning_rate": 4.288936407033212e-05, + "loss": 0.9395, + "step": 8436 + }, + { + "epoch": 1.34, + "learning_rate": 4.288756182294789e-05, + "loss": 0.8896, + "step": 8437 + }, + { + "epoch": 1.34, + "learning_rate": 4.288575938507199e-05, + "loss": 0.9263, + "step": 8438 + }, + { + "epoch": 1.34, + "learning_rate": 4.288395675672361e-05, + "loss": 0.9221, + "step": 8439 + }, + { + "epoch": 1.34, + "learning_rate": 4.288215393792194e-05, + "loss": 0.9452, + "step": 8440 + }, + { + "epoch": 1.34, + "learning_rate": 4.2880350928686205e-05, + "loss": 0.8749, + "step": 8441 + }, + { + "epoch": 1.34, + "learning_rate": 4.287854772903558e-05, + "loss": 0.8789, + "step": 8442 + }, + { + "epoch": 1.35, + "learning_rate": 4.287674433898927e-05, + "loss": 1.0549, + "step": 8443 + }, + { + "epoch": 1.35, + "learning_rate": 4.287494075856648e-05, + "loss": 0.9578, + "step": 8444 + }, + { + "epoch": 1.35, + "learning_rate": 4.2873136987786445e-05, + "loss": 1.0129, + "step": 8445 + }, + { + "epoch": 1.35, + "learning_rate": 4.287133302666834e-05, + "loss": 0.9701, + "step": 8446 + }, + { + "epoch": 1.35, + "learning_rate": 4.286952887523138e-05, + "loss": 0.899, + "step": 8447 + }, + { + "epoch": 1.35, + "learning_rate": 4.28677245334948e-05, + "loss": 0.8735, + "step": 8448 + }, + { + "epoch": 1.35, + "learning_rate": 4.2865920001477795e-05, + "loss": 0.859, + "step": 8449 + }, + { + "epoch": 1.35, + "learning_rate": 4.286411527919959e-05, + "loss": 0.8564, + "step": 8450 + }, + { + "epoch": 1.35, + "learning_rate": 4.28623103666794e-05, + "loss": 0.9324, + "step": 8451 + }, + { + "epoch": 1.35, + "learning_rate": 4.286050526393646e-05, + "loss": 0.9752, + "step": 8452 + }, + { + "epoch": 1.35, + "learning_rate": 4.2858699970989965e-05, + "loss": 1.0694, + "step": 8453 + }, + { + "epoch": 1.35, + "learning_rate": 4.2856894487859166e-05, + "loss": 0.9139, + "step": 8454 + }, + { + "epoch": 1.35, + "learning_rate": 4.285508881456329e-05, + "loss": 0.9223, + "step": 8455 + }, + { + "epoch": 1.35, + "learning_rate": 4.285328295112155e-05, + "loss": 0.8234, + "step": 8456 + }, + { + "epoch": 1.35, + "learning_rate": 4.2851476897553186e-05, + "loss": 0.9139, + "step": 8457 + }, + { + "epoch": 1.35, + "learning_rate": 4.2849670653877426e-05, + "loss": 0.9746, + "step": 8458 + }, + { + "epoch": 1.35, + "learning_rate": 4.284786422011352e-05, + "loss": 0.9875, + "step": 8459 + }, + { + "epoch": 1.35, + "learning_rate": 4.284605759628069e-05, + "loss": 0.8535, + "step": 8460 + }, + { + "epoch": 1.35, + "learning_rate": 4.284425078239818e-05, + "loss": 0.8805, + "step": 8461 + }, + { + "epoch": 1.35, + "learning_rate": 4.284244377848523e-05, + "loss": 0.8811, + "step": 8462 + }, + { + "epoch": 1.35, + "learning_rate": 4.28406365845611e-05, + "loss": 0.9623, + "step": 8463 + }, + { + "epoch": 1.35, + "learning_rate": 4.2838829200645015e-05, + "loss": 0.9, + "step": 8464 + }, + { + "epoch": 1.35, + "learning_rate": 4.283702162675622e-05, + "loss": 0.9361, + "step": 8465 + }, + { + "epoch": 1.35, + "learning_rate": 4.283521386291398e-05, + "loss": 1.0155, + "step": 8466 + }, + { + "epoch": 1.35, + "learning_rate": 4.283340590913755e-05, + "loss": 0.8875, + "step": 8467 + }, + { + "epoch": 1.35, + "learning_rate": 4.283159776544616e-05, + "loss": 0.9597, + "step": 8468 + }, + { + "epoch": 1.35, + "learning_rate": 4.282978943185909e-05, + "loss": 0.9216, + "step": 8469 + }, + { + "epoch": 1.35, + "learning_rate": 4.282798090839558e-05, + "loss": 0.9493, + "step": 8470 + }, + { + "epoch": 1.35, + "learning_rate": 4.2826172195074896e-05, + "loss": 1.0808, + "step": 8471 + }, + { + "epoch": 1.35, + "learning_rate": 4.28243632919163e-05, + "loss": 0.8396, + "step": 8472 + }, + { + "epoch": 1.35, + "learning_rate": 4.282255419893907e-05, + "loss": 0.9852, + "step": 8473 + }, + { + "epoch": 1.35, + "learning_rate": 4.2820744916162445e-05, + "loss": 0.9188, + "step": 8474 + }, + { + "epoch": 1.35, + "learning_rate": 4.281893544360571e-05, + "loss": 0.9857, + "step": 8475 + }, + { + "epoch": 1.35, + "learning_rate": 4.281712578128812e-05, + "loss": 0.9157, + "step": 8476 + }, + { + "epoch": 1.35, + "learning_rate": 4.2815315929228975e-05, + "loss": 0.8549, + "step": 8477 + }, + { + "epoch": 1.35, + "learning_rate": 4.2813505887447525e-05, + "loss": 0.8912, + "step": 8478 + }, + { + "epoch": 1.35, + "learning_rate": 4.2811695655963045e-05, + "loss": 0.8663, + "step": 8479 + }, + { + "epoch": 1.35, + "learning_rate": 4.280988523479483e-05, + "loss": 0.9249, + "step": 8480 + }, + { + "epoch": 1.35, + "learning_rate": 4.280807462396215e-05, + "loss": 0.9255, + "step": 8481 + }, + { + "epoch": 1.35, + "learning_rate": 4.280626382348428e-05, + "loss": 0.8511, + "step": 8482 + }, + { + "epoch": 1.35, + "learning_rate": 4.280445283338051e-05, + "loss": 0.9322, + "step": 8483 + }, + { + "epoch": 1.35, + "learning_rate": 4.280264165367013e-05, + "loss": 0.9959, + "step": 8484 + }, + { + "epoch": 1.35, + "learning_rate": 4.280083028437243e-05, + "loss": 0.9002, + "step": 8485 + }, + { + "epoch": 1.35, + "learning_rate": 4.279901872550669e-05, + "loss": 0.9384, + "step": 8486 + }, + { + "epoch": 1.35, + "learning_rate": 4.27972069770922e-05, + "loss": 0.8356, + "step": 8487 + }, + { + "epoch": 1.35, + "learning_rate": 4.279539503914827e-05, + "loss": 0.8628, + "step": 8488 + }, + { + "epoch": 1.35, + "learning_rate": 4.279358291169418e-05, + "loss": 0.8863, + "step": 8489 + }, + { + "epoch": 1.35, + "learning_rate": 4.2791770594749236e-05, + "loss": 0.9066, + "step": 8490 + }, + { + "epoch": 1.35, + "learning_rate": 4.278995808833274e-05, + "loss": 0.9325, + "step": 8491 + }, + { + "epoch": 1.35, + "learning_rate": 4.278814539246399e-05, + "loss": 0.9681, + "step": 8492 + }, + { + "epoch": 1.35, + "learning_rate": 4.278633250716229e-05, + "loss": 0.9139, + "step": 8493 + }, + { + "epoch": 1.35, + "learning_rate": 4.2784519432446954e-05, + "loss": 0.9375, + "step": 8494 + }, + { + "epoch": 1.35, + "learning_rate": 4.278270616833727e-05, + "loss": 0.9505, + "step": 8495 + }, + { + "epoch": 1.35, + "learning_rate": 4.278089271485257e-05, + "loss": 0.885, + "step": 8496 + }, + { + "epoch": 1.35, + "learning_rate": 4.277907907201216e-05, + "loss": 0.9404, + "step": 8497 + }, + { + "epoch": 1.35, + "learning_rate": 4.2777265239835345e-05, + "loss": 1.0203, + "step": 8498 + }, + { + "epoch": 1.35, + "learning_rate": 4.277545121834145e-05, + "loss": 0.9691, + "step": 8499 + }, + { + "epoch": 1.35, + "learning_rate": 4.2773637007549796e-05, + "loss": 0.9535, + "step": 8500 + }, + { + "epoch": 1.35, + "learning_rate": 4.277182260747969e-05, + "loss": 0.8952, + "step": 8501 + }, + { + "epoch": 1.35, + "learning_rate": 4.2770008018150473e-05, + "loss": 0.9417, + "step": 8502 + }, + { + "epoch": 1.35, + "learning_rate": 4.2768193239581456e-05, + "loss": 0.9556, + "step": 8503 + }, + { + "epoch": 1.35, + "learning_rate": 4.2766378271791965e-05, + "loss": 0.8938, + "step": 8504 + }, + { + "epoch": 1.35, + "learning_rate": 4.276456311480133e-05, + "loss": 0.894, + "step": 8505 + }, + { + "epoch": 1.36, + "learning_rate": 4.2762747768628886e-05, + "loss": 1.0112, + "step": 8506 + }, + { + "epoch": 1.36, + "learning_rate": 4.276093223329396e-05, + "loss": 0.8972, + "step": 8507 + }, + { + "epoch": 1.36, + "learning_rate": 4.275911650881589e-05, + "loss": 0.9517, + "step": 8508 + }, + { + "epoch": 1.36, + "learning_rate": 4.2757300595214014e-05, + "loss": 0.9437, + "step": 8509 + }, + { + "epoch": 1.36, + "learning_rate": 4.275548449250766e-05, + "loss": 1.0062, + "step": 8510 + }, + { + "epoch": 1.36, + "learning_rate": 4.275366820071618e-05, + "loss": 0.8583, + "step": 8511 + }, + { + "epoch": 1.36, + "learning_rate": 4.275185171985892e-05, + "loss": 0.9669, + "step": 8512 + }, + { + "epoch": 1.36, + "learning_rate": 4.27500350499552e-05, + "loss": 0.8737, + "step": 8513 + }, + { + "epoch": 1.36, + "learning_rate": 4.2748218191024396e-05, + "loss": 0.9036, + "step": 8514 + }, + { + "epoch": 1.36, + "learning_rate": 4.274640114308584e-05, + "loss": 0.8789, + "step": 8515 + }, + { + "epoch": 1.36, + "learning_rate": 4.274458390615889e-05, + "loss": 0.8807, + "step": 8516 + }, + { + "epoch": 1.36, + "learning_rate": 4.2742766480262884e-05, + "loss": 0.9357, + "step": 8517 + }, + { + "epoch": 1.36, + "learning_rate": 4.2740948865417195e-05, + "loss": 0.8659, + "step": 8518 + }, + { + "epoch": 1.36, + "learning_rate": 4.2739131061641166e-05, + "loss": 0.9471, + "step": 8519 + }, + { + "epoch": 1.36, + "learning_rate": 4.273731306895417e-05, + "loss": 0.8972, + "step": 8520 + }, + { + "epoch": 1.36, + "learning_rate": 4.273549488737555e-05, + "loss": 0.9568, + "step": 8521 + }, + { + "epoch": 1.36, + "learning_rate": 4.273367651692468e-05, + "loss": 0.9359, + "step": 8522 + }, + { + "epoch": 1.36, + "learning_rate": 4.2731857957620924e-05, + "loss": 0.8931, + "step": 8523 + }, + { + "epoch": 1.36, + "learning_rate": 4.273003920948364e-05, + "loss": 0.9535, + "step": 8524 + }, + { + "epoch": 1.36, + "learning_rate": 4.2728220272532204e-05, + "loss": 0.9378, + "step": 8525 + }, + { + "epoch": 1.36, + "learning_rate": 4.2726401146785986e-05, + "loss": 0.9031, + "step": 8526 + }, + { + "epoch": 1.36, + "learning_rate": 4.272458183226437e-05, + "loss": 0.8882, + "step": 8527 + }, + { + "epoch": 1.36, + "learning_rate": 4.2722762328986695e-05, + "loss": 0.9475, + "step": 8528 + }, + { + "epoch": 1.36, + "learning_rate": 4.2720942636972376e-05, + "loss": 0.9124, + "step": 8529 + }, + { + "epoch": 1.36, + "learning_rate": 4.271912275624077e-05, + "loss": 0.914, + "step": 8530 + }, + { + "epoch": 1.36, + "learning_rate": 4.2717302686811275e-05, + "loss": 0.9354, + "step": 8531 + }, + { + "epoch": 1.36, + "learning_rate": 4.2715482428703255e-05, + "loss": 1.022, + "step": 8532 + }, + { + "epoch": 1.36, + "learning_rate": 4.27136619819361e-05, + "loss": 0.9362, + "step": 8533 + }, + { + "epoch": 1.36, + "learning_rate": 4.27118413465292e-05, + "loss": 0.922, + "step": 8534 + }, + { + "epoch": 1.36, + "learning_rate": 4.271002052250195e-05, + "loss": 0.8943, + "step": 8535 + }, + { + "epoch": 1.36, + "learning_rate": 4.270819950987373e-05, + "loss": 0.8881, + "step": 8536 + }, + { + "epoch": 1.36, + "learning_rate": 4.270637830866393e-05, + "loss": 0.9101, + "step": 8537 + }, + { + "epoch": 1.36, + "learning_rate": 4.270455691889196e-05, + "loss": 0.9063, + "step": 8538 + }, + { + "epoch": 1.36, + "learning_rate": 4.27027353405772e-05, + "loss": 0.9691, + "step": 8539 + }, + { + "epoch": 1.36, + "learning_rate": 4.270091357373905e-05, + "loss": 0.9277, + "step": 8540 + }, + { + "epoch": 1.36, + "learning_rate": 4.2699091618396936e-05, + "loss": 0.9112, + "step": 8541 + }, + { + "epoch": 1.36, + "learning_rate": 4.269726947457023e-05, + "loss": 0.9078, + "step": 8542 + }, + { + "epoch": 1.36, + "learning_rate": 4.2695447142278347e-05, + "loss": 0.9044, + "step": 8543 + }, + { + "epoch": 1.36, + "learning_rate": 4.2693624621540694e-05, + "loss": 0.8783, + "step": 8544 + }, + { + "epoch": 1.36, + "learning_rate": 4.269180191237669e-05, + "loss": 0.8923, + "step": 8545 + }, + { + "epoch": 1.36, + "learning_rate": 4.268997901480573e-05, + "loss": 1.0837, + "step": 8546 + }, + { + "epoch": 1.36, + "learning_rate": 4.268815592884724e-05, + "loss": 0.9731, + "step": 8547 + }, + { + "epoch": 1.36, + "learning_rate": 4.268633265452062e-05, + "loss": 0.9709, + "step": 8548 + }, + { + "epoch": 1.36, + "learning_rate": 4.26845091918453e-05, + "loss": 0.9257, + "step": 8549 + }, + { + "epoch": 1.36, + "learning_rate": 4.2682685540840684e-05, + "loss": 0.9142, + "step": 8550 + }, + { + "epoch": 1.36, + "learning_rate": 4.268086170152621e-05, + "loss": 0.9053, + "step": 8551 + }, + { + "epoch": 1.36, + "learning_rate": 4.26790376739213e-05, + "loss": 0.8635, + "step": 8552 + }, + { + "epoch": 1.36, + "learning_rate": 4.267721345804536e-05, + "loss": 0.9886, + "step": 8553 + }, + { + "epoch": 1.36, + "learning_rate": 4.267538905391783e-05, + "loss": 0.9793, + "step": 8554 + }, + { + "epoch": 1.36, + "learning_rate": 4.2673564461558145e-05, + "loss": 0.9491, + "step": 8555 + }, + { + "epoch": 1.36, + "learning_rate": 4.2671739680985724e-05, + "loss": 0.9153, + "step": 8556 + }, + { + "epoch": 1.36, + "learning_rate": 4.2669914712220004e-05, + "loss": 0.8885, + "step": 8557 + }, + { + "epoch": 1.36, + "learning_rate": 4.266808955528042e-05, + "loss": 0.9611, + "step": 8558 + }, + { + "epoch": 1.36, + "learning_rate": 4.266626421018641e-05, + "loss": 0.9005, + "step": 8559 + }, + { + "epoch": 1.36, + "learning_rate": 4.266443867695742e-05, + "loss": 0.9635, + "step": 8560 + }, + { + "epoch": 1.36, + "learning_rate": 4.266261295561287e-05, + "loss": 0.9039, + "step": 8561 + }, + { + "epoch": 1.36, + "learning_rate": 4.266078704617222e-05, + "loss": 0.8128, + "step": 8562 + }, + { + "epoch": 1.36, + "learning_rate": 4.26589609486549e-05, + "loss": 0.913, + "step": 8563 + }, + { + "epoch": 1.36, + "learning_rate": 4.265713466308038e-05, + "loss": 0.9417, + "step": 8564 + }, + { + "epoch": 1.36, + "learning_rate": 4.265530818946809e-05, + "loss": 0.8804, + "step": 8565 + }, + { + "epoch": 1.36, + "learning_rate": 4.2653481527837494e-05, + "loss": 0.9513, + "step": 8566 + }, + { + "epoch": 1.36, + "learning_rate": 4.265165467820803e-05, + "loss": 0.9342, + "step": 8567 + }, + { + "epoch": 1.36, + "learning_rate": 4.264982764059916e-05, + "loss": 0.8511, + "step": 8568 + }, + { + "epoch": 1.37, + "learning_rate": 4.2648000415030344e-05, + "loss": 1.0083, + "step": 8569 + }, + { + "epoch": 1.37, + "learning_rate": 4.264617300152104e-05, + "loss": 0.9383, + "step": 8570 + }, + { + "epoch": 1.37, + "learning_rate": 4.2644345400090704e-05, + "loss": 0.9238, + "step": 8571 + }, + { + "epoch": 1.37, + "learning_rate": 4.2642517610758796e-05, + "loss": 0.8712, + "step": 8572 + }, + { + "epoch": 1.37, + "learning_rate": 4.2640689633544794e-05, + "loss": 0.9693, + "step": 8573 + }, + { + "epoch": 1.37, + "learning_rate": 4.263886146846815e-05, + "loss": 0.9061, + "step": 8574 + }, + { + "epoch": 1.37, + "learning_rate": 4.2637033115548354e-05, + "loss": 0.8529, + "step": 8575 + }, + { + "epoch": 1.37, + "learning_rate": 4.263520457480485e-05, + "loss": 0.9171, + "step": 8576 + }, + { + "epoch": 1.37, + "learning_rate": 4.263337584625713e-05, + "loss": 0.9235, + "step": 8577 + }, + { + "epoch": 1.37, + "learning_rate": 4.263154692992466e-05, + "loss": 0.8544, + "step": 8578 + }, + { + "epoch": 1.37, + "learning_rate": 4.262971782582692e-05, + "loss": 0.8286, + "step": 8579 + }, + { + "epoch": 1.37, + "learning_rate": 4.262788853398339e-05, + "loss": 0.949, + "step": 8580 + }, + { + "epoch": 1.37, + "learning_rate": 4.262605905441355e-05, + "loss": 0.9231, + "step": 8581 + }, + { + "epoch": 1.37, + "learning_rate": 4.262422938713688e-05, + "loss": 0.8741, + "step": 8582 + }, + { + "epoch": 1.37, + "learning_rate": 4.262239953217286e-05, + "loss": 0.8853, + "step": 8583 + }, + { + "epoch": 1.37, + "learning_rate": 4.2620569489540996e-05, + "loss": 0.9869, + "step": 8584 + }, + { + "epoch": 1.37, + "learning_rate": 4.261873925926076e-05, + "loss": 0.9745, + "step": 8585 + }, + { + "epoch": 1.37, + "learning_rate": 4.2616908841351644e-05, + "loss": 0.9443, + "step": 8586 + }, + { + "epoch": 1.37, + "learning_rate": 4.261507823583315e-05, + "loss": 0.8829, + "step": 8587 + }, + { + "epoch": 1.37, + "learning_rate": 4.2613247442724764e-05, + "loss": 0.9023, + "step": 8588 + }, + { + "epoch": 1.37, + "learning_rate": 4.261141646204599e-05, + "loss": 0.9841, + "step": 8589 + }, + { + "epoch": 1.37, + "learning_rate": 4.260958529381632e-05, + "loss": 0.942, + "step": 8590 + }, + { + "epoch": 1.37, + "learning_rate": 4.260775393805525e-05, + "loss": 0.9705, + "step": 8591 + }, + { + "epoch": 1.37, + "learning_rate": 4.26059223947823e-05, + "loss": 0.8822, + "step": 8592 + }, + { + "epoch": 1.37, + "learning_rate": 4.260409066401696e-05, + "loss": 0.9714, + "step": 8593 + }, + { + "epoch": 1.37, + "learning_rate": 4.2602258745778746e-05, + "loss": 0.9192, + "step": 8594 + }, + { + "epoch": 1.37, + "learning_rate": 4.260042664008716e-05, + "loss": 0.84, + "step": 8595 + }, + { + "epoch": 1.37, + "learning_rate": 4.2598594346961716e-05, + "loss": 0.8997, + "step": 8596 + }, + { + "epoch": 1.37, + "learning_rate": 4.259676186642193e-05, + "loss": 0.9679, + "step": 8597 + }, + { + "epoch": 1.37, + "learning_rate": 4.259492919848732e-05, + "loss": 0.9677, + "step": 8598 + }, + { + "epoch": 1.37, + "learning_rate": 4.2593096343177384e-05, + "loss": 0.8332, + "step": 8599 + }, + { + "epoch": 1.37, + "learning_rate": 4.2591263300511656e-05, + "loss": 0.9146, + "step": 8600 + }, + { + "epoch": 1.37, + "learning_rate": 4.2589430070509656e-05, + "loss": 0.8276, + "step": 8601 + }, + { + "epoch": 1.37, + "learning_rate": 4.25875966531909e-05, + "loss": 0.8503, + "step": 8602 + }, + { + "epoch": 1.37, + "learning_rate": 4.258576304857492e-05, + "loss": 0.8695, + "step": 8603 + }, + { + "epoch": 1.37, + "learning_rate": 4.258392925668124e-05, + "loss": 0.9093, + "step": 8604 + }, + { + "epoch": 1.37, + "learning_rate": 4.2582095277529396e-05, + "loss": 0.9351, + "step": 8605 + }, + { + "epoch": 1.37, + "learning_rate": 4.2580261111138905e-05, + "loss": 0.9041, + "step": 8606 + }, + { + "epoch": 1.37, + "learning_rate": 4.25784267575293e-05, + "loss": 0.9188, + "step": 8607 + }, + { + "epoch": 1.37, + "learning_rate": 4.2576592216720135e-05, + "loss": 0.9501, + "step": 8608 + }, + { + "epoch": 1.37, + "learning_rate": 4.257475748873093e-05, + "loss": 0.8954, + "step": 8609 + }, + { + "epoch": 1.37, + "learning_rate": 4.257292257358122e-05, + "loss": 1.0803, + "step": 8610 + }, + { + "epoch": 1.37, + "learning_rate": 4.257108747129056e-05, + "loss": 0.9226, + "step": 8611 + }, + { + "epoch": 1.37, + "learning_rate": 4.256925218187848e-05, + "loss": 0.8498, + "step": 8612 + }, + { + "epoch": 1.37, + "learning_rate": 4.256741670536454e-05, + "loss": 0.9412, + "step": 8613 + }, + { + "epoch": 1.37, + "learning_rate": 4.256558104176828e-05, + "loss": 0.8853, + "step": 8614 + }, + { + "epoch": 1.37, + "learning_rate": 4.256374519110923e-05, + "loss": 0.856, + "step": 8615 + }, + { + "epoch": 1.37, + "learning_rate": 4.2561909153406965e-05, + "loss": 0.8514, + "step": 8616 + }, + { + "epoch": 1.37, + "learning_rate": 4.256007292868103e-05, + "loss": 0.8757, + "step": 8617 + }, + { + "epoch": 1.37, + "learning_rate": 4.2558236516950984e-05, + "loss": 0.961, + "step": 8618 + }, + { + "epoch": 1.37, + "learning_rate": 4.2556399918236375e-05, + "loss": 0.8439, + "step": 8619 + }, + { + "epoch": 1.37, + "learning_rate": 4.2554563132556766e-05, + "loss": 0.8667, + "step": 8620 + }, + { + "epoch": 1.37, + "learning_rate": 4.255272615993172e-05, + "loss": 0.8762, + "step": 8621 + }, + { + "epoch": 1.37, + "learning_rate": 4.255088900038079e-05, + "loss": 0.9113, + "step": 8622 + }, + { + "epoch": 1.37, + "learning_rate": 4.254905165392355e-05, + "loss": 0.95, + "step": 8623 + }, + { + "epoch": 1.37, + "learning_rate": 4.254721412057957e-05, + "loss": 0.904, + "step": 8624 + }, + { + "epoch": 1.37, + "learning_rate": 4.254537640036841e-05, + "loss": 0.8871, + "step": 8625 + }, + { + "epoch": 1.37, + "learning_rate": 4.254353849330964e-05, + "loss": 0.9165, + "step": 8626 + }, + { + "epoch": 1.37, + "learning_rate": 4.254170039942284e-05, + "loss": 0.9382, + "step": 8627 + }, + { + "epoch": 1.37, + "learning_rate": 4.253986211872757e-05, + "loss": 0.9548, + "step": 8628 + }, + { + "epoch": 1.37, + "learning_rate": 4.253802365124343e-05, + "loss": 0.9058, + "step": 8629 + }, + { + "epoch": 1.37, + "learning_rate": 4.2536184996989984e-05, + "loss": 0.8914, + "step": 8630 + }, + { + "epoch": 1.38, + "learning_rate": 4.253434615598681e-05, + "loss": 0.8895, + "step": 8631 + }, + { + "epoch": 1.38, + "learning_rate": 4.253250712825349e-05, + "loss": 0.9214, + "step": 8632 + }, + { + "epoch": 1.38, + "learning_rate": 4.2530667913809615e-05, + "loss": 0.931, + "step": 8633 + }, + { + "epoch": 1.38, + "learning_rate": 4.2528828512674784e-05, + "loss": 0.9084, + "step": 8634 + }, + { + "epoch": 1.38, + "learning_rate": 4.252698892486856e-05, + "loss": 0.9117, + "step": 8635 + }, + { + "epoch": 1.38, + "learning_rate": 4.2525149150410545e-05, + "loss": 0.9745, + "step": 8636 + }, + { + "epoch": 1.38, + "learning_rate": 4.252330918932034e-05, + "loss": 1.0186, + "step": 8637 + }, + { + "epoch": 1.38, + "learning_rate": 4.252146904161752e-05, + "loss": 0.9129, + "step": 8638 + }, + { + "epoch": 1.38, + "learning_rate": 4.2519628707321695e-05, + "loss": 1.0288, + "step": 8639 + }, + { + "epoch": 1.38, + "learning_rate": 4.251778818645246e-05, + "loss": 0.8617, + "step": 8640 + }, + { + "epoch": 1.38, + "learning_rate": 4.251594747902942e-05, + "loss": 0.9037, + "step": 8641 + }, + { + "epoch": 1.38, + "learning_rate": 4.2514106585072175e-05, + "loss": 0.9164, + "step": 8642 + }, + { + "epoch": 1.38, + "learning_rate": 4.2512265504600316e-05, + "loss": 0.8881, + "step": 8643 + }, + { + "epoch": 1.38, + "learning_rate": 4.251042423763347e-05, + "loss": 0.8813, + "step": 8644 + }, + { + "epoch": 1.38, + "learning_rate": 4.250858278419124e-05, + "loss": 0.9469, + "step": 8645 + }, + { + "epoch": 1.38, + "learning_rate": 4.2506741144293224e-05, + "loss": 0.8839, + "step": 8646 + }, + { + "epoch": 1.38, + "learning_rate": 4.250489931795905e-05, + "loss": 0.9442, + "step": 8647 + }, + { + "epoch": 1.38, + "learning_rate": 4.2503057305208325e-05, + "loss": 0.9738, + "step": 8648 + }, + { + "epoch": 1.38, + "learning_rate": 4.250121510606067e-05, + "loss": 0.9545, + "step": 8649 + }, + { + "epoch": 1.38, + "learning_rate": 4.249937272053569e-05, + "loss": 0.9248, + "step": 8650 + }, + { + "epoch": 1.38, + "learning_rate": 4.249753014865303e-05, + "loss": 0.9764, + "step": 8651 + }, + { + "epoch": 1.38, + "learning_rate": 4.249568739043228e-05, + "loss": 1.0229, + "step": 8652 + }, + { + "epoch": 1.38, + "learning_rate": 4.249384444589308e-05, + "loss": 0.994, + "step": 8653 + }, + { + "epoch": 1.38, + "learning_rate": 4.249200131505507e-05, + "loss": 0.8526, + "step": 8654 + }, + { + "epoch": 1.38, + "learning_rate": 4.249015799793786e-05, + "loss": 0.8888, + "step": 8655 + }, + { + "epoch": 1.38, + "learning_rate": 4.248831449456109e-05, + "loss": 0.8824, + "step": 8656 + }, + { + "epoch": 1.38, + "learning_rate": 4.248647080494439e-05, + "loss": 0.9893, + "step": 8657 + }, + { + "epoch": 1.38, + "learning_rate": 4.248462692910738e-05, + "loss": 0.8828, + "step": 8658 + }, + { + "epoch": 1.38, + "learning_rate": 4.248278286706971e-05, + "loss": 0.9256, + "step": 8659 + }, + { + "epoch": 1.38, + "learning_rate": 4.248093861885103e-05, + "loss": 0.9341, + "step": 8660 + }, + { + "epoch": 1.38, + "learning_rate": 4.247909418447096e-05, + "loss": 0.9381, + "step": 8661 + }, + { + "epoch": 1.38, + "learning_rate": 4.2477249563949144e-05, + "loss": 0.9252, + "step": 8662 + }, + { + "epoch": 1.38, + "learning_rate": 4.247540475730524e-05, + "loss": 0.8631, + "step": 8663 + }, + { + "epoch": 1.38, + "learning_rate": 4.2473559764558874e-05, + "loss": 0.9376, + "step": 8664 + }, + { + "epoch": 1.38, + "learning_rate": 4.247171458572971e-05, + "loss": 0.8547, + "step": 8665 + }, + { + "epoch": 1.38, + "learning_rate": 4.2469869220837386e-05, + "loss": 0.9416, + "step": 8666 + }, + { + "epoch": 1.38, + "learning_rate": 4.246802366990157e-05, + "loss": 0.9561, + "step": 8667 + }, + { + "epoch": 1.38, + "learning_rate": 4.246617793294191e-05, + "loss": 0.904, + "step": 8668 + }, + { + "epoch": 1.38, + "learning_rate": 4.2464332009978055e-05, + "loss": 0.8866, + "step": 8669 + }, + { + "epoch": 1.38, + "learning_rate": 4.2462485901029666e-05, + "loss": 0.8548, + "step": 8670 + }, + { + "epoch": 1.38, + "learning_rate": 4.2460639606116406e-05, + "loss": 0.9316, + "step": 8671 + }, + { + "epoch": 1.38, + "learning_rate": 4.2458793125257926e-05, + "loss": 0.9401, + "step": 8672 + }, + { + "epoch": 1.38, + "learning_rate": 4.245694645847391e-05, + "loss": 0.8266, + "step": 8673 + }, + { + "epoch": 1.38, + "learning_rate": 4.2455099605784e-05, + "loss": 0.8812, + "step": 8674 + }, + { + "epoch": 1.38, + "learning_rate": 4.245325256720789e-05, + "loss": 0.9502, + "step": 8675 + }, + { + "epoch": 1.38, + "learning_rate": 4.245140534276523e-05, + "loss": 0.957, + "step": 8676 + }, + { + "epoch": 1.38, + "learning_rate": 4.24495579324757e-05, + "loss": 0.9054, + "step": 8677 + }, + { + "epoch": 1.38, + "learning_rate": 4.244771033635897e-05, + "loss": 0.8973, + "step": 8678 + }, + { + "epoch": 1.38, + "learning_rate": 4.2445862554434715e-05, + "loss": 0.8624, + "step": 8679 + }, + { + "epoch": 1.38, + "learning_rate": 4.2444014586722627e-05, + "loss": 0.8248, + "step": 8680 + }, + { + "epoch": 1.38, + "learning_rate": 4.2442166433242355e-05, + "loss": 0.8634, + "step": 8681 + }, + { + "epoch": 1.38, + "learning_rate": 4.244031809401361e-05, + "loss": 0.836, + "step": 8682 + }, + { + "epoch": 1.38, + "learning_rate": 4.243846956905607e-05, + "loss": 0.9115, + "step": 8683 + }, + { + "epoch": 1.38, + "learning_rate": 4.243662085838942e-05, + "loss": 0.8486, + "step": 8684 + }, + { + "epoch": 1.38, + "learning_rate": 4.2434771962033326e-05, + "loss": 0.876, + "step": 8685 + }, + { + "epoch": 1.38, + "learning_rate": 4.243292288000751e-05, + "loss": 0.9099, + "step": 8686 + }, + { + "epoch": 1.38, + "learning_rate": 4.2431073612331645e-05, + "loss": 0.9015, + "step": 8687 + }, + { + "epoch": 1.38, + "learning_rate": 4.2429224159025424e-05, + "loss": 0.9177, + "step": 8688 + }, + { + "epoch": 1.38, + "learning_rate": 4.2427374520108555e-05, + "loss": 0.9915, + "step": 8689 + }, + { + "epoch": 1.38, + "learning_rate": 4.2425524695600723e-05, + "loss": 0.8927, + "step": 8690 + }, + { + "epoch": 1.38, + "learning_rate": 4.242367468552164e-05, + "loss": 0.945, + "step": 8691 + }, + { + "epoch": 1.38, + "learning_rate": 4.242182448989099e-05, + "loss": 0.8711, + "step": 8692 + }, + { + "epoch": 1.38, + "learning_rate": 4.2419974108728496e-05, + "loss": 0.8962, + "step": 8693 + }, + { + "epoch": 1.39, + "learning_rate": 4.2418123542053845e-05, + "loss": 0.9365, + "step": 8694 + }, + { + "epoch": 1.39, + "learning_rate": 4.241627278988676e-05, + "loss": 0.9565, + "step": 8695 + }, + { + "epoch": 1.39, + "learning_rate": 4.241442185224693e-05, + "loss": 0.9534, + "step": 8696 + }, + { + "epoch": 1.39, + "learning_rate": 4.241257072915409e-05, + "loss": 0.9425, + "step": 8697 + }, + { + "epoch": 1.39, + "learning_rate": 4.2410719420627945e-05, + "loss": 0.9975, + "step": 8698 + }, + { + "epoch": 1.39, + "learning_rate": 4.2408867926688206e-05, + "loss": 1.0067, + "step": 8699 + }, + { + "epoch": 1.39, + "learning_rate": 4.240701624735459e-05, + "loss": 0.8828, + "step": 8700 + }, + { + "epoch": 1.39, + "learning_rate": 4.240516438264682e-05, + "loss": 0.9296, + "step": 8701 + }, + { + "epoch": 1.39, + "learning_rate": 4.240331233258462e-05, + "loss": 0.8766, + "step": 8702 + }, + { + "epoch": 1.39, + "learning_rate": 4.240146009718771e-05, + "loss": 0.9132, + "step": 8703 + }, + { + "epoch": 1.39, + "learning_rate": 4.239960767647581e-05, + "loss": 0.9746, + "step": 8704 + }, + { + "epoch": 1.39, + "learning_rate": 4.2397755070468655e-05, + "loss": 0.9472, + "step": 8705 + }, + { + "epoch": 1.39, + "learning_rate": 4.239590227918597e-05, + "loss": 0.9972, + "step": 8706 + }, + { + "epoch": 1.39, + "learning_rate": 4.239404930264749e-05, + "loss": 0.9203, + "step": 8707 + }, + { + "epoch": 1.39, + "learning_rate": 4.2392196140872936e-05, + "loss": 0.8962, + "step": 8708 + }, + { + "epoch": 1.39, + "learning_rate": 4.239034279388206e-05, + "loss": 0.8337, + "step": 8709 + }, + { + "epoch": 1.39, + "learning_rate": 4.238848926169459e-05, + "loss": 0.9342, + "step": 8710 + }, + { + "epoch": 1.39, + "learning_rate": 4.238663554433027e-05, + "loss": 0.9551, + "step": 8711 + }, + { + "epoch": 1.39, + "learning_rate": 4.238478164180883e-05, + "loss": 0.8676, + "step": 8712 + }, + { + "epoch": 1.39, + "learning_rate": 4.238292755415002e-05, + "loss": 0.9425, + "step": 8713 + }, + { + "epoch": 1.39, + "learning_rate": 4.2381073281373584e-05, + "loss": 0.9995, + "step": 8714 + }, + { + "epoch": 1.39, + "learning_rate": 4.237921882349928e-05, + "loss": 0.9615, + "step": 8715 + }, + { + "epoch": 1.39, + "learning_rate": 4.2377364180546835e-05, + "loss": 0.8605, + "step": 8716 + }, + { + "epoch": 1.39, + "learning_rate": 4.237550935253601e-05, + "loss": 0.8258, + "step": 8717 + }, + { + "epoch": 1.39, + "learning_rate": 4.237365433948657e-05, + "loss": 0.9043, + "step": 8718 + }, + { + "epoch": 1.39, + "learning_rate": 4.237179914141825e-05, + "loss": 0.8679, + "step": 8719 + }, + { + "epoch": 1.39, + "learning_rate": 4.236994375835083e-05, + "loss": 1.0011, + "step": 8720 + }, + { + "epoch": 1.39, + "learning_rate": 4.236808819030404e-05, + "loss": 0.8879, + "step": 8721 + }, + { + "epoch": 1.39, + "learning_rate": 4.236623243729767e-05, + "loss": 0.9046, + "step": 8722 + }, + { + "epoch": 1.39, + "learning_rate": 4.2364376499351453e-05, + "loss": 0.8936, + "step": 8723 + }, + { + "epoch": 1.39, + "learning_rate": 4.2362520376485174e-05, + "loss": 0.8736, + "step": 8724 + }, + { + "epoch": 1.39, + "learning_rate": 4.2360664068718594e-05, + "loss": 0.9492, + "step": 8725 + }, + { + "epoch": 1.39, + "learning_rate": 4.235880757607148e-05, + "loss": 0.8632, + "step": 8726 + }, + { + "epoch": 1.39, + "learning_rate": 4.235695089856361e-05, + "loss": 0.9776, + "step": 8727 + }, + { + "epoch": 1.39, + "learning_rate": 4.2355094036214745e-05, + "loss": 0.9108, + "step": 8728 + }, + { + "epoch": 1.39, + "learning_rate": 4.2353236989044666e-05, + "loss": 0.9054, + "step": 8729 + }, + { + "epoch": 1.39, + "learning_rate": 4.2351379757073154e-05, + "loss": 0.8704, + "step": 8730 + }, + { + "epoch": 1.39, + "learning_rate": 4.234952234031997e-05, + "loss": 0.9299, + "step": 8731 + }, + { + "epoch": 1.39, + "learning_rate": 4.2347664738804914e-05, + "loss": 0.9453, + "step": 8732 + }, + { + "epoch": 1.39, + "learning_rate": 4.234580695254776e-05, + "loss": 0.8943, + "step": 8733 + }, + { + "epoch": 1.39, + "learning_rate": 4.234394898156829e-05, + "loss": 0.9053, + "step": 8734 + }, + { + "epoch": 1.39, + "learning_rate": 4.234209082588629e-05, + "loss": 0.9408, + "step": 8735 + }, + { + "epoch": 1.39, + "learning_rate": 4.234023248552156e-05, + "loss": 0.8522, + "step": 8736 + }, + { + "epoch": 1.39, + "learning_rate": 4.2338373960493885e-05, + "loss": 0.9409, + "step": 8737 + }, + { + "epoch": 1.39, + "learning_rate": 4.233651525082304e-05, + "loss": 0.8566, + "step": 8738 + }, + { + "epoch": 1.39, + "learning_rate": 4.233465635652884e-05, + "loss": 0.9437, + "step": 8739 + }, + { + "epoch": 1.39, + "learning_rate": 4.2332797277631075e-05, + "loss": 0.9779, + "step": 8740 + }, + { + "epoch": 1.39, + "learning_rate": 4.233093801414954e-05, + "loss": 0.9136, + "step": 8741 + }, + { + "epoch": 1.39, + "learning_rate": 4.232907856610404e-05, + "loss": 0.8469, + "step": 8742 + }, + { + "epoch": 1.39, + "learning_rate": 4.232721893351437e-05, + "loss": 0.9094, + "step": 8743 + }, + { + "epoch": 1.39, + "learning_rate": 4.232535911640034e-05, + "loss": 0.9571, + "step": 8744 + }, + { + "epoch": 1.39, + "learning_rate": 4.232349911478175e-05, + "loss": 0.843, + "step": 8745 + }, + { + "epoch": 1.39, + "learning_rate": 4.232163892867842e-05, + "loss": 0.8815, + "step": 8746 + }, + { + "epoch": 1.39, + "learning_rate": 4.2319778558110145e-05, + "loss": 0.8577, + "step": 8747 + }, + { + "epoch": 1.39, + "learning_rate": 4.2317918003096745e-05, + "loss": 0.8973, + "step": 8748 + }, + { + "epoch": 1.39, + "learning_rate": 4.2316057263658035e-05, + "loss": 0.9034, + "step": 8749 + }, + { + "epoch": 1.39, + "learning_rate": 4.231419633981383e-05, + "loss": 0.9159, + "step": 8750 + }, + { + "epoch": 1.39, + "learning_rate": 4.231233523158393e-05, + "loss": 0.9237, + "step": 8751 + }, + { + "epoch": 1.39, + "learning_rate": 4.2310473938988186e-05, + "loss": 0.8683, + "step": 8752 + }, + { + "epoch": 1.39, + "learning_rate": 4.23086124620464e-05, + "loss": 0.9199, + "step": 8753 + }, + { + "epoch": 1.39, + "learning_rate": 4.230675080077839e-05, + "loss": 0.8848, + "step": 8754 + }, + { + "epoch": 1.39, + "learning_rate": 4.230488895520401e-05, + "loss": 0.8711, + "step": 8755 + }, + { + "epoch": 1.39, + "learning_rate": 4.230302692534306e-05, + "loss": 0.847, + "step": 8756 + }, + { + "epoch": 1.4, + "learning_rate": 4.230116471121538e-05, + "loss": 0.8698, + "step": 8757 + }, + { + "epoch": 1.4, + "learning_rate": 4.229930231284079e-05, + "loss": 0.9914, + "step": 8758 + }, + { + "epoch": 1.4, + "learning_rate": 4.2297439730239145e-05, + "loss": 0.9309, + "step": 8759 + }, + { + "epoch": 1.4, + "learning_rate": 4.229557696343026e-05, + "loss": 0.9379, + "step": 8760 + }, + { + "epoch": 1.4, + "learning_rate": 4.2293714012433986e-05, + "loss": 0.8983, + "step": 8761 + }, + { + "epoch": 1.4, + "learning_rate": 4.229185087727016e-05, + "loss": 0.9728, + "step": 8762 + }, + { + "epoch": 1.4, + "learning_rate": 4.228998755795861e-05, + "loss": 0.8288, + "step": 8763 + }, + { + "epoch": 1.4, + "learning_rate": 4.22881240545192e-05, + "loss": 0.9579, + "step": 8764 + }, + { + "epoch": 1.4, + "learning_rate": 4.228626036697175e-05, + "loss": 0.8832, + "step": 8765 + }, + { + "epoch": 1.4, + "learning_rate": 4.228439649533613e-05, + "loss": 0.9345, + "step": 8766 + }, + { + "epoch": 1.4, + "learning_rate": 4.2282532439632185e-05, + "loss": 0.9957, + "step": 8767 + }, + { + "epoch": 1.4, + "learning_rate": 4.228066819987976e-05, + "loss": 1.0225, + "step": 8768 + }, + { + "epoch": 1.4, + "learning_rate": 4.227880377609871e-05, + "loss": 0.9422, + "step": 8769 + }, + { + "epoch": 1.4, + "learning_rate": 4.227693916830889e-05, + "loss": 0.8459, + "step": 8770 + }, + { + "epoch": 1.4, + "learning_rate": 4.2275074376530153e-05, + "loss": 0.8617, + "step": 8771 + }, + { + "epoch": 1.4, + "learning_rate": 4.2273209400782365e-05, + "loss": 0.88, + "step": 8772 + }, + { + "epoch": 1.4, + "learning_rate": 4.227134424108539e-05, + "loss": 0.9011, + "step": 8773 + }, + { + "epoch": 1.4, + "learning_rate": 4.226947889745907e-05, + "loss": 0.938, + "step": 8774 + }, + { + "epoch": 1.4, + "learning_rate": 4.22676133699233e-05, + "loss": 0.9407, + "step": 8775 + }, + { + "epoch": 1.4, + "learning_rate": 4.226574765849792e-05, + "loss": 0.8981, + "step": 8776 + }, + { + "epoch": 1.4, + "learning_rate": 4.226388176320281e-05, + "loss": 0.9476, + "step": 8777 + }, + { + "epoch": 1.4, + "learning_rate": 4.226201568405784e-05, + "loss": 0.9753, + "step": 8778 + }, + { + "epoch": 1.4, + "learning_rate": 4.226014942108289e-05, + "loss": 0.8728, + "step": 8779 + }, + { + "epoch": 1.4, + "learning_rate": 4.225828297429783e-05, + "loss": 0.9535, + "step": 8780 + }, + { + "epoch": 1.4, + "learning_rate": 4.225641634372253e-05, + "loss": 0.9105, + "step": 8781 + }, + { + "epoch": 1.4, + "learning_rate": 4.225454952937687e-05, + "loss": 0.8627, + "step": 8782 + }, + { + "epoch": 1.4, + "learning_rate": 4.225268253128073e-05, + "loss": 0.9507, + "step": 8783 + }, + { + "epoch": 1.4, + "learning_rate": 4.2250815349454e-05, + "loss": 0.905, + "step": 8784 + }, + { + "epoch": 1.4, + "learning_rate": 4.224894798391656e-05, + "loss": 0.9104, + "step": 8785 + }, + { + "epoch": 1.4, + "learning_rate": 4.224708043468829e-05, + "loss": 0.914, + "step": 8786 + }, + { + "epoch": 1.4, + "learning_rate": 4.22452127017891e-05, + "loss": 0.8931, + "step": 8787 + }, + { + "epoch": 1.4, + "learning_rate": 4.224334478523885e-05, + "loss": 1.0121, + "step": 8788 + }, + { + "epoch": 1.4, + "learning_rate": 4.2241476685057445e-05, + "loss": 0.885, + "step": 8789 + }, + { + "epoch": 1.4, + "learning_rate": 4.223960840126479e-05, + "loss": 0.8608, + "step": 8790 + }, + { + "epoch": 1.4, + "learning_rate": 4.223773993388077e-05, + "loss": 0.889, + "step": 8791 + }, + { + "epoch": 1.4, + "learning_rate": 4.223587128292529e-05, + "loss": 0.8694, + "step": 8792 + }, + { + "epoch": 1.4, + "learning_rate": 4.2234002448418234e-05, + "loss": 0.9496, + "step": 8793 + }, + { + "epoch": 1.4, + "learning_rate": 4.2232133430379516e-05, + "loss": 0.9419, + "step": 8794 + }, + { + "epoch": 1.4, + "learning_rate": 4.2230264228829043e-05, + "loss": 0.8612, + "step": 8795 + }, + { + "epoch": 1.4, + "learning_rate": 4.222839484378671e-05, + "loss": 0.8547, + "step": 8796 + }, + { + "epoch": 1.4, + "learning_rate": 4.222652527527244e-05, + "loss": 0.9171, + "step": 8797 + }, + { + "epoch": 1.4, + "learning_rate": 4.2224655523306136e-05, + "loss": 0.9265, + "step": 8798 + }, + { + "epoch": 1.4, + "learning_rate": 4.22227855879077e-05, + "loss": 0.9212, + "step": 8799 + }, + { + "epoch": 1.4, + "learning_rate": 4.222091546909706e-05, + "loss": 0.9775, + "step": 8800 + }, + { + "epoch": 1.4, + "learning_rate": 4.221904516689412e-05, + "loss": 0.894, + "step": 8801 + }, + { + "epoch": 1.4, + "learning_rate": 4.2217174681318813e-05, + "loss": 0.8563, + "step": 8802 + }, + { + "epoch": 1.4, + "learning_rate": 4.221530401239104e-05, + "loss": 0.9249, + "step": 8803 + }, + { + "epoch": 1.4, + "learning_rate": 4.221343316013073e-05, + "loss": 0.8426, + "step": 8804 + }, + { + "epoch": 1.4, + "learning_rate": 4.221156212455781e-05, + "loss": 0.913, + "step": 8805 + }, + { + "epoch": 1.4, + "learning_rate": 4.22096909056922e-05, + "loss": 0.8991, + "step": 8806 + }, + { + "epoch": 1.4, + "learning_rate": 4.2207819503553834e-05, + "loss": 1.0678, + "step": 8807 + }, + { + "epoch": 1.4, + "learning_rate": 4.220594791816264e-05, + "loss": 0.8768, + "step": 8808 + }, + { + "epoch": 1.4, + "learning_rate": 4.220407614953854e-05, + "loss": 0.9429, + "step": 8809 + }, + { + "epoch": 1.4, + "learning_rate": 4.220220419770148e-05, + "loss": 0.8955, + "step": 8810 + }, + { + "epoch": 1.4, + "learning_rate": 4.220033206267138e-05, + "loss": 0.8189, + "step": 8811 + }, + { + "epoch": 1.4, + "learning_rate": 4.2198459744468195e-05, + "loss": 0.8742, + "step": 8812 + }, + { + "epoch": 1.4, + "learning_rate": 4.219658724311185e-05, + "loss": 0.9071, + "step": 8813 + }, + { + "epoch": 1.4, + "learning_rate": 4.21947145586223e-05, + "loss": 0.8897, + "step": 8814 + }, + { + "epoch": 1.4, + "learning_rate": 4.2192841691019466e-05, + "loss": 0.9925, + "step": 8815 + }, + { + "epoch": 1.4, + "learning_rate": 4.219096864032331e-05, + "loss": 0.8442, + "step": 8816 + }, + { + "epoch": 1.4, + "learning_rate": 4.218909540655378e-05, + "loss": 0.9169, + "step": 8817 + }, + { + "epoch": 1.4, + "learning_rate": 4.2187221989730805e-05, + "loss": 0.9333, + "step": 8818 + }, + { + "epoch": 1.4, + "learning_rate": 4.218534838987437e-05, + "loss": 0.7992, + "step": 8819 + }, + { + "epoch": 1.41, + "learning_rate": 4.21834746070044e-05, + "loss": 0.9641, + "step": 8820 + }, + { + "epoch": 1.41, + "learning_rate": 4.2181600641140864e-05, + "loss": 0.8903, + "step": 8821 + }, + { + "epoch": 1.41, + "learning_rate": 4.21797264923037e-05, + "loss": 0.8355, + "step": 8822 + }, + { + "epoch": 1.41, + "learning_rate": 4.217785216051289e-05, + "loss": 0.8839, + "step": 8823 + }, + { + "epoch": 1.41, + "learning_rate": 4.2175977645788376e-05, + "loss": 0.9216, + "step": 8824 + }, + { + "epoch": 1.41, + "learning_rate": 4.2174102948150126e-05, + "loss": 0.8842, + "step": 8825 + }, + { + "epoch": 1.41, + "learning_rate": 4.217222806761811e-05, + "loss": 0.8711, + "step": 8826 + }, + { + "epoch": 1.41, + "learning_rate": 4.217035300421229e-05, + "loss": 0.9662, + "step": 8827 + }, + { + "epoch": 1.41, + "learning_rate": 4.216847775795264e-05, + "loss": 0.8535, + "step": 8828 + }, + { + "epoch": 1.41, + "learning_rate": 4.2166602328859115e-05, + "loss": 0.8494, + "step": 8829 + }, + { + "epoch": 1.41, + "learning_rate": 4.216472671695171e-05, + "loss": 0.9461, + "step": 8830 + }, + { + "epoch": 1.41, + "learning_rate": 4.216285092225038e-05, + "loss": 0.942, + "step": 8831 + }, + { + "epoch": 1.41, + "learning_rate": 4.216097494477511e-05, + "loss": 0.865, + "step": 8832 + }, + { + "epoch": 1.41, + "learning_rate": 4.215909878454587e-05, + "loss": 0.8838, + "step": 8833 + }, + { + "epoch": 1.41, + "learning_rate": 4.2157222441582647e-05, + "loss": 0.8989, + "step": 8834 + }, + { + "epoch": 1.41, + "learning_rate": 4.215534591590542e-05, + "loss": 0.8718, + "step": 8835 + }, + { + "epoch": 1.41, + "learning_rate": 4.215346920753418e-05, + "loss": 0.9432, + "step": 8836 + }, + { + "epoch": 1.41, + "learning_rate": 4.215159231648891e-05, + "loss": 0.9408, + "step": 8837 + }, + { + "epoch": 1.41, + "learning_rate": 4.2149715242789575e-05, + "loss": 0.8079, + "step": 8838 + }, + { + "epoch": 1.41, + "learning_rate": 4.214783798645621e-05, + "loss": 0.9264, + "step": 8839 + }, + { + "epoch": 1.41, + "learning_rate": 4.2145960547508765e-05, + "loss": 0.875, + "step": 8840 + }, + { + "epoch": 1.41, + "learning_rate": 4.2144082925967256e-05, + "loss": 0.7987, + "step": 8841 + }, + { + "epoch": 1.41, + "learning_rate": 4.214220512185168e-05, + "loss": 0.897, + "step": 8842 + }, + { + "epoch": 1.41, + "learning_rate": 4.214032713518201e-05, + "loss": 0.8573, + "step": 8843 + }, + { + "epoch": 1.41, + "learning_rate": 4.213844896597827e-05, + "loss": 0.8937, + "step": 8844 + }, + { + "epoch": 1.41, + "learning_rate": 4.213657061426045e-05, + "loss": 0.8926, + "step": 8845 + }, + { + "epoch": 1.41, + "learning_rate": 4.213469208004856e-05, + "loss": 0.8613, + "step": 8846 + }, + { + "epoch": 1.41, + "learning_rate": 4.21328133633626e-05, + "loss": 0.8516, + "step": 8847 + }, + { + "epoch": 1.41, + "learning_rate": 4.213093446422258e-05, + "loss": 0.8266, + "step": 8848 + }, + { + "epoch": 1.41, + "learning_rate": 4.2129055382648516e-05, + "loss": 0.88, + "step": 8849 + }, + { + "epoch": 1.41, + "learning_rate": 4.21271761186604e-05, + "loss": 0.9329, + "step": 8850 + }, + { + "epoch": 1.41, + "learning_rate": 4.212529667227826e-05, + "loss": 0.9556, + "step": 8851 + }, + { + "epoch": 1.41, + "learning_rate": 4.2123417043522115e-05, + "loss": 0.8683, + "step": 8852 + }, + { + "epoch": 1.41, + "learning_rate": 4.2121537232411964e-05, + "loss": 0.8556, + "step": 8853 + }, + { + "epoch": 1.41, + "learning_rate": 4.211965723896785e-05, + "loss": 0.9347, + "step": 8854 + }, + { + "epoch": 1.41, + "learning_rate": 4.211777706320976e-05, + "loss": 0.9162, + "step": 8855 + }, + { + "epoch": 1.41, + "learning_rate": 4.211589670515775e-05, + "loss": 0.9881, + "step": 8856 + }, + { + "epoch": 1.41, + "learning_rate": 4.2114016164831826e-05, + "loss": 0.8488, + "step": 8857 + }, + { + "epoch": 1.41, + "learning_rate": 4.211213544225202e-05, + "loss": 0.9189, + "step": 8858 + }, + { + "epoch": 1.41, + "learning_rate": 4.211025453743836e-05, + "loss": 0.9226, + "step": 8859 + }, + { + "epoch": 1.41, + "learning_rate": 4.2108373450410885e-05, + "loss": 0.9312, + "step": 8860 + }, + { + "epoch": 1.41, + "learning_rate": 4.210649218118962e-05, + "loss": 0.8594, + "step": 8861 + }, + { + "epoch": 1.41, + "learning_rate": 4.210461072979459e-05, + "loss": 0.9015, + "step": 8862 + }, + { + "epoch": 1.41, + "learning_rate": 4.210272909624585e-05, + "loss": 0.8344, + "step": 8863 + }, + { + "epoch": 1.41, + "learning_rate": 4.210084728056342e-05, + "loss": 0.9083, + "step": 8864 + }, + { + "epoch": 1.41, + "learning_rate": 4.209896528276735e-05, + "loss": 0.9038, + "step": 8865 + }, + { + "epoch": 1.41, + "learning_rate": 4.209708310287768e-05, + "loss": 0.8814, + "step": 8866 + }, + { + "epoch": 1.41, + "learning_rate": 4.209520074091446e-05, + "loss": 0.8422, + "step": 8867 + }, + { + "epoch": 1.41, + "learning_rate": 4.209331819689772e-05, + "loss": 0.9358, + "step": 8868 + }, + { + "epoch": 1.41, + "learning_rate": 4.209143547084753e-05, + "loss": 0.9644, + "step": 8869 + }, + { + "epoch": 1.41, + "learning_rate": 4.208955256278393e-05, + "loss": 0.9625, + "step": 8870 + }, + { + "epoch": 1.41, + "learning_rate": 4.208766947272696e-05, + "loss": 0.8197, + "step": 8871 + }, + { + "epoch": 1.41, + "learning_rate": 4.2085786200696696e-05, + "loss": 0.9247, + "step": 8872 + }, + { + "epoch": 1.41, + "learning_rate": 4.2083902746713175e-05, + "loss": 0.8365, + "step": 8873 + }, + { + "epoch": 1.41, + "learning_rate": 4.208201911079646e-05, + "loss": 0.8315, + "step": 8874 + }, + { + "epoch": 1.41, + "learning_rate": 4.208013529296662e-05, + "loss": 0.8293, + "step": 8875 + }, + { + "epoch": 1.41, + "learning_rate": 4.20782512932437e-05, + "loss": 0.9052, + "step": 8876 + }, + { + "epoch": 1.41, + "learning_rate": 4.207636711164778e-05, + "loss": 0.8668, + "step": 8877 + }, + { + "epoch": 1.41, + "learning_rate": 4.207448274819891e-05, + "loss": 1.0445, + "step": 8878 + }, + { + "epoch": 1.41, + "learning_rate": 4.207259820291717e-05, + "loss": 0.8516, + "step": 8879 + }, + { + "epoch": 1.41, + "learning_rate": 4.2070713475822614e-05, + "loss": 0.9559, + "step": 8880 + }, + { + "epoch": 1.41, + "learning_rate": 4.2068828566935336e-05, + "loss": 1.002, + "step": 8881 + }, + { + "epoch": 1.42, + "learning_rate": 4.206694347627539e-05, + "loss": 0.9067, + "step": 8882 + }, + { + "epoch": 1.42, + "learning_rate": 4.206505820386286e-05, + "loss": 0.8514, + "step": 8883 + }, + { + "epoch": 1.42, + "learning_rate": 4.2063172749717824e-05, + "loss": 0.8914, + "step": 8884 + }, + { + "epoch": 1.42, + "learning_rate": 4.2061287113860345e-05, + "loss": 0.9364, + "step": 8885 + }, + { + "epoch": 1.42, + "learning_rate": 4.205940129631053e-05, + "loss": 0.8178, + "step": 8886 + }, + { + "epoch": 1.42, + "learning_rate": 4.205751529708845e-05, + "loss": 0.9026, + "step": 8887 + }, + { + "epoch": 1.42, + "learning_rate": 4.205562911621417e-05, + "loss": 0.8626, + "step": 8888 + }, + { + "epoch": 1.42, + "learning_rate": 4.2053742753707814e-05, + "loss": 0.9019, + "step": 8889 + }, + { + "epoch": 1.42, + "learning_rate": 4.2051856209589435e-05, + "loss": 0.8888, + "step": 8890 + }, + { + "epoch": 1.42, + "learning_rate": 4.204996948387915e-05, + "loss": 0.8966, + "step": 8891 + }, + { + "epoch": 1.42, + "learning_rate": 4.2048082576597044e-05, + "loss": 0.8804, + "step": 8892 + }, + { + "epoch": 1.42, + "learning_rate": 4.204619548776321e-05, + "loss": 0.8939, + "step": 8893 + }, + { + "epoch": 1.42, + "learning_rate": 4.204430821739773e-05, + "loss": 0.8494, + "step": 8894 + }, + { + "epoch": 1.42, + "learning_rate": 4.204242076552072e-05, + "loss": 0.8829, + "step": 8895 + }, + { + "epoch": 1.42, + "learning_rate": 4.204053313215228e-05, + "loss": 0.8931, + "step": 8896 + }, + { + "epoch": 1.42, + "learning_rate": 4.2038645317312505e-05, + "loss": 0.8822, + "step": 8897 + }, + { + "epoch": 1.42, + "learning_rate": 4.20367573210215e-05, + "loss": 0.9306, + "step": 8898 + }, + { + "epoch": 1.42, + "learning_rate": 4.2034869143299375e-05, + "loss": 0.8123, + "step": 8899 + }, + { + "epoch": 1.42, + "learning_rate": 4.203298078416624e-05, + "loss": 0.9737, + "step": 8900 + }, + { + "epoch": 1.42, + "learning_rate": 4.20310922436422e-05, + "loss": 0.9752, + "step": 8901 + }, + { + "epoch": 1.42, + "learning_rate": 4.202920352174736e-05, + "loss": 0.8763, + "step": 8902 + }, + { + "epoch": 1.42, + "learning_rate": 4.202731461850185e-05, + "loss": 0.8565, + "step": 8903 + }, + { + "epoch": 1.42, + "learning_rate": 4.202542553392577e-05, + "loss": 0.9301, + "step": 8904 + }, + { + "epoch": 1.42, + "learning_rate": 4.2023536268039246e-05, + "loss": 0.9389, + "step": 8905 + }, + { + "epoch": 1.42, + "learning_rate": 4.202164682086239e-05, + "loss": 0.9045, + "step": 8906 + }, + { + "epoch": 1.42, + "learning_rate": 4.201975719241534e-05, + "loss": 0.9192, + "step": 8907 + }, + { + "epoch": 1.42, + "learning_rate": 4.20178673827182e-05, + "loss": 0.9444, + "step": 8908 + }, + { + "epoch": 1.42, + "learning_rate": 4.2015977391791115e-05, + "loss": 0.9187, + "step": 8909 + }, + { + "epoch": 1.42, + "learning_rate": 4.201408721965419e-05, + "loss": 0.9985, + "step": 8910 + }, + { + "epoch": 1.42, + "learning_rate": 4.201219686632757e-05, + "loss": 0.8909, + "step": 8911 + }, + { + "epoch": 1.42, + "learning_rate": 4.201030633183139e-05, + "loss": 0.9119, + "step": 8912 + }, + { + "epoch": 1.42, + "learning_rate": 4.200841561618577e-05, + "loss": 0.8991, + "step": 8913 + }, + { + "epoch": 1.42, + "learning_rate": 4.200652471941084e-05, + "loss": 0.9736, + "step": 8914 + }, + { + "epoch": 1.42, + "learning_rate": 4.200463364152676e-05, + "loss": 0.9478, + "step": 8915 + }, + { + "epoch": 1.42, + "learning_rate": 4.200274238255365e-05, + "loss": 0.9291, + "step": 8916 + }, + { + "epoch": 1.42, + "learning_rate": 4.200085094251165e-05, + "loss": 0.7998, + "step": 8917 + }, + { + "epoch": 1.42, + "learning_rate": 4.199895932142092e-05, + "loss": 0.9631, + "step": 8918 + }, + { + "epoch": 1.42, + "learning_rate": 4.199706751930158e-05, + "loss": 0.9123, + "step": 8919 + }, + { + "epoch": 1.42, + "learning_rate": 4.1995175536173806e-05, + "loss": 0.887, + "step": 8920 + }, + { + "epoch": 1.42, + "learning_rate": 4.199328337205772e-05, + "loss": 0.8203, + "step": 8921 + }, + { + "epoch": 1.42, + "learning_rate": 4.199139102697349e-05, + "loss": 0.8964, + "step": 8922 + }, + { + "epoch": 1.42, + "learning_rate": 4.198949850094125e-05, + "loss": 0.8473, + "step": 8923 + }, + { + "epoch": 1.42, + "learning_rate": 4.198760579398118e-05, + "loss": 0.8178, + "step": 8924 + }, + { + "epoch": 1.42, + "learning_rate": 4.198571290611341e-05, + "loss": 0.9144, + "step": 8925 + }, + { + "epoch": 1.42, + "learning_rate": 4.198381983735811e-05, + "loss": 0.8964, + "step": 8926 + }, + { + "epoch": 1.42, + "learning_rate": 4.1981926587735443e-05, + "loss": 0.8079, + "step": 8927 + }, + { + "epoch": 1.42, + "learning_rate": 4.1980033157265576e-05, + "loss": 0.9555, + "step": 8928 + }, + { + "epoch": 1.42, + "learning_rate": 4.1978139545968656e-05, + "loss": 0.932, + "step": 8929 + }, + { + "epoch": 1.42, + "learning_rate": 4.197624575386485e-05, + "loss": 0.9122, + "step": 8930 + }, + { + "epoch": 1.42, + "learning_rate": 4.197435178097435e-05, + "loss": 0.918, + "step": 8931 + }, + { + "epoch": 1.42, + "learning_rate": 4.1972457627317304e-05, + "loss": 0.9036, + "step": 8932 + }, + { + "epoch": 1.42, + "learning_rate": 4.197056329291388e-05, + "loss": 0.8935, + "step": 8933 + }, + { + "epoch": 1.42, + "learning_rate": 4.196866877778427e-05, + "loss": 0.9399, + "step": 8934 + }, + { + "epoch": 1.42, + "learning_rate": 4.196677408194864e-05, + "loss": 0.9221, + "step": 8935 + }, + { + "epoch": 1.42, + "learning_rate": 4.196487920542715e-05, + "loss": 0.8901, + "step": 8936 + }, + { + "epoch": 1.42, + "learning_rate": 4.1962984148240016e-05, + "loss": 0.9141, + "step": 8937 + }, + { + "epoch": 1.42, + "learning_rate": 4.1961088910407384e-05, + "loss": 0.8899, + "step": 8938 + }, + { + "epoch": 1.42, + "learning_rate": 4.195919349194946e-05, + "loss": 1.0642, + "step": 8939 + }, + { + "epoch": 1.42, + "learning_rate": 4.195729789288642e-05, + "loss": 1.0027, + "step": 8940 + }, + { + "epoch": 1.42, + "learning_rate": 4.195540211323846e-05, + "loss": 0.8357, + "step": 8941 + }, + { + "epoch": 1.42, + "learning_rate": 4.195350615302576e-05, + "loss": 0.8955, + "step": 8942 + }, + { + "epoch": 1.42, + "learning_rate": 4.19516100122685e-05, + "loss": 0.8668, + "step": 8943 + }, + { + "epoch": 1.42, + "learning_rate": 4.194971369098689e-05, + "loss": 0.8859, + "step": 8944 + }, + { + "epoch": 1.43, + "learning_rate": 4.1947817189201124e-05, + "loss": 0.9031, + "step": 8945 + }, + { + "epoch": 1.43, + "learning_rate": 4.194592050693139e-05, + "loss": 0.8988, + "step": 8946 + }, + { + "epoch": 1.43, + "learning_rate": 4.194402364419789e-05, + "loss": 0.9219, + "step": 8947 + }, + { + "epoch": 1.43, + "learning_rate": 4.194212660102083e-05, + "loss": 0.9169, + "step": 8948 + }, + { + "epoch": 1.43, + "learning_rate": 4.194022937742041e-05, + "loss": 0.8898, + "step": 8949 + }, + { + "epoch": 1.43, + "learning_rate": 4.1938331973416825e-05, + "loss": 0.8573, + "step": 8950 + }, + { + "epoch": 1.43, + "learning_rate": 4.1936434389030286e-05, + "loss": 0.9344, + "step": 8951 + }, + { + "epoch": 1.43, + "learning_rate": 4.193453662428101e-05, + "loss": 0.8595, + "step": 8952 + }, + { + "epoch": 1.43, + "learning_rate": 4.19326386791892e-05, + "loss": 0.9255, + "step": 8953 + }, + { + "epoch": 1.43, + "learning_rate": 4.193074055377506e-05, + "loss": 0.8913, + "step": 8954 + }, + { + "epoch": 1.43, + "learning_rate": 4.192884224805882e-05, + "loss": 1.0084, + "step": 8955 + }, + { + "epoch": 1.43, + "learning_rate": 4.192694376206069e-05, + "loss": 0.844, + "step": 8956 + }, + { + "epoch": 1.43, + "learning_rate": 4.192504509580088e-05, + "loss": 0.8811, + "step": 8957 + }, + { + "epoch": 1.43, + "learning_rate": 4.1923146249299616e-05, + "loss": 0.8245, + "step": 8958 + }, + { + "epoch": 1.43, + "learning_rate": 4.1921247222577114e-05, + "loss": 0.8936, + "step": 8959 + }, + { + "epoch": 1.43, + "learning_rate": 4.191934801565361e-05, + "loss": 0.8261, + "step": 8960 + }, + { + "epoch": 1.43, + "learning_rate": 4.1917448628549314e-05, + "loss": 0.8856, + "step": 8961 + }, + { + "epoch": 1.43, + "learning_rate": 4.191554906128447e-05, + "loss": 0.8883, + "step": 8962 + }, + { + "epoch": 1.43, + "learning_rate": 4.191364931387929e-05, + "loss": 0.803, + "step": 8963 + }, + { + "epoch": 1.43, + "learning_rate": 4.191174938635402e-05, + "loss": 0.9392, + "step": 8964 + }, + { + "epoch": 1.43, + "learning_rate": 4.190984927872889e-05, + "loss": 0.8966, + "step": 8965 + }, + { + "epoch": 1.43, + "learning_rate": 4.1907948991024114e-05, + "loss": 0.9106, + "step": 8966 + }, + { + "epoch": 1.43, + "learning_rate": 4.190604852325997e-05, + "loss": 0.8685, + "step": 8967 + }, + { + "epoch": 1.43, + "learning_rate": 4.1904147875456655e-05, + "loss": 0.9089, + "step": 8968 + }, + { + "epoch": 1.43, + "learning_rate": 4.190224704763443e-05, + "loss": 0.9845, + "step": 8969 + }, + { + "epoch": 1.43, + "learning_rate": 4.1900346039813545e-05, + "loss": 0.8981, + "step": 8970 + }, + { + "epoch": 1.43, + "learning_rate": 4.189844485201422e-05, + "loss": 0.8718, + "step": 8971 + }, + { + "epoch": 1.43, + "learning_rate": 4.189654348425673e-05, + "loss": 0.8752, + "step": 8972 + }, + { + "epoch": 1.43, + "learning_rate": 4.189464193656131e-05, + "loss": 0.939, + "step": 8973 + }, + { + "epoch": 1.43, + "learning_rate": 4.18927402089482e-05, + "loss": 0.8989, + "step": 8974 + }, + { + "epoch": 1.43, + "learning_rate": 4.189083830143767e-05, + "loss": 0.9306, + "step": 8975 + }, + { + "epoch": 1.43, + "learning_rate": 4.1888936214049966e-05, + "loss": 1.0063, + "step": 8976 + }, + { + "epoch": 1.43, + "learning_rate": 4.188703394680534e-05, + "loss": 0.7742, + "step": 8977 + }, + { + "epoch": 1.43, + "learning_rate": 4.188513149972406e-05, + "loss": 0.8371, + "step": 8978 + }, + { + "epoch": 1.43, + "learning_rate": 4.188322887282638e-05, + "loss": 0.9538, + "step": 8979 + }, + { + "epoch": 1.43, + "learning_rate": 4.1881326066132564e-05, + "loss": 0.8693, + "step": 8980 + }, + { + "epoch": 1.43, + "learning_rate": 4.187942307966287e-05, + "loss": 0.8112, + "step": 8981 + }, + { + "epoch": 1.43, + "learning_rate": 4.187751991343757e-05, + "loss": 0.9143, + "step": 8982 + }, + { + "epoch": 1.43, + "learning_rate": 4.187561656747693e-05, + "loss": 0.8577, + "step": 8983 + }, + { + "epoch": 1.43, + "learning_rate": 4.187371304180122e-05, + "loss": 0.9069, + "step": 8984 + }, + { + "epoch": 1.43, + "learning_rate": 4.187180933643071e-05, + "loss": 0.8875, + "step": 8985 + }, + { + "epoch": 1.43, + "learning_rate": 4.186990545138567e-05, + "loss": 0.8378, + "step": 8986 + }, + { + "epoch": 1.43, + "learning_rate": 4.1868001386686386e-05, + "loss": 0.819, + "step": 8987 + }, + { + "epoch": 1.43, + "learning_rate": 4.1866097142353123e-05, + "loss": 0.898, + "step": 8988 + }, + { + "epoch": 1.43, + "learning_rate": 4.1864192718406166e-05, + "loss": 0.848, + "step": 8989 + }, + { + "epoch": 1.43, + "learning_rate": 4.18622881148658e-05, + "loss": 0.9236, + "step": 8990 + }, + { + "epoch": 1.43, + "learning_rate": 4.1860383331752295e-05, + "loss": 0.8962, + "step": 8991 + }, + { + "epoch": 1.43, + "learning_rate": 4.185847836908595e-05, + "loss": 0.8874, + "step": 8992 + }, + { + "epoch": 1.43, + "learning_rate": 4.185657322688704e-05, + "loss": 0.802, + "step": 8993 + }, + { + "epoch": 1.43, + "learning_rate": 4.185466790517586e-05, + "loss": 0.8387, + "step": 8994 + }, + { + "epoch": 1.43, + "learning_rate": 4.18527624039727e-05, + "loss": 0.8367, + "step": 8995 + }, + { + "epoch": 1.43, + "learning_rate": 4.185085672329785e-05, + "loss": 1.0749, + "step": 8996 + }, + { + "epoch": 1.43, + "learning_rate": 4.184895086317161e-05, + "loss": 0.8559, + "step": 8997 + }, + { + "epoch": 1.43, + "learning_rate": 4.184704482361427e-05, + "loss": 0.9727, + "step": 8998 + }, + { + "epoch": 1.43, + "learning_rate": 4.1845138604646135e-05, + "loss": 0.9095, + "step": 8999 + }, + { + "epoch": 1.43, + "learning_rate": 4.1843232206287495e-05, + "loss": 0.932, + "step": 9000 + }, + { + "epoch": 1.43, + "learning_rate": 4.1841325628558665e-05, + "loss": 0.9657, + "step": 9001 + }, + { + "epoch": 1.43, + "learning_rate": 4.183941887147994e-05, + "loss": 0.9213, + "step": 9002 + }, + { + "epoch": 1.43, + "learning_rate": 4.1837511935071625e-05, + "loss": 0.8499, + "step": 9003 + }, + { + "epoch": 1.43, + "learning_rate": 4.183560481935403e-05, + "loss": 0.9035, + "step": 9004 + }, + { + "epoch": 1.43, + "learning_rate": 4.1833697524347456e-05, + "loss": 0.8556, + "step": 9005 + }, + { + "epoch": 1.43, + "learning_rate": 4.183179005007224e-05, + "loss": 0.8713, + "step": 9006 + }, + { + "epoch": 1.43, + "learning_rate": 4.182988239654867e-05, + "loss": 0.8567, + "step": 9007 + }, + { + "epoch": 1.44, + "learning_rate": 4.1827974563797065e-05, + "loss": 0.9179, + "step": 9008 + }, + { + "epoch": 1.44, + "learning_rate": 4.182606655183776e-05, + "loss": 1.013, + "step": 9009 + }, + { + "epoch": 1.44, + "learning_rate": 4.1824158360691054e-05, + "loss": 0.9403, + "step": 9010 + }, + { + "epoch": 1.44, + "learning_rate": 4.1822249990377276e-05, + "loss": 0.9329, + "step": 9011 + }, + { + "epoch": 1.44, + "learning_rate": 4.1820341440916755e-05, + "loss": 0.8507, + "step": 9012 + }, + { + "epoch": 1.44, + "learning_rate": 4.18184327123298e-05, + "loss": 0.9693, + "step": 9013 + }, + { + "epoch": 1.44, + "learning_rate": 4.181652380463675e-05, + "loss": 0.85, + "step": 9014 + }, + { + "epoch": 1.44, + "learning_rate": 4.181461471785794e-05, + "loss": 0.8968, + "step": 9015 + }, + { + "epoch": 1.44, + "learning_rate": 4.181270545201368e-05, + "loss": 0.9106, + "step": 9016 + }, + { + "epoch": 1.44, + "learning_rate": 4.181079600712432e-05, + "loss": 0.8552, + "step": 9017 + }, + { + "epoch": 1.44, + "learning_rate": 4.180888638321018e-05, + "loss": 0.8879, + "step": 9018 + }, + { + "epoch": 1.44, + "learning_rate": 4.1806976580291616e-05, + "loss": 0.8952, + "step": 9019 + }, + { + "epoch": 1.44, + "learning_rate": 4.180506659838895e-05, + "loss": 0.8578, + "step": 9020 + }, + { + "epoch": 1.44, + "learning_rate": 4.180315643752253e-05, + "loss": 0.9499, + "step": 9021 + }, + { + "epoch": 1.44, + "learning_rate": 4.180124609771269e-05, + "loss": 0.8827, + "step": 9022 + }, + { + "epoch": 1.44, + "learning_rate": 4.179933557897979e-05, + "loss": 0.9396, + "step": 9023 + }, + { + "epoch": 1.44, + "learning_rate": 4.179742488134416e-05, + "loss": 0.9055, + "step": 9024 + }, + { + "epoch": 1.44, + "learning_rate": 4.1795514004826145e-05, + "loss": 0.9317, + "step": 9025 + }, + { + "epoch": 1.44, + "learning_rate": 4.1793602949446106e-05, + "loss": 0.8593, + "step": 9026 + }, + { + "epoch": 1.44, + "learning_rate": 4.17916917152244e-05, + "loss": 0.8376, + "step": 9027 + }, + { + "epoch": 1.44, + "learning_rate": 4.1789780302181365e-05, + "loss": 0.8536, + "step": 9028 + }, + { + "epoch": 1.44, + "learning_rate": 4.178786871033736e-05, + "loss": 0.9067, + "step": 9029 + }, + { + "epoch": 1.44, + "learning_rate": 4.1785956939712754e-05, + "loss": 0.8973, + "step": 9030 + }, + { + "epoch": 1.44, + "learning_rate": 4.178404499032789e-05, + "loss": 0.8442, + "step": 9031 + }, + { + "epoch": 1.44, + "learning_rate": 4.178213286220314e-05, + "loss": 0.9513, + "step": 9032 + }, + { + "epoch": 1.44, + "learning_rate": 4.1780220555358864e-05, + "loss": 0.8965, + "step": 9033 + }, + { + "epoch": 1.44, + "learning_rate": 4.177830806981543e-05, + "loss": 0.9154, + "step": 9034 + }, + { + "epoch": 1.44, + "learning_rate": 4.17763954055932e-05, + "loss": 0.887, + "step": 9035 + }, + { + "epoch": 1.44, + "learning_rate": 4.177448256271254e-05, + "loss": 0.9329, + "step": 9036 + }, + { + "epoch": 1.44, + "learning_rate": 4.1772569541193815e-05, + "loss": 0.816, + "step": 9037 + }, + { + "epoch": 1.44, + "learning_rate": 4.1770656341057426e-05, + "loss": 0.9055, + "step": 9038 + }, + { + "epoch": 1.44, + "learning_rate": 4.176874296232372e-05, + "loss": 0.8891, + "step": 9039 + }, + { + "epoch": 1.44, + "learning_rate": 4.176682940501309e-05, + "loss": 0.8893, + "step": 9040 + }, + { + "epoch": 1.44, + "learning_rate": 4.176491566914589e-05, + "loss": 0.9175, + "step": 9041 + }, + { + "epoch": 1.44, + "learning_rate": 4.1763001754742523e-05, + "loss": 0.8872, + "step": 9042 + }, + { + "epoch": 1.44, + "learning_rate": 4.176108766182337e-05, + "loss": 0.8451, + "step": 9043 + }, + { + "epoch": 1.44, + "learning_rate": 4.175917339040881e-05, + "loss": 0.9378, + "step": 9044 + }, + { + "epoch": 1.44, + "learning_rate": 4.175725894051922e-05, + "loss": 0.8887, + "step": 9045 + }, + { + "epoch": 1.44, + "learning_rate": 4.1755344312175006e-05, + "loss": 0.8959, + "step": 9046 + }, + { + "epoch": 1.44, + "learning_rate": 4.175342950539654e-05, + "loss": 0.9387, + "step": 9047 + }, + { + "epoch": 1.44, + "learning_rate": 4.175151452020422e-05, + "loss": 0.9249, + "step": 9048 + }, + { + "epoch": 1.44, + "learning_rate": 4.174959935661844e-05, + "loss": 0.8425, + "step": 9049 + }, + { + "epoch": 1.44, + "learning_rate": 4.17476840146596e-05, + "loss": 0.8591, + "step": 9050 + }, + { + "epoch": 1.44, + "learning_rate": 4.1745768494348095e-05, + "loss": 0.9639, + "step": 9051 + }, + { + "epoch": 1.44, + "learning_rate": 4.1743852795704317e-05, + "loss": 0.8421, + "step": 9052 + }, + { + "epoch": 1.44, + "learning_rate": 4.174193691874868e-05, + "loss": 0.8787, + "step": 9053 + }, + { + "epoch": 1.44, + "learning_rate": 4.174002086350157e-05, + "loss": 0.8514, + "step": 9054 + }, + { + "epoch": 1.44, + "learning_rate": 4.173810462998341e-05, + "loss": 0.8464, + "step": 9055 + }, + { + "epoch": 1.44, + "learning_rate": 4.173618821821459e-05, + "loss": 0.9007, + "step": 9056 + }, + { + "epoch": 1.44, + "learning_rate": 4.173427162821553e-05, + "loss": 0.8726, + "step": 9057 + }, + { + "epoch": 1.44, + "learning_rate": 4.173235486000663e-05, + "loss": 0.8405, + "step": 9058 + }, + { + "epoch": 1.44, + "learning_rate": 4.173043791360832e-05, + "loss": 0.8652, + "step": 9059 + }, + { + "epoch": 1.44, + "learning_rate": 4.172852078904099e-05, + "loss": 0.8529, + "step": 9060 + }, + { + "epoch": 1.44, + "learning_rate": 4.172660348632508e-05, + "loss": 0.9396, + "step": 9061 + }, + { + "epoch": 1.44, + "learning_rate": 4.172468600548099e-05, + "loss": 0.879, + "step": 9062 + }, + { + "epoch": 1.44, + "learning_rate": 4.172276834652916e-05, + "loss": 0.8883, + "step": 9063 + }, + { + "epoch": 1.44, + "learning_rate": 4.172085050949e-05, + "loss": 0.9353, + "step": 9064 + }, + { + "epoch": 1.44, + "learning_rate": 4.1718932494383924e-05, + "loss": 0.9482, + "step": 9065 + }, + { + "epoch": 1.44, + "learning_rate": 4.171701430123137e-05, + "loss": 0.8451, + "step": 9066 + }, + { + "epoch": 1.44, + "learning_rate": 4.1715095930052763e-05, + "loss": 0.9614, + "step": 9067 + }, + { + "epoch": 1.44, + "learning_rate": 4.171317738086853e-05, + "loss": 0.8644, + "step": 9068 + }, + { + "epoch": 1.44, + "learning_rate": 4.17112586536991e-05, + "loss": 0.8861, + "step": 9069 + }, + { + "epoch": 1.44, + "learning_rate": 4.1709339748564924e-05, + "loss": 0.9196, + "step": 9070 + }, + { + "epoch": 1.45, + "learning_rate": 4.170742066548642e-05, + "loss": 0.9594, + "step": 9071 + }, + { + "epoch": 1.45, + "learning_rate": 4.170550140448403e-05, + "loss": 0.8668, + "step": 9072 + }, + { + "epoch": 1.45, + "learning_rate": 4.170358196557819e-05, + "loss": 0.9084, + "step": 9073 + }, + { + "epoch": 1.45, + "learning_rate": 4.170166234878934e-05, + "loss": 0.8697, + "step": 9074 + }, + { + "epoch": 1.45, + "learning_rate": 4.1699742554137924e-05, + "loss": 0.9578, + "step": 9075 + }, + { + "epoch": 1.45, + "learning_rate": 4.16978225816444e-05, + "loss": 0.9696, + "step": 9076 + }, + { + "epoch": 1.45, + "learning_rate": 4.1695902431329194e-05, + "loss": 0.8791, + "step": 9077 + }, + { + "epoch": 1.45, + "learning_rate": 4.1693982103212755e-05, + "loss": 0.8919, + "step": 9078 + }, + { + "epoch": 1.45, + "learning_rate": 4.169206159731556e-05, + "loss": 0.8536, + "step": 9079 + }, + { + "epoch": 1.45, + "learning_rate": 4.169014091365803e-05, + "loss": 0.9473, + "step": 9080 + }, + { + "epoch": 1.45, + "learning_rate": 4.168822005226064e-05, + "loss": 0.8468, + "step": 9081 + }, + { + "epoch": 1.45, + "learning_rate": 4.168629901314383e-05, + "loss": 0.8018, + "step": 9082 + }, + { + "epoch": 1.45, + "learning_rate": 4.168437779632808e-05, + "loss": 0.8877, + "step": 9083 + }, + { + "epoch": 1.45, + "learning_rate": 4.1682456401833825e-05, + "loss": 0.996, + "step": 9084 + }, + { + "epoch": 1.45, + "learning_rate": 4.168053482968154e-05, + "loss": 0.8762, + "step": 9085 + }, + { + "epoch": 1.45, + "learning_rate": 4.167861307989168e-05, + "loss": 0.8735, + "step": 9086 + }, + { + "epoch": 1.45, + "learning_rate": 4.1676691152484723e-05, + "loss": 0.8899, + "step": 9087 + }, + { + "epoch": 1.45, + "learning_rate": 4.1674769047481135e-05, + "loss": 0.8755, + "step": 9088 + }, + { + "epoch": 1.45, + "learning_rate": 4.167284676490137e-05, + "loss": 0.8857, + "step": 9089 + }, + { + "epoch": 1.45, + "learning_rate": 4.167092430476591e-05, + "loss": 0.8852, + "step": 9090 + }, + { + "epoch": 1.45, + "learning_rate": 4.166900166709523e-05, + "loss": 0.908, + "step": 9091 + }, + { + "epoch": 1.45, + "learning_rate": 4.16670788519098e-05, + "loss": 0.8553, + "step": 9092 + }, + { + "epoch": 1.45, + "learning_rate": 4.166515585923011e-05, + "loss": 0.811, + "step": 9093 + }, + { + "epoch": 1.45, + "learning_rate": 4.1663232689076616e-05, + "loss": 0.8269, + "step": 9094 + }, + { + "epoch": 1.45, + "learning_rate": 4.166130934146982e-05, + "loss": 0.8527, + "step": 9095 + }, + { + "epoch": 1.45, + "learning_rate": 4.1659385816430184e-05, + "loss": 0.8179, + "step": 9096 + }, + { + "epoch": 1.45, + "learning_rate": 4.1657462113978205e-05, + "loss": 0.9058, + "step": 9097 + }, + { + "epoch": 1.45, + "learning_rate": 4.165553823413437e-05, + "loss": 0.9367, + "step": 9098 + }, + { + "epoch": 1.45, + "learning_rate": 4.1653614176919164e-05, + "loss": 0.928, + "step": 9099 + }, + { + "epoch": 1.45, + "learning_rate": 4.165168994235308e-05, + "loss": 0.8654, + "step": 9100 + }, + { + "epoch": 1.45, + "learning_rate": 4.1649765530456606e-05, + "loss": 0.9032, + "step": 9101 + }, + { + "epoch": 1.45, + "learning_rate": 4.164784094125024e-05, + "loss": 0.9096, + "step": 9102 + }, + { + "epoch": 1.45, + "learning_rate": 4.1645916174754475e-05, + "loss": 0.8446, + "step": 9103 + }, + { + "epoch": 1.45, + "learning_rate": 4.1643991230989807e-05, + "loss": 0.8617, + "step": 9104 + }, + { + "epoch": 1.45, + "learning_rate": 4.164206610997674e-05, + "loss": 0.9362, + "step": 9105 + }, + { + "epoch": 1.45, + "learning_rate": 4.164014081173576e-05, + "loss": 0.9228, + "step": 9106 + }, + { + "epoch": 1.45, + "learning_rate": 4.16382153362874e-05, + "loss": 0.943, + "step": 9107 + }, + { + "epoch": 1.45, + "learning_rate": 4.163628968365214e-05, + "loss": 0.8967, + "step": 9108 + }, + { + "epoch": 1.45, + "learning_rate": 4.1634363853850486e-05, + "loss": 0.8259, + "step": 9109 + }, + { + "epoch": 1.45, + "learning_rate": 4.163243784690297e-05, + "loss": 0.8826, + "step": 9110 + }, + { + "epoch": 1.45, + "learning_rate": 4.1630511662830085e-05, + "loss": 0.826, + "step": 9111 + }, + { + "epoch": 1.45, + "learning_rate": 4.162858530165235e-05, + "loss": 0.8913, + "step": 9112 + }, + { + "epoch": 1.45, + "learning_rate": 4.162665876339027e-05, + "loss": 0.9443, + "step": 9113 + }, + { + "epoch": 1.45, + "learning_rate": 4.162473204806437e-05, + "loss": 0.8411, + "step": 9114 + }, + { + "epoch": 1.45, + "learning_rate": 4.162280515569517e-05, + "loss": 0.8981, + "step": 9115 + }, + { + "epoch": 1.45, + "learning_rate": 4.1620878086303184e-05, + "loss": 0.8446, + "step": 9116 + }, + { + "epoch": 1.45, + "learning_rate": 4.1618950839908945e-05, + "loss": 0.9189, + "step": 9117 + }, + { + "epoch": 1.45, + "learning_rate": 4.1617023416532966e-05, + "loss": 0.9097, + "step": 9118 + }, + { + "epoch": 1.45, + "learning_rate": 4.1615095816195765e-05, + "loss": 0.8768, + "step": 9119 + }, + { + "epoch": 1.45, + "learning_rate": 4.161316803891789e-05, + "loss": 0.9558, + "step": 9120 + }, + { + "epoch": 1.45, + "learning_rate": 4.161124008471986e-05, + "loss": 0.939, + "step": 9121 + }, + { + "epoch": 1.45, + "learning_rate": 4.160931195362221e-05, + "loss": 0.8765, + "step": 9122 + }, + { + "epoch": 1.45, + "learning_rate": 4.160738364564547e-05, + "loss": 0.8277, + "step": 9123 + }, + { + "epoch": 1.45, + "learning_rate": 4.160545516081018e-05, + "loss": 0.8607, + "step": 9124 + }, + { + "epoch": 1.45, + "learning_rate": 4.160352649913687e-05, + "loss": 0.891, + "step": 9125 + }, + { + "epoch": 1.45, + "learning_rate": 4.160159766064608e-05, + "loss": 0.8424, + "step": 9126 + }, + { + "epoch": 1.45, + "learning_rate": 4.159966864535836e-05, + "loss": 0.891, + "step": 9127 + }, + { + "epoch": 1.45, + "learning_rate": 4.1597739453294245e-05, + "loss": 0.8989, + "step": 9128 + }, + { + "epoch": 1.45, + "learning_rate": 4.159581008447428e-05, + "loss": 0.9026, + "step": 9129 + }, + { + "epoch": 1.45, + "learning_rate": 4.1593880538919014e-05, + "loss": 0.9416, + "step": 9130 + }, + { + "epoch": 1.45, + "learning_rate": 4.1591950816648995e-05, + "loss": 0.8678, + "step": 9131 + }, + { + "epoch": 1.45, + "learning_rate": 4.1590020917684766e-05, + "loss": 0.8524, + "step": 9132 + }, + { + "epoch": 1.45, + "learning_rate": 4.15880908420469e-05, + "loss": 0.8607, + "step": 9133 + }, + { + "epoch": 1.46, + "learning_rate": 4.158616058975593e-05, + "loss": 0.8387, + "step": 9134 + }, + { + "epoch": 1.46, + "learning_rate": 4.1584230160832417e-05, + "loss": 0.8498, + "step": 9135 + }, + { + "epoch": 1.46, + "learning_rate": 4.1582299555296925e-05, + "loss": 0.8842, + "step": 9136 + }, + { + "epoch": 1.46, + "learning_rate": 4.1580368773170006e-05, + "loss": 0.8638, + "step": 9137 + }, + { + "epoch": 1.46, + "learning_rate": 4.157843781447223e-05, + "loss": 0.8338, + "step": 9138 + }, + { + "epoch": 1.46, + "learning_rate": 4.157650667922416e-05, + "loss": 0.8561, + "step": 9139 + }, + { + "epoch": 1.46, + "learning_rate": 4.157457536744635e-05, + "loss": 0.8181, + "step": 9140 + }, + { + "epoch": 1.46, + "learning_rate": 4.1572643879159376e-05, + "loss": 0.9395, + "step": 9141 + }, + { + "epoch": 1.46, + "learning_rate": 4.157071221438381e-05, + "loss": 0.913, + "step": 9142 + }, + { + "epoch": 1.46, + "learning_rate": 4.156878037314021e-05, + "loss": 0.9066, + "step": 9143 + }, + { + "epoch": 1.46, + "learning_rate": 4.156684835544917e-05, + "loss": 0.9231, + "step": 9144 + }, + { + "epoch": 1.46, + "learning_rate": 4.156491616133126e-05, + "loss": 0.8426, + "step": 9145 + }, + { + "epoch": 1.46, + "learning_rate": 4.156298379080703e-05, + "loss": 0.8826, + "step": 9146 + }, + { + "epoch": 1.46, + "learning_rate": 4.156105124389709e-05, + "loss": 0.9022, + "step": 9147 + }, + { + "epoch": 1.46, + "learning_rate": 4.1559118520622e-05, + "loss": 0.8682, + "step": 9148 + }, + { + "epoch": 1.46, + "learning_rate": 4.1557185621002356e-05, + "loss": 0.8078, + "step": 9149 + }, + { + "epoch": 1.46, + "learning_rate": 4.155525254505874e-05, + "loss": 0.8424, + "step": 9150 + }, + { + "epoch": 1.46, + "learning_rate": 4.1553319292811734e-05, + "loss": 0.8568, + "step": 9151 + }, + { + "epoch": 1.46, + "learning_rate": 4.155138586428192e-05, + "loss": 0.8247, + "step": 9152 + }, + { + "epoch": 1.46, + "learning_rate": 4.154945225948991e-05, + "loss": 0.8722, + "step": 9153 + }, + { + "epoch": 1.46, + "learning_rate": 4.154751847845627e-05, + "loss": 0.8732, + "step": 9154 + }, + { + "epoch": 1.46, + "learning_rate": 4.1545584521201606e-05, + "loss": 0.875, + "step": 9155 + }, + { + "epoch": 1.46, + "learning_rate": 4.154365038774651e-05, + "loss": 0.9268, + "step": 9156 + }, + { + "epoch": 1.46, + "learning_rate": 4.154171607811159e-05, + "loss": 0.807, + "step": 9157 + }, + { + "epoch": 1.46, + "learning_rate": 4.153978159231743e-05, + "loss": 0.8593, + "step": 9158 + }, + { + "epoch": 1.46, + "learning_rate": 4.1537846930384635e-05, + "loss": 0.8928, + "step": 9159 + }, + { + "epoch": 1.46, + "learning_rate": 4.153591209233381e-05, + "loss": 0.862, + "step": 9160 + }, + { + "epoch": 1.46, + "learning_rate": 4.153397707818556e-05, + "loss": 0.822, + "step": 9161 + }, + { + "epoch": 1.46, + "learning_rate": 4.15320418879605e-05, + "loss": 0.8986, + "step": 9162 + }, + { + "epoch": 1.46, + "learning_rate": 4.153010652167923e-05, + "loss": 0.8345, + "step": 9163 + }, + { + "epoch": 1.46, + "learning_rate": 4.1528170979362355e-05, + "loss": 0.9521, + "step": 9164 + }, + { + "epoch": 1.46, + "learning_rate": 4.15262352610305e-05, + "loss": 0.868, + "step": 9165 + }, + { + "epoch": 1.46, + "learning_rate": 4.152429936670427e-05, + "loss": 0.9264, + "step": 9166 + }, + { + "epoch": 1.46, + "learning_rate": 4.1522363296404285e-05, + "loss": 0.8713, + "step": 9167 + }, + { + "epoch": 1.46, + "learning_rate": 4.152042705015116e-05, + "loss": 0.8369, + "step": 9168 + }, + { + "epoch": 1.46, + "learning_rate": 4.151849062796551e-05, + "loss": 0.8783, + "step": 9169 + }, + { + "epoch": 1.46, + "learning_rate": 4.151655402986797e-05, + "loss": 0.8204, + "step": 9170 + }, + { + "epoch": 1.46, + "learning_rate": 4.1514617255879164e-05, + "loss": 0.8628, + "step": 9171 + }, + { + "epoch": 1.46, + "learning_rate": 4.15126803060197e-05, + "loss": 1.0304, + "step": 9172 + }, + { + "epoch": 1.46, + "learning_rate": 4.151074318031022e-05, + "loss": 0.8332, + "step": 9173 + }, + { + "epoch": 1.46, + "learning_rate": 4.150880587877135e-05, + "loss": 0.7966, + "step": 9174 + }, + { + "epoch": 1.46, + "learning_rate": 4.150686840142371e-05, + "loss": 0.7653, + "step": 9175 + }, + { + "epoch": 1.46, + "learning_rate": 4.1504930748287966e-05, + "loss": 0.9067, + "step": 9176 + }, + { + "epoch": 1.46, + "learning_rate": 4.150299291938471e-05, + "loss": 0.9084, + "step": 9177 + }, + { + "epoch": 1.46, + "learning_rate": 4.1501054914734606e-05, + "loss": 0.9098, + "step": 9178 + }, + { + "epoch": 1.46, + "learning_rate": 4.149911673435828e-05, + "loss": 0.83, + "step": 9179 + }, + { + "epoch": 1.46, + "learning_rate": 4.149717837827638e-05, + "loss": 0.8404, + "step": 9180 + }, + { + "epoch": 1.46, + "learning_rate": 4.149523984650955e-05, + "loss": 0.9233, + "step": 9181 + }, + { + "epoch": 1.46, + "learning_rate": 4.1493301139078426e-05, + "loss": 0.8965, + "step": 9182 + }, + { + "epoch": 1.46, + "learning_rate": 4.149136225600366e-05, + "loss": 0.9065, + "step": 9183 + }, + { + "epoch": 1.46, + "learning_rate": 4.1489423197305896e-05, + "loss": 0.9313, + "step": 9184 + }, + { + "epoch": 1.46, + "learning_rate": 4.148748396300579e-05, + "loss": 0.8099, + "step": 9185 + }, + { + "epoch": 1.46, + "learning_rate": 4.148554455312399e-05, + "loss": 0.9597, + "step": 9186 + }, + { + "epoch": 1.46, + "learning_rate": 4.1483604967681144e-05, + "loss": 0.8771, + "step": 9187 + }, + { + "epoch": 1.46, + "learning_rate": 4.148166520669791e-05, + "loss": 0.8547, + "step": 9188 + }, + { + "epoch": 1.46, + "learning_rate": 4.1479725270194955e-05, + "loss": 0.8149, + "step": 9189 + }, + { + "epoch": 1.46, + "learning_rate": 4.147778515819293e-05, + "loss": 0.7891, + "step": 9190 + }, + { + "epoch": 1.46, + "learning_rate": 4.147584487071249e-05, + "loss": 0.8912, + "step": 9191 + }, + { + "epoch": 1.46, + "learning_rate": 4.147390440777431e-05, + "loss": 0.8767, + "step": 9192 + }, + { + "epoch": 1.46, + "learning_rate": 4.1471963769399055e-05, + "loss": 0.8974, + "step": 9193 + }, + { + "epoch": 1.46, + "learning_rate": 4.147002295560738e-05, + "loss": 0.907, + "step": 9194 + }, + { + "epoch": 1.46, + "learning_rate": 4.146808196641996e-05, + "loss": 0.8963, + "step": 9195 + }, + { + "epoch": 1.47, + "learning_rate": 4.146614080185747e-05, + "loss": 0.8948, + "step": 9196 + }, + { + "epoch": 1.47, + "learning_rate": 4.146419946194057e-05, + "loss": 0.8901, + "step": 9197 + }, + { + "epoch": 1.47, + "learning_rate": 4.1462257946689945e-05, + "loss": 0.9038, + "step": 9198 + }, + { + "epoch": 1.47, + "learning_rate": 4.146031625612627e-05, + "loss": 0.9202, + "step": 9199 + }, + { + "epoch": 1.47, + "learning_rate": 4.145837439027022e-05, + "loss": 0.8871, + "step": 9200 + }, + { + "epoch": 1.47, + "learning_rate": 4.145643234914247e-05, + "loss": 0.9192, + "step": 9201 + }, + { + "epoch": 1.47, + "learning_rate": 4.145449013276371e-05, + "loss": 0.884, + "step": 9202 + }, + { + "epoch": 1.47, + "learning_rate": 4.1452547741154613e-05, + "loss": 0.895, + "step": 9203 + }, + { + "epoch": 1.47, + "learning_rate": 4.145060517433588e-05, + "loss": 0.8556, + "step": 9204 + }, + { + "epoch": 1.47, + "learning_rate": 4.144866243232818e-05, + "loss": 0.9671, + "step": 9205 + }, + { + "epoch": 1.47, + "learning_rate": 4.144671951515222e-05, + "loss": 0.879, + "step": 9206 + }, + { + "epoch": 1.47, + "learning_rate": 4.144477642282868e-05, + "loss": 0.8756, + "step": 9207 + }, + { + "epoch": 1.47, + "learning_rate": 4.144283315537826e-05, + "loss": 0.9241, + "step": 9208 + }, + { + "epoch": 1.47, + "learning_rate": 4.144088971282164e-05, + "loss": 0.9399, + "step": 9209 + }, + { + "epoch": 1.47, + "learning_rate": 4.1438946095179534e-05, + "loss": 0.9305, + "step": 9210 + }, + { + "epoch": 1.47, + "learning_rate": 4.143700230247262e-05, + "loss": 0.9229, + "step": 9211 + }, + { + "epoch": 1.47, + "learning_rate": 4.143505833472162e-05, + "loss": 0.9275, + "step": 9212 + }, + { + "epoch": 1.47, + "learning_rate": 4.143311419194723e-05, + "loss": 0.8947, + "step": 9213 + }, + { + "epoch": 1.47, + "learning_rate": 4.143116987417015e-05, + "loss": 0.9492, + "step": 9214 + }, + { + "epoch": 1.47, + "learning_rate": 4.142922538141108e-05, + "loss": 0.9047, + "step": 9215 + }, + { + "epoch": 1.47, + "learning_rate": 4.142728071369073e-05, + "loss": 0.9019, + "step": 9216 + }, + { + "epoch": 1.47, + "learning_rate": 4.1425335871029825e-05, + "loss": 0.8206, + "step": 9217 + }, + { + "epoch": 1.47, + "learning_rate": 4.142339085344906e-05, + "loss": 0.8927, + "step": 9218 + }, + { + "epoch": 1.47, + "learning_rate": 4.142144566096915e-05, + "loss": 0.8739, + "step": 9219 + }, + { + "epoch": 1.47, + "learning_rate": 4.141950029361082e-05, + "loss": 0.9071, + "step": 9220 + }, + { + "epoch": 1.47, + "learning_rate": 4.141755475139478e-05, + "loss": 0.8966, + "step": 9221 + }, + { + "epoch": 1.47, + "learning_rate": 4.141560903434174e-05, + "loss": 0.9218, + "step": 9222 + }, + { + "epoch": 1.47, + "learning_rate": 4.141366314247244e-05, + "loss": 0.8652, + "step": 9223 + }, + { + "epoch": 1.47, + "learning_rate": 4.1411717075807594e-05, + "loss": 0.849, + "step": 9224 + }, + { + "epoch": 1.47, + "learning_rate": 4.1409770834367917e-05, + "loss": 0.9269, + "step": 9225 + }, + { + "epoch": 1.47, + "learning_rate": 4.140782441817415e-05, + "loss": 0.7786, + "step": 9226 + }, + { + "epoch": 1.47, + "learning_rate": 4.140587782724701e-05, + "loss": 0.9078, + "step": 9227 + }, + { + "epoch": 1.47, + "learning_rate": 4.1403931061607224e-05, + "loss": 0.94, + "step": 9228 + }, + { + "epoch": 1.47, + "learning_rate": 4.1401984121275545e-05, + "loss": 0.9216, + "step": 9229 + }, + { + "epoch": 1.47, + "learning_rate": 4.1400037006272683e-05, + "loss": 0.9222, + "step": 9230 + }, + { + "epoch": 1.47, + "learning_rate": 4.139808971661939e-05, + "loss": 0.8843, + "step": 9231 + }, + { + "epoch": 1.47, + "learning_rate": 4.1396142252336404e-05, + "loss": 0.9048, + "step": 9232 + }, + { + "epoch": 1.47, + "learning_rate": 4.1394194613444436e-05, + "loss": 0.9203, + "step": 9233 + }, + { + "epoch": 1.47, + "learning_rate": 4.139224679996426e-05, + "loss": 0.887, + "step": 9234 + }, + { + "epoch": 1.47, + "learning_rate": 4.139029881191662e-05, + "loss": 0.8401, + "step": 9235 + }, + { + "epoch": 1.47, + "learning_rate": 4.138835064932223e-05, + "loss": 0.8693, + "step": 9236 + }, + { + "epoch": 1.47, + "learning_rate": 4.138640231220186e-05, + "loss": 0.8377, + "step": 9237 + }, + { + "epoch": 1.47, + "learning_rate": 4.138445380057626e-05, + "loss": 0.8053, + "step": 9238 + }, + { + "epoch": 1.47, + "learning_rate": 4.1382505114466175e-05, + "loss": 0.957, + "step": 9239 + }, + { + "epoch": 1.47, + "learning_rate": 4.138055625389235e-05, + "loss": 0.8607, + "step": 9240 + }, + { + "epoch": 1.47, + "learning_rate": 4.137860721887554e-05, + "loss": 0.8975, + "step": 9241 + }, + { + "epoch": 1.47, + "learning_rate": 4.137665800943652e-05, + "loss": 0.8371, + "step": 9242 + }, + { + "epoch": 1.47, + "learning_rate": 4.137470862559603e-05, + "loss": 0.847, + "step": 9243 + }, + { + "epoch": 1.47, + "learning_rate": 4.1372759067374836e-05, + "loss": 0.8605, + "step": 9244 + }, + { + "epoch": 1.47, + "learning_rate": 4.1370809334793694e-05, + "loss": 0.8601, + "step": 9245 + }, + { + "epoch": 1.47, + "learning_rate": 4.136885942787337e-05, + "loss": 0.9052, + "step": 9246 + }, + { + "epoch": 1.47, + "learning_rate": 4.136690934663464e-05, + "loss": 0.8805, + "step": 9247 + }, + { + "epoch": 1.47, + "learning_rate": 4.1364959091098254e-05, + "loss": 0.8796, + "step": 9248 + }, + { + "epoch": 1.47, + "learning_rate": 4.136300866128499e-05, + "loss": 0.828, + "step": 9249 + }, + { + "epoch": 1.47, + "learning_rate": 4.1361058057215617e-05, + "loss": 0.8986, + "step": 9250 + }, + { + "epoch": 1.47, + "learning_rate": 4.1359107278910915e-05, + "loss": 0.8704, + "step": 9251 + }, + { + "epoch": 1.47, + "learning_rate": 4.135715632639164e-05, + "loss": 0.9152, + "step": 9252 + }, + { + "epoch": 1.47, + "learning_rate": 4.135520519967859e-05, + "loss": 0.8669, + "step": 9253 + }, + { + "epoch": 1.47, + "learning_rate": 4.1353253898792536e-05, + "loss": 0.8931, + "step": 9254 + }, + { + "epoch": 1.47, + "learning_rate": 4.1351302423754257e-05, + "loss": 0.922, + "step": 9255 + }, + { + "epoch": 1.47, + "learning_rate": 4.134935077458453e-05, + "loss": 0.8271, + "step": 9256 + }, + { + "epoch": 1.47, + "learning_rate": 4.1347398951304144e-05, + "loss": 0.8206, + "step": 9257 + }, + { + "epoch": 1.47, + "learning_rate": 4.1345446953933884e-05, + "loss": 0.8614, + "step": 9258 + }, + { + "epoch": 1.48, + "learning_rate": 4.134349478249454e-05, + "loss": 0.9517, + "step": 9259 + }, + { + "epoch": 1.48, + "learning_rate": 4.134154243700689e-05, + "loss": 0.8466, + "step": 9260 + }, + { + "epoch": 1.48, + "learning_rate": 4.1339589917491746e-05, + "loss": 0.9335, + "step": 9261 + }, + { + "epoch": 1.48, + "learning_rate": 4.133763722396988e-05, + "loss": 0.8258, + "step": 9262 + }, + { + "epoch": 1.48, + "learning_rate": 4.13356843564621e-05, + "loss": 0.8468, + "step": 9263 + }, + { + "epoch": 1.48, + "learning_rate": 4.13337313149892e-05, + "loss": 0.8401, + "step": 9264 + }, + { + "epoch": 1.48, + "learning_rate": 4.1331778099571985e-05, + "loss": 0.8283, + "step": 9265 + }, + { + "epoch": 1.48, + "learning_rate": 4.1329824710231235e-05, + "loss": 0.8408, + "step": 9266 + }, + { + "epoch": 1.48, + "learning_rate": 4.132787114698777e-05, + "loss": 0.8416, + "step": 9267 + }, + { + "epoch": 1.48, + "learning_rate": 4.1325917409862394e-05, + "loss": 0.8722, + "step": 9268 + }, + { + "epoch": 1.48, + "learning_rate": 4.132396349887591e-05, + "loss": 0.8343, + "step": 9269 + }, + { + "epoch": 1.48, + "learning_rate": 4.1322009414049125e-05, + "loss": 0.9744, + "step": 9270 + }, + { + "epoch": 1.48, + "learning_rate": 4.1320055155402846e-05, + "loss": 0.8723, + "step": 9271 + }, + { + "epoch": 1.48, + "learning_rate": 4.131810072295789e-05, + "loss": 0.9023, + "step": 9272 + }, + { + "epoch": 1.48, + "learning_rate": 4.131614611673505e-05, + "loss": 0.8545, + "step": 9273 + }, + { + "epoch": 1.48, + "learning_rate": 4.131419133675519e-05, + "loss": 0.8175, + "step": 9274 + }, + { + "epoch": 1.48, + "learning_rate": 4.1312236383039086e-05, + "loss": 0.9246, + "step": 9275 + }, + { + "epoch": 1.48, + "learning_rate": 4.1310281255607565e-05, + "loss": 0.8453, + "step": 9276 + }, + { + "epoch": 1.48, + "learning_rate": 4.130832595448144e-05, + "loss": 0.9232, + "step": 9277 + }, + { + "epoch": 1.48, + "learning_rate": 4.130637047968157e-05, + "loss": 0.8443, + "step": 9278 + }, + { + "epoch": 1.48, + "learning_rate": 4.130441483122873e-05, + "loss": 0.9237, + "step": 9279 + }, + { + "epoch": 1.48, + "learning_rate": 4.1302459009143776e-05, + "loss": 1.0017, + "step": 9280 + }, + { + "epoch": 1.48, + "learning_rate": 4.1300503013447546e-05, + "loss": 0.9303, + "step": 9281 + }, + { + "epoch": 1.48, + "learning_rate": 4.129854684416083e-05, + "loss": 0.8046, + "step": 9282 + }, + { + "epoch": 1.48, + "learning_rate": 4.129659050130451e-05, + "loss": 0.8524, + "step": 9283 + }, + { + "epoch": 1.48, + "learning_rate": 4.1294633984899384e-05, + "loss": 0.9503, + "step": 9284 + }, + { + "epoch": 1.48, + "learning_rate": 4.1292677294966304e-05, + "loss": 0.9154, + "step": 9285 + }, + { + "epoch": 1.48, + "learning_rate": 4.12907204315261e-05, + "loss": 0.8982, + "step": 9286 + }, + { + "epoch": 1.48, + "learning_rate": 4.1288763394599605e-05, + "loss": 0.9389, + "step": 9287 + }, + { + "epoch": 1.48, + "learning_rate": 4.128680618420768e-05, + "loss": 0.8827, + "step": 9288 + }, + { + "epoch": 1.48, + "learning_rate": 4.128484880037115e-05, + "loss": 0.9453, + "step": 9289 + }, + { + "epoch": 1.48, + "learning_rate": 4.128289124311087e-05, + "loss": 0.9048, + "step": 9290 + }, + { + "epoch": 1.48, + "learning_rate": 4.1280933512447686e-05, + "loss": 0.9006, + "step": 9291 + }, + { + "epoch": 1.48, + "learning_rate": 4.1278975608402435e-05, + "loss": 0.9178, + "step": 9292 + }, + { + "epoch": 1.48, + "learning_rate": 4.127701753099599e-05, + "loss": 0.9245, + "step": 9293 + }, + { + "epoch": 1.48, + "learning_rate": 4.127505928024919e-05, + "loss": 0.8905, + "step": 9294 + }, + { + "epoch": 1.48, + "learning_rate": 4.127310085618288e-05, + "loss": 0.8949, + "step": 9295 + }, + { + "epoch": 1.48, + "learning_rate": 4.127114225881793e-05, + "loss": 0.8921, + "step": 9296 + }, + { + "epoch": 1.48, + "learning_rate": 4.126918348817519e-05, + "loss": 0.9155, + "step": 9297 + }, + { + "epoch": 1.48, + "learning_rate": 4.126722454427553e-05, + "loss": 0.9215, + "step": 9298 + }, + { + "epoch": 1.48, + "learning_rate": 4.12652654271398e-05, + "loss": 0.8884, + "step": 9299 + }, + { + "epoch": 1.48, + "learning_rate": 4.1263306136788873e-05, + "loss": 0.8374, + "step": 9300 + }, + { + "epoch": 1.48, + "learning_rate": 4.1261346673243596e-05, + "loss": 0.8595, + "step": 9301 + }, + { + "epoch": 1.48, + "learning_rate": 4.125938703652486e-05, + "loss": 0.8652, + "step": 9302 + }, + { + "epoch": 1.48, + "learning_rate": 4.1257427226653524e-05, + "loss": 0.8301, + "step": 9303 + }, + { + "epoch": 1.48, + "learning_rate": 4.125546724365045e-05, + "loss": 0.8446, + "step": 9304 + }, + { + "epoch": 1.48, + "learning_rate": 4.125350708753654e-05, + "loss": 0.8287, + "step": 9305 + }, + { + "epoch": 1.48, + "learning_rate": 4.125154675833263e-05, + "loss": 0.9176, + "step": 9306 + }, + { + "epoch": 1.48, + "learning_rate": 4.124958625605961e-05, + "loss": 0.8416, + "step": 9307 + }, + { + "epoch": 1.48, + "learning_rate": 4.124762558073837e-05, + "loss": 0.9147, + "step": 9308 + }, + { + "epoch": 1.48, + "learning_rate": 4.1245664732389774e-05, + "loss": 0.7853, + "step": 9309 + }, + { + "epoch": 1.48, + "learning_rate": 4.124370371103472e-05, + "loss": 0.8997, + "step": 9310 + }, + { + "epoch": 1.48, + "learning_rate": 4.124174251669408e-05, + "loss": 0.813, + "step": 9311 + }, + { + "epoch": 1.48, + "learning_rate": 4.123978114938874e-05, + "loss": 0.9207, + "step": 9312 + }, + { + "epoch": 1.48, + "learning_rate": 4.1237819609139596e-05, + "loss": 0.8362, + "step": 9313 + }, + { + "epoch": 1.48, + "learning_rate": 4.123585789596752e-05, + "loss": 0.8649, + "step": 9314 + }, + { + "epoch": 1.48, + "learning_rate": 4.123389600989343e-05, + "loss": 0.8507, + "step": 9315 + }, + { + "epoch": 1.48, + "learning_rate": 4.123193395093819e-05, + "loss": 0.8556, + "step": 9316 + }, + { + "epoch": 1.48, + "learning_rate": 4.1229971719122716e-05, + "loss": 0.8744, + "step": 9317 + }, + { + "epoch": 1.48, + "learning_rate": 4.12280093144679e-05, + "loss": 0.8313, + "step": 9318 + }, + { + "epoch": 1.48, + "learning_rate": 4.1226046736994627e-05, + "loss": 0.9006, + "step": 9319 + }, + { + "epoch": 1.48, + "learning_rate": 4.1224083986723816e-05, + "loss": 0.9436, + "step": 9320 + }, + { + "epoch": 1.48, + "learning_rate": 4.1222121063676356e-05, + "loss": 0.9154, + "step": 9321 + }, + { + "epoch": 1.49, + "learning_rate": 4.122015796787315e-05, + "loss": 0.7701, + "step": 9322 + }, + { + "epoch": 1.49, + "learning_rate": 4.121819469933512e-05, + "loss": 0.8665, + "step": 9323 + }, + { + "epoch": 1.49, + "learning_rate": 4.121623125808316e-05, + "loss": 0.874, + "step": 9324 + }, + { + "epoch": 1.49, + "learning_rate": 4.1214267644138184e-05, + "loss": 0.8433, + "step": 9325 + }, + { + "epoch": 1.49, + "learning_rate": 4.121230385752109e-05, + "loss": 0.8889, + "step": 9326 + }, + { + "epoch": 1.49, + "learning_rate": 4.1210339898252814e-05, + "loss": 0.8759, + "step": 9327 + }, + { + "epoch": 1.49, + "learning_rate": 4.120837576635426e-05, + "loss": 0.9104, + "step": 9328 + }, + { + "epoch": 1.49, + "learning_rate": 4.1206411461846337e-05, + "loss": 0.8768, + "step": 9329 + }, + { + "epoch": 1.49, + "learning_rate": 4.1204446984749975e-05, + "loss": 0.8547, + "step": 9330 + }, + { + "epoch": 1.49, + "learning_rate": 4.120248233508609e-05, + "loss": 0.8108, + "step": 9331 + }, + { + "epoch": 1.49, + "learning_rate": 4.12005175128756e-05, + "loss": 0.8881, + "step": 9332 + }, + { + "epoch": 1.49, + "learning_rate": 4.119855251813944e-05, + "loss": 0.894, + "step": 9333 + }, + { + "epoch": 1.49, + "learning_rate": 4.119658735089853e-05, + "loss": 0.8599, + "step": 9334 + }, + { + "epoch": 1.49, + "learning_rate": 4.11946220111738e-05, + "loss": 0.8366, + "step": 9335 + }, + { + "epoch": 1.49, + "learning_rate": 4.119265649898617e-05, + "loss": 0.8424, + "step": 9336 + }, + { + "epoch": 1.49, + "learning_rate": 4.119069081435659e-05, + "loss": 0.9146, + "step": 9337 + }, + { + "epoch": 1.49, + "learning_rate": 4.118872495730597e-05, + "loss": 0.934, + "step": 9338 + }, + { + "epoch": 1.49, + "learning_rate": 4.118675892785527e-05, + "loss": 0.8962, + "step": 9339 + }, + { + "epoch": 1.49, + "learning_rate": 4.1184792726025404e-05, + "loss": 0.8867, + "step": 9340 + }, + { + "epoch": 1.49, + "learning_rate": 4.1182826351837326e-05, + "loss": 0.8839, + "step": 9341 + }, + { + "epoch": 1.49, + "learning_rate": 4.118085980531197e-05, + "loss": 0.9224, + "step": 9342 + }, + { + "epoch": 1.49, + "learning_rate": 4.117889308647028e-05, + "loss": 0.8505, + "step": 9343 + }, + { + "epoch": 1.49, + "learning_rate": 4.117692619533321e-05, + "loss": 0.8469, + "step": 9344 + }, + { + "epoch": 1.49, + "learning_rate": 4.117495913192169e-05, + "loss": 0.883, + "step": 9345 + }, + { + "epoch": 1.49, + "learning_rate": 4.117299189625668e-05, + "loss": 0.9339, + "step": 9346 + }, + { + "epoch": 1.49, + "learning_rate": 4.1171024488359124e-05, + "loss": 0.8575, + "step": 9347 + }, + { + "epoch": 1.49, + "learning_rate": 4.116905690824997e-05, + "loss": 0.7922, + "step": 9348 + }, + { + "epoch": 1.49, + "learning_rate": 4.116708915595018e-05, + "loss": 0.8099, + "step": 9349 + }, + { + "epoch": 1.49, + "learning_rate": 4.11651212314807e-05, + "loss": 0.9375, + "step": 9350 + }, + { + "epoch": 1.49, + "learning_rate": 4.1163153134862496e-05, + "loss": 0.8437, + "step": 9351 + }, + { + "epoch": 1.49, + "learning_rate": 4.116118486611652e-05, + "loss": 0.8413, + "step": 9352 + }, + { + "epoch": 1.49, + "learning_rate": 4.115921642526375e-05, + "loss": 0.7952, + "step": 9353 + }, + { + "epoch": 1.49, + "learning_rate": 4.115724781232513e-05, + "loss": 0.8705, + "step": 9354 + }, + { + "epoch": 1.49, + "learning_rate": 4.115527902732162e-05, + "loss": 0.8565, + "step": 9355 + }, + { + "epoch": 1.49, + "learning_rate": 4.115331007027421e-05, + "loss": 0.9005, + "step": 9356 + }, + { + "epoch": 1.49, + "learning_rate": 4.115134094120385e-05, + "loss": 0.8322, + "step": 9357 + }, + { + "epoch": 1.49, + "learning_rate": 4.114937164013152e-05, + "loss": 0.816, + "step": 9358 + }, + { + "epoch": 1.49, + "learning_rate": 4.1147402167078174e-05, + "loss": 0.8395, + "step": 9359 + }, + { + "epoch": 1.49, + "learning_rate": 4.114543252206481e-05, + "loss": 0.852, + "step": 9360 + }, + { + "epoch": 1.49, + "learning_rate": 4.114346270511238e-05, + "loss": 0.8107, + "step": 9361 + }, + { + "epoch": 1.49, + "learning_rate": 4.114149271624188e-05, + "loss": 0.8361, + "step": 9362 + }, + { + "epoch": 1.49, + "learning_rate": 4.113952255547429e-05, + "loss": 0.8666, + "step": 9363 + }, + { + "epoch": 1.49, + "learning_rate": 4.113755222283057e-05, + "loss": 0.8844, + "step": 9364 + }, + { + "epoch": 1.49, + "learning_rate": 4.113558171833172e-05, + "loss": 0.7994, + "step": 9365 + }, + { + "epoch": 1.49, + "learning_rate": 4.1133611041998725e-05, + "loss": 0.889, + "step": 9366 + }, + { + "epoch": 1.49, + "learning_rate": 4.1131640193852564e-05, + "loss": 0.9426, + "step": 9367 + }, + { + "epoch": 1.49, + "learning_rate": 4.112966917391423e-05, + "loss": 0.8371, + "step": 9368 + }, + { + "epoch": 1.49, + "learning_rate": 4.112769798220471e-05, + "loss": 0.7746, + "step": 9369 + }, + { + "epoch": 1.49, + "learning_rate": 4.1125726618745e-05, + "loss": 0.8931, + "step": 9370 + }, + { + "epoch": 1.49, + "learning_rate": 4.1123755083556085e-05, + "loss": 0.8516, + "step": 9371 + }, + { + "epoch": 1.49, + "learning_rate": 4.1121783376658974e-05, + "loss": 0.8511, + "step": 9372 + }, + { + "epoch": 1.49, + "learning_rate": 4.1119811498074654e-05, + "loss": 0.9115, + "step": 9373 + }, + { + "epoch": 1.49, + "learning_rate": 4.111783944782414e-05, + "loss": 0.93, + "step": 9374 + }, + { + "epoch": 1.49, + "learning_rate": 4.111586722592841e-05, + "loss": 0.84, + "step": 9375 + }, + { + "epoch": 1.49, + "learning_rate": 4.1113894832408486e-05, + "loss": 0.8793, + "step": 9376 + }, + { + "epoch": 1.49, + "learning_rate": 4.1111922267285355e-05, + "loss": 0.9373, + "step": 9377 + }, + { + "epoch": 1.49, + "learning_rate": 4.110994953058004e-05, + "loss": 0.7702, + "step": 9378 + }, + { + "epoch": 1.49, + "learning_rate": 4.1107976622313535e-05, + "loss": 0.7988, + "step": 9379 + }, + { + "epoch": 1.49, + "learning_rate": 4.1106003542506867e-05, + "loss": 0.8825, + "step": 9380 + }, + { + "epoch": 1.49, + "learning_rate": 4.110403029118104e-05, + "loss": 0.8817, + "step": 9381 + }, + { + "epoch": 1.49, + "learning_rate": 4.110205686835706e-05, + "loss": 0.8338, + "step": 9382 + }, + { + "epoch": 1.49, + "learning_rate": 4.1100083274055955e-05, + "loss": 0.8994, + "step": 9383 + }, + { + "epoch": 1.49, + "learning_rate": 4.109810950829873e-05, + "loss": 0.8444, + "step": 9384 + }, + { + "epoch": 1.5, + "learning_rate": 4.1096135571106425e-05, + "loss": 0.8652, + "step": 9385 + }, + { + "epoch": 1.5, + "learning_rate": 4.1094161462500044e-05, + "loss": 0.9061, + "step": 9386 + }, + { + "epoch": 1.5, + "learning_rate": 4.109218718250062e-05, + "loss": 0.8959, + "step": 9387 + }, + { + "epoch": 1.5, + "learning_rate": 4.109021273112916e-05, + "loss": 0.959, + "step": 9388 + }, + { + "epoch": 1.5, + "learning_rate": 4.108823810840671e-05, + "loss": 0.8933, + "step": 9389 + }, + { + "epoch": 1.5, + "learning_rate": 4.108626331435429e-05, + "loss": 1.0919, + "step": 9390 + }, + { + "epoch": 1.5, + "learning_rate": 4.1084288348992925e-05, + "loss": 0.7637, + "step": 9391 + }, + { + "epoch": 1.5, + "learning_rate": 4.108231321234367e-05, + "loss": 0.8716, + "step": 9392 + }, + { + "epoch": 1.5, + "learning_rate": 4.108033790442753e-05, + "loss": 0.9076, + "step": 9393 + }, + { + "epoch": 1.5, + "learning_rate": 4.107836242526556e-05, + "loss": 0.9347, + "step": 9394 + }, + { + "epoch": 1.5, + "learning_rate": 4.107638677487879e-05, + "loss": 0.88, + "step": 9395 + }, + { + "epoch": 1.5, + "learning_rate": 4.107441095328826e-05, + "loss": 0.9058, + "step": 9396 + }, + { + "epoch": 1.5, + "learning_rate": 4.1072434960515016e-05, + "loss": 0.9154, + "step": 9397 + }, + { + "epoch": 1.5, + "learning_rate": 4.107045879658009e-05, + "loss": 0.9153, + "step": 9398 + }, + { + "epoch": 1.5, + "learning_rate": 4.1068482461504545e-05, + "loss": 0.9249, + "step": 9399 + }, + { + "epoch": 1.5, + "learning_rate": 4.106650595530941e-05, + "loss": 0.7774, + "step": 9400 + }, + { + "epoch": 1.5, + "learning_rate": 4.106452927801574e-05, + "loss": 0.8549, + "step": 9401 + }, + { + "epoch": 1.5, + "learning_rate": 4.1062552429644585e-05, + "loss": 0.9044, + "step": 9402 + }, + { + "epoch": 1.5, + "learning_rate": 4.106057541021701e-05, + "loss": 0.7608, + "step": 9403 + }, + { + "epoch": 1.5, + "learning_rate": 4.105859821975404e-05, + "loss": 0.9171, + "step": 9404 + }, + { + "epoch": 1.5, + "learning_rate": 4.1056620858276765e-05, + "loss": 0.8686, + "step": 9405 + }, + { + "epoch": 1.5, + "learning_rate": 4.105464332580622e-05, + "loss": 0.9262, + "step": 9406 + }, + { + "epoch": 1.5, + "learning_rate": 4.105266562236346e-05, + "loss": 0.8577, + "step": 9407 + }, + { + "epoch": 1.5, + "learning_rate": 4.1050687747969565e-05, + "loss": 0.9042, + "step": 9408 + }, + { + "epoch": 1.5, + "learning_rate": 4.1048709702645594e-05, + "loss": 0.9671, + "step": 9409 + }, + { + "epoch": 1.5, + "learning_rate": 4.1046731486412606e-05, + "loss": 0.8005, + "step": 9410 + }, + { + "epoch": 1.5, + "learning_rate": 4.1044753099291664e-05, + "loss": 0.8479, + "step": 9411 + }, + { + "epoch": 1.5, + "learning_rate": 4.1042774541303854e-05, + "loss": 0.9559, + "step": 9412 + }, + { + "epoch": 1.5, + "learning_rate": 4.104079581247023e-05, + "loss": 0.9323, + "step": 9413 + }, + { + "epoch": 1.5, + "learning_rate": 4.103881691281187e-05, + "loss": 0.8557, + "step": 9414 + }, + { + "epoch": 1.5, + "learning_rate": 4.103683784234984e-05, + "loss": 0.86, + "step": 9415 + }, + { + "epoch": 1.5, + "learning_rate": 4.103485860110523e-05, + "loss": 0.9566, + "step": 9416 + }, + { + "epoch": 1.5, + "learning_rate": 4.103287918909911e-05, + "loss": 0.8975, + "step": 9417 + }, + { + "epoch": 1.5, + "learning_rate": 4.103089960635256e-05, + "loss": 0.9346, + "step": 9418 + }, + { + "epoch": 1.5, + "learning_rate": 4.102891985288666e-05, + "loss": 0.7968, + "step": 9419 + }, + { + "epoch": 1.5, + "learning_rate": 4.10269399287225e-05, + "loss": 0.8381, + "step": 9420 + }, + { + "epoch": 1.5, + "learning_rate": 4.102495983388116e-05, + "loss": 0.8995, + "step": 9421 + }, + { + "epoch": 1.5, + "learning_rate": 4.102297956838371e-05, + "loss": 0.8157, + "step": 9422 + }, + { + "epoch": 1.5, + "learning_rate": 4.102099913225127e-05, + "loss": 0.8483, + "step": 9423 + }, + { + "epoch": 1.5, + "learning_rate": 4.1019018525504916e-05, + "loss": 0.9286, + "step": 9424 + }, + { + "epoch": 1.5, + "learning_rate": 4.101703774816574e-05, + "loss": 0.959, + "step": 9425 + }, + { + "epoch": 1.5, + "learning_rate": 4.1015056800254834e-05, + "loss": 0.9451, + "step": 9426 + }, + { + "epoch": 1.5, + "learning_rate": 4.10130756817933e-05, + "loss": 0.8481, + "step": 9427 + }, + { + "epoch": 1.5, + "learning_rate": 4.101109439280223e-05, + "loss": 0.943, + "step": 9428 + }, + { + "epoch": 1.5, + "learning_rate": 4.100911293330272e-05, + "loss": 0.8732, + "step": 9429 + }, + { + "epoch": 1.5, + "learning_rate": 4.1007131303315885e-05, + "loss": 0.8888, + "step": 9430 + }, + { + "epoch": 1.5, + "learning_rate": 4.100514950286281e-05, + "loss": 0.8903, + "step": 9431 + }, + { + "epoch": 1.5, + "learning_rate": 4.100316753196461e-05, + "loss": 0.8678, + "step": 9432 + }, + { + "epoch": 1.5, + "learning_rate": 4.10011853906424e-05, + "loss": 0.9163, + "step": 9433 + }, + { + "epoch": 1.5, + "learning_rate": 4.099920307891728e-05, + "loss": 0.8966, + "step": 9434 + }, + { + "epoch": 1.5, + "learning_rate": 4.099722059681036e-05, + "loss": 0.8298, + "step": 9435 + }, + { + "epoch": 1.5, + "learning_rate": 4.0995237944342746e-05, + "loss": 0.8452, + "step": 9436 + }, + { + "epoch": 1.5, + "learning_rate": 4.099325512153556e-05, + "loss": 0.9, + "step": 9437 + }, + { + "epoch": 1.5, + "learning_rate": 4.099127212840992e-05, + "loss": 0.8456, + "step": 9438 + }, + { + "epoch": 1.5, + "learning_rate": 4.098928896498694e-05, + "loss": 0.9801, + "step": 9439 + }, + { + "epoch": 1.5, + "learning_rate": 4.098730563128774e-05, + "loss": 0.8876, + "step": 9440 + }, + { + "epoch": 1.5, + "learning_rate": 4.098532212733344e-05, + "loss": 0.9147, + "step": 9441 + }, + { + "epoch": 1.5, + "learning_rate": 4.0983338453145166e-05, + "loss": 0.9293, + "step": 9442 + }, + { + "epoch": 1.5, + "learning_rate": 4.098135460874404e-05, + "loss": 0.8221, + "step": 9443 + }, + { + "epoch": 1.5, + "learning_rate": 4.097937059415119e-05, + "loss": 0.9419, + "step": 9444 + }, + { + "epoch": 1.5, + "learning_rate": 4.097738640938774e-05, + "loss": 0.8254, + "step": 9445 + }, + { + "epoch": 1.5, + "learning_rate": 4.097540205447483e-05, + "loss": 0.8837, + "step": 9446 + }, + { + "epoch": 1.51, + "learning_rate": 4.097341752943359e-05, + "loss": 0.9103, + "step": 9447 + }, + { + "epoch": 1.51, + "learning_rate": 4.097143283428515e-05, + "loss": 0.8597, + "step": 9448 + }, + { + "epoch": 1.51, + "learning_rate": 4.096944796905064e-05, + "loss": 0.8605, + "step": 9449 + }, + { + "epoch": 1.51, + "learning_rate": 4.096746293375121e-05, + "loss": 0.8758, + "step": 9450 + }, + { + "epoch": 1.51, + "learning_rate": 4.0965477728408e-05, + "loss": 0.897, + "step": 9451 + }, + { + "epoch": 1.51, + "learning_rate": 4.096349235304213e-05, + "loss": 0.9602, + "step": 9452 + }, + { + "epoch": 1.51, + "learning_rate": 4.096150680767476e-05, + "loss": 0.8349, + "step": 9453 + }, + { + "epoch": 1.51, + "learning_rate": 4.095952109232705e-05, + "loss": 0.9491, + "step": 9454 + }, + { + "epoch": 1.51, + "learning_rate": 4.0957535207020115e-05, + "loss": 0.8606, + "step": 9455 + }, + { + "epoch": 1.51, + "learning_rate": 4.0955549151775116e-05, + "loss": 0.8678, + "step": 9456 + }, + { + "epoch": 1.51, + "learning_rate": 4.095356292661322e-05, + "loss": 0.8861, + "step": 9457 + }, + { + "epoch": 1.51, + "learning_rate": 4.095157653155555e-05, + "loss": 0.8849, + "step": 9458 + }, + { + "epoch": 1.51, + "learning_rate": 4.094958996662328e-05, + "loss": 0.8326, + "step": 9459 + }, + { + "epoch": 1.51, + "learning_rate": 4.094760323183756e-05, + "loss": 0.9347, + "step": 9460 + }, + { + "epoch": 1.51, + "learning_rate": 4.094561632721955e-05, + "loss": 0.8946, + "step": 9461 + }, + { + "epoch": 1.51, + "learning_rate": 4.094362925279041e-05, + "loss": 0.9043, + "step": 9462 + }, + { + "epoch": 1.51, + "learning_rate": 4.094164200857128e-05, + "loss": 0.7448, + "step": 9463 + }, + { + "epoch": 1.51, + "learning_rate": 4.0939654594583354e-05, + "loss": 0.9327, + "step": 9464 + }, + { + "epoch": 1.51, + "learning_rate": 4.0937667010847784e-05, + "loss": 0.9144, + "step": 9465 + }, + { + "epoch": 1.51, + "learning_rate": 4.093567925738574e-05, + "loss": 0.8494, + "step": 9466 + }, + { + "epoch": 1.51, + "learning_rate": 4.0933691334218384e-05, + "loss": 0.849, + "step": 9467 + }, + { + "epoch": 1.51, + "learning_rate": 4.0931703241366885e-05, + "loss": 0.9006, + "step": 9468 + }, + { + "epoch": 1.51, + "learning_rate": 4.0929714978852416e-05, + "loss": 0.8161, + "step": 9469 + }, + { + "epoch": 1.51, + "learning_rate": 4.092772654669617e-05, + "loss": 0.8906, + "step": 9470 + }, + { + "epoch": 1.51, + "learning_rate": 4.092573794491928e-05, + "loss": 0.9067, + "step": 9471 + }, + { + "epoch": 1.51, + "learning_rate": 4.0923749173542966e-05, + "loss": 0.8857, + "step": 9472 + }, + { + "epoch": 1.51, + "learning_rate": 4.09217602325884e-05, + "loss": 0.9145, + "step": 9473 + }, + { + "epoch": 1.51, + "learning_rate": 4.0919771122076734e-05, + "loss": 0.8407, + "step": 9474 + }, + { + "epoch": 1.51, + "learning_rate": 4.0917781842029176e-05, + "loss": 0.8865, + "step": 9475 + }, + { + "epoch": 1.51, + "learning_rate": 4.091579239246692e-05, + "loss": 0.8743, + "step": 9476 + }, + { + "epoch": 1.51, + "learning_rate": 4.091380277341113e-05, + "loss": 0.8984, + "step": 9477 + }, + { + "epoch": 1.51, + "learning_rate": 4.091181298488299e-05, + "loss": 0.8477, + "step": 9478 + }, + { + "epoch": 1.51, + "learning_rate": 4.090982302690371e-05, + "loss": 0.946, + "step": 9479 + }, + { + "epoch": 1.51, + "learning_rate": 4.090783289949448e-05, + "loss": 0.8646, + "step": 9480 + }, + { + "epoch": 1.51, + "learning_rate": 4.0905842602676484e-05, + "loss": 0.8653, + "step": 9481 + }, + { + "epoch": 1.51, + "learning_rate": 4.090385213647091e-05, + "loss": 0.8013, + "step": 9482 + }, + { + "epoch": 1.51, + "learning_rate": 4.090186150089898e-05, + "loss": 0.8805, + "step": 9483 + }, + { + "epoch": 1.51, + "learning_rate": 4.0899870695981876e-05, + "loss": 0.8393, + "step": 9484 + }, + { + "epoch": 1.51, + "learning_rate": 4.089787972174079e-05, + "loss": 0.9215, + "step": 9485 + }, + { + "epoch": 1.51, + "learning_rate": 4.089588857819696e-05, + "loss": 0.9003, + "step": 9486 + }, + { + "epoch": 1.51, + "learning_rate": 4.0893897265371557e-05, + "loss": 0.9233, + "step": 9487 + }, + { + "epoch": 1.51, + "learning_rate": 4.089190578328579e-05, + "loss": 0.8707, + "step": 9488 + }, + { + "epoch": 1.51, + "learning_rate": 4.0889914131960884e-05, + "loss": 0.9163, + "step": 9489 + }, + { + "epoch": 1.51, + "learning_rate": 4.088792231141804e-05, + "loss": 0.8935, + "step": 9490 + }, + { + "epoch": 1.51, + "learning_rate": 4.088593032167846e-05, + "loss": 0.8835, + "step": 9491 + }, + { + "epoch": 1.51, + "learning_rate": 4.088393816276337e-05, + "loss": 0.8432, + "step": 9492 + }, + { + "epoch": 1.51, + "learning_rate": 4.0881945834693977e-05, + "loss": 0.7845, + "step": 9493 + }, + { + "epoch": 1.51, + "learning_rate": 4.087995333749151e-05, + "loss": 0.8075, + "step": 9494 + }, + { + "epoch": 1.51, + "learning_rate": 4.0877960671177186e-05, + "loss": 0.9417, + "step": 9495 + }, + { + "epoch": 1.51, + "learning_rate": 4.087596783577221e-05, + "loss": 0.8562, + "step": 9496 + }, + { + "epoch": 1.51, + "learning_rate": 4.0873974831297824e-05, + "loss": 0.9082, + "step": 9497 + }, + { + "epoch": 1.51, + "learning_rate": 4.087198165777524e-05, + "loss": 0.8946, + "step": 9498 + }, + { + "epoch": 1.51, + "learning_rate": 4.0869988315225684e-05, + "loss": 0.878, + "step": 9499 + }, + { + "epoch": 1.51, + "learning_rate": 4.086799480367039e-05, + "loss": 0.7567, + "step": 9500 + }, + { + "epoch": 1.51, + "learning_rate": 4.0866001123130585e-05, + "loss": 0.8837, + "step": 9501 + }, + { + "epoch": 1.51, + "learning_rate": 4.08640072736275e-05, + "loss": 0.814, + "step": 9502 + }, + { + "epoch": 1.51, + "learning_rate": 4.086201325518237e-05, + "loss": 0.9718, + "step": 9503 + }, + { + "epoch": 1.51, + "learning_rate": 4.086001906781642e-05, + "loss": 0.8289, + "step": 9504 + }, + { + "epoch": 1.51, + "learning_rate": 4.0858024711550915e-05, + "loss": 0.7887, + "step": 9505 + }, + { + "epoch": 1.51, + "learning_rate": 4.0856030186407054e-05, + "loss": 0.7463, + "step": 9506 + }, + { + "epoch": 1.51, + "learning_rate": 4.085403549240612e-05, + "loss": 0.835, + "step": 9507 + }, + { + "epoch": 1.51, + "learning_rate": 4.085204062956932e-05, + "loss": 0.9265, + "step": 9508 + }, + { + "epoch": 1.51, + "learning_rate": 4.085004559791791e-05, + "loss": 0.8311, + "step": 9509 + }, + { + "epoch": 1.52, + "learning_rate": 4.0848050397473134e-05, + "loss": 0.8625, + "step": 9510 + }, + { + "epoch": 1.52, + "learning_rate": 4.0846055028256245e-05, + "loss": 0.8223, + "step": 9511 + }, + { + "epoch": 1.52, + "learning_rate": 4.08440594902885e-05, + "loss": 0.8227, + "step": 9512 + }, + { + "epoch": 1.52, + "learning_rate": 4.0842063783591134e-05, + "loss": 0.8353, + "step": 9513 + }, + { + "epoch": 1.52, + "learning_rate": 4.0840067908185404e-05, + "loss": 0.9617, + "step": 9514 + }, + { + "epoch": 1.52, + "learning_rate": 4.083807186409257e-05, + "loss": 0.8489, + "step": 9515 + }, + { + "epoch": 1.52, + "learning_rate": 4.0836075651333884e-05, + "loss": 0.8411, + "step": 9516 + }, + { + "epoch": 1.52, + "learning_rate": 4.0834079269930615e-05, + "loss": 0.9282, + "step": 9517 + }, + { + "epoch": 1.52, + "learning_rate": 4.083208271990401e-05, + "loss": 0.9216, + "step": 9518 + }, + { + "epoch": 1.52, + "learning_rate": 4.083008600127533e-05, + "loss": 0.8047, + "step": 9519 + }, + { + "epoch": 1.52, + "learning_rate": 4.082808911406585e-05, + "loss": 0.9158, + "step": 9520 + }, + { + "epoch": 1.52, + "learning_rate": 4.082609205829683e-05, + "loss": 0.8504, + "step": 9521 + }, + { + "epoch": 1.52, + "learning_rate": 4.082409483398953e-05, + "loss": 0.856, + "step": 9522 + }, + { + "epoch": 1.52, + "learning_rate": 4.082209744116524e-05, + "loss": 0.8971, + "step": 9523 + }, + { + "epoch": 1.52, + "learning_rate": 4.082009987984521e-05, + "loss": 0.8523, + "step": 9524 + }, + { + "epoch": 1.52, + "learning_rate": 4.081810215005073e-05, + "loss": 0.9336, + "step": 9525 + }, + { + "epoch": 1.52, + "learning_rate": 4.0816104251803056e-05, + "loss": 0.9076, + "step": 9526 + }, + { + "epoch": 1.52, + "learning_rate": 4.081410618512348e-05, + "loss": 0.8462, + "step": 9527 + }, + { + "epoch": 1.52, + "learning_rate": 4.081210795003327e-05, + "loss": 0.805, + "step": 9528 + }, + { + "epoch": 1.52, + "learning_rate": 4.081010954655371e-05, + "loss": 0.8533, + "step": 9529 + }, + { + "epoch": 1.52, + "learning_rate": 4.080811097470608e-05, + "loss": 0.8929, + "step": 9530 + }, + { + "epoch": 1.52, + "learning_rate": 4.080611223451167e-05, + "loss": 0.9025, + "step": 9531 + }, + { + "epoch": 1.52, + "learning_rate": 4.080411332599176e-05, + "loss": 0.8363, + "step": 9532 + }, + { + "epoch": 1.52, + "learning_rate": 4.080211424916764e-05, + "loss": 0.8271, + "step": 9533 + }, + { + "epoch": 1.52, + "learning_rate": 4.080011500406059e-05, + "loss": 0.9458, + "step": 9534 + }, + { + "epoch": 1.52, + "learning_rate": 4.079811559069192e-05, + "loss": 0.8927, + "step": 9535 + }, + { + "epoch": 1.52, + "learning_rate": 4.0796116009082895e-05, + "loss": 0.8363, + "step": 9536 + }, + { + "epoch": 1.52, + "learning_rate": 4.079411625925484e-05, + "loss": 0.9304, + "step": 9537 + }, + { + "epoch": 1.52, + "learning_rate": 4.079211634122903e-05, + "loss": 0.8404, + "step": 9538 + }, + { + "epoch": 1.52, + "learning_rate": 4.079011625502676e-05, + "loss": 0.8496, + "step": 9539 + }, + { + "epoch": 1.52, + "learning_rate": 4.0788116000669346e-05, + "loss": 0.8236, + "step": 9540 + }, + { + "epoch": 1.52, + "learning_rate": 4.078611557817808e-05, + "loss": 0.9525, + "step": 9541 + }, + { + "epoch": 1.52, + "learning_rate": 4.0784114987574266e-05, + "loss": 0.9106, + "step": 9542 + }, + { + "epoch": 1.52, + "learning_rate": 4.078211422887921e-05, + "loss": 0.9456, + "step": 9543 + }, + { + "epoch": 1.52, + "learning_rate": 4.078011330211422e-05, + "loss": 0.9274, + "step": 9544 + }, + { + "epoch": 1.52, + "learning_rate": 4.077811220730061e-05, + "loss": 0.8226, + "step": 9545 + }, + { + "epoch": 1.52, + "learning_rate": 4.077611094445968e-05, + "loss": 0.9124, + "step": 9546 + }, + { + "epoch": 1.52, + "learning_rate": 4.0774109513612744e-05, + "loss": 0.8798, + "step": 9547 + }, + { + "epoch": 1.52, + "learning_rate": 4.077210791478112e-05, + "loss": 0.8503, + "step": 9548 + }, + { + "epoch": 1.52, + "learning_rate": 4.077010614798612e-05, + "loss": 0.8793, + "step": 9549 + }, + { + "epoch": 1.52, + "learning_rate": 4.076810421324906e-05, + "loss": 0.8543, + "step": 9550 + }, + { + "epoch": 1.52, + "learning_rate": 4.076610211059127e-05, + "loss": 0.9172, + "step": 9551 + }, + { + "epoch": 1.52, + "learning_rate": 4.0764099840034065e-05, + "loss": 0.955, + "step": 9552 + }, + { + "epoch": 1.52, + "learning_rate": 4.076209740159876e-05, + "loss": 0.8599, + "step": 9553 + }, + { + "epoch": 1.52, + "learning_rate": 4.076009479530669e-05, + "loss": 0.838, + "step": 9554 + }, + { + "epoch": 1.52, + "learning_rate": 4.075809202117918e-05, + "loss": 0.8256, + "step": 9555 + }, + { + "epoch": 1.52, + "learning_rate": 4.075608907923756e-05, + "loss": 0.8673, + "step": 9556 + }, + { + "epoch": 1.52, + "learning_rate": 4.075408596950315e-05, + "loss": 0.859, + "step": 9557 + }, + { + "epoch": 1.52, + "learning_rate": 4.07520826919973e-05, + "loss": 0.8569, + "step": 9558 + }, + { + "epoch": 1.52, + "learning_rate": 4.0750079246741325e-05, + "loss": 0.9312, + "step": 9559 + }, + { + "epoch": 1.52, + "learning_rate": 4.074807563375657e-05, + "loss": 0.9762, + "step": 9560 + }, + { + "epoch": 1.52, + "learning_rate": 4.074607185306437e-05, + "loss": 0.8463, + "step": 9561 + }, + { + "epoch": 1.52, + "learning_rate": 4.074406790468607e-05, + "loss": 0.8298, + "step": 9562 + }, + { + "epoch": 1.52, + "learning_rate": 4.074206378864299e-05, + "loss": 0.8603, + "step": 9563 + }, + { + "epoch": 1.52, + "learning_rate": 4.07400595049565e-05, + "loss": 0.8757, + "step": 9564 + }, + { + "epoch": 1.52, + "learning_rate": 4.073805505364793e-05, + "loss": 0.8007, + "step": 9565 + }, + { + "epoch": 1.52, + "learning_rate": 4.0736050434738624e-05, + "loss": 0.8658, + "step": 9566 + }, + { + "epoch": 1.52, + "learning_rate": 4.0734045648249944e-05, + "loss": 0.8991, + "step": 9567 + }, + { + "epoch": 1.52, + "learning_rate": 4.0732040694203225e-05, + "loss": 0.9088, + "step": 9568 + }, + { + "epoch": 1.52, + "learning_rate": 4.0730035572619826e-05, + "loss": 0.8687, + "step": 9569 + }, + { + "epoch": 1.52, + "learning_rate": 4.072803028352109e-05, + "loss": 1.0259, + "step": 9570 + }, + { + "epoch": 1.52, + "learning_rate": 4.072602482692839e-05, + "loss": 0.8901, + "step": 9571 + }, + { + "epoch": 1.52, + "learning_rate": 4.0724019202863066e-05, + "loss": 0.8774, + "step": 9572 + }, + { + "epoch": 1.53, + "learning_rate": 4.0722013411346483e-05, + "loss": 0.8757, + "step": 9573 + }, + { + "epoch": 1.53, + "learning_rate": 4.072000745240001e-05, + "loss": 0.8416, + "step": 9574 + }, + { + "epoch": 1.53, + "learning_rate": 4.0718001326045e-05, + "loss": 0.9127, + "step": 9575 + }, + { + "epoch": 1.53, + "learning_rate": 4.071599503230281e-05, + "loss": 0.8067, + "step": 9576 + }, + { + "epoch": 1.53, + "learning_rate": 4.0713988571194815e-05, + "loss": 0.9072, + "step": 9577 + }, + { + "epoch": 1.53, + "learning_rate": 4.071198194274239e-05, + "loss": 0.8602, + "step": 9578 + }, + { + "epoch": 1.53, + "learning_rate": 4.07099751469669e-05, + "loss": 0.9187, + "step": 9579 + }, + { + "epoch": 1.53, + "learning_rate": 4.0707968183889704e-05, + "loss": 0.9612, + "step": 9580 + }, + { + "epoch": 1.53, + "learning_rate": 4.0705961053532184e-05, + "loss": 0.781, + "step": 9581 + }, + { + "epoch": 1.53, + "learning_rate": 4.0703953755915716e-05, + "loss": 0.8883, + "step": 9582 + }, + { + "epoch": 1.53, + "learning_rate": 4.070194629106168e-05, + "loss": 0.8093, + "step": 9583 + }, + { + "epoch": 1.53, + "learning_rate": 4.069993865899144e-05, + "loss": 0.8556, + "step": 9584 + }, + { + "epoch": 1.53, + "learning_rate": 4.0697930859726384e-05, + "loss": 0.8218, + "step": 9585 + }, + { + "epoch": 1.53, + "learning_rate": 4.0695922893287897e-05, + "loss": 0.8916, + "step": 9586 + }, + { + "epoch": 1.53, + "learning_rate": 4.0693914759697364e-05, + "loss": 0.8279, + "step": 9587 + }, + { + "epoch": 1.53, + "learning_rate": 4.069190645897617e-05, + "loss": 0.8857, + "step": 9588 + }, + { + "epoch": 1.53, + "learning_rate": 4.068989799114569e-05, + "loss": 0.8097, + "step": 9589 + }, + { + "epoch": 1.53, + "learning_rate": 4.0687889356227326e-05, + "loss": 0.8112, + "step": 9590 + }, + { + "epoch": 1.53, + "learning_rate": 4.0685880554242465e-05, + "loss": 0.8381, + "step": 9591 + }, + { + "epoch": 1.53, + "learning_rate": 4.0683871585212494e-05, + "loss": 0.9057, + "step": 9592 + }, + { + "epoch": 1.53, + "learning_rate": 4.068186244915882e-05, + "loss": 0.8328, + "step": 9593 + }, + { + "epoch": 1.53, + "learning_rate": 4.067985314610283e-05, + "loss": 0.842, + "step": 9594 + }, + { + "epoch": 1.53, + "learning_rate": 4.0677843676065916e-05, + "loss": 0.7981, + "step": 9595 + }, + { + "epoch": 1.53, + "learning_rate": 4.067583403906949e-05, + "loss": 0.8685, + "step": 9596 + }, + { + "epoch": 1.53, + "learning_rate": 4.067382423513495e-05, + "loss": 0.8999, + "step": 9597 + }, + { + "epoch": 1.53, + "learning_rate": 4.06718142642837e-05, + "loss": 0.9102, + "step": 9598 + }, + { + "epoch": 1.53, + "learning_rate": 4.0669804126537147e-05, + "loss": 0.8429, + "step": 9599 + }, + { + "epoch": 1.53, + "learning_rate": 4.066779382191669e-05, + "loss": 0.8746, + "step": 9600 + }, + { + "epoch": 1.53, + "learning_rate": 4.0665783350443734e-05, + "loss": 0.8761, + "step": 9601 + }, + { + "epoch": 1.53, + "learning_rate": 4.0663772712139696e-05, + "loss": 0.9151, + "step": 9602 + }, + { + "epoch": 1.53, + "learning_rate": 4.0661761907026e-05, + "loss": 0.9416, + "step": 9603 + }, + { + "epoch": 1.53, + "learning_rate": 4.065975093512404e-05, + "loss": 0.834, + "step": 9604 + }, + { + "epoch": 1.53, + "learning_rate": 4.065773979645523e-05, + "loss": 0.8897, + "step": 9605 + }, + { + "epoch": 1.53, + "learning_rate": 4.065572849104101e-05, + "loss": 0.8153, + "step": 9606 + }, + { + "epoch": 1.53, + "learning_rate": 4.065371701890278e-05, + "loss": 0.8364, + "step": 9607 + }, + { + "epoch": 1.53, + "learning_rate": 4.065170538006198e-05, + "loss": 0.9208, + "step": 9608 + }, + { + "epoch": 1.53, + "learning_rate": 4.064969357454001e-05, + "loss": 0.9292, + "step": 9609 + }, + { + "epoch": 1.53, + "learning_rate": 4.06476816023583e-05, + "loss": 0.8348, + "step": 9610 + }, + { + "epoch": 1.53, + "learning_rate": 4.06456694635383e-05, + "loss": 0.8001, + "step": 9611 + }, + { + "epoch": 1.53, + "learning_rate": 4.06436571581014e-05, + "loss": 0.8976, + "step": 9612 + }, + { + "epoch": 1.53, + "learning_rate": 4.064164468606905e-05, + "loss": 0.8163, + "step": 9613 + }, + { + "epoch": 1.53, + "learning_rate": 4.063963204746268e-05, + "loss": 0.8599, + "step": 9614 + }, + { + "epoch": 1.53, + "learning_rate": 4.063761924230373e-05, + "loss": 0.8393, + "step": 9615 + }, + { + "epoch": 1.53, + "learning_rate": 4.063560627061363e-05, + "loss": 0.9189, + "step": 9616 + }, + { + "epoch": 1.53, + "learning_rate": 4.063359313241382e-05, + "loss": 0.8237, + "step": 9617 + }, + { + "epoch": 1.53, + "learning_rate": 4.063157982772573e-05, + "loss": 0.7922, + "step": 9618 + }, + { + "epoch": 1.53, + "learning_rate": 4.062956635657079e-05, + "loss": 0.9858, + "step": 9619 + }, + { + "epoch": 1.53, + "learning_rate": 4.0627552718970476e-05, + "loss": 0.8454, + "step": 9620 + }, + { + "epoch": 1.53, + "learning_rate": 4.06255389149462e-05, + "loss": 0.859, + "step": 9621 + }, + { + "epoch": 1.53, + "learning_rate": 4.0623524944519433e-05, + "loss": 0.8833, + "step": 9622 + }, + { + "epoch": 1.53, + "learning_rate": 4.06215108077116e-05, + "loss": 0.8371, + "step": 9623 + }, + { + "epoch": 1.53, + "learning_rate": 4.0619496504544176e-05, + "loss": 0.8313, + "step": 9624 + }, + { + "epoch": 1.53, + "learning_rate": 4.061748203503858e-05, + "loss": 0.8343, + "step": 9625 + }, + { + "epoch": 1.53, + "learning_rate": 4.061546739921629e-05, + "loss": 0.9463, + "step": 9626 + }, + { + "epoch": 1.53, + "learning_rate": 4.0613452597098755e-05, + "loss": 0.8759, + "step": 9627 + }, + { + "epoch": 1.53, + "learning_rate": 4.061143762870743e-05, + "loss": 0.8529, + "step": 9628 + }, + { + "epoch": 1.53, + "learning_rate": 4.060942249406377e-05, + "loss": 0.8251, + "step": 9629 + }, + { + "epoch": 1.53, + "learning_rate": 4.0607407193189234e-05, + "loss": 0.8831, + "step": 9630 + }, + { + "epoch": 1.53, + "learning_rate": 4.060539172610529e-05, + "loss": 0.9004, + "step": 9631 + }, + { + "epoch": 1.53, + "learning_rate": 4.0603376092833394e-05, + "loss": 0.8211, + "step": 9632 + }, + { + "epoch": 1.53, + "learning_rate": 4.060136029339502e-05, + "loss": 0.8244, + "step": 9633 + }, + { + "epoch": 1.53, + "learning_rate": 4.059934432781163e-05, + "loss": 0.8196, + "step": 9634 + }, + { + "epoch": 1.53, + "learning_rate": 4.059732819610469e-05, + "loss": 0.9072, + "step": 9635 + }, + { + "epoch": 1.54, + "learning_rate": 4.059531189829568e-05, + "loss": 0.8653, + "step": 9636 + }, + { + "epoch": 1.54, + "learning_rate": 4.059329543440607e-05, + "loss": 0.9118, + "step": 9637 + }, + { + "epoch": 1.54, + "learning_rate": 4.059127880445732e-05, + "loss": 0.7859, + "step": 9638 + }, + { + "epoch": 1.54, + "learning_rate": 4.0589262008470916e-05, + "loss": 0.8321, + "step": 9639 + }, + { + "epoch": 1.54, + "learning_rate": 4.058724504646834e-05, + "loss": 0.7792, + "step": 9640 + }, + { + "epoch": 1.54, + "learning_rate": 4.058522791847107e-05, + "loss": 0.8992, + "step": 9641 + }, + { + "epoch": 1.54, + "learning_rate": 4.0583210624500575e-05, + "loss": 0.9011, + "step": 9642 + }, + { + "epoch": 1.54, + "learning_rate": 4.058119316457836e-05, + "loss": 0.9245, + "step": 9643 + }, + { + "epoch": 1.54, + "learning_rate": 4.0579175538725895e-05, + "loss": 0.8657, + "step": 9644 + }, + { + "epoch": 1.54, + "learning_rate": 4.057715774696467e-05, + "loss": 0.7272, + "step": 9645 + }, + { + "epoch": 1.54, + "learning_rate": 4.057513978931616e-05, + "loss": 0.8513, + "step": 9646 + }, + { + "epoch": 1.54, + "learning_rate": 4.0573121665801876e-05, + "loss": 0.8149, + "step": 9647 + }, + { + "epoch": 1.54, + "learning_rate": 4.057110337644331e-05, + "loss": 0.9279, + "step": 9648 + }, + { + "epoch": 1.54, + "learning_rate": 4.0569084921261935e-05, + "loss": 0.7694, + "step": 9649 + }, + { + "epoch": 1.54, + "learning_rate": 4.056706630027926e-05, + "loss": 0.876, + "step": 9650 + }, + { + "epoch": 1.54, + "learning_rate": 4.056504751351677e-05, + "loss": 0.8038, + "step": 9651 + }, + { + "epoch": 1.54, + "learning_rate": 4.056302856099599e-05, + "loss": 0.8732, + "step": 9652 + }, + { + "epoch": 1.54, + "learning_rate": 4.0561009442738394e-05, + "loss": 0.9351, + "step": 9653 + }, + { + "epoch": 1.54, + "learning_rate": 4.0558990158765506e-05, + "loss": 0.9679, + "step": 9654 + }, + { + "epoch": 1.54, + "learning_rate": 4.0556970709098805e-05, + "loss": 0.8935, + "step": 9655 + }, + { + "epoch": 1.54, + "learning_rate": 4.055495109375982e-05, + "loss": 0.7498, + "step": 9656 + }, + { + "epoch": 1.54, + "learning_rate": 4.055293131277005e-05, + "loss": 0.7945, + "step": 9657 + }, + { + "epoch": 1.54, + "learning_rate": 4.055091136615099e-05, + "loss": 0.7761, + "step": 9658 + }, + { + "epoch": 1.54, + "learning_rate": 4.0548891253924183e-05, + "loss": 0.8209, + "step": 9659 + }, + { + "epoch": 1.54, + "learning_rate": 4.054687097611112e-05, + "loss": 0.8224, + "step": 9660 + }, + { + "epoch": 1.54, + "learning_rate": 4.054485053273332e-05, + "loss": 0.7903, + "step": 9661 + }, + { + "epoch": 1.54, + "learning_rate": 4.054282992381229e-05, + "loss": 0.9072, + "step": 9662 + }, + { + "epoch": 1.54, + "learning_rate": 4.0540809149369565e-05, + "loss": 0.8412, + "step": 9663 + }, + { + "epoch": 1.54, + "learning_rate": 4.053878820942666e-05, + "loss": 0.8783, + "step": 9664 + }, + { + "epoch": 1.54, + "learning_rate": 4.0536767104005086e-05, + "loss": 0.9038, + "step": 9665 + }, + { + "epoch": 1.54, + "learning_rate": 4.053474583312638e-05, + "loss": 0.8736, + "step": 9666 + }, + { + "epoch": 1.54, + "learning_rate": 4.0532724396812064e-05, + "loss": 0.8397, + "step": 9667 + }, + { + "epoch": 1.54, + "learning_rate": 4.053070279508366e-05, + "loss": 0.9675, + "step": 9668 + }, + { + "epoch": 1.54, + "learning_rate": 4.05286810279627e-05, + "loss": 0.9007, + "step": 9669 + }, + { + "epoch": 1.54, + "learning_rate": 4.052665909547072e-05, + "loss": 1.0249, + "step": 9670 + }, + { + "epoch": 1.54, + "learning_rate": 4.052463699762924e-05, + "loss": 0.8494, + "step": 9671 + }, + { + "epoch": 1.54, + "learning_rate": 4.0522614734459804e-05, + "loss": 0.8171, + "step": 9672 + }, + { + "epoch": 1.54, + "learning_rate": 4.052059230598395e-05, + "loss": 0.848, + "step": 9673 + }, + { + "epoch": 1.54, + "learning_rate": 4.0518569712223206e-05, + "loss": 0.9733, + "step": 9674 + }, + { + "epoch": 1.54, + "learning_rate": 4.051654695319911e-05, + "loss": 0.815, + "step": 9675 + }, + { + "epoch": 1.54, + "learning_rate": 4.051452402893322e-05, + "loss": 0.9053, + "step": 9676 + }, + { + "epoch": 1.54, + "learning_rate": 4.051250093944706e-05, + "loss": 0.8708, + "step": 9677 + }, + { + "epoch": 1.54, + "learning_rate": 4.051047768476219e-05, + "loss": 0.9164, + "step": 9678 + }, + { + "epoch": 1.54, + "learning_rate": 4.050845426490015e-05, + "loss": 0.8848, + "step": 9679 + }, + { + "epoch": 1.54, + "learning_rate": 4.0506430679882485e-05, + "loss": 0.8761, + "step": 9680 + }, + { + "epoch": 1.54, + "learning_rate": 4.050440692973074e-05, + "loss": 0.8499, + "step": 9681 + }, + { + "epoch": 1.54, + "learning_rate": 4.050238301446648e-05, + "loss": 0.8585, + "step": 9682 + }, + { + "epoch": 1.54, + "learning_rate": 4.050035893411126e-05, + "loss": 0.9514, + "step": 9683 + }, + { + "epoch": 1.54, + "learning_rate": 4.049833468868662e-05, + "loss": 0.9022, + "step": 9684 + }, + { + "epoch": 1.54, + "learning_rate": 4.0496310278214126e-05, + "loss": 0.7999, + "step": 9685 + }, + { + "epoch": 1.54, + "learning_rate": 4.0494285702715327e-05, + "loss": 0.8781, + "step": 9686 + }, + { + "epoch": 1.54, + "learning_rate": 4.0492260962211806e-05, + "loss": 0.8827, + "step": 9687 + }, + { + "epoch": 1.54, + "learning_rate": 4.04902360567251e-05, + "loss": 0.8315, + "step": 9688 + }, + { + "epoch": 1.54, + "learning_rate": 4.04882109862768e-05, + "loss": 0.8957, + "step": 9689 + }, + { + "epoch": 1.54, + "learning_rate": 4.048618575088844e-05, + "loss": 0.8386, + "step": 9690 + }, + { + "epoch": 1.54, + "learning_rate": 4.0484160350581616e-05, + "loss": 0.8509, + "step": 9691 + }, + { + "epoch": 1.54, + "learning_rate": 4.048213478537787e-05, + "loss": 0.7961, + "step": 9692 + }, + { + "epoch": 1.54, + "learning_rate": 4.04801090552988e-05, + "loss": 0.9272, + "step": 9693 + }, + { + "epoch": 1.54, + "learning_rate": 4.0478083160365964e-05, + "loss": 0.9099, + "step": 9694 + }, + { + "epoch": 1.54, + "learning_rate": 4.047605710060094e-05, + "loss": 0.9007, + "step": 9695 + }, + { + "epoch": 1.54, + "learning_rate": 4.04740308760253e-05, + "loss": 0.8043, + "step": 9696 + }, + { + "epoch": 1.54, + "learning_rate": 4.0472004486660624e-05, + "loss": 0.8726, + "step": 9697 + }, + { + "epoch": 1.55, + "learning_rate": 4.04699779325285e-05, + "loss": 0.8307, + "step": 9698 + }, + { + "epoch": 1.55, + "learning_rate": 4.04679512136505e-05, + "loss": 0.8288, + "step": 9699 + }, + { + "epoch": 1.55, + "learning_rate": 4.046592433004821e-05, + "loss": 0.8524, + "step": 9700 + }, + { + "epoch": 1.55, + "learning_rate": 4.046389728174322e-05, + "loss": 0.8012, + "step": 9701 + }, + { + "epoch": 1.55, + "learning_rate": 4.0461870068757104e-05, + "loss": 0.9186, + "step": 9702 + }, + { + "epoch": 1.55, + "learning_rate": 4.0459842691111465e-05, + "loss": 0.7866, + "step": 9703 + }, + { + "epoch": 1.55, + "learning_rate": 4.045781514882788e-05, + "loss": 0.7878, + "step": 9704 + }, + { + "epoch": 1.55, + "learning_rate": 4.045578744192796e-05, + "loss": 0.8587, + "step": 9705 + }, + { + "epoch": 1.55, + "learning_rate": 4.045375957043328e-05, + "loss": 0.9488, + "step": 9706 + }, + { + "epoch": 1.55, + "learning_rate": 4.045173153436544e-05, + "loss": 0.8339, + "step": 9707 + }, + { + "epoch": 1.55, + "learning_rate": 4.0449703333746046e-05, + "loss": 0.8135, + "step": 9708 + }, + { + "epoch": 1.55, + "learning_rate": 4.0447674968596695e-05, + "loss": 0.8245, + "step": 9709 + }, + { + "epoch": 1.55, + "learning_rate": 4.0445646438938975e-05, + "loss": 0.9494, + "step": 9710 + }, + { + "epoch": 1.55, + "learning_rate": 4.0443617744794505e-05, + "loss": 0.8752, + "step": 9711 + }, + { + "epoch": 1.55, + "learning_rate": 4.044158888618488e-05, + "loss": 0.8421, + "step": 9712 + }, + { + "epoch": 1.55, + "learning_rate": 4.04395598631317e-05, + "loss": 0.858, + "step": 9713 + }, + { + "epoch": 1.55, + "learning_rate": 4.043753067565659e-05, + "loss": 0.8706, + "step": 9714 + }, + { + "epoch": 1.55, + "learning_rate": 4.043550132378115e-05, + "loss": 0.8486, + "step": 9715 + }, + { + "epoch": 1.55, + "learning_rate": 4.043347180752699e-05, + "loss": 0.8285, + "step": 9716 + }, + { + "epoch": 1.55, + "learning_rate": 4.043144212691572e-05, + "loss": 0.8429, + "step": 9717 + }, + { + "epoch": 1.55, + "learning_rate": 4.042941228196897e-05, + "loss": 1.0005, + "step": 9718 + }, + { + "epoch": 1.55, + "learning_rate": 4.0427382272708345e-05, + "loss": 0.8022, + "step": 9719 + }, + { + "epoch": 1.55, + "learning_rate": 4.042535209915546e-05, + "loss": 0.8155, + "step": 9720 + }, + { + "epoch": 1.55, + "learning_rate": 4.042332176133194e-05, + "loss": 0.8311, + "step": 9721 + }, + { + "epoch": 1.55, + "learning_rate": 4.042129125925941e-05, + "loss": 0.8062, + "step": 9722 + }, + { + "epoch": 1.55, + "learning_rate": 4.041926059295949e-05, + "loss": 0.9424, + "step": 9723 + }, + { + "epoch": 1.55, + "learning_rate": 4.04172297624538e-05, + "loss": 0.97, + "step": 9724 + }, + { + "epoch": 1.55, + "learning_rate": 4.0415198767763986e-05, + "loss": 0.8551, + "step": 9725 + }, + { + "epoch": 1.55, + "learning_rate": 4.041316760891165e-05, + "loss": 0.7618, + "step": 9726 + }, + { + "epoch": 1.55, + "learning_rate": 4.0411136285918447e-05, + "loss": 0.8621, + "step": 9727 + }, + { + "epoch": 1.55, + "learning_rate": 4.0409104798806e-05, + "loss": 0.9069, + "step": 9728 + }, + { + "epoch": 1.55, + "learning_rate": 4.040707314759593e-05, + "loss": 0.8837, + "step": 9729 + }, + { + "epoch": 1.55, + "learning_rate": 4.040504133230989e-05, + "loss": 0.8022, + "step": 9730 + }, + { + "epoch": 1.55, + "learning_rate": 4.0403009352969516e-05, + "loss": 0.8194, + "step": 9731 + }, + { + "epoch": 1.55, + "learning_rate": 4.0400977209596445e-05, + "loss": 0.8478, + "step": 9732 + }, + { + "epoch": 1.55, + "learning_rate": 4.039894490221231e-05, + "loss": 0.8063, + "step": 9733 + }, + { + "epoch": 1.55, + "learning_rate": 4.039691243083876e-05, + "loss": 0.9351, + "step": 9734 + }, + { + "epoch": 1.55, + "learning_rate": 4.0394879795497455e-05, + "loss": 0.7515, + "step": 9735 + }, + { + "epoch": 1.55, + "learning_rate": 4.039284699621001e-05, + "loss": 0.9937, + "step": 9736 + }, + { + "epoch": 1.55, + "learning_rate": 4.03908140329981e-05, + "loss": 0.8121, + "step": 9737 + }, + { + "epoch": 1.55, + "learning_rate": 4.038878090588336e-05, + "loss": 0.837, + "step": 9738 + }, + { + "epoch": 1.55, + "learning_rate": 4.038674761488745e-05, + "loss": 0.9367, + "step": 9739 + }, + { + "epoch": 1.55, + "learning_rate": 4.038471416003202e-05, + "loss": 0.9742, + "step": 9740 + }, + { + "epoch": 1.55, + "learning_rate": 4.038268054133871e-05, + "loss": 0.8728, + "step": 9741 + }, + { + "epoch": 1.55, + "learning_rate": 4.038064675882921e-05, + "loss": 0.8735, + "step": 9742 + }, + { + "epoch": 1.55, + "learning_rate": 4.037861281252515e-05, + "loss": 0.856, + "step": 9743 + }, + { + "epoch": 1.55, + "learning_rate": 4.03765787024482e-05, + "loss": 0.8756, + "step": 9744 + }, + { + "epoch": 1.55, + "learning_rate": 4.037454442862003e-05, + "loss": 0.8526, + "step": 9745 + }, + { + "epoch": 1.55, + "learning_rate": 4.0372509991062285e-05, + "loss": 0.9499, + "step": 9746 + }, + { + "epoch": 1.55, + "learning_rate": 4.037047538979665e-05, + "loss": 0.8656, + "step": 9747 + }, + { + "epoch": 1.55, + "learning_rate": 4.0368440624844774e-05, + "loss": 0.9175, + "step": 9748 + }, + { + "epoch": 1.55, + "learning_rate": 4.036640569622835e-05, + "loss": 0.8876, + "step": 9749 + }, + { + "epoch": 1.55, + "learning_rate": 4.0364370603969024e-05, + "loss": 0.8826, + "step": 9750 + }, + { + "epoch": 1.55, + "learning_rate": 4.0362335348088475e-05, + "loss": 0.8305, + "step": 9751 + }, + { + "epoch": 1.55, + "learning_rate": 4.036029992860839e-05, + "loss": 0.8735, + "step": 9752 + }, + { + "epoch": 1.55, + "learning_rate": 4.0358264345550436e-05, + "loss": 0.8365, + "step": 9753 + }, + { + "epoch": 1.55, + "learning_rate": 4.035622859893629e-05, + "loss": 0.8626, + "step": 9754 + }, + { + "epoch": 1.55, + "learning_rate": 4.035419268878763e-05, + "loss": 0.7764, + "step": 9755 + }, + { + "epoch": 1.55, + "learning_rate": 4.0352156615126144e-05, + "loss": 0.8464, + "step": 9756 + }, + { + "epoch": 1.55, + "learning_rate": 4.0350120377973504e-05, + "loss": 0.9117, + "step": 9757 + }, + { + "epoch": 1.55, + "learning_rate": 4.03480839773514e-05, + "loss": 0.8713, + "step": 9758 + }, + { + "epoch": 1.55, + "learning_rate": 4.0346047413281536e-05, + "loss": 0.7616, + "step": 9759 + }, + { + "epoch": 1.55, + "learning_rate": 4.0344010685785575e-05, + "loss": 0.896, + "step": 9760 + }, + { + "epoch": 1.56, + "learning_rate": 4.034197379488521e-05, + "loss": 0.7784, + "step": 9761 + }, + { + "epoch": 1.56, + "learning_rate": 4.0339936740602134e-05, + "loss": 0.7976, + "step": 9762 + }, + { + "epoch": 1.56, + "learning_rate": 4.0337899522958056e-05, + "loss": 0.7607, + "step": 9763 + }, + { + "epoch": 1.56, + "learning_rate": 4.033586214197466e-05, + "loss": 0.906, + "step": 9764 + }, + { + "epoch": 1.56, + "learning_rate": 4.033382459767364e-05, + "loss": 0.908, + "step": 9765 + }, + { + "epoch": 1.56, + "learning_rate": 4.0331786890076696e-05, + "loss": 0.9445, + "step": 9766 + }, + { + "epoch": 1.56, + "learning_rate": 4.0329749019205525e-05, + "loss": 0.8619, + "step": 9767 + }, + { + "epoch": 1.56, + "learning_rate": 4.032771098508184e-05, + "loss": 0.9423, + "step": 9768 + }, + { + "epoch": 1.56, + "learning_rate": 4.0325672787727335e-05, + "loss": 0.8573, + "step": 9769 + }, + { + "epoch": 1.56, + "learning_rate": 4.0323634427163724e-05, + "loss": 0.8009, + "step": 9770 + }, + { + "epoch": 1.56, + "learning_rate": 4.032159590341271e-05, + "loss": 0.8538, + "step": 9771 + }, + { + "epoch": 1.56, + "learning_rate": 4.0319557216496e-05, + "loss": 0.8804, + "step": 9772 + }, + { + "epoch": 1.56, + "learning_rate": 4.03175183664353e-05, + "loss": 0.845, + "step": 9773 + }, + { + "epoch": 1.56, + "learning_rate": 4.031547935325234e-05, + "loss": 0.8918, + "step": 9774 + }, + { + "epoch": 1.56, + "learning_rate": 4.031344017696882e-05, + "loss": 0.8415, + "step": 9775 + }, + { + "epoch": 1.56, + "learning_rate": 4.031140083760645e-05, + "loss": 0.803, + "step": 9776 + }, + { + "epoch": 1.56, + "learning_rate": 4.030936133518697e-05, + "loss": 0.9146, + "step": 9777 + }, + { + "epoch": 1.56, + "learning_rate": 4.030732166973208e-05, + "loss": 0.89, + "step": 9778 + }, + { + "epoch": 1.56, + "learning_rate": 4.0305281841263504e-05, + "loss": 0.8666, + "step": 9779 + }, + { + "epoch": 1.56, + "learning_rate": 4.030324184980298e-05, + "loss": 0.814, + "step": 9780 + }, + { + "epoch": 1.56, + "learning_rate": 4.030120169537221e-05, + "loss": 0.8651, + "step": 9781 + }, + { + "epoch": 1.56, + "learning_rate": 4.0299161377992934e-05, + "loss": 0.9867, + "step": 9782 + }, + { + "epoch": 1.56, + "learning_rate": 4.029712089768688e-05, + "loss": 0.8162, + "step": 9783 + }, + { + "epoch": 1.56, + "learning_rate": 4.029508025447577e-05, + "loss": 0.875, + "step": 9784 + }, + { + "epoch": 1.56, + "learning_rate": 4.0293039448381345e-05, + "loss": 0.8652, + "step": 9785 + }, + { + "epoch": 1.56, + "learning_rate": 4.0290998479425336e-05, + "loss": 0.8835, + "step": 9786 + }, + { + "epoch": 1.56, + "learning_rate": 4.028895734762947e-05, + "loss": 0.8206, + "step": 9787 + }, + { + "epoch": 1.56, + "learning_rate": 4.02869160530155e-05, + "loss": 0.8833, + "step": 9788 + }, + { + "epoch": 1.56, + "learning_rate": 4.028487459560515e-05, + "loss": 0.8767, + "step": 9789 + }, + { + "epoch": 1.56, + "learning_rate": 4.028283297542017e-05, + "loss": 0.8396, + "step": 9790 + }, + { + "epoch": 1.56, + "learning_rate": 4.028079119248229e-05, + "loss": 0.8525, + "step": 9791 + }, + { + "epoch": 1.56, + "learning_rate": 4.027874924681326e-05, + "loss": 0.8926, + "step": 9792 + }, + { + "epoch": 1.56, + "learning_rate": 4.0276707138434836e-05, + "loss": 0.8105, + "step": 9793 + }, + { + "epoch": 1.56, + "learning_rate": 4.027466486736875e-05, + "loss": 0.8422, + "step": 9794 + }, + { + "epoch": 1.56, + "learning_rate": 4.0272622433636754e-05, + "loss": 0.7884, + "step": 9795 + }, + { + "epoch": 1.56, + "learning_rate": 4.02705798372606e-05, + "loss": 0.9067, + "step": 9796 + }, + { + "epoch": 1.56, + "learning_rate": 4.0268537078262047e-05, + "loss": 0.9036, + "step": 9797 + }, + { + "epoch": 1.56, + "learning_rate": 4.026649415666285e-05, + "loss": 0.8489, + "step": 9798 + }, + { + "epoch": 1.56, + "learning_rate": 4.026445107248474e-05, + "loss": 0.9013, + "step": 9799 + }, + { + "epoch": 1.56, + "learning_rate": 4.026240782574952e-05, + "loss": 0.8609, + "step": 9800 + }, + { + "epoch": 1.56, + "learning_rate": 4.0260364416478904e-05, + "loss": 0.9325, + "step": 9801 + }, + { + "epoch": 1.56, + "learning_rate": 4.025832084469468e-05, + "loss": 0.8935, + "step": 9802 + }, + { + "epoch": 1.56, + "learning_rate": 4.025627711041859e-05, + "loss": 0.8251, + "step": 9803 + }, + { + "epoch": 1.56, + "learning_rate": 4.025423321367242e-05, + "loss": 0.845, + "step": 9804 + }, + { + "epoch": 1.56, + "learning_rate": 4.025218915447793e-05, + "loss": 0.8133, + "step": 9805 + }, + { + "epoch": 1.56, + "learning_rate": 4.025014493285687e-05, + "loss": 0.828, + "step": 9806 + }, + { + "epoch": 1.56, + "learning_rate": 4.024810054883104e-05, + "loss": 0.885, + "step": 9807 + }, + { + "epoch": 1.56, + "learning_rate": 4.0246056002422196e-05, + "loss": 0.8709, + "step": 9808 + }, + { + "epoch": 1.56, + "learning_rate": 4.0244011293652106e-05, + "loss": 0.7992, + "step": 9809 + }, + { + "epoch": 1.56, + "learning_rate": 4.0241966422542554e-05, + "loss": 1.0063, + "step": 9810 + }, + { + "epoch": 1.56, + "learning_rate": 4.023992138911531e-05, + "loss": 0.8488, + "step": 9811 + }, + { + "epoch": 1.56, + "learning_rate": 4.023787619339217e-05, + "loss": 0.8352, + "step": 9812 + }, + { + "epoch": 1.56, + "learning_rate": 4.0235830835394885e-05, + "loss": 0.887, + "step": 9813 + }, + { + "epoch": 1.56, + "learning_rate": 4.023378531514525e-05, + "loss": 0.8284, + "step": 9814 + }, + { + "epoch": 1.56, + "learning_rate": 4.023173963266505e-05, + "loss": 0.8289, + "step": 9815 + }, + { + "epoch": 1.56, + "learning_rate": 4.022969378797608e-05, + "loss": 0.8092, + "step": 9816 + }, + { + "epoch": 1.56, + "learning_rate": 4.022764778110011e-05, + "loss": 0.8311, + "step": 9817 + }, + { + "epoch": 1.56, + "learning_rate": 4.022560161205894e-05, + "loss": 0.845, + "step": 9818 + }, + { + "epoch": 1.56, + "learning_rate": 4.022355528087435e-05, + "loss": 0.8166, + "step": 9819 + }, + { + "epoch": 1.56, + "learning_rate": 4.022150878756815e-05, + "loss": 0.8254, + "step": 9820 + }, + { + "epoch": 1.56, + "learning_rate": 4.021946213216211e-05, + "loss": 0.8934, + "step": 9821 + }, + { + "epoch": 1.56, + "learning_rate": 4.0217415314678046e-05, + "loss": 0.883, + "step": 9822 + }, + { + "epoch": 1.56, + "learning_rate": 4.021536833513775e-05, + "loss": 0.9015, + "step": 9823 + }, + { + "epoch": 1.57, + "learning_rate": 4.0213321193563014e-05, + "loss": 0.8088, + "step": 9824 + }, + { + "epoch": 1.57, + "learning_rate": 4.021127388997564e-05, + "loss": 0.8051, + "step": 9825 + }, + { + "epoch": 1.57, + "learning_rate": 4.020922642439744e-05, + "loss": 0.8697, + "step": 9826 + }, + { + "epoch": 1.57, + "learning_rate": 4.0207178796850206e-05, + "loss": 0.9338, + "step": 9827 + }, + { + "epoch": 1.57, + "learning_rate": 4.020513100735575e-05, + "loss": 0.8583, + "step": 9828 + }, + { + "epoch": 1.57, + "learning_rate": 4.0203083055935896e-05, + "loss": 0.9465, + "step": 9829 + }, + { + "epoch": 1.57, + "learning_rate": 4.0201034942612426e-05, + "loss": 0.8478, + "step": 9830 + }, + { + "epoch": 1.57, + "learning_rate": 4.0198986667407164e-05, + "loss": 0.8805, + "step": 9831 + }, + { + "epoch": 1.57, + "learning_rate": 4.019693823034192e-05, + "loss": 0.7724, + "step": 9832 + }, + { + "epoch": 1.57, + "learning_rate": 4.019488963143851e-05, + "loss": 0.8623, + "step": 9833 + }, + { + "epoch": 1.57, + "learning_rate": 4.019284087071875e-05, + "loss": 0.8207, + "step": 9834 + }, + { + "epoch": 1.57, + "learning_rate": 4.019079194820446e-05, + "loss": 1.0018, + "step": 9835 + }, + { + "epoch": 1.57, + "learning_rate": 4.0188742863917457e-05, + "loss": 0.8713, + "step": 9836 + }, + { + "epoch": 1.57, + "learning_rate": 4.018669361787957e-05, + "loss": 0.8558, + "step": 9837 + }, + { + "epoch": 1.57, + "learning_rate": 4.01846442101126e-05, + "loss": 0.8236, + "step": 9838 + }, + { + "epoch": 1.57, + "learning_rate": 4.01825946406384e-05, + "loss": 0.7934, + "step": 9839 + }, + { + "epoch": 1.57, + "learning_rate": 4.018054490947879e-05, + "loss": 0.8413, + "step": 9840 + }, + { + "epoch": 1.57, + "learning_rate": 4.017849501665558e-05, + "loss": 0.828, + "step": 9841 + }, + { + "epoch": 1.57, + "learning_rate": 4.0176444962190625e-05, + "loss": 0.9, + "step": 9842 + }, + { + "epoch": 1.57, + "learning_rate": 4.0174394746105736e-05, + "loss": 0.9482, + "step": 9843 + }, + { + "epoch": 1.57, + "learning_rate": 4.017234436842276e-05, + "loss": 0.8548, + "step": 9844 + }, + { + "epoch": 1.57, + "learning_rate": 4.017029382916353e-05, + "loss": 0.8792, + "step": 9845 + }, + { + "epoch": 1.57, + "learning_rate": 4.0168243128349876e-05, + "loss": 0.8244, + "step": 9846 + }, + { + "epoch": 1.57, + "learning_rate": 4.016619226600364e-05, + "loss": 0.8411, + "step": 9847 + }, + { + "epoch": 1.57, + "learning_rate": 4.016414124214667e-05, + "loss": 0.8978, + "step": 9848 + }, + { + "epoch": 1.57, + "learning_rate": 4.01620900568008e-05, + "loss": 0.8523, + "step": 9849 + }, + { + "epoch": 1.57, + "learning_rate": 4.016003870998788e-05, + "loss": 0.7529, + "step": 9850 + }, + { + "epoch": 1.57, + "learning_rate": 4.015798720172974e-05, + "loss": 0.8157, + "step": 9851 + }, + { + "epoch": 1.57, + "learning_rate": 4.015593553204825e-05, + "loss": 0.848, + "step": 9852 + }, + { + "epoch": 1.57, + "learning_rate": 4.015388370096524e-05, + "loss": 0.8255, + "step": 9853 + }, + { + "epoch": 1.57, + "learning_rate": 4.0151831708502574e-05, + "loss": 0.8356, + "step": 9854 + }, + { + "epoch": 1.57, + "learning_rate": 4.0149779554682096e-05, + "loss": 0.8893, + "step": 9855 + }, + { + "epoch": 1.57, + "learning_rate": 4.0147727239525667e-05, + "loss": 0.8757, + "step": 9856 + }, + { + "epoch": 1.57, + "learning_rate": 4.014567476305514e-05, + "loss": 0.8176, + "step": 9857 + }, + { + "epoch": 1.57, + "learning_rate": 4.014362212529237e-05, + "loss": 0.7939, + "step": 9858 + }, + { + "epoch": 1.57, + "learning_rate": 4.014156932625921e-05, + "loss": 0.8584, + "step": 9859 + }, + { + "epoch": 1.57, + "learning_rate": 4.013951636597755e-05, + "loss": 0.8104, + "step": 9860 + }, + { + "epoch": 1.57, + "learning_rate": 4.013746324446921e-05, + "loss": 0.8516, + "step": 9861 + }, + { + "epoch": 1.57, + "learning_rate": 4.0135409961756086e-05, + "loss": 0.8525, + "step": 9862 + }, + { + "epoch": 1.57, + "learning_rate": 4.013335651786004e-05, + "loss": 0.818, + "step": 9863 + }, + { + "epoch": 1.57, + "learning_rate": 4.0131302912802926e-05, + "loss": 0.8034, + "step": 9864 + }, + { + "epoch": 1.57, + "learning_rate": 4.012924914660663e-05, + "loss": 0.8223, + "step": 9865 + }, + { + "epoch": 1.57, + "learning_rate": 4.012719521929301e-05, + "loss": 0.8134, + "step": 9866 + }, + { + "epoch": 1.57, + "learning_rate": 4.012514113088395e-05, + "loss": 0.8269, + "step": 9867 + }, + { + "epoch": 1.57, + "learning_rate": 4.0123086881401306e-05, + "loss": 0.7647, + "step": 9868 + }, + { + "epoch": 1.57, + "learning_rate": 4.012103247086698e-05, + "loss": 0.8311, + "step": 9869 + }, + { + "epoch": 1.57, + "learning_rate": 4.0118977899302846e-05, + "loss": 0.8643, + "step": 9870 + }, + { + "epoch": 1.57, + "learning_rate": 4.011692316673076e-05, + "loss": 0.8668, + "step": 9871 + }, + { + "epoch": 1.57, + "learning_rate": 4.011486827317263e-05, + "loss": 0.8713, + "step": 9872 + }, + { + "epoch": 1.57, + "learning_rate": 4.011281321865032e-05, + "loss": 0.7975, + "step": 9873 + }, + { + "epoch": 1.57, + "learning_rate": 4.011075800318573e-05, + "loss": 0.8503, + "step": 9874 + }, + { + "epoch": 1.57, + "learning_rate": 4.0108702626800745e-05, + "loss": 0.8845, + "step": 9875 + }, + { + "epoch": 1.57, + "learning_rate": 4.010664708951725e-05, + "loss": 0.8088, + "step": 9876 + }, + { + "epoch": 1.57, + "learning_rate": 4.0104591391357124e-05, + "loss": 0.8255, + "step": 9877 + }, + { + "epoch": 1.57, + "learning_rate": 4.010253553234229e-05, + "loss": 0.7475, + "step": 9878 + }, + { + "epoch": 1.57, + "learning_rate": 4.0100479512494604e-05, + "loss": 0.8548, + "step": 9879 + }, + { + "epoch": 1.57, + "learning_rate": 4.0098423331835984e-05, + "loss": 0.8536, + "step": 9880 + }, + { + "epoch": 1.57, + "learning_rate": 4.0096366990388325e-05, + "loss": 0.8745, + "step": 9881 + }, + { + "epoch": 1.57, + "learning_rate": 4.009431048817352e-05, + "loss": 0.8145, + "step": 9882 + }, + { + "epoch": 1.57, + "learning_rate": 4.0092253825213474e-05, + "loss": 0.8951, + "step": 9883 + }, + { + "epoch": 1.57, + "learning_rate": 4.009019700153008e-05, + "loss": 0.9253, + "step": 9884 + }, + { + "epoch": 1.57, + "learning_rate": 4.008814001714526e-05, + "loss": 0.8939, + "step": 9885 + }, + { + "epoch": 1.57, + "learning_rate": 4.00860828720809e-05, + "loss": 0.8671, + "step": 9886 + }, + { + "epoch": 1.58, + "learning_rate": 4.008402556635892e-05, + "loss": 0.8822, + "step": 9887 + }, + { + "epoch": 1.58, + "learning_rate": 4.0081968100001224e-05, + "loss": 0.8924, + "step": 9888 + }, + { + "epoch": 1.58, + "learning_rate": 4.007991047302973e-05, + "loss": 0.8509, + "step": 9889 + }, + { + "epoch": 1.58, + "learning_rate": 4.007785268546634e-05, + "loss": 0.8678, + "step": 9890 + }, + { + "epoch": 1.58, + "learning_rate": 4.0075794737332975e-05, + "loss": 0.7884, + "step": 9891 + }, + { + "epoch": 1.58, + "learning_rate": 4.007373662865155e-05, + "loss": 0.8117, + "step": 9892 + }, + { + "epoch": 1.58, + "learning_rate": 4.007167835944398e-05, + "loss": 0.8755, + "step": 9893 + }, + { + "epoch": 1.58, + "learning_rate": 4.006961992973218e-05, + "loss": 0.8599, + "step": 9894 + }, + { + "epoch": 1.58, + "learning_rate": 4.0067561339538086e-05, + "loss": 0.9436, + "step": 9895 + }, + { + "epoch": 1.58, + "learning_rate": 4.006550258888361e-05, + "loss": 0.7996, + "step": 9896 + }, + { + "epoch": 1.58, + "learning_rate": 4.0063443677790666e-05, + "loss": 0.8966, + "step": 9897 + }, + { + "epoch": 1.58, + "learning_rate": 4.0061384606281205e-05, + "loss": 0.8475, + "step": 9898 + }, + { + "epoch": 1.58, + "learning_rate": 4.005932537437713e-05, + "loss": 0.9032, + "step": 9899 + }, + { + "epoch": 1.58, + "learning_rate": 4.005726598210039e-05, + "loss": 0.7987, + "step": 9900 + }, + { + "epoch": 1.58, + "learning_rate": 4.0055206429472916e-05, + "loss": 0.7956, + "step": 9901 + }, + { + "epoch": 1.58, + "learning_rate": 4.005314671651663e-05, + "loss": 0.907, + "step": 9902 + }, + { + "epoch": 1.58, + "learning_rate": 4.005108684325347e-05, + "loss": 0.8489, + "step": 9903 + }, + { + "epoch": 1.58, + "learning_rate": 4.0049026809705364e-05, + "loss": 0.8237, + "step": 9904 + }, + { + "epoch": 1.58, + "learning_rate": 4.004696661589426e-05, + "loss": 0.7963, + "step": 9905 + }, + { + "epoch": 1.58, + "learning_rate": 4.0044906261842106e-05, + "loss": 0.8946, + "step": 9906 + }, + { + "epoch": 1.58, + "learning_rate": 4.0042845747570834e-05, + "loss": 0.8338, + "step": 9907 + }, + { + "epoch": 1.58, + "learning_rate": 4.004078507310238e-05, + "loss": 0.9128, + "step": 9908 + }, + { + "epoch": 1.58, + "learning_rate": 4.00387242384587e-05, + "loss": 0.8521, + "step": 9909 + }, + { + "epoch": 1.58, + "learning_rate": 4.0036663243661734e-05, + "loss": 0.8555, + "step": 9910 + }, + { + "epoch": 1.58, + "learning_rate": 4.003460208873344e-05, + "loss": 0.9247, + "step": 9911 + }, + { + "epoch": 1.58, + "learning_rate": 4.003254077369576e-05, + "loss": 0.8816, + "step": 9912 + }, + { + "epoch": 1.58, + "learning_rate": 4.003047929857064e-05, + "loss": 0.8355, + "step": 9913 + }, + { + "epoch": 1.58, + "learning_rate": 4.0028417663380054e-05, + "loss": 0.9417, + "step": 9914 + }, + { + "epoch": 1.58, + "learning_rate": 4.002635586814594e-05, + "loss": 0.8431, + "step": 9915 + }, + { + "epoch": 1.58, + "learning_rate": 4.002429391289025e-05, + "loss": 0.8516, + "step": 9916 + }, + { + "epoch": 1.58, + "learning_rate": 4.002223179763496e-05, + "loss": 0.8448, + "step": 9917 + }, + { + "epoch": 1.58, + "learning_rate": 4.0020169522402016e-05, + "loss": 0.8772, + "step": 9918 + }, + { + "epoch": 1.58, + "learning_rate": 4.001810708721339e-05, + "loss": 0.8381, + "step": 9919 + }, + { + "epoch": 1.58, + "learning_rate": 4.001604449209103e-05, + "loss": 0.8725, + "step": 9920 + }, + { + "epoch": 1.58, + "learning_rate": 4.001398173705693e-05, + "loss": 0.8879, + "step": 9921 + }, + { + "epoch": 1.58, + "learning_rate": 4.001191882213303e-05, + "loss": 0.9818, + "step": 9922 + }, + { + "epoch": 1.58, + "learning_rate": 4.0009855747341304e-05, + "loss": 0.9428, + "step": 9923 + }, + { + "epoch": 1.58, + "learning_rate": 4.0007792512703735e-05, + "loss": 0.8063, + "step": 9924 + }, + { + "epoch": 1.58, + "learning_rate": 4.0005729118242284e-05, + "loss": 0.8552, + "step": 9925 + }, + { + "epoch": 1.58, + "learning_rate": 4.0003665563978926e-05, + "loss": 0.8167, + "step": 9926 + }, + { + "epoch": 1.58, + "learning_rate": 4.000160184993564e-05, + "loss": 0.8555, + "step": 9927 + }, + { + "epoch": 1.58, + "learning_rate": 3.9999537976134406e-05, + "loss": 0.9871, + "step": 9928 + }, + { + "epoch": 1.58, + "learning_rate": 3.9997473942597194e-05, + "loss": 0.9376, + "step": 9929 + }, + { + "epoch": 1.58, + "learning_rate": 3.999540974934599e-05, + "loss": 0.9112, + "step": 9930 + }, + { + "epoch": 1.58, + "learning_rate": 3.9993345396402785e-05, + "loss": 0.9499, + "step": 9931 + }, + { + "epoch": 1.58, + "learning_rate": 3.9991280883789545e-05, + "loss": 0.8824, + "step": 9932 + }, + { + "epoch": 1.58, + "learning_rate": 3.998921621152827e-05, + "loss": 0.9001, + "step": 9933 + }, + { + "epoch": 1.58, + "learning_rate": 3.9987151379640934e-05, + "loss": 0.7993, + "step": 9934 + }, + { + "epoch": 1.58, + "learning_rate": 3.998508638814954e-05, + "loss": 0.8106, + "step": 9935 + }, + { + "epoch": 1.58, + "learning_rate": 3.9983021237076076e-05, + "loss": 0.9278, + "step": 9936 + }, + { + "epoch": 1.58, + "learning_rate": 3.998095592644253e-05, + "loss": 0.9271, + "step": 9937 + }, + { + "epoch": 1.58, + "learning_rate": 3.99788904562709e-05, + "loss": 0.7977, + "step": 9938 + }, + { + "epoch": 1.58, + "learning_rate": 3.997682482658318e-05, + "loss": 0.7799, + "step": 9939 + }, + { + "epoch": 1.58, + "learning_rate": 3.997475903740137e-05, + "loss": 0.8101, + "step": 9940 + }, + { + "epoch": 1.58, + "learning_rate": 3.997269308874746e-05, + "loss": 0.8211, + "step": 9941 + }, + { + "epoch": 1.58, + "learning_rate": 3.997062698064347e-05, + "loss": 0.8801, + "step": 9942 + }, + { + "epoch": 1.58, + "learning_rate": 3.996856071311138e-05, + "loss": 0.8936, + "step": 9943 + }, + { + "epoch": 1.58, + "learning_rate": 3.996649428617322e-05, + "loss": 0.8752, + "step": 9944 + }, + { + "epoch": 1.58, + "learning_rate": 3.996442769985097e-05, + "loss": 0.8465, + "step": 9945 + }, + { + "epoch": 1.58, + "learning_rate": 3.996236095416665e-05, + "loss": 0.8978, + "step": 9946 + }, + { + "epoch": 1.58, + "learning_rate": 3.996029404914228e-05, + "loss": 0.8451, + "step": 9947 + }, + { + "epoch": 1.58, + "learning_rate": 3.995822698479985e-05, + "loss": 0.8881, + "step": 9948 + }, + { + "epoch": 1.58, + "learning_rate": 3.995615976116138e-05, + "loss": 0.7884, + "step": 9949 + }, + { + "epoch": 1.59, + "learning_rate": 3.99540923782489e-05, + "loss": 0.8337, + "step": 9950 + }, + { + "epoch": 1.59, + "learning_rate": 3.995202483608442e-05, + "loss": 0.8251, + "step": 9951 + }, + { + "epoch": 1.59, + "learning_rate": 3.994995713468994e-05, + "loss": 0.7907, + "step": 9952 + }, + { + "epoch": 1.59, + "learning_rate": 3.99478892740875e-05, + "loss": 0.7511, + "step": 9953 + }, + { + "epoch": 1.59, + "learning_rate": 3.9945821254299115e-05, + "loss": 0.869, + "step": 9954 + }, + { + "epoch": 1.59, + "learning_rate": 3.99437530753468e-05, + "loss": 0.8653, + "step": 9955 + }, + { + "epoch": 1.59, + "learning_rate": 3.994168473725259e-05, + "loss": 0.9304, + "step": 9956 + }, + { + "epoch": 1.59, + "learning_rate": 3.9939616240038515e-05, + "loss": 0.9211, + "step": 9957 + }, + { + "epoch": 1.59, + "learning_rate": 3.99375475837266e-05, + "loss": 0.8686, + "step": 9958 + }, + { + "epoch": 1.59, + "learning_rate": 3.993547876833886e-05, + "loss": 0.9363, + "step": 9959 + }, + { + "epoch": 1.59, + "learning_rate": 3.993340979389735e-05, + "loss": 0.7581, + "step": 9960 + }, + { + "epoch": 1.59, + "learning_rate": 3.9931340660424085e-05, + "loss": 0.955, + "step": 9961 + }, + { + "epoch": 1.59, + "learning_rate": 3.992927136794111e-05, + "loss": 0.927, + "step": 9962 + }, + { + "epoch": 1.59, + "learning_rate": 3.9927201916470454e-05, + "loss": 0.9101, + "step": 9963 + }, + { + "epoch": 1.59, + "learning_rate": 3.9925132306034165e-05, + "loss": 0.8241, + "step": 9964 + }, + { + "epoch": 1.59, + "learning_rate": 3.992306253665427e-05, + "loss": 0.8767, + "step": 9965 + }, + { + "epoch": 1.59, + "learning_rate": 3.992099260835282e-05, + "loss": 0.823, + "step": 9966 + }, + { + "epoch": 1.59, + "learning_rate": 3.991892252115187e-05, + "loss": 0.7872, + "step": 9967 + }, + { + "epoch": 1.59, + "learning_rate": 3.991685227507345e-05, + "loss": 0.8089, + "step": 9968 + }, + { + "epoch": 1.59, + "learning_rate": 3.9914781870139593e-05, + "loss": 0.8275, + "step": 9969 + }, + { + "epoch": 1.59, + "learning_rate": 3.991271130637238e-05, + "loss": 0.8687, + "step": 9970 + }, + { + "epoch": 1.59, + "learning_rate": 3.9910640583793843e-05, + "loss": 0.881, + "step": 9971 + }, + { + "epoch": 1.59, + "learning_rate": 3.9908569702426034e-05, + "loss": 0.7963, + "step": 9972 + }, + { + "epoch": 1.59, + "learning_rate": 3.990649866229101e-05, + "loss": 0.8974, + "step": 9973 + }, + { + "epoch": 1.59, + "learning_rate": 3.990442746341083e-05, + "loss": 0.7366, + "step": 9974 + }, + { + "epoch": 1.59, + "learning_rate": 3.990235610580754e-05, + "loss": 0.8088, + "step": 9975 + }, + { + "epoch": 1.59, + "learning_rate": 3.99002845895032e-05, + "loss": 0.8554, + "step": 9976 + }, + { + "epoch": 1.59, + "learning_rate": 3.989821291451988e-05, + "loss": 0.8687, + "step": 9977 + }, + { + "epoch": 1.59, + "learning_rate": 3.989614108087964e-05, + "loss": 0.8639, + "step": 9978 + }, + { + "epoch": 1.59, + "learning_rate": 3.9894069088604535e-05, + "loss": 0.8367, + "step": 9979 + }, + { + "epoch": 1.59, + "learning_rate": 3.9891996937716646e-05, + "loss": 0.8507, + "step": 9980 + }, + { + "epoch": 1.59, + "learning_rate": 3.9889924628238026e-05, + "loss": 0.8025, + "step": 9981 + }, + { + "epoch": 1.59, + "learning_rate": 3.9887852160190746e-05, + "loss": 0.8201, + "step": 9982 + }, + { + "epoch": 1.59, + "learning_rate": 3.988577953359688e-05, + "loss": 0.8583, + "step": 9983 + }, + { + "epoch": 1.59, + "learning_rate": 3.9883706748478504e-05, + "loss": 0.8605, + "step": 9984 + }, + { + "epoch": 1.59, + "learning_rate": 3.988163380485769e-05, + "loss": 0.8129, + "step": 9985 + }, + { + "epoch": 1.59, + "learning_rate": 3.98795607027565e-05, + "loss": 0.8563, + "step": 9986 + }, + { + "epoch": 1.59, + "learning_rate": 3.987748744219702e-05, + "loss": 0.8109, + "step": 9987 + }, + { + "epoch": 1.59, + "learning_rate": 3.987541402320134e-05, + "loss": 0.9056, + "step": 9988 + }, + { + "epoch": 1.59, + "learning_rate": 3.9873340445791535e-05, + "loss": 0.8779, + "step": 9989 + }, + { + "epoch": 1.59, + "learning_rate": 3.987126670998968e-05, + "loss": 0.8865, + "step": 9990 + }, + { + "epoch": 1.59, + "learning_rate": 3.986919281581786e-05, + "loss": 0.8314, + "step": 9991 + }, + { + "epoch": 1.59, + "learning_rate": 3.986711876329817e-05, + "loss": 0.8049, + "step": 9992 + }, + { + "epoch": 1.59, + "learning_rate": 3.986504455245268e-05, + "loss": 0.8284, + "step": 9993 + }, + { + "epoch": 1.59, + "learning_rate": 3.9862970183303506e-05, + "loss": 0.8858, + "step": 9994 + }, + { + "epoch": 1.59, + "learning_rate": 3.986089565587271e-05, + "loss": 0.8296, + "step": 9995 + }, + { + "epoch": 1.59, + "learning_rate": 3.98588209701824e-05, + "loss": 0.8616, + "step": 9996 + }, + { + "epoch": 1.59, + "learning_rate": 3.985674612625467e-05, + "loss": 0.8057, + "step": 9997 + }, + { + "epoch": 1.59, + "learning_rate": 3.985467112411161e-05, + "loss": 0.9228, + "step": 9998 + }, + { + "epoch": 1.59, + "learning_rate": 3.985259596377533e-05, + "loss": 0.8225, + "step": 9999 + }, + { + "epoch": 1.59, + "learning_rate": 3.9850520645267906e-05, + "loss": 0.765, + "step": 10000 + }, + { + "epoch": 1.59, + "learning_rate": 3.984844516861147e-05, + "loss": 0.8957, + "step": 10001 + }, + { + "epoch": 1.59, + "learning_rate": 3.9846369533828096e-05, + "loss": 0.8515, + "step": 10002 + }, + { + "epoch": 1.59, + "learning_rate": 3.984429374093991e-05, + "loss": 0.847, + "step": 10003 + }, + { + "epoch": 1.59, + "learning_rate": 3.984221778996899e-05, + "loss": 0.861, + "step": 10004 + }, + { + "epoch": 1.59, + "learning_rate": 3.984014168093748e-05, + "loss": 0.8161, + "step": 10005 + }, + { + "epoch": 1.59, + "learning_rate": 3.9838065413867466e-05, + "loss": 0.8535, + "step": 10006 + }, + { + "epoch": 1.59, + "learning_rate": 3.983598898878106e-05, + "loss": 0.9294, + "step": 10007 + }, + { + "epoch": 1.59, + "learning_rate": 3.983391240570037e-05, + "loss": 0.922, + "step": 10008 + }, + { + "epoch": 1.59, + "learning_rate": 3.983183566464753e-05, + "loss": 0.8228, + "step": 10009 + }, + { + "epoch": 1.59, + "learning_rate": 3.982975876564464e-05, + "loss": 0.8464, + "step": 10010 + }, + { + "epoch": 1.59, + "learning_rate": 3.982768170871383e-05, + "loss": 0.7754, + "step": 10011 + }, + { + "epoch": 1.6, + "learning_rate": 3.982560449387721e-05, + "loss": 0.8202, + "step": 10012 + }, + { + "epoch": 1.6, + "learning_rate": 3.98235271211569e-05, + "loss": 0.8642, + "step": 10013 + }, + { + "epoch": 1.6, + "learning_rate": 3.9821449590575025e-05, + "loss": 0.8993, + "step": 10014 + }, + { + "epoch": 1.6, + "learning_rate": 3.981937190215371e-05, + "loss": 0.8593, + "step": 10015 + }, + { + "epoch": 1.6, + "learning_rate": 3.981729405591509e-05, + "loss": 0.8352, + "step": 10016 + }, + { + "epoch": 1.6, + "learning_rate": 3.981521605188127e-05, + "loss": 0.8625, + "step": 10017 + }, + { + "epoch": 1.6, + "learning_rate": 3.9813137890074406e-05, + "loss": 0.7615, + "step": 10018 + }, + { + "epoch": 1.6, + "learning_rate": 3.9811059570516606e-05, + "loss": 0.8844, + "step": 10019 + }, + { + "epoch": 1.6, + "learning_rate": 3.980898109323002e-05, + "loss": 0.9337, + "step": 10020 + }, + { + "epoch": 1.6, + "learning_rate": 3.980690245823677e-05, + "loss": 0.8984, + "step": 10021 + }, + { + "epoch": 1.6, + "learning_rate": 3.980482366555901e-05, + "loss": 0.846, + "step": 10022 + }, + { + "epoch": 1.6, + "learning_rate": 3.980274471521886e-05, + "loss": 0.9298, + "step": 10023 + }, + { + "epoch": 1.6, + "learning_rate": 3.9800665607238466e-05, + "loss": 0.832, + "step": 10024 + }, + { + "epoch": 1.6, + "learning_rate": 3.979858634163996e-05, + "loss": 0.8662, + "step": 10025 + }, + { + "epoch": 1.6, + "learning_rate": 3.979650691844551e-05, + "loss": 0.8002, + "step": 10026 + }, + { + "epoch": 1.6, + "learning_rate": 3.9794427337677233e-05, + "loss": 0.9224, + "step": 10027 + }, + { + "epoch": 1.6, + "learning_rate": 3.9792347599357285e-05, + "loss": 0.8756, + "step": 10028 + }, + { + "epoch": 1.6, + "learning_rate": 3.979026770350782e-05, + "loss": 0.9207, + "step": 10029 + }, + { + "epoch": 1.6, + "learning_rate": 3.9788187650150985e-05, + "loss": 0.788, + "step": 10030 + }, + { + "epoch": 1.6, + "learning_rate": 3.978610743930893e-05, + "loss": 0.8341, + "step": 10031 + }, + { + "epoch": 1.6, + "learning_rate": 3.978402707100381e-05, + "loss": 0.8719, + "step": 10032 + }, + { + "epoch": 1.6, + "learning_rate": 3.978194654525777e-05, + "loss": 0.8108, + "step": 10033 + }, + { + "epoch": 1.6, + "learning_rate": 3.977986586209298e-05, + "loss": 0.8634, + "step": 10034 + }, + { + "epoch": 1.6, + "learning_rate": 3.977778502153159e-05, + "loss": 0.8656, + "step": 10035 + }, + { + "epoch": 1.6, + "learning_rate": 3.977570402359576e-05, + "loss": 0.8223, + "step": 10036 + }, + { + "epoch": 1.6, + "learning_rate": 3.977362286830765e-05, + "loss": 0.8577, + "step": 10037 + }, + { + "epoch": 1.6, + "learning_rate": 3.977154155568943e-05, + "loss": 0.9454, + "step": 10038 + }, + { + "epoch": 1.6, + "learning_rate": 3.976946008576326e-05, + "loss": 0.8407, + "step": 10039 + }, + { + "epoch": 1.6, + "learning_rate": 3.976737845855131e-05, + "loss": 0.7912, + "step": 10040 + }, + { + "epoch": 1.6, + "learning_rate": 3.976529667407574e-05, + "loss": 0.8672, + "step": 10041 + }, + { + "epoch": 1.6, + "learning_rate": 3.9763214732358724e-05, + "loss": 0.8607, + "step": 10042 + }, + { + "epoch": 1.6, + "learning_rate": 3.9761132633422434e-05, + "loss": 0.9207, + "step": 10043 + }, + { + "epoch": 1.6, + "learning_rate": 3.975905037728904e-05, + "loss": 0.854, + "step": 10044 + }, + { + "epoch": 1.6, + "learning_rate": 3.975696796398073e-05, + "loss": 0.9036, + "step": 10045 + }, + { + "epoch": 1.6, + "learning_rate": 3.975488539351967e-05, + "loss": 0.7995, + "step": 10046 + }, + { + "epoch": 1.6, + "learning_rate": 3.975280266592803e-05, + "loss": 0.887, + "step": 10047 + }, + { + "epoch": 1.6, + "learning_rate": 3.9750719781228005e-05, + "loss": 0.8377, + "step": 10048 + }, + { + "epoch": 1.6, + "learning_rate": 3.974863673944176e-05, + "loss": 0.9152, + "step": 10049 + }, + { + "epoch": 1.6, + "learning_rate": 3.974655354059149e-05, + "loss": 0.9133, + "step": 10050 + }, + { + "epoch": 1.6, + "learning_rate": 3.9744470184699386e-05, + "loss": 0.8393, + "step": 10051 + }, + { + "epoch": 1.6, + "learning_rate": 3.974238667178763e-05, + "loss": 0.91, + "step": 10052 + }, + { + "epoch": 1.6, + "learning_rate": 3.97403030018784e-05, + "loss": 0.8975, + "step": 10053 + }, + { + "epoch": 1.6, + "learning_rate": 3.973821917499389e-05, + "loss": 0.8645, + "step": 10054 + }, + { + "epoch": 1.6, + "learning_rate": 3.9736135191156284e-05, + "loss": 0.7828, + "step": 10055 + }, + { + "epoch": 1.6, + "learning_rate": 3.97340510503878e-05, + "loss": 0.9222, + "step": 10056 + }, + { + "epoch": 1.6, + "learning_rate": 3.9731966752710616e-05, + "loss": 0.7847, + "step": 10057 + }, + { + "epoch": 1.6, + "learning_rate": 3.972988229814693e-05, + "loss": 0.8596, + "step": 10058 + }, + { + "epoch": 1.6, + "learning_rate": 3.9727797686718926e-05, + "loss": 0.8306, + "step": 10059 + }, + { + "epoch": 1.6, + "learning_rate": 3.9725712918448834e-05, + "loss": 0.8605, + "step": 10060 + }, + { + "epoch": 1.6, + "learning_rate": 3.972362799335883e-05, + "loss": 0.8795, + "step": 10061 + }, + { + "epoch": 1.6, + "learning_rate": 3.972154291147113e-05, + "loss": 0.7669, + "step": 10062 + }, + { + "epoch": 1.6, + "learning_rate": 3.971945767280794e-05, + "loss": 0.8607, + "step": 10063 + }, + { + "epoch": 1.6, + "learning_rate": 3.971737227739146e-05, + "loss": 0.8279, + "step": 10064 + }, + { + "epoch": 1.6, + "learning_rate": 3.97152867252439e-05, + "loss": 0.8205, + "step": 10065 + }, + { + "epoch": 1.6, + "learning_rate": 3.9713201016387466e-05, + "loss": 0.8706, + "step": 10066 + }, + { + "epoch": 1.6, + "learning_rate": 3.971111515084438e-05, + "loss": 0.9049, + "step": 10067 + }, + { + "epoch": 1.6, + "learning_rate": 3.970902912863684e-05, + "loss": 0.9139, + "step": 10068 + }, + { + "epoch": 1.6, + "learning_rate": 3.970694294978708e-05, + "loss": 0.8517, + "step": 10069 + }, + { + "epoch": 1.6, + "learning_rate": 3.9704856614317306e-05, + "loss": 0.8316, + "step": 10070 + }, + { + "epoch": 1.6, + "learning_rate": 3.970277012224973e-05, + "loss": 0.7886, + "step": 10071 + }, + { + "epoch": 1.6, + "learning_rate": 3.9700683473606585e-05, + "loss": 0.8949, + "step": 10072 + }, + { + "epoch": 1.6, + "learning_rate": 3.969859666841008e-05, + "loss": 0.7725, + "step": 10073 + }, + { + "epoch": 1.6, + "learning_rate": 3.969650970668245e-05, + "loss": 0.878, + "step": 10074 + }, + { + "epoch": 1.61, + "learning_rate": 3.969442258844591e-05, + "loss": 0.9516, + "step": 10075 + }, + { + "epoch": 1.61, + "learning_rate": 3.9692335313722696e-05, + "loss": 0.8442, + "step": 10076 + }, + { + "epoch": 1.61, + "learning_rate": 3.969024788253502e-05, + "loss": 0.8381, + "step": 10077 + }, + { + "epoch": 1.61, + "learning_rate": 3.9688160294905133e-05, + "loss": 0.7911, + "step": 10078 + }, + { + "epoch": 1.61, + "learning_rate": 3.968607255085525e-05, + "loss": 0.8269, + "step": 10079 + }, + { + "epoch": 1.61, + "learning_rate": 3.9683984650407616e-05, + "loss": 0.8831, + "step": 10080 + }, + { + "epoch": 1.61, + "learning_rate": 3.968189659358445e-05, + "loss": 0.8329, + "step": 10081 + }, + { + "epoch": 1.61, + "learning_rate": 3.9679808380408014e-05, + "loss": 0.9021, + "step": 10082 + }, + { + "epoch": 1.61, + "learning_rate": 3.9677720010900524e-05, + "loss": 0.877, + "step": 10083 + }, + { + "epoch": 1.61, + "learning_rate": 3.967563148508422e-05, + "loss": 0.8864, + "step": 10084 + }, + { + "epoch": 1.61, + "learning_rate": 3.967354280298136e-05, + "loss": 0.8394, + "step": 10085 + }, + { + "epoch": 1.61, + "learning_rate": 3.967145396461418e-05, + "loss": 0.8365, + "step": 10086 + }, + { + "epoch": 1.61, + "learning_rate": 3.9669364970004904e-05, + "loss": 0.8509, + "step": 10087 + }, + { + "epoch": 1.61, + "learning_rate": 3.966727581917582e-05, + "loss": 0.8844, + "step": 10088 + }, + { + "epoch": 1.61, + "learning_rate": 3.9665186512149137e-05, + "loss": 0.8494, + "step": 10089 + }, + { + "epoch": 1.61, + "learning_rate": 3.966309704894713e-05, + "loss": 0.8541, + "step": 10090 + }, + { + "epoch": 1.61, + "learning_rate": 3.9661007429592035e-05, + "loss": 0.953, + "step": 10091 + }, + { + "epoch": 1.61, + "learning_rate": 3.965891765410612e-05, + "loss": 0.8825, + "step": 10092 + }, + { + "epoch": 1.61, + "learning_rate": 3.9656827722511626e-05, + "loss": 0.8027, + "step": 10093 + }, + { + "epoch": 1.61, + "learning_rate": 3.965473763483082e-05, + "loss": 0.8503, + "step": 10094 + }, + { + "epoch": 1.61, + "learning_rate": 3.965264739108595e-05, + "loss": 0.8547, + "step": 10095 + }, + { + "epoch": 1.61, + "learning_rate": 3.965055699129928e-05, + "loss": 0.8513, + "step": 10096 + }, + { + "epoch": 1.61, + "learning_rate": 3.964846643549308e-05, + "loss": 0.8084, + "step": 10097 + }, + { + "epoch": 1.61, + "learning_rate": 3.96463757236896e-05, + "loss": 0.8457, + "step": 10098 + }, + { + "epoch": 1.61, + "learning_rate": 3.9644284855911105e-05, + "loss": 0.8735, + "step": 10099 + }, + { + "epoch": 1.61, + "learning_rate": 3.964219383217988e-05, + "loss": 0.8914, + "step": 10100 + }, + { + "epoch": 1.61, + "learning_rate": 3.964010265251817e-05, + "loss": 0.833, + "step": 10101 + }, + { + "epoch": 1.61, + "learning_rate": 3.963801131694826e-05, + "loss": 0.8962, + "step": 10102 + }, + { + "epoch": 1.61, + "learning_rate": 3.9635919825492415e-05, + "loss": 0.7823, + "step": 10103 + }, + { + "epoch": 1.61, + "learning_rate": 3.9633828178172916e-05, + "loss": 0.9091, + "step": 10104 + }, + { + "epoch": 1.61, + "learning_rate": 3.963173637501203e-05, + "loss": 0.8628, + "step": 10105 + }, + { + "epoch": 1.61, + "learning_rate": 3.962964441603202e-05, + "loss": 0.8442, + "step": 10106 + }, + { + "epoch": 1.61, + "learning_rate": 3.962755230125519e-05, + "loss": 0.7781, + "step": 10107 + }, + { + "epoch": 1.61, + "learning_rate": 3.962546003070381e-05, + "loss": 0.8427, + "step": 10108 + }, + { + "epoch": 1.61, + "learning_rate": 3.962336760440015e-05, + "loss": 0.7634, + "step": 10109 + }, + { + "epoch": 1.61, + "learning_rate": 3.962127502236652e-05, + "loss": 0.8832, + "step": 10110 + }, + { + "epoch": 1.61, + "learning_rate": 3.961918228462518e-05, + "loss": 0.8923, + "step": 10111 + }, + { + "epoch": 1.61, + "learning_rate": 3.961708939119841e-05, + "loss": 0.8325, + "step": 10112 + }, + { + "epoch": 1.61, + "learning_rate": 3.961499634210853e-05, + "loss": 0.8894, + "step": 10113 + }, + { + "epoch": 1.61, + "learning_rate": 3.961290313737781e-05, + "loss": 0.863, + "step": 10114 + }, + { + "epoch": 1.61, + "learning_rate": 3.961080977702854e-05, + "loss": 0.8471, + "step": 10115 + }, + { + "epoch": 1.61, + "learning_rate": 3.9608716261083015e-05, + "loss": 0.8192, + "step": 10116 + }, + { + "epoch": 1.61, + "learning_rate": 3.9606622589563535e-05, + "loss": 0.8653, + "step": 10117 + }, + { + "epoch": 1.61, + "learning_rate": 3.9604528762492384e-05, + "loss": 0.8291, + "step": 10118 + }, + { + "epoch": 1.61, + "learning_rate": 3.960243477989187e-05, + "loss": 0.842, + "step": 10119 + }, + { + "epoch": 1.61, + "learning_rate": 3.9600340641784305e-05, + "loss": 0.8155, + "step": 10120 + }, + { + "epoch": 1.61, + "learning_rate": 3.959824634819196e-05, + "loss": 0.8748, + "step": 10121 + }, + { + "epoch": 1.61, + "learning_rate": 3.959615189913716e-05, + "loss": 0.8852, + "step": 10122 + }, + { + "epoch": 1.61, + "learning_rate": 3.95940572946422e-05, + "loss": 0.9156, + "step": 10123 + }, + { + "epoch": 1.61, + "learning_rate": 3.9591962534729405e-05, + "loss": 0.8563, + "step": 10124 + }, + { + "epoch": 1.61, + "learning_rate": 3.9589867619421054e-05, + "loss": 0.8603, + "step": 10125 + }, + { + "epoch": 1.61, + "learning_rate": 3.958777254873948e-05, + "loss": 0.7783, + "step": 10126 + }, + { + "epoch": 1.61, + "learning_rate": 3.958567732270697e-05, + "loss": 0.7711, + "step": 10127 + }, + { + "epoch": 1.61, + "learning_rate": 3.958358194134586e-05, + "loss": 0.8672, + "step": 10128 + }, + { + "epoch": 1.61, + "learning_rate": 3.9581486404678456e-05, + "loss": 0.877, + "step": 10129 + }, + { + "epoch": 1.61, + "learning_rate": 3.957939071272708e-05, + "loss": 0.8511, + "step": 10130 + }, + { + "epoch": 1.61, + "learning_rate": 3.9577294865514036e-05, + "loss": 0.8872, + "step": 10131 + }, + { + "epoch": 1.61, + "learning_rate": 3.957519886306166e-05, + "loss": 0.9285, + "step": 10132 + }, + { + "epoch": 1.61, + "learning_rate": 3.957310270539226e-05, + "loss": 0.8858, + "step": 10133 + }, + { + "epoch": 1.61, + "learning_rate": 3.957100639252817e-05, + "loss": 0.905, + "step": 10134 + }, + { + "epoch": 1.61, + "learning_rate": 3.9568909924491696e-05, + "loss": 0.8137, + "step": 10135 + }, + { + "epoch": 1.61, + "learning_rate": 3.9566813301305175e-05, + "loss": 0.9733, + "step": 10136 + }, + { + "epoch": 1.61, + "learning_rate": 3.9564716522990954e-05, + "loss": 0.885, + "step": 10137 + }, + { + "epoch": 1.62, + "learning_rate": 3.9562619589571325e-05, + "loss": 0.8453, + "step": 10138 + }, + { + "epoch": 1.62, + "learning_rate": 3.956052250106865e-05, + "loss": 0.9378, + "step": 10139 + }, + { + "epoch": 1.62, + "learning_rate": 3.955842525750525e-05, + "loss": 0.9113, + "step": 10140 + }, + { + "epoch": 1.62, + "learning_rate": 3.955632785890345e-05, + "loss": 0.9174, + "step": 10141 + }, + { + "epoch": 1.62, + "learning_rate": 3.9554230305285606e-05, + "loss": 0.8412, + "step": 10142 + }, + { + "epoch": 1.62, + "learning_rate": 3.955213259667404e-05, + "loss": 0.8685, + "step": 10143 + }, + { + "epoch": 1.62, + "learning_rate": 3.9550034733091104e-05, + "loss": 0.8337, + "step": 10144 + }, + { + "epoch": 1.62, + "learning_rate": 3.954793671455912e-05, + "loss": 0.8259, + "step": 10145 + }, + { + "epoch": 1.62, + "learning_rate": 3.9545838541100456e-05, + "loss": 0.7642, + "step": 10146 + }, + { + "epoch": 1.62, + "learning_rate": 3.9543740212737436e-05, + "loss": 0.8275, + "step": 10147 + }, + { + "epoch": 1.62, + "learning_rate": 3.954164172949241e-05, + "loss": 0.9025, + "step": 10148 + }, + { + "epoch": 1.62, + "learning_rate": 3.953954309138773e-05, + "loss": 0.8513, + "step": 10149 + }, + { + "epoch": 1.62, + "learning_rate": 3.953744429844574e-05, + "loss": 0.8782, + "step": 10150 + }, + { + "epoch": 1.62, + "learning_rate": 3.9535345350688795e-05, + "loss": 0.8317, + "step": 10151 + }, + { + "epoch": 1.62, + "learning_rate": 3.953324624813925e-05, + "loss": 0.8843, + "step": 10152 + }, + { + "epoch": 1.62, + "learning_rate": 3.953114699081946e-05, + "loss": 0.7909, + "step": 10153 + }, + { + "epoch": 1.62, + "learning_rate": 3.9529047578751764e-05, + "loss": 0.7777, + "step": 10154 + }, + { + "epoch": 1.62, + "learning_rate": 3.952694801195854e-05, + "loss": 0.8494, + "step": 10155 + }, + { + "epoch": 1.62, + "learning_rate": 3.952484829046214e-05, + "loss": 0.9191, + "step": 10156 + }, + { + "epoch": 1.62, + "learning_rate": 3.9522748414284924e-05, + "loss": 0.8357, + "step": 10157 + }, + { + "epoch": 1.62, + "learning_rate": 3.9520648383449254e-05, + "loss": 0.8709, + "step": 10158 + }, + { + "epoch": 1.62, + "learning_rate": 3.951854819797749e-05, + "loss": 0.8414, + "step": 10159 + }, + { + "epoch": 1.62, + "learning_rate": 3.9516447857892e-05, + "loss": 0.8034, + "step": 10160 + }, + { + "epoch": 1.62, + "learning_rate": 3.9514347363215165e-05, + "loss": 0.8936, + "step": 10161 + }, + { + "epoch": 1.62, + "learning_rate": 3.951224671396933e-05, + "loss": 0.791, + "step": 10162 + }, + { + "epoch": 1.62, + "learning_rate": 3.951014591017689e-05, + "loss": 0.9138, + "step": 10163 + }, + { + "epoch": 1.62, + "learning_rate": 3.95080449518602e-05, + "loss": 0.7455, + "step": 10164 + }, + { + "epoch": 1.62, + "learning_rate": 3.950594383904164e-05, + "loss": 0.828, + "step": 10165 + }, + { + "epoch": 1.62, + "learning_rate": 3.950384257174359e-05, + "loss": 0.9078, + "step": 10166 + }, + { + "epoch": 1.62, + "learning_rate": 3.9501741149988414e-05, + "loss": 0.8288, + "step": 10167 + }, + { + "epoch": 1.62, + "learning_rate": 3.94996395737985e-05, + "loss": 0.8866, + "step": 10168 + }, + { + "epoch": 1.62, + "learning_rate": 3.949753784319623e-05, + "loss": 0.8242, + "step": 10169 + }, + { + "epoch": 1.62, + "learning_rate": 3.949543595820397e-05, + "loss": 0.8846, + "step": 10170 + }, + { + "epoch": 1.62, + "learning_rate": 3.949333391884413e-05, + "loss": 0.8485, + "step": 10171 + }, + { + "epoch": 1.62, + "learning_rate": 3.949123172513909e-05, + "loss": 0.9152, + "step": 10172 + }, + { + "epoch": 1.62, + "learning_rate": 3.9489129377111225e-05, + "loss": 0.7522, + "step": 10173 + }, + { + "epoch": 1.62, + "learning_rate": 3.948702687478293e-05, + "loss": 0.8912, + "step": 10174 + }, + { + "epoch": 1.62, + "learning_rate": 3.948492421817658e-05, + "loss": 0.8381, + "step": 10175 + }, + { + "epoch": 1.62, + "learning_rate": 3.9482821407314595e-05, + "loss": 0.8891, + "step": 10176 + }, + { + "epoch": 1.62, + "learning_rate": 3.948071844221935e-05, + "loss": 0.8474, + "step": 10177 + }, + { + "epoch": 1.62, + "learning_rate": 3.9478615322913245e-05, + "loss": 0.8983, + "step": 10178 + }, + { + "epoch": 1.62, + "learning_rate": 3.947651204941868e-05, + "loss": 0.882, + "step": 10179 + }, + { + "epoch": 1.62, + "learning_rate": 3.947440862175805e-05, + "loss": 0.7568, + "step": 10180 + }, + { + "epoch": 1.62, + "learning_rate": 3.947230503995375e-05, + "loss": 0.8579, + "step": 10181 + }, + { + "epoch": 1.62, + "learning_rate": 3.9470201304028195e-05, + "loss": 0.9113, + "step": 10182 + }, + { + "epoch": 1.62, + "learning_rate": 3.946809741400378e-05, + "loss": 0.9021, + "step": 10183 + }, + { + "epoch": 1.62, + "learning_rate": 3.946599336990291e-05, + "loss": 0.7869, + "step": 10184 + }, + { + "epoch": 1.62, + "learning_rate": 3.9463889171747995e-05, + "loss": 0.8508, + "step": 10185 + }, + { + "epoch": 1.62, + "learning_rate": 3.946178481956144e-05, + "loss": 0.8567, + "step": 10186 + }, + { + "epoch": 1.62, + "learning_rate": 3.9459680313365656e-05, + "loss": 0.8631, + "step": 10187 + }, + { + "epoch": 1.62, + "learning_rate": 3.945757565318305e-05, + "loss": 0.7796, + "step": 10188 + }, + { + "epoch": 1.62, + "learning_rate": 3.945547083903605e-05, + "loss": 0.8796, + "step": 10189 + }, + { + "epoch": 1.62, + "learning_rate": 3.9453365870947054e-05, + "loss": 0.7535, + "step": 10190 + }, + { + "epoch": 1.62, + "learning_rate": 3.945126074893849e-05, + "loss": 0.8373, + "step": 10191 + }, + { + "epoch": 1.62, + "learning_rate": 3.944915547303277e-05, + "loss": 0.7929, + "step": 10192 + }, + { + "epoch": 1.62, + "learning_rate": 3.944705004325232e-05, + "loss": 0.7855, + "step": 10193 + }, + { + "epoch": 1.62, + "learning_rate": 3.944494445961956e-05, + "loss": 0.8804, + "step": 10194 + }, + { + "epoch": 1.62, + "learning_rate": 3.9442838722156904e-05, + "loss": 0.8518, + "step": 10195 + }, + { + "epoch": 1.62, + "learning_rate": 3.9440732830886796e-05, + "loss": 0.8474, + "step": 10196 + }, + { + "epoch": 1.62, + "learning_rate": 3.943862678583164e-05, + "loss": 0.839, + "step": 10197 + }, + { + "epoch": 1.62, + "learning_rate": 3.943652058701387e-05, + "loss": 0.8029, + "step": 10198 + }, + { + "epoch": 1.62, + "learning_rate": 3.9434414234455936e-05, + "loss": 0.8781, + "step": 10199 + }, + { + "epoch": 1.62, + "learning_rate": 3.943230772818024e-05, + "loss": 0.8222, + "step": 10200 + }, + { + "epoch": 1.63, + "learning_rate": 3.9430201068209236e-05, + "loss": 1.0109, + "step": 10201 + }, + { + "epoch": 1.63, + "learning_rate": 3.942809425456535e-05, + "loss": 0.8258, + "step": 10202 + }, + { + "epoch": 1.63, + "learning_rate": 3.9425987287271024e-05, + "loss": 0.8404, + "step": 10203 + }, + { + "epoch": 1.63, + "learning_rate": 3.9423880166348684e-05, + "loss": 0.9657, + "step": 10204 + }, + { + "epoch": 1.63, + "learning_rate": 3.942177289182078e-05, + "loss": 0.8612, + "step": 10205 + }, + { + "epoch": 1.63, + "learning_rate": 3.9419665463709755e-05, + "loss": 0.8295, + "step": 10206 + }, + { + "epoch": 1.63, + "learning_rate": 3.941755788203804e-05, + "loss": 0.847, + "step": 10207 + }, + { + "epoch": 1.63, + "learning_rate": 3.941545014682809e-05, + "loss": 0.9, + "step": 10208 + }, + { + "epoch": 1.63, + "learning_rate": 3.941334225810235e-05, + "loss": 0.8704, + "step": 10209 + }, + { + "epoch": 1.63, + "learning_rate": 3.9411234215883266e-05, + "loss": 0.8033, + "step": 10210 + }, + { + "epoch": 1.63, + "learning_rate": 3.940912602019328e-05, + "loss": 0.8401, + "step": 10211 + }, + { + "epoch": 1.63, + "learning_rate": 3.940701767105485e-05, + "loss": 0.8915, + "step": 10212 + }, + { + "epoch": 1.63, + "learning_rate": 3.9404909168490436e-05, + "loss": 0.871, + "step": 10213 + }, + { + "epoch": 1.63, + "learning_rate": 3.940280051252248e-05, + "loss": 0.8262, + "step": 10214 + }, + { + "epoch": 1.63, + "learning_rate": 3.940069170317344e-05, + "loss": 0.8728, + "step": 10215 + }, + { + "epoch": 1.63, + "learning_rate": 3.9398582740465784e-05, + "loss": 0.8197, + "step": 10216 + }, + { + "epoch": 1.63, + "learning_rate": 3.9396473624421956e-05, + "loss": 0.8467, + "step": 10217 + }, + { + "epoch": 1.63, + "learning_rate": 3.9394364355064426e-05, + "loss": 0.8287, + "step": 10218 + }, + { + "epoch": 1.63, + "learning_rate": 3.9392254932415664e-05, + "loss": 0.8927, + "step": 10219 + }, + { + "epoch": 1.63, + "learning_rate": 3.9390145356498106e-05, + "loss": 0.8403, + "step": 10220 + }, + { + "epoch": 1.63, + "learning_rate": 3.938803562733425e-05, + "loss": 0.9167, + "step": 10221 + }, + { + "epoch": 1.63, + "learning_rate": 3.938592574494655e-05, + "loss": 0.9709, + "step": 10222 + }, + { + "epoch": 1.63, + "learning_rate": 3.9383815709357474e-05, + "loss": 0.9073, + "step": 10223 + }, + { + "epoch": 1.63, + "learning_rate": 3.9381705520589495e-05, + "loss": 0.8815, + "step": 10224 + }, + { + "epoch": 1.63, + "learning_rate": 3.937959517866507e-05, + "loss": 0.8571, + "step": 10225 + }, + { + "epoch": 1.63, + "learning_rate": 3.9377484683606705e-05, + "loss": 0.8897, + "step": 10226 + }, + { + "epoch": 1.63, + "learning_rate": 3.937537403543684e-05, + "loss": 0.7679, + "step": 10227 + }, + { + "epoch": 1.63, + "learning_rate": 3.937326323417798e-05, + "loss": 0.822, + "step": 10228 + }, + { + "epoch": 1.63, + "learning_rate": 3.937115227985259e-05, + "loss": 0.7835, + "step": 10229 + }, + { + "epoch": 1.63, + "learning_rate": 3.9369041172483156e-05, + "loss": 0.811, + "step": 10230 + }, + { + "epoch": 1.63, + "learning_rate": 3.9366929912092156e-05, + "loss": 0.7856, + "step": 10231 + }, + { + "epoch": 1.63, + "learning_rate": 3.9364818498702075e-05, + "loss": 0.7898, + "step": 10232 + }, + { + "epoch": 1.63, + "learning_rate": 3.936270693233539e-05, + "loss": 0.747, + "step": 10233 + }, + { + "epoch": 1.63, + "learning_rate": 3.936059521301461e-05, + "loss": 0.8277, + "step": 10234 + }, + { + "epoch": 1.63, + "learning_rate": 3.9358483340762194e-05, + "loss": 0.842, + "step": 10235 + }, + { + "epoch": 1.63, + "learning_rate": 3.9356371315600657e-05, + "loss": 0.865, + "step": 10236 + }, + { + "epoch": 1.63, + "learning_rate": 3.935425913755248e-05, + "loss": 0.7614, + "step": 10237 + }, + { + "epoch": 1.63, + "learning_rate": 3.935214680664015e-05, + "loss": 0.8645, + "step": 10238 + }, + { + "epoch": 1.63, + "learning_rate": 3.935003432288619e-05, + "loss": 0.8392, + "step": 10239 + }, + { + "epoch": 1.63, + "learning_rate": 3.9347921686313054e-05, + "loss": 0.894, + "step": 10240 + }, + { + "epoch": 1.63, + "learning_rate": 3.934580889694327e-05, + "loss": 0.8269, + "step": 10241 + }, + { + "epoch": 1.63, + "learning_rate": 3.934369595479934e-05, + "loss": 0.8875, + "step": 10242 + }, + { + "epoch": 1.63, + "learning_rate": 3.934158285990375e-05, + "loss": 0.8203, + "step": 10243 + }, + { + "epoch": 1.63, + "learning_rate": 3.933946961227901e-05, + "loss": 0.851, + "step": 10244 + }, + { + "epoch": 1.63, + "learning_rate": 3.933735621194763e-05, + "loss": 0.8034, + "step": 10245 + }, + { + "epoch": 1.63, + "learning_rate": 3.93352426589321e-05, + "loss": 0.8422, + "step": 10246 + }, + { + "epoch": 1.63, + "learning_rate": 3.933312895325494e-05, + "loss": 0.8612, + "step": 10247 + }, + { + "epoch": 1.63, + "learning_rate": 3.933101509493866e-05, + "loss": 0.8256, + "step": 10248 + }, + { + "epoch": 1.63, + "learning_rate": 3.932890108400578e-05, + "loss": 0.8245, + "step": 10249 + }, + { + "epoch": 1.63, + "learning_rate": 3.932678692047879e-05, + "loss": 0.8651, + "step": 10250 + }, + { + "epoch": 1.63, + "learning_rate": 3.932467260438022e-05, + "loss": 0.7813, + "step": 10251 + }, + { + "epoch": 1.63, + "learning_rate": 3.9322558135732585e-05, + "loss": 0.9186, + "step": 10252 + }, + { + "epoch": 1.63, + "learning_rate": 3.93204435145584e-05, + "loss": 0.817, + "step": 10253 + }, + { + "epoch": 1.63, + "learning_rate": 3.931832874088019e-05, + "loss": 0.7653, + "step": 10254 + }, + { + "epoch": 1.63, + "learning_rate": 3.9316213814720474e-05, + "loss": 0.978, + "step": 10255 + }, + { + "epoch": 1.63, + "learning_rate": 3.931409873610177e-05, + "loss": 0.7677, + "step": 10256 + }, + { + "epoch": 1.63, + "learning_rate": 3.931198350504659e-05, + "loss": 0.884, + "step": 10257 + }, + { + "epoch": 1.63, + "learning_rate": 3.930986812157749e-05, + "loss": 0.8409, + "step": 10258 + }, + { + "epoch": 1.63, + "learning_rate": 3.9307752585716987e-05, + "loss": 0.8191, + "step": 10259 + }, + { + "epoch": 1.63, + "learning_rate": 3.930563689748759e-05, + "loss": 0.8311, + "step": 10260 + }, + { + "epoch": 1.63, + "learning_rate": 3.930352105691185e-05, + "loss": 0.7978, + "step": 10261 + }, + { + "epoch": 1.63, + "learning_rate": 3.9301405064012296e-05, + "loss": 0.8324, + "step": 10262 + }, + { + "epoch": 1.64, + "learning_rate": 3.929928891881147e-05, + "loss": 0.7631, + "step": 10263 + }, + { + "epoch": 1.64, + "learning_rate": 3.929717262133189e-05, + "loss": 0.8143, + "step": 10264 + }, + { + "epoch": 1.64, + "learning_rate": 3.9295056171596104e-05, + "loss": 0.8944, + "step": 10265 + }, + { + "epoch": 1.64, + "learning_rate": 3.929293956962664e-05, + "loss": 0.8344, + "step": 10266 + }, + { + "epoch": 1.64, + "learning_rate": 3.929082281544606e-05, + "loss": 0.7996, + "step": 10267 + }, + { + "epoch": 1.64, + "learning_rate": 3.928870590907689e-05, + "loss": 0.9381, + "step": 10268 + }, + { + "epoch": 1.64, + "learning_rate": 3.928658885054167e-05, + "loss": 0.7696, + "step": 10269 + }, + { + "epoch": 1.64, + "learning_rate": 3.928447163986295e-05, + "loss": 0.8758, + "step": 10270 + }, + { + "epoch": 1.64, + "learning_rate": 3.928235427706329e-05, + "loss": 0.8513, + "step": 10271 + }, + { + "epoch": 1.64, + "learning_rate": 3.928023676216522e-05, + "loss": 0.9242, + "step": 10272 + }, + { + "epoch": 1.64, + "learning_rate": 3.92781190951913e-05, + "loss": 0.8777, + "step": 10273 + }, + { + "epoch": 1.64, + "learning_rate": 3.9276001276164085e-05, + "loss": 0.8613, + "step": 10274 + }, + { + "epoch": 1.64, + "learning_rate": 3.927388330510612e-05, + "loss": 0.8614, + "step": 10275 + }, + { + "epoch": 1.64, + "learning_rate": 3.927176518203997e-05, + "loss": 0.8149, + "step": 10276 + }, + { + "epoch": 1.64, + "learning_rate": 3.9269646906988175e-05, + "loss": 0.883, + "step": 10277 + }, + { + "epoch": 1.64, + "learning_rate": 3.9267528479973306e-05, + "loss": 0.8062, + "step": 10278 + }, + { + "epoch": 1.64, + "learning_rate": 3.9265409901017924e-05, + "loss": 0.8496, + "step": 10279 + }, + { + "epoch": 1.64, + "learning_rate": 3.926329117014459e-05, + "loss": 0.8005, + "step": 10280 + }, + { + "epoch": 1.64, + "learning_rate": 3.9261172287375856e-05, + "loss": 0.8447, + "step": 10281 + }, + { + "epoch": 1.64, + "learning_rate": 3.92590532527343e-05, + "loss": 0.7937, + "step": 10282 + }, + { + "epoch": 1.64, + "learning_rate": 3.925693406624248e-05, + "loss": 0.7861, + "step": 10283 + }, + { + "epoch": 1.64, + "learning_rate": 3.9254814727922985e-05, + "loss": 0.7856, + "step": 10284 + }, + { + "epoch": 1.64, + "learning_rate": 3.925269523779835e-05, + "loss": 0.8578, + "step": 10285 + }, + { + "epoch": 1.64, + "learning_rate": 3.9250575595891166e-05, + "loss": 0.8305, + "step": 10286 + }, + { + "epoch": 1.64, + "learning_rate": 3.9248455802224e-05, + "loss": 0.8343, + "step": 10287 + }, + { + "epoch": 1.64, + "learning_rate": 3.924633585681944e-05, + "loss": 0.7729, + "step": 10288 + }, + { + "epoch": 1.64, + "learning_rate": 3.924421575970004e-05, + "loss": 0.837, + "step": 10289 + }, + { + "epoch": 1.64, + "learning_rate": 3.9242095510888405e-05, + "loss": 0.8704, + "step": 10290 + }, + { + "epoch": 1.64, + "learning_rate": 3.923997511040709e-05, + "loss": 0.8199, + "step": 10291 + }, + { + "epoch": 1.64, + "learning_rate": 3.9237854558278684e-05, + "loss": 0.7399, + "step": 10292 + }, + { + "epoch": 1.64, + "learning_rate": 3.923573385452578e-05, + "loss": 0.8156, + "step": 10293 + }, + { + "epoch": 1.64, + "learning_rate": 3.923361299917095e-05, + "loss": 0.8556, + "step": 10294 + }, + { + "epoch": 1.64, + "learning_rate": 3.923149199223678e-05, + "loss": 0.8211, + "step": 10295 + }, + { + "epoch": 1.64, + "learning_rate": 3.922937083374586e-05, + "loss": 0.8411, + "step": 10296 + }, + { + "epoch": 1.64, + "learning_rate": 3.922724952372078e-05, + "loss": 0.8313, + "step": 10297 + }, + { + "epoch": 1.64, + "learning_rate": 3.922512806218413e-05, + "loss": 0.8335, + "step": 10298 + }, + { + "epoch": 1.64, + "learning_rate": 3.922300644915851e-05, + "loss": 0.7961, + "step": 10299 + }, + { + "epoch": 1.64, + "learning_rate": 3.9220884684666495e-05, + "loss": 0.85, + "step": 10300 + }, + { + "epoch": 1.64, + "learning_rate": 3.9218762768730695e-05, + "loss": 0.883, + "step": 10301 + }, + { + "epoch": 1.64, + "learning_rate": 3.92166407013737e-05, + "loss": 0.88, + "step": 10302 + }, + { + "epoch": 1.64, + "learning_rate": 3.921451848261812e-05, + "loss": 0.8565, + "step": 10303 + }, + { + "epoch": 1.64, + "learning_rate": 3.921239611248655e-05, + "loss": 0.8475, + "step": 10304 + }, + { + "epoch": 1.64, + "learning_rate": 3.921027359100159e-05, + "loss": 0.8015, + "step": 10305 + }, + { + "epoch": 1.64, + "learning_rate": 3.920815091818584e-05, + "loss": 0.766, + "step": 10306 + }, + { + "epoch": 1.64, + "learning_rate": 3.9206028094061905e-05, + "loss": 0.8831, + "step": 10307 + }, + { + "epoch": 1.64, + "learning_rate": 3.9203905118652395e-05, + "loss": 0.8535, + "step": 10308 + }, + { + "epoch": 1.64, + "learning_rate": 3.9201781991979925e-05, + "loss": 0.8812, + "step": 10309 + }, + { + "epoch": 1.64, + "learning_rate": 3.919965871406709e-05, + "loss": 0.9117, + "step": 10310 + }, + { + "epoch": 1.64, + "learning_rate": 3.919753528493652e-05, + "loss": 0.8983, + "step": 10311 + }, + { + "epoch": 1.64, + "learning_rate": 3.919541170461082e-05, + "loss": 0.8164, + "step": 10312 + }, + { + "epoch": 1.64, + "learning_rate": 3.9193287973112596e-05, + "loss": 0.8757, + "step": 10313 + }, + { + "epoch": 1.64, + "learning_rate": 3.9191164090464474e-05, + "loss": 0.8032, + "step": 10314 + }, + { + "epoch": 1.64, + "learning_rate": 3.918904005668907e-05, + "loss": 0.9324, + "step": 10315 + }, + { + "epoch": 1.64, + "learning_rate": 3.9186915871809e-05, + "loss": 0.9135, + "step": 10316 + }, + { + "epoch": 1.64, + "learning_rate": 3.918479153584691e-05, + "loss": 0.8064, + "step": 10317 + }, + { + "epoch": 1.64, + "learning_rate": 3.918266704882538e-05, + "loss": 0.9539, + "step": 10318 + }, + { + "epoch": 1.64, + "learning_rate": 3.918054241076706e-05, + "loss": 0.8591, + "step": 10319 + }, + { + "epoch": 1.64, + "learning_rate": 3.9178417621694574e-05, + "loss": 0.8053, + "step": 10320 + }, + { + "epoch": 1.64, + "learning_rate": 3.917629268163055e-05, + "loss": 0.7916, + "step": 10321 + }, + { + "epoch": 1.64, + "learning_rate": 3.917416759059762e-05, + "loss": 0.8608, + "step": 10322 + }, + { + "epoch": 1.64, + "learning_rate": 3.91720423486184e-05, + "loss": 0.8206, + "step": 10323 + }, + { + "epoch": 1.64, + "learning_rate": 3.916991695571554e-05, + "loss": 0.8178, + "step": 10324 + }, + { + "epoch": 1.64, + "learning_rate": 3.916779141191167e-05, + "loss": 0.8409, + "step": 10325 + }, + { + "epoch": 1.65, + "learning_rate": 3.916566571722942e-05, + "loss": 0.8035, + "step": 10326 + }, + { + "epoch": 1.65, + "learning_rate": 3.9163539871691424e-05, + "loss": 0.9103, + "step": 10327 + }, + { + "epoch": 1.65, + "learning_rate": 3.916141387532033e-05, + "loss": 0.8384, + "step": 10328 + }, + { + "epoch": 1.65, + "learning_rate": 3.915928772813878e-05, + "loss": 0.8647, + "step": 10329 + }, + { + "epoch": 1.65, + "learning_rate": 3.915716143016941e-05, + "loss": 0.8137, + "step": 10330 + }, + { + "epoch": 1.65, + "learning_rate": 3.915503498143487e-05, + "loss": 0.8613, + "step": 10331 + }, + { + "epoch": 1.65, + "learning_rate": 3.915290838195779e-05, + "loss": 0.8865, + "step": 10332 + }, + { + "epoch": 1.65, + "learning_rate": 3.915078163176084e-05, + "loss": 0.8033, + "step": 10333 + }, + { + "epoch": 1.65, + "learning_rate": 3.914865473086665e-05, + "loss": 0.815, + "step": 10334 + }, + { + "epoch": 1.65, + "learning_rate": 3.914652767929788e-05, + "loss": 0.9275, + "step": 10335 + }, + { + "epoch": 1.65, + "learning_rate": 3.9144400477077176e-05, + "loss": 0.8324, + "step": 10336 + }, + { + "epoch": 1.65, + "learning_rate": 3.91422731242272e-05, + "loss": 0.8419, + "step": 10337 + }, + { + "epoch": 1.65, + "learning_rate": 3.91401456207706e-05, + "loss": 0.7332, + "step": 10338 + }, + { + "epoch": 1.65, + "learning_rate": 3.913801796673002e-05, + "loss": 0.8502, + "step": 10339 + }, + { + "epoch": 1.65, + "learning_rate": 3.913589016212815e-05, + "loss": 0.8326, + "step": 10340 + }, + { + "epoch": 1.65, + "learning_rate": 3.913376220698762e-05, + "loss": 0.8438, + "step": 10341 + }, + { + "epoch": 1.65, + "learning_rate": 3.913163410133111e-05, + "loss": 0.7883, + "step": 10342 + }, + { + "epoch": 1.65, + "learning_rate": 3.912950584518127e-05, + "loss": 0.7932, + "step": 10343 + }, + { + "epoch": 1.65, + "learning_rate": 3.912737743856077e-05, + "loss": 0.7883, + "step": 10344 + }, + { + "epoch": 1.65, + "learning_rate": 3.9125248881492295e-05, + "loss": 0.7984, + "step": 10345 + }, + { + "epoch": 1.65, + "learning_rate": 3.912312017399848e-05, + "loss": 0.9159, + "step": 10346 + }, + { + "epoch": 1.65, + "learning_rate": 3.912099131610202e-05, + "loss": 0.8504, + "step": 10347 + }, + { + "epoch": 1.65, + "learning_rate": 3.911886230782556e-05, + "loss": 0.8169, + "step": 10348 + }, + { + "epoch": 1.65, + "learning_rate": 3.91167331491918e-05, + "loss": 0.8494, + "step": 10349 + }, + { + "epoch": 1.65, + "learning_rate": 3.911460384022339e-05, + "loss": 0.8583, + "step": 10350 + }, + { + "epoch": 1.65, + "learning_rate": 3.911247438094303e-05, + "loss": 0.8229, + "step": 10351 + }, + { + "epoch": 1.65, + "learning_rate": 3.911034477137339e-05, + "loss": 0.9016, + "step": 10352 + }, + { + "epoch": 1.65, + "learning_rate": 3.910821501153713e-05, + "loss": 0.841, + "step": 10353 + }, + { + "epoch": 1.65, + "learning_rate": 3.910608510145695e-05, + "loss": 0.7911, + "step": 10354 + }, + { + "epoch": 1.65, + "learning_rate": 3.9103955041155536e-05, + "loss": 0.8381, + "step": 10355 + }, + { + "epoch": 1.65, + "learning_rate": 3.9101824830655556e-05, + "loss": 0.88, + "step": 10356 + }, + { + "epoch": 1.65, + "learning_rate": 3.909969446997971e-05, + "loss": 0.8538, + "step": 10357 + }, + { + "epoch": 1.65, + "learning_rate": 3.909756395915067e-05, + "loss": 0.7671, + "step": 10358 + }, + { + "epoch": 1.65, + "learning_rate": 3.9095433298191134e-05, + "loss": 0.9043, + "step": 10359 + }, + { + "epoch": 1.65, + "learning_rate": 3.9093302487123796e-05, + "loss": 0.8562, + "step": 10360 + }, + { + "epoch": 1.65, + "learning_rate": 3.909117152597134e-05, + "loss": 0.8094, + "step": 10361 + }, + { + "epoch": 1.65, + "learning_rate": 3.908904041475645e-05, + "loss": 0.8589, + "step": 10362 + }, + { + "epoch": 1.65, + "learning_rate": 3.908690915350185e-05, + "loss": 0.8211, + "step": 10363 + }, + { + "epoch": 1.65, + "learning_rate": 3.908477774223022e-05, + "loss": 0.8284, + "step": 10364 + }, + { + "epoch": 1.65, + "learning_rate": 3.908264618096425e-05, + "loss": 0.8602, + "step": 10365 + }, + { + "epoch": 1.65, + "learning_rate": 3.9080514469726645e-05, + "loss": 0.8707, + "step": 10366 + }, + { + "epoch": 1.65, + "learning_rate": 3.907838260854012e-05, + "loss": 0.8878, + "step": 10367 + }, + { + "epoch": 1.65, + "learning_rate": 3.907625059742736e-05, + "loss": 0.8057, + "step": 10368 + }, + { + "epoch": 1.65, + "learning_rate": 3.907411843641108e-05, + "loss": 0.9137, + "step": 10369 + }, + { + "epoch": 1.65, + "learning_rate": 3.907198612551398e-05, + "loss": 0.8067, + "step": 10370 + }, + { + "epoch": 1.65, + "learning_rate": 3.906985366475878e-05, + "loss": 0.8237, + "step": 10371 + }, + { + "epoch": 1.65, + "learning_rate": 3.906772105416817e-05, + "loss": 0.8581, + "step": 10372 + }, + { + "epoch": 1.65, + "learning_rate": 3.906558829376488e-05, + "loss": 0.8425, + "step": 10373 + }, + { + "epoch": 1.65, + "learning_rate": 3.9063455383571606e-05, + "loss": 0.9164, + "step": 10374 + }, + { + "epoch": 1.65, + "learning_rate": 3.906132232361108e-05, + "loss": 0.7717, + "step": 10375 + }, + { + "epoch": 1.65, + "learning_rate": 3.9059189113906006e-05, + "loss": 0.8825, + "step": 10376 + }, + { + "epoch": 1.65, + "learning_rate": 3.905705575447909e-05, + "loss": 0.8116, + "step": 10377 + }, + { + "epoch": 1.65, + "learning_rate": 3.905492224535308e-05, + "loss": 0.8703, + "step": 10378 + }, + { + "epoch": 1.65, + "learning_rate": 3.905278858655067e-05, + "loss": 0.8264, + "step": 10379 + }, + { + "epoch": 1.65, + "learning_rate": 3.90506547780946e-05, + "loss": 0.8389, + "step": 10380 + }, + { + "epoch": 1.65, + "learning_rate": 3.904852082000758e-05, + "loss": 0.8587, + "step": 10381 + }, + { + "epoch": 1.65, + "learning_rate": 3.904638671231236e-05, + "loss": 0.8784, + "step": 10382 + }, + { + "epoch": 1.65, + "learning_rate": 3.9044252455031626e-05, + "loss": 0.8914, + "step": 10383 + }, + { + "epoch": 1.65, + "learning_rate": 3.904211804818815e-05, + "loss": 0.8331, + "step": 10384 + }, + { + "epoch": 1.65, + "learning_rate": 3.9039983491804635e-05, + "loss": 0.8407, + "step": 10385 + }, + { + "epoch": 1.65, + "learning_rate": 3.903784878590381e-05, + "loss": 0.8538, + "step": 10386 + }, + { + "epoch": 1.65, + "learning_rate": 3.903571393050843e-05, + "loss": 0.8167, + "step": 10387 + }, + { + "epoch": 1.65, + "learning_rate": 3.9033578925641215e-05, + "loss": 0.8414, + "step": 10388 + }, + { + "epoch": 1.66, + "learning_rate": 3.9031443771324896e-05, + "loss": 0.8508, + "step": 10389 + }, + { + "epoch": 1.66, + "learning_rate": 3.902930846758223e-05, + "loss": 0.9255, + "step": 10390 + }, + { + "epoch": 1.66, + "learning_rate": 3.902717301443594e-05, + "loss": 0.8285, + "step": 10391 + }, + { + "epoch": 1.66, + "learning_rate": 3.9025037411908776e-05, + "loss": 0.8966, + "step": 10392 + }, + { + "epoch": 1.66, + "learning_rate": 3.902290166002348e-05, + "loss": 0.8052, + "step": 10393 + }, + { + "epoch": 1.66, + "learning_rate": 3.902076575880279e-05, + "loss": 0.7964, + "step": 10394 + }, + { + "epoch": 1.66, + "learning_rate": 3.901862970826946e-05, + "loss": 0.8158, + "step": 10395 + }, + { + "epoch": 1.66, + "learning_rate": 3.901649350844624e-05, + "loss": 0.7563, + "step": 10396 + }, + { + "epoch": 1.66, + "learning_rate": 3.901435715935587e-05, + "loss": 0.7665, + "step": 10397 + }, + { + "epoch": 1.66, + "learning_rate": 3.9012220661021104e-05, + "loss": 0.7979, + "step": 10398 + }, + { + "epoch": 1.66, + "learning_rate": 3.901008401346469e-05, + "loss": 0.9629, + "step": 10399 + }, + { + "epoch": 1.66, + "learning_rate": 3.9007947216709387e-05, + "loss": 0.9208, + "step": 10400 + }, + { + "epoch": 1.66, + "learning_rate": 3.900581027077796e-05, + "loss": 0.809, + "step": 10401 + }, + { + "epoch": 1.66, + "learning_rate": 3.900367317569315e-05, + "loss": 0.7791, + "step": 10402 + }, + { + "epoch": 1.66, + "learning_rate": 3.9001535931477726e-05, + "loss": 0.9098, + "step": 10403 + }, + { + "epoch": 1.66, + "learning_rate": 3.899939853815444e-05, + "loss": 0.8352, + "step": 10404 + }, + { + "epoch": 1.66, + "learning_rate": 3.8997260995746065e-05, + "loss": 0.8715, + "step": 10405 + }, + { + "epoch": 1.66, + "learning_rate": 3.8995123304275355e-05, + "loss": 0.8172, + "step": 10406 + }, + { + "epoch": 1.66, + "learning_rate": 3.899298546376508e-05, + "loss": 0.8312, + "step": 10407 + }, + { + "epoch": 1.66, + "learning_rate": 3.8990847474238e-05, + "loss": 0.7886, + "step": 10408 + }, + { + "epoch": 1.66, + "learning_rate": 3.898870933571689e-05, + "loss": 0.828, + "step": 10409 + }, + { + "epoch": 1.66, + "learning_rate": 3.898657104822452e-05, + "loss": 0.7736, + "step": 10410 + }, + { + "epoch": 1.66, + "learning_rate": 3.898443261178366e-05, + "loss": 0.848, + "step": 10411 + }, + { + "epoch": 1.66, + "learning_rate": 3.8982294026417085e-05, + "loss": 0.7931, + "step": 10412 + }, + { + "epoch": 1.66, + "learning_rate": 3.8980155292147566e-05, + "loss": 0.8312, + "step": 10413 + }, + { + "epoch": 1.66, + "learning_rate": 3.897801640899788e-05, + "loss": 0.9607, + "step": 10414 + }, + { + "epoch": 1.66, + "learning_rate": 3.89758773769908e-05, + "loss": 0.7866, + "step": 10415 + }, + { + "epoch": 1.66, + "learning_rate": 3.897373819614912e-05, + "loss": 0.7958, + "step": 10416 + }, + { + "epoch": 1.66, + "learning_rate": 3.897159886649561e-05, + "loss": 0.8582, + "step": 10417 + }, + { + "epoch": 1.66, + "learning_rate": 3.8969459388053055e-05, + "loss": 0.8171, + "step": 10418 + }, + { + "epoch": 1.66, + "learning_rate": 3.8967319760844235e-05, + "loss": 0.8233, + "step": 10419 + }, + { + "epoch": 1.66, + "learning_rate": 3.8965179984891944e-05, + "loss": 0.7705, + "step": 10420 + }, + { + "epoch": 1.66, + "learning_rate": 3.8963040060218956e-05, + "loss": 0.9132, + "step": 10421 + }, + { + "epoch": 1.66, + "learning_rate": 3.8960899986848074e-05, + "loss": 0.9514, + "step": 10422 + }, + { + "epoch": 1.66, + "learning_rate": 3.8958759764802086e-05, + "loss": 0.8831, + "step": 10423 + }, + { + "epoch": 1.66, + "learning_rate": 3.8956619394103774e-05, + "loss": 0.8088, + "step": 10424 + }, + { + "epoch": 1.66, + "learning_rate": 3.895447887477595e-05, + "loss": 0.7895, + "step": 10425 + }, + { + "epoch": 1.66, + "learning_rate": 3.895233820684139e-05, + "loss": 0.8696, + "step": 10426 + }, + { + "epoch": 1.66, + "learning_rate": 3.8950197390322896e-05, + "loss": 0.7405, + "step": 10427 + }, + { + "epoch": 1.66, + "learning_rate": 3.8948056425243276e-05, + "loss": 0.7834, + "step": 10428 + }, + { + "epoch": 1.66, + "learning_rate": 3.8945915311625316e-05, + "loss": 0.7651, + "step": 10429 + }, + { + "epoch": 1.66, + "learning_rate": 3.8943774049491824e-05, + "loss": 0.8385, + "step": 10430 + }, + { + "epoch": 1.66, + "learning_rate": 3.894163263886561e-05, + "loss": 0.7874, + "step": 10431 + }, + { + "epoch": 1.66, + "learning_rate": 3.8939491079769474e-05, + "loss": 0.8808, + "step": 10432 + }, + { + "epoch": 1.66, + "learning_rate": 3.8937349372226214e-05, + "loss": 0.8206, + "step": 10433 + }, + { + "epoch": 1.66, + "learning_rate": 3.893520751625865e-05, + "loss": 0.9247, + "step": 10434 + }, + { + "epoch": 1.66, + "learning_rate": 3.893306551188958e-05, + "loss": 0.8682, + "step": 10435 + }, + { + "epoch": 1.66, + "learning_rate": 3.8930923359141834e-05, + "loss": 0.8372, + "step": 10436 + }, + { + "epoch": 1.66, + "learning_rate": 3.8928781058038197e-05, + "loss": 0.9112, + "step": 10437 + }, + { + "epoch": 1.66, + "learning_rate": 3.8926638608601506e-05, + "loss": 0.8975, + "step": 10438 + }, + { + "epoch": 1.66, + "learning_rate": 3.892449601085457e-05, + "loss": 0.7998, + "step": 10439 + }, + { + "epoch": 1.66, + "learning_rate": 3.89223532648202e-05, + "loss": 0.801, + "step": 10440 + }, + { + "epoch": 1.66, + "learning_rate": 3.892021037052123e-05, + "loss": 0.8145, + "step": 10441 + }, + { + "epoch": 1.66, + "learning_rate": 3.891806732798045e-05, + "loss": 0.849, + "step": 10442 + }, + { + "epoch": 1.66, + "learning_rate": 3.891592413722073e-05, + "loss": 0.8608, + "step": 10443 + }, + { + "epoch": 1.66, + "learning_rate": 3.8913780798264855e-05, + "loss": 0.7911, + "step": 10444 + }, + { + "epoch": 1.66, + "learning_rate": 3.891163731113565e-05, + "loss": 0.8362, + "step": 10445 + }, + { + "epoch": 1.66, + "learning_rate": 3.890949367585597e-05, + "loss": 0.8011, + "step": 10446 + }, + { + "epoch": 1.66, + "learning_rate": 3.890734989244862e-05, + "loss": 0.9055, + "step": 10447 + }, + { + "epoch": 1.66, + "learning_rate": 3.8905205960936444e-05, + "loss": 0.822, + "step": 10448 + }, + { + "epoch": 1.66, + "learning_rate": 3.890306188134225e-05, + "loss": 0.8518, + "step": 10449 + }, + { + "epoch": 1.66, + "learning_rate": 3.890091765368891e-05, + "loss": 0.8076, + "step": 10450 + }, + { + "epoch": 1.66, + "learning_rate": 3.8898773277999216e-05, + "loss": 0.8482, + "step": 10451 + }, + { + "epoch": 1.67, + "learning_rate": 3.889662875429603e-05, + "loss": 0.7643, + "step": 10452 + }, + { + "epoch": 1.67, + "learning_rate": 3.889448408260219e-05, + "loss": 0.8192, + "step": 10453 + }, + { + "epoch": 1.67, + "learning_rate": 3.889233926294052e-05, + "loss": 0.7453, + "step": 10454 + }, + { + "epoch": 1.67, + "learning_rate": 3.8890194295333886e-05, + "loss": 0.8711, + "step": 10455 + }, + { + "epoch": 1.67, + "learning_rate": 3.888804917980511e-05, + "loss": 0.891, + "step": 10456 + }, + { + "epoch": 1.67, + "learning_rate": 3.888590391637704e-05, + "loss": 0.8858, + "step": 10457 + }, + { + "epoch": 1.67, + "learning_rate": 3.8883758505072526e-05, + "loss": 0.9423, + "step": 10458 + }, + { + "epoch": 1.67, + "learning_rate": 3.888161294591441e-05, + "loss": 0.7273, + "step": 10459 + }, + { + "epoch": 1.67, + "learning_rate": 3.887946723892555e-05, + "loss": 0.8679, + "step": 10460 + }, + { + "epoch": 1.67, + "learning_rate": 3.887732138412878e-05, + "loss": 0.8496, + "step": 10461 + }, + { + "epoch": 1.67, + "learning_rate": 3.887517538154697e-05, + "loss": 0.797, + "step": 10462 + }, + { + "epoch": 1.67, + "learning_rate": 3.887302923120296e-05, + "loss": 0.8352, + "step": 10463 + }, + { + "epoch": 1.67, + "learning_rate": 3.8870882933119614e-05, + "loss": 0.8482, + "step": 10464 + }, + { + "epoch": 1.67, + "learning_rate": 3.886873648731979e-05, + "loss": 0.8272, + "step": 10465 + }, + { + "epoch": 1.67, + "learning_rate": 3.886658989382634e-05, + "loss": 0.8739, + "step": 10466 + }, + { + "epoch": 1.67, + "learning_rate": 3.886444315266213e-05, + "loss": 0.8122, + "step": 10467 + }, + { + "epoch": 1.67, + "learning_rate": 3.886229626385001e-05, + "loss": 0.8261, + "step": 10468 + }, + { + "epoch": 1.67, + "learning_rate": 3.8860149227412854e-05, + "loss": 0.8807, + "step": 10469 + }, + { + "epoch": 1.67, + "learning_rate": 3.885800204337352e-05, + "loss": 0.8192, + "step": 10470 + }, + { + "epoch": 1.67, + "learning_rate": 3.885585471175488e-05, + "loss": 0.7664, + "step": 10471 + }, + { + "epoch": 1.67, + "learning_rate": 3.88537072325798e-05, + "loss": 0.8185, + "step": 10472 + }, + { + "epoch": 1.67, + "learning_rate": 3.8851559605871155e-05, + "loss": 0.8536, + "step": 10473 + }, + { + "epoch": 1.67, + "learning_rate": 3.88494118316518e-05, + "loss": 0.8345, + "step": 10474 + }, + { + "epoch": 1.67, + "learning_rate": 3.884726390994462e-05, + "loss": 0.83, + "step": 10475 + }, + { + "epoch": 1.67, + "learning_rate": 3.884511584077249e-05, + "loss": 0.8582, + "step": 10476 + }, + { + "epoch": 1.67, + "learning_rate": 3.8842967624158274e-05, + "loss": 0.8009, + "step": 10477 + }, + { + "epoch": 1.67, + "learning_rate": 3.884081926012487e-05, + "loss": 0.7842, + "step": 10478 + }, + { + "epoch": 1.67, + "learning_rate": 3.8838670748695135e-05, + "loss": 0.8976, + "step": 10479 + }, + { + "epoch": 1.67, + "learning_rate": 3.883652208989196e-05, + "loss": 0.8391, + "step": 10480 + }, + { + "epoch": 1.67, + "learning_rate": 3.8834373283738224e-05, + "loss": 0.843, + "step": 10481 + }, + { + "epoch": 1.67, + "learning_rate": 3.883222433025681e-05, + "loss": 0.7904, + "step": 10482 + }, + { + "epoch": 1.67, + "learning_rate": 3.883007522947061e-05, + "loss": 0.8439, + "step": 10483 + }, + { + "epoch": 1.67, + "learning_rate": 3.8827925981402496e-05, + "loss": 0.8272, + "step": 10484 + }, + { + "epoch": 1.67, + "learning_rate": 3.882577658607537e-05, + "loss": 0.7812, + "step": 10485 + }, + { + "epoch": 1.67, + "learning_rate": 3.882362704351212e-05, + "loss": 0.8735, + "step": 10486 + }, + { + "epoch": 1.67, + "learning_rate": 3.8821477353735634e-05, + "loss": 0.8551, + "step": 10487 + }, + { + "epoch": 1.67, + "learning_rate": 3.881932751676881e-05, + "loss": 0.8709, + "step": 10488 + }, + { + "epoch": 1.67, + "learning_rate": 3.881717753263453e-05, + "loss": 0.8063, + "step": 10489 + }, + { + "epoch": 1.67, + "learning_rate": 3.8815027401355694e-05, + "loss": 0.769, + "step": 10490 + }, + { + "epoch": 1.67, + "learning_rate": 3.8812877122955214e-05, + "loss": 0.8121, + "step": 10491 + }, + { + "epoch": 1.67, + "learning_rate": 3.881072669745597e-05, + "loss": 0.8868, + "step": 10492 + }, + { + "epoch": 1.67, + "learning_rate": 3.880857612488088e-05, + "loss": 0.9819, + "step": 10493 + }, + { + "epoch": 1.67, + "learning_rate": 3.8806425405252834e-05, + "loss": 0.8471, + "step": 10494 + }, + { + "epoch": 1.67, + "learning_rate": 3.880427453859473e-05, + "loss": 0.8711, + "step": 10495 + }, + { + "epoch": 1.67, + "learning_rate": 3.88021235249295e-05, + "loss": 0.8381, + "step": 10496 + }, + { + "epoch": 1.67, + "learning_rate": 3.879997236428002e-05, + "loss": 0.8221, + "step": 10497 + }, + { + "epoch": 1.67, + "learning_rate": 3.879782105666923e-05, + "loss": 0.9537, + "step": 10498 + }, + { + "epoch": 1.67, + "learning_rate": 3.879566960212e-05, + "loss": 0.8347, + "step": 10499 + }, + { + "epoch": 1.67, + "learning_rate": 3.879351800065528e-05, + "loss": 0.8445, + "step": 10500 + }, + { + "epoch": 1.67, + "learning_rate": 3.8791366252297955e-05, + "loss": 0.7655, + "step": 10501 + }, + { + "epoch": 1.67, + "learning_rate": 3.8789214357070963e-05, + "loss": 0.7576, + "step": 10502 + }, + { + "epoch": 1.67, + "learning_rate": 3.878706231499721e-05, + "loss": 0.8088, + "step": 10503 + }, + { + "epoch": 1.67, + "learning_rate": 3.878491012609961e-05, + "loss": 0.7476, + "step": 10504 + }, + { + "epoch": 1.67, + "learning_rate": 3.878275779040108e-05, + "loss": 0.8071, + "step": 10505 + }, + { + "epoch": 1.67, + "learning_rate": 3.8780605307924564e-05, + "loss": 0.8345, + "step": 10506 + }, + { + "epoch": 1.67, + "learning_rate": 3.877845267869296e-05, + "loss": 0.8044, + "step": 10507 + }, + { + "epoch": 1.67, + "learning_rate": 3.877629990272919e-05, + "loss": 0.8355, + "step": 10508 + }, + { + "epoch": 1.67, + "learning_rate": 3.87741469800562e-05, + "loss": 0.8452, + "step": 10509 + }, + { + "epoch": 1.67, + "learning_rate": 3.87719939106969e-05, + "loss": 0.7682, + "step": 10510 + }, + { + "epoch": 1.67, + "learning_rate": 3.876984069467423e-05, + "loss": 0.7985, + "step": 10511 + }, + { + "epoch": 1.67, + "learning_rate": 3.876768733201112e-05, + "loss": 0.7652, + "step": 10512 + }, + { + "epoch": 1.67, + "learning_rate": 3.876553382273049e-05, + "loss": 0.8785, + "step": 10513 + }, + { + "epoch": 1.68, + "learning_rate": 3.876338016685529e-05, + "loss": 0.7455, + "step": 10514 + }, + { + "epoch": 1.68, + "learning_rate": 3.876122636440845e-05, + "loss": 0.8758, + "step": 10515 + }, + { + "epoch": 1.68, + "learning_rate": 3.87590724154129e-05, + "loss": 0.9354, + "step": 10516 + }, + { + "epoch": 1.68, + "learning_rate": 3.875691831989158e-05, + "loss": 0.8352, + "step": 10517 + }, + { + "epoch": 1.68, + "learning_rate": 3.875476407786743e-05, + "loss": 0.809, + "step": 10518 + }, + { + "epoch": 1.68, + "learning_rate": 3.87526096893634e-05, + "loss": 0.8053, + "step": 10519 + }, + { + "epoch": 1.68, + "learning_rate": 3.875045515440242e-05, + "loss": 0.8186, + "step": 10520 + }, + { + "epoch": 1.68, + "learning_rate": 3.874830047300744e-05, + "loss": 0.8171, + "step": 10521 + }, + { + "epoch": 1.68, + "learning_rate": 3.8746145645201416e-05, + "loss": 0.8695, + "step": 10522 + }, + { + "epoch": 1.68, + "learning_rate": 3.874399067100728e-05, + "loss": 0.8477, + "step": 10523 + }, + { + "epoch": 1.68, + "learning_rate": 3.874183555044798e-05, + "loss": 0.8597, + "step": 10524 + }, + { + "epoch": 1.68, + "learning_rate": 3.8739680283546484e-05, + "loss": 0.8088, + "step": 10525 + }, + { + "epoch": 1.68, + "learning_rate": 3.8737524870325735e-05, + "loss": 0.8119, + "step": 10526 + }, + { + "epoch": 1.68, + "learning_rate": 3.873536931080869e-05, + "loss": 0.8003, + "step": 10527 + }, + { + "epoch": 1.68, + "learning_rate": 3.873321360501829e-05, + "loss": 1.0092, + "step": 10528 + }, + { + "epoch": 1.68, + "learning_rate": 3.873105775297751e-05, + "loss": 0.7705, + "step": 10529 + }, + { + "epoch": 1.68, + "learning_rate": 3.8728901754709294e-05, + "loss": 0.7901, + "step": 10530 + }, + { + "epoch": 1.68, + "learning_rate": 3.872674561023661e-05, + "loss": 0.8007, + "step": 10531 + }, + { + "epoch": 1.68, + "learning_rate": 3.872458931958243e-05, + "loss": 0.7931, + "step": 10532 + }, + { + "epoch": 1.68, + "learning_rate": 3.87224328827697e-05, + "loss": 0.8194, + "step": 10533 + }, + { + "epoch": 1.68, + "learning_rate": 3.872027629982138e-05, + "loss": 0.8059, + "step": 10534 + }, + { + "epoch": 1.68, + "learning_rate": 3.871811957076046e-05, + "loss": 0.8271, + "step": 10535 + }, + { + "epoch": 1.68, + "learning_rate": 3.87159626956099e-05, + "loss": 0.8109, + "step": 10536 + }, + { + "epoch": 1.68, + "learning_rate": 3.871380567439265e-05, + "loss": 0.9511, + "step": 10537 + }, + { + "epoch": 1.68, + "learning_rate": 3.8711648507131706e-05, + "loss": 0.8329, + "step": 10538 + }, + { + "epoch": 1.68, + "learning_rate": 3.8709491193850024e-05, + "loss": 0.8175, + "step": 10539 + }, + { + "epoch": 1.68, + "learning_rate": 3.870733373457059e-05, + "loss": 0.9386, + "step": 10540 + }, + { + "epoch": 1.68, + "learning_rate": 3.870517612931637e-05, + "loss": 0.9198, + "step": 10541 + }, + { + "epoch": 1.68, + "learning_rate": 3.8703018378110345e-05, + "loss": 0.8879, + "step": 10542 + }, + { + "epoch": 1.68, + "learning_rate": 3.870086048097549e-05, + "loss": 0.8125, + "step": 10543 + }, + { + "epoch": 1.68, + "learning_rate": 3.8698702437934785e-05, + "loss": 0.8114, + "step": 10544 + }, + { + "epoch": 1.68, + "learning_rate": 3.869654424901123e-05, + "loss": 0.8731, + "step": 10545 + }, + { + "epoch": 1.68, + "learning_rate": 3.8694385914227785e-05, + "loss": 0.8855, + "step": 10546 + }, + { + "epoch": 1.68, + "learning_rate": 3.869222743360745e-05, + "loss": 0.8626, + "step": 10547 + }, + { + "epoch": 1.68, + "learning_rate": 3.86900688071732e-05, + "loss": 0.8232, + "step": 10548 + }, + { + "epoch": 1.68, + "learning_rate": 3.868791003494803e-05, + "loss": 0.8871, + "step": 10549 + }, + { + "epoch": 1.68, + "learning_rate": 3.868575111695493e-05, + "loss": 0.7896, + "step": 10550 + }, + { + "epoch": 1.68, + "learning_rate": 3.868359205321688e-05, + "loss": 0.7606, + "step": 10551 + }, + { + "epoch": 1.68, + "learning_rate": 3.8681432843756894e-05, + "loss": 0.9176, + "step": 10552 + }, + { + "epoch": 1.68, + "learning_rate": 3.867927348859795e-05, + "loss": 0.8891, + "step": 10553 + }, + { + "epoch": 1.68, + "learning_rate": 3.8677113987763045e-05, + "loss": 0.8923, + "step": 10554 + }, + { + "epoch": 1.68, + "learning_rate": 3.867495434127519e-05, + "loss": 0.8438, + "step": 10555 + }, + { + "epoch": 1.68, + "learning_rate": 3.867279454915736e-05, + "loss": 0.8576, + "step": 10556 + }, + { + "epoch": 1.68, + "learning_rate": 3.8670634611432585e-05, + "loss": 0.8199, + "step": 10557 + }, + { + "epoch": 1.68, + "learning_rate": 3.8668474528123836e-05, + "loss": 0.8669, + "step": 10558 + }, + { + "epoch": 1.68, + "learning_rate": 3.8666314299254136e-05, + "loss": 0.814, + "step": 10559 + }, + { + "epoch": 1.68, + "learning_rate": 3.866415392484649e-05, + "loss": 0.8646, + "step": 10560 + }, + { + "epoch": 1.68, + "learning_rate": 3.8661993404923895e-05, + "loss": 0.9147, + "step": 10561 + }, + { + "epoch": 1.68, + "learning_rate": 3.865983273950937e-05, + "loss": 0.8834, + "step": 10562 + }, + { + "epoch": 1.68, + "learning_rate": 3.865767192862592e-05, + "loss": 0.8079, + "step": 10563 + }, + { + "epoch": 1.68, + "learning_rate": 3.865551097229655e-05, + "loss": 0.7371, + "step": 10564 + }, + { + "epoch": 1.68, + "learning_rate": 3.865334987054428e-05, + "loss": 0.9475, + "step": 10565 + }, + { + "epoch": 1.68, + "learning_rate": 3.865118862339212e-05, + "loss": 0.8645, + "step": 10566 + }, + { + "epoch": 1.68, + "learning_rate": 3.86490272308631e-05, + "loss": 0.9421, + "step": 10567 + }, + { + "epoch": 1.68, + "learning_rate": 3.8646865692980214e-05, + "loss": 0.87, + "step": 10568 + }, + { + "epoch": 1.68, + "learning_rate": 3.8644704009766504e-05, + "loss": 0.8083, + "step": 10569 + }, + { + "epoch": 1.68, + "learning_rate": 3.8642542181244976e-05, + "loss": 0.8594, + "step": 10570 + }, + { + "epoch": 1.68, + "learning_rate": 3.8640380207438656e-05, + "loss": 0.8226, + "step": 10571 + }, + { + "epoch": 1.68, + "learning_rate": 3.863821808837056e-05, + "loss": 0.8809, + "step": 10572 + }, + { + "epoch": 1.68, + "learning_rate": 3.8636055824063736e-05, + "loss": 0.8437, + "step": 10573 + }, + { + "epoch": 1.68, + "learning_rate": 3.863389341454119e-05, + "loss": 0.8445, + "step": 10574 + }, + { + "epoch": 1.68, + "learning_rate": 3.863173085982596e-05, + "loss": 0.8372, + "step": 10575 + }, + { + "epoch": 1.68, + "learning_rate": 3.862956815994106e-05, + "loss": 0.8821, + "step": 10576 + }, + { + "epoch": 1.69, + "learning_rate": 3.8627405314909546e-05, + "loss": 0.8017, + "step": 10577 + }, + { + "epoch": 1.69, + "learning_rate": 3.862524232475443e-05, + "loss": 0.8118, + "step": 10578 + }, + { + "epoch": 1.69, + "learning_rate": 3.862307918949876e-05, + "loss": 0.7915, + "step": 10579 + }, + { + "epoch": 1.69, + "learning_rate": 3.862091590916557e-05, + "loss": 0.8661, + "step": 10580 + }, + { + "epoch": 1.69, + "learning_rate": 3.8618752483777886e-05, + "loss": 0.8608, + "step": 10581 + }, + { + "epoch": 1.69, + "learning_rate": 3.8616588913358764e-05, + "loss": 0.787, + "step": 10582 + }, + { + "epoch": 1.69, + "learning_rate": 3.861442519793124e-05, + "loss": 0.8218, + "step": 10583 + }, + { + "epoch": 1.69, + "learning_rate": 3.8612261337518346e-05, + "loss": 0.8747, + "step": 10584 + }, + { + "epoch": 1.69, + "learning_rate": 3.861009733214314e-05, + "loss": 0.8485, + "step": 10585 + }, + { + "epoch": 1.69, + "learning_rate": 3.860793318182865e-05, + "loss": 0.8225, + "step": 10586 + }, + { + "epoch": 1.69, + "learning_rate": 3.860576888659794e-05, + "loss": 0.8175, + "step": 10587 + }, + { + "epoch": 1.69, + "learning_rate": 3.860360444647405e-05, + "loss": 0.818, + "step": 10588 + }, + { + "epoch": 1.69, + "learning_rate": 3.8601439861480036e-05, + "loss": 0.8331, + "step": 10589 + }, + { + "epoch": 1.69, + "learning_rate": 3.8599275131638935e-05, + "loss": 0.742, + "step": 10590 + }, + { + "epoch": 1.69, + "learning_rate": 3.859711025697382e-05, + "loss": 0.8618, + "step": 10591 + }, + { + "epoch": 1.69, + "learning_rate": 3.859494523750773e-05, + "loss": 0.8056, + "step": 10592 + }, + { + "epoch": 1.69, + "learning_rate": 3.859278007326373e-05, + "loss": 0.7852, + "step": 10593 + }, + { + "epoch": 1.69, + "learning_rate": 3.859061476426487e-05, + "loss": 0.8424, + "step": 10594 + }, + { + "epoch": 1.69, + "learning_rate": 3.8588449310534224e-05, + "loss": 0.8506, + "step": 10595 + }, + { + "epoch": 1.69, + "learning_rate": 3.858628371209483e-05, + "loss": 0.8949, + "step": 10596 + }, + { + "epoch": 1.69, + "learning_rate": 3.858411796896977e-05, + "loss": 0.8778, + "step": 10597 + }, + { + "epoch": 1.69, + "learning_rate": 3.8581952081182104e-05, + "loss": 0.8392, + "step": 10598 + }, + { + "epoch": 1.69, + "learning_rate": 3.857978604875489e-05, + "loss": 0.8269, + "step": 10599 + }, + { + "epoch": 1.69, + "learning_rate": 3.85776198717112e-05, + "loss": 0.8052, + "step": 10600 + }, + { + "epoch": 1.69, + "learning_rate": 3.85754535500741e-05, + "loss": 0.839, + "step": 10601 + }, + { + "epoch": 1.69, + "learning_rate": 3.857328708386666e-05, + "loss": 0.8818, + "step": 10602 + }, + { + "epoch": 1.69, + "learning_rate": 3.8571120473111956e-05, + "loss": 0.8157, + "step": 10603 + }, + { + "epoch": 1.69, + "learning_rate": 3.8568953717833054e-05, + "loss": 0.8118, + "step": 10604 + }, + { + "epoch": 1.69, + "learning_rate": 3.8566786818053036e-05, + "loss": 0.7733, + "step": 10605 + }, + { + "epoch": 1.69, + "learning_rate": 3.8564619773794974e-05, + "loss": 0.7808, + "step": 10606 + }, + { + "epoch": 1.69, + "learning_rate": 3.8562452585081955e-05, + "loss": 0.8887, + "step": 10607 + }, + { + "epoch": 1.69, + "learning_rate": 3.8560285251937044e-05, + "loss": 0.8185, + "step": 10608 + }, + { + "epoch": 1.69, + "learning_rate": 3.855811777438333e-05, + "loss": 0.8664, + "step": 10609 + }, + { + "epoch": 1.69, + "learning_rate": 3.8555950152443885e-05, + "loss": 0.8795, + "step": 10610 + }, + { + "epoch": 1.69, + "learning_rate": 3.855378238614181e-05, + "loss": 0.8333, + "step": 10611 + }, + { + "epoch": 1.69, + "learning_rate": 3.855161447550018e-05, + "loss": 0.8746, + "step": 10612 + }, + { + "epoch": 1.69, + "learning_rate": 3.8549446420542066e-05, + "loss": 0.8044, + "step": 10613 + }, + { + "epoch": 1.69, + "learning_rate": 3.854727822129059e-05, + "loss": 0.8388, + "step": 10614 + }, + { + "epoch": 1.69, + "learning_rate": 3.854510987776883e-05, + "loss": 0.9059, + "step": 10615 + }, + { + "epoch": 1.69, + "learning_rate": 3.8542941389999866e-05, + "loss": 0.8363, + "step": 10616 + }, + { + "epoch": 1.69, + "learning_rate": 3.8540772758006795e-05, + "loss": 0.8239, + "step": 10617 + }, + { + "epoch": 1.69, + "learning_rate": 3.853860398181273e-05, + "loss": 0.856, + "step": 10618 + }, + { + "epoch": 1.69, + "learning_rate": 3.8536435061440736e-05, + "loss": 0.8158, + "step": 10619 + }, + { + "epoch": 1.69, + "learning_rate": 3.853426599691393e-05, + "loss": 0.76, + "step": 10620 + }, + { + "epoch": 1.69, + "learning_rate": 3.853209678825541e-05, + "loss": 0.7891, + "step": 10621 + }, + { + "epoch": 1.69, + "learning_rate": 3.8529927435488274e-05, + "loss": 0.8343, + "step": 10622 + }, + { + "epoch": 1.69, + "learning_rate": 3.852775793863562e-05, + "loss": 0.8087, + "step": 10623 + }, + { + "epoch": 1.69, + "learning_rate": 3.8525588297720564e-05, + "loss": 0.8437, + "step": 10624 + }, + { + "epoch": 1.69, + "learning_rate": 3.85234185127662e-05, + "loss": 0.8114, + "step": 10625 + }, + { + "epoch": 1.69, + "learning_rate": 3.8521248583795644e-05, + "loss": 0.8289, + "step": 10626 + }, + { + "epoch": 1.69, + "learning_rate": 3.8519078510832e-05, + "loss": 0.7838, + "step": 10627 + }, + { + "epoch": 1.69, + "learning_rate": 3.851690829389838e-05, + "loss": 0.9596, + "step": 10628 + }, + { + "epoch": 1.69, + "learning_rate": 3.8514737933017884e-05, + "loss": 0.848, + "step": 10629 + }, + { + "epoch": 1.69, + "learning_rate": 3.8512567428213635e-05, + "loss": 0.8639, + "step": 10630 + }, + { + "epoch": 1.69, + "learning_rate": 3.8510396779508753e-05, + "loss": 0.8459, + "step": 10631 + }, + { + "epoch": 1.69, + "learning_rate": 3.850822598692634e-05, + "loss": 0.7929, + "step": 10632 + }, + { + "epoch": 1.69, + "learning_rate": 3.8506055050489524e-05, + "loss": 0.831, + "step": 10633 + }, + { + "epoch": 1.69, + "learning_rate": 3.8503883970221424e-05, + "loss": 0.7751, + "step": 10634 + }, + { + "epoch": 1.69, + "learning_rate": 3.850171274614516e-05, + "loss": 0.8115, + "step": 10635 + }, + { + "epoch": 1.69, + "learning_rate": 3.849954137828384e-05, + "loss": 0.8829, + "step": 10636 + }, + { + "epoch": 1.69, + "learning_rate": 3.8497369866660615e-05, + "loss": 0.8398, + "step": 10637 + }, + { + "epoch": 1.69, + "learning_rate": 3.849519821129859e-05, + "loss": 0.8271, + "step": 10638 + }, + { + "epoch": 1.69, + "learning_rate": 3.84930264122209e-05, + "loss": 0.8186, + "step": 10639 + }, + { + "epoch": 1.7, + "learning_rate": 3.8490854469450666e-05, + "loss": 0.8473, + "step": 10640 + }, + { + "epoch": 1.7, + "learning_rate": 3.848868238301102e-05, + "loss": 0.828, + "step": 10641 + }, + { + "epoch": 1.7, + "learning_rate": 3.84865101529251e-05, + "loss": 0.8328, + "step": 10642 + }, + { + "epoch": 1.7, + "learning_rate": 3.8484337779216035e-05, + "loss": 0.8757, + "step": 10643 + }, + { + "epoch": 1.7, + "learning_rate": 3.848216526190696e-05, + "loss": 0.8385, + "step": 10644 + }, + { + "epoch": 1.7, + "learning_rate": 3.847999260102101e-05, + "loss": 0.7781, + "step": 10645 + }, + { + "epoch": 1.7, + "learning_rate": 3.847781979658132e-05, + "loss": 0.8515, + "step": 10646 + }, + { + "epoch": 1.7, + "learning_rate": 3.847564684861103e-05, + "loss": 0.8276, + "step": 10647 + }, + { + "epoch": 1.7, + "learning_rate": 3.8473473757133285e-05, + "loss": 0.7279, + "step": 10648 + }, + { + "epoch": 1.7, + "learning_rate": 3.8471300522171226e-05, + "loss": 0.8717, + "step": 10649 + }, + { + "epoch": 1.7, + "learning_rate": 3.8469127143748e-05, + "loss": 0.8534, + "step": 10650 + }, + { + "epoch": 1.7, + "learning_rate": 3.846695362188674e-05, + "loss": 0.7884, + "step": 10651 + }, + { + "epoch": 1.7, + "learning_rate": 3.84647799566106e-05, + "loss": 0.9125, + "step": 10652 + }, + { + "epoch": 1.7, + "learning_rate": 3.8462606147942723e-05, + "loss": 0.8156, + "step": 10653 + }, + { + "epoch": 1.7, + "learning_rate": 3.846043219590627e-05, + "loss": 0.7708, + "step": 10654 + }, + { + "epoch": 1.7, + "learning_rate": 3.845825810052439e-05, + "loss": 0.856, + "step": 10655 + }, + { + "epoch": 1.7, + "learning_rate": 3.8456083861820224e-05, + "loss": 0.8031, + "step": 10656 + }, + { + "epoch": 1.7, + "learning_rate": 3.8453909479816944e-05, + "loss": 0.7912, + "step": 10657 + }, + { + "epoch": 1.7, + "learning_rate": 3.845173495453769e-05, + "loss": 0.7621, + "step": 10658 + }, + { + "epoch": 1.7, + "learning_rate": 3.8449560286005624e-05, + "loss": 0.8239, + "step": 10659 + }, + { + "epoch": 1.7, + "learning_rate": 3.844738547424391e-05, + "loss": 0.8, + "step": 10660 + }, + { + "epoch": 1.7, + "learning_rate": 3.84452105192757e-05, + "loss": 0.8516, + "step": 10661 + }, + { + "epoch": 1.7, + "learning_rate": 3.844303542112416e-05, + "loss": 0.8156, + "step": 10662 + }, + { + "epoch": 1.7, + "learning_rate": 3.844086017981246e-05, + "loss": 0.8339, + "step": 10663 + }, + { + "epoch": 1.7, + "learning_rate": 3.843868479536376e-05, + "loss": 0.7584, + "step": 10664 + }, + { + "epoch": 1.7, + "learning_rate": 3.8436509267801225e-05, + "loss": 0.8621, + "step": 10665 + }, + { + "epoch": 1.7, + "learning_rate": 3.843433359714802e-05, + "loss": 0.8125, + "step": 10666 + }, + { + "epoch": 1.7, + "learning_rate": 3.843215778342732e-05, + "loss": 0.7953, + "step": 10667 + }, + { + "epoch": 1.7, + "learning_rate": 3.84299818266623e-05, + "loss": 0.8457, + "step": 10668 + }, + { + "epoch": 1.7, + "learning_rate": 3.842780572687612e-05, + "loss": 0.7945, + "step": 10669 + }, + { + "epoch": 1.7, + "learning_rate": 3.842562948409196e-05, + "loss": 0.8048, + "step": 10670 + }, + { + "epoch": 1.7, + "learning_rate": 3.8423453098333004e-05, + "loss": 0.7902, + "step": 10671 + }, + { + "epoch": 1.7, + "learning_rate": 3.842127656962241e-05, + "loss": 0.7871, + "step": 10672 + }, + { + "epoch": 1.7, + "learning_rate": 3.841909989798338e-05, + "loss": 0.7747, + "step": 10673 + }, + { + "epoch": 1.7, + "learning_rate": 3.8416923083439086e-05, + "loss": 0.8598, + "step": 10674 + }, + { + "epoch": 1.7, + "learning_rate": 3.841474612601269e-05, + "loss": 0.7805, + "step": 10675 + }, + { + "epoch": 1.7, + "learning_rate": 3.841256902572741e-05, + "loss": 0.7304, + "step": 10676 + }, + { + "epoch": 1.7, + "learning_rate": 3.8410391782606404e-05, + "loss": 0.8757, + "step": 10677 + }, + { + "epoch": 1.7, + "learning_rate": 3.840821439667287e-05, + "loss": 0.8403, + "step": 10678 + }, + { + "epoch": 1.7, + "learning_rate": 3.840603686794999e-05, + "loss": 0.835, + "step": 10679 + }, + { + "epoch": 1.7, + "learning_rate": 3.840385919646096e-05, + "loss": 0.8556, + "step": 10680 + }, + { + "epoch": 1.7, + "learning_rate": 3.840168138222896e-05, + "loss": 0.8664, + "step": 10681 + }, + { + "epoch": 1.7, + "learning_rate": 3.83995034252772e-05, + "loss": 0.8309, + "step": 10682 + }, + { + "epoch": 1.7, + "learning_rate": 3.8397325325628854e-05, + "loss": 0.8613, + "step": 10683 + }, + { + "epoch": 1.7, + "learning_rate": 3.839514708330713e-05, + "loss": 0.7921, + "step": 10684 + }, + { + "epoch": 1.7, + "learning_rate": 3.839296869833523e-05, + "loss": 0.9222, + "step": 10685 + }, + { + "epoch": 1.7, + "learning_rate": 3.8390790170736334e-05, + "loss": 0.809, + "step": 10686 + }, + { + "epoch": 1.7, + "learning_rate": 3.838861150053365e-05, + "loss": 0.836, + "step": 10687 + }, + { + "epoch": 1.7, + "learning_rate": 3.83864326877504e-05, + "loss": 0.8735, + "step": 10688 + }, + { + "epoch": 1.7, + "learning_rate": 3.8384253732409754e-05, + "loss": 0.9182, + "step": 10689 + }, + { + "epoch": 1.7, + "learning_rate": 3.8382074634534934e-05, + "loss": 0.8687, + "step": 10690 + }, + { + "epoch": 1.7, + "learning_rate": 3.8379895394149145e-05, + "loss": 0.9204, + "step": 10691 + }, + { + "epoch": 1.7, + "learning_rate": 3.837771601127559e-05, + "loss": 0.9229, + "step": 10692 + }, + { + "epoch": 1.7, + "learning_rate": 3.8375536485937486e-05, + "loss": 0.8189, + "step": 10693 + }, + { + "epoch": 1.7, + "learning_rate": 3.8373356818158035e-05, + "loss": 0.8289, + "step": 10694 + }, + { + "epoch": 1.7, + "learning_rate": 3.837117700796045e-05, + "loss": 0.8291, + "step": 10695 + }, + { + "epoch": 1.7, + "learning_rate": 3.836899705536796e-05, + "loss": 0.7637, + "step": 10696 + }, + { + "epoch": 1.7, + "learning_rate": 3.836681696040376e-05, + "loss": 0.7685, + "step": 10697 + }, + { + "epoch": 1.7, + "learning_rate": 3.8364636723091075e-05, + "loss": 0.8192, + "step": 10698 + }, + { + "epoch": 1.7, + "learning_rate": 3.836245634345312e-05, + "loss": 0.8569, + "step": 10699 + }, + { + "epoch": 1.7, + "learning_rate": 3.836027582151312e-05, + "loss": 0.8569, + "step": 10700 + }, + { + "epoch": 1.7, + "learning_rate": 3.83580951572943e-05, + "loss": 0.8379, + "step": 10701 + }, + { + "epoch": 1.7, + "learning_rate": 3.8355914350819866e-05, + "loss": 0.8348, + "step": 10702 + }, + { + "epoch": 1.71, + "learning_rate": 3.835373340211306e-05, + "loss": 0.8062, + "step": 10703 + }, + { + "epoch": 1.71, + "learning_rate": 3.8351552311197095e-05, + "loss": 0.8558, + "step": 10704 + }, + { + "epoch": 1.71, + "learning_rate": 3.83493710780952e-05, + "loss": 0.897, + "step": 10705 + }, + { + "epoch": 1.71, + "learning_rate": 3.8347189702830624e-05, + "loss": 0.8289, + "step": 10706 + }, + { + "epoch": 1.71, + "learning_rate": 3.834500818542656e-05, + "loss": 0.8745, + "step": 10707 + }, + { + "epoch": 1.71, + "learning_rate": 3.834282652590628e-05, + "loss": 0.7603, + "step": 10708 + }, + { + "epoch": 1.71, + "learning_rate": 3.8340644724292985e-05, + "loss": 0.865, + "step": 10709 + }, + { + "epoch": 1.71, + "learning_rate": 3.8338462780609926e-05, + "loss": 0.9343, + "step": 10710 + }, + { + "epoch": 1.71, + "learning_rate": 3.8336280694880344e-05, + "loss": 0.8101, + "step": 10711 + }, + { + "epoch": 1.71, + "learning_rate": 3.833409846712746e-05, + "loss": 0.7385, + "step": 10712 + }, + { + "epoch": 1.71, + "learning_rate": 3.833191609737452e-05, + "loss": 0.8233, + "step": 10713 + }, + { + "epoch": 1.71, + "learning_rate": 3.832973358564478e-05, + "loss": 0.8259, + "step": 10714 + }, + { + "epoch": 1.71, + "learning_rate": 3.832755093196146e-05, + "loss": 0.7646, + "step": 10715 + }, + { + "epoch": 1.71, + "learning_rate": 3.832536813634782e-05, + "loss": 0.7635, + "step": 10716 + }, + { + "epoch": 1.71, + "learning_rate": 3.83231851988271e-05, + "loss": 0.8198, + "step": 10717 + }, + { + "epoch": 1.71, + "learning_rate": 3.8321002119422545e-05, + "loss": 0.849, + "step": 10718 + }, + { + "epoch": 1.71, + "learning_rate": 3.83188188981574e-05, + "loss": 0.8502, + "step": 10719 + }, + { + "epoch": 1.71, + "learning_rate": 3.8316635535054925e-05, + "loss": 0.8005, + "step": 10720 + }, + { + "epoch": 1.71, + "learning_rate": 3.8314452030138356e-05, + "loss": 0.8097, + "step": 10721 + }, + { + "epoch": 1.71, + "learning_rate": 3.831226838343097e-05, + "loss": 0.7926, + "step": 10722 + }, + { + "epoch": 1.71, + "learning_rate": 3.831008459495601e-05, + "loss": 0.8396, + "step": 10723 + }, + { + "epoch": 1.71, + "learning_rate": 3.830790066473671e-05, + "loss": 0.9068, + "step": 10724 + }, + { + "epoch": 1.71, + "learning_rate": 3.8305716592796366e-05, + "loss": 0.7871, + "step": 10725 + }, + { + "epoch": 1.71, + "learning_rate": 3.8303532379158216e-05, + "loss": 0.7979, + "step": 10726 + }, + { + "epoch": 1.71, + "learning_rate": 3.8301348023845506e-05, + "loss": 0.8113, + "step": 10727 + }, + { + "epoch": 1.71, + "learning_rate": 3.829916352688154e-05, + "loss": 0.8492, + "step": 10728 + }, + { + "epoch": 1.71, + "learning_rate": 3.829697888828954e-05, + "loss": 0.9169, + "step": 10729 + }, + { + "epoch": 1.71, + "learning_rate": 3.829479410809279e-05, + "loss": 0.8302, + "step": 10730 + }, + { + "epoch": 1.71, + "learning_rate": 3.829260918631456e-05, + "loss": 0.843, + "step": 10731 + }, + { + "epoch": 1.71, + "learning_rate": 3.8290424122978106e-05, + "loss": 0.8053, + "step": 10732 + }, + { + "epoch": 1.71, + "learning_rate": 3.8288238918106704e-05, + "loss": 0.8663, + "step": 10733 + }, + { + "epoch": 1.71, + "learning_rate": 3.828605357172363e-05, + "loss": 0.863, + "step": 10734 + }, + { + "epoch": 1.71, + "learning_rate": 3.828386808385215e-05, + "loss": 0.7699, + "step": 10735 + }, + { + "epoch": 1.71, + "learning_rate": 3.8281682454515535e-05, + "loss": 0.8787, + "step": 10736 + }, + { + "epoch": 1.71, + "learning_rate": 3.827949668373707e-05, + "loss": 0.7848, + "step": 10737 + }, + { + "epoch": 1.71, + "learning_rate": 3.827731077154002e-05, + "loss": 0.809, + "step": 10738 + }, + { + "epoch": 1.71, + "learning_rate": 3.827512471794769e-05, + "loss": 0.8265, + "step": 10739 + }, + { + "epoch": 1.71, + "learning_rate": 3.8272938522983324e-05, + "loss": 0.8214, + "step": 10740 + }, + { + "epoch": 1.71, + "learning_rate": 3.827075218667022e-05, + "loss": 0.8132, + "step": 10741 + }, + { + "epoch": 1.71, + "learning_rate": 3.8268565709031675e-05, + "loss": 0.8198, + "step": 10742 + }, + { + "epoch": 1.71, + "learning_rate": 3.826637909009095e-05, + "loss": 0.8457, + "step": 10743 + }, + { + "epoch": 1.71, + "learning_rate": 3.826419232987135e-05, + "loss": 0.9481, + "step": 10744 + }, + { + "epoch": 1.71, + "learning_rate": 3.826200542839615e-05, + "loss": 0.778, + "step": 10745 + }, + { + "epoch": 1.71, + "learning_rate": 3.825981838568864e-05, + "loss": 0.7555, + "step": 10746 + }, + { + "epoch": 1.71, + "learning_rate": 3.8257631201772114e-05, + "loss": 0.8277, + "step": 10747 + }, + { + "epoch": 1.71, + "learning_rate": 3.8255443876669875e-05, + "loss": 0.7534, + "step": 10748 + }, + { + "epoch": 1.71, + "learning_rate": 3.8253256410405205e-05, + "loss": 0.7356, + "step": 10749 + }, + { + "epoch": 1.71, + "learning_rate": 3.8251068803001386e-05, + "loss": 0.7907, + "step": 10750 + }, + { + "epoch": 1.71, + "learning_rate": 3.824888105448174e-05, + "loss": 0.8051, + "step": 10751 + }, + { + "epoch": 1.71, + "learning_rate": 3.824669316486955e-05, + "loss": 0.8271, + "step": 10752 + }, + { + "epoch": 1.71, + "learning_rate": 3.824450513418813e-05, + "loss": 0.7711, + "step": 10753 + }, + { + "epoch": 1.71, + "learning_rate": 3.8242316962460765e-05, + "loss": 0.8897, + "step": 10754 + }, + { + "epoch": 1.71, + "learning_rate": 3.824012864971076e-05, + "loss": 0.8929, + "step": 10755 + }, + { + "epoch": 1.71, + "learning_rate": 3.823794019596143e-05, + "loss": 0.8411, + "step": 10756 + }, + { + "epoch": 1.71, + "learning_rate": 3.823575160123607e-05, + "loss": 0.8178, + "step": 10757 + }, + { + "epoch": 1.71, + "learning_rate": 3.8233562865557994e-05, + "loss": 0.8463, + "step": 10758 + }, + { + "epoch": 1.71, + "learning_rate": 3.8231373988950506e-05, + "loss": 0.8595, + "step": 10759 + }, + { + "epoch": 1.71, + "learning_rate": 3.822918497143691e-05, + "loss": 0.8566, + "step": 10760 + }, + { + "epoch": 1.71, + "learning_rate": 3.822699581304053e-05, + "loss": 0.8862, + "step": 10761 + }, + { + "epoch": 1.71, + "learning_rate": 3.8224806513784683e-05, + "loss": 0.8595, + "step": 10762 + }, + { + "epoch": 1.71, + "learning_rate": 3.8222617073692675e-05, + "loss": 0.766, + "step": 10763 + }, + { + "epoch": 1.71, + "learning_rate": 3.8220427492787815e-05, + "loss": 0.8954, + "step": 10764 + }, + { + "epoch": 1.71, + "learning_rate": 3.8218237771093436e-05, + "loss": 0.8672, + "step": 10765 + }, + { + "epoch": 1.72, + "learning_rate": 3.8216047908632846e-05, + "loss": 0.8865, + "step": 10766 + }, + { + "epoch": 1.72, + "learning_rate": 3.821385790542936e-05, + "loss": 0.8586, + "step": 10767 + }, + { + "epoch": 1.72, + "learning_rate": 3.821166776150632e-05, + "loss": 0.7761, + "step": 10768 + }, + { + "epoch": 1.72, + "learning_rate": 3.820947747688704e-05, + "loss": 0.807, + "step": 10769 + }, + { + "epoch": 1.72, + "learning_rate": 3.8207287051594844e-05, + "loss": 0.8529, + "step": 10770 + }, + { + "epoch": 1.72, + "learning_rate": 3.8205096485653056e-05, + "loss": 0.7659, + "step": 10771 + }, + { + "epoch": 1.72, + "learning_rate": 3.8202905779085015e-05, + "loss": 0.8132, + "step": 10772 + }, + { + "epoch": 1.72, + "learning_rate": 3.820071493191403e-05, + "loss": 0.8368, + "step": 10773 + }, + { + "epoch": 1.72, + "learning_rate": 3.8198523944163455e-05, + "loss": 0.7882, + "step": 10774 + }, + { + "epoch": 1.72, + "learning_rate": 3.819633281585662e-05, + "loss": 0.8126, + "step": 10775 + }, + { + "epoch": 1.72, + "learning_rate": 3.819414154701683e-05, + "loss": 0.9836, + "step": 10776 + }, + { + "epoch": 1.72, + "learning_rate": 3.819195013766746e-05, + "loss": 0.9556, + "step": 10777 + }, + { + "epoch": 1.72, + "learning_rate": 3.818975858783182e-05, + "loss": 0.8062, + "step": 10778 + }, + { + "epoch": 1.72, + "learning_rate": 3.8187566897533276e-05, + "loss": 0.8084, + "step": 10779 + }, + { + "epoch": 1.72, + "learning_rate": 3.8185375066795135e-05, + "loss": 0.7924, + "step": 10780 + }, + { + "epoch": 1.72, + "learning_rate": 3.818318309564076e-05, + "loss": 0.9525, + "step": 10781 + }, + { + "epoch": 1.72, + "learning_rate": 3.818099098409349e-05, + "loss": 0.865, + "step": 10782 + }, + { + "epoch": 1.72, + "learning_rate": 3.8178798732176664e-05, + "loss": 0.8361, + "step": 10783 + }, + { + "epoch": 1.72, + "learning_rate": 3.817660633991364e-05, + "loss": 0.8193, + "step": 10784 + }, + { + "epoch": 1.72, + "learning_rate": 3.817441380732775e-05, + "loss": 0.7854, + "step": 10785 + }, + { + "epoch": 1.72, + "learning_rate": 3.817222113444235e-05, + "loss": 0.8236, + "step": 10786 + }, + { + "epoch": 1.72, + "learning_rate": 3.8170028321280795e-05, + "loss": 0.8618, + "step": 10787 + }, + { + "epoch": 1.72, + "learning_rate": 3.8167835367866436e-05, + "loss": 0.8126, + "step": 10788 + }, + { + "epoch": 1.72, + "learning_rate": 3.816564227422262e-05, + "loss": 0.7504, + "step": 10789 + }, + { + "epoch": 1.72, + "learning_rate": 3.816344904037271e-05, + "loss": 0.801, + "step": 10790 + }, + { + "epoch": 1.72, + "learning_rate": 3.816125566634007e-05, + "loss": 0.8029, + "step": 10791 + }, + { + "epoch": 1.72, + "learning_rate": 3.815906215214804e-05, + "loss": 0.8667, + "step": 10792 + }, + { + "epoch": 1.72, + "learning_rate": 3.815686849781997e-05, + "loss": 0.8271, + "step": 10793 + }, + { + "epoch": 1.72, + "learning_rate": 3.815467470337926e-05, + "loss": 0.9096, + "step": 10794 + }, + { + "epoch": 1.72, + "learning_rate": 3.815248076884925e-05, + "loss": 0.863, + "step": 10795 + }, + { + "epoch": 1.72, + "learning_rate": 3.815028669425329e-05, + "loss": 0.8903, + "step": 10796 + }, + { + "epoch": 1.72, + "learning_rate": 3.8148092479614774e-05, + "loss": 0.75, + "step": 10797 + }, + { + "epoch": 1.72, + "learning_rate": 3.814589812495705e-05, + "loss": 0.8306, + "step": 10798 + }, + { + "epoch": 1.72, + "learning_rate": 3.8143703630303495e-05, + "loss": 0.804, + "step": 10799 + }, + { + "epoch": 1.72, + "learning_rate": 3.8141508995677474e-05, + "loss": 0.8617, + "step": 10800 + }, + { + "epoch": 1.72, + "learning_rate": 3.813931422110236e-05, + "loss": 0.7748, + "step": 10801 + }, + { + "epoch": 1.72, + "learning_rate": 3.813711930660153e-05, + "loss": 0.8004, + "step": 10802 + }, + { + "epoch": 1.72, + "learning_rate": 3.8134924252198346e-05, + "loss": 0.8963, + "step": 10803 + }, + { + "epoch": 1.72, + "learning_rate": 3.81327290579162e-05, + "loss": 0.8746, + "step": 10804 + }, + { + "epoch": 1.72, + "learning_rate": 3.813053372377846e-05, + "loss": 0.8309, + "step": 10805 + }, + { + "epoch": 1.72, + "learning_rate": 3.812833824980852e-05, + "loss": 0.8114, + "step": 10806 + }, + { + "epoch": 1.72, + "learning_rate": 3.8126142636029724e-05, + "loss": 0.8277, + "step": 10807 + }, + { + "epoch": 1.72, + "learning_rate": 3.8123946882465496e-05, + "loss": 0.7533, + "step": 10808 + }, + { + "epoch": 1.72, + "learning_rate": 3.8121750989139205e-05, + "loss": 0.7636, + "step": 10809 + }, + { + "epoch": 1.72, + "learning_rate": 3.8119554956074224e-05, + "loss": 0.8231, + "step": 10810 + }, + { + "epoch": 1.72, + "learning_rate": 3.811735878329394e-05, + "loss": 0.8379, + "step": 10811 + }, + { + "epoch": 1.72, + "learning_rate": 3.8115162470821766e-05, + "loss": 0.8546, + "step": 10812 + }, + { + "epoch": 1.72, + "learning_rate": 3.8112966018681054e-05, + "loss": 0.7903, + "step": 10813 + }, + { + "epoch": 1.72, + "learning_rate": 3.811076942689523e-05, + "loss": 0.8009, + "step": 10814 + }, + { + "epoch": 1.72, + "learning_rate": 3.810857269548767e-05, + "loss": 0.8115, + "step": 10815 + }, + { + "epoch": 1.72, + "learning_rate": 3.810637582448176e-05, + "loss": 0.8641, + "step": 10816 + }, + { + "epoch": 1.72, + "learning_rate": 3.8104178813900915e-05, + "loss": 0.8988, + "step": 10817 + }, + { + "epoch": 1.72, + "learning_rate": 3.8101981663768516e-05, + "loss": 0.8661, + "step": 10818 + }, + { + "epoch": 1.72, + "learning_rate": 3.809978437410797e-05, + "loss": 0.8083, + "step": 10819 + }, + { + "epoch": 1.72, + "learning_rate": 3.809758694494268e-05, + "loss": 0.8451, + "step": 10820 + }, + { + "epoch": 1.72, + "learning_rate": 3.809538937629602e-05, + "loss": 0.8512, + "step": 10821 + }, + { + "epoch": 1.72, + "learning_rate": 3.809319166819143e-05, + "loss": 0.7616, + "step": 10822 + }, + { + "epoch": 1.72, + "learning_rate": 3.8090993820652295e-05, + "loss": 0.8648, + "step": 10823 + }, + { + "epoch": 1.72, + "learning_rate": 3.8088795833702024e-05, + "loss": 0.8208, + "step": 10824 + }, + { + "epoch": 1.72, + "learning_rate": 3.808659770736401e-05, + "loss": 0.8306, + "step": 10825 + }, + { + "epoch": 1.72, + "learning_rate": 3.80843994416617e-05, + "loss": 0.7388, + "step": 10826 + }, + { + "epoch": 1.72, + "learning_rate": 3.808220103661846e-05, + "loss": 0.7909, + "step": 10827 + }, + { + "epoch": 1.73, + "learning_rate": 3.8080002492257724e-05, + "loss": 0.8337, + "step": 10828 + }, + { + "epoch": 1.73, + "learning_rate": 3.807780380860292e-05, + "loss": 0.8038, + "step": 10829 + }, + { + "epoch": 1.73, + "learning_rate": 3.8075604985677424e-05, + "loss": 0.8569, + "step": 10830 + }, + { + "epoch": 1.73, + "learning_rate": 3.807340602350469e-05, + "loss": 0.9329, + "step": 10831 + }, + { + "epoch": 1.73, + "learning_rate": 3.8071206922108105e-05, + "loss": 0.7778, + "step": 10832 + }, + { + "epoch": 1.73, + "learning_rate": 3.8069007681511105e-05, + "loss": 0.8298, + "step": 10833 + }, + { + "epoch": 1.73, + "learning_rate": 3.806680830173711e-05, + "loss": 0.8499, + "step": 10834 + }, + { + "epoch": 1.73, + "learning_rate": 3.8064608782809544e-05, + "loss": 0.8564, + "step": 10835 + }, + { + "epoch": 1.73, + "learning_rate": 3.806240912475182e-05, + "loss": 0.8246, + "step": 10836 + }, + { + "epoch": 1.73, + "learning_rate": 3.806020932758737e-05, + "loss": 0.8855, + "step": 10837 + }, + { + "epoch": 1.73, + "learning_rate": 3.805800939133962e-05, + "loss": 0.8525, + "step": 10838 + }, + { + "epoch": 1.73, + "learning_rate": 3.8055809316032e-05, + "loss": 0.8372, + "step": 10839 + }, + { + "epoch": 1.73, + "learning_rate": 3.8053609101687934e-05, + "loss": 0.8543, + "step": 10840 + }, + { + "epoch": 1.73, + "learning_rate": 3.805140874833086e-05, + "loss": 0.7795, + "step": 10841 + }, + { + "epoch": 1.73, + "learning_rate": 3.80492082559842e-05, + "loss": 0.8881, + "step": 10842 + }, + { + "epoch": 1.73, + "learning_rate": 3.8047007624671406e-05, + "loss": 0.8027, + "step": 10843 + }, + { + "epoch": 1.73, + "learning_rate": 3.804480685441589e-05, + "loss": 0.8421, + "step": 10844 + }, + { + "epoch": 1.73, + "learning_rate": 3.8042605945241106e-05, + "loss": 0.8132, + "step": 10845 + }, + { + "epoch": 1.73, + "learning_rate": 3.804040489717049e-05, + "loss": 0.8087, + "step": 10846 + }, + { + "epoch": 1.73, + "learning_rate": 3.803820371022747e-05, + "loss": 0.8648, + "step": 10847 + }, + { + "epoch": 1.73, + "learning_rate": 3.80360023844355e-05, + "loss": 0.8957, + "step": 10848 + }, + { + "epoch": 1.73, + "learning_rate": 3.8033800919818016e-05, + "loss": 0.7711, + "step": 10849 + }, + { + "epoch": 1.73, + "learning_rate": 3.803159931639848e-05, + "loss": 0.8141, + "step": 10850 + }, + { + "epoch": 1.73, + "learning_rate": 3.802939757420031e-05, + "loss": 0.9109, + "step": 10851 + }, + { + "epoch": 1.73, + "learning_rate": 3.802719569324697e-05, + "loss": 0.8078, + "step": 10852 + }, + { + "epoch": 1.73, + "learning_rate": 3.8024993673561904e-05, + "loss": 0.7417, + "step": 10853 + }, + { + "epoch": 1.73, + "learning_rate": 3.802279151516856e-05, + "loss": 0.9154, + "step": 10854 + }, + { + "epoch": 1.73, + "learning_rate": 3.8020589218090394e-05, + "loss": 0.8298, + "step": 10855 + }, + { + "epoch": 1.73, + "learning_rate": 3.801838678235086e-05, + "loss": 0.883, + "step": 10856 + }, + { + "epoch": 1.73, + "learning_rate": 3.80161842079734e-05, + "loss": 0.7773, + "step": 10857 + }, + { + "epoch": 1.73, + "learning_rate": 3.80139814949815e-05, + "loss": 0.8678, + "step": 10858 + }, + { + "epoch": 1.73, + "learning_rate": 3.801177864339859e-05, + "loss": 0.7978, + "step": 10859 + }, + { + "epoch": 1.73, + "learning_rate": 3.8009575653248134e-05, + "loss": 0.8966, + "step": 10860 + }, + { + "epoch": 1.73, + "learning_rate": 3.8007372524553594e-05, + "loss": 0.8715, + "step": 10861 + }, + { + "epoch": 1.73, + "learning_rate": 3.800516925733844e-05, + "loss": 0.8746, + "step": 10862 + }, + { + "epoch": 1.73, + "learning_rate": 3.800296585162613e-05, + "loss": 0.7695, + "step": 10863 + }, + { + "epoch": 1.73, + "learning_rate": 3.800076230744012e-05, + "loss": 0.847, + "step": 10864 + }, + { + "epoch": 1.73, + "learning_rate": 3.799855862480388e-05, + "loss": 0.8896, + "step": 10865 + }, + { + "epoch": 1.73, + "learning_rate": 3.79963548037409e-05, + "loss": 0.8438, + "step": 10866 + }, + { + "epoch": 1.73, + "learning_rate": 3.7994150844274614e-05, + "loss": 0.8366, + "step": 10867 + }, + { + "epoch": 1.73, + "learning_rate": 3.799194674642852e-05, + "loss": 0.7369, + "step": 10868 + }, + { + "epoch": 1.73, + "learning_rate": 3.798974251022608e-05, + "loss": 0.8739, + "step": 10869 + }, + { + "epoch": 1.73, + "learning_rate": 3.798753813569077e-05, + "loss": 0.8345, + "step": 10870 + }, + { + "epoch": 1.73, + "learning_rate": 3.798533362284606e-05, + "loss": 0.7731, + "step": 10871 + }, + { + "epoch": 1.73, + "learning_rate": 3.7983128971715434e-05, + "loss": 0.8093, + "step": 10872 + }, + { + "epoch": 1.73, + "learning_rate": 3.798092418232236e-05, + "loss": 0.8947, + "step": 10873 + }, + { + "epoch": 1.73, + "learning_rate": 3.797871925469033e-05, + "loss": 0.8013, + "step": 10874 + }, + { + "epoch": 1.73, + "learning_rate": 3.7976514188842814e-05, + "loss": 0.8231, + "step": 10875 + }, + { + "epoch": 1.73, + "learning_rate": 3.7974308984803305e-05, + "loss": 0.8672, + "step": 10876 + }, + { + "epoch": 1.73, + "learning_rate": 3.797210364259528e-05, + "loss": 0.8706, + "step": 10877 + }, + { + "epoch": 1.73, + "learning_rate": 3.796989816224223e-05, + "loss": 0.8119, + "step": 10878 + }, + { + "epoch": 1.73, + "learning_rate": 3.7967692543767644e-05, + "loss": 0.8384, + "step": 10879 + }, + { + "epoch": 1.73, + "learning_rate": 3.796548678719499e-05, + "loss": 0.8283, + "step": 10880 + }, + { + "epoch": 1.73, + "learning_rate": 3.796328089254779e-05, + "loss": 0.7538, + "step": 10881 + }, + { + "epoch": 1.73, + "learning_rate": 3.79610748598495e-05, + "loss": 0.8543, + "step": 10882 + }, + { + "epoch": 1.73, + "learning_rate": 3.795886868912365e-05, + "loss": 0.8599, + "step": 10883 + }, + { + "epoch": 1.73, + "learning_rate": 3.7956662380393706e-05, + "loss": 0.8173, + "step": 10884 + }, + { + "epoch": 1.73, + "learning_rate": 3.795445593368317e-05, + "loss": 0.8452, + "step": 10885 + }, + { + "epoch": 1.73, + "learning_rate": 3.795224934901555e-05, + "loss": 0.8781, + "step": 10886 + }, + { + "epoch": 1.73, + "learning_rate": 3.795004262641434e-05, + "loss": 0.8355, + "step": 10887 + }, + { + "epoch": 1.73, + "learning_rate": 3.7947835765903026e-05, + "loss": 0.7854, + "step": 10888 + }, + { + "epoch": 1.73, + "learning_rate": 3.7945628767505134e-05, + "loss": 0.7673, + "step": 10889 + }, + { + "epoch": 1.73, + "learning_rate": 3.794342163124416e-05, + "loss": 0.8702, + "step": 10890 + }, + { + "epoch": 1.74, + "learning_rate": 3.7941214357143595e-05, + "loss": 0.8185, + "step": 10891 + }, + { + "epoch": 1.74, + "learning_rate": 3.793900694522695e-05, + "loss": 0.7836, + "step": 10892 + }, + { + "epoch": 1.74, + "learning_rate": 3.793679939551775e-05, + "loss": 0.8549, + "step": 10893 + }, + { + "epoch": 1.74, + "learning_rate": 3.793459170803947e-05, + "loss": 0.8283, + "step": 10894 + }, + { + "epoch": 1.74, + "learning_rate": 3.793238388281566e-05, + "loss": 0.811, + "step": 10895 + }, + { + "epoch": 1.74, + "learning_rate": 3.79301759198698e-05, + "loss": 0.792, + "step": 10896 + }, + { + "epoch": 1.74, + "learning_rate": 3.792796781922542e-05, + "loss": 0.8085, + "step": 10897 + }, + { + "epoch": 1.74, + "learning_rate": 3.792575958090604e-05, + "loss": 0.9088, + "step": 10898 + }, + { + "epoch": 1.74, + "learning_rate": 3.792355120493516e-05, + "loss": 0.867, + "step": 10899 + }, + { + "epoch": 1.74, + "learning_rate": 3.792134269133631e-05, + "loss": 0.8211, + "step": 10900 + }, + { + "epoch": 1.74, + "learning_rate": 3.791913404013301e-05, + "loss": 0.9079, + "step": 10901 + }, + { + "epoch": 1.74, + "learning_rate": 3.791692525134876e-05, + "loss": 0.877, + "step": 10902 + }, + { + "epoch": 1.74, + "learning_rate": 3.7914716325007106e-05, + "loss": 0.8612, + "step": 10903 + }, + { + "epoch": 1.74, + "learning_rate": 3.791250726113157e-05, + "loss": 0.7681, + "step": 10904 + }, + { + "epoch": 1.74, + "learning_rate": 3.7910298059745655e-05, + "loss": 0.8528, + "step": 10905 + }, + { + "epoch": 1.74, + "learning_rate": 3.790808872087292e-05, + "loss": 0.8644, + "step": 10906 + }, + { + "epoch": 1.74, + "learning_rate": 3.790587924453687e-05, + "loss": 0.891, + "step": 10907 + }, + { + "epoch": 1.74, + "learning_rate": 3.790366963076104e-05, + "loss": 0.7894, + "step": 10908 + }, + { + "epoch": 1.74, + "learning_rate": 3.790145987956897e-05, + "loss": 0.877, + "step": 10909 + }, + { + "epoch": 1.74, + "learning_rate": 3.789924999098418e-05, + "loss": 0.873, + "step": 10910 + }, + { + "epoch": 1.74, + "learning_rate": 3.789703996503021e-05, + "loss": 0.7926, + "step": 10911 + }, + { + "epoch": 1.74, + "learning_rate": 3.789482980173059e-05, + "loss": 0.8211, + "step": 10912 + }, + { + "epoch": 1.74, + "learning_rate": 3.789261950110887e-05, + "loss": 0.726, + "step": 10913 + }, + { + "epoch": 1.74, + "learning_rate": 3.789040906318857e-05, + "loss": 0.8514, + "step": 10914 + }, + { + "epoch": 1.74, + "learning_rate": 3.788819848799324e-05, + "loss": 0.7953, + "step": 10915 + }, + { + "epoch": 1.74, + "learning_rate": 3.788598777554643e-05, + "loss": 0.7811, + "step": 10916 + }, + { + "epoch": 1.74, + "learning_rate": 3.788377692587166e-05, + "loss": 0.8094, + "step": 10917 + }, + { + "epoch": 1.74, + "learning_rate": 3.78815659389925e-05, + "loss": 0.7881, + "step": 10918 + }, + { + "epoch": 1.74, + "learning_rate": 3.7879354814932475e-05, + "loss": 0.7782, + "step": 10919 + }, + { + "epoch": 1.74, + "learning_rate": 3.787714355371514e-05, + "loss": 0.8472, + "step": 10920 + }, + { + "epoch": 1.74, + "learning_rate": 3.787493215536406e-05, + "loss": 0.8801, + "step": 10921 + }, + { + "epoch": 1.74, + "learning_rate": 3.7872720619902745e-05, + "loss": 0.8034, + "step": 10922 + }, + { + "epoch": 1.74, + "learning_rate": 3.787050894735479e-05, + "loss": 0.8361, + "step": 10923 + }, + { + "epoch": 1.74, + "learning_rate": 3.786829713774372e-05, + "loss": 0.8243, + "step": 10924 + }, + { + "epoch": 1.74, + "learning_rate": 3.78660851910931e-05, + "loss": 0.8256, + "step": 10925 + }, + { + "epoch": 1.74, + "learning_rate": 3.7863873107426486e-05, + "loss": 0.8277, + "step": 10926 + }, + { + "epoch": 1.74, + "learning_rate": 3.7861660886767426e-05, + "loss": 1.1501, + "step": 10927 + }, + { + "epoch": 1.74, + "learning_rate": 3.7859448529139487e-05, + "loss": 0.7731, + "step": 10928 + }, + { + "epoch": 1.74, + "learning_rate": 3.7857236034566235e-05, + "loss": 0.8764, + "step": 10929 + }, + { + "epoch": 1.74, + "learning_rate": 3.7855023403071225e-05, + "loss": 0.9226, + "step": 10930 + }, + { + "epoch": 1.74, + "learning_rate": 3.785281063467802e-05, + "loss": 0.7703, + "step": 10931 + }, + { + "epoch": 1.74, + "learning_rate": 3.785059772941018e-05, + "loss": 0.8727, + "step": 10932 + }, + { + "epoch": 1.74, + "learning_rate": 3.7848384687291286e-05, + "loss": 0.7889, + "step": 10933 + }, + { + "epoch": 1.74, + "learning_rate": 3.7846171508344876e-05, + "loss": 0.805, + "step": 10934 + }, + { + "epoch": 1.74, + "learning_rate": 3.7843958192594554e-05, + "loss": 0.879, + "step": 10935 + }, + { + "epoch": 1.74, + "learning_rate": 3.784174474006387e-05, + "loss": 0.8708, + "step": 10936 + }, + { + "epoch": 1.74, + "learning_rate": 3.78395311507764e-05, + "loss": 0.8427, + "step": 10937 + }, + { + "epoch": 1.74, + "learning_rate": 3.783731742475573e-05, + "loss": 0.8061, + "step": 10938 + }, + { + "epoch": 1.74, + "learning_rate": 3.783510356202541e-05, + "loss": 0.7227, + "step": 10939 + }, + { + "epoch": 1.74, + "learning_rate": 3.783288956260903e-05, + "loss": 0.8438, + "step": 10940 + }, + { + "epoch": 1.74, + "learning_rate": 3.783067542653017e-05, + "loss": 0.7718, + "step": 10941 + }, + { + "epoch": 1.74, + "learning_rate": 3.7828461153812413e-05, + "loss": 0.7192, + "step": 10942 + }, + { + "epoch": 1.74, + "learning_rate": 3.782624674447932e-05, + "loss": 0.8104, + "step": 10943 + }, + { + "epoch": 1.74, + "learning_rate": 3.7824032198554494e-05, + "loss": 0.7905, + "step": 10944 + }, + { + "epoch": 1.74, + "learning_rate": 3.782181751606151e-05, + "loss": 0.7459, + "step": 10945 + }, + { + "epoch": 1.74, + "learning_rate": 3.7819602697023946e-05, + "loss": 0.8022, + "step": 10946 + }, + { + "epoch": 1.74, + "learning_rate": 3.781738774146541e-05, + "loss": 0.7813, + "step": 10947 + }, + { + "epoch": 1.74, + "learning_rate": 3.781517264940947e-05, + "loss": 0.7545, + "step": 10948 + }, + { + "epoch": 1.74, + "learning_rate": 3.7812957420879715e-05, + "loss": 0.8412, + "step": 10949 + }, + { + "epoch": 1.74, + "learning_rate": 3.781074205589975e-05, + "loss": 0.8236, + "step": 10950 + }, + { + "epoch": 1.74, + "learning_rate": 3.7808526554493165e-05, + "loss": 0.8585, + "step": 10951 + }, + { + "epoch": 1.74, + "learning_rate": 3.780631091668354e-05, + "loss": 0.8149, + "step": 10952 + }, + { + "epoch": 1.74, + "learning_rate": 3.7804095142494475e-05, + "loss": 0.7814, + "step": 10953 + }, + { + "epoch": 1.75, + "learning_rate": 3.7801879231949575e-05, + "loss": 0.8509, + "step": 10954 + }, + { + "epoch": 1.75, + "learning_rate": 3.7799663185072446e-05, + "loss": 0.7803, + "step": 10955 + }, + { + "epoch": 1.75, + "learning_rate": 3.7797447001886654e-05, + "loss": 0.8095, + "step": 10956 + }, + { + "epoch": 1.75, + "learning_rate": 3.779523068241583e-05, + "loss": 0.839, + "step": 10957 + }, + { + "epoch": 1.75, + "learning_rate": 3.779301422668357e-05, + "loss": 0.8022, + "step": 10958 + }, + { + "epoch": 1.75, + "learning_rate": 3.779079763471347e-05, + "loss": 0.7673, + "step": 10959 + }, + { + "epoch": 1.75, + "learning_rate": 3.778858090652914e-05, + "loss": 0.7638, + "step": 10960 + }, + { + "epoch": 1.75, + "learning_rate": 3.77863640421542e-05, + "loss": 0.8766, + "step": 10961 + }, + { + "epoch": 1.75, + "learning_rate": 3.778414704161224e-05, + "loss": 0.8695, + "step": 10962 + }, + { + "epoch": 1.75, + "learning_rate": 3.778192990492687e-05, + "loss": 0.7944, + "step": 10963 + }, + { + "epoch": 1.75, + "learning_rate": 3.777971263212171e-05, + "loss": 0.8618, + "step": 10964 + }, + { + "epoch": 1.75, + "learning_rate": 3.777749522322036e-05, + "loss": 0.8204, + "step": 10965 + }, + { + "epoch": 1.75, + "learning_rate": 3.777527767824645e-05, + "loss": 0.8726, + "step": 10966 + }, + { + "epoch": 1.75, + "learning_rate": 3.777305999722359e-05, + "loss": 0.7499, + "step": 10967 + }, + { + "epoch": 1.75, + "learning_rate": 3.77708421801754e-05, + "loss": 0.7762, + "step": 10968 + }, + { + "epoch": 1.75, + "learning_rate": 3.776862422712549e-05, + "loss": 0.7622, + "step": 10969 + }, + { + "epoch": 1.75, + "learning_rate": 3.776640613809748e-05, + "loss": 0.8617, + "step": 10970 + }, + { + "epoch": 1.75, + "learning_rate": 3.7764187913115e-05, + "loss": 0.788, + "step": 10971 + }, + { + "epoch": 1.75, + "learning_rate": 3.7761969552201673e-05, + "loss": 0.8029, + "step": 10972 + }, + { + "epoch": 1.75, + "learning_rate": 3.775975105538111e-05, + "loss": 0.843, + "step": 10973 + }, + { + "epoch": 1.75, + "learning_rate": 3.775753242267694e-05, + "loss": 0.8634, + "step": 10974 + }, + { + "epoch": 1.75, + "learning_rate": 3.7755313654112814e-05, + "loss": 0.8407, + "step": 10975 + }, + { + "epoch": 1.75, + "learning_rate": 3.7753094749712324e-05, + "loss": 0.8811, + "step": 10976 + }, + { + "epoch": 1.75, + "learning_rate": 3.775087570949912e-05, + "loss": 0.7856, + "step": 10977 + }, + { + "epoch": 1.75, + "learning_rate": 3.774865653349684e-05, + "loss": 0.8232, + "step": 10978 + }, + { + "epoch": 1.75, + "learning_rate": 3.7746437221729105e-05, + "loss": 0.7712, + "step": 10979 + }, + { + "epoch": 1.75, + "learning_rate": 3.7744217774219546e-05, + "loss": 0.7901, + "step": 10980 + }, + { + "epoch": 1.75, + "learning_rate": 3.7741998190991815e-05, + "loss": 0.7793, + "step": 10981 + }, + { + "epoch": 1.75, + "learning_rate": 3.7739778472069534e-05, + "loss": 0.8425, + "step": 10982 + }, + { + "epoch": 1.75, + "learning_rate": 3.7737558617476334e-05, + "loss": 0.8197, + "step": 10983 + }, + { + "epoch": 1.75, + "learning_rate": 3.773533862723588e-05, + "loss": 0.839, + "step": 10984 + }, + { + "epoch": 1.75, + "learning_rate": 3.77331185013718e-05, + "loss": 0.8183, + "step": 10985 + }, + { + "epoch": 1.75, + "learning_rate": 3.773089823990774e-05, + "loss": 0.8629, + "step": 10986 + }, + { + "epoch": 1.75, + "learning_rate": 3.772867784286734e-05, + "loss": 0.7948, + "step": 10987 + }, + { + "epoch": 1.75, + "learning_rate": 3.7726457310274255e-05, + "loss": 0.8208, + "step": 10988 + }, + { + "epoch": 1.75, + "learning_rate": 3.7724236642152115e-05, + "loss": 0.8772, + "step": 10989 + }, + { + "epoch": 1.75, + "learning_rate": 3.7722015838524586e-05, + "loss": 0.7749, + "step": 10990 + }, + { + "epoch": 1.75, + "learning_rate": 3.7719794899415316e-05, + "loss": 0.8147, + "step": 10991 + }, + { + "epoch": 1.75, + "learning_rate": 3.7717573824847954e-05, + "loss": 0.7357, + "step": 10992 + }, + { + "epoch": 1.75, + "learning_rate": 3.771535261484614e-05, + "loss": 0.9187, + "step": 10993 + }, + { + "epoch": 1.75, + "learning_rate": 3.771313126943355e-05, + "loss": 0.8256, + "step": 10994 + }, + { + "epoch": 1.75, + "learning_rate": 3.771090978863383e-05, + "loss": 0.836, + "step": 10995 + }, + { + "epoch": 1.75, + "learning_rate": 3.770868817247063e-05, + "loss": 0.7384, + "step": 10996 + }, + { + "epoch": 1.75, + "learning_rate": 3.7706466420967625e-05, + "loss": 0.7841, + "step": 10997 + }, + { + "epoch": 1.75, + "learning_rate": 3.770424453414846e-05, + "loss": 0.8306, + "step": 10998 + }, + { + "epoch": 1.75, + "learning_rate": 3.770202251203681e-05, + "loss": 0.9061, + "step": 10999 + }, + { + "epoch": 1.75, + "learning_rate": 3.7699800354656335e-05, + "loss": 0.7076, + "step": 11000 + }, + { + "epoch": 1.75, + "learning_rate": 3.7697578062030685e-05, + "loss": 0.8926, + "step": 11001 + }, + { + "epoch": 1.75, + "learning_rate": 3.769535563418355e-05, + "loss": 0.8353, + "step": 11002 + }, + { + "epoch": 1.75, + "learning_rate": 3.7693133071138574e-05, + "loss": 0.8405, + "step": 11003 + }, + { + "epoch": 1.75, + "learning_rate": 3.7690910372919444e-05, + "loss": 0.8421, + "step": 11004 + }, + { + "epoch": 1.75, + "learning_rate": 3.768868753954982e-05, + "loss": 0.8132, + "step": 11005 + }, + { + "epoch": 1.75, + "learning_rate": 3.768646457105337e-05, + "loss": 0.8075, + "step": 11006 + }, + { + "epoch": 1.75, + "learning_rate": 3.7684241467453787e-05, + "loss": 0.8113, + "step": 11007 + }, + { + "epoch": 1.75, + "learning_rate": 3.768201822877473e-05, + "loss": 0.8324, + "step": 11008 + }, + { + "epoch": 1.75, + "learning_rate": 3.767979485503988e-05, + "loss": 0.8429, + "step": 11009 + }, + { + "epoch": 1.75, + "learning_rate": 3.76775713462729e-05, + "loss": 0.8797, + "step": 11010 + }, + { + "epoch": 1.75, + "learning_rate": 3.76753477024975e-05, + "loss": 0.8186, + "step": 11011 + }, + { + "epoch": 1.75, + "learning_rate": 3.767312392373733e-05, + "loss": 0.8252, + "step": 11012 + }, + { + "epoch": 1.75, + "learning_rate": 3.767090001001609e-05, + "loss": 0.8615, + "step": 11013 + }, + { + "epoch": 1.75, + "learning_rate": 3.766867596135746e-05, + "loss": 0.8405, + "step": 11014 + }, + { + "epoch": 1.75, + "learning_rate": 3.766645177778511e-05, + "loss": 0.7627, + "step": 11015 + }, + { + "epoch": 1.75, + "learning_rate": 3.766422745932275e-05, + "loss": 0.8066, + "step": 11016 + }, + { + "epoch": 1.76, + "learning_rate": 3.7662003005994054e-05, + "loss": 0.7856, + "step": 11017 + }, + { + "epoch": 1.76, + "learning_rate": 3.7659778417822706e-05, + "loss": 0.7585, + "step": 11018 + }, + { + "epoch": 1.76, + "learning_rate": 3.765755369483242e-05, + "loss": 0.8165, + "step": 11019 + }, + { + "epoch": 1.76, + "learning_rate": 3.765532883704686e-05, + "loss": 0.7826, + "step": 11020 + }, + { + "epoch": 1.76, + "learning_rate": 3.765310384448973e-05, + "loss": 0.8244, + "step": 11021 + }, + { + "epoch": 1.76, + "learning_rate": 3.765087871718473e-05, + "loss": 0.8052, + "step": 11022 + }, + { + "epoch": 1.76, + "learning_rate": 3.764865345515555e-05, + "loss": 0.8054, + "step": 11023 + }, + { + "epoch": 1.76, + "learning_rate": 3.764642805842589e-05, + "loss": 0.7852, + "step": 11024 + }, + { + "epoch": 1.76, + "learning_rate": 3.764420252701945e-05, + "loss": 0.8262, + "step": 11025 + }, + { + "epoch": 1.76, + "learning_rate": 3.764197686095993e-05, + "loss": 0.8476, + "step": 11026 + }, + { + "epoch": 1.76, + "learning_rate": 3.763975106027102e-05, + "loss": 0.8652, + "step": 11027 + }, + { + "epoch": 1.76, + "learning_rate": 3.7637525124976454e-05, + "loss": 0.9272, + "step": 11028 + }, + { + "epoch": 1.76, + "learning_rate": 3.7635299055099906e-05, + "loss": 0.8247, + "step": 11029 + }, + { + "epoch": 1.76, + "learning_rate": 3.76330728506651e-05, + "loss": 0.8559, + "step": 11030 + }, + { + "epoch": 1.76, + "learning_rate": 3.763084651169574e-05, + "loss": 0.8271, + "step": 11031 + }, + { + "epoch": 1.76, + "learning_rate": 3.762862003821552e-05, + "loss": 0.8044, + "step": 11032 + }, + { + "epoch": 1.76, + "learning_rate": 3.7626393430248173e-05, + "loss": 0.8392, + "step": 11033 + }, + { + "epoch": 1.76, + "learning_rate": 3.762416668781741e-05, + "loss": 0.7668, + "step": 11034 + }, + { + "epoch": 1.76, + "learning_rate": 3.7621939810946924e-05, + "loss": 0.8168, + "step": 11035 + }, + { + "epoch": 1.76, + "learning_rate": 3.761971279966044e-05, + "loss": 0.7892, + "step": 11036 + }, + { + "epoch": 1.76, + "learning_rate": 3.7617485653981686e-05, + "loss": 0.7546, + "step": 11037 + }, + { + "epoch": 1.76, + "learning_rate": 3.761525837393436e-05, + "loss": 0.7565, + "step": 11038 + }, + { + "epoch": 1.76, + "learning_rate": 3.76130309595422e-05, + "loss": 0.7644, + "step": 11039 + }, + { + "epoch": 1.76, + "learning_rate": 3.7610803410828913e-05, + "loss": 0.7605, + "step": 11040 + }, + { + "epoch": 1.76, + "learning_rate": 3.7608575727818226e-05, + "loss": 0.8393, + "step": 11041 + }, + { + "epoch": 1.76, + "learning_rate": 3.760634791053387e-05, + "loss": 0.8655, + "step": 11042 + }, + { + "epoch": 1.76, + "learning_rate": 3.760411995899955e-05, + "loss": 0.827, + "step": 11043 + }, + { + "epoch": 1.76, + "learning_rate": 3.7601891873239014e-05, + "loss": 0.8022, + "step": 11044 + }, + { + "epoch": 1.76, + "learning_rate": 3.759966365327598e-05, + "loss": 0.7179, + "step": 11045 + }, + { + "epoch": 1.76, + "learning_rate": 3.759743529913417e-05, + "loss": 0.748, + "step": 11046 + }, + { + "epoch": 1.76, + "learning_rate": 3.7595206810837324e-05, + "loss": 0.8672, + "step": 11047 + }, + { + "epoch": 1.76, + "learning_rate": 3.759297818840918e-05, + "loss": 0.7885, + "step": 11048 + }, + { + "epoch": 1.76, + "learning_rate": 3.759074943187346e-05, + "loss": 0.8313, + "step": 11049 + }, + { + "epoch": 1.76, + "learning_rate": 3.75885205412539e-05, + "loss": 0.8215, + "step": 11050 + }, + { + "epoch": 1.76, + "learning_rate": 3.758629151657425e-05, + "loss": 0.8903, + "step": 11051 + }, + { + "epoch": 1.76, + "learning_rate": 3.758406235785822e-05, + "loss": 0.7651, + "step": 11052 + }, + { + "epoch": 1.76, + "learning_rate": 3.7581833065129566e-05, + "loss": 0.8049, + "step": 11053 + }, + { + "epoch": 1.76, + "learning_rate": 3.757960363841204e-05, + "loss": 0.7731, + "step": 11054 + }, + { + "epoch": 1.76, + "learning_rate": 3.757737407772937e-05, + "loss": 0.9158, + "step": 11055 + }, + { + "epoch": 1.76, + "learning_rate": 3.757514438310529e-05, + "loss": 0.8036, + "step": 11056 + }, + { + "epoch": 1.76, + "learning_rate": 3.757291455456357e-05, + "loss": 0.8066, + "step": 11057 + }, + { + "epoch": 1.76, + "learning_rate": 3.7570684592127935e-05, + "loss": 0.8052, + "step": 11058 + }, + { + "epoch": 1.76, + "learning_rate": 3.7568454495822146e-05, + "loss": 0.8406, + "step": 11059 + }, + { + "epoch": 1.76, + "learning_rate": 3.7566224265669944e-05, + "loss": 0.9023, + "step": 11060 + }, + { + "epoch": 1.76, + "learning_rate": 3.756399390169509e-05, + "loss": 0.9261, + "step": 11061 + }, + { + "epoch": 1.76, + "learning_rate": 3.7561763403921324e-05, + "loss": 0.9395, + "step": 11062 + }, + { + "epoch": 1.76, + "learning_rate": 3.7559532772372407e-05, + "loss": 0.811, + "step": 11063 + }, + { + "epoch": 1.76, + "learning_rate": 3.755730200707209e-05, + "loss": 0.7956, + "step": 11064 + }, + { + "epoch": 1.76, + "learning_rate": 3.7555071108044124e-05, + "loss": 0.8168, + "step": 11065 + }, + { + "epoch": 1.76, + "learning_rate": 3.755284007531228e-05, + "loss": 0.7713, + "step": 11066 + }, + { + "epoch": 1.76, + "learning_rate": 3.755060890890031e-05, + "loss": 0.8931, + "step": 11067 + }, + { + "epoch": 1.76, + "learning_rate": 3.7548377608831966e-05, + "loss": 0.8774, + "step": 11068 + }, + { + "epoch": 1.76, + "learning_rate": 3.754614617513103e-05, + "loss": 0.8326, + "step": 11069 + }, + { + "epoch": 1.76, + "learning_rate": 3.7543914607821244e-05, + "loss": 0.8341, + "step": 11070 + }, + { + "epoch": 1.76, + "learning_rate": 3.754168290692639e-05, + "loss": 0.8754, + "step": 11071 + }, + { + "epoch": 1.76, + "learning_rate": 3.7539451072470236e-05, + "loss": 0.8473, + "step": 11072 + }, + { + "epoch": 1.76, + "learning_rate": 3.753721910447653e-05, + "loss": 0.7992, + "step": 11073 + }, + { + "epoch": 1.76, + "learning_rate": 3.7534987002969044e-05, + "loss": 0.8206, + "step": 11074 + }, + { + "epoch": 1.76, + "learning_rate": 3.7532754767971565e-05, + "loss": 0.8847, + "step": 11075 + }, + { + "epoch": 1.76, + "learning_rate": 3.753052239950786e-05, + "loss": 0.8317, + "step": 11076 + }, + { + "epoch": 1.76, + "learning_rate": 3.7528289897601696e-05, + "loss": 0.8547, + "step": 11077 + }, + { + "epoch": 1.76, + "learning_rate": 3.752605726227685e-05, + "loss": 0.7869, + "step": 11078 + }, + { + "epoch": 1.77, + "learning_rate": 3.75238244935571e-05, + "loss": 0.8123, + "step": 11079 + }, + { + "epoch": 1.77, + "learning_rate": 3.752159159146622e-05, + "loss": 0.8671, + "step": 11080 + }, + { + "epoch": 1.77, + "learning_rate": 3.751935855602799e-05, + "loss": 0.7655, + "step": 11081 + }, + { + "epoch": 1.77, + "learning_rate": 3.75171253872662e-05, + "loss": 0.772, + "step": 11082 + }, + { + "epoch": 1.77, + "learning_rate": 3.7514892085204623e-05, + "loss": 0.853, + "step": 11083 + }, + { + "epoch": 1.77, + "learning_rate": 3.751265864986704e-05, + "loss": 0.8041, + "step": 11084 + }, + { + "epoch": 1.77, + "learning_rate": 3.751042508127724e-05, + "loss": 0.8888, + "step": 11085 + }, + { + "epoch": 1.77, + "learning_rate": 3.750819137945901e-05, + "loss": 0.7996, + "step": 11086 + }, + { + "epoch": 1.77, + "learning_rate": 3.7505957544436124e-05, + "loss": 0.8797, + "step": 11087 + }, + { + "epoch": 1.77, + "learning_rate": 3.7503723576232395e-05, + "loss": 0.8137, + "step": 11088 + }, + { + "epoch": 1.77, + "learning_rate": 3.75014894748716e-05, + "loss": 0.7755, + "step": 11089 + }, + { + "epoch": 1.77, + "learning_rate": 3.749925524037753e-05, + "loss": 0.8699, + "step": 11090 + }, + { + "epoch": 1.77, + "learning_rate": 3.749702087277398e-05, + "loss": 0.8374, + "step": 11091 + }, + { + "epoch": 1.77, + "learning_rate": 3.749478637208474e-05, + "loss": 0.8407, + "step": 11092 + }, + { + "epoch": 1.77, + "learning_rate": 3.749255173833362e-05, + "loss": 0.8202, + "step": 11093 + }, + { + "epoch": 1.77, + "learning_rate": 3.7490316971544405e-05, + "loss": 0.7887, + "step": 11094 + }, + { + "epoch": 1.77, + "learning_rate": 3.7488082071740894e-05, + "loss": 0.8305, + "step": 11095 + }, + { + "epoch": 1.77, + "learning_rate": 3.7485847038946895e-05, + "loss": 0.7416, + "step": 11096 + }, + { + "epoch": 1.77, + "learning_rate": 3.7483611873186206e-05, + "loss": 0.8102, + "step": 11097 + }, + { + "epoch": 1.77, + "learning_rate": 3.748137657448262e-05, + "loss": 0.895, + "step": 11098 + }, + { + "epoch": 1.77, + "learning_rate": 3.747914114285996e-05, + "loss": 0.837, + "step": 11099 + }, + { + "epoch": 1.77, + "learning_rate": 3.747690557834202e-05, + "loss": 0.789, + "step": 11100 + }, + { + "epoch": 1.77, + "learning_rate": 3.747466988095261e-05, + "loss": 0.8363, + "step": 11101 + }, + { + "epoch": 1.77, + "learning_rate": 3.7472434050715546e-05, + "loss": 0.9513, + "step": 11102 + }, + { + "epoch": 1.77, + "learning_rate": 3.747019808765463e-05, + "loss": 0.8575, + "step": 11103 + }, + { + "epoch": 1.77, + "learning_rate": 3.7467961991793664e-05, + "loss": 0.7847, + "step": 11104 + }, + { + "epoch": 1.77, + "learning_rate": 3.7465725763156476e-05, + "loss": 0.857, + "step": 11105 + }, + { + "epoch": 1.77, + "learning_rate": 3.7463489401766885e-05, + "loss": 0.8443, + "step": 11106 + }, + { + "epoch": 1.77, + "learning_rate": 3.7461252907648694e-05, + "loss": 0.8005, + "step": 11107 + }, + { + "epoch": 1.77, + "learning_rate": 3.745901628082573e-05, + "loss": 0.7506, + "step": 11108 + }, + { + "epoch": 1.77, + "learning_rate": 3.74567795213218e-05, + "loss": 0.824, + "step": 11109 + }, + { + "epoch": 1.77, + "learning_rate": 3.7454542629160725e-05, + "loss": 0.8963, + "step": 11110 + }, + { + "epoch": 1.77, + "learning_rate": 3.7452305604366344e-05, + "loss": 0.815, + "step": 11111 + }, + { + "epoch": 1.77, + "learning_rate": 3.745006844696247e-05, + "loss": 0.8854, + "step": 11112 + }, + { + "epoch": 1.77, + "learning_rate": 3.744783115697292e-05, + "loss": 0.7791, + "step": 11113 + }, + { + "epoch": 1.77, + "learning_rate": 3.744559373442152e-05, + "loss": 0.8028, + "step": 11114 + }, + { + "epoch": 1.77, + "learning_rate": 3.744335617933211e-05, + "loss": 0.8483, + "step": 11115 + }, + { + "epoch": 1.77, + "learning_rate": 3.744111849172851e-05, + "loss": 0.7885, + "step": 11116 + }, + { + "epoch": 1.77, + "learning_rate": 3.7438880671634545e-05, + "loss": 0.7092, + "step": 11117 + }, + { + "epoch": 1.77, + "learning_rate": 3.743664271907407e-05, + "loss": 0.8348, + "step": 11118 + }, + { + "epoch": 1.77, + "learning_rate": 3.743440463407088e-05, + "loss": 0.7548, + "step": 11119 + }, + { + "epoch": 1.77, + "learning_rate": 3.7432166416648836e-05, + "loss": 0.8377, + "step": 11120 + }, + { + "epoch": 1.77, + "learning_rate": 3.7429928066831776e-05, + "loss": 0.7955, + "step": 11121 + }, + { + "epoch": 1.77, + "learning_rate": 3.7427689584643523e-05, + "loss": 0.8355, + "step": 11122 + }, + { + "epoch": 1.77, + "learning_rate": 3.7425450970107925e-05, + "loss": 0.8177, + "step": 11123 + }, + { + "epoch": 1.77, + "learning_rate": 3.7423212223248805e-05, + "loss": 0.8569, + "step": 11124 + }, + { + "epoch": 1.77, + "learning_rate": 3.7420973344090024e-05, + "loss": 0.8912, + "step": 11125 + }, + { + "epoch": 1.77, + "learning_rate": 3.741873433265543e-05, + "loss": 0.8534, + "step": 11126 + }, + { + "epoch": 1.77, + "learning_rate": 3.741649518896883e-05, + "loss": 0.757, + "step": 11127 + }, + { + "epoch": 1.77, + "learning_rate": 3.741425591305411e-05, + "loss": 0.783, + "step": 11128 + }, + { + "epoch": 1.77, + "learning_rate": 3.741201650493511e-05, + "loss": 0.765, + "step": 11129 + }, + { + "epoch": 1.77, + "learning_rate": 3.740977696463565e-05, + "loss": 0.8573, + "step": 11130 + }, + { + "epoch": 1.77, + "learning_rate": 3.740753729217961e-05, + "loss": 0.783, + "step": 11131 + }, + { + "epoch": 1.77, + "learning_rate": 3.7405297487590834e-05, + "loss": 0.8501, + "step": 11132 + }, + { + "epoch": 1.77, + "learning_rate": 3.740305755089317e-05, + "loss": 0.8422, + "step": 11133 + }, + { + "epoch": 1.77, + "learning_rate": 3.740081748211047e-05, + "loss": 0.8403, + "step": 11134 + }, + { + "epoch": 1.77, + "learning_rate": 3.739857728126659e-05, + "loss": 0.8631, + "step": 11135 + }, + { + "epoch": 1.77, + "learning_rate": 3.739633694838539e-05, + "loss": 0.9153, + "step": 11136 + }, + { + "epoch": 1.77, + "learning_rate": 3.7394096483490734e-05, + "loss": 0.8569, + "step": 11137 + }, + { + "epoch": 1.77, + "learning_rate": 3.739185588660646e-05, + "loss": 0.7728, + "step": 11138 + }, + { + "epoch": 1.77, + "learning_rate": 3.738961515775646e-05, + "loss": 0.8622, + "step": 11139 + }, + { + "epoch": 1.77, + "learning_rate": 3.738737429696457e-05, + "loss": 0.6431, + "step": 11140 + }, + { + "epoch": 1.77, + "learning_rate": 3.7385133304254674e-05, + "loss": 0.7983, + "step": 11141 + }, + { + "epoch": 1.78, + "learning_rate": 3.738289217965062e-05, + "loss": 0.7991, + "step": 11142 + }, + { + "epoch": 1.78, + "learning_rate": 3.738065092317629e-05, + "loss": 0.8728, + "step": 11143 + }, + { + "epoch": 1.78, + "learning_rate": 3.737840953485554e-05, + "loss": 0.7866, + "step": 11144 + }, + { + "epoch": 1.78, + "learning_rate": 3.7376168014712246e-05, + "loss": 0.8609, + "step": 11145 + }, + { + "epoch": 1.78, + "learning_rate": 3.7373926362770275e-05, + "loss": 0.8806, + "step": 11146 + }, + { + "epoch": 1.78, + "learning_rate": 3.737168457905349e-05, + "loss": 0.8575, + "step": 11147 + }, + { + "epoch": 1.78, + "learning_rate": 3.736944266358579e-05, + "loss": 0.8554, + "step": 11148 + }, + { + "epoch": 1.78, + "learning_rate": 3.736720061639103e-05, + "loss": 0.8277, + "step": 11149 + }, + { + "epoch": 1.78, + "learning_rate": 3.7364958437493084e-05, + "loss": 0.806, + "step": 11150 + }, + { + "epoch": 1.78, + "learning_rate": 3.736271612691584e-05, + "loss": 0.7476, + "step": 11151 + }, + { + "epoch": 1.78, + "learning_rate": 3.736047368468319e-05, + "loss": 0.7923, + "step": 11152 + }, + { + "epoch": 1.78, + "learning_rate": 3.735823111081898e-05, + "loss": 0.7404, + "step": 11153 + }, + { + "epoch": 1.78, + "learning_rate": 3.735598840534713e-05, + "loss": 0.7266, + "step": 11154 + }, + { + "epoch": 1.78, + "learning_rate": 3.73537455682915e-05, + "loss": 0.769, + "step": 11155 + }, + { + "epoch": 1.78, + "learning_rate": 3.735150259967597e-05, + "loss": 0.8849, + "step": 11156 + }, + { + "epoch": 1.78, + "learning_rate": 3.734925949952444e-05, + "loss": 0.8743, + "step": 11157 + }, + { + "epoch": 1.78, + "learning_rate": 3.73470162678608e-05, + "loss": 0.8832, + "step": 11158 + }, + { + "epoch": 1.78, + "learning_rate": 3.734477290470892e-05, + "loss": 0.8395, + "step": 11159 + }, + { + "epoch": 1.78, + "learning_rate": 3.734252941009271e-05, + "loss": 0.8907, + "step": 11160 + }, + { + "epoch": 1.78, + "learning_rate": 3.734028578403606e-05, + "loss": 0.8202, + "step": 11161 + }, + { + "epoch": 1.78, + "learning_rate": 3.7338042026562854e-05, + "loss": 0.8678, + "step": 11162 + }, + { + "epoch": 1.78, + "learning_rate": 3.7335798137697e-05, + "loss": 0.8643, + "step": 11163 + }, + { + "epoch": 1.78, + "learning_rate": 3.733355411746237e-05, + "loss": 0.8433, + "step": 11164 + }, + { + "epoch": 1.78, + "learning_rate": 3.733130996588288e-05, + "loss": 0.7651, + "step": 11165 + }, + { + "epoch": 1.78, + "learning_rate": 3.732906568298243e-05, + "loss": 0.7952, + "step": 11166 + }, + { + "epoch": 1.78, + "learning_rate": 3.732682126878492e-05, + "loss": 0.8001, + "step": 11167 + }, + { + "epoch": 1.78, + "learning_rate": 3.732457672331424e-05, + "loss": 0.9704, + "step": 11168 + }, + { + "epoch": 1.78, + "learning_rate": 3.73223320465943e-05, + "loss": 0.7769, + "step": 11169 + }, + { + "epoch": 1.78, + "learning_rate": 3.732008723864901e-05, + "loss": 0.7797, + "step": 11170 + }, + { + "epoch": 1.78, + "learning_rate": 3.7317842299502256e-05, + "loss": 0.7966, + "step": 11171 + }, + { + "epoch": 1.78, + "learning_rate": 3.731559722917798e-05, + "loss": 0.7891, + "step": 11172 + }, + { + "epoch": 1.78, + "learning_rate": 3.731335202770006e-05, + "loss": 0.773, + "step": 11173 + }, + { + "epoch": 1.78, + "learning_rate": 3.731110669509243e-05, + "loss": 0.8074, + "step": 11174 + }, + { + "epoch": 1.78, + "learning_rate": 3.730886123137897e-05, + "loss": 0.8089, + "step": 11175 + }, + { + "epoch": 1.78, + "learning_rate": 3.7306615636583614e-05, + "loss": 0.7663, + "step": 11176 + }, + { + "epoch": 1.78, + "learning_rate": 3.7304369910730276e-05, + "loss": 0.8711, + "step": 11177 + }, + { + "epoch": 1.78, + "learning_rate": 3.730212405384287e-05, + "loss": 0.8029, + "step": 11178 + }, + { + "epoch": 1.78, + "learning_rate": 3.72998780659453e-05, + "loss": 0.786, + "step": 11179 + }, + { + "epoch": 1.78, + "learning_rate": 3.729763194706151e-05, + "loss": 0.8274, + "step": 11180 + }, + { + "epoch": 1.78, + "learning_rate": 3.7295385697215405e-05, + "loss": 0.8144, + "step": 11181 + }, + { + "epoch": 1.78, + "learning_rate": 3.72931393164309e-05, + "loss": 0.8104, + "step": 11182 + }, + { + "epoch": 1.78, + "learning_rate": 3.7290892804731936e-05, + "loss": 0.7598, + "step": 11183 + }, + { + "epoch": 1.78, + "learning_rate": 3.728864616214242e-05, + "loss": 0.7981, + "step": 11184 + }, + { + "epoch": 1.78, + "learning_rate": 3.7286399388686275e-05, + "loss": 0.8528, + "step": 11185 + }, + { + "epoch": 1.78, + "learning_rate": 3.7284152484387446e-05, + "loss": 0.9508, + "step": 11186 + }, + { + "epoch": 1.78, + "learning_rate": 3.7281905449269846e-05, + "loss": 0.8683, + "step": 11187 + }, + { + "epoch": 1.78, + "learning_rate": 3.727965828335741e-05, + "loss": 0.8783, + "step": 11188 + }, + { + "epoch": 1.78, + "learning_rate": 3.727741098667407e-05, + "loss": 0.8674, + "step": 11189 + }, + { + "epoch": 1.78, + "learning_rate": 3.7275163559243756e-05, + "loss": 0.874, + "step": 11190 + }, + { + "epoch": 1.78, + "learning_rate": 3.72729160010904e-05, + "loss": 0.7502, + "step": 11191 + }, + { + "epoch": 1.78, + "learning_rate": 3.727066831223794e-05, + "loss": 0.78, + "step": 11192 + }, + { + "epoch": 1.78, + "learning_rate": 3.726842049271032e-05, + "loss": 0.793, + "step": 11193 + }, + { + "epoch": 1.78, + "learning_rate": 3.726617254253146e-05, + "loss": 0.9282, + "step": 11194 + }, + { + "epoch": 1.78, + "learning_rate": 3.7263924461725316e-05, + "loss": 0.7832, + "step": 11195 + }, + { + "epoch": 1.78, + "learning_rate": 3.726167625031582e-05, + "loss": 0.8225, + "step": 11196 + }, + { + "epoch": 1.78, + "learning_rate": 3.725942790832691e-05, + "loss": 0.7873, + "step": 11197 + }, + { + "epoch": 1.78, + "learning_rate": 3.725717943578255e-05, + "loss": 0.8301, + "step": 11198 + }, + { + "epoch": 1.78, + "learning_rate": 3.7254930832706656e-05, + "loss": 0.7794, + "step": 11199 + }, + { + "epoch": 1.78, + "learning_rate": 3.72526820991232e-05, + "loss": 0.9028, + "step": 11200 + }, + { + "epoch": 1.78, + "learning_rate": 3.725043323505612e-05, + "loss": 0.7756, + "step": 11201 + }, + { + "epoch": 1.78, + "learning_rate": 3.724818424052935e-05, + "loss": 0.8591, + "step": 11202 + }, + { + "epoch": 1.78, + "learning_rate": 3.724593511556687e-05, + "loss": 0.8078, + "step": 11203 + }, + { + "epoch": 1.78, + "learning_rate": 3.72436858601926e-05, + "loss": 0.7877, + "step": 11204 + }, + { + "epoch": 1.79, + "learning_rate": 3.724143647443051e-05, + "loss": 0.7949, + "step": 11205 + }, + { + "epoch": 1.79, + "learning_rate": 3.7239186958304565e-05, + "loss": 0.8161, + "step": 11206 + }, + { + "epoch": 1.79, + "learning_rate": 3.723693731183871e-05, + "loss": 0.8428, + "step": 11207 + }, + { + "epoch": 1.79, + "learning_rate": 3.7234687535056885e-05, + "loss": 0.7852, + "step": 11208 + }, + { + "epoch": 1.79, + "learning_rate": 3.723243762798307e-05, + "loss": 0.8157, + "step": 11209 + }, + { + "epoch": 1.79, + "learning_rate": 3.723018759064123e-05, + "loss": 0.7497, + "step": 11210 + }, + { + "epoch": 1.79, + "learning_rate": 3.722793742305531e-05, + "loss": 0.7974, + "step": 11211 + }, + { + "epoch": 1.79, + "learning_rate": 3.722568712524929e-05, + "loss": 0.8122, + "step": 11212 + }, + { + "epoch": 1.79, + "learning_rate": 3.7223436697247116e-05, + "loss": 0.9115, + "step": 11213 + }, + { + "epoch": 1.79, + "learning_rate": 3.7221186139072764e-05, + "loss": 0.8786, + "step": 11214 + }, + { + "epoch": 1.79, + "learning_rate": 3.72189354507502e-05, + "loss": 0.7493, + "step": 11215 + }, + { + "epoch": 1.79, + "learning_rate": 3.7216684632303395e-05, + "loss": 0.861, + "step": 11216 + }, + { + "epoch": 1.79, + "learning_rate": 3.721443368375631e-05, + "loss": 0.7267, + "step": 11217 + }, + { + "epoch": 1.79, + "learning_rate": 3.721218260513293e-05, + "loss": 0.8226, + "step": 11218 + }, + { + "epoch": 1.79, + "learning_rate": 3.720993139645721e-05, + "loss": 0.7964, + "step": 11219 + }, + { + "epoch": 1.79, + "learning_rate": 3.720768005775314e-05, + "loss": 0.9083, + "step": 11220 + }, + { + "epoch": 1.79, + "learning_rate": 3.720542858904468e-05, + "loss": 0.7669, + "step": 11221 + }, + { + "epoch": 1.79, + "learning_rate": 3.7203176990355825e-05, + "loss": 0.8141, + "step": 11222 + }, + { + "epoch": 1.79, + "learning_rate": 3.720092526171054e-05, + "loss": 0.8237, + "step": 11223 + }, + { + "epoch": 1.79, + "learning_rate": 3.719867340313281e-05, + "loss": 0.8348, + "step": 11224 + }, + { + "epoch": 1.79, + "learning_rate": 3.719642141464661e-05, + "loss": 0.8909, + "step": 11225 + }, + { + "epoch": 1.79, + "learning_rate": 3.7194169296275935e-05, + "loss": 0.878, + "step": 11226 + }, + { + "epoch": 1.79, + "learning_rate": 3.719191704804476e-05, + "loss": 0.8117, + "step": 11227 + }, + { + "epoch": 1.79, + "learning_rate": 3.7189664669977056e-05, + "loss": 0.8311, + "step": 11228 + }, + { + "epoch": 1.79, + "learning_rate": 3.718741216209683e-05, + "loss": 0.7966, + "step": 11229 + }, + { + "epoch": 1.79, + "learning_rate": 3.718515952442807e-05, + "loss": 0.8108, + "step": 11230 + }, + { + "epoch": 1.79, + "learning_rate": 3.718290675699475e-05, + "loss": 0.854, + "step": 11231 + }, + { + "epoch": 1.79, + "learning_rate": 3.718065385982088e-05, + "loss": 0.8345, + "step": 11232 + }, + { + "epoch": 1.79, + "learning_rate": 3.7178400832930434e-05, + "loss": 0.8289, + "step": 11233 + }, + { + "epoch": 1.79, + "learning_rate": 3.717614767634742e-05, + "loss": 0.7126, + "step": 11234 + }, + { + "epoch": 1.79, + "learning_rate": 3.717389439009582e-05, + "loss": 0.8304, + "step": 11235 + }, + { + "epoch": 1.79, + "learning_rate": 3.717164097419964e-05, + "loss": 0.8443, + "step": 11236 + }, + { + "epoch": 1.79, + "learning_rate": 3.716938742868287e-05, + "loss": 0.8901, + "step": 11237 + }, + { + "epoch": 1.79, + "learning_rate": 3.7167133753569505e-05, + "loss": 0.7612, + "step": 11238 + }, + { + "epoch": 1.79, + "learning_rate": 3.716487994888356e-05, + "loss": 0.7527, + "step": 11239 + }, + { + "epoch": 1.79, + "learning_rate": 3.716262601464903e-05, + "loss": 0.84, + "step": 11240 + }, + { + "epoch": 1.79, + "learning_rate": 3.716037195088991e-05, + "loss": 0.818, + "step": 11241 + }, + { + "epoch": 1.79, + "learning_rate": 3.715811775763022e-05, + "loss": 0.8097, + "step": 11242 + }, + { + "epoch": 1.79, + "learning_rate": 3.7155863434893956e-05, + "loss": 0.7963, + "step": 11243 + }, + { + "epoch": 1.79, + "learning_rate": 3.7153608982705126e-05, + "loss": 0.7703, + "step": 11244 + }, + { + "epoch": 1.79, + "learning_rate": 3.715135440108773e-05, + "loss": 0.804, + "step": 11245 + }, + { + "epoch": 1.79, + "learning_rate": 3.71490996900658e-05, + "loss": 0.8293, + "step": 11246 + }, + { + "epoch": 1.79, + "learning_rate": 3.7146844849663325e-05, + "loss": 0.7791, + "step": 11247 + }, + { + "epoch": 1.79, + "learning_rate": 3.714458987990433e-05, + "loss": 0.8392, + "step": 11248 + }, + { + "epoch": 1.79, + "learning_rate": 3.7142334780812824e-05, + "loss": 0.8139, + "step": 11249 + }, + { + "epoch": 1.79, + "learning_rate": 3.7140079552412834e-05, + "loss": 0.8407, + "step": 11250 + }, + { + "epoch": 1.79, + "learning_rate": 3.7137824194728356e-05, + "loss": 0.7922, + "step": 11251 + }, + { + "epoch": 1.79, + "learning_rate": 3.713556870778342e-05, + "loss": 0.8546, + "step": 11252 + }, + { + "epoch": 1.79, + "learning_rate": 3.7133313091602054e-05, + "loss": 0.7939, + "step": 11253 + }, + { + "epoch": 1.79, + "learning_rate": 3.713105734620826e-05, + "loss": 0.8788, + "step": 11254 + }, + { + "epoch": 1.79, + "learning_rate": 3.7128801471626073e-05, + "loss": 0.8682, + "step": 11255 + }, + { + "epoch": 1.79, + "learning_rate": 3.712654546787952e-05, + "loss": 0.8612, + "step": 11256 + }, + { + "epoch": 1.79, + "learning_rate": 3.712428933499261e-05, + "loss": 0.8085, + "step": 11257 + }, + { + "epoch": 1.79, + "learning_rate": 3.712203307298939e-05, + "loss": 0.8171, + "step": 11258 + }, + { + "epoch": 1.79, + "learning_rate": 3.711977668189387e-05, + "loss": 0.7893, + "step": 11259 + }, + { + "epoch": 1.79, + "learning_rate": 3.7117520161730085e-05, + "loss": 0.8526, + "step": 11260 + }, + { + "epoch": 1.79, + "learning_rate": 3.711526351252207e-05, + "loss": 0.7931, + "step": 11261 + }, + { + "epoch": 1.79, + "learning_rate": 3.7113006734293856e-05, + "loss": 0.7939, + "step": 11262 + }, + { + "epoch": 1.79, + "learning_rate": 3.711074982706947e-05, + "loss": 0.8599, + "step": 11263 + }, + { + "epoch": 1.79, + "learning_rate": 3.710849279087296e-05, + "loss": 0.9611, + "step": 11264 + }, + { + "epoch": 1.79, + "learning_rate": 3.7106235625728334e-05, + "loss": 0.8235, + "step": 11265 + }, + { + "epoch": 1.79, + "learning_rate": 3.710397833165967e-05, + "loss": 0.8726, + "step": 11266 + }, + { + "epoch": 1.79, + "learning_rate": 3.7101720908690975e-05, + "loss": 0.805, + "step": 11267 + }, + { + "epoch": 1.8, + "learning_rate": 3.70994633568463e-05, + "loss": 0.8365, + "step": 11268 + }, + { + "epoch": 1.8, + "learning_rate": 3.709720567614968e-05, + "loss": 0.7725, + "step": 11269 + }, + { + "epoch": 1.8, + "learning_rate": 3.7094947866625177e-05, + "loss": 0.7689, + "step": 11270 + }, + { + "epoch": 1.8, + "learning_rate": 3.709268992829681e-05, + "loss": 0.7983, + "step": 11271 + }, + { + "epoch": 1.8, + "learning_rate": 3.709043186118865e-05, + "loss": 0.8158, + "step": 11272 + }, + { + "epoch": 1.8, + "learning_rate": 3.708817366532472e-05, + "loss": 0.8912, + "step": 11273 + }, + { + "epoch": 1.8, + "learning_rate": 3.7085915340729084e-05, + "loss": 0.8228, + "step": 11274 + }, + { + "epoch": 1.8, + "learning_rate": 3.7083656887425785e-05, + "loss": 0.8712, + "step": 11275 + }, + { + "epoch": 1.8, + "learning_rate": 3.708139830543888e-05, + "loss": 0.77, + "step": 11276 + }, + { + "epoch": 1.8, + "learning_rate": 3.707913959479241e-05, + "loss": 0.8113, + "step": 11277 + }, + { + "epoch": 1.8, + "learning_rate": 3.7076880755510444e-05, + "loss": 0.8062, + "step": 11278 + }, + { + "epoch": 1.8, + "learning_rate": 3.707462178761703e-05, + "loss": 0.9002, + "step": 11279 + }, + { + "epoch": 1.8, + "learning_rate": 3.707236269113622e-05, + "loss": 0.7799, + "step": 11280 + }, + { + "epoch": 1.8, + "learning_rate": 3.7070103466092074e-05, + "loss": 0.7557, + "step": 11281 + }, + { + "epoch": 1.8, + "learning_rate": 3.706784411250867e-05, + "loss": 0.8326, + "step": 11282 + }, + { + "epoch": 1.8, + "learning_rate": 3.7065584630410033e-05, + "loss": 0.8286, + "step": 11283 + }, + { + "epoch": 1.8, + "learning_rate": 3.706332501982026e-05, + "loss": 0.7929, + "step": 11284 + }, + { + "epoch": 1.8, + "learning_rate": 3.706106528076339e-05, + "loss": 0.7917, + "step": 11285 + }, + { + "epoch": 1.8, + "learning_rate": 3.7058805413263504e-05, + "loss": 0.7412, + "step": 11286 + }, + { + "epoch": 1.8, + "learning_rate": 3.7056545417344656e-05, + "loss": 0.8019, + "step": 11287 + }, + { + "epoch": 1.8, + "learning_rate": 3.705428529303092e-05, + "loss": 0.8098, + "step": 11288 + }, + { + "epoch": 1.8, + "learning_rate": 3.705202504034636e-05, + "loss": 0.9994, + "step": 11289 + }, + { + "epoch": 1.8, + "learning_rate": 3.7049764659315064e-05, + "loss": 0.8014, + "step": 11290 + }, + { + "epoch": 1.8, + "learning_rate": 3.7047504149961076e-05, + "loss": 0.745, + "step": 11291 + }, + { + "epoch": 1.8, + "learning_rate": 3.7045243512308484e-05, + "loss": 0.7918, + "step": 11292 + }, + { + "epoch": 1.8, + "learning_rate": 3.704298274638137e-05, + "loss": 0.8169, + "step": 11293 + }, + { + "epoch": 1.8, + "learning_rate": 3.7040721852203795e-05, + "loss": 0.817, + "step": 11294 + }, + { + "epoch": 1.8, + "learning_rate": 3.703846082979985e-05, + "loss": 0.8164, + "step": 11295 + }, + { + "epoch": 1.8, + "learning_rate": 3.70361996791936e-05, + "loss": 0.7643, + "step": 11296 + }, + { + "epoch": 1.8, + "learning_rate": 3.703393840040913e-05, + "loss": 0.8638, + "step": 11297 + }, + { + "epoch": 1.8, + "learning_rate": 3.703167699347052e-05, + "loss": 0.7417, + "step": 11298 + }, + { + "epoch": 1.8, + "learning_rate": 3.702941545840186e-05, + "loss": 0.8568, + "step": 11299 + }, + { + "epoch": 1.8, + "learning_rate": 3.702715379522722e-05, + "loss": 0.8127, + "step": 11300 + }, + { + "epoch": 1.8, + "learning_rate": 3.7024892003970704e-05, + "loss": 0.7751, + "step": 11301 + }, + { + "epoch": 1.8, + "learning_rate": 3.7022630084656385e-05, + "loss": 0.7826, + "step": 11302 + }, + { + "epoch": 1.8, + "learning_rate": 3.7020368037308354e-05, + "loss": 0.7373, + "step": 11303 + }, + { + "epoch": 1.8, + "learning_rate": 3.70181058619507e-05, + "loss": 0.7665, + "step": 11304 + }, + { + "epoch": 1.8, + "learning_rate": 3.701584355860752e-05, + "loss": 0.7828, + "step": 11305 + }, + { + "epoch": 1.8, + "learning_rate": 3.70135811273029e-05, + "loss": 0.8629, + "step": 11306 + }, + { + "epoch": 1.8, + "learning_rate": 3.701131856806092e-05, + "loss": 0.8097, + "step": 11307 + }, + { + "epoch": 1.8, + "learning_rate": 3.7009055880905715e-05, + "loss": 0.9244, + "step": 11308 + }, + { + "epoch": 1.8, + "learning_rate": 3.700679306586133e-05, + "loss": 0.8123, + "step": 11309 + }, + { + "epoch": 1.8, + "learning_rate": 3.700453012295191e-05, + "loss": 0.7838, + "step": 11310 + }, + { + "epoch": 1.8, + "learning_rate": 3.700226705220152e-05, + "loss": 0.8308, + "step": 11311 + }, + { + "epoch": 1.8, + "learning_rate": 3.7000003853634274e-05, + "loss": 0.8864, + "step": 11312 + }, + { + "epoch": 1.8, + "learning_rate": 3.699774052727428e-05, + "loss": 0.842, + "step": 11313 + }, + { + "epoch": 1.8, + "learning_rate": 3.6995477073145624e-05, + "loss": 0.8509, + "step": 11314 + }, + { + "epoch": 1.8, + "learning_rate": 3.699321349127243e-05, + "loss": 0.8346, + "step": 11315 + }, + { + "epoch": 1.8, + "learning_rate": 3.6990949781678784e-05, + "loss": 0.8499, + "step": 11316 + }, + { + "epoch": 1.8, + "learning_rate": 3.69886859443888e-05, + "loss": 0.772, + "step": 11317 + }, + { + "epoch": 1.8, + "learning_rate": 3.69864219794266e-05, + "loss": 0.8298, + "step": 11318 + }, + { + "epoch": 1.8, + "learning_rate": 3.6984157886816275e-05, + "loss": 0.8359, + "step": 11319 + }, + { + "epoch": 1.8, + "learning_rate": 3.698189366658195e-05, + "loss": 0.8162, + "step": 11320 + }, + { + "epoch": 1.8, + "learning_rate": 3.697962931874772e-05, + "loss": 0.8468, + "step": 11321 + }, + { + "epoch": 1.8, + "learning_rate": 3.6977364843337726e-05, + "loss": 0.7901, + "step": 11322 + }, + { + "epoch": 1.8, + "learning_rate": 3.697510024037606e-05, + "loss": 0.884, + "step": 11323 + }, + { + "epoch": 1.8, + "learning_rate": 3.6972835509886846e-05, + "loss": 0.8346, + "step": 11324 + }, + { + "epoch": 1.8, + "learning_rate": 3.697057065189421e-05, + "loss": 0.7633, + "step": 11325 + }, + { + "epoch": 1.8, + "learning_rate": 3.696830566642226e-05, + "loss": 0.7786, + "step": 11326 + }, + { + "epoch": 1.8, + "learning_rate": 3.696604055349512e-05, + "loss": 0.8106, + "step": 11327 + }, + { + "epoch": 1.8, + "learning_rate": 3.696377531313691e-05, + "loss": 0.7924, + "step": 11328 + }, + { + "epoch": 1.8, + "learning_rate": 3.696150994537176e-05, + "loss": 0.8027, + "step": 11329 + }, + { + "epoch": 1.81, + "learning_rate": 3.695924445022379e-05, + "loss": 0.8219, + "step": 11330 + }, + { + "epoch": 1.81, + "learning_rate": 3.6956978827717125e-05, + "loss": 0.7332, + "step": 11331 + }, + { + "epoch": 1.81, + "learning_rate": 3.695471307787589e-05, + "loss": 0.7413, + "step": 11332 + }, + { + "epoch": 1.81, + "learning_rate": 3.6952447200724234e-05, + "loss": 0.8434, + "step": 11333 + }, + { + "epoch": 1.81, + "learning_rate": 3.6950181196286264e-05, + "loss": 0.9085, + "step": 11334 + }, + { + "epoch": 1.81, + "learning_rate": 3.694791506458613e-05, + "loss": 0.7736, + "step": 11335 + }, + { + "epoch": 1.81, + "learning_rate": 3.694564880564794e-05, + "loss": 0.7924, + "step": 11336 + }, + { + "epoch": 1.81, + "learning_rate": 3.694338241949584e-05, + "loss": 0.8418, + "step": 11337 + }, + { + "epoch": 1.81, + "learning_rate": 3.6941115906153975e-05, + "loss": 0.8071, + "step": 11338 + }, + { + "epoch": 1.81, + "learning_rate": 3.693884926564647e-05, + "loss": 0.8485, + "step": 11339 + }, + { + "epoch": 1.81, + "learning_rate": 3.693658249799748e-05, + "loss": 0.7499, + "step": 11340 + }, + { + "epoch": 1.81, + "learning_rate": 3.6934315603231126e-05, + "loss": 0.8403, + "step": 11341 + }, + { + "epoch": 1.81, + "learning_rate": 3.6932048581371556e-05, + "loss": 0.7775, + "step": 11342 + }, + { + "epoch": 1.81, + "learning_rate": 3.6929781432442915e-05, + "loss": 0.7701, + "step": 11343 + }, + { + "epoch": 1.81, + "learning_rate": 3.692751415646934e-05, + "loss": 0.8792, + "step": 11344 + }, + { + "epoch": 1.81, + "learning_rate": 3.692524675347498e-05, + "loss": 0.9079, + "step": 11345 + }, + { + "epoch": 1.81, + "learning_rate": 3.6922979223483986e-05, + "loss": 0.8394, + "step": 11346 + }, + { + "epoch": 1.81, + "learning_rate": 3.692071156652051e-05, + "loss": 0.8219, + "step": 11347 + }, + { + "epoch": 1.81, + "learning_rate": 3.691844378260868e-05, + "loss": 0.8086, + "step": 11348 + }, + { + "epoch": 1.81, + "learning_rate": 3.691617587177266e-05, + "loss": 0.7721, + "step": 11349 + }, + { + "epoch": 1.81, + "learning_rate": 3.691390783403661e-05, + "loss": 0.8447, + "step": 11350 + }, + { + "epoch": 1.81, + "learning_rate": 3.691163966942467e-05, + "loss": 0.7091, + "step": 11351 + }, + { + "epoch": 1.81, + "learning_rate": 3.690937137796099e-05, + "loss": 0.8869, + "step": 11352 + }, + { + "epoch": 1.81, + "learning_rate": 3.6907102959669745e-05, + "loss": 0.7574, + "step": 11353 + }, + { + "epoch": 1.81, + "learning_rate": 3.690483441457509e-05, + "loss": 0.7883, + "step": 11354 + }, + { + "epoch": 1.81, + "learning_rate": 3.690256574270117e-05, + "loss": 0.8747, + "step": 11355 + }, + { + "epoch": 1.81, + "learning_rate": 3.6900296944072144e-05, + "loss": 0.9392, + "step": 11356 + }, + { + "epoch": 1.81, + "learning_rate": 3.6898028018712186e-05, + "loss": 0.8018, + "step": 11357 + }, + { + "epoch": 1.81, + "learning_rate": 3.6895758966645457e-05, + "loss": 0.8118, + "step": 11358 + }, + { + "epoch": 1.81, + "learning_rate": 3.689348978789611e-05, + "loss": 0.8346, + "step": 11359 + }, + { + "epoch": 1.81, + "learning_rate": 3.689122048248832e-05, + "loss": 0.7769, + "step": 11360 + }, + { + "epoch": 1.81, + "learning_rate": 3.688895105044625e-05, + "loss": 0.8011, + "step": 11361 + }, + { + "epoch": 1.81, + "learning_rate": 3.688668149179407e-05, + "loss": 0.8993, + "step": 11362 + }, + { + "epoch": 1.81, + "learning_rate": 3.688441180655595e-05, + "loss": 0.8178, + "step": 11363 + }, + { + "epoch": 1.81, + "learning_rate": 3.688214199475606e-05, + "loss": 0.7834, + "step": 11364 + }, + { + "epoch": 1.81, + "learning_rate": 3.687987205641858e-05, + "loss": 0.7797, + "step": 11365 + }, + { + "epoch": 1.81, + "learning_rate": 3.687760199156766e-05, + "loss": 0.8108, + "step": 11366 + }, + { + "epoch": 1.81, + "learning_rate": 3.687533180022749e-05, + "loss": 0.866, + "step": 11367 + }, + { + "epoch": 1.81, + "learning_rate": 3.687306148242225e-05, + "loss": 0.7205, + "step": 11368 + }, + { + "epoch": 1.81, + "learning_rate": 3.6870791038176115e-05, + "loss": 0.7887, + "step": 11369 + }, + { + "epoch": 1.81, + "learning_rate": 3.6868520467513265e-05, + "loss": 0.9113, + "step": 11370 + }, + { + "epoch": 1.81, + "learning_rate": 3.686624977045787e-05, + "loss": 0.7845, + "step": 11371 + }, + { + "epoch": 1.81, + "learning_rate": 3.686397894703413e-05, + "loss": 0.8776, + "step": 11372 + }, + { + "epoch": 1.81, + "learning_rate": 3.6861707997266206e-05, + "loss": 0.7848, + "step": 11373 + }, + { + "epoch": 1.81, + "learning_rate": 3.68594369211783e-05, + "loss": 0.802, + "step": 11374 + }, + { + "epoch": 1.81, + "learning_rate": 3.685716571879459e-05, + "loss": 0.7878, + "step": 11375 + }, + { + "epoch": 1.81, + "learning_rate": 3.685489439013926e-05, + "loss": 0.9667, + "step": 11376 + }, + { + "epoch": 1.81, + "learning_rate": 3.685262293523651e-05, + "loss": 0.8325, + "step": 11377 + }, + { + "epoch": 1.81, + "learning_rate": 3.6850351354110514e-05, + "loss": 0.8089, + "step": 11378 + }, + { + "epoch": 1.81, + "learning_rate": 3.6848079646785475e-05, + "loss": 0.7853, + "step": 11379 + }, + { + "epoch": 1.81, + "learning_rate": 3.6845807813285574e-05, + "loss": 0.8888, + "step": 11380 + }, + { + "epoch": 1.81, + "learning_rate": 3.6843535853635016e-05, + "loss": 0.8897, + "step": 11381 + }, + { + "epoch": 1.81, + "learning_rate": 3.6841263767857994e-05, + "loss": 0.891, + "step": 11382 + }, + { + "epoch": 1.81, + "learning_rate": 3.6838991555978704e-05, + "loss": 0.7211, + "step": 11383 + }, + { + "epoch": 1.81, + "learning_rate": 3.683671921802133e-05, + "loss": 0.8128, + "step": 11384 + }, + { + "epoch": 1.81, + "learning_rate": 3.68344467540101e-05, + "loss": 0.8817, + "step": 11385 + }, + { + "epoch": 1.81, + "learning_rate": 3.683217416396918e-05, + "loss": 0.7886, + "step": 11386 + }, + { + "epoch": 1.81, + "learning_rate": 3.68299014479228e-05, + "loss": 0.8309, + "step": 11387 + }, + { + "epoch": 1.81, + "learning_rate": 3.6827628605895145e-05, + "loss": 0.8591, + "step": 11388 + }, + { + "epoch": 1.81, + "learning_rate": 3.6825355637910425e-05, + "loss": 0.8318, + "step": 11389 + }, + { + "epoch": 1.81, + "learning_rate": 3.682308254399286e-05, + "loss": 0.8093, + "step": 11390 + }, + { + "epoch": 1.81, + "learning_rate": 3.682080932416663e-05, + "loss": 0.7587, + "step": 11391 + }, + { + "epoch": 1.81, + "learning_rate": 3.681853597845596e-05, + "loss": 0.7695, + "step": 11392 + }, + { + "epoch": 1.82, + "learning_rate": 3.681626250688506e-05, + "loss": 0.8277, + "step": 11393 + }, + { + "epoch": 1.82, + "learning_rate": 3.681398890947813e-05, + "loss": 0.808, + "step": 11394 + }, + { + "epoch": 1.82, + "learning_rate": 3.68117151862594e-05, + "loss": 0.8534, + "step": 11395 + }, + { + "epoch": 1.82, + "learning_rate": 3.680944133725307e-05, + "loss": 0.8385, + "step": 11396 + }, + { + "epoch": 1.82, + "learning_rate": 3.6807167362483364e-05, + "loss": 0.7466, + "step": 11397 + }, + { + "epoch": 1.82, + "learning_rate": 3.680489326197449e-05, + "loss": 0.7792, + "step": 11398 + }, + { + "epoch": 1.82, + "learning_rate": 3.680261903575067e-05, + "loss": 0.8497, + "step": 11399 + }, + { + "epoch": 1.82, + "learning_rate": 3.680034468383613e-05, + "loss": 0.7297, + "step": 11400 + }, + { + "epoch": 1.82, + "learning_rate": 3.6798070206255064e-05, + "loss": 0.7429, + "step": 11401 + }, + { + "epoch": 1.82, + "learning_rate": 3.679579560303173e-05, + "loss": 0.8222, + "step": 11402 + }, + { + "epoch": 1.82, + "learning_rate": 3.679352087419033e-05, + "loss": 0.8004, + "step": 11403 + }, + { + "epoch": 1.82, + "learning_rate": 3.6791246019755085e-05, + "loss": 0.8528, + "step": 11404 + }, + { + "epoch": 1.82, + "learning_rate": 3.678897103975025e-05, + "loss": 0.8079, + "step": 11405 + }, + { + "epoch": 1.82, + "learning_rate": 3.6786695934200014e-05, + "loss": 0.8929, + "step": 11406 + }, + { + "epoch": 1.82, + "learning_rate": 3.678442070312862e-05, + "loss": 0.825, + "step": 11407 + }, + { + "epoch": 1.82, + "learning_rate": 3.678214534656031e-05, + "loss": 0.9269, + "step": 11408 + }, + { + "epoch": 1.82, + "learning_rate": 3.6779869864519304e-05, + "loss": 0.8272, + "step": 11409 + }, + { + "epoch": 1.82, + "learning_rate": 3.6777594257029826e-05, + "loss": 0.8305, + "step": 11410 + }, + { + "epoch": 1.82, + "learning_rate": 3.6775318524116126e-05, + "loss": 0.9012, + "step": 11411 + }, + { + "epoch": 1.82, + "learning_rate": 3.677304266580243e-05, + "loss": 0.8034, + "step": 11412 + }, + { + "epoch": 1.82, + "learning_rate": 3.677076668211298e-05, + "loss": 0.8826, + "step": 11413 + }, + { + "epoch": 1.82, + "learning_rate": 3.6768490573072004e-05, + "loss": 0.808, + "step": 11414 + }, + { + "epoch": 1.82, + "learning_rate": 3.676621433870376e-05, + "loss": 0.8371, + "step": 11415 + }, + { + "epoch": 1.82, + "learning_rate": 3.676393797903247e-05, + "loss": 0.7682, + "step": 11416 + }, + { + "epoch": 1.82, + "learning_rate": 3.6761661494082386e-05, + "loss": 0.7839, + "step": 11417 + }, + { + "epoch": 1.82, + "learning_rate": 3.6759384883877744e-05, + "loss": 0.8451, + "step": 11418 + }, + { + "epoch": 1.82, + "learning_rate": 3.675710814844279e-05, + "loss": 0.7629, + "step": 11419 + }, + { + "epoch": 1.82, + "learning_rate": 3.6754831287801786e-05, + "loss": 0.8379, + "step": 11420 + }, + { + "epoch": 1.82, + "learning_rate": 3.675255430197895e-05, + "loss": 0.6757, + "step": 11421 + }, + { + "epoch": 1.82, + "learning_rate": 3.675027719099855e-05, + "loss": 0.7644, + "step": 11422 + }, + { + "epoch": 1.82, + "learning_rate": 3.674799995488483e-05, + "loss": 0.8077, + "step": 11423 + }, + { + "epoch": 1.82, + "learning_rate": 3.6745722593662045e-05, + "loss": 0.8188, + "step": 11424 + }, + { + "epoch": 1.82, + "learning_rate": 3.6743445107354454e-05, + "loss": 0.7488, + "step": 11425 + }, + { + "epoch": 1.82, + "learning_rate": 3.67411674959863e-05, + "loss": 0.7323, + "step": 11426 + }, + { + "epoch": 1.82, + "learning_rate": 3.673888975958182e-05, + "loss": 0.8748, + "step": 11427 + }, + { + "epoch": 1.82, + "learning_rate": 3.673661189816531e-05, + "loss": 0.7538, + "step": 11428 + }, + { + "epoch": 1.82, + "learning_rate": 3.673433391176101e-05, + "loss": 0.8278, + "step": 11429 + }, + { + "epoch": 1.82, + "learning_rate": 3.673205580039317e-05, + "loss": 0.7387, + "step": 11430 + }, + { + "epoch": 1.82, + "learning_rate": 3.6729777564086056e-05, + "loss": 0.7833, + "step": 11431 + }, + { + "epoch": 1.82, + "learning_rate": 3.672749920286394e-05, + "loss": 0.8069, + "step": 11432 + }, + { + "epoch": 1.82, + "learning_rate": 3.672522071675107e-05, + "loss": 0.8531, + "step": 11433 + }, + { + "epoch": 1.82, + "learning_rate": 3.672294210577172e-05, + "loss": 0.9027, + "step": 11434 + }, + { + "epoch": 1.82, + "learning_rate": 3.672066336995016e-05, + "loss": 0.8363, + "step": 11435 + }, + { + "epoch": 1.82, + "learning_rate": 3.671838450931065e-05, + "loss": 0.8427, + "step": 11436 + }, + { + "epoch": 1.82, + "learning_rate": 3.6716105523877456e-05, + "loss": 0.7754, + "step": 11437 + }, + { + "epoch": 1.82, + "learning_rate": 3.6713826413674844e-05, + "loss": 0.8674, + "step": 11438 + }, + { + "epoch": 1.82, + "learning_rate": 3.671154717872711e-05, + "loss": 0.7568, + "step": 11439 + }, + { + "epoch": 1.82, + "learning_rate": 3.67092678190585e-05, + "loss": 0.7971, + "step": 11440 + }, + { + "epoch": 1.82, + "learning_rate": 3.6706988334693285e-05, + "loss": 0.8153, + "step": 11441 + }, + { + "epoch": 1.82, + "learning_rate": 3.6704708725655765e-05, + "loss": 0.7695, + "step": 11442 + }, + { + "epoch": 1.82, + "learning_rate": 3.67024289919702e-05, + "loss": 0.8801, + "step": 11443 + }, + { + "epoch": 1.82, + "learning_rate": 3.670014913366087e-05, + "loss": 0.7329, + "step": 11444 + }, + { + "epoch": 1.82, + "learning_rate": 3.669786915075206e-05, + "loss": 0.8053, + "step": 11445 + }, + { + "epoch": 1.82, + "learning_rate": 3.669558904326804e-05, + "loss": 0.7694, + "step": 11446 + }, + { + "epoch": 1.82, + "learning_rate": 3.66933088112331e-05, + "loss": 0.7655, + "step": 11447 + }, + { + "epoch": 1.82, + "learning_rate": 3.669102845467152e-05, + "loss": 0.887, + "step": 11448 + }, + { + "epoch": 1.82, + "learning_rate": 3.668874797360758e-05, + "loss": 0.7966, + "step": 11449 + }, + { + "epoch": 1.82, + "learning_rate": 3.6686467368065575e-05, + "loss": 0.7472, + "step": 11450 + }, + { + "epoch": 1.82, + "learning_rate": 3.668418663806979e-05, + "loss": 0.807, + "step": 11451 + }, + { + "epoch": 1.82, + "learning_rate": 3.668190578364451e-05, + "loss": 0.7496, + "step": 11452 + }, + { + "epoch": 1.82, + "learning_rate": 3.667962480481402e-05, + "loss": 0.7882, + "step": 11453 + }, + { + "epoch": 1.82, + "learning_rate": 3.667734370160262e-05, + "loss": 0.8376, + "step": 11454 + }, + { + "epoch": 1.82, + "learning_rate": 3.6675062474034596e-05, + "loss": 0.8013, + "step": 11455 + }, + { + "epoch": 1.83, + "learning_rate": 3.667278112213425e-05, + "loss": 0.8035, + "step": 11456 + }, + { + "epoch": 1.83, + "learning_rate": 3.667049964592587e-05, + "loss": 0.7403, + "step": 11457 + }, + { + "epoch": 1.83, + "learning_rate": 3.666821804543376e-05, + "loss": 0.8551, + "step": 11458 + }, + { + "epoch": 1.83, + "learning_rate": 3.6665936320682204e-05, + "loss": 0.7647, + "step": 11459 + }, + { + "epoch": 1.83, + "learning_rate": 3.6663654471695505e-05, + "loss": 0.7352, + "step": 11460 + }, + { + "epoch": 1.83, + "learning_rate": 3.666137249849798e-05, + "loss": 0.7364, + "step": 11461 + }, + { + "epoch": 1.83, + "learning_rate": 3.6659090401113904e-05, + "loss": 0.7386, + "step": 11462 + }, + { + "epoch": 1.83, + "learning_rate": 3.66568081795676e-05, + "loss": 0.9127, + "step": 11463 + }, + { + "epoch": 1.83, + "learning_rate": 3.665452583388337e-05, + "loss": 0.8156, + "step": 11464 + }, + { + "epoch": 1.83, + "learning_rate": 3.665224336408552e-05, + "loss": 0.8502, + "step": 11465 + }, + { + "epoch": 1.83, + "learning_rate": 3.664996077019834e-05, + "loss": 0.7281, + "step": 11466 + }, + { + "epoch": 1.83, + "learning_rate": 3.664767805224615e-05, + "loss": 0.9034, + "step": 11467 + }, + { + "epoch": 1.83, + "learning_rate": 3.664539521025326e-05, + "loss": 0.7929, + "step": 11468 + }, + { + "epoch": 1.83, + "learning_rate": 3.664311224424399e-05, + "loss": 0.8602, + "step": 11469 + }, + { + "epoch": 1.83, + "learning_rate": 3.6640829154242636e-05, + "loss": 0.7989, + "step": 11470 + }, + { + "epoch": 1.83, + "learning_rate": 3.663854594027352e-05, + "loss": 0.9264, + "step": 11471 + }, + { + "epoch": 1.83, + "learning_rate": 3.663626260236096e-05, + "loss": 0.7921, + "step": 11472 + }, + { + "epoch": 1.83, + "learning_rate": 3.663397914052926e-05, + "loss": 0.8916, + "step": 11473 + }, + { + "epoch": 1.83, + "learning_rate": 3.6631695554802755e-05, + "loss": 0.8121, + "step": 11474 + }, + { + "epoch": 1.83, + "learning_rate": 3.662941184520575e-05, + "loss": 0.7746, + "step": 11475 + }, + { + "epoch": 1.83, + "learning_rate": 3.662712801176257e-05, + "loss": 0.8258, + "step": 11476 + }, + { + "epoch": 1.83, + "learning_rate": 3.662484405449754e-05, + "loss": 0.8593, + "step": 11477 + }, + { + "epoch": 1.83, + "learning_rate": 3.6622559973434975e-05, + "loss": 0.851, + "step": 11478 + }, + { + "epoch": 1.83, + "learning_rate": 3.662027576859919e-05, + "loss": 0.7905, + "step": 11479 + }, + { + "epoch": 1.83, + "learning_rate": 3.661799144001454e-05, + "loss": 0.7655, + "step": 11480 + }, + { + "epoch": 1.83, + "learning_rate": 3.6615706987705325e-05, + "loss": 0.8335, + "step": 11481 + }, + { + "epoch": 1.83, + "learning_rate": 3.661342241169588e-05, + "loss": 0.8215, + "step": 11482 + }, + { + "epoch": 1.83, + "learning_rate": 3.661113771201054e-05, + "loss": 0.7902, + "step": 11483 + }, + { + "epoch": 1.83, + "learning_rate": 3.6608852888673646e-05, + "loss": 0.8395, + "step": 11484 + }, + { + "epoch": 1.83, + "learning_rate": 3.6606567941709505e-05, + "loss": 0.8056, + "step": 11485 + }, + { + "epoch": 1.83, + "learning_rate": 3.660428287114246e-05, + "loss": 0.8458, + "step": 11486 + }, + { + "epoch": 1.83, + "learning_rate": 3.6601997676996854e-05, + "loss": 0.8374, + "step": 11487 + }, + { + "epoch": 1.83, + "learning_rate": 3.659971235929701e-05, + "loss": 0.7875, + "step": 11488 + }, + { + "epoch": 1.83, + "learning_rate": 3.659742691806728e-05, + "loss": 0.7262, + "step": 11489 + }, + { + "epoch": 1.83, + "learning_rate": 3.6595141353331984e-05, + "loss": 0.811, + "step": 11490 + }, + { + "epoch": 1.83, + "learning_rate": 3.6592855665115475e-05, + "loss": 0.766, + "step": 11491 + }, + { + "epoch": 1.83, + "learning_rate": 3.65905698534421e-05, + "loss": 0.8149, + "step": 11492 + }, + { + "epoch": 1.83, + "learning_rate": 3.658828391833618e-05, + "loss": 0.7897, + "step": 11493 + }, + { + "epoch": 1.83, + "learning_rate": 3.658599785982208e-05, + "loss": 0.8205, + "step": 11494 + }, + { + "epoch": 1.83, + "learning_rate": 3.658371167792414e-05, + "loss": 0.7935, + "step": 11495 + }, + { + "epoch": 1.83, + "learning_rate": 3.65814253726667e-05, + "loss": 0.8009, + "step": 11496 + }, + { + "epoch": 1.83, + "learning_rate": 3.657913894407411e-05, + "loss": 0.8445, + "step": 11497 + }, + { + "epoch": 1.83, + "learning_rate": 3.657685239217072e-05, + "loss": 0.8671, + "step": 11498 + }, + { + "epoch": 1.83, + "learning_rate": 3.6574565716980875e-05, + "loss": 0.7325, + "step": 11499 + }, + { + "epoch": 1.83, + "learning_rate": 3.6572278918528934e-05, + "loss": 0.7848, + "step": 11500 + }, + { + "epoch": 1.83, + "learning_rate": 3.6569991996839255e-05, + "loss": 0.8598, + "step": 11501 + }, + { + "epoch": 1.83, + "learning_rate": 3.656770495193618e-05, + "loss": 0.7186, + "step": 11502 + }, + { + "epoch": 1.83, + "learning_rate": 3.6565417783844066e-05, + "loss": 0.8233, + "step": 11503 + }, + { + "epoch": 1.83, + "learning_rate": 3.656313049258729e-05, + "loss": 0.7776, + "step": 11504 + }, + { + "epoch": 1.83, + "learning_rate": 3.6560843078190175e-05, + "loss": 0.8523, + "step": 11505 + }, + { + "epoch": 1.83, + "learning_rate": 3.655855554067712e-05, + "loss": 0.7984, + "step": 11506 + }, + { + "epoch": 1.83, + "learning_rate": 3.655626788007245e-05, + "loss": 0.7986, + "step": 11507 + }, + { + "epoch": 1.83, + "learning_rate": 3.655398009640055e-05, + "loss": 0.8394, + "step": 11508 + }, + { + "epoch": 1.83, + "learning_rate": 3.6551692189685774e-05, + "loss": 0.8558, + "step": 11509 + }, + { + "epoch": 1.83, + "learning_rate": 3.6549404159952486e-05, + "loss": 0.823, + "step": 11510 + }, + { + "epoch": 1.83, + "learning_rate": 3.654711600722506e-05, + "loss": 0.7339, + "step": 11511 + }, + { + "epoch": 1.83, + "learning_rate": 3.6544827731527865e-05, + "loss": 0.8089, + "step": 11512 + }, + { + "epoch": 1.83, + "learning_rate": 3.6542539332885254e-05, + "loss": 0.7432, + "step": 11513 + }, + { + "epoch": 1.83, + "learning_rate": 3.654025081132161e-05, + "loss": 0.821, + "step": 11514 + }, + { + "epoch": 1.83, + "learning_rate": 3.653796216686131e-05, + "loss": 0.7996, + "step": 11515 + }, + { + "epoch": 1.83, + "learning_rate": 3.65356733995287e-05, + "loss": 0.7853, + "step": 11516 + }, + { + "epoch": 1.83, + "learning_rate": 3.653338450934819e-05, + "loss": 0.8141, + "step": 11517 + }, + { + "epoch": 1.83, + "learning_rate": 3.653109549634412e-05, + "loss": 0.7615, + "step": 11518 + }, + { + "epoch": 1.84, + "learning_rate": 3.6528806360540893e-05, + "loss": 0.783, + "step": 11519 + }, + { + "epoch": 1.84, + "learning_rate": 3.6526517101962875e-05, + "loss": 0.8219, + "step": 11520 + }, + { + "epoch": 1.84, + "learning_rate": 3.6524227720634455e-05, + "loss": 0.8112, + "step": 11521 + }, + { + "epoch": 1.84, + "learning_rate": 3.652193821658e-05, + "loss": 0.832, + "step": 11522 + }, + { + "epoch": 1.84, + "learning_rate": 3.65196485898239e-05, + "loss": 0.7572, + "step": 11523 + }, + { + "epoch": 1.84, + "learning_rate": 3.651735884039053e-05, + "loss": 0.8063, + "step": 11524 + }, + { + "epoch": 1.84, + "learning_rate": 3.6515068968304285e-05, + "loss": 0.8075, + "step": 11525 + }, + { + "epoch": 1.84, + "learning_rate": 3.651277897358955e-05, + "loss": 0.8435, + "step": 11526 + }, + { + "epoch": 1.84, + "learning_rate": 3.6510488856270705e-05, + "loss": 0.8464, + "step": 11527 + }, + { + "epoch": 1.84, + "learning_rate": 3.650819861637214e-05, + "loss": 0.828, + "step": 11528 + }, + { + "epoch": 1.84, + "learning_rate": 3.650590825391825e-05, + "loss": 0.7413, + "step": 11529 + }, + { + "epoch": 1.84, + "learning_rate": 3.650361776893343e-05, + "loss": 0.772, + "step": 11530 + }, + { + "epoch": 1.84, + "learning_rate": 3.650132716144204e-05, + "loss": 0.8655, + "step": 11531 + }, + { + "epoch": 1.84, + "learning_rate": 3.649903643146852e-05, + "loss": 0.7925, + "step": 11532 + }, + { + "epoch": 1.84, + "learning_rate": 3.649674557903724e-05, + "loss": 0.8744, + "step": 11533 + }, + { + "epoch": 1.84, + "learning_rate": 3.64944546041726e-05, + "loss": 0.8724, + "step": 11534 + }, + { + "epoch": 1.84, + "learning_rate": 3.649216350689899e-05, + "loss": 0.9269, + "step": 11535 + }, + { + "epoch": 1.84, + "learning_rate": 3.648987228724082e-05, + "loss": 0.8457, + "step": 11536 + }, + { + "epoch": 1.84, + "learning_rate": 3.648758094522249e-05, + "loss": 0.9351, + "step": 11537 + }, + { + "epoch": 1.84, + "learning_rate": 3.648528948086839e-05, + "loss": 0.8598, + "step": 11538 + }, + { + "epoch": 1.84, + "learning_rate": 3.648299789420293e-05, + "loss": 0.8149, + "step": 11539 + }, + { + "epoch": 1.84, + "learning_rate": 3.648070618525052e-05, + "loss": 0.9001, + "step": 11540 + }, + { + "epoch": 1.84, + "learning_rate": 3.647841435403556e-05, + "loss": 0.7965, + "step": 11541 + }, + { + "epoch": 1.84, + "learning_rate": 3.647612240058245e-05, + "loss": 0.7937, + "step": 11542 + }, + { + "epoch": 1.84, + "learning_rate": 3.64738303249156e-05, + "loss": 0.7369, + "step": 11543 + }, + { + "epoch": 1.84, + "learning_rate": 3.647153812705943e-05, + "loss": 0.7966, + "step": 11544 + }, + { + "epoch": 1.84, + "learning_rate": 3.646924580703834e-05, + "loss": 0.7522, + "step": 11545 + }, + { + "epoch": 1.84, + "learning_rate": 3.6466953364876755e-05, + "loss": 0.8171, + "step": 11546 + }, + { + "epoch": 1.84, + "learning_rate": 3.646466080059907e-05, + "loss": 0.8171, + "step": 11547 + }, + { + "epoch": 1.84, + "learning_rate": 3.646236811422971e-05, + "loss": 0.903, + "step": 11548 + }, + { + "epoch": 1.84, + "learning_rate": 3.646007530579308e-05, + "loss": 0.8978, + "step": 11549 + }, + { + "epoch": 1.84, + "learning_rate": 3.645778237531362e-05, + "loss": 0.7908, + "step": 11550 + }, + { + "epoch": 1.84, + "learning_rate": 3.6455489322815715e-05, + "loss": 0.8706, + "step": 11551 + }, + { + "epoch": 1.84, + "learning_rate": 3.645319614832382e-05, + "loss": 0.8734, + "step": 11552 + }, + { + "epoch": 1.84, + "learning_rate": 3.6450902851862335e-05, + "loss": 0.8096, + "step": 11553 + }, + { + "epoch": 1.84, + "learning_rate": 3.644860943345568e-05, + "loss": 0.7919, + "step": 11554 + }, + { + "epoch": 1.84, + "learning_rate": 3.6446315893128295e-05, + "loss": 0.7728, + "step": 11555 + }, + { + "epoch": 1.84, + "learning_rate": 3.6444022230904596e-05, + "loss": 0.822, + "step": 11556 + }, + { + "epoch": 1.84, + "learning_rate": 3.6441728446809e-05, + "loss": 0.7549, + "step": 11557 + }, + { + "epoch": 1.84, + "learning_rate": 3.6439434540865945e-05, + "loss": 0.7767, + "step": 11558 + }, + { + "epoch": 1.84, + "learning_rate": 3.643714051309986e-05, + "loss": 0.7513, + "step": 11559 + }, + { + "epoch": 1.84, + "learning_rate": 3.643484636353517e-05, + "loss": 0.8359, + "step": 11560 + }, + { + "epoch": 1.84, + "learning_rate": 3.643255209219631e-05, + "loss": 0.7652, + "step": 11561 + }, + { + "epoch": 1.84, + "learning_rate": 3.643025769910771e-05, + "loss": 0.7481, + "step": 11562 + }, + { + "epoch": 1.84, + "learning_rate": 3.64279631842938e-05, + "loss": 0.7862, + "step": 11563 + }, + { + "epoch": 1.84, + "learning_rate": 3.642566854777903e-05, + "loss": 0.7298, + "step": 11564 + }, + { + "epoch": 1.84, + "learning_rate": 3.6423373789587824e-05, + "loss": 0.7397, + "step": 11565 + }, + { + "epoch": 1.84, + "learning_rate": 3.642107890974461e-05, + "loss": 0.8328, + "step": 11566 + }, + { + "epoch": 1.84, + "learning_rate": 3.6418783908273854e-05, + "loss": 0.7709, + "step": 11567 + }, + { + "epoch": 1.84, + "learning_rate": 3.6416488785199976e-05, + "loss": 0.7736, + "step": 11568 + }, + { + "epoch": 1.84, + "learning_rate": 3.6414193540547427e-05, + "loss": 0.7986, + "step": 11569 + }, + { + "epoch": 1.84, + "learning_rate": 3.641189817434065e-05, + "loss": 0.8428, + "step": 11570 + }, + { + "epoch": 1.84, + "learning_rate": 3.640960268660407e-05, + "loss": 0.7946, + "step": 11571 + }, + { + "epoch": 1.84, + "learning_rate": 3.640730707736216e-05, + "loss": 0.7734, + "step": 11572 + }, + { + "epoch": 1.84, + "learning_rate": 3.640501134663936e-05, + "loss": 0.8381, + "step": 11573 + }, + { + "epoch": 1.84, + "learning_rate": 3.64027154944601e-05, + "loss": 0.7804, + "step": 11574 + }, + { + "epoch": 1.84, + "learning_rate": 3.640041952084885e-05, + "loss": 0.7789, + "step": 11575 + }, + { + "epoch": 1.84, + "learning_rate": 3.6398123425830055e-05, + "loss": 0.8337, + "step": 11576 + }, + { + "epoch": 1.84, + "learning_rate": 3.6395827209428166e-05, + "loss": 0.8588, + "step": 11577 + }, + { + "epoch": 1.84, + "learning_rate": 3.639353087166764e-05, + "loss": 0.8634, + "step": 11578 + }, + { + "epoch": 1.84, + "learning_rate": 3.6391234412572914e-05, + "loss": 0.6988, + "step": 11579 + }, + { + "epoch": 1.84, + "learning_rate": 3.638893783216846e-05, + "loss": 0.8023, + "step": 11580 + }, + { + "epoch": 1.84, + "learning_rate": 3.6386641130478745e-05, + "loss": 0.7878, + "step": 11581 + }, + { + "epoch": 1.85, + "learning_rate": 3.6384344307528204e-05, + "loss": 0.8868, + "step": 11582 + }, + { + "epoch": 1.85, + "learning_rate": 3.638204736334131e-05, + "loss": 0.7582, + "step": 11583 + }, + { + "epoch": 1.85, + "learning_rate": 3.637975029794252e-05, + "loss": 0.8612, + "step": 11584 + }, + { + "epoch": 1.85, + "learning_rate": 3.6377453111356305e-05, + "loss": 0.8304, + "step": 11585 + }, + { + "epoch": 1.85, + "learning_rate": 3.637515580360712e-05, + "loss": 0.8011, + "step": 11586 + }, + { + "epoch": 1.85, + "learning_rate": 3.6372858374719424e-05, + "loss": 0.7964, + "step": 11587 + }, + { + "epoch": 1.85, + "learning_rate": 3.637056082471769e-05, + "loss": 0.8007, + "step": 11588 + }, + { + "epoch": 1.85, + "learning_rate": 3.63682631536264e-05, + "loss": 0.7774, + "step": 11589 + }, + { + "epoch": 1.85, + "learning_rate": 3.636596536147e-05, + "loss": 0.7158, + "step": 11590 + }, + { + "epoch": 1.85, + "learning_rate": 3.6363667448272965e-05, + "loss": 0.7645, + "step": 11591 + }, + { + "epoch": 1.85, + "learning_rate": 3.636136941405978e-05, + "loss": 0.7912, + "step": 11592 + }, + { + "epoch": 1.85, + "learning_rate": 3.6359071258854905e-05, + "loss": 0.7945, + "step": 11593 + }, + { + "epoch": 1.85, + "learning_rate": 3.635677298268282e-05, + "loss": 0.7422, + "step": 11594 + }, + { + "epoch": 1.85, + "learning_rate": 3.635447458556799e-05, + "loss": 0.7973, + "step": 11595 + }, + { + "epoch": 1.85, + "learning_rate": 3.635217606753491e-05, + "loss": 0.682, + "step": 11596 + }, + { + "epoch": 1.85, + "learning_rate": 3.6349877428608036e-05, + "loss": 0.7768, + "step": 11597 + }, + { + "epoch": 1.85, + "learning_rate": 3.6347578668811866e-05, + "loss": 0.7737, + "step": 11598 + }, + { + "epoch": 1.85, + "learning_rate": 3.6345279788170874e-05, + "loss": 0.8134, + "step": 11599 + }, + { + "epoch": 1.85, + "learning_rate": 3.634298078670953e-05, + "loss": 0.7477, + "step": 11600 + }, + { + "epoch": 1.85, + "learning_rate": 3.6340681664452336e-05, + "loss": 0.9034, + "step": 11601 + }, + { + "epoch": 1.85, + "learning_rate": 3.633838242142377e-05, + "loss": 0.8317, + "step": 11602 + }, + { + "epoch": 1.85, + "learning_rate": 3.6336083057648296e-05, + "loss": 0.8327, + "step": 11603 + }, + { + "epoch": 1.85, + "learning_rate": 3.6333783573150435e-05, + "loss": 0.8045, + "step": 11604 + }, + { + "epoch": 1.85, + "learning_rate": 3.633148396795466e-05, + "loss": 0.8731, + "step": 11605 + }, + { + "epoch": 1.85, + "learning_rate": 3.632918424208546e-05, + "loss": 0.8875, + "step": 11606 + }, + { + "epoch": 1.85, + "learning_rate": 3.632688439556732e-05, + "loss": 0.7966, + "step": 11607 + }, + { + "epoch": 1.85, + "learning_rate": 3.632458442842475e-05, + "loss": 0.8471, + "step": 11608 + }, + { + "epoch": 1.85, + "learning_rate": 3.6322284340682214e-05, + "loss": 0.8354, + "step": 11609 + }, + { + "epoch": 1.85, + "learning_rate": 3.631998413236424e-05, + "loss": 0.817, + "step": 11610 + }, + { + "epoch": 1.85, + "learning_rate": 3.63176838034953e-05, + "loss": 0.7469, + "step": 11611 + }, + { + "epoch": 1.85, + "learning_rate": 3.631538335409989e-05, + "loss": 0.7727, + "step": 11612 + }, + { + "epoch": 1.85, + "learning_rate": 3.631308278420253e-05, + "loss": 0.8377, + "step": 11613 + }, + { + "epoch": 1.85, + "learning_rate": 3.63107820938277e-05, + "loss": 0.7675, + "step": 11614 + }, + { + "epoch": 1.85, + "learning_rate": 3.630848128299991e-05, + "loss": 0.8484, + "step": 11615 + }, + { + "epoch": 1.85, + "learning_rate": 3.6306180351743665e-05, + "loss": 0.7962, + "step": 11616 + }, + { + "epoch": 1.85, + "learning_rate": 3.630387930008346e-05, + "loss": 0.7607, + "step": 11617 + }, + { + "epoch": 1.85, + "learning_rate": 3.63015781280438e-05, + "loss": 0.7916, + "step": 11618 + }, + { + "epoch": 1.85, + "learning_rate": 3.62992768356492e-05, + "loss": 0.762, + "step": 11619 + }, + { + "epoch": 1.85, + "learning_rate": 3.6296975422924154e-05, + "loss": 0.8739, + "step": 11620 + }, + { + "epoch": 1.85, + "learning_rate": 3.6294673889893176e-05, + "loss": 0.8263, + "step": 11621 + }, + { + "epoch": 1.85, + "learning_rate": 3.629237223658078e-05, + "loss": 0.7976, + "step": 11622 + }, + { + "epoch": 1.85, + "learning_rate": 3.629007046301148e-05, + "loss": 0.8197, + "step": 11623 + }, + { + "epoch": 1.85, + "learning_rate": 3.628776856920979e-05, + "loss": 0.7893, + "step": 11624 + }, + { + "epoch": 1.85, + "learning_rate": 3.628546655520021e-05, + "loss": 0.8419, + "step": 11625 + }, + { + "epoch": 1.85, + "learning_rate": 3.628316442100726e-05, + "loss": 0.8303, + "step": 11626 + }, + { + "epoch": 1.85, + "learning_rate": 3.628086216665547e-05, + "loss": 0.8125, + "step": 11627 + }, + { + "epoch": 1.85, + "learning_rate": 3.627855979216934e-05, + "loss": 0.7587, + "step": 11628 + }, + { + "epoch": 1.85, + "learning_rate": 3.6276257297573394e-05, + "loss": 0.869, + "step": 11629 + }, + { + "epoch": 1.85, + "learning_rate": 3.627395468289215e-05, + "loss": 0.8857, + "step": 11630 + }, + { + "epoch": 1.85, + "learning_rate": 3.627165194815014e-05, + "loss": 0.7741, + "step": 11631 + }, + { + "epoch": 1.85, + "learning_rate": 3.626934909337188e-05, + "loss": 0.7863, + "step": 11632 + }, + { + "epoch": 1.85, + "learning_rate": 3.626704611858189e-05, + "loss": 0.8189, + "step": 11633 + }, + { + "epoch": 1.85, + "learning_rate": 3.6264743023804705e-05, + "loss": 0.7957, + "step": 11634 + }, + { + "epoch": 1.85, + "learning_rate": 3.626243980906484e-05, + "loss": 0.7744, + "step": 11635 + }, + { + "epoch": 1.85, + "learning_rate": 3.626013647438683e-05, + "loss": 0.79, + "step": 11636 + }, + { + "epoch": 1.85, + "learning_rate": 3.625783301979521e-05, + "loss": 0.8588, + "step": 11637 + }, + { + "epoch": 1.85, + "learning_rate": 3.62555294453145e-05, + "loss": 0.7958, + "step": 11638 + }, + { + "epoch": 1.85, + "learning_rate": 3.625322575096923e-05, + "loss": 0.8488, + "step": 11639 + }, + { + "epoch": 1.85, + "learning_rate": 3.6250921936783935e-05, + "loss": 0.8098, + "step": 11640 + }, + { + "epoch": 1.85, + "learning_rate": 3.6248618002783154e-05, + "loss": 0.7537, + "step": 11641 + }, + { + "epoch": 1.85, + "learning_rate": 3.624631394899142e-05, + "loss": 0.9054, + "step": 11642 + }, + { + "epoch": 1.85, + "learning_rate": 3.624400977543327e-05, + "loss": 0.7683, + "step": 11643 + }, + { + "epoch": 1.86, + "learning_rate": 3.624170548213325e-05, + "loss": 0.7979, + "step": 11644 + }, + { + "epoch": 1.86, + "learning_rate": 3.623940106911588e-05, + "loss": 0.7677, + "step": 11645 + }, + { + "epoch": 1.86, + "learning_rate": 3.6237096536405726e-05, + "loss": 0.815, + "step": 11646 + }, + { + "epoch": 1.86, + "learning_rate": 3.623479188402731e-05, + "loss": 0.8428, + "step": 11647 + }, + { + "epoch": 1.86, + "learning_rate": 3.6232487112005186e-05, + "loss": 0.8524, + "step": 11648 + }, + { + "epoch": 1.86, + "learning_rate": 3.623018222036388e-05, + "loss": 0.7869, + "step": 11649 + }, + { + "epoch": 1.86, + "learning_rate": 3.6227877209127965e-05, + "loss": 0.7822, + "step": 11650 + }, + { + "epoch": 1.86, + "learning_rate": 3.622557207832197e-05, + "loss": 0.7688, + "step": 11651 + }, + { + "epoch": 1.86, + "learning_rate": 3.622326682797044e-05, + "loss": 0.8128, + "step": 11652 + }, + { + "epoch": 1.86, + "learning_rate": 3.622096145809794e-05, + "loss": 0.7634, + "step": 11653 + }, + { + "epoch": 1.86, + "learning_rate": 3.6218655968729006e-05, + "loss": 0.8016, + "step": 11654 + }, + { + "epoch": 1.86, + "learning_rate": 3.621635035988821e-05, + "loss": 0.7602, + "step": 11655 + }, + { + "epoch": 1.86, + "learning_rate": 3.621404463160008e-05, + "loss": 0.7964, + "step": 11656 + }, + { + "epoch": 1.86, + "learning_rate": 3.621173878388919e-05, + "loss": 0.7835, + "step": 11657 + }, + { + "epoch": 1.86, + "learning_rate": 3.620943281678009e-05, + "loss": 0.8031, + "step": 11658 + }, + { + "epoch": 1.86, + "learning_rate": 3.620712673029733e-05, + "loss": 0.7785, + "step": 11659 + }, + { + "epoch": 1.86, + "learning_rate": 3.620482052446547e-05, + "loss": 0.964, + "step": 11660 + }, + { + "epoch": 1.86, + "learning_rate": 3.6202514199309085e-05, + "loss": 0.8215, + "step": 11661 + }, + { + "epoch": 1.86, + "learning_rate": 3.620020775485272e-05, + "loss": 0.8689, + "step": 11662 + }, + { + "epoch": 1.86, + "learning_rate": 3.619790119112094e-05, + "loss": 0.8444, + "step": 11663 + }, + { + "epoch": 1.86, + "learning_rate": 3.619559450813832e-05, + "loss": 0.7957, + "step": 11664 + }, + { + "epoch": 1.86, + "learning_rate": 3.6193287705929404e-05, + "loss": 0.7029, + "step": 11665 + }, + { + "epoch": 1.86, + "learning_rate": 3.619098078451877e-05, + "loss": 0.8645, + "step": 11666 + }, + { + "epoch": 1.86, + "learning_rate": 3.618867374393099e-05, + "loss": 0.7243, + "step": 11667 + }, + { + "epoch": 1.86, + "learning_rate": 3.618636658419063e-05, + "loss": 0.7398, + "step": 11668 + }, + { + "epoch": 1.86, + "learning_rate": 3.6184059305322254e-05, + "loss": 0.883, + "step": 11669 + }, + { + "epoch": 1.86, + "learning_rate": 3.6181751907350434e-05, + "loss": 0.8195, + "step": 11670 + }, + { + "epoch": 1.86, + "learning_rate": 3.617944439029975e-05, + "loss": 0.7744, + "step": 11671 + }, + { + "epoch": 1.86, + "learning_rate": 3.617713675419477e-05, + "loss": 0.8686, + "step": 11672 + }, + { + "epoch": 1.86, + "learning_rate": 3.617482899906007e-05, + "loss": 0.7489, + "step": 11673 + }, + { + "epoch": 1.86, + "learning_rate": 3.617252112492022e-05, + "loss": 0.8429, + "step": 11674 + }, + { + "epoch": 1.86, + "learning_rate": 3.61702131317998e-05, + "loss": 0.7672, + "step": 11675 + }, + { + "epoch": 1.86, + "learning_rate": 3.61679050197234e-05, + "loss": 0.9054, + "step": 11676 + }, + { + "epoch": 1.86, + "learning_rate": 3.616559678871559e-05, + "loss": 0.8165, + "step": 11677 + }, + { + "epoch": 1.86, + "learning_rate": 3.616328843880095e-05, + "loss": 0.833, + "step": 11678 + }, + { + "epoch": 1.86, + "learning_rate": 3.616097997000407e-05, + "loss": 0.7351, + "step": 11679 + }, + { + "epoch": 1.86, + "learning_rate": 3.6158671382349524e-05, + "loss": 0.8219, + "step": 11680 + }, + { + "epoch": 1.86, + "learning_rate": 3.615636267586191e-05, + "loss": 0.8793, + "step": 11681 + }, + { + "epoch": 1.86, + "learning_rate": 3.61540538505658e-05, + "loss": 0.8458, + "step": 11682 + }, + { + "epoch": 1.86, + "learning_rate": 3.61517449064858e-05, + "loss": 0.8268, + "step": 11683 + }, + { + "epoch": 1.86, + "learning_rate": 3.614943584364647e-05, + "loss": 0.8245, + "step": 11684 + }, + { + "epoch": 1.86, + "learning_rate": 3.614712666207242e-05, + "loss": 0.7439, + "step": 11685 + }, + { + "epoch": 1.86, + "learning_rate": 3.614481736178825e-05, + "loss": 0.7835, + "step": 11686 + }, + { + "epoch": 1.86, + "learning_rate": 3.614250794281853e-05, + "loss": 0.794, + "step": 11687 + }, + { + "epoch": 1.86, + "learning_rate": 3.614019840518788e-05, + "loss": 0.9076, + "step": 11688 + }, + { + "epoch": 1.86, + "learning_rate": 3.613788874892086e-05, + "loss": 0.8519, + "step": 11689 + }, + { + "epoch": 1.86, + "learning_rate": 3.61355789740421e-05, + "loss": 0.8579, + "step": 11690 + }, + { + "epoch": 1.86, + "learning_rate": 3.613326908057618e-05, + "loss": 0.742, + "step": 11691 + }, + { + "epoch": 1.86, + "learning_rate": 3.6130959068547705e-05, + "loss": 0.8472, + "step": 11692 + }, + { + "epoch": 1.86, + "learning_rate": 3.6128648937981277e-05, + "loss": 0.8544, + "step": 11693 + }, + { + "epoch": 1.86, + "learning_rate": 3.612633868890148e-05, + "loss": 0.7937, + "step": 11694 + }, + { + "epoch": 1.86, + "learning_rate": 3.6124028321332945e-05, + "loss": 0.7818, + "step": 11695 + }, + { + "epoch": 1.86, + "learning_rate": 3.6121717835300264e-05, + "loss": 0.8, + "step": 11696 + }, + { + "epoch": 1.86, + "learning_rate": 3.611940723082804e-05, + "loss": 0.7626, + "step": 11697 + }, + { + "epoch": 1.86, + "learning_rate": 3.611709650794087e-05, + "loss": 0.8117, + "step": 11698 + }, + { + "epoch": 1.86, + "learning_rate": 3.6114785666663374e-05, + "loss": 0.856, + "step": 11699 + }, + { + "epoch": 1.86, + "learning_rate": 3.611247470702016e-05, + "loss": 0.8333, + "step": 11700 + }, + { + "epoch": 1.86, + "learning_rate": 3.6110163629035836e-05, + "loss": 0.8027, + "step": 11701 + }, + { + "epoch": 1.86, + "learning_rate": 3.610785243273502e-05, + "loss": 0.7847, + "step": 11702 + }, + { + "epoch": 1.86, + "learning_rate": 3.610554111814232e-05, + "loss": 0.7759, + "step": 11703 + }, + { + "epoch": 1.86, + "learning_rate": 3.610322968528234e-05, + "loss": 0.8325, + "step": 11704 + }, + { + "epoch": 1.86, + "learning_rate": 3.61009181341797e-05, + "loss": 0.8116, + "step": 11705 + }, + { + "epoch": 1.86, + "learning_rate": 3.6098606464859044e-05, + "loss": 0.7674, + "step": 11706 + }, + { + "epoch": 1.87, + "learning_rate": 3.6096294677344945e-05, + "loss": 0.8011, + "step": 11707 + }, + { + "epoch": 1.87, + "learning_rate": 3.609398277166206e-05, + "loss": 0.7301, + "step": 11708 + }, + { + "epoch": 1.87, + "learning_rate": 3.6091670747834984e-05, + "loss": 0.8567, + "step": 11709 + }, + { + "epoch": 1.87, + "learning_rate": 3.608935860588836e-05, + "loss": 0.8114, + "step": 11710 + }, + { + "epoch": 1.87, + "learning_rate": 3.6087046345846786e-05, + "loss": 0.8373, + "step": 11711 + }, + { + "epoch": 1.87, + "learning_rate": 3.60847339677349e-05, + "loss": 0.8062, + "step": 11712 + }, + { + "epoch": 1.87, + "learning_rate": 3.608242147157733e-05, + "loss": 0.7931, + "step": 11713 + }, + { + "epoch": 1.87, + "learning_rate": 3.608010885739871e-05, + "loss": 0.7171, + "step": 11714 + }, + { + "epoch": 1.87, + "learning_rate": 3.607779612522365e-05, + "loss": 0.7812, + "step": 11715 + }, + { + "epoch": 1.87, + "learning_rate": 3.607548327507678e-05, + "loss": 0.7595, + "step": 11716 + }, + { + "epoch": 1.87, + "learning_rate": 3.607317030698275e-05, + "loss": 0.7898, + "step": 11717 + }, + { + "epoch": 1.87, + "learning_rate": 3.607085722096617e-05, + "loss": 0.7444, + "step": 11718 + }, + { + "epoch": 1.87, + "learning_rate": 3.6068544017051684e-05, + "loss": 0.8249, + "step": 11719 + }, + { + "epoch": 1.87, + "learning_rate": 3.606623069526393e-05, + "loss": 0.8006, + "step": 11720 + }, + { + "epoch": 1.87, + "learning_rate": 3.606391725562752e-05, + "loss": 0.7933, + "step": 11721 + }, + { + "epoch": 1.87, + "learning_rate": 3.6061603698167126e-05, + "loss": 0.8245, + "step": 11722 + }, + { + "epoch": 1.87, + "learning_rate": 3.6059290022907364e-05, + "loss": 0.7185, + "step": 11723 + }, + { + "epoch": 1.87, + "learning_rate": 3.605697622987287e-05, + "loss": 0.7306, + "step": 11724 + }, + { + "epoch": 1.87, + "learning_rate": 3.60546623190883e-05, + "loss": 0.8303, + "step": 11725 + }, + { + "epoch": 1.87, + "learning_rate": 3.605234829057828e-05, + "loss": 0.8152, + "step": 11726 + }, + { + "epoch": 1.87, + "learning_rate": 3.6050034144367464e-05, + "loss": 0.7686, + "step": 11727 + }, + { + "epoch": 1.87, + "learning_rate": 3.60477198804805e-05, + "loss": 0.8641, + "step": 11728 + }, + { + "epoch": 1.87, + "learning_rate": 3.604540549894202e-05, + "loss": 0.7916, + "step": 11729 + }, + { + "epoch": 1.87, + "learning_rate": 3.604309099977668e-05, + "loss": 0.7827, + "step": 11730 + }, + { + "epoch": 1.87, + "learning_rate": 3.6040776383009125e-05, + "loss": 0.7616, + "step": 11731 + }, + { + "epoch": 1.87, + "learning_rate": 3.6038461648664e-05, + "loss": 0.8284, + "step": 11732 + }, + { + "epoch": 1.87, + "learning_rate": 3.603614679676597e-05, + "loss": 0.8001, + "step": 11733 + }, + { + "epoch": 1.87, + "learning_rate": 3.603383182733967e-05, + "loss": 0.784, + "step": 11734 + }, + { + "epoch": 1.87, + "learning_rate": 3.6031516740409754e-05, + "loss": 0.7401, + "step": 11735 + }, + { + "epoch": 1.87, + "learning_rate": 3.602920153600089e-05, + "loss": 0.799, + "step": 11736 + }, + { + "epoch": 1.87, + "learning_rate": 3.602688621413773e-05, + "loss": 0.8532, + "step": 11737 + }, + { + "epoch": 1.87, + "learning_rate": 3.6024570774844915e-05, + "loss": 0.7275, + "step": 11738 + }, + { + "epoch": 1.87, + "learning_rate": 3.602225521814713e-05, + "loss": 0.7837, + "step": 11739 + }, + { + "epoch": 1.87, + "learning_rate": 3.6019939544069005e-05, + "loss": 0.823, + "step": 11740 + }, + { + "epoch": 1.87, + "learning_rate": 3.601762375263521e-05, + "loss": 0.7241, + "step": 11741 + }, + { + "epoch": 1.87, + "learning_rate": 3.6015307843870424e-05, + "loss": 0.8692, + "step": 11742 + }, + { + "epoch": 1.87, + "learning_rate": 3.601299181779929e-05, + "loss": 0.8075, + "step": 11743 + }, + { + "epoch": 1.87, + "learning_rate": 3.601067567444648e-05, + "loss": 0.9312, + "step": 11744 + }, + { + "epoch": 1.87, + "learning_rate": 3.600835941383666e-05, + "loss": 0.8952, + "step": 11745 + }, + { + "epoch": 1.87, + "learning_rate": 3.6006043035994494e-05, + "loss": 0.7707, + "step": 11746 + }, + { + "epoch": 1.87, + "learning_rate": 3.6003726540944656e-05, + "loss": 0.7316, + "step": 11747 + }, + { + "epoch": 1.87, + "learning_rate": 3.600140992871181e-05, + "loss": 0.6992, + "step": 11748 + }, + { + "epoch": 1.87, + "learning_rate": 3.599909319932063e-05, + "loss": 0.8344, + "step": 11749 + }, + { + "epoch": 1.87, + "learning_rate": 3.599677635279578e-05, + "loss": 0.8368, + "step": 11750 + }, + { + "epoch": 1.87, + "learning_rate": 3.599445938916194e-05, + "loss": 0.7643, + "step": 11751 + }, + { + "epoch": 1.87, + "learning_rate": 3.599214230844378e-05, + "loss": 0.8657, + "step": 11752 + }, + { + "epoch": 1.87, + "learning_rate": 3.598982511066597e-05, + "loss": 0.7877, + "step": 11753 + }, + { + "epoch": 1.87, + "learning_rate": 3.598750779585321e-05, + "loss": 0.8958, + "step": 11754 + }, + { + "epoch": 1.87, + "learning_rate": 3.598519036403015e-05, + "loss": 0.8012, + "step": 11755 + }, + { + "epoch": 1.87, + "learning_rate": 3.5982872815221494e-05, + "loss": 0.8131, + "step": 11756 + }, + { + "epoch": 1.87, + "learning_rate": 3.59805551494519e-05, + "loss": 0.737, + "step": 11757 + }, + { + "epoch": 1.87, + "learning_rate": 3.597823736674607e-05, + "loss": 0.7641, + "step": 11758 + }, + { + "epoch": 1.87, + "learning_rate": 3.597591946712867e-05, + "loss": 0.7842, + "step": 11759 + }, + { + "epoch": 1.87, + "learning_rate": 3.5973601450624394e-05, + "loss": 0.8737, + "step": 11760 + }, + { + "epoch": 1.87, + "learning_rate": 3.597128331725792e-05, + "loss": 0.9028, + "step": 11761 + }, + { + "epoch": 1.87, + "learning_rate": 3.5968965067053945e-05, + "loss": 0.8085, + "step": 11762 + }, + { + "epoch": 1.87, + "learning_rate": 3.596664670003716e-05, + "loss": 0.7989, + "step": 11763 + }, + { + "epoch": 1.87, + "learning_rate": 3.5964328216232225e-05, + "loss": 0.7702, + "step": 11764 + }, + { + "epoch": 1.87, + "learning_rate": 3.596200961566386e-05, + "loss": 0.7338, + "step": 11765 + }, + { + "epoch": 1.87, + "learning_rate": 3.5959690898356754e-05, + "loss": 0.8114, + "step": 11766 + }, + { + "epoch": 1.87, + "learning_rate": 3.595737206433558e-05, + "loss": 0.8551, + "step": 11767 + }, + { + "epoch": 1.87, + "learning_rate": 3.595505311362507e-05, + "loss": 0.8222, + "step": 11768 + }, + { + "epoch": 1.87, + "learning_rate": 3.5952734046249874e-05, + "loss": 0.8747, + "step": 11769 + }, + { + "epoch": 1.88, + "learning_rate": 3.5950414862234714e-05, + "loss": 0.7447, + "step": 11770 + }, + { + "epoch": 1.88, + "learning_rate": 3.5948095561604286e-05, + "loss": 0.7907, + "step": 11771 + }, + { + "epoch": 1.88, + "learning_rate": 3.594577614438329e-05, + "loss": 0.8569, + "step": 11772 + }, + { + "epoch": 1.88, + "learning_rate": 3.594345661059642e-05, + "loss": 0.8337, + "step": 11773 + }, + { + "epoch": 1.88, + "learning_rate": 3.5941136960268376e-05, + "loss": 0.8251, + "step": 11774 + }, + { + "epoch": 1.88, + "learning_rate": 3.593881719342388e-05, + "loss": 0.8143, + "step": 11775 + }, + { + "epoch": 1.88, + "learning_rate": 3.593649731008761e-05, + "loss": 0.8597, + "step": 11776 + }, + { + "epoch": 1.88, + "learning_rate": 3.5934177310284285e-05, + "loss": 0.8562, + "step": 11777 + }, + { + "epoch": 1.88, + "learning_rate": 3.5931857194038616e-05, + "loss": 0.8317, + "step": 11778 + }, + { + "epoch": 1.88, + "learning_rate": 3.59295369613753e-05, + "loss": 0.7865, + "step": 11779 + }, + { + "epoch": 1.88, + "learning_rate": 3.5927216612319055e-05, + "loss": 0.84, + "step": 11780 + }, + { + "epoch": 1.88, + "learning_rate": 3.5924896146894576e-05, + "loss": 0.8853, + "step": 11781 + }, + { + "epoch": 1.88, + "learning_rate": 3.592257556512659e-05, + "loss": 0.7846, + "step": 11782 + }, + { + "epoch": 1.88, + "learning_rate": 3.5920254867039814e-05, + "loss": 0.7879, + "step": 11783 + }, + { + "epoch": 1.88, + "learning_rate": 3.5917934052658944e-05, + "loss": 0.721, + "step": 11784 + }, + { + "epoch": 1.88, + "learning_rate": 3.5915613122008714e-05, + "loss": 0.8274, + "step": 11785 + }, + { + "epoch": 1.88, + "learning_rate": 3.591329207511382e-05, + "loss": 0.8405, + "step": 11786 + }, + { + "epoch": 1.88, + "learning_rate": 3.5910970911999e-05, + "loss": 0.7527, + "step": 11787 + }, + { + "epoch": 1.88, + "learning_rate": 3.590864963268897e-05, + "loss": 0.8758, + "step": 11788 + }, + { + "epoch": 1.88, + "learning_rate": 3.5906328237208435e-05, + "loss": 0.8041, + "step": 11789 + }, + { + "epoch": 1.88, + "learning_rate": 3.590400672558212e-05, + "loss": 0.7831, + "step": 11790 + }, + { + "epoch": 1.88, + "learning_rate": 3.5901685097834766e-05, + "loss": 0.857, + "step": 11791 + }, + { + "epoch": 1.88, + "learning_rate": 3.589936335399108e-05, + "loss": 0.8145, + "step": 11792 + }, + { + "epoch": 1.88, + "learning_rate": 3.5897041494075786e-05, + "loss": 0.7741, + "step": 11793 + }, + { + "epoch": 1.88, + "learning_rate": 3.589471951811362e-05, + "loss": 0.733, + "step": 11794 + }, + { + "epoch": 1.88, + "learning_rate": 3.5892397426129307e-05, + "loss": 0.7806, + "step": 11795 + }, + { + "epoch": 1.88, + "learning_rate": 3.5890075218147564e-05, + "loss": 0.7449, + "step": 11796 + }, + { + "epoch": 1.88, + "learning_rate": 3.588775289419314e-05, + "loss": 0.8556, + "step": 11797 + }, + { + "epoch": 1.88, + "learning_rate": 3.5885430454290755e-05, + "loss": 0.7753, + "step": 11798 + }, + { + "epoch": 1.88, + "learning_rate": 3.588310789846515e-05, + "loss": 0.7641, + "step": 11799 + }, + { + "epoch": 1.88, + "learning_rate": 3.588078522674105e-05, + "loss": 0.709, + "step": 11800 + }, + { + "epoch": 1.88, + "learning_rate": 3.587846243914319e-05, + "loss": 0.8198, + "step": 11801 + }, + { + "epoch": 1.88, + "learning_rate": 3.587613953569632e-05, + "loss": 0.7867, + "step": 11802 + }, + { + "epoch": 1.88, + "learning_rate": 3.587381651642516e-05, + "loss": 0.767, + "step": 11803 + }, + { + "epoch": 1.88, + "learning_rate": 3.587149338135445e-05, + "loss": 0.8535, + "step": 11804 + }, + { + "epoch": 1.88, + "learning_rate": 3.5869170130508936e-05, + "loss": 0.7781, + "step": 11805 + }, + { + "epoch": 1.88, + "learning_rate": 3.586684676391337e-05, + "loss": 0.7784, + "step": 11806 + }, + { + "epoch": 1.88, + "learning_rate": 3.5864523281592476e-05, + "loss": 0.7715, + "step": 11807 + }, + { + "epoch": 1.88, + "learning_rate": 3.5862199683571005e-05, + "loss": 0.8053, + "step": 11808 + }, + { + "epoch": 1.88, + "learning_rate": 3.5859875969873714e-05, + "loss": 0.7228, + "step": 11809 + }, + { + "epoch": 1.88, + "learning_rate": 3.5857552140525316e-05, + "loss": 0.7699, + "step": 11810 + }, + { + "epoch": 1.88, + "learning_rate": 3.585522819555059e-05, + "loss": 0.946, + "step": 11811 + }, + { + "epoch": 1.88, + "learning_rate": 3.585290413497428e-05, + "loss": 0.7292, + "step": 11812 + }, + { + "epoch": 1.88, + "learning_rate": 3.585057995882112e-05, + "loss": 0.8243, + "step": 11813 + }, + { + "epoch": 1.88, + "learning_rate": 3.5848255667115875e-05, + "loss": 0.7882, + "step": 11814 + }, + { + "epoch": 1.88, + "learning_rate": 3.5845931259883295e-05, + "loss": 0.7183, + "step": 11815 + }, + { + "epoch": 1.88, + "learning_rate": 3.584360673714813e-05, + "loss": 0.785, + "step": 11816 + }, + { + "epoch": 1.88, + "learning_rate": 3.584128209893514e-05, + "loss": 0.7898, + "step": 11817 + }, + { + "epoch": 1.88, + "learning_rate": 3.583895734526907e-05, + "loss": 0.7814, + "step": 11818 + }, + { + "epoch": 1.88, + "learning_rate": 3.583663247617469e-05, + "loss": 0.7451, + "step": 11819 + }, + { + "epoch": 1.88, + "learning_rate": 3.583430749167676e-05, + "loss": 0.7118, + "step": 11820 + }, + { + "epoch": 1.88, + "learning_rate": 3.5831982391800025e-05, + "loss": 0.8598, + "step": 11821 + }, + { + "epoch": 1.88, + "learning_rate": 3.5829657176569245e-05, + "loss": 0.7615, + "step": 11822 + }, + { + "epoch": 1.88, + "learning_rate": 3.58273318460092e-05, + "loss": 0.7969, + "step": 11823 + }, + { + "epoch": 1.88, + "learning_rate": 3.5825006400144646e-05, + "loss": 0.8168, + "step": 11824 + }, + { + "epoch": 1.88, + "learning_rate": 3.582268083900034e-05, + "loss": 0.8062, + "step": 11825 + }, + { + "epoch": 1.88, + "learning_rate": 3.582035516260106e-05, + "loss": 0.8063, + "step": 11826 + }, + { + "epoch": 1.88, + "learning_rate": 3.5818029370971565e-05, + "loss": 0.8187, + "step": 11827 + }, + { + "epoch": 1.88, + "learning_rate": 3.581570346413662e-05, + "loss": 0.7588, + "step": 11828 + }, + { + "epoch": 1.88, + "learning_rate": 3.581337744212101e-05, + "loss": 0.8753, + "step": 11829 + }, + { + "epoch": 1.88, + "learning_rate": 3.5811051304949485e-05, + "loss": 0.8389, + "step": 11830 + }, + { + "epoch": 1.88, + "learning_rate": 3.580872505264683e-05, + "loss": 0.7507, + "step": 11831 + }, + { + "epoch": 1.88, + "learning_rate": 3.580639868523782e-05, + "loss": 0.8544, + "step": 11832 + }, + { + "epoch": 1.89, + "learning_rate": 3.580407220274722e-05, + "loss": 0.7616, + "step": 11833 + }, + { + "epoch": 1.89, + "learning_rate": 3.580174560519981e-05, + "loss": 0.8925, + "step": 11834 + }, + { + "epoch": 1.89, + "learning_rate": 3.579941889262037e-05, + "loss": 0.7581, + "step": 11835 + }, + { + "epoch": 1.89, + "learning_rate": 3.579709206503367e-05, + "loss": 0.8467, + "step": 11836 + }, + { + "epoch": 1.89, + "learning_rate": 3.57947651224645e-05, + "loss": 0.7968, + "step": 11837 + }, + { + "epoch": 1.89, + "learning_rate": 3.579243806493764e-05, + "loss": 0.8374, + "step": 11838 + }, + { + "epoch": 1.89, + "learning_rate": 3.579011089247786e-05, + "loss": 0.8136, + "step": 11839 + }, + { + "epoch": 1.89, + "learning_rate": 3.5787783605109946e-05, + "loss": 0.7994, + "step": 11840 + }, + { + "epoch": 1.89, + "learning_rate": 3.578545620285869e-05, + "loss": 0.7559, + "step": 11841 + }, + { + "epoch": 1.89, + "learning_rate": 3.578312868574887e-05, + "loss": 0.7511, + "step": 11842 + }, + { + "epoch": 1.89, + "learning_rate": 3.5780801053805285e-05, + "loss": 0.836, + "step": 11843 + }, + { + "epoch": 1.89, + "learning_rate": 3.577847330705271e-05, + "loss": 0.8757, + "step": 11844 + }, + { + "epoch": 1.89, + "learning_rate": 3.577614544551593e-05, + "loss": 0.7943, + "step": 11845 + }, + { + "epoch": 1.89, + "learning_rate": 3.5773817469219755e-05, + "loss": 0.8461, + "step": 11846 + }, + { + "epoch": 1.89, + "learning_rate": 3.577148937818896e-05, + "loss": 0.7183, + "step": 11847 + }, + { + "epoch": 1.89, + "learning_rate": 3.576916117244834e-05, + "loss": 0.8103, + "step": 11848 + }, + { + "epoch": 1.89, + "learning_rate": 3.57668328520227e-05, + "loss": 0.7752, + "step": 11849 + }, + { + "epoch": 1.89, + "learning_rate": 3.576450441693682e-05, + "loss": 0.8162, + "step": 11850 + }, + { + "epoch": 1.89, + "learning_rate": 3.5762175867215506e-05, + "loss": 0.811, + "step": 11851 + }, + { + "epoch": 1.89, + "learning_rate": 3.575984720288355e-05, + "loss": 0.7583, + "step": 11852 + }, + { + "epoch": 1.89, + "learning_rate": 3.5757518423965754e-05, + "loss": 0.8245, + "step": 11853 + }, + { + "epoch": 1.89, + "learning_rate": 3.575518953048692e-05, + "loss": 0.832, + "step": 11854 + }, + { + "epoch": 1.89, + "learning_rate": 3.575286052247185e-05, + "loss": 0.759, + "step": 11855 + }, + { + "epoch": 1.89, + "learning_rate": 3.5750531399945336e-05, + "loss": 0.8464, + "step": 11856 + }, + { + "epoch": 1.89, + "learning_rate": 3.574820216293219e-05, + "loss": 0.8092, + "step": 11857 + }, + { + "epoch": 1.89, + "learning_rate": 3.574587281145722e-05, + "loss": 0.7786, + "step": 11858 + }, + { + "epoch": 1.89, + "learning_rate": 3.574354334554523e-05, + "loss": 0.8326, + "step": 11859 + }, + { + "epoch": 1.89, + "learning_rate": 3.574121376522103e-05, + "loss": 0.8235, + "step": 11860 + }, + { + "epoch": 1.89, + "learning_rate": 3.5738884070509415e-05, + "loss": 0.8447, + "step": 11861 + }, + { + "epoch": 1.89, + "learning_rate": 3.573655426143521e-05, + "loss": 0.7353, + "step": 11862 + }, + { + "epoch": 1.89, + "learning_rate": 3.5734224338023215e-05, + "loss": 0.7908, + "step": 11863 + }, + { + "epoch": 1.89, + "learning_rate": 3.573189430029825e-05, + "loss": 0.8372, + "step": 11864 + }, + { + "epoch": 1.89, + "learning_rate": 3.5729564148285124e-05, + "loss": 0.8303, + "step": 11865 + }, + { + "epoch": 1.89, + "learning_rate": 3.572723388200866e-05, + "loss": 0.7471, + "step": 11866 + }, + { + "epoch": 1.89, + "learning_rate": 3.5724903501493665e-05, + "loss": 0.7307, + "step": 11867 + }, + { + "epoch": 1.89, + "learning_rate": 3.5722573006764956e-05, + "loss": 0.7851, + "step": 11868 + }, + { + "epoch": 1.89, + "learning_rate": 3.572024239784736e-05, + "loss": 0.8316, + "step": 11869 + }, + { + "epoch": 1.89, + "learning_rate": 3.571791167476569e-05, + "loss": 0.8163, + "step": 11870 + }, + { + "epoch": 1.89, + "learning_rate": 3.571558083754477e-05, + "loss": 0.784, + "step": 11871 + }, + { + "epoch": 1.89, + "learning_rate": 3.571324988620941e-05, + "loss": 0.8234, + "step": 11872 + }, + { + "epoch": 1.89, + "learning_rate": 3.571091882078445e-05, + "loss": 0.7682, + "step": 11873 + }, + { + "epoch": 1.89, + "learning_rate": 3.5708587641294706e-05, + "loss": 0.8527, + "step": 11874 + }, + { + "epoch": 1.89, + "learning_rate": 3.5706256347765e-05, + "loss": 0.8011, + "step": 11875 + }, + { + "epoch": 1.89, + "learning_rate": 3.570392494022017e-05, + "loss": 0.7192, + "step": 11876 + }, + { + "epoch": 1.89, + "learning_rate": 3.5701593418685036e-05, + "loss": 0.7795, + "step": 11877 + }, + { + "epoch": 1.89, + "learning_rate": 3.5699261783184436e-05, + "loss": 0.7449, + "step": 11878 + }, + { + "epoch": 1.89, + "learning_rate": 3.569693003374318e-05, + "loss": 0.8569, + "step": 11879 + }, + { + "epoch": 1.89, + "learning_rate": 3.569459817038613e-05, + "loss": 0.7241, + "step": 11880 + }, + { + "epoch": 1.89, + "learning_rate": 3.569226619313809e-05, + "loss": 0.9166, + "step": 11881 + }, + { + "epoch": 1.89, + "learning_rate": 3.5689934102023904e-05, + "loss": 0.7739, + "step": 11882 + }, + { + "epoch": 1.89, + "learning_rate": 3.568760189706842e-05, + "loss": 0.865, + "step": 11883 + }, + { + "epoch": 1.89, + "learning_rate": 3.568526957829646e-05, + "loss": 0.8005, + "step": 11884 + }, + { + "epoch": 1.89, + "learning_rate": 3.5682937145732864e-05, + "loss": 0.7998, + "step": 11885 + }, + { + "epoch": 1.89, + "learning_rate": 3.5680604599402475e-05, + "loss": 0.708, + "step": 11886 + }, + { + "epoch": 1.89, + "learning_rate": 3.567827193933013e-05, + "loss": 0.7767, + "step": 11887 + }, + { + "epoch": 1.89, + "learning_rate": 3.567593916554067e-05, + "loss": 0.8061, + "step": 11888 + }, + { + "epoch": 1.89, + "learning_rate": 3.5673606278058955e-05, + "loss": 0.7489, + "step": 11889 + }, + { + "epoch": 1.89, + "learning_rate": 3.56712732769098e-05, + "loss": 0.7689, + "step": 11890 + }, + { + "epoch": 1.89, + "learning_rate": 3.566894016211806e-05, + "loss": 0.7526, + "step": 11891 + }, + { + "epoch": 1.89, + "learning_rate": 3.5666606933708594e-05, + "loss": 0.792, + "step": 11892 + }, + { + "epoch": 1.89, + "learning_rate": 3.566427359170623e-05, + "loss": 0.8045, + "step": 11893 + }, + { + "epoch": 1.89, + "learning_rate": 3.566194013613583e-05, + "loss": 0.7388, + "step": 11894 + }, + { + "epoch": 1.9, + "learning_rate": 3.565960656702224e-05, + "loss": 0.7944, + "step": 11895 + }, + { + "epoch": 1.9, + "learning_rate": 3.565727288439031e-05, + "loss": 0.7382, + "step": 11896 + }, + { + "epoch": 1.9, + "learning_rate": 3.56549390882649e-05, + "loss": 0.7938, + "step": 11897 + }, + { + "epoch": 1.9, + "learning_rate": 3.565260517867086e-05, + "loss": 0.812, + "step": 11898 + }, + { + "epoch": 1.9, + "learning_rate": 3.565027115563303e-05, + "loss": 0.795, + "step": 11899 + }, + { + "epoch": 1.9, + "learning_rate": 3.564793701917629e-05, + "loss": 0.7071, + "step": 11900 + }, + { + "epoch": 1.9, + "learning_rate": 3.564560276932548e-05, + "loss": 0.8637, + "step": 11901 + }, + { + "epoch": 1.9, + "learning_rate": 3.5643268406105465e-05, + "loss": 0.9363, + "step": 11902 + }, + { + "epoch": 1.9, + "learning_rate": 3.5640933929541095e-05, + "loss": 0.8124, + "step": 11903 + }, + { + "epoch": 1.9, + "learning_rate": 3.5638599339657256e-05, + "loss": 0.7686, + "step": 11904 + }, + { + "epoch": 1.9, + "learning_rate": 3.5636264636478776e-05, + "loss": 0.757, + "step": 11905 + }, + { + "epoch": 1.9, + "learning_rate": 3.5633929820030544e-05, + "loss": 0.8138, + "step": 11906 + }, + { + "epoch": 1.9, + "learning_rate": 3.5631594890337415e-05, + "loss": 0.8054, + "step": 11907 + }, + { + "epoch": 1.9, + "learning_rate": 3.562925984742426e-05, + "loss": 0.7142, + "step": 11908 + }, + { + "epoch": 1.9, + "learning_rate": 3.562692469131593e-05, + "loss": 0.7399, + "step": 11909 + }, + { + "epoch": 1.9, + "learning_rate": 3.562458942203731e-05, + "loss": 0.734, + "step": 11910 + }, + { + "epoch": 1.9, + "learning_rate": 3.562225403961326e-05, + "loss": 0.8512, + "step": 11911 + }, + { + "epoch": 1.9, + "learning_rate": 3.561991854406865e-05, + "loss": 0.9115, + "step": 11912 + }, + { + "epoch": 1.9, + "learning_rate": 3.561758293542837e-05, + "loss": 0.7987, + "step": 11913 + }, + { + "epoch": 1.9, + "learning_rate": 3.561524721371726e-05, + "loss": 0.7986, + "step": 11914 + }, + { + "epoch": 1.9, + "learning_rate": 3.5612911378960226e-05, + "loss": 0.8283, + "step": 11915 + }, + { + "epoch": 1.9, + "learning_rate": 3.561057543118212e-05, + "loss": 0.8165, + "step": 11916 + }, + { + "epoch": 1.9, + "learning_rate": 3.560823937040783e-05, + "loss": 0.8341, + "step": 11917 + }, + { + "epoch": 1.9, + "learning_rate": 3.5605903196662225e-05, + "loss": 0.7975, + "step": 11918 + }, + { + "epoch": 1.9, + "learning_rate": 3.56035669099702e-05, + "loss": 0.9032, + "step": 11919 + }, + { + "epoch": 1.9, + "learning_rate": 3.560123051035662e-05, + "loss": 0.7905, + "step": 11920 + }, + { + "epoch": 1.9, + "learning_rate": 3.5598893997846374e-05, + "loss": 0.7935, + "step": 11921 + }, + { + "epoch": 1.9, + "learning_rate": 3.559655737246435e-05, + "loss": 0.8025, + "step": 11922 + }, + { + "epoch": 1.9, + "learning_rate": 3.55942206342354e-05, + "loss": 0.8352, + "step": 11923 + }, + { + "epoch": 1.9, + "learning_rate": 3.559188378318445e-05, + "loss": 0.8204, + "step": 11924 + }, + { + "epoch": 1.9, + "learning_rate": 3.558954681933636e-05, + "loss": 0.7647, + "step": 11925 + }, + { + "epoch": 1.9, + "learning_rate": 3.5587209742716024e-05, + "loss": 0.7603, + "step": 11926 + }, + { + "epoch": 1.9, + "learning_rate": 3.5584872553348344e-05, + "loss": 0.7945, + "step": 11927 + }, + { + "epoch": 1.9, + "learning_rate": 3.558253525125819e-05, + "loss": 0.7949, + "step": 11928 + }, + { + "epoch": 1.9, + "learning_rate": 3.558019783647046e-05, + "loss": 0.7839, + "step": 11929 + }, + { + "epoch": 1.9, + "learning_rate": 3.5577860309010055e-05, + "loss": 0.7842, + "step": 11930 + }, + { + "epoch": 1.9, + "learning_rate": 3.557552266890184e-05, + "loss": 0.8044, + "step": 11931 + }, + { + "epoch": 1.9, + "learning_rate": 3.557318491617074e-05, + "loss": 0.842, + "step": 11932 + }, + { + "epoch": 1.9, + "learning_rate": 3.557084705084165e-05, + "loss": 0.7783, + "step": 11933 + }, + { + "epoch": 1.9, + "learning_rate": 3.5568509072939445e-05, + "loss": 0.7813, + "step": 11934 + }, + { + "epoch": 1.9, + "learning_rate": 3.5566170982489036e-05, + "loss": 0.9454, + "step": 11935 + }, + { + "epoch": 1.9, + "learning_rate": 3.556383277951532e-05, + "loss": 0.8377, + "step": 11936 + }, + { + "epoch": 1.9, + "learning_rate": 3.55614944640432e-05, + "loss": 0.8567, + "step": 11937 + }, + { + "epoch": 1.9, + "learning_rate": 3.555915603609758e-05, + "loss": 0.8328, + "step": 11938 + }, + { + "epoch": 1.9, + "learning_rate": 3.555681749570336e-05, + "loss": 0.8949, + "step": 11939 + }, + { + "epoch": 1.9, + "learning_rate": 3.555447884288543e-05, + "loss": 0.79, + "step": 11940 + }, + { + "epoch": 1.9, + "learning_rate": 3.5552140077668724e-05, + "loss": 0.8588, + "step": 11941 + }, + { + "epoch": 1.9, + "learning_rate": 3.5549801200078126e-05, + "loss": 0.8313, + "step": 11942 + }, + { + "epoch": 1.9, + "learning_rate": 3.554746221013854e-05, + "loss": 0.7533, + "step": 11943 + }, + { + "epoch": 1.9, + "learning_rate": 3.55451231078749e-05, + "loss": 0.7622, + "step": 11944 + }, + { + "epoch": 1.9, + "learning_rate": 3.554278389331209e-05, + "loss": 0.7618, + "step": 11945 + }, + { + "epoch": 1.9, + "learning_rate": 3.554044456647504e-05, + "loss": 0.9041, + "step": 11946 + }, + { + "epoch": 1.9, + "learning_rate": 3.553810512738865e-05, + "loss": 0.756, + "step": 11947 + }, + { + "epoch": 1.9, + "learning_rate": 3.553576557607784e-05, + "loss": 0.7915, + "step": 11948 + }, + { + "epoch": 1.9, + "learning_rate": 3.5533425912567516e-05, + "loss": 0.7944, + "step": 11949 + }, + { + "epoch": 1.9, + "learning_rate": 3.5531086136882616e-05, + "loss": 0.8289, + "step": 11950 + }, + { + "epoch": 1.9, + "learning_rate": 3.552874624904803e-05, + "loss": 0.7857, + "step": 11951 + }, + { + "epoch": 1.9, + "learning_rate": 3.5526406249088696e-05, + "loss": 0.8315, + "step": 11952 + }, + { + "epoch": 1.9, + "learning_rate": 3.552406613702953e-05, + "loss": 0.7804, + "step": 11953 + }, + { + "epoch": 1.9, + "learning_rate": 3.5521725912895435e-05, + "loss": 0.8169, + "step": 11954 + }, + { + "epoch": 1.9, + "learning_rate": 3.551938557671136e-05, + "loss": 0.8405, + "step": 11955 + }, + { + "epoch": 1.9, + "learning_rate": 3.5517045128502205e-05, + "loss": 0.8705, + "step": 11956 + }, + { + "epoch": 1.9, + "learning_rate": 3.551470456829291e-05, + "loss": 0.8439, + "step": 11957 + }, + { + "epoch": 1.91, + "learning_rate": 3.551236389610839e-05, + "loss": 0.8027, + "step": 11958 + }, + { + "epoch": 1.91, + "learning_rate": 3.551002311197358e-05, + "loss": 0.7916, + "step": 11959 + }, + { + "epoch": 1.91, + "learning_rate": 3.5507682215913404e-05, + "loss": 0.7762, + "step": 11960 + }, + { + "epoch": 1.91, + "learning_rate": 3.5505341207952795e-05, + "loss": 0.8485, + "step": 11961 + }, + { + "epoch": 1.91, + "learning_rate": 3.550300008811668e-05, + "loss": 0.8586, + "step": 11962 + }, + { + "epoch": 1.91, + "learning_rate": 3.550065885642999e-05, + "loss": 0.7983, + "step": 11963 + }, + { + "epoch": 1.91, + "learning_rate": 3.5498317512917655e-05, + "loss": 0.823, + "step": 11964 + }, + { + "epoch": 1.91, + "learning_rate": 3.549597605760461e-05, + "loss": 0.7982, + "step": 11965 + }, + { + "epoch": 1.91, + "learning_rate": 3.549363449051579e-05, + "loss": 0.8954, + "step": 11966 + }, + { + "epoch": 1.91, + "learning_rate": 3.549129281167615e-05, + "loss": 0.8164, + "step": 11967 + }, + { + "epoch": 1.91, + "learning_rate": 3.548895102111059e-05, + "loss": 0.8189, + "step": 11968 + }, + { + "epoch": 1.91, + "learning_rate": 3.548660911884408e-05, + "loss": 0.8706, + "step": 11969 + }, + { + "epoch": 1.91, + "learning_rate": 3.548426710490156e-05, + "loss": 0.8418, + "step": 11970 + }, + { + "epoch": 1.91, + "learning_rate": 3.5481924979307946e-05, + "loss": 0.7489, + "step": 11971 + }, + { + "epoch": 1.91, + "learning_rate": 3.5479582742088203e-05, + "loss": 0.7804, + "step": 11972 + }, + { + "epoch": 1.91, + "learning_rate": 3.547724039326726e-05, + "loss": 0.8503, + "step": 11973 + }, + { + "epoch": 1.91, + "learning_rate": 3.5474897932870073e-05, + "loss": 0.805, + "step": 11974 + }, + { + "epoch": 1.91, + "learning_rate": 3.547255536092157e-05, + "loss": 0.8135, + "step": 11975 + }, + { + "epoch": 1.91, + "learning_rate": 3.547021267744672e-05, + "loss": 0.8721, + "step": 11976 + }, + { + "epoch": 1.91, + "learning_rate": 3.546786988247047e-05, + "loss": 0.7782, + "step": 11977 + }, + { + "epoch": 1.91, + "learning_rate": 3.546552697601774e-05, + "loss": 0.7531, + "step": 11978 + }, + { + "epoch": 1.91, + "learning_rate": 3.546318395811352e-05, + "loss": 0.8486, + "step": 11979 + }, + { + "epoch": 1.91, + "learning_rate": 3.5460840828782735e-05, + "loss": 0.8702, + "step": 11980 + }, + { + "epoch": 1.91, + "learning_rate": 3.545849758805035e-05, + "loss": 0.8283, + "step": 11981 + }, + { + "epoch": 1.91, + "learning_rate": 3.545615423594132e-05, + "loss": 0.8681, + "step": 11982 + }, + { + "epoch": 1.91, + "learning_rate": 3.545381077248058e-05, + "loss": 0.8212, + "step": 11983 + }, + { + "epoch": 1.91, + "learning_rate": 3.545146719769311e-05, + "loss": 0.7629, + "step": 11984 + }, + { + "epoch": 1.91, + "learning_rate": 3.5449123511603864e-05, + "loss": 0.7592, + "step": 11985 + }, + { + "epoch": 1.91, + "learning_rate": 3.544677971423779e-05, + "loss": 0.8512, + "step": 11986 + }, + { + "epoch": 1.91, + "learning_rate": 3.544443580561986e-05, + "loss": 0.8471, + "step": 11987 + }, + { + "epoch": 1.91, + "learning_rate": 3.544209178577502e-05, + "loss": 0.7734, + "step": 11988 + }, + { + "epoch": 1.91, + "learning_rate": 3.543974765472825e-05, + "loss": 0.8477, + "step": 11989 + }, + { + "epoch": 1.91, + "learning_rate": 3.543740341250451e-05, + "loss": 0.7421, + "step": 11990 + }, + { + "epoch": 1.91, + "learning_rate": 3.543505905912876e-05, + "loss": 0.8305, + "step": 11991 + }, + { + "epoch": 1.91, + "learning_rate": 3.543271459462595e-05, + "loss": 0.7692, + "step": 11992 + }, + { + "epoch": 1.91, + "learning_rate": 3.543037001902108e-05, + "loss": 0.8095, + "step": 11993 + }, + { + "epoch": 1.91, + "learning_rate": 3.542802533233909e-05, + "loss": 0.8445, + "step": 11994 + }, + { + "epoch": 1.91, + "learning_rate": 3.542568053460497e-05, + "loss": 0.8001, + "step": 11995 + }, + { + "epoch": 1.91, + "learning_rate": 3.542333562584368e-05, + "loss": 0.7223, + "step": 11996 + }, + { + "epoch": 1.91, + "learning_rate": 3.542099060608019e-05, + "loss": 0.7959, + "step": 11997 + }, + { + "epoch": 1.91, + "learning_rate": 3.541864547533948e-05, + "loss": 0.6952, + "step": 11998 + }, + { + "epoch": 1.91, + "learning_rate": 3.541630023364652e-05, + "loss": 0.75, + "step": 11999 + }, + { + "epoch": 1.91, + "learning_rate": 3.541395488102629e-05, + "loss": 0.7457, + "step": 12000 + }, + { + "epoch": 1.91, + "learning_rate": 3.541160941750376e-05, + "loss": 0.7949, + "step": 12001 + }, + { + "epoch": 1.91, + "learning_rate": 3.540926384310391e-05, + "loss": 0.8262, + "step": 12002 + }, + { + "epoch": 1.91, + "learning_rate": 3.540691815785172e-05, + "loss": 0.8669, + "step": 12003 + }, + { + "epoch": 1.91, + "learning_rate": 3.540457236177217e-05, + "loss": 0.766, + "step": 12004 + }, + { + "epoch": 1.91, + "learning_rate": 3.5402226454890244e-05, + "loss": 0.7607, + "step": 12005 + }, + { + "epoch": 1.91, + "learning_rate": 3.539988043723092e-05, + "loss": 0.8129, + "step": 12006 + }, + { + "epoch": 1.91, + "learning_rate": 3.5397534308819174e-05, + "loss": 0.8623, + "step": 12007 + }, + { + "epoch": 1.91, + "learning_rate": 3.539518806968001e-05, + "loss": 0.7798, + "step": 12008 + }, + { + "epoch": 1.91, + "learning_rate": 3.53928417198384e-05, + "loss": 0.8293, + "step": 12009 + }, + { + "epoch": 1.91, + "learning_rate": 3.5390495259319346e-05, + "loss": 0.846, + "step": 12010 + }, + { + "epoch": 1.91, + "learning_rate": 3.5388148688147814e-05, + "loss": 0.8046, + "step": 12011 + }, + { + "epoch": 1.91, + "learning_rate": 3.5385802006348805e-05, + "loss": 0.8496, + "step": 12012 + }, + { + "epoch": 1.91, + "learning_rate": 3.538345521394732e-05, + "loss": 0.7589, + "step": 12013 + }, + { + "epoch": 1.91, + "learning_rate": 3.5381108310968334e-05, + "loss": 0.8213, + "step": 12014 + }, + { + "epoch": 1.91, + "learning_rate": 3.537876129743685e-05, + "loss": 0.8025, + "step": 12015 + }, + { + "epoch": 1.91, + "learning_rate": 3.537641417337785e-05, + "loss": 0.8435, + "step": 12016 + }, + { + "epoch": 1.91, + "learning_rate": 3.5374066938816344e-05, + "loss": 0.8637, + "step": 12017 + }, + { + "epoch": 1.91, + "learning_rate": 3.5371719593777324e-05, + "loss": 0.7446, + "step": 12018 + }, + { + "epoch": 1.91, + "learning_rate": 3.5369372138285797e-05, + "loss": 0.7844, + "step": 12019 + }, + { + "epoch": 1.91, + "learning_rate": 3.536702457236673e-05, + "loss": 0.8418, + "step": 12020 + }, + { + "epoch": 1.92, + "learning_rate": 3.5364676896045165e-05, + "loss": 0.8389, + "step": 12021 + }, + { + "epoch": 1.92, + "learning_rate": 3.536232910934608e-05, + "loss": 0.8177, + "step": 12022 + }, + { + "epoch": 1.92, + "learning_rate": 3.535998121229448e-05, + "loss": 0.7194, + "step": 12023 + }, + { + "epoch": 1.92, + "learning_rate": 3.535763320491536e-05, + "loss": 0.759, + "step": 12024 + }, + { + "epoch": 1.92, + "learning_rate": 3.5355285087233744e-05, + "loss": 0.7109, + "step": 12025 + }, + { + "epoch": 1.92, + "learning_rate": 3.535293685927463e-05, + "loss": 0.7861, + "step": 12026 + }, + { + "epoch": 1.92, + "learning_rate": 3.5350588521063024e-05, + "loss": 0.6985, + "step": 12027 + }, + { + "epoch": 1.92, + "learning_rate": 3.534824007262393e-05, + "loss": 0.8176, + "step": 12028 + }, + { + "epoch": 1.92, + "learning_rate": 3.534589151398236e-05, + "loss": 0.783, + "step": 12029 + }, + { + "epoch": 1.92, + "learning_rate": 3.534354284516333e-05, + "loss": 0.7638, + "step": 12030 + }, + { + "epoch": 1.92, + "learning_rate": 3.534119406619186e-05, + "loss": 0.7457, + "step": 12031 + }, + { + "epoch": 1.92, + "learning_rate": 3.533884517709293e-05, + "loss": 0.8067, + "step": 12032 + }, + { + "epoch": 1.92, + "learning_rate": 3.533649617789159e-05, + "loss": 0.8384, + "step": 12033 + }, + { + "epoch": 1.92, + "learning_rate": 3.533414706861284e-05, + "loss": 0.8246, + "step": 12034 + }, + { + "epoch": 1.92, + "learning_rate": 3.533179784928169e-05, + "loss": 0.7092, + "step": 12035 + }, + { + "epoch": 1.92, + "learning_rate": 3.532944851992317e-05, + "loss": 0.7122, + "step": 12036 + }, + { + "epoch": 1.92, + "learning_rate": 3.532709908056229e-05, + "loss": 0.8319, + "step": 12037 + }, + { + "epoch": 1.92, + "learning_rate": 3.532474953122408e-05, + "loss": 0.7803, + "step": 12038 + }, + { + "epoch": 1.92, + "learning_rate": 3.532239987193355e-05, + "loss": 0.8982, + "step": 12039 + }, + { + "epoch": 1.92, + "learning_rate": 3.532005010271574e-05, + "loss": 0.8503, + "step": 12040 + }, + { + "epoch": 1.92, + "learning_rate": 3.531770022359565e-05, + "loss": 0.8385, + "step": 12041 + }, + { + "epoch": 1.92, + "learning_rate": 3.5315350234598325e-05, + "loss": 0.8564, + "step": 12042 + }, + { + "epoch": 1.92, + "learning_rate": 3.531300013574878e-05, + "loss": 0.813, + "step": 12043 + }, + { + "epoch": 1.92, + "learning_rate": 3.531064992707204e-05, + "loss": 0.8354, + "step": 12044 + }, + { + "epoch": 1.92, + "learning_rate": 3.5308299608593145e-05, + "loss": 0.8183, + "step": 12045 + }, + { + "epoch": 1.92, + "learning_rate": 3.5305949180337114e-05, + "loss": 0.7437, + "step": 12046 + }, + { + "epoch": 1.92, + "learning_rate": 3.530359864232898e-05, + "loss": 0.8592, + "step": 12047 + }, + { + "epoch": 1.92, + "learning_rate": 3.530124799459378e-05, + "loss": 0.8058, + "step": 12048 + }, + { + "epoch": 1.92, + "learning_rate": 3.5298897237156534e-05, + "loss": 0.7729, + "step": 12049 + }, + { + "epoch": 1.92, + "learning_rate": 3.52965463700423e-05, + "loss": 0.7561, + "step": 12050 + }, + { + "epoch": 1.92, + "learning_rate": 3.529419539327609e-05, + "loss": 0.8507, + "step": 12051 + }, + { + "epoch": 1.92, + "learning_rate": 3.5291844306882944e-05, + "loss": 0.7863, + "step": 12052 + }, + { + "epoch": 1.92, + "learning_rate": 3.5289493110887904e-05, + "loss": 0.9054, + "step": 12053 + }, + { + "epoch": 1.92, + "learning_rate": 3.528714180531602e-05, + "loss": 0.7943, + "step": 12054 + }, + { + "epoch": 1.92, + "learning_rate": 3.528479039019231e-05, + "loss": 0.7989, + "step": 12055 + }, + { + "epoch": 1.92, + "learning_rate": 3.5282438865541834e-05, + "loss": 0.7584, + "step": 12056 + }, + { + "epoch": 1.92, + "learning_rate": 3.528008723138963e-05, + "loss": 0.7528, + "step": 12057 + }, + { + "epoch": 1.92, + "learning_rate": 3.527773548776073e-05, + "loss": 0.8923, + "step": 12058 + }, + { + "epoch": 1.92, + "learning_rate": 3.5275383634680186e-05, + "loss": 0.8473, + "step": 12059 + }, + { + "epoch": 1.92, + "learning_rate": 3.527303167217305e-05, + "loss": 0.8204, + "step": 12060 + }, + { + "epoch": 1.92, + "learning_rate": 3.527067960026436e-05, + "loss": 0.7673, + "step": 12061 + }, + { + "epoch": 1.92, + "learning_rate": 3.526832741897917e-05, + "loss": 0.8348, + "step": 12062 + }, + { + "epoch": 1.92, + "learning_rate": 3.526597512834253e-05, + "loss": 0.8208, + "step": 12063 + }, + { + "epoch": 1.92, + "learning_rate": 3.526362272837948e-05, + "loss": 0.7926, + "step": 12064 + }, + { + "epoch": 1.92, + "learning_rate": 3.526127021911508e-05, + "loss": 0.7476, + "step": 12065 + }, + { + "epoch": 1.92, + "learning_rate": 3.5258917600574395e-05, + "loss": 0.7268, + "step": 12066 + }, + { + "epoch": 1.92, + "learning_rate": 3.525656487278245e-05, + "loss": 0.8069, + "step": 12067 + }, + { + "epoch": 1.92, + "learning_rate": 3.5254212035764324e-05, + "loss": 0.8713, + "step": 12068 + }, + { + "epoch": 1.92, + "learning_rate": 3.525185908954506e-05, + "loss": 0.8727, + "step": 12069 + }, + { + "epoch": 1.92, + "learning_rate": 3.5249506034149724e-05, + "loss": 0.7767, + "step": 12070 + }, + { + "epoch": 1.92, + "learning_rate": 3.524715286960337e-05, + "loss": 0.8513, + "step": 12071 + }, + { + "epoch": 1.92, + "learning_rate": 3.524479959593106e-05, + "loss": 0.8802, + "step": 12072 + }, + { + "epoch": 1.92, + "learning_rate": 3.524244621315786e-05, + "loss": 0.8469, + "step": 12073 + }, + { + "epoch": 1.92, + "learning_rate": 3.524009272130882e-05, + "loss": 0.8116, + "step": 12074 + }, + { + "epoch": 1.92, + "learning_rate": 3.523773912040901e-05, + "loss": 0.781, + "step": 12075 + }, + { + "epoch": 1.92, + "learning_rate": 3.523538541048349e-05, + "loss": 0.7948, + "step": 12076 + }, + { + "epoch": 1.92, + "learning_rate": 3.523303159155734e-05, + "loss": 0.9158, + "step": 12077 + }, + { + "epoch": 1.92, + "learning_rate": 3.52306776636556e-05, + "loss": 0.8652, + "step": 12078 + }, + { + "epoch": 1.92, + "learning_rate": 3.5228323626803364e-05, + "loss": 0.7642, + "step": 12079 + }, + { + "epoch": 1.92, + "learning_rate": 3.5225969481025694e-05, + "loss": 0.7948, + "step": 12080 + }, + { + "epoch": 1.92, + "learning_rate": 3.5223615226347655e-05, + "loss": 0.7941, + "step": 12081 + }, + { + "epoch": 1.92, + "learning_rate": 3.5221260862794325e-05, + "loss": 0.8302, + "step": 12082 + }, + { + "epoch": 1.92, + "learning_rate": 3.521890639039077e-05, + "loss": 0.7741, + "step": 12083 + }, + { + "epoch": 1.93, + "learning_rate": 3.521655180916206e-05, + "loss": 0.807, + "step": 12084 + }, + { + "epoch": 1.93, + "learning_rate": 3.521419711913328e-05, + "loss": 0.8285, + "step": 12085 + }, + { + "epoch": 1.93, + "learning_rate": 3.52118423203295e-05, + "loss": 0.7241, + "step": 12086 + }, + { + "epoch": 1.93, + "learning_rate": 3.520948741277579e-05, + "loss": 0.7515, + "step": 12087 + }, + { + "epoch": 1.93, + "learning_rate": 3.520713239649725e-05, + "loss": 0.7996, + "step": 12088 + }, + { + "epoch": 1.93, + "learning_rate": 3.520477727151895e-05, + "loss": 0.7352, + "step": 12089 + }, + { + "epoch": 1.93, + "learning_rate": 3.520242203786596e-05, + "loss": 0.7341, + "step": 12090 + }, + { + "epoch": 1.93, + "learning_rate": 3.5200066695563374e-05, + "loss": 0.7255, + "step": 12091 + }, + { + "epoch": 1.93, + "learning_rate": 3.519771124463627e-05, + "loss": 0.9245, + "step": 12092 + }, + { + "epoch": 1.93, + "learning_rate": 3.519535568510972e-05, + "loss": 0.7702, + "step": 12093 + }, + { + "epoch": 1.93, + "learning_rate": 3.5193000017008835e-05, + "loss": 0.8138, + "step": 12094 + }, + { + "epoch": 1.93, + "learning_rate": 3.5190644240358686e-05, + "loss": 0.7959, + "step": 12095 + }, + { + "epoch": 1.93, + "learning_rate": 3.518828835518436e-05, + "loss": 0.895, + "step": 12096 + }, + { + "epoch": 1.93, + "learning_rate": 3.518593236151095e-05, + "loss": 0.7632, + "step": 12097 + }, + { + "epoch": 1.93, + "learning_rate": 3.518357625936354e-05, + "loss": 0.8342, + "step": 12098 + }, + { + "epoch": 1.93, + "learning_rate": 3.5181220048767233e-05, + "loss": 0.8001, + "step": 12099 + }, + { + "epoch": 1.93, + "learning_rate": 3.517886372974711e-05, + "loss": 0.8437, + "step": 12100 + }, + { + "epoch": 1.93, + "learning_rate": 3.517650730232827e-05, + "loss": 0.8126, + "step": 12101 + }, + { + "epoch": 1.93, + "learning_rate": 3.5174150766535804e-05, + "loss": 0.8304, + "step": 12102 + }, + { + "epoch": 1.93, + "learning_rate": 3.517179412239481e-05, + "loss": 0.8646, + "step": 12103 + }, + { + "epoch": 1.93, + "learning_rate": 3.516943736993038e-05, + "loss": 0.7316, + "step": 12104 + }, + { + "epoch": 1.93, + "learning_rate": 3.516708050916762e-05, + "loss": 0.7712, + "step": 12105 + }, + { + "epoch": 1.93, + "learning_rate": 3.516472354013163e-05, + "loss": 0.797, + "step": 12106 + }, + { + "epoch": 1.93, + "learning_rate": 3.516236646284749e-05, + "loss": 0.8881, + "step": 12107 + }, + { + "epoch": 1.93, + "learning_rate": 3.5160009277340325e-05, + "loss": 0.8558, + "step": 12108 + }, + { + "epoch": 1.93, + "learning_rate": 3.5157651983635235e-05, + "loss": 0.8047, + "step": 12109 + }, + { + "epoch": 1.93, + "learning_rate": 3.5155294581757314e-05, + "loss": 0.7701, + "step": 12110 + }, + { + "epoch": 1.93, + "learning_rate": 3.515293707173168e-05, + "loss": 0.7113, + "step": 12111 + }, + { + "epoch": 1.93, + "learning_rate": 3.5150579453583424e-05, + "loss": 0.8193, + "step": 12112 + }, + { + "epoch": 1.93, + "learning_rate": 3.514822172733765e-05, + "loss": 0.8072, + "step": 12113 + }, + { + "epoch": 1.93, + "learning_rate": 3.514586389301948e-05, + "loss": 0.7329, + "step": 12114 + }, + { + "epoch": 1.93, + "learning_rate": 3.5143505950654025e-05, + "loss": 0.7869, + "step": 12115 + }, + { + "epoch": 1.93, + "learning_rate": 3.514114790026638e-05, + "loss": 0.7571, + "step": 12116 + }, + { + "epoch": 1.93, + "learning_rate": 3.513878974188168e-05, + "loss": 0.7385, + "step": 12117 + }, + { + "epoch": 1.93, + "learning_rate": 3.5136431475525024e-05, + "loss": 0.8225, + "step": 12118 + }, + { + "epoch": 1.93, + "learning_rate": 3.513407310122151e-05, + "loss": 0.7806, + "step": 12119 + }, + { + "epoch": 1.93, + "learning_rate": 3.513171461899629e-05, + "loss": 0.8424, + "step": 12120 + }, + { + "epoch": 1.93, + "learning_rate": 3.512935602887445e-05, + "loss": 0.8556, + "step": 12121 + }, + { + "epoch": 1.93, + "learning_rate": 3.512699733088112e-05, + "loss": 0.7512, + "step": 12122 + }, + { + "epoch": 1.93, + "learning_rate": 3.512463852504141e-05, + "loss": 0.8404, + "step": 12123 + }, + { + "epoch": 1.93, + "learning_rate": 3.5122279611380457e-05, + "loss": 0.7751, + "step": 12124 + }, + { + "epoch": 1.93, + "learning_rate": 3.5119920589923364e-05, + "loss": 0.7711, + "step": 12125 + }, + { + "epoch": 1.93, + "learning_rate": 3.511756146069527e-05, + "loss": 0.7315, + "step": 12126 + }, + { + "epoch": 1.93, + "learning_rate": 3.511520222372128e-05, + "loss": 0.8686, + "step": 12127 + }, + { + "epoch": 1.93, + "learning_rate": 3.511284287902653e-05, + "loss": 0.7289, + "step": 12128 + }, + { + "epoch": 1.93, + "learning_rate": 3.5110483426636146e-05, + "loss": 0.8385, + "step": 12129 + }, + { + "epoch": 1.93, + "learning_rate": 3.5108123866575244e-05, + "loss": 0.8418, + "step": 12130 + }, + { + "epoch": 1.93, + "learning_rate": 3.5105764198868964e-05, + "loss": 0.745, + "step": 12131 + }, + { + "epoch": 1.93, + "learning_rate": 3.510340442354244e-05, + "loss": 0.8914, + "step": 12132 + }, + { + "epoch": 1.93, + "learning_rate": 3.510104454062077e-05, + "loss": 0.7855, + "step": 12133 + }, + { + "epoch": 1.93, + "learning_rate": 3.509868455012913e-05, + "loss": 0.7613, + "step": 12134 + }, + { + "epoch": 1.93, + "learning_rate": 3.5096324452092625e-05, + "loss": 0.7783, + "step": 12135 + }, + { + "epoch": 1.93, + "learning_rate": 3.509396424653639e-05, + "loss": 0.7836, + "step": 12136 + }, + { + "epoch": 1.93, + "learning_rate": 3.5091603933485564e-05, + "loss": 0.8059, + "step": 12137 + }, + { + "epoch": 1.93, + "learning_rate": 3.508924351296529e-05, + "loss": 0.7478, + "step": 12138 + }, + { + "epoch": 1.93, + "learning_rate": 3.5086882985000684e-05, + "loss": 0.7685, + "step": 12139 + }, + { + "epoch": 1.93, + "learning_rate": 3.508452234961691e-05, + "loss": 0.8991, + "step": 12140 + }, + { + "epoch": 1.93, + "learning_rate": 3.508216160683909e-05, + "loss": 0.7666, + "step": 12141 + }, + { + "epoch": 1.93, + "learning_rate": 3.507980075669238e-05, + "loss": 0.7992, + "step": 12142 + }, + { + "epoch": 1.93, + "learning_rate": 3.507743979920191e-05, + "loss": 0.7619, + "step": 12143 + }, + { + "epoch": 1.93, + "learning_rate": 3.507507873439281e-05, + "loss": 0.8118, + "step": 12144 + }, + { + "epoch": 1.93, + "learning_rate": 3.5072717562290246e-05, + "loss": 0.7555, + "step": 12145 + }, + { + "epoch": 1.94, + "learning_rate": 3.5070356282919354e-05, + "loss": 0.8241, + "step": 12146 + }, + { + "epoch": 1.94, + "learning_rate": 3.506799489630528e-05, + "loss": 0.8027, + "step": 12147 + }, + { + "epoch": 1.94, + "learning_rate": 3.5065633402473175e-05, + "loss": 0.8518, + "step": 12148 + }, + { + "epoch": 1.94, + "learning_rate": 3.506327180144819e-05, + "loss": 0.788, + "step": 12149 + }, + { + "epoch": 1.94, + "learning_rate": 3.5060910093255464e-05, + "loss": 0.9006, + "step": 12150 + }, + { + "epoch": 1.94, + "learning_rate": 3.505854827792016e-05, + "loss": 0.8573, + "step": 12151 + }, + { + "epoch": 1.94, + "learning_rate": 3.505618635546742e-05, + "loss": 0.7711, + "step": 12152 + }, + { + "epoch": 1.94, + "learning_rate": 3.505382432592239e-05, + "loss": 0.8374, + "step": 12153 + }, + { + "epoch": 1.94, + "learning_rate": 3.5051462189310255e-05, + "loss": 0.7467, + "step": 12154 + }, + { + "epoch": 1.94, + "learning_rate": 3.504909994565614e-05, + "loss": 0.7441, + "step": 12155 + }, + { + "epoch": 1.94, + "learning_rate": 3.50467375949852e-05, + "loss": 0.7913, + "step": 12156 + }, + { + "epoch": 1.94, + "learning_rate": 3.504437513732262e-05, + "loss": 0.8577, + "step": 12157 + }, + { + "epoch": 1.94, + "learning_rate": 3.504201257269354e-05, + "loss": 0.8028, + "step": 12158 + }, + { + "epoch": 1.94, + "learning_rate": 3.503964990112312e-05, + "loss": 0.8075, + "step": 12159 + }, + { + "epoch": 1.94, + "learning_rate": 3.503728712263652e-05, + "loss": 0.7708, + "step": 12160 + }, + { + "epoch": 1.94, + "learning_rate": 3.5034924237258914e-05, + "loss": 0.8136, + "step": 12161 + }, + { + "epoch": 1.94, + "learning_rate": 3.5032561245015455e-05, + "loss": 0.7829, + "step": 12162 + }, + { + "epoch": 1.94, + "learning_rate": 3.503019814593131e-05, + "loss": 0.7736, + "step": 12163 + }, + { + "epoch": 1.94, + "learning_rate": 3.5027834940031646e-05, + "loss": 0.801, + "step": 12164 + }, + { + "epoch": 1.94, + "learning_rate": 3.5025471627341616e-05, + "loss": 0.6834, + "step": 12165 + }, + { + "epoch": 1.94, + "learning_rate": 3.502310820788641e-05, + "loss": 0.7718, + "step": 12166 + }, + { + "epoch": 1.94, + "learning_rate": 3.502074468169119e-05, + "loss": 0.7756, + "step": 12167 + }, + { + "epoch": 1.94, + "learning_rate": 3.501838104878111e-05, + "loss": 0.8423, + "step": 12168 + }, + { + "epoch": 1.94, + "learning_rate": 3.501601730918136e-05, + "loss": 0.8533, + "step": 12169 + }, + { + "epoch": 1.94, + "learning_rate": 3.501365346291712e-05, + "loss": 0.7704, + "step": 12170 + }, + { + "epoch": 1.94, + "learning_rate": 3.501128951001353e-05, + "loss": 0.7411, + "step": 12171 + }, + { + "epoch": 1.94, + "learning_rate": 3.50089254504958e-05, + "loss": 0.7491, + "step": 12172 + }, + { + "epoch": 1.94, + "learning_rate": 3.500656128438908e-05, + "loss": 0.7764, + "step": 12173 + }, + { + "epoch": 1.94, + "learning_rate": 3.500419701171856e-05, + "loss": 0.7253, + "step": 12174 + }, + { + "epoch": 1.94, + "learning_rate": 3.500183263250942e-05, + "loss": 0.7935, + "step": 12175 + }, + { + "epoch": 1.94, + "learning_rate": 3.499946814678683e-05, + "loss": 0.7699, + "step": 12176 + }, + { + "epoch": 1.94, + "learning_rate": 3.4997103554575976e-05, + "loss": 0.7907, + "step": 12177 + }, + { + "epoch": 1.94, + "learning_rate": 3.499473885590204e-05, + "loss": 0.768, + "step": 12178 + }, + { + "epoch": 1.94, + "learning_rate": 3.49923740507902e-05, + "loss": 0.7585, + "step": 12179 + }, + { + "epoch": 1.94, + "learning_rate": 3.499000913926565e-05, + "loss": 0.8368, + "step": 12180 + }, + { + "epoch": 1.94, + "learning_rate": 3.498764412135356e-05, + "loss": 0.7355, + "step": 12181 + }, + { + "epoch": 1.94, + "learning_rate": 3.498527899707913e-05, + "loss": 0.8027, + "step": 12182 + }, + { + "epoch": 1.94, + "learning_rate": 3.498291376646754e-05, + "loss": 0.7983, + "step": 12183 + }, + { + "epoch": 1.94, + "learning_rate": 3.498054842954397e-05, + "loss": 0.8327, + "step": 12184 + }, + { + "epoch": 1.94, + "learning_rate": 3.497818298633362e-05, + "loss": 0.8459, + "step": 12185 + }, + { + "epoch": 1.94, + "learning_rate": 3.497581743686169e-05, + "loss": 0.776, + "step": 12186 + }, + { + "epoch": 1.94, + "learning_rate": 3.497345178115336e-05, + "loss": 0.7094, + "step": 12187 + }, + { + "epoch": 1.94, + "learning_rate": 3.497108601923381e-05, + "loss": 0.792, + "step": 12188 + }, + { + "epoch": 1.94, + "learning_rate": 3.496872015112826e-05, + "loss": 0.7288, + "step": 12189 + }, + { + "epoch": 1.94, + "learning_rate": 3.4966354176861894e-05, + "loss": 0.7492, + "step": 12190 + }, + { + "epoch": 1.94, + "learning_rate": 3.4963988096459895e-05, + "loss": 0.7659, + "step": 12191 + }, + { + "epoch": 1.94, + "learning_rate": 3.496162190994748e-05, + "loss": 0.7137, + "step": 12192 + }, + { + "epoch": 1.94, + "learning_rate": 3.495925561734984e-05, + "loss": 0.7826, + "step": 12193 + }, + { + "epoch": 1.94, + "learning_rate": 3.495688921869217e-05, + "loss": 0.7928, + "step": 12194 + }, + { + "epoch": 1.94, + "learning_rate": 3.495452271399968e-05, + "loss": 0.8363, + "step": 12195 + }, + { + "epoch": 1.94, + "learning_rate": 3.495215610329756e-05, + "loss": 0.8677, + "step": 12196 + }, + { + "epoch": 1.94, + "learning_rate": 3.494978938661102e-05, + "loss": 0.774, + "step": 12197 + }, + { + "epoch": 1.94, + "learning_rate": 3.4947422563965274e-05, + "loss": 0.882, + "step": 12198 + }, + { + "epoch": 1.94, + "learning_rate": 3.494505563538551e-05, + "loss": 0.7515, + "step": 12199 + }, + { + "epoch": 1.94, + "learning_rate": 3.494268860089693e-05, + "loss": 0.7885, + "step": 12200 + }, + { + "epoch": 1.94, + "learning_rate": 3.494032146052477e-05, + "loss": 0.7952, + "step": 12201 + }, + { + "epoch": 1.94, + "learning_rate": 3.493795421429422e-05, + "loss": 0.7993, + "step": 12202 + }, + { + "epoch": 1.94, + "learning_rate": 3.493558686223049e-05, + "loss": 0.7467, + "step": 12203 + }, + { + "epoch": 1.94, + "learning_rate": 3.493321940435879e-05, + "loss": 0.8063, + "step": 12204 + }, + { + "epoch": 1.94, + "learning_rate": 3.4930851840704324e-05, + "loss": 0.7691, + "step": 12205 + }, + { + "epoch": 1.94, + "learning_rate": 3.492848417129233e-05, + "loss": 0.7544, + "step": 12206 + }, + { + "epoch": 1.94, + "learning_rate": 3.4926116396147993e-05, + "loss": 0.8834, + "step": 12207 + }, + { + "epoch": 1.94, + "learning_rate": 3.492374851529655e-05, + "loss": 0.865, + "step": 12208 + }, + { + "epoch": 1.95, + "learning_rate": 3.492138052876322e-05, + "loss": 0.7459, + "step": 12209 + }, + { + "epoch": 1.95, + "learning_rate": 3.49190124365732e-05, + "loss": 0.7899, + "step": 12210 + }, + { + "epoch": 1.95, + "learning_rate": 3.491664423875171e-05, + "loss": 0.8508, + "step": 12211 + }, + { + "epoch": 1.95, + "learning_rate": 3.4914275935324e-05, + "loss": 0.7725, + "step": 12212 + }, + { + "epoch": 1.95, + "learning_rate": 3.491190752631526e-05, + "loss": 0.8039, + "step": 12213 + }, + { + "epoch": 1.95, + "learning_rate": 3.490953901175072e-05, + "loss": 0.7874, + "step": 12214 + }, + { + "epoch": 1.95, + "learning_rate": 3.490717039165561e-05, + "loss": 0.7467, + "step": 12215 + }, + { + "epoch": 1.95, + "learning_rate": 3.490480166605515e-05, + "loss": 0.8324, + "step": 12216 + }, + { + "epoch": 1.95, + "learning_rate": 3.490243283497456e-05, + "loss": 0.8357, + "step": 12217 + }, + { + "epoch": 1.95, + "learning_rate": 3.490006389843908e-05, + "loss": 0.8181, + "step": 12218 + }, + { + "epoch": 1.95, + "learning_rate": 3.489769485647392e-05, + "loss": 0.7707, + "step": 12219 + }, + { + "epoch": 1.95, + "learning_rate": 3.4895325709104324e-05, + "loss": 0.7747, + "step": 12220 + }, + { + "epoch": 1.95, + "learning_rate": 3.489295645635552e-05, + "loss": 0.7345, + "step": 12221 + }, + { + "epoch": 1.95, + "learning_rate": 3.489058709825274e-05, + "loss": 0.8406, + "step": 12222 + }, + { + "epoch": 1.95, + "learning_rate": 3.4888217634821205e-05, + "loss": 0.7638, + "step": 12223 + }, + { + "epoch": 1.95, + "learning_rate": 3.4885848066086156e-05, + "loss": 0.8405, + "step": 12224 + }, + { + "epoch": 1.95, + "learning_rate": 3.488347839207283e-05, + "loss": 0.8037, + "step": 12225 + }, + { + "epoch": 1.95, + "learning_rate": 3.488110861280646e-05, + "loss": 0.7785, + "step": 12226 + }, + { + "epoch": 1.95, + "learning_rate": 3.487873872831228e-05, + "loss": 0.8225, + "step": 12227 + }, + { + "epoch": 1.95, + "learning_rate": 3.4876368738615525e-05, + "loss": 0.7462, + "step": 12228 + }, + { + "epoch": 1.95, + "learning_rate": 3.487399864374144e-05, + "loss": 0.7506, + "step": 12229 + }, + { + "epoch": 1.95, + "learning_rate": 3.487162844371527e-05, + "loss": 0.7759, + "step": 12230 + }, + { + "epoch": 1.95, + "learning_rate": 3.4869258138562244e-05, + "loss": 0.8194, + "step": 12231 + }, + { + "epoch": 1.95, + "learning_rate": 3.486688772830762e-05, + "loss": 0.832, + "step": 12232 + }, + { + "epoch": 1.95, + "learning_rate": 3.486451721297663e-05, + "loss": 0.79, + "step": 12233 + }, + { + "epoch": 1.95, + "learning_rate": 3.4862146592594515e-05, + "loss": 0.7694, + "step": 12234 + }, + { + "epoch": 1.95, + "learning_rate": 3.485977586718653e-05, + "loss": 0.8366, + "step": 12235 + }, + { + "epoch": 1.95, + "learning_rate": 3.485740503677791e-05, + "loss": 0.8511, + "step": 12236 + }, + { + "epoch": 1.95, + "learning_rate": 3.485503410139392e-05, + "loss": 0.7966, + "step": 12237 + }, + { + "epoch": 1.95, + "learning_rate": 3.48526630610598e-05, + "loss": 0.6777, + "step": 12238 + }, + { + "epoch": 1.95, + "learning_rate": 3.485029191580079e-05, + "loss": 0.8626, + "step": 12239 + }, + { + "epoch": 1.95, + "learning_rate": 3.484792066564216e-05, + "loss": 0.832, + "step": 12240 + }, + { + "epoch": 1.95, + "learning_rate": 3.4845549310609155e-05, + "loss": 0.822, + "step": 12241 + }, + { + "epoch": 1.95, + "learning_rate": 3.484317785072703e-05, + "loss": 0.7421, + "step": 12242 + }, + { + "epoch": 1.95, + "learning_rate": 3.484080628602103e-05, + "loss": 0.7345, + "step": 12243 + }, + { + "epoch": 1.95, + "learning_rate": 3.483843461651642e-05, + "loss": 0.8665, + "step": 12244 + }, + { + "epoch": 1.95, + "learning_rate": 3.483606284223845e-05, + "loss": 0.8343, + "step": 12245 + }, + { + "epoch": 1.95, + "learning_rate": 3.4833690963212384e-05, + "loss": 0.7797, + "step": 12246 + }, + { + "epoch": 1.95, + "learning_rate": 3.483131897946348e-05, + "loss": 0.7984, + "step": 12247 + }, + { + "epoch": 1.95, + "learning_rate": 3.4828946891017e-05, + "loss": 0.761, + "step": 12248 + }, + { + "epoch": 1.95, + "learning_rate": 3.48265746978982e-05, + "loss": 0.8601, + "step": 12249 + }, + { + "epoch": 1.95, + "learning_rate": 3.482420240013235e-05, + "loss": 0.8481, + "step": 12250 + }, + { + "epoch": 1.95, + "learning_rate": 3.4821829997744705e-05, + "loss": 0.8411, + "step": 12251 + }, + { + "epoch": 1.95, + "learning_rate": 3.481945749076053e-05, + "loss": 0.7668, + "step": 12252 + }, + { + "epoch": 1.95, + "learning_rate": 3.481708487920511e-05, + "loss": 0.8669, + "step": 12253 + }, + { + "epoch": 1.95, + "learning_rate": 3.481471216310368e-05, + "loss": 0.7201, + "step": 12254 + }, + { + "epoch": 1.95, + "learning_rate": 3.481233934248153e-05, + "loss": 0.8878, + "step": 12255 + }, + { + "epoch": 1.95, + "learning_rate": 3.480996641736392e-05, + "loss": 0.8486, + "step": 12256 + }, + { + "epoch": 1.95, + "learning_rate": 3.4807593387776125e-05, + "loss": 0.83, + "step": 12257 + }, + { + "epoch": 1.95, + "learning_rate": 3.4805220253743415e-05, + "loss": 0.7217, + "step": 12258 + }, + { + "epoch": 1.95, + "learning_rate": 3.480284701529106e-05, + "loss": 0.8286, + "step": 12259 + }, + { + "epoch": 1.95, + "learning_rate": 3.480047367244434e-05, + "loss": 0.7083, + "step": 12260 + }, + { + "epoch": 1.95, + "learning_rate": 3.479810022522852e-05, + "loss": 0.8097, + "step": 12261 + }, + { + "epoch": 1.95, + "learning_rate": 3.4795726673668885e-05, + "loss": 0.7938, + "step": 12262 + }, + { + "epoch": 1.95, + "learning_rate": 3.479335301779072e-05, + "loss": 0.7516, + "step": 12263 + }, + { + "epoch": 1.95, + "learning_rate": 3.479097925761927e-05, + "loss": 0.777, + "step": 12264 + }, + { + "epoch": 1.95, + "learning_rate": 3.4788605393179854e-05, + "loss": 0.7572, + "step": 12265 + }, + { + "epoch": 1.95, + "learning_rate": 3.478623142449772e-05, + "loss": 0.7929, + "step": 12266 + }, + { + "epoch": 1.95, + "learning_rate": 3.4783857351598165e-05, + "loss": 0.7035, + "step": 12267 + }, + { + "epoch": 1.95, + "learning_rate": 3.4781483174506466e-05, + "loss": 0.8592, + "step": 12268 + }, + { + "epoch": 1.95, + "learning_rate": 3.4779108893247914e-05, + "loss": 0.8337, + "step": 12269 + }, + { + "epoch": 1.95, + "learning_rate": 3.4776734507847794e-05, + "loss": 0.8556, + "step": 12270 + }, + { + "epoch": 1.95, + "learning_rate": 3.477436001833138e-05, + "loss": 0.7805, + "step": 12271 + }, + { + "epoch": 1.96, + "learning_rate": 3.477198542472396e-05, + "loss": 0.8952, + "step": 12272 + }, + { + "epoch": 1.96, + "learning_rate": 3.476961072705085e-05, + "loss": 0.7498, + "step": 12273 + }, + { + "epoch": 1.96, + "learning_rate": 3.476723592533729e-05, + "loss": 0.7649, + "step": 12274 + }, + { + "epoch": 1.96, + "learning_rate": 3.476486101960862e-05, + "loss": 0.8009, + "step": 12275 + }, + { + "epoch": 1.96, + "learning_rate": 3.47624860098901e-05, + "loss": 0.8759, + "step": 12276 + }, + { + "epoch": 1.96, + "learning_rate": 3.4760110896207025e-05, + "loss": 0.8981, + "step": 12277 + }, + { + "epoch": 1.96, + "learning_rate": 3.47577356785847e-05, + "loss": 0.8125, + "step": 12278 + }, + { + "epoch": 1.96, + "learning_rate": 3.4755360357048416e-05, + "loss": 0.7462, + "step": 12279 + }, + { + "epoch": 1.96, + "learning_rate": 3.475298493162346e-05, + "loss": 0.7415, + "step": 12280 + }, + { + "epoch": 1.96, + "learning_rate": 3.4750609402335144e-05, + "loss": 0.7817, + "step": 12281 + }, + { + "epoch": 1.96, + "learning_rate": 3.474823376920876e-05, + "loss": 0.8774, + "step": 12282 + }, + { + "epoch": 1.96, + "learning_rate": 3.47458580322696e-05, + "loss": 0.7493, + "step": 12283 + }, + { + "epoch": 1.96, + "learning_rate": 3.474348219154296e-05, + "loss": 0.8595, + "step": 12284 + }, + { + "epoch": 1.96, + "learning_rate": 3.4741106247054156e-05, + "loss": 0.7289, + "step": 12285 + }, + { + "epoch": 1.96, + "learning_rate": 3.4738730198828484e-05, + "loss": 0.6945, + "step": 12286 + }, + { + "epoch": 1.96, + "learning_rate": 3.473635404689125e-05, + "loss": 0.7969, + "step": 12287 + }, + { + "epoch": 1.96, + "learning_rate": 3.473397779126776e-05, + "loss": 0.8693, + "step": 12288 + }, + { + "epoch": 1.96, + "learning_rate": 3.473160143198331e-05, + "loss": 0.7405, + "step": 12289 + }, + { + "epoch": 1.96, + "learning_rate": 3.4729224969063214e-05, + "loss": 0.758, + "step": 12290 + }, + { + "epoch": 1.96, + "learning_rate": 3.472684840253278e-05, + "loss": 0.7593, + "step": 12291 + }, + { + "epoch": 1.96, + "learning_rate": 3.472447173241731e-05, + "loss": 0.7541, + "step": 12292 + }, + { + "epoch": 1.96, + "learning_rate": 3.4722094958742134e-05, + "loss": 0.8119, + "step": 12293 + }, + { + "epoch": 1.96, + "learning_rate": 3.471971808153254e-05, + "loss": 0.7788, + "step": 12294 + }, + { + "epoch": 1.96, + "learning_rate": 3.471734110081385e-05, + "loss": 0.8653, + "step": 12295 + }, + { + "epoch": 1.96, + "learning_rate": 3.471496401661138e-05, + "loss": 0.8145, + "step": 12296 + }, + { + "epoch": 1.96, + "learning_rate": 3.471258682895043e-05, + "loss": 0.8694, + "step": 12297 + }, + { + "epoch": 1.96, + "learning_rate": 3.471020953785634e-05, + "loss": 0.7979, + "step": 12298 + }, + { + "epoch": 1.96, + "learning_rate": 3.470783214335441e-05, + "loss": 0.8562, + "step": 12299 + }, + { + "epoch": 1.96, + "learning_rate": 3.470545464546996e-05, + "loss": 0.7602, + "step": 12300 + }, + { + "epoch": 1.96, + "learning_rate": 3.4703077044228313e-05, + "loss": 0.8171, + "step": 12301 + }, + { + "epoch": 1.96, + "learning_rate": 3.470069933965478e-05, + "loss": 0.7427, + "step": 12302 + }, + { + "epoch": 1.96, + "learning_rate": 3.4698321531774694e-05, + "loss": 0.8254, + "step": 12303 + }, + { + "epoch": 1.96, + "learning_rate": 3.4695943620613375e-05, + "loss": 0.7725, + "step": 12304 + }, + { + "epoch": 1.96, + "learning_rate": 3.469356560619614e-05, + "loss": 0.8063, + "step": 12305 + }, + { + "epoch": 1.96, + "learning_rate": 3.469118748854831e-05, + "loss": 0.6837, + "step": 12306 + }, + { + "epoch": 1.96, + "learning_rate": 3.468880926769522e-05, + "loss": 0.8146, + "step": 12307 + }, + { + "epoch": 1.96, + "learning_rate": 3.46864309436622e-05, + "loss": 0.7424, + "step": 12308 + }, + { + "epoch": 1.96, + "learning_rate": 3.4684052516474556e-05, + "loss": 0.7382, + "step": 12309 + }, + { + "epoch": 1.96, + "learning_rate": 3.4681673986157644e-05, + "loss": 0.7605, + "step": 12310 + }, + { + "epoch": 1.96, + "learning_rate": 3.4679295352736786e-05, + "loss": 0.6752, + "step": 12311 + }, + { + "epoch": 1.96, + "learning_rate": 3.4676916616237296e-05, + "loss": 0.8193, + "step": 12312 + }, + { + "epoch": 1.96, + "learning_rate": 3.467453777668453e-05, + "loss": 0.7407, + "step": 12313 + }, + { + "epoch": 1.96, + "learning_rate": 3.4672158834103804e-05, + "loss": 0.6892, + "step": 12314 + }, + { + "epoch": 1.96, + "learning_rate": 3.466977978852047e-05, + "loss": 0.771, + "step": 12315 + }, + { + "epoch": 1.96, + "learning_rate": 3.466740063995984e-05, + "loss": 0.8905, + "step": 12316 + }, + { + "epoch": 1.96, + "learning_rate": 3.466502138844727e-05, + "loss": 0.6854, + "step": 12317 + }, + { + "epoch": 1.96, + "learning_rate": 3.466264203400808e-05, + "loss": 0.7396, + "step": 12318 + }, + { + "epoch": 1.96, + "learning_rate": 3.466026257666763e-05, + "loss": 0.7032, + "step": 12319 + }, + { + "epoch": 1.96, + "learning_rate": 3.4657883016451236e-05, + "loss": 0.8198, + "step": 12320 + }, + { + "epoch": 1.96, + "learning_rate": 3.4655503353384255e-05, + "loss": 0.8796, + "step": 12321 + }, + { + "epoch": 1.96, + "learning_rate": 3.465312358749203e-05, + "loss": 0.8096, + "step": 12322 + }, + { + "epoch": 1.96, + "learning_rate": 3.46507437187999e-05, + "loss": 0.8913, + "step": 12323 + }, + { + "epoch": 1.96, + "learning_rate": 3.464836374733321e-05, + "loss": 0.8092, + "step": 12324 + }, + { + "epoch": 1.96, + "learning_rate": 3.46459836731173e-05, + "loss": 0.8204, + "step": 12325 + }, + { + "epoch": 1.96, + "learning_rate": 3.464360349617751e-05, + "loss": 0.7901, + "step": 12326 + }, + { + "epoch": 1.96, + "learning_rate": 3.464122321653921e-05, + "loss": 0.7929, + "step": 12327 + }, + { + "epoch": 1.96, + "learning_rate": 3.463884283422773e-05, + "loss": 0.7913, + "step": 12328 + }, + { + "epoch": 1.96, + "learning_rate": 3.463646234926843e-05, + "loss": 0.77, + "step": 12329 + }, + { + "epoch": 1.96, + "learning_rate": 3.463408176168666e-05, + "loss": 0.7639, + "step": 12330 + }, + { + "epoch": 1.96, + "learning_rate": 3.463170107150776e-05, + "loss": 0.7845, + "step": 12331 + }, + { + "epoch": 1.96, + "learning_rate": 3.462932027875709e-05, + "loss": 0.7926, + "step": 12332 + }, + { + "epoch": 1.96, + "learning_rate": 3.462693938346002e-05, + "loss": 0.7776, + "step": 12333 + }, + { + "epoch": 1.96, + "learning_rate": 3.462455838564188e-05, + "loss": 0.7748, + "step": 12334 + }, + { + "epoch": 1.97, + "learning_rate": 3.4622177285328035e-05, + "loss": 0.807, + "step": 12335 + }, + { + "epoch": 1.97, + "learning_rate": 3.461979608254384e-05, + "loss": 0.8114, + "step": 12336 + }, + { + "epoch": 1.97, + "learning_rate": 3.461741477731467e-05, + "loss": 0.8381, + "step": 12337 + }, + { + "epoch": 1.97, + "learning_rate": 3.461503336966586e-05, + "loss": 0.7448, + "step": 12338 + }, + { + "epoch": 1.97, + "learning_rate": 3.4612651859622785e-05, + "loss": 0.7772, + "step": 12339 + }, + { + "epoch": 1.97, + "learning_rate": 3.461027024721081e-05, + "loss": 0.772, + "step": 12340 + }, + { + "epoch": 1.97, + "learning_rate": 3.4607888532455277e-05, + "loss": 0.7403, + "step": 12341 + }, + { + "epoch": 1.97, + "learning_rate": 3.460550671538157e-05, + "loss": 0.8317, + "step": 12342 + }, + { + "epoch": 1.97, + "learning_rate": 3.460312479601506e-05, + "loss": 0.7167, + "step": 12343 + }, + { + "epoch": 1.97, + "learning_rate": 3.460074277438109e-05, + "loss": 0.776, + "step": 12344 + }, + { + "epoch": 1.97, + "learning_rate": 3.459836065050505e-05, + "loss": 0.799, + "step": 12345 + }, + { + "epoch": 1.97, + "learning_rate": 3.459597842441228e-05, + "loss": 0.7424, + "step": 12346 + }, + { + "epoch": 1.97, + "learning_rate": 3.459359609612817e-05, + "loss": 0.915, + "step": 12347 + }, + { + "epoch": 1.97, + "learning_rate": 3.459121366567809e-05, + "loss": 0.8263, + "step": 12348 + }, + { + "epoch": 1.97, + "learning_rate": 3.4588831133087404e-05, + "loss": 0.9033, + "step": 12349 + }, + { + "epoch": 1.97, + "learning_rate": 3.458644849838149e-05, + "loss": 0.7586, + "step": 12350 + }, + { + "epoch": 1.97, + "learning_rate": 3.458406576158572e-05, + "loss": 0.8448, + "step": 12351 + }, + { + "epoch": 1.97, + "learning_rate": 3.4581682922725464e-05, + "loss": 0.8001, + "step": 12352 + }, + { + "epoch": 1.97, + "learning_rate": 3.4579299981826105e-05, + "loss": 0.7945, + "step": 12353 + }, + { + "epoch": 1.97, + "learning_rate": 3.457691693891303e-05, + "loss": 0.8229, + "step": 12354 + }, + { + "epoch": 1.97, + "learning_rate": 3.457453379401159e-05, + "loss": 0.7822, + "step": 12355 + }, + { + "epoch": 1.97, + "learning_rate": 3.457215054714718e-05, + "loss": 0.7625, + "step": 12356 + }, + { + "epoch": 1.97, + "learning_rate": 3.456976719834518e-05, + "loss": 0.7793, + "step": 12357 + }, + { + "epoch": 1.97, + "learning_rate": 3.456738374763096e-05, + "loss": 0.7939, + "step": 12358 + }, + { + "epoch": 1.97, + "learning_rate": 3.456500019502992e-05, + "loss": 0.8251, + "step": 12359 + }, + { + "epoch": 1.97, + "learning_rate": 3.456261654056744e-05, + "loss": 0.7915, + "step": 12360 + }, + { + "epoch": 1.97, + "learning_rate": 3.4560232784268886e-05, + "loss": 0.7521, + "step": 12361 + }, + { + "epoch": 1.97, + "learning_rate": 3.455784892615966e-05, + "loss": 0.8341, + "step": 12362 + }, + { + "epoch": 1.97, + "learning_rate": 3.4555464966265153e-05, + "loss": 0.7141, + "step": 12363 + }, + { + "epoch": 1.97, + "learning_rate": 3.455308090461075e-05, + "loss": 0.8298, + "step": 12364 + }, + { + "epoch": 1.97, + "learning_rate": 3.455069674122182e-05, + "loss": 0.7501, + "step": 12365 + }, + { + "epoch": 1.97, + "learning_rate": 3.4548312476123774e-05, + "loss": 0.7959, + "step": 12366 + }, + { + "epoch": 1.97, + "learning_rate": 3.4545928109342e-05, + "loss": 0.7927, + "step": 12367 + }, + { + "epoch": 1.97, + "learning_rate": 3.4543543640901886e-05, + "loss": 0.8502, + "step": 12368 + }, + { + "epoch": 1.97, + "learning_rate": 3.454115907082882e-05, + "loss": 0.7389, + "step": 12369 + }, + { + "epoch": 1.97, + "learning_rate": 3.453877439914821e-05, + "loss": 0.8384, + "step": 12370 + }, + { + "epoch": 1.97, + "learning_rate": 3.4536389625885445e-05, + "loss": 0.7701, + "step": 12371 + }, + { + "epoch": 1.97, + "learning_rate": 3.453400475106592e-05, + "loss": 0.7351, + "step": 12372 + }, + { + "epoch": 1.97, + "learning_rate": 3.453161977471502e-05, + "loss": 0.7855, + "step": 12373 + }, + { + "epoch": 1.97, + "learning_rate": 3.4529234696858174e-05, + "loss": 0.781, + "step": 12374 + }, + { + "epoch": 1.97, + "learning_rate": 3.4526849517520756e-05, + "loss": 0.8487, + "step": 12375 + }, + { + "epoch": 1.97, + "learning_rate": 3.452446423672818e-05, + "loss": 0.7142, + "step": 12376 + }, + { + "epoch": 1.97, + "learning_rate": 3.452207885450584e-05, + "loss": 0.9391, + "step": 12377 + }, + { + "epoch": 1.97, + "learning_rate": 3.4519693370879124e-05, + "loss": 0.7572, + "step": 12378 + }, + { + "epoch": 1.97, + "learning_rate": 3.451730778587347e-05, + "loss": 0.8229, + "step": 12379 + }, + { + "epoch": 1.97, + "learning_rate": 3.4514922099514266e-05, + "loss": 0.7334, + "step": 12380 + }, + { + "epoch": 1.97, + "learning_rate": 3.451253631182691e-05, + "loss": 0.9032, + "step": 12381 + }, + { + "epoch": 1.97, + "learning_rate": 3.4510150422836825e-05, + "loss": 0.8075, + "step": 12382 + }, + { + "epoch": 1.97, + "learning_rate": 3.450776443256941e-05, + "loss": 0.7447, + "step": 12383 + }, + { + "epoch": 1.97, + "learning_rate": 3.4505378341050074e-05, + "loss": 0.7447, + "step": 12384 + }, + { + "epoch": 1.97, + "learning_rate": 3.450299214830424e-05, + "loss": 0.7506, + "step": 12385 + }, + { + "epoch": 1.97, + "learning_rate": 3.4500605854357295e-05, + "loss": 0.7337, + "step": 12386 + }, + { + "epoch": 1.97, + "learning_rate": 3.4498219459234674e-05, + "loss": 0.8704, + "step": 12387 + }, + { + "epoch": 1.97, + "learning_rate": 3.4495832962961776e-05, + "loss": 0.7699, + "step": 12388 + }, + { + "epoch": 1.97, + "learning_rate": 3.449344636556403e-05, + "loss": 0.7677, + "step": 12389 + }, + { + "epoch": 1.97, + "learning_rate": 3.449105966706683e-05, + "loss": 0.8106, + "step": 12390 + }, + { + "epoch": 1.97, + "learning_rate": 3.448867286749562e-05, + "loss": 0.7722, + "step": 12391 + }, + { + "epoch": 1.97, + "learning_rate": 3.4486285966875805e-05, + "loss": 0.7522, + "step": 12392 + }, + { + "epoch": 1.97, + "learning_rate": 3.448389896523279e-05, + "loss": 0.7739, + "step": 12393 + }, + { + "epoch": 1.97, + "learning_rate": 3.448151186259202e-05, + "loss": 0.7262, + "step": 12394 + }, + { + "epoch": 1.97, + "learning_rate": 3.4479124658978906e-05, + "loss": 0.8676, + "step": 12395 + }, + { + "epoch": 1.97, + "learning_rate": 3.447673735441886e-05, + "loss": 0.7803, + "step": 12396 + }, + { + "epoch": 1.97, + "learning_rate": 3.447434994893733e-05, + "loss": 0.7932, + "step": 12397 + }, + { + "epoch": 1.98, + "learning_rate": 3.447196244255971e-05, + "loss": 0.8522, + "step": 12398 + }, + { + "epoch": 1.98, + "learning_rate": 3.446957483531145e-05, + "loss": 0.8452, + "step": 12399 + }, + { + "epoch": 1.98, + "learning_rate": 3.4467187127217964e-05, + "loss": 0.8267, + "step": 12400 + }, + { + "epoch": 1.98, + "learning_rate": 3.446479931830468e-05, + "loss": 0.7754, + "step": 12401 + }, + { + "epoch": 1.98, + "learning_rate": 3.4462411408597026e-05, + "loss": 0.8569, + "step": 12402 + }, + { + "epoch": 1.98, + "learning_rate": 3.446002339812044e-05, + "loss": 0.79, + "step": 12403 + }, + { + "epoch": 1.98, + "learning_rate": 3.4457635286900355e-05, + "loss": 0.7679, + "step": 12404 + }, + { + "epoch": 1.98, + "learning_rate": 3.4455247074962196e-05, + "loss": 0.7628, + "step": 12405 + }, + { + "epoch": 1.98, + "learning_rate": 3.445285876233139e-05, + "loss": 0.8383, + "step": 12406 + }, + { + "epoch": 1.98, + "learning_rate": 3.445047034903337e-05, + "loss": 0.7568, + "step": 12407 + }, + { + "epoch": 1.98, + "learning_rate": 3.444808183509359e-05, + "loss": 0.8493, + "step": 12408 + }, + { + "epoch": 1.98, + "learning_rate": 3.444569322053747e-05, + "loss": 0.7961, + "step": 12409 + }, + { + "epoch": 1.98, + "learning_rate": 3.444330450539045e-05, + "loss": 0.7663, + "step": 12410 + }, + { + "epoch": 1.98, + "learning_rate": 3.4440915689677975e-05, + "loss": 0.7636, + "step": 12411 + }, + { + "epoch": 1.98, + "learning_rate": 3.4438526773425476e-05, + "loss": 0.7558, + "step": 12412 + }, + { + "epoch": 1.98, + "learning_rate": 3.4436137756658396e-05, + "loss": 0.7968, + "step": 12413 + }, + { + "epoch": 1.98, + "learning_rate": 3.443374863940219e-05, + "loss": 0.908, + "step": 12414 + }, + { + "epoch": 1.98, + "learning_rate": 3.443135942168227e-05, + "loss": 0.8381, + "step": 12415 + }, + { + "epoch": 1.98, + "learning_rate": 3.442897010352411e-05, + "loss": 0.8847, + "step": 12416 + }, + { + "epoch": 1.98, + "learning_rate": 3.4426580684953144e-05, + "loss": 0.752, + "step": 12417 + }, + { + "epoch": 1.98, + "learning_rate": 3.44241911659948e-05, + "loss": 0.7927, + "step": 12418 + }, + { + "epoch": 1.98, + "learning_rate": 3.442180154667456e-05, + "loss": 0.8315, + "step": 12419 + }, + { + "epoch": 1.98, + "learning_rate": 3.4419411827017845e-05, + "loss": 0.7677, + "step": 12420 + }, + { + "epoch": 1.98, + "learning_rate": 3.441702200705011e-05, + "loss": 0.8194, + "step": 12421 + }, + { + "epoch": 1.98, + "learning_rate": 3.4414632086796816e-05, + "loss": 0.7601, + "step": 12422 + }, + { + "epoch": 1.98, + "learning_rate": 3.441224206628341e-05, + "loss": 0.8095, + "step": 12423 + }, + { + "epoch": 1.98, + "learning_rate": 3.4409851945535334e-05, + "loss": 0.7938, + "step": 12424 + }, + { + "epoch": 1.98, + "learning_rate": 3.440746172457804e-05, + "loss": 0.7322, + "step": 12425 + }, + { + "epoch": 1.98, + "learning_rate": 3.4405071403437e-05, + "loss": 0.7372, + "step": 12426 + }, + { + "epoch": 1.98, + "learning_rate": 3.440268098213765e-05, + "loss": 0.7641, + "step": 12427 + }, + { + "epoch": 1.98, + "learning_rate": 3.4400290460705465e-05, + "loss": 0.8002, + "step": 12428 + }, + { + "epoch": 1.98, + "learning_rate": 3.439789983916589e-05, + "loss": 0.7872, + "step": 12429 + }, + { + "epoch": 1.98, + "learning_rate": 3.4395509117544386e-05, + "loss": 0.764, + "step": 12430 + }, + { + "epoch": 1.98, + "learning_rate": 3.4393118295866423e-05, + "loss": 0.7826, + "step": 12431 + }, + { + "epoch": 1.98, + "learning_rate": 3.439072737415744e-05, + "loss": 0.7388, + "step": 12432 + }, + { + "epoch": 1.98, + "learning_rate": 3.438833635244292e-05, + "loss": 0.7533, + "step": 12433 + }, + { + "epoch": 1.98, + "learning_rate": 3.4385945230748324e-05, + "loss": 0.8141, + "step": 12434 + }, + { + "epoch": 1.98, + "learning_rate": 3.4383554009099094e-05, + "loss": 0.7445, + "step": 12435 + }, + { + "epoch": 1.98, + "learning_rate": 3.438116268752072e-05, + "loss": 0.7764, + "step": 12436 + }, + { + "epoch": 1.98, + "learning_rate": 3.4378771266038653e-05, + "loss": 0.7776, + "step": 12437 + }, + { + "epoch": 1.98, + "learning_rate": 3.4376379744678375e-05, + "loss": 0.8248, + "step": 12438 + }, + { + "epoch": 1.98, + "learning_rate": 3.437398812346534e-05, + "loss": 0.7988, + "step": 12439 + }, + { + "epoch": 1.98, + "learning_rate": 3.437159640242502e-05, + "loss": 0.8329, + "step": 12440 + }, + { + "epoch": 1.98, + "learning_rate": 3.436920458158289e-05, + "loss": 0.7406, + "step": 12441 + }, + { + "epoch": 1.98, + "learning_rate": 3.436681266096442e-05, + "loss": 0.8311, + "step": 12442 + }, + { + "epoch": 1.98, + "learning_rate": 3.436442064059508e-05, + "loss": 0.7477, + "step": 12443 + }, + { + "epoch": 1.98, + "learning_rate": 3.436202852050035e-05, + "loss": 0.7978, + "step": 12444 + }, + { + "epoch": 1.98, + "learning_rate": 3.43596363007057e-05, + "loss": 0.6786, + "step": 12445 + }, + { + "epoch": 1.98, + "learning_rate": 3.4357243981236605e-05, + "loss": 0.7642, + "step": 12446 + }, + { + "epoch": 1.98, + "learning_rate": 3.435485156211853e-05, + "loss": 0.8535, + "step": 12447 + }, + { + "epoch": 1.98, + "learning_rate": 3.4352459043376985e-05, + "loss": 0.7351, + "step": 12448 + }, + { + "epoch": 1.98, + "learning_rate": 3.435006642503742e-05, + "loss": 0.8312, + "step": 12449 + }, + { + "epoch": 1.98, + "learning_rate": 3.434767370712532e-05, + "loss": 0.7931, + "step": 12450 + }, + { + "epoch": 1.98, + "learning_rate": 3.434528088966618e-05, + "loss": 0.789, + "step": 12451 + }, + { + "epoch": 1.98, + "learning_rate": 3.4342887972685464e-05, + "loss": 0.7696, + "step": 12452 + }, + { + "epoch": 1.98, + "learning_rate": 3.4340494956208655e-05, + "loss": 0.8382, + "step": 12453 + }, + { + "epoch": 1.98, + "learning_rate": 3.433810184026126e-05, + "loss": 0.8209, + "step": 12454 + }, + { + "epoch": 1.98, + "learning_rate": 3.4335708624868754e-05, + "loss": 0.8134, + "step": 12455 + }, + { + "epoch": 1.98, + "learning_rate": 3.433331531005661e-05, + "loss": 0.8625, + "step": 12456 + }, + { + "epoch": 1.98, + "learning_rate": 3.433092189585033e-05, + "loss": 0.7896, + "step": 12457 + }, + { + "epoch": 1.98, + "learning_rate": 3.4328528382275386e-05, + "loss": 0.7848, + "step": 12458 + }, + { + "epoch": 1.98, + "learning_rate": 3.4326134769357285e-05, + "loss": 0.7912, + "step": 12459 + }, + { + "epoch": 1.99, + "learning_rate": 3.4323741057121514e-05, + "loss": 0.9914, + "step": 12460 + }, + { + "epoch": 1.99, + "learning_rate": 3.432134724559355e-05, + "loss": 0.7725, + "step": 12461 + }, + { + "epoch": 1.99, + "learning_rate": 3.4318953334798906e-05, + "loss": 0.7848, + "step": 12462 + }, + { + "epoch": 1.99, + "learning_rate": 3.431655932476307e-05, + "loss": 0.7359, + "step": 12463 + }, + { + "epoch": 1.99, + "learning_rate": 3.431416521551152e-05, + "loss": 0.761, + "step": 12464 + }, + { + "epoch": 1.99, + "learning_rate": 3.4311771007069784e-05, + "loss": 0.867, + "step": 12465 + }, + { + "epoch": 1.99, + "learning_rate": 3.430937669946332e-05, + "loss": 0.8227, + "step": 12466 + }, + { + "epoch": 1.99, + "learning_rate": 3.430698229271766e-05, + "loss": 0.8126, + "step": 12467 + }, + { + "epoch": 1.99, + "learning_rate": 3.4304587786858285e-05, + "loss": 0.7433, + "step": 12468 + }, + { + "epoch": 1.99, + "learning_rate": 3.43021931819107e-05, + "loss": 0.7837, + "step": 12469 + }, + { + "epoch": 1.99, + "learning_rate": 3.42997984779004e-05, + "loss": 0.7477, + "step": 12470 + }, + { + "epoch": 1.99, + "learning_rate": 3.42974036748529e-05, + "loss": 0.8131, + "step": 12471 + }, + { + "epoch": 1.99, + "learning_rate": 3.429500877279369e-05, + "loss": 0.8016, + "step": 12472 + }, + { + "epoch": 1.99, + "learning_rate": 3.429261377174828e-05, + "loss": 0.8036, + "step": 12473 + }, + { + "epoch": 1.99, + "learning_rate": 3.429021867174218e-05, + "loss": 0.8297, + "step": 12474 + }, + { + "epoch": 1.99, + "learning_rate": 3.428782347280089e-05, + "loss": 0.8867, + "step": 12475 + }, + { + "epoch": 1.99, + "learning_rate": 3.428542817494991e-05, + "loss": 0.8226, + "step": 12476 + }, + { + "epoch": 1.99, + "learning_rate": 3.428303277821477e-05, + "loss": 0.8006, + "step": 12477 + }, + { + "epoch": 1.99, + "learning_rate": 3.428063728262096e-05, + "loss": 0.7777, + "step": 12478 + }, + { + "epoch": 1.99, + "learning_rate": 3.4278241688193994e-05, + "loss": 0.7493, + "step": 12479 + }, + { + "epoch": 1.99, + "learning_rate": 3.427584599495939e-05, + "loss": 0.7748, + "step": 12480 + }, + { + "epoch": 1.99, + "learning_rate": 3.4273450202942656e-05, + "loss": 0.7954, + "step": 12481 + }, + { + "epoch": 1.99, + "learning_rate": 3.427105431216931e-05, + "loss": 0.7499, + "step": 12482 + }, + { + "epoch": 1.99, + "learning_rate": 3.4268658322664866e-05, + "loss": 0.7596, + "step": 12483 + }, + { + "epoch": 1.99, + "learning_rate": 3.426626223445484e-05, + "loss": 0.7096, + "step": 12484 + }, + { + "epoch": 1.99, + "learning_rate": 3.426386604756474e-05, + "loss": 0.7459, + "step": 12485 + }, + { + "epoch": 1.99, + "learning_rate": 3.426146976202009e-05, + "loss": 0.7952, + "step": 12486 + }, + { + "epoch": 1.99, + "learning_rate": 3.425907337784642e-05, + "loss": 0.7526, + "step": 12487 + }, + { + "epoch": 1.99, + "learning_rate": 3.425667689506923e-05, + "loss": 0.6759, + "step": 12488 + }, + { + "epoch": 1.99, + "learning_rate": 3.4254280313714055e-05, + "loss": 0.7624, + "step": 12489 + }, + { + "epoch": 1.99, + "learning_rate": 3.425188363380641e-05, + "loss": 0.7475, + "step": 12490 + }, + { + "epoch": 1.99, + "learning_rate": 3.4249486855371824e-05, + "loss": 0.8679, + "step": 12491 + }, + { + "epoch": 1.99, + "learning_rate": 3.424708997843582e-05, + "loss": 0.7893, + "step": 12492 + }, + { + "epoch": 1.99, + "learning_rate": 3.424469300302392e-05, + "loss": 0.8092, + "step": 12493 + }, + { + "epoch": 1.99, + "learning_rate": 3.424229592916165e-05, + "loss": 0.7366, + "step": 12494 + }, + { + "epoch": 1.99, + "learning_rate": 3.423989875687454e-05, + "loss": 0.8566, + "step": 12495 + }, + { + "epoch": 1.99, + "learning_rate": 3.4237501486188124e-05, + "loss": 0.7957, + "step": 12496 + }, + { + "epoch": 1.99, + "learning_rate": 3.423510411712792e-05, + "loss": 0.8344, + "step": 12497 + }, + { + "epoch": 1.99, + "learning_rate": 3.423270664971947e-05, + "loss": 0.8241, + "step": 12498 + }, + { + "epoch": 1.99, + "learning_rate": 3.4230309083988285e-05, + "loss": 0.7427, + "step": 12499 + }, + { + "epoch": 1.99, + "learning_rate": 3.4227911419959926e-05, + "loss": 0.7384, + "step": 12500 + }, + { + "epoch": 1.99, + "learning_rate": 3.422551365765991e-05, + "loss": 0.7727, + "step": 12501 + }, + { + "epoch": 1.99, + "learning_rate": 3.422311579711377e-05, + "loss": 0.7648, + "step": 12502 + }, + { + "epoch": 1.99, + "learning_rate": 3.422071783834705e-05, + "loss": 0.7312, + "step": 12503 + }, + { + "epoch": 1.99, + "learning_rate": 3.421831978138528e-05, + "loss": 0.7769, + "step": 12504 + }, + { + "epoch": 1.99, + "learning_rate": 3.4215921626254e-05, + "loss": 0.7502, + "step": 12505 + }, + { + "epoch": 1.99, + "learning_rate": 3.4213523372978764e-05, + "loss": 0.7562, + "step": 12506 + }, + { + "epoch": 1.99, + "learning_rate": 3.4211125021585086e-05, + "loss": 0.7456, + "step": 12507 + }, + { + "epoch": 1.99, + "learning_rate": 3.4208726572098515e-05, + "loss": 0.8247, + "step": 12508 + }, + { + "epoch": 1.99, + "learning_rate": 3.42063280245446e-05, + "loss": 0.7585, + "step": 12509 + }, + { + "epoch": 1.99, + "learning_rate": 3.4203929378948885e-05, + "loss": 0.7861, + "step": 12510 + }, + { + "epoch": 1.99, + "learning_rate": 3.42015306353369e-05, + "loss": 0.7646, + "step": 12511 + }, + { + "epoch": 1.99, + "learning_rate": 3.4199131793734206e-05, + "loss": 0.7577, + "step": 12512 + }, + { + "epoch": 1.99, + "learning_rate": 3.4196732854166345e-05, + "loss": 0.7128, + "step": 12513 + }, + { + "epoch": 1.99, + "learning_rate": 3.419433381665886e-05, + "loss": 0.7051, + "step": 12514 + }, + { + "epoch": 1.99, + "learning_rate": 3.419193468123731e-05, + "loss": 0.8061, + "step": 12515 + }, + { + "epoch": 1.99, + "learning_rate": 3.418953544792722e-05, + "loss": 0.8021, + "step": 12516 + }, + { + "epoch": 1.99, + "learning_rate": 3.418713611675417e-05, + "loss": 0.8347, + "step": 12517 + }, + { + "epoch": 1.99, + "learning_rate": 3.41847366877437e-05, + "loss": 0.7774, + "step": 12518 + }, + { + "epoch": 1.99, + "learning_rate": 3.418233716092135e-05, + "loss": 0.731, + "step": 12519 + }, + { + "epoch": 1.99, + "learning_rate": 3.417993753631269e-05, + "loss": 0.8679, + "step": 12520 + }, + { + "epoch": 1.99, + "learning_rate": 3.417753781394327e-05, + "loss": 0.7579, + "step": 12521 + }, + { + "epoch": 1.99, + "learning_rate": 3.4175137993838636e-05, + "loss": 0.7584, + "step": 12522 + }, + { + "epoch": 2.0, + "learning_rate": 3.417273807602436e-05, + "loss": 0.7925, + "step": 12523 + }, + { + "epoch": 2.0, + "learning_rate": 3.417033806052599e-05, + "loss": 0.7102, + "step": 12524 + }, + { + "epoch": 2.0, + "learning_rate": 3.4167937947369085e-05, + "loss": 0.7102, + "step": 12525 + }, + { + "epoch": 2.0, + "learning_rate": 3.416553773657922e-05, + "loss": 0.8058, + "step": 12526 + }, + { + "epoch": 2.0, + "learning_rate": 3.416313742818193e-05, + "loss": 0.8092, + "step": 12527 + }, + { + "epoch": 2.0, + "learning_rate": 3.416073702220279e-05, + "loss": 0.7414, + "step": 12528 + }, + { + "epoch": 2.0, + "learning_rate": 3.4158336518667364e-05, + "loss": 0.7906, + "step": 12529 + }, + { + "epoch": 2.0, + "learning_rate": 3.415593591760121e-05, + "loss": 0.7709, + "step": 12530 + }, + { + "epoch": 2.0, + "learning_rate": 3.4153535219029906e-05, + "loss": 0.7852, + "step": 12531 + }, + { + "epoch": 2.0, + "learning_rate": 3.4151134422979006e-05, + "loss": 0.8279, + "step": 12532 + }, + { + "epoch": 2.0, + "learning_rate": 3.414873352947407e-05, + "loss": 0.7351, + "step": 12533 + }, + { + "epoch": 2.0, + "learning_rate": 3.4146332538540684e-05, + "loss": 0.7104, + "step": 12534 + }, + { + "epoch": 2.0, + "learning_rate": 3.414393145020442e-05, + "loss": 0.7041, + "step": 12535 + }, + { + "epoch": 2.0, + "learning_rate": 3.414153026449082e-05, + "loss": 0.7636, + "step": 12536 + }, + { + "epoch": 2.0, + "learning_rate": 3.413912898142548e-05, + "loss": 0.762, + "step": 12537 + }, + { + "epoch": 2.0, + "learning_rate": 3.413672760103397e-05, + "loss": 0.7146, + "step": 12538 + }, + { + "epoch": 2.0, + "learning_rate": 3.4134326123341855e-05, + "loss": 0.9163, + "step": 12539 + }, + { + "epoch": 2.0, + "learning_rate": 3.4131924548374703e-05, + "loss": 0.7447, + "step": 12540 + }, + { + "epoch": 2.0, + "learning_rate": 3.4129522876158113e-05, + "loss": 0.7847, + "step": 12541 + }, + { + "epoch": 2.0, + "learning_rate": 3.412712110671764e-05, + "loss": 0.7773, + "step": 12542 + }, + { + "epoch": 2.0, + "learning_rate": 3.4124719240078865e-05, + "loss": 0.8075, + "step": 12543 + }, + { + "epoch": 2.0, + "learning_rate": 3.4122317276267375e-05, + "loss": 0.7951, + "step": 12544 + }, + { + "epoch": 2.0, + "learning_rate": 3.411991521530875e-05, + "loss": 0.7594, + "step": 12545 + }, + { + "epoch": 2.0, + "learning_rate": 3.411751305722856e-05, + "loss": 0.7301, + "step": 12546 + }, + { + "epoch": 2.0, + "learning_rate": 3.411511080205238e-05, + "loss": 0.8, + "step": 12547 + }, + { + "epoch": 2.0, + "learning_rate": 3.411270844980581e-05, + "loss": 0.7936, + "step": 12548 + }, + { + "epoch": 2.0, + "learning_rate": 3.4110306000514434e-05, + "loss": 0.7857, + "step": 12549 + }, + { + "epoch": 2.0, + "learning_rate": 3.410790345420383e-05, + "loss": 0.7653, + "step": 12550 + }, + { + "epoch": 2.0, + "learning_rate": 3.410550081089958e-05, + "loss": 0.7329, + "step": 12551 + }, + { + "epoch": 2.0, + "learning_rate": 3.410309807062727e-05, + "loss": 0.8053, + "step": 12552 + }, + { + "epoch": 2.0, + "learning_rate": 3.41006952334125e-05, + "loss": 0.7718, + "step": 12553 + }, + { + "epoch": 2.0, + "learning_rate": 3.409829229928084e-05, + "loss": 0.7848, + "step": 12554 + }, + { + "epoch": 2.0, + "learning_rate": 3.4095889268257906e-05, + "loss": 0.7517, + "step": 12555 + }, + { + "epoch": 2.0, + "learning_rate": 3.4093486140369266e-05, + "loss": 0.807, + "step": 12556 + }, + { + "epoch": 2.0, + "learning_rate": 3.409108291564052e-05, + "loss": 0.7519, + "step": 12557 + }, + { + "epoch": 2.0, + "learning_rate": 3.408867959409726e-05, + "loss": 0.7182, + "step": 12558 + }, + { + "epoch": 2.0, + "learning_rate": 3.408627617576508e-05, + "loss": 0.7855, + "step": 12559 + }, + { + "epoch": 2.0, + "learning_rate": 3.4083872660669576e-05, + "loss": 0.7788, + "step": 12560 + }, + { + "epoch": 2.0, + "learning_rate": 3.408146904883634e-05, + "loss": 0.7531, + "step": 12561 + }, + { + "epoch": 2.0, + "learning_rate": 3.4079065340290975e-05, + "loss": 0.7753, + "step": 12562 + }, + { + "epoch": 2.0, + "learning_rate": 3.4076661535059076e-05, + "loss": 0.8017, + "step": 12563 + }, + { + "epoch": 2.0, + "learning_rate": 3.407425763316624e-05, + "loss": 0.7429, + "step": 12564 + }, + { + "epoch": 2.0, + "learning_rate": 3.407185363463807e-05, + "loss": 0.778, + "step": 12565 + }, + { + "epoch": 2.0, + "learning_rate": 3.406944953950017e-05, + "loss": 0.864, + "step": 12566 + }, + { + "epoch": 2.0, + "learning_rate": 3.4067045347778136e-05, + "loss": 0.7525, + "step": 12567 + }, + { + "epoch": 2.0, + "learning_rate": 3.406464105949757e-05, + "loss": 0.7107, + "step": 12568 + }, + { + "epoch": 2.0, + "learning_rate": 3.406223667468409e-05, + "loss": 0.7292, + "step": 12569 + }, + { + "epoch": 2.0, + "learning_rate": 3.4059832193363286e-05, + "loss": 0.7325, + "step": 12570 + }, + { + "epoch": 2.0, + "learning_rate": 3.405742761556076e-05, + "loss": 0.7132, + "step": 12571 + }, + { + "epoch": 2.0, + "learning_rate": 3.4055022941302136e-05, + "loss": 0.8065, + "step": 12572 + }, + { + "epoch": 2.0, + "learning_rate": 3.405261817061302e-05, + "loss": 0.6918, + "step": 12573 + }, + { + "epoch": 2.0, + "learning_rate": 3.4050213303519014e-05, + "loss": 0.753, + "step": 12574 + }, + { + "epoch": 2.0, + "learning_rate": 3.4047808340045725e-05, + "loss": 0.7956, + "step": 12575 + }, + { + "epoch": 2.0, + "learning_rate": 3.4045403280218786e-05, + "loss": 0.7321, + "step": 12576 + }, + { + "epoch": 2.0, + "learning_rate": 3.404299812406378e-05, + "loss": 0.7724, + "step": 12577 + }, + { + "epoch": 2.0, + "learning_rate": 3.4040592871606334e-05, + "loss": 0.7847, + "step": 12578 + }, + { + "epoch": 2.0, + "learning_rate": 3.403818752287207e-05, + "loss": 0.7563, + "step": 12579 + }, + { + "epoch": 2.0, + "learning_rate": 3.4035782077886585e-05, + "loss": 0.7266, + "step": 12580 + }, + { + "epoch": 2.0, + "learning_rate": 3.4033376536675514e-05, + "loss": 0.8174, + "step": 12581 + }, + { + "epoch": 2.0, + "learning_rate": 3.4030970899264466e-05, + "loss": 0.8231, + "step": 12582 + }, + { + "epoch": 2.0, + "learning_rate": 3.402856516567906e-05, + "loss": 0.8121, + "step": 12583 + }, + { + "epoch": 2.0, + "learning_rate": 3.402615933594492e-05, + "loss": 0.7384, + "step": 12584 + }, + { + "epoch": 2.0, + "learning_rate": 3.402375341008766e-05, + "loss": 0.742, + "step": 12585 + }, + { + "epoch": 2.01, + "learning_rate": 3.4021347388132906e-05, + "loss": 0.6992, + "step": 12586 + }, + { + "epoch": 2.01, + "learning_rate": 3.4018941270106284e-05, + "loss": 0.7872, + "step": 12587 + }, + { + "epoch": 2.01, + "learning_rate": 3.40165350560334e-05, + "loss": 0.7593, + "step": 12588 + }, + { + "epoch": 2.01, + "learning_rate": 3.40141287459399e-05, + "loss": 0.8264, + "step": 12589 + }, + { + "epoch": 2.01, + "learning_rate": 3.40117223398514e-05, + "loss": 0.6761, + "step": 12590 + }, + { + "epoch": 2.01, + "learning_rate": 3.4009315837793526e-05, + "loss": 0.7614, + "step": 12591 + }, + { + "epoch": 2.01, + "learning_rate": 3.4006909239791916e-05, + "loss": 0.7727, + "step": 12592 + }, + { + "epoch": 2.01, + "learning_rate": 3.400450254587219e-05, + "loss": 0.7412, + "step": 12593 + }, + { + "epoch": 2.01, + "learning_rate": 3.4002095756059966e-05, + "loss": 0.7705, + "step": 12594 + }, + { + "epoch": 2.01, + "learning_rate": 3.3999688870380895e-05, + "loss": 0.7682, + "step": 12595 + }, + { + "epoch": 2.01, + "learning_rate": 3.399728188886061e-05, + "loss": 0.8105, + "step": 12596 + }, + { + "epoch": 2.01, + "learning_rate": 3.399487481152472e-05, + "loss": 0.7153, + "step": 12597 + }, + { + "epoch": 2.01, + "learning_rate": 3.3992467638398886e-05, + "loss": 0.7821, + "step": 12598 + }, + { + "epoch": 2.01, + "learning_rate": 3.3990060369508734e-05, + "loss": 0.7936, + "step": 12599 + }, + { + "epoch": 2.01, + "learning_rate": 3.3987653004879883e-05, + "loss": 0.8787, + "step": 12600 + }, + { + "epoch": 2.01, + "learning_rate": 3.398524554453799e-05, + "loss": 0.7405, + "step": 12601 + }, + { + "epoch": 2.01, + "learning_rate": 3.398283798850868e-05, + "loss": 0.6944, + "step": 12602 + }, + { + "epoch": 2.01, + "learning_rate": 3.3980430336817616e-05, + "loss": 0.7451, + "step": 12603 + }, + { + "epoch": 2.01, + "learning_rate": 3.397802258949041e-05, + "loss": 0.7146, + "step": 12604 + }, + { + "epoch": 2.01, + "learning_rate": 3.397561474655272e-05, + "loss": 0.8303, + "step": 12605 + }, + { + "epoch": 2.01, + "learning_rate": 3.3973206808030175e-05, + "loss": 0.7935, + "step": 12606 + }, + { + "epoch": 2.01, + "learning_rate": 3.397079877394843e-05, + "loss": 0.8093, + "step": 12607 + }, + { + "epoch": 2.01, + "learning_rate": 3.396839064433313e-05, + "loss": 0.735, + "step": 12608 + }, + { + "epoch": 2.01, + "learning_rate": 3.39659824192099e-05, + "loss": 0.8206, + "step": 12609 + }, + { + "epoch": 2.01, + "learning_rate": 3.39635740986044e-05, + "loss": 0.7385, + "step": 12610 + }, + { + "epoch": 2.01, + "learning_rate": 3.396116568254229e-05, + "loss": 0.7608, + "step": 12611 + }, + { + "epoch": 2.01, + "learning_rate": 3.39587571710492e-05, + "loss": 0.7581, + "step": 12612 + }, + { + "epoch": 2.01, + "learning_rate": 3.3956348564150794e-05, + "loss": 0.7238, + "step": 12613 + }, + { + "epoch": 2.01, + "learning_rate": 3.39539398618727e-05, + "loss": 0.7782, + "step": 12614 + }, + { + "epoch": 2.01, + "learning_rate": 3.395153106424059e-05, + "loss": 0.7872, + "step": 12615 + }, + { + "epoch": 2.01, + "learning_rate": 3.3949122171280115e-05, + "loss": 0.7558, + "step": 12616 + }, + { + "epoch": 2.01, + "learning_rate": 3.394671318301691e-05, + "loss": 0.7789, + "step": 12617 + }, + { + "epoch": 2.01, + "learning_rate": 3.3944304099476644e-05, + "loss": 0.7504, + "step": 12618 + }, + { + "epoch": 2.01, + "learning_rate": 3.3941894920684974e-05, + "loss": 0.793, + "step": 12619 + }, + { + "epoch": 2.01, + "learning_rate": 3.393948564666755e-05, + "loss": 0.7904, + "step": 12620 + }, + { + "epoch": 2.01, + "learning_rate": 3.3937076277450024e-05, + "loss": 0.7736, + "step": 12621 + }, + { + "epoch": 2.01, + "learning_rate": 3.393466681305807e-05, + "loss": 0.7514, + "step": 12622 + }, + { + "epoch": 2.01, + "learning_rate": 3.3932257253517337e-05, + "loss": 0.7494, + "step": 12623 + }, + { + "epoch": 2.01, + "learning_rate": 3.392984759885349e-05, + "loss": 0.7642, + "step": 12624 + }, + { + "epoch": 2.01, + "learning_rate": 3.3927437849092184e-05, + "loss": 0.8216, + "step": 12625 + }, + { + "epoch": 2.01, + "learning_rate": 3.3925028004259086e-05, + "loss": 0.8263, + "step": 12626 + }, + { + "epoch": 2.01, + "learning_rate": 3.392261806437986e-05, + "loss": 0.8193, + "step": 12627 + }, + { + "epoch": 2.01, + "learning_rate": 3.392020802948016e-05, + "loss": 0.7682, + "step": 12628 + }, + { + "epoch": 2.01, + "learning_rate": 3.391779789958567e-05, + "loss": 0.7729, + "step": 12629 + }, + { + "epoch": 2.01, + "learning_rate": 3.391538767472204e-05, + "loss": 0.7678, + "step": 12630 + }, + { + "epoch": 2.01, + "learning_rate": 3.3912977354914946e-05, + "loss": 0.7167, + "step": 12631 + }, + { + "epoch": 2.01, + "learning_rate": 3.391056694019005e-05, + "loss": 0.7512, + "step": 12632 + }, + { + "epoch": 2.01, + "learning_rate": 3.390815643057303e-05, + "loss": 0.7905, + "step": 12633 + }, + { + "epoch": 2.01, + "learning_rate": 3.3905745826089556e-05, + "loss": 0.8608, + "step": 12634 + }, + { + "epoch": 2.01, + "learning_rate": 3.390333512676529e-05, + "loss": 0.7134, + "step": 12635 + }, + { + "epoch": 2.01, + "learning_rate": 3.390092433262592e-05, + "loss": 0.8688, + "step": 12636 + }, + { + "epoch": 2.01, + "learning_rate": 3.38985134436971e-05, + "loss": 0.7671, + "step": 12637 + }, + { + "epoch": 2.01, + "learning_rate": 3.389610246000452e-05, + "loss": 0.8127, + "step": 12638 + }, + { + "epoch": 2.01, + "learning_rate": 3.389369138157385e-05, + "loss": 0.6715, + "step": 12639 + }, + { + "epoch": 2.01, + "learning_rate": 3.3891280208430757e-05, + "loss": 0.802, + "step": 12640 + }, + { + "epoch": 2.01, + "learning_rate": 3.388886894060093e-05, + "loss": 0.766, + "step": 12641 + }, + { + "epoch": 2.01, + "learning_rate": 3.388645757811005e-05, + "loss": 0.8522, + "step": 12642 + }, + { + "epoch": 2.01, + "learning_rate": 3.388404612098379e-05, + "loss": 0.7744, + "step": 12643 + }, + { + "epoch": 2.01, + "learning_rate": 3.388163456924783e-05, + "loss": 0.7713, + "step": 12644 + }, + { + "epoch": 2.01, + "learning_rate": 3.3879222922927864e-05, + "loss": 0.7736, + "step": 12645 + }, + { + "epoch": 2.01, + "learning_rate": 3.387681118204956e-05, + "loss": 0.825, + "step": 12646 + }, + { + "epoch": 2.01, + "learning_rate": 3.387439934663861e-05, + "loss": 0.74, + "step": 12647 + }, + { + "epoch": 2.01, + "learning_rate": 3.387198741672068e-05, + "loss": 0.7688, + "step": 12648 + }, + { + "epoch": 2.02, + "learning_rate": 3.386957539232149e-05, + "loss": 0.7848, + "step": 12649 + }, + { + "epoch": 2.02, + "learning_rate": 3.386716327346669e-05, + "loss": 0.7577, + "step": 12650 + }, + { + "epoch": 2.02, + "learning_rate": 3.386475106018199e-05, + "loss": 0.7756, + "step": 12651 + }, + { + "epoch": 2.02, + "learning_rate": 3.386233875249307e-05, + "loss": 0.7957, + "step": 12652 + }, + { + "epoch": 2.02, + "learning_rate": 3.385992635042563e-05, + "loss": 0.7712, + "step": 12653 + }, + { + "epoch": 2.02, + "learning_rate": 3.385751385400535e-05, + "loss": 0.8048, + "step": 12654 + }, + { + "epoch": 2.02, + "learning_rate": 3.385510126325792e-05, + "loss": 0.7948, + "step": 12655 + }, + { + "epoch": 2.02, + "learning_rate": 3.385268857820905e-05, + "loss": 0.8092, + "step": 12656 + }, + { + "epoch": 2.02, + "learning_rate": 3.385027579888441e-05, + "loss": 0.7024, + "step": 12657 + }, + { + "epoch": 2.02, + "learning_rate": 3.3847862925309716e-05, + "loss": 0.8223, + "step": 12658 + }, + { + "epoch": 2.02, + "learning_rate": 3.384544995751064e-05, + "loss": 0.7876, + "step": 12659 + }, + { + "epoch": 2.02, + "learning_rate": 3.3843036895512905e-05, + "loss": 0.8, + "step": 12660 + }, + { + "epoch": 2.02, + "learning_rate": 3.384062373934219e-05, + "loss": 0.7559, + "step": 12661 + }, + { + "epoch": 2.02, + "learning_rate": 3.38382104890242e-05, + "loss": 0.744, + "step": 12662 + }, + { + "epoch": 2.02, + "learning_rate": 3.383579714458464e-05, + "loss": 0.7216, + "step": 12663 + }, + { + "epoch": 2.02, + "learning_rate": 3.3833383706049186e-05, + "loss": 0.7187, + "step": 12664 + }, + { + "epoch": 2.02, + "learning_rate": 3.383097017344357e-05, + "loss": 0.8049, + "step": 12665 + }, + { + "epoch": 2.02, + "learning_rate": 3.382855654679349e-05, + "loss": 0.7663, + "step": 12666 + }, + { + "epoch": 2.02, + "learning_rate": 3.3826142826124636e-05, + "loss": 0.83, + "step": 12667 + }, + { + "epoch": 2.02, + "learning_rate": 3.3823729011462726e-05, + "loss": 0.79, + "step": 12668 + }, + { + "epoch": 2.02, + "learning_rate": 3.382131510283345e-05, + "loss": 0.7783, + "step": 12669 + }, + { + "epoch": 2.02, + "learning_rate": 3.381890110026252e-05, + "loss": 0.753, + "step": 12670 + }, + { + "epoch": 2.02, + "learning_rate": 3.381648700377566e-05, + "loss": 0.7716, + "step": 12671 + }, + { + "epoch": 2.02, + "learning_rate": 3.381407281339856e-05, + "loss": 0.7263, + "step": 12672 + }, + { + "epoch": 2.02, + "learning_rate": 3.381165852915693e-05, + "loss": 0.7486, + "step": 12673 + }, + { + "epoch": 2.02, + "learning_rate": 3.3809244151076493e-05, + "loss": 0.7729, + "step": 12674 + }, + { + "epoch": 2.02, + "learning_rate": 3.380682967918295e-05, + "loss": 0.7595, + "step": 12675 + }, + { + "epoch": 2.02, + "learning_rate": 3.380441511350203e-05, + "loss": 0.7277, + "step": 12676 + }, + { + "epoch": 2.02, + "learning_rate": 3.3802000454059424e-05, + "loss": 0.7128, + "step": 12677 + }, + { + "epoch": 2.02, + "learning_rate": 3.3799585700880854e-05, + "loss": 0.8149, + "step": 12678 + }, + { + "epoch": 2.02, + "learning_rate": 3.379717085399204e-05, + "loss": 0.7902, + "step": 12679 + }, + { + "epoch": 2.02, + "learning_rate": 3.379475591341871e-05, + "loss": 0.7556, + "step": 12680 + }, + { + "epoch": 2.02, + "learning_rate": 3.3792340879186556e-05, + "loss": 0.7777, + "step": 12681 + }, + { + "epoch": 2.02, + "learning_rate": 3.378992575132131e-05, + "loss": 0.7537, + "step": 12682 + }, + { + "epoch": 2.02, + "learning_rate": 3.3787510529848696e-05, + "loss": 0.7312, + "step": 12683 + }, + { + "epoch": 2.02, + "learning_rate": 3.3785095214794426e-05, + "loss": 0.7179, + "step": 12684 + }, + { + "epoch": 2.02, + "learning_rate": 3.378267980618423e-05, + "loss": 0.7933, + "step": 12685 + }, + { + "epoch": 2.02, + "learning_rate": 3.3780264304043826e-05, + "loss": 0.7827, + "step": 12686 + }, + { + "epoch": 2.02, + "learning_rate": 3.3777848708398934e-05, + "loss": 0.8261, + "step": 12687 + }, + { + "epoch": 2.02, + "learning_rate": 3.3775433019275284e-05, + "loss": 0.7267, + "step": 12688 + }, + { + "epoch": 2.02, + "learning_rate": 3.37730172366986e-05, + "loss": 0.7821, + "step": 12689 + }, + { + "epoch": 2.02, + "learning_rate": 3.377060136069461e-05, + "loss": 0.721, + "step": 12690 + }, + { + "epoch": 2.02, + "learning_rate": 3.376818539128904e-05, + "loss": 0.856, + "step": 12691 + }, + { + "epoch": 2.02, + "learning_rate": 3.376576932850761e-05, + "loss": 0.7612, + "step": 12692 + }, + { + "epoch": 2.02, + "learning_rate": 3.3763353172376064e-05, + "loss": 0.8691, + "step": 12693 + }, + { + "epoch": 2.02, + "learning_rate": 3.3760936922920136e-05, + "loss": 0.7785, + "step": 12694 + }, + { + "epoch": 2.02, + "learning_rate": 3.3758520580165534e-05, + "loss": 0.7486, + "step": 12695 + }, + { + "epoch": 2.02, + "learning_rate": 3.3756104144138016e-05, + "loss": 0.7748, + "step": 12696 + }, + { + "epoch": 2.02, + "learning_rate": 3.37536876148633e-05, + "loss": 0.7459, + "step": 12697 + }, + { + "epoch": 2.02, + "learning_rate": 3.375127099236713e-05, + "loss": 0.6888, + "step": 12698 + }, + { + "epoch": 2.02, + "learning_rate": 3.3748854276675234e-05, + "loss": 0.803, + "step": 12699 + }, + { + "epoch": 2.02, + "learning_rate": 3.3746437467813356e-05, + "loss": 0.7771, + "step": 12700 + }, + { + "epoch": 2.02, + "learning_rate": 3.374402056580722e-05, + "loss": 0.7614, + "step": 12701 + }, + { + "epoch": 2.02, + "learning_rate": 3.374160357068258e-05, + "loss": 0.7832, + "step": 12702 + }, + { + "epoch": 2.02, + "learning_rate": 3.373918648246517e-05, + "loss": 0.7781, + "step": 12703 + }, + { + "epoch": 2.02, + "learning_rate": 3.3736769301180725e-05, + "loss": 0.8431, + "step": 12704 + }, + { + "epoch": 2.02, + "learning_rate": 3.373435202685499e-05, + "loss": 0.7762, + "step": 12705 + }, + { + "epoch": 2.02, + "learning_rate": 3.373193465951372e-05, + "loss": 0.8092, + "step": 12706 + }, + { + "epoch": 2.02, + "learning_rate": 3.372951719918264e-05, + "loss": 0.7097, + "step": 12707 + }, + { + "epoch": 2.02, + "learning_rate": 3.37270996458875e-05, + "loss": 0.7798, + "step": 12708 + }, + { + "epoch": 2.02, + "learning_rate": 3.372468199965404e-05, + "loss": 0.7603, + "step": 12709 + }, + { + "epoch": 2.02, + "learning_rate": 3.372226426050802e-05, + "loss": 0.8846, + "step": 12710 + }, + { + "epoch": 2.03, + "learning_rate": 3.371984642847519e-05, + "loss": 0.7318, + "step": 12711 + }, + { + "epoch": 2.03, + "learning_rate": 3.371742850358128e-05, + "loss": 0.8055, + "step": 12712 + }, + { + "epoch": 2.03, + "learning_rate": 3.371501048585205e-05, + "loss": 0.7616, + "step": 12713 + }, + { + "epoch": 2.03, + "learning_rate": 3.371259237531325e-05, + "loss": 0.7364, + "step": 12714 + }, + { + "epoch": 2.03, + "learning_rate": 3.371017417199063e-05, + "loss": 0.7509, + "step": 12715 + }, + { + "epoch": 2.03, + "learning_rate": 3.370775587590995e-05, + "loss": 0.824, + "step": 12716 + }, + { + "epoch": 2.03, + "learning_rate": 3.3705337487096954e-05, + "loss": 0.77, + "step": 12717 + }, + { + "epoch": 2.03, + "learning_rate": 3.370291900557739e-05, + "loss": 0.7577, + "step": 12718 + }, + { + "epoch": 2.03, + "learning_rate": 3.370050043137703e-05, + "loss": 0.8096, + "step": 12719 + }, + { + "epoch": 2.03, + "learning_rate": 3.369808176452162e-05, + "loss": 0.7102, + "step": 12720 + }, + { + "epoch": 2.03, + "learning_rate": 3.3695663005036914e-05, + "loss": 0.7146, + "step": 12721 + }, + { + "epoch": 2.03, + "learning_rate": 3.369324415294869e-05, + "loss": 0.7476, + "step": 12722 + }, + { + "epoch": 2.03, + "learning_rate": 3.3690825208282686e-05, + "loss": 0.8151, + "step": 12723 + }, + { + "epoch": 2.03, + "learning_rate": 3.368840617106467e-05, + "loss": 0.7358, + "step": 12724 + }, + { + "epoch": 2.03, + "learning_rate": 3.36859870413204e-05, + "loss": 0.6681, + "step": 12725 + }, + { + "epoch": 2.03, + "learning_rate": 3.368356781907564e-05, + "loss": 0.8223, + "step": 12726 + }, + { + "epoch": 2.03, + "learning_rate": 3.368114850435616e-05, + "loss": 0.8053, + "step": 12727 + }, + { + "epoch": 2.03, + "learning_rate": 3.3678729097187726e-05, + "loss": 0.6916, + "step": 12728 + }, + { + "epoch": 2.03, + "learning_rate": 3.367630959759609e-05, + "loss": 0.6664, + "step": 12729 + }, + { + "epoch": 2.03, + "learning_rate": 3.3673890005607015e-05, + "loss": 0.7201, + "step": 12730 + }, + { + "epoch": 2.03, + "learning_rate": 3.367147032124629e-05, + "loss": 0.7564, + "step": 12731 + }, + { + "epoch": 2.03, + "learning_rate": 3.366905054453966e-05, + "loss": 0.7035, + "step": 12732 + }, + { + "epoch": 2.03, + "learning_rate": 3.366663067551291e-05, + "loss": 0.797, + "step": 12733 + }, + { + "epoch": 2.03, + "learning_rate": 3.366421071419181e-05, + "loss": 0.7287, + "step": 12734 + }, + { + "epoch": 2.03, + "learning_rate": 3.366179066060212e-05, + "loss": 0.7142, + "step": 12735 + }, + { + "epoch": 2.03, + "learning_rate": 3.365937051476962e-05, + "loss": 0.6431, + "step": 12736 + }, + { + "epoch": 2.03, + "learning_rate": 3.365695027672009e-05, + "loss": 0.7776, + "step": 12737 + }, + { + "epoch": 2.03, + "learning_rate": 3.365452994647928e-05, + "loss": 0.7562, + "step": 12738 + }, + { + "epoch": 2.03, + "learning_rate": 3.3652109524072987e-05, + "loss": 0.7613, + "step": 12739 + }, + { + "epoch": 2.03, + "learning_rate": 3.364968900952698e-05, + "loss": 0.7528, + "step": 12740 + }, + { + "epoch": 2.03, + "learning_rate": 3.3647268402867035e-05, + "loss": 0.7963, + "step": 12741 + }, + { + "epoch": 2.03, + "learning_rate": 3.364484770411893e-05, + "loss": 0.7806, + "step": 12742 + }, + { + "epoch": 2.03, + "learning_rate": 3.364242691330846e-05, + "loss": 0.772, + "step": 12743 + }, + { + "epoch": 2.03, + "learning_rate": 3.364000603046137e-05, + "loss": 0.7887, + "step": 12744 + }, + { + "epoch": 2.03, + "learning_rate": 3.363758505560347e-05, + "loss": 0.782, + "step": 12745 + }, + { + "epoch": 2.03, + "learning_rate": 3.363516398876053e-05, + "loss": 0.7437, + "step": 12746 + }, + { + "epoch": 2.03, + "learning_rate": 3.3632742829958344e-05, + "loss": 0.8872, + "step": 12747 + }, + { + "epoch": 2.03, + "learning_rate": 3.3630321579222684e-05, + "loss": 0.8215, + "step": 12748 + }, + { + "epoch": 2.03, + "learning_rate": 3.362790023657934e-05, + "loss": 0.7637, + "step": 12749 + }, + { + "epoch": 2.03, + "learning_rate": 3.3625478802054087e-05, + "loss": 0.7528, + "step": 12750 + }, + { + "epoch": 2.03, + "learning_rate": 3.362305727567273e-05, + "loss": 0.6751, + "step": 12751 + }, + { + "epoch": 2.03, + "learning_rate": 3.3620635657461055e-05, + "loss": 0.7814, + "step": 12752 + }, + { + "epoch": 2.03, + "learning_rate": 3.3618213947444825e-05, + "loss": 0.7512, + "step": 12753 + }, + { + "epoch": 2.03, + "learning_rate": 3.361579214564986e-05, + "loss": 0.7792, + "step": 12754 + }, + { + "epoch": 2.03, + "learning_rate": 3.3613370252101945e-05, + "loss": 0.6742, + "step": 12755 + }, + { + "epoch": 2.03, + "learning_rate": 3.3610948266826856e-05, + "loss": 0.8602, + "step": 12756 + }, + { + "epoch": 2.03, + "learning_rate": 3.36085261898504e-05, + "loss": 0.7962, + "step": 12757 + }, + { + "epoch": 2.03, + "learning_rate": 3.360610402119837e-05, + "loss": 0.7384, + "step": 12758 + }, + { + "epoch": 2.03, + "learning_rate": 3.3603681760896554e-05, + "loss": 0.8575, + "step": 12759 + }, + { + "epoch": 2.03, + "learning_rate": 3.360125940897075e-05, + "loss": 0.6903, + "step": 12760 + }, + { + "epoch": 2.03, + "learning_rate": 3.359883696544675e-05, + "loss": 0.7758, + "step": 12761 + }, + { + "epoch": 2.03, + "learning_rate": 3.359641443035036e-05, + "loss": 0.7506, + "step": 12762 + }, + { + "epoch": 2.03, + "learning_rate": 3.359399180370738e-05, + "loss": 0.7607, + "step": 12763 + }, + { + "epoch": 2.03, + "learning_rate": 3.35915690855436e-05, + "loss": 0.9045, + "step": 12764 + }, + { + "epoch": 2.03, + "learning_rate": 3.358914627588483e-05, + "loss": 0.7751, + "step": 12765 + }, + { + "epoch": 2.03, + "learning_rate": 3.358672337475687e-05, + "loss": 0.7955, + "step": 12766 + }, + { + "epoch": 2.03, + "learning_rate": 3.358430038218551e-05, + "loss": 0.7992, + "step": 12767 + }, + { + "epoch": 2.03, + "learning_rate": 3.358187729819657e-05, + "loss": 0.7668, + "step": 12768 + }, + { + "epoch": 2.03, + "learning_rate": 3.357945412281585e-05, + "loss": 0.8133, + "step": 12769 + }, + { + "epoch": 2.03, + "learning_rate": 3.357703085606914e-05, + "loss": 0.7642, + "step": 12770 + }, + { + "epoch": 2.03, + "learning_rate": 3.357460749798226e-05, + "loss": 0.7647, + "step": 12771 + }, + { + "epoch": 2.03, + "learning_rate": 3.3572184048581026e-05, + "loss": 0.7962, + "step": 12772 + }, + { + "epoch": 2.03, + "learning_rate": 3.3569760507891224e-05, + "loss": 0.717, + "step": 12773 + }, + { + "epoch": 2.04, + "learning_rate": 3.356733687593868e-05, + "loss": 0.8707, + "step": 12774 + }, + { + "epoch": 2.04, + "learning_rate": 3.35649131527492e-05, + "loss": 0.7489, + "step": 12775 + }, + { + "epoch": 2.04, + "learning_rate": 3.35624893383486e-05, + "loss": 0.8108, + "step": 12776 + }, + { + "epoch": 2.04, + "learning_rate": 3.356006543276268e-05, + "loss": 0.6841, + "step": 12777 + }, + { + "epoch": 2.04, + "learning_rate": 3.355764143601726e-05, + "loss": 0.7796, + "step": 12778 + }, + { + "epoch": 2.04, + "learning_rate": 3.3555217348138165e-05, + "loss": 0.8395, + "step": 12779 + }, + { + "epoch": 2.04, + "learning_rate": 3.3552793169151195e-05, + "loss": 0.7281, + "step": 12780 + }, + { + "epoch": 2.04, + "learning_rate": 3.355036889908216e-05, + "loss": 0.7372, + "step": 12781 + }, + { + "epoch": 2.04, + "learning_rate": 3.354794453795689e-05, + "loss": 0.8279, + "step": 12782 + }, + { + "epoch": 2.04, + "learning_rate": 3.3545520085801206e-05, + "loss": 0.7546, + "step": 12783 + }, + { + "epoch": 2.04, + "learning_rate": 3.3543095542640924e-05, + "loss": 0.7426, + "step": 12784 + }, + { + "epoch": 2.04, + "learning_rate": 3.354067090850185e-05, + "loss": 0.7406, + "step": 12785 + }, + { + "epoch": 2.04, + "learning_rate": 3.353824618340983e-05, + "loss": 0.8398, + "step": 12786 + }, + { + "epoch": 2.04, + "learning_rate": 3.3535821367390664e-05, + "loss": 0.7325, + "step": 12787 + }, + { + "epoch": 2.04, + "learning_rate": 3.353339646047018e-05, + "loss": 0.8021, + "step": 12788 + }, + { + "epoch": 2.04, + "learning_rate": 3.353097146267421e-05, + "loss": 0.812, + "step": 12789 + }, + { + "epoch": 2.04, + "learning_rate": 3.352854637402857e-05, + "loss": 0.7097, + "step": 12790 + }, + { + "epoch": 2.04, + "learning_rate": 3.352612119455909e-05, + "loss": 0.8149, + "step": 12791 + }, + { + "epoch": 2.04, + "learning_rate": 3.35236959242916e-05, + "loss": 0.7974, + "step": 12792 + }, + { + "epoch": 2.04, + "learning_rate": 3.352127056325191e-05, + "loss": 0.747, + "step": 12793 + }, + { + "epoch": 2.04, + "learning_rate": 3.351884511146587e-05, + "loss": 0.7568, + "step": 12794 + }, + { + "epoch": 2.04, + "learning_rate": 3.3516419568959303e-05, + "loss": 0.7344, + "step": 12795 + }, + { + "epoch": 2.04, + "learning_rate": 3.3513993935758035e-05, + "loss": 0.7498, + "step": 12796 + }, + { + "epoch": 2.04, + "learning_rate": 3.35115682118879e-05, + "loss": 0.7408, + "step": 12797 + }, + { + "epoch": 2.04, + "learning_rate": 3.350914239737474e-05, + "loss": 0.7623, + "step": 12798 + }, + { + "epoch": 2.04, + "learning_rate": 3.3506716492244374e-05, + "loss": 0.7493, + "step": 12799 + }, + { + "epoch": 2.04, + "learning_rate": 3.350429049652264e-05, + "loss": 0.7696, + "step": 12800 + }, + { + "epoch": 2.04, + "learning_rate": 3.3501864410235377e-05, + "loss": 0.8194, + "step": 12801 + }, + { + "epoch": 2.04, + "learning_rate": 3.3499438233408416e-05, + "loss": 0.751, + "step": 12802 + }, + { + "epoch": 2.04, + "learning_rate": 3.3497011966067603e-05, + "loss": 0.7515, + "step": 12803 + }, + { + "epoch": 2.04, + "learning_rate": 3.349458560823877e-05, + "loss": 0.7673, + "step": 12804 + }, + { + "epoch": 2.04, + "learning_rate": 3.349215915994776e-05, + "loss": 0.7386, + "step": 12805 + }, + { + "epoch": 2.04, + "learning_rate": 3.348973262122041e-05, + "loss": 0.7292, + "step": 12806 + }, + { + "epoch": 2.04, + "learning_rate": 3.348730599208255e-05, + "loss": 0.7404, + "step": 12807 + }, + { + "epoch": 2.04, + "learning_rate": 3.348487927256004e-05, + "loss": 0.7595, + "step": 12808 + }, + { + "epoch": 2.04, + "learning_rate": 3.348245246267873e-05, + "loss": 0.7249, + "step": 12809 + }, + { + "epoch": 2.04, + "learning_rate": 3.3480025562464436e-05, + "loss": 0.6783, + "step": 12810 + }, + { + "epoch": 2.04, + "learning_rate": 3.347759857194303e-05, + "loss": 0.8721, + "step": 12811 + }, + { + "epoch": 2.04, + "learning_rate": 3.347517149114033e-05, + "loss": 0.7463, + "step": 12812 + }, + { + "epoch": 2.04, + "learning_rate": 3.347274432008221e-05, + "loss": 0.6864, + "step": 12813 + }, + { + "epoch": 2.04, + "learning_rate": 3.3470317058794505e-05, + "loss": 0.8014, + "step": 12814 + }, + { + "epoch": 2.04, + "learning_rate": 3.346788970730307e-05, + "loss": 0.7192, + "step": 12815 + }, + { + "epoch": 2.04, + "learning_rate": 3.346546226563374e-05, + "loss": 0.6523, + "step": 12816 + }, + { + "epoch": 2.04, + "learning_rate": 3.346303473381238e-05, + "loss": 0.7683, + "step": 12817 + }, + { + "epoch": 2.04, + "learning_rate": 3.346060711186485e-05, + "loss": 0.7236, + "step": 12818 + }, + { + "epoch": 2.04, + "learning_rate": 3.345817939981698e-05, + "loss": 0.6969, + "step": 12819 + }, + { + "epoch": 2.04, + "learning_rate": 3.3455751597694635e-05, + "loss": 0.7077, + "step": 12820 + }, + { + "epoch": 2.04, + "learning_rate": 3.3453323705523676e-05, + "loss": 0.7615, + "step": 12821 + }, + { + "epoch": 2.04, + "learning_rate": 3.345089572332994e-05, + "loss": 0.7423, + "step": 12822 + }, + { + "epoch": 2.04, + "learning_rate": 3.3448467651139305e-05, + "loss": 0.7966, + "step": 12823 + }, + { + "epoch": 2.04, + "learning_rate": 3.344603948897762e-05, + "loss": 0.7244, + "step": 12824 + }, + { + "epoch": 2.04, + "learning_rate": 3.344361123687073e-05, + "loss": 0.7611, + "step": 12825 + }, + { + "epoch": 2.04, + "learning_rate": 3.344118289484451e-05, + "loss": 0.6873, + "step": 12826 + }, + { + "epoch": 2.04, + "learning_rate": 3.343875446292483e-05, + "loss": 0.7389, + "step": 12827 + }, + { + "epoch": 2.04, + "learning_rate": 3.343632594113753e-05, + "loss": 0.7465, + "step": 12828 + }, + { + "epoch": 2.04, + "learning_rate": 3.343389732950847e-05, + "loss": 0.8745, + "step": 12829 + }, + { + "epoch": 2.04, + "learning_rate": 3.343146862806353e-05, + "loss": 0.6737, + "step": 12830 + }, + { + "epoch": 2.04, + "learning_rate": 3.342903983682858e-05, + "loss": 0.7461, + "step": 12831 + }, + { + "epoch": 2.04, + "learning_rate": 3.342661095582946e-05, + "loss": 0.837, + "step": 12832 + }, + { + "epoch": 2.04, + "learning_rate": 3.342418198509206e-05, + "loss": 0.7117, + "step": 12833 + }, + { + "epoch": 2.04, + "learning_rate": 3.342175292464223e-05, + "loss": 0.7144, + "step": 12834 + }, + { + "epoch": 2.04, + "learning_rate": 3.341932377450584e-05, + "loss": 0.8679, + "step": 12835 + }, + { + "epoch": 2.04, + "learning_rate": 3.3416894534708764e-05, + "loss": 0.8123, + "step": 12836 + }, + { + "epoch": 2.05, + "learning_rate": 3.341446520527687e-05, + "loss": 0.7468, + "step": 12837 + }, + { + "epoch": 2.05, + "learning_rate": 3.3412035786236047e-05, + "loss": 0.7358, + "step": 12838 + }, + { + "epoch": 2.05, + "learning_rate": 3.340960627761213e-05, + "loss": 0.7227, + "step": 12839 + }, + { + "epoch": 2.05, + "learning_rate": 3.340717667943102e-05, + "loss": 0.7906, + "step": 12840 + }, + { + "epoch": 2.05, + "learning_rate": 3.340474699171859e-05, + "loss": 0.7736, + "step": 12841 + }, + { + "epoch": 2.05, + "learning_rate": 3.3402317214500696e-05, + "loss": 0.7085, + "step": 12842 + }, + { + "epoch": 2.05, + "learning_rate": 3.3399887347803224e-05, + "loss": 0.7713, + "step": 12843 + }, + { + "epoch": 2.05, + "learning_rate": 3.339745739165206e-05, + "loss": 0.8573, + "step": 12844 + }, + { + "epoch": 2.05, + "learning_rate": 3.3395027346073066e-05, + "loss": 0.7383, + "step": 12845 + }, + { + "epoch": 2.05, + "learning_rate": 3.339259721109213e-05, + "loss": 0.7552, + "step": 12846 + }, + { + "epoch": 2.05, + "learning_rate": 3.3390166986735126e-05, + "loss": 0.7749, + "step": 12847 + }, + { + "epoch": 2.05, + "learning_rate": 3.338773667302795e-05, + "loss": 0.7243, + "step": 12848 + }, + { + "epoch": 2.05, + "learning_rate": 3.338530626999646e-05, + "loss": 0.7528, + "step": 12849 + }, + { + "epoch": 2.05, + "learning_rate": 3.338287577766655e-05, + "loss": 0.724, + "step": 12850 + }, + { + "epoch": 2.05, + "learning_rate": 3.33804451960641e-05, + "loss": 0.7261, + "step": 12851 + }, + { + "epoch": 2.05, + "learning_rate": 3.337801452521499e-05, + "loss": 0.6696, + "step": 12852 + }, + { + "epoch": 2.05, + "learning_rate": 3.337558376514513e-05, + "loss": 0.6985, + "step": 12853 + }, + { + "epoch": 2.05, + "learning_rate": 3.337315291588037e-05, + "loss": 0.7487, + "step": 12854 + }, + { + "epoch": 2.05, + "learning_rate": 3.3370721977446625e-05, + "loss": 0.7163, + "step": 12855 + }, + { + "epoch": 2.05, + "learning_rate": 3.336829094986978e-05, + "loss": 0.7597, + "step": 12856 + }, + { + "epoch": 2.05, + "learning_rate": 3.3365859833175705e-05, + "loss": 0.9108, + "step": 12857 + }, + { + "epoch": 2.05, + "learning_rate": 3.336342862739031e-05, + "loss": 0.8715, + "step": 12858 + }, + { + "epoch": 2.05, + "learning_rate": 3.336099733253947e-05, + "loss": 0.7786, + "step": 12859 + }, + { + "epoch": 2.05, + "learning_rate": 3.335856594864909e-05, + "loss": 0.707, + "step": 12860 + }, + { + "epoch": 2.05, + "learning_rate": 3.335613447574506e-05, + "loss": 0.7524, + "step": 12861 + }, + { + "epoch": 2.05, + "learning_rate": 3.335370291385326e-05, + "loss": 0.7773, + "step": 12862 + }, + { + "epoch": 2.05, + "learning_rate": 3.33512712629996e-05, + "loss": 0.8661, + "step": 12863 + }, + { + "epoch": 2.05, + "learning_rate": 3.3348839523209976e-05, + "loss": 0.8207, + "step": 12864 + }, + { + "epoch": 2.05, + "learning_rate": 3.334640769451027e-05, + "loss": 0.7797, + "step": 12865 + }, + { + "epoch": 2.05, + "learning_rate": 3.33439757769264e-05, + "loss": 0.7869, + "step": 12866 + }, + { + "epoch": 2.05, + "learning_rate": 3.3341543770484246e-05, + "loss": 0.7594, + "step": 12867 + }, + { + "epoch": 2.05, + "learning_rate": 3.3339111675209716e-05, + "loss": 0.7927, + "step": 12868 + }, + { + "epoch": 2.05, + "learning_rate": 3.3336679491128715e-05, + "loss": 0.7538, + "step": 12869 + }, + { + "epoch": 2.05, + "learning_rate": 3.3334247218267135e-05, + "loss": 0.7122, + "step": 12870 + }, + { + "epoch": 2.05, + "learning_rate": 3.333181485665088e-05, + "loss": 0.7642, + "step": 12871 + }, + { + "epoch": 2.05, + "learning_rate": 3.332938240630586e-05, + "loss": 0.7291, + "step": 12872 + }, + { + "epoch": 2.05, + "learning_rate": 3.3326949867257975e-05, + "loss": 0.7412, + "step": 12873 + }, + { + "epoch": 2.05, + "learning_rate": 3.332451723953312e-05, + "loss": 0.8211, + "step": 12874 + }, + { + "epoch": 2.05, + "learning_rate": 3.3322084523157215e-05, + "loss": 0.828, + "step": 12875 + }, + { + "epoch": 2.05, + "learning_rate": 3.331965171815617e-05, + "loss": 0.7943, + "step": 12876 + }, + { + "epoch": 2.05, + "learning_rate": 3.3317218824555874e-05, + "loss": 0.7424, + "step": 12877 + }, + { + "epoch": 2.05, + "learning_rate": 3.3314785842382256e-05, + "loss": 0.7362, + "step": 12878 + }, + { + "epoch": 2.05, + "learning_rate": 3.3312352771661216e-05, + "loss": 0.8065, + "step": 12879 + }, + { + "epoch": 2.05, + "learning_rate": 3.330991961241866e-05, + "loss": 0.8432, + "step": 12880 + }, + { + "epoch": 2.05, + "learning_rate": 3.330748636468051e-05, + "loss": 0.714, + "step": 12881 + }, + { + "epoch": 2.05, + "learning_rate": 3.3305053028472666e-05, + "loss": 0.6747, + "step": 12882 + }, + { + "epoch": 2.05, + "learning_rate": 3.330261960382106e-05, + "loss": 0.7291, + "step": 12883 + }, + { + "epoch": 2.05, + "learning_rate": 3.330018609075159e-05, + "loss": 0.7353, + "step": 12884 + }, + { + "epoch": 2.05, + "learning_rate": 3.3297752489290175e-05, + "loss": 0.7904, + "step": 12885 + }, + { + "epoch": 2.05, + "learning_rate": 3.329531879946274e-05, + "loss": 0.6853, + "step": 12886 + }, + { + "epoch": 2.05, + "learning_rate": 3.329288502129519e-05, + "loss": 0.7498, + "step": 12887 + }, + { + "epoch": 2.05, + "learning_rate": 3.3290451154813454e-05, + "loss": 0.7312, + "step": 12888 + }, + { + "epoch": 2.05, + "learning_rate": 3.328801720004344e-05, + "loss": 0.8026, + "step": 12889 + }, + { + "epoch": 2.05, + "learning_rate": 3.328558315701109e-05, + "loss": 0.6879, + "step": 12890 + }, + { + "epoch": 2.05, + "learning_rate": 3.328314902574229e-05, + "loss": 0.816, + "step": 12891 + }, + { + "epoch": 2.05, + "learning_rate": 3.3280714806262995e-05, + "loss": 0.6937, + "step": 12892 + }, + { + "epoch": 2.05, + "learning_rate": 3.327828049859911e-05, + "loss": 0.766, + "step": 12893 + }, + { + "epoch": 2.05, + "learning_rate": 3.3275846102776555e-05, + "loss": 0.7595, + "step": 12894 + }, + { + "epoch": 2.05, + "learning_rate": 3.3273411618821274e-05, + "loss": 0.7383, + "step": 12895 + }, + { + "epoch": 2.05, + "learning_rate": 3.3270977046759184e-05, + "loss": 0.7453, + "step": 12896 + }, + { + "epoch": 2.05, + "learning_rate": 3.32685423866162e-05, + "loss": 0.7972, + "step": 12897 + }, + { + "epoch": 2.05, + "learning_rate": 3.326610763841826e-05, + "loss": 0.7229, + "step": 12898 + }, + { + "epoch": 2.05, + "learning_rate": 3.3263672802191316e-05, + "loss": 0.7251, + "step": 12899 + }, + { + "epoch": 2.06, + "learning_rate": 3.326123787796125e-05, + "loss": 0.6956, + "step": 12900 + }, + { + "epoch": 2.06, + "learning_rate": 3.325880286575402e-05, + "loss": 0.7881, + "step": 12901 + }, + { + "epoch": 2.06, + "learning_rate": 3.325636776559555e-05, + "loss": 0.7924, + "step": 12902 + }, + { + "epoch": 2.06, + "learning_rate": 3.3253932577511784e-05, + "loss": 0.7811, + "step": 12903 + }, + { + "epoch": 2.06, + "learning_rate": 3.325149730152864e-05, + "loss": 0.7365, + "step": 12904 + }, + { + "epoch": 2.06, + "learning_rate": 3.3249061937672066e-05, + "loss": 0.7016, + "step": 12905 + }, + { + "epoch": 2.06, + "learning_rate": 3.324662648596799e-05, + "loss": 0.8073, + "step": 12906 + }, + { + "epoch": 2.06, + "learning_rate": 3.324419094644234e-05, + "loss": 0.7445, + "step": 12907 + }, + { + "epoch": 2.06, + "learning_rate": 3.324175531912107e-05, + "loss": 0.7546, + "step": 12908 + }, + { + "epoch": 2.06, + "learning_rate": 3.323931960403011e-05, + "loss": 0.7934, + "step": 12909 + }, + { + "epoch": 2.06, + "learning_rate": 3.32368838011954e-05, + "loss": 0.7906, + "step": 12910 + }, + { + "epoch": 2.06, + "learning_rate": 3.323444791064287e-05, + "loss": 0.7806, + "step": 12911 + }, + { + "epoch": 2.06, + "learning_rate": 3.323201193239848e-05, + "loss": 0.7813, + "step": 12912 + }, + { + "epoch": 2.06, + "learning_rate": 3.322957586648815e-05, + "loss": 0.7708, + "step": 12913 + }, + { + "epoch": 2.06, + "learning_rate": 3.322713971293783e-05, + "loss": 0.8722, + "step": 12914 + }, + { + "epoch": 2.06, + "learning_rate": 3.3224703471773474e-05, + "loss": 0.7665, + "step": 12915 + }, + { + "epoch": 2.06, + "learning_rate": 3.3222267143021024e-05, + "loss": 0.7251, + "step": 12916 + }, + { + "epoch": 2.06, + "learning_rate": 3.3219830726706413e-05, + "loss": 0.8463, + "step": 12917 + }, + { + "epoch": 2.06, + "learning_rate": 3.3217394222855603e-05, + "loss": 0.8239, + "step": 12918 + }, + { + "epoch": 2.06, + "learning_rate": 3.321495763149453e-05, + "loss": 0.7918, + "step": 12919 + }, + { + "epoch": 2.06, + "learning_rate": 3.321252095264914e-05, + "loss": 0.7736, + "step": 12920 + }, + { + "epoch": 2.06, + "learning_rate": 3.32100841863454e-05, + "loss": 0.7131, + "step": 12921 + }, + { + "epoch": 2.06, + "learning_rate": 3.320764733260924e-05, + "loss": 0.8123, + "step": 12922 + }, + { + "epoch": 2.06, + "learning_rate": 3.320521039146661e-05, + "loss": 0.7354, + "step": 12923 + }, + { + "epoch": 2.06, + "learning_rate": 3.320277336294349e-05, + "loss": 0.7594, + "step": 12924 + }, + { + "epoch": 2.06, + "learning_rate": 3.32003362470658e-05, + "loss": 0.7797, + "step": 12925 + }, + { + "epoch": 2.06, + "learning_rate": 3.31978990438595e-05, + "loss": 0.7284, + "step": 12926 + }, + { + "epoch": 2.06, + "learning_rate": 3.319546175335057e-05, + "loss": 0.7195, + "step": 12927 + }, + { + "epoch": 2.06, + "learning_rate": 3.319302437556494e-05, + "loss": 0.8067, + "step": 12928 + }, + { + "epoch": 2.06, + "learning_rate": 3.319058691052858e-05, + "loss": 0.7464, + "step": 12929 + }, + { + "epoch": 2.06, + "learning_rate": 3.318814935826744e-05, + "loss": 0.7791, + "step": 12930 + }, + { + "epoch": 2.06, + "learning_rate": 3.318571171880748e-05, + "loss": 0.7654, + "step": 12931 + }, + { + "epoch": 2.06, + "learning_rate": 3.3183273992174656e-05, + "loss": 0.8074, + "step": 12932 + }, + { + "epoch": 2.06, + "learning_rate": 3.3180836178394935e-05, + "loss": 0.7779, + "step": 12933 + }, + { + "epoch": 2.06, + "learning_rate": 3.317839827749427e-05, + "loss": 0.7405, + "step": 12934 + }, + { + "epoch": 2.06, + "learning_rate": 3.317596028949863e-05, + "loss": 0.8094, + "step": 12935 + }, + { + "epoch": 2.06, + "learning_rate": 3.3173522214433984e-05, + "loss": 0.7808, + "step": 12936 + }, + { + "epoch": 2.06, + "learning_rate": 3.317108405232628e-05, + "loss": 0.7716, + "step": 12937 + }, + { + "epoch": 2.06, + "learning_rate": 3.3168645803201495e-05, + "loss": 0.762, + "step": 12938 + }, + { + "epoch": 2.06, + "learning_rate": 3.3166207467085606e-05, + "loss": 0.7334, + "step": 12939 + }, + { + "epoch": 2.06, + "learning_rate": 3.3163769044004545e-05, + "loss": 0.8874, + "step": 12940 + }, + { + "epoch": 2.06, + "learning_rate": 3.316133053398431e-05, + "loss": 0.7166, + "step": 12941 + }, + { + "epoch": 2.06, + "learning_rate": 3.315889193705086e-05, + "loss": 0.7716, + "step": 12942 + }, + { + "epoch": 2.06, + "learning_rate": 3.3156453253230154e-05, + "loss": 0.7083, + "step": 12943 + }, + { + "epoch": 2.06, + "learning_rate": 3.315401448254818e-05, + "loss": 0.756, + "step": 12944 + }, + { + "epoch": 2.06, + "learning_rate": 3.3151575625030905e-05, + "loss": 0.7689, + "step": 12945 + }, + { + "epoch": 2.06, + "learning_rate": 3.314913668070429e-05, + "loss": 0.7247, + "step": 12946 + }, + { + "epoch": 2.06, + "learning_rate": 3.314669764959433e-05, + "loss": 0.6449, + "step": 12947 + }, + { + "epoch": 2.06, + "learning_rate": 3.314425853172697e-05, + "loss": 0.7554, + "step": 12948 + }, + { + "epoch": 2.06, + "learning_rate": 3.314181932712822e-05, + "loss": 0.8276, + "step": 12949 + }, + { + "epoch": 2.06, + "learning_rate": 3.313938003582403e-05, + "loss": 0.7074, + "step": 12950 + }, + { + "epoch": 2.06, + "learning_rate": 3.3136940657840373e-05, + "loss": 0.7936, + "step": 12951 + }, + { + "epoch": 2.06, + "learning_rate": 3.3134501193203244e-05, + "loss": 0.7372, + "step": 12952 + }, + { + "epoch": 2.06, + "learning_rate": 3.313206164193862e-05, + "loss": 0.8131, + "step": 12953 + }, + { + "epoch": 2.06, + "learning_rate": 3.3129622004072476e-05, + "loss": 0.7719, + "step": 12954 + }, + { + "epoch": 2.06, + "learning_rate": 3.3127182279630785e-05, + "loss": 0.7935, + "step": 12955 + }, + { + "epoch": 2.06, + "learning_rate": 3.3124742468639545e-05, + "loss": 0.7785, + "step": 12956 + }, + { + "epoch": 2.06, + "learning_rate": 3.312230257112473e-05, + "loss": 0.75, + "step": 12957 + }, + { + "epoch": 2.06, + "learning_rate": 3.311986258711232e-05, + "loss": 0.764, + "step": 12958 + }, + { + "epoch": 2.06, + "learning_rate": 3.311742251662831e-05, + "loss": 0.7135, + "step": 12959 + }, + { + "epoch": 2.06, + "learning_rate": 3.311498235969867e-05, + "loss": 0.7358, + "step": 12960 + }, + { + "epoch": 2.06, + "learning_rate": 3.31125421163494e-05, + "loss": 0.7061, + "step": 12961 + }, + { + "epoch": 2.06, + "learning_rate": 3.311010178660649e-05, + "loss": 0.7721, + "step": 12962 + }, + { + "epoch": 2.07, + "learning_rate": 3.31076613704959e-05, + "loss": 0.741, + "step": 12963 + }, + { + "epoch": 2.07, + "learning_rate": 3.310522086804365e-05, + "loss": 0.7856, + "step": 12964 + }, + { + "epoch": 2.07, + "learning_rate": 3.310278027927572e-05, + "loss": 0.752, + "step": 12965 + }, + { + "epoch": 2.07, + "learning_rate": 3.310033960421809e-05, + "loss": 0.7808, + "step": 12966 + }, + { + "epoch": 2.07, + "learning_rate": 3.309789884289677e-05, + "loss": 0.8154, + "step": 12967 + }, + { + "epoch": 2.07, + "learning_rate": 3.309545799533774e-05, + "loss": 0.6724, + "step": 12968 + }, + { + "epoch": 2.07, + "learning_rate": 3.3093017061567e-05, + "loss": 0.7674, + "step": 12969 + }, + { + "epoch": 2.07, + "learning_rate": 3.309057604161055e-05, + "loss": 0.765, + "step": 12970 + }, + { + "epoch": 2.07, + "learning_rate": 3.3088134935494363e-05, + "loss": 0.7492, + "step": 12971 + }, + { + "epoch": 2.07, + "learning_rate": 3.308569374324445e-05, + "loss": 0.7227, + "step": 12972 + }, + { + "epoch": 2.07, + "learning_rate": 3.3083252464886814e-05, + "loss": 0.7319, + "step": 12973 + }, + { + "epoch": 2.07, + "learning_rate": 3.308081110044744e-05, + "loss": 0.7852, + "step": 12974 + }, + { + "epoch": 2.07, + "learning_rate": 3.307836964995234e-05, + "loss": 0.7324, + "step": 12975 + }, + { + "epoch": 2.07, + "learning_rate": 3.3075928113427506e-05, + "loss": 0.7749, + "step": 12976 + }, + { + "epoch": 2.07, + "learning_rate": 3.3073486490898943e-05, + "loss": 0.8263, + "step": 12977 + }, + { + "epoch": 2.07, + "learning_rate": 3.307104478239264e-05, + "loss": 0.7343, + "step": 12978 + }, + { + "epoch": 2.07, + "learning_rate": 3.3068602987934624e-05, + "loss": 0.7549, + "step": 12979 + }, + { + "epoch": 2.07, + "learning_rate": 3.3066161107550875e-05, + "loss": 0.715, + "step": 12980 + }, + { + "epoch": 2.07, + "learning_rate": 3.3063719141267415e-05, + "loss": 0.7, + "step": 12981 + }, + { + "epoch": 2.07, + "learning_rate": 3.3061277089110236e-05, + "loss": 0.7449, + "step": 12982 + }, + { + "epoch": 2.07, + "learning_rate": 3.305883495110535e-05, + "loss": 0.7357, + "step": 12983 + }, + { + "epoch": 2.07, + "learning_rate": 3.305639272727877e-05, + "loss": 0.7113, + "step": 12984 + }, + { + "epoch": 2.07, + "learning_rate": 3.305395041765649e-05, + "loss": 0.751, + "step": 12985 + }, + { + "epoch": 2.07, + "learning_rate": 3.3051508022264534e-05, + "loss": 0.7083, + "step": 12986 + }, + { + "epoch": 2.07, + "learning_rate": 3.30490655411289e-05, + "loss": 0.7092, + "step": 12987 + }, + { + "epoch": 2.07, + "learning_rate": 3.304662297427561e-05, + "loss": 0.7123, + "step": 12988 + }, + { + "epoch": 2.07, + "learning_rate": 3.3044180321730676e-05, + "loss": 0.8254, + "step": 12989 + }, + { + "epoch": 2.07, + "learning_rate": 3.3041737583520096e-05, + "loss": 0.7966, + "step": 12990 + }, + { + "epoch": 2.07, + "learning_rate": 3.30392947596699e-05, + "loss": 0.7413, + "step": 12991 + }, + { + "epoch": 2.07, + "learning_rate": 3.303685185020608e-05, + "loss": 0.8155, + "step": 12992 + }, + { + "epoch": 2.07, + "learning_rate": 3.3034408855154684e-05, + "loss": 0.7524, + "step": 12993 + }, + { + "epoch": 2.07, + "learning_rate": 3.303196577454171e-05, + "loss": 0.7035, + "step": 12994 + }, + { + "epoch": 2.07, + "learning_rate": 3.3029522608393164e-05, + "loss": 0.7243, + "step": 12995 + }, + { + "epoch": 2.07, + "learning_rate": 3.302707935673509e-05, + "loss": 0.7214, + "step": 12996 + }, + { + "epoch": 2.07, + "learning_rate": 3.302463601959349e-05, + "loss": 0.773, + "step": 12997 + }, + { + "epoch": 2.07, + "learning_rate": 3.3022192596994386e-05, + "loss": 0.7087, + "step": 12998 + }, + { + "epoch": 2.07, + "learning_rate": 3.30197490889638e-05, + "loss": 0.6969, + "step": 12999 + }, + { + "epoch": 2.07, + "learning_rate": 3.301730549552776e-05, + "loss": 0.7705, + "step": 13000 + }, + { + "epoch": 2.07, + "learning_rate": 3.3014861816712286e-05, + "loss": 0.784, + "step": 13001 + }, + { + "epoch": 2.07, + "learning_rate": 3.30124180525434e-05, + "loss": 0.8151, + "step": 13002 + }, + { + "epoch": 2.07, + "learning_rate": 3.300997420304712e-05, + "loss": 0.6895, + "step": 13003 + }, + { + "epoch": 2.07, + "learning_rate": 3.3007530268249474e-05, + "loss": 0.7237, + "step": 13004 + }, + { + "epoch": 2.07, + "learning_rate": 3.30050862481765e-05, + "loss": 0.7253, + "step": 13005 + }, + { + "epoch": 2.07, + "learning_rate": 3.3002642142854224e-05, + "loss": 0.822, + "step": 13006 + }, + { + "epoch": 2.07, + "learning_rate": 3.3000197952308654e-05, + "loss": 0.7115, + "step": 13007 + }, + { + "epoch": 2.07, + "learning_rate": 3.2997753676565834e-05, + "loss": 0.7829, + "step": 13008 + }, + { + "epoch": 2.07, + "learning_rate": 3.29953093156518e-05, + "loss": 0.8036, + "step": 13009 + }, + { + "epoch": 2.07, + "learning_rate": 3.2992864869592574e-05, + "loss": 0.744, + "step": 13010 + }, + { + "epoch": 2.07, + "learning_rate": 3.299042033841419e-05, + "loss": 0.6986, + "step": 13011 + }, + { + "epoch": 2.07, + "learning_rate": 3.298797572214268e-05, + "loss": 0.7442, + "step": 13012 + }, + { + "epoch": 2.07, + "learning_rate": 3.2985531020804074e-05, + "loss": 0.8237, + "step": 13013 + }, + { + "epoch": 2.07, + "learning_rate": 3.298308623442441e-05, + "loss": 0.7124, + "step": 13014 + }, + { + "epoch": 2.07, + "learning_rate": 3.298064136302973e-05, + "loss": 0.7208, + "step": 13015 + }, + { + "epoch": 2.07, + "learning_rate": 3.297819640664607e-05, + "loss": 0.8903, + "step": 13016 + }, + { + "epoch": 2.07, + "learning_rate": 3.297575136529945e-05, + "loss": 0.7922, + "step": 13017 + }, + { + "epoch": 2.07, + "learning_rate": 3.297330623901592e-05, + "loss": 0.8315, + "step": 13018 + }, + { + "epoch": 2.07, + "learning_rate": 3.297086102782153e-05, + "loss": 0.7319, + "step": 13019 + }, + { + "epoch": 2.07, + "learning_rate": 3.2968415731742306e-05, + "loss": 0.8138, + "step": 13020 + }, + { + "epoch": 2.07, + "learning_rate": 3.2965970350804286e-05, + "loss": 0.8084, + "step": 13021 + }, + { + "epoch": 2.07, + "learning_rate": 3.2963524885033525e-05, + "loss": 0.8276, + "step": 13022 + }, + { + "epoch": 2.07, + "learning_rate": 3.2961079334456056e-05, + "loss": 0.7739, + "step": 13023 + }, + { + "epoch": 2.07, + "learning_rate": 3.295863369909792e-05, + "loss": 0.7712, + "step": 13024 + }, + { + "epoch": 2.08, + "learning_rate": 3.2956187978985175e-05, + "loss": 0.7224, + "step": 13025 + }, + { + "epoch": 2.08, + "learning_rate": 3.295374217414385e-05, + "loss": 0.7771, + "step": 13026 + }, + { + "epoch": 2.08, + "learning_rate": 3.295129628460001e-05, + "loss": 0.7492, + "step": 13027 + }, + { + "epoch": 2.08, + "learning_rate": 3.294885031037969e-05, + "loss": 0.7423, + "step": 13028 + }, + { + "epoch": 2.08, + "learning_rate": 3.2946404251508935e-05, + "loss": 0.7886, + "step": 13029 + }, + { + "epoch": 2.08, + "learning_rate": 3.29439581080138e-05, + "loss": 0.7397, + "step": 13030 + }, + { + "epoch": 2.08, + "learning_rate": 3.2941511879920336e-05, + "loss": 0.7692, + "step": 13031 + }, + { + "epoch": 2.08, + "learning_rate": 3.2939065567254585e-05, + "loss": 0.7585, + "step": 13032 + }, + { + "epoch": 2.08, + "learning_rate": 3.2936619170042614e-05, + "loss": 0.7074, + "step": 13033 + }, + { + "epoch": 2.08, + "learning_rate": 3.293417268831047e-05, + "loss": 0.8242, + "step": 13034 + }, + { + "epoch": 2.08, + "learning_rate": 3.2931726122084186e-05, + "loss": 0.7268, + "step": 13035 + }, + { + "epoch": 2.08, + "learning_rate": 3.2929279471389853e-05, + "loss": 0.7296, + "step": 13036 + }, + { + "epoch": 2.08, + "learning_rate": 3.2926832736253505e-05, + "loss": 0.7734, + "step": 13037 + }, + { + "epoch": 2.08, + "learning_rate": 3.292438591670119e-05, + "loss": 0.7203, + "step": 13038 + }, + { + "epoch": 2.08, + "learning_rate": 3.292193901275899e-05, + "loss": 0.7522, + "step": 13039 + }, + { + "epoch": 2.08, + "learning_rate": 3.291949202445295e-05, + "loss": 0.8236, + "step": 13040 + }, + { + "epoch": 2.08, + "learning_rate": 3.291704495180911e-05, + "loss": 0.8397, + "step": 13041 + }, + { + "epoch": 2.08, + "learning_rate": 3.2914597794853566e-05, + "loss": 0.7241, + "step": 13042 + }, + { + "epoch": 2.08, + "learning_rate": 3.291215055361235e-05, + "loss": 0.7125, + "step": 13043 + }, + { + "epoch": 2.08, + "learning_rate": 3.290970322811153e-05, + "loss": 0.7539, + "step": 13044 + }, + { + "epoch": 2.08, + "learning_rate": 3.290725581837718e-05, + "loss": 0.8305, + "step": 13045 + }, + { + "epoch": 2.08, + "learning_rate": 3.290480832443536e-05, + "loss": 0.8487, + "step": 13046 + }, + { + "epoch": 2.08, + "learning_rate": 3.290236074631212e-05, + "loss": 0.8677, + "step": 13047 + }, + { + "epoch": 2.08, + "learning_rate": 3.2899913084033545e-05, + "loss": 0.7684, + "step": 13048 + }, + { + "epoch": 2.08, + "learning_rate": 3.2897465337625685e-05, + "loss": 0.7583, + "step": 13049 + }, + { + "epoch": 2.08, + "learning_rate": 3.289501750711462e-05, + "loss": 0.7279, + "step": 13050 + }, + { + "epoch": 2.08, + "learning_rate": 3.2892569592526414e-05, + "loss": 0.752, + "step": 13051 + }, + { + "epoch": 2.08, + "learning_rate": 3.2890121593887125e-05, + "loss": 0.8979, + "step": 13052 + }, + { + "epoch": 2.08, + "learning_rate": 3.288767351122284e-05, + "loss": 0.7961, + "step": 13053 + }, + { + "epoch": 2.08, + "learning_rate": 3.288522534455961e-05, + "loss": 0.7705, + "step": 13054 + }, + { + "epoch": 2.08, + "learning_rate": 3.288277709392352e-05, + "loss": 0.7325, + "step": 13055 + }, + { + "epoch": 2.08, + "learning_rate": 3.2880328759340644e-05, + "loss": 0.78, + "step": 13056 + }, + { + "epoch": 2.08, + "learning_rate": 3.287788034083705e-05, + "loss": 0.8153, + "step": 13057 + }, + { + "epoch": 2.08, + "learning_rate": 3.287543183843882e-05, + "loss": 0.7266, + "step": 13058 + }, + { + "epoch": 2.08, + "learning_rate": 3.287298325217201e-05, + "loss": 0.8123, + "step": 13059 + }, + { + "epoch": 2.08, + "learning_rate": 3.2870534582062714e-05, + "loss": 0.7056, + "step": 13060 + }, + { + "epoch": 2.08, + "learning_rate": 3.2868085828137e-05, + "loss": 0.7268, + "step": 13061 + }, + { + "epoch": 2.08, + "learning_rate": 3.286563699042096e-05, + "loss": 0.7957, + "step": 13062 + }, + { + "epoch": 2.08, + "learning_rate": 3.2863188068940657e-05, + "loss": 0.8391, + "step": 13063 + }, + { + "epoch": 2.08, + "learning_rate": 3.2860739063722155e-05, + "loss": 0.7904, + "step": 13064 + }, + { + "epoch": 2.08, + "learning_rate": 3.285828997479158e-05, + "loss": 0.7916, + "step": 13065 + }, + { + "epoch": 2.08, + "learning_rate": 3.285584080217498e-05, + "loss": 0.7704, + "step": 13066 + }, + { + "epoch": 2.08, + "learning_rate": 3.285339154589843e-05, + "loss": 0.7421, + "step": 13067 + }, + { + "epoch": 2.08, + "learning_rate": 3.285094220598805e-05, + "loss": 0.7472, + "step": 13068 + }, + { + "epoch": 2.08, + "learning_rate": 3.284849278246989e-05, + "loss": 0.7621, + "step": 13069 + }, + { + "epoch": 2.08, + "learning_rate": 3.2846043275370046e-05, + "loss": 0.766, + "step": 13070 + }, + { + "epoch": 2.08, + "learning_rate": 3.284359368471461e-05, + "loss": 0.779, + "step": 13071 + }, + { + "epoch": 2.08, + "learning_rate": 3.284114401052966e-05, + "loss": 0.8148, + "step": 13072 + }, + { + "epoch": 2.08, + "learning_rate": 3.2838694252841284e-05, + "loss": 0.8035, + "step": 13073 + }, + { + "epoch": 2.08, + "learning_rate": 3.283624441167558e-05, + "loss": 0.734, + "step": 13074 + }, + { + "epoch": 2.08, + "learning_rate": 3.283379448705863e-05, + "loss": 0.7408, + "step": 13075 + }, + { + "epoch": 2.08, + "learning_rate": 3.283134447901652e-05, + "loss": 0.7761, + "step": 13076 + }, + { + "epoch": 2.08, + "learning_rate": 3.282889438757535e-05, + "loss": 0.7279, + "step": 13077 + }, + { + "epoch": 2.08, + "learning_rate": 3.282644421276121e-05, + "loss": 0.7839, + "step": 13078 + }, + { + "epoch": 2.08, + "learning_rate": 3.282399395460017e-05, + "loss": 0.7856, + "step": 13079 + }, + { + "epoch": 2.08, + "learning_rate": 3.282154361311837e-05, + "loss": 0.8303, + "step": 13080 + }, + { + "epoch": 2.08, + "learning_rate": 3.281909318834187e-05, + "loss": 0.6955, + "step": 13081 + }, + { + "epoch": 2.08, + "learning_rate": 3.281664268029677e-05, + "loss": 0.7324, + "step": 13082 + }, + { + "epoch": 2.08, + "learning_rate": 3.281419208900918e-05, + "loss": 0.7239, + "step": 13083 + }, + { + "epoch": 2.08, + "learning_rate": 3.281174141450518e-05, + "loss": 0.7271, + "step": 13084 + }, + { + "epoch": 2.08, + "learning_rate": 3.280929065681088e-05, + "loss": 0.7534, + "step": 13085 + }, + { + "epoch": 2.08, + "learning_rate": 3.280683981595239e-05, + "loss": 0.7541, + "step": 13086 + }, + { + "epoch": 2.08, + "learning_rate": 3.280438889195577e-05, + "loss": 0.7673, + "step": 13087 + }, + { + "epoch": 2.09, + "learning_rate": 3.280193788484716e-05, + "loss": 0.7794, + "step": 13088 + }, + { + "epoch": 2.09, + "learning_rate": 3.2799486794652645e-05, + "loss": 0.7129, + "step": 13089 + }, + { + "epoch": 2.09, + "learning_rate": 3.279703562139833e-05, + "loss": 0.7008, + "step": 13090 + }, + { + "epoch": 2.09, + "learning_rate": 3.279458436511033e-05, + "loss": 0.7807, + "step": 13091 + }, + { + "epoch": 2.09, + "learning_rate": 3.279213302581473e-05, + "loss": 0.7843, + "step": 13092 + }, + { + "epoch": 2.09, + "learning_rate": 3.2789681603537636e-05, + "loss": 0.6935, + "step": 13093 + }, + { + "epoch": 2.09, + "learning_rate": 3.278723009830518e-05, + "loss": 0.8581, + "step": 13094 + }, + { + "epoch": 2.09, + "learning_rate": 3.278477851014344e-05, + "loss": 0.6998, + "step": 13095 + }, + { + "epoch": 2.09, + "learning_rate": 3.278232683907853e-05, + "loss": 0.7864, + "step": 13096 + }, + { + "epoch": 2.09, + "learning_rate": 3.277987508513657e-05, + "loss": 0.7553, + "step": 13097 + }, + { + "epoch": 2.09, + "learning_rate": 3.277742324834366e-05, + "loss": 0.7376, + "step": 13098 + }, + { + "epoch": 2.09, + "learning_rate": 3.277497132872592e-05, + "loss": 0.8398, + "step": 13099 + }, + { + "epoch": 2.09, + "learning_rate": 3.277251932630946e-05, + "loss": 0.8127, + "step": 13100 + }, + { + "epoch": 2.09, + "learning_rate": 3.2770067241120386e-05, + "loss": 0.7121, + "step": 13101 + }, + { + "epoch": 2.09, + "learning_rate": 3.27676150731848e-05, + "loss": 0.7642, + "step": 13102 + }, + { + "epoch": 2.09, + "learning_rate": 3.2765162822528845e-05, + "loss": 0.8022, + "step": 13103 + }, + { + "epoch": 2.09, + "learning_rate": 3.276271048917861e-05, + "loss": 0.745, + "step": 13104 + }, + { + "epoch": 2.09, + "learning_rate": 3.2760258073160225e-05, + "loss": 0.7952, + "step": 13105 + }, + { + "epoch": 2.09, + "learning_rate": 3.2757805574499805e-05, + "loss": 0.7586, + "step": 13106 + }, + { + "epoch": 2.09, + "learning_rate": 3.275535299322346e-05, + "loss": 0.7679, + "step": 13107 + }, + { + "epoch": 2.09, + "learning_rate": 3.275290032935732e-05, + "loss": 0.832, + "step": 13108 + }, + { + "epoch": 2.09, + "learning_rate": 3.27504475829275e-05, + "loss": 0.7422, + "step": 13109 + }, + { + "epoch": 2.09, + "learning_rate": 3.27479947539601e-05, + "loss": 0.7568, + "step": 13110 + }, + { + "epoch": 2.09, + "learning_rate": 3.274554184248129e-05, + "loss": 0.7323, + "step": 13111 + }, + { + "epoch": 2.09, + "learning_rate": 3.274308884851715e-05, + "loss": 0.8324, + "step": 13112 + }, + { + "epoch": 2.09, + "learning_rate": 3.27406357720938e-05, + "loss": 0.7964, + "step": 13113 + }, + { + "epoch": 2.09, + "learning_rate": 3.273818261323739e-05, + "loss": 0.755, + "step": 13114 + }, + { + "epoch": 2.09, + "learning_rate": 3.2735729371974036e-05, + "loss": 0.7209, + "step": 13115 + }, + { + "epoch": 2.09, + "learning_rate": 3.273327604832985e-05, + "loss": 0.7474, + "step": 13116 + }, + { + "epoch": 2.09, + "learning_rate": 3.2730822642330985e-05, + "loss": 0.7356, + "step": 13117 + }, + { + "epoch": 2.09, + "learning_rate": 3.272836915400354e-05, + "loss": 0.7479, + "step": 13118 + }, + { + "epoch": 2.09, + "learning_rate": 3.272591558337365e-05, + "loss": 0.7931, + "step": 13119 + }, + { + "epoch": 2.09, + "learning_rate": 3.272346193046747e-05, + "loss": 0.7699, + "step": 13120 + }, + { + "epoch": 2.09, + "learning_rate": 3.2721008195311096e-05, + "loss": 0.7258, + "step": 13121 + }, + { + "epoch": 2.09, + "learning_rate": 3.2718554377930675e-05, + "loss": 0.787, + "step": 13122 + }, + { + "epoch": 2.09, + "learning_rate": 3.271610047835233e-05, + "loss": 0.7451, + "step": 13123 + }, + { + "epoch": 2.09, + "learning_rate": 3.2713646496602214e-05, + "loss": 0.7069, + "step": 13124 + }, + { + "epoch": 2.09, + "learning_rate": 3.2711192432706426e-05, + "loss": 0.7965, + "step": 13125 + }, + { + "epoch": 2.09, + "learning_rate": 3.270873828669113e-05, + "loss": 0.7271, + "step": 13126 + }, + { + "epoch": 2.09, + "learning_rate": 3.270628405858246e-05, + "loss": 0.748, + "step": 13127 + }, + { + "epoch": 2.09, + "learning_rate": 3.270382974840653e-05, + "loss": 0.7097, + "step": 13128 + }, + { + "epoch": 2.09, + "learning_rate": 3.2701375356189494e-05, + "loss": 0.7222, + "step": 13129 + }, + { + "epoch": 2.09, + "learning_rate": 3.269892088195749e-05, + "loss": 0.8656, + "step": 13130 + }, + { + "epoch": 2.09, + "learning_rate": 3.269646632573665e-05, + "loss": 0.7913, + "step": 13131 + }, + { + "epoch": 2.09, + "learning_rate": 3.269401168755312e-05, + "loss": 0.7402, + "step": 13132 + }, + { + "epoch": 2.09, + "learning_rate": 3.269155696743303e-05, + "loss": 0.775, + "step": 13133 + }, + { + "epoch": 2.09, + "learning_rate": 3.268910216540253e-05, + "loss": 0.7062, + "step": 13134 + }, + { + "epoch": 2.09, + "learning_rate": 3.268664728148777e-05, + "loss": 0.7563, + "step": 13135 + }, + { + "epoch": 2.09, + "learning_rate": 3.268419231571487e-05, + "loss": 0.804, + "step": 13136 + }, + { + "epoch": 2.09, + "learning_rate": 3.2681737268109994e-05, + "loss": 0.7303, + "step": 13137 + }, + { + "epoch": 2.09, + "learning_rate": 3.2679282138699275e-05, + "loss": 0.7161, + "step": 13138 + }, + { + "epoch": 2.09, + "learning_rate": 3.2676826927508866e-05, + "loss": 0.6834, + "step": 13139 + }, + { + "epoch": 2.09, + "learning_rate": 3.267437163456491e-05, + "loss": 0.7202, + "step": 13140 + }, + { + "epoch": 2.09, + "learning_rate": 3.2671916259893566e-05, + "loss": 0.7371, + "step": 13141 + }, + { + "epoch": 2.09, + "learning_rate": 3.266946080352095e-05, + "loss": 0.7537, + "step": 13142 + }, + { + "epoch": 2.09, + "learning_rate": 3.266700526547325e-05, + "loss": 0.8231, + "step": 13143 + }, + { + "epoch": 2.09, + "learning_rate": 3.266454964577659e-05, + "loss": 0.7966, + "step": 13144 + }, + { + "epoch": 2.09, + "learning_rate": 3.2662093944457126e-05, + "loss": 0.7594, + "step": 13145 + }, + { + "epoch": 2.09, + "learning_rate": 3.265963816154103e-05, + "loss": 0.7361, + "step": 13146 + }, + { + "epoch": 2.09, + "learning_rate": 3.265718229705442e-05, + "loss": 0.7817, + "step": 13147 + }, + { + "epoch": 2.09, + "learning_rate": 3.265472635102348e-05, + "loss": 0.6921, + "step": 13148 + }, + { + "epoch": 2.09, + "learning_rate": 3.2652270323474344e-05, + "loss": 0.7476, + "step": 13149 + }, + { + "epoch": 2.09, + "learning_rate": 3.2649814214433186e-05, + "loss": 0.8218, + "step": 13150 + }, + { + "epoch": 2.1, + "learning_rate": 3.264735802392614e-05, + "loss": 0.771, + "step": 13151 + }, + { + "epoch": 2.1, + "learning_rate": 3.264490175197937e-05, + "loss": 0.7357, + "step": 13152 + }, + { + "epoch": 2.1, + "learning_rate": 3.2642445398619046e-05, + "loss": 0.7728, + "step": 13153 + }, + { + "epoch": 2.1, + "learning_rate": 3.2639988963871316e-05, + "loss": 0.7714, + "step": 13154 + }, + { + "epoch": 2.1, + "learning_rate": 3.263753244776234e-05, + "loss": 0.8112, + "step": 13155 + }, + { + "epoch": 2.1, + "learning_rate": 3.263507585031828e-05, + "loss": 0.7395, + "step": 13156 + }, + { + "epoch": 2.1, + "learning_rate": 3.2632619171565306e-05, + "loss": 0.7727, + "step": 13157 + }, + { + "epoch": 2.1, + "learning_rate": 3.263016241152956e-05, + "loss": 0.6803, + "step": 13158 + }, + { + "epoch": 2.1, + "learning_rate": 3.262770557023722e-05, + "loss": 0.6808, + "step": 13159 + }, + { + "epoch": 2.1, + "learning_rate": 3.262524864771446e-05, + "loss": 0.7104, + "step": 13160 + }, + { + "epoch": 2.1, + "learning_rate": 3.2622791643987424e-05, + "loss": 0.8438, + "step": 13161 + }, + { + "epoch": 2.1, + "learning_rate": 3.262033455908228e-05, + "loss": 0.6698, + "step": 13162 + }, + { + "epoch": 2.1, + "learning_rate": 3.26178773930252e-05, + "loss": 0.759, + "step": 13163 + }, + { + "epoch": 2.1, + "learning_rate": 3.261542014584235e-05, + "loss": 0.7154, + "step": 13164 + }, + { + "epoch": 2.1, + "learning_rate": 3.26129628175599e-05, + "loss": 0.7336, + "step": 13165 + }, + { + "epoch": 2.1, + "learning_rate": 3.261050540820402e-05, + "loss": 0.6807, + "step": 13166 + }, + { + "epoch": 2.1, + "learning_rate": 3.260804791780087e-05, + "loss": 0.6556, + "step": 13167 + }, + { + "epoch": 2.1, + "learning_rate": 3.260559034637664e-05, + "loss": 0.8005, + "step": 13168 + }, + { + "epoch": 2.1, + "learning_rate": 3.260313269395748e-05, + "loss": 0.7429, + "step": 13169 + }, + { + "epoch": 2.1, + "learning_rate": 3.260067496056958e-05, + "loss": 0.7418, + "step": 13170 + }, + { + "epoch": 2.1, + "learning_rate": 3.25982171462391e-05, + "loss": 0.7202, + "step": 13171 + }, + { + "epoch": 2.1, + "learning_rate": 3.259575925099223e-05, + "loss": 0.7217, + "step": 13172 + }, + { + "epoch": 2.1, + "learning_rate": 3.2593301274855125e-05, + "loss": 0.8154, + "step": 13173 + }, + { + "epoch": 2.1, + "learning_rate": 3.259084321785398e-05, + "loss": 0.805, + "step": 13174 + }, + { + "epoch": 2.1, + "learning_rate": 3.258838508001496e-05, + "loss": 0.8665, + "step": 13175 + }, + { + "epoch": 2.1, + "learning_rate": 3.2585926861364244e-05, + "loss": 0.7101, + "step": 13176 + }, + { + "epoch": 2.1, + "learning_rate": 3.258346856192801e-05, + "loss": 0.8368, + "step": 13177 + }, + { + "epoch": 2.1, + "learning_rate": 3.258101018173245e-05, + "loss": 0.7698, + "step": 13178 + }, + { + "epoch": 2.1, + "learning_rate": 3.2578551720803726e-05, + "loss": 0.668, + "step": 13179 + }, + { + "epoch": 2.1, + "learning_rate": 3.257609317916802e-05, + "loss": 0.8669, + "step": 13180 + }, + { + "epoch": 2.1, + "learning_rate": 3.257363455685154e-05, + "loss": 0.7481, + "step": 13181 + }, + { + "epoch": 2.1, + "learning_rate": 3.257117585388044e-05, + "loss": 0.7241, + "step": 13182 + }, + { + "epoch": 2.1, + "learning_rate": 3.256871707028091e-05, + "loss": 0.7107, + "step": 13183 + }, + { + "epoch": 2.1, + "learning_rate": 3.256625820607915e-05, + "loss": 0.7298, + "step": 13184 + }, + { + "epoch": 2.1, + "learning_rate": 3.256379926130132e-05, + "loss": 0.748, + "step": 13185 + }, + { + "epoch": 2.1, + "learning_rate": 3.256134023597363e-05, + "loss": 0.7853, + "step": 13186 + }, + { + "epoch": 2.1, + "learning_rate": 3.255888113012225e-05, + "loss": 0.7376, + "step": 13187 + }, + { + "epoch": 2.1, + "learning_rate": 3.2556421943773376e-05, + "loss": 0.752, + "step": 13188 + }, + { + "epoch": 2.1, + "learning_rate": 3.25539626769532e-05, + "loss": 0.8158, + "step": 13189 + }, + { + "epoch": 2.1, + "learning_rate": 3.2551503329687906e-05, + "loss": 0.7299, + "step": 13190 + }, + { + "epoch": 2.1, + "learning_rate": 3.2549043902003685e-05, + "loss": 0.8049, + "step": 13191 + }, + { + "epoch": 2.1, + "learning_rate": 3.254658439392673e-05, + "loss": 0.7281, + "step": 13192 + }, + { + "epoch": 2.1, + "learning_rate": 3.2544124805483235e-05, + "loss": 0.7092, + "step": 13193 + }, + { + "epoch": 2.1, + "learning_rate": 3.2541665136699386e-05, + "loss": 0.7588, + "step": 13194 + }, + { + "epoch": 2.1, + "learning_rate": 3.2539205387601384e-05, + "loss": 0.7733, + "step": 13195 + }, + { + "epoch": 2.1, + "learning_rate": 3.253674555821543e-05, + "loss": 0.7871, + "step": 13196 + }, + { + "epoch": 2.1, + "learning_rate": 3.25342856485677e-05, + "loss": 0.7618, + "step": 13197 + }, + { + "epoch": 2.1, + "learning_rate": 3.25318256586844e-05, + "loss": 0.8669, + "step": 13198 + }, + { + "epoch": 2.1, + "learning_rate": 3.252936558859174e-05, + "loss": 0.7241, + "step": 13199 + }, + { + "epoch": 2.1, + "learning_rate": 3.25269054383159e-05, + "loss": 0.8146, + "step": 13200 + }, + { + "epoch": 2.1, + "learning_rate": 3.2524445207883095e-05, + "loss": 0.7548, + "step": 13201 + }, + { + "epoch": 2.1, + "learning_rate": 3.2521984897319505e-05, + "loss": 0.7626, + "step": 13202 + }, + { + "epoch": 2.1, + "learning_rate": 3.251952450665135e-05, + "loss": 0.7828, + "step": 13203 + }, + { + "epoch": 2.1, + "learning_rate": 3.2517064035904834e-05, + "loss": 0.8038, + "step": 13204 + }, + { + "epoch": 2.1, + "learning_rate": 3.2514603485106133e-05, + "loss": 0.7382, + "step": 13205 + }, + { + "epoch": 2.1, + "learning_rate": 3.2512142854281474e-05, + "loss": 0.7088, + "step": 13206 + }, + { + "epoch": 2.1, + "learning_rate": 3.250968214345705e-05, + "loss": 0.7887, + "step": 13207 + }, + { + "epoch": 2.1, + "learning_rate": 3.2507221352659074e-05, + "loss": 0.7375, + "step": 13208 + }, + { + "epoch": 2.1, + "learning_rate": 3.250476048191375e-05, + "loss": 0.8002, + "step": 13209 + }, + { + "epoch": 2.1, + "learning_rate": 3.250229953124728e-05, + "loss": 0.7976, + "step": 13210 + }, + { + "epoch": 2.1, + "learning_rate": 3.249983850068588e-05, + "loss": 0.7891, + "step": 13211 + }, + { + "epoch": 2.1, + "learning_rate": 3.249737739025575e-05, + "loss": 0.7523, + "step": 13212 + }, + { + "epoch": 2.1, + "learning_rate": 3.249491619998311e-05, + "loss": 0.7226, + "step": 13213 + }, + { + "epoch": 2.11, + "learning_rate": 3.2492454929894144e-05, + "loss": 0.7664, + "step": 13214 + }, + { + "epoch": 2.11, + "learning_rate": 3.24899935800151e-05, + "loss": 0.7156, + "step": 13215 + }, + { + "epoch": 2.11, + "learning_rate": 3.2487532150372164e-05, + "loss": 0.7425, + "step": 13216 + }, + { + "epoch": 2.11, + "learning_rate": 3.2485070640991556e-05, + "loss": 0.76, + "step": 13217 + }, + { + "epoch": 2.11, + "learning_rate": 3.248260905189949e-05, + "loss": 0.7078, + "step": 13218 + }, + { + "epoch": 2.11, + "learning_rate": 3.248014738312219e-05, + "loss": 0.7202, + "step": 13219 + }, + { + "epoch": 2.11, + "learning_rate": 3.247768563468584e-05, + "loss": 0.7562, + "step": 13220 + }, + { + "epoch": 2.11, + "learning_rate": 3.2475223806616696e-05, + "loss": 0.9522, + "step": 13221 + }, + { + "epoch": 2.11, + "learning_rate": 3.2472761898940954e-05, + "loss": 0.826, + "step": 13222 + }, + { + "epoch": 2.11, + "learning_rate": 3.247029991168483e-05, + "loss": 0.7995, + "step": 13223 + }, + { + "epoch": 2.11, + "learning_rate": 3.246783784487455e-05, + "loss": 0.7206, + "step": 13224 + }, + { + "epoch": 2.11, + "learning_rate": 3.246537569853633e-05, + "loss": 0.7821, + "step": 13225 + }, + { + "epoch": 2.11, + "learning_rate": 3.2462913472696385e-05, + "loss": 0.8053, + "step": 13226 + }, + { + "epoch": 2.11, + "learning_rate": 3.246045116738095e-05, + "loss": 0.7957, + "step": 13227 + }, + { + "epoch": 2.11, + "learning_rate": 3.2457988782616234e-05, + "loss": 0.7975, + "step": 13228 + }, + { + "epoch": 2.11, + "learning_rate": 3.2455526318428476e-05, + "loss": 0.6692, + "step": 13229 + }, + { + "epoch": 2.11, + "learning_rate": 3.245306377484388e-05, + "loss": 0.7843, + "step": 13230 + }, + { + "epoch": 2.11, + "learning_rate": 3.245060115188868e-05, + "loss": 0.7704, + "step": 13231 + }, + { + "epoch": 2.11, + "learning_rate": 3.2448138449589106e-05, + "loss": 0.7559, + "step": 13232 + }, + { + "epoch": 2.11, + "learning_rate": 3.244567566797138e-05, + "loss": 0.7468, + "step": 13233 + }, + { + "epoch": 2.11, + "learning_rate": 3.244321280706172e-05, + "loss": 0.7631, + "step": 13234 + }, + { + "epoch": 2.11, + "learning_rate": 3.244074986688637e-05, + "loss": 0.7012, + "step": 13235 + }, + { + "epoch": 2.11, + "learning_rate": 3.243828684747155e-05, + "loss": 0.7312, + "step": 13236 + }, + { + "epoch": 2.11, + "learning_rate": 3.243582374884349e-05, + "loss": 0.7112, + "step": 13237 + }, + { + "epoch": 2.11, + "learning_rate": 3.243336057102842e-05, + "loss": 0.7817, + "step": 13238 + }, + { + "epoch": 2.11, + "learning_rate": 3.2430897314052576e-05, + "loss": 0.7118, + "step": 13239 + }, + { + "epoch": 2.11, + "learning_rate": 3.242843397794218e-05, + "loss": 0.7326, + "step": 13240 + }, + { + "epoch": 2.11, + "learning_rate": 3.242597056272348e-05, + "loss": 0.7824, + "step": 13241 + }, + { + "epoch": 2.11, + "learning_rate": 3.24235070684227e-05, + "loss": 0.8368, + "step": 13242 + }, + { + "epoch": 2.11, + "learning_rate": 3.2421043495066076e-05, + "loss": 0.7021, + "step": 13243 + }, + { + "epoch": 2.11, + "learning_rate": 3.2418579842679846e-05, + "loss": 0.7477, + "step": 13244 + }, + { + "epoch": 2.11, + "learning_rate": 3.241611611129024e-05, + "loss": 0.948, + "step": 13245 + }, + { + "epoch": 2.11, + "learning_rate": 3.2413652300923504e-05, + "loss": 0.7394, + "step": 13246 + }, + { + "epoch": 2.11, + "learning_rate": 3.241118841160587e-05, + "loss": 0.7742, + "step": 13247 + }, + { + "epoch": 2.11, + "learning_rate": 3.240872444336358e-05, + "loss": 0.6848, + "step": 13248 + }, + { + "epoch": 2.11, + "learning_rate": 3.240626039622286e-05, + "loss": 0.7305, + "step": 13249 + }, + { + "epoch": 2.11, + "learning_rate": 3.240379627020999e-05, + "loss": 0.7967, + "step": 13250 + }, + { + "epoch": 2.11, + "learning_rate": 3.2401332065351165e-05, + "loss": 0.725, + "step": 13251 + }, + { + "epoch": 2.11, + "learning_rate": 3.2398867781672655e-05, + "loss": 0.7161, + "step": 13252 + }, + { + "epoch": 2.11, + "learning_rate": 3.2396403419200694e-05, + "loss": 0.8128, + "step": 13253 + }, + { + "epoch": 2.11, + "learning_rate": 3.239393897796152e-05, + "loss": 0.7507, + "step": 13254 + }, + { + "epoch": 2.11, + "learning_rate": 3.239147445798139e-05, + "loss": 0.7693, + "step": 13255 + }, + { + "epoch": 2.11, + "learning_rate": 3.238900985928655e-05, + "loss": 0.7774, + "step": 13256 + }, + { + "epoch": 2.11, + "learning_rate": 3.238654518190323e-05, + "loss": 0.753, + "step": 13257 + }, + { + "epoch": 2.11, + "learning_rate": 3.2384080425857685e-05, + "loss": 0.7214, + "step": 13258 + }, + { + "epoch": 2.11, + "learning_rate": 3.2381615591176186e-05, + "loss": 0.8538, + "step": 13259 + }, + { + "epoch": 2.11, + "learning_rate": 3.237915067788495e-05, + "loss": 0.7767, + "step": 13260 + }, + { + "epoch": 2.11, + "learning_rate": 3.237668568601023e-05, + "loss": 0.7686, + "step": 13261 + }, + { + "epoch": 2.11, + "learning_rate": 3.237422061557831e-05, + "loss": 0.7125, + "step": 13262 + }, + { + "epoch": 2.11, + "learning_rate": 3.23717554666154e-05, + "loss": 0.6931, + "step": 13263 + }, + { + "epoch": 2.11, + "learning_rate": 3.2369290239147773e-05, + "loss": 0.7557, + "step": 13264 + }, + { + "epoch": 2.11, + "learning_rate": 3.2366824933201686e-05, + "loss": 0.8071, + "step": 13265 + }, + { + "epoch": 2.11, + "learning_rate": 3.2364359548803366e-05, + "loss": 0.673, + "step": 13266 + }, + { + "epoch": 2.11, + "learning_rate": 3.2361894085979105e-05, + "loss": 0.7178, + "step": 13267 + }, + { + "epoch": 2.11, + "learning_rate": 3.235942854475514e-05, + "loss": 0.8147, + "step": 13268 + }, + { + "epoch": 2.11, + "learning_rate": 3.235696292515772e-05, + "loss": 0.7925, + "step": 13269 + }, + { + "epoch": 2.11, + "learning_rate": 3.235449722721313e-05, + "loss": 0.8009, + "step": 13270 + }, + { + "epoch": 2.11, + "learning_rate": 3.23520314509476e-05, + "loss": 0.7913, + "step": 13271 + }, + { + "epoch": 2.11, + "learning_rate": 3.234956559638739e-05, + "loss": 0.8234, + "step": 13272 + }, + { + "epoch": 2.11, + "learning_rate": 3.234709966355878e-05, + "loss": 0.7285, + "step": 13273 + }, + { + "epoch": 2.11, + "learning_rate": 3.234463365248801e-05, + "loss": 0.7566, + "step": 13274 + }, + { + "epoch": 2.11, + "learning_rate": 3.2342167563201354e-05, + "loss": 0.7122, + "step": 13275 + }, + { + "epoch": 2.12, + "learning_rate": 3.233970139572507e-05, + "loss": 0.8593, + "step": 13276 + }, + { + "epoch": 2.12, + "learning_rate": 3.2337235150085424e-05, + "loss": 0.8739, + "step": 13277 + }, + { + "epoch": 2.12, + "learning_rate": 3.233476882630868e-05, + "loss": 0.7594, + "step": 13278 + }, + { + "epoch": 2.12, + "learning_rate": 3.23323024244211e-05, + "loss": 0.7813, + "step": 13279 + }, + { + "epoch": 2.12, + "learning_rate": 3.232983594444895e-05, + "loss": 0.7594, + "step": 13280 + }, + { + "epoch": 2.12, + "learning_rate": 3.232736938641849e-05, + "loss": 0.6715, + "step": 13281 + }, + { + "epoch": 2.12, + "learning_rate": 3.232490275035601e-05, + "loss": 0.7113, + "step": 13282 + }, + { + "epoch": 2.12, + "learning_rate": 3.232243603628775e-05, + "loss": 0.8449, + "step": 13283 + }, + { + "epoch": 2.12, + "learning_rate": 3.231996924424e-05, + "loss": 0.8177, + "step": 13284 + }, + { + "epoch": 2.12, + "learning_rate": 3.231750237423902e-05, + "loss": 0.7591, + "step": 13285 + }, + { + "epoch": 2.12, + "learning_rate": 3.231503542631108e-05, + "loss": 0.8053, + "step": 13286 + }, + { + "epoch": 2.12, + "learning_rate": 3.231256840048245e-05, + "loss": 0.7764, + "step": 13287 + }, + { + "epoch": 2.12, + "learning_rate": 3.2310101296779417e-05, + "loss": 0.7926, + "step": 13288 + }, + { + "epoch": 2.12, + "learning_rate": 3.230763411522823e-05, + "loss": 0.7404, + "step": 13289 + }, + { + "epoch": 2.12, + "learning_rate": 3.2305166855855186e-05, + "loss": 0.7237, + "step": 13290 + }, + { + "epoch": 2.12, + "learning_rate": 3.230269951868654e-05, + "loss": 0.7943, + "step": 13291 + }, + { + "epoch": 2.12, + "learning_rate": 3.230023210374859e-05, + "loss": 0.8383, + "step": 13292 + }, + { + "epoch": 2.12, + "learning_rate": 3.2297764611067596e-05, + "loss": 0.7775, + "step": 13293 + }, + { + "epoch": 2.12, + "learning_rate": 3.2295297040669834e-05, + "loss": 0.756, + "step": 13294 + }, + { + "epoch": 2.12, + "learning_rate": 3.229282939258159e-05, + "loss": 0.7945, + "step": 13295 + }, + { + "epoch": 2.12, + "learning_rate": 3.2290361666829146e-05, + "loss": 0.7288, + "step": 13296 + }, + { + "epoch": 2.12, + "learning_rate": 3.228789386343877e-05, + "loss": 0.7431, + "step": 13297 + }, + { + "epoch": 2.12, + "learning_rate": 3.228542598243675e-05, + "loss": 0.8122, + "step": 13298 + }, + { + "epoch": 2.12, + "learning_rate": 3.228295802384937e-05, + "loss": 0.7757, + "step": 13299 + }, + { + "epoch": 2.12, + "learning_rate": 3.2280489987702914e-05, + "loss": 0.7838, + "step": 13300 + }, + { + "epoch": 2.12, + "learning_rate": 3.227802187402365e-05, + "loss": 0.6996, + "step": 13301 + }, + { + "epoch": 2.12, + "learning_rate": 3.227555368283787e-05, + "loss": 0.8053, + "step": 13302 + }, + { + "epoch": 2.12, + "learning_rate": 3.2273085414171864e-05, + "loss": 0.799, + "step": 13303 + }, + { + "epoch": 2.12, + "learning_rate": 3.227061706805191e-05, + "loss": 0.742, + "step": 13304 + }, + { + "epoch": 2.12, + "learning_rate": 3.226814864450431e-05, + "loss": 0.8162, + "step": 13305 + }, + { + "epoch": 2.12, + "learning_rate": 3.2265680143555324e-05, + "loss": 0.7152, + "step": 13306 + }, + { + "epoch": 2.12, + "learning_rate": 3.226321156523127e-05, + "loss": 0.7341, + "step": 13307 + }, + { + "epoch": 2.12, + "learning_rate": 3.226074290955841e-05, + "loss": 0.7173, + "step": 13308 + }, + { + "epoch": 2.12, + "learning_rate": 3.2258274176563054e-05, + "loss": 0.7105, + "step": 13309 + }, + { + "epoch": 2.12, + "learning_rate": 3.225580536627148e-05, + "loss": 0.7012, + "step": 13310 + }, + { + "epoch": 2.12, + "learning_rate": 3.225333647870999e-05, + "loss": 0.7123, + "step": 13311 + }, + { + "epoch": 2.12, + "learning_rate": 3.225086751390487e-05, + "loss": 0.7228, + "step": 13312 + }, + { + "epoch": 2.12, + "learning_rate": 3.224839847188241e-05, + "loss": 0.8213, + "step": 13313 + }, + { + "epoch": 2.12, + "learning_rate": 3.2245929352668905e-05, + "loss": 0.7063, + "step": 13314 + }, + { + "epoch": 2.12, + "learning_rate": 3.224346015629065e-05, + "loss": 0.7193, + "step": 13315 + }, + { + "epoch": 2.12, + "learning_rate": 3.224099088277395e-05, + "loss": 0.7351, + "step": 13316 + }, + { + "epoch": 2.12, + "learning_rate": 3.2238521532145085e-05, + "loss": 0.766, + "step": 13317 + }, + { + "epoch": 2.12, + "learning_rate": 3.2236052104430357e-05, + "loss": 0.7189, + "step": 13318 + }, + { + "epoch": 2.12, + "learning_rate": 3.223358259965608e-05, + "loss": 0.8084, + "step": 13319 + }, + { + "epoch": 2.12, + "learning_rate": 3.2231113017848535e-05, + "loss": 0.7107, + "step": 13320 + }, + { + "epoch": 2.12, + "learning_rate": 3.222864335903402e-05, + "loss": 0.7652, + "step": 13321 + }, + { + "epoch": 2.12, + "learning_rate": 3.2226173623238846e-05, + "loss": 0.738, + "step": 13322 + }, + { + "epoch": 2.12, + "learning_rate": 3.222370381048931e-05, + "loss": 0.7947, + "step": 13323 + }, + { + "epoch": 2.12, + "learning_rate": 3.222123392081172e-05, + "loss": 0.8369, + "step": 13324 + }, + { + "epoch": 2.12, + "learning_rate": 3.2218763954232376e-05, + "loss": 0.8265, + "step": 13325 + }, + { + "epoch": 2.12, + "learning_rate": 3.221629391077756e-05, + "loss": 0.7654, + "step": 13326 + }, + { + "epoch": 2.12, + "learning_rate": 3.2213823790473606e-05, + "loss": 0.808, + "step": 13327 + }, + { + "epoch": 2.12, + "learning_rate": 3.221135359334681e-05, + "loss": 0.736, + "step": 13328 + }, + { + "epoch": 2.12, + "learning_rate": 3.220888331942347e-05, + "loss": 0.7715, + "step": 13329 + }, + { + "epoch": 2.12, + "learning_rate": 3.22064129687299e-05, + "loss": 0.7522, + "step": 13330 + }, + { + "epoch": 2.12, + "learning_rate": 3.220394254129242e-05, + "loss": 0.6944, + "step": 13331 + }, + { + "epoch": 2.12, + "learning_rate": 3.220147203713731e-05, + "loss": 0.6898, + "step": 13332 + }, + { + "epoch": 2.12, + "learning_rate": 3.2199001456290906e-05, + "loss": 0.8742, + "step": 13333 + }, + { + "epoch": 2.12, + "learning_rate": 3.2196530798779504e-05, + "loss": 0.7607, + "step": 13334 + }, + { + "epoch": 2.12, + "learning_rate": 3.2194060064629415e-05, + "loss": 0.8128, + "step": 13335 + }, + { + "epoch": 2.12, + "learning_rate": 3.219158925386696e-05, + "loss": 0.7653, + "step": 13336 + }, + { + "epoch": 2.12, + "learning_rate": 3.2189118366518436e-05, + "loss": 0.7774, + "step": 13337 + }, + { + "epoch": 2.12, + "learning_rate": 3.2186647402610165e-05, + "loss": 0.7607, + "step": 13338 + }, + { + "epoch": 2.13, + "learning_rate": 3.218417636216847e-05, + "loss": 0.7349, + "step": 13339 + }, + { + "epoch": 2.13, + "learning_rate": 3.218170524521966e-05, + "loss": 0.7972, + "step": 13340 + }, + { + "epoch": 2.13, + "learning_rate": 3.217923405179004e-05, + "loss": 0.8633, + "step": 13341 + }, + { + "epoch": 2.13, + "learning_rate": 3.217676278190594e-05, + "loss": 0.7437, + "step": 13342 + }, + { + "epoch": 2.13, + "learning_rate": 3.2174291435593676e-05, + "loss": 0.7798, + "step": 13343 + }, + { + "epoch": 2.13, + "learning_rate": 3.217182001287957e-05, + "loss": 0.7402, + "step": 13344 + }, + { + "epoch": 2.13, + "learning_rate": 3.2169348513789927e-05, + "loss": 0.719, + "step": 13345 + }, + { + "epoch": 2.13, + "learning_rate": 3.216687693835107e-05, + "loss": 0.7483, + "step": 13346 + }, + { + "epoch": 2.13, + "learning_rate": 3.2164405286589336e-05, + "loss": 0.6631, + "step": 13347 + }, + { + "epoch": 2.13, + "learning_rate": 3.216193355853103e-05, + "loss": 0.7599, + "step": 13348 + }, + { + "epoch": 2.13, + "learning_rate": 3.215946175420248e-05, + "loss": 0.7661, + "step": 13349 + }, + { + "epoch": 2.13, + "learning_rate": 3.2156989873630006e-05, + "loss": 0.7919, + "step": 13350 + }, + { + "epoch": 2.13, + "learning_rate": 3.215451791683993e-05, + "loss": 0.7769, + "step": 13351 + }, + { + "epoch": 2.13, + "learning_rate": 3.2152045883858595e-05, + "loss": 0.7639, + "step": 13352 + }, + { + "epoch": 2.13, + "learning_rate": 3.214957377471231e-05, + "loss": 0.7458, + "step": 13353 + }, + { + "epoch": 2.13, + "learning_rate": 3.2147101589427404e-05, + "loss": 0.7005, + "step": 13354 + }, + { + "epoch": 2.13, + "learning_rate": 3.21446293280302e-05, + "loss": 0.7974, + "step": 13355 + }, + { + "epoch": 2.13, + "learning_rate": 3.214215699054703e-05, + "loss": 0.6931, + "step": 13356 + }, + { + "epoch": 2.13, + "learning_rate": 3.2139684577004234e-05, + "loss": 0.8391, + "step": 13357 + }, + { + "epoch": 2.13, + "learning_rate": 3.2137212087428126e-05, + "loss": 0.7402, + "step": 13358 + }, + { + "epoch": 2.13, + "learning_rate": 3.213473952184504e-05, + "loss": 0.7824, + "step": 13359 + }, + { + "epoch": 2.13, + "learning_rate": 3.213226688028132e-05, + "loss": 0.734, + "step": 13360 + }, + { + "epoch": 2.13, + "learning_rate": 3.212979416276328e-05, + "loss": 0.8195, + "step": 13361 + }, + { + "epoch": 2.13, + "learning_rate": 3.212732136931726e-05, + "loss": 0.7222, + "step": 13362 + }, + { + "epoch": 2.13, + "learning_rate": 3.21248484999696e-05, + "loss": 0.7912, + "step": 13363 + }, + { + "epoch": 2.13, + "learning_rate": 3.212237555474662e-05, + "loss": 0.7967, + "step": 13364 + }, + { + "epoch": 2.13, + "learning_rate": 3.211990253367467e-05, + "loss": 0.7214, + "step": 13365 + }, + { + "epoch": 2.13, + "learning_rate": 3.2117429436780086e-05, + "loss": 0.879, + "step": 13366 + }, + { + "epoch": 2.13, + "learning_rate": 3.211495626408919e-05, + "loss": 0.7377, + "step": 13367 + }, + { + "epoch": 2.13, + "learning_rate": 3.2112483015628335e-05, + "loss": 0.6534, + "step": 13368 + }, + { + "epoch": 2.13, + "learning_rate": 3.2110009691423854e-05, + "loss": 0.7054, + "step": 13369 + }, + { + "epoch": 2.13, + "learning_rate": 3.2107536291502075e-05, + "loss": 0.6817, + "step": 13370 + }, + { + "epoch": 2.13, + "learning_rate": 3.210506281588936e-05, + "loss": 0.7817, + "step": 13371 + }, + { + "epoch": 2.13, + "learning_rate": 3.2102589264612046e-05, + "loss": 0.7861, + "step": 13372 + }, + { + "epoch": 2.13, + "learning_rate": 3.210011563769646e-05, + "loss": 0.7561, + "step": 13373 + }, + { + "epoch": 2.13, + "learning_rate": 3.2097641935168956e-05, + "loss": 0.7231, + "step": 13374 + }, + { + "epoch": 2.13, + "learning_rate": 3.209516815705586e-05, + "loss": 0.7352, + "step": 13375 + }, + { + "epoch": 2.13, + "learning_rate": 3.2092694303383555e-05, + "loss": 0.7489, + "step": 13376 + }, + { + "epoch": 2.13, + "learning_rate": 3.209022037417835e-05, + "loss": 0.7885, + "step": 13377 + }, + { + "epoch": 2.13, + "learning_rate": 3.20877463694666e-05, + "loss": 0.7663, + "step": 13378 + }, + { + "epoch": 2.13, + "learning_rate": 3.208527228927466e-05, + "loss": 0.7087, + "step": 13379 + }, + { + "epoch": 2.13, + "learning_rate": 3.2082798133628865e-05, + "loss": 0.7319, + "step": 13380 + }, + { + "epoch": 2.13, + "learning_rate": 3.208032390255557e-05, + "loss": 0.7696, + "step": 13381 + }, + { + "epoch": 2.13, + "learning_rate": 3.2077849596081134e-05, + "loss": 0.7983, + "step": 13382 + }, + { + "epoch": 2.13, + "learning_rate": 3.20753752142319e-05, + "loss": 0.687, + "step": 13383 + }, + { + "epoch": 2.13, + "learning_rate": 3.2072900757034195e-05, + "loss": 0.7031, + "step": 13384 + }, + { + "epoch": 2.13, + "learning_rate": 3.2070426224514406e-05, + "loss": 0.7411, + "step": 13385 + }, + { + "epoch": 2.13, + "learning_rate": 3.206795161669887e-05, + "loss": 0.7201, + "step": 13386 + }, + { + "epoch": 2.13, + "learning_rate": 3.206547693361393e-05, + "loss": 0.7287, + "step": 13387 + }, + { + "epoch": 2.13, + "learning_rate": 3.2063002175285954e-05, + "loss": 0.8256, + "step": 13388 + }, + { + "epoch": 2.13, + "learning_rate": 3.20605273417413e-05, + "loss": 0.7153, + "step": 13389 + }, + { + "epoch": 2.13, + "learning_rate": 3.2058052433006305e-05, + "loss": 0.7673, + "step": 13390 + }, + { + "epoch": 2.13, + "learning_rate": 3.205557744910735e-05, + "loss": 0.7633, + "step": 13391 + }, + { + "epoch": 2.13, + "learning_rate": 3.205310239007077e-05, + "loss": 0.7482, + "step": 13392 + }, + { + "epoch": 2.13, + "learning_rate": 3.2050627255922936e-05, + "loss": 0.7528, + "step": 13393 + }, + { + "epoch": 2.13, + "learning_rate": 3.20481520466902e-05, + "loss": 0.7334, + "step": 13394 + }, + { + "epoch": 2.13, + "learning_rate": 3.2045676762398924e-05, + "loss": 0.7734, + "step": 13395 + }, + { + "epoch": 2.13, + "learning_rate": 3.204320140307547e-05, + "loss": 0.7658, + "step": 13396 + }, + { + "epoch": 2.13, + "learning_rate": 3.2040725968746196e-05, + "loss": 0.8, + "step": 13397 + }, + { + "epoch": 2.13, + "learning_rate": 3.203825045943746e-05, + "loss": 0.7667, + "step": 13398 + }, + { + "epoch": 2.13, + "learning_rate": 3.2035774875175635e-05, + "loss": 0.7499, + "step": 13399 + }, + { + "epoch": 2.13, + "learning_rate": 3.203329921598708e-05, + "loss": 0.7743, + "step": 13400 + }, + { + "epoch": 2.13, + "learning_rate": 3.203082348189815e-05, + "loss": 0.7358, + "step": 13401 + }, + { + "epoch": 2.14, + "learning_rate": 3.202834767293523e-05, + "loss": 0.7097, + "step": 13402 + }, + { + "epoch": 2.14, + "learning_rate": 3.2025871789124674e-05, + "loss": 0.788, + "step": 13403 + }, + { + "epoch": 2.14, + "learning_rate": 3.202339583049284e-05, + "loss": 0.7403, + "step": 13404 + }, + { + "epoch": 2.14, + "learning_rate": 3.202091979706611e-05, + "loss": 0.8012, + "step": 13405 + }, + { + "epoch": 2.14, + "learning_rate": 3.2018443688870845e-05, + "loss": 0.6909, + "step": 13406 + }, + { + "epoch": 2.14, + "learning_rate": 3.2015967505933406e-05, + "loss": 0.7816, + "step": 13407 + }, + { + "epoch": 2.14, + "learning_rate": 3.201349124828019e-05, + "loss": 0.756, + "step": 13408 + }, + { + "epoch": 2.14, + "learning_rate": 3.201101491593754e-05, + "loss": 0.817, + "step": 13409 + }, + { + "epoch": 2.14, + "learning_rate": 3.200853850893184e-05, + "loss": 0.8116, + "step": 13410 + }, + { + "epoch": 2.14, + "learning_rate": 3.2006062027289465e-05, + "loss": 0.7131, + "step": 13411 + }, + { + "epoch": 2.14, + "learning_rate": 3.2003585471036776e-05, + "loss": 0.7406, + "step": 13412 + }, + { + "epoch": 2.14, + "learning_rate": 3.200110884020016e-05, + "loss": 0.7628, + "step": 13413 + }, + { + "epoch": 2.14, + "learning_rate": 3.199863213480598e-05, + "loss": 0.7215, + "step": 13414 + }, + { + "epoch": 2.14, + "learning_rate": 3.199615535488062e-05, + "loss": 0.7089, + "step": 13415 + }, + { + "epoch": 2.14, + "learning_rate": 3.199367850045044e-05, + "loss": 0.791, + "step": 13416 + }, + { + "epoch": 2.14, + "learning_rate": 3.1991201571541846e-05, + "loss": 0.7071, + "step": 13417 + }, + { + "epoch": 2.14, + "learning_rate": 3.19887245681812e-05, + "loss": 0.77, + "step": 13418 + }, + { + "epoch": 2.14, + "learning_rate": 3.1986247490394865e-05, + "loss": 0.7685, + "step": 13419 + }, + { + "epoch": 2.14, + "learning_rate": 3.1983770338209246e-05, + "loss": 0.717, + "step": 13420 + }, + { + "epoch": 2.14, + "learning_rate": 3.198129311165071e-05, + "loss": 0.7498, + "step": 13421 + }, + { + "epoch": 2.14, + "learning_rate": 3.197881581074564e-05, + "loss": 0.7665, + "step": 13422 + }, + { + "epoch": 2.14, + "learning_rate": 3.1976338435520426e-05, + "loss": 0.6911, + "step": 13423 + }, + { + "epoch": 2.14, + "learning_rate": 3.197386098600144e-05, + "loss": 0.6933, + "step": 13424 + }, + { + "epoch": 2.14, + "learning_rate": 3.197138346221507e-05, + "loss": 0.7509, + "step": 13425 + }, + { + "epoch": 2.14, + "learning_rate": 3.1968905864187694e-05, + "loss": 0.8289, + "step": 13426 + }, + { + "epoch": 2.14, + "learning_rate": 3.1966428191945705e-05, + "loss": 0.7859, + "step": 13427 + }, + { + "epoch": 2.14, + "learning_rate": 3.1963950445515475e-05, + "loss": 0.6675, + "step": 13428 + }, + { + "epoch": 2.14, + "learning_rate": 3.196147262492342e-05, + "loss": 0.7744, + "step": 13429 + }, + { + "epoch": 2.14, + "learning_rate": 3.195899473019589e-05, + "loss": 0.7523, + "step": 13430 + }, + { + "epoch": 2.14, + "learning_rate": 3.19565167613593e-05, + "loss": 0.8184, + "step": 13431 + }, + { + "epoch": 2.14, + "learning_rate": 3.195403871844003e-05, + "loss": 0.7039, + "step": 13432 + }, + { + "epoch": 2.14, + "learning_rate": 3.195156060146446e-05, + "loss": 0.7593, + "step": 13433 + }, + { + "epoch": 2.14, + "learning_rate": 3.1949082410459e-05, + "loss": 0.8477, + "step": 13434 + }, + { + "epoch": 2.14, + "learning_rate": 3.194660414545003e-05, + "loss": 0.7077, + "step": 13435 + }, + { + "epoch": 2.14, + "learning_rate": 3.1944125806463945e-05, + "loss": 0.6653, + "step": 13436 + }, + { + "epoch": 2.14, + "learning_rate": 3.194164739352713e-05, + "loss": 0.7482, + "step": 13437 + }, + { + "epoch": 2.14, + "learning_rate": 3.193916890666599e-05, + "loss": 0.7926, + "step": 13438 + }, + { + "epoch": 2.14, + "learning_rate": 3.1936690345906905e-05, + "loss": 0.7287, + "step": 13439 + }, + { + "epoch": 2.14, + "learning_rate": 3.1934211711276293e-05, + "loss": 0.7759, + "step": 13440 + }, + { + "epoch": 2.14, + "learning_rate": 3.193173300280052e-05, + "loss": 0.7953, + "step": 13441 + }, + { + "epoch": 2.14, + "learning_rate": 3.1929254220506014e-05, + "loss": 0.8138, + "step": 13442 + }, + { + "epoch": 2.14, + "learning_rate": 3.1926775364419146e-05, + "loss": 0.6983, + "step": 13443 + }, + { + "epoch": 2.14, + "learning_rate": 3.1924296434566335e-05, + "loss": 0.8394, + "step": 13444 + }, + { + "epoch": 2.14, + "learning_rate": 3.1921817430973965e-05, + "loss": 0.6665, + "step": 13445 + }, + { + "epoch": 2.14, + "learning_rate": 3.191933835366845e-05, + "loss": 0.789, + "step": 13446 + }, + { + "epoch": 2.14, + "learning_rate": 3.191685920267617e-05, + "loss": 0.7714, + "step": 13447 + }, + { + "epoch": 2.14, + "learning_rate": 3.1914379978023546e-05, + "loss": 0.7457, + "step": 13448 + }, + { + "epoch": 2.14, + "learning_rate": 3.191190067973697e-05, + "loss": 0.6541, + "step": 13449 + }, + { + "epoch": 2.14, + "learning_rate": 3.190942130784284e-05, + "loss": 0.7307, + "step": 13450 + }, + { + "epoch": 2.14, + "learning_rate": 3.190694186236759e-05, + "loss": 0.6934, + "step": 13451 + }, + { + "epoch": 2.14, + "learning_rate": 3.190446234333759e-05, + "loss": 0.8767, + "step": 13452 + }, + { + "epoch": 2.14, + "learning_rate": 3.1901982750779245e-05, + "loss": 0.7757, + "step": 13453 + }, + { + "epoch": 2.14, + "learning_rate": 3.1899503084719e-05, + "loss": 0.7876, + "step": 13454 + }, + { + "epoch": 2.14, + "learning_rate": 3.1897023345183225e-05, + "loss": 0.7018, + "step": 13455 + }, + { + "epoch": 2.14, + "learning_rate": 3.189454353219833e-05, + "loss": 0.8564, + "step": 13456 + }, + { + "epoch": 2.14, + "learning_rate": 3.189206364579074e-05, + "loss": 0.8036, + "step": 13457 + }, + { + "epoch": 2.14, + "learning_rate": 3.188958368598686e-05, + "loss": 0.7625, + "step": 13458 + }, + { + "epoch": 2.14, + "learning_rate": 3.188710365281308e-05, + "loss": 0.7572, + "step": 13459 + }, + { + "epoch": 2.14, + "learning_rate": 3.188462354629584e-05, + "loss": 0.6789, + "step": 13460 + }, + { + "epoch": 2.14, + "learning_rate": 3.188214336646154e-05, + "loss": 0.7826, + "step": 13461 + }, + { + "epoch": 2.14, + "learning_rate": 3.187966311333659e-05, + "loss": 0.7196, + "step": 13462 + }, + { + "epoch": 2.14, + "learning_rate": 3.18771827869474e-05, + "loss": 0.7484, + "step": 13463 + }, + { + "epoch": 2.14, + "learning_rate": 3.18747023873204e-05, + "loss": 0.7612, + "step": 13464 + }, + { + "epoch": 2.15, + "learning_rate": 3.187222191448198e-05, + "loss": 0.7945, + "step": 13465 + }, + { + "epoch": 2.15, + "learning_rate": 3.186974136845858e-05, + "loss": 0.7857, + "step": 13466 + }, + { + "epoch": 2.15, + "learning_rate": 3.18672607492766e-05, + "loss": 0.6923, + "step": 13467 + }, + { + "epoch": 2.15, + "learning_rate": 3.186478005696246e-05, + "loss": 0.7778, + "step": 13468 + }, + { + "epoch": 2.15, + "learning_rate": 3.1862299291542586e-05, + "loss": 0.828, + "step": 13469 + }, + { + "epoch": 2.15, + "learning_rate": 3.1859818453043384e-05, + "loss": 0.8175, + "step": 13470 + }, + { + "epoch": 2.15, + "learning_rate": 3.1857337541491275e-05, + "loss": 0.719, + "step": 13471 + }, + { + "epoch": 2.15, + "learning_rate": 3.1854856556912696e-05, + "loss": 0.8256, + "step": 13472 + }, + { + "epoch": 2.15, + "learning_rate": 3.185237549933405e-05, + "loss": 0.7837, + "step": 13473 + }, + { + "epoch": 2.15, + "learning_rate": 3.184989436878177e-05, + "loss": 0.6918, + "step": 13474 + }, + { + "epoch": 2.15, + "learning_rate": 3.184741316528228e-05, + "loss": 0.7208, + "step": 13475 + }, + { + "epoch": 2.15, + "learning_rate": 3.1844931888861984e-05, + "loss": 0.7542, + "step": 13476 + }, + { + "epoch": 2.15, + "learning_rate": 3.1842450539547325e-05, + "loss": 0.7784, + "step": 13477 + }, + { + "epoch": 2.15, + "learning_rate": 3.183996911736472e-05, + "loss": 0.8151, + "step": 13478 + }, + { + "epoch": 2.15, + "learning_rate": 3.183748762234059e-05, + "loss": 0.7257, + "step": 13479 + }, + { + "epoch": 2.15, + "learning_rate": 3.183500605450138e-05, + "loss": 0.7306, + "step": 13480 + }, + { + "epoch": 2.15, + "learning_rate": 3.18325244138735e-05, + "loss": 0.8124, + "step": 13481 + }, + { + "epoch": 2.15, + "learning_rate": 3.183004270048338e-05, + "loss": 0.7418, + "step": 13482 + }, + { + "epoch": 2.15, + "learning_rate": 3.1827560914357454e-05, + "loss": 0.7731, + "step": 13483 + }, + { + "epoch": 2.15, + "learning_rate": 3.182507905552215e-05, + "loss": 0.7686, + "step": 13484 + }, + { + "epoch": 2.15, + "learning_rate": 3.18225971240039e-05, + "loss": 0.7568, + "step": 13485 + }, + { + "epoch": 2.15, + "learning_rate": 3.182011511982913e-05, + "loss": 0.6519, + "step": 13486 + }, + { + "epoch": 2.15, + "learning_rate": 3.181763304302428e-05, + "loss": 0.7758, + "step": 13487 + }, + { + "epoch": 2.15, + "learning_rate": 3.181515089361577e-05, + "loss": 0.8477, + "step": 13488 + }, + { + "epoch": 2.15, + "learning_rate": 3.181266867163004e-05, + "loss": 0.8755, + "step": 13489 + }, + { + "epoch": 2.15, + "learning_rate": 3.181018637709353e-05, + "loss": 0.8487, + "step": 13490 + }, + { + "epoch": 2.15, + "learning_rate": 3.1807704010032666e-05, + "loss": 0.7866, + "step": 13491 + }, + { + "epoch": 2.15, + "learning_rate": 3.1805221570473894e-05, + "loss": 0.7429, + "step": 13492 + }, + { + "epoch": 2.15, + "learning_rate": 3.180273905844364e-05, + "loss": 0.6818, + "step": 13493 + }, + { + "epoch": 2.15, + "learning_rate": 3.1800256473968345e-05, + "loss": 0.7126, + "step": 13494 + }, + { + "epoch": 2.15, + "learning_rate": 3.1797773817074444e-05, + "loss": 0.7616, + "step": 13495 + }, + { + "epoch": 2.15, + "learning_rate": 3.179529108778838e-05, + "loss": 0.7291, + "step": 13496 + }, + { + "epoch": 2.15, + "learning_rate": 3.1792808286136586e-05, + "loss": 0.745, + "step": 13497 + }, + { + "epoch": 2.15, + "learning_rate": 3.179032541214552e-05, + "loss": 0.7683, + "step": 13498 + }, + { + "epoch": 2.15, + "learning_rate": 3.17878424658416e-05, + "loss": 0.8301, + "step": 13499 + }, + { + "epoch": 2.15, + "learning_rate": 3.178535944725128e-05, + "loss": 0.7628, + "step": 13500 + }, + { + "epoch": 2.15, + "learning_rate": 3.178287635640101e-05, + "loss": 0.6848, + "step": 13501 + }, + { + "epoch": 2.15, + "learning_rate": 3.178039319331722e-05, + "loss": 0.7229, + "step": 13502 + }, + { + "epoch": 2.15, + "learning_rate": 3.177790995802635e-05, + "loss": 0.8015, + "step": 13503 + }, + { + "epoch": 2.15, + "learning_rate": 3.177542665055487e-05, + "loss": 0.748, + "step": 13504 + }, + { + "epoch": 2.15, + "learning_rate": 3.17729432709292e-05, + "loss": 0.7252, + "step": 13505 + }, + { + "epoch": 2.15, + "learning_rate": 3.17704598191758e-05, + "loss": 0.7156, + "step": 13506 + }, + { + "epoch": 2.15, + "learning_rate": 3.176797629532112e-05, + "loss": 0.6658, + "step": 13507 + }, + { + "epoch": 2.15, + "learning_rate": 3.1765492699391585e-05, + "loss": 0.783, + "step": 13508 + }, + { + "epoch": 2.15, + "learning_rate": 3.176300903141367e-05, + "loss": 0.7408, + "step": 13509 + }, + { + "epoch": 2.15, + "learning_rate": 3.1760525291413815e-05, + "loss": 0.7339, + "step": 13510 + }, + { + "epoch": 2.15, + "learning_rate": 3.175804147941847e-05, + "loss": 0.7288, + "step": 13511 + }, + { + "epoch": 2.15, + "learning_rate": 3.175555759545408e-05, + "loss": 0.7409, + "step": 13512 + }, + { + "epoch": 2.15, + "learning_rate": 3.17530736395471e-05, + "loss": 0.8197, + "step": 13513 + }, + { + "epoch": 2.15, + "learning_rate": 3.1750589611724e-05, + "loss": 0.7147, + "step": 13514 + }, + { + "epoch": 2.15, + "learning_rate": 3.174810551201122e-05, + "loss": 0.7257, + "step": 13515 + }, + { + "epoch": 2.15, + "learning_rate": 3.174562134043519e-05, + "loss": 0.6708, + "step": 13516 + }, + { + "epoch": 2.15, + "learning_rate": 3.1743137097022405e-05, + "loss": 0.7717, + "step": 13517 + }, + { + "epoch": 2.15, + "learning_rate": 3.174065278179931e-05, + "loss": 0.7313, + "step": 13518 + }, + { + "epoch": 2.15, + "learning_rate": 3.173816839479234e-05, + "loss": 0.6448, + "step": 13519 + }, + { + "epoch": 2.15, + "learning_rate": 3.173568393602798e-05, + "loss": 0.7642, + "step": 13520 + }, + { + "epoch": 2.15, + "learning_rate": 3.1733199405532663e-05, + "loss": 0.7307, + "step": 13521 + }, + { + "epoch": 2.15, + "learning_rate": 3.1730714803332874e-05, + "loss": 0.7343, + "step": 13522 + }, + { + "epoch": 2.15, + "learning_rate": 3.172823012945505e-05, + "loss": 0.7962, + "step": 13523 + }, + { + "epoch": 2.15, + "learning_rate": 3.172574538392566e-05, + "loss": 0.8536, + "step": 13524 + }, + { + "epoch": 2.15, + "learning_rate": 3.172326056677117e-05, + "loss": 0.754, + "step": 13525 + }, + { + "epoch": 2.15, + "learning_rate": 3.1720775678018034e-05, + "loss": 0.696, + "step": 13526 + }, + { + "epoch": 2.16, + "learning_rate": 3.1718290717692714e-05, + "loss": 0.738, + "step": 13527 + }, + { + "epoch": 2.16, + "learning_rate": 3.171580568582168e-05, + "loss": 0.7067, + "step": 13528 + }, + { + "epoch": 2.16, + "learning_rate": 3.1713320582431396e-05, + "loss": 0.7892, + "step": 13529 + }, + { + "epoch": 2.16, + "learning_rate": 3.1710835407548314e-05, + "loss": 0.6912, + "step": 13530 + }, + { + "epoch": 2.16, + "learning_rate": 3.1708350161198914e-05, + "loss": 0.6995, + "step": 13531 + }, + { + "epoch": 2.16, + "learning_rate": 3.170586484340966e-05, + "loss": 0.7327, + "step": 13532 + }, + { + "epoch": 2.16, + "learning_rate": 3.170337945420701e-05, + "loss": 0.781, + "step": 13533 + }, + { + "epoch": 2.16, + "learning_rate": 3.170089399361744e-05, + "loss": 0.7398, + "step": 13534 + }, + { + "epoch": 2.16, + "learning_rate": 3.169840846166743e-05, + "loss": 0.7464, + "step": 13535 + }, + { + "epoch": 2.16, + "learning_rate": 3.169592285838343e-05, + "loss": 0.8133, + "step": 13536 + }, + { + "epoch": 2.16, + "learning_rate": 3.1693437183791904e-05, + "loss": 0.7511, + "step": 13537 + }, + { + "epoch": 2.16, + "learning_rate": 3.169095143791935e-05, + "loss": 0.6952, + "step": 13538 + }, + { + "epoch": 2.16, + "learning_rate": 3.168846562079223e-05, + "loss": 0.7572, + "step": 13539 + }, + { + "epoch": 2.16, + "learning_rate": 3.1685979732436996e-05, + "loss": 0.7819, + "step": 13540 + }, + { + "epoch": 2.16, + "learning_rate": 3.168349377288014e-05, + "loss": 0.7733, + "step": 13541 + }, + { + "epoch": 2.16, + "learning_rate": 3.168100774214814e-05, + "loss": 0.7268, + "step": 13542 + }, + { + "epoch": 2.16, + "learning_rate": 3.167852164026746e-05, + "loss": 0.7963, + "step": 13543 + }, + { + "epoch": 2.16, + "learning_rate": 3.1676035467264575e-05, + "loss": 0.7293, + "step": 13544 + }, + { + "epoch": 2.16, + "learning_rate": 3.1673549223165977e-05, + "loss": 0.7712, + "step": 13545 + }, + { + "epoch": 2.16, + "learning_rate": 3.167106290799812e-05, + "loss": 0.8482, + "step": 13546 + }, + { + "epoch": 2.16, + "learning_rate": 3.16685765217875e-05, + "loss": 0.8158, + "step": 13547 + }, + { + "epoch": 2.16, + "learning_rate": 3.1666090064560585e-05, + "loss": 0.7904, + "step": 13548 + }, + { + "epoch": 2.16, + "learning_rate": 3.166360353634386e-05, + "loss": 0.7801, + "step": 13549 + }, + { + "epoch": 2.16, + "learning_rate": 3.16611169371638e-05, + "loss": 0.7693, + "step": 13550 + }, + { + "epoch": 2.16, + "learning_rate": 3.1658630267046894e-05, + "loss": 0.6791, + "step": 13551 + }, + { + "epoch": 2.16, + "learning_rate": 3.165614352601961e-05, + "loss": 0.6912, + "step": 13552 + }, + { + "epoch": 2.16, + "learning_rate": 3.165365671410845e-05, + "loss": 0.7672, + "step": 13553 + }, + { + "epoch": 2.16, + "learning_rate": 3.1651169831339874e-05, + "loss": 0.8458, + "step": 13554 + }, + { + "epoch": 2.16, + "learning_rate": 3.164868287774039e-05, + "loss": 0.6914, + "step": 13555 + }, + { + "epoch": 2.16, + "learning_rate": 3.164619585333646e-05, + "loss": 0.8506, + "step": 13556 + }, + { + "epoch": 2.16, + "learning_rate": 3.164370875815458e-05, + "loss": 0.7293, + "step": 13557 + }, + { + "epoch": 2.16, + "learning_rate": 3.164122159222124e-05, + "loss": 0.7548, + "step": 13558 + }, + { + "epoch": 2.16, + "learning_rate": 3.1638734355562915e-05, + "loss": 0.7218, + "step": 13559 + }, + { + "epoch": 2.16, + "learning_rate": 3.16362470482061e-05, + "loss": 0.8377, + "step": 13560 + }, + { + "epoch": 2.16, + "learning_rate": 3.163375967017729e-05, + "loss": 0.7769, + "step": 13561 + }, + { + "epoch": 2.16, + "learning_rate": 3.163127222150296e-05, + "loss": 0.7059, + "step": 13562 + }, + { + "epoch": 2.16, + "learning_rate": 3.16287847022096e-05, + "loss": 0.7503, + "step": 13563 + }, + { + "epoch": 2.16, + "learning_rate": 3.162629711232372e-05, + "loss": 0.7564, + "step": 13564 + }, + { + "epoch": 2.16, + "learning_rate": 3.16238094518718e-05, + "loss": 0.7432, + "step": 13565 + }, + { + "epoch": 2.16, + "learning_rate": 3.1621321720880325e-05, + "loss": 0.7546, + "step": 13566 + }, + { + "epoch": 2.16, + "learning_rate": 3.16188339193758e-05, + "loss": 0.7259, + "step": 13567 + }, + { + "epoch": 2.16, + "learning_rate": 3.161634604738471e-05, + "loss": 0.7764, + "step": 13568 + }, + { + "epoch": 2.16, + "learning_rate": 3.161385810493354e-05, + "loss": 0.7214, + "step": 13569 + }, + { + "epoch": 2.16, + "learning_rate": 3.161137009204881e-05, + "loss": 0.6935, + "step": 13570 + }, + { + "epoch": 2.16, + "learning_rate": 3.160888200875699e-05, + "loss": 0.7749, + "step": 13571 + }, + { + "epoch": 2.16, + "learning_rate": 3.16063938550846e-05, + "loss": 0.7039, + "step": 13572 + }, + { + "epoch": 2.16, + "learning_rate": 3.160390563105813e-05, + "loss": 0.7362, + "step": 13573 + }, + { + "epoch": 2.16, + "learning_rate": 3.1601417336704065e-05, + "loss": 0.7508, + "step": 13574 + }, + { + "epoch": 2.16, + "learning_rate": 3.1598928972048915e-05, + "loss": 0.7097, + "step": 13575 + }, + { + "epoch": 2.16, + "learning_rate": 3.1596440537119175e-05, + "loss": 0.7279, + "step": 13576 + }, + { + "epoch": 2.16, + "learning_rate": 3.159395203194135e-05, + "loss": 0.7529, + "step": 13577 + }, + { + "epoch": 2.16, + "learning_rate": 3.159146345654194e-05, + "loss": 0.7456, + "step": 13578 + }, + { + "epoch": 2.16, + "learning_rate": 3.1588974810947454e-05, + "loss": 0.6905, + "step": 13579 + }, + { + "epoch": 2.16, + "learning_rate": 3.158648609518437e-05, + "loss": 0.7097, + "step": 13580 + }, + { + "epoch": 2.16, + "learning_rate": 3.158399730927922e-05, + "loss": 0.7553, + "step": 13581 + }, + { + "epoch": 2.16, + "learning_rate": 3.158150845325849e-05, + "loss": 0.7463, + "step": 13582 + }, + { + "epoch": 2.16, + "learning_rate": 3.157901952714869e-05, + "loss": 0.6982, + "step": 13583 + }, + { + "epoch": 2.16, + "learning_rate": 3.157653053097633e-05, + "loss": 0.7202, + "step": 13584 + }, + { + "epoch": 2.16, + "learning_rate": 3.157404146476792e-05, + "loss": 0.7454, + "step": 13585 + }, + { + "epoch": 2.16, + "learning_rate": 3.157155232854995e-05, + "loss": 0.7323, + "step": 13586 + }, + { + "epoch": 2.16, + "learning_rate": 3.156906312234894e-05, + "loss": 0.7423, + "step": 13587 + }, + { + "epoch": 2.16, + "learning_rate": 3.156657384619139e-05, + "loss": 0.7049, + "step": 13588 + }, + { + "epoch": 2.16, + "learning_rate": 3.1564084500103825e-05, + "loss": 0.726, + "step": 13589 + }, + { + "epoch": 2.17, + "learning_rate": 3.1561595084112736e-05, + "loss": 0.7701, + "step": 13590 + }, + { + "epoch": 2.17, + "learning_rate": 3.1559105598244654e-05, + "loss": 0.7262, + "step": 13591 + }, + { + "epoch": 2.17, + "learning_rate": 3.155661604252607e-05, + "loss": 0.7622, + "step": 13592 + }, + { + "epoch": 2.17, + "learning_rate": 3.1554126416983506e-05, + "loss": 0.694, + "step": 13593 + }, + { + "epoch": 2.17, + "learning_rate": 3.155163672164348e-05, + "loss": 0.7878, + "step": 13594 + }, + { + "epoch": 2.17, + "learning_rate": 3.15491469565325e-05, + "loss": 0.675, + "step": 13595 + }, + { + "epoch": 2.17, + "learning_rate": 3.154665712167708e-05, + "loss": 0.723, + "step": 13596 + }, + { + "epoch": 2.17, + "learning_rate": 3.154416721710373e-05, + "loss": 0.722, + "step": 13597 + }, + { + "epoch": 2.17, + "learning_rate": 3.154167724283898e-05, + "loss": 0.8604, + "step": 13598 + }, + { + "epoch": 2.17, + "learning_rate": 3.153918719890934e-05, + "loss": 0.7249, + "step": 13599 + }, + { + "epoch": 2.17, + "learning_rate": 3.153669708534132e-05, + "loss": 0.7843, + "step": 13600 + }, + { + "epoch": 2.17, + "learning_rate": 3.1534206902161446e-05, + "loss": 0.7211, + "step": 13601 + }, + { + "epoch": 2.17, + "learning_rate": 3.153171664939624e-05, + "loss": 0.7118, + "step": 13602 + }, + { + "epoch": 2.17, + "learning_rate": 3.152922632707221e-05, + "loss": 0.783, + "step": 13603 + }, + { + "epoch": 2.17, + "learning_rate": 3.152673593521589e-05, + "loss": 0.8398, + "step": 13604 + }, + { + "epoch": 2.17, + "learning_rate": 3.15242454738538e-05, + "loss": 0.7086, + "step": 13605 + }, + { + "epoch": 2.17, + "learning_rate": 3.152175494301244e-05, + "loss": 0.7263, + "step": 13606 + }, + { + "epoch": 2.17, + "learning_rate": 3.1519264342718356e-05, + "loss": 0.784, + "step": 13607 + }, + { + "epoch": 2.17, + "learning_rate": 3.151677367299807e-05, + "loss": 0.7782, + "step": 13608 + }, + { + "epoch": 2.17, + "learning_rate": 3.15142829338781e-05, + "loss": 0.6881, + "step": 13609 + }, + { + "epoch": 2.17, + "learning_rate": 3.151179212538496e-05, + "loss": 0.7784, + "step": 13610 + }, + { + "epoch": 2.17, + "learning_rate": 3.1509301247545205e-05, + "loss": 0.7812, + "step": 13611 + }, + { + "epoch": 2.17, + "learning_rate": 3.150681030038533e-05, + "loss": 0.7315, + "step": 13612 + }, + { + "epoch": 2.17, + "learning_rate": 3.150431928393188e-05, + "loss": 0.7699, + "step": 13613 + }, + { + "epoch": 2.17, + "learning_rate": 3.150182819821138e-05, + "loss": 0.7534, + "step": 13614 + }, + { + "epoch": 2.17, + "learning_rate": 3.149933704325036e-05, + "loss": 0.7587, + "step": 13615 + }, + { + "epoch": 2.17, + "learning_rate": 3.149684581907535e-05, + "loss": 0.6935, + "step": 13616 + }, + { + "epoch": 2.17, + "learning_rate": 3.149435452571286e-05, + "loss": 0.7209, + "step": 13617 + }, + { + "epoch": 2.17, + "learning_rate": 3.149186316318945e-05, + "loss": 0.7553, + "step": 13618 + }, + { + "epoch": 2.17, + "learning_rate": 3.1489371731531634e-05, + "loss": 0.7031, + "step": 13619 + }, + { + "epoch": 2.17, + "learning_rate": 3.1486880230765944e-05, + "loss": 0.7466, + "step": 13620 + }, + { + "epoch": 2.17, + "learning_rate": 3.148438866091893e-05, + "loss": 0.8168, + "step": 13621 + }, + { + "epoch": 2.17, + "learning_rate": 3.14818970220171e-05, + "loss": 0.7146, + "step": 13622 + }, + { + "epoch": 2.17, + "learning_rate": 3.1479405314087016e-05, + "loss": 0.7488, + "step": 13623 + }, + { + "epoch": 2.17, + "learning_rate": 3.147691353715519e-05, + "loss": 0.7279, + "step": 13624 + }, + { + "epoch": 2.17, + "learning_rate": 3.1474421691248174e-05, + "loss": 0.7275, + "step": 13625 + }, + { + "epoch": 2.17, + "learning_rate": 3.147192977639249e-05, + "loss": 0.7346, + "step": 13626 + }, + { + "epoch": 2.17, + "learning_rate": 3.146943779261469e-05, + "loss": 0.7448, + "step": 13627 + }, + { + "epoch": 2.17, + "learning_rate": 3.14669457399413e-05, + "loss": 0.7826, + "step": 13628 + }, + { + "epoch": 2.17, + "learning_rate": 3.146445361839886e-05, + "loss": 0.7204, + "step": 13629 + }, + { + "epoch": 2.17, + "learning_rate": 3.146196142801392e-05, + "loss": 0.7891, + "step": 13630 + }, + { + "epoch": 2.17, + "learning_rate": 3.145946916881302e-05, + "loss": 0.826, + "step": 13631 + }, + { + "epoch": 2.17, + "learning_rate": 3.145697684082268e-05, + "loss": 0.7911, + "step": 13632 + }, + { + "epoch": 2.17, + "learning_rate": 3.145448444406947e-05, + "loss": 0.7682, + "step": 13633 + }, + { + "epoch": 2.17, + "learning_rate": 3.145199197857991e-05, + "loss": 0.6804, + "step": 13634 + }, + { + "epoch": 2.17, + "learning_rate": 3.144949944438056e-05, + "loss": 0.7573, + "step": 13635 + }, + { + "epoch": 2.17, + "learning_rate": 3.1447006841497955e-05, + "loss": 0.7092, + "step": 13636 + }, + { + "epoch": 2.17, + "learning_rate": 3.144451416995863e-05, + "loss": 0.7833, + "step": 13637 + }, + { + "epoch": 2.17, + "learning_rate": 3.144202142978916e-05, + "loss": 0.788, + "step": 13638 + }, + { + "epoch": 2.17, + "learning_rate": 3.143952862101606e-05, + "loss": 0.7729, + "step": 13639 + }, + { + "epoch": 2.17, + "learning_rate": 3.14370357436659e-05, + "loss": 0.7935, + "step": 13640 + }, + { + "epoch": 2.17, + "learning_rate": 3.14345427977652e-05, + "loss": 0.6958, + "step": 13641 + }, + { + "epoch": 2.17, + "learning_rate": 3.143204978334054e-05, + "loss": 0.8404, + "step": 13642 + }, + { + "epoch": 2.17, + "learning_rate": 3.142955670041846e-05, + "loss": 0.7749, + "step": 13643 + }, + { + "epoch": 2.17, + "learning_rate": 3.142706354902549e-05, + "loss": 0.8136, + "step": 13644 + }, + { + "epoch": 2.17, + "learning_rate": 3.1424570329188196e-05, + "loss": 0.7155, + "step": 13645 + }, + { + "epoch": 2.17, + "learning_rate": 3.142207704093314e-05, + "loss": 0.7016, + "step": 13646 + }, + { + "epoch": 2.17, + "learning_rate": 3.1419583684286856e-05, + "loss": 0.7734, + "step": 13647 + }, + { + "epoch": 2.17, + "learning_rate": 3.1417090259275905e-05, + "loss": 0.7532, + "step": 13648 + }, + { + "epoch": 2.17, + "learning_rate": 3.141459676592683e-05, + "loss": 0.7661, + "step": 13649 + }, + { + "epoch": 2.17, + "learning_rate": 3.14121032042662e-05, + "loss": 0.7714, + "step": 13650 + }, + { + "epoch": 2.17, + "learning_rate": 3.140960957432057e-05, + "loss": 0.8105, + "step": 13651 + }, + { + "epoch": 2.17, + "learning_rate": 3.1407115876116475e-05, + "loss": 0.7927, + "step": 13652 + }, + { + "epoch": 2.18, + "learning_rate": 3.1404622109680495e-05, + "loss": 0.7983, + "step": 13653 + }, + { + "epoch": 2.18, + "learning_rate": 3.1402128275039186e-05, + "loss": 0.697, + "step": 13654 + }, + { + "epoch": 2.18, + "learning_rate": 3.1399634372219084e-05, + "loss": 0.8325, + "step": 13655 + }, + { + "epoch": 2.18, + "learning_rate": 3.139714040124677e-05, + "loss": 0.7157, + "step": 13656 + }, + { + "epoch": 2.18, + "learning_rate": 3.139464636214879e-05, + "loss": 0.8612, + "step": 13657 + }, + { + "epoch": 2.18, + "learning_rate": 3.139215225495171e-05, + "loss": 0.7398, + "step": 13658 + }, + { + "epoch": 2.18, + "learning_rate": 3.138965807968209e-05, + "loss": 0.7238, + "step": 13659 + }, + { + "epoch": 2.18, + "learning_rate": 3.138716383636649e-05, + "loss": 0.8071, + "step": 13660 + }, + { + "epoch": 2.18, + "learning_rate": 3.138466952503147e-05, + "loss": 0.8291, + "step": 13661 + }, + { + "epoch": 2.18, + "learning_rate": 3.1382175145703596e-05, + "loss": 0.7428, + "step": 13662 + }, + { + "epoch": 2.18, + "learning_rate": 3.1379680698409435e-05, + "loss": 0.7734, + "step": 13663 + }, + { + "epoch": 2.18, + "learning_rate": 3.137718618317555e-05, + "loss": 0.7183, + "step": 13664 + }, + { + "epoch": 2.18, + "learning_rate": 3.1374691600028496e-05, + "loss": 0.6846, + "step": 13665 + }, + { + "epoch": 2.18, + "learning_rate": 3.137219694899485e-05, + "loss": 0.7597, + "step": 13666 + }, + { + "epoch": 2.18, + "learning_rate": 3.136970223010118e-05, + "loss": 0.7561, + "step": 13667 + }, + { + "epoch": 2.18, + "learning_rate": 3.136720744337405e-05, + "loss": 0.8028, + "step": 13668 + }, + { + "epoch": 2.18, + "learning_rate": 3.136471258884001e-05, + "loss": 0.7743, + "step": 13669 + }, + { + "epoch": 2.18, + "learning_rate": 3.1362217666525655e-05, + "loss": 0.7967, + "step": 13670 + }, + { + "epoch": 2.18, + "learning_rate": 3.135972267645755e-05, + "loss": 0.7682, + "step": 13671 + }, + { + "epoch": 2.18, + "learning_rate": 3.135722761866225e-05, + "loss": 0.7058, + "step": 13672 + }, + { + "epoch": 2.18, + "learning_rate": 3.135473249316634e-05, + "loss": 0.7464, + "step": 13673 + }, + { + "epoch": 2.18, + "learning_rate": 3.135223729999639e-05, + "loss": 0.7198, + "step": 13674 + }, + { + "epoch": 2.18, + "learning_rate": 3.1349742039178964e-05, + "loss": 0.7319, + "step": 13675 + }, + { + "epoch": 2.18, + "learning_rate": 3.1347246710740646e-05, + "loss": 0.8171, + "step": 13676 + }, + { + "epoch": 2.18, + "learning_rate": 3.1344751314708e-05, + "loss": 0.8042, + "step": 13677 + }, + { + "epoch": 2.18, + "learning_rate": 3.1342255851107606e-05, + "loss": 0.764, + "step": 13678 + }, + { + "epoch": 2.18, + "learning_rate": 3.133976031996604e-05, + "loss": 0.7757, + "step": 13679 + }, + { + "epoch": 2.18, + "learning_rate": 3.133726472130987e-05, + "loss": 0.7745, + "step": 13680 + }, + { + "epoch": 2.18, + "learning_rate": 3.133476905516568e-05, + "loss": 0.8016, + "step": 13681 + }, + { + "epoch": 2.18, + "learning_rate": 3.1332273321560044e-05, + "loss": 0.7756, + "step": 13682 + }, + { + "epoch": 2.18, + "learning_rate": 3.1329777520519544e-05, + "loss": 0.7685, + "step": 13683 + }, + { + "epoch": 2.18, + "learning_rate": 3.132728165207075e-05, + "loss": 0.7007, + "step": 13684 + }, + { + "epoch": 2.18, + "learning_rate": 3.132478571624026e-05, + "loss": 0.6889, + "step": 13685 + }, + { + "epoch": 2.18, + "learning_rate": 3.1322289713054636e-05, + "loss": 0.7266, + "step": 13686 + }, + { + "epoch": 2.18, + "learning_rate": 3.131979364254047e-05, + "loss": 0.7467, + "step": 13687 + }, + { + "epoch": 2.18, + "learning_rate": 3.131729750472433e-05, + "loss": 0.7192, + "step": 13688 + }, + { + "epoch": 2.18, + "learning_rate": 3.1314801299632806e-05, + "loss": 0.6592, + "step": 13689 + }, + { + "epoch": 2.18, + "learning_rate": 3.131230502729249e-05, + "loss": 0.736, + "step": 13690 + }, + { + "epoch": 2.18, + "learning_rate": 3.1309808687729955e-05, + "loss": 0.7784, + "step": 13691 + }, + { + "epoch": 2.18, + "learning_rate": 3.130731228097179e-05, + "loss": 0.8234, + "step": 13692 + }, + { + "epoch": 2.18, + "learning_rate": 3.130481580704457e-05, + "loss": 0.7811, + "step": 13693 + }, + { + "epoch": 2.18, + "learning_rate": 3.13023192659749e-05, + "loss": 0.7456, + "step": 13694 + }, + { + "epoch": 2.18, + "learning_rate": 3.129982265778935e-05, + "loss": 0.7441, + "step": 13695 + }, + { + "epoch": 2.18, + "learning_rate": 3.1297325982514516e-05, + "loss": 0.7202, + "step": 13696 + }, + { + "epoch": 2.18, + "learning_rate": 3.129482924017698e-05, + "loss": 0.7483, + "step": 13697 + }, + { + "epoch": 2.18, + "learning_rate": 3.1292332430803335e-05, + "loss": 0.6912, + "step": 13698 + }, + { + "epoch": 2.18, + "learning_rate": 3.1289835554420166e-05, + "loss": 0.7282, + "step": 13699 + }, + { + "epoch": 2.18, + "learning_rate": 3.128733861105407e-05, + "loss": 0.7456, + "step": 13700 + }, + { + "epoch": 2.18, + "learning_rate": 3.128484160073164e-05, + "loss": 0.7144, + "step": 13701 + }, + { + "epoch": 2.18, + "learning_rate": 3.1282344523479464e-05, + "loss": 0.7223, + "step": 13702 + }, + { + "epoch": 2.18, + "learning_rate": 3.127984737932412e-05, + "loss": 0.7746, + "step": 13703 + }, + { + "epoch": 2.18, + "learning_rate": 3.127735016829222e-05, + "loss": 0.7168, + "step": 13704 + }, + { + "epoch": 2.18, + "learning_rate": 3.127485289041036e-05, + "loss": 0.7927, + "step": 13705 + }, + { + "epoch": 2.18, + "learning_rate": 3.127235554570513e-05, + "loss": 0.7636, + "step": 13706 + }, + { + "epoch": 2.18, + "learning_rate": 3.126985813420311e-05, + "loss": 0.7015, + "step": 13707 + }, + { + "epoch": 2.18, + "learning_rate": 3.1267360655930905e-05, + "loss": 0.7018, + "step": 13708 + }, + { + "epoch": 2.18, + "learning_rate": 3.126486311091512e-05, + "loss": 0.7372, + "step": 13709 + }, + { + "epoch": 2.18, + "learning_rate": 3.126236549918234e-05, + "loss": 0.79, + "step": 13710 + }, + { + "epoch": 2.18, + "learning_rate": 3.1259867820759176e-05, + "loss": 0.7377, + "step": 13711 + }, + { + "epoch": 2.18, + "learning_rate": 3.1257370075672224e-05, + "loss": 0.6721, + "step": 13712 + }, + { + "epoch": 2.18, + "learning_rate": 3.1254872263948065e-05, + "loss": 0.8035, + "step": 13713 + }, + { + "epoch": 2.18, + "learning_rate": 3.125237438561332e-05, + "loss": 0.7258, + "step": 13714 + }, + { + "epoch": 2.18, + "learning_rate": 3.1249876440694595e-05, + "loss": 0.6775, + "step": 13715 + }, + { + "epoch": 2.19, + "learning_rate": 3.124737842921847e-05, + "loss": 0.9004, + "step": 13716 + }, + { + "epoch": 2.19, + "learning_rate": 3.124488035121156e-05, + "loss": 0.7424, + "step": 13717 + }, + { + "epoch": 2.19, + "learning_rate": 3.124238220670046e-05, + "loss": 0.7533, + "step": 13718 + }, + { + "epoch": 2.19, + "learning_rate": 3.1239883995711786e-05, + "loss": 0.7908, + "step": 13719 + }, + { + "epoch": 2.19, + "learning_rate": 3.123738571827214e-05, + "loss": 0.7614, + "step": 13720 + }, + { + "epoch": 2.19, + "learning_rate": 3.123488737440811e-05, + "loss": 0.7084, + "step": 13721 + }, + { + "epoch": 2.19, + "learning_rate": 3.123238896414633e-05, + "loss": 0.7378, + "step": 13722 + }, + { + "epoch": 2.19, + "learning_rate": 3.122989048751338e-05, + "loss": 0.7306, + "step": 13723 + }, + { + "epoch": 2.19, + "learning_rate": 3.122739194453588e-05, + "loss": 0.7153, + "step": 13724 + }, + { + "epoch": 2.19, + "learning_rate": 3.1224893335240435e-05, + "loss": 0.7622, + "step": 13725 + }, + { + "epoch": 2.19, + "learning_rate": 3.1222394659653665e-05, + "loss": 0.6911, + "step": 13726 + }, + { + "epoch": 2.19, + "learning_rate": 3.121989591780215e-05, + "loss": 0.7729, + "step": 13727 + }, + { + "epoch": 2.19, + "learning_rate": 3.121739710971254e-05, + "loss": 0.7907, + "step": 13728 + }, + { + "epoch": 2.19, + "learning_rate": 3.121489823541142e-05, + "loss": 0.696, + "step": 13729 + }, + { + "epoch": 2.19, + "learning_rate": 3.1212399294925404e-05, + "loss": 0.7552, + "step": 13730 + }, + { + "epoch": 2.19, + "learning_rate": 3.1209900288281105e-05, + "loss": 0.6701, + "step": 13731 + }, + { + "epoch": 2.19, + "learning_rate": 3.120740121550514e-05, + "loss": 0.6811, + "step": 13732 + }, + { + "epoch": 2.19, + "learning_rate": 3.120490207662412e-05, + "loss": 0.7216, + "step": 13733 + }, + { + "epoch": 2.19, + "learning_rate": 3.120240287166467e-05, + "loss": 0.8122, + "step": 13734 + }, + { + "epoch": 2.19, + "learning_rate": 3.1199903600653385e-05, + "loss": 0.7751, + "step": 13735 + }, + { + "epoch": 2.19, + "learning_rate": 3.119740426361689e-05, + "loss": 0.8143, + "step": 13736 + }, + { + "epoch": 2.19, + "learning_rate": 3.119490486058181e-05, + "loss": 0.7858, + "step": 13737 + }, + { + "epoch": 2.19, + "learning_rate": 3.119240539157474e-05, + "loss": 0.7664, + "step": 13738 + }, + { + "epoch": 2.19, + "learning_rate": 3.118990585662233e-05, + "loss": 0.7567, + "step": 13739 + }, + { + "epoch": 2.19, + "learning_rate": 3.1187406255751165e-05, + "loss": 0.794, + "step": 13740 + }, + { + "epoch": 2.19, + "learning_rate": 3.118490658898789e-05, + "loss": 0.7571, + "step": 13741 + }, + { + "epoch": 2.19, + "learning_rate": 3.118240685635911e-05, + "loss": 0.7219, + "step": 13742 + }, + { + "epoch": 2.19, + "learning_rate": 3.117990705789145e-05, + "loss": 0.7507, + "step": 13743 + }, + { + "epoch": 2.19, + "learning_rate": 3.117740719361153e-05, + "loss": 0.7473, + "step": 13744 + }, + { + "epoch": 2.19, + "learning_rate": 3.1174907263545985e-05, + "loss": 0.7049, + "step": 13745 + }, + { + "epoch": 2.19, + "learning_rate": 3.117240726772142e-05, + "loss": 0.7135, + "step": 13746 + }, + { + "epoch": 2.19, + "learning_rate": 3.116990720616446e-05, + "loss": 0.7197, + "step": 13747 + }, + { + "epoch": 2.19, + "learning_rate": 3.116740707890174e-05, + "loss": 0.8381, + "step": 13748 + }, + { + "epoch": 2.19, + "learning_rate": 3.116490688595988e-05, + "loss": 0.8877, + "step": 13749 + }, + { + "epoch": 2.19, + "learning_rate": 3.116240662736549e-05, + "loss": 0.7639, + "step": 13750 + }, + { + "epoch": 2.19, + "learning_rate": 3.115990630314523e-05, + "loss": 0.6803, + "step": 13751 + }, + { + "epoch": 2.19, + "learning_rate": 3.11574059133257e-05, + "loss": 0.7615, + "step": 13752 + }, + { + "epoch": 2.19, + "learning_rate": 3.1154905457933525e-05, + "loss": 0.6946, + "step": 13753 + }, + { + "epoch": 2.19, + "learning_rate": 3.115240493699536e-05, + "loss": 0.7489, + "step": 13754 + }, + { + "epoch": 2.19, + "learning_rate": 3.114990435053781e-05, + "loss": 0.7052, + "step": 13755 + }, + { + "epoch": 2.19, + "learning_rate": 3.114740369858751e-05, + "loss": 0.7052, + "step": 13756 + }, + { + "epoch": 2.19, + "learning_rate": 3.1144902981171103e-05, + "loss": 0.7939, + "step": 13757 + }, + { + "epoch": 2.19, + "learning_rate": 3.11424021983152e-05, + "loss": 0.7968, + "step": 13758 + }, + { + "epoch": 2.19, + "learning_rate": 3.113990135004645e-05, + "loss": 0.7451, + "step": 13759 + }, + { + "epoch": 2.19, + "learning_rate": 3.1137400436391474e-05, + "loss": 0.7534, + "step": 13760 + }, + { + "epoch": 2.19, + "learning_rate": 3.113489945737691e-05, + "loss": 0.7107, + "step": 13761 + }, + { + "epoch": 2.19, + "learning_rate": 3.113239841302939e-05, + "loss": 0.7452, + "step": 13762 + }, + { + "epoch": 2.19, + "learning_rate": 3.112989730337554e-05, + "loss": 0.707, + "step": 13763 + }, + { + "epoch": 2.19, + "learning_rate": 3.112739612844202e-05, + "loss": 0.7543, + "step": 13764 + }, + { + "epoch": 2.19, + "learning_rate": 3.1124894888255447e-05, + "loss": 0.7774, + "step": 13765 + }, + { + "epoch": 2.19, + "learning_rate": 3.112239358284246e-05, + "loss": 0.8337, + "step": 13766 + }, + { + "epoch": 2.19, + "learning_rate": 3.111989221222971e-05, + "loss": 0.7166, + "step": 13767 + }, + { + "epoch": 2.19, + "learning_rate": 3.111739077644381e-05, + "loss": 0.6896, + "step": 13768 + }, + { + "epoch": 2.19, + "learning_rate": 3.111488927551142e-05, + "loss": 0.7983, + "step": 13769 + }, + { + "epoch": 2.19, + "learning_rate": 3.1112387709459165e-05, + "loss": 0.7188, + "step": 13770 + }, + { + "epoch": 2.19, + "learning_rate": 3.110988607831369e-05, + "loss": 0.7432, + "step": 13771 + }, + { + "epoch": 2.19, + "learning_rate": 3.110738438210165e-05, + "loss": 0.7019, + "step": 13772 + }, + { + "epoch": 2.19, + "learning_rate": 3.110488262084966e-05, + "loss": 0.8501, + "step": 13773 + }, + { + "epoch": 2.19, + "learning_rate": 3.1102380794584383e-05, + "loss": 0.7802, + "step": 13774 + }, + { + "epoch": 2.19, + "learning_rate": 3.109987890333246e-05, + "loss": 0.7515, + "step": 13775 + }, + { + "epoch": 2.19, + "learning_rate": 3.109737694712052e-05, + "loss": 0.7836, + "step": 13776 + }, + { + "epoch": 2.19, + "learning_rate": 3.1094874925975234e-05, + "loss": 0.7388, + "step": 13777 + }, + { + "epoch": 2.19, + "learning_rate": 3.1092372839923214e-05, + "loss": 0.7996, + "step": 13778 + }, + { + "epoch": 2.2, + "learning_rate": 3.108987068899112e-05, + "loss": 0.7681, + "step": 13779 + }, + { + "epoch": 2.2, + "learning_rate": 3.108736847320561e-05, + "loss": 0.6848, + "step": 13780 + }, + { + "epoch": 2.2, + "learning_rate": 3.108486619259333e-05, + "loss": 0.734, + "step": 13781 + }, + { + "epoch": 2.2, + "learning_rate": 3.10823638471809e-05, + "loss": 0.8277, + "step": 13782 + }, + { + "epoch": 2.2, + "learning_rate": 3.1079861436995e-05, + "loss": 0.7348, + "step": 13783 + }, + { + "epoch": 2.2, + "learning_rate": 3.107735896206226e-05, + "loss": 0.7451, + "step": 13784 + }, + { + "epoch": 2.2, + "learning_rate": 3.1074856422409336e-05, + "loss": 0.7442, + "step": 13785 + }, + { + "epoch": 2.2, + "learning_rate": 3.1072353818062885e-05, + "loss": 0.6895, + "step": 13786 + }, + { + "epoch": 2.2, + "learning_rate": 3.106985114904955e-05, + "loss": 0.7955, + "step": 13787 + }, + { + "epoch": 2.2, + "learning_rate": 3.106734841539599e-05, + "loss": 0.7344, + "step": 13788 + }, + { + "epoch": 2.2, + "learning_rate": 3.106484561712884e-05, + "loss": 0.8107, + "step": 13789 + }, + { + "epoch": 2.2, + "learning_rate": 3.106234275427477e-05, + "loss": 0.778, + "step": 13790 + }, + { + "epoch": 2.2, + "learning_rate": 3.105983982686044e-05, + "loss": 0.803, + "step": 13791 + }, + { + "epoch": 2.2, + "learning_rate": 3.105733683491249e-05, + "loss": 0.7341, + "step": 13792 + }, + { + "epoch": 2.2, + "learning_rate": 3.105483377845758e-05, + "loss": 0.7894, + "step": 13793 + }, + { + "epoch": 2.2, + "learning_rate": 3.1052330657522366e-05, + "loss": 0.684, + "step": 13794 + }, + { + "epoch": 2.2, + "learning_rate": 3.104982747213351e-05, + "loss": 0.7212, + "step": 13795 + }, + { + "epoch": 2.2, + "learning_rate": 3.104732422231765e-05, + "loss": 0.7329, + "step": 13796 + }, + { + "epoch": 2.2, + "learning_rate": 3.104482090810148e-05, + "loss": 0.738, + "step": 13797 + }, + { + "epoch": 2.2, + "learning_rate": 3.104231752951163e-05, + "loss": 0.8012, + "step": 13798 + }, + { + "epoch": 2.2, + "learning_rate": 3.1039814086574755e-05, + "loss": 0.7738, + "step": 13799 + }, + { + "epoch": 2.2, + "learning_rate": 3.1037310579317535e-05, + "loss": 0.7878, + "step": 13800 + }, + { + "epoch": 2.2, + "learning_rate": 3.103480700776662e-05, + "loss": 0.7528, + "step": 13801 + }, + { + "epoch": 2.2, + "learning_rate": 3.103230337194867e-05, + "loss": 0.6877, + "step": 13802 + }, + { + "epoch": 2.2, + "learning_rate": 3.102979967189036e-05, + "loss": 0.8128, + "step": 13803 + }, + { + "epoch": 2.2, + "learning_rate": 3.102729590761835e-05, + "loss": 0.7091, + "step": 13804 + }, + { + "epoch": 2.2, + "learning_rate": 3.102479207915928e-05, + "loss": 0.7676, + "step": 13805 + }, + { + "epoch": 2.2, + "learning_rate": 3.102228818653985e-05, + "loss": 0.8049, + "step": 13806 + }, + { + "epoch": 2.2, + "learning_rate": 3.101978422978669e-05, + "loss": 0.7783, + "step": 13807 + }, + { + "epoch": 2.2, + "learning_rate": 3.1017280208926494e-05, + "loss": 0.6794, + "step": 13808 + }, + { + "epoch": 2.2, + "learning_rate": 3.101477612398592e-05, + "loss": 0.7148, + "step": 13809 + }, + { + "epoch": 2.2, + "learning_rate": 3.101227197499162e-05, + "loss": 0.6834, + "step": 13810 + }, + { + "epoch": 2.2, + "learning_rate": 3.100976776197028e-05, + "loss": 0.8209, + "step": 13811 + }, + { + "epoch": 2.2, + "learning_rate": 3.1007263484948564e-05, + "loss": 0.7384, + "step": 13812 + }, + { + "epoch": 2.2, + "learning_rate": 3.1004759143953134e-05, + "loss": 0.7728, + "step": 13813 + }, + { + "epoch": 2.2, + "learning_rate": 3.1002254739010667e-05, + "loss": 0.6492, + "step": 13814 + }, + { + "epoch": 2.2, + "learning_rate": 3.0999750270147834e-05, + "loss": 0.7939, + "step": 13815 + }, + { + "epoch": 2.2, + "learning_rate": 3.09972457373913e-05, + "loss": 0.708, + "step": 13816 + }, + { + "epoch": 2.2, + "learning_rate": 3.0994741140767744e-05, + "loss": 0.7579, + "step": 13817 + }, + { + "epoch": 2.2, + "learning_rate": 3.099223648030383e-05, + "loss": 0.727, + "step": 13818 + }, + { + "epoch": 2.2, + "learning_rate": 3.098973175602623e-05, + "loss": 0.7179, + "step": 13819 + }, + { + "epoch": 2.2, + "learning_rate": 3.098722696796163e-05, + "loss": 0.7882, + "step": 13820 + }, + { + "epoch": 2.2, + "learning_rate": 3.09847221161367e-05, + "loss": 0.6654, + "step": 13821 + }, + { + "epoch": 2.2, + "learning_rate": 3.098221720057811e-05, + "loss": 0.7296, + "step": 13822 + }, + { + "epoch": 2.2, + "learning_rate": 3.0979712221312535e-05, + "loss": 0.7966, + "step": 13823 + }, + { + "epoch": 2.2, + "learning_rate": 3.097720717836666e-05, + "loss": 0.762, + "step": 13824 + }, + { + "epoch": 2.2, + "learning_rate": 3.0974702071767156e-05, + "loss": 0.761, + "step": 13825 + }, + { + "epoch": 2.2, + "learning_rate": 3.09721969015407e-05, + "loss": 0.8073, + "step": 13826 + }, + { + "epoch": 2.2, + "learning_rate": 3.0969691667713976e-05, + "loss": 0.832, + "step": 13827 + }, + { + "epoch": 2.2, + "learning_rate": 3.096718637031366e-05, + "loss": 0.7363, + "step": 13828 + }, + { + "epoch": 2.2, + "learning_rate": 3.0964681009366436e-05, + "loss": 0.7864, + "step": 13829 + }, + { + "epoch": 2.2, + "learning_rate": 3.0962175584898976e-05, + "loss": 0.8169, + "step": 13830 + }, + { + "epoch": 2.2, + "learning_rate": 3.095967009693796e-05, + "loss": 0.7366, + "step": 13831 + }, + { + "epoch": 2.2, + "learning_rate": 3.095716454551008e-05, + "loss": 0.82, + "step": 13832 + }, + { + "epoch": 2.2, + "learning_rate": 3.095465893064202e-05, + "loss": 0.7798, + "step": 13833 + }, + { + "epoch": 2.2, + "learning_rate": 3.095215325236045e-05, + "loss": 0.7739, + "step": 13834 + }, + { + "epoch": 2.2, + "learning_rate": 3.094964751069206e-05, + "loss": 0.6896, + "step": 13835 + }, + { + "epoch": 2.2, + "learning_rate": 3.094714170566355e-05, + "loss": 0.7603, + "step": 13836 + }, + { + "epoch": 2.2, + "learning_rate": 3.094463583730158e-05, + "loss": 0.7553, + "step": 13837 + }, + { + "epoch": 2.2, + "learning_rate": 3.094212990563285e-05, + "loss": 0.7625, + "step": 13838 + }, + { + "epoch": 2.2, + "learning_rate": 3.093962391068404e-05, + "loss": 0.6642, + "step": 13839 + }, + { + "epoch": 2.2, + "learning_rate": 3.093711785248185e-05, + "loss": 0.8189, + "step": 13840 + }, + { + "epoch": 2.21, + "learning_rate": 3.093461173105295e-05, + "loss": 0.7439, + "step": 13841 + }, + { + "epoch": 2.21, + "learning_rate": 3.093210554642404e-05, + "loss": 0.7677, + "step": 13842 + }, + { + "epoch": 2.21, + "learning_rate": 3.092959929862181e-05, + "loss": 0.7255, + "step": 13843 + }, + { + "epoch": 2.21, + "learning_rate": 3.092709298767295e-05, + "loss": 0.6739, + "step": 13844 + }, + { + "epoch": 2.21, + "learning_rate": 3.092458661360414e-05, + "loss": 0.7457, + "step": 13845 + }, + { + "epoch": 2.21, + "learning_rate": 3.0922080176442084e-05, + "loss": 0.8521, + "step": 13846 + }, + { + "epoch": 2.21, + "learning_rate": 3.091957367621348e-05, + "loss": 0.7546, + "step": 13847 + }, + { + "epoch": 2.21, + "learning_rate": 3.0917067112945e-05, + "loss": 0.7153, + "step": 13848 + }, + { + "epoch": 2.21, + "learning_rate": 3.091456048666334e-05, + "loss": 0.6922, + "step": 13849 + }, + { + "epoch": 2.21, + "learning_rate": 3.091205379739522e-05, + "loss": 0.6855, + "step": 13850 + }, + { + "epoch": 2.21, + "learning_rate": 3.0909547045167295e-05, + "loss": 0.7941, + "step": 13851 + }, + { + "epoch": 2.21, + "learning_rate": 3.09070402300063e-05, + "loss": 0.7406, + "step": 13852 + }, + { + "epoch": 2.21, + "learning_rate": 3.09045333519389e-05, + "loss": 0.7339, + "step": 13853 + }, + { + "epoch": 2.21, + "learning_rate": 3.090202641099181e-05, + "loss": 0.757, + "step": 13854 + }, + { + "epoch": 2.21, + "learning_rate": 3.0899519407191726e-05, + "loss": 0.7079, + "step": 13855 + }, + { + "epoch": 2.21, + "learning_rate": 3.089701234056534e-05, + "loss": 0.712, + "step": 13856 + }, + { + "epoch": 2.21, + "learning_rate": 3.089450521113935e-05, + "loss": 0.6943, + "step": 13857 + }, + { + "epoch": 2.21, + "learning_rate": 3.089199801894046e-05, + "loss": 0.7533, + "step": 13858 + }, + { + "epoch": 2.21, + "learning_rate": 3.088949076399537e-05, + "loss": 0.6684, + "step": 13859 + }, + { + "epoch": 2.21, + "learning_rate": 3.088698344633077e-05, + "loss": 0.7811, + "step": 13860 + }, + { + "epoch": 2.21, + "learning_rate": 3.088447606597338e-05, + "loss": 0.7174, + "step": 13861 + }, + { + "epoch": 2.21, + "learning_rate": 3.0881968622949886e-05, + "loss": 0.7281, + "step": 13862 + }, + { + "epoch": 2.21, + "learning_rate": 3.0879461117287e-05, + "loss": 0.7553, + "step": 13863 + }, + { + "epoch": 2.21, + "learning_rate": 3.0876953549011426e-05, + "loss": 0.7691, + "step": 13864 + }, + { + "epoch": 2.21, + "learning_rate": 3.087444591814986e-05, + "loss": 0.7179, + "step": 13865 + }, + { + "epoch": 2.21, + "learning_rate": 3.087193822472901e-05, + "loss": 0.7002, + "step": 13866 + }, + { + "epoch": 2.21, + "learning_rate": 3.086943046877559e-05, + "loss": 0.6736, + "step": 13867 + }, + { + "epoch": 2.21, + "learning_rate": 3.0866922650316295e-05, + "loss": 0.7128, + "step": 13868 + }, + { + "epoch": 2.21, + "learning_rate": 3.0864414769377834e-05, + "loss": 0.7018, + "step": 13869 + }, + { + "epoch": 2.21, + "learning_rate": 3.0861906825986916e-05, + "loss": 0.7817, + "step": 13870 + }, + { + "epoch": 2.21, + "learning_rate": 3.085939882017025e-05, + "loss": 0.7358, + "step": 13871 + }, + { + "epoch": 2.21, + "learning_rate": 3.085689075195454e-05, + "loss": 0.7386, + "step": 13872 + }, + { + "epoch": 2.21, + "learning_rate": 3.085438262136651e-05, + "loss": 0.7314, + "step": 13873 + }, + { + "epoch": 2.21, + "learning_rate": 3.085187442843285e-05, + "loss": 0.7702, + "step": 13874 + }, + { + "epoch": 2.21, + "learning_rate": 3.084936617318028e-05, + "loss": 0.7343, + "step": 13875 + }, + { + "epoch": 2.21, + "learning_rate": 3.084685785563552e-05, + "loss": 0.7204, + "step": 13876 + }, + { + "epoch": 2.21, + "learning_rate": 3.0844349475825264e-05, + "loss": 0.703, + "step": 13877 + }, + { + "epoch": 2.21, + "learning_rate": 3.0841841033776244e-05, + "loss": 0.7649, + "step": 13878 + }, + { + "epoch": 2.21, + "learning_rate": 3.083933252951516e-05, + "loss": 0.7187, + "step": 13879 + }, + { + "epoch": 2.21, + "learning_rate": 3.083682396306872e-05, + "loss": 0.7596, + "step": 13880 + }, + { + "epoch": 2.21, + "learning_rate": 3.0834315334463656e-05, + "loss": 0.7683, + "step": 13881 + }, + { + "epoch": 2.21, + "learning_rate": 3.0831806643726685e-05, + "loss": 0.7111, + "step": 13882 + }, + { + "epoch": 2.21, + "learning_rate": 3.08292978908845e-05, + "loss": 0.7001, + "step": 13883 + }, + { + "epoch": 2.21, + "learning_rate": 3.082678907596384e-05, + "loss": 0.7855, + "step": 13884 + }, + { + "epoch": 2.21, + "learning_rate": 3.0824280198991405e-05, + "loss": 0.7199, + "step": 13885 + }, + { + "epoch": 2.21, + "learning_rate": 3.082177125999393e-05, + "loss": 0.8732, + "step": 13886 + }, + { + "epoch": 2.21, + "learning_rate": 3.081926225899812e-05, + "loss": 0.847, + "step": 13887 + }, + { + "epoch": 2.21, + "learning_rate": 3.08167531960307e-05, + "loss": 0.7538, + "step": 13888 + }, + { + "epoch": 2.21, + "learning_rate": 3.08142440711184e-05, + "loss": 0.7549, + "step": 13889 + }, + { + "epoch": 2.21, + "learning_rate": 3.0811734884287924e-05, + "loss": 0.699, + "step": 13890 + }, + { + "epoch": 2.21, + "learning_rate": 3.080922563556599e-05, + "loss": 0.6876, + "step": 13891 + }, + { + "epoch": 2.21, + "learning_rate": 3.0806716324979343e-05, + "loss": 0.7105, + "step": 13892 + }, + { + "epoch": 2.21, + "learning_rate": 3.0804206952554694e-05, + "loss": 0.6987, + "step": 13893 + }, + { + "epoch": 2.21, + "learning_rate": 3.080169751831875e-05, + "loss": 0.6615, + "step": 13894 + }, + { + "epoch": 2.21, + "learning_rate": 3.079918802229826e-05, + "loss": 0.7362, + "step": 13895 + }, + { + "epoch": 2.21, + "learning_rate": 3.079667846451994e-05, + "loss": 0.842, + "step": 13896 + }, + { + "epoch": 2.21, + "learning_rate": 3.079416884501051e-05, + "loss": 0.7217, + "step": 13897 + }, + { + "epoch": 2.21, + "learning_rate": 3.0791659163796704e-05, + "loss": 0.7891, + "step": 13898 + }, + { + "epoch": 2.21, + "learning_rate": 3.078914942090524e-05, + "loss": 0.7642, + "step": 13899 + }, + { + "epoch": 2.21, + "learning_rate": 3.0786639616362844e-05, + "loss": 0.6902, + "step": 13900 + }, + { + "epoch": 2.21, + "learning_rate": 3.078412975019626e-05, + "loss": 0.763, + "step": 13901 + }, + { + "epoch": 2.21, + "learning_rate": 3.07816198224322e-05, + "loss": 0.7581, + "step": 13902 + }, + { + "epoch": 2.21, + "learning_rate": 3.0779109833097395e-05, + "loss": 0.7239, + "step": 13903 + }, + { + "epoch": 2.22, + "learning_rate": 3.0776599782218586e-05, + "loss": 0.7402, + "step": 13904 + }, + { + "epoch": 2.22, + "learning_rate": 3.07740896698225e-05, + "loss": 0.7384, + "step": 13905 + }, + { + "epoch": 2.22, + "learning_rate": 3.077157949593585e-05, + "loss": 0.7191, + "step": 13906 + }, + { + "epoch": 2.22, + "learning_rate": 3.0769069260585395e-05, + "loss": 0.8333, + "step": 13907 + }, + { + "epoch": 2.22, + "learning_rate": 3.0766558963797845e-05, + "loss": 0.6899, + "step": 13908 + }, + { + "epoch": 2.22, + "learning_rate": 3.076404860559995e-05, + "loss": 0.7557, + "step": 13909 + }, + { + "epoch": 2.22, + "learning_rate": 3.076153818601844e-05, + "loss": 0.6677, + "step": 13910 + }, + { + "epoch": 2.22, + "learning_rate": 3.075902770508004e-05, + "loss": 0.7254, + "step": 13911 + }, + { + "epoch": 2.22, + "learning_rate": 3.075651716281149e-05, + "loss": 0.7253, + "step": 13912 + }, + { + "epoch": 2.22, + "learning_rate": 3.075400655923953e-05, + "loss": 0.6947, + "step": 13913 + }, + { + "epoch": 2.22, + "learning_rate": 3.0751495894390896e-05, + "loss": 0.6942, + "step": 13914 + }, + { + "epoch": 2.22, + "learning_rate": 3.074898516829232e-05, + "loss": 0.7722, + "step": 13915 + }, + { + "epoch": 2.22, + "learning_rate": 3.074647438097055e-05, + "loss": 0.7107, + "step": 13916 + }, + { + "epoch": 2.22, + "learning_rate": 3.0743963532452305e-05, + "loss": 0.7719, + "step": 13917 + }, + { + "epoch": 2.22, + "learning_rate": 3.074145262276434e-05, + "loss": 0.7035, + "step": 13918 + }, + { + "epoch": 2.22, + "learning_rate": 3.073894165193339e-05, + "loss": 0.7353, + "step": 13919 + }, + { + "epoch": 2.22, + "learning_rate": 3.0736430619986194e-05, + "loss": 0.7855, + "step": 13920 + }, + { + "epoch": 2.22, + "learning_rate": 3.0733919526949496e-05, + "loss": 0.7884, + "step": 13921 + }, + { + "epoch": 2.22, + "learning_rate": 3.073140837285004e-05, + "loss": 0.6927, + "step": 13922 + }, + { + "epoch": 2.22, + "learning_rate": 3.0728897157714555e-05, + "loss": 0.7427, + "step": 13923 + }, + { + "epoch": 2.22, + "learning_rate": 3.0726385881569804e-05, + "loss": 0.6847, + "step": 13924 + }, + { + "epoch": 2.22, + "learning_rate": 3.072387454444251e-05, + "loss": 0.7769, + "step": 13925 + }, + { + "epoch": 2.22, + "learning_rate": 3.072136314635943e-05, + "loss": 0.6972, + "step": 13926 + }, + { + "epoch": 2.22, + "learning_rate": 3.071885168734731e-05, + "loss": 0.7129, + "step": 13927 + }, + { + "epoch": 2.22, + "learning_rate": 3.071634016743289e-05, + "loss": 0.7542, + "step": 13928 + }, + { + "epoch": 2.22, + "learning_rate": 3.0713828586642924e-05, + "loss": 0.6922, + "step": 13929 + }, + { + "epoch": 2.22, + "learning_rate": 3.071131694500414e-05, + "loss": 0.8171, + "step": 13930 + }, + { + "epoch": 2.22, + "learning_rate": 3.070880524254331e-05, + "loss": 0.7722, + "step": 13931 + }, + { + "epoch": 2.22, + "learning_rate": 3.0706293479287164e-05, + "loss": 0.7507, + "step": 13932 + }, + { + "epoch": 2.22, + "learning_rate": 3.0703781655262456e-05, + "loss": 0.7495, + "step": 13933 + }, + { + "epoch": 2.22, + "learning_rate": 3.070126977049594e-05, + "loss": 0.7838, + "step": 13934 + }, + { + "epoch": 2.22, + "learning_rate": 3.0698757825014354e-05, + "loss": 0.6978, + "step": 13935 + }, + { + "epoch": 2.22, + "learning_rate": 3.069624581884446e-05, + "loss": 0.7341, + "step": 13936 + }, + { + "epoch": 2.22, + "learning_rate": 3.069373375201301e-05, + "loss": 0.7757, + "step": 13937 + }, + { + "epoch": 2.22, + "learning_rate": 3.069122162454675e-05, + "loss": 0.7444, + "step": 13938 + }, + { + "epoch": 2.22, + "learning_rate": 3.068870943647244e-05, + "loss": 0.6983, + "step": 13939 + }, + { + "epoch": 2.22, + "learning_rate": 3.0686197187816826e-05, + "loss": 0.752, + "step": 13940 + }, + { + "epoch": 2.22, + "learning_rate": 3.068368487860666e-05, + "loss": 0.7015, + "step": 13941 + }, + { + "epoch": 2.22, + "learning_rate": 3.06811725088687e-05, + "loss": 0.7424, + "step": 13942 + }, + { + "epoch": 2.22, + "learning_rate": 3.06786600786297e-05, + "loss": 0.6359, + "step": 13943 + }, + { + "epoch": 2.22, + "learning_rate": 3.0676147587916425e-05, + "loss": 0.7302, + "step": 13944 + }, + { + "epoch": 2.22, + "learning_rate": 3.0673635036755626e-05, + "loss": 0.7137, + "step": 13945 + }, + { + "epoch": 2.22, + "learning_rate": 3.067112242517405e-05, + "loss": 0.6689, + "step": 13946 + }, + { + "epoch": 2.22, + "learning_rate": 3.0668609753198466e-05, + "loss": 0.7274, + "step": 13947 + }, + { + "epoch": 2.22, + "learning_rate": 3.066609702085564e-05, + "loss": 0.7003, + "step": 13948 + }, + { + "epoch": 2.22, + "learning_rate": 3.0663584228172304e-05, + "loss": 0.7301, + "step": 13949 + }, + { + "epoch": 2.22, + "learning_rate": 3.0661071375175244e-05, + "loss": 0.7863, + "step": 13950 + }, + { + "epoch": 2.22, + "learning_rate": 3.065855846189121e-05, + "loss": 0.7264, + "step": 13951 + }, + { + "epoch": 2.22, + "learning_rate": 3.065604548834696e-05, + "loss": 0.7512, + "step": 13952 + }, + { + "epoch": 2.22, + "learning_rate": 3.0653532454569265e-05, + "loss": 0.7577, + "step": 13953 + }, + { + "epoch": 2.22, + "learning_rate": 3.065101936058488e-05, + "loss": 0.728, + "step": 13954 + }, + { + "epoch": 2.22, + "learning_rate": 3.064850620642056e-05, + "loss": 0.7662, + "step": 13955 + }, + { + "epoch": 2.22, + "learning_rate": 3.064599299210309e-05, + "loss": 0.7362, + "step": 13956 + }, + { + "epoch": 2.22, + "learning_rate": 3.0643479717659223e-05, + "loss": 0.7464, + "step": 13957 + }, + { + "epoch": 2.22, + "learning_rate": 3.064096638311572e-05, + "loss": 0.7974, + "step": 13958 + }, + { + "epoch": 2.22, + "learning_rate": 3.063845298849935e-05, + "loss": 0.7073, + "step": 13959 + }, + { + "epoch": 2.22, + "learning_rate": 3.063593953383687e-05, + "loss": 0.7227, + "step": 13960 + }, + { + "epoch": 2.22, + "learning_rate": 3.063342601915507e-05, + "loss": 0.6343, + "step": 13961 + }, + { + "epoch": 2.22, + "learning_rate": 3.06309124444807e-05, + "loss": 0.7786, + "step": 13962 + }, + { + "epoch": 2.22, + "learning_rate": 3.062839880984052e-05, + "loss": 0.7468, + "step": 13963 + }, + { + "epoch": 2.22, + "learning_rate": 3.062588511526132e-05, + "loss": 0.8027, + "step": 13964 + }, + { + "epoch": 2.22, + "learning_rate": 3.062337136076986e-05, + "loss": 0.7469, + "step": 13965 + }, + { + "epoch": 2.22, + "learning_rate": 3.06208575463929e-05, + "loss": 0.7031, + "step": 13966 + }, + { + "epoch": 2.23, + "learning_rate": 3.0618343672157224e-05, + "loss": 0.779, + "step": 13967 + }, + { + "epoch": 2.23, + "learning_rate": 3.06158297380896e-05, + "loss": 0.7576, + "step": 13968 + }, + { + "epoch": 2.23, + "learning_rate": 3.06133157442168e-05, + "loss": 0.7097, + "step": 13969 + }, + { + "epoch": 2.23, + "learning_rate": 3.06108016905656e-05, + "loss": 0.7155, + "step": 13970 + }, + { + "epoch": 2.23, + "learning_rate": 3.0608287577162755e-05, + "loss": 0.7235, + "step": 13971 + }, + { + "epoch": 2.23, + "learning_rate": 3.060577340403506e-05, + "loss": 0.7007, + "step": 13972 + }, + { + "epoch": 2.23, + "learning_rate": 3.060325917120928e-05, + "loss": 0.7321, + "step": 13973 + }, + { + "epoch": 2.23, + "learning_rate": 3.06007448787122e-05, + "loss": 0.7208, + "step": 13974 + }, + { + "epoch": 2.23, + "learning_rate": 3.059823052657057e-05, + "loss": 0.7331, + "step": 13975 + }, + { + "epoch": 2.23, + "learning_rate": 3.0595716114811196e-05, + "loss": 0.6646, + "step": 13976 + }, + { + "epoch": 2.23, + "learning_rate": 3.059320164346084e-05, + "loss": 0.7091, + "step": 13977 + }, + { + "epoch": 2.23, + "learning_rate": 3.059068711254629e-05, + "loss": 0.8036, + "step": 13978 + }, + { + "epoch": 2.23, + "learning_rate": 3.0588172522094313e-05, + "loss": 0.695, + "step": 13979 + }, + { + "epoch": 2.23, + "learning_rate": 3.058565787213168e-05, + "loss": 0.7135, + "step": 13980 + }, + { + "epoch": 2.23, + "learning_rate": 3.058314316268519e-05, + "loss": 0.7367, + "step": 13981 + }, + { + "epoch": 2.23, + "learning_rate": 3.058062839378161e-05, + "loss": 0.7448, + "step": 13982 + }, + { + "epoch": 2.23, + "learning_rate": 3.0578113565447733e-05, + "loss": 0.7616, + "step": 13983 + }, + { + "epoch": 2.23, + "learning_rate": 3.057559867771033e-05, + "loss": 0.6967, + "step": 13984 + }, + { + "epoch": 2.23, + "learning_rate": 3.057308373059619e-05, + "loss": 0.7059, + "step": 13985 + }, + { + "epoch": 2.23, + "learning_rate": 3.0570568724132086e-05, + "loss": 0.7272, + "step": 13986 + }, + { + "epoch": 2.23, + "learning_rate": 3.0568053658344806e-05, + "loss": 0.7472, + "step": 13987 + }, + { + "epoch": 2.23, + "learning_rate": 3.056553853326113e-05, + "loss": 0.7047, + "step": 13988 + }, + { + "epoch": 2.23, + "learning_rate": 3.056302334890786e-05, + "loss": 0.7668, + "step": 13989 + }, + { + "epoch": 2.23, + "learning_rate": 3.056050810531177e-05, + "loss": 0.7833, + "step": 13990 + }, + { + "epoch": 2.23, + "learning_rate": 3.0557992802499634e-05, + "loss": 0.7804, + "step": 13991 + }, + { + "epoch": 2.23, + "learning_rate": 3.055547744049825e-05, + "loss": 0.7212, + "step": 13992 + }, + { + "epoch": 2.23, + "learning_rate": 3.0552962019334414e-05, + "loss": 0.748, + "step": 13993 + }, + { + "epoch": 2.23, + "learning_rate": 3.05504465390349e-05, + "loss": 0.7091, + "step": 13994 + }, + { + "epoch": 2.23, + "learning_rate": 3.054793099962649e-05, + "loss": 0.7017, + "step": 13995 + }, + { + "epoch": 2.23, + "learning_rate": 3.054541540113599e-05, + "loss": 0.6866, + "step": 13996 + }, + { + "epoch": 2.23, + "learning_rate": 3.0542899743590186e-05, + "loss": 0.74, + "step": 13997 + }, + { + "epoch": 2.23, + "learning_rate": 3.0540384027015865e-05, + "loss": 0.7474, + "step": 13998 + }, + { + "epoch": 2.23, + "learning_rate": 3.0537868251439816e-05, + "loss": 0.6898, + "step": 13999 + }, + { + "epoch": 2.23, + "learning_rate": 3.0535352416888835e-05, + "loss": 0.7533, + "step": 14000 + }, + { + "epoch": 2.23, + "learning_rate": 3.05328365233897e-05, + "loss": 0.7362, + "step": 14001 + }, + { + "epoch": 2.23, + "learning_rate": 3.0530320570969226e-05, + "loss": 0.6465, + "step": 14002 + }, + { + "epoch": 2.23, + "learning_rate": 3.052780455965419e-05, + "loss": 0.7545, + "step": 14003 + }, + { + "epoch": 2.23, + "learning_rate": 3.052528848947139e-05, + "loss": 0.8213, + "step": 14004 + }, + { + "epoch": 2.23, + "learning_rate": 3.052277236044763e-05, + "loss": 0.685, + "step": 14005 + }, + { + "epoch": 2.23, + "learning_rate": 3.0520256172609695e-05, + "loss": 0.8494, + "step": 14006 + }, + { + "epoch": 2.23, + "learning_rate": 3.0517739925984374e-05, + "loss": 0.7594, + "step": 14007 + }, + { + "epoch": 2.23, + "learning_rate": 3.051522362059848e-05, + "loss": 0.7542, + "step": 14008 + }, + { + "epoch": 2.23, + "learning_rate": 3.0512707256478808e-05, + "loss": 0.6968, + "step": 14009 + }, + { + "epoch": 2.23, + "learning_rate": 3.0510190833652148e-05, + "loss": 0.7082, + "step": 14010 + }, + { + "epoch": 2.23, + "learning_rate": 3.05076743521453e-05, + "loss": 0.7712, + "step": 14011 + }, + { + "epoch": 2.23, + "learning_rate": 3.050515781198505e-05, + "loss": 0.724, + "step": 14012 + }, + { + "epoch": 2.23, + "learning_rate": 3.050264121319823e-05, + "loss": 0.7254, + "step": 14013 + }, + { + "epoch": 2.23, + "learning_rate": 3.0500124555811614e-05, + "loss": 0.7804, + "step": 14014 + }, + { + "epoch": 2.23, + "learning_rate": 3.0497607839852012e-05, + "loss": 0.7331, + "step": 14015 + }, + { + "epoch": 2.23, + "learning_rate": 3.0495091065346228e-05, + "loss": 0.7152, + "step": 14016 + }, + { + "epoch": 2.23, + "learning_rate": 3.049257423232105e-05, + "loss": 0.7604, + "step": 14017 + }, + { + "epoch": 2.23, + "learning_rate": 3.0490057340803297e-05, + "loss": 0.7806, + "step": 14018 + }, + { + "epoch": 2.23, + "learning_rate": 3.0487540390819774e-05, + "loss": 0.7105, + "step": 14019 + }, + { + "epoch": 2.23, + "learning_rate": 3.048502338239727e-05, + "loss": 0.7542, + "step": 14020 + }, + { + "epoch": 2.23, + "learning_rate": 3.0482506315562597e-05, + "loss": 0.8332, + "step": 14021 + }, + { + "epoch": 2.23, + "learning_rate": 3.047998919034256e-05, + "loss": 0.7508, + "step": 14022 + }, + { + "epoch": 2.23, + "learning_rate": 3.047747200676397e-05, + "loss": 0.793, + "step": 14023 + }, + { + "epoch": 2.23, + "learning_rate": 3.047495476485362e-05, + "loss": 0.6992, + "step": 14024 + }, + { + "epoch": 2.23, + "learning_rate": 3.0472437464638332e-05, + "loss": 0.772, + "step": 14025 + }, + { + "epoch": 2.23, + "learning_rate": 3.0469920106144906e-05, + "loss": 0.7142, + "step": 14026 + }, + { + "epoch": 2.23, + "learning_rate": 3.0467402689400147e-05, + "loss": 0.7909, + "step": 14027 + }, + { + "epoch": 2.23, + "learning_rate": 3.0464885214430872e-05, + "loss": 0.681, + "step": 14028 + }, + { + "epoch": 2.23, + "learning_rate": 3.046236768126389e-05, + "loss": 0.7981, + "step": 14029 + }, + { + "epoch": 2.24, + "learning_rate": 3.045985008992601e-05, + "loss": 0.7318, + "step": 14030 + }, + { + "epoch": 2.24, + "learning_rate": 3.0457332440444036e-05, + "loss": 0.8041, + "step": 14031 + }, + { + "epoch": 2.24, + "learning_rate": 3.045481473284478e-05, + "loss": 0.7805, + "step": 14032 + }, + { + "epoch": 2.24, + "learning_rate": 3.045229696715507e-05, + "loss": 0.7196, + "step": 14033 + }, + { + "epoch": 2.24, + "learning_rate": 3.044977914340171e-05, + "loss": 0.7185, + "step": 14034 + }, + { + "epoch": 2.24, + "learning_rate": 3.04472612616115e-05, + "loss": 0.7382, + "step": 14035 + }, + { + "epoch": 2.24, + "learning_rate": 3.0444743321811264e-05, + "loss": 0.7243, + "step": 14036 + }, + { + "epoch": 2.24, + "learning_rate": 3.0442225324027822e-05, + "loss": 0.8626, + "step": 14037 + }, + { + "epoch": 2.24, + "learning_rate": 3.0439707268287988e-05, + "loss": 0.6962, + "step": 14038 + }, + { + "epoch": 2.24, + "learning_rate": 3.0437189154618574e-05, + "loss": 0.8583, + "step": 14039 + }, + { + "epoch": 2.24, + "learning_rate": 3.0434670983046394e-05, + "loss": 0.7141, + "step": 14040 + }, + { + "epoch": 2.24, + "learning_rate": 3.043215275359826e-05, + "loss": 0.7637, + "step": 14041 + }, + { + "epoch": 2.24, + "learning_rate": 3.0429634466301006e-05, + "loss": 0.7773, + "step": 14042 + }, + { + "epoch": 2.24, + "learning_rate": 3.042711612118144e-05, + "loss": 0.7599, + "step": 14043 + }, + { + "epoch": 2.24, + "learning_rate": 3.0424597718266374e-05, + "loss": 0.735, + "step": 14044 + }, + { + "epoch": 2.24, + "learning_rate": 3.042207925758265e-05, + "loss": 0.8031, + "step": 14045 + }, + { + "epoch": 2.24, + "learning_rate": 3.041956073915706e-05, + "loss": 0.7676, + "step": 14046 + }, + { + "epoch": 2.24, + "learning_rate": 3.041704216301644e-05, + "loss": 0.7345, + "step": 14047 + }, + { + "epoch": 2.24, + "learning_rate": 3.0414523529187616e-05, + "loss": 0.77, + "step": 14048 + }, + { + "epoch": 2.24, + "learning_rate": 3.041200483769741e-05, + "loss": 0.7151, + "step": 14049 + }, + { + "epoch": 2.24, + "learning_rate": 3.0409486088572625e-05, + "loss": 0.8348, + "step": 14050 + }, + { + "epoch": 2.24, + "learning_rate": 3.0406967281840105e-05, + "loss": 0.718, + "step": 14051 + }, + { + "epoch": 2.24, + "learning_rate": 3.0404448417526664e-05, + "loss": 0.6816, + "step": 14052 + }, + { + "epoch": 2.24, + "learning_rate": 3.0401929495659127e-05, + "loss": 0.7061, + "step": 14053 + }, + { + "epoch": 2.24, + "learning_rate": 3.039941051626432e-05, + "loss": 0.7443, + "step": 14054 + }, + { + "epoch": 2.24, + "learning_rate": 3.0396891479369073e-05, + "loss": 0.7408, + "step": 14055 + }, + { + "epoch": 2.24, + "learning_rate": 3.0394372385000203e-05, + "loss": 0.6685, + "step": 14056 + }, + { + "epoch": 2.24, + "learning_rate": 3.0391853233184546e-05, + "loss": 0.7187, + "step": 14057 + }, + { + "epoch": 2.24, + "learning_rate": 3.0389334023948922e-05, + "loss": 0.7133, + "step": 14058 + }, + { + "epoch": 2.24, + "learning_rate": 3.038681475732017e-05, + "loss": 0.8285, + "step": 14059 + }, + { + "epoch": 2.24, + "learning_rate": 3.038429543332511e-05, + "loss": 0.675, + "step": 14060 + }, + { + "epoch": 2.24, + "learning_rate": 3.0381776051990563e-05, + "loss": 0.6514, + "step": 14061 + }, + { + "epoch": 2.24, + "learning_rate": 3.0379256613343376e-05, + "loss": 0.6427, + "step": 14062 + }, + { + "epoch": 2.24, + "learning_rate": 3.037673711741037e-05, + "loss": 0.7215, + "step": 14063 + }, + { + "epoch": 2.24, + "learning_rate": 3.0374217564218378e-05, + "loss": 0.75, + "step": 14064 + }, + { + "epoch": 2.24, + "learning_rate": 3.037169795379423e-05, + "loss": 0.7545, + "step": 14065 + }, + { + "epoch": 2.24, + "learning_rate": 3.0369178286164762e-05, + "loss": 0.7833, + "step": 14066 + }, + { + "epoch": 2.24, + "learning_rate": 3.03666585613568e-05, + "loss": 0.7843, + "step": 14067 + }, + { + "epoch": 2.24, + "learning_rate": 3.036413877939719e-05, + "loss": 0.711, + "step": 14068 + }, + { + "epoch": 2.24, + "learning_rate": 3.036161894031276e-05, + "loss": 0.7717, + "step": 14069 + }, + { + "epoch": 2.24, + "learning_rate": 3.035909904413034e-05, + "loss": 0.6366, + "step": 14070 + }, + { + "epoch": 2.24, + "learning_rate": 3.0356579090876768e-05, + "loss": 0.7371, + "step": 14071 + }, + { + "epoch": 2.24, + "learning_rate": 3.0354059080578884e-05, + "loss": 0.7163, + "step": 14072 + }, + { + "epoch": 2.24, + "learning_rate": 3.0351539013263515e-05, + "loss": 0.8018, + "step": 14073 + }, + { + "epoch": 2.24, + "learning_rate": 3.034901888895751e-05, + "loss": 0.7794, + "step": 14074 + }, + { + "epoch": 2.24, + "learning_rate": 3.03464987076877e-05, + "loss": 0.8809, + "step": 14075 + }, + { + "epoch": 2.24, + "learning_rate": 3.0343978469480916e-05, + "loss": 0.7675, + "step": 14076 + }, + { + "epoch": 2.24, + "learning_rate": 3.034145817436402e-05, + "loss": 0.7301, + "step": 14077 + }, + { + "epoch": 2.24, + "learning_rate": 3.0338937822363827e-05, + "loss": 0.7246, + "step": 14078 + }, + { + "epoch": 2.24, + "learning_rate": 3.033641741350719e-05, + "loss": 0.6973, + "step": 14079 + }, + { + "epoch": 2.24, + "learning_rate": 3.0333896947820954e-05, + "loss": 0.7302, + "step": 14080 + }, + { + "epoch": 2.24, + "learning_rate": 3.0331376425331946e-05, + "loss": 0.7771, + "step": 14081 + }, + { + "epoch": 2.24, + "learning_rate": 3.032885584606702e-05, + "loss": 0.7546, + "step": 14082 + }, + { + "epoch": 2.24, + "learning_rate": 3.0326335210053014e-05, + "loss": 0.7616, + "step": 14083 + }, + { + "epoch": 2.24, + "learning_rate": 3.032381451731676e-05, + "loss": 0.7485, + "step": 14084 + }, + { + "epoch": 2.24, + "learning_rate": 3.032129376788513e-05, + "loss": 0.678, + "step": 14085 + }, + { + "epoch": 2.24, + "learning_rate": 3.0318772961784948e-05, + "loss": 0.7036, + "step": 14086 + }, + { + "epoch": 2.24, + "learning_rate": 3.0316252099043056e-05, + "loss": 0.6772, + "step": 14087 + }, + { + "epoch": 2.24, + "learning_rate": 3.0313731179686312e-05, + "loss": 0.7376, + "step": 14088 + }, + { + "epoch": 2.24, + "learning_rate": 3.0311210203741557e-05, + "loss": 0.713, + "step": 14089 + }, + { + "epoch": 2.24, + "learning_rate": 3.0308689171235645e-05, + "loss": 0.7444, + "step": 14090 + }, + { + "epoch": 2.24, + "learning_rate": 3.0306168082195407e-05, + "loss": 0.7724, + "step": 14091 + }, + { + "epoch": 2.25, + "learning_rate": 3.0303646936647706e-05, + "loss": 0.7654, + "step": 14092 + }, + { + "epoch": 2.25, + "learning_rate": 3.0301125734619375e-05, + "loss": 0.7686, + "step": 14093 + }, + { + "epoch": 2.25, + "learning_rate": 3.0298604476137283e-05, + "loss": 0.7706, + "step": 14094 + }, + { + "epoch": 2.25, + "learning_rate": 3.0296083161228266e-05, + "loss": 0.7126, + "step": 14095 + }, + { + "epoch": 2.25, + "learning_rate": 3.0293561789919177e-05, + "loss": 0.7237, + "step": 14096 + }, + { + "epoch": 2.25, + "learning_rate": 3.0291040362236873e-05, + "loss": 0.7818, + "step": 14097 + }, + { + "epoch": 2.25, + "learning_rate": 3.02885188782082e-05, + "loss": 0.7295, + "step": 14098 + }, + { + "epoch": 2.25, + "learning_rate": 3.0285997337860005e-05, + "loss": 0.7205, + "step": 14099 + }, + { + "epoch": 2.25, + "learning_rate": 3.0283475741219163e-05, + "loss": 0.7631, + "step": 14100 + }, + { + "epoch": 2.25, + "learning_rate": 3.0280954088312496e-05, + "loss": 0.7904, + "step": 14101 + }, + { + "epoch": 2.25, + "learning_rate": 3.0278432379166888e-05, + "loss": 0.6959, + "step": 14102 + }, + { + "epoch": 2.25, + "learning_rate": 3.027591061380917e-05, + "loss": 0.7407, + "step": 14103 + }, + { + "epoch": 2.25, + "learning_rate": 3.027338879226621e-05, + "loss": 0.785, + "step": 14104 + }, + { + "epoch": 2.25, + "learning_rate": 3.027086691456486e-05, + "loss": 0.7734, + "step": 14105 + }, + { + "epoch": 2.25, + "learning_rate": 3.026834498073198e-05, + "loss": 0.8566, + "step": 14106 + }, + { + "epoch": 2.25, + "learning_rate": 3.0265822990794423e-05, + "loss": 0.7307, + "step": 14107 + }, + { + "epoch": 2.25, + "learning_rate": 3.0263300944779044e-05, + "loss": 0.7323, + "step": 14108 + }, + { + "epoch": 2.25, + "learning_rate": 3.0260778842712713e-05, + "loss": 0.7399, + "step": 14109 + }, + { + "epoch": 2.25, + "learning_rate": 3.0258256684622278e-05, + "loss": 0.7406, + "step": 14110 + }, + { + "epoch": 2.25, + "learning_rate": 3.0255734470534604e-05, + "loss": 0.8604, + "step": 14111 + }, + { + "epoch": 2.25, + "learning_rate": 3.0253212200476544e-05, + "loss": 0.8764, + "step": 14112 + }, + { + "epoch": 2.25, + "learning_rate": 3.0250689874474956e-05, + "loss": 0.7255, + "step": 14113 + }, + { + "epoch": 2.25, + "learning_rate": 3.0248167492556722e-05, + "loss": 0.807, + "step": 14114 + }, + { + "epoch": 2.25, + "learning_rate": 3.024564505474869e-05, + "loss": 0.773, + "step": 14115 + }, + { + "epoch": 2.25, + "learning_rate": 3.0243122561077713e-05, + "loss": 0.7637, + "step": 14116 + }, + { + "epoch": 2.25, + "learning_rate": 3.0240600011570673e-05, + "loss": 0.7889, + "step": 14117 + }, + { + "epoch": 2.25, + "learning_rate": 3.023807740625442e-05, + "loss": 0.7302, + "step": 14118 + }, + { + "epoch": 2.25, + "learning_rate": 3.0235554745155826e-05, + "loss": 0.6795, + "step": 14119 + }, + { + "epoch": 2.25, + "learning_rate": 3.0233032028301757e-05, + "loss": 0.7539, + "step": 14120 + }, + { + "epoch": 2.25, + "learning_rate": 3.0230509255719063e-05, + "loss": 0.7249, + "step": 14121 + }, + { + "epoch": 2.25, + "learning_rate": 3.0227986427434622e-05, + "loss": 0.7826, + "step": 14122 + }, + { + "epoch": 2.25, + "learning_rate": 3.022546354347531e-05, + "loss": 0.6866, + "step": 14123 + }, + { + "epoch": 2.25, + "learning_rate": 3.0222940603867977e-05, + "loss": 0.7485, + "step": 14124 + }, + { + "epoch": 2.25, + "learning_rate": 3.022041760863949e-05, + "loss": 0.8034, + "step": 14125 + }, + { + "epoch": 2.25, + "learning_rate": 3.0217894557816733e-05, + "loss": 0.6906, + "step": 14126 + }, + { + "epoch": 2.25, + "learning_rate": 3.0215371451426568e-05, + "loss": 0.7031, + "step": 14127 + }, + { + "epoch": 2.25, + "learning_rate": 3.021284828949586e-05, + "loss": 0.7456, + "step": 14128 + }, + { + "epoch": 2.25, + "learning_rate": 3.021032507205148e-05, + "loss": 0.7229, + "step": 14129 + }, + { + "epoch": 2.25, + "learning_rate": 3.020780179912031e-05, + "loss": 0.717, + "step": 14130 + }, + { + "epoch": 2.25, + "learning_rate": 3.020527847072921e-05, + "loss": 0.7685, + "step": 14131 + }, + { + "epoch": 2.25, + "learning_rate": 3.0202755086905054e-05, + "loss": 0.7729, + "step": 14132 + }, + { + "epoch": 2.25, + "learning_rate": 3.0200231647674708e-05, + "loss": 0.7059, + "step": 14133 + }, + { + "epoch": 2.25, + "learning_rate": 3.0197708153065056e-05, + "loss": 0.7243, + "step": 14134 + }, + { + "epoch": 2.25, + "learning_rate": 3.019518460310297e-05, + "loss": 0.7064, + "step": 14135 + }, + { + "epoch": 2.25, + "learning_rate": 3.019266099781532e-05, + "loss": 0.7858, + "step": 14136 + }, + { + "epoch": 2.25, + "learning_rate": 3.0190137337228985e-05, + "loss": 0.8164, + "step": 14137 + }, + { + "epoch": 2.25, + "learning_rate": 3.0187613621370842e-05, + "loss": 0.776, + "step": 14138 + }, + { + "epoch": 2.25, + "learning_rate": 3.018508985026775e-05, + "loss": 0.7466, + "step": 14139 + }, + { + "epoch": 2.25, + "learning_rate": 3.018256602394662e-05, + "loss": 0.7895, + "step": 14140 + }, + { + "epoch": 2.25, + "learning_rate": 3.0180042142434296e-05, + "loss": 0.8026, + "step": 14141 + }, + { + "epoch": 2.25, + "learning_rate": 3.017751820575766e-05, + "loss": 0.6975, + "step": 14142 + }, + { + "epoch": 2.25, + "learning_rate": 3.0174994213943607e-05, + "loss": 0.7609, + "step": 14143 + }, + { + "epoch": 2.25, + "learning_rate": 3.017247016701901e-05, + "loss": 0.8464, + "step": 14144 + }, + { + "epoch": 2.25, + "learning_rate": 3.016994606501074e-05, + "loss": 0.7477, + "step": 14145 + }, + { + "epoch": 2.25, + "learning_rate": 3.016742190794569e-05, + "loss": 0.7128, + "step": 14146 + }, + { + "epoch": 2.25, + "learning_rate": 3.016489769585073e-05, + "loss": 0.6761, + "step": 14147 + }, + { + "epoch": 2.25, + "learning_rate": 3.0162373428752744e-05, + "loss": 0.744, + "step": 14148 + }, + { + "epoch": 2.25, + "learning_rate": 3.015984910667862e-05, + "loss": 0.7722, + "step": 14149 + }, + { + "epoch": 2.25, + "learning_rate": 3.015732472965523e-05, + "loss": 0.741, + "step": 14150 + }, + { + "epoch": 2.25, + "learning_rate": 3.0154800297709467e-05, + "loss": 0.748, + "step": 14151 + }, + { + "epoch": 2.25, + "learning_rate": 3.015227581086821e-05, + "loss": 0.6903, + "step": 14152 + }, + { + "epoch": 2.25, + "learning_rate": 3.014975126915834e-05, + "loss": 0.6697, + "step": 14153 + }, + { + "epoch": 2.25, + "learning_rate": 3.0147226672606742e-05, + "loss": 0.7205, + "step": 14154 + }, + { + "epoch": 2.26, + "learning_rate": 3.0144702021240312e-05, + "loss": 0.7097, + "step": 14155 + }, + { + "epoch": 2.26, + "learning_rate": 3.0142177315085927e-05, + "loss": 0.7503, + "step": 14156 + }, + { + "epoch": 2.26, + "learning_rate": 3.0139652554170476e-05, + "loss": 0.8, + "step": 14157 + }, + { + "epoch": 2.26, + "learning_rate": 3.0137127738520848e-05, + "loss": 0.6947, + "step": 14158 + }, + { + "epoch": 2.26, + "learning_rate": 3.0134602868163926e-05, + "loss": 0.7854, + "step": 14159 + }, + { + "epoch": 2.26, + "learning_rate": 3.0132077943126602e-05, + "loss": 0.8206, + "step": 14160 + }, + { + "epoch": 2.26, + "learning_rate": 3.0129552963435765e-05, + "loss": 0.7187, + "step": 14161 + }, + { + "epoch": 2.26, + "learning_rate": 3.0127027929118295e-05, + "loss": 0.7826, + "step": 14162 + }, + { + "epoch": 2.26, + "learning_rate": 3.0124502840201096e-05, + "loss": 0.7013, + "step": 14163 + }, + { + "epoch": 2.26, + "learning_rate": 3.012197769671105e-05, + "loss": 0.7787, + "step": 14164 + }, + { + "epoch": 2.26, + "learning_rate": 3.0119452498675056e-05, + "loss": 0.8508, + "step": 14165 + }, + { + "epoch": 2.26, + "learning_rate": 3.0116927246119995e-05, + "loss": 0.7095, + "step": 14166 + }, + { + "epoch": 2.26, + "learning_rate": 3.0114401939072774e-05, + "loss": 0.8468, + "step": 14167 + }, + { + "epoch": 2.26, + "learning_rate": 3.0111876577560267e-05, + "loss": 0.819, + "step": 14168 + }, + { + "epoch": 2.26, + "learning_rate": 3.0109351161609383e-05, + "loss": 0.7871, + "step": 14169 + }, + { + "epoch": 2.26, + "learning_rate": 3.010682569124702e-05, + "loss": 0.7301, + "step": 14170 + }, + { + "epoch": 2.26, + "learning_rate": 3.010430016650005e-05, + "loss": 0.7486, + "step": 14171 + }, + { + "epoch": 2.26, + "learning_rate": 3.0101774587395386e-05, + "loss": 0.7834, + "step": 14172 + }, + { + "epoch": 2.26, + "learning_rate": 3.0099248953959923e-05, + "loss": 0.6897, + "step": 14173 + }, + { + "epoch": 2.26, + "learning_rate": 3.009672326622055e-05, + "loss": 0.7828, + "step": 14174 + }, + { + "epoch": 2.26, + "learning_rate": 3.0094197524204177e-05, + "loss": 0.7896, + "step": 14175 + }, + { + "epoch": 2.26, + "learning_rate": 3.0091671727937688e-05, + "loss": 0.8296, + "step": 14176 + }, + { + "epoch": 2.26, + "learning_rate": 3.008914587744798e-05, + "loss": 0.7507, + "step": 14177 + }, + { + "epoch": 2.26, + "learning_rate": 3.0086619972761964e-05, + "loss": 0.7597, + "step": 14178 + }, + { + "epoch": 2.26, + "learning_rate": 3.008409401390654e-05, + "loss": 0.7692, + "step": 14179 + }, + { + "epoch": 2.26, + "learning_rate": 3.0081568000908588e-05, + "loss": 0.6997, + "step": 14180 + }, + { + "epoch": 2.26, + "learning_rate": 3.0079041933795033e-05, + "loss": 0.6834, + "step": 14181 + }, + { + "epoch": 2.26, + "learning_rate": 3.007651581259276e-05, + "loss": 0.7176, + "step": 14182 + }, + { + "epoch": 2.26, + "learning_rate": 3.0073989637328674e-05, + "loss": 0.8015, + "step": 14183 + }, + { + "epoch": 2.26, + "learning_rate": 3.0071463408029683e-05, + "loss": 0.7417, + "step": 14184 + }, + { + "epoch": 2.26, + "learning_rate": 3.0068937124722678e-05, + "loss": 0.7141, + "step": 14185 + }, + { + "epoch": 2.26, + "learning_rate": 3.006641078743458e-05, + "loss": 0.7262, + "step": 14186 + }, + { + "epoch": 2.26, + "learning_rate": 3.0063884396192276e-05, + "loss": 0.7306, + "step": 14187 + }, + { + "epoch": 2.26, + "learning_rate": 3.006135795102268e-05, + "loss": 0.7286, + "step": 14188 + }, + { + "epoch": 2.26, + "learning_rate": 3.0058831451952697e-05, + "loss": 0.7253, + "step": 14189 + }, + { + "epoch": 2.26, + "learning_rate": 3.005630489900923e-05, + "loss": 0.6999, + "step": 14190 + }, + { + "epoch": 2.26, + "learning_rate": 3.005377829221918e-05, + "loss": 0.6659, + "step": 14191 + }, + { + "epoch": 2.26, + "learning_rate": 3.0051251631609467e-05, + "loss": 0.7882, + "step": 14192 + }, + { + "epoch": 2.26, + "learning_rate": 3.004872491720699e-05, + "loss": 0.7242, + "step": 14193 + }, + { + "epoch": 2.26, + "learning_rate": 3.004619814903865e-05, + "loss": 0.6724, + "step": 14194 + }, + { + "epoch": 2.26, + "learning_rate": 3.0043671327131374e-05, + "loss": 0.662, + "step": 14195 + }, + { + "epoch": 2.26, + "learning_rate": 3.0041144451512054e-05, + "loss": 0.7896, + "step": 14196 + }, + { + "epoch": 2.26, + "learning_rate": 3.0038617522207603e-05, + "loss": 0.7593, + "step": 14197 + }, + { + "epoch": 2.26, + "learning_rate": 3.0036090539244936e-05, + "loss": 0.7756, + "step": 14198 + }, + { + "epoch": 2.26, + "learning_rate": 3.003356350265097e-05, + "loss": 0.7925, + "step": 14199 + }, + { + "epoch": 2.26, + "learning_rate": 3.0031036412452596e-05, + "loss": 0.8084, + "step": 14200 + }, + { + "epoch": 2.26, + "learning_rate": 3.0028509268676754e-05, + "loss": 0.6836, + "step": 14201 + }, + { + "epoch": 2.26, + "learning_rate": 3.002598207135033e-05, + "loss": 0.7276, + "step": 14202 + }, + { + "epoch": 2.26, + "learning_rate": 3.0023454820500252e-05, + "loss": 0.7507, + "step": 14203 + }, + { + "epoch": 2.26, + "learning_rate": 3.002092751615343e-05, + "loss": 0.7506, + "step": 14204 + }, + { + "epoch": 2.26, + "learning_rate": 3.0018400158336774e-05, + "loss": 0.7003, + "step": 14205 + }, + { + "epoch": 2.26, + "learning_rate": 3.0015872747077207e-05, + "loss": 0.7668, + "step": 14206 + }, + { + "epoch": 2.26, + "learning_rate": 3.0013345282401638e-05, + "loss": 0.7476, + "step": 14207 + }, + { + "epoch": 2.26, + "learning_rate": 3.001081776433699e-05, + "loss": 0.7276, + "step": 14208 + }, + { + "epoch": 2.26, + "learning_rate": 3.0008290192910173e-05, + "loss": 0.6789, + "step": 14209 + }, + { + "epoch": 2.26, + "learning_rate": 3.0005762568148104e-05, + "loss": 0.7507, + "step": 14210 + }, + { + "epoch": 2.26, + "learning_rate": 3.0003234890077702e-05, + "loss": 0.7906, + "step": 14211 + }, + { + "epoch": 2.26, + "learning_rate": 3.0000707158725895e-05, + "loss": 0.7614, + "step": 14212 + }, + { + "epoch": 2.26, + "learning_rate": 2.9998179374119585e-05, + "loss": 0.7381, + "step": 14213 + }, + { + "epoch": 2.26, + "learning_rate": 2.9995651536285698e-05, + "loss": 0.7851, + "step": 14214 + }, + { + "epoch": 2.26, + "learning_rate": 2.9993123645251155e-05, + "loss": 0.773, + "step": 14215 + }, + { + "epoch": 2.26, + "learning_rate": 2.999059570104288e-05, + "loss": 0.7361, + "step": 14216 + }, + { + "epoch": 2.26, + "learning_rate": 2.9988067703687784e-05, + "loss": 0.7818, + "step": 14217 + }, + { + "epoch": 2.27, + "learning_rate": 2.99855396532128e-05, + "loss": 0.6952, + "step": 14218 + }, + { + "epoch": 2.27, + "learning_rate": 2.998301154964485e-05, + "loss": 0.7244, + "step": 14219 + }, + { + "epoch": 2.27, + "learning_rate": 2.9980483393010845e-05, + "loss": 0.7218, + "step": 14220 + }, + { + "epoch": 2.27, + "learning_rate": 2.9977955183337724e-05, + "loss": 0.6935, + "step": 14221 + }, + { + "epoch": 2.27, + "learning_rate": 2.99754269206524e-05, + "loss": 0.7677, + "step": 14222 + }, + { + "epoch": 2.27, + "learning_rate": 2.997289860498179e-05, + "loss": 0.6782, + "step": 14223 + }, + { + "epoch": 2.27, + "learning_rate": 2.997037023635284e-05, + "loss": 0.6795, + "step": 14224 + }, + { + "epoch": 2.27, + "learning_rate": 2.9967841814792464e-05, + "loss": 0.706, + "step": 14225 + }, + { + "epoch": 2.27, + "learning_rate": 2.9965313340327584e-05, + "loss": 0.731, + "step": 14226 + }, + { + "epoch": 2.27, + "learning_rate": 2.996278481298513e-05, + "loss": 0.7416, + "step": 14227 + }, + { + "epoch": 2.27, + "learning_rate": 2.9960256232792035e-05, + "loss": 0.6847, + "step": 14228 + }, + { + "epoch": 2.27, + "learning_rate": 2.995772759977522e-05, + "loss": 0.761, + "step": 14229 + }, + { + "epoch": 2.27, + "learning_rate": 2.995519891396162e-05, + "loss": 0.817, + "step": 14230 + }, + { + "epoch": 2.27, + "learning_rate": 2.9952670175378155e-05, + "loss": 0.7244, + "step": 14231 + }, + { + "epoch": 2.27, + "learning_rate": 2.995014138405176e-05, + "loss": 0.7156, + "step": 14232 + }, + { + "epoch": 2.27, + "learning_rate": 2.9947612540009373e-05, + "loss": 0.7266, + "step": 14233 + }, + { + "epoch": 2.27, + "learning_rate": 2.9945083643277904e-05, + "loss": 0.775, + "step": 14234 + }, + { + "epoch": 2.27, + "learning_rate": 2.99425546938843e-05, + "loss": 0.8422, + "step": 14235 + }, + { + "epoch": 2.27, + "learning_rate": 2.9940025691855495e-05, + "loss": 0.7379, + "step": 14236 + }, + { + "epoch": 2.27, + "learning_rate": 2.993749663721841e-05, + "loss": 0.8245, + "step": 14237 + }, + { + "epoch": 2.27, + "learning_rate": 2.9934967529999984e-05, + "loss": 0.7389, + "step": 14238 + }, + { + "epoch": 2.27, + "learning_rate": 2.993243837022715e-05, + "loss": 0.7104, + "step": 14239 + }, + { + "epoch": 2.27, + "learning_rate": 2.9929909157926837e-05, + "loss": 0.7241, + "step": 14240 + }, + { + "epoch": 2.27, + "learning_rate": 2.9927379893126e-05, + "loss": 0.7875, + "step": 14241 + }, + { + "epoch": 2.27, + "learning_rate": 2.992485057585155e-05, + "loss": 0.7772, + "step": 14242 + }, + { + "epoch": 2.27, + "learning_rate": 2.992232120613043e-05, + "loss": 0.7786, + "step": 14243 + }, + { + "epoch": 2.27, + "learning_rate": 2.9919791783989576e-05, + "loss": 0.6718, + "step": 14244 + }, + { + "epoch": 2.27, + "learning_rate": 2.991726230945593e-05, + "loss": 0.756, + "step": 14245 + }, + { + "epoch": 2.27, + "learning_rate": 2.991473278255642e-05, + "loss": 0.6952, + "step": 14246 + }, + { + "epoch": 2.27, + "learning_rate": 2.9912203203317997e-05, + "loss": 0.7675, + "step": 14247 + }, + { + "epoch": 2.27, + "learning_rate": 2.9909673571767587e-05, + "loss": 0.7222, + "step": 14248 + }, + { + "epoch": 2.27, + "learning_rate": 2.990714388793213e-05, + "loss": 0.7485, + "step": 14249 + }, + { + "epoch": 2.27, + "learning_rate": 2.9904614151838577e-05, + "loss": 0.7266, + "step": 14250 + }, + { + "epoch": 2.27, + "learning_rate": 2.9902084363513865e-05, + "loss": 0.7791, + "step": 14251 + }, + { + "epoch": 2.27, + "learning_rate": 2.9899554522984923e-05, + "loss": 0.6964, + "step": 14252 + }, + { + "epoch": 2.27, + "learning_rate": 2.9897024630278708e-05, + "loss": 0.7369, + "step": 14253 + }, + { + "epoch": 2.27, + "learning_rate": 2.989449468542214e-05, + "loss": 0.7336, + "step": 14254 + }, + { + "epoch": 2.27, + "learning_rate": 2.9891964688442185e-05, + "loss": 0.707, + "step": 14255 + }, + { + "epoch": 2.27, + "learning_rate": 2.9889434639365776e-05, + "loss": 0.7354, + "step": 14256 + }, + { + "epoch": 2.27, + "learning_rate": 2.988690453821985e-05, + "loss": 0.808, + "step": 14257 + }, + { + "epoch": 2.27, + "learning_rate": 2.9884374385031366e-05, + "loss": 0.7402, + "step": 14258 + }, + { + "epoch": 2.27, + "learning_rate": 2.9881844179827256e-05, + "loss": 0.7466, + "step": 14259 + }, + { + "epoch": 2.27, + "learning_rate": 2.9879313922634466e-05, + "loss": 0.7522, + "step": 14260 + }, + { + "epoch": 2.27, + "learning_rate": 2.9876783613479958e-05, + "loss": 0.7413, + "step": 14261 + }, + { + "epoch": 2.27, + "learning_rate": 2.9874253252390656e-05, + "loss": 0.7233, + "step": 14262 + }, + { + "epoch": 2.27, + "learning_rate": 2.9871722839393514e-05, + "loss": 0.7288, + "step": 14263 + }, + { + "epoch": 2.27, + "learning_rate": 2.9869192374515486e-05, + "loss": 0.7084, + "step": 14264 + }, + { + "epoch": 2.27, + "learning_rate": 2.9866661857783513e-05, + "loss": 0.8098, + "step": 14265 + }, + { + "epoch": 2.27, + "learning_rate": 2.986413128922454e-05, + "loss": 0.7416, + "step": 14266 + }, + { + "epoch": 2.27, + "learning_rate": 2.986160066886553e-05, + "loss": 0.7319, + "step": 14267 + }, + { + "epoch": 2.27, + "learning_rate": 2.9859069996733426e-05, + "loss": 0.789, + "step": 14268 + }, + { + "epoch": 2.27, + "learning_rate": 2.9856539272855165e-05, + "loss": 0.7632, + "step": 14269 + }, + { + "epoch": 2.27, + "learning_rate": 2.985400849725772e-05, + "loss": 0.707, + "step": 14270 + }, + { + "epoch": 2.27, + "learning_rate": 2.9851477669968032e-05, + "loss": 0.6836, + "step": 14271 + }, + { + "epoch": 2.27, + "learning_rate": 2.984894679101305e-05, + "loss": 0.7641, + "step": 14272 + }, + { + "epoch": 2.27, + "learning_rate": 2.9846415860419724e-05, + "loss": 0.6908, + "step": 14273 + }, + { + "epoch": 2.27, + "learning_rate": 2.984388487821502e-05, + "loss": 0.7188, + "step": 14274 + }, + { + "epoch": 2.27, + "learning_rate": 2.9841353844425875e-05, + "loss": 0.6847, + "step": 14275 + }, + { + "epoch": 2.27, + "learning_rate": 2.9838822759079248e-05, + "loss": 0.7271, + "step": 14276 + }, + { + "epoch": 2.27, + "learning_rate": 2.9836291622202105e-05, + "loss": 0.7115, + "step": 14277 + }, + { + "epoch": 2.27, + "learning_rate": 2.9833760433821384e-05, + "loss": 0.7368, + "step": 14278 + }, + { + "epoch": 2.27, + "learning_rate": 2.9831229193964056e-05, + "loss": 0.7976, + "step": 14279 + }, + { + "epoch": 2.27, + "learning_rate": 2.9828697902657072e-05, + "loss": 0.7464, + "step": 14280 + }, + { + "epoch": 2.28, + "learning_rate": 2.9826166559927375e-05, + "loss": 0.7317, + "step": 14281 + }, + { + "epoch": 2.28, + "learning_rate": 2.9823635165801955e-05, + "loss": 0.6953, + "step": 14282 + }, + { + "epoch": 2.28, + "learning_rate": 2.982110372030773e-05, + "loss": 0.7368, + "step": 14283 + }, + { + "epoch": 2.28, + "learning_rate": 2.9818572223471686e-05, + "loss": 0.6966, + "step": 14284 + }, + { + "epoch": 2.28, + "learning_rate": 2.981604067532077e-05, + "loss": 0.7257, + "step": 14285 + }, + { + "epoch": 2.28, + "learning_rate": 2.981350907588194e-05, + "loss": 0.8423, + "step": 14286 + }, + { + "epoch": 2.28, + "learning_rate": 2.9810977425182167e-05, + "loss": 0.7279, + "step": 14287 + }, + { + "epoch": 2.28, + "learning_rate": 2.9808445723248407e-05, + "loss": 0.7521, + "step": 14288 + }, + { + "epoch": 2.28, + "learning_rate": 2.980591397010761e-05, + "loss": 0.7598, + "step": 14289 + }, + { + "epoch": 2.28, + "learning_rate": 2.9803382165786758e-05, + "loss": 0.8556, + "step": 14290 + }, + { + "epoch": 2.28, + "learning_rate": 2.98008503103128e-05, + "loss": 0.6979, + "step": 14291 + }, + { + "epoch": 2.28, + "learning_rate": 2.97983184037127e-05, + "loss": 0.829, + "step": 14292 + }, + { + "epoch": 2.28, + "learning_rate": 2.9795786446013423e-05, + "loss": 0.7288, + "step": 14293 + }, + { + "epoch": 2.28, + "learning_rate": 2.979325443724193e-05, + "loss": 0.7293, + "step": 14294 + }, + { + "epoch": 2.28, + "learning_rate": 2.9790722377425185e-05, + "loss": 0.7512, + "step": 14295 + }, + { + "epoch": 2.28, + "learning_rate": 2.978819026659016e-05, + "loss": 0.6945, + "step": 14296 + }, + { + "epoch": 2.28, + "learning_rate": 2.9785658104763813e-05, + "loss": 0.8035, + "step": 14297 + }, + { + "epoch": 2.28, + "learning_rate": 2.9783125891973108e-05, + "loss": 0.6627, + "step": 14298 + }, + { + "epoch": 2.28, + "learning_rate": 2.9780593628245023e-05, + "loss": 0.7536, + "step": 14299 + }, + { + "epoch": 2.28, + "learning_rate": 2.9778061313606514e-05, + "loss": 0.7666, + "step": 14300 + }, + { + "epoch": 2.28, + "learning_rate": 2.9775528948084554e-05, + "loss": 0.7856, + "step": 14301 + }, + { + "epoch": 2.28, + "learning_rate": 2.9772996531706115e-05, + "loss": 0.7451, + "step": 14302 + }, + { + "epoch": 2.28, + "learning_rate": 2.9770464064498154e-05, + "loss": 0.6836, + "step": 14303 + }, + { + "epoch": 2.28, + "learning_rate": 2.9767931546487648e-05, + "loss": 0.7227, + "step": 14304 + }, + { + "epoch": 2.28, + "learning_rate": 2.9765398977701568e-05, + "loss": 0.7176, + "step": 14305 + }, + { + "epoch": 2.28, + "learning_rate": 2.976286635816688e-05, + "loss": 0.7378, + "step": 14306 + }, + { + "epoch": 2.28, + "learning_rate": 2.9760333687910558e-05, + "loss": 0.7399, + "step": 14307 + }, + { + "epoch": 2.28, + "learning_rate": 2.9757800966959577e-05, + "loss": 0.6897, + "step": 14308 + }, + { + "epoch": 2.28, + "learning_rate": 2.9755268195340897e-05, + "loss": 0.7571, + "step": 14309 + }, + { + "epoch": 2.28, + "learning_rate": 2.9752735373081496e-05, + "loss": 0.8161, + "step": 14310 + }, + { + "epoch": 2.28, + "learning_rate": 2.975020250020836e-05, + "loss": 0.7447, + "step": 14311 + }, + { + "epoch": 2.28, + "learning_rate": 2.9747669576748442e-05, + "loss": 0.8038, + "step": 14312 + }, + { + "epoch": 2.28, + "learning_rate": 2.974513660272873e-05, + "loss": 0.7716, + "step": 14313 + }, + { + "epoch": 2.28, + "learning_rate": 2.9742603578176193e-05, + "loss": 0.7288, + "step": 14314 + }, + { + "epoch": 2.28, + "learning_rate": 2.9740070503117802e-05, + "loss": 0.7141, + "step": 14315 + }, + { + "epoch": 2.28, + "learning_rate": 2.9737537377580544e-05, + "loss": 0.7904, + "step": 14316 + }, + { + "epoch": 2.28, + "learning_rate": 2.973500420159139e-05, + "loss": 0.778, + "step": 14317 + }, + { + "epoch": 2.28, + "learning_rate": 2.9732470975177307e-05, + "loss": 0.7858, + "step": 14318 + }, + { + "epoch": 2.28, + "learning_rate": 2.972993769836529e-05, + "loss": 0.6997, + "step": 14319 + }, + { + "epoch": 2.28, + "learning_rate": 2.9727404371182306e-05, + "loss": 0.7422, + "step": 14320 + }, + { + "epoch": 2.28, + "learning_rate": 2.9724870993655328e-05, + "loss": 0.7469, + "step": 14321 + }, + { + "epoch": 2.28, + "learning_rate": 2.9722337565811357e-05, + "loss": 0.7369, + "step": 14322 + }, + { + "epoch": 2.28, + "learning_rate": 2.971980408767735e-05, + "loss": 0.8313, + "step": 14323 + }, + { + "epoch": 2.28, + "learning_rate": 2.9717270559280296e-05, + "loss": 0.6471, + "step": 14324 + }, + { + "epoch": 2.28, + "learning_rate": 2.9714736980647174e-05, + "loss": 0.7311, + "step": 14325 + }, + { + "epoch": 2.28, + "learning_rate": 2.971220335180496e-05, + "loss": 0.694, + "step": 14326 + }, + { + "epoch": 2.28, + "learning_rate": 2.9709669672780648e-05, + "loss": 0.802, + "step": 14327 + }, + { + "epoch": 2.28, + "learning_rate": 2.9707135943601217e-05, + "loss": 0.7059, + "step": 14328 + }, + { + "epoch": 2.28, + "learning_rate": 2.9704602164293637e-05, + "loss": 0.7456, + "step": 14329 + }, + { + "epoch": 2.28, + "learning_rate": 2.97020683348849e-05, + "loss": 0.767, + "step": 14330 + }, + { + "epoch": 2.28, + "learning_rate": 2.9699534455401994e-05, + "loss": 0.7441, + "step": 14331 + }, + { + "epoch": 2.28, + "learning_rate": 2.9697000525871905e-05, + "loss": 0.7621, + "step": 14332 + }, + { + "epoch": 2.28, + "learning_rate": 2.9694466546321603e-05, + "loss": 0.7691, + "step": 14333 + }, + { + "epoch": 2.28, + "learning_rate": 2.9691932516778088e-05, + "loss": 0.7377, + "step": 14334 + }, + { + "epoch": 2.28, + "learning_rate": 2.9689398437268333e-05, + "loss": 0.7912, + "step": 14335 + }, + { + "epoch": 2.28, + "learning_rate": 2.968686430781934e-05, + "loss": 0.6973, + "step": 14336 + }, + { + "epoch": 2.28, + "learning_rate": 2.968433012845808e-05, + "loss": 0.7026, + "step": 14337 + }, + { + "epoch": 2.28, + "learning_rate": 2.968179589921155e-05, + "loss": 0.6635, + "step": 14338 + }, + { + "epoch": 2.28, + "learning_rate": 2.967926162010674e-05, + "loss": 0.6841, + "step": 14339 + }, + { + "epoch": 2.28, + "learning_rate": 2.9676727291170636e-05, + "loss": 0.8001, + "step": 14340 + }, + { + "epoch": 2.28, + "learning_rate": 2.9674192912430216e-05, + "loss": 0.7289, + "step": 14341 + }, + { + "epoch": 2.28, + "learning_rate": 2.9671658483912495e-05, + "loss": 0.7358, + "step": 14342 + }, + { + "epoch": 2.29, + "learning_rate": 2.9669124005644437e-05, + "loss": 0.7355, + "step": 14343 + }, + { + "epoch": 2.29, + "learning_rate": 2.966658947765304e-05, + "loss": 0.7463, + "step": 14344 + }, + { + "epoch": 2.29, + "learning_rate": 2.9664054899965304e-05, + "loss": 0.7338, + "step": 14345 + }, + { + "epoch": 2.29, + "learning_rate": 2.966152027260821e-05, + "loss": 0.7446, + "step": 14346 + }, + { + "epoch": 2.29, + "learning_rate": 2.9658985595608758e-05, + "loss": 0.7404, + "step": 14347 + }, + { + "epoch": 2.29, + "learning_rate": 2.965645086899394e-05, + "loss": 0.7615, + "step": 14348 + }, + { + "epoch": 2.29, + "learning_rate": 2.9653916092790746e-05, + "loss": 0.8036, + "step": 14349 + }, + { + "epoch": 2.29, + "learning_rate": 2.9651381267026168e-05, + "loss": 0.6993, + "step": 14350 + }, + { + "epoch": 2.29, + "learning_rate": 2.9648846391727204e-05, + "loss": 0.7255, + "step": 14351 + }, + { + "epoch": 2.29, + "learning_rate": 2.9646311466920852e-05, + "loss": 0.758, + "step": 14352 + }, + { + "epoch": 2.29, + "learning_rate": 2.9643776492634108e-05, + "loss": 0.7435, + "step": 14353 + }, + { + "epoch": 2.29, + "learning_rate": 2.9641241468893955e-05, + "loss": 0.6841, + "step": 14354 + }, + { + "epoch": 2.29, + "learning_rate": 2.9638706395727394e-05, + "loss": 0.7299, + "step": 14355 + }, + { + "epoch": 2.29, + "learning_rate": 2.963617127316144e-05, + "loss": 0.7164, + "step": 14356 + }, + { + "epoch": 2.29, + "learning_rate": 2.9633636101223065e-05, + "loss": 0.7863, + "step": 14357 + }, + { + "epoch": 2.29, + "learning_rate": 2.9631100879939284e-05, + "loss": 0.7427, + "step": 14358 + }, + { + "epoch": 2.29, + "learning_rate": 2.9628565609337088e-05, + "loss": 0.7358, + "step": 14359 + }, + { + "epoch": 2.29, + "learning_rate": 2.962603028944348e-05, + "loss": 0.7865, + "step": 14360 + }, + { + "epoch": 2.29, + "learning_rate": 2.9623494920285456e-05, + "loss": 0.7182, + "step": 14361 + }, + { + "epoch": 2.29, + "learning_rate": 2.9620959501890028e-05, + "loss": 0.6488, + "step": 14362 + }, + { + "epoch": 2.29, + "learning_rate": 2.9618424034284177e-05, + "loss": 0.741, + "step": 14363 + }, + { + "epoch": 2.29, + "learning_rate": 2.961588851749491e-05, + "loss": 0.7507, + "step": 14364 + }, + { + "epoch": 2.29, + "learning_rate": 2.961335295154924e-05, + "loss": 0.7415, + "step": 14365 + }, + { + "epoch": 2.29, + "learning_rate": 2.9610817336474163e-05, + "loss": 0.7556, + "step": 14366 + }, + { + "epoch": 2.29, + "learning_rate": 2.960828167229667e-05, + "loss": 0.696, + "step": 14367 + }, + { + "epoch": 2.29, + "learning_rate": 2.9605745959043785e-05, + "loss": 0.6851, + "step": 14368 + }, + { + "epoch": 2.29, + "learning_rate": 2.9603210196742503e-05, + "loss": 0.7268, + "step": 14369 + }, + { + "epoch": 2.29, + "learning_rate": 2.960067438541982e-05, + "loss": 0.8201, + "step": 14370 + }, + { + "epoch": 2.29, + "learning_rate": 2.9598138525102752e-05, + "loss": 0.7328, + "step": 14371 + }, + { + "epoch": 2.29, + "learning_rate": 2.9595602615818303e-05, + "loss": 0.7084, + "step": 14372 + }, + { + "epoch": 2.29, + "learning_rate": 2.959306665759347e-05, + "loss": 0.7381, + "step": 14373 + }, + { + "epoch": 2.29, + "learning_rate": 2.959053065045527e-05, + "loss": 0.7843, + "step": 14374 + }, + { + "epoch": 2.29, + "learning_rate": 2.9587994594430703e-05, + "loss": 0.6605, + "step": 14375 + }, + { + "epoch": 2.29, + "learning_rate": 2.958545848954678e-05, + "loss": 0.814, + "step": 14376 + }, + { + "epoch": 2.29, + "learning_rate": 2.958292233583051e-05, + "loss": 0.7977, + "step": 14377 + }, + { + "epoch": 2.29, + "learning_rate": 2.9580386133308892e-05, + "loss": 0.7785, + "step": 14378 + }, + { + "epoch": 2.29, + "learning_rate": 2.957784988200895e-05, + "loss": 0.762, + "step": 14379 + }, + { + "epoch": 2.29, + "learning_rate": 2.9575313581957686e-05, + "loss": 0.6719, + "step": 14380 + }, + { + "epoch": 2.29, + "learning_rate": 2.957277723318211e-05, + "loss": 0.7803, + "step": 14381 + }, + { + "epoch": 2.29, + "learning_rate": 2.9570240835709228e-05, + "loss": 0.7314, + "step": 14382 + }, + { + "epoch": 2.29, + "learning_rate": 2.9567704389566064e-05, + "loss": 0.7581, + "step": 14383 + }, + { + "epoch": 2.29, + "learning_rate": 2.956516789477961e-05, + "loss": 0.7741, + "step": 14384 + }, + { + "epoch": 2.29, + "learning_rate": 2.9562631351376895e-05, + "loss": 0.699, + "step": 14385 + }, + { + "epoch": 2.29, + "learning_rate": 2.956009475938493e-05, + "loss": 0.8123, + "step": 14386 + }, + { + "epoch": 2.29, + "learning_rate": 2.9557558118830718e-05, + "loss": 0.7192, + "step": 14387 + }, + { + "epoch": 2.29, + "learning_rate": 2.955502142974128e-05, + "loss": 0.7416, + "step": 14388 + }, + { + "epoch": 2.29, + "learning_rate": 2.9552484692143633e-05, + "loss": 0.7106, + "step": 14389 + }, + { + "epoch": 2.29, + "learning_rate": 2.9549947906064786e-05, + "loss": 0.7773, + "step": 14390 + }, + { + "epoch": 2.29, + "learning_rate": 2.9547411071531754e-05, + "loss": 0.777, + "step": 14391 + }, + { + "epoch": 2.29, + "learning_rate": 2.9544874188571558e-05, + "loss": 0.8451, + "step": 14392 + }, + { + "epoch": 2.29, + "learning_rate": 2.9542337257211215e-05, + "loss": 0.7284, + "step": 14393 + }, + { + "epoch": 2.29, + "learning_rate": 2.9539800277477736e-05, + "loss": 0.7022, + "step": 14394 + }, + { + "epoch": 2.29, + "learning_rate": 2.9537263249398133e-05, + "loss": 0.7189, + "step": 14395 + }, + { + "epoch": 2.29, + "learning_rate": 2.9534726172999434e-05, + "loss": 0.6654, + "step": 14396 + }, + { + "epoch": 2.29, + "learning_rate": 2.9532189048308655e-05, + "loss": 0.7884, + "step": 14397 + }, + { + "epoch": 2.29, + "learning_rate": 2.9529651875352822e-05, + "loss": 0.7946, + "step": 14398 + }, + { + "epoch": 2.29, + "learning_rate": 2.9527114654158933e-05, + "loss": 0.7785, + "step": 14399 + }, + { + "epoch": 2.29, + "learning_rate": 2.9524577384754026e-05, + "loss": 0.6865, + "step": 14400 + }, + { + "epoch": 2.29, + "learning_rate": 2.9522040067165124e-05, + "loss": 0.6818, + "step": 14401 + }, + { + "epoch": 2.29, + "learning_rate": 2.951950270141923e-05, + "loss": 0.7754, + "step": 14402 + }, + { + "epoch": 2.29, + "learning_rate": 2.9516965287543386e-05, + "loss": 0.7624, + "step": 14403 + }, + { + "epoch": 2.29, + "learning_rate": 2.9514427825564596e-05, + "loss": 0.7144, + "step": 14404 + }, + { + "epoch": 2.29, + "learning_rate": 2.9511890315509894e-05, + "loss": 0.7978, + "step": 14405 + }, + { + "epoch": 2.3, + "learning_rate": 2.9509352757406305e-05, + "loss": 0.6702, + "step": 14406 + }, + { + "epoch": 2.3, + "learning_rate": 2.9506815151280838e-05, + "loss": 0.7504, + "step": 14407 + }, + { + "epoch": 2.3, + "learning_rate": 2.9504277497160525e-05, + "loss": 0.686, + "step": 14408 + }, + { + "epoch": 2.3, + "learning_rate": 2.9501739795072398e-05, + "loss": 0.7341, + "step": 14409 + }, + { + "epoch": 2.3, + "learning_rate": 2.9499202045043466e-05, + "loss": 0.7447, + "step": 14410 + }, + { + "epoch": 2.3, + "learning_rate": 2.9496664247100774e-05, + "loss": 0.7773, + "step": 14411 + }, + { + "epoch": 2.3, + "learning_rate": 2.949412640127134e-05, + "loss": 0.6383, + "step": 14412 + }, + { + "epoch": 2.3, + "learning_rate": 2.949158850758218e-05, + "loss": 0.6964, + "step": 14413 + }, + { + "epoch": 2.3, + "learning_rate": 2.9489050566060333e-05, + "loss": 0.6724, + "step": 14414 + }, + { + "epoch": 2.3, + "learning_rate": 2.9486512576732824e-05, + "loss": 0.7808, + "step": 14415 + }, + { + "epoch": 2.3, + "learning_rate": 2.948397453962667e-05, + "loss": 0.832, + "step": 14416 + }, + { + "epoch": 2.3, + "learning_rate": 2.9481436454768917e-05, + "loss": 0.7419, + "step": 14417 + }, + { + "epoch": 2.3, + "learning_rate": 2.9478898322186587e-05, + "loss": 0.6921, + "step": 14418 + }, + { + "epoch": 2.3, + "learning_rate": 2.9476360141906704e-05, + "loss": 0.7092, + "step": 14419 + }, + { + "epoch": 2.3, + "learning_rate": 2.9473821913956312e-05, + "loss": 0.8038, + "step": 14420 + }, + { + "epoch": 2.3, + "learning_rate": 2.947128363836243e-05, + "loss": 0.7987, + "step": 14421 + }, + { + "epoch": 2.3, + "learning_rate": 2.9468745315152084e-05, + "loss": 0.8054, + "step": 14422 + }, + { + "epoch": 2.3, + "learning_rate": 2.9466206944352326e-05, + "loss": 0.7625, + "step": 14423 + }, + { + "epoch": 2.3, + "learning_rate": 2.9463668525990163e-05, + "loss": 0.7563, + "step": 14424 + }, + { + "epoch": 2.3, + "learning_rate": 2.946113006009265e-05, + "loss": 0.7663, + "step": 14425 + }, + { + "epoch": 2.3, + "learning_rate": 2.945859154668681e-05, + "loss": 0.7232, + "step": 14426 + }, + { + "epoch": 2.3, + "learning_rate": 2.9456052985799664e-05, + "loss": 0.6955, + "step": 14427 + }, + { + "epoch": 2.3, + "learning_rate": 2.9453514377458268e-05, + "loss": 0.7084, + "step": 14428 + }, + { + "epoch": 2.3, + "learning_rate": 2.9450975721689657e-05, + "loss": 0.7315, + "step": 14429 + }, + { + "epoch": 2.3, + "learning_rate": 2.944843701852084e-05, + "loss": 0.7143, + "step": 14430 + }, + { + "epoch": 2.3, + "learning_rate": 2.9445898267978876e-05, + "loss": 0.7646, + "step": 14431 + }, + { + "epoch": 2.3, + "learning_rate": 2.9443359470090798e-05, + "loss": 0.7559, + "step": 14432 + }, + { + "epoch": 2.3, + "learning_rate": 2.944082062488364e-05, + "loss": 0.7002, + "step": 14433 + }, + { + "epoch": 2.3, + "learning_rate": 2.9438281732384436e-05, + "loss": 0.6938, + "step": 14434 + }, + { + "epoch": 2.3, + "learning_rate": 2.943574279262023e-05, + "loss": 0.6984, + "step": 14435 + }, + { + "epoch": 2.3, + "learning_rate": 2.9433203805618047e-05, + "loss": 0.6674, + "step": 14436 + }, + { + "epoch": 2.3, + "learning_rate": 2.943066477140494e-05, + "loss": 0.6613, + "step": 14437 + }, + { + "epoch": 2.3, + "learning_rate": 2.9428125690007945e-05, + "loss": 0.7029, + "step": 14438 + }, + { + "epoch": 2.3, + "learning_rate": 2.9425586561454095e-05, + "loss": 0.6941, + "step": 14439 + }, + { + "epoch": 2.3, + "learning_rate": 2.9423047385770437e-05, + "loss": 0.759, + "step": 14440 + }, + { + "epoch": 2.3, + "learning_rate": 2.9420508162984012e-05, + "loss": 0.7148, + "step": 14441 + }, + { + "epoch": 2.3, + "learning_rate": 2.9417968893121854e-05, + "loss": 0.7275, + "step": 14442 + }, + { + "epoch": 2.3, + "learning_rate": 2.9415429576211017e-05, + "loss": 0.6829, + "step": 14443 + }, + { + "epoch": 2.3, + "learning_rate": 2.9412890212278525e-05, + "loss": 0.7488, + "step": 14444 + }, + { + "epoch": 2.3, + "learning_rate": 2.9410350801351443e-05, + "loss": 0.7022, + "step": 14445 + }, + { + "epoch": 2.3, + "learning_rate": 2.9407811343456796e-05, + "loss": 0.7686, + "step": 14446 + }, + { + "epoch": 2.3, + "learning_rate": 2.940527183862164e-05, + "loss": 0.706, + "step": 14447 + }, + { + "epoch": 2.3, + "learning_rate": 2.9402732286873003e-05, + "loss": 0.7775, + "step": 14448 + }, + { + "epoch": 2.3, + "learning_rate": 2.9400192688237948e-05, + "loss": 0.7979, + "step": 14449 + }, + { + "epoch": 2.3, + "learning_rate": 2.939765304274351e-05, + "loss": 0.7646, + "step": 14450 + }, + { + "epoch": 2.3, + "learning_rate": 2.9395113350416735e-05, + "loss": 0.715, + "step": 14451 + }, + { + "epoch": 2.3, + "learning_rate": 2.9392573611284674e-05, + "loss": 0.7509, + "step": 14452 + }, + { + "epoch": 2.3, + "learning_rate": 2.9390033825374374e-05, + "loss": 0.7064, + "step": 14453 + }, + { + "epoch": 2.3, + "learning_rate": 2.9387493992712877e-05, + "loss": 0.7666, + "step": 14454 + }, + { + "epoch": 2.3, + "learning_rate": 2.9384954113327233e-05, + "loss": 0.7664, + "step": 14455 + }, + { + "epoch": 2.3, + "learning_rate": 2.9382414187244483e-05, + "loss": 0.7813, + "step": 14456 + }, + { + "epoch": 2.3, + "learning_rate": 2.937987421449169e-05, + "loss": 0.713, + "step": 14457 + }, + { + "epoch": 2.3, + "learning_rate": 2.937733419509589e-05, + "loss": 0.7884, + "step": 14458 + }, + { + "epoch": 2.3, + "learning_rate": 2.9374794129084137e-05, + "loss": 0.7152, + "step": 14459 + }, + { + "epoch": 2.3, + "learning_rate": 2.9372254016483492e-05, + "loss": 0.8454, + "step": 14460 + }, + { + "epoch": 2.3, + "learning_rate": 2.936971385732099e-05, + "loss": 0.7241, + "step": 14461 + }, + { + "epoch": 2.3, + "learning_rate": 2.9367173651623685e-05, + "loss": 0.6914, + "step": 14462 + }, + { + "epoch": 2.3, + "learning_rate": 2.9364633399418644e-05, + "loss": 0.6675, + "step": 14463 + }, + { + "epoch": 2.3, + "learning_rate": 2.93620931007329e-05, + "loss": 0.7461, + "step": 14464 + }, + { + "epoch": 2.3, + "learning_rate": 2.9359552755593513e-05, + "loss": 0.7743, + "step": 14465 + }, + { + "epoch": 2.3, + "learning_rate": 2.9357012364027537e-05, + "loss": 0.6889, + "step": 14466 + }, + { + "epoch": 2.3, + "learning_rate": 2.935447192606202e-05, + "loss": 0.7076, + "step": 14467 + }, + { + "epoch": 2.3, + "learning_rate": 2.9351931441724024e-05, + "loss": 0.7114, + "step": 14468 + }, + { + "epoch": 2.31, + "learning_rate": 2.9349390911040598e-05, + "loss": 0.7537, + "step": 14469 + }, + { + "epoch": 2.31, + "learning_rate": 2.93468503340388e-05, + "loss": 0.7445, + "step": 14470 + }, + { + "epoch": 2.31, + "learning_rate": 2.934430971074569e-05, + "loss": 0.7893, + "step": 14471 + }, + { + "epoch": 2.31, + "learning_rate": 2.934176904118831e-05, + "loss": 0.6805, + "step": 14472 + }, + { + "epoch": 2.31, + "learning_rate": 2.933922832539373e-05, + "loss": 0.7098, + "step": 14473 + }, + { + "epoch": 2.31, + "learning_rate": 2.933668756338901e-05, + "loss": 0.7517, + "step": 14474 + }, + { + "epoch": 2.31, + "learning_rate": 2.9334146755201193e-05, + "loss": 0.7147, + "step": 14475 + }, + { + "epoch": 2.31, + "learning_rate": 2.933160590085734e-05, + "loss": 0.7292, + "step": 14476 + }, + { + "epoch": 2.31, + "learning_rate": 2.932906500038452e-05, + "loss": 0.7707, + "step": 14477 + }, + { + "epoch": 2.31, + "learning_rate": 2.932652405380979e-05, + "loss": 0.7058, + "step": 14478 + }, + { + "epoch": 2.31, + "learning_rate": 2.93239830611602e-05, + "loss": 0.6974, + "step": 14479 + }, + { + "epoch": 2.31, + "learning_rate": 2.9321442022462814e-05, + "loss": 0.7207, + "step": 14480 + }, + { + "epoch": 2.31, + "learning_rate": 2.93189009377447e-05, + "loss": 0.7238, + "step": 14481 + }, + { + "epoch": 2.31, + "learning_rate": 2.93163598070329e-05, + "loss": 0.7825, + "step": 14482 + }, + { + "epoch": 2.31, + "learning_rate": 2.9313818630354506e-05, + "loss": 0.729, + "step": 14483 + }, + { + "epoch": 2.31, + "learning_rate": 2.931127740773656e-05, + "loss": 0.7693, + "step": 14484 + }, + { + "epoch": 2.31, + "learning_rate": 2.9308736139206115e-05, + "loss": 0.707, + "step": 14485 + }, + { + "epoch": 2.31, + "learning_rate": 2.9306194824790256e-05, + "loss": 0.8001, + "step": 14486 + }, + { + "epoch": 2.31, + "learning_rate": 2.9303653464516033e-05, + "loss": 0.6969, + "step": 14487 + }, + { + "epoch": 2.31, + "learning_rate": 2.9301112058410507e-05, + "loss": 0.7803, + "step": 14488 + }, + { + "epoch": 2.31, + "learning_rate": 2.929857060650076e-05, + "loss": 0.7633, + "step": 14489 + }, + { + "epoch": 2.31, + "learning_rate": 2.9296029108813838e-05, + "loss": 0.7422, + "step": 14490 + }, + { + "epoch": 2.31, + "learning_rate": 2.9293487565376814e-05, + "loss": 0.7284, + "step": 14491 + }, + { + "epoch": 2.31, + "learning_rate": 2.9290945976216762e-05, + "loss": 0.7297, + "step": 14492 + }, + { + "epoch": 2.31, + "learning_rate": 2.9288404341360735e-05, + "loss": 0.7129, + "step": 14493 + }, + { + "epoch": 2.31, + "learning_rate": 2.928586266083581e-05, + "loss": 0.661, + "step": 14494 + }, + { + "epoch": 2.31, + "learning_rate": 2.9283320934669045e-05, + "loss": 0.8009, + "step": 14495 + }, + { + "epoch": 2.31, + "learning_rate": 2.928077916288751e-05, + "loss": 0.7132, + "step": 14496 + }, + { + "epoch": 2.31, + "learning_rate": 2.9278237345518276e-05, + "loss": 0.8, + "step": 14497 + }, + { + "epoch": 2.31, + "learning_rate": 2.9275695482588424e-05, + "loss": 0.6576, + "step": 14498 + }, + { + "epoch": 2.31, + "learning_rate": 2.9273153574125e-05, + "loss": 0.8156, + "step": 14499 + }, + { + "epoch": 2.31, + "learning_rate": 2.927061162015508e-05, + "loss": 0.8096, + "step": 14500 + }, + { + "epoch": 2.31, + "learning_rate": 2.926806962070575e-05, + "loss": 0.8691, + "step": 14501 + }, + { + "epoch": 2.31, + "learning_rate": 2.9265527575804063e-05, + "loss": 0.8046, + "step": 14502 + }, + { + "epoch": 2.31, + "learning_rate": 2.9262985485477103e-05, + "loss": 0.6898, + "step": 14503 + }, + { + "epoch": 2.31, + "learning_rate": 2.926044334975193e-05, + "loss": 0.7613, + "step": 14504 + }, + { + "epoch": 2.31, + "learning_rate": 2.9257901168655623e-05, + "loss": 0.8139, + "step": 14505 + }, + { + "epoch": 2.31, + "learning_rate": 2.9255358942215255e-05, + "loss": 0.718, + "step": 14506 + }, + { + "epoch": 2.31, + "learning_rate": 2.92528166704579e-05, + "loss": 0.715, + "step": 14507 + }, + { + "epoch": 2.31, + "learning_rate": 2.9250274353410622e-05, + "loss": 0.7207, + "step": 14508 + }, + { + "epoch": 2.31, + "learning_rate": 2.9247731991100508e-05, + "loss": 0.7115, + "step": 14509 + }, + { + "epoch": 2.31, + "learning_rate": 2.9245189583554627e-05, + "loss": 0.7212, + "step": 14510 + }, + { + "epoch": 2.31, + "learning_rate": 2.924264713080005e-05, + "loss": 0.7473, + "step": 14511 + }, + { + "epoch": 2.31, + "learning_rate": 2.9240104632863864e-05, + "loss": 0.6716, + "step": 14512 + }, + { + "epoch": 2.31, + "learning_rate": 2.9237562089773135e-05, + "loss": 0.8657, + "step": 14513 + }, + { + "epoch": 2.31, + "learning_rate": 2.9235019501554943e-05, + "loss": 0.7065, + "step": 14514 + }, + { + "epoch": 2.31, + "learning_rate": 2.9232476868236363e-05, + "loss": 0.8783, + "step": 14515 + }, + { + "epoch": 2.31, + "learning_rate": 2.9229934189844477e-05, + "loss": 0.6411, + "step": 14516 + }, + { + "epoch": 2.31, + "learning_rate": 2.9227391466406352e-05, + "loss": 0.7334, + "step": 14517 + }, + { + "epoch": 2.31, + "learning_rate": 2.922484869794908e-05, + "loss": 0.745, + "step": 14518 + }, + { + "epoch": 2.31, + "learning_rate": 2.9222305884499735e-05, + "loss": 0.7476, + "step": 14519 + }, + { + "epoch": 2.31, + "learning_rate": 2.921976302608539e-05, + "loss": 0.7289, + "step": 14520 + }, + { + "epoch": 2.31, + "learning_rate": 2.921722012273314e-05, + "loss": 0.7121, + "step": 14521 + }, + { + "epoch": 2.31, + "learning_rate": 2.921467717447005e-05, + "loss": 0.7685, + "step": 14522 + }, + { + "epoch": 2.31, + "learning_rate": 2.9212134181323202e-05, + "loss": 0.75, + "step": 14523 + }, + { + "epoch": 2.31, + "learning_rate": 2.9209591143319697e-05, + "loss": 0.672, + "step": 14524 + }, + { + "epoch": 2.31, + "learning_rate": 2.920704806048659e-05, + "loss": 0.7152, + "step": 14525 + }, + { + "epoch": 2.31, + "learning_rate": 2.920450493285098e-05, + "loss": 0.7512, + "step": 14526 + }, + { + "epoch": 2.31, + "learning_rate": 2.9201961760439945e-05, + "loss": 0.7983, + "step": 14527 + }, + { + "epoch": 2.31, + "learning_rate": 2.9199418543280566e-05, + "loss": 0.7279, + "step": 14528 + }, + { + "epoch": 2.31, + "learning_rate": 2.9196875281399927e-05, + "loss": 0.7674, + "step": 14529 + }, + { + "epoch": 2.31, + "learning_rate": 2.9194331974825124e-05, + "loss": 0.8076, + "step": 14530 + }, + { + "epoch": 2.31, + "learning_rate": 2.9191788623583217e-05, + "loss": 0.7113, + "step": 14531 + }, + { + "epoch": 2.32, + "learning_rate": 2.9189245227701316e-05, + "loss": 0.8051, + "step": 14532 + }, + { + "epoch": 2.32, + "learning_rate": 2.9186701787206494e-05, + "loss": 0.7395, + "step": 14533 + }, + { + "epoch": 2.32, + "learning_rate": 2.9184158302125847e-05, + "loss": 0.755, + "step": 14534 + }, + { + "epoch": 2.32, + "learning_rate": 2.918161477248645e-05, + "loss": 0.6839, + "step": 14535 + }, + { + "epoch": 2.32, + "learning_rate": 2.9179071198315387e-05, + "loss": 0.6576, + "step": 14536 + }, + { + "epoch": 2.32, + "learning_rate": 2.9176527579639755e-05, + "loss": 0.7374, + "step": 14537 + }, + { + "epoch": 2.32, + "learning_rate": 2.9173983916486646e-05, + "loss": 0.7566, + "step": 14538 + }, + { + "epoch": 2.32, + "learning_rate": 2.9171440208883145e-05, + "loss": 0.7312, + "step": 14539 + }, + { + "epoch": 2.32, + "learning_rate": 2.9168896456856328e-05, + "loss": 0.7744, + "step": 14540 + }, + { + "epoch": 2.32, + "learning_rate": 2.9166352660433298e-05, + "loss": 0.7482, + "step": 14541 + }, + { + "epoch": 2.32, + "learning_rate": 2.9163808819641147e-05, + "loss": 0.7363, + "step": 14542 + }, + { + "epoch": 2.32, + "learning_rate": 2.9161264934506954e-05, + "loss": 0.7748, + "step": 14543 + }, + { + "epoch": 2.32, + "learning_rate": 2.915872100505782e-05, + "loss": 0.778, + "step": 14544 + }, + { + "epoch": 2.32, + "learning_rate": 2.915617703132083e-05, + "loss": 0.7407, + "step": 14545 + }, + { + "epoch": 2.32, + "learning_rate": 2.9153633013323078e-05, + "loss": 0.7365, + "step": 14546 + }, + { + "epoch": 2.32, + "learning_rate": 2.915108895109166e-05, + "loss": 0.7168, + "step": 14547 + }, + { + "epoch": 2.32, + "learning_rate": 2.9148544844653656e-05, + "loss": 0.7395, + "step": 14548 + }, + { + "epoch": 2.32, + "learning_rate": 2.9146000694036173e-05, + "loss": 0.7143, + "step": 14549 + }, + { + "epoch": 2.32, + "learning_rate": 2.9143456499266304e-05, + "loss": 0.7082, + "step": 14550 + }, + { + "epoch": 2.32, + "learning_rate": 2.9140912260371128e-05, + "loss": 0.7454, + "step": 14551 + }, + { + "epoch": 2.32, + "learning_rate": 2.913836797737776e-05, + "loss": 0.6753, + "step": 14552 + }, + { + "epoch": 2.32, + "learning_rate": 2.9135823650313282e-05, + "loss": 0.7878, + "step": 14553 + }, + { + "epoch": 2.32, + "learning_rate": 2.9133279279204794e-05, + "loss": 0.7255, + "step": 14554 + }, + { + "epoch": 2.32, + "learning_rate": 2.9130734864079396e-05, + "loss": 0.7417, + "step": 14555 + }, + { + "epoch": 2.32, + "learning_rate": 2.9128190404964175e-05, + "loss": 0.7738, + "step": 14556 + }, + { + "epoch": 2.32, + "learning_rate": 2.912564590188623e-05, + "loss": 0.7543, + "step": 14557 + }, + { + "epoch": 2.32, + "learning_rate": 2.912310135487266e-05, + "loss": 0.7647, + "step": 14558 + }, + { + "epoch": 2.32, + "learning_rate": 2.912055676395057e-05, + "loss": 0.6521, + "step": 14559 + }, + { + "epoch": 2.32, + "learning_rate": 2.9118012129147043e-05, + "loss": 0.7577, + "step": 14560 + }, + { + "epoch": 2.32, + "learning_rate": 2.9115467450489197e-05, + "loss": 0.7223, + "step": 14561 + }, + { + "epoch": 2.32, + "learning_rate": 2.9112922728004117e-05, + "loss": 0.7201, + "step": 14562 + }, + { + "epoch": 2.32, + "learning_rate": 2.9110377961718903e-05, + "loss": 0.8598, + "step": 14563 + }, + { + "epoch": 2.32, + "learning_rate": 2.910783315166067e-05, + "loss": 0.7822, + "step": 14564 + }, + { + "epoch": 2.32, + "learning_rate": 2.9105288297856505e-05, + "loss": 0.7196, + "step": 14565 + }, + { + "epoch": 2.32, + "learning_rate": 2.91027434003335e-05, + "loss": 0.725, + "step": 14566 + }, + { + "epoch": 2.32, + "learning_rate": 2.9100198459118777e-05, + "loss": 0.7329, + "step": 14567 + }, + { + "epoch": 2.32, + "learning_rate": 2.9097653474239428e-05, + "loss": 0.7105, + "step": 14568 + }, + { + "epoch": 2.32, + "learning_rate": 2.9095108445722558e-05, + "loss": 0.769, + "step": 14569 + }, + { + "epoch": 2.32, + "learning_rate": 2.909256337359527e-05, + "loss": 0.7419, + "step": 14570 + }, + { + "epoch": 2.32, + "learning_rate": 2.9090018257884666e-05, + "loss": 0.7869, + "step": 14571 + }, + { + "epoch": 2.32, + "learning_rate": 2.9087473098617846e-05, + "loss": 0.7178, + "step": 14572 + }, + { + "epoch": 2.32, + "learning_rate": 2.9084927895821927e-05, + "loss": 0.7728, + "step": 14573 + }, + { + "epoch": 2.32, + "learning_rate": 2.9082382649524002e-05, + "loss": 0.7678, + "step": 14574 + }, + { + "epoch": 2.32, + "learning_rate": 2.9079837359751183e-05, + "loss": 0.7205, + "step": 14575 + }, + { + "epoch": 2.32, + "learning_rate": 2.907729202653057e-05, + "loss": 0.7079, + "step": 14576 + }, + { + "epoch": 2.32, + "learning_rate": 2.9074746649889272e-05, + "loss": 0.7137, + "step": 14577 + }, + { + "epoch": 2.32, + "learning_rate": 2.90722012298544e-05, + "loss": 0.7981, + "step": 14578 + }, + { + "epoch": 2.32, + "learning_rate": 2.9069655766453057e-05, + "loss": 0.7119, + "step": 14579 + }, + { + "epoch": 2.32, + "learning_rate": 2.9067110259712342e-05, + "loss": 0.7061, + "step": 14580 + }, + { + "epoch": 2.32, + "learning_rate": 2.906456470965938e-05, + "loss": 0.6865, + "step": 14581 + }, + { + "epoch": 2.32, + "learning_rate": 2.9062019116321275e-05, + "loss": 0.7441, + "step": 14582 + }, + { + "epoch": 2.32, + "learning_rate": 2.9059473479725124e-05, + "loss": 0.7274, + "step": 14583 + }, + { + "epoch": 2.32, + "learning_rate": 2.9056927799898054e-05, + "loss": 0.7498, + "step": 14584 + }, + { + "epoch": 2.32, + "learning_rate": 2.905438207686716e-05, + "loss": 0.7751, + "step": 14585 + }, + { + "epoch": 2.32, + "learning_rate": 2.905183631065956e-05, + "loss": 0.7832, + "step": 14586 + }, + { + "epoch": 2.32, + "learning_rate": 2.904929050130236e-05, + "loss": 0.7247, + "step": 14587 + }, + { + "epoch": 2.32, + "learning_rate": 2.904674464882268e-05, + "loss": 0.7148, + "step": 14588 + }, + { + "epoch": 2.32, + "learning_rate": 2.9044198753247613e-05, + "loss": 0.8018, + "step": 14589 + }, + { + "epoch": 2.32, + "learning_rate": 2.9041652814604296e-05, + "loss": 0.7241, + "step": 14590 + }, + { + "epoch": 2.32, + "learning_rate": 2.903910683291983e-05, + "loss": 0.7635, + "step": 14591 + }, + { + "epoch": 2.32, + "learning_rate": 2.9036560808221325e-05, + "loss": 0.75, + "step": 14592 + }, + { + "epoch": 2.32, + "learning_rate": 2.9034014740535903e-05, + "loss": 0.7349, + "step": 14593 + }, + { + "epoch": 2.32, + "learning_rate": 2.9031468629890675e-05, + "loss": 0.74, + "step": 14594 + }, + { + "epoch": 2.33, + "learning_rate": 2.9028922476312748e-05, + "loss": 0.7497, + "step": 14595 + }, + { + "epoch": 2.33, + "learning_rate": 2.902637627982924e-05, + "loss": 0.7391, + "step": 14596 + }, + { + "epoch": 2.33, + "learning_rate": 2.902383004046727e-05, + "loss": 0.6939, + "step": 14597 + }, + { + "epoch": 2.33, + "learning_rate": 2.902128375825396e-05, + "loss": 0.744, + "step": 14598 + }, + { + "epoch": 2.33, + "learning_rate": 2.9018737433216415e-05, + "loss": 0.7361, + "step": 14599 + }, + { + "epoch": 2.33, + "learning_rate": 2.9016191065381752e-05, + "loss": 0.7174, + "step": 14600 + }, + { + "epoch": 2.33, + "learning_rate": 2.9013644654777102e-05, + "loss": 0.7721, + "step": 14601 + }, + { + "epoch": 2.33, + "learning_rate": 2.9011098201429566e-05, + "loss": 0.7185, + "step": 14602 + }, + { + "epoch": 2.33, + "learning_rate": 2.900855170536627e-05, + "loss": 0.6822, + "step": 14603 + }, + { + "epoch": 2.33, + "learning_rate": 2.9006005166614335e-05, + "loss": 0.7685, + "step": 14604 + }, + { + "epoch": 2.33, + "learning_rate": 2.9003458585200883e-05, + "loss": 0.7839, + "step": 14605 + }, + { + "epoch": 2.33, + "learning_rate": 2.9000911961153014e-05, + "loss": 0.6988, + "step": 14606 + }, + { + "epoch": 2.33, + "learning_rate": 2.899836529449787e-05, + "loss": 0.6654, + "step": 14607 + }, + { + "epoch": 2.33, + "learning_rate": 2.899581858526256e-05, + "loss": 0.7937, + "step": 14608 + }, + { + "epoch": 2.33, + "learning_rate": 2.8993271833474206e-05, + "loss": 0.7196, + "step": 14609 + }, + { + "epoch": 2.33, + "learning_rate": 2.8990725039159934e-05, + "loss": 0.7081, + "step": 14610 + }, + { + "epoch": 2.33, + "learning_rate": 2.8988178202346865e-05, + "loss": 0.7923, + "step": 14611 + }, + { + "epoch": 2.33, + "learning_rate": 2.8985631323062114e-05, + "loss": 0.7032, + "step": 14612 + }, + { + "epoch": 2.33, + "learning_rate": 2.8983084401332816e-05, + "loss": 0.6721, + "step": 14613 + }, + { + "epoch": 2.33, + "learning_rate": 2.8980537437186084e-05, + "loss": 0.7529, + "step": 14614 + }, + { + "epoch": 2.33, + "learning_rate": 2.8977990430649042e-05, + "loss": 0.8516, + "step": 14615 + }, + { + "epoch": 2.33, + "learning_rate": 2.8975443381748823e-05, + "loss": 0.7221, + "step": 14616 + }, + { + "epoch": 2.33, + "learning_rate": 2.8972896290512545e-05, + "loss": 0.7051, + "step": 14617 + }, + { + "epoch": 2.33, + "learning_rate": 2.8970349156967325e-05, + "loss": 0.8525, + "step": 14618 + }, + { + "epoch": 2.33, + "learning_rate": 2.896780198114031e-05, + "loss": 0.7082, + "step": 14619 + }, + { + "epoch": 2.33, + "learning_rate": 2.8965254763058602e-05, + "loss": 0.7434, + "step": 14620 + }, + { + "epoch": 2.33, + "learning_rate": 2.8962707502749344e-05, + "loss": 0.7004, + "step": 14621 + }, + { + "epoch": 2.33, + "learning_rate": 2.896016020023965e-05, + "loss": 0.662, + "step": 14622 + }, + { + "epoch": 2.33, + "learning_rate": 2.8957612855556658e-05, + "loss": 0.7025, + "step": 14623 + }, + { + "epoch": 2.33, + "learning_rate": 2.8955065468727495e-05, + "loss": 0.7595, + "step": 14624 + }, + { + "epoch": 2.33, + "learning_rate": 2.8952518039779287e-05, + "loss": 0.7504, + "step": 14625 + }, + { + "epoch": 2.33, + "learning_rate": 2.8949970568739155e-05, + "loss": 0.6854, + "step": 14626 + }, + { + "epoch": 2.33, + "learning_rate": 2.8947423055634238e-05, + "loss": 0.7052, + "step": 14627 + }, + { + "epoch": 2.33, + "learning_rate": 2.8944875500491663e-05, + "loss": 0.679, + "step": 14628 + }, + { + "epoch": 2.33, + "learning_rate": 2.8942327903338557e-05, + "loss": 0.7191, + "step": 14629 + }, + { + "epoch": 2.33, + "learning_rate": 2.8939780264202054e-05, + "loss": 0.7771, + "step": 14630 + }, + { + "epoch": 2.33, + "learning_rate": 2.893723258310928e-05, + "loss": 0.7668, + "step": 14631 + }, + { + "epoch": 2.33, + "learning_rate": 2.893468486008737e-05, + "loss": 0.824, + "step": 14632 + }, + { + "epoch": 2.33, + "learning_rate": 2.893213709516346e-05, + "loss": 0.6973, + "step": 14633 + }, + { + "epoch": 2.33, + "learning_rate": 2.8929589288364674e-05, + "loss": 0.7088, + "step": 14634 + }, + { + "epoch": 2.33, + "learning_rate": 2.892704143971815e-05, + "loss": 0.7371, + "step": 14635 + }, + { + "epoch": 2.33, + "learning_rate": 2.8924493549251015e-05, + "loss": 0.7672, + "step": 14636 + }, + { + "epoch": 2.33, + "learning_rate": 2.8921945616990408e-05, + "loss": 0.8145, + "step": 14637 + }, + { + "epoch": 2.33, + "learning_rate": 2.8919397642963457e-05, + "loss": 0.6892, + "step": 14638 + }, + { + "epoch": 2.33, + "learning_rate": 2.8916849627197307e-05, + "loss": 0.841, + "step": 14639 + }, + { + "epoch": 2.33, + "learning_rate": 2.891430156971908e-05, + "loss": 0.6822, + "step": 14640 + }, + { + "epoch": 2.33, + "learning_rate": 2.8911753470555925e-05, + "loss": 0.7006, + "step": 14641 + }, + { + "epoch": 2.33, + "learning_rate": 2.890920532973497e-05, + "loss": 0.7834, + "step": 14642 + }, + { + "epoch": 2.33, + "learning_rate": 2.8906657147283344e-05, + "loss": 0.8134, + "step": 14643 + }, + { + "epoch": 2.33, + "learning_rate": 2.890410892322819e-05, + "loss": 0.7061, + "step": 14644 + }, + { + "epoch": 2.33, + "learning_rate": 2.890156065759666e-05, + "loss": 0.7122, + "step": 14645 + }, + { + "epoch": 2.33, + "learning_rate": 2.8899012350415865e-05, + "loss": 0.7117, + "step": 14646 + }, + { + "epoch": 2.33, + "learning_rate": 2.8896464001712963e-05, + "loss": 0.7519, + "step": 14647 + }, + { + "epoch": 2.33, + "learning_rate": 2.8893915611515086e-05, + "loss": 0.7713, + "step": 14648 + }, + { + "epoch": 2.33, + "learning_rate": 2.889136717984936e-05, + "loss": 0.7792, + "step": 14649 + }, + { + "epoch": 2.33, + "learning_rate": 2.888881870674295e-05, + "loss": 0.7752, + "step": 14650 + }, + { + "epoch": 2.33, + "learning_rate": 2.8886270192222977e-05, + "loss": 0.6812, + "step": 14651 + }, + { + "epoch": 2.33, + "learning_rate": 2.888372163631658e-05, + "loss": 0.717, + "step": 14652 + }, + { + "epoch": 2.33, + "learning_rate": 2.8881173039050912e-05, + "loss": 0.7455, + "step": 14653 + }, + { + "epoch": 2.33, + "learning_rate": 2.8878624400453102e-05, + "loss": 0.7518, + "step": 14654 + }, + { + "epoch": 2.33, + "learning_rate": 2.8876075720550306e-05, + "loss": 0.7506, + "step": 14655 + }, + { + "epoch": 2.33, + "learning_rate": 2.887352699936965e-05, + "loss": 0.7515, + "step": 14656 + }, + { + "epoch": 2.34, + "learning_rate": 2.887097823693829e-05, + "loss": 0.7243, + "step": 14657 + }, + { + "epoch": 2.34, + "learning_rate": 2.8868429433283352e-05, + "loss": 0.7428, + "step": 14658 + }, + { + "epoch": 2.34, + "learning_rate": 2.8865880588431992e-05, + "loss": 0.8096, + "step": 14659 + }, + { + "epoch": 2.34, + "learning_rate": 2.8863331702411355e-05, + "loss": 0.7728, + "step": 14660 + }, + { + "epoch": 2.34, + "learning_rate": 2.8860782775248573e-05, + "loss": 0.7046, + "step": 14661 + }, + { + "epoch": 2.34, + "learning_rate": 2.8858233806970804e-05, + "loss": 0.7225, + "step": 14662 + }, + { + "epoch": 2.34, + "learning_rate": 2.885568479760519e-05, + "loss": 0.741, + "step": 14663 + }, + { + "epoch": 2.34, + "learning_rate": 2.8853135747178868e-05, + "loss": 0.7666, + "step": 14664 + }, + { + "epoch": 2.34, + "learning_rate": 2.8850586655719e-05, + "loss": 0.7347, + "step": 14665 + }, + { + "epoch": 2.34, + "learning_rate": 2.884803752325271e-05, + "loss": 0.7532, + "step": 14666 + }, + { + "epoch": 2.34, + "learning_rate": 2.884548834980716e-05, + "loss": 0.8174, + "step": 14667 + }, + { + "epoch": 2.34, + "learning_rate": 2.8842939135409497e-05, + "loss": 0.6595, + "step": 14668 + }, + { + "epoch": 2.34, + "learning_rate": 2.8840389880086855e-05, + "loss": 0.9025, + "step": 14669 + }, + { + "epoch": 2.34, + "learning_rate": 2.8837840583866404e-05, + "loss": 0.845, + "step": 14670 + }, + { + "epoch": 2.34, + "learning_rate": 2.8835291246775277e-05, + "loss": 0.7439, + "step": 14671 + }, + { + "epoch": 2.34, + "learning_rate": 2.883274186884063e-05, + "loss": 0.7057, + "step": 14672 + }, + { + "epoch": 2.34, + "learning_rate": 2.88301924500896e-05, + "loss": 0.7, + "step": 14673 + }, + { + "epoch": 2.34, + "learning_rate": 2.8827642990549354e-05, + "loss": 0.7472, + "step": 14674 + }, + { + "epoch": 2.34, + "learning_rate": 2.882509349024703e-05, + "loss": 0.7756, + "step": 14675 + }, + { + "epoch": 2.34, + "learning_rate": 2.8822543949209784e-05, + "loss": 0.7229, + "step": 14676 + }, + { + "epoch": 2.34, + "learning_rate": 2.8819994367464765e-05, + "loss": 0.7622, + "step": 14677 + }, + { + "epoch": 2.34, + "learning_rate": 2.8817444745039125e-05, + "loss": 0.7827, + "step": 14678 + }, + { + "epoch": 2.34, + "learning_rate": 2.8814895081960013e-05, + "loss": 0.7371, + "step": 14679 + }, + { + "epoch": 2.34, + "learning_rate": 2.881234537825459e-05, + "loss": 0.736, + "step": 14680 + }, + { + "epoch": 2.34, + "learning_rate": 2.8809795633949998e-05, + "loss": 0.7284, + "step": 14681 + }, + { + "epoch": 2.34, + "learning_rate": 2.8807245849073406e-05, + "loss": 0.7325, + "step": 14682 + }, + { + "epoch": 2.34, + "learning_rate": 2.880469602365195e-05, + "loss": 0.7109, + "step": 14683 + }, + { + "epoch": 2.34, + "learning_rate": 2.8802146157712784e-05, + "loss": 0.7229, + "step": 14684 + }, + { + "epoch": 2.34, + "learning_rate": 2.8799596251283085e-05, + "loss": 0.6811, + "step": 14685 + }, + { + "epoch": 2.34, + "learning_rate": 2.8797046304389985e-05, + "loss": 0.6774, + "step": 14686 + }, + { + "epoch": 2.34, + "learning_rate": 2.8794496317060644e-05, + "loss": 0.7446, + "step": 14687 + }, + { + "epoch": 2.34, + "learning_rate": 2.8791946289322223e-05, + "loss": 0.7638, + "step": 14688 + }, + { + "epoch": 2.34, + "learning_rate": 2.878939622120188e-05, + "loss": 0.8106, + "step": 14689 + }, + { + "epoch": 2.34, + "learning_rate": 2.8786846112726763e-05, + "loss": 0.6911, + "step": 14690 + }, + { + "epoch": 2.34, + "learning_rate": 2.8784295963924034e-05, + "loss": 0.7632, + "step": 14691 + }, + { + "epoch": 2.34, + "learning_rate": 2.8781745774820857e-05, + "loss": 0.8043, + "step": 14692 + }, + { + "epoch": 2.34, + "learning_rate": 2.877919554544437e-05, + "loss": 0.7277, + "step": 14693 + }, + { + "epoch": 2.34, + "learning_rate": 2.8776645275821756e-05, + "loss": 0.8223, + "step": 14694 + }, + { + "epoch": 2.34, + "learning_rate": 2.8774094965980164e-05, + "loss": 0.7527, + "step": 14695 + }, + { + "epoch": 2.34, + "learning_rate": 2.877154461594675e-05, + "loss": 0.7257, + "step": 14696 + }, + { + "epoch": 2.34, + "learning_rate": 2.8768994225748675e-05, + "loss": 0.7134, + "step": 14697 + }, + { + "epoch": 2.34, + "learning_rate": 2.876644379541309e-05, + "loss": 0.7585, + "step": 14698 + }, + { + "epoch": 2.34, + "learning_rate": 2.8763893324967177e-05, + "loss": 0.7441, + "step": 14699 + }, + { + "epoch": 2.34, + "learning_rate": 2.8761342814438086e-05, + "loss": 0.8129, + "step": 14700 + }, + { + "epoch": 2.34, + "learning_rate": 2.8758792263852967e-05, + "loss": 0.7643, + "step": 14701 + }, + { + "epoch": 2.34, + "learning_rate": 2.8756241673238998e-05, + "loss": 0.7394, + "step": 14702 + }, + { + "epoch": 2.34, + "learning_rate": 2.8753691042623336e-05, + "loss": 0.6779, + "step": 14703 + }, + { + "epoch": 2.34, + "learning_rate": 2.8751140372033143e-05, + "loss": 0.8431, + "step": 14704 + }, + { + "epoch": 2.34, + "learning_rate": 2.874858966149559e-05, + "loss": 0.7873, + "step": 14705 + }, + { + "epoch": 2.34, + "learning_rate": 2.8746038911037826e-05, + "loss": 0.7814, + "step": 14706 + }, + { + "epoch": 2.34, + "learning_rate": 2.8743488120687017e-05, + "loss": 0.7411, + "step": 14707 + }, + { + "epoch": 2.34, + "learning_rate": 2.8740937290470345e-05, + "loss": 0.7987, + "step": 14708 + }, + { + "epoch": 2.34, + "learning_rate": 2.8738386420414955e-05, + "loss": 0.7434, + "step": 14709 + }, + { + "epoch": 2.34, + "learning_rate": 2.8735835510548014e-05, + "loss": 0.6985, + "step": 14710 + }, + { + "epoch": 2.34, + "learning_rate": 2.8733284560896694e-05, + "loss": 0.6541, + "step": 14711 + }, + { + "epoch": 2.34, + "learning_rate": 2.8730733571488165e-05, + "loss": 0.707, + "step": 14712 + }, + { + "epoch": 2.34, + "learning_rate": 2.8728182542349587e-05, + "loss": 0.7084, + "step": 14713 + }, + { + "epoch": 2.34, + "learning_rate": 2.872563147350813e-05, + "loss": 0.7031, + "step": 14714 + }, + { + "epoch": 2.34, + "learning_rate": 2.8723080364990957e-05, + "loss": 0.6886, + "step": 14715 + }, + { + "epoch": 2.34, + "learning_rate": 2.8720529216825242e-05, + "loss": 0.6857, + "step": 14716 + }, + { + "epoch": 2.34, + "learning_rate": 2.8717978029038144e-05, + "loss": 0.7218, + "step": 14717 + }, + { + "epoch": 2.34, + "learning_rate": 2.871542680165684e-05, + "loss": 0.6313, + "step": 14718 + }, + { + "epoch": 2.34, + "learning_rate": 2.8712875534708493e-05, + "loss": 0.6532, + "step": 14719 + }, + { + "epoch": 2.35, + "learning_rate": 2.8710324228220286e-05, + "loss": 0.7263, + "step": 14720 + }, + { + "epoch": 2.35, + "learning_rate": 2.8707772882219363e-05, + "loss": 0.7465, + "step": 14721 + }, + { + "epoch": 2.35, + "learning_rate": 2.8705221496732926e-05, + "loss": 0.7343, + "step": 14722 + }, + { + "epoch": 2.35, + "learning_rate": 2.870267007178812e-05, + "loss": 0.7564, + "step": 14723 + }, + { + "epoch": 2.35, + "learning_rate": 2.870011860741213e-05, + "loss": 0.7867, + "step": 14724 + }, + { + "epoch": 2.35, + "learning_rate": 2.8697567103632117e-05, + "loss": 0.76, + "step": 14725 + }, + { + "epoch": 2.35, + "learning_rate": 2.869501556047527e-05, + "loss": 0.7637, + "step": 14726 + }, + { + "epoch": 2.35, + "learning_rate": 2.8692463977968743e-05, + "loss": 0.7382, + "step": 14727 + }, + { + "epoch": 2.35, + "learning_rate": 2.8689912356139713e-05, + "loss": 0.7736, + "step": 14728 + }, + { + "epoch": 2.35, + "learning_rate": 2.8687360695015365e-05, + "loss": 0.7275, + "step": 14729 + }, + { + "epoch": 2.35, + "learning_rate": 2.8684808994622853e-05, + "loss": 0.8114, + "step": 14730 + }, + { + "epoch": 2.35, + "learning_rate": 2.8682257254989376e-05, + "loss": 0.7562, + "step": 14731 + }, + { + "epoch": 2.35, + "learning_rate": 2.8679705476142088e-05, + "loss": 0.6707, + "step": 14732 + }, + { + "epoch": 2.35, + "learning_rate": 2.8677153658108163e-05, + "loss": 0.7056, + "step": 14733 + }, + { + "epoch": 2.35, + "learning_rate": 2.8674601800914796e-05, + "loss": 0.7382, + "step": 14734 + }, + { + "epoch": 2.35, + "learning_rate": 2.8672049904589148e-05, + "loss": 0.7104, + "step": 14735 + }, + { + "epoch": 2.35, + "learning_rate": 2.8669497969158397e-05, + "loss": 0.692, + "step": 14736 + }, + { + "epoch": 2.35, + "learning_rate": 2.8666945994649717e-05, + "loss": 0.7039, + "step": 14737 + }, + { + "epoch": 2.35, + "learning_rate": 2.8664393981090298e-05, + "loss": 0.714, + "step": 14738 + }, + { + "epoch": 2.35, + "learning_rate": 2.8661841928507294e-05, + "loss": 0.6503, + "step": 14739 + }, + { + "epoch": 2.35, + "learning_rate": 2.8659289836927904e-05, + "loss": 0.8206, + "step": 14740 + }, + { + "epoch": 2.35, + "learning_rate": 2.86567377063793e-05, + "loss": 0.7475, + "step": 14741 + }, + { + "epoch": 2.35, + "learning_rate": 2.8654185536888655e-05, + "loss": 0.6484, + "step": 14742 + }, + { + "epoch": 2.35, + "learning_rate": 2.8651633328483157e-05, + "loss": 0.701, + "step": 14743 + }, + { + "epoch": 2.35, + "learning_rate": 2.8649081081189983e-05, + "loss": 0.7694, + "step": 14744 + }, + { + "epoch": 2.35, + "learning_rate": 2.8646528795036303e-05, + "loss": 0.8538, + "step": 14745 + }, + { + "epoch": 2.35, + "learning_rate": 2.8643976470049316e-05, + "loss": 0.8041, + "step": 14746 + }, + { + "epoch": 2.35, + "learning_rate": 2.8641424106256182e-05, + "loss": 0.7595, + "step": 14747 + }, + { + "epoch": 2.35, + "learning_rate": 2.8638871703684095e-05, + "loss": 0.7644, + "step": 14748 + }, + { + "epoch": 2.35, + "learning_rate": 2.863631926236024e-05, + "loss": 0.7892, + "step": 14749 + }, + { + "epoch": 2.35, + "learning_rate": 2.8633766782311777e-05, + "loss": 0.7612, + "step": 14750 + }, + { + "epoch": 2.35, + "learning_rate": 2.863121426356592e-05, + "loss": 0.7518, + "step": 14751 + }, + { + "epoch": 2.35, + "learning_rate": 2.862866170614983e-05, + "loss": 0.6897, + "step": 14752 + }, + { + "epoch": 2.35, + "learning_rate": 2.8626109110090686e-05, + "loss": 0.7078, + "step": 14753 + }, + { + "epoch": 2.35, + "learning_rate": 2.8623556475415698e-05, + "loss": 0.798, + "step": 14754 + }, + { + "epoch": 2.35, + "learning_rate": 2.8621003802152023e-05, + "loss": 0.7584, + "step": 14755 + }, + { + "epoch": 2.35, + "learning_rate": 2.8618451090326858e-05, + "loss": 0.689, + "step": 14756 + }, + { + "epoch": 2.35, + "learning_rate": 2.861589833996739e-05, + "loss": 0.6771, + "step": 14757 + }, + { + "epoch": 2.35, + "learning_rate": 2.86133455511008e-05, + "loss": 0.6572, + "step": 14758 + }, + { + "epoch": 2.35, + "learning_rate": 2.8610792723754266e-05, + "loss": 0.7161, + "step": 14759 + }, + { + "epoch": 2.35, + "learning_rate": 2.8608239857954988e-05, + "loss": 0.7403, + "step": 14760 + }, + { + "epoch": 2.35, + "learning_rate": 2.860568695373014e-05, + "loss": 0.6853, + "step": 14761 + }, + { + "epoch": 2.35, + "learning_rate": 2.860313401110692e-05, + "loss": 0.6781, + "step": 14762 + }, + { + "epoch": 2.35, + "learning_rate": 2.8600581030112512e-05, + "loss": 0.6888, + "step": 14763 + }, + { + "epoch": 2.35, + "learning_rate": 2.8598028010774096e-05, + "loss": 0.658, + "step": 14764 + }, + { + "epoch": 2.35, + "learning_rate": 2.8595474953118867e-05, + "loss": 0.7358, + "step": 14765 + }, + { + "epoch": 2.35, + "learning_rate": 2.859292185717402e-05, + "loss": 0.7371, + "step": 14766 + }, + { + "epoch": 2.35, + "learning_rate": 2.8590368722966733e-05, + "loss": 0.766, + "step": 14767 + }, + { + "epoch": 2.35, + "learning_rate": 2.8587815550524194e-05, + "loss": 0.7523, + "step": 14768 + }, + { + "epoch": 2.35, + "learning_rate": 2.85852623398736e-05, + "loss": 0.6827, + "step": 14769 + }, + { + "epoch": 2.35, + "learning_rate": 2.8582709091042138e-05, + "loss": 0.7383, + "step": 14770 + }, + { + "epoch": 2.35, + "learning_rate": 2.8580155804057e-05, + "loss": 0.7394, + "step": 14771 + }, + { + "epoch": 2.35, + "learning_rate": 2.8577602478945382e-05, + "loss": 0.7144, + "step": 14772 + }, + { + "epoch": 2.35, + "learning_rate": 2.857504911573446e-05, + "loss": 0.7048, + "step": 14773 + }, + { + "epoch": 2.35, + "learning_rate": 2.857249571445144e-05, + "loss": 0.7146, + "step": 14774 + }, + { + "epoch": 2.35, + "learning_rate": 2.8569942275123512e-05, + "loss": 0.7098, + "step": 14775 + }, + { + "epoch": 2.35, + "learning_rate": 2.8567388797777867e-05, + "loss": 0.6573, + "step": 14776 + }, + { + "epoch": 2.35, + "learning_rate": 2.8564835282441693e-05, + "loss": 0.7131, + "step": 14777 + }, + { + "epoch": 2.35, + "learning_rate": 2.8562281729142194e-05, + "loss": 0.717, + "step": 14778 + }, + { + "epoch": 2.35, + "learning_rate": 2.8559728137906542e-05, + "loss": 0.7986, + "step": 14779 + }, + { + "epoch": 2.35, + "learning_rate": 2.8557174508761958e-05, + "loss": 0.7427, + "step": 14780 + }, + { + "epoch": 2.35, + "learning_rate": 2.8554620841735624e-05, + "loss": 0.7354, + "step": 14781 + }, + { + "epoch": 2.35, + "learning_rate": 2.855206713685473e-05, + "loss": 0.7424, + "step": 14782 + }, + { + "epoch": 2.36, + "learning_rate": 2.8549513394146488e-05, + "loss": 0.7026, + "step": 14783 + }, + { + "epoch": 2.36, + "learning_rate": 2.854695961363808e-05, + "loss": 0.6913, + "step": 14784 + }, + { + "epoch": 2.36, + "learning_rate": 2.8544405795356698e-05, + "loss": 0.7415, + "step": 14785 + }, + { + "epoch": 2.36, + "learning_rate": 2.854185193932956e-05, + "loss": 0.6552, + "step": 14786 + }, + { + "epoch": 2.36, + "learning_rate": 2.853929804558384e-05, + "loss": 0.7012, + "step": 14787 + }, + { + "epoch": 2.36, + "learning_rate": 2.8536744114146747e-05, + "loss": 0.7323, + "step": 14788 + }, + { + "epoch": 2.36, + "learning_rate": 2.8534190145045476e-05, + "loss": 0.6536, + "step": 14789 + }, + { + "epoch": 2.36, + "learning_rate": 2.8531636138307227e-05, + "loss": 0.753, + "step": 14790 + }, + { + "epoch": 2.36, + "learning_rate": 2.8529082093959185e-05, + "loss": 0.8196, + "step": 14791 + }, + { + "epoch": 2.36, + "learning_rate": 2.8526528012028575e-05, + "loss": 0.7179, + "step": 14792 + }, + { + "epoch": 2.36, + "learning_rate": 2.8523973892542584e-05, + "loss": 0.7952, + "step": 14793 + }, + { + "epoch": 2.36, + "learning_rate": 2.8521419735528398e-05, + "loss": 0.7655, + "step": 14794 + }, + { + "epoch": 2.36, + "learning_rate": 2.851886554101324e-05, + "loss": 0.7507, + "step": 14795 + }, + { + "epoch": 2.36, + "learning_rate": 2.8516311309024297e-05, + "loss": 0.7201, + "step": 14796 + }, + { + "epoch": 2.36, + "learning_rate": 2.8513757039588772e-05, + "loss": 0.7933, + "step": 14797 + }, + { + "epoch": 2.36, + "learning_rate": 2.851120273273388e-05, + "loss": 0.7311, + "step": 14798 + }, + { + "epoch": 2.36, + "learning_rate": 2.850864838848679e-05, + "loss": 0.7775, + "step": 14799 + }, + { + "epoch": 2.36, + "learning_rate": 2.8506094006874738e-05, + "loss": 0.73, + "step": 14800 + }, + { + "epoch": 2.36, + "learning_rate": 2.850353958792491e-05, + "loss": 0.8224, + "step": 14801 + }, + { + "epoch": 2.36, + "learning_rate": 2.8500985131664508e-05, + "loss": 0.7857, + "step": 14802 + }, + { + "epoch": 2.36, + "learning_rate": 2.8498430638120748e-05, + "loss": 0.7611, + "step": 14803 + }, + { + "epoch": 2.36, + "learning_rate": 2.8495876107320823e-05, + "loss": 0.6839, + "step": 14804 + }, + { + "epoch": 2.36, + "learning_rate": 2.8493321539291933e-05, + "loss": 0.8858, + "step": 14805 + }, + { + "epoch": 2.36, + "learning_rate": 2.849076693406131e-05, + "loss": 0.7572, + "step": 14806 + }, + { + "epoch": 2.36, + "learning_rate": 2.848821229165612e-05, + "loss": 0.6803, + "step": 14807 + }, + { + "epoch": 2.36, + "learning_rate": 2.848565761210359e-05, + "loss": 0.8031, + "step": 14808 + }, + { + "epoch": 2.36, + "learning_rate": 2.8483102895430923e-05, + "loss": 0.7309, + "step": 14809 + }, + { + "epoch": 2.36, + "learning_rate": 2.8480548141665326e-05, + "loss": 0.7653, + "step": 14810 + }, + { + "epoch": 2.36, + "learning_rate": 2.8477993350833998e-05, + "loss": 0.7122, + "step": 14811 + }, + { + "epoch": 2.36, + "learning_rate": 2.8475438522964158e-05, + "loss": 0.7533, + "step": 14812 + }, + { + "epoch": 2.36, + "learning_rate": 2.847288365808301e-05, + "loss": 0.7083, + "step": 14813 + }, + { + "epoch": 2.36, + "learning_rate": 2.8470328756217758e-05, + "loss": 0.725, + "step": 14814 + }, + { + "epoch": 2.36, + "learning_rate": 2.8467773817395608e-05, + "loss": 0.7713, + "step": 14815 + }, + { + "epoch": 2.36, + "learning_rate": 2.846521884164378e-05, + "loss": 0.7555, + "step": 14816 + }, + { + "epoch": 2.36, + "learning_rate": 2.846266382898947e-05, + "loss": 0.7509, + "step": 14817 + }, + { + "epoch": 2.36, + "learning_rate": 2.8460108779459892e-05, + "loss": 0.7716, + "step": 14818 + }, + { + "epoch": 2.36, + "learning_rate": 2.8457553693082258e-05, + "loss": 0.766, + "step": 14819 + }, + { + "epoch": 2.36, + "learning_rate": 2.8454998569883773e-05, + "loss": 0.717, + "step": 14820 + }, + { + "epoch": 2.36, + "learning_rate": 2.8452443409891654e-05, + "loss": 0.7143, + "step": 14821 + }, + { + "epoch": 2.36, + "learning_rate": 2.84498882131331e-05, + "loss": 0.6767, + "step": 14822 + }, + { + "epoch": 2.36, + "learning_rate": 2.8447332979635344e-05, + "loss": 0.7854, + "step": 14823 + }, + { + "epoch": 2.36, + "learning_rate": 2.844477770942558e-05, + "loss": 0.7449, + "step": 14824 + }, + { + "epoch": 2.36, + "learning_rate": 2.8442222402531023e-05, + "loss": 0.7965, + "step": 14825 + }, + { + "epoch": 2.36, + "learning_rate": 2.8439667058978892e-05, + "loss": 0.7767, + "step": 14826 + }, + { + "epoch": 2.36, + "learning_rate": 2.8437111678796398e-05, + "loss": 0.7161, + "step": 14827 + }, + { + "epoch": 2.36, + "learning_rate": 2.8434556262010737e-05, + "loss": 0.7324, + "step": 14828 + }, + { + "epoch": 2.36, + "learning_rate": 2.8432000808649146e-05, + "loss": 0.7657, + "step": 14829 + }, + { + "epoch": 2.36, + "learning_rate": 2.8429445318738836e-05, + "loss": 0.6657, + "step": 14830 + }, + { + "epoch": 2.36, + "learning_rate": 2.8426889792307e-05, + "loss": 0.6687, + "step": 14831 + }, + { + "epoch": 2.36, + "learning_rate": 2.8424334229380877e-05, + "loss": 0.7509, + "step": 14832 + }, + { + "epoch": 2.36, + "learning_rate": 2.8421778629987672e-05, + "loss": 0.7002, + "step": 14833 + }, + { + "epoch": 2.36, + "learning_rate": 2.8419222994154605e-05, + "loss": 0.884, + "step": 14834 + }, + { + "epoch": 2.36, + "learning_rate": 2.8416667321908886e-05, + "loss": 0.7076, + "step": 14835 + }, + { + "epoch": 2.36, + "learning_rate": 2.8414111613277732e-05, + "loss": 0.7459, + "step": 14836 + }, + { + "epoch": 2.36, + "learning_rate": 2.841155586828837e-05, + "loss": 0.7291, + "step": 14837 + }, + { + "epoch": 2.36, + "learning_rate": 2.840900008696801e-05, + "loss": 0.6987, + "step": 14838 + }, + { + "epoch": 2.36, + "learning_rate": 2.8406444269343856e-05, + "loss": 0.7642, + "step": 14839 + }, + { + "epoch": 2.36, + "learning_rate": 2.8403888415443147e-05, + "loss": 0.7358, + "step": 14840 + }, + { + "epoch": 2.36, + "learning_rate": 2.840133252529309e-05, + "loss": 0.8105, + "step": 14841 + }, + { + "epoch": 2.36, + "learning_rate": 2.839877659892091e-05, + "loss": 0.7047, + "step": 14842 + }, + { + "epoch": 2.36, + "learning_rate": 2.839622063635382e-05, + "loss": 0.6798, + "step": 14843 + }, + { + "epoch": 2.36, + "learning_rate": 2.8393664637619044e-05, + "loss": 0.6931, + "step": 14844 + }, + { + "epoch": 2.36, + "learning_rate": 2.83911086027438e-05, + "loss": 0.7836, + "step": 14845 + }, + { + "epoch": 2.37, + "learning_rate": 2.8388552531755304e-05, + "loss": 0.782, + "step": 14846 + }, + { + "epoch": 2.37, + "learning_rate": 2.838599642468079e-05, + "loss": 0.7497, + "step": 14847 + }, + { + "epoch": 2.37, + "learning_rate": 2.8383440281547456e-05, + "loss": 0.6903, + "step": 14848 + }, + { + "epoch": 2.37, + "learning_rate": 2.838088410238255e-05, + "loss": 0.6905, + "step": 14849 + }, + { + "epoch": 2.37, + "learning_rate": 2.837832788721328e-05, + "loss": 0.728, + "step": 14850 + }, + { + "epoch": 2.37, + "learning_rate": 2.837577163606686e-05, + "loss": 0.687, + "step": 14851 + }, + { + "epoch": 2.37, + "learning_rate": 2.837321534897053e-05, + "loss": 0.6431, + "step": 14852 + }, + { + "epoch": 2.37, + "learning_rate": 2.8370659025951498e-05, + "loss": 0.7686, + "step": 14853 + }, + { + "epoch": 2.37, + "learning_rate": 2.8368102667036995e-05, + "loss": 0.7456, + "step": 14854 + }, + { + "epoch": 2.37, + "learning_rate": 2.8365546272254245e-05, + "loss": 0.7563, + "step": 14855 + }, + { + "epoch": 2.37, + "learning_rate": 2.8362989841630472e-05, + "loss": 0.8067, + "step": 14856 + }, + { + "epoch": 2.37, + "learning_rate": 2.8360433375192896e-05, + "loss": 0.7292, + "step": 14857 + }, + { + "epoch": 2.37, + "learning_rate": 2.8357876872968753e-05, + "loss": 0.7085, + "step": 14858 + }, + { + "epoch": 2.37, + "learning_rate": 2.835532033498525e-05, + "loss": 0.8244, + "step": 14859 + }, + { + "epoch": 2.37, + "learning_rate": 2.8352763761269624e-05, + "loss": 0.6694, + "step": 14860 + }, + { + "epoch": 2.37, + "learning_rate": 2.83502071518491e-05, + "loss": 0.7542, + "step": 14861 + }, + { + "epoch": 2.37, + "learning_rate": 2.834765050675091e-05, + "loss": 0.7041, + "step": 14862 + }, + { + "epoch": 2.37, + "learning_rate": 2.8345093826002263e-05, + "loss": 0.7496, + "step": 14863 + }, + { + "epoch": 2.37, + "learning_rate": 2.8342537109630407e-05, + "loss": 0.7713, + "step": 14864 + }, + { + "epoch": 2.37, + "learning_rate": 2.8339980357662554e-05, + "loss": 0.6703, + "step": 14865 + }, + { + "epoch": 2.37, + "learning_rate": 2.8337423570125944e-05, + "loss": 0.7313, + "step": 14866 + }, + { + "epoch": 2.37, + "learning_rate": 2.83348667470478e-05, + "loss": 0.7268, + "step": 14867 + }, + { + "epoch": 2.37, + "learning_rate": 2.8332309888455343e-05, + "loss": 0.7067, + "step": 14868 + }, + { + "epoch": 2.37, + "learning_rate": 2.8329752994375812e-05, + "loss": 0.7031, + "step": 14869 + }, + { + "epoch": 2.37, + "learning_rate": 2.8327196064836436e-05, + "loss": 0.7109, + "step": 14870 + }, + { + "epoch": 2.37, + "learning_rate": 2.8324639099864432e-05, + "loss": 0.7007, + "step": 14871 + }, + { + "epoch": 2.37, + "learning_rate": 2.8322082099487047e-05, + "loss": 0.7096, + "step": 14872 + }, + { + "epoch": 2.37, + "learning_rate": 2.8319525063731505e-05, + "loss": 0.686, + "step": 14873 + }, + { + "epoch": 2.37, + "learning_rate": 2.8316967992625032e-05, + "loss": 0.6948, + "step": 14874 + }, + { + "epoch": 2.37, + "learning_rate": 2.831441088619487e-05, + "loss": 0.7865, + "step": 14875 + }, + { + "epoch": 2.37, + "learning_rate": 2.8311853744468242e-05, + "loss": 0.7734, + "step": 14876 + }, + { + "epoch": 2.37, + "learning_rate": 2.830929656747239e-05, + "loss": 0.7106, + "step": 14877 + }, + { + "epoch": 2.37, + "learning_rate": 2.830673935523453e-05, + "loss": 0.7687, + "step": 14878 + }, + { + "epoch": 2.37, + "learning_rate": 2.83041821077819e-05, + "loss": 0.6989, + "step": 14879 + }, + { + "epoch": 2.37, + "learning_rate": 2.8301624825141736e-05, + "loss": 0.7247, + "step": 14880 + }, + { + "epoch": 2.37, + "learning_rate": 2.829906750734128e-05, + "loss": 0.6744, + "step": 14881 + }, + { + "epoch": 2.37, + "learning_rate": 2.8296510154407757e-05, + "loss": 0.7259, + "step": 14882 + }, + { + "epoch": 2.37, + "learning_rate": 2.8293952766368396e-05, + "loss": 0.7295, + "step": 14883 + }, + { + "epoch": 2.37, + "learning_rate": 2.8291395343250444e-05, + "loss": 0.7199, + "step": 14884 + }, + { + "epoch": 2.37, + "learning_rate": 2.8288837885081132e-05, + "loss": 0.6886, + "step": 14885 + }, + { + "epoch": 2.37, + "learning_rate": 2.8286280391887683e-05, + "loss": 0.7495, + "step": 14886 + }, + { + "epoch": 2.37, + "learning_rate": 2.8283722863697353e-05, + "loss": 0.6831, + "step": 14887 + }, + { + "epoch": 2.37, + "learning_rate": 2.828116530053736e-05, + "loss": 0.7633, + "step": 14888 + }, + { + "epoch": 2.37, + "learning_rate": 2.8278607702434956e-05, + "loss": 0.662, + "step": 14889 + }, + { + "epoch": 2.37, + "learning_rate": 2.827605006941737e-05, + "loss": 0.8197, + "step": 14890 + }, + { + "epoch": 2.37, + "learning_rate": 2.827349240151183e-05, + "loss": 0.6531, + "step": 14891 + }, + { + "epoch": 2.37, + "learning_rate": 2.8270934698745593e-05, + "loss": 0.6729, + "step": 14892 + }, + { + "epoch": 2.37, + "learning_rate": 2.8268376961145886e-05, + "loss": 0.7149, + "step": 14893 + }, + { + "epoch": 2.37, + "learning_rate": 2.826581918873994e-05, + "loss": 0.7239, + "step": 14894 + }, + { + "epoch": 2.37, + "learning_rate": 2.826326138155501e-05, + "loss": 0.7738, + "step": 14895 + }, + { + "epoch": 2.37, + "learning_rate": 2.826070353961833e-05, + "loss": 0.6795, + "step": 14896 + }, + { + "epoch": 2.37, + "learning_rate": 2.8258145662957136e-05, + "loss": 0.802, + "step": 14897 + }, + { + "epoch": 2.37, + "learning_rate": 2.8255587751598665e-05, + "loss": 0.6705, + "step": 14898 + }, + { + "epoch": 2.37, + "learning_rate": 2.8253029805570164e-05, + "loss": 0.7222, + "step": 14899 + }, + { + "epoch": 2.37, + "learning_rate": 2.8250471824898866e-05, + "loss": 0.8017, + "step": 14900 + }, + { + "epoch": 2.37, + "learning_rate": 2.824791380961202e-05, + "loss": 0.7304, + "step": 14901 + }, + { + "epoch": 2.37, + "learning_rate": 2.824535575973687e-05, + "loss": 0.7111, + "step": 14902 + }, + { + "epoch": 2.37, + "learning_rate": 2.8242797675300638e-05, + "loss": 0.772, + "step": 14903 + }, + { + "epoch": 2.37, + "learning_rate": 2.824023955633059e-05, + "loss": 0.7458, + "step": 14904 + }, + { + "epoch": 2.37, + "learning_rate": 2.8237681402853956e-05, + "loss": 0.7575, + "step": 14905 + }, + { + "epoch": 2.37, + "learning_rate": 2.8235123214897974e-05, + "loss": 0.745, + "step": 14906 + }, + { + "epoch": 2.37, + "learning_rate": 2.8232564992489903e-05, + "loss": 0.7122, + "step": 14907 + }, + { + "epoch": 2.38, + "learning_rate": 2.8230006735656973e-05, + "loss": 0.7974, + "step": 14908 + }, + { + "epoch": 2.38, + "learning_rate": 2.822744844442643e-05, + "loss": 0.7756, + "step": 14909 + }, + { + "epoch": 2.38, + "learning_rate": 2.8224890118825526e-05, + "loss": 0.7711, + "step": 14910 + }, + { + "epoch": 2.38, + "learning_rate": 2.8222331758881494e-05, + "loss": 0.678, + "step": 14911 + }, + { + "epoch": 2.38, + "learning_rate": 2.8219773364621588e-05, + "loss": 0.7292, + "step": 14912 + }, + { + "epoch": 2.38, + "learning_rate": 2.821721493607305e-05, + "loss": 0.7155, + "step": 14913 + }, + { + "epoch": 2.38, + "learning_rate": 2.8214656473263128e-05, + "loss": 0.7847, + "step": 14914 + }, + { + "epoch": 2.38, + "learning_rate": 2.821209797621906e-05, + "loss": 0.693, + "step": 14915 + }, + { + "epoch": 2.38, + "learning_rate": 2.8209539444968103e-05, + "loss": 0.7295, + "step": 14916 + }, + { + "epoch": 2.38, + "learning_rate": 2.8206980879537503e-05, + "loss": 0.7526, + "step": 14917 + }, + { + "epoch": 2.38, + "learning_rate": 2.82044222799545e-05, + "loss": 0.7266, + "step": 14918 + }, + { + "epoch": 2.38, + "learning_rate": 2.8201863646246353e-05, + "loss": 0.7676, + "step": 14919 + }, + { + "epoch": 2.38, + "learning_rate": 2.8199304978440287e-05, + "loss": 0.7802, + "step": 14920 + }, + { + "epoch": 2.38, + "learning_rate": 2.8196746276563575e-05, + "loss": 0.7607, + "step": 14921 + }, + { + "epoch": 2.38, + "learning_rate": 2.8194187540643457e-05, + "loss": 0.6928, + "step": 14922 + }, + { + "epoch": 2.38, + "learning_rate": 2.8191628770707174e-05, + "loss": 0.7882, + "step": 14923 + }, + { + "epoch": 2.38, + "learning_rate": 2.8189069966781982e-05, + "loss": 0.7396, + "step": 14924 + }, + { + "epoch": 2.38, + "learning_rate": 2.8186511128895144e-05, + "loss": 0.7459, + "step": 14925 + }, + { + "epoch": 2.38, + "learning_rate": 2.818395225707388e-05, + "loss": 0.7414, + "step": 14926 + }, + { + "epoch": 2.38, + "learning_rate": 2.818139335134547e-05, + "loss": 0.6844, + "step": 14927 + }, + { + "epoch": 2.38, + "learning_rate": 2.8178834411737154e-05, + "loss": 0.7205, + "step": 14928 + }, + { + "epoch": 2.38, + "learning_rate": 2.8176275438276167e-05, + "loss": 0.693, + "step": 14929 + }, + { + "epoch": 2.38, + "learning_rate": 2.817371643098979e-05, + "loss": 0.7773, + "step": 14930 + }, + { + "epoch": 2.38, + "learning_rate": 2.8171157389905252e-05, + "loss": 0.7306, + "step": 14931 + }, + { + "epoch": 2.38, + "learning_rate": 2.816859831504981e-05, + "loss": 0.7799, + "step": 14932 + }, + { + "epoch": 2.38, + "learning_rate": 2.816603920645073e-05, + "loss": 0.7124, + "step": 14933 + }, + { + "epoch": 2.38, + "learning_rate": 2.8163480064135245e-05, + "loss": 0.734, + "step": 14934 + }, + { + "epoch": 2.38, + "learning_rate": 2.816092088813062e-05, + "loss": 0.7675, + "step": 14935 + }, + { + "epoch": 2.38, + "learning_rate": 2.815836167846411e-05, + "loss": 0.7753, + "step": 14936 + }, + { + "epoch": 2.38, + "learning_rate": 2.8155802435162964e-05, + "loss": 0.7101, + "step": 14937 + }, + { + "epoch": 2.38, + "learning_rate": 2.815324315825444e-05, + "loss": 0.6994, + "step": 14938 + }, + { + "epoch": 2.38, + "learning_rate": 2.815068384776579e-05, + "loss": 0.7629, + "step": 14939 + }, + { + "epoch": 2.38, + "learning_rate": 2.814812450372426e-05, + "loss": 0.7, + "step": 14940 + }, + { + "epoch": 2.38, + "learning_rate": 2.8145565126157126e-05, + "loss": 0.7647, + "step": 14941 + }, + { + "epoch": 2.38, + "learning_rate": 2.814300571509163e-05, + "loss": 0.7934, + "step": 14942 + }, + { + "epoch": 2.38, + "learning_rate": 2.814044627055503e-05, + "loss": 0.7756, + "step": 14943 + }, + { + "epoch": 2.38, + "learning_rate": 2.8137886792574586e-05, + "loss": 0.8021, + "step": 14944 + }, + { + "epoch": 2.38, + "learning_rate": 2.813532728117756e-05, + "loss": 0.7871, + "step": 14945 + }, + { + "epoch": 2.38, + "learning_rate": 2.8132767736391185e-05, + "loss": 0.6931, + "step": 14946 + }, + { + "epoch": 2.38, + "learning_rate": 2.813020815824275e-05, + "loss": 0.7178, + "step": 14947 + }, + { + "epoch": 2.38, + "learning_rate": 2.81276485467595e-05, + "loss": 0.6698, + "step": 14948 + }, + { + "epoch": 2.38, + "learning_rate": 2.8125088901968682e-05, + "loss": 0.7389, + "step": 14949 + }, + { + "epoch": 2.38, + "learning_rate": 2.812252922389757e-05, + "loss": 0.7145, + "step": 14950 + }, + { + "epoch": 2.38, + "learning_rate": 2.8119969512573413e-05, + "loss": 0.6939, + "step": 14951 + }, + { + "epoch": 2.38, + "learning_rate": 2.8117409768023473e-05, + "loss": 0.7013, + "step": 14952 + }, + { + "epoch": 2.38, + "learning_rate": 2.8114849990275016e-05, + "loss": 0.7445, + "step": 14953 + }, + { + "epoch": 2.38, + "learning_rate": 2.81122901793553e-05, + "loss": 0.755, + "step": 14954 + }, + { + "epoch": 2.38, + "learning_rate": 2.810973033529158e-05, + "loss": 0.711, + "step": 14955 + }, + { + "epoch": 2.38, + "learning_rate": 2.810717045811112e-05, + "loss": 0.7113, + "step": 14956 + }, + { + "epoch": 2.38, + "learning_rate": 2.810461054784118e-05, + "loss": 0.7297, + "step": 14957 + }, + { + "epoch": 2.38, + "learning_rate": 2.8102050604509027e-05, + "loss": 0.7197, + "step": 14958 + }, + { + "epoch": 2.38, + "learning_rate": 2.8099490628141917e-05, + "loss": 0.7246, + "step": 14959 + }, + { + "epoch": 2.38, + "learning_rate": 2.809693061876711e-05, + "loss": 0.793, + "step": 14960 + }, + { + "epoch": 2.38, + "learning_rate": 2.8094370576411878e-05, + "loss": 0.7731, + "step": 14961 + }, + { + "epoch": 2.38, + "learning_rate": 2.809181050110347e-05, + "loss": 0.756, + "step": 14962 + }, + { + "epoch": 2.38, + "learning_rate": 2.8089250392869167e-05, + "loss": 0.7038, + "step": 14963 + }, + { + "epoch": 2.38, + "learning_rate": 2.8086690251736213e-05, + "loss": 0.785, + "step": 14964 + }, + { + "epoch": 2.38, + "learning_rate": 2.8084130077731885e-05, + "loss": 0.7103, + "step": 14965 + }, + { + "epoch": 2.38, + "learning_rate": 2.8081569870883444e-05, + "loss": 0.7495, + "step": 14966 + }, + { + "epoch": 2.38, + "learning_rate": 2.8079009631218156e-05, + "loss": 0.7177, + "step": 14967 + }, + { + "epoch": 2.38, + "learning_rate": 2.807644935876329e-05, + "loss": 0.6899, + "step": 14968 + }, + { + "epoch": 2.38, + "learning_rate": 2.8073889053546098e-05, + "loss": 0.8489, + "step": 14969 + }, + { + "epoch": 2.38, + "learning_rate": 2.8071328715593857e-05, + "loss": 0.7685, + "step": 14970 + }, + { + "epoch": 2.39, + "learning_rate": 2.8068768344933833e-05, + "loss": 0.7499, + "step": 14971 + }, + { + "epoch": 2.39, + "learning_rate": 2.806620794159328e-05, + "loss": 0.6862, + "step": 14972 + }, + { + "epoch": 2.39, + "learning_rate": 2.8063647505599477e-05, + "loss": 0.7155, + "step": 14973 + }, + { + "epoch": 2.39, + "learning_rate": 2.8061087036979692e-05, + "loss": 0.7194, + "step": 14974 + }, + { + "epoch": 2.39, + "learning_rate": 2.8058526535761187e-05, + "loss": 0.722, + "step": 14975 + }, + { + "epoch": 2.39, + "learning_rate": 2.805596600197123e-05, + "loss": 0.7441, + "step": 14976 + }, + { + "epoch": 2.39, + "learning_rate": 2.8053405435637093e-05, + "loss": 0.7549, + "step": 14977 + }, + { + "epoch": 2.39, + "learning_rate": 2.8050844836786044e-05, + "loss": 0.7555, + "step": 14978 + }, + { + "epoch": 2.39, + "learning_rate": 2.8048284205445348e-05, + "loss": 0.7228, + "step": 14979 + }, + { + "epoch": 2.39, + "learning_rate": 2.8045723541642277e-05, + "loss": 0.706, + "step": 14980 + }, + { + "epoch": 2.39, + "learning_rate": 2.804316284540409e-05, + "loss": 0.7067, + "step": 14981 + }, + { + "epoch": 2.39, + "learning_rate": 2.8040602116758073e-05, + "loss": 0.7684, + "step": 14982 + }, + { + "epoch": 2.39, + "learning_rate": 2.8038041355731494e-05, + "loss": 0.7132, + "step": 14983 + }, + { + "epoch": 2.39, + "learning_rate": 2.8035480562351606e-05, + "loss": 0.7021, + "step": 14984 + }, + { + "epoch": 2.39, + "learning_rate": 2.8032919736645702e-05, + "loss": 0.8669, + "step": 14985 + }, + { + "epoch": 2.39, + "learning_rate": 2.8030358878641046e-05, + "loss": 0.7594, + "step": 14986 + }, + { + "epoch": 2.39, + "learning_rate": 2.8027797988364902e-05, + "loss": 0.7585, + "step": 14987 + }, + { + "epoch": 2.39, + "learning_rate": 2.8025237065844555e-05, + "loss": 0.6932, + "step": 14988 + }, + { + "epoch": 2.39, + "learning_rate": 2.8022676111107266e-05, + "loss": 0.683, + "step": 14989 + }, + { + "epoch": 2.39, + "learning_rate": 2.8020115124180306e-05, + "loss": 0.6874, + "step": 14990 + }, + { + "epoch": 2.39, + "learning_rate": 2.8017554105090966e-05, + "loss": 0.6847, + "step": 14991 + }, + { + "epoch": 2.39, + "learning_rate": 2.8014993053866494e-05, + "loss": 0.7916, + "step": 14992 + }, + { + "epoch": 2.39, + "learning_rate": 2.801243197053418e-05, + "loss": 0.828, + "step": 14993 + }, + { + "epoch": 2.39, + "learning_rate": 2.80098708551213e-05, + "loss": 0.73, + "step": 14994 + }, + { + "epoch": 2.39, + "learning_rate": 2.8007309707655116e-05, + "loss": 0.6976, + "step": 14995 + }, + { + "epoch": 2.39, + "learning_rate": 2.800474852816291e-05, + "loss": 0.7064, + "step": 14996 + }, + { + "epoch": 2.39, + "learning_rate": 2.8002187316671963e-05, + "loss": 0.7293, + "step": 14997 + }, + { + "epoch": 2.39, + "learning_rate": 2.799962607320954e-05, + "loss": 0.7765, + "step": 14998 + }, + { + "epoch": 2.39, + "learning_rate": 2.799706479780292e-05, + "loss": 0.691, + "step": 14999 + }, + { + "epoch": 2.39, + "learning_rate": 2.7994503490479384e-05, + "loss": 0.735, + "step": 15000 + }, + { + "epoch": 2.39, + "learning_rate": 2.7991942151266193e-05, + "loss": 0.7142, + "step": 15001 + }, + { + "epoch": 2.39, + "learning_rate": 2.7989380780190645e-05, + "loss": 0.8554, + "step": 15002 + }, + { + "epoch": 2.39, + "learning_rate": 2.7986819377279997e-05, + "loss": 0.7117, + "step": 15003 + }, + { + "epoch": 2.39, + "learning_rate": 2.798425794256154e-05, + "loss": 0.7625, + "step": 15004 + }, + { + "epoch": 2.39, + "learning_rate": 2.798169647606255e-05, + "loss": 0.6991, + "step": 15005 + }, + { + "epoch": 2.39, + "learning_rate": 2.7979134977810305e-05, + "loss": 0.7323, + "step": 15006 + }, + { + "epoch": 2.39, + "learning_rate": 2.797657344783207e-05, + "loss": 0.8013, + "step": 15007 + }, + { + "epoch": 2.39, + "learning_rate": 2.7974011886155156e-05, + "loss": 0.6724, + "step": 15008 + }, + { + "epoch": 2.39, + "learning_rate": 2.79714502928068e-05, + "loss": 0.7371, + "step": 15009 + }, + { + "epoch": 2.39, + "learning_rate": 2.7968888667814314e-05, + "loss": 0.7194, + "step": 15010 + }, + { + "epoch": 2.39, + "learning_rate": 2.7966327011204963e-05, + "loss": 0.7067, + "step": 15011 + }, + { + "epoch": 2.39, + "learning_rate": 2.7963765323006025e-05, + "loss": 0.6881, + "step": 15012 + }, + { + "epoch": 2.39, + "learning_rate": 2.796120360324479e-05, + "loss": 0.9065, + "step": 15013 + }, + { + "epoch": 2.39, + "learning_rate": 2.7958641851948536e-05, + "loss": 0.6744, + "step": 15014 + }, + { + "epoch": 2.39, + "learning_rate": 2.795608006914454e-05, + "loss": 0.7479, + "step": 15015 + }, + { + "epoch": 2.39, + "learning_rate": 2.7953518254860083e-05, + "loss": 0.7476, + "step": 15016 + }, + { + "epoch": 2.39, + "learning_rate": 2.795095640912246e-05, + "loss": 0.7013, + "step": 15017 + }, + { + "epoch": 2.39, + "learning_rate": 2.794839453195894e-05, + "loss": 0.7626, + "step": 15018 + }, + { + "epoch": 2.39, + "learning_rate": 2.79458326233968e-05, + "loss": 0.6761, + "step": 15019 + }, + { + "epoch": 2.39, + "learning_rate": 2.794327068346334e-05, + "loss": 0.7236, + "step": 15020 + }, + { + "epoch": 2.39, + "learning_rate": 2.7940708712185827e-05, + "loss": 0.6968, + "step": 15021 + }, + { + "epoch": 2.39, + "learning_rate": 2.7938146709591557e-05, + "loss": 0.6681, + "step": 15022 + }, + { + "epoch": 2.39, + "learning_rate": 2.7935584675707805e-05, + "loss": 0.683, + "step": 15023 + }, + { + "epoch": 2.39, + "learning_rate": 2.7933022610561855e-05, + "loss": 0.7253, + "step": 15024 + }, + { + "epoch": 2.39, + "learning_rate": 2.7930460514181e-05, + "loss": 0.7818, + "step": 15025 + }, + { + "epoch": 2.39, + "learning_rate": 2.792789838659252e-05, + "loss": 0.8084, + "step": 15026 + }, + { + "epoch": 2.39, + "learning_rate": 2.7925336227823694e-05, + "loss": 0.7714, + "step": 15027 + }, + { + "epoch": 2.39, + "learning_rate": 2.7922774037901826e-05, + "loss": 0.7279, + "step": 15028 + }, + { + "epoch": 2.39, + "learning_rate": 2.792021181685418e-05, + "loss": 0.7268, + "step": 15029 + }, + { + "epoch": 2.39, + "learning_rate": 2.791764956470805e-05, + "loss": 0.7049, + "step": 15030 + }, + { + "epoch": 2.39, + "learning_rate": 2.7915087281490727e-05, + "loss": 0.7587, + "step": 15031 + }, + { + "epoch": 2.39, + "learning_rate": 2.791252496722949e-05, + "loss": 0.717, + "step": 15032 + }, + { + "epoch": 2.39, + "learning_rate": 2.790996262195163e-05, + "loss": 0.6816, + "step": 15033 + }, + { + "epoch": 2.4, + "learning_rate": 2.790740024568444e-05, + "loss": 0.7219, + "step": 15034 + }, + { + "epoch": 2.4, + "learning_rate": 2.79048378384552e-05, + "loss": 0.7203, + "step": 15035 + }, + { + "epoch": 2.4, + "learning_rate": 2.79022754002912e-05, + "loss": 0.7182, + "step": 15036 + }, + { + "epoch": 2.4, + "learning_rate": 2.789971293121973e-05, + "loss": 0.7285, + "step": 15037 + }, + { + "epoch": 2.4, + "learning_rate": 2.7897150431268076e-05, + "loss": 0.6847, + "step": 15038 + }, + { + "epoch": 2.4, + "learning_rate": 2.789458790046353e-05, + "loss": 0.6784, + "step": 15039 + }, + { + "epoch": 2.4, + "learning_rate": 2.7892025338833384e-05, + "loss": 0.7115, + "step": 15040 + }, + { + "epoch": 2.4, + "learning_rate": 2.7889462746404915e-05, + "loss": 0.683, + "step": 15041 + }, + { + "epoch": 2.4, + "learning_rate": 2.7886900123205434e-05, + "loss": 0.7217, + "step": 15042 + }, + { + "epoch": 2.4, + "learning_rate": 2.788433746926221e-05, + "loss": 0.7998, + "step": 15043 + }, + { + "epoch": 2.4, + "learning_rate": 2.7881774784602543e-05, + "loss": 0.7419, + "step": 15044 + }, + { + "epoch": 2.4, + "learning_rate": 2.787921206925373e-05, + "loss": 0.679, + "step": 15045 + }, + { + "epoch": 2.4, + "learning_rate": 2.7876649323243054e-05, + "loss": 0.7679, + "step": 15046 + }, + { + "epoch": 2.4, + "learning_rate": 2.7874086546597804e-05, + "loss": 0.6944, + "step": 15047 + }, + { + "epoch": 2.4, + "learning_rate": 2.7871523739345283e-05, + "loss": 0.6782, + "step": 15048 + }, + { + "epoch": 2.4, + "learning_rate": 2.7868960901512786e-05, + "loss": 0.7628, + "step": 15049 + }, + { + "epoch": 2.4, + "learning_rate": 2.7866398033127583e-05, + "loss": 0.7143, + "step": 15050 + }, + { + "epoch": 2.4, + "learning_rate": 2.7863835134216987e-05, + "loss": 0.7705, + "step": 15051 + }, + { + "epoch": 2.4, + "learning_rate": 2.7861272204808288e-05, + "loss": 0.7114, + "step": 15052 + }, + { + "epoch": 2.4, + "learning_rate": 2.7858709244928765e-05, + "loss": 0.7121, + "step": 15053 + }, + { + "epoch": 2.4, + "learning_rate": 2.7856146254605732e-05, + "loss": 0.7061, + "step": 15054 + }, + { + "epoch": 2.4, + "learning_rate": 2.7853583233866475e-05, + "loss": 0.6645, + "step": 15055 + }, + { + "epoch": 2.4, + "learning_rate": 2.785102018273829e-05, + "loss": 0.793, + "step": 15056 + }, + { + "epoch": 2.4, + "learning_rate": 2.784845710124847e-05, + "loss": 0.7487, + "step": 15057 + }, + { + "epoch": 2.4, + "learning_rate": 2.7845893989424317e-05, + "loss": 0.6436, + "step": 15058 + }, + { + "epoch": 2.4, + "learning_rate": 2.7843330847293115e-05, + "loss": 0.7346, + "step": 15059 + }, + { + "epoch": 2.4, + "learning_rate": 2.7840767674882167e-05, + "loss": 0.7729, + "step": 15060 + }, + { + "epoch": 2.4, + "learning_rate": 2.7838204472218766e-05, + "loss": 0.7385, + "step": 15061 + }, + { + "epoch": 2.4, + "learning_rate": 2.7835641239330213e-05, + "loss": 0.6197, + "step": 15062 + }, + { + "epoch": 2.4, + "learning_rate": 2.7833077976243798e-05, + "loss": 0.7099, + "step": 15063 + }, + { + "epoch": 2.4, + "learning_rate": 2.7830514682986825e-05, + "loss": 0.7437, + "step": 15064 + }, + { + "epoch": 2.4, + "learning_rate": 2.7827951359586592e-05, + "loss": 0.6154, + "step": 15065 + }, + { + "epoch": 2.4, + "learning_rate": 2.782538800607039e-05, + "loss": 0.7301, + "step": 15066 + }, + { + "epoch": 2.4, + "learning_rate": 2.7822824622465525e-05, + "loss": 0.7664, + "step": 15067 + }, + { + "epoch": 2.4, + "learning_rate": 2.7820261208799285e-05, + "loss": 0.7424, + "step": 15068 + }, + { + "epoch": 2.4, + "learning_rate": 2.7817697765098988e-05, + "loss": 0.7248, + "step": 15069 + }, + { + "epoch": 2.4, + "learning_rate": 2.7815134291391908e-05, + "loss": 0.7315, + "step": 15070 + }, + { + "epoch": 2.4, + "learning_rate": 2.781257078770536e-05, + "loss": 0.6968, + "step": 15071 + }, + { + "epoch": 2.4, + "learning_rate": 2.7810007254066644e-05, + "loss": 0.6962, + "step": 15072 + }, + { + "epoch": 2.4, + "learning_rate": 2.7807443690503056e-05, + "loss": 0.7908, + "step": 15073 + }, + { + "epoch": 2.4, + "learning_rate": 2.7804880097041892e-05, + "loss": 0.7943, + "step": 15074 + }, + { + "epoch": 2.4, + "learning_rate": 2.7802316473710467e-05, + "loss": 0.7227, + "step": 15075 + }, + { + "epoch": 2.4, + "learning_rate": 2.7799752820536062e-05, + "loss": 0.6906, + "step": 15076 + }, + { + "epoch": 2.4, + "learning_rate": 2.7797189137545993e-05, + "loss": 0.753, + "step": 15077 + }, + { + "epoch": 2.4, + "learning_rate": 2.7794625424767563e-05, + "loss": 0.7834, + "step": 15078 + }, + { + "epoch": 2.4, + "learning_rate": 2.7792061682228065e-05, + "loss": 0.641, + "step": 15079 + }, + { + "epoch": 2.4, + "learning_rate": 2.778949790995481e-05, + "loss": 0.712, + "step": 15080 + }, + { + "epoch": 2.4, + "learning_rate": 2.7786934107975094e-05, + "loss": 0.7205, + "step": 15081 + }, + { + "epoch": 2.4, + "learning_rate": 2.7784370276316214e-05, + "loss": 0.6737, + "step": 15082 + }, + { + "epoch": 2.4, + "learning_rate": 2.7781806415005485e-05, + "loss": 0.7218, + "step": 15083 + }, + { + "epoch": 2.4, + "learning_rate": 2.777924252407021e-05, + "loss": 0.7578, + "step": 15084 + }, + { + "epoch": 2.4, + "learning_rate": 2.777667860353768e-05, + "loss": 0.7744, + "step": 15085 + }, + { + "epoch": 2.4, + "learning_rate": 2.777411465343522e-05, + "loss": 0.777, + "step": 15086 + }, + { + "epoch": 2.4, + "learning_rate": 2.777155067379012e-05, + "loss": 0.7155, + "step": 15087 + }, + { + "epoch": 2.4, + "learning_rate": 2.776898666462968e-05, + "loss": 0.7715, + "step": 15088 + }, + { + "epoch": 2.4, + "learning_rate": 2.776642262598123e-05, + "loss": 0.7654, + "step": 15089 + }, + { + "epoch": 2.4, + "learning_rate": 2.7763858557872046e-05, + "loss": 0.7997, + "step": 15090 + }, + { + "epoch": 2.4, + "learning_rate": 2.7761294460329445e-05, + "loss": 0.7658, + "step": 15091 + }, + { + "epoch": 2.4, + "learning_rate": 2.7758730333380744e-05, + "loss": 0.8288, + "step": 15092 + }, + { + "epoch": 2.4, + "learning_rate": 2.7756166177053227e-05, + "loss": 0.7794, + "step": 15093 + }, + { + "epoch": 2.4, + "learning_rate": 2.7753601991374223e-05, + "loss": 0.719, + "step": 15094 + }, + { + "epoch": 2.4, + "learning_rate": 2.775103777637103e-05, + "loss": 0.7803, + "step": 15095 + }, + { + "epoch": 2.4, + "learning_rate": 2.7748473532070945e-05, + "loss": 0.674, + "step": 15096 + }, + { + "epoch": 2.41, + "learning_rate": 2.7745909258501295e-05, + "loss": 0.7583, + "step": 15097 + }, + { + "epoch": 2.41, + "learning_rate": 2.7743344955689378e-05, + "loss": 0.7349, + "step": 15098 + }, + { + "epoch": 2.41, + "learning_rate": 2.7740780623662504e-05, + "loss": 0.7778, + "step": 15099 + }, + { + "epoch": 2.41, + "learning_rate": 2.773821626244798e-05, + "loss": 0.6742, + "step": 15100 + }, + { + "epoch": 2.41, + "learning_rate": 2.773565187207311e-05, + "loss": 0.7853, + "step": 15101 + }, + { + "epoch": 2.41, + "learning_rate": 2.773308745256521e-05, + "loss": 0.8385, + "step": 15102 + }, + { + "epoch": 2.41, + "learning_rate": 2.7730523003951586e-05, + "loss": 0.7402, + "step": 15103 + }, + { + "epoch": 2.41, + "learning_rate": 2.7727958526259555e-05, + "loss": 0.7443, + "step": 15104 + }, + { + "epoch": 2.41, + "learning_rate": 2.7725394019516416e-05, + "loss": 0.6908, + "step": 15105 + }, + { + "epoch": 2.41, + "learning_rate": 2.7722829483749492e-05, + "loss": 0.7852, + "step": 15106 + }, + { + "epoch": 2.41, + "learning_rate": 2.7720264918986083e-05, + "loss": 0.724, + "step": 15107 + }, + { + "epoch": 2.41, + "learning_rate": 2.7717700325253503e-05, + "loss": 0.7938, + "step": 15108 + }, + { + "epoch": 2.41, + "learning_rate": 2.771513570257907e-05, + "loss": 0.765, + "step": 15109 + }, + { + "epoch": 2.41, + "learning_rate": 2.7712571050990083e-05, + "loss": 0.6647, + "step": 15110 + }, + { + "epoch": 2.41, + "learning_rate": 2.771000637051387e-05, + "loss": 0.7126, + "step": 15111 + }, + { + "epoch": 2.41, + "learning_rate": 2.770744166117773e-05, + "loss": 0.736, + "step": 15112 + }, + { + "epoch": 2.41, + "learning_rate": 2.770487692300897e-05, + "loss": 0.6873, + "step": 15113 + }, + { + "epoch": 2.41, + "learning_rate": 2.7702312156034928e-05, + "loss": 0.7701, + "step": 15114 + }, + { + "epoch": 2.41, + "learning_rate": 2.7699747360282896e-05, + "loss": 0.7726, + "step": 15115 + }, + { + "epoch": 2.41, + "learning_rate": 2.769718253578019e-05, + "loss": 0.7066, + "step": 15116 + }, + { + "epoch": 2.41, + "learning_rate": 2.7694617682554132e-05, + "loss": 0.7809, + "step": 15117 + }, + { + "epoch": 2.41, + "learning_rate": 2.7692052800632036e-05, + "loss": 0.7904, + "step": 15118 + }, + { + "epoch": 2.41, + "learning_rate": 2.7689487890041205e-05, + "loss": 0.7131, + "step": 15119 + }, + { + "epoch": 2.41, + "learning_rate": 2.7686922950808962e-05, + "loss": 0.7303, + "step": 15120 + }, + { + "epoch": 2.41, + "learning_rate": 2.7684357982962627e-05, + "loss": 0.7067, + "step": 15121 + }, + { + "epoch": 2.41, + "learning_rate": 2.7681792986529497e-05, + "loss": 0.711, + "step": 15122 + }, + { + "epoch": 2.41, + "learning_rate": 2.767922796153691e-05, + "loss": 0.7236, + "step": 15123 + }, + { + "epoch": 2.41, + "learning_rate": 2.767666290801217e-05, + "loss": 0.7136, + "step": 15124 + }, + { + "epoch": 2.41, + "learning_rate": 2.7674097825982588e-05, + "loss": 0.7432, + "step": 15125 + }, + { + "epoch": 2.41, + "learning_rate": 2.7671532715475495e-05, + "loss": 0.7908, + "step": 15126 + }, + { + "epoch": 2.41, + "learning_rate": 2.7668967576518202e-05, + "loss": 0.7097, + "step": 15127 + }, + { + "epoch": 2.41, + "learning_rate": 2.7666402409138014e-05, + "loss": 0.6756, + "step": 15128 + }, + { + "epoch": 2.41, + "learning_rate": 2.7663837213362276e-05, + "loss": 0.738, + "step": 15129 + }, + { + "epoch": 2.41, + "learning_rate": 2.7661271989218272e-05, + "loss": 0.7165, + "step": 15130 + }, + { + "epoch": 2.41, + "learning_rate": 2.7658706736733347e-05, + "loss": 0.7568, + "step": 15131 + }, + { + "epoch": 2.41, + "learning_rate": 2.765614145593481e-05, + "loss": 0.7526, + "step": 15132 + }, + { + "epoch": 2.41, + "learning_rate": 2.765357614684998e-05, + "loss": 0.7291, + "step": 15133 + }, + { + "epoch": 2.41, + "learning_rate": 2.7651010809506166e-05, + "loss": 0.7219, + "step": 15134 + }, + { + "epoch": 2.41, + "learning_rate": 2.7648445443930704e-05, + "loss": 0.7432, + "step": 15135 + }, + { + "epoch": 2.41, + "learning_rate": 2.76458800501509e-05, + "loss": 0.7021, + "step": 15136 + }, + { + "epoch": 2.41, + "learning_rate": 2.7643314628194083e-05, + "loss": 0.7359, + "step": 15137 + }, + { + "epoch": 2.41, + "learning_rate": 2.764074917808757e-05, + "loss": 0.7249, + "step": 15138 + }, + { + "epoch": 2.41, + "learning_rate": 2.763818369985868e-05, + "loss": 0.7041, + "step": 15139 + }, + { + "epoch": 2.41, + "learning_rate": 2.763561819353474e-05, + "loss": 0.7904, + "step": 15140 + }, + { + "epoch": 2.41, + "learning_rate": 2.7633052659143066e-05, + "loss": 0.7131, + "step": 15141 + }, + { + "epoch": 2.41, + "learning_rate": 2.7630487096710973e-05, + "loss": 0.7236, + "step": 15142 + }, + { + "epoch": 2.41, + "learning_rate": 2.7627921506265796e-05, + "loss": 0.7704, + "step": 15143 + }, + { + "epoch": 2.41, + "learning_rate": 2.7625355887834848e-05, + "loss": 0.7005, + "step": 15144 + }, + { + "epoch": 2.41, + "learning_rate": 2.762279024144545e-05, + "loss": 0.7382, + "step": 15145 + }, + { + "epoch": 2.41, + "learning_rate": 2.762022456712493e-05, + "loss": 0.7014, + "step": 15146 + }, + { + "epoch": 2.41, + "learning_rate": 2.7617658864900613e-05, + "loss": 0.7094, + "step": 15147 + }, + { + "epoch": 2.41, + "learning_rate": 2.761509313479981e-05, + "loss": 0.7115, + "step": 15148 + }, + { + "epoch": 2.41, + "learning_rate": 2.761252737684987e-05, + "loss": 0.7581, + "step": 15149 + }, + { + "epoch": 2.41, + "learning_rate": 2.760996159107809e-05, + "loss": 0.7363, + "step": 15150 + }, + { + "epoch": 2.41, + "learning_rate": 2.7607395777511795e-05, + "loss": 0.6789, + "step": 15151 + }, + { + "epoch": 2.41, + "learning_rate": 2.7604829936178326e-05, + "loss": 0.7157, + "step": 15152 + }, + { + "epoch": 2.41, + "learning_rate": 2.7602264067105006e-05, + "loss": 0.6629, + "step": 15153 + }, + { + "epoch": 2.41, + "learning_rate": 2.7599698170319143e-05, + "loss": 0.7195, + "step": 15154 + }, + { + "epoch": 2.41, + "learning_rate": 2.7597132245848072e-05, + "loss": 0.7716, + "step": 15155 + }, + { + "epoch": 2.41, + "learning_rate": 2.7594566293719126e-05, + "loss": 0.6855, + "step": 15156 + }, + { + "epoch": 2.41, + "learning_rate": 2.7592000313959622e-05, + "loss": 0.6975, + "step": 15157 + }, + { + "epoch": 2.41, + "learning_rate": 2.758943430659689e-05, + "loss": 0.7068, + "step": 15158 + }, + { + "epoch": 2.42, + "learning_rate": 2.758686827165825e-05, + "loss": 0.7371, + "step": 15159 + }, + { + "epoch": 2.42, + "learning_rate": 2.7584302209171037e-05, + "loss": 0.7702, + "step": 15160 + }, + { + "epoch": 2.42, + "learning_rate": 2.7581736119162576e-05, + "loss": 0.7269, + "step": 15161 + }, + { + "epoch": 2.42, + "learning_rate": 2.7579170001660188e-05, + "loss": 0.7043, + "step": 15162 + }, + { + "epoch": 2.42, + "learning_rate": 2.7576603856691208e-05, + "loss": 0.6733, + "step": 15163 + }, + { + "epoch": 2.42, + "learning_rate": 2.757403768428296e-05, + "loss": 0.6842, + "step": 15164 + }, + { + "epoch": 2.42, + "learning_rate": 2.7571471484462774e-05, + "loss": 0.7591, + "step": 15165 + }, + { + "epoch": 2.42, + "learning_rate": 2.7568905257257977e-05, + "loss": 0.7406, + "step": 15166 + }, + { + "epoch": 2.42, + "learning_rate": 2.75663390026959e-05, + "loss": 0.7497, + "step": 15167 + }, + { + "epoch": 2.42, + "learning_rate": 2.756377272080387e-05, + "loss": 0.6555, + "step": 15168 + }, + { + "epoch": 2.42, + "learning_rate": 2.756120641160922e-05, + "loss": 0.7382, + "step": 15169 + }, + { + "epoch": 2.42, + "learning_rate": 2.755864007513928e-05, + "loss": 0.7897, + "step": 15170 + }, + { + "epoch": 2.42, + "learning_rate": 2.7556073711421365e-05, + "loss": 0.7324, + "step": 15171 + }, + { + "epoch": 2.42, + "learning_rate": 2.7553507320482826e-05, + "loss": 0.7474, + "step": 15172 + }, + { + "epoch": 2.42, + "learning_rate": 2.755094090235098e-05, + "loss": 0.661, + "step": 15173 + }, + { + "epoch": 2.42, + "learning_rate": 2.7548374457053162e-05, + "loss": 0.6982, + "step": 15174 + }, + { + "epoch": 2.42, + "learning_rate": 2.75458079846167e-05, + "loss": 0.7358, + "step": 15175 + }, + { + "epoch": 2.42, + "learning_rate": 2.7543241485068938e-05, + "loss": 0.7858, + "step": 15176 + }, + { + "epoch": 2.42, + "learning_rate": 2.7540674958437185e-05, + "loss": 0.6982, + "step": 15177 + }, + { + "epoch": 2.42, + "learning_rate": 2.7538108404748797e-05, + "loss": 0.6917, + "step": 15178 + }, + { + "epoch": 2.42, + "learning_rate": 2.7535541824031096e-05, + "loss": 0.6967, + "step": 15179 + }, + { + "epoch": 2.42, + "learning_rate": 2.753297521631141e-05, + "loss": 0.7514, + "step": 15180 + }, + { + "epoch": 2.42, + "learning_rate": 2.753040858161708e-05, + "loss": 0.721, + "step": 15181 + }, + { + "epoch": 2.42, + "learning_rate": 2.7527841919975423e-05, + "loss": 0.7809, + "step": 15182 + }, + { + "epoch": 2.42, + "learning_rate": 2.7525275231413793e-05, + "loss": 0.7332, + "step": 15183 + }, + { + "epoch": 2.42, + "learning_rate": 2.7522708515959517e-05, + "loss": 0.747, + "step": 15184 + }, + { + "epoch": 2.42, + "learning_rate": 2.7520141773639917e-05, + "loss": 0.6651, + "step": 15185 + }, + { + "epoch": 2.42, + "learning_rate": 2.751757500448235e-05, + "loss": 0.7319, + "step": 15186 + }, + { + "epoch": 2.42, + "learning_rate": 2.7515008208514133e-05, + "loss": 0.7076, + "step": 15187 + }, + { + "epoch": 2.42, + "learning_rate": 2.7512441385762606e-05, + "loss": 0.7311, + "step": 15188 + }, + { + "epoch": 2.42, + "learning_rate": 2.7509874536255096e-05, + "loss": 0.7317, + "step": 15189 + }, + { + "epoch": 2.42, + "learning_rate": 2.750730766001896e-05, + "loss": 0.7167, + "step": 15190 + }, + { + "epoch": 2.42, + "learning_rate": 2.7504740757081508e-05, + "loss": 0.7149, + "step": 15191 + }, + { + "epoch": 2.42, + "learning_rate": 2.750217382747009e-05, + "loss": 0.7507, + "step": 15192 + }, + { + "epoch": 2.42, + "learning_rate": 2.7499606871212048e-05, + "loss": 0.7094, + "step": 15193 + }, + { + "epoch": 2.42, + "learning_rate": 2.74970398883347e-05, + "loss": 0.7744, + "step": 15194 + }, + { + "epoch": 2.42, + "learning_rate": 2.7494472878865397e-05, + "loss": 0.7677, + "step": 15195 + }, + { + "epoch": 2.42, + "learning_rate": 2.7491905842831468e-05, + "loss": 0.6967, + "step": 15196 + }, + { + "epoch": 2.42, + "learning_rate": 2.7489338780260254e-05, + "loss": 0.7929, + "step": 15197 + }, + { + "epoch": 2.42, + "learning_rate": 2.74867716911791e-05, + "loss": 0.7526, + "step": 15198 + }, + { + "epoch": 2.42, + "learning_rate": 2.7484204575615335e-05, + "loss": 0.7239, + "step": 15199 + }, + { + "epoch": 2.42, + "learning_rate": 2.7481637433596304e-05, + "loss": 0.6673, + "step": 15200 + }, + { + "epoch": 2.42, + "learning_rate": 2.7479070265149336e-05, + "loss": 0.7347, + "step": 15201 + }, + { + "epoch": 2.42, + "learning_rate": 2.7476503070301778e-05, + "loss": 0.7745, + "step": 15202 + }, + { + "epoch": 2.42, + "learning_rate": 2.7473935849080955e-05, + "loss": 0.6848, + "step": 15203 + }, + { + "epoch": 2.42, + "learning_rate": 2.747136860151423e-05, + "loss": 0.8203, + "step": 15204 + }, + { + "epoch": 2.42, + "learning_rate": 2.7468801327628925e-05, + "loss": 0.7242, + "step": 15205 + }, + { + "epoch": 2.42, + "learning_rate": 2.746623402745238e-05, + "loss": 0.6975, + "step": 15206 + }, + { + "epoch": 2.42, + "learning_rate": 2.7463666701011948e-05, + "loss": 0.705, + "step": 15207 + }, + { + "epoch": 2.42, + "learning_rate": 2.7461099348334955e-05, + "loss": 0.7457, + "step": 15208 + }, + { + "epoch": 2.42, + "learning_rate": 2.7458531969448754e-05, + "loss": 0.7827, + "step": 15209 + }, + { + "epoch": 2.42, + "learning_rate": 2.7455964564380683e-05, + "loss": 0.7955, + "step": 15210 + }, + { + "epoch": 2.42, + "learning_rate": 2.745339713315807e-05, + "loss": 0.6347, + "step": 15211 + }, + { + "epoch": 2.42, + "learning_rate": 2.745082967580827e-05, + "loss": 0.6831, + "step": 15212 + }, + { + "epoch": 2.42, + "learning_rate": 2.7448262192358626e-05, + "loss": 0.7111, + "step": 15213 + }, + { + "epoch": 2.42, + "learning_rate": 2.7445694682836472e-05, + "loss": 0.7557, + "step": 15214 + }, + { + "epoch": 2.42, + "learning_rate": 2.7443127147269155e-05, + "loss": 0.7483, + "step": 15215 + }, + { + "epoch": 2.42, + "learning_rate": 2.7440559585684022e-05, + "loss": 0.7581, + "step": 15216 + }, + { + "epoch": 2.42, + "learning_rate": 2.7437991998108405e-05, + "loss": 0.7354, + "step": 15217 + }, + { + "epoch": 2.42, + "learning_rate": 2.7435424384569652e-05, + "loss": 0.7153, + "step": 15218 + }, + { + "epoch": 2.42, + "learning_rate": 2.7432856745095113e-05, + "loss": 0.7105, + "step": 15219 + }, + { + "epoch": 2.42, + "learning_rate": 2.743028907971213e-05, + "loss": 0.6801, + "step": 15220 + }, + { + "epoch": 2.42, + "learning_rate": 2.7427721388448035e-05, + "loss": 0.5942, + "step": 15221 + }, + { + "epoch": 2.43, + "learning_rate": 2.742515367133019e-05, + "loss": 0.749, + "step": 15222 + }, + { + "epoch": 2.43, + "learning_rate": 2.7422585928385918e-05, + "loss": 0.6781, + "step": 15223 + }, + { + "epoch": 2.43, + "learning_rate": 2.742001815964259e-05, + "loss": 0.7816, + "step": 15224 + }, + { + "epoch": 2.43, + "learning_rate": 2.7417450365127534e-05, + "loss": 0.6582, + "step": 15225 + }, + { + "epoch": 2.43, + "learning_rate": 2.7414882544868094e-05, + "loss": 0.7284, + "step": 15226 + }, + { + "epoch": 2.43, + "learning_rate": 2.741231469889162e-05, + "loss": 0.7699, + "step": 15227 + }, + { + "epoch": 2.43, + "learning_rate": 2.740974682722547e-05, + "loss": 0.7087, + "step": 15228 + }, + { + "epoch": 2.43, + "learning_rate": 2.740717892989697e-05, + "loss": 0.7074, + "step": 15229 + }, + { + "epoch": 2.43, + "learning_rate": 2.7404611006933485e-05, + "loss": 0.7255, + "step": 15230 + }, + { + "epoch": 2.43, + "learning_rate": 2.7402043058362343e-05, + "loss": 0.7769, + "step": 15231 + }, + { + "epoch": 2.43, + "learning_rate": 2.73994750842109e-05, + "loss": 0.821, + "step": 15232 + }, + { + "epoch": 2.43, + "learning_rate": 2.7396907084506513e-05, + "loss": 0.7467, + "step": 15233 + }, + { + "epoch": 2.43, + "learning_rate": 2.7394339059276514e-05, + "loss": 0.7212, + "step": 15234 + }, + { + "epoch": 2.43, + "learning_rate": 2.739177100854826e-05, + "loss": 0.6519, + "step": 15235 + }, + { + "epoch": 2.43, + "learning_rate": 2.7389202932349095e-05, + "loss": 0.7387, + "step": 15236 + }, + { + "epoch": 2.43, + "learning_rate": 2.738663483070637e-05, + "loss": 0.722, + "step": 15237 + }, + { + "epoch": 2.43, + "learning_rate": 2.738406670364743e-05, + "loss": 0.7214, + "step": 15238 + }, + { + "epoch": 2.43, + "learning_rate": 2.7381498551199636e-05, + "loss": 0.7791, + "step": 15239 + }, + { + "epoch": 2.43, + "learning_rate": 2.7378930373390323e-05, + "loss": 0.7498, + "step": 15240 + }, + { + "epoch": 2.43, + "learning_rate": 2.737636217024685e-05, + "loss": 0.6569, + "step": 15241 + }, + { + "epoch": 2.43, + "learning_rate": 2.737379394179656e-05, + "loss": 0.7326, + "step": 15242 + }, + { + "epoch": 2.43, + "learning_rate": 2.73712256880668e-05, + "loss": 0.7323, + "step": 15243 + }, + { + "epoch": 2.43, + "learning_rate": 2.736865740908493e-05, + "loss": 0.6311, + "step": 15244 + }, + { + "epoch": 2.43, + "learning_rate": 2.7366089104878297e-05, + "loss": 0.7367, + "step": 15245 + }, + { + "epoch": 2.43, + "learning_rate": 2.736352077547425e-05, + "loss": 0.7349, + "step": 15246 + }, + { + "epoch": 2.43, + "learning_rate": 2.7360952420900145e-05, + "loss": 0.7463, + "step": 15247 + }, + { + "epoch": 2.43, + "learning_rate": 2.7358384041183326e-05, + "loss": 0.675, + "step": 15248 + }, + { + "epoch": 2.43, + "learning_rate": 2.7355815636351145e-05, + "loss": 0.6923, + "step": 15249 + }, + { + "epoch": 2.43, + "learning_rate": 2.7353247206430965e-05, + "loss": 0.6484, + "step": 15250 + }, + { + "epoch": 2.43, + "learning_rate": 2.7350678751450133e-05, + "loss": 0.7013, + "step": 15251 + }, + { + "epoch": 2.43, + "learning_rate": 2.7348110271436e-05, + "loss": 0.6506, + "step": 15252 + }, + { + "epoch": 2.43, + "learning_rate": 2.7345541766415915e-05, + "loss": 0.7367, + "step": 15253 + }, + { + "epoch": 2.43, + "learning_rate": 2.7342973236417235e-05, + "loss": 0.7671, + "step": 15254 + }, + { + "epoch": 2.43, + "learning_rate": 2.73404046814673e-05, + "loss": 0.7822, + "step": 15255 + }, + { + "epoch": 2.43, + "learning_rate": 2.7337836101593494e-05, + "loss": 0.7269, + "step": 15256 + }, + { + "epoch": 2.43, + "learning_rate": 2.7335267496823145e-05, + "loss": 0.7949, + "step": 15257 + }, + { + "epoch": 2.43, + "learning_rate": 2.7332698867183615e-05, + "loss": 0.7021, + "step": 15258 + }, + { + "epoch": 2.43, + "learning_rate": 2.7330130212702258e-05, + "loss": 0.6838, + "step": 15259 + }, + { + "epoch": 2.43, + "learning_rate": 2.732756153340643e-05, + "loss": 0.7083, + "step": 15260 + }, + { + "epoch": 2.43, + "learning_rate": 2.7324992829323486e-05, + "loss": 0.6781, + "step": 15261 + }, + { + "epoch": 2.43, + "learning_rate": 2.732242410048078e-05, + "loss": 0.7333, + "step": 15262 + }, + { + "epoch": 2.43, + "learning_rate": 2.7319855346905667e-05, + "loss": 0.7288, + "step": 15263 + }, + { + "epoch": 2.43, + "learning_rate": 2.73172865686255e-05, + "loss": 0.7469, + "step": 15264 + }, + { + "epoch": 2.43, + "learning_rate": 2.731471776566764e-05, + "loss": 0.7285, + "step": 15265 + }, + { + "epoch": 2.43, + "learning_rate": 2.7312148938059435e-05, + "loss": 0.6935, + "step": 15266 + }, + { + "epoch": 2.43, + "learning_rate": 2.730958008582826e-05, + "loss": 0.7125, + "step": 15267 + }, + { + "epoch": 2.43, + "learning_rate": 2.7307011209001448e-05, + "loss": 0.6426, + "step": 15268 + }, + { + "epoch": 2.43, + "learning_rate": 2.730444230760637e-05, + "loss": 0.7117, + "step": 15269 + }, + { + "epoch": 2.43, + "learning_rate": 2.7301873381670383e-05, + "loss": 0.7573, + "step": 15270 + }, + { + "epoch": 2.43, + "learning_rate": 2.729930443122085e-05, + "loss": 0.7576, + "step": 15271 + }, + { + "epoch": 2.43, + "learning_rate": 2.7296735456285107e-05, + "loss": 0.753, + "step": 15272 + }, + { + "epoch": 2.43, + "learning_rate": 2.729416645689053e-05, + "loss": 0.7787, + "step": 15273 + }, + { + "epoch": 2.43, + "learning_rate": 2.7291597433064476e-05, + "loss": 0.7176, + "step": 15274 + }, + { + "epoch": 2.43, + "learning_rate": 2.7289028384834297e-05, + "loss": 0.746, + "step": 15275 + }, + { + "epoch": 2.43, + "learning_rate": 2.7286459312227357e-05, + "loss": 0.6878, + "step": 15276 + }, + { + "epoch": 2.43, + "learning_rate": 2.7283890215271012e-05, + "loss": 0.7248, + "step": 15277 + }, + { + "epoch": 2.43, + "learning_rate": 2.7281321093992624e-05, + "loss": 0.6401, + "step": 15278 + }, + { + "epoch": 2.43, + "learning_rate": 2.7278751948419552e-05, + "loss": 0.7307, + "step": 15279 + }, + { + "epoch": 2.43, + "learning_rate": 2.7276182778579155e-05, + "loss": 0.7157, + "step": 15280 + }, + { + "epoch": 2.43, + "learning_rate": 2.7273613584498797e-05, + "loss": 0.6626, + "step": 15281 + }, + { + "epoch": 2.43, + "learning_rate": 2.7271044366205832e-05, + "loss": 0.7157, + "step": 15282 + }, + { + "epoch": 2.43, + "learning_rate": 2.7268475123727617e-05, + "loss": 0.7533, + "step": 15283 + }, + { + "epoch": 2.43, + "learning_rate": 2.7265905857091524e-05, + "loss": 0.7131, + "step": 15284 + }, + { + "epoch": 2.44, + "learning_rate": 2.726333656632491e-05, + "loss": 0.6914, + "step": 15285 + }, + { + "epoch": 2.44, + "learning_rate": 2.7260767251455132e-05, + "loss": 0.7092, + "step": 15286 + }, + { + "epoch": 2.44, + "learning_rate": 2.7258197912509557e-05, + "loss": 0.7331, + "step": 15287 + }, + { + "epoch": 2.44, + "learning_rate": 2.725562854951555e-05, + "loss": 0.7909, + "step": 15288 + }, + { + "epoch": 2.44, + "learning_rate": 2.725305916250046e-05, + "loss": 0.6872, + "step": 15289 + }, + { + "epoch": 2.44, + "learning_rate": 2.7250489751491665e-05, + "loss": 0.7067, + "step": 15290 + }, + { + "epoch": 2.44, + "learning_rate": 2.724792031651653e-05, + "loss": 0.7472, + "step": 15291 + }, + { + "epoch": 2.44, + "learning_rate": 2.7245350857602392e-05, + "loss": 0.7468, + "step": 15292 + }, + { + "epoch": 2.44, + "learning_rate": 2.7242781374776633e-05, + "loss": 0.7339, + "step": 15293 + }, + { + "epoch": 2.44, + "learning_rate": 2.724021186806662e-05, + "loss": 0.7092, + "step": 15294 + }, + { + "epoch": 2.44, + "learning_rate": 2.72376423374997e-05, + "loss": 0.767, + "step": 15295 + }, + { + "epoch": 2.44, + "learning_rate": 2.7235072783103254e-05, + "loss": 0.7014, + "step": 15296 + }, + { + "epoch": 2.44, + "learning_rate": 2.7232503204904648e-05, + "loss": 0.7407, + "step": 15297 + }, + { + "epoch": 2.44, + "learning_rate": 2.7229933602931223e-05, + "loss": 0.7008, + "step": 15298 + }, + { + "epoch": 2.44, + "learning_rate": 2.722736397721037e-05, + "loss": 0.7115, + "step": 15299 + }, + { + "epoch": 2.44, + "learning_rate": 2.722479432776944e-05, + "loss": 0.722, + "step": 15300 + }, + { + "epoch": 2.44, + "learning_rate": 2.7222224654635804e-05, + "loss": 0.6868, + "step": 15301 + }, + { + "epoch": 2.44, + "learning_rate": 2.721965495783682e-05, + "loss": 0.6671, + "step": 15302 + }, + { + "epoch": 2.44, + "learning_rate": 2.721708523739985e-05, + "loss": 0.6537, + "step": 15303 + }, + { + "epoch": 2.44, + "learning_rate": 2.7214515493352283e-05, + "loss": 0.7183, + "step": 15304 + }, + { + "epoch": 2.44, + "learning_rate": 2.7211945725721467e-05, + "loss": 0.6968, + "step": 15305 + }, + { + "epoch": 2.44, + "learning_rate": 2.720937593453477e-05, + "loss": 0.6641, + "step": 15306 + }, + { + "epoch": 2.44, + "learning_rate": 2.7206806119819554e-05, + "loss": 0.6717, + "step": 15307 + }, + { + "epoch": 2.44, + "learning_rate": 2.7204236281603203e-05, + "loss": 0.687, + "step": 15308 + }, + { + "epoch": 2.44, + "learning_rate": 2.7201666419913075e-05, + "loss": 0.6885, + "step": 15309 + }, + { + "epoch": 2.44, + "learning_rate": 2.7199096534776526e-05, + "loss": 0.7504, + "step": 15310 + }, + { + "epoch": 2.44, + "learning_rate": 2.719652662622095e-05, + "loss": 0.7009, + "step": 15311 + }, + { + "epoch": 2.44, + "learning_rate": 2.719395669427368e-05, + "loss": 0.8064, + "step": 15312 + }, + { + "epoch": 2.44, + "learning_rate": 2.719138673896211e-05, + "loss": 0.8382, + "step": 15313 + }, + { + "epoch": 2.44, + "learning_rate": 2.7188816760313606e-05, + "loss": 0.6941, + "step": 15314 + }, + { + "epoch": 2.44, + "learning_rate": 2.7186246758355526e-05, + "loss": 0.8, + "step": 15315 + }, + { + "epoch": 2.44, + "learning_rate": 2.718367673311525e-05, + "loss": 0.764, + "step": 15316 + }, + { + "epoch": 2.44, + "learning_rate": 2.7181106684620146e-05, + "loss": 0.7148, + "step": 15317 + }, + { + "epoch": 2.44, + "learning_rate": 2.717853661289757e-05, + "loss": 0.7371, + "step": 15318 + }, + { + "epoch": 2.44, + "learning_rate": 2.7175966517974903e-05, + "loss": 0.7247, + "step": 15319 + }, + { + "epoch": 2.44, + "learning_rate": 2.7173396399879523e-05, + "loss": 0.7529, + "step": 15320 + }, + { + "epoch": 2.44, + "learning_rate": 2.717082625863879e-05, + "loss": 0.7231, + "step": 15321 + }, + { + "epoch": 2.44, + "learning_rate": 2.716825609428007e-05, + "loss": 0.76, + "step": 15322 + }, + { + "epoch": 2.44, + "learning_rate": 2.716568590683074e-05, + "loss": 0.7043, + "step": 15323 + }, + { + "epoch": 2.44, + "learning_rate": 2.7163115696318164e-05, + "loss": 0.6556, + "step": 15324 + }, + { + "epoch": 2.44, + "learning_rate": 2.7160545462769727e-05, + "loss": 0.7365, + "step": 15325 + }, + { + "epoch": 2.44, + "learning_rate": 2.7157975206212795e-05, + "loss": 0.7335, + "step": 15326 + }, + { + "epoch": 2.44, + "learning_rate": 2.7155404926674727e-05, + "loss": 0.7252, + "step": 15327 + }, + { + "epoch": 2.44, + "learning_rate": 2.715283462418291e-05, + "loss": 0.7399, + "step": 15328 + }, + { + "epoch": 2.44, + "learning_rate": 2.7150264298764717e-05, + "loss": 0.7118, + "step": 15329 + }, + { + "epoch": 2.44, + "learning_rate": 2.7147693950447507e-05, + "loss": 0.7384, + "step": 15330 + }, + { + "epoch": 2.44, + "learning_rate": 2.7145123579258668e-05, + "loss": 0.7723, + "step": 15331 + }, + { + "epoch": 2.44, + "learning_rate": 2.7142553185225555e-05, + "loss": 0.6837, + "step": 15332 + }, + { + "epoch": 2.44, + "learning_rate": 2.713998276837556e-05, + "loss": 0.7826, + "step": 15333 + }, + { + "epoch": 2.44, + "learning_rate": 2.7137412328736046e-05, + "loss": 0.6893, + "step": 15334 + }, + { + "epoch": 2.44, + "learning_rate": 2.713484186633438e-05, + "loss": 0.8206, + "step": 15335 + }, + { + "epoch": 2.44, + "learning_rate": 2.713227138119795e-05, + "loss": 0.7491, + "step": 15336 + }, + { + "epoch": 2.44, + "learning_rate": 2.712970087335413e-05, + "loss": 0.7128, + "step": 15337 + }, + { + "epoch": 2.44, + "learning_rate": 2.7127130342830275e-05, + "loss": 0.6968, + "step": 15338 + }, + { + "epoch": 2.44, + "learning_rate": 2.7124559789653786e-05, + "loss": 0.7828, + "step": 15339 + }, + { + "epoch": 2.44, + "learning_rate": 2.7121989213852023e-05, + "loss": 0.6961, + "step": 15340 + }, + { + "epoch": 2.44, + "learning_rate": 2.7119418615452363e-05, + "loss": 0.706, + "step": 15341 + }, + { + "epoch": 2.44, + "learning_rate": 2.7116847994482182e-05, + "loss": 0.7108, + "step": 15342 + }, + { + "epoch": 2.44, + "learning_rate": 2.7114277350968853e-05, + "loss": 0.7605, + "step": 15343 + }, + { + "epoch": 2.44, + "learning_rate": 2.711170668493975e-05, + "loss": 0.7782, + "step": 15344 + }, + { + "epoch": 2.44, + "learning_rate": 2.7109135996422258e-05, + "loss": 0.7284, + "step": 15345 + }, + { + "epoch": 2.44, + "learning_rate": 2.7106565285443747e-05, + "loss": 0.783, + "step": 15346 + }, + { + "epoch": 2.44, + "learning_rate": 2.7103994552031592e-05, + "loss": 0.7515, + "step": 15347 + }, + { + "epoch": 2.45, + "learning_rate": 2.7101423796213177e-05, + "loss": 0.7355, + "step": 15348 + }, + { + "epoch": 2.45, + "learning_rate": 2.709885301801587e-05, + "loss": 0.7687, + "step": 15349 + }, + { + "epoch": 2.45, + "learning_rate": 2.709628221746705e-05, + "loss": 0.7268, + "step": 15350 + }, + { + "epoch": 2.45, + "learning_rate": 2.709371139459411e-05, + "loss": 0.6505, + "step": 15351 + }, + { + "epoch": 2.45, + "learning_rate": 2.7091140549424403e-05, + "loss": 0.6979, + "step": 15352 + }, + { + "epoch": 2.45, + "learning_rate": 2.708856968198532e-05, + "loss": 0.7389, + "step": 15353 + }, + { + "epoch": 2.45, + "learning_rate": 2.7085998792304247e-05, + "loss": 0.7546, + "step": 15354 + }, + { + "epoch": 2.45, + "learning_rate": 2.7083427880408542e-05, + "loss": 0.694, + "step": 15355 + }, + { + "epoch": 2.45, + "learning_rate": 2.70808569463256e-05, + "loss": 0.728, + "step": 15356 + }, + { + "epoch": 2.45, + "learning_rate": 2.7078285990082796e-05, + "loss": 0.7626, + "step": 15357 + }, + { + "epoch": 2.45, + "learning_rate": 2.7075715011707503e-05, + "loss": 0.7721, + "step": 15358 + }, + { + "epoch": 2.45, + "learning_rate": 2.7073144011227107e-05, + "loss": 0.7595, + "step": 15359 + }, + { + "epoch": 2.45, + "learning_rate": 2.707057298866899e-05, + "loss": 0.7674, + "step": 15360 + }, + { + "epoch": 2.45, + "learning_rate": 2.706800194406053e-05, + "loss": 0.7397, + "step": 15361 + }, + { + "epoch": 2.45, + "learning_rate": 2.70654308774291e-05, + "loss": 0.626, + "step": 15362 + }, + { + "epoch": 2.45, + "learning_rate": 2.706285978880208e-05, + "loss": 0.7584, + "step": 15363 + }, + { + "epoch": 2.45, + "learning_rate": 2.7060288678206856e-05, + "loss": 0.6923, + "step": 15364 + }, + { + "epoch": 2.45, + "learning_rate": 2.705771754567082e-05, + "loss": 0.7181, + "step": 15365 + }, + { + "epoch": 2.45, + "learning_rate": 2.705514639122133e-05, + "loss": 0.7339, + "step": 15366 + }, + { + "epoch": 2.45, + "learning_rate": 2.7052575214885785e-05, + "loss": 0.7036, + "step": 15367 + }, + { + "epoch": 2.45, + "learning_rate": 2.7050004016691554e-05, + "loss": 0.7231, + "step": 15368 + }, + { + "epoch": 2.45, + "learning_rate": 2.7047432796666033e-05, + "loss": 0.6415, + "step": 15369 + }, + { + "epoch": 2.45, + "learning_rate": 2.7044861554836588e-05, + "loss": 0.7616, + "step": 15370 + }, + { + "epoch": 2.45, + "learning_rate": 2.7042290291230615e-05, + "loss": 0.7013, + "step": 15371 + }, + { + "epoch": 2.45, + "learning_rate": 2.7039719005875498e-05, + "loss": 0.6978, + "step": 15372 + }, + { + "epoch": 2.45, + "learning_rate": 2.7037147698798594e-05, + "loss": 0.6526, + "step": 15373 + }, + { + "epoch": 2.45, + "learning_rate": 2.7034576370027313e-05, + "loss": 0.7345, + "step": 15374 + }, + { + "epoch": 2.45, + "learning_rate": 2.703200501958903e-05, + "loss": 0.6963, + "step": 15375 + }, + { + "epoch": 2.45, + "learning_rate": 2.702943364751112e-05, + "loss": 0.7701, + "step": 15376 + }, + { + "epoch": 2.45, + "learning_rate": 2.7026862253820978e-05, + "loss": 0.6848, + "step": 15377 + }, + { + "epoch": 2.45, + "learning_rate": 2.7024290838545986e-05, + "loss": 0.6849, + "step": 15378 + }, + { + "epoch": 2.45, + "learning_rate": 2.7021719401713518e-05, + "loss": 0.7428, + "step": 15379 + }, + { + "epoch": 2.45, + "learning_rate": 2.7019147943350977e-05, + "loss": 0.7964, + "step": 15380 + }, + { + "epoch": 2.45, + "learning_rate": 2.701657646348573e-05, + "loss": 0.7078, + "step": 15381 + }, + { + "epoch": 2.45, + "learning_rate": 2.7014004962145172e-05, + "loss": 0.6673, + "step": 15382 + }, + { + "epoch": 2.45, + "learning_rate": 2.701143343935668e-05, + "loss": 0.7452, + "step": 15383 + }, + { + "epoch": 2.45, + "learning_rate": 2.7008861895147642e-05, + "loss": 0.7191, + "step": 15384 + }, + { + "epoch": 2.45, + "learning_rate": 2.7006290329545448e-05, + "loss": 0.73, + "step": 15385 + }, + { + "epoch": 2.45, + "learning_rate": 2.7003718742577477e-05, + "loss": 0.7856, + "step": 15386 + }, + { + "epoch": 2.45, + "learning_rate": 2.7001147134271114e-05, + "loss": 0.6925, + "step": 15387 + }, + { + "epoch": 2.45, + "learning_rate": 2.6998575504653757e-05, + "loss": 0.727, + "step": 15388 + }, + { + "epoch": 2.45, + "learning_rate": 2.6996003853752783e-05, + "loss": 0.6956, + "step": 15389 + }, + { + "epoch": 2.45, + "learning_rate": 2.6993432181595572e-05, + "loss": 0.7725, + "step": 15390 + }, + { + "epoch": 2.45, + "learning_rate": 2.6990860488209523e-05, + "loss": 0.7035, + "step": 15391 + }, + { + "epoch": 2.45, + "learning_rate": 2.698828877362203e-05, + "loss": 0.7959, + "step": 15392 + }, + { + "epoch": 2.45, + "learning_rate": 2.6985717037860453e-05, + "loss": 0.6848, + "step": 15393 + }, + { + "epoch": 2.45, + "learning_rate": 2.6983145280952205e-05, + "loss": 0.7798, + "step": 15394 + }, + { + "epoch": 2.45, + "learning_rate": 2.698057350292466e-05, + "loss": 0.6807, + "step": 15395 + }, + { + "epoch": 2.45, + "learning_rate": 2.6978001703805206e-05, + "loss": 0.7664, + "step": 15396 + }, + { + "epoch": 2.45, + "learning_rate": 2.697542988362124e-05, + "loss": 0.7572, + "step": 15397 + }, + { + "epoch": 2.45, + "learning_rate": 2.697285804240014e-05, + "loss": 0.7608, + "step": 15398 + }, + { + "epoch": 2.45, + "learning_rate": 2.6970286180169302e-05, + "loss": 0.7368, + "step": 15399 + }, + { + "epoch": 2.45, + "learning_rate": 2.6967714296956115e-05, + "loss": 0.7593, + "step": 15400 + }, + { + "epoch": 2.45, + "learning_rate": 2.6965142392787963e-05, + "loss": 0.7211, + "step": 15401 + }, + { + "epoch": 2.45, + "learning_rate": 2.696257046769224e-05, + "loss": 0.7319, + "step": 15402 + }, + { + "epoch": 2.45, + "learning_rate": 2.6959998521696327e-05, + "loss": 0.7477, + "step": 15403 + }, + { + "epoch": 2.45, + "learning_rate": 2.695742655482762e-05, + "loss": 0.7701, + "step": 15404 + }, + { + "epoch": 2.45, + "learning_rate": 2.6954854567113514e-05, + "loss": 0.7359, + "step": 15405 + }, + { + "epoch": 2.45, + "learning_rate": 2.6952282558581393e-05, + "loss": 0.6921, + "step": 15406 + }, + { + "epoch": 2.45, + "learning_rate": 2.694971052925864e-05, + "loss": 0.7708, + "step": 15407 + }, + { + "epoch": 2.45, + "learning_rate": 2.6947138479172666e-05, + "loss": 0.7186, + "step": 15408 + }, + { + "epoch": 2.45, + "learning_rate": 2.6944566408350847e-05, + "loss": 0.7841, + "step": 15409 + }, + { + "epoch": 2.45, + "learning_rate": 2.694199431682057e-05, + "loss": 0.6962, + "step": 15410 + }, + { + "epoch": 2.46, + "learning_rate": 2.6939422204609233e-05, + "loss": 0.6742, + "step": 15411 + }, + { + "epoch": 2.46, + "learning_rate": 2.6936850071744236e-05, + "loss": 0.7205, + "step": 15412 + }, + { + "epoch": 2.46, + "learning_rate": 2.6934277918252952e-05, + "loss": 0.637, + "step": 15413 + }, + { + "epoch": 2.46, + "learning_rate": 2.693170574416279e-05, + "loss": 0.743, + "step": 15414 + }, + { + "epoch": 2.46, + "learning_rate": 2.6929133549501128e-05, + "loss": 0.8528, + "step": 15415 + }, + { + "epoch": 2.46, + "learning_rate": 2.6926561334295363e-05, + "loss": 0.6912, + "step": 15416 + }, + { + "epoch": 2.46, + "learning_rate": 2.6923989098572895e-05, + "loss": 0.7703, + "step": 15417 + }, + { + "epoch": 2.46, + "learning_rate": 2.6921416842361113e-05, + "loss": 0.6839, + "step": 15418 + }, + { + "epoch": 2.46, + "learning_rate": 2.69188445656874e-05, + "loss": 0.7154, + "step": 15419 + }, + { + "epoch": 2.46, + "learning_rate": 2.6916272268579164e-05, + "loss": 0.9459, + "step": 15420 + }, + { + "epoch": 2.46, + "learning_rate": 2.6913699951063786e-05, + "loss": 0.7752, + "step": 15421 + }, + { + "epoch": 2.46, + "learning_rate": 2.6911127613168673e-05, + "loss": 0.6618, + "step": 15422 + }, + { + "epoch": 2.46, + "learning_rate": 2.690855525492121e-05, + "loss": 0.6978, + "step": 15423 + }, + { + "epoch": 2.46, + "learning_rate": 2.690598287634879e-05, + "loss": 0.6834, + "step": 15424 + }, + { + "epoch": 2.46, + "learning_rate": 2.6903410477478806e-05, + "loss": 0.7965, + "step": 15425 + }, + { + "epoch": 2.46, + "learning_rate": 2.690083805833866e-05, + "loss": 0.7285, + "step": 15426 + }, + { + "epoch": 2.46, + "learning_rate": 2.6898265618955744e-05, + "loss": 0.6802, + "step": 15427 + }, + { + "epoch": 2.46, + "learning_rate": 2.6895693159357443e-05, + "loss": 0.6936, + "step": 15428 + }, + { + "epoch": 2.46, + "learning_rate": 2.6893120679571172e-05, + "loss": 0.8217, + "step": 15429 + }, + { + "epoch": 2.46, + "learning_rate": 2.6890548179624308e-05, + "loss": 0.6842, + "step": 15430 + }, + { + "epoch": 2.46, + "learning_rate": 2.6887975659544257e-05, + "loss": 0.6745, + "step": 15431 + }, + { + "epoch": 2.46, + "learning_rate": 2.6885403119358414e-05, + "loss": 0.7392, + "step": 15432 + }, + { + "epoch": 2.46, + "learning_rate": 2.6882830559094165e-05, + "loss": 0.6672, + "step": 15433 + }, + { + "epoch": 2.46, + "learning_rate": 2.6880257978778924e-05, + "loss": 0.7678, + "step": 15434 + }, + { + "epoch": 2.46, + "learning_rate": 2.687768537844007e-05, + "loss": 0.8339, + "step": 15435 + }, + { + "epoch": 2.46, + "learning_rate": 2.6875112758105004e-05, + "loss": 0.732, + "step": 15436 + }, + { + "epoch": 2.46, + "learning_rate": 2.6872540117801128e-05, + "loss": 0.7241, + "step": 15437 + }, + { + "epoch": 2.46, + "learning_rate": 2.686996745755584e-05, + "loss": 0.7329, + "step": 15438 + }, + { + "epoch": 2.46, + "learning_rate": 2.6867394777396525e-05, + "loss": 0.7236, + "step": 15439 + }, + { + "epoch": 2.46, + "learning_rate": 2.68648220773506e-05, + "loss": 0.644, + "step": 15440 + }, + { + "epoch": 2.46, + "learning_rate": 2.6862249357445446e-05, + "loss": 0.6685, + "step": 15441 + }, + { + "epoch": 2.46, + "learning_rate": 2.6859676617708474e-05, + "loss": 0.6581, + "step": 15442 + }, + { + "epoch": 2.46, + "learning_rate": 2.6857103858167066e-05, + "loss": 0.655, + "step": 15443 + }, + { + "epoch": 2.46, + "learning_rate": 2.6854531078848637e-05, + "loss": 0.7448, + "step": 15444 + }, + { + "epoch": 2.46, + "learning_rate": 2.685195827978057e-05, + "loss": 0.63, + "step": 15445 + }, + { + "epoch": 2.46, + "learning_rate": 2.6849385460990278e-05, + "loss": 0.6997, + "step": 15446 + }, + { + "epoch": 2.46, + "learning_rate": 2.684681262250515e-05, + "loss": 0.6993, + "step": 15447 + }, + { + "epoch": 2.46, + "learning_rate": 2.6844239764352587e-05, + "loss": 0.7849, + "step": 15448 + }, + { + "epoch": 2.46, + "learning_rate": 2.6841666886559995e-05, + "loss": 0.7748, + "step": 15449 + }, + { + "epoch": 2.46, + "learning_rate": 2.683909398915477e-05, + "loss": 0.7645, + "step": 15450 + }, + { + "epoch": 2.46, + "learning_rate": 2.68365210721643e-05, + "loss": 0.776, + "step": 15451 + }, + { + "epoch": 2.46, + "learning_rate": 2.683394813561601e-05, + "loss": 0.7285, + "step": 15452 + }, + { + "epoch": 2.46, + "learning_rate": 2.6831375179537277e-05, + "loss": 0.7054, + "step": 15453 + }, + { + "epoch": 2.46, + "learning_rate": 2.682880220395551e-05, + "loss": 0.6744, + "step": 15454 + }, + { + "epoch": 2.46, + "learning_rate": 2.682622920889812e-05, + "loss": 0.7373, + "step": 15455 + }, + { + "epoch": 2.46, + "learning_rate": 2.682365619439248e-05, + "loss": 0.6563, + "step": 15456 + }, + { + "epoch": 2.46, + "learning_rate": 2.682108316046602e-05, + "loss": 0.7498, + "step": 15457 + }, + { + "epoch": 2.46, + "learning_rate": 2.6818510107146132e-05, + "loss": 0.7383, + "step": 15458 + }, + { + "epoch": 2.46, + "learning_rate": 2.681593703446021e-05, + "loss": 0.8, + "step": 15459 + }, + { + "epoch": 2.46, + "learning_rate": 2.681336394243566e-05, + "loss": 0.7553, + "step": 15460 + }, + { + "epoch": 2.46, + "learning_rate": 2.6810790831099892e-05, + "loss": 0.6893, + "step": 15461 + }, + { + "epoch": 2.46, + "learning_rate": 2.6808217700480298e-05, + "loss": 0.7731, + "step": 15462 + }, + { + "epoch": 2.46, + "learning_rate": 2.680564455060428e-05, + "loss": 0.7756, + "step": 15463 + }, + { + "epoch": 2.46, + "learning_rate": 2.680307138149925e-05, + "loss": 0.7181, + "step": 15464 + }, + { + "epoch": 2.46, + "learning_rate": 2.6800498193192592e-05, + "loss": 0.7153, + "step": 15465 + }, + { + "epoch": 2.46, + "learning_rate": 2.679792498571173e-05, + "loss": 0.7463, + "step": 15466 + }, + { + "epoch": 2.46, + "learning_rate": 2.6795351759084063e-05, + "loss": 0.7327, + "step": 15467 + }, + { + "epoch": 2.46, + "learning_rate": 2.679277851333698e-05, + "loss": 0.7032, + "step": 15468 + }, + { + "epoch": 2.46, + "learning_rate": 2.67902052484979e-05, + "loss": 0.783, + "step": 15469 + }, + { + "epoch": 2.46, + "learning_rate": 2.6787631964594224e-05, + "loss": 0.7458, + "step": 15470 + }, + { + "epoch": 2.46, + "learning_rate": 2.678505866165334e-05, + "loss": 0.6913, + "step": 15471 + }, + { + "epoch": 2.46, + "learning_rate": 2.6782485339702677e-05, + "loss": 0.6576, + "step": 15472 + }, + { + "epoch": 2.47, + "learning_rate": 2.6779911998769624e-05, + "loss": 0.7301, + "step": 15473 + }, + { + "epoch": 2.47, + "learning_rate": 2.6777338638881593e-05, + "loss": 0.6906, + "step": 15474 + }, + { + "epoch": 2.47, + "learning_rate": 2.677476526006598e-05, + "loss": 0.7366, + "step": 15475 + }, + { + "epoch": 2.47, + "learning_rate": 2.6772191862350193e-05, + "loss": 0.7185, + "step": 15476 + }, + { + "epoch": 2.47, + "learning_rate": 2.676961844576164e-05, + "loss": 0.7383, + "step": 15477 + }, + { + "epoch": 2.47, + "learning_rate": 2.676704501032773e-05, + "loss": 0.7361, + "step": 15478 + }, + { + "epoch": 2.47, + "learning_rate": 2.676447155607586e-05, + "loss": 0.745, + "step": 15479 + }, + { + "epoch": 2.47, + "learning_rate": 2.6761898083033434e-05, + "loss": 0.8695, + "step": 15480 + }, + { + "epoch": 2.47, + "learning_rate": 2.6759324591227868e-05, + "loss": 0.7156, + "step": 15481 + }, + { + "epoch": 2.47, + "learning_rate": 2.675675108068656e-05, + "loss": 0.7392, + "step": 15482 + }, + { + "epoch": 2.47, + "learning_rate": 2.6754177551436927e-05, + "loss": 0.688, + "step": 15483 + }, + { + "epoch": 2.47, + "learning_rate": 2.6751604003506365e-05, + "loss": 0.682, + "step": 15484 + }, + { + "epoch": 2.47, + "learning_rate": 2.6749030436922274e-05, + "loss": 0.6831, + "step": 15485 + }, + { + "epoch": 2.47, + "learning_rate": 2.674645685171208e-05, + "loss": 0.7046, + "step": 15486 + }, + { + "epoch": 2.47, + "learning_rate": 2.6743883247903186e-05, + "loss": 0.7218, + "step": 15487 + }, + { + "epoch": 2.47, + "learning_rate": 2.674130962552298e-05, + "loss": 0.7422, + "step": 15488 + }, + { + "epoch": 2.47, + "learning_rate": 2.6738735984598888e-05, + "loss": 0.7524, + "step": 15489 + }, + { + "epoch": 2.47, + "learning_rate": 2.673616232515832e-05, + "loss": 0.6587, + "step": 15490 + }, + { + "epoch": 2.47, + "learning_rate": 2.6733588647228668e-05, + "loss": 0.7205, + "step": 15491 + }, + { + "epoch": 2.47, + "learning_rate": 2.6731014950837357e-05, + "loss": 0.7658, + "step": 15492 + }, + { + "epoch": 2.47, + "learning_rate": 2.6728441236011797e-05, + "loss": 0.6847, + "step": 15493 + }, + { + "epoch": 2.47, + "learning_rate": 2.672586750277937e-05, + "loss": 0.6721, + "step": 15494 + }, + { + "epoch": 2.47, + "learning_rate": 2.672329375116751e-05, + "loss": 0.7761, + "step": 15495 + }, + { + "epoch": 2.47, + "learning_rate": 2.672071998120362e-05, + "loss": 0.6672, + "step": 15496 + }, + { + "epoch": 2.47, + "learning_rate": 2.6718146192915094e-05, + "loss": 0.6994, + "step": 15497 + }, + { + "epoch": 2.47, + "learning_rate": 2.6715572386329368e-05, + "loss": 0.7215, + "step": 15498 + }, + { + "epoch": 2.47, + "learning_rate": 2.6712998561473835e-05, + "loss": 0.7082, + "step": 15499 + }, + { + "epoch": 2.47, + "learning_rate": 2.67104247183759e-05, + "loss": 0.7404, + "step": 15500 + }, + { + "epoch": 2.47, + "learning_rate": 2.670785085706298e-05, + "loss": 0.7428, + "step": 15501 + }, + { + "epoch": 2.47, + "learning_rate": 2.6705276977562494e-05, + "loss": 0.7666, + "step": 15502 + }, + { + "epoch": 2.47, + "learning_rate": 2.670270307990184e-05, + "loss": 0.7678, + "step": 15503 + }, + { + "epoch": 2.47, + "learning_rate": 2.6700129164108434e-05, + "loss": 0.7008, + "step": 15504 + }, + { + "epoch": 2.47, + "learning_rate": 2.669755523020968e-05, + "loss": 0.7111, + "step": 15505 + }, + { + "epoch": 2.47, + "learning_rate": 2.6694981278232995e-05, + "loss": 0.6871, + "step": 15506 + }, + { + "epoch": 2.47, + "learning_rate": 2.669240730820579e-05, + "loss": 0.766, + "step": 15507 + }, + { + "epoch": 2.47, + "learning_rate": 2.6689833320155472e-05, + "loss": 0.7157, + "step": 15508 + }, + { + "epoch": 2.47, + "learning_rate": 2.668725931410946e-05, + "loss": 0.6834, + "step": 15509 + }, + { + "epoch": 2.47, + "learning_rate": 2.668468529009516e-05, + "loss": 0.7179, + "step": 15510 + }, + { + "epoch": 2.47, + "learning_rate": 2.6682111248139972e-05, + "loss": 0.716, + "step": 15511 + }, + { + "epoch": 2.47, + "learning_rate": 2.667953718827133e-05, + "loss": 0.7247, + "step": 15512 + }, + { + "epoch": 2.47, + "learning_rate": 2.6676963110516645e-05, + "loss": 0.7734, + "step": 15513 + }, + { + "epoch": 2.47, + "learning_rate": 2.667438901490331e-05, + "loss": 0.6431, + "step": 15514 + }, + { + "epoch": 2.47, + "learning_rate": 2.6671814901458754e-05, + "loss": 0.7392, + "step": 15515 + }, + { + "epoch": 2.47, + "learning_rate": 2.666924077021038e-05, + "loss": 0.6695, + "step": 15516 + }, + { + "epoch": 2.47, + "learning_rate": 2.6666666621185605e-05, + "loss": 0.6929, + "step": 15517 + }, + { + "epoch": 2.47, + "learning_rate": 2.6664092454411844e-05, + "loss": 0.712, + "step": 15518 + }, + { + "epoch": 2.47, + "learning_rate": 2.6661518269916513e-05, + "loss": 0.7903, + "step": 15519 + }, + { + "epoch": 2.47, + "learning_rate": 2.665894406772701e-05, + "loss": 0.6426, + "step": 15520 + }, + { + "epoch": 2.47, + "learning_rate": 2.665636984787077e-05, + "loss": 0.7651, + "step": 15521 + }, + { + "epoch": 2.47, + "learning_rate": 2.6653795610375193e-05, + "loss": 0.7421, + "step": 15522 + }, + { + "epoch": 2.47, + "learning_rate": 2.6651221355267703e-05, + "loss": 0.815, + "step": 15523 + }, + { + "epoch": 2.47, + "learning_rate": 2.66486470825757e-05, + "loss": 0.7629, + "step": 15524 + }, + { + "epoch": 2.47, + "learning_rate": 2.6646072792326605e-05, + "loss": 0.6715, + "step": 15525 + }, + { + "epoch": 2.47, + "learning_rate": 2.664349848454784e-05, + "loss": 0.7501, + "step": 15526 + }, + { + "epoch": 2.47, + "learning_rate": 2.6640924159266812e-05, + "loss": 0.7383, + "step": 15527 + }, + { + "epoch": 2.47, + "learning_rate": 2.6638349816510938e-05, + "loss": 0.6977, + "step": 15528 + }, + { + "epoch": 2.47, + "learning_rate": 2.663577545630763e-05, + "loss": 0.7336, + "step": 15529 + }, + { + "epoch": 2.47, + "learning_rate": 2.663320107868431e-05, + "loss": 0.6444, + "step": 15530 + }, + { + "epoch": 2.47, + "learning_rate": 2.6630626683668393e-05, + "loss": 0.7167, + "step": 15531 + }, + { + "epoch": 2.47, + "learning_rate": 2.6628052271287284e-05, + "loss": 0.811, + "step": 15532 + }, + { + "epoch": 2.47, + "learning_rate": 2.662547784156842e-05, + "loss": 0.6764, + "step": 15533 + }, + { + "epoch": 2.47, + "learning_rate": 2.662290339453919e-05, + "loss": 0.757, + "step": 15534 + }, + { + "epoch": 2.47, + "learning_rate": 2.662032893022703e-05, + "loss": 0.7404, + "step": 15535 + }, + { + "epoch": 2.48, + "learning_rate": 2.6617754448659348e-05, + "loss": 0.7314, + "step": 15536 + }, + { + "epoch": 2.48, + "learning_rate": 2.661517994986357e-05, + "loss": 0.6852, + "step": 15537 + }, + { + "epoch": 2.48, + "learning_rate": 2.6612605433867098e-05, + "loss": 0.6534, + "step": 15538 + }, + { + "epoch": 2.48, + "learning_rate": 2.661003090069736e-05, + "loss": 0.846, + "step": 15539 + }, + { + "epoch": 2.48, + "learning_rate": 2.6607456350381764e-05, + "loss": 0.6901, + "step": 15540 + }, + { + "epoch": 2.48, + "learning_rate": 2.6604881782947744e-05, + "loss": 0.6738, + "step": 15541 + }, + { + "epoch": 2.48, + "learning_rate": 2.6602307198422706e-05, + "loss": 0.6645, + "step": 15542 + }, + { + "epoch": 2.48, + "learning_rate": 2.659973259683407e-05, + "loss": 0.7629, + "step": 15543 + }, + { + "epoch": 2.48, + "learning_rate": 2.6597157978209246e-05, + "loss": 0.8035, + "step": 15544 + }, + { + "epoch": 2.48, + "learning_rate": 2.659458334257567e-05, + "loss": 0.7517, + "step": 15545 + }, + { + "epoch": 2.48, + "learning_rate": 2.6592008689960734e-05, + "loss": 0.7144, + "step": 15546 + }, + { + "epoch": 2.48, + "learning_rate": 2.6589434020391884e-05, + "loss": 0.663, + "step": 15547 + }, + { + "epoch": 2.48, + "learning_rate": 2.6586859333896524e-05, + "loss": 0.7156, + "step": 15548 + }, + { + "epoch": 2.48, + "learning_rate": 2.658428463050207e-05, + "loss": 0.7022, + "step": 15549 + }, + { + "epoch": 2.48, + "learning_rate": 2.6581709910235952e-05, + "loss": 0.7205, + "step": 15550 + }, + { + "epoch": 2.48, + "learning_rate": 2.6579135173125586e-05, + "loss": 0.7064, + "step": 15551 + }, + { + "epoch": 2.48, + "learning_rate": 2.657656041919838e-05, + "loss": 0.7168, + "step": 15552 + }, + { + "epoch": 2.48, + "learning_rate": 2.657398564848178e-05, + "loss": 0.7125, + "step": 15553 + }, + { + "epoch": 2.48, + "learning_rate": 2.6571410861003172e-05, + "loss": 0.727, + "step": 15554 + }, + { + "epoch": 2.48, + "learning_rate": 2.656883605679e-05, + "loss": 0.744, + "step": 15555 + }, + { + "epoch": 2.48, + "learning_rate": 2.6566261235869677e-05, + "loss": 0.7102, + "step": 15556 + }, + { + "epoch": 2.48, + "learning_rate": 2.6563686398269616e-05, + "loss": 0.6588, + "step": 15557 + }, + { + "epoch": 2.48, + "learning_rate": 2.6561111544017246e-05, + "loss": 0.7327, + "step": 15558 + }, + { + "epoch": 2.48, + "learning_rate": 2.655853667313999e-05, + "loss": 0.7044, + "step": 15559 + }, + { + "epoch": 2.48, + "learning_rate": 2.6555961785665256e-05, + "loss": 0.7207, + "step": 15560 + }, + { + "epoch": 2.48, + "learning_rate": 2.6553386881620485e-05, + "loss": 0.6891, + "step": 15561 + }, + { + "epoch": 2.48, + "learning_rate": 2.6550811961033086e-05, + "loss": 0.6438, + "step": 15562 + }, + { + "epoch": 2.48, + "learning_rate": 2.654823702393048e-05, + "loss": 0.8145, + "step": 15563 + }, + { + "epoch": 2.48, + "learning_rate": 2.6545662070340084e-05, + "loss": 0.6921, + "step": 15564 + }, + { + "epoch": 2.48, + "learning_rate": 2.6543087100289326e-05, + "loss": 0.8082, + "step": 15565 + }, + { + "epoch": 2.48, + "learning_rate": 2.6540512113805626e-05, + "loss": 0.6943, + "step": 15566 + }, + { + "epoch": 2.48, + "learning_rate": 2.6537937110916417e-05, + "loss": 0.6742, + "step": 15567 + }, + { + "epoch": 2.48, + "learning_rate": 2.6535362091649103e-05, + "loss": 0.7206, + "step": 15568 + }, + { + "epoch": 2.48, + "learning_rate": 2.653278705603111e-05, + "loss": 0.6892, + "step": 15569 + }, + { + "epoch": 2.48, + "learning_rate": 2.6530212004089877e-05, + "loss": 0.6978, + "step": 15570 + }, + { + "epoch": 2.48, + "learning_rate": 2.6527636935852814e-05, + "loss": 0.7736, + "step": 15571 + }, + { + "epoch": 2.48, + "learning_rate": 2.652506185134733e-05, + "loss": 0.7019, + "step": 15572 + }, + { + "epoch": 2.48, + "learning_rate": 2.6522486750600883e-05, + "loss": 0.6886, + "step": 15573 + }, + { + "epoch": 2.48, + "learning_rate": 2.6519911633640872e-05, + "loss": 0.7017, + "step": 15574 + }, + { + "epoch": 2.48, + "learning_rate": 2.6517336500494716e-05, + "loss": 0.7521, + "step": 15575 + }, + { + "epoch": 2.48, + "learning_rate": 2.6514761351189854e-05, + "loss": 0.6912, + "step": 15576 + }, + { + "epoch": 2.48, + "learning_rate": 2.65121861857537e-05, + "loss": 0.6719, + "step": 15577 + }, + { + "epoch": 2.48, + "learning_rate": 2.6509611004213685e-05, + "loss": 0.634, + "step": 15578 + }, + { + "epoch": 2.48, + "learning_rate": 2.650703580659723e-05, + "loss": 0.7908, + "step": 15579 + }, + { + "epoch": 2.48, + "learning_rate": 2.650446059293175e-05, + "loss": 0.7266, + "step": 15580 + }, + { + "epoch": 2.48, + "learning_rate": 2.6501885363244682e-05, + "loss": 0.7074, + "step": 15581 + }, + { + "epoch": 2.48, + "learning_rate": 2.6499310117563453e-05, + "loss": 0.8115, + "step": 15582 + }, + { + "epoch": 2.48, + "learning_rate": 2.649673485591548e-05, + "loss": 0.7009, + "step": 15583 + }, + { + "epoch": 2.48, + "learning_rate": 2.6494159578328186e-05, + "loss": 0.6831, + "step": 15584 + }, + { + "epoch": 2.48, + "learning_rate": 2.6491584284829003e-05, + "loss": 0.6672, + "step": 15585 + }, + { + "epoch": 2.48, + "learning_rate": 2.6489008975445346e-05, + "loss": 0.6718, + "step": 15586 + }, + { + "epoch": 2.48, + "learning_rate": 2.648643365020465e-05, + "loss": 0.6108, + "step": 15587 + }, + { + "epoch": 2.48, + "learning_rate": 2.648385830913434e-05, + "loss": 0.7303, + "step": 15588 + }, + { + "epoch": 2.48, + "learning_rate": 2.6481282952261837e-05, + "loss": 0.7346, + "step": 15589 + }, + { + "epoch": 2.48, + "learning_rate": 2.6478707579614577e-05, + "loss": 0.7779, + "step": 15590 + }, + { + "epoch": 2.48, + "learning_rate": 2.6476132191219978e-05, + "loss": 0.7352, + "step": 15591 + }, + { + "epoch": 2.48, + "learning_rate": 2.6473556787105457e-05, + "loss": 0.7665, + "step": 15592 + }, + { + "epoch": 2.48, + "learning_rate": 2.647098136729846e-05, + "loss": 0.6759, + "step": 15593 + }, + { + "epoch": 2.48, + "learning_rate": 2.64684059318264e-05, + "loss": 0.6733, + "step": 15594 + }, + { + "epoch": 2.48, + "learning_rate": 2.6465830480716712e-05, + "loss": 0.7595, + "step": 15595 + }, + { + "epoch": 2.48, + "learning_rate": 2.6463255013996818e-05, + "loss": 0.7518, + "step": 15596 + }, + { + "epoch": 2.48, + "learning_rate": 2.646067953169415e-05, + "loss": 0.804, + "step": 15597 + }, + { + "epoch": 2.48, + "learning_rate": 2.6458104033836125e-05, + "loss": 0.7428, + "step": 15598 + }, + { + "epoch": 2.49, + "learning_rate": 2.645552852045018e-05, + "loss": 0.7056, + "step": 15599 + }, + { + "epoch": 2.49, + "learning_rate": 2.645295299156374e-05, + "loss": 0.7434, + "step": 15600 + }, + { + "epoch": 2.49, + "learning_rate": 2.6450377447204228e-05, + "loss": 0.7971, + "step": 15601 + }, + { + "epoch": 2.49, + "learning_rate": 2.644780188739909e-05, + "loss": 0.6997, + "step": 15602 + }, + { + "epoch": 2.49, + "learning_rate": 2.644522631217573e-05, + "loss": 0.6905, + "step": 15603 + }, + { + "epoch": 2.49, + "learning_rate": 2.644265072156159e-05, + "loss": 0.6746, + "step": 15604 + }, + { + "epoch": 2.49, + "learning_rate": 2.6440075115584096e-05, + "loss": 0.6945, + "step": 15605 + }, + { + "epoch": 2.49, + "learning_rate": 2.6437499494270673e-05, + "loss": 0.6708, + "step": 15606 + }, + { + "epoch": 2.49, + "learning_rate": 2.6434923857648756e-05, + "loss": 0.7396, + "step": 15607 + }, + { + "epoch": 2.49, + "learning_rate": 2.6432348205745772e-05, + "loss": 0.7513, + "step": 15608 + }, + { + "epoch": 2.49, + "learning_rate": 2.642977253858914e-05, + "loss": 0.7244, + "step": 15609 + }, + { + "epoch": 2.49, + "learning_rate": 2.6427196856206317e-05, + "loss": 0.6782, + "step": 15610 + }, + { + "epoch": 2.49, + "learning_rate": 2.6424621158624702e-05, + "loss": 0.6825, + "step": 15611 + }, + { + "epoch": 2.49, + "learning_rate": 2.6422045445871735e-05, + "loss": 0.714, + "step": 15612 + }, + { + "epoch": 2.49, + "learning_rate": 2.641946971797485e-05, + "loss": 0.706, + "step": 15613 + }, + { + "epoch": 2.49, + "learning_rate": 2.641689397496148e-05, + "loss": 0.7108, + "step": 15614 + }, + { + "epoch": 2.49, + "learning_rate": 2.641431821685904e-05, + "loss": 0.6837, + "step": 15615 + }, + { + "epoch": 2.49, + "learning_rate": 2.6411742443694975e-05, + "loss": 0.6608, + "step": 15616 + }, + { + "epoch": 2.49, + "learning_rate": 2.6409166655496716e-05, + "loss": 0.7559, + "step": 15617 + }, + { + "epoch": 2.49, + "learning_rate": 2.6406590852291675e-05, + "loss": 0.6671, + "step": 15618 + }, + { + "epoch": 2.49, + "learning_rate": 2.64040150341073e-05, + "loss": 0.6554, + "step": 15619 + }, + { + "epoch": 2.49, + "learning_rate": 2.640143920097102e-05, + "loss": 0.6891, + "step": 15620 + }, + { + "epoch": 2.49, + "learning_rate": 2.6398863352910258e-05, + "loss": 0.762, + "step": 15621 + }, + { + "epoch": 2.49, + "learning_rate": 2.6396287489952454e-05, + "loss": 0.7232, + "step": 15622 + }, + { + "epoch": 2.49, + "learning_rate": 2.6393711612125037e-05, + "loss": 0.6969, + "step": 15623 + }, + { + "epoch": 2.49, + "learning_rate": 2.639113571945544e-05, + "loss": 0.7915, + "step": 15624 + }, + { + "epoch": 2.49, + "learning_rate": 2.6388559811971087e-05, + "loss": 0.7178, + "step": 15625 + }, + { + "epoch": 2.49, + "learning_rate": 2.638598388969941e-05, + "loss": 0.7721, + "step": 15626 + }, + { + "epoch": 2.49, + "learning_rate": 2.638340795266786e-05, + "loss": 0.7191, + "step": 15627 + }, + { + "epoch": 2.49, + "learning_rate": 2.638083200090385e-05, + "loss": 0.6918, + "step": 15628 + }, + { + "epoch": 2.49, + "learning_rate": 2.6378256034434807e-05, + "loss": 0.7186, + "step": 15629 + }, + { + "epoch": 2.49, + "learning_rate": 2.637568005328819e-05, + "loss": 0.7215, + "step": 15630 + }, + { + "epoch": 2.49, + "learning_rate": 2.6373104057491403e-05, + "loss": 0.7028, + "step": 15631 + }, + { + "epoch": 2.49, + "learning_rate": 2.6370528047071895e-05, + "loss": 0.7297, + "step": 15632 + }, + { + "epoch": 2.49, + "learning_rate": 2.6367952022057095e-05, + "loss": 0.7255, + "step": 15633 + }, + { + "epoch": 2.49, + "learning_rate": 2.636537598247445e-05, + "loss": 0.7181, + "step": 15634 + }, + { + "epoch": 2.49, + "learning_rate": 2.636279992835136e-05, + "loss": 0.6653, + "step": 15635 + }, + { + "epoch": 2.49, + "learning_rate": 2.636022385971529e-05, + "loss": 0.7303, + "step": 15636 + }, + { + "epoch": 2.49, + "learning_rate": 2.635764777659366e-05, + "loss": 0.7176, + "step": 15637 + }, + { + "epoch": 2.49, + "learning_rate": 2.6355071679013898e-05, + "loss": 0.6886, + "step": 15638 + }, + { + "epoch": 2.49, + "learning_rate": 2.635249556700345e-05, + "loss": 0.7202, + "step": 15639 + }, + { + "epoch": 2.49, + "learning_rate": 2.634991944058975e-05, + "loss": 0.7561, + "step": 15640 + }, + { + "epoch": 2.49, + "learning_rate": 2.6347343299800216e-05, + "loss": 0.7108, + "step": 15641 + }, + { + "epoch": 2.49, + "learning_rate": 2.6344767144662303e-05, + "loss": 0.6885, + "step": 15642 + }, + { + "epoch": 2.49, + "learning_rate": 2.6342190975203434e-05, + "loss": 0.7847, + "step": 15643 + }, + { + "epoch": 2.49, + "learning_rate": 2.633961479145105e-05, + "loss": 0.6778, + "step": 15644 + }, + { + "epoch": 2.49, + "learning_rate": 2.6337038593432576e-05, + "loss": 0.7259, + "step": 15645 + }, + { + "epoch": 2.49, + "learning_rate": 2.6334462381175458e-05, + "loss": 0.748, + "step": 15646 + }, + { + "epoch": 2.49, + "learning_rate": 2.6331886154707118e-05, + "loss": 0.7427, + "step": 15647 + }, + { + "epoch": 2.49, + "learning_rate": 2.6329309914055012e-05, + "loss": 0.7573, + "step": 15648 + }, + { + "epoch": 2.49, + "learning_rate": 2.6326733659246556e-05, + "loss": 0.7201, + "step": 15649 + }, + { + "epoch": 2.49, + "learning_rate": 2.6324157390309184e-05, + "loss": 0.6771, + "step": 15650 + }, + { + "epoch": 2.49, + "learning_rate": 2.632158110727035e-05, + "loss": 0.699, + "step": 15651 + }, + { + "epoch": 2.49, + "learning_rate": 2.631900481015747e-05, + "loss": 0.719, + "step": 15652 + }, + { + "epoch": 2.49, + "learning_rate": 2.6316428498997996e-05, + "loss": 0.6528, + "step": 15653 + }, + { + "epoch": 2.49, + "learning_rate": 2.6313852173819364e-05, + "loss": 0.7537, + "step": 15654 + }, + { + "epoch": 2.49, + "learning_rate": 2.6311275834648992e-05, + "loss": 0.7199, + "step": 15655 + }, + { + "epoch": 2.49, + "learning_rate": 2.6308699481514332e-05, + "loss": 0.7856, + "step": 15656 + }, + { + "epoch": 2.49, + "learning_rate": 2.630612311444282e-05, + "loss": 0.6931, + "step": 15657 + }, + { + "epoch": 2.49, + "learning_rate": 2.6303546733461888e-05, + "loss": 0.757, + "step": 15658 + }, + { + "epoch": 2.49, + "learning_rate": 2.6300970338598973e-05, + "loss": 0.6745, + "step": 15659 + }, + { + "epoch": 2.49, + "learning_rate": 2.629839392988152e-05, + "loss": 0.6676, + "step": 15660 + }, + { + "epoch": 2.49, + "learning_rate": 2.629581750733695e-05, + "loss": 0.7212, + "step": 15661 + }, + { + "epoch": 2.5, + "learning_rate": 2.6293241070992718e-05, + "loss": 0.6832, + "step": 15662 + }, + { + "epoch": 2.5, + "learning_rate": 2.6290664620876255e-05, + "loss": 0.6569, + "step": 15663 + }, + { + "epoch": 2.5, + "learning_rate": 2.628808815701499e-05, + "loss": 0.7838, + "step": 15664 + }, + { + "epoch": 2.5, + "learning_rate": 2.6285511679436377e-05, + "loss": 0.6835, + "step": 15665 + }, + { + "epoch": 2.5, + "learning_rate": 2.628293518816784e-05, + "loss": 0.7346, + "step": 15666 + }, + { + "epoch": 2.5, + "learning_rate": 2.6280358683236818e-05, + "loss": 0.6733, + "step": 15667 + }, + { + "epoch": 2.5, + "learning_rate": 2.6277782164670762e-05, + "loss": 0.7598, + "step": 15668 + }, + { + "epoch": 2.5, + "learning_rate": 2.6275205632497095e-05, + "loss": 0.7214, + "step": 15669 + }, + { + "epoch": 2.5, + "learning_rate": 2.627262908674326e-05, + "loss": 0.742, + "step": 15670 + }, + { + "epoch": 2.5, + "learning_rate": 2.6270052527436706e-05, + "loss": 0.6844, + "step": 15671 + }, + { + "epoch": 2.5, + "learning_rate": 2.6267475954604858e-05, + "loss": 0.7293, + "step": 15672 + }, + { + "epoch": 2.5, + "learning_rate": 2.6264899368275164e-05, + "loss": 0.7437, + "step": 15673 + }, + { + "epoch": 2.5, + "learning_rate": 2.626232276847506e-05, + "loss": 0.708, + "step": 15674 + }, + { + "epoch": 2.5, + "learning_rate": 2.6259746155231986e-05, + "loss": 0.8143, + "step": 15675 + }, + { + "epoch": 2.5, + "learning_rate": 2.625716952857338e-05, + "loss": 0.78, + "step": 15676 + }, + { + "epoch": 2.5, + "learning_rate": 2.6254592888526675e-05, + "loss": 0.6893, + "step": 15677 + }, + { + "epoch": 2.5, + "learning_rate": 2.625201623511932e-05, + "loss": 0.6983, + "step": 15678 + }, + { + "epoch": 2.5, + "learning_rate": 2.6249439568378752e-05, + "loss": 0.7469, + "step": 15679 + }, + { + "epoch": 2.5, + "learning_rate": 2.6246862888332414e-05, + "loss": 0.6614, + "step": 15680 + }, + { + "epoch": 2.5, + "learning_rate": 2.624428619500774e-05, + "loss": 0.6857, + "step": 15681 + }, + { + "epoch": 2.5, + "learning_rate": 2.6241709488432175e-05, + "loss": 0.7083, + "step": 15682 + }, + { + "epoch": 2.5, + "learning_rate": 2.6239132768633162e-05, + "loss": 0.6954, + "step": 15683 + }, + { + "epoch": 2.5, + "learning_rate": 2.6236556035638127e-05, + "loss": 0.7488, + "step": 15684 + }, + { + "epoch": 2.5, + "learning_rate": 2.6233979289474535e-05, + "loss": 0.6927, + "step": 15685 + }, + { + "epoch": 2.5, + "learning_rate": 2.6231402530169807e-05, + "loss": 0.7378, + "step": 15686 + }, + { + "epoch": 2.5, + "learning_rate": 2.6228825757751386e-05, + "loss": 0.6678, + "step": 15687 + }, + { + "epoch": 2.5, + "learning_rate": 2.622624897224672e-05, + "loss": 0.7945, + "step": 15688 + }, + { + "epoch": 2.5, + "learning_rate": 2.6223672173683246e-05, + "loss": 0.644, + "step": 15689 + }, + { + "epoch": 2.5, + "learning_rate": 2.6221095362088406e-05, + "loss": 0.71, + "step": 15690 + }, + { + "epoch": 2.5, + "learning_rate": 2.6218518537489638e-05, + "loss": 0.6658, + "step": 15691 + }, + { + "epoch": 2.5, + "learning_rate": 2.6215941699914393e-05, + "loss": 0.7109, + "step": 15692 + }, + { + "epoch": 2.5, + "learning_rate": 2.6213364849390105e-05, + "loss": 0.7211, + "step": 15693 + }, + { + "epoch": 2.5, + "learning_rate": 2.6210787985944217e-05, + "loss": 0.6624, + "step": 15694 + }, + { + "epoch": 2.5, + "learning_rate": 2.6208211109604174e-05, + "loss": 0.7858, + "step": 15695 + }, + { + "epoch": 2.5, + "learning_rate": 2.620563422039742e-05, + "loss": 0.7287, + "step": 15696 + }, + { + "epoch": 2.5, + "learning_rate": 2.6203057318351386e-05, + "loss": 0.6536, + "step": 15697 + }, + { + "epoch": 2.5, + "learning_rate": 2.6200480403493522e-05, + "loss": 0.7043, + "step": 15698 + }, + { + "epoch": 2.5, + "learning_rate": 2.6197903475851275e-05, + "loss": 0.7274, + "step": 15699 + }, + { + "epoch": 2.5, + "learning_rate": 2.6195326535452085e-05, + "loss": 0.6834, + "step": 15700 + }, + { + "epoch": 2.5, + "learning_rate": 2.6192749582323382e-05, + "loss": 0.7502, + "step": 15701 + }, + { + "epoch": 2.5, + "learning_rate": 2.619017261649263e-05, + "loss": 0.7285, + "step": 15702 + }, + { + "epoch": 2.5, + "learning_rate": 2.6187595637987265e-05, + "loss": 0.7676, + "step": 15703 + }, + { + "epoch": 2.5, + "learning_rate": 2.6185018646834725e-05, + "loss": 0.7479, + "step": 15704 + }, + { + "epoch": 2.5, + "learning_rate": 2.6182441643062454e-05, + "loss": 0.6626, + "step": 15705 + }, + { + "epoch": 2.5, + "learning_rate": 2.6179864626697897e-05, + "loss": 0.6919, + "step": 15706 + }, + { + "epoch": 2.5, + "learning_rate": 2.6177287597768497e-05, + "loss": 0.731, + "step": 15707 + }, + { + "epoch": 2.5, + "learning_rate": 2.6174710556301695e-05, + "loss": 0.6762, + "step": 15708 + }, + { + "epoch": 2.5, + "learning_rate": 2.6172133502324948e-05, + "loss": 0.7452, + "step": 15709 + }, + { + "epoch": 2.5, + "learning_rate": 2.6169556435865683e-05, + "loss": 0.6755, + "step": 15710 + }, + { + "epoch": 2.5, + "learning_rate": 2.6166979356951354e-05, + "loss": 0.6327, + "step": 15711 + }, + { + "epoch": 2.5, + "learning_rate": 2.6164402265609405e-05, + "loss": 0.6907, + "step": 15712 + }, + { + "epoch": 2.5, + "learning_rate": 2.6161825161867277e-05, + "loss": 0.6804, + "step": 15713 + }, + { + "epoch": 2.5, + "learning_rate": 2.615924804575242e-05, + "loss": 0.6895, + "step": 15714 + }, + { + "epoch": 2.5, + "learning_rate": 2.615667091729228e-05, + "loss": 0.7271, + "step": 15715 + }, + { + "epoch": 2.5, + "learning_rate": 2.6154093776514288e-05, + "loss": 0.7355, + "step": 15716 + }, + { + "epoch": 2.5, + "learning_rate": 2.6151516623445898e-05, + "loss": 0.7054, + "step": 15717 + }, + { + "epoch": 2.5, + "learning_rate": 2.6148939458114557e-05, + "loss": 0.7372, + "step": 15718 + }, + { + "epoch": 2.5, + "learning_rate": 2.6146362280547704e-05, + "loss": 0.7611, + "step": 15719 + }, + { + "epoch": 2.5, + "learning_rate": 2.61437850907728e-05, + "loss": 0.7852, + "step": 15720 + }, + { + "epoch": 2.5, + "learning_rate": 2.614120788881727e-05, + "loss": 0.7394, + "step": 15721 + }, + { + "epoch": 2.5, + "learning_rate": 2.6138630674708563e-05, + "loss": 0.7778, + "step": 15722 + }, + { + "epoch": 2.5, + "learning_rate": 2.6136053448474147e-05, + "loss": 0.8023, + "step": 15723 + }, + { + "epoch": 2.51, + "learning_rate": 2.6133476210141438e-05, + "loss": 0.7306, + "step": 15724 + }, + { + "epoch": 2.51, + "learning_rate": 2.6130898959737903e-05, + "loss": 0.6784, + "step": 15725 + }, + { + "epoch": 2.51, + "learning_rate": 2.612832169729098e-05, + "loss": 0.7773, + "step": 15726 + }, + { + "epoch": 2.51, + "learning_rate": 2.6125744422828114e-05, + "loss": 0.6958, + "step": 15727 + }, + { + "epoch": 2.51, + "learning_rate": 2.6123167136376754e-05, + "loss": 0.7308, + "step": 15728 + }, + { + "epoch": 2.51, + "learning_rate": 2.6120589837964343e-05, + "loss": 0.6717, + "step": 15729 + }, + { + "epoch": 2.51, + "learning_rate": 2.611801252761833e-05, + "loss": 0.7005, + "step": 15730 + }, + { + "epoch": 2.51, + "learning_rate": 2.6115435205366167e-05, + "loss": 0.6603, + "step": 15731 + }, + { + "epoch": 2.51, + "learning_rate": 2.6112857871235295e-05, + "loss": 0.6922, + "step": 15732 + }, + { + "epoch": 2.51, + "learning_rate": 2.6110280525253162e-05, + "loss": 0.7833, + "step": 15733 + }, + { + "epoch": 2.51, + "learning_rate": 2.610770316744721e-05, + "loss": 0.705, + "step": 15734 + }, + { + "epoch": 2.51, + "learning_rate": 2.610512579784491e-05, + "loss": 0.6921, + "step": 15735 + }, + { + "epoch": 2.51, + "learning_rate": 2.610254841647367e-05, + "loss": 0.7075, + "step": 15736 + }, + { + "epoch": 2.51, + "learning_rate": 2.609997102336097e-05, + "loss": 0.7219, + "step": 15737 + }, + { + "epoch": 2.51, + "learning_rate": 2.6097393618534245e-05, + "loss": 0.8022, + "step": 15738 + }, + { + "epoch": 2.51, + "learning_rate": 2.609481620202094e-05, + "loss": 0.6943, + "step": 15739 + }, + { + "epoch": 2.51, + "learning_rate": 2.609223877384851e-05, + "loss": 0.768, + "step": 15740 + }, + { + "epoch": 2.51, + "learning_rate": 2.6089661334044402e-05, + "loss": 0.6485, + "step": 15741 + }, + { + "epoch": 2.51, + "learning_rate": 2.6087083882636053e-05, + "loss": 0.7379, + "step": 15742 + }, + { + "epoch": 2.51, + "learning_rate": 2.6084506419650934e-05, + "loss": 0.7767, + "step": 15743 + }, + { + "epoch": 2.51, + "learning_rate": 2.6081928945116474e-05, + "loss": 0.7273, + "step": 15744 + }, + { + "epoch": 2.51, + "learning_rate": 2.6079351459060127e-05, + "loss": 0.6214, + "step": 15745 + }, + { + "epoch": 2.51, + "learning_rate": 2.6076773961509342e-05, + "loss": 0.7574, + "step": 15746 + }, + { + "epoch": 2.51, + "learning_rate": 2.6074196452491568e-05, + "loss": 0.6137, + "step": 15747 + }, + { + "epoch": 2.51, + "learning_rate": 2.6071618932034254e-05, + "loss": 0.6692, + "step": 15748 + }, + { + "epoch": 2.51, + "learning_rate": 2.6069041400164852e-05, + "loss": 0.6726, + "step": 15749 + }, + { + "epoch": 2.51, + "learning_rate": 2.60664638569108e-05, + "loss": 0.7153, + "step": 15750 + }, + { + "epoch": 2.51, + "learning_rate": 2.606388630229957e-05, + "loss": 0.7077, + "step": 15751 + }, + { + "epoch": 2.51, + "learning_rate": 2.6061308736358585e-05, + "loss": 0.6944, + "step": 15752 + }, + { + "epoch": 2.51, + "learning_rate": 2.605873115911531e-05, + "loss": 0.6868, + "step": 15753 + }, + { + "epoch": 2.51, + "learning_rate": 2.6056153570597187e-05, + "loss": 0.7376, + "step": 15754 + }, + { + "epoch": 2.51, + "learning_rate": 2.6053575970831677e-05, + "loss": 0.7681, + "step": 15755 + }, + { + "epoch": 2.51, + "learning_rate": 2.605099835984622e-05, + "loss": 0.709, + "step": 15756 + }, + { + "epoch": 2.51, + "learning_rate": 2.6048420737668265e-05, + "loss": 0.7664, + "step": 15757 + }, + { + "epoch": 2.51, + "learning_rate": 2.604584310432527e-05, + "loss": 0.6642, + "step": 15758 + }, + { + "epoch": 2.51, + "learning_rate": 2.6043265459844673e-05, + "loss": 0.7827, + "step": 15759 + }, + { + "epoch": 2.51, + "learning_rate": 2.604068780425394e-05, + "loss": 0.6733, + "step": 15760 + }, + { + "epoch": 2.51, + "learning_rate": 2.6038110137580514e-05, + "loss": 0.7543, + "step": 15761 + }, + { + "epoch": 2.51, + "learning_rate": 2.6035532459851835e-05, + "loss": 0.8049, + "step": 15762 + }, + { + "epoch": 2.51, + "learning_rate": 2.6032954771095373e-05, + "loss": 0.7032, + "step": 15763 + }, + { + "epoch": 2.51, + "learning_rate": 2.6030377071338573e-05, + "loss": 0.7416, + "step": 15764 + }, + { + "epoch": 2.51, + "learning_rate": 2.6027799360608877e-05, + "loss": 0.6936, + "step": 15765 + }, + { + "epoch": 2.51, + "learning_rate": 2.6025221638933744e-05, + "loss": 0.7025, + "step": 15766 + }, + { + "epoch": 2.51, + "learning_rate": 2.6022643906340627e-05, + "loss": 0.7437, + "step": 15767 + }, + { + "epoch": 2.51, + "learning_rate": 2.6020066162856967e-05, + "loss": 0.7485, + "step": 15768 + }, + { + "epoch": 2.51, + "learning_rate": 2.601748840851022e-05, + "loss": 0.7178, + "step": 15769 + }, + { + "epoch": 2.51, + "learning_rate": 2.6014910643327844e-05, + "loss": 0.7311, + "step": 15770 + }, + { + "epoch": 2.51, + "learning_rate": 2.601233286733728e-05, + "loss": 0.7147, + "step": 15771 + }, + { + "epoch": 2.51, + "learning_rate": 2.600975508056599e-05, + "loss": 0.8354, + "step": 15772 + }, + { + "epoch": 2.51, + "learning_rate": 2.6007177283041424e-05, + "loss": 0.6937, + "step": 15773 + }, + { + "epoch": 2.51, + "learning_rate": 2.6004599474791026e-05, + "loss": 0.7131, + "step": 15774 + }, + { + "epoch": 2.51, + "learning_rate": 2.6002021655842263e-05, + "loss": 0.6856, + "step": 15775 + }, + { + "epoch": 2.51, + "learning_rate": 2.5999443826222562e-05, + "loss": 0.7207, + "step": 15776 + }, + { + "epoch": 2.51, + "learning_rate": 2.59968659859594e-05, + "loss": 0.6326, + "step": 15777 + }, + { + "epoch": 2.51, + "learning_rate": 2.5994288135080223e-05, + "loss": 0.7317, + "step": 15778 + }, + { + "epoch": 2.51, + "learning_rate": 2.5991710273612473e-05, + "loss": 0.6758, + "step": 15779 + }, + { + "epoch": 2.51, + "learning_rate": 2.5989132401583622e-05, + "loss": 0.7603, + "step": 15780 + }, + { + "epoch": 2.51, + "learning_rate": 2.5986554519021106e-05, + "loss": 0.6939, + "step": 15781 + }, + { + "epoch": 2.51, + "learning_rate": 2.598397662595238e-05, + "loss": 0.7083, + "step": 15782 + }, + { + "epoch": 2.51, + "learning_rate": 2.5981398722404903e-05, + "loss": 0.7043, + "step": 15783 + }, + { + "epoch": 2.51, + "learning_rate": 2.5978820808406125e-05, + "loss": 0.7195, + "step": 15784 + }, + { + "epoch": 2.51, + "learning_rate": 2.5976242883983505e-05, + "loss": 0.7106, + "step": 15785 + }, + { + "epoch": 2.51, + "learning_rate": 2.5973664949164483e-05, + "loss": 0.6478, + "step": 15786 + }, + { + "epoch": 2.52, + "learning_rate": 2.5971087003976525e-05, + "loss": 0.6598, + "step": 15787 + }, + { + "epoch": 2.52, + "learning_rate": 2.5968509048447074e-05, + "loss": 0.6952, + "step": 15788 + }, + { + "epoch": 2.52, + "learning_rate": 2.5965931082603595e-05, + "loss": 0.6849, + "step": 15789 + }, + { + "epoch": 2.52, + "learning_rate": 2.5963353106473537e-05, + "loss": 0.6608, + "step": 15790 + }, + { + "epoch": 2.52, + "learning_rate": 2.5960775120084348e-05, + "loss": 0.8133, + "step": 15791 + }, + { + "epoch": 2.52, + "learning_rate": 2.595819712346349e-05, + "loss": 0.7202, + "step": 15792 + }, + { + "epoch": 2.52, + "learning_rate": 2.595561911663842e-05, + "loss": 0.6759, + "step": 15793 + }, + { + "epoch": 2.52, + "learning_rate": 2.5953041099636576e-05, + "loss": 0.6929, + "step": 15794 + }, + { + "epoch": 2.52, + "learning_rate": 2.595046307248543e-05, + "loss": 0.7161, + "step": 15795 + }, + { + "epoch": 2.52, + "learning_rate": 2.5947885035212432e-05, + "loss": 0.7194, + "step": 15796 + }, + { + "epoch": 2.52, + "learning_rate": 2.594530698784503e-05, + "loss": 0.627, + "step": 15797 + }, + { + "epoch": 2.52, + "learning_rate": 2.594272893041068e-05, + "loss": 0.7533, + "step": 15798 + }, + { + "epoch": 2.52, + "learning_rate": 2.594015086293684e-05, + "loss": 0.7362, + "step": 15799 + }, + { + "epoch": 2.52, + "learning_rate": 2.593757278545097e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 2.52, + "learning_rate": 2.5934994697980514e-05, + "loss": 0.7404, + "step": 15801 + }, + { + "epoch": 2.52, + "learning_rate": 2.593241660055293e-05, + "loss": 0.7096, + "step": 15802 + }, + { + "epoch": 2.52, + "learning_rate": 2.5929838493195678e-05, + "loss": 0.6612, + "step": 15803 + }, + { + "epoch": 2.52, + "learning_rate": 2.5927260375936213e-05, + "loss": 0.6886, + "step": 15804 + }, + { + "epoch": 2.52, + "learning_rate": 2.592468224880198e-05, + "loss": 0.703, + "step": 15805 + }, + { + "epoch": 2.52, + "learning_rate": 2.5922104111820456e-05, + "loss": 0.6925, + "step": 15806 + }, + { + "epoch": 2.52, + "learning_rate": 2.5919525965019077e-05, + "loss": 0.6863, + "step": 15807 + }, + { + "epoch": 2.52, + "learning_rate": 2.5916947808425295e-05, + "loss": 0.7559, + "step": 15808 + }, + { + "epoch": 2.52, + "learning_rate": 2.591436964206659e-05, + "loss": 0.811, + "step": 15809 + }, + { + "epoch": 2.52, + "learning_rate": 2.59117914659704e-05, + "loss": 0.6716, + "step": 15810 + }, + { + "epoch": 2.52, + "learning_rate": 2.5909213280164175e-05, + "loss": 0.6729, + "step": 15811 + }, + { + "epoch": 2.52, + "learning_rate": 2.5906635084675386e-05, + "loss": 0.6988, + "step": 15812 + }, + { + "epoch": 2.52, + "learning_rate": 2.5904056879531485e-05, + "loss": 0.829, + "step": 15813 + }, + { + "epoch": 2.52, + "learning_rate": 2.5901478664759926e-05, + "loss": 0.7087, + "step": 15814 + }, + { + "epoch": 2.52, + "learning_rate": 2.5898900440388167e-05, + "loss": 0.7373, + "step": 15815 + }, + { + "epoch": 2.52, + "learning_rate": 2.589632220644367e-05, + "loss": 0.7216, + "step": 15816 + }, + { + "epoch": 2.52, + "learning_rate": 2.5893743962953875e-05, + "loss": 0.7405, + "step": 15817 + }, + { + "epoch": 2.52, + "learning_rate": 2.5891165709946256e-05, + "loss": 0.6866, + "step": 15818 + }, + { + "epoch": 2.52, + "learning_rate": 2.588858744744826e-05, + "loss": 0.6698, + "step": 15819 + }, + { + "epoch": 2.52, + "learning_rate": 2.588600917548735e-05, + "loss": 0.7646, + "step": 15820 + }, + { + "epoch": 2.52, + "learning_rate": 2.5883430894090983e-05, + "loss": 0.75, + "step": 15821 + }, + { + "epoch": 2.52, + "learning_rate": 2.588085260328661e-05, + "loss": 0.734, + "step": 15822 + }, + { + "epoch": 2.52, + "learning_rate": 2.5878274303101685e-05, + "loss": 0.7227, + "step": 15823 + }, + { + "epoch": 2.52, + "learning_rate": 2.587569599356368e-05, + "loss": 0.7086, + "step": 15824 + }, + { + "epoch": 2.52, + "learning_rate": 2.5873117674700043e-05, + "loss": 0.6318, + "step": 15825 + }, + { + "epoch": 2.52, + "learning_rate": 2.587053934653823e-05, + "loss": 0.7623, + "step": 15826 + }, + { + "epoch": 2.52, + "learning_rate": 2.5867961009105706e-05, + "loss": 0.8196, + "step": 15827 + }, + { + "epoch": 2.52, + "learning_rate": 2.586538266242992e-05, + "loss": 0.6018, + "step": 15828 + }, + { + "epoch": 2.52, + "learning_rate": 2.5862804306538334e-05, + "loss": 0.7666, + "step": 15829 + }, + { + "epoch": 2.52, + "learning_rate": 2.586022594145841e-05, + "loss": 0.7378, + "step": 15830 + }, + { + "epoch": 2.52, + "learning_rate": 2.5857647567217595e-05, + "loss": 0.7045, + "step": 15831 + }, + { + "epoch": 2.52, + "learning_rate": 2.5855069183843362e-05, + "loss": 0.8075, + "step": 15832 + }, + { + "epoch": 2.52, + "learning_rate": 2.585249079136316e-05, + "loss": 0.7104, + "step": 15833 + }, + { + "epoch": 2.52, + "learning_rate": 2.584991238980444e-05, + "loss": 0.7107, + "step": 15834 + }, + { + "epoch": 2.52, + "learning_rate": 2.5847333979194677e-05, + "loss": 0.6728, + "step": 15835 + }, + { + "epoch": 2.52, + "learning_rate": 2.584475555956133e-05, + "loss": 0.6328, + "step": 15836 + }, + { + "epoch": 2.52, + "learning_rate": 2.5842177130931834e-05, + "loss": 0.7142, + "step": 15837 + }, + { + "epoch": 2.52, + "learning_rate": 2.5839598693333673e-05, + "loss": 0.7186, + "step": 15838 + }, + { + "epoch": 2.52, + "learning_rate": 2.5837020246794293e-05, + "loss": 0.6845, + "step": 15839 + }, + { + "epoch": 2.52, + "learning_rate": 2.5834441791341148e-05, + "loss": 0.67, + "step": 15840 + }, + { + "epoch": 2.52, + "learning_rate": 2.583186332700171e-05, + "loss": 0.7457, + "step": 15841 + }, + { + "epoch": 2.52, + "learning_rate": 2.582928485380343e-05, + "loss": 0.7182, + "step": 15842 + }, + { + "epoch": 2.52, + "learning_rate": 2.5826706371773766e-05, + "loss": 0.7223, + "step": 15843 + }, + { + "epoch": 2.52, + "learning_rate": 2.582412788094019e-05, + "loss": 0.6331, + "step": 15844 + }, + { + "epoch": 2.52, + "learning_rate": 2.5821549381330158e-05, + "loss": 0.687, + "step": 15845 + }, + { + "epoch": 2.52, + "learning_rate": 2.5818970872971115e-05, + "loss": 0.7417, + "step": 15846 + }, + { + "epoch": 2.52, + "learning_rate": 2.581639235589053e-05, + "loss": 0.7542, + "step": 15847 + }, + { + "epoch": 2.52, + "learning_rate": 2.581381383011586e-05, + "loss": 0.7071, + "step": 15848 + }, + { + "epoch": 2.52, + "learning_rate": 2.5811235295674567e-05, + "loss": 0.727, + "step": 15849 + }, + { + "epoch": 2.53, + "learning_rate": 2.5808656752594112e-05, + "loss": 0.7253, + "step": 15850 + }, + { + "epoch": 2.53, + "learning_rate": 2.580607820090195e-05, + "loss": 0.7742, + "step": 15851 + }, + { + "epoch": 2.53, + "learning_rate": 2.580349964062555e-05, + "loss": 0.7864, + "step": 15852 + }, + { + "epoch": 2.53, + "learning_rate": 2.5800921071792368e-05, + "loss": 0.6568, + "step": 15853 + }, + { + "epoch": 2.53, + "learning_rate": 2.5798342494429857e-05, + "loss": 0.7179, + "step": 15854 + }, + { + "epoch": 2.53, + "learning_rate": 2.579576390856549e-05, + "loss": 0.7486, + "step": 15855 + }, + { + "epoch": 2.53, + "learning_rate": 2.579318531422672e-05, + "loss": 0.7384, + "step": 15856 + }, + { + "epoch": 2.53, + "learning_rate": 2.5790606711441e-05, + "loss": 0.6816, + "step": 15857 + }, + { + "epoch": 2.53, + "learning_rate": 2.5788028100235806e-05, + "loss": 0.697, + "step": 15858 + }, + { + "epoch": 2.53, + "learning_rate": 2.578544948063859e-05, + "loss": 0.7197, + "step": 15859 + }, + { + "epoch": 2.53, + "learning_rate": 2.578287085267681e-05, + "loss": 0.7463, + "step": 15860 + }, + { + "epoch": 2.53, + "learning_rate": 2.5780292216377933e-05, + "loss": 0.7323, + "step": 15861 + }, + { + "epoch": 2.53, + "learning_rate": 2.577771357176942e-05, + "loss": 0.7798, + "step": 15862 + }, + { + "epoch": 2.53, + "learning_rate": 2.577513491887872e-05, + "loss": 0.7098, + "step": 15863 + }, + { + "epoch": 2.53, + "learning_rate": 2.5772556257733316e-05, + "loss": 0.7646, + "step": 15864 + }, + { + "epoch": 2.53, + "learning_rate": 2.5769977588360648e-05, + "loss": 0.6771, + "step": 15865 + }, + { + "epoch": 2.53, + "learning_rate": 2.5767398910788193e-05, + "loss": 0.6628, + "step": 15866 + }, + { + "epoch": 2.53, + "learning_rate": 2.5764820225043406e-05, + "loss": 0.7045, + "step": 15867 + }, + { + "epoch": 2.53, + "learning_rate": 2.5762241531153742e-05, + "loss": 0.7422, + "step": 15868 + }, + { + "epoch": 2.53, + "learning_rate": 2.575966282914667e-05, + "loss": 0.6999, + "step": 15869 + }, + { + "epoch": 2.53, + "learning_rate": 2.575708411904965e-05, + "loss": 0.6723, + "step": 15870 + }, + { + "epoch": 2.53, + "learning_rate": 2.575450540089014e-05, + "loss": 0.7478, + "step": 15871 + }, + { + "epoch": 2.53, + "learning_rate": 2.5751926674695615e-05, + "loss": 0.7503, + "step": 15872 + }, + { + "epoch": 2.53, + "learning_rate": 2.5749347940493522e-05, + "loss": 0.7684, + "step": 15873 + }, + { + "epoch": 2.53, + "learning_rate": 2.5746769198311332e-05, + "loss": 0.7808, + "step": 15874 + }, + { + "epoch": 2.53, + "learning_rate": 2.5744190448176492e-05, + "loss": 0.6958, + "step": 15875 + }, + { + "epoch": 2.53, + "learning_rate": 2.5741611690116497e-05, + "loss": 0.7202, + "step": 15876 + }, + { + "epoch": 2.53, + "learning_rate": 2.573903292415877e-05, + "loss": 0.6792, + "step": 15877 + }, + { + "epoch": 2.53, + "learning_rate": 2.5736454150330795e-05, + "loss": 0.689, + "step": 15878 + }, + { + "epoch": 2.53, + "learning_rate": 2.573387536866003e-05, + "loss": 0.7391, + "step": 15879 + }, + { + "epoch": 2.53, + "learning_rate": 2.5731296579173937e-05, + "loss": 0.739, + "step": 15880 + }, + { + "epoch": 2.53, + "learning_rate": 2.572871778189998e-05, + "loss": 0.6595, + "step": 15881 + }, + { + "epoch": 2.53, + "learning_rate": 2.5726138976865627e-05, + "loss": 0.6886, + "step": 15882 + }, + { + "epoch": 2.53, + "learning_rate": 2.5723560164098325e-05, + "loss": 0.656, + "step": 15883 + }, + { + "epoch": 2.53, + "learning_rate": 2.5720981343625554e-05, + "loss": 0.7113, + "step": 15884 + }, + { + "epoch": 2.53, + "learning_rate": 2.5718402515474765e-05, + "loss": 0.7227, + "step": 15885 + }, + { + "epoch": 2.53, + "learning_rate": 2.5715823679673428e-05, + "loss": 0.7692, + "step": 15886 + }, + { + "epoch": 2.53, + "learning_rate": 2.5713244836249005e-05, + "loss": 0.6979, + "step": 15887 + }, + { + "epoch": 2.53, + "learning_rate": 2.571066598522896e-05, + "loss": 0.7443, + "step": 15888 + }, + { + "epoch": 2.53, + "learning_rate": 2.5708087126640746e-05, + "loss": 0.7371, + "step": 15889 + }, + { + "epoch": 2.53, + "learning_rate": 2.5705508260511835e-05, + "loss": 0.7043, + "step": 15890 + }, + { + "epoch": 2.53, + "learning_rate": 2.5702929386869695e-05, + "loss": 0.724, + "step": 15891 + }, + { + "epoch": 2.53, + "learning_rate": 2.5700350505741776e-05, + "loss": 0.6888, + "step": 15892 + }, + { + "epoch": 2.53, + "learning_rate": 2.569777161715555e-05, + "loss": 0.7851, + "step": 15893 + }, + { + "epoch": 2.53, + "learning_rate": 2.569519272113849e-05, + "loss": 0.7781, + "step": 15894 + }, + { + "epoch": 2.53, + "learning_rate": 2.5692613817718036e-05, + "loss": 0.6295, + "step": 15895 + }, + { + "epoch": 2.53, + "learning_rate": 2.569003490692168e-05, + "loss": 0.7372, + "step": 15896 + }, + { + "epoch": 2.53, + "learning_rate": 2.5687455988776858e-05, + "loss": 0.7161, + "step": 15897 + }, + { + "epoch": 2.53, + "learning_rate": 2.5684877063311057e-05, + "loss": 0.6569, + "step": 15898 + }, + { + "epoch": 2.53, + "learning_rate": 2.568229813055173e-05, + "loss": 0.7588, + "step": 15899 + }, + { + "epoch": 2.53, + "learning_rate": 2.567971919052633e-05, + "loss": 0.7873, + "step": 15900 + }, + { + "epoch": 2.53, + "learning_rate": 2.5677140243262348e-05, + "loss": 0.739, + "step": 15901 + }, + { + "epoch": 2.53, + "learning_rate": 2.5674561288787223e-05, + "loss": 0.6317, + "step": 15902 + }, + { + "epoch": 2.53, + "learning_rate": 2.5671982327128436e-05, + "loss": 0.7365, + "step": 15903 + }, + { + "epoch": 2.53, + "learning_rate": 2.5669403358313443e-05, + "loss": 0.712, + "step": 15904 + }, + { + "epoch": 2.53, + "learning_rate": 2.5666824382369716e-05, + "loss": 0.7476, + "step": 15905 + }, + { + "epoch": 2.53, + "learning_rate": 2.5664245399324705e-05, + "loss": 0.7059, + "step": 15906 + }, + { + "epoch": 2.53, + "learning_rate": 2.566166640920589e-05, + "loss": 0.749, + "step": 15907 + }, + { + "epoch": 2.53, + "learning_rate": 2.5659087412040733e-05, + "loss": 0.7423, + "step": 15908 + }, + { + "epoch": 2.53, + "learning_rate": 2.5656508407856684e-05, + "loss": 0.7664, + "step": 15909 + }, + { + "epoch": 2.53, + "learning_rate": 2.5653929396681224e-05, + "loss": 0.7084, + "step": 15910 + }, + { + "epoch": 2.53, + "learning_rate": 2.5651350378541816e-05, + "loss": 0.7736, + "step": 15911 + }, + { + "epoch": 2.53, + "learning_rate": 2.5648771353465915e-05, + "loss": 0.7148, + "step": 15912 + }, + { + "epoch": 2.54, + "learning_rate": 2.5646192321481e-05, + "loss": 0.6714, + "step": 15913 + }, + { + "epoch": 2.54, + "learning_rate": 2.564361328261453e-05, + "loss": 0.7149, + "step": 15914 + }, + { + "epoch": 2.54, + "learning_rate": 2.5641034236893964e-05, + "loss": 0.7356, + "step": 15915 + }, + { + "epoch": 2.54, + "learning_rate": 2.5638455184346772e-05, + "loss": 0.7053, + "step": 15916 + }, + { + "epoch": 2.54, + "learning_rate": 2.5635876125000424e-05, + "loss": 0.7197, + "step": 15917 + }, + { + "epoch": 2.54, + "learning_rate": 2.5633297058882382e-05, + "loss": 0.8071, + "step": 15918 + }, + { + "epoch": 2.54, + "learning_rate": 2.563071798602011e-05, + "loss": 0.665, + "step": 15919 + }, + { + "epoch": 2.54, + "learning_rate": 2.5628138906441067e-05, + "loss": 0.654, + "step": 15920 + }, + { + "epoch": 2.54, + "learning_rate": 2.5625559820172735e-05, + "loss": 0.726, + "step": 15921 + }, + { + "epoch": 2.54, + "learning_rate": 2.5622980727242567e-05, + "loss": 0.6387, + "step": 15922 + }, + { + "epoch": 2.54, + "learning_rate": 2.562040162767803e-05, + "loss": 0.6658, + "step": 15923 + }, + { + "epoch": 2.54, + "learning_rate": 2.5617822521506602e-05, + "loss": 0.7095, + "step": 15924 + }, + { + "epoch": 2.54, + "learning_rate": 2.5615243408755728e-05, + "loss": 0.782, + "step": 15925 + }, + { + "epoch": 2.54, + "learning_rate": 2.561266428945289e-05, + "loss": 0.7899, + "step": 15926 + }, + { + "epoch": 2.54, + "learning_rate": 2.561008516362556e-05, + "loss": 0.7983, + "step": 15927 + }, + { + "epoch": 2.54, + "learning_rate": 2.5607506031301182e-05, + "loss": 0.7308, + "step": 15928 + }, + { + "epoch": 2.54, + "learning_rate": 2.5604926892507226e-05, + "loss": 0.7207, + "step": 15929 + }, + { + "epoch": 2.54, + "learning_rate": 2.5602347747271178e-05, + "loss": 0.7538, + "step": 15930 + }, + { + "epoch": 2.54, + "learning_rate": 2.5599768595620488e-05, + "loss": 0.6906, + "step": 15931 + }, + { + "epoch": 2.54, + "learning_rate": 2.5597189437582624e-05, + "loss": 0.7248, + "step": 15932 + }, + { + "epoch": 2.54, + "learning_rate": 2.559461027318506e-05, + "loss": 0.7356, + "step": 15933 + }, + { + "epoch": 2.54, + "learning_rate": 2.5592031102455255e-05, + "loss": 0.776, + "step": 15934 + }, + { + "epoch": 2.54, + "learning_rate": 2.5589451925420675e-05, + "loss": 0.7558, + "step": 15935 + }, + { + "epoch": 2.54, + "learning_rate": 2.5586872742108798e-05, + "loss": 0.7712, + "step": 15936 + }, + { + "epoch": 2.54, + "learning_rate": 2.558429355254708e-05, + "loss": 0.7223, + "step": 15937 + }, + { + "epoch": 2.54, + "learning_rate": 2.5581714356762987e-05, + "loss": 0.7056, + "step": 15938 + }, + { + "epoch": 2.54, + "learning_rate": 2.5579135154783988e-05, + "loss": 0.6614, + "step": 15939 + }, + { + "epoch": 2.54, + "learning_rate": 2.557655594663756e-05, + "loss": 0.7701, + "step": 15940 + }, + { + "epoch": 2.54, + "learning_rate": 2.557397673235115e-05, + "loss": 0.6807, + "step": 15941 + }, + { + "epoch": 2.54, + "learning_rate": 2.5571397511952245e-05, + "loss": 0.7135, + "step": 15942 + }, + { + "epoch": 2.54, + "learning_rate": 2.5568818285468298e-05, + "loss": 0.6718, + "step": 15943 + }, + { + "epoch": 2.54, + "learning_rate": 2.5566239052926782e-05, + "loss": 0.7247, + "step": 15944 + }, + { + "epoch": 2.54, + "learning_rate": 2.5563659814355167e-05, + "loss": 0.7048, + "step": 15945 + }, + { + "epoch": 2.54, + "learning_rate": 2.5561080569780914e-05, + "loss": 0.7302, + "step": 15946 + }, + { + "epoch": 2.54, + "learning_rate": 2.5558501319231497e-05, + "loss": 0.6967, + "step": 15947 + }, + { + "epoch": 2.54, + "learning_rate": 2.5555922062734385e-05, + "loss": 0.6914, + "step": 15948 + }, + { + "epoch": 2.54, + "learning_rate": 2.5553342800317027e-05, + "loss": 0.6629, + "step": 15949 + }, + { + "epoch": 2.54, + "learning_rate": 2.555076353200691e-05, + "loss": 0.7211, + "step": 15950 + }, + { + "epoch": 2.54, + "learning_rate": 2.5548184257831496e-05, + "loss": 0.7178, + "step": 15951 + }, + { + "epoch": 2.54, + "learning_rate": 2.554560497781825e-05, + "loss": 0.6388, + "step": 15952 + }, + { + "epoch": 2.54, + "learning_rate": 2.554302569199465e-05, + "loss": 0.7189, + "step": 15953 + }, + { + "epoch": 2.54, + "learning_rate": 2.554044640038815e-05, + "loss": 0.6982, + "step": 15954 + }, + { + "epoch": 2.54, + "learning_rate": 2.5537867103026218e-05, + "loss": 0.7103, + "step": 15955 + }, + { + "epoch": 2.54, + "learning_rate": 2.553528779993634e-05, + "loss": 0.6589, + "step": 15956 + }, + { + "epoch": 2.54, + "learning_rate": 2.5532708491145975e-05, + "loss": 0.68, + "step": 15957 + }, + { + "epoch": 2.54, + "learning_rate": 2.5530129176682576e-05, + "loss": 0.7155, + "step": 15958 + }, + { + "epoch": 2.54, + "learning_rate": 2.5527549856573628e-05, + "loss": 0.7084, + "step": 15959 + }, + { + "epoch": 2.54, + "learning_rate": 2.552497053084659e-05, + "loss": 0.6829, + "step": 15960 + }, + { + "epoch": 2.54, + "learning_rate": 2.5522391199528938e-05, + "loss": 0.7067, + "step": 15961 + }, + { + "epoch": 2.54, + "learning_rate": 2.551981186264814e-05, + "loss": 0.6899, + "step": 15962 + }, + { + "epoch": 2.54, + "learning_rate": 2.551723252023166e-05, + "loss": 0.6477, + "step": 15963 + }, + { + "epoch": 2.54, + "learning_rate": 2.5514653172306962e-05, + "loss": 0.715, + "step": 15964 + }, + { + "epoch": 2.54, + "learning_rate": 2.551207381890152e-05, + "loss": 0.6791, + "step": 15965 + }, + { + "epoch": 2.54, + "learning_rate": 2.550949446004281e-05, + "loss": 0.6651, + "step": 15966 + }, + { + "epoch": 2.54, + "learning_rate": 2.5506915095758293e-05, + "loss": 0.6812, + "step": 15967 + }, + { + "epoch": 2.54, + "learning_rate": 2.5504335726075434e-05, + "loss": 0.7103, + "step": 15968 + }, + { + "epoch": 2.54, + "learning_rate": 2.5501756351021705e-05, + "loss": 0.7812, + "step": 15969 + }, + { + "epoch": 2.54, + "learning_rate": 2.5499176970624577e-05, + "loss": 0.7043, + "step": 15970 + }, + { + "epoch": 2.54, + "learning_rate": 2.5496597584911518e-05, + "loss": 0.7026, + "step": 15971 + }, + { + "epoch": 2.54, + "learning_rate": 2.5494018193909995e-05, + "loss": 0.7279, + "step": 15972 + }, + { + "epoch": 2.54, + "learning_rate": 2.5491438797647483e-05, + "loss": 0.6852, + "step": 15973 + }, + { + "epoch": 2.54, + "learning_rate": 2.5488859396151438e-05, + "loss": 0.683, + "step": 15974 + }, + { + "epoch": 2.55, + "learning_rate": 2.548627998944934e-05, + "loss": 0.6892, + "step": 15975 + }, + { + "epoch": 2.55, + "learning_rate": 2.5483700577568658e-05, + "loss": 0.7083, + "step": 15976 + }, + { + "epoch": 2.55, + "learning_rate": 2.5481121160536863e-05, + "loss": 0.7467, + "step": 15977 + }, + { + "epoch": 2.55, + "learning_rate": 2.547854173838141e-05, + "loss": 0.6707, + "step": 15978 + }, + { + "epoch": 2.55, + "learning_rate": 2.5475962311129785e-05, + "loss": 0.6967, + "step": 15979 + }, + { + "epoch": 2.55, + "learning_rate": 2.547338287880945e-05, + "loss": 0.7147, + "step": 15980 + }, + { + "epoch": 2.55, + "learning_rate": 2.5470803441447865e-05, + "loss": 0.8009, + "step": 15981 + }, + { + "epoch": 2.55, + "learning_rate": 2.5468223999072516e-05, + "loss": 0.7439, + "step": 15982 + }, + { + "epoch": 2.55, + "learning_rate": 2.546564455171087e-05, + "loss": 0.6771, + "step": 15983 + }, + { + "epoch": 2.55, + "learning_rate": 2.5463065099390383e-05, + "loss": 0.6515, + "step": 15984 + }, + { + "epoch": 2.55, + "learning_rate": 2.5460485642138544e-05, + "loss": 0.6911, + "step": 15985 + }, + { + "epoch": 2.55, + "learning_rate": 2.5457906179982804e-05, + "loss": 0.6769, + "step": 15986 + }, + { + "epoch": 2.55, + "learning_rate": 2.5455326712950645e-05, + "loss": 0.7736, + "step": 15987 + }, + { + "epoch": 2.55, + "learning_rate": 2.5452747241069537e-05, + "loss": 0.6473, + "step": 15988 + }, + { + "epoch": 2.55, + "learning_rate": 2.5450167764366935e-05, + "loss": 0.7054, + "step": 15989 + }, + { + "epoch": 2.55, + "learning_rate": 2.5447588282870326e-05, + "loss": 0.7971, + "step": 15990 + }, + { + "epoch": 2.55, + "learning_rate": 2.5445008796607178e-05, + "loss": 0.6915, + "step": 15991 + }, + { + "epoch": 2.55, + "learning_rate": 2.5442429305604952e-05, + "loss": 0.6749, + "step": 15992 + }, + { + "epoch": 2.55, + "learning_rate": 2.543984980989112e-05, + "loss": 0.7636, + "step": 15993 + }, + { + "epoch": 2.55, + "learning_rate": 2.5437270309493156e-05, + "loss": 0.7027, + "step": 15994 + }, + { + "epoch": 2.55, + "learning_rate": 2.543469080443853e-05, + "loss": 0.7405, + "step": 15995 + }, + { + "epoch": 2.55, + "learning_rate": 2.5432111294754706e-05, + "loss": 0.8566, + "step": 15996 + }, + { + "epoch": 2.55, + "learning_rate": 2.5429531780469174e-05, + "loss": 0.7574, + "step": 15997 + }, + { + "epoch": 2.55, + "learning_rate": 2.5426952261609373e-05, + "loss": 0.6817, + "step": 15998 + }, + { + "epoch": 2.55, + "learning_rate": 2.5424372738202797e-05, + "loss": 0.7262, + "step": 15999 + }, + { + "epoch": 2.55, + "learning_rate": 2.542179321027691e-05, + "loss": 0.8113, + "step": 16000 + }, + { + "epoch": 2.55, + "learning_rate": 2.541921367785917e-05, + "loss": 0.6584, + "step": 16001 + }, + { + "epoch": 2.55, + "learning_rate": 2.5416634140977073e-05, + "loss": 0.6967, + "step": 16002 + }, + { + "epoch": 2.55, + "learning_rate": 2.541405459965807e-05, + "loss": 0.7357, + "step": 16003 + }, + { + "epoch": 2.55, + "learning_rate": 2.5411475053929633e-05, + "loss": 0.7065, + "step": 16004 + }, + { + "epoch": 2.55, + "learning_rate": 2.5408895503819242e-05, + "loss": 0.7061, + "step": 16005 + }, + { + "epoch": 2.55, + "learning_rate": 2.5406315949354363e-05, + "loss": 0.7621, + "step": 16006 + }, + { + "epoch": 2.55, + "learning_rate": 2.540373639056246e-05, + "loss": 0.7089, + "step": 16007 + }, + { + "epoch": 2.55, + "learning_rate": 2.540115682747102e-05, + "loss": 0.7207, + "step": 16008 + }, + { + "epoch": 2.55, + "learning_rate": 2.53985772601075e-05, + "loss": 0.8112, + "step": 16009 + }, + { + "epoch": 2.55, + "learning_rate": 2.539599768849936e-05, + "loss": 0.6913, + "step": 16010 + }, + { + "epoch": 2.55, + "learning_rate": 2.53934181126741e-05, + "loss": 0.7437, + "step": 16011 + }, + { + "epoch": 2.55, + "learning_rate": 2.539083853265917e-05, + "loss": 0.7758, + "step": 16012 + }, + { + "epoch": 2.55, + "learning_rate": 2.5388258948482047e-05, + "loss": 0.6991, + "step": 16013 + }, + { + "epoch": 2.55, + "learning_rate": 2.5385679360170206e-05, + "loss": 0.786, + "step": 16014 + }, + { + "epoch": 2.55, + "learning_rate": 2.5383099767751112e-05, + "loss": 0.742, + "step": 16015 + }, + { + "epoch": 2.55, + "learning_rate": 2.538052017125223e-05, + "loss": 0.7489, + "step": 16016 + }, + { + "epoch": 2.55, + "learning_rate": 2.5377940570701048e-05, + "loss": 0.6949, + "step": 16017 + }, + { + "epoch": 2.55, + "learning_rate": 2.537536096612503e-05, + "loss": 0.7121, + "step": 16018 + }, + { + "epoch": 2.55, + "learning_rate": 2.5372781357551645e-05, + "loss": 0.6751, + "step": 16019 + }, + { + "epoch": 2.55, + "learning_rate": 2.5370201745008367e-05, + "loss": 0.689, + "step": 16020 + }, + { + "epoch": 2.55, + "learning_rate": 2.536762212852265e-05, + "loss": 0.732, + "step": 16021 + }, + { + "epoch": 2.55, + "learning_rate": 2.5365042508121994e-05, + "loss": 0.7667, + "step": 16022 + }, + { + "epoch": 2.55, + "learning_rate": 2.5362462883833855e-05, + "loss": 0.6858, + "step": 16023 + }, + { + "epoch": 2.55, + "learning_rate": 2.5359883255685702e-05, + "loss": 0.7856, + "step": 16024 + }, + { + "epoch": 2.55, + "learning_rate": 2.5357303623705013e-05, + "loss": 0.6978, + "step": 16025 + }, + { + "epoch": 2.55, + "learning_rate": 2.535472398791926e-05, + "loss": 0.755, + "step": 16026 + }, + { + "epoch": 2.55, + "learning_rate": 2.535214434835591e-05, + "loss": 0.7243, + "step": 16027 + }, + { + "epoch": 2.55, + "learning_rate": 2.534956470504244e-05, + "loss": 0.6897, + "step": 16028 + }, + { + "epoch": 2.55, + "learning_rate": 2.5346985058006316e-05, + "loss": 0.6936, + "step": 16029 + }, + { + "epoch": 2.55, + "learning_rate": 2.5344405407275008e-05, + "loss": 0.7303, + "step": 16030 + }, + { + "epoch": 2.55, + "learning_rate": 2.5341825752875996e-05, + "loss": 0.7503, + "step": 16031 + }, + { + "epoch": 2.55, + "learning_rate": 2.533924609483675e-05, + "loss": 0.7197, + "step": 16032 + }, + { + "epoch": 2.55, + "learning_rate": 2.5336666433184726e-05, + "loss": 0.7031, + "step": 16033 + }, + { + "epoch": 2.55, + "learning_rate": 2.533408676794742e-05, + "loss": 0.7186, + "step": 16034 + }, + { + "epoch": 2.55, + "learning_rate": 2.533150709915229e-05, + "loss": 0.7405, + "step": 16035 + }, + { + "epoch": 2.55, + "learning_rate": 2.532892742682681e-05, + "loss": 0.7447, + "step": 16036 + }, + { + "epoch": 2.55, + "learning_rate": 2.5326347750998453e-05, + "loss": 0.6471, + "step": 16037 + }, + { + "epoch": 2.56, + "learning_rate": 2.5323768071694694e-05, + "loss": 0.7766, + "step": 16038 + }, + { + "epoch": 2.56, + "learning_rate": 2.5321188388942996e-05, + "loss": 0.7083, + "step": 16039 + }, + { + "epoch": 2.56, + "learning_rate": 2.531860870277084e-05, + "loss": 0.6951, + "step": 16040 + }, + { + "epoch": 2.56, + "learning_rate": 2.5316029013205693e-05, + "loss": 0.7738, + "step": 16041 + }, + { + "epoch": 2.56, + "learning_rate": 2.5313449320275027e-05, + "loss": 0.7153, + "step": 16042 + }, + { + "epoch": 2.56, + "learning_rate": 2.5310869624006323e-05, + "loss": 0.7935, + "step": 16043 + }, + { + "epoch": 2.56, + "learning_rate": 2.5308289924427035e-05, + "loss": 0.699, + "step": 16044 + }, + { + "epoch": 2.56, + "learning_rate": 2.530571022156466e-05, + "loss": 0.7008, + "step": 16045 + }, + { + "epoch": 2.56, + "learning_rate": 2.530313051544665e-05, + "loss": 0.7716, + "step": 16046 + }, + { + "epoch": 2.56, + "learning_rate": 2.5300550806100486e-05, + "loss": 0.6715, + "step": 16047 + }, + { + "epoch": 2.56, + "learning_rate": 2.5297971093553642e-05, + "loss": 0.6924, + "step": 16048 + }, + { + "epoch": 2.56, + "learning_rate": 2.5295391377833578e-05, + "loss": 0.7218, + "step": 16049 + }, + { + "epoch": 2.56, + "learning_rate": 2.5292811658967773e-05, + "loss": 0.7232, + "step": 16050 + }, + { + "epoch": 2.56, + "learning_rate": 2.529023193698371e-05, + "loss": 0.8094, + "step": 16051 + }, + { + "epoch": 2.56, + "learning_rate": 2.5287652211908853e-05, + "loss": 0.656, + "step": 16052 + }, + { + "epoch": 2.56, + "learning_rate": 2.5285072483770667e-05, + "loss": 0.702, + "step": 16053 + }, + { + "epoch": 2.56, + "learning_rate": 2.5282492752596636e-05, + "loss": 0.6533, + "step": 16054 + }, + { + "epoch": 2.56, + "learning_rate": 2.5279913018414236e-05, + "loss": 0.7428, + "step": 16055 + }, + { + "epoch": 2.56, + "learning_rate": 2.527733328125092e-05, + "loss": 0.7026, + "step": 16056 + }, + { + "epoch": 2.56, + "learning_rate": 2.5274753541134183e-05, + "loss": 0.6228, + "step": 16057 + }, + { + "epoch": 2.56, + "learning_rate": 2.5272173798091486e-05, + "loss": 0.6885, + "step": 16058 + }, + { + "epoch": 2.56, + "learning_rate": 2.5269594052150297e-05, + "loss": 0.7052, + "step": 16059 + }, + { + "epoch": 2.56, + "learning_rate": 2.5267014303338103e-05, + "loss": 0.6035, + "step": 16060 + }, + { + "epoch": 2.56, + "learning_rate": 2.5264434551682365e-05, + "loss": 0.6833, + "step": 16061 + }, + { + "epoch": 2.56, + "learning_rate": 2.5261854797210554e-05, + "loss": 0.7075, + "step": 16062 + }, + { + "epoch": 2.56, + "learning_rate": 2.525927503995016e-05, + "loss": 0.7357, + "step": 16063 + }, + { + "epoch": 2.56, + "learning_rate": 2.5256695279928632e-05, + "loss": 0.6408, + "step": 16064 + }, + { + "epoch": 2.56, + "learning_rate": 2.525411551717346e-05, + "loss": 0.7675, + "step": 16065 + }, + { + "epoch": 2.56, + "learning_rate": 2.5251535751712114e-05, + "loss": 0.7551, + "step": 16066 + }, + { + "epoch": 2.56, + "learning_rate": 2.5248955983572065e-05, + "loss": 0.671, + "step": 16067 + }, + { + "epoch": 2.56, + "learning_rate": 2.5246376212780786e-05, + "loss": 0.682, + "step": 16068 + }, + { + "epoch": 2.56, + "learning_rate": 2.5243796439365754e-05, + "loss": 0.7173, + "step": 16069 + }, + { + "epoch": 2.56, + "learning_rate": 2.5241216663354427e-05, + "loss": 0.7515, + "step": 16070 + }, + { + "epoch": 2.56, + "learning_rate": 2.5238636884774297e-05, + "loss": 0.6499, + "step": 16071 + }, + { + "epoch": 2.56, + "learning_rate": 2.5236057103652827e-05, + "loss": 0.6775, + "step": 16072 + }, + { + "epoch": 2.56, + "learning_rate": 2.5233477320017486e-05, + "loss": 0.7446, + "step": 16073 + }, + { + "epoch": 2.56, + "learning_rate": 2.5230897533895757e-05, + "loss": 0.6842, + "step": 16074 + }, + { + "epoch": 2.56, + "learning_rate": 2.5228317745315116e-05, + "loss": 0.7685, + "step": 16075 + }, + { + "epoch": 2.56, + "learning_rate": 2.522573795430302e-05, + "loss": 0.6973, + "step": 16076 + }, + { + "epoch": 2.56, + "learning_rate": 2.522315816088696e-05, + "loss": 0.6751, + "step": 16077 + }, + { + "epoch": 2.56, + "learning_rate": 2.5220578365094406e-05, + "loss": 0.6944, + "step": 16078 + }, + { + "epoch": 2.56, + "learning_rate": 2.5217998566952815e-05, + "loss": 0.702, + "step": 16079 + }, + { + "epoch": 2.56, + "learning_rate": 2.5215418766489674e-05, + "loss": 0.7878, + "step": 16080 + }, + { + "epoch": 2.56, + "learning_rate": 2.5212838963732462e-05, + "loss": 0.7445, + "step": 16081 + }, + { + "epoch": 2.56, + "learning_rate": 2.5210259158708627e-05, + "loss": 0.7143, + "step": 16082 + }, + { + "epoch": 2.56, + "learning_rate": 2.5207679351445674e-05, + "loss": 0.6718, + "step": 16083 + }, + { + "epoch": 2.56, + "learning_rate": 2.5205099541971054e-05, + "loss": 0.7491, + "step": 16084 + }, + { + "epoch": 2.56, + "learning_rate": 2.520251973031225e-05, + "loss": 0.7222, + "step": 16085 + }, + { + "epoch": 2.56, + "learning_rate": 2.519993991649674e-05, + "loss": 0.7113, + "step": 16086 + }, + { + "epoch": 2.56, + "learning_rate": 2.5197360100551987e-05, + "loss": 0.7504, + "step": 16087 + }, + { + "epoch": 2.56, + "learning_rate": 2.519478028250547e-05, + "loss": 0.6693, + "step": 16088 + }, + { + "epoch": 2.56, + "learning_rate": 2.5192200462384663e-05, + "loss": 0.6727, + "step": 16089 + }, + { + "epoch": 2.56, + "learning_rate": 2.518962064021703e-05, + "loss": 0.7557, + "step": 16090 + }, + { + "epoch": 2.56, + "learning_rate": 2.518704081603006e-05, + "loss": 0.7008, + "step": 16091 + }, + { + "epoch": 2.56, + "learning_rate": 2.5184460989851216e-05, + "loss": 0.7048, + "step": 16092 + }, + { + "epoch": 2.56, + "learning_rate": 2.518188116170797e-05, + "loss": 0.6687, + "step": 16093 + }, + { + "epoch": 2.56, + "learning_rate": 2.517930133162781e-05, + "loss": 0.7339, + "step": 16094 + }, + { + "epoch": 2.56, + "learning_rate": 2.5176721499638196e-05, + "loss": 0.6428, + "step": 16095 + }, + { + "epoch": 2.56, + "learning_rate": 2.5174141665766598e-05, + "loss": 0.6632, + "step": 16096 + }, + { + "epoch": 2.56, + "learning_rate": 2.5171561830040502e-05, + "loss": 0.6902, + "step": 16097 + }, + { + "epoch": 2.56, + "learning_rate": 2.516898199248739e-05, + "loss": 0.6539, + "step": 16098 + }, + { + "epoch": 2.56, + "learning_rate": 2.5166402153134706e-05, + "loss": 0.6777, + "step": 16099 + }, + { + "epoch": 2.56, + "learning_rate": 2.5163822312009944e-05, + "loss": 0.7096, + "step": 16100 + }, + { + "epoch": 2.57, + "learning_rate": 2.5161242469140572e-05, + "loss": 0.7024, + "step": 16101 + }, + { + "epoch": 2.57, + "learning_rate": 2.5158662624554065e-05, + "loss": 0.7095, + "step": 16102 + }, + { + "epoch": 2.57, + "learning_rate": 2.51560827782779e-05, + "loss": 0.6976, + "step": 16103 + }, + { + "epoch": 2.57, + "learning_rate": 2.515350293033955e-05, + "loss": 0.6676, + "step": 16104 + }, + { + "epoch": 2.57, + "learning_rate": 2.5150923080766482e-05, + "loss": 0.624, + "step": 16105 + }, + { + "epoch": 2.57, + "learning_rate": 2.514834322958618e-05, + "loss": 0.6559, + "step": 16106 + }, + { + "epoch": 2.57, + "learning_rate": 2.5145763376826108e-05, + "loss": 0.6375, + "step": 16107 + }, + { + "epoch": 2.57, + "learning_rate": 2.5143183522513748e-05, + "loss": 0.7, + "step": 16108 + }, + { + "epoch": 2.57, + "learning_rate": 2.5140603666676573e-05, + "loss": 0.7599, + "step": 16109 + }, + { + "epoch": 2.57, + "learning_rate": 2.5138023809342043e-05, + "loss": 0.6686, + "step": 16110 + }, + { + "epoch": 2.57, + "learning_rate": 2.5135443950537657e-05, + "loss": 0.7137, + "step": 16111 + }, + { + "epoch": 2.57, + "learning_rate": 2.513286409029087e-05, + "loss": 0.6536, + "step": 16112 + }, + { + "epoch": 2.57, + "learning_rate": 2.513028422862916e-05, + "loss": 0.6729, + "step": 16113 + }, + { + "epoch": 2.57, + "learning_rate": 2.5127704365579997e-05, + "loss": 0.6735, + "step": 16114 + }, + { + "epoch": 2.57, + "learning_rate": 2.5125124501170865e-05, + "loss": 0.7665, + "step": 16115 + }, + { + "epoch": 2.57, + "learning_rate": 2.5122544635429235e-05, + "loss": 0.698, + "step": 16116 + }, + { + "epoch": 2.57, + "learning_rate": 2.5119964768382575e-05, + "loss": 0.6978, + "step": 16117 + }, + { + "epoch": 2.57, + "learning_rate": 2.5117384900058366e-05, + "loss": 0.7396, + "step": 16118 + }, + { + "epoch": 2.57, + "learning_rate": 2.5114805030484078e-05, + "loss": 0.7759, + "step": 16119 + }, + { + "epoch": 2.57, + "learning_rate": 2.5112225159687193e-05, + "loss": 0.6528, + "step": 16120 + }, + { + "epoch": 2.57, + "learning_rate": 2.510964528769517e-05, + "loss": 0.6485, + "step": 16121 + }, + { + "epoch": 2.57, + "learning_rate": 2.510706541453549e-05, + "loss": 0.6943, + "step": 16122 + }, + { + "epoch": 2.57, + "learning_rate": 2.5104485540235634e-05, + "loss": 0.6497, + "step": 16123 + }, + { + "epoch": 2.57, + "learning_rate": 2.5101905664823067e-05, + "loss": 0.7301, + "step": 16124 + }, + { + "epoch": 2.57, + "learning_rate": 2.5099325788325264e-05, + "loss": 0.6746, + "step": 16125 + }, + { + "epoch": 2.57, + "learning_rate": 2.5096745910769703e-05, + "loss": 0.7437, + "step": 16126 + }, + { + "epoch": 2.57, + "learning_rate": 2.5094166032183862e-05, + "loss": 0.7125, + "step": 16127 + }, + { + "epoch": 2.57, + "learning_rate": 2.5091586152595202e-05, + "loss": 0.7973, + "step": 16128 + }, + { + "epoch": 2.57, + "learning_rate": 2.5089006272031217e-05, + "loss": 0.6878, + "step": 16129 + }, + { + "epoch": 2.57, + "learning_rate": 2.5086426390519362e-05, + "loss": 0.6311, + "step": 16130 + }, + { + "epoch": 2.57, + "learning_rate": 2.5083846508087117e-05, + "loss": 0.7041, + "step": 16131 + }, + { + "epoch": 2.57, + "learning_rate": 2.5081266624761958e-05, + "loss": 0.6583, + "step": 16132 + }, + { + "epoch": 2.57, + "learning_rate": 2.507868674057136e-05, + "loss": 0.6801, + "step": 16133 + }, + { + "epoch": 2.57, + "learning_rate": 2.507610685554279e-05, + "loss": 0.7476, + "step": 16134 + }, + { + "epoch": 2.57, + "learning_rate": 2.5073526969703736e-05, + "loss": 0.6415, + "step": 16135 + }, + { + "epoch": 2.57, + "learning_rate": 2.5070947083081663e-05, + "loss": 0.7399, + "step": 16136 + }, + { + "epoch": 2.57, + "learning_rate": 2.5068367195704047e-05, + "loss": 0.7468, + "step": 16137 + }, + { + "epoch": 2.57, + "learning_rate": 2.506578730759836e-05, + "loss": 0.6541, + "step": 16138 + }, + { + "epoch": 2.57, + "learning_rate": 2.506320741879208e-05, + "loss": 0.7153, + "step": 16139 + }, + { + "epoch": 2.57, + "learning_rate": 2.506062752931268e-05, + "loss": 0.7208, + "step": 16140 + }, + { + "epoch": 2.57, + "learning_rate": 2.505804763918763e-05, + "loss": 0.7269, + "step": 16141 + }, + { + "epoch": 2.57, + "learning_rate": 2.505546774844441e-05, + "loss": 0.6689, + "step": 16142 + }, + { + "epoch": 2.57, + "learning_rate": 2.505288785711049e-05, + "loss": 0.7601, + "step": 16143 + }, + { + "epoch": 2.57, + "learning_rate": 2.5050307965213355e-05, + "loss": 0.7437, + "step": 16144 + }, + { + "epoch": 2.57, + "learning_rate": 2.5047728072780456e-05, + "loss": 0.7392, + "step": 16145 + }, + { + "epoch": 2.57, + "learning_rate": 2.5045148179839294e-05, + "loss": 0.6708, + "step": 16146 + }, + { + "epoch": 2.57, + "learning_rate": 2.504256828641733e-05, + "loss": 0.6533, + "step": 16147 + }, + { + "epoch": 2.57, + "learning_rate": 2.503998839254203e-05, + "loss": 0.6859, + "step": 16148 + }, + { + "epoch": 2.57, + "learning_rate": 2.5037408498240894e-05, + "loss": 0.7262, + "step": 16149 + }, + { + "epoch": 2.57, + "learning_rate": 2.5034828603541373e-05, + "loss": 0.7457, + "step": 16150 + }, + { + "epoch": 2.57, + "learning_rate": 2.503224870847094e-05, + "loss": 0.7092, + "step": 16151 + }, + { + "epoch": 2.57, + "learning_rate": 2.5029668813057085e-05, + "loss": 0.688, + "step": 16152 + }, + { + "epoch": 2.57, + "learning_rate": 2.502708891732728e-05, + "loss": 0.7158, + "step": 16153 + }, + { + "epoch": 2.57, + "learning_rate": 2.5024509021308983e-05, + "loss": 0.7109, + "step": 16154 + }, + { + "epoch": 2.57, + "learning_rate": 2.5021929125029687e-05, + "loss": 0.7677, + "step": 16155 + }, + { + "epoch": 2.57, + "learning_rate": 2.501934922851686e-05, + "loss": 0.774, + "step": 16156 + }, + { + "epoch": 2.57, + "learning_rate": 2.5016769331797967e-05, + "loss": 0.7342, + "step": 16157 + }, + { + "epoch": 2.57, + "learning_rate": 2.5014189434900497e-05, + "loss": 0.6986, + "step": 16158 + }, + { + "epoch": 2.57, + "learning_rate": 2.5011609537851923e-05, + "loss": 0.7504, + "step": 16159 + }, + { + "epoch": 2.57, + "learning_rate": 2.500902964067971e-05, + "loss": 0.7555, + "step": 16160 + }, + { + "epoch": 2.57, + "learning_rate": 2.5006449743411337e-05, + "loss": 0.6629, + "step": 16161 + }, + { + "epoch": 2.57, + "learning_rate": 2.5003869846074273e-05, + "loss": 0.6751, + "step": 16162 + }, + { + "epoch": 2.57, + "learning_rate": 2.5001289948696004e-05, + "loss": 0.7008, + "step": 16163 + }, + { + "epoch": 2.58, + "learning_rate": 2.4998710051304e-05, + "loss": 0.6934, + "step": 16164 + }, + { + "epoch": 2.58, + "learning_rate": 2.4996130153925732e-05, + "loss": 0.7141, + "step": 16165 + }, + { + "epoch": 2.58, + "learning_rate": 2.4993550256588666e-05, + "loss": 0.6823, + "step": 16166 + }, + { + "epoch": 2.58, + "learning_rate": 2.4990970359320297e-05, + "loss": 0.7206, + "step": 16167 + }, + { + "epoch": 2.58, + "learning_rate": 2.4988390462148083e-05, + "loss": 0.6957, + "step": 16168 + }, + { + "epoch": 2.58, + "learning_rate": 2.4985810565099502e-05, + "loss": 0.7679, + "step": 16169 + }, + { + "epoch": 2.58, + "learning_rate": 2.4983230668202032e-05, + "loss": 0.7052, + "step": 16170 + }, + { + "epoch": 2.58, + "learning_rate": 2.4980650771483147e-05, + "loss": 0.7144, + "step": 16171 + }, + { + "epoch": 2.58, + "learning_rate": 2.4978070874970315e-05, + "loss": 0.7499, + "step": 16172 + }, + { + "epoch": 2.58, + "learning_rate": 2.497549097869102e-05, + "loss": 0.7281, + "step": 16173 + }, + { + "epoch": 2.58, + "learning_rate": 2.4972911082672733e-05, + "loss": 0.7573, + "step": 16174 + }, + { + "epoch": 2.58, + "learning_rate": 2.4970331186942925e-05, + "loss": 0.7166, + "step": 16175 + }, + { + "epoch": 2.58, + "learning_rate": 2.4967751291529067e-05, + "loss": 0.7035, + "step": 16176 + }, + { + "epoch": 2.58, + "learning_rate": 2.496517139645864e-05, + "loss": 0.7533, + "step": 16177 + }, + { + "epoch": 2.58, + "learning_rate": 2.496259150175912e-05, + "loss": 0.7355, + "step": 16178 + }, + { + "epoch": 2.58, + "learning_rate": 2.4960011607457974e-05, + "loss": 0.7166, + "step": 16179 + }, + { + "epoch": 2.58, + "learning_rate": 2.4957431713582677e-05, + "loss": 0.7527, + "step": 16180 + }, + { + "epoch": 2.58, + "learning_rate": 2.495485182016071e-05, + "loss": 0.7483, + "step": 16181 + }, + { + "epoch": 2.58, + "learning_rate": 2.4952271927219546e-05, + "loss": 0.6051, + "step": 16182 + }, + { + "epoch": 2.58, + "learning_rate": 2.4949692034786655e-05, + "loss": 0.7219, + "step": 16183 + }, + { + "epoch": 2.58, + "learning_rate": 2.494711214288951e-05, + "loss": 0.7061, + "step": 16184 + }, + { + "epoch": 2.58, + "learning_rate": 2.4944532251555598e-05, + "loss": 0.701, + "step": 16185 + }, + { + "epoch": 2.58, + "learning_rate": 2.494195236081237e-05, + "loss": 0.7429, + "step": 16186 + }, + { + "epoch": 2.58, + "learning_rate": 2.4939372470687326e-05, + "loss": 0.7371, + "step": 16187 + }, + { + "epoch": 2.58, + "learning_rate": 2.4936792581207923e-05, + "loss": 0.6676, + "step": 16188 + }, + { + "epoch": 2.58, + "learning_rate": 2.493421269240164e-05, + "loss": 0.7304, + "step": 16189 + }, + { + "epoch": 2.58, + "learning_rate": 2.4931632804295955e-05, + "loss": 0.6909, + "step": 16190 + }, + { + "epoch": 2.58, + "learning_rate": 2.492905291691834e-05, + "loss": 0.7719, + "step": 16191 + }, + { + "epoch": 2.58, + "learning_rate": 2.492647303029626e-05, + "loss": 0.6989, + "step": 16192 + }, + { + "epoch": 2.58, + "learning_rate": 2.4923893144457205e-05, + "loss": 0.682, + "step": 16193 + }, + { + "epoch": 2.58, + "learning_rate": 2.4921313259428648e-05, + "loss": 0.7272, + "step": 16194 + }, + { + "epoch": 2.58, + "learning_rate": 2.491873337523805e-05, + "loss": 0.6978, + "step": 16195 + }, + { + "epoch": 2.58, + "learning_rate": 2.49161534919129e-05, + "loss": 0.7766, + "step": 16196 + }, + { + "epoch": 2.58, + "learning_rate": 2.4913573609480647e-05, + "loss": 0.671, + "step": 16197 + }, + { + "epoch": 2.58, + "learning_rate": 2.4910993727968792e-05, + "loss": 0.6967, + "step": 16198 + }, + { + "epoch": 2.58, + "learning_rate": 2.4908413847404804e-05, + "loss": 0.7319, + "step": 16199 + }, + { + "epoch": 2.58, + "learning_rate": 2.4905833967816144e-05, + "loss": 0.6835, + "step": 16200 + }, + { + "epoch": 2.58, + "learning_rate": 2.4903254089230303e-05, + "loss": 0.7492, + "step": 16201 + }, + { + "epoch": 2.58, + "learning_rate": 2.4900674211674742e-05, + "loss": 0.7249, + "step": 16202 + }, + { + "epoch": 2.58, + "learning_rate": 2.489809433517694e-05, + "loss": 0.6991, + "step": 16203 + }, + { + "epoch": 2.58, + "learning_rate": 2.4895514459764375e-05, + "loss": 0.6774, + "step": 16204 + }, + { + "epoch": 2.58, + "learning_rate": 2.4892934585464518e-05, + "loss": 0.6852, + "step": 16205 + }, + { + "epoch": 2.58, + "learning_rate": 2.4890354712304838e-05, + "loss": 0.6815, + "step": 16206 + }, + { + "epoch": 2.58, + "learning_rate": 2.4887774840312816e-05, + "loss": 0.6312, + "step": 16207 + }, + { + "epoch": 2.58, + "learning_rate": 2.4885194969515928e-05, + "loss": 0.7581, + "step": 16208 + }, + { + "epoch": 2.58, + "learning_rate": 2.4882615099941633e-05, + "loss": 0.691, + "step": 16209 + }, + { + "epoch": 2.58, + "learning_rate": 2.4880035231617427e-05, + "loss": 0.7687, + "step": 16210 + }, + { + "epoch": 2.58, + "learning_rate": 2.487745536457077e-05, + "loss": 0.7677, + "step": 16211 + }, + { + "epoch": 2.58, + "learning_rate": 2.4874875498829134e-05, + "loss": 0.6929, + "step": 16212 + }, + { + "epoch": 2.58, + "learning_rate": 2.4872295634420002e-05, + "loss": 0.7175, + "step": 16213 + }, + { + "epoch": 2.58, + "learning_rate": 2.4869715771370853e-05, + "loss": 0.74, + "step": 16214 + }, + { + "epoch": 2.58, + "learning_rate": 2.4867135909709144e-05, + "loss": 0.7765, + "step": 16215 + }, + { + "epoch": 2.58, + "learning_rate": 2.4864556049462356e-05, + "loss": 0.7406, + "step": 16216 + }, + { + "epoch": 2.58, + "learning_rate": 2.486197619065796e-05, + "loss": 0.7738, + "step": 16217 + }, + { + "epoch": 2.58, + "learning_rate": 2.485939633332344e-05, + "loss": 0.7486, + "step": 16218 + }, + { + "epoch": 2.58, + "learning_rate": 2.485681647748626e-05, + "loss": 0.7116, + "step": 16219 + }, + { + "epoch": 2.58, + "learning_rate": 2.4854236623173895e-05, + "loss": 0.7416, + "step": 16220 + }, + { + "epoch": 2.58, + "learning_rate": 2.4851656770413827e-05, + "loss": 0.6952, + "step": 16221 + }, + { + "epoch": 2.58, + "learning_rate": 2.4849076919233524e-05, + "loss": 0.7263, + "step": 16222 + }, + { + "epoch": 2.58, + "learning_rate": 2.4846497069660456e-05, + "loss": 0.7511, + "step": 16223 + }, + { + "epoch": 2.58, + "learning_rate": 2.4843917221722103e-05, + "loss": 0.7025, + "step": 16224 + }, + { + "epoch": 2.58, + "learning_rate": 2.484133737544594e-05, + "loss": 0.7612, + "step": 16225 + }, + { + "epoch": 2.58, + "learning_rate": 2.4838757530859433e-05, + "loss": 0.684, + "step": 16226 + }, + { + "epoch": 2.59, + "learning_rate": 2.4836177687990065e-05, + "loss": 0.7022, + "step": 16227 + }, + { + "epoch": 2.59, + "learning_rate": 2.4833597846865303e-05, + "loss": 0.7886, + "step": 16228 + }, + { + "epoch": 2.59, + "learning_rate": 2.483101800751262e-05, + "loss": 0.7144, + "step": 16229 + }, + { + "epoch": 2.59, + "learning_rate": 2.4828438169959494e-05, + "loss": 0.6822, + "step": 16230 + }, + { + "epoch": 2.59, + "learning_rate": 2.48258583342334e-05, + "loss": 0.7611, + "step": 16231 + }, + { + "epoch": 2.59, + "learning_rate": 2.4823278500361806e-05, + "loss": 0.7839, + "step": 16232 + }, + { + "epoch": 2.59, + "learning_rate": 2.4820698668372193e-05, + "loss": 0.7339, + "step": 16233 + }, + { + "epoch": 2.59, + "learning_rate": 2.4818118838292026e-05, + "loss": 0.714, + "step": 16234 + }, + { + "epoch": 2.59, + "learning_rate": 2.4815539010148793e-05, + "loss": 0.8703, + "step": 16235 + }, + { + "epoch": 2.59, + "learning_rate": 2.481295918396995e-05, + "loss": 0.7257, + "step": 16236 + }, + { + "epoch": 2.59, + "learning_rate": 2.4810379359782972e-05, + "loss": 0.663, + "step": 16237 + }, + { + "epoch": 2.59, + "learning_rate": 2.480779953761535e-05, + "loss": 0.7351, + "step": 16238 + }, + { + "epoch": 2.59, + "learning_rate": 2.4805219717494538e-05, + "loss": 0.7529, + "step": 16239 + }, + { + "epoch": 2.59, + "learning_rate": 2.4802639899448016e-05, + "loss": 0.7812, + "step": 16240 + }, + { + "epoch": 2.59, + "learning_rate": 2.4800060083503266e-05, + "loss": 0.7367, + "step": 16241 + }, + { + "epoch": 2.59, + "learning_rate": 2.4797480269687755e-05, + "loss": 0.6947, + "step": 16242 + }, + { + "epoch": 2.59, + "learning_rate": 2.479490045802895e-05, + "loss": 0.7402, + "step": 16243 + }, + { + "epoch": 2.59, + "learning_rate": 2.479232064855433e-05, + "loss": 0.7157, + "step": 16244 + }, + { + "epoch": 2.59, + "learning_rate": 2.4789740841291375e-05, + "loss": 0.7759, + "step": 16245 + }, + { + "epoch": 2.59, + "learning_rate": 2.4787161036267547e-05, + "loss": 0.6855, + "step": 16246 + }, + { + "epoch": 2.59, + "learning_rate": 2.478458123351033e-05, + "loss": 0.6335, + "step": 16247 + }, + { + "epoch": 2.59, + "learning_rate": 2.478200143304719e-05, + "loss": 0.7049, + "step": 16248 + }, + { + "epoch": 2.59, + "learning_rate": 2.47794216349056e-05, + "loss": 0.699, + "step": 16249 + }, + { + "epoch": 2.59, + "learning_rate": 2.4776841839113042e-05, + "loss": 0.7473, + "step": 16250 + }, + { + "epoch": 2.59, + "learning_rate": 2.4774262045696977e-05, + "loss": 0.7629, + "step": 16251 + }, + { + "epoch": 2.59, + "learning_rate": 2.4771682254684883e-05, + "loss": 0.7126, + "step": 16252 + }, + { + "epoch": 2.59, + "learning_rate": 2.476910246610424e-05, + "loss": 0.6597, + "step": 16253 + }, + { + "epoch": 2.59, + "learning_rate": 2.4766522679982513e-05, + "loss": 0.7659, + "step": 16254 + }, + { + "epoch": 2.59, + "learning_rate": 2.4763942896347185e-05, + "loss": 0.6707, + "step": 16255 + }, + { + "epoch": 2.59, + "learning_rate": 2.4761363115225716e-05, + "loss": 0.7405, + "step": 16256 + }, + { + "epoch": 2.59, + "learning_rate": 2.475878333664558e-05, + "loss": 0.7098, + "step": 16257 + }, + { + "epoch": 2.59, + "learning_rate": 2.4756203560634262e-05, + "loss": 0.7648, + "step": 16258 + }, + { + "epoch": 2.59, + "learning_rate": 2.4753623787219224e-05, + "loss": 0.6917, + "step": 16259 + }, + { + "epoch": 2.59, + "learning_rate": 2.475104401642794e-05, + "loss": 0.6685, + "step": 16260 + }, + { + "epoch": 2.59, + "learning_rate": 2.474846424828789e-05, + "loss": 0.6771, + "step": 16261 + }, + { + "epoch": 2.59, + "learning_rate": 2.4745884482826547e-05, + "loss": 0.741, + "step": 16262 + }, + { + "epoch": 2.59, + "learning_rate": 2.474330472007137e-05, + "loss": 0.7157, + "step": 16263 + }, + { + "epoch": 2.59, + "learning_rate": 2.474072496004985e-05, + "loss": 0.7238, + "step": 16264 + }, + { + "epoch": 2.59, + "learning_rate": 2.473814520278945e-05, + "loss": 0.7201, + "step": 16265 + }, + { + "epoch": 2.59, + "learning_rate": 2.473556544831764e-05, + "loss": 0.6979, + "step": 16266 + }, + { + "epoch": 2.59, + "learning_rate": 2.4732985696661903e-05, + "loss": 0.7188, + "step": 16267 + }, + { + "epoch": 2.59, + "learning_rate": 2.473040594784971e-05, + "loss": 0.6763, + "step": 16268 + }, + { + "epoch": 2.59, + "learning_rate": 2.4727826201908516e-05, + "loss": 0.6541, + "step": 16269 + }, + { + "epoch": 2.59, + "learning_rate": 2.472524645886582e-05, + "loss": 0.7901, + "step": 16270 + }, + { + "epoch": 2.59, + "learning_rate": 2.472266671874908e-05, + "loss": 0.6986, + "step": 16271 + }, + { + "epoch": 2.59, + "learning_rate": 2.4720086981585773e-05, + "loss": 0.6747, + "step": 16272 + }, + { + "epoch": 2.59, + "learning_rate": 2.471750724740336e-05, + "loss": 0.685, + "step": 16273 + }, + { + "epoch": 2.59, + "learning_rate": 2.4714927516229332e-05, + "loss": 0.6925, + "step": 16274 + }, + { + "epoch": 2.59, + "learning_rate": 2.471234778809116e-05, + "loss": 0.6932, + "step": 16275 + }, + { + "epoch": 2.59, + "learning_rate": 2.47097680630163e-05, + "loss": 0.7185, + "step": 16276 + }, + { + "epoch": 2.59, + "learning_rate": 2.4707188341032232e-05, + "loss": 0.6913, + "step": 16277 + }, + { + "epoch": 2.59, + "learning_rate": 2.470460862216643e-05, + "loss": 0.7304, + "step": 16278 + }, + { + "epoch": 2.59, + "learning_rate": 2.4702028906446374e-05, + "loss": 0.671, + "step": 16279 + }, + { + "epoch": 2.59, + "learning_rate": 2.469944919389952e-05, + "loss": 0.6895, + "step": 16280 + }, + { + "epoch": 2.59, + "learning_rate": 2.4696869484553355e-05, + "loss": 0.7268, + "step": 16281 + }, + { + "epoch": 2.59, + "learning_rate": 2.469428977843535e-05, + "loss": 0.7455, + "step": 16282 + }, + { + "epoch": 2.59, + "learning_rate": 2.4691710075572967e-05, + "loss": 0.697, + "step": 16283 + }, + { + "epoch": 2.59, + "learning_rate": 2.4689130375993683e-05, + "loss": 0.7112, + "step": 16284 + }, + { + "epoch": 2.59, + "learning_rate": 2.4686550679724975e-05, + "loss": 0.7131, + "step": 16285 + }, + { + "epoch": 2.59, + "learning_rate": 2.4683970986794313e-05, + "loss": 0.6758, + "step": 16286 + }, + { + "epoch": 2.59, + "learning_rate": 2.4681391297229163e-05, + "loss": 0.7033, + "step": 16287 + }, + { + "epoch": 2.59, + "learning_rate": 2.4678811611057007e-05, + "loss": 0.6927, + "step": 16288 + }, + { + "epoch": 2.6, + "learning_rate": 2.4676231928305312e-05, + "loss": 0.6956, + "step": 16289 + }, + { + "epoch": 2.6, + "learning_rate": 2.4673652249001546e-05, + "loss": 0.7931, + "step": 16290 + }, + { + "epoch": 2.6, + "learning_rate": 2.4671072573173194e-05, + "loss": 0.6665, + "step": 16291 + }, + { + "epoch": 2.6, + "learning_rate": 2.4668492900847716e-05, + "loss": 0.7136, + "step": 16292 + }, + { + "epoch": 2.6, + "learning_rate": 2.466591323205258e-05, + "loss": 0.7157, + "step": 16293 + }, + { + "epoch": 2.6, + "learning_rate": 2.4663333566815273e-05, + "loss": 0.6678, + "step": 16294 + }, + { + "epoch": 2.6, + "learning_rate": 2.4660753905163264e-05, + "loss": 0.6714, + "step": 16295 + }, + { + "epoch": 2.6, + "learning_rate": 2.4658174247124017e-05, + "loss": 0.6954, + "step": 16296 + }, + { + "epoch": 2.6, + "learning_rate": 2.4655594592725005e-05, + "loss": 0.6642, + "step": 16297 + }, + { + "epoch": 2.6, + "learning_rate": 2.4653014941993693e-05, + "loss": 0.7839, + "step": 16298 + }, + { + "epoch": 2.6, + "learning_rate": 2.465043529495757e-05, + "loss": 0.6742, + "step": 16299 + }, + { + "epoch": 2.6, + "learning_rate": 2.46478556516441e-05, + "loss": 0.6474, + "step": 16300 + }, + { + "epoch": 2.6, + "learning_rate": 2.4645276012080743e-05, + "loss": 0.8088, + "step": 16301 + }, + { + "epoch": 2.6, + "learning_rate": 2.464269637629499e-05, + "loss": 0.7172, + "step": 16302 + }, + { + "epoch": 2.6, + "learning_rate": 2.4640116744314304e-05, + "loss": 0.7083, + "step": 16303 + }, + { + "epoch": 2.6, + "learning_rate": 2.463753711616615e-05, + "loss": 0.7393, + "step": 16304 + }, + { + "epoch": 2.6, + "learning_rate": 2.4634957491878012e-05, + "loss": 0.7144, + "step": 16305 + }, + { + "epoch": 2.6, + "learning_rate": 2.463237787147735e-05, + "loss": 0.6619, + "step": 16306 + }, + { + "epoch": 2.6, + "learning_rate": 2.4629798254991643e-05, + "loss": 0.6311, + "step": 16307 + }, + { + "epoch": 2.6, + "learning_rate": 2.462721864244836e-05, + "loss": 0.6963, + "step": 16308 + }, + { + "epoch": 2.6, + "learning_rate": 2.4624639033874973e-05, + "loss": 0.6643, + "step": 16309 + }, + { + "epoch": 2.6, + "learning_rate": 2.4622059429298948e-05, + "loss": 0.731, + "step": 16310 + }, + { + "epoch": 2.6, + "learning_rate": 2.461947982874777e-05, + "loss": 0.7242, + "step": 16311 + }, + { + "epoch": 2.6, + "learning_rate": 2.4616900232248894e-05, + "loss": 0.8155, + "step": 16312 + }, + { + "epoch": 2.6, + "learning_rate": 2.4614320639829797e-05, + "loss": 0.7174, + "step": 16313 + }, + { + "epoch": 2.6, + "learning_rate": 2.4611741051517952e-05, + "loss": 0.6938, + "step": 16314 + }, + { + "epoch": 2.6, + "learning_rate": 2.460916146734084e-05, + "loss": 0.667, + "step": 16315 + }, + { + "epoch": 2.6, + "learning_rate": 2.4606581887325912e-05, + "loss": 0.7542, + "step": 16316 + }, + { + "epoch": 2.6, + "learning_rate": 2.4604002311500648e-05, + "loss": 0.7078, + "step": 16317 + }, + { + "epoch": 2.6, + "learning_rate": 2.4601422739892514e-05, + "loss": 0.9176, + "step": 16318 + }, + { + "epoch": 2.6, + "learning_rate": 2.4598843172528993e-05, + "loss": 0.6905, + "step": 16319 + }, + { + "epoch": 2.6, + "learning_rate": 2.459626360943755e-05, + "loss": 0.729, + "step": 16320 + }, + { + "epoch": 2.6, + "learning_rate": 2.4593684050645642e-05, + "loss": 0.7119, + "step": 16321 + }, + { + "epoch": 2.6, + "learning_rate": 2.459110449618076e-05, + "loss": 0.6935, + "step": 16322 + }, + { + "epoch": 2.6, + "learning_rate": 2.458852494607037e-05, + "loss": 0.615, + "step": 16323 + }, + { + "epoch": 2.6, + "learning_rate": 2.4585945400341934e-05, + "loss": 0.6685, + "step": 16324 + }, + { + "epoch": 2.6, + "learning_rate": 2.4583365859022932e-05, + "loss": 0.6532, + "step": 16325 + }, + { + "epoch": 2.6, + "learning_rate": 2.458078632214083e-05, + "loss": 0.6856, + "step": 16326 + }, + { + "epoch": 2.6, + "learning_rate": 2.4578206789723095e-05, + "loss": 0.7664, + "step": 16327 + }, + { + "epoch": 2.6, + "learning_rate": 2.4575627261797206e-05, + "loss": 0.7209, + "step": 16328 + }, + { + "epoch": 2.6, + "learning_rate": 2.457304773839063e-05, + "loss": 0.693, + "step": 16329 + }, + { + "epoch": 2.6, + "learning_rate": 2.4570468219530832e-05, + "loss": 0.6906, + "step": 16330 + }, + { + "epoch": 2.6, + "learning_rate": 2.4567888705245293e-05, + "loss": 0.6545, + "step": 16331 + }, + { + "epoch": 2.6, + "learning_rate": 2.4565309195561474e-05, + "loss": 0.715, + "step": 16332 + }, + { + "epoch": 2.6, + "learning_rate": 2.4562729690506843e-05, + "loss": 0.6839, + "step": 16333 + }, + { + "epoch": 2.6, + "learning_rate": 2.456015019010888e-05, + "loss": 0.6438, + "step": 16334 + }, + { + "epoch": 2.6, + "learning_rate": 2.455757069439505e-05, + "loss": 0.6794, + "step": 16335 + }, + { + "epoch": 2.6, + "learning_rate": 2.455499120339283e-05, + "loss": 0.8149, + "step": 16336 + }, + { + "epoch": 2.6, + "learning_rate": 2.4552411717129683e-05, + "loss": 0.779, + "step": 16337 + }, + { + "epoch": 2.6, + "learning_rate": 2.4549832235633067e-05, + "loss": 0.7448, + "step": 16338 + }, + { + "epoch": 2.6, + "learning_rate": 2.4547252758930475e-05, + "loss": 0.7444, + "step": 16339 + }, + { + "epoch": 2.6, + "learning_rate": 2.4544673287049364e-05, + "loss": 0.7215, + "step": 16340 + }, + { + "epoch": 2.6, + "learning_rate": 2.45420938200172e-05, + "loss": 0.7633, + "step": 16341 + }, + { + "epoch": 2.6, + "learning_rate": 2.4539514357861465e-05, + "loss": 0.7228, + "step": 16342 + }, + { + "epoch": 2.6, + "learning_rate": 2.4536934900609623e-05, + "loss": 0.6959, + "step": 16343 + }, + { + "epoch": 2.6, + "learning_rate": 2.453435544828914e-05, + "loss": 0.7876, + "step": 16344 + }, + { + "epoch": 2.6, + "learning_rate": 2.4531776000927486e-05, + "loss": 0.7473, + "step": 16345 + }, + { + "epoch": 2.6, + "learning_rate": 2.4529196558552138e-05, + "loss": 0.7205, + "step": 16346 + }, + { + "epoch": 2.6, + "learning_rate": 2.452661712119056e-05, + "loss": 0.701, + "step": 16347 + }, + { + "epoch": 2.6, + "learning_rate": 2.452403768887022e-05, + "loss": 0.6305, + "step": 16348 + }, + { + "epoch": 2.6, + "learning_rate": 2.4521458261618595e-05, + "loss": 0.6904, + "step": 16349 + }, + { + "epoch": 2.6, + "learning_rate": 2.4518878839463142e-05, + "loss": 0.6676, + "step": 16350 + }, + { + "epoch": 2.6, + "learning_rate": 2.4516299422431345e-05, + "loss": 0.6903, + "step": 16351 + }, + { + "epoch": 2.61, + "learning_rate": 2.4513720010550663e-05, + "loss": 0.7402, + "step": 16352 + }, + { + "epoch": 2.61, + "learning_rate": 2.451114060384856e-05, + "loss": 0.6808, + "step": 16353 + }, + { + "epoch": 2.61, + "learning_rate": 2.450856120235252e-05, + "loss": 0.6816, + "step": 16354 + }, + { + "epoch": 2.61, + "learning_rate": 2.4505981806090007e-05, + "loss": 0.7008, + "step": 16355 + }, + { + "epoch": 2.61, + "learning_rate": 2.4503402415088488e-05, + "loss": 0.7356, + "step": 16356 + }, + { + "epoch": 2.61, + "learning_rate": 2.4500823029375432e-05, + "loss": 0.6463, + "step": 16357 + }, + { + "epoch": 2.61, + "learning_rate": 2.44982436489783e-05, + "loss": 0.7104, + "step": 16358 + }, + { + "epoch": 2.61, + "learning_rate": 2.4495664273924575e-05, + "loss": 0.7082, + "step": 16359 + }, + { + "epoch": 2.61, + "learning_rate": 2.4493084904241716e-05, + "loss": 0.7622, + "step": 16360 + }, + { + "epoch": 2.61, + "learning_rate": 2.4490505539957194e-05, + "loss": 0.6839, + "step": 16361 + }, + { + "epoch": 2.61, + "learning_rate": 2.448792618109848e-05, + "loss": 0.6972, + "step": 16362 + }, + { + "epoch": 2.61, + "learning_rate": 2.4485346827693044e-05, + "loss": 0.6836, + "step": 16363 + }, + { + "epoch": 2.61, + "learning_rate": 2.4482767479768347e-05, + "loss": 0.6766, + "step": 16364 + }, + { + "epoch": 2.61, + "learning_rate": 2.4480188137351868e-05, + "loss": 0.711, + "step": 16365 + }, + { + "epoch": 2.61, + "learning_rate": 2.4477608800471068e-05, + "loss": 0.7128, + "step": 16366 + }, + { + "epoch": 2.61, + "learning_rate": 2.447502946915341e-05, + "loss": 0.6903, + "step": 16367 + }, + { + "epoch": 2.61, + "learning_rate": 2.4472450143426378e-05, + "loss": 0.6623, + "step": 16368 + }, + { + "epoch": 2.61, + "learning_rate": 2.446987082331743e-05, + "loss": 0.6321, + "step": 16369 + }, + { + "epoch": 2.61, + "learning_rate": 2.446729150885403e-05, + "loss": 0.6964, + "step": 16370 + }, + { + "epoch": 2.61, + "learning_rate": 2.4464712200063662e-05, + "loss": 0.6658, + "step": 16371 + }, + { + "epoch": 2.61, + "learning_rate": 2.4462132896973778e-05, + "loss": 0.74, + "step": 16372 + }, + { + "epoch": 2.61, + "learning_rate": 2.445955359961185e-05, + "loss": 0.6745, + "step": 16373 + }, + { + "epoch": 2.61, + "learning_rate": 2.4456974308005353e-05, + "loss": 0.6922, + "step": 16374 + }, + { + "epoch": 2.61, + "learning_rate": 2.4454395022181747e-05, + "loss": 0.6936, + "step": 16375 + }, + { + "epoch": 2.61, + "learning_rate": 2.4451815742168513e-05, + "loss": 0.7019, + "step": 16376 + }, + { + "epoch": 2.61, + "learning_rate": 2.44492364679931e-05, + "loss": 0.7608, + "step": 16377 + }, + { + "epoch": 2.61, + "learning_rate": 2.444665719968298e-05, + "loss": 0.6791, + "step": 16378 + }, + { + "epoch": 2.61, + "learning_rate": 2.444407793726563e-05, + "loss": 0.7508, + "step": 16379 + }, + { + "epoch": 2.61, + "learning_rate": 2.4441498680768512e-05, + "loss": 0.7742, + "step": 16380 + }, + { + "epoch": 2.61, + "learning_rate": 2.443891943021909e-05, + "loss": 0.6996, + "step": 16381 + }, + { + "epoch": 2.61, + "learning_rate": 2.443634018564484e-05, + "loss": 0.6878, + "step": 16382 + }, + { + "epoch": 2.61, + "learning_rate": 2.4433760947073224e-05, + "loss": 0.7879, + "step": 16383 + }, + { + "epoch": 2.61, + "learning_rate": 2.4431181714531707e-05, + "loss": 0.7134, + "step": 16384 + }, + { + "epoch": 2.61, + "learning_rate": 2.442860248804776e-05, + "loss": 0.6622, + "step": 16385 + }, + { + "epoch": 2.61, + "learning_rate": 2.4426023267648855e-05, + "loss": 0.6464, + "step": 16386 + }, + { + "epoch": 2.61, + "learning_rate": 2.4423444053362447e-05, + "loss": 0.6294, + "step": 16387 + }, + { + "epoch": 2.61, + "learning_rate": 2.4420864845216014e-05, + "loss": 0.7483, + "step": 16388 + }, + { + "epoch": 2.61, + "learning_rate": 2.441828564323702e-05, + "loss": 0.709, + "step": 16389 + }, + { + "epoch": 2.61, + "learning_rate": 2.441570644745293e-05, + "loss": 0.686, + "step": 16390 + }, + { + "epoch": 2.61, + "learning_rate": 2.4413127257891204e-05, + "loss": 0.7081, + "step": 16391 + }, + { + "epoch": 2.61, + "learning_rate": 2.4410548074579327e-05, + "loss": 0.7181, + "step": 16392 + }, + { + "epoch": 2.61, + "learning_rate": 2.440796889754475e-05, + "loss": 0.6919, + "step": 16393 + }, + { + "epoch": 2.61, + "learning_rate": 2.4405389726814942e-05, + "loss": 0.6805, + "step": 16394 + }, + { + "epoch": 2.61, + "learning_rate": 2.440281056241738e-05, + "loss": 0.7364, + "step": 16395 + }, + { + "epoch": 2.61, + "learning_rate": 2.440023140437952e-05, + "loss": 0.7474, + "step": 16396 + }, + { + "epoch": 2.61, + "learning_rate": 2.4397652252728835e-05, + "loss": 0.7332, + "step": 16397 + }, + { + "epoch": 2.61, + "learning_rate": 2.4395073107492776e-05, + "loss": 0.7082, + "step": 16398 + }, + { + "epoch": 2.61, + "learning_rate": 2.4392493968698833e-05, + "loss": 0.7905, + "step": 16399 + }, + { + "epoch": 2.61, + "learning_rate": 2.4389914836374457e-05, + "loss": 0.6974, + "step": 16400 + }, + { + "epoch": 2.61, + "learning_rate": 2.4387335710547112e-05, + "loss": 0.734, + "step": 16401 + }, + { + "epoch": 2.61, + "learning_rate": 2.4384756591244275e-05, + "loss": 0.6966, + "step": 16402 + }, + { + "epoch": 2.61, + "learning_rate": 2.4382177478493407e-05, + "loss": 0.6821, + "step": 16403 + }, + { + "epoch": 2.61, + "learning_rate": 2.4379598372321972e-05, + "loss": 0.7167, + "step": 16404 + }, + { + "epoch": 2.61, + "learning_rate": 2.4377019272757436e-05, + "loss": 0.6779, + "step": 16405 + }, + { + "epoch": 2.61, + "learning_rate": 2.4374440179827268e-05, + "loss": 0.6947, + "step": 16406 + }, + { + "epoch": 2.61, + "learning_rate": 2.4371861093558936e-05, + "loss": 0.6558, + "step": 16407 + }, + { + "epoch": 2.61, + "learning_rate": 2.4369282013979894e-05, + "loss": 0.7831, + "step": 16408 + }, + { + "epoch": 2.61, + "learning_rate": 2.4366702941117624e-05, + "loss": 0.7305, + "step": 16409 + }, + { + "epoch": 2.61, + "learning_rate": 2.4364123874999582e-05, + "loss": 0.7332, + "step": 16410 + }, + { + "epoch": 2.61, + "learning_rate": 2.4361544815653227e-05, + "loss": 0.7369, + "step": 16411 + }, + { + "epoch": 2.61, + "learning_rate": 2.435896576310604e-05, + "loss": 0.715, + "step": 16412 + }, + { + "epoch": 2.61, + "learning_rate": 2.4356386717385476e-05, + "loss": 0.7014, + "step": 16413 + }, + { + "epoch": 2.61, + "learning_rate": 2.4353807678518998e-05, + "loss": 0.6575, + "step": 16414 + }, + { + "epoch": 2.62, + "learning_rate": 2.4351228646534084e-05, + "loss": 0.7463, + "step": 16415 + }, + { + "epoch": 2.62, + "learning_rate": 2.4348649621458194e-05, + "loss": 0.69, + "step": 16416 + }, + { + "epoch": 2.62, + "learning_rate": 2.4346070603318785e-05, + "loss": 0.7265, + "step": 16417 + }, + { + "epoch": 2.62, + "learning_rate": 2.434349159214333e-05, + "loss": 0.7334, + "step": 16418 + }, + { + "epoch": 2.62, + "learning_rate": 2.434091258795928e-05, + "loss": 0.7381, + "step": 16419 + }, + { + "epoch": 2.62, + "learning_rate": 2.4338333590794118e-05, + "loss": 0.7103, + "step": 16420 + }, + { + "epoch": 2.62, + "learning_rate": 2.4335754600675304e-05, + "loss": 0.6794, + "step": 16421 + }, + { + "epoch": 2.62, + "learning_rate": 2.433317561763029e-05, + "loss": 0.6782, + "step": 16422 + }, + { + "epoch": 2.62, + "learning_rate": 2.4330596641686563e-05, + "loss": 0.6869, + "step": 16423 + }, + { + "epoch": 2.62, + "learning_rate": 2.432801767287157e-05, + "loss": 0.7, + "step": 16424 + }, + { + "epoch": 2.62, + "learning_rate": 2.432543871121278e-05, + "loss": 0.6158, + "step": 16425 + }, + { + "epoch": 2.62, + "learning_rate": 2.4322859756737658e-05, + "loss": 0.7036, + "step": 16426 + }, + { + "epoch": 2.62, + "learning_rate": 2.4320280809473674e-05, + "loss": 0.7106, + "step": 16427 + }, + { + "epoch": 2.62, + "learning_rate": 2.4317701869448277e-05, + "loss": 0.7151, + "step": 16428 + }, + { + "epoch": 2.62, + "learning_rate": 2.431512293668895e-05, + "loss": 0.6948, + "step": 16429 + }, + { + "epoch": 2.62, + "learning_rate": 2.4312544011223144e-05, + "loss": 0.6857, + "step": 16430 + }, + { + "epoch": 2.62, + "learning_rate": 2.4309965093078323e-05, + "loss": 0.7498, + "step": 16431 + }, + { + "epoch": 2.62, + "learning_rate": 2.430738618228196e-05, + "loss": 0.7244, + "step": 16432 + }, + { + "epoch": 2.62, + "learning_rate": 2.4304807278861516e-05, + "loss": 0.6282, + "step": 16433 + }, + { + "epoch": 2.62, + "learning_rate": 2.4302228382844444e-05, + "loss": 0.7576, + "step": 16434 + }, + { + "epoch": 2.62, + "learning_rate": 2.4299649494258227e-05, + "loss": 0.6888, + "step": 16435 + }, + { + "epoch": 2.62, + "learning_rate": 2.429707061313031e-05, + "loss": 0.6956, + "step": 16436 + }, + { + "epoch": 2.62, + "learning_rate": 2.4294491739488174e-05, + "loss": 0.7967, + "step": 16437 + }, + { + "epoch": 2.62, + "learning_rate": 2.429191287335927e-05, + "loss": 0.7156, + "step": 16438 + }, + { + "epoch": 2.62, + "learning_rate": 2.428933401477105e-05, + "loss": 0.7381, + "step": 16439 + }, + { + "epoch": 2.62, + "learning_rate": 2.4286755163751004e-05, + "loss": 0.7, + "step": 16440 + }, + { + "epoch": 2.62, + "learning_rate": 2.4284176320326578e-05, + "loss": 0.6699, + "step": 16441 + }, + { + "epoch": 2.62, + "learning_rate": 2.4281597484525238e-05, + "loss": 0.7388, + "step": 16442 + }, + { + "epoch": 2.62, + "learning_rate": 2.4279018656374452e-05, + "loss": 0.7259, + "step": 16443 + }, + { + "epoch": 2.62, + "learning_rate": 2.427643983590168e-05, + "loss": 0.737, + "step": 16444 + }, + { + "epoch": 2.62, + "learning_rate": 2.4273861023134382e-05, + "loss": 0.6976, + "step": 16445 + }, + { + "epoch": 2.62, + "learning_rate": 2.4271282218100023e-05, + "loss": 0.8297, + "step": 16446 + }, + { + "epoch": 2.62, + "learning_rate": 2.426870342082607e-05, + "loss": 0.7385, + "step": 16447 + }, + { + "epoch": 2.62, + "learning_rate": 2.426612463133997e-05, + "loss": 0.7286, + "step": 16448 + }, + { + "epoch": 2.62, + "learning_rate": 2.426354584966921e-05, + "loss": 0.7301, + "step": 16449 + }, + { + "epoch": 2.62, + "learning_rate": 2.426096707584124e-05, + "loss": 0.7232, + "step": 16450 + }, + { + "epoch": 2.62, + "learning_rate": 2.4258388309883512e-05, + "loss": 0.7784, + "step": 16451 + }, + { + "epoch": 2.62, + "learning_rate": 2.4255809551823504e-05, + "loss": 0.711, + "step": 16452 + }, + { + "epoch": 2.62, + "learning_rate": 2.4253230801688677e-05, + "loss": 0.8023, + "step": 16453 + }, + { + "epoch": 2.62, + "learning_rate": 2.425065205950648e-05, + "loss": 0.7161, + "step": 16454 + }, + { + "epoch": 2.62, + "learning_rate": 2.4248073325304388e-05, + "loss": 0.6724, + "step": 16455 + }, + { + "epoch": 2.62, + "learning_rate": 2.4245494599109857e-05, + "loss": 0.6445, + "step": 16456 + }, + { + "epoch": 2.62, + "learning_rate": 2.4242915880950358e-05, + "loss": 0.6746, + "step": 16457 + }, + { + "epoch": 2.62, + "learning_rate": 2.4240337170853342e-05, + "loss": 0.6802, + "step": 16458 + }, + { + "epoch": 2.62, + "learning_rate": 2.4237758468846267e-05, + "loss": 0.7432, + "step": 16459 + }, + { + "epoch": 2.62, + "learning_rate": 2.4235179774956607e-05, + "loss": 0.6896, + "step": 16460 + }, + { + "epoch": 2.62, + "learning_rate": 2.423260108921182e-05, + "loss": 0.6795, + "step": 16461 + }, + { + "epoch": 2.62, + "learning_rate": 2.4230022411639354e-05, + "loss": 0.7452, + "step": 16462 + }, + { + "epoch": 2.62, + "learning_rate": 2.4227443742266693e-05, + "loss": 0.752, + "step": 16463 + }, + { + "epoch": 2.62, + "learning_rate": 2.4224865081121284e-05, + "loss": 0.6777, + "step": 16464 + }, + { + "epoch": 2.62, + "learning_rate": 2.4222286428230587e-05, + "loss": 0.7668, + "step": 16465 + }, + { + "epoch": 2.62, + "learning_rate": 2.4219707783622072e-05, + "loss": 0.7029, + "step": 16466 + }, + { + "epoch": 2.62, + "learning_rate": 2.42171291473232e-05, + "loss": 0.6903, + "step": 16467 + }, + { + "epoch": 2.62, + "learning_rate": 2.4214550519361416e-05, + "loss": 0.8103, + "step": 16468 + }, + { + "epoch": 2.62, + "learning_rate": 2.42119718997642e-05, + "loss": 0.7043, + "step": 16469 + }, + { + "epoch": 2.62, + "learning_rate": 2.4209393288559005e-05, + "loss": 0.6869, + "step": 16470 + }, + { + "epoch": 2.62, + "learning_rate": 2.4206814685773285e-05, + "loss": 0.7334, + "step": 16471 + }, + { + "epoch": 2.62, + "learning_rate": 2.4204236091434513e-05, + "loss": 0.7095, + "step": 16472 + }, + { + "epoch": 2.62, + "learning_rate": 2.4201657505570142e-05, + "loss": 0.7098, + "step": 16473 + }, + { + "epoch": 2.62, + "learning_rate": 2.419907892820763e-05, + "loss": 0.6743, + "step": 16474 + }, + { + "epoch": 2.62, + "learning_rate": 2.4196500359374447e-05, + "loss": 0.7063, + "step": 16475 + }, + { + "epoch": 2.62, + "learning_rate": 2.4193921799098045e-05, + "loss": 0.6909, + "step": 16476 + }, + { + "epoch": 2.62, + "learning_rate": 2.4191343247405894e-05, + "loss": 0.7643, + "step": 16477 + }, + { + "epoch": 2.63, + "learning_rate": 2.4188764704325442e-05, + "loss": 0.7575, + "step": 16478 + }, + { + "epoch": 2.63, + "learning_rate": 2.4186186169884147e-05, + "loss": 0.7476, + "step": 16479 + }, + { + "epoch": 2.63, + "learning_rate": 2.4183607644109483e-05, + "loss": 0.7047, + "step": 16480 + }, + { + "epoch": 2.63, + "learning_rate": 2.4181029127028898e-05, + "loss": 0.6579, + "step": 16481 + }, + { + "epoch": 2.63, + "learning_rate": 2.417845061866985e-05, + "loss": 0.6921, + "step": 16482 + }, + { + "epoch": 2.63, + "learning_rate": 2.4175872119059812e-05, + "loss": 0.7597, + "step": 16483 + }, + { + "epoch": 2.63, + "learning_rate": 2.4173293628226236e-05, + "loss": 0.7235, + "step": 16484 + }, + { + "epoch": 2.63, + "learning_rate": 2.4170715146196572e-05, + "loss": 0.7471, + "step": 16485 + }, + { + "epoch": 2.63, + "learning_rate": 2.4168136672998297e-05, + "loss": 0.7275, + "step": 16486 + }, + { + "epoch": 2.63, + "learning_rate": 2.416555820865886e-05, + "loss": 0.7253, + "step": 16487 + }, + { + "epoch": 2.63, + "learning_rate": 2.4162979753205717e-05, + "loss": 0.6906, + "step": 16488 + }, + { + "epoch": 2.63, + "learning_rate": 2.4160401306666336e-05, + "loss": 0.7208, + "step": 16489 + }, + { + "epoch": 2.63, + "learning_rate": 2.4157822869068172e-05, + "loss": 0.7027, + "step": 16490 + }, + { + "epoch": 2.63, + "learning_rate": 2.4155244440438677e-05, + "loss": 0.7362, + "step": 16491 + }, + { + "epoch": 2.63, + "learning_rate": 2.415266602080532e-05, + "loss": 0.7235, + "step": 16492 + }, + { + "epoch": 2.63, + "learning_rate": 2.415008761019556e-05, + "loss": 0.6752, + "step": 16493 + }, + { + "epoch": 2.63, + "learning_rate": 2.414750920863684e-05, + "loss": 0.6865, + "step": 16494 + }, + { + "epoch": 2.63, + "learning_rate": 2.414493081615664e-05, + "loss": 0.6994, + "step": 16495 + }, + { + "epoch": 2.63, + "learning_rate": 2.4142352432782404e-05, + "loss": 0.7311, + "step": 16496 + }, + { + "epoch": 2.63, + "learning_rate": 2.41397740585416e-05, + "loss": 0.7314, + "step": 16497 + }, + { + "epoch": 2.63, + "learning_rate": 2.4137195693461676e-05, + "loss": 0.6773, + "step": 16498 + }, + { + "epoch": 2.63, + "learning_rate": 2.4134617337570088e-05, + "loss": 0.6961, + "step": 16499 + }, + { + "epoch": 2.63, + "learning_rate": 2.4132038990894303e-05, + "loss": 0.6528, + "step": 16500 + }, + { + "epoch": 2.63, + "learning_rate": 2.412946065346178e-05, + "loss": 0.7115, + "step": 16501 + }, + { + "epoch": 2.63, + "learning_rate": 2.4126882325299963e-05, + "loss": 0.7311, + "step": 16502 + }, + { + "epoch": 2.63, + "learning_rate": 2.4124304006436325e-05, + "loss": 0.6524, + "step": 16503 + }, + { + "epoch": 2.63, + "learning_rate": 2.412172569689832e-05, + "loss": 0.6367, + "step": 16504 + }, + { + "epoch": 2.63, + "learning_rate": 2.41191473967134e-05, + "loss": 0.7194, + "step": 16505 + }, + { + "epoch": 2.63, + "learning_rate": 2.4116569105909026e-05, + "loss": 0.6576, + "step": 16506 + }, + { + "epoch": 2.63, + "learning_rate": 2.4113990824512654e-05, + "loss": 0.7275, + "step": 16507 + }, + { + "epoch": 2.63, + "learning_rate": 2.4111412552551745e-05, + "loss": 0.6843, + "step": 16508 + }, + { + "epoch": 2.63, + "learning_rate": 2.4108834290053746e-05, + "loss": 0.8408, + "step": 16509 + }, + { + "epoch": 2.63, + "learning_rate": 2.4106256037046127e-05, + "loss": 0.7485, + "step": 16510 + }, + { + "epoch": 2.63, + "learning_rate": 2.410367779355634e-05, + "loss": 0.7128, + "step": 16511 + }, + { + "epoch": 2.63, + "learning_rate": 2.410109955961183e-05, + "loss": 0.6995, + "step": 16512 + }, + { + "epoch": 2.63, + "learning_rate": 2.4098521335240076e-05, + "loss": 0.6555, + "step": 16513 + }, + { + "epoch": 2.63, + "learning_rate": 2.4095943120468517e-05, + "loss": 0.6847, + "step": 16514 + }, + { + "epoch": 2.63, + "learning_rate": 2.4093364915324613e-05, + "loss": 0.6816, + "step": 16515 + }, + { + "epoch": 2.63, + "learning_rate": 2.4090786719835827e-05, + "loss": 0.6602, + "step": 16516 + }, + { + "epoch": 2.63, + "learning_rate": 2.4088208534029614e-05, + "loss": 0.6414, + "step": 16517 + }, + { + "epoch": 2.63, + "learning_rate": 2.4085630357933424e-05, + "loss": 0.6507, + "step": 16518 + }, + { + "epoch": 2.63, + "learning_rate": 2.4083052191574707e-05, + "loss": 0.7051, + "step": 16519 + }, + { + "epoch": 2.63, + "learning_rate": 2.4080474034980936e-05, + "loss": 0.6971, + "step": 16520 + }, + { + "epoch": 2.63, + "learning_rate": 2.4077895888179556e-05, + "loss": 0.6697, + "step": 16521 + }, + { + "epoch": 2.63, + "learning_rate": 2.4075317751198023e-05, + "loss": 0.7273, + "step": 16522 + }, + { + "epoch": 2.63, + "learning_rate": 2.4072739624063793e-05, + "loss": 0.6199, + "step": 16523 + }, + { + "epoch": 2.63, + "learning_rate": 2.4070161506804324e-05, + "loss": 0.6994, + "step": 16524 + }, + { + "epoch": 2.63, + "learning_rate": 2.4067583399447077e-05, + "loss": 0.722, + "step": 16525 + }, + { + "epoch": 2.63, + "learning_rate": 2.4065005302019492e-05, + "loss": 0.7071, + "step": 16526 + }, + { + "epoch": 2.63, + "learning_rate": 2.4062427214549037e-05, + "loss": 0.7026, + "step": 16527 + }, + { + "epoch": 2.63, + "learning_rate": 2.4059849137063165e-05, + "loss": 0.715, + "step": 16528 + }, + { + "epoch": 2.63, + "learning_rate": 2.405727106958932e-05, + "loss": 0.6747, + "step": 16529 + }, + { + "epoch": 2.63, + "learning_rate": 2.4054693012154973e-05, + "loss": 0.7002, + "step": 16530 + }, + { + "epoch": 2.63, + "learning_rate": 2.4052114964787574e-05, + "loss": 0.6533, + "step": 16531 + }, + { + "epoch": 2.63, + "learning_rate": 2.4049536927514568e-05, + "loss": 0.7248, + "step": 16532 + }, + { + "epoch": 2.63, + "learning_rate": 2.4046958900363423e-05, + "loss": 0.6633, + "step": 16533 + }, + { + "epoch": 2.63, + "learning_rate": 2.4044380883361586e-05, + "loss": 0.6948, + "step": 16534 + }, + { + "epoch": 2.63, + "learning_rate": 2.4041802876536507e-05, + "loss": 0.702, + "step": 16535 + }, + { + "epoch": 2.63, + "learning_rate": 2.403922487991565e-05, + "loss": 0.7546, + "step": 16536 + }, + { + "epoch": 2.63, + "learning_rate": 2.4036646893526473e-05, + "loss": 0.7313, + "step": 16537 + }, + { + "epoch": 2.63, + "learning_rate": 2.4034068917396415e-05, + "loss": 0.6313, + "step": 16538 + }, + { + "epoch": 2.63, + "learning_rate": 2.4031490951552938e-05, + "loss": 0.6933, + "step": 16539 + }, + { + "epoch": 2.64, + "learning_rate": 2.4028912996023487e-05, + "loss": 0.6486, + "step": 16540 + }, + { + "epoch": 2.64, + "learning_rate": 2.4026335050835526e-05, + "loss": 0.6904, + "step": 16541 + }, + { + "epoch": 2.64, + "learning_rate": 2.402375711601651e-05, + "loss": 0.731, + "step": 16542 + }, + { + "epoch": 2.64, + "learning_rate": 2.402117919159388e-05, + "loss": 0.748, + "step": 16543 + }, + { + "epoch": 2.64, + "learning_rate": 2.4018601277595103e-05, + "loss": 0.6991, + "step": 16544 + }, + { + "epoch": 2.64, + "learning_rate": 2.4016023374047627e-05, + "loss": 0.7841, + "step": 16545 + }, + { + "epoch": 2.64, + "learning_rate": 2.4013445480978903e-05, + "loss": 0.7228, + "step": 16546 + }, + { + "epoch": 2.64, + "learning_rate": 2.4010867598416387e-05, + "loss": 0.7633, + "step": 16547 + }, + { + "epoch": 2.64, + "learning_rate": 2.400828972638753e-05, + "loss": 0.6478, + "step": 16548 + }, + { + "epoch": 2.64, + "learning_rate": 2.400571186491978e-05, + "loss": 0.663, + "step": 16549 + }, + { + "epoch": 2.64, + "learning_rate": 2.40031340140406e-05, + "loss": 0.6589, + "step": 16550 + }, + { + "epoch": 2.64, + "learning_rate": 2.400055617377744e-05, + "loss": 0.62, + "step": 16551 + }, + { + "epoch": 2.64, + "learning_rate": 2.3997978344157746e-05, + "loss": 0.7266, + "step": 16552 + }, + { + "epoch": 2.64, + "learning_rate": 2.3995400525208977e-05, + "loss": 0.7361, + "step": 16553 + }, + { + "epoch": 2.64, + "learning_rate": 2.3992822716958582e-05, + "loss": 0.7026, + "step": 16554 + }, + { + "epoch": 2.64, + "learning_rate": 2.399024491943401e-05, + "loss": 0.7189, + "step": 16555 + }, + { + "epoch": 2.64, + "learning_rate": 2.398766713266272e-05, + "loss": 0.6772, + "step": 16556 + }, + { + "epoch": 2.64, + "learning_rate": 2.3985089356672162e-05, + "loss": 0.6313, + "step": 16557 + }, + { + "epoch": 2.64, + "learning_rate": 2.3982511591489788e-05, + "loss": 0.6688, + "step": 16558 + }, + { + "epoch": 2.64, + "learning_rate": 2.397993383714305e-05, + "loss": 0.717, + "step": 16559 + }, + { + "epoch": 2.64, + "learning_rate": 2.3977356093659382e-05, + "loss": 0.7539, + "step": 16560 + }, + { + "epoch": 2.64, + "learning_rate": 2.3974778361066262e-05, + "loss": 0.6591, + "step": 16561 + }, + { + "epoch": 2.64, + "learning_rate": 2.3972200639391132e-05, + "loss": 0.639, + "step": 16562 + }, + { + "epoch": 2.64, + "learning_rate": 2.3969622928661433e-05, + "loss": 0.7456, + "step": 16563 + }, + { + "epoch": 2.64, + "learning_rate": 2.396704522890463e-05, + "loss": 0.681, + "step": 16564 + }, + { + "epoch": 2.64, + "learning_rate": 2.396446754014817e-05, + "loss": 0.7252, + "step": 16565 + }, + { + "epoch": 2.64, + "learning_rate": 2.3961889862419492e-05, + "loss": 0.6927, + "step": 16566 + }, + { + "epoch": 2.64, + "learning_rate": 2.3959312195746067e-05, + "loss": 0.6466, + "step": 16567 + }, + { + "epoch": 2.64, + "learning_rate": 2.3956734540155333e-05, + "loss": 0.6875, + "step": 16568 + }, + { + "epoch": 2.64, + "learning_rate": 2.3954156895674737e-05, + "loss": 0.6894, + "step": 16569 + }, + { + "epoch": 2.64, + "learning_rate": 2.395157926233174e-05, + "loss": 0.7241, + "step": 16570 + }, + { + "epoch": 2.64, + "learning_rate": 2.394900164015379e-05, + "loss": 0.6694, + "step": 16571 + }, + { + "epoch": 2.64, + "learning_rate": 2.3946424029168326e-05, + "loss": 0.6832, + "step": 16572 + }, + { + "epoch": 2.64, + "learning_rate": 2.3943846429402812e-05, + "loss": 0.6886, + "step": 16573 + }, + { + "epoch": 2.64, + "learning_rate": 2.3941268840884694e-05, + "loss": 0.7185, + "step": 16574 + }, + { + "epoch": 2.64, + "learning_rate": 2.3938691263641418e-05, + "loss": 0.736, + "step": 16575 + }, + { + "epoch": 2.64, + "learning_rate": 2.3936113697700434e-05, + "loss": 0.7522, + "step": 16576 + }, + { + "epoch": 2.64, + "learning_rate": 2.3933536143089197e-05, + "loss": 0.6979, + "step": 16577 + }, + { + "epoch": 2.64, + "learning_rate": 2.3930958599835157e-05, + "loss": 0.7157, + "step": 16578 + }, + { + "epoch": 2.64, + "learning_rate": 2.3928381067965755e-05, + "loss": 0.6956, + "step": 16579 + }, + { + "epoch": 2.64, + "learning_rate": 2.3925803547508438e-05, + "loss": 0.7632, + "step": 16580 + }, + { + "epoch": 2.64, + "learning_rate": 2.3923226038490664e-05, + "loss": 0.6587, + "step": 16581 + }, + { + "epoch": 2.64, + "learning_rate": 2.3920648540939883e-05, + "loss": 0.6843, + "step": 16582 + }, + { + "epoch": 2.64, + "learning_rate": 2.3918071054883535e-05, + "loss": 0.7764, + "step": 16583 + }, + { + "epoch": 2.64, + "learning_rate": 2.3915493580349075e-05, + "loss": 0.8368, + "step": 16584 + }, + { + "epoch": 2.64, + "learning_rate": 2.391291611736395e-05, + "loss": 0.7199, + "step": 16585 + }, + { + "epoch": 2.64, + "learning_rate": 2.3910338665955603e-05, + "loss": 0.7151, + "step": 16586 + }, + { + "epoch": 2.64, + "learning_rate": 2.3907761226151497e-05, + "loss": 0.7415, + "step": 16587 + }, + { + "epoch": 2.64, + "learning_rate": 2.3905183797979067e-05, + "loss": 0.685, + "step": 16588 + }, + { + "epoch": 2.64, + "learning_rate": 2.390260638146576e-05, + "loss": 0.8183, + "step": 16589 + }, + { + "epoch": 2.64, + "learning_rate": 2.3900028976639035e-05, + "loss": 0.7015, + "step": 16590 + }, + { + "epoch": 2.64, + "learning_rate": 2.3897451583526332e-05, + "loss": 0.7077, + "step": 16591 + }, + { + "epoch": 2.64, + "learning_rate": 2.38948742021551e-05, + "loss": 0.7448, + "step": 16592 + }, + { + "epoch": 2.64, + "learning_rate": 2.3892296832552784e-05, + "loss": 0.6638, + "step": 16593 + }, + { + "epoch": 2.64, + "learning_rate": 2.388971947474684e-05, + "loss": 0.6622, + "step": 16594 + }, + { + "epoch": 2.64, + "learning_rate": 2.3887142128764704e-05, + "loss": 0.6611, + "step": 16595 + }, + { + "epoch": 2.64, + "learning_rate": 2.3884564794633832e-05, + "loss": 0.701, + "step": 16596 + }, + { + "epoch": 2.64, + "learning_rate": 2.388198747238167e-05, + "loss": 0.8386, + "step": 16597 + }, + { + "epoch": 2.64, + "learning_rate": 2.3879410162035666e-05, + "loss": 0.7506, + "step": 16598 + }, + { + "epoch": 2.64, + "learning_rate": 2.387683286362326e-05, + "loss": 0.7813, + "step": 16599 + }, + { + "epoch": 2.64, + "learning_rate": 2.3874255577171896e-05, + "loss": 0.7068, + "step": 16600 + }, + { + "epoch": 2.64, + "learning_rate": 2.387167830270903e-05, + "loss": 0.7098, + "step": 16601 + }, + { + "epoch": 2.64, + "learning_rate": 2.3869101040262106e-05, + "loss": 0.6823, + "step": 16602 + }, + { + "epoch": 2.65, + "learning_rate": 2.3866523789858564e-05, + "loss": 0.7707, + "step": 16603 + }, + { + "epoch": 2.65, + "learning_rate": 2.3863946551525862e-05, + "loss": 0.7645, + "step": 16604 + }, + { + "epoch": 2.65, + "learning_rate": 2.386136932529144e-05, + "loss": 0.7677, + "step": 16605 + }, + { + "epoch": 2.65, + "learning_rate": 2.3858792111182735e-05, + "loss": 0.7433, + "step": 16606 + }, + { + "epoch": 2.65, + "learning_rate": 2.385621490922721e-05, + "loss": 0.7371, + "step": 16607 + }, + { + "epoch": 2.65, + "learning_rate": 2.38536377194523e-05, + "loss": 0.6992, + "step": 16608 + }, + { + "epoch": 2.65, + "learning_rate": 2.385106054188545e-05, + "loss": 0.7031, + "step": 16609 + }, + { + "epoch": 2.65, + "learning_rate": 2.3848483376554108e-05, + "loss": 0.6907, + "step": 16610 + }, + { + "epoch": 2.65, + "learning_rate": 2.384590622348572e-05, + "loss": 0.6813, + "step": 16611 + }, + { + "epoch": 2.65, + "learning_rate": 2.3843329082707728e-05, + "loss": 0.6678, + "step": 16612 + }, + { + "epoch": 2.65, + "learning_rate": 2.3840751954247582e-05, + "loss": 0.6838, + "step": 16613 + }, + { + "epoch": 2.65, + "learning_rate": 2.3838174838132722e-05, + "loss": 0.6634, + "step": 16614 + }, + { + "epoch": 2.65, + "learning_rate": 2.3835597734390597e-05, + "loss": 0.7299, + "step": 16615 + }, + { + "epoch": 2.65, + "learning_rate": 2.383302064304864e-05, + "loss": 0.6389, + "step": 16616 + }, + { + "epoch": 2.65, + "learning_rate": 2.3830443564134316e-05, + "loss": 0.6849, + "step": 16617 + }, + { + "epoch": 2.65, + "learning_rate": 2.3827866497675065e-05, + "loss": 0.7895, + "step": 16618 + }, + { + "epoch": 2.65, + "learning_rate": 2.382528944369831e-05, + "loss": 0.6709, + "step": 16619 + }, + { + "epoch": 2.65, + "learning_rate": 2.3822712402231512e-05, + "loss": 0.7681, + "step": 16620 + }, + { + "epoch": 2.65, + "learning_rate": 2.3820135373302112e-05, + "loss": 0.7452, + "step": 16621 + }, + { + "epoch": 2.65, + "learning_rate": 2.381755835693756e-05, + "loss": 0.7243, + "step": 16622 + }, + { + "epoch": 2.65, + "learning_rate": 2.3814981353165284e-05, + "loss": 0.7384, + "step": 16623 + }, + { + "epoch": 2.65, + "learning_rate": 2.381240436201274e-05, + "loss": 0.734, + "step": 16624 + }, + { + "epoch": 2.65, + "learning_rate": 2.380982738350737e-05, + "loss": 0.6816, + "step": 16625 + }, + { + "epoch": 2.65, + "learning_rate": 2.380725041767662e-05, + "loss": 0.7362, + "step": 16626 + }, + { + "epoch": 2.65, + "learning_rate": 2.380467346454792e-05, + "loss": 0.7149, + "step": 16627 + }, + { + "epoch": 2.65, + "learning_rate": 2.3802096524148727e-05, + "loss": 0.7395, + "step": 16628 + }, + { + "epoch": 2.65, + "learning_rate": 2.379951959650648e-05, + "loss": 0.7798, + "step": 16629 + }, + { + "epoch": 2.65, + "learning_rate": 2.3796942681648616e-05, + "loss": 0.7046, + "step": 16630 + }, + { + "epoch": 2.65, + "learning_rate": 2.3794365779602587e-05, + "loss": 0.6965, + "step": 16631 + }, + { + "epoch": 2.65, + "learning_rate": 2.379178889039583e-05, + "loss": 0.7353, + "step": 16632 + }, + { + "epoch": 2.65, + "learning_rate": 2.3789212014055782e-05, + "loss": 0.7179, + "step": 16633 + }, + { + "epoch": 2.65, + "learning_rate": 2.3786635150609894e-05, + "loss": 0.7587, + "step": 16634 + }, + { + "epoch": 2.65, + "learning_rate": 2.378405830008561e-05, + "loss": 0.7478, + "step": 16635 + }, + { + "epoch": 2.65, + "learning_rate": 2.378148146251036e-05, + "loss": 0.7381, + "step": 16636 + }, + { + "epoch": 2.65, + "learning_rate": 2.3778904637911596e-05, + "loss": 0.6712, + "step": 16637 + }, + { + "epoch": 2.65, + "learning_rate": 2.3776327826316763e-05, + "loss": 0.6942, + "step": 16638 + }, + { + "epoch": 2.65, + "learning_rate": 2.377375102775329e-05, + "loss": 0.73, + "step": 16639 + }, + { + "epoch": 2.65, + "learning_rate": 2.3771174242248627e-05, + "loss": 0.6863, + "step": 16640 + }, + { + "epoch": 2.65, + "learning_rate": 2.3768597469830202e-05, + "loss": 0.6556, + "step": 16641 + }, + { + "epoch": 2.65, + "learning_rate": 2.3766020710525474e-05, + "loss": 0.7301, + "step": 16642 + }, + { + "epoch": 2.65, + "learning_rate": 2.3763443964361875e-05, + "loss": 0.734, + "step": 16643 + }, + { + "epoch": 2.65, + "learning_rate": 2.3760867231366844e-05, + "loss": 0.6542, + "step": 16644 + }, + { + "epoch": 2.65, + "learning_rate": 2.375829051156783e-05, + "loss": 0.7451, + "step": 16645 + }, + { + "epoch": 2.65, + "learning_rate": 2.3755713804992265e-05, + "loss": 0.6521, + "step": 16646 + }, + { + "epoch": 2.65, + "learning_rate": 2.3753137111667588e-05, + "loss": 0.6508, + "step": 16647 + }, + { + "epoch": 2.65, + "learning_rate": 2.375056043162125e-05, + "loss": 0.7704, + "step": 16648 + }, + { + "epoch": 2.65, + "learning_rate": 2.374798376488069e-05, + "loss": 0.749, + "step": 16649 + }, + { + "epoch": 2.65, + "learning_rate": 2.374540711147333e-05, + "loss": 0.7025, + "step": 16650 + }, + { + "epoch": 2.65, + "learning_rate": 2.374283047142663e-05, + "loss": 0.7359, + "step": 16651 + }, + { + "epoch": 2.65, + "learning_rate": 2.3740253844768024e-05, + "loss": 0.7073, + "step": 16652 + }, + { + "epoch": 2.65, + "learning_rate": 2.3737677231524945e-05, + "loss": 0.6529, + "step": 16653 + }, + { + "epoch": 2.65, + "learning_rate": 2.373510063172484e-05, + "loss": 0.68, + "step": 16654 + }, + { + "epoch": 2.65, + "learning_rate": 2.3732524045395144e-05, + "loss": 0.71, + "step": 16655 + }, + { + "epoch": 2.65, + "learning_rate": 2.3729947472563293e-05, + "loss": 0.647, + "step": 16656 + }, + { + "epoch": 2.65, + "learning_rate": 2.3727370913256738e-05, + "loss": 0.6829, + "step": 16657 + }, + { + "epoch": 2.65, + "learning_rate": 2.372479436750291e-05, + "loss": 0.6742, + "step": 16658 + }, + { + "epoch": 2.65, + "learning_rate": 2.3722217835329254e-05, + "loss": 0.7708, + "step": 16659 + }, + { + "epoch": 2.65, + "learning_rate": 2.371964131676319e-05, + "loss": 0.6677, + "step": 16660 + }, + { + "epoch": 2.65, + "learning_rate": 2.371706481183217e-05, + "loss": 0.7135, + "step": 16661 + }, + { + "epoch": 2.65, + "learning_rate": 2.3714488320563636e-05, + "loss": 0.7131, + "step": 16662 + }, + { + "epoch": 2.65, + "learning_rate": 2.371191184298502e-05, + "loss": 0.6717, + "step": 16663 + }, + { + "epoch": 2.65, + "learning_rate": 2.3709335379123754e-05, + "loss": 0.6995, + "step": 16664 + }, + { + "epoch": 2.65, + "learning_rate": 2.3706758929007288e-05, + "loss": 0.7572, + "step": 16665 + }, + { + "epoch": 2.66, + "learning_rate": 2.3704182492663056e-05, + "loss": 0.6282, + "step": 16666 + }, + { + "epoch": 2.66, + "learning_rate": 2.370160607011849e-05, + "loss": 0.6925, + "step": 16667 + }, + { + "epoch": 2.66, + "learning_rate": 2.3699029661401032e-05, + "loss": 0.7181, + "step": 16668 + }, + { + "epoch": 2.66, + "learning_rate": 2.3696453266538118e-05, + "loss": 0.7275, + "step": 16669 + }, + { + "epoch": 2.66, + "learning_rate": 2.3693876885557183e-05, + "loss": 0.672, + "step": 16670 + }, + { + "epoch": 2.66, + "learning_rate": 2.369130051848567e-05, + "loss": 0.68, + "step": 16671 + }, + { + "epoch": 2.66, + "learning_rate": 2.3688724165351014e-05, + "loss": 0.7567, + "step": 16672 + }, + { + "epoch": 2.66, + "learning_rate": 2.3686147826180645e-05, + "loss": 0.725, + "step": 16673 + }, + { + "epoch": 2.66, + "learning_rate": 2.3683571501002006e-05, + "loss": 0.6931, + "step": 16674 + }, + { + "epoch": 2.66, + "learning_rate": 2.3680995189842532e-05, + "loss": 0.6846, + "step": 16675 + }, + { + "epoch": 2.66, + "learning_rate": 2.3678418892729652e-05, + "loss": 0.7804, + "step": 16676 + }, + { + "epoch": 2.66, + "learning_rate": 2.367584260969082e-05, + "loss": 0.7105, + "step": 16677 + }, + { + "epoch": 2.66, + "learning_rate": 2.3673266340753453e-05, + "loss": 0.7319, + "step": 16678 + }, + { + "epoch": 2.66, + "learning_rate": 2.3670690085945e-05, + "loss": 0.7266, + "step": 16679 + }, + { + "epoch": 2.66, + "learning_rate": 2.3668113845292888e-05, + "loss": 0.804, + "step": 16680 + }, + { + "epoch": 2.66, + "learning_rate": 2.366553761882455e-05, + "loss": 0.7098, + "step": 16681 + }, + { + "epoch": 2.66, + "learning_rate": 2.366296140656743e-05, + "loss": 0.7494, + "step": 16682 + }, + { + "epoch": 2.66, + "learning_rate": 2.366038520854896e-05, + "loss": 0.6477, + "step": 16683 + }, + { + "epoch": 2.66, + "learning_rate": 2.3657809024796568e-05, + "loss": 0.7113, + "step": 16684 + }, + { + "epoch": 2.66, + "learning_rate": 2.3655232855337703e-05, + "loss": 0.6611, + "step": 16685 + }, + { + "epoch": 2.66, + "learning_rate": 2.3652656700199787e-05, + "loss": 0.6824, + "step": 16686 + }, + { + "epoch": 2.66, + "learning_rate": 2.3650080559410256e-05, + "loss": 0.7434, + "step": 16687 + }, + { + "epoch": 2.66, + "learning_rate": 2.3647504432996556e-05, + "loss": 0.6698, + "step": 16688 + }, + { + "epoch": 2.66, + "learning_rate": 2.3644928320986108e-05, + "loss": 0.7016, + "step": 16689 + }, + { + "epoch": 2.66, + "learning_rate": 2.3642352223406347e-05, + "loss": 0.6812, + "step": 16690 + }, + { + "epoch": 2.66, + "learning_rate": 2.3639776140284717e-05, + "loss": 0.7894, + "step": 16691 + }, + { + "epoch": 2.66, + "learning_rate": 2.3637200071648645e-05, + "loss": 0.7536, + "step": 16692 + }, + { + "epoch": 2.66, + "learning_rate": 2.3634624017525557e-05, + "loss": 0.6991, + "step": 16693 + }, + { + "epoch": 2.66, + "learning_rate": 2.3632047977942904e-05, + "loss": 0.7163, + "step": 16694 + }, + { + "epoch": 2.66, + "learning_rate": 2.3629471952928107e-05, + "loss": 0.7258, + "step": 16695 + }, + { + "epoch": 2.66, + "learning_rate": 2.3626895942508596e-05, + "loss": 0.6582, + "step": 16696 + }, + { + "epoch": 2.66, + "learning_rate": 2.3624319946711814e-05, + "loss": 0.6525, + "step": 16697 + }, + { + "epoch": 2.66, + "learning_rate": 2.362174396556519e-05, + "loss": 0.6851, + "step": 16698 + }, + { + "epoch": 2.66, + "learning_rate": 2.3619167999096163e-05, + "loss": 0.6885, + "step": 16699 + }, + { + "epoch": 2.66, + "learning_rate": 2.3616592047332152e-05, + "loss": 0.7298, + "step": 16700 + }, + { + "epoch": 2.66, + "learning_rate": 2.361401611030059e-05, + "loss": 0.7161, + "step": 16701 + }, + { + "epoch": 2.66, + "learning_rate": 2.3611440188028922e-05, + "loss": 0.7324, + "step": 16702 + }, + { + "epoch": 2.66, + "learning_rate": 2.3608864280544575e-05, + "loss": 0.7559, + "step": 16703 + }, + { + "epoch": 2.66, + "learning_rate": 2.360628838787497e-05, + "loss": 0.6009, + "step": 16704 + }, + { + "epoch": 2.66, + "learning_rate": 2.3603712510047552e-05, + "loss": 0.7163, + "step": 16705 + }, + { + "epoch": 2.66, + "learning_rate": 2.360113664708975e-05, + "loss": 0.6435, + "step": 16706 + }, + { + "epoch": 2.66, + "learning_rate": 2.3598560799028987e-05, + "loss": 0.6602, + "step": 16707 + }, + { + "epoch": 2.66, + "learning_rate": 2.3595984965892705e-05, + "loss": 0.6877, + "step": 16708 + }, + { + "epoch": 2.66, + "learning_rate": 2.3593409147708334e-05, + "loss": 0.7219, + "step": 16709 + }, + { + "epoch": 2.66, + "learning_rate": 2.3590833344503294e-05, + "loss": 0.7471, + "step": 16710 + }, + { + "epoch": 2.66, + "learning_rate": 2.358825755630503e-05, + "loss": 0.7053, + "step": 16711 + }, + { + "epoch": 2.66, + "learning_rate": 2.3585681783140966e-05, + "loss": 0.7139, + "step": 16712 + }, + { + "epoch": 2.66, + "learning_rate": 2.3583106025038522e-05, + "loss": 0.6414, + "step": 16713 + }, + { + "epoch": 2.66, + "learning_rate": 2.358053028202515e-05, + "loss": 0.6518, + "step": 16714 + }, + { + "epoch": 2.66, + "learning_rate": 2.3577954554128267e-05, + "loss": 0.6784, + "step": 16715 + }, + { + "epoch": 2.66, + "learning_rate": 2.3575378841375297e-05, + "loss": 0.7109, + "step": 16716 + }, + { + "epoch": 2.66, + "learning_rate": 2.3572803143793686e-05, + "loss": 0.7448, + "step": 16717 + }, + { + "epoch": 2.66, + "learning_rate": 2.3570227461410855e-05, + "loss": 0.7208, + "step": 16718 + }, + { + "epoch": 2.66, + "learning_rate": 2.356765179425424e-05, + "loss": 0.6694, + "step": 16719 + }, + { + "epoch": 2.66, + "learning_rate": 2.3565076142351256e-05, + "loss": 0.7267, + "step": 16720 + }, + { + "epoch": 2.66, + "learning_rate": 2.3562500505729332e-05, + "loss": 0.682, + "step": 16721 + }, + { + "epoch": 2.66, + "learning_rate": 2.3559924884415913e-05, + "loss": 0.6855, + "step": 16722 + }, + { + "epoch": 2.66, + "learning_rate": 2.3557349278438422e-05, + "loss": 0.6965, + "step": 16723 + }, + { + "epoch": 2.66, + "learning_rate": 2.355477368782428e-05, + "loss": 0.6965, + "step": 16724 + }, + { + "epoch": 2.66, + "learning_rate": 2.355219811260092e-05, + "loss": 0.6597, + "step": 16725 + }, + { + "epoch": 2.66, + "learning_rate": 2.3549622552795774e-05, + "loss": 0.7866, + "step": 16726 + }, + { + "epoch": 2.66, + "learning_rate": 2.3547047008436264e-05, + "loss": 0.8093, + "step": 16727 + }, + { + "epoch": 2.66, + "learning_rate": 2.3544471479549826e-05, + "loss": 0.7341, + "step": 16728 + }, + { + "epoch": 2.67, + "learning_rate": 2.354189596616388e-05, + "loss": 0.7418, + "step": 16729 + }, + { + "epoch": 2.67, + "learning_rate": 2.3539320468305856e-05, + "loss": 0.6899, + "step": 16730 + }, + { + "epoch": 2.67, + "learning_rate": 2.3536744986003185e-05, + "loss": 0.6833, + "step": 16731 + }, + { + "epoch": 2.67, + "learning_rate": 2.353416951928329e-05, + "loss": 0.6384, + "step": 16732 + }, + { + "epoch": 2.67, + "learning_rate": 2.3531594068173603e-05, + "loss": 0.6811, + "step": 16733 + }, + { + "epoch": 2.67, + "learning_rate": 2.352901863270154e-05, + "loss": 0.7043, + "step": 16734 + }, + { + "epoch": 2.67, + "learning_rate": 2.3526443212894545e-05, + "loss": 0.6833, + "step": 16735 + }, + { + "epoch": 2.67, + "learning_rate": 2.352386780878003e-05, + "loss": 0.71, + "step": 16736 + }, + { + "epoch": 2.67, + "learning_rate": 2.3521292420385422e-05, + "loss": 0.7852, + "step": 16737 + }, + { + "epoch": 2.67, + "learning_rate": 2.3518717047738162e-05, + "loss": 0.6935, + "step": 16738 + }, + { + "epoch": 2.67, + "learning_rate": 2.3516141690865668e-05, + "loss": 0.7086, + "step": 16739 + }, + { + "epoch": 2.67, + "learning_rate": 2.351356634979536e-05, + "loss": 0.7187, + "step": 16740 + }, + { + "epoch": 2.67, + "learning_rate": 2.351099102455466e-05, + "loss": 0.7896, + "step": 16741 + }, + { + "epoch": 2.67, + "learning_rate": 2.350841571517101e-05, + "loss": 0.6938, + "step": 16742 + }, + { + "epoch": 2.67, + "learning_rate": 2.3505840421671823e-05, + "loss": 0.741, + "step": 16743 + }, + { + "epoch": 2.67, + "learning_rate": 2.3503265144084526e-05, + "loss": 0.694, + "step": 16744 + }, + { + "epoch": 2.67, + "learning_rate": 2.3500689882436553e-05, + "loss": 0.6583, + "step": 16745 + }, + { + "epoch": 2.67, + "learning_rate": 2.349811463675532e-05, + "loss": 0.6696, + "step": 16746 + }, + { + "epoch": 2.67, + "learning_rate": 2.3495539407068258e-05, + "loss": 0.7048, + "step": 16747 + }, + { + "epoch": 2.67, + "learning_rate": 2.3492964193402777e-05, + "loss": 0.6844, + "step": 16748 + }, + { + "epoch": 2.67, + "learning_rate": 2.349038899578632e-05, + "loss": 0.7878, + "step": 16749 + }, + { + "epoch": 2.67, + "learning_rate": 2.3487813814246305e-05, + "loss": 0.6844, + "step": 16750 + }, + { + "epoch": 2.67, + "learning_rate": 2.348523864881015e-05, + "loss": 0.7261, + "step": 16751 + }, + { + "epoch": 2.67, + "learning_rate": 2.3482663499505287e-05, + "loss": 0.6956, + "step": 16752 + }, + { + "epoch": 2.67, + "learning_rate": 2.3480088366359137e-05, + "loss": 0.7343, + "step": 16753 + }, + { + "epoch": 2.67, + "learning_rate": 2.347751324939912e-05, + "loss": 0.6317, + "step": 16754 + }, + { + "epoch": 2.67, + "learning_rate": 2.3474938148652665e-05, + "loss": 0.6535, + "step": 16755 + }, + { + "epoch": 2.67, + "learning_rate": 2.3472363064147192e-05, + "loss": 0.7146, + "step": 16756 + }, + { + "epoch": 2.67, + "learning_rate": 2.3469787995910122e-05, + "loss": 0.6906, + "step": 16757 + }, + { + "epoch": 2.67, + "learning_rate": 2.3467212943968886e-05, + "loss": 0.7214, + "step": 16758 + }, + { + "epoch": 2.67, + "learning_rate": 2.3464637908350903e-05, + "loss": 0.6802, + "step": 16759 + }, + { + "epoch": 2.67, + "learning_rate": 2.3462062889083596e-05, + "loss": 0.7611, + "step": 16760 + }, + { + "epoch": 2.67, + "learning_rate": 2.3459487886194383e-05, + "loss": 0.7619, + "step": 16761 + }, + { + "epoch": 2.67, + "learning_rate": 2.345691289971068e-05, + "loss": 0.6907, + "step": 16762 + }, + { + "epoch": 2.67, + "learning_rate": 2.345433792965993e-05, + "loss": 0.7096, + "step": 16763 + }, + { + "epoch": 2.67, + "learning_rate": 2.3451762976069537e-05, + "loss": 0.7196, + "step": 16764 + }, + { + "epoch": 2.67, + "learning_rate": 2.3449188038966924e-05, + "loss": 0.7389, + "step": 16765 + }, + { + "epoch": 2.67, + "learning_rate": 2.344661311837952e-05, + "loss": 0.7152, + "step": 16766 + }, + { + "epoch": 2.67, + "learning_rate": 2.3444038214334747e-05, + "loss": 0.7107, + "step": 16767 + }, + { + "epoch": 2.67, + "learning_rate": 2.3441463326860014e-05, + "loss": 0.688, + "step": 16768 + }, + { + "epoch": 2.67, + "learning_rate": 2.343888845598276e-05, + "loss": 0.7142, + "step": 16769 + }, + { + "epoch": 2.67, + "learning_rate": 2.3436313601730393e-05, + "loss": 0.6868, + "step": 16770 + }, + { + "epoch": 2.67, + "learning_rate": 2.3433738764130332e-05, + "loss": 0.7587, + "step": 16771 + }, + { + "epoch": 2.67, + "learning_rate": 2.3431163943210004e-05, + "loss": 0.6703, + "step": 16772 + }, + { + "epoch": 2.67, + "learning_rate": 2.342858913899683e-05, + "loss": 0.7815, + "step": 16773 + }, + { + "epoch": 2.67, + "learning_rate": 2.3426014351518227e-05, + "loss": 0.6357, + "step": 16774 + }, + { + "epoch": 2.67, + "learning_rate": 2.3423439580801615e-05, + "loss": 0.7284, + "step": 16775 + }, + { + "epoch": 2.67, + "learning_rate": 2.3420864826874416e-05, + "loss": 0.7271, + "step": 16776 + }, + { + "epoch": 2.67, + "learning_rate": 2.3418290089764044e-05, + "loss": 0.712, + "step": 16777 + }, + { + "epoch": 2.67, + "learning_rate": 2.3415715369497928e-05, + "loss": 0.6966, + "step": 16778 + }, + { + "epoch": 2.67, + "learning_rate": 2.341314066610348e-05, + "loss": 0.7467, + "step": 16779 + }, + { + "epoch": 2.67, + "learning_rate": 2.3410565979608125e-05, + "loss": 0.7195, + "step": 16780 + }, + { + "epoch": 2.67, + "learning_rate": 2.3407991310039272e-05, + "loss": 0.7628, + "step": 16781 + }, + { + "epoch": 2.67, + "learning_rate": 2.3405416657424342e-05, + "loss": 0.673, + "step": 16782 + }, + { + "epoch": 2.67, + "learning_rate": 2.340284202179076e-05, + "loss": 0.7049, + "step": 16783 + }, + { + "epoch": 2.67, + "learning_rate": 2.3400267403165945e-05, + "loss": 0.704, + "step": 16784 + }, + { + "epoch": 2.67, + "learning_rate": 2.33976928015773e-05, + "loss": 0.6199, + "step": 16785 + }, + { + "epoch": 2.67, + "learning_rate": 2.3395118217052262e-05, + "loss": 0.6782, + "step": 16786 + }, + { + "epoch": 2.67, + "learning_rate": 2.339254364961824e-05, + "loss": 0.6615, + "step": 16787 + }, + { + "epoch": 2.67, + "learning_rate": 2.3389969099302643e-05, + "loss": 0.7036, + "step": 16788 + }, + { + "epoch": 2.67, + "learning_rate": 2.3387394566132908e-05, + "loss": 0.6648, + "step": 16789 + }, + { + "epoch": 2.67, + "learning_rate": 2.338482005013644e-05, + "loss": 0.679, + "step": 16790 + }, + { + "epoch": 2.68, + "learning_rate": 2.3382245551340655e-05, + "loss": 0.7193, + "step": 16791 + }, + { + "epoch": 2.68, + "learning_rate": 2.3379671069772973e-05, + "loss": 0.5983, + "step": 16792 + }, + { + "epoch": 2.68, + "learning_rate": 2.3377096605460817e-05, + "loss": 0.7546, + "step": 16793 + }, + { + "epoch": 2.68, + "learning_rate": 2.3374522158431585e-05, + "loss": 0.6651, + "step": 16794 + }, + { + "epoch": 2.68, + "learning_rate": 2.3371947728712715e-05, + "loss": 0.633, + "step": 16795 + }, + { + "epoch": 2.68, + "learning_rate": 2.336937331633161e-05, + "loss": 0.6842, + "step": 16796 + }, + { + "epoch": 2.68, + "learning_rate": 2.3366798921315686e-05, + "loss": 0.6735, + "step": 16797 + }, + { + "epoch": 2.68, + "learning_rate": 2.3364224543692368e-05, + "loss": 0.6347, + "step": 16798 + }, + { + "epoch": 2.68, + "learning_rate": 2.3361650183489065e-05, + "loss": 0.6988, + "step": 16799 + }, + { + "epoch": 2.68, + "learning_rate": 2.3359075840733197e-05, + "loss": 0.6892, + "step": 16800 + }, + { + "epoch": 2.68, + "learning_rate": 2.335650151545217e-05, + "loss": 0.734, + "step": 16801 + }, + { + "epoch": 2.68, + "learning_rate": 2.3353927207673397e-05, + "loss": 0.6229, + "step": 16802 + }, + { + "epoch": 2.68, + "learning_rate": 2.335135291742431e-05, + "loss": 0.7061, + "step": 16803 + }, + { + "epoch": 2.68, + "learning_rate": 2.334877864473231e-05, + "loss": 0.6983, + "step": 16804 + }, + { + "epoch": 2.68, + "learning_rate": 2.334620438962481e-05, + "loss": 0.776, + "step": 16805 + }, + { + "epoch": 2.68, + "learning_rate": 2.3343630152129238e-05, + "loss": 0.7158, + "step": 16806 + }, + { + "epoch": 2.68, + "learning_rate": 2.3341055932272993e-05, + "loss": 0.6681, + "step": 16807 + }, + { + "epoch": 2.68, + "learning_rate": 2.3338481730083496e-05, + "loss": 0.6251, + "step": 16808 + }, + { + "epoch": 2.68, + "learning_rate": 2.333590754558816e-05, + "loss": 0.7077, + "step": 16809 + }, + { + "epoch": 2.68, + "learning_rate": 2.33333333788144e-05, + "loss": 0.6776, + "step": 16810 + }, + { + "epoch": 2.68, + "learning_rate": 2.333075922978962e-05, + "loss": 0.727, + "step": 16811 + }, + { + "epoch": 2.68, + "learning_rate": 2.3328185098541252e-05, + "loss": 0.7849, + "step": 16812 + }, + { + "epoch": 2.68, + "learning_rate": 2.3325610985096695e-05, + "loss": 0.685, + "step": 16813 + }, + { + "epoch": 2.68, + "learning_rate": 2.332303688948336e-05, + "loss": 0.7008, + "step": 16814 + }, + { + "epoch": 2.68, + "learning_rate": 2.3320462811728666e-05, + "loss": 0.6865, + "step": 16815 + }, + { + "epoch": 2.68, + "learning_rate": 2.331788875186003e-05, + "loss": 0.7612, + "step": 16816 + }, + { + "epoch": 2.68, + "learning_rate": 2.3315314709904844e-05, + "loss": 0.7076, + "step": 16817 + }, + { + "epoch": 2.68, + "learning_rate": 2.3312740685890544e-05, + "loss": 0.7434, + "step": 16818 + }, + { + "epoch": 2.68, + "learning_rate": 2.331016667984453e-05, + "loss": 0.679, + "step": 16819 + }, + { + "epoch": 2.68, + "learning_rate": 2.330759269179422e-05, + "loss": 0.6475, + "step": 16820 + }, + { + "epoch": 2.68, + "learning_rate": 2.3305018721767017e-05, + "loss": 0.6681, + "step": 16821 + }, + { + "epoch": 2.68, + "learning_rate": 2.3302444769790328e-05, + "loss": 0.7513, + "step": 16822 + }, + { + "epoch": 2.68, + "learning_rate": 2.3299870835891576e-05, + "loss": 0.7143, + "step": 16823 + }, + { + "epoch": 2.68, + "learning_rate": 2.329729692009817e-05, + "loss": 0.6937, + "step": 16824 + }, + { + "epoch": 2.68, + "learning_rate": 2.3294723022437512e-05, + "loss": 0.7363, + "step": 16825 + }, + { + "epoch": 2.68, + "learning_rate": 2.3292149142937024e-05, + "loss": 0.663, + "step": 16826 + }, + { + "epoch": 2.68, + "learning_rate": 2.328957528162411e-05, + "loss": 0.7134, + "step": 16827 + }, + { + "epoch": 2.68, + "learning_rate": 2.3287001438526174e-05, + "loss": 0.6246, + "step": 16828 + }, + { + "epoch": 2.68, + "learning_rate": 2.328442761367064e-05, + "loss": 0.6734, + "step": 16829 + }, + { + "epoch": 2.68, + "learning_rate": 2.328185380708491e-05, + "loss": 0.6589, + "step": 16830 + }, + { + "epoch": 2.68, + "learning_rate": 2.327928001879639e-05, + "loss": 0.7014, + "step": 16831 + }, + { + "epoch": 2.68, + "learning_rate": 2.3276706248832493e-05, + "loss": 0.6479, + "step": 16832 + }, + { + "epoch": 2.68, + "learning_rate": 2.3274132497220634e-05, + "loss": 0.8442, + "step": 16833 + }, + { + "epoch": 2.68, + "learning_rate": 2.3271558763988212e-05, + "loss": 0.7244, + "step": 16834 + }, + { + "epoch": 2.68, + "learning_rate": 2.326898504916264e-05, + "loss": 0.6982, + "step": 16835 + }, + { + "epoch": 2.68, + "learning_rate": 2.3266411352771328e-05, + "loss": 0.8305, + "step": 16836 + }, + { + "epoch": 2.68, + "learning_rate": 2.326383767484168e-05, + "loss": 0.6764, + "step": 16837 + }, + { + "epoch": 2.68, + "learning_rate": 2.3261264015401107e-05, + "loss": 0.6822, + "step": 16838 + }, + { + "epoch": 2.68, + "learning_rate": 2.325869037447702e-05, + "loss": 0.7156, + "step": 16839 + }, + { + "epoch": 2.68, + "learning_rate": 2.325611675209683e-05, + "loss": 0.7299, + "step": 16840 + }, + { + "epoch": 2.68, + "learning_rate": 2.3253543148287928e-05, + "loss": 0.6665, + "step": 16841 + }, + { + "epoch": 2.68, + "learning_rate": 2.325096956307773e-05, + "loss": 0.6345, + "step": 16842 + }, + { + "epoch": 2.68, + "learning_rate": 2.3248395996493648e-05, + "loss": 0.6747, + "step": 16843 + }, + { + "epoch": 2.68, + "learning_rate": 2.3245822448563086e-05, + "loss": 0.6767, + "step": 16844 + }, + { + "epoch": 2.68, + "learning_rate": 2.3243248919313443e-05, + "loss": 0.6856, + "step": 16845 + }, + { + "epoch": 2.68, + "learning_rate": 2.3240675408772138e-05, + "loss": 0.7464, + "step": 16846 + }, + { + "epoch": 2.68, + "learning_rate": 2.3238101916966572e-05, + "loss": 0.7262, + "step": 16847 + }, + { + "epoch": 2.68, + "learning_rate": 2.3235528443924148e-05, + "loss": 0.7042, + "step": 16848 + }, + { + "epoch": 2.68, + "learning_rate": 2.323295498967228e-05, + "loss": 0.9294, + "step": 16849 + }, + { + "epoch": 2.68, + "learning_rate": 2.3230381554238365e-05, + "loss": 0.6941, + "step": 16850 + }, + { + "epoch": 2.68, + "learning_rate": 2.3227808137649813e-05, + "loss": 0.6742, + "step": 16851 + }, + { + "epoch": 2.68, + "learning_rate": 2.3225234739934022e-05, + "loss": 0.6787, + "step": 16852 + }, + { + "epoch": 2.68, + "learning_rate": 2.3222661361118413e-05, + "loss": 0.705, + "step": 16853 + }, + { + "epoch": 2.69, + "learning_rate": 2.322008800123038e-05, + "loss": 0.6833, + "step": 16854 + }, + { + "epoch": 2.69, + "learning_rate": 2.3217514660297325e-05, + "loss": 0.6393, + "step": 16855 + }, + { + "epoch": 2.69, + "learning_rate": 2.3214941338346656e-05, + "loss": 0.71, + "step": 16856 + }, + { + "epoch": 2.69, + "learning_rate": 2.3212368035405785e-05, + "loss": 0.7067, + "step": 16857 + }, + { + "epoch": 2.69, + "learning_rate": 2.32097947515021e-05, + "loss": 0.6914, + "step": 16858 + }, + { + "epoch": 2.69, + "learning_rate": 2.3207221486663018e-05, + "loss": 0.68, + "step": 16859 + }, + { + "epoch": 2.69, + "learning_rate": 2.320464824091595e-05, + "loss": 0.7022, + "step": 16860 + }, + { + "epoch": 2.69, + "learning_rate": 2.3202075014288278e-05, + "loss": 0.7437, + "step": 16861 + }, + { + "epoch": 2.69, + "learning_rate": 2.319950180680741e-05, + "loss": 0.7197, + "step": 16862 + }, + { + "epoch": 2.69, + "learning_rate": 2.3196928618500763e-05, + "loss": 0.6739, + "step": 16863 + }, + { + "epoch": 2.69, + "learning_rate": 2.319435544939573e-05, + "loss": 0.666, + "step": 16864 + }, + { + "epoch": 2.69, + "learning_rate": 2.3191782299519715e-05, + "loss": 0.7133, + "step": 16865 + }, + { + "epoch": 2.69, + "learning_rate": 2.3189209168900117e-05, + "loss": 0.6322, + "step": 16866 + }, + { + "epoch": 2.69, + "learning_rate": 2.3186636057564347e-05, + "loss": 0.725, + "step": 16867 + }, + { + "epoch": 2.69, + "learning_rate": 2.31840629655398e-05, + "loss": 0.7269, + "step": 16868 + }, + { + "epoch": 2.69, + "learning_rate": 2.3181489892853874e-05, + "loss": 0.7069, + "step": 16869 + }, + { + "epoch": 2.69, + "learning_rate": 2.3178916839533985e-05, + "loss": 0.686, + "step": 16870 + }, + { + "epoch": 2.69, + "learning_rate": 2.317634380560752e-05, + "loss": 0.6886, + "step": 16871 + }, + { + "epoch": 2.69, + "learning_rate": 2.317377079110189e-05, + "loss": 0.7857, + "step": 16872 + }, + { + "epoch": 2.69, + "learning_rate": 2.3171197796044492e-05, + "loss": 0.6533, + "step": 16873 + }, + { + "epoch": 2.69, + "learning_rate": 2.316862482046273e-05, + "loss": 0.7153, + "step": 16874 + }, + { + "epoch": 2.69, + "learning_rate": 2.3166051864383995e-05, + "loss": 0.6485, + "step": 16875 + }, + { + "epoch": 2.69, + "learning_rate": 2.31634789278357e-05, + "loss": 0.7564, + "step": 16876 + }, + { + "epoch": 2.69, + "learning_rate": 2.3160906010845237e-05, + "loss": 0.6787, + "step": 16877 + }, + { + "epoch": 2.69, + "learning_rate": 2.3158333113440007e-05, + "loss": 0.741, + "step": 16878 + }, + { + "epoch": 2.69, + "learning_rate": 2.3155760235647415e-05, + "loss": 0.673, + "step": 16879 + }, + { + "epoch": 2.69, + "learning_rate": 2.3153187377494854e-05, + "loss": 0.7323, + "step": 16880 + }, + { + "epoch": 2.69, + "learning_rate": 2.3150614539009734e-05, + "loss": 0.6806, + "step": 16881 + }, + { + "epoch": 2.69, + "learning_rate": 2.3148041720219445e-05, + "loss": 0.7416, + "step": 16882 + }, + { + "epoch": 2.69, + "learning_rate": 2.3145468921151376e-05, + "loss": 0.6817, + "step": 16883 + }, + { + "epoch": 2.69, + "learning_rate": 2.3142896141832944e-05, + "loss": 0.7172, + "step": 16884 + }, + { + "epoch": 2.69, + "learning_rate": 2.314032338229154e-05, + "loss": 0.7264, + "step": 16885 + }, + { + "epoch": 2.69, + "learning_rate": 2.313775064255456e-05, + "loss": 0.7021, + "step": 16886 + }, + { + "epoch": 2.69, + "learning_rate": 2.313517792264941e-05, + "loss": 0.6693, + "step": 16887 + }, + { + "epoch": 2.69, + "learning_rate": 2.313260522260348e-05, + "loss": 0.6609, + "step": 16888 + }, + { + "epoch": 2.69, + "learning_rate": 2.3130032542444166e-05, + "loss": 0.7323, + "step": 16889 + }, + { + "epoch": 2.69, + "learning_rate": 2.3127459882198878e-05, + "loss": 0.6676, + "step": 16890 + }, + { + "epoch": 2.69, + "learning_rate": 2.3124887241895006e-05, + "loss": 0.6416, + "step": 16891 + }, + { + "epoch": 2.69, + "learning_rate": 2.3122314621559938e-05, + "loss": 0.6029, + "step": 16892 + }, + { + "epoch": 2.69, + "learning_rate": 2.3119742021221086e-05, + "loss": 0.6182, + "step": 16893 + }, + { + "epoch": 2.69, + "learning_rate": 2.3117169440905838e-05, + "loss": 0.7376, + "step": 16894 + }, + { + "epoch": 2.69, + "learning_rate": 2.311459688064159e-05, + "loss": 0.6669, + "step": 16895 + }, + { + "epoch": 2.69, + "learning_rate": 2.3112024340455746e-05, + "loss": 0.7468, + "step": 16896 + }, + { + "epoch": 2.69, + "learning_rate": 2.3109451820375694e-05, + "loss": 0.7133, + "step": 16897 + }, + { + "epoch": 2.69, + "learning_rate": 2.310687932042883e-05, + "loss": 0.6738, + "step": 16898 + }, + { + "epoch": 2.69, + "learning_rate": 2.3104306840642553e-05, + "loss": 0.6612, + "step": 16899 + }, + { + "epoch": 2.69, + "learning_rate": 2.310173438104426e-05, + "loss": 0.6346, + "step": 16900 + }, + { + "epoch": 2.69, + "learning_rate": 2.309916194166135e-05, + "loss": 0.6541, + "step": 16901 + }, + { + "epoch": 2.69, + "learning_rate": 2.3096589522521206e-05, + "loss": 0.7463, + "step": 16902 + }, + { + "epoch": 2.69, + "learning_rate": 2.309401712365122e-05, + "loss": 0.6531, + "step": 16903 + }, + { + "epoch": 2.69, + "learning_rate": 2.30914447450788e-05, + "loss": 0.8429, + "step": 16904 + }, + { + "epoch": 2.69, + "learning_rate": 2.3088872386831336e-05, + "loss": 0.6378, + "step": 16905 + }, + { + "epoch": 2.69, + "learning_rate": 2.3086300048936217e-05, + "loss": 0.71, + "step": 16906 + }, + { + "epoch": 2.69, + "learning_rate": 2.3083727731420842e-05, + "loss": 0.7222, + "step": 16907 + }, + { + "epoch": 2.69, + "learning_rate": 2.3081155434312605e-05, + "loss": 0.6727, + "step": 16908 + }, + { + "epoch": 2.69, + "learning_rate": 2.3078583157638896e-05, + "loss": 0.7172, + "step": 16909 + }, + { + "epoch": 2.69, + "learning_rate": 2.307601090142711e-05, + "loss": 0.6272, + "step": 16910 + }, + { + "epoch": 2.69, + "learning_rate": 2.3073438665704642e-05, + "loss": 0.7139, + "step": 16911 + }, + { + "epoch": 2.69, + "learning_rate": 2.3070866450498878e-05, + "loss": 0.7834, + "step": 16912 + }, + { + "epoch": 2.69, + "learning_rate": 2.306829425583722e-05, + "loss": 0.6969, + "step": 16913 + }, + { + "epoch": 2.69, + "learning_rate": 2.3065722081747053e-05, + "loss": 0.737, + "step": 16914 + }, + { + "epoch": 2.69, + "learning_rate": 2.3063149928255767e-05, + "loss": 0.6587, + "step": 16915 + }, + { + "epoch": 2.69, + "learning_rate": 2.3060577795390766e-05, + "loss": 0.689, + "step": 16916 + }, + { + "epoch": 2.7, + "learning_rate": 2.3058005683179433e-05, + "loss": 0.688, + "step": 16917 + }, + { + "epoch": 2.7, + "learning_rate": 2.3055433591649156e-05, + "loss": 0.7104, + "step": 16918 + }, + { + "epoch": 2.7, + "learning_rate": 2.3052861520827336e-05, + "loss": 0.6317, + "step": 16919 + }, + { + "epoch": 2.7, + "learning_rate": 2.3050289470741357e-05, + "loss": 0.7439, + "step": 16920 + }, + { + "epoch": 2.7, + "learning_rate": 2.3047717441418616e-05, + "loss": 0.6435, + "step": 16921 + }, + { + "epoch": 2.7, + "learning_rate": 2.3045145432886495e-05, + "loss": 0.7026, + "step": 16922 + }, + { + "epoch": 2.7, + "learning_rate": 2.3042573445172383e-05, + "loss": 0.6944, + "step": 16923 + }, + { + "epoch": 2.7, + "learning_rate": 2.304000147830368e-05, + "loss": 0.6635, + "step": 16924 + }, + { + "epoch": 2.7, + "learning_rate": 2.3037429532307776e-05, + "loss": 0.6754, + "step": 16925 + }, + { + "epoch": 2.7, + "learning_rate": 2.3034857607212042e-05, + "loss": 0.7048, + "step": 16926 + }, + { + "epoch": 2.7, + "learning_rate": 2.3032285703043894e-05, + "loss": 0.7347, + "step": 16927 + }, + { + "epoch": 2.7, + "learning_rate": 2.3029713819830707e-05, + "loss": 0.6814, + "step": 16928 + }, + { + "epoch": 2.7, + "learning_rate": 2.3027141957599862e-05, + "loss": 0.6938, + "step": 16929 + }, + { + "epoch": 2.7, + "learning_rate": 2.3024570116378767e-05, + "loss": 0.7358, + "step": 16930 + }, + { + "epoch": 2.7, + "learning_rate": 2.3021998296194803e-05, + "loss": 0.7725, + "step": 16931 + }, + { + "epoch": 2.7, + "learning_rate": 2.3019426497075345e-05, + "loss": 0.6945, + "step": 16932 + }, + { + "epoch": 2.7, + "learning_rate": 2.3016854719047804e-05, + "loss": 0.7638, + "step": 16933 + }, + { + "epoch": 2.7, + "learning_rate": 2.301428296213955e-05, + "loss": 0.6874, + "step": 16934 + }, + { + "epoch": 2.7, + "learning_rate": 2.3011711226377973e-05, + "loss": 0.7285, + "step": 16935 + }, + { + "epoch": 2.7, + "learning_rate": 2.3009139511790473e-05, + "loss": 0.6523, + "step": 16936 + }, + { + "epoch": 2.7, + "learning_rate": 2.300656781840443e-05, + "loss": 0.7423, + "step": 16937 + }, + { + "epoch": 2.7, + "learning_rate": 2.3003996146247216e-05, + "loss": 0.8097, + "step": 16938 + }, + { + "epoch": 2.7, + "learning_rate": 2.3001424495346245e-05, + "loss": 0.7609, + "step": 16939 + }, + { + "epoch": 2.7, + "learning_rate": 2.2998852865728885e-05, + "loss": 0.821, + "step": 16940 + }, + { + "epoch": 2.7, + "learning_rate": 2.2996281257422532e-05, + "loss": 0.6777, + "step": 16941 + }, + { + "epoch": 2.7, + "learning_rate": 2.2993709670454565e-05, + "loss": 0.7346, + "step": 16942 + }, + { + "epoch": 2.7, + "learning_rate": 2.2991138104852363e-05, + "loss": 0.7312, + "step": 16943 + }, + { + "epoch": 2.7, + "learning_rate": 2.2988566560643328e-05, + "loss": 0.6628, + "step": 16944 + }, + { + "epoch": 2.7, + "learning_rate": 2.298599503785484e-05, + "loss": 0.6843, + "step": 16945 + }, + { + "epoch": 2.7, + "learning_rate": 2.2983423536514274e-05, + "loss": 0.6724, + "step": 16946 + }, + { + "epoch": 2.7, + "learning_rate": 2.298085205664903e-05, + "loss": 0.6543, + "step": 16947 + }, + { + "epoch": 2.7, + "learning_rate": 2.2978280598286484e-05, + "loss": 0.7528, + "step": 16948 + }, + { + "epoch": 2.7, + "learning_rate": 2.2975709161454016e-05, + "loss": 0.7993, + "step": 16949 + }, + { + "epoch": 2.7, + "learning_rate": 2.2973137746179024e-05, + "loss": 0.7493, + "step": 16950 + }, + { + "epoch": 2.7, + "learning_rate": 2.2970566352488885e-05, + "loss": 0.6905, + "step": 16951 + }, + { + "epoch": 2.7, + "learning_rate": 2.2967994980410975e-05, + "loss": 0.7007, + "step": 16952 + }, + { + "epoch": 2.7, + "learning_rate": 2.2965423629972692e-05, + "loss": 0.8051, + "step": 16953 + }, + { + "epoch": 2.7, + "learning_rate": 2.2962852301201408e-05, + "loss": 0.6753, + "step": 16954 + }, + { + "epoch": 2.7, + "learning_rate": 2.296028099412451e-05, + "loss": 0.6502, + "step": 16955 + }, + { + "epoch": 2.7, + "learning_rate": 2.2957709708769384e-05, + "loss": 0.6693, + "step": 16956 + }, + { + "epoch": 2.7, + "learning_rate": 2.295513844516341e-05, + "loss": 0.6285, + "step": 16957 + }, + { + "epoch": 2.7, + "learning_rate": 2.2952567203333973e-05, + "loss": 0.6739, + "step": 16958 + }, + { + "epoch": 2.7, + "learning_rate": 2.294999598330844e-05, + "loss": 0.7131, + "step": 16959 + }, + { + "epoch": 2.7, + "learning_rate": 2.2947424785114218e-05, + "loss": 0.6825, + "step": 16960 + }, + { + "epoch": 2.7, + "learning_rate": 2.2944853608778675e-05, + "loss": 0.6769, + "step": 16961 + }, + { + "epoch": 2.7, + "learning_rate": 2.294228245432919e-05, + "loss": 0.7418, + "step": 16962 + }, + { + "epoch": 2.7, + "learning_rate": 2.2939711321793146e-05, + "loss": 0.6429, + "step": 16963 + }, + { + "epoch": 2.7, + "learning_rate": 2.293714021119793e-05, + "loss": 0.7083, + "step": 16964 + }, + { + "epoch": 2.7, + "learning_rate": 2.2934569122570913e-05, + "loss": 0.721, + "step": 16965 + }, + { + "epoch": 2.7, + "learning_rate": 2.293199805593948e-05, + "loss": 0.7036, + "step": 16966 + }, + { + "epoch": 2.7, + "learning_rate": 2.292942701133102e-05, + "loss": 0.6416, + "step": 16967 + }, + { + "epoch": 2.7, + "learning_rate": 2.29268559887729e-05, + "loss": 0.6446, + "step": 16968 + }, + { + "epoch": 2.7, + "learning_rate": 2.29242849882925e-05, + "loss": 0.6362, + "step": 16969 + }, + { + "epoch": 2.7, + "learning_rate": 2.292171400991721e-05, + "loss": 0.7045, + "step": 16970 + }, + { + "epoch": 2.7, + "learning_rate": 2.2919143053674408e-05, + "loss": 0.6836, + "step": 16971 + }, + { + "epoch": 2.7, + "learning_rate": 2.2916572119591464e-05, + "loss": 0.6632, + "step": 16972 + }, + { + "epoch": 2.7, + "learning_rate": 2.2914001207695762e-05, + "loss": 0.7083, + "step": 16973 + }, + { + "epoch": 2.7, + "learning_rate": 2.2911430318014682e-05, + "loss": 0.6831, + "step": 16974 + }, + { + "epoch": 2.7, + "learning_rate": 2.2908859450575603e-05, + "loss": 0.6997, + "step": 16975 + }, + { + "epoch": 2.7, + "learning_rate": 2.2906288605405894e-05, + "loss": 0.7306, + "step": 16976 + }, + { + "epoch": 2.7, + "learning_rate": 2.290371778253295e-05, + "loss": 0.8001, + "step": 16977 + }, + { + "epoch": 2.7, + "learning_rate": 2.2901146981984133e-05, + "loss": 0.6646, + "step": 16978 + }, + { + "epoch": 2.7, + "learning_rate": 2.2898576203786826e-05, + "loss": 0.7402, + "step": 16979 + }, + { + "epoch": 2.71, + "learning_rate": 2.289600544796841e-05, + "loss": 0.6505, + "step": 16980 + }, + { + "epoch": 2.71, + "learning_rate": 2.289343471455626e-05, + "loss": 0.7242, + "step": 16981 + }, + { + "epoch": 2.71, + "learning_rate": 2.289086400357775e-05, + "loss": 0.7076, + "step": 16982 + }, + { + "epoch": 2.71, + "learning_rate": 2.288829331506026e-05, + "loss": 0.7016, + "step": 16983 + }, + { + "epoch": 2.71, + "learning_rate": 2.288572264903116e-05, + "loss": 0.6752, + "step": 16984 + }, + { + "epoch": 2.71, + "learning_rate": 2.288315200551783e-05, + "loss": 0.6313, + "step": 16985 + }, + { + "epoch": 2.71, + "learning_rate": 2.2880581384547646e-05, + "loss": 0.7197, + "step": 16986 + }, + { + "epoch": 2.71, + "learning_rate": 2.2878010786147983e-05, + "loss": 0.6781, + "step": 16987 + }, + { + "epoch": 2.71, + "learning_rate": 2.287544021034622e-05, + "loss": 0.7081, + "step": 16988 + }, + { + "epoch": 2.71, + "learning_rate": 2.2872869657169727e-05, + "loss": 0.6589, + "step": 16989 + }, + { + "epoch": 2.71, + "learning_rate": 2.2870299126645877e-05, + "loss": 0.6613, + "step": 16990 + }, + { + "epoch": 2.71, + "learning_rate": 2.2867728618802052e-05, + "loss": 0.7366, + "step": 16991 + }, + { + "epoch": 2.71, + "learning_rate": 2.2865158133665625e-05, + "loss": 0.6928, + "step": 16992 + }, + { + "epoch": 2.71, + "learning_rate": 2.2862587671263963e-05, + "loss": 0.7031, + "step": 16993 + }, + { + "epoch": 2.71, + "learning_rate": 2.2860017231624447e-05, + "loss": 0.6548, + "step": 16994 + }, + { + "epoch": 2.71, + "learning_rate": 2.285744681477445e-05, + "loss": 0.6143, + "step": 16995 + }, + { + "epoch": 2.71, + "learning_rate": 2.2854876420741338e-05, + "loss": 0.6801, + "step": 16996 + }, + { + "epoch": 2.71, + "learning_rate": 2.2852306049552496e-05, + "loss": 0.6823, + "step": 16997 + }, + { + "epoch": 2.71, + "learning_rate": 2.284973570123529e-05, + "loss": 0.7339, + "step": 16998 + }, + { + "epoch": 2.71, + "learning_rate": 2.2847165375817085e-05, + "loss": 0.6396, + "step": 16999 + }, + { + "epoch": 2.71, + "learning_rate": 2.284459507332527e-05, + "loss": 0.7833, + "step": 17000 + }, + { + "epoch": 2.71, + "learning_rate": 2.284202479378721e-05, + "loss": 0.6707, + "step": 17001 + }, + { + "epoch": 2.71, + "learning_rate": 2.2839454537230282e-05, + "loss": 0.6864, + "step": 17002 + }, + { + "epoch": 2.71, + "learning_rate": 2.2836884303681845e-05, + "loss": 0.671, + "step": 17003 + }, + { + "epoch": 2.71, + "learning_rate": 2.283431409316927e-05, + "loss": 0.7135, + "step": 17004 + }, + { + "epoch": 2.71, + "learning_rate": 2.283174390571994e-05, + "loss": 0.6584, + "step": 17005 + }, + { + "epoch": 2.71, + "learning_rate": 2.2829173741361224e-05, + "loss": 0.7332, + "step": 17006 + }, + { + "epoch": 2.71, + "learning_rate": 2.2826603600120482e-05, + "loss": 0.835, + "step": 17007 + }, + { + "epoch": 2.71, + "learning_rate": 2.28240334820251e-05, + "loss": 0.7458, + "step": 17008 + }, + { + "epoch": 2.71, + "learning_rate": 2.2821463387102436e-05, + "loss": 0.6943, + "step": 17009 + }, + { + "epoch": 2.71, + "learning_rate": 2.2818893315379864e-05, + "loss": 0.712, + "step": 17010 + }, + { + "epoch": 2.71, + "learning_rate": 2.2816323266884754e-05, + "loss": 0.713, + "step": 17011 + }, + { + "epoch": 2.71, + "learning_rate": 2.281375324164448e-05, + "loss": 0.7983, + "step": 17012 + }, + { + "epoch": 2.71, + "learning_rate": 2.28111832396864e-05, + "loss": 0.6585, + "step": 17013 + }, + { + "epoch": 2.71, + "learning_rate": 2.280861326103789e-05, + "loss": 0.7002, + "step": 17014 + }, + { + "epoch": 2.71, + "learning_rate": 2.2806043305726326e-05, + "loss": 0.6962, + "step": 17015 + }, + { + "epoch": 2.71, + "learning_rate": 2.280347337377906e-05, + "loss": 0.6554, + "step": 17016 + }, + { + "epoch": 2.71, + "learning_rate": 2.2800903465223473e-05, + "loss": 0.6442, + "step": 17017 + }, + { + "epoch": 2.71, + "learning_rate": 2.2798333580086935e-05, + "loss": 0.6337, + "step": 17018 + }, + { + "epoch": 2.71, + "learning_rate": 2.2795763718396796e-05, + "loss": 0.707, + "step": 17019 + }, + { + "epoch": 2.71, + "learning_rate": 2.279319388018044e-05, + "loss": 0.7282, + "step": 17020 + }, + { + "epoch": 2.71, + "learning_rate": 2.2790624065465236e-05, + "loss": 0.6879, + "step": 17021 + }, + { + "epoch": 2.71, + "learning_rate": 2.2788054274278545e-05, + "loss": 0.671, + "step": 17022 + }, + { + "epoch": 2.71, + "learning_rate": 2.278548450664773e-05, + "loss": 0.7552, + "step": 17023 + }, + { + "epoch": 2.71, + "learning_rate": 2.278291476260015e-05, + "loss": 0.6638, + "step": 17024 + }, + { + "epoch": 2.71, + "learning_rate": 2.2780345042163192e-05, + "loss": 0.7251, + "step": 17025 + }, + { + "epoch": 2.71, + "learning_rate": 2.2777775345364212e-05, + "loss": 0.6545, + "step": 17026 + }, + { + "epoch": 2.71, + "learning_rate": 2.2775205672230564e-05, + "loss": 0.6643, + "step": 17027 + }, + { + "epoch": 2.71, + "learning_rate": 2.2772636022789636e-05, + "loss": 0.7764, + "step": 17028 + }, + { + "epoch": 2.71, + "learning_rate": 2.2770066397068782e-05, + "loss": 0.646, + "step": 17029 + }, + { + "epoch": 2.71, + "learning_rate": 2.276749679509536e-05, + "loss": 0.7175, + "step": 17030 + }, + { + "epoch": 2.71, + "learning_rate": 2.276492721689675e-05, + "loss": 0.6709, + "step": 17031 + }, + { + "epoch": 2.71, + "learning_rate": 2.2762357662500304e-05, + "loss": 0.6628, + "step": 17032 + }, + { + "epoch": 2.71, + "learning_rate": 2.2759788131933387e-05, + "loss": 0.6717, + "step": 17033 + }, + { + "epoch": 2.71, + "learning_rate": 2.275721862522337e-05, + "loss": 0.6681, + "step": 17034 + }, + { + "epoch": 2.71, + "learning_rate": 2.2754649142397617e-05, + "loss": 0.7694, + "step": 17035 + }, + { + "epoch": 2.71, + "learning_rate": 2.275207968348348e-05, + "loss": 0.6627, + "step": 17036 + }, + { + "epoch": 2.71, + "learning_rate": 2.2749510248508334e-05, + "loss": 0.6574, + "step": 17037 + }, + { + "epoch": 2.71, + "learning_rate": 2.2746940837499537e-05, + "loss": 0.7325, + "step": 17038 + }, + { + "epoch": 2.71, + "learning_rate": 2.2744371450484447e-05, + "loss": 0.7298, + "step": 17039 + }, + { + "epoch": 2.71, + "learning_rate": 2.2741802087490438e-05, + "loss": 0.725, + "step": 17040 + }, + { + "epoch": 2.71, + "learning_rate": 2.2739232748544867e-05, + "loss": 0.8031, + "step": 17041 + }, + { + "epoch": 2.71, + "learning_rate": 2.27366634336751e-05, + "loss": 0.7341, + "step": 17042 + }, + { + "epoch": 2.72, + "learning_rate": 2.2734094142908485e-05, + "loss": 0.7322, + "step": 17043 + }, + { + "epoch": 2.72, + "learning_rate": 2.273152487627239e-05, + "loss": 0.6618, + "step": 17044 + }, + { + "epoch": 2.72, + "learning_rate": 2.272895563379418e-05, + "loss": 0.7708, + "step": 17045 + }, + { + "epoch": 2.72, + "learning_rate": 2.2726386415501216e-05, + "loss": 0.7171, + "step": 17046 + }, + { + "epoch": 2.72, + "learning_rate": 2.2723817221420847e-05, + "loss": 0.7098, + "step": 17047 + }, + { + "epoch": 2.72, + "learning_rate": 2.2721248051580454e-05, + "loss": 0.6744, + "step": 17048 + }, + { + "epoch": 2.72, + "learning_rate": 2.2718678906007382e-05, + "loss": 0.733, + "step": 17049 + }, + { + "epoch": 2.72, + "learning_rate": 2.271610978472899e-05, + "loss": 0.6973, + "step": 17050 + }, + { + "epoch": 2.72, + "learning_rate": 2.271354068777265e-05, + "loss": 0.636, + "step": 17051 + }, + { + "epoch": 2.72, + "learning_rate": 2.2710971615165712e-05, + "loss": 0.7593, + "step": 17052 + }, + { + "epoch": 2.72, + "learning_rate": 2.2708402566935526e-05, + "loss": 0.6652, + "step": 17053 + }, + { + "epoch": 2.72, + "learning_rate": 2.270583354310947e-05, + "loss": 0.8223, + "step": 17054 + }, + { + "epoch": 2.72, + "learning_rate": 2.2703264543714896e-05, + "loss": 0.7272, + "step": 17055 + }, + { + "epoch": 2.72, + "learning_rate": 2.2700695568779155e-05, + "loss": 0.6479, + "step": 17056 + }, + { + "epoch": 2.72, + "learning_rate": 2.2698126618329616e-05, + "loss": 0.7671, + "step": 17057 + }, + { + "epoch": 2.72, + "learning_rate": 2.2695557692393628e-05, + "loss": 0.7896, + "step": 17058 + }, + { + "epoch": 2.72, + "learning_rate": 2.2692988790998548e-05, + "loss": 0.7386, + "step": 17059 + }, + { + "epoch": 2.72, + "learning_rate": 2.2690419914171744e-05, + "loss": 0.6592, + "step": 17060 + }, + { + "epoch": 2.72, + "learning_rate": 2.2687851061940564e-05, + "loss": 0.6798, + "step": 17061 + }, + { + "epoch": 2.72, + "learning_rate": 2.268528223433237e-05, + "loss": 0.6855, + "step": 17062 + }, + { + "epoch": 2.72, + "learning_rate": 2.268271343137451e-05, + "loss": 0.6466, + "step": 17063 + }, + { + "epoch": 2.72, + "learning_rate": 2.2680144653094343e-05, + "loss": 0.6981, + "step": 17064 + }, + { + "epoch": 2.72, + "learning_rate": 2.2677575899519228e-05, + "loss": 0.6773, + "step": 17065 + }, + { + "epoch": 2.72, + "learning_rate": 2.2675007170676524e-05, + "loss": 0.7467, + "step": 17066 + }, + { + "epoch": 2.72, + "learning_rate": 2.2672438466593575e-05, + "loss": 0.7292, + "step": 17067 + }, + { + "epoch": 2.72, + "learning_rate": 2.2669869787297748e-05, + "loss": 0.7336, + "step": 17068 + }, + { + "epoch": 2.72, + "learning_rate": 2.2667301132816394e-05, + "loss": 0.7175, + "step": 17069 + }, + { + "epoch": 2.72, + "learning_rate": 2.2664732503176857e-05, + "loss": 0.6856, + "step": 17070 + }, + { + "epoch": 2.72, + "learning_rate": 2.2662163898406512e-05, + "loss": 0.6886, + "step": 17071 + }, + { + "epoch": 2.72, + "learning_rate": 2.26595953185327e-05, + "loss": 0.7563, + "step": 17072 + }, + { + "epoch": 2.72, + "learning_rate": 2.265702676358277e-05, + "loss": 0.7079, + "step": 17073 + }, + { + "epoch": 2.72, + "learning_rate": 2.265445823358409e-05, + "loss": 0.634, + "step": 17074 + }, + { + "epoch": 2.72, + "learning_rate": 2.2651889728564008e-05, + "loss": 0.693, + "step": 17075 + }, + { + "epoch": 2.72, + "learning_rate": 2.264932124854987e-05, + "loss": 0.7305, + "step": 17076 + }, + { + "epoch": 2.72, + "learning_rate": 2.264675279356903e-05, + "loss": 0.6585, + "step": 17077 + }, + { + "epoch": 2.72, + "learning_rate": 2.264418436364885e-05, + "loss": 0.7228, + "step": 17078 + }, + { + "epoch": 2.72, + "learning_rate": 2.264161595881668e-05, + "loss": 0.7251, + "step": 17079 + }, + { + "epoch": 2.72, + "learning_rate": 2.2639047579099858e-05, + "loss": 0.6997, + "step": 17080 + }, + { + "epoch": 2.72, + "learning_rate": 2.2636479224525753e-05, + "loss": 0.7696, + "step": 17081 + }, + { + "epoch": 2.72, + "learning_rate": 2.2633910895121706e-05, + "loss": 0.6819, + "step": 17082 + }, + { + "epoch": 2.72, + "learning_rate": 2.263134259091508e-05, + "loss": 0.6685, + "step": 17083 + }, + { + "epoch": 2.72, + "learning_rate": 2.2628774311933206e-05, + "loss": 0.7426, + "step": 17084 + }, + { + "epoch": 2.72, + "learning_rate": 2.2626206058203452e-05, + "loss": 0.7607, + "step": 17085 + }, + { + "epoch": 2.72, + "learning_rate": 2.2623637829753163e-05, + "loss": 0.6583, + "step": 17086 + }, + { + "epoch": 2.72, + "learning_rate": 2.262106962660968e-05, + "loss": 0.7133, + "step": 17087 + }, + { + "epoch": 2.72, + "learning_rate": 2.261850144880037e-05, + "loss": 0.7102, + "step": 17088 + }, + { + "epoch": 2.72, + "learning_rate": 2.2615933296352574e-05, + "loss": 0.7523, + "step": 17089 + }, + { + "epoch": 2.72, + "learning_rate": 2.2613365169293636e-05, + "loss": 0.6684, + "step": 17090 + }, + { + "epoch": 2.72, + "learning_rate": 2.261079706765091e-05, + "loss": 0.7195, + "step": 17091 + }, + { + "epoch": 2.72, + "learning_rate": 2.2608228991451748e-05, + "loss": 0.6897, + "step": 17092 + }, + { + "epoch": 2.72, + "learning_rate": 2.2605660940723495e-05, + "loss": 0.6636, + "step": 17093 + }, + { + "epoch": 2.72, + "learning_rate": 2.2603092915493493e-05, + "loss": 0.7565, + "step": 17094 + }, + { + "epoch": 2.72, + "learning_rate": 2.2600524915789102e-05, + "loss": 0.7308, + "step": 17095 + }, + { + "epoch": 2.72, + "learning_rate": 2.2597956941637667e-05, + "loss": 0.6716, + "step": 17096 + }, + { + "epoch": 2.72, + "learning_rate": 2.259538899306652e-05, + "loss": 0.6678, + "step": 17097 + }, + { + "epoch": 2.72, + "learning_rate": 2.259282107010303e-05, + "loss": 0.6943, + "step": 17098 + }, + { + "epoch": 2.72, + "learning_rate": 2.2590253172774536e-05, + "loss": 0.6523, + "step": 17099 + }, + { + "epoch": 2.72, + "learning_rate": 2.2587685301108374e-05, + "loss": 0.7148, + "step": 17100 + }, + { + "epoch": 2.72, + "learning_rate": 2.258511745513191e-05, + "loss": 0.6586, + "step": 17101 + }, + { + "epoch": 2.72, + "learning_rate": 2.2582549634872475e-05, + "loss": 0.6279, + "step": 17102 + }, + { + "epoch": 2.72, + "learning_rate": 2.257998184035742e-05, + "loss": 0.7611, + "step": 17103 + }, + { + "epoch": 2.72, + "learning_rate": 2.2577414071614088e-05, + "loss": 0.6657, + "step": 17104 + }, + { + "epoch": 2.73, + "learning_rate": 2.2574846328669822e-05, + "loss": 0.7114, + "step": 17105 + }, + { + "epoch": 2.73, + "learning_rate": 2.257227861155197e-05, + "loss": 0.6355, + "step": 17106 + }, + { + "epoch": 2.73, + "learning_rate": 2.2569710920287883e-05, + "loss": 0.6647, + "step": 17107 + }, + { + "epoch": 2.73, + "learning_rate": 2.256714325490489e-05, + "loss": 0.663, + "step": 17108 + }, + { + "epoch": 2.73, + "learning_rate": 2.2564575615430354e-05, + "loss": 0.6396, + "step": 17109 + }, + { + "epoch": 2.73, + "learning_rate": 2.2562008001891605e-05, + "loss": 0.752, + "step": 17110 + }, + { + "epoch": 2.73, + "learning_rate": 2.2559440414315984e-05, + "loss": 0.6629, + "step": 17111 + }, + { + "epoch": 2.73, + "learning_rate": 2.255687285273085e-05, + "loss": 0.679, + "step": 17112 + }, + { + "epoch": 2.73, + "learning_rate": 2.2554305317163534e-05, + "loss": 0.7451, + "step": 17113 + }, + { + "epoch": 2.73, + "learning_rate": 2.255173780764138e-05, + "loss": 0.6501, + "step": 17114 + }, + { + "epoch": 2.73, + "learning_rate": 2.2549170324191736e-05, + "loss": 0.6512, + "step": 17115 + }, + { + "epoch": 2.73, + "learning_rate": 2.254660286684194e-05, + "loss": 0.7788, + "step": 17116 + }, + { + "epoch": 2.73, + "learning_rate": 2.2544035435619323e-05, + "loss": 0.6794, + "step": 17117 + }, + { + "epoch": 2.73, + "learning_rate": 2.254146803055125e-05, + "loss": 0.71, + "step": 17118 + }, + { + "epoch": 2.73, + "learning_rate": 2.2538900651665047e-05, + "loss": 0.665, + "step": 17119 + }, + { + "epoch": 2.73, + "learning_rate": 2.253633329898805e-05, + "loss": 0.706, + "step": 17120 + }, + { + "epoch": 2.73, + "learning_rate": 2.253376597254762e-05, + "loss": 0.7625, + "step": 17121 + }, + { + "epoch": 2.73, + "learning_rate": 2.2531198672371077e-05, + "loss": 0.6558, + "step": 17122 + }, + { + "epoch": 2.73, + "learning_rate": 2.252863139848578e-05, + "loss": 0.7905, + "step": 17123 + }, + { + "epoch": 2.73, + "learning_rate": 2.252606415091905e-05, + "loss": 0.6674, + "step": 17124 + }, + { + "epoch": 2.73, + "learning_rate": 2.2523496929698234e-05, + "loss": 0.7534, + "step": 17125 + }, + { + "epoch": 2.73, + "learning_rate": 2.2520929734850673e-05, + "loss": 0.6934, + "step": 17126 + }, + { + "epoch": 2.73, + "learning_rate": 2.251836256640371e-05, + "loss": 0.7705, + "step": 17127 + }, + { + "epoch": 2.73, + "learning_rate": 2.2515795424384667e-05, + "loss": 0.6217, + "step": 17128 + }, + { + "epoch": 2.73, + "learning_rate": 2.2513228308820904e-05, + "loss": 0.7421, + "step": 17129 + }, + { + "epoch": 2.73, + "learning_rate": 2.251066121973975e-05, + "loss": 0.7273, + "step": 17130 + }, + { + "epoch": 2.73, + "learning_rate": 2.2508094157168535e-05, + "loss": 0.6607, + "step": 17131 + }, + { + "epoch": 2.73, + "learning_rate": 2.2505527121134613e-05, + "loss": 0.6764, + "step": 17132 + }, + { + "epoch": 2.73, + "learning_rate": 2.250296011166531e-05, + "loss": 0.7926, + "step": 17133 + }, + { + "epoch": 2.73, + "learning_rate": 2.250039312878796e-05, + "loss": 0.7704, + "step": 17134 + }, + { + "epoch": 2.73, + "learning_rate": 2.2497826172529916e-05, + "loss": 0.7063, + "step": 17135 + }, + { + "epoch": 2.73, + "learning_rate": 2.2495259242918498e-05, + "loss": 0.7146, + "step": 17136 + }, + { + "epoch": 2.73, + "learning_rate": 2.2492692339981045e-05, + "loss": 0.6645, + "step": 17137 + }, + { + "epoch": 2.73, + "learning_rate": 2.2490125463744906e-05, + "loss": 0.7547, + "step": 17138 + }, + { + "epoch": 2.73, + "learning_rate": 2.24875586142374e-05, + "loss": 0.6451, + "step": 17139 + }, + { + "epoch": 2.73, + "learning_rate": 2.248499179148587e-05, + "loss": 0.7376, + "step": 17140 + }, + { + "epoch": 2.73, + "learning_rate": 2.248242499551765e-05, + "loss": 0.6587, + "step": 17141 + }, + { + "epoch": 2.73, + "learning_rate": 2.247985822636008e-05, + "loss": 0.688, + "step": 17142 + }, + { + "epoch": 2.73, + "learning_rate": 2.2477291484040496e-05, + "loss": 0.6782, + "step": 17143 + }, + { + "epoch": 2.73, + "learning_rate": 2.2474724768586216e-05, + "loss": 0.7177, + "step": 17144 + }, + { + "epoch": 2.73, + "learning_rate": 2.247215808002458e-05, + "loss": 0.7097, + "step": 17145 + }, + { + "epoch": 2.73, + "learning_rate": 2.2469591418382933e-05, + "loss": 0.6829, + "step": 17146 + }, + { + "epoch": 2.73, + "learning_rate": 2.24670247836886e-05, + "loss": 0.732, + "step": 17147 + }, + { + "epoch": 2.73, + "learning_rate": 2.246445817596891e-05, + "loss": 0.7455, + "step": 17148 + }, + { + "epoch": 2.73, + "learning_rate": 2.246189159525121e-05, + "loss": 0.6932, + "step": 17149 + }, + { + "epoch": 2.73, + "learning_rate": 2.2459325041562818e-05, + "loss": 0.6051, + "step": 17150 + }, + { + "epoch": 2.73, + "learning_rate": 2.245675851493107e-05, + "loss": 0.6795, + "step": 17151 + }, + { + "epoch": 2.73, + "learning_rate": 2.2454192015383305e-05, + "loss": 0.6708, + "step": 17152 + }, + { + "epoch": 2.73, + "learning_rate": 2.2451625542946847e-05, + "loss": 0.6221, + "step": 17153 + }, + { + "epoch": 2.73, + "learning_rate": 2.2449059097649024e-05, + "loss": 0.7459, + "step": 17154 + }, + { + "epoch": 2.73, + "learning_rate": 2.244649267951718e-05, + "loss": 0.6241, + "step": 17155 + }, + { + "epoch": 2.73, + "learning_rate": 2.244392628857864e-05, + "loss": 0.6681, + "step": 17156 + }, + { + "epoch": 2.73, + "learning_rate": 2.2441359924860726e-05, + "loss": 0.6409, + "step": 17157 + }, + { + "epoch": 2.73, + "learning_rate": 2.2438793588390783e-05, + "loss": 0.7137, + "step": 17158 + }, + { + "epoch": 2.73, + "learning_rate": 2.243622727919613e-05, + "loss": 0.6663, + "step": 17159 + }, + { + "epoch": 2.73, + "learning_rate": 2.24336609973041e-05, + "loss": 0.6877, + "step": 17160 + }, + { + "epoch": 2.73, + "learning_rate": 2.2431094742742022e-05, + "loss": 0.717, + "step": 17161 + }, + { + "epoch": 2.73, + "learning_rate": 2.242852851553723e-05, + "loss": 0.6454, + "step": 17162 + }, + { + "epoch": 2.73, + "learning_rate": 2.2425962315717048e-05, + "loss": 0.6369, + "step": 17163 + }, + { + "epoch": 2.73, + "learning_rate": 2.24233961433088e-05, + "loss": 0.6672, + "step": 17164 + }, + { + "epoch": 2.73, + "learning_rate": 2.2420829998339818e-05, + "loss": 0.7149, + "step": 17165 + }, + { + "epoch": 2.73, + "learning_rate": 2.2418263880837433e-05, + "loss": 0.6889, + "step": 17166 + }, + { + "epoch": 2.73, + "learning_rate": 2.2415697790828972e-05, + "loss": 0.6529, + "step": 17167 + }, + { + "epoch": 2.74, + "learning_rate": 2.2413131728341756e-05, + "loss": 0.6994, + "step": 17168 + }, + { + "epoch": 2.74, + "learning_rate": 2.241056569340312e-05, + "loss": 0.6904, + "step": 17169 + }, + { + "epoch": 2.74, + "learning_rate": 2.2407999686040383e-05, + "loss": 0.7057, + "step": 17170 + }, + { + "epoch": 2.74, + "learning_rate": 2.2405433706280877e-05, + "loss": 0.7467, + "step": 17171 + }, + { + "epoch": 2.74, + "learning_rate": 2.240286775415193e-05, + "loss": 0.7832, + "step": 17172 + }, + { + "epoch": 2.74, + "learning_rate": 2.2400301829680863e-05, + "loss": 0.673, + "step": 17173 + }, + { + "epoch": 2.74, + "learning_rate": 2.2397735932895e-05, + "loss": 0.7513, + "step": 17174 + }, + { + "epoch": 2.74, + "learning_rate": 2.2395170063821676e-05, + "loss": 0.7308, + "step": 17175 + }, + { + "epoch": 2.74, + "learning_rate": 2.2392604222488207e-05, + "loss": 0.6965, + "step": 17176 + }, + { + "epoch": 2.74, + "learning_rate": 2.2390038408921914e-05, + "loss": 0.7048, + "step": 17177 + }, + { + "epoch": 2.74, + "learning_rate": 2.2387472623150134e-05, + "loss": 0.7421, + "step": 17178 + }, + { + "epoch": 2.74, + "learning_rate": 2.2384906865200184e-05, + "loss": 0.7017, + "step": 17179 + }, + { + "epoch": 2.74, + "learning_rate": 2.2382341135099383e-05, + "loss": 0.6808, + "step": 17180 + }, + { + "epoch": 2.74, + "learning_rate": 2.2379775432875067e-05, + "loss": 0.7803, + "step": 17181 + }, + { + "epoch": 2.74, + "learning_rate": 2.2377209758554553e-05, + "loss": 0.7195, + "step": 17182 + }, + { + "epoch": 2.74, + "learning_rate": 2.2374644112165165e-05, + "loss": 0.7248, + "step": 17183 + }, + { + "epoch": 2.74, + "learning_rate": 2.2372078493734216e-05, + "loss": 0.6458, + "step": 17184 + }, + { + "epoch": 2.74, + "learning_rate": 2.2369512903289033e-05, + "loss": 0.6785, + "step": 17185 + }, + { + "epoch": 2.74, + "learning_rate": 2.2366947340856944e-05, + "loss": 0.7347, + "step": 17186 + }, + { + "epoch": 2.74, + "learning_rate": 2.236438180646527e-05, + "loss": 0.7129, + "step": 17187 + }, + { + "epoch": 2.74, + "learning_rate": 2.236181630014132e-05, + "loss": 0.7273, + "step": 17188 + }, + { + "epoch": 2.74, + "learning_rate": 2.2359250821912438e-05, + "loss": 0.7169, + "step": 17189 + }, + { + "epoch": 2.74, + "learning_rate": 2.2356685371805923e-05, + "loss": 0.7732, + "step": 17190 + }, + { + "epoch": 2.74, + "learning_rate": 2.23541199498491e-05, + "loss": 0.7519, + "step": 17191 + }, + { + "epoch": 2.74, + "learning_rate": 2.2351554556069305e-05, + "loss": 0.6223, + "step": 17192 + }, + { + "epoch": 2.74, + "learning_rate": 2.234898919049384e-05, + "loss": 0.6637, + "step": 17193 + }, + { + "epoch": 2.74, + "learning_rate": 2.234642385315003e-05, + "loss": 0.6619, + "step": 17194 + }, + { + "epoch": 2.74, + "learning_rate": 2.2343858544065193e-05, + "loss": 0.721, + "step": 17195 + }, + { + "epoch": 2.74, + "learning_rate": 2.2341293263266656e-05, + "loss": 0.6871, + "step": 17196 + }, + { + "epoch": 2.74, + "learning_rate": 2.233872801078173e-05, + "loss": 0.7669, + "step": 17197 + }, + { + "epoch": 2.74, + "learning_rate": 2.233616278663773e-05, + "loss": 0.7669, + "step": 17198 + }, + { + "epoch": 2.74, + "learning_rate": 2.2333597590861982e-05, + "loss": 0.7349, + "step": 17199 + }, + { + "epoch": 2.74, + "learning_rate": 2.2331032423481804e-05, + "loss": 0.6799, + "step": 17200 + }, + { + "epoch": 2.74, + "learning_rate": 2.2328467284524504e-05, + "loss": 0.6158, + "step": 17201 + }, + { + "epoch": 2.74, + "learning_rate": 2.232590217401741e-05, + "loss": 0.7324, + "step": 17202 + }, + { + "epoch": 2.74, + "learning_rate": 2.2323337091987837e-05, + "loss": 0.6549, + "step": 17203 + }, + { + "epoch": 2.74, + "learning_rate": 2.23207720384631e-05, + "loss": 0.7221, + "step": 17204 + }, + { + "epoch": 2.74, + "learning_rate": 2.231820701347051e-05, + "loss": 0.7758, + "step": 17205 + }, + { + "epoch": 2.74, + "learning_rate": 2.2315642017037385e-05, + "loss": 0.6168, + "step": 17206 + }, + { + "epoch": 2.74, + "learning_rate": 2.2313077049191047e-05, + "loss": 0.7329, + "step": 17207 + }, + { + "epoch": 2.74, + "learning_rate": 2.2310512109958807e-05, + "loss": 0.6579, + "step": 17208 + }, + { + "epoch": 2.74, + "learning_rate": 2.2307947199367973e-05, + "loss": 0.6523, + "step": 17209 + }, + { + "epoch": 2.74, + "learning_rate": 2.230538231744587e-05, + "loss": 0.8002, + "step": 17210 + }, + { + "epoch": 2.74, + "learning_rate": 2.2302817464219818e-05, + "loss": 0.6593, + "step": 17211 + }, + { + "epoch": 2.74, + "learning_rate": 2.230025263971711e-05, + "loss": 0.733, + "step": 17212 + }, + { + "epoch": 2.74, + "learning_rate": 2.2297687843965078e-05, + "loss": 0.6985, + "step": 17213 + }, + { + "epoch": 2.74, + "learning_rate": 2.229512307699103e-05, + "loss": 0.7141, + "step": 17214 + }, + { + "epoch": 2.74, + "learning_rate": 2.229255833882228e-05, + "loss": 0.6705, + "step": 17215 + }, + { + "epoch": 2.74, + "learning_rate": 2.2289993629486137e-05, + "loss": 0.6672, + "step": 17216 + }, + { + "epoch": 2.74, + "learning_rate": 2.228742894900992e-05, + "loss": 0.6769, + "step": 17217 + }, + { + "epoch": 2.74, + "learning_rate": 2.228486429742093e-05, + "loss": 0.727, + "step": 17218 + }, + { + "epoch": 2.74, + "learning_rate": 2.22822996747465e-05, + "loss": 0.6331, + "step": 17219 + }, + { + "epoch": 2.74, + "learning_rate": 2.2279735081013923e-05, + "loss": 0.7035, + "step": 17220 + }, + { + "epoch": 2.74, + "learning_rate": 2.227717051625051e-05, + "loss": 0.7095, + "step": 17221 + }, + { + "epoch": 2.74, + "learning_rate": 2.2274605980483583e-05, + "loss": 0.6867, + "step": 17222 + }, + { + "epoch": 2.74, + "learning_rate": 2.227204147374045e-05, + "loss": 0.6364, + "step": 17223 + }, + { + "epoch": 2.74, + "learning_rate": 2.2269476996048423e-05, + "loss": 0.6481, + "step": 17224 + }, + { + "epoch": 2.74, + "learning_rate": 2.2266912547434803e-05, + "loss": 0.7083, + "step": 17225 + }, + { + "epoch": 2.74, + "learning_rate": 2.22643481279269e-05, + "loss": 0.7036, + "step": 17226 + }, + { + "epoch": 2.74, + "learning_rate": 2.2261783737552032e-05, + "loss": 0.7236, + "step": 17227 + }, + { + "epoch": 2.74, + "learning_rate": 2.2259219376337508e-05, + "loss": 0.6816, + "step": 17228 + }, + { + "epoch": 2.74, + "learning_rate": 2.2256655044310625e-05, + "loss": 0.6774, + "step": 17229 + }, + { + "epoch": 2.74, + "learning_rate": 2.225409074149871e-05, + "loss": 0.7293, + "step": 17230 + }, + { + "epoch": 2.75, + "learning_rate": 2.2251526467929057e-05, + "loss": 0.7143, + "step": 17231 + }, + { + "epoch": 2.75, + "learning_rate": 2.2248962223628976e-05, + "loss": 0.6237, + "step": 17232 + }, + { + "epoch": 2.75, + "learning_rate": 2.2246398008625783e-05, + "loss": 0.6458, + "step": 17233 + }, + { + "epoch": 2.75, + "learning_rate": 2.224383382294678e-05, + "loss": 0.6479, + "step": 17234 + }, + { + "epoch": 2.75, + "learning_rate": 2.2241269666619265e-05, + "loss": 0.7165, + "step": 17235 + }, + { + "epoch": 2.75, + "learning_rate": 2.2238705539670557e-05, + "loss": 0.6655, + "step": 17236 + }, + { + "epoch": 2.75, + "learning_rate": 2.223614144212796e-05, + "loss": 0.6916, + "step": 17237 + }, + { + "epoch": 2.75, + "learning_rate": 2.2233577374018776e-05, + "loss": 0.6194, + "step": 17238 + }, + { + "epoch": 2.75, + "learning_rate": 2.223101333537032e-05, + "loss": 0.7306, + "step": 17239 + }, + { + "epoch": 2.75, + "learning_rate": 2.2228449326209884e-05, + "loss": 0.625, + "step": 17240 + }, + { + "epoch": 2.75, + "learning_rate": 2.222588534656478e-05, + "loss": 0.6845, + "step": 17241 + }, + { + "epoch": 2.75, + "learning_rate": 2.2223321396462318e-05, + "loss": 0.7205, + "step": 17242 + }, + { + "epoch": 2.75, + "learning_rate": 2.2220757475929796e-05, + "loss": 0.7144, + "step": 17243 + }, + { + "epoch": 2.75, + "learning_rate": 2.2218193584994527e-05, + "loss": 0.6405, + "step": 17244 + }, + { + "epoch": 2.75, + "learning_rate": 2.22156297236838e-05, + "loss": 0.6793, + "step": 17245 + }, + { + "epoch": 2.75, + "learning_rate": 2.2213065892024922e-05, + "loss": 0.6726, + "step": 17246 + }, + { + "epoch": 2.75, + "learning_rate": 2.2210502090045202e-05, + "loss": 0.5855, + "step": 17247 + }, + { + "epoch": 2.75, + "learning_rate": 2.2207938317771944e-05, + "loss": 0.7253, + "step": 17248 + }, + { + "epoch": 2.75, + "learning_rate": 2.2205374575232443e-05, + "loss": 0.7173, + "step": 17249 + }, + { + "epoch": 2.75, + "learning_rate": 2.220281086245401e-05, + "loss": 0.7462, + "step": 17250 + }, + { + "epoch": 2.75, + "learning_rate": 2.2200247179463947e-05, + "loss": 0.6872, + "step": 17251 + }, + { + "epoch": 2.75, + "learning_rate": 2.2197683526289542e-05, + "loss": 0.6313, + "step": 17252 + }, + { + "epoch": 2.75, + "learning_rate": 2.219511990295811e-05, + "loss": 0.6786, + "step": 17253 + }, + { + "epoch": 2.75, + "learning_rate": 2.2192556309496953e-05, + "loss": 0.7126, + "step": 17254 + }, + { + "epoch": 2.75, + "learning_rate": 2.2189992745933362e-05, + "loss": 0.5937, + "step": 17255 + }, + { + "epoch": 2.75, + "learning_rate": 2.2187429212294642e-05, + "loss": 0.7021, + "step": 17256 + }, + { + "epoch": 2.75, + "learning_rate": 2.2184865708608095e-05, + "loss": 0.7462, + "step": 17257 + }, + { + "epoch": 2.75, + "learning_rate": 2.2182302234901018e-05, + "loss": 0.6692, + "step": 17258 + }, + { + "epoch": 2.75, + "learning_rate": 2.2179738791200714e-05, + "loss": 0.7476, + "step": 17259 + }, + { + "epoch": 2.75, + "learning_rate": 2.217717537753448e-05, + "loss": 0.7282, + "step": 17260 + }, + { + "epoch": 2.75, + "learning_rate": 2.217461199392961e-05, + "loss": 0.7426, + "step": 17261 + }, + { + "epoch": 2.75, + "learning_rate": 2.217204864041341e-05, + "loss": 0.6814, + "step": 17262 + }, + { + "epoch": 2.75, + "learning_rate": 2.2169485317013174e-05, + "loss": 0.7063, + "step": 17263 + }, + { + "epoch": 2.75, + "learning_rate": 2.2166922023756208e-05, + "loss": 0.6211, + "step": 17264 + }, + { + "epoch": 2.75, + "learning_rate": 2.2164358760669803e-05, + "loss": 0.6696, + "step": 17265 + }, + { + "epoch": 2.75, + "learning_rate": 2.2161795527781244e-05, + "loss": 0.6516, + "step": 17266 + }, + { + "epoch": 2.75, + "learning_rate": 2.2159232325117842e-05, + "loss": 0.7112, + "step": 17267 + }, + { + "epoch": 2.75, + "learning_rate": 2.2156669152706897e-05, + "loss": 0.7168, + "step": 17268 + }, + { + "epoch": 2.75, + "learning_rate": 2.215410601057569e-05, + "loss": 0.6589, + "step": 17269 + }, + { + "epoch": 2.75, + "learning_rate": 2.2151542898751533e-05, + "loss": 0.8322, + "step": 17270 + }, + { + "epoch": 2.75, + "learning_rate": 2.2148979817261718e-05, + "loss": 0.6854, + "step": 17271 + }, + { + "epoch": 2.75, + "learning_rate": 2.2146416766133527e-05, + "loss": 0.6878, + "step": 17272 + }, + { + "epoch": 2.75, + "learning_rate": 2.214385374539427e-05, + "loss": 0.7577, + "step": 17273 + }, + { + "epoch": 2.75, + "learning_rate": 2.214129075507124e-05, + "loss": 0.665, + "step": 17274 + }, + { + "epoch": 2.75, + "learning_rate": 2.213872779519172e-05, + "loss": 0.7762, + "step": 17275 + }, + { + "epoch": 2.75, + "learning_rate": 2.2136164865783023e-05, + "loss": 0.6938, + "step": 17276 + }, + { + "epoch": 2.75, + "learning_rate": 2.2133601966872426e-05, + "loss": 0.7047, + "step": 17277 + }, + { + "epoch": 2.75, + "learning_rate": 2.2131039098487223e-05, + "loss": 0.7605, + "step": 17278 + }, + { + "epoch": 2.75, + "learning_rate": 2.212847626065472e-05, + "loss": 0.7214, + "step": 17279 + }, + { + "epoch": 2.75, + "learning_rate": 2.2125913453402195e-05, + "loss": 0.8167, + "step": 17280 + }, + { + "epoch": 2.75, + "learning_rate": 2.2123350676756945e-05, + "loss": 0.6508, + "step": 17281 + }, + { + "epoch": 2.75, + "learning_rate": 2.212078793074627e-05, + "loss": 0.699, + "step": 17282 + }, + { + "epoch": 2.75, + "learning_rate": 2.2118225215397456e-05, + "loss": 0.7309, + "step": 17283 + }, + { + "epoch": 2.75, + "learning_rate": 2.21156625307378e-05, + "loss": 0.6582, + "step": 17284 + }, + { + "epoch": 2.75, + "learning_rate": 2.211309987679458e-05, + "loss": 0.7091, + "step": 17285 + }, + { + "epoch": 2.75, + "learning_rate": 2.2110537253595087e-05, + "loss": 0.6987, + "step": 17286 + }, + { + "epoch": 2.75, + "learning_rate": 2.2107974661166625e-05, + "loss": 0.8291, + "step": 17287 + }, + { + "epoch": 2.75, + "learning_rate": 2.2105412099536478e-05, + "loss": 0.6761, + "step": 17288 + }, + { + "epoch": 2.75, + "learning_rate": 2.2102849568731926e-05, + "loss": 0.6817, + "step": 17289 + }, + { + "epoch": 2.75, + "learning_rate": 2.2100287068780275e-05, + "loss": 0.6693, + "step": 17290 + }, + { + "epoch": 2.75, + "learning_rate": 2.209772459970881e-05, + "loss": 0.6951, + "step": 17291 + }, + { + "epoch": 2.75, + "learning_rate": 2.2095162161544803e-05, + "loss": 0.6326, + "step": 17292 + }, + { + "epoch": 2.75, + "learning_rate": 2.2092599754315565e-05, + "loss": 0.6729, + "step": 17293 + }, + { + "epoch": 2.76, + "learning_rate": 2.2090037378048373e-05, + "loss": 0.6817, + "step": 17294 + }, + { + "epoch": 2.76, + "learning_rate": 2.2087475032770512e-05, + "loss": 0.6343, + "step": 17295 + }, + { + "epoch": 2.76, + "learning_rate": 2.208491271850928e-05, + "loss": 0.6835, + "step": 17296 + }, + { + "epoch": 2.76, + "learning_rate": 2.2082350435291957e-05, + "loss": 0.6807, + "step": 17297 + }, + { + "epoch": 2.76, + "learning_rate": 2.2079788183145825e-05, + "loss": 0.6469, + "step": 17298 + }, + { + "epoch": 2.76, + "learning_rate": 2.207722596209818e-05, + "loss": 0.6289, + "step": 17299 + }, + { + "epoch": 2.76, + "learning_rate": 2.2074663772176305e-05, + "loss": 0.7255, + "step": 17300 + }, + { + "epoch": 2.76, + "learning_rate": 2.2072101613407485e-05, + "loss": 0.6893, + "step": 17301 + }, + { + "epoch": 2.76, + "learning_rate": 2.2069539485819e-05, + "loss": 0.6649, + "step": 17302 + }, + { + "epoch": 2.76, + "learning_rate": 2.2066977389438144e-05, + "loss": 0.6436, + "step": 17303 + }, + { + "epoch": 2.76, + "learning_rate": 2.20644153242922e-05, + "loss": 0.7235, + "step": 17304 + }, + { + "epoch": 2.76, + "learning_rate": 2.206185329040846e-05, + "loss": 0.6349, + "step": 17305 + }, + { + "epoch": 2.76, + "learning_rate": 2.205929128781418e-05, + "loss": 0.6717, + "step": 17306 + }, + { + "epoch": 2.76, + "learning_rate": 2.2056729316536674e-05, + "loss": 0.6606, + "step": 17307 + }, + { + "epoch": 2.76, + "learning_rate": 2.2054167376603208e-05, + "loss": 0.6556, + "step": 17308 + }, + { + "epoch": 2.76, + "learning_rate": 2.205160546804107e-05, + "loss": 0.6725, + "step": 17309 + }, + { + "epoch": 2.76, + "learning_rate": 2.2049043590877546e-05, + "loss": 0.6318, + "step": 17310 + }, + { + "epoch": 2.76, + "learning_rate": 2.204648174513992e-05, + "loss": 0.6438, + "step": 17311 + }, + { + "epoch": 2.76, + "learning_rate": 2.2043919930855463e-05, + "loss": 0.7122, + "step": 17312 + }, + { + "epoch": 2.76, + "learning_rate": 2.204135814805147e-05, + "loss": 0.6579, + "step": 17313 + }, + { + "epoch": 2.76, + "learning_rate": 2.2038796396755215e-05, + "loss": 0.7075, + "step": 17314 + }, + { + "epoch": 2.76, + "learning_rate": 2.2036234676993984e-05, + "loss": 0.6602, + "step": 17315 + }, + { + "epoch": 2.76, + "learning_rate": 2.2033672988795047e-05, + "loss": 0.6528, + "step": 17316 + }, + { + "epoch": 2.76, + "learning_rate": 2.2031111332185695e-05, + "loss": 0.6554, + "step": 17317 + }, + { + "epoch": 2.76, + "learning_rate": 2.2028549707193207e-05, + "loss": 0.6672, + "step": 17318 + }, + { + "epoch": 2.76, + "learning_rate": 2.2025988113844853e-05, + "loss": 0.6505, + "step": 17319 + }, + { + "epoch": 2.76, + "learning_rate": 2.2023426552167928e-05, + "loss": 0.6678, + "step": 17320 + }, + { + "epoch": 2.76, + "learning_rate": 2.20208650221897e-05, + "loss": 0.7077, + "step": 17321 + }, + { + "epoch": 2.76, + "learning_rate": 2.201830352393745e-05, + "loss": 0.6477, + "step": 17322 + }, + { + "epoch": 2.76, + "learning_rate": 2.201574205743846e-05, + "loss": 0.6846, + "step": 17323 + }, + { + "epoch": 2.76, + "learning_rate": 2.2013180622720005e-05, + "loss": 0.6736, + "step": 17324 + }, + { + "epoch": 2.76, + "learning_rate": 2.201061921980937e-05, + "loss": 0.7205, + "step": 17325 + }, + { + "epoch": 2.76, + "learning_rate": 2.2008057848733812e-05, + "loss": 0.7376, + "step": 17326 + }, + { + "epoch": 2.76, + "learning_rate": 2.200549650952063e-05, + "loss": 0.6443, + "step": 17327 + }, + { + "epoch": 2.76, + "learning_rate": 2.200293520219709e-05, + "loss": 0.6656, + "step": 17328 + }, + { + "epoch": 2.76, + "learning_rate": 2.200037392679047e-05, + "loss": 0.7729, + "step": 17329 + }, + { + "epoch": 2.76, + "learning_rate": 2.1997812683328042e-05, + "loss": 0.6191, + "step": 17330 + }, + { + "epoch": 2.76, + "learning_rate": 2.1995251471837093e-05, + "loss": 0.727, + "step": 17331 + }, + { + "epoch": 2.76, + "learning_rate": 2.1992690292344893e-05, + "loss": 0.682, + "step": 17332 + }, + { + "epoch": 2.76, + "learning_rate": 2.1990129144878708e-05, + "loss": 0.691, + "step": 17333 + }, + { + "epoch": 2.76, + "learning_rate": 2.1987568029465825e-05, + "loss": 0.7275, + "step": 17334 + }, + { + "epoch": 2.76, + "learning_rate": 2.1985006946133512e-05, + "loss": 0.7414, + "step": 17335 + }, + { + "epoch": 2.76, + "learning_rate": 2.1982445894909043e-05, + "loss": 0.7477, + "step": 17336 + }, + { + "epoch": 2.76, + "learning_rate": 2.1979884875819696e-05, + "loss": 0.7379, + "step": 17337 + }, + { + "epoch": 2.76, + "learning_rate": 2.1977323888892743e-05, + "loss": 0.7187, + "step": 17338 + }, + { + "epoch": 2.76, + "learning_rate": 2.197476293415545e-05, + "loss": 0.7071, + "step": 17339 + }, + { + "epoch": 2.76, + "learning_rate": 2.1972202011635097e-05, + "loss": 0.7009, + "step": 17340 + }, + { + "epoch": 2.76, + "learning_rate": 2.1969641121358957e-05, + "loss": 0.6355, + "step": 17341 + }, + { + "epoch": 2.76, + "learning_rate": 2.1967080263354294e-05, + "loss": 0.7982, + "step": 17342 + }, + { + "epoch": 2.76, + "learning_rate": 2.1964519437648393e-05, + "loss": 0.8083, + "step": 17343 + }, + { + "epoch": 2.76, + "learning_rate": 2.1961958644268516e-05, + "loss": 0.6469, + "step": 17344 + }, + { + "epoch": 2.76, + "learning_rate": 2.1959397883241936e-05, + "loss": 0.8425, + "step": 17345 + }, + { + "epoch": 2.76, + "learning_rate": 2.195683715459592e-05, + "loss": 0.6749, + "step": 17346 + }, + { + "epoch": 2.76, + "learning_rate": 2.1954276458357736e-05, + "loss": 0.6384, + "step": 17347 + }, + { + "epoch": 2.76, + "learning_rate": 2.1951715794554665e-05, + "loss": 0.7021, + "step": 17348 + }, + { + "epoch": 2.76, + "learning_rate": 2.194915516321397e-05, + "loss": 0.6869, + "step": 17349 + }, + { + "epoch": 2.76, + "learning_rate": 2.194659456436291e-05, + "loss": 0.7724, + "step": 17350 + }, + { + "epoch": 2.76, + "learning_rate": 2.1944033998028773e-05, + "loss": 0.7878, + "step": 17351 + }, + { + "epoch": 2.76, + "learning_rate": 2.1941473464238822e-05, + "loss": 0.6237, + "step": 17352 + }, + { + "epoch": 2.76, + "learning_rate": 2.193891296302031e-05, + "loss": 0.7399, + "step": 17353 + }, + { + "epoch": 2.76, + "learning_rate": 2.1936352494400525e-05, + "loss": 0.7687, + "step": 17354 + }, + { + "epoch": 2.76, + "learning_rate": 2.1933792058406728e-05, + "loss": 0.7572, + "step": 17355 + }, + { + "epoch": 2.77, + "learning_rate": 2.1931231655066176e-05, + "loss": 0.6758, + "step": 17356 + }, + { + "epoch": 2.77, + "learning_rate": 2.1928671284406148e-05, + "loss": 0.669, + "step": 17357 + }, + { + "epoch": 2.77, + "learning_rate": 2.1926110946453908e-05, + "loss": 0.6812, + "step": 17358 + }, + { + "epoch": 2.77, + "learning_rate": 2.1923550641236716e-05, + "loss": 0.7839, + "step": 17359 + }, + { + "epoch": 2.77, + "learning_rate": 2.1920990368781847e-05, + "loss": 0.682, + "step": 17360 + }, + { + "epoch": 2.77, + "learning_rate": 2.1918430129116558e-05, + "loss": 0.6699, + "step": 17361 + }, + { + "epoch": 2.77, + "learning_rate": 2.1915869922268114e-05, + "loss": 0.7419, + "step": 17362 + }, + { + "epoch": 2.77, + "learning_rate": 2.191330974826379e-05, + "loss": 0.6692, + "step": 17363 + }, + { + "epoch": 2.77, + "learning_rate": 2.191074960713084e-05, + "loss": 0.6733, + "step": 17364 + }, + { + "epoch": 2.77, + "learning_rate": 2.190818949889654e-05, + "loss": 0.7021, + "step": 17365 + }, + { + "epoch": 2.77, + "learning_rate": 2.1905629423588138e-05, + "loss": 0.625, + "step": 17366 + }, + { + "epoch": 2.77, + "learning_rate": 2.1903069381232896e-05, + "loss": 0.6676, + "step": 17367 + }, + { + "epoch": 2.77, + "learning_rate": 2.1900509371858092e-05, + "loss": 0.6672, + "step": 17368 + }, + { + "epoch": 2.77, + "learning_rate": 2.1897949395490982e-05, + "loss": 0.6517, + "step": 17369 + }, + { + "epoch": 2.77, + "learning_rate": 2.1895389452158822e-05, + "loss": 0.6979, + "step": 17370 + }, + { + "epoch": 2.77, + "learning_rate": 2.1892829541888885e-05, + "loss": 0.6925, + "step": 17371 + }, + { + "epoch": 2.77, + "learning_rate": 2.1890269664708426e-05, + "loss": 0.6772, + "step": 17372 + }, + { + "epoch": 2.77, + "learning_rate": 2.18877098206447e-05, + "loss": 0.618, + "step": 17373 + }, + { + "epoch": 2.77, + "learning_rate": 2.1885150009724987e-05, + "loss": 0.7712, + "step": 17374 + }, + { + "epoch": 2.77, + "learning_rate": 2.188259023197653e-05, + "loss": 0.7273, + "step": 17375 + }, + { + "epoch": 2.77, + "learning_rate": 2.1880030487426593e-05, + "loss": 0.6222, + "step": 17376 + }, + { + "epoch": 2.77, + "learning_rate": 2.187747077610244e-05, + "loss": 0.7095, + "step": 17377 + }, + { + "epoch": 2.77, + "learning_rate": 2.1874911098031324e-05, + "loss": 0.754, + "step": 17378 + }, + { + "epoch": 2.77, + "learning_rate": 2.1872351453240507e-05, + "loss": 0.7066, + "step": 17379 + }, + { + "epoch": 2.77, + "learning_rate": 2.186979184175725e-05, + "loss": 0.6394, + "step": 17380 + }, + { + "epoch": 2.77, + "learning_rate": 2.186723226360881e-05, + "loss": 0.6888, + "step": 17381 + }, + { + "epoch": 2.77, + "learning_rate": 2.1864672718822443e-05, + "loss": 0.6875, + "step": 17382 + }, + { + "epoch": 2.77, + "learning_rate": 2.186211320742541e-05, + "loss": 0.7515, + "step": 17383 + }, + { + "epoch": 2.77, + "learning_rate": 2.185955372944497e-05, + "loss": 0.645, + "step": 17384 + }, + { + "epoch": 2.77, + "learning_rate": 2.1856994284908376e-05, + "loss": 0.6992, + "step": 17385 + }, + { + "epoch": 2.77, + "learning_rate": 2.1854434873842883e-05, + "loss": 0.6695, + "step": 17386 + }, + { + "epoch": 2.77, + "learning_rate": 2.185187549627574e-05, + "loss": 0.6684, + "step": 17387 + }, + { + "epoch": 2.77, + "learning_rate": 2.184931615223422e-05, + "loss": 0.7012, + "step": 17388 + }, + { + "epoch": 2.77, + "learning_rate": 2.1846756841745573e-05, + "loss": 0.6082, + "step": 17389 + }, + { + "epoch": 2.77, + "learning_rate": 2.1844197564837045e-05, + "loss": 0.7424, + "step": 17390 + }, + { + "epoch": 2.77, + "learning_rate": 2.1841638321535897e-05, + "loss": 0.7496, + "step": 17391 + }, + { + "epoch": 2.77, + "learning_rate": 2.1839079111869388e-05, + "loss": 0.7147, + "step": 17392 + }, + { + "epoch": 2.77, + "learning_rate": 2.1836519935864757e-05, + "loss": 0.6192, + "step": 17393 + }, + { + "epoch": 2.77, + "learning_rate": 2.183396079354928e-05, + "loss": 0.7248, + "step": 17394 + }, + { + "epoch": 2.77, + "learning_rate": 2.1831401684950196e-05, + "loss": 0.6799, + "step": 17395 + }, + { + "epoch": 2.77, + "learning_rate": 2.182884261009475e-05, + "loss": 0.6978, + "step": 17396 + }, + { + "epoch": 2.77, + "learning_rate": 2.1826283569010217e-05, + "loss": 0.6931, + "step": 17397 + }, + { + "epoch": 2.77, + "learning_rate": 2.1823724561723836e-05, + "loss": 0.6912, + "step": 17398 + }, + { + "epoch": 2.77, + "learning_rate": 2.1821165588262855e-05, + "loss": 0.6904, + "step": 17399 + }, + { + "epoch": 2.77, + "learning_rate": 2.1818606648654532e-05, + "loss": 0.7111, + "step": 17400 + }, + { + "epoch": 2.77, + "learning_rate": 2.1816047742926118e-05, + "loss": 0.6761, + "step": 17401 + }, + { + "epoch": 2.77, + "learning_rate": 2.181348887110486e-05, + "loss": 0.6396, + "step": 17402 + }, + { + "epoch": 2.77, + "learning_rate": 2.1810930033218013e-05, + "loss": 0.6672, + "step": 17403 + }, + { + "epoch": 2.77, + "learning_rate": 2.180837122929283e-05, + "loss": 0.6998, + "step": 17404 + }, + { + "epoch": 2.77, + "learning_rate": 2.1805812459356545e-05, + "loss": 0.682, + "step": 17405 + }, + { + "epoch": 2.77, + "learning_rate": 2.1803253723436437e-05, + "loss": 0.6762, + "step": 17406 + }, + { + "epoch": 2.77, + "learning_rate": 2.180069502155972e-05, + "loss": 0.692, + "step": 17407 + }, + { + "epoch": 2.77, + "learning_rate": 2.1798136353753663e-05, + "loss": 0.674, + "step": 17408 + }, + { + "epoch": 2.77, + "learning_rate": 2.179557772004551e-05, + "loss": 0.7773, + "step": 17409 + }, + { + "epoch": 2.77, + "learning_rate": 2.1793019120462503e-05, + "loss": 0.6828, + "step": 17410 + }, + { + "epoch": 2.77, + "learning_rate": 2.17904605550319e-05, + "loss": 0.7632, + "step": 17411 + }, + { + "epoch": 2.77, + "learning_rate": 2.1787902023780945e-05, + "loss": 0.763, + "step": 17412 + }, + { + "epoch": 2.77, + "learning_rate": 2.1785343526736878e-05, + "loss": 0.7438, + "step": 17413 + }, + { + "epoch": 2.77, + "learning_rate": 2.1782785063926955e-05, + "loss": 0.7875, + "step": 17414 + }, + { + "epoch": 2.77, + "learning_rate": 2.178022663537842e-05, + "loss": 0.7435, + "step": 17415 + }, + { + "epoch": 2.77, + "learning_rate": 2.177766824111851e-05, + "loss": 0.6784, + "step": 17416 + }, + { + "epoch": 2.77, + "learning_rate": 2.1775109881174483e-05, + "loss": 0.7795, + "step": 17417 + }, + { + "epoch": 2.77, + "learning_rate": 2.1772551555573574e-05, + "loss": 0.6902, + "step": 17418 + }, + { + "epoch": 2.78, + "learning_rate": 2.176999326434303e-05, + "loss": 0.7155, + "step": 17419 + }, + { + "epoch": 2.78, + "learning_rate": 2.1767435007510103e-05, + "loss": 0.714, + "step": 17420 + }, + { + "epoch": 2.78, + "learning_rate": 2.1764876785102028e-05, + "loss": 0.7174, + "step": 17421 + }, + { + "epoch": 2.78, + "learning_rate": 2.1762318597146053e-05, + "loss": 0.6579, + "step": 17422 + }, + { + "epoch": 2.78, + "learning_rate": 2.1759760443669412e-05, + "loss": 0.691, + "step": 17423 + }, + { + "epoch": 2.78, + "learning_rate": 2.1757202324699365e-05, + "loss": 0.6847, + "step": 17424 + }, + { + "epoch": 2.78, + "learning_rate": 2.175464424026314e-05, + "loss": 0.6617, + "step": 17425 + }, + { + "epoch": 2.78, + "learning_rate": 2.1752086190387988e-05, + "loss": 0.7379, + "step": 17426 + }, + { + "epoch": 2.78, + "learning_rate": 2.174952817510114e-05, + "loss": 0.7378, + "step": 17427 + }, + { + "epoch": 2.78, + "learning_rate": 2.1746970194429845e-05, + "loss": 0.5961, + "step": 17428 + }, + { + "epoch": 2.78, + "learning_rate": 2.1744412248401344e-05, + "loss": 0.778, + "step": 17429 + }, + { + "epoch": 2.78, + "learning_rate": 2.174185433704287e-05, + "loss": 0.6606, + "step": 17430 + }, + { + "epoch": 2.78, + "learning_rate": 2.1739296460381675e-05, + "loss": 0.6593, + "step": 17431 + }, + { + "epoch": 2.78, + "learning_rate": 2.1736738618444995e-05, + "loss": 0.7186, + "step": 17432 + }, + { + "epoch": 2.78, + "learning_rate": 2.1734180811260064e-05, + "loss": 0.6724, + "step": 17433 + }, + { + "epoch": 2.78, + "learning_rate": 2.173162303885412e-05, + "loss": 0.7389, + "step": 17434 + }, + { + "epoch": 2.78, + "learning_rate": 2.1729065301254416e-05, + "loss": 0.6711, + "step": 17435 + }, + { + "epoch": 2.78, + "learning_rate": 2.1726507598488173e-05, + "loss": 0.7145, + "step": 17436 + }, + { + "epoch": 2.78, + "learning_rate": 2.172394993058264e-05, + "loss": 0.7144, + "step": 17437 + }, + { + "epoch": 2.78, + "learning_rate": 2.172139229756505e-05, + "loss": 0.7111, + "step": 17438 + }, + { + "epoch": 2.78, + "learning_rate": 2.1718834699462643e-05, + "loss": 0.7072, + "step": 17439 + }, + { + "epoch": 2.78, + "learning_rate": 2.1716277136302652e-05, + "loss": 0.6868, + "step": 17440 + }, + { + "epoch": 2.78, + "learning_rate": 2.171371960811232e-05, + "loss": 0.6925, + "step": 17441 + }, + { + "epoch": 2.78, + "learning_rate": 2.1711162114918877e-05, + "loss": 0.6738, + "step": 17442 + }, + { + "epoch": 2.78, + "learning_rate": 2.1708604656749555e-05, + "loss": 0.7338, + "step": 17443 + }, + { + "epoch": 2.78, + "learning_rate": 2.1706047233631603e-05, + "loss": 0.7274, + "step": 17444 + }, + { + "epoch": 2.78, + "learning_rate": 2.170348984559225e-05, + "loss": 0.6937, + "step": 17445 + }, + { + "epoch": 2.78, + "learning_rate": 2.1700932492658728e-05, + "loss": 0.6571, + "step": 17446 + }, + { + "epoch": 2.78, + "learning_rate": 2.169837517485827e-05, + "loss": 0.6892, + "step": 17447 + }, + { + "epoch": 2.78, + "learning_rate": 2.1695817892218106e-05, + "loss": 0.71, + "step": 17448 + }, + { + "epoch": 2.78, + "learning_rate": 2.1693260644765483e-05, + "loss": 0.6852, + "step": 17449 + }, + { + "epoch": 2.78, + "learning_rate": 2.1690703432527627e-05, + "loss": 0.6582, + "step": 17450 + }, + { + "epoch": 2.78, + "learning_rate": 2.168814625553176e-05, + "loss": 0.6706, + "step": 17451 + }, + { + "epoch": 2.78, + "learning_rate": 2.1685589113805132e-05, + "loss": 0.6358, + "step": 17452 + }, + { + "epoch": 2.78, + "learning_rate": 2.168303200737497e-05, + "loss": 0.6609, + "step": 17453 + }, + { + "epoch": 2.78, + "learning_rate": 2.1680474936268497e-05, + "loss": 0.6465, + "step": 17454 + }, + { + "epoch": 2.78, + "learning_rate": 2.1677917900512955e-05, + "loss": 0.6305, + "step": 17455 + }, + { + "epoch": 2.78, + "learning_rate": 2.167536090013557e-05, + "loss": 0.6812, + "step": 17456 + }, + { + "epoch": 2.78, + "learning_rate": 2.1672803935163573e-05, + "loss": 0.6422, + "step": 17457 + }, + { + "epoch": 2.78, + "learning_rate": 2.1670247005624194e-05, + "loss": 0.6857, + "step": 17458 + }, + { + "epoch": 2.78, + "learning_rate": 2.1667690111544663e-05, + "loss": 0.613, + "step": 17459 + }, + { + "epoch": 2.78, + "learning_rate": 2.1665133252952204e-05, + "loss": 0.6732, + "step": 17460 + }, + { + "epoch": 2.78, + "learning_rate": 2.1662576429874058e-05, + "loss": 0.7567, + "step": 17461 + }, + { + "epoch": 2.78, + "learning_rate": 2.166001964233745e-05, + "loss": 0.704, + "step": 17462 + }, + { + "epoch": 2.78, + "learning_rate": 2.1657462890369596e-05, + "loss": 0.7072, + "step": 17463 + }, + { + "epoch": 2.78, + "learning_rate": 2.1654906173997736e-05, + "loss": 0.7145, + "step": 17464 + }, + { + "epoch": 2.78, + "learning_rate": 2.1652349493249098e-05, + "loss": 0.6601, + "step": 17465 + }, + { + "epoch": 2.78, + "learning_rate": 2.1649792848150908e-05, + "loss": 0.66, + "step": 17466 + }, + { + "epoch": 2.78, + "learning_rate": 2.164723623873039e-05, + "loss": 0.6337, + "step": 17467 + }, + { + "epoch": 2.78, + "learning_rate": 2.1644679665014758e-05, + "loss": 0.756, + "step": 17468 + }, + { + "epoch": 2.78, + "learning_rate": 2.164212312703126e-05, + "loss": 0.7205, + "step": 17469 + }, + { + "epoch": 2.78, + "learning_rate": 2.163956662480711e-05, + "loss": 0.6841, + "step": 17470 + }, + { + "epoch": 2.78, + "learning_rate": 2.1637010158369534e-05, + "loss": 0.7039, + "step": 17471 + }, + { + "epoch": 2.78, + "learning_rate": 2.163445372774576e-05, + "loss": 0.7019, + "step": 17472 + }, + { + "epoch": 2.78, + "learning_rate": 2.163189733296301e-05, + "loss": 0.767, + "step": 17473 + }, + { + "epoch": 2.78, + "learning_rate": 2.1629340974048505e-05, + "loss": 0.7045, + "step": 17474 + }, + { + "epoch": 2.78, + "learning_rate": 2.162678465102948e-05, + "loss": 0.6616, + "step": 17475 + }, + { + "epoch": 2.78, + "learning_rate": 2.1624228363933146e-05, + "loss": 0.6391, + "step": 17476 + }, + { + "epoch": 2.78, + "learning_rate": 2.162167211278673e-05, + "loss": 0.6672, + "step": 17477 + }, + { + "epoch": 2.78, + "learning_rate": 2.1619115897617452e-05, + "loss": 0.6621, + "step": 17478 + }, + { + "epoch": 2.78, + "learning_rate": 2.1616559718452546e-05, + "loss": 0.6616, + "step": 17479 + }, + { + "epoch": 2.78, + "learning_rate": 2.1614003575319213e-05, + "loss": 0.6545, + "step": 17480 + }, + { + "epoch": 2.78, + "learning_rate": 2.16114474682447e-05, + "loss": 0.6829, + "step": 17481 + }, + { + "epoch": 2.79, + "learning_rate": 2.1608891397256204e-05, + "loss": 0.7157, + "step": 17482 + }, + { + "epoch": 2.79, + "learning_rate": 2.1606335362380955e-05, + "loss": 0.6844, + "step": 17483 + }, + { + "epoch": 2.79, + "learning_rate": 2.1603779363646183e-05, + "loss": 0.6449, + "step": 17484 + }, + { + "epoch": 2.79, + "learning_rate": 2.1601223401079095e-05, + "loss": 0.6785, + "step": 17485 + }, + { + "epoch": 2.79, + "learning_rate": 2.159866747470692e-05, + "loss": 0.6496, + "step": 17486 + }, + { + "epoch": 2.79, + "learning_rate": 2.1596111584556866e-05, + "loss": 0.7973, + "step": 17487 + }, + { + "epoch": 2.79, + "learning_rate": 2.159355573065615e-05, + "loss": 0.7266, + "step": 17488 + }, + { + "epoch": 2.79, + "learning_rate": 2.1590999913032007e-05, + "loss": 0.7029, + "step": 17489 + }, + { + "epoch": 2.79, + "learning_rate": 2.158844413171164e-05, + "loss": 0.6895, + "step": 17490 + }, + { + "epoch": 2.79, + "learning_rate": 2.158588838672227e-05, + "loss": 0.7139, + "step": 17491 + }, + { + "epoch": 2.79, + "learning_rate": 2.158333267809112e-05, + "loss": 0.7438, + "step": 17492 + }, + { + "epoch": 2.79, + "learning_rate": 2.1580777005845404e-05, + "loss": 0.7351, + "step": 17493 + }, + { + "epoch": 2.79, + "learning_rate": 2.157822137001233e-05, + "loss": 0.7555, + "step": 17494 + }, + { + "epoch": 2.79, + "learning_rate": 2.157566577061913e-05, + "loss": 0.6446, + "step": 17495 + }, + { + "epoch": 2.79, + "learning_rate": 2.1573110207693006e-05, + "loss": 0.6637, + "step": 17496 + }, + { + "epoch": 2.79, + "learning_rate": 2.1570554681261173e-05, + "loss": 0.6549, + "step": 17497 + }, + { + "epoch": 2.79, + "learning_rate": 2.1567999191350856e-05, + "loss": 0.7876, + "step": 17498 + }, + { + "epoch": 2.79, + "learning_rate": 2.1565443737989266e-05, + "loss": 0.6848, + "step": 17499 + }, + { + "epoch": 2.79, + "learning_rate": 2.156288832120361e-05, + "loss": 0.7006, + "step": 17500 + }, + { + "epoch": 2.79, + "learning_rate": 2.156033294102111e-05, + "loss": 0.7305, + "step": 17501 + }, + { + "epoch": 2.79, + "learning_rate": 2.155777759746898e-05, + "loss": 0.6509, + "step": 17502 + }, + { + "epoch": 2.79, + "learning_rate": 2.155522229057442e-05, + "loss": 0.7033, + "step": 17503 + }, + { + "epoch": 2.79, + "learning_rate": 2.1552667020364655e-05, + "loss": 0.6931, + "step": 17504 + }, + { + "epoch": 2.79, + "learning_rate": 2.1550111786866895e-05, + "loss": 0.7604, + "step": 17505 + }, + { + "epoch": 2.79, + "learning_rate": 2.1547556590108355e-05, + "loss": 0.6624, + "step": 17506 + }, + { + "epoch": 2.79, + "learning_rate": 2.1545001430116237e-05, + "loss": 0.731, + "step": 17507 + }, + { + "epoch": 2.79, + "learning_rate": 2.154244630691775e-05, + "loss": 0.7087, + "step": 17508 + }, + { + "epoch": 2.79, + "learning_rate": 2.1539891220540114e-05, + "loss": 0.7227, + "step": 17509 + }, + { + "epoch": 2.79, + "learning_rate": 2.153733617101054e-05, + "loss": 0.699, + "step": 17510 + }, + { + "epoch": 2.79, + "learning_rate": 2.1534781158356225e-05, + "loss": 0.5912, + "step": 17511 + }, + { + "epoch": 2.79, + "learning_rate": 2.1532226182604394e-05, + "loss": 0.7114, + "step": 17512 + }, + { + "epoch": 2.79, + "learning_rate": 2.152967124378225e-05, + "loss": 0.6532, + "step": 17513 + }, + { + "epoch": 2.79, + "learning_rate": 2.1527116341916992e-05, + "loss": 0.6405, + "step": 17514 + }, + { + "epoch": 2.79, + "learning_rate": 2.1524561477035844e-05, + "loss": 0.6313, + "step": 17515 + }, + { + "epoch": 2.79, + "learning_rate": 2.1522006649166005e-05, + "loss": 0.6316, + "step": 17516 + }, + { + "epoch": 2.79, + "learning_rate": 2.151945185833468e-05, + "loss": 0.6204, + "step": 17517 + }, + { + "epoch": 2.79, + "learning_rate": 2.1516897104569083e-05, + "loss": 0.6706, + "step": 17518 + }, + { + "epoch": 2.79, + "learning_rate": 2.151434238789642e-05, + "loss": 0.6496, + "step": 17519 + }, + { + "epoch": 2.79, + "learning_rate": 2.1511787708343883e-05, + "loss": 0.8258, + "step": 17520 + }, + { + "epoch": 2.79, + "learning_rate": 2.15092330659387e-05, + "loss": 0.6694, + "step": 17521 + }, + { + "epoch": 2.79, + "learning_rate": 2.1506678460708062e-05, + "loss": 0.6316, + "step": 17522 + }, + { + "epoch": 2.79, + "learning_rate": 2.1504123892679176e-05, + "loss": 0.6933, + "step": 17523 + }, + { + "epoch": 2.79, + "learning_rate": 2.1501569361879254e-05, + "loss": 0.6578, + "step": 17524 + }, + { + "epoch": 2.79, + "learning_rate": 2.149901486833549e-05, + "loss": 0.7105, + "step": 17525 + }, + { + "epoch": 2.79, + "learning_rate": 2.1496460412075095e-05, + "loss": 0.7419, + "step": 17526 + }, + { + "epoch": 2.79, + "learning_rate": 2.1493905993125275e-05, + "loss": 0.7447, + "step": 17527 + }, + { + "epoch": 2.79, + "learning_rate": 2.1491351611513215e-05, + "loss": 0.6668, + "step": 17528 + }, + { + "epoch": 2.79, + "learning_rate": 2.1488797267266137e-05, + "loss": 0.6246, + "step": 17529 + }, + { + "epoch": 2.79, + "learning_rate": 2.1486242960411234e-05, + "loss": 0.7111, + "step": 17530 + }, + { + "epoch": 2.79, + "learning_rate": 2.1483688690975705e-05, + "loss": 0.6483, + "step": 17531 + }, + { + "epoch": 2.79, + "learning_rate": 2.1481134458986767e-05, + "loss": 0.6772, + "step": 17532 + }, + { + "epoch": 2.79, + "learning_rate": 2.1478580264471608e-05, + "loss": 0.6626, + "step": 17533 + }, + { + "epoch": 2.79, + "learning_rate": 2.1476026107457426e-05, + "loss": 0.6928, + "step": 17534 + }, + { + "epoch": 2.79, + "learning_rate": 2.147347198797143e-05, + "loss": 0.6895, + "step": 17535 + }, + { + "epoch": 2.79, + "learning_rate": 2.1470917906040818e-05, + "loss": 0.65, + "step": 17536 + }, + { + "epoch": 2.79, + "learning_rate": 2.1468363861692782e-05, + "loss": 0.699, + "step": 17537 + }, + { + "epoch": 2.79, + "learning_rate": 2.1465809854954533e-05, + "loss": 0.7432, + "step": 17538 + }, + { + "epoch": 2.79, + "learning_rate": 2.1463255885853262e-05, + "loss": 0.7569, + "step": 17539 + }, + { + "epoch": 2.79, + "learning_rate": 2.1460701954416168e-05, + "loss": 0.7022, + "step": 17540 + }, + { + "epoch": 2.79, + "learning_rate": 2.1458148060670444e-05, + "loss": 0.7001, + "step": 17541 + }, + { + "epoch": 2.79, + "learning_rate": 2.14555942046433e-05, + "loss": 0.671, + "step": 17542 + }, + { + "epoch": 2.79, + "learning_rate": 2.1453040386361924e-05, + "loss": 0.7195, + "step": 17543 + }, + { + "epoch": 2.79, + "learning_rate": 2.145048660585351e-05, + "loss": 0.7997, + "step": 17544 + }, + { + "epoch": 2.8, + "learning_rate": 2.1447932863145265e-05, + "loss": 0.6845, + "step": 17545 + }, + { + "epoch": 2.8, + "learning_rate": 2.1445379158264378e-05, + "loss": 0.7183, + "step": 17546 + }, + { + "epoch": 2.8, + "learning_rate": 2.144282549123805e-05, + "loss": 0.6944, + "step": 17547 + }, + { + "epoch": 2.8, + "learning_rate": 2.144027186209346e-05, + "loss": 0.7346, + "step": 17548 + }, + { + "epoch": 2.8, + "learning_rate": 2.1437718270857822e-05, + "loss": 0.7372, + "step": 17549 + }, + { + "epoch": 2.8, + "learning_rate": 2.143516471755832e-05, + "loss": 0.6981, + "step": 17550 + }, + { + "epoch": 2.8, + "learning_rate": 2.1432611202222146e-05, + "loss": 0.6576, + "step": 17551 + }, + { + "epoch": 2.8, + "learning_rate": 2.1430057724876494e-05, + "loss": 0.6998, + "step": 17552 + }, + { + "epoch": 2.8, + "learning_rate": 2.1427504285548564e-05, + "loss": 0.7056, + "step": 17553 + }, + { + "epoch": 2.8, + "learning_rate": 2.1424950884265547e-05, + "loss": 0.7077, + "step": 17554 + }, + { + "epoch": 2.8, + "learning_rate": 2.1422397521054623e-05, + "loss": 0.7581, + "step": 17555 + }, + { + "epoch": 2.8, + "learning_rate": 2.1419844195943005e-05, + "loss": 0.686, + "step": 17556 + }, + { + "epoch": 2.8, + "learning_rate": 2.1417290908957868e-05, + "loss": 0.709, + "step": 17557 + }, + { + "epoch": 2.8, + "learning_rate": 2.14147376601264e-05, + "loss": 0.6532, + "step": 17558 + }, + { + "epoch": 2.8, + "learning_rate": 2.1412184449475812e-05, + "loss": 0.6965, + "step": 17559 + }, + { + "epoch": 2.8, + "learning_rate": 2.1409631277033276e-05, + "loss": 0.651, + "step": 17560 + }, + { + "epoch": 2.8, + "learning_rate": 2.1407078142825984e-05, + "loss": 0.6558, + "step": 17561 + }, + { + "epoch": 2.8, + "learning_rate": 2.1404525046881132e-05, + "loss": 0.6557, + "step": 17562 + }, + { + "epoch": 2.8, + "learning_rate": 2.1401971989225906e-05, + "loss": 0.7472, + "step": 17563 + }, + { + "epoch": 2.8, + "learning_rate": 2.139941896988749e-05, + "loss": 0.776, + "step": 17564 + }, + { + "epoch": 2.8, + "learning_rate": 2.139686598889308e-05, + "loss": 0.6895, + "step": 17565 + }, + { + "epoch": 2.8, + "learning_rate": 2.139431304626986e-05, + "loss": 0.6813, + "step": 17566 + }, + { + "epoch": 2.8, + "learning_rate": 2.1391760142045024e-05, + "loss": 0.6783, + "step": 17567 + }, + { + "epoch": 2.8, + "learning_rate": 2.1389207276245746e-05, + "loss": 0.7197, + "step": 17568 + }, + { + "epoch": 2.8, + "learning_rate": 2.1386654448899212e-05, + "loss": 0.6757, + "step": 17569 + }, + { + "epoch": 2.8, + "learning_rate": 2.138410166003262e-05, + "loss": 0.7151, + "step": 17570 + }, + { + "epoch": 2.8, + "learning_rate": 2.1381548909673148e-05, + "loss": 0.7534, + "step": 17571 + }, + { + "epoch": 2.8, + "learning_rate": 2.137899619784798e-05, + "loss": 0.665, + "step": 17572 + }, + { + "epoch": 2.8, + "learning_rate": 2.137644352458431e-05, + "loss": 0.6643, + "step": 17573 + }, + { + "epoch": 2.8, + "learning_rate": 2.1373890889909316e-05, + "loss": 0.6713, + "step": 17574 + }, + { + "epoch": 2.8, + "learning_rate": 2.1371338293850177e-05, + "loss": 0.6912, + "step": 17575 + }, + { + "epoch": 2.8, + "learning_rate": 2.1368785736434087e-05, + "loss": 0.6472, + "step": 17576 + }, + { + "epoch": 2.8, + "learning_rate": 2.1366233217688225e-05, + "loss": 0.6859, + "step": 17577 + }, + { + "epoch": 2.8, + "learning_rate": 2.136368073763977e-05, + "loss": 0.729, + "step": 17578 + }, + { + "epoch": 2.8, + "learning_rate": 2.1361128296315907e-05, + "loss": 0.6611, + "step": 17579 + }, + { + "epoch": 2.8, + "learning_rate": 2.1358575893743823e-05, + "loss": 0.7419, + "step": 17580 + }, + { + "epoch": 2.8, + "learning_rate": 2.135602352995069e-05, + "loss": 0.699, + "step": 17581 + }, + { + "epoch": 2.8, + "learning_rate": 2.13534712049637e-05, + "loss": 0.6615, + "step": 17582 + }, + { + "epoch": 2.8, + "learning_rate": 2.1350918918810023e-05, + "loss": 0.7616, + "step": 17583 + }, + { + "epoch": 2.8, + "learning_rate": 2.1348366671516842e-05, + "loss": 0.7745, + "step": 17584 + }, + { + "epoch": 2.8, + "learning_rate": 2.1345814463111344e-05, + "loss": 0.6537, + "step": 17585 + }, + { + "epoch": 2.8, + "learning_rate": 2.1343262293620704e-05, + "loss": 0.707, + "step": 17586 + }, + { + "epoch": 2.8, + "learning_rate": 2.1340710163072102e-05, + "loss": 0.7301, + "step": 17587 + }, + { + "epoch": 2.8, + "learning_rate": 2.1338158071492715e-05, + "loss": 0.6316, + "step": 17588 + }, + { + "epoch": 2.8, + "learning_rate": 2.1335606018909715e-05, + "loss": 0.7142, + "step": 17589 + }, + { + "epoch": 2.8, + "learning_rate": 2.133305400535029e-05, + "loss": 0.6366, + "step": 17590 + }, + { + "epoch": 2.8, + "learning_rate": 2.1330502030841615e-05, + "loss": 0.6172, + "step": 17591 + }, + { + "epoch": 2.8, + "learning_rate": 2.1327950095410858e-05, + "loss": 0.7023, + "step": 17592 + }, + { + "epoch": 2.8, + "learning_rate": 2.132539819908521e-05, + "loss": 0.6811, + "step": 17593 + }, + { + "epoch": 2.8, + "learning_rate": 2.132284634189184e-05, + "loss": 0.7708, + "step": 17594 + }, + { + "epoch": 2.8, + "learning_rate": 2.132029452385792e-05, + "loss": 0.6929, + "step": 17595 + }, + { + "epoch": 2.8, + "learning_rate": 2.1317742745010633e-05, + "loss": 0.6251, + "step": 17596 + }, + { + "epoch": 2.8, + "learning_rate": 2.131519100537715e-05, + "loss": 0.6743, + "step": 17597 + }, + { + "epoch": 2.8, + "learning_rate": 2.131263930498464e-05, + "loss": 0.6557, + "step": 17598 + }, + { + "epoch": 2.8, + "learning_rate": 2.131008764386029e-05, + "loss": 0.7264, + "step": 17599 + }, + { + "epoch": 2.8, + "learning_rate": 2.1307536022031267e-05, + "loss": 0.6795, + "step": 17600 + }, + { + "epoch": 2.8, + "learning_rate": 2.1304984439524735e-05, + "loss": 0.6306, + "step": 17601 + }, + { + "epoch": 2.8, + "learning_rate": 2.1302432896367882e-05, + "loss": 0.6816, + "step": 17602 + }, + { + "epoch": 2.8, + "learning_rate": 2.1299881392587876e-05, + "loss": 0.7047, + "step": 17603 + }, + { + "epoch": 2.8, + "learning_rate": 2.129732992821188e-05, + "loss": 0.6694, + "step": 17604 + }, + { + "epoch": 2.8, + "learning_rate": 2.1294778503267077e-05, + "loss": 0.676, + "step": 17605 + }, + { + "epoch": 2.8, + "learning_rate": 2.1292227117780632e-05, + "loss": 0.6598, + "step": 17606 + }, + { + "epoch": 2.81, + "learning_rate": 2.1289675771779727e-05, + "loss": 0.6488, + "step": 17607 + }, + { + "epoch": 2.81, + "learning_rate": 2.1287124465291512e-05, + "loss": 0.633, + "step": 17608 + }, + { + "epoch": 2.81, + "learning_rate": 2.1284573198343165e-05, + "loss": 0.801, + "step": 17609 + }, + { + "epoch": 2.81, + "learning_rate": 2.128202197096186e-05, + "loss": 0.6333, + "step": 17610 + }, + { + "epoch": 2.81, + "learning_rate": 2.127947078317477e-05, + "loss": 0.7685, + "step": 17611 + }, + { + "epoch": 2.81, + "learning_rate": 2.127691963500905e-05, + "loss": 0.6862, + "step": 17612 + }, + { + "epoch": 2.81, + "learning_rate": 2.127436852649188e-05, + "loss": 0.6442, + "step": 17613 + }, + { + "epoch": 2.81, + "learning_rate": 2.127181745765042e-05, + "loss": 0.724, + "step": 17614 + }, + { + "epoch": 2.81, + "learning_rate": 2.1269266428511837e-05, + "loss": 0.712, + "step": 17615 + }, + { + "epoch": 2.81, + "learning_rate": 2.1266715439103308e-05, + "loss": 0.6582, + "step": 17616 + }, + { + "epoch": 2.81, + "learning_rate": 2.1264164489451992e-05, + "loss": 0.7137, + "step": 17617 + }, + { + "epoch": 2.81, + "learning_rate": 2.1261613579585054e-05, + "loss": 0.6735, + "step": 17618 + }, + { + "epoch": 2.81, + "learning_rate": 2.1259062709529665e-05, + "loss": 0.6891, + "step": 17619 + }, + { + "epoch": 2.81, + "learning_rate": 2.1256511879312986e-05, + "loss": 0.7126, + "step": 17620 + }, + { + "epoch": 2.81, + "learning_rate": 2.1253961088962176e-05, + "loss": 0.7053, + "step": 17621 + }, + { + "epoch": 2.81, + "learning_rate": 2.1251410338504417e-05, + "loss": 0.7753, + "step": 17622 + }, + { + "epoch": 2.81, + "learning_rate": 2.1248859627966856e-05, + "loss": 0.6973, + "step": 17623 + }, + { + "epoch": 2.81, + "learning_rate": 2.124630895737666e-05, + "loss": 0.7213, + "step": 17624 + }, + { + "epoch": 2.81, + "learning_rate": 2.1243758326760998e-05, + "loss": 0.7465, + "step": 17625 + }, + { + "epoch": 2.81, + "learning_rate": 2.1241207736147036e-05, + "loss": 0.6444, + "step": 17626 + }, + { + "epoch": 2.81, + "learning_rate": 2.1238657185561916e-05, + "loss": 0.7585, + "step": 17627 + }, + { + "epoch": 2.81, + "learning_rate": 2.1236106675032836e-05, + "loss": 0.7349, + "step": 17628 + }, + { + "epoch": 2.81, + "learning_rate": 2.1233556204586913e-05, + "loss": 0.7435, + "step": 17629 + }, + { + "epoch": 2.81, + "learning_rate": 2.123100577425134e-05, + "loss": 0.6186, + "step": 17630 + }, + { + "epoch": 2.81, + "learning_rate": 2.1228455384053265e-05, + "loss": 0.6878, + "step": 17631 + }, + { + "epoch": 2.81, + "learning_rate": 2.1225905034019845e-05, + "loss": 0.6866, + "step": 17632 + }, + { + "epoch": 2.81, + "learning_rate": 2.122335472417825e-05, + "loss": 0.6874, + "step": 17633 + }, + { + "epoch": 2.81, + "learning_rate": 2.1220804454555633e-05, + "loss": 0.6903, + "step": 17634 + }, + { + "epoch": 2.81, + "learning_rate": 2.1218254225179153e-05, + "loss": 0.744, + "step": 17635 + }, + { + "epoch": 2.81, + "learning_rate": 2.121570403607597e-05, + "loss": 0.6439, + "step": 17636 + }, + { + "epoch": 2.81, + "learning_rate": 2.1213153887273247e-05, + "loss": 0.6866, + "step": 17637 + }, + { + "epoch": 2.81, + "learning_rate": 2.1210603778798126e-05, + "loss": 0.7166, + "step": 17638 + }, + { + "epoch": 2.81, + "learning_rate": 2.120805371067778e-05, + "loss": 0.6379, + "step": 17639 + }, + { + "epoch": 2.81, + "learning_rate": 2.1205503682939362e-05, + "loss": 0.6814, + "step": 17640 + }, + { + "epoch": 2.81, + "learning_rate": 2.120295369561002e-05, + "loss": 0.6834, + "step": 17641 + }, + { + "epoch": 2.81, + "learning_rate": 2.120040374871692e-05, + "loss": 0.733, + "step": 17642 + }, + { + "epoch": 2.81, + "learning_rate": 2.1197853842287212e-05, + "loss": 0.6798, + "step": 17643 + }, + { + "epoch": 2.81, + "learning_rate": 2.1195303976348053e-05, + "loss": 0.642, + "step": 17644 + }, + { + "epoch": 2.81, + "learning_rate": 2.1192754150926596e-05, + "loss": 0.7008, + "step": 17645 + }, + { + "epoch": 2.81, + "learning_rate": 2.1190204366049998e-05, + "loss": 0.6326, + "step": 17646 + }, + { + "epoch": 2.81, + "learning_rate": 2.1187654621745414e-05, + "loss": 0.7176, + "step": 17647 + }, + { + "epoch": 2.81, + "learning_rate": 2.1185104918039993e-05, + "loss": 0.6715, + "step": 17648 + }, + { + "epoch": 2.81, + "learning_rate": 2.118255525496088e-05, + "loss": 0.71, + "step": 17649 + }, + { + "epoch": 2.81, + "learning_rate": 2.1180005632535244e-05, + "loss": 0.6346, + "step": 17650 + }, + { + "epoch": 2.81, + "learning_rate": 2.1177456050790225e-05, + "loss": 0.7086, + "step": 17651 + }, + { + "epoch": 2.81, + "learning_rate": 2.1174906509752975e-05, + "loss": 0.6357, + "step": 17652 + }, + { + "epoch": 2.81, + "learning_rate": 2.1172357009450655e-05, + "loss": 0.6364, + "step": 17653 + }, + { + "epoch": 2.81, + "learning_rate": 2.116980754991041e-05, + "loss": 0.734, + "step": 17654 + }, + { + "epoch": 2.81, + "learning_rate": 2.1167258131159377e-05, + "loss": 0.7035, + "step": 17655 + }, + { + "epoch": 2.81, + "learning_rate": 2.116470875322473e-05, + "loss": 0.7379, + "step": 17656 + }, + { + "epoch": 2.81, + "learning_rate": 2.1162159416133602e-05, + "loss": 0.7121, + "step": 17657 + }, + { + "epoch": 2.81, + "learning_rate": 2.115961011991315e-05, + "loss": 0.6626, + "step": 17658 + }, + { + "epoch": 2.81, + "learning_rate": 2.1157060864590512e-05, + "loss": 0.7218, + "step": 17659 + }, + { + "epoch": 2.81, + "learning_rate": 2.115451165019285e-05, + "loss": 0.6775, + "step": 17660 + }, + { + "epoch": 2.81, + "learning_rate": 2.11519624767473e-05, + "loss": 0.7677, + "step": 17661 + }, + { + "epoch": 2.81, + "learning_rate": 2.114941334428101e-05, + "loss": 0.7323, + "step": 17662 + }, + { + "epoch": 2.81, + "learning_rate": 2.1146864252821134e-05, + "loss": 0.6222, + "step": 17663 + }, + { + "epoch": 2.81, + "learning_rate": 2.1144315202394814e-05, + "loss": 0.6592, + "step": 17664 + }, + { + "epoch": 2.81, + "learning_rate": 2.1141766193029195e-05, + "loss": 0.6995, + "step": 17665 + }, + { + "epoch": 2.81, + "learning_rate": 2.1139217224751426e-05, + "loss": 0.7282, + "step": 17666 + }, + { + "epoch": 2.81, + "learning_rate": 2.113666829758865e-05, + "loss": 0.6188, + "step": 17667 + }, + { + "epoch": 2.81, + "learning_rate": 2.1134119411568017e-05, + "loss": 0.7046, + "step": 17668 + }, + { + "epoch": 2.81, + "learning_rate": 2.1131570566716654e-05, + "loss": 0.6598, + "step": 17669 + }, + { + "epoch": 2.82, + "learning_rate": 2.1129021763061724e-05, + "loss": 0.6844, + "step": 17670 + }, + { + "epoch": 2.82, + "learning_rate": 2.112647300063036e-05, + "loss": 0.6133, + "step": 17671 + }, + { + "epoch": 2.82, + "learning_rate": 2.1123924279449707e-05, + "loss": 0.642, + "step": 17672 + }, + { + "epoch": 2.82, + "learning_rate": 2.11213755995469e-05, + "loss": 0.6626, + "step": 17673 + }, + { + "epoch": 2.82, + "learning_rate": 2.1118826960949097e-05, + "loss": 0.7685, + "step": 17674 + }, + { + "epoch": 2.82, + "learning_rate": 2.1116278363683428e-05, + "loss": 0.7505, + "step": 17675 + }, + { + "epoch": 2.82, + "learning_rate": 2.111372980777703e-05, + "loss": 0.6921, + "step": 17676 + }, + { + "epoch": 2.82, + "learning_rate": 2.1111181293257057e-05, + "loss": 0.7214, + "step": 17677 + }, + { + "epoch": 2.82, + "learning_rate": 2.1108632820150643e-05, + "loss": 0.6884, + "step": 17678 + }, + { + "epoch": 2.82, + "learning_rate": 2.110608438848492e-05, + "loss": 0.6766, + "step": 17679 + }, + { + "epoch": 2.82, + "learning_rate": 2.1103535998287043e-05, + "loss": 0.7647, + "step": 17680 + }, + { + "epoch": 2.82, + "learning_rate": 2.110098764958414e-05, + "loss": 0.6133, + "step": 17681 + }, + { + "epoch": 2.82, + "learning_rate": 2.1098439342403343e-05, + "loss": 0.6873, + "step": 17682 + }, + { + "epoch": 2.82, + "learning_rate": 2.1095891076771808e-05, + "loss": 0.7329, + "step": 17683 + }, + { + "epoch": 2.82, + "learning_rate": 2.1093342852716662e-05, + "loss": 0.7819, + "step": 17684 + }, + { + "epoch": 2.82, + "learning_rate": 2.1090794670265033e-05, + "loss": 0.7059, + "step": 17685 + }, + { + "epoch": 2.82, + "learning_rate": 2.1088246529444078e-05, + "loss": 0.6978, + "step": 17686 + }, + { + "epoch": 2.82, + "learning_rate": 2.108569843028092e-05, + "loss": 0.7099, + "step": 17687 + }, + { + "epoch": 2.82, + "learning_rate": 2.1083150372802705e-05, + "loss": 0.7533, + "step": 17688 + }, + { + "epoch": 2.82, + "learning_rate": 2.1080602357036556e-05, + "loss": 0.6727, + "step": 17689 + }, + { + "epoch": 2.82, + "learning_rate": 2.10780543830096e-05, + "loss": 0.6583, + "step": 17690 + }, + { + "epoch": 2.82, + "learning_rate": 2.1075506450748998e-05, + "loss": 0.7039, + "step": 17691 + }, + { + "epoch": 2.82, + "learning_rate": 2.1072958560281864e-05, + "loss": 0.7049, + "step": 17692 + }, + { + "epoch": 2.82, + "learning_rate": 2.1070410711635332e-05, + "loss": 0.67, + "step": 17693 + }, + { + "epoch": 2.82, + "learning_rate": 2.106786290483655e-05, + "loss": 0.7887, + "step": 17694 + }, + { + "epoch": 2.82, + "learning_rate": 2.1065315139912635e-05, + "loss": 0.8372, + "step": 17695 + }, + { + "epoch": 2.82, + "learning_rate": 2.1062767416890722e-05, + "loss": 0.7055, + "step": 17696 + }, + { + "epoch": 2.82, + "learning_rate": 2.1060219735797952e-05, + "loss": 0.6842, + "step": 17697 + }, + { + "epoch": 2.82, + "learning_rate": 2.105767209666145e-05, + "loss": 0.6878, + "step": 17698 + }, + { + "epoch": 2.82, + "learning_rate": 2.1055124499508343e-05, + "loss": 0.6799, + "step": 17699 + }, + { + "epoch": 2.82, + "learning_rate": 2.1052576944365764e-05, + "loss": 0.7161, + "step": 17700 + }, + { + "epoch": 2.82, + "learning_rate": 2.1050029431260848e-05, + "loss": 0.6317, + "step": 17701 + }, + { + "epoch": 2.82, + "learning_rate": 2.1047481960220716e-05, + "loss": 0.7529, + "step": 17702 + }, + { + "epoch": 2.82, + "learning_rate": 2.1044934531272507e-05, + "loss": 0.6732, + "step": 17703 + }, + { + "epoch": 2.82, + "learning_rate": 2.1042387144443344e-05, + "loss": 0.6385, + "step": 17704 + }, + { + "epoch": 2.82, + "learning_rate": 2.1039839799760348e-05, + "loss": 0.6819, + "step": 17705 + }, + { + "epoch": 2.82, + "learning_rate": 2.103729249725066e-05, + "loss": 0.73, + "step": 17706 + }, + { + "epoch": 2.82, + "learning_rate": 2.10347452369414e-05, + "loss": 0.6491, + "step": 17707 + }, + { + "epoch": 2.82, + "learning_rate": 2.1032198018859706e-05, + "loss": 0.6535, + "step": 17708 + }, + { + "epoch": 2.82, + "learning_rate": 2.1029650843032685e-05, + "loss": 0.7175, + "step": 17709 + }, + { + "epoch": 2.82, + "learning_rate": 2.1027103709487467e-05, + "loss": 0.6987, + "step": 17710 + }, + { + "epoch": 2.82, + "learning_rate": 2.1024556618251186e-05, + "loss": 0.6711, + "step": 17711 + }, + { + "epoch": 2.82, + "learning_rate": 2.1022009569350964e-05, + "loss": 0.7363, + "step": 17712 + }, + { + "epoch": 2.82, + "learning_rate": 2.1019462562813925e-05, + "loss": 0.7242, + "step": 17713 + }, + { + "epoch": 2.82, + "learning_rate": 2.1016915598667193e-05, + "loss": 0.6593, + "step": 17714 + }, + { + "epoch": 2.82, + "learning_rate": 2.1014368676937892e-05, + "loss": 0.6613, + "step": 17715 + }, + { + "epoch": 2.82, + "learning_rate": 2.101182179765314e-05, + "loss": 0.7775, + "step": 17716 + }, + { + "epoch": 2.82, + "learning_rate": 2.1009274960840068e-05, + "loss": 0.6038, + "step": 17717 + }, + { + "epoch": 2.82, + "learning_rate": 2.1006728166525803e-05, + "loss": 0.7103, + "step": 17718 + }, + { + "epoch": 2.82, + "learning_rate": 2.1004181414737444e-05, + "loss": 0.6767, + "step": 17719 + }, + { + "epoch": 2.82, + "learning_rate": 2.1001634705502136e-05, + "loss": 0.7135, + "step": 17720 + }, + { + "epoch": 2.82, + "learning_rate": 2.0999088038846992e-05, + "loss": 0.6689, + "step": 17721 + }, + { + "epoch": 2.82, + "learning_rate": 2.0996541414799126e-05, + "loss": 0.6531, + "step": 17722 + }, + { + "epoch": 2.82, + "learning_rate": 2.0993994833385667e-05, + "loss": 0.7333, + "step": 17723 + }, + { + "epoch": 2.82, + "learning_rate": 2.099144829463373e-05, + "loss": 0.6194, + "step": 17724 + }, + { + "epoch": 2.82, + "learning_rate": 2.0988901798570433e-05, + "loss": 0.7514, + "step": 17725 + }, + { + "epoch": 2.82, + "learning_rate": 2.09863553452229e-05, + "loss": 0.6271, + "step": 17726 + }, + { + "epoch": 2.82, + "learning_rate": 2.0983808934618247e-05, + "loss": 0.6742, + "step": 17727 + }, + { + "epoch": 2.82, + "learning_rate": 2.0981262566783584e-05, + "loss": 0.6648, + "step": 17728 + }, + { + "epoch": 2.82, + "learning_rate": 2.097871624174605e-05, + "loss": 0.6645, + "step": 17729 + }, + { + "epoch": 2.82, + "learning_rate": 2.0976169959532732e-05, + "loss": 0.6753, + "step": 17730 + }, + { + "epoch": 2.82, + "learning_rate": 2.0973623720170764e-05, + "loss": 0.6982, + "step": 17731 + }, + { + "epoch": 2.82, + "learning_rate": 2.0971077523687264e-05, + "loss": 0.7152, + "step": 17732 + }, + { + "epoch": 2.83, + "learning_rate": 2.0968531370109334e-05, + "loss": 0.7392, + "step": 17733 + }, + { + "epoch": 2.83, + "learning_rate": 2.0965985259464103e-05, + "loss": 0.6654, + "step": 17734 + }, + { + "epoch": 2.83, + "learning_rate": 2.0963439191778678e-05, + "loss": 0.6925, + "step": 17735 + }, + { + "epoch": 2.83, + "learning_rate": 2.096089316708017e-05, + "loss": 0.6488, + "step": 17736 + }, + { + "epoch": 2.83, + "learning_rate": 2.0958347185395706e-05, + "loss": 0.6874, + "step": 17737 + }, + { + "epoch": 2.83, + "learning_rate": 2.095580124675239e-05, + "loss": 0.645, + "step": 17738 + }, + { + "epoch": 2.83, + "learning_rate": 2.0953255351177327e-05, + "loss": 0.6687, + "step": 17739 + }, + { + "epoch": 2.83, + "learning_rate": 2.0950709498697645e-05, + "loss": 0.6874, + "step": 17740 + }, + { + "epoch": 2.83, + "learning_rate": 2.094816368934045e-05, + "loss": 0.6955, + "step": 17741 + }, + { + "epoch": 2.83, + "learning_rate": 2.094561792313284e-05, + "loss": 0.677, + "step": 17742 + }, + { + "epoch": 2.83, + "learning_rate": 2.094307220010195e-05, + "loss": 0.7883, + "step": 17743 + }, + { + "epoch": 2.83, + "learning_rate": 2.094052652027488e-05, + "loss": 0.646, + "step": 17744 + }, + { + "epoch": 2.83, + "learning_rate": 2.0937980883678727e-05, + "loss": 0.7397, + "step": 17745 + }, + { + "epoch": 2.83, + "learning_rate": 2.0935435290340616e-05, + "loss": 0.6387, + "step": 17746 + }, + { + "epoch": 2.83, + "learning_rate": 2.0932889740287657e-05, + "loss": 0.6801, + "step": 17747 + }, + { + "epoch": 2.83, + "learning_rate": 2.0930344233546942e-05, + "loss": 0.7296, + "step": 17748 + }, + { + "epoch": 2.83, + "learning_rate": 2.092779877014561e-05, + "loss": 0.6206, + "step": 17749 + }, + { + "epoch": 2.83, + "learning_rate": 2.092525335011073e-05, + "loss": 0.659, + "step": 17750 + }, + { + "epoch": 2.83, + "learning_rate": 2.0922707973469435e-05, + "loss": 0.6724, + "step": 17751 + }, + { + "epoch": 2.83, + "learning_rate": 2.0920162640248826e-05, + "loss": 0.7692, + "step": 17752 + }, + { + "epoch": 2.83, + "learning_rate": 2.0917617350476e-05, + "loss": 0.681, + "step": 17753 + }, + { + "epoch": 2.83, + "learning_rate": 2.091507210417808e-05, + "loss": 0.6795, + "step": 17754 + }, + { + "epoch": 2.83, + "learning_rate": 2.0912526901382157e-05, + "loss": 0.7545, + "step": 17755 + }, + { + "epoch": 2.83, + "learning_rate": 2.090998174211534e-05, + "loss": 0.7782, + "step": 17756 + }, + { + "epoch": 2.83, + "learning_rate": 2.0907436626404735e-05, + "loss": 0.6808, + "step": 17757 + }, + { + "epoch": 2.83, + "learning_rate": 2.090489155427745e-05, + "loss": 0.6624, + "step": 17758 + }, + { + "epoch": 2.83, + "learning_rate": 2.0902346525760574e-05, + "loss": 0.7201, + "step": 17759 + }, + { + "epoch": 2.83, + "learning_rate": 2.089980154088123e-05, + "loss": 0.7074, + "step": 17760 + }, + { + "epoch": 2.83, + "learning_rate": 2.0897256599666506e-05, + "loss": 0.6785, + "step": 17761 + }, + { + "epoch": 2.83, + "learning_rate": 2.0894711702143505e-05, + "loss": 0.7031, + "step": 17762 + }, + { + "epoch": 2.83, + "learning_rate": 2.0892166848339336e-05, + "loss": 0.7408, + "step": 17763 + }, + { + "epoch": 2.83, + "learning_rate": 2.08896220382811e-05, + "loss": 0.6829, + "step": 17764 + }, + { + "epoch": 2.83, + "learning_rate": 2.088707727199589e-05, + "loss": 0.6727, + "step": 17765 + }, + { + "epoch": 2.83, + "learning_rate": 2.0884532549510806e-05, + "loss": 0.6694, + "step": 17766 + }, + { + "epoch": 2.83, + "learning_rate": 2.0881987870852956e-05, + "loss": 0.6954, + "step": 17767 + }, + { + "epoch": 2.83, + "learning_rate": 2.0879443236049434e-05, + "loss": 0.6658, + "step": 17768 + }, + { + "epoch": 2.83, + "learning_rate": 2.0876898645127348e-05, + "loss": 0.6927, + "step": 17769 + }, + { + "epoch": 2.83, + "learning_rate": 2.0874354098113777e-05, + "loss": 0.7039, + "step": 17770 + }, + { + "epoch": 2.83, + "learning_rate": 2.0871809595035835e-05, + "loss": 0.6539, + "step": 17771 + }, + { + "epoch": 2.83, + "learning_rate": 2.0869265135920617e-05, + "loss": 0.7553, + "step": 17772 + }, + { + "epoch": 2.83, + "learning_rate": 2.086672072079521e-05, + "loss": 0.6969, + "step": 17773 + }, + { + "epoch": 2.83, + "learning_rate": 2.086417634968672e-05, + "loss": 0.7313, + "step": 17774 + }, + { + "epoch": 2.83, + "learning_rate": 2.0861632022622245e-05, + "loss": 0.6418, + "step": 17775 + }, + { + "epoch": 2.83, + "learning_rate": 2.0859087739628874e-05, + "loss": 0.6395, + "step": 17776 + }, + { + "epoch": 2.83, + "learning_rate": 2.08565435007337e-05, + "loss": 0.6532, + "step": 17777 + }, + { + "epoch": 2.83, + "learning_rate": 2.085399930596383e-05, + "loss": 0.7098, + "step": 17778 + }, + { + "epoch": 2.83, + "learning_rate": 2.085145515534635e-05, + "loss": 0.6985, + "step": 17779 + }, + { + "epoch": 2.83, + "learning_rate": 2.0848911048908346e-05, + "loss": 0.6823, + "step": 17780 + }, + { + "epoch": 2.83, + "learning_rate": 2.0846366986676928e-05, + "loss": 0.6997, + "step": 17781 + }, + { + "epoch": 2.83, + "learning_rate": 2.0843822968679176e-05, + "loss": 0.7401, + "step": 17782 + }, + { + "epoch": 2.83, + "learning_rate": 2.0841278994942182e-05, + "loss": 0.6857, + "step": 17783 + }, + { + "epoch": 2.83, + "learning_rate": 2.0838735065493048e-05, + "loss": 0.6265, + "step": 17784 + }, + { + "epoch": 2.83, + "learning_rate": 2.083619118035886e-05, + "loss": 0.6525, + "step": 17785 + }, + { + "epoch": 2.83, + "learning_rate": 2.0833647339566698e-05, + "loss": 0.6862, + "step": 17786 + }, + { + "epoch": 2.83, + "learning_rate": 2.083110354314367e-05, + "loss": 0.7412, + "step": 17787 + }, + { + "epoch": 2.83, + "learning_rate": 2.082855979111686e-05, + "loss": 0.6733, + "step": 17788 + }, + { + "epoch": 2.83, + "learning_rate": 2.082601608351336e-05, + "loss": 0.7529, + "step": 17789 + }, + { + "epoch": 2.83, + "learning_rate": 2.082347242036025e-05, + "loss": 0.6374, + "step": 17790 + }, + { + "epoch": 2.83, + "learning_rate": 2.082092880168462e-05, + "loss": 0.7026, + "step": 17791 + }, + { + "epoch": 2.83, + "learning_rate": 2.081838522751356e-05, + "loss": 0.7221, + "step": 17792 + }, + { + "epoch": 2.83, + "learning_rate": 2.0815841697874166e-05, + "loss": 0.6353, + "step": 17793 + }, + { + "epoch": 2.83, + "learning_rate": 2.0813298212793508e-05, + "loss": 0.6574, + "step": 17794 + }, + { + "epoch": 2.83, + "learning_rate": 2.0810754772298686e-05, + "loss": 0.7403, + "step": 17795 + }, + { + "epoch": 2.84, + "learning_rate": 2.0808211376416785e-05, + "loss": 0.6883, + "step": 17796 + }, + { + "epoch": 2.84, + "learning_rate": 2.0805668025174885e-05, + "loss": 0.7826, + "step": 17797 + }, + { + "epoch": 2.84, + "learning_rate": 2.0803124718600075e-05, + "loss": 0.7359, + "step": 17798 + }, + { + "epoch": 2.84, + "learning_rate": 2.0800581456719444e-05, + "loss": 0.71, + "step": 17799 + }, + { + "epoch": 2.84, + "learning_rate": 2.0798038239560057e-05, + "loss": 0.7827, + "step": 17800 + }, + { + "epoch": 2.84, + "learning_rate": 2.0795495067149022e-05, + "loss": 0.724, + "step": 17801 + }, + { + "epoch": 2.84, + "learning_rate": 2.0792951939513412e-05, + "loss": 0.6838, + "step": 17802 + }, + { + "epoch": 2.84, + "learning_rate": 2.079040885668031e-05, + "loss": 0.7132, + "step": 17803 + }, + { + "epoch": 2.84, + "learning_rate": 2.0787865818676794e-05, + "loss": 0.7108, + "step": 17804 + }, + { + "epoch": 2.84, + "learning_rate": 2.0785322825529955e-05, + "loss": 0.6264, + "step": 17805 + }, + { + "epoch": 2.84, + "learning_rate": 2.0782779877266863e-05, + "loss": 0.7101, + "step": 17806 + }, + { + "epoch": 2.84, + "learning_rate": 2.0780236973914607e-05, + "loss": 0.7539, + "step": 17807 + }, + { + "epoch": 2.84, + "learning_rate": 2.0777694115500267e-05, + "loss": 0.6879, + "step": 17808 + }, + { + "epoch": 2.84, + "learning_rate": 2.077515130205093e-05, + "loss": 0.7861, + "step": 17809 + }, + { + "epoch": 2.84, + "learning_rate": 2.0772608533593657e-05, + "loss": 0.682, + "step": 17810 + }, + { + "epoch": 2.84, + "learning_rate": 2.0770065810155535e-05, + "loss": 0.7078, + "step": 17811 + }, + { + "epoch": 2.84, + "learning_rate": 2.0767523131763646e-05, + "loss": 0.6995, + "step": 17812 + }, + { + "epoch": 2.84, + "learning_rate": 2.076498049844507e-05, + "loss": 0.6603, + "step": 17813 + }, + { + "epoch": 2.84, + "learning_rate": 2.076243791022687e-05, + "loss": 0.6781, + "step": 17814 + }, + { + "epoch": 2.84, + "learning_rate": 2.0759895367136142e-05, + "loss": 0.7349, + "step": 17815 + }, + { + "epoch": 2.84, + "learning_rate": 2.0757352869199953e-05, + "loss": 0.6218, + "step": 17816 + }, + { + "epoch": 2.84, + "learning_rate": 2.0754810416445375e-05, + "loss": 0.6583, + "step": 17817 + }, + { + "epoch": 2.84, + "learning_rate": 2.0752268008899494e-05, + "loss": 0.6345, + "step": 17818 + }, + { + "epoch": 2.84, + "learning_rate": 2.0749725646589384e-05, + "loss": 0.6615, + "step": 17819 + }, + { + "epoch": 2.84, + "learning_rate": 2.0747183329542105e-05, + "loss": 0.6358, + "step": 17820 + }, + { + "epoch": 2.84, + "learning_rate": 2.0744641057784747e-05, + "loss": 0.7059, + "step": 17821 + }, + { + "epoch": 2.84, + "learning_rate": 2.074209883134438e-05, + "loss": 0.6589, + "step": 17822 + }, + { + "epoch": 2.84, + "learning_rate": 2.073955665024807e-05, + "loss": 0.6235, + "step": 17823 + }, + { + "epoch": 2.84, + "learning_rate": 2.07370145145229e-05, + "loss": 0.631, + "step": 17824 + }, + { + "epoch": 2.84, + "learning_rate": 2.073447242419594e-05, + "loss": 0.6778, + "step": 17825 + }, + { + "epoch": 2.84, + "learning_rate": 2.073193037929425e-05, + "loss": 0.7098, + "step": 17826 + }, + { + "epoch": 2.84, + "learning_rate": 2.0729388379844917e-05, + "loss": 0.6619, + "step": 17827 + }, + { + "epoch": 2.84, + "learning_rate": 2.0726846425875005e-05, + "loss": 0.7046, + "step": 17828 + }, + { + "epoch": 2.84, + "learning_rate": 2.0724304517411592e-05, + "loss": 0.7035, + "step": 17829 + }, + { + "epoch": 2.84, + "learning_rate": 2.072176265448173e-05, + "loss": 0.638, + "step": 17830 + }, + { + "epoch": 2.84, + "learning_rate": 2.0719220837112495e-05, + "loss": 0.6358, + "step": 17831 + }, + { + "epoch": 2.84, + "learning_rate": 2.0716679065330967e-05, + "loss": 0.6641, + "step": 17832 + }, + { + "epoch": 2.84, + "learning_rate": 2.0714137339164203e-05, + "loss": 0.6552, + "step": 17833 + }, + { + "epoch": 2.84, + "learning_rate": 2.0711595658639267e-05, + "loss": 0.6191, + "step": 17834 + }, + { + "epoch": 2.84, + "learning_rate": 2.0709054023783244e-05, + "loss": 0.666, + "step": 17835 + }, + { + "epoch": 2.84, + "learning_rate": 2.0706512434623188e-05, + "loss": 0.688, + "step": 17836 + }, + { + "epoch": 2.84, + "learning_rate": 2.0703970891186164e-05, + "loss": 0.6377, + "step": 17837 + }, + { + "epoch": 2.84, + "learning_rate": 2.0701429393499246e-05, + "loss": 0.7321, + "step": 17838 + }, + { + "epoch": 2.84, + "learning_rate": 2.0698887941589495e-05, + "loss": 0.679, + "step": 17839 + }, + { + "epoch": 2.84, + "learning_rate": 2.0696346535483973e-05, + "loss": 0.73, + "step": 17840 + }, + { + "epoch": 2.84, + "learning_rate": 2.0693805175209753e-05, + "loss": 0.7041, + "step": 17841 + }, + { + "epoch": 2.84, + "learning_rate": 2.069126386079389e-05, + "loss": 0.7334, + "step": 17842 + }, + { + "epoch": 2.84, + "learning_rate": 2.068872259226345e-05, + "loss": 0.7508, + "step": 17843 + }, + { + "epoch": 2.84, + "learning_rate": 2.0686181369645497e-05, + "loss": 0.715, + "step": 17844 + }, + { + "epoch": 2.84, + "learning_rate": 2.0683640192967098e-05, + "loss": 0.6765, + "step": 17845 + }, + { + "epoch": 2.84, + "learning_rate": 2.06810990622553e-05, + "loss": 0.6394, + "step": 17846 + }, + { + "epoch": 2.84, + "learning_rate": 2.0678557977537185e-05, + "loss": 0.652, + "step": 17847 + }, + { + "epoch": 2.84, + "learning_rate": 2.0676016938839803e-05, + "loss": 0.6755, + "step": 17848 + }, + { + "epoch": 2.84, + "learning_rate": 2.067347594619021e-05, + "loss": 0.7201, + "step": 17849 + }, + { + "epoch": 2.84, + "learning_rate": 2.067093499961549e-05, + "loss": 0.6166, + "step": 17850 + }, + { + "epoch": 2.84, + "learning_rate": 2.0668394099142662e-05, + "loss": 0.6637, + "step": 17851 + }, + { + "epoch": 2.84, + "learning_rate": 2.0665853244798816e-05, + "loss": 0.6861, + "step": 17852 + }, + { + "epoch": 2.84, + "learning_rate": 2.0663312436611002e-05, + "loss": 0.6787, + "step": 17853 + }, + { + "epoch": 2.84, + "learning_rate": 2.0660771674606273e-05, + "loss": 0.6477, + "step": 17854 + }, + { + "epoch": 2.84, + "learning_rate": 2.0658230958811694e-05, + "loss": 0.7172, + "step": 17855 + }, + { + "epoch": 2.84, + "learning_rate": 2.065569028925432e-05, + "loss": 0.6896, + "step": 17856 + }, + { + "epoch": 2.84, + "learning_rate": 2.06531496659612e-05, + "loss": 0.7457, + "step": 17857 + }, + { + "epoch": 2.84, + "learning_rate": 2.0650609088959408e-05, + "loss": 0.691, + "step": 17858 + }, + { + "epoch": 2.85, + "learning_rate": 2.0648068558275986e-05, + "loss": 0.6802, + "step": 17859 + }, + { + "epoch": 2.85, + "learning_rate": 2.0645528073937984e-05, + "loss": 0.7091, + "step": 17860 + }, + { + "epoch": 2.85, + "learning_rate": 2.0642987635972472e-05, + "loss": 0.6106, + "step": 17861 + }, + { + "epoch": 2.85, + "learning_rate": 2.0640447244406493e-05, + "loss": 0.7457, + "step": 17862 + }, + { + "epoch": 2.85, + "learning_rate": 2.0637906899267105e-05, + "loss": 0.6433, + "step": 17863 + }, + { + "epoch": 2.85, + "learning_rate": 2.0635366600581362e-05, + "loss": 0.7284, + "step": 17864 + }, + { + "epoch": 2.85, + "learning_rate": 2.0632826348376314e-05, + "loss": 0.7289, + "step": 17865 + }, + { + "epoch": 2.85, + "learning_rate": 2.063028614267901e-05, + "loss": 0.6305, + "step": 17866 + }, + { + "epoch": 2.85, + "learning_rate": 2.0627745983516507e-05, + "loss": 0.694, + "step": 17867 + }, + { + "epoch": 2.85, + "learning_rate": 2.062520587091586e-05, + "loss": 0.7003, + "step": 17868 + }, + { + "epoch": 2.85, + "learning_rate": 2.0622665804904106e-05, + "loss": 0.6627, + "step": 17869 + }, + { + "epoch": 2.85, + "learning_rate": 2.0620125785508324e-05, + "loss": 0.6112, + "step": 17870 + }, + { + "epoch": 2.85, + "learning_rate": 2.0617585812755526e-05, + "loss": 0.7069, + "step": 17871 + }, + { + "epoch": 2.85, + "learning_rate": 2.061504588667278e-05, + "loss": 0.7279, + "step": 17872 + }, + { + "epoch": 2.85, + "learning_rate": 2.0612506007287136e-05, + "loss": 0.6993, + "step": 17873 + }, + { + "epoch": 2.85, + "learning_rate": 2.0609966174625635e-05, + "loss": 0.6544, + "step": 17874 + }, + { + "epoch": 2.85, + "learning_rate": 2.060742638871533e-05, + "loss": 0.7246, + "step": 17875 + }, + { + "epoch": 2.85, + "learning_rate": 2.060488664958327e-05, + "loss": 0.6743, + "step": 17876 + }, + { + "epoch": 2.85, + "learning_rate": 2.0602346957256494e-05, + "loss": 0.724, + "step": 17877 + }, + { + "epoch": 2.85, + "learning_rate": 2.0599807311762058e-05, + "loss": 0.6754, + "step": 17878 + }, + { + "epoch": 2.85, + "learning_rate": 2.0597267713127e-05, + "loss": 0.6947, + "step": 17879 + }, + { + "epoch": 2.85, + "learning_rate": 2.0594728161378365e-05, + "loss": 0.6849, + "step": 17880 + }, + { + "epoch": 2.85, + "learning_rate": 2.0592188656543206e-05, + "loss": 0.6511, + "step": 17881 + }, + { + "epoch": 2.85, + "learning_rate": 2.0589649198648563e-05, + "loss": 0.7156, + "step": 17882 + }, + { + "epoch": 2.85, + "learning_rate": 2.0587109787721477e-05, + "loss": 0.7262, + "step": 17883 + }, + { + "epoch": 2.85, + "learning_rate": 2.058457042378899e-05, + "loss": 0.7324, + "step": 17884 + }, + { + "epoch": 2.85, + "learning_rate": 2.0582031106878148e-05, + "loss": 0.6628, + "step": 17885 + }, + { + "epoch": 2.85, + "learning_rate": 2.0579491837015994e-05, + "loss": 0.6889, + "step": 17886 + }, + { + "epoch": 2.85, + "learning_rate": 2.0576952614229562e-05, + "loss": 0.7788, + "step": 17887 + }, + { + "epoch": 2.85, + "learning_rate": 2.0574413438545907e-05, + "loss": 0.7272, + "step": 17888 + }, + { + "epoch": 2.85, + "learning_rate": 2.057187430999206e-05, + "loss": 0.7983, + "step": 17889 + }, + { + "epoch": 2.85, + "learning_rate": 2.0569335228595072e-05, + "loss": 0.6436, + "step": 17890 + }, + { + "epoch": 2.85, + "learning_rate": 2.056679619438196e-05, + "loss": 0.6871, + "step": 17891 + }, + { + "epoch": 2.85, + "learning_rate": 2.0564257207379783e-05, + "loss": 0.7115, + "step": 17892 + }, + { + "epoch": 2.85, + "learning_rate": 2.0561718267615573e-05, + "loss": 0.6836, + "step": 17893 + }, + { + "epoch": 2.85, + "learning_rate": 2.0559179375116368e-05, + "loss": 0.7075, + "step": 17894 + }, + { + "epoch": 2.85, + "learning_rate": 2.0556640529909204e-05, + "loss": 0.7303, + "step": 17895 + }, + { + "epoch": 2.85, + "learning_rate": 2.0554101732021126e-05, + "loss": 0.6683, + "step": 17896 + }, + { + "epoch": 2.85, + "learning_rate": 2.0551562981479168e-05, + "loss": 0.6565, + "step": 17897 + }, + { + "epoch": 2.85, + "learning_rate": 2.0549024278310352e-05, + "loss": 0.7287, + "step": 17898 + }, + { + "epoch": 2.85, + "learning_rate": 2.0546485622541735e-05, + "loss": 0.6884, + "step": 17899 + }, + { + "epoch": 2.85, + "learning_rate": 2.0543947014200338e-05, + "loss": 0.6737, + "step": 17900 + }, + { + "epoch": 2.85, + "learning_rate": 2.05414084533132e-05, + "loss": 0.6632, + "step": 17901 + }, + { + "epoch": 2.85, + "learning_rate": 2.0538869939907358e-05, + "loss": 0.7329, + "step": 17902 + }, + { + "epoch": 2.85, + "learning_rate": 2.053633147400984e-05, + "loss": 0.6456, + "step": 17903 + }, + { + "epoch": 2.85, + "learning_rate": 2.053379305564768e-05, + "loss": 0.6249, + "step": 17904 + }, + { + "epoch": 2.85, + "learning_rate": 2.053125468484792e-05, + "loss": 0.6878, + "step": 17905 + }, + { + "epoch": 2.85, + "learning_rate": 2.052871636163758e-05, + "loss": 0.8402, + "step": 17906 + }, + { + "epoch": 2.85, + "learning_rate": 2.052617808604369e-05, + "loss": 0.8256, + "step": 17907 + }, + { + "epoch": 2.85, + "learning_rate": 2.0523639858093295e-05, + "loss": 0.6037, + "step": 17908 + }, + { + "epoch": 2.85, + "learning_rate": 2.052110167781342e-05, + "loss": 0.7213, + "step": 17909 + }, + { + "epoch": 2.85, + "learning_rate": 2.0518563545231092e-05, + "loss": 0.638, + "step": 17910 + }, + { + "epoch": 2.85, + "learning_rate": 2.051602546037334e-05, + "loss": 0.7347, + "step": 17911 + }, + { + "epoch": 2.85, + "learning_rate": 2.051348742326719e-05, + "loss": 0.6648, + "step": 17912 + }, + { + "epoch": 2.85, + "learning_rate": 2.051094943393968e-05, + "loss": 0.6655, + "step": 17913 + }, + { + "epoch": 2.85, + "learning_rate": 2.0508411492417832e-05, + "loss": 0.6951, + "step": 17914 + }, + { + "epoch": 2.85, + "learning_rate": 2.050587359872867e-05, + "loss": 0.7625, + "step": 17915 + }, + { + "epoch": 2.85, + "learning_rate": 2.050333575289923e-05, + "loss": 0.6967, + "step": 17916 + }, + { + "epoch": 2.85, + "learning_rate": 2.0500797954956536e-05, + "loss": 0.7076, + "step": 17917 + }, + { + "epoch": 2.85, + "learning_rate": 2.0498260204927608e-05, + "loss": 0.7122, + "step": 17918 + }, + { + "epoch": 2.85, + "learning_rate": 2.049572250283948e-05, + "loss": 0.6565, + "step": 17919 + }, + { + "epoch": 2.85, + "learning_rate": 2.049318484871917e-05, + "loss": 0.6683, + "step": 17920 + }, + { + "epoch": 2.86, + "learning_rate": 2.0490647242593704e-05, + "loss": 0.6578, + "step": 17921 + }, + { + "epoch": 2.86, + "learning_rate": 2.048810968449011e-05, + "loss": 0.6594, + "step": 17922 + }, + { + "epoch": 2.86, + "learning_rate": 2.0485572174435406e-05, + "loss": 0.6867, + "step": 17923 + }, + { + "epoch": 2.86, + "learning_rate": 2.0483034712456617e-05, + "loss": 0.7388, + "step": 17924 + }, + { + "epoch": 2.86, + "learning_rate": 2.048049729858077e-05, + "loss": 0.685, + "step": 17925 + }, + { + "epoch": 2.86, + "learning_rate": 2.0477959932834885e-05, + "loss": 0.7119, + "step": 17926 + }, + { + "epoch": 2.86, + "learning_rate": 2.047542261524597e-05, + "loss": 0.6183, + "step": 17927 + }, + { + "epoch": 2.86, + "learning_rate": 2.047288534584107e-05, + "loss": 0.717, + "step": 17928 + }, + { + "epoch": 2.86, + "learning_rate": 2.0470348124647187e-05, + "loss": 0.7087, + "step": 17929 + }, + { + "epoch": 2.86, + "learning_rate": 2.0467810951691354e-05, + "loss": 0.706, + "step": 17930 + }, + { + "epoch": 2.86, + "learning_rate": 2.046527382700058e-05, + "loss": 0.7048, + "step": 17931 + }, + { + "epoch": 2.86, + "learning_rate": 2.0462736750601873e-05, + "loss": 0.712, + "step": 17932 + }, + { + "epoch": 2.86, + "learning_rate": 2.0460199722522277e-05, + "loss": 0.72, + "step": 17933 + }, + { + "epoch": 2.86, + "learning_rate": 2.0457662742788797e-05, + "loss": 0.7063, + "step": 17934 + }, + { + "epoch": 2.86, + "learning_rate": 2.0455125811428444e-05, + "loss": 0.6665, + "step": 17935 + }, + { + "epoch": 2.86, + "learning_rate": 2.045258892846825e-05, + "loss": 0.652, + "step": 17936 + }, + { + "epoch": 2.86, + "learning_rate": 2.045005209393522e-05, + "loss": 0.6431, + "step": 17937 + }, + { + "epoch": 2.86, + "learning_rate": 2.044751530785637e-05, + "loss": 0.7166, + "step": 17938 + }, + { + "epoch": 2.86, + "learning_rate": 2.0444978570258723e-05, + "loss": 0.7104, + "step": 17939 + }, + { + "epoch": 2.86, + "learning_rate": 2.0442441881169288e-05, + "loss": 0.6895, + "step": 17940 + }, + { + "epoch": 2.86, + "learning_rate": 2.0439905240615075e-05, + "loss": 0.712, + "step": 17941 + }, + { + "epoch": 2.86, + "learning_rate": 2.0437368648623107e-05, + "loss": 0.7249, + "step": 17942 + }, + { + "epoch": 2.86, + "learning_rate": 2.0434832105220395e-05, + "loss": 0.6484, + "step": 17943 + }, + { + "epoch": 2.86, + "learning_rate": 2.0432295610433945e-05, + "loss": 0.6467, + "step": 17944 + }, + { + "epoch": 2.86, + "learning_rate": 2.0429759164290775e-05, + "loss": 0.6921, + "step": 17945 + }, + { + "epoch": 2.86, + "learning_rate": 2.0427222766817897e-05, + "loss": 0.6277, + "step": 17946 + }, + { + "epoch": 2.86, + "learning_rate": 2.0424686418042316e-05, + "loss": 0.6632, + "step": 17947 + }, + { + "epoch": 2.86, + "learning_rate": 2.042215011799105e-05, + "loss": 0.7323, + "step": 17948 + }, + { + "epoch": 2.86, + "learning_rate": 2.0419613866691107e-05, + "loss": 0.703, + "step": 17949 + }, + { + "epoch": 2.86, + "learning_rate": 2.041707766416949e-05, + "loss": 0.6123, + "step": 17950 + }, + { + "epoch": 2.86, + "learning_rate": 2.041454151045323e-05, + "loss": 0.6243, + "step": 17951 + }, + { + "epoch": 2.86, + "learning_rate": 2.0412005405569303e-05, + "loss": 0.7668, + "step": 17952 + }, + { + "epoch": 2.86, + "learning_rate": 2.0409469349544738e-05, + "loss": 0.6443, + "step": 17953 + }, + { + "epoch": 2.86, + "learning_rate": 2.040693334240654e-05, + "loss": 0.6868, + "step": 17954 + }, + { + "epoch": 2.86, + "learning_rate": 2.0404397384181706e-05, + "loss": 0.6856, + "step": 17955 + }, + { + "epoch": 2.86, + "learning_rate": 2.0401861474897254e-05, + "loss": 0.7214, + "step": 17956 + }, + { + "epoch": 2.86, + "learning_rate": 2.0399325614580186e-05, + "loss": 0.6929, + "step": 17957 + }, + { + "epoch": 2.86, + "learning_rate": 2.0396789803257503e-05, + "loss": 0.7499, + "step": 17958 + }, + { + "epoch": 2.86, + "learning_rate": 2.0394254040956218e-05, + "loss": 0.6369, + "step": 17959 + }, + { + "epoch": 2.86, + "learning_rate": 2.039171832770333e-05, + "loss": 0.6824, + "step": 17960 + }, + { + "epoch": 2.86, + "learning_rate": 2.0389182663525843e-05, + "loss": 0.7146, + "step": 17961 + }, + { + "epoch": 2.86, + "learning_rate": 2.0386647048450763e-05, + "loss": 0.7884, + "step": 17962 + }, + { + "epoch": 2.86, + "learning_rate": 2.0384111482505095e-05, + "loss": 0.6853, + "step": 17963 + }, + { + "epoch": 2.86, + "learning_rate": 2.0381575965715826e-05, + "loss": 0.7198, + "step": 17964 + }, + { + "epoch": 2.86, + "learning_rate": 2.037904049810998e-05, + "loss": 0.737, + "step": 17965 + }, + { + "epoch": 2.86, + "learning_rate": 2.0376505079714546e-05, + "loss": 0.7281, + "step": 17966 + }, + { + "epoch": 2.86, + "learning_rate": 2.037396971055652e-05, + "loss": 0.6974, + "step": 17967 + }, + { + "epoch": 2.86, + "learning_rate": 2.037143439066291e-05, + "loss": 0.6536, + "step": 17968 + }, + { + "epoch": 2.86, + "learning_rate": 2.0368899120060718e-05, + "loss": 0.6762, + "step": 17969 + }, + { + "epoch": 2.86, + "learning_rate": 2.0366363898776934e-05, + "loss": 0.7005, + "step": 17970 + }, + { + "epoch": 2.86, + "learning_rate": 2.0363828726838575e-05, + "loss": 0.639, + "step": 17971 + }, + { + "epoch": 2.86, + "learning_rate": 2.036129360427261e-05, + "loss": 0.7199, + "step": 17972 + }, + { + "epoch": 2.86, + "learning_rate": 2.0358758531106057e-05, + "loss": 0.6283, + "step": 17973 + }, + { + "epoch": 2.86, + "learning_rate": 2.0356223507365908e-05, + "loss": 0.7017, + "step": 17974 + }, + { + "epoch": 2.86, + "learning_rate": 2.035368853307915e-05, + "loss": 0.6921, + "step": 17975 + }, + { + "epoch": 2.86, + "learning_rate": 2.03511536082728e-05, + "loss": 0.7194, + "step": 17976 + }, + { + "epoch": 2.86, + "learning_rate": 2.034861873297384e-05, + "loss": 0.6248, + "step": 17977 + }, + { + "epoch": 2.86, + "learning_rate": 2.034608390720926e-05, + "loss": 0.6942, + "step": 17978 + }, + { + "epoch": 2.86, + "learning_rate": 2.0343549131006066e-05, + "loss": 0.7056, + "step": 17979 + }, + { + "epoch": 2.86, + "learning_rate": 2.0341014404391248e-05, + "loss": 0.6597, + "step": 17980 + }, + { + "epoch": 2.86, + "learning_rate": 2.033847972739179e-05, + "loss": 0.6616, + "step": 17981 + }, + { + "epoch": 2.86, + "learning_rate": 2.0335945100034702e-05, + "loss": 0.6661, + "step": 17982 + }, + { + "epoch": 2.86, + "learning_rate": 2.0333410522346965e-05, + "loss": 0.6659, + "step": 17983 + }, + { + "epoch": 2.87, + "learning_rate": 2.033087599435557e-05, + "loss": 0.676, + "step": 17984 + }, + { + "epoch": 2.87, + "learning_rate": 2.0328341516087514e-05, + "loss": 0.6779, + "step": 17985 + }, + { + "epoch": 2.87, + "learning_rate": 2.032580708756978e-05, + "loss": 0.6523, + "step": 17986 + }, + { + "epoch": 2.87, + "learning_rate": 2.0323272708829367e-05, + "loss": 0.6833, + "step": 17987 + }, + { + "epoch": 2.87, + "learning_rate": 2.0320738379893258e-05, + "loss": 0.6296, + "step": 17988 + }, + { + "epoch": 2.87, + "learning_rate": 2.031820410078845e-05, + "loss": 0.7007, + "step": 17989 + }, + { + "epoch": 2.87, + "learning_rate": 2.031566987154192e-05, + "loss": 0.6148, + "step": 17990 + }, + { + "epoch": 2.87, + "learning_rate": 2.0313135692180674e-05, + "loss": 0.6163, + "step": 17991 + }, + { + "epoch": 2.87, + "learning_rate": 2.0310601562731672e-05, + "loss": 0.7697, + "step": 17992 + }, + { + "epoch": 2.87, + "learning_rate": 2.030806748322192e-05, + "loss": 0.677, + "step": 17993 + }, + { + "epoch": 2.87, + "learning_rate": 2.0305533453678406e-05, + "loss": 0.6376, + "step": 17994 + }, + { + "epoch": 2.87, + "learning_rate": 2.0302999474128104e-05, + "loss": 0.7586, + "step": 17995 + }, + { + "epoch": 2.87, + "learning_rate": 2.0300465544598008e-05, + "loss": 0.6548, + "step": 17996 + }, + { + "epoch": 2.87, + "learning_rate": 2.0297931665115104e-05, + "loss": 0.7087, + "step": 17997 + }, + { + "epoch": 2.87, + "learning_rate": 2.0295397835706366e-05, + "loss": 0.7062, + "step": 17998 + }, + { + "epoch": 2.87, + "learning_rate": 2.0292864056398793e-05, + "loss": 0.667, + "step": 17999 + }, + { + "epoch": 2.87, + "learning_rate": 2.0290330327219358e-05, + "loss": 0.6744, + "step": 18000 + }, + { + "epoch": 2.87, + "learning_rate": 2.0287796648195044e-05, + "loss": 0.6302, + "step": 18001 + }, + { + "epoch": 2.87, + "learning_rate": 2.0285263019352832e-05, + "loss": 0.7839, + "step": 18002 + }, + { + "epoch": 2.87, + "learning_rate": 2.028272944071971e-05, + "loss": 0.7345, + "step": 18003 + }, + { + "epoch": 2.87, + "learning_rate": 2.028019591232266e-05, + "loss": 0.6938, + "step": 18004 + }, + { + "epoch": 2.87, + "learning_rate": 2.0277662434188645e-05, + "loss": 0.6982, + "step": 18005 + }, + { + "epoch": 2.87, + "learning_rate": 2.027512900634467e-05, + "loss": 0.6877, + "step": 18006 + }, + { + "epoch": 2.87, + "learning_rate": 2.02725956288177e-05, + "loss": 0.6861, + "step": 18007 + }, + { + "epoch": 2.87, + "learning_rate": 2.027006230163471e-05, + "loss": 0.7574, + "step": 18008 + }, + { + "epoch": 2.87, + "learning_rate": 2.026752902482269e-05, + "loss": 0.7078, + "step": 18009 + }, + { + "epoch": 2.87, + "learning_rate": 2.0264995798408617e-05, + "loss": 0.6798, + "step": 18010 + }, + { + "epoch": 2.87, + "learning_rate": 2.0262462622419468e-05, + "loss": 0.6754, + "step": 18011 + }, + { + "epoch": 2.87, + "learning_rate": 2.02599294968822e-05, + "loss": 0.6982, + "step": 18012 + }, + { + "epoch": 2.87, + "learning_rate": 2.0257396421823816e-05, + "loss": 0.7745, + "step": 18013 + }, + { + "epoch": 2.87, + "learning_rate": 2.025486339727128e-05, + "loss": 0.635, + "step": 18014 + }, + { + "epoch": 2.87, + "learning_rate": 2.0252330423251567e-05, + "loss": 0.7097, + "step": 18015 + }, + { + "epoch": 2.87, + "learning_rate": 2.0249797499791647e-05, + "loss": 0.6538, + "step": 18016 + }, + { + "epoch": 2.87, + "learning_rate": 2.0247264626918507e-05, + "loss": 0.7356, + "step": 18017 + }, + { + "epoch": 2.87, + "learning_rate": 2.0244731804659112e-05, + "loss": 0.6625, + "step": 18018 + }, + { + "epoch": 2.87, + "learning_rate": 2.024219903304043e-05, + "loss": 0.6566, + "step": 18019 + }, + { + "epoch": 2.87, + "learning_rate": 2.0239666312089444e-05, + "loss": 0.6676, + "step": 18020 + }, + { + "epoch": 2.87, + "learning_rate": 2.0237133641833124e-05, + "loss": 0.7917, + "step": 18021 + }, + { + "epoch": 2.87, + "learning_rate": 2.0234601022298434e-05, + "loss": 0.7707, + "step": 18022 + }, + { + "epoch": 2.87, + "learning_rate": 2.0232068453512355e-05, + "loss": 0.6739, + "step": 18023 + }, + { + "epoch": 2.87, + "learning_rate": 2.022953593550185e-05, + "loss": 0.7511, + "step": 18024 + }, + { + "epoch": 2.87, + "learning_rate": 2.0227003468293887e-05, + "loss": 0.5906, + "step": 18025 + }, + { + "epoch": 2.87, + "learning_rate": 2.022447105191545e-05, + "loss": 0.6779, + "step": 18026 + }, + { + "epoch": 2.87, + "learning_rate": 2.022193868639349e-05, + "loss": 0.6681, + "step": 18027 + }, + { + "epoch": 2.87, + "learning_rate": 2.021940637175498e-05, + "loss": 0.69, + "step": 18028 + }, + { + "epoch": 2.87, + "learning_rate": 2.0216874108026894e-05, + "loss": 0.6947, + "step": 18029 + }, + { + "epoch": 2.87, + "learning_rate": 2.0214341895236192e-05, + "loss": 0.6751, + "step": 18030 + }, + { + "epoch": 2.87, + "learning_rate": 2.0211809733409854e-05, + "loss": 0.7285, + "step": 18031 + }, + { + "epoch": 2.87, + "learning_rate": 2.0209277622574827e-05, + "loss": 0.7664, + "step": 18032 + }, + { + "epoch": 2.87, + "learning_rate": 2.020674556275808e-05, + "loss": 0.6656, + "step": 18033 + }, + { + "epoch": 2.87, + "learning_rate": 2.0204213553986586e-05, + "loss": 0.8072, + "step": 18034 + }, + { + "epoch": 2.87, + "learning_rate": 2.020168159628731e-05, + "loss": 0.6323, + "step": 18035 + }, + { + "epoch": 2.87, + "learning_rate": 2.0199149689687204e-05, + "loss": 0.7122, + "step": 18036 + }, + { + "epoch": 2.87, + "learning_rate": 2.0196617834213248e-05, + "loss": 0.6462, + "step": 18037 + }, + { + "epoch": 2.87, + "learning_rate": 2.019408602989239e-05, + "loss": 0.6436, + "step": 18038 + }, + { + "epoch": 2.87, + "learning_rate": 2.0191554276751596e-05, + "loss": 0.6899, + "step": 18039 + }, + { + "epoch": 2.87, + "learning_rate": 2.0189022574817835e-05, + "loss": 0.7059, + "step": 18040 + }, + { + "epoch": 2.87, + "learning_rate": 2.018649092411806e-05, + "loss": 0.6765, + "step": 18041 + }, + { + "epoch": 2.87, + "learning_rate": 2.0183959324679234e-05, + "loss": 0.7569, + "step": 18042 + }, + { + "epoch": 2.87, + "learning_rate": 2.0181427776528324e-05, + "loss": 0.6453, + "step": 18043 + }, + { + "epoch": 2.87, + "learning_rate": 2.0178896279692278e-05, + "loss": 0.7028, + "step": 18044 + }, + { + "epoch": 2.87, + "learning_rate": 2.0176364834198054e-05, + "loss": 0.6942, + "step": 18045 + }, + { + "epoch": 2.87, + "learning_rate": 2.017383344007262e-05, + "loss": 0.6234, + "step": 18046 + }, + { + "epoch": 2.88, + "learning_rate": 2.0171302097342934e-05, + "loss": 0.6578, + "step": 18047 + }, + { + "epoch": 2.88, + "learning_rate": 2.0168770806035943e-05, + "loss": 0.6568, + "step": 18048 + }, + { + "epoch": 2.88, + "learning_rate": 2.0166239566178615e-05, + "loss": 0.6521, + "step": 18049 + }, + { + "epoch": 2.88, + "learning_rate": 2.01637083777979e-05, + "loss": 0.7065, + "step": 18050 + }, + { + "epoch": 2.88, + "learning_rate": 2.0161177240920747e-05, + "loss": 0.6361, + "step": 18051 + }, + { + "epoch": 2.88, + "learning_rate": 2.015864615557414e-05, + "loss": 0.6972, + "step": 18052 + }, + { + "epoch": 2.88, + "learning_rate": 2.015611512178499e-05, + "loss": 0.7263, + "step": 18053 + }, + { + "epoch": 2.88, + "learning_rate": 2.0153584139580285e-05, + "loss": 0.6785, + "step": 18054 + }, + { + "epoch": 2.88, + "learning_rate": 2.0151053208986964e-05, + "loss": 0.6666, + "step": 18055 + }, + { + "epoch": 2.88, + "learning_rate": 2.0148522330031977e-05, + "loss": 0.6737, + "step": 18056 + }, + { + "epoch": 2.88, + "learning_rate": 2.0145991502742283e-05, + "loss": 0.6772, + "step": 18057 + }, + { + "epoch": 2.88, + "learning_rate": 2.0143460727144837e-05, + "loss": 0.6761, + "step": 18058 + }, + { + "epoch": 2.88, + "learning_rate": 2.014093000326658e-05, + "loss": 0.7397, + "step": 18059 + }, + { + "epoch": 2.88, + "learning_rate": 2.0138399331134474e-05, + "loss": 0.6108, + "step": 18060 + }, + { + "epoch": 2.88, + "learning_rate": 2.0135868710775464e-05, + "loss": 0.7136, + "step": 18061 + }, + { + "epoch": 2.88, + "learning_rate": 2.0133338142216493e-05, + "loss": 0.7179, + "step": 18062 + }, + { + "epoch": 2.88, + "learning_rate": 2.013080762548452e-05, + "loss": 0.7077, + "step": 18063 + }, + { + "epoch": 2.88, + "learning_rate": 2.0128277160606492e-05, + "loss": 0.6863, + "step": 18064 + }, + { + "epoch": 2.88, + "learning_rate": 2.0125746747609347e-05, + "loss": 0.6737, + "step": 18065 + }, + { + "epoch": 2.88, + "learning_rate": 2.012321638652005e-05, + "loss": 0.7542, + "step": 18066 + }, + { + "epoch": 2.88, + "learning_rate": 2.0120686077365533e-05, + "loss": 0.7046, + "step": 18067 + }, + { + "epoch": 2.88, + "learning_rate": 2.011815582017274e-05, + "loss": 0.7354, + "step": 18068 + }, + { + "epoch": 2.88, + "learning_rate": 2.0115625614968633e-05, + "loss": 0.6956, + "step": 18069 + }, + { + "epoch": 2.88, + "learning_rate": 2.0113095461780145e-05, + "loss": 0.6634, + "step": 18070 + }, + { + "epoch": 2.88, + "learning_rate": 2.0110565360634223e-05, + "loss": 0.6517, + "step": 18071 + }, + { + "epoch": 2.88, + "learning_rate": 2.0108035311557824e-05, + "loss": 0.693, + "step": 18072 + }, + { + "epoch": 2.88, + "learning_rate": 2.010550531457786e-05, + "loss": 0.6685, + "step": 18073 + }, + { + "epoch": 2.88, + "learning_rate": 2.0102975369721305e-05, + "loss": 0.6995, + "step": 18074 + }, + { + "epoch": 2.88, + "learning_rate": 2.0100445477015083e-05, + "loss": 0.6871, + "step": 18075 + }, + { + "epoch": 2.88, + "learning_rate": 2.009791563648614e-05, + "loss": 0.6891, + "step": 18076 + }, + { + "epoch": 2.88, + "learning_rate": 2.0095385848161425e-05, + "loss": 0.6519, + "step": 18077 + }, + { + "epoch": 2.88, + "learning_rate": 2.0092856112067875e-05, + "loss": 0.6653, + "step": 18078 + }, + { + "epoch": 2.88, + "learning_rate": 2.009032642823242e-05, + "loss": 0.753, + "step": 18079 + }, + { + "epoch": 2.88, + "learning_rate": 2.008779679668201e-05, + "loss": 0.6625, + "step": 18080 + }, + { + "epoch": 2.88, + "learning_rate": 2.008526721744359e-05, + "loss": 0.6116, + "step": 18081 + }, + { + "epoch": 2.88, + "learning_rate": 2.0082737690544076e-05, + "loss": 0.5915, + "step": 18082 + }, + { + "epoch": 2.88, + "learning_rate": 2.008020821601043e-05, + "loss": 0.668, + "step": 18083 + }, + { + "epoch": 2.88, + "learning_rate": 2.0077678793869577e-05, + "loss": 0.7451, + "step": 18084 + }, + { + "epoch": 2.88, + "learning_rate": 2.0075149424148453e-05, + "loss": 0.6578, + "step": 18085 + }, + { + "epoch": 2.88, + "learning_rate": 2.0072620106874006e-05, + "loss": 0.6065, + "step": 18086 + }, + { + "epoch": 2.88, + "learning_rate": 2.0070090842073158e-05, + "loss": 0.7039, + "step": 18087 + }, + { + "epoch": 2.88, + "learning_rate": 2.006756162977285e-05, + "loss": 0.698, + "step": 18088 + }, + { + "epoch": 2.88, + "learning_rate": 2.0065032470000018e-05, + "loss": 0.7959, + "step": 18089 + }, + { + "epoch": 2.88, + "learning_rate": 2.0062503362781594e-05, + "loss": 0.7354, + "step": 18090 + }, + { + "epoch": 2.88, + "learning_rate": 2.0059974308144504e-05, + "loss": 0.6258, + "step": 18091 + }, + { + "epoch": 2.88, + "learning_rate": 2.0057445306115707e-05, + "loss": 0.7416, + "step": 18092 + }, + { + "epoch": 2.88, + "learning_rate": 2.0054916356722102e-05, + "loss": 0.767, + "step": 18093 + }, + { + "epoch": 2.88, + "learning_rate": 2.005238745999064e-05, + "loss": 0.6905, + "step": 18094 + }, + { + "epoch": 2.88, + "learning_rate": 2.004985861594825e-05, + "loss": 0.6647, + "step": 18095 + }, + { + "epoch": 2.88, + "learning_rate": 2.004732982462185e-05, + "loss": 0.7203, + "step": 18096 + }, + { + "epoch": 2.88, + "learning_rate": 2.004480108603839e-05, + "loss": 0.7035, + "step": 18097 + }, + { + "epoch": 2.88, + "learning_rate": 2.004227240022479e-05, + "loss": 0.6557, + "step": 18098 + }, + { + "epoch": 2.88, + "learning_rate": 2.0039743767207968e-05, + "loss": 0.6737, + "step": 18099 + }, + { + "epoch": 2.88, + "learning_rate": 2.0037215187014873e-05, + "loss": 0.69, + "step": 18100 + }, + { + "epoch": 2.88, + "learning_rate": 2.0034686659672425e-05, + "loss": 0.6453, + "step": 18101 + }, + { + "epoch": 2.88, + "learning_rate": 2.003215818520754e-05, + "loss": 0.714, + "step": 18102 + }, + { + "epoch": 2.88, + "learning_rate": 2.0029629763647166e-05, + "loss": 0.6822, + "step": 18103 + }, + { + "epoch": 2.88, + "learning_rate": 2.0027101395018212e-05, + "loss": 0.6538, + "step": 18104 + }, + { + "epoch": 2.88, + "learning_rate": 2.0024573079347604e-05, + "loss": 0.6241, + "step": 18105 + }, + { + "epoch": 2.88, + "learning_rate": 2.002204481666228e-05, + "loss": 0.6678, + "step": 18106 + }, + { + "epoch": 2.88, + "learning_rate": 2.0019516606989154e-05, + "loss": 0.6303, + "step": 18107 + }, + { + "epoch": 2.88, + "learning_rate": 2.0016988450355155e-05, + "loss": 0.7487, + "step": 18108 + }, + { + "epoch": 2.88, + "learning_rate": 2.0014460346787198e-05, + "loss": 0.6418, + "step": 18109 + }, + { + "epoch": 2.89, + "learning_rate": 2.0011932296312215e-05, + "loss": 0.6767, + "step": 18110 + }, + { + "epoch": 2.89, + "learning_rate": 2.0009404298957124e-05, + "loss": 0.6949, + "step": 18111 + }, + { + "epoch": 2.89, + "learning_rate": 2.0006876354748854e-05, + "loss": 0.6557, + "step": 18112 + }, + { + "epoch": 2.89, + "learning_rate": 2.000434846371431e-05, + "loss": 0.6553, + "step": 18113 + }, + { + "epoch": 2.89, + "learning_rate": 2.0001820625880427e-05, + "loss": 0.6882, + "step": 18114 + }, + { + "epoch": 2.89, + "learning_rate": 1.999929284127412e-05, + "loss": 0.7063, + "step": 18115 + }, + { + "epoch": 2.89, + "learning_rate": 1.99967651099223e-05, + "loss": 0.6833, + "step": 18116 + }, + { + "epoch": 2.89, + "learning_rate": 1.99942374318519e-05, + "loss": 0.7176, + "step": 18117 + }, + { + "epoch": 2.89, + "learning_rate": 1.9991709807089836e-05, + "loss": 0.6785, + "step": 18118 + }, + { + "epoch": 2.89, + "learning_rate": 1.9989182235663013e-05, + "loss": 0.6887, + "step": 18119 + }, + { + "epoch": 2.89, + "learning_rate": 1.9986654717598365e-05, + "loss": 0.6547, + "step": 18120 + }, + { + "epoch": 2.89, + "learning_rate": 1.99841272529228e-05, + "loss": 0.6679, + "step": 18121 + }, + { + "epoch": 2.89, + "learning_rate": 1.9981599841663235e-05, + "loss": 0.6248, + "step": 18122 + }, + { + "epoch": 2.89, + "learning_rate": 1.9979072483846578e-05, + "loss": 0.6681, + "step": 18123 + }, + { + "epoch": 2.89, + "learning_rate": 1.9976545179499757e-05, + "loss": 0.7365, + "step": 18124 + }, + { + "epoch": 2.89, + "learning_rate": 1.9974017928649676e-05, + "loss": 0.5602, + "step": 18125 + }, + { + "epoch": 2.89, + "learning_rate": 1.9971490731323252e-05, + "loss": 0.6453, + "step": 18126 + }, + { + "epoch": 2.89, + "learning_rate": 1.9968963587547403e-05, + "loss": 0.6597, + "step": 18127 + }, + { + "epoch": 2.89, + "learning_rate": 1.996643649734904e-05, + "loss": 0.7389, + "step": 18128 + }, + { + "epoch": 2.89, + "learning_rate": 1.9963909460755063e-05, + "loss": 0.6917, + "step": 18129 + }, + { + "epoch": 2.89, + "learning_rate": 1.99613824777924e-05, + "loss": 0.6763, + "step": 18130 + }, + { + "epoch": 2.89, + "learning_rate": 1.9958855548487955e-05, + "loss": 0.7075, + "step": 18131 + }, + { + "epoch": 2.89, + "learning_rate": 1.9956328672868642e-05, + "loss": 0.7176, + "step": 18132 + }, + { + "epoch": 2.89, + "learning_rate": 1.995380185096136e-05, + "loss": 0.7312, + "step": 18133 + }, + { + "epoch": 2.89, + "learning_rate": 1.995127508279302e-05, + "loss": 0.6521, + "step": 18134 + }, + { + "epoch": 2.89, + "learning_rate": 1.9948748368390542e-05, + "loss": 0.7781, + "step": 18135 + }, + { + "epoch": 2.89, + "learning_rate": 1.9946221707780825e-05, + "loss": 0.7475, + "step": 18136 + }, + { + "epoch": 2.89, + "learning_rate": 1.994369510099078e-05, + "loss": 0.6873, + "step": 18137 + }, + { + "epoch": 2.89, + "learning_rate": 1.994116854804731e-05, + "loss": 0.6602, + "step": 18138 + }, + { + "epoch": 2.89, + "learning_rate": 1.9938642048977323e-05, + "loss": 0.6613, + "step": 18139 + }, + { + "epoch": 2.89, + "learning_rate": 1.9936115603807726e-05, + "loss": 0.6505, + "step": 18140 + }, + { + "epoch": 2.89, + "learning_rate": 1.9933589212565427e-05, + "loss": 0.6744, + "step": 18141 + }, + { + "epoch": 2.89, + "learning_rate": 1.9931062875277328e-05, + "loss": 0.6453, + "step": 18142 + }, + { + "epoch": 2.89, + "learning_rate": 1.992853659197032e-05, + "loss": 0.6432, + "step": 18143 + }, + { + "epoch": 2.89, + "learning_rate": 1.9926010362671328e-05, + "loss": 0.6877, + "step": 18144 + }, + { + "epoch": 2.89, + "learning_rate": 1.992348418740725e-05, + "loss": 0.644, + "step": 18145 + }, + { + "epoch": 2.89, + "learning_rate": 1.992095806620497e-05, + "loss": 0.7395, + "step": 18146 + }, + { + "epoch": 2.89, + "learning_rate": 1.9918431999091415e-05, + "loss": 0.6557, + "step": 18147 + }, + { + "epoch": 2.89, + "learning_rate": 1.991590598609347e-05, + "loss": 0.7026, + "step": 18148 + }, + { + "epoch": 2.89, + "learning_rate": 1.9913380027238035e-05, + "loss": 0.7222, + "step": 18149 + }, + { + "epoch": 2.89, + "learning_rate": 1.991085412255202e-05, + "loss": 0.6471, + "step": 18150 + }, + { + "epoch": 2.89, + "learning_rate": 1.9908328272062318e-05, + "loss": 0.6983, + "step": 18151 + }, + { + "epoch": 2.89, + "learning_rate": 1.9905802475795836e-05, + "loss": 0.8328, + "step": 18152 + }, + { + "epoch": 2.89, + "learning_rate": 1.990327673377946e-05, + "loss": 0.7161, + "step": 18153 + }, + { + "epoch": 2.89, + "learning_rate": 1.9900751046040083e-05, + "loss": 0.6242, + "step": 18154 + }, + { + "epoch": 2.89, + "learning_rate": 1.989822541260462e-05, + "loss": 0.6414, + "step": 18155 + }, + { + "epoch": 2.89, + "learning_rate": 1.989569983349996e-05, + "loss": 0.6835, + "step": 18156 + }, + { + "epoch": 2.89, + "learning_rate": 1.989317430875299e-05, + "loss": 0.6383, + "step": 18157 + }, + { + "epoch": 2.89, + "learning_rate": 1.9890648838390623e-05, + "loss": 0.6863, + "step": 18158 + }, + { + "epoch": 2.89, + "learning_rate": 1.988812342243974e-05, + "loss": 0.6658, + "step": 18159 + }, + { + "epoch": 2.89, + "learning_rate": 1.9885598060927232e-05, + "loss": 0.7312, + "step": 18160 + }, + { + "epoch": 2.89, + "learning_rate": 1.9883072753880007e-05, + "loss": 0.6616, + "step": 18161 + }, + { + "epoch": 2.89, + "learning_rate": 1.9880547501324953e-05, + "loss": 0.6625, + "step": 18162 + }, + { + "epoch": 2.89, + "learning_rate": 1.987802230328895e-05, + "loss": 0.6129, + "step": 18163 + }, + { + "epoch": 2.89, + "learning_rate": 1.987549715979891e-05, + "loss": 0.7152, + "step": 18164 + }, + { + "epoch": 2.89, + "learning_rate": 1.9872972070881714e-05, + "loss": 0.6391, + "step": 18165 + }, + { + "epoch": 2.89, + "learning_rate": 1.9870447036564245e-05, + "loss": 0.6824, + "step": 18166 + }, + { + "epoch": 2.89, + "learning_rate": 1.9867922056873407e-05, + "loss": 0.7082, + "step": 18167 + }, + { + "epoch": 2.89, + "learning_rate": 1.986539713183608e-05, + "loss": 0.7497, + "step": 18168 + }, + { + "epoch": 2.89, + "learning_rate": 1.9862872261479155e-05, + "loss": 0.7791, + "step": 18169 + }, + { + "epoch": 2.89, + "learning_rate": 1.9860347445829526e-05, + "loss": 0.6956, + "step": 18170 + }, + { + "epoch": 2.89, + "learning_rate": 1.9857822684914075e-05, + "loss": 0.6304, + "step": 18171 + }, + { + "epoch": 2.9, + "learning_rate": 1.9855297978759684e-05, + "loss": 0.6265, + "step": 18172 + }, + { + "epoch": 2.9, + "learning_rate": 1.9852773327393264e-05, + "loss": 0.666, + "step": 18173 + }, + { + "epoch": 2.9, + "learning_rate": 1.985024873084167e-05, + "loss": 0.8177, + "step": 18174 + }, + { + "epoch": 2.9, + "learning_rate": 1.9847724189131804e-05, + "loss": 0.6345, + "step": 18175 + }, + { + "epoch": 2.9, + "learning_rate": 1.9845199702290546e-05, + "loss": 0.6799, + "step": 18176 + }, + { + "epoch": 2.9, + "learning_rate": 1.9842675270344776e-05, + "loss": 0.6952, + "step": 18177 + }, + { + "epoch": 2.9, + "learning_rate": 1.984015089332139e-05, + "loss": 0.6698, + "step": 18178 + }, + { + "epoch": 2.9, + "learning_rate": 1.9837626571247265e-05, + "loss": 0.7079, + "step": 18179 + }, + { + "epoch": 2.9, + "learning_rate": 1.9835102304149275e-05, + "loss": 0.7255, + "step": 18180 + }, + { + "epoch": 2.9, + "learning_rate": 1.9832578092054315e-05, + "loss": 0.7361, + "step": 18181 + }, + { + "epoch": 2.9, + "learning_rate": 1.9830053934989262e-05, + "loss": 0.6738, + "step": 18182 + }, + { + "epoch": 2.9, + "learning_rate": 1.982752983298099e-05, + "loss": 0.7097, + "step": 18183 + }, + { + "epoch": 2.9, + "learning_rate": 1.9825005786056395e-05, + "loss": 0.6981, + "step": 18184 + }, + { + "epoch": 2.9, + "learning_rate": 1.9822481794242344e-05, + "loss": 0.6182, + "step": 18185 + }, + { + "epoch": 2.9, + "learning_rate": 1.981995785756571e-05, + "loss": 0.6937, + "step": 18186 + }, + { + "epoch": 2.9, + "learning_rate": 1.9817433976053387e-05, + "loss": 0.6515, + "step": 18187 + }, + { + "epoch": 2.9, + "learning_rate": 1.9814910149732247e-05, + "loss": 0.675, + "step": 18188 + }, + { + "epoch": 2.9, + "learning_rate": 1.981238637862916e-05, + "loss": 0.6659, + "step": 18189 + }, + { + "epoch": 2.9, + "learning_rate": 1.9809862662771014e-05, + "loss": 0.6518, + "step": 18190 + }, + { + "epoch": 2.9, + "learning_rate": 1.980733900218468e-05, + "loss": 0.6928, + "step": 18191 + }, + { + "epoch": 2.9, + "learning_rate": 1.9804815396897025e-05, + "loss": 0.7077, + "step": 18192 + }, + { + "epoch": 2.9, + "learning_rate": 1.980229184693495e-05, + "loss": 0.665, + "step": 18193 + }, + { + "epoch": 2.9, + "learning_rate": 1.9799768352325298e-05, + "loss": 0.6895, + "step": 18194 + }, + { + "epoch": 2.9, + "learning_rate": 1.979724491309496e-05, + "loss": 0.5961, + "step": 18195 + }, + { + "epoch": 2.9, + "learning_rate": 1.9794721529270803e-05, + "loss": 0.6319, + "step": 18196 + }, + { + "epoch": 2.9, + "learning_rate": 1.9792198200879696e-05, + "loss": 0.7026, + "step": 18197 + }, + { + "epoch": 2.9, + "learning_rate": 1.978967492794852e-05, + "loss": 0.6358, + "step": 18198 + }, + { + "epoch": 2.9, + "learning_rate": 1.9787151710504147e-05, + "loss": 0.7572, + "step": 18199 + }, + { + "epoch": 2.9, + "learning_rate": 1.9784628548573435e-05, + "loss": 0.7129, + "step": 18200 + }, + { + "epoch": 2.9, + "learning_rate": 1.9782105442183272e-05, + "loss": 0.6248, + "step": 18201 + }, + { + "epoch": 2.9, + "learning_rate": 1.977958239136051e-05, + "loss": 0.6606, + "step": 18202 + }, + { + "epoch": 2.9, + "learning_rate": 1.977705939613203e-05, + "loss": 0.6947, + "step": 18203 + }, + { + "epoch": 2.9, + "learning_rate": 1.9774536456524697e-05, + "loss": 0.6444, + "step": 18204 + }, + { + "epoch": 2.9, + "learning_rate": 1.977201357256538e-05, + "loss": 0.7091, + "step": 18205 + }, + { + "epoch": 2.9, + "learning_rate": 1.976949074428094e-05, + "loss": 0.6718, + "step": 18206 + }, + { + "epoch": 2.9, + "learning_rate": 1.9766967971698252e-05, + "loss": 0.6175, + "step": 18207 + }, + { + "epoch": 2.9, + "learning_rate": 1.9764445254844177e-05, + "loss": 0.6911, + "step": 18208 + }, + { + "epoch": 2.9, + "learning_rate": 1.9761922593745578e-05, + "loss": 0.7061, + "step": 18209 + }, + { + "epoch": 2.9, + "learning_rate": 1.975939998842933e-05, + "loss": 0.6209, + "step": 18210 + }, + { + "epoch": 2.9, + "learning_rate": 1.9756877438922286e-05, + "loss": 0.7367, + "step": 18211 + }, + { + "epoch": 2.9, + "learning_rate": 1.975435494525131e-05, + "loss": 0.6526, + "step": 18212 + }, + { + "epoch": 2.9, + "learning_rate": 1.9751832507443287e-05, + "loss": 0.6716, + "step": 18213 + }, + { + "epoch": 2.9, + "learning_rate": 1.9749310125525046e-05, + "loss": 0.6853, + "step": 18214 + }, + { + "epoch": 2.9, + "learning_rate": 1.9746787799523465e-05, + "loss": 0.689, + "step": 18215 + }, + { + "epoch": 2.9, + "learning_rate": 1.9744265529465412e-05, + "loss": 0.7218, + "step": 18216 + }, + { + "epoch": 2.9, + "learning_rate": 1.974174331537773e-05, + "loss": 0.6905, + "step": 18217 + }, + { + "epoch": 2.9, + "learning_rate": 1.9739221157287296e-05, + "loss": 0.6517, + "step": 18218 + }, + { + "epoch": 2.9, + "learning_rate": 1.9736699055220962e-05, + "loss": 0.6924, + "step": 18219 + }, + { + "epoch": 2.9, + "learning_rate": 1.9734177009205583e-05, + "loss": 0.6641, + "step": 18220 + }, + { + "epoch": 2.9, + "learning_rate": 1.9731655019268024e-05, + "loss": 0.6701, + "step": 18221 + }, + { + "epoch": 2.9, + "learning_rate": 1.972913308543515e-05, + "loss": 0.7186, + "step": 18222 + }, + { + "epoch": 2.9, + "learning_rate": 1.9726611207733792e-05, + "loss": 0.7351, + "step": 18223 + }, + { + "epoch": 2.9, + "learning_rate": 1.9724089386190835e-05, + "loss": 0.7552, + "step": 18224 + }, + { + "epoch": 2.9, + "learning_rate": 1.972156762083312e-05, + "loss": 0.6946, + "step": 18225 + }, + { + "epoch": 2.9, + "learning_rate": 1.9719045911687506e-05, + "loss": 0.7192, + "step": 18226 + }, + { + "epoch": 2.9, + "learning_rate": 1.9716524258780843e-05, + "loss": 0.7145, + "step": 18227 + }, + { + "epoch": 2.9, + "learning_rate": 1.9714002662139994e-05, + "loss": 0.6543, + "step": 18228 + }, + { + "epoch": 2.9, + "learning_rate": 1.9711481121791805e-05, + "loss": 0.6501, + "step": 18229 + }, + { + "epoch": 2.9, + "learning_rate": 1.970895963776313e-05, + "loss": 0.6178, + "step": 18230 + }, + { + "epoch": 2.9, + "learning_rate": 1.9706438210080826e-05, + "loss": 0.6882, + "step": 18231 + }, + { + "epoch": 2.9, + "learning_rate": 1.9703916838771737e-05, + "loss": 0.707, + "step": 18232 + }, + { + "epoch": 2.9, + "learning_rate": 1.970139552386273e-05, + "loss": 0.6481, + "step": 18233 + }, + { + "epoch": 2.9, + "learning_rate": 1.9698874265380628e-05, + "loss": 0.6877, + "step": 18234 + }, + { + "epoch": 2.91, + "learning_rate": 1.9696353063352306e-05, + "loss": 0.7637, + "step": 18235 + }, + { + "epoch": 2.91, + "learning_rate": 1.9693831917804602e-05, + "loss": 0.6804, + "step": 18236 + }, + { + "epoch": 2.91, + "learning_rate": 1.9691310828764364e-05, + "loss": 0.6934, + "step": 18237 + }, + { + "epoch": 2.91, + "learning_rate": 1.9688789796258445e-05, + "loss": 0.6494, + "step": 18238 + }, + { + "epoch": 2.91, + "learning_rate": 1.968626882031369e-05, + "loss": 0.6816, + "step": 18239 + }, + { + "epoch": 2.91, + "learning_rate": 1.968374790095695e-05, + "loss": 0.6313, + "step": 18240 + }, + { + "epoch": 2.91, + "learning_rate": 1.9681227038215058e-05, + "loss": 0.6356, + "step": 18241 + }, + { + "epoch": 2.91, + "learning_rate": 1.9678706232114873e-05, + "loss": 0.6649, + "step": 18242 + }, + { + "epoch": 2.91, + "learning_rate": 1.967618548268324e-05, + "loss": 0.6756, + "step": 18243 + }, + { + "epoch": 2.91, + "learning_rate": 1.9673664789946995e-05, + "loss": 0.6578, + "step": 18244 + }, + { + "epoch": 2.91, + "learning_rate": 1.9671144153932984e-05, + "loss": 0.7479, + "step": 18245 + }, + { + "epoch": 2.91, + "learning_rate": 1.966862357466806e-05, + "loss": 0.6652, + "step": 18246 + }, + { + "epoch": 2.91, + "learning_rate": 1.9666103052179052e-05, + "loss": 0.6322, + "step": 18247 + }, + { + "epoch": 2.91, + "learning_rate": 1.9663582586492812e-05, + "loss": 0.669, + "step": 18248 + }, + { + "epoch": 2.91, + "learning_rate": 1.966106217763618e-05, + "loss": 0.6757, + "step": 18249 + }, + { + "epoch": 2.91, + "learning_rate": 1.9658541825635984e-05, + "loss": 0.722, + "step": 18250 + }, + { + "epoch": 2.91, + "learning_rate": 1.965602153051908e-05, + "loss": 0.6708, + "step": 18251 + }, + { + "epoch": 2.91, + "learning_rate": 1.9653501292312307e-05, + "loss": 0.6733, + "step": 18252 + }, + { + "epoch": 2.91, + "learning_rate": 1.9650981111042506e-05, + "loss": 0.7093, + "step": 18253 + }, + { + "epoch": 2.91, + "learning_rate": 1.9648460986736497e-05, + "loss": 0.6783, + "step": 18254 + }, + { + "epoch": 2.91, + "learning_rate": 1.9645940919421125e-05, + "loss": 0.6458, + "step": 18255 + }, + { + "epoch": 2.91, + "learning_rate": 1.964342090912324e-05, + "loss": 0.7165, + "step": 18256 + }, + { + "epoch": 2.91, + "learning_rate": 1.964090095586967e-05, + "loss": 0.6618, + "step": 18257 + }, + { + "epoch": 2.91, + "learning_rate": 1.9638381059687247e-05, + "loss": 0.6924, + "step": 18258 + }, + { + "epoch": 2.91, + "learning_rate": 1.9635861220602812e-05, + "loss": 0.6211, + "step": 18259 + }, + { + "epoch": 2.91, + "learning_rate": 1.96333414386432e-05, + "loss": 0.6687, + "step": 18260 + }, + { + "epoch": 2.91, + "learning_rate": 1.963082171383524e-05, + "loss": 0.731, + "step": 18261 + }, + { + "epoch": 2.91, + "learning_rate": 1.9628302046205778e-05, + "loss": 0.6383, + "step": 18262 + }, + { + "epoch": 2.91, + "learning_rate": 1.962578243578163e-05, + "loss": 0.6823, + "step": 18263 + }, + { + "epoch": 2.91, + "learning_rate": 1.9623262882589634e-05, + "loss": 0.7532, + "step": 18264 + }, + { + "epoch": 2.91, + "learning_rate": 1.962074338665663e-05, + "loss": 0.6759, + "step": 18265 + }, + { + "epoch": 2.91, + "learning_rate": 1.9618223948009443e-05, + "loss": 0.6801, + "step": 18266 + }, + { + "epoch": 2.91, + "learning_rate": 1.9615704566674898e-05, + "loss": 0.7151, + "step": 18267 + }, + { + "epoch": 2.91, + "learning_rate": 1.9613185242679835e-05, + "loss": 0.6833, + "step": 18268 + }, + { + "epoch": 2.91, + "learning_rate": 1.961066597605108e-05, + "loss": 0.691, + "step": 18269 + }, + { + "epoch": 2.91, + "learning_rate": 1.9608146766815456e-05, + "loss": 0.6944, + "step": 18270 + }, + { + "epoch": 2.91, + "learning_rate": 1.9605627614999796e-05, + "loss": 0.7046, + "step": 18271 + }, + { + "epoch": 2.91, + "learning_rate": 1.960310852063093e-05, + "loss": 0.6259, + "step": 18272 + }, + { + "epoch": 2.91, + "learning_rate": 1.9600589483735678e-05, + "loss": 0.6806, + "step": 18273 + }, + { + "epoch": 2.91, + "learning_rate": 1.9598070504340885e-05, + "loss": 0.6816, + "step": 18274 + }, + { + "epoch": 2.91, + "learning_rate": 1.9595551582473345e-05, + "loss": 0.6793, + "step": 18275 + }, + { + "epoch": 2.91, + "learning_rate": 1.9593032718159904e-05, + "loss": 0.6688, + "step": 18276 + }, + { + "epoch": 2.91, + "learning_rate": 1.9590513911427384e-05, + "loss": 0.6698, + "step": 18277 + }, + { + "epoch": 2.91, + "learning_rate": 1.95879951623026e-05, + "loss": 0.6613, + "step": 18278 + }, + { + "epoch": 2.91, + "learning_rate": 1.9585476470812386e-05, + "loss": 0.6886, + "step": 18279 + }, + { + "epoch": 2.91, + "learning_rate": 1.9582957836983563e-05, + "loss": 0.6689, + "step": 18280 + }, + { + "epoch": 2.91, + "learning_rate": 1.9580439260842942e-05, + "loss": 0.7111, + "step": 18281 + }, + { + "epoch": 2.91, + "learning_rate": 1.957792074241736e-05, + "loss": 0.6675, + "step": 18282 + }, + { + "epoch": 2.91, + "learning_rate": 1.957540228173363e-05, + "loss": 0.709, + "step": 18283 + }, + { + "epoch": 2.91, + "learning_rate": 1.9572883878818564e-05, + "loss": 0.6629, + "step": 18284 + }, + { + "epoch": 2.91, + "learning_rate": 1.9570365533699e-05, + "loss": 0.5901, + "step": 18285 + }, + { + "epoch": 2.91, + "learning_rate": 1.9567847246401747e-05, + "loss": 0.6862, + "step": 18286 + }, + { + "epoch": 2.91, + "learning_rate": 1.9565329016953615e-05, + "loss": 0.694, + "step": 18287 + }, + { + "epoch": 2.91, + "learning_rate": 1.9562810845381435e-05, + "loss": 0.6837, + "step": 18288 + }, + { + "epoch": 2.91, + "learning_rate": 1.9560292731712015e-05, + "loss": 0.7597, + "step": 18289 + }, + { + "epoch": 2.91, + "learning_rate": 1.9557774675972173e-05, + "loss": 0.6937, + "step": 18290 + }, + { + "epoch": 2.91, + "learning_rate": 1.9555256678188732e-05, + "loss": 0.6831, + "step": 18291 + }, + { + "epoch": 2.91, + "learning_rate": 1.9552738738388504e-05, + "loss": 0.7183, + "step": 18292 + }, + { + "epoch": 2.91, + "learning_rate": 1.9550220856598293e-05, + "loss": 0.6886, + "step": 18293 + }, + { + "epoch": 2.91, + "learning_rate": 1.954770303284494e-05, + "loss": 0.7559, + "step": 18294 + }, + { + "epoch": 2.91, + "learning_rate": 1.954518526715522e-05, + "loss": 0.6039, + "step": 18295 + }, + { + "epoch": 2.91, + "learning_rate": 1.954266755955597e-05, + "loss": 0.6755, + "step": 18296 + }, + { + "epoch": 2.91, + "learning_rate": 1.9540149910074e-05, + "loss": 0.7083, + "step": 18297 + }, + { + "epoch": 2.92, + "learning_rate": 1.9537632318736114e-05, + "loss": 0.7498, + "step": 18298 + }, + { + "epoch": 2.92, + "learning_rate": 1.953511478556913e-05, + "loss": 0.6547, + "step": 18299 + }, + { + "epoch": 2.92, + "learning_rate": 1.953259731059986e-05, + "loss": 0.6596, + "step": 18300 + }, + { + "epoch": 2.92, + "learning_rate": 1.9530079893855103e-05, + "loss": 0.6233, + "step": 18301 + }, + { + "epoch": 2.92, + "learning_rate": 1.9527562535361677e-05, + "loss": 0.6754, + "step": 18302 + }, + { + "epoch": 2.92, + "learning_rate": 1.952504523514639e-05, + "loss": 0.6807, + "step": 18303 + }, + { + "epoch": 2.92, + "learning_rate": 1.9522527993236037e-05, + "loss": 0.7853, + "step": 18304 + }, + { + "epoch": 2.92, + "learning_rate": 1.9520010809657442e-05, + "loss": 0.6052, + "step": 18305 + }, + { + "epoch": 2.92, + "learning_rate": 1.951749368443741e-05, + "loss": 0.6851, + "step": 18306 + }, + { + "epoch": 2.92, + "learning_rate": 1.9514976617602733e-05, + "loss": 0.6742, + "step": 18307 + }, + { + "epoch": 2.92, + "learning_rate": 1.9512459609180232e-05, + "loss": 0.7127, + "step": 18308 + }, + { + "epoch": 2.92, + "learning_rate": 1.9509942659196702e-05, + "loss": 0.6978, + "step": 18309 + }, + { + "epoch": 2.92, + "learning_rate": 1.9507425767678947e-05, + "loss": 0.6956, + "step": 18310 + }, + { + "epoch": 2.92, + "learning_rate": 1.9504908934653775e-05, + "loss": 0.7482, + "step": 18311 + }, + { + "epoch": 2.92, + "learning_rate": 1.950239216014799e-05, + "loss": 0.7106, + "step": 18312 + }, + { + "epoch": 2.92, + "learning_rate": 1.9499875444188385e-05, + "loss": 0.6734, + "step": 18313 + }, + { + "epoch": 2.92, + "learning_rate": 1.9497358786801782e-05, + "loss": 0.7035, + "step": 18314 + }, + { + "epoch": 2.92, + "learning_rate": 1.949484218801495e-05, + "loss": 0.6628, + "step": 18315 + }, + { + "epoch": 2.92, + "learning_rate": 1.9492325647854713e-05, + "loss": 0.7212, + "step": 18316 + }, + { + "epoch": 2.92, + "learning_rate": 1.9489809166347868e-05, + "loss": 0.679, + "step": 18317 + }, + { + "epoch": 2.92, + "learning_rate": 1.9487292743521198e-05, + "loss": 0.6383, + "step": 18318 + }, + { + "epoch": 2.92, + "learning_rate": 1.9484776379401522e-05, + "loss": 0.7509, + "step": 18319 + }, + { + "epoch": 2.92, + "learning_rate": 1.9482260074015628e-05, + "loss": 0.7062, + "step": 18320 + }, + { + "epoch": 2.92, + "learning_rate": 1.947974382739031e-05, + "loss": 0.6675, + "step": 18321 + }, + { + "epoch": 2.92, + "learning_rate": 1.9477227639552377e-05, + "loss": 0.7164, + "step": 18322 + }, + { + "epoch": 2.92, + "learning_rate": 1.9474711510528616e-05, + "loss": 0.6628, + "step": 18323 + }, + { + "epoch": 2.92, + "learning_rate": 1.9472195440345815e-05, + "loss": 0.6375, + "step": 18324 + }, + { + "epoch": 2.92, + "learning_rate": 1.946967942903078e-05, + "loss": 0.6662, + "step": 18325 + }, + { + "epoch": 2.92, + "learning_rate": 1.9467163476610302e-05, + "loss": 0.7043, + "step": 18326 + }, + { + "epoch": 2.92, + "learning_rate": 1.9464647583111174e-05, + "loss": 0.6842, + "step": 18327 + }, + { + "epoch": 2.92, + "learning_rate": 1.946213174856019e-05, + "loss": 0.6695, + "step": 18328 + }, + { + "epoch": 2.92, + "learning_rate": 1.945961597298414e-05, + "loss": 0.6416, + "step": 18329 + }, + { + "epoch": 2.92, + "learning_rate": 1.9457100256409813e-05, + "loss": 0.6947, + "step": 18330 + }, + { + "epoch": 2.92, + "learning_rate": 1.9454584598864008e-05, + "loss": 0.6821, + "step": 18331 + }, + { + "epoch": 2.92, + "learning_rate": 1.945206900037351e-05, + "loss": 0.5958, + "step": 18332 + }, + { + "epoch": 2.92, + "learning_rate": 1.9449553460965107e-05, + "loss": 0.6803, + "step": 18333 + }, + { + "epoch": 2.92, + "learning_rate": 1.9447037980665598e-05, + "loss": 0.7109, + "step": 18334 + }, + { + "epoch": 2.92, + "learning_rate": 1.9444522559501752e-05, + "loss": 0.6662, + "step": 18335 + }, + { + "epoch": 2.92, + "learning_rate": 1.9442007197500372e-05, + "loss": 0.601, + "step": 18336 + }, + { + "epoch": 2.92, + "learning_rate": 1.9439491894688245e-05, + "loss": 0.6898, + "step": 18337 + }, + { + "epoch": 2.92, + "learning_rate": 1.9436976651092144e-05, + "loss": 0.7009, + "step": 18338 + }, + { + "epoch": 2.92, + "learning_rate": 1.943446146673887e-05, + "loss": 0.6374, + "step": 18339 + }, + { + "epoch": 2.92, + "learning_rate": 1.9431946341655203e-05, + "loss": 0.6625, + "step": 18340 + }, + { + "epoch": 2.92, + "learning_rate": 1.942943127586792e-05, + "loss": 0.6616, + "step": 18341 + }, + { + "epoch": 2.92, + "learning_rate": 1.942691626940382e-05, + "loss": 0.6505, + "step": 18342 + }, + { + "epoch": 2.92, + "learning_rate": 1.9424401322289676e-05, + "loss": 0.7978, + "step": 18343 + }, + { + "epoch": 2.92, + "learning_rate": 1.942188643455227e-05, + "loss": 0.677, + "step": 18344 + }, + { + "epoch": 2.92, + "learning_rate": 1.941937160621839e-05, + "loss": 0.62, + "step": 18345 + }, + { + "epoch": 2.92, + "learning_rate": 1.9416856837314816e-05, + "loss": 0.6155, + "step": 18346 + }, + { + "epoch": 2.92, + "learning_rate": 1.9414342127868327e-05, + "loss": 0.7779, + "step": 18347 + }, + { + "epoch": 2.92, + "learning_rate": 1.94118274779057e-05, + "loss": 0.7245, + "step": 18348 + }, + { + "epoch": 2.92, + "learning_rate": 1.940931288745372e-05, + "loss": 0.6802, + "step": 18349 + }, + { + "epoch": 2.92, + "learning_rate": 1.9406798356539163e-05, + "loss": 0.701, + "step": 18350 + }, + { + "epoch": 2.92, + "learning_rate": 1.94042838851888e-05, + "loss": 0.6679, + "step": 18351 + }, + { + "epoch": 2.92, + "learning_rate": 1.9401769473429428e-05, + "loss": 0.6719, + "step": 18352 + }, + { + "epoch": 2.92, + "learning_rate": 1.9399255121287806e-05, + "loss": 0.6662, + "step": 18353 + }, + { + "epoch": 2.92, + "learning_rate": 1.9396740828790727e-05, + "loss": 0.7674, + "step": 18354 + }, + { + "epoch": 2.92, + "learning_rate": 1.9394226595964944e-05, + "loss": 0.6401, + "step": 18355 + }, + { + "epoch": 2.92, + "learning_rate": 1.939171242283725e-05, + "loss": 0.6874, + "step": 18356 + }, + { + "epoch": 2.92, + "learning_rate": 1.9389198309434416e-05, + "loss": 0.6908, + "step": 18357 + }, + { + "epoch": 2.92, + "learning_rate": 1.938668425578321e-05, + "loss": 0.6615, + "step": 18358 + }, + { + "epoch": 2.92, + "learning_rate": 1.9384170261910405e-05, + "loss": 0.6554, + "step": 18359 + }, + { + "epoch": 2.92, + "learning_rate": 1.938165632784278e-05, + "loss": 0.7136, + "step": 18360 + }, + { + "epoch": 2.93, + "learning_rate": 1.9379142453607106e-05, + "loss": 0.6431, + "step": 18361 + }, + { + "epoch": 2.93, + "learning_rate": 1.9376628639230148e-05, + "loss": 0.6534, + "step": 18362 + }, + { + "epoch": 2.93, + "learning_rate": 1.9374114884738686e-05, + "loss": 0.7008, + "step": 18363 + }, + { + "epoch": 2.93, + "learning_rate": 1.9371601190159485e-05, + "loss": 0.6769, + "step": 18364 + }, + { + "epoch": 2.93, + "learning_rate": 1.936908755551931e-05, + "loss": 0.6772, + "step": 18365 + }, + { + "epoch": 2.93, + "learning_rate": 1.9366573980844937e-05, + "loss": 0.6676, + "step": 18366 + }, + { + "epoch": 2.93, + "learning_rate": 1.936406046616313e-05, + "loss": 0.7188, + "step": 18367 + }, + { + "epoch": 2.93, + "learning_rate": 1.9361547011500654e-05, + "loss": 0.7106, + "step": 18368 + }, + { + "epoch": 2.93, + "learning_rate": 1.9359033616884286e-05, + "loss": 0.8027, + "step": 18369 + }, + { + "epoch": 2.93, + "learning_rate": 1.9356520282340786e-05, + "loss": 0.6968, + "step": 18370 + }, + { + "epoch": 2.93, + "learning_rate": 1.9354007007896906e-05, + "loss": 0.6667, + "step": 18371 + }, + { + "epoch": 2.93, + "learning_rate": 1.9351493793579436e-05, + "loss": 0.7254, + "step": 18372 + }, + { + "epoch": 2.93, + "learning_rate": 1.9348980639415125e-05, + "loss": 0.6676, + "step": 18373 + }, + { + "epoch": 2.93, + "learning_rate": 1.9346467545430734e-05, + "loss": 0.6892, + "step": 18374 + }, + { + "epoch": 2.93, + "learning_rate": 1.934395451165305e-05, + "loss": 0.7092, + "step": 18375 + }, + { + "epoch": 2.93, + "learning_rate": 1.93414415381088e-05, + "loss": 0.6705, + "step": 18376 + }, + { + "epoch": 2.93, + "learning_rate": 1.9338928624824765e-05, + "loss": 0.6709, + "step": 18377 + }, + { + "epoch": 2.93, + "learning_rate": 1.9336415771827705e-05, + "loss": 0.7585, + "step": 18378 + }, + { + "epoch": 2.93, + "learning_rate": 1.933390297914437e-05, + "loss": 0.6884, + "step": 18379 + }, + { + "epoch": 2.93, + "learning_rate": 1.933139024680154e-05, + "loss": 0.5838, + "step": 18380 + }, + { + "epoch": 2.93, + "learning_rate": 1.9328877574825958e-05, + "loss": 0.6649, + "step": 18381 + }, + { + "epoch": 2.93, + "learning_rate": 1.932636496324438e-05, + "loss": 0.7078, + "step": 18382 + }, + { + "epoch": 2.93, + "learning_rate": 1.932385241208358e-05, + "loss": 0.6529, + "step": 18383 + }, + { + "epoch": 2.93, + "learning_rate": 1.93213399213703e-05, + "loss": 0.6648, + "step": 18384 + }, + { + "epoch": 2.93, + "learning_rate": 1.93188274911313e-05, + "loss": 0.6235, + "step": 18385 + }, + { + "epoch": 2.93, + "learning_rate": 1.9316315121393348e-05, + "loss": 0.6449, + "step": 18386 + }, + { + "epoch": 2.93, + "learning_rate": 1.9313802812183186e-05, + "loss": 0.6728, + "step": 18387 + }, + { + "epoch": 2.93, + "learning_rate": 1.9311290563527566e-05, + "loss": 0.671, + "step": 18388 + }, + { + "epoch": 2.93, + "learning_rate": 1.9308778375453253e-05, + "loss": 0.7861, + "step": 18389 + }, + { + "epoch": 2.93, + "learning_rate": 1.9306266247986993e-05, + "loss": 0.6324, + "step": 18390 + }, + { + "epoch": 2.93, + "learning_rate": 1.930375418115554e-05, + "loss": 0.6978, + "step": 18391 + }, + { + "epoch": 2.93, + "learning_rate": 1.930124217498565e-05, + "loss": 0.6612, + "step": 18392 + }, + { + "epoch": 2.93, + "learning_rate": 1.929873022950407e-05, + "loss": 0.7398, + "step": 18393 + }, + { + "epoch": 2.93, + "learning_rate": 1.9296218344737547e-05, + "loss": 0.6493, + "step": 18394 + }, + { + "epoch": 2.93, + "learning_rate": 1.929370652071285e-05, + "loss": 0.6322, + "step": 18395 + }, + { + "epoch": 2.93, + "learning_rate": 1.9291194757456702e-05, + "loss": 0.6386, + "step": 18396 + }, + { + "epoch": 2.93, + "learning_rate": 1.9288683054995864e-05, + "loss": 0.6168, + "step": 18397 + }, + { + "epoch": 2.93, + "learning_rate": 1.9286171413357092e-05, + "loss": 0.6683, + "step": 18398 + }, + { + "epoch": 2.93, + "learning_rate": 1.9283659832567113e-05, + "loss": 0.7041, + "step": 18399 + }, + { + "epoch": 2.93, + "learning_rate": 1.9281148312652694e-05, + "loss": 0.7566, + "step": 18400 + }, + { + "epoch": 2.93, + "learning_rate": 1.9278636853640574e-05, + "loss": 0.6496, + "step": 18401 + }, + { + "epoch": 2.93, + "learning_rate": 1.9276125455557492e-05, + "loss": 0.6102, + "step": 18402 + }, + { + "epoch": 2.93, + "learning_rate": 1.9273614118430205e-05, + "loss": 0.6412, + "step": 18403 + }, + { + "epoch": 2.93, + "learning_rate": 1.9271102842285448e-05, + "loss": 0.6162, + "step": 18404 + }, + { + "epoch": 2.93, + "learning_rate": 1.9268591627149966e-05, + "loss": 0.6823, + "step": 18405 + }, + { + "epoch": 2.93, + "learning_rate": 1.9266080473050507e-05, + "loss": 0.7025, + "step": 18406 + }, + { + "epoch": 2.93, + "learning_rate": 1.9263569380013812e-05, + "loss": 0.6808, + "step": 18407 + }, + { + "epoch": 2.93, + "learning_rate": 1.9261058348066613e-05, + "loss": 0.6086, + "step": 18408 + }, + { + "epoch": 2.93, + "learning_rate": 1.9258547377235665e-05, + "loss": 0.6754, + "step": 18409 + }, + { + "epoch": 2.93, + "learning_rate": 1.92560364675477e-05, + "loss": 0.691, + "step": 18410 + }, + { + "epoch": 2.93, + "learning_rate": 1.9253525619029455e-05, + "loss": 0.6417, + "step": 18411 + }, + { + "epoch": 2.93, + "learning_rate": 1.9251014831707683e-05, + "loss": 0.6931, + "step": 18412 + }, + { + "epoch": 2.93, + "learning_rate": 1.9248504105609106e-05, + "loss": 0.6908, + "step": 18413 + }, + { + "epoch": 2.93, + "learning_rate": 1.9245993440760464e-05, + "loss": 0.675, + "step": 18414 + }, + { + "epoch": 2.93, + "learning_rate": 1.9243482837188517e-05, + "loss": 0.7229, + "step": 18415 + }, + { + "epoch": 2.93, + "learning_rate": 1.9240972294919966e-05, + "loss": 0.6796, + "step": 18416 + }, + { + "epoch": 2.93, + "learning_rate": 1.923846181398157e-05, + "loss": 0.7439, + "step": 18417 + }, + { + "epoch": 2.93, + "learning_rate": 1.9235951394400058e-05, + "loss": 0.8292, + "step": 18418 + }, + { + "epoch": 2.93, + "learning_rate": 1.9233441036202154e-05, + "loss": 0.6541, + "step": 18419 + }, + { + "epoch": 2.93, + "learning_rate": 1.9230930739414614e-05, + "loss": 0.5642, + "step": 18420 + }, + { + "epoch": 2.93, + "learning_rate": 1.9228420504064155e-05, + "loss": 0.6936, + "step": 18421 + }, + { + "epoch": 2.93, + "learning_rate": 1.922591033017751e-05, + "loss": 0.6326, + "step": 18422 + }, + { + "epoch": 2.94, + "learning_rate": 1.9223400217781417e-05, + "loss": 0.6994, + "step": 18423 + }, + { + "epoch": 2.94, + "learning_rate": 1.9220890166902608e-05, + "loss": 0.6225, + "step": 18424 + }, + { + "epoch": 2.94, + "learning_rate": 1.9218380177567804e-05, + "loss": 0.7131, + "step": 18425 + }, + { + "epoch": 2.94, + "learning_rate": 1.9215870249803746e-05, + "loss": 0.7887, + "step": 18426 + }, + { + "epoch": 2.94, + "learning_rate": 1.9213360383637158e-05, + "loss": 0.6686, + "step": 18427 + }, + { + "epoch": 2.94, + "learning_rate": 1.9210850579094765e-05, + "loss": 0.6886, + "step": 18428 + }, + { + "epoch": 2.94, + "learning_rate": 1.9208340836203302e-05, + "loss": 0.7181, + "step": 18429 + }, + { + "epoch": 2.94, + "learning_rate": 1.9205831154989496e-05, + "loss": 0.685, + "step": 18430 + }, + { + "epoch": 2.94, + "learning_rate": 1.9203321535480064e-05, + "loss": 0.7236, + "step": 18431 + }, + { + "epoch": 2.94, + "learning_rate": 1.920081197770174e-05, + "loss": 0.6667, + "step": 18432 + }, + { + "epoch": 2.94, + "learning_rate": 1.9198302481681248e-05, + "loss": 0.7573, + "step": 18433 + }, + { + "epoch": 2.94, + "learning_rate": 1.9195793047445308e-05, + "loss": 0.6608, + "step": 18434 + }, + { + "epoch": 2.94, + "learning_rate": 1.919328367502067e-05, + "loss": 0.6697, + "step": 18435 + }, + { + "epoch": 2.94, + "learning_rate": 1.919077436443401e-05, + "loss": 0.6257, + "step": 18436 + }, + { + "epoch": 2.94, + "learning_rate": 1.9188265115712088e-05, + "loss": 0.655, + "step": 18437 + }, + { + "epoch": 2.94, + "learning_rate": 1.9185755928881614e-05, + "loss": 0.6448, + "step": 18438 + }, + { + "epoch": 2.94, + "learning_rate": 1.9183246803969303e-05, + "loss": 0.7695, + "step": 18439 + }, + { + "epoch": 2.94, + "learning_rate": 1.9180737741001887e-05, + "loss": 0.7161, + "step": 18440 + }, + { + "epoch": 2.94, + "learning_rate": 1.917822874000608e-05, + "loss": 0.7643, + "step": 18441 + }, + { + "epoch": 2.94, + "learning_rate": 1.9175719801008594e-05, + "loss": 0.8054, + "step": 18442 + }, + { + "epoch": 2.94, + "learning_rate": 1.917321092403617e-05, + "loss": 0.632, + "step": 18443 + }, + { + "epoch": 2.94, + "learning_rate": 1.9170702109115507e-05, + "loss": 0.7436, + "step": 18444 + }, + { + "epoch": 2.94, + "learning_rate": 1.9168193356273325e-05, + "loss": 0.61, + "step": 18445 + }, + { + "epoch": 2.94, + "learning_rate": 1.9165684665536343e-05, + "loss": 0.6373, + "step": 18446 + }, + { + "epoch": 2.94, + "learning_rate": 1.916317603693128e-05, + "loss": 0.7351, + "step": 18447 + }, + { + "epoch": 2.94, + "learning_rate": 1.9160667470484844e-05, + "loss": 0.7253, + "step": 18448 + }, + { + "epoch": 2.94, + "learning_rate": 1.9158158966223762e-05, + "loss": 0.6937, + "step": 18449 + }, + { + "epoch": 2.94, + "learning_rate": 1.915565052417474e-05, + "loss": 0.7634, + "step": 18450 + }, + { + "epoch": 2.94, + "learning_rate": 1.9153142144364487e-05, + "loss": 0.7406, + "step": 18451 + }, + { + "epoch": 2.94, + "learning_rate": 1.9150633826819715e-05, + "loss": 0.6377, + "step": 18452 + }, + { + "epoch": 2.94, + "learning_rate": 1.9148125571567152e-05, + "loss": 0.7479, + "step": 18453 + }, + { + "epoch": 2.94, + "learning_rate": 1.9145617378633497e-05, + "loss": 0.6938, + "step": 18454 + }, + { + "epoch": 2.94, + "learning_rate": 1.9143109248045465e-05, + "loss": 0.6754, + "step": 18455 + }, + { + "epoch": 2.94, + "learning_rate": 1.9140601179829757e-05, + "loss": 0.77, + "step": 18456 + }, + { + "epoch": 2.94, + "learning_rate": 1.9138093174013093e-05, + "loss": 0.7156, + "step": 18457 + }, + { + "epoch": 2.94, + "learning_rate": 1.9135585230622178e-05, + "loss": 0.6679, + "step": 18458 + }, + { + "epoch": 2.94, + "learning_rate": 1.9133077349683714e-05, + "loss": 0.8011, + "step": 18459 + }, + { + "epoch": 2.94, + "learning_rate": 1.913056953122442e-05, + "loss": 0.6663, + "step": 18460 + }, + { + "epoch": 2.94, + "learning_rate": 1.9128061775271e-05, + "loss": 0.7695, + "step": 18461 + }, + { + "epoch": 2.94, + "learning_rate": 1.9125554081850145e-05, + "loss": 0.6817, + "step": 18462 + }, + { + "epoch": 2.94, + "learning_rate": 1.9123046450988583e-05, + "loss": 0.6619, + "step": 18463 + }, + { + "epoch": 2.94, + "learning_rate": 1.9120538882713006e-05, + "loss": 0.6812, + "step": 18464 + }, + { + "epoch": 2.94, + "learning_rate": 1.911803137705012e-05, + "loss": 0.654, + "step": 18465 + }, + { + "epoch": 2.94, + "learning_rate": 1.911552393402663e-05, + "loss": 0.6654, + "step": 18466 + }, + { + "epoch": 2.94, + "learning_rate": 1.9113016553669234e-05, + "loss": 0.7042, + "step": 18467 + }, + { + "epoch": 2.94, + "learning_rate": 1.9110509236004644e-05, + "loss": 0.7229, + "step": 18468 + }, + { + "epoch": 2.94, + "learning_rate": 1.9108001981059547e-05, + "loss": 0.7052, + "step": 18469 + }, + { + "epoch": 2.94, + "learning_rate": 1.9105494788860655e-05, + "loss": 0.6742, + "step": 18470 + }, + { + "epoch": 2.94, + "learning_rate": 1.910298765943467e-05, + "loss": 0.656, + "step": 18471 + }, + { + "epoch": 2.94, + "learning_rate": 1.9100480592808276e-05, + "loss": 0.6906, + "step": 18472 + }, + { + "epoch": 2.94, + "learning_rate": 1.909797358900819e-05, + "loss": 0.6882, + "step": 18473 + }, + { + "epoch": 2.94, + "learning_rate": 1.90954666480611e-05, + "loss": 0.7232, + "step": 18474 + }, + { + "epoch": 2.94, + "learning_rate": 1.909295976999371e-05, + "loss": 0.5894, + "step": 18475 + }, + { + "epoch": 2.94, + "learning_rate": 1.909045295483271e-05, + "loss": 0.7007, + "step": 18476 + }, + { + "epoch": 2.94, + "learning_rate": 1.9087946202604795e-05, + "loss": 0.6858, + "step": 18477 + }, + { + "epoch": 2.94, + "learning_rate": 1.9085439513336666e-05, + "loss": 0.6913, + "step": 18478 + }, + { + "epoch": 2.94, + "learning_rate": 1.9082932887055014e-05, + "loss": 0.5928, + "step": 18479 + }, + { + "epoch": 2.94, + "learning_rate": 1.9080426323786532e-05, + "loss": 0.6345, + "step": 18480 + }, + { + "epoch": 2.94, + "learning_rate": 1.9077919823557918e-05, + "loss": 0.8598, + "step": 18481 + }, + { + "epoch": 2.94, + "learning_rate": 1.9075413386395864e-05, + "loss": 0.6992, + "step": 18482 + }, + { + "epoch": 2.94, + "learning_rate": 1.9072907012327058e-05, + "loss": 0.6896, + "step": 18483 + }, + { + "epoch": 2.94, + "learning_rate": 1.9070400701378195e-05, + "loss": 0.6317, + "step": 18484 + }, + { + "epoch": 2.94, + "learning_rate": 1.9067894453575966e-05, + "loss": 0.7441, + "step": 18485 + }, + { + "epoch": 2.95, + "learning_rate": 1.9065388268947055e-05, + "loss": 0.7144, + "step": 18486 + }, + { + "epoch": 2.95, + "learning_rate": 1.9062882147518158e-05, + "loss": 0.6729, + "step": 18487 + }, + { + "epoch": 2.95, + "learning_rate": 1.9060376089315964e-05, + "loss": 0.6846, + "step": 18488 + }, + { + "epoch": 2.95, + "learning_rate": 1.9057870094367155e-05, + "loss": 0.6812, + "step": 18489 + }, + { + "epoch": 2.95, + "learning_rate": 1.9055364162698426e-05, + "loss": 0.7591, + "step": 18490 + }, + { + "epoch": 2.95, + "learning_rate": 1.9052858294336458e-05, + "loss": 0.6574, + "step": 18491 + }, + { + "epoch": 2.95, + "learning_rate": 1.9050352489307933e-05, + "loss": 0.6848, + "step": 18492 + }, + { + "epoch": 2.95, + "learning_rate": 1.9047846747639553e-05, + "loss": 0.6743, + "step": 18493 + }, + { + "epoch": 2.95, + "learning_rate": 1.9045341069357986e-05, + "loss": 0.6982, + "step": 18494 + }, + { + "epoch": 2.95, + "learning_rate": 1.9042835454489916e-05, + "loss": 0.6547, + "step": 18495 + }, + { + "epoch": 2.95, + "learning_rate": 1.9040329903062053e-05, + "loss": 0.6716, + "step": 18496 + }, + { + "epoch": 2.95, + "learning_rate": 1.9037824415101037e-05, + "loss": 0.7066, + "step": 18497 + }, + { + "epoch": 2.95, + "learning_rate": 1.9035318990633577e-05, + "loss": 0.6621, + "step": 18498 + }, + { + "epoch": 2.95, + "learning_rate": 1.903281362968635e-05, + "loss": 0.6734, + "step": 18499 + }, + { + "epoch": 2.95, + "learning_rate": 1.903030833228603e-05, + "loss": 0.7246, + "step": 18500 + }, + { + "epoch": 2.95, + "learning_rate": 1.9027803098459305e-05, + "loss": 0.669, + "step": 18501 + }, + { + "epoch": 2.95, + "learning_rate": 1.9025297928232853e-05, + "loss": 0.6278, + "step": 18502 + }, + { + "epoch": 2.95, + "learning_rate": 1.9022792821633345e-05, + "loss": 0.6347, + "step": 18503 + }, + { + "epoch": 2.95, + "learning_rate": 1.902028777868747e-05, + "loss": 0.6955, + "step": 18504 + }, + { + "epoch": 2.95, + "learning_rate": 1.9017782799421903e-05, + "loss": 0.6146, + "step": 18505 + }, + { + "epoch": 2.95, + "learning_rate": 1.901527788386331e-05, + "loss": 0.6519, + "step": 18506 + }, + { + "epoch": 2.95, + "learning_rate": 1.9012773032038376e-05, + "loss": 0.6012, + "step": 18507 + }, + { + "epoch": 2.95, + "learning_rate": 1.9010268243973775e-05, + "loss": 0.6237, + "step": 18508 + }, + { + "epoch": 2.95, + "learning_rate": 1.9007763519696177e-05, + "loss": 0.686, + "step": 18509 + }, + { + "epoch": 2.95, + "learning_rate": 1.9005258859232266e-05, + "loss": 0.6125, + "step": 18510 + }, + { + "epoch": 2.95, + "learning_rate": 1.9002754262608706e-05, + "loss": 0.6971, + "step": 18511 + }, + { + "epoch": 2.95, + "learning_rate": 1.9000249729852172e-05, + "loss": 0.6493, + "step": 18512 + }, + { + "epoch": 2.95, + "learning_rate": 1.8997745260989336e-05, + "loss": 0.7011, + "step": 18513 + }, + { + "epoch": 2.95, + "learning_rate": 1.899524085604687e-05, + "loss": 0.7165, + "step": 18514 + }, + { + "epoch": 2.95, + "learning_rate": 1.899273651505144e-05, + "loss": 0.7032, + "step": 18515 + }, + { + "epoch": 2.95, + "learning_rate": 1.899023223802973e-05, + "loss": 0.6945, + "step": 18516 + }, + { + "epoch": 2.95, + "learning_rate": 1.8987728025008388e-05, + "loss": 0.6808, + "step": 18517 + }, + { + "epoch": 2.95, + "learning_rate": 1.8985223876014094e-05, + "loss": 0.6521, + "step": 18518 + }, + { + "epoch": 2.95, + "learning_rate": 1.8982719791073518e-05, + "loss": 0.706, + "step": 18519 + }, + { + "epoch": 2.95, + "learning_rate": 1.8980215770213313e-05, + "loss": 0.6945, + "step": 18520 + }, + { + "epoch": 2.95, + "learning_rate": 1.8977711813460162e-05, + "loss": 0.7026, + "step": 18521 + }, + { + "epoch": 2.95, + "learning_rate": 1.8975207920840725e-05, + "loss": 0.6479, + "step": 18522 + }, + { + "epoch": 2.95, + "learning_rate": 1.897270409238166e-05, + "loss": 0.6759, + "step": 18523 + }, + { + "epoch": 2.95, + "learning_rate": 1.8970200328109644e-05, + "loss": 0.6659, + "step": 18524 + }, + { + "epoch": 2.95, + "learning_rate": 1.8967696628051332e-05, + "loss": 0.7422, + "step": 18525 + }, + { + "epoch": 2.95, + "learning_rate": 1.8965192992233382e-05, + "loss": 0.6529, + "step": 18526 + }, + { + "epoch": 2.95, + "learning_rate": 1.896268942068247e-05, + "loss": 0.6761, + "step": 18527 + }, + { + "epoch": 2.95, + "learning_rate": 1.896018591342525e-05, + "loss": 0.7211, + "step": 18528 + }, + { + "epoch": 2.95, + "learning_rate": 1.895768247048838e-05, + "loss": 0.7134, + "step": 18529 + }, + { + "epoch": 2.95, + "learning_rate": 1.8955179091898525e-05, + "loss": 0.6656, + "step": 18530 + }, + { + "epoch": 2.95, + "learning_rate": 1.8952675777682342e-05, + "loss": 0.6576, + "step": 18531 + }, + { + "epoch": 2.95, + "learning_rate": 1.895017252786649e-05, + "loss": 0.6568, + "step": 18532 + }, + { + "epoch": 2.95, + "learning_rate": 1.894766934247763e-05, + "loss": 0.6803, + "step": 18533 + }, + { + "epoch": 2.95, + "learning_rate": 1.894516622154242e-05, + "loss": 0.7398, + "step": 18534 + }, + { + "epoch": 2.95, + "learning_rate": 1.8942663165087504e-05, + "loss": 0.7417, + "step": 18535 + }, + { + "epoch": 2.95, + "learning_rate": 1.8940160173139568e-05, + "loss": 0.6636, + "step": 18536 + }, + { + "epoch": 2.95, + "learning_rate": 1.8937657245725226e-05, + "loss": 0.7192, + "step": 18537 + }, + { + "epoch": 2.95, + "learning_rate": 1.8935154382871166e-05, + "loss": 0.6276, + "step": 18538 + }, + { + "epoch": 2.95, + "learning_rate": 1.8932651584604026e-05, + "loss": 0.6765, + "step": 18539 + }, + { + "epoch": 2.95, + "learning_rate": 1.8930148850950456e-05, + "loss": 0.6704, + "step": 18540 + }, + { + "epoch": 2.95, + "learning_rate": 1.892764618193712e-05, + "loss": 0.6489, + "step": 18541 + }, + { + "epoch": 2.95, + "learning_rate": 1.892514357759067e-05, + "loss": 0.6812, + "step": 18542 + }, + { + "epoch": 2.95, + "learning_rate": 1.8922641037937748e-05, + "loss": 0.649, + "step": 18543 + }, + { + "epoch": 2.95, + "learning_rate": 1.892013856300501e-05, + "loss": 0.6501, + "step": 18544 + }, + { + "epoch": 2.95, + "learning_rate": 1.8917636152819108e-05, + "loss": 0.6423, + "step": 18545 + }, + { + "epoch": 2.95, + "learning_rate": 1.891513380740668e-05, + "loss": 0.6426, + "step": 18546 + }, + { + "epoch": 2.95, + "learning_rate": 1.8912631526794392e-05, + "loss": 0.6489, + "step": 18547 + }, + { + "epoch": 2.95, + "learning_rate": 1.8910129311008882e-05, + "loss": 0.6307, + "step": 18548 + }, + { + "epoch": 2.96, + "learning_rate": 1.890762716007679e-05, + "loss": 0.7059, + "step": 18549 + }, + { + "epoch": 2.96, + "learning_rate": 1.8905125074024775e-05, + "loss": 0.6606, + "step": 18550 + }, + { + "epoch": 2.96, + "learning_rate": 1.890262305287948e-05, + "loss": 0.6976, + "step": 18551 + }, + { + "epoch": 2.96, + "learning_rate": 1.8900121096667543e-05, + "loss": 0.6923, + "step": 18552 + }, + { + "epoch": 2.96, + "learning_rate": 1.8897619205415616e-05, + "loss": 0.6816, + "step": 18553 + }, + { + "epoch": 2.96, + "learning_rate": 1.889511737915034e-05, + "loss": 0.597, + "step": 18554 + }, + { + "epoch": 2.96, + "learning_rate": 1.889261561789835e-05, + "loss": 0.6715, + "step": 18555 + }, + { + "epoch": 2.96, + "learning_rate": 1.8890113921686318e-05, + "loss": 0.6128, + "step": 18556 + }, + { + "epoch": 2.96, + "learning_rate": 1.8887612290540844e-05, + "loss": 0.6602, + "step": 18557 + }, + { + "epoch": 2.96, + "learning_rate": 1.888511072448859e-05, + "loss": 0.6693, + "step": 18558 + }, + { + "epoch": 2.96, + "learning_rate": 1.88826092235562e-05, + "loss": 0.6694, + "step": 18559 + }, + { + "epoch": 2.96, + "learning_rate": 1.88801077877703e-05, + "loss": 0.7472, + "step": 18560 + }, + { + "epoch": 2.96, + "learning_rate": 1.8877606417157543e-05, + "loss": 0.6124, + "step": 18561 + }, + { + "epoch": 2.96, + "learning_rate": 1.8875105111744556e-05, + "loss": 0.6335, + "step": 18562 + }, + { + "epoch": 2.96, + "learning_rate": 1.887260387155798e-05, + "loss": 0.762, + "step": 18563 + }, + { + "epoch": 2.96, + "learning_rate": 1.887010269662446e-05, + "loss": 0.7119, + "step": 18564 + }, + { + "epoch": 2.96, + "learning_rate": 1.8867601586970622e-05, + "loss": 0.6747, + "step": 18565 + }, + { + "epoch": 2.96, + "learning_rate": 1.88651005426231e-05, + "loss": 0.7065, + "step": 18566 + }, + { + "epoch": 2.96, + "learning_rate": 1.8862599563608535e-05, + "loss": 0.6935, + "step": 18567 + }, + { + "epoch": 2.96, + "learning_rate": 1.8860098649953558e-05, + "loss": 0.6578, + "step": 18568 + }, + { + "epoch": 2.96, + "learning_rate": 1.88575978016848e-05, + "loss": 0.6689, + "step": 18569 + }, + { + "epoch": 2.96, + "learning_rate": 1.8855097018828906e-05, + "loss": 0.7367, + "step": 18570 + }, + { + "epoch": 2.96, + "learning_rate": 1.885259630141249e-05, + "loss": 0.6832, + "step": 18571 + }, + { + "epoch": 2.96, + "learning_rate": 1.8850095649462194e-05, + "loss": 0.6325, + "step": 18572 + }, + { + "epoch": 2.96, + "learning_rate": 1.884759506300464e-05, + "loss": 0.7389, + "step": 18573 + }, + { + "epoch": 2.96, + "learning_rate": 1.884509454206647e-05, + "loss": 0.6084, + "step": 18574 + }, + { + "epoch": 2.96, + "learning_rate": 1.8842594086674308e-05, + "loss": 0.6676, + "step": 18575 + }, + { + "epoch": 2.96, + "learning_rate": 1.8840093696854782e-05, + "loss": 0.7425, + "step": 18576 + }, + { + "epoch": 2.96, + "learning_rate": 1.883759337263451e-05, + "loss": 0.6405, + "step": 18577 + }, + { + "epoch": 2.96, + "learning_rate": 1.8835093114040135e-05, + "loss": 0.6517, + "step": 18578 + }, + { + "epoch": 2.96, + "learning_rate": 1.8832592921098272e-05, + "loss": 0.7458, + "step": 18579 + }, + { + "epoch": 2.96, + "learning_rate": 1.8830092793835547e-05, + "loss": 0.6956, + "step": 18580 + }, + { + "epoch": 2.96, + "learning_rate": 1.882759273227859e-05, + "loss": 0.6472, + "step": 18581 + }, + { + "epoch": 2.96, + "learning_rate": 1.8825092736454024e-05, + "loss": 0.6892, + "step": 18582 + }, + { + "epoch": 2.96, + "learning_rate": 1.8822592806388474e-05, + "loss": 0.6536, + "step": 18583 + }, + { + "epoch": 2.96, + "learning_rate": 1.882009294210855e-05, + "loss": 0.7266, + "step": 18584 + }, + { + "epoch": 2.96, + "learning_rate": 1.8817593143640896e-05, + "loss": 0.7275, + "step": 18585 + }, + { + "epoch": 2.96, + "learning_rate": 1.881509341101212e-05, + "loss": 0.7184, + "step": 18586 + }, + { + "epoch": 2.96, + "learning_rate": 1.8812593744248834e-05, + "loss": 0.6601, + "step": 18587 + }, + { + "epoch": 2.96, + "learning_rate": 1.881009414337768e-05, + "loss": 0.6735, + "step": 18588 + }, + { + "epoch": 2.96, + "learning_rate": 1.880759460842526e-05, + "loss": 0.6692, + "step": 18589 + }, + { + "epoch": 2.96, + "learning_rate": 1.8805095139418198e-05, + "loss": 0.6347, + "step": 18590 + }, + { + "epoch": 2.96, + "learning_rate": 1.880259573638311e-05, + "loss": 0.7183, + "step": 18591 + }, + { + "epoch": 2.96, + "learning_rate": 1.880009639934662e-05, + "loss": 0.6479, + "step": 18592 + }, + { + "epoch": 2.96, + "learning_rate": 1.8797597128335334e-05, + "loss": 0.6663, + "step": 18593 + }, + { + "epoch": 2.96, + "learning_rate": 1.8795097923375875e-05, + "loss": 0.6675, + "step": 18594 + }, + { + "epoch": 2.96, + "learning_rate": 1.879259878449486e-05, + "loss": 0.6439, + "step": 18595 + }, + { + "epoch": 2.96, + "learning_rate": 1.879009971171889e-05, + "loss": 0.6821, + "step": 18596 + }, + { + "epoch": 2.96, + "learning_rate": 1.878760070507461e-05, + "loss": 0.6738, + "step": 18597 + }, + { + "epoch": 2.96, + "learning_rate": 1.8785101764588588e-05, + "loss": 0.7467, + "step": 18598 + }, + { + "epoch": 2.96, + "learning_rate": 1.8782602890287468e-05, + "loss": 0.6241, + "step": 18599 + }, + { + "epoch": 2.96, + "learning_rate": 1.8780104082197854e-05, + "loss": 0.6828, + "step": 18600 + }, + { + "epoch": 2.96, + "learning_rate": 1.8777605340346347e-05, + "loss": 0.719, + "step": 18601 + }, + { + "epoch": 2.96, + "learning_rate": 1.877510666475957e-05, + "loss": 0.6291, + "step": 18602 + }, + { + "epoch": 2.96, + "learning_rate": 1.877260805546413e-05, + "loss": 0.6277, + "step": 18603 + }, + { + "epoch": 2.96, + "learning_rate": 1.877010951248663e-05, + "loss": 0.6654, + "step": 18604 + }, + { + "epoch": 2.96, + "learning_rate": 1.876761103585368e-05, + "loss": 0.7297, + "step": 18605 + }, + { + "epoch": 2.96, + "learning_rate": 1.8765112625591898e-05, + "loss": 0.6462, + "step": 18606 + }, + { + "epoch": 2.96, + "learning_rate": 1.8762614281727868e-05, + "loss": 0.7288, + "step": 18607 + }, + { + "epoch": 2.96, + "learning_rate": 1.8760116004288217e-05, + "loss": 0.7694, + "step": 18608 + }, + { + "epoch": 2.96, + "learning_rate": 1.8757617793299543e-05, + "loss": 0.7032, + "step": 18609 + }, + { + "epoch": 2.96, + "learning_rate": 1.8755119648788445e-05, + "loss": 0.6331, + "step": 18610 + }, + { + "epoch": 2.96, + "learning_rate": 1.8752621570781535e-05, + "loss": 0.6322, + "step": 18611 + }, + { + "epoch": 2.97, + "learning_rate": 1.8750123559305415e-05, + "loss": 0.6245, + "step": 18612 + }, + { + "epoch": 2.97, + "learning_rate": 1.8747625614386676e-05, + "loss": 0.6857, + "step": 18613 + }, + { + "epoch": 2.97, + "learning_rate": 1.8745127736051934e-05, + "loss": 0.7147, + "step": 18614 + }, + { + "epoch": 2.97, + "learning_rate": 1.8742629924327785e-05, + "loss": 0.6388, + "step": 18615 + }, + { + "epoch": 2.97, + "learning_rate": 1.8740132179240823e-05, + "loss": 0.629, + "step": 18616 + }, + { + "epoch": 2.97, + "learning_rate": 1.8737634500817667e-05, + "loss": 0.6691, + "step": 18617 + }, + { + "epoch": 2.97, + "learning_rate": 1.8735136889084892e-05, + "loss": 0.7845, + "step": 18618 + }, + { + "epoch": 2.97, + "learning_rate": 1.8732639344069104e-05, + "loss": 0.6815, + "step": 18619 + }, + { + "epoch": 2.97, + "learning_rate": 1.8730141865796907e-05, + "loss": 0.7142, + "step": 18620 + }, + { + "epoch": 2.97, + "learning_rate": 1.8727644454294883e-05, + "loss": 0.6668, + "step": 18621 + }, + { + "epoch": 2.97, + "learning_rate": 1.8725147109589648e-05, + "loss": 0.7484, + "step": 18622 + }, + { + "epoch": 2.97, + "learning_rate": 1.872264983170778e-05, + "loss": 0.64, + "step": 18623 + }, + { + "epoch": 2.97, + "learning_rate": 1.872015262067588e-05, + "loss": 0.6714, + "step": 18624 + }, + { + "epoch": 2.97, + "learning_rate": 1.8717655476520545e-05, + "loss": 0.679, + "step": 18625 + }, + { + "epoch": 2.97, + "learning_rate": 1.871515839926837e-05, + "loss": 0.7579, + "step": 18626 + }, + { + "epoch": 2.97, + "learning_rate": 1.871266138894593e-05, + "loss": 0.6717, + "step": 18627 + }, + { + "epoch": 2.97, + "learning_rate": 1.8710164445579836e-05, + "loss": 0.7104, + "step": 18628 + }, + { + "epoch": 2.97, + "learning_rate": 1.8707667569196674e-05, + "loss": 0.6166, + "step": 18629 + }, + { + "epoch": 2.97, + "learning_rate": 1.8705170759823028e-05, + "loss": 0.663, + "step": 18630 + }, + { + "epoch": 2.97, + "learning_rate": 1.8702674017485493e-05, + "loss": 0.6429, + "step": 18631 + }, + { + "epoch": 2.97, + "learning_rate": 1.870017734221066e-05, + "loss": 0.6853, + "step": 18632 + }, + { + "epoch": 2.97, + "learning_rate": 1.8697680734025106e-05, + "loss": 0.7499, + "step": 18633 + }, + { + "epoch": 2.97, + "learning_rate": 1.869518419295543e-05, + "loss": 0.7232, + "step": 18634 + }, + { + "epoch": 2.97, + "learning_rate": 1.869268771902822e-05, + "loss": 0.6334, + "step": 18635 + }, + { + "epoch": 2.97, + "learning_rate": 1.8690191312270048e-05, + "loss": 0.6781, + "step": 18636 + }, + { + "epoch": 2.97, + "learning_rate": 1.868769497270752e-05, + "loss": 0.6752, + "step": 18637 + }, + { + "epoch": 2.97, + "learning_rate": 1.86851987003672e-05, + "loss": 0.7849, + "step": 18638 + }, + { + "epoch": 2.97, + "learning_rate": 1.868270249527568e-05, + "loss": 0.6497, + "step": 18639 + }, + { + "epoch": 2.97, + "learning_rate": 1.8680206357459546e-05, + "loss": 0.7725, + "step": 18640 + }, + { + "epoch": 2.97, + "learning_rate": 1.867771028694537e-05, + "loss": 0.6472, + "step": 18641 + }, + { + "epoch": 2.97, + "learning_rate": 1.8675214283759746e-05, + "loss": 0.7068, + "step": 18642 + }, + { + "epoch": 2.97, + "learning_rate": 1.8672718347929253e-05, + "loss": 0.6345, + "step": 18643 + }, + { + "epoch": 2.97, + "learning_rate": 1.867022247948046e-05, + "loss": 0.6715, + "step": 18644 + }, + { + "epoch": 2.97, + "learning_rate": 1.8667726678439962e-05, + "loss": 0.6549, + "step": 18645 + }, + { + "epoch": 2.97, + "learning_rate": 1.8665230944834327e-05, + "loss": 0.6172, + "step": 18646 + }, + { + "epoch": 2.97, + "learning_rate": 1.8662735278690136e-05, + "loss": 0.7713, + "step": 18647 + }, + { + "epoch": 2.97, + "learning_rate": 1.866023968003397e-05, + "loss": 0.6765, + "step": 18648 + }, + { + "epoch": 2.97, + "learning_rate": 1.86577441488924e-05, + "loss": 0.6859, + "step": 18649 + }, + { + "epoch": 2.97, + "learning_rate": 1.8655248685292e-05, + "loss": 0.6304, + "step": 18650 + }, + { + "epoch": 2.97, + "learning_rate": 1.8652753289259356e-05, + "loss": 0.7306, + "step": 18651 + }, + { + "epoch": 2.97, + "learning_rate": 1.8650257960821038e-05, + "loss": 0.689, + "step": 18652 + }, + { + "epoch": 2.97, + "learning_rate": 1.864776270000361e-05, + "loss": 0.6065, + "step": 18653 + }, + { + "epoch": 2.97, + "learning_rate": 1.864526750683366e-05, + "loss": 0.7219, + "step": 18654 + }, + { + "epoch": 2.97, + "learning_rate": 1.864277238133775e-05, + "loss": 0.6326, + "step": 18655 + }, + { + "epoch": 2.97, + "learning_rate": 1.8640277323542448e-05, + "loss": 0.6331, + "step": 18656 + }, + { + "epoch": 2.97, + "learning_rate": 1.863778233347435e-05, + "loss": 0.7134, + "step": 18657 + }, + { + "epoch": 2.97, + "learning_rate": 1.8635287411159996e-05, + "loss": 0.6545, + "step": 18658 + }, + { + "epoch": 2.97, + "learning_rate": 1.8632792556625967e-05, + "loss": 0.6744, + "step": 18659 + }, + { + "epoch": 2.97, + "learning_rate": 1.863029776989883e-05, + "loss": 0.7027, + "step": 18660 + }, + { + "epoch": 2.97, + "learning_rate": 1.8627803051005156e-05, + "loss": 0.7642, + "step": 18661 + }, + { + "epoch": 2.97, + "learning_rate": 1.8625308399971507e-05, + "loss": 0.6296, + "step": 18662 + }, + { + "epoch": 2.97, + "learning_rate": 1.8622813816824462e-05, + "loss": 0.6759, + "step": 18663 + }, + { + "epoch": 2.97, + "learning_rate": 1.862031930159057e-05, + "loss": 0.6254, + "step": 18664 + }, + { + "epoch": 2.97, + "learning_rate": 1.8617824854296406e-05, + "loss": 0.7738, + "step": 18665 + }, + { + "epoch": 2.97, + "learning_rate": 1.8615330474968538e-05, + "loss": 0.6185, + "step": 18666 + }, + { + "epoch": 2.97, + "learning_rate": 1.8612836163633515e-05, + "loss": 0.7129, + "step": 18667 + }, + { + "epoch": 2.97, + "learning_rate": 1.8610341920317915e-05, + "loss": 0.7048, + "step": 18668 + }, + { + "epoch": 2.97, + "learning_rate": 1.8607847745048297e-05, + "loss": 0.696, + "step": 18669 + }, + { + "epoch": 2.97, + "learning_rate": 1.860535363785121e-05, + "loss": 0.6611, + "step": 18670 + }, + { + "epoch": 2.97, + "learning_rate": 1.8602859598753234e-05, + "loss": 0.6109, + "step": 18671 + }, + { + "epoch": 2.97, + "learning_rate": 1.860036562778092e-05, + "loss": 0.6819, + "step": 18672 + }, + { + "epoch": 2.97, + "learning_rate": 1.8597871724960816e-05, + "loss": 0.6449, + "step": 18673 + }, + { + "epoch": 2.97, + "learning_rate": 1.8595377890319497e-05, + "loss": 0.7105, + "step": 18674 + }, + { + "epoch": 2.98, + "learning_rate": 1.859288412388352e-05, + "loss": 0.6143, + "step": 18675 + }, + { + "epoch": 2.98, + "learning_rate": 1.8590390425679437e-05, + "loss": 0.6744, + "step": 18676 + }, + { + "epoch": 2.98, + "learning_rate": 1.8587896795733807e-05, + "loss": 0.6928, + "step": 18677 + }, + { + "epoch": 2.98, + "learning_rate": 1.8585403234073172e-05, + "loss": 0.7628, + "step": 18678 + }, + { + "epoch": 2.98, + "learning_rate": 1.8582909740724108e-05, + "loss": 0.7088, + "step": 18679 + }, + { + "epoch": 2.98, + "learning_rate": 1.8580416315713157e-05, + "loss": 0.752, + "step": 18680 + }, + { + "epoch": 2.98, + "learning_rate": 1.857792295906687e-05, + "loss": 0.6966, + "step": 18681 + }, + { + "epoch": 2.98, + "learning_rate": 1.8575429670811806e-05, + "loss": 0.6043, + "step": 18682 + }, + { + "epoch": 2.98, + "learning_rate": 1.8572936450974522e-05, + "loss": 0.6437, + "step": 18683 + }, + { + "epoch": 2.98, + "learning_rate": 1.8570443299581552e-05, + "loss": 0.7187, + "step": 18684 + }, + { + "epoch": 2.98, + "learning_rate": 1.8567950216659463e-05, + "loss": 0.6158, + "step": 18685 + }, + { + "epoch": 2.98, + "learning_rate": 1.8565457202234804e-05, + "loss": 0.709, + "step": 18686 + }, + { + "epoch": 2.98, + "learning_rate": 1.8562964256334108e-05, + "loss": 0.6403, + "step": 18687 + }, + { + "epoch": 2.98, + "learning_rate": 1.8560471378983944e-05, + "loss": 0.7438, + "step": 18688 + }, + { + "epoch": 2.98, + "learning_rate": 1.8557978570210848e-05, + "loss": 0.726, + "step": 18689 + }, + { + "epoch": 2.98, + "learning_rate": 1.855548583004137e-05, + "loss": 0.6658, + "step": 18690 + }, + { + "epoch": 2.98, + "learning_rate": 1.855299315850205e-05, + "loss": 0.6675, + "step": 18691 + }, + { + "epoch": 2.98, + "learning_rate": 1.8550500555619442e-05, + "loss": 0.7117, + "step": 18692 + }, + { + "epoch": 2.98, + "learning_rate": 1.8548008021420092e-05, + "loss": 0.6741, + "step": 18693 + }, + { + "epoch": 2.98, + "learning_rate": 1.854551555593053e-05, + "loss": 0.6419, + "step": 18694 + }, + { + "epoch": 2.98, + "learning_rate": 1.854302315917732e-05, + "loss": 0.6718, + "step": 18695 + }, + { + "epoch": 2.98, + "learning_rate": 1.854053083118699e-05, + "loss": 0.6449, + "step": 18696 + }, + { + "epoch": 2.98, + "learning_rate": 1.8538038571986077e-05, + "loss": 0.6744, + "step": 18697 + }, + { + "epoch": 2.98, + "learning_rate": 1.8535546381601145e-05, + "loss": 0.6921, + "step": 18698 + }, + { + "epoch": 2.98, + "learning_rate": 1.853305426005871e-05, + "loss": 0.6777, + "step": 18699 + }, + { + "epoch": 2.98, + "learning_rate": 1.8530562207385322e-05, + "loss": 0.6931, + "step": 18700 + }, + { + "epoch": 2.98, + "learning_rate": 1.8528070223607517e-05, + "loss": 0.6829, + "step": 18701 + }, + { + "epoch": 2.98, + "learning_rate": 1.8525578308751835e-05, + "loss": 0.7113, + "step": 18702 + }, + { + "epoch": 2.98, + "learning_rate": 1.8523086462844817e-05, + "loss": 0.6957, + "step": 18703 + }, + { + "epoch": 2.98, + "learning_rate": 1.8520594685912997e-05, + "loss": 0.7369, + "step": 18704 + }, + { + "epoch": 2.98, + "learning_rate": 1.85181029779829e-05, + "loss": 0.6552, + "step": 18705 + }, + { + "epoch": 2.98, + "learning_rate": 1.8515611339081078e-05, + "loss": 0.6766, + "step": 18706 + }, + { + "epoch": 2.98, + "learning_rate": 1.8513119769234058e-05, + "loss": 0.6132, + "step": 18707 + }, + { + "epoch": 2.98, + "learning_rate": 1.8510628268468372e-05, + "loss": 0.6664, + "step": 18708 + }, + { + "epoch": 2.98, + "learning_rate": 1.850813683681056e-05, + "loss": 0.6746, + "step": 18709 + }, + { + "epoch": 2.98, + "learning_rate": 1.850564547428714e-05, + "loss": 0.703, + "step": 18710 + }, + { + "epoch": 2.98, + "learning_rate": 1.850315418092466e-05, + "loss": 0.6286, + "step": 18711 + }, + { + "epoch": 2.98, + "learning_rate": 1.8500662956749643e-05, + "loss": 0.7291, + "step": 18712 + }, + { + "epoch": 2.98, + "learning_rate": 1.849817180178862e-05, + "loss": 0.6255, + "step": 18713 + }, + { + "epoch": 2.98, + "learning_rate": 1.8495680716068116e-05, + "loss": 0.7052, + "step": 18714 + }, + { + "epoch": 2.98, + "learning_rate": 1.849318969961467e-05, + "loss": 0.6467, + "step": 18715 + }, + { + "epoch": 2.98, + "learning_rate": 1.84906987524548e-05, + "loss": 0.645, + "step": 18716 + }, + { + "epoch": 2.98, + "learning_rate": 1.8488207874615033e-05, + "loss": 0.6461, + "step": 18717 + }, + { + "epoch": 2.98, + "learning_rate": 1.8485717066121917e-05, + "loss": 0.64, + "step": 18718 + }, + { + "epoch": 2.98, + "learning_rate": 1.848322632700194e-05, + "loss": 0.6822, + "step": 18719 + }, + { + "epoch": 2.98, + "learning_rate": 1.848073565728165e-05, + "loss": 0.7673, + "step": 18720 + }, + { + "epoch": 2.98, + "learning_rate": 1.847824505698757e-05, + "loss": 0.703, + "step": 18721 + }, + { + "epoch": 2.98, + "learning_rate": 1.8475754526146215e-05, + "loss": 0.6567, + "step": 18722 + }, + { + "epoch": 2.98, + "learning_rate": 1.847326406478412e-05, + "loss": 0.7017, + "step": 18723 + }, + { + "epoch": 2.98, + "learning_rate": 1.84707736729278e-05, + "loss": 0.6963, + "step": 18724 + }, + { + "epoch": 2.98, + "learning_rate": 1.846828335060377e-05, + "loss": 0.751, + "step": 18725 + }, + { + "epoch": 2.98, + "learning_rate": 1.846579309783856e-05, + "loss": 0.6742, + "step": 18726 + }, + { + "epoch": 2.98, + "learning_rate": 1.846330291465869e-05, + "loss": 0.6747, + "step": 18727 + }, + { + "epoch": 2.98, + "learning_rate": 1.8460812801090667e-05, + "loss": 0.6268, + "step": 18728 + }, + { + "epoch": 2.98, + "learning_rate": 1.8458322757161026e-05, + "loss": 0.6641, + "step": 18729 + }, + { + "epoch": 2.98, + "learning_rate": 1.8455832782896274e-05, + "loss": 0.6317, + "step": 18730 + }, + { + "epoch": 2.98, + "learning_rate": 1.8453342878322927e-05, + "loss": 0.6834, + "step": 18731 + }, + { + "epoch": 2.98, + "learning_rate": 1.8450853043467506e-05, + "loss": 0.6432, + "step": 18732 + }, + { + "epoch": 2.98, + "learning_rate": 1.8448363278356524e-05, + "loss": 0.6535, + "step": 18733 + }, + { + "epoch": 2.98, + "learning_rate": 1.8445873583016493e-05, + "loss": 0.6428, + "step": 18734 + }, + { + "epoch": 2.98, + "learning_rate": 1.844338395747393e-05, + "loss": 0.6447, + "step": 18735 + }, + { + "epoch": 2.98, + "learning_rate": 1.8440894401755355e-05, + "loss": 0.7046, + "step": 18736 + }, + { + "epoch": 2.99, + "learning_rate": 1.843840491588726e-05, + "loss": 0.6844, + "step": 18737 + }, + { + "epoch": 2.99, + "learning_rate": 1.8435915499896187e-05, + "loss": 0.6756, + "step": 18738 + }, + { + "epoch": 2.99, + "learning_rate": 1.8433426153808613e-05, + "loss": 0.6268, + "step": 18739 + }, + { + "epoch": 2.99, + "learning_rate": 1.843093687765107e-05, + "loss": 0.6568, + "step": 18740 + }, + { + "epoch": 2.99, + "learning_rate": 1.842844767145006e-05, + "loss": 0.7257, + "step": 18741 + }, + { + "epoch": 2.99, + "learning_rate": 1.8425958535232087e-05, + "loss": 0.6608, + "step": 18742 + }, + { + "epoch": 2.99, + "learning_rate": 1.842346946902367e-05, + "loss": 0.6535, + "step": 18743 + }, + { + "epoch": 2.99, + "learning_rate": 1.8420980472851312e-05, + "loss": 0.7507, + "step": 18744 + }, + { + "epoch": 2.99, + "learning_rate": 1.841849154674151e-05, + "loss": 0.6149, + "step": 18745 + }, + { + "epoch": 2.99, + "learning_rate": 1.8416002690720786e-05, + "loss": 0.6609, + "step": 18746 + }, + { + "epoch": 2.99, + "learning_rate": 1.8413513904815632e-05, + "loss": 0.5785, + "step": 18747 + }, + { + "epoch": 2.99, + "learning_rate": 1.8411025189052555e-05, + "loss": 0.609, + "step": 18748 + }, + { + "epoch": 2.99, + "learning_rate": 1.840853654345806e-05, + "loss": 0.6366, + "step": 18749 + }, + { + "epoch": 2.99, + "learning_rate": 1.8406047968058653e-05, + "loss": 0.6627, + "step": 18750 + }, + { + "epoch": 2.99, + "learning_rate": 1.8403559462880824e-05, + "loss": 0.6903, + "step": 18751 + }, + { + "epoch": 2.99, + "learning_rate": 1.8401071027951087e-05, + "loss": 0.6235, + "step": 18752 + }, + { + "epoch": 2.99, + "learning_rate": 1.8398582663295944e-05, + "loss": 0.6587, + "step": 18753 + }, + { + "epoch": 2.99, + "learning_rate": 1.8396094368941875e-05, + "loss": 0.6277, + "step": 18754 + }, + { + "epoch": 2.99, + "learning_rate": 1.8393606144915397e-05, + "loss": 0.689, + "step": 18755 + }, + { + "epoch": 2.99, + "learning_rate": 1.8391117991243008e-05, + "loss": 0.622, + "step": 18756 + }, + { + "epoch": 2.99, + "learning_rate": 1.8388629907951192e-05, + "loss": 0.6693, + "step": 18757 + }, + { + "epoch": 2.99, + "learning_rate": 1.8386141895066467e-05, + "loss": 0.7121, + "step": 18758 + }, + { + "epoch": 2.99, + "learning_rate": 1.8383653952615306e-05, + "loss": 0.6925, + "step": 18759 + }, + { + "epoch": 2.99, + "learning_rate": 1.838116608062421e-05, + "loss": 0.6436, + "step": 18760 + }, + { + "epoch": 2.99, + "learning_rate": 1.837867827911968e-05, + "loss": 0.6727, + "step": 18761 + }, + { + "epoch": 2.99, + "learning_rate": 1.8376190548128205e-05, + "loss": 0.7052, + "step": 18762 + }, + { + "epoch": 2.99, + "learning_rate": 1.837370288767628e-05, + "loss": 0.6816, + "step": 18763 + }, + { + "epoch": 2.99, + "learning_rate": 1.83712152977904e-05, + "loss": 0.7443, + "step": 18764 + }, + { + "epoch": 2.99, + "learning_rate": 1.8368727778497048e-05, + "loss": 0.6697, + "step": 18765 + }, + { + "epoch": 2.99, + "learning_rate": 1.836624032982272e-05, + "loss": 0.7201, + "step": 18766 + }, + { + "epoch": 2.99, + "learning_rate": 1.8363752951793908e-05, + "loss": 0.6373, + "step": 18767 + }, + { + "epoch": 2.99, + "learning_rate": 1.8361265644437094e-05, + "loss": 0.6174, + "step": 18768 + }, + { + "epoch": 2.99, + "learning_rate": 1.8358778407778772e-05, + "loss": 0.6442, + "step": 18769 + }, + { + "epoch": 2.99, + "learning_rate": 1.835629124184543e-05, + "loss": 0.6408, + "step": 18770 + }, + { + "epoch": 2.99, + "learning_rate": 1.8353804146663546e-05, + "loss": 0.6592, + "step": 18771 + }, + { + "epoch": 2.99, + "learning_rate": 1.835131712225962e-05, + "loss": 0.6851, + "step": 18772 + }, + { + "epoch": 2.99, + "learning_rate": 1.834883016866013e-05, + "loss": 0.6317, + "step": 18773 + }, + { + "epoch": 2.99, + "learning_rate": 1.8346343285891555e-05, + "loss": 0.6099, + "step": 18774 + }, + { + "epoch": 2.99, + "learning_rate": 1.8343856473980388e-05, + "loss": 0.715, + "step": 18775 + }, + { + "epoch": 2.99, + "learning_rate": 1.8341369732953112e-05, + "loss": 0.641, + "step": 18776 + }, + { + "epoch": 2.99, + "learning_rate": 1.83388830628362e-05, + "loss": 0.6519, + "step": 18777 + }, + { + "epoch": 2.99, + "learning_rate": 1.8336396463656154e-05, + "loss": 0.6688, + "step": 18778 + }, + { + "epoch": 2.99, + "learning_rate": 1.8333909935439418e-05, + "loss": 0.6863, + "step": 18779 + }, + { + "epoch": 2.99, + "learning_rate": 1.833142347821251e-05, + "loss": 0.6768, + "step": 18780 + }, + { + "epoch": 2.99, + "learning_rate": 1.8328937092001887e-05, + "loss": 0.6037, + "step": 18781 + }, + { + "epoch": 2.99, + "learning_rate": 1.8326450776834033e-05, + "loss": 0.6624, + "step": 18782 + }, + { + "epoch": 2.99, + "learning_rate": 1.8323964532735427e-05, + "loss": 0.7053, + "step": 18783 + }, + { + "epoch": 2.99, + "learning_rate": 1.8321478359732545e-05, + "loss": 0.7158, + "step": 18784 + }, + { + "epoch": 2.99, + "learning_rate": 1.8318992257851862e-05, + "loss": 0.6466, + "step": 18785 + }, + { + "epoch": 2.99, + "learning_rate": 1.8316506227119863e-05, + "loss": 0.6423, + "step": 18786 + }, + { + "epoch": 2.99, + "learning_rate": 1.8314020267563013e-05, + "loss": 0.8118, + "step": 18787 + }, + { + "epoch": 2.99, + "learning_rate": 1.8311534379207782e-05, + "loss": 0.6332, + "step": 18788 + }, + { + "epoch": 2.99, + "learning_rate": 1.8309048562080654e-05, + "loss": 0.7463, + "step": 18789 + }, + { + "epoch": 2.99, + "learning_rate": 1.8306562816208095e-05, + "loss": 0.6689, + "step": 18790 + }, + { + "epoch": 2.99, + "learning_rate": 1.8304077141616577e-05, + "loss": 0.6366, + "step": 18791 + }, + { + "epoch": 2.99, + "learning_rate": 1.8301591538332575e-05, + "loss": 0.6936, + "step": 18792 + }, + { + "epoch": 2.99, + "learning_rate": 1.8299106006382555e-05, + "loss": 0.7327, + "step": 18793 + }, + { + "epoch": 2.99, + "learning_rate": 1.8296620545792986e-05, + "loss": 0.7398, + "step": 18794 + }, + { + "epoch": 2.99, + "learning_rate": 1.829413515659034e-05, + "loss": 0.7184, + "step": 18795 + }, + { + "epoch": 2.99, + "learning_rate": 1.8291649838801088e-05, + "loss": 0.6701, + "step": 18796 + }, + { + "epoch": 2.99, + "learning_rate": 1.8289164592451692e-05, + "loss": 0.6816, + "step": 18797 + }, + { + "epoch": 2.99, + "learning_rate": 1.828667941756862e-05, + "loss": 0.6762, + "step": 18798 + }, + { + "epoch": 2.99, + "learning_rate": 1.8284194314178327e-05, + "loss": 0.6217, + "step": 18799 + }, + { + "epoch": 3.0, + "learning_rate": 1.8281709282307295e-05, + "loss": 0.636, + "step": 18800 + }, + { + "epoch": 3.0, + "learning_rate": 1.827922432198198e-05, + "loss": 0.6378, + "step": 18801 + }, + { + "epoch": 3.0, + "learning_rate": 1.827673943322884e-05, + "loss": 0.6744, + "step": 18802 + }, + { + "epoch": 3.0, + "learning_rate": 1.8274254616074347e-05, + "loss": 0.6351, + "step": 18803 + }, + { + "epoch": 3.0, + "learning_rate": 1.8271769870544958e-05, + "loss": 0.7205, + "step": 18804 + }, + { + "epoch": 3.0, + "learning_rate": 1.8269285196667132e-05, + "loss": 0.7275, + "step": 18805 + }, + { + "epoch": 3.0, + "learning_rate": 1.8266800594467336e-05, + "loss": 0.6892, + "step": 18806 + }, + { + "epoch": 3.0, + "learning_rate": 1.8264316063972032e-05, + "loss": 0.6186, + "step": 18807 + }, + { + "epoch": 3.0, + "learning_rate": 1.8261831605207663e-05, + "loss": 0.6651, + "step": 18808 + }, + { + "epoch": 3.0, + "learning_rate": 1.8259347218200697e-05, + "loss": 0.6904, + "step": 18809 + }, + { + "epoch": 3.0, + "learning_rate": 1.8256862902977594e-05, + "loss": 0.7286, + "step": 18810 + }, + { + "epoch": 3.0, + "learning_rate": 1.8254378659564812e-05, + "loss": 0.6698, + "step": 18811 + }, + { + "epoch": 3.0, + "learning_rate": 1.825189448798879e-05, + "loss": 0.6548, + "step": 18812 + }, + { + "epoch": 3.0, + "learning_rate": 1.8249410388276006e-05, + "loss": 0.7238, + "step": 18813 + }, + { + "epoch": 3.0, + "learning_rate": 1.82469263604529e-05, + "loss": 0.6624, + "step": 18814 + }, + { + "epoch": 3.0, + "learning_rate": 1.8244442404545923e-05, + "loss": 0.7322, + "step": 18815 + }, + { + "epoch": 3.0, + "learning_rate": 1.8241958520581538e-05, + "loss": 0.639, + "step": 18816 + }, + { + "epoch": 3.0, + "learning_rate": 1.823947470858619e-05, + "loss": 0.707, + "step": 18817 + }, + { + "epoch": 3.0, + "learning_rate": 1.823699096858633e-05, + "loss": 0.6416, + "step": 18818 + }, + { + "epoch": 3.0, + "learning_rate": 1.823450730060842e-05, + "loss": 0.687, + "step": 18819 + }, + { + "epoch": 3.0, + "learning_rate": 1.8232023704678897e-05, + "loss": 0.7344, + "step": 18820 + }, + { + "epoch": 3.0, + "learning_rate": 1.8229540180824206e-05, + "loss": 0.6895, + "step": 18821 + }, + { + "epoch": 3.0, + "learning_rate": 1.8227056729070806e-05, + "loss": 0.6663, + "step": 18822 + }, + { + "epoch": 3.0, + "learning_rate": 1.8224573349445134e-05, + "loss": 0.6667, + "step": 18823 + }, + { + "epoch": 3.0, + "learning_rate": 1.822209004197365e-05, + "loss": 0.6787, + "step": 18824 + }, + { + "epoch": 3.0, + "learning_rate": 1.821960680668279e-05, + "loss": 0.7155, + "step": 18825 + }, + { + "epoch": 3.0, + "learning_rate": 1.8217123643598993e-05, + "loss": 0.6283, + "step": 18826 + }, + { + "epoch": 3.0, + "learning_rate": 1.821464055274872e-05, + "loss": 0.6472, + "step": 18827 + }, + { + "epoch": 3.0, + "learning_rate": 1.8212157534158405e-05, + "loss": 0.6957, + "step": 18828 + }, + { + "epoch": 3.0, + "learning_rate": 1.8209674587854487e-05, + "loss": 0.6965, + "step": 18829 + }, + { + "epoch": 3.0, + "learning_rate": 1.8207191713863413e-05, + "loss": 0.6701, + "step": 18830 + }, + { + "epoch": 3.0, + "learning_rate": 1.820470891221163e-05, + "loss": 0.719, + "step": 18831 + }, + { + "epoch": 3.0, + "learning_rate": 1.820222618292556e-05, + "loss": 0.6479, + "step": 18832 + }, + { + "epoch": 3.0, + "learning_rate": 1.8199743526031664e-05, + "loss": 0.6272, + "step": 18833 + }, + { + "epoch": 3.0, + "learning_rate": 1.819726094155637e-05, + "loss": 0.6119, + "step": 18834 + }, + { + "epoch": 3.0, + "learning_rate": 1.8194778429526112e-05, + "loss": 0.7519, + "step": 18835 + }, + { + "epoch": 3.0, + "learning_rate": 1.8192295989967333e-05, + "loss": 0.6521, + "step": 18836 + }, + { + "epoch": 3.0, + "learning_rate": 1.818981362290647e-05, + "loss": 0.6391, + "step": 18837 + }, + { + "epoch": 3.0, + "learning_rate": 1.8187331328369957e-05, + "loss": 0.6295, + "step": 18838 + }, + { + "epoch": 3.0, + "learning_rate": 1.818484910638424e-05, + "loss": 0.6674, + "step": 18839 + }, + { + "epoch": 3.0, + "learning_rate": 1.818236695697573e-05, + "loss": 0.6321, + "step": 18840 + }, + { + "epoch": 3.0, + "learning_rate": 1.8179884880170877e-05, + "loss": 0.6519, + "step": 18841 + }, + { + "epoch": 3.0, + "learning_rate": 1.817740287599611e-05, + "loss": 0.6641, + "step": 18842 + }, + { + "epoch": 3.0, + "learning_rate": 1.8174920944477853e-05, + "loss": 0.6233, + "step": 18843 + }, + { + "epoch": 3.0, + "learning_rate": 1.817243908564255e-05, + "loss": 0.6982, + "step": 18844 + }, + { + "epoch": 3.0, + "learning_rate": 1.8169957299516626e-05, + "loss": 0.574, + "step": 18845 + }, + { + "epoch": 3.0, + "learning_rate": 1.8167475586126505e-05, + "loss": 0.6576, + "step": 18846 + }, + { + "epoch": 3.0, + "learning_rate": 1.8164993945498627e-05, + "loss": 0.6509, + "step": 18847 + }, + { + "epoch": 3.0, + "learning_rate": 1.816251237765941e-05, + "loss": 0.6655, + "step": 18848 + }, + { + "epoch": 3.0, + "learning_rate": 1.8160030882635288e-05, + "loss": 0.7043, + "step": 18849 + }, + { + "epoch": 3.0, + "learning_rate": 1.8157549460452685e-05, + "loss": 0.6649, + "step": 18850 + }, + { + "epoch": 3.0, + "learning_rate": 1.8155068111138025e-05, + "loss": 0.6185, + "step": 18851 + }, + { + "epoch": 3.0, + "learning_rate": 1.8152586834717727e-05, + "loss": 0.6855, + "step": 18852 + }, + { + "epoch": 3.0, + "learning_rate": 1.8150105631218227e-05, + "loss": 0.6194, + "step": 18853 + }, + { + "epoch": 3.0, + "learning_rate": 1.814762450066595e-05, + "loss": 0.67, + "step": 18854 + }, + { + "epoch": 3.0, + "learning_rate": 1.8145143443087303e-05, + "loss": 0.6673, + "step": 18855 + }, + { + "epoch": 3.0, + "learning_rate": 1.814266245850872e-05, + "loss": 0.6208, + "step": 18856 + }, + { + "epoch": 3.0, + "learning_rate": 1.814018154695662e-05, + "loss": 0.6676, + "step": 18857 + }, + { + "epoch": 3.0, + "learning_rate": 1.8137700708457416e-05, + "loss": 0.6501, + "step": 18858 + }, + { + "epoch": 3.0, + "learning_rate": 1.8135219943037553e-05, + "loss": 0.6275, + "step": 18859 + }, + { + "epoch": 3.0, + "learning_rate": 1.813273925072341e-05, + "loss": 0.7191, + "step": 18860 + }, + { + "epoch": 3.0, + "learning_rate": 1.813025863154143e-05, + "loss": 0.6752, + "step": 18861 + }, + { + "epoch": 3.0, + "learning_rate": 1.8127778085518028e-05, + "loss": 0.6724, + "step": 18862 + }, + { + "epoch": 3.01, + "learning_rate": 1.8125297612679608e-05, + "loss": 0.6358, + "step": 18863 + }, + { + "epoch": 3.01, + "learning_rate": 1.81228172130526e-05, + "loss": 0.6094, + "step": 18864 + }, + { + "epoch": 3.01, + "learning_rate": 1.8120336886663415e-05, + "loss": 0.6958, + "step": 18865 + }, + { + "epoch": 3.01, + "learning_rate": 1.811785663353846e-05, + "loss": 0.6951, + "step": 18866 + }, + { + "epoch": 3.01, + "learning_rate": 1.8115376453704162e-05, + "loss": 0.6296, + "step": 18867 + }, + { + "epoch": 3.01, + "learning_rate": 1.8112896347186923e-05, + "loss": 0.6724, + "step": 18868 + }, + { + "epoch": 3.01, + "learning_rate": 1.811041631401315e-05, + "loss": 0.6175, + "step": 18869 + }, + { + "epoch": 3.01, + "learning_rate": 1.8107936354209265e-05, + "loss": 0.6497, + "step": 18870 + }, + { + "epoch": 3.01, + "learning_rate": 1.8105456467801675e-05, + "loss": 0.6641, + "step": 18871 + }, + { + "epoch": 3.01, + "learning_rate": 1.8102976654816784e-05, + "loss": 0.6117, + "step": 18872 + }, + { + "epoch": 3.01, + "learning_rate": 1.8100496915281007e-05, + "loss": 0.5997, + "step": 18873 + }, + { + "epoch": 3.01, + "learning_rate": 1.809801724922075e-05, + "loss": 0.5863, + "step": 18874 + }, + { + "epoch": 3.01, + "learning_rate": 1.8095537656662415e-05, + "loss": 0.6787, + "step": 18875 + }, + { + "epoch": 3.01, + "learning_rate": 1.8093058137632415e-05, + "loss": 0.6619, + "step": 18876 + }, + { + "epoch": 3.01, + "learning_rate": 1.8090578692157153e-05, + "loss": 0.7572, + "step": 18877 + }, + { + "epoch": 3.01, + "learning_rate": 1.808809932026303e-05, + "loss": 0.679, + "step": 18878 + }, + { + "epoch": 3.01, + "learning_rate": 1.808562002197647e-05, + "loss": 0.6293, + "step": 18879 + }, + { + "epoch": 3.01, + "learning_rate": 1.808314079732384e-05, + "loss": 0.6937, + "step": 18880 + }, + { + "epoch": 3.01, + "learning_rate": 1.8080661646331565e-05, + "loss": 0.6791, + "step": 18881 + }, + { + "epoch": 3.01, + "learning_rate": 1.807818256902604e-05, + "loss": 0.6366, + "step": 18882 + }, + { + "epoch": 3.01, + "learning_rate": 1.807570356543367e-05, + "loss": 0.636, + "step": 18883 + }, + { + "epoch": 3.01, + "learning_rate": 1.8073224635580856e-05, + "loss": 0.6414, + "step": 18884 + }, + { + "epoch": 3.01, + "learning_rate": 1.8070745779494e-05, + "loss": 0.6434, + "step": 18885 + }, + { + "epoch": 3.01, + "learning_rate": 1.806826699719948e-05, + "loss": 0.6449, + "step": 18886 + }, + { + "epoch": 3.01, + "learning_rate": 1.8065788288723716e-05, + "loss": 0.6314, + "step": 18887 + }, + { + "epoch": 3.01, + "learning_rate": 1.8063309654093097e-05, + "loss": 0.6567, + "step": 18888 + }, + { + "epoch": 3.01, + "learning_rate": 1.8060831093334015e-05, + "loss": 0.6427, + "step": 18889 + }, + { + "epoch": 3.01, + "learning_rate": 1.8058352606472875e-05, + "loss": 0.7214, + "step": 18890 + }, + { + "epoch": 3.01, + "learning_rate": 1.8055874193536064e-05, + "loss": 0.6295, + "step": 18891 + }, + { + "epoch": 3.01, + "learning_rate": 1.805339585454997e-05, + "loss": 0.6675, + "step": 18892 + }, + { + "epoch": 3.01, + "learning_rate": 1.8050917589541e-05, + "loss": 0.6883, + "step": 18893 + }, + { + "epoch": 3.01, + "learning_rate": 1.804843939853554e-05, + "loss": 0.569, + "step": 18894 + }, + { + "epoch": 3.01, + "learning_rate": 1.8045961281559975e-05, + "loss": 0.6593, + "step": 18895 + }, + { + "epoch": 3.01, + "learning_rate": 1.8043483238640702e-05, + "loss": 0.6176, + "step": 18896 + }, + { + "epoch": 3.01, + "learning_rate": 1.8041005269804113e-05, + "loss": 0.5913, + "step": 18897 + }, + { + "epoch": 3.01, + "learning_rate": 1.8038527375076585e-05, + "loss": 0.6224, + "step": 18898 + }, + { + "epoch": 3.01, + "learning_rate": 1.803604955448453e-05, + "loss": 0.6104, + "step": 18899 + }, + { + "epoch": 3.01, + "learning_rate": 1.8033571808054307e-05, + "loss": 0.6482, + "step": 18900 + }, + { + "epoch": 3.01, + "learning_rate": 1.8031094135812315e-05, + "loss": 0.6621, + "step": 18901 + }, + { + "epoch": 3.01, + "learning_rate": 1.802861653778494e-05, + "loss": 0.6454, + "step": 18902 + }, + { + "epoch": 3.01, + "learning_rate": 1.8026139013998568e-05, + "loss": 0.6211, + "step": 18903 + }, + { + "epoch": 3.01, + "learning_rate": 1.8023661564479576e-05, + "loss": 0.6947, + "step": 18904 + }, + { + "epoch": 3.01, + "learning_rate": 1.8021184189254363e-05, + "loss": 0.6597, + "step": 18905 + }, + { + "epoch": 3.01, + "learning_rate": 1.801870688834929e-05, + "loss": 0.6422, + "step": 18906 + }, + { + "epoch": 3.01, + "learning_rate": 1.8016229661790756e-05, + "loss": 0.6369, + "step": 18907 + }, + { + "epoch": 3.01, + "learning_rate": 1.801375250960514e-05, + "loss": 0.6521, + "step": 18908 + }, + { + "epoch": 3.01, + "learning_rate": 1.801127543181881e-05, + "loss": 0.6781, + "step": 18909 + }, + { + "epoch": 3.01, + "learning_rate": 1.800879842845816e-05, + "loss": 0.6094, + "step": 18910 + }, + { + "epoch": 3.01, + "learning_rate": 1.800632149954956e-05, + "loss": 0.6565, + "step": 18911 + }, + { + "epoch": 3.01, + "learning_rate": 1.8003844645119387e-05, + "loss": 0.6402, + "step": 18912 + }, + { + "epoch": 3.01, + "learning_rate": 1.8001367865194028e-05, + "loss": 0.5531, + "step": 18913 + }, + { + "epoch": 3.01, + "learning_rate": 1.799889115979985e-05, + "loss": 0.6303, + "step": 18914 + }, + { + "epoch": 3.01, + "learning_rate": 1.799641452896323e-05, + "loss": 0.704, + "step": 18915 + }, + { + "epoch": 3.01, + "learning_rate": 1.799393797271054e-05, + "loss": 0.6956, + "step": 18916 + }, + { + "epoch": 3.01, + "learning_rate": 1.7991461491068163e-05, + "loss": 0.6428, + "step": 18917 + }, + { + "epoch": 3.01, + "learning_rate": 1.7988985084062462e-05, + "loss": 0.6217, + "step": 18918 + }, + { + "epoch": 3.01, + "learning_rate": 1.798650875171981e-05, + "loss": 0.6377, + "step": 18919 + }, + { + "epoch": 3.01, + "learning_rate": 1.7984032494066593e-05, + "loss": 0.6333, + "step": 18920 + }, + { + "epoch": 3.01, + "learning_rate": 1.7981556311129168e-05, + "loss": 0.6064, + "step": 18921 + }, + { + "epoch": 3.01, + "learning_rate": 1.7979080202933902e-05, + "loss": 0.6549, + "step": 18922 + }, + { + "epoch": 3.01, + "learning_rate": 1.7976604169507167e-05, + "loss": 0.7351, + "step": 18923 + }, + { + "epoch": 3.01, + "learning_rate": 1.7974128210875338e-05, + "loss": 0.6799, + "step": 18924 + }, + { + "epoch": 3.01, + "learning_rate": 1.797165232706478e-05, + "loss": 0.6226, + "step": 18925 + }, + { + "epoch": 3.02, + "learning_rate": 1.7969176518101856e-05, + "loss": 0.6463, + "step": 18926 + }, + { + "epoch": 3.02, + "learning_rate": 1.7966700784012925e-05, + "loss": 0.639, + "step": 18927 + }, + { + "epoch": 3.02, + "learning_rate": 1.7964225124824367e-05, + "loss": 0.6799, + "step": 18928 + }, + { + "epoch": 3.02, + "learning_rate": 1.7961749540562544e-05, + "loss": 0.6292, + "step": 18929 + }, + { + "epoch": 3.02, + "learning_rate": 1.795927403125381e-05, + "loss": 0.6309, + "step": 18930 + }, + { + "epoch": 3.02, + "learning_rate": 1.7956798596924533e-05, + "loss": 0.6593, + "step": 18931 + }, + { + "epoch": 3.02, + "learning_rate": 1.795432323760108e-05, + "loss": 0.5729, + "step": 18932 + }, + { + "epoch": 3.02, + "learning_rate": 1.7951847953309804e-05, + "loss": 0.6483, + "step": 18933 + }, + { + "epoch": 3.02, + "learning_rate": 1.7949372744077066e-05, + "loss": 0.6291, + "step": 18934 + }, + { + "epoch": 3.02, + "learning_rate": 1.794689760992923e-05, + "loss": 0.6058, + "step": 18935 + }, + { + "epoch": 3.02, + "learning_rate": 1.7944422550892652e-05, + "loss": 0.6179, + "step": 18936 + }, + { + "epoch": 3.02, + "learning_rate": 1.794194756699369e-05, + "loss": 0.6961, + "step": 18937 + }, + { + "epoch": 3.02, + "learning_rate": 1.7939472658258706e-05, + "loss": 0.672, + "step": 18938 + }, + { + "epoch": 3.02, + "learning_rate": 1.7936997824714042e-05, + "loss": 0.6452, + "step": 18939 + }, + { + "epoch": 3.02, + "learning_rate": 1.7934523066386084e-05, + "loss": 0.6778, + "step": 18940 + }, + { + "epoch": 3.02, + "learning_rate": 1.7932048383301143e-05, + "loss": 0.6929, + "step": 18941 + }, + { + "epoch": 3.02, + "learning_rate": 1.7929573775485607e-05, + "loss": 0.6782, + "step": 18942 + }, + { + "epoch": 3.02, + "learning_rate": 1.7927099242965814e-05, + "loss": 0.7187, + "step": 18943 + }, + { + "epoch": 3.02, + "learning_rate": 1.7924624785768118e-05, + "loss": 0.7065, + "step": 18944 + }, + { + "epoch": 3.02, + "learning_rate": 1.7922150403918875e-05, + "loss": 0.6567, + "step": 18945 + }, + { + "epoch": 3.02, + "learning_rate": 1.7919676097444434e-05, + "loss": 0.6712, + "step": 18946 + }, + { + "epoch": 3.02, + "learning_rate": 1.7917201866371138e-05, + "loss": 0.6261, + "step": 18947 + }, + { + "epoch": 3.02, + "learning_rate": 1.791472771072535e-05, + "loss": 0.6309, + "step": 18948 + }, + { + "epoch": 3.02, + "learning_rate": 1.7912253630533406e-05, + "loss": 0.7455, + "step": 18949 + }, + { + "epoch": 3.02, + "learning_rate": 1.7909779625821655e-05, + "loss": 0.6875, + "step": 18950 + }, + { + "epoch": 3.02, + "learning_rate": 1.790730569661645e-05, + "loss": 0.6522, + "step": 18951 + }, + { + "epoch": 3.02, + "learning_rate": 1.7904831842944136e-05, + "loss": 0.6965, + "step": 18952 + }, + { + "epoch": 3.02, + "learning_rate": 1.790235806483105e-05, + "loss": 0.6944, + "step": 18953 + }, + { + "epoch": 3.02, + "learning_rate": 1.789988436230354e-05, + "loss": 0.6807, + "step": 18954 + }, + { + "epoch": 3.02, + "learning_rate": 1.789741073538796e-05, + "loss": 0.6073, + "step": 18955 + }, + { + "epoch": 3.02, + "learning_rate": 1.7894937184110636e-05, + "loss": 0.6711, + "step": 18956 + }, + { + "epoch": 3.02, + "learning_rate": 1.789246370849792e-05, + "loss": 0.5869, + "step": 18957 + }, + { + "epoch": 3.02, + "learning_rate": 1.7889990308576155e-05, + "loss": 0.6457, + "step": 18958 + }, + { + "epoch": 3.02, + "learning_rate": 1.7887516984371667e-05, + "loss": 0.6871, + "step": 18959 + }, + { + "epoch": 3.02, + "learning_rate": 1.7885043735910824e-05, + "loss": 0.6447, + "step": 18960 + }, + { + "epoch": 3.02, + "learning_rate": 1.7882570563219923e-05, + "loss": 0.6054, + "step": 18961 + }, + { + "epoch": 3.02, + "learning_rate": 1.788009746632534e-05, + "loss": 0.6475, + "step": 18962 + }, + { + "epoch": 3.02, + "learning_rate": 1.7877624445253388e-05, + "loss": 0.5785, + "step": 18963 + }, + { + "epoch": 3.02, + "learning_rate": 1.787515150003041e-05, + "loss": 0.6288, + "step": 18964 + }, + { + "epoch": 3.02, + "learning_rate": 1.7872678630682747e-05, + "loss": 0.6635, + "step": 18965 + }, + { + "epoch": 3.02, + "learning_rate": 1.787020583723673e-05, + "loss": 0.6937, + "step": 18966 + }, + { + "epoch": 3.02, + "learning_rate": 1.7867733119718687e-05, + "loss": 0.7566, + "step": 18967 + }, + { + "epoch": 3.02, + "learning_rate": 1.786526047815496e-05, + "loss": 0.6397, + "step": 18968 + }, + { + "epoch": 3.02, + "learning_rate": 1.786278791257188e-05, + "loss": 0.6254, + "step": 18969 + }, + { + "epoch": 3.02, + "learning_rate": 1.786031542299577e-05, + "loss": 0.6571, + "step": 18970 + }, + { + "epoch": 3.02, + "learning_rate": 1.7857843009452967e-05, + "loss": 0.6448, + "step": 18971 + }, + { + "epoch": 3.02, + "learning_rate": 1.7855370671969807e-05, + "loss": 0.6391, + "step": 18972 + }, + { + "epoch": 3.02, + "learning_rate": 1.78528984105726e-05, + "loss": 0.6536, + "step": 18973 + }, + { + "epoch": 3.02, + "learning_rate": 1.7850426225287696e-05, + "loss": 0.6248, + "step": 18974 + }, + { + "epoch": 3.02, + "learning_rate": 1.7847954116141407e-05, + "loss": 0.6326, + "step": 18975 + }, + { + "epoch": 3.02, + "learning_rate": 1.7845482083160063e-05, + "loss": 0.6106, + "step": 18976 + }, + { + "epoch": 3.02, + "learning_rate": 1.7843010126369996e-05, + "loss": 0.6131, + "step": 18977 + }, + { + "epoch": 3.02, + "learning_rate": 1.7840538245797527e-05, + "loss": 0.6489, + "step": 18978 + }, + { + "epoch": 3.02, + "learning_rate": 1.783806644146897e-05, + "loss": 0.7062, + "step": 18979 + }, + { + "epoch": 3.02, + "learning_rate": 1.7835594713410677e-05, + "loss": 0.6391, + "step": 18980 + }, + { + "epoch": 3.02, + "learning_rate": 1.7833123061648935e-05, + "loss": 0.6718, + "step": 18981 + }, + { + "epoch": 3.02, + "learning_rate": 1.7830651486210086e-05, + "loss": 0.651, + "step": 18982 + }, + { + "epoch": 3.02, + "learning_rate": 1.7828179987120442e-05, + "loss": 0.6481, + "step": 18983 + }, + { + "epoch": 3.02, + "learning_rate": 1.7825708564406326e-05, + "loss": 0.6433, + "step": 18984 + }, + { + "epoch": 3.02, + "learning_rate": 1.7823237218094064e-05, + "loss": 0.6972, + "step": 18985 + }, + { + "epoch": 3.02, + "learning_rate": 1.7820765948209967e-05, + "loss": 0.6863, + "step": 18986 + }, + { + "epoch": 3.02, + "learning_rate": 1.7818294754780345e-05, + "loss": 0.6623, + "step": 18987 + }, + { + "epoch": 3.03, + "learning_rate": 1.7815823637831536e-05, + "loss": 0.6332, + "step": 18988 + }, + { + "epoch": 3.03, + "learning_rate": 1.7813352597389837e-05, + "loss": 0.6588, + "step": 18989 + }, + { + "epoch": 3.03, + "learning_rate": 1.781088163348157e-05, + "loss": 0.6031, + "step": 18990 + }, + { + "epoch": 3.03, + "learning_rate": 1.780841074613305e-05, + "loss": 0.5872, + "step": 18991 + }, + { + "epoch": 3.03, + "learning_rate": 1.780593993537059e-05, + "loss": 0.7457, + "step": 18992 + }, + { + "epoch": 3.03, + "learning_rate": 1.78034692012205e-05, + "loss": 0.6245, + "step": 18993 + }, + { + "epoch": 3.03, + "learning_rate": 1.7800998543709096e-05, + "loss": 0.6601, + "step": 18994 + }, + { + "epoch": 3.03, + "learning_rate": 1.7798527962862686e-05, + "loss": 0.6321, + "step": 18995 + }, + { + "epoch": 3.03, + "learning_rate": 1.7796057458707582e-05, + "loss": 0.7412, + "step": 18996 + }, + { + "epoch": 3.03, + "learning_rate": 1.779358703127009e-05, + "loss": 0.6282, + "step": 18997 + }, + { + "epoch": 3.03, + "learning_rate": 1.7791116680576526e-05, + "loss": 0.5921, + "step": 18998 + }, + { + "epoch": 3.03, + "learning_rate": 1.7788646406653186e-05, + "loss": 0.6963, + "step": 18999 + }, + { + "epoch": 3.03, + "learning_rate": 1.77861762095264e-05, + "loss": 0.6405, + "step": 19000 + }, + { + "epoch": 3.03, + "learning_rate": 1.7783706089222443e-05, + "loss": 0.5924, + "step": 19001 + }, + { + "epoch": 3.03, + "learning_rate": 1.778123604576764e-05, + "loss": 0.6153, + "step": 19002 + }, + { + "epoch": 3.03, + "learning_rate": 1.7778766079188287e-05, + "loss": 0.6985, + "step": 19003 + }, + { + "epoch": 3.03, + "learning_rate": 1.777629618951069e-05, + "loss": 0.6994, + "step": 19004 + }, + { + "epoch": 3.03, + "learning_rate": 1.7773826376761153e-05, + "loss": 0.6356, + "step": 19005 + }, + { + "epoch": 3.03, + "learning_rate": 1.7771356640965986e-05, + "loss": 0.6346, + "step": 19006 + }, + { + "epoch": 3.03, + "learning_rate": 1.7768886982151474e-05, + "loss": 0.6935, + "step": 19007 + }, + { + "epoch": 3.03, + "learning_rate": 1.7766417400343925e-05, + "loss": 0.5939, + "step": 19008 + }, + { + "epoch": 3.03, + "learning_rate": 1.7763947895569642e-05, + "loss": 0.6266, + "step": 19009 + }, + { + "epoch": 3.03, + "learning_rate": 1.776147846785492e-05, + "loss": 0.6147, + "step": 19010 + }, + { + "epoch": 3.03, + "learning_rate": 1.775900911722606e-05, + "loss": 0.6497, + "step": 19011 + }, + { + "epoch": 3.03, + "learning_rate": 1.7756539843709354e-05, + "loss": 0.575, + "step": 19012 + }, + { + "epoch": 3.03, + "learning_rate": 1.77540706473311e-05, + "loss": 0.6733, + "step": 19013 + }, + { + "epoch": 3.03, + "learning_rate": 1.7751601528117596e-05, + "loss": 0.6296, + "step": 19014 + }, + { + "epoch": 3.03, + "learning_rate": 1.774913248609514e-05, + "loss": 0.6956, + "step": 19015 + }, + { + "epoch": 3.03, + "learning_rate": 1.774666352129001e-05, + "loss": 0.6247, + "step": 19016 + }, + { + "epoch": 3.03, + "learning_rate": 1.774419463372852e-05, + "loss": 0.6649, + "step": 19017 + }, + { + "epoch": 3.03, + "learning_rate": 1.774172582343695e-05, + "loss": 0.5874, + "step": 19018 + }, + { + "epoch": 3.03, + "learning_rate": 1.7739257090441584e-05, + "loss": 0.6299, + "step": 19019 + }, + { + "epoch": 3.03, + "learning_rate": 1.7736788434768734e-05, + "loss": 0.6514, + "step": 19020 + }, + { + "epoch": 3.03, + "learning_rate": 1.7734319856444682e-05, + "loss": 0.6139, + "step": 19021 + }, + { + "epoch": 3.03, + "learning_rate": 1.7731851355495704e-05, + "loss": 0.6505, + "step": 19022 + }, + { + "epoch": 3.03, + "learning_rate": 1.7729382931948096e-05, + "loss": 0.6396, + "step": 19023 + }, + { + "epoch": 3.03, + "learning_rate": 1.7726914585828142e-05, + "loss": 0.7179, + "step": 19024 + }, + { + "epoch": 3.03, + "learning_rate": 1.7724446317162138e-05, + "loss": 0.6339, + "step": 19025 + }, + { + "epoch": 3.03, + "learning_rate": 1.772197812597636e-05, + "loss": 0.6113, + "step": 19026 + }, + { + "epoch": 3.03, + "learning_rate": 1.77195100122971e-05, + "loss": 0.6153, + "step": 19027 + }, + { + "epoch": 3.03, + "learning_rate": 1.7717041976150634e-05, + "loss": 0.6266, + "step": 19028 + }, + { + "epoch": 3.03, + "learning_rate": 1.7714574017563256e-05, + "loss": 0.6785, + "step": 19029 + }, + { + "epoch": 3.03, + "learning_rate": 1.7712106136561233e-05, + "loss": 0.6146, + "step": 19030 + }, + { + "epoch": 3.03, + "learning_rate": 1.7709638333170857e-05, + "loss": 0.6056, + "step": 19031 + }, + { + "epoch": 3.03, + "learning_rate": 1.7707170607418413e-05, + "loss": 0.6145, + "step": 19032 + }, + { + "epoch": 3.03, + "learning_rate": 1.7704702959330172e-05, + "loss": 0.628, + "step": 19033 + }, + { + "epoch": 3.03, + "learning_rate": 1.770223538893241e-05, + "loss": 0.6634, + "step": 19034 + }, + { + "epoch": 3.03, + "learning_rate": 1.7699767896251415e-05, + "loss": 0.633, + "step": 19035 + }, + { + "epoch": 3.03, + "learning_rate": 1.7697300481313463e-05, + "loss": 0.6551, + "step": 19036 + }, + { + "epoch": 3.03, + "learning_rate": 1.7694833144144817e-05, + "loss": 0.6013, + "step": 19037 + }, + { + "epoch": 3.03, + "learning_rate": 1.7692365884771773e-05, + "loss": 0.6187, + "step": 19038 + }, + { + "epoch": 3.03, + "learning_rate": 1.7689898703220593e-05, + "loss": 0.6721, + "step": 19039 + }, + { + "epoch": 3.03, + "learning_rate": 1.7687431599517548e-05, + "loss": 0.6376, + "step": 19040 + }, + { + "epoch": 3.03, + "learning_rate": 1.768496457368893e-05, + "loss": 0.6113, + "step": 19041 + }, + { + "epoch": 3.03, + "learning_rate": 1.768249762576099e-05, + "loss": 0.6186, + "step": 19042 + }, + { + "epoch": 3.03, + "learning_rate": 1.7680030755760006e-05, + "loss": 0.6779, + "step": 19043 + }, + { + "epoch": 3.03, + "learning_rate": 1.767756396371225e-05, + "loss": 0.6403, + "step": 19044 + }, + { + "epoch": 3.03, + "learning_rate": 1.7675097249643996e-05, + "loss": 0.7043, + "step": 19045 + }, + { + "epoch": 3.03, + "learning_rate": 1.767263061358151e-05, + "loss": 0.6657, + "step": 19046 + }, + { + "epoch": 3.03, + "learning_rate": 1.7670164055551057e-05, + "loss": 0.6948, + "step": 19047 + }, + { + "epoch": 3.03, + "learning_rate": 1.7667697575578907e-05, + "loss": 0.7026, + "step": 19048 + }, + { + "epoch": 3.03, + "learning_rate": 1.7665231173691326e-05, + "loss": 0.6301, + "step": 19049 + }, + { + "epoch": 3.03, + "learning_rate": 1.766276484991458e-05, + "loss": 0.6494, + "step": 19050 + }, + { + "epoch": 3.04, + "learning_rate": 1.766029860427493e-05, + "loss": 0.6219, + "step": 19051 + }, + { + "epoch": 3.04, + "learning_rate": 1.7657832436798648e-05, + "loss": 0.5793, + "step": 19052 + }, + { + "epoch": 3.04, + "learning_rate": 1.7655366347511995e-05, + "loss": 0.666, + "step": 19053 + }, + { + "epoch": 3.04, + "learning_rate": 1.7652900336441224e-05, + "loss": 0.613, + "step": 19054 + }, + { + "epoch": 3.04, + "learning_rate": 1.765043440361261e-05, + "loss": 0.6169, + "step": 19055 + }, + { + "epoch": 3.04, + "learning_rate": 1.764796854905241e-05, + "loss": 0.6609, + "step": 19056 + }, + { + "epoch": 3.04, + "learning_rate": 1.7645502772786875e-05, + "loss": 0.7019, + "step": 19057 + }, + { + "epoch": 3.04, + "learning_rate": 1.764303707484227e-05, + "loss": 0.649, + "step": 19058 + }, + { + "epoch": 3.04, + "learning_rate": 1.7640571455244863e-05, + "loss": 0.6574, + "step": 19059 + }, + { + "epoch": 3.04, + "learning_rate": 1.763810591402089e-05, + "loss": 0.6941, + "step": 19060 + }, + { + "epoch": 3.04, + "learning_rate": 1.763564045119664e-05, + "loss": 0.6669, + "step": 19061 + }, + { + "epoch": 3.04, + "learning_rate": 1.763317506679833e-05, + "loss": 0.6089, + "step": 19062 + }, + { + "epoch": 3.04, + "learning_rate": 1.763070976085224e-05, + "loss": 0.6069, + "step": 19063 + }, + { + "epoch": 3.04, + "learning_rate": 1.7628244533384614e-05, + "loss": 0.6934, + "step": 19064 + }, + { + "epoch": 3.04, + "learning_rate": 1.7625779384421704e-05, + "loss": 0.6178, + "step": 19065 + }, + { + "epoch": 3.04, + "learning_rate": 1.762331431398977e-05, + "loss": 0.7162, + "step": 19066 + }, + { + "epoch": 3.04, + "learning_rate": 1.7620849322115063e-05, + "loss": 0.6839, + "step": 19067 + }, + { + "epoch": 3.04, + "learning_rate": 1.7618384408823823e-05, + "loss": 0.7019, + "step": 19068 + }, + { + "epoch": 3.04, + "learning_rate": 1.7615919574142314e-05, + "loss": 0.6628, + "step": 19069 + }, + { + "epoch": 3.04, + "learning_rate": 1.761345481809678e-05, + "loss": 0.6417, + "step": 19070 + }, + { + "epoch": 3.04, + "learning_rate": 1.761099014071346e-05, + "loss": 0.6139, + "step": 19071 + }, + { + "epoch": 3.04, + "learning_rate": 1.7608525542018617e-05, + "loss": 0.6225, + "step": 19072 + }, + { + "epoch": 3.04, + "learning_rate": 1.7606061022038487e-05, + "loss": 0.6691, + "step": 19073 + }, + { + "epoch": 3.04, + "learning_rate": 1.7603596580799316e-05, + "loss": 0.6686, + "step": 19074 + }, + { + "epoch": 3.04, + "learning_rate": 1.760113221832735e-05, + "loss": 0.5736, + "step": 19075 + }, + { + "epoch": 3.04, + "learning_rate": 1.7598667934648837e-05, + "loss": 0.6919, + "step": 19076 + }, + { + "epoch": 3.04, + "learning_rate": 1.759620372979002e-05, + "loss": 0.5782, + "step": 19077 + }, + { + "epoch": 3.04, + "learning_rate": 1.759373960377713e-05, + "loss": 0.7069, + "step": 19078 + }, + { + "epoch": 3.04, + "learning_rate": 1.7591275556636426e-05, + "loss": 0.5963, + "step": 19079 + }, + { + "epoch": 3.04, + "learning_rate": 1.758881158839413e-05, + "loss": 0.7017, + "step": 19080 + }, + { + "epoch": 3.04, + "learning_rate": 1.758634769907651e-05, + "loss": 0.6383, + "step": 19081 + }, + { + "epoch": 3.04, + "learning_rate": 1.7583883888709766e-05, + "loss": 0.6423, + "step": 19082 + }, + { + "epoch": 3.04, + "learning_rate": 1.7581420157320167e-05, + "loss": 0.6484, + "step": 19083 + }, + { + "epoch": 3.04, + "learning_rate": 1.7578956504933936e-05, + "loss": 0.6501, + "step": 19084 + }, + { + "epoch": 3.04, + "learning_rate": 1.7576492931577306e-05, + "loss": 0.5995, + "step": 19085 + }, + { + "epoch": 3.04, + "learning_rate": 1.7574029437276525e-05, + "loss": 0.6786, + "step": 19086 + }, + { + "epoch": 3.04, + "learning_rate": 1.7571566022057825e-05, + "loss": 0.6261, + "step": 19087 + }, + { + "epoch": 3.04, + "learning_rate": 1.756910268594743e-05, + "loss": 0.7592, + "step": 19088 + }, + { + "epoch": 3.04, + "learning_rate": 1.7566639428971588e-05, + "loss": 0.6531, + "step": 19089 + }, + { + "epoch": 3.04, + "learning_rate": 1.756417625115652e-05, + "loss": 0.7107, + "step": 19090 + }, + { + "epoch": 3.04, + "learning_rate": 1.7561713152528452e-05, + "loss": 0.6443, + "step": 19091 + }, + { + "epoch": 3.04, + "learning_rate": 1.7559250133113636e-05, + "loss": 0.6043, + "step": 19092 + }, + { + "epoch": 3.04, + "learning_rate": 1.7556787192938285e-05, + "loss": 0.7034, + "step": 19093 + }, + { + "epoch": 3.04, + "learning_rate": 1.7554324332028626e-05, + "loss": 0.6044, + "step": 19094 + }, + { + "epoch": 3.04, + "learning_rate": 1.7551861550410897e-05, + "loss": 0.7507, + "step": 19095 + }, + { + "epoch": 3.04, + "learning_rate": 1.754939884811132e-05, + "loss": 0.7157, + "step": 19096 + }, + { + "epoch": 3.04, + "learning_rate": 1.7546936225156118e-05, + "loss": 0.6687, + "step": 19097 + }, + { + "epoch": 3.04, + "learning_rate": 1.7544473681571526e-05, + "loss": 0.6534, + "step": 19098 + }, + { + "epoch": 3.04, + "learning_rate": 1.7542011217383765e-05, + "loss": 0.6527, + "step": 19099 + }, + { + "epoch": 3.04, + "learning_rate": 1.7539548832619047e-05, + "loss": 0.6616, + "step": 19100 + }, + { + "epoch": 3.04, + "learning_rate": 1.7537086527303624e-05, + "loss": 0.7137, + "step": 19101 + }, + { + "epoch": 3.04, + "learning_rate": 1.753462430146368e-05, + "loss": 0.6241, + "step": 19102 + }, + { + "epoch": 3.04, + "learning_rate": 1.7532162155125466e-05, + "loss": 0.6628, + "step": 19103 + }, + { + "epoch": 3.04, + "learning_rate": 1.752970008831518e-05, + "loss": 0.6562, + "step": 19104 + }, + { + "epoch": 3.04, + "learning_rate": 1.7527238101059058e-05, + "loss": 0.678, + "step": 19105 + }, + { + "epoch": 3.04, + "learning_rate": 1.7524776193383314e-05, + "loss": 0.674, + "step": 19106 + }, + { + "epoch": 3.04, + "learning_rate": 1.7522314365314165e-05, + "loss": 0.6456, + "step": 19107 + }, + { + "epoch": 3.04, + "learning_rate": 1.7519852616877826e-05, + "loss": 0.6479, + "step": 19108 + }, + { + "epoch": 3.04, + "learning_rate": 1.7517390948100517e-05, + "loss": 0.6458, + "step": 19109 + }, + { + "epoch": 3.04, + "learning_rate": 1.7514929359008453e-05, + "loss": 0.605, + "step": 19110 + }, + { + "epoch": 3.04, + "learning_rate": 1.751246784962784e-05, + "loss": 0.6352, + "step": 19111 + }, + { + "epoch": 3.04, + "learning_rate": 1.751000641998491e-05, + "loss": 0.5974, + "step": 19112 + }, + { + "epoch": 3.04, + "learning_rate": 1.750754507010586e-05, + "loss": 0.6174, + "step": 19113 + }, + { + "epoch": 3.05, + "learning_rate": 1.75050838000169e-05, + "loss": 0.6235, + "step": 19114 + }, + { + "epoch": 3.05, + "learning_rate": 1.750262260974425e-05, + "loss": 0.6447, + "step": 19115 + }, + { + "epoch": 3.05, + "learning_rate": 1.7500161499314124e-05, + "loss": 0.6691, + "step": 19116 + }, + { + "epoch": 3.05, + "learning_rate": 1.7497700468752716e-05, + "loss": 0.6161, + "step": 19117 + }, + { + "epoch": 3.05, + "learning_rate": 1.7495239518086247e-05, + "loss": 0.7118, + "step": 19118 + }, + { + "epoch": 3.05, + "learning_rate": 1.7492778647340925e-05, + "loss": 0.6579, + "step": 19119 + }, + { + "epoch": 3.05, + "learning_rate": 1.7490317856542946e-05, + "loss": 0.6398, + "step": 19120 + }, + { + "epoch": 3.05, + "learning_rate": 1.748785714571854e-05, + "loss": 0.601, + "step": 19121 + }, + { + "epoch": 3.05, + "learning_rate": 1.7485396514893876e-05, + "loss": 0.668, + "step": 19122 + }, + { + "epoch": 3.05, + "learning_rate": 1.748293596409518e-05, + "loss": 0.6389, + "step": 19123 + }, + { + "epoch": 3.05, + "learning_rate": 1.7480475493348656e-05, + "loss": 0.6816, + "step": 19124 + }, + { + "epoch": 3.05, + "learning_rate": 1.7478015102680494e-05, + "loss": 0.6305, + "step": 19125 + }, + { + "epoch": 3.05, + "learning_rate": 1.7475554792116915e-05, + "loss": 0.644, + "step": 19126 + }, + { + "epoch": 3.05, + "learning_rate": 1.7473094561684106e-05, + "loss": 0.6886, + "step": 19127 + }, + { + "epoch": 3.05, + "learning_rate": 1.7470634411408264e-05, + "loss": 0.6269, + "step": 19128 + }, + { + "epoch": 3.05, + "learning_rate": 1.74681743413156e-05, + "loss": 0.6928, + "step": 19129 + }, + { + "epoch": 3.05, + "learning_rate": 1.746571435143231e-05, + "loss": 0.6972, + "step": 19130 + }, + { + "epoch": 3.05, + "learning_rate": 1.7463254441784582e-05, + "loss": 0.6467, + "step": 19131 + }, + { + "epoch": 3.05, + "learning_rate": 1.746079461239862e-05, + "loss": 0.6716, + "step": 19132 + }, + { + "epoch": 3.05, + "learning_rate": 1.745833486330062e-05, + "loss": 0.6348, + "step": 19133 + }, + { + "epoch": 3.05, + "learning_rate": 1.745587519451677e-05, + "loss": 0.5981, + "step": 19134 + }, + { + "epoch": 3.05, + "learning_rate": 1.7453415606073276e-05, + "loss": 0.689, + "step": 19135 + }, + { + "epoch": 3.05, + "learning_rate": 1.7450956097996324e-05, + "loss": 0.6933, + "step": 19136 + }, + { + "epoch": 3.05, + "learning_rate": 1.74484966703121e-05, + "loss": 0.5964, + "step": 19137 + }, + { + "epoch": 3.05, + "learning_rate": 1.7446037323046806e-05, + "loss": 0.669, + "step": 19138 + }, + { + "epoch": 3.05, + "learning_rate": 1.744357805622663e-05, + "loss": 0.6251, + "step": 19139 + }, + { + "epoch": 3.05, + "learning_rate": 1.7441118869877755e-05, + "loss": 0.7152, + "step": 19140 + }, + { + "epoch": 3.05, + "learning_rate": 1.7438659764026375e-05, + "loss": 0.6424, + "step": 19141 + }, + { + "epoch": 3.05, + "learning_rate": 1.7436200738698688e-05, + "loss": 0.6456, + "step": 19142 + }, + { + "epoch": 3.05, + "learning_rate": 1.7433741793920865e-05, + "loss": 0.6261, + "step": 19143 + }, + { + "epoch": 3.05, + "learning_rate": 1.7431282929719102e-05, + "loss": 0.7443, + "step": 19144 + }, + { + "epoch": 3.05, + "learning_rate": 1.7428824146119567e-05, + "loss": 0.6169, + "step": 19145 + }, + { + "epoch": 3.05, + "learning_rate": 1.7426365443148472e-05, + "loss": 0.6732, + "step": 19146 + }, + { + "epoch": 3.05, + "learning_rate": 1.7423906820831983e-05, + "loss": 0.6748, + "step": 19147 + }, + { + "epoch": 3.05, + "learning_rate": 1.742144827919628e-05, + "loss": 0.6215, + "step": 19148 + }, + { + "epoch": 3.05, + "learning_rate": 1.7418989818267563e-05, + "loss": 0.6038, + "step": 19149 + }, + { + "epoch": 3.05, + "learning_rate": 1.7416531438071996e-05, + "loss": 0.6755, + "step": 19150 + }, + { + "epoch": 3.05, + "learning_rate": 1.7414073138635766e-05, + "loss": 0.6435, + "step": 19151 + }, + { + "epoch": 3.05, + "learning_rate": 1.741161491998505e-05, + "loss": 0.6325, + "step": 19152 + }, + { + "epoch": 3.05, + "learning_rate": 1.740915678214603e-05, + "loss": 0.723, + "step": 19153 + }, + { + "epoch": 3.05, + "learning_rate": 1.740669872514488e-05, + "loss": 0.6295, + "step": 19154 + }, + { + "epoch": 3.05, + "learning_rate": 1.7404240749007777e-05, + "loss": 0.58, + "step": 19155 + }, + { + "epoch": 3.05, + "learning_rate": 1.74017828537609e-05, + "loss": 0.6406, + "step": 19156 + }, + { + "epoch": 3.05, + "learning_rate": 1.7399325039430427e-05, + "loss": 0.6875, + "step": 19157 + }, + { + "epoch": 3.05, + "learning_rate": 1.739686730604252e-05, + "loss": 0.6239, + "step": 19158 + }, + { + "epoch": 3.05, + "learning_rate": 1.7394409653623367e-05, + "loss": 0.5635, + "step": 19159 + }, + { + "epoch": 3.05, + "learning_rate": 1.739195208219913e-05, + "loss": 0.6202, + "step": 19160 + }, + { + "epoch": 3.05, + "learning_rate": 1.7389494591795984e-05, + "loss": 0.5884, + "step": 19161 + }, + { + "epoch": 3.05, + "learning_rate": 1.7387037182440108e-05, + "loss": 0.7431, + "step": 19162 + }, + { + "epoch": 3.05, + "learning_rate": 1.7384579854157662e-05, + "loss": 0.6189, + "step": 19163 + }, + { + "epoch": 3.05, + "learning_rate": 1.7382122606974816e-05, + "loss": 0.711, + "step": 19164 + }, + { + "epoch": 3.05, + "learning_rate": 1.7379665440917735e-05, + "loss": 0.6947, + "step": 19165 + }, + { + "epoch": 3.05, + "learning_rate": 1.7377208356012588e-05, + "loss": 0.6499, + "step": 19166 + }, + { + "epoch": 3.05, + "learning_rate": 1.737475135228555e-05, + "loss": 0.688, + "step": 19167 + }, + { + "epoch": 3.05, + "learning_rate": 1.7372294429762783e-05, + "loss": 0.6178, + "step": 19168 + }, + { + "epoch": 3.05, + "learning_rate": 1.736983758847044e-05, + "loss": 0.6295, + "step": 19169 + }, + { + "epoch": 3.05, + "learning_rate": 1.73673808284347e-05, + "loss": 0.6646, + "step": 19170 + }, + { + "epoch": 3.05, + "learning_rate": 1.7364924149681722e-05, + "loss": 0.6494, + "step": 19171 + }, + { + "epoch": 3.05, + "learning_rate": 1.736246755223766e-05, + "loss": 0.607, + "step": 19172 + }, + { + "epoch": 3.05, + "learning_rate": 1.7360011036128686e-05, + "loss": 0.5981, + "step": 19173 + }, + { + "epoch": 3.05, + "learning_rate": 1.735755460138096e-05, + "loss": 0.6537, + "step": 19174 + }, + { + "epoch": 3.05, + "learning_rate": 1.735509824802063e-05, + "loss": 0.6283, + "step": 19175 + }, + { + "epoch": 3.05, + "learning_rate": 1.735264197607387e-05, + "loss": 0.6974, + "step": 19176 + }, + { + "epoch": 3.06, + "learning_rate": 1.7350185785566823e-05, + "loss": 0.6826, + "step": 19177 + }, + { + "epoch": 3.06, + "learning_rate": 1.7347729676525655e-05, + "loss": 0.6954, + "step": 19178 + }, + { + "epoch": 3.06, + "learning_rate": 1.7345273648976523e-05, + "loss": 0.6628, + "step": 19179 + }, + { + "epoch": 3.06, + "learning_rate": 1.734281770294558e-05, + "loss": 0.6783, + "step": 19180 + }, + { + "epoch": 3.06, + "learning_rate": 1.7340361838458972e-05, + "loss": 0.7151, + "step": 19181 + }, + { + "epoch": 3.06, + "learning_rate": 1.733790605554288e-05, + "loss": 0.6687, + "step": 19182 + }, + { + "epoch": 3.06, + "learning_rate": 1.733545035422342e-05, + "loss": 0.6614, + "step": 19183 + }, + { + "epoch": 3.06, + "learning_rate": 1.7332994734526764e-05, + "loss": 0.6282, + "step": 19184 + }, + { + "epoch": 3.06, + "learning_rate": 1.7330539196479056e-05, + "loss": 0.6252, + "step": 19185 + }, + { + "epoch": 3.06, + "learning_rate": 1.732808374010645e-05, + "loss": 0.6522, + "step": 19186 + }, + { + "epoch": 3.06, + "learning_rate": 1.7325628365435098e-05, + "loss": 0.6416, + "step": 19187 + }, + { + "epoch": 3.06, + "learning_rate": 1.7323173072491143e-05, + "loss": 0.6179, + "step": 19188 + }, + { + "epoch": 3.06, + "learning_rate": 1.7320717861300728e-05, + "loss": 0.6585, + "step": 19189 + }, + { + "epoch": 3.06, + "learning_rate": 1.7318262731890015e-05, + "loss": 0.6642, + "step": 19190 + }, + { + "epoch": 3.06, + "learning_rate": 1.7315807684285138e-05, + "loss": 0.6275, + "step": 19191 + }, + { + "epoch": 3.06, + "learning_rate": 1.7313352718512236e-05, + "loss": 0.5832, + "step": 19192 + }, + { + "epoch": 3.06, + "learning_rate": 1.7310897834597473e-05, + "loss": 0.6325, + "step": 19193 + }, + { + "epoch": 3.06, + "learning_rate": 1.7308443032566973e-05, + "loss": 0.6192, + "step": 19194 + }, + { + "epoch": 3.06, + "learning_rate": 1.7305988312446884e-05, + "loss": 0.6769, + "step": 19195 + }, + { + "epoch": 3.06, + "learning_rate": 1.730353367426335e-05, + "loss": 0.6812, + "step": 19196 + }, + { + "epoch": 3.06, + "learning_rate": 1.7301079118042514e-05, + "loss": 0.6558, + "step": 19197 + }, + { + "epoch": 3.06, + "learning_rate": 1.72986246438105e-05, + "loss": 0.6202, + "step": 19198 + }, + { + "epoch": 3.06, + "learning_rate": 1.7296170251593468e-05, + "loss": 0.7092, + "step": 19199 + }, + { + "epoch": 3.06, + "learning_rate": 1.729371594141755e-05, + "loss": 0.6453, + "step": 19200 + }, + { + "epoch": 3.06, + "learning_rate": 1.7291261713308866e-05, + "loss": 0.6054, + "step": 19201 + }, + { + "epoch": 3.06, + "learning_rate": 1.728880756729358e-05, + "loss": 0.6391, + "step": 19202 + }, + { + "epoch": 3.06, + "learning_rate": 1.7286353503397802e-05, + "loss": 0.6641, + "step": 19203 + }, + { + "epoch": 3.06, + "learning_rate": 1.7283899521647676e-05, + "loss": 0.6908, + "step": 19204 + }, + { + "epoch": 3.06, + "learning_rate": 1.7281445622069338e-05, + "loss": 0.7268, + "step": 19205 + }, + { + "epoch": 3.06, + "learning_rate": 1.727899180468891e-05, + "loss": 0.7204, + "step": 19206 + }, + { + "epoch": 3.06, + "learning_rate": 1.7276538069532542e-05, + "loss": 0.667, + "step": 19207 + }, + { + "epoch": 3.06, + "learning_rate": 1.7274084416626353e-05, + "loss": 0.6681, + "step": 19208 + }, + { + "epoch": 3.06, + "learning_rate": 1.7271630845996468e-05, + "loss": 0.6364, + "step": 19209 + }, + { + "epoch": 3.06, + "learning_rate": 1.7269177357669024e-05, + "loss": 0.7006, + "step": 19210 + }, + { + "epoch": 3.06, + "learning_rate": 1.726672395167015e-05, + "loss": 0.6801, + "step": 19211 + }, + { + "epoch": 3.06, + "learning_rate": 1.726427062802597e-05, + "loss": 0.6484, + "step": 19212 + }, + { + "epoch": 3.06, + "learning_rate": 1.7261817386762614e-05, + "loss": 0.655, + "step": 19213 + }, + { + "epoch": 3.06, + "learning_rate": 1.7259364227906206e-05, + "loss": 0.6668, + "step": 19214 + }, + { + "epoch": 3.06, + "learning_rate": 1.725691115148286e-05, + "loss": 0.6906, + "step": 19215 + }, + { + "epoch": 3.06, + "learning_rate": 1.725445815751872e-05, + "loss": 0.6069, + "step": 19216 + }, + { + "epoch": 3.06, + "learning_rate": 1.7252005246039893e-05, + "loss": 0.6259, + "step": 19217 + }, + { + "epoch": 3.06, + "learning_rate": 1.7249552417072505e-05, + "loss": 0.676, + "step": 19218 + }, + { + "epoch": 3.06, + "learning_rate": 1.724709967064268e-05, + "loss": 0.5999, + "step": 19219 + }, + { + "epoch": 3.06, + "learning_rate": 1.724464700677654e-05, + "loss": 0.71, + "step": 19220 + }, + { + "epoch": 3.06, + "learning_rate": 1.7242194425500197e-05, + "loss": 0.658, + "step": 19221 + }, + { + "epoch": 3.06, + "learning_rate": 1.7239741926839788e-05, + "loss": 0.6567, + "step": 19222 + }, + { + "epoch": 3.06, + "learning_rate": 1.7237289510821398e-05, + "loss": 0.6689, + "step": 19223 + }, + { + "epoch": 3.06, + "learning_rate": 1.7234837177471164e-05, + "loss": 0.7406, + "step": 19224 + }, + { + "epoch": 3.06, + "learning_rate": 1.7232384926815205e-05, + "loss": 0.617, + "step": 19225 + }, + { + "epoch": 3.06, + "learning_rate": 1.7229932758879623e-05, + "loss": 0.6145, + "step": 19226 + }, + { + "epoch": 3.06, + "learning_rate": 1.722748067369055e-05, + "loss": 0.6883, + "step": 19227 + }, + { + "epoch": 3.06, + "learning_rate": 1.7225028671274084e-05, + "loss": 0.682, + "step": 19228 + }, + { + "epoch": 3.06, + "learning_rate": 1.7222576751656337e-05, + "loss": 0.6762, + "step": 19229 + }, + { + "epoch": 3.06, + "learning_rate": 1.7220124914863434e-05, + "loss": 0.6146, + "step": 19230 + }, + { + "epoch": 3.06, + "learning_rate": 1.7217673160921477e-05, + "loss": 0.7904, + "step": 19231 + }, + { + "epoch": 3.06, + "learning_rate": 1.721522148985657e-05, + "loss": 0.609, + "step": 19232 + }, + { + "epoch": 3.06, + "learning_rate": 1.721276990169483e-05, + "loss": 0.5876, + "step": 19233 + }, + { + "epoch": 3.06, + "learning_rate": 1.7210318396462366e-05, + "loss": 0.5955, + "step": 19234 + }, + { + "epoch": 3.06, + "learning_rate": 1.7207866974185276e-05, + "loss": 0.6199, + "step": 19235 + }, + { + "epoch": 3.06, + "learning_rate": 1.7205415634889677e-05, + "loss": 0.616, + "step": 19236 + }, + { + "epoch": 3.06, + "learning_rate": 1.720296437860167e-05, + "loss": 0.6728, + "step": 19237 + }, + { + "epoch": 3.06, + "learning_rate": 1.7200513205347354e-05, + "loss": 0.6168, + "step": 19238 + }, + { + "epoch": 3.06, + "learning_rate": 1.719806211515284e-05, + "loss": 0.6189, + "step": 19239 + }, + { + "epoch": 3.07, + "learning_rate": 1.719561110804423e-05, + "loss": 0.6379, + "step": 19240 + }, + { + "epoch": 3.07, + "learning_rate": 1.719316018404762e-05, + "loss": 0.6158, + "step": 19241 + }, + { + "epoch": 3.07, + "learning_rate": 1.7190709343189116e-05, + "loss": 0.5769, + "step": 19242 + }, + { + "epoch": 3.07, + "learning_rate": 1.7188258585494823e-05, + "loss": 0.6287, + "step": 19243 + }, + { + "epoch": 3.07, + "learning_rate": 1.718580791099083e-05, + "loss": 0.6177, + "step": 19244 + }, + { + "epoch": 3.07, + "learning_rate": 1.7183357319703236e-05, + "loss": 0.6412, + "step": 19245 + }, + { + "epoch": 3.07, + "learning_rate": 1.7180906811658137e-05, + "loss": 0.6993, + "step": 19246 + }, + { + "epoch": 3.07, + "learning_rate": 1.7178456386881636e-05, + "loss": 0.6493, + "step": 19247 + }, + { + "epoch": 3.07, + "learning_rate": 1.7176006045399827e-05, + "loss": 0.6866, + "step": 19248 + }, + { + "epoch": 3.07, + "learning_rate": 1.7173555787238802e-05, + "loss": 0.6777, + "step": 19249 + }, + { + "epoch": 3.07, + "learning_rate": 1.717110561242466e-05, + "loss": 0.6708, + "step": 19250 + }, + { + "epoch": 3.07, + "learning_rate": 1.716865552098349e-05, + "loss": 0.6652, + "step": 19251 + }, + { + "epoch": 3.07, + "learning_rate": 1.7166205512941376e-05, + "loss": 0.6278, + "step": 19252 + }, + { + "epoch": 3.07, + "learning_rate": 1.7163755588324426e-05, + "loss": 0.6025, + "step": 19253 + }, + { + "epoch": 3.07, + "learning_rate": 1.7161305747158718e-05, + "loss": 0.6353, + "step": 19254 + }, + { + "epoch": 3.07, + "learning_rate": 1.715885598947034e-05, + "loss": 0.7131, + "step": 19255 + }, + { + "epoch": 3.07, + "learning_rate": 1.7156406315285396e-05, + "loss": 0.6519, + "step": 19256 + }, + { + "epoch": 3.07, + "learning_rate": 1.715395672462996e-05, + "loss": 0.6727, + "step": 19257 + }, + { + "epoch": 3.07, + "learning_rate": 1.7151507217530115e-05, + "loss": 0.6627, + "step": 19258 + }, + { + "epoch": 3.07, + "learning_rate": 1.7149057794011955e-05, + "loss": 0.6491, + "step": 19259 + }, + { + "epoch": 3.07, + "learning_rate": 1.7146608454101568e-05, + "loss": 0.6752, + "step": 19260 + }, + { + "epoch": 3.07, + "learning_rate": 1.714415919782503e-05, + "loss": 0.6436, + "step": 19261 + }, + { + "epoch": 3.07, + "learning_rate": 1.7141710025208425e-05, + "loss": 0.6227, + "step": 19262 + }, + { + "epoch": 3.07, + "learning_rate": 1.7139260936277847e-05, + "loss": 0.6611, + "step": 19263 + }, + { + "epoch": 3.07, + "learning_rate": 1.713681193105936e-05, + "loss": 0.6418, + "step": 19264 + }, + { + "epoch": 3.07, + "learning_rate": 1.7134363009579054e-05, + "loss": 0.6498, + "step": 19265 + }, + { + "epoch": 3.07, + "learning_rate": 1.7131914171863e-05, + "loss": 0.6366, + "step": 19266 + }, + { + "epoch": 3.07, + "learning_rate": 1.712946541793729e-05, + "loss": 0.6908, + "step": 19267 + }, + { + "epoch": 3.07, + "learning_rate": 1.7127016747827995e-05, + "loss": 0.687, + "step": 19268 + }, + { + "epoch": 3.07, + "learning_rate": 1.712456816156119e-05, + "loss": 0.7345, + "step": 19269 + }, + { + "epoch": 3.07, + "learning_rate": 1.712211965916295e-05, + "loss": 0.6452, + "step": 19270 + }, + { + "epoch": 3.07, + "learning_rate": 1.711967124065936e-05, + "loss": 0.6714, + "step": 19271 + }, + { + "epoch": 3.07, + "learning_rate": 1.7117222906076483e-05, + "loss": 0.688, + "step": 19272 + }, + { + "epoch": 3.07, + "learning_rate": 1.711477465544039e-05, + "loss": 0.7218, + "step": 19273 + }, + { + "epoch": 3.07, + "learning_rate": 1.711232648877717e-05, + "loss": 0.5771, + "step": 19274 + }, + { + "epoch": 3.07, + "learning_rate": 1.710987840611288e-05, + "loss": 0.6152, + "step": 19275 + }, + { + "epoch": 3.07, + "learning_rate": 1.710743040747359e-05, + "loss": 0.6119, + "step": 19276 + }, + { + "epoch": 3.07, + "learning_rate": 1.7104982492885383e-05, + "loss": 0.6484, + "step": 19277 + }, + { + "epoch": 3.07, + "learning_rate": 1.7102534662374314e-05, + "loss": 0.6623, + "step": 19278 + }, + { + "epoch": 3.07, + "learning_rate": 1.7100086915966458e-05, + "loss": 0.6279, + "step": 19279 + }, + { + "epoch": 3.07, + "learning_rate": 1.709763925368788e-05, + "loss": 0.6165, + "step": 19280 + }, + { + "epoch": 3.07, + "learning_rate": 1.7095191675564646e-05, + "loss": 0.6447, + "step": 19281 + }, + { + "epoch": 3.07, + "learning_rate": 1.7092744181622817e-05, + "loss": 0.5963, + "step": 19282 + }, + { + "epoch": 3.07, + "learning_rate": 1.709029677188848e-05, + "loss": 0.6215, + "step": 19283 + }, + { + "epoch": 3.07, + "learning_rate": 1.708784944638766e-05, + "loss": 0.6446, + "step": 19284 + }, + { + "epoch": 3.07, + "learning_rate": 1.708540220514645e-05, + "loss": 0.6669, + "step": 19285 + }, + { + "epoch": 3.07, + "learning_rate": 1.7082955048190902e-05, + "loss": 0.652, + "step": 19286 + }, + { + "epoch": 3.07, + "learning_rate": 1.7080507975547066e-05, + "loss": 0.6025, + "step": 19287 + }, + { + "epoch": 3.07, + "learning_rate": 1.707806098724102e-05, + "loss": 0.6406, + "step": 19288 + }, + { + "epoch": 3.07, + "learning_rate": 1.7075614083298814e-05, + "loss": 0.6179, + "step": 19289 + }, + { + "epoch": 3.07, + "learning_rate": 1.7073167263746504e-05, + "loss": 0.6068, + "step": 19290 + }, + { + "epoch": 3.07, + "learning_rate": 1.7070720528610152e-05, + "loss": 0.7008, + "step": 19291 + }, + { + "epoch": 3.07, + "learning_rate": 1.7068273877915813e-05, + "loss": 0.7475, + "step": 19292 + }, + { + "epoch": 3.07, + "learning_rate": 1.706582731168954e-05, + "loss": 0.6015, + "step": 19293 + }, + { + "epoch": 3.07, + "learning_rate": 1.7063380829957392e-05, + "loss": 0.6543, + "step": 19294 + }, + { + "epoch": 3.07, + "learning_rate": 1.7060934432745417e-05, + "loss": 0.6602, + "step": 19295 + }, + { + "epoch": 3.07, + "learning_rate": 1.705848812007967e-05, + "loss": 0.6303, + "step": 19296 + }, + { + "epoch": 3.07, + "learning_rate": 1.705604189198621e-05, + "loss": 0.5932, + "step": 19297 + }, + { + "epoch": 3.07, + "learning_rate": 1.7053595748491074e-05, + "loss": 0.694, + "step": 19298 + }, + { + "epoch": 3.07, + "learning_rate": 1.7051149689620315e-05, + "loss": 0.6324, + "step": 19299 + }, + { + "epoch": 3.07, + "learning_rate": 1.7048703715399995e-05, + "loss": 0.6813, + "step": 19300 + }, + { + "epoch": 3.07, + "learning_rate": 1.704625782585615e-05, + "loss": 0.7276, + "step": 19301 + }, + { + "epoch": 3.08, + "learning_rate": 1.7043812021014827e-05, + "loss": 0.5859, + "step": 19302 + }, + { + "epoch": 3.08, + "learning_rate": 1.704136630090209e-05, + "loss": 0.6484, + "step": 19303 + }, + { + "epoch": 3.08, + "learning_rate": 1.7038920665543956e-05, + "loss": 0.6785, + "step": 19304 + }, + { + "epoch": 3.08, + "learning_rate": 1.7036475114966488e-05, + "loss": 0.7922, + "step": 19305 + }, + { + "epoch": 3.08, + "learning_rate": 1.7034029649195723e-05, + "loss": 0.5761, + "step": 19306 + }, + { + "epoch": 3.08, + "learning_rate": 1.7031584268257707e-05, + "loss": 0.6482, + "step": 19307 + }, + { + "epoch": 3.08, + "learning_rate": 1.7029138972178477e-05, + "loss": 0.6313, + "step": 19308 + }, + { + "epoch": 3.08, + "learning_rate": 1.7026693760984085e-05, + "loss": 0.6088, + "step": 19309 + }, + { + "epoch": 3.08, + "learning_rate": 1.7024248634700558e-05, + "loss": 0.6761, + "step": 19310 + }, + { + "epoch": 3.08, + "learning_rate": 1.702180359335394e-05, + "loss": 0.606, + "step": 19311 + }, + { + "epoch": 3.08, + "learning_rate": 1.7019358636970274e-05, + "loss": 0.6095, + "step": 19312 + }, + { + "epoch": 3.08, + "learning_rate": 1.701691376557559e-05, + "loss": 0.6665, + "step": 19313 + }, + { + "epoch": 3.08, + "learning_rate": 1.701446897919593e-05, + "loss": 0.6269, + "step": 19314 + }, + { + "epoch": 3.08, + "learning_rate": 1.7012024277857332e-05, + "loss": 0.6166, + "step": 19315 + }, + { + "epoch": 3.08, + "learning_rate": 1.7009579661585813e-05, + "loss": 0.6724, + "step": 19316 + }, + { + "epoch": 3.08, + "learning_rate": 1.700713513040743e-05, + "loss": 0.7466, + "step": 19317 + }, + { + "epoch": 3.08, + "learning_rate": 1.7004690684348204e-05, + "loss": 0.6313, + "step": 19318 + }, + { + "epoch": 3.08, + "learning_rate": 1.700224632343416e-05, + "loss": 0.6768, + "step": 19319 + }, + { + "epoch": 3.08, + "learning_rate": 1.6999802047691348e-05, + "loss": 0.621, + "step": 19320 + }, + { + "epoch": 3.08, + "learning_rate": 1.6997357857145785e-05, + "loss": 0.6358, + "step": 19321 + }, + { + "epoch": 3.08, + "learning_rate": 1.6994913751823497e-05, + "loss": 0.6685, + "step": 19322 + }, + { + "epoch": 3.08, + "learning_rate": 1.6992469731750532e-05, + "loss": 0.6827, + "step": 19323 + }, + { + "epoch": 3.08, + "learning_rate": 1.699002579695289e-05, + "loss": 0.6601, + "step": 19324 + }, + { + "epoch": 3.08, + "learning_rate": 1.6987581947456612e-05, + "loss": 0.6364, + "step": 19325 + }, + { + "epoch": 3.08, + "learning_rate": 1.6985138183287723e-05, + "loss": 0.6571, + "step": 19326 + }, + { + "epoch": 3.08, + "learning_rate": 1.698269450447224e-05, + "loss": 0.6492, + "step": 19327 + }, + { + "epoch": 3.08, + "learning_rate": 1.69802509110362e-05, + "loss": 0.6331, + "step": 19328 + }, + { + "epoch": 3.08, + "learning_rate": 1.697780740300562e-05, + "loss": 0.6653, + "step": 19329 + }, + { + "epoch": 3.08, + "learning_rate": 1.6975363980406517e-05, + "loss": 0.6323, + "step": 19330 + }, + { + "epoch": 3.08, + "learning_rate": 1.6972920643264917e-05, + "loss": 0.6937, + "step": 19331 + }, + { + "epoch": 3.08, + "learning_rate": 1.697047739160684e-05, + "loss": 0.6148, + "step": 19332 + }, + { + "epoch": 3.08, + "learning_rate": 1.6968034225458297e-05, + "loss": 0.5744, + "step": 19333 + }, + { + "epoch": 3.08, + "learning_rate": 1.6965591144845322e-05, + "loss": 0.6573, + "step": 19334 + }, + { + "epoch": 3.08, + "learning_rate": 1.6963148149793923e-05, + "loss": 0.6742, + "step": 19335 + }, + { + "epoch": 3.08, + "learning_rate": 1.6960705240330107e-05, + "loss": 0.6282, + "step": 19336 + }, + { + "epoch": 3.08, + "learning_rate": 1.695826241647991e-05, + "loss": 0.6182, + "step": 19337 + }, + { + "epoch": 3.08, + "learning_rate": 1.6955819678269334e-05, + "loss": 0.7347, + "step": 19338 + }, + { + "epoch": 3.08, + "learning_rate": 1.6953377025724387e-05, + "loss": 0.6597, + "step": 19339 + }, + { + "epoch": 3.08, + "learning_rate": 1.6950934458871097e-05, + "loss": 0.6127, + "step": 19340 + }, + { + "epoch": 3.08, + "learning_rate": 1.694849197773547e-05, + "loss": 0.7009, + "step": 19341 + }, + { + "epoch": 3.08, + "learning_rate": 1.694604958234351e-05, + "loss": 0.646, + "step": 19342 + }, + { + "epoch": 3.08, + "learning_rate": 1.6943607272721235e-05, + "loss": 0.6574, + "step": 19343 + }, + { + "epoch": 3.08, + "learning_rate": 1.694116504889466e-05, + "loss": 0.6252, + "step": 19344 + }, + { + "epoch": 3.08, + "learning_rate": 1.6938722910889773e-05, + "loss": 0.6178, + "step": 19345 + }, + { + "epoch": 3.08, + "learning_rate": 1.6936280858732598e-05, + "loss": 0.6825, + "step": 19346 + }, + { + "epoch": 3.08, + "learning_rate": 1.6933838892449127e-05, + "loss": 0.8064, + "step": 19347 + }, + { + "epoch": 3.08, + "learning_rate": 1.6931397012065385e-05, + "loss": 0.6132, + "step": 19348 + }, + { + "epoch": 3.08, + "learning_rate": 1.6928955217607362e-05, + "loss": 0.6498, + "step": 19349 + }, + { + "epoch": 3.08, + "learning_rate": 1.6926513509101066e-05, + "loss": 0.6357, + "step": 19350 + }, + { + "epoch": 3.08, + "learning_rate": 1.69240718865725e-05, + "loss": 0.739, + "step": 19351 + }, + { + "epoch": 3.08, + "learning_rate": 1.6921630350047668e-05, + "loss": 0.6316, + "step": 19352 + }, + { + "epoch": 3.08, + "learning_rate": 1.6919188899552564e-05, + "loss": 0.5721, + "step": 19353 + }, + { + "epoch": 3.08, + "learning_rate": 1.6916747535113192e-05, + "loss": 0.7713, + "step": 19354 + }, + { + "epoch": 3.08, + "learning_rate": 1.6914306256755556e-05, + "loss": 0.6772, + "step": 19355 + }, + { + "epoch": 3.08, + "learning_rate": 1.6911865064505642e-05, + "loss": 0.7052, + "step": 19356 + }, + { + "epoch": 3.08, + "learning_rate": 1.690942395838946e-05, + "loss": 0.6827, + "step": 19357 + }, + { + "epoch": 3.08, + "learning_rate": 1.6906982938433003e-05, + "loss": 0.6659, + "step": 19358 + }, + { + "epoch": 3.08, + "learning_rate": 1.6904542004662254e-05, + "loss": 0.5803, + "step": 19359 + }, + { + "epoch": 3.08, + "learning_rate": 1.6902101157103227e-05, + "loss": 0.6422, + "step": 19360 + }, + { + "epoch": 3.08, + "learning_rate": 1.689966039578191e-05, + "loss": 0.6549, + "step": 19361 + }, + { + "epoch": 3.08, + "learning_rate": 1.6897219720724282e-05, + "loss": 0.6368, + "step": 19362 + }, + { + "epoch": 3.08, + "learning_rate": 1.689477913195635e-05, + "loss": 0.6057, + "step": 19363 + }, + { + "epoch": 3.08, + "learning_rate": 1.6892338629504107e-05, + "loss": 0.6304, + "step": 19364 + }, + { + "epoch": 3.09, + "learning_rate": 1.6889898213393526e-05, + "loss": 0.6906, + "step": 19365 + }, + { + "epoch": 3.09, + "learning_rate": 1.6887457883650608e-05, + "loss": 0.6477, + "step": 19366 + }, + { + "epoch": 3.09, + "learning_rate": 1.688501764030133e-05, + "loss": 0.6727, + "step": 19367 + }, + { + "epoch": 3.09, + "learning_rate": 1.6882577483371698e-05, + "loss": 0.7262, + "step": 19368 + }, + { + "epoch": 3.09, + "learning_rate": 1.6880137412887686e-05, + "loss": 0.6651, + "step": 19369 + }, + { + "epoch": 3.09, + "learning_rate": 1.6877697428875276e-05, + "loss": 0.6329, + "step": 19370 + }, + { + "epoch": 3.09, + "learning_rate": 1.687525753136046e-05, + "loss": 0.6528, + "step": 19371 + }, + { + "epoch": 3.09, + "learning_rate": 1.6872817720369218e-05, + "loss": 0.7354, + "step": 19372 + }, + { + "epoch": 3.09, + "learning_rate": 1.687037799592753e-05, + "loss": 0.6864, + "step": 19373 + }, + { + "epoch": 3.09, + "learning_rate": 1.6867938358061385e-05, + "loss": 0.5984, + "step": 19374 + }, + { + "epoch": 3.09, + "learning_rate": 1.6865498806796762e-05, + "loss": 0.585, + "step": 19375 + }, + { + "epoch": 3.09, + "learning_rate": 1.6863059342159636e-05, + "loss": 0.641, + "step": 19376 + }, + { + "epoch": 3.09, + "learning_rate": 1.6860619964175982e-05, + "loss": 0.673, + "step": 19377 + }, + { + "epoch": 3.09, + "learning_rate": 1.6858180672871792e-05, + "loss": 0.7163, + "step": 19378 + }, + { + "epoch": 3.09, + "learning_rate": 1.685574146827303e-05, + "loss": 0.6331, + "step": 19379 + }, + { + "epoch": 3.09, + "learning_rate": 1.6853302350405674e-05, + "loss": 0.6247, + "step": 19380 + }, + { + "epoch": 3.09, + "learning_rate": 1.685086331929571e-05, + "loss": 0.623, + "step": 19381 + }, + { + "epoch": 3.09, + "learning_rate": 1.68484243749691e-05, + "loss": 0.6396, + "step": 19382 + }, + { + "epoch": 3.09, + "learning_rate": 1.6845985517451818e-05, + "loss": 0.6888, + "step": 19383 + }, + { + "epoch": 3.09, + "learning_rate": 1.6843546746769852e-05, + "loss": 0.6408, + "step": 19384 + }, + { + "epoch": 3.09, + "learning_rate": 1.6841108062949153e-05, + "loss": 0.6524, + "step": 19385 + }, + { + "epoch": 3.09, + "learning_rate": 1.6838669466015698e-05, + "loss": 0.717, + "step": 19386 + }, + { + "epoch": 3.09, + "learning_rate": 1.6836230955995457e-05, + "loss": 0.7251, + "step": 19387 + }, + { + "epoch": 3.09, + "learning_rate": 1.6833792532914407e-05, + "loss": 0.6293, + "step": 19388 + }, + { + "epoch": 3.09, + "learning_rate": 1.6831354196798504e-05, + "loss": 0.6308, + "step": 19389 + }, + { + "epoch": 3.09, + "learning_rate": 1.6828915947673724e-05, + "loss": 0.6743, + "step": 19390 + }, + { + "epoch": 3.09, + "learning_rate": 1.682647778556602e-05, + "loss": 0.6633, + "step": 19391 + }, + { + "epoch": 3.09, + "learning_rate": 1.682403971050137e-05, + "loss": 0.6029, + "step": 19392 + }, + { + "epoch": 3.09, + "learning_rate": 1.6821601722505736e-05, + "loss": 0.5831, + "step": 19393 + }, + { + "epoch": 3.09, + "learning_rate": 1.681916382160507e-05, + "loss": 0.6461, + "step": 19394 + }, + { + "epoch": 3.09, + "learning_rate": 1.681672600782535e-05, + "loss": 0.6636, + "step": 19395 + }, + { + "epoch": 3.09, + "learning_rate": 1.681428828119253e-05, + "loss": 0.768, + "step": 19396 + }, + { + "epoch": 3.09, + "learning_rate": 1.6811850641732565e-05, + "loss": 0.5996, + "step": 19397 + }, + { + "epoch": 3.09, + "learning_rate": 1.6809413089471427e-05, + "loss": 0.66, + "step": 19398 + }, + { + "epoch": 3.09, + "learning_rate": 1.6806975624435062e-05, + "loss": 0.5959, + "step": 19399 + }, + { + "epoch": 3.09, + "learning_rate": 1.680453824664943e-05, + "loss": 0.6313, + "step": 19400 + }, + { + "epoch": 3.09, + "learning_rate": 1.6802100956140494e-05, + "loss": 0.6986, + "step": 19401 + }, + { + "epoch": 3.09, + "learning_rate": 1.6799663752934207e-05, + "loss": 0.6019, + "step": 19402 + }, + { + "epoch": 3.09, + "learning_rate": 1.6797226637056516e-05, + "loss": 0.7361, + "step": 19403 + }, + { + "epoch": 3.09, + "learning_rate": 1.6794789608533396e-05, + "loss": 0.7301, + "step": 19404 + }, + { + "epoch": 3.09, + "learning_rate": 1.6792352667390772e-05, + "loss": 0.6029, + "step": 19405 + }, + { + "epoch": 3.09, + "learning_rate": 1.6789915813654612e-05, + "loss": 0.6563, + "step": 19406 + }, + { + "epoch": 3.09, + "learning_rate": 1.6787479047350867e-05, + "loss": 0.6932, + "step": 19407 + }, + { + "epoch": 3.09, + "learning_rate": 1.6785042368505476e-05, + "loss": 0.6163, + "step": 19408 + }, + { + "epoch": 3.09, + "learning_rate": 1.6782605777144406e-05, + "loss": 0.6919, + "step": 19409 + }, + { + "epoch": 3.09, + "learning_rate": 1.6780169273293592e-05, + "loss": 0.6658, + "step": 19410 + }, + { + "epoch": 3.09, + "learning_rate": 1.677773285697898e-05, + "loss": 0.6521, + "step": 19411 + }, + { + "epoch": 3.09, + "learning_rate": 1.6775296528226525e-05, + "loss": 0.635, + "step": 19412 + }, + { + "epoch": 3.09, + "learning_rate": 1.6772860287062173e-05, + "loss": 0.6846, + "step": 19413 + }, + { + "epoch": 3.09, + "learning_rate": 1.6770424133511854e-05, + "loss": 0.6581, + "step": 19414 + }, + { + "epoch": 3.09, + "learning_rate": 1.676798806760153e-05, + "loss": 0.7279, + "step": 19415 + }, + { + "epoch": 3.09, + "learning_rate": 1.6765552089357134e-05, + "loss": 0.6012, + "step": 19416 + }, + { + "epoch": 3.09, + "learning_rate": 1.676311619880461e-05, + "loss": 0.6237, + "step": 19417 + }, + { + "epoch": 3.09, + "learning_rate": 1.676068039596989e-05, + "loss": 0.6931, + "step": 19418 + }, + { + "epoch": 3.09, + "learning_rate": 1.6758244680878932e-05, + "loss": 0.6612, + "step": 19419 + }, + { + "epoch": 3.09, + "learning_rate": 1.6755809053557657e-05, + "loss": 0.6731, + "step": 19420 + }, + { + "epoch": 3.09, + "learning_rate": 1.675337351403201e-05, + "loss": 0.6292, + "step": 19421 + }, + { + "epoch": 3.09, + "learning_rate": 1.6750938062327937e-05, + "loss": 0.6991, + "step": 19422 + }, + { + "epoch": 3.09, + "learning_rate": 1.6748502698471354e-05, + "loss": 0.609, + "step": 19423 + }, + { + "epoch": 3.09, + "learning_rate": 1.6746067422488225e-05, + "loss": 0.6305, + "step": 19424 + }, + { + "epoch": 3.09, + "learning_rate": 1.6743632234404456e-05, + "loss": 0.6437, + "step": 19425 + }, + { + "epoch": 3.09, + "learning_rate": 1.674119713424599e-05, + "loss": 0.6465, + "step": 19426 + }, + { + "epoch": 3.09, + "learning_rate": 1.6738762122038765e-05, + "loss": 0.6418, + "step": 19427 + }, + { + "epoch": 3.1, + "learning_rate": 1.67363271978087e-05, + "loss": 0.6766, + "step": 19428 + }, + { + "epoch": 3.1, + "learning_rate": 1.6733892361581737e-05, + "loss": 0.6558, + "step": 19429 + }, + { + "epoch": 3.1, + "learning_rate": 1.6731457613383804e-05, + "loss": 0.6115, + "step": 19430 + }, + { + "epoch": 3.1, + "learning_rate": 1.672902295324082e-05, + "loss": 0.6481, + "step": 19431 + }, + { + "epoch": 3.1, + "learning_rate": 1.6726588381178732e-05, + "loss": 0.6112, + "step": 19432 + }, + { + "epoch": 3.1, + "learning_rate": 1.672415389722345e-05, + "loss": 0.6584, + "step": 19433 + }, + { + "epoch": 3.1, + "learning_rate": 1.6721719501400897e-05, + "loss": 0.6644, + "step": 19434 + }, + { + "epoch": 3.1, + "learning_rate": 1.6719285193737018e-05, + "loss": 0.693, + "step": 19435 + }, + { + "epoch": 3.1, + "learning_rate": 1.6716850974257715e-05, + "loss": 0.6651, + "step": 19436 + }, + { + "epoch": 3.1, + "learning_rate": 1.671441684298892e-05, + "loss": 0.6418, + "step": 19437 + }, + { + "epoch": 3.1, + "learning_rate": 1.671198279995656e-05, + "loss": 0.6527, + "step": 19438 + }, + { + "epoch": 3.1, + "learning_rate": 1.6709548845186552e-05, + "loss": 0.67, + "step": 19439 + }, + { + "epoch": 3.1, + "learning_rate": 1.670711497870481e-05, + "loss": 0.7464, + "step": 19440 + }, + { + "epoch": 3.1, + "learning_rate": 1.6704681200537264e-05, + "loss": 0.626, + "step": 19441 + }, + { + "epoch": 3.1, + "learning_rate": 1.6702247510709828e-05, + "loss": 0.5811, + "step": 19442 + }, + { + "epoch": 3.1, + "learning_rate": 1.6699813909248413e-05, + "loss": 0.6327, + "step": 19443 + }, + { + "epoch": 3.1, + "learning_rate": 1.6697380396178955e-05, + "loss": 0.5916, + "step": 19444 + }, + { + "epoch": 3.1, + "learning_rate": 1.669494697152734e-05, + "loss": 0.6595, + "step": 19445 + }, + { + "epoch": 3.1, + "learning_rate": 1.66925136353195e-05, + "loss": 0.6114, + "step": 19446 + }, + { + "epoch": 3.1, + "learning_rate": 1.669008038758135e-05, + "loss": 0.6562, + "step": 19447 + }, + { + "epoch": 3.1, + "learning_rate": 1.6687647228338793e-05, + "loss": 0.6923, + "step": 19448 + }, + { + "epoch": 3.1, + "learning_rate": 1.668521415761775e-05, + "loss": 0.6177, + "step": 19449 + }, + { + "epoch": 3.1, + "learning_rate": 1.6682781175444132e-05, + "loss": 0.6535, + "step": 19450 + }, + { + "epoch": 3.1, + "learning_rate": 1.6680348281843838e-05, + "loss": 0.6058, + "step": 19451 + }, + { + "epoch": 3.1, + "learning_rate": 1.6677915476842787e-05, + "loss": 0.5984, + "step": 19452 + }, + { + "epoch": 3.1, + "learning_rate": 1.6675482760466885e-05, + "loss": 0.6471, + "step": 19453 + }, + { + "epoch": 3.1, + "learning_rate": 1.667305013274203e-05, + "loss": 0.6443, + "step": 19454 + }, + { + "epoch": 3.1, + "learning_rate": 1.6670617593694143e-05, + "loss": 0.6644, + "step": 19455 + }, + { + "epoch": 3.1, + "learning_rate": 1.6668185143349125e-05, + "loss": 0.6461, + "step": 19456 + }, + { + "epoch": 3.1, + "learning_rate": 1.6665752781732868e-05, + "loss": 0.6342, + "step": 19457 + }, + { + "epoch": 3.1, + "learning_rate": 1.6663320508871287e-05, + "loss": 0.6977, + "step": 19458 + }, + { + "epoch": 3.1, + "learning_rate": 1.6660888324790286e-05, + "loss": 0.7051, + "step": 19459 + }, + { + "epoch": 3.1, + "learning_rate": 1.6658456229515757e-05, + "loss": 0.6238, + "step": 19460 + }, + { + "epoch": 3.1, + "learning_rate": 1.6656024223073608e-05, + "loss": 0.6256, + "step": 19461 + }, + { + "epoch": 3.1, + "learning_rate": 1.6653592305489735e-05, + "loss": 0.682, + "step": 19462 + }, + { + "epoch": 3.1, + "learning_rate": 1.665116047679003e-05, + "loss": 0.7386, + "step": 19463 + }, + { + "epoch": 3.1, + "learning_rate": 1.66487287370004e-05, + "loss": 0.644, + "step": 19464 + }, + { + "epoch": 3.1, + "learning_rate": 1.664629708614675e-05, + "loss": 0.6113, + "step": 19465 + }, + { + "epoch": 3.1, + "learning_rate": 1.664386552425496e-05, + "loss": 0.6528, + "step": 19466 + }, + { + "epoch": 3.1, + "learning_rate": 1.6641434051350924e-05, + "loss": 0.6543, + "step": 19467 + }, + { + "epoch": 3.1, + "learning_rate": 1.6639002667460536e-05, + "loss": 0.5916, + "step": 19468 + }, + { + "epoch": 3.1, + "learning_rate": 1.66365713726097e-05, + "loss": 0.6518, + "step": 19469 + }, + { + "epoch": 3.1, + "learning_rate": 1.66341401668243e-05, + "loss": 0.6134, + "step": 19470 + }, + { + "epoch": 3.1, + "learning_rate": 1.663170905013023e-05, + "loss": 0.6487, + "step": 19471 + }, + { + "epoch": 3.1, + "learning_rate": 1.6629278022553374e-05, + "loss": 0.6759, + "step": 19472 + }, + { + "epoch": 3.1, + "learning_rate": 1.6626847084119628e-05, + "loss": 0.6857, + "step": 19473 + }, + { + "epoch": 3.1, + "learning_rate": 1.6624416234854874e-05, + "loss": 0.6309, + "step": 19474 + }, + { + "epoch": 3.1, + "learning_rate": 1.6621985474785006e-05, + "loss": 0.6389, + "step": 19475 + }, + { + "epoch": 3.1, + "learning_rate": 1.6619554803935908e-05, + "loss": 0.6356, + "step": 19476 + }, + { + "epoch": 3.1, + "learning_rate": 1.6617124222333456e-05, + "loss": 0.6755, + "step": 19477 + }, + { + "epoch": 3.1, + "learning_rate": 1.6614693730003548e-05, + "loss": 0.6642, + "step": 19478 + }, + { + "epoch": 3.1, + "learning_rate": 1.661226332697206e-05, + "loss": 0.7368, + "step": 19479 + }, + { + "epoch": 3.1, + "learning_rate": 1.660983301326487e-05, + "loss": 0.7105, + "step": 19480 + }, + { + "epoch": 3.1, + "learning_rate": 1.660740278890787e-05, + "loss": 0.5882, + "step": 19481 + }, + { + "epoch": 3.1, + "learning_rate": 1.6604972653926936e-05, + "loss": 0.6001, + "step": 19482 + }, + { + "epoch": 3.1, + "learning_rate": 1.6602542608347947e-05, + "loss": 0.6409, + "step": 19483 + }, + { + "epoch": 3.1, + "learning_rate": 1.6600112652196775e-05, + "loss": 0.6049, + "step": 19484 + }, + { + "epoch": 3.1, + "learning_rate": 1.6597682785499313e-05, + "loss": 0.7327, + "step": 19485 + }, + { + "epoch": 3.1, + "learning_rate": 1.6595253008281426e-05, + "loss": 0.6291, + "step": 19486 + }, + { + "epoch": 3.1, + "learning_rate": 1.6592823320568988e-05, + "loss": 0.5931, + "step": 19487 + }, + { + "epoch": 3.1, + "learning_rate": 1.6590393722387872e-05, + "loss": 0.6895, + "step": 19488 + }, + { + "epoch": 3.1, + "learning_rate": 1.6587964213763963e-05, + "loss": 0.6431, + "step": 19489 + }, + { + "epoch": 3.1, + "learning_rate": 1.658553479472313e-05, + "loss": 0.6608, + "step": 19490 + }, + { + "epoch": 3.11, + "learning_rate": 1.6583105465291238e-05, + "loss": 0.6733, + "step": 19491 + }, + { + "epoch": 3.11, + "learning_rate": 1.658067622549417e-05, + "loss": 0.5874, + "step": 19492 + }, + { + "epoch": 3.11, + "learning_rate": 1.6578247075357782e-05, + "loss": 0.7048, + "step": 19493 + }, + { + "epoch": 3.11, + "learning_rate": 1.657581801490795e-05, + "loss": 0.6681, + "step": 19494 + }, + { + "epoch": 3.11, + "learning_rate": 1.657338904417054e-05, + "loss": 0.6517, + "step": 19495 + }, + { + "epoch": 3.11, + "learning_rate": 1.657096016317143e-05, + "loss": 0.6325, + "step": 19496 + }, + { + "epoch": 3.11, + "learning_rate": 1.656853137193647e-05, + "loss": 0.7715, + "step": 19497 + }, + { + "epoch": 3.11, + "learning_rate": 1.6566102670491528e-05, + "loss": 0.6263, + "step": 19498 + }, + { + "epoch": 3.11, + "learning_rate": 1.6563674058862482e-05, + "loss": 0.5964, + "step": 19499 + }, + { + "epoch": 3.11, + "learning_rate": 1.6561245537075182e-05, + "loss": 0.6222, + "step": 19500 + }, + { + "epoch": 3.11, + "learning_rate": 1.655881710515549e-05, + "loss": 0.684, + "step": 19501 + }, + { + "epoch": 3.11, + "learning_rate": 1.6556388763129275e-05, + "loss": 0.654, + "step": 19502 + }, + { + "epoch": 3.11, + "learning_rate": 1.6553960511022392e-05, + "loss": 0.7269, + "step": 19503 + }, + { + "epoch": 3.11, + "learning_rate": 1.6551532348860697e-05, + "loss": 0.6366, + "step": 19504 + }, + { + "epoch": 3.11, + "learning_rate": 1.6549104276670066e-05, + "loss": 0.7257, + "step": 19505 + }, + { + "epoch": 3.11, + "learning_rate": 1.6546676294476337e-05, + "loss": 0.693, + "step": 19506 + }, + { + "epoch": 3.11, + "learning_rate": 1.6544248402305374e-05, + "loss": 0.6825, + "step": 19507 + }, + { + "epoch": 3.11, + "learning_rate": 1.6541820600183033e-05, + "loss": 0.6445, + "step": 19508 + }, + { + "epoch": 3.11, + "learning_rate": 1.6539392888135158e-05, + "loss": 0.6953, + "step": 19509 + }, + { + "epoch": 3.11, + "learning_rate": 1.653696526618762e-05, + "loss": 0.6585, + "step": 19510 + }, + { + "epoch": 3.11, + "learning_rate": 1.6534537734366266e-05, + "loss": 0.6523, + "step": 19511 + }, + { + "epoch": 3.11, + "learning_rate": 1.653211029269694e-05, + "loss": 0.6709, + "step": 19512 + }, + { + "epoch": 3.11, + "learning_rate": 1.65296829412055e-05, + "loss": 0.6482, + "step": 19513 + }, + { + "epoch": 3.11, + "learning_rate": 1.6527255679917796e-05, + "loss": 0.6722, + "step": 19514 + }, + { + "epoch": 3.11, + "learning_rate": 1.6524828508859676e-05, + "loss": 0.6073, + "step": 19515 + }, + { + "epoch": 3.11, + "learning_rate": 1.6522401428056983e-05, + "loss": 0.661, + "step": 19516 + }, + { + "epoch": 3.11, + "learning_rate": 1.651997443753557e-05, + "loss": 0.6387, + "step": 19517 + }, + { + "epoch": 3.11, + "learning_rate": 1.651754753732128e-05, + "loss": 0.6226, + "step": 19518 + }, + { + "epoch": 3.11, + "learning_rate": 1.6515120727439957e-05, + "loss": 0.593, + "step": 19519 + }, + { + "epoch": 3.11, + "learning_rate": 1.6512694007917453e-05, + "loss": 0.6218, + "step": 19520 + }, + { + "epoch": 3.11, + "learning_rate": 1.6510267378779598e-05, + "loss": 0.6402, + "step": 19521 + }, + { + "epoch": 3.11, + "learning_rate": 1.6507840840052248e-05, + "loss": 0.6423, + "step": 19522 + }, + { + "epoch": 3.11, + "learning_rate": 1.6505414391761237e-05, + "loss": 0.7458, + "step": 19523 + }, + { + "epoch": 3.11, + "learning_rate": 1.65029880339324e-05, + "loss": 0.6771, + "step": 19524 + }, + { + "epoch": 3.11, + "learning_rate": 1.6500561766591593e-05, + "loss": 0.653, + "step": 19525 + }, + { + "epoch": 3.11, + "learning_rate": 1.6498135589764633e-05, + "loss": 0.6351, + "step": 19526 + }, + { + "epoch": 3.11, + "learning_rate": 1.649570950347737e-05, + "loss": 0.6216, + "step": 19527 + }, + { + "epoch": 3.11, + "learning_rate": 1.6493283507755635e-05, + "loss": 0.6078, + "step": 19528 + }, + { + "epoch": 3.11, + "learning_rate": 1.6490857602625264e-05, + "loss": 0.6235, + "step": 19529 + }, + { + "epoch": 3.11, + "learning_rate": 1.6488431788112103e-05, + "loss": 0.7386, + "step": 19530 + }, + { + "epoch": 3.11, + "learning_rate": 1.648600606424197e-05, + "loss": 0.6171, + "step": 19531 + }, + { + "epoch": 3.11, + "learning_rate": 1.6483580431040702e-05, + "loss": 0.7292, + "step": 19532 + }, + { + "epoch": 3.11, + "learning_rate": 1.6481154888534134e-05, + "loss": 0.6783, + "step": 19533 + }, + { + "epoch": 3.11, + "learning_rate": 1.64787294367481e-05, + "loss": 0.6652, + "step": 19534 + }, + { + "epoch": 3.11, + "learning_rate": 1.647630407570841e-05, + "loss": 0.6955, + "step": 19535 + }, + { + "epoch": 3.11, + "learning_rate": 1.6473878805440917e-05, + "loss": 0.656, + "step": 19536 + }, + { + "epoch": 3.11, + "learning_rate": 1.647145362597144e-05, + "loss": 0.61, + "step": 19537 + }, + { + "epoch": 3.11, + "learning_rate": 1.6469028537325793e-05, + "loss": 0.7011, + "step": 19538 + }, + { + "epoch": 3.11, + "learning_rate": 1.6466603539529824e-05, + "loss": 0.6739, + "step": 19539 + }, + { + "epoch": 3.11, + "learning_rate": 1.6464178632609346e-05, + "loss": 0.705, + "step": 19540 + }, + { + "epoch": 3.11, + "learning_rate": 1.6461753816590177e-05, + "loss": 0.6999, + "step": 19541 + }, + { + "epoch": 3.11, + "learning_rate": 1.645932909149815e-05, + "loss": 0.5782, + "step": 19542 + }, + { + "epoch": 3.11, + "learning_rate": 1.645690445735908e-05, + "loss": 0.606, + "step": 19543 + }, + { + "epoch": 3.11, + "learning_rate": 1.645447991419879e-05, + "loss": 0.635, + "step": 19544 + }, + { + "epoch": 3.11, + "learning_rate": 1.6452055462043118e-05, + "loss": 0.6642, + "step": 19545 + }, + { + "epoch": 3.11, + "learning_rate": 1.6449631100917847e-05, + "loss": 0.6208, + "step": 19546 + }, + { + "epoch": 3.11, + "learning_rate": 1.644720683084882e-05, + "loss": 0.7184, + "step": 19547 + }, + { + "epoch": 3.11, + "learning_rate": 1.6444782651861848e-05, + "loss": 0.641, + "step": 19548 + }, + { + "epoch": 3.11, + "learning_rate": 1.644235856398274e-05, + "loss": 0.6588, + "step": 19549 + }, + { + "epoch": 3.11, + "learning_rate": 1.6439934567237326e-05, + "loss": 0.6427, + "step": 19550 + }, + { + "epoch": 3.11, + "learning_rate": 1.6437510661651412e-05, + "loss": 0.636, + "step": 19551 + }, + { + "epoch": 3.11, + "learning_rate": 1.6435086847250804e-05, + "loss": 0.6557, + "step": 19552 + }, + { + "epoch": 3.12, + "learning_rate": 1.6432663124061326e-05, + "loss": 0.6663, + "step": 19553 + }, + { + "epoch": 3.12, + "learning_rate": 1.6430239492108786e-05, + "loss": 0.6661, + "step": 19554 + }, + { + "epoch": 3.12, + "learning_rate": 1.6427815951418984e-05, + "loss": 0.5927, + "step": 19555 + }, + { + "epoch": 3.12, + "learning_rate": 1.6425392502017744e-05, + "loss": 0.6277, + "step": 19556 + }, + { + "epoch": 3.12, + "learning_rate": 1.642296914393087e-05, + "loss": 0.6177, + "step": 19557 + }, + { + "epoch": 3.12, + "learning_rate": 1.6420545877184162e-05, + "loss": 0.7339, + "step": 19558 + }, + { + "epoch": 3.12, + "learning_rate": 1.6418122701803434e-05, + "loss": 0.6676, + "step": 19559 + }, + { + "epoch": 3.12, + "learning_rate": 1.6415699617814495e-05, + "loss": 0.6111, + "step": 19560 + }, + { + "epoch": 3.12, + "learning_rate": 1.6413276625243135e-05, + "loss": 0.6523, + "step": 19561 + }, + { + "epoch": 3.12, + "learning_rate": 1.641085372411517e-05, + "loss": 0.7249, + "step": 19562 + }, + { + "epoch": 3.12, + "learning_rate": 1.6408430914456396e-05, + "loss": 0.5861, + "step": 19563 + }, + { + "epoch": 3.12, + "learning_rate": 1.6406008196292615e-05, + "loss": 0.6866, + "step": 19564 + }, + { + "epoch": 3.12, + "learning_rate": 1.6403585569649636e-05, + "loss": 0.6581, + "step": 19565 + }, + { + "epoch": 3.12, + "learning_rate": 1.6401163034553256e-05, + "loss": 0.6185, + "step": 19566 + }, + { + "epoch": 3.12, + "learning_rate": 1.639874059102926e-05, + "loss": 0.6375, + "step": 19567 + }, + { + "epoch": 3.12, + "learning_rate": 1.639631823910346e-05, + "loss": 0.6449, + "step": 19568 + }, + { + "epoch": 3.12, + "learning_rate": 1.6393895978801637e-05, + "loss": 0.6681, + "step": 19569 + }, + { + "epoch": 3.12, + "learning_rate": 1.6391473810149606e-05, + "loss": 0.6832, + "step": 19570 + }, + { + "epoch": 3.12, + "learning_rate": 1.638905173317315e-05, + "loss": 0.6806, + "step": 19571 + }, + { + "epoch": 3.12, + "learning_rate": 1.6386629747898064e-05, + "loss": 0.604, + "step": 19572 + }, + { + "epoch": 3.12, + "learning_rate": 1.6384207854350144e-05, + "loss": 0.6466, + "step": 19573 + }, + { + "epoch": 3.12, + "learning_rate": 1.6381786052555177e-05, + "loss": 0.6173, + "step": 19574 + }, + { + "epoch": 3.12, + "learning_rate": 1.6379364342538954e-05, + "loss": 0.5547, + "step": 19575 + }, + { + "epoch": 3.12, + "learning_rate": 1.6376942724327273e-05, + "loss": 0.6486, + "step": 19576 + }, + { + "epoch": 3.12, + "learning_rate": 1.6374521197945916e-05, + "loss": 0.5938, + "step": 19577 + }, + { + "epoch": 3.12, + "learning_rate": 1.6372099763420667e-05, + "loss": 0.6799, + "step": 19578 + }, + { + "epoch": 3.12, + "learning_rate": 1.6369678420777325e-05, + "loss": 0.6733, + "step": 19579 + }, + { + "epoch": 3.12, + "learning_rate": 1.636725717004166e-05, + "loss": 0.6547, + "step": 19580 + }, + { + "epoch": 3.12, + "learning_rate": 1.6364836011239468e-05, + "loss": 0.6073, + "step": 19581 + }, + { + "epoch": 3.12, + "learning_rate": 1.636241494439653e-05, + "loss": 0.6284, + "step": 19582 + }, + { + "epoch": 3.12, + "learning_rate": 1.6359993969538635e-05, + "loss": 0.6779, + "step": 19583 + }, + { + "epoch": 3.12, + "learning_rate": 1.6357573086691546e-05, + "loss": 0.6013, + "step": 19584 + }, + { + "epoch": 3.12, + "learning_rate": 1.6355152295881064e-05, + "loss": 0.6158, + "step": 19585 + }, + { + "epoch": 3.12, + "learning_rate": 1.635273159713297e-05, + "loss": 0.6319, + "step": 19586 + }, + { + "epoch": 3.12, + "learning_rate": 1.635031099047303e-05, + "loss": 0.6433, + "step": 19587 + }, + { + "epoch": 3.12, + "learning_rate": 1.6347890475927026e-05, + "loss": 0.7035, + "step": 19588 + }, + { + "epoch": 3.12, + "learning_rate": 1.6345470053520725e-05, + "loss": 0.6143, + "step": 19589 + }, + { + "epoch": 3.12, + "learning_rate": 1.6343049723279922e-05, + "loss": 0.6801, + "step": 19590 + }, + { + "epoch": 3.12, + "learning_rate": 1.6340629485230386e-05, + "loss": 0.6114, + "step": 19591 + }, + { + "epoch": 3.12, + "learning_rate": 1.633820933939788e-05, + "loss": 0.6106, + "step": 19592 + }, + { + "epoch": 3.12, + "learning_rate": 1.6335789285808195e-05, + "loss": 0.6243, + "step": 19593 + }, + { + "epoch": 3.12, + "learning_rate": 1.633336932448709e-05, + "loss": 0.6906, + "step": 19594 + }, + { + "epoch": 3.12, + "learning_rate": 1.6330949455460336e-05, + "loss": 0.6646, + "step": 19595 + }, + { + "epoch": 3.12, + "learning_rate": 1.6328529678753715e-05, + "loss": 0.6424, + "step": 19596 + }, + { + "epoch": 3.12, + "learning_rate": 1.6326109994392984e-05, + "loss": 0.6405, + "step": 19597 + }, + { + "epoch": 3.12, + "learning_rate": 1.6323690402403917e-05, + "loss": 0.6337, + "step": 19598 + }, + { + "epoch": 3.12, + "learning_rate": 1.632127090281228e-05, + "loss": 0.626, + "step": 19599 + }, + { + "epoch": 3.12, + "learning_rate": 1.631885149564384e-05, + "loss": 0.6634, + "step": 19600 + }, + { + "epoch": 3.12, + "learning_rate": 1.631643218092436e-05, + "loss": 0.6923, + "step": 19601 + }, + { + "epoch": 3.12, + "learning_rate": 1.63140129586796e-05, + "loss": 0.648, + "step": 19602 + }, + { + "epoch": 3.12, + "learning_rate": 1.6311593828935335e-05, + "loss": 0.6761, + "step": 19603 + }, + { + "epoch": 3.12, + "learning_rate": 1.630917479171732e-05, + "loss": 0.6524, + "step": 19604 + }, + { + "epoch": 3.12, + "learning_rate": 1.6306755847051312e-05, + "loss": 0.6667, + "step": 19605 + }, + { + "epoch": 3.12, + "learning_rate": 1.630433699496309e-05, + "loss": 0.6297, + "step": 19606 + }, + { + "epoch": 3.12, + "learning_rate": 1.6301918235478388e-05, + "loss": 0.583, + "step": 19607 + }, + { + "epoch": 3.12, + "learning_rate": 1.6299499568622985e-05, + "loss": 0.6532, + "step": 19608 + }, + { + "epoch": 3.12, + "learning_rate": 1.6297080994422615e-05, + "loss": 0.6142, + "step": 19609 + }, + { + "epoch": 3.12, + "learning_rate": 1.629466251290306e-05, + "loss": 0.6446, + "step": 19610 + }, + { + "epoch": 3.12, + "learning_rate": 1.6292244124090062e-05, + "loss": 0.7325, + "step": 19611 + }, + { + "epoch": 3.12, + "learning_rate": 1.628982582800937e-05, + "loss": 0.6753, + "step": 19612 + }, + { + "epoch": 3.12, + "learning_rate": 1.6287407624686753e-05, + "loss": 0.5968, + "step": 19613 + }, + { + "epoch": 3.12, + "learning_rate": 1.6284989514147956e-05, + "loss": 0.651, + "step": 19614 + }, + { + "epoch": 3.12, + "learning_rate": 1.628257149641873e-05, + "loss": 0.5948, + "step": 19615 + }, + { + "epoch": 3.13, + "learning_rate": 1.6280153571524814e-05, + "loss": 0.6693, + "step": 19616 + }, + { + "epoch": 3.13, + "learning_rate": 1.6277735739491978e-05, + "loss": 0.7161, + "step": 19617 + }, + { + "epoch": 3.13, + "learning_rate": 1.627531800034596e-05, + "loss": 0.681, + "step": 19618 + }, + { + "epoch": 3.13, + "learning_rate": 1.627290035411251e-05, + "loss": 0.6877, + "step": 19619 + }, + { + "epoch": 3.13, + "learning_rate": 1.627048280081737e-05, + "loss": 0.6959, + "step": 19620 + }, + { + "epoch": 3.13, + "learning_rate": 1.6268065340486288e-05, + "loss": 0.6812, + "step": 19621 + }, + { + "epoch": 3.13, + "learning_rate": 1.6265647973145007e-05, + "loss": 0.7289, + "step": 19622 + }, + { + "epoch": 3.13, + "learning_rate": 1.6263230698819277e-05, + "loss": 0.644, + "step": 19623 + }, + { + "epoch": 3.13, + "learning_rate": 1.6260813517534834e-05, + "loss": 0.6691, + "step": 19624 + }, + { + "epoch": 3.13, + "learning_rate": 1.625839642931742e-05, + "loss": 0.7235, + "step": 19625 + }, + { + "epoch": 3.13, + "learning_rate": 1.625597943419279e-05, + "loss": 0.8006, + "step": 19626 + }, + { + "epoch": 3.13, + "learning_rate": 1.6253562532186656e-05, + "loss": 0.6682, + "step": 19627 + }, + { + "epoch": 3.13, + "learning_rate": 1.6251145723324772e-05, + "loss": 0.6467, + "step": 19628 + }, + { + "epoch": 3.13, + "learning_rate": 1.624872900763288e-05, + "loss": 0.7224, + "step": 19629 + }, + { + "epoch": 3.13, + "learning_rate": 1.6246312385136703e-05, + "loss": 0.5503, + "step": 19630 + }, + { + "epoch": 3.13, + "learning_rate": 1.624389585586199e-05, + "loss": 0.6289, + "step": 19631 + }, + { + "epoch": 3.13, + "learning_rate": 1.6241479419834472e-05, + "loss": 0.6282, + "step": 19632 + }, + { + "epoch": 3.13, + "learning_rate": 1.6239063077079873e-05, + "loss": 0.6383, + "step": 19633 + }, + { + "epoch": 3.13, + "learning_rate": 1.6236646827623938e-05, + "loss": 0.6766, + "step": 19634 + }, + { + "epoch": 3.13, + "learning_rate": 1.6234230671492396e-05, + "loss": 0.6405, + "step": 19635 + }, + { + "epoch": 3.13, + "learning_rate": 1.623181460871097e-05, + "loss": 0.6615, + "step": 19636 + }, + { + "epoch": 3.13, + "learning_rate": 1.62293986393054e-05, + "loss": 0.5967, + "step": 19637 + }, + { + "epoch": 3.13, + "learning_rate": 1.6226982763301406e-05, + "loss": 0.6134, + "step": 19638 + }, + { + "epoch": 3.13, + "learning_rate": 1.6224566980724722e-05, + "loss": 0.6355, + "step": 19639 + }, + { + "epoch": 3.13, + "learning_rate": 1.6222151291601075e-05, + "loss": 0.6536, + "step": 19640 + }, + { + "epoch": 3.13, + "learning_rate": 1.6219735695956184e-05, + "loss": 0.6471, + "step": 19641 + }, + { + "epoch": 3.13, + "learning_rate": 1.621732019381577e-05, + "loss": 0.6226, + "step": 19642 + }, + { + "epoch": 3.13, + "learning_rate": 1.6214904785205577e-05, + "loss": 0.6462, + "step": 19643 + }, + { + "epoch": 3.13, + "learning_rate": 1.621248947015131e-05, + "loss": 0.6028, + "step": 19644 + }, + { + "epoch": 3.13, + "learning_rate": 1.621007424867869e-05, + "loss": 0.5932, + "step": 19645 + }, + { + "epoch": 3.13, + "learning_rate": 1.6207659120813456e-05, + "loss": 0.6746, + "step": 19646 + }, + { + "epoch": 3.13, + "learning_rate": 1.62052440865813e-05, + "loss": 0.6405, + "step": 19647 + }, + { + "epoch": 3.13, + "learning_rate": 1.6202829146007964e-05, + "loss": 0.652, + "step": 19648 + }, + { + "epoch": 3.13, + "learning_rate": 1.6200414299119155e-05, + "loss": 0.6078, + "step": 19649 + }, + { + "epoch": 3.13, + "learning_rate": 1.6197999545940585e-05, + "loss": 0.6199, + "step": 19650 + }, + { + "epoch": 3.13, + "learning_rate": 1.6195584886497982e-05, + "loss": 0.6248, + "step": 19651 + }, + { + "epoch": 3.13, + "learning_rate": 1.6193170320817053e-05, + "loss": 0.6141, + "step": 19652 + }, + { + "epoch": 3.13, + "learning_rate": 1.6190755848923512e-05, + "loss": 0.6067, + "step": 19653 + }, + { + "epoch": 3.13, + "learning_rate": 1.6188341470843072e-05, + "loss": 0.6633, + "step": 19654 + }, + { + "epoch": 3.13, + "learning_rate": 1.618592718660145e-05, + "loss": 0.6075, + "step": 19655 + }, + { + "epoch": 3.13, + "learning_rate": 1.6183512996224348e-05, + "loss": 0.6053, + "step": 19656 + }, + { + "epoch": 3.13, + "learning_rate": 1.6181098899737483e-05, + "loss": 0.6175, + "step": 19657 + }, + { + "epoch": 3.13, + "learning_rate": 1.617868489716656e-05, + "loss": 0.6503, + "step": 19658 + }, + { + "epoch": 3.13, + "learning_rate": 1.6176270988537283e-05, + "loss": 0.6486, + "step": 19659 + }, + { + "epoch": 3.13, + "learning_rate": 1.6173857173875366e-05, + "loss": 0.6602, + "step": 19660 + }, + { + "epoch": 3.13, + "learning_rate": 1.6171443453206517e-05, + "loss": 0.6209, + "step": 19661 + }, + { + "epoch": 3.13, + "learning_rate": 1.6169029826556427e-05, + "loss": 0.7026, + "step": 19662 + }, + { + "epoch": 3.13, + "learning_rate": 1.6166616293950813e-05, + "loss": 0.6134, + "step": 19663 + }, + { + "epoch": 3.13, + "learning_rate": 1.6164202855415374e-05, + "loss": 0.6509, + "step": 19664 + }, + { + "epoch": 3.13, + "learning_rate": 1.6161789510975802e-05, + "loss": 0.6706, + "step": 19665 + }, + { + "epoch": 3.13, + "learning_rate": 1.6159376260657812e-05, + "loss": 0.5977, + "step": 19666 + }, + { + "epoch": 3.13, + "learning_rate": 1.6156963104487105e-05, + "loss": 0.6715, + "step": 19667 + }, + { + "epoch": 3.13, + "learning_rate": 1.6154550042489367e-05, + "loss": 0.6507, + "step": 19668 + }, + { + "epoch": 3.13, + "learning_rate": 1.61521370746903e-05, + "loss": 0.5595, + "step": 19669 + }, + { + "epoch": 3.13, + "learning_rate": 1.6149724201115594e-05, + "loss": 0.6404, + "step": 19670 + }, + { + "epoch": 3.13, + "learning_rate": 1.6147311421790963e-05, + "loss": 0.6584, + "step": 19671 + }, + { + "epoch": 3.13, + "learning_rate": 1.6144898736742085e-05, + "loss": 0.6547, + "step": 19672 + }, + { + "epoch": 3.13, + "learning_rate": 1.6142486145994655e-05, + "loss": 0.63, + "step": 19673 + }, + { + "epoch": 3.13, + "learning_rate": 1.614007364957438e-05, + "loss": 0.6178, + "step": 19674 + }, + { + "epoch": 3.13, + "learning_rate": 1.6137661247506937e-05, + "loss": 0.6004, + "step": 19675 + }, + { + "epoch": 3.13, + "learning_rate": 1.6135248939818016e-05, + "loss": 0.5867, + "step": 19676 + }, + { + "epoch": 3.13, + "learning_rate": 1.613283672653332e-05, + "loss": 0.6382, + "step": 19677 + }, + { + "epoch": 3.13, + "learning_rate": 1.6130424607678522e-05, + "loss": 0.5703, + "step": 19678 + }, + { + "epoch": 3.14, + "learning_rate": 1.6128012583279317e-05, + "loss": 0.6245, + "step": 19679 + }, + { + "epoch": 3.14, + "learning_rate": 1.6125600653361402e-05, + "loss": 0.6056, + "step": 19680 + }, + { + "epoch": 3.14, + "learning_rate": 1.6123188817950446e-05, + "loss": 0.6986, + "step": 19681 + }, + { + "epoch": 3.14, + "learning_rate": 1.6120777077072135e-05, + "loss": 0.5972, + "step": 19682 + }, + { + "epoch": 3.14, + "learning_rate": 1.6118365430752163e-05, + "loss": 0.6705, + "step": 19683 + }, + { + "epoch": 3.14, + "learning_rate": 1.611595387901621e-05, + "loss": 0.6043, + "step": 19684 + }, + { + "epoch": 3.14, + "learning_rate": 1.6113542421889946e-05, + "loss": 0.5817, + "step": 19685 + }, + { + "epoch": 3.14, + "learning_rate": 1.6111131059399063e-05, + "loss": 0.6261, + "step": 19686 + }, + { + "epoch": 3.14, + "learning_rate": 1.610871979156925e-05, + "loss": 0.5841, + "step": 19687 + }, + { + "epoch": 3.14, + "learning_rate": 1.6106308618426165e-05, + "loss": 0.6335, + "step": 19688 + }, + { + "epoch": 3.14, + "learning_rate": 1.6103897539995493e-05, + "loss": 0.6405, + "step": 19689 + }, + { + "epoch": 3.14, + "learning_rate": 1.6101486556302905e-05, + "loss": 0.5916, + "step": 19690 + }, + { + "epoch": 3.14, + "learning_rate": 1.6099075667374093e-05, + "loss": 0.639, + "step": 19691 + }, + { + "epoch": 3.14, + "learning_rate": 1.609666487323471e-05, + "loss": 0.6154, + "step": 19692 + }, + { + "epoch": 3.14, + "learning_rate": 1.6094254173910447e-05, + "loss": 0.6337, + "step": 19693 + }, + { + "epoch": 3.14, + "learning_rate": 1.6091843569426974e-05, + "loss": 0.7094, + "step": 19694 + }, + { + "epoch": 3.14, + "learning_rate": 1.6089433059809954e-05, + "loss": 0.677, + "step": 19695 + }, + { + "epoch": 3.14, + "learning_rate": 1.608702264508506e-05, + "loss": 0.6169, + "step": 19696 + }, + { + "epoch": 3.14, + "learning_rate": 1.6084612325277963e-05, + "loss": 0.6273, + "step": 19697 + }, + { + "epoch": 3.14, + "learning_rate": 1.608220210041434e-05, + "loss": 0.6889, + "step": 19698 + }, + { + "epoch": 3.14, + "learning_rate": 1.607979197051984e-05, + "loss": 0.6591, + "step": 19699 + }, + { + "epoch": 3.14, + "learning_rate": 1.607738193562015e-05, + "loss": 0.5963, + "step": 19700 + }, + { + "epoch": 3.14, + "learning_rate": 1.6074971995740923e-05, + "loss": 0.5978, + "step": 19701 + }, + { + "epoch": 3.14, + "learning_rate": 1.607256215090782e-05, + "loss": 0.5972, + "step": 19702 + }, + { + "epoch": 3.14, + "learning_rate": 1.6070152401146516e-05, + "loss": 0.6823, + "step": 19703 + }, + { + "epoch": 3.14, + "learning_rate": 1.6067742746482666e-05, + "loss": 0.7355, + "step": 19704 + }, + { + "epoch": 3.14, + "learning_rate": 1.6065333186941927e-05, + "loss": 0.6499, + "step": 19705 + }, + { + "epoch": 3.14, + "learning_rate": 1.606292372254997e-05, + "loss": 0.6791, + "step": 19706 + }, + { + "epoch": 3.14, + "learning_rate": 1.6060514353332463e-05, + "loss": 0.7048, + "step": 19707 + }, + { + "epoch": 3.14, + "learning_rate": 1.605810507931504e-05, + "loss": 0.6507, + "step": 19708 + }, + { + "epoch": 3.14, + "learning_rate": 1.6055695900523365e-05, + "loss": 0.6886, + "step": 19709 + }, + { + "epoch": 3.14, + "learning_rate": 1.6053286816983097e-05, + "loss": 0.6613, + "step": 19710 + }, + { + "epoch": 3.14, + "learning_rate": 1.6050877828719898e-05, + "loss": 0.6224, + "step": 19711 + }, + { + "epoch": 3.14, + "learning_rate": 1.6048468935759418e-05, + "loss": 0.6464, + "step": 19712 + }, + { + "epoch": 3.14, + "learning_rate": 1.60460601381273e-05, + "loss": 0.6434, + "step": 19713 + }, + { + "epoch": 3.14, + "learning_rate": 1.6043651435849215e-05, + "loss": 0.6392, + "step": 19714 + }, + { + "epoch": 3.14, + "learning_rate": 1.6041242828950803e-05, + "loss": 0.66, + "step": 19715 + }, + { + "epoch": 3.14, + "learning_rate": 1.6038834317457712e-05, + "loss": 0.6785, + "step": 19716 + }, + { + "epoch": 3.14, + "learning_rate": 1.6036425901395596e-05, + "loss": 0.6895, + "step": 19717 + }, + { + "epoch": 3.14, + "learning_rate": 1.6034017580790105e-05, + "loss": 0.6064, + "step": 19718 + }, + { + "epoch": 3.14, + "learning_rate": 1.6031609355666884e-05, + "loss": 0.5774, + "step": 19719 + }, + { + "epoch": 3.14, + "learning_rate": 1.6029201226051574e-05, + "loss": 0.5737, + "step": 19720 + }, + { + "epoch": 3.14, + "learning_rate": 1.602679319196983e-05, + "loss": 0.629, + "step": 19721 + }, + { + "epoch": 3.14, + "learning_rate": 1.602438525344729e-05, + "loss": 0.6552, + "step": 19722 + }, + { + "epoch": 3.14, + "learning_rate": 1.6021977410509592e-05, + "loss": 0.5721, + "step": 19723 + }, + { + "epoch": 3.14, + "learning_rate": 1.6019569663182386e-05, + "loss": 0.627, + "step": 19724 + }, + { + "epoch": 3.14, + "learning_rate": 1.6017162011491316e-05, + "loss": 0.6157, + "step": 19725 + }, + { + "epoch": 3.14, + "learning_rate": 1.601475445546201e-05, + "loss": 0.6139, + "step": 19726 + }, + { + "epoch": 3.14, + "learning_rate": 1.6012346995120126e-05, + "loss": 0.6895, + "step": 19727 + }, + { + "epoch": 3.14, + "learning_rate": 1.6009939630491282e-05, + "loss": 0.6598, + "step": 19728 + }, + { + "epoch": 3.14, + "learning_rate": 1.6007532361601123e-05, + "loss": 0.6493, + "step": 19729 + }, + { + "epoch": 3.14, + "learning_rate": 1.6005125188475282e-05, + "loss": 0.6182, + "step": 19730 + }, + { + "epoch": 3.14, + "learning_rate": 1.60027181111394e-05, + "loss": 0.7253, + "step": 19731 + }, + { + "epoch": 3.14, + "learning_rate": 1.6000311129619107e-05, + "loss": 0.6228, + "step": 19732 + }, + { + "epoch": 3.14, + "learning_rate": 1.599790424394004e-05, + "loss": 0.6813, + "step": 19733 + }, + { + "epoch": 3.14, + "learning_rate": 1.599549745412782e-05, + "loss": 0.7018, + "step": 19734 + }, + { + "epoch": 3.14, + "learning_rate": 1.599309076020809e-05, + "loss": 0.6515, + "step": 19735 + }, + { + "epoch": 3.14, + "learning_rate": 1.5990684162206476e-05, + "loss": 0.6421, + "step": 19736 + }, + { + "epoch": 3.14, + "learning_rate": 1.5988277660148603e-05, + "loss": 0.6567, + "step": 19737 + }, + { + "epoch": 3.14, + "learning_rate": 1.5985871254060102e-05, + "loss": 0.6465, + "step": 19738 + }, + { + "epoch": 3.14, + "learning_rate": 1.5983464943966603e-05, + "loss": 0.7177, + "step": 19739 + }, + { + "epoch": 3.14, + "learning_rate": 1.5981058729893722e-05, + "loss": 0.6824, + "step": 19740 + }, + { + "epoch": 3.14, + "learning_rate": 1.5978652611867096e-05, + "loss": 0.6676, + "step": 19741 + }, + { + "epoch": 3.15, + "learning_rate": 1.5976246589912346e-05, + "loss": 0.6772, + "step": 19742 + }, + { + "epoch": 3.15, + "learning_rate": 1.597384066405508e-05, + "loss": 0.6968, + "step": 19743 + }, + { + "epoch": 3.15, + "learning_rate": 1.5971434834320938e-05, + "loss": 0.6517, + "step": 19744 + }, + { + "epoch": 3.15, + "learning_rate": 1.5969029100735536e-05, + "loss": 0.6396, + "step": 19745 + }, + { + "epoch": 3.15, + "learning_rate": 1.5966623463324485e-05, + "loss": 0.6259, + "step": 19746 + }, + { + "epoch": 3.15, + "learning_rate": 1.596421792211342e-05, + "loss": 0.6132, + "step": 19747 + }, + { + "epoch": 3.15, + "learning_rate": 1.596181247712794e-05, + "loss": 0.6593, + "step": 19748 + }, + { + "epoch": 3.15, + "learning_rate": 1.5959407128393672e-05, + "loss": 0.6506, + "step": 19749 + }, + { + "epoch": 3.15, + "learning_rate": 1.5957001875936235e-05, + "loss": 0.6386, + "step": 19750 + }, + { + "epoch": 3.15, + "learning_rate": 1.5954596719781223e-05, + "loss": 0.6418, + "step": 19751 + }, + { + "epoch": 3.15, + "learning_rate": 1.5952191659954274e-05, + "loss": 0.6292, + "step": 19752 + }, + { + "epoch": 3.15, + "learning_rate": 1.5949786696480995e-05, + "loss": 0.6847, + "step": 19753 + }, + { + "epoch": 3.15, + "learning_rate": 1.594738182938698e-05, + "loss": 0.71, + "step": 19754 + }, + { + "epoch": 3.15, + "learning_rate": 1.5944977058697863e-05, + "loss": 0.6282, + "step": 19755 + }, + { + "epoch": 3.15, + "learning_rate": 1.594257238443924e-05, + "loss": 0.7078, + "step": 19756 + }, + { + "epoch": 3.15, + "learning_rate": 1.5940167806636723e-05, + "loss": 0.6382, + "step": 19757 + }, + { + "epoch": 3.15, + "learning_rate": 1.593776332531592e-05, + "loss": 0.6533, + "step": 19758 + }, + { + "epoch": 3.15, + "learning_rate": 1.593535894050243e-05, + "loss": 0.6386, + "step": 19759 + }, + { + "epoch": 3.15, + "learning_rate": 1.5932954652221867e-05, + "loss": 0.6176, + "step": 19760 + }, + { + "epoch": 3.15, + "learning_rate": 1.5930550460499832e-05, + "loss": 0.62, + "step": 19761 + }, + { + "epoch": 3.15, + "learning_rate": 1.5928146365361936e-05, + "loss": 0.6269, + "step": 19762 + }, + { + "epoch": 3.15, + "learning_rate": 1.592574236683376e-05, + "loss": 0.5979, + "step": 19763 + }, + { + "epoch": 3.15, + "learning_rate": 1.592333846494093e-05, + "loss": 0.6784, + "step": 19764 + }, + { + "epoch": 3.15, + "learning_rate": 1.592093465970903e-05, + "loss": 0.6358, + "step": 19765 + }, + { + "epoch": 3.15, + "learning_rate": 1.591853095116366e-05, + "loss": 0.5928, + "step": 19766 + }, + { + "epoch": 3.15, + "learning_rate": 1.591612733933044e-05, + "loss": 0.6045, + "step": 19767 + }, + { + "epoch": 3.15, + "learning_rate": 1.5913723824234926e-05, + "loss": 0.6146, + "step": 19768 + }, + { + "epoch": 3.15, + "learning_rate": 1.5911320405902748e-05, + "loss": 0.728, + "step": 19769 + }, + { + "epoch": 3.15, + "learning_rate": 1.5908917084359493e-05, + "loss": 0.7135, + "step": 19770 + }, + { + "epoch": 3.15, + "learning_rate": 1.590651385963074e-05, + "loss": 0.6284, + "step": 19771 + }, + { + "epoch": 3.15, + "learning_rate": 1.59041107317421e-05, + "loss": 0.6126, + "step": 19772 + }, + { + "epoch": 3.15, + "learning_rate": 1.5901707700719162e-05, + "loss": 0.6324, + "step": 19773 + }, + { + "epoch": 3.15, + "learning_rate": 1.5899304766587507e-05, + "loss": 0.6029, + "step": 19774 + }, + { + "epoch": 3.15, + "learning_rate": 1.5896901929372735e-05, + "loss": 0.5988, + "step": 19775 + }, + { + "epoch": 3.15, + "learning_rate": 1.589449918910043e-05, + "loss": 0.6724, + "step": 19776 + }, + { + "epoch": 3.15, + "learning_rate": 1.5892096545796175e-05, + "loss": 0.6143, + "step": 19777 + }, + { + "epoch": 3.15, + "learning_rate": 1.588969399948557e-05, + "loss": 0.6009, + "step": 19778 + }, + { + "epoch": 3.15, + "learning_rate": 1.5887291550194193e-05, + "loss": 0.59, + "step": 19779 + }, + { + "epoch": 3.15, + "learning_rate": 1.5884889197947623e-05, + "loss": 0.6809, + "step": 19780 + }, + { + "epoch": 3.15, + "learning_rate": 1.5882486942771452e-05, + "loss": 0.6306, + "step": 19781 + }, + { + "epoch": 3.15, + "learning_rate": 1.5880084784691258e-05, + "loss": 0.608, + "step": 19782 + }, + { + "epoch": 3.15, + "learning_rate": 1.5877682723732624e-05, + "loss": 0.6147, + "step": 19783 + }, + { + "epoch": 3.15, + "learning_rate": 1.5875280759921134e-05, + "loss": 0.656, + "step": 19784 + }, + { + "epoch": 3.15, + "learning_rate": 1.5872878893282364e-05, + "loss": 0.7074, + "step": 19785 + }, + { + "epoch": 3.15, + "learning_rate": 1.5870477123841886e-05, + "loss": 0.6877, + "step": 19786 + }, + { + "epoch": 3.15, + "learning_rate": 1.5868075451625292e-05, + "loss": 0.6696, + "step": 19787 + }, + { + "epoch": 3.15, + "learning_rate": 1.5865673876658154e-05, + "loss": 0.6488, + "step": 19788 + }, + { + "epoch": 3.15, + "learning_rate": 1.5863272398966038e-05, + "loss": 0.659, + "step": 19789 + }, + { + "epoch": 3.15, + "learning_rate": 1.5860871018574524e-05, + "loss": 0.6724, + "step": 19790 + }, + { + "epoch": 3.15, + "learning_rate": 1.5858469735509184e-05, + "loss": 0.6587, + "step": 19791 + }, + { + "epoch": 3.15, + "learning_rate": 1.5856068549795592e-05, + "loss": 0.6697, + "step": 19792 + }, + { + "epoch": 3.15, + "learning_rate": 1.585366746145932e-05, + "loss": 0.654, + "step": 19793 + }, + { + "epoch": 3.15, + "learning_rate": 1.585126647052593e-05, + "loss": 0.6308, + "step": 19794 + }, + { + "epoch": 3.15, + "learning_rate": 1.5848865577021006e-05, + "loss": 0.6171, + "step": 19795 + }, + { + "epoch": 3.15, + "learning_rate": 1.5846464780970106e-05, + "loss": 0.672, + "step": 19796 + }, + { + "epoch": 3.15, + "learning_rate": 1.584406408239879e-05, + "loss": 0.7096, + "step": 19797 + }, + { + "epoch": 3.15, + "learning_rate": 1.5841663481332642e-05, + "loss": 0.6731, + "step": 19798 + }, + { + "epoch": 3.15, + "learning_rate": 1.5839262977797218e-05, + "loss": 0.6426, + "step": 19799 + }, + { + "epoch": 3.15, + "learning_rate": 1.5836862571818075e-05, + "loss": 0.637, + "step": 19800 + }, + { + "epoch": 3.15, + "learning_rate": 1.583446226342079e-05, + "loss": 0.665, + "step": 19801 + }, + { + "epoch": 3.15, + "learning_rate": 1.5832062052630914e-05, + "loss": 0.6757, + "step": 19802 + }, + { + "epoch": 3.15, + "learning_rate": 1.582966193947401e-05, + "loss": 0.6443, + "step": 19803 + }, + { + "epoch": 3.16, + "learning_rate": 1.5827261923975638e-05, + "loss": 0.659, + "step": 19804 + }, + { + "epoch": 3.16, + "learning_rate": 1.5824862006161363e-05, + "loss": 0.7128, + "step": 19805 + }, + { + "epoch": 3.16, + "learning_rate": 1.582246218605673e-05, + "loss": 0.639, + "step": 19806 + }, + { + "epoch": 3.16, + "learning_rate": 1.582006246368731e-05, + "loss": 0.669, + "step": 19807 + }, + { + "epoch": 3.16, + "learning_rate": 1.5817662839078656e-05, + "loss": 0.6394, + "step": 19808 + }, + { + "epoch": 3.16, + "learning_rate": 1.5815263312256314e-05, + "loss": 0.5935, + "step": 19809 + }, + { + "epoch": 3.16, + "learning_rate": 1.5812863883245836e-05, + "loss": 0.6653, + "step": 19810 + }, + { + "epoch": 3.16, + "learning_rate": 1.581046455207278e-05, + "loss": 0.6478, + "step": 19811 + }, + { + "epoch": 3.16, + "learning_rate": 1.58080653187627e-05, + "loss": 0.6163, + "step": 19812 + }, + { + "epoch": 3.16, + "learning_rate": 1.5805666183341146e-05, + "loss": 0.6575, + "step": 19813 + }, + { + "epoch": 3.16, + "learning_rate": 1.5803267145833657e-05, + "loss": 0.5898, + "step": 19814 + }, + { + "epoch": 3.16, + "learning_rate": 1.5800868206265796e-05, + "loss": 0.5966, + "step": 19815 + }, + { + "epoch": 3.16, + "learning_rate": 1.5798469364663103e-05, + "loss": 0.6592, + "step": 19816 + }, + { + "epoch": 3.16, + "learning_rate": 1.5796070621051124e-05, + "loss": 0.6789, + "step": 19817 + }, + { + "epoch": 3.16, + "learning_rate": 1.5793671975455405e-05, + "loss": 0.6781, + "step": 19818 + }, + { + "epoch": 3.16, + "learning_rate": 1.5791273427901494e-05, + "loss": 0.5986, + "step": 19819 + }, + { + "epoch": 3.16, + "learning_rate": 1.5788874978414923e-05, + "loss": 0.6266, + "step": 19820 + }, + { + "epoch": 3.16, + "learning_rate": 1.5786476627021245e-05, + "loss": 0.6188, + "step": 19821 + }, + { + "epoch": 3.16, + "learning_rate": 1.5784078373746e-05, + "loss": 0.6706, + "step": 19822 + }, + { + "epoch": 3.16, + "learning_rate": 1.5781680218614718e-05, + "loss": 0.7299, + "step": 19823 + }, + { + "epoch": 3.16, + "learning_rate": 1.5779282161652952e-05, + "loss": 0.6095, + "step": 19824 + }, + { + "epoch": 3.16, + "learning_rate": 1.577688420288623e-05, + "loss": 0.5916, + "step": 19825 + }, + { + "epoch": 3.16, + "learning_rate": 1.5774486342340093e-05, + "loss": 0.6781, + "step": 19826 + }, + { + "epoch": 3.16, + "learning_rate": 1.5772088580040073e-05, + "loss": 0.709, + "step": 19827 + }, + { + "epoch": 3.16, + "learning_rate": 1.5769690916011717e-05, + "loss": 0.6214, + "step": 19828 + }, + { + "epoch": 3.16, + "learning_rate": 1.5767293350280543e-05, + "loss": 0.674, + "step": 19829 + }, + { + "epoch": 3.16, + "learning_rate": 1.576489588287209e-05, + "loss": 0.6374, + "step": 19830 + }, + { + "epoch": 3.16, + "learning_rate": 1.576249851381188e-05, + "loss": 0.6543, + "step": 19831 + }, + { + "epoch": 3.16, + "learning_rate": 1.5760101243125464e-05, + "loss": 0.6114, + "step": 19832 + }, + { + "epoch": 3.16, + "learning_rate": 1.5757704070838356e-05, + "loss": 0.6212, + "step": 19833 + }, + { + "epoch": 3.16, + "learning_rate": 1.5755306996976083e-05, + "loss": 0.6443, + "step": 19834 + }, + { + "epoch": 3.16, + "learning_rate": 1.5752910021564185e-05, + "loss": 0.6267, + "step": 19835 + }, + { + "epoch": 3.16, + "learning_rate": 1.5750513144628182e-05, + "loss": 0.6047, + "step": 19836 + }, + { + "epoch": 3.16, + "learning_rate": 1.574811636619359e-05, + "loss": 0.5865, + "step": 19837 + }, + { + "epoch": 3.16, + "learning_rate": 1.5745719686285948e-05, + "loss": 0.5766, + "step": 19838 + }, + { + "epoch": 3.16, + "learning_rate": 1.5743323104930775e-05, + "loss": 0.6371, + "step": 19839 + }, + { + "epoch": 3.16, + "learning_rate": 1.574092662215359e-05, + "loss": 0.5958, + "step": 19840 + }, + { + "epoch": 3.16, + "learning_rate": 1.573853023797991e-05, + "loss": 0.7081, + "step": 19841 + }, + { + "epoch": 3.16, + "learning_rate": 1.5736133952435264e-05, + "loss": 0.7376, + "step": 19842 + }, + { + "epoch": 3.16, + "learning_rate": 1.573373776554517e-05, + "loss": 0.6054, + "step": 19843 + }, + { + "epoch": 3.16, + "learning_rate": 1.5731341677335136e-05, + "loss": 0.6332, + "step": 19844 + }, + { + "epoch": 3.16, + "learning_rate": 1.572894568783069e-05, + "loss": 0.6578, + "step": 19845 + }, + { + "epoch": 3.16, + "learning_rate": 1.5726549797057343e-05, + "loss": 0.6514, + "step": 19846 + }, + { + "epoch": 3.16, + "learning_rate": 1.572415400504061e-05, + "loss": 0.7511, + "step": 19847 + }, + { + "epoch": 3.16, + "learning_rate": 1.5721758311806012e-05, + "loss": 0.6291, + "step": 19848 + }, + { + "epoch": 3.16, + "learning_rate": 1.571936271737905e-05, + "loss": 0.6493, + "step": 19849 + }, + { + "epoch": 3.16, + "learning_rate": 1.5716967221785245e-05, + "loss": 0.586, + "step": 19850 + }, + { + "epoch": 3.16, + "learning_rate": 1.5714571825050102e-05, + "loss": 0.6278, + "step": 19851 + }, + { + "epoch": 3.16, + "learning_rate": 1.571217652719912e-05, + "loss": 0.63, + "step": 19852 + }, + { + "epoch": 3.16, + "learning_rate": 1.5709781328257832e-05, + "loss": 0.6381, + "step": 19853 + }, + { + "epoch": 3.16, + "learning_rate": 1.5707386228251727e-05, + "loss": 0.6831, + "step": 19854 + }, + { + "epoch": 3.16, + "learning_rate": 1.5704991227206317e-05, + "loss": 0.699, + "step": 19855 + }, + { + "epoch": 3.16, + "learning_rate": 1.5702596325147105e-05, + "loss": 0.5871, + "step": 19856 + }, + { + "epoch": 3.16, + "learning_rate": 1.5700201522099605e-05, + "loss": 0.6978, + "step": 19857 + }, + { + "epoch": 3.16, + "learning_rate": 1.5697806818089307e-05, + "loss": 0.6538, + "step": 19858 + }, + { + "epoch": 3.16, + "learning_rate": 1.5695412213141718e-05, + "loss": 0.6908, + "step": 19859 + }, + { + "epoch": 3.16, + "learning_rate": 1.5693017707282347e-05, + "loss": 0.7007, + "step": 19860 + }, + { + "epoch": 3.16, + "learning_rate": 1.5690623300536676e-05, + "loss": 0.621, + "step": 19861 + }, + { + "epoch": 3.16, + "learning_rate": 1.5688228992930225e-05, + "loss": 0.6579, + "step": 19862 + }, + { + "epoch": 3.16, + "learning_rate": 1.568583478448848e-05, + "loss": 0.6414, + "step": 19863 + }, + { + "epoch": 3.16, + "learning_rate": 1.5683440675236934e-05, + "loss": 0.6606, + "step": 19864 + }, + { + "epoch": 3.16, + "learning_rate": 1.5681046665201093e-05, + "loss": 0.6066, + "step": 19865 + }, + { + "epoch": 3.16, + "learning_rate": 1.567865275440645e-05, + "loss": 0.6239, + "step": 19866 + }, + { + "epoch": 3.17, + "learning_rate": 1.5676258942878492e-05, + "loss": 0.6348, + "step": 19867 + }, + { + "epoch": 3.17, + "learning_rate": 1.5673865230642725e-05, + "loss": 0.6501, + "step": 19868 + }, + { + "epoch": 3.17, + "learning_rate": 1.567147161772462e-05, + "loss": 0.635, + "step": 19869 + }, + { + "epoch": 3.17, + "learning_rate": 1.5669078104149686e-05, + "loss": 0.6105, + "step": 19870 + }, + { + "epoch": 3.17, + "learning_rate": 1.56666846899434e-05, + "loss": 0.6209, + "step": 19871 + }, + { + "epoch": 3.17, + "learning_rate": 1.5664291375131255e-05, + "loss": 0.6504, + "step": 19872 + }, + { + "epoch": 3.17, + "learning_rate": 1.5661898159738743e-05, + "loss": 0.7228, + "step": 19873 + }, + { + "epoch": 3.17, + "learning_rate": 1.5659505043791344e-05, + "loss": 0.6257, + "step": 19874 + }, + { + "epoch": 3.17, + "learning_rate": 1.565711202731454e-05, + "loss": 0.6046, + "step": 19875 + }, + { + "epoch": 3.17, + "learning_rate": 1.565471911033383e-05, + "loss": 0.6803, + "step": 19876 + }, + { + "epoch": 3.17, + "learning_rate": 1.5652326292874686e-05, + "loss": 0.6185, + "step": 19877 + }, + { + "epoch": 3.17, + "learning_rate": 1.5649933574962588e-05, + "loss": 0.6584, + "step": 19878 + }, + { + "epoch": 3.17, + "learning_rate": 1.5647540956623024e-05, + "loss": 0.64, + "step": 19879 + }, + { + "epoch": 3.17, + "learning_rate": 1.564514843788147e-05, + "loss": 0.6481, + "step": 19880 + }, + { + "epoch": 3.17, + "learning_rate": 1.56427560187634e-05, + "loss": 0.6478, + "step": 19881 + }, + { + "epoch": 3.17, + "learning_rate": 1.5640363699294307e-05, + "loss": 0.6369, + "step": 19882 + }, + { + "epoch": 3.17, + "learning_rate": 1.5637971479499655e-05, + "loss": 0.6373, + "step": 19883 + }, + { + "epoch": 3.17, + "learning_rate": 1.5635579359404918e-05, + "loss": 0.6192, + "step": 19884 + }, + { + "epoch": 3.17, + "learning_rate": 1.5633187339035582e-05, + "loss": 0.6401, + "step": 19885 + }, + { + "epoch": 3.17, + "learning_rate": 1.5630795418417114e-05, + "loss": 0.6496, + "step": 19886 + }, + { + "epoch": 3.17, + "learning_rate": 1.562840359757498e-05, + "loss": 0.5732, + "step": 19887 + }, + { + "epoch": 3.17, + "learning_rate": 1.5626011876534664e-05, + "loss": 0.6543, + "step": 19888 + }, + { + "epoch": 3.17, + "learning_rate": 1.5623620255321635e-05, + "loss": 0.6511, + "step": 19889 + }, + { + "epoch": 3.17, + "learning_rate": 1.5621228733961352e-05, + "loss": 0.6866, + "step": 19890 + }, + { + "epoch": 3.17, + "learning_rate": 1.561883731247929e-05, + "loss": 0.6573, + "step": 19891 + }, + { + "epoch": 3.17, + "learning_rate": 1.561644599090091e-05, + "loss": 0.5774, + "step": 19892 + }, + { + "epoch": 3.17, + "learning_rate": 1.561405476925169e-05, + "loss": 0.6555, + "step": 19893 + }, + { + "epoch": 3.17, + "learning_rate": 1.5611663647557085e-05, + "loss": 0.7227, + "step": 19894 + }, + { + "epoch": 3.17, + "learning_rate": 1.5609272625842558e-05, + "loss": 0.6003, + "step": 19895 + }, + { + "epoch": 3.17, + "learning_rate": 1.5606881704133586e-05, + "loss": 0.5848, + "step": 19896 + }, + { + "epoch": 3.17, + "learning_rate": 1.5604490882455616e-05, + "loss": 0.6423, + "step": 19897 + }, + { + "epoch": 3.17, + "learning_rate": 1.560210016083411e-05, + "loss": 0.6805, + "step": 19898 + }, + { + "epoch": 3.17, + "learning_rate": 1.5599709539294538e-05, + "loss": 0.6506, + "step": 19899 + }, + { + "epoch": 3.17, + "learning_rate": 1.559731901786235e-05, + "loss": 0.6584, + "step": 19900 + }, + { + "epoch": 3.17, + "learning_rate": 1.5594928596563003e-05, + "loss": 0.6603, + "step": 19901 + }, + { + "epoch": 3.17, + "learning_rate": 1.5592538275421962e-05, + "loss": 0.6484, + "step": 19902 + }, + { + "epoch": 3.17, + "learning_rate": 1.5590148054464676e-05, + "loss": 0.6864, + "step": 19903 + }, + { + "epoch": 3.17, + "learning_rate": 1.5587757933716597e-05, + "loss": 0.7108, + "step": 19904 + }, + { + "epoch": 3.17, + "learning_rate": 1.5585367913203183e-05, + "loss": 0.6028, + "step": 19905 + }, + { + "epoch": 3.17, + "learning_rate": 1.5582977992949887e-05, + "loss": 0.61, + "step": 19906 + }, + { + "epoch": 3.17, + "learning_rate": 1.558058817298215e-05, + "loss": 0.5746, + "step": 19907 + }, + { + "epoch": 3.17, + "learning_rate": 1.5578198453325444e-05, + "loss": 0.6521, + "step": 19908 + }, + { + "epoch": 3.17, + "learning_rate": 1.55758088340052e-05, + "loss": 0.6611, + "step": 19909 + }, + { + "epoch": 3.17, + "learning_rate": 1.5573419315046872e-05, + "loss": 0.7214, + "step": 19910 + }, + { + "epoch": 3.17, + "learning_rate": 1.5571029896475903e-05, + "loss": 0.6584, + "step": 19911 + }, + { + "epoch": 3.17, + "learning_rate": 1.5568640578317733e-05, + "loss": 0.6768, + "step": 19912 + }, + { + "epoch": 3.17, + "learning_rate": 1.5566251360597822e-05, + "loss": 0.6152, + "step": 19913 + }, + { + "epoch": 3.17, + "learning_rate": 1.556386224334161e-05, + "loss": 0.6749, + "step": 19914 + }, + { + "epoch": 3.17, + "learning_rate": 1.5561473226574523e-05, + "loss": 0.6114, + "step": 19915 + }, + { + "epoch": 3.17, + "learning_rate": 1.555908431032203e-05, + "loss": 0.6454, + "step": 19916 + }, + { + "epoch": 3.17, + "learning_rate": 1.555669549460955e-05, + "loss": 0.6187, + "step": 19917 + }, + { + "epoch": 3.17, + "learning_rate": 1.555430677946253e-05, + "loss": 0.5391, + "step": 19918 + }, + { + "epoch": 3.17, + "learning_rate": 1.5551918164906414e-05, + "loss": 0.5883, + "step": 19919 + }, + { + "epoch": 3.17, + "learning_rate": 1.5549529650966633e-05, + "loss": 0.6195, + "step": 19920 + }, + { + "epoch": 3.17, + "learning_rate": 1.5547141237668613e-05, + "loss": 0.6552, + "step": 19921 + }, + { + "epoch": 3.17, + "learning_rate": 1.554475292503781e-05, + "loss": 0.5483, + "step": 19922 + }, + { + "epoch": 3.17, + "learning_rate": 1.5542364713099647e-05, + "loss": 0.7283, + "step": 19923 + }, + { + "epoch": 3.17, + "learning_rate": 1.5539976601879554e-05, + "loss": 0.6758, + "step": 19924 + }, + { + "epoch": 3.17, + "learning_rate": 1.553758859140297e-05, + "loss": 0.6514, + "step": 19925 + }, + { + "epoch": 3.17, + "learning_rate": 1.553520068169532e-05, + "loss": 0.6772, + "step": 19926 + }, + { + "epoch": 3.17, + "learning_rate": 1.5532812872782038e-05, + "loss": 0.6222, + "step": 19927 + }, + { + "epoch": 3.17, + "learning_rate": 1.553042516468855e-05, + "loss": 0.5741, + "step": 19928 + }, + { + "epoch": 3.17, + "learning_rate": 1.5528037557440296e-05, + "loss": 0.6696, + "step": 19929 + }, + { + "epoch": 3.18, + "learning_rate": 1.5525650051062683e-05, + "loss": 0.6174, + "step": 19930 + }, + { + "epoch": 3.18, + "learning_rate": 1.5523262645581143e-05, + "loss": 0.6875, + "step": 19931 + }, + { + "epoch": 3.18, + "learning_rate": 1.5520875341021103e-05, + "loss": 0.6646, + "step": 19932 + }, + { + "epoch": 3.18, + "learning_rate": 1.5518488137407983e-05, + "loss": 0.6081, + "step": 19933 + }, + { + "epoch": 3.18, + "learning_rate": 1.5516101034767214e-05, + "loss": 0.6171, + "step": 19934 + }, + { + "epoch": 3.18, + "learning_rate": 1.5513714033124204e-05, + "loss": 0.5968, + "step": 19935 + }, + { + "epoch": 3.18, + "learning_rate": 1.5511327132504385e-05, + "loss": 0.6913, + "step": 19936 + }, + { + "epoch": 3.18, + "learning_rate": 1.5508940332933174e-05, + "loss": 0.65, + "step": 19937 + }, + { + "epoch": 3.18, + "learning_rate": 1.5506553634435978e-05, + "loss": 0.5819, + "step": 19938 + }, + { + "epoch": 3.18, + "learning_rate": 1.550416703703823e-05, + "loss": 0.6023, + "step": 19939 + }, + { + "epoch": 3.18, + "learning_rate": 1.5501780540765335e-05, + "loss": 0.6781, + "step": 19940 + }, + { + "epoch": 3.18, + "learning_rate": 1.5499394145642707e-05, + "loss": 0.567, + "step": 19941 + }, + { + "epoch": 3.18, + "learning_rate": 1.549700785169577e-05, + "loss": 0.5401, + "step": 19942 + }, + { + "epoch": 3.18, + "learning_rate": 1.5494621658949925e-05, + "loss": 0.5764, + "step": 19943 + }, + { + "epoch": 3.18, + "learning_rate": 1.549223556743059e-05, + "loss": 0.584, + "step": 19944 + }, + { + "epoch": 3.18, + "learning_rate": 1.5489849577163174e-05, + "loss": 0.6016, + "step": 19945 + }, + { + "epoch": 3.18, + "learning_rate": 1.5487463688173086e-05, + "loss": 0.634, + "step": 19946 + }, + { + "epoch": 3.18, + "learning_rate": 1.548507790048574e-05, + "loss": 0.6495, + "step": 19947 + }, + { + "epoch": 3.18, + "learning_rate": 1.548269221412653e-05, + "loss": 0.5966, + "step": 19948 + }, + { + "epoch": 3.18, + "learning_rate": 1.548030662912088e-05, + "loss": 0.6165, + "step": 19949 + }, + { + "epoch": 3.18, + "learning_rate": 1.547792114549418e-05, + "loss": 0.6351, + "step": 19950 + }, + { + "epoch": 3.18, + "learning_rate": 1.5475535763271838e-05, + "loss": 0.6356, + "step": 19951 + }, + { + "epoch": 3.18, + "learning_rate": 1.547315048247925e-05, + "loss": 0.6614, + "step": 19952 + }, + { + "epoch": 3.18, + "learning_rate": 1.547076530314183e-05, + "loss": 0.7017, + "step": 19953 + }, + { + "epoch": 3.18, + "learning_rate": 1.5468380225284978e-05, + "loss": 0.6515, + "step": 19954 + }, + { + "epoch": 3.18, + "learning_rate": 1.5465995248934083e-05, + "loss": 0.6977, + "step": 19955 + }, + { + "epoch": 3.18, + "learning_rate": 1.546361037411456e-05, + "loss": 0.5805, + "step": 19956 + }, + { + "epoch": 3.18, + "learning_rate": 1.546122560085179e-05, + "loss": 0.6868, + "step": 19957 + }, + { + "epoch": 3.18, + "learning_rate": 1.545884092917118e-05, + "loss": 0.5907, + "step": 19958 + }, + { + "epoch": 3.18, + "learning_rate": 1.5456456359098117e-05, + "loss": 0.6038, + "step": 19959 + }, + { + "epoch": 3.18, + "learning_rate": 1.5454071890658006e-05, + "loss": 0.6152, + "step": 19960 + }, + { + "epoch": 3.18, + "learning_rate": 1.5451687523876228e-05, + "loss": 0.6641, + "step": 19961 + }, + { + "epoch": 3.18, + "learning_rate": 1.544930325877818e-05, + "loss": 0.6122, + "step": 19962 + }, + { + "epoch": 3.18, + "learning_rate": 1.544691909538926e-05, + "loss": 0.6506, + "step": 19963 + }, + { + "epoch": 3.18, + "learning_rate": 1.5444535033734852e-05, + "loss": 0.5814, + "step": 19964 + }, + { + "epoch": 3.18, + "learning_rate": 1.544215107384034e-05, + "loss": 0.6482, + "step": 19965 + }, + { + "epoch": 3.18, + "learning_rate": 1.5439767215731117e-05, + "loss": 0.6592, + "step": 19966 + }, + { + "epoch": 3.18, + "learning_rate": 1.543738345943257e-05, + "loss": 0.7157, + "step": 19967 + }, + { + "epoch": 3.18, + "learning_rate": 1.5434999804970078e-05, + "loss": 0.6824, + "step": 19968 + }, + { + "epoch": 3.18, + "learning_rate": 1.5432616252369044e-05, + "loss": 0.6503, + "step": 19969 + }, + { + "epoch": 3.18, + "learning_rate": 1.5430232801654832e-05, + "loss": 0.6344, + "step": 19970 + }, + { + "epoch": 3.18, + "learning_rate": 1.5427849452852832e-05, + "loss": 0.7808, + "step": 19971 + }, + { + "epoch": 3.18, + "learning_rate": 1.542546620598842e-05, + "loss": 0.6414, + "step": 19972 + }, + { + "epoch": 3.18, + "learning_rate": 1.5423083061086978e-05, + "loss": 0.7493, + "step": 19973 + }, + { + "epoch": 3.18, + "learning_rate": 1.5420700018173894e-05, + "loss": 0.7108, + "step": 19974 + }, + { + "epoch": 3.18, + "learning_rate": 1.541831707727454e-05, + "loss": 0.6928, + "step": 19975 + }, + { + "epoch": 3.18, + "learning_rate": 1.541593423841428e-05, + "loss": 0.6694, + "step": 19976 + }, + { + "epoch": 3.18, + "learning_rate": 1.5413551501618512e-05, + "loss": 0.6102, + "step": 19977 + }, + { + "epoch": 3.18, + "learning_rate": 1.5411168866912602e-05, + "loss": 0.616, + "step": 19978 + }, + { + "epoch": 3.18, + "learning_rate": 1.540878633432191e-05, + "loss": 0.5874, + "step": 19979 + }, + { + "epoch": 3.18, + "learning_rate": 1.5406403903871833e-05, + "loss": 0.6734, + "step": 19980 + }, + { + "epoch": 3.18, + "learning_rate": 1.5404021575587725e-05, + "loss": 0.6899, + "step": 19981 + }, + { + "epoch": 3.18, + "learning_rate": 1.540163934949496e-05, + "loss": 0.672, + "step": 19982 + }, + { + "epoch": 3.18, + "learning_rate": 1.5399257225618914e-05, + "loss": 0.6488, + "step": 19983 + }, + { + "epoch": 3.18, + "learning_rate": 1.5396875203984944e-05, + "loss": 0.6553, + "step": 19984 + }, + { + "epoch": 3.18, + "learning_rate": 1.5394493284618423e-05, + "loss": 0.6212, + "step": 19985 + }, + { + "epoch": 3.18, + "learning_rate": 1.539211146754472e-05, + "loss": 0.63, + "step": 19986 + }, + { + "epoch": 3.18, + "learning_rate": 1.5389729752789197e-05, + "loss": 0.6068, + "step": 19987 + }, + { + "epoch": 3.18, + "learning_rate": 1.538734814037721e-05, + "loss": 0.6379, + "step": 19988 + }, + { + "epoch": 3.18, + "learning_rate": 1.538496663033414e-05, + "loss": 0.593, + "step": 19989 + }, + { + "epoch": 3.18, + "learning_rate": 1.538258522268534e-05, + "loss": 0.6774, + "step": 19990 + }, + { + "epoch": 3.18, + "learning_rate": 1.5380203917456165e-05, + "loss": 0.6463, + "step": 19991 + }, + { + "epoch": 3.18, + "learning_rate": 1.5377822714671975e-05, + "loss": 0.6227, + "step": 19992 + }, + { + "epoch": 3.19, + "learning_rate": 1.5375441614358127e-05, + "loss": 0.6111, + "step": 19993 + }, + { + "epoch": 3.19, + "learning_rate": 1.5373060616539993e-05, + "loss": 0.652, + "step": 19994 + }, + { + "epoch": 3.19, + "learning_rate": 1.537067972124291e-05, + "loss": 0.63, + "step": 19995 + }, + { + "epoch": 3.19, + "learning_rate": 1.5368298928492242e-05, + "loss": 0.6507, + "step": 19996 + }, + { + "epoch": 3.19, + "learning_rate": 1.5365918238313347e-05, + "loss": 0.646, + "step": 19997 + }, + { + "epoch": 3.19, + "learning_rate": 1.5363537650731575e-05, + "loss": 0.6402, + "step": 19998 + }, + { + "epoch": 3.19, + "learning_rate": 1.536115716577227e-05, + "loss": 0.6427, + "step": 19999 + }, + { + "epoch": 3.19, + "learning_rate": 1.5358776783460794e-05, + "loss": 0.6805, + "step": 20000 + }, + { + "epoch": 3.19, + "learning_rate": 1.535639650382249e-05, + "loss": 0.72, + "step": 20001 + }, + { + "epoch": 3.19, + "learning_rate": 1.535401632688271e-05, + "loss": 0.638, + "step": 20002 + }, + { + "epoch": 3.19, + "learning_rate": 1.53516362526668e-05, + "loss": 0.6833, + "step": 20003 + }, + { + "epoch": 3.19, + "learning_rate": 1.5349256281200107e-05, + "loss": 0.697, + "step": 20004 + }, + { + "epoch": 3.19, + "learning_rate": 1.534687641250797e-05, + "loss": 0.651, + "step": 20005 + }, + { + "epoch": 3.19, + "learning_rate": 1.5344496646615744e-05, + "loss": 0.6866, + "step": 20006 + }, + { + "epoch": 3.19, + "learning_rate": 1.5342116983548767e-05, + "loss": 0.6149, + "step": 20007 + }, + { + "epoch": 3.19, + "learning_rate": 1.5339737423332375e-05, + "loss": 0.6402, + "step": 20008 + }, + { + "epoch": 3.19, + "learning_rate": 1.5337357965991924e-05, + "loss": 0.6153, + "step": 20009 + }, + { + "epoch": 3.19, + "learning_rate": 1.533497861155274e-05, + "loss": 0.6213, + "step": 20010 + }, + { + "epoch": 3.19, + "learning_rate": 1.533259936004017e-05, + "loss": 0.5729, + "step": 20011 + }, + { + "epoch": 3.19, + "learning_rate": 1.5330220211479545e-05, + "loss": 0.6178, + "step": 20012 + }, + { + "epoch": 3.19, + "learning_rate": 1.5327841165896198e-05, + "loss": 0.7448, + "step": 20013 + }, + { + "epoch": 3.19, + "learning_rate": 1.5325462223315473e-05, + "loss": 0.6589, + "step": 20014 + }, + { + "epoch": 3.19, + "learning_rate": 1.5323083383762706e-05, + "loss": 0.6063, + "step": 20015 + }, + { + "epoch": 3.19, + "learning_rate": 1.532070464726322e-05, + "loss": 0.6547, + "step": 20016 + }, + { + "epoch": 3.19, + "learning_rate": 1.5318326013842358e-05, + "loss": 0.5982, + "step": 20017 + }, + { + "epoch": 3.19, + "learning_rate": 1.5315947483525443e-05, + "loss": 0.6436, + "step": 20018 + }, + { + "epoch": 3.19, + "learning_rate": 1.5313569056337805e-05, + "loss": 0.6779, + "step": 20019 + }, + { + "epoch": 3.19, + "learning_rate": 1.5311190732304784e-05, + "loss": 0.694, + "step": 20020 + }, + { + "epoch": 3.19, + "learning_rate": 1.5308812511451698e-05, + "loss": 0.6708, + "step": 20021 + }, + { + "epoch": 3.19, + "learning_rate": 1.530643439380387e-05, + "loss": 0.6387, + "step": 20022 + }, + { + "epoch": 3.19, + "learning_rate": 1.5304056379386634e-05, + "loss": 0.6659, + "step": 20023 + }, + { + "epoch": 3.19, + "learning_rate": 1.530167846822531e-05, + "loss": 0.6003, + "step": 20024 + }, + { + "epoch": 3.19, + "learning_rate": 1.5299300660345216e-05, + "loss": 0.6856, + "step": 20025 + }, + { + "epoch": 3.19, + "learning_rate": 1.5296922955771692e-05, + "loss": 0.6591, + "step": 20026 + }, + { + "epoch": 3.19, + "learning_rate": 1.5294545354530042e-05, + "loss": 0.6442, + "step": 20027 + }, + { + "epoch": 3.19, + "learning_rate": 1.529216785664559e-05, + "loss": 0.6716, + "step": 20028 + }, + { + "epoch": 3.19, + "learning_rate": 1.528979046214366e-05, + "loss": 0.6182, + "step": 20029 + }, + { + "epoch": 3.19, + "learning_rate": 1.5287413171049573e-05, + "loss": 0.5965, + "step": 20030 + }, + { + "epoch": 3.19, + "learning_rate": 1.5285035983388632e-05, + "loss": 0.6295, + "step": 20031 + }, + { + "epoch": 3.19, + "learning_rate": 1.528265889918616e-05, + "loss": 0.666, + "step": 20032 + }, + { + "epoch": 3.19, + "learning_rate": 1.5280281918467466e-05, + "loss": 0.6396, + "step": 20033 + }, + { + "epoch": 3.19, + "learning_rate": 1.5277905041257872e-05, + "loss": 0.6632, + "step": 20034 + }, + { + "epoch": 3.19, + "learning_rate": 1.527552826758269e-05, + "loss": 0.6514, + "step": 20035 + }, + { + "epoch": 3.19, + "learning_rate": 1.5273151597467223e-05, + "loss": 0.5964, + "step": 20036 + }, + { + "epoch": 3.19, + "learning_rate": 1.527077503093679e-05, + "loss": 0.6029, + "step": 20037 + }, + { + "epoch": 3.19, + "learning_rate": 1.52683985680167e-05, + "loss": 0.6076, + "step": 20038 + }, + { + "epoch": 3.19, + "learning_rate": 1.5266022208732245e-05, + "loss": 0.6114, + "step": 20039 + }, + { + "epoch": 3.19, + "learning_rate": 1.526364595310875e-05, + "loss": 0.648, + "step": 20040 + }, + { + "epoch": 3.19, + "learning_rate": 1.5261269801171518e-05, + "loss": 0.5911, + "step": 20041 + }, + { + "epoch": 3.19, + "learning_rate": 1.5258893752945847e-05, + "loss": 0.6121, + "step": 20042 + }, + { + "epoch": 3.19, + "learning_rate": 1.5256517808457044e-05, + "loss": 0.6466, + "step": 20043 + }, + { + "epoch": 3.19, + "learning_rate": 1.5254141967730411e-05, + "loss": 0.7164, + "step": 20044 + }, + { + "epoch": 3.19, + "learning_rate": 1.5251766230791247e-05, + "loss": 0.7766, + "step": 20045 + }, + { + "epoch": 3.19, + "learning_rate": 1.524939059766486e-05, + "loss": 0.6123, + "step": 20046 + }, + { + "epoch": 3.19, + "learning_rate": 1.524701506837654e-05, + "loss": 0.6128, + "step": 20047 + }, + { + "epoch": 3.19, + "learning_rate": 1.5244639642951587e-05, + "loss": 0.684, + "step": 20048 + }, + { + "epoch": 3.19, + "learning_rate": 1.52422643214153e-05, + "loss": 0.6642, + "step": 20049 + }, + { + "epoch": 3.19, + "learning_rate": 1.523988910379298e-05, + "loss": 0.7111, + "step": 20050 + }, + { + "epoch": 3.19, + "learning_rate": 1.523751399010991e-05, + "loss": 0.6397, + "step": 20051 + }, + { + "epoch": 3.19, + "learning_rate": 1.5235138980391391e-05, + "loss": 0.693, + "step": 20052 + }, + { + "epoch": 3.19, + "learning_rate": 1.523276407466271e-05, + "loss": 0.6938, + "step": 20053 + }, + { + "epoch": 3.19, + "learning_rate": 1.5230389272949163e-05, + "loss": 0.6453, + "step": 20054 + }, + { + "epoch": 3.19, + "learning_rate": 1.522801457527604e-05, + "loss": 0.5691, + "step": 20055 + }, + { + "epoch": 3.2, + "learning_rate": 1.5225639981668626e-05, + "loss": 0.7168, + "step": 20056 + }, + { + "epoch": 3.2, + "learning_rate": 1.5223265492152217e-05, + "loss": 0.5949, + "step": 20057 + }, + { + "epoch": 3.2, + "learning_rate": 1.5220891106752092e-05, + "loss": 0.5756, + "step": 20058 + }, + { + "epoch": 3.2, + "learning_rate": 1.5218516825493533e-05, + "loss": 0.6772, + "step": 20059 + }, + { + "epoch": 3.2, + "learning_rate": 1.5216142648401844e-05, + "loss": 0.7168, + "step": 20060 + }, + { + "epoch": 3.2, + "learning_rate": 1.5213768575502291e-05, + "loss": 0.6143, + "step": 20061 + }, + { + "epoch": 3.2, + "learning_rate": 1.5211394606820156e-05, + "loss": 0.6485, + "step": 20062 + }, + { + "epoch": 3.2, + "learning_rate": 1.520902074238073e-05, + "loss": 0.6208, + "step": 20063 + }, + { + "epoch": 3.2, + "learning_rate": 1.520664698220929e-05, + "loss": 0.6349, + "step": 20064 + }, + { + "epoch": 3.2, + "learning_rate": 1.5204273326331114e-05, + "loss": 0.6311, + "step": 20065 + }, + { + "epoch": 3.2, + "learning_rate": 1.5201899774771477e-05, + "loss": 0.7185, + "step": 20066 + }, + { + "epoch": 3.2, + "learning_rate": 1.5199526327555664e-05, + "loss": 0.6731, + "step": 20067 + }, + { + "epoch": 3.2, + "learning_rate": 1.519715298470894e-05, + "loss": 0.6075, + "step": 20068 + }, + { + "epoch": 3.2, + "learning_rate": 1.5194779746256586e-05, + "loss": 0.6129, + "step": 20069 + }, + { + "epoch": 3.2, + "learning_rate": 1.5192406612223881e-05, + "loss": 0.6182, + "step": 20070 + }, + { + "epoch": 3.2, + "learning_rate": 1.5190033582636087e-05, + "loss": 0.6495, + "step": 20071 + }, + { + "epoch": 3.2, + "learning_rate": 1.5187660657518483e-05, + "loss": 0.6238, + "step": 20072 + }, + { + "epoch": 3.2, + "learning_rate": 1.5185287836896327e-05, + "loss": 0.6445, + "step": 20073 + }, + { + "epoch": 3.2, + "learning_rate": 1.5182915120794905e-05, + "loss": 0.6327, + "step": 20074 + }, + { + "epoch": 3.2, + "learning_rate": 1.5180542509239474e-05, + "loss": 0.5959, + "step": 20075 + }, + { + "epoch": 3.2, + "learning_rate": 1.5178170002255304e-05, + "loss": 0.5985, + "step": 20076 + }, + { + "epoch": 3.2, + "learning_rate": 1.5175797599867656e-05, + "loss": 0.6174, + "step": 20077 + }, + { + "epoch": 3.2, + "learning_rate": 1.5173425302101805e-05, + "loss": 0.6749, + "step": 20078 + }, + { + "epoch": 3.2, + "learning_rate": 1.5171053108983007e-05, + "loss": 0.572, + "step": 20079 + }, + { + "epoch": 3.2, + "learning_rate": 1.5168681020536525e-05, + "loss": 0.5901, + "step": 20080 + }, + { + "epoch": 3.2, + "learning_rate": 1.516630903678762e-05, + "loss": 0.6366, + "step": 20081 + }, + { + "epoch": 3.2, + "learning_rate": 1.516393715776156e-05, + "loss": 0.6245, + "step": 20082 + }, + { + "epoch": 3.2, + "learning_rate": 1.5161565383483589e-05, + "loss": 0.6689, + "step": 20083 + }, + { + "epoch": 3.2, + "learning_rate": 1.5159193713978979e-05, + "loss": 0.6802, + "step": 20084 + }, + { + "epoch": 3.2, + "learning_rate": 1.5156822149272981e-05, + "loss": 0.7887, + "step": 20085 + }, + { + "epoch": 3.2, + "learning_rate": 1.5154450689390848e-05, + "loss": 0.6172, + "step": 20086 + }, + { + "epoch": 3.2, + "learning_rate": 1.5152079334357841e-05, + "loss": 0.5936, + "step": 20087 + }, + { + "epoch": 3.2, + "learning_rate": 1.514970808419921e-05, + "loss": 0.654, + "step": 20088 + }, + { + "epoch": 3.2, + "learning_rate": 1.5147336938940205e-05, + "loss": 0.7323, + "step": 20089 + }, + { + "epoch": 3.2, + "learning_rate": 1.5144965898606084e-05, + "loss": 0.6591, + "step": 20090 + }, + { + "epoch": 3.2, + "learning_rate": 1.5142594963222095e-05, + "loss": 0.6856, + "step": 20091 + }, + { + "epoch": 3.2, + "learning_rate": 1.5140224132813486e-05, + "loss": 0.6698, + "step": 20092 + }, + { + "epoch": 3.2, + "learning_rate": 1.51378534074055e-05, + "loss": 0.6563, + "step": 20093 + }, + { + "epoch": 3.2, + "learning_rate": 1.513548278702338e-05, + "loss": 0.6644, + "step": 20094 + }, + { + "epoch": 3.2, + "learning_rate": 1.513311227169239e-05, + "loss": 0.7023, + "step": 20095 + }, + { + "epoch": 3.2, + "learning_rate": 1.5130741861437762e-05, + "loss": 0.6665, + "step": 20096 + }, + { + "epoch": 3.2, + "learning_rate": 1.5128371556284737e-05, + "loss": 0.6348, + "step": 20097 + }, + { + "epoch": 3.2, + "learning_rate": 1.5126001356258564e-05, + "loss": 0.7035, + "step": 20098 + }, + { + "epoch": 3.2, + "learning_rate": 1.5123631261384486e-05, + "loss": 0.6092, + "step": 20099 + }, + { + "epoch": 3.2, + "learning_rate": 1.512126127168773e-05, + "loss": 0.6914, + "step": 20100 + }, + { + "epoch": 3.2, + "learning_rate": 1.5118891387193552e-05, + "loss": 0.6676, + "step": 20101 + }, + { + "epoch": 3.2, + "learning_rate": 1.511652160792718e-05, + "loss": 0.6178, + "step": 20102 + }, + { + "epoch": 3.2, + "learning_rate": 1.5114151933913848e-05, + "loss": 0.627, + "step": 20103 + }, + { + "epoch": 3.2, + "learning_rate": 1.5111782365178801e-05, + "loss": 0.6222, + "step": 20104 + }, + { + "epoch": 3.2, + "learning_rate": 1.5109412901747267e-05, + "loss": 0.6554, + "step": 20105 + }, + { + "epoch": 3.2, + "learning_rate": 1.5107043543644477e-05, + "loss": 0.6633, + "step": 20106 + }, + { + "epoch": 3.2, + "learning_rate": 1.5104674290895673e-05, + "loss": 0.5996, + "step": 20107 + }, + { + "epoch": 3.2, + "learning_rate": 1.510230514352608e-05, + "loss": 0.6627, + "step": 20108 + }, + { + "epoch": 3.2, + "learning_rate": 1.5099936101560923e-05, + "loss": 0.6946, + "step": 20109 + }, + { + "epoch": 3.2, + "learning_rate": 1.5097567165025443e-05, + "loss": 0.6736, + "step": 20110 + }, + { + "epoch": 3.2, + "learning_rate": 1.5095198333944862e-05, + "loss": 0.6061, + "step": 20111 + }, + { + "epoch": 3.2, + "learning_rate": 1.5092829608344402e-05, + "loss": 0.6405, + "step": 20112 + }, + { + "epoch": 3.2, + "learning_rate": 1.5090460988249294e-05, + "loss": 0.6783, + "step": 20113 + }, + { + "epoch": 3.2, + "learning_rate": 1.5088092473684751e-05, + "loss": 0.6378, + "step": 20114 + }, + { + "epoch": 3.2, + "learning_rate": 1.5085724064676013e-05, + "loss": 0.6012, + "step": 20115 + }, + { + "epoch": 3.2, + "learning_rate": 1.5083355761248297e-05, + "loss": 0.6287, + "step": 20116 + }, + { + "epoch": 3.2, + "learning_rate": 1.5080987563426812e-05, + "loss": 0.6051, + "step": 20117 + }, + { + "epoch": 3.21, + "learning_rate": 1.5078619471236794e-05, + "loss": 0.7161, + "step": 20118 + }, + { + "epoch": 3.21, + "learning_rate": 1.5076251484703458e-05, + "loss": 0.644, + "step": 20119 + }, + { + "epoch": 3.21, + "learning_rate": 1.5073883603852009e-05, + "loss": 0.6302, + "step": 20120 + }, + { + "epoch": 3.21, + "learning_rate": 1.5071515828707683e-05, + "loss": 0.6042, + "step": 20121 + }, + { + "epoch": 3.21, + "learning_rate": 1.5069148159295682e-05, + "loss": 0.6262, + "step": 20122 + }, + { + "epoch": 3.21, + "learning_rate": 1.5066780595641219e-05, + "loss": 0.6174, + "step": 20123 + }, + { + "epoch": 3.21, + "learning_rate": 1.5064413137769517e-05, + "loss": 0.6048, + "step": 20124 + }, + { + "epoch": 3.21, + "learning_rate": 1.5062045785705785e-05, + "loss": 0.5922, + "step": 20125 + }, + { + "epoch": 3.21, + "learning_rate": 1.5059678539475227e-05, + "loss": 0.6653, + "step": 20126 + }, + { + "epoch": 3.21, + "learning_rate": 1.5057311399103064e-05, + "loss": 0.7179, + "step": 20127 + }, + { + "epoch": 3.21, + "learning_rate": 1.5054944364614496e-05, + "loss": 0.6306, + "step": 20128 + }, + { + "epoch": 3.21, + "learning_rate": 1.5052577436034725e-05, + "loss": 0.7218, + "step": 20129 + }, + { + "epoch": 3.21, + "learning_rate": 1.5050210613388976e-05, + "loss": 0.6531, + "step": 20130 + }, + { + "epoch": 3.21, + "learning_rate": 1.5047843896702445e-05, + "loss": 0.6417, + "step": 20131 + }, + { + "epoch": 3.21, + "learning_rate": 1.5045477286000331e-05, + "loss": 0.6075, + "step": 20132 + }, + { + "epoch": 3.21, + "learning_rate": 1.504311078130784e-05, + "loss": 0.6682, + "step": 20133 + }, + { + "epoch": 3.21, + "learning_rate": 1.5040744382650168e-05, + "loss": 0.6383, + "step": 20134 + }, + { + "epoch": 3.21, + "learning_rate": 1.5038378090052526e-05, + "loss": 0.6634, + "step": 20135 + }, + { + "epoch": 3.21, + "learning_rate": 1.5036011903540109e-05, + "loss": 0.6435, + "step": 20136 + }, + { + "epoch": 3.21, + "learning_rate": 1.5033645823138115e-05, + "loss": 0.6675, + "step": 20137 + }, + { + "epoch": 3.21, + "learning_rate": 1.5031279848871743e-05, + "loss": 0.6483, + "step": 20138 + }, + { + "epoch": 3.21, + "learning_rate": 1.502891398076619e-05, + "loss": 0.6729, + "step": 20139 + }, + { + "epoch": 3.21, + "learning_rate": 1.5026548218846648e-05, + "loss": 0.6287, + "step": 20140 + }, + { + "epoch": 3.21, + "learning_rate": 1.5024182563138312e-05, + "loss": 0.6943, + "step": 20141 + }, + { + "epoch": 3.21, + "learning_rate": 1.5021817013666377e-05, + "loss": 0.5873, + "step": 20142 + }, + { + "epoch": 3.21, + "learning_rate": 1.5019451570456031e-05, + "loss": 0.6734, + "step": 20143 + }, + { + "epoch": 3.21, + "learning_rate": 1.5017086233532468e-05, + "loss": 0.621, + "step": 20144 + }, + { + "epoch": 3.21, + "learning_rate": 1.5014721002920879e-05, + "loss": 0.6295, + "step": 20145 + }, + { + "epoch": 3.21, + "learning_rate": 1.5012355878646438e-05, + "loss": 0.6496, + "step": 20146 + }, + { + "epoch": 3.21, + "learning_rate": 1.5009990860734355e-05, + "loss": 0.6359, + "step": 20147 + }, + { + "epoch": 3.21, + "learning_rate": 1.5007625949209803e-05, + "loss": 0.7013, + "step": 20148 + }, + { + "epoch": 3.21, + "learning_rate": 1.500526114409796e-05, + "loss": 0.6804, + "step": 20149 + }, + { + "epoch": 3.21, + "learning_rate": 1.5002896445424024e-05, + "loss": 0.6139, + "step": 20150 + }, + { + "epoch": 3.21, + "learning_rate": 1.5000531853213179e-05, + "loss": 0.7084, + "step": 20151 + }, + { + "epoch": 3.21, + "learning_rate": 1.499816736749059e-05, + "loss": 0.6401, + "step": 20152 + }, + { + "epoch": 3.21, + "learning_rate": 1.499580298828145e-05, + "loss": 0.6694, + "step": 20153 + }, + { + "epoch": 3.21, + "learning_rate": 1.4993438715610925e-05, + "loss": 0.721, + "step": 20154 + }, + { + "epoch": 3.21, + "learning_rate": 1.499107454950421e-05, + "loss": 0.6042, + "step": 20155 + }, + { + "epoch": 3.21, + "learning_rate": 1.4988710489986474e-05, + "loss": 0.5988, + "step": 20156 + }, + { + "epoch": 3.21, + "learning_rate": 1.498634653708289e-05, + "loss": 0.6378, + "step": 20157 + }, + { + "epoch": 3.21, + "learning_rate": 1.498398269081864e-05, + "loss": 0.6982, + "step": 20158 + }, + { + "epoch": 3.21, + "learning_rate": 1.4981618951218892e-05, + "loss": 0.6859, + "step": 20159 + }, + { + "epoch": 3.21, + "learning_rate": 1.4979255318308816e-05, + "loss": 0.6076, + "step": 20160 + }, + { + "epoch": 3.21, + "learning_rate": 1.4976891792113595e-05, + "loss": 0.662, + "step": 20161 + }, + { + "epoch": 3.21, + "learning_rate": 1.4974528372658386e-05, + "loss": 0.6583, + "step": 20162 + }, + { + "epoch": 3.21, + "learning_rate": 1.4972165059968362e-05, + "loss": 0.6103, + "step": 20163 + }, + { + "epoch": 3.21, + "learning_rate": 1.4969801854068696e-05, + "loss": 0.6487, + "step": 20164 + }, + { + "epoch": 3.21, + "learning_rate": 1.496743875498455e-05, + "loss": 0.6184, + "step": 20165 + }, + { + "epoch": 3.21, + "learning_rate": 1.4965075762741087e-05, + "loss": 0.6368, + "step": 20166 + }, + { + "epoch": 3.21, + "learning_rate": 1.4962712877363477e-05, + "loss": 0.653, + "step": 20167 + }, + { + "epoch": 3.21, + "learning_rate": 1.4960350098876885e-05, + "loss": 0.6344, + "step": 20168 + }, + { + "epoch": 3.21, + "learning_rate": 1.495798742730646e-05, + "loss": 0.6423, + "step": 20169 + }, + { + "epoch": 3.21, + "learning_rate": 1.4955624862677382e-05, + "loss": 0.6702, + "step": 20170 + }, + { + "epoch": 3.21, + "learning_rate": 1.4953262405014801e-05, + "loss": 0.6879, + "step": 20171 + }, + { + "epoch": 3.21, + "learning_rate": 1.4950900054343875e-05, + "loss": 0.5765, + "step": 20172 + }, + { + "epoch": 3.21, + "learning_rate": 1.494853781068976e-05, + "loss": 0.5854, + "step": 20173 + }, + { + "epoch": 3.21, + "learning_rate": 1.4946175674077611e-05, + "loss": 0.593, + "step": 20174 + }, + { + "epoch": 3.21, + "learning_rate": 1.4943813644532589e-05, + "loss": 0.5849, + "step": 20175 + }, + { + "epoch": 3.21, + "learning_rate": 1.4941451722079851e-05, + "loss": 0.5951, + "step": 20176 + }, + { + "epoch": 3.21, + "learning_rate": 1.4939089906744539e-05, + "loss": 0.6596, + "step": 20177 + }, + { + "epoch": 3.21, + "learning_rate": 1.4936728198551816e-05, + "loss": 0.6098, + "step": 20178 + }, + { + "epoch": 3.21, + "learning_rate": 1.4934366597526827e-05, + "loss": 0.6905, + "step": 20179 + }, + { + "epoch": 3.21, + "learning_rate": 1.4932005103694718e-05, + "loss": 0.6532, + "step": 20180 + }, + { + "epoch": 3.22, + "learning_rate": 1.4929643717080649e-05, + "loss": 0.6018, + "step": 20181 + }, + { + "epoch": 3.22, + "learning_rate": 1.492728243770976e-05, + "loss": 0.5668, + "step": 20182 + }, + { + "epoch": 3.22, + "learning_rate": 1.49249212656072e-05, + "loss": 0.5512, + "step": 20183 + }, + { + "epoch": 3.22, + "learning_rate": 1.4922560200798102e-05, + "loss": 0.6712, + "step": 20184 + }, + { + "epoch": 3.22, + "learning_rate": 1.4920199243307628e-05, + "loss": 0.6176, + "step": 20185 + }, + { + "epoch": 3.22, + "learning_rate": 1.491783839316091e-05, + "loss": 0.606, + "step": 20186 + }, + { + "epoch": 3.22, + "learning_rate": 1.4915477650383092e-05, + "loss": 0.6197, + "step": 20187 + }, + { + "epoch": 3.22, + "learning_rate": 1.4913117014999315e-05, + "loss": 0.6266, + "step": 20188 + }, + { + "epoch": 3.22, + "learning_rate": 1.4910756487034716e-05, + "loss": 0.6388, + "step": 20189 + }, + { + "epoch": 3.22, + "learning_rate": 1.4908396066514435e-05, + "loss": 0.6529, + "step": 20190 + }, + { + "epoch": 3.22, + "learning_rate": 1.4906035753463618e-05, + "loss": 0.5921, + "step": 20191 + }, + { + "epoch": 3.22, + "learning_rate": 1.4903675547907386e-05, + "loss": 0.6127, + "step": 20192 + }, + { + "epoch": 3.22, + "learning_rate": 1.4901315449870879e-05, + "loss": 0.6909, + "step": 20193 + }, + { + "epoch": 3.22, + "learning_rate": 1.4898955459379226e-05, + "loss": 0.6098, + "step": 20194 + }, + { + "epoch": 3.22, + "learning_rate": 1.4896595576457573e-05, + "loss": 0.6527, + "step": 20195 + }, + { + "epoch": 3.22, + "learning_rate": 1.489423580113104e-05, + "loss": 0.649, + "step": 20196 + }, + { + "epoch": 3.22, + "learning_rate": 1.4891876133424765e-05, + "loss": 0.6398, + "step": 20197 + }, + { + "epoch": 3.22, + "learning_rate": 1.4889516573363862e-05, + "loss": 0.5884, + "step": 20198 + }, + { + "epoch": 3.22, + "learning_rate": 1.4887157120973477e-05, + "loss": 0.6622, + "step": 20199 + }, + { + "epoch": 3.22, + "learning_rate": 1.488479777627873e-05, + "loss": 0.6038, + "step": 20200 + }, + { + "epoch": 3.22, + "learning_rate": 1.4882438539304738e-05, + "loss": 0.6243, + "step": 20201 + }, + { + "epoch": 3.22, + "learning_rate": 1.4880079410076638e-05, + "loss": 0.621, + "step": 20202 + }, + { + "epoch": 3.22, + "learning_rate": 1.4877720388619551e-05, + "loss": 0.6565, + "step": 20203 + }, + { + "epoch": 3.22, + "learning_rate": 1.4875361474958589e-05, + "loss": 0.6556, + "step": 20204 + }, + { + "epoch": 3.22, + "learning_rate": 1.4873002669118884e-05, + "loss": 0.6864, + "step": 20205 + }, + { + "epoch": 3.22, + "learning_rate": 1.4870643971125558e-05, + "loss": 0.7042, + "step": 20206 + }, + { + "epoch": 3.22, + "learning_rate": 1.4868285381003716e-05, + "loss": 0.649, + "step": 20207 + }, + { + "epoch": 3.22, + "learning_rate": 1.4865926898778487e-05, + "loss": 0.5726, + "step": 20208 + }, + { + "epoch": 3.22, + "learning_rate": 1.4863568524474985e-05, + "loss": 0.6678, + "step": 20209 + }, + { + "epoch": 3.22, + "learning_rate": 1.4861210258118318e-05, + "loss": 0.6218, + "step": 20210 + }, + { + "epoch": 3.22, + "learning_rate": 1.4858852099733616e-05, + "loss": 0.6165, + "step": 20211 + }, + { + "epoch": 3.22, + "learning_rate": 1.4856494049345984e-05, + "loss": 0.6138, + "step": 20212 + }, + { + "epoch": 3.22, + "learning_rate": 1.4854136106980526e-05, + "loss": 0.6615, + "step": 20213 + }, + { + "epoch": 3.22, + "learning_rate": 1.4851778272662361e-05, + "loss": 0.629, + "step": 20214 + }, + { + "epoch": 3.22, + "learning_rate": 1.4849420546416592e-05, + "loss": 0.6073, + "step": 20215 + }, + { + "epoch": 3.22, + "learning_rate": 1.4847062928268335e-05, + "loss": 0.6321, + "step": 20216 + }, + { + "epoch": 3.22, + "learning_rate": 1.4844705418242693e-05, + "loss": 0.6379, + "step": 20217 + }, + { + "epoch": 3.22, + "learning_rate": 1.4842348016364768e-05, + "loss": 0.6678, + "step": 20218 + }, + { + "epoch": 3.22, + "learning_rate": 1.4839990722659675e-05, + "loss": 0.5913, + "step": 20219 + }, + { + "epoch": 3.22, + "learning_rate": 1.4837633537152512e-05, + "loss": 0.6743, + "step": 20220 + }, + { + "epoch": 3.22, + "learning_rate": 1.4835276459868381e-05, + "loss": 0.6173, + "step": 20221 + }, + { + "epoch": 3.22, + "learning_rate": 1.4832919490832386e-05, + "loss": 0.6636, + "step": 20222 + }, + { + "epoch": 3.22, + "learning_rate": 1.4830562630069625e-05, + "loss": 0.6706, + "step": 20223 + }, + { + "epoch": 3.22, + "learning_rate": 1.4828205877605194e-05, + "loss": 0.6019, + "step": 20224 + }, + { + "epoch": 3.22, + "learning_rate": 1.48258492334642e-05, + "loss": 0.6991, + "step": 20225 + }, + { + "epoch": 3.22, + "learning_rate": 1.4823492697671737e-05, + "loss": 0.6605, + "step": 20226 + }, + { + "epoch": 3.22, + "learning_rate": 1.4821136270252888e-05, + "loss": 0.6572, + "step": 20227 + }, + { + "epoch": 3.22, + "learning_rate": 1.4818779951232769e-05, + "loss": 0.6877, + "step": 20228 + }, + { + "epoch": 3.22, + "learning_rate": 1.481642374063646e-05, + "loss": 0.6254, + "step": 20229 + }, + { + "epoch": 3.22, + "learning_rate": 1.4814067638489049e-05, + "loss": 0.6777, + "step": 20230 + }, + { + "epoch": 3.22, + "learning_rate": 1.481171164481564e-05, + "loss": 0.5809, + "step": 20231 + }, + { + "epoch": 3.22, + "learning_rate": 1.4809355759641325e-05, + "loss": 0.6344, + "step": 20232 + }, + { + "epoch": 3.22, + "learning_rate": 1.4806999982991174e-05, + "loss": 0.5508, + "step": 20233 + }, + { + "epoch": 3.22, + "learning_rate": 1.4804644314890288e-05, + "loss": 0.7513, + "step": 20234 + }, + { + "epoch": 3.22, + "learning_rate": 1.4802288755363741e-05, + "loss": 0.5843, + "step": 20235 + }, + { + "epoch": 3.22, + "learning_rate": 1.4799933304436635e-05, + "loss": 0.5806, + "step": 20236 + }, + { + "epoch": 3.22, + "learning_rate": 1.4797577962134051e-05, + "loss": 0.5688, + "step": 20237 + }, + { + "epoch": 3.22, + "learning_rate": 1.4795222728481056e-05, + "loss": 0.6224, + "step": 20238 + }, + { + "epoch": 3.22, + "learning_rate": 1.4792867603502752e-05, + "loss": 0.6592, + "step": 20239 + }, + { + "epoch": 3.22, + "learning_rate": 1.4790512587224211e-05, + "loss": 0.701, + "step": 20240 + }, + { + "epoch": 3.22, + "learning_rate": 1.4788157679670508e-05, + "loss": 0.6631, + "step": 20241 + }, + { + "epoch": 3.22, + "learning_rate": 1.478580288086673e-05, + "loss": 0.5549, + "step": 20242 + }, + { + "epoch": 3.22, + "learning_rate": 1.478344819083795e-05, + "loss": 0.7059, + "step": 20243 + }, + { + "epoch": 3.23, + "learning_rate": 1.478109360960924e-05, + "loss": 0.5862, + "step": 20244 + }, + { + "epoch": 3.23, + "learning_rate": 1.4778739137205683e-05, + "loss": 0.5996, + "step": 20245 + }, + { + "epoch": 3.23, + "learning_rate": 1.477638477365235e-05, + "loss": 0.6285, + "step": 20246 + }, + { + "epoch": 3.23, + "learning_rate": 1.4774030518974308e-05, + "loss": 0.6619, + "step": 20247 + }, + { + "epoch": 3.23, + "learning_rate": 1.4771676373196635e-05, + "loss": 0.5911, + "step": 20248 + }, + { + "epoch": 3.23, + "learning_rate": 1.47693223363444e-05, + "loss": 0.6462, + "step": 20249 + }, + { + "epoch": 3.23, + "learning_rate": 1.4766968408442667e-05, + "loss": 0.6082, + "step": 20250 + }, + { + "epoch": 3.23, + "learning_rate": 1.476461458951651e-05, + "loss": 0.6632, + "step": 20251 + }, + { + "epoch": 3.23, + "learning_rate": 1.4762260879591e-05, + "loss": 0.5957, + "step": 20252 + }, + { + "epoch": 3.23, + "learning_rate": 1.4759907278691194e-05, + "loss": 0.6176, + "step": 20253 + }, + { + "epoch": 3.23, + "learning_rate": 1.4757553786842155e-05, + "loss": 0.5833, + "step": 20254 + }, + { + "epoch": 3.23, + "learning_rate": 1.4755200404068942e-05, + "loss": 0.5819, + "step": 20255 + }, + { + "epoch": 3.23, + "learning_rate": 1.4752847130396635e-05, + "loss": 0.6287, + "step": 20256 + }, + { + "epoch": 3.23, + "learning_rate": 1.4750493965850285e-05, + "loss": 0.7794, + "step": 20257 + }, + { + "epoch": 3.23, + "learning_rate": 1.4748140910454944e-05, + "loss": 0.6148, + "step": 20258 + }, + { + "epoch": 3.23, + "learning_rate": 1.4745787964235686e-05, + "loss": 0.6027, + "step": 20259 + }, + { + "epoch": 3.23, + "learning_rate": 1.4743435127217558e-05, + "loss": 0.6324, + "step": 20260 + }, + { + "epoch": 3.23, + "learning_rate": 1.4741082399425616e-05, + "loss": 0.6663, + "step": 20261 + }, + { + "epoch": 3.23, + "learning_rate": 1.4738729780884918e-05, + "loss": 0.6117, + "step": 20262 + }, + { + "epoch": 3.23, + "learning_rate": 1.4736377271620524e-05, + "loss": 0.6585, + "step": 20263 + }, + { + "epoch": 3.23, + "learning_rate": 1.4734024871657476e-05, + "loss": 0.5852, + "step": 20264 + }, + { + "epoch": 3.23, + "learning_rate": 1.4731672581020834e-05, + "loss": 0.7187, + "step": 20265 + }, + { + "epoch": 3.23, + "learning_rate": 1.4729320399735644e-05, + "loss": 0.65, + "step": 20266 + }, + { + "epoch": 3.23, + "learning_rate": 1.472696832782695e-05, + "loss": 0.6066, + "step": 20267 + }, + { + "epoch": 3.23, + "learning_rate": 1.4724616365319815e-05, + "loss": 0.5803, + "step": 20268 + }, + { + "epoch": 3.23, + "learning_rate": 1.4722264512239276e-05, + "loss": 0.6851, + "step": 20269 + }, + { + "epoch": 3.23, + "learning_rate": 1.4719912768610372e-05, + "loss": 0.7006, + "step": 20270 + }, + { + "epoch": 3.23, + "learning_rate": 1.4717561134458165e-05, + "loss": 0.6633, + "step": 20271 + }, + { + "epoch": 3.23, + "learning_rate": 1.4715209609807695e-05, + "loss": 0.6431, + "step": 20272 + }, + { + "epoch": 3.23, + "learning_rate": 1.471285819468399e-05, + "loss": 0.6173, + "step": 20273 + }, + { + "epoch": 3.23, + "learning_rate": 1.4710506889112103e-05, + "loss": 0.6458, + "step": 20274 + }, + { + "epoch": 3.23, + "learning_rate": 1.4708155693117062e-05, + "loss": 0.6081, + "step": 20275 + }, + { + "epoch": 3.23, + "learning_rate": 1.4705804606723922e-05, + "loss": 0.6908, + "step": 20276 + }, + { + "epoch": 3.23, + "learning_rate": 1.4703453629957714e-05, + "loss": 0.6781, + "step": 20277 + }, + { + "epoch": 3.23, + "learning_rate": 1.4701102762843465e-05, + "loss": 0.5997, + "step": 20278 + }, + { + "epoch": 3.23, + "learning_rate": 1.4698752005406228e-05, + "loss": 0.6243, + "step": 20279 + }, + { + "epoch": 3.23, + "learning_rate": 1.4696401357671024e-05, + "loss": 0.761, + "step": 20280 + }, + { + "epoch": 3.23, + "learning_rate": 1.469405081966289e-05, + "loss": 0.6278, + "step": 20281 + }, + { + "epoch": 3.23, + "learning_rate": 1.469170039140686e-05, + "loss": 0.6364, + "step": 20282 + }, + { + "epoch": 3.23, + "learning_rate": 1.4689350072927963e-05, + "loss": 0.6936, + "step": 20283 + }, + { + "epoch": 3.23, + "learning_rate": 1.4686999864251222e-05, + "loss": 0.6385, + "step": 20284 + }, + { + "epoch": 3.23, + "learning_rate": 1.4684649765401679e-05, + "loss": 0.6625, + "step": 20285 + }, + { + "epoch": 3.23, + "learning_rate": 1.468229977640435e-05, + "loss": 0.6319, + "step": 20286 + }, + { + "epoch": 3.23, + "learning_rate": 1.4679949897284262e-05, + "loss": 0.6237, + "step": 20287 + }, + { + "epoch": 3.23, + "learning_rate": 1.4677600128066447e-05, + "loss": 0.623, + "step": 20288 + }, + { + "epoch": 3.23, + "learning_rate": 1.4675250468775919e-05, + "loss": 0.6423, + "step": 20289 + }, + { + "epoch": 3.23, + "learning_rate": 1.467290091943771e-05, + "loss": 0.6742, + "step": 20290 + }, + { + "epoch": 3.23, + "learning_rate": 1.4670551480076833e-05, + "loss": 0.6322, + "step": 20291 + }, + { + "epoch": 3.23, + "learning_rate": 1.4668202150718319e-05, + "loss": 0.5852, + "step": 20292 + }, + { + "epoch": 3.23, + "learning_rate": 1.4665852931387175e-05, + "loss": 0.6497, + "step": 20293 + }, + { + "epoch": 3.23, + "learning_rate": 1.4663503822108421e-05, + "loss": 0.6534, + "step": 20294 + }, + { + "epoch": 3.23, + "learning_rate": 1.4661154822907074e-05, + "loss": 0.6823, + "step": 20295 + }, + { + "epoch": 3.23, + "learning_rate": 1.4658805933808154e-05, + "loss": 0.6366, + "step": 20296 + }, + { + "epoch": 3.23, + "learning_rate": 1.4656457154836672e-05, + "loss": 0.7106, + "step": 20297 + }, + { + "epoch": 3.23, + "learning_rate": 1.465410848601764e-05, + "loss": 0.6833, + "step": 20298 + }, + { + "epoch": 3.23, + "learning_rate": 1.4651759927376074e-05, + "loss": 0.6289, + "step": 20299 + }, + { + "epoch": 3.23, + "learning_rate": 1.4649411478936981e-05, + "loss": 0.6489, + "step": 20300 + }, + { + "epoch": 3.23, + "learning_rate": 1.4647063140725376e-05, + "loss": 0.6099, + "step": 20301 + }, + { + "epoch": 3.23, + "learning_rate": 1.4644714912766255e-05, + "loss": 0.6549, + "step": 20302 + }, + { + "epoch": 3.23, + "learning_rate": 1.464236679508464e-05, + "loss": 0.5755, + "step": 20303 + }, + { + "epoch": 3.23, + "learning_rate": 1.4640018787705528e-05, + "loss": 0.6779, + "step": 20304 + }, + { + "epoch": 3.23, + "learning_rate": 1.4637670890653921e-05, + "loss": 0.6543, + "step": 20305 + }, + { + "epoch": 3.23, + "learning_rate": 1.4635323103954834e-05, + "loss": 0.6287, + "step": 20306 + }, + { + "epoch": 3.24, + "learning_rate": 1.4632975427633266e-05, + "loss": 0.6986, + "step": 20307 + }, + { + "epoch": 3.24, + "learning_rate": 1.4630627861714207e-05, + "loss": 0.645, + "step": 20308 + }, + { + "epoch": 3.24, + "learning_rate": 1.4628280406222672e-05, + "loss": 0.7266, + "step": 20309 + }, + { + "epoch": 3.24, + "learning_rate": 1.4625933061183655e-05, + "loss": 0.6555, + "step": 20310 + }, + { + "epoch": 3.24, + "learning_rate": 1.462358582662215e-05, + "loss": 0.7161, + "step": 20311 + }, + { + "epoch": 3.24, + "learning_rate": 1.4621238702563156e-05, + "loss": 0.658, + "step": 20312 + }, + { + "epoch": 3.24, + "learning_rate": 1.4618891689031677e-05, + "loss": 0.6101, + "step": 20313 + }, + { + "epoch": 3.24, + "learning_rate": 1.4616544786052694e-05, + "loss": 0.6328, + "step": 20314 + }, + { + "epoch": 3.24, + "learning_rate": 1.4614197993651201e-05, + "loss": 0.5848, + "step": 20315 + }, + { + "epoch": 3.24, + "learning_rate": 1.4611851311852192e-05, + "loss": 0.6432, + "step": 20316 + }, + { + "epoch": 3.24, + "learning_rate": 1.4609504740680665e-05, + "loss": 0.5887, + "step": 20317 + }, + { + "epoch": 3.24, + "learning_rate": 1.4607158280161604e-05, + "loss": 0.6153, + "step": 20318 + }, + { + "epoch": 3.24, + "learning_rate": 1.4604811930319994e-05, + "loss": 0.6453, + "step": 20319 + }, + { + "epoch": 3.24, + "learning_rate": 1.4602465691180829e-05, + "loss": 0.5603, + "step": 20320 + }, + { + "epoch": 3.24, + "learning_rate": 1.4600119562769093e-05, + "loss": 0.6633, + "step": 20321 + }, + { + "epoch": 3.24, + "learning_rate": 1.4597773545109763e-05, + "loss": 0.6278, + "step": 20322 + }, + { + "epoch": 3.24, + "learning_rate": 1.4595427638227838e-05, + "loss": 0.6765, + "step": 20323 + }, + { + "epoch": 3.24, + "learning_rate": 1.4593081842148287e-05, + "loss": 0.5787, + "step": 20324 + }, + { + "epoch": 3.24, + "learning_rate": 1.4590736156896102e-05, + "loss": 0.5828, + "step": 20325 + }, + { + "epoch": 3.24, + "learning_rate": 1.4588390582496242e-05, + "loss": 0.6495, + "step": 20326 + }, + { + "epoch": 3.24, + "learning_rate": 1.4586045118973713e-05, + "loss": 0.6783, + "step": 20327 + }, + { + "epoch": 3.24, + "learning_rate": 1.4583699766353482e-05, + "loss": 0.6385, + "step": 20328 + }, + { + "epoch": 3.24, + "learning_rate": 1.4581354524660524e-05, + "loss": 0.5775, + "step": 20329 + }, + { + "epoch": 3.24, + "learning_rate": 1.4579009393919813e-05, + "loss": 0.6444, + "step": 20330 + }, + { + "epoch": 3.24, + "learning_rate": 1.4576664374156329e-05, + "loss": 0.6619, + "step": 20331 + }, + { + "epoch": 3.24, + "learning_rate": 1.4574319465395025e-05, + "loss": 0.7056, + "step": 20332 + }, + { + "epoch": 3.24, + "learning_rate": 1.4571974667660914e-05, + "loss": 0.6248, + "step": 20333 + }, + { + "epoch": 3.24, + "learning_rate": 1.4569629980978932e-05, + "loss": 0.6452, + "step": 20334 + }, + { + "epoch": 3.24, + "learning_rate": 1.4567285405374059e-05, + "loss": 0.6391, + "step": 20335 + }, + { + "epoch": 3.24, + "learning_rate": 1.4564940940871261e-05, + "loss": 0.7295, + "step": 20336 + }, + { + "epoch": 3.24, + "learning_rate": 1.4562596587495497e-05, + "loss": 0.7017, + "step": 20337 + }, + { + "epoch": 3.24, + "learning_rate": 1.4560252345271751e-05, + "loss": 0.6061, + "step": 20338 + }, + { + "epoch": 3.24, + "learning_rate": 1.455790821422498e-05, + "loss": 0.6755, + "step": 20339 + }, + { + "epoch": 3.24, + "learning_rate": 1.455556419438015e-05, + "loss": 0.6681, + "step": 20340 + }, + { + "epoch": 3.24, + "learning_rate": 1.4553220285762217e-05, + "loss": 0.5976, + "step": 20341 + }, + { + "epoch": 3.24, + "learning_rate": 1.4550876488396147e-05, + "loss": 0.5906, + "step": 20342 + }, + { + "epoch": 3.24, + "learning_rate": 1.454853280230689e-05, + "loss": 0.7605, + "step": 20343 + }, + { + "epoch": 3.24, + "learning_rate": 1.4546189227519419e-05, + "loss": 0.6432, + "step": 20344 + }, + { + "epoch": 3.24, + "learning_rate": 1.4543845764058688e-05, + "loss": 0.6529, + "step": 20345 + }, + { + "epoch": 3.24, + "learning_rate": 1.4541502411949653e-05, + "loss": 0.6182, + "step": 20346 + }, + { + "epoch": 3.24, + "learning_rate": 1.4539159171217267e-05, + "loss": 0.6205, + "step": 20347 + }, + { + "epoch": 3.24, + "learning_rate": 1.4536816041886486e-05, + "loss": 0.6236, + "step": 20348 + }, + { + "epoch": 3.24, + "learning_rate": 1.453447302398226e-05, + "loss": 0.6086, + "step": 20349 + }, + { + "epoch": 3.24, + "learning_rate": 1.4532130117529536e-05, + "loss": 0.6515, + "step": 20350 + }, + { + "epoch": 3.24, + "learning_rate": 1.4529787322553276e-05, + "loss": 0.6456, + "step": 20351 + }, + { + "epoch": 3.24, + "learning_rate": 1.4527444639078427e-05, + "loss": 0.6176, + "step": 20352 + }, + { + "epoch": 3.24, + "learning_rate": 1.4525102067129944e-05, + "loss": 0.602, + "step": 20353 + }, + { + "epoch": 3.24, + "learning_rate": 1.4522759606732745e-05, + "loss": 0.6094, + "step": 20354 + }, + { + "epoch": 3.24, + "learning_rate": 1.4520417257911806e-05, + "loss": 0.6159, + "step": 20355 + }, + { + "epoch": 3.24, + "learning_rate": 1.451807502069206e-05, + "loss": 0.6239, + "step": 20356 + }, + { + "epoch": 3.24, + "learning_rate": 1.451573289509845e-05, + "loss": 0.5881, + "step": 20357 + }, + { + "epoch": 3.24, + "learning_rate": 1.451339088115592e-05, + "loss": 0.6183, + "step": 20358 + }, + { + "epoch": 3.24, + "learning_rate": 1.4511048978889413e-05, + "loss": 0.6234, + "step": 20359 + }, + { + "epoch": 3.24, + "learning_rate": 1.4508707188323855e-05, + "loss": 0.7202, + "step": 20360 + }, + { + "epoch": 3.24, + "learning_rate": 1.4506365509484204e-05, + "loss": 0.682, + "step": 20361 + }, + { + "epoch": 3.24, + "learning_rate": 1.4504023942395393e-05, + "loss": 0.6404, + "step": 20362 + }, + { + "epoch": 3.24, + "learning_rate": 1.4501682487082352e-05, + "loss": 0.5926, + "step": 20363 + }, + { + "epoch": 3.24, + "learning_rate": 1.4499341143570019e-05, + "loss": 0.6235, + "step": 20364 + }, + { + "epoch": 3.24, + "learning_rate": 1.449699991188333e-05, + "loss": 0.6517, + "step": 20365 + }, + { + "epoch": 3.24, + "learning_rate": 1.4494658792047214e-05, + "loss": 0.6698, + "step": 20366 + }, + { + "epoch": 3.24, + "learning_rate": 1.4492317784086593e-05, + "loss": 0.6914, + "step": 20367 + }, + { + "epoch": 3.24, + "learning_rate": 1.448997688802642e-05, + "loss": 0.6338, + "step": 20368 + }, + { + "epoch": 3.25, + "learning_rate": 1.4487636103891612e-05, + "loss": 0.6589, + "step": 20369 + }, + { + "epoch": 3.25, + "learning_rate": 1.4485295431707097e-05, + "loss": 0.7195, + "step": 20370 + }, + { + "epoch": 3.25, + "learning_rate": 1.44829548714978e-05, + "loss": 0.5946, + "step": 20371 + }, + { + "epoch": 3.25, + "learning_rate": 1.4480614423288652e-05, + "loss": 0.6739, + "step": 20372 + }, + { + "epoch": 3.25, + "learning_rate": 1.4478274087104571e-05, + "loss": 0.6082, + "step": 20373 + }, + { + "epoch": 3.25, + "learning_rate": 1.4475933862970484e-05, + "loss": 0.702, + "step": 20374 + }, + { + "epoch": 3.25, + "learning_rate": 1.4473593750911314e-05, + "loss": 0.6095, + "step": 20375 + }, + { + "epoch": 3.25, + "learning_rate": 1.4471253750951977e-05, + "loss": 0.6051, + "step": 20376 + }, + { + "epoch": 3.25, + "learning_rate": 1.4468913863117395e-05, + "loss": 0.6627, + "step": 20377 + }, + { + "epoch": 3.25, + "learning_rate": 1.4466574087432477e-05, + "loss": 0.6817, + "step": 20378 + }, + { + "epoch": 3.25, + "learning_rate": 1.4464234423922163e-05, + "loss": 0.6349, + "step": 20379 + }, + { + "epoch": 3.25, + "learning_rate": 1.4461894872611353e-05, + "loss": 0.6451, + "step": 20380 + }, + { + "epoch": 3.25, + "learning_rate": 1.4459555433524969e-05, + "loss": 0.6344, + "step": 20381 + }, + { + "epoch": 3.25, + "learning_rate": 1.4457216106687914e-05, + "loss": 0.6443, + "step": 20382 + }, + { + "epoch": 3.25, + "learning_rate": 1.4454876892125113e-05, + "loss": 0.6695, + "step": 20383 + }, + { + "epoch": 3.25, + "learning_rate": 1.445253778986146e-05, + "loss": 0.6219, + "step": 20384 + }, + { + "epoch": 3.25, + "learning_rate": 1.4450198799921882e-05, + "loss": 0.5949, + "step": 20385 + }, + { + "epoch": 3.25, + "learning_rate": 1.4447859922331287e-05, + "loss": 0.7467, + "step": 20386 + }, + { + "epoch": 3.25, + "learning_rate": 1.4445521157114573e-05, + "loss": 0.592, + "step": 20387 + }, + { + "epoch": 3.25, + "learning_rate": 1.444318250429665e-05, + "loss": 0.6545, + "step": 20388 + }, + { + "epoch": 3.25, + "learning_rate": 1.4440843963902428e-05, + "loss": 0.5924, + "step": 20389 + }, + { + "epoch": 3.25, + "learning_rate": 1.4438505535956795e-05, + "loss": 0.5929, + "step": 20390 + }, + { + "epoch": 3.25, + "learning_rate": 1.4436167220484676e-05, + "loss": 0.6882, + "step": 20391 + }, + { + "epoch": 3.25, + "learning_rate": 1.4433829017510963e-05, + "loss": 0.6076, + "step": 20392 + }, + { + "epoch": 3.25, + "learning_rate": 1.4431490927060568e-05, + "loss": 0.7047, + "step": 20393 + }, + { + "epoch": 3.25, + "learning_rate": 1.4429152949158367e-05, + "loss": 0.6553, + "step": 20394 + }, + { + "epoch": 3.25, + "learning_rate": 1.442681508382926e-05, + "loss": 0.6719, + "step": 20395 + }, + { + "epoch": 3.25, + "learning_rate": 1.4424477331098163e-05, + "loss": 0.6511, + "step": 20396 + }, + { + "epoch": 3.25, + "learning_rate": 1.4422139690989961e-05, + "loss": 0.6506, + "step": 20397 + }, + { + "epoch": 3.25, + "learning_rate": 1.441980216352955e-05, + "loss": 0.5658, + "step": 20398 + }, + { + "epoch": 3.25, + "learning_rate": 1.4417464748741821e-05, + "loss": 0.5946, + "step": 20399 + }, + { + "epoch": 3.25, + "learning_rate": 1.4415127446651667e-05, + "loss": 0.6548, + "step": 20400 + }, + { + "epoch": 3.25, + "learning_rate": 1.441279025728397e-05, + "loss": 0.6382, + "step": 20401 + }, + { + "epoch": 3.25, + "learning_rate": 1.441045318066364e-05, + "loss": 0.662, + "step": 20402 + }, + { + "epoch": 3.25, + "learning_rate": 1.4408116216815556e-05, + "loss": 0.6364, + "step": 20403 + }, + { + "epoch": 3.25, + "learning_rate": 1.4405779365764605e-05, + "loss": 0.6309, + "step": 20404 + }, + { + "epoch": 3.25, + "learning_rate": 1.4403442627535668e-05, + "loss": 0.6178, + "step": 20405 + }, + { + "epoch": 3.25, + "learning_rate": 1.4401106002153635e-05, + "loss": 0.6509, + "step": 20406 + }, + { + "epoch": 3.25, + "learning_rate": 1.4398769489643379e-05, + "loss": 0.6416, + "step": 20407 + }, + { + "epoch": 3.25, + "learning_rate": 1.43964330900298e-05, + "loss": 0.6791, + "step": 20408 + }, + { + "epoch": 3.25, + "learning_rate": 1.4394096803337776e-05, + "loss": 0.6528, + "step": 20409 + }, + { + "epoch": 3.25, + "learning_rate": 1.4391760629592177e-05, + "loss": 0.6351, + "step": 20410 + }, + { + "epoch": 3.25, + "learning_rate": 1.4389424568817888e-05, + "loss": 0.5866, + "step": 20411 + }, + { + "epoch": 3.25, + "learning_rate": 1.4387088621039785e-05, + "loss": 0.6024, + "step": 20412 + }, + { + "epoch": 3.25, + "learning_rate": 1.4384752786282735e-05, + "loss": 0.6027, + "step": 20413 + }, + { + "epoch": 3.25, + "learning_rate": 1.4382417064571645e-05, + "loss": 0.6231, + "step": 20414 + }, + { + "epoch": 3.25, + "learning_rate": 1.4380081455931355e-05, + "loss": 0.7382, + "step": 20415 + }, + { + "epoch": 3.25, + "learning_rate": 1.4377745960386751e-05, + "loss": 0.7074, + "step": 20416 + }, + { + "epoch": 3.25, + "learning_rate": 1.4375410577962706e-05, + "loss": 0.5646, + "step": 20417 + }, + { + "epoch": 3.25, + "learning_rate": 1.4373075308684076e-05, + "loss": 0.5983, + "step": 20418 + }, + { + "epoch": 3.25, + "learning_rate": 1.437074015257575e-05, + "loss": 0.6366, + "step": 20419 + }, + { + "epoch": 3.25, + "learning_rate": 1.4368405109662589e-05, + "loss": 0.588, + "step": 20420 + }, + { + "epoch": 3.25, + "learning_rate": 1.436607017996946e-05, + "loss": 0.6165, + "step": 20421 + }, + { + "epoch": 3.25, + "learning_rate": 1.4363735363521227e-05, + "loss": 0.6131, + "step": 20422 + }, + { + "epoch": 3.25, + "learning_rate": 1.4361400660342755e-05, + "loss": 0.675, + "step": 20423 + }, + { + "epoch": 3.25, + "learning_rate": 1.4359066070458899e-05, + "loss": 0.7643, + "step": 20424 + }, + { + "epoch": 3.25, + "learning_rate": 1.4356731593894538e-05, + "loss": 0.5828, + "step": 20425 + }, + { + "epoch": 3.25, + "learning_rate": 1.4354397230674522e-05, + "loss": 0.65, + "step": 20426 + }, + { + "epoch": 3.25, + "learning_rate": 1.4352062980823716e-05, + "loss": 0.6213, + "step": 20427 + }, + { + "epoch": 3.25, + "learning_rate": 1.4349728844366971e-05, + "loss": 0.6641, + "step": 20428 + }, + { + "epoch": 3.25, + "learning_rate": 1.4347394821329153e-05, + "loss": 0.626, + "step": 20429 + }, + { + "epoch": 3.25, + "learning_rate": 1.4345060911735097e-05, + "loss": 0.6009, + "step": 20430 + }, + { + "epoch": 3.25, + "learning_rate": 1.4342727115609683e-05, + "loss": 0.5775, + "step": 20431 + }, + { + "epoch": 3.26, + "learning_rate": 1.434039343297776e-05, + "loss": 0.6361, + "step": 20432 + }, + { + "epoch": 3.26, + "learning_rate": 1.4338059863864173e-05, + "loss": 0.6949, + "step": 20433 + }, + { + "epoch": 3.26, + "learning_rate": 1.4335726408293787e-05, + "loss": 0.6223, + "step": 20434 + }, + { + "epoch": 3.26, + "learning_rate": 1.4333393066291417e-05, + "loss": 0.6117, + "step": 20435 + }, + { + "epoch": 3.26, + "learning_rate": 1.4331059837881949e-05, + "loss": 0.569, + "step": 20436 + }, + { + "epoch": 3.26, + "learning_rate": 1.4328726723090213e-05, + "loss": 0.6685, + "step": 20437 + }, + { + "epoch": 3.26, + "learning_rate": 1.4326393721941061e-05, + "loss": 0.6292, + "step": 20438 + }, + { + "epoch": 3.26, + "learning_rate": 1.4324060834459336e-05, + "loss": 0.6157, + "step": 20439 + }, + { + "epoch": 3.26, + "learning_rate": 1.4321728060669879e-05, + "loss": 0.6903, + "step": 20440 + }, + { + "epoch": 3.26, + "learning_rate": 1.4319395400597524e-05, + "loss": 0.6591, + "step": 20441 + }, + { + "epoch": 3.26, + "learning_rate": 1.4317062854267139e-05, + "loss": 0.6628, + "step": 20442 + }, + { + "epoch": 3.26, + "learning_rate": 1.4314730421703546e-05, + "loss": 0.5956, + "step": 20443 + }, + { + "epoch": 3.26, + "learning_rate": 1.4312398102931584e-05, + "loss": 0.6108, + "step": 20444 + }, + { + "epoch": 3.26, + "learning_rate": 1.4310065897976099e-05, + "loss": 0.666, + "step": 20445 + }, + { + "epoch": 3.26, + "learning_rate": 1.430773380686192e-05, + "loss": 0.6322, + "step": 20446 + }, + { + "epoch": 3.26, + "learning_rate": 1.4305401829613874e-05, + "loss": 0.6204, + "step": 20447 + }, + { + "epoch": 3.26, + "learning_rate": 1.4303069966256816e-05, + "loss": 0.5883, + "step": 20448 + }, + { + "epoch": 3.26, + "learning_rate": 1.4300738216815568e-05, + "loss": 0.6539, + "step": 20449 + }, + { + "epoch": 3.26, + "learning_rate": 1.4298406581314963e-05, + "loss": 0.7239, + "step": 20450 + }, + { + "epoch": 3.26, + "learning_rate": 1.4296075059779828e-05, + "loss": 0.6133, + "step": 20451 + }, + { + "epoch": 3.26, + "learning_rate": 1.4293743652235001e-05, + "loss": 0.6108, + "step": 20452 + }, + { + "epoch": 3.26, + "learning_rate": 1.4291412358705292e-05, + "loss": 0.6299, + "step": 20453 + }, + { + "epoch": 3.26, + "learning_rate": 1.4289081179215558e-05, + "loss": 0.576, + "step": 20454 + }, + { + "epoch": 3.26, + "learning_rate": 1.4286750113790598e-05, + "loss": 0.5667, + "step": 20455 + }, + { + "epoch": 3.26, + "learning_rate": 1.4284419162455243e-05, + "loss": 0.6567, + "step": 20456 + }, + { + "epoch": 3.26, + "learning_rate": 1.4282088325234321e-05, + "loss": 0.5711, + "step": 20457 + }, + { + "epoch": 3.26, + "learning_rate": 1.4279757602152638e-05, + "loss": 0.6199, + "step": 20458 + }, + { + "epoch": 3.26, + "learning_rate": 1.4277426993235043e-05, + "loss": 0.6616, + "step": 20459 + }, + { + "epoch": 3.26, + "learning_rate": 1.4275096498506337e-05, + "loss": 0.6135, + "step": 20460 + }, + { + "epoch": 3.26, + "learning_rate": 1.4272766117991343e-05, + "loss": 0.6636, + "step": 20461 + }, + { + "epoch": 3.26, + "learning_rate": 1.427043585171488e-05, + "loss": 0.7215, + "step": 20462 + }, + { + "epoch": 3.26, + "learning_rate": 1.4268105699701756e-05, + "loss": 0.608, + "step": 20463 + }, + { + "epoch": 3.26, + "learning_rate": 1.4265775661976782e-05, + "loss": 0.6292, + "step": 20464 + }, + { + "epoch": 3.26, + "learning_rate": 1.4263445738564796e-05, + "loss": 0.6578, + "step": 20465 + }, + { + "epoch": 3.26, + "learning_rate": 1.4261115929490587e-05, + "loss": 0.6617, + "step": 20466 + }, + { + "epoch": 3.26, + "learning_rate": 1.425878623477898e-05, + "loss": 0.6319, + "step": 20467 + }, + { + "epoch": 3.26, + "learning_rate": 1.4256456654454775e-05, + "loss": 0.6305, + "step": 20468 + }, + { + "epoch": 3.26, + "learning_rate": 1.4254127188542782e-05, + "loss": 0.6545, + "step": 20469 + }, + { + "epoch": 3.26, + "learning_rate": 1.4251797837067814e-05, + "loss": 0.638, + "step": 20470 + }, + { + "epoch": 3.26, + "learning_rate": 1.4249468600054661e-05, + "loss": 0.6327, + "step": 20471 + }, + { + "epoch": 3.26, + "learning_rate": 1.4247139477528154e-05, + "loss": 0.677, + "step": 20472 + }, + { + "epoch": 3.26, + "learning_rate": 1.4244810469513082e-05, + "loss": 0.6494, + "step": 20473 + }, + { + "epoch": 3.26, + "learning_rate": 1.4242481576034255e-05, + "loss": 0.6101, + "step": 20474 + }, + { + "epoch": 3.26, + "learning_rate": 1.4240152797116452e-05, + "loss": 0.6246, + "step": 20475 + }, + { + "epoch": 3.26, + "learning_rate": 1.42378241327845e-05, + "loss": 0.6682, + "step": 20476 + }, + { + "epoch": 3.26, + "learning_rate": 1.4235495583063185e-05, + "loss": 0.6921, + "step": 20477 + }, + { + "epoch": 3.26, + "learning_rate": 1.4233167147977309e-05, + "loss": 0.672, + "step": 20478 + }, + { + "epoch": 3.26, + "learning_rate": 1.4230838827551662e-05, + "loss": 0.6454, + "step": 20479 + }, + { + "epoch": 3.26, + "learning_rate": 1.4228510621811047e-05, + "loss": 0.5864, + "step": 20480 + }, + { + "epoch": 3.26, + "learning_rate": 1.4226182530780256e-05, + "loss": 0.6337, + "step": 20481 + }, + { + "epoch": 3.26, + "learning_rate": 1.4223854554484068e-05, + "loss": 0.6663, + "step": 20482 + }, + { + "epoch": 3.26, + "learning_rate": 1.4221526692947295e-05, + "loss": 0.6293, + "step": 20483 + }, + { + "epoch": 3.26, + "learning_rate": 1.421919894619472e-05, + "loss": 0.6211, + "step": 20484 + }, + { + "epoch": 3.26, + "learning_rate": 1.421687131425113e-05, + "loss": 0.6175, + "step": 20485 + }, + { + "epoch": 3.26, + "learning_rate": 1.4214543797141316e-05, + "loss": 0.5946, + "step": 20486 + }, + { + "epoch": 3.26, + "learning_rate": 1.421221639489006e-05, + "loss": 0.5972, + "step": 20487 + }, + { + "epoch": 3.26, + "learning_rate": 1.4209889107522143e-05, + "loss": 0.6558, + "step": 20488 + }, + { + "epoch": 3.26, + "learning_rate": 1.4207561935062363e-05, + "loss": 0.7377, + "step": 20489 + }, + { + "epoch": 3.26, + "learning_rate": 1.42052348775355e-05, + "loss": 0.6124, + "step": 20490 + }, + { + "epoch": 3.26, + "learning_rate": 1.4202907934966331e-05, + "loss": 0.5854, + "step": 20491 + }, + { + "epoch": 3.26, + "learning_rate": 1.4200581107379634e-05, + "loss": 0.6274, + "step": 20492 + }, + { + "epoch": 3.26, + "learning_rate": 1.4198254394800195e-05, + "loss": 0.6324, + "step": 20493 + }, + { + "epoch": 3.26, + "learning_rate": 1.4195927797252787e-05, + "loss": 0.648, + "step": 20494 + }, + { + "epoch": 3.27, + "learning_rate": 1.419360131476219e-05, + "loss": 0.6737, + "step": 20495 + }, + { + "epoch": 3.27, + "learning_rate": 1.4191274947353178e-05, + "loss": 0.6451, + "step": 20496 + }, + { + "epoch": 3.27, + "learning_rate": 1.4188948695050524e-05, + "loss": 0.6036, + "step": 20497 + }, + { + "epoch": 3.27, + "learning_rate": 1.4186622557879004e-05, + "loss": 0.634, + "step": 20498 + }, + { + "epoch": 3.27, + "learning_rate": 1.4184296535863378e-05, + "loss": 0.697, + "step": 20499 + }, + { + "epoch": 3.27, + "learning_rate": 1.418197062902844e-05, + "loss": 0.653, + "step": 20500 + }, + { + "epoch": 3.27, + "learning_rate": 1.4179644837398948e-05, + "loss": 0.5977, + "step": 20501 + }, + { + "epoch": 3.27, + "learning_rate": 1.4177319160999663e-05, + "loss": 0.5741, + "step": 20502 + }, + { + "epoch": 3.27, + "learning_rate": 1.4174993599855364e-05, + "loss": 0.6202, + "step": 20503 + }, + { + "epoch": 3.27, + "learning_rate": 1.417266815399081e-05, + "loss": 0.6369, + "step": 20504 + }, + { + "epoch": 3.27, + "learning_rate": 1.4170342823430754e-05, + "loss": 0.6201, + "step": 20505 + }, + { + "epoch": 3.27, + "learning_rate": 1.4168017608199985e-05, + "loss": 0.6538, + "step": 20506 + }, + { + "epoch": 3.27, + "learning_rate": 1.4165692508323252e-05, + "loss": 0.6729, + "step": 20507 + }, + { + "epoch": 3.27, + "learning_rate": 1.4163367523825316e-05, + "loss": 0.608, + "step": 20508 + }, + { + "epoch": 3.27, + "learning_rate": 1.4161042654730936e-05, + "loss": 0.6611, + "step": 20509 + }, + { + "epoch": 3.27, + "learning_rate": 1.4158717901064873e-05, + "loss": 0.6436, + "step": 20510 + }, + { + "epoch": 3.27, + "learning_rate": 1.4156393262851869e-05, + "loss": 0.6803, + "step": 20511 + }, + { + "epoch": 3.27, + "learning_rate": 1.4154068740116704e-05, + "loss": 0.6134, + "step": 20512 + }, + { + "epoch": 3.27, + "learning_rate": 1.4151744332884125e-05, + "loss": 0.6204, + "step": 20513 + }, + { + "epoch": 3.27, + "learning_rate": 1.414942004117889e-05, + "loss": 0.5966, + "step": 20514 + }, + { + "epoch": 3.27, + "learning_rate": 1.4147095865025736e-05, + "loss": 0.6253, + "step": 20515 + }, + { + "epoch": 3.27, + "learning_rate": 1.414477180444941e-05, + "loss": 0.6193, + "step": 20516 + }, + { + "epoch": 3.27, + "learning_rate": 1.4142447859474686e-05, + "loss": 0.7462, + "step": 20517 + }, + { + "epoch": 3.27, + "learning_rate": 1.41401240301263e-05, + "loss": 0.655, + "step": 20518 + }, + { + "epoch": 3.27, + "learning_rate": 1.4137800316429e-05, + "loss": 0.6487, + "step": 20519 + }, + { + "epoch": 3.27, + "learning_rate": 1.4135476718407531e-05, + "loss": 0.5801, + "step": 20520 + }, + { + "epoch": 3.27, + "learning_rate": 1.4133153236086638e-05, + "loss": 0.6152, + "step": 20521 + }, + { + "epoch": 3.27, + "learning_rate": 1.4130829869491058e-05, + "loss": 0.653, + "step": 20522 + }, + { + "epoch": 3.27, + "learning_rate": 1.4128506618645551e-05, + "loss": 0.5856, + "step": 20523 + }, + { + "epoch": 3.27, + "learning_rate": 1.4126183483574847e-05, + "loss": 0.6701, + "step": 20524 + }, + { + "epoch": 3.27, + "learning_rate": 1.412386046430369e-05, + "loss": 0.6047, + "step": 20525 + }, + { + "epoch": 3.27, + "learning_rate": 1.4121537560856813e-05, + "loss": 0.6076, + "step": 20526 + }, + { + "epoch": 3.27, + "learning_rate": 1.4119214773258956e-05, + "loss": 0.604, + "step": 20527 + }, + { + "epoch": 3.27, + "learning_rate": 1.4116892101534847e-05, + "loss": 0.6442, + "step": 20528 + }, + { + "epoch": 3.27, + "learning_rate": 1.4114569545709242e-05, + "loss": 0.6131, + "step": 20529 + }, + { + "epoch": 3.27, + "learning_rate": 1.4112247105806858e-05, + "loss": 0.6477, + "step": 20530 + }, + { + "epoch": 3.27, + "learning_rate": 1.4109924781852435e-05, + "loss": 0.6221, + "step": 20531 + }, + { + "epoch": 3.27, + "learning_rate": 1.4107602573870704e-05, + "loss": 0.65, + "step": 20532 + }, + { + "epoch": 3.27, + "learning_rate": 1.410528048188639e-05, + "loss": 0.5807, + "step": 20533 + }, + { + "epoch": 3.27, + "learning_rate": 1.4102958505924213e-05, + "loss": 0.6696, + "step": 20534 + }, + { + "epoch": 3.27, + "learning_rate": 1.4100636646008936e-05, + "loss": 0.579, + "step": 20535 + }, + { + "epoch": 3.27, + "learning_rate": 1.4098314902165246e-05, + "loss": 0.6549, + "step": 20536 + }, + { + "epoch": 3.27, + "learning_rate": 1.4095993274417888e-05, + "loss": 0.6801, + "step": 20537 + }, + { + "epoch": 3.27, + "learning_rate": 1.409367176279158e-05, + "loss": 0.6568, + "step": 20538 + }, + { + "epoch": 3.27, + "learning_rate": 1.4091350367311037e-05, + "loss": 0.6339, + "step": 20539 + }, + { + "epoch": 3.27, + "learning_rate": 1.4089029088000998e-05, + "loss": 0.6181, + "step": 20540 + }, + { + "epoch": 3.27, + "learning_rate": 1.4086707924886177e-05, + "loss": 0.6187, + "step": 20541 + }, + { + "epoch": 3.27, + "learning_rate": 1.408438687799129e-05, + "loss": 0.6213, + "step": 20542 + }, + { + "epoch": 3.27, + "learning_rate": 1.4082065947341058e-05, + "loss": 0.6062, + "step": 20543 + }, + { + "epoch": 3.27, + "learning_rate": 1.4079745132960193e-05, + "loss": 0.6546, + "step": 20544 + }, + { + "epoch": 3.27, + "learning_rate": 1.4077424434873404e-05, + "loss": 0.6912, + "step": 20545 + }, + { + "epoch": 3.27, + "learning_rate": 1.4075103853105423e-05, + "loss": 0.7232, + "step": 20546 + }, + { + "epoch": 3.27, + "learning_rate": 1.4072783387680955e-05, + "loss": 0.6301, + "step": 20547 + }, + { + "epoch": 3.27, + "learning_rate": 1.4070463038624703e-05, + "loss": 0.6471, + "step": 20548 + }, + { + "epoch": 3.27, + "learning_rate": 1.4068142805961394e-05, + "loss": 0.69, + "step": 20549 + }, + { + "epoch": 3.27, + "learning_rate": 1.4065822689715719e-05, + "loss": 0.6464, + "step": 20550 + }, + { + "epoch": 3.27, + "learning_rate": 1.4063502689912388e-05, + "loss": 0.6075, + "step": 20551 + }, + { + "epoch": 3.27, + "learning_rate": 1.4061182806576124e-05, + "loss": 0.6957, + "step": 20552 + }, + { + "epoch": 3.27, + "learning_rate": 1.4058863039731618e-05, + "loss": 0.6453, + "step": 20553 + }, + { + "epoch": 3.27, + "learning_rate": 1.4056543389403581e-05, + "loss": 0.5999, + "step": 20554 + }, + { + "epoch": 3.27, + "learning_rate": 1.4054223855616725e-05, + "loss": 0.6507, + "step": 20555 + }, + { + "epoch": 3.27, + "learning_rate": 1.4051904438395716e-05, + "loss": 0.601, + "step": 20556 + }, + { + "epoch": 3.27, + "learning_rate": 1.404958513776529e-05, + "loss": 0.5824, + "step": 20557 + }, + { + "epoch": 3.28, + "learning_rate": 1.4047265953750133e-05, + "loss": 0.6404, + "step": 20558 + }, + { + "epoch": 3.28, + "learning_rate": 1.4044946886374943e-05, + "loss": 0.648, + "step": 20559 + }, + { + "epoch": 3.28, + "learning_rate": 1.404262793566442e-05, + "loss": 0.6456, + "step": 20560 + }, + { + "epoch": 3.28, + "learning_rate": 1.4040309101643257e-05, + "loss": 0.5785, + "step": 20561 + }, + { + "epoch": 3.28, + "learning_rate": 1.4037990384336139e-05, + "loss": 0.6226, + "step": 20562 + }, + { + "epoch": 3.28, + "learning_rate": 1.4035671783767776e-05, + "loss": 0.6233, + "step": 20563 + }, + { + "epoch": 3.28, + "learning_rate": 1.4033353299962854e-05, + "loss": 0.5833, + "step": 20564 + }, + { + "epoch": 3.28, + "learning_rate": 1.4031034932946057e-05, + "loss": 0.6142, + "step": 20565 + }, + { + "epoch": 3.28, + "learning_rate": 1.4028716682742083e-05, + "loss": 0.5981, + "step": 20566 + }, + { + "epoch": 3.28, + "learning_rate": 1.4026398549375613e-05, + "loss": 0.7201, + "step": 20567 + }, + { + "epoch": 3.28, + "learning_rate": 1.4024080532871331e-05, + "loss": 0.641, + "step": 20568 + }, + { + "epoch": 3.28, + "learning_rate": 1.4021762633253932e-05, + "loss": 0.5766, + "step": 20569 + }, + { + "epoch": 3.28, + "learning_rate": 1.4019444850548097e-05, + "loss": 0.5845, + "step": 20570 + }, + { + "epoch": 3.28, + "learning_rate": 1.4017127184778512e-05, + "loss": 0.6392, + "step": 20571 + }, + { + "epoch": 3.28, + "learning_rate": 1.401480963596985e-05, + "loss": 0.6015, + "step": 20572 + }, + { + "epoch": 3.28, + "learning_rate": 1.4012492204146799e-05, + "loss": 0.628, + "step": 20573 + }, + { + "epoch": 3.28, + "learning_rate": 1.4010174889334033e-05, + "loss": 0.6789, + "step": 20574 + }, + { + "epoch": 3.28, + "learning_rate": 1.400785769155623e-05, + "loss": 0.6286, + "step": 20575 + }, + { + "epoch": 3.28, + "learning_rate": 1.4005540610838075e-05, + "loss": 0.601, + "step": 20576 + }, + { + "epoch": 3.28, + "learning_rate": 1.4003223647204234e-05, + "loss": 0.7075, + "step": 20577 + }, + { + "epoch": 3.28, + "learning_rate": 1.4000906800679386e-05, + "loss": 0.6641, + "step": 20578 + }, + { + "epoch": 3.28, + "learning_rate": 1.3998590071288193e-05, + "loss": 0.6694, + "step": 20579 + }, + { + "epoch": 3.28, + "learning_rate": 1.3996273459055348e-05, + "loss": 0.6486, + "step": 20580 + }, + { + "epoch": 3.28, + "learning_rate": 1.3993956964005505e-05, + "loss": 0.6087, + "step": 20581 + }, + { + "epoch": 3.28, + "learning_rate": 1.3991640586163343e-05, + "loss": 0.6109, + "step": 20582 + }, + { + "epoch": 3.28, + "learning_rate": 1.3989324325553526e-05, + "loss": 0.63, + "step": 20583 + }, + { + "epoch": 3.28, + "learning_rate": 1.3987008182200717e-05, + "loss": 0.6196, + "step": 20584 + }, + { + "epoch": 3.28, + "learning_rate": 1.3984692156129575e-05, + "loss": 0.6198, + "step": 20585 + }, + { + "epoch": 3.28, + "learning_rate": 1.3982376247364788e-05, + "loss": 0.5933, + "step": 20586 + }, + { + "epoch": 3.28, + "learning_rate": 1.3980060455930999e-05, + "loss": 0.6733, + "step": 20587 + }, + { + "epoch": 3.28, + "learning_rate": 1.3977744781852881e-05, + "loss": 0.5782, + "step": 20588 + }, + { + "epoch": 3.28, + "learning_rate": 1.3975429225155087e-05, + "loss": 0.6277, + "step": 20589 + }, + { + "epoch": 3.28, + "learning_rate": 1.397311378586228e-05, + "loss": 0.6321, + "step": 20590 + }, + { + "epoch": 3.28, + "learning_rate": 1.3970798463999114e-05, + "loss": 0.6073, + "step": 20591 + }, + { + "epoch": 3.28, + "learning_rate": 1.396848325959024e-05, + "loss": 0.6076, + "step": 20592 + }, + { + "epoch": 3.28, + "learning_rate": 1.3966168172660332e-05, + "loss": 0.5922, + "step": 20593 + }, + { + "epoch": 3.28, + "learning_rate": 1.3963853203234034e-05, + "loss": 0.6914, + "step": 20594 + }, + { + "epoch": 3.28, + "learning_rate": 1.3961538351336006e-05, + "loss": 0.6686, + "step": 20595 + }, + { + "epoch": 3.28, + "learning_rate": 1.3959223616990879e-05, + "loss": 0.6735, + "step": 20596 + }, + { + "epoch": 3.28, + "learning_rate": 1.3956909000223323e-05, + "loss": 0.6212, + "step": 20597 + }, + { + "epoch": 3.28, + "learning_rate": 1.3954594501057983e-05, + "loss": 0.6659, + "step": 20598 + }, + { + "epoch": 3.28, + "learning_rate": 1.3952280119519507e-05, + "loss": 0.6857, + "step": 20599 + }, + { + "epoch": 3.28, + "learning_rate": 1.394996585563254e-05, + "loss": 0.6579, + "step": 20600 + }, + { + "epoch": 3.28, + "learning_rate": 1.3947651709421728e-05, + "loss": 0.7522, + "step": 20601 + }, + { + "epoch": 3.28, + "learning_rate": 1.3945337680911714e-05, + "loss": 0.6207, + "step": 20602 + }, + { + "epoch": 3.28, + "learning_rate": 1.3943023770127134e-05, + "loss": 0.6691, + "step": 20603 + }, + { + "epoch": 3.28, + "learning_rate": 1.3940709977092645e-05, + "loss": 0.6227, + "step": 20604 + }, + { + "epoch": 3.28, + "learning_rate": 1.3938396301832882e-05, + "loss": 0.639, + "step": 20605 + }, + { + "epoch": 3.28, + "learning_rate": 1.3936082744372484e-05, + "loss": 0.6379, + "step": 20606 + }, + { + "epoch": 3.28, + "learning_rate": 1.3933769304736085e-05, + "loss": 0.6296, + "step": 20607 + }, + { + "epoch": 3.28, + "learning_rate": 1.3931455982948327e-05, + "loss": 0.6147, + "step": 20608 + }, + { + "epoch": 3.28, + "learning_rate": 1.3929142779033832e-05, + "loss": 0.6434, + "step": 20609 + }, + { + "epoch": 3.28, + "learning_rate": 1.3926829693017258e-05, + "loss": 0.6508, + "step": 20610 + }, + { + "epoch": 3.28, + "learning_rate": 1.392451672492322e-05, + "loss": 0.6445, + "step": 20611 + }, + { + "epoch": 3.28, + "learning_rate": 1.3922203874776358e-05, + "loss": 0.549, + "step": 20612 + }, + { + "epoch": 3.28, + "learning_rate": 1.3919891142601296e-05, + "loss": 0.6252, + "step": 20613 + }, + { + "epoch": 3.28, + "learning_rate": 1.391757852842267e-05, + "loss": 0.6335, + "step": 20614 + }, + { + "epoch": 3.28, + "learning_rate": 1.39152660322651e-05, + "loss": 0.6106, + "step": 20615 + }, + { + "epoch": 3.28, + "learning_rate": 1.3912953654153222e-05, + "loss": 0.6498, + "step": 20616 + }, + { + "epoch": 3.28, + "learning_rate": 1.3910641394111656e-05, + "loss": 0.674, + "step": 20617 + }, + { + "epoch": 3.28, + "learning_rate": 1.3908329252165025e-05, + "loss": 0.6112, + "step": 20618 + }, + { + "epoch": 3.28, + "learning_rate": 1.3906017228337953e-05, + "loss": 0.6309, + "step": 20619 + }, + { + "epoch": 3.29, + "learning_rate": 1.3903705322655052e-05, + "loss": 0.6891, + "step": 20620 + }, + { + "epoch": 3.29, + "learning_rate": 1.3901393535140967e-05, + "loss": 0.6208, + "step": 20621 + }, + { + "epoch": 3.29, + "learning_rate": 1.3899081865820296e-05, + "loss": 0.6424, + "step": 20622 + }, + { + "epoch": 3.29, + "learning_rate": 1.389677031471767e-05, + "loss": 0.6676, + "step": 20623 + }, + { + "epoch": 3.29, + "learning_rate": 1.3894458881857694e-05, + "loss": 0.5885, + "step": 20624 + }, + { + "epoch": 3.29, + "learning_rate": 1.389214756726499e-05, + "loss": 0.6076, + "step": 20625 + }, + { + "epoch": 3.29, + "learning_rate": 1.3889836370964163e-05, + "loss": 0.804, + "step": 20626 + }, + { + "epoch": 3.29, + "learning_rate": 1.3887525292979842e-05, + "loss": 0.6294, + "step": 20627 + }, + { + "epoch": 3.29, + "learning_rate": 1.388521433333663e-05, + "loss": 0.6126, + "step": 20628 + }, + { + "epoch": 3.29, + "learning_rate": 1.3882903492059137e-05, + "loss": 0.6047, + "step": 20629 + }, + { + "epoch": 3.29, + "learning_rate": 1.3880592769171973e-05, + "loss": 0.6296, + "step": 20630 + }, + { + "epoch": 3.29, + "learning_rate": 1.3878282164699746e-05, + "loss": 0.5899, + "step": 20631 + }, + { + "epoch": 3.29, + "learning_rate": 1.387597167866705e-05, + "loss": 0.6203, + "step": 20632 + }, + { + "epoch": 3.29, + "learning_rate": 1.3873661311098512e-05, + "loss": 0.5903, + "step": 20633 + }, + { + "epoch": 3.29, + "learning_rate": 1.3871351062018727e-05, + "loss": 0.6362, + "step": 20634 + }, + { + "epoch": 3.29, + "learning_rate": 1.3869040931452298e-05, + "loss": 0.6003, + "step": 20635 + }, + { + "epoch": 3.29, + "learning_rate": 1.3866730919423835e-05, + "loss": 0.6016, + "step": 20636 + }, + { + "epoch": 3.29, + "learning_rate": 1.3864421025957907e-05, + "loss": 0.6101, + "step": 20637 + }, + { + "epoch": 3.29, + "learning_rate": 1.3862111251079147e-05, + "loss": 0.6443, + "step": 20638 + }, + { + "epoch": 3.29, + "learning_rate": 1.3859801594812136e-05, + "loss": 0.7074, + "step": 20639 + }, + { + "epoch": 3.29, + "learning_rate": 1.3857492057181475e-05, + "loss": 0.6573, + "step": 20640 + }, + { + "epoch": 3.29, + "learning_rate": 1.3855182638211761e-05, + "loss": 0.6, + "step": 20641 + }, + { + "epoch": 3.29, + "learning_rate": 1.3852873337927586e-05, + "loss": 0.6163, + "step": 20642 + }, + { + "epoch": 3.29, + "learning_rate": 1.3850564156353535e-05, + "loss": 0.6475, + "step": 20643 + }, + { + "epoch": 3.29, + "learning_rate": 1.3848255093514212e-05, + "loss": 0.6126, + "step": 20644 + }, + { + "epoch": 3.29, + "learning_rate": 1.3845946149434203e-05, + "loss": 0.5976, + "step": 20645 + }, + { + "epoch": 3.29, + "learning_rate": 1.3843637324138098e-05, + "loss": 0.6163, + "step": 20646 + }, + { + "epoch": 3.29, + "learning_rate": 1.3841328617650478e-05, + "loss": 0.6228, + "step": 20647 + }, + { + "epoch": 3.29, + "learning_rate": 1.3839020029995936e-05, + "loss": 0.6412, + "step": 20648 + }, + { + "epoch": 3.29, + "learning_rate": 1.3836711561199044e-05, + "loss": 0.6507, + "step": 20649 + }, + { + "epoch": 3.29, + "learning_rate": 1.3834403211284407e-05, + "loss": 0.6519, + "step": 20650 + }, + { + "epoch": 3.29, + "learning_rate": 1.38320949802766e-05, + "loss": 0.614, + "step": 20651 + }, + { + "epoch": 3.29, + "learning_rate": 1.38297868682002e-05, + "loss": 0.6226, + "step": 20652 + }, + { + "epoch": 3.29, + "learning_rate": 1.3827478875079786e-05, + "loss": 0.6765, + "step": 20653 + }, + { + "epoch": 3.29, + "learning_rate": 1.382517100093994e-05, + "loss": 0.6537, + "step": 20654 + }, + { + "epoch": 3.29, + "learning_rate": 1.3822863245805229e-05, + "loss": 0.6014, + "step": 20655 + }, + { + "epoch": 3.29, + "learning_rate": 1.3820555609700256e-05, + "loss": 0.6814, + "step": 20656 + }, + { + "epoch": 3.29, + "learning_rate": 1.3818248092649572e-05, + "loss": 0.6256, + "step": 20657 + }, + { + "epoch": 3.29, + "learning_rate": 1.3815940694677753e-05, + "loss": 0.6516, + "step": 20658 + }, + { + "epoch": 3.29, + "learning_rate": 1.381363341580938e-05, + "loss": 0.5979, + "step": 20659 + }, + { + "epoch": 3.29, + "learning_rate": 1.3811326256069008e-05, + "loss": 0.6634, + "step": 20660 + }, + { + "epoch": 3.29, + "learning_rate": 1.3809019215481228e-05, + "loss": 0.6432, + "step": 20661 + }, + { + "epoch": 3.29, + "learning_rate": 1.38067122940706e-05, + "loss": 0.6603, + "step": 20662 + }, + { + "epoch": 3.29, + "learning_rate": 1.3804405491861688e-05, + "loss": 0.6785, + "step": 20663 + }, + { + "epoch": 3.29, + "learning_rate": 1.3802098808879065e-05, + "loss": 0.6859, + "step": 20664 + }, + { + "epoch": 3.29, + "learning_rate": 1.3799792245147289e-05, + "loss": 0.7242, + "step": 20665 + }, + { + "epoch": 3.29, + "learning_rate": 1.3797485800690916e-05, + "loss": 0.6597, + "step": 20666 + }, + { + "epoch": 3.29, + "learning_rate": 1.3795179475534526e-05, + "loss": 0.6208, + "step": 20667 + }, + { + "epoch": 3.29, + "learning_rate": 1.3792873269702674e-05, + "loss": 0.6179, + "step": 20668 + }, + { + "epoch": 3.29, + "learning_rate": 1.379056718321992e-05, + "loss": 0.6388, + "step": 20669 + }, + { + "epoch": 3.29, + "learning_rate": 1.3788261216110815e-05, + "loss": 0.6344, + "step": 20670 + }, + { + "epoch": 3.29, + "learning_rate": 1.3785955368399925e-05, + "loss": 0.5952, + "step": 20671 + }, + { + "epoch": 3.29, + "learning_rate": 1.3783649640111789e-05, + "loss": 0.6298, + "step": 20672 + }, + { + "epoch": 3.29, + "learning_rate": 1.3781344031270988e-05, + "loss": 0.6754, + "step": 20673 + }, + { + "epoch": 3.29, + "learning_rate": 1.3779038541902063e-05, + "loss": 0.6524, + "step": 20674 + }, + { + "epoch": 3.29, + "learning_rate": 1.377673317202956e-05, + "loss": 0.6435, + "step": 20675 + }, + { + "epoch": 3.29, + "learning_rate": 1.3774427921678046e-05, + "loss": 0.5734, + "step": 20676 + }, + { + "epoch": 3.29, + "learning_rate": 1.3772122790872044e-05, + "loss": 0.6438, + "step": 20677 + }, + { + "epoch": 3.29, + "learning_rate": 1.3769817779636124e-05, + "loss": 0.6871, + "step": 20678 + }, + { + "epoch": 3.29, + "learning_rate": 1.3767512887994826e-05, + "loss": 0.6011, + "step": 20679 + }, + { + "epoch": 3.29, + "learning_rate": 1.37652081159727e-05, + "loss": 0.63, + "step": 20680 + }, + { + "epoch": 3.29, + "learning_rate": 1.3762903463594285e-05, + "loss": 0.5865, + "step": 20681 + }, + { + "epoch": 3.29, + "learning_rate": 1.3760598930884121e-05, + "loss": 0.6252, + "step": 20682 + }, + { + "epoch": 3.3, + "learning_rate": 1.3758294517866749e-05, + "loss": 0.6236, + "step": 20683 + }, + { + "epoch": 3.3, + "learning_rate": 1.3755990224566728e-05, + "loss": 0.6161, + "step": 20684 + }, + { + "epoch": 3.3, + "learning_rate": 1.375368605100858e-05, + "loss": 0.5922, + "step": 20685 + }, + { + "epoch": 3.3, + "learning_rate": 1.375138199721685e-05, + "loss": 0.6155, + "step": 20686 + }, + { + "epoch": 3.3, + "learning_rate": 1.3749078063216075e-05, + "loss": 0.6388, + "step": 20687 + }, + { + "epoch": 3.3, + "learning_rate": 1.3746774249030786e-05, + "loss": 0.6297, + "step": 20688 + }, + { + "epoch": 3.3, + "learning_rate": 1.3744470554685507e-05, + "loss": 0.619, + "step": 20689 + }, + { + "epoch": 3.3, + "learning_rate": 1.3742166980204796e-05, + "loss": 0.6016, + "step": 20690 + }, + { + "epoch": 3.3, + "learning_rate": 1.3739863525613169e-05, + "loss": 0.5851, + "step": 20691 + }, + { + "epoch": 3.3, + "learning_rate": 1.3737560190935161e-05, + "loss": 0.6819, + "step": 20692 + }, + { + "epoch": 3.3, + "learning_rate": 1.3735256976195299e-05, + "loss": 0.6085, + "step": 20693 + }, + { + "epoch": 3.3, + "learning_rate": 1.3732953881418114e-05, + "loss": 0.6358, + "step": 20694 + }, + { + "epoch": 3.3, + "learning_rate": 1.3730650906628126e-05, + "loss": 0.6343, + "step": 20695 + }, + { + "epoch": 3.3, + "learning_rate": 1.3728348051849865e-05, + "loss": 0.5976, + "step": 20696 + }, + { + "epoch": 3.3, + "learning_rate": 1.3726045317107855e-05, + "loss": 0.6825, + "step": 20697 + }, + { + "epoch": 3.3, + "learning_rate": 1.3723742702426617e-05, + "loss": 0.5678, + "step": 20698 + }, + { + "epoch": 3.3, + "learning_rate": 1.3721440207830674e-05, + "loss": 0.6321, + "step": 20699 + }, + { + "epoch": 3.3, + "learning_rate": 1.3719137833344536e-05, + "loss": 0.6318, + "step": 20700 + }, + { + "epoch": 3.3, + "learning_rate": 1.3716835578992737e-05, + "loss": 0.672, + "step": 20701 + }, + { + "epoch": 3.3, + "learning_rate": 1.3714533444799793e-05, + "loss": 0.6174, + "step": 20702 + }, + { + "epoch": 3.3, + "learning_rate": 1.3712231430790217e-05, + "loss": 0.6538, + "step": 20703 + }, + { + "epoch": 3.3, + "learning_rate": 1.3709929536988519e-05, + "loss": 0.617, + "step": 20704 + }, + { + "epoch": 3.3, + "learning_rate": 1.3707627763419223e-05, + "loss": 0.7035, + "step": 20705 + }, + { + "epoch": 3.3, + "learning_rate": 1.3705326110106831e-05, + "loss": 0.6053, + "step": 20706 + }, + { + "epoch": 3.3, + "learning_rate": 1.3703024577075852e-05, + "loss": 0.5846, + "step": 20707 + }, + { + "epoch": 3.3, + "learning_rate": 1.370072316435081e-05, + "loss": 0.6499, + "step": 20708 + }, + { + "epoch": 3.3, + "learning_rate": 1.3698421871956207e-05, + "loss": 0.6396, + "step": 20709 + }, + { + "epoch": 3.3, + "learning_rate": 1.3696120699916549e-05, + "loss": 0.6996, + "step": 20710 + }, + { + "epoch": 3.3, + "learning_rate": 1.3693819648256345e-05, + "loss": 0.6251, + "step": 20711 + }, + { + "epoch": 3.3, + "learning_rate": 1.3691518717000095e-05, + "loss": 0.6897, + "step": 20712 + }, + { + "epoch": 3.3, + "learning_rate": 1.3689217906172297e-05, + "loss": 0.6562, + "step": 20713 + }, + { + "epoch": 3.3, + "learning_rate": 1.368691721579747e-05, + "loss": 0.5797, + "step": 20714 + }, + { + "epoch": 3.3, + "learning_rate": 1.3684616645900105e-05, + "loss": 0.6223, + "step": 20715 + }, + { + "epoch": 3.3, + "learning_rate": 1.3682316196504718e-05, + "loss": 0.6362, + "step": 20716 + }, + { + "epoch": 3.3, + "learning_rate": 1.3680015867635779e-05, + "loss": 0.6056, + "step": 20717 + }, + { + "epoch": 3.3, + "learning_rate": 1.3677715659317789e-05, + "loss": 0.5878, + "step": 20718 + }, + { + "epoch": 3.3, + "learning_rate": 1.3675415571575262e-05, + "loss": 0.6484, + "step": 20719 + }, + { + "epoch": 3.3, + "learning_rate": 1.3673115604432685e-05, + "loss": 0.5826, + "step": 20720 + }, + { + "epoch": 3.3, + "learning_rate": 1.3670815757914552e-05, + "loss": 0.6204, + "step": 20721 + }, + { + "epoch": 3.3, + "learning_rate": 1.3668516032045347e-05, + "loss": 0.6361, + "step": 20722 + }, + { + "epoch": 3.3, + "learning_rate": 1.3666216426849574e-05, + "loss": 0.6493, + "step": 20723 + }, + { + "epoch": 3.3, + "learning_rate": 1.3663916942351701e-05, + "loss": 0.6884, + "step": 20724 + }, + { + "epoch": 3.3, + "learning_rate": 1.366161757857624e-05, + "loss": 0.606, + "step": 20725 + }, + { + "epoch": 3.3, + "learning_rate": 1.3659318335547671e-05, + "loss": 0.6074, + "step": 20726 + }, + { + "epoch": 3.3, + "learning_rate": 1.3657019213290476e-05, + "loss": 0.5968, + "step": 20727 + }, + { + "epoch": 3.3, + "learning_rate": 1.3654720211829136e-05, + "loss": 0.5981, + "step": 20728 + }, + { + "epoch": 3.3, + "learning_rate": 1.3652421331188142e-05, + "loss": 0.6592, + "step": 20729 + }, + { + "epoch": 3.3, + "learning_rate": 1.365012257139196e-05, + "loss": 0.6438, + "step": 20730 + }, + { + "epoch": 3.3, + "learning_rate": 1.3647823932465093e-05, + "loss": 0.6883, + "step": 20731 + }, + { + "epoch": 3.3, + "learning_rate": 1.3645525414432008e-05, + "loss": 0.7057, + "step": 20732 + }, + { + "epoch": 3.3, + "learning_rate": 1.3643227017317184e-05, + "loss": 0.6765, + "step": 20733 + }, + { + "epoch": 3.3, + "learning_rate": 1.3640928741145098e-05, + "loss": 0.649, + "step": 20734 + }, + { + "epoch": 3.3, + "learning_rate": 1.3638630585940226e-05, + "loss": 0.6304, + "step": 20735 + }, + { + "epoch": 3.3, + "learning_rate": 1.3636332551727027e-05, + "loss": 0.6381, + "step": 20736 + }, + { + "epoch": 3.3, + "learning_rate": 1.3634034638530007e-05, + "loss": 0.6273, + "step": 20737 + }, + { + "epoch": 3.3, + "learning_rate": 1.363173684637361e-05, + "loss": 0.6506, + "step": 20738 + }, + { + "epoch": 3.3, + "learning_rate": 1.3629439175282315e-05, + "loss": 0.6593, + "step": 20739 + }, + { + "epoch": 3.3, + "learning_rate": 1.3627141625280586e-05, + "loss": 0.7039, + "step": 20740 + }, + { + "epoch": 3.3, + "learning_rate": 1.3624844196392889e-05, + "loss": 0.6331, + "step": 20741 + }, + { + "epoch": 3.3, + "learning_rate": 1.36225468886437e-05, + "loss": 0.6134, + "step": 20742 + }, + { + "epoch": 3.3, + "learning_rate": 1.3620249702057483e-05, + "loss": 0.6963, + "step": 20743 + }, + { + "epoch": 3.3, + "learning_rate": 1.3617952636658699e-05, + "loss": 0.7297, + "step": 20744 + }, + { + "epoch": 3.3, + "learning_rate": 1.3615655692471804e-05, + "loss": 0.6369, + "step": 20745 + }, + { + "epoch": 3.31, + "learning_rate": 1.3613358869521266e-05, + "loss": 0.5729, + "step": 20746 + }, + { + "epoch": 3.31, + "learning_rate": 1.3611062167831534e-05, + "loss": 0.6674, + "step": 20747 + }, + { + "epoch": 3.31, + "learning_rate": 1.3608765587427087e-05, + "loss": 0.583, + "step": 20748 + }, + { + "epoch": 3.31, + "learning_rate": 1.3606469128332369e-05, + "loss": 0.6649, + "step": 20749 + }, + { + "epoch": 3.31, + "learning_rate": 1.360417279057184e-05, + "loss": 0.665, + "step": 20750 + }, + { + "epoch": 3.31, + "learning_rate": 1.3601876574169947e-05, + "loss": 0.6332, + "step": 20751 + }, + { + "epoch": 3.31, + "learning_rate": 1.3599580479151152e-05, + "loss": 0.6315, + "step": 20752 + }, + { + "epoch": 3.31, + "learning_rate": 1.3597284505539892e-05, + "loss": 0.6381, + "step": 20753 + }, + { + "epoch": 3.31, + "learning_rate": 1.3594988653360641e-05, + "loss": 0.63, + "step": 20754 + }, + { + "epoch": 3.31, + "learning_rate": 1.3592692922637837e-05, + "loss": 0.6494, + "step": 20755 + }, + { + "epoch": 3.31, + "learning_rate": 1.3590397313395925e-05, + "loss": 0.5739, + "step": 20756 + }, + { + "epoch": 3.31, + "learning_rate": 1.3588101825659366e-05, + "loss": 0.5968, + "step": 20757 + }, + { + "epoch": 3.31, + "learning_rate": 1.3585806459452578e-05, + "loss": 0.6144, + "step": 20758 + }, + { + "epoch": 3.31, + "learning_rate": 1.3583511214800027e-05, + "loss": 0.6835, + "step": 20759 + }, + { + "epoch": 3.31, + "learning_rate": 1.3581216091726152e-05, + "loss": 0.5728, + "step": 20760 + }, + { + "epoch": 3.31, + "learning_rate": 1.3578921090255391e-05, + "loss": 0.6209, + "step": 20761 + }, + { + "epoch": 3.31, + "learning_rate": 1.357662621041219e-05, + "loss": 0.6325, + "step": 20762 + }, + { + "epoch": 3.31, + "learning_rate": 1.3574331452220985e-05, + "loss": 0.6227, + "step": 20763 + }, + { + "epoch": 3.31, + "learning_rate": 1.35720368157062e-05, + "loss": 0.6658, + "step": 20764 + }, + { + "epoch": 3.31, + "learning_rate": 1.3569742300892296e-05, + "loss": 0.6277, + "step": 20765 + }, + { + "epoch": 3.31, + "learning_rate": 1.3567447907803695e-05, + "loss": 0.639, + "step": 20766 + }, + { + "epoch": 3.31, + "learning_rate": 1.3565153636464836e-05, + "loss": 0.59, + "step": 20767 + }, + { + "epoch": 3.31, + "learning_rate": 1.3562859486900149e-05, + "loss": 0.6313, + "step": 20768 + }, + { + "epoch": 3.31, + "learning_rate": 1.3560565459134061e-05, + "loss": 0.6541, + "step": 20769 + }, + { + "epoch": 3.31, + "learning_rate": 1.3558271553190998e-05, + "loss": 0.6836, + "step": 20770 + }, + { + "epoch": 3.31, + "learning_rate": 1.3555977769095407e-05, + "loss": 0.6424, + "step": 20771 + }, + { + "epoch": 3.31, + "learning_rate": 1.3553684106871705e-05, + "loss": 0.6513, + "step": 20772 + }, + { + "epoch": 3.31, + "learning_rate": 1.3551390566544317e-05, + "loss": 0.6331, + "step": 20773 + }, + { + "epoch": 3.31, + "learning_rate": 1.3549097148137669e-05, + "loss": 0.6511, + "step": 20774 + }, + { + "epoch": 3.31, + "learning_rate": 1.3546803851676184e-05, + "loss": 0.5838, + "step": 20775 + }, + { + "epoch": 3.31, + "learning_rate": 1.3544510677184274e-05, + "loss": 0.6213, + "step": 20776 + }, + { + "epoch": 3.31, + "learning_rate": 1.3542217624686392e-05, + "loss": 0.6348, + "step": 20777 + }, + { + "epoch": 3.31, + "learning_rate": 1.3539924694206929e-05, + "loss": 0.6313, + "step": 20778 + }, + { + "epoch": 3.31, + "learning_rate": 1.3537631885770307e-05, + "loss": 0.6278, + "step": 20779 + }, + { + "epoch": 3.31, + "learning_rate": 1.3535339199400948e-05, + "loss": 0.6144, + "step": 20780 + }, + { + "epoch": 3.31, + "learning_rate": 1.3533046635123254e-05, + "loss": 0.6283, + "step": 20781 + }, + { + "epoch": 3.31, + "learning_rate": 1.3530754192961661e-05, + "loss": 0.6737, + "step": 20782 + }, + { + "epoch": 3.31, + "learning_rate": 1.3528461872940572e-05, + "loss": 0.5985, + "step": 20783 + }, + { + "epoch": 3.31, + "learning_rate": 1.3526169675084405e-05, + "loss": 0.6342, + "step": 20784 + }, + { + "epoch": 3.31, + "learning_rate": 1.3523877599417561e-05, + "loss": 0.6129, + "step": 20785 + }, + { + "epoch": 3.31, + "learning_rate": 1.3521585645964454e-05, + "loss": 0.6608, + "step": 20786 + }, + { + "epoch": 3.31, + "learning_rate": 1.3519293814749481e-05, + "loss": 0.6863, + "step": 20787 + }, + { + "epoch": 3.31, + "learning_rate": 1.351700210579707e-05, + "loss": 0.638, + "step": 20788 + }, + { + "epoch": 3.31, + "learning_rate": 1.3514710519131613e-05, + "loss": 0.6038, + "step": 20789 + }, + { + "epoch": 3.31, + "learning_rate": 1.3512419054777517e-05, + "loss": 0.6121, + "step": 20790 + }, + { + "epoch": 3.31, + "learning_rate": 1.3510127712759186e-05, + "loss": 0.6213, + "step": 20791 + }, + { + "epoch": 3.31, + "learning_rate": 1.3507836493101018e-05, + "loss": 0.6831, + "step": 20792 + }, + { + "epoch": 3.31, + "learning_rate": 1.3505545395827401e-05, + "loss": 0.6345, + "step": 20793 + }, + { + "epoch": 3.31, + "learning_rate": 1.3503254420962758e-05, + "loss": 0.63, + "step": 20794 + }, + { + "epoch": 3.31, + "learning_rate": 1.3500963568531478e-05, + "loss": 0.6348, + "step": 20795 + }, + { + "epoch": 3.31, + "learning_rate": 1.3498672838557952e-05, + "loss": 0.6059, + "step": 20796 + }, + { + "epoch": 3.31, + "learning_rate": 1.349638223106659e-05, + "loss": 0.6391, + "step": 20797 + }, + { + "epoch": 3.31, + "learning_rate": 1.3494091746081755e-05, + "loss": 0.711, + "step": 20798 + }, + { + "epoch": 3.31, + "learning_rate": 1.3491801383627864e-05, + "loss": 0.6333, + "step": 20799 + }, + { + "epoch": 3.31, + "learning_rate": 1.3489511143729303e-05, + "loss": 0.6174, + "step": 20800 + }, + { + "epoch": 3.31, + "learning_rate": 1.348722102641046e-05, + "loss": 0.677, + "step": 20801 + }, + { + "epoch": 3.31, + "learning_rate": 1.3484931031695724e-05, + "loss": 0.7184, + "step": 20802 + }, + { + "epoch": 3.31, + "learning_rate": 1.348264115960948e-05, + "loss": 0.6066, + "step": 20803 + }, + { + "epoch": 3.31, + "learning_rate": 1.3480351410176106e-05, + "loss": 0.5772, + "step": 20804 + }, + { + "epoch": 3.31, + "learning_rate": 1.3478061783420004e-05, + "loss": 0.6794, + "step": 20805 + }, + { + "epoch": 3.31, + "learning_rate": 1.3475772279365551e-05, + "loss": 0.5886, + "step": 20806 + }, + { + "epoch": 3.31, + "learning_rate": 1.3473482898037126e-05, + "loss": 0.6199, + "step": 20807 + }, + { + "epoch": 3.31, + "learning_rate": 1.3471193639459112e-05, + "loss": 0.6452, + "step": 20808 + }, + { + "epoch": 3.32, + "learning_rate": 1.3468904503655882e-05, + "loss": 0.6962, + "step": 20809 + }, + { + "epoch": 3.32, + "learning_rate": 1.3466615490651812e-05, + "loss": 0.5628, + "step": 20810 + }, + { + "epoch": 3.32, + "learning_rate": 1.3464326600471294e-05, + "loss": 0.6299, + "step": 20811 + }, + { + "epoch": 3.32, + "learning_rate": 1.3462037833138697e-05, + "loss": 0.6347, + "step": 20812 + }, + { + "epoch": 3.32, + "learning_rate": 1.3459749188678388e-05, + "loss": 0.6578, + "step": 20813 + }, + { + "epoch": 3.32, + "learning_rate": 1.3457460667114747e-05, + "loss": 0.6108, + "step": 20814 + }, + { + "epoch": 3.32, + "learning_rate": 1.345517226847214e-05, + "loss": 0.5614, + "step": 20815 + }, + { + "epoch": 3.32, + "learning_rate": 1.345288399277494e-05, + "loss": 0.5525, + "step": 20816 + }, + { + "epoch": 3.32, + "learning_rate": 1.3450595840047513e-05, + "loss": 0.601, + "step": 20817 + }, + { + "epoch": 3.32, + "learning_rate": 1.3448307810314234e-05, + "loss": 0.5865, + "step": 20818 + }, + { + "epoch": 3.32, + "learning_rate": 1.3446019903599461e-05, + "loss": 0.6172, + "step": 20819 + }, + { + "epoch": 3.32, + "learning_rate": 1.344373211992756e-05, + "loss": 0.6231, + "step": 20820 + }, + { + "epoch": 3.32, + "learning_rate": 1.344144445932289e-05, + "loss": 0.6348, + "step": 20821 + }, + { + "epoch": 3.32, + "learning_rate": 1.3439156921809824e-05, + "loss": 0.6978, + "step": 20822 + }, + { + "epoch": 3.32, + "learning_rate": 1.3436869507412719e-05, + "loss": 0.6432, + "step": 20823 + }, + { + "epoch": 3.32, + "learning_rate": 1.3434582216155933e-05, + "loss": 0.6139, + "step": 20824 + }, + { + "epoch": 3.32, + "learning_rate": 1.3432295048063826e-05, + "loss": 0.705, + "step": 20825 + }, + { + "epoch": 3.32, + "learning_rate": 1.3430008003160755e-05, + "loss": 0.6336, + "step": 20826 + }, + { + "epoch": 3.32, + "learning_rate": 1.3427721081471072e-05, + "loss": 0.5899, + "step": 20827 + }, + { + "epoch": 3.32, + "learning_rate": 1.3425434283019129e-05, + "loss": 0.6335, + "step": 20828 + }, + { + "epoch": 3.32, + "learning_rate": 1.342314760782929e-05, + "loss": 0.6128, + "step": 20829 + }, + { + "epoch": 3.32, + "learning_rate": 1.3420861055925898e-05, + "loss": 0.63, + "step": 20830 + }, + { + "epoch": 3.32, + "learning_rate": 1.3418574627333308e-05, + "loss": 0.6007, + "step": 20831 + }, + { + "epoch": 3.32, + "learning_rate": 1.341628832207587e-05, + "loss": 0.6331, + "step": 20832 + }, + { + "epoch": 3.32, + "learning_rate": 1.3414002140177925e-05, + "loss": 0.658, + "step": 20833 + }, + { + "epoch": 3.32, + "learning_rate": 1.3411716081663816e-05, + "loss": 0.7001, + "step": 20834 + }, + { + "epoch": 3.32, + "learning_rate": 1.3409430146557902e-05, + "loss": 0.5963, + "step": 20835 + }, + { + "epoch": 3.32, + "learning_rate": 1.3407144334884523e-05, + "loss": 0.6509, + "step": 20836 + }, + { + "epoch": 3.32, + "learning_rate": 1.3404858646668028e-05, + "loss": 0.7236, + "step": 20837 + }, + { + "epoch": 3.32, + "learning_rate": 1.3402573081932737e-05, + "loss": 0.6108, + "step": 20838 + }, + { + "epoch": 3.32, + "learning_rate": 1.3400287640702991e-05, + "loss": 0.6238, + "step": 20839 + }, + { + "epoch": 3.32, + "learning_rate": 1.3398002323003153e-05, + "loss": 0.6715, + "step": 20840 + }, + { + "epoch": 3.32, + "learning_rate": 1.3395717128857543e-05, + "loss": 0.5929, + "step": 20841 + }, + { + "epoch": 3.32, + "learning_rate": 1.3393432058290506e-05, + "loss": 0.6929, + "step": 20842 + }, + { + "epoch": 3.32, + "learning_rate": 1.3391147111326365e-05, + "loss": 0.6642, + "step": 20843 + }, + { + "epoch": 3.32, + "learning_rate": 1.3388862287989461e-05, + "loss": 0.6127, + "step": 20844 + }, + { + "epoch": 3.32, + "learning_rate": 1.3386577588304116e-05, + "loss": 0.5789, + "step": 20845 + }, + { + "epoch": 3.32, + "learning_rate": 1.3384293012294679e-05, + "loss": 0.6056, + "step": 20846 + }, + { + "epoch": 3.32, + "learning_rate": 1.3382008559985467e-05, + "loss": 0.6444, + "step": 20847 + }, + { + "epoch": 3.32, + "learning_rate": 1.3379724231400815e-05, + "loss": 0.6354, + "step": 20848 + }, + { + "epoch": 3.32, + "learning_rate": 1.3377440026565041e-05, + "loss": 0.5987, + "step": 20849 + }, + { + "epoch": 3.32, + "learning_rate": 1.3375155945502474e-05, + "loss": 0.6667, + "step": 20850 + }, + { + "epoch": 3.32, + "learning_rate": 1.3372871988237428e-05, + "loss": 0.5906, + "step": 20851 + }, + { + "epoch": 3.32, + "learning_rate": 1.337058815479425e-05, + "loss": 0.587, + "step": 20852 + }, + { + "epoch": 3.32, + "learning_rate": 1.3368304445197244e-05, + "loss": 0.5885, + "step": 20853 + }, + { + "epoch": 3.32, + "learning_rate": 1.3366020859470737e-05, + "loss": 0.6368, + "step": 20854 + }, + { + "epoch": 3.32, + "learning_rate": 1.3363737397639043e-05, + "loss": 0.6461, + "step": 20855 + }, + { + "epoch": 3.32, + "learning_rate": 1.3361454059726483e-05, + "loss": 0.6302, + "step": 20856 + }, + { + "epoch": 3.32, + "learning_rate": 1.335917084575736e-05, + "loss": 0.6492, + "step": 20857 + }, + { + "epoch": 3.32, + "learning_rate": 1.335688775575602e-05, + "loss": 0.6452, + "step": 20858 + }, + { + "epoch": 3.32, + "learning_rate": 1.3354604789746747e-05, + "loss": 0.7391, + "step": 20859 + }, + { + "epoch": 3.32, + "learning_rate": 1.3352321947753862e-05, + "loss": 0.5981, + "step": 20860 + }, + { + "epoch": 3.32, + "learning_rate": 1.3350039229801681e-05, + "loss": 0.6248, + "step": 20861 + }, + { + "epoch": 3.32, + "learning_rate": 1.3347756635914494e-05, + "loss": 0.6925, + "step": 20862 + }, + { + "epoch": 3.32, + "learning_rate": 1.3345474166116637e-05, + "loss": 0.6558, + "step": 20863 + }, + { + "epoch": 3.32, + "learning_rate": 1.3343191820432404e-05, + "loss": 0.5793, + "step": 20864 + }, + { + "epoch": 3.32, + "learning_rate": 1.33409095988861e-05, + "loss": 0.5981, + "step": 20865 + }, + { + "epoch": 3.32, + "learning_rate": 1.3338627501502032e-05, + "loss": 0.669, + "step": 20866 + }, + { + "epoch": 3.32, + "learning_rate": 1.33363455283045e-05, + "loss": 0.6407, + "step": 20867 + }, + { + "epoch": 3.32, + "learning_rate": 1.3334063679317798e-05, + "loss": 0.6242, + "step": 20868 + }, + { + "epoch": 3.32, + "learning_rate": 1.3331781954566248e-05, + "loss": 0.6083, + "step": 20869 + }, + { + "epoch": 3.32, + "learning_rate": 1.3329500354074131e-05, + "loss": 0.6828, + "step": 20870 + }, + { + "epoch": 3.32, + "learning_rate": 1.3327218877865752e-05, + "loss": 0.6738, + "step": 20871 + }, + { + "epoch": 3.33, + "learning_rate": 1.3324937525965406e-05, + "loss": 0.6528, + "step": 20872 + }, + { + "epoch": 3.33, + "learning_rate": 1.3322656298397385e-05, + "loss": 0.6073, + "step": 20873 + }, + { + "epoch": 3.33, + "learning_rate": 1.3320375195185977e-05, + "loss": 0.6701, + "step": 20874 + }, + { + "epoch": 3.33, + "learning_rate": 1.3318094216355492e-05, + "loss": 0.5885, + "step": 20875 + }, + { + "epoch": 3.33, + "learning_rate": 1.3315813361930213e-05, + "loss": 0.6918, + "step": 20876 + }, + { + "epoch": 3.33, + "learning_rate": 1.3313532631934426e-05, + "loss": 0.5893, + "step": 20877 + }, + { + "epoch": 3.33, + "learning_rate": 1.331125202639243e-05, + "loss": 0.6867, + "step": 20878 + }, + { + "epoch": 3.33, + "learning_rate": 1.3308971545328487e-05, + "loss": 0.6757, + "step": 20879 + }, + { + "epoch": 3.33, + "learning_rate": 1.3306691188766906e-05, + "loss": 0.558, + "step": 20880 + }, + { + "epoch": 3.33, + "learning_rate": 1.3304410956731966e-05, + "loss": 0.5751, + "step": 20881 + }, + { + "epoch": 3.33, + "learning_rate": 1.3302130849247948e-05, + "loss": 0.6416, + "step": 20882 + }, + { + "epoch": 3.33, + "learning_rate": 1.3299850866339137e-05, + "loss": 0.7485, + "step": 20883 + }, + { + "epoch": 3.33, + "learning_rate": 1.3297571008029808e-05, + "loss": 0.6035, + "step": 20884 + }, + { + "epoch": 3.33, + "learning_rate": 1.3295291274344234e-05, + "loss": 0.6254, + "step": 20885 + }, + { + "epoch": 3.33, + "learning_rate": 1.3293011665306716e-05, + "loss": 0.5952, + "step": 20886 + }, + { + "epoch": 3.33, + "learning_rate": 1.3290732180941508e-05, + "loss": 0.6506, + "step": 20887 + }, + { + "epoch": 3.33, + "learning_rate": 1.32884528212729e-05, + "loss": 0.6177, + "step": 20888 + }, + { + "epoch": 3.33, + "learning_rate": 1.3286173586325157e-05, + "loss": 0.6674, + "step": 20889 + }, + { + "epoch": 3.33, + "learning_rate": 1.3283894476122555e-05, + "loss": 0.613, + "step": 20890 + }, + { + "epoch": 3.33, + "learning_rate": 1.3281615490689354e-05, + "loss": 0.6483, + "step": 20891 + }, + { + "epoch": 3.33, + "learning_rate": 1.3279336630049837e-05, + "loss": 0.637, + "step": 20892 + }, + { + "epoch": 3.33, + "learning_rate": 1.3277057894228276e-05, + "loss": 0.6326, + "step": 20893 + }, + { + "epoch": 3.33, + "learning_rate": 1.3274779283248928e-05, + "loss": 0.6178, + "step": 20894 + }, + { + "epoch": 3.33, + "learning_rate": 1.3272500797136066e-05, + "loss": 0.5787, + "step": 20895 + }, + { + "epoch": 3.33, + "learning_rate": 1.3270222435913948e-05, + "loss": 0.5981, + "step": 20896 + }, + { + "epoch": 3.33, + "learning_rate": 1.326794419960683e-05, + "loss": 0.6782, + "step": 20897 + }, + { + "epoch": 3.33, + "learning_rate": 1.3265666088239004e-05, + "loss": 0.6181, + "step": 20898 + }, + { + "epoch": 3.33, + "learning_rate": 1.32633881018347e-05, + "loss": 0.5885, + "step": 20899 + }, + { + "epoch": 3.33, + "learning_rate": 1.3261110240418184e-05, + "loss": 0.5769, + "step": 20900 + }, + { + "epoch": 3.33, + "learning_rate": 1.3258832504013721e-05, + "loss": 0.5944, + "step": 20901 + }, + { + "epoch": 3.33, + "learning_rate": 1.3256554892645553e-05, + "loss": 0.6076, + "step": 20902 + }, + { + "epoch": 3.33, + "learning_rate": 1.3254277406337956e-05, + "loss": 0.611, + "step": 20903 + }, + { + "epoch": 3.33, + "learning_rate": 1.325200004511517e-05, + "loss": 0.6458, + "step": 20904 + }, + { + "epoch": 3.33, + "learning_rate": 1.3249722809001456e-05, + "loss": 0.5873, + "step": 20905 + }, + { + "epoch": 3.33, + "learning_rate": 1.3247445698021057e-05, + "loss": 0.5814, + "step": 20906 + }, + { + "epoch": 3.33, + "learning_rate": 1.324516871219823e-05, + "loss": 0.6021, + "step": 20907 + }, + { + "epoch": 3.33, + "learning_rate": 1.3242891851557205e-05, + "loss": 0.6136, + "step": 20908 + }, + { + "epoch": 3.33, + "learning_rate": 1.3240615116122257e-05, + "loss": 0.5978, + "step": 20909 + }, + { + "epoch": 3.33, + "learning_rate": 1.323833850591762e-05, + "loss": 0.5301, + "step": 20910 + }, + { + "epoch": 3.33, + "learning_rate": 1.3236062020967533e-05, + "loss": 0.6961, + "step": 20911 + }, + { + "epoch": 3.33, + "learning_rate": 1.3233785661296244e-05, + "loss": 0.628, + "step": 20912 + }, + { + "epoch": 3.33, + "learning_rate": 1.3231509426927995e-05, + "loss": 0.5831, + "step": 20913 + }, + { + "epoch": 3.33, + "learning_rate": 1.3229233317887019e-05, + "loss": 0.5717, + "step": 20914 + }, + { + "epoch": 3.33, + "learning_rate": 1.322695733419757e-05, + "loss": 0.6861, + "step": 20915 + }, + { + "epoch": 3.33, + "learning_rate": 1.3224681475883876e-05, + "loss": 0.595, + "step": 20916 + }, + { + "epoch": 3.33, + "learning_rate": 1.3222405742970175e-05, + "loss": 0.6657, + "step": 20917 + }, + { + "epoch": 3.33, + "learning_rate": 1.3220130135480715e-05, + "loss": 0.6597, + "step": 20918 + }, + { + "epoch": 3.33, + "learning_rate": 1.3217854653439698e-05, + "loss": 0.6961, + "step": 20919 + }, + { + "epoch": 3.33, + "learning_rate": 1.3215579296871384e-05, + "loss": 0.5967, + "step": 20920 + }, + { + "epoch": 3.33, + "learning_rate": 1.3213304065799995e-05, + "loss": 0.6528, + "step": 20921 + }, + { + "epoch": 3.33, + "learning_rate": 1.3211028960249764e-05, + "loss": 0.6815, + "step": 20922 + }, + { + "epoch": 3.33, + "learning_rate": 1.3208753980244915e-05, + "loss": 0.6737, + "step": 20923 + }, + { + "epoch": 3.33, + "learning_rate": 1.3206479125809679e-05, + "loss": 0.5851, + "step": 20924 + }, + { + "epoch": 3.33, + "learning_rate": 1.320420439696827e-05, + "loss": 0.5898, + "step": 20925 + }, + { + "epoch": 3.33, + "learning_rate": 1.3201929793744933e-05, + "loss": 0.6388, + "step": 20926 + }, + { + "epoch": 3.33, + "learning_rate": 1.319965531616388e-05, + "loss": 0.6462, + "step": 20927 + }, + { + "epoch": 3.33, + "learning_rate": 1.3197380964249334e-05, + "loss": 0.607, + "step": 20928 + }, + { + "epoch": 3.33, + "learning_rate": 1.3195106738025517e-05, + "loss": 0.5784, + "step": 20929 + }, + { + "epoch": 3.33, + "learning_rate": 1.3192832637516645e-05, + "loss": 0.6311, + "step": 20930 + }, + { + "epoch": 3.33, + "learning_rate": 1.3190558662746939e-05, + "loss": 0.6724, + "step": 20931 + }, + { + "epoch": 3.33, + "learning_rate": 1.3188284813740598e-05, + "loss": 0.6003, + "step": 20932 + }, + { + "epoch": 3.33, + "learning_rate": 1.3186011090521868e-05, + "loss": 0.6472, + "step": 20933 + }, + { + "epoch": 3.34, + "learning_rate": 1.3183737493114945e-05, + "loss": 0.5828, + "step": 20934 + }, + { + "epoch": 3.34, + "learning_rate": 1.3181464021544043e-05, + "loss": 0.6375, + "step": 20935 + }, + { + "epoch": 3.34, + "learning_rate": 1.3179190675833377e-05, + "loss": 0.5916, + "step": 20936 + }, + { + "epoch": 3.34, + "learning_rate": 1.3176917456007154e-05, + "loss": 0.6832, + "step": 20937 + }, + { + "epoch": 3.34, + "learning_rate": 1.3174644362089578e-05, + "loss": 0.5824, + "step": 20938 + }, + { + "epoch": 3.34, + "learning_rate": 1.3172371394104866e-05, + "loss": 0.6746, + "step": 20939 + }, + { + "epoch": 3.34, + "learning_rate": 1.3170098552077214e-05, + "loss": 0.6047, + "step": 20940 + }, + { + "epoch": 3.34, + "learning_rate": 1.3167825836030827e-05, + "loss": 0.6197, + "step": 20941 + }, + { + "epoch": 3.34, + "learning_rate": 1.3165553245989918e-05, + "loss": 0.6294, + "step": 20942 + }, + { + "epoch": 3.34, + "learning_rate": 1.316328078197867e-05, + "loss": 0.6366, + "step": 20943 + }, + { + "epoch": 3.34, + "learning_rate": 1.3161008444021305e-05, + "loss": 0.5936, + "step": 20944 + }, + { + "epoch": 3.34, + "learning_rate": 1.315873623214201e-05, + "loss": 0.6134, + "step": 20945 + }, + { + "epoch": 3.34, + "learning_rate": 1.3156464146364988e-05, + "loss": 0.6363, + "step": 20946 + }, + { + "epoch": 3.34, + "learning_rate": 1.3154192186714432e-05, + "loss": 0.6668, + "step": 20947 + }, + { + "epoch": 3.34, + "learning_rate": 1.3151920353214536e-05, + "loss": 0.6689, + "step": 20948 + }, + { + "epoch": 3.34, + "learning_rate": 1.3149648645889487e-05, + "loss": 0.6147, + "step": 20949 + }, + { + "epoch": 3.34, + "learning_rate": 1.3147377064763495e-05, + "loss": 0.6533, + "step": 20950 + }, + { + "epoch": 3.34, + "learning_rate": 1.3145105609860741e-05, + "loss": 0.6608, + "step": 20951 + }, + { + "epoch": 3.34, + "learning_rate": 1.3142834281205413e-05, + "loss": 0.6171, + "step": 20952 + }, + { + "epoch": 3.34, + "learning_rate": 1.3140563078821704e-05, + "loss": 0.7351, + "step": 20953 + }, + { + "epoch": 3.34, + "learning_rate": 1.3138292002733798e-05, + "loss": 0.7417, + "step": 20954 + }, + { + "epoch": 3.34, + "learning_rate": 1.3136021052965868e-05, + "loss": 0.6139, + "step": 20955 + }, + { + "epoch": 3.34, + "learning_rate": 1.3133750229542125e-05, + "loss": 0.6064, + "step": 20956 + }, + { + "epoch": 3.34, + "learning_rate": 1.3131479532486738e-05, + "loss": 0.6875, + "step": 20957 + }, + { + "epoch": 3.34, + "learning_rate": 1.3129208961823885e-05, + "loss": 0.6935, + "step": 20958 + }, + { + "epoch": 3.34, + "learning_rate": 1.3126938517577762e-05, + "loss": 0.6731, + "step": 20959 + }, + { + "epoch": 3.34, + "learning_rate": 1.3124668199772513e-05, + "loss": 0.667, + "step": 20960 + }, + { + "epoch": 3.34, + "learning_rate": 1.3122398008432351e-05, + "loss": 0.6103, + "step": 20961 + }, + { + "epoch": 3.34, + "learning_rate": 1.3120127943581438e-05, + "loss": 0.6418, + "step": 20962 + }, + { + "epoch": 3.34, + "learning_rate": 1.3117858005243949e-05, + "loss": 0.6559, + "step": 20963 + }, + { + "epoch": 3.34, + "learning_rate": 1.3115588193444059e-05, + "loss": 0.6726, + "step": 20964 + }, + { + "epoch": 3.34, + "learning_rate": 1.3113318508205941e-05, + "loss": 0.578, + "step": 20965 + }, + { + "epoch": 3.34, + "learning_rate": 1.3111048949553751e-05, + "loss": 0.6574, + "step": 20966 + }, + { + "epoch": 3.34, + "learning_rate": 1.3108779517511682e-05, + "loss": 0.5822, + "step": 20967 + }, + { + "epoch": 3.34, + "learning_rate": 1.3106510212103895e-05, + "loss": 0.6068, + "step": 20968 + }, + { + "epoch": 3.34, + "learning_rate": 1.3104241033354553e-05, + "loss": 0.648, + "step": 20969 + }, + { + "epoch": 3.34, + "learning_rate": 1.310197198128782e-05, + "loss": 0.7358, + "step": 20970 + }, + { + "epoch": 3.34, + "learning_rate": 1.3099703055927865e-05, + "loss": 0.5516, + "step": 20971 + }, + { + "epoch": 3.34, + "learning_rate": 1.3097434257298835e-05, + "loss": 0.6727, + "step": 20972 + }, + { + "epoch": 3.34, + "learning_rate": 1.3095165585424913e-05, + "loss": 0.6551, + "step": 20973 + }, + { + "epoch": 3.34, + "learning_rate": 1.3092897040330252e-05, + "loss": 0.5953, + "step": 20974 + }, + { + "epoch": 3.34, + "learning_rate": 1.3090628622039009e-05, + "loss": 0.6327, + "step": 20975 + }, + { + "epoch": 3.34, + "learning_rate": 1.3088360330575338e-05, + "loss": 0.6136, + "step": 20976 + }, + { + "epoch": 3.34, + "learning_rate": 1.3086092165963399e-05, + "loss": 0.6427, + "step": 20977 + }, + { + "epoch": 3.34, + "learning_rate": 1.3083824128227335e-05, + "loss": 0.6817, + "step": 20978 + }, + { + "epoch": 3.34, + "learning_rate": 1.3081556217391331e-05, + "loss": 0.6141, + "step": 20979 + }, + { + "epoch": 3.34, + "learning_rate": 1.3079288433479506e-05, + "loss": 0.6605, + "step": 20980 + }, + { + "epoch": 3.34, + "learning_rate": 1.3077020776516025e-05, + "loss": 0.6514, + "step": 20981 + }, + { + "epoch": 3.34, + "learning_rate": 1.3074753246525028e-05, + "loss": 0.7219, + "step": 20982 + }, + { + "epoch": 3.34, + "learning_rate": 1.3072485843530664e-05, + "loss": 0.6178, + "step": 20983 + }, + { + "epoch": 3.34, + "learning_rate": 1.3070218567557094e-05, + "loss": 0.6117, + "step": 20984 + }, + { + "epoch": 3.34, + "learning_rate": 1.3067951418628452e-05, + "loss": 0.6222, + "step": 20985 + }, + { + "epoch": 3.34, + "learning_rate": 1.3065684396768885e-05, + "loss": 0.5986, + "step": 20986 + }, + { + "epoch": 3.34, + "learning_rate": 1.306341750200253e-05, + "loss": 0.6223, + "step": 20987 + }, + { + "epoch": 3.34, + "learning_rate": 1.3061150734353536e-05, + "loss": 0.6094, + "step": 20988 + }, + { + "epoch": 3.34, + "learning_rate": 1.3058884093846025e-05, + "loss": 0.5999, + "step": 20989 + }, + { + "epoch": 3.34, + "learning_rate": 1.305661758050416e-05, + "loss": 0.5929, + "step": 20990 + }, + { + "epoch": 3.34, + "learning_rate": 1.3054351194352068e-05, + "loss": 0.6588, + "step": 20991 + }, + { + "epoch": 3.34, + "learning_rate": 1.3052084935413882e-05, + "loss": 0.6638, + "step": 20992 + }, + { + "epoch": 3.34, + "learning_rate": 1.304981880371374e-05, + "loss": 0.6509, + "step": 20993 + }, + { + "epoch": 3.34, + "learning_rate": 1.304755279927577e-05, + "loss": 0.6313, + "step": 20994 + }, + { + "epoch": 3.34, + "learning_rate": 1.3045286922124097e-05, + "loss": 0.6601, + "step": 20995 + }, + { + "epoch": 3.34, + "learning_rate": 1.304302117228287e-05, + "loss": 0.6962, + "step": 20996 + }, + { + "epoch": 3.35, + "learning_rate": 1.304075554977621e-05, + "loss": 0.6094, + "step": 20997 + }, + { + "epoch": 3.35, + "learning_rate": 1.3038490054628241e-05, + "loss": 0.5675, + "step": 20998 + }, + { + "epoch": 3.35, + "learning_rate": 1.3036224686863103e-05, + "loss": 0.6254, + "step": 20999 + }, + { + "epoch": 3.35, + "learning_rate": 1.3033959446504884e-05, + "loss": 0.5884, + "step": 21000 + } + ], + "max_steps": 31385, + "num_train_epochs": 5, + "total_flos": 8.62762112472685e+18, + "trial_name": null, + "trial_params": null +}