{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999363016752659, "eval_steps": 500, "global_step": 7849, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 1.736907991191692, "learning_rate": 8.474576271186441e-09, "loss": 0.6756, "step": 1 }, { "epoch": 0.0, "grad_norm": 2.037276039206706, "learning_rate": 1.6949152542372882e-08, "loss": 0.8562, "step": 2 }, { "epoch": 0.0, "grad_norm": 1.9584492301874183, "learning_rate": 2.5423728813559323e-08, "loss": 0.8069, "step": 3 }, { "epoch": 0.0, "grad_norm": 1.6159148861978476, "learning_rate": 3.3898305084745764e-08, "loss": 0.6642, "step": 4 }, { "epoch": 0.0, "grad_norm": 1.7507462262585467, "learning_rate": 4.23728813559322e-08, "loss": 0.6532, "step": 5 }, { "epoch": 0.0, "grad_norm": 1.72856886019445, "learning_rate": 5.0847457627118645e-08, "loss": 0.643, "step": 6 }, { "epoch": 0.0, "grad_norm": 1.8661752453165121, "learning_rate": 5.932203389830508e-08, "loss": 0.7828, "step": 7 }, { "epoch": 0.0, "grad_norm": 1.7845271408013215, "learning_rate": 6.779661016949153e-08, "loss": 0.6582, "step": 8 }, { "epoch": 0.0, "grad_norm": 1.9253846188650432, "learning_rate": 7.627118644067796e-08, "loss": 0.8298, "step": 9 }, { "epoch": 0.0, "grad_norm": 1.9423391927911509, "learning_rate": 8.47457627118644e-08, "loss": 0.7816, "step": 10 }, { "epoch": 0.0, "grad_norm": 1.9276764473749466, "learning_rate": 9.322033898305084e-08, "loss": 0.837, "step": 11 }, { "epoch": 0.0, "grad_norm": 1.8090641850120723, "learning_rate": 1.0169491525423729e-07, "loss": 0.6383, "step": 12 }, { "epoch": 0.0, "grad_norm": 1.9330060837522742, "learning_rate": 1.1016949152542372e-07, "loss": 0.8522, "step": 13 }, { "epoch": 0.0, "grad_norm": 1.9741691900741056, "learning_rate": 1.1864406779661017e-07, "loss": 0.8672, "step": 14 }, { "epoch": 0.0, "grad_norm": 1.7229307581715625, "learning_rate": 1.271186440677966e-07, "loss": 0.6488, "step": 15 }, { "epoch": 0.0, "grad_norm": 1.7513066679665468, "learning_rate": 1.3559322033898305e-07, "loss": 0.6687, "step": 16 }, { "epoch": 0.0, "grad_norm": 2.124278059287193, "learning_rate": 1.440677966101695e-07, "loss": 0.8082, "step": 17 }, { "epoch": 0.0, "grad_norm": 2.452047425526202, "learning_rate": 1.5254237288135593e-07, "loss": 0.8645, "step": 18 }, { "epoch": 0.0, "grad_norm": 1.5986937931149383, "learning_rate": 1.6101694915254234e-07, "loss": 0.6787, "step": 19 }, { "epoch": 0.0, "grad_norm": 1.964984784515822, "learning_rate": 1.694915254237288e-07, "loss": 0.8268, "step": 20 }, { "epoch": 0.0, "grad_norm": 1.9634178593301097, "learning_rate": 1.7796610169491524e-07, "loss": 0.8554, "step": 21 }, { "epoch": 0.0, "grad_norm": 1.8321764340988278, "learning_rate": 1.8644067796610168e-07, "loss": 0.6615, "step": 22 }, { "epoch": 0.0, "grad_norm": 2.0461313931003997, "learning_rate": 1.9491525423728814e-07, "loss": 0.8641, "step": 23 }, { "epoch": 0.0, "grad_norm": 1.6747467242769618, "learning_rate": 2.0338983050847458e-07, "loss": 0.6928, "step": 24 }, { "epoch": 0.0, "grad_norm": 1.5553017316746622, "learning_rate": 2.11864406779661e-07, "loss": 0.7425, "step": 25 }, { "epoch": 0.0, "grad_norm": 1.8900258047171405, "learning_rate": 2.2033898305084743e-07, "loss": 0.8241, "step": 26 }, { "epoch": 0.0, "grad_norm": 1.7904468737726056, "learning_rate": 2.288135593220339e-07, "loss": 0.7992, "step": 27 }, { "epoch": 0.0, "grad_norm": 1.7227275208881083, "learning_rate": 2.3728813559322033e-07, "loss": 0.6786, "step": 28 }, { "epoch": 0.0, "grad_norm": 1.6089708826434301, "learning_rate": 2.457627118644068e-07, "loss": 0.6599, "step": 29 }, { "epoch": 0.0, "grad_norm": 2.2838727573755087, "learning_rate": 2.542372881355932e-07, "loss": 0.866, "step": 30 }, { "epoch": 0.0, "grad_norm": 1.6500016352473748, "learning_rate": 2.6271186440677967e-07, "loss": 0.656, "step": 31 }, { "epoch": 0.0, "grad_norm": 1.7800906444768814, "learning_rate": 2.711864406779661e-07, "loss": 0.7132, "step": 32 }, { "epoch": 0.0, "grad_norm": 1.7238876681241084, "learning_rate": 2.796610169491525e-07, "loss": 0.6724, "step": 33 }, { "epoch": 0.0, "grad_norm": 1.5146284027965435, "learning_rate": 2.88135593220339e-07, "loss": 0.6049, "step": 34 }, { "epoch": 0.0, "grad_norm": 1.5963153470274938, "learning_rate": 2.966101694915254e-07, "loss": 0.6265, "step": 35 }, { "epoch": 0.0, "grad_norm": 1.8534980062117663, "learning_rate": 3.0508474576271186e-07, "loss": 0.8883, "step": 36 }, { "epoch": 0.0, "grad_norm": 1.7748900141364816, "learning_rate": 3.135593220338983e-07, "loss": 0.7677, "step": 37 }, { "epoch": 0.0, "grad_norm": 1.6702097228507513, "learning_rate": 3.220338983050847e-07, "loss": 0.6794, "step": 38 }, { "epoch": 0.0, "grad_norm": 1.880199727289493, "learning_rate": 3.3050847457627117e-07, "loss": 0.8155, "step": 39 }, { "epoch": 0.01, "grad_norm": 1.4414042551540716, "learning_rate": 3.389830508474576e-07, "loss": 0.6662, "step": 40 }, { "epoch": 0.01, "grad_norm": 1.8226410905100863, "learning_rate": 3.4745762711864405e-07, "loss": 0.8292, "step": 41 }, { "epoch": 0.01, "grad_norm": 1.4265036111952265, "learning_rate": 3.559322033898305e-07, "loss": 0.735, "step": 42 }, { "epoch": 0.01, "grad_norm": 1.3513287066156798, "learning_rate": 3.644067796610169e-07, "loss": 0.5731, "step": 43 }, { "epoch": 0.01, "grad_norm": 1.4786209127441055, "learning_rate": 3.7288135593220336e-07, "loss": 0.6241, "step": 44 }, { "epoch": 0.01, "grad_norm": 1.8510598139820174, "learning_rate": 3.813559322033898e-07, "loss": 0.8815, "step": 45 }, { "epoch": 0.01, "grad_norm": 1.3290851439739941, "learning_rate": 3.898305084745763e-07, "loss": 0.6544, "step": 46 }, { "epoch": 0.01, "grad_norm": 1.33582877345575, "learning_rate": 3.9830508474576267e-07, "loss": 0.6757, "step": 47 }, { "epoch": 0.01, "grad_norm": 1.979031845647636, "learning_rate": 4.0677966101694916e-07, "loss": 0.8738, "step": 48 }, { "epoch": 0.01, "grad_norm": 1.751990682888358, "learning_rate": 4.152542372881356e-07, "loss": 0.8014, "step": 49 }, { "epoch": 0.01, "grad_norm": 1.3604883945939532, "learning_rate": 4.23728813559322e-07, "loss": 0.6335, "step": 50 }, { "epoch": 0.01, "grad_norm": 1.6485861706131393, "learning_rate": 4.322033898305085e-07, "loss": 0.8294, "step": 51 }, { "epoch": 0.01, "grad_norm": 1.1751355376740993, "learning_rate": 4.4067796610169486e-07, "loss": 0.5926, "step": 52 }, { "epoch": 0.01, "grad_norm": 1.12095195415341, "learning_rate": 4.4915254237288135e-07, "loss": 0.64, "step": 53 }, { "epoch": 0.01, "grad_norm": 1.4812998599977767, "learning_rate": 4.576271186440678e-07, "loss": 0.7446, "step": 54 }, { "epoch": 0.01, "grad_norm": 1.2057323995503466, "learning_rate": 4.661016949152542e-07, "loss": 0.6702, "step": 55 }, { "epoch": 0.01, "grad_norm": 1.5227887973019671, "learning_rate": 4.7457627118644066e-07, "loss": 0.7957, "step": 56 }, { "epoch": 0.01, "grad_norm": 1.645979191935828, "learning_rate": 4.830508474576271e-07, "loss": 0.8409, "step": 57 }, { "epoch": 0.01, "grad_norm": 1.06135731514654, "learning_rate": 4.915254237288136e-07, "loss": 0.6786, "step": 58 }, { "epoch": 0.01, "grad_norm": 0.9645039052788676, "learning_rate": 5e-07, "loss": 0.6491, "step": 59 }, { "epoch": 0.01, "grad_norm": 1.1322859683136068, "learning_rate": 5.084745762711864e-07, "loss": 0.6446, "step": 60 }, { "epoch": 0.01, "grad_norm": 1.5744537082289736, "learning_rate": 5.169491525423729e-07, "loss": 0.8238, "step": 61 }, { "epoch": 0.01, "grad_norm": 1.0371717244642593, "learning_rate": 5.254237288135593e-07, "loss": 0.619, "step": 62 }, { "epoch": 0.01, "grad_norm": 1.0200097922405438, "learning_rate": 5.338983050847457e-07, "loss": 0.7248, "step": 63 }, { "epoch": 0.01, "grad_norm": 0.9014095782184974, "learning_rate": 5.423728813559322e-07, "loss": 0.5905, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.3748332438239352, "learning_rate": 5.508474576271186e-07, "loss": 0.7573, "step": 65 }, { "epoch": 0.01, "grad_norm": 0.941859025394431, "learning_rate": 5.59322033898305e-07, "loss": 0.6675, "step": 66 }, { "epoch": 0.01, "grad_norm": 1.542485954445273, "learning_rate": 5.677966101694916e-07, "loss": 0.8226, "step": 67 }, { "epoch": 0.01, "grad_norm": 0.8670841859736869, "learning_rate": 5.76271186440678e-07, "loss": 0.6528, "step": 68 }, { "epoch": 0.01, "grad_norm": 0.8206143637356134, "learning_rate": 5.847457627118644e-07, "loss": 0.6114, "step": 69 }, { "epoch": 0.01, "grad_norm": 1.1695599053335903, "learning_rate": 5.932203389830508e-07, "loss": 0.7393, "step": 70 }, { "epoch": 0.01, "grad_norm": 0.8807560514359539, "learning_rate": 6.016949152542372e-07, "loss": 0.6264, "step": 71 }, { "epoch": 0.01, "grad_norm": 0.8495158693418726, "learning_rate": 6.101694915254237e-07, "loss": 0.5833, "step": 72 }, { "epoch": 0.01, "grad_norm": 1.1037404169285423, "learning_rate": 6.186440677966102e-07, "loss": 0.8157, "step": 73 }, { "epoch": 0.01, "grad_norm": 1.3898605466952594, "learning_rate": 6.271186440677966e-07, "loss": 0.791, "step": 74 }, { "epoch": 0.01, "grad_norm": 1.1774056983193821, "learning_rate": 6.35593220338983e-07, "loss": 0.7602, "step": 75 }, { "epoch": 0.01, "grad_norm": 0.8878185503609648, "learning_rate": 6.440677966101694e-07, "loss": 0.6094, "step": 76 }, { "epoch": 0.01, "grad_norm": 0.9377796089897491, "learning_rate": 6.52542372881356e-07, "loss": 0.6189, "step": 77 }, { "epoch": 0.01, "grad_norm": 0.7735081300754656, "learning_rate": 6.610169491525423e-07, "loss": 0.5823, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.3035050675411433, "learning_rate": 6.694915254237287e-07, "loss": 0.7418, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.0666600791413023, "learning_rate": 6.779661016949152e-07, "loss": 0.7282, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.017823886818616, "learning_rate": 6.864406779661016e-07, "loss": 0.7493, "step": 81 }, { "epoch": 0.01, "grad_norm": 0.7547975133145556, "learning_rate": 6.949152542372881e-07, "loss": 0.5746, "step": 82 }, { "epoch": 0.01, "grad_norm": 0.73184861994517, "learning_rate": 7.033898305084746e-07, "loss": 0.6166, "step": 83 }, { "epoch": 0.01, "grad_norm": 0.9582702160049402, "learning_rate": 7.11864406779661e-07, "loss": 0.6313, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.1239899531200919, "learning_rate": 7.203389830508474e-07, "loss": 0.7616, "step": 85 }, { "epoch": 0.01, "grad_norm": 2.3986129979013686, "learning_rate": 7.288135593220338e-07, "loss": 0.8416, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.0064870167909685, "learning_rate": 7.372881355932203e-07, "loss": 0.7437, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.029078360489064, "learning_rate": 7.457627118644067e-07, "loss": 0.6885, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.0839861541694729, "learning_rate": 7.542372881355932e-07, "loss": 0.7679, "step": 89 }, { "epoch": 0.01, "grad_norm": 0.7358869048632126, "learning_rate": 7.627118644067796e-07, "loss": 0.5264, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.2088648839893386, "learning_rate": 7.711864406779661e-07, "loss": 0.7689, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.120303884023188, "learning_rate": 7.796610169491526e-07, "loss": 0.8094, "step": 92 }, { "epoch": 0.01, "grad_norm": 0.9631871045216217, "learning_rate": 7.88135593220339e-07, "loss": 0.7234, "step": 93 }, { "epoch": 0.01, "grad_norm": 0.7794033238755568, "learning_rate": 7.966101694915253e-07, "loss": 0.7052, "step": 94 }, { "epoch": 0.01, "grad_norm": 0.9551524577761956, "learning_rate": 8.050847457627118e-07, "loss": 0.7059, "step": 95 }, { "epoch": 0.01, "grad_norm": 1.241098614689768, "learning_rate": 8.135593220338983e-07, "loss": 0.7344, "step": 96 }, { "epoch": 0.01, "grad_norm": 0.685452828600968, "learning_rate": 8.220338983050847e-07, "loss": 0.5852, "step": 97 }, { "epoch": 0.01, "grad_norm": 0.9908084317417779, "learning_rate": 8.305084745762712e-07, "loss": 0.7078, "step": 98 }, { "epoch": 0.01, "grad_norm": 0.7759864093838978, "learning_rate": 8.389830508474576e-07, "loss": 0.6286, "step": 99 }, { "epoch": 0.01, "grad_norm": 0.6588435616176912, "learning_rate": 8.47457627118644e-07, "loss": 0.6297, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.0780622896941123, "learning_rate": 8.559322033898306e-07, "loss": 0.7916, "step": 101 }, { "epoch": 0.01, "grad_norm": 0.8634576635077581, "learning_rate": 8.64406779661017e-07, "loss": 0.6609, "step": 102 }, { "epoch": 0.01, "grad_norm": 0.9633133489846274, "learning_rate": 8.728813559322033e-07, "loss": 0.7608, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.0763224900705421, "learning_rate": 8.813559322033897e-07, "loss": 0.7842, "step": 104 }, { "epoch": 0.01, "grad_norm": 0.6933564189256128, "learning_rate": 8.898305084745762e-07, "loss": 0.5805, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.2201861803855387, "learning_rate": 8.983050847457627e-07, "loss": 0.7236, "step": 106 }, { "epoch": 0.01, "grad_norm": 0.9969497849781063, "learning_rate": 9.067796610169491e-07, "loss": 0.7544, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.0977139267521236, "learning_rate": 9.152542372881356e-07, "loss": 0.7179, "step": 108 }, { "epoch": 0.01, "grad_norm": 0.8838634691331352, "learning_rate": 9.23728813559322e-07, "loss": 0.7207, "step": 109 }, { "epoch": 0.01, "grad_norm": 0.748110567914877, "learning_rate": 9.322033898305083e-07, "loss": 0.6404, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.9035080674620525, "learning_rate": 9.406779661016949e-07, "loss": 0.666, "step": 111 }, { "epoch": 0.01, "grad_norm": 0.8215729440307579, "learning_rate": 9.491525423728813e-07, "loss": 0.6738, "step": 112 }, { "epoch": 0.01, "grad_norm": 0.9985290224208793, "learning_rate": 9.576271186440678e-07, "loss": 0.7507, "step": 113 }, { "epoch": 0.01, "grad_norm": 0.8881217346231228, "learning_rate": 9.661016949152542e-07, "loss": 0.6597, "step": 114 }, { "epoch": 0.01, "grad_norm": 0.8387068607992167, "learning_rate": 9.745762711864406e-07, "loss": 0.5862, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.7013763758333112, "learning_rate": 9.830508474576272e-07, "loss": 0.638, "step": 116 }, { "epoch": 0.01, "grad_norm": 0.9729094966631777, "learning_rate": 9.915254237288136e-07, "loss": 0.7219, "step": 117 }, { "epoch": 0.02, "grad_norm": 1.075490651244943, "learning_rate": 1e-06, "loss": 0.7233, "step": 118 }, { "epoch": 0.02, "grad_norm": 1.4660095232612844, "learning_rate": 1.0084745762711863e-06, "loss": 0.7159, "step": 119 }, { "epoch": 0.02, "grad_norm": 0.6953319855373109, "learning_rate": 1.0169491525423727e-06, "loss": 0.6101, "step": 120 }, { "epoch": 0.02, "grad_norm": 0.7487617910462017, "learning_rate": 1.0254237288135591e-06, "loss": 0.5972, "step": 121 }, { "epoch": 0.02, "grad_norm": 0.9754826562644147, "learning_rate": 1.0338983050847457e-06, "loss": 0.7256, "step": 122 }, { "epoch": 0.02, "grad_norm": 0.7014813783630192, "learning_rate": 1.0423728813559323e-06, "loss": 0.5945, "step": 123 }, { "epoch": 0.02, "grad_norm": 0.8535104986573425, "learning_rate": 1.0508474576271187e-06, "loss": 0.6683, "step": 124 }, { "epoch": 0.02, "grad_norm": 0.9792311827984785, "learning_rate": 1.059322033898305e-06, "loss": 0.692, "step": 125 }, { "epoch": 0.02, "grad_norm": 0.7402035619731817, "learning_rate": 1.0677966101694915e-06, "loss": 0.597, "step": 126 }, { "epoch": 0.02, "grad_norm": 0.7054330763593275, "learning_rate": 1.0762711864406778e-06, "loss": 0.5649, "step": 127 }, { "epoch": 0.02, "grad_norm": 0.7752496570670936, "learning_rate": 1.0847457627118644e-06, "loss": 0.6389, "step": 128 }, { "epoch": 0.02, "grad_norm": 0.708578374649769, "learning_rate": 1.0932203389830508e-06, "loss": 0.584, "step": 129 }, { "epoch": 0.02, "grad_norm": 0.7694352115175147, "learning_rate": 1.1016949152542372e-06, "loss": 0.6728, "step": 130 }, { "epoch": 0.02, "grad_norm": 1.1038397003062124, "learning_rate": 1.1101694915254236e-06, "loss": 0.7486, "step": 131 }, { "epoch": 0.02, "grad_norm": 1.2155941234125076, "learning_rate": 1.11864406779661e-06, "loss": 0.8189, "step": 132 }, { "epoch": 0.02, "grad_norm": 0.7971035604067273, "learning_rate": 1.1271186440677966e-06, "loss": 0.5827, "step": 133 }, { "epoch": 0.02, "grad_norm": 0.9358974996612655, "learning_rate": 1.1355932203389832e-06, "loss": 0.6304, "step": 134 }, { "epoch": 0.02, "grad_norm": 0.6799249401488097, "learning_rate": 1.1440677966101696e-06, "loss": 0.6041, "step": 135 }, { "epoch": 0.02, "grad_norm": 0.8728300170888112, "learning_rate": 1.152542372881356e-06, "loss": 0.7383, "step": 136 }, { "epoch": 0.02, "grad_norm": 0.9499540369375933, "learning_rate": 1.1610169491525423e-06, "loss": 0.718, "step": 137 }, { "epoch": 0.02, "grad_norm": 0.6769253124950282, "learning_rate": 1.1694915254237287e-06, "loss": 0.5817, "step": 138 }, { "epoch": 0.02, "grad_norm": 1.2204843539263122, "learning_rate": 1.177966101694915e-06, "loss": 0.743, "step": 139 }, { "epoch": 0.02, "grad_norm": 0.6716394240064161, "learning_rate": 1.1864406779661017e-06, "loss": 0.5908, "step": 140 }, { "epoch": 0.02, "grad_norm": 1.015631981503019, "learning_rate": 1.194915254237288e-06, "loss": 0.6783, "step": 141 }, { "epoch": 0.02, "grad_norm": 1.0183453896762789, "learning_rate": 1.2033898305084745e-06, "loss": 0.6437, "step": 142 }, { "epoch": 0.02, "grad_norm": 1.0723305416299242, "learning_rate": 1.211864406779661e-06, "loss": 0.7592, "step": 143 }, { "epoch": 0.02, "grad_norm": 0.6677300249274252, "learning_rate": 1.2203389830508474e-06, "loss": 0.5473, "step": 144 }, { "epoch": 0.02, "grad_norm": 0.7642530848110866, "learning_rate": 1.2288135593220338e-06, "loss": 0.6643, "step": 145 }, { "epoch": 0.02, "grad_norm": 0.7785754946837703, "learning_rate": 1.2372881355932204e-06, "loss": 0.7155, "step": 146 }, { "epoch": 0.02, "grad_norm": 0.8377482285397575, "learning_rate": 1.2457627118644068e-06, "loss": 0.5985, "step": 147 }, { "epoch": 0.02, "grad_norm": 1.0977320492459255, "learning_rate": 1.2542372881355932e-06, "loss": 0.7161, "step": 148 }, { "epoch": 0.02, "grad_norm": 0.6518293469641255, "learning_rate": 1.2627118644067796e-06, "loss": 0.5659, "step": 149 }, { "epoch": 0.02, "grad_norm": 0.7253123451641916, "learning_rate": 1.271186440677966e-06, "loss": 0.6691, "step": 150 }, { "epoch": 0.02, "grad_norm": 0.7245782196375631, "learning_rate": 1.2796610169491523e-06, "loss": 0.5796, "step": 151 }, { "epoch": 0.02, "grad_norm": 0.8873047078263828, "learning_rate": 1.2881355932203387e-06, "loss": 0.7144, "step": 152 }, { "epoch": 0.02, "grad_norm": 0.7978648182453759, "learning_rate": 1.2966101694915255e-06, "loss": 0.6118, "step": 153 }, { "epoch": 0.02, "grad_norm": 1.9492892287716275, "learning_rate": 1.305084745762712e-06, "loss": 0.726, "step": 154 }, { "epoch": 0.02, "grad_norm": 0.6411207453563611, "learning_rate": 1.3135593220338983e-06, "loss": 0.6108, "step": 155 }, { "epoch": 0.02, "grad_norm": 0.6765600144090188, "learning_rate": 1.3220338983050847e-06, "loss": 0.5846, "step": 156 }, { "epoch": 0.02, "grad_norm": 0.763246198843269, "learning_rate": 1.330508474576271e-06, "loss": 0.6339, "step": 157 }, { "epoch": 0.02, "grad_norm": 0.6774376546958141, "learning_rate": 1.3389830508474575e-06, "loss": 0.6263, "step": 158 }, { "epoch": 0.02, "grad_norm": 0.9304974601713167, "learning_rate": 1.347457627118644e-06, "loss": 0.7302, "step": 159 }, { "epoch": 0.02, "grad_norm": 0.7346087648293955, "learning_rate": 1.3559322033898304e-06, "loss": 0.6046, "step": 160 }, { "epoch": 0.02, "grad_norm": 0.7591533949697616, "learning_rate": 1.3644067796610168e-06, "loss": 0.6218, "step": 161 }, { "epoch": 0.02, "grad_norm": 1.3182181126014183, "learning_rate": 1.3728813559322032e-06, "loss": 0.699, "step": 162 }, { "epoch": 0.02, "grad_norm": 0.7867617731133156, "learning_rate": 1.3813559322033898e-06, "loss": 0.5479, "step": 163 }, { "epoch": 0.02, "grad_norm": 0.8341730596805433, "learning_rate": 1.3898305084745762e-06, "loss": 0.6217, "step": 164 }, { "epoch": 0.02, "grad_norm": 0.6068608848025123, "learning_rate": 1.3983050847457628e-06, "loss": 0.5667, "step": 165 }, { "epoch": 0.02, "grad_norm": 1.0938284867069135, "learning_rate": 1.4067796610169492e-06, "loss": 0.7484, "step": 166 }, { "epoch": 0.02, "grad_norm": 1.134039413749055, "learning_rate": 1.4152542372881356e-06, "loss": 0.7122, "step": 167 }, { "epoch": 0.02, "grad_norm": 0.6571908554662265, "learning_rate": 1.423728813559322e-06, "loss": 0.5929, "step": 168 }, { "epoch": 0.02, "grad_norm": 0.6333718599518113, "learning_rate": 1.4322033898305083e-06, "loss": 0.5604, "step": 169 }, { "epoch": 0.02, "grad_norm": 0.8441410553098189, "learning_rate": 1.4406779661016947e-06, "loss": 0.5584, "step": 170 }, { "epoch": 0.02, "grad_norm": 0.7553081798861739, "learning_rate": 1.4491525423728813e-06, "loss": 0.6201, "step": 171 }, { "epoch": 0.02, "grad_norm": 0.9715502616767301, "learning_rate": 1.4576271186440677e-06, "loss": 0.7184, "step": 172 }, { "epoch": 0.02, "grad_norm": 1.0630557439864587, "learning_rate": 1.4661016949152543e-06, "loss": 0.7068, "step": 173 }, { "epoch": 0.02, "grad_norm": 0.7085120333954494, "learning_rate": 1.4745762711864407e-06, "loss": 0.5687, "step": 174 }, { "epoch": 0.02, "grad_norm": 0.920567393855287, "learning_rate": 1.483050847457627e-06, "loss": 0.6649, "step": 175 }, { "epoch": 0.02, "grad_norm": 0.8859635417658249, "learning_rate": 1.4915254237288134e-06, "loss": 0.7284, "step": 176 }, { "epoch": 0.02, "grad_norm": 0.8575496337923804, "learning_rate": 1.5e-06, "loss": 0.6348, "step": 177 }, { "epoch": 0.02, "grad_norm": 0.6154629229842142, "learning_rate": 1.5084745762711864e-06, "loss": 0.5649, "step": 178 }, { "epoch": 0.02, "grad_norm": 0.7748967016331698, "learning_rate": 1.5169491525423728e-06, "loss": 0.6059, "step": 179 }, { "epoch": 0.02, "grad_norm": 0.6601826638948631, "learning_rate": 1.5254237288135592e-06, "loss": 0.6107, "step": 180 }, { "epoch": 0.02, "grad_norm": 0.7083304048584579, "learning_rate": 1.5338983050847456e-06, "loss": 0.6084, "step": 181 }, { "epoch": 0.02, "grad_norm": 1.0141184973912347, "learning_rate": 1.5423728813559322e-06, "loss": 0.727, "step": 182 }, { "epoch": 0.02, "grad_norm": 0.8866145473315817, "learning_rate": 1.5508474576271186e-06, "loss": 0.7106, "step": 183 }, { "epoch": 0.02, "grad_norm": 0.7883327930960949, "learning_rate": 1.5593220338983052e-06, "loss": 0.6055, "step": 184 }, { "epoch": 0.02, "grad_norm": 1.001263325318397, "learning_rate": 1.5677966101694915e-06, "loss": 0.7681, "step": 185 }, { "epoch": 0.02, "grad_norm": 0.886301112577374, "learning_rate": 1.576271186440678e-06, "loss": 0.7628, "step": 186 }, { "epoch": 0.02, "grad_norm": 0.7786724627559611, "learning_rate": 1.5847457627118643e-06, "loss": 0.5789, "step": 187 }, { "epoch": 0.02, "grad_norm": 0.792192151552707, "learning_rate": 1.5932203389830507e-06, "loss": 0.6173, "step": 188 }, { "epoch": 0.02, "grad_norm": 0.6851675266587554, "learning_rate": 1.601694915254237e-06, "loss": 0.5854, "step": 189 }, { "epoch": 0.02, "grad_norm": 0.6235249334397138, "learning_rate": 1.6101694915254237e-06, "loss": 0.6144, "step": 190 }, { "epoch": 0.02, "grad_norm": 1.3399635605596842, "learning_rate": 1.61864406779661e-06, "loss": 0.6902, "step": 191 }, { "epoch": 0.02, "grad_norm": 0.9262852886186115, "learning_rate": 1.6271186440677967e-06, "loss": 0.7442, "step": 192 }, { "epoch": 0.02, "grad_norm": 0.8481980246517118, "learning_rate": 1.635593220338983e-06, "loss": 0.6338, "step": 193 }, { "epoch": 0.02, "grad_norm": 1.1255447307355642, "learning_rate": 1.6440677966101694e-06, "loss": 0.7652, "step": 194 }, { "epoch": 0.02, "grad_norm": 0.8908038467928799, "learning_rate": 1.6525423728813558e-06, "loss": 0.6901, "step": 195 }, { "epoch": 0.02, "grad_norm": 0.8222134161165471, "learning_rate": 1.6610169491525424e-06, "loss": 0.6373, "step": 196 }, { "epoch": 0.03, "grad_norm": 0.7326869515693809, "learning_rate": 1.6694915254237288e-06, "loss": 0.615, "step": 197 }, { "epoch": 0.03, "grad_norm": 0.7054126369518261, "learning_rate": 1.6779661016949152e-06, "loss": 0.5617, "step": 198 }, { "epoch": 0.03, "grad_norm": 1.2784394598959574, "learning_rate": 1.6864406779661016e-06, "loss": 0.65, "step": 199 }, { "epoch": 0.03, "grad_norm": 0.8791275870588131, "learning_rate": 1.694915254237288e-06, "loss": 0.6436, "step": 200 }, { "epoch": 0.03, "grad_norm": 0.9542073313260363, "learning_rate": 1.7033898305084743e-06, "loss": 0.7317, "step": 201 }, { "epoch": 0.03, "grad_norm": 0.9229840191225319, "learning_rate": 1.7118644067796611e-06, "loss": 0.6364, "step": 202 }, { "epoch": 0.03, "grad_norm": 0.9622714461397894, "learning_rate": 1.7203389830508475e-06, "loss": 0.6868, "step": 203 }, { "epoch": 0.03, "grad_norm": 0.5899307448507893, "learning_rate": 1.728813559322034e-06, "loss": 0.5548, "step": 204 }, { "epoch": 0.03, "grad_norm": 0.6242083877762877, "learning_rate": 1.7372881355932203e-06, "loss": 0.5873, "step": 205 }, { "epoch": 0.03, "grad_norm": 0.692201534933359, "learning_rate": 1.7457627118644067e-06, "loss": 0.5528, "step": 206 }, { "epoch": 0.03, "grad_norm": 0.6975976844947165, "learning_rate": 1.754237288135593e-06, "loss": 0.61, "step": 207 }, { "epoch": 0.03, "grad_norm": 0.896528926808541, "learning_rate": 1.7627118644067794e-06, "loss": 0.6469, "step": 208 }, { "epoch": 0.03, "grad_norm": 1.173139683805913, "learning_rate": 1.771186440677966e-06, "loss": 0.6983, "step": 209 }, { "epoch": 0.03, "grad_norm": 0.7057066492934985, "learning_rate": 1.7796610169491524e-06, "loss": 0.562, "step": 210 }, { "epoch": 0.03, "grad_norm": 0.6674353468969882, "learning_rate": 1.7881355932203388e-06, "loss": 0.6048, "step": 211 }, { "epoch": 0.03, "grad_norm": 0.9005066283379761, "learning_rate": 1.7966101694915254e-06, "loss": 0.6698, "step": 212 }, { "epoch": 0.03, "grad_norm": 0.7900697075878133, "learning_rate": 1.8050847457627118e-06, "loss": 0.7254, "step": 213 }, { "epoch": 0.03, "grad_norm": 0.6709247827891486, "learning_rate": 1.8135593220338982e-06, "loss": 0.5656, "step": 214 }, { "epoch": 0.03, "grad_norm": 0.7225734968984099, "learning_rate": 1.8220338983050848e-06, "loss": 0.6251, "step": 215 }, { "epoch": 0.03, "grad_norm": 0.7523079531355961, "learning_rate": 1.8305084745762712e-06, "loss": 0.6351, "step": 216 }, { "epoch": 0.03, "grad_norm": 0.9032931881053967, "learning_rate": 1.8389830508474575e-06, "loss": 0.6848, "step": 217 }, { "epoch": 0.03, "grad_norm": 0.7338938725122566, "learning_rate": 1.847457627118644e-06, "loss": 0.5706, "step": 218 }, { "epoch": 0.03, "grad_norm": 0.8021844151100668, "learning_rate": 1.8559322033898303e-06, "loss": 0.5991, "step": 219 }, { "epoch": 0.03, "grad_norm": 0.7449679398020179, "learning_rate": 1.8644067796610167e-06, "loss": 0.6129, "step": 220 }, { "epoch": 0.03, "grad_norm": 0.8654100394073853, "learning_rate": 1.8728813559322033e-06, "loss": 0.653, "step": 221 }, { "epoch": 0.03, "grad_norm": 0.7538362027872093, "learning_rate": 1.8813559322033899e-06, "loss": 0.6084, "step": 222 }, { "epoch": 0.03, "grad_norm": 0.728231635158702, "learning_rate": 1.8898305084745763e-06, "loss": 0.6296, "step": 223 }, { "epoch": 0.03, "grad_norm": 0.6250014532731726, "learning_rate": 1.8983050847457627e-06, "loss": 0.6077, "step": 224 }, { "epoch": 0.03, "grad_norm": 0.6077670298424928, "learning_rate": 1.906779661016949e-06, "loss": 0.6227, "step": 225 }, { "epoch": 0.03, "grad_norm": 0.5565700456470007, "learning_rate": 1.9152542372881356e-06, "loss": 0.4893, "step": 226 }, { "epoch": 0.03, "grad_norm": 0.6178364207463651, "learning_rate": 1.923728813559322e-06, "loss": 0.5292, "step": 227 }, { "epoch": 0.03, "grad_norm": 0.8163350391093931, "learning_rate": 1.9322033898305084e-06, "loss": 0.6135, "step": 228 }, { "epoch": 0.03, "grad_norm": 0.6997941270332168, "learning_rate": 1.9406779661016946e-06, "loss": 0.6565, "step": 229 }, { "epoch": 0.03, "grad_norm": 1.100103558563823, "learning_rate": 1.949152542372881e-06, "loss": 0.7274, "step": 230 }, { "epoch": 0.03, "grad_norm": 0.9737007994959831, "learning_rate": 1.9576271186440678e-06, "loss": 0.7127, "step": 231 }, { "epoch": 0.03, "grad_norm": 0.8818286457797845, "learning_rate": 1.9661016949152544e-06, "loss": 0.5957, "step": 232 }, { "epoch": 0.03, "grad_norm": 0.7042373402430965, "learning_rate": 1.9745762711864405e-06, "loss": 0.6315, "step": 233 }, { "epoch": 0.03, "grad_norm": 0.8290814391329554, "learning_rate": 1.983050847457627e-06, "loss": 0.6889, "step": 234 }, { "epoch": 0.03, "grad_norm": 1.0850347895148524, "learning_rate": 1.9915254237288133e-06, "loss": 0.6217, "step": 235 }, { "epoch": 0.03, "grad_norm": 0.7561140413632217, "learning_rate": 2e-06, "loss": 0.5888, "step": 236 }, { "epoch": 0.03, "grad_norm": 0.8625860213461869, "learning_rate": 1.9999999148552087e-06, "loss": 0.7218, "step": 237 }, { "epoch": 0.03, "grad_norm": 0.8166646229523257, "learning_rate": 1.99999965942085e-06, "loss": 0.6284, "step": 238 }, { "epoch": 0.03, "grad_norm": 0.6551901369601575, "learning_rate": 1.9999992336969675e-06, "loss": 0.5826, "step": 239 }, { "epoch": 0.03, "grad_norm": 0.9304362470364699, "learning_rate": 1.9999986376836335e-06, "loss": 0.617, "step": 240 }, { "epoch": 0.03, "grad_norm": 1.0359386542519404, "learning_rate": 1.9999978713809487e-06, "loss": 0.693, "step": 241 }, { "epoch": 0.03, "grad_norm": 0.6159971874590597, "learning_rate": 1.999996934789045e-06, "loss": 0.5893, "step": 242 }, { "epoch": 0.03, "grad_norm": 0.7307011844542025, "learning_rate": 1.999995827908081e-06, "loss": 0.6368, "step": 243 }, { "epoch": 0.03, "grad_norm": 0.9208318783383151, "learning_rate": 1.9999945507382454e-06, "loss": 0.7098, "step": 244 }, { "epoch": 0.03, "grad_norm": 0.9179643292882264, "learning_rate": 1.9999931032797555e-06, "loss": 0.6841, "step": 245 }, { "epoch": 0.03, "grad_norm": 0.7855969182168681, "learning_rate": 1.999991485532858e-06, "loss": 0.5896, "step": 246 }, { "epoch": 0.03, "grad_norm": 0.8809749072365072, "learning_rate": 1.9999896974978285e-06, "loss": 0.6883, "step": 247 }, { "epoch": 0.03, "grad_norm": 0.9665855957156472, "learning_rate": 1.999987739174971e-06, "loss": 0.762, "step": 248 }, { "epoch": 0.03, "grad_norm": 0.7802559405751034, "learning_rate": 1.99998561056462e-06, "loss": 0.6096, "step": 249 }, { "epoch": 0.03, "grad_norm": 0.6257717038750468, "learning_rate": 1.9999833116671367e-06, "loss": 0.5582, "step": 250 }, { "epoch": 0.03, "grad_norm": 0.6304227616063485, "learning_rate": 1.9999808424829135e-06, "loss": 0.5243, "step": 251 }, { "epoch": 0.03, "grad_norm": 0.7272651060843149, "learning_rate": 1.9999782030123702e-06, "loss": 0.6365, "step": 252 }, { "epoch": 0.03, "grad_norm": 0.6841305216463638, "learning_rate": 1.9999753932559567e-06, "loss": 0.6012, "step": 253 }, { "epoch": 0.03, "grad_norm": 0.8512458896259056, "learning_rate": 1.9999724132141516e-06, "loss": 0.6715, "step": 254 }, { "epoch": 0.03, "grad_norm": 0.7166163865557734, "learning_rate": 1.999969262887462e-06, "loss": 0.5863, "step": 255 }, { "epoch": 0.03, "grad_norm": 0.6931728423039321, "learning_rate": 1.9999659422764248e-06, "loss": 0.5759, "step": 256 }, { "epoch": 0.03, "grad_norm": 0.8867546810799449, "learning_rate": 1.999962451381605e-06, "loss": 0.6563, "step": 257 }, { "epoch": 0.03, "grad_norm": 0.9755805820638891, "learning_rate": 1.999958790203597e-06, "loss": 0.6736, "step": 258 }, { "epoch": 0.03, "grad_norm": 0.748505813485469, "learning_rate": 1.999954958743025e-06, "loss": 0.7157, "step": 259 }, { "epoch": 0.03, "grad_norm": 0.8280797001119953, "learning_rate": 1.9999509570005406e-06, "loss": 0.6616, "step": 260 }, { "epoch": 0.03, "grad_norm": 0.9041278479566665, "learning_rate": 1.999946784976826e-06, "loss": 0.7529, "step": 261 }, { "epoch": 0.03, "grad_norm": 0.9158660580121655, "learning_rate": 1.999942442672591e-06, "loss": 0.655, "step": 262 }, { "epoch": 0.03, "grad_norm": 0.6594797430670454, "learning_rate": 1.9999379300885754e-06, "loss": 0.5778, "step": 263 }, { "epoch": 0.03, "grad_norm": 0.8084013065232527, "learning_rate": 1.999933247225548e-06, "loss": 0.6182, "step": 264 }, { "epoch": 0.03, "grad_norm": 0.7741929925462311, "learning_rate": 1.999928394084305e-06, "loss": 0.5902, "step": 265 }, { "epoch": 0.03, "grad_norm": 0.6113369204995305, "learning_rate": 1.9999233706656743e-06, "loss": 0.5736, "step": 266 }, { "epoch": 0.03, "grad_norm": 0.8158108757664658, "learning_rate": 1.999918176970511e-06, "loss": 0.6617, "step": 267 }, { "epoch": 0.03, "grad_norm": 0.7170955866836686, "learning_rate": 1.9999128129996984e-06, "loss": 0.6386, "step": 268 }, { "epoch": 0.03, "grad_norm": 0.6456584365381373, "learning_rate": 1.999907278754151e-06, "loss": 0.5859, "step": 269 }, { "epoch": 0.03, "grad_norm": 0.6257160586247476, "learning_rate": 1.999901574234811e-06, "loss": 0.5918, "step": 270 }, { "epoch": 0.03, "grad_norm": 0.7624562451180734, "learning_rate": 1.99989569944265e-06, "loss": 0.6811, "step": 271 }, { "epoch": 0.03, "grad_norm": 1.2449455730842223, "learning_rate": 1.9998896543786683e-06, "loss": 0.6845, "step": 272 }, { "epoch": 0.03, "grad_norm": 1.0122328677163068, "learning_rate": 1.999883439043895e-06, "loss": 0.6639, "step": 273 }, { "epoch": 0.03, "grad_norm": 0.6669394614726304, "learning_rate": 1.999877053439389e-06, "loss": 0.6313, "step": 274 }, { "epoch": 0.04, "grad_norm": 0.8928051159400339, "learning_rate": 1.999870497566237e-06, "loss": 0.6152, "step": 275 }, { "epoch": 0.04, "grad_norm": 0.9021328838958339, "learning_rate": 1.9998637714255562e-06, "loss": 0.6512, "step": 276 }, { "epoch": 0.04, "grad_norm": 0.9101399433152626, "learning_rate": 1.9998568750184916e-06, "loss": 0.6087, "step": 277 }, { "epoch": 0.04, "grad_norm": 0.7449958707856078, "learning_rate": 1.9998498083462172e-06, "loss": 0.5713, "step": 278 }, { "epoch": 0.04, "grad_norm": 0.7639084341476106, "learning_rate": 1.9998425714099375e-06, "loss": 0.6, "step": 279 }, { "epoch": 0.04, "grad_norm": 0.7279828890510773, "learning_rate": 1.9998351642108836e-06, "loss": 0.5928, "step": 280 }, { "epoch": 0.04, "grad_norm": 0.815161596122287, "learning_rate": 1.9998275867503177e-06, "loss": 0.5829, "step": 281 }, { "epoch": 0.04, "grad_norm": 0.6351697163457766, "learning_rate": 1.9998198390295297e-06, "loss": 0.6166, "step": 282 }, { "epoch": 0.04, "grad_norm": 0.6783285677768011, "learning_rate": 1.9998119210498397e-06, "loss": 0.5675, "step": 283 }, { "epoch": 0.04, "grad_norm": 0.7119826568632397, "learning_rate": 1.999803832812595e-06, "loss": 0.5777, "step": 284 }, { "epoch": 0.04, "grad_norm": 0.9578081686428986, "learning_rate": 1.9997955743191736e-06, "loss": 0.6802, "step": 285 }, { "epoch": 0.04, "grad_norm": 0.970271384687729, "learning_rate": 1.9997871455709817e-06, "loss": 0.6425, "step": 286 }, { "epoch": 0.04, "grad_norm": 0.8391588999241391, "learning_rate": 1.9997785465694545e-06, "loss": 0.6482, "step": 287 }, { "epoch": 0.04, "grad_norm": 0.7559879265309987, "learning_rate": 1.9997697773160566e-06, "loss": 0.6013, "step": 288 }, { "epoch": 0.04, "grad_norm": 0.8002748589128438, "learning_rate": 1.999760837812281e-06, "loss": 0.6673, "step": 289 }, { "epoch": 0.04, "grad_norm": 0.6300829015929642, "learning_rate": 1.9997517280596503e-06, "loss": 0.5778, "step": 290 }, { "epoch": 0.04, "grad_norm": 0.7266115572516427, "learning_rate": 1.9997424480597157e-06, "loss": 0.6705, "step": 291 }, { "epoch": 0.04, "grad_norm": 0.5856628988568351, "learning_rate": 1.999732997814057e-06, "loss": 0.5574, "step": 292 }, { "epoch": 0.04, "grad_norm": 0.6912973568761449, "learning_rate": 1.9997233773242845e-06, "loss": 0.6121, "step": 293 }, { "epoch": 0.04, "grad_norm": 0.6981284546199165, "learning_rate": 1.9997135865920354e-06, "loss": 0.6535, "step": 294 }, { "epoch": 0.04, "grad_norm": 0.7437958073516342, "learning_rate": 1.999703625618978e-06, "loss": 0.5956, "step": 295 }, { "epoch": 0.04, "grad_norm": 0.7987457717931304, "learning_rate": 1.9996934944068075e-06, "loss": 0.6469, "step": 296 }, { "epoch": 0.04, "grad_norm": 0.8670990352336557, "learning_rate": 1.99968319295725e-06, "loss": 0.6559, "step": 297 }, { "epoch": 0.04, "grad_norm": 0.8145183156889081, "learning_rate": 1.9996727212720587e-06, "loss": 0.7407, "step": 298 }, { "epoch": 0.04, "grad_norm": 0.6949355368150785, "learning_rate": 1.999662079353018e-06, "loss": 0.6135, "step": 299 }, { "epoch": 0.04, "grad_norm": 0.7637559389237999, "learning_rate": 1.9996512672019395e-06, "loss": 0.6037, "step": 300 }, { "epoch": 0.04, "grad_norm": 0.7101951019587592, "learning_rate": 1.9996402848206647e-06, "loss": 0.569, "step": 301 }, { "epoch": 0.04, "grad_norm": 0.8554522757524007, "learning_rate": 1.9996291322110633e-06, "loss": 0.6624, "step": 302 }, { "epoch": 0.04, "grad_norm": 0.6989714697190335, "learning_rate": 1.999617809375035e-06, "loss": 0.5813, "step": 303 }, { "epoch": 0.04, "grad_norm": 0.9230558846204552, "learning_rate": 1.9996063163145075e-06, "loss": 0.6439, "step": 304 }, { "epoch": 0.04, "grad_norm": 0.7548869346421067, "learning_rate": 1.9995946530314383e-06, "loss": 0.6058, "step": 305 }, { "epoch": 0.04, "grad_norm": 0.6974381289402516, "learning_rate": 1.9995828195278133e-06, "loss": 0.5988, "step": 306 }, { "epoch": 0.04, "grad_norm": 0.9482477842978514, "learning_rate": 1.9995708158056473e-06, "loss": 0.753, "step": 307 }, { "epoch": 0.04, "grad_norm": 0.7553292869751534, "learning_rate": 1.9995586418669853e-06, "loss": 0.6253, "step": 308 }, { "epoch": 0.04, "grad_norm": 0.8287173499925506, "learning_rate": 1.9995462977139e-06, "loss": 0.6017, "step": 309 }, { "epoch": 0.04, "grad_norm": 0.8928406702802455, "learning_rate": 1.999533783348493e-06, "loss": 0.7325, "step": 310 }, { "epoch": 0.04, "grad_norm": 0.6410943924428769, "learning_rate": 1.999521098772896e-06, "loss": 0.5277, "step": 311 }, { "epoch": 0.04, "grad_norm": 0.744168490567295, "learning_rate": 1.999508243989269e-06, "loss": 0.5779, "step": 312 }, { "epoch": 0.04, "grad_norm": 0.7628269145518654, "learning_rate": 1.9994952189998007e-06, "loss": 0.6934, "step": 313 }, { "epoch": 0.04, "grad_norm": 0.6178141324596883, "learning_rate": 1.9994820238067093e-06, "loss": 0.6109, "step": 314 }, { "epoch": 0.04, "grad_norm": 0.9101328667244376, "learning_rate": 1.9994686584122417e-06, "loss": 0.7364, "step": 315 }, { "epoch": 0.04, "grad_norm": 0.7133317665538189, "learning_rate": 1.999455122818674e-06, "loss": 0.6025, "step": 316 }, { "epoch": 0.04, "grad_norm": 0.646645637586129, "learning_rate": 1.999441417028311e-06, "loss": 0.5914, "step": 317 }, { "epoch": 0.04, "grad_norm": 0.8781012727347236, "learning_rate": 1.999427541043487e-06, "loss": 0.7113, "step": 318 }, { "epoch": 0.04, "grad_norm": 0.7833710064149482, "learning_rate": 1.9994134948665646e-06, "loss": 0.6952, "step": 319 }, { "epoch": 0.04, "grad_norm": 0.7751873987941748, "learning_rate": 1.9993992784999358e-06, "loss": 0.6617, "step": 320 }, { "epoch": 0.04, "grad_norm": 0.6007609206877843, "learning_rate": 1.9993848919460212e-06, "loss": 0.5767, "step": 321 }, { "epoch": 0.04, "grad_norm": 0.8028915776237311, "learning_rate": 1.9993703352072714e-06, "loss": 0.5928, "step": 322 }, { "epoch": 0.04, "grad_norm": 0.9478064106321482, "learning_rate": 1.999355608286165e-06, "loss": 0.6444, "step": 323 }, { "epoch": 0.04, "grad_norm": 0.9097189962760596, "learning_rate": 1.9993407111852097e-06, "loss": 0.7153, "step": 324 }, { "epoch": 0.04, "grad_norm": 0.6934288937132419, "learning_rate": 1.9993256439069423e-06, "loss": 0.583, "step": 325 }, { "epoch": 0.04, "grad_norm": 0.6777138899758901, "learning_rate": 1.9993104064539287e-06, "loss": 0.5626, "step": 326 }, { "epoch": 0.04, "grad_norm": 0.6507862813625521, "learning_rate": 1.9992949988287636e-06, "loss": 0.6106, "step": 327 }, { "epoch": 0.04, "grad_norm": 0.7488861670223014, "learning_rate": 1.9992794210340705e-06, "loss": 0.6585, "step": 328 }, { "epoch": 0.04, "grad_norm": 0.7219018574477484, "learning_rate": 1.9992636730725025e-06, "loss": 0.5834, "step": 329 }, { "epoch": 0.04, "grad_norm": 0.6736314658646795, "learning_rate": 1.9992477549467415e-06, "loss": 0.5688, "step": 330 }, { "epoch": 0.04, "grad_norm": 0.7023867874955008, "learning_rate": 1.999231666659498e-06, "loss": 0.5607, "step": 331 }, { "epoch": 0.04, "grad_norm": 0.8030130857913237, "learning_rate": 1.999215408213511e-06, "loss": 0.5323, "step": 332 }, { "epoch": 0.04, "grad_norm": 0.9262149514699182, "learning_rate": 1.9991989796115502e-06, "loss": 0.6496, "step": 333 }, { "epoch": 0.04, "grad_norm": 0.8663654491781175, "learning_rate": 1.9991823808564125e-06, "loss": 0.6494, "step": 334 }, { "epoch": 0.04, "grad_norm": 1.0026044603291644, "learning_rate": 1.9991656119509246e-06, "loss": 0.7175, "step": 335 }, { "epoch": 0.04, "grad_norm": 0.902878487920715, "learning_rate": 1.9991486728979426e-06, "loss": 0.6908, "step": 336 }, { "epoch": 0.04, "grad_norm": 0.8437281463568033, "learning_rate": 1.9991315637003504e-06, "loss": 0.7043, "step": 337 }, { "epoch": 0.04, "grad_norm": 0.875589412073372, "learning_rate": 1.9991142843610624e-06, "loss": 0.6696, "step": 338 }, { "epoch": 0.04, "grad_norm": 1.0851819180964717, "learning_rate": 1.9990968348830198e-06, "loss": 0.709, "step": 339 }, { "epoch": 0.04, "grad_norm": 0.9044385211502225, "learning_rate": 1.999079215269195e-06, "loss": 0.6506, "step": 340 }, { "epoch": 0.04, "grad_norm": 0.7809235906964044, "learning_rate": 1.9990614255225878e-06, "loss": 0.608, "step": 341 }, { "epoch": 0.04, "grad_norm": 0.6095029885370832, "learning_rate": 1.9990434656462284e-06, "loss": 0.5945, "step": 342 }, { "epoch": 0.04, "grad_norm": 0.9220214106594393, "learning_rate": 1.9990253356431745e-06, "loss": 0.6963, "step": 343 }, { "epoch": 0.04, "grad_norm": 0.9001375257853677, "learning_rate": 1.999007035516514e-06, "loss": 0.6403, "step": 344 }, { "epoch": 0.04, "grad_norm": 0.8459662460180508, "learning_rate": 1.998988565269363e-06, "loss": 0.6638, "step": 345 }, { "epoch": 0.04, "grad_norm": 0.6129945305513942, "learning_rate": 1.9989699249048662e-06, "loss": 0.5613, "step": 346 }, { "epoch": 0.04, "grad_norm": 0.8336338793790357, "learning_rate": 1.9989511144261988e-06, "loss": 0.6887, "step": 347 }, { "epoch": 0.04, "grad_norm": 0.9154432557238324, "learning_rate": 1.9989321338365636e-06, "loss": 0.6035, "step": 348 }, { "epoch": 0.04, "grad_norm": 0.7792883337104146, "learning_rate": 1.9989129831391927e-06, "loss": 0.6286, "step": 349 }, { "epoch": 0.04, "grad_norm": 0.8489696176930003, "learning_rate": 1.9988936623373475e-06, "loss": 0.6918, "step": 350 }, { "epoch": 0.04, "grad_norm": 0.7414159012919416, "learning_rate": 1.9988741714343175e-06, "loss": 0.6978, "step": 351 }, { "epoch": 0.04, "grad_norm": 0.7886948019403772, "learning_rate": 1.998854510433423e-06, "loss": 0.6262, "step": 352 }, { "epoch": 0.04, "grad_norm": 0.8428040774724056, "learning_rate": 1.998834679338011e-06, "loss": 0.6843, "step": 353 }, { "epoch": 0.05, "grad_norm": 0.7611416632666679, "learning_rate": 1.9988146781514587e-06, "loss": 0.6253, "step": 354 }, { "epoch": 0.05, "grad_norm": 0.9635616594157547, "learning_rate": 1.998794506877173e-06, "loss": 0.5757, "step": 355 }, { "epoch": 0.05, "grad_norm": 7.55843438226619, "learning_rate": 1.998774165518587e-06, "loss": 0.7075, "step": 356 }, { "epoch": 0.05, "grad_norm": 0.7477539747945171, "learning_rate": 1.9987536540791667e-06, "loss": 0.5852, "step": 357 }, { "epoch": 0.05, "grad_norm": 0.6554910573429403, "learning_rate": 1.998732972562404e-06, "loss": 0.5484, "step": 358 }, { "epoch": 0.05, "grad_norm": 0.8127809210005906, "learning_rate": 1.9987121209718203e-06, "loss": 0.534, "step": 359 }, { "epoch": 0.05, "grad_norm": 0.7529362415409898, "learning_rate": 1.9986910993109674e-06, "loss": 0.5677, "step": 360 }, { "epoch": 0.05, "grad_norm": 0.853655342997557, "learning_rate": 1.9986699075834242e-06, "loss": 0.6316, "step": 361 }, { "epoch": 0.05, "grad_norm": 0.7094755895769688, "learning_rate": 1.9986485457928e-06, "loss": 0.6035, "step": 362 }, { "epoch": 0.05, "grad_norm": 0.960450051591529, "learning_rate": 1.9986270139427323e-06, "loss": 0.6422, "step": 363 }, { "epoch": 0.05, "grad_norm": 0.6725458366668882, "learning_rate": 1.998605312036888e-06, "loss": 0.5842, "step": 364 }, { "epoch": 0.05, "grad_norm": 0.6659406004405898, "learning_rate": 1.998583440078962e-06, "loss": 0.5732, "step": 365 }, { "epoch": 0.05, "grad_norm": 0.9920451039700708, "learning_rate": 1.9985613980726794e-06, "loss": 0.6518, "step": 366 }, { "epoch": 0.05, "grad_norm": 1.1236637195135857, "learning_rate": 1.998539186021794e-06, "loss": 0.7254, "step": 367 }, { "epoch": 0.05, "grad_norm": 0.6801252339305343, "learning_rate": 1.998516803930087e-06, "loss": 0.5683, "step": 368 }, { "epoch": 0.05, "grad_norm": 0.8331883060473511, "learning_rate": 1.998494251801372e-06, "loss": 0.6656, "step": 369 }, { "epoch": 0.05, "grad_norm": 0.8255426690603981, "learning_rate": 1.9984715296394874e-06, "loss": 0.6337, "step": 370 }, { "epoch": 0.05, "grad_norm": 0.7515752677935769, "learning_rate": 1.9984486374483036e-06, "loss": 0.6105, "step": 371 }, { "epoch": 0.05, "grad_norm": 0.8471385911343636, "learning_rate": 1.9984255752317185e-06, "loss": 0.5816, "step": 372 }, { "epoch": 0.05, "grad_norm": 0.8592903638985805, "learning_rate": 1.9984023429936595e-06, "loss": 0.6064, "step": 373 }, { "epoch": 0.05, "grad_norm": 0.8795317440284671, "learning_rate": 1.9983789407380825e-06, "loss": 0.624, "step": 374 }, { "epoch": 0.05, "grad_norm": 0.7483281469460907, "learning_rate": 1.998355368468973e-06, "loss": 0.5929, "step": 375 }, { "epoch": 0.05, "grad_norm": 0.6753107671083313, "learning_rate": 1.9983316261903454e-06, "loss": 0.5904, "step": 376 }, { "epoch": 0.05, "grad_norm": 0.7599112145704452, "learning_rate": 1.998307713906242e-06, "loss": 0.6541, "step": 377 }, { "epoch": 0.05, "grad_norm": 0.9125710764193486, "learning_rate": 1.998283631620736e-06, "loss": 0.6487, "step": 378 }, { "epoch": 0.05, "grad_norm": 0.8136045737757747, "learning_rate": 1.998259379337927e-06, "loss": 0.6288, "step": 379 }, { "epoch": 0.05, "grad_norm": 0.602431024946439, "learning_rate": 1.998234957061945e-06, "loss": 0.5567, "step": 380 }, { "epoch": 0.05, "grad_norm": 0.8766381742741123, "learning_rate": 1.99821036479695e-06, "loss": 0.7868, "step": 381 }, { "epoch": 0.05, "grad_norm": 0.8995830783826105, "learning_rate": 1.998185602547129e-06, "loss": 0.5891, "step": 382 }, { "epoch": 0.05, "grad_norm": 0.6788697854745167, "learning_rate": 1.998160670316699e-06, "loss": 0.5666, "step": 383 }, { "epoch": 0.05, "grad_norm": 0.796738389057112, "learning_rate": 1.9981355681099053e-06, "loss": 0.7052, "step": 384 }, { "epoch": 0.05, "grad_norm": 0.7518308815659634, "learning_rate": 1.998110295931023e-06, "loss": 0.6227, "step": 385 }, { "epoch": 0.05, "grad_norm": 1.2820655771792429, "learning_rate": 1.9980848537843555e-06, "loss": 0.6452, "step": 386 }, { "epoch": 0.05, "grad_norm": 0.9415761880118673, "learning_rate": 1.9980592416742355e-06, "loss": 0.7437, "step": 387 }, { "epoch": 0.05, "grad_norm": 0.6111766231882224, "learning_rate": 1.9980334596050247e-06, "loss": 0.5343, "step": 388 }, { "epoch": 0.05, "grad_norm": 0.650041383753845, "learning_rate": 1.9980075075811125e-06, "loss": 0.5456, "step": 389 }, { "epoch": 0.05, "grad_norm": 0.9342830809689981, "learning_rate": 1.9979813856069193e-06, "loss": 0.6755, "step": 390 }, { "epoch": 0.05, "grad_norm": 0.830498412942376, "learning_rate": 1.997955093686893e-06, "loss": 0.6308, "step": 391 }, { "epoch": 0.05, "grad_norm": 0.8414585563356384, "learning_rate": 1.9979286318255104e-06, "loss": 0.6603, "step": 392 }, { "epoch": 0.05, "grad_norm": 0.7417964186877467, "learning_rate": 1.9979020000272787e-06, "loss": 0.6582, "step": 393 }, { "epoch": 0.05, "grad_norm": 0.6350388014950262, "learning_rate": 1.997875198296732e-06, "loss": 0.6192, "step": 394 }, { "epoch": 0.05, "grad_norm": 0.727422189560833, "learning_rate": 1.9978482266384354e-06, "loss": 0.6194, "step": 395 }, { "epoch": 0.05, "grad_norm": 0.7538397104096738, "learning_rate": 1.997821085056981e-06, "loss": 0.6172, "step": 396 }, { "epoch": 0.05, "grad_norm": 0.8555191334261188, "learning_rate": 1.997793773556991e-06, "loss": 0.657, "step": 397 }, { "epoch": 0.05, "grad_norm": 0.7205520909245473, "learning_rate": 1.9977662921431167e-06, "loss": 0.6061, "step": 398 }, { "epoch": 0.05, "grad_norm": 0.7538479516126748, "learning_rate": 1.997738640820037e-06, "loss": 0.5984, "step": 399 }, { "epoch": 0.05, "grad_norm": 0.8029526196745616, "learning_rate": 1.9977108195924613e-06, "loss": 0.6469, "step": 400 }, { "epoch": 0.05, "grad_norm": 1.12361740607408, "learning_rate": 1.9976828284651273e-06, "loss": 0.6364, "step": 401 }, { "epoch": 0.05, "grad_norm": 0.9998887207797958, "learning_rate": 1.9976546674428012e-06, "loss": 0.6408, "step": 402 }, { "epoch": 0.05, "grad_norm": 0.6331078684674015, "learning_rate": 1.9976263365302786e-06, "loss": 0.5904, "step": 403 }, { "epoch": 0.05, "grad_norm": 0.8439407709010107, "learning_rate": 1.9975978357323845e-06, "loss": 0.6771, "step": 404 }, { "epoch": 0.05, "grad_norm": 0.7898180393420955, "learning_rate": 1.9975691650539712e-06, "loss": 0.6306, "step": 405 }, { "epoch": 0.05, "grad_norm": 0.6638433144801531, "learning_rate": 1.9975403244999223e-06, "loss": 0.5796, "step": 406 }, { "epoch": 0.05, "grad_norm": 0.9641760611378809, "learning_rate": 1.9975113140751484e-06, "loss": 0.6647, "step": 407 }, { "epoch": 0.05, "grad_norm": 0.6976555198536526, "learning_rate": 1.9974821337845895e-06, "loss": 0.5524, "step": 408 }, { "epoch": 0.05, "grad_norm": 0.7870931942980063, "learning_rate": 1.9974527836332146e-06, "loss": 0.6556, "step": 409 }, { "epoch": 0.05, "grad_norm": 0.8607140365717414, "learning_rate": 1.9974232636260225e-06, "loss": 0.6384, "step": 410 }, { "epoch": 0.05, "grad_norm": 0.6205235370420497, "learning_rate": 1.9973935737680395e-06, "loss": 0.5491, "step": 411 }, { "epoch": 0.05, "grad_norm": 0.8029058689101558, "learning_rate": 1.997363714064322e-06, "loss": 0.5687, "step": 412 }, { "epoch": 0.05, "grad_norm": 0.8565690131809278, "learning_rate": 1.997333684519954e-06, "loss": 0.6499, "step": 413 }, { "epoch": 0.05, "grad_norm": 0.872306166184901, "learning_rate": 1.99730348514005e-06, "loss": 0.6984, "step": 414 }, { "epoch": 0.05, "grad_norm": 0.7058095527728597, "learning_rate": 1.997273115929752e-06, "loss": 0.5688, "step": 415 }, { "epoch": 0.05, "grad_norm": 0.7386514309222343, "learning_rate": 1.997242576894232e-06, "loss": 0.5799, "step": 416 }, { "epoch": 0.05, "grad_norm": 0.7273716879003697, "learning_rate": 1.9972118680386903e-06, "loss": 0.6194, "step": 417 }, { "epoch": 0.05, "grad_norm": 0.7342382312263401, "learning_rate": 1.9971809893683565e-06, "loss": 0.5739, "step": 418 }, { "epoch": 0.05, "grad_norm": 0.7275605551295318, "learning_rate": 1.997149940888489e-06, "loss": 0.5337, "step": 419 }, { "epoch": 0.05, "grad_norm": 0.7393495700873955, "learning_rate": 1.9971187226043744e-06, "loss": 0.655, "step": 420 }, { "epoch": 0.05, "grad_norm": 0.8097193901619947, "learning_rate": 1.997087334521329e-06, "loss": 0.6812, "step": 421 }, { "epoch": 0.05, "grad_norm": 1.1007582266099434, "learning_rate": 1.997055776644699e-06, "loss": 0.7001, "step": 422 }, { "epoch": 0.05, "grad_norm": 0.6115835360113718, "learning_rate": 1.9970240489798567e-06, "loss": 0.5459, "step": 423 }, { "epoch": 0.05, "grad_norm": 0.6519049847636927, "learning_rate": 1.9969921515322057e-06, "loss": 0.54, "step": 424 }, { "epoch": 0.05, "grad_norm": 0.6863780538412851, "learning_rate": 1.9969600843071783e-06, "loss": 0.6256, "step": 425 }, { "epoch": 0.05, "grad_norm": 0.6162443703739764, "learning_rate": 1.996927847310235e-06, "loss": 0.5844, "step": 426 }, { "epoch": 0.05, "grad_norm": 0.7886252374418712, "learning_rate": 1.996895440546865e-06, "loss": 0.5857, "step": 427 }, { "epoch": 0.05, "grad_norm": 0.8084761217273592, "learning_rate": 1.996862864022587e-06, "loss": 0.6244, "step": 428 }, { "epoch": 0.05, "grad_norm": 0.7488517497681477, "learning_rate": 1.9968301177429482e-06, "loss": 0.6193, "step": 429 }, { "epoch": 0.05, "grad_norm": 0.7155175447410872, "learning_rate": 1.9967972017135257e-06, "loss": 0.6058, "step": 430 }, { "epoch": 0.05, "grad_norm": 0.9753882013330053, "learning_rate": 1.996764115939924e-06, "loss": 0.6374, "step": 431 }, { "epoch": 0.06, "grad_norm": 0.6084892181187914, "learning_rate": 1.996730860427778e-06, "loss": 0.5586, "step": 432 }, { "epoch": 0.06, "grad_norm": 0.9057634887775081, "learning_rate": 1.99669743518275e-06, "loss": 0.6766, "step": 433 }, { "epoch": 0.06, "grad_norm": 0.9674983190091517, "learning_rate": 1.9966638402105326e-06, "loss": 0.5723, "step": 434 }, { "epoch": 0.06, "grad_norm": 0.8598216942434391, "learning_rate": 1.996630075516846e-06, "loss": 0.6522, "step": 435 }, { "epoch": 0.06, "grad_norm": 0.7187819145132707, "learning_rate": 1.9965961411074406e-06, "loss": 0.5842, "step": 436 }, { "epoch": 0.06, "grad_norm": 0.662830114474183, "learning_rate": 1.996562036988095e-06, "loss": 0.5486, "step": 437 }, { "epoch": 0.06, "grad_norm": 0.7206508711581904, "learning_rate": 1.9965277631646156e-06, "loss": 0.5915, "step": 438 }, { "epoch": 0.06, "grad_norm": 0.9050266889353334, "learning_rate": 1.996493319642841e-06, "loss": 0.6914, "step": 439 }, { "epoch": 0.06, "grad_norm": 0.644419177040819, "learning_rate": 1.9964587064286353e-06, "loss": 0.6161, "step": 440 }, { "epoch": 0.06, "grad_norm": 0.8980599632794635, "learning_rate": 1.9964239235278922e-06, "loss": 0.6016, "step": 441 }, { "epoch": 0.06, "grad_norm": 0.8132854686340011, "learning_rate": 1.9963889709465365e-06, "loss": 0.6598, "step": 442 }, { "epoch": 0.06, "grad_norm": 0.7718078124800741, "learning_rate": 1.9963538486905186e-06, "loss": 0.641, "step": 443 }, { "epoch": 0.06, "grad_norm": 0.6522656866198393, "learning_rate": 1.996318556765821e-06, "loss": 0.5915, "step": 444 }, { "epoch": 0.06, "grad_norm": 0.8683128077530391, "learning_rate": 1.996283095178452e-06, "loss": 0.5898, "step": 445 }, { "epoch": 0.06, "grad_norm": 0.8293878684604463, "learning_rate": 1.9962474639344514e-06, "loss": 0.6358, "step": 446 }, { "epoch": 0.06, "grad_norm": 0.773598182112365, "learning_rate": 1.9962116630398865e-06, "loss": 0.5926, "step": 447 }, { "epoch": 0.06, "grad_norm": 0.6860253798573333, "learning_rate": 1.996175692500854e-06, "loss": 0.5808, "step": 448 }, { "epoch": 0.06, "grad_norm": 0.9029446960124653, "learning_rate": 1.996139552323479e-06, "loss": 0.6698, "step": 449 }, { "epoch": 0.06, "grad_norm": 0.6595919887257407, "learning_rate": 1.996103242513916e-06, "loss": 0.5714, "step": 450 }, { "epoch": 0.06, "grad_norm": 0.6264165343534965, "learning_rate": 1.996066763078348e-06, "loss": 0.5299, "step": 451 }, { "epoch": 0.06, "grad_norm": 0.7175733174312324, "learning_rate": 1.9960301140229874e-06, "loss": 0.5333, "step": 452 }, { "epoch": 0.06, "grad_norm": 0.873568294212464, "learning_rate": 1.995993295354075e-06, "loss": 0.6863, "step": 453 }, { "epoch": 0.06, "grad_norm": 0.8824102340737074, "learning_rate": 1.9959563070778805e-06, "loss": 0.6326, "step": 454 }, { "epoch": 0.06, "grad_norm": 0.6195609281265144, "learning_rate": 1.9959191492007025e-06, "loss": 0.6096, "step": 455 }, { "epoch": 0.06, "grad_norm": 0.8051827517846066, "learning_rate": 1.9958818217288687e-06, "loss": 0.6429, "step": 456 }, { "epoch": 0.06, "grad_norm": 0.7602985880173697, "learning_rate": 1.995844324668736e-06, "loss": 0.543, "step": 457 }, { "epoch": 0.06, "grad_norm": 0.6994694603792948, "learning_rate": 1.9958066580266894e-06, "loss": 0.649, "step": 458 }, { "epoch": 0.06, "grad_norm": 0.769562412206393, "learning_rate": 1.9957688218091434e-06, "loss": 0.5918, "step": 459 }, { "epoch": 0.06, "grad_norm": 0.8607009036612931, "learning_rate": 1.9957308160225406e-06, "loss": 0.6612, "step": 460 }, { "epoch": 0.06, "grad_norm": 1.0990021737889297, "learning_rate": 1.9956926406733533e-06, "loss": 0.6467, "step": 461 }, { "epoch": 0.06, "grad_norm": 0.7054435098964321, "learning_rate": 1.9956542957680824e-06, "loss": 0.6062, "step": 462 }, { "epoch": 0.06, "grad_norm": 0.7565580420791708, "learning_rate": 1.9956157813132573e-06, "loss": 0.6155, "step": 463 }, { "epoch": 0.06, "grad_norm": 0.8254829621953097, "learning_rate": 1.995577097315437e-06, "loss": 0.6318, "step": 464 }, { "epoch": 0.06, "grad_norm": 0.8376409535744992, "learning_rate": 1.9955382437812093e-06, "loss": 0.6111, "step": 465 }, { "epoch": 0.06, "grad_norm": 0.6324610420016746, "learning_rate": 1.9954992207171895e-06, "loss": 0.5611, "step": 466 }, { "epoch": 0.06, "grad_norm": 0.7741536575285715, "learning_rate": 1.995460028130024e-06, "loss": 0.6892, "step": 467 }, { "epoch": 0.06, "grad_norm": 0.7010385432251874, "learning_rate": 1.995420666026386e-06, "loss": 0.5707, "step": 468 }, { "epoch": 0.06, "grad_norm": 0.8179419862545474, "learning_rate": 1.995381134412979e-06, "loss": 0.6544, "step": 469 }, { "epoch": 0.06, "grad_norm": 0.8209329876527347, "learning_rate": 1.9953414332965344e-06, "loss": 0.6304, "step": 470 }, { "epoch": 0.06, "grad_norm": 0.6602453497344377, "learning_rate": 1.9953015626838132e-06, "loss": 0.5876, "step": 471 }, { "epoch": 0.06, "grad_norm": 1.700014033721996, "learning_rate": 1.995261522581605e-06, "loss": 0.6623, "step": 472 }, { "epoch": 0.06, "grad_norm": 0.7137584730827138, "learning_rate": 1.995221312996728e-06, "loss": 0.6087, "step": 473 }, { "epoch": 0.06, "grad_norm": 0.8008197649868769, "learning_rate": 1.995180933936029e-06, "loss": 0.6114, "step": 474 }, { "epoch": 0.06, "grad_norm": 0.7344816607657141, "learning_rate": 1.9951403854063857e-06, "loss": 0.5712, "step": 475 }, { "epoch": 0.06, "grad_norm": 1.0962893537156433, "learning_rate": 1.9950996674147013e-06, "loss": 0.6992, "step": 476 }, { "epoch": 0.06, "grad_norm": 0.7188700522057578, "learning_rate": 1.9950587799679107e-06, "loss": 0.5839, "step": 477 }, { "epoch": 0.06, "grad_norm": 0.9062586258796633, "learning_rate": 1.9950177230729765e-06, "loss": 0.6295, "step": 478 }, { "epoch": 0.06, "grad_norm": 0.9130385854423889, "learning_rate": 1.9949764967368897e-06, "loss": 0.6229, "step": 479 }, { "epoch": 0.06, "grad_norm": 0.8214759920337584, "learning_rate": 1.994935100966671e-06, "loss": 0.6747, "step": 480 }, { "epoch": 0.06, "grad_norm": 0.7628341427462815, "learning_rate": 1.99489353576937e-06, "loss": 0.6667, "step": 481 }, { "epoch": 0.06, "grad_norm": 0.5913864957882137, "learning_rate": 1.9948518011520646e-06, "loss": 0.5501, "step": 482 }, { "epoch": 0.06, "grad_norm": 0.8607712987666438, "learning_rate": 1.9948098971218615e-06, "loss": 0.6423, "step": 483 }, { "epoch": 0.06, "grad_norm": 0.854437823294419, "learning_rate": 1.994767823685897e-06, "loss": 0.6678, "step": 484 }, { "epoch": 0.06, "grad_norm": 0.6329898822777572, "learning_rate": 1.9947255808513356e-06, "loss": 0.6115, "step": 485 }, { "epoch": 0.06, "grad_norm": 0.7071754119219292, "learning_rate": 1.994683168625371e-06, "loss": 0.5803, "step": 486 }, { "epoch": 0.06, "grad_norm": 0.8645829146535393, "learning_rate": 1.994640587015225e-06, "loss": 0.6927, "step": 487 }, { "epoch": 0.06, "grad_norm": 0.7001108300136843, "learning_rate": 1.994597836028149e-06, "loss": 0.6013, "step": 488 }, { "epoch": 0.06, "grad_norm": 0.7276133918104546, "learning_rate": 1.994554915671423e-06, "loss": 0.5975, "step": 489 }, { "epoch": 0.06, "grad_norm": 0.6693202431399882, "learning_rate": 1.9945118259523566e-06, "loss": 0.5793, "step": 490 }, { "epoch": 0.06, "grad_norm": 0.886659854560941, "learning_rate": 1.9944685668782866e-06, "loss": 0.6687, "step": 491 }, { "epoch": 0.06, "grad_norm": 0.7786195211250581, "learning_rate": 1.9944251384565802e-06, "loss": 0.6558, "step": 492 }, { "epoch": 0.06, "grad_norm": 0.755450925655243, "learning_rate": 1.9943815406946325e-06, "loss": 0.6361, "step": 493 }, { "epoch": 0.06, "grad_norm": 0.7175708601429536, "learning_rate": 1.9943377735998677e-06, "loss": 0.6066, "step": 494 }, { "epoch": 0.06, "grad_norm": 0.6737474261183992, "learning_rate": 1.9942938371797392e-06, "loss": 0.5348, "step": 495 }, { "epoch": 0.06, "grad_norm": 0.6726366108513238, "learning_rate": 1.9942497314417285e-06, "loss": 0.5691, "step": 496 }, { "epoch": 0.06, "grad_norm": 0.7766121790255995, "learning_rate": 1.9942054563933466e-06, "loss": 0.6386, "step": 497 }, { "epoch": 0.06, "grad_norm": 0.9466418524135742, "learning_rate": 1.994161012042133e-06, "loss": 0.6884, "step": 498 }, { "epoch": 0.06, "grad_norm": 0.6064936698573115, "learning_rate": 1.9941163983956564e-06, "loss": 0.5475, "step": 499 }, { "epoch": 0.06, "grad_norm": 0.729347407140469, "learning_rate": 1.9940716154615137e-06, "loss": 0.5673, "step": 500 }, { "epoch": 0.06, "grad_norm": 0.7925009390349311, "learning_rate": 1.994026663247331e-06, "loss": 0.5988, "step": 501 }, { "epoch": 0.06, "grad_norm": 0.8799582084510409, "learning_rate": 1.9939815417607632e-06, "loss": 0.6289, "step": 502 }, { "epoch": 0.06, "grad_norm": 0.630126424146249, "learning_rate": 1.993936251009494e-06, "loss": 0.6375, "step": 503 }, { "epoch": 0.06, "grad_norm": 0.7668011395566893, "learning_rate": 1.993890791001236e-06, "loss": 0.6055, "step": 504 }, { "epoch": 0.06, "grad_norm": 0.9127365570320746, "learning_rate": 1.9938451617437306e-06, "loss": 0.7055, "step": 505 }, { "epoch": 0.06, "grad_norm": 0.8686573791774603, "learning_rate": 1.9937993632447483e-06, "loss": 0.6904, "step": 506 }, { "epoch": 0.06, "grad_norm": 0.7641158662166746, "learning_rate": 1.9937533955120872e-06, "loss": 0.5906, "step": 507 }, { "epoch": 0.06, "grad_norm": 0.7095443133999298, "learning_rate": 1.9937072585535758e-06, "loss": 0.5743, "step": 508 }, { "epoch": 0.06, "grad_norm": 0.8136751763390865, "learning_rate": 1.993660952377071e-06, "loss": 0.5829, "step": 509 }, { "epoch": 0.06, "grad_norm": 0.8441615515146447, "learning_rate": 1.9936144769904575e-06, "loss": 0.6955, "step": 510 }, { "epoch": 0.07, "grad_norm": 0.69454392770871, "learning_rate": 1.9935678324016504e-06, "loss": 0.5893, "step": 511 }, { "epoch": 0.07, "grad_norm": 0.9138403095149903, "learning_rate": 1.993521018618592e-06, "loss": 0.6462, "step": 512 }, { "epoch": 0.07, "grad_norm": 0.6488234500048476, "learning_rate": 1.9934740356492546e-06, "loss": 0.5746, "step": 513 }, { "epoch": 0.07, "grad_norm": 0.7232167794691058, "learning_rate": 1.993426883501639e-06, "loss": 0.5837, "step": 514 }, { "epoch": 0.07, "grad_norm": 0.8914086635543118, "learning_rate": 1.9933795621837744e-06, "loss": 0.6579, "step": 515 }, { "epoch": 0.07, "grad_norm": 0.7827369727963219, "learning_rate": 1.9933320717037192e-06, "loss": 0.568, "step": 516 }, { "epoch": 0.07, "grad_norm": 0.8330877192635204, "learning_rate": 1.993284412069561e-06, "loss": 0.6593, "step": 517 }, { "epoch": 0.07, "grad_norm": 0.9670235134915235, "learning_rate": 1.9932365832894156e-06, "loss": 0.6997, "step": 518 }, { "epoch": 0.07, "grad_norm": 0.8633228944132165, "learning_rate": 1.993188585371427e-06, "loss": 0.5734, "step": 519 }, { "epoch": 0.07, "grad_norm": 0.6430274093038407, "learning_rate": 1.9931404183237693e-06, "loss": 0.6233, "step": 520 }, { "epoch": 0.07, "grad_norm": 0.6957965916265959, "learning_rate": 1.9930920821546452e-06, "loss": 0.5427, "step": 521 }, { "epoch": 0.07, "grad_norm": 0.662545927951576, "learning_rate": 1.993043576872285e-06, "loss": 0.6064, "step": 522 }, { "epoch": 0.07, "grad_norm": 0.6526612293277911, "learning_rate": 1.99299490248495e-06, "loss": 0.5615, "step": 523 }, { "epoch": 0.07, "grad_norm": 2.5086238833618104, "learning_rate": 1.992946059000927e-06, "loss": 0.665, "step": 524 }, { "epoch": 0.07, "grad_norm": 0.7675668011819149, "learning_rate": 1.992897046428535e-06, "loss": 0.5994, "step": 525 }, { "epoch": 0.07, "grad_norm": 0.9130233856179439, "learning_rate": 1.99284786477612e-06, "loss": 0.6728, "step": 526 }, { "epoch": 0.07, "grad_norm": 0.8210887431824286, "learning_rate": 1.992798514052057e-06, "loss": 0.6813, "step": 527 }, { "epoch": 0.07, "grad_norm": 0.7416502243640162, "learning_rate": 1.99274899426475e-06, "loss": 0.5835, "step": 528 }, { "epoch": 0.07, "grad_norm": 0.7002805608352144, "learning_rate": 1.9926993054226317e-06, "loss": 0.6252, "step": 529 }, { "epoch": 0.07, "grad_norm": 0.7409014266039685, "learning_rate": 1.9926494475341632e-06, "loss": 0.6191, "step": 530 }, { "epoch": 0.07, "grad_norm": 0.9939483676049348, "learning_rate": 1.9925994206078353e-06, "loss": 0.5919, "step": 531 }, { "epoch": 0.07, "grad_norm": 0.7711549899709258, "learning_rate": 1.992549224652167e-06, "loss": 0.6566, "step": 532 }, { "epoch": 0.07, "grad_norm": 0.7965055630250588, "learning_rate": 1.992498859675706e-06, "loss": 0.6962, "step": 533 }, { "epoch": 0.07, "grad_norm": 0.8250135235957727, "learning_rate": 1.992448325687029e-06, "loss": 0.6099, "step": 534 }, { "epoch": 0.07, "grad_norm": 0.6374817796334638, "learning_rate": 1.9923976226947413e-06, "loss": 0.5362, "step": 535 }, { "epoch": 0.07, "grad_norm": 0.7866966560468455, "learning_rate": 1.9923467507074774e-06, "loss": 0.5872, "step": 536 }, { "epoch": 0.07, "grad_norm": 1.8780860175483405, "learning_rate": 1.9922957097339e-06, "loss": 0.6597, "step": 537 }, { "epoch": 0.07, "grad_norm": 0.8232361897762414, "learning_rate": 1.9922444997827007e-06, "loss": 0.6831, "step": 538 }, { "epoch": 0.07, "grad_norm": 0.9518994100733643, "learning_rate": 1.9921931208626003e-06, "loss": 0.5798, "step": 539 }, { "epoch": 0.07, "grad_norm": 0.8439863343606367, "learning_rate": 1.992141572982348e-06, "loss": 0.5875, "step": 540 }, { "epoch": 0.07, "grad_norm": 0.6539230806691111, "learning_rate": 1.992089856150722e-06, "loss": 0.557, "step": 541 }, { "epoch": 0.07, "grad_norm": 0.8024210266163878, "learning_rate": 1.992037970376529e-06, "loss": 0.6311, "step": 542 }, { "epoch": 0.07, "grad_norm": 0.6851560582765146, "learning_rate": 1.9919859156686046e-06, "loss": 0.5768, "step": 543 }, { "epoch": 0.07, "grad_norm": 0.6728905386475592, "learning_rate": 1.991933692035813e-06, "loss": 0.5454, "step": 544 }, { "epoch": 0.07, "grad_norm": 0.8297912590440232, "learning_rate": 1.9918812994870477e-06, "loss": 0.6528, "step": 545 }, { "epoch": 0.07, "grad_norm": 0.722362985156497, "learning_rate": 1.99182873803123e-06, "loss": 0.588, "step": 546 }, { "epoch": 0.07, "grad_norm": 0.6833773640554377, "learning_rate": 1.9917760076773117e-06, "loss": 0.534, "step": 547 }, { "epoch": 0.07, "grad_norm": 0.6494228908847088, "learning_rate": 1.9917231084342716e-06, "loss": 0.5509, "step": 548 }, { "epoch": 0.07, "grad_norm": 0.6020082727644316, "learning_rate": 1.9916700403111174e-06, "loss": 0.5622, "step": 549 }, { "epoch": 0.07, "grad_norm": 0.7402075214920953, "learning_rate": 1.9916168033168862e-06, "loss": 0.6502, "step": 550 }, { "epoch": 0.07, "grad_norm": 0.7932825698733326, "learning_rate": 1.9915633974606447e-06, "loss": 0.6707, "step": 551 }, { "epoch": 0.07, "grad_norm": 0.9320208296213243, "learning_rate": 1.991509822751486e-06, "loss": 0.6215, "step": 552 }, { "epoch": 0.07, "grad_norm": 0.6570217784161347, "learning_rate": 1.9914560791985343e-06, "loss": 0.5781, "step": 553 }, { "epoch": 0.07, "grad_norm": 0.7455204330314638, "learning_rate": 1.9914021668109414e-06, "loss": 0.5999, "step": 554 }, { "epoch": 0.07, "grad_norm": 0.6728673335636264, "learning_rate": 1.9913480855978877e-06, "loss": 0.58, "step": 555 }, { "epoch": 0.07, "grad_norm": 1.2052907452724413, "learning_rate": 1.991293835568583e-06, "loss": 0.7481, "step": 556 }, { "epoch": 0.07, "grad_norm": 0.6256460585619213, "learning_rate": 1.991239416732265e-06, "loss": 0.601, "step": 557 }, { "epoch": 0.07, "grad_norm": 0.7632164316726977, "learning_rate": 1.9911848290982008e-06, "loss": 0.6731, "step": 558 }, { "epoch": 0.07, "grad_norm": 0.8504271689212162, "learning_rate": 1.9911300726756863e-06, "loss": 0.6465, "step": 559 }, { "epoch": 0.07, "grad_norm": 0.7030363463851032, "learning_rate": 1.9910751474740467e-06, "loss": 0.5653, "step": 560 }, { "epoch": 0.07, "grad_norm": 0.6219413514707076, "learning_rate": 1.9910200535026335e-06, "loss": 0.5529, "step": 561 }, { "epoch": 0.07, "grad_norm": 0.874478506488717, "learning_rate": 1.99096479077083e-06, "loss": 0.6317, "step": 562 }, { "epoch": 0.07, "grad_norm": 0.753319092066656, "learning_rate": 1.9909093592880467e-06, "loss": 0.619, "step": 563 }, { "epoch": 0.07, "grad_norm": 0.6603021049409944, "learning_rate": 1.9908537590637222e-06, "loss": 0.5789, "step": 564 }, { "epoch": 0.07, "grad_norm": 0.9341903408844926, "learning_rate": 1.9907979901073256e-06, "loss": 0.6835, "step": 565 }, { "epoch": 0.07, "grad_norm": 0.9158173897189834, "learning_rate": 1.990742052428353e-06, "loss": 0.7234, "step": 566 }, { "epoch": 0.07, "grad_norm": 0.6994294970537547, "learning_rate": 1.9906859460363304e-06, "loss": 0.6211, "step": 567 }, { "epoch": 0.07, "grad_norm": 0.766674955504057, "learning_rate": 1.9906296709408125e-06, "loss": 0.6073, "step": 568 }, { "epoch": 0.07, "grad_norm": 0.8361008042379616, "learning_rate": 1.9905732271513818e-06, "loss": 0.5853, "step": 569 }, { "epoch": 0.07, "grad_norm": 0.8318469677219228, "learning_rate": 1.99051661467765e-06, "loss": 0.6818, "step": 570 }, { "epoch": 0.07, "grad_norm": 0.7019197979384322, "learning_rate": 1.990459833529258e-06, "loss": 0.5677, "step": 571 }, { "epoch": 0.07, "grad_norm": 0.8514044815844871, "learning_rate": 1.9904028837158746e-06, "loss": 0.5864, "step": 572 }, { "epoch": 0.07, "grad_norm": 0.9651689292138219, "learning_rate": 1.9903457652471982e-06, "loss": 0.6982, "step": 573 }, { "epoch": 0.07, "grad_norm": 0.8455242221546466, "learning_rate": 1.9902884781329555e-06, "loss": 0.6439, "step": 574 }, { "epoch": 0.07, "grad_norm": 0.6561544674849393, "learning_rate": 1.9902310223829014e-06, "loss": 0.6085, "step": 575 }, { "epoch": 0.07, "grad_norm": 0.937057215300035, "learning_rate": 1.9901733980068206e-06, "loss": 0.712, "step": 576 }, { "epoch": 0.07, "grad_norm": 0.6849258021895515, "learning_rate": 1.9901156050145257e-06, "loss": 0.5343, "step": 577 }, { "epoch": 0.07, "grad_norm": 0.6278893872899395, "learning_rate": 1.990057643415858e-06, "loss": 0.5769, "step": 578 }, { "epoch": 0.07, "grad_norm": 0.780980702399632, "learning_rate": 1.9899995132206883e-06, "loss": 0.655, "step": 579 }, { "epoch": 0.07, "grad_norm": 0.7935012714201848, "learning_rate": 1.989941214438915e-06, "loss": 0.6604, "step": 580 }, { "epoch": 0.07, "grad_norm": 0.7947200362013553, "learning_rate": 1.989882747080466e-06, "loss": 0.7058, "step": 581 }, { "epoch": 0.07, "grad_norm": 1.15899467874248, "learning_rate": 1.989824111155298e-06, "loss": 0.6747, "step": 582 }, { "epoch": 0.07, "grad_norm": 0.9260353981555233, "learning_rate": 1.989765306673395e-06, "loss": 0.7168, "step": 583 }, { "epoch": 0.07, "grad_norm": 0.7619693256660294, "learning_rate": 1.9897063336447727e-06, "loss": 0.5069, "step": 584 }, { "epoch": 0.07, "grad_norm": 0.8632482255427164, "learning_rate": 1.9896471920794717e-06, "loss": 0.647, "step": 585 }, { "epoch": 0.07, "grad_norm": 0.7211371193610369, "learning_rate": 1.989587881987564e-06, "loss": 0.5243, "step": 586 }, { "epoch": 0.07, "grad_norm": 0.7083689444678922, "learning_rate": 1.9895284033791497e-06, "loss": 0.6235, "step": 587 }, { "epoch": 0.07, "grad_norm": 0.881945082230812, "learning_rate": 1.9894687562643568e-06, "loss": 0.655, "step": 588 }, { "epoch": 0.08, "grad_norm": 0.6935289256962699, "learning_rate": 1.9894089406533434e-06, "loss": 0.5911, "step": 589 }, { "epoch": 0.08, "grad_norm": 1.0039966400809808, "learning_rate": 1.9893489565562947e-06, "loss": 0.6712, "step": 590 }, { "epoch": 0.08, "grad_norm": 0.747033744133976, "learning_rate": 1.989288803983426e-06, "loss": 0.6162, "step": 591 }, { "epoch": 0.08, "grad_norm": 0.7436823348875847, "learning_rate": 1.9892284829449797e-06, "loss": 0.5801, "step": 592 }, { "epoch": 0.08, "grad_norm": 1.033523517673034, "learning_rate": 1.989167993451229e-06, "loss": 0.7125, "step": 593 }, { "epoch": 0.08, "grad_norm": 0.9095181071253053, "learning_rate": 1.9891073355124733e-06, "loss": 0.6476, "step": 594 }, { "epoch": 0.08, "grad_norm": 0.7434436203530099, "learning_rate": 1.989046509139044e-06, "loss": 0.5981, "step": 595 }, { "epoch": 0.08, "grad_norm": 0.5802384952938089, "learning_rate": 1.9889855143412972e-06, "loss": 0.5591, "step": 596 }, { "epoch": 0.08, "grad_norm": 1.085697433779284, "learning_rate": 1.9889243511296207e-06, "loss": 0.6402, "step": 597 }, { "epoch": 0.08, "grad_norm": 0.6303180573801902, "learning_rate": 1.9888630195144293e-06, "loss": 0.5474, "step": 598 }, { "epoch": 0.08, "grad_norm": 1.3413915865312702, "learning_rate": 1.9888015195061683e-06, "loss": 0.6136, "step": 599 }, { "epoch": 0.08, "grad_norm": 0.8511319653754635, "learning_rate": 1.9887398511153095e-06, "loss": 0.5975, "step": 600 }, { "epoch": 0.08, "grad_norm": 0.715852057825207, "learning_rate": 1.9886780143523545e-06, "loss": 0.5588, "step": 601 }, { "epoch": 0.08, "grad_norm": 0.659271833376575, "learning_rate": 1.988616009227834e-06, "loss": 0.5404, "step": 602 }, { "epoch": 0.08, "grad_norm": 0.9664161784962277, "learning_rate": 1.988553835752306e-06, "loss": 0.6136, "step": 603 }, { "epoch": 0.08, "grad_norm": 0.6026822005444106, "learning_rate": 1.9884914939363586e-06, "loss": 0.5593, "step": 604 }, { "epoch": 0.08, "grad_norm": 0.6705326196647514, "learning_rate": 1.988428983790608e-06, "loss": 0.5999, "step": 605 }, { "epoch": 0.08, "grad_norm": 0.6552312766630315, "learning_rate": 1.9883663053256985e-06, "loss": 0.5647, "step": 606 }, { "epoch": 0.08, "grad_norm": 0.6596561548714385, "learning_rate": 1.9883034585523046e-06, "loss": 0.5642, "step": 607 }, { "epoch": 0.08, "grad_norm": 0.7064415105841974, "learning_rate": 1.988240443481127e-06, "loss": 0.6005, "step": 608 }, { "epoch": 0.08, "grad_norm": 0.6904601283825006, "learning_rate": 1.9881772601228977e-06, "loss": 0.6041, "step": 609 }, { "epoch": 0.08, "grad_norm": 0.6404068538065457, "learning_rate": 1.9881139084883756e-06, "loss": 0.568, "step": 610 }, { "epoch": 0.08, "grad_norm": 0.8816219968018946, "learning_rate": 1.988050388588349e-06, "loss": 0.6788, "step": 611 }, { "epoch": 0.08, "grad_norm": 0.7543776958800256, "learning_rate": 1.987986700433635e-06, "loss": 0.5709, "step": 612 }, { "epoch": 0.08, "grad_norm": 0.8238846293414066, "learning_rate": 1.9879228440350785e-06, "loss": 0.6749, "step": 613 }, { "epoch": 0.08, "grad_norm": 0.8917840822332804, "learning_rate": 1.9878588194035535e-06, "loss": 0.6748, "step": 614 }, { "epoch": 0.08, "grad_norm": 0.7763996238794073, "learning_rate": 1.9877946265499633e-06, "loss": 0.654, "step": 615 }, { "epoch": 0.08, "grad_norm": 0.8261125263298917, "learning_rate": 1.987730265485239e-06, "loss": 0.6936, "step": 616 }, { "epoch": 0.08, "grad_norm": 0.627811287734862, "learning_rate": 1.9876657362203402e-06, "loss": 0.5532, "step": 617 }, { "epoch": 0.08, "grad_norm": 1.0901250486987362, "learning_rate": 1.987601038766256e-06, "loss": 0.6665, "step": 618 }, { "epoch": 0.08, "grad_norm": 0.6094014173150708, "learning_rate": 1.987536173134004e-06, "loss": 0.5272, "step": 619 }, { "epoch": 0.08, "grad_norm": 0.6692087696129586, "learning_rate": 1.9874711393346297e-06, "loss": 0.5886, "step": 620 }, { "epoch": 0.08, "grad_norm": 0.808027861904522, "learning_rate": 1.9874059373792074e-06, "loss": 0.6266, "step": 621 }, { "epoch": 0.08, "grad_norm": 0.6837399777962959, "learning_rate": 1.9873405672788414e-06, "loss": 0.5829, "step": 622 }, { "epoch": 0.08, "grad_norm": 0.8522117219788022, "learning_rate": 1.9872750290446623e-06, "loss": 0.6163, "step": 623 }, { "epoch": 0.08, "grad_norm": 0.5720629187471247, "learning_rate": 1.987209322687831e-06, "loss": 0.5127, "step": 624 }, { "epoch": 0.08, "grad_norm": 0.7999731719370359, "learning_rate": 1.987143448219537e-06, "loss": 0.6713, "step": 625 }, { "epoch": 0.08, "grad_norm": 0.8899122717311352, "learning_rate": 1.987077405650998e-06, "loss": 0.6459, "step": 626 }, { "epoch": 0.08, "grad_norm": 0.6563699550164905, "learning_rate": 1.9870111949934595e-06, "loss": 0.6049, "step": 627 }, { "epoch": 0.08, "grad_norm": 0.7830118377700895, "learning_rate": 1.9869448162581976e-06, "loss": 0.6171, "step": 628 }, { "epoch": 0.08, "grad_norm": 0.5708054261065402, "learning_rate": 1.986878269456515e-06, "loss": 0.5094, "step": 629 }, { "epoch": 0.08, "grad_norm": 0.9134654561678494, "learning_rate": 1.9868115545997446e-06, "loss": 0.6862, "step": 630 }, { "epoch": 0.08, "grad_norm": 0.8121496977579589, "learning_rate": 1.986744671699247e-06, "loss": 0.6473, "step": 631 }, { "epoch": 0.08, "grad_norm": 0.7623226263514385, "learning_rate": 1.9866776207664116e-06, "loss": 0.5886, "step": 632 }, { "epoch": 0.08, "grad_norm": 0.93291751183117, "learning_rate": 1.9866104018126566e-06, "loss": 0.7166, "step": 633 }, { "epoch": 0.08, "grad_norm": 0.7678529839128109, "learning_rate": 1.9865430148494283e-06, "loss": 0.6146, "step": 634 }, { "epoch": 0.08, "grad_norm": 0.7059831230569459, "learning_rate": 1.9864754598882025e-06, "loss": 0.5803, "step": 635 }, { "epoch": 0.08, "grad_norm": 0.7476221086128951, "learning_rate": 1.986407736940483e-06, "loss": 0.5519, "step": 636 }, { "epoch": 0.08, "grad_norm": 0.7212173288812114, "learning_rate": 1.986339846017802e-06, "loss": 0.5859, "step": 637 }, { "epoch": 0.08, "grad_norm": 0.7176351810897198, "learning_rate": 1.986271787131721e-06, "loss": 0.571, "step": 638 }, { "epoch": 0.08, "grad_norm": 0.9956946442398525, "learning_rate": 1.9862035602938297e-06, "loss": 0.6726, "step": 639 }, { "epoch": 0.08, "grad_norm": 0.6564916395068399, "learning_rate": 1.986135165515746e-06, "loss": 0.5427, "step": 640 }, { "epoch": 0.08, "grad_norm": 0.6854838385962346, "learning_rate": 1.9860666028091173e-06, "loss": 0.5967, "step": 641 }, { "epoch": 0.08, "grad_norm": 0.7293395742362232, "learning_rate": 1.9859978721856184e-06, "loss": 0.578, "step": 642 }, { "epoch": 0.08, "grad_norm": 1.0096567650949269, "learning_rate": 1.9859289736569544e-06, "loss": 0.6577, "step": 643 }, { "epoch": 0.08, "grad_norm": 0.6861508890341291, "learning_rate": 1.9858599072348575e-06, "loss": 0.5239, "step": 644 }, { "epoch": 0.08, "grad_norm": 0.7025329605117591, "learning_rate": 1.9857906729310893e-06, "loss": 0.5244, "step": 645 }, { "epoch": 0.08, "grad_norm": 0.6780686207067961, "learning_rate": 1.985721270757439e-06, "loss": 0.5654, "step": 646 }, { "epoch": 0.08, "grad_norm": 0.7355168902551373, "learning_rate": 1.985651700725725e-06, "loss": 0.5985, "step": 647 }, { "epoch": 0.08, "grad_norm": 0.6782576859012697, "learning_rate": 1.985581962847796e-06, "loss": 0.567, "step": 648 }, { "epoch": 0.08, "grad_norm": 0.7748055135540624, "learning_rate": 1.9855120571355256e-06, "loss": 0.6489, "step": 649 }, { "epoch": 0.08, "grad_norm": 0.797995101481714, "learning_rate": 1.9854419836008187e-06, "loss": 0.636, "step": 650 }, { "epoch": 0.08, "grad_norm": 1.068919679452347, "learning_rate": 1.9853717422556086e-06, "loss": 0.6934, "step": 651 }, { "epoch": 0.08, "grad_norm": 0.652277275370946, "learning_rate": 1.9853013331118566e-06, "loss": 0.5965, "step": 652 }, { "epoch": 0.08, "grad_norm": 0.6838579928501709, "learning_rate": 1.985230756181552e-06, "loss": 0.592, "step": 653 }, { "epoch": 0.08, "grad_norm": 0.8509369132939175, "learning_rate": 1.985160011476714e-06, "loss": 0.6302, "step": 654 }, { "epoch": 0.08, "grad_norm": 0.638192128708149, "learning_rate": 1.9850890990093887e-06, "loss": 0.5739, "step": 655 }, { "epoch": 0.08, "grad_norm": 0.8472933976053362, "learning_rate": 1.9850180187916526e-06, "loss": 0.6324, "step": 656 }, { "epoch": 0.08, "grad_norm": 0.7302032593503742, "learning_rate": 1.98494677083561e-06, "loss": 0.5844, "step": 657 }, { "epoch": 0.08, "grad_norm": 0.6978153532388472, "learning_rate": 1.9848753551533938e-06, "loss": 0.5769, "step": 658 }, { "epoch": 0.08, "grad_norm": 0.8981173286577327, "learning_rate": 1.9848037717571643e-06, "loss": 0.6869, "step": 659 }, { "epoch": 0.08, "grad_norm": 1.3002925257464328, "learning_rate": 1.9847320206591127e-06, "loss": 0.6434, "step": 660 }, { "epoch": 0.08, "grad_norm": 0.6903143137091778, "learning_rate": 1.9846601018714565e-06, "loss": 0.6287, "step": 661 }, { "epoch": 0.08, "grad_norm": 0.6611786972979261, "learning_rate": 1.9845880154064427e-06, "loss": 0.6288, "step": 662 }, { "epoch": 0.08, "grad_norm": 0.8570696709079825, "learning_rate": 1.9845157612763476e-06, "loss": 0.7148, "step": 663 }, { "epoch": 0.08, "grad_norm": 0.8632101911499828, "learning_rate": 1.9844433394934752e-06, "loss": 0.6443, "step": 664 }, { "epoch": 0.08, "grad_norm": 0.7211289966916442, "learning_rate": 1.9843707500701576e-06, "loss": 0.5307, "step": 665 }, { "epoch": 0.08, "grad_norm": 0.669404058601053, "learning_rate": 1.9842979930187567e-06, "loss": 0.5953, "step": 666 }, { "epoch": 0.08, "grad_norm": 0.9615249294392699, "learning_rate": 1.9842250683516613e-06, "loss": 0.7195, "step": 667 }, { "epoch": 0.09, "grad_norm": 0.8504393865612274, "learning_rate": 1.984151976081291e-06, "loss": 0.6468, "step": 668 }, { "epoch": 0.09, "grad_norm": 0.8216159208214725, "learning_rate": 1.984078716220092e-06, "loss": 0.6562, "step": 669 }, { "epoch": 0.09, "grad_norm": 0.6381573640965261, "learning_rate": 1.9840052887805393e-06, "loss": 0.5942, "step": 670 }, { "epoch": 0.09, "grad_norm": 2.1493429130505968, "learning_rate": 1.9839316937751375e-06, "loss": 0.7116, "step": 671 }, { "epoch": 0.09, "grad_norm": 0.7066314067389571, "learning_rate": 1.9838579312164183e-06, "loss": 0.5911, "step": 672 }, { "epoch": 0.09, "grad_norm": 0.9357411774356766, "learning_rate": 1.9837840011169438e-06, "loss": 0.6336, "step": 673 }, { "epoch": 0.09, "grad_norm": 0.6030118498947621, "learning_rate": 1.9837099034893023e-06, "loss": 0.5461, "step": 674 }, { "epoch": 0.09, "grad_norm": 0.6169748181955841, "learning_rate": 1.9836356383461127e-06, "loss": 0.5685, "step": 675 }, { "epoch": 0.09, "grad_norm": 0.6152438497554215, "learning_rate": 1.9835612057000216e-06, "loss": 0.5398, "step": 676 }, { "epoch": 0.09, "grad_norm": 0.9247181940605527, "learning_rate": 1.9834866055637036e-06, "loss": 0.5918, "step": 677 }, { "epoch": 0.09, "grad_norm": 0.9769797312158686, "learning_rate": 1.9834118379498627e-06, "loss": 0.6289, "step": 678 }, { "epoch": 0.09, "grad_norm": 0.5877982936371612, "learning_rate": 1.9833369028712305e-06, "loss": 0.5781, "step": 679 }, { "epoch": 0.09, "grad_norm": 0.6105643811295265, "learning_rate": 1.9832618003405687e-06, "loss": 0.5566, "step": 680 }, { "epoch": 0.09, "grad_norm": 0.8964336800230498, "learning_rate": 1.9831865303706654e-06, "loss": 0.6654, "step": 681 }, { "epoch": 0.09, "grad_norm": 0.601525226866425, "learning_rate": 1.9831110929743387e-06, "loss": 0.5364, "step": 682 }, { "epoch": 0.09, "grad_norm": 0.7153807518569122, "learning_rate": 1.9830354881644352e-06, "loss": 0.6017, "step": 683 }, { "epoch": 0.09, "grad_norm": 0.6613277913097433, "learning_rate": 1.9829597159538285e-06, "loss": 0.6032, "step": 684 }, { "epoch": 0.09, "grad_norm": 0.9356859925227377, "learning_rate": 1.982883776355423e-06, "loss": 0.6625, "step": 685 }, { "epoch": 0.09, "grad_norm": 0.7458433874832644, "learning_rate": 1.9828076693821504e-06, "loss": 0.595, "step": 686 }, { "epoch": 0.09, "grad_norm": 0.6714002784813978, "learning_rate": 1.9827313950469703e-06, "loss": 0.5596, "step": 687 }, { "epoch": 0.09, "grad_norm": 0.5846272822520122, "learning_rate": 1.9826549533628715e-06, "loss": 0.5436, "step": 688 }, { "epoch": 0.09, "grad_norm": 0.6442604704176558, "learning_rate": 1.982578344342872e-06, "loss": 0.5682, "step": 689 }, { "epoch": 0.09, "grad_norm": 0.7033242533518118, "learning_rate": 1.982501568000016e-06, "loss": 0.6249, "step": 690 }, { "epoch": 0.09, "grad_norm": 0.8201422698235402, "learning_rate": 1.982424624347379e-06, "loss": 0.6259, "step": 691 }, { "epoch": 0.09, "grad_norm": 0.8269105215466174, "learning_rate": 1.982347513398063e-06, "loss": 0.6717, "step": 692 }, { "epoch": 0.09, "grad_norm": 0.6831797804868314, "learning_rate": 1.9822702351652e-06, "loss": 0.5644, "step": 693 }, { "epoch": 0.09, "grad_norm": 0.8786783283100819, "learning_rate": 1.9821927896619485e-06, "loss": 0.6106, "step": 694 }, { "epoch": 0.09, "grad_norm": 0.7369761423317939, "learning_rate": 1.982115176901498e-06, "loss": 0.5609, "step": 695 }, { "epoch": 0.09, "grad_norm": 0.6579635394648186, "learning_rate": 1.982037396897064e-06, "loss": 0.5416, "step": 696 }, { "epoch": 0.09, "grad_norm": 0.6304713145194579, "learning_rate": 1.9819594496618923e-06, "loss": 0.6018, "step": 697 }, { "epoch": 0.09, "grad_norm": 0.8358720668230432, "learning_rate": 1.9818813352092563e-06, "loss": 0.6196, "step": 698 }, { "epoch": 0.09, "grad_norm": 0.7491183598980263, "learning_rate": 1.981803053552458e-06, "loss": 0.637, "step": 699 }, { "epoch": 0.09, "grad_norm": 0.721564288749998, "learning_rate": 1.9817246047048283e-06, "loss": 0.5666, "step": 700 }, { "epoch": 0.09, "grad_norm": 0.850259146024705, "learning_rate": 1.9816459886797255e-06, "loss": 0.6978, "step": 701 }, { "epoch": 0.09, "grad_norm": 0.7261715688498724, "learning_rate": 1.981567205490538e-06, "loss": 0.5794, "step": 702 }, { "epoch": 0.09, "grad_norm": 0.8818505414130215, "learning_rate": 1.981488255150681e-06, "loss": 0.719, "step": 703 }, { "epoch": 0.09, "grad_norm": 0.996778087177852, "learning_rate": 1.9814091376735994e-06, "loss": 0.6621, "step": 704 }, { "epoch": 0.09, "grad_norm": 0.7435369607876877, "learning_rate": 1.981329853072766e-06, "loss": 0.6197, "step": 705 }, { "epoch": 0.09, "grad_norm": 0.6614107989283773, "learning_rate": 1.9812504013616818e-06, "loss": 0.5634, "step": 706 }, { "epoch": 0.09, "grad_norm": 0.822138660115289, "learning_rate": 1.981170782553877e-06, "loss": 0.5917, "step": 707 }, { "epoch": 0.09, "grad_norm": 1.562372679884486, "learning_rate": 1.9810909966629095e-06, "loss": 0.5999, "step": 708 }, { "epoch": 0.09, "grad_norm": 0.6851523294104136, "learning_rate": 1.981011043702366e-06, "loss": 0.5803, "step": 709 }, { "epoch": 0.09, "grad_norm": 0.8202947131891288, "learning_rate": 1.980930923685862e-06, "loss": 0.6446, "step": 710 }, { "epoch": 0.09, "grad_norm": 0.8168578829032651, "learning_rate": 1.9808506366270416e-06, "loss": 0.6598, "step": 711 }, { "epoch": 0.09, "grad_norm": 0.7948613365837286, "learning_rate": 1.9807701825395754e-06, "loss": 0.6593, "step": 712 }, { "epoch": 0.09, "grad_norm": 1.0769684651848344, "learning_rate": 1.9806895614371652e-06, "loss": 0.6486, "step": 713 }, { "epoch": 0.09, "grad_norm": 0.7735145860441905, "learning_rate": 1.9806087733335392e-06, "loss": 0.6403, "step": 714 }, { "epoch": 0.09, "grad_norm": 0.6972305435984866, "learning_rate": 1.980527818242455e-06, "loss": 0.5982, "step": 715 }, { "epoch": 0.09, "grad_norm": 0.7035279575541616, "learning_rate": 1.9804466961776987e-06, "loss": 0.5373, "step": 716 }, { "epoch": 0.09, "grad_norm": 0.8513575120217315, "learning_rate": 1.980365407153084e-06, "loss": 0.6601, "step": 717 }, { "epoch": 0.09, "grad_norm": 0.6889164678998441, "learning_rate": 1.9802839511824537e-06, "loss": 0.5766, "step": 718 }, { "epoch": 0.09, "grad_norm": 0.6074745451393104, "learning_rate": 1.9802023282796794e-06, "loss": 0.5141, "step": 719 }, { "epoch": 0.09, "grad_norm": 0.7494585477055167, "learning_rate": 1.98012053845866e-06, "loss": 0.6424, "step": 720 }, { "epoch": 0.09, "grad_norm": 0.744756094324546, "learning_rate": 1.980038581733324e-06, "loss": 0.5569, "step": 721 }, { "epoch": 0.09, "grad_norm": 0.6963258127690319, "learning_rate": 1.979956458117627e-06, "loss": 0.5898, "step": 722 }, { "epoch": 0.09, "grad_norm": 0.7443315653394612, "learning_rate": 1.979874167625555e-06, "loss": 0.6094, "step": 723 }, { "epoch": 0.09, "grad_norm": 0.6783430466692265, "learning_rate": 1.97979171027112e-06, "loss": 0.5341, "step": 724 }, { "epoch": 0.09, "grad_norm": 1.0547625462312304, "learning_rate": 1.979709086068364e-06, "loss": 0.6716, "step": 725 }, { "epoch": 0.09, "grad_norm": 0.9200871025087154, "learning_rate": 1.9796262950313574e-06, "loss": 0.7183, "step": 726 }, { "epoch": 0.09, "grad_norm": 0.7375319506273126, "learning_rate": 1.979543337174199e-06, "loss": 0.5304, "step": 727 }, { "epoch": 0.09, "grad_norm": 0.6500347364959113, "learning_rate": 1.979460212511014e-06, "loss": 0.5277, "step": 728 }, { "epoch": 0.09, "grad_norm": 0.8393473988890956, "learning_rate": 1.9793769210559593e-06, "loss": 0.6023, "step": 729 }, { "epoch": 0.09, "grad_norm": 0.7615589291819547, "learning_rate": 1.9792934628232182e-06, "loss": 0.6326, "step": 730 }, { "epoch": 0.09, "grad_norm": 0.755037844078346, "learning_rate": 1.9792098378270024e-06, "loss": 0.6899, "step": 731 }, { "epoch": 0.09, "grad_norm": 0.7538385948811868, "learning_rate": 1.979126046081553e-06, "loss": 0.5357, "step": 732 }, { "epoch": 0.09, "grad_norm": 1.957082455704836, "learning_rate": 1.9790420876011375e-06, "loss": 0.6522, "step": 733 }, { "epoch": 0.09, "grad_norm": 1.0596192482506819, "learning_rate": 1.9789579624000547e-06, "loss": 0.5922, "step": 734 }, { "epoch": 0.09, "grad_norm": 0.68645085342628, "learning_rate": 1.9788736704926295e-06, "loss": 0.5492, "step": 735 }, { "epoch": 0.09, "grad_norm": 0.6237244323698963, "learning_rate": 1.9787892118932157e-06, "loss": 0.5538, "step": 736 }, { "epoch": 0.09, "grad_norm": 0.6661273263181292, "learning_rate": 1.9787045866161963e-06, "loss": 0.5469, "step": 737 }, { "epoch": 0.09, "grad_norm": 0.8873004904484652, "learning_rate": 1.9786197946759823e-06, "loss": 0.6573, "step": 738 }, { "epoch": 0.09, "grad_norm": 0.7708683347714198, "learning_rate": 1.978534836087012e-06, "loss": 0.6333, "step": 739 }, { "epoch": 0.09, "grad_norm": 0.6640724985353499, "learning_rate": 1.9784497108637535e-06, "loss": 0.5494, "step": 740 }, { "epoch": 0.09, "grad_norm": 0.7680990101790869, "learning_rate": 1.9783644190207025e-06, "loss": 0.5547, "step": 741 }, { "epoch": 0.09, "grad_norm": 0.8040354475152123, "learning_rate": 1.978278960572384e-06, "loss": 0.5584, "step": 742 }, { "epoch": 0.09, "grad_norm": 0.5995552064853391, "learning_rate": 1.9781933355333496e-06, "loss": 0.5583, "step": 743 }, { "epoch": 0.09, "grad_norm": 0.6078379065944832, "learning_rate": 1.9781075439181813e-06, "loss": 0.4987, "step": 744 }, { "epoch": 0.09, "grad_norm": 0.6836467921643848, "learning_rate": 1.9780215857414878e-06, "loss": 0.6178, "step": 745 }, { "epoch": 0.1, "grad_norm": 0.614677285489946, "learning_rate": 1.977935461017907e-06, "loss": 0.4891, "step": 746 }, { "epoch": 0.1, "grad_norm": 0.9059187188466682, "learning_rate": 1.9778491697621055e-06, "loss": 0.6396, "step": 747 }, { "epoch": 0.1, "grad_norm": 0.6726997689046449, "learning_rate": 1.977762711988778e-06, "loss": 0.6019, "step": 748 }, { "epoch": 0.1, "grad_norm": 1.0082093747366472, "learning_rate": 1.977676087712646e-06, "loss": 0.6241, "step": 749 }, { "epoch": 0.1, "grad_norm": 0.8012353153348502, "learning_rate": 1.9775892969484622e-06, "loss": 0.6084, "step": 750 }, { "epoch": 0.1, "grad_norm": 0.7681270068283917, "learning_rate": 1.9775023397110056e-06, "loss": 0.6478, "step": 751 }, { "epoch": 0.1, "grad_norm": 0.6186195967705558, "learning_rate": 1.977415216015084e-06, "loss": 0.5715, "step": 752 }, { "epoch": 0.1, "grad_norm": 0.5727427056040725, "learning_rate": 1.9773279258755336e-06, "loss": 0.5509, "step": 753 }, { "epoch": 0.1, "grad_norm": 0.7936499913214147, "learning_rate": 1.9772404693072197e-06, "loss": 0.6106, "step": 754 }, { "epoch": 0.1, "grad_norm": 1.050579912611296, "learning_rate": 1.9771528463250342e-06, "loss": 0.6438, "step": 755 }, { "epoch": 0.1, "grad_norm": 0.7738457636617446, "learning_rate": 1.9770650569438988e-06, "loss": 0.6052, "step": 756 }, { "epoch": 0.1, "grad_norm": 0.617902845393335, "learning_rate": 1.9769771011787635e-06, "loss": 0.6029, "step": 757 }, { "epoch": 0.1, "grad_norm": 0.8106681826993078, "learning_rate": 1.976888979044606e-06, "loss": 0.5978, "step": 758 }, { "epoch": 0.1, "grad_norm": 0.6493905243301383, "learning_rate": 1.976800690556432e-06, "loss": 0.545, "step": 759 }, { "epoch": 0.1, "grad_norm": 0.8113768682418111, "learning_rate": 1.9767122357292772e-06, "loss": 0.618, "step": 760 }, { "epoch": 0.1, "grad_norm": 0.6743122755685925, "learning_rate": 1.976623614578204e-06, "loss": 0.552, "step": 761 }, { "epoch": 0.1, "grad_norm": 0.7090764247025748, "learning_rate": 1.9765348271183033e-06, "loss": 0.6299, "step": 762 }, { "epoch": 0.1, "grad_norm": 0.611676256337836, "learning_rate": 1.976445873364695e-06, "loss": 0.5453, "step": 763 }, { "epoch": 0.1, "grad_norm": 0.7442564411507394, "learning_rate": 1.976356753332527e-06, "loss": 0.5545, "step": 764 }, { "epoch": 0.1, "grad_norm": 0.6769787023241824, "learning_rate": 1.9762674670369754e-06, "loss": 0.5113, "step": 765 }, { "epoch": 0.1, "grad_norm": 0.7499204560811521, "learning_rate": 1.976178014493245e-06, "loss": 0.6017, "step": 766 }, { "epoch": 0.1, "grad_norm": 0.8227860002787356, "learning_rate": 1.9760883957165687e-06, "loss": 0.6627, "step": 767 }, { "epoch": 0.1, "grad_norm": 0.6669506457369923, "learning_rate": 1.975998610722207e-06, "loss": 0.524, "step": 768 }, { "epoch": 0.1, "grad_norm": 1.6845905311063933, "learning_rate": 1.9759086595254503e-06, "loss": 0.6344, "step": 769 }, { "epoch": 0.1, "grad_norm": 0.7689295491697293, "learning_rate": 1.9758185421416154e-06, "loss": 0.5903, "step": 770 }, { "epoch": 0.1, "grad_norm": 0.8614482838117034, "learning_rate": 1.975728258586049e-06, "loss": 0.6386, "step": 771 }, { "epoch": 0.1, "grad_norm": 0.7993298795256107, "learning_rate": 1.9756378088741254e-06, "loss": 0.6282, "step": 772 }, { "epoch": 0.1, "grad_norm": 0.6164833275253789, "learning_rate": 1.9755471930212465e-06, "loss": 0.5859, "step": 773 }, { "epoch": 0.1, "grad_norm": 0.6283345723717823, "learning_rate": 1.975456411042844e-06, "loss": 0.5594, "step": 774 }, { "epoch": 0.1, "grad_norm": 0.9230906296433633, "learning_rate": 1.9753654629543775e-06, "loss": 0.6469, "step": 775 }, { "epoch": 0.1, "grad_norm": 0.8415304821767399, "learning_rate": 1.9752743487713333e-06, "loss": 0.6234, "step": 776 }, { "epoch": 0.1, "grad_norm": 0.7485345883437187, "learning_rate": 1.975183068509228e-06, "loss": 0.6548, "step": 777 }, { "epoch": 0.1, "grad_norm": 0.6999141079604583, "learning_rate": 1.9750916221836057e-06, "loss": 0.5655, "step": 778 }, { "epoch": 0.1, "grad_norm": 0.720893133797929, "learning_rate": 1.9750000098100384e-06, "loss": 0.6021, "step": 779 }, { "epoch": 0.1, "grad_norm": 0.8770269361720169, "learning_rate": 1.974908231404127e-06, "loss": 0.7092, "step": 780 }, { "epoch": 0.1, "grad_norm": 0.7047161075071706, "learning_rate": 1.9748162869815e-06, "loss": 0.5996, "step": 781 }, { "epoch": 0.1, "grad_norm": 0.6967152116901362, "learning_rate": 1.974724176557815e-06, "loss": 0.5853, "step": 782 }, { "epoch": 0.1, "grad_norm": 0.6833733719656645, "learning_rate": 1.974631900148758e-06, "loss": 0.5669, "step": 783 }, { "epoch": 0.1, "grad_norm": 1.0753168782421432, "learning_rate": 1.9745394577700412e-06, "loss": 0.6255, "step": 784 }, { "epoch": 0.1, "grad_norm": 0.7924289733640827, "learning_rate": 1.974446849437408e-06, "loss": 0.535, "step": 785 }, { "epoch": 0.1, "grad_norm": 0.8641187993501612, "learning_rate": 1.9743540751666275e-06, "loss": 0.6754, "step": 786 }, { "epoch": 0.1, "grad_norm": 0.5993352150914808, "learning_rate": 1.9742611349734993e-06, "loss": 0.5303, "step": 787 }, { "epoch": 0.1, "grad_norm": 0.7612309741319502, "learning_rate": 1.9741680288738493e-06, "loss": 0.5318, "step": 788 }, { "epoch": 0.1, "grad_norm": 0.6563031969741099, "learning_rate": 1.9740747568835326e-06, "loss": 0.5954, "step": 789 }, { "epoch": 0.1, "grad_norm": 0.8653190011904542, "learning_rate": 1.973981319018433e-06, "loss": 0.6767, "step": 790 }, { "epoch": 0.1, "grad_norm": 0.8851278973729406, "learning_rate": 1.973887715294461e-06, "loss": 0.635, "step": 791 }, { "epoch": 0.1, "grad_norm": 0.8335661893994004, "learning_rate": 1.9737939457275577e-06, "loss": 0.6645, "step": 792 }, { "epoch": 0.1, "grad_norm": 0.7013083132095101, "learning_rate": 1.9737000103336904e-06, "loss": 0.5917, "step": 793 }, { "epoch": 0.1, "grad_norm": 0.8045268022193846, "learning_rate": 1.9736059091288547e-06, "loss": 0.6328, "step": 794 }, { "epoch": 0.1, "grad_norm": 0.8157123521063797, "learning_rate": 1.9735116421290763e-06, "loss": 0.7478, "step": 795 }, { "epoch": 0.1, "grad_norm": 0.8162946383338712, "learning_rate": 1.9734172093504066e-06, "loss": 0.6151, "step": 796 }, { "epoch": 0.1, "grad_norm": 1.0025998518349999, "learning_rate": 1.9733226108089274e-06, "loss": 0.6719, "step": 797 }, { "epoch": 0.1, "grad_norm": 0.7219412772478457, "learning_rate": 1.973227846520748e-06, "loss": 0.5632, "step": 798 }, { "epoch": 0.1, "grad_norm": 0.6539449767895833, "learning_rate": 1.9731329165020044e-06, "loss": 0.5341, "step": 799 }, { "epoch": 0.1, "grad_norm": 0.6308148494124889, "learning_rate": 1.9730378207688636e-06, "loss": 0.5423, "step": 800 }, { "epoch": 0.1, "grad_norm": 0.8244248254207861, "learning_rate": 1.9729425593375194e-06, "loss": 0.6382, "step": 801 }, { "epoch": 0.1, "grad_norm": 0.6446769236880466, "learning_rate": 1.972847132224193e-06, "loss": 0.4918, "step": 802 }, { "epoch": 0.1, "grad_norm": 0.9353138395325179, "learning_rate": 1.972751539445135e-06, "loss": 0.6713, "step": 803 }, { "epoch": 0.1, "grad_norm": 1.1516783449500636, "learning_rate": 1.9726557810166236e-06, "loss": 0.6724, "step": 804 }, { "epoch": 0.1, "grad_norm": 0.7267148545537543, "learning_rate": 1.9725598569549666e-06, "loss": 0.5869, "step": 805 }, { "epoch": 0.1, "grad_norm": 1.152682738515493, "learning_rate": 1.972463767276497e-06, "loss": 0.6124, "step": 806 }, { "epoch": 0.1, "grad_norm": 0.7854841173684677, "learning_rate": 1.9723675119975794e-06, "loss": 0.6663, "step": 807 }, { "epoch": 0.1, "grad_norm": 0.6451516063426489, "learning_rate": 1.972271091134605e-06, "loss": 0.5704, "step": 808 }, { "epoch": 0.1, "grad_norm": 0.6692814395934845, "learning_rate": 1.972174504703992e-06, "loss": 0.5863, "step": 809 }, { "epoch": 0.1, "grad_norm": 0.7777329592678502, "learning_rate": 1.9720777527221895e-06, "loss": 0.678, "step": 810 }, { "epoch": 0.1, "grad_norm": 0.5661195228618701, "learning_rate": 1.9719808352056723e-06, "loss": 0.5203, "step": 811 }, { "epoch": 0.1, "grad_norm": 0.9646879244718761, "learning_rate": 1.971883752170945e-06, "loss": 0.667, "step": 812 }, { "epoch": 0.1, "grad_norm": 0.8074883107794468, "learning_rate": 1.97178650363454e-06, "loss": 0.6822, "step": 813 }, { "epoch": 0.1, "grad_norm": 0.8825110475294269, "learning_rate": 1.971689089613017e-06, "loss": 0.7125, "step": 814 }, { "epoch": 0.1, "grad_norm": 0.629931632306053, "learning_rate": 1.9715915101229653e-06, "loss": 0.5206, "step": 815 }, { "epoch": 0.1, "grad_norm": 0.7516799941162974, "learning_rate": 1.9714937651810014e-06, "loss": 0.6123, "step": 816 }, { "epoch": 0.1, "grad_norm": 0.6652751750052173, "learning_rate": 1.97139585480377e-06, "loss": 0.5244, "step": 817 }, { "epoch": 0.1, "grad_norm": 0.6768018857299368, "learning_rate": 1.971297779007945e-06, "loss": 0.566, "step": 818 }, { "epoch": 0.1, "grad_norm": 0.6978941505933295, "learning_rate": 1.9711995378102266e-06, "loss": 0.547, "step": 819 }, { "epoch": 0.1, "grad_norm": 0.6861533062120846, "learning_rate": 1.971101131227345e-06, "loss": 0.5593, "step": 820 }, { "epoch": 0.1, "grad_norm": 0.9063479975989226, "learning_rate": 1.9710025592760577e-06, "loss": 0.655, "step": 821 }, { "epoch": 0.1, "grad_norm": 0.6790587749117465, "learning_rate": 1.97090382197315e-06, "loss": 0.5371, "step": 822 }, { "epoch": 0.1, "grad_norm": 0.7536524000166772, "learning_rate": 1.9708049193354365e-06, "loss": 0.5867, "step": 823 }, { "epoch": 0.1, "grad_norm": 0.8856482839691099, "learning_rate": 1.9707058513797594e-06, "loss": 0.6552, "step": 824 }, { "epoch": 0.11, "grad_norm": 0.767530599739631, "learning_rate": 1.970606618122988e-06, "loss": 0.6333, "step": 825 }, { "epoch": 0.11, "grad_norm": 0.7464623585787192, "learning_rate": 1.970507219582021e-06, "loss": 0.5018, "step": 826 }, { "epoch": 0.11, "grad_norm": 0.7837843379036434, "learning_rate": 1.9704076557737855e-06, "loss": 0.6545, "step": 827 }, { "epoch": 0.11, "grad_norm": 0.7020161513102735, "learning_rate": 1.970307926715236e-06, "loss": 0.563, "step": 828 }, { "epoch": 0.11, "grad_norm": 0.8334557405686772, "learning_rate": 1.9702080324233552e-06, "loss": 0.5838, "step": 829 }, { "epoch": 0.11, "grad_norm": 0.8230648051282579, "learning_rate": 1.970107972915154e-06, "loss": 0.5788, "step": 830 }, { "epoch": 0.11, "grad_norm": 0.7091907961059148, "learning_rate": 1.9700077482076717e-06, "loss": 0.5616, "step": 831 }, { "epoch": 0.11, "grad_norm": 0.7750383593538468, "learning_rate": 1.9699073583179747e-06, "loss": 0.5997, "step": 832 }, { "epoch": 0.11, "grad_norm": 0.6740022634100381, "learning_rate": 1.9698068032631596e-06, "loss": 0.6132, "step": 833 }, { "epoch": 0.11, "grad_norm": 0.7882570790571856, "learning_rate": 1.969706083060349e-06, "loss": 0.5558, "step": 834 }, { "epoch": 0.11, "grad_norm": 0.6957575466412099, "learning_rate": 1.969605197726695e-06, "loss": 0.5424, "step": 835 }, { "epoch": 0.11, "grad_norm": 0.7208118349747086, "learning_rate": 1.9695041472793767e-06, "loss": 0.621, "step": 836 }, { "epoch": 0.11, "grad_norm": 0.644365790890913, "learning_rate": 1.969402931735603e-06, "loss": 0.5271, "step": 837 }, { "epoch": 0.11, "grad_norm": 0.8177597935463959, "learning_rate": 1.9693015511126085e-06, "loss": 0.6274, "step": 838 }, { "epoch": 0.11, "grad_norm": 1.072878796563849, "learning_rate": 1.969200005427658e-06, "loss": 0.6189, "step": 839 }, { "epoch": 0.11, "grad_norm": 0.8998226534320273, "learning_rate": 1.969098294698044e-06, "loss": 0.7163, "step": 840 }, { "epoch": 0.11, "grad_norm": 0.7604696326589294, "learning_rate": 1.9689964189410863e-06, "loss": 0.5678, "step": 841 }, { "epoch": 0.11, "grad_norm": 0.7911457679644602, "learning_rate": 1.9688943781741335e-06, "loss": 0.6205, "step": 842 }, { "epoch": 0.11, "grad_norm": 0.6680396094349825, "learning_rate": 1.9687921724145617e-06, "loss": 0.5554, "step": 843 }, { "epoch": 0.11, "grad_norm": 0.6661923195634976, "learning_rate": 1.968689801679776e-06, "loss": 0.5841, "step": 844 }, { "epoch": 0.11, "grad_norm": 0.8159686257413361, "learning_rate": 1.968587265987209e-06, "loss": 0.5559, "step": 845 }, { "epoch": 0.11, "grad_norm": 0.5730838723654572, "learning_rate": 1.9684845653543208e-06, "loss": 0.5074, "step": 846 }, { "epoch": 0.11, "grad_norm": 0.8333071881854135, "learning_rate": 1.9683816997986007e-06, "loss": 0.6619, "step": 847 }, { "epoch": 0.11, "grad_norm": 0.7575372228641677, "learning_rate": 1.968278669337566e-06, "loss": 0.6183, "step": 848 }, { "epoch": 0.11, "grad_norm": 0.745386428295944, "learning_rate": 1.968175473988761e-06, "loss": 0.5712, "step": 849 }, { "epoch": 0.11, "grad_norm": 0.8320975073747173, "learning_rate": 1.9680721137697596e-06, "loss": 0.704, "step": 850 }, { "epoch": 0.11, "grad_norm": 0.7717622861848287, "learning_rate": 1.9679685886981623e-06, "loss": 0.5409, "step": 851 }, { "epoch": 0.11, "grad_norm": 0.9336652978512102, "learning_rate": 1.9678648987915986e-06, "loss": 0.662, "step": 852 }, { "epoch": 0.11, "grad_norm": 0.7288405466530615, "learning_rate": 1.9677610440677255e-06, "loss": 0.5965, "step": 853 }, { "epoch": 0.11, "grad_norm": 0.620954768399733, "learning_rate": 1.9676570245442286e-06, "loss": 0.5472, "step": 854 }, { "epoch": 0.11, "grad_norm": 0.8738518908654507, "learning_rate": 1.967552840238822e-06, "loss": 0.6513, "step": 855 }, { "epoch": 0.11, "grad_norm": 0.9019528013021154, "learning_rate": 1.9674484911692463e-06, "loss": 0.6896, "step": 856 }, { "epoch": 0.11, "grad_norm": 0.6704085801939242, "learning_rate": 1.967343977353271e-06, "loss": 0.5332, "step": 857 }, { "epoch": 0.11, "grad_norm": 0.8028041816294855, "learning_rate": 1.9672392988086946e-06, "loss": 0.6112, "step": 858 }, { "epoch": 0.11, "grad_norm": 0.6130844073092024, "learning_rate": 1.9671344555533424e-06, "loss": 0.5117, "step": 859 }, { "epoch": 0.11, "grad_norm": 0.9980313890468107, "learning_rate": 1.967029447605067e-06, "loss": 0.5986, "step": 860 }, { "epoch": 0.11, "grad_norm": 0.7556821285252112, "learning_rate": 1.966924274981752e-06, "loss": 0.5601, "step": 861 }, { "epoch": 0.11, "grad_norm": 0.6003288460302754, "learning_rate": 1.9668189377013064e-06, "loss": 0.5582, "step": 862 }, { "epoch": 0.11, "grad_norm": 0.8872025885557276, "learning_rate": 1.9667134357816673e-06, "loss": 0.617, "step": 863 }, { "epoch": 0.11, "grad_norm": 0.807200861570258, "learning_rate": 1.9666077692408014e-06, "loss": 0.6408, "step": 864 }, { "epoch": 0.11, "grad_norm": 0.6506359020241926, "learning_rate": 1.9665019380967026e-06, "loss": 0.5487, "step": 865 }, { "epoch": 0.11, "grad_norm": 1.1707914896724807, "learning_rate": 1.9663959423673927e-06, "loss": 0.6624, "step": 866 }, { "epoch": 0.11, "grad_norm": 0.8941151580606443, "learning_rate": 1.9662897820709218e-06, "loss": 0.6957, "step": 867 }, { "epoch": 0.11, "grad_norm": 0.7871389132599979, "learning_rate": 1.9661834572253675e-06, "loss": 0.6946, "step": 868 }, { "epoch": 0.11, "grad_norm": 0.6656862842487034, "learning_rate": 1.966076967848836e-06, "loss": 0.5343, "step": 869 }, { "epoch": 0.11, "grad_norm": 0.8925960749544167, "learning_rate": 1.9659703139594614e-06, "loss": 0.6423, "step": 870 }, { "epoch": 0.11, "grad_norm": 0.8469531925547247, "learning_rate": 1.965863495575406e-06, "loss": 0.5805, "step": 871 }, { "epoch": 0.11, "grad_norm": 1.1238085900777257, "learning_rate": 1.9657565127148595e-06, "loss": 0.679, "step": 872 }, { "epoch": 0.11, "grad_norm": 0.992012246640505, "learning_rate": 1.96564936539604e-06, "loss": 0.6616, "step": 873 }, { "epoch": 0.11, "grad_norm": 0.8081121377049941, "learning_rate": 1.9655420536371937e-06, "loss": 0.6942, "step": 874 }, { "epoch": 0.11, "grad_norm": 0.8756769987784445, "learning_rate": 1.9654345774565946e-06, "loss": 0.6944, "step": 875 }, { "epoch": 0.11, "grad_norm": 0.9070341601901982, "learning_rate": 1.9653269368725447e-06, "loss": 0.6715, "step": 876 }, { "epoch": 0.11, "grad_norm": 0.6954680880537243, "learning_rate": 1.965219131903374e-06, "loss": 0.5463, "step": 877 }, { "epoch": 0.11, "grad_norm": 0.9178385644566754, "learning_rate": 1.965111162567441e-06, "loss": 0.67, "step": 878 }, { "epoch": 0.11, "grad_norm": 0.7087952952823579, "learning_rate": 1.9650030288831316e-06, "loss": 0.5693, "step": 879 }, { "epoch": 0.11, "grad_norm": 0.899839627152507, "learning_rate": 1.9648947308688595e-06, "loss": 0.6994, "step": 880 }, { "epoch": 0.11, "grad_norm": 1.1177015279770126, "learning_rate": 1.9647862685430667e-06, "loss": 0.6434, "step": 881 }, { "epoch": 0.11, "grad_norm": 0.7648707249075081, "learning_rate": 1.9646776419242236e-06, "loss": 0.5736, "step": 882 }, { "epoch": 0.11, "grad_norm": 0.7836341961325934, "learning_rate": 1.964568851030828e-06, "loss": 0.6624, "step": 883 }, { "epoch": 0.11, "grad_norm": 0.7209668121937278, "learning_rate": 1.964459895881406e-06, "loss": 0.6178, "step": 884 }, { "epoch": 0.11, "grad_norm": 0.7395842368060255, "learning_rate": 1.9643507764945113e-06, "loss": 0.5613, "step": 885 }, { "epoch": 0.11, "grad_norm": 0.9523517694947433, "learning_rate": 1.964241492888726e-06, "loss": 0.712, "step": 886 }, { "epoch": 0.11, "grad_norm": 0.759579988839089, "learning_rate": 1.96413204508266e-06, "loss": 0.6016, "step": 887 }, { "epoch": 0.11, "grad_norm": 0.6415294384338988, "learning_rate": 1.964022433094951e-06, "loss": 0.5848, "step": 888 }, { "epoch": 0.11, "grad_norm": 0.7033070413563287, "learning_rate": 1.9639126569442644e-06, "loss": 0.5537, "step": 889 }, { "epoch": 0.11, "grad_norm": 0.862684507791676, "learning_rate": 1.9638027166492942e-06, "loss": 0.6993, "step": 890 }, { "epoch": 0.11, "grad_norm": 0.8488339878826154, "learning_rate": 1.9636926122287627e-06, "loss": 0.666, "step": 891 }, { "epoch": 0.11, "grad_norm": 0.7676733778553675, "learning_rate": 1.9635823437014184e-06, "loss": 0.6668, "step": 892 }, { "epoch": 0.11, "grad_norm": 0.6198485149103461, "learning_rate": 1.96347191108604e-06, "loss": 0.574, "step": 893 }, { "epoch": 0.11, "grad_norm": 0.7306946953524489, "learning_rate": 1.9633613144014324e-06, "loss": 0.5582, "step": 894 }, { "epoch": 0.11, "grad_norm": 1.1483857194936544, "learning_rate": 1.963250553666429e-06, "loss": 0.6477, "step": 895 }, { "epoch": 0.11, "grad_norm": 0.8361601279203786, "learning_rate": 1.9631396288998917e-06, "loss": 0.7013, "step": 896 }, { "epoch": 0.11, "grad_norm": 0.6508504075829842, "learning_rate": 1.9630285401207093e-06, "loss": 0.5468, "step": 897 }, { "epoch": 0.11, "grad_norm": 0.9719928320331945, "learning_rate": 1.9629172873477994e-06, "loss": 0.7296, "step": 898 }, { "epoch": 0.11, "grad_norm": 0.6445508578130046, "learning_rate": 1.9628058706001067e-06, "loss": 0.5512, "step": 899 }, { "epoch": 0.11, "grad_norm": 0.8914683069239387, "learning_rate": 1.962694289896605e-06, "loss": 0.6344, "step": 900 }, { "epoch": 0.11, "grad_norm": 0.8449164193588393, "learning_rate": 1.962582545256295e-06, "loss": 0.6636, "step": 901 }, { "epoch": 0.11, "grad_norm": 0.9756133621343571, "learning_rate": 1.9624706366982055e-06, "loss": 0.7175, "step": 902 }, { "epoch": 0.12, "grad_norm": 0.8818123628675876, "learning_rate": 1.9623585642413935e-06, "loss": 0.633, "step": 903 }, { "epoch": 0.12, "grad_norm": 0.6678019445350563, "learning_rate": 1.962246327904944e-06, "loss": 0.5766, "step": 904 }, { "epoch": 0.12, "grad_norm": 0.8317168953782076, "learning_rate": 1.96213392770797e-06, "loss": 0.6421, "step": 905 }, { "epoch": 0.12, "grad_norm": 0.6981180381964973, "learning_rate": 1.96202136366961e-06, "loss": 0.6194, "step": 906 }, { "epoch": 0.12, "grad_norm": 0.7622682944041159, "learning_rate": 1.9619086358090354e-06, "loss": 0.5531, "step": 907 }, { "epoch": 0.12, "grad_norm": 0.75578595477043, "learning_rate": 1.961795744145441e-06, "loss": 0.6329, "step": 908 }, { "epoch": 0.12, "grad_norm": 0.8655359020541531, "learning_rate": 1.9616826886980505e-06, "loss": 0.6678, "step": 909 }, { "epoch": 0.12, "grad_norm": 0.616476173207024, "learning_rate": 1.961569469486118e-06, "loss": 0.5736, "step": 910 }, { "epoch": 0.12, "grad_norm": 0.9039803077091871, "learning_rate": 1.9614560865289216e-06, "loss": 0.6836, "step": 911 }, { "epoch": 0.12, "grad_norm": 0.8515318698678036, "learning_rate": 1.9613425398457705e-06, "loss": 0.6925, "step": 912 }, { "epoch": 0.12, "grad_norm": 0.8825450267905519, "learning_rate": 1.9612288294559996e-06, "loss": 0.6572, "step": 913 }, { "epoch": 0.12, "grad_norm": 0.7856344574933853, "learning_rate": 1.961114955378973e-06, "loss": 0.6003, "step": 914 }, { "epoch": 0.12, "grad_norm": 0.8901537464858076, "learning_rate": 1.9610009176340827e-06, "loss": 0.7376, "step": 915 }, { "epoch": 0.12, "grad_norm": 0.6570273588128311, "learning_rate": 1.9608867162407478e-06, "loss": 0.5807, "step": 916 }, { "epoch": 0.12, "grad_norm": 0.679573944086101, "learning_rate": 1.960772351218415e-06, "loss": 0.5499, "step": 917 }, { "epoch": 0.12, "grad_norm": 1.44691379510409, "learning_rate": 1.960657822586561e-06, "loss": 0.6875, "step": 918 }, { "epoch": 0.12, "grad_norm": 0.8628824940057535, "learning_rate": 1.960543130364687e-06, "loss": 0.6168, "step": 919 }, { "epoch": 0.12, "grad_norm": 0.6644367545946832, "learning_rate": 1.960428274572325e-06, "loss": 0.5247, "step": 920 }, { "epoch": 0.12, "grad_norm": 1.1566375496791679, "learning_rate": 1.960313255229034e-06, "loss": 0.594, "step": 921 }, { "epoch": 0.12, "grad_norm": 0.8486626490370862, "learning_rate": 1.9601980723543995e-06, "loss": 0.6319, "step": 922 }, { "epoch": 0.12, "grad_norm": 0.635568398261324, "learning_rate": 1.960082725968037e-06, "loss": 0.5464, "step": 923 }, { "epoch": 0.12, "grad_norm": 0.9806401726812076, "learning_rate": 1.959967216089588e-06, "loss": 0.6549, "step": 924 }, { "epoch": 0.12, "grad_norm": 0.7509605500822271, "learning_rate": 1.959851542738723e-06, "loss": 0.5991, "step": 925 }, { "epoch": 0.12, "grad_norm": 0.9731404587910542, "learning_rate": 1.95973570593514e-06, "loss": 0.678, "step": 926 }, { "epoch": 0.12, "grad_norm": 0.9326375492091475, "learning_rate": 1.9596197056985647e-06, "loss": 0.7253, "step": 927 }, { "epoch": 0.12, "grad_norm": 0.7179170579867882, "learning_rate": 1.959503542048751e-06, "loss": 0.5706, "step": 928 }, { "epoch": 0.12, "grad_norm": 1.6737422602401815, "learning_rate": 1.95938721500548e-06, "loss": 0.6426, "step": 929 }, { "epoch": 0.12, "grad_norm": 0.7240756204658625, "learning_rate": 1.959270724588561e-06, "loss": 0.574, "step": 930 }, { "epoch": 0.12, "grad_norm": 0.6409236687110115, "learning_rate": 1.9591540708178313e-06, "loss": 0.5373, "step": 931 }, { "epoch": 0.12, "grad_norm": 0.825775295230597, "learning_rate": 1.9590372537131557e-06, "loss": 0.6122, "step": 932 }, { "epoch": 0.12, "grad_norm": 1.01668741116574, "learning_rate": 1.9589202732944267e-06, "loss": 0.6588, "step": 933 }, { "epoch": 0.12, "grad_norm": 0.71473525983015, "learning_rate": 1.9588031295815656e-06, "loss": 0.5933, "step": 934 }, { "epoch": 0.12, "grad_norm": 0.7219179053880909, "learning_rate": 1.95868582259452e-06, "loss": 0.5991, "step": 935 }, { "epoch": 0.12, "grad_norm": 0.6357498739505724, "learning_rate": 1.9585683523532663e-06, "loss": 0.5712, "step": 936 }, { "epoch": 0.12, "grad_norm": 0.685819651357079, "learning_rate": 1.9584507188778088e-06, "loss": 0.5586, "step": 937 }, { "epoch": 0.12, "grad_norm": 0.6831178281641995, "learning_rate": 1.9583329221881787e-06, "loss": 0.6349, "step": 938 }, { "epoch": 0.12, "grad_norm": 0.6512072512955748, "learning_rate": 1.9582149623044355e-06, "loss": 0.5205, "step": 939 }, { "epoch": 0.12, "grad_norm": 0.6091723736912761, "learning_rate": 1.958096839246667e-06, "loss": 0.5683, "step": 940 }, { "epoch": 0.12, "grad_norm": 0.6268845064929525, "learning_rate": 1.9579785530349886e-06, "loss": 0.5942, "step": 941 }, { "epoch": 0.12, "grad_norm": 0.8077310486510025, "learning_rate": 1.9578601036895426e-06, "loss": 0.6525, "step": 942 }, { "epoch": 0.12, "grad_norm": 0.652601740939098, "learning_rate": 1.9577414912304994e-06, "loss": 0.5633, "step": 943 }, { "epoch": 0.12, "grad_norm": 0.8628204471744981, "learning_rate": 1.9576227156780584e-06, "loss": 0.6376, "step": 944 }, { "epoch": 0.12, "grad_norm": 0.664294270848172, "learning_rate": 1.957503777052445e-06, "loss": 0.5413, "step": 945 }, { "epoch": 0.12, "grad_norm": 0.7130336865682105, "learning_rate": 1.957384675373914e-06, "loss": 0.6602, "step": 946 }, { "epoch": 0.12, "grad_norm": 0.7017616579615986, "learning_rate": 1.9572654106627465e-06, "loss": 0.5861, "step": 947 }, { "epoch": 0.12, "grad_norm": 0.8793748076341631, "learning_rate": 1.957145982939252e-06, "loss": 0.6156, "step": 948 }, { "epoch": 0.12, "grad_norm": 0.8792784393219998, "learning_rate": 1.9570263922237684e-06, "loss": 0.6221, "step": 949 }, { "epoch": 0.12, "grad_norm": 0.8944236817388244, "learning_rate": 1.9569066385366606e-06, "loss": 0.6488, "step": 950 }, { "epoch": 0.12, "grad_norm": 0.6130274294674304, "learning_rate": 1.956786721898321e-06, "loss": 0.552, "step": 951 }, { "epoch": 0.12, "grad_norm": 0.9118972880536662, "learning_rate": 1.9566666423291703e-06, "loss": 0.6614, "step": 952 }, { "epoch": 0.12, "grad_norm": 1.065551707146563, "learning_rate": 1.956546399849657e-06, "loss": 0.7326, "step": 953 }, { "epoch": 0.12, "grad_norm": 0.7582685110301709, "learning_rate": 1.956425994480257e-06, "loss": 0.531, "step": 954 }, { "epoch": 0.12, "grad_norm": 0.6653634723779718, "learning_rate": 1.956305426241474e-06, "loss": 0.5496, "step": 955 }, { "epoch": 0.12, "grad_norm": 0.7575504632353254, "learning_rate": 1.95618469515384e-06, "loss": 0.6576, "step": 956 }, { "epoch": 0.12, "grad_norm": 0.8653980295606406, "learning_rate": 1.9560638012379134e-06, "loss": 0.6313, "step": 957 }, { "epoch": 0.12, "grad_norm": 0.8547768248261564, "learning_rate": 1.9559427445142822e-06, "loss": 0.6521, "step": 958 }, { "epoch": 0.12, "grad_norm": 0.7431944461334282, "learning_rate": 1.9558215250035605e-06, "loss": 0.5849, "step": 959 }, { "epoch": 0.12, "grad_norm": 0.7654466003345309, "learning_rate": 1.9557001427263905e-06, "loss": 0.6681, "step": 960 }, { "epoch": 0.12, "grad_norm": 0.8455995576267222, "learning_rate": 1.9555785977034424e-06, "loss": 0.623, "step": 961 }, { "epoch": 0.12, "grad_norm": 0.839411812822503, "learning_rate": 1.9554568899554146e-06, "loss": 0.6912, "step": 962 }, { "epoch": 0.12, "grad_norm": 0.7728414189959457, "learning_rate": 1.9553350195030323e-06, "loss": 0.595, "step": 963 }, { "epoch": 0.12, "grad_norm": 0.6091506068133549, "learning_rate": 1.955212986367049e-06, "loss": 0.5307, "step": 964 }, { "epoch": 0.12, "grad_norm": 0.750434124018174, "learning_rate": 1.9550907905682447e-06, "loss": 0.5552, "step": 965 }, { "epoch": 0.12, "grad_norm": 0.7316859418842425, "learning_rate": 1.9549684321274295e-06, "loss": 0.6154, "step": 966 }, { "epoch": 0.12, "grad_norm": 0.8255528426582034, "learning_rate": 1.954845911065439e-06, "loss": 0.6492, "step": 967 }, { "epoch": 0.12, "grad_norm": 0.7060440370455678, "learning_rate": 1.954723227403137e-06, "loss": 0.5759, "step": 968 }, { "epoch": 0.12, "grad_norm": 0.6691216005609768, "learning_rate": 1.954600381161416e-06, "loss": 0.5781, "step": 969 }, { "epoch": 0.12, "grad_norm": 0.9218309446186043, "learning_rate": 1.954477372361195e-06, "loss": 0.6753, "step": 970 }, { "epoch": 0.12, "grad_norm": 0.8788392012012401, "learning_rate": 1.954354201023421e-06, "loss": 0.6261, "step": 971 }, { "epoch": 0.12, "grad_norm": 1.0672925516054856, "learning_rate": 1.9542308671690687e-06, "loss": 0.6717, "step": 972 }, { "epoch": 0.12, "grad_norm": 0.8246027701640113, "learning_rate": 1.9541073708191413e-06, "loss": 0.6718, "step": 973 }, { "epoch": 0.12, "grad_norm": 0.6586986780432992, "learning_rate": 1.9539837119946684e-06, "loss": 0.5095, "step": 974 }, { "epoch": 0.12, "grad_norm": 0.6308706666400716, "learning_rate": 1.953859890716708e-06, "loss": 0.5711, "step": 975 }, { "epoch": 0.12, "grad_norm": 0.8512473873273898, "learning_rate": 1.953735907006345e-06, "loss": 0.6957, "step": 976 }, { "epoch": 0.12, "grad_norm": 0.8637807721394538, "learning_rate": 1.9536117608846934e-06, "loss": 0.648, "step": 977 }, { "epoch": 0.12, "grad_norm": 0.899870011434305, "learning_rate": 1.9534874523728934e-06, "loss": 0.6489, "step": 978 }, { "epoch": 0.12, "grad_norm": 0.6545434401739678, "learning_rate": 1.9533629814921135e-06, "loss": 0.5448, "step": 979 }, { "epoch": 0.12, "grad_norm": 0.7016465648287954, "learning_rate": 1.95323834826355e-06, "loss": 0.5948, "step": 980 }, { "epoch": 0.12, "grad_norm": 0.885420103869679, "learning_rate": 1.9531135527084266e-06, "loss": 0.6262, "step": 981 }, { "epoch": 0.13, "grad_norm": 1.0647739163061707, "learning_rate": 1.952988594847995e-06, "loss": 0.6571, "step": 982 }, { "epoch": 0.13, "grad_norm": 0.6760371753851048, "learning_rate": 1.952863474703533e-06, "loss": 0.5715, "step": 983 }, { "epoch": 0.13, "grad_norm": 0.6315718168842409, "learning_rate": 1.9527381922963485e-06, "loss": 0.5408, "step": 984 }, { "epoch": 0.13, "grad_norm": 0.7097733938811038, "learning_rate": 1.952612747647776e-06, "loss": 0.5439, "step": 985 }, { "epoch": 0.13, "grad_norm": 0.8054186509689291, "learning_rate": 1.952487140779176e-06, "loss": 0.6633, "step": 986 }, { "epoch": 0.13, "grad_norm": 0.7057688267227478, "learning_rate": 1.9523613717119395e-06, "loss": 0.5785, "step": 987 }, { "epoch": 0.13, "grad_norm": 0.8099029387321683, "learning_rate": 1.9522354404674825e-06, "loss": 0.6959, "step": 988 }, { "epoch": 0.13, "grad_norm": 0.8145500774131236, "learning_rate": 1.9521093470672506e-06, "loss": 0.6414, "step": 989 }, { "epoch": 0.13, "grad_norm": 0.8786986949218748, "learning_rate": 1.951983091532716e-06, "loss": 0.6559, "step": 990 }, { "epoch": 0.13, "grad_norm": 0.8970142537100135, "learning_rate": 1.9518566738853783e-06, "loss": 0.6331, "step": 991 }, { "epoch": 0.13, "grad_norm": 0.8894416744581568, "learning_rate": 1.9517300941467654e-06, "loss": 0.6474, "step": 992 }, { "epoch": 0.13, "grad_norm": 0.9298782210216464, "learning_rate": 1.951603352338433e-06, "loss": 0.6992, "step": 993 }, { "epoch": 0.13, "grad_norm": 0.7446927148432636, "learning_rate": 1.951476448481963e-06, "loss": 0.5679, "step": 994 }, { "epoch": 0.13, "grad_norm": 1.077894191402499, "learning_rate": 1.9513493825989663e-06, "loss": 0.7248, "step": 995 }, { "epoch": 0.13, "grad_norm": 0.9748796028914771, "learning_rate": 1.951222154711081e-06, "loss": 0.6901, "step": 996 }, { "epoch": 0.13, "grad_norm": 0.8218818474312494, "learning_rate": 1.951094764839972e-06, "loss": 0.6209, "step": 997 }, { "epoch": 0.13, "grad_norm": 0.7422075710612643, "learning_rate": 1.9509672130073336e-06, "loss": 0.5551, "step": 998 }, { "epoch": 0.13, "grad_norm": 0.8760039636783977, "learning_rate": 1.9508394992348855e-06, "loss": 0.6702, "step": 999 }, { "epoch": 0.13, "grad_norm": 0.6571775809869482, "learning_rate": 1.9507116235443768e-06, "loss": 0.5427, "step": 1000 }, { "epoch": 0.13, "grad_norm": 0.7412628423125, "learning_rate": 1.9505835859575827e-06, "loss": 0.5228, "step": 1001 }, { "epoch": 0.13, "grad_norm": 0.6826359699964017, "learning_rate": 1.9504553864963075e-06, "loss": 0.5629, "step": 1002 }, { "epoch": 0.13, "grad_norm": 0.8420154398581653, "learning_rate": 1.9503270251823813e-06, "loss": 0.6353, "step": 1003 }, { "epoch": 0.13, "grad_norm": 1.3194748711940778, "learning_rate": 1.950198502037663e-06, "loss": 0.6344, "step": 1004 }, { "epoch": 0.13, "grad_norm": 0.7165792860786188, "learning_rate": 1.9500698170840395e-06, "loss": 0.5669, "step": 1005 }, { "epoch": 0.13, "grad_norm": 0.8242020899197583, "learning_rate": 1.9499409703434235e-06, "loss": 0.6216, "step": 1006 }, { "epoch": 0.13, "grad_norm": 0.7774332047427653, "learning_rate": 1.9498119618377567e-06, "loss": 0.5632, "step": 1007 }, { "epoch": 0.13, "grad_norm": 0.8802185040286188, "learning_rate": 1.949682791589008e-06, "loss": 0.6679, "step": 1008 }, { "epoch": 0.13, "grad_norm": 0.8885269717467343, "learning_rate": 1.9495534596191737e-06, "loss": 0.6544, "step": 1009 }, { "epoch": 0.13, "grad_norm": 0.6753136727591234, "learning_rate": 1.9494239659502777e-06, "loss": 0.5372, "step": 1010 }, { "epoch": 0.13, "grad_norm": 0.691133612907113, "learning_rate": 1.9492943106043707e-06, "loss": 0.5546, "step": 1011 }, { "epoch": 0.13, "grad_norm": 0.7857458950595669, "learning_rate": 1.9491644936035327e-06, "loss": 0.6296, "step": 1012 }, { "epoch": 0.13, "grad_norm": 0.7673595644084344, "learning_rate": 1.94903451496987e-06, "loss": 0.6087, "step": 1013 }, { "epoch": 0.13, "grad_norm": 0.9954746942129905, "learning_rate": 1.948904374725516e-06, "loss": 0.6648, "step": 1014 }, { "epoch": 0.13, "grad_norm": 1.0116626132420077, "learning_rate": 1.948774072892633e-06, "loss": 0.6337, "step": 1015 }, { "epoch": 0.13, "grad_norm": 0.8298368388961275, "learning_rate": 1.948643609493409e-06, "loss": 0.6495, "step": 1016 }, { "epoch": 0.13, "grad_norm": 0.7551172622682851, "learning_rate": 1.9485129845500617e-06, "loss": 0.6196, "step": 1017 }, { "epoch": 0.13, "grad_norm": 0.5858320444489276, "learning_rate": 1.9483821980848344e-06, "loss": 0.5332, "step": 1018 }, { "epoch": 0.13, "grad_norm": 0.8764961495990471, "learning_rate": 1.948251250119999e-06, "loss": 0.6675, "step": 1019 }, { "epoch": 0.13, "grad_norm": 0.8536165652066033, "learning_rate": 1.948120140677855e-06, "loss": 0.6727, "step": 1020 }, { "epoch": 0.13, "grad_norm": 1.0002376823235686, "learning_rate": 1.947988869780728e-06, "loss": 0.7281, "step": 1021 }, { "epoch": 0.13, "grad_norm": 0.5923346454463809, "learning_rate": 1.9478574374509723e-06, "loss": 0.5542, "step": 1022 }, { "epoch": 0.13, "grad_norm": 0.889302258009896, "learning_rate": 1.94772584371097e-06, "loss": 0.6577, "step": 1023 }, { "epoch": 0.13, "grad_norm": 0.704760735458918, "learning_rate": 1.94759408858313e-06, "loss": 0.6083, "step": 1024 }, { "epoch": 0.13, "grad_norm": 0.6406526632342663, "learning_rate": 1.9474621720898884e-06, "loss": 0.5366, "step": 1025 }, { "epoch": 0.13, "grad_norm": 0.8694413281708848, "learning_rate": 1.9473300942537097e-06, "loss": 0.5898, "step": 1026 }, { "epoch": 0.13, "grad_norm": 0.8963454623231697, "learning_rate": 1.947197855097085e-06, "loss": 0.6668, "step": 1027 }, { "epoch": 0.13, "grad_norm": 0.7813685713235143, "learning_rate": 1.9470654546425336e-06, "loss": 0.6389, "step": 1028 }, { "epoch": 0.13, "grad_norm": 0.7472000999867008, "learning_rate": 1.946932892912602e-06, "loss": 0.6222, "step": 1029 }, { "epoch": 0.13, "grad_norm": 0.7848193039094754, "learning_rate": 1.9468001699298633e-06, "loss": 0.541, "step": 1030 }, { "epoch": 0.13, "grad_norm": 0.7863990173191229, "learning_rate": 1.9466672857169195e-06, "loss": 0.6143, "step": 1031 }, { "epoch": 0.13, "grad_norm": 0.8157438486641113, "learning_rate": 1.9465342402963993e-06, "loss": 0.6673, "step": 1032 }, { "epoch": 0.13, "grad_norm": 0.6156427076836347, "learning_rate": 1.9464010336909586e-06, "loss": 0.5463, "step": 1033 }, { "epoch": 0.13, "grad_norm": 0.7647861765516883, "learning_rate": 1.946267665923282e-06, "loss": 0.6641, "step": 1034 }, { "epoch": 0.13, "grad_norm": 0.7117216345407701, "learning_rate": 1.9461341370160796e-06, "loss": 0.5857, "step": 1035 }, { "epoch": 0.13, "grad_norm": 0.9080814882338024, "learning_rate": 1.9460004469920905e-06, "loss": 0.6243, "step": 1036 }, { "epoch": 0.13, "grad_norm": 0.6305319940050684, "learning_rate": 1.9458665958740803e-06, "loss": 0.6011, "step": 1037 }, { "epoch": 0.13, "grad_norm": 0.7998340149246962, "learning_rate": 1.9457325836848435e-06, "loss": 0.653, "step": 1038 }, { "epoch": 0.13, "grad_norm": 0.7963617009389307, "learning_rate": 1.9455984104471995e-06, "loss": 0.6008, "step": 1039 }, { "epoch": 0.13, "grad_norm": 0.692624102242373, "learning_rate": 1.945464076183998e-06, "loss": 0.5426, "step": 1040 }, { "epoch": 0.13, "grad_norm": 0.6760912764255483, "learning_rate": 1.9453295809181142e-06, "loss": 0.5765, "step": 1041 }, { "epoch": 0.13, "grad_norm": 0.7388277011341495, "learning_rate": 1.945194924672451e-06, "loss": 0.6318, "step": 1042 }, { "epoch": 0.13, "grad_norm": 0.7447928289285848, "learning_rate": 1.9450601074699386e-06, "loss": 0.5986, "step": 1043 }, { "epoch": 0.13, "grad_norm": 0.9642657349404752, "learning_rate": 1.944925129333536e-06, "loss": 0.6817, "step": 1044 }, { "epoch": 0.13, "grad_norm": 0.6877706625509188, "learning_rate": 1.9447899902862276e-06, "loss": 0.597, "step": 1045 }, { "epoch": 0.13, "grad_norm": 0.8698614090595488, "learning_rate": 1.944654690351027e-06, "loss": 0.6931, "step": 1046 }, { "epoch": 0.13, "grad_norm": 2.0981606813145115, "learning_rate": 1.9445192295509737e-06, "loss": 0.618, "step": 1047 }, { "epoch": 0.13, "grad_norm": 0.6518020122966293, "learning_rate": 1.9443836079091356e-06, "loss": 0.5644, "step": 1048 }, { "epoch": 0.13, "grad_norm": 0.8747205072849583, "learning_rate": 1.9442478254486074e-06, "loss": 0.6974, "step": 1049 }, { "epoch": 0.13, "grad_norm": 0.9746118674511695, "learning_rate": 1.944111882192512e-06, "loss": 0.5603, "step": 1050 }, { "epoch": 0.13, "grad_norm": 0.653193248833744, "learning_rate": 1.9439757781639987e-06, "loss": 0.5498, "step": 1051 }, { "epoch": 0.13, "grad_norm": 0.633627634503781, "learning_rate": 1.9438395133862445e-06, "loss": 0.5263, "step": 1052 }, { "epoch": 0.13, "grad_norm": 0.6637626784956492, "learning_rate": 1.9437030878824537e-06, "loss": 0.5283, "step": 1053 }, { "epoch": 0.13, "grad_norm": 1.0968658575531014, "learning_rate": 1.9435665016758593e-06, "loss": 0.6985, "step": 1054 }, { "epoch": 0.13, "grad_norm": 0.7148896291312722, "learning_rate": 1.9434297547897193e-06, "loss": 0.5965, "step": 1055 }, { "epoch": 0.13, "grad_norm": 0.6091746246190839, "learning_rate": 1.94329284724732e-06, "loss": 0.5453, "step": 1056 }, { "epoch": 0.13, "grad_norm": 0.8983334622505745, "learning_rate": 1.9431557790719767e-06, "loss": 0.6965, "step": 1057 }, { "epoch": 0.13, "grad_norm": 0.826693885522798, "learning_rate": 1.94301855028703e-06, "loss": 0.6532, "step": 1058 }, { "epoch": 0.13, "grad_norm": 0.9314442890591906, "learning_rate": 1.942881160915848e-06, "loss": 0.681, "step": 1059 }, { "epoch": 0.14, "grad_norm": 0.7907594912255953, "learning_rate": 1.9427436109818273e-06, "loss": 0.5827, "step": 1060 }, { "epoch": 0.14, "grad_norm": 0.7616363607335165, "learning_rate": 1.9426059005083914e-06, "loss": 0.5528, "step": 1061 }, { "epoch": 0.14, "grad_norm": 0.8336706489849964, "learning_rate": 1.9424680295189902e-06, "loss": 0.6203, "step": 1062 }, { "epoch": 0.14, "grad_norm": 0.7967345028916949, "learning_rate": 1.942329998037103e-06, "loss": 0.6553, "step": 1063 }, { "epoch": 0.14, "grad_norm": 0.6806951122840847, "learning_rate": 1.942191806086233e-06, "loss": 0.5197, "step": 1064 }, { "epoch": 0.14, "grad_norm": 0.6617381784232839, "learning_rate": 1.9420534536899147e-06, "loss": 0.5088, "step": 1065 }, { "epoch": 0.14, "grad_norm": 0.7308480331089972, "learning_rate": 1.941914940871708e-06, "loss": 0.5363, "step": 1066 }, { "epoch": 0.14, "grad_norm": 0.6933216069577045, "learning_rate": 1.941776267655199e-06, "loss": 0.6042, "step": 1067 }, { "epoch": 0.14, "grad_norm": 0.9587349504708711, "learning_rate": 1.941637434064003e-06, "loss": 0.6103, "step": 1068 }, { "epoch": 0.14, "grad_norm": 0.8942256929096047, "learning_rate": 1.941498440121762e-06, "loss": 0.6581, "step": 1069 }, { "epoch": 0.14, "grad_norm": 0.6647307134916989, "learning_rate": 1.9413592858521454e-06, "loss": 0.5873, "step": 1070 }, { "epoch": 0.14, "grad_norm": 0.6955678592577135, "learning_rate": 1.941219971278849e-06, "loss": 0.5614, "step": 1071 }, { "epoch": 0.14, "grad_norm": 0.8754055677719764, "learning_rate": 1.941080496425597e-06, "loss": 0.5592, "step": 1072 }, { "epoch": 0.14, "grad_norm": 0.6329092371201205, "learning_rate": 1.940940861316141e-06, "loss": 0.4778, "step": 1073 }, { "epoch": 0.14, "grad_norm": 0.7674168003496451, "learning_rate": 1.940801065974259e-06, "loss": 0.6417, "step": 1074 }, { "epoch": 0.14, "grad_norm": 0.7114106878583818, "learning_rate": 1.9406611104237563e-06, "loss": 0.5752, "step": 1075 }, { "epoch": 0.14, "grad_norm": 0.800945537690414, "learning_rate": 1.9405209946884663e-06, "loss": 0.6345, "step": 1076 }, { "epoch": 0.14, "grad_norm": 0.8979484550796473, "learning_rate": 1.9403807187922488e-06, "loss": 0.6309, "step": 1077 }, { "epoch": 0.14, "grad_norm": 0.6812372448121354, "learning_rate": 1.9402402827589922e-06, "loss": 0.575, "step": 1078 }, { "epoch": 0.14, "grad_norm": 0.7416745660763798, "learning_rate": 1.940099686612611e-06, "loss": 0.5853, "step": 1079 }, { "epoch": 0.14, "grad_norm": 0.8105997977534364, "learning_rate": 1.939958930377047e-06, "loss": 0.6405, "step": 1080 }, { "epoch": 0.14, "grad_norm": 0.7066066770629402, "learning_rate": 1.9398180140762693e-06, "loss": 0.571, "step": 1081 }, { "epoch": 0.14, "grad_norm": 0.8486341886656302, "learning_rate": 1.9396769377342746e-06, "loss": 0.6644, "step": 1082 }, { "epoch": 0.14, "grad_norm": 0.8071678200527111, "learning_rate": 1.939535701375087e-06, "loss": 0.5723, "step": 1083 }, { "epoch": 0.14, "grad_norm": 0.6930113320194837, "learning_rate": 1.9393943050227577e-06, "loss": 0.5229, "step": 1084 }, { "epoch": 0.14, "grad_norm": 0.9604468919910021, "learning_rate": 1.9392527487013644e-06, "loss": 0.5919, "step": 1085 }, { "epoch": 0.14, "grad_norm": 0.7988565519555352, "learning_rate": 1.9391110324350137e-06, "loss": 0.5853, "step": 1086 }, { "epoch": 0.14, "grad_norm": 0.6915793982238381, "learning_rate": 1.938969156247837e-06, "loss": 0.5946, "step": 1087 }, { "epoch": 0.14, "grad_norm": 0.8067418405855121, "learning_rate": 1.938827120163996e-06, "loss": 0.5705, "step": 1088 }, { "epoch": 0.14, "grad_norm": 1.1070727048737719, "learning_rate": 1.9386849242076765e-06, "loss": 0.7025, "step": 1089 }, { "epoch": 0.14, "grad_norm": 0.5964867751025502, "learning_rate": 1.9385425684030932e-06, "loss": 0.5059, "step": 1090 }, { "epoch": 0.14, "grad_norm": 0.6949607044823535, "learning_rate": 1.938400052774489e-06, "loss": 0.5495, "step": 1091 }, { "epoch": 0.14, "grad_norm": 0.9194822693613911, "learning_rate": 1.938257377346131e-06, "loss": 0.7148, "step": 1092 }, { "epoch": 0.14, "grad_norm": 0.9797311933107593, "learning_rate": 1.9381145421423174e-06, "loss": 0.687, "step": 1093 }, { "epoch": 0.14, "grad_norm": 0.9141821067600461, "learning_rate": 1.9379715471873698e-06, "loss": 0.6134, "step": 1094 }, { "epoch": 0.14, "grad_norm": 0.668377195455847, "learning_rate": 1.9378283925056397e-06, "loss": 0.519, "step": 1095 }, { "epoch": 0.14, "grad_norm": 0.6993184531571733, "learning_rate": 1.9376850781215043e-06, "loss": 0.5371, "step": 1096 }, { "epoch": 0.14, "grad_norm": 0.7525847791352538, "learning_rate": 1.937541604059369e-06, "loss": 0.5674, "step": 1097 }, { "epoch": 0.14, "grad_norm": 0.693140727198669, "learning_rate": 1.937397970343666e-06, "loss": 0.6131, "step": 1098 }, { "epoch": 0.14, "grad_norm": 0.7664767087052541, "learning_rate": 1.937254176998854e-06, "loss": 0.5932, "step": 1099 }, { "epoch": 0.14, "grad_norm": 0.9408770088213105, "learning_rate": 1.9371102240494204e-06, "loss": 0.6181, "step": 1100 }, { "epoch": 0.14, "grad_norm": 0.9873221004482454, "learning_rate": 1.9369661115198777e-06, "loss": 0.6332, "step": 1101 }, { "epoch": 0.14, "grad_norm": 0.8872834912327237, "learning_rate": 1.936821839434768e-06, "loss": 0.6027, "step": 1102 }, { "epoch": 0.14, "grad_norm": 0.8205679072103947, "learning_rate": 1.9366774078186585e-06, "loss": 0.6139, "step": 1103 }, { "epoch": 0.14, "grad_norm": 0.8048346361478271, "learning_rate": 1.936532816696145e-06, "loss": 0.6687, "step": 1104 }, { "epoch": 0.14, "grad_norm": 0.7004455698753029, "learning_rate": 1.936388066091849e-06, "loss": 0.5787, "step": 1105 }, { "epoch": 0.14, "grad_norm": 0.7495237974386355, "learning_rate": 1.9362431560304212e-06, "loss": 0.6173, "step": 1106 }, { "epoch": 0.14, "grad_norm": 0.5798545724881402, "learning_rate": 1.9360980865365374e-06, "loss": 0.5187, "step": 1107 }, { "epoch": 0.14, "grad_norm": 0.8217930525066944, "learning_rate": 1.9359528576349018e-06, "loss": 0.617, "step": 1108 }, { "epoch": 0.14, "grad_norm": 0.6276164211494447, "learning_rate": 1.9358074693502448e-06, "loss": 0.5076, "step": 1109 }, { "epoch": 0.14, "grad_norm": 0.7804785567807171, "learning_rate": 1.9356619217073252e-06, "loss": 0.5665, "step": 1110 }, { "epoch": 0.14, "grad_norm": 0.666291863444723, "learning_rate": 1.9355162147309277e-06, "loss": 0.5794, "step": 1111 }, { "epoch": 0.14, "grad_norm": 0.8244689364191091, "learning_rate": 1.9353703484458656e-06, "loss": 0.6334, "step": 1112 }, { "epoch": 0.14, "grad_norm": 0.6449582618268463, "learning_rate": 1.9352243228769773e-06, "loss": 0.5536, "step": 1113 }, { "epoch": 0.14, "grad_norm": 0.8638339888611027, "learning_rate": 1.93507813804913e-06, "loss": 0.6589, "step": 1114 }, { "epoch": 0.14, "grad_norm": 0.6733519285145788, "learning_rate": 1.9349317939872172e-06, "loss": 0.5967, "step": 1115 }, { "epoch": 0.14, "grad_norm": 0.8664413783808915, "learning_rate": 1.9347852907161603e-06, "loss": 0.6614, "step": 1116 }, { "epoch": 0.14, "grad_norm": 0.6346462987485083, "learning_rate": 1.9346386282609065e-06, "loss": 0.5033, "step": 1117 }, { "epoch": 0.14, "grad_norm": 0.8142530139378948, "learning_rate": 1.934491806646432e-06, "loss": 0.6477, "step": 1118 }, { "epoch": 0.14, "grad_norm": 0.6169218432922565, "learning_rate": 1.9343448258977373e-06, "loss": 0.5653, "step": 1119 }, { "epoch": 0.14, "grad_norm": 0.774852317712845, "learning_rate": 1.9341976860398537e-06, "loss": 0.6113, "step": 1120 }, { "epoch": 0.14, "grad_norm": 0.6720945225673164, "learning_rate": 1.9340503870978357e-06, "loss": 0.5514, "step": 1121 }, { "epoch": 0.14, "grad_norm": 0.6363619153800679, "learning_rate": 1.933902929096768e-06, "loss": 0.5394, "step": 1122 }, { "epoch": 0.14, "grad_norm": 0.787582133429448, "learning_rate": 1.933755312061761e-06, "loss": 0.6362, "step": 1123 }, { "epoch": 0.14, "grad_norm": 0.5939045871277261, "learning_rate": 1.933607536017952e-06, "loss": 0.5286, "step": 1124 }, { "epoch": 0.14, "grad_norm": 0.6270578282888595, "learning_rate": 1.9334596009905057e-06, "loss": 0.5314, "step": 1125 }, { "epoch": 0.14, "grad_norm": 0.8158005581580233, "learning_rate": 1.9333115070046143e-06, "loss": 0.6076, "step": 1126 }, { "epoch": 0.14, "grad_norm": 0.940805203778106, "learning_rate": 1.9331632540854964e-06, "loss": 0.6323, "step": 1127 }, { "epoch": 0.14, "grad_norm": 0.938243551934598, "learning_rate": 1.9330148422583978e-06, "loss": 0.6594, "step": 1128 }, { "epoch": 0.14, "grad_norm": 0.6647476577798627, "learning_rate": 1.932866271548592e-06, "loss": 0.579, "step": 1129 }, { "epoch": 0.14, "grad_norm": 0.7710684211151441, "learning_rate": 1.932717541981378e-06, "loss": 0.5412, "step": 1130 }, { "epoch": 0.14, "grad_norm": 0.7015095693859049, "learning_rate": 1.932568653582084e-06, "loss": 0.6298, "step": 1131 }, { "epoch": 0.14, "grad_norm": 0.9974915380239072, "learning_rate": 1.9324196063760637e-06, "loss": 0.6829, "step": 1132 }, { "epoch": 0.14, "grad_norm": 0.7183317653473398, "learning_rate": 1.9322704003886987e-06, "loss": 0.5412, "step": 1133 }, { "epoch": 0.14, "grad_norm": 0.9903190718425698, "learning_rate": 1.932121035645396e-06, "loss": 0.68, "step": 1134 }, { "epoch": 0.14, "grad_norm": 0.6292104037721264, "learning_rate": 1.9319715121715927e-06, "loss": 0.5313, "step": 1135 }, { "epoch": 0.14, "grad_norm": 0.8115527558001782, "learning_rate": 1.9318218299927495e-06, "loss": 0.5862, "step": 1136 }, { "epoch": 0.14, "grad_norm": 0.737012421342107, "learning_rate": 1.9316719891343564e-06, "loss": 0.5973, "step": 1137 }, { "epoch": 0.14, "grad_norm": 0.6210320997555163, "learning_rate": 1.9315219896219294e-06, "loss": 0.5288, "step": 1138 }, { "epoch": 0.15, "grad_norm": 0.8140718376612642, "learning_rate": 1.9313718314810125e-06, "loss": 0.6733, "step": 1139 }, { "epoch": 0.15, "grad_norm": 0.9391408687771978, "learning_rate": 1.931221514737175e-06, "loss": 0.6565, "step": 1140 }, { "epoch": 0.15, "grad_norm": 0.9767178375198257, "learning_rate": 1.9310710394160153e-06, "loss": 0.6191, "step": 1141 }, { "epoch": 0.15, "grad_norm": 0.8675583238319798, "learning_rate": 1.9309204055431576e-06, "loss": 0.6381, "step": 1142 }, { "epoch": 0.15, "grad_norm": 0.7599491843557507, "learning_rate": 1.930769613144253e-06, "loss": 0.5328, "step": 1143 }, { "epoch": 0.15, "grad_norm": 0.7414144123790753, "learning_rate": 1.93061866224498e-06, "loss": 0.5424, "step": 1144 }, { "epoch": 0.15, "grad_norm": 0.6698290718586358, "learning_rate": 1.930467552871044e-06, "loss": 0.565, "step": 1145 }, { "epoch": 0.15, "grad_norm": 0.7534501339864335, "learning_rate": 1.930316285048177e-06, "loss": 0.6427, "step": 1146 }, { "epoch": 0.15, "grad_norm": 0.8339853243125236, "learning_rate": 1.930164858802139e-06, "loss": 0.6028, "step": 1147 }, { "epoch": 0.15, "grad_norm": 0.9429979528669824, "learning_rate": 1.9300132741587156e-06, "loss": 0.5771, "step": 1148 }, { "epoch": 0.15, "grad_norm": 0.7354834951424498, "learning_rate": 1.9298615311437206e-06, "loss": 0.5634, "step": 1149 }, { "epoch": 0.15, "grad_norm": 0.860742281353345, "learning_rate": 1.929709629782994e-06, "loss": 0.5994, "step": 1150 }, { "epoch": 0.15, "grad_norm": 0.6398553148802747, "learning_rate": 1.929557570102403e-06, "loss": 0.5468, "step": 1151 }, { "epoch": 0.15, "grad_norm": 1.4751903744678585, "learning_rate": 1.9294053521278423e-06, "loss": 0.6816, "step": 1152 }, { "epoch": 0.15, "grad_norm": 0.6097576223036705, "learning_rate": 1.929252975885232e-06, "loss": 0.4991, "step": 1153 }, { "epoch": 0.15, "grad_norm": 0.7700467787991755, "learning_rate": 1.9291004414005213e-06, "loss": 0.5818, "step": 1154 }, { "epoch": 0.15, "grad_norm": 0.8715224903533787, "learning_rate": 1.9289477486996842e-06, "loss": 0.6894, "step": 1155 }, { "epoch": 0.15, "grad_norm": 0.786570041884722, "learning_rate": 1.928794897808724e-06, "loss": 0.5328, "step": 1156 }, { "epoch": 0.15, "grad_norm": 0.8336596147355181, "learning_rate": 1.9286418887536683e-06, "loss": 0.611, "step": 1157 }, { "epoch": 0.15, "grad_norm": 0.8712321263213032, "learning_rate": 1.9284887215605735e-06, "loss": 0.6658, "step": 1158 }, { "epoch": 0.15, "grad_norm": 0.7776757576728112, "learning_rate": 1.928335396255522e-06, "loss": 0.5644, "step": 1159 }, { "epoch": 0.15, "grad_norm": 0.7908444408008988, "learning_rate": 1.9281819128646246e-06, "loss": 0.5738, "step": 1160 }, { "epoch": 0.15, "grad_norm": 0.670328077386675, "learning_rate": 1.9280282714140167e-06, "loss": 0.5772, "step": 1161 }, { "epoch": 0.15, "grad_norm": 0.7629518462574484, "learning_rate": 1.9278744719298622e-06, "loss": 0.6581, "step": 1162 }, { "epoch": 0.15, "grad_norm": 0.7432328372310353, "learning_rate": 1.927720514438352e-06, "loss": 0.5842, "step": 1163 }, { "epoch": 0.15, "grad_norm": 0.6532966563548461, "learning_rate": 1.9275663989657025e-06, "loss": 0.5392, "step": 1164 }, { "epoch": 0.15, "grad_norm": 0.6472521563349904, "learning_rate": 1.927412125538159e-06, "loss": 0.583, "step": 1165 }, { "epoch": 0.15, "grad_norm": 0.724790562194451, "learning_rate": 1.927257694181993e-06, "loss": 0.5886, "step": 1166 }, { "epoch": 0.15, "grad_norm": 0.6812233207189767, "learning_rate": 1.9271031049235005e-06, "loss": 0.5393, "step": 1167 }, { "epoch": 0.15, "grad_norm": 0.7686983648087132, "learning_rate": 1.9269483577890083e-06, "loss": 0.6548, "step": 1168 }, { "epoch": 0.15, "grad_norm": 0.9370445233884142, "learning_rate": 1.9267934528048675e-06, "loss": 0.66, "step": 1169 }, { "epoch": 0.15, "grad_norm": 0.6936458927325159, "learning_rate": 1.9266383899974572e-06, "loss": 0.5285, "step": 1170 }, { "epoch": 0.15, "grad_norm": 0.762879632520005, "learning_rate": 1.9264831693931825e-06, "loss": 0.5218, "step": 1171 }, { "epoch": 0.15, "grad_norm": 0.9400237105848452, "learning_rate": 1.926327791018476e-06, "loss": 0.6967, "step": 1172 }, { "epoch": 0.15, "grad_norm": 0.7673926913017607, "learning_rate": 1.9261722548997974e-06, "loss": 0.6223, "step": 1173 }, { "epoch": 0.15, "grad_norm": 0.8304891664403465, "learning_rate": 1.9260165610636326e-06, "loss": 0.6436, "step": 1174 }, { "epoch": 0.15, "grad_norm": 0.7012564208699866, "learning_rate": 1.9258607095364943e-06, "loss": 0.5651, "step": 1175 }, { "epoch": 0.15, "grad_norm": 0.6886271716997511, "learning_rate": 1.925704700344923e-06, "loss": 0.5646, "step": 1176 }, { "epoch": 0.15, "grad_norm": 0.7800014769126741, "learning_rate": 1.925548533515485e-06, "loss": 0.661, "step": 1177 }, { "epoch": 0.15, "grad_norm": 0.8427186840743779, "learning_rate": 1.925392209074774e-06, "loss": 0.6423, "step": 1178 }, { "epoch": 0.15, "grad_norm": 0.698738182465362, "learning_rate": 1.9252357270494107e-06, "loss": 0.5776, "step": 1179 }, { "epoch": 0.15, "grad_norm": 0.7513397289871464, "learning_rate": 1.9250790874660422e-06, "loss": 0.5704, "step": 1180 }, { "epoch": 0.15, "grad_norm": 0.6303067424437214, "learning_rate": 1.924922290351342e-06, "loss": 0.5467, "step": 1181 }, { "epoch": 0.15, "grad_norm": 0.6395174639967565, "learning_rate": 1.9247653357320117e-06, "loss": 0.5346, "step": 1182 }, { "epoch": 0.15, "grad_norm": 0.7957693693675866, "learning_rate": 1.9246082236347792e-06, "loss": 0.6355, "step": 1183 }, { "epoch": 0.15, "grad_norm": 0.6398065719930426, "learning_rate": 1.9244509540863983e-06, "loss": 0.5209, "step": 1184 }, { "epoch": 0.15, "grad_norm": 0.639712705423486, "learning_rate": 1.924293527113651e-06, "loss": 0.6506, "step": 1185 }, { "epoch": 0.15, "grad_norm": 0.6980691997113211, "learning_rate": 1.924135942743345e-06, "loss": 0.5528, "step": 1186 }, { "epoch": 0.15, "grad_norm": 0.638585163911734, "learning_rate": 1.923978201002316e-06, "loss": 0.631, "step": 1187 }, { "epoch": 0.15, "grad_norm": 0.8966225408569751, "learning_rate": 1.9238203019174247e-06, "loss": 0.7119, "step": 1188 }, { "epoch": 0.15, "grad_norm": 0.9158143712687993, "learning_rate": 1.923662245515561e-06, "loss": 0.6357, "step": 1189 }, { "epoch": 0.15, "grad_norm": 0.9116909233445981, "learning_rate": 1.923504031823639e-06, "loss": 0.6825, "step": 1190 }, { "epoch": 0.15, "grad_norm": 0.9560421431791293, "learning_rate": 1.9233456608686013e-06, "loss": 0.7004, "step": 1191 }, { "epoch": 0.15, "grad_norm": 0.6541064378880194, "learning_rate": 1.9231871326774175e-06, "loss": 0.5368, "step": 1192 }, { "epoch": 0.15, "grad_norm": 0.6955362963247771, "learning_rate": 1.9230284472770823e-06, "loss": 0.5227, "step": 1193 }, { "epoch": 0.15, "grad_norm": 1.0697263745374144, "learning_rate": 1.9228696046946184e-06, "loss": 0.6743, "step": 1194 }, { "epoch": 0.15, "grad_norm": 0.7785250147133688, "learning_rate": 1.9227106049570754e-06, "loss": 0.6277, "step": 1195 }, { "epoch": 0.15, "grad_norm": 1.2999743228369813, "learning_rate": 1.922551448091529e-06, "loss": 0.6481, "step": 1196 }, { "epoch": 0.15, "grad_norm": 0.8350688508876185, "learning_rate": 1.9223921341250823e-06, "loss": 0.6411, "step": 1197 }, { "epoch": 0.15, "grad_norm": 0.7493061457870274, "learning_rate": 1.9222326630848642e-06, "loss": 0.5784, "step": 1198 }, { "epoch": 0.15, "grad_norm": 0.7494630702656563, "learning_rate": 1.922073034998032e-06, "loss": 0.6195, "step": 1199 }, { "epoch": 0.15, "grad_norm": 0.7440114217932107, "learning_rate": 1.921913249891767e-06, "loss": 0.5639, "step": 1200 }, { "epoch": 0.15, "grad_norm": 0.8520563469938737, "learning_rate": 1.9217533077932808e-06, "loss": 0.704, "step": 1201 }, { "epoch": 0.15, "grad_norm": 0.7426956896876387, "learning_rate": 1.921593208729809e-06, "loss": 0.5392, "step": 1202 }, { "epoch": 0.15, "grad_norm": 0.634241890454269, "learning_rate": 1.9214329527286144e-06, "loss": 0.5168, "step": 1203 }, { "epoch": 0.15, "grad_norm": 0.5920910859766049, "learning_rate": 1.9212725398169876e-06, "loss": 0.5321, "step": 1204 }, { "epoch": 0.15, "grad_norm": 0.6291221979034487, "learning_rate": 1.921111970022245e-06, "loss": 0.5392, "step": 1205 }, { "epoch": 0.15, "grad_norm": 0.7099488534998689, "learning_rate": 1.92095124337173e-06, "loss": 0.5931, "step": 1206 }, { "epoch": 0.15, "grad_norm": 0.7095776035344237, "learning_rate": 1.9207903598928127e-06, "loss": 0.6143, "step": 1207 }, { "epoch": 0.15, "grad_norm": 0.9281897737044447, "learning_rate": 1.9206293196128898e-06, "loss": 0.6815, "step": 1208 }, { "epoch": 0.15, "grad_norm": 0.6131777117406543, "learning_rate": 1.9204681225593846e-06, "loss": 0.5465, "step": 1209 }, { "epoch": 0.15, "grad_norm": 0.6978483711135388, "learning_rate": 1.920306768759748e-06, "loss": 0.6274, "step": 1210 }, { "epoch": 0.15, "grad_norm": 0.7313055184658558, "learning_rate": 1.9201452582414557e-06, "loss": 0.5637, "step": 1211 }, { "epoch": 0.15, "grad_norm": 0.7262895437306012, "learning_rate": 1.9199835910320125e-06, "loss": 0.643, "step": 1212 }, { "epoch": 0.15, "grad_norm": 0.7738901016999956, "learning_rate": 1.919821767158948e-06, "loss": 0.6595, "step": 1213 }, { "epoch": 0.15, "grad_norm": 0.8209433126268498, "learning_rate": 1.919659786649819e-06, "loss": 0.5931, "step": 1214 }, { "epoch": 0.15, "grad_norm": 0.7406550822176572, "learning_rate": 1.9194976495322093e-06, "loss": 0.6336, "step": 1215 }, { "epoch": 0.15, "grad_norm": 0.7307479253798567, "learning_rate": 1.9193353558337295e-06, "loss": 0.6122, "step": 1216 }, { "epoch": 0.16, "grad_norm": 0.7776519926617271, "learning_rate": 1.9191729055820155e-06, "loss": 0.6052, "step": 1217 }, { "epoch": 0.16, "grad_norm": 0.797352638810382, "learning_rate": 1.9190102988047317e-06, "loss": 0.7196, "step": 1218 }, { "epoch": 0.16, "grad_norm": 0.7274545846154133, "learning_rate": 1.9188475355295687e-06, "loss": 0.6051, "step": 1219 }, { "epoch": 0.16, "grad_norm": 0.9351566668143381, "learning_rate": 1.9186846157842425e-06, "loss": 0.6405, "step": 1220 }, { "epoch": 0.16, "grad_norm": 0.6407916109139107, "learning_rate": 1.918521539596497e-06, "loss": 0.519, "step": 1221 }, { "epoch": 0.16, "grad_norm": 0.6005573923046778, "learning_rate": 1.9183583069941023e-06, "loss": 0.577, "step": 1222 }, { "epoch": 0.16, "grad_norm": 0.7745414623542163, "learning_rate": 1.9181949180048557e-06, "loss": 0.6147, "step": 1223 }, { "epoch": 0.16, "grad_norm": 0.8834513708937011, "learning_rate": 1.91803137265658e-06, "loss": 0.663, "step": 1224 }, { "epoch": 0.16, "grad_norm": 0.5710287257601654, "learning_rate": 1.9178676709771255e-06, "loss": 0.5369, "step": 1225 }, { "epoch": 0.16, "grad_norm": 0.7079436663247748, "learning_rate": 1.917703812994369e-06, "loss": 0.5929, "step": 1226 }, { "epoch": 0.16, "grad_norm": 0.6460241075000596, "learning_rate": 1.9175397987362135e-06, "loss": 0.5378, "step": 1227 }, { "epoch": 0.16, "grad_norm": 0.7081389172815225, "learning_rate": 1.9173756282305896e-06, "loss": 0.5482, "step": 1228 }, { "epoch": 0.16, "grad_norm": 0.6461571994523665, "learning_rate": 1.9172113015054528e-06, "loss": 0.5734, "step": 1229 }, { "epoch": 0.16, "grad_norm": 0.7534929250182225, "learning_rate": 1.9170468185887872e-06, "loss": 0.6634, "step": 1230 }, { "epoch": 0.16, "grad_norm": 0.7345784124499537, "learning_rate": 1.916882179508602e-06, "loss": 0.5427, "step": 1231 }, { "epoch": 0.16, "grad_norm": 0.7672033332877664, "learning_rate": 1.916717384292934e-06, "loss": 0.5881, "step": 1232 }, { "epoch": 0.16, "grad_norm": 0.829451785820827, "learning_rate": 1.916552432969845e-06, "loss": 0.6594, "step": 1233 }, { "epoch": 0.16, "grad_norm": 0.6975740148881469, "learning_rate": 1.9163873255674257e-06, "loss": 0.5478, "step": 1234 }, { "epoch": 0.16, "grad_norm": 0.6780230212112954, "learning_rate": 1.916222062113792e-06, "loss": 0.5676, "step": 1235 }, { "epoch": 0.16, "grad_norm": 0.7803842373117257, "learning_rate": 1.9160566426370855e-06, "loss": 0.6543, "step": 1236 }, { "epoch": 0.16, "grad_norm": 1.0437282719949326, "learning_rate": 1.9158910671654764e-06, "loss": 0.6369, "step": 1237 }, { "epoch": 0.16, "grad_norm": 0.6786858065995152, "learning_rate": 1.9157253357271602e-06, "loss": 0.5551, "step": 1238 }, { "epoch": 0.16, "grad_norm": 0.7318216384548416, "learning_rate": 1.9155594483503597e-06, "loss": 0.5494, "step": 1239 }, { "epoch": 0.16, "grad_norm": 0.7815951022116578, "learning_rate": 1.9153934050633234e-06, "loss": 0.6044, "step": 1240 }, { "epoch": 0.16, "grad_norm": 0.7414886556393671, "learning_rate": 1.915227205894326e-06, "loss": 0.6446, "step": 1241 }, { "epoch": 0.16, "grad_norm": 0.7273874048409005, "learning_rate": 1.9150608508716708e-06, "loss": 0.5764, "step": 1242 }, { "epoch": 0.16, "grad_norm": 0.7212465846983127, "learning_rate": 1.9148943400236855e-06, "loss": 0.5691, "step": 1243 }, { "epoch": 0.16, "grad_norm": 1.0413453606803564, "learning_rate": 1.9147276733787258e-06, "loss": 0.6958, "step": 1244 }, { "epoch": 0.16, "grad_norm": 0.8133200220645784, "learning_rate": 1.9145608509651724e-06, "loss": 0.6337, "step": 1245 }, { "epoch": 0.16, "grad_norm": 1.0306294731727854, "learning_rate": 1.9143938728114343e-06, "loss": 0.6847, "step": 1246 }, { "epoch": 0.16, "grad_norm": 0.6220869872372307, "learning_rate": 1.9142267389459453e-06, "loss": 0.5334, "step": 1247 }, { "epoch": 0.16, "grad_norm": 0.7080435666478909, "learning_rate": 1.914059449397167e-06, "loss": 0.5926, "step": 1248 }, { "epoch": 0.16, "grad_norm": 0.8293647595700109, "learning_rate": 1.9138920041935875e-06, "loss": 0.5953, "step": 1249 }, { "epoch": 0.16, "grad_norm": 0.9641881247719795, "learning_rate": 1.9137244033637203e-06, "loss": 0.6273, "step": 1250 }, { "epoch": 0.16, "grad_norm": 0.8277414240893562, "learning_rate": 1.9135566469361068e-06, "loss": 0.5815, "step": 1251 }, { "epoch": 0.16, "grad_norm": 0.8658589704596521, "learning_rate": 1.913388734939313e-06, "loss": 0.5862, "step": 1252 }, { "epoch": 0.16, "grad_norm": 0.6346157033126076, "learning_rate": 1.9132206674019335e-06, "loss": 0.5671, "step": 1253 }, { "epoch": 0.16, "grad_norm": 0.9127955686681459, "learning_rate": 1.9130524443525883e-06, "loss": 0.6379, "step": 1254 }, { "epoch": 0.16, "grad_norm": 0.6747840178417314, "learning_rate": 1.9128840658199237e-06, "loss": 0.5597, "step": 1255 }, { "epoch": 0.16, "grad_norm": 0.5813740513546736, "learning_rate": 1.9127155318326135e-06, "loss": 0.5278, "step": 1256 }, { "epoch": 0.16, "grad_norm": 1.0716128028851892, "learning_rate": 1.9125468424193565e-06, "loss": 0.7162, "step": 1257 }, { "epoch": 0.16, "grad_norm": 0.9188823255134607, "learning_rate": 1.912377997608879e-06, "loss": 0.6272, "step": 1258 }, { "epoch": 0.16, "grad_norm": 0.6186359647696887, "learning_rate": 1.9122089974299336e-06, "loss": 0.5559, "step": 1259 }, { "epoch": 0.16, "grad_norm": 0.622892585064229, "learning_rate": 1.912039841911299e-06, "loss": 0.5312, "step": 1260 }, { "epoch": 0.16, "grad_norm": 0.695871055324035, "learning_rate": 1.9118705310817817e-06, "loss": 0.5627, "step": 1261 }, { "epoch": 0.16, "grad_norm": 0.8405922177942429, "learning_rate": 1.911701064970212e-06, "loss": 0.6813, "step": 1262 }, { "epoch": 0.16, "grad_norm": 0.6578183848835084, "learning_rate": 1.91153144360545e-06, "loss": 0.6057, "step": 1263 }, { "epoch": 0.16, "grad_norm": 0.7935525933662255, "learning_rate": 1.9113616670163782e-06, "loss": 0.5378, "step": 1264 }, { "epoch": 0.16, "grad_norm": 0.6573479326253858, "learning_rate": 1.9111917352319098e-06, "loss": 0.5974, "step": 1265 }, { "epoch": 0.16, "grad_norm": 0.9396959459736868, "learning_rate": 1.911021648280981e-06, "loss": 0.5983, "step": 1266 }, { "epoch": 0.16, "grad_norm": 0.7973031799678535, "learning_rate": 1.9108514061925567e-06, "loss": 0.6374, "step": 1267 }, { "epoch": 0.16, "grad_norm": 0.9517347872214054, "learning_rate": 1.9106810089956274e-06, "loss": 0.5958, "step": 1268 }, { "epoch": 0.16, "grad_norm": 0.6765829249951395, "learning_rate": 1.9105104567192093e-06, "loss": 0.5347, "step": 1269 }, { "epoch": 0.16, "grad_norm": 0.5764316749898823, "learning_rate": 1.9103397493923463e-06, "loss": 0.5159, "step": 1270 }, { "epoch": 0.16, "grad_norm": 0.8133369206446016, "learning_rate": 1.910168887044108e-06, "loss": 0.6322, "step": 1271 }, { "epoch": 0.16, "grad_norm": 0.8914684733991114, "learning_rate": 1.9099978697035897e-06, "loss": 0.5212, "step": 1272 }, { "epoch": 0.16, "grad_norm": 0.6316762454061254, "learning_rate": 1.909826697399915e-06, "loss": 0.5137, "step": 1273 }, { "epoch": 0.16, "grad_norm": 0.6960176471699184, "learning_rate": 1.909655370162232e-06, "loss": 0.544, "step": 1274 }, { "epoch": 0.16, "grad_norm": 0.5947001165482533, "learning_rate": 1.909483888019716e-06, "loss": 0.5129, "step": 1275 }, { "epoch": 0.16, "grad_norm": 0.6213878790167677, "learning_rate": 1.9093122510015693e-06, "loss": 0.5589, "step": 1276 }, { "epoch": 0.16, "grad_norm": 0.9262035985184728, "learning_rate": 1.909140459137019e-06, "loss": 0.6347, "step": 1277 }, { "epoch": 0.16, "grad_norm": 0.9551474398199575, "learning_rate": 1.90896851245532e-06, "loss": 0.66, "step": 1278 }, { "epoch": 0.16, "grad_norm": 0.8494437024299266, "learning_rate": 1.908796410985753e-06, "loss": 0.6099, "step": 1279 }, { "epoch": 0.16, "grad_norm": 0.7206498411641772, "learning_rate": 1.908624154757625e-06, "loss": 0.6469, "step": 1280 }, { "epoch": 0.16, "grad_norm": 0.7388187024377662, "learning_rate": 1.908451743800269e-06, "loss": 0.5482, "step": 1281 }, { "epoch": 0.16, "grad_norm": 0.884655458201063, "learning_rate": 1.9082791781430457e-06, "loss": 0.6916, "step": 1282 }, { "epoch": 0.16, "grad_norm": 0.8488393930851411, "learning_rate": 1.908106457815341e-06, "loss": 0.6106, "step": 1283 }, { "epoch": 0.16, "grad_norm": 0.8998056910210946, "learning_rate": 1.9079335828465663e-06, "loss": 0.6286, "step": 1284 }, { "epoch": 0.16, "grad_norm": 0.7517361194655062, "learning_rate": 1.9077605532661616e-06, "loss": 0.5916, "step": 1285 }, { "epoch": 0.16, "grad_norm": 0.8327857559207879, "learning_rate": 1.9075873691035917e-06, "loss": 0.6751, "step": 1286 }, { "epoch": 0.16, "grad_norm": 0.6916511469239959, "learning_rate": 1.907414030388348e-06, "loss": 0.5277, "step": 1287 }, { "epoch": 0.16, "grad_norm": 0.8240926328440982, "learning_rate": 1.9072405371499482e-06, "loss": 0.6406, "step": 1288 }, { "epoch": 0.16, "grad_norm": 0.6871242302185181, "learning_rate": 1.9070668894179366e-06, "loss": 0.573, "step": 1289 }, { "epoch": 0.16, "grad_norm": 0.9105669924162509, "learning_rate": 1.9068930872218836e-06, "loss": 0.5645, "step": 1290 }, { "epoch": 0.16, "grad_norm": 0.6602122580816693, "learning_rate": 1.9067191305913855e-06, "loss": 0.5355, "step": 1291 }, { "epoch": 0.16, "grad_norm": 0.8290096593865575, "learning_rate": 1.906545019556066e-06, "loss": 0.6341, "step": 1292 }, { "epoch": 0.16, "grad_norm": 0.8467532295273246, "learning_rate": 1.9063707541455735e-06, "loss": 0.5977, "step": 1293 }, { "epoch": 0.16, "grad_norm": 0.8897051722777256, "learning_rate": 1.9061963343895844e-06, "loss": 0.6406, "step": 1294 }, { "epoch": 0.16, "grad_norm": 0.7012413760275412, "learning_rate": 1.9060217603178002e-06, "loss": 0.6222, "step": 1295 }, { "epoch": 0.17, "grad_norm": 0.7577228171228438, "learning_rate": 1.9058470319599489e-06, "loss": 0.6474, "step": 1296 }, { "epoch": 0.17, "grad_norm": 0.9819909210794312, "learning_rate": 1.9056721493457852e-06, "loss": 0.685, "step": 1297 }, { "epoch": 0.17, "grad_norm": 0.7355255741767956, "learning_rate": 1.9054971125050897e-06, "loss": 0.6048, "step": 1298 }, { "epoch": 0.17, "grad_norm": 0.7972585266426047, "learning_rate": 1.9053219214676695e-06, "loss": 0.5389, "step": 1299 }, { "epoch": 0.17, "grad_norm": 1.1120679246374776, "learning_rate": 1.9051465762633573e-06, "loss": 0.687, "step": 1300 }, { "epoch": 0.17, "grad_norm": 0.7819321447005778, "learning_rate": 1.9049710769220132e-06, "loss": 0.6082, "step": 1301 }, { "epoch": 0.17, "grad_norm": 0.8263074917401989, "learning_rate": 1.9047954234735225e-06, "loss": 0.6115, "step": 1302 }, { "epoch": 0.17, "grad_norm": 0.6746510852270694, "learning_rate": 1.9046196159477973e-06, "loss": 0.4663, "step": 1303 }, { "epoch": 0.17, "grad_norm": 0.9490439852062861, "learning_rate": 1.904443654374776e-06, "loss": 0.6457, "step": 1304 }, { "epoch": 0.17, "grad_norm": 0.8460661359437417, "learning_rate": 1.9042675387844225e-06, "loss": 0.5976, "step": 1305 }, { "epoch": 0.17, "grad_norm": 0.6581950417367757, "learning_rate": 1.9040912692067276e-06, "loss": 0.551, "step": 1306 }, { "epoch": 0.17, "grad_norm": 0.6233308407007312, "learning_rate": 1.9039148456717086e-06, "loss": 0.5216, "step": 1307 }, { "epoch": 0.17, "grad_norm": 0.869239516250715, "learning_rate": 1.9037382682094083e-06, "loss": 0.6254, "step": 1308 }, { "epoch": 0.17, "grad_norm": 0.8329661797218253, "learning_rate": 1.9035615368498958e-06, "loss": 0.6373, "step": 1309 }, { "epoch": 0.17, "grad_norm": 1.513823221618247, "learning_rate": 1.903384651623267e-06, "loss": 0.6632, "step": 1310 }, { "epoch": 0.17, "grad_norm": 0.7684143636690005, "learning_rate": 1.9032076125596435e-06, "loss": 0.6959, "step": 1311 }, { "epoch": 0.17, "grad_norm": 1.9460109980724525, "learning_rate": 1.9030304196891728e-06, "loss": 0.6199, "step": 1312 }, { "epoch": 0.17, "grad_norm": 0.828871077102763, "learning_rate": 1.9028530730420295e-06, "loss": 0.6186, "step": 1313 }, { "epoch": 0.17, "grad_norm": 2.78262792739587, "learning_rate": 1.9026755726484138e-06, "loss": 0.6385, "step": 1314 }, { "epoch": 0.17, "grad_norm": 0.8712835251749558, "learning_rate": 1.9024979185385519e-06, "loss": 0.6376, "step": 1315 }, { "epoch": 0.17, "grad_norm": 0.7678760883656018, "learning_rate": 1.9023201107426968e-06, "loss": 0.5915, "step": 1316 }, { "epoch": 0.17, "grad_norm": 0.8310289211065707, "learning_rate": 1.902142149291127e-06, "loss": 0.6514, "step": 1317 }, { "epoch": 0.17, "grad_norm": 0.6408899267212803, "learning_rate": 1.9019640342141479e-06, "loss": 0.5114, "step": 1318 }, { "epoch": 0.17, "grad_norm": 0.6836346106253465, "learning_rate": 1.90178576554209e-06, "loss": 0.5579, "step": 1319 }, { "epoch": 0.17, "grad_norm": 0.9109105827040568, "learning_rate": 1.9016073433053111e-06, "loss": 0.6484, "step": 1320 }, { "epoch": 0.17, "grad_norm": 0.889981745090896, "learning_rate": 1.9014287675341944e-06, "loss": 0.5997, "step": 1321 }, { "epoch": 0.17, "grad_norm": 0.6466073637549428, "learning_rate": 1.9012500382591499e-06, "loss": 0.5278, "step": 1322 }, { "epoch": 0.17, "grad_norm": 0.6473793041109502, "learning_rate": 1.9010711555106126e-06, "loss": 0.5243, "step": 1323 }, { "epoch": 0.17, "grad_norm": 0.715345896876541, "learning_rate": 1.9008921193190455e-06, "loss": 0.596, "step": 1324 }, { "epoch": 0.17, "grad_norm": 0.8245356914229638, "learning_rate": 1.9007129297149353e-06, "loss": 0.6401, "step": 1325 }, { "epoch": 0.17, "grad_norm": 0.8044321719926135, "learning_rate": 1.9005335867287968e-06, "loss": 0.5835, "step": 1326 }, { "epoch": 0.17, "grad_norm": 0.8875345126597416, "learning_rate": 1.9003540903911706e-06, "loss": 0.6037, "step": 1327 }, { "epoch": 0.17, "grad_norm": 1.0666678654491368, "learning_rate": 1.9001744407326222e-06, "loss": 0.6784, "step": 1328 }, { "epoch": 0.17, "grad_norm": 0.7215700772546143, "learning_rate": 1.8999946377837445e-06, "loss": 0.5524, "step": 1329 }, { "epoch": 0.17, "grad_norm": 0.7933907643969997, "learning_rate": 1.8998146815751563e-06, "loss": 0.6494, "step": 1330 }, { "epoch": 0.17, "grad_norm": 0.6206367052227725, "learning_rate": 1.899634572137502e-06, "loss": 0.5694, "step": 1331 }, { "epoch": 0.17, "grad_norm": 0.7613027193343308, "learning_rate": 1.8994543095014523e-06, "loss": 0.5595, "step": 1332 }, { "epoch": 0.17, "grad_norm": 0.6734253750150848, "learning_rate": 1.8992738936977042e-06, "loss": 0.5371, "step": 1333 }, { "epoch": 0.17, "grad_norm": 0.8264198632205328, "learning_rate": 1.8990933247569805e-06, "loss": 0.6469, "step": 1334 }, { "epoch": 0.17, "grad_norm": 0.6348817567269129, "learning_rate": 1.8989126027100301e-06, "loss": 0.5296, "step": 1335 }, { "epoch": 0.17, "grad_norm": 0.8915956778382629, "learning_rate": 1.8987317275876284e-06, "loss": 0.6478, "step": 1336 }, { "epoch": 0.17, "grad_norm": 0.748769976795235, "learning_rate": 1.8985506994205768e-06, "loss": 0.5881, "step": 1337 }, { "epoch": 0.17, "grad_norm": 0.720236636621758, "learning_rate": 1.8983695182397015e-06, "loss": 0.5346, "step": 1338 }, { "epoch": 0.17, "grad_norm": 0.9649324795585792, "learning_rate": 1.8981881840758569e-06, "loss": 0.662, "step": 1339 }, { "epoch": 0.17, "grad_norm": 0.7755062849826476, "learning_rate": 1.8980066969599213e-06, "loss": 0.6363, "step": 1340 }, { "epoch": 0.17, "grad_norm": 0.7709400146488374, "learning_rate": 1.897825056922801e-06, "loss": 0.6234, "step": 1341 }, { "epoch": 0.17, "grad_norm": 0.9136686313914626, "learning_rate": 1.8976432639954267e-06, "loss": 0.6603, "step": 1342 }, { "epoch": 0.17, "grad_norm": 0.7425405742853465, "learning_rate": 1.8974613182087565e-06, "loss": 0.575, "step": 1343 }, { "epoch": 0.17, "grad_norm": 0.6157107016683719, "learning_rate": 1.897279219593773e-06, "loss": 0.5649, "step": 1344 }, { "epoch": 0.17, "grad_norm": 0.6213917904408933, "learning_rate": 1.8970969681814866e-06, "loss": 0.5413, "step": 1345 }, { "epoch": 0.17, "grad_norm": 0.7742290397722541, "learning_rate": 1.8969145640029323e-06, "loss": 0.6341, "step": 1346 }, { "epoch": 0.17, "grad_norm": 0.6185264232925686, "learning_rate": 1.8967320070891715e-06, "loss": 0.5416, "step": 1347 }, { "epoch": 0.17, "grad_norm": 0.7108215400423297, "learning_rate": 1.8965492974712923e-06, "loss": 0.579, "step": 1348 }, { "epoch": 0.17, "grad_norm": 0.9343243669509215, "learning_rate": 1.896366435180408e-06, "loss": 0.6123, "step": 1349 }, { "epoch": 0.17, "grad_norm": 1.032110290756634, "learning_rate": 1.8961834202476576e-06, "loss": 0.6272, "step": 1350 }, { "epoch": 0.17, "grad_norm": 0.7663364315207016, "learning_rate": 1.8960002527042073e-06, "loss": 0.5984, "step": 1351 }, { "epoch": 0.17, "grad_norm": 0.8507335222219339, "learning_rate": 1.8958169325812485e-06, "loss": 0.6393, "step": 1352 }, { "epoch": 0.17, "grad_norm": 0.8420373119697528, "learning_rate": 1.8956334599099989e-06, "loss": 0.6453, "step": 1353 }, { "epoch": 0.17, "grad_norm": 0.8483904760776666, "learning_rate": 1.8954498347217011e-06, "loss": 0.591, "step": 1354 }, { "epoch": 0.17, "grad_norm": 0.7937243189604148, "learning_rate": 1.8952660570476256e-06, "loss": 0.5524, "step": 1355 }, { "epoch": 0.17, "grad_norm": 0.7695723245289143, "learning_rate": 1.8950821269190676e-06, "loss": 0.644, "step": 1356 }, { "epoch": 0.17, "grad_norm": 0.6052266500519187, "learning_rate": 1.894898044367348e-06, "loss": 0.598, "step": 1357 }, { "epoch": 0.17, "grad_norm": 0.7912854040229887, "learning_rate": 1.8947138094238143e-06, "loss": 0.5986, "step": 1358 }, { "epoch": 0.17, "grad_norm": 0.6425185916487481, "learning_rate": 1.89452942211984e-06, "loss": 0.5948, "step": 1359 }, { "epoch": 0.17, "grad_norm": 0.6513553896385886, "learning_rate": 1.8943448824868241e-06, "loss": 0.5715, "step": 1360 }, { "epoch": 0.17, "grad_norm": 0.6474812983139311, "learning_rate": 1.8941601905561923e-06, "loss": 0.5677, "step": 1361 }, { "epoch": 0.17, "grad_norm": 0.8622919181858628, "learning_rate": 1.893975346359395e-06, "loss": 0.6769, "step": 1362 }, { "epoch": 0.17, "grad_norm": 0.7392316950879241, "learning_rate": 1.89379034992791e-06, "loss": 0.5786, "step": 1363 }, { "epoch": 0.17, "grad_norm": 0.8828617522356548, "learning_rate": 1.8936052012932395e-06, "loss": 0.6977, "step": 1364 }, { "epoch": 0.17, "grad_norm": 0.9772946958451301, "learning_rate": 1.893419900486913e-06, "loss": 0.6513, "step": 1365 }, { "epoch": 0.17, "grad_norm": 0.8891138493186889, "learning_rate": 1.8932344475404846e-06, "loss": 0.673, "step": 1366 }, { "epoch": 0.17, "grad_norm": 0.8591831123617806, "learning_rate": 1.8930488424855357e-06, "loss": 0.6258, "step": 1367 }, { "epoch": 0.17, "grad_norm": 0.6307369962604498, "learning_rate": 1.8928630853536726e-06, "loss": 0.5535, "step": 1368 }, { "epoch": 0.17, "grad_norm": 0.7971084390737865, "learning_rate": 1.892677176176528e-06, "loss": 0.6752, "step": 1369 }, { "epoch": 0.17, "grad_norm": 0.8489528936099523, "learning_rate": 1.89249111498576e-06, "loss": 0.7058, "step": 1370 }, { "epoch": 0.17, "grad_norm": 0.7209278011399826, "learning_rate": 1.8923049018130531e-06, "loss": 0.6247, "step": 1371 }, { "epoch": 0.17, "grad_norm": 0.6756220714416059, "learning_rate": 1.892118536690117e-06, "loss": 0.5664, "step": 1372 }, { "epoch": 0.17, "grad_norm": 0.8532171815517061, "learning_rate": 1.8919320196486888e-06, "loss": 0.6183, "step": 1373 }, { "epoch": 0.18, "grad_norm": 0.7450629541248743, "learning_rate": 1.8917453507205292e-06, "loss": 0.6392, "step": 1374 }, { "epoch": 0.18, "grad_norm": 0.8815355095910226, "learning_rate": 1.8915585299374269e-06, "loss": 0.6638, "step": 1375 }, { "epoch": 0.18, "grad_norm": 0.7983113053103803, "learning_rate": 1.8913715573311948e-06, "loss": 0.6395, "step": 1376 }, { "epoch": 0.18, "grad_norm": 0.7705890612828493, "learning_rate": 1.8911844329336728e-06, "loss": 0.5366, "step": 1377 }, { "epoch": 0.18, "grad_norm": 0.657974349547334, "learning_rate": 1.8909971567767263e-06, "loss": 0.5611, "step": 1378 }, { "epoch": 0.18, "grad_norm": 0.6246208838748795, "learning_rate": 1.8908097288922464e-06, "loss": 0.546, "step": 1379 }, { "epoch": 0.18, "grad_norm": 0.7700891831833455, "learning_rate": 1.8906221493121498e-06, "loss": 0.6498, "step": 1380 }, { "epoch": 0.18, "grad_norm": 0.7263520646987879, "learning_rate": 1.8904344180683798e-06, "loss": 0.5328, "step": 1381 }, { "epoch": 0.18, "grad_norm": 0.7330832675032019, "learning_rate": 1.8902465351929048e-06, "loss": 0.6025, "step": 1382 }, { "epoch": 0.18, "grad_norm": 0.5984790398867992, "learning_rate": 1.8900585007177195e-06, "loss": 0.5246, "step": 1383 }, { "epoch": 0.18, "grad_norm": 0.8701614869780259, "learning_rate": 1.889870314674844e-06, "loss": 0.6445, "step": 1384 }, { "epoch": 0.18, "grad_norm": 0.7907572025047873, "learning_rate": 1.8896819770963244e-06, "loss": 0.6354, "step": 1385 }, { "epoch": 0.18, "grad_norm": 0.6633698120613989, "learning_rate": 1.8894934880142329e-06, "loss": 0.5399, "step": 1386 }, { "epoch": 0.18, "grad_norm": 0.8213707931902501, "learning_rate": 1.8893048474606669e-06, "loss": 0.5547, "step": 1387 }, { "epoch": 0.18, "grad_norm": 0.61504477406459, "learning_rate": 1.8891160554677504e-06, "loss": 0.493, "step": 1388 }, { "epoch": 0.18, "grad_norm": 0.9338211443552195, "learning_rate": 1.888927112067632e-06, "loss": 0.6933, "step": 1389 }, { "epoch": 0.18, "grad_norm": 0.7898151894960762, "learning_rate": 1.8887380172924874e-06, "loss": 0.6178, "step": 1390 }, { "epoch": 0.18, "grad_norm": 0.9130750236593735, "learning_rate": 1.8885487711745172e-06, "loss": 0.6901, "step": 1391 }, { "epoch": 0.18, "grad_norm": 0.7861914759513458, "learning_rate": 1.8883593737459481e-06, "loss": 0.6054, "step": 1392 }, { "epoch": 0.18, "grad_norm": 0.93255765255696, "learning_rate": 1.8881698250390327e-06, "loss": 0.6066, "step": 1393 }, { "epoch": 0.18, "grad_norm": 0.7053209255910798, "learning_rate": 1.8879801250860488e-06, "loss": 0.5501, "step": 1394 }, { "epoch": 0.18, "grad_norm": 0.6746232384959575, "learning_rate": 1.8877902739193005e-06, "loss": 0.5506, "step": 1395 }, { "epoch": 0.18, "grad_norm": 0.8694554234808456, "learning_rate": 1.8876002715711174e-06, "loss": 0.6422, "step": 1396 }, { "epoch": 0.18, "grad_norm": 0.6488615831681229, "learning_rate": 1.8874101180738551e-06, "loss": 0.5564, "step": 1397 }, { "epoch": 0.18, "grad_norm": 0.7171262171924074, "learning_rate": 1.887219813459895e-06, "loss": 0.5002, "step": 1398 }, { "epoch": 0.18, "grad_norm": 0.8801699313849026, "learning_rate": 1.8870293577616431e-06, "loss": 0.7131, "step": 1399 }, { "epoch": 0.18, "grad_norm": 0.5774440785315017, "learning_rate": 1.886838751011533e-06, "loss": 0.5055, "step": 1400 }, { "epoch": 0.18, "grad_norm": 0.8679009795144756, "learning_rate": 1.8866479932420223e-06, "loss": 0.6527, "step": 1401 }, { "epoch": 0.18, "grad_norm": 0.7461308918842194, "learning_rate": 1.8864570844855955e-06, "loss": 0.5402, "step": 1402 }, { "epoch": 0.18, "grad_norm": 0.7330932003526601, "learning_rate": 1.8862660247747624e-06, "loss": 0.5584, "step": 1403 }, { "epoch": 0.18, "grad_norm": 0.718373289951845, "learning_rate": 1.8860748141420584e-06, "loss": 0.6077, "step": 1404 }, { "epoch": 0.18, "grad_norm": 0.8260744124627369, "learning_rate": 1.8858834526200441e-06, "loss": 0.6898, "step": 1405 }, { "epoch": 0.18, "grad_norm": 0.7998337744758383, "learning_rate": 1.8856919402413074e-06, "loss": 0.5602, "step": 1406 }, { "epoch": 0.18, "grad_norm": 0.689305456188954, "learning_rate": 1.88550027703846e-06, "loss": 0.4978, "step": 1407 }, { "epoch": 0.18, "grad_norm": 0.7148375569518989, "learning_rate": 1.8853084630441409e-06, "loss": 0.5797, "step": 1408 }, { "epoch": 0.18, "grad_norm": 0.8752247174372582, "learning_rate": 1.8851164982910133e-06, "loss": 0.6578, "step": 1409 }, { "epoch": 0.18, "grad_norm": 0.6594379961802735, "learning_rate": 1.8849243828117675e-06, "loss": 0.5284, "step": 1410 }, { "epoch": 0.18, "grad_norm": 0.6523415768785581, "learning_rate": 1.8847321166391179e-06, "loss": 0.531, "step": 1411 }, { "epoch": 0.18, "grad_norm": 0.9246346703975636, "learning_rate": 1.884539699805806e-06, "loss": 0.6255, "step": 1412 }, { "epoch": 0.18, "grad_norm": 0.9958921693800686, "learning_rate": 1.8843471323445984e-06, "loss": 0.6974, "step": 1413 }, { "epoch": 0.18, "grad_norm": 0.9048531078986073, "learning_rate": 1.8841544142882872e-06, "loss": 0.6526, "step": 1414 }, { "epoch": 0.18, "grad_norm": 0.6733251265732655, "learning_rate": 1.8839615456696905e-06, "loss": 0.5144, "step": 1415 }, { "epoch": 0.18, "grad_norm": 0.864988634447487, "learning_rate": 1.8837685265216514e-06, "loss": 0.688, "step": 1416 }, { "epoch": 0.18, "grad_norm": 1.0865077309639943, "learning_rate": 1.8835753568770391e-06, "loss": 0.582, "step": 1417 }, { "epoch": 0.18, "grad_norm": 0.758120683673968, "learning_rate": 1.883382036768749e-06, "loss": 0.5942, "step": 1418 }, { "epoch": 0.18, "grad_norm": 0.7365233432680053, "learning_rate": 1.8831885662297004e-06, "loss": 0.5309, "step": 1419 }, { "epoch": 0.18, "grad_norm": 0.8534301458209403, "learning_rate": 1.8829949452928405e-06, "loss": 0.6677, "step": 1420 }, { "epoch": 0.18, "grad_norm": 0.7406340910491646, "learning_rate": 1.8828011739911403e-06, "loss": 0.5937, "step": 1421 }, { "epoch": 0.18, "grad_norm": 0.9042631175936158, "learning_rate": 1.882607252357597e-06, "loss": 0.609, "step": 1422 }, { "epoch": 0.18, "grad_norm": 0.700002435330286, "learning_rate": 1.8824131804252338e-06, "loss": 0.5579, "step": 1423 }, { "epoch": 0.18, "grad_norm": 0.90234925065053, "learning_rate": 1.8822189582270985e-06, "loss": 0.6723, "step": 1424 }, { "epoch": 0.18, "grad_norm": 0.6288808774549141, "learning_rate": 1.8820245857962659e-06, "loss": 0.5218, "step": 1425 }, { "epoch": 0.18, "grad_norm": 0.8957382750808092, "learning_rate": 1.881830063165835e-06, "loss": 0.6804, "step": 1426 }, { "epoch": 0.18, "grad_norm": 0.6544487602944442, "learning_rate": 1.881635390368931e-06, "loss": 0.5396, "step": 1427 }, { "epoch": 0.18, "grad_norm": 0.8346212058866364, "learning_rate": 1.8814405674387052e-06, "loss": 0.6149, "step": 1428 }, { "epoch": 0.18, "grad_norm": 1.016528017418266, "learning_rate": 1.8812455944083335e-06, "loss": 0.6898, "step": 1429 }, { "epoch": 0.18, "grad_norm": 0.8592181226518223, "learning_rate": 1.8810504713110177e-06, "loss": 0.6622, "step": 1430 }, { "epoch": 0.18, "grad_norm": 0.8736610215251372, "learning_rate": 1.8808551981799852e-06, "loss": 0.6772, "step": 1431 }, { "epoch": 0.18, "grad_norm": 0.9904027370266641, "learning_rate": 1.8806597750484894e-06, "loss": 0.6989, "step": 1432 }, { "epoch": 0.18, "grad_norm": 4.949093262548183, "learning_rate": 1.8804642019498084e-06, "loss": 0.6716, "step": 1433 }, { "epoch": 0.18, "grad_norm": 0.9175499727798004, "learning_rate": 1.8802684789172465e-06, "loss": 0.6272, "step": 1434 }, { "epoch": 0.18, "grad_norm": 0.7995299428628458, "learning_rate": 1.8800726059841331e-06, "loss": 0.5741, "step": 1435 }, { "epoch": 0.18, "grad_norm": 0.7744835209488642, "learning_rate": 1.8798765831838238e-06, "loss": 0.592, "step": 1436 }, { "epoch": 0.18, "grad_norm": 0.7061534253852233, "learning_rate": 1.8796804105496984e-06, "loss": 0.5686, "step": 1437 }, { "epoch": 0.18, "grad_norm": 0.9463607835406957, "learning_rate": 1.8794840881151637e-06, "loss": 0.6328, "step": 1438 }, { "epoch": 0.18, "grad_norm": 0.7546901513434331, "learning_rate": 1.8792876159136513e-06, "loss": 0.5685, "step": 1439 }, { "epoch": 0.18, "grad_norm": 0.6764873211596125, "learning_rate": 1.8790909939786182e-06, "loss": 0.5176, "step": 1440 }, { "epoch": 0.18, "grad_norm": 0.6149637875791749, "learning_rate": 1.878894222343547e-06, "loss": 0.5695, "step": 1441 }, { "epoch": 0.18, "grad_norm": 0.9719458895275768, "learning_rate": 1.878697301041946e-06, "loss": 0.679, "step": 1442 }, { "epoch": 0.18, "grad_norm": 0.7790371186504014, "learning_rate": 1.878500230107349e-06, "loss": 0.5944, "step": 1443 }, { "epoch": 0.18, "grad_norm": 0.7781016825361322, "learning_rate": 1.8783030095733148e-06, "loss": 0.6313, "step": 1444 }, { "epoch": 0.18, "grad_norm": 0.6454660905715187, "learning_rate": 1.8781056394734282e-06, "loss": 0.5388, "step": 1445 }, { "epoch": 0.18, "grad_norm": 0.7437658858289923, "learning_rate": 1.8779081198412991e-06, "loss": 0.5244, "step": 1446 }, { "epoch": 0.18, "grad_norm": 0.9140722313688361, "learning_rate": 1.8777104507105635e-06, "loss": 0.6512, "step": 1447 }, { "epoch": 0.18, "grad_norm": 0.9498360509598618, "learning_rate": 1.877512632114882e-06, "loss": 0.6699, "step": 1448 }, { "epoch": 0.18, "grad_norm": 0.8165953212973491, "learning_rate": 1.8773146640879411e-06, "loss": 0.6272, "step": 1449 }, { "epoch": 0.18, "grad_norm": 0.8967076765131066, "learning_rate": 1.8771165466634526e-06, "loss": 0.6881, "step": 1450 }, { "epoch": 0.18, "grad_norm": 0.9111754379537457, "learning_rate": 1.876918279875154e-06, "loss": 0.6934, "step": 1451 }, { "epoch": 0.18, "grad_norm": 0.670068748256077, "learning_rate": 1.8767198637568078e-06, "loss": 0.5638, "step": 1452 }, { "epoch": 0.19, "grad_norm": 0.6573602871606139, "learning_rate": 1.8765212983422027e-06, "loss": 0.5716, "step": 1453 }, { "epoch": 0.19, "grad_norm": 0.7923046841795471, "learning_rate": 1.8763225836651517e-06, "loss": 0.6405, "step": 1454 }, { "epoch": 0.19, "grad_norm": 0.8059996935637745, "learning_rate": 1.8761237197594943e-06, "loss": 0.5589, "step": 1455 }, { "epoch": 0.19, "grad_norm": 0.6789931413183536, "learning_rate": 1.8759247066590951e-06, "loss": 0.5066, "step": 1456 }, { "epoch": 0.19, "grad_norm": 0.6072192764639288, "learning_rate": 1.8757255443978436e-06, "loss": 0.5229, "step": 1457 }, { "epoch": 0.19, "grad_norm": 0.8538674155270647, "learning_rate": 1.8755262330096548e-06, "loss": 0.6169, "step": 1458 }, { "epoch": 0.19, "grad_norm": 0.7914518981982325, "learning_rate": 1.8753267725284698e-06, "loss": 0.5698, "step": 1459 }, { "epoch": 0.19, "grad_norm": 0.6599571130296518, "learning_rate": 1.8751271629882548e-06, "loss": 0.5431, "step": 1460 }, { "epoch": 0.19, "grad_norm": 0.6823738125902976, "learning_rate": 1.8749274044230008e-06, "loss": 0.5907, "step": 1461 }, { "epoch": 0.19, "grad_norm": 0.7232856278055456, "learning_rate": 1.8747274968667246e-06, "loss": 0.6202, "step": 1462 }, { "epoch": 0.19, "grad_norm": 1.0068455656891342, "learning_rate": 1.8745274403534688e-06, "loss": 0.6749, "step": 1463 }, { "epoch": 0.19, "grad_norm": 0.6057762565423743, "learning_rate": 1.8743272349173004e-06, "loss": 0.5567, "step": 1464 }, { "epoch": 0.19, "grad_norm": 0.6185598676221526, "learning_rate": 1.8741268805923126e-06, "loss": 0.5663, "step": 1465 }, { "epoch": 0.19, "grad_norm": 0.8030462414919578, "learning_rate": 1.8739263774126237e-06, "loss": 0.595, "step": 1466 }, { "epoch": 0.19, "grad_norm": 0.8888718951191156, "learning_rate": 1.8737257254123771e-06, "loss": 0.648, "step": 1467 }, { "epoch": 0.19, "grad_norm": 1.4006107934925882, "learning_rate": 1.8735249246257421e-06, "loss": 0.6308, "step": 1468 }, { "epoch": 0.19, "grad_norm": 0.7135154310043672, "learning_rate": 1.8733239750869125e-06, "loss": 0.5917, "step": 1469 }, { "epoch": 0.19, "grad_norm": 0.782394831700157, "learning_rate": 1.8731228768301085e-06, "loss": 0.5976, "step": 1470 }, { "epoch": 0.19, "grad_norm": 0.6689222476452088, "learning_rate": 1.8729216298895744e-06, "loss": 0.5752, "step": 1471 }, { "epoch": 0.19, "grad_norm": 0.8826201579171571, "learning_rate": 1.872720234299581e-06, "loss": 0.6374, "step": 1472 }, { "epoch": 0.19, "grad_norm": 0.9010196246609141, "learning_rate": 1.8725186900944235e-06, "loss": 0.7016, "step": 1473 }, { "epoch": 0.19, "grad_norm": 0.9678219103782075, "learning_rate": 1.8723169973084229e-06, "loss": 0.691, "step": 1474 }, { "epoch": 0.19, "grad_norm": 0.822075764884198, "learning_rate": 1.8721151559759254e-06, "loss": 0.5876, "step": 1475 }, { "epoch": 0.19, "grad_norm": 0.8389367359319551, "learning_rate": 1.8719131661313025e-06, "loss": 0.6117, "step": 1476 }, { "epoch": 0.19, "grad_norm": 0.614925444113953, "learning_rate": 1.8717110278089509e-06, "loss": 0.484, "step": 1477 }, { "epoch": 0.19, "grad_norm": 0.8665390833131461, "learning_rate": 1.8715087410432927e-06, "loss": 0.5645, "step": 1478 }, { "epoch": 0.19, "grad_norm": 0.8261241258026107, "learning_rate": 1.8713063058687753e-06, "loss": 0.6678, "step": 1479 }, { "epoch": 0.19, "grad_norm": 0.7401957025562489, "learning_rate": 1.8711037223198711e-06, "loss": 0.6047, "step": 1480 }, { "epoch": 0.19, "grad_norm": 0.6207232410826065, "learning_rate": 1.8709009904310779e-06, "loss": 0.4907, "step": 1481 }, { "epoch": 0.19, "grad_norm": 0.7086837531272505, "learning_rate": 1.8706981102369192e-06, "loss": 0.5852, "step": 1482 }, { "epoch": 0.19, "grad_norm": 0.8126501151940565, "learning_rate": 1.8704950817719434e-06, "loss": 0.6094, "step": 1483 }, { "epoch": 0.19, "grad_norm": 0.92484605431923, "learning_rate": 1.8702919050707235e-06, "loss": 0.5841, "step": 1484 }, { "epoch": 0.19, "grad_norm": 0.761858247086483, "learning_rate": 1.8700885801678592e-06, "loss": 0.5544, "step": 1485 }, { "epoch": 0.19, "grad_norm": 0.91800126943183, "learning_rate": 1.869885107097974e-06, "loss": 0.648, "step": 1486 }, { "epoch": 0.19, "grad_norm": 0.7985310063862452, "learning_rate": 1.8696814858957176e-06, "loss": 0.5989, "step": 1487 }, { "epoch": 0.19, "grad_norm": 0.7129341482784679, "learning_rate": 1.8694777165957643e-06, "loss": 0.5124, "step": 1488 }, { "epoch": 0.19, "grad_norm": 0.7871653487663614, "learning_rate": 1.869273799232814e-06, "loss": 0.6334, "step": 1489 }, { "epoch": 0.19, "grad_norm": 0.8335509787552419, "learning_rate": 1.8690697338415919e-06, "loss": 0.6284, "step": 1490 }, { "epoch": 0.19, "grad_norm": 0.8424956207943637, "learning_rate": 1.8688655204568477e-06, "loss": 0.6148, "step": 1491 }, { "epoch": 0.19, "grad_norm": 0.9133961533385199, "learning_rate": 1.8686611591133574e-06, "loss": 0.6915, "step": 1492 }, { "epoch": 0.19, "grad_norm": 0.7936779004477725, "learning_rate": 1.8684566498459214e-06, "loss": 0.5274, "step": 1493 }, { "epoch": 0.19, "grad_norm": 0.6657406363979287, "learning_rate": 1.8682519926893653e-06, "loss": 0.6057, "step": 1494 }, { "epoch": 0.19, "grad_norm": 0.6808686452955117, "learning_rate": 1.8680471876785402e-06, "loss": 0.6002, "step": 1495 }, { "epoch": 0.19, "grad_norm": 0.6818550759296733, "learning_rate": 1.8678422348483226e-06, "loss": 0.599, "step": 1496 }, { "epoch": 0.19, "grad_norm": 0.8437556983559646, "learning_rate": 1.8676371342336133e-06, "loss": 0.6966, "step": 1497 }, { "epoch": 0.19, "grad_norm": 0.8137366309667806, "learning_rate": 1.867431885869339e-06, "loss": 0.6524, "step": 1498 }, { "epoch": 0.19, "grad_norm": 0.616784488141124, "learning_rate": 1.8672264897904513e-06, "loss": 0.572, "step": 1499 }, { "epoch": 0.19, "grad_norm": 0.9246619262601333, "learning_rate": 1.867020946031927e-06, "loss": 0.6364, "step": 1500 }, { "epoch": 0.19, "grad_norm": 0.704886930088419, "learning_rate": 1.8668152546287686e-06, "loss": 0.5452, "step": 1501 }, { "epoch": 0.19, "grad_norm": 0.9156452026597057, "learning_rate": 1.8666094156160022e-06, "loss": 0.6582, "step": 1502 }, { "epoch": 0.19, "grad_norm": 0.7206072647687064, "learning_rate": 1.866403429028681e-06, "loss": 0.5776, "step": 1503 }, { "epoch": 0.19, "grad_norm": 0.8498808736027476, "learning_rate": 1.8661972949018817e-06, "loss": 0.6199, "step": 1504 }, { "epoch": 0.19, "grad_norm": 0.7714233311502476, "learning_rate": 1.8659910132707072e-06, "loss": 0.5968, "step": 1505 }, { "epoch": 0.19, "grad_norm": 0.7529971436371257, "learning_rate": 1.8657845841702851e-06, "loss": 0.6058, "step": 1506 }, { "epoch": 0.19, "grad_norm": 0.9634732061273164, "learning_rate": 1.8655780076357676e-06, "loss": 0.6427, "step": 1507 }, { "epoch": 0.19, "grad_norm": 0.6611391815528458, "learning_rate": 1.8653712837023331e-06, "loss": 0.6008, "step": 1508 }, { "epoch": 0.19, "grad_norm": 0.6026054575525503, "learning_rate": 1.8651644124051846e-06, "loss": 0.5112, "step": 1509 }, { "epoch": 0.19, "grad_norm": 0.759700431303414, "learning_rate": 1.8649573937795497e-06, "loss": 0.5964, "step": 1510 }, { "epoch": 0.19, "grad_norm": 1.2853988612736746, "learning_rate": 1.8647502278606817e-06, "loss": 0.719, "step": 1511 }, { "epoch": 0.19, "grad_norm": 0.6856213266623999, "learning_rate": 1.864542914683859e-06, "loss": 0.6191, "step": 1512 }, { "epoch": 0.19, "grad_norm": 0.7615831952053184, "learning_rate": 1.8643354542843843e-06, "loss": 0.584, "step": 1513 }, { "epoch": 0.19, "grad_norm": 0.846341851913998, "learning_rate": 1.8641278466975866e-06, "loss": 0.6433, "step": 1514 }, { "epoch": 0.19, "grad_norm": 0.8215122282671824, "learning_rate": 1.8639200919588188e-06, "loss": 0.6283, "step": 1515 }, { "epoch": 0.19, "grad_norm": 0.795272875064585, "learning_rate": 1.8637121901034596e-06, "loss": 0.6254, "step": 1516 }, { "epoch": 0.19, "grad_norm": 0.7240585297503832, "learning_rate": 1.8635041411669128e-06, "loss": 0.5854, "step": 1517 }, { "epoch": 0.19, "grad_norm": 0.714945504217413, "learning_rate": 1.8632959451846064e-06, "loss": 0.5721, "step": 1518 }, { "epoch": 0.19, "grad_norm": 0.743579998599788, "learning_rate": 1.8630876021919945e-06, "loss": 0.5831, "step": 1519 }, { "epoch": 0.19, "grad_norm": 0.9240985924157026, "learning_rate": 1.8628791122245552e-06, "loss": 0.6851, "step": 1520 }, { "epoch": 0.19, "grad_norm": 0.8025818524376312, "learning_rate": 1.862670475317793e-06, "loss": 0.7181, "step": 1521 }, { "epoch": 0.19, "grad_norm": 0.8822210286432239, "learning_rate": 1.8624616915072357e-06, "loss": 0.7238, "step": 1522 }, { "epoch": 0.19, "grad_norm": 0.6700355783987793, "learning_rate": 1.8622527608284375e-06, "loss": 0.5768, "step": 1523 }, { "epoch": 0.19, "grad_norm": 0.7030711299139792, "learning_rate": 1.862043683316977e-06, "loss": 0.6145, "step": 1524 }, { "epoch": 0.19, "grad_norm": 0.7115454925875814, "learning_rate": 1.8618344590084581e-06, "loss": 0.6172, "step": 1525 }, { "epoch": 0.19, "grad_norm": 0.704657357650042, "learning_rate": 1.861625087938509e-06, "loss": 0.5725, "step": 1526 }, { "epoch": 0.19, "grad_norm": 0.6957507793091998, "learning_rate": 1.8614155701427842e-06, "loss": 0.5127, "step": 1527 }, { "epoch": 0.19, "grad_norm": 0.6130606203074686, "learning_rate": 1.8612059056569617e-06, "loss": 0.5121, "step": 1528 }, { "epoch": 0.19, "grad_norm": 0.9082709271088465, "learning_rate": 1.8609960945167457e-06, "loss": 0.5822, "step": 1529 }, { "epoch": 0.19, "grad_norm": 0.8234701937282133, "learning_rate": 1.860786136757864e-06, "loss": 0.6577, "step": 1530 }, { "epoch": 0.2, "grad_norm": 0.8457913960111877, "learning_rate": 1.8605760324160716e-06, "loss": 0.5995, "step": 1531 }, { "epoch": 0.2, "grad_norm": 0.842631693621379, "learning_rate": 1.860365781527146e-06, "loss": 0.6386, "step": 1532 }, { "epoch": 0.2, "grad_norm": 0.8178491529706742, "learning_rate": 1.8601553841268908e-06, "loss": 0.5644, "step": 1533 }, { "epoch": 0.2, "grad_norm": 0.7358100734343288, "learning_rate": 1.8599448402511349e-06, "loss": 0.5729, "step": 1534 }, { "epoch": 0.2, "grad_norm": 0.7461473102745995, "learning_rate": 1.8597341499357313e-06, "loss": 0.5649, "step": 1535 }, { "epoch": 0.2, "grad_norm": 0.724348912661096, "learning_rate": 1.859523313216559e-06, "loss": 0.5984, "step": 1536 }, { "epoch": 0.2, "grad_norm": 0.7011046106966219, "learning_rate": 1.8593123301295206e-06, "loss": 0.6444, "step": 1537 }, { "epoch": 0.2, "grad_norm": 0.7196170194623498, "learning_rate": 1.8591012007105446e-06, "loss": 0.5995, "step": 1538 }, { "epoch": 0.2, "grad_norm": 0.6903779304166274, "learning_rate": 1.8588899249955845e-06, "loss": 0.5809, "step": 1539 }, { "epoch": 0.2, "grad_norm": 0.7052761456163396, "learning_rate": 1.8586785030206178e-06, "loss": 0.5761, "step": 1540 }, { "epoch": 0.2, "grad_norm": 0.9335321038714185, "learning_rate": 1.8584669348216478e-06, "loss": 0.6777, "step": 1541 }, { "epoch": 0.2, "grad_norm": 0.8711748668844923, "learning_rate": 1.8582552204347022e-06, "loss": 0.6091, "step": 1542 }, { "epoch": 0.2, "grad_norm": 0.6871931266865535, "learning_rate": 1.8580433598958336e-06, "loss": 0.5659, "step": 1543 }, { "epoch": 0.2, "grad_norm": 0.8363310541937206, "learning_rate": 1.8578313532411201e-06, "loss": 0.6582, "step": 1544 }, { "epoch": 0.2, "grad_norm": 0.9181072818304548, "learning_rate": 1.8576192005066638e-06, "loss": 0.6524, "step": 1545 }, { "epoch": 0.2, "grad_norm": 0.7588096734473051, "learning_rate": 1.8574069017285923e-06, "loss": 0.6215, "step": 1546 }, { "epoch": 0.2, "grad_norm": 0.7000373063035533, "learning_rate": 1.8571944569430579e-06, "loss": 0.5948, "step": 1547 }, { "epoch": 0.2, "grad_norm": 0.639826043496221, "learning_rate": 1.8569818661862378e-06, "loss": 0.5206, "step": 1548 }, { "epoch": 0.2, "grad_norm": 0.6643999658948555, "learning_rate": 1.8567691294943338e-06, "loss": 0.5573, "step": 1549 }, { "epoch": 0.2, "grad_norm": 0.9411235801371197, "learning_rate": 1.8565562469035724e-06, "loss": 0.6485, "step": 1550 }, { "epoch": 0.2, "grad_norm": 0.890023205918796, "learning_rate": 1.856343218450206e-06, "loss": 0.6709, "step": 1551 }, { "epoch": 0.2, "grad_norm": 0.8699130101007695, "learning_rate": 1.856130044170511e-06, "loss": 0.5302, "step": 1552 }, { "epoch": 0.2, "grad_norm": 0.7681743423180455, "learning_rate": 1.8559167241007882e-06, "loss": 0.5833, "step": 1553 }, { "epoch": 0.2, "grad_norm": 0.7158332723218168, "learning_rate": 1.8557032582773644e-06, "loss": 0.6029, "step": 1554 }, { "epoch": 0.2, "grad_norm": 0.8991008720968421, "learning_rate": 1.8554896467365904e-06, "loss": 0.6147, "step": 1555 }, { "epoch": 0.2, "grad_norm": 0.6981369116680725, "learning_rate": 1.855275889514842e-06, "loss": 0.5879, "step": 1556 }, { "epoch": 0.2, "grad_norm": 0.7575680684905965, "learning_rate": 1.8550619866485198e-06, "loss": 0.6812, "step": 1557 }, { "epoch": 0.2, "grad_norm": 0.6210549294180444, "learning_rate": 1.854847938174049e-06, "loss": 0.6182, "step": 1558 }, { "epoch": 0.2, "grad_norm": 0.6113870208978385, "learning_rate": 1.8546337441278804e-06, "loss": 0.545, "step": 1559 }, { "epoch": 0.2, "grad_norm": 0.6891737176087768, "learning_rate": 1.8544194045464886e-06, "loss": 0.5571, "step": 1560 }, { "epoch": 0.2, "grad_norm": 0.7808280946484666, "learning_rate": 1.8542049194663733e-06, "loss": 0.629, "step": 1561 }, { "epoch": 0.2, "grad_norm": 0.8041643510081159, "learning_rate": 1.8539902889240592e-06, "loss": 0.6545, "step": 1562 }, { "epoch": 0.2, "grad_norm": 0.7271054027740419, "learning_rate": 1.853775512956096e-06, "loss": 0.5696, "step": 1563 }, { "epoch": 0.2, "grad_norm": 0.724937895189766, "learning_rate": 1.8535605915990575e-06, "loss": 0.5593, "step": 1564 }, { "epoch": 0.2, "grad_norm": 0.7125528779112926, "learning_rate": 1.8533455248895424e-06, "loss": 0.5653, "step": 1565 }, { "epoch": 0.2, "grad_norm": 0.8766515601495122, "learning_rate": 1.8531303128641747e-06, "loss": 0.5966, "step": 1566 }, { "epoch": 0.2, "grad_norm": 0.8201049937340776, "learning_rate": 1.8529149555596023e-06, "loss": 0.6521, "step": 1567 }, { "epoch": 0.2, "grad_norm": 0.6927285733566425, "learning_rate": 1.852699453012499e-06, "loss": 0.6279, "step": 1568 }, { "epoch": 0.2, "grad_norm": 0.8261624818313235, "learning_rate": 1.8524838052595615e-06, "loss": 0.5984, "step": 1569 }, { "epoch": 0.2, "grad_norm": 0.7491854715114691, "learning_rate": 1.8522680123375137e-06, "loss": 0.6153, "step": 1570 }, { "epoch": 0.2, "grad_norm": 0.8175076377427374, "learning_rate": 1.852052074283102e-06, "loss": 0.6289, "step": 1571 }, { "epoch": 0.2, "grad_norm": 0.8888378084094476, "learning_rate": 1.8518359911330985e-06, "loss": 0.5817, "step": 1572 }, { "epoch": 0.2, "grad_norm": 0.8494571473596824, "learning_rate": 1.8516197629243004e-06, "loss": 0.6519, "step": 1573 }, { "epoch": 0.2, "grad_norm": 0.8541460569904148, "learning_rate": 1.8514033896935289e-06, "loss": 0.5678, "step": 1574 }, { "epoch": 0.2, "grad_norm": 0.9843343628358772, "learning_rate": 1.8511868714776297e-06, "loss": 0.6382, "step": 1575 }, { "epoch": 0.2, "grad_norm": 0.6755790039685406, "learning_rate": 1.8509702083134736e-06, "loss": 0.5584, "step": 1576 }, { "epoch": 0.2, "grad_norm": 0.7788887557572672, "learning_rate": 1.850753400237957e-06, "loss": 0.5836, "step": 1577 }, { "epoch": 0.2, "grad_norm": 0.8598998733318207, "learning_rate": 1.8505364472879991e-06, "loss": 0.5706, "step": 1578 }, { "epoch": 0.2, "grad_norm": 0.8425294085246251, "learning_rate": 1.850319349500545e-06, "loss": 0.5908, "step": 1579 }, { "epoch": 0.2, "grad_norm": 0.7908951978455421, "learning_rate": 1.8501021069125646e-06, "loss": 0.7094, "step": 1580 }, { "epoch": 0.2, "grad_norm": 0.899689484122659, "learning_rate": 1.8498847195610513e-06, "loss": 0.6277, "step": 1581 }, { "epoch": 0.2, "grad_norm": 0.7636934706126499, "learning_rate": 1.8496671874830245e-06, "loss": 0.606, "step": 1582 }, { "epoch": 0.2, "grad_norm": 0.8536968776789445, "learning_rate": 1.8494495107155275e-06, "loss": 0.6591, "step": 1583 }, { "epoch": 0.2, "grad_norm": 0.9416638224501291, "learning_rate": 1.8492316892956281e-06, "loss": 0.6614, "step": 1584 }, { "epoch": 0.2, "grad_norm": 0.6546502213605624, "learning_rate": 1.8490137232604193e-06, "loss": 0.5608, "step": 1585 }, { "epoch": 0.2, "grad_norm": 0.7190647756823703, "learning_rate": 1.8487956126470183e-06, "loss": 0.5661, "step": 1586 }, { "epoch": 0.2, "grad_norm": 0.6849726006403514, "learning_rate": 1.8485773574925673e-06, "loss": 0.5645, "step": 1587 }, { "epoch": 0.2, "grad_norm": 0.6625476578229635, "learning_rate": 1.8483589578342328e-06, "loss": 0.5734, "step": 1588 }, { "epoch": 0.2, "grad_norm": 0.8173091134547499, "learning_rate": 1.8481404137092058e-06, "loss": 0.6444, "step": 1589 }, { "epoch": 0.2, "grad_norm": 0.7714196727777476, "learning_rate": 1.8479217251547024e-06, "loss": 0.5746, "step": 1590 }, { "epoch": 0.2, "grad_norm": 0.8704460499297837, "learning_rate": 1.8477028922079625e-06, "loss": 0.6517, "step": 1591 }, { "epoch": 0.2, "grad_norm": 0.8852174635292996, "learning_rate": 1.8474839149062515e-06, "loss": 0.647, "step": 1592 }, { "epoch": 0.2, "grad_norm": 0.71944605210508, "learning_rate": 1.8472647932868589e-06, "loss": 0.5802, "step": 1593 }, { "epoch": 0.2, "grad_norm": 0.6792289397359085, "learning_rate": 1.8470455273870984e-06, "loss": 0.5714, "step": 1594 }, { "epoch": 0.2, "grad_norm": 0.8619672738229428, "learning_rate": 1.8468261172443093e-06, "loss": 0.6432, "step": 1595 }, { "epoch": 0.2, "grad_norm": 0.6812147663616255, "learning_rate": 1.8466065628958548e-06, "loss": 0.5761, "step": 1596 }, { "epoch": 0.2, "grad_norm": 0.7189688282279665, "learning_rate": 1.8463868643791222e-06, "loss": 0.602, "step": 1597 }, { "epoch": 0.2, "grad_norm": 0.8268830126333812, "learning_rate": 1.8461670217315245e-06, "loss": 0.618, "step": 1598 }, { "epoch": 0.2, "grad_norm": 0.8163055792325702, "learning_rate": 1.8459470349904982e-06, "loss": 0.6359, "step": 1599 }, { "epoch": 0.2, "grad_norm": 0.8167488815846617, "learning_rate": 1.8457269041935046e-06, "loss": 0.6057, "step": 1600 }, { "epoch": 0.2, "grad_norm": 0.6130862347226241, "learning_rate": 1.8455066293780301e-06, "loss": 0.492, "step": 1601 }, { "epoch": 0.2, "grad_norm": 0.8797532647556527, "learning_rate": 1.8452862105815852e-06, "loss": 0.6898, "step": 1602 }, { "epoch": 0.2, "grad_norm": 0.8575619584769479, "learning_rate": 1.8450656478417046e-06, "loss": 0.6626, "step": 1603 }, { "epoch": 0.2, "grad_norm": 0.8842413009105622, "learning_rate": 1.844844941195948e-06, "loss": 0.6487, "step": 1604 }, { "epoch": 0.2, "grad_norm": 0.8671488568461713, "learning_rate": 1.8446240906818992e-06, "loss": 0.6336, "step": 1605 }, { "epoch": 0.2, "grad_norm": 0.8136743670512881, "learning_rate": 1.8444030963371673e-06, "loss": 0.6218, "step": 1606 }, { "epoch": 0.2, "grad_norm": 0.8026436637794968, "learning_rate": 1.8441819581993853e-06, "loss": 0.5952, "step": 1607 }, { "epoch": 0.2, "grad_norm": 0.6048654511049967, "learning_rate": 1.8439606763062102e-06, "loss": 0.5721, "step": 1608 }, { "epoch": 0.2, "grad_norm": 0.8577062998504724, "learning_rate": 1.843739250695324e-06, "loss": 0.6058, "step": 1609 }, { "epoch": 0.21, "grad_norm": 0.9576287252393064, "learning_rate": 1.8435176814044335e-06, "loss": 0.6244, "step": 1610 }, { "epoch": 0.21, "grad_norm": 0.8530261475714226, "learning_rate": 1.8432959684712696e-06, "loss": 0.625, "step": 1611 }, { "epoch": 0.21, "grad_norm": 0.6204601050428569, "learning_rate": 1.8430741119335878e-06, "loss": 0.552, "step": 1612 }, { "epoch": 0.21, "grad_norm": 0.6750004670512109, "learning_rate": 1.8428521118291677e-06, "loss": 0.5606, "step": 1613 }, { "epoch": 0.21, "grad_norm": 0.7177576521021638, "learning_rate": 1.8426299681958137e-06, "loss": 0.529, "step": 1614 }, { "epoch": 0.21, "grad_norm": 0.721978236216542, "learning_rate": 1.8424076810713546e-06, "loss": 0.6059, "step": 1615 }, { "epoch": 0.21, "grad_norm": 0.734533118829263, "learning_rate": 1.8421852504936438e-06, "loss": 0.5548, "step": 1616 }, { "epoch": 0.21, "grad_norm": 0.8390804804927139, "learning_rate": 1.8419626765005582e-06, "loss": 0.661, "step": 1617 }, { "epoch": 0.21, "grad_norm": 0.6626508871287348, "learning_rate": 1.8417399591300006e-06, "loss": 0.5709, "step": 1618 }, { "epoch": 0.21, "grad_norm": 0.8192595083432876, "learning_rate": 1.8415170984198972e-06, "loss": 0.6367, "step": 1619 }, { "epoch": 0.21, "grad_norm": 0.6564226787446984, "learning_rate": 1.8412940944081985e-06, "loss": 0.5851, "step": 1620 }, { "epoch": 0.21, "grad_norm": 0.7411693365720023, "learning_rate": 1.8410709471328802e-06, "loss": 0.5881, "step": 1621 }, { "epoch": 0.21, "grad_norm": 0.6385309303484951, "learning_rate": 1.8408476566319417e-06, "loss": 0.568, "step": 1622 }, { "epoch": 0.21, "grad_norm": 0.9445621766163685, "learning_rate": 1.8406242229434077e-06, "loss": 0.6242, "step": 1623 }, { "epoch": 0.21, "grad_norm": 0.69876762095836, "learning_rate": 1.8404006461053254e-06, "loss": 0.5812, "step": 1624 }, { "epoch": 0.21, "grad_norm": 0.8081198671482689, "learning_rate": 1.8401769261557689e-06, "loss": 0.5827, "step": 1625 }, { "epoch": 0.21, "grad_norm": 1.0521242807327829, "learning_rate": 1.8399530631328343e-06, "loss": 0.6856, "step": 1626 }, { "epoch": 0.21, "grad_norm": 0.8597917404694845, "learning_rate": 1.8397290570746437e-06, "loss": 0.686, "step": 1627 }, { "epoch": 0.21, "grad_norm": 0.6759553242308229, "learning_rate": 1.839504908019343e-06, "loss": 0.535, "step": 1628 }, { "epoch": 0.21, "grad_norm": 0.6898034013754697, "learning_rate": 1.8392806160051024e-06, "loss": 0.5055, "step": 1629 }, { "epoch": 0.21, "grad_norm": 0.7590441117950109, "learning_rate": 1.8390561810701166e-06, "loss": 0.5686, "step": 1630 }, { "epoch": 0.21, "grad_norm": 0.8738872756606386, "learning_rate": 1.8388316032526041e-06, "loss": 0.5802, "step": 1631 }, { "epoch": 0.21, "grad_norm": 0.7929798309793638, "learning_rate": 1.8386068825908087e-06, "loss": 0.6955, "step": 1632 }, { "epoch": 0.21, "grad_norm": 1.1056654293640331, "learning_rate": 1.8383820191229976e-06, "loss": 0.626, "step": 1633 }, { "epoch": 0.21, "grad_norm": 0.7657061595178817, "learning_rate": 1.8381570128874628e-06, "loss": 0.6203, "step": 1634 }, { "epoch": 0.21, "grad_norm": 0.6328234068891088, "learning_rate": 1.8379318639225208e-06, "loss": 0.5859, "step": 1635 }, { "epoch": 0.21, "grad_norm": 0.7256429380307872, "learning_rate": 1.8377065722665116e-06, "loss": 0.5432, "step": 1636 }, { "epoch": 0.21, "grad_norm": 0.7145570773435055, "learning_rate": 1.8374811379578004e-06, "loss": 0.5923, "step": 1637 }, { "epoch": 0.21, "grad_norm": 0.873654850708191, "learning_rate": 1.8372555610347762e-06, "loss": 0.6396, "step": 1638 }, { "epoch": 0.21, "grad_norm": 0.83740763688326, "learning_rate": 1.8370298415358524e-06, "loss": 0.6106, "step": 1639 }, { "epoch": 0.21, "grad_norm": 0.9507709414012546, "learning_rate": 1.836803979499467e-06, "loss": 0.5879, "step": 1640 }, { "epoch": 0.21, "grad_norm": 0.7162572026325102, "learning_rate": 1.836577974964081e-06, "loss": 0.5486, "step": 1641 }, { "epoch": 0.21, "grad_norm": 0.8452709874405113, "learning_rate": 1.836351827968182e-06, "loss": 0.6542, "step": 1642 }, { "epoch": 0.21, "grad_norm": 0.6893632087623576, "learning_rate": 1.8361255385502794e-06, "loss": 0.6106, "step": 1643 }, { "epoch": 0.21, "grad_norm": 0.6506495065444791, "learning_rate": 1.8358991067489082e-06, "loss": 0.5207, "step": 1644 }, { "epoch": 0.21, "grad_norm": 0.8825450209886359, "learning_rate": 1.8356725326026275e-06, "loss": 0.6257, "step": 1645 }, { "epoch": 0.21, "grad_norm": 0.7559691618728528, "learning_rate": 1.8354458161500206e-06, "loss": 0.6567, "step": 1646 }, { "epoch": 0.21, "grad_norm": 0.6704168487885038, "learning_rate": 1.8352189574296948e-06, "loss": 0.5112, "step": 1647 }, { "epoch": 0.21, "grad_norm": 1.6894047726274033, "learning_rate": 1.8349919564802815e-06, "loss": 0.6116, "step": 1648 }, { "epoch": 0.21, "grad_norm": 0.6948196917908928, "learning_rate": 1.8347648133404375e-06, "loss": 0.5751, "step": 1649 }, { "epoch": 0.21, "grad_norm": 0.692581434834775, "learning_rate": 1.8345375280488418e-06, "loss": 0.5461, "step": 1650 }, { "epoch": 0.21, "grad_norm": 0.8194038517587576, "learning_rate": 1.8343101006441994e-06, "loss": 0.6431, "step": 1651 }, { "epoch": 0.21, "grad_norm": 0.6407235700782669, "learning_rate": 1.8340825311652383e-06, "loss": 0.5589, "step": 1652 }, { "epoch": 0.21, "grad_norm": 0.8468378631389253, "learning_rate": 1.8338548196507121e-06, "loss": 0.6439, "step": 1653 }, { "epoch": 0.21, "grad_norm": 0.8734794123389028, "learning_rate": 1.833626966139397e-06, "loss": 0.6749, "step": 1654 }, { "epoch": 0.21, "grad_norm": 0.8013533866339752, "learning_rate": 1.8333989706700939e-06, "loss": 0.6677, "step": 1655 }, { "epoch": 0.21, "grad_norm": 0.5928042213781552, "learning_rate": 1.8331708332816288e-06, "loss": 0.5278, "step": 1656 }, { "epoch": 0.21, "grad_norm": 0.8144997164678949, "learning_rate": 1.8329425540128504e-06, "loss": 0.6359, "step": 1657 }, { "epoch": 0.21, "grad_norm": 0.977124050802364, "learning_rate": 1.8327141329026327e-06, "loss": 0.6846, "step": 1658 }, { "epoch": 0.21, "grad_norm": 1.0087841667824307, "learning_rate": 1.8324855699898733e-06, "loss": 0.6619, "step": 1659 }, { "epoch": 0.21, "grad_norm": 0.8986226558281747, "learning_rate": 1.8322568653134942e-06, "loss": 0.6628, "step": 1660 }, { "epoch": 0.21, "grad_norm": 0.7921554826764078, "learning_rate": 1.8320280189124412e-06, "loss": 0.5922, "step": 1661 }, { "epoch": 0.21, "grad_norm": 0.5876378741089907, "learning_rate": 1.8317990308256848e-06, "loss": 0.5087, "step": 1662 }, { "epoch": 0.21, "grad_norm": 0.7370933222467385, "learning_rate": 1.8315699010922187e-06, "loss": 0.5453, "step": 1663 }, { "epoch": 0.21, "grad_norm": 0.6018550117834901, "learning_rate": 1.8313406297510618e-06, "loss": 0.5572, "step": 1664 }, { "epoch": 0.21, "grad_norm": 0.7644657865460001, "learning_rate": 1.8311112168412568e-06, "loss": 0.6237, "step": 1665 }, { "epoch": 0.21, "grad_norm": 0.5795405957249056, "learning_rate": 1.83088166240187e-06, "loss": 0.5211, "step": 1666 }, { "epoch": 0.21, "grad_norm": 0.963861691677456, "learning_rate": 1.8306519664719917e-06, "loss": 0.6206, "step": 1667 }, { "epoch": 0.21, "grad_norm": 7.215607269140423, "learning_rate": 1.8304221290907376e-06, "loss": 0.624, "step": 1668 }, { "epoch": 0.21, "grad_norm": 0.6081156974487874, "learning_rate": 1.8301921502972459e-06, "loss": 0.5061, "step": 1669 }, { "epoch": 0.21, "grad_norm": 0.8615416937711721, "learning_rate": 1.82996203013068e-06, "loss": 0.6891, "step": 1670 }, { "epoch": 0.21, "grad_norm": 0.6322853460124794, "learning_rate": 1.8297317686302269e-06, "loss": 0.5688, "step": 1671 }, { "epoch": 0.21, "grad_norm": 0.8375491865368652, "learning_rate": 1.8295013658350978e-06, "loss": 0.6053, "step": 1672 }, { "epoch": 0.21, "grad_norm": 0.6625210088498944, "learning_rate": 1.8292708217845276e-06, "loss": 0.5334, "step": 1673 }, { "epoch": 0.21, "grad_norm": 0.9419629383492433, "learning_rate": 1.8290401365177757e-06, "loss": 0.5954, "step": 1674 }, { "epoch": 0.21, "grad_norm": 0.8105340052486486, "learning_rate": 1.8288093100741256e-06, "loss": 0.5891, "step": 1675 }, { "epoch": 0.21, "grad_norm": 0.6243193170773744, "learning_rate": 1.8285783424928842e-06, "loss": 0.5527, "step": 1676 }, { "epoch": 0.21, "grad_norm": 0.6761909696830369, "learning_rate": 1.8283472338133831e-06, "loss": 0.5493, "step": 1677 }, { "epoch": 0.21, "grad_norm": 0.6400055123021755, "learning_rate": 1.8281159840749782e-06, "loss": 0.5792, "step": 1678 }, { "epoch": 0.21, "grad_norm": 0.9514989781518169, "learning_rate": 1.827884593317048e-06, "loss": 0.6216, "step": 1679 }, { "epoch": 0.21, "grad_norm": 0.704368678874323, "learning_rate": 1.8276530615789968e-06, "loss": 0.5719, "step": 1680 }, { "epoch": 0.21, "grad_norm": 0.6376935677805395, "learning_rate": 1.8274213889002513e-06, "loss": 0.5404, "step": 1681 }, { "epoch": 0.21, "grad_norm": 0.7016259978977398, "learning_rate": 1.8271895753202636e-06, "loss": 0.585, "step": 1682 }, { "epoch": 0.21, "grad_norm": 0.667379014621767, "learning_rate": 1.8269576208785087e-06, "loss": 0.5768, "step": 1683 }, { "epoch": 0.21, "grad_norm": 0.7169820387837189, "learning_rate": 1.826725525614486e-06, "loss": 0.5238, "step": 1684 }, { "epoch": 0.21, "grad_norm": 0.8535744437583227, "learning_rate": 1.8264932895677193e-06, "loss": 0.6208, "step": 1685 }, { "epoch": 0.21, "grad_norm": 0.759325381467501, "learning_rate": 1.8262609127777555e-06, "loss": 0.5592, "step": 1686 }, { "epoch": 0.21, "grad_norm": 0.6867251456947394, "learning_rate": 1.8260283952841662e-06, "loss": 0.5821, "step": 1687 }, { "epoch": 0.22, "grad_norm": 0.6844502400655375, "learning_rate": 1.8257957371265466e-06, "loss": 0.5078, "step": 1688 }, { "epoch": 0.22, "grad_norm": 1.0501574860131644, "learning_rate": 1.8255629383445164e-06, "loss": 0.6296, "step": 1689 }, { "epoch": 0.22, "grad_norm": 0.7359696825495957, "learning_rate": 1.8253299989777182e-06, "loss": 0.6298, "step": 1690 }, { "epoch": 0.22, "grad_norm": 0.5944791727806555, "learning_rate": 1.8250969190658194e-06, "loss": 0.5334, "step": 1691 }, { "epoch": 0.22, "grad_norm": 0.7086113176730976, "learning_rate": 1.824863698648511e-06, "loss": 0.5977, "step": 1692 }, { "epoch": 0.22, "grad_norm": 0.6682657789934663, "learning_rate": 1.8246303377655083e-06, "loss": 0.5774, "step": 1693 }, { "epoch": 0.22, "grad_norm": 0.7311237871921148, "learning_rate": 1.82439683645655e-06, "loss": 0.5557, "step": 1694 }, { "epoch": 0.22, "grad_norm": 0.9351573993722992, "learning_rate": 1.824163194761399e-06, "loss": 0.6239, "step": 1695 }, { "epoch": 0.22, "grad_norm": 1.0573278920230318, "learning_rate": 1.8239294127198416e-06, "loss": 0.6899, "step": 1696 }, { "epoch": 0.22, "grad_norm": 0.8344512612404636, "learning_rate": 1.8236954903716892e-06, "loss": 0.5651, "step": 1697 }, { "epoch": 0.22, "grad_norm": 0.6888017534953439, "learning_rate": 1.823461427756776e-06, "loss": 0.533, "step": 1698 }, { "epoch": 0.22, "grad_norm": 0.7732419714499459, "learning_rate": 1.8232272249149604e-06, "loss": 0.616, "step": 1699 }, { "epoch": 0.22, "grad_norm": 0.8433822631234789, "learning_rate": 1.8229928818861244e-06, "loss": 0.6228, "step": 1700 }, { "epoch": 0.22, "grad_norm": 0.9359366082002993, "learning_rate": 1.8227583987101749e-06, "loss": 0.6523, "step": 1701 }, { "epoch": 0.22, "grad_norm": 1.2195359312216238, "learning_rate": 1.8225237754270413e-06, "loss": 0.6511, "step": 1702 }, { "epoch": 0.22, "grad_norm": 0.717747879583717, "learning_rate": 1.822289012076678e-06, "loss": 0.5978, "step": 1703 }, { "epoch": 0.22, "grad_norm": 0.5640604308698282, "learning_rate": 1.822054108699062e-06, "loss": 0.4917, "step": 1704 }, { "epoch": 0.22, "grad_norm": 0.8031217560664929, "learning_rate": 1.8218190653341959e-06, "loss": 0.6298, "step": 1705 }, { "epoch": 0.22, "grad_norm": 0.5787144426985771, "learning_rate": 1.8215838820221044e-06, "loss": 0.5319, "step": 1706 }, { "epoch": 0.22, "grad_norm": 0.8937066735404819, "learning_rate": 1.8213485588028367e-06, "loss": 0.6945, "step": 1707 }, { "epoch": 0.22, "grad_norm": 0.6863783673075151, "learning_rate": 1.8211130957164666e-06, "loss": 0.5567, "step": 1708 }, { "epoch": 0.22, "grad_norm": 0.7265154892039355, "learning_rate": 1.8208774928030905e-06, "loss": 0.5754, "step": 1709 }, { "epoch": 0.22, "grad_norm": 0.772943659476795, "learning_rate": 1.8206417501028292e-06, "loss": 0.6054, "step": 1710 }, { "epoch": 0.22, "grad_norm": 0.9176846667592462, "learning_rate": 1.820405867655827e-06, "loss": 0.6722, "step": 1711 }, { "epoch": 0.22, "grad_norm": 1.5169606561675282, "learning_rate": 1.820169845502253e-06, "loss": 0.5989, "step": 1712 }, { "epoch": 0.22, "grad_norm": 0.9321053736372229, "learning_rate": 1.8199336836822984e-06, "loss": 0.6108, "step": 1713 }, { "epoch": 0.22, "grad_norm": 0.6613246859591668, "learning_rate": 1.8196973822361796e-06, "loss": 0.5643, "step": 1714 }, { "epoch": 0.22, "grad_norm": 0.8621861741978805, "learning_rate": 1.8194609412041362e-06, "loss": 0.6714, "step": 1715 }, { "epoch": 0.22, "grad_norm": 0.7322492213774321, "learning_rate": 1.8192243606264314e-06, "loss": 0.5575, "step": 1716 }, { "epoch": 0.22, "grad_norm": 0.6400037426835998, "learning_rate": 1.8189876405433527e-06, "loss": 0.5424, "step": 1717 }, { "epoch": 0.22, "grad_norm": 0.7130718263249135, "learning_rate": 1.818750780995211e-06, "loss": 0.5323, "step": 1718 }, { "epoch": 0.22, "grad_norm": 0.7786314679117163, "learning_rate": 1.8185137820223408e-06, "loss": 0.6134, "step": 1719 }, { "epoch": 0.22, "grad_norm": 0.863903543277782, "learning_rate": 1.818276643665101e-06, "loss": 0.6036, "step": 1720 }, { "epoch": 0.22, "grad_norm": 0.7695356755769173, "learning_rate": 1.8180393659638733e-06, "loss": 0.5403, "step": 1721 }, { "epoch": 0.22, "grad_norm": 0.7961285574841794, "learning_rate": 1.817801948959064e-06, "loss": 0.6411, "step": 1722 }, { "epoch": 0.22, "grad_norm": 0.6933802464700348, "learning_rate": 1.8175643926911026e-06, "loss": 0.5944, "step": 1723 }, { "epoch": 0.22, "grad_norm": 0.7414849206408384, "learning_rate": 1.8173266972004424e-06, "loss": 0.5961, "step": 1724 }, { "epoch": 0.22, "grad_norm": 0.6705604842210674, "learning_rate": 1.8170888625275605e-06, "loss": 0.5409, "step": 1725 }, { "epoch": 0.22, "grad_norm": 0.6495933271204001, "learning_rate": 1.8168508887129576e-06, "loss": 0.5894, "step": 1726 }, { "epoch": 0.22, "grad_norm": 0.9815922397408414, "learning_rate": 1.8166127757971583e-06, "loss": 0.6456, "step": 1727 }, { "epoch": 0.22, "grad_norm": 0.8000133973683352, "learning_rate": 1.8163745238207109e-06, "loss": 0.6337, "step": 1728 }, { "epoch": 0.22, "grad_norm": 0.8675632344121847, "learning_rate": 1.8161361328241868e-06, "loss": 0.5925, "step": 1729 }, { "epoch": 0.22, "grad_norm": 0.6320894598546554, "learning_rate": 1.8158976028481816e-06, "loss": 0.5524, "step": 1730 }, { "epoch": 0.22, "grad_norm": 0.7189990178330681, "learning_rate": 1.815658933933315e-06, "loss": 0.585, "step": 1731 }, { "epoch": 0.22, "grad_norm": 0.7963348748625861, "learning_rate": 1.815420126120229e-06, "loss": 0.5724, "step": 1732 }, { "epoch": 0.22, "grad_norm": 0.7083215336159754, "learning_rate": 1.815181179449591e-06, "loss": 0.5717, "step": 1733 }, { "epoch": 0.22, "grad_norm": 0.8559419765520545, "learning_rate": 1.8149420939620904e-06, "loss": 0.5772, "step": 1734 }, { "epoch": 0.22, "grad_norm": 0.7659617275875781, "learning_rate": 1.8147028696984408e-06, "loss": 0.5821, "step": 1735 }, { "epoch": 0.22, "grad_norm": 0.6793268043214676, "learning_rate": 1.8144635066993806e-06, "loss": 0.5852, "step": 1736 }, { "epoch": 0.22, "grad_norm": 0.8470579173275415, "learning_rate": 1.81422400500567e-06, "loss": 0.6695, "step": 1737 }, { "epoch": 0.22, "grad_norm": 0.7237771145517838, "learning_rate": 1.8139843646580937e-06, "loss": 0.5823, "step": 1738 }, { "epoch": 0.22, "grad_norm": 0.7518205025159073, "learning_rate": 1.8137445856974602e-06, "loss": 0.5894, "step": 1739 }, { "epoch": 0.22, "grad_norm": 0.609674305048156, "learning_rate": 1.8135046681646014e-06, "loss": 0.5869, "step": 1740 }, { "epoch": 0.22, "grad_norm": 0.8015002984003656, "learning_rate": 1.8132646121003728e-06, "loss": 0.6024, "step": 1741 }, { "epoch": 0.22, "grad_norm": 0.9621602535567867, "learning_rate": 1.8130244175456531e-06, "loss": 0.7251, "step": 1742 }, { "epoch": 0.22, "grad_norm": 0.6922865833202008, "learning_rate": 1.812784084541345e-06, "loss": 0.5429, "step": 1743 }, { "epoch": 0.22, "grad_norm": 0.7716825465299049, "learning_rate": 1.812543613128375e-06, "loss": 0.6176, "step": 1744 }, { "epoch": 0.22, "grad_norm": 0.7450223219492245, "learning_rate": 1.8123030033476924e-06, "loss": 0.6246, "step": 1745 }, { "epoch": 0.22, "grad_norm": 0.719050302690064, "learning_rate": 1.812062255240271e-06, "loss": 0.5622, "step": 1746 }, { "epoch": 0.22, "grad_norm": 0.7811080873572938, "learning_rate": 1.8118213688471077e-06, "loss": 0.6673, "step": 1747 }, { "epoch": 0.22, "grad_norm": 0.7601059733631147, "learning_rate": 1.8115803442092226e-06, "loss": 0.5587, "step": 1748 }, { "epoch": 0.22, "grad_norm": 1.0545204832270898, "learning_rate": 1.8113391813676602e-06, "loss": 0.648, "step": 1749 }, { "epoch": 0.22, "grad_norm": 1.4159443131826095, "learning_rate": 1.8110978803634873e-06, "loss": 0.6413, "step": 1750 }, { "epoch": 0.22, "grad_norm": 0.7753009876390837, "learning_rate": 1.8108564412377954e-06, "loss": 0.6135, "step": 1751 }, { "epoch": 0.22, "grad_norm": 0.6402930474897482, "learning_rate": 1.8106148640316991e-06, "loss": 0.5219, "step": 1752 }, { "epoch": 0.22, "grad_norm": 0.7184571126578188, "learning_rate": 1.8103731487863363e-06, "loss": 0.5541, "step": 1753 }, { "epoch": 0.22, "grad_norm": 0.8164535530610884, "learning_rate": 1.8101312955428689e-06, "loss": 0.7062, "step": 1754 }, { "epoch": 0.22, "grad_norm": 0.7036791649722098, "learning_rate": 1.8098893043424814e-06, "loss": 0.5044, "step": 1755 }, { "epoch": 0.22, "grad_norm": 0.8196196271565832, "learning_rate": 1.809647175226383e-06, "loss": 0.5597, "step": 1756 }, { "epoch": 0.22, "grad_norm": 0.9076930818021154, "learning_rate": 1.8094049082358053e-06, "loss": 0.6464, "step": 1757 }, { "epoch": 0.22, "grad_norm": 1.3252900544878141, "learning_rate": 1.8091625034120042e-06, "loss": 0.6324, "step": 1758 }, { "epoch": 0.22, "grad_norm": 0.8820197688188727, "learning_rate": 1.8089199607962586e-06, "loss": 0.6482, "step": 1759 }, { "epoch": 0.22, "grad_norm": 0.9436784053090133, "learning_rate": 1.8086772804298706e-06, "loss": 0.6262, "step": 1760 }, { "epoch": 0.22, "grad_norm": 0.8388288289573467, "learning_rate": 1.808434462354167e-06, "loss": 0.6119, "step": 1761 }, { "epoch": 0.22, "grad_norm": 0.7436074536410334, "learning_rate": 1.8081915066104965e-06, "loss": 0.6424, "step": 1762 }, { "epoch": 0.22, "grad_norm": 0.6391458944845819, "learning_rate": 1.807948413240232e-06, "loss": 0.5543, "step": 1763 }, { "epoch": 0.22, "grad_norm": 0.6616441871034829, "learning_rate": 1.8077051822847697e-06, "loss": 0.5654, "step": 1764 }, { "epoch": 0.22, "grad_norm": 0.7153939696029903, "learning_rate": 1.8074618137855298e-06, "loss": 0.5288, "step": 1765 }, { "epoch": 0.22, "grad_norm": 0.7166501300969018, "learning_rate": 1.8072183077839549e-06, "loss": 0.5613, "step": 1766 }, { "epoch": 0.23, "grad_norm": 0.9375966353132854, "learning_rate": 1.8069746643215117e-06, "loss": 0.6596, "step": 1767 }, { "epoch": 0.23, "grad_norm": 0.6168753495415233, "learning_rate": 1.8067308834396903e-06, "loss": 0.5634, "step": 1768 }, { "epoch": 0.23, "grad_norm": 0.793592030923942, "learning_rate": 1.8064869651800038e-06, "loss": 0.5713, "step": 1769 }, { "epoch": 0.23, "grad_norm": 0.7221843069016274, "learning_rate": 1.806242909583989e-06, "loss": 0.5747, "step": 1770 }, { "epoch": 0.23, "grad_norm": 0.754349000250631, "learning_rate": 1.8059987166932062e-06, "loss": 0.5353, "step": 1771 }, { "epoch": 0.23, "grad_norm": 0.7377007516511078, "learning_rate": 1.8057543865492389e-06, "loss": 0.5858, "step": 1772 }, { "epoch": 0.23, "grad_norm": 0.8914179174487533, "learning_rate": 1.8055099191936938e-06, "loss": 0.6039, "step": 1773 }, { "epoch": 0.23, "grad_norm": 0.8358315477250097, "learning_rate": 1.805265314668201e-06, "loss": 0.612, "step": 1774 }, { "epoch": 0.23, "grad_norm": 0.6752937680619854, "learning_rate": 1.8050205730144144e-06, "loss": 0.5876, "step": 1775 }, { "epoch": 0.23, "grad_norm": 0.9635903635095093, "learning_rate": 1.804775694274011e-06, "loss": 0.6358, "step": 1776 }, { "epoch": 0.23, "grad_norm": 1.1013710186720425, "learning_rate": 1.8045306784886907e-06, "loss": 0.6133, "step": 1777 }, { "epoch": 0.23, "grad_norm": 0.8217142853232547, "learning_rate": 1.8042855257001777e-06, "loss": 0.6286, "step": 1778 }, { "epoch": 0.23, "grad_norm": 0.7735564763486008, "learning_rate": 1.8040402359502185e-06, "loss": 0.6093, "step": 1779 }, { "epoch": 0.23, "grad_norm": 0.6718518428706335, "learning_rate": 1.8037948092805834e-06, "loss": 0.546, "step": 1780 }, { "epoch": 0.23, "grad_norm": 0.6967613842264047, "learning_rate": 1.8035492457330663e-06, "loss": 0.5527, "step": 1781 }, { "epoch": 0.23, "grad_norm": 0.5857458394611412, "learning_rate": 1.8033035453494839e-06, "loss": 0.5344, "step": 1782 }, { "epoch": 0.23, "grad_norm": 0.7777387049701806, "learning_rate": 1.8030577081716767e-06, "loss": 0.6168, "step": 1783 }, { "epoch": 0.23, "grad_norm": 0.7292091758705487, "learning_rate": 1.8028117342415076e-06, "loss": 0.5525, "step": 1784 }, { "epoch": 0.23, "grad_norm": 0.8399172280979899, "learning_rate": 1.8025656236008642e-06, "loss": 0.6659, "step": 1785 }, { "epoch": 0.23, "grad_norm": 0.7873550511642023, "learning_rate": 1.802319376291656e-06, "loss": 0.5645, "step": 1786 }, { "epoch": 0.23, "grad_norm": 0.7011668376376946, "learning_rate": 1.8020729923558165e-06, "loss": 0.6104, "step": 1787 }, { "epoch": 0.23, "grad_norm": 0.6575708396640998, "learning_rate": 1.8018264718353024e-06, "loss": 0.5711, "step": 1788 }, { "epoch": 0.23, "grad_norm": 1.075276911909434, "learning_rate": 1.8015798147720935e-06, "loss": 0.6481, "step": 1789 }, { "epoch": 0.23, "grad_norm": 0.6511075333869601, "learning_rate": 1.801333021208193e-06, "loss": 0.5462, "step": 1790 }, { "epoch": 0.23, "grad_norm": 0.7116117749272244, "learning_rate": 1.8010860911856271e-06, "loss": 0.5712, "step": 1791 }, { "epoch": 0.23, "grad_norm": 0.7749589141582869, "learning_rate": 1.8008390247464455e-06, "loss": 0.6493, "step": 1792 }, { "epoch": 0.23, "grad_norm": 0.5759491996597453, "learning_rate": 1.8005918219327212e-06, "loss": 0.4923, "step": 1793 }, { "epoch": 0.23, "grad_norm": 0.709414032631128, "learning_rate": 1.8003444827865497e-06, "loss": 0.5401, "step": 1794 }, { "epoch": 0.23, "grad_norm": 0.7270460076709832, "learning_rate": 1.8000970073500512e-06, "loss": 0.5872, "step": 1795 }, { "epoch": 0.23, "grad_norm": 0.7303684683271232, "learning_rate": 1.7998493956653675e-06, "loss": 0.5505, "step": 1796 }, { "epoch": 0.23, "grad_norm": 0.6632609688806054, "learning_rate": 1.7996016477746644e-06, "loss": 0.5627, "step": 1797 }, { "epoch": 0.23, "grad_norm": 0.6716472636542259, "learning_rate": 1.7993537637201313e-06, "loss": 0.5347, "step": 1798 }, { "epoch": 0.23, "grad_norm": 0.8940057563993772, "learning_rate": 1.7991057435439796e-06, "loss": 0.6268, "step": 1799 }, { "epoch": 0.23, "grad_norm": 0.9901882066021813, "learning_rate": 1.7988575872884448e-06, "loss": 0.668, "step": 1800 }, { "epoch": 0.23, "grad_norm": 0.7736175987787409, "learning_rate": 1.7986092949957851e-06, "loss": 0.6248, "step": 1801 }, { "epoch": 0.23, "grad_norm": 0.6297204859594099, "learning_rate": 1.7983608667082825e-06, "loss": 0.5351, "step": 1802 }, { "epoch": 0.23, "grad_norm": 1.0389765068198484, "learning_rate": 1.7981123024682417e-06, "loss": 0.6717, "step": 1803 }, { "epoch": 0.23, "grad_norm": 0.6310245126822918, "learning_rate": 1.7978636023179902e-06, "loss": 0.5346, "step": 1804 }, { "epoch": 0.23, "grad_norm": 0.8822144262115715, "learning_rate": 1.7976147662998795e-06, "loss": 0.6763, "step": 1805 }, { "epoch": 0.23, "grad_norm": 0.8367915734638388, "learning_rate": 1.7973657944562835e-06, "loss": 0.5568, "step": 1806 }, { "epoch": 0.23, "grad_norm": 0.6700320410292668, "learning_rate": 1.7971166868295996e-06, "loss": 0.5901, "step": 1807 }, { "epoch": 0.23, "grad_norm": 0.8039083210573608, "learning_rate": 1.7968674434622484e-06, "loss": 0.6085, "step": 1808 }, { "epoch": 0.23, "grad_norm": 0.7332711376306712, "learning_rate": 1.7966180643966731e-06, "loss": 0.5897, "step": 1809 }, { "epoch": 0.23, "grad_norm": 0.8017395546653407, "learning_rate": 1.7963685496753404e-06, "loss": 0.623, "step": 1810 }, { "epoch": 0.23, "grad_norm": 0.9731423940169147, "learning_rate": 1.7961188993407405e-06, "loss": 0.7114, "step": 1811 }, { "epoch": 0.23, "grad_norm": 0.9136533455331627, "learning_rate": 1.7958691134353859e-06, "loss": 0.6612, "step": 1812 }, { "epoch": 0.23, "grad_norm": 0.6824813024463036, "learning_rate": 1.7956191920018124e-06, "loss": 0.5442, "step": 1813 }, { "epoch": 0.23, "grad_norm": 0.6547783313914429, "learning_rate": 1.7953691350825793e-06, "loss": 0.5869, "step": 1814 }, { "epoch": 0.23, "grad_norm": 0.9107461437175577, "learning_rate": 1.7951189427202686e-06, "loss": 0.6044, "step": 1815 }, { "epoch": 0.23, "grad_norm": 0.9049523623551637, "learning_rate": 1.7948686149574854e-06, "loss": 0.6302, "step": 1816 }, { "epoch": 0.23, "grad_norm": 0.6533488673986121, "learning_rate": 1.7946181518368582e-06, "loss": 0.5485, "step": 1817 }, { "epoch": 0.23, "grad_norm": 0.7732942418058559, "learning_rate": 1.7943675534010376e-06, "loss": 0.6102, "step": 1818 }, { "epoch": 0.23, "grad_norm": 0.7161350974579639, "learning_rate": 1.7941168196926987e-06, "loss": 0.582, "step": 1819 }, { "epoch": 0.23, "grad_norm": 0.7637448549180914, "learning_rate": 1.7938659507545385e-06, "loss": 0.5291, "step": 1820 }, { "epoch": 0.23, "grad_norm": 0.8450502837249684, "learning_rate": 1.793614946629277e-06, "loss": 0.6706, "step": 1821 }, { "epoch": 0.23, "grad_norm": 0.874345592622125, "learning_rate": 1.793363807359658e-06, "loss": 0.6393, "step": 1822 }, { "epoch": 0.23, "grad_norm": 0.8434333773652687, "learning_rate": 1.7931125329884478e-06, "loss": 0.6015, "step": 1823 }, { "epoch": 0.23, "grad_norm": 0.6753504198295516, "learning_rate": 1.7928611235584356e-06, "loss": 0.5709, "step": 1824 }, { "epoch": 0.23, "grad_norm": 0.840266876293863, "learning_rate": 1.7926095791124344e-06, "loss": 0.66, "step": 1825 }, { "epoch": 0.23, "grad_norm": 0.784821383743405, "learning_rate": 1.7923578996932794e-06, "loss": 0.6212, "step": 1826 }, { "epoch": 0.23, "grad_norm": 0.6884032469451853, "learning_rate": 1.7921060853438284e-06, "loss": 0.542, "step": 1827 }, { "epoch": 0.23, "grad_norm": 0.8075422832975773, "learning_rate": 1.7918541361069633e-06, "loss": 0.6222, "step": 1828 }, { "epoch": 0.23, "grad_norm": 0.7399777559833074, "learning_rate": 1.7916020520255884e-06, "loss": 0.5651, "step": 1829 }, { "epoch": 0.23, "grad_norm": 0.6104505732127692, "learning_rate": 1.7913498331426308e-06, "loss": 0.5023, "step": 1830 }, { "epoch": 0.23, "grad_norm": 0.6845071756461392, "learning_rate": 1.7910974795010405e-06, "loss": 0.5669, "step": 1831 }, { "epoch": 0.23, "grad_norm": 0.6983308791905123, "learning_rate": 1.7908449911437914e-06, "loss": 0.5664, "step": 1832 }, { "epoch": 0.23, "grad_norm": 1.5641879746773053, "learning_rate": 1.7905923681138792e-06, "loss": 0.6193, "step": 1833 }, { "epoch": 0.23, "grad_norm": 0.9189165504242603, "learning_rate": 1.7903396104543232e-06, "loss": 0.586, "step": 1834 }, { "epoch": 0.23, "grad_norm": 0.6768418230283442, "learning_rate": 1.7900867182081648e-06, "loss": 0.5129, "step": 1835 }, { "epoch": 0.23, "grad_norm": 0.7459100265506579, "learning_rate": 1.7898336914184698e-06, "loss": 0.5502, "step": 1836 }, { "epoch": 0.23, "grad_norm": 0.894648220461959, "learning_rate": 1.7895805301283256e-06, "loss": 0.6167, "step": 1837 }, { "epoch": 0.23, "grad_norm": 0.7826766630910629, "learning_rate": 1.7893272343808426e-06, "loss": 0.6205, "step": 1838 }, { "epoch": 0.23, "grad_norm": 0.6585168820675301, "learning_rate": 1.789073804219155e-06, "loss": 0.5373, "step": 1839 }, { "epoch": 0.23, "grad_norm": 0.6929156481527876, "learning_rate": 1.788820239686419e-06, "loss": 0.5531, "step": 1840 }, { "epoch": 0.23, "grad_norm": 0.8118485495405864, "learning_rate": 1.7885665408258138e-06, "loss": 0.6576, "step": 1841 }, { "epoch": 0.23, "grad_norm": 0.633392084229221, "learning_rate": 1.788312707680542e-06, "loss": 0.5176, "step": 1842 }, { "epoch": 0.23, "grad_norm": 0.5939779440611529, "learning_rate": 1.7880587402938287e-06, "loss": 0.458, "step": 1843 }, { "epoch": 0.23, "grad_norm": 0.7921128822794303, "learning_rate": 1.787804638708922e-06, "loss": 0.5956, "step": 1844 }, { "epoch": 0.24, "grad_norm": 0.7262286736980104, "learning_rate": 1.7875504029690926e-06, "loss": 0.5532, "step": 1845 }, { "epoch": 0.24, "grad_norm": 0.8430102284907283, "learning_rate": 1.7872960331176344e-06, "loss": 0.6652, "step": 1846 }, { "epoch": 0.24, "grad_norm": 0.8025138987104528, "learning_rate": 1.7870415291978633e-06, "loss": 0.6081, "step": 1847 }, { "epoch": 0.24, "grad_norm": 0.7528508808124932, "learning_rate": 1.7867868912531195e-06, "loss": 0.5858, "step": 1848 }, { "epoch": 0.24, "grad_norm": 0.82065606932345, "learning_rate": 1.7865321193267645e-06, "loss": 0.6887, "step": 1849 }, { "epoch": 0.24, "grad_norm": 0.6852720152451314, "learning_rate": 1.7862772134621835e-06, "loss": 0.513, "step": 1850 }, { "epoch": 0.24, "grad_norm": 0.8240005478968955, "learning_rate": 1.786022173702785e-06, "loss": 0.6599, "step": 1851 }, { "epoch": 0.24, "grad_norm": 0.6526852813261855, "learning_rate": 1.7857670000919986e-06, "loss": 0.5462, "step": 1852 }, { "epoch": 0.24, "grad_norm": 0.8147762917629007, "learning_rate": 1.7855116926732783e-06, "loss": 0.6072, "step": 1853 }, { "epoch": 0.24, "grad_norm": 0.6855452714477639, "learning_rate": 1.7852562514901e-06, "loss": 0.5743, "step": 1854 }, { "epoch": 0.24, "grad_norm": 0.7763801136740046, "learning_rate": 1.785000676585963e-06, "loss": 0.6082, "step": 1855 }, { "epoch": 0.24, "grad_norm": 0.69595133939554, "learning_rate": 1.7847449680043886e-06, "loss": 0.4729, "step": 1856 }, { "epoch": 0.24, "grad_norm": 0.8918771608434011, "learning_rate": 1.784489125788922e-06, "loss": 0.6664, "step": 1857 }, { "epoch": 0.24, "grad_norm": 0.9657679285298948, "learning_rate": 1.7842331499831294e-06, "loss": 0.7128, "step": 1858 }, { "epoch": 0.24, "grad_norm": 0.8663473904549109, "learning_rate": 1.783977040630602e-06, "loss": 0.7076, "step": 1859 }, { "epoch": 0.24, "grad_norm": 0.6453726744615014, "learning_rate": 1.7837207977749516e-06, "loss": 0.5352, "step": 1860 }, { "epoch": 0.24, "grad_norm": 0.6530170249749625, "learning_rate": 1.7834644214598145e-06, "loss": 0.5621, "step": 1861 }, { "epoch": 0.24, "grad_norm": 0.95251570310003, "learning_rate": 1.7832079117288483e-06, "loss": 0.7001, "step": 1862 }, { "epoch": 0.24, "grad_norm": 0.6722346808365075, "learning_rate": 1.782951268625734e-06, "loss": 0.558, "step": 1863 }, { "epoch": 0.24, "grad_norm": 0.9274179274129589, "learning_rate": 1.7826944921941753e-06, "loss": 0.6076, "step": 1864 }, { "epoch": 0.24, "grad_norm": 0.6853838023398372, "learning_rate": 1.782437582477899e-06, "loss": 0.5541, "step": 1865 }, { "epoch": 0.24, "grad_norm": 0.7599640349084955, "learning_rate": 1.7821805395206535e-06, "loss": 0.6366, "step": 1866 }, { "epoch": 0.24, "grad_norm": 0.7769829132601709, "learning_rate": 1.7819233633662109e-06, "loss": 0.624, "step": 1867 }, { "epoch": 0.24, "grad_norm": 0.8693199033196352, "learning_rate": 1.7816660540583656e-06, "loss": 0.6348, "step": 1868 }, { "epoch": 0.24, "grad_norm": 0.7822921014276808, "learning_rate": 1.7814086116409346e-06, "loss": 0.5689, "step": 1869 }, { "epoch": 0.24, "grad_norm": 0.6611646884305945, "learning_rate": 1.7811510361577576e-06, "loss": 0.5696, "step": 1870 }, { "epoch": 0.24, "grad_norm": 0.800443096287394, "learning_rate": 1.7808933276526971e-06, "loss": 0.6115, "step": 1871 }, { "epoch": 0.24, "grad_norm": 0.8331911398045193, "learning_rate": 1.7806354861696384e-06, "loss": 0.6356, "step": 1872 }, { "epoch": 0.24, "grad_norm": 0.6844827744325065, "learning_rate": 1.7803775117524888e-06, "loss": 0.5514, "step": 1873 }, { "epoch": 0.24, "grad_norm": 0.8547481653654526, "learning_rate": 1.7801194044451791e-06, "loss": 0.5512, "step": 1874 }, { "epoch": 0.24, "grad_norm": 0.7221053148217083, "learning_rate": 1.7798611642916619e-06, "loss": 0.5735, "step": 1875 }, { "epoch": 0.24, "grad_norm": 0.7739495685772367, "learning_rate": 1.7796027913359129e-06, "loss": 0.5793, "step": 1876 }, { "epoch": 0.24, "grad_norm": 0.7297646355104602, "learning_rate": 1.7793442856219304e-06, "loss": 0.5421, "step": 1877 }, { "epoch": 0.24, "grad_norm": 0.6461841130311093, "learning_rate": 1.7790856471937352e-06, "loss": 0.5127, "step": 1878 }, { "epoch": 0.24, "grad_norm": 0.9236566265321192, "learning_rate": 1.778826876095371e-06, "loss": 0.66, "step": 1879 }, { "epoch": 0.24, "grad_norm": 0.7539636166850503, "learning_rate": 1.778567972370903e-06, "loss": 0.6375, "step": 1880 }, { "epoch": 0.24, "grad_norm": 0.7286288032686404, "learning_rate": 1.7783089360644209e-06, "loss": 0.579, "step": 1881 }, { "epoch": 0.24, "grad_norm": 0.8678458234518728, "learning_rate": 1.7780497672200352e-06, "loss": 0.6182, "step": 1882 }, { "epoch": 0.24, "grad_norm": 0.8712181169348084, "learning_rate": 1.7777904658818797e-06, "loss": 0.5817, "step": 1883 }, { "epoch": 0.24, "grad_norm": 0.9606644706029104, "learning_rate": 1.777531032094111e-06, "loss": 0.6993, "step": 1884 }, { "epoch": 0.24, "grad_norm": 0.7675116112509338, "learning_rate": 1.7772714659009075e-06, "loss": 0.5638, "step": 1885 }, { "epoch": 0.24, "grad_norm": 0.804147269510392, "learning_rate": 1.777011767346471e-06, "loss": 0.6418, "step": 1886 }, { "epoch": 0.24, "grad_norm": 0.8462590077494584, "learning_rate": 1.7767519364750254e-06, "loss": 0.6466, "step": 1887 }, { "epoch": 0.24, "grad_norm": 0.7649575442190123, "learning_rate": 1.7764919733308172e-06, "loss": 0.6224, "step": 1888 }, { "epoch": 0.24, "grad_norm": 0.6641919143395882, "learning_rate": 1.7762318779581155e-06, "loss": 0.5599, "step": 1889 }, { "epoch": 0.24, "grad_norm": 0.6191015615607506, "learning_rate": 1.7759716504012117e-06, "loss": 0.5471, "step": 1890 }, { "epoch": 0.24, "grad_norm": 0.826367261722918, "learning_rate": 1.7757112907044198e-06, "loss": 0.6312, "step": 1891 }, { "epoch": 0.24, "grad_norm": 0.7329093125374098, "learning_rate": 1.7754507989120762e-06, "loss": 0.5807, "step": 1892 }, { "epoch": 0.24, "grad_norm": 1.0880791361518078, "learning_rate": 1.7751901750685402e-06, "loss": 0.6877, "step": 1893 }, { "epoch": 0.24, "grad_norm": 0.7794289140878874, "learning_rate": 1.7749294192181936e-06, "loss": 0.6075, "step": 1894 }, { "epoch": 0.24, "grad_norm": 0.9488668021981287, "learning_rate": 1.7746685314054397e-06, "loss": 0.6564, "step": 1895 }, { "epoch": 0.24, "grad_norm": 0.7419709195127533, "learning_rate": 1.7744075116747055e-06, "loss": 0.5439, "step": 1896 }, { "epoch": 0.24, "grad_norm": 0.609804270726911, "learning_rate": 1.7741463600704396e-06, "loss": 0.5878, "step": 1897 }, { "epoch": 0.24, "grad_norm": 0.9678376361798278, "learning_rate": 1.7738850766371138e-06, "loss": 0.7068, "step": 1898 }, { "epoch": 0.24, "grad_norm": 0.7030072723511392, "learning_rate": 1.7736236614192216e-06, "loss": 0.5142, "step": 1899 }, { "epoch": 0.24, "grad_norm": 0.7356532445456269, "learning_rate": 1.7733621144612796e-06, "loss": 0.5814, "step": 1900 }, { "epoch": 0.24, "grad_norm": 0.6125566865459748, "learning_rate": 1.7731004358078261e-06, "loss": 0.4986, "step": 1901 }, { "epoch": 0.24, "grad_norm": 0.6870762972652394, "learning_rate": 1.7728386255034224e-06, "loss": 0.5201, "step": 1902 }, { "epoch": 0.24, "grad_norm": 0.6047305834695749, "learning_rate": 1.7725766835926526e-06, "loss": 0.5208, "step": 1903 }, { "epoch": 0.24, "grad_norm": 0.6297307208200389, "learning_rate": 1.7723146101201218e-06, "loss": 0.517, "step": 1904 }, { "epoch": 0.24, "grad_norm": 0.7478567894659794, "learning_rate": 1.772052405130459e-06, "loss": 0.6344, "step": 1905 }, { "epoch": 0.24, "grad_norm": 0.8826083647122114, "learning_rate": 1.7717900686683148e-06, "loss": 0.5932, "step": 1906 }, { "epoch": 0.24, "grad_norm": 0.836202440522907, "learning_rate": 1.7715276007783621e-06, "loss": 0.6345, "step": 1907 }, { "epoch": 0.24, "grad_norm": 0.7268851100974629, "learning_rate": 1.771265001505297e-06, "loss": 0.5825, "step": 1908 }, { "epoch": 0.24, "grad_norm": 0.6382083365342568, "learning_rate": 1.771002270893837e-06, "loss": 0.5736, "step": 1909 }, { "epoch": 0.24, "grad_norm": 0.8316729488570054, "learning_rate": 1.7707394089887226e-06, "loss": 0.6759, "step": 1910 }, { "epoch": 0.24, "grad_norm": 0.602748423526947, "learning_rate": 1.7704764158347164e-06, "loss": 0.4984, "step": 1911 }, { "epoch": 0.24, "grad_norm": 1.1711787851824031, "learning_rate": 1.7702132914766033e-06, "loss": 0.6554, "step": 1912 }, { "epoch": 0.24, "grad_norm": 0.8060070938716503, "learning_rate": 1.7699500359591904e-06, "loss": 0.6326, "step": 1913 }, { "epoch": 0.24, "grad_norm": 0.7796377904710955, "learning_rate": 1.7696866493273079e-06, "loss": 0.5641, "step": 1914 }, { "epoch": 0.24, "grad_norm": 0.9707287094226319, "learning_rate": 1.7694231316258077e-06, "loss": 0.65, "step": 1915 }, { "epoch": 0.24, "grad_norm": 0.9936923167622201, "learning_rate": 1.7691594828995637e-06, "loss": 0.6512, "step": 1916 }, { "epoch": 0.24, "grad_norm": 0.8277719101354267, "learning_rate": 1.768895703193473e-06, "loss": 0.6593, "step": 1917 }, { "epoch": 0.24, "grad_norm": 0.7802760148694542, "learning_rate": 1.7686317925524544e-06, "loss": 0.659, "step": 1918 }, { "epoch": 0.24, "grad_norm": 0.876995905368891, "learning_rate": 1.7683677510214489e-06, "loss": 0.6658, "step": 1919 }, { "epoch": 0.24, "grad_norm": 0.6567710691631918, "learning_rate": 1.7681035786454202e-06, "loss": 0.5623, "step": 1920 }, { "epoch": 0.24, "grad_norm": 0.7822242399840562, "learning_rate": 1.767839275469354e-06, "loss": 0.5837, "step": 1921 }, { "epoch": 0.24, "grad_norm": 0.9110085841157554, "learning_rate": 1.7675748415382587e-06, "loss": 0.6497, "step": 1922 }, { "epoch": 0.24, "grad_norm": 0.7521348787254326, "learning_rate": 1.7673102768971644e-06, "loss": 0.5934, "step": 1923 }, { "epoch": 0.25, "grad_norm": 0.8471070197928361, "learning_rate": 1.7670455815911236e-06, "loss": 0.6324, "step": 1924 }, { "epoch": 0.25, "grad_norm": 0.8958541573756849, "learning_rate": 1.7667807556652115e-06, "loss": 0.6207, "step": 1925 }, { "epoch": 0.25, "grad_norm": 0.7457863893156891, "learning_rate": 1.7665157991645247e-06, "loss": 0.5553, "step": 1926 }, { "epoch": 0.25, "grad_norm": 0.7302968150403704, "learning_rate": 1.766250712134183e-06, "loss": 0.6165, "step": 1927 }, { "epoch": 0.25, "grad_norm": 0.6122864161430793, "learning_rate": 1.7659854946193278e-06, "loss": 0.5737, "step": 1928 }, { "epoch": 0.25, "grad_norm": 0.7549936233355308, "learning_rate": 1.7657201466651226e-06, "loss": 0.5881, "step": 1929 }, { "epoch": 0.25, "grad_norm": 0.655692417023371, "learning_rate": 1.7654546683167539e-06, "loss": 0.5723, "step": 1930 }, { "epoch": 0.25, "grad_norm": 0.6368001873576175, "learning_rate": 1.7651890596194293e-06, "loss": 0.5573, "step": 1931 }, { "epoch": 0.25, "grad_norm": 0.702391316569007, "learning_rate": 1.7649233206183797e-06, "loss": 0.4837, "step": 1932 }, { "epoch": 0.25, "grad_norm": 0.7221252873810466, "learning_rate": 1.764657451358858e-06, "loss": 0.5834, "step": 1933 }, { "epoch": 0.25, "grad_norm": 0.8900272411860402, "learning_rate": 1.7643914518861377e-06, "loss": 0.6337, "step": 1934 }, { "epoch": 0.25, "grad_norm": 0.717110735990919, "learning_rate": 1.7641253222455169e-06, "loss": 0.5198, "step": 1935 }, { "epoch": 0.25, "grad_norm": 0.8049182601354071, "learning_rate": 1.7638590624823144e-06, "loss": 0.6588, "step": 1936 }, { "epoch": 0.25, "grad_norm": 0.835161513069722, "learning_rate": 1.763592672641871e-06, "loss": 0.5984, "step": 1937 }, { "epoch": 0.25, "grad_norm": 0.6904690097870843, "learning_rate": 1.763326152769551e-06, "loss": 0.591, "step": 1938 }, { "epoch": 0.25, "grad_norm": 0.8632441537915783, "learning_rate": 1.7630595029107393e-06, "loss": 0.648, "step": 1939 }, { "epoch": 0.25, "grad_norm": 0.7826437403444958, "learning_rate": 1.7627927231108436e-06, "loss": 0.5874, "step": 1940 }, { "epoch": 0.25, "grad_norm": 0.6287215463480835, "learning_rate": 1.762525813415294e-06, "loss": 0.5535, "step": 1941 }, { "epoch": 0.25, "grad_norm": 0.7659824403455862, "learning_rate": 1.7622587738695423e-06, "loss": 0.5302, "step": 1942 }, { "epoch": 0.25, "grad_norm": 0.843256894496395, "learning_rate": 1.7619916045190623e-06, "loss": 0.6414, "step": 1943 }, { "epoch": 0.25, "grad_norm": 0.7774421131888085, "learning_rate": 1.7617243054093507e-06, "loss": 0.623, "step": 1944 }, { "epoch": 0.25, "grad_norm": 0.6506932904808445, "learning_rate": 1.7614568765859256e-06, "loss": 0.5609, "step": 1945 }, { "epoch": 0.25, "grad_norm": 0.7815221517381211, "learning_rate": 1.7611893180943268e-06, "loss": 0.5653, "step": 1946 }, { "epoch": 0.25, "grad_norm": 0.6522070242029065, "learning_rate": 1.7609216299801174e-06, "loss": 0.5913, "step": 1947 }, { "epoch": 0.25, "grad_norm": 0.7507391850490923, "learning_rate": 1.7606538122888817e-06, "loss": 0.5346, "step": 1948 }, { "epoch": 0.25, "grad_norm": 0.5720325102953173, "learning_rate": 1.7603858650662259e-06, "loss": 0.5217, "step": 1949 }, { "epoch": 0.25, "grad_norm": 0.6441855593318525, "learning_rate": 1.7601177883577791e-06, "loss": 0.5129, "step": 1950 }, { "epoch": 0.25, "grad_norm": 0.7232599441643058, "learning_rate": 1.7598495822091919e-06, "loss": 0.5437, "step": 1951 }, { "epoch": 0.25, "grad_norm": 0.6517641379863649, "learning_rate": 1.759581246666137e-06, "loss": 0.6005, "step": 1952 }, { "epoch": 0.25, "grad_norm": 0.7861779904728772, "learning_rate": 1.7593127817743087e-06, "loss": 0.5863, "step": 1953 }, { "epoch": 0.25, "grad_norm": 0.698028513976124, "learning_rate": 1.7590441875794244e-06, "loss": 0.5555, "step": 1954 }, { "epoch": 0.25, "grad_norm": 0.6573264891159366, "learning_rate": 1.7587754641272226e-06, "loss": 0.5982, "step": 1955 }, { "epoch": 0.25, "grad_norm": 0.7823142755271408, "learning_rate": 1.758506611463464e-06, "loss": 0.6726, "step": 1956 }, { "epoch": 0.25, "grad_norm": 0.9503604097595598, "learning_rate": 1.7582376296339317e-06, "loss": 0.6091, "step": 1957 }, { "epoch": 0.25, "grad_norm": 0.8649748996681877, "learning_rate": 1.7579685186844302e-06, "loss": 0.6053, "step": 1958 }, { "epoch": 0.25, "grad_norm": 0.6890272099073813, "learning_rate": 1.7576992786607865e-06, "loss": 0.5852, "step": 1959 }, { "epoch": 0.25, "grad_norm": 0.6892385383971602, "learning_rate": 1.7574299096088493e-06, "loss": 0.5764, "step": 1960 }, { "epoch": 0.25, "grad_norm": 0.6998493343120642, "learning_rate": 1.757160411574489e-06, "loss": 0.5651, "step": 1961 }, { "epoch": 0.25, "grad_norm": 1.1327296386247903, "learning_rate": 1.7568907846035992e-06, "loss": 0.6668, "step": 1962 }, { "epoch": 0.25, "grad_norm": 0.9262251267845324, "learning_rate": 1.7566210287420935e-06, "loss": 0.6537, "step": 1963 }, { "epoch": 0.25, "grad_norm": 0.7848158578849249, "learning_rate": 1.7563511440359094e-06, "loss": 0.5477, "step": 1964 }, { "epoch": 0.25, "grad_norm": 0.730317574898393, "learning_rate": 1.7560811305310051e-06, "loss": 0.5811, "step": 1965 }, { "epoch": 0.25, "grad_norm": 0.8278870905522837, "learning_rate": 1.7558109882733606e-06, "loss": 0.6411, "step": 1966 }, { "epoch": 0.25, "grad_norm": 0.8054967152969693, "learning_rate": 1.7555407173089794e-06, "loss": 0.6234, "step": 1967 }, { "epoch": 0.25, "grad_norm": 0.8773575157836968, "learning_rate": 1.7552703176838848e-06, "loss": 0.6383, "step": 1968 }, { "epoch": 0.25, "grad_norm": 0.6508041652847512, "learning_rate": 1.7549997894441236e-06, "loss": 0.5225, "step": 1969 }, { "epoch": 0.25, "grad_norm": 0.9175595680321516, "learning_rate": 1.7547291326357638e-06, "loss": 0.6402, "step": 1970 }, { "epoch": 0.25, "grad_norm": 0.6227471094894387, "learning_rate": 1.7544583473048955e-06, "loss": 0.5223, "step": 1971 }, { "epoch": 0.25, "grad_norm": 0.8155018910323264, "learning_rate": 1.7541874334976303e-06, "loss": 0.5812, "step": 1972 }, { "epoch": 0.25, "grad_norm": 0.6350887075095119, "learning_rate": 1.7539163912601023e-06, "loss": 0.4831, "step": 1973 }, { "epoch": 0.25, "grad_norm": 0.8050582414820544, "learning_rate": 1.7536452206384671e-06, "loss": 0.6321, "step": 1974 }, { "epoch": 0.25, "grad_norm": 0.7538737987100852, "learning_rate": 1.7533739216789024e-06, "loss": 0.5709, "step": 1975 }, { "epoch": 0.25, "grad_norm": 0.9600290402927429, "learning_rate": 1.7531024944276074e-06, "loss": 0.6369, "step": 1976 }, { "epoch": 0.25, "grad_norm": 0.8174533281317452, "learning_rate": 1.7528309389308032e-06, "loss": 0.6263, "step": 1977 }, { "epoch": 0.25, "grad_norm": 0.6971373595206908, "learning_rate": 1.752559255234733e-06, "loss": 0.5605, "step": 1978 }, { "epoch": 0.25, "grad_norm": 0.9526745824675443, "learning_rate": 1.752287443385662e-06, "loss": 0.6108, "step": 1979 }, { "epoch": 0.25, "grad_norm": 0.689269069695771, "learning_rate": 1.7520155034298768e-06, "loss": 0.5268, "step": 1980 }, { "epoch": 0.25, "grad_norm": 0.8708647272556966, "learning_rate": 1.7517434354136854e-06, "loss": 0.6028, "step": 1981 }, { "epoch": 0.25, "grad_norm": 0.6361585543414858, "learning_rate": 1.7514712393834188e-06, "loss": 0.5453, "step": 1982 }, { "epoch": 0.25, "grad_norm": 0.8392434249678136, "learning_rate": 1.7511989153854288e-06, "loss": 0.6148, "step": 1983 }, { "epoch": 0.25, "grad_norm": 0.7075448213622311, "learning_rate": 1.7509264634660894e-06, "loss": 0.5685, "step": 1984 }, { "epoch": 0.25, "grad_norm": 0.8528814133011654, "learning_rate": 1.7506538836717964e-06, "loss": 0.6456, "step": 1985 }, { "epoch": 0.25, "grad_norm": 0.8265923423981631, "learning_rate": 1.7503811760489673e-06, "loss": 0.5389, "step": 1986 }, { "epoch": 0.25, "grad_norm": 0.8008823004752446, "learning_rate": 1.7501083406440412e-06, "loss": 0.5195, "step": 1987 }, { "epoch": 0.25, "grad_norm": 0.8033964058898991, "learning_rate": 1.7498353775034796e-06, "loss": 0.6242, "step": 1988 }, { "epoch": 0.25, "grad_norm": 1.0737669195935626, "learning_rate": 1.7495622866737647e-06, "loss": 0.6399, "step": 1989 }, { "epoch": 0.25, "grad_norm": 0.8321079576771585, "learning_rate": 1.7492890682014014e-06, "loss": 0.5917, "step": 1990 }, { "epoch": 0.25, "grad_norm": 0.9312648408941783, "learning_rate": 1.7490157221329155e-06, "loss": 0.699, "step": 1991 }, { "epoch": 0.25, "grad_norm": 0.7327398403417205, "learning_rate": 1.7487422485148557e-06, "loss": 0.5712, "step": 1992 }, { "epoch": 0.25, "grad_norm": 0.6849808079150778, "learning_rate": 1.748468647393791e-06, "loss": 0.5024, "step": 1993 }, { "epoch": 0.25, "grad_norm": 0.9348076194541277, "learning_rate": 1.7481949188163134e-06, "loss": 0.6158, "step": 1994 }, { "epoch": 0.25, "grad_norm": 0.6982390069361648, "learning_rate": 1.7479210628290356e-06, "loss": 0.5888, "step": 1995 }, { "epoch": 0.25, "grad_norm": 0.6678979061957547, "learning_rate": 1.7476470794785927e-06, "loss": 0.5531, "step": 1996 }, { "epoch": 0.25, "grad_norm": 0.6122456060231373, "learning_rate": 1.747372968811641e-06, "loss": 0.5081, "step": 1997 }, { "epoch": 0.25, "grad_norm": 0.847510734132804, "learning_rate": 1.7470987308748588e-06, "loss": 0.6683, "step": 1998 }, { "epoch": 0.25, "grad_norm": 0.8597071874715856, "learning_rate": 1.746824365714946e-06, "loss": 0.5579, "step": 1999 }, { "epoch": 0.25, "grad_norm": 1.0049671843803445, "learning_rate": 1.7465498733786238e-06, "loss": 0.6882, "step": 2000 }, { "epoch": 0.25, "grad_norm": 0.9632850134203702, "learning_rate": 1.7462752539126359e-06, "loss": 0.6602, "step": 2001 }, { "epoch": 0.26, "grad_norm": 0.6204375705688481, "learning_rate": 1.746000507363747e-06, "loss": 0.4932, "step": 2002 }, { "epoch": 0.26, "grad_norm": 0.7255976172712437, "learning_rate": 1.7457256337787434e-06, "loss": 0.5694, "step": 2003 }, { "epoch": 0.26, "grad_norm": 1.0890434377212321, "learning_rate": 1.7454506332044331e-06, "loss": 0.6209, "step": 2004 }, { "epoch": 0.26, "grad_norm": 0.8036445124670614, "learning_rate": 1.7451755056876466e-06, "loss": 0.6673, "step": 2005 }, { "epoch": 0.26, "grad_norm": 0.8681376660133959, "learning_rate": 1.744900251275234e-06, "loss": 0.6258, "step": 2006 }, { "epoch": 0.26, "grad_norm": 0.7520154982144631, "learning_rate": 1.744624870014069e-06, "loss": 0.5474, "step": 2007 }, { "epoch": 0.26, "grad_norm": 0.7584632840173634, "learning_rate": 1.7443493619510465e-06, "loss": 0.5703, "step": 2008 }, { "epoch": 0.26, "grad_norm": 0.6902469840265371, "learning_rate": 1.7440737271330818e-06, "loss": 0.5149, "step": 2009 }, { "epoch": 0.26, "grad_norm": 0.6299731996901248, "learning_rate": 1.7437979656071135e-06, "loss": 0.5525, "step": 2010 }, { "epoch": 0.26, "grad_norm": 0.7647183068795858, "learning_rate": 1.7435220774201e-06, "loss": 0.5799, "step": 2011 }, { "epoch": 0.26, "grad_norm": 0.9352919537488072, "learning_rate": 1.7432460626190228e-06, "loss": 0.6229, "step": 2012 }, { "epoch": 0.26, "grad_norm": 1.0063283423286586, "learning_rate": 1.7429699212508845e-06, "loss": 0.6647, "step": 2013 }, { "epoch": 0.26, "grad_norm": 0.5984889037514833, "learning_rate": 1.7426936533627088e-06, "loss": 0.4911, "step": 2014 }, { "epoch": 0.26, "grad_norm": 0.92772233450929, "learning_rate": 1.742417259001541e-06, "loss": 0.6753, "step": 2015 }, { "epoch": 0.26, "grad_norm": 0.6863813135188469, "learning_rate": 1.7421407382144486e-06, "loss": 0.563, "step": 2016 }, { "epoch": 0.26, "grad_norm": 0.793328552887762, "learning_rate": 1.7418640910485198e-06, "loss": 0.607, "step": 2017 }, { "epoch": 0.26, "grad_norm": 0.7328694300580574, "learning_rate": 1.7415873175508654e-06, "loss": 0.6, "step": 2018 }, { "epoch": 0.26, "grad_norm": 0.9601012962013009, "learning_rate": 1.7413104177686165e-06, "loss": 0.6537, "step": 2019 }, { "epoch": 0.26, "grad_norm": 0.7216811978034054, "learning_rate": 1.741033391748926e-06, "loss": 0.572, "step": 2020 }, { "epoch": 0.26, "grad_norm": 0.7626175276009577, "learning_rate": 1.7407562395389693e-06, "loss": 0.5778, "step": 2021 }, { "epoch": 0.26, "grad_norm": 0.8036142869090829, "learning_rate": 1.7404789611859419e-06, "loss": 0.6058, "step": 2022 }, { "epoch": 0.26, "grad_norm": 0.8210347776463577, "learning_rate": 1.740201556737062e-06, "loss": 0.6407, "step": 2023 }, { "epoch": 0.26, "grad_norm": 1.4226676888999619, "learning_rate": 1.739924026239568e-06, "loss": 0.7019, "step": 2024 }, { "epoch": 0.26, "grad_norm": 0.8600482768017905, "learning_rate": 1.739646369740721e-06, "loss": 0.6404, "step": 2025 }, { "epoch": 0.26, "grad_norm": 0.6722306567439322, "learning_rate": 1.7393685872878026e-06, "loss": 0.5103, "step": 2026 }, { "epoch": 0.26, "grad_norm": 0.8070650001578221, "learning_rate": 1.7390906789281167e-06, "loss": 0.5428, "step": 2027 }, { "epoch": 0.26, "grad_norm": 0.8182578516738027, "learning_rate": 1.7388126447089879e-06, "loss": 0.5756, "step": 2028 }, { "epoch": 0.26, "grad_norm": 0.6760540250660055, "learning_rate": 1.7385344846777628e-06, "loss": 0.5388, "step": 2029 }, { "epoch": 0.26, "grad_norm": 0.8547478698231441, "learning_rate": 1.7382561988818086e-06, "loss": 0.6966, "step": 2030 }, { "epoch": 0.26, "grad_norm": 0.6028461219783835, "learning_rate": 1.7379777873685152e-06, "loss": 0.5249, "step": 2031 }, { "epoch": 0.26, "grad_norm": 0.7751331584886236, "learning_rate": 1.7376992501852922e-06, "loss": 0.5576, "step": 2032 }, { "epoch": 0.26, "grad_norm": 0.8529732318524347, "learning_rate": 1.7374205873795727e-06, "loss": 0.6323, "step": 2033 }, { "epoch": 0.26, "grad_norm": 0.784708571185131, "learning_rate": 1.7371417989988095e-06, "loss": 0.6677, "step": 2034 }, { "epoch": 0.26, "grad_norm": 0.8445203811228591, "learning_rate": 1.736862885090477e-06, "loss": 0.5922, "step": 2035 }, { "epoch": 0.26, "grad_norm": 0.626953814265976, "learning_rate": 1.7365838457020723e-06, "loss": 0.5533, "step": 2036 }, { "epoch": 0.26, "grad_norm": 0.9373658271261294, "learning_rate": 1.7363046808811123e-06, "loss": 0.5383, "step": 2037 }, { "epoch": 0.26, "grad_norm": 0.8206727013516608, "learning_rate": 1.7360253906751358e-06, "loss": 0.6051, "step": 2038 }, { "epoch": 0.26, "grad_norm": 0.7839039452778948, "learning_rate": 1.735745975131703e-06, "loss": 0.6538, "step": 2039 }, { "epoch": 0.26, "grad_norm": 0.7428453455709109, "learning_rate": 1.7354664342983959e-06, "loss": 0.5713, "step": 2040 }, { "epoch": 0.26, "grad_norm": 0.6882113452087494, "learning_rate": 1.7351867682228167e-06, "loss": 0.5198, "step": 2041 }, { "epoch": 0.26, "grad_norm": 0.7405770388955921, "learning_rate": 1.7349069769525902e-06, "loss": 0.5891, "step": 2042 }, { "epoch": 0.26, "grad_norm": 0.6889721181823543, "learning_rate": 1.7346270605353617e-06, "loss": 0.5596, "step": 2043 }, { "epoch": 0.26, "grad_norm": 0.7022138186757454, "learning_rate": 1.734347019018798e-06, "loss": 0.5185, "step": 2044 }, { "epoch": 0.26, "grad_norm": 0.6419877180474133, "learning_rate": 1.7340668524505873e-06, "loss": 0.5331, "step": 2045 }, { "epoch": 0.26, "grad_norm": 0.8225161794894839, "learning_rate": 1.7337865608784392e-06, "loss": 0.5974, "step": 2046 }, { "epoch": 0.26, "grad_norm": 0.9045042253625015, "learning_rate": 1.7335061443500845e-06, "loss": 0.6199, "step": 2047 }, { "epoch": 0.26, "grad_norm": 0.8477206326338558, "learning_rate": 1.7332256029132748e-06, "loss": 0.6107, "step": 2048 }, { "epoch": 0.26, "grad_norm": 0.6724255637797462, "learning_rate": 1.7329449366157833e-06, "loss": 0.5241, "step": 2049 }, { "epoch": 0.26, "grad_norm": 0.8198267896718139, "learning_rate": 1.7326641455054055e-06, "loss": 0.6457, "step": 2050 }, { "epoch": 0.26, "grad_norm": 0.7062066621521382, "learning_rate": 1.7323832296299562e-06, "loss": 0.5616, "step": 2051 }, { "epoch": 0.26, "grad_norm": 0.6419552377439518, "learning_rate": 1.7321021890372727e-06, "loss": 0.5069, "step": 2052 }, { "epoch": 0.26, "grad_norm": 0.8959814502871365, "learning_rate": 1.7318210237752135e-06, "loss": 0.675, "step": 2053 }, { "epoch": 0.26, "grad_norm": 1.0490021598480384, "learning_rate": 1.7315397338916581e-06, "loss": 0.6811, "step": 2054 }, { "epoch": 0.26, "grad_norm": 0.8716286483232623, "learning_rate": 1.731258319434507e-06, "loss": 0.6459, "step": 2055 }, { "epoch": 0.26, "grad_norm": 0.698011633756415, "learning_rate": 1.7309767804516823e-06, "loss": 0.5216, "step": 2056 }, { "epoch": 0.26, "grad_norm": 0.7508879306314754, "learning_rate": 1.7306951169911275e-06, "loss": 0.6254, "step": 2057 }, { "epoch": 0.26, "grad_norm": 0.9881541263076405, "learning_rate": 1.7304133291008062e-06, "loss": 0.6911, "step": 2058 }, { "epoch": 0.26, "grad_norm": 0.7116438983719325, "learning_rate": 1.7301314168287046e-06, "loss": 0.5544, "step": 2059 }, { "epoch": 0.26, "grad_norm": 0.6403762726257071, "learning_rate": 1.7298493802228289e-06, "loss": 0.5406, "step": 2060 }, { "epoch": 0.26, "grad_norm": 0.8413679826564827, "learning_rate": 1.7295672193312076e-06, "loss": 0.5872, "step": 2061 }, { "epoch": 0.26, "grad_norm": 0.7017827772766783, "learning_rate": 1.7292849342018893e-06, "loss": 0.5858, "step": 2062 }, { "epoch": 0.26, "grad_norm": 0.5594918324840721, "learning_rate": 1.7290025248829443e-06, "loss": 0.4846, "step": 2063 }, { "epoch": 0.26, "grad_norm": 0.8874858545529425, "learning_rate": 1.7287199914224639e-06, "loss": 0.6424, "step": 2064 }, { "epoch": 0.26, "grad_norm": 0.641578037508466, "learning_rate": 1.7284373338685608e-06, "loss": 0.5471, "step": 2065 }, { "epoch": 0.26, "grad_norm": 0.8688531300229159, "learning_rate": 1.7281545522693687e-06, "loss": 0.6333, "step": 2066 }, { "epoch": 0.26, "grad_norm": 0.629678099551423, "learning_rate": 1.727871646673042e-06, "loss": 0.5726, "step": 2067 }, { "epoch": 0.26, "grad_norm": 0.7194016810169793, "learning_rate": 1.7275886171277568e-06, "loss": 0.6058, "step": 2068 }, { "epoch": 0.26, "grad_norm": 0.7756462750011853, "learning_rate": 1.72730546368171e-06, "loss": 0.6118, "step": 2069 }, { "epoch": 0.26, "grad_norm": 0.6106209097449659, "learning_rate": 1.72702218638312e-06, "loss": 0.4811, "step": 2070 }, { "epoch": 0.26, "grad_norm": 0.6379225934836319, "learning_rate": 1.7267387852802255e-06, "loss": 0.5492, "step": 2071 }, { "epoch": 0.26, "grad_norm": 0.73557929057043, "learning_rate": 1.7264552604212871e-06, "loss": 0.5174, "step": 2072 }, { "epoch": 0.26, "grad_norm": 0.6568481489244096, "learning_rate": 1.7261716118545859e-06, "loss": 0.5299, "step": 2073 }, { "epoch": 0.26, "grad_norm": 0.7522097401125556, "learning_rate": 1.7258878396284246e-06, "loss": 0.5839, "step": 2074 }, { "epoch": 0.26, "grad_norm": 0.8063774593821758, "learning_rate": 1.7256039437911263e-06, "loss": 0.5549, "step": 2075 }, { "epoch": 0.26, "grad_norm": 0.6184881228464356, "learning_rate": 1.7253199243910354e-06, "loss": 0.5388, "step": 2076 }, { "epoch": 0.26, "grad_norm": 0.6959889378114918, "learning_rate": 1.7250357814765182e-06, "loss": 0.5626, "step": 2077 }, { "epoch": 0.26, "grad_norm": 0.8693089443641364, "learning_rate": 1.7247515150959603e-06, "loss": 0.6737, "step": 2078 }, { "epoch": 0.26, "grad_norm": 0.6741355987290173, "learning_rate": 1.7244671252977702e-06, "loss": 0.564, "step": 2079 }, { "epoch": 0.26, "grad_norm": 0.6584876047266681, "learning_rate": 1.724182612130376e-06, "loss": 0.5259, "step": 2080 }, { "epoch": 0.27, "grad_norm": 0.5970392328563369, "learning_rate": 1.7238979756422275e-06, "loss": 0.5478, "step": 2081 }, { "epoch": 0.27, "grad_norm": 0.7172137035039096, "learning_rate": 1.723613215881795e-06, "loss": 0.5559, "step": 2082 }, { "epoch": 0.27, "grad_norm": 0.9840110340669319, "learning_rate": 1.7233283328975707e-06, "loss": 0.6053, "step": 2083 }, { "epoch": 0.27, "grad_norm": 0.6411436631160083, "learning_rate": 1.723043326738067e-06, "loss": 0.5191, "step": 2084 }, { "epoch": 0.27, "grad_norm": 0.6482826657422097, "learning_rate": 1.7227581974518168e-06, "loss": 0.5563, "step": 2085 }, { "epoch": 0.27, "grad_norm": 1.0901317704013287, "learning_rate": 1.7224729450873757e-06, "loss": 0.6605, "step": 2086 }, { "epoch": 0.27, "grad_norm": 0.8055626110352178, "learning_rate": 1.7221875696933184e-06, "loss": 0.5819, "step": 2087 }, { "epoch": 0.27, "grad_norm": 0.7366317969484671, "learning_rate": 1.7219020713182417e-06, "loss": 0.6884, "step": 2088 }, { "epoch": 0.27, "grad_norm": 0.8276012714795393, "learning_rate": 1.721616450010763e-06, "loss": 0.5683, "step": 2089 }, { "epoch": 0.27, "grad_norm": 0.6588587281175609, "learning_rate": 1.7213307058195207e-06, "loss": 0.5875, "step": 2090 }, { "epoch": 0.27, "grad_norm": 0.8109735591974573, "learning_rate": 1.7210448387931739e-06, "loss": 0.652, "step": 2091 }, { "epoch": 0.27, "grad_norm": 0.7802042736312507, "learning_rate": 1.7207588489804033e-06, "loss": 0.5691, "step": 2092 }, { "epoch": 0.27, "grad_norm": 0.697161140030107, "learning_rate": 1.720472736429909e-06, "loss": 0.5457, "step": 2093 }, { "epoch": 0.27, "grad_norm": 0.7997910661808749, "learning_rate": 1.7201865011904135e-06, "loss": 0.5979, "step": 2094 }, { "epoch": 0.27, "grad_norm": 0.945852131385185, "learning_rate": 1.71990014331066e-06, "loss": 0.651, "step": 2095 }, { "epoch": 0.27, "grad_norm": 0.6451086111413631, "learning_rate": 1.7196136628394116e-06, "loss": 0.5439, "step": 2096 }, { "epoch": 0.27, "grad_norm": 0.6548722712150665, "learning_rate": 1.7193270598254537e-06, "loss": 0.5457, "step": 2097 }, { "epoch": 0.27, "grad_norm": 0.5997866798412184, "learning_rate": 1.719040334317591e-06, "loss": 0.4917, "step": 2098 }, { "epoch": 0.27, "grad_norm": 0.8882770414282587, "learning_rate": 1.7187534863646506e-06, "loss": 0.6783, "step": 2099 }, { "epoch": 0.27, "grad_norm": 1.0264187187429725, "learning_rate": 1.7184665160154793e-06, "loss": 0.6868, "step": 2100 }, { "epoch": 0.27, "grad_norm": 0.7550911790740567, "learning_rate": 1.7181794233189453e-06, "loss": 0.6052, "step": 2101 }, { "epoch": 0.27, "grad_norm": 0.8415172948757969, "learning_rate": 1.7178922083239372e-06, "loss": 0.5525, "step": 2102 }, { "epoch": 0.27, "grad_norm": 0.8350341661703067, "learning_rate": 1.7176048710793651e-06, "loss": 0.5958, "step": 2103 }, { "epoch": 0.27, "grad_norm": 0.8350758247524337, "learning_rate": 1.7173174116341596e-06, "loss": 0.6172, "step": 2104 }, { "epoch": 0.27, "grad_norm": 0.9668490416955452, "learning_rate": 1.7170298300372716e-06, "loss": 0.6661, "step": 2105 }, { "epoch": 0.27, "grad_norm": 0.6326829767314572, "learning_rate": 1.7167421263376736e-06, "loss": 0.4648, "step": 2106 }, { "epoch": 0.27, "grad_norm": 0.6216500110967164, "learning_rate": 1.7164543005843585e-06, "loss": 0.5154, "step": 2107 }, { "epoch": 0.27, "grad_norm": 0.9075321451175721, "learning_rate": 1.7161663528263398e-06, "loss": 0.6852, "step": 2108 }, { "epoch": 0.27, "grad_norm": 0.5629755789408005, "learning_rate": 1.7158782831126525e-06, "loss": 0.4656, "step": 2109 }, { "epoch": 0.27, "grad_norm": 0.6178415172064557, "learning_rate": 1.7155900914923515e-06, "loss": 0.5403, "step": 2110 }, { "epoch": 0.27, "grad_norm": 0.8607887958474726, "learning_rate": 1.7153017780145126e-06, "loss": 0.6229, "step": 2111 }, { "epoch": 0.27, "grad_norm": 0.7276456747468057, "learning_rate": 1.7150133427282332e-06, "loss": 0.5528, "step": 2112 }, { "epoch": 0.27, "grad_norm": 0.9511762620559335, "learning_rate": 1.7147247856826303e-06, "loss": 0.6678, "step": 2113 }, { "epoch": 0.27, "grad_norm": 0.7305804001135221, "learning_rate": 1.7144361069268425e-06, "loss": 0.5621, "step": 2114 }, { "epoch": 0.27, "grad_norm": 0.7952042001228986, "learning_rate": 1.7141473065100283e-06, "loss": 0.5323, "step": 2115 }, { "epoch": 0.27, "grad_norm": 0.8422396313309024, "learning_rate": 1.7138583844813678e-06, "loss": 0.671, "step": 2116 }, { "epoch": 0.27, "grad_norm": 0.8628427171623998, "learning_rate": 1.7135693408900615e-06, "loss": 0.6349, "step": 2117 }, { "epoch": 0.27, "grad_norm": 0.5829056475092363, "learning_rate": 1.7132801757853304e-06, "loss": 0.5336, "step": 2118 }, { "epoch": 0.27, "grad_norm": 0.7426735022617582, "learning_rate": 1.7129908892164161e-06, "loss": 0.5609, "step": 2119 }, { "epoch": 0.27, "grad_norm": 0.7630862592830553, "learning_rate": 1.7127014812325816e-06, "loss": 0.6093, "step": 2120 }, { "epoch": 0.27, "grad_norm": 0.8410352422783963, "learning_rate": 1.7124119518831095e-06, "loss": 0.6607, "step": 2121 }, { "epoch": 0.27, "grad_norm": 0.6977230046435009, "learning_rate": 1.7121223012173035e-06, "loss": 0.5645, "step": 2122 }, { "epoch": 0.27, "grad_norm": 0.9439437599152608, "learning_rate": 1.711832529284489e-06, "loss": 0.623, "step": 2123 }, { "epoch": 0.27, "grad_norm": 0.8083589046331039, "learning_rate": 1.7115426361340103e-06, "loss": 0.5622, "step": 2124 }, { "epoch": 0.27, "grad_norm": 0.6963257663638225, "learning_rate": 1.7112526218152334e-06, "loss": 0.5976, "step": 2125 }, { "epoch": 0.27, "grad_norm": 1.07970299910784, "learning_rate": 1.710962486377545e-06, "loss": 0.649, "step": 2126 }, { "epoch": 0.27, "grad_norm": 0.8541522078462299, "learning_rate": 1.7106722298703517e-06, "loss": 0.5869, "step": 2127 }, { "epoch": 0.27, "grad_norm": 0.7878792812618934, "learning_rate": 1.7103818523430814e-06, "loss": 0.589, "step": 2128 }, { "epoch": 0.27, "grad_norm": 0.8478693557248129, "learning_rate": 1.7100913538451826e-06, "loss": 0.5821, "step": 2129 }, { "epoch": 0.27, "grad_norm": 0.9474649894452436, "learning_rate": 1.7098007344261237e-06, "loss": 0.7052, "step": 2130 }, { "epoch": 0.27, "grad_norm": 0.8630035329237031, "learning_rate": 1.7095099941353943e-06, "loss": 0.6952, "step": 2131 }, { "epoch": 0.27, "grad_norm": 0.6534126745300457, "learning_rate": 1.7092191330225045e-06, "loss": 0.5817, "step": 2132 }, { "epoch": 0.27, "grad_norm": 4.963705659323309, "learning_rate": 1.7089281511369854e-06, "loss": 0.6528, "step": 2133 }, { "epoch": 0.27, "grad_norm": 0.7096005633145203, "learning_rate": 1.7086370485283874e-06, "loss": 0.5686, "step": 2134 }, { "epoch": 0.27, "grad_norm": 0.8818279876651732, "learning_rate": 1.7083458252462824e-06, "loss": 0.6102, "step": 2135 }, { "epoch": 0.27, "grad_norm": 1.028091000548899, "learning_rate": 1.708054481340263e-06, "loss": 0.672, "step": 2136 }, { "epoch": 0.27, "grad_norm": 0.796414800709641, "learning_rate": 1.707763016859942e-06, "loss": 0.5378, "step": 2137 }, { "epoch": 0.27, "grad_norm": 0.7351851921067623, "learning_rate": 1.7074714318549525e-06, "loss": 0.572, "step": 2138 }, { "epoch": 0.27, "grad_norm": 0.9407748319376167, "learning_rate": 1.7071797263749485e-06, "loss": 0.6571, "step": 2139 }, { "epoch": 0.27, "grad_norm": 0.7340355185471747, "learning_rate": 1.7068879004696047e-06, "loss": 0.5459, "step": 2140 }, { "epoch": 0.27, "grad_norm": 0.8183131026333355, "learning_rate": 1.7065959541886156e-06, "loss": 0.6309, "step": 2141 }, { "epoch": 0.27, "grad_norm": 0.6822109820329837, "learning_rate": 1.7063038875816966e-06, "loss": 0.54, "step": 2142 }, { "epoch": 0.27, "grad_norm": 0.6513381943331386, "learning_rate": 1.7060117006985842e-06, "loss": 0.5288, "step": 2143 }, { "epoch": 0.27, "grad_norm": 0.7558075510653973, "learning_rate": 1.7057193935890339e-06, "loss": 0.6757, "step": 2144 }, { "epoch": 0.27, "grad_norm": 0.9018917721839882, "learning_rate": 1.7054269663028232e-06, "loss": 0.674, "step": 2145 }, { "epoch": 0.27, "grad_norm": 0.9186939348802294, "learning_rate": 1.7051344188897491e-06, "loss": 0.5707, "step": 2146 }, { "epoch": 0.27, "grad_norm": 0.8249037723100083, "learning_rate": 1.7048417513996296e-06, "loss": 0.5581, "step": 2147 }, { "epoch": 0.27, "grad_norm": 0.9342802725875994, "learning_rate": 1.704548963882303e-06, "loss": 0.6334, "step": 2148 }, { "epoch": 0.27, "grad_norm": 0.8866595308803102, "learning_rate": 1.7042560563876277e-06, "loss": 0.6711, "step": 2149 }, { "epoch": 0.27, "grad_norm": 0.7065738011302267, "learning_rate": 1.7039630289654827e-06, "loss": 0.5646, "step": 2150 }, { "epoch": 0.27, "grad_norm": 0.7656869103137477, "learning_rate": 1.7036698816657677e-06, "loss": 0.6056, "step": 2151 }, { "epoch": 0.27, "grad_norm": 0.7387149381313486, "learning_rate": 1.7033766145384027e-06, "loss": 0.5454, "step": 2152 }, { "epoch": 0.27, "grad_norm": 1.0575021119036667, "learning_rate": 1.703083227633328e-06, "loss": 0.6513, "step": 2153 }, { "epoch": 0.27, "grad_norm": 0.9676258533584826, "learning_rate": 1.7027897210005045e-06, "loss": 0.6428, "step": 2154 }, { "epoch": 0.27, "grad_norm": 0.7409357932862124, "learning_rate": 1.7024960946899128e-06, "loss": 0.5933, "step": 2155 }, { "epoch": 0.27, "grad_norm": 0.6695974485071609, "learning_rate": 1.7022023487515548e-06, "loss": 0.5797, "step": 2156 }, { "epoch": 0.27, "grad_norm": 1.0754988509306311, "learning_rate": 1.7019084832354524e-06, "loss": 0.615, "step": 2157 }, { "epoch": 0.27, "grad_norm": 0.7124817761920776, "learning_rate": 1.7016144981916476e-06, "loss": 0.5159, "step": 2158 }, { "epoch": 0.28, "grad_norm": 0.7691015840482227, "learning_rate": 1.7013203936702035e-06, "loss": 0.5417, "step": 2159 }, { "epoch": 0.28, "grad_norm": 0.839744772127702, "learning_rate": 1.7010261697212022e-06, "loss": 0.6657, "step": 2160 }, { "epoch": 0.28, "grad_norm": 0.990325147278458, "learning_rate": 1.7007318263947478e-06, "loss": 0.6372, "step": 2161 }, { "epoch": 0.28, "grad_norm": 0.7801825037429803, "learning_rate": 1.7004373637409632e-06, "loss": 0.6368, "step": 2162 }, { "epoch": 0.28, "grad_norm": 0.8370609305765528, "learning_rate": 1.7001427818099927e-06, "loss": 0.6605, "step": 2163 }, { "epoch": 0.28, "grad_norm": 1.2888462949319373, "learning_rate": 1.6998480806520008e-06, "loss": 0.6666, "step": 2164 }, { "epoch": 0.28, "grad_norm": 0.7252536869720716, "learning_rate": 1.6995532603171716e-06, "loss": 0.5664, "step": 2165 }, { "epoch": 0.28, "grad_norm": 0.8545147508933769, "learning_rate": 1.69925832085571e-06, "loss": 0.6451, "step": 2166 }, { "epoch": 0.28, "grad_norm": 0.8812759949943069, "learning_rate": 1.6989632623178412e-06, "loss": 0.6652, "step": 2167 }, { "epoch": 0.28, "grad_norm": 0.6882869846149489, "learning_rate": 1.6986680847538104e-06, "loss": 0.5652, "step": 2168 }, { "epoch": 0.28, "grad_norm": 0.8762816365596262, "learning_rate": 1.6983727882138833e-06, "loss": 0.65, "step": 2169 }, { "epoch": 0.28, "grad_norm": 0.658497060245278, "learning_rate": 1.6980773727483464e-06, "loss": 0.5033, "step": 2170 }, { "epoch": 0.28, "grad_norm": 0.8674016520601762, "learning_rate": 1.6977818384075053e-06, "loss": 0.5227, "step": 2171 }, { "epoch": 0.28, "grad_norm": 0.7927083404164387, "learning_rate": 1.6974861852416867e-06, "loss": 0.6301, "step": 2172 }, { "epoch": 0.28, "grad_norm": 0.6360577575131724, "learning_rate": 1.6971904133012368e-06, "loss": 0.5262, "step": 2173 }, { "epoch": 0.28, "grad_norm": 0.820140594018224, "learning_rate": 1.696894522636523e-06, "loss": 0.6283, "step": 2174 }, { "epoch": 0.28, "grad_norm": 0.8620870491879562, "learning_rate": 1.6965985132979322e-06, "loss": 0.6135, "step": 2175 }, { "epoch": 0.28, "grad_norm": 0.652693623692442, "learning_rate": 1.6963023853358715e-06, "loss": 0.5596, "step": 2176 }, { "epoch": 0.28, "grad_norm": 0.7554056127491318, "learning_rate": 1.6960061388007686e-06, "loss": 0.6226, "step": 2177 }, { "epoch": 0.28, "grad_norm": 0.7479137007145724, "learning_rate": 1.6957097737430714e-06, "loss": 0.6069, "step": 2178 }, { "epoch": 0.28, "grad_norm": 0.8956987391539133, "learning_rate": 1.6954132902132476e-06, "loss": 0.6982, "step": 2179 }, { "epoch": 0.28, "grad_norm": 0.8309677659853003, "learning_rate": 1.6951166882617853e-06, "loss": 0.6087, "step": 2180 }, { "epoch": 0.28, "grad_norm": 0.6053209532532329, "learning_rate": 1.6948199679391926e-06, "loss": 0.5292, "step": 2181 }, { "epoch": 0.28, "grad_norm": 0.7523583404994801, "learning_rate": 1.6945231292959975e-06, "loss": 0.5881, "step": 2182 }, { "epoch": 0.28, "grad_norm": 0.7684195868276319, "learning_rate": 1.6942261723827495e-06, "loss": 0.5566, "step": 2183 }, { "epoch": 0.28, "grad_norm": 0.8987275600204758, "learning_rate": 1.6939290972500166e-06, "loss": 0.6278, "step": 2184 }, { "epoch": 0.28, "grad_norm": 0.6671804008480465, "learning_rate": 1.6936319039483876e-06, "loss": 0.5623, "step": 2185 }, { "epoch": 0.28, "grad_norm": 0.8694903380476243, "learning_rate": 1.6933345925284718e-06, "loss": 0.6667, "step": 2186 }, { "epoch": 0.28, "grad_norm": 0.9155380878028245, "learning_rate": 1.6930371630408979e-06, "loss": 0.6425, "step": 2187 }, { "epoch": 0.28, "grad_norm": 0.7419916258652419, "learning_rate": 1.6927396155363151e-06, "loss": 0.5502, "step": 2188 }, { "epoch": 0.28, "grad_norm": 0.732808851770632, "learning_rate": 1.6924419500653927e-06, "loss": 0.6347, "step": 2189 }, { "epoch": 0.28, "grad_norm": 0.7653711880219417, "learning_rate": 1.6921441666788198e-06, "loss": 0.64, "step": 2190 }, { "epoch": 0.28, "grad_norm": 0.6934293603697798, "learning_rate": 1.6918462654273062e-06, "loss": 0.5029, "step": 2191 }, { "epoch": 0.28, "grad_norm": 1.0112695200560367, "learning_rate": 1.6915482463615813e-06, "loss": 0.6946, "step": 2192 }, { "epoch": 0.28, "grad_norm": 0.738589792174627, "learning_rate": 1.6912501095323943e-06, "loss": 0.5959, "step": 2193 }, { "epoch": 0.28, "grad_norm": 0.8591417693415365, "learning_rate": 1.6909518549905153e-06, "loss": 0.5827, "step": 2194 }, { "epoch": 0.28, "grad_norm": 0.6855854364045008, "learning_rate": 1.6906534827867335e-06, "loss": 0.517, "step": 2195 }, { "epoch": 0.28, "grad_norm": 0.6330346039478522, "learning_rate": 1.690354992971859e-06, "loss": 0.5681, "step": 2196 }, { "epoch": 0.28, "grad_norm": 0.7019132553112578, "learning_rate": 1.6900563855967212e-06, "loss": 0.5604, "step": 2197 }, { "epoch": 0.28, "grad_norm": 0.6539614271170227, "learning_rate": 1.6897576607121697e-06, "loss": 0.5317, "step": 2198 }, { "epoch": 0.28, "grad_norm": 0.8485886921396372, "learning_rate": 1.6894588183690745e-06, "loss": 0.636, "step": 2199 }, { "epoch": 0.28, "grad_norm": 0.6416593480921033, "learning_rate": 1.6891598586183253e-06, "loss": 0.507, "step": 2200 }, { "epoch": 0.28, "grad_norm": 0.7501176460034265, "learning_rate": 1.6888607815108318e-06, "loss": 0.5204, "step": 2201 }, { "epoch": 0.28, "grad_norm": 0.7224924742172312, "learning_rate": 1.6885615870975237e-06, "loss": 0.5474, "step": 2202 }, { "epoch": 0.28, "grad_norm": 0.6771364641228486, "learning_rate": 1.6882622754293507e-06, "loss": 0.5473, "step": 2203 }, { "epoch": 0.28, "grad_norm": 0.7022359759512, "learning_rate": 1.6879628465572823e-06, "loss": 0.5482, "step": 2204 }, { "epoch": 0.28, "grad_norm": 0.7317940359312319, "learning_rate": 1.6876633005323083e-06, "loss": 0.5889, "step": 2205 }, { "epoch": 0.28, "grad_norm": 0.7363327691843138, "learning_rate": 1.6873636374054385e-06, "loss": 0.558, "step": 2206 }, { "epoch": 0.28, "grad_norm": 0.8099894789713618, "learning_rate": 1.6870638572277021e-06, "loss": 0.6176, "step": 2207 }, { "epoch": 0.28, "grad_norm": 0.8634233915749285, "learning_rate": 1.6867639600501486e-06, "loss": 0.6528, "step": 2208 }, { "epoch": 0.28, "grad_norm": 0.8468848127915406, "learning_rate": 1.6864639459238472e-06, "loss": 0.6448, "step": 2209 }, { "epoch": 0.28, "grad_norm": 0.6383206096613858, "learning_rate": 1.6861638148998872e-06, "loss": 0.5535, "step": 2210 }, { "epoch": 0.28, "grad_norm": 0.6960432536249203, "learning_rate": 1.6858635670293784e-06, "loss": 0.5437, "step": 2211 }, { "epoch": 0.28, "grad_norm": 0.6220077639527575, "learning_rate": 1.6855632023634493e-06, "loss": 0.5992, "step": 2212 }, { "epoch": 0.28, "grad_norm": 0.9292095849828487, "learning_rate": 1.6852627209532484e-06, "loss": 0.6438, "step": 2213 }, { "epoch": 0.28, "grad_norm": 0.8107418055082478, "learning_rate": 1.6849621228499457e-06, "loss": 0.6005, "step": 2214 }, { "epoch": 0.28, "grad_norm": 0.6527167311307057, "learning_rate": 1.6846614081047292e-06, "loss": 0.4994, "step": 2215 }, { "epoch": 0.28, "grad_norm": 0.7091420183396735, "learning_rate": 1.6843605767688076e-06, "loss": 0.5786, "step": 2216 }, { "epoch": 0.28, "grad_norm": 0.6839856739860061, "learning_rate": 1.6840596288934097e-06, "loss": 0.5489, "step": 2217 }, { "epoch": 0.28, "grad_norm": 1.025710684817184, "learning_rate": 1.6837585645297832e-06, "loss": 0.6287, "step": 2218 }, { "epoch": 0.28, "grad_norm": 0.6923564110321747, "learning_rate": 1.6834573837291966e-06, "loss": 0.5646, "step": 2219 }, { "epoch": 0.28, "grad_norm": 0.787324540757855, "learning_rate": 1.6831560865429375e-06, "loss": 0.7215, "step": 2220 }, { "epoch": 0.28, "grad_norm": 0.8933669440639492, "learning_rate": 1.6828546730223143e-06, "loss": 0.6658, "step": 2221 }, { "epoch": 0.28, "grad_norm": 0.6783579195856242, "learning_rate": 1.682553143218654e-06, "loss": 0.5428, "step": 2222 }, { "epoch": 0.28, "grad_norm": 0.8570040110131358, "learning_rate": 1.6822514971833042e-06, "loss": 0.6425, "step": 2223 }, { "epoch": 0.28, "grad_norm": 0.6936390810707274, "learning_rate": 1.6819497349676324e-06, "loss": 0.5189, "step": 2224 }, { "epoch": 0.28, "grad_norm": 0.8854234399893237, "learning_rate": 1.6816478566230248e-06, "loss": 0.6357, "step": 2225 }, { "epoch": 0.28, "grad_norm": 0.7097785662217849, "learning_rate": 1.6813458622008887e-06, "loss": 0.5515, "step": 2226 }, { "epoch": 0.28, "grad_norm": 0.7523741797678828, "learning_rate": 1.6810437517526505e-06, "loss": 0.572, "step": 2227 }, { "epoch": 0.28, "grad_norm": 0.9036820917288725, "learning_rate": 1.6807415253297563e-06, "loss": 0.6496, "step": 2228 }, { "epoch": 0.28, "grad_norm": 0.8331417571658979, "learning_rate": 1.6804391829836726e-06, "loss": 0.6016, "step": 2229 }, { "epoch": 0.28, "grad_norm": 0.9526594615281615, "learning_rate": 1.6801367247658847e-06, "loss": 0.692, "step": 2230 }, { "epoch": 0.28, "grad_norm": 0.8370655857106266, "learning_rate": 1.6798341507278982e-06, "loss": 0.6133, "step": 2231 }, { "epoch": 0.28, "grad_norm": 0.8143533186848736, "learning_rate": 1.6795314609212379e-06, "loss": 0.6026, "step": 2232 }, { "epoch": 0.28, "grad_norm": 0.7100219613158313, "learning_rate": 1.6792286553974493e-06, "loss": 0.542, "step": 2233 }, { "epoch": 0.28, "grad_norm": 0.6208700582065056, "learning_rate": 1.678925734208097e-06, "loss": 0.514, "step": 2234 }, { "epoch": 0.28, "grad_norm": 0.6913565943684202, "learning_rate": 1.6786226974047653e-06, "loss": 0.5515, "step": 2235 }, { "epoch": 0.28, "grad_norm": 0.7052182927874939, "learning_rate": 1.678319545039058e-06, "loss": 0.5403, "step": 2236 }, { "epoch": 0.28, "grad_norm": 0.9051046305580936, "learning_rate": 1.6780162771625985e-06, "loss": 0.6663, "step": 2237 }, { "epoch": 0.29, "grad_norm": 0.7315610295571439, "learning_rate": 1.6777128938270308e-06, "loss": 0.5223, "step": 2238 }, { "epoch": 0.29, "grad_norm": 0.8231513082580345, "learning_rate": 1.6774093950840175e-06, "loss": 0.6256, "step": 2239 }, { "epoch": 0.29, "grad_norm": 0.8845994669674028, "learning_rate": 1.6771057809852416e-06, "loss": 0.5548, "step": 2240 }, { "epoch": 0.29, "grad_norm": 0.6945980589547308, "learning_rate": 1.6768020515824051e-06, "loss": 0.553, "step": 2241 }, { "epoch": 0.29, "grad_norm": 0.6577207950136928, "learning_rate": 1.67649820692723e-06, "loss": 0.559, "step": 2242 }, { "epoch": 0.29, "grad_norm": 0.6085064589394716, "learning_rate": 1.6761942470714582e-06, "loss": 0.524, "step": 2243 }, { "epoch": 0.29, "grad_norm": 1.8926260411514289, "learning_rate": 1.6758901720668504e-06, "loss": 0.6265, "step": 2244 }, { "epoch": 0.29, "grad_norm": 0.8207606742559838, "learning_rate": 1.675585981965188e-06, "loss": 0.529, "step": 2245 }, { "epoch": 0.29, "grad_norm": 0.8049246284630345, "learning_rate": 1.6752816768182708e-06, "loss": 0.6373, "step": 2246 }, { "epoch": 0.29, "grad_norm": 0.8578391069480552, "learning_rate": 1.6749772566779191e-06, "loss": 0.6124, "step": 2247 }, { "epoch": 0.29, "grad_norm": 0.7511569100479492, "learning_rate": 1.6746727215959729e-06, "loss": 0.5504, "step": 2248 }, { "epoch": 0.29, "grad_norm": 0.7736810626021798, "learning_rate": 1.6743680716242903e-06, "loss": 0.5563, "step": 2249 }, { "epoch": 0.29, "grad_norm": 0.6583082914792537, "learning_rate": 1.6740633068147507e-06, "loss": 0.5874, "step": 2250 }, { "epoch": 0.29, "grad_norm": 0.72117599210976, "learning_rate": 1.6737584272192526e-06, "loss": 0.5713, "step": 2251 }, { "epoch": 0.29, "grad_norm": 0.9200312396943667, "learning_rate": 1.6734534328897135e-06, "loss": 0.6364, "step": 2252 }, { "epoch": 0.29, "grad_norm": 0.6207388513138897, "learning_rate": 1.6731483238780703e-06, "loss": 0.5673, "step": 2253 }, { "epoch": 0.29, "grad_norm": 0.6309643338798087, "learning_rate": 1.6728431002362809e-06, "loss": 0.5518, "step": 2254 }, { "epoch": 0.29, "grad_norm": 0.7768519097348239, "learning_rate": 1.672537762016321e-06, "loss": 0.5521, "step": 2255 }, { "epoch": 0.29, "grad_norm": 0.6849134614583908, "learning_rate": 1.6722323092701864e-06, "loss": 0.4893, "step": 2256 }, { "epoch": 0.29, "grad_norm": 0.6725543501430364, "learning_rate": 1.671926742049893e-06, "loss": 0.5008, "step": 2257 }, { "epoch": 0.29, "grad_norm": 0.6851289843771293, "learning_rate": 1.6716210604074755e-06, "loss": 0.6357, "step": 2258 }, { "epoch": 0.29, "grad_norm": 0.8675747215822092, "learning_rate": 1.6713152643949883e-06, "loss": 0.5649, "step": 2259 }, { "epoch": 0.29, "grad_norm": 0.9673955956468508, "learning_rate": 1.6710093540645054e-06, "loss": 0.5983, "step": 2260 }, { "epoch": 0.29, "grad_norm": 0.964842357696227, "learning_rate": 1.6707033294681198e-06, "loss": 0.6021, "step": 2261 }, { "epoch": 0.29, "grad_norm": 0.6928061079172201, "learning_rate": 1.6703971906579448e-06, "loss": 0.5774, "step": 2262 }, { "epoch": 0.29, "grad_norm": 0.9382228022663371, "learning_rate": 1.6700909376861123e-06, "loss": 0.6263, "step": 2263 }, { "epoch": 0.29, "grad_norm": 0.8421502593237556, "learning_rate": 1.669784570604774e-06, "loss": 0.5932, "step": 2264 }, { "epoch": 0.29, "grad_norm": 0.6425987452711236, "learning_rate": 1.6694780894661012e-06, "loss": 0.5011, "step": 2265 }, { "epoch": 0.29, "grad_norm": 0.7526716561326535, "learning_rate": 1.6691714943222844e-06, "loss": 0.5227, "step": 2266 }, { "epoch": 0.29, "grad_norm": 0.7221312555383687, "learning_rate": 1.6688647852255332e-06, "loss": 0.5323, "step": 2267 }, { "epoch": 0.29, "grad_norm": 0.8119916887480131, "learning_rate": 1.6685579622280774e-06, "loss": 0.5432, "step": 2268 }, { "epoch": 0.29, "grad_norm": 0.7171680935434755, "learning_rate": 1.6682510253821653e-06, "loss": 0.5566, "step": 2269 }, { "epoch": 0.29, "grad_norm": 0.7719739126137437, "learning_rate": 1.6679439747400658e-06, "loss": 0.639, "step": 2270 }, { "epoch": 0.29, "grad_norm": 0.8923696052523371, "learning_rate": 1.667636810354066e-06, "loss": 0.6324, "step": 2271 }, { "epoch": 0.29, "grad_norm": 0.7345210794312194, "learning_rate": 1.6673295322764727e-06, "loss": 0.6282, "step": 2272 }, { "epoch": 0.29, "grad_norm": 0.7436414393996811, "learning_rate": 1.6670221405596122e-06, "loss": 0.5206, "step": 2273 }, { "epoch": 0.29, "grad_norm": 0.8816672301127467, "learning_rate": 1.6667146352558301e-06, "loss": 0.5875, "step": 2274 }, { "epoch": 0.29, "grad_norm": 0.7447957577804639, "learning_rate": 1.6664070164174912e-06, "loss": 0.5688, "step": 2275 }, { "epoch": 0.29, "grad_norm": 0.8616723679693312, "learning_rate": 1.6660992840969806e-06, "loss": 0.5899, "step": 2276 }, { "epoch": 0.29, "grad_norm": 0.7858703729609936, "learning_rate": 1.6657914383467006e-06, "loss": 0.6384, "step": 2277 }, { "epoch": 0.29, "grad_norm": 0.8024283384801922, "learning_rate": 1.6654834792190752e-06, "loss": 0.6317, "step": 2278 }, { "epoch": 0.29, "grad_norm": 0.8712190824907251, "learning_rate": 1.6651754067665463e-06, "loss": 0.689, "step": 2279 }, { "epoch": 0.29, "grad_norm": 0.8997270818758489, "learning_rate": 1.6648672210415753e-06, "loss": 0.6101, "step": 2280 }, { "epoch": 0.29, "grad_norm": 0.8029948043294698, "learning_rate": 1.664558922096643e-06, "loss": 0.6127, "step": 2281 }, { "epoch": 0.29, "grad_norm": 1.0726666760859815, "learning_rate": 1.6642505099842494e-06, "loss": 0.6659, "step": 2282 }, { "epoch": 0.29, "grad_norm": 0.7398779796528825, "learning_rate": 1.6639419847569145e-06, "loss": 0.5747, "step": 2283 }, { "epoch": 0.29, "grad_norm": 0.8316765691468839, "learning_rate": 1.6636333464671762e-06, "loss": 0.6217, "step": 2284 }, { "epoch": 0.29, "grad_norm": 0.8852604912307752, "learning_rate": 1.6633245951675929e-06, "loss": 0.6752, "step": 2285 }, { "epoch": 0.29, "grad_norm": 0.803011091492394, "learning_rate": 1.6630157309107414e-06, "loss": 0.6076, "step": 2286 }, { "epoch": 0.29, "grad_norm": 0.6817290443298669, "learning_rate": 1.662706753749218e-06, "loss": 0.5343, "step": 2287 }, { "epoch": 0.29, "grad_norm": 0.917512307434402, "learning_rate": 1.6623976637356386e-06, "loss": 0.5907, "step": 2288 }, { "epoch": 0.29, "grad_norm": 0.7541601285437791, "learning_rate": 1.662088460922638e-06, "loss": 0.5925, "step": 2289 }, { "epoch": 0.29, "grad_norm": 0.8691287099435786, "learning_rate": 1.66177914536287e-06, "loss": 0.5871, "step": 2290 }, { "epoch": 0.29, "grad_norm": 0.6689808027758937, "learning_rate": 1.6614697171090079e-06, "loss": 0.5408, "step": 2291 }, { "epoch": 0.29, "grad_norm": 0.7006442914430474, "learning_rate": 1.6611601762137441e-06, "loss": 0.6893, "step": 2292 }, { "epoch": 0.29, "grad_norm": 0.9187981664622983, "learning_rate": 1.66085052272979e-06, "loss": 0.6508, "step": 2293 }, { "epoch": 0.29, "grad_norm": 0.8936349090826531, "learning_rate": 1.660540756709877e-06, "loss": 0.6993, "step": 2294 }, { "epoch": 0.29, "grad_norm": 0.809402071303986, "learning_rate": 1.6602308782067542e-06, "loss": 0.6045, "step": 2295 }, { "epoch": 0.29, "grad_norm": 1.0191718764605853, "learning_rate": 1.6599208872731913e-06, "loss": 0.5613, "step": 2296 }, { "epoch": 0.29, "grad_norm": 0.6462000918388331, "learning_rate": 1.6596107839619764e-06, "loss": 0.5177, "step": 2297 }, { "epoch": 0.29, "grad_norm": 0.7818637801795143, "learning_rate": 1.6593005683259166e-06, "loss": 0.5711, "step": 2298 }, { "epoch": 0.29, "grad_norm": 0.7832499060108764, "learning_rate": 1.6589902404178387e-06, "loss": 0.6449, "step": 2299 }, { "epoch": 0.29, "grad_norm": 0.7167426861659866, "learning_rate": 1.6586798002905879e-06, "loss": 0.4956, "step": 2300 }, { "epoch": 0.29, "grad_norm": 0.7981649902841359, "learning_rate": 1.6583692479970296e-06, "loss": 0.5745, "step": 2301 }, { "epoch": 0.29, "grad_norm": 1.0394544041561944, "learning_rate": 1.6580585835900468e-06, "loss": 0.6887, "step": 2302 }, { "epoch": 0.29, "grad_norm": 0.7675248187917575, "learning_rate": 1.6577478071225431e-06, "loss": 0.6302, "step": 2303 }, { "epoch": 0.29, "grad_norm": 0.6444274444867941, "learning_rate": 1.6574369186474399e-06, "loss": 0.5546, "step": 2304 }, { "epoch": 0.29, "grad_norm": 0.6841058641790826, "learning_rate": 1.657125918217679e-06, "loss": 0.5303, "step": 2305 }, { "epoch": 0.29, "grad_norm": 0.8565966713599275, "learning_rate": 1.6568148058862196e-06, "loss": 0.5888, "step": 2306 }, { "epoch": 0.29, "grad_norm": 0.6317157520018506, "learning_rate": 1.6565035817060417e-06, "loss": 0.4773, "step": 2307 }, { "epoch": 0.29, "grad_norm": 0.750669986215123, "learning_rate": 1.6561922457301433e-06, "loss": 0.5648, "step": 2308 }, { "epoch": 0.29, "grad_norm": 0.6503446444656306, "learning_rate": 1.6558807980115414e-06, "loss": 0.5903, "step": 2309 }, { "epoch": 0.29, "grad_norm": 0.7782256286129265, "learning_rate": 1.6555692386032728e-06, "loss": 0.6047, "step": 2310 }, { "epoch": 0.29, "grad_norm": 0.8272628479115911, "learning_rate": 1.6552575675583925e-06, "loss": 0.5708, "step": 2311 }, { "epoch": 0.29, "grad_norm": 0.7166399877044437, "learning_rate": 1.654945784929975e-06, "loss": 0.521, "step": 2312 }, { "epoch": 0.29, "grad_norm": 0.6774154561178422, "learning_rate": 1.654633890771113e-06, "loss": 0.5682, "step": 2313 }, { "epoch": 0.29, "grad_norm": 0.6533579453885231, "learning_rate": 1.6543218851349197e-06, "loss": 0.5088, "step": 2314 }, { "epoch": 0.29, "grad_norm": 0.6312431647770271, "learning_rate": 1.654009768074526e-06, "loss": 0.5896, "step": 2315 }, { "epoch": 0.3, "grad_norm": 0.7387827647138251, "learning_rate": 1.6536975396430818e-06, "loss": 0.5641, "step": 2316 }, { "epoch": 0.3, "grad_norm": 0.7239294104014358, "learning_rate": 1.6533851998937573e-06, "loss": 0.5516, "step": 2317 }, { "epoch": 0.3, "grad_norm": 0.6570674184915435, "learning_rate": 1.65307274887974e-06, "loss": 0.5285, "step": 2318 }, { "epoch": 0.3, "grad_norm": 0.6624897315764817, "learning_rate": 1.652760186654237e-06, "loss": 0.5046, "step": 2319 }, { "epoch": 0.3, "grad_norm": 0.6657928223274541, "learning_rate": 1.6524475132704751e-06, "loss": 0.4946, "step": 2320 }, { "epoch": 0.3, "grad_norm": 0.8645205927978623, "learning_rate": 1.6521347287816988e-06, "loss": 0.6241, "step": 2321 }, { "epoch": 0.3, "grad_norm": 0.589597953878077, "learning_rate": 1.6518218332411718e-06, "loss": 0.5053, "step": 2322 }, { "epoch": 0.3, "grad_norm": 0.7594268110390386, "learning_rate": 1.651508826702177e-06, "loss": 0.5863, "step": 2323 }, { "epoch": 0.3, "grad_norm": 1.2129293039840563, "learning_rate": 1.651195709218017e-06, "loss": 0.6448, "step": 2324 }, { "epoch": 0.3, "grad_norm": 0.7119140943137071, "learning_rate": 1.6508824808420112e-06, "loss": 0.4943, "step": 2325 }, { "epoch": 0.3, "grad_norm": 0.6363155883333226, "learning_rate": 1.6505691416275003e-06, "loss": 0.5251, "step": 2326 }, { "epoch": 0.3, "grad_norm": 1.1308335713379853, "learning_rate": 1.6502556916278418e-06, "loss": 0.6279, "step": 2327 }, { "epoch": 0.3, "grad_norm": 0.8068985149723921, "learning_rate": 1.6499421308964135e-06, "loss": 0.6217, "step": 2328 }, { "epoch": 0.3, "grad_norm": 0.8901190320921132, "learning_rate": 1.6496284594866112e-06, "loss": 0.6038, "step": 2329 }, { "epoch": 0.3, "grad_norm": 0.7626283782749874, "learning_rate": 1.6493146774518506e-06, "loss": 0.5759, "step": 2330 }, { "epoch": 0.3, "grad_norm": 0.7799676810791333, "learning_rate": 1.649000784845564e-06, "loss": 0.5832, "step": 2331 }, { "epoch": 0.3, "grad_norm": 0.6360549269754372, "learning_rate": 1.6486867817212056e-06, "loss": 0.5435, "step": 2332 }, { "epoch": 0.3, "grad_norm": 0.7779230395910909, "learning_rate": 1.6483726681322461e-06, "loss": 0.6128, "step": 2333 }, { "epoch": 0.3, "grad_norm": 0.6112762446758263, "learning_rate": 1.648058444132176e-06, "loss": 0.5072, "step": 2334 }, { "epoch": 0.3, "grad_norm": 0.9881742657538033, "learning_rate": 1.6477441097745042e-06, "loss": 0.6259, "step": 2335 }, { "epoch": 0.3, "grad_norm": 0.7780897752507852, "learning_rate": 1.6474296651127588e-06, "loss": 0.6123, "step": 2336 }, { "epoch": 0.3, "grad_norm": 0.6749877450734497, "learning_rate": 1.647115110200486e-06, "loss": 0.5516, "step": 2337 }, { "epoch": 0.3, "grad_norm": 0.6851667217340366, "learning_rate": 1.6468004450912516e-06, "loss": 0.5948, "step": 2338 }, { "epoch": 0.3, "grad_norm": 0.7570637587155784, "learning_rate": 1.64648566983864e-06, "loss": 0.506, "step": 2339 }, { "epoch": 0.3, "grad_norm": 0.7578852869872799, "learning_rate": 1.6461707844962539e-06, "loss": 0.6284, "step": 2340 }, { "epoch": 0.3, "grad_norm": 0.9736562576339315, "learning_rate": 1.6458557891177145e-06, "loss": 0.672, "step": 2341 }, { "epoch": 0.3, "grad_norm": 0.7911551009825062, "learning_rate": 1.645540683756663e-06, "loss": 0.565, "step": 2342 }, { "epoch": 0.3, "grad_norm": 0.7337107681992371, "learning_rate": 1.6452254684667581e-06, "loss": 0.5699, "step": 2343 }, { "epoch": 0.3, "grad_norm": 1.0215494421752733, "learning_rate": 1.644910143301678e-06, "loss": 0.696, "step": 2344 }, { "epoch": 0.3, "grad_norm": 1.0468356409979203, "learning_rate": 1.644594708315119e-06, "loss": 0.6238, "step": 2345 }, { "epoch": 0.3, "grad_norm": 0.7596438252754523, "learning_rate": 1.6442791635607964e-06, "loss": 0.5861, "step": 2346 }, { "epoch": 0.3, "grad_norm": 0.7256526369079759, "learning_rate": 1.6439635090924445e-06, "loss": 0.5942, "step": 2347 }, { "epoch": 0.3, "grad_norm": 1.0619954182828173, "learning_rate": 1.6436477449638155e-06, "loss": 0.6431, "step": 2348 }, { "epoch": 0.3, "grad_norm": 0.7237890976675088, "learning_rate": 1.6433318712286814e-06, "loss": 0.6125, "step": 2349 }, { "epoch": 0.3, "grad_norm": 0.9616126805869909, "learning_rate": 1.6430158879408316e-06, "loss": 0.6363, "step": 2350 }, { "epoch": 0.3, "grad_norm": 0.8294412405010342, "learning_rate": 1.642699795154075e-06, "loss": 0.5791, "step": 2351 }, { "epoch": 0.3, "grad_norm": 1.1519112918432959, "learning_rate": 1.642383592922239e-06, "loss": 0.7158, "step": 2352 }, { "epoch": 0.3, "grad_norm": 0.7875231071807859, "learning_rate": 1.6420672812991692e-06, "loss": 0.5756, "step": 2353 }, { "epoch": 0.3, "grad_norm": 0.6571544104245998, "learning_rate": 1.6417508603387305e-06, "loss": 0.5562, "step": 2354 }, { "epoch": 0.3, "grad_norm": 0.8412274194169784, "learning_rate": 1.641434330094806e-06, "loss": 0.6497, "step": 2355 }, { "epoch": 0.3, "grad_norm": 0.8363808880091436, "learning_rate": 1.6411176906212977e-06, "loss": 0.5551, "step": 2356 }, { "epoch": 0.3, "grad_norm": 0.8563361692298546, "learning_rate": 1.6408009419721257e-06, "loss": 0.637, "step": 2357 }, { "epoch": 0.3, "grad_norm": 1.057885992611512, "learning_rate": 1.6404840842012289e-06, "loss": 0.6173, "step": 2358 }, { "epoch": 0.3, "grad_norm": 0.6113859591085109, "learning_rate": 1.6401671173625653e-06, "loss": 0.5186, "step": 2359 }, { "epoch": 0.3, "grad_norm": 0.8486987977833653, "learning_rate": 1.6398500415101105e-06, "loss": 0.6464, "step": 2360 }, { "epoch": 0.3, "grad_norm": 0.9936103341651286, "learning_rate": 1.6395328566978598e-06, "loss": 0.6316, "step": 2361 }, { "epoch": 0.3, "grad_norm": 0.758076046012566, "learning_rate": 1.6392155629798264e-06, "loss": 0.6283, "step": 2362 }, { "epoch": 0.3, "grad_norm": 0.9906072994713594, "learning_rate": 1.6388981604100414e-06, "loss": 0.6951, "step": 2363 }, { "epoch": 0.3, "grad_norm": 0.7185617787091048, "learning_rate": 1.6385806490425563e-06, "loss": 0.5307, "step": 2364 }, { "epoch": 0.3, "grad_norm": 0.6620030528498446, "learning_rate": 1.6382630289314387e-06, "loss": 0.5502, "step": 2365 }, { "epoch": 0.3, "grad_norm": 0.8647116780674396, "learning_rate": 1.637945300130777e-06, "loss": 0.6361, "step": 2366 }, { "epoch": 0.3, "grad_norm": 0.9200256011638845, "learning_rate": 1.6376274626946768e-06, "loss": 0.6973, "step": 2367 }, { "epoch": 0.3, "grad_norm": 0.9230542867983506, "learning_rate": 1.6373095166772623e-06, "loss": 0.642, "step": 2368 }, { "epoch": 0.3, "grad_norm": 0.824400848646835, "learning_rate": 1.6369914621326766e-06, "loss": 0.6024, "step": 2369 }, { "epoch": 0.3, "grad_norm": 0.8806979314894896, "learning_rate": 1.636673299115081e-06, "loss": 0.6113, "step": 2370 }, { "epoch": 0.3, "grad_norm": 0.706173534729791, "learning_rate": 1.6363550276786556e-06, "loss": 0.6201, "step": 2371 }, { "epoch": 0.3, "grad_norm": 0.8001607992274602, "learning_rate": 1.6360366478775985e-06, "loss": 0.6365, "step": 2372 }, { "epoch": 0.3, "grad_norm": 0.8543371330877693, "learning_rate": 1.6357181597661262e-06, "loss": 0.5995, "step": 2373 }, { "epoch": 0.3, "grad_norm": 0.6659332907908532, "learning_rate": 1.635399563398474e-06, "loss": 0.5875, "step": 2374 }, { "epoch": 0.3, "grad_norm": 0.8040486016647955, "learning_rate": 1.6350808588288963e-06, "loss": 0.6191, "step": 2375 }, { "epoch": 0.3, "grad_norm": 0.7173793405162839, "learning_rate": 1.6347620461116641e-06, "loss": 0.5852, "step": 2376 }, { "epoch": 0.3, "grad_norm": 0.6196421944512187, "learning_rate": 1.6344431253010681e-06, "loss": 0.5434, "step": 2377 }, { "epoch": 0.3, "grad_norm": 0.7323373030450488, "learning_rate": 1.634124096451418e-06, "loss": 0.5758, "step": 2378 }, { "epoch": 0.3, "grad_norm": 0.6751335987404704, "learning_rate": 1.6338049596170404e-06, "loss": 0.5678, "step": 2379 }, { "epoch": 0.3, "grad_norm": 0.7790034151519865, "learning_rate": 1.633485714852281e-06, "loss": 0.6175, "step": 2380 }, { "epoch": 0.3, "grad_norm": 0.9696325653204507, "learning_rate": 1.633166362211504e-06, "loss": 0.6251, "step": 2381 }, { "epoch": 0.3, "grad_norm": 0.6632506370100224, "learning_rate": 1.6328469017490916e-06, "loss": 0.5802, "step": 2382 }, { "epoch": 0.3, "grad_norm": 0.5983570118186374, "learning_rate": 1.6325273335194453e-06, "loss": 0.4923, "step": 2383 }, { "epoch": 0.3, "grad_norm": 0.8677759100084825, "learning_rate": 1.6322076575769831e-06, "loss": 0.6506, "step": 2384 }, { "epoch": 0.3, "grad_norm": 0.7335365848007822, "learning_rate": 1.6318878739761438e-06, "loss": 0.5485, "step": 2385 }, { "epoch": 0.3, "grad_norm": 0.9250802287361223, "learning_rate": 1.6315679827713818e-06, "loss": 0.638, "step": 2386 }, { "epoch": 0.3, "grad_norm": 0.8205803849399236, "learning_rate": 1.6312479840171723e-06, "loss": 0.5749, "step": 2387 }, { "epoch": 0.3, "grad_norm": 0.7085837658599741, "learning_rate": 1.6309278777680072e-06, "loss": 0.5229, "step": 2388 }, { "epoch": 0.3, "grad_norm": 0.8861137858527499, "learning_rate": 1.630607664078398e-06, "loss": 0.6483, "step": 2389 }, { "epoch": 0.3, "grad_norm": 0.7156129198086779, "learning_rate": 1.6302873430028727e-06, "loss": 0.5732, "step": 2390 }, { "epoch": 0.3, "grad_norm": 0.8344517049046373, "learning_rate": 1.6299669145959793e-06, "loss": 0.5696, "step": 2391 }, { "epoch": 0.3, "grad_norm": 0.7321636055022906, "learning_rate": 1.6296463789122836e-06, "loss": 0.5457, "step": 2392 }, { "epoch": 0.3, "grad_norm": 0.6911067974935775, "learning_rate": 1.6293257360063688e-06, "loss": 0.5685, "step": 2393 }, { "epoch": 0.3, "grad_norm": 0.6658619910325687, "learning_rate": 1.6290049859328376e-06, "loss": 0.5165, "step": 2394 }, { "epoch": 0.31, "grad_norm": 0.6493384821902125, "learning_rate": 1.6286841287463101e-06, "loss": 0.6049, "step": 2395 }, { "epoch": 0.31, "grad_norm": 0.7008670596026451, "learning_rate": 1.628363164501425e-06, "loss": 0.5421, "step": 2396 }, { "epoch": 0.31, "grad_norm": 0.8431782441593939, "learning_rate": 1.6280420932528396e-06, "loss": 0.6518, "step": 2397 }, { "epoch": 0.31, "grad_norm": 0.9823599729694824, "learning_rate": 1.6277209150552285e-06, "loss": 0.6794, "step": 2398 }, { "epoch": 0.31, "grad_norm": 0.6943816521833125, "learning_rate": 1.6273996299632852e-06, "loss": 0.5739, "step": 2399 }, { "epoch": 0.31, "grad_norm": 0.6467356624099669, "learning_rate": 1.6270782380317204e-06, "loss": 0.5248, "step": 2400 }, { "epoch": 0.31, "grad_norm": 0.938370308704877, "learning_rate": 1.6267567393152652e-06, "loss": 0.7083, "step": 2401 }, { "epoch": 0.31, "grad_norm": 0.9920508713784483, "learning_rate": 1.6264351338686667e-06, "loss": 0.6965, "step": 2402 }, { "epoch": 0.31, "grad_norm": 0.6590284702696324, "learning_rate": 1.626113421746691e-06, "loss": 0.5191, "step": 2403 }, { "epoch": 0.31, "grad_norm": 0.7747546202901052, "learning_rate": 1.6257916030041226e-06, "loss": 0.5541, "step": 2404 }, { "epoch": 0.31, "grad_norm": 0.6075229475271532, "learning_rate": 1.6254696776957632e-06, "loss": 0.4826, "step": 2405 }, { "epoch": 0.31, "grad_norm": 0.5841065106206605, "learning_rate": 1.6251476458764341e-06, "loss": 0.5255, "step": 2406 }, { "epoch": 0.31, "grad_norm": 0.6223450101011191, "learning_rate": 1.6248255076009737e-06, "loss": 0.5612, "step": 2407 }, { "epoch": 0.31, "grad_norm": 1.765984493381814, "learning_rate": 1.6245032629242386e-06, "loss": 0.6309, "step": 2408 }, { "epoch": 0.31, "grad_norm": 0.6204132870654709, "learning_rate": 1.6241809119011039e-06, "loss": 0.5515, "step": 2409 }, { "epoch": 0.31, "grad_norm": 0.6264563112729826, "learning_rate": 1.6238584545864624e-06, "loss": 0.5584, "step": 2410 }, { "epoch": 0.31, "grad_norm": 0.7593805124146197, "learning_rate": 1.6235358910352254e-06, "loss": 0.5881, "step": 2411 }, { "epoch": 0.31, "grad_norm": 0.6655583567106697, "learning_rate": 1.6232132213023223e-06, "loss": 0.56, "step": 2412 }, { "epoch": 0.31, "grad_norm": 0.8740795942896238, "learning_rate": 1.6228904454427004e-06, "loss": 0.5877, "step": 2413 }, { "epoch": 0.31, "grad_norm": 0.7012817919546442, "learning_rate": 1.6225675635113243e-06, "loss": 0.5318, "step": 2414 }, { "epoch": 0.31, "grad_norm": 0.9690617995791702, "learning_rate": 1.6222445755631784e-06, "loss": 0.6033, "step": 2415 }, { "epoch": 0.31, "grad_norm": 0.6427159761259156, "learning_rate": 1.6219214816532638e-06, "loss": 0.5024, "step": 2416 }, { "epoch": 0.31, "grad_norm": 0.7242575030527468, "learning_rate": 1.6215982818366e-06, "loss": 0.536, "step": 2417 }, { "epoch": 0.31, "grad_norm": 0.764664173744657, "learning_rate": 1.6212749761682242e-06, "loss": 0.5973, "step": 2418 }, { "epoch": 0.31, "grad_norm": 0.951898856320164, "learning_rate": 1.6209515647031926e-06, "loss": 0.6705, "step": 2419 }, { "epoch": 0.31, "grad_norm": 0.8360996482643221, "learning_rate": 1.6206280474965786e-06, "loss": 0.5914, "step": 2420 }, { "epoch": 0.31, "grad_norm": 0.7676795201261866, "learning_rate": 1.6203044246034738e-06, "loss": 0.5706, "step": 2421 }, { "epoch": 0.31, "grad_norm": 0.7999125974543403, "learning_rate": 1.6199806960789877e-06, "loss": 0.6025, "step": 2422 }, { "epoch": 0.31, "grad_norm": 0.6383580183100955, "learning_rate": 1.6196568619782477e-06, "loss": 0.55, "step": 2423 }, { "epoch": 0.31, "grad_norm": 0.8731628886419787, "learning_rate": 1.6193329223564e-06, "loss": 0.6439, "step": 2424 }, { "epoch": 0.31, "grad_norm": 0.6323919993838435, "learning_rate": 1.619008877268608e-06, "loss": 0.5126, "step": 2425 }, { "epoch": 0.31, "grad_norm": 0.8206851311123455, "learning_rate": 1.6186847267700526e-06, "loss": 0.6454, "step": 2426 }, { "epoch": 0.31, "grad_norm": 0.762313230538518, "learning_rate": 1.6183604709159338e-06, "loss": 0.5798, "step": 2427 }, { "epoch": 0.31, "grad_norm": 0.8283840566294349, "learning_rate": 1.6180361097614687e-06, "loss": 0.6523, "step": 2428 }, { "epoch": 0.31, "grad_norm": 0.8172610893630339, "learning_rate": 1.617711643361893e-06, "loss": 0.6468, "step": 2429 }, { "epoch": 0.31, "grad_norm": 0.8843385755315869, "learning_rate": 1.6173870717724593e-06, "loss": 0.7126, "step": 2430 }, { "epoch": 0.31, "grad_norm": 0.6610234427777778, "learning_rate": 1.6170623950484395e-06, "loss": 0.574, "step": 2431 }, { "epoch": 0.31, "grad_norm": 1.1011538083067407, "learning_rate": 1.6167376132451223e-06, "loss": 0.6353, "step": 2432 }, { "epoch": 0.31, "grad_norm": 0.8249121693532128, "learning_rate": 1.6164127264178148e-06, "loss": 0.6096, "step": 2433 }, { "epoch": 0.31, "grad_norm": 0.7202904134932954, "learning_rate": 1.6160877346218412e-06, "loss": 0.5653, "step": 2434 }, { "epoch": 0.31, "grad_norm": 0.7015551908514589, "learning_rate": 1.6157626379125452e-06, "loss": 0.543, "step": 2435 }, { "epoch": 0.31, "grad_norm": 0.8594224000593357, "learning_rate": 1.615437436345287e-06, "loss": 0.597, "step": 2436 }, { "epoch": 0.31, "grad_norm": 0.6934066909545274, "learning_rate": 1.615112129975445e-06, "loss": 0.5328, "step": 2437 }, { "epoch": 0.31, "grad_norm": 0.9620232933009184, "learning_rate": 1.614786718858415e-06, "loss": 0.6071, "step": 2438 }, { "epoch": 0.31, "grad_norm": 0.8831929393948162, "learning_rate": 1.614461203049612e-06, "loss": 0.6358, "step": 2439 }, { "epoch": 0.31, "grad_norm": 1.4336418674581353, "learning_rate": 1.6141355826044673e-06, "loss": 0.6582, "step": 2440 }, { "epoch": 0.31, "grad_norm": 0.6975080236078062, "learning_rate": 1.6138098575784312e-06, "loss": 0.5566, "step": 2441 }, { "epoch": 0.31, "grad_norm": 0.6507088099613131, "learning_rate": 1.6134840280269706e-06, "loss": 0.5283, "step": 2442 }, { "epoch": 0.31, "grad_norm": 0.9363721258016622, "learning_rate": 1.6131580940055712e-06, "loss": 0.6425, "step": 2443 }, { "epoch": 0.31, "grad_norm": 0.6122468905708789, "learning_rate": 1.6128320555697363e-06, "loss": 0.5576, "step": 2444 }, { "epoch": 0.31, "grad_norm": 0.9294130186039169, "learning_rate": 1.6125059127749872e-06, "loss": 0.672, "step": 2445 }, { "epoch": 0.31, "grad_norm": 0.7875480254101632, "learning_rate": 1.6121796656768617e-06, "loss": 0.6259, "step": 2446 }, { "epoch": 0.31, "grad_norm": 0.8145221918196556, "learning_rate": 1.6118533143309172e-06, "loss": 0.5752, "step": 2447 }, { "epoch": 0.31, "grad_norm": 0.6496847006190792, "learning_rate": 1.6115268587927273e-06, "loss": 0.5428, "step": 2448 }, { "epoch": 0.31, "grad_norm": 0.6883355407075407, "learning_rate": 1.6112002991178844e-06, "loss": 0.5449, "step": 2449 }, { "epoch": 0.31, "grad_norm": 0.7052931724647741, "learning_rate": 1.6108736353619976e-06, "loss": 0.5357, "step": 2450 }, { "epoch": 0.31, "grad_norm": 0.5941846904261885, "learning_rate": 1.610546867580695e-06, "loss": 0.532, "step": 2451 }, { "epoch": 0.31, "grad_norm": 0.8982521189621095, "learning_rate": 1.6102199958296212e-06, "loss": 0.6702, "step": 2452 }, { "epoch": 0.31, "grad_norm": 1.0101531571169227, "learning_rate": 1.6098930201644397e-06, "loss": 0.6758, "step": 2453 }, { "epoch": 0.31, "grad_norm": 0.6646981900247438, "learning_rate": 1.6095659406408303e-06, "loss": 0.5389, "step": 2454 }, { "epoch": 0.31, "grad_norm": 0.7416050659828799, "learning_rate": 1.6092387573144922e-06, "loss": 0.6317, "step": 2455 }, { "epoch": 0.31, "grad_norm": 0.7900815299170257, "learning_rate": 1.6089114702411402e-06, "loss": 0.5627, "step": 2456 }, { "epoch": 0.31, "grad_norm": 0.8010280613785924, "learning_rate": 1.6085840794765086e-06, "loss": 0.5433, "step": 2457 }, { "epoch": 0.31, "grad_norm": 0.8742315414879183, "learning_rate": 1.6082565850763485e-06, "loss": 0.6507, "step": 2458 }, { "epoch": 0.31, "grad_norm": 0.67646055167846, "learning_rate": 1.6079289870964287e-06, "loss": 0.5436, "step": 2459 }, { "epoch": 0.31, "grad_norm": 0.7843736049200927, "learning_rate": 1.6076012855925355e-06, "loss": 0.5358, "step": 2460 }, { "epoch": 0.31, "grad_norm": 0.7003149444836645, "learning_rate": 1.6072734806204736e-06, "loss": 0.5866, "step": 2461 }, { "epoch": 0.31, "grad_norm": 0.7792713533690936, "learning_rate": 1.6069455722360646e-06, "loss": 0.6319, "step": 2462 }, { "epoch": 0.31, "grad_norm": 0.6356591196424144, "learning_rate": 1.6066175604951475e-06, "loss": 0.4954, "step": 2463 }, { "epoch": 0.31, "grad_norm": 0.6809362063775646, "learning_rate": 1.6062894454535795e-06, "loss": 0.5494, "step": 2464 }, { "epoch": 0.31, "grad_norm": 0.7203944359582988, "learning_rate": 1.6059612271672357e-06, "loss": 0.5601, "step": 2465 }, { "epoch": 0.31, "grad_norm": 0.7245022804153479, "learning_rate": 1.6056329056920072e-06, "loss": 0.5512, "step": 2466 }, { "epoch": 0.31, "grad_norm": 0.8686957250749607, "learning_rate": 1.6053044810838046e-06, "loss": 0.6285, "step": 2467 }, { "epoch": 0.31, "grad_norm": 0.8110071934068561, "learning_rate": 1.6049759533985548e-06, "loss": 0.572, "step": 2468 }, { "epoch": 0.31, "grad_norm": 0.9144998841884497, "learning_rate": 1.6046473226922027e-06, "loss": 0.6431, "step": 2469 }, { "epoch": 0.31, "grad_norm": 0.6434320653532378, "learning_rate": 1.6043185890207108e-06, "loss": 0.5543, "step": 2470 }, { "epoch": 0.31, "grad_norm": 0.6661684012779162, "learning_rate": 1.603989752440059e-06, "loss": 0.6089, "step": 2471 }, { "epoch": 0.31, "grad_norm": 0.7193466316599335, "learning_rate": 1.6036608130062446e-06, "loss": 0.5608, "step": 2472 }, { "epoch": 0.32, "grad_norm": 0.7573869097575038, "learning_rate": 1.6033317707752825e-06, "loss": 0.6222, "step": 2473 }, { "epoch": 0.32, "grad_norm": 0.6001335926397549, "learning_rate": 1.6030026258032052e-06, "loss": 0.5133, "step": 2474 }, { "epoch": 0.32, "grad_norm": 1.0595959896185636, "learning_rate": 1.6026733781460631e-06, "loss": 0.632, "step": 2475 }, { "epoch": 0.32, "grad_norm": 0.9067400249753934, "learning_rate": 1.6023440278599231e-06, "loss": 0.7087, "step": 2476 }, { "epoch": 0.32, "grad_norm": 0.8598448498347431, "learning_rate": 1.6020145750008704e-06, "loss": 0.6318, "step": 2477 }, { "epoch": 0.32, "grad_norm": 0.8204190221856763, "learning_rate": 1.6016850196250076e-06, "loss": 0.5861, "step": 2478 }, { "epoch": 0.32, "grad_norm": 0.6125485697791082, "learning_rate": 1.6013553617884538e-06, "loss": 0.4971, "step": 2479 }, { "epoch": 0.32, "grad_norm": 0.7783806425118769, "learning_rate": 1.601025601547347e-06, "loss": 0.6048, "step": 2480 }, { "epoch": 0.32, "grad_norm": 0.980387288644324, "learning_rate": 1.6006957389578418e-06, "loss": 0.6171, "step": 2481 }, { "epoch": 0.32, "grad_norm": 0.9886067736108141, "learning_rate": 1.6003657740761098e-06, "loss": 0.7051, "step": 2482 }, { "epoch": 0.32, "grad_norm": 0.8058527532276617, "learning_rate": 1.6000357069583416e-06, "loss": 0.5703, "step": 2483 }, { "epoch": 0.32, "grad_norm": 0.86366129071733, "learning_rate": 1.599705537660743e-06, "loss": 0.6252, "step": 2484 }, { "epoch": 0.32, "grad_norm": 0.6802258492773239, "learning_rate": 1.5993752662395396e-06, "loss": 0.5407, "step": 2485 }, { "epoch": 0.32, "grad_norm": 0.7175130793056197, "learning_rate": 1.5990448927509725e-06, "loss": 0.5794, "step": 2486 }, { "epoch": 0.32, "grad_norm": 0.8156238219024209, "learning_rate": 1.5987144172513008e-06, "loss": 0.5665, "step": 2487 }, { "epoch": 0.32, "grad_norm": 1.0249659995464693, "learning_rate": 1.5983838397968015e-06, "loss": 0.6434, "step": 2488 }, { "epoch": 0.32, "grad_norm": 0.6435962513942913, "learning_rate": 1.5980531604437679e-06, "loss": 0.5541, "step": 2489 }, { "epoch": 0.32, "grad_norm": 0.7087593495119783, "learning_rate": 1.5977223792485118e-06, "loss": 0.5924, "step": 2490 }, { "epoch": 0.32, "grad_norm": 0.9495528512309235, "learning_rate": 1.597391496267361e-06, "loss": 0.538, "step": 2491 }, { "epoch": 0.32, "grad_norm": 0.8381530399046914, "learning_rate": 1.5970605115566626e-06, "loss": 0.5916, "step": 2492 }, { "epoch": 0.32, "grad_norm": 0.8604637729409753, "learning_rate": 1.596729425172779e-06, "loss": 0.6059, "step": 2493 }, { "epoch": 0.32, "grad_norm": 0.7723417897315928, "learning_rate": 1.5963982371720908e-06, "loss": 0.6123, "step": 2494 }, { "epoch": 0.32, "grad_norm": 0.670312886232341, "learning_rate": 1.5960669476109961e-06, "loss": 0.5862, "step": 2495 }, { "epoch": 0.32, "grad_norm": 0.7716198652033283, "learning_rate": 1.59573555654591e-06, "loss": 0.5354, "step": 2496 }, { "epoch": 0.32, "grad_norm": 0.8622364136133446, "learning_rate": 1.595404064033265e-06, "loss": 0.6311, "step": 2497 }, { "epoch": 0.32, "grad_norm": 0.8987069583454859, "learning_rate": 1.5950724701295108e-06, "loss": 0.6282, "step": 2498 }, { "epoch": 0.32, "grad_norm": 0.6151092477882707, "learning_rate": 1.5947407748911143e-06, "loss": 0.5578, "step": 2499 }, { "epoch": 0.32, "grad_norm": 0.664115475728601, "learning_rate": 1.59440897837456e-06, "loss": 0.4931, "step": 2500 }, { "epoch": 0.32, "grad_norm": 0.7232250085254985, "learning_rate": 1.594077080636349e-06, "loss": 0.5689, "step": 2501 }, { "epoch": 0.32, "grad_norm": 0.9236167824439739, "learning_rate": 1.5937450817330001e-06, "loss": 0.5629, "step": 2502 }, { "epoch": 0.32, "grad_norm": 0.699353727957285, "learning_rate": 1.5934129817210495e-06, "loss": 0.5517, "step": 2503 }, { "epoch": 0.32, "grad_norm": 0.8514409644870283, "learning_rate": 1.5930807806570504e-06, "loss": 0.576, "step": 2504 }, { "epoch": 0.32, "grad_norm": 0.6053863337802154, "learning_rate": 1.592748478597573e-06, "loss": 0.5335, "step": 2505 }, { "epoch": 0.32, "grad_norm": 0.8120303052027267, "learning_rate": 1.5924160755992046e-06, "loss": 0.5797, "step": 2506 }, { "epoch": 0.32, "grad_norm": 0.8915178960787578, "learning_rate": 1.5920835717185505e-06, "loss": 0.652, "step": 2507 }, { "epoch": 0.32, "grad_norm": 0.9805617462708859, "learning_rate": 1.5917509670122325e-06, "loss": 0.6406, "step": 2508 }, { "epoch": 0.32, "grad_norm": 0.8741302904383479, "learning_rate": 1.5914182615368895e-06, "loss": 0.6164, "step": 2509 }, { "epoch": 0.32, "grad_norm": 0.7965169578604819, "learning_rate": 1.5910854553491779e-06, "loss": 0.639, "step": 2510 }, { "epoch": 0.32, "grad_norm": 0.6230515659073382, "learning_rate": 1.5907525485057711e-06, "loss": 0.5203, "step": 2511 }, { "epoch": 0.32, "grad_norm": 0.6826528847712071, "learning_rate": 1.59041954106336e-06, "loss": 0.6116, "step": 2512 }, { "epoch": 0.32, "grad_norm": 0.7552508212742363, "learning_rate": 1.5900864330786517e-06, "loss": 0.5262, "step": 2513 }, { "epoch": 0.32, "grad_norm": 0.6961164460872856, "learning_rate": 1.5897532246083714e-06, "loss": 0.497, "step": 2514 }, { "epoch": 0.32, "grad_norm": 0.9377466296813155, "learning_rate": 1.5894199157092608e-06, "loss": 0.6382, "step": 2515 }, { "epoch": 0.32, "grad_norm": 0.848146698078804, "learning_rate": 1.5890865064380794e-06, "loss": 0.6622, "step": 2516 }, { "epoch": 0.32, "grad_norm": 0.8903305119855955, "learning_rate": 1.588752996851603e-06, "loss": 0.6549, "step": 2517 }, { "epoch": 0.32, "grad_norm": 0.9407229285183042, "learning_rate": 1.5884193870066245e-06, "loss": 0.6432, "step": 2518 }, { "epoch": 0.32, "grad_norm": 0.7741778203903938, "learning_rate": 1.5880856769599547e-06, "loss": 0.6066, "step": 2519 }, { "epoch": 0.32, "grad_norm": 0.7359609943961934, "learning_rate": 1.5877518667684206e-06, "loss": 0.6127, "step": 2520 }, { "epoch": 0.32, "grad_norm": 0.6953691945317098, "learning_rate": 1.587417956488867e-06, "loss": 0.5536, "step": 2521 }, { "epoch": 0.32, "grad_norm": 0.8787307548852056, "learning_rate": 1.5870839461781552e-06, "loss": 0.6405, "step": 2522 }, { "epoch": 0.32, "grad_norm": 0.7184219862136817, "learning_rate": 1.5867498358931632e-06, "loss": 0.6011, "step": 2523 }, { "epoch": 0.32, "grad_norm": 0.996028043678537, "learning_rate": 1.5864156256907867e-06, "loss": 0.6764, "step": 2524 }, { "epoch": 0.32, "grad_norm": 0.829706731799562, "learning_rate": 1.5860813156279388e-06, "loss": 0.6686, "step": 2525 }, { "epoch": 0.32, "grad_norm": 0.8589054483102626, "learning_rate": 1.5857469057615484e-06, "loss": 0.6433, "step": 2526 }, { "epoch": 0.32, "grad_norm": 0.8026838624368913, "learning_rate": 1.5854123961485623e-06, "loss": 0.5964, "step": 2527 }, { "epoch": 0.32, "grad_norm": 0.6534581656985162, "learning_rate": 1.585077786845944e-06, "loss": 0.5147, "step": 2528 }, { "epoch": 0.32, "grad_norm": 0.6788244109516006, "learning_rate": 1.5847430779106738e-06, "loss": 0.5607, "step": 2529 }, { "epoch": 0.32, "grad_norm": 0.78794488184345, "learning_rate": 1.5844082693997492e-06, "loss": 0.545, "step": 2530 }, { "epoch": 0.32, "grad_norm": 0.5785985190393186, "learning_rate": 1.5840733613701846e-06, "loss": 0.517, "step": 2531 }, { "epoch": 0.32, "grad_norm": 0.765495744139225, "learning_rate": 1.5837383538790117e-06, "loss": 0.5553, "step": 2532 }, { "epoch": 0.32, "grad_norm": 1.4622075492059616, "learning_rate": 1.5834032469832779e-06, "loss": 0.6404, "step": 2533 }, { "epoch": 0.32, "grad_norm": 0.7118768854225761, "learning_rate": 1.5830680407400493e-06, "loss": 0.5494, "step": 2534 }, { "epoch": 0.32, "grad_norm": 0.7239251765369579, "learning_rate": 1.582732735206408e-06, "loss": 0.5996, "step": 2535 }, { "epoch": 0.32, "grad_norm": 0.6336755093853782, "learning_rate": 1.5823973304394525e-06, "loss": 0.5032, "step": 2536 }, { "epoch": 0.32, "grad_norm": 0.8569447892908659, "learning_rate": 1.5820618264962991e-06, "loss": 0.6205, "step": 2537 }, { "epoch": 0.32, "grad_norm": 0.9217174191761583, "learning_rate": 1.5817262234340802e-06, "loss": 0.5595, "step": 2538 }, { "epoch": 0.32, "grad_norm": 0.6321285215224897, "learning_rate": 1.5813905213099461e-06, "loss": 0.5294, "step": 2539 }, { "epoch": 0.32, "grad_norm": 0.9349849489719363, "learning_rate": 1.581054720181063e-06, "loss": 0.5887, "step": 2540 }, { "epoch": 0.32, "grad_norm": 0.6290311246925899, "learning_rate": 1.5807188201046148e-06, "loss": 0.4475, "step": 2541 }, { "epoch": 0.32, "grad_norm": 0.6644933028940619, "learning_rate": 1.580382821137801e-06, "loss": 0.5329, "step": 2542 }, { "epoch": 0.32, "grad_norm": 0.7187526847658339, "learning_rate": 1.5800467233378392e-06, "loss": 0.5623, "step": 2543 }, { "epoch": 0.32, "grad_norm": 0.6989345234753084, "learning_rate": 1.5797105267619635e-06, "loss": 0.5808, "step": 2544 }, { "epoch": 0.32, "grad_norm": 0.6969569742660636, "learning_rate": 1.5793742314674244e-06, "loss": 0.5406, "step": 2545 }, { "epoch": 0.32, "grad_norm": 0.7094646639337747, "learning_rate": 1.5790378375114893e-06, "loss": 0.5616, "step": 2546 }, { "epoch": 0.32, "grad_norm": 0.6416630524813071, "learning_rate": 1.5787013449514431e-06, "loss": 0.526, "step": 2547 }, { "epoch": 0.32, "grad_norm": 0.6417409229303236, "learning_rate": 1.5783647538445867e-06, "loss": 0.4926, "step": 2548 }, { "epoch": 0.32, "grad_norm": 0.6827726362657659, "learning_rate": 1.578028064248238e-06, "loss": 0.5623, "step": 2549 }, { "epoch": 0.32, "grad_norm": 0.5857222717958704, "learning_rate": 1.5776912762197316e-06, "loss": 0.4899, "step": 2550 }, { "epoch": 0.32, "grad_norm": 0.8168226975915017, "learning_rate": 1.5773543898164197e-06, "loss": 0.5597, "step": 2551 }, { "epoch": 0.33, "grad_norm": 0.8172095171983793, "learning_rate": 1.5770174050956697e-06, "loss": 0.6461, "step": 2552 }, { "epoch": 0.33, "grad_norm": 0.6520127149793095, "learning_rate": 1.5766803221148673e-06, "loss": 0.555, "step": 2553 }, { "epoch": 0.33, "grad_norm": 0.9341753512799217, "learning_rate": 1.5763431409314134e-06, "loss": 0.6497, "step": 2554 }, { "epoch": 0.33, "grad_norm": 0.6074898402501409, "learning_rate": 1.576005861602727e-06, "loss": 0.4805, "step": 2555 }, { "epoch": 0.33, "grad_norm": 0.9411713466066226, "learning_rate": 1.5756684841862433e-06, "loss": 0.6332, "step": 2556 }, { "epoch": 0.33, "grad_norm": 0.6541898654687741, "learning_rate": 1.5753310087394138e-06, "loss": 0.5227, "step": 2557 }, { "epoch": 0.33, "grad_norm": 0.6946972794125692, "learning_rate": 1.5749934353197077e-06, "loss": 0.4887, "step": 2558 }, { "epoch": 0.33, "grad_norm": 0.7959812327613129, "learning_rate": 1.5746557639846094e-06, "loss": 0.5427, "step": 2559 }, { "epoch": 0.33, "grad_norm": 0.804697109798102, "learning_rate": 1.5743179947916215e-06, "loss": 0.6305, "step": 2560 }, { "epoch": 0.33, "grad_norm": 0.9152540668240107, "learning_rate": 1.573980127798262e-06, "loss": 0.6052, "step": 2561 }, { "epoch": 0.33, "grad_norm": 0.7504039316691035, "learning_rate": 1.5736421630620664e-06, "loss": 0.621, "step": 2562 }, { "epoch": 0.33, "grad_norm": 0.7499569737288814, "learning_rate": 1.573304100640587e-06, "loss": 0.5588, "step": 2563 }, { "epoch": 0.33, "grad_norm": 0.9488640031365498, "learning_rate": 1.5729659405913918e-06, "loss": 0.6668, "step": 2564 }, { "epoch": 0.33, "grad_norm": 0.8631889542941432, "learning_rate": 1.5726276829720656e-06, "loss": 0.6285, "step": 2565 }, { "epoch": 0.33, "grad_norm": 0.665238955751868, "learning_rate": 1.572289327840211e-06, "loss": 0.5662, "step": 2566 }, { "epoch": 0.33, "grad_norm": 0.8616516322435755, "learning_rate": 1.5719508752534457e-06, "loss": 0.6374, "step": 2567 }, { "epoch": 0.33, "grad_norm": 0.6970467853670573, "learning_rate": 1.5716123252694052e-06, "loss": 0.5167, "step": 2568 }, { "epoch": 0.33, "grad_norm": 0.7295224205098898, "learning_rate": 1.5712736779457405e-06, "loss": 0.5925, "step": 2569 }, { "epoch": 0.33, "grad_norm": 1.0316168982120097, "learning_rate": 1.5709349333401198e-06, "loss": 0.6559, "step": 2570 }, { "epoch": 0.33, "grad_norm": 0.8984282965748758, "learning_rate": 1.570596091510228e-06, "loss": 0.5505, "step": 2571 }, { "epoch": 0.33, "grad_norm": 0.8315430479178283, "learning_rate": 1.5702571525137662e-06, "loss": 0.5757, "step": 2572 }, { "epoch": 0.33, "grad_norm": 0.6238297765901686, "learning_rate": 1.5699181164084525e-06, "loss": 0.513, "step": 2573 }, { "epoch": 0.33, "grad_norm": 0.6806460929370697, "learning_rate": 1.5695789832520208e-06, "loss": 0.538, "step": 2574 }, { "epoch": 0.33, "grad_norm": 0.7542735790422822, "learning_rate": 1.5692397531022219e-06, "loss": 0.6493, "step": 2575 }, { "epoch": 0.33, "grad_norm": 0.6332481844776309, "learning_rate": 1.5689004260168235e-06, "loss": 0.4859, "step": 2576 }, { "epoch": 0.33, "grad_norm": 0.6591643176035831, "learning_rate": 1.5685610020536092e-06, "loss": 0.506, "step": 2577 }, { "epoch": 0.33, "grad_norm": 1.0120765311465691, "learning_rate": 1.5682214812703799e-06, "loss": 0.6555, "step": 2578 }, { "epoch": 0.33, "grad_norm": 1.0649137168994636, "learning_rate": 1.5678818637249516e-06, "loss": 0.6878, "step": 2579 }, { "epoch": 0.33, "grad_norm": 0.6035584493561864, "learning_rate": 1.5675421494751579e-06, "loss": 0.5196, "step": 2580 }, { "epoch": 0.33, "grad_norm": 0.9110625809264739, "learning_rate": 1.5672023385788494e-06, "loss": 0.5959, "step": 2581 }, { "epoch": 0.33, "grad_norm": 0.8972276439204921, "learning_rate": 1.5668624310938912e-06, "loss": 0.6396, "step": 2582 }, { "epoch": 0.33, "grad_norm": 0.7180481987408107, "learning_rate": 1.5665224270781664e-06, "loss": 0.5579, "step": 2583 }, { "epoch": 0.33, "grad_norm": 0.6053509147967748, "learning_rate": 1.5661823265895745e-06, "loss": 0.5417, "step": 2584 }, { "epoch": 0.33, "grad_norm": 0.9241062013651185, "learning_rate": 1.565842129686031e-06, "loss": 0.5897, "step": 2585 }, { "epoch": 0.33, "grad_norm": 0.5915035287666525, "learning_rate": 1.5655018364254676e-06, "loss": 0.5161, "step": 2586 }, { "epoch": 0.33, "grad_norm": 0.6803520701884275, "learning_rate": 1.5651614468658326e-06, "loss": 0.5278, "step": 2587 }, { "epoch": 0.33, "grad_norm": 0.714474250923458, "learning_rate": 1.564820961065091e-06, "loss": 0.4996, "step": 2588 }, { "epoch": 0.33, "grad_norm": 0.7686287571519791, "learning_rate": 1.5644803790812242e-06, "loss": 0.4935, "step": 2589 }, { "epoch": 0.33, "grad_norm": 0.8511682260474583, "learning_rate": 1.5641397009722295e-06, "loss": 0.6221, "step": 2590 }, { "epoch": 0.33, "grad_norm": 0.5845829630963592, "learning_rate": 1.5637989267961208e-06, "loss": 0.484, "step": 2591 }, { "epoch": 0.33, "grad_norm": 0.7938446202665874, "learning_rate": 1.5634580566109287e-06, "loss": 0.5211, "step": 2592 }, { "epoch": 0.33, "grad_norm": 0.6402248829404993, "learning_rate": 1.5631170904746998e-06, "loss": 0.5522, "step": 2593 }, { "epoch": 0.33, "grad_norm": 0.8252877052754484, "learning_rate": 1.5627760284454965e-06, "loss": 0.5961, "step": 2594 }, { "epoch": 0.33, "grad_norm": 0.7571600186175518, "learning_rate": 1.5624348705813985e-06, "loss": 0.6209, "step": 2595 }, { "epoch": 0.33, "grad_norm": 0.8450428323230402, "learning_rate": 1.5620936169405017e-06, "loss": 0.5823, "step": 2596 }, { "epoch": 0.33, "grad_norm": 0.7534341431685809, "learning_rate": 1.5617522675809176e-06, "loss": 0.5713, "step": 2597 }, { "epoch": 0.33, "grad_norm": 0.8955971057312625, "learning_rate": 1.5614108225607744e-06, "loss": 0.6297, "step": 2598 }, { "epoch": 0.33, "grad_norm": 0.6934151626076508, "learning_rate": 1.5610692819382171e-06, "loss": 0.5837, "step": 2599 }, { "epoch": 0.33, "grad_norm": 0.8515024880142362, "learning_rate": 1.5607276457714064e-06, "loss": 0.6559, "step": 2600 }, { "epoch": 0.33, "grad_norm": 0.6892410379736257, "learning_rate": 1.5603859141185192e-06, "loss": 0.5376, "step": 2601 }, { "epoch": 0.33, "grad_norm": 0.6390983788952416, "learning_rate": 1.5600440870377487e-06, "loss": 0.5057, "step": 2602 }, { "epoch": 0.33, "grad_norm": 0.8613673266229193, "learning_rate": 1.5597021645873048e-06, "loss": 0.5624, "step": 2603 }, { "epoch": 0.33, "grad_norm": 0.8647907981066635, "learning_rate": 1.5593601468254132e-06, "loss": 0.6273, "step": 2604 }, { "epoch": 0.33, "grad_norm": 0.6267038146529416, "learning_rate": 1.5590180338103159e-06, "loss": 0.5009, "step": 2605 }, { "epoch": 0.33, "grad_norm": 0.6934131044956916, "learning_rate": 1.5586758256002712e-06, "loss": 0.5126, "step": 2606 }, { "epoch": 0.33, "grad_norm": 1.0414196184194449, "learning_rate": 1.5583335222535538e-06, "loss": 0.652, "step": 2607 }, { "epoch": 0.33, "grad_norm": 0.7398529670522938, "learning_rate": 1.5579911238284544e-06, "loss": 0.5883, "step": 2608 }, { "epoch": 0.33, "grad_norm": 0.965627394189997, "learning_rate": 1.5576486303832794e-06, "loss": 0.6914, "step": 2609 }, { "epoch": 0.33, "grad_norm": 0.7185502354647558, "learning_rate": 1.5573060419763523e-06, "loss": 0.5233, "step": 2610 }, { "epoch": 0.33, "grad_norm": 0.6331843843806698, "learning_rate": 1.556963358666012e-06, "loss": 0.577, "step": 2611 }, { "epoch": 0.33, "grad_norm": 0.7284391326409974, "learning_rate": 1.5566205805106145e-06, "loss": 0.5263, "step": 2612 }, { "epoch": 0.33, "grad_norm": 0.6932440436696805, "learning_rate": 1.556277707568531e-06, "loss": 0.5964, "step": 2613 }, { "epoch": 0.33, "grad_norm": 0.5746316977070192, "learning_rate": 1.5559347398981488e-06, "loss": 0.5233, "step": 2614 }, { "epoch": 0.33, "grad_norm": 1.0539568152592735, "learning_rate": 1.5555916775578724e-06, "loss": 0.6742, "step": 2615 }, { "epoch": 0.33, "grad_norm": 0.718179580752316, "learning_rate": 1.5552485206061213e-06, "loss": 0.5824, "step": 2616 }, { "epoch": 0.33, "grad_norm": 0.7661280116288639, "learning_rate": 1.5549052691013315e-06, "loss": 0.5629, "step": 2617 }, { "epoch": 0.33, "grad_norm": 1.0025493822600118, "learning_rate": 1.5545619231019556e-06, "loss": 0.6283, "step": 2618 }, { "epoch": 0.33, "grad_norm": 0.9264561063452965, "learning_rate": 1.5542184826664614e-06, "loss": 0.5819, "step": 2619 }, { "epoch": 0.33, "grad_norm": 0.6946940380093737, "learning_rate": 1.5538749478533335e-06, "loss": 0.5804, "step": 2620 }, { "epoch": 0.33, "grad_norm": 0.7984629008591758, "learning_rate": 1.553531318721072e-06, "loss": 0.5786, "step": 2621 }, { "epoch": 0.33, "grad_norm": 0.8243519179312014, "learning_rate": 1.5531875953281941e-06, "loss": 0.6575, "step": 2622 }, { "epoch": 0.33, "grad_norm": 0.6586957314476799, "learning_rate": 1.5528437777332312e-06, "loss": 0.5644, "step": 2623 }, { "epoch": 0.33, "grad_norm": 13.25863321152729, "learning_rate": 1.5524998659947328e-06, "loss": 0.5221, "step": 2624 }, { "epoch": 0.33, "grad_norm": 0.6014881760340378, "learning_rate": 1.552155860171263e-06, "loss": 0.5394, "step": 2625 }, { "epoch": 0.33, "grad_norm": 0.7419662625122913, "learning_rate": 1.5518117603214024e-06, "loss": 0.5465, "step": 2626 }, { "epoch": 0.33, "grad_norm": 0.6409063539651462, "learning_rate": 1.551467566503748e-06, "loss": 0.5719, "step": 2627 }, { "epoch": 0.33, "grad_norm": 0.7021138527211542, "learning_rate": 1.5511232787769122e-06, "loss": 0.5568, "step": 2628 }, { "epoch": 0.33, "grad_norm": 0.7025644408801217, "learning_rate": 1.5507788971995233e-06, "loss": 0.5563, "step": 2629 }, { "epoch": 0.34, "grad_norm": 0.7527568901142943, "learning_rate": 1.5504344218302264e-06, "loss": 0.5389, "step": 2630 }, { "epoch": 0.34, "grad_norm": 0.8043522246569156, "learning_rate": 1.5500898527276816e-06, "loss": 0.6632, "step": 2631 }, { "epoch": 0.34, "grad_norm": 0.8994663693422271, "learning_rate": 1.5497451899505659e-06, "loss": 0.6169, "step": 2632 }, { "epoch": 0.34, "grad_norm": 0.8580824636401024, "learning_rate": 1.5494004335575714e-06, "loss": 0.5901, "step": 2633 }, { "epoch": 0.34, "grad_norm": 0.6848044735678862, "learning_rate": 1.549055583607407e-06, "loss": 0.5377, "step": 2634 }, { "epoch": 0.34, "grad_norm": 0.5721144793992392, "learning_rate": 1.5487106401587964e-06, "loss": 0.5033, "step": 2635 }, { "epoch": 0.34, "grad_norm": 0.9792911523867096, "learning_rate": 1.5483656032704802e-06, "loss": 0.6435, "step": 2636 }, { "epoch": 0.34, "grad_norm": 0.7590737426095436, "learning_rate": 1.5480204730012147e-06, "loss": 0.5843, "step": 2637 }, { "epoch": 0.34, "grad_norm": 0.8757857293005646, "learning_rate": 1.5476752494097718e-06, "loss": 0.6589, "step": 2638 }, { "epoch": 0.34, "grad_norm": 0.8201512994882567, "learning_rate": 1.54732993255494e-06, "loss": 0.6211, "step": 2639 }, { "epoch": 0.34, "grad_norm": 0.8233198522442915, "learning_rate": 1.546984522495522e-06, "loss": 0.5813, "step": 2640 }, { "epoch": 0.34, "grad_norm": 0.7545402015931093, "learning_rate": 1.5466390192903389e-06, "loss": 0.5498, "step": 2641 }, { "epoch": 0.34, "grad_norm": 0.8911395834575572, "learning_rate": 1.5462934229982253e-06, "loss": 0.6497, "step": 2642 }, { "epoch": 0.34, "grad_norm": 0.8700750056893659, "learning_rate": 1.5459477336780333e-06, "loss": 0.6964, "step": 2643 }, { "epoch": 0.34, "grad_norm": 0.8467776322339813, "learning_rate": 1.5456019513886296e-06, "loss": 0.5749, "step": 2644 }, { "epoch": 0.34, "grad_norm": 0.6413629750641375, "learning_rate": 1.5452560761888976e-06, "loss": 0.5533, "step": 2645 }, { "epoch": 0.34, "grad_norm": 0.6376908601672958, "learning_rate": 1.5449101081377365e-06, "loss": 0.5282, "step": 2646 }, { "epoch": 0.34, "grad_norm": 0.672180386303089, "learning_rate": 1.5445640472940603e-06, "loss": 0.5538, "step": 2647 }, { "epoch": 0.34, "grad_norm": 0.721379258380877, "learning_rate": 1.5442178937168008e-06, "loss": 0.5749, "step": 2648 }, { "epoch": 0.34, "grad_norm": 0.7966303071124852, "learning_rate": 1.5438716474649033e-06, "loss": 0.5702, "step": 2649 }, { "epoch": 0.34, "grad_norm": 0.9026711106169523, "learning_rate": 1.54352530859733e-06, "loss": 0.6773, "step": 2650 }, { "epoch": 0.34, "grad_norm": 0.7636672857345796, "learning_rate": 1.5431788771730597e-06, "loss": 0.5948, "step": 2651 }, { "epoch": 0.34, "grad_norm": 0.7065413830477413, "learning_rate": 1.5428323532510847e-06, "loss": 0.5152, "step": 2652 }, { "epoch": 0.34, "grad_norm": 0.7222540732276216, "learning_rate": 1.5424857368904154e-06, "loss": 0.6065, "step": 2653 }, { "epoch": 0.34, "grad_norm": 0.7580740319422532, "learning_rate": 1.5421390281500768e-06, "loss": 0.5466, "step": 2654 }, { "epoch": 0.34, "grad_norm": 0.7121332525199152, "learning_rate": 1.5417922270891096e-06, "loss": 0.5579, "step": 2655 }, { "epoch": 0.34, "grad_norm": 0.7773608159610655, "learning_rate": 1.5414453337665705e-06, "loss": 0.6659, "step": 2656 }, { "epoch": 0.34, "grad_norm": 0.7004766212385289, "learning_rate": 1.5410983482415318e-06, "loss": 0.5615, "step": 2657 }, { "epoch": 0.34, "grad_norm": 0.7280071581017894, "learning_rate": 1.5407512705730815e-06, "loss": 0.5269, "step": 2658 }, { "epoch": 0.34, "grad_norm": 0.6751841557706953, "learning_rate": 1.5404041008203232e-06, "loss": 0.5954, "step": 2659 }, { "epoch": 0.34, "grad_norm": 0.7021591715651088, "learning_rate": 1.5400568390423765e-06, "loss": 0.5752, "step": 2660 }, { "epoch": 0.34, "grad_norm": 0.7591204428905055, "learning_rate": 1.5397094852983765e-06, "loss": 0.5546, "step": 2661 }, { "epoch": 0.34, "grad_norm": 0.7181087615662662, "learning_rate": 1.5393620396474738e-06, "loss": 0.6051, "step": 2662 }, { "epoch": 0.34, "grad_norm": 0.7828763797756219, "learning_rate": 1.5390145021488348e-06, "loss": 0.6174, "step": 2663 }, { "epoch": 0.34, "grad_norm": 0.6934627441020993, "learning_rate": 1.5386668728616412e-06, "loss": 0.5559, "step": 2664 }, { "epoch": 0.34, "grad_norm": 3.5735505747640333, "learning_rate": 1.5383191518450912e-06, "loss": 0.6212, "step": 2665 }, { "epoch": 0.34, "grad_norm": 1.401230211660289, "learning_rate": 1.5379713391583977e-06, "loss": 0.5869, "step": 2666 }, { "epoch": 0.34, "grad_norm": 0.6693739799842298, "learning_rate": 1.53762343486079e-06, "loss": 0.5573, "step": 2667 }, { "epoch": 0.34, "grad_norm": 0.7949351107821199, "learning_rate": 1.5372754390115116e-06, "loss": 0.6494, "step": 2668 }, { "epoch": 0.34, "grad_norm": 0.7574293124796494, "learning_rate": 1.5369273516698238e-06, "loss": 0.5419, "step": 2669 }, { "epoch": 0.34, "grad_norm": 0.7608196273776373, "learning_rate": 1.5365791728950016e-06, "loss": 0.5567, "step": 2670 }, { "epoch": 0.34, "grad_norm": 0.6949936688001659, "learning_rate": 1.536230902746336e-06, "loss": 0.5462, "step": 2671 }, { "epoch": 0.34, "grad_norm": 0.7792803931478585, "learning_rate": 1.5358825412831342e-06, "loss": 0.594, "step": 2672 }, { "epoch": 0.34, "grad_norm": 1.319311447755787, "learning_rate": 1.5355340885647185e-06, "loss": 0.5978, "step": 2673 }, { "epoch": 0.34, "grad_norm": 0.6711776727009926, "learning_rate": 1.5351855446504267e-06, "loss": 0.5762, "step": 2674 }, { "epoch": 0.34, "grad_norm": 0.9042131609964323, "learning_rate": 1.5348369095996124e-06, "loss": 0.6052, "step": 2675 }, { "epoch": 0.34, "grad_norm": 0.8977802272894979, "learning_rate": 1.534488183471644e-06, "loss": 0.6486, "step": 2676 }, { "epoch": 0.34, "grad_norm": 0.8882656406496038, "learning_rate": 1.5341393663259062e-06, "loss": 0.6168, "step": 2677 }, { "epoch": 0.34, "grad_norm": 0.6326980024734311, "learning_rate": 1.533790458221799e-06, "loss": 0.4965, "step": 2678 }, { "epoch": 0.34, "grad_norm": 0.6801749988222119, "learning_rate": 1.5334414592187375e-06, "loss": 0.5782, "step": 2679 }, { "epoch": 0.34, "grad_norm": 0.9635715184246961, "learning_rate": 1.533092369376153e-06, "loss": 0.6547, "step": 2680 }, { "epoch": 0.34, "grad_norm": 0.6873721846576595, "learning_rate": 1.5327431887534918e-06, "loss": 0.5819, "step": 2681 }, { "epoch": 0.34, "grad_norm": 0.6532313658259804, "learning_rate": 1.5323939174102157e-06, "loss": 0.5122, "step": 2682 }, { "epoch": 0.34, "grad_norm": 0.8968013897358722, "learning_rate": 1.5320445554058017e-06, "loss": 0.716, "step": 2683 }, { "epoch": 0.34, "grad_norm": 0.7974510128954213, "learning_rate": 1.531695102799743e-06, "loss": 0.5615, "step": 2684 }, { "epoch": 0.34, "grad_norm": 0.8318503927719663, "learning_rate": 1.5313455596515471e-06, "loss": 0.647, "step": 2685 }, { "epoch": 0.34, "grad_norm": 0.6467112436761154, "learning_rate": 1.5309959260207382e-06, "loss": 0.5461, "step": 2686 }, { "epoch": 0.34, "grad_norm": 0.8362236594797428, "learning_rate": 1.5306462019668549e-06, "loss": 0.5586, "step": 2687 }, { "epoch": 0.34, "grad_norm": 0.7132130331418559, "learning_rate": 1.5302963875494515e-06, "loss": 0.5165, "step": 2688 }, { "epoch": 0.34, "grad_norm": 0.7382787231691627, "learning_rate": 1.5299464828280982e-06, "loss": 0.5823, "step": 2689 }, { "epoch": 0.34, "grad_norm": 0.7616743477518817, "learning_rate": 1.5295964878623795e-06, "loss": 0.565, "step": 2690 }, { "epoch": 0.34, "grad_norm": 0.8228494077737853, "learning_rate": 1.5292464027118963e-06, "loss": 0.6518, "step": 2691 }, { "epoch": 0.34, "grad_norm": 0.8944313482742176, "learning_rate": 1.5288962274362644e-06, "loss": 0.6315, "step": 2692 }, { "epoch": 0.34, "grad_norm": 0.6619007072477948, "learning_rate": 1.528545962095115e-06, "loss": 0.5152, "step": 2693 }, { "epoch": 0.34, "grad_norm": 0.8333396114998701, "learning_rate": 1.5281956067480946e-06, "loss": 0.5385, "step": 2694 }, { "epoch": 0.34, "grad_norm": 0.9393108186707092, "learning_rate": 1.527845161454865e-06, "loss": 0.6351, "step": 2695 }, { "epoch": 0.34, "grad_norm": 0.9081170153823361, "learning_rate": 1.5274946262751031e-06, "loss": 0.6442, "step": 2696 }, { "epoch": 0.34, "grad_norm": 0.7271146582802742, "learning_rate": 1.5271440012685025e-06, "loss": 0.6141, "step": 2697 }, { "epoch": 0.34, "grad_norm": 0.871481643528823, "learning_rate": 1.5267932864947697e-06, "loss": 0.6061, "step": 2698 }, { "epoch": 0.34, "grad_norm": 0.820711219643142, "learning_rate": 1.5264424820136284e-06, "loss": 0.604, "step": 2699 }, { "epoch": 0.34, "grad_norm": 0.6934625101454092, "learning_rate": 1.526091587884817e-06, "loss": 0.5738, "step": 2700 }, { "epoch": 0.34, "grad_norm": 0.9303136764778933, "learning_rate": 1.5257406041680888e-06, "loss": 0.6371, "step": 2701 }, { "epoch": 0.34, "grad_norm": 0.8238363027177719, "learning_rate": 1.525389530923213e-06, "loss": 0.5884, "step": 2702 }, { "epoch": 0.34, "grad_norm": 0.6846651392697702, "learning_rate": 1.5250383682099734e-06, "loss": 0.6141, "step": 2703 }, { "epoch": 0.34, "grad_norm": 0.8096055578771191, "learning_rate": 1.5246871160881696e-06, "loss": 0.5942, "step": 2704 }, { "epoch": 0.34, "grad_norm": 0.8198797547309176, "learning_rate": 1.524335774617616e-06, "loss": 0.6364, "step": 2705 }, { "epoch": 0.34, "grad_norm": 0.8683203268013842, "learning_rate": 1.5239843438581428e-06, "loss": 0.6124, "step": 2706 }, { "epoch": 0.34, "grad_norm": 0.7076189648847233, "learning_rate": 1.5236328238695944e-06, "loss": 0.5785, "step": 2707 }, { "epoch": 0.34, "grad_norm": 0.8542546187791273, "learning_rate": 1.5232812147118312e-06, "loss": 0.5986, "step": 2708 }, { "epoch": 0.35, "grad_norm": 0.7117857628580446, "learning_rate": 1.5229295164447287e-06, "loss": 0.541, "step": 2709 }, { "epoch": 0.35, "grad_norm": 0.7678354301752931, "learning_rate": 1.5225777291281773e-06, "loss": 0.6051, "step": 2710 }, { "epoch": 0.35, "grad_norm": 0.7191800430412006, "learning_rate": 1.5222258528220827e-06, "loss": 0.5587, "step": 2711 }, { "epoch": 0.35, "grad_norm": 0.836783627728259, "learning_rate": 1.5218738875863662e-06, "loss": 0.6526, "step": 2712 }, { "epoch": 0.35, "grad_norm": 0.933315406061919, "learning_rate": 1.5215218334809632e-06, "loss": 0.6604, "step": 2713 }, { "epoch": 0.35, "grad_norm": 0.7998197801255282, "learning_rate": 1.5211696905658253e-06, "loss": 0.6083, "step": 2714 }, { "epoch": 0.35, "grad_norm": 0.7234124947493389, "learning_rate": 1.5208174589009186e-06, "loss": 0.5658, "step": 2715 }, { "epoch": 0.35, "grad_norm": 0.9049726544674671, "learning_rate": 1.5204651385462245e-06, "loss": 0.6345, "step": 2716 }, { "epoch": 0.35, "grad_norm": 0.7478737383979305, "learning_rate": 1.5201127295617395e-06, "loss": 0.5742, "step": 2717 }, { "epoch": 0.35, "grad_norm": 0.6849346212885078, "learning_rate": 1.519760232007475e-06, "loss": 0.5422, "step": 2718 }, { "epoch": 0.35, "grad_norm": 0.7069993737189381, "learning_rate": 1.5194076459434582e-06, "loss": 0.5409, "step": 2719 }, { "epoch": 0.35, "grad_norm": 1.1248963155834466, "learning_rate": 1.5190549714297301e-06, "loss": 0.6115, "step": 2720 }, { "epoch": 0.35, "grad_norm": 0.6455352371672562, "learning_rate": 1.5187022085263479e-06, "loss": 0.5708, "step": 2721 }, { "epoch": 0.35, "grad_norm": 0.5891843057979788, "learning_rate": 1.5183493572933833e-06, "loss": 0.5254, "step": 2722 }, { "epoch": 0.35, "grad_norm": 0.8090551137943908, "learning_rate": 1.5179964177909237e-06, "loss": 0.5554, "step": 2723 }, { "epoch": 0.35, "grad_norm": 0.7338631039466889, "learning_rate": 1.5176433900790703e-06, "loss": 0.5495, "step": 2724 }, { "epoch": 0.35, "grad_norm": 0.8385053041376656, "learning_rate": 1.5172902742179398e-06, "loss": 0.5966, "step": 2725 }, { "epoch": 0.35, "grad_norm": 0.6479743866352093, "learning_rate": 1.5169370702676654e-06, "loss": 0.5331, "step": 2726 }, { "epoch": 0.35, "grad_norm": 0.6640071581864379, "learning_rate": 1.516583778288393e-06, "loss": 0.5401, "step": 2727 }, { "epoch": 0.35, "grad_norm": 0.7844153558121632, "learning_rate": 1.5162303983402852e-06, "loss": 0.5855, "step": 2728 }, { "epoch": 0.35, "grad_norm": 0.5987192666997421, "learning_rate": 1.5158769304835183e-06, "loss": 0.5359, "step": 2729 }, { "epoch": 0.35, "grad_norm": 0.7039900578000646, "learning_rate": 1.5155233747782848e-06, "loss": 0.5257, "step": 2730 }, { "epoch": 0.35, "grad_norm": 0.7553581939022301, "learning_rate": 1.515169731284791e-06, "loss": 0.6061, "step": 2731 }, { "epoch": 0.35, "grad_norm": 0.728643723089328, "learning_rate": 1.5148160000632593e-06, "loss": 0.5546, "step": 2732 }, { "epoch": 0.35, "grad_norm": 0.6774852140911092, "learning_rate": 1.5144621811739257e-06, "loss": 0.5009, "step": 2733 }, { "epoch": 0.35, "grad_norm": 0.5982599252176639, "learning_rate": 1.5141082746770425e-06, "loss": 0.5066, "step": 2734 }, { "epoch": 0.35, "grad_norm": 0.7794687556181809, "learning_rate": 1.513754280632876e-06, "loss": 0.5856, "step": 2735 }, { "epoch": 0.35, "grad_norm": 0.762073971344144, "learning_rate": 1.5134001991017079e-06, "loss": 0.6338, "step": 2736 }, { "epoch": 0.35, "grad_norm": 0.7895890497786219, "learning_rate": 1.5130460301438343e-06, "loss": 0.7224, "step": 2737 }, { "epoch": 0.35, "grad_norm": 0.603641608550316, "learning_rate": 1.5126917738195663e-06, "loss": 0.5036, "step": 2738 }, { "epoch": 0.35, "grad_norm": 0.7622576154880916, "learning_rate": 1.512337430189231e-06, "loss": 0.5549, "step": 2739 }, { "epoch": 0.35, "grad_norm": 0.8449130382612617, "learning_rate": 1.5119829993131688e-06, "loss": 0.6073, "step": 2740 }, { "epoch": 0.35, "grad_norm": 0.6942946227993302, "learning_rate": 1.5116284812517352e-06, "loss": 0.5307, "step": 2741 }, { "epoch": 0.35, "grad_norm": 0.6841852898616607, "learning_rate": 1.5112738760653017e-06, "loss": 0.5277, "step": 2742 }, { "epoch": 0.35, "grad_norm": 0.6514421506194327, "learning_rate": 1.5109191838142534e-06, "loss": 0.5553, "step": 2743 }, { "epoch": 0.35, "grad_norm": 0.7595529152611123, "learning_rate": 1.5105644045589907e-06, "loss": 0.6087, "step": 2744 }, { "epoch": 0.35, "grad_norm": 0.6229450328247342, "learning_rate": 1.5102095383599291e-06, "loss": 0.5246, "step": 2745 }, { "epoch": 0.35, "grad_norm": 0.6892993834567736, "learning_rate": 1.5098545852774984e-06, "loss": 0.5769, "step": 2746 }, { "epoch": 0.35, "grad_norm": 0.8436699065958566, "learning_rate": 1.5094995453721433e-06, "loss": 0.6344, "step": 2747 }, { "epoch": 0.35, "grad_norm": 0.8568324571966234, "learning_rate": 1.5091444187043238e-06, "loss": 0.6211, "step": 2748 }, { "epoch": 0.35, "grad_norm": 0.7779622307365157, "learning_rate": 1.5087892053345138e-06, "loss": 0.554, "step": 2749 }, { "epoch": 0.35, "grad_norm": 0.7576944072476369, "learning_rate": 1.5084339053232027e-06, "loss": 0.5932, "step": 2750 }, { "epoch": 0.35, "grad_norm": 0.6756978614860609, "learning_rate": 1.5080785187308944e-06, "loss": 0.6188, "step": 2751 }, { "epoch": 0.35, "grad_norm": 0.5988848029112629, "learning_rate": 1.5077230456181075e-06, "loss": 0.5242, "step": 2752 }, { "epoch": 0.35, "grad_norm": 0.7492957318785779, "learning_rate": 1.5073674860453753e-06, "loss": 0.5597, "step": 2753 }, { "epoch": 0.35, "grad_norm": 0.7166376408933957, "learning_rate": 1.5070118400732458e-06, "loss": 0.5071, "step": 2754 }, { "epoch": 0.35, "grad_norm": 0.8986433350250779, "learning_rate": 1.506656107762282e-06, "loss": 0.5994, "step": 2755 }, { "epoch": 0.35, "grad_norm": 0.8321511006444927, "learning_rate": 1.506300289173062e-06, "loss": 0.6704, "step": 2756 }, { "epoch": 0.35, "grad_norm": 0.9131508275823171, "learning_rate": 1.5059443843661765e-06, "loss": 0.6587, "step": 2757 }, { "epoch": 0.35, "grad_norm": 0.70888542543407, "learning_rate": 1.5055883934022337e-06, "loss": 0.5876, "step": 2758 }, { "epoch": 0.35, "grad_norm": 0.8627024208851585, "learning_rate": 1.5052323163418548e-06, "loss": 0.5649, "step": 2759 }, { "epoch": 0.35, "grad_norm": 0.8349774162336141, "learning_rate": 1.5048761532456755e-06, "loss": 0.607, "step": 2760 }, { "epoch": 0.35, "grad_norm": 0.7957402233973435, "learning_rate": 1.5045199041743473e-06, "loss": 0.6034, "step": 2761 }, { "epoch": 0.35, "grad_norm": 0.7084440018877063, "learning_rate": 1.5041635691885356e-06, "loss": 0.5183, "step": 2762 }, { "epoch": 0.35, "grad_norm": 0.9475343341965512, "learning_rate": 1.5038071483489198e-06, "loss": 0.6376, "step": 2763 }, { "epoch": 0.35, "grad_norm": 0.866160550320788, "learning_rate": 1.5034506417161959e-06, "loss": 0.6165, "step": 2764 }, { "epoch": 0.35, "grad_norm": 0.7353045429084609, "learning_rate": 1.5030940493510725e-06, "loss": 0.5819, "step": 2765 }, { "epoch": 0.35, "grad_norm": 0.6121072871793188, "learning_rate": 1.5027373713142733e-06, "loss": 0.5233, "step": 2766 }, { "epoch": 0.35, "grad_norm": 0.6701214150061907, "learning_rate": 1.5023806076665375e-06, "loss": 0.5397, "step": 2767 }, { "epoch": 0.35, "grad_norm": 0.6611134483316184, "learning_rate": 1.502023758468618e-06, "loss": 0.6005, "step": 2768 }, { "epoch": 0.35, "grad_norm": 0.8047607500676393, "learning_rate": 1.5016668237812824e-06, "loss": 0.6107, "step": 2769 }, { "epoch": 0.35, "grad_norm": 0.8756824725920811, "learning_rate": 1.501309803665313e-06, "loss": 0.6251, "step": 2770 }, { "epoch": 0.35, "grad_norm": 0.6971907337980469, "learning_rate": 1.5009526981815071e-06, "loss": 0.5642, "step": 2771 }, { "epoch": 0.35, "grad_norm": 0.9330454267014099, "learning_rate": 1.500595507390675e-06, "loss": 0.6399, "step": 2772 }, { "epoch": 0.35, "grad_norm": 0.7790275898812877, "learning_rate": 1.5002382313536434e-06, "loss": 0.5169, "step": 2773 }, { "epoch": 0.35, "grad_norm": 0.8339705375493703, "learning_rate": 1.4998808701312527e-06, "loss": 0.6005, "step": 2774 }, { "epoch": 0.35, "grad_norm": 0.8264765201551842, "learning_rate": 1.4995234237843573e-06, "loss": 0.715, "step": 2775 }, { "epoch": 0.35, "grad_norm": 0.8523558755788513, "learning_rate": 1.4991658923738267e-06, "loss": 0.6297, "step": 2776 }, { "epoch": 0.35, "grad_norm": 0.7363873528091556, "learning_rate": 1.4988082759605449e-06, "loss": 0.5881, "step": 2777 }, { "epoch": 0.35, "grad_norm": 0.6480111023873047, "learning_rate": 1.4984505746054107e-06, "loss": 0.548, "step": 2778 }, { "epoch": 0.35, "grad_norm": 0.6320121842214815, "learning_rate": 1.498092788369336e-06, "loss": 0.5256, "step": 2779 }, { "epoch": 0.35, "grad_norm": 0.8398572977393959, "learning_rate": 1.4977349173132487e-06, "loss": 0.6366, "step": 2780 }, { "epoch": 0.35, "grad_norm": 0.8723385410414921, "learning_rate": 1.49737696149809e-06, "loss": 0.6517, "step": 2781 }, { "epoch": 0.35, "grad_norm": 0.6248675120915862, "learning_rate": 1.4970189209848167e-06, "loss": 0.5592, "step": 2782 }, { "epoch": 0.35, "grad_norm": 1.3276181095905166, "learning_rate": 1.496660795834399e-06, "loss": 0.6757, "step": 2783 }, { "epoch": 0.35, "grad_norm": 0.6417394041741807, "learning_rate": 1.4963025861078222e-06, "loss": 0.5412, "step": 2784 }, { "epoch": 0.35, "grad_norm": 0.8788874658431991, "learning_rate": 1.4959442918660852e-06, "loss": 0.665, "step": 2785 }, { "epoch": 0.35, "grad_norm": 0.7497994614758708, "learning_rate": 1.495585913170202e-06, "loss": 0.5713, "step": 2786 }, { "epoch": 0.36, "grad_norm": 0.6804242412535482, "learning_rate": 1.495227450081201e-06, "loss": 0.5469, "step": 2787 }, { "epoch": 0.36, "grad_norm": 0.991025611563954, "learning_rate": 1.4948689026601243e-06, "loss": 0.6434, "step": 2788 }, { "epoch": 0.36, "grad_norm": 0.8070624838200096, "learning_rate": 1.4945102709680289e-06, "loss": 0.5259, "step": 2789 }, { "epoch": 0.36, "grad_norm": 0.6973771458339837, "learning_rate": 1.4941515550659861e-06, "loss": 0.5371, "step": 2790 }, { "epoch": 0.36, "grad_norm": 0.6130608976219806, "learning_rate": 1.4937927550150817e-06, "loss": 0.5215, "step": 2791 }, { "epoch": 0.36, "grad_norm": 0.752072773044262, "learning_rate": 1.4934338708764155e-06, "loss": 0.5728, "step": 2792 }, { "epoch": 0.36, "grad_norm": 0.7596111844720105, "learning_rate": 1.4930749027111013e-06, "loss": 0.6334, "step": 2793 }, { "epoch": 0.36, "grad_norm": 0.7547208706911472, "learning_rate": 1.492715850580268e-06, "loss": 0.5898, "step": 2794 }, { "epoch": 0.36, "grad_norm": 0.8256548376558105, "learning_rate": 1.4923567145450587e-06, "loss": 0.5682, "step": 2795 }, { "epoch": 0.36, "grad_norm": 0.878382667385202, "learning_rate": 1.49199749466663e-06, "loss": 0.6889, "step": 2796 }, { "epoch": 0.36, "grad_norm": 0.6844584629431314, "learning_rate": 1.4916381910061537e-06, "loss": 0.5099, "step": 2797 }, { "epoch": 0.36, "grad_norm": 0.9417801837383127, "learning_rate": 1.4912788036248154e-06, "loss": 0.6883, "step": 2798 }, { "epoch": 0.36, "grad_norm": 0.9993115709276478, "learning_rate": 1.490919332583815e-06, "loss": 0.6205, "step": 2799 }, { "epoch": 0.36, "grad_norm": 0.712431838687457, "learning_rate": 1.4905597779443662e-06, "loss": 0.5624, "step": 2800 }, { "epoch": 0.36, "grad_norm": 0.9954101807394483, "learning_rate": 1.490200139767698e-06, "loss": 0.6487, "step": 2801 }, { "epoch": 0.36, "grad_norm": 0.7703680125427543, "learning_rate": 1.4898404181150533e-06, "loss": 0.5292, "step": 2802 }, { "epoch": 0.36, "grad_norm": 0.6801230389210384, "learning_rate": 1.489480613047688e-06, "loss": 0.5354, "step": 2803 }, { "epoch": 0.36, "grad_norm": 0.6869755998172585, "learning_rate": 1.4891207246268737e-06, "loss": 0.5433, "step": 2804 }, { "epoch": 0.36, "grad_norm": 1.0097618511519242, "learning_rate": 1.488760752913896e-06, "loss": 0.5963, "step": 2805 }, { "epoch": 0.36, "grad_norm": 0.9611198456023415, "learning_rate": 1.4884006979700536e-06, "loss": 0.63, "step": 2806 }, { "epoch": 0.36, "grad_norm": 0.8568639259941129, "learning_rate": 1.4880405598566608e-06, "loss": 0.6519, "step": 2807 }, { "epoch": 0.36, "grad_norm": 0.7807086486932002, "learning_rate": 1.4876803386350447e-06, "loss": 0.5361, "step": 2808 }, { "epoch": 0.36, "grad_norm": 0.9452024241556469, "learning_rate": 1.4873200343665478e-06, "loss": 0.5776, "step": 2809 }, { "epoch": 0.36, "grad_norm": 0.7422888166236278, "learning_rate": 1.4869596471125253e-06, "loss": 0.5894, "step": 2810 }, { "epoch": 0.36, "grad_norm": 0.8395395847453998, "learning_rate": 1.4865991769343488e-06, "loss": 0.5987, "step": 2811 }, { "epoch": 0.36, "grad_norm": 0.8145206628244746, "learning_rate": 1.4862386238934015e-06, "loss": 0.6312, "step": 2812 }, { "epoch": 0.36, "grad_norm": 0.6211170414080287, "learning_rate": 1.485877988051082e-06, "loss": 0.5155, "step": 2813 }, { "epoch": 0.36, "grad_norm": 0.9451681959565611, "learning_rate": 1.4855172694688032e-06, "loss": 0.618, "step": 2814 }, { "epoch": 0.36, "grad_norm": 0.7405673746521159, "learning_rate": 1.4851564682079913e-06, "loss": 0.5899, "step": 2815 }, { "epoch": 0.36, "grad_norm": 0.6604511842601581, "learning_rate": 1.4847955843300875e-06, "loss": 0.55, "step": 2816 }, { "epoch": 0.36, "grad_norm": 0.7123543532700324, "learning_rate": 1.484434617896546e-06, "loss": 0.5873, "step": 2817 }, { "epoch": 0.36, "grad_norm": 0.8855042096725597, "learning_rate": 1.484073568968836e-06, "loss": 0.6245, "step": 2818 }, { "epoch": 0.36, "grad_norm": 0.8806194203178593, "learning_rate": 1.4837124376084403e-06, "loss": 0.5766, "step": 2819 }, { "epoch": 0.36, "grad_norm": 0.8791602683071743, "learning_rate": 1.4833512238768555e-06, "loss": 0.6288, "step": 2820 }, { "epoch": 0.36, "grad_norm": 0.7665832568695121, "learning_rate": 1.482989927835593e-06, "loss": 0.6016, "step": 2821 }, { "epoch": 0.36, "grad_norm": 0.8047272116294628, "learning_rate": 1.4826285495461773e-06, "loss": 0.5624, "step": 2822 }, { "epoch": 0.36, "grad_norm": 0.9131174662422704, "learning_rate": 1.4822670890701476e-06, "loss": 0.6033, "step": 2823 }, { "epoch": 0.36, "grad_norm": 0.8821364233152592, "learning_rate": 1.4819055464690571e-06, "loss": 0.6315, "step": 2824 }, { "epoch": 0.36, "grad_norm": 0.8429396770684198, "learning_rate": 1.4815439218044726e-06, "loss": 0.61, "step": 2825 }, { "epoch": 0.36, "grad_norm": 0.9525227212937452, "learning_rate": 1.4811822151379745e-06, "loss": 0.6842, "step": 2826 }, { "epoch": 0.36, "grad_norm": 0.7585383691230887, "learning_rate": 1.4808204265311583e-06, "loss": 0.5962, "step": 2827 }, { "epoch": 0.36, "grad_norm": 0.7895312482008328, "learning_rate": 1.4804585560456327e-06, "loss": 0.6291, "step": 2828 }, { "epoch": 0.36, "grad_norm": 0.7991188232234464, "learning_rate": 1.4800966037430204e-06, "loss": 0.592, "step": 2829 }, { "epoch": 0.36, "grad_norm": 0.8834088301011445, "learning_rate": 1.479734569684958e-06, "loss": 0.6057, "step": 2830 }, { "epoch": 0.36, "grad_norm": 0.6791035197970052, "learning_rate": 1.479372453933096e-06, "loss": 0.5297, "step": 2831 }, { "epoch": 0.36, "grad_norm": 0.8498852482456768, "learning_rate": 1.4790102565490994e-06, "loss": 0.621, "step": 2832 }, { "epoch": 0.36, "grad_norm": 1.0275595307419705, "learning_rate": 1.4786479775946466e-06, "loss": 0.6571, "step": 2833 }, { "epoch": 0.36, "grad_norm": 0.6908995911305638, "learning_rate": 1.4782856171314296e-06, "loss": 0.5729, "step": 2834 }, { "epoch": 0.36, "grad_norm": 2.367175357037824, "learning_rate": 1.4779231752211546e-06, "loss": 0.6134, "step": 2835 }, { "epoch": 0.36, "grad_norm": 0.6338766342409193, "learning_rate": 1.4775606519255414e-06, "loss": 0.5281, "step": 2836 }, { "epoch": 0.36, "grad_norm": 0.8166535003548172, "learning_rate": 1.477198047306325e-06, "loss": 0.6466, "step": 2837 }, { "epoch": 0.36, "grad_norm": 0.8271732737616625, "learning_rate": 1.4768353614252525e-06, "loss": 0.5821, "step": 2838 }, { "epoch": 0.36, "grad_norm": 0.8602234663684609, "learning_rate": 1.4764725943440854e-06, "loss": 0.5774, "step": 2839 }, { "epoch": 0.36, "grad_norm": 0.8149732392319375, "learning_rate": 1.4761097461245995e-06, "loss": 0.5995, "step": 2840 }, { "epoch": 0.36, "grad_norm": 1.0375300098110611, "learning_rate": 1.4757468168285838e-06, "loss": 0.6654, "step": 2841 }, { "epoch": 0.36, "grad_norm": 0.8720895143222749, "learning_rate": 1.4753838065178416e-06, "loss": 0.6445, "step": 2842 }, { "epoch": 0.36, "grad_norm": 0.8401376207135789, "learning_rate": 1.4750207152541896e-06, "loss": 0.5922, "step": 2843 }, { "epoch": 0.36, "grad_norm": 0.7137166118185889, "learning_rate": 1.4746575430994585e-06, "loss": 0.5949, "step": 2844 }, { "epoch": 0.36, "grad_norm": 0.904275787449756, "learning_rate": 1.474294290115493e-06, "loss": 0.6895, "step": 2845 }, { "epoch": 0.36, "grad_norm": 0.6278408433970168, "learning_rate": 1.4739309563641507e-06, "loss": 0.533, "step": 2846 }, { "epoch": 0.36, "grad_norm": 0.9317759418471361, "learning_rate": 1.4735675419073038e-06, "loss": 0.6687, "step": 2847 }, { "epoch": 0.36, "grad_norm": 0.8541067919028597, "learning_rate": 1.4732040468068385e-06, "loss": 0.6245, "step": 2848 }, { "epoch": 0.36, "grad_norm": 0.6409720847446649, "learning_rate": 1.4728404711246537e-06, "loss": 0.5416, "step": 2849 }, { "epoch": 0.36, "grad_norm": 0.9758490467491027, "learning_rate": 1.4724768149226625e-06, "loss": 0.6967, "step": 2850 }, { "epoch": 0.36, "grad_norm": 0.9595496507858726, "learning_rate": 1.472113078262792e-06, "loss": 0.6379, "step": 2851 }, { "epoch": 0.36, "grad_norm": 0.7398188097774925, "learning_rate": 1.4717492612069826e-06, "loss": 0.5374, "step": 2852 }, { "epoch": 0.36, "grad_norm": 0.6662992799903374, "learning_rate": 1.471385363817189e-06, "loss": 0.5476, "step": 2853 }, { "epoch": 0.36, "grad_norm": 0.7834642834856776, "learning_rate": 1.4710213861553787e-06, "loss": 0.6131, "step": 2854 }, { "epoch": 0.36, "grad_norm": 0.7295562995003764, "learning_rate": 1.4706573282835332e-06, "loss": 0.6309, "step": 2855 }, { "epoch": 0.36, "grad_norm": 0.9048690458751024, "learning_rate": 1.4702931902636483e-06, "loss": 0.6745, "step": 2856 }, { "epoch": 0.36, "grad_norm": 0.6431363382022175, "learning_rate": 1.4699289721577323e-06, "loss": 0.5243, "step": 2857 }, { "epoch": 0.36, "grad_norm": 0.8723334360771513, "learning_rate": 1.4695646740278084e-06, "loss": 0.6523, "step": 2858 }, { "epoch": 0.36, "grad_norm": 0.6340711863349675, "learning_rate": 1.469200295935912e-06, "loss": 0.5409, "step": 2859 }, { "epoch": 0.36, "grad_norm": 0.6390113345264566, "learning_rate": 1.4688358379440935e-06, "loss": 0.5456, "step": 2860 }, { "epoch": 0.36, "grad_norm": 0.6242937988933909, "learning_rate": 1.468471300114416e-06, "loss": 0.5319, "step": 2861 }, { "epoch": 0.36, "grad_norm": 1.049302200516771, "learning_rate": 1.4681066825089568e-06, "loss": 0.6488, "step": 2862 }, { "epoch": 0.36, "grad_norm": 1.0021388610490307, "learning_rate": 1.467741985189806e-06, "loss": 0.6531, "step": 2863 }, { "epoch": 0.36, "grad_norm": 0.688816073543492, "learning_rate": 1.4673772082190682e-06, "loss": 0.529, "step": 2864 }, { "epoch": 0.36, "grad_norm": 0.7954243258998963, "learning_rate": 1.4670123516588607e-06, "loss": 0.6383, "step": 2865 }, { "epoch": 0.37, "grad_norm": 0.6231693026395826, "learning_rate": 1.466647415571315e-06, "loss": 0.5526, "step": 2866 }, { "epoch": 0.37, "grad_norm": 0.6612116878820933, "learning_rate": 1.4662824000185762e-06, "loss": 0.507, "step": 2867 }, { "epoch": 0.37, "grad_norm": 0.7228177788745414, "learning_rate": 1.4659173050628024e-06, "loss": 0.5903, "step": 2868 }, { "epoch": 0.37, "grad_norm": 0.7297713297166086, "learning_rate": 1.4655521307661654e-06, "loss": 0.5533, "step": 2869 }, { "epoch": 0.37, "grad_norm": 0.6761672205441746, "learning_rate": 1.4651868771908504e-06, "loss": 0.6158, "step": 2870 }, { "epoch": 0.37, "grad_norm": 0.5985113491739767, "learning_rate": 1.4648215443990566e-06, "loss": 0.4987, "step": 2871 }, { "epoch": 0.37, "grad_norm": 0.9215464434007808, "learning_rate": 1.4644561324529962e-06, "loss": 0.6571, "step": 2872 }, { "epoch": 0.37, "grad_norm": 0.9393647895589455, "learning_rate": 1.4640906414148955e-06, "loss": 0.5502, "step": 2873 }, { "epoch": 0.37, "grad_norm": 0.7430131579773921, "learning_rate": 1.463725071346993e-06, "loss": 0.5574, "step": 2874 }, { "epoch": 0.37, "grad_norm": 0.8321939537438792, "learning_rate": 1.463359422311542e-06, "loss": 0.5985, "step": 2875 }, { "epoch": 0.37, "grad_norm": 0.6758182032167424, "learning_rate": 1.4629936943708088e-06, "loss": 0.5883, "step": 2876 }, { "epoch": 0.37, "grad_norm": 0.8417081927849046, "learning_rate": 1.4626278875870726e-06, "loss": 0.7009, "step": 2877 }, { "epoch": 0.37, "grad_norm": 0.6306337002851999, "learning_rate": 1.462262002022627e-06, "loss": 0.5223, "step": 2878 }, { "epoch": 0.37, "grad_norm": 2.059711686595755, "learning_rate": 1.4618960377397779e-06, "loss": 0.6694, "step": 2879 }, { "epoch": 0.37, "grad_norm": 0.8022784255350268, "learning_rate": 1.461529994800846e-06, "loss": 0.5821, "step": 2880 }, { "epoch": 0.37, "grad_norm": 0.6730733279404993, "learning_rate": 1.4611638732681639e-06, "loss": 0.5142, "step": 2881 }, { "epoch": 0.37, "grad_norm": 0.7054927846232008, "learning_rate": 1.4607976732040788e-06, "loss": 0.5754, "step": 2882 }, { "epoch": 0.37, "grad_norm": 0.8192668455977995, "learning_rate": 1.4604313946709502e-06, "loss": 0.5604, "step": 2883 }, { "epoch": 0.37, "grad_norm": 0.6230196729695052, "learning_rate": 1.460065037731152e-06, "loss": 0.5403, "step": 2884 }, { "epoch": 0.37, "grad_norm": 0.6641421122507739, "learning_rate": 1.459698602447071e-06, "loss": 0.5808, "step": 2885 }, { "epoch": 0.37, "grad_norm": 0.8781730793043107, "learning_rate": 1.4593320888811066e-06, "loss": 0.6291, "step": 2886 }, { "epoch": 0.37, "grad_norm": 0.7726082555341107, "learning_rate": 1.4589654970956732e-06, "loss": 0.5692, "step": 2887 }, { "epoch": 0.37, "grad_norm": 0.9369017412148846, "learning_rate": 1.4585988271531971e-06, "loss": 0.6818, "step": 2888 }, { "epoch": 0.37, "grad_norm": 0.643393742497125, "learning_rate": 1.4582320791161181e-06, "loss": 0.5383, "step": 2889 }, { "epoch": 0.37, "grad_norm": 0.7461200581658407, "learning_rate": 1.4578652530468902e-06, "loss": 0.5833, "step": 2890 }, { "epoch": 0.37, "grad_norm": 0.7767761854201827, "learning_rate": 1.4574983490079795e-06, "loss": 0.6174, "step": 2891 }, { "epoch": 0.37, "grad_norm": 0.7910762618030133, "learning_rate": 1.4571313670618662e-06, "loss": 0.6166, "step": 2892 }, { "epoch": 0.37, "grad_norm": 0.9296065308190369, "learning_rate": 1.4567643072710431e-06, "loss": 0.6027, "step": 2893 }, { "epoch": 0.37, "grad_norm": 1.1739797110213461, "learning_rate": 1.4563971696980176e-06, "loss": 0.6332, "step": 2894 }, { "epoch": 0.37, "grad_norm": 0.6382087052426432, "learning_rate": 1.4560299544053086e-06, "loss": 0.5518, "step": 2895 }, { "epoch": 0.37, "grad_norm": 0.6421374223954901, "learning_rate": 1.455662661455449e-06, "loss": 0.599, "step": 2896 }, { "epoch": 0.37, "grad_norm": 0.7957639045037196, "learning_rate": 1.4552952909109856e-06, "loss": 0.5561, "step": 2897 }, { "epoch": 0.37, "grad_norm": 0.7121533918037124, "learning_rate": 1.454927842834477e-06, "loss": 0.5764, "step": 2898 }, { "epoch": 0.37, "grad_norm": 0.8269561110956469, "learning_rate": 1.4545603172884964e-06, "loss": 0.5928, "step": 2899 }, { "epoch": 0.37, "grad_norm": 0.6188116101653932, "learning_rate": 1.4541927143356292e-06, "loss": 0.5533, "step": 2900 }, { "epoch": 0.37, "grad_norm": 0.783880788082337, "learning_rate": 1.4538250340384745e-06, "loss": 0.5221, "step": 2901 }, { "epoch": 0.37, "grad_norm": 0.912251620763405, "learning_rate": 1.4534572764596445e-06, "loss": 0.6544, "step": 2902 }, { "epoch": 0.37, "grad_norm": 0.7014016807515112, "learning_rate": 1.4530894416617644e-06, "loss": 0.5702, "step": 2903 }, { "epoch": 0.37, "grad_norm": 0.8340215462439355, "learning_rate": 1.4527215297074726e-06, "loss": 0.6467, "step": 2904 }, { "epoch": 0.37, "grad_norm": 0.8029867615115054, "learning_rate": 1.452353540659421e-06, "loss": 0.5442, "step": 2905 }, { "epoch": 0.37, "grad_norm": 0.7897695705767267, "learning_rate": 1.4519854745802738e-06, "loss": 0.5633, "step": 2906 }, { "epoch": 0.37, "grad_norm": 0.7537989229950761, "learning_rate": 1.4516173315327088e-06, "loss": 0.5711, "step": 2907 }, { "epoch": 0.37, "grad_norm": 0.7630143942739561, "learning_rate": 1.4512491115794173e-06, "loss": 0.6208, "step": 2908 }, { "epoch": 0.37, "grad_norm": 0.6708950374021295, "learning_rate": 1.4508808147831034e-06, "loss": 0.5783, "step": 2909 }, { "epoch": 0.37, "grad_norm": 0.6723952230171419, "learning_rate": 1.4505124412064836e-06, "loss": 0.5084, "step": 2910 }, { "epoch": 0.37, "grad_norm": 0.8366969820925751, "learning_rate": 1.4501439909122888e-06, "loss": 0.6213, "step": 2911 }, { "epoch": 0.37, "grad_norm": 0.8287191932939644, "learning_rate": 1.4497754639632619e-06, "loss": 0.6112, "step": 2912 }, { "epoch": 0.37, "grad_norm": 0.7110835605944674, "learning_rate": 1.449406860422159e-06, "loss": 0.5916, "step": 2913 }, { "epoch": 0.37, "grad_norm": 0.9402308625779777, "learning_rate": 1.4490381803517497e-06, "loss": 0.6397, "step": 2914 }, { "epoch": 0.37, "grad_norm": 0.8443431419548582, "learning_rate": 1.4486694238148164e-06, "loss": 0.6066, "step": 2915 }, { "epoch": 0.37, "grad_norm": 0.6579200597845478, "learning_rate": 1.4483005908741545e-06, "loss": 0.5455, "step": 2916 }, { "epoch": 0.37, "grad_norm": 0.6535907733263981, "learning_rate": 1.4479316815925722e-06, "loss": 0.555, "step": 2917 }, { "epoch": 0.37, "grad_norm": 0.9902580977837102, "learning_rate": 1.4475626960328913e-06, "loss": 0.6462, "step": 2918 }, { "epoch": 0.37, "grad_norm": 0.7236014878111918, "learning_rate": 1.4471936342579455e-06, "loss": 0.5563, "step": 2919 }, { "epoch": 0.37, "grad_norm": 0.7341781045361712, "learning_rate": 1.4468244963305832e-06, "loss": 0.5364, "step": 2920 }, { "epoch": 0.37, "grad_norm": 2.183641005048583, "learning_rate": 1.4464552823136635e-06, "loss": 0.6825, "step": 2921 }, { "epoch": 0.37, "grad_norm": 0.6909245131386688, "learning_rate": 1.4460859922700608e-06, "loss": 0.5861, "step": 2922 }, { "epoch": 0.37, "grad_norm": 0.7223689064219212, "learning_rate": 1.4457166262626608e-06, "loss": 0.5159, "step": 2923 }, { "epoch": 0.37, "grad_norm": 0.665803479727205, "learning_rate": 1.4453471843543628e-06, "loss": 0.5532, "step": 2924 }, { "epoch": 0.37, "grad_norm": 0.7120534362115426, "learning_rate": 1.4449776666080786e-06, "loss": 0.6129, "step": 2925 }, { "epoch": 0.37, "grad_norm": 0.6686969939002663, "learning_rate": 1.4446080730867341e-06, "loss": 0.5209, "step": 2926 }, { "epoch": 0.37, "grad_norm": 0.7118750910800716, "learning_rate": 1.4442384038532662e-06, "loss": 0.5839, "step": 2927 }, { "epoch": 0.37, "grad_norm": 0.6910578137628784, "learning_rate": 1.4438686589706264e-06, "loss": 0.5311, "step": 2928 }, { "epoch": 0.37, "grad_norm": 0.638091606586386, "learning_rate": 1.4434988385017781e-06, "loss": 0.5577, "step": 2929 }, { "epoch": 0.37, "grad_norm": 0.9138323103990887, "learning_rate": 1.443128942509698e-06, "loss": 0.6284, "step": 2930 }, { "epoch": 0.37, "grad_norm": 0.847941011196858, "learning_rate": 1.4427589710573754e-06, "loss": 0.658, "step": 2931 }, { "epoch": 0.37, "grad_norm": 0.7998018825646488, "learning_rate": 1.4423889242078128e-06, "loss": 0.6142, "step": 2932 }, { "epoch": 0.37, "grad_norm": 0.7124106139576353, "learning_rate": 1.4420188020240248e-06, "loss": 0.5786, "step": 2933 }, { "epoch": 0.37, "grad_norm": 0.6466545513567014, "learning_rate": 1.4416486045690397e-06, "loss": 0.556, "step": 2934 }, { "epoch": 0.37, "grad_norm": 0.8484743945606571, "learning_rate": 1.4412783319058987e-06, "loss": 0.6077, "step": 2935 }, { "epoch": 0.37, "grad_norm": 0.7720705726445568, "learning_rate": 1.4409079840976548e-06, "loss": 0.5729, "step": 2936 }, { "epoch": 0.37, "grad_norm": 1.0143677895973267, "learning_rate": 1.4405375612073746e-06, "loss": 0.6857, "step": 2937 }, { "epoch": 0.37, "grad_norm": 0.6898835444306005, "learning_rate": 1.4401670632981372e-06, "loss": 0.5898, "step": 2938 }, { "epoch": 0.37, "grad_norm": 0.8562239409554988, "learning_rate": 1.4397964904330347e-06, "loss": 0.6312, "step": 2939 }, { "epoch": 0.37, "grad_norm": 0.6275942633881774, "learning_rate": 1.4394258426751715e-06, "loss": 0.5333, "step": 2940 }, { "epoch": 0.37, "grad_norm": 0.8306793606486005, "learning_rate": 1.4390551200876653e-06, "loss": 0.639, "step": 2941 }, { "epoch": 0.37, "grad_norm": 0.8698843479431698, "learning_rate": 1.4386843227336461e-06, "loss": 0.707, "step": 2942 }, { "epoch": 0.37, "grad_norm": 1.0234986116008342, "learning_rate": 1.4383134506762569e-06, "loss": 0.6562, "step": 2943 }, { "epoch": 0.38, "grad_norm": 0.7935783697085956, "learning_rate": 1.4379425039786533e-06, "loss": 0.5731, "step": 2944 }, { "epoch": 0.38, "grad_norm": 0.7231751743066511, "learning_rate": 1.437571482704004e-06, "loss": 0.5423, "step": 2945 }, { "epoch": 0.38, "grad_norm": 0.6201009366394794, "learning_rate": 1.4372003869154894e-06, "loss": 0.5388, "step": 2946 }, { "epoch": 0.38, "grad_norm": 0.6704760156784091, "learning_rate": 1.4368292166763036e-06, "loss": 0.6096, "step": 2947 }, { "epoch": 0.38, "grad_norm": 0.7873334139932725, "learning_rate": 1.4364579720496534e-06, "loss": 0.5379, "step": 2948 }, { "epoch": 0.38, "grad_norm": 1.076230516332461, "learning_rate": 1.4360866530987573e-06, "loss": 0.629, "step": 2949 }, { "epoch": 0.38, "grad_norm": 0.8936582269689555, "learning_rate": 1.4357152598868476e-06, "loss": 0.6616, "step": 2950 }, { "epoch": 0.38, "grad_norm": 0.6852108334721988, "learning_rate": 1.4353437924771683e-06, "loss": 0.582, "step": 2951 }, { "epoch": 0.38, "grad_norm": 0.8820267243875591, "learning_rate": 1.4349722509329762e-06, "loss": 0.6588, "step": 2952 }, { "epoch": 0.38, "grad_norm": 0.6501467731105091, "learning_rate": 1.4346006353175415e-06, "loss": 0.5353, "step": 2953 }, { "epoch": 0.38, "grad_norm": 0.6364286013092798, "learning_rate": 1.434228945694146e-06, "loss": 0.5451, "step": 2954 }, { "epoch": 0.38, "grad_norm": 0.8567306597962844, "learning_rate": 1.4338571821260851e-06, "loss": 0.5639, "step": 2955 }, { "epoch": 0.38, "grad_norm": 0.7656357571916407, "learning_rate": 1.4334853446766661e-06, "loss": 0.5687, "step": 2956 }, { "epoch": 0.38, "grad_norm": 0.8219383776743547, "learning_rate": 1.4331134334092088e-06, "loss": 0.5937, "step": 2957 }, { "epoch": 0.38, "grad_norm": 0.6597001514861968, "learning_rate": 1.4327414483870458e-06, "loss": 0.5559, "step": 2958 }, { "epoch": 0.38, "grad_norm": 0.7711896010893342, "learning_rate": 1.4323693896735226e-06, "loss": 0.5518, "step": 2959 }, { "epoch": 0.38, "grad_norm": 0.9238898201112902, "learning_rate": 1.4319972573319966e-06, "loss": 0.7341, "step": 2960 }, { "epoch": 0.38, "grad_norm": 1.2172468232222828, "learning_rate": 1.431625051425838e-06, "loss": 0.6553, "step": 2961 }, { "epoch": 0.38, "grad_norm": 0.8476898689698178, "learning_rate": 1.4312527720184301e-06, "loss": 0.6205, "step": 2962 }, { "epoch": 0.38, "grad_norm": 0.8852796017840445, "learning_rate": 1.4308804191731678e-06, "loss": 0.6729, "step": 2963 }, { "epoch": 0.38, "grad_norm": 0.9393060547855648, "learning_rate": 1.430507992953459e-06, "loss": 0.6203, "step": 2964 }, { "epoch": 0.38, "grad_norm": 0.7592432396521749, "learning_rate": 1.4301354934227238e-06, "loss": 0.5786, "step": 2965 }, { "epoch": 0.38, "grad_norm": 0.6694951255797104, "learning_rate": 1.4297629206443956e-06, "loss": 0.5381, "step": 2966 }, { "epoch": 0.38, "grad_norm": 0.7917638356329877, "learning_rate": 1.4293902746819188e-06, "loss": 0.6327, "step": 2967 }, { "epoch": 0.38, "grad_norm": 0.7838022234930193, "learning_rate": 1.4290175555987518e-06, "loss": 0.5735, "step": 2968 }, { "epoch": 0.38, "grad_norm": 0.685394067805382, "learning_rate": 1.4286447634583647e-06, "loss": 0.558, "step": 2969 }, { "epoch": 0.38, "grad_norm": 0.9383921313502919, "learning_rate": 1.4282718983242398e-06, "loss": 0.6604, "step": 2970 }, { "epoch": 0.38, "grad_norm": 0.9219988580776781, "learning_rate": 1.4278989602598724e-06, "loss": 0.6025, "step": 2971 }, { "epoch": 0.38, "grad_norm": 1.2579449453242686, "learning_rate": 1.42752594932877e-06, "loss": 0.6656, "step": 2972 }, { "epoch": 0.38, "grad_norm": 0.8465667951234505, "learning_rate": 1.427152865594452e-06, "loss": 0.6122, "step": 2973 }, { "epoch": 0.38, "grad_norm": 0.6760139087277035, "learning_rate": 1.4267797091204513e-06, "loss": 0.5723, "step": 2974 }, { "epoch": 0.38, "grad_norm": 0.7926683835961463, "learning_rate": 1.4264064799703121e-06, "loss": 0.5981, "step": 2975 }, { "epoch": 0.38, "grad_norm": 0.9138611273730386, "learning_rate": 1.4260331782075915e-06, "loss": 0.6187, "step": 2976 }, { "epoch": 0.38, "grad_norm": 0.6638668854685227, "learning_rate": 1.4256598038958595e-06, "loss": 0.4685, "step": 2977 }, { "epoch": 0.38, "grad_norm": 0.7188822482728612, "learning_rate": 1.425286357098697e-06, "loss": 0.5485, "step": 2978 }, { "epoch": 0.38, "grad_norm": 0.7546331998820988, "learning_rate": 1.4249128378796988e-06, "loss": 0.5901, "step": 2979 }, { "epoch": 0.38, "grad_norm": 0.7591776088816059, "learning_rate": 1.4245392463024707e-06, "loss": 0.5778, "step": 2980 }, { "epoch": 0.38, "grad_norm": 0.8272853667040697, "learning_rate": 1.424165582430632e-06, "loss": 0.5813, "step": 2981 }, { "epoch": 0.38, "grad_norm": 1.2093875956963143, "learning_rate": 1.4237918463278135e-06, "loss": 0.6989, "step": 2982 }, { "epoch": 0.38, "grad_norm": 0.8927824448074608, "learning_rate": 1.4234180380576586e-06, "loss": 0.6271, "step": 2983 }, { "epoch": 0.38, "grad_norm": 0.650836045109679, "learning_rate": 1.423044157683823e-06, "loss": 0.59, "step": 2984 }, { "epoch": 0.38, "grad_norm": 0.853130869444366, "learning_rate": 1.4226702052699746e-06, "loss": 0.6401, "step": 2985 }, { "epoch": 0.38, "grad_norm": 0.8142910491659119, "learning_rate": 1.4222961808797935e-06, "loss": 0.6347, "step": 2986 }, { "epoch": 0.38, "grad_norm": 0.664138194848671, "learning_rate": 1.4219220845769723e-06, "loss": 0.5771, "step": 2987 }, { "epoch": 0.38, "grad_norm": 1.035956849405899, "learning_rate": 1.4215479164252154e-06, "loss": 0.6414, "step": 2988 }, { "epoch": 0.38, "grad_norm": 0.6543999571962653, "learning_rate": 1.4211736764882404e-06, "loss": 0.4885, "step": 2989 }, { "epoch": 0.38, "grad_norm": 0.7516219631136275, "learning_rate": 1.4207993648297757e-06, "loss": 0.5815, "step": 2990 }, { "epoch": 0.38, "grad_norm": 0.7298867242030105, "learning_rate": 1.4204249815135631e-06, "loss": 0.5296, "step": 2991 }, { "epoch": 0.38, "grad_norm": 0.6563684290607786, "learning_rate": 1.4200505266033562e-06, "loss": 0.5572, "step": 2992 }, { "epoch": 0.38, "grad_norm": 0.8828322877649945, "learning_rate": 1.4196760001629208e-06, "loss": 0.6054, "step": 2993 }, { "epoch": 0.38, "grad_norm": 0.9215149141934957, "learning_rate": 1.4193014022560345e-06, "loss": 0.6476, "step": 2994 }, { "epoch": 0.38, "grad_norm": 0.670080022202974, "learning_rate": 1.4189267329464878e-06, "loss": 0.5583, "step": 2995 }, { "epoch": 0.38, "grad_norm": 0.8516403528710943, "learning_rate": 1.418551992298083e-06, "loss": 0.6149, "step": 2996 }, { "epoch": 0.38, "grad_norm": 1.2123042609569503, "learning_rate": 1.4181771803746338e-06, "loss": 0.6236, "step": 2997 }, { "epoch": 0.38, "grad_norm": 0.8213229933310824, "learning_rate": 1.4178022972399678e-06, "loss": 0.5624, "step": 2998 }, { "epoch": 0.38, "grad_norm": 0.8121597080178251, "learning_rate": 1.417427342957923e-06, "loss": 0.554, "step": 2999 }, { "epoch": 0.38, "grad_norm": 0.7638328428765224, "learning_rate": 1.4170523175923506e-06, "loss": 0.5722, "step": 3000 }, { "epoch": 0.38, "grad_norm": 0.7944807682202171, "learning_rate": 1.4166772212071129e-06, "loss": 0.6668, "step": 3001 }, { "epoch": 0.38, "grad_norm": 0.7069733834003264, "learning_rate": 1.416302053866086e-06, "loss": 0.5926, "step": 3002 }, { "epoch": 0.38, "grad_norm": 0.9058787848456458, "learning_rate": 1.4159268156331557e-06, "loss": 0.6613, "step": 3003 }, { "epoch": 0.38, "grad_norm": 0.6519594140127106, "learning_rate": 1.4155515065722221e-06, "loss": 0.5665, "step": 3004 }, { "epoch": 0.38, "grad_norm": 0.8104227746843757, "learning_rate": 1.415176126747196e-06, "loss": 0.594, "step": 3005 }, { "epoch": 0.38, "grad_norm": 0.6700789093562772, "learning_rate": 1.4148006762220007e-06, "loss": 0.526, "step": 3006 }, { "epoch": 0.38, "grad_norm": 0.824616350017847, "learning_rate": 1.4144251550605718e-06, "loss": 0.5188, "step": 3007 }, { "epoch": 0.38, "grad_norm": 0.750720628544232, "learning_rate": 1.4140495633268563e-06, "loss": 0.6419, "step": 3008 }, { "epoch": 0.38, "grad_norm": 0.6071372547875775, "learning_rate": 1.413673901084814e-06, "loss": 0.5189, "step": 3009 }, { "epoch": 0.38, "grad_norm": 0.8659769801520558, "learning_rate": 1.4132981683984157e-06, "loss": 0.6489, "step": 3010 }, { "epoch": 0.38, "grad_norm": 0.7356404101400091, "learning_rate": 1.4129223653316451e-06, "loss": 0.5602, "step": 3011 }, { "epoch": 0.38, "grad_norm": 1.1566432139450225, "learning_rate": 1.4125464919484976e-06, "loss": 0.7056, "step": 3012 }, { "epoch": 0.38, "grad_norm": 0.743582772039973, "learning_rate": 1.4121705483129803e-06, "loss": 0.5227, "step": 3013 }, { "epoch": 0.38, "grad_norm": 0.7186620456629237, "learning_rate": 1.4117945344891125e-06, "loss": 0.5518, "step": 3014 }, { "epoch": 0.38, "grad_norm": 0.9094206463847527, "learning_rate": 1.4114184505409256e-06, "loss": 0.6286, "step": 3015 }, { "epoch": 0.38, "grad_norm": 0.8690500851435035, "learning_rate": 1.4110422965324627e-06, "loss": 0.6014, "step": 3016 }, { "epoch": 0.38, "grad_norm": 0.7398355711169552, "learning_rate": 1.4106660725277789e-06, "loss": 0.5439, "step": 3017 }, { "epoch": 0.38, "grad_norm": 0.7255804476456763, "learning_rate": 1.410289778590941e-06, "loss": 0.5492, "step": 3018 }, { "epoch": 0.38, "grad_norm": 0.7063142306127926, "learning_rate": 1.4099134147860286e-06, "loss": 0.5334, "step": 3019 }, { "epoch": 0.38, "grad_norm": 0.6079952451320196, "learning_rate": 1.4095369811771318e-06, "loss": 0.5111, "step": 3020 }, { "epoch": 0.38, "grad_norm": 0.7804555742539693, "learning_rate": 1.4091604778283536e-06, "loss": 0.5797, "step": 3021 }, { "epoch": 0.38, "grad_norm": 0.8194947326422135, "learning_rate": 1.4087839048038089e-06, "loss": 0.6465, "step": 3022 }, { "epoch": 0.39, "grad_norm": 0.7537315301762143, "learning_rate": 1.408407262167624e-06, "loss": 0.6193, "step": 3023 }, { "epoch": 0.39, "grad_norm": 0.6700254409911592, "learning_rate": 1.4080305499839366e-06, "loss": 0.5404, "step": 3024 }, { "epoch": 0.39, "grad_norm": 0.9170093021613263, "learning_rate": 1.4076537683168978e-06, "loss": 0.6207, "step": 3025 }, { "epoch": 0.39, "grad_norm": 0.7117474086503168, "learning_rate": 1.4072769172306692e-06, "loss": 0.5453, "step": 3026 }, { "epoch": 0.39, "grad_norm": 0.964764291098878, "learning_rate": 1.4068999967894241e-06, "loss": 0.6272, "step": 3027 }, { "epoch": 0.39, "grad_norm": 0.6562264011715662, "learning_rate": 1.4065230070573491e-06, "loss": 0.5573, "step": 3028 }, { "epoch": 0.39, "grad_norm": 0.6618160073636681, "learning_rate": 1.4061459480986408e-06, "loss": 0.5492, "step": 3029 }, { "epoch": 0.39, "grad_norm": 0.8266839236692214, "learning_rate": 1.4057688199775089e-06, "loss": 0.6083, "step": 3030 }, { "epoch": 0.39, "grad_norm": 0.8762895024041505, "learning_rate": 1.4053916227581741e-06, "loss": 0.5438, "step": 3031 }, { "epoch": 0.39, "grad_norm": 0.808153798077984, "learning_rate": 1.4050143565048694e-06, "loss": 0.6486, "step": 3032 }, { "epoch": 0.39, "grad_norm": 0.639288981719959, "learning_rate": 1.404637021281839e-06, "loss": 0.5381, "step": 3033 }, { "epoch": 0.39, "grad_norm": 0.7924822386103259, "learning_rate": 1.4042596171533396e-06, "loss": 0.5984, "step": 3034 }, { "epoch": 0.39, "grad_norm": 0.7499464198484329, "learning_rate": 1.403882144183639e-06, "loss": 0.5027, "step": 3035 }, { "epoch": 0.39, "grad_norm": 0.8992025054839136, "learning_rate": 1.4035046024370164e-06, "loss": 0.6477, "step": 3036 }, { "epoch": 0.39, "grad_norm": 1.051836967969425, "learning_rate": 1.4031269919777642e-06, "loss": 0.6676, "step": 3037 }, { "epoch": 0.39, "grad_norm": 0.7277688500631552, "learning_rate": 1.4027493128701848e-06, "loss": 0.5815, "step": 3038 }, { "epoch": 0.39, "grad_norm": 0.6729576222209144, "learning_rate": 1.4023715651785933e-06, "loss": 0.5789, "step": 3039 }, { "epoch": 0.39, "grad_norm": 0.7468178601292582, "learning_rate": 1.4019937489673161e-06, "loss": 0.5135, "step": 3040 }, { "epoch": 0.39, "grad_norm": 0.7244252516724392, "learning_rate": 1.4016158643006912e-06, "loss": 0.5282, "step": 3041 }, { "epoch": 0.39, "grad_norm": 0.8936983683746277, "learning_rate": 1.401237911243069e-06, "loss": 0.5878, "step": 3042 }, { "epoch": 0.39, "grad_norm": 0.987620945908303, "learning_rate": 1.4008598898588102e-06, "loss": 0.6095, "step": 3043 }, { "epoch": 0.39, "grad_norm": 0.6558393297049334, "learning_rate": 1.4004818002122885e-06, "loss": 0.4651, "step": 3044 }, { "epoch": 0.39, "grad_norm": 0.8562602435726057, "learning_rate": 1.4001036423678882e-06, "loss": 0.6527, "step": 3045 }, { "epoch": 0.39, "grad_norm": 0.6621995669639869, "learning_rate": 1.3997254163900062e-06, "loss": 0.5093, "step": 3046 }, { "epoch": 0.39, "grad_norm": 0.7160092328534885, "learning_rate": 1.3993471223430498e-06, "loss": 0.6176, "step": 3047 }, { "epoch": 0.39, "grad_norm": 0.7467335392432723, "learning_rate": 1.3989687602914389e-06, "loss": 0.5445, "step": 3048 }, { "epoch": 0.39, "grad_norm": 0.7874308348344078, "learning_rate": 1.3985903302996046e-06, "loss": 0.5782, "step": 3049 }, { "epoch": 0.39, "grad_norm": 0.7158873000819497, "learning_rate": 1.3982118324319893e-06, "loss": 0.578, "step": 3050 }, { "epoch": 0.39, "grad_norm": 0.8480559730948038, "learning_rate": 1.3978332667530478e-06, "loss": 0.6524, "step": 3051 }, { "epoch": 0.39, "grad_norm": 0.660844913526248, "learning_rate": 1.3974546333272455e-06, "loss": 0.4931, "step": 3052 }, { "epoch": 0.39, "grad_norm": 1.0188867200790888, "learning_rate": 1.3970759322190597e-06, "loss": 0.6447, "step": 3053 }, { "epoch": 0.39, "grad_norm": 0.596417535339059, "learning_rate": 1.3966971634929794e-06, "loss": 0.5162, "step": 3054 }, { "epoch": 0.39, "grad_norm": 0.6187896284640257, "learning_rate": 1.3963183272135048e-06, "loss": 0.5228, "step": 3055 }, { "epoch": 0.39, "grad_norm": 0.9067817932211217, "learning_rate": 1.395939423445148e-06, "loss": 0.6231, "step": 3056 }, { "epoch": 0.39, "grad_norm": 0.7661487121994174, "learning_rate": 1.3955604522524325e-06, "loss": 0.5615, "step": 3057 }, { "epoch": 0.39, "grad_norm": 0.7956239030597039, "learning_rate": 1.3951814136998925e-06, "loss": 0.5917, "step": 3058 }, { "epoch": 0.39, "grad_norm": 0.7205426572844849, "learning_rate": 1.3948023078520749e-06, "loss": 0.5969, "step": 3059 }, { "epoch": 0.39, "grad_norm": 0.7345846234321981, "learning_rate": 1.3944231347735374e-06, "loss": 0.6286, "step": 3060 }, { "epoch": 0.39, "grad_norm": 0.7401107859636606, "learning_rate": 1.394043894528849e-06, "loss": 0.5552, "step": 3061 }, { "epoch": 0.39, "grad_norm": 0.8503840356783579, "learning_rate": 1.3936645871825903e-06, "loss": 0.6239, "step": 3062 }, { "epoch": 0.39, "grad_norm": 0.9026937311097736, "learning_rate": 1.3932852127993537e-06, "loss": 0.6779, "step": 3063 }, { "epoch": 0.39, "grad_norm": 0.8313075353277407, "learning_rate": 1.3929057714437429e-06, "loss": 0.6999, "step": 3064 }, { "epoch": 0.39, "grad_norm": 0.7472509686513, "learning_rate": 1.392526263180372e-06, "loss": 0.5924, "step": 3065 }, { "epoch": 0.39, "grad_norm": 0.7998443157356848, "learning_rate": 1.3921466880738682e-06, "loss": 0.6034, "step": 3066 }, { "epoch": 0.39, "grad_norm": 0.7669568935498641, "learning_rate": 1.3917670461888685e-06, "loss": 0.5476, "step": 3067 }, { "epoch": 0.39, "grad_norm": 0.8210074144624843, "learning_rate": 1.3913873375900224e-06, "loss": 0.5335, "step": 3068 }, { "epoch": 0.39, "grad_norm": 0.5805165683117435, "learning_rate": 1.3910075623419902e-06, "loss": 0.4953, "step": 3069 }, { "epoch": 0.39, "grad_norm": 0.7357027164797455, "learning_rate": 1.3906277205094434e-06, "loss": 0.5573, "step": 3070 }, { "epoch": 0.39, "grad_norm": 0.6370652266256153, "learning_rate": 1.3902478121570654e-06, "loss": 0.541, "step": 3071 }, { "epoch": 0.39, "grad_norm": 0.640638932286323, "learning_rate": 1.3898678373495507e-06, "loss": 0.5923, "step": 3072 }, { "epoch": 0.39, "grad_norm": 0.8434632039172507, "learning_rate": 1.3894877961516047e-06, "loss": 0.6602, "step": 3073 }, { "epoch": 0.39, "grad_norm": 0.7189335733407953, "learning_rate": 1.3891076886279444e-06, "loss": 0.549, "step": 3074 }, { "epoch": 0.39, "grad_norm": 0.870419102859188, "learning_rate": 1.3887275148432988e-06, "loss": 0.5516, "step": 3075 }, { "epoch": 0.39, "grad_norm": 0.6543415411451026, "learning_rate": 1.3883472748624075e-06, "loss": 0.5195, "step": 3076 }, { "epoch": 0.39, "grad_norm": 3.198898936309915, "learning_rate": 1.3879669687500207e-06, "loss": 0.6356, "step": 3077 }, { "epoch": 0.39, "grad_norm": 0.8617258263879347, "learning_rate": 1.3875865965709008e-06, "loss": 0.635, "step": 3078 }, { "epoch": 0.39, "grad_norm": 0.7976794770841682, "learning_rate": 1.3872061583898216e-06, "loss": 0.5621, "step": 3079 }, { "epoch": 0.39, "grad_norm": 0.911971024750434, "learning_rate": 1.3868256542715673e-06, "loss": 0.6053, "step": 3080 }, { "epoch": 0.39, "grad_norm": 0.5894705140986786, "learning_rate": 1.3864450842809342e-06, "loss": 0.5087, "step": 3081 }, { "epoch": 0.39, "grad_norm": 0.7114305446740168, "learning_rate": 1.3860644484827291e-06, "loss": 0.5772, "step": 3082 }, { "epoch": 0.39, "grad_norm": 0.8065196633682881, "learning_rate": 1.3856837469417704e-06, "loss": 0.5648, "step": 3083 }, { "epoch": 0.39, "grad_norm": 0.5785131775924585, "learning_rate": 1.3853029797228878e-06, "loss": 0.5044, "step": 3084 }, { "epoch": 0.39, "grad_norm": 0.8108596744535257, "learning_rate": 1.3849221468909215e-06, "loss": 0.5644, "step": 3085 }, { "epoch": 0.39, "grad_norm": 0.7784592870930646, "learning_rate": 1.3845412485107238e-06, "loss": 0.5499, "step": 3086 }, { "epoch": 0.39, "grad_norm": 0.7725760553097077, "learning_rate": 1.3841602846471575e-06, "loss": 0.5861, "step": 3087 }, { "epoch": 0.39, "grad_norm": 0.6924838413344677, "learning_rate": 1.3837792553650968e-06, "loss": 0.5775, "step": 3088 }, { "epoch": 0.39, "grad_norm": 0.9237084490598435, "learning_rate": 1.3833981607294274e-06, "loss": 0.6616, "step": 3089 }, { "epoch": 0.39, "grad_norm": 0.7075983734160326, "learning_rate": 1.3830170008050455e-06, "loss": 0.5589, "step": 3090 }, { "epoch": 0.39, "grad_norm": 0.7489193476917966, "learning_rate": 1.3826357756568584e-06, "loss": 0.565, "step": 3091 }, { "epoch": 0.39, "grad_norm": 0.763601664487913, "learning_rate": 1.3822544853497848e-06, "loss": 0.5747, "step": 3092 }, { "epoch": 0.39, "grad_norm": 0.8524374001901855, "learning_rate": 1.381873129948755e-06, "loss": 0.5659, "step": 3093 }, { "epoch": 0.39, "grad_norm": 0.8185151052335679, "learning_rate": 1.3814917095187096e-06, "loss": 0.6405, "step": 3094 }, { "epoch": 0.39, "grad_norm": 0.6260629653074958, "learning_rate": 1.3811102241246e-06, "loss": 0.5432, "step": 3095 }, { "epoch": 0.39, "grad_norm": 0.9674082594299532, "learning_rate": 1.3807286738313896e-06, "loss": 0.6274, "step": 3096 }, { "epoch": 0.39, "grad_norm": 0.9438089849834327, "learning_rate": 1.380347058704053e-06, "loss": 0.5551, "step": 3097 }, { "epoch": 0.39, "grad_norm": 0.6236022789990685, "learning_rate": 1.3799653788075742e-06, "loss": 0.5221, "step": 3098 }, { "epoch": 0.39, "grad_norm": 0.838193510354797, "learning_rate": 1.37958363420695e-06, "loss": 0.6315, "step": 3099 }, { "epoch": 0.39, "grad_norm": 0.899187492836811, "learning_rate": 1.3792018249671874e-06, "loss": 0.6108, "step": 3100 }, { "epoch": 0.4, "grad_norm": 0.7708992022961609, "learning_rate": 1.3788199511533044e-06, "loss": 0.5763, "step": 3101 }, { "epoch": 0.4, "grad_norm": 0.848960857513316, "learning_rate": 1.3784380128303301e-06, "loss": 0.6074, "step": 3102 }, { "epoch": 0.4, "grad_norm": 0.6297093655135994, "learning_rate": 1.3780560100633052e-06, "loss": 0.583, "step": 3103 }, { "epoch": 0.4, "grad_norm": 0.6218245145001554, "learning_rate": 1.3776739429172798e-06, "loss": 0.526, "step": 3104 }, { "epoch": 0.4, "grad_norm": 0.7730874329693123, "learning_rate": 1.3772918114573169e-06, "loss": 0.6303, "step": 3105 }, { "epoch": 0.4, "grad_norm": 0.7149510079393127, "learning_rate": 1.3769096157484889e-06, "loss": 0.5415, "step": 3106 }, { "epoch": 0.4, "grad_norm": 1.0364738409190069, "learning_rate": 1.37652735585588e-06, "loss": 0.6031, "step": 3107 }, { "epoch": 0.4, "grad_norm": 0.9678099774230313, "learning_rate": 1.3761450318445853e-06, "loss": 0.6382, "step": 3108 }, { "epoch": 0.4, "grad_norm": 0.8108791160273627, "learning_rate": 1.37576264377971e-06, "loss": 0.6645, "step": 3109 }, { "epoch": 0.4, "grad_norm": 0.9724002909238344, "learning_rate": 1.3753801917263714e-06, "loss": 0.649, "step": 3110 }, { "epoch": 0.4, "grad_norm": 0.6211448285366288, "learning_rate": 1.3749976757496967e-06, "loss": 0.5451, "step": 3111 }, { "epoch": 0.4, "grad_norm": 0.6568689205316481, "learning_rate": 1.3746150959148243e-06, "loss": 0.5728, "step": 3112 }, { "epoch": 0.4, "grad_norm": 0.8233192986327745, "learning_rate": 1.3742324522869041e-06, "loss": 0.56, "step": 3113 }, { "epoch": 0.4, "grad_norm": 0.6809746166867059, "learning_rate": 1.373849744931096e-06, "loss": 0.5019, "step": 3114 }, { "epoch": 0.4, "grad_norm": 0.943166931435314, "learning_rate": 1.373466973912571e-06, "loss": 0.6573, "step": 3115 }, { "epoch": 0.4, "grad_norm": 0.5882815260604138, "learning_rate": 1.373084139296511e-06, "loss": 0.4802, "step": 3116 }, { "epoch": 0.4, "grad_norm": 0.8068045842217108, "learning_rate": 1.3727012411481087e-06, "loss": 0.6063, "step": 3117 }, { "epoch": 0.4, "grad_norm": 0.86223879857831, "learning_rate": 1.372318279532568e-06, "loss": 0.6278, "step": 3118 }, { "epoch": 0.4, "grad_norm": 0.664244863660214, "learning_rate": 1.3719352545151033e-06, "loss": 0.4939, "step": 3119 }, { "epoch": 0.4, "grad_norm": 0.7072949414515083, "learning_rate": 1.3715521661609393e-06, "loss": 0.5278, "step": 3120 }, { "epoch": 0.4, "grad_norm": 0.7384809252601513, "learning_rate": 1.371169014535312e-06, "loss": 0.5434, "step": 3121 }, { "epoch": 0.4, "grad_norm": 0.846938334630315, "learning_rate": 1.3707857997034688e-06, "loss": 0.5968, "step": 3122 }, { "epoch": 0.4, "grad_norm": 0.8544331089889083, "learning_rate": 1.3704025217306665e-06, "loss": 0.6514, "step": 3123 }, { "epoch": 0.4, "grad_norm": 0.8370021669868832, "learning_rate": 1.3700191806821736e-06, "loss": 0.6372, "step": 3124 }, { "epoch": 0.4, "grad_norm": 0.7986911461133066, "learning_rate": 1.3696357766232688e-06, "loss": 0.6058, "step": 3125 }, { "epoch": 0.4, "grad_norm": 0.6689093839745374, "learning_rate": 1.3692523096192426e-06, "loss": 0.5592, "step": 3126 }, { "epoch": 0.4, "grad_norm": 1.0717142386647223, "learning_rate": 1.3688687797353947e-06, "loss": 0.6302, "step": 3127 }, { "epoch": 0.4, "grad_norm": 0.747197193819771, "learning_rate": 1.3684851870370361e-06, "loss": 0.6016, "step": 3128 }, { "epoch": 0.4, "grad_norm": 0.877188497479501, "learning_rate": 1.3681015315894894e-06, "loss": 0.6387, "step": 3129 }, { "epoch": 0.4, "grad_norm": 0.9247276902703885, "learning_rate": 1.3677178134580864e-06, "loss": 0.6822, "step": 3130 }, { "epoch": 0.4, "grad_norm": 0.6887574677773939, "learning_rate": 1.3673340327081708e-06, "loss": 0.5489, "step": 3131 }, { "epoch": 0.4, "grad_norm": 0.8526943693413179, "learning_rate": 1.3669501894050965e-06, "loss": 0.6922, "step": 3132 }, { "epoch": 0.4, "grad_norm": 0.8304518035434634, "learning_rate": 1.3665662836142276e-06, "loss": 0.5929, "step": 3133 }, { "epoch": 0.4, "grad_norm": 0.6331114649829831, "learning_rate": 1.3661823154009395e-06, "loss": 0.5417, "step": 3134 }, { "epoch": 0.4, "grad_norm": 0.8502887746345837, "learning_rate": 1.365798284830618e-06, "loss": 0.6374, "step": 3135 }, { "epoch": 0.4, "grad_norm": 0.8642413734434841, "learning_rate": 1.3654141919686595e-06, "loss": 0.6276, "step": 3136 }, { "epoch": 0.4, "grad_norm": 0.7091491060224494, "learning_rate": 1.3650300368804708e-06, "loss": 0.5453, "step": 3137 }, { "epoch": 0.4, "grad_norm": 0.9307605324887979, "learning_rate": 1.3646458196314695e-06, "loss": 0.623, "step": 3138 }, { "epoch": 0.4, "grad_norm": 0.6415044754138837, "learning_rate": 1.3642615402870843e-06, "loss": 0.5221, "step": 3139 }, { "epoch": 0.4, "grad_norm": 0.83617105723574, "learning_rate": 1.3638771989127539e-06, "loss": 0.5845, "step": 3140 }, { "epoch": 0.4, "grad_norm": 0.7822973256829734, "learning_rate": 1.3634927955739268e-06, "loss": 0.5476, "step": 3141 }, { "epoch": 0.4, "grad_norm": 0.6476854661631996, "learning_rate": 1.3631083303360638e-06, "loss": 0.5269, "step": 3142 }, { "epoch": 0.4, "grad_norm": 0.8666166236841678, "learning_rate": 1.3627238032646344e-06, "loss": 0.6059, "step": 3143 }, { "epoch": 0.4, "grad_norm": 0.6463605818779308, "learning_rate": 1.3623392144251208e-06, "loss": 0.4798, "step": 3144 }, { "epoch": 0.4, "grad_norm": 0.7624725078840393, "learning_rate": 1.3619545638830135e-06, "loss": 0.5876, "step": 3145 }, { "epoch": 0.4, "grad_norm": 0.6723072603347222, "learning_rate": 1.3615698517038147e-06, "loss": 0.5645, "step": 3146 }, { "epoch": 0.4, "grad_norm": 0.727938924759253, "learning_rate": 1.3611850779530372e-06, "loss": 0.5521, "step": 3147 }, { "epoch": 0.4, "grad_norm": 0.6754738633814731, "learning_rate": 1.3608002426962037e-06, "loss": 0.5658, "step": 3148 }, { "epoch": 0.4, "grad_norm": 0.978854654804893, "learning_rate": 1.3604153459988477e-06, "loss": 0.6582, "step": 3149 }, { "epoch": 0.4, "grad_norm": 0.6935635550638868, "learning_rate": 1.360030387926513e-06, "loss": 0.5103, "step": 3150 }, { "epoch": 0.4, "grad_norm": 7.807264348455229, "learning_rate": 1.359645368544754e-06, "loss": 0.6146, "step": 3151 }, { "epoch": 0.4, "grad_norm": 0.8422202752377788, "learning_rate": 1.3592602879191355e-06, "loss": 0.6492, "step": 3152 }, { "epoch": 0.4, "grad_norm": 0.6273807932752843, "learning_rate": 1.3588751461152326e-06, "loss": 0.5396, "step": 3153 }, { "epoch": 0.4, "grad_norm": 0.8055941314827889, "learning_rate": 1.3584899431986312e-06, "loss": 0.5858, "step": 3154 }, { "epoch": 0.4, "grad_norm": 0.7446448759696299, "learning_rate": 1.3581046792349268e-06, "loss": 0.5673, "step": 3155 }, { "epoch": 0.4, "grad_norm": 0.7601649806980579, "learning_rate": 1.3577193542897268e-06, "loss": 0.6, "step": 3156 }, { "epoch": 0.4, "grad_norm": 0.7409455094714247, "learning_rate": 1.357333968428647e-06, "loss": 0.5341, "step": 3157 }, { "epoch": 0.4, "grad_norm": 0.81195555360734, "learning_rate": 1.3569485217173154e-06, "loss": 0.5782, "step": 3158 }, { "epoch": 0.4, "grad_norm": 1.4578402339710572, "learning_rate": 1.3565630142213688e-06, "loss": 0.5474, "step": 3159 }, { "epoch": 0.4, "grad_norm": 0.7311758558170522, "learning_rate": 1.3561774460064558e-06, "loss": 0.5755, "step": 3160 }, { "epoch": 0.4, "grad_norm": 1.6113590002475466, "learning_rate": 1.3557918171382346e-06, "loss": 0.5995, "step": 3161 }, { "epoch": 0.4, "grad_norm": 0.7257949969455444, "learning_rate": 1.3554061276823734e-06, "loss": 0.5561, "step": 3162 }, { "epoch": 0.4, "grad_norm": 0.7246678457553412, "learning_rate": 1.355020377704551e-06, "loss": 0.5298, "step": 3163 }, { "epoch": 0.4, "grad_norm": 0.7895414637291118, "learning_rate": 1.354634567270457e-06, "loss": 0.5584, "step": 3164 }, { "epoch": 0.4, "grad_norm": 0.8923554945155004, "learning_rate": 1.3542486964457908e-06, "loss": 0.6145, "step": 3165 }, { "epoch": 0.4, "grad_norm": 0.6591103543333665, "learning_rate": 1.3538627652962623e-06, "loss": 0.4964, "step": 3166 }, { "epoch": 0.4, "grad_norm": 0.6018531932359873, "learning_rate": 1.3534767738875912e-06, "loss": 0.5199, "step": 3167 }, { "epoch": 0.4, "grad_norm": 0.8488573210629706, "learning_rate": 1.3530907222855082e-06, "loss": 0.6438, "step": 3168 }, { "epoch": 0.4, "grad_norm": 0.7989660437156402, "learning_rate": 1.3527046105557533e-06, "loss": 0.5447, "step": 3169 }, { "epoch": 0.4, "grad_norm": 0.8978216842932966, "learning_rate": 1.3523184387640778e-06, "loss": 0.5685, "step": 3170 }, { "epoch": 0.4, "grad_norm": 0.809185518203035, "learning_rate": 1.3519322069762426e-06, "loss": 0.6423, "step": 3171 }, { "epoch": 0.4, "grad_norm": 0.7597523496026483, "learning_rate": 1.3515459152580188e-06, "loss": 0.5692, "step": 3172 }, { "epoch": 0.4, "grad_norm": 0.770231835490272, "learning_rate": 1.3511595636751884e-06, "loss": 0.6442, "step": 3173 }, { "epoch": 0.4, "grad_norm": 0.8101266705127789, "learning_rate": 1.3507731522935423e-06, "loss": 0.5725, "step": 3174 }, { "epoch": 0.4, "grad_norm": 0.6753879915997955, "learning_rate": 1.350386681178883e-06, "loss": 0.5731, "step": 3175 }, { "epoch": 0.4, "grad_norm": 0.8328720995548307, "learning_rate": 1.3500001503970222e-06, "loss": 0.6846, "step": 3176 }, { "epoch": 0.4, "grad_norm": 0.9374435250492774, "learning_rate": 1.3496135600137819e-06, "loss": 0.6713, "step": 3177 }, { "epoch": 0.4, "grad_norm": 0.7788667464014785, "learning_rate": 1.3492269100949947e-06, "loss": 0.5946, "step": 3178 }, { "epoch": 0.4, "grad_norm": 0.8083630862370003, "learning_rate": 1.3488402007065028e-06, "loss": 0.5938, "step": 3179 }, { "epoch": 0.41, "grad_norm": 0.7373585914822439, "learning_rate": 1.3484534319141589e-06, "loss": 0.603, "step": 3180 }, { "epoch": 0.41, "grad_norm": 0.6743197913779339, "learning_rate": 1.3480666037838258e-06, "loss": 0.5753, "step": 3181 }, { "epoch": 0.41, "grad_norm": 0.7392879429291906, "learning_rate": 1.3476797163813763e-06, "loss": 0.5936, "step": 3182 }, { "epoch": 0.41, "grad_norm": 0.6553076300650169, "learning_rate": 1.347292769772693e-06, "loss": 0.5536, "step": 3183 }, { "epoch": 0.41, "grad_norm": 0.6875834155421443, "learning_rate": 1.3469057640236692e-06, "loss": 0.4904, "step": 3184 }, { "epoch": 0.41, "grad_norm": 0.9341915234218361, "learning_rate": 1.3465186992002076e-06, "loss": 0.6325, "step": 3185 }, { "epoch": 0.41, "grad_norm": 0.7687823625064975, "learning_rate": 1.3461315753682217e-06, "loss": 0.5817, "step": 3186 }, { "epoch": 0.41, "grad_norm": 0.6790283646174142, "learning_rate": 1.3457443925936345e-06, "loss": 0.5369, "step": 3187 }, { "epoch": 0.41, "grad_norm": 0.8637083633029924, "learning_rate": 1.345357150942379e-06, "loss": 0.6311, "step": 3188 }, { "epoch": 0.41, "grad_norm": 1.04125083334269, "learning_rate": 1.3449698504803987e-06, "loss": 0.662, "step": 3189 }, { "epoch": 0.41, "grad_norm": 0.7817874265299143, "learning_rate": 1.344582491273647e-06, "loss": 0.5921, "step": 3190 }, { "epoch": 0.41, "grad_norm": 0.9366924946113052, "learning_rate": 1.3441950733880868e-06, "loss": 0.6464, "step": 3191 }, { "epoch": 0.41, "grad_norm": 0.9837820954024494, "learning_rate": 1.3438075968896912e-06, "loss": 0.6376, "step": 3192 }, { "epoch": 0.41, "grad_norm": 0.8581081309300206, "learning_rate": 1.3434200618444437e-06, "loss": 0.606, "step": 3193 }, { "epoch": 0.41, "grad_norm": 0.7747160596833594, "learning_rate": 1.3430324683183374e-06, "loss": 0.6469, "step": 3194 }, { "epoch": 0.41, "grad_norm": 0.9041638445708565, "learning_rate": 1.3426448163773754e-06, "loss": 0.5664, "step": 3195 }, { "epoch": 0.41, "grad_norm": 0.9632457733948682, "learning_rate": 1.342257106087571e-06, "loss": 0.6173, "step": 3196 }, { "epoch": 0.41, "grad_norm": 0.5577196273315523, "learning_rate": 1.3418693375149468e-06, "loss": 0.4592, "step": 3197 }, { "epoch": 0.41, "grad_norm": 1.0150767730592254, "learning_rate": 1.3414815107255356e-06, "loss": 0.6254, "step": 3198 }, { "epoch": 0.41, "grad_norm": 0.7086105708653562, "learning_rate": 1.3410936257853813e-06, "loss": 0.5075, "step": 3199 }, { "epoch": 0.41, "grad_norm": 0.9861404915567523, "learning_rate": 1.3407056827605358e-06, "loss": 0.6755, "step": 3200 }, { "epoch": 0.41, "grad_norm": 0.8312200664055616, "learning_rate": 1.340317681717062e-06, "loss": 0.5709, "step": 3201 }, { "epoch": 0.41, "grad_norm": 0.8509505972842122, "learning_rate": 1.3399296227210325e-06, "loss": 0.6523, "step": 3202 }, { "epoch": 0.41, "grad_norm": 0.8269904426955954, "learning_rate": 1.3395415058385295e-06, "loss": 0.5561, "step": 3203 }, { "epoch": 0.41, "grad_norm": 0.6520606831182475, "learning_rate": 1.3391533311356456e-06, "loss": 0.5512, "step": 3204 }, { "epoch": 0.41, "grad_norm": 0.6815837590776441, "learning_rate": 1.338765098678482e-06, "loss": 0.5671, "step": 3205 }, { "epoch": 0.41, "grad_norm": 0.7026734045243225, "learning_rate": 1.3383768085331519e-06, "loss": 0.5241, "step": 3206 }, { "epoch": 0.41, "grad_norm": 0.5779496527062108, "learning_rate": 1.3379884607657762e-06, "loss": 0.5244, "step": 3207 }, { "epoch": 0.41, "grad_norm": 0.7166224513347719, "learning_rate": 1.337600055442487e-06, "loss": 0.5523, "step": 3208 }, { "epoch": 0.41, "grad_norm": 0.6868285517960843, "learning_rate": 1.3372115926294255e-06, "loss": 0.5573, "step": 3209 }, { "epoch": 0.41, "grad_norm": 0.828572894529487, "learning_rate": 1.336823072392743e-06, "loss": 0.6514, "step": 3210 }, { "epoch": 0.41, "grad_norm": 0.7674812262136249, "learning_rate": 1.3364344947986e-06, "loss": 0.5723, "step": 3211 }, { "epoch": 0.41, "grad_norm": 0.7343969418610498, "learning_rate": 1.3360458599131674e-06, "loss": 0.6015, "step": 3212 }, { "epoch": 0.41, "grad_norm": 0.8302521786110378, "learning_rate": 1.335657167802626e-06, "loss": 0.6106, "step": 3213 }, { "epoch": 0.41, "grad_norm": 0.8567005059301759, "learning_rate": 1.3352684185331653e-06, "loss": 0.6102, "step": 3214 }, { "epoch": 0.41, "grad_norm": 0.8547885192745827, "learning_rate": 1.334879612170986e-06, "loss": 0.6209, "step": 3215 }, { "epoch": 0.41, "grad_norm": 0.8263934551928097, "learning_rate": 1.3344907487822976e-06, "loss": 0.6231, "step": 3216 }, { "epoch": 0.41, "grad_norm": 0.8144031028232864, "learning_rate": 1.3341018284333191e-06, "loss": 0.5717, "step": 3217 }, { "epoch": 0.41, "grad_norm": 1.1071137090028118, "learning_rate": 1.3337128511902802e-06, "loss": 0.6306, "step": 3218 }, { "epoch": 0.41, "grad_norm": 0.9926357057765708, "learning_rate": 1.3333238171194193e-06, "loss": 0.6937, "step": 3219 }, { "epoch": 0.41, "grad_norm": 0.7061901927485962, "learning_rate": 1.3329347262869849e-06, "loss": 0.5681, "step": 3220 }, { "epoch": 0.41, "grad_norm": 0.7621318450675911, "learning_rate": 1.332545578759235e-06, "loss": 0.5232, "step": 3221 }, { "epoch": 0.41, "grad_norm": 1.0593868469669958, "learning_rate": 1.3321563746024374e-06, "loss": 0.6153, "step": 3222 }, { "epoch": 0.41, "grad_norm": 0.6190417366021056, "learning_rate": 1.33176711388287e-06, "loss": 0.5182, "step": 3223 }, { "epoch": 0.41, "grad_norm": 0.682193214251899, "learning_rate": 1.3313777966668189e-06, "loss": 0.5336, "step": 3224 }, { "epoch": 0.41, "grad_norm": 0.8293320959855693, "learning_rate": 1.3309884230205815e-06, "loss": 0.6275, "step": 3225 }, { "epoch": 0.41, "grad_norm": 0.7229915027538876, "learning_rate": 1.3305989930104637e-06, "loss": 0.5411, "step": 3226 }, { "epoch": 0.41, "grad_norm": 0.825661076950281, "learning_rate": 1.330209506702782e-06, "loss": 0.5892, "step": 3227 }, { "epoch": 0.41, "grad_norm": 0.7284466195676561, "learning_rate": 1.3298199641638608e-06, "loss": 0.5836, "step": 3228 }, { "epoch": 0.41, "grad_norm": 0.9874980091651275, "learning_rate": 1.3294303654600362e-06, "loss": 0.6325, "step": 3229 }, { "epoch": 0.41, "grad_norm": 0.6350749365439009, "learning_rate": 1.3290407106576522e-06, "loss": 0.5867, "step": 3230 }, { "epoch": 0.41, "grad_norm": 0.7394017616640288, "learning_rate": 1.328650999823063e-06, "loss": 0.5954, "step": 3231 }, { "epoch": 0.41, "grad_norm": 0.7490331417970126, "learning_rate": 1.3282612330226327e-06, "loss": 0.5265, "step": 3232 }, { "epoch": 0.41, "grad_norm": 0.8923344869383985, "learning_rate": 1.327871410322734e-06, "loss": 0.6457, "step": 3233 }, { "epoch": 0.41, "grad_norm": 0.6437171311499403, "learning_rate": 1.3274815317897496e-06, "loss": 0.5205, "step": 3234 }, { "epoch": 0.41, "grad_norm": 0.6408675055178553, "learning_rate": 1.3270915974900726e-06, "loss": 0.5797, "step": 3235 }, { "epoch": 0.41, "grad_norm": 0.8419146248207814, "learning_rate": 1.3267016074901036e-06, "loss": 0.6205, "step": 3236 }, { "epoch": 0.41, "grad_norm": 0.9393184627688516, "learning_rate": 1.3263115618562548e-06, "loss": 0.6865, "step": 3237 }, { "epoch": 0.41, "grad_norm": 0.8540253360010526, "learning_rate": 1.3259214606549462e-06, "loss": 0.6216, "step": 3238 }, { "epoch": 0.41, "grad_norm": 0.6238602301338959, "learning_rate": 1.3255313039526086e-06, "loss": 0.5279, "step": 3239 }, { "epoch": 0.41, "grad_norm": 0.8825220536807993, "learning_rate": 1.3251410918156812e-06, "loss": 0.6451, "step": 3240 }, { "epoch": 0.41, "grad_norm": 0.6972580561275145, "learning_rate": 1.3247508243106128e-06, "loss": 0.5618, "step": 3241 }, { "epoch": 0.41, "grad_norm": 0.8725191295321476, "learning_rate": 1.3243605015038626e-06, "loss": 0.5624, "step": 3242 }, { "epoch": 0.41, "grad_norm": 0.9364130124977239, "learning_rate": 1.3239701234618982e-06, "loss": 0.6032, "step": 3243 }, { "epoch": 0.41, "grad_norm": 0.7939521178433963, "learning_rate": 1.3235796902511967e-06, "loss": 0.6004, "step": 3244 }, { "epoch": 0.41, "grad_norm": 0.84684225317031, "learning_rate": 1.3231892019382448e-06, "loss": 0.6399, "step": 3245 }, { "epoch": 0.41, "grad_norm": 0.6953278979413027, "learning_rate": 1.3227986585895392e-06, "loss": 0.5439, "step": 3246 }, { "epoch": 0.41, "grad_norm": 0.6257395945902634, "learning_rate": 1.3224080602715849e-06, "loss": 0.5885, "step": 3247 }, { "epoch": 0.41, "grad_norm": 0.794537493401352, "learning_rate": 1.3220174070508965e-06, "loss": 0.5203, "step": 3248 }, { "epoch": 0.41, "grad_norm": 0.875391361082337, "learning_rate": 1.3216266989939985e-06, "loss": 0.6744, "step": 3249 }, { "epoch": 0.41, "grad_norm": 0.707289779302033, "learning_rate": 1.3212359361674244e-06, "loss": 0.5614, "step": 3250 }, { "epoch": 0.41, "grad_norm": 0.5675252365456321, "learning_rate": 1.320845118637717e-06, "loss": 0.5006, "step": 3251 }, { "epoch": 0.41, "grad_norm": 0.7992583593614434, "learning_rate": 1.3204542464714284e-06, "loss": 0.632, "step": 3252 }, { "epoch": 0.41, "grad_norm": 0.9768740470911037, "learning_rate": 1.32006331973512e-06, "loss": 0.5968, "step": 3253 }, { "epoch": 0.41, "grad_norm": 0.835988949470314, "learning_rate": 1.3196723384953625e-06, "loss": 0.5635, "step": 3254 }, { "epoch": 0.41, "grad_norm": 0.6600769389206114, "learning_rate": 1.3192813028187365e-06, "loss": 0.5329, "step": 3255 }, { "epoch": 0.41, "grad_norm": 0.7223176993339757, "learning_rate": 1.3188902127718307e-06, "loss": 0.5909, "step": 3256 }, { "epoch": 0.41, "grad_norm": 3.0284075119527447, "learning_rate": 1.3184990684212436e-06, "loss": 0.68, "step": 3257 }, { "epoch": 0.42, "grad_norm": 0.8717203464863248, "learning_rate": 1.3181078698335835e-06, "loss": 0.6254, "step": 3258 }, { "epoch": 0.42, "grad_norm": 0.6887856399857266, "learning_rate": 1.3177166170754672e-06, "loss": 0.5874, "step": 3259 }, { "epoch": 0.42, "grad_norm": 0.7372800398653568, "learning_rate": 1.317325310213521e-06, "loss": 0.5696, "step": 3260 }, { "epoch": 0.42, "grad_norm": 0.9353425292776814, "learning_rate": 1.3169339493143804e-06, "loss": 0.6404, "step": 3261 }, { "epoch": 0.42, "grad_norm": 0.6599754107419443, "learning_rate": 1.3165425344446897e-06, "loss": 0.5374, "step": 3262 }, { "epoch": 0.42, "grad_norm": 0.8273748784814803, "learning_rate": 1.3161510656711032e-06, "loss": 0.5818, "step": 3263 }, { "epoch": 0.42, "grad_norm": 0.639812927435432, "learning_rate": 1.3157595430602842e-06, "loss": 0.5337, "step": 3264 }, { "epoch": 0.42, "grad_norm": 0.9335668778257221, "learning_rate": 1.315367966678904e-06, "loss": 0.6793, "step": 3265 }, { "epoch": 0.42, "grad_norm": 0.7864941229991357, "learning_rate": 1.314976336593645e-06, "loss": 0.5976, "step": 3266 }, { "epoch": 0.42, "grad_norm": 0.7610356316307099, "learning_rate": 1.3145846528711972e-06, "loss": 0.5805, "step": 3267 }, { "epoch": 0.42, "grad_norm": 0.9453438087375742, "learning_rate": 1.3141929155782606e-06, "loss": 0.6748, "step": 3268 }, { "epoch": 0.42, "grad_norm": 0.6789994110028671, "learning_rate": 1.3138011247815432e-06, "loss": 0.5562, "step": 3269 }, { "epoch": 0.42, "grad_norm": 0.7401736692214915, "learning_rate": 1.3134092805477635e-06, "loss": 0.5712, "step": 3270 }, { "epoch": 0.42, "grad_norm": 0.6694640474481076, "learning_rate": 1.3130173829436486e-06, "loss": 0.5625, "step": 3271 }, { "epoch": 0.42, "grad_norm": 0.8352922375837759, "learning_rate": 1.3126254320359342e-06, "loss": 0.6711, "step": 3272 }, { "epoch": 0.42, "grad_norm": 0.7444006361870095, "learning_rate": 1.3122334278913657e-06, "loss": 0.5561, "step": 3273 }, { "epoch": 0.42, "grad_norm": 0.6154834963246166, "learning_rate": 1.3118413705766973e-06, "loss": 0.5421, "step": 3274 }, { "epoch": 0.42, "grad_norm": 0.8050774339312807, "learning_rate": 1.311449260158692e-06, "loss": 0.6038, "step": 3275 }, { "epoch": 0.42, "grad_norm": 0.6343657576946908, "learning_rate": 1.3110570967041229e-06, "loss": 0.5213, "step": 3276 }, { "epoch": 0.42, "grad_norm": 0.6937406759934448, "learning_rate": 1.3106648802797704e-06, "loss": 0.5782, "step": 3277 }, { "epoch": 0.42, "grad_norm": 1.0076431919405162, "learning_rate": 1.3102726109524254e-06, "loss": 0.5861, "step": 3278 }, { "epoch": 0.42, "grad_norm": 0.6563030106577998, "learning_rate": 1.3098802887888871e-06, "loss": 0.4955, "step": 3279 }, { "epoch": 0.42, "grad_norm": 0.6538920608749379, "learning_rate": 1.3094879138559639e-06, "loss": 0.5663, "step": 3280 }, { "epoch": 0.42, "grad_norm": 0.652825857247094, "learning_rate": 1.3090954862204732e-06, "loss": 0.5542, "step": 3281 }, { "epoch": 0.42, "grad_norm": 0.8587935110954399, "learning_rate": 1.3087030059492415e-06, "loss": 0.5759, "step": 3282 }, { "epoch": 0.42, "grad_norm": 0.8457222209373402, "learning_rate": 1.3083104731091036e-06, "loss": 0.6263, "step": 3283 }, { "epoch": 0.42, "grad_norm": 0.8517683680205455, "learning_rate": 1.3079178877669043e-06, "loss": 0.6748, "step": 3284 }, { "epoch": 0.42, "grad_norm": 0.8418843932122059, "learning_rate": 1.3075252499894962e-06, "loss": 0.574, "step": 3285 }, { "epoch": 0.42, "grad_norm": 0.6956242145074383, "learning_rate": 1.3071325598437422e-06, "loss": 0.5483, "step": 3286 }, { "epoch": 0.42, "grad_norm": 0.8422642122805161, "learning_rate": 1.3067398173965129e-06, "loss": 0.6222, "step": 3287 }, { "epoch": 0.42, "grad_norm": 0.7163594974871501, "learning_rate": 1.3063470227146883e-06, "loss": 0.5916, "step": 3288 }, { "epoch": 0.42, "grad_norm": 0.8523255636621831, "learning_rate": 1.3059541758651573e-06, "loss": 0.6061, "step": 3289 }, { "epoch": 0.42, "grad_norm": 0.8168275933198325, "learning_rate": 1.3055612769148174e-06, "loss": 0.6589, "step": 3290 }, { "epoch": 0.42, "grad_norm": 0.6877761232641633, "learning_rate": 1.3051683259305754e-06, "loss": 0.5294, "step": 3291 }, { "epoch": 0.42, "grad_norm": 0.6688557810007784, "learning_rate": 1.3047753229793467e-06, "loss": 0.5574, "step": 3292 }, { "epoch": 0.42, "grad_norm": 0.6395487869905357, "learning_rate": 1.3043822681280557e-06, "loss": 0.525, "step": 3293 }, { "epoch": 0.42, "grad_norm": 0.7653283145590872, "learning_rate": 1.3039891614436353e-06, "loss": 0.5012, "step": 3294 }, { "epoch": 0.42, "grad_norm": 0.878619747254257, "learning_rate": 1.3035960029930278e-06, "loss": 0.6576, "step": 3295 }, { "epoch": 0.42, "grad_norm": 0.6534335364800078, "learning_rate": 1.3032027928431836e-06, "loss": 0.5299, "step": 3296 }, { "epoch": 0.42, "grad_norm": 0.8724891374183859, "learning_rate": 1.3028095310610628e-06, "loss": 0.6419, "step": 3297 }, { "epoch": 0.42, "grad_norm": 0.8839956409009281, "learning_rate": 1.3024162177136334e-06, "loss": 0.6162, "step": 3298 }, { "epoch": 0.42, "grad_norm": 0.7734297232894441, "learning_rate": 1.3020228528678728e-06, "loss": 0.5555, "step": 3299 }, { "epoch": 0.42, "grad_norm": 0.7711563165219623, "learning_rate": 1.3016294365907667e-06, "loss": 0.5824, "step": 3300 }, { "epoch": 0.42, "grad_norm": 0.7771673801541849, "learning_rate": 1.3012359689493098e-06, "loss": 0.6379, "step": 3301 }, { "epoch": 0.42, "grad_norm": 0.7285088780928793, "learning_rate": 1.3008424500105057e-06, "loss": 0.5607, "step": 3302 }, { "epoch": 0.42, "grad_norm": 0.6257770971449789, "learning_rate": 1.3004488798413665e-06, "loss": 0.4782, "step": 3303 }, { "epoch": 0.42, "grad_norm": 0.8799810489433684, "learning_rate": 1.3000552585089132e-06, "loss": 0.7265, "step": 3304 }, { "epoch": 0.42, "grad_norm": 0.6606651364879231, "learning_rate": 1.2996615860801755e-06, "loss": 0.5383, "step": 3305 }, { "epoch": 0.42, "grad_norm": 0.6590870983392961, "learning_rate": 1.2992678626221913e-06, "loss": 0.5474, "step": 3306 }, { "epoch": 0.42, "grad_norm": 0.9381068943661567, "learning_rate": 1.2988740882020078e-06, "loss": 0.6289, "step": 3307 }, { "epoch": 0.42, "grad_norm": 0.8984664136537935, "learning_rate": 1.2984802628866806e-06, "loss": 0.6477, "step": 3308 }, { "epoch": 0.42, "grad_norm": 0.6860120465549272, "learning_rate": 1.2980863867432744e-06, "loss": 0.5146, "step": 3309 }, { "epoch": 0.42, "grad_norm": 0.6714889889524687, "learning_rate": 1.2976924598388622e-06, "loss": 0.4868, "step": 3310 }, { "epoch": 0.42, "grad_norm": 0.7798243901241998, "learning_rate": 1.297298482240525e-06, "loss": 0.6203, "step": 3311 }, { "epoch": 0.42, "grad_norm": 0.6594567957805334, "learning_rate": 1.2969044540153538e-06, "loss": 0.5382, "step": 3312 }, { "epoch": 0.42, "grad_norm": 0.6834619348970734, "learning_rate": 1.296510375230447e-06, "loss": 0.5571, "step": 3313 }, { "epoch": 0.42, "grad_norm": 0.7238979531337046, "learning_rate": 1.2961162459529124e-06, "loss": 0.5606, "step": 3314 }, { "epoch": 0.42, "grad_norm": 0.5727852685586161, "learning_rate": 1.295722066249866e-06, "loss": 0.4845, "step": 3315 }, { "epoch": 0.42, "grad_norm": 0.5646015288128052, "learning_rate": 1.2953278361884327e-06, "loss": 0.5245, "step": 3316 }, { "epoch": 0.42, "grad_norm": 1.1400377531401094, "learning_rate": 1.2949335558357455e-06, "loss": 0.6418, "step": 3317 }, { "epoch": 0.42, "grad_norm": 0.9525625221539132, "learning_rate": 1.2945392252589464e-06, "loss": 0.6791, "step": 3318 }, { "epoch": 0.42, "grad_norm": 1.0096711081335639, "learning_rate": 1.2941448445251857e-06, "loss": 0.638, "step": 3319 }, { "epoch": 0.42, "grad_norm": 0.7258459002494552, "learning_rate": 1.2937504137016222e-06, "loss": 0.5738, "step": 3320 }, { "epoch": 0.42, "grad_norm": 0.7556854520973495, "learning_rate": 1.2933559328554239e-06, "loss": 0.6384, "step": 3321 }, { "epoch": 0.42, "grad_norm": 0.8801951243075732, "learning_rate": 1.292961402053766e-06, "loss": 0.6369, "step": 3322 }, { "epoch": 0.42, "grad_norm": 0.7045401739039692, "learning_rate": 1.2925668213638334e-06, "loss": 0.519, "step": 3323 }, { "epoch": 0.42, "grad_norm": 0.7538596239881666, "learning_rate": 1.292172190852819e-06, "loss": 0.5844, "step": 3324 }, { "epoch": 0.42, "grad_norm": 0.8846163323567655, "learning_rate": 1.2917775105879245e-06, "loss": 0.6177, "step": 3325 }, { "epoch": 0.42, "grad_norm": 0.7303755344174485, "learning_rate": 1.2913827806363598e-06, "loss": 0.586, "step": 3326 }, { "epoch": 0.42, "grad_norm": 0.8934896893571518, "learning_rate": 1.2909880010653429e-06, "loss": 0.6802, "step": 3327 }, { "epoch": 0.42, "grad_norm": 0.7102038836386658, "learning_rate": 1.290593171942101e-06, "loss": 0.537, "step": 3328 }, { "epoch": 0.42, "grad_norm": 0.8312001312753735, "learning_rate": 1.2901982933338693e-06, "loss": 0.6384, "step": 3329 }, { "epoch": 0.42, "grad_norm": 0.7529674224506352, "learning_rate": 1.2898033653078918e-06, "loss": 0.5845, "step": 3330 }, { "epoch": 0.42, "grad_norm": 1.1005330143752516, "learning_rate": 1.28940838793142e-06, "loss": 0.6755, "step": 3331 }, { "epoch": 0.42, "grad_norm": 0.7747228312033063, "learning_rate": 1.289013361271715e-06, "loss": 0.525, "step": 3332 }, { "epoch": 0.42, "grad_norm": 0.8844564195390425, "learning_rate": 1.2886182853960453e-06, "loss": 0.5565, "step": 3333 }, { "epoch": 0.42, "grad_norm": 0.7812192245778813, "learning_rate": 1.2882231603716882e-06, "loss": 0.5603, "step": 3334 }, { "epoch": 0.42, "grad_norm": 0.8699741417611265, "learning_rate": 1.2878279862659299e-06, "loss": 0.6544, "step": 3335 }, { "epoch": 0.42, "grad_norm": 0.6618041237046565, "learning_rate": 1.287432763146064e-06, "loss": 0.5897, "step": 3336 }, { "epoch": 0.43, "grad_norm": 0.9823142683133963, "learning_rate": 1.2870374910793931e-06, "loss": 0.6819, "step": 3337 }, { "epoch": 0.43, "grad_norm": 0.6236889047936274, "learning_rate": 1.2866421701332277e-06, "loss": 0.5178, "step": 3338 }, { "epoch": 0.43, "grad_norm": 0.6613926178290512, "learning_rate": 1.2862468003748868e-06, "loss": 0.5339, "step": 3339 }, { "epoch": 0.43, "grad_norm": 0.655019620224501, "learning_rate": 1.285851381871698e-06, "loss": 0.5688, "step": 3340 }, { "epoch": 0.43, "grad_norm": 0.64133204393391, "learning_rate": 1.285455914690997e-06, "loss": 0.5617, "step": 3341 }, { "epoch": 0.43, "grad_norm": 0.6712138773433193, "learning_rate": 1.2850603989001274e-06, "loss": 0.5488, "step": 3342 }, { "epoch": 0.43, "grad_norm": 0.8116479914918476, "learning_rate": 1.2846648345664417e-06, "loss": 0.5971, "step": 3343 }, { "epoch": 0.43, "grad_norm": 0.6145426345684483, "learning_rate": 1.2842692217573004e-06, "loss": 0.5668, "step": 3344 }, { "epoch": 0.43, "grad_norm": 0.8499326715066393, "learning_rate": 1.283873560540072e-06, "loss": 0.6602, "step": 3345 }, { "epoch": 0.43, "grad_norm": 0.6574847498141632, "learning_rate": 1.2834778509821336e-06, "loss": 0.5525, "step": 3346 }, { "epoch": 0.43, "grad_norm": 0.813409694800284, "learning_rate": 1.2830820931508703e-06, "loss": 0.6433, "step": 3347 }, { "epoch": 0.43, "grad_norm": 0.6303618460433006, "learning_rate": 1.2826862871136758e-06, "loss": 0.5202, "step": 3348 }, { "epoch": 0.43, "grad_norm": 0.8732138105022681, "learning_rate": 1.2822904329379518e-06, "loss": 0.6628, "step": 3349 }, { "epoch": 0.43, "grad_norm": 0.6681841802266762, "learning_rate": 1.2818945306911078e-06, "loss": 0.5268, "step": 3350 }, { "epoch": 0.43, "grad_norm": 0.8421769768090649, "learning_rate": 1.2814985804405618e-06, "loss": 0.6236, "step": 3351 }, { "epoch": 0.43, "grad_norm": 2.7755706909416378, "learning_rate": 1.2811025822537403e-06, "loss": 0.6961, "step": 3352 }, { "epoch": 0.43, "grad_norm": 0.8632243224795411, "learning_rate": 1.2807065361980775e-06, "loss": 0.6615, "step": 3353 }, { "epoch": 0.43, "grad_norm": 0.8390964227970069, "learning_rate": 1.2803104423410163e-06, "loss": 0.6404, "step": 3354 }, { "epoch": 0.43, "grad_norm": 0.6815920601011772, "learning_rate": 1.2799143007500066e-06, "loss": 0.5491, "step": 3355 }, { "epoch": 0.43, "grad_norm": 0.6282068161711633, "learning_rate": 1.2795181114925077e-06, "loss": 0.5493, "step": 3356 }, { "epoch": 0.43, "grad_norm": 0.6853154986537563, "learning_rate": 1.2791218746359865e-06, "loss": 0.5722, "step": 3357 }, { "epoch": 0.43, "grad_norm": 0.9280089992684355, "learning_rate": 1.2787255902479177e-06, "loss": 0.6027, "step": 3358 }, { "epoch": 0.43, "grad_norm": 0.6067628356687095, "learning_rate": 1.278329258395785e-06, "loss": 0.5114, "step": 3359 }, { "epoch": 0.43, "grad_norm": 0.9800808081891872, "learning_rate": 1.2779328791470788e-06, "loss": 0.6647, "step": 3360 }, { "epoch": 0.43, "grad_norm": 0.710059558208818, "learning_rate": 1.2775364525692992e-06, "loss": 0.5244, "step": 3361 }, { "epoch": 0.43, "grad_norm": 0.7042081052339655, "learning_rate": 1.277139978729953e-06, "loss": 0.5481, "step": 3362 }, { "epoch": 0.43, "grad_norm": 0.7912707458068898, "learning_rate": 1.2767434576965554e-06, "loss": 0.5789, "step": 3363 }, { "epoch": 0.43, "grad_norm": 0.5971891433469877, "learning_rate": 1.2763468895366302e-06, "loss": 0.5297, "step": 3364 }, { "epoch": 0.43, "grad_norm": 0.7322991169431246, "learning_rate": 1.275950274317709e-06, "loss": 0.5311, "step": 3365 }, { "epoch": 0.43, "grad_norm": 0.6092525470296591, "learning_rate": 1.2755536121073303e-06, "loss": 0.4881, "step": 3366 }, { "epoch": 0.43, "grad_norm": 0.9742116412972592, "learning_rate": 1.2751569029730425e-06, "loss": 0.6631, "step": 3367 }, { "epoch": 0.43, "grad_norm": 0.7695403692536669, "learning_rate": 1.2747601469824004e-06, "loss": 0.5214, "step": 3368 }, { "epoch": 0.43, "grad_norm": 0.9341757611024345, "learning_rate": 1.274363344202968e-06, "loss": 0.5945, "step": 3369 }, { "epoch": 0.43, "grad_norm": 0.7867373247523365, "learning_rate": 1.273966494702316e-06, "loss": 0.5663, "step": 3370 }, { "epoch": 0.43, "grad_norm": 0.8870320601230012, "learning_rate": 1.2735695985480246e-06, "loss": 0.6485, "step": 3371 }, { "epoch": 0.43, "grad_norm": 0.8316187401911879, "learning_rate": 1.2731726558076804e-06, "loss": 0.5677, "step": 3372 }, { "epoch": 0.43, "grad_norm": 0.9058338470846122, "learning_rate": 1.272775666548879e-06, "loss": 0.641, "step": 3373 }, { "epoch": 0.43, "grad_norm": 0.8170332040092044, "learning_rate": 1.272378630839223e-06, "loss": 0.6372, "step": 3374 }, { "epoch": 0.43, "grad_norm": 0.693806391769718, "learning_rate": 1.271981548746324e-06, "loss": 0.6364, "step": 3375 }, { "epoch": 0.43, "grad_norm": 0.6577175872382137, "learning_rate": 1.2715844203378006e-06, "loss": 0.5645, "step": 3376 }, { "epoch": 0.43, "grad_norm": 0.7509998844162311, "learning_rate": 1.2711872456812798e-06, "loss": 0.5516, "step": 3377 }, { "epoch": 0.43, "grad_norm": 0.6461950249872955, "learning_rate": 1.2707900248443963e-06, "loss": 0.528, "step": 3378 }, { "epoch": 0.43, "grad_norm": 0.7271464198795982, "learning_rate": 1.270392757894793e-06, "loss": 0.546, "step": 3379 }, { "epoch": 0.43, "grad_norm": 0.8273230341103045, "learning_rate": 1.2699954449001194e-06, "loss": 0.666, "step": 3380 }, { "epoch": 0.43, "grad_norm": 0.6995459867274946, "learning_rate": 1.269598085928034e-06, "loss": 0.5265, "step": 3381 }, { "epoch": 0.43, "grad_norm": 0.8456678224394687, "learning_rate": 1.2692006810462038e-06, "loss": 0.6585, "step": 3382 }, { "epoch": 0.43, "grad_norm": 0.7565589148455836, "learning_rate": 1.2688032303223023e-06, "loss": 0.6431, "step": 3383 }, { "epoch": 0.43, "grad_norm": 1.1385869516841591, "learning_rate": 1.2684057338240107e-06, "loss": 0.6291, "step": 3384 }, { "epoch": 0.43, "grad_norm": 0.5987028065304266, "learning_rate": 1.268008191619019e-06, "loss": 0.5064, "step": 3385 }, { "epoch": 0.43, "grad_norm": 0.7164745984306417, "learning_rate": 1.2676106037750245e-06, "loss": 0.5608, "step": 3386 }, { "epoch": 0.43, "grad_norm": 0.6261083118948386, "learning_rate": 1.2672129703597319e-06, "loss": 0.5081, "step": 3387 }, { "epoch": 0.43, "grad_norm": 0.8388443636536251, "learning_rate": 1.2668152914408544e-06, "loss": 0.5922, "step": 3388 }, { "epoch": 0.43, "grad_norm": 1.0239046358596777, "learning_rate": 1.2664175670861123e-06, "loss": 0.5991, "step": 3389 }, { "epoch": 0.43, "grad_norm": 0.6922970028050641, "learning_rate": 1.2660197973632342e-06, "loss": 0.5644, "step": 3390 }, { "epoch": 0.43, "grad_norm": 0.9427557757947126, "learning_rate": 1.265621982339956e-06, "loss": 0.5483, "step": 3391 }, { "epoch": 0.43, "grad_norm": 0.9088472495794239, "learning_rate": 1.2652241220840212e-06, "loss": 0.623, "step": 3392 }, { "epoch": 0.43, "grad_norm": 0.8145879286954059, "learning_rate": 1.2648262166631817e-06, "loss": 0.5934, "step": 3393 }, { "epoch": 0.43, "grad_norm": 0.8177304800390499, "learning_rate": 1.264428266145196e-06, "loss": 0.5675, "step": 3394 }, { "epoch": 0.43, "grad_norm": 0.9238719271570184, "learning_rate": 1.2640302705978316e-06, "loss": 0.6824, "step": 3395 }, { "epoch": 0.43, "grad_norm": 1.1621413569030212, "learning_rate": 1.263632230088863e-06, "loss": 0.5993, "step": 3396 }, { "epoch": 0.43, "grad_norm": 0.8158202483390974, "learning_rate": 1.263234144686072e-06, "loss": 0.6005, "step": 3397 }, { "epoch": 0.43, "grad_norm": 0.9803404650474931, "learning_rate": 1.2628360144572485e-06, "loss": 0.6591, "step": 3398 }, { "epoch": 0.43, "grad_norm": 0.5899113826155633, "learning_rate": 1.26243783947019e-06, "loss": 0.502, "step": 3399 }, { "epoch": 0.43, "grad_norm": 1.1623375560181854, "learning_rate": 1.2620396197927015e-06, "loss": 0.6717, "step": 3400 }, { "epoch": 0.43, "grad_norm": 0.7003538770088509, "learning_rate": 1.2616413554925955e-06, "loss": 0.5325, "step": 3401 }, { "epoch": 0.43, "grad_norm": 0.8885872131233911, "learning_rate": 1.2612430466376927e-06, "loss": 0.6773, "step": 3402 }, { "epoch": 0.43, "grad_norm": 0.7284611836028604, "learning_rate": 1.2608446932958204e-06, "loss": 0.5384, "step": 3403 }, { "epoch": 0.43, "grad_norm": 1.448395725539661, "learning_rate": 1.2604462955348142e-06, "loss": 0.6121, "step": 3404 }, { "epoch": 0.43, "grad_norm": 0.9604914172554712, "learning_rate": 1.2600478534225177e-06, "loss": 0.5951, "step": 3405 }, { "epoch": 0.43, "grad_norm": 1.006396651048926, "learning_rate": 1.2596493670267804e-06, "loss": 0.6359, "step": 3406 }, { "epoch": 0.43, "grad_norm": 0.7283109912120818, "learning_rate": 1.259250836415461e-06, "loss": 0.5391, "step": 3407 }, { "epoch": 0.43, "grad_norm": 0.8738993678837599, "learning_rate": 1.258852261656425e-06, "loss": 0.6537, "step": 3408 }, { "epoch": 0.43, "grad_norm": 0.8292802022782253, "learning_rate": 1.2584536428175457e-06, "loss": 0.6267, "step": 3409 }, { "epoch": 0.43, "grad_norm": 0.8139289064334948, "learning_rate": 1.2580549799667034e-06, "loss": 0.5526, "step": 3410 }, { "epoch": 0.43, "grad_norm": 1.3019620613581468, "learning_rate": 1.2576562731717867e-06, "loss": 0.6566, "step": 3411 }, { "epoch": 0.43, "grad_norm": 0.8960845023587747, "learning_rate": 1.2572575225006908e-06, "loss": 0.6441, "step": 3412 }, { "epoch": 0.43, "grad_norm": 0.6769975101071087, "learning_rate": 1.2568587280213185e-06, "loss": 0.5693, "step": 3413 }, { "epoch": 0.43, "grad_norm": 0.7355051910269776, "learning_rate": 1.2564598898015812e-06, "loss": 0.5069, "step": 3414 }, { "epoch": 0.44, "grad_norm": 0.8015446960604529, "learning_rate": 1.2560610079093964e-06, "loss": 0.5668, "step": 3415 }, { "epoch": 0.44, "grad_norm": 0.8208171487654028, "learning_rate": 1.2556620824126895e-06, "loss": 0.5877, "step": 3416 }, { "epoch": 0.44, "grad_norm": 0.5899352498327027, "learning_rate": 1.2552631133793937e-06, "loss": 0.5108, "step": 3417 }, { "epoch": 0.44, "grad_norm": 1.0333316429996067, "learning_rate": 1.2548641008774487e-06, "loss": 0.5889, "step": 3418 }, { "epoch": 0.44, "grad_norm": 0.923566005586143, "learning_rate": 1.2544650449748026e-06, "loss": 0.6344, "step": 3419 }, { "epoch": 0.44, "grad_norm": 0.7676206718343277, "learning_rate": 1.2540659457394102e-06, "loss": 0.5556, "step": 3420 }, { "epoch": 0.44, "grad_norm": 0.8219237885679513, "learning_rate": 1.2536668032392344e-06, "loss": 0.6028, "step": 3421 }, { "epoch": 0.44, "grad_norm": 0.8959544582212658, "learning_rate": 1.2532676175422443e-06, "loss": 0.5876, "step": 3422 }, { "epoch": 0.44, "grad_norm": 0.8011427328712676, "learning_rate": 1.2528683887164175e-06, "loss": 0.6148, "step": 3423 }, { "epoch": 0.44, "grad_norm": 0.6699374892247882, "learning_rate": 1.2524691168297385e-06, "loss": 0.5409, "step": 3424 }, { "epoch": 0.44, "grad_norm": 0.8616310503183756, "learning_rate": 1.252069801950199e-06, "loss": 0.5899, "step": 3425 }, { "epoch": 0.44, "grad_norm": 0.6242208496552898, "learning_rate": 1.2516704441457989e-06, "loss": 0.5264, "step": 3426 }, { "epoch": 0.44, "grad_norm": 1.1189441273570602, "learning_rate": 1.2512710434845436e-06, "loss": 0.5859, "step": 3427 }, { "epoch": 0.44, "grad_norm": 0.6141265533412329, "learning_rate": 1.2508716000344472e-06, "loss": 0.5402, "step": 3428 }, { "epoch": 0.44, "grad_norm": 0.6698615109814212, "learning_rate": 1.250472113863531e-06, "loss": 0.5331, "step": 3429 }, { "epoch": 0.44, "grad_norm": 0.7540188602215551, "learning_rate": 1.2500725850398233e-06, "loss": 0.5849, "step": 3430 }, { "epoch": 0.44, "grad_norm": 0.7458633727259069, "learning_rate": 1.2496730136313596e-06, "loss": 0.604, "step": 3431 }, { "epoch": 0.44, "grad_norm": 0.757186897604443, "learning_rate": 1.2492733997061826e-06, "loss": 0.5496, "step": 3432 }, { "epoch": 0.44, "grad_norm": 0.605860575374263, "learning_rate": 1.2488737433323427e-06, "loss": 0.4774, "step": 3433 }, { "epoch": 0.44, "grad_norm": 0.8706353454028379, "learning_rate": 1.2484740445778968e-06, "loss": 0.6354, "step": 3434 }, { "epoch": 0.44, "grad_norm": 1.0812780453949584, "learning_rate": 1.24807430351091e-06, "loss": 0.6232, "step": 3435 }, { "epoch": 0.44, "grad_norm": 0.6570292955181182, "learning_rate": 1.2476745201994534e-06, "loss": 0.4746, "step": 3436 }, { "epoch": 0.44, "grad_norm": 0.6425051683417388, "learning_rate": 1.2472746947116064e-06, "loss": 0.6216, "step": 3437 }, { "epoch": 0.44, "grad_norm": 0.9994540936014995, "learning_rate": 1.246874827115455e-06, "loss": 0.6239, "step": 3438 }, { "epoch": 0.44, "grad_norm": 0.6760532823680406, "learning_rate": 1.2464749174790924e-06, "loss": 0.5912, "step": 3439 }, { "epoch": 0.44, "grad_norm": 0.8049445617718202, "learning_rate": 1.2460749658706194e-06, "loss": 0.6635, "step": 3440 }, { "epoch": 0.44, "grad_norm": 0.8227554407782018, "learning_rate": 1.245674972358143e-06, "loss": 0.603, "step": 3441 }, { "epoch": 0.44, "grad_norm": 0.6314904416614506, "learning_rate": 1.2452749370097784e-06, "loss": 0.5372, "step": 3442 }, { "epoch": 0.44, "grad_norm": 0.8620222120396919, "learning_rate": 1.2448748598936474e-06, "loss": 0.6668, "step": 3443 }, { "epoch": 0.44, "grad_norm": 0.6588789954602593, "learning_rate": 1.2444747410778786e-06, "loss": 0.5692, "step": 3444 }, { "epoch": 0.44, "grad_norm": 0.626607037013441, "learning_rate": 1.2440745806306084e-06, "loss": 0.5008, "step": 3445 }, { "epoch": 0.44, "grad_norm": 0.7248826159058039, "learning_rate": 1.24367437861998e-06, "loss": 0.5571, "step": 3446 }, { "epoch": 0.44, "grad_norm": 0.8476510343427638, "learning_rate": 1.2432741351141433e-06, "loss": 0.637, "step": 3447 }, { "epoch": 0.44, "grad_norm": 0.8191762717670863, "learning_rate": 1.242873850181256e-06, "loss": 0.5802, "step": 3448 }, { "epoch": 0.44, "grad_norm": 0.7661319521650517, "learning_rate": 1.2424735238894817e-06, "loss": 0.623, "step": 3449 }, { "epoch": 0.44, "grad_norm": 0.7413009685244539, "learning_rate": 1.2420731563069929e-06, "loss": 0.5773, "step": 3450 }, { "epoch": 0.44, "grad_norm": 0.7389292123156826, "learning_rate": 1.2416727475019672e-06, "loss": 0.5446, "step": 3451 }, { "epoch": 0.44, "grad_norm": 0.8642685681826094, "learning_rate": 1.2412722975425907e-06, "loss": 0.6654, "step": 3452 }, { "epoch": 0.44, "grad_norm": 0.8070867740593191, "learning_rate": 1.240871806497055e-06, "loss": 0.55, "step": 3453 }, { "epoch": 0.44, "grad_norm": 0.6704846689688515, "learning_rate": 1.2404712744335606e-06, "loss": 0.5459, "step": 3454 }, { "epoch": 0.44, "grad_norm": 0.6647625967910236, "learning_rate": 1.2400707014203131e-06, "loss": 0.565, "step": 3455 }, { "epoch": 0.44, "grad_norm": 0.6628246977028461, "learning_rate": 1.2396700875255261e-06, "loss": 0.5507, "step": 3456 }, { "epoch": 0.44, "grad_norm": 0.6763865887843218, "learning_rate": 1.23926943281742e-06, "loss": 0.532, "step": 3457 }, { "epoch": 0.44, "grad_norm": 0.7450907036108734, "learning_rate": 1.2388687373642224e-06, "loss": 0.573, "step": 3458 }, { "epoch": 0.44, "grad_norm": 0.6748485167281576, "learning_rate": 1.238468001234167e-06, "loss": 0.4873, "step": 3459 }, { "epoch": 0.44, "grad_norm": 0.841741974100605, "learning_rate": 1.2380672244954958e-06, "loss": 0.6526, "step": 3460 }, { "epoch": 0.44, "grad_norm": 0.7609501005344621, "learning_rate": 1.237666407216456e-06, "loss": 0.5615, "step": 3461 }, { "epoch": 0.44, "grad_norm": 0.786562005395296, "learning_rate": 1.2372655494653034e-06, "loss": 0.5958, "step": 3462 }, { "epoch": 0.44, "grad_norm": 0.7064669514094032, "learning_rate": 1.2368646513102995e-06, "loss": 0.595, "step": 3463 }, { "epoch": 0.44, "grad_norm": 0.7480501674597492, "learning_rate": 1.2364637128197128e-06, "loss": 0.5844, "step": 3464 }, { "epoch": 0.44, "grad_norm": 0.7073759640954551, "learning_rate": 1.2360627340618195e-06, "loss": 0.5604, "step": 3465 }, { "epoch": 0.44, "grad_norm": 0.7983536201032089, "learning_rate": 1.2356617151049019e-06, "loss": 0.6597, "step": 3466 }, { "epoch": 0.44, "grad_norm": 1.010896235137408, "learning_rate": 1.2352606560172493e-06, "loss": 0.6371, "step": 3467 }, { "epoch": 0.44, "grad_norm": 0.6820091353286809, "learning_rate": 1.2348595568671579e-06, "loss": 0.5269, "step": 3468 }, { "epoch": 0.44, "grad_norm": 0.8201409636545012, "learning_rate": 1.2344584177229308e-06, "loss": 0.6261, "step": 3469 }, { "epoch": 0.44, "grad_norm": 0.9449405762468769, "learning_rate": 1.2340572386528774e-06, "loss": 0.6991, "step": 3470 }, { "epoch": 0.44, "grad_norm": 0.956736777566163, "learning_rate": 1.233656019725315e-06, "loss": 0.6395, "step": 3471 }, { "epoch": 0.44, "grad_norm": 0.6905920414599401, "learning_rate": 1.2332547610085667e-06, "loss": 0.5717, "step": 3472 }, { "epoch": 0.44, "grad_norm": 0.6399633310230656, "learning_rate": 1.2328534625709623e-06, "loss": 0.5305, "step": 3473 }, { "epoch": 0.44, "grad_norm": 0.7312166848146624, "learning_rate": 1.2324521244808392e-06, "loss": 0.5237, "step": 3474 }, { "epoch": 0.44, "grad_norm": 0.74368641764576, "learning_rate": 1.232050746806541e-06, "loss": 0.5498, "step": 3475 }, { "epoch": 0.44, "grad_norm": 0.868664332311871, "learning_rate": 1.2316493296164184e-06, "loss": 0.5361, "step": 3476 }, { "epoch": 0.44, "grad_norm": 0.8351169527651191, "learning_rate": 1.2312478729788279e-06, "loss": 0.6267, "step": 3477 }, { "epoch": 0.44, "grad_norm": 0.8820336230346343, "learning_rate": 1.2308463769621337e-06, "loss": 0.6849, "step": 3478 }, { "epoch": 0.44, "grad_norm": 0.6226316920184087, "learning_rate": 1.2304448416347064e-06, "loss": 0.5273, "step": 3479 }, { "epoch": 0.44, "grad_norm": 0.6916069414038876, "learning_rate": 1.2300432670649236e-06, "loss": 0.5444, "step": 3480 }, { "epoch": 0.44, "grad_norm": 0.8228558755845252, "learning_rate": 1.229641653321169e-06, "loss": 0.5367, "step": 3481 }, { "epoch": 0.44, "grad_norm": 0.6164059343360774, "learning_rate": 1.229240000471833e-06, "loss": 0.5208, "step": 3482 }, { "epoch": 0.44, "grad_norm": 0.6586283733931749, "learning_rate": 1.2288383085853134e-06, "loss": 0.5523, "step": 3483 }, { "epoch": 0.44, "grad_norm": 0.8085229269288404, "learning_rate": 1.2284365777300136e-06, "loss": 0.5216, "step": 3484 }, { "epoch": 0.44, "grad_norm": 0.699436947228876, "learning_rate": 1.2280348079743448e-06, "loss": 0.5485, "step": 3485 }, { "epoch": 0.44, "grad_norm": 0.6590446660419724, "learning_rate": 1.2276329993867235e-06, "loss": 0.5542, "step": 3486 }, { "epoch": 0.44, "grad_norm": 0.7500688209600497, "learning_rate": 1.2272311520355741e-06, "loss": 0.5671, "step": 3487 }, { "epoch": 0.44, "grad_norm": 0.7564204262637355, "learning_rate": 1.2268292659893267e-06, "loss": 0.6206, "step": 3488 }, { "epoch": 0.44, "grad_norm": 1.0072123234702361, "learning_rate": 1.2264273413164183e-06, "loss": 0.6375, "step": 3489 }, { "epoch": 0.44, "grad_norm": 0.8200357213764622, "learning_rate": 1.2260253780852928e-06, "loss": 0.6346, "step": 3490 }, { "epoch": 0.44, "grad_norm": 0.65039404510652, "learning_rate": 1.2256233763644e-06, "loss": 0.5703, "step": 3491 }, { "epoch": 0.44, "grad_norm": 0.7245000953603701, "learning_rate": 1.2252213362221963e-06, "loss": 0.633, "step": 3492 }, { "epoch": 0.44, "grad_norm": 0.6982241036020059, "learning_rate": 1.2248192577271458e-06, "loss": 0.5615, "step": 3493 }, { "epoch": 0.45, "grad_norm": 0.7150806297184724, "learning_rate": 1.224417140947718e-06, "loss": 0.5227, "step": 3494 }, { "epoch": 0.45, "grad_norm": 0.9414136688035221, "learning_rate": 1.224014985952389e-06, "loss": 0.6703, "step": 3495 }, { "epoch": 0.45, "grad_norm": 0.8782191815859718, "learning_rate": 1.2236127928096416e-06, "loss": 0.6325, "step": 3496 }, { "epoch": 0.45, "grad_norm": 0.7626170396020309, "learning_rate": 1.223210561587965e-06, "loss": 0.558, "step": 3497 }, { "epoch": 0.45, "grad_norm": 0.7838715416650078, "learning_rate": 1.2228082923558555e-06, "loss": 0.6677, "step": 3498 }, { "epoch": 0.45, "grad_norm": 0.7176376461562528, "learning_rate": 1.2224059851818147e-06, "loss": 0.5711, "step": 3499 }, { "epoch": 0.45, "grad_norm": 0.8268322531912228, "learning_rate": 1.2220036401343517e-06, "loss": 0.6459, "step": 3500 }, { "epoch": 0.45, "grad_norm": 0.9829849582498553, "learning_rate": 1.2216012572819816e-06, "loss": 0.6522, "step": 3501 }, { "epoch": 0.45, "grad_norm": 0.8431369153822154, "learning_rate": 1.221198836693226e-06, "loss": 0.5731, "step": 3502 }, { "epoch": 0.45, "grad_norm": 0.9031175798980924, "learning_rate": 1.220796378436613e-06, "loss": 0.5263, "step": 3503 }, { "epoch": 0.45, "grad_norm": 0.7422289770319375, "learning_rate": 1.220393882580677e-06, "loss": 0.536, "step": 3504 }, { "epoch": 0.45, "grad_norm": 0.7354224347397338, "learning_rate": 1.2199913491939587e-06, "loss": 0.6256, "step": 3505 }, { "epoch": 0.45, "grad_norm": 1.2435135239274218, "learning_rate": 1.2195887783450057e-06, "loss": 0.6702, "step": 3506 }, { "epoch": 0.45, "grad_norm": 0.6357125744487298, "learning_rate": 1.2191861701023713e-06, "loss": 0.5134, "step": 3507 }, { "epoch": 0.45, "grad_norm": 0.80221880768022, "learning_rate": 1.2187835245346157e-06, "loss": 0.6338, "step": 3508 }, { "epoch": 0.45, "grad_norm": 0.8558452782699452, "learning_rate": 1.218380841710305e-06, "loss": 0.6288, "step": 3509 }, { "epoch": 0.45, "grad_norm": 0.6784191302069856, "learning_rate": 1.2179781216980119e-06, "loss": 0.5725, "step": 3510 }, { "epoch": 0.45, "grad_norm": 0.8606559269711934, "learning_rate": 1.2175753645663158e-06, "loss": 0.6282, "step": 3511 }, { "epoch": 0.45, "grad_norm": 1.0389767753729784, "learning_rate": 1.2171725703838016e-06, "loss": 0.6481, "step": 3512 }, { "epoch": 0.45, "grad_norm": 0.8516509288230759, "learning_rate": 1.2167697392190615e-06, "loss": 0.5788, "step": 3513 }, { "epoch": 0.45, "grad_norm": 0.9621465340911541, "learning_rate": 1.2163668711406927e-06, "loss": 0.6841, "step": 3514 }, { "epoch": 0.45, "grad_norm": 0.6604704361449661, "learning_rate": 1.2159639662173002e-06, "loss": 0.5037, "step": 3515 }, { "epoch": 0.45, "grad_norm": 0.9442474852325309, "learning_rate": 1.2155610245174938e-06, "loss": 0.6403, "step": 3516 }, { "epoch": 0.45, "grad_norm": 0.8288217901532556, "learning_rate": 1.215158046109891e-06, "loss": 0.592, "step": 3517 }, { "epoch": 0.45, "grad_norm": 0.6995881457148487, "learning_rate": 1.2147550310631144e-06, "loss": 0.5935, "step": 3518 }, { "epoch": 0.45, "grad_norm": 0.829631410530379, "learning_rate": 1.214351979445793e-06, "loss": 0.6048, "step": 3519 }, { "epoch": 0.45, "grad_norm": 0.8898685045932799, "learning_rate": 1.2139488913265627e-06, "loss": 0.6588, "step": 3520 }, { "epoch": 0.45, "grad_norm": 0.8126242333817216, "learning_rate": 1.213545766774065e-06, "loss": 0.4989, "step": 3521 }, { "epoch": 0.45, "grad_norm": 0.6797205780512917, "learning_rate": 1.213142605856948e-06, "loss": 0.5708, "step": 3522 }, { "epoch": 0.45, "grad_norm": 0.6475613630795397, "learning_rate": 1.212739408643866e-06, "loss": 0.5133, "step": 3523 }, { "epoch": 0.45, "grad_norm": 0.9009108059159847, "learning_rate": 1.2123361752034788e-06, "loss": 0.701, "step": 3524 }, { "epoch": 0.45, "grad_norm": 0.7861965965835049, "learning_rate": 1.2119329056044531e-06, "loss": 0.6197, "step": 3525 }, { "epoch": 0.45, "grad_norm": 0.7545364981942796, "learning_rate": 1.2115295999154616e-06, "loss": 0.595, "step": 3526 }, { "epoch": 0.45, "grad_norm": 0.8987236680853078, "learning_rate": 1.2111262582051828e-06, "loss": 0.5923, "step": 3527 }, { "epoch": 0.45, "grad_norm": 0.961434028963848, "learning_rate": 1.2107228805423017e-06, "loss": 0.5643, "step": 3528 }, { "epoch": 0.45, "grad_norm": 0.9019295295037031, "learning_rate": 1.2103194669955095e-06, "loss": 0.6859, "step": 3529 }, { "epoch": 0.45, "grad_norm": 0.5989102138813082, "learning_rate": 1.209916017633503e-06, "loss": 0.5494, "step": 3530 }, { "epoch": 0.45, "grad_norm": 0.9381521708968847, "learning_rate": 1.2095125325249855e-06, "loss": 0.6185, "step": 3531 }, { "epoch": 0.45, "grad_norm": 0.9430053574763092, "learning_rate": 1.2091090117386667e-06, "loss": 0.7124, "step": 3532 }, { "epoch": 0.45, "grad_norm": 0.8340145910181755, "learning_rate": 1.2087054553432613e-06, "loss": 0.6287, "step": 3533 }, { "epoch": 0.45, "grad_norm": 0.816956118263287, "learning_rate": 1.2083018634074913e-06, "loss": 0.6121, "step": 3534 }, { "epoch": 0.45, "grad_norm": 0.8054961930566695, "learning_rate": 1.207898236000084e-06, "loss": 0.6078, "step": 3535 }, { "epoch": 0.45, "grad_norm": 0.9359150661914656, "learning_rate": 1.2074945731897733e-06, "loss": 0.6107, "step": 3536 }, { "epoch": 0.45, "grad_norm": 0.656594809588484, "learning_rate": 1.2070908750452981e-06, "loss": 0.5402, "step": 3537 }, { "epoch": 0.45, "grad_norm": 0.6922193443069765, "learning_rate": 1.2066871416354046e-06, "loss": 0.5859, "step": 3538 }, { "epoch": 0.45, "grad_norm": 0.8318259615102429, "learning_rate": 1.2062833730288437e-06, "loss": 0.6157, "step": 3539 }, { "epoch": 0.45, "grad_norm": 0.7613914655063581, "learning_rate": 1.2058795692943737e-06, "loss": 0.5618, "step": 3540 }, { "epoch": 0.45, "grad_norm": 0.6902467577418846, "learning_rate": 1.2054757305007578e-06, "loss": 0.5449, "step": 3541 }, { "epoch": 0.45, "grad_norm": 0.8834482235574148, "learning_rate": 1.205071856716766e-06, "loss": 0.5409, "step": 3542 }, { "epoch": 0.45, "grad_norm": 0.7400526361213282, "learning_rate": 1.204667948011173e-06, "loss": 0.5472, "step": 3543 }, { "epoch": 0.45, "grad_norm": 0.7140997336484658, "learning_rate": 1.2042640044527608e-06, "loss": 0.5131, "step": 3544 }, { "epoch": 0.45, "grad_norm": 0.7843670136104182, "learning_rate": 1.2038600261103166e-06, "loss": 0.6106, "step": 3545 }, { "epoch": 0.45, "grad_norm": 0.7658831103915218, "learning_rate": 1.203456013052634e-06, "loss": 0.5923, "step": 3546 }, { "epoch": 0.45, "grad_norm": 0.7938317778517147, "learning_rate": 1.2030519653485114e-06, "loss": 0.6578, "step": 3547 }, { "epoch": 0.45, "grad_norm": 0.8036531896012116, "learning_rate": 1.202647883066755e-06, "loss": 0.6071, "step": 3548 }, { "epoch": 0.45, "grad_norm": 0.6535604999248441, "learning_rate": 1.2022437662761751e-06, "loss": 0.5462, "step": 3549 }, { "epoch": 0.45, "grad_norm": 0.7322384545035954, "learning_rate": 1.2018396150455887e-06, "loss": 0.5963, "step": 3550 }, { "epoch": 0.45, "grad_norm": 0.6845579907614295, "learning_rate": 1.2014354294438187e-06, "loss": 0.5342, "step": 3551 }, { "epoch": 0.45, "grad_norm": 0.9334066464255726, "learning_rate": 1.2010312095396937e-06, "loss": 0.6211, "step": 3552 }, { "epoch": 0.45, "grad_norm": 0.5922909587093564, "learning_rate": 1.2006269554020478e-06, "loss": 0.5212, "step": 3553 }, { "epoch": 0.45, "grad_norm": 0.7731553485473244, "learning_rate": 1.2002226670997217e-06, "loss": 0.5956, "step": 3554 }, { "epoch": 0.45, "grad_norm": 0.8253981664558916, "learning_rate": 1.1998183447015614e-06, "loss": 0.5833, "step": 3555 }, { "epoch": 0.45, "grad_norm": 0.851786114519458, "learning_rate": 1.1994139882764184e-06, "loss": 0.6396, "step": 3556 }, { "epoch": 0.45, "grad_norm": 0.7888010578851617, "learning_rate": 1.199009597893151e-06, "loss": 0.5633, "step": 3557 }, { "epoch": 0.45, "grad_norm": 0.6215559814448506, "learning_rate": 1.1986051736206222e-06, "loss": 0.518, "step": 3558 }, { "epoch": 0.45, "grad_norm": 0.7999177239958768, "learning_rate": 1.1982007155277015e-06, "loss": 0.5845, "step": 3559 }, { "epoch": 0.45, "grad_norm": 0.8665466274285062, "learning_rate": 1.1977962236832633e-06, "loss": 0.619, "step": 3560 }, { "epoch": 0.45, "grad_norm": 0.7670270411518367, "learning_rate": 1.1973916981561892e-06, "loss": 0.567, "step": 3561 }, { "epoch": 0.45, "grad_norm": 0.887300188600661, "learning_rate": 1.1969871390153651e-06, "loss": 0.6309, "step": 3562 }, { "epoch": 0.45, "grad_norm": 0.9260395919546728, "learning_rate": 1.1965825463296835e-06, "loss": 0.634, "step": 3563 }, { "epoch": 0.45, "grad_norm": 0.8479487607795843, "learning_rate": 1.1961779201680425e-06, "loss": 0.57, "step": 3564 }, { "epoch": 0.45, "grad_norm": 0.9141854212178816, "learning_rate": 1.195773260599345e-06, "loss": 0.5999, "step": 3565 }, { "epoch": 0.45, "grad_norm": 0.8684873873171398, "learning_rate": 1.1953685676925013e-06, "loss": 0.646, "step": 3566 }, { "epoch": 0.45, "grad_norm": 0.5810265591366123, "learning_rate": 1.1949638415164256e-06, "loss": 0.5137, "step": 3567 }, { "epoch": 0.45, "grad_norm": 0.6225818658740272, "learning_rate": 1.1945590821400389e-06, "loss": 0.5562, "step": 3568 }, { "epoch": 0.45, "grad_norm": 0.8324657505468169, "learning_rate": 1.1941542896322673e-06, "loss": 0.6534, "step": 3569 }, { "epoch": 0.45, "grad_norm": 0.7548587874902005, "learning_rate": 1.1937494640620432e-06, "loss": 0.5012, "step": 3570 }, { "epoch": 0.45, "grad_norm": 0.712866040846828, "learning_rate": 1.1933446054983033e-06, "loss": 0.6043, "step": 3571 }, { "epoch": 0.46, "grad_norm": 0.9162511901355743, "learning_rate": 1.1929397140099917e-06, "loss": 0.6013, "step": 3572 }, { "epoch": 0.46, "grad_norm": 0.8577123638222974, "learning_rate": 1.1925347896660566e-06, "loss": 0.6185, "step": 3573 }, { "epoch": 0.46, "grad_norm": 0.7767311538423525, "learning_rate": 1.192129832535453e-06, "loss": 0.5669, "step": 3574 }, { "epoch": 0.46, "grad_norm": 0.7024101133439205, "learning_rate": 1.1917248426871403e-06, "loss": 0.5057, "step": 3575 }, { "epoch": 0.46, "grad_norm": 0.8294044919234587, "learning_rate": 1.191319820190084e-06, "loss": 0.6465, "step": 3576 }, { "epoch": 0.46, "grad_norm": 1.238667729040125, "learning_rate": 1.1909147651132557e-06, "loss": 0.5808, "step": 3577 }, { "epoch": 0.46, "grad_norm": 0.9885318384501535, "learning_rate": 1.1905096775256317e-06, "loss": 0.6625, "step": 3578 }, { "epoch": 0.46, "grad_norm": 0.7157083943182658, "learning_rate": 1.1901045574961948e-06, "loss": 0.5752, "step": 3579 }, { "epoch": 0.46, "grad_norm": 0.6792223639703365, "learning_rate": 1.1896994050939318e-06, "loss": 0.5072, "step": 3580 }, { "epoch": 0.46, "grad_norm": 0.7970826960305226, "learning_rate": 1.1892942203878367e-06, "loss": 0.5703, "step": 3581 }, { "epoch": 0.46, "grad_norm": 0.7502523933950223, "learning_rate": 1.1888890034469077e-06, "loss": 0.5724, "step": 3582 }, { "epoch": 0.46, "grad_norm": 0.824172217398551, "learning_rate": 1.1884837543401492e-06, "loss": 0.6502, "step": 3583 }, { "epoch": 0.46, "grad_norm": 0.9276335087977379, "learning_rate": 1.1880784731365712e-06, "loss": 0.6335, "step": 3584 }, { "epoch": 0.46, "grad_norm": 0.6427226445088194, "learning_rate": 1.1876731599051883e-06, "loss": 0.5143, "step": 3585 }, { "epoch": 0.46, "grad_norm": 0.720098945112339, "learning_rate": 1.1872678147150218e-06, "loss": 0.5373, "step": 3586 }, { "epoch": 0.46, "grad_norm": 0.9961433033558384, "learning_rate": 1.186862437635097e-06, "loss": 0.5635, "step": 3587 }, { "epoch": 0.46, "grad_norm": 0.7830376357780922, "learning_rate": 1.186457028734446e-06, "loss": 0.5761, "step": 3588 }, { "epoch": 0.46, "grad_norm": 0.9559946980963099, "learning_rate": 1.1860515880821055e-06, "loss": 0.6418, "step": 3589 }, { "epoch": 0.46, "grad_norm": 0.9073301422693981, "learning_rate": 1.1856461157471176e-06, "loss": 0.6128, "step": 3590 }, { "epoch": 0.46, "grad_norm": 0.7934666438398036, "learning_rate": 1.1852406117985303e-06, "loss": 0.5332, "step": 3591 }, { "epoch": 0.46, "grad_norm": 0.609847028068369, "learning_rate": 1.184835076305397e-06, "loss": 0.5135, "step": 3592 }, { "epoch": 0.46, "grad_norm": 0.7078636384587128, "learning_rate": 1.1844295093367755e-06, "loss": 0.5601, "step": 3593 }, { "epoch": 0.46, "grad_norm": 0.8185389873708361, "learning_rate": 1.18402391096173e-06, "loss": 0.5951, "step": 3594 }, { "epoch": 0.46, "grad_norm": 0.7520725252078428, "learning_rate": 1.1836182812493296e-06, "loss": 0.5639, "step": 3595 }, { "epoch": 0.46, "grad_norm": 0.7513295419044058, "learning_rate": 1.1832126202686488e-06, "loss": 0.5551, "step": 3596 }, { "epoch": 0.46, "grad_norm": 0.6845442067765931, "learning_rate": 1.1828069280887676e-06, "loss": 0.5318, "step": 3597 }, { "epoch": 0.46, "grad_norm": 0.7088369274585685, "learning_rate": 1.1824012047787708e-06, "loss": 0.5184, "step": 3598 }, { "epoch": 0.46, "grad_norm": 0.7227816165963745, "learning_rate": 1.1819954504077493e-06, "loss": 0.5559, "step": 3599 }, { "epoch": 0.46, "grad_norm": 0.6548997655710692, "learning_rate": 1.1815896650447984e-06, "loss": 0.4961, "step": 3600 }, { "epoch": 0.46, "grad_norm": 0.9082241109198962, "learning_rate": 1.1811838487590195e-06, "loss": 0.6575, "step": 3601 }, { "epoch": 0.46, "grad_norm": 0.8929461602817208, "learning_rate": 1.1807780016195185e-06, "loss": 0.6624, "step": 3602 }, { "epoch": 0.46, "grad_norm": 0.6970893108929611, "learning_rate": 1.1803721236954072e-06, "loss": 0.54, "step": 3603 }, { "epoch": 0.46, "grad_norm": 0.8972978568092888, "learning_rate": 1.1799662150558026e-06, "loss": 0.5476, "step": 3604 }, { "epoch": 0.46, "grad_norm": 1.0023706851915388, "learning_rate": 1.1795602757698262e-06, "loss": 0.6898, "step": 3605 }, { "epoch": 0.46, "grad_norm": 0.719470700779904, "learning_rate": 1.1791543059066056e-06, "loss": 0.592, "step": 3606 }, { "epoch": 0.46, "grad_norm": 0.632633400204935, "learning_rate": 1.1787483055352732e-06, "loss": 0.5662, "step": 3607 }, { "epoch": 0.46, "grad_norm": 0.6344294369769692, "learning_rate": 1.1783422747249662e-06, "loss": 0.5142, "step": 3608 }, { "epoch": 0.46, "grad_norm": 0.7607594570472552, "learning_rate": 1.177936213544828e-06, "loss": 0.5877, "step": 3609 }, { "epoch": 0.46, "grad_norm": 0.8811266431526009, "learning_rate": 1.1775301220640064e-06, "loss": 0.5915, "step": 3610 }, { "epoch": 0.46, "grad_norm": 0.7426080584864317, "learning_rate": 1.1771240003516546e-06, "loss": 0.5683, "step": 3611 }, { "epoch": 0.46, "grad_norm": 0.8188080429467737, "learning_rate": 1.1767178484769304e-06, "loss": 0.6017, "step": 3612 }, { "epoch": 0.46, "grad_norm": 0.7286962636520827, "learning_rate": 1.176311666508998e-06, "loss": 0.5407, "step": 3613 }, { "epoch": 0.46, "grad_norm": 2.1358811674511573, "learning_rate": 1.1759054545170254e-06, "loss": 0.6891, "step": 3614 }, { "epoch": 0.46, "grad_norm": 0.9603309573533548, "learning_rate": 1.1754992125701865e-06, "loss": 0.6492, "step": 3615 }, { "epoch": 0.46, "grad_norm": 0.8415097107855828, "learning_rate": 1.1750929407376604e-06, "loss": 0.6394, "step": 3616 }, { "epoch": 0.46, "grad_norm": 0.5980757173338465, "learning_rate": 1.1746866390886302e-06, "loss": 0.5071, "step": 3617 }, { "epoch": 0.46, "grad_norm": 0.7323593191321959, "learning_rate": 1.1742803076922853e-06, "loss": 0.5612, "step": 3618 }, { "epoch": 0.46, "grad_norm": 0.692883621933488, "learning_rate": 1.1738739466178195e-06, "loss": 0.5528, "step": 3619 }, { "epoch": 0.46, "grad_norm": 0.6433014908711976, "learning_rate": 1.1734675559344321e-06, "loss": 0.5776, "step": 3620 }, { "epoch": 0.46, "grad_norm": 0.81345233531994, "learning_rate": 1.1730611357113275e-06, "loss": 0.5704, "step": 3621 }, { "epoch": 0.46, "grad_norm": 0.7058413315869119, "learning_rate": 1.172654686017714e-06, "loss": 0.5252, "step": 3622 }, { "epoch": 0.46, "grad_norm": 0.7208480001793919, "learning_rate": 1.1722482069228063e-06, "loss": 0.5394, "step": 3623 }, { "epoch": 0.46, "grad_norm": 0.6371528961380661, "learning_rate": 1.1718416984958234e-06, "loss": 0.5338, "step": 3624 }, { "epoch": 0.46, "grad_norm": 0.6905815433262681, "learning_rate": 1.1714351608059895e-06, "loss": 0.58, "step": 3625 }, { "epoch": 0.46, "grad_norm": 0.8719170090264644, "learning_rate": 1.1710285939225338e-06, "loss": 0.653, "step": 3626 }, { "epoch": 0.46, "grad_norm": 0.6932977381804137, "learning_rate": 1.17062199791469e-06, "loss": 0.5653, "step": 3627 }, { "epoch": 0.46, "grad_norm": 0.9410155657055277, "learning_rate": 1.1702153728516979e-06, "loss": 0.6247, "step": 3628 }, { "epoch": 0.46, "grad_norm": 0.7067631055995084, "learning_rate": 1.169808718802801e-06, "loss": 0.5774, "step": 3629 }, { "epoch": 0.46, "grad_norm": 0.7034274838749379, "learning_rate": 1.169402035837248e-06, "loss": 0.5672, "step": 3630 }, { "epoch": 0.46, "grad_norm": 0.7080389290734543, "learning_rate": 1.1689953240242933e-06, "loss": 0.5493, "step": 3631 }, { "epoch": 0.46, "grad_norm": 0.6269167055961934, "learning_rate": 1.1685885834331954e-06, "loss": 0.5205, "step": 3632 }, { "epoch": 0.46, "grad_norm": 0.7170609024089468, "learning_rate": 1.1681818141332184e-06, "loss": 0.5306, "step": 3633 }, { "epoch": 0.46, "grad_norm": 0.6947624802037049, "learning_rate": 1.1677750161936303e-06, "loss": 0.5197, "step": 3634 }, { "epoch": 0.46, "grad_norm": 0.5798515003428385, "learning_rate": 1.1673681896837048e-06, "loss": 0.5094, "step": 3635 }, { "epoch": 0.46, "grad_norm": 1.0250640243396019, "learning_rate": 1.1669613346727205e-06, "loss": 0.6112, "step": 3636 }, { "epoch": 0.46, "grad_norm": 0.7634876935829814, "learning_rate": 1.1665544512299602e-06, "loss": 0.5253, "step": 3637 }, { "epoch": 0.46, "grad_norm": 0.6861988645706409, "learning_rate": 1.166147539424712e-06, "loss": 0.5492, "step": 3638 }, { "epoch": 0.46, "grad_norm": 0.8551424666892626, "learning_rate": 1.1657405993262685e-06, "loss": 0.6528, "step": 3639 }, { "epoch": 0.46, "grad_norm": 0.7983300943076339, "learning_rate": 1.165333631003928e-06, "loss": 0.5504, "step": 3640 }, { "epoch": 0.46, "grad_norm": 0.7629837307304309, "learning_rate": 1.1649266345269925e-06, "loss": 0.5339, "step": 3641 }, { "epoch": 0.46, "grad_norm": 0.9712587407829955, "learning_rate": 1.164519609964769e-06, "loss": 0.6982, "step": 3642 }, { "epoch": 0.46, "grad_norm": 0.8637557019882236, "learning_rate": 1.16411255738657e-06, "loss": 0.6335, "step": 3643 }, { "epoch": 0.46, "grad_norm": 0.9013452188179716, "learning_rate": 1.1637054768617126e-06, "loss": 0.6154, "step": 3644 }, { "epoch": 0.46, "grad_norm": 0.8348139784766735, "learning_rate": 1.1632983684595172e-06, "loss": 0.5824, "step": 3645 }, { "epoch": 0.46, "grad_norm": 1.0048097682164636, "learning_rate": 1.1628912322493113e-06, "loss": 0.6535, "step": 3646 }, { "epoch": 0.46, "grad_norm": 0.8707463514413742, "learning_rate": 1.1624840683004255e-06, "loss": 0.6079, "step": 3647 }, { "epoch": 0.46, "grad_norm": 0.7311340909923244, "learning_rate": 1.1620768766821957e-06, "loss": 0.5596, "step": 3648 }, { "epoch": 0.46, "grad_norm": 0.8895525895175532, "learning_rate": 1.1616696574639625e-06, "loss": 0.5585, "step": 3649 }, { "epoch": 0.46, "grad_norm": 1.0816762534577864, "learning_rate": 1.1612624107150705e-06, "loss": 0.5964, "step": 3650 }, { "epoch": 0.47, "grad_norm": 0.7253532672108746, "learning_rate": 1.1608551365048702e-06, "loss": 0.583, "step": 3651 }, { "epoch": 0.47, "grad_norm": 1.2551727308539335, "learning_rate": 1.1604478349027159e-06, "loss": 0.6498, "step": 3652 }, { "epoch": 0.47, "grad_norm": 0.8910985828614785, "learning_rate": 1.1600405059779669e-06, "loss": 0.657, "step": 3653 }, { "epoch": 0.47, "grad_norm": 0.777946976312628, "learning_rate": 1.1596331497999868e-06, "loss": 0.5381, "step": 3654 }, { "epoch": 0.47, "grad_norm": 0.7098297271464186, "learning_rate": 1.1592257664381444e-06, "loss": 0.5525, "step": 3655 }, { "epoch": 0.47, "grad_norm": 1.0725023480542142, "learning_rate": 1.158818355961813e-06, "loss": 0.6458, "step": 3656 }, { "epoch": 0.47, "grad_norm": 0.8757536584562327, "learning_rate": 1.1584109184403699e-06, "loss": 0.6191, "step": 3657 }, { "epoch": 0.47, "grad_norm": 0.9470950137264245, "learning_rate": 1.1580034539431975e-06, "loss": 0.6043, "step": 3658 }, { "epoch": 0.47, "grad_norm": 0.843629809137184, "learning_rate": 1.1575959625396833e-06, "loss": 0.6378, "step": 3659 }, { "epoch": 0.47, "grad_norm": 0.9293213466033156, "learning_rate": 1.1571884442992185e-06, "loss": 0.6646, "step": 3660 }, { "epoch": 0.47, "grad_norm": 0.9115810132416571, "learning_rate": 1.156780899291199e-06, "loss": 0.6559, "step": 3661 }, { "epoch": 0.47, "grad_norm": 0.6705874144665556, "learning_rate": 1.1563733275850257e-06, "loss": 0.5367, "step": 3662 }, { "epoch": 0.47, "grad_norm": 0.8697428400624346, "learning_rate": 1.1559657292501042e-06, "loss": 0.6489, "step": 3663 }, { "epoch": 0.47, "grad_norm": 0.7948070151430111, "learning_rate": 1.1555581043558435e-06, "loss": 0.6551, "step": 3664 }, { "epoch": 0.47, "grad_norm": 0.6117085911461957, "learning_rate": 1.1551504529716582e-06, "loss": 0.5378, "step": 3665 }, { "epoch": 0.47, "grad_norm": 0.765917003819845, "learning_rate": 1.1547427751669673e-06, "loss": 0.6041, "step": 3666 }, { "epoch": 0.47, "grad_norm": 0.700041290568281, "learning_rate": 1.154335071011194e-06, "loss": 0.617, "step": 3667 }, { "epoch": 0.47, "grad_norm": 0.6392624973599721, "learning_rate": 1.1539273405737657e-06, "loss": 0.5471, "step": 3668 }, { "epoch": 0.47, "grad_norm": 0.8544402601481563, "learning_rate": 1.153519583924115e-06, "loss": 0.612, "step": 3669 }, { "epoch": 0.47, "grad_norm": 0.8223919630677149, "learning_rate": 1.1531118011316784e-06, "loss": 0.6013, "step": 3670 }, { "epoch": 0.47, "grad_norm": 0.8508279096714498, "learning_rate": 1.1527039922658973e-06, "loss": 0.6388, "step": 3671 }, { "epoch": 0.47, "grad_norm": 0.8950839673167871, "learning_rate": 1.1522961573962171e-06, "loss": 0.5651, "step": 3672 }, { "epoch": 0.47, "grad_norm": 0.9189110118822066, "learning_rate": 1.151888296592088e-06, "loss": 0.6199, "step": 3673 }, { "epoch": 0.47, "grad_norm": 0.7508253015712515, "learning_rate": 1.151480409922964e-06, "loss": 0.4707, "step": 3674 }, { "epoch": 0.47, "grad_norm": 0.9090710908928314, "learning_rate": 1.151072497458305e-06, "loss": 0.6338, "step": 3675 }, { "epoch": 0.47, "grad_norm": 0.8621805441555135, "learning_rate": 1.1506645592675728e-06, "loss": 0.5627, "step": 3676 }, { "epoch": 0.47, "grad_norm": 0.9268394408692671, "learning_rate": 1.1502565954202361e-06, "loss": 0.5843, "step": 3677 }, { "epoch": 0.47, "grad_norm": 0.7515184236025934, "learning_rate": 1.1498486059857666e-06, "loss": 0.537, "step": 3678 }, { "epoch": 0.47, "grad_norm": 0.6477237371280447, "learning_rate": 1.1494405910336407e-06, "loss": 0.556, "step": 3679 }, { "epoch": 0.47, "grad_norm": 0.8396348207200406, "learning_rate": 1.1490325506333389e-06, "loss": 0.5993, "step": 3680 }, { "epoch": 0.47, "grad_norm": 0.6943649408965813, "learning_rate": 1.1486244848543462e-06, "loss": 0.5007, "step": 3681 }, { "epoch": 0.47, "grad_norm": 0.6393568502865359, "learning_rate": 1.1482163937661522e-06, "loss": 0.5417, "step": 3682 }, { "epoch": 0.47, "grad_norm": 0.8266632755864813, "learning_rate": 1.1478082774382505e-06, "loss": 0.6441, "step": 3683 }, { "epoch": 0.47, "grad_norm": 0.6551815974005002, "learning_rate": 1.147400135940139e-06, "loss": 0.5375, "step": 3684 }, { "epoch": 0.47, "grad_norm": 0.8912795042690866, "learning_rate": 1.14699196934132e-06, "loss": 0.6012, "step": 3685 }, { "epoch": 0.47, "grad_norm": 0.8241691594437537, "learning_rate": 1.1465837777112998e-06, "loss": 0.6245, "step": 3686 }, { "epoch": 0.47, "grad_norm": 0.725901301920027, "learning_rate": 1.1461755611195893e-06, "loss": 0.5508, "step": 3687 }, { "epoch": 0.47, "grad_norm": 0.6755848514912907, "learning_rate": 1.1457673196357038e-06, "loss": 0.5524, "step": 3688 }, { "epoch": 0.47, "grad_norm": 0.8828134442489497, "learning_rate": 1.1453590533291624e-06, "loss": 0.5888, "step": 3689 }, { "epoch": 0.47, "grad_norm": 0.9128717908805856, "learning_rate": 1.1449507622694884e-06, "loss": 0.6043, "step": 3690 }, { "epoch": 0.47, "grad_norm": 0.8866033268773514, "learning_rate": 1.1445424465262097e-06, "loss": 0.6216, "step": 3691 }, { "epoch": 0.47, "grad_norm": 0.8879525934127555, "learning_rate": 1.1441341061688584e-06, "loss": 0.6366, "step": 3692 }, { "epoch": 0.47, "grad_norm": 0.5915697097182501, "learning_rate": 1.14372574126697e-06, "loss": 0.4858, "step": 3693 }, { "epoch": 0.47, "grad_norm": 0.6991436830838438, "learning_rate": 1.1433173518900854e-06, "loss": 0.5721, "step": 3694 }, { "epoch": 0.47, "grad_norm": 0.9151881948612605, "learning_rate": 1.1429089381077489e-06, "loss": 0.6026, "step": 3695 }, { "epoch": 0.47, "grad_norm": 0.8006592540436487, "learning_rate": 1.1425004999895088e-06, "loss": 0.5129, "step": 3696 }, { "epoch": 0.47, "grad_norm": 0.7275436935940939, "learning_rate": 1.1420920376049186e-06, "loss": 0.485, "step": 3697 }, { "epoch": 0.47, "grad_norm": 0.7053797849964247, "learning_rate": 1.1416835510235343e-06, "loss": 0.5718, "step": 3698 }, { "epoch": 0.47, "grad_norm": 0.9048433002689582, "learning_rate": 1.1412750403149174e-06, "loss": 0.5784, "step": 3699 }, { "epoch": 0.47, "grad_norm": 0.9967803378225267, "learning_rate": 1.140866505548633e-06, "loss": 0.6206, "step": 3700 }, { "epoch": 0.47, "grad_norm": 1.3082645798244654, "learning_rate": 1.14045794679425e-06, "loss": 0.6834, "step": 3701 }, { "epoch": 0.47, "grad_norm": 0.8582512126729813, "learning_rate": 1.140049364121342e-06, "loss": 0.638, "step": 3702 }, { "epoch": 0.47, "grad_norm": 0.6361548545182607, "learning_rate": 1.1396407575994867e-06, "loss": 0.4675, "step": 3703 }, { "epoch": 0.47, "grad_norm": 0.8410226784040911, "learning_rate": 1.1392321272982649e-06, "loss": 0.6167, "step": 3704 }, { "epoch": 0.47, "grad_norm": 0.6418828525016536, "learning_rate": 1.138823473287262e-06, "loss": 0.5871, "step": 3705 }, { "epoch": 0.47, "grad_norm": 0.9008430286223994, "learning_rate": 1.1384147956360683e-06, "loss": 0.6289, "step": 3706 }, { "epoch": 0.47, "grad_norm": 0.676416370227893, "learning_rate": 1.1380060944142769e-06, "loss": 0.534, "step": 3707 }, { "epoch": 0.47, "grad_norm": 0.6988483285968501, "learning_rate": 1.137597369691485e-06, "loss": 0.5188, "step": 3708 }, { "epoch": 0.47, "grad_norm": 0.9025668718192182, "learning_rate": 1.137188621537295e-06, "loss": 0.618, "step": 3709 }, { "epoch": 0.47, "grad_norm": 0.7008834273975737, "learning_rate": 1.1367798500213118e-06, "loss": 0.5541, "step": 3710 }, { "epoch": 0.47, "grad_norm": 0.8540132983700257, "learning_rate": 1.136371055213145e-06, "loss": 0.6122, "step": 3711 }, { "epoch": 0.47, "grad_norm": 0.7464078803435495, "learning_rate": 1.135962237182408e-06, "loss": 0.573, "step": 3712 }, { "epoch": 0.47, "grad_norm": 0.6801124148173194, "learning_rate": 1.1355533959987186e-06, "loss": 0.5426, "step": 3713 }, { "epoch": 0.47, "grad_norm": 0.7818158282964908, "learning_rate": 1.1351445317316982e-06, "loss": 0.637, "step": 3714 }, { "epoch": 0.47, "grad_norm": 0.9673976542536208, "learning_rate": 1.134735644450972e-06, "loss": 0.6118, "step": 3715 }, { "epoch": 0.47, "grad_norm": 0.995751027227567, "learning_rate": 1.1343267342261689e-06, "loss": 0.6628, "step": 3716 }, { "epoch": 0.47, "grad_norm": 0.9881288452763789, "learning_rate": 1.1339178011269223e-06, "loss": 0.6063, "step": 3717 }, { "epoch": 0.47, "grad_norm": 0.6767177842201771, "learning_rate": 1.1335088452228699e-06, "loss": 0.5539, "step": 3718 }, { "epoch": 0.47, "grad_norm": 1.7183936169016847, "learning_rate": 1.1330998665836516e-06, "loss": 0.6567, "step": 3719 }, { "epoch": 0.47, "grad_norm": 0.6289116546719111, "learning_rate": 1.1326908652789127e-06, "loss": 0.5115, "step": 3720 }, { "epoch": 0.47, "grad_norm": 0.9004751858649493, "learning_rate": 1.1322818413783019e-06, "loss": 0.6551, "step": 3721 }, { "epoch": 0.47, "grad_norm": 0.7868643488220931, "learning_rate": 1.1318727949514718e-06, "loss": 0.5665, "step": 3722 }, { "epoch": 0.47, "grad_norm": 0.7065282969122566, "learning_rate": 1.1314637260680782e-06, "loss": 0.5059, "step": 3723 }, { "epoch": 0.47, "grad_norm": 0.7397267666147376, "learning_rate": 1.1310546347977818e-06, "loss": 0.5461, "step": 3724 }, { "epoch": 0.47, "grad_norm": 0.8862590294791912, "learning_rate": 1.1306455212102464e-06, "loss": 0.5655, "step": 3725 }, { "epoch": 0.47, "grad_norm": 0.7033322582630152, "learning_rate": 1.1302363853751398e-06, "loss": 0.5638, "step": 3726 }, { "epoch": 0.47, "grad_norm": 0.8339013442877581, "learning_rate": 1.1298272273621336e-06, "loss": 0.6519, "step": 3727 }, { "epoch": 0.47, "grad_norm": 0.9169636846092699, "learning_rate": 1.1294180472409032e-06, "loss": 0.6548, "step": 3728 }, { "epoch": 0.48, "grad_norm": 0.8498628592305902, "learning_rate": 1.1290088450811275e-06, "loss": 0.6347, "step": 3729 }, { "epoch": 0.48, "grad_norm": 0.6597006717997809, "learning_rate": 1.1285996209524894e-06, "loss": 0.5115, "step": 3730 }, { "epoch": 0.48, "grad_norm": 0.6359356228337353, "learning_rate": 1.1281903749246755e-06, "loss": 0.5767, "step": 3731 }, { "epoch": 0.48, "grad_norm": 0.9535178063760222, "learning_rate": 1.1277811070673764e-06, "loss": 0.6414, "step": 3732 }, { "epoch": 0.48, "grad_norm": 0.8990969220219319, "learning_rate": 1.127371817450286e-06, "loss": 0.6207, "step": 3733 }, { "epoch": 0.48, "grad_norm": 1.0071475212603687, "learning_rate": 1.126962506143102e-06, "loss": 0.6912, "step": 3734 }, { "epoch": 0.48, "grad_norm": 0.6583596770126955, "learning_rate": 1.1265531732155258e-06, "loss": 0.5315, "step": 3735 }, { "epoch": 0.48, "grad_norm": 0.8863981748527064, "learning_rate": 1.1261438187372628e-06, "loss": 0.6536, "step": 3736 }, { "epoch": 0.48, "grad_norm": 1.0963785519983609, "learning_rate": 1.1257344427780214e-06, "loss": 0.6762, "step": 3737 }, { "epoch": 0.48, "grad_norm": 0.7733203820016387, "learning_rate": 1.1253250454075147e-06, "loss": 0.534, "step": 3738 }, { "epoch": 0.48, "grad_norm": 0.8922322902342977, "learning_rate": 1.124915626695458e-06, "loss": 0.6192, "step": 3739 }, { "epoch": 0.48, "grad_norm": 0.6648158056979462, "learning_rate": 1.1245061867115716e-06, "loss": 0.5945, "step": 3740 }, { "epoch": 0.48, "grad_norm": 0.6830001108168379, "learning_rate": 1.124096725525579e-06, "loss": 0.5631, "step": 3741 }, { "epoch": 0.48, "grad_norm": 1.060176801282398, "learning_rate": 1.1236872432072064e-06, "loss": 0.6603, "step": 3742 }, { "epoch": 0.48, "grad_norm": 0.7121320884043028, "learning_rate": 1.1232777398261849e-06, "loss": 0.5468, "step": 3743 }, { "epoch": 0.48, "grad_norm": 0.9011222436166068, "learning_rate": 1.1228682154522483e-06, "loss": 0.6281, "step": 3744 }, { "epoch": 0.48, "grad_norm": 0.6890188999299539, "learning_rate": 1.1224586701551352e-06, "loss": 0.5862, "step": 3745 }, { "epoch": 0.48, "grad_norm": 0.7369567946649698, "learning_rate": 1.1220491040045863e-06, "loss": 0.5816, "step": 3746 }, { "epoch": 0.48, "grad_norm": 0.8188797863087116, "learning_rate": 1.1216395170703464e-06, "loss": 0.5919, "step": 3747 }, { "epoch": 0.48, "grad_norm": 0.8599577500462381, "learning_rate": 1.1212299094221637e-06, "loss": 0.6565, "step": 3748 }, { "epoch": 0.48, "grad_norm": 0.9007165031718727, "learning_rate": 1.120820281129791e-06, "loss": 0.6178, "step": 3749 }, { "epoch": 0.48, "grad_norm": 2.032435497903724, "learning_rate": 1.1204106322629825e-06, "loss": 0.6512, "step": 3750 }, { "epoch": 0.48, "grad_norm": 0.9283412192753755, "learning_rate": 1.1200009628914981e-06, "loss": 0.6182, "step": 3751 }, { "epoch": 0.48, "grad_norm": 0.6968683829086844, "learning_rate": 1.1195912730850998e-06, "loss": 0.5338, "step": 3752 }, { "epoch": 0.48, "grad_norm": 0.9379148262693374, "learning_rate": 1.1191815629135535e-06, "loss": 0.6586, "step": 3753 }, { "epoch": 0.48, "grad_norm": 0.79342834666322, "learning_rate": 1.118771832446629e-06, "loss": 0.6212, "step": 3754 }, { "epoch": 0.48, "grad_norm": 0.8028522756418939, "learning_rate": 1.1183620817540983e-06, "loss": 0.5885, "step": 3755 }, { "epoch": 0.48, "grad_norm": 0.7217049043580395, "learning_rate": 1.1179523109057382e-06, "loss": 0.6321, "step": 3756 }, { "epoch": 0.48, "grad_norm": 0.9594892450996403, "learning_rate": 1.1175425199713288e-06, "loss": 0.6456, "step": 3757 }, { "epoch": 0.48, "grad_norm": 0.79504947455154, "learning_rate": 1.1171327090206524e-06, "loss": 0.5723, "step": 3758 }, { "epoch": 0.48, "grad_norm": 0.867307420014309, "learning_rate": 1.1167228781234962e-06, "loss": 0.6149, "step": 3759 }, { "epoch": 0.48, "grad_norm": 0.6536211299277561, "learning_rate": 1.1163130273496494e-06, "loss": 0.5224, "step": 3760 }, { "epoch": 0.48, "grad_norm": 0.6046069131717426, "learning_rate": 1.1159031567689064e-06, "loss": 0.4821, "step": 3761 }, { "epoch": 0.48, "grad_norm": 0.7578825162793845, "learning_rate": 1.115493266451063e-06, "loss": 0.5329, "step": 3762 }, { "epoch": 0.48, "grad_norm": 0.636194825568033, "learning_rate": 1.1150833564659195e-06, "loss": 0.5063, "step": 3763 }, { "epoch": 0.48, "grad_norm": 0.7856334926588111, "learning_rate": 1.1146734268832792e-06, "loss": 0.5638, "step": 3764 }, { "epoch": 0.48, "grad_norm": 0.8652311349391903, "learning_rate": 1.114263477772949e-06, "loss": 0.5868, "step": 3765 }, { "epoch": 0.48, "grad_norm": 0.8088234401103918, "learning_rate": 1.113853509204739e-06, "loss": 0.6278, "step": 3766 }, { "epoch": 0.48, "grad_norm": 0.7222621717914747, "learning_rate": 1.1134435212484627e-06, "loss": 0.5506, "step": 3767 }, { "epoch": 0.48, "grad_norm": 0.7988033825367326, "learning_rate": 1.1130335139739364e-06, "loss": 0.6317, "step": 3768 }, { "epoch": 0.48, "grad_norm": 1.0223647643000335, "learning_rate": 1.1126234874509802e-06, "loss": 0.7092, "step": 3769 }, { "epoch": 0.48, "grad_norm": 0.7156755330820165, "learning_rate": 1.1122134417494176e-06, "loss": 0.5678, "step": 3770 }, { "epoch": 0.48, "grad_norm": 0.6528438149216544, "learning_rate": 1.1118033769390746e-06, "loss": 0.5734, "step": 3771 }, { "epoch": 0.48, "grad_norm": 0.8173070531450803, "learning_rate": 1.1113932930897815e-06, "loss": 0.607, "step": 3772 }, { "epoch": 0.48, "grad_norm": 0.8632260217466431, "learning_rate": 1.1109831902713708e-06, "loss": 0.631, "step": 3773 }, { "epoch": 0.48, "grad_norm": 1.0617871786752167, "learning_rate": 1.1105730685536791e-06, "loss": 0.6124, "step": 3774 }, { "epoch": 0.48, "grad_norm": 0.6355142294590497, "learning_rate": 1.1101629280065458e-06, "loss": 0.4945, "step": 3775 }, { "epoch": 0.48, "grad_norm": 0.7206036986520717, "learning_rate": 1.1097527686998139e-06, "loss": 0.5421, "step": 3776 }, { "epoch": 0.48, "grad_norm": 0.7392804000610621, "learning_rate": 1.1093425907033285e-06, "loss": 0.5526, "step": 3777 }, { "epoch": 0.48, "grad_norm": 0.7527049604930378, "learning_rate": 1.1089323940869391e-06, "loss": 0.6016, "step": 3778 }, { "epoch": 0.48, "grad_norm": 0.795641520521779, "learning_rate": 1.1085221789204979e-06, "loss": 0.5911, "step": 3779 }, { "epoch": 0.48, "grad_norm": 0.8873159662634362, "learning_rate": 1.10811194527386e-06, "loss": 0.6565, "step": 3780 }, { "epoch": 0.48, "grad_norm": 0.8584678805936323, "learning_rate": 1.1077016932168844e-06, "loss": 0.6536, "step": 3781 }, { "epoch": 0.48, "grad_norm": 0.746276777271781, "learning_rate": 1.1072914228194324e-06, "loss": 0.5504, "step": 3782 }, { "epoch": 0.48, "grad_norm": 0.824509842356158, "learning_rate": 1.106881134151369e-06, "loss": 0.5948, "step": 3783 }, { "epoch": 0.48, "grad_norm": 1.0532894836327467, "learning_rate": 1.1064708272825616e-06, "loss": 0.6042, "step": 3784 }, { "epoch": 0.48, "grad_norm": 0.9252619387671036, "learning_rate": 1.1060605022828812e-06, "loss": 0.6352, "step": 3785 }, { "epoch": 0.48, "grad_norm": 0.5756753115479418, "learning_rate": 1.1056501592222028e-06, "loss": 0.5102, "step": 3786 }, { "epoch": 0.48, "grad_norm": 0.6600129052974246, "learning_rate": 1.1052397981704027e-06, "loss": 0.5989, "step": 3787 }, { "epoch": 0.48, "grad_norm": 0.7665406632703332, "learning_rate": 1.1048294191973615e-06, "loss": 0.6081, "step": 3788 }, { "epoch": 0.48, "grad_norm": 0.7065913862031409, "learning_rate": 1.1044190223729623e-06, "loss": 0.5694, "step": 3789 }, { "epoch": 0.48, "grad_norm": 0.8372081957596091, "learning_rate": 1.1040086077670914e-06, "loss": 0.6313, "step": 3790 }, { "epoch": 0.48, "grad_norm": 0.7802504736342917, "learning_rate": 1.103598175449638e-06, "loss": 0.6344, "step": 3791 }, { "epoch": 0.48, "grad_norm": 0.8241439835305071, "learning_rate": 1.1031877254904949e-06, "loss": 0.5173, "step": 3792 }, { "epoch": 0.48, "grad_norm": 0.7936629514530036, "learning_rate": 1.1027772579595569e-06, "loss": 0.5726, "step": 3793 }, { "epoch": 0.48, "grad_norm": 0.6707457628203407, "learning_rate": 1.1023667729267228e-06, "loss": 0.5439, "step": 3794 }, { "epoch": 0.48, "grad_norm": 0.6737119798223995, "learning_rate": 1.1019562704618934e-06, "loss": 0.5663, "step": 3795 }, { "epoch": 0.48, "grad_norm": 1.026687685928366, "learning_rate": 1.1015457506349735e-06, "loss": 0.688, "step": 3796 }, { "epoch": 0.48, "grad_norm": 0.6902037707638047, "learning_rate": 1.1011352135158702e-06, "loss": 0.5388, "step": 3797 }, { "epoch": 0.48, "grad_norm": 0.6896284640902063, "learning_rate": 1.1007246591744933e-06, "loss": 0.5395, "step": 3798 }, { "epoch": 0.48, "grad_norm": 0.6760964269128757, "learning_rate": 1.1003140876807568e-06, "loss": 0.5612, "step": 3799 }, { "epoch": 0.48, "grad_norm": 0.8592723960805339, "learning_rate": 1.0999034991045759e-06, "loss": 0.5917, "step": 3800 }, { "epoch": 0.48, "grad_norm": 0.6579217247288897, "learning_rate": 1.0994928935158701e-06, "loss": 0.5029, "step": 3801 }, { "epoch": 0.48, "grad_norm": 1.1605091520723925, "learning_rate": 1.099082270984561e-06, "loss": 0.6257, "step": 3802 }, { "epoch": 0.48, "grad_norm": 0.7335910140892303, "learning_rate": 1.0986716315805734e-06, "loss": 0.5657, "step": 3803 }, { "epoch": 0.48, "grad_norm": 0.7177762446051024, "learning_rate": 1.0982609753738347e-06, "loss": 0.5271, "step": 3804 }, { "epoch": 0.48, "grad_norm": 0.6701885208288325, "learning_rate": 1.0978503024342757e-06, "loss": 0.5778, "step": 3805 }, { "epoch": 0.48, "grad_norm": 0.6528951719816746, "learning_rate": 1.0974396128318297e-06, "loss": 0.506, "step": 3806 }, { "epoch": 0.48, "grad_norm": 0.7807931138521967, "learning_rate": 1.0970289066364327e-06, "loss": 0.5959, "step": 3807 }, { "epoch": 0.49, "grad_norm": 0.7991688314550837, "learning_rate": 1.0966181839180238e-06, "loss": 0.6082, "step": 3808 }, { "epoch": 0.49, "grad_norm": 0.8395158315412257, "learning_rate": 1.0962074447465447e-06, "loss": 0.5399, "step": 3809 }, { "epoch": 0.49, "grad_norm": 0.865385124994614, "learning_rate": 1.09579668919194e-06, "loss": 0.6287, "step": 3810 }, { "epoch": 0.49, "grad_norm": 0.8194872326607046, "learning_rate": 1.0953859173241572e-06, "loss": 0.6143, "step": 3811 }, { "epoch": 0.49, "grad_norm": 0.892921925064196, "learning_rate": 1.0949751292131459e-06, "loss": 0.6113, "step": 3812 }, { "epoch": 0.49, "grad_norm": 0.6671846647685193, "learning_rate": 1.0945643249288602e-06, "loss": 0.5772, "step": 3813 }, { "epoch": 0.49, "grad_norm": 0.9540490829202073, "learning_rate": 1.0941535045412546e-06, "loss": 0.6188, "step": 3814 }, { "epoch": 0.49, "grad_norm": 0.660394261701563, "learning_rate": 1.0937426681202882e-06, "loss": 0.5444, "step": 3815 }, { "epoch": 0.49, "grad_norm": 0.7728879872997366, "learning_rate": 1.0933318157359222e-06, "loss": 0.5977, "step": 3816 }, { "epoch": 0.49, "grad_norm": 1.1626768208170344, "learning_rate": 1.0929209474581203e-06, "loss": 0.6156, "step": 3817 }, { "epoch": 0.49, "grad_norm": 0.8493130879720228, "learning_rate": 1.0925100633568492e-06, "loss": 0.6057, "step": 3818 }, { "epoch": 0.49, "grad_norm": 0.8088094553452906, "learning_rate": 1.0920991635020779e-06, "loss": 0.5864, "step": 3819 }, { "epoch": 0.49, "grad_norm": 0.7842648992513092, "learning_rate": 1.0916882479637785e-06, "loss": 0.6019, "step": 3820 }, { "epoch": 0.49, "grad_norm": 0.841139903055992, "learning_rate": 1.0912773168119259e-06, "loss": 0.6242, "step": 3821 }, { "epoch": 0.49, "grad_norm": 0.7308709317388855, "learning_rate": 1.0908663701164969e-06, "loss": 0.5397, "step": 3822 }, { "epoch": 0.49, "grad_norm": 0.6211828800090295, "learning_rate": 1.0904554079474719e-06, "loss": 0.492, "step": 3823 }, { "epoch": 0.49, "grad_norm": 0.7938120391719792, "learning_rate": 1.090044430374833e-06, "loss": 0.6355, "step": 3824 }, { "epoch": 0.49, "grad_norm": 0.9829076867715043, "learning_rate": 1.089633437468566e-06, "loss": 0.6416, "step": 3825 }, { "epoch": 0.49, "grad_norm": 1.2232518176506675, "learning_rate": 1.0892224292986583e-06, "loss": 0.6335, "step": 3826 }, { "epoch": 0.49, "grad_norm": 1.8599576998217255, "learning_rate": 1.0888114059351003e-06, "loss": 0.5989, "step": 3827 }, { "epoch": 0.49, "grad_norm": 0.9150294669012223, "learning_rate": 1.0884003674478852e-06, "loss": 0.6648, "step": 3828 }, { "epoch": 0.49, "grad_norm": 0.6182961202250848, "learning_rate": 1.0879893139070085e-06, "loss": 0.5696, "step": 3829 }, { "epoch": 0.49, "grad_norm": 0.8014491375053396, "learning_rate": 1.0875782453824681e-06, "loss": 0.5861, "step": 3830 }, { "epoch": 0.49, "grad_norm": 0.6952491829345054, "learning_rate": 1.087167161944265e-06, "loss": 0.5799, "step": 3831 }, { "epoch": 0.49, "grad_norm": 0.8085927162660927, "learning_rate": 1.0867560636624025e-06, "loss": 0.5638, "step": 3832 }, { "epoch": 0.49, "grad_norm": 0.7054277513052164, "learning_rate": 1.086344950606886e-06, "loss": 0.617, "step": 3833 }, { "epoch": 0.49, "grad_norm": 0.8580699861278577, "learning_rate": 1.0859338228477243e-06, "loss": 0.6003, "step": 3834 }, { "epoch": 0.49, "grad_norm": 1.0033146581137338, "learning_rate": 1.0855226804549273e-06, "loss": 0.6182, "step": 3835 }, { "epoch": 0.49, "grad_norm": 0.9612631871360799, "learning_rate": 1.0851115234985091e-06, "loss": 0.6445, "step": 3836 }, { "epoch": 0.49, "grad_norm": 0.8540633835722822, "learning_rate": 1.084700352048485e-06, "loss": 0.6564, "step": 3837 }, { "epoch": 0.49, "grad_norm": 0.8199905602636428, "learning_rate": 1.0842891661748734e-06, "loss": 0.6231, "step": 3838 }, { "epoch": 0.49, "grad_norm": 0.7222605964180555, "learning_rate": 1.0838779659476949e-06, "loss": 0.5398, "step": 3839 }, { "epoch": 0.49, "grad_norm": 1.035798317002971, "learning_rate": 1.0834667514369727e-06, "loss": 0.605, "step": 3840 }, { "epoch": 0.49, "grad_norm": 0.7897088010826849, "learning_rate": 1.0830555227127321e-06, "loss": 0.5774, "step": 3841 }, { "epoch": 0.49, "grad_norm": 0.7246090443171848, "learning_rate": 1.0826442798450014e-06, "loss": 0.5344, "step": 3842 }, { "epoch": 0.49, "grad_norm": 1.130234961522196, "learning_rate": 1.0822330229038108e-06, "loss": 0.6231, "step": 3843 }, { "epoch": 0.49, "grad_norm": 0.7564057590220903, "learning_rate": 1.081821751959193e-06, "loss": 0.5563, "step": 3844 }, { "epoch": 0.49, "grad_norm": 0.8510239387676737, "learning_rate": 1.0814104670811833e-06, "loss": 0.6283, "step": 3845 }, { "epoch": 0.49, "grad_norm": 0.914314979152228, "learning_rate": 1.0809991683398195e-06, "loss": 0.6649, "step": 3846 }, { "epoch": 0.49, "grad_norm": 0.8560634215890172, "learning_rate": 1.080587855805141e-06, "loss": 0.574, "step": 3847 }, { "epoch": 0.49, "grad_norm": 0.664576971323027, "learning_rate": 1.0801765295471898e-06, "loss": 0.4649, "step": 3848 }, { "epoch": 0.49, "grad_norm": 0.8119712580438827, "learning_rate": 1.0797651896360115e-06, "loss": 0.5963, "step": 3849 }, { "epoch": 0.49, "grad_norm": 0.6905304309678921, "learning_rate": 1.079353836141652e-06, "loss": 0.5271, "step": 3850 }, { "epoch": 0.49, "grad_norm": 0.689858010676965, "learning_rate": 1.078942469134161e-06, "loss": 0.5499, "step": 3851 }, { "epoch": 0.49, "grad_norm": 0.8511031963719486, "learning_rate": 1.07853108868359e-06, "loss": 0.6045, "step": 3852 }, { "epoch": 0.49, "grad_norm": 0.5933672823661634, "learning_rate": 1.0781196948599927e-06, "loss": 0.5446, "step": 3853 }, { "epoch": 0.49, "grad_norm": 0.650819812780896, "learning_rate": 1.0777082877334248e-06, "loss": 0.5657, "step": 3854 }, { "epoch": 0.49, "grad_norm": 0.8688002130109596, "learning_rate": 1.0772968673739456e-06, "loss": 0.6502, "step": 3855 }, { "epoch": 0.49, "grad_norm": 0.5891920618562846, "learning_rate": 1.076885433851615e-06, "loss": 0.5524, "step": 3856 }, { "epoch": 0.49, "grad_norm": 0.7119758778190097, "learning_rate": 1.076473987236496e-06, "loss": 0.5874, "step": 3857 }, { "epoch": 0.49, "grad_norm": 0.6809975208555901, "learning_rate": 1.0760625275986537e-06, "loss": 0.5411, "step": 3858 }, { "epoch": 0.49, "grad_norm": 0.6578472864380163, "learning_rate": 1.0756510550081552e-06, "loss": 0.5142, "step": 3859 }, { "epoch": 0.49, "grad_norm": 0.6672513708727161, "learning_rate": 1.0752395695350705e-06, "loss": 0.6074, "step": 3860 }, { "epoch": 0.49, "grad_norm": 0.6913751395916042, "learning_rate": 1.0748280712494706e-06, "loss": 0.491, "step": 3861 }, { "epoch": 0.49, "grad_norm": 0.8604718985823631, "learning_rate": 1.0744165602214298e-06, "loss": 0.5996, "step": 3862 }, { "epoch": 0.49, "grad_norm": 0.8724895646075216, "learning_rate": 1.0740050365210242e-06, "loss": 0.6199, "step": 3863 }, { "epoch": 0.49, "grad_norm": 0.7442813208318847, "learning_rate": 1.0735935002183318e-06, "loss": 0.5772, "step": 3864 }, { "epoch": 0.49, "grad_norm": 0.6709914225219399, "learning_rate": 1.0731819513834328e-06, "loss": 0.4842, "step": 3865 }, { "epoch": 0.49, "grad_norm": 0.7523437199230651, "learning_rate": 1.0727703900864101e-06, "loss": 0.5764, "step": 3866 }, { "epoch": 0.49, "grad_norm": 0.9526097681622248, "learning_rate": 1.072358816397348e-06, "loss": 0.648, "step": 3867 }, { "epoch": 0.49, "grad_norm": 0.7030348455236145, "learning_rate": 1.0719472303863335e-06, "loss": 0.5317, "step": 3868 }, { "epoch": 0.49, "grad_norm": 0.740501434365152, "learning_rate": 1.071535632123455e-06, "loss": 0.5571, "step": 3869 }, { "epoch": 0.49, "grad_norm": 1.033093203075489, "learning_rate": 1.0711240216788035e-06, "loss": 0.637, "step": 3870 }, { "epoch": 0.49, "grad_norm": 0.880591638091897, "learning_rate": 1.0707123991224722e-06, "loss": 0.683, "step": 3871 }, { "epoch": 0.49, "grad_norm": 0.8529215121011173, "learning_rate": 1.070300764524556e-06, "loss": 0.6485, "step": 3872 }, { "epoch": 0.49, "grad_norm": 0.8631923330379587, "learning_rate": 1.0698891179551522e-06, "loss": 0.6825, "step": 3873 }, { "epoch": 0.49, "grad_norm": 0.6342530938842654, "learning_rate": 1.0694774594843594e-06, "loss": 0.5796, "step": 3874 }, { "epoch": 0.49, "grad_norm": 0.7491222660612251, "learning_rate": 1.0690657891822791e-06, "loss": 0.5693, "step": 3875 }, { "epoch": 0.49, "grad_norm": 0.825898420056976, "learning_rate": 1.0686541071190148e-06, "loss": 0.5906, "step": 3876 }, { "epoch": 0.49, "grad_norm": 0.8097381829024631, "learning_rate": 1.068242413364671e-06, "loss": 0.5695, "step": 3877 }, { "epoch": 0.49, "grad_norm": 0.6421327163809314, "learning_rate": 1.067830707989355e-06, "loss": 0.5558, "step": 3878 }, { "epoch": 0.49, "grad_norm": 0.6985690310693011, "learning_rate": 1.0674189910631763e-06, "loss": 0.5093, "step": 3879 }, { "epoch": 0.49, "grad_norm": 0.7497215227335424, "learning_rate": 1.0670072626562458e-06, "loss": 0.6437, "step": 3880 }, { "epoch": 0.49, "grad_norm": 0.8081997138787704, "learning_rate": 1.0665955228386765e-06, "loss": 0.6681, "step": 3881 }, { "epoch": 0.49, "grad_norm": 0.9434005408799359, "learning_rate": 1.0661837716805835e-06, "loss": 0.648, "step": 3882 }, { "epoch": 0.49, "grad_norm": 0.5787022719610794, "learning_rate": 1.0657720092520834e-06, "loss": 0.5391, "step": 3883 }, { "epoch": 0.49, "grad_norm": 0.5873612957661662, "learning_rate": 1.0653602356232956e-06, "loss": 0.5553, "step": 3884 }, { "epoch": 0.49, "grad_norm": 0.7853388611484223, "learning_rate": 1.0649484508643405e-06, "loss": 0.6244, "step": 3885 }, { "epoch": 0.5, "grad_norm": 0.7848610768613511, "learning_rate": 1.064536655045341e-06, "loss": 0.5665, "step": 3886 }, { "epoch": 0.5, "grad_norm": 0.8953459996706609, "learning_rate": 1.0641248482364213e-06, "loss": 0.5958, "step": 3887 }, { "epoch": 0.5, "grad_norm": 0.8676432916916758, "learning_rate": 1.063713030507708e-06, "loss": 0.5839, "step": 3888 }, { "epoch": 0.5, "grad_norm": 0.877460098847511, "learning_rate": 1.0633012019293295e-06, "loss": 0.6756, "step": 3889 }, { "epoch": 0.5, "grad_norm": 0.7433011992374953, "learning_rate": 1.0628893625714156e-06, "loss": 0.5749, "step": 3890 }, { "epoch": 0.5, "grad_norm": 0.7096798235459288, "learning_rate": 1.0624775125040985e-06, "loss": 0.5083, "step": 3891 }, { "epoch": 0.5, "grad_norm": 0.8443575004382138, "learning_rate": 1.062065651797512e-06, "loss": 0.6717, "step": 3892 }, { "epoch": 0.5, "grad_norm": 0.6452954591097366, "learning_rate": 1.0616537805217915e-06, "loss": 0.5538, "step": 3893 }, { "epoch": 0.5, "grad_norm": 0.8031244017548903, "learning_rate": 1.0612418987470746e-06, "loss": 0.6132, "step": 3894 }, { "epoch": 0.5, "grad_norm": 0.8697034853078133, "learning_rate": 1.0608300065435003e-06, "loss": 0.5416, "step": 3895 }, { "epoch": 0.5, "grad_norm": 0.6714619420844369, "learning_rate": 1.0604181039812095e-06, "loss": 0.5639, "step": 3896 }, { "epoch": 0.5, "grad_norm": 0.7370519725970414, "learning_rate": 1.0600061911303452e-06, "loss": 0.6182, "step": 3897 }, { "epoch": 0.5, "grad_norm": 0.6596369833655409, "learning_rate": 1.0595942680610515e-06, "loss": 0.5131, "step": 3898 }, { "epoch": 0.5, "grad_norm": 0.9500943696770713, "learning_rate": 1.0591823348434748e-06, "loss": 0.6657, "step": 3899 }, { "epoch": 0.5, "grad_norm": 0.8154919743641041, "learning_rate": 1.058770391547763e-06, "loss": 0.5574, "step": 3900 }, { "epoch": 0.5, "grad_norm": 0.967953390206161, "learning_rate": 1.058358438244066e-06, "loss": 0.6574, "step": 3901 }, { "epoch": 0.5, "grad_norm": 0.868098052130379, "learning_rate": 1.057946475002535e-06, "loss": 0.6124, "step": 3902 }, { "epoch": 0.5, "grad_norm": 0.7150064478781477, "learning_rate": 1.0575345018933226e-06, "loss": 0.5605, "step": 3903 }, { "epoch": 0.5, "grad_norm": 0.7058665895310818, "learning_rate": 1.0571225189865841e-06, "loss": 0.5152, "step": 3904 }, { "epoch": 0.5, "grad_norm": 0.786037628469416, "learning_rate": 1.056710526352476e-06, "loss": 0.6119, "step": 3905 }, { "epoch": 0.5, "grad_norm": 0.9175916653397623, "learning_rate": 1.0562985240611557e-06, "loss": 0.5961, "step": 3906 }, { "epoch": 0.5, "grad_norm": 0.9191431536283905, "learning_rate": 1.0558865121827835e-06, "loss": 0.663, "step": 3907 }, { "epoch": 0.5, "grad_norm": 0.7306920205492291, "learning_rate": 1.0554744907875203e-06, "loss": 0.5682, "step": 3908 }, { "epoch": 0.5, "grad_norm": 0.7447064032929676, "learning_rate": 1.0550624599455293e-06, "loss": 0.5806, "step": 3909 }, { "epoch": 0.5, "grad_norm": 0.733320319574239, "learning_rate": 1.054650419726975e-06, "loss": 0.6166, "step": 3910 }, { "epoch": 0.5, "grad_norm": 0.8592750930973613, "learning_rate": 1.0542383702020236e-06, "loss": 0.6186, "step": 3911 }, { "epoch": 0.5, "grad_norm": 0.8238014266103234, "learning_rate": 1.053826311440843e-06, "loss": 0.6188, "step": 3912 }, { "epoch": 0.5, "grad_norm": 0.7570717714256754, "learning_rate": 1.053414243513602e-06, "loss": 0.5647, "step": 3913 }, { "epoch": 0.5, "grad_norm": 1.0139280007419562, "learning_rate": 1.053002166490472e-06, "loss": 0.6609, "step": 3914 }, { "epoch": 0.5, "grad_norm": 0.7054410266181337, "learning_rate": 1.0525900804416253e-06, "loss": 0.4938, "step": 3915 }, { "epoch": 0.5, "grad_norm": 0.9347438682503897, "learning_rate": 1.0521779854372352e-06, "loss": 0.6817, "step": 3916 }, { "epoch": 0.5, "grad_norm": 0.7965423866960685, "learning_rate": 1.0517658815474783e-06, "loss": 0.6266, "step": 3917 }, { "epoch": 0.5, "grad_norm": 0.7398869969764291, "learning_rate": 1.051353768842531e-06, "loss": 0.5385, "step": 3918 }, { "epoch": 0.5, "grad_norm": 0.8850875654796503, "learning_rate": 1.0509416473925718e-06, "loss": 0.634, "step": 3919 }, { "epoch": 0.5, "grad_norm": 0.6114771403782506, "learning_rate": 1.0505295172677808e-06, "loss": 0.5273, "step": 3920 }, { "epoch": 0.5, "grad_norm": 0.8201424147336556, "learning_rate": 1.0501173785383395e-06, "loss": 0.6353, "step": 3921 }, { "epoch": 0.5, "grad_norm": 0.7482803528353174, "learning_rate": 1.0497052312744305e-06, "loss": 0.5051, "step": 3922 }, { "epoch": 0.5, "grad_norm": 0.7458468457169541, "learning_rate": 1.0492930755462389e-06, "loss": 0.5479, "step": 3923 }, { "epoch": 0.5, "grad_norm": 0.6888045191343626, "learning_rate": 1.0488809114239497e-06, "loss": 0.5482, "step": 3924 }, { "epoch": 0.5, "grad_norm": 0.8179030633433859, "learning_rate": 1.0484687389777506e-06, "loss": 0.6072, "step": 3925 }, { "epoch": 0.5, "grad_norm": 0.6732904983991791, "learning_rate": 1.04805655827783e-06, "loss": 0.5396, "step": 3926 }, { "epoch": 0.5, "grad_norm": 0.6163327353073763, "learning_rate": 1.0476443693943784e-06, "loss": 0.4855, "step": 3927 }, { "epoch": 0.5, "grad_norm": 0.7807933292132991, "learning_rate": 1.047232172397587e-06, "loss": 0.5834, "step": 3928 }, { "epoch": 0.5, "grad_norm": 0.8805661562693393, "learning_rate": 1.0468199673576488e-06, "loss": 0.6062, "step": 3929 }, { "epoch": 0.5, "grad_norm": 0.8440207367625445, "learning_rate": 1.0464077543447579e-06, "loss": 0.6368, "step": 3930 }, { "epoch": 0.5, "grad_norm": 0.5943227126422429, "learning_rate": 1.0459955334291097e-06, "loss": 0.5306, "step": 3931 }, { "epoch": 0.5, "grad_norm": 0.7896803954432793, "learning_rate": 1.0455833046809014e-06, "loss": 0.6197, "step": 3932 }, { "epoch": 0.5, "grad_norm": 0.5917989574511008, "learning_rate": 1.0451710681703314e-06, "loss": 0.4784, "step": 3933 }, { "epoch": 0.5, "grad_norm": 0.9302562793942062, "learning_rate": 1.0447588239675992e-06, "loss": 0.6343, "step": 3934 }, { "epoch": 0.5, "grad_norm": 0.7996792728413433, "learning_rate": 1.044346572142905e-06, "loss": 0.6288, "step": 3935 }, { "epoch": 0.5, "grad_norm": 0.6283572343936312, "learning_rate": 1.043934312766452e-06, "loss": 0.5228, "step": 3936 }, { "epoch": 0.5, "grad_norm": 1.1109787567597211, "learning_rate": 1.0435220459084431e-06, "loss": 0.6398, "step": 3937 }, { "epoch": 0.5, "grad_norm": 0.6257724462070454, "learning_rate": 1.0431097716390834e-06, "loss": 0.5035, "step": 3938 }, { "epoch": 0.5, "grad_norm": 1.053415268460185, "learning_rate": 1.0426974900285782e-06, "loss": 0.6144, "step": 3939 }, { "epoch": 0.5, "grad_norm": 0.8213883921432065, "learning_rate": 1.0422852011471357e-06, "loss": 0.5746, "step": 3940 }, { "epoch": 0.5, "grad_norm": 0.902854843586186, "learning_rate": 1.0418729050649638e-06, "loss": 0.6444, "step": 3941 }, { "epoch": 0.5, "grad_norm": 0.8087522392584068, "learning_rate": 1.0414606018522725e-06, "loss": 0.5179, "step": 3942 }, { "epoch": 0.5, "grad_norm": 0.8729807567042316, "learning_rate": 1.0410482915792727e-06, "loss": 0.5912, "step": 3943 }, { "epoch": 0.5, "grad_norm": 0.8474807240135175, "learning_rate": 1.0406359743161763e-06, "loss": 0.6741, "step": 3944 }, { "epoch": 0.5, "grad_norm": 0.9667650959501345, "learning_rate": 1.0402236501331968e-06, "loss": 0.6589, "step": 3945 }, { "epoch": 0.5, "grad_norm": 0.9905821222995113, "learning_rate": 1.0398113191005488e-06, "loss": 0.6148, "step": 3946 }, { "epoch": 0.5, "grad_norm": 0.6928597986572332, "learning_rate": 1.0393989812884481e-06, "loss": 0.5533, "step": 3947 }, { "epoch": 0.5, "grad_norm": 0.8637172684357884, "learning_rate": 1.038986636767111e-06, "loss": 0.6296, "step": 3948 }, { "epoch": 0.5, "grad_norm": 0.6622042634972852, "learning_rate": 1.0385742856067563e-06, "loss": 0.5337, "step": 3949 }, { "epoch": 0.5, "grad_norm": 0.7128074260625501, "learning_rate": 1.038161927877602e-06, "loss": 0.5365, "step": 3950 }, { "epoch": 0.5, "grad_norm": 0.7038837964538542, "learning_rate": 1.0377495636498694e-06, "loss": 0.5547, "step": 3951 }, { "epoch": 0.5, "grad_norm": 0.8381262824610105, "learning_rate": 1.037337192993779e-06, "loss": 0.6466, "step": 3952 }, { "epoch": 0.5, "grad_norm": 0.9398967536161033, "learning_rate": 1.036924815979554e-06, "loss": 0.667, "step": 3953 }, { "epoch": 0.5, "grad_norm": 0.7910159798527919, "learning_rate": 1.0365124326774175e-06, "loss": 0.6295, "step": 3954 }, { "epoch": 0.5, "grad_norm": 0.7323735986017068, "learning_rate": 1.0361000431575938e-06, "loss": 0.5664, "step": 3955 }, { "epoch": 0.5, "grad_norm": 0.7761849050641181, "learning_rate": 1.0356876474903091e-06, "loss": 0.5664, "step": 3956 }, { "epoch": 0.5, "grad_norm": 0.8865195041143753, "learning_rate": 1.0352752457457898e-06, "loss": 0.6375, "step": 3957 }, { "epoch": 0.5, "grad_norm": 0.9427649083299603, "learning_rate": 1.0348628379942636e-06, "loss": 0.6284, "step": 3958 }, { "epoch": 0.5, "grad_norm": 0.7261646227967554, "learning_rate": 1.0344504243059593e-06, "loss": 0.5974, "step": 3959 }, { "epoch": 0.5, "grad_norm": 0.7870359545237403, "learning_rate": 1.0340380047511067e-06, "loss": 0.5265, "step": 3960 }, { "epoch": 0.5, "grad_norm": 0.6049997408918022, "learning_rate": 1.0336255793999364e-06, "loss": 0.5043, "step": 3961 }, { "epoch": 0.5, "grad_norm": 0.8874125526695527, "learning_rate": 1.0332131483226803e-06, "loss": 0.671, "step": 3962 }, { "epoch": 0.5, "grad_norm": 0.8856247810297638, "learning_rate": 1.032800711589571e-06, "loss": 0.6455, "step": 3963 }, { "epoch": 0.51, "grad_norm": 0.6199453692921119, "learning_rate": 1.0323882692708421e-06, "loss": 0.561, "step": 3964 }, { "epoch": 0.51, "grad_norm": 0.7753468811998736, "learning_rate": 1.0319758214367284e-06, "loss": 0.5393, "step": 3965 }, { "epoch": 0.51, "grad_norm": 0.8264809156062267, "learning_rate": 1.0315633681574656e-06, "loss": 0.5653, "step": 3966 }, { "epoch": 0.51, "grad_norm": 0.7300175095099699, "learning_rate": 1.03115090950329e-06, "loss": 0.5652, "step": 3967 }, { "epoch": 0.51, "grad_norm": 0.7835390353198578, "learning_rate": 1.0307384455444388e-06, "loss": 0.6166, "step": 3968 }, { "epoch": 0.51, "grad_norm": 0.6322211766332422, "learning_rate": 1.0303259763511507e-06, "loss": 0.5242, "step": 3969 }, { "epoch": 0.51, "grad_norm": 0.6380481297410306, "learning_rate": 1.029913501993665e-06, "loss": 0.5632, "step": 3970 }, { "epoch": 0.51, "grad_norm": 0.9564291574942357, "learning_rate": 1.029501022542221e-06, "loss": 0.6433, "step": 3971 }, { "epoch": 0.51, "grad_norm": 0.5898622615693613, "learning_rate": 1.0290885380670606e-06, "loss": 0.4788, "step": 3972 }, { "epoch": 0.51, "grad_norm": 0.6651670926640367, "learning_rate": 1.0286760486384254e-06, "loss": 0.5336, "step": 3973 }, { "epoch": 0.51, "grad_norm": 0.6565061791168472, "learning_rate": 1.0282635543265576e-06, "loss": 0.5363, "step": 3974 }, { "epoch": 0.51, "grad_norm": 0.7270690913400814, "learning_rate": 1.027851055201701e-06, "loss": 0.6176, "step": 3975 }, { "epoch": 0.51, "grad_norm": 0.7047257352390576, "learning_rate": 1.0274385513340997e-06, "loss": 0.5408, "step": 3976 }, { "epoch": 0.51, "grad_norm": 0.769791455769964, "learning_rate": 1.0270260427939993e-06, "loss": 0.575, "step": 3977 }, { "epoch": 0.51, "grad_norm": 0.800814138557785, "learning_rate": 1.0266135296516452e-06, "loss": 0.5262, "step": 3978 }, { "epoch": 0.51, "grad_norm": 0.9860361085103975, "learning_rate": 1.0262010119772841e-06, "loss": 0.6296, "step": 3979 }, { "epoch": 0.51, "grad_norm": 1.567476773407051, "learning_rate": 1.0257884898411637e-06, "loss": 0.607, "step": 3980 }, { "epoch": 0.51, "grad_norm": 0.874877982112583, "learning_rate": 1.025375963313532e-06, "loss": 0.6855, "step": 3981 }, { "epoch": 0.51, "grad_norm": 0.6483971447857484, "learning_rate": 1.0249634324646383e-06, "loss": 0.5226, "step": 3982 }, { "epoch": 0.51, "grad_norm": 0.798495018448395, "learning_rate": 1.0245508973647321e-06, "loss": 0.4909, "step": 3983 }, { "epoch": 0.51, "grad_norm": 0.6942577721945494, "learning_rate": 1.0241383580840639e-06, "loss": 0.5504, "step": 3984 }, { "epoch": 0.51, "grad_norm": 0.7651904961850547, "learning_rate": 1.0237258146928847e-06, "loss": 0.5483, "step": 3985 }, { "epoch": 0.51, "grad_norm": 0.6277175529287368, "learning_rate": 1.0233132672614466e-06, "loss": 0.5178, "step": 3986 }, { "epoch": 0.51, "grad_norm": 0.7883856533443289, "learning_rate": 1.0229007158600018e-06, "loss": 0.5856, "step": 3987 }, { "epoch": 0.51, "grad_norm": 0.8361812929931454, "learning_rate": 1.0224881605588035e-06, "loss": 0.5502, "step": 3988 }, { "epoch": 0.51, "grad_norm": 0.7951486772944203, "learning_rate": 1.0220756014281062e-06, "loss": 0.5573, "step": 3989 }, { "epoch": 0.51, "grad_norm": 0.7984559250100252, "learning_rate": 1.0216630385381637e-06, "loss": 0.5613, "step": 3990 }, { "epoch": 0.51, "grad_norm": 0.797050952454509, "learning_rate": 1.0212504719592314e-06, "loss": 0.6482, "step": 3991 }, { "epoch": 0.51, "grad_norm": 0.6302645477617852, "learning_rate": 1.0208379017615652e-06, "loss": 0.5242, "step": 3992 }, { "epoch": 0.51, "grad_norm": 1.2678002733441267, "learning_rate": 1.0204253280154213e-06, "loss": 0.6062, "step": 3993 }, { "epoch": 0.51, "grad_norm": 1.1408691660851742, "learning_rate": 1.0200127507910568e-06, "loss": 0.6947, "step": 3994 }, { "epoch": 0.51, "grad_norm": 0.8093658203588632, "learning_rate": 1.0196001701587292e-06, "loss": 0.5875, "step": 3995 }, { "epoch": 0.51, "grad_norm": 0.6515160361575365, "learning_rate": 1.0191875861886971e-06, "loss": 0.5473, "step": 3996 }, { "epoch": 0.51, "grad_norm": 0.5843407811172102, "learning_rate": 1.0187749989512189e-06, "loss": 0.5166, "step": 3997 }, { "epoch": 0.51, "grad_norm": 0.6397536714736679, "learning_rate": 1.018362408516554e-06, "loss": 0.5572, "step": 3998 }, { "epoch": 0.51, "grad_norm": 0.7639246900847029, "learning_rate": 1.0179498149549622e-06, "loss": 0.6231, "step": 3999 }, { "epoch": 0.51, "grad_norm": 0.8597893018924857, "learning_rate": 1.017537218336704e-06, "loss": 0.5589, "step": 4000 }, { "epoch": 0.51, "grad_norm": 0.7144516680330706, "learning_rate": 1.01712461873204e-06, "loss": 0.596, "step": 4001 }, { "epoch": 0.51, "grad_norm": 1.2383366817385282, "learning_rate": 1.016712016211232e-06, "loss": 0.6462, "step": 4002 }, { "epoch": 0.51, "grad_norm": 0.8790867234554465, "learning_rate": 1.0162994108445417e-06, "loss": 0.5953, "step": 4003 }, { "epoch": 0.51, "grad_norm": 0.9610151138234565, "learning_rate": 1.0158868027022317e-06, "loss": 0.6477, "step": 4004 }, { "epoch": 0.51, "grad_norm": 0.6300057160524882, "learning_rate": 1.0154741918545645e-06, "loss": 0.5366, "step": 4005 }, { "epoch": 0.51, "grad_norm": 0.6970676041769038, "learning_rate": 1.0150615783718038e-06, "loss": 0.5302, "step": 4006 }, { "epoch": 0.51, "grad_norm": 0.6910440085633736, "learning_rate": 1.0146489623242128e-06, "loss": 0.5324, "step": 4007 }, { "epoch": 0.51, "grad_norm": 0.828281284801449, "learning_rate": 1.0142363437820564e-06, "loss": 0.5874, "step": 4008 }, { "epoch": 0.51, "grad_norm": 0.7092287996038259, "learning_rate": 1.013823722815599e-06, "loss": 0.4951, "step": 4009 }, { "epoch": 0.51, "grad_norm": 0.7324447642374535, "learning_rate": 1.0134110994951058e-06, "loss": 0.542, "step": 4010 }, { "epoch": 0.51, "grad_norm": 0.7380816150657683, "learning_rate": 1.0129984738908416e-06, "loss": 0.4936, "step": 4011 }, { "epoch": 0.51, "grad_norm": 0.7478899266467467, "learning_rate": 1.0125858460730733e-06, "loss": 0.5219, "step": 4012 }, { "epoch": 0.51, "grad_norm": 0.7172115090257446, "learning_rate": 1.0121732161120661e-06, "loss": 0.5471, "step": 4013 }, { "epoch": 0.51, "grad_norm": 0.7414220473019699, "learning_rate": 1.011760584078087e-06, "loss": 0.5417, "step": 4014 }, { "epoch": 0.51, "grad_norm": 0.6672186992636696, "learning_rate": 1.0113479500414032e-06, "loss": 0.5911, "step": 4015 }, { "epoch": 0.51, "grad_norm": 0.7478806271094177, "learning_rate": 1.0109353140722817e-06, "loss": 0.5412, "step": 4016 }, { "epoch": 0.51, "grad_norm": 0.7148435444411001, "learning_rate": 1.01052267624099e-06, "loss": 0.5978, "step": 4017 }, { "epoch": 0.51, "grad_norm": 1.0211470877494204, "learning_rate": 1.0101100366177962e-06, "loss": 0.6284, "step": 4018 }, { "epoch": 0.51, "grad_norm": 0.8132487899592956, "learning_rate": 1.0096973952729685e-06, "loss": 0.6083, "step": 4019 }, { "epoch": 0.51, "grad_norm": 0.9444978934803208, "learning_rate": 1.0092847522767754e-06, "loss": 0.6183, "step": 4020 }, { "epoch": 0.51, "grad_norm": 0.8729443688646102, "learning_rate": 1.0088721076994857e-06, "loss": 0.6258, "step": 4021 }, { "epoch": 0.51, "grad_norm": 0.6231189410952798, "learning_rate": 1.0084594616113687e-06, "loss": 0.5141, "step": 4022 }, { "epoch": 0.51, "grad_norm": 0.8521687622595465, "learning_rate": 1.008046814082693e-06, "loss": 0.6297, "step": 4023 }, { "epoch": 0.51, "grad_norm": 0.6198862621132919, "learning_rate": 1.007634165183729e-06, "loss": 0.5403, "step": 4024 }, { "epoch": 0.51, "grad_norm": 2.239050235525572, "learning_rate": 1.0072215149847463e-06, "loss": 0.6066, "step": 4025 }, { "epoch": 0.51, "grad_norm": 0.701925588095664, "learning_rate": 1.0068088635560148e-06, "loss": 0.5522, "step": 4026 }, { "epoch": 0.51, "grad_norm": 1.171306524508904, "learning_rate": 1.0063962109678048e-06, "loss": 0.6609, "step": 4027 }, { "epoch": 0.51, "grad_norm": 0.6334884131240638, "learning_rate": 1.0059835572903866e-06, "loss": 0.5388, "step": 4028 }, { "epoch": 0.51, "grad_norm": 0.7646913063301076, "learning_rate": 1.005570902594031e-06, "loss": 0.5719, "step": 4029 }, { "epoch": 0.51, "grad_norm": 0.7313513058847428, "learning_rate": 1.0051582469490088e-06, "loss": 0.5531, "step": 4030 }, { "epoch": 0.51, "grad_norm": 0.925651997099406, "learning_rate": 1.0047455904255907e-06, "loss": 0.6538, "step": 4031 }, { "epoch": 0.51, "grad_norm": 0.934075621514861, "learning_rate": 1.0043329330940482e-06, "loss": 0.6548, "step": 4032 }, { "epoch": 0.51, "grad_norm": 0.7231849018440127, "learning_rate": 1.003920275024652e-06, "loss": 0.5393, "step": 4033 }, { "epoch": 0.51, "grad_norm": 0.8357336702110518, "learning_rate": 1.003507616287674e-06, "loss": 0.6169, "step": 4034 }, { "epoch": 0.51, "grad_norm": 0.6999159288523993, "learning_rate": 1.0030949569533857e-06, "loss": 0.5442, "step": 4035 }, { "epoch": 0.51, "grad_norm": 0.7100283487511012, "learning_rate": 1.002682297092058e-06, "loss": 0.5666, "step": 4036 }, { "epoch": 0.51, "grad_norm": 0.747897229935645, "learning_rate": 1.0022696367739633e-06, "loss": 0.5328, "step": 4037 }, { "epoch": 0.51, "grad_norm": 0.7480696118540834, "learning_rate": 1.0018569760693732e-06, "loss": 0.5495, "step": 4038 }, { "epoch": 0.51, "grad_norm": 0.8526365738518683, "learning_rate": 1.0014443150485593e-06, "loss": 0.6171, "step": 4039 }, { "epoch": 0.51, "grad_norm": 0.8455812581849723, "learning_rate": 1.0010316537817938e-06, "loss": 0.5973, "step": 4040 }, { "epoch": 0.51, "grad_norm": 1.1796952877067763, "learning_rate": 1.0006189923393482e-06, "loss": 0.5546, "step": 4041 }, { "epoch": 0.51, "grad_norm": 1.0861382639728887, "learning_rate": 1.0002063307914948e-06, "loss": 0.6262, "step": 4042 }, { "epoch": 0.52, "grad_norm": 0.8046297798717974, "learning_rate": 9.997936692085052e-07, "loss": 0.6167, "step": 4043 }, { "epoch": 0.52, "grad_norm": 0.8613331644675869, "learning_rate": 9.993810076606515e-07, "loss": 0.5786, "step": 4044 }, { "epoch": 0.52, "grad_norm": 0.7521839550768583, "learning_rate": 9.98968346218206e-07, "loss": 0.5177, "step": 4045 }, { "epoch": 0.52, "grad_norm": 1.0166766197318529, "learning_rate": 9.985556849514404e-07, "loss": 0.6709, "step": 4046 }, { "epoch": 0.52, "grad_norm": 0.7152174356688125, "learning_rate": 9.981430239306265e-07, "loss": 0.5945, "step": 4047 }, { "epoch": 0.52, "grad_norm": 0.7015662948508359, "learning_rate": 9.977303632260364e-07, "loss": 0.5405, "step": 4048 }, { "epoch": 0.52, "grad_norm": 0.7492238653394052, "learning_rate": 9.973177029079418e-07, "loss": 0.5465, "step": 4049 }, { "epoch": 0.52, "grad_norm": 0.6741582586124618, "learning_rate": 9.969050430466145e-07, "loss": 0.5227, "step": 4050 }, { "epoch": 0.52, "grad_norm": 0.7598857786946245, "learning_rate": 9.964923837123258e-07, "loss": 0.5133, "step": 4051 }, { "epoch": 0.52, "grad_norm": 0.8080740783606586, "learning_rate": 9.960797249753477e-07, "loss": 0.5836, "step": 4052 }, { "epoch": 0.52, "grad_norm": 0.723092833199605, "learning_rate": 9.956670669059517e-07, "loss": 0.5534, "step": 4053 }, { "epoch": 0.52, "grad_norm": 0.845508530046538, "learning_rate": 9.952544095744092e-07, "loss": 0.5422, "step": 4054 }, { "epoch": 0.52, "grad_norm": 1.09747854274627, "learning_rate": 9.94841753050991e-07, "loss": 0.6221, "step": 4055 }, { "epoch": 0.52, "grad_norm": 1.2684147479672245, "learning_rate": 9.944290974059689e-07, "loss": 0.6907, "step": 4056 }, { "epoch": 0.52, "grad_norm": 0.7512029971245496, "learning_rate": 9.940164427096131e-07, "loss": 0.5354, "step": 4057 }, { "epoch": 0.52, "grad_norm": 0.6525944606465818, "learning_rate": 9.93603789032195e-07, "loss": 0.5526, "step": 4058 }, { "epoch": 0.52, "grad_norm": 0.6818785319605767, "learning_rate": 9.93191136443985e-07, "loss": 0.553, "step": 4059 }, { "epoch": 0.52, "grad_norm": 0.6749978606065694, "learning_rate": 9.927784850152536e-07, "loss": 0.5619, "step": 4060 }, { "epoch": 0.52, "grad_norm": 0.88130006013368, "learning_rate": 9.923658348162708e-07, "loss": 0.5678, "step": 4061 }, { "epoch": 0.52, "grad_norm": 0.6988150931329539, "learning_rate": 9.919531859173067e-07, "loss": 0.5893, "step": 4062 }, { "epoch": 0.52, "grad_norm": 0.8273146938615004, "learning_rate": 9.915405383886315e-07, "loss": 0.6574, "step": 4063 }, { "epoch": 0.52, "grad_norm": 0.6273508340472395, "learning_rate": 9.911278923005143e-07, "loss": 0.5359, "step": 4064 }, { "epoch": 0.52, "grad_norm": 0.9238912868431238, "learning_rate": 9.907152477232247e-07, "loss": 0.6204, "step": 4065 }, { "epoch": 0.52, "grad_norm": 0.9253023417377175, "learning_rate": 9.903026047270315e-07, "loss": 0.6476, "step": 4066 }, { "epoch": 0.52, "grad_norm": 1.0025960092813542, "learning_rate": 9.898899633822037e-07, "loss": 0.6722, "step": 4067 }, { "epoch": 0.52, "grad_norm": 0.6348855971883023, "learning_rate": 9.8947732375901e-07, "loss": 0.5166, "step": 4068 }, { "epoch": 0.52, "grad_norm": 0.6545241748638911, "learning_rate": 9.890646859277182e-07, "loss": 0.5359, "step": 4069 }, { "epoch": 0.52, "grad_norm": 0.7061468852357173, "learning_rate": 9.886520499585967e-07, "loss": 0.5072, "step": 4070 }, { "epoch": 0.52, "grad_norm": 0.8161696141213848, "learning_rate": 9.882394159219128e-07, "loss": 0.6117, "step": 4071 }, { "epoch": 0.52, "grad_norm": 0.8270990711291616, "learning_rate": 9.87826783887934e-07, "loss": 0.6336, "step": 4072 }, { "epoch": 0.52, "grad_norm": 0.8166649378440293, "learning_rate": 9.87414153926927e-07, "loss": 0.615, "step": 4073 }, { "epoch": 0.52, "grad_norm": 0.7385222545312957, "learning_rate": 9.870015261091583e-07, "loss": 0.5288, "step": 4074 }, { "epoch": 0.52, "grad_norm": 0.7777795219296944, "learning_rate": 9.865889005048945e-07, "loss": 0.5727, "step": 4075 }, { "epoch": 0.52, "grad_norm": 0.6674958023494952, "learning_rate": 9.86176277184401e-07, "loss": 0.5715, "step": 4076 }, { "epoch": 0.52, "grad_norm": 1.3205863441284955, "learning_rate": 9.857636562179437e-07, "loss": 0.6632, "step": 4077 }, { "epoch": 0.52, "grad_norm": 0.9283006507110512, "learning_rate": 9.853510376757873e-07, "loss": 0.6157, "step": 4078 }, { "epoch": 0.52, "grad_norm": 0.72809740878621, "learning_rate": 9.849384216281965e-07, "loss": 0.6113, "step": 4079 }, { "epoch": 0.52, "grad_norm": 1.3967767993163964, "learning_rate": 9.845258081454354e-07, "loss": 0.6203, "step": 4080 }, { "epoch": 0.52, "grad_norm": 0.6185487967576118, "learning_rate": 9.841131972977684e-07, "loss": 0.5354, "step": 4081 }, { "epoch": 0.52, "grad_norm": 0.8401851779178235, "learning_rate": 9.837005891554582e-07, "loss": 0.5342, "step": 4082 }, { "epoch": 0.52, "grad_norm": 0.8334001564380961, "learning_rate": 9.83287983788768e-07, "loss": 0.624, "step": 4083 }, { "epoch": 0.52, "grad_norm": 0.7278718850449575, "learning_rate": 9.8287538126796e-07, "loss": 0.5242, "step": 4084 }, { "epoch": 0.52, "grad_norm": 1.0845750589989187, "learning_rate": 9.824627816632961e-07, "loss": 0.5557, "step": 4085 }, { "epoch": 0.52, "grad_norm": 0.6759707660138367, "learning_rate": 9.820501850450377e-07, "loss": 0.5345, "step": 4086 }, { "epoch": 0.52, "grad_norm": 0.7191545147028133, "learning_rate": 9.816375914834461e-07, "loss": 0.5386, "step": 4087 }, { "epoch": 0.52, "grad_norm": 1.0861492873234466, "learning_rate": 9.812250010487812e-07, "loss": 0.6304, "step": 4088 }, { "epoch": 0.52, "grad_norm": 2.022119259564415, "learning_rate": 9.808124138113028e-07, "loss": 0.6755, "step": 4089 }, { "epoch": 0.52, "grad_norm": 0.8835588333804869, "learning_rate": 9.803998298412709e-07, "loss": 0.595, "step": 4090 }, { "epoch": 0.52, "grad_norm": 0.7014126960393718, "learning_rate": 9.799872492089433e-07, "loss": 0.5778, "step": 4091 }, { "epoch": 0.52, "grad_norm": 0.6175034394161963, "learning_rate": 9.795746719845788e-07, "loss": 0.4809, "step": 4092 }, { "epoch": 0.52, "grad_norm": 0.9347880628166808, "learning_rate": 9.79162098238435e-07, "loss": 0.6007, "step": 4093 }, { "epoch": 0.52, "grad_norm": 0.8503609216707406, "learning_rate": 9.787495280407687e-07, "loss": 0.5999, "step": 4094 }, { "epoch": 0.52, "grad_norm": 0.6503850637885226, "learning_rate": 9.783369614618364e-07, "loss": 0.5497, "step": 4095 }, { "epoch": 0.52, "grad_norm": 0.8699228803576291, "learning_rate": 9.77924398571894e-07, "loss": 0.6623, "step": 4096 }, { "epoch": 0.52, "grad_norm": 0.7009152718132402, "learning_rate": 9.775118394411964e-07, "loss": 0.5476, "step": 4097 }, { "epoch": 0.52, "grad_norm": 0.692576490631869, "learning_rate": 9.770992841399983e-07, "loss": 0.5777, "step": 4098 }, { "epoch": 0.52, "grad_norm": 0.7443609267917054, "learning_rate": 9.766867327385535e-07, "loss": 0.5534, "step": 4099 }, { "epoch": 0.52, "grad_norm": 0.7562628993151985, "learning_rate": 9.762741853071152e-07, "loss": 0.5819, "step": 4100 }, { "epoch": 0.52, "grad_norm": 0.862377459986156, "learning_rate": 9.75861641915936e-07, "loss": 0.6008, "step": 4101 }, { "epoch": 0.52, "grad_norm": 0.8167814227376619, "learning_rate": 9.75449102635268e-07, "loss": 0.5602, "step": 4102 }, { "epoch": 0.52, "grad_norm": 0.8120384608859262, "learning_rate": 9.750365675353618e-07, "loss": 0.587, "step": 4103 }, { "epoch": 0.52, "grad_norm": 0.8127878281621244, "learning_rate": 9.74624036686468e-07, "loss": 0.5405, "step": 4104 }, { "epoch": 0.52, "grad_norm": 0.9096109424939246, "learning_rate": 9.742115101588362e-07, "loss": 0.5947, "step": 4105 }, { "epoch": 0.52, "grad_norm": 0.6686785476644775, "learning_rate": 9.73798988022716e-07, "loss": 0.5708, "step": 4106 }, { "epoch": 0.52, "grad_norm": 0.8810009263291421, "learning_rate": 9.73386470348355e-07, "loss": 0.5863, "step": 4107 }, { "epoch": 0.52, "grad_norm": 0.7336351978172198, "learning_rate": 9.729739572060009e-07, "loss": 0.5689, "step": 4108 }, { "epoch": 0.52, "grad_norm": 0.639124592665011, "learning_rate": 9.725614486659004e-07, "loss": 0.4765, "step": 4109 }, { "epoch": 0.52, "grad_norm": 0.7446335712629271, "learning_rate": 9.721489447982992e-07, "loss": 0.48, "step": 4110 }, { "epoch": 0.52, "grad_norm": 1.1309245837710413, "learning_rate": 9.717364456734426e-07, "loss": 0.6334, "step": 4111 }, { "epoch": 0.52, "grad_norm": 0.894309706664065, "learning_rate": 9.713239513615747e-07, "loss": 0.5599, "step": 4112 }, { "epoch": 0.52, "grad_norm": 0.8825534767707301, "learning_rate": 9.709114619329393e-07, "loss": 0.6155, "step": 4113 }, { "epoch": 0.52, "grad_norm": 0.5794773782431641, "learning_rate": 9.704989774577788e-07, "loss": 0.5106, "step": 4114 }, { "epoch": 0.52, "grad_norm": 1.0458752187601215, "learning_rate": 9.700864980063351e-07, "loss": 0.6638, "step": 4115 }, { "epoch": 0.52, "grad_norm": 0.8495442961341314, "learning_rate": 9.696740236488494e-07, "loss": 0.6258, "step": 4116 }, { "epoch": 0.52, "grad_norm": 0.7332250736427953, "learning_rate": 9.692615544555613e-07, "loss": 0.531, "step": 4117 }, { "epoch": 0.52, "grad_norm": 0.8433618558250839, "learning_rate": 9.688490904967102e-07, "loss": 0.6506, "step": 4118 }, { "epoch": 0.52, "grad_norm": 0.6553116035863851, "learning_rate": 9.684366318425345e-07, "loss": 0.5569, "step": 4119 }, { "epoch": 0.52, "grad_norm": 0.7376833076759458, "learning_rate": 9.680241785632717e-07, "loss": 0.5834, "step": 4120 }, { "epoch": 0.53, "grad_norm": 0.893288830920056, "learning_rate": 9.67611730729158e-07, "loss": 0.5953, "step": 4121 }, { "epoch": 0.53, "grad_norm": 0.6819016325338135, "learning_rate": 9.671992884104294e-07, "loss": 0.5777, "step": 4122 }, { "epoch": 0.53, "grad_norm": 0.8342987725345183, "learning_rate": 9.6678685167732e-07, "loss": 0.6459, "step": 4123 }, { "epoch": 0.53, "grad_norm": 0.9615942048320508, "learning_rate": 9.66374420600064e-07, "loss": 0.6392, "step": 4124 }, { "epoch": 0.53, "grad_norm": 0.6929793217487995, "learning_rate": 9.659619952488935e-07, "loss": 0.5604, "step": 4125 }, { "epoch": 0.53, "grad_norm": 0.6482765316986931, "learning_rate": 9.655495756940409e-07, "loss": 0.5591, "step": 4126 }, { "epoch": 0.53, "grad_norm": 0.9203992084135164, "learning_rate": 9.651371620057365e-07, "loss": 0.6121, "step": 4127 }, { "epoch": 0.53, "grad_norm": 0.8632544329518762, "learning_rate": 9.647247542542103e-07, "loss": 0.6373, "step": 4128 }, { "epoch": 0.53, "grad_norm": 0.8475211859031183, "learning_rate": 9.643123525096912e-07, "loss": 0.6143, "step": 4129 }, { "epoch": 0.53, "grad_norm": 0.6294951497084769, "learning_rate": 9.638999568424063e-07, "loss": 0.5649, "step": 4130 }, { "epoch": 0.53, "grad_norm": 0.5986176438968739, "learning_rate": 9.634875673225826e-07, "loss": 0.5217, "step": 4131 }, { "epoch": 0.53, "grad_norm": 0.7860857569223271, "learning_rate": 9.63075184020446e-07, "loss": 0.6208, "step": 4132 }, { "epoch": 0.53, "grad_norm": 0.9030222268724944, "learning_rate": 9.626628070062208e-07, "loss": 0.6027, "step": 4133 }, { "epoch": 0.53, "grad_norm": 0.8001817069233831, "learning_rate": 9.622504363501307e-07, "loss": 0.523, "step": 4134 }, { "epoch": 0.53, "grad_norm": 0.7330516788618869, "learning_rate": 9.61838072122398e-07, "loss": 0.5218, "step": 4135 }, { "epoch": 0.53, "grad_norm": 0.8144771634172261, "learning_rate": 9.61425714393244e-07, "loss": 0.6025, "step": 4136 }, { "epoch": 0.53, "grad_norm": 0.9175324610900722, "learning_rate": 9.61013363232889e-07, "loss": 0.6606, "step": 4137 }, { "epoch": 0.53, "grad_norm": 0.6340844375508794, "learning_rate": 9.606010187115522e-07, "loss": 0.5168, "step": 4138 }, { "epoch": 0.53, "grad_norm": 0.7692249481244269, "learning_rate": 9.601886808994513e-07, "loss": 0.5542, "step": 4139 }, { "epoch": 0.53, "grad_norm": 0.8024708904434787, "learning_rate": 9.597763498668033e-07, "loss": 0.5289, "step": 4140 }, { "epoch": 0.53, "grad_norm": 0.7186982380169172, "learning_rate": 9.593640256838238e-07, "loss": 0.56, "step": 4141 }, { "epoch": 0.53, "grad_norm": 0.6687348417920621, "learning_rate": 9.589517084207276e-07, "loss": 0.5466, "step": 4142 }, { "epoch": 0.53, "grad_norm": 0.9579639208483804, "learning_rate": 9.585393981477278e-07, "loss": 0.6062, "step": 4143 }, { "epoch": 0.53, "grad_norm": 1.0112724464652076, "learning_rate": 9.581270949350365e-07, "loss": 0.6732, "step": 4144 }, { "epoch": 0.53, "grad_norm": 0.7240882816628017, "learning_rate": 9.577147988528642e-07, "loss": 0.5245, "step": 4145 }, { "epoch": 0.53, "grad_norm": 0.7697764037823359, "learning_rate": 9.573025099714217e-07, "loss": 0.5474, "step": 4146 }, { "epoch": 0.53, "grad_norm": 0.7279697277335438, "learning_rate": 9.568902283609167e-07, "loss": 0.5966, "step": 4147 }, { "epoch": 0.53, "grad_norm": 0.5965898761272731, "learning_rate": 9.564779540915568e-07, "loss": 0.515, "step": 4148 }, { "epoch": 0.53, "grad_norm": 1.0917314470292665, "learning_rate": 9.56065687233548e-07, "loss": 0.6098, "step": 4149 }, { "epoch": 0.53, "grad_norm": 0.8472349514126258, "learning_rate": 9.556534278570946e-07, "loss": 0.6872, "step": 4150 }, { "epoch": 0.53, "grad_norm": 0.8689391579881356, "learning_rate": 9.552411760324008e-07, "loss": 0.6189, "step": 4151 }, { "epoch": 0.53, "grad_norm": 0.7104869424942737, "learning_rate": 9.548289318296683e-07, "loss": 0.568, "step": 4152 }, { "epoch": 0.53, "grad_norm": 0.8172918081341343, "learning_rate": 9.544166953190983e-07, "loss": 0.5752, "step": 4153 }, { "epoch": 0.53, "grad_norm": 0.8921670432884192, "learning_rate": 9.5400446657089e-07, "loss": 0.6329, "step": 4154 }, { "epoch": 0.53, "grad_norm": 0.6293551508688074, "learning_rate": 9.535922456552421e-07, "loss": 0.4918, "step": 4155 }, { "epoch": 0.53, "grad_norm": 0.8739927866237936, "learning_rate": 9.531800326423512e-07, "loss": 0.6089, "step": 4156 }, { "epoch": 0.53, "grad_norm": 0.8425753363889352, "learning_rate": 9.527678276024128e-07, "loss": 0.555, "step": 4157 }, { "epoch": 0.53, "grad_norm": 0.8136250011780449, "learning_rate": 9.523556306056215e-07, "loss": 0.6575, "step": 4158 }, { "epoch": 0.53, "grad_norm": 0.7414935937144411, "learning_rate": 9.519434417221699e-07, "loss": 0.5045, "step": 4159 }, { "epoch": 0.53, "grad_norm": 0.8652014013202736, "learning_rate": 9.515312610222495e-07, "loss": 0.6066, "step": 4160 }, { "epoch": 0.53, "grad_norm": 0.920411067000238, "learning_rate": 9.511190885760504e-07, "loss": 0.6239, "step": 4161 }, { "epoch": 0.53, "grad_norm": 0.6747263830882381, "learning_rate": 9.507069244537612e-07, "loss": 0.5323, "step": 4162 }, { "epoch": 0.53, "grad_norm": 0.623834230786709, "learning_rate": 9.502947687255692e-07, "loss": 0.4948, "step": 4163 }, { "epoch": 0.53, "grad_norm": 0.807972946340756, "learning_rate": 9.498826214616603e-07, "loss": 0.6457, "step": 4164 }, { "epoch": 0.53, "grad_norm": 0.9051060059517153, "learning_rate": 9.49470482732219e-07, "loss": 0.6695, "step": 4165 }, { "epoch": 0.53, "grad_norm": 0.5634847321969713, "learning_rate": 9.490583526074279e-07, "loss": 0.5021, "step": 4166 }, { "epoch": 0.53, "grad_norm": 1.87319012061689, "learning_rate": 9.486462311574688e-07, "loss": 0.6282, "step": 4167 }, { "epoch": 0.53, "grad_norm": 0.751955318502002, "learning_rate": 9.482341184525214e-07, "loss": 0.6161, "step": 4168 }, { "epoch": 0.53, "grad_norm": 0.8951591878594662, "learning_rate": 9.478220145627644e-07, "loss": 0.652, "step": 4169 }, { "epoch": 0.53, "grad_norm": 0.8045176229465819, "learning_rate": 9.474099195583749e-07, "loss": 0.6194, "step": 4170 }, { "epoch": 0.53, "grad_norm": 0.8309263624041435, "learning_rate": 9.46997833509528e-07, "loss": 0.6486, "step": 4171 }, { "epoch": 0.53, "grad_norm": 0.7166931918852805, "learning_rate": 9.465857564863979e-07, "loss": 0.5201, "step": 4172 }, { "epoch": 0.53, "grad_norm": 0.8190482028306955, "learning_rate": 9.461736885591571e-07, "loss": 0.672, "step": 4173 }, { "epoch": 0.53, "grad_norm": 0.783166751335455, "learning_rate": 9.457616297979763e-07, "loss": 0.5927, "step": 4174 }, { "epoch": 0.53, "grad_norm": 0.7054808774759557, "learning_rate": 9.45349580273025e-07, "loss": 0.5199, "step": 4175 }, { "epoch": 0.53, "grad_norm": 0.9386648757027651, "learning_rate": 9.449375400544707e-07, "loss": 0.6147, "step": 4176 }, { "epoch": 0.53, "grad_norm": 0.8774987289703396, "learning_rate": 9.445255092124796e-07, "loss": 0.6254, "step": 4177 }, { "epoch": 0.53, "grad_norm": 0.7102529867092512, "learning_rate": 9.441134878172165e-07, "loss": 0.5191, "step": 4178 }, { "epoch": 0.53, "grad_norm": 0.7203824021184562, "learning_rate": 9.437014759388442e-07, "loss": 0.5382, "step": 4179 }, { "epoch": 0.53, "grad_norm": 0.635036269412544, "learning_rate": 9.43289473647524e-07, "loss": 0.5193, "step": 4180 }, { "epoch": 0.53, "grad_norm": 0.6875585516314872, "learning_rate": 9.428774810134158e-07, "loss": 0.5239, "step": 4181 }, { "epoch": 0.53, "grad_norm": 0.9248002396459893, "learning_rate": 9.424654981066773e-07, "loss": 0.5802, "step": 4182 }, { "epoch": 0.53, "grad_norm": 0.6934972415896687, "learning_rate": 9.420535249974652e-07, "loss": 0.585, "step": 4183 }, { "epoch": 0.53, "grad_norm": 1.612241522579423, "learning_rate": 9.41641561755934e-07, "loss": 0.6191, "step": 4184 }, { "epoch": 0.53, "grad_norm": 0.7983685601355182, "learning_rate": 9.41229608452237e-07, "loss": 0.5765, "step": 4185 }, { "epoch": 0.53, "grad_norm": 0.6549671047661653, "learning_rate": 9.408176651565253e-07, "loss": 0.5718, "step": 4186 }, { "epoch": 0.53, "grad_norm": 0.8806633235662676, "learning_rate": 9.404057319389487e-07, "loss": 0.6016, "step": 4187 }, { "epoch": 0.53, "grad_norm": 0.6274154977745044, "learning_rate": 9.399938088696551e-07, "loss": 0.5235, "step": 4188 }, { "epoch": 0.53, "grad_norm": 0.7170616100805673, "learning_rate": 9.395818960187906e-07, "loss": 0.5218, "step": 4189 }, { "epoch": 0.53, "grad_norm": 0.6319217083116779, "learning_rate": 9.391699934564998e-07, "loss": 0.5434, "step": 4190 }, { "epoch": 0.53, "grad_norm": 1.060098937663879, "learning_rate": 9.387581012529254e-07, "loss": 0.6878, "step": 4191 }, { "epoch": 0.53, "grad_norm": 0.9027671311727884, "learning_rate": 9.383462194782085e-07, "loss": 0.6688, "step": 4192 }, { "epoch": 0.53, "grad_norm": 0.7463820968525148, "learning_rate": 9.379343482024879e-07, "loss": 0.6053, "step": 4193 }, { "epoch": 0.53, "grad_norm": 0.755942997218373, "learning_rate": 9.375224874959014e-07, "loss": 0.5625, "step": 4194 }, { "epoch": 0.53, "grad_norm": 0.9487369744149864, "learning_rate": 9.371106374285844e-07, "loss": 0.6172, "step": 4195 }, { "epoch": 0.53, "grad_norm": 0.9615080882888112, "learning_rate": 9.366987980706707e-07, "loss": 0.6151, "step": 4196 }, { "epoch": 0.53, "grad_norm": 0.8182133658377385, "learning_rate": 9.36286969492292e-07, "loss": 0.6226, "step": 4197 }, { "epoch": 0.53, "grad_norm": 0.6246577628125036, "learning_rate": 9.358751517635788e-07, "loss": 0.5315, "step": 4198 }, { "epoch": 0.53, "grad_norm": 0.7752934716701233, "learning_rate": 9.354633449546592e-07, "loss": 0.5403, "step": 4199 }, { "epoch": 0.54, "grad_norm": 0.8099846986146835, "learning_rate": 9.350515491356596e-07, "loss": 0.5239, "step": 4200 }, { "epoch": 0.54, "grad_norm": 0.7690886537026896, "learning_rate": 9.346397643767045e-07, "loss": 0.5865, "step": 4201 }, { "epoch": 0.54, "grad_norm": 0.6886239649275603, "learning_rate": 9.342279907479167e-07, "loss": 0.5827, "step": 4202 }, { "epoch": 0.54, "grad_norm": 0.615303133766021, "learning_rate": 9.338162283194165e-07, "loss": 0.5387, "step": 4203 }, { "epoch": 0.54, "grad_norm": 0.6035741787730857, "learning_rate": 9.334044771613235e-07, "loss": 0.5374, "step": 4204 }, { "epoch": 0.54, "grad_norm": 0.9010916903133757, "learning_rate": 9.329927373437542e-07, "loss": 0.5897, "step": 4205 }, { "epoch": 0.54, "grad_norm": 0.7559971978967951, "learning_rate": 9.325810089368236e-07, "loss": 0.5562, "step": 4206 }, { "epoch": 0.54, "grad_norm": 0.800751156836085, "learning_rate": 9.321692920106449e-07, "loss": 0.5412, "step": 4207 }, { "epoch": 0.54, "grad_norm": 0.7687374585983546, "learning_rate": 9.317575866353291e-07, "loss": 0.5667, "step": 4208 }, { "epoch": 0.54, "grad_norm": 0.8648650869801775, "learning_rate": 9.313458928809853e-07, "loss": 0.5964, "step": 4209 }, { "epoch": 0.54, "grad_norm": 0.6996239344262282, "learning_rate": 9.309342108177207e-07, "loss": 0.5678, "step": 4210 }, { "epoch": 0.54, "grad_norm": 0.5802374983355189, "learning_rate": 9.305225405156405e-07, "loss": 0.5184, "step": 4211 }, { "epoch": 0.54, "grad_norm": 1.0814671941293699, "learning_rate": 9.301108820448479e-07, "loss": 0.6023, "step": 4212 }, { "epoch": 0.54, "grad_norm": 0.6964403863164248, "learning_rate": 9.29699235475444e-07, "loss": 0.6232, "step": 4213 }, { "epoch": 0.54, "grad_norm": 0.6588083433173392, "learning_rate": 9.292876008775278e-07, "loss": 0.4995, "step": 4214 }, { "epoch": 0.54, "grad_norm": 0.6503971182274163, "learning_rate": 9.288759783211966e-07, "loss": 0.5332, "step": 4215 }, { "epoch": 0.54, "grad_norm": 0.7419889624899512, "learning_rate": 9.284643678765451e-07, "loss": 0.5873, "step": 4216 }, { "epoch": 0.54, "grad_norm": 0.6810367432761317, "learning_rate": 9.280527696136665e-07, "loss": 0.5054, "step": 4217 }, { "epoch": 0.54, "grad_norm": 0.8504324398746961, "learning_rate": 9.276411836026519e-07, "loss": 0.5888, "step": 4218 }, { "epoch": 0.54, "grad_norm": 0.6624640151641176, "learning_rate": 9.272296099135899e-07, "loss": 0.5312, "step": 4219 }, { "epoch": 0.54, "grad_norm": 0.7081170116488755, "learning_rate": 9.268180486165672e-07, "loss": 0.5521, "step": 4220 }, { "epoch": 0.54, "grad_norm": 0.7885729177519016, "learning_rate": 9.264064997816684e-07, "loss": 0.5333, "step": 4221 }, { "epoch": 0.54, "grad_norm": 0.8245222865873894, "learning_rate": 9.25994963478976e-07, "loss": 0.6442, "step": 4222 }, { "epoch": 0.54, "grad_norm": 0.6687644250895864, "learning_rate": 9.255834397785703e-07, "loss": 0.5331, "step": 4223 }, { "epoch": 0.54, "grad_norm": 0.7559072731626507, "learning_rate": 9.251719287505295e-07, "loss": 0.5778, "step": 4224 }, { "epoch": 0.54, "grad_norm": 0.6304886966194684, "learning_rate": 9.247604304649298e-07, "loss": 0.5677, "step": 4225 }, { "epoch": 0.54, "grad_norm": 0.8002334460815927, "learning_rate": 9.24348944991845e-07, "loss": 0.561, "step": 4226 }, { "epoch": 0.54, "grad_norm": 0.9670193810656911, "learning_rate": 9.239374724013466e-07, "loss": 0.6129, "step": 4227 }, { "epoch": 0.54, "grad_norm": 0.7361565055771447, "learning_rate": 9.235260127635043e-07, "loss": 0.4944, "step": 4228 }, { "epoch": 0.54, "grad_norm": 0.7951832168100417, "learning_rate": 9.23114566148385e-07, "loss": 0.5278, "step": 4229 }, { "epoch": 0.54, "grad_norm": 0.8816036566802652, "learning_rate": 9.227031326260544e-07, "loss": 0.5968, "step": 4230 }, { "epoch": 0.54, "grad_norm": 0.701058006739136, "learning_rate": 9.222917122665751e-07, "loss": 0.5358, "step": 4231 }, { "epoch": 0.54, "grad_norm": 0.8434419968040899, "learning_rate": 9.218803051400076e-07, "loss": 0.571, "step": 4232 }, { "epoch": 0.54, "grad_norm": 0.9220581378185699, "learning_rate": 9.214689113164103e-07, "loss": 0.6004, "step": 4233 }, { "epoch": 0.54, "grad_norm": 0.7369616029126502, "learning_rate": 9.210575308658393e-07, "loss": 0.523, "step": 4234 }, { "epoch": 0.54, "grad_norm": 0.8344457780753681, "learning_rate": 9.206461638583482e-07, "loss": 0.575, "step": 4235 }, { "epoch": 0.54, "grad_norm": 0.7136621231049342, "learning_rate": 9.202348103639889e-07, "loss": 0.5416, "step": 4236 }, { "epoch": 0.54, "grad_norm": 0.7389894331702248, "learning_rate": 9.198234704528102e-07, "loss": 0.511, "step": 4237 }, { "epoch": 0.54, "grad_norm": 0.7914182617678724, "learning_rate": 9.194121441948594e-07, "loss": 0.5527, "step": 4238 }, { "epoch": 0.54, "grad_norm": 0.6998737358799848, "learning_rate": 9.190008316601808e-07, "loss": 0.548, "step": 4239 }, { "epoch": 0.54, "grad_norm": 1.1144381728602772, "learning_rate": 9.185895329188168e-07, "loss": 0.673, "step": 4240 }, { "epoch": 0.54, "grad_norm": 1.3112793676318024, "learning_rate": 9.181782480408071e-07, "loss": 0.6057, "step": 4241 }, { "epoch": 0.54, "grad_norm": 1.1824852085945596, "learning_rate": 9.177669770961893e-07, "loss": 0.6408, "step": 4242 }, { "epoch": 0.54, "grad_norm": 0.8588036145415717, "learning_rate": 9.173557201549987e-07, "loss": 0.5546, "step": 4243 }, { "epoch": 0.54, "grad_norm": 0.6916381799373548, "learning_rate": 9.16944477287268e-07, "loss": 0.5518, "step": 4244 }, { "epoch": 0.54, "grad_norm": 1.019248619479582, "learning_rate": 9.165332485630275e-07, "loss": 0.6275, "step": 4245 }, { "epoch": 0.54, "grad_norm": 0.8266025853741792, "learning_rate": 9.16122034052305e-07, "loss": 0.5984, "step": 4246 }, { "epoch": 0.54, "grad_norm": 0.790795855755217, "learning_rate": 9.157108338251265e-07, "loss": 0.6308, "step": 4247 }, { "epoch": 0.54, "grad_norm": 0.8063475011404077, "learning_rate": 9.152996479515147e-07, "loss": 0.6056, "step": 4248 }, { "epoch": 0.54, "grad_norm": 0.7094044881662649, "learning_rate": 9.148884765014907e-07, "loss": 0.5366, "step": 4249 }, { "epoch": 0.54, "grad_norm": 0.8734309355566028, "learning_rate": 9.144773195450725e-07, "loss": 0.6106, "step": 4250 }, { "epoch": 0.54, "grad_norm": 0.7812895535800443, "learning_rate": 9.140661771522758e-07, "loss": 0.5527, "step": 4251 }, { "epoch": 0.54, "grad_norm": 0.9131467945721664, "learning_rate": 9.136550493931136e-07, "loss": 0.6349, "step": 4252 }, { "epoch": 0.54, "grad_norm": 0.64972067695835, "learning_rate": 9.132439363375972e-07, "loss": 0.5359, "step": 4253 }, { "epoch": 0.54, "grad_norm": 0.6024688009271509, "learning_rate": 9.128328380557346e-07, "loss": 0.5239, "step": 4254 }, { "epoch": 0.54, "grad_norm": 0.6943213785525026, "learning_rate": 9.124217546175317e-07, "loss": 0.5244, "step": 4255 }, { "epoch": 0.54, "grad_norm": 0.6711773526033229, "learning_rate": 9.120106860929914e-07, "loss": 0.5069, "step": 4256 }, { "epoch": 0.54, "grad_norm": 0.7370365690701016, "learning_rate": 9.115996325521147e-07, "loss": 0.5793, "step": 4257 }, { "epoch": 0.54, "grad_norm": 0.9607557265109317, "learning_rate": 9.111885940648995e-07, "loss": 0.5774, "step": 4258 }, { "epoch": 0.54, "grad_norm": 0.729675861972166, "learning_rate": 9.107775707013417e-07, "loss": 0.5849, "step": 4259 }, { "epoch": 0.54, "grad_norm": 0.6159676931679806, "learning_rate": 9.103665625314339e-07, "loss": 0.5242, "step": 4260 }, { "epoch": 0.54, "grad_norm": 0.6311393671390613, "learning_rate": 9.099555696251666e-07, "loss": 0.5439, "step": 4261 }, { "epoch": 0.54, "grad_norm": 0.973075032003322, "learning_rate": 9.09544592052528e-07, "loss": 0.6583, "step": 4262 }, { "epoch": 0.54, "grad_norm": 0.664693632049828, "learning_rate": 9.091336298835029e-07, "loss": 0.5571, "step": 4263 }, { "epoch": 0.54, "grad_norm": 0.6225378340117613, "learning_rate": 9.087226831880741e-07, "loss": 0.5328, "step": 4264 }, { "epoch": 0.54, "grad_norm": 0.8626233023735204, "learning_rate": 9.083117520362214e-07, "loss": 0.6063, "step": 4265 }, { "epoch": 0.54, "grad_norm": 0.7037474555819017, "learning_rate": 9.07900836497922e-07, "loss": 0.6001, "step": 4266 }, { "epoch": 0.54, "grad_norm": 0.7876191454985318, "learning_rate": 9.074899366431508e-07, "loss": 0.6011, "step": 4267 }, { "epoch": 0.54, "grad_norm": 0.8925878416249354, "learning_rate": 9.070790525418796e-07, "loss": 0.6071, "step": 4268 }, { "epoch": 0.54, "grad_norm": 0.8826752981924412, "learning_rate": 9.066681842640776e-07, "loss": 0.6402, "step": 4269 }, { "epoch": 0.54, "grad_norm": 0.8027240549731722, "learning_rate": 9.062573318797115e-07, "loss": 0.6166, "step": 4270 }, { "epoch": 0.54, "grad_norm": 0.6629788585223861, "learning_rate": 9.058464954587454e-07, "loss": 0.5439, "step": 4271 }, { "epoch": 0.54, "grad_norm": 0.7073235836323415, "learning_rate": 9.0543567507114e-07, "loss": 0.5219, "step": 4272 }, { "epoch": 0.54, "grad_norm": 0.8553963697294419, "learning_rate": 9.05024870786854e-07, "loss": 0.5751, "step": 4273 }, { "epoch": 0.54, "grad_norm": 0.9603666097145462, "learning_rate": 9.046140826758431e-07, "loss": 0.647, "step": 4274 }, { "epoch": 0.54, "grad_norm": 0.8930712127459738, "learning_rate": 9.042033108080601e-07, "loss": 0.6731, "step": 4275 }, { "epoch": 0.54, "grad_norm": 0.9915107142478033, "learning_rate": 9.037925552534555e-07, "loss": 0.6102, "step": 4276 }, { "epoch": 0.54, "grad_norm": 0.848178591658344, "learning_rate": 9.033818160819763e-07, "loss": 0.6469, "step": 4277 }, { "epoch": 0.55, "grad_norm": 0.6897620600640135, "learning_rate": 9.029710933635672e-07, "loss": 0.5506, "step": 4278 }, { "epoch": 0.55, "grad_norm": 0.7163357573932562, "learning_rate": 9.025603871681703e-07, "loss": 0.5245, "step": 4279 }, { "epoch": 0.55, "grad_norm": 0.7488244028779203, "learning_rate": 9.021496975657242e-07, "loss": 0.5558, "step": 4280 }, { "epoch": 0.55, "grad_norm": 0.6843840578917433, "learning_rate": 9.017390246261653e-07, "loss": 0.5473, "step": 4281 }, { "epoch": 0.55, "grad_norm": 0.6955301382588726, "learning_rate": 9.013283684194266e-07, "loss": 0.5324, "step": 4282 }, { "epoch": 0.55, "grad_norm": 1.0469520452571741, "learning_rate": 9.00917729015439e-07, "loss": 0.6166, "step": 4283 }, { "epoch": 0.55, "grad_norm": 0.8937561573387722, "learning_rate": 9.005071064841299e-07, "loss": 0.6304, "step": 4284 }, { "epoch": 0.55, "grad_norm": 0.9519448065067486, "learning_rate": 9.00096500895424e-07, "loss": 0.6801, "step": 4285 }, { "epoch": 0.55, "grad_norm": 0.6698841963338981, "learning_rate": 8.996859123192433e-07, "loss": 0.5287, "step": 4286 }, { "epoch": 0.55, "grad_norm": 0.8671753597844356, "learning_rate": 8.992753408255066e-07, "loss": 0.601, "step": 4287 }, { "epoch": 0.55, "grad_norm": 0.854077686564304, "learning_rate": 8.988647864841298e-07, "loss": 0.6285, "step": 4288 }, { "epoch": 0.55, "grad_norm": 0.9107788386640909, "learning_rate": 8.984542493650264e-07, "loss": 0.6099, "step": 4289 }, { "epoch": 0.55, "grad_norm": 0.9853097283346544, "learning_rate": 8.980437295381066e-07, "loss": 0.6807, "step": 4290 }, { "epoch": 0.55, "grad_norm": 0.9091257977303385, "learning_rate": 8.976332270732774e-07, "loss": 0.6033, "step": 4291 }, { "epoch": 0.55, "grad_norm": 0.7512386935749161, "learning_rate": 8.972227420404432e-07, "loss": 0.4975, "step": 4292 }, { "epoch": 0.55, "grad_norm": 0.929334066789151, "learning_rate": 8.968122745095052e-07, "loss": 0.655, "step": 4293 }, { "epoch": 0.55, "grad_norm": 0.8926586015303035, "learning_rate": 8.964018245503619e-07, "loss": 0.6033, "step": 4294 }, { "epoch": 0.55, "grad_norm": 1.9385424787861008, "learning_rate": 8.959913922329086e-07, "loss": 0.6549, "step": 4295 }, { "epoch": 0.55, "grad_norm": 0.6254800364839701, "learning_rate": 8.955809776270378e-07, "loss": 0.5057, "step": 4296 }, { "epoch": 0.55, "grad_norm": 0.7901618241725072, "learning_rate": 8.951705808026385e-07, "loss": 0.6265, "step": 4297 }, { "epoch": 0.55, "grad_norm": 0.674177411548708, "learning_rate": 8.947602018295972e-07, "loss": 0.5023, "step": 4298 }, { "epoch": 0.55, "grad_norm": 0.8154719348406759, "learning_rate": 8.943498407777973e-07, "loss": 0.5759, "step": 4299 }, { "epoch": 0.55, "grad_norm": 0.8501513334850993, "learning_rate": 8.939394977171188e-07, "loss": 0.6582, "step": 4300 }, { "epoch": 0.55, "grad_norm": 0.6169060023111056, "learning_rate": 8.935291727174385e-07, "loss": 0.4984, "step": 4301 }, { "epoch": 0.55, "grad_norm": 0.9119839941455917, "learning_rate": 8.931188658486313e-07, "loss": 0.6606, "step": 4302 }, { "epoch": 0.55, "grad_norm": 1.172266408554823, "learning_rate": 8.927085771805677e-07, "loss": 0.6224, "step": 4303 }, { "epoch": 0.55, "grad_norm": 0.7717861737413592, "learning_rate": 8.922983067831156e-07, "loss": 0.5011, "step": 4304 }, { "epoch": 0.55, "grad_norm": 0.7034668522712864, "learning_rate": 8.918880547261399e-07, "loss": 0.4934, "step": 4305 }, { "epoch": 0.55, "grad_norm": 0.6901386263488744, "learning_rate": 8.914778210795022e-07, "loss": 0.5433, "step": 4306 }, { "epoch": 0.55, "grad_norm": 1.115795217953946, "learning_rate": 8.91067605913061e-07, "loss": 0.657, "step": 4307 }, { "epoch": 0.55, "grad_norm": 0.87187866267265, "learning_rate": 8.906574092966716e-07, "loss": 0.6415, "step": 4308 }, { "epoch": 0.55, "grad_norm": 0.6630197762062282, "learning_rate": 8.902472313001863e-07, "loss": 0.537, "step": 4309 }, { "epoch": 0.55, "grad_norm": 0.8456538133432309, "learning_rate": 8.898370719934541e-07, "loss": 0.6959, "step": 4310 }, { "epoch": 0.55, "grad_norm": 0.8157688806982841, "learning_rate": 8.894269314463209e-07, "loss": 0.5515, "step": 4311 }, { "epoch": 0.55, "grad_norm": 0.7714051483240109, "learning_rate": 8.890168097286293e-07, "loss": 0.5564, "step": 4312 }, { "epoch": 0.55, "grad_norm": 0.856294607042504, "learning_rate": 8.886067069102188e-07, "loss": 0.6507, "step": 4313 }, { "epoch": 0.55, "grad_norm": 0.7001480784581334, "learning_rate": 8.881966230609253e-07, "loss": 0.5935, "step": 4314 }, { "epoch": 0.55, "grad_norm": 1.0243738644820575, "learning_rate": 8.877865582505826e-07, "loss": 0.7513, "step": 4315 }, { "epoch": 0.55, "grad_norm": 0.7980486920681704, "learning_rate": 8.873765125490198e-07, "loss": 0.6407, "step": 4316 }, { "epoch": 0.55, "grad_norm": 0.8899862037077803, "learning_rate": 8.869664860260638e-07, "loss": 0.6786, "step": 4317 }, { "epoch": 0.55, "grad_norm": 0.9578675815887929, "learning_rate": 8.865564787515374e-07, "loss": 0.6436, "step": 4318 }, { "epoch": 0.55, "grad_norm": 0.9347230672494605, "learning_rate": 8.86146490795261e-07, "loss": 0.5932, "step": 4319 }, { "epoch": 0.55, "grad_norm": 0.7594051120356744, "learning_rate": 8.85736522227051e-07, "loss": 0.6453, "step": 4320 }, { "epoch": 0.55, "grad_norm": 0.6622924231339418, "learning_rate": 8.853265731167209e-07, "loss": 0.5969, "step": 4321 }, { "epoch": 0.55, "grad_norm": 0.9221515064131514, "learning_rate": 8.849166435340808e-07, "loss": 0.68, "step": 4322 }, { "epoch": 0.55, "grad_norm": 1.111433727786823, "learning_rate": 8.845067335489373e-07, "loss": 0.5907, "step": 4323 }, { "epoch": 0.55, "grad_norm": 0.7451623818425012, "learning_rate": 8.840968432310939e-07, "loss": 0.6289, "step": 4324 }, { "epoch": 0.55, "grad_norm": 0.8002511370589401, "learning_rate": 8.836869726503506e-07, "loss": 0.5754, "step": 4325 }, { "epoch": 0.55, "grad_norm": 0.9837739990994597, "learning_rate": 8.832771218765043e-07, "loss": 0.6249, "step": 4326 }, { "epoch": 0.55, "grad_norm": 0.7656427804748445, "learning_rate": 8.828672909793477e-07, "loss": 0.6826, "step": 4327 }, { "epoch": 0.55, "grad_norm": 0.9315810794760181, "learning_rate": 8.824574800286715e-07, "loss": 0.5702, "step": 4328 }, { "epoch": 0.55, "grad_norm": 0.9739677109187496, "learning_rate": 8.820476890942618e-07, "loss": 0.7022, "step": 4329 }, { "epoch": 0.55, "grad_norm": 0.7862195473905366, "learning_rate": 8.81637918245902e-07, "loss": 0.6509, "step": 4330 }, { "epoch": 0.55, "grad_norm": 0.6116817229526151, "learning_rate": 8.812281675533715e-07, "loss": 0.5643, "step": 4331 }, { "epoch": 0.55, "grad_norm": 0.7945559412592476, "learning_rate": 8.808184370864467e-07, "loss": 0.635, "step": 4332 }, { "epoch": 0.55, "grad_norm": 0.9161091054723991, "learning_rate": 8.804087269149006e-07, "loss": 0.618, "step": 4333 }, { "epoch": 0.55, "grad_norm": 0.6730060510387534, "learning_rate": 8.799990371085022e-07, "loss": 0.5561, "step": 4334 }, { "epoch": 0.55, "grad_norm": 0.5714905441344984, "learning_rate": 8.795893677370176e-07, "loss": 0.5101, "step": 4335 }, { "epoch": 0.55, "grad_norm": 0.8273489649008264, "learning_rate": 8.791797188702095e-07, "loss": 0.607, "step": 4336 }, { "epoch": 0.55, "grad_norm": 0.9701996840807429, "learning_rate": 8.787700905778363e-07, "loss": 0.6709, "step": 4337 }, { "epoch": 0.55, "grad_norm": 0.9998299616877546, "learning_rate": 8.78360482929654e-07, "loss": 0.7, "step": 4338 }, { "epoch": 0.55, "grad_norm": 0.6526094794044471, "learning_rate": 8.77950895995414e-07, "loss": 0.525, "step": 4339 }, { "epoch": 0.55, "grad_norm": 0.8686475812395749, "learning_rate": 8.775413298448647e-07, "loss": 0.6289, "step": 4340 }, { "epoch": 0.55, "grad_norm": 0.6598174778975326, "learning_rate": 8.771317845477515e-07, "loss": 0.5122, "step": 4341 }, { "epoch": 0.55, "grad_norm": 2.04954213513913, "learning_rate": 8.767222601738154e-07, "loss": 0.641, "step": 4342 }, { "epoch": 0.55, "grad_norm": 0.8685400399035179, "learning_rate": 8.763127567927939e-07, "loss": 0.646, "step": 4343 }, { "epoch": 0.55, "grad_norm": 0.8192464439169792, "learning_rate": 8.759032744744214e-07, "loss": 0.6072, "step": 4344 }, { "epoch": 0.55, "grad_norm": 1.7489210133290618, "learning_rate": 8.754938132884285e-07, "loss": 0.6366, "step": 4345 }, { "epoch": 0.55, "grad_norm": 0.8562919675452393, "learning_rate": 8.750843733045421e-07, "loss": 0.5879, "step": 4346 }, { "epoch": 0.55, "grad_norm": 0.6834417499868792, "learning_rate": 8.746749545924856e-07, "loss": 0.5602, "step": 4347 }, { "epoch": 0.55, "grad_norm": 0.6921782362157067, "learning_rate": 8.742655572219781e-07, "loss": 0.5603, "step": 4348 }, { "epoch": 0.55, "grad_norm": 0.6702685895705479, "learning_rate": 8.73856181262737e-07, "loss": 0.4854, "step": 4349 }, { "epoch": 0.55, "grad_norm": 0.8258815100226267, "learning_rate": 8.734468267844739e-07, "loss": 0.5865, "step": 4350 }, { "epoch": 0.55, "grad_norm": 0.8945023333176835, "learning_rate": 8.730374938568978e-07, "loss": 0.6034, "step": 4351 }, { "epoch": 0.55, "grad_norm": 0.953152374906618, "learning_rate": 8.726281825497139e-07, "loss": 0.6409, "step": 4352 }, { "epoch": 0.55, "grad_norm": 0.8105626421193868, "learning_rate": 8.722188929326235e-07, "loss": 0.6261, "step": 4353 }, { "epoch": 0.55, "grad_norm": 0.9839243328410233, "learning_rate": 8.718096250753245e-07, "loss": 0.6381, "step": 4354 }, { "epoch": 0.55, "grad_norm": 0.7892151402751505, "learning_rate": 8.714003790475106e-07, "loss": 0.5957, "step": 4355 }, { "epoch": 0.55, "grad_norm": 0.6128995201935047, "learning_rate": 8.709911549188727e-07, "loss": 0.4879, "step": 4356 }, { "epoch": 0.56, "grad_norm": 0.6598930583064968, "learning_rate": 8.70581952759097e-07, "loss": 0.5265, "step": 4357 }, { "epoch": 0.56, "grad_norm": 0.9934205399315076, "learning_rate": 8.701727726378665e-07, "loss": 0.6281, "step": 4358 }, { "epoch": 0.56, "grad_norm": 0.7137625407409584, "learning_rate": 8.6976361462486e-07, "loss": 0.5735, "step": 4359 }, { "epoch": 0.56, "grad_norm": 0.7889458367502884, "learning_rate": 8.693544787897533e-07, "loss": 0.5324, "step": 4360 }, { "epoch": 0.56, "grad_norm": 0.6743913393827506, "learning_rate": 8.68945365202218e-07, "loss": 0.5435, "step": 4361 }, { "epoch": 0.56, "grad_norm": 0.754816607889499, "learning_rate": 8.685362739319215e-07, "loss": 0.5406, "step": 4362 }, { "epoch": 0.56, "grad_norm": 1.0339312428289114, "learning_rate": 8.681272050485282e-07, "loss": 0.6476, "step": 4363 }, { "epoch": 0.56, "grad_norm": 0.8828034006597015, "learning_rate": 8.677181586216978e-07, "loss": 0.5851, "step": 4364 }, { "epoch": 0.56, "grad_norm": 1.0901390978262449, "learning_rate": 8.67309134721087e-07, "loss": 0.5864, "step": 4365 }, { "epoch": 0.56, "grad_norm": 0.8774822801944981, "learning_rate": 8.669001334163483e-07, "loss": 0.6177, "step": 4366 }, { "epoch": 0.56, "grad_norm": 0.9698201288952298, "learning_rate": 8.6649115477713e-07, "loss": 0.6376, "step": 4367 }, { "epoch": 0.56, "grad_norm": 0.7167055012005153, "learning_rate": 8.660821988730775e-07, "loss": 0.6135, "step": 4368 }, { "epoch": 0.56, "grad_norm": 0.6766076268670432, "learning_rate": 8.65673265773831e-07, "loss": 0.5462, "step": 4369 }, { "epoch": 0.56, "grad_norm": 0.8601286818164767, "learning_rate": 8.652643555490282e-07, "loss": 0.6669, "step": 4370 }, { "epoch": 0.56, "grad_norm": 1.0456136896856079, "learning_rate": 8.648554682683019e-07, "loss": 0.6421, "step": 4371 }, { "epoch": 0.56, "grad_norm": 0.8218063040494518, "learning_rate": 8.644466040012812e-07, "loss": 0.5018, "step": 4372 }, { "epoch": 0.56, "grad_norm": 0.6717431055626376, "learning_rate": 8.640377628175919e-07, "loss": 0.5744, "step": 4373 }, { "epoch": 0.56, "grad_norm": 0.7887283319809039, "learning_rate": 8.636289447868551e-07, "loss": 0.5466, "step": 4374 }, { "epoch": 0.56, "grad_norm": 0.7858057501818361, "learning_rate": 8.632201499786884e-07, "loss": 0.6062, "step": 4375 }, { "epoch": 0.56, "grad_norm": 0.8673568392837707, "learning_rate": 8.628113784627051e-07, "loss": 0.6158, "step": 4376 }, { "epoch": 0.56, "grad_norm": 0.7034664482802332, "learning_rate": 8.624026303085149e-07, "loss": 0.5608, "step": 4377 }, { "epoch": 0.56, "grad_norm": 0.8124035468770514, "learning_rate": 8.619939055857232e-07, "loss": 0.6704, "step": 4378 }, { "epoch": 0.56, "grad_norm": 0.8988299765412034, "learning_rate": 8.615852043639317e-07, "loss": 0.5984, "step": 4379 }, { "epoch": 0.56, "grad_norm": 0.9003321581839285, "learning_rate": 8.611765267127379e-07, "loss": 0.5741, "step": 4380 }, { "epoch": 0.56, "grad_norm": 1.6989153265172532, "learning_rate": 8.607678727017354e-07, "loss": 0.6974, "step": 4381 }, { "epoch": 0.56, "grad_norm": 1.0373341257473043, "learning_rate": 8.603592424005135e-07, "loss": 0.698, "step": 4382 }, { "epoch": 0.56, "grad_norm": 0.9295846536302472, "learning_rate": 8.599506358786579e-07, "loss": 0.5879, "step": 4383 }, { "epoch": 0.56, "grad_norm": 0.9416069470398654, "learning_rate": 8.5954205320575e-07, "loss": 0.642, "step": 4384 }, { "epoch": 0.56, "grad_norm": 0.8115604261071249, "learning_rate": 8.59133494451367e-07, "loss": 0.6357, "step": 4385 }, { "epoch": 0.56, "grad_norm": 0.6589826222363065, "learning_rate": 8.587249596850826e-07, "loss": 0.5238, "step": 4386 }, { "epoch": 0.56, "grad_norm": 0.8303940297314657, "learning_rate": 8.583164489764657e-07, "loss": 0.5636, "step": 4387 }, { "epoch": 0.56, "grad_norm": 0.8608177759408698, "learning_rate": 8.579079623950815e-07, "loss": 0.6173, "step": 4388 }, { "epoch": 0.56, "grad_norm": 1.0461245382223046, "learning_rate": 8.57499500010491e-07, "loss": 0.6193, "step": 4389 }, { "epoch": 0.56, "grad_norm": 0.6855916166681705, "learning_rate": 8.570910618922512e-07, "loss": 0.539, "step": 4390 }, { "epoch": 0.56, "grad_norm": 0.9313322121941922, "learning_rate": 8.566826481099146e-07, "loss": 0.6586, "step": 4391 }, { "epoch": 0.56, "grad_norm": 3.9925538461902144, "learning_rate": 8.5627425873303e-07, "loss": 0.631, "step": 4392 }, { "epoch": 0.56, "grad_norm": 0.9254096673534389, "learning_rate": 8.558658938311418e-07, "loss": 0.6216, "step": 4393 }, { "epoch": 0.56, "grad_norm": 0.6978149943255844, "learning_rate": 8.554575534737903e-07, "loss": 0.593, "step": 4394 }, { "epoch": 0.56, "grad_norm": 0.7531397854943812, "learning_rate": 8.550492377305117e-07, "loss": 0.6041, "step": 4395 }, { "epoch": 0.56, "grad_norm": 0.7344864652395828, "learning_rate": 8.546409466708377e-07, "loss": 0.5318, "step": 4396 }, { "epoch": 0.56, "grad_norm": 0.8937678320199621, "learning_rate": 8.542326803642964e-07, "loss": 0.6101, "step": 4397 }, { "epoch": 0.56, "grad_norm": 0.8261304320800287, "learning_rate": 8.538244388804107e-07, "loss": 0.6235, "step": 4398 }, { "epoch": 0.56, "grad_norm": 0.7952779299378832, "learning_rate": 8.534162222887001e-07, "loss": 0.5805, "step": 4399 }, { "epoch": 0.56, "grad_norm": 0.5873623059793163, "learning_rate": 8.5300803065868e-07, "loss": 0.4971, "step": 4400 }, { "epoch": 0.56, "grad_norm": 0.6685141322953755, "learning_rate": 8.525998640598609e-07, "loss": 0.6068, "step": 4401 }, { "epoch": 0.56, "grad_norm": 1.025109335787052, "learning_rate": 8.521917225617494e-07, "loss": 0.6049, "step": 4402 }, { "epoch": 0.56, "grad_norm": 0.9571568649955058, "learning_rate": 8.517836062338477e-07, "loss": 0.6499, "step": 4403 }, { "epoch": 0.56, "grad_norm": 0.76438684892713, "learning_rate": 8.513755151456538e-07, "loss": 0.5865, "step": 4404 }, { "epoch": 0.56, "grad_norm": 0.6400721371976708, "learning_rate": 8.509674493666613e-07, "loss": 0.5642, "step": 4405 }, { "epoch": 0.56, "grad_norm": 1.0628893967632864, "learning_rate": 8.505594089663593e-07, "loss": 0.5987, "step": 4406 }, { "epoch": 0.56, "grad_norm": 0.9977226661958044, "learning_rate": 8.501513940142334e-07, "loss": 0.6465, "step": 4407 }, { "epoch": 0.56, "grad_norm": 0.7521716654381636, "learning_rate": 8.497434045797639e-07, "loss": 0.5639, "step": 4408 }, { "epoch": 0.56, "grad_norm": 0.6625186403754416, "learning_rate": 8.493354407324271e-07, "loss": 0.5507, "step": 4409 }, { "epoch": 0.56, "grad_norm": 0.6740161222925183, "learning_rate": 8.489275025416953e-07, "loss": 0.5613, "step": 4410 }, { "epoch": 0.56, "grad_norm": 1.0391103162816349, "learning_rate": 8.485195900770359e-07, "loss": 0.6512, "step": 4411 }, { "epoch": 0.56, "grad_norm": 0.6406993195800389, "learning_rate": 8.481117034079121e-07, "loss": 0.5216, "step": 4412 }, { "epoch": 0.56, "grad_norm": 0.5839211063909073, "learning_rate": 8.477038426037828e-07, "loss": 0.5483, "step": 4413 }, { "epoch": 0.56, "grad_norm": 0.6870323192384473, "learning_rate": 8.472960077341027e-07, "loss": 0.5327, "step": 4414 }, { "epoch": 0.56, "grad_norm": 0.8262537355135767, "learning_rate": 8.468881988683216e-07, "loss": 0.5928, "step": 4415 }, { "epoch": 0.56, "grad_norm": 0.6417359010071221, "learning_rate": 8.46480416075885e-07, "loss": 0.4852, "step": 4416 }, { "epoch": 0.56, "grad_norm": 0.9154321503081716, "learning_rate": 8.460726594262344e-07, "loss": 0.6106, "step": 4417 }, { "epoch": 0.56, "grad_norm": 0.8010739754806401, "learning_rate": 8.456649289888062e-07, "loss": 0.6741, "step": 4418 }, { "epoch": 0.56, "grad_norm": 0.8075288073734203, "learning_rate": 8.452572248330327e-07, "loss": 0.6186, "step": 4419 }, { "epoch": 0.56, "grad_norm": 0.7889541622498499, "learning_rate": 8.448495470283416e-07, "loss": 0.5984, "step": 4420 }, { "epoch": 0.56, "grad_norm": 0.9192773921998046, "learning_rate": 8.444418956441564e-07, "loss": 0.5825, "step": 4421 }, { "epoch": 0.56, "grad_norm": 0.6886975534113211, "learning_rate": 8.44034270749896e-07, "loss": 0.5365, "step": 4422 }, { "epoch": 0.56, "grad_norm": 0.6955568301630137, "learning_rate": 8.436266724149742e-07, "loss": 0.5554, "step": 4423 }, { "epoch": 0.56, "grad_norm": 0.749267186644043, "learning_rate": 8.43219100708801e-07, "loss": 0.5206, "step": 4424 }, { "epoch": 0.56, "grad_norm": 0.7754763325920387, "learning_rate": 8.428115557007816e-07, "loss": 0.5981, "step": 4425 }, { "epoch": 0.56, "grad_norm": 0.904184734458228, "learning_rate": 8.424040374603167e-07, "loss": 0.6541, "step": 4426 }, { "epoch": 0.56, "grad_norm": 0.6439836598345291, "learning_rate": 8.419965460568024e-07, "loss": 0.53, "step": 4427 }, { "epoch": 0.56, "grad_norm": 0.6301531974679986, "learning_rate": 8.415890815596302e-07, "loss": 0.5378, "step": 4428 }, { "epoch": 0.56, "grad_norm": 0.7927964117573223, "learning_rate": 8.411816440381873e-07, "loss": 0.5789, "step": 4429 }, { "epoch": 0.56, "grad_norm": 0.8502649303013989, "learning_rate": 8.407742335618558e-07, "loss": 0.55, "step": 4430 }, { "epoch": 0.56, "grad_norm": 0.8651862909217385, "learning_rate": 8.403668502000134e-07, "loss": 0.6556, "step": 4431 }, { "epoch": 0.56, "grad_norm": 0.6917982362734562, "learning_rate": 8.399594940220335e-07, "loss": 0.5235, "step": 4432 }, { "epoch": 0.56, "grad_norm": 1.1022455507397788, "learning_rate": 8.395521650972845e-07, "loss": 0.5916, "step": 4433 }, { "epoch": 0.56, "grad_norm": 0.6359777780117996, "learning_rate": 8.391448634951302e-07, "loss": 0.4786, "step": 4434 }, { "epoch": 0.57, "grad_norm": 0.7952608177843232, "learning_rate": 8.387375892849298e-07, "loss": 0.6167, "step": 4435 }, { "epoch": 0.57, "grad_norm": 0.8173152019312616, "learning_rate": 8.38330342536038e-07, "loss": 0.6203, "step": 4436 }, { "epoch": 0.57, "grad_norm": 0.7995853170433503, "learning_rate": 8.379231233178046e-07, "loss": 0.5438, "step": 4437 }, { "epoch": 0.57, "grad_norm": 0.6736881049979994, "learning_rate": 8.375159316995745e-07, "loss": 0.4624, "step": 4438 }, { "epoch": 0.57, "grad_norm": 0.6326873104560141, "learning_rate": 8.371087677506887e-07, "loss": 0.5435, "step": 4439 }, { "epoch": 0.57, "grad_norm": 0.7577139148618389, "learning_rate": 8.367016315404829e-07, "loss": 0.5268, "step": 4440 }, { "epoch": 0.57, "grad_norm": 0.9494573405854854, "learning_rate": 8.362945231382878e-07, "loss": 0.6341, "step": 4441 }, { "epoch": 0.57, "grad_norm": 0.7646406077632162, "learning_rate": 8.358874426134301e-07, "loss": 0.5407, "step": 4442 }, { "epoch": 0.57, "grad_norm": 1.1090071477377337, "learning_rate": 8.354803900352312e-07, "loss": 0.6596, "step": 4443 }, { "epoch": 0.57, "grad_norm": 0.8438761177233356, "learning_rate": 8.35073365473008e-07, "loss": 0.5935, "step": 4444 }, { "epoch": 0.57, "grad_norm": 0.7812584317672604, "learning_rate": 8.346663689960723e-07, "loss": 0.5743, "step": 4445 }, { "epoch": 0.57, "grad_norm": 0.6987482571055537, "learning_rate": 8.342594006737316e-07, "loss": 0.5214, "step": 4446 }, { "epoch": 0.57, "grad_norm": 0.7976585141054953, "learning_rate": 8.338524605752884e-07, "loss": 0.6305, "step": 4447 }, { "epoch": 0.57, "grad_norm": 0.7003560386565325, "learning_rate": 8.334455487700401e-07, "loss": 0.5496, "step": 4448 }, { "epoch": 0.57, "grad_norm": 0.7185814923597355, "learning_rate": 8.330386653272793e-07, "loss": 0.5472, "step": 4449 }, { "epoch": 0.57, "grad_norm": 0.7041345390072647, "learning_rate": 8.326318103162948e-07, "loss": 0.5261, "step": 4450 }, { "epoch": 0.57, "grad_norm": 0.8159515849511303, "learning_rate": 8.322249838063694e-07, "loss": 0.5956, "step": 4451 }, { "epoch": 0.57, "grad_norm": 1.0231998869245233, "learning_rate": 8.318181858667814e-07, "loss": 0.6768, "step": 4452 }, { "epoch": 0.57, "grad_norm": 0.6276018783962658, "learning_rate": 8.314114165668043e-07, "loss": 0.5591, "step": 4453 }, { "epoch": 0.57, "grad_norm": 0.7976296813635937, "learning_rate": 8.310046759757066e-07, "loss": 0.6015, "step": 4454 }, { "epoch": 0.57, "grad_norm": 0.8951627493784508, "learning_rate": 8.305979641627519e-07, "loss": 0.677, "step": 4455 }, { "epoch": 0.57, "grad_norm": 0.806826869495861, "learning_rate": 8.301912811971992e-07, "loss": 0.6101, "step": 4456 }, { "epoch": 0.57, "grad_norm": 0.8597374286665916, "learning_rate": 8.29784627148302e-07, "loss": 0.6084, "step": 4457 }, { "epoch": 0.57, "grad_norm": 0.8576328131619287, "learning_rate": 8.293780020853096e-07, "loss": 0.6439, "step": 4458 }, { "epoch": 0.57, "grad_norm": 0.832415776489216, "learning_rate": 8.289714060774661e-07, "loss": 0.6707, "step": 4459 }, { "epoch": 0.57, "grad_norm": 0.8876866668275865, "learning_rate": 8.285648391940103e-07, "loss": 0.6066, "step": 4460 }, { "epoch": 0.57, "grad_norm": 0.9658130819064014, "learning_rate": 8.281583015041765e-07, "loss": 0.6451, "step": 4461 }, { "epoch": 0.57, "grad_norm": 0.8210744129302442, "learning_rate": 8.277517930771936e-07, "loss": 0.61, "step": 4462 }, { "epoch": 0.57, "grad_norm": 0.6808304848564876, "learning_rate": 8.27345313982286e-07, "loss": 0.5173, "step": 4463 }, { "epoch": 0.57, "grad_norm": 0.8424003101607799, "learning_rate": 8.269388642886725e-07, "loss": 0.6221, "step": 4464 }, { "epoch": 0.57, "grad_norm": 0.5855964935998733, "learning_rate": 8.265324440655676e-07, "loss": 0.5107, "step": 4465 }, { "epoch": 0.57, "grad_norm": 1.0715582237692367, "learning_rate": 8.261260533821802e-07, "loss": 0.5997, "step": 4466 }, { "epoch": 0.57, "grad_norm": 0.6968121242968011, "learning_rate": 8.257196923077146e-07, "loss": 0.5774, "step": 4467 }, { "epoch": 0.57, "grad_norm": 0.7909278365337222, "learning_rate": 8.253133609113698e-07, "loss": 0.6105, "step": 4468 }, { "epoch": 0.57, "grad_norm": 0.8312639291648662, "learning_rate": 8.249070592623397e-07, "loss": 0.6261, "step": 4469 }, { "epoch": 0.57, "grad_norm": 0.6832492340332178, "learning_rate": 8.245007874298131e-07, "loss": 0.5708, "step": 4470 }, { "epoch": 0.57, "grad_norm": 0.8304565094430321, "learning_rate": 8.240945454829743e-07, "loss": 0.6103, "step": 4471 }, { "epoch": 0.57, "grad_norm": 0.7051691976784179, "learning_rate": 8.236883334910019e-07, "loss": 0.5851, "step": 4472 }, { "epoch": 0.57, "grad_norm": 0.6417564627866752, "learning_rate": 8.232821515230694e-07, "loss": 0.5162, "step": 4473 }, { "epoch": 0.57, "grad_norm": 0.7511251361959094, "learning_rate": 8.228759996483454e-07, "loss": 0.4704, "step": 4474 }, { "epoch": 0.57, "grad_norm": 0.9426632185414435, "learning_rate": 8.224698779359935e-07, "loss": 0.6123, "step": 4475 }, { "epoch": 0.57, "grad_norm": 0.6315561437743511, "learning_rate": 8.22063786455172e-07, "loss": 0.4892, "step": 4476 }, { "epoch": 0.57, "grad_norm": 0.9648121440106838, "learning_rate": 8.216577252750338e-07, "loss": 0.6213, "step": 4477 }, { "epoch": 0.57, "grad_norm": 0.8710801020070927, "learning_rate": 8.212516944647269e-07, "loss": 0.5275, "step": 4478 }, { "epoch": 0.57, "grad_norm": 1.1012890650463816, "learning_rate": 8.208456940933946e-07, "loss": 0.6352, "step": 4479 }, { "epoch": 0.57, "grad_norm": 0.7390421353193901, "learning_rate": 8.204397242301738e-07, "loss": 0.5665, "step": 4480 }, { "epoch": 0.57, "grad_norm": 0.9599406650057072, "learning_rate": 8.200337849441976e-07, "loss": 0.6287, "step": 4481 }, { "epoch": 0.57, "grad_norm": 0.8878454744602954, "learning_rate": 8.196278763045927e-07, "loss": 0.65, "step": 4482 }, { "epoch": 0.57, "grad_norm": 0.6508726349286498, "learning_rate": 8.192219983804814e-07, "loss": 0.4902, "step": 4483 }, { "epoch": 0.57, "grad_norm": 0.6599468058205433, "learning_rate": 8.188161512409806e-07, "loss": 0.5485, "step": 4484 }, { "epoch": 0.57, "grad_norm": 0.9323491640592634, "learning_rate": 8.184103349552016e-07, "loss": 0.576, "step": 4485 }, { "epoch": 0.57, "grad_norm": 0.8556299488864367, "learning_rate": 8.180045495922508e-07, "loss": 0.6371, "step": 4486 }, { "epoch": 0.57, "grad_norm": 0.7877193821817254, "learning_rate": 8.175987952212291e-07, "loss": 0.5929, "step": 4487 }, { "epoch": 0.57, "grad_norm": 0.6662714781629077, "learning_rate": 8.171930719112326e-07, "loss": 0.4956, "step": 4488 }, { "epoch": 0.57, "grad_norm": 0.6089862583928608, "learning_rate": 8.167873797313511e-07, "loss": 0.563, "step": 4489 }, { "epoch": 0.57, "grad_norm": 0.9050876520646741, "learning_rate": 8.163817187506704e-07, "loss": 0.6501, "step": 4490 }, { "epoch": 0.57, "grad_norm": 0.6613344083618417, "learning_rate": 8.159760890382701e-07, "loss": 0.5279, "step": 4491 }, { "epoch": 0.57, "grad_norm": 0.7144884356138853, "learning_rate": 8.155704906632245e-07, "loss": 0.5694, "step": 4492 }, { "epoch": 0.57, "grad_norm": 0.9576235890353428, "learning_rate": 8.151649236946031e-07, "loss": 0.6127, "step": 4493 }, { "epoch": 0.57, "grad_norm": 0.8414381080095936, "learning_rate": 8.147593882014696e-07, "loss": 0.631, "step": 4494 }, { "epoch": 0.57, "grad_norm": 0.8720951839570097, "learning_rate": 8.143538842528824e-07, "loss": 0.5934, "step": 4495 }, { "epoch": 0.57, "grad_norm": 0.6435782029534615, "learning_rate": 8.139484119178946e-07, "loss": 0.5045, "step": 4496 }, { "epoch": 0.57, "grad_norm": 0.7764288454639161, "learning_rate": 8.135429712655541e-07, "loss": 0.5299, "step": 4497 }, { "epoch": 0.57, "grad_norm": 0.8911294909684478, "learning_rate": 8.13137562364903e-07, "loss": 0.6067, "step": 4498 }, { "epoch": 0.57, "grad_norm": 0.6173502374664481, "learning_rate": 8.127321852849784e-07, "loss": 0.5823, "step": 4499 }, { "epoch": 0.57, "grad_norm": 0.7245969396212082, "learning_rate": 8.123268400948117e-07, "loss": 0.5413, "step": 4500 }, { "epoch": 0.57, "grad_norm": 0.6390509947268154, "learning_rate": 8.119215268634289e-07, "loss": 0.5475, "step": 4501 }, { "epoch": 0.57, "grad_norm": 0.631263114402124, "learning_rate": 8.115162456598508e-07, "loss": 0.5432, "step": 4502 }, { "epoch": 0.57, "grad_norm": 0.8725795895470396, "learning_rate": 8.111109965530924e-07, "loss": 0.6916, "step": 4503 }, { "epoch": 0.57, "grad_norm": 0.8605659700950471, "learning_rate": 8.107057796121634e-07, "loss": 0.5915, "step": 4504 }, { "epoch": 0.57, "grad_norm": 0.7596198436019139, "learning_rate": 8.103005949060682e-07, "loss": 0.522, "step": 4505 }, { "epoch": 0.57, "grad_norm": 0.6673171882155366, "learning_rate": 8.098954425038054e-07, "loss": 0.5278, "step": 4506 }, { "epoch": 0.57, "grad_norm": 0.6122869190020669, "learning_rate": 8.094903224743681e-07, "loss": 0.5329, "step": 4507 }, { "epoch": 0.57, "grad_norm": 0.8100072013592492, "learning_rate": 8.090852348867443e-07, "loss": 0.5895, "step": 4508 }, { "epoch": 0.57, "grad_norm": 1.1460229237500226, "learning_rate": 8.086801798099161e-07, "loss": 0.6704, "step": 4509 }, { "epoch": 0.57, "grad_norm": 0.9797330493978239, "learning_rate": 8.082751573128598e-07, "loss": 0.5954, "step": 4510 }, { "epoch": 0.57, "grad_norm": 0.584449722409891, "learning_rate": 8.078701674645471e-07, "loss": 0.4905, "step": 4511 }, { "epoch": 0.57, "grad_norm": 0.7720810547370168, "learning_rate": 8.074652103339432e-07, "loss": 0.5545, "step": 4512 }, { "epoch": 0.57, "grad_norm": 0.7362052527979962, "learning_rate": 8.070602859900083e-07, "loss": 0.5406, "step": 4513 }, { "epoch": 0.58, "grad_norm": 0.7945417463210773, "learning_rate": 8.066553945016966e-07, "loss": 0.681, "step": 4514 }, { "epoch": 0.58, "grad_norm": 0.6260393520427715, "learning_rate": 8.06250535937957e-07, "loss": 0.5359, "step": 4515 }, { "epoch": 0.58, "grad_norm": 0.6871108136460321, "learning_rate": 8.058457103677327e-07, "loss": 0.5849, "step": 4516 }, { "epoch": 0.58, "grad_norm": 0.8361762324012048, "learning_rate": 8.05440917859961e-07, "loss": 0.5749, "step": 4517 }, { "epoch": 0.58, "grad_norm": 0.7605161584032075, "learning_rate": 8.050361584835743e-07, "loss": 0.524, "step": 4518 }, { "epoch": 0.58, "grad_norm": 0.8467409857792648, "learning_rate": 8.046314323074988e-07, "loss": 0.6454, "step": 4519 }, { "epoch": 0.58, "grad_norm": 0.8506852896050776, "learning_rate": 8.042267394006548e-07, "loss": 0.618, "step": 4520 }, { "epoch": 0.58, "grad_norm": 0.8885013934131349, "learning_rate": 8.038220798319576e-07, "loss": 0.658, "step": 4521 }, { "epoch": 0.58, "grad_norm": 0.769174884191062, "learning_rate": 8.034174536703165e-07, "loss": 0.5353, "step": 4522 }, { "epoch": 0.58, "grad_norm": 0.7919686173296258, "learning_rate": 8.030128609846349e-07, "loss": 0.552, "step": 4523 }, { "epoch": 0.58, "grad_norm": 0.7865025008807413, "learning_rate": 8.026083018438109e-07, "loss": 0.6054, "step": 4524 }, { "epoch": 0.58, "grad_norm": 0.6988040675747746, "learning_rate": 8.022037763167367e-07, "loss": 0.4998, "step": 4525 }, { "epoch": 0.58, "grad_norm": 0.9151759860968967, "learning_rate": 8.017992844722989e-07, "loss": 0.6446, "step": 4526 }, { "epoch": 0.58, "grad_norm": 0.7666409570659523, "learning_rate": 8.013948263793782e-07, "loss": 0.5813, "step": 4527 }, { "epoch": 0.58, "grad_norm": 0.8306154507543775, "learning_rate": 8.009904021068493e-07, "loss": 0.6161, "step": 4528 }, { "epoch": 0.58, "grad_norm": 0.9117827068480757, "learning_rate": 8.005860117235817e-07, "loss": 0.6148, "step": 4529 }, { "epoch": 0.58, "grad_norm": 0.951092141154977, "learning_rate": 8.001816552984389e-07, "loss": 0.6443, "step": 4530 }, { "epoch": 0.58, "grad_norm": 0.6181074404591639, "learning_rate": 7.997773329002784e-07, "loss": 0.491, "step": 4531 }, { "epoch": 0.58, "grad_norm": 0.8591603109391228, "learning_rate": 7.993730445979523e-07, "loss": 0.6089, "step": 4532 }, { "epoch": 0.58, "grad_norm": 0.8282227697844028, "learning_rate": 7.989687904603065e-07, "loss": 0.6108, "step": 4533 }, { "epoch": 0.58, "grad_norm": 0.8685270249658449, "learning_rate": 7.985645705561814e-07, "loss": 0.6437, "step": 4534 }, { "epoch": 0.58, "grad_norm": 1.0984010570407696, "learning_rate": 7.981603849544115e-07, "loss": 0.6324, "step": 4535 }, { "epoch": 0.58, "grad_norm": 0.6714605561062976, "learning_rate": 7.97756233723825e-07, "loss": 0.5327, "step": 4536 }, { "epoch": 0.58, "grad_norm": 0.6678223481535148, "learning_rate": 7.973521169332452e-07, "loss": 0.5127, "step": 4537 }, { "epoch": 0.58, "grad_norm": 0.6398739734070572, "learning_rate": 7.969480346514885e-07, "loss": 0.5101, "step": 4538 }, { "epoch": 0.58, "grad_norm": 0.7932789599755855, "learning_rate": 7.965439869473663e-07, "loss": 0.6047, "step": 4539 }, { "epoch": 0.58, "grad_norm": 1.8863438898874132, "learning_rate": 7.961399738896835e-07, "loss": 0.5954, "step": 4540 }, { "epoch": 0.58, "grad_norm": 0.7218634755510498, "learning_rate": 7.957359955472394e-07, "loss": 0.545, "step": 4541 }, { "epoch": 0.58, "grad_norm": 0.6818991810193427, "learning_rate": 7.953320519888272e-07, "loss": 0.5534, "step": 4542 }, { "epoch": 0.58, "grad_norm": 0.8394884998730925, "learning_rate": 7.949281432832343e-07, "loss": 0.5485, "step": 4543 }, { "epoch": 0.58, "grad_norm": 0.8388736100218905, "learning_rate": 7.945242694992422e-07, "loss": 0.6639, "step": 4544 }, { "epoch": 0.58, "grad_norm": 0.803462496959316, "learning_rate": 7.941204307056263e-07, "loss": 0.5732, "step": 4545 }, { "epoch": 0.58, "grad_norm": 0.7103044622661596, "learning_rate": 7.937166269711564e-07, "loss": 0.5311, "step": 4546 }, { "epoch": 0.58, "grad_norm": 0.7120974461642388, "learning_rate": 7.933128583645958e-07, "loss": 0.5487, "step": 4547 }, { "epoch": 0.58, "grad_norm": 0.609604761614603, "learning_rate": 7.929091249547022e-07, "loss": 0.4851, "step": 4548 }, { "epoch": 0.58, "grad_norm": 0.6428578128298722, "learning_rate": 7.92505426810227e-07, "loss": 0.5353, "step": 4549 }, { "epoch": 0.58, "grad_norm": 0.740511401528985, "learning_rate": 7.921017639999159e-07, "loss": 0.5669, "step": 4550 }, { "epoch": 0.58, "grad_norm": 0.7251425042785961, "learning_rate": 7.916981365925085e-07, "loss": 0.4702, "step": 4551 }, { "epoch": 0.58, "grad_norm": 0.7004291219783643, "learning_rate": 7.912945446567385e-07, "loss": 0.5412, "step": 4552 }, { "epoch": 0.58, "grad_norm": 0.9167653477705164, "learning_rate": 7.908909882613334e-07, "loss": 0.6827, "step": 4553 }, { "epoch": 0.58, "grad_norm": 0.676759473796595, "learning_rate": 7.904874674750143e-07, "loss": 0.489, "step": 4554 }, { "epoch": 0.58, "grad_norm": 0.7050430261469035, "learning_rate": 7.900839823664968e-07, "loss": 0.5355, "step": 4555 }, { "epoch": 0.58, "grad_norm": 0.9357232954766475, "learning_rate": 7.896805330044904e-07, "loss": 0.6487, "step": 4556 }, { "epoch": 0.58, "grad_norm": 0.9773619978800818, "learning_rate": 7.892771194576981e-07, "loss": 0.6116, "step": 4557 }, { "epoch": 0.58, "grad_norm": 0.8534888485786647, "learning_rate": 7.88873741794817e-07, "loss": 0.643, "step": 4558 }, { "epoch": 0.58, "grad_norm": 0.6260255360923213, "learning_rate": 7.884704000845383e-07, "loss": 0.5512, "step": 4559 }, { "epoch": 0.58, "grad_norm": 0.6552396664549461, "learning_rate": 7.880670943955467e-07, "loss": 0.5517, "step": 4560 }, { "epoch": 0.58, "grad_norm": 1.0272710256880404, "learning_rate": 7.876638247965209e-07, "loss": 0.5263, "step": 4561 }, { "epoch": 0.58, "grad_norm": 1.0271278113592697, "learning_rate": 7.872605913561339e-07, "loss": 0.6089, "step": 4562 }, { "epoch": 0.58, "grad_norm": 0.940157527057518, "learning_rate": 7.868573941430517e-07, "loss": 0.6399, "step": 4563 }, { "epoch": 0.58, "grad_norm": 1.1264549596729316, "learning_rate": 7.864542332259347e-07, "loss": 0.5761, "step": 4564 }, { "epoch": 0.58, "grad_norm": 0.8460134652616151, "learning_rate": 7.860511086734372e-07, "loss": 0.5935, "step": 4565 }, { "epoch": 0.58, "grad_norm": 0.7997918553895481, "learning_rate": 7.856480205542071e-07, "loss": 0.5244, "step": 4566 }, { "epoch": 0.58, "grad_norm": 1.041178810508615, "learning_rate": 7.852449689368859e-07, "loss": 0.6703, "step": 4567 }, { "epoch": 0.58, "grad_norm": 0.6753425393830857, "learning_rate": 7.848419538901088e-07, "loss": 0.5352, "step": 4568 }, { "epoch": 0.58, "grad_norm": 0.83424019214742, "learning_rate": 7.844389754825058e-07, "loss": 0.697, "step": 4569 }, { "epoch": 0.58, "grad_norm": 0.8955471174197359, "learning_rate": 7.840360337826997e-07, "loss": 0.584, "step": 4570 }, { "epoch": 0.58, "grad_norm": 0.7588449936114927, "learning_rate": 7.83633128859307e-07, "loss": 0.5939, "step": 4571 }, { "epoch": 0.58, "grad_norm": 0.7058234768819842, "learning_rate": 7.832302607809385e-07, "loss": 0.5297, "step": 4572 }, { "epoch": 0.58, "grad_norm": 0.725974614510219, "learning_rate": 7.828274296161981e-07, "loss": 0.5504, "step": 4573 }, { "epoch": 0.58, "grad_norm": 0.7989844424337375, "learning_rate": 7.824246354336841e-07, "loss": 0.6008, "step": 4574 }, { "epoch": 0.58, "grad_norm": 0.9142473221933964, "learning_rate": 7.820218783019879e-07, "loss": 0.648, "step": 4575 }, { "epoch": 0.58, "grad_norm": 0.7369134387935785, "learning_rate": 7.81619158289695e-07, "loss": 0.5695, "step": 4576 }, { "epoch": 0.58, "grad_norm": 1.0291555622531634, "learning_rate": 7.812164754653843e-07, "loss": 0.6861, "step": 4577 }, { "epoch": 0.58, "grad_norm": 0.6711406389690804, "learning_rate": 7.808138298976287e-07, "loss": 0.5435, "step": 4578 }, { "epoch": 0.58, "grad_norm": 0.954091642335392, "learning_rate": 7.804112216549944e-07, "loss": 0.5435, "step": 4579 }, { "epoch": 0.58, "grad_norm": 0.7918542949079569, "learning_rate": 7.800086508060413e-07, "loss": 0.6621, "step": 4580 }, { "epoch": 0.58, "grad_norm": 0.6853913497865066, "learning_rate": 7.796061174193229e-07, "loss": 0.5366, "step": 4581 }, { "epoch": 0.58, "grad_norm": 0.8532256376625035, "learning_rate": 7.792036215633869e-07, "loss": 0.6248, "step": 4582 }, { "epoch": 0.58, "grad_norm": 0.6363035220611221, "learning_rate": 7.788011633067739e-07, "loss": 0.5487, "step": 4583 }, { "epoch": 0.58, "grad_norm": 0.8979061268851936, "learning_rate": 7.783987427180184e-07, "loss": 0.6894, "step": 4584 }, { "epoch": 0.58, "grad_norm": 0.7577084445961293, "learning_rate": 7.779963598656483e-07, "loss": 0.5296, "step": 4585 }, { "epoch": 0.58, "grad_norm": 0.6912127071688652, "learning_rate": 7.775940148181854e-07, "loss": 0.5352, "step": 4586 }, { "epoch": 0.58, "grad_norm": 0.6312711928244904, "learning_rate": 7.771917076441448e-07, "loss": 0.5342, "step": 4587 }, { "epoch": 0.58, "grad_norm": 0.6987860867570039, "learning_rate": 7.767894384120351e-07, "loss": 0.5754, "step": 4588 }, { "epoch": 0.58, "grad_norm": 0.9412352851156882, "learning_rate": 7.763872071903585e-07, "loss": 0.6623, "step": 4589 }, { "epoch": 0.58, "grad_norm": 0.962573025438982, "learning_rate": 7.759850140476112e-07, "loss": 0.645, "step": 4590 }, { "epoch": 0.58, "grad_norm": 0.6116535077631905, "learning_rate": 7.755828590522821e-07, "loss": 0.5269, "step": 4591 }, { "epoch": 0.59, "grad_norm": 0.6710875214906262, "learning_rate": 7.751807422728541e-07, "loss": 0.5766, "step": 4592 }, { "epoch": 0.59, "grad_norm": 0.8557459342193208, "learning_rate": 7.747786637778036e-07, "loss": 0.5894, "step": 4593 }, { "epoch": 0.59, "grad_norm": 0.6329826791252896, "learning_rate": 7.743766236356001e-07, "loss": 0.5113, "step": 4594 }, { "epoch": 0.59, "grad_norm": 0.7782833048037182, "learning_rate": 7.739746219147074e-07, "loss": 0.5741, "step": 4595 }, { "epoch": 0.59, "grad_norm": 0.8354850734273055, "learning_rate": 7.735726586835817e-07, "loss": 0.6758, "step": 4596 }, { "epoch": 0.59, "grad_norm": 0.7251766516621277, "learning_rate": 7.731707340106735e-07, "loss": 0.5656, "step": 4597 }, { "epoch": 0.59, "grad_norm": 0.7143094153366544, "learning_rate": 7.72768847964426e-07, "loss": 0.5434, "step": 4598 }, { "epoch": 0.59, "grad_norm": 0.679445169344833, "learning_rate": 7.723670006132766e-07, "loss": 0.5778, "step": 4599 }, { "epoch": 0.59, "grad_norm": 0.6506971650996102, "learning_rate": 7.719651920256554e-07, "loss": 0.5564, "step": 4600 }, { "epoch": 0.59, "grad_norm": 0.6894419037479782, "learning_rate": 7.715634222699863e-07, "loss": 0.5488, "step": 4601 }, { "epoch": 0.59, "grad_norm": 0.6573239256783519, "learning_rate": 7.711616914146867e-07, "loss": 0.5392, "step": 4602 }, { "epoch": 0.59, "grad_norm": 0.705605050783217, "learning_rate": 7.707599995281669e-07, "loss": 0.5168, "step": 4603 }, { "epoch": 0.59, "grad_norm": 0.7851485071948949, "learning_rate": 7.70358346678831e-07, "loss": 0.6084, "step": 4604 }, { "epoch": 0.59, "grad_norm": 0.7146763912843771, "learning_rate": 7.699567329350764e-07, "loss": 0.517, "step": 4605 }, { "epoch": 0.59, "grad_norm": 0.800656471178309, "learning_rate": 7.695551583652935e-07, "loss": 0.6063, "step": 4606 }, { "epoch": 0.59, "grad_norm": 0.7172055415932195, "learning_rate": 7.691536230378663e-07, "loss": 0.5792, "step": 4607 }, { "epoch": 0.59, "grad_norm": 0.8427149182820386, "learning_rate": 7.687521270211722e-07, "loss": 0.6003, "step": 4608 }, { "epoch": 0.59, "grad_norm": 0.8010817335534846, "learning_rate": 7.683506703835819e-07, "loss": 0.5523, "step": 4609 }, { "epoch": 0.59, "grad_norm": 0.8856615881364245, "learning_rate": 7.67949253193459e-07, "loss": 0.6417, "step": 4610 }, { "epoch": 0.59, "grad_norm": 0.9174482137748715, "learning_rate": 7.675478755191608e-07, "loss": 0.66, "step": 4611 }, { "epoch": 0.59, "grad_norm": 0.7656927127188602, "learning_rate": 7.671465374290377e-07, "loss": 0.5733, "step": 4612 }, { "epoch": 0.59, "grad_norm": 0.5876054403084492, "learning_rate": 7.667452389914335e-07, "loss": 0.523, "step": 4613 }, { "epoch": 0.59, "grad_norm": 0.7781226375716676, "learning_rate": 7.66343980274685e-07, "loss": 0.5764, "step": 4614 }, { "epoch": 0.59, "grad_norm": 0.8877643606292442, "learning_rate": 7.659427613471225e-07, "loss": 0.5859, "step": 4615 }, { "epoch": 0.59, "grad_norm": 0.8260360219315455, "learning_rate": 7.655415822770694e-07, "loss": 0.5186, "step": 4616 }, { "epoch": 0.59, "grad_norm": 0.6782346620800964, "learning_rate": 7.651404431328422e-07, "loss": 0.5323, "step": 4617 }, { "epoch": 0.59, "grad_norm": 0.9761799686544933, "learning_rate": 7.647393439827509e-07, "loss": 0.5492, "step": 4618 }, { "epoch": 0.59, "grad_norm": 0.62295116664252, "learning_rate": 7.643382848950983e-07, "loss": 0.5185, "step": 4619 }, { "epoch": 0.59, "grad_norm": 0.7384212769707755, "learning_rate": 7.639372659381807e-07, "loss": 0.6122, "step": 4620 }, { "epoch": 0.59, "grad_norm": 0.6892532558942126, "learning_rate": 7.63536287180287e-07, "loss": 0.5369, "step": 4621 }, { "epoch": 0.59, "grad_norm": 0.9356120149106564, "learning_rate": 7.631353486897007e-07, "loss": 0.6643, "step": 4622 }, { "epoch": 0.59, "grad_norm": 0.698262273672187, "learning_rate": 7.627344505346967e-07, "loss": 0.5455, "step": 4623 }, { "epoch": 0.59, "grad_norm": 0.8420032236511614, "learning_rate": 7.623335927835439e-07, "loss": 0.6816, "step": 4624 }, { "epoch": 0.59, "grad_norm": 0.6954213438632111, "learning_rate": 7.619327755045044e-07, "loss": 0.5345, "step": 4625 }, { "epoch": 0.59, "grad_norm": 0.7359233502150834, "learning_rate": 7.615319987658329e-07, "loss": 0.5563, "step": 4626 }, { "epoch": 0.59, "grad_norm": 0.8008121403321444, "learning_rate": 7.611312626357778e-07, "loss": 0.6321, "step": 4627 }, { "epoch": 0.59, "grad_norm": 0.6168772001672514, "learning_rate": 7.607305671825801e-07, "loss": 0.5193, "step": 4628 }, { "epoch": 0.59, "grad_norm": 0.615696886486798, "learning_rate": 7.603299124744742e-07, "loss": 0.5458, "step": 4629 }, { "epoch": 0.59, "grad_norm": 0.9591415552767593, "learning_rate": 7.599292985796872e-07, "loss": 0.6429, "step": 4630 }, { "epoch": 0.59, "grad_norm": 0.8534921827141352, "learning_rate": 7.595287255664397e-07, "loss": 0.6668, "step": 4631 }, { "epoch": 0.59, "grad_norm": 0.7628529554156048, "learning_rate": 7.591281935029451e-07, "loss": 0.5185, "step": 4632 }, { "epoch": 0.59, "grad_norm": 0.864197104032635, "learning_rate": 7.587277024574097e-07, "loss": 0.6433, "step": 4633 }, { "epoch": 0.59, "grad_norm": 1.2150934010289645, "learning_rate": 7.583272524980328e-07, "loss": 0.6201, "step": 4634 }, { "epoch": 0.59, "grad_norm": 0.6417121943460334, "learning_rate": 7.579268436930071e-07, "loss": 0.5261, "step": 4635 }, { "epoch": 0.59, "grad_norm": 0.7844758660721878, "learning_rate": 7.575264761105183e-07, "loss": 0.5145, "step": 4636 }, { "epoch": 0.59, "grad_norm": 0.700112271081736, "learning_rate": 7.571261498187445e-07, "loss": 0.5941, "step": 4637 }, { "epoch": 0.59, "grad_norm": 0.7378502873485852, "learning_rate": 7.56725864885857e-07, "loss": 0.5766, "step": 4638 }, { "epoch": 0.59, "grad_norm": 0.6867059380167689, "learning_rate": 7.563256213800204e-07, "loss": 0.5108, "step": 4639 }, { "epoch": 0.59, "grad_norm": 0.7067113976408538, "learning_rate": 7.559254193693918e-07, "loss": 0.5378, "step": 4640 }, { "epoch": 0.59, "grad_norm": 0.7759131213204004, "learning_rate": 7.555252589221216e-07, "loss": 0.5896, "step": 4641 }, { "epoch": 0.59, "grad_norm": 0.8097277187549107, "learning_rate": 7.551251401063529e-07, "loss": 0.6013, "step": 4642 }, { "epoch": 0.59, "grad_norm": 0.6525574627584387, "learning_rate": 7.547250629902218e-07, "loss": 0.4476, "step": 4643 }, { "epoch": 0.59, "grad_norm": 0.6907440232419042, "learning_rate": 7.543250276418571e-07, "loss": 0.5555, "step": 4644 }, { "epoch": 0.59, "grad_norm": 0.8745761837626473, "learning_rate": 7.539250341293807e-07, "loss": 0.6538, "step": 4645 }, { "epoch": 0.59, "grad_norm": 0.6248851881105257, "learning_rate": 7.535250825209076e-07, "loss": 0.5661, "step": 4646 }, { "epoch": 0.59, "grad_norm": 0.7952465044865602, "learning_rate": 7.53125172884545e-07, "loss": 0.5478, "step": 4647 }, { "epoch": 0.59, "grad_norm": 0.8807436548110773, "learning_rate": 7.527253052883937e-07, "loss": 0.6771, "step": 4648 }, { "epoch": 0.59, "grad_norm": 0.7399930567277663, "learning_rate": 7.523254798005467e-07, "loss": 0.5753, "step": 4649 }, { "epoch": 0.59, "grad_norm": 0.9029797464352047, "learning_rate": 7.519256964890904e-07, "loss": 0.6368, "step": 4650 }, { "epoch": 0.59, "grad_norm": 0.8460450898321868, "learning_rate": 7.515259554221034e-07, "loss": 0.6524, "step": 4651 }, { "epoch": 0.59, "grad_norm": 1.4870350968799977, "learning_rate": 7.511262566676573e-07, "loss": 0.6217, "step": 4652 }, { "epoch": 0.59, "grad_norm": 0.7483945037386243, "learning_rate": 7.507266002938173e-07, "loss": 0.5817, "step": 4653 }, { "epoch": 0.59, "grad_norm": 0.962709854634627, "learning_rate": 7.503269863686404e-07, "loss": 0.68, "step": 4654 }, { "epoch": 0.59, "grad_norm": 0.6005231109259365, "learning_rate": 7.499274149601767e-07, "loss": 0.546, "step": 4655 }, { "epoch": 0.59, "grad_norm": 0.8678806913637388, "learning_rate": 7.495278861364688e-07, "loss": 0.6056, "step": 4656 }, { "epoch": 0.59, "grad_norm": 0.794653227524971, "learning_rate": 7.491283999655527e-07, "loss": 0.577, "step": 4657 }, { "epoch": 0.59, "grad_norm": 0.8479838697856121, "learning_rate": 7.487289565154564e-07, "loss": 0.5684, "step": 4658 }, { "epoch": 0.59, "grad_norm": 1.0609542765235078, "learning_rate": 7.483295558542011e-07, "loss": 0.6208, "step": 4659 }, { "epoch": 0.59, "grad_norm": 0.9296092999805887, "learning_rate": 7.479301980498005e-07, "loss": 0.6197, "step": 4660 }, { "epoch": 0.59, "grad_norm": 1.0021656146874283, "learning_rate": 7.475308831702613e-07, "loss": 0.6318, "step": 4661 }, { "epoch": 0.59, "grad_norm": 0.7611505002022495, "learning_rate": 7.471316112835824e-07, "loss": 0.5964, "step": 4662 }, { "epoch": 0.59, "grad_norm": 0.7548929860805427, "learning_rate": 7.467323824577556e-07, "loss": 0.5403, "step": 4663 }, { "epoch": 0.59, "grad_norm": 0.6718064505765661, "learning_rate": 7.463331967607658e-07, "loss": 0.565, "step": 4664 }, { "epoch": 0.59, "grad_norm": 0.8805517177677572, "learning_rate": 7.459340542605898e-07, "loss": 0.5443, "step": 4665 }, { "epoch": 0.59, "grad_norm": 0.9371012558059065, "learning_rate": 7.455349550251973e-07, "loss": 0.6207, "step": 4666 }, { "epoch": 0.59, "grad_norm": 0.9337285013399496, "learning_rate": 7.451358991225511e-07, "loss": 0.5988, "step": 4667 }, { "epoch": 0.59, "grad_norm": 0.8131459584864631, "learning_rate": 7.447368866206062e-07, "loss": 0.5527, "step": 4668 }, { "epoch": 0.59, "grad_norm": 0.6709459238126295, "learning_rate": 7.443379175873101e-07, "loss": 0.5699, "step": 4669 }, { "epoch": 0.59, "grad_norm": 0.8232073721970536, "learning_rate": 7.439389920906034e-07, "loss": 0.6442, "step": 4670 }, { "epoch": 0.6, "grad_norm": 1.3781458671367686, "learning_rate": 7.435401101984184e-07, "loss": 0.6534, "step": 4671 }, { "epoch": 0.6, "grad_norm": 0.8325234841060969, "learning_rate": 7.431412719786812e-07, "loss": 0.6457, "step": 4672 }, { "epoch": 0.6, "grad_norm": 0.8054547754562412, "learning_rate": 7.427424774993092e-07, "loss": 0.5343, "step": 4673 }, { "epoch": 0.6, "grad_norm": 0.7575849771574459, "learning_rate": 7.423437268282133e-07, "loss": 0.5335, "step": 4674 }, { "epoch": 0.6, "grad_norm": 0.7529255085358358, "learning_rate": 7.419450200332964e-07, "loss": 0.5509, "step": 4675 }, { "epoch": 0.6, "grad_norm": 0.8247884849635866, "learning_rate": 7.415463571824543e-07, "loss": 0.5778, "step": 4676 }, { "epoch": 0.6, "grad_norm": 0.8288704393201374, "learning_rate": 7.411477383435749e-07, "loss": 0.6264, "step": 4677 }, { "epoch": 0.6, "grad_norm": 1.0334082185780522, "learning_rate": 7.407491635845391e-07, "loss": 0.6183, "step": 4678 }, { "epoch": 0.6, "grad_norm": 0.6588801952902797, "learning_rate": 7.403506329732196e-07, "loss": 0.5509, "step": 4679 }, { "epoch": 0.6, "grad_norm": 0.5830229812751486, "learning_rate": 7.399521465774825e-07, "loss": 0.5126, "step": 4680 }, { "epoch": 0.6, "grad_norm": 0.688490175027201, "learning_rate": 7.395537044651856e-07, "loss": 0.5249, "step": 4681 }, { "epoch": 0.6, "grad_norm": 0.6536797243682454, "learning_rate": 7.391553067041796e-07, "loss": 0.5658, "step": 4682 }, { "epoch": 0.6, "grad_norm": 0.568285200373215, "learning_rate": 7.387569533623075e-07, "loss": 0.4703, "step": 4683 }, { "epoch": 0.6, "grad_norm": 0.6606576995329159, "learning_rate": 7.383586445074045e-07, "loss": 0.5009, "step": 4684 }, { "epoch": 0.6, "grad_norm": 0.7512373699871159, "learning_rate": 7.379603802072985e-07, "loss": 0.5387, "step": 4685 }, { "epoch": 0.6, "grad_norm": 0.6581974072856829, "learning_rate": 7.3756216052981e-07, "loss": 0.5654, "step": 4686 }, { "epoch": 0.6, "grad_norm": 0.882176911414058, "learning_rate": 7.371639855427515e-07, "loss": 0.5697, "step": 4687 }, { "epoch": 0.6, "grad_norm": 0.695208999370907, "learning_rate": 7.36765855313928e-07, "loss": 0.5922, "step": 4688 }, { "epoch": 0.6, "grad_norm": 0.762618526729417, "learning_rate": 7.363677699111371e-07, "loss": 0.5934, "step": 4689 }, { "epoch": 0.6, "grad_norm": 0.634117474220558, "learning_rate": 7.359697294021683e-07, "loss": 0.5492, "step": 4690 }, { "epoch": 0.6, "grad_norm": 0.7495758296103276, "learning_rate": 7.35571733854804e-07, "loss": 0.5764, "step": 4691 }, { "epoch": 0.6, "grad_norm": 0.7248532616668805, "learning_rate": 7.351737833368186e-07, "loss": 0.555, "step": 4692 }, { "epoch": 0.6, "grad_norm": 0.8142070068131667, "learning_rate": 7.34775877915979e-07, "loss": 0.5717, "step": 4693 }, { "epoch": 0.6, "grad_norm": 0.6086909421132517, "learning_rate": 7.343780176600443e-07, "loss": 0.5201, "step": 4694 }, { "epoch": 0.6, "grad_norm": 0.7676643504626551, "learning_rate": 7.339802026367659e-07, "loss": 0.5438, "step": 4695 }, { "epoch": 0.6, "grad_norm": 0.6492795722392808, "learning_rate": 7.335824329138877e-07, "loss": 0.5132, "step": 4696 }, { "epoch": 0.6, "grad_norm": 0.9600232142678801, "learning_rate": 7.331847085591456e-07, "loss": 0.6632, "step": 4697 }, { "epoch": 0.6, "grad_norm": 0.7928907216319171, "learning_rate": 7.32787029640268e-07, "loss": 0.5906, "step": 4698 }, { "epoch": 0.6, "grad_norm": 0.6618419610396009, "learning_rate": 7.323893962249756e-07, "loss": 0.5465, "step": 4699 }, { "epoch": 0.6, "grad_norm": 0.8211597659624463, "learning_rate": 7.319918083809809e-07, "loss": 0.6704, "step": 4700 }, { "epoch": 0.6, "grad_norm": 0.6174588663405822, "learning_rate": 7.315942661759893e-07, "loss": 0.5175, "step": 4701 }, { "epoch": 0.6, "grad_norm": 0.8403476956720377, "learning_rate": 7.311967696776977e-07, "loss": 0.5595, "step": 4702 }, { "epoch": 0.6, "grad_norm": 0.5922708820270163, "learning_rate": 7.30799318953796e-07, "loss": 0.497, "step": 4703 }, { "epoch": 0.6, "grad_norm": 0.8824279673640503, "learning_rate": 7.304019140719657e-07, "loss": 0.641, "step": 4704 }, { "epoch": 0.6, "grad_norm": 0.7772768815676877, "learning_rate": 7.300045550998808e-07, "loss": 0.6031, "step": 4705 }, { "epoch": 0.6, "grad_norm": 0.7863181926651701, "learning_rate": 7.296072421052073e-07, "loss": 0.5813, "step": 4706 }, { "epoch": 0.6, "grad_norm": 0.8142065329853736, "learning_rate": 7.292099751556037e-07, "loss": 0.6466, "step": 4707 }, { "epoch": 0.6, "grad_norm": 0.8227180633025091, "learning_rate": 7.288127543187201e-07, "loss": 0.569, "step": 4708 }, { "epoch": 0.6, "grad_norm": 0.8522694641556555, "learning_rate": 7.284155796621993e-07, "loss": 0.6085, "step": 4709 }, { "epoch": 0.6, "grad_norm": 0.8688279956537864, "learning_rate": 7.280184512536761e-07, "loss": 0.6169, "step": 4710 }, { "epoch": 0.6, "grad_norm": 0.952854827398654, "learning_rate": 7.276213691607771e-07, "loss": 0.627, "step": 4711 }, { "epoch": 0.6, "grad_norm": 0.8552010284755536, "learning_rate": 7.272243334511212e-07, "loss": 0.6254, "step": 4712 }, { "epoch": 0.6, "grad_norm": 0.6604314546165456, "learning_rate": 7.268273441923196e-07, "loss": 0.5416, "step": 4713 }, { "epoch": 0.6, "grad_norm": 0.6551079047775132, "learning_rate": 7.264304014519754e-07, "loss": 0.5188, "step": 4714 }, { "epoch": 0.6, "grad_norm": 0.8703541187488316, "learning_rate": 7.260335052976838e-07, "loss": 0.5851, "step": 4715 }, { "epoch": 0.6, "grad_norm": 0.757046968976391, "learning_rate": 7.256366557970321e-07, "loss": 0.5838, "step": 4716 }, { "epoch": 0.6, "grad_norm": 1.0515175950519395, "learning_rate": 7.252398530175996e-07, "loss": 0.6763, "step": 4717 }, { "epoch": 0.6, "grad_norm": 0.6271552253514056, "learning_rate": 7.248430970269575e-07, "loss": 0.5292, "step": 4718 }, { "epoch": 0.6, "grad_norm": 0.9782847485924131, "learning_rate": 7.244463878926696e-07, "loss": 0.648, "step": 4719 }, { "epoch": 0.6, "grad_norm": 0.7581041034417082, "learning_rate": 7.240497256822913e-07, "loss": 0.5712, "step": 4720 }, { "epoch": 0.6, "grad_norm": 1.0719275935078403, "learning_rate": 7.236531104633697e-07, "loss": 0.673, "step": 4721 }, { "epoch": 0.6, "grad_norm": 0.8758944997911009, "learning_rate": 7.232565423034446e-07, "loss": 0.6491, "step": 4722 }, { "epoch": 0.6, "grad_norm": 0.6586421684348878, "learning_rate": 7.22860021270047e-07, "loss": 0.533, "step": 4723 }, { "epoch": 0.6, "grad_norm": 1.0152947338005016, "learning_rate": 7.224635474307008e-07, "loss": 0.6698, "step": 4724 }, { "epoch": 0.6, "grad_norm": 0.8251852441354635, "learning_rate": 7.22067120852921e-07, "loss": 0.582, "step": 4725 }, { "epoch": 0.6, "grad_norm": 0.8094211623621679, "learning_rate": 7.216707416042151e-07, "loss": 0.6151, "step": 4726 }, { "epoch": 0.6, "grad_norm": 0.7105081712664875, "learning_rate": 7.212744097520823e-07, "loss": 0.5295, "step": 4727 }, { "epoch": 0.6, "grad_norm": 0.8330324682726733, "learning_rate": 7.208781253640137e-07, "loss": 0.6153, "step": 4728 }, { "epoch": 0.6, "grad_norm": 0.6253953210825905, "learning_rate": 7.204818885074923e-07, "loss": 0.4871, "step": 4729 }, { "epoch": 0.6, "grad_norm": 0.659277464914254, "learning_rate": 7.200856992499936e-07, "loss": 0.5435, "step": 4730 }, { "epoch": 0.6, "grad_norm": 0.8791191992573567, "learning_rate": 7.196895576589839e-07, "loss": 0.6294, "step": 4731 }, { "epoch": 0.6, "grad_norm": 0.7303186214955407, "learning_rate": 7.192934638019225e-07, "loss": 0.5325, "step": 4732 }, { "epoch": 0.6, "grad_norm": 0.8121226973761069, "learning_rate": 7.188974177462598e-07, "loss": 0.6303, "step": 4733 }, { "epoch": 0.6, "grad_norm": 0.6876573230811017, "learning_rate": 7.185014195594383e-07, "loss": 0.5492, "step": 4734 }, { "epoch": 0.6, "grad_norm": 1.041398486557105, "learning_rate": 7.181054693088925e-07, "loss": 0.6473, "step": 4735 }, { "epoch": 0.6, "grad_norm": 0.9008191068244247, "learning_rate": 7.177095670620484e-07, "loss": 0.6194, "step": 4736 }, { "epoch": 0.6, "grad_norm": 0.6668835663744821, "learning_rate": 7.173137128863242e-07, "loss": 0.5479, "step": 4737 }, { "epoch": 0.6, "grad_norm": 1.1132586446348012, "learning_rate": 7.169179068491298e-07, "loss": 0.5977, "step": 4738 }, { "epoch": 0.6, "grad_norm": 0.8519859757653813, "learning_rate": 7.165221490178667e-07, "loss": 0.6078, "step": 4739 }, { "epoch": 0.6, "grad_norm": 0.8054190747440693, "learning_rate": 7.161264394599282e-07, "loss": 0.5944, "step": 4740 }, { "epoch": 0.6, "grad_norm": 0.8187173000516479, "learning_rate": 7.157307782427e-07, "loss": 0.6244, "step": 4741 }, { "epoch": 0.6, "grad_norm": 0.8127632953418372, "learning_rate": 7.153351654335585e-07, "loss": 0.6508, "step": 4742 }, { "epoch": 0.6, "grad_norm": 0.9956703648143062, "learning_rate": 7.149396010998728e-07, "loss": 0.6022, "step": 4743 }, { "epoch": 0.6, "grad_norm": 1.088198199748241, "learning_rate": 7.145440853090033e-07, "loss": 0.6578, "step": 4744 }, { "epoch": 0.6, "grad_norm": 0.5976694021071232, "learning_rate": 7.14148618128302e-07, "loss": 0.551, "step": 4745 }, { "epoch": 0.6, "grad_norm": 0.7343452996291056, "learning_rate": 7.137531996251133e-07, "loss": 0.5526, "step": 4746 }, { "epoch": 0.6, "grad_norm": 0.78515709508061, "learning_rate": 7.133578298667727e-07, "loss": 0.5978, "step": 4747 }, { "epoch": 0.6, "grad_norm": 0.797112240208171, "learning_rate": 7.129625089206071e-07, "loss": 0.6731, "step": 4748 }, { "epoch": 0.61, "grad_norm": 0.9425807633858226, "learning_rate": 7.125672368539362e-07, "loss": 0.6574, "step": 4749 }, { "epoch": 0.61, "grad_norm": 1.0512058925097378, "learning_rate": 7.121720137340704e-07, "loss": 0.6929, "step": 4750 }, { "epoch": 0.61, "grad_norm": 0.750028257637845, "learning_rate": 7.11776839628312e-07, "loss": 0.5504, "step": 4751 }, { "epoch": 0.61, "grad_norm": 0.7176662080854478, "learning_rate": 7.113817146039552e-07, "loss": 0.4925, "step": 4752 }, { "epoch": 0.61, "grad_norm": 0.7821620274320258, "learning_rate": 7.109866387282855e-07, "loss": 0.6034, "step": 4753 }, { "epoch": 0.61, "grad_norm": 0.6721927883594091, "learning_rate": 7.105916120685799e-07, "loss": 0.5977, "step": 4754 }, { "epoch": 0.61, "grad_norm": 0.6895200844556199, "learning_rate": 7.101966346921083e-07, "loss": 0.5626, "step": 4755 }, { "epoch": 0.61, "grad_norm": 0.9564520497941171, "learning_rate": 7.098017066661303e-07, "loss": 0.6268, "step": 4756 }, { "epoch": 0.61, "grad_norm": 0.6292146662297082, "learning_rate": 7.094068280578988e-07, "loss": 0.4715, "step": 4757 }, { "epoch": 0.61, "grad_norm": 1.0769687087604, "learning_rate": 7.090119989346568e-07, "loss": 0.6413, "step": 4758 }, { "epoch": 0.61, "grad_norm": 0.633486954765076, "learning_rate": 7.086172193636401e-07, "loss": 0.5259, "step": 4759 }, { "epoch": 0.61, "grad_norm": 0.7853610452488977, "learning_rate": 7.082224894120752e-07, "loss": 0.5467, "step": 4760 }, { "epoch": 0.61, "grad_norm": 0.7624862105817894, "learning_rate": 7.078278091471808e-07, "loss": 0.5271, "step": 4761 }, { "epoch": 0.61, "grad_norm": 0.8148066972309069, "learning_rate": 7.074331786361665e-07, "loss": 0.6085, "step": 4762 }, { "epoch": 0.61, "grad_norm": 0.7895519103863339, "learning_rate": 7.070385979462338e-07, "loss": 0.5312, "step": 4763 }, { "epoch": 0.61, "grad_norm": 0.692040207682597, "learning_rate": 7.066440671445762e-07, "loss": 0.5558, "step": 4764 }, { "epoch": 0.61, "grad_norm": 0.74430700688589, "learning_rate": 7.062495862983775e-07, "loss": 0.582, "step": 4765 }, { "epoch": 0.61, "grad_norm": 0.7387533240209501, "learning_rate": 7.058551554748142e-07, "loss": 0.5714, "step": 4766 }, { "epoch": 0.61, "grad_norm": 0.5849698414425039, "learning_rate": 7.054607747410535e-07, "loss": 0.4758, "step": 4767 }, { "epoch": 0.61, "grad_norm": 1.1246617157719698, "learning_rate": 7.050664441642543e-07, "loss": 0.7112, "step": 4768 }, { "epoch": 0.61, "grad_norm": 0.7486763369816976, "learning_rate": 7.046721638115672e-07, "loss": 0.5703, "step": 4769 }, { "epoch": 0.61, "grad_norm": 0.7241175110856931, "learning_rate": 7.042779337501336e-07, "loss": 0.57, "step": 4770 }, { "epoch": 0.61, "grad_norm": 0.7401943771307347, "learning_rate": 7.038837540470874e-07, "loss": 0.5706, "step": 4771 }, { "epoch": 0.61, "grad_norm": 0.7587195794397982, "learning_rate": 7.034896247695528e-07, "loss": 0.5416, "step": 4772 }, { "epoch": 0.61, "grad_norm": 0.7072404208831705, "learning_rate": 7.030955459846462e-07, "loss": 0.5857, "step": 4773 }, { "epoch": 0.61, "grad_norm": 0.7705257992585705, "learning_rate": 7.027015177594748e-07, "loss": 0.6023, "step": 4774 }, { "epoch": 0.61, "grad_norm": 0.6617289582873193, "learning_rate": 7.023075401611379e-07, "loss": 0.5765, "step": 4775 }, { "epoch": 0.61, "grad_norm": 0.835679920633192, "learning_rate": 7.019136132567255e-07, "loss": 0.5522, "step": 4776 }, { "epoch": 0.61, "grad_norm": 0.7879134341223859, "learning_rate": 7.01519737113319e-07, "loss": 0.6415, "step": 4777 }, { "epoch": 0.61, "grad_norm": 0.6502323819788756, "learning_rate": 7.011259117979922e-07, "loss": 0.5162, "step": 4778 }, { "epoch": 0.61, "grad_norm": 1.0055612498664552, "learning_rate": 7.007321373778088e-07, "loss": 0.6376, "step": 4779 }, { "epoch": 0.61, "grad_norm": 0.7660119665206292, "learning_rate": 7.003384139198245e-07, "loss": 0.5454, "step": 4780 }, { "epoch": 0.61, "grad_norm": 0.9900353074170458, "learning_rate": 6.999447414910867e-07, "loss": 0.649, "step": 4781 }, { "epoch": 0.61, "grad_norm": 0.8163926150169506, "learning_rate": 6.995511201586334e-07, "loss": 0.5574, "step": 4782 }, { "epoch": 0.61, "grad_norm": 0.7709304368712685, "learning_rate": 6.991575499894941e-07, "loss": 0.6046, "step": 4783 }, { "epoch": 0.61, "grad_norm": 0.8009888505157126, "learning_rate": 6.987640310506901e-07, "loss": 0.5392, "step": 4784 }, { "epoch": 0.61, "grad_norm": 0.7185691835158405, "learning_rate": 6.983705634092334e-07, "loss": 0.5595, "step": 4785 }, { "epoch": 0.61, "grad_norm": 0.6107605902040845, "learning_rate": 6.979771471321273e-07, "loss": 0.5033, "step": 4786 }, { "epoch": 0.61, "grad_norm": 0.6487810679107059, "learning_rate": 6.975837822863666e-07, "loss": 0.4997, "step": 4787 }, { "epoch": 0.61, "grad_norm": 0.7144406443356014, "learning_rate": 6.971904689389372e-07, "loss": 0.5008, "step": 4788 }, { "epoch": 0.61, "grad_norm": 0.6349709191191786, "learning_rate": 6.967972071568165e-07, "loss": 0.5675, "step": 4789 }, { "epoch": 0.61, "grad_norm": 0.753590452373471, "learning_rate": 6.964039970069721e-07, "loss": 0.5336, "step": 4790 }, { "epoch": 0.61, "grad_norm": 0.8046679245658495, "learning_rate": 6.960108385563645e-07, "loss": 0.5679, "step": 4791 }, { "epoch": 0.61, "grad_norm": 0.8426606403966166, "learning_rate": 6.956177318719443e-07, "loss": 0.5877, "step": 4792 }, { "epoch": 0.61, "grad_norm": 0.8815655198821202, "learning_rate": 6.952246770206534e-07, "loss": 0.6334, "step": 4793 }, { "epoch": 0.61, "grad_norm": 0.8660118505291564, "learning_rate": 6.948316740694246e-07, "loss": 0.6019, "step": 4794 }, { "epoch": 0.61, "grad_norm": 0.7808875781669933, "learning_rate": 6.944387230851827e-07, "loss": 0.5575, "step": 4795 }, { "epoch": 0.61, "grad_norm": 0.8112020505434123, "learning_rate": 6.940458241348428e-07, "loss": 0.6398, "step": 4796 }, { "epoch": 0.61, "grad_norm": 0.7635212708499578, "learning_rate": 6.936529772853117e-07, "loss": 0.5612, "step": 4797 }, { "epoch": 0.61, "grad_norm": 0.8859448764387569, "learning_rate": 6.932601826034871e-07, "loss": 0.6575, "step": 4798 }, { "epoch": 0.61, "grad_norm": 0.8304377529016366, "learning_rate": 6.928674401562578e-07, "loss": 0.5844, "step": 4799 }, { "epoch": 0.61, "grad_norm": 0.8629948719835527, "learning_rate": 6.924747500105037e-07, "loss": 0.6188, "step": 4800 }, { "epoch": 0.61, "grad_norm": 0.6749040285598692, "learning_rate": 6.92082112233096e-07, "loss": 0.5453, "step": 4801 }, { "epoch": 0.61, "grad_norm": 0.6510188861081363, "learning_rate": 6.916895268908967e-07, "loss": 0.5841, "step": 4802 }, { "epoch": 0.61, "grad_norm": 0.7898676989975746, "learning_rate": 6.912969940507587e-07, "loss": 0.6037, "step": 4803 }, { "epoch": 0.61, "grad_norm": 0.7154769251012439, "learning_rate": 6.909045137795268e-07, "loss": 0.5556, "step": 4804 }, { "epoch": 0.61, "grad_norm": 0.8761612356477939, "learning_rate": 6.905120861440362e-07, "loss": 0.5936, "step": 4805 }, { "epoch": 0.61, "grad_norm": 0.906458135120129, "learning_rate": 6.90119711211113e-07, "loss": 0.6344, "step": 4806 }, { "epoch": 0.61, "grad_norm": 0.9520359562907782, "learning_rate": 6.897273890475746e-07, "loss": 0.5961, "step": 4807 }, { "epoch": 0.61, "grad_norm": 0.7054099333791667, "learning_rate": 6.893351197202296e-07, "loss": 0.5583, "step": 4808 }, { "epoch": 0.61, "grad_norm": 0.8439252488244353, "learning_rate": 6.889429032958772e-07, "loss": 0.6299, "step": 4809 }, { "epoch": 0.61, "grad_norm": 0.9203848960347051, "learning_rate": 6.885507398413077e-07, "loss": 0.6462, "step": 4810 }, { "epoch": 0.61, "grad_norm": 0.7672046552640451, "learning_rate": 6.881586294233027e-07, "loss": 0.5989, "step": 4811 }, { "epoch": 0.61, "grad_norm": 0.7684940879176732, "learning_rate": 6.877665721086343e-07, "loss": 0.5589, "step": 4812 }, { "epoch": 0.61, "grad_norm": 1.635692612464444, "learning_rate": 6.873745679640659e-07, "loss": 0.6498, "step": 4813 }, { "epoch": 0.61, "grad_norm": 0.7161497112484605, "learning_rate": 6.869826170563515e-07, "loss": 0.5284, "step": 4814 }, { "epoch": 0.61, "grad_norm": 0.7283410519946485, "learning_rate": 6.865907194522365e-07, "loss": 0.5661, "step": 4815 }, { "epoch": 0.61, "grad_norm": 1.0361594151789284, "learning_rate": 6.861988752184567e-07, "loss": 0.6555, "step": 4816 }, { "epoch": 0.61, "grad_norm": 0.7136941108175854, "learning_rate": 6.858070844217396e-07, "loss": 0.544, "step": 4817 }, { "epoch": 0.61, "grad_norm": 0.7853751720583463, "learning_rate": 6.854153471288027e-07, "loss": 0.5413, "step": 4818 }, { "epoch": 0.61, "grad_norm": 0.8406235118737051, "learning_rate": 6.850236634063548e-07, "loss": 0.6425, "step": 4819 }, { "epoch": 0.61, "grad_norm": 0.716756228798767, "learning_rate": 6.846320333210957e-07, "loss": 0.5209, "step": 4820 }, { "epoch": 0.61, "grad_norm": 0.7147562991430463, "learning_rate": 6.84240456939716e-07, "loss": 0.5155, "step": 4821 }, { "epoch": 0.61, "grad_norm": 0.7887131612775877, "learning_rate": 6.838489343288967e-07, "loss": 0.5537, "step": 4822 }, { "epoch": 0.61, "grad_norm": 0.6479113876271091, "learning_rate": 6.834574655553102e-07, "loss": 0.6061, "step": 4823 }, { "epoch": 0.61, "grad_norm": 0.9503540453255233, "learning_rate": 6.830660506856199e-07, "loss": 0.637, "step": 4824 }, { "epoch": 0.61, "grad_norm": 0.7032096319455334, "learning_rate": 6.82674689786479e-07, "loss": 0.5231, "step": 4825 }, { "epoch": 0.61, "grad_norm": 0.6644745911897748, "learning_rate": 6.822833829245329e-07, "loss": 0.5296, "step": 4826 }, { "epoch": 0.61, "grad_norm": 0.7283875314178863, "learning_rate": 6.818921301664165e-07, "loss": 0.5618, "step": 4827 }, { "epoch": 0.62, "grad_norm": 0.617711306475877, "learning_rate": 6.815009315787564e-07, "loss": 0.4938, "step": 4828 }, { "epoch": 0.62, "grad_norm": 0.7196728513342804, "learning_rate": 6.811097872281694e-07, "loss": 0.5741, "step": 4829 }, { "epoch": 0.62, "grad_norm": 0.7230316673128611, "learning_rate": 6.807186971812635e-07, "loss": 0.552, "step": 4830 }, { "epoch": 0.62, "grad_norm": 0.8262021710428504, "learning_rate": 6.803276615046374e-07, "loss": 0.6414, "step": 4831 }, { "epoch": 0.62, "grad_norm": 0.8585626083512728, "learning_rate": 6.7993668026488e-07, "loss": 0.5918, "step": 4832 }, { "epoch": 0.62, "grad_norm": 0.7077791245103183, "learning_rate": 6.795457535285718e-07, "loss": 0.5925, "step": 4833 }, { "epoch": 0.62, "grad_norm": 0.7523436891900062, "learning_rate": 6.791548813622833e-07, "loss": 0.5262, "step": 4834 }, { "epoch": 0.62, "grad_norm": 0.8780827062911793, "learning_rate": 6.787640638325758e-07, "loss": 0.636, "step": 4835 }, { "epoch": 0.62, "grad_norm": 0.7275887987262222, "learning_rate": 6.783733010060017e-07, "loss": 0.5634, "step": 4836 }, { "epoch": 0.62, "grad_norm": 0.9700338728247637, "learning_rate": 6.779825929491038e-07, "loss": 0.6412, "step": 4837 }, { "epoch": 0.62, "grad_norm": 0.8826238063417386, "learning_rate": 6.775919397284156e-07, "loss": 0.6449, "step": 4838 }, { "epoch": 0.62, "grad_norm": 0.8440895353053162, "learning_rate": 6.77201341410461e-07, "loss": 0.6341, "step": 4839 }, { "epoch": 0.62, "grad_norm": 0.7871904032268768, "learning_rate": 6.768107980617552e-07, "loss": 0.5403, "step": 4840 }, { "epoch": 0.62, "grad_norm": 0.7824694377949004, "learning_rate": 6.764203097488036e-07, "loss": 0.6156, "step": 4841 }, { "epoch": 0.62, "grad_norm": 0.6602031235662502, "learning_rate": 6.760298765381021e-07, "loss": 0.5598, "step": 4842 }, { "epoch": 0.62, "grad_norm": 0.8206943267582267, "learning_rate": 6.756394984961375e-07, "loss": 0.6576, "step": 4843 }, { "epoch": 0.62, "grad_norm": 0.8191598842750761, "learning_rate": 6.752491756893872e-07, "loss": 0.6043, "step": 4844 }, { "epoch": 0.62, "grad_norm": 1.1280344224488659, "learning_rate": 6.748589081843191e-07, "loss": 0.6174, "step": 4845 }, { "epoch": 0.62, "grad_norm": 0.6563546862316217, "learning_rate": 6.744686960473916e-07, "loss": 0.5476, "step": 4846 }, { "epoch": 0.62, "grad_norm": 0.7288512080022654, "learning_rate": 6.740785393450539e-07, "loss": 0.5211, "step": 4847 }, { "epoch": 0.62, "grad_norm": 0.8494058124002419, "learning_rate": 6.736884381437455e-07, "loss": 0.6217, "step": 4848 }, { "epoch": 0.62, "grad_norm": 0.7171345994963593, "learning_rate": 6.732983925098965e-07, "loss": 0.5246, "step": 4849 }, { "epoch": 0.62, "grad_norm": 0.6451486662683338, "learning_rate": 6.729084025099278e-07, "loss": 0.5247, "step": 4850 }, { "epoch": 0.62, "grad_norm": 0.8416736189261517, "learning_rate": 6.725184682102505e-07, "loss": 0.5529, "step": 4851 }, { "epoch": 0.62, "grad_norm": 0.8257004682909737, "learning_rate": 6.721285896772664e-07, "loss": 0.6359, "step": 4852 }, { "epoch": 0.62, "grad_norm": 0.7284664332304899, "learning_rate": 6.717387669773678e-07, "loss": 0.5252, "step": 4853 }, { "epoch": 0.62, "grad_norm": 0.6978963816922141, "learning_rate": 6.713490001769373e-07, "loss": 0.5532, "step": 4854 }, { "epoch": 0.62, "grad_norm": 0.6712188116042048, "learning_rate": 6.709592893423477e-07, "loss": 0.508, "step": 4855 }, { "epoch": 0.62, "grad_norm": 0.8599403718104743, "learning_rate": 6.705696345399638e-07, "loss": 0.5752, "step": 4856 }, { "epoch": 0.62, "grad_norm": 1.0296835087056169, "learning_rate": 6.70180035836139e-07, "loss": 0.5951, "step": 4857 }, { "epoch": 0.62, "grad_norm": 0.8870984777002201, "learning_rate": 6.697904932972182e-07, "loss": 0.6199, "step": 4858 }, { "epoch": 0.62, "grad_norm": 0.6961652486444441, "learning_rate": 6.694010069895361e-07, "loss": 0.6272, "step": 4859 }, { "epoch": 0.62, "grad_norm": 0.7107906739428895, "learning_rate": 6.690115769794186e-07, "loss": 0.5466, "step": 4860 }, { "epoch": 0.62, "grad_norm": 0.9138769885240373, "learning_rate": 6.68622203333181e-07, "loss": 0.6471, "step": 4861 }, { "epoch": 0.62, "grad_norm": 0.7926008244441692, "learning_rate": 6.6823288611713e-07, "loss": 0.6957, "step": 4862 }, { "epoch": 0.62, "grad_norm": 0.7272846279357286, "learning_rate": 6.678436253975624e-07, "loss": 0.571, "step": 4863 }, { "epoch": 0.62, "grad_norm": 0.6896564498174491, "learning_rate": 6.674544212407649e-07, "loss": 0.5562, "step": 4864 }, { "epoch": 0.62, "grad_norm": 0.9890947250370119, "learning_rate": 6.670652737130149e-07, "loss": 0.6269, "step": 4865 }, { "epoch": 0.62, "grad_norm": 0.8642369579729864, "learning_rate": 6.666761828805804e-07, "loss": 0.592, "step": 4866 }, { "epoch": 0.62, "grad_norm": 0.774674201419634, "learning_rate": 6.662871488097195e-07, "loss": 0.5547, "step": 4867 }, { "epoch": 0.62, "grad_norm": 0.8643518231961775, "learning_rate": 6.658981715666805e-07, "loss": 0.5964, "step": 4868 }, { "epoch": 0.62, "grad_norm": 0.6936260213035784, "learning_rate": 6.655092512177023e-07, "loss": 0.5712, "step": 4869 }, { "epoch": 0.62, "grad_norm": 0.6547179077665197, "learning_rate": 6.651203878290138e-07, "loss": 0.5679, "step": 4870 }, { "epoch": 0.62, "grad_norm": 1.057312308194285, "learning_rate": 6.647315814668347e-07, "loss": 0.6252, "step": 4871 }, { "epoch": 0.62, "grad_norm": 0.666867936718001, "learning_rate": 6.643428321973742e-07, "loss": 0.5501, "step": 4872 }, { "epoch": 0.62, "grad_norm": 0.9389527106922586, "learning_rate": 6.639541400868327e-07, "loss": 0.6276, "step": 4873 }, { "epoch": 0.62, "grad_norm": 0.8852747374209617, "learning_rate": 6.635655052014e-07, "loss": 0.5932, "step": 4874 }, { "epoch": 0.62, "grad_norm": 0.771451574434832, "learning_rate": 6.63176927607257e-07, "loss": 0.5992, "step": 4875 }, { "epoch": 0.62, "grad_norm": 0.9028245080355353, "learning_rate": 6.627884073705742e-07, "loss": 0.6082, "step": 4876 }, { "epoch": 0.62, "grad_norm": 0.9110975698470604, "learning_rate": 6.623999445575125e-07, "loss": 0.6536, "step": 4877 }, { "epoch": 0.62, "grad_norm": 1.0452207657694201, "learning_rate": 6.620115392342234e-07, "loss": 0.6379, "step": 4878 }, { "epoch": 0.62, "grad_norm": 0.7547747845777661, "learning_rate": 6.616231914668479e-07, "loss": 0.5126, "step": 4879 }, { "epoch": 0.62, "grad_norm": 0.9623014156166316, "learning_rate": 6.612349013215177e-07, "loss": 0.6469, "step": 4880 }, { "epoch": 0.62, "grad_norm": 0.6467433454366135, "learning_rate": 6.608466688643545e-07, "loss": 0.5522, "step": 4881 }, { "epoch": 0.62, "grad_norm": 0.8681980473138912, "learning_rate": 6.604584941614705e-07, "loss": 0.6493, "step": 4882 }, { "epoch": 0.62, "grad_norm": 0.8694519271810779, "learning_rate": 6.600703772789675e-07, "loss": 0.5946, "step": 4883 }, { "epoch": 0.62, "grad_norm": 0.6406747073233817, "learning_rate": 6.596823182829379e-07, "loss": 0.5606, "step": 4884 }, { "epoch": 0.62, "grad_norm": 0.7468883034562007, "learning_rate": 6.592943172394642e-07, "loss": 0.6015, "step": 4885 }, { "epoch": 0.62, "grad_norm": 0.8233223464111684, "learning_rate": 6.589063742146187e-07, "loss": 0.6341, "step": 4886 }, { "epoch": 0.62, "grad_norm": 0.9797785284570254, "learning_rate": 6.585184892744642e-07, "loss": 0.6879, "step": 4887 }, { "epoch": 0.62, "grad_norm": 0.8533240303983688, "learning_rate": 6.581306624850534e-07, "loss": 0.6323, "step": 4888 }, { "epoch": 0.62, "grad_norm": 0.7621165448330751, "learning_rate": 6.577428939124292e-07, "loss": 0.5355, "step": 4889 }, { "epoch": 0.62, "grad_norm": 0.840352662520139, "learning_rate": 6.573551836226246e-07, "loss": 0.659, "step": 4890 }, { "epoch": 0.62, "grad_norm": 1.0078896780483122, "learning_rate": 6.569675316816627e-07, "loss": 0.601, "step": 4891 }, { "epoch": 0.62, "grad_norm": 0.7549521867020064, "learning_rate": 6.565799381555564e-07, "loss": 0.5707, "step": 4892 }, { "epoch": 0.62, "grad_norm": 0.6325902417172217, "learning_rate": 6.561924031103088e-07, "loss": 0.5759, "step": 4893 }, { "epoch": 0.62, "grad_norm": 0.6447953323478757, "learning_rate": 6.558049266119133e-07, "loss": 0.5188, "step": 4894 }, { "epoch": 0.62, "grad_norm": 0.6147895339031084, "learning_rate": 6.55417508726353e-07, "loss": 0.5047, "step": 4895 }, { "epoch": 0.62, "grad_norm": 0.6786957387957474, "learning_rate": 6.550301495196011e-07, "loss": 0.5613, "step": 4896 }, { "epoch": 0.62, "grad_norm": 0.7580510879088299, "learning_rate": 6.54642849057621e-07, "loss": 0.5675, "step": 4897 }, { "epoch": 0.62, "grad_norm": 0.6332224947528619, "learning_rate": 6.542556074063657e-07, "loss": 0.5337, "step": 4898 }, { "epoch": 0.62, "grad_norm": 1.1718889512545676, "learning_rate": 6.538684246317784e-07, "loss": 0.6109, "step": 4899 }, { "epoch": 0.62, "grad_norm": 0.8399563557643595, "learning_rate": 6.534813007997925e-07, "loss": 0.6011, "step": 4900 }, { "epoch": 0.62, "grad_norm": 0.9099523307497173, "learning_rate": 6.530942359763309e-07, "loss": 0.6492, "step": 4901 }, { "epoch": 0.62, "grad_norm": 0.6976258901635441, "learning_rate": 6.527072302273071e-07, "loss": 0.5559, "step": 4902 }, { "epoch": 0.62, "grad_norm": 0.7967638308160917, "learning_rate": 6.523202836186239e-07, "loss": 0.5806, "step": 4903 }, { "epoch": 0.62, "grad_norm": 0.6629516413739844, "learning_rate": 6.519333962161741e-07, "loss": 0.5277, "step": 4904 }, { "epoch": 0.62, "grad_norm": 0.7550426941653392, "learning_rate": 6.51546568085841e-07, "loss": 0.5795, "step": 4905 }, { "epoch": 0.63, "grad_norm": 0.8871271406604001, "learning_rate": 6.511597992934972e-07, "loss": 0.6128, "step": 4906 }, { "epoch": 0.63, "grad_norm": 0.9368843154016564, "learning_rate": 6.507730899050054e-07, "loss": 0.6358, "step": 4907 }, { "epoch": 0.63, "grad_norm": 0.7608914456432945, "learning_rate": 6.503864399862181e-07, "loss": 0.5634, "step": 4908 }, { "epoch": 0.63, "grad_norm": 0.788032965909177, "learning_rate": 6.49999849602978e-07, "loss": 0.5283, "step": 4909 }, { "epoch": 0.63, "grad_norm": 1.347956943566713, "learning_rate": 6.49613318821117e-07, "loss": 0.6361, "step": 4910 }, { "epoch": 0.63, "grad_norm": 0.7137491502381899, "learning_rate": 6.492268477064575e-07, "loss": 0.562, "step": 4911 }, { "epoch": 0.63, "grad_norm": 0.783047037516308, "learning_rate": 6.488404363248117e-07, "loss": 0.5507, "step": 4912 }, { "epoch": 0.63, "grad_norm": 1.1501506603744205, "learning_rate": 6.484540847419811e-07, "loss": 0.6462, "step": 4913 }, { "epoch": 0.63, "grad_norm": 0.8478163000494211, "learning_rate": 6.480677930237572e-07, "loss": 0.6522, "step": 4914 }, { "epoch": 0.63, "grad_norm": 0.6116167156423842, "learning_rate": 6.476815612359221e-07, "loss": 0.5725, "step": 4915 }, { "epoch": 0.63, "grad_norm": 0.6469411895445523, "learning_rate": 6.472953894442468e-07, "loss": 0.5538, "step": 4916 }, { "epoch": 0.63, "grad_norm": 0.8415444045244135, "learning_rate": 6.46909277714492e-07, "loss": 0.5466, "step": 4917 }, { "epoch": 0.63, "grad_norm": 4.605308570927544, "learning_rate": 6.465232261124088e-07, "loss": 0.599, "step": 4918 }, { "epoch": 0.63, "grad_norm": 0.8598605706008651, "learning_rate": 6.461372347037377e-07, "loss": 0.5505, "step": 4919 }, { "epoch": 0.63, "grad_norm": 1.0261045172423413, "learning_rate": 6.457513035542091e-07, "loss": 0.6164, "step": 4920 }, { "epoch": 0.63, "grad_norm": 0.8466212245841493, "learning_rate": 6.453654327295429e-07, "loss": 0.545, "step": 4921 }, { "epoch": 0.63, "grad_norm": 0.6431198835841655, "learning_rate": 6.449796222954489e-07, "loss": 0.5003, "step": 4922 }, { "epoch": 0.63, "grad_norm": 1.0649376284705676, "learning_rate": 6.445938723176267e-07, "loss": 0.6223, "step": 4923 }, { "epoch": 0.63, "grad_norm": 0.8248291366932888, "learning_rate": 6.442081828617655e-07, "loss": 0.5315, "step": 4924 }, { "epoch": 0.63, "grad_norm": 0.7572796617768951, "learning_rate": 6.438225539935441e-07, "loss": 0.5838, "step": 4925 }, { "epoch": 0.63, "grad_norm": 0.8449101231554983, "learning_rate": 6.434369857786312e-07, "loss": 0.6347, "step": 4926 }, { "epoch": 0.63, "grad_norm": 0.859352111189483, "learning_rate": 6.430514782826845e-07, "loss": 0.6348, "step": 4927 }, { "epoch": 0.63, "grad_norm": 0.6878526838899733, "learning_rate": 6.426660315713529e-07, "loss": 0.5251, "step": 4928 }, { "epoch": 0.63, "grad_norm": 0.7421909660845593, "learning_rate": 6.422806457102734e-07, "loss": 0.57, "step": 4929 }, { "epoch": 0.63, "grad_norm": 0.6469185857412609, "learning_rate": 6.418953207650731e-07, "loss": 0.5521, "step": 4930 }, { "epoch": 0.63, "grad_norm": 0.782972449613917, "learning_rate": 6.415100568013691e-07, "loss": 0.542, "step": 4931 }, { "epoch": 0.63, "grad_norm": 0.9030844618435762, "learning_rate": 6.411248538847676e-07, "loss": 0.6509, "step": 4932 }, { "epoch": 0.63, "grad_norm": 0.8168228495247508, "learning_rate": 6.407397120808649e-07, "loss": 0.5653, "step": 4933 }, { "epoch": 0.63, "grad_norm": 0.9057969670312863, "learning_rate": 6.403546314552463e-07, "loss": 0.5937, "step": 4934 }, { "epoch": 0.63, "grad_norm": 0.8205666056281299, "learning_rate": 6.399696120734872e-07, "loss": 0.5691, "step": 4935 }, { "epoch": 0.63, "grad_norm": 0.8057065321119412, "learning_rate": 6.395846540011526e-07, "loss": 0.7093, "step": 4936 }, { "epoch": 0.63, "grad_norm": 0.6656842535810251, "learning_rate": 6.391997573037965e-07, "loss": 0.5145, "step": 4937 }, { "epoch": 0.63, "grad_norm": 0.6596052612510509, "learning_rate": 6.38814922046963e-07, "loss": 0.5813, "step": 4938 }, { "epoch": 0.63, "grad_norm": 0.7166697856992714, "learning_rate": 6.384301482961854e-07, "loss": 0.5288, "step": 4939 }, { "epoch": 0.63, "grad_norm": 0.9365063315004525, "learning_rate": 6.380454361169866e-07, "loss": 0.6492, "step": 4940 }, { "epoch": 0.63, "grad_norm": 0.665813560245457, "learning_rate": 6.376607855748794e-07, "loss": 0.5745, "step": 4941 }, { "epoch": 0.63, "grad_norm": 1.169198048318818, "learning_rate": 6.372761967353655e-07, "loss": 0.6489, "step": 4942 }, { "epoch": 0.63, "grad_norm": 0.8428087752392551, "learning_rate": 6.368916696639366e-07, "loss": 0.6564, "step": 4943 }, { "epoch": 0.63, "grad_norm": 0.8990641256495119, "learning_rate": 6.365072044260735e-07, "loss": 0.6098, "step": 4944 }, { "epoch": 0.63, "grad_norm": 0.7594257757747117, "learning_rate": 6.361228010872466e-07, "loss": 0.5333, "step": 4945 }, { "epoch": 0.63, "grad_norm": 0.6563218401053665, "learning_rate": 6.357384597129158e-07, "loss": 0.5534, "step": 4946 }, { "epoch": 0.63, "grad_norm": 0.7406002029461745, "learning_rate": 6.353541803685305e-07, "loss": 0.5951, "step": 4947 }, { "epoch": 0.63, "grad_norm": 0.8165278684328335, "learning_rate": 6.349699631195295e-07, "loss": 0.5904, "step": 4948 }, { "epoch": 0.63, "grad_norm": 0.813183338067519, "learning_rate": 6.345858080313409e-07, "loss": 0.6175, "step": 4949 }, { "epoch": 0.63, "grad_norm": 0.709216320269669, "learning_rate": 6.342017151693822e-07, "loss": 0.5362, "step": 4950 }, { "epoch": 0.63, "grad_norm": 0.7170581577683568, "learning_rate": 6.338176845990607e-07, "loss": 0.6163, "step": 4951 }, { "epoch": 0.63, "grad_norm": 0.9079859049636873, "learning_rate": 6.334337163857725e-07, "loss": 0.7018, "step": 4952 }, { "epoch": 0.63, "grad_norm": 1.0218457930170581, "learning_rate": 6.330498105949036e-07, "loss": 0.609, "step": 4953 }, { "epoch": 0.63, "grad_norm": 0.8245608196816369, "learning_rate": 6.326659672918291e-07, "loss": 0.6108, "step": 4954 }, { "epoch": 0.63, "grad_norm": 0.9290067189320299, "learning_rate": 6.322821865419135e-07, "loss": 0.6427, "step": 4955 }, { "epoch": 0.63, "grad_norm": 0.9281370575539485, "learning_rate": 6.318984684105108e-07, "loss": 0.6065, "step": 4956 }, { "epoch": 0.63, "grad_norm": 0.8067568147951699, "learning_rate": 6.315148129629638e-07, "loss": 0.5724, "step": 4957 }, { "epoch": 0.63, "grad_norm": 0.7161480404545053, "learning_rate": 6.311312202646055e-07, "loss": 0.565, "step": 4958 }, { "epoch": 0.63, "grad_norm": 0.9334238781309483, "learning_rate": 6.307476903807573e-07, "loss": 0.64, "step": 4959 }, { "epoch": 0.63, "grad_norm": 0.7613104224188724, "learning_rate": 6.303642233767308e-07, "loss": 0.5323, "step": 4960 }, { "epoch": 0.63, "grad_norm": 0.89251144490296, "learning_rate": 6.299808193178262e-07, "loss": 0.6009, "step": 4961 }, { "epoch": 0.63, "grad_norm": 0.8289484968645028, "learning_rate": 6.295974782693334e-07, "loss": 0.626, "step": 4962 }, { "epoch": 0.63, "grad_norm": 0.8509765562880285, "learning_rate": 6.292142002965311e-07, "loss": 0.6412, "step": 4963 }, { "epoch": 0.63, "grad_norm": 0.8514468617566323, "learning_rate": 6.288309854646876e-07, "loss": 0.6503, "step": 4964 }, { "epoch": 0.63, "grad_norm": 0.780924284027837, "learning_rate": 6.284478338390606e-07, "loss": 0.5875, "step": 4965 }, { "epoch": 0.63, "grad_norm": 0.905755235832951, "learning_rate": 6.280647454848966e-07, "loss": 0.6757, "step": 4966 }, { "epoch": 0.63, "grad_norm": 1.0500278098327973, "learning_rate": 6.276817204674319e-07, "loss": 0.6809, "step": 4967 }, { "epoch": 0.63, "grad_norm": 0.9029279716988909, "learning_rate": 6.272987588518911e-07, "loss": 0.6644, "step": 4968 }, { "epoch": 0.63, "grad_norm": 0.8283752539344065, "learning_rate": 6.26915860703489e-07, "loss": 0.6099, "step": 4969 }, { "epoch": 0.63, "grad_norm": 0.8716947743952852, "learning_rate": 6.26533026087429e-07, "loss": 0.6019, "step": 4970 }, { "epoch": 0.63, "grad_norm": 0.7360810923821216, "learning_rate": 6.26150255068904e-07, "loss": 0.5001, "step": 4971 }, { "epoch": 0.63, "grad_norm": 0.7178246439773184, "learning_rate": 6.257675477130957e-07, "loss": 0.5755, "step": 4972 }, { "epoch": 0.63, "grad_norm": 0.8299638203459027, "learning_rate": 6.253849040851753e-07, "loss": 0.5972, "step": 4973 }, { "epoch": 0.63, "grad_norm": 0.8508951565266676, "learning_rate": 6.250023242503031e-07, "loss": 0.5596, "step": 4974 }, { "epoch": 0.63, "grad_norm": 0.6500818153881901, "learning_rate": 6.246198082736285e-07, "loss": 0.5384, "step": 4975 }, { "epoch": 0.63, "grad_norm": 0.7187411393553, "learning_rate": 6.242373562202897e-07, "loss": 0.546, "step": 4976 }, { "epoch": 0.63, "grad_norm": 0.6473034671658946, "learning_rate": 6.238549681554147e-07, "loss": 0.5537, "step": 4977 }, { "epoch": 0.63, "grad_norm": 0.6640803537389897, "learning_rate": 6.234726441441197e-07, "loss": 0.5025, "step": 4978 }, { "epoch": 0.63, "grad_norm": 0.9227965725247756, "learning_rate": 6.230903842515109e-07, "loss": 0.6559, "step": 4979 }, { "epoch": 0.63, "grad_norm": 0.7673545910840528, "learning_rate": 6.22708188542683e-07, "loss": 0.5565, "step": 4980 }, { "epoch": 0.63, "grad_norm": 0.756460568659084, "learning_rate": 6.2232605708272e-07, "loss": 0.554, "step": 4981 }, { "epoch": 0.63, "grad_norm": 0.8739971322497097, "learning_rate": 6.219439899366949e-07, "loss": 0.644, "step": 4982 }, { "epoch": 0.63, "grad_norm": 0.8567515583734208, "learning_rate": 6.215619871696698e-07, "loss": 0.5921, "step": 4983 }, { "epoch": 0.63, "grad_norm": 0.7556644032725468, "learning_rate": 6.211800488466957e-07, "loss": 0.5824, "step": 4984 }, { "epoch": 0.64, "grad_norm": 0.914646205473272, "learning_rate": 6.207981750328126e-07, "loss": 0.621, "step": 4985 }, { "epoch": 0.64, "grad_norm": 0.6417673288921504, "learning_rate": 6.2041636579305e-07, "loss": 0.4747, "step": 4986 }, { "epoch": 0.64, "grad_norm": 0.6618414921234277, "learning_rate": 6.200346211924259e-07, "loss": 0.5547, "step": 4987 }, { "epoch": 0.64, "grad_norm": 0.6902203006156109, "learning_rate": 6.196529412959472e-07, "loss": 0.5667, "step": 4988 }, { "epoch": 0.64, "grad_norm": 0.7550065684709346, "learning_rate": 6.192713261686102e-07, "loss": 0.6165, "step": 4989 }, { "epoch": 0.64, "grad_norm": 0.8216213827326314, "learning_rate": 6.188897758754001e-07, "loss": 0.5624, "step": 4990 }, { "epoch": 0.64, "grad_norm": 0.8093315922316112, "learning_rate": 6.185082904812906e-07, "loss": 0.5545, "step": 4991 }, { "epoch": 0.64, "grad_norm": 0.8742182036929619, "learning_rate": 6.18126870051245e-07, "loss": 0.6409, "step": 4992 }, { "epoch": 0.64, "grad_norm": 0.7978051546813751, "learning_rate": 6.177455146502151e-07, "loss": 0.6093, "step": 4993 }, { "epoch": 0.64, "grad_norm": 0.6897747425776496, "learning_rate": 6.173642243431417e-07, "loss": 0.5288, "step": 4994 }, { "epoch": 0.64, "grad_norm": 0.7532305702078202, "learning_rate": 6.169829991949546e-07, "loss": 0.5767, "step": 4995 }, { "epoch": 0.64, "grad_norm": 0.6569957616707532, "learning_rate": 6.166018392705726e-07, "loss": 0.5476, "step": 4996 }, { "epoch": 0.64, "grad_norm": 0.7957341683337777, "learning_rate": 6.162207446349031e-07, "loss": 0.5657, "step": 4997 }, { "epoch": 0.64, "grad_norm": 0.9004763465437087, "learning_rate": 6.158397153528427e-07, "loss": 0.6882, "step": 4998 }, { "epoch": 0.64, "grad_norm": 0.7772164644087306, "learning_rate": 6.154587514892761e-07, "loss": 0.5861, "step": 4999 }, { "epoch": 0.64, "grad_norm": 0.7687490873669001, "learning_rate": 6.150778531090785e-07, "loss": 0.5211, "step": 5000 }, { "epoch": 0.64, "grad_norm": 0.8093706322265606, "learning_rate": 6.146970202771124e-07, "loss": 0.5507, "step": 5001 }, { "epoch": 0.64, "grad_norm": 0.9918926436562822, "learning_rate": 6.143162530582297e-07, "loss": 0.6053, "step": 5002 }, { "epoch": 0.64, "grad_norm": 0.8130443721276802, "learning_rate": 6.13935551517271e-07, "loss": 0.5955, "step": 5003 }, { "epoch": 0.64, "grad_norm": 0.6471742389513562, "learning_rate": 6.135549157190659e-07, "loss": 0.5063, "step": 5004 }, { "epoch": 0.64, "grad_norm": 1.0437946907972142, "learning_rate": 6.131743457284328e-07, "loss": 0.634, "step": 5005 }, { "epoch": 0.64, "grad_norm": 1.0687083116021114, "learning_rate": 6.127938416101786e-07, "loss": 0.6465, "step": 5006 }, { "epoch": 0.64, "grad_norm": 0.8612141361521637, "learning_rate": 6.124134034290993e-07, "loss": 0.5909, "step": 5007 }, { "epoch": 0.64, "grad_norm": 0.7759203963871272, "learning_rate": 6.120330312499795e-07, "loss": 0.519, "step": 5008 }, { "epoch": 0.64, "grad_norm": 0.8009859750770079, "learning_rate": 6.116527251375927e-07, "loss": 0.6054, "step": 5009 }, { "epoch": 0.64, "grad_norm": 0.8065844323118677, "learning_rate": 6.11272485156701e-07, "loss": 0.5771, "step": 5010 }, { "epoch": 0.64, "grad_norm": 0.8211173251871965, "learning_rate": 6.108923113720554e-07, "loss": 0.6547, "step": 5011 }, { "epoch": 0.64, "grad_norm": 0.6755481365742355, "learning_rate": 6.105122038483954e-07, "loss": 0.5352, "step": 5012 }, { "epoch": 0.64, "grad_norm": 0.8660748235591168, "learning_rate": 6.101321626504494e-07, "loss": 0.6249, "step": 5013 }, { "epoch": 0.64, "grad_norm": 0.8637177658083318, "learning_rate": 6.097521878429345e-07, "loss": 0.592, "step": 5014 }, { "epoch": 0.64, "grad_norm": 0.9253391958662451, "learning_rate": 6.093722794905567e-07, "loss": 0.6361, "step": 5015 }, { "epoch": 0.64, "grad_norm": 0.9166015021716615, "learning_rate": 6.0899243765801e-07, "loss": 0.6264, "step": 5016 }, { "epoch": 0.64, "grad_norm": 0.6983658180805641, "learning_rate": 6.086126624099777e-07, "loss": 0.5922, "step": 5017 }, { "epoch": 0.64, "grad_norm": 0.9835948279146213, "learning_rate": 6.082329538111315e-07, "loss": 0.6904, "step": 5018 }, { "epoch": 0.64, "grad_norm": 0.9284813364489757, "learning_rate": 6.078533119261318e-07, "loss": 0.5865, "step": 5019 }, { "epoch": 0.64, "grad_norm": 0.7306505092383813, "learning_rate": 6.074737368196279e-07, "loss": 0.5766, "step": 5020 }, { "epoch": 0.64, "grad_norm": 0.8444092087241589, "learning_rate": 6.070942285562572e-07, "loss": 0.6053, "step": 5021 }, { "epoch": 0.64, "grad_norm": 0.8143710418037718, "learning_rate": 6.067147872006461e-07, "loss": 0.6022, "step": 5022 }, { "epoch": 0.64, "grad_norm": 0.6834113062292975, "learning_rate": 6.063354128174096e-07, "loss": 0.517, "step": 5023 }, { "epoch": 0.64, "grad_norm": 0.654444081468434, "learning_rate": 6.059561054711512e-07, "loss": 0.5208, "step": 5024 }, { "epoch": 0.64, "grad_norm": 0.6829282752172614, "learning_rate": 6.055768652264625e-07, "loss": 0.5196, "step": 5025 }, { "epoch": 0.64, "grad_norm": 0.8858204863458103, "learning_rate": 6.051976921479249e-07, "loss": 0.6152, "step": 5026 }, { "epoch": 0.64, "grad_norm": 0.8260474743105054, "learning_rate": 6.048185863001074e-07, "loss": 0.6457, "step": 5027 }, { "epoch": 0.64, "grad_norm": 0.704128804470987, "learning_rate": 6.044395477475676e-07, "loss": 0.5256, "step": 5028 }, { "epoch": 0.64, "grad_norm": 0.9848551422363379, "learning_rate": 6.040605765548519e-07, "loss": 0.6342, "step": 5029 }, { "epoch": 0.64, "grad_norm": 0.7628276912534608, "learning_rate": 6.03681672786495e-07, "loss": 0.5759, "step": 5030 }, { "epoch": 0.64, "grad_norm": 0.8110916594432857, "learning_rate": 6.033028365070207e-07, "loss": 0.5471, "step": 5031 }, { "epoch": 0.64, "grad_norm": 0.9016975759148083, "learning_rate": 6.029240677809404e-07, "loss": 0.6218, "step": 5032 }, { "epoch": 0.64, "grad_norm": 0.7183808734749543, "learning_rate": 6.025453666727547e-07, "loss": 0.5283, "step": 5033 }, { "epoch": 0.64, "grad_norm": 1.4390614234386085, "learning_rate": 6.021667332469523e-07, "loss": 0.6238, "step": 5034 }, { "epoch": 0.64, "grad_norm": 0.7554507786921595, "learning_rate": 6.017881675680107e-07, "loss": 0.6051, "step": 5035 }, { "epoch": 0.64, "grad_norm": 0.6358237871152954, "learning_rate": 6.014096697003957e-07, "loss": 0.5695, "step": 5036 }, { "epoch": 0.64, "grad_norm": 0.6377939608862446, "learning_rate": 6.010312397085613e-07, "loss": 0.5701, "step": 5037 }, { "epoch": 0.64, "grad_norm": 0.7695221234903264, "learning_rate": 6.006528776569504e-07, "loss": 0.5821, "step": 5038 }, { "epoch": 0.64, "grad_norm": 0.7715144763859078, "learning_rate": 6.002745836099941e-07, "loss": 0.6339, "step": 5039 }, { "epoch": 0.64, "grad_norm": 0.7857716793280495, "learning_rate": 5.998963576321119e-07, "loss": 0.5451, "step": 5040 }, { "epoch": 0.64, "grad_norm": 1.0522187748236873, "learning_rate": 5.995181997877117e-07, "loss": 0.5672, "step": 5041 }, { "epoch": 0.64, "grad_norm": 0.6669504917647237, "learning_rate": 5.991401101411899e-07, "loss": 0.5319, "step": 5042 }, { "epoch": 0.64, "grad_norm": 0.80251396948831, "learning_rate": 5.987620887569313e-07, "loss": 0.5986, "step": 5043 }, { "epoch": 0.64, "grad_norm": 0.7015073834912915, "learning_rate": 5.983841356993089e-07, "loss": 0.5751, "step": 5044 }, { "epoch": 0.64, "grad_norm": 0.6475561378305018, "learning_rate": 5.980062510326842e-07, "loss": 0.5207, "step": 5045 }, { "epoch": 0.64, "grad_norm": 0.7527385662977498, "learning_rate": 5.97628434821407e-07, "loss": 0.5953, "step": 5046 }, { "epoch": 0.64, "grad_norm": 0.8501566073388109, "learning_rate": 5.972506871298156e-07, "loss": 0.5161, "step": 5047 }, { "epoch": 0.64, "grad_norm": 0.9160663091141346, "learning_rate": 5.968730080222361e-07, "loss": 0.6188, "step": 5048 }, { "epoch": 0.64, "grad_norm": 0.861499366523754, "learning_rate": 5.964953975629836e-07, "loss": 0.5615, "step": 5049 }, { "epoch": 0.64, "grad_norm": 0.6664628322734598, "learning_rate": 5.961178558163614e-07, "loss": 0.582, "step": 5050 }, { "epoch": 0.64, "grad_norm": 1.1579646048558891, "learning_rate": 5.957403828466605e-07, "loss": 0.6524, "step": 5051 }, { "epoch": 0.64, "grad_norm": 0.821458669601864, "learning_rate": 5.95362978718161e-07, "loss": 0.5755, "step": 5052 }, { "epoch": 0.64, "grad_norm": 0.5985831094136993, "learning_rate": 5.949856434951308e-07, "loss": 0.511, "step": 5053 }, { "epoch": 0.64, "grad_norm": 0.9497938554733488, "learning_rate": 5.94608377241826e-07, "loss": 0.6587, "step": 5054 }, { "epoch": 0.64, "grad_norm": 1.1859720698253693, "learning_rate": 5.942311800224913e-07, "loss": 0.608, "step": 5055 }, { "epoch": 0.64, "grad_norm": 1.0771315826328354, "learning_rate": 5.938540519013594e-07, "loss": 0.5927, "step": 5056 }, { "epoch": 0.64, "grad_norm": 0.8184816624990666, "learning_rate": 5.934769929426512e-07, "loss": 0.5614, "step": 5057 }, { "epoch": 0.64, "grad_norm": 0.7813791802459399, "learning_rate": 5.931000032105757e-07, "loss": 0.6085, "step": 5058 }, { "epoch": 0.64, "grad_norm": 0.751256812824723, "learning_rate": 5.927230827693308e-07, "loss": 0.5623, "step": 5059 }, { "epoch": 0.64, "grad_norm": 0.6000530065662966, "learning_rate": 5.92346231683102e-07, "loss": 0.566, "step": 5060 }, { "epoch": 0.64, "grad_norm": 0.7739495245918401, "learning_rate": 5.919694500160631e-07, "loss": 0.5726, "step": 5061 }, { "epoch": 0.64, "grad_norm": 0.7772571431207527, "learning_rate": 5.91592737832376e-07, "loss": 0.5335, "step": 5062 }, { "epoch": 0.65, "grad_norm": 0.8113011841768453, "learning_rate": 5.912160951961908e-07, "loss": 0.5996, "step": 5063 }, { "epoch": 0.65, "grad_norm": 0.7983879658872886, "learning_rate": 5.90839522171646e-07, "loss": 0.5792, "step": 5064 }, { "epoch": 0.65, "grad_norm": 0.8743015732055807, "learning_rate": 5.90463018822868e-07, "loss": 0.5926, "step": 5065 }, { "epoch": 0.65, "grad_norm": 0.7830507508463779, "learning_rate": 5.900865852139713e-07, "loss": 0.5693, "step": 5066 }, { "epoch": 0.65, "grad_norm": 0.9189113356999796, "learning_rate": 5.897102214090587e-07, "loss": 0.6091, "step": 5067 }, { "epoch": 0.65, "grad_norm": 0.8739337434649757, "learning_rate": 5.893339274722211e-07, "loss": 0.637, "step": 5068 }, { "epoch": 0.65, "grad_norm": 0.681804251152822, "learning_rate": 5.889577034675373e-07, "loss": 0.5645, "step": 5069 }, { "epoch": 0.65, "grad_norm": 0.6555366560265268, "learning_rate": 5.885815494590742e-07, "loss": 0.4481, "step": 5070 }, { "epoch": 0.65, "grad_norm": 0.7614095732517614, "learning_rate": 5.882054655108874e-07, "loss": 0.6166, "step": 5071 }, { "epoch": 0.65, "grad_norm": 0.7326413647073123, "learning_rate": 5.878294516870196e-07, "loss": 0.5576, "step": 5072 }, { "epoch": 0.65, "grad_norm": 0.8186075662888059, "learning_rate": 5.874535080515024e-07, "loss": 0.5208, "step": 5073 }, { "epoch": 0.65, "grad_norm": 0.7574887958945209, "learning_rate": 5.870776346683548e-07, "loss": 0.5781, "step": 5074 }, { "epoch": 0.65, "grad_norm": 0.7830121607743807, "learning_rate": 5.867018316015842e-07, "loss": 0.6194, "step": 5075 }, { "epoch": 0.65, "grad_norm": 0.732968374536864, "learning_rate": 5.863260989151859e-07, "loss": 0.5147, "step": 5076 }, { "epoch": 0.65, "grad_norm": 0.7162454519792573, "learning_rate": 5.859504366731435e-07, "loss": 0.5325, "step": 5077 }, { "epoch": 0.65, "grad_norm": 0.911551691518996, "learning_rate": 5.855748449394281e-07, "loss": 0.6274, "step": 5078 }, { "epoch": 0.65, "grad_norm": 0.7245923035418435, "learning_rate": 5.851993237779992e-07, "loss": 0.5346, "step": 5079 }, { "epoch": 0.65, "grad_norm": 0.6876521023441245, "learning_rate": 5.848238732528041e-07, "loss": 0.5554, "step": 5080 }, { "epoch": 0.65, "grad_norm": 0.6324907612905983, "learning_rate": 5.84448493427778e-07, "loss": 0.5138, "step": 5081 }, { "epoch": 0.65, "grad_norm": 0.6885048684524788, "learning_rate": 5.840731843668444e-07, "loss": 0.4976, "step": 5082 }, { "epoch": 0.65, "grad_norm": 0.8005902867831588, "learning_rate": 5.836979461339142e-07, "loss": 0.5747, "step": 5083 }, { "epoch": 0.65, "grad_norm": 0.8322618149027909, "learning_rate": 5.833227787928869e-07, "loss": 0.6141, "step": 5084 }, { "epoch": 0.65, "grad_norm": 0.708675636533902, "learning_rate": 5.829476824076496e-07, "loss": 0.5216, "step": 5085 }, { "epoch": 0.65, "grad_norm": 0.7270248432377032, "learning_rate": 5.825726570420771e-07, "loss": 0.5796, "step": 5086 }, { "epoch": 0.65, "grad_norm": 1.1813317279481148, "learning_rate": 5.821977027600322e-07, "loss": 0.6608, "step": 5087 }, { "epoch": 0.65, "grad_norm": 0.8512731345911401, "learning_rate": 5.818228196253661e-07, "loss": 0.6379, "step": 5088 }, { "epoch": 0.65, "grad_norm": 0.8179900658273415, "learning_rate": 5.814480077019173e-07, "loss": 0.5787, "step": 5089 }, { "epoch": 0.65, "grad_norm": 0.8794787436230713, "learning_rate": 5.810732670535122e-07, "loss": 0.6216, "step": 5090 }, { "epoch": 0.65, "grad_norm": 0.6985198773689165, "learning_rate": 5.806985977439654e-07, "loss": 0.509, "step": 5091 }, { "epoch": 0.65, "grad_norm": 0.8681988780824225, "learning_rate": 5.803239998370793e-07, "loss": 0.6763, "step": 5092 }, { "epoch": 0.65, "grad_norm": 0.8750881326770189, "learning_rate": 5.799494733966437e-07, "loss": 0.6112, "step": 5093 }, { "epoch": 0.65, "grad_norm": 0.6328604194952325, "learning_rate": 5.795750184864368e-07, "loss": 0.6096, "step": 5094 }, { "epoch": 0.65, "grad_norm": 0.8601784461554801, "learning_rate": 5.792006351702244e-07, "loss": 0.6246, "step": 5095 }, { "epoch": 0.65, "grad_norm": 0.6784051214222683, "learning_rate": 5.788263235117598e-07, "loss": 0.4941, "step": 5096 }, { "epoch": 0.65, "grad_norm": 0.945907718371338, "learning_rate": 5.784520835747846e-07, "loss": 0.6286, "step": 5097 }, { "epoch": 0.65, "grad_norm": 0.8524267272989058, "learning_rate": 5.78077915423028e-07, "loss": 0.6033, "step": 5098 }, { "epoch": 0.65, "grad_norm": 0.8962315033504842, "learning_rate": 5.777038191202067e-07, "loss": 0.6304, "step": 5099 }, { "epoch": 0.65, "grad_norm": 0.9873022625942054, "learning_rate": 5.773297947300257e-07, "loss": 0.6585, "step": 5100 }, { "epoch": 0.65, "grad_norm": 0.7584336395440215, "learning_rate": 5.769558423161771e-07, "loss": 0.6081, "step": 5101 }, { "epoch": 0.65, "grad_norm": 0.9249639892367475, "learning_rate": 5.765819619423415e-07, "loss": 0.6676, "step": 5102 }, { "epoch": 0.65, "grad_norm": 0.868116745146901, "learning_rate": 5.762081536721862e-07, "loss": 0.6122, "step": 5103 }, { "epoch": 0.65, "grad_norm": 0.654113464241392, "learning_rate": 5.758344175693677e-07, "loss": 0.5753, "step": 5104 }, { "epoch": 0.65, "grad_norm": 0.7902854325303073, "learning_rate": 5.75460753697529e-07, "loss": 0.567, "step": 5105 }, { "epoch": 0.65, "grad_norm": 1.423628550163955, "learning_rate": 5.75087162120301e-07, "loss": 0.6036, "step": 5106 }, { "epoch": 0.65, "grad_norm": 0.6393862123543143, "learning_rate": 5.747136429013028e-07, "loss": 0.5266, "step": 5107 }, { "epoch": 0.65, "grad_norm": 0.6791805419557542, "learning_rate": 5.743401961041404e-07, "loss": 0.5425, "step": 5108 }, { "epoch": 0.65, "grad_norm": 0.6584652165860774, "learning_rate": 5.739668217924083e-07, "loss": 0.5533, "step": 5109 }, { "epoch": 0.65, "grad_norm": 0.8580499077340181, "learning_rate": 5.735935200296879e-07, "loss": 0.6582, "step": 5110 }, { "epoch": 0.65, "grad_norm": 0.6511144807862034, "learning_rate": 5.732202908795488e-07, "loss": 0.5189, "step": 5111 }, { "epoch": 0.65, "grad_norm": 0.636381576170471, "learning_rate": 5.728471344055481e-07, "loss": 0.5052, "step": 5112 }, { "epoch": 0.65, "grad_norm": 0.9835570522882756, "learning_rate": 5.724740506712302e-07, "loss": 0.6209, "step": 5113 }, { "epoch": 0.65, "grad_norm": 0.6064867154013767, "learning_rate": 5.721010397401277e-07, "loss": 0.5286, "step": 5114 }, { "epoch": 0.65, "grad_norm": 1.3811986984629432, "learning_rate": 5.717281016757603e-07, "loss": 0.6718, "step": 5115 }, { "epoch": 0.65, "grad_norm": 0.8258201055263272, "learning_rate": 5.713552365416354e-07, "loss": 0.5629, "step": 5116 }, { "epoch": 0.65, "grad_norm": 0.6500044924096365, "learning_rate": 5.70982444401248e-07, "loss": 0.5105, "step": 5117 }, { "epoch": 0.65, "grad_norm": 1.0653608392245684, "learning_rate": 5.70609725318081e-07, "loss": 0.6806, "step": 5118 }, { "epoch": 0.65, "grad_norm": 1.02720684755916, "learning_rate": 5.702370793556046e-07, "loss": 0.6554, "step": 5119 }, { "epoch": 0.65, "grad_norm": 0.6860602899612561, "learning_rate": 5.698645065772762e-07, "loss": 0.5561, "step": 5120 }, { "epoch": 0.65, "grad_norm": 0.9025652293740645, "learning_rate": 5.694920070465411e-07, "loss": 0.5812, "step": 5121 }, { "epoch": 0.65, "grad_norm": 0.9436602663030286, "learning_rate": 5.691195808268323e-07, "loss": 0.6488, "step": 5122 }, { "epoch": 0.65, "grad_norm": 0.633609690431565, "learning_rate": 5.687472279815699e-07, "loss": 0.5565, "step": 5123 }, { "epoch": 0.65, "grad_norm": 0.6022063065860346, "learning_rate": 5.68374948574162e-07, "loss": 0.4931, "step": 5124 }, { "epoch": 0.65, "grad_norm": 0.6385614404213904, "learning_rate": 5.680027426680036e-07, "loss": 0.5405, "step": 5125 }, { "epoch": 0.65, "grad_norm": 0.9114009286591812, "learning_rate": 5.676306103264777e-07, "loss": 0.6692, "step": 5126 }, { "epoch": 0.65, "grad_norm": 0.7093953761933804, "learning_rate": 5.672585516129543e-07, "loss": 0.5969, "step": 5127 }, { "epoch": 0.65, "grad_norm": 1.6452944119401267, "learning_rate": 5.668865665907914e-07, "loss": 0.5911, "step": 5128 }, { "epoch": 0.65, "grad_norm": 0.8516414229812703, "learning_rate": 5.665146553233338e-07, "loss": 0.6173, "step": 5129 }, { "epoch": 0.65, "grad_norm": 0.8136173473983309, "learning_rate": 5.661428178739145e-07, "loss": 0.5986, "step": 5130 }, { "epoch": 0.65, "grad_norm": 0.8592251324604326, "learning_rate": 5.657710543058536e-07, "loss": 0.5955, "step": 5131 }, { "epoch": 0.65, "grad_norm": 0.6570834221119015, "learning_rate": 5.653993646824584e-07, "loss": 0.5254, "step": 5132 }, { "epoch": 0.65, "grad_norm": 0.8413622070655353, "learning_rate": 5.650277490670237e-07, "loss": 0.6109, "step": 5133 }, { "epoch": 0.65, "grad_norm": 0.6899290267214773, "learning_rate": 5.646562075228319e-07, "loss": 0.5049, "step": 5134 }, { "epoch": 0.65, "grad_norm": 0.920818851600754, "learning_rate": 5.642847401131525e-07, "loss": 0.6255, "step": 5135 }, { "epoch": 0.65, "grad_norm": 0.7166500348282763, "learning_rate": 5.639133469012426e-07, "loss": 0.5297, "step": 5136 }, { "epoch": 0.65, "grad_norm": 0.6843371088937003, "learning_rate": 5.635420279503467e-07, "loss": 0.5405, "step": 5137 }, { "epoch": 0.65, "grad_norm": 0.9024191480953071, "learning_rate": 5.631707833236962e-07, "loss": 0.6381, "step": 5138 }, { "epoch": 0.65, "grad_norm": 1.1764222110411937, "learning_rate": 5.627996130845107e-07, "loss": 0.6383, "step": 5139 }, { "epoch": 0.65, "grad_norm": 0.7319410664791259, "learning_rate": 5.624285172959964e-07, "loss": 0.5676, "step": 5140 }, { "epoch": 0.65, "grad_norm": 0.7858930832094569, "learning_rate": 5.620574960213469e-07, "loss": 0.6339, "step": 5141 }, { "epoch": 0.66, "grad_norm": 0.7286445804589572, "learning_rate": 5.616865493237434e-07, "loss": 0.5778, "step": 5142 }, { "epoch": 0.66, "grad_norm": 1.020132381465057, "learning_rate": 5.613156772663542e-07, "loss": 0.6351, "step": 5143 }, { "epoch": 0.66, "grad_norm": 0.7152149051029734, "learning_rate": 5.609448799123351e-07, "loss": 0.5828, "step": 5144 }, { "epoch": 0.66, "grad_norm": 0.7854618541545233, "learning_rate": 5.605741573248288e-07, "loss": 0.5715, "step": 5145 }, { "epoch": 0.66, "grad_norm": 0.8407183333760758, "learning_rate": 5.602035095669656e-07, "loss": 0.5505, "step": 5146 }, { "epoch": 0.66, "grad_norm": 0.7089945351036089, "learning_rate": 5.59832936701863e-07, "loss": 0.5229, "step": 5147 }, { "epoch": 0.66, "grad_norm": 0.8258125813082188, "learning_rate": 5.594624387926257e-07, "loss": 0.5526, "step": 5148 }, { "epoch": 0.66, "grad_norm": 0.843474521147449, "learning_rate": 5.590920159023455e-07, "loss": 0.6266, "step": 5149 }, { "epoch": 0.66, "grad_norm": 0.9479985000773808, "learning_rate": 5.587216680941016e-07, "loss": 0.6262, "step": 5150 }, { "epoch": 0.66, "grad_norm": 0.7077055844087151, "learning_rate": 5.583513954309604e-07, "loss": 0.5375, "step": 5151 }, { "epoch": 0.66, "grad_norm": 0.8657708405175799, "learning_rate": 5.579811979759757e-07, "loss": 0.6395, "step": 5152 }, { "epoch": 0.66, "grad_norm": 0.6971206919020668, "learning_rate": 5.576110757921879e-07, "loss": 0.5451, "step": 5153 }, { "epoch": 0.66, "grad_norm": 0.9949717184285992, "learning_rate": 5.572410289426252e-07, "loss": 0.6733, "step": 5154 }, { "epoch": 0.66, "grad_norm": 0.6800846574139368, "learning_rate": 5.568710574903026e-07, "loss": 0.5601, "step": 5155 }, { "epoch": 0.66, "grad_norm": 0.9761555239442732, "learning_rate": 5.56501161498222e-07, "loss": 0.5988, "step": 5156 }, { "epoch": 0.66, "grad_norm": 0.6113763756320756, "learning_rate": 5.561313410293738e-07, "loss": 0.4914, "step": 5157 }, { "epoch": 0.66, "grad_norm": 0.8399259385106103, "learning_rate": 5.557615961467338e-07, "loss": 0.6606, "step": 5158 }, { "epoch": 0.66, "grad_norm": 1.1852452293414182, "learning_rate": 5.553919269132661e-07, "loss": 0.6251, "step": 5159 }, { "epoch": 0.66, "grad_norm": 0.8663631238133971, "learning_rate": 5.550223333919213e-07, "loss": 0.6486, "step": 5160 }, { "epoch": 0.66, "grad_norm": 0.890695778807739, "learning_rate": 5.546528156456369e-07, "loss": 0.6247, "step": 5161 }, { "epoch": 0.66, "grad_norm": 0.7970453546045762, "learning_rate": 5.542833737373389e-07, "loss": 0.6115, "step": 5162 }, { "epoch": 0.66, "grad_norm": 0.8987296081236028, "learning_rate": 5.539140077299389e-07, "loss": 0.6107, "step": 5163 }, { "epoch": 0.66, "grad_norm": 0.8065912801456753, "learning_rate": 5.535447176863362e-07, "loss": 0.5893, "step": 5164 }, { "epoch": 0.66, "grad_norm": 0.7072572019971983, "learning_rate": 5.531755036694168e-07, "loss": 0.5346, "step": 5165 }, { "epoch": 0.66, "grad_norm": 0.7469512045447022, "learning_rate": 5.528063657420542e-07, "loss": 0.5323, "step": 5166 }, { "epoch": 0.66, "grad_norm": 0.6837402980708217, "learning_rate": 5.524373039671087e-07, "loss": 0.5098, "step": 5167 }, { "epoch": 0.66, "grad_norm": 1.021774197352233, "learning_rate": 5.520683184074275e-07, "loss": 0.6121, "step": 5168 }, { "epoch": 0.66, "grad_norm": 0.7043376218330065, "learning_rate": 5.516994091258453e-07, "loss": 0.5924, "step": 5169 }, { "epoch": 0.66, "grad_norm": 0.9033220579349501, "learning_rate": 5.513305761851834e-07, "loss": 0.6169, "step": 5170 }, { "epoch": 0.66, "grad_norm": 0.8364284660030101, "learning_rate": 5.509618196482501e-07, "loss": 0.6166, "step": 5171 }, { "epoch": 0.66, "grad_norm": 0.8296727124169542, "learning_rate": 5.50593139577841e-07, "loss": 0.5872, "step": 5172 }, { "epoch": 0.66, "grad_norm": 0.6929753337300463, "learning_rate": 5.502245360367382e-07, "loss": 0.5299, "step": 5173 }, { "epoch": 0.66, "grad_norm": 0.7934575827250357, "learning_rate": 5.498560090877111e-07, "loss": 0.5614, "step": 5174 }, { "epoch": 0.66, "grad_norm": 0.6700536830112204, "learning_rate": 5.494875587935162e-07, "loss": 0.5411, "step": 5175 }, { "epoch": 0.66, "grad_norm": 0.6993977583203144, "learning_rate": 5.491191852168966e-07, "loss": 0.5175, "step": 5176 }, { "epoch": 0.66, "grad_norm": 0.9524299668903423, "learning_rate": 5.487508884205825e-07, "loss": 0.6005, "step": 5177 }, { "epoch": 0.66, "grad_norm": 0.8444431556505653, "learning_rate": 5.483826684672912e-07, "loss": 0.6565, "step": 5178 }, { "epoch": 0.66, "grad_norm": 0.8737859785765094, "learning_rate": 5.480145254197264e-07, "loss": 0.633, "step": 5179 }, { "epoch": 0.66, "grad_norm": 0.6588048936046896, "learning_rate": 5.476464593405792e-07, "loss": 0.5359, "step": 5180 }, { "epoch": 0.66, "grad_norm": 0.7902504023352743, "learning_rate": 5.472784702925272e-07, "loss": 0.5842, "step": 5181 }, { "epoch": 0.66, "grad_norm": 0.6387543013069655, "learning_rate": 5.469105583382356e-07, "loss": 0.5498, "step": 5182 }, { "epoch": 0.66, "grad_norm": 0.7674972225753357, "learning_rate": 5.465427235403555e-07, "loss": 0.5356, "step": 5183 }, { "epoch": 0.66, "grad_norm": 0.8145385097341103, "learning_rate": 5.461749659615255e-07, "loss": 0.5962, "step": 5184 }, { "epoch": 0.66, "grad_norm": 0.8783139324471076, "learning_rate": 5.458072856643709e-07, "loss": 0.6601, "step": 5185 }, { "epoch": 0.66, "grad_norm": 0.725858528279023, "learning_rate": 5.454396827115037e-07, "loss": 0.5142, "step": 5186 }, { "epoch": 0.66, "grad_norm": 0.8066255320758969, "learning_rate": 5.450721571655232e-07, "loss": 0.6002, "step": 5187 }, { "epoch": 0.66, "grad_norm": 0.8642919131376363, "learning_rate": 5.447047090890145e-07, "loss": 0.5816, "step": 5188 }, { "epoch": 0.66, "grad_norm": 0.6848398998053352, "learning_rate": 5.443373385445509e-07, "loss": 0.4815, "step": 5189 }, { "epoch": 0.66, "grad_norm": 0.7775814435855888, "learning_rate": 5.439700455946914e-07, "loss": 0.5717, "step": 5190 }, { "epoch": 0.66, "grad_norm": 0.7960884124749111, "learning_rate": 5.436028303019824e-07, "loss": 0.613, "step": 5191 }, { "epoch": 0.66, "grad_norm": 0.7021504380318427, "learning_rate": 5.432356927289565e-07, "loss": 0.5207, "step": 5192 }, { "epoch": 0.66, "grad_norm": 0.6856621254639056, "learning_rate": 5.428686329381338e-07, "loss": 0.5331, "step": 5193 }, { "epoch": 0.66, "grad_norm": 0.6291448066646765, "learning_rate": 5.425016509920205e-07, "loss": 0.5126, "step": 5194 }, { "epoch": 0.66, "grad_norm": 0.8292705863694717, "learning_rate": 5.421347469531099e-07, "loss": 0.6414, "step": 5195 }, { "epoch": 0.66, "grad_norm": 0.8525337103530422, "learning_rate": 5.417679208838817e-07, "loss": 0.6, "step": 5196 }, { "epoch": 0.66, "grad_norm": 0.8952269229248355, "learning_rate": 5.414011728468029e-07, "loss": 0.6451, "step": 5197 }, { "epoch": 0.66, "grad_norm": 0.7685862903210215, "learning_rate": 5.410345029043268e-07, "loss": 0.5807, "step": 5198 }, { "epoch": 0.66, "grad_norm": 0.6384502248765729, "learning_rate": 5.406679111188932e-07, "loss": 0.5162, "step": 5199 }, { "epoch": 0.66, "grad_norm": 0.8052630061141106, "learning_rate": 5.403013975529292e-07, "loss": 0.5488, "step": 5200 }, { "epoch": 0.66, "grad_norm": 0.8783642939826254, "learning_rate": 5.399349622688478e-07, "loss": 0.6469, "step": 5201 }, { "epoch": 0.66, "grad_norm": 0.675450997167393, "learning_rate": 5.395686053290498e-07, "loss": 0.4977, "step": 5202 }, { "epoch": 0.66, "grad_norm": 0.7347055482177096, "learning_rate": 5.392023267959214e-07, "loss": 0.5329, "step": 5203 }, { "epoch": 0.66, "grad_norm": 0.8199308797303028, "learning_rate": 5.38836126731836e-07, "loss": 0.5848, "step": 5204 }, { "epoch": 0.66, "grad_norm": 0.771495710274613, "learning_rate": 5.384700051991541e-07, "loss": 0.579, "step": 5205 }, { "epoch": 0.66, "grad_norm": 0.7051338569643383, "learning_rate": 5.38103962260222e-07, "loss": 0.5147, "step": 5206 }, { "epoch": 0.66, "grad_norm": 1.7373923577448827, "learning_rate": 5.377379979773732e-07, "loss": 0.619, "step": 5207 }, { "epoch": 0.66, "grad_norm": 0.8119409401938465, "learning_rate": 5.373721124129276e-07, "loss": 0.5838, "step": 5208 }, { "epoch": 0.66, "grad_norm": 0.9924237958524367, "learning_rate": 5.370063056291915e-07, "loss": 0.647, "step": 5209 }, { "epoch": 0.66, "grad_norm": 0.7252596210894041, "learning_rate": 5.366405776884582e-07, "loss": 0.5346, "step": 5210 }, { "epoch": 0.66, "grad_norm": 0.7913602694784511, "learning_rate": 5.362749286530073e-07, "loss": 0.5421, "step": 5211 }, { "epoch": 0.66, "grad_norm": 1.167464005039399, "learning_rate": 5.359093585851049e-07, "loss": 0.6329, "step": 5212 }, { "epoch": 0.66, "grad_norm": 0.8985184017865717, "learning_rate": 5.355438675470039e-07, "loss": 0.6151, "step": 5213 }, { "epoch": 0.66, "grad_norm": 0.862180951670977, "learning_rate": 5.351784556009432e-07, "loss": 0.5768, "step": 5214 }, { "epoch": 0.66, "grad_norm": 0.6506968274593046, "learning_rate": 5.348131228091495e-07, "loss": 0.5149, "step": 5215 }, { "epoch": 0.66, "grad_norm": 0.7131402508909719, "learning_rate": 5.344478692338346e-07, "loss": 0.5561, "step": 5216 }, { "epoch": 0.66, "grad_norm": 0.8794935801556957, "learning_rate": 5.340826949371975e-07, "loss": 0.6572, "step": 5217 }, { "epoch": 0.66, "grad_norm": 0.9366247061925059, "learning_rate": 5.337175999814236e-07, "loss": 0.6555, "step": 5218 }, { "epoch": 0.66, "grad_norm": 0.6770348353006898, "learning_rate": 5.333525844286846e-07, "loss": 0.5645, "step": 5219 }, { "epoch": 0.67, "grad_norm": 0.9267644799894419, "learning_rate": 5.32987648341139e-07, "loss": 0.6636, "step": 5220 }, { "epoch": 0.67, "grad_norm": 1.220326052728481, "learning_rate": 5.326227917809318e-07, "loss": 0.6033, "step": 5221 }, { "epoch": 0.67, "grad_norm": 2.3252353418415628, "learning_rate": 5.322580148101939e-07, "loss": 0.614, "step": 5222 }, { "epoch": 0.67, "grad_norm": 0.7738683498856262, "learning_rate": 5.318933174910433e-07, "loss": 0.6084, "step": 5223 }, { "epoch": 0.67, "grad_norm": 0.8526850884592909, "learning_rate": 5.315286998855839e-07, "loss": 0.6476, "step": 5224 }, { "epoch": 0.67, "grad_norm": 0.7076687168973954, "learning_rate": 5.311641620559064e-07, "loss": 0.5804, "step": 5225 }, { "epoch": 0.67, "grad_norm": 0.7663700577450718, "learning_rate": 5.307997040640879e-07, "loss": 0.5296, "step": 5226 }, { "epoch": 0.67, "grad_norm": 0.745538053676213, "learning_rate": 5.304353259721917e-07, "loss": 0.5772, "step": 5227 }, { "epoch": 0.67, "grad_norm": 0.6682047573668682, "learning_rate": 5.300710278422675e-07, "loss": 0.5327, "step": 5228 }, { "epoch": 0.67, "grad_norm": 0.9022911408941751, "learning_rate": 5.297068097363517e-07, "loss": 0.6274, "step": 5229 }, { "epoch": 0.67, "grad_norm": 0.9149487331492179, "learning_rate": 5.293426717164666e-07, "loss": 0.6526, "step": 5230 }, { "epoch": 0.67, "grad_norm": 0.9033151642430932, "learning_rate": 5.289786138446213e-07, "loss": 0.6279, "step": 5231 }, { "epoch": 0.67, "grad_norm": 0.7831631700441274, "learning_rate": 5.286146361828109e-07, "loss": 0.5918, "step": 5232 }, { "epoch": 0.67, "grad_norm": 0.6958329192529397, "learning_rate": 5.282507387930172e-07, "loss": 0.5249, "step": 5233 }, { "epoch": 0.67, "grad_norm": 0.7075252222219387, "learning_rate": 5.278869217372082e-07, "loss": 0.5515, "step": 5234 }, { "epoch": 0.67, "grad_norm": 0.9183978303526275, "learning_rate": 5.275231850773378e-07, "loss": 0.6541, "step": 5235 }, { "epoch": 0.67, "grad_norm": 0.8780136239883257, "learning_rate": 5.271595288753468e-07, "loss": 0.6591, "step": 5236 }, { "epoch": 0.67, "grad_norm": 0.7353623876622807, "learning_rate": 5.267959531931619e-07, "loss": 0.5891, "step": 5237 }, { "epoch": 0.67, "grad_norm": 0.9051689094153474, "learning_rate": 5.264324580926964e-07, "loss": 0.6401, "step": 5238 }, { "epoch": 0.67, "grad_norm": 0.6730220712735507, "learning_rate": 5.260690436358497e-07, "loss": 0.5547, "step": 5239 }, { "epoch": 0.67, "grad_norm": 0.9176290991194557, "learning_rate": 5.257057098845072e-07, "loss": 0.6345, "step": 5240 }, { "epoch": 0.67, "grad_norm": 0.7813820128151073, "learning_rate": 5.253424569005415e-07, "loss": 0.57, "step": 5241 }, { "epoch": 0.67, "grad_norm": 0.6420388475196485, "learning_rate": 5.249792847458105e-07, "loss": 0.4953, "step": 5242 }, { "epoch": 0.67, "grad_norm": 0.6832168349309564, "learning_rate": 5.246161934821585e-07, "loss": 0.5252, "step": 5243 }, { "epoch": 0.67, "grad_norm": 0.7940851752621456, "learning_rate": 5.242531831714161e-07, "loss": 0.6698, "step": 5244 }, { "epoch": 0.67, "grad_norm": 0.8741667047660675, "learning_rate": 5.238902538754005e-07, "loss": 0.6437, "step": 5245 }, { "epoch": 0.67, "grad_norm": 0.8216352739697113, "learning_rate": 5.235274056559146e-07, "loss": 0.7138, "step": 5246 }, { "epoch": 0.67, "grad_norm": 0.84412337834813, "learning_rate": 5.231646385747476e-07, "loss": 0.6224, "step": 5247 }, { "epoch": 0.67, "grad_norm": 1.154907958338507, "learning_rate": 5.22801952693675e-07, "loss": 0.6757, "step": 5248 }, { "epoch": 0.67, "grad_norm": 0.7255701556653747, "learning_rate": 5.224393480744584e-07, "loss": 0.5331, "step": 5249 }, { "epoch": 0.67, "grad_norm": 0.618183486053049, "learning_rate": 5.220768247788458e-07, "loss": 0.538, "step": 5250 }, { "epoch": 0.67, "grad_norm": 0.6478060961208488, "learning_rate": 5.217143828685709e-07, "loss": 0.5424, "step": 5251 }, { "epoch": 0.67, "grad_norm": 0.5914186487450298, "learning_rate": 5.213520224053538e-07, "loss": 0.5292, "step": 5252 }, { "epoch": 0.67, "grad_norm": 0.753169827567143, "learning_rate": 5.209897434509008e-07, "loss": 0.6128, "step": 5253 }, { "epoch": 0.67, "grad_norm": 0.7355045094771601, "learning_rate": 5.206275460669041e-07, "loss": 0.5514, "step": 5254 }, { "epoch": 0.67, "grad_norm": 1.010414235637921, "learning_rate": 5.202654303150423e-07, "loss": 0.6564, "step": 5255 }, { "epoch": 0.67, "grad_norm": 0.7013839497595362, "learning_rate": 5.199033962569799e-07, "loss": 0.5068, "step": 5256 }, { "epoch": 0.67, "grad_norm": 1.0835941442264907, "learning_rate": 5.195414439543675e-07, "loss": 0.6619, "step": 5257 }, { "epoch": 0.67, "grad_norm": 0.6667410188714661, "learning_rate": 5.191795734688418e-07, "loss": 0.5756, "step": 5258 }, { "epoch": 0.67, "grad_norm": 0.7830396229887109, "learning_rate": 5.188177848620257e-07, "loss": 0.5894, "step": 5259 }, { "epoch": 0.67, "grad_norm": 0.6798527671521377, "learning_rate": 5.184560781955278e-07, "loss": 0.5363, "step": 5260 }, { "epoch": 0.67, "grad_norm": 0.8589500006781942, "learning_rate": 5.180944535309428e-07, "loss": 0.5931, "step": 5261 }, { "epoch": 0.67, "grad_norm": 0.7886108360564799, "learning_rate": 5.177329109298522e-07, "loss": 0.55, "step": 5262 }, { "epoch": 0.67, "grad_norm": 0.6966495938930823, "learning_rate": 5.173714504538227e-07, "loss": 0.5222, "step": 5263 }, { "epoch": 0.67, "grad_norm": 0.866548546216134, "learning_rate": 5.170100721644071e-07, "loss": 0.602, "step": 5264 }, { "epoch": 0.67, "grad_norm": 0.7165204682880972, "learning_rate": 5.166487761231446e-07, "loss": 0.56, "step": 5265 }, { "epoch": 0.67, "grad_norm": 0.8978838793850341, "learning_rate": 5.1628756239156e-07, "loss": 0.6409, "step": 5266 }, { "epoch": 0.67, "grad_norm": 0.8754657190017986, "learning_rate": 5.159264310311641e-07, "loss": 0.588, "step": 5267 }, { "epoch": 0.67, "grad_norm": 0.719354666719648, "learning_rate": 5.15565382103454e-07, "loss": 0.5615, "step": 5268 }, { "epoch": 0.67, "grad_norm": 0.9280298812015718, "learning_rate": 5.152044156699126e-07, "loss": 0.6659, "step": 5269 }, { "epoch": 0.67, "grad_norm": 1.2946603685013434, "learning_rate": 5.148435317920086e-07, "loss": 0.6875, "step": 5270 }, { "epoch": 0.67, "grad_norm": 0.8132268787898511, "learning_rate": 5.144827305311968e-07, "loss": 0.578, "step": 5271 }, { "epoch": 0.67, "grad_norm": 0.7560382621948439, "learning_rate": 5.141220119489176e-07, "loss": 0.5722, "step": 5272 }, { "epoch": 0.67, "grad_norm": 0.8388697796158662, "learning_rate": 5.137613761065982e-07, "loss": 0.6233, "step": 5273 }, { "epoch": 0.67, "grad_norm": 0.7212878676237563, "learning_rate": 5.134008230656509e-07, "loss": 0.6134, "step": 5274 }, { "epoch": 0.67, "grad_norm": 1.0102500376145254, "learning_rate": 5.130403528874741e-07, "loss": 0.5896, "step": 5275 }, { "epoch": 0.67, "grad_norm": 0.8897316528675647, "learning_rate": 5.126799656334521e-07, "loss": 0.6537, "step": 5276 }, { "epoch": 0.67, "grad_norm": 0.8505618394093455, "learning_rate": 5.123196613649551e-07, "loss": 0.6015, "step": 5277 }, { "epoch": 0.67, "grad_norm": 0.9282130023848788, "learning_rate": 5.119594401433391e-07, "loss": 0.6375, "step": 5278 }, { "epoch": 0.67, "grad_norm": 0.8378939228950745, "learning_rate": 5.11599302029946e-07, "loss": 0.6331, "step": 5279 }, { "epoch": 0.67, "grad_norm": 0.8671395913515828, "learning_rate": 5.112392470861038e-07, "loss": 0.6107, "step": 5280 }, { "epoch": 0.67, "grad_norm": 0.7353977111902207, "learning_rate": 5.108792753731259e-07, "loss": 0.5415, "step": 5281 }, { "epoch": 0.67, "grad_norm": 0.615257343846587, "learning_rate": 5.105193869523119e-07, "loss": 0.5139, "step": 5282 }, { "epoch": 0.67, "grad_norm": 0.878878376143998, "learning_rate": 5.101595818849467e-07, "loss": 0.6632, "step": 5283 }, { "epoch": 0.67, "grad_norm": 0.7426325358489514, "learning_rate": 5.097998602323017e-07, "loss": 0.5937, "step": 5284 }, { "epoch": 0.67, "grad_norm": 1.0063001703405317, "learning_rate": 5.094402220556336e-07, "loss": 0.7121, "step": 5285 }, { "epoch": 0.67, "grad_norm": 0.629877317027908, "learning_rate": 5.090806674161852e-07, "loss": 0.5293, "step": 5286 }, { "epoch": 0.67, "grad_norm": 2.4359764263321377, "learning_rate": 5.087211963751845e-07, "loss": 0.6293, "step": 5287 }, { "epoch": 0.67, "grad_norm": 0.9735182744330695, "learning_rate": 5.083618089938461e-07, "loss": 0.6462, "step": 5288 }, { "epoch": 0.67, "grad_norm": 0.7410989473893468, "learning_rate": 5.080025053333699e-07, "loss": 0.601, "step": 5289 }, { "epoch": 0.67, "grad_norm": 0.9191921987929599, "learning_rate": 5.076432854549413e-07, "loss": 0.6169, "step": 5290 }, { "epoch": 0.67, "grad_norm": 0.7102658769122203, "learning_rate": 5.072841494197319e-07, "loss": 0.5737, "step": 5291 }, { "epoch": 0.67, "grad_norm": 0.7545552669266572, "learning_rate": 5.069250972888987e-07, "loss": 0.6293, "step": 5292 }, { "epoch": 0.67, "grad_norm": 0.737364114686024, "learning_rate": 5.065661291235848e-07, "loss": 0.5635, "step": 5293 }, { "epoch": 0.67, "grad_norm": 1.24965071666077, "learning_rate": 5.062072449849184e-07, "loss": 0.6541, "step": 5294 }, { "epoch": 0.67, "grad_norm": 0.7110705014516703, "learning_rate": 5.058484449340139e-07, "loss": 0.5581, "step": 5295 }, { "epoch": 0.67, "grad_norm": 0.685611848078359, "learning_rate": 5.054897290319713e-07, "loss": 0.5915, "step": 5296 }, { "epoch": 0.67, "grad_norm": 0.8795104117308582, "learning_rate": 5.05131097339876e-07, "loss": 0.5966, "step": 5297 }, { "epoch": 0.67, "grad_norm": 0.969993802229717, "learning_rate": 5.047725499187994e-07, "loss": 0.6101, "step": 5298 }, { "epoch": 0.68, "grad_norm": 0.7949357695213908, "learning_rate": 5.044140868297977e-07, "loss": 0.6163, "step": 5299 }, { "epoch": 0.68, "grad_norm": 0.8837592275707001, "learning_rate": 5.040557081339147e-07, "loss": 0.5009, "step": 5300 }, { "epoch": 0.68, "grad_norm": 0.8843526084793552, "learning_rate": 5.036974138921777e-07, "loss": 0.615, "step": 5301 }, { "epoch": 0.68, "grad_norm": 0.6142618186009866, "learning_rate": 5.033392041656006e-07, "loss": 0.5127, "step": 5302 }, { "epoch": 0.68, "grad_norm": 0.6937589805017076, "learning_rate": 5.02981079015183e-07, "loss": 0.5385, "step": 5303 }, { "epoch": 0.68, "grad_norm": 0.7304270496164742, "learning_rate": 5.026230385019098e-07, "loss": 0.5652, "step": 5304 }, { "epoch": 0.68, "grad_norm": 0.9875031822363577, "learning_rate": 5.022650826867513e-07, "loss": 0.604, "step": 5305 }, { "epoch": 0.68, "grad_norm": 0.6684967370460357, "learning_rate": 5.019072116306641e-07, "loss": 0.5338, "step": 5306 }, { "epoch": 0.68, "grad_norm": 0.7063319387154257, "learning_rate": 5.015494253945896e-07, "loss": 0.538, "step": 5307 }, { "epoch": 0.68, "grad_norm": 0.8665107843552544, "learning_rate": 5.01191724039455e-07, "loss": 0.6361, "step": 5308 }, { "epoch": 0.68, "grad_norm": 0.7686115630357471, "learning_rate": 5.008341076261733e-07, "loss": 0.4826, "step": 5309 }, { "epoch": 0.68, "grad_norm": 0.887435457854863, "learning_rate": 5.004765762156429e-07, "loss": 0.5868, "step": 5310 }, { "epoch": 0.68, "grad_norm": 0.8869826586975291, "learning_rate": 5.001191298687475e-07, "loss": 0.6357, "step": 5311 }, { "epoch": 0.68, "grad_norm": 0.7425486620191694, "learning_rate": 4.997617686463565e-07, "loss": 0.5726, "step": 5312 }, { "epoch": 0.68, "grad_norm": 0.7858603556155882, "learning_rate": 4.994044926093249e-07, "loss": 0.5464, "step": 5313 }, { "epoch": 0.68, "grad_norm": 0.7577009504435037, "learning_rate": 4.990473018184931e-07, "loss": 0.6019, "step": 5314 }, { "epoch": 0.68, "grad_norm": 0.8668237698065206, "learning_rate": 4.986901963346869e-07, "loss": 0.6207, "step": 5315 }, { "epoch": 0.68, "grad_norm": 0.7245318167686288, "learning_rate": 4.983331762187176e-07, "loss": 0.524, "step": 5316 }, { "epoch": 0.68, "grad_norm": 0.683395776996338, "learning_rate": 4.97976241531382e-07, "loss": 0.5644, "step": 5317 }, { "epoch": 0.68, "grad_norm": 0.7571299879501692, "learning_rate": 4.976193923334626e-07, "loss": 0.5495, "step": 5318 }, { "epoch": 0.68, "grad_norm": 0.7300483211339173, "learning_rate": 4.972626286857268e-07, "loss": 0.5593, "step": 5319 }, { "epoch": 0.68, "grad_norm": 0.7047413234461785, "learning_rate": 4.96905950648928e-07, "loss": 0.5551, "step": 5320 }, { "epoch": 0.68, "grad_norm": 0.6813292877537857, "learning_rate": 4.965493582838044e-07, "loss": 0.5423, "step": 5321 }, { "epoch": 0.68, "grad_norm": 0.8411840294161926, "learning_rate": 4.961928516510803e-07, "loss": 0.636, "step": 5322 }, { "epoch": 0.68, "grad_norm": 0.8450954306169717, "learning_rate": 4.958364308114649e-07, "loss": 0.6756, "step": 5323 }, { "epoch": 0.68, "grad_norm": 0.7771242857376862, "learning_rate": 4.95480095825653e-07, "loss": 0.5848, "step": 5324 }, { "epoch": 0.68, "grad_norm": 0.6992460123020512, "learning_rate": 4.951238467543244e-07, "loss": 0.5164, "step": 5325 }, { "epoch": 0.68, "grad_norm": 1.0010501165629846, "learning_rate": 4.947676836581452e-07, "loss": 0.5568, "step": 5326 }, { "epoch": 0.68, "grad_norm": 0.9998962142215632, "learning_rate": 4.94411606597766e-07, "loss": 0.635, "step": 5327 }, { "epoch": 0.68, "grad_norm": 0.74514198843349, "learning_rate": 4.940556156338231e-07, "loss": 0.5368, "step": 5328 }, { "epoch": 0.68, "grad_norm": 0.6677414549393945, "learning_rate": 4.936997108269381e-07, "loss": 0.5323, "step": 5329 }, { "epoch": 0.68, "grad_norm": 0.6885715516513323, "learning_rate": 4.933438922377176e-07, "loss": 0.5661, "step": 5330 }, { "epoch": 0.68, "grad_norm": 2.56358116654663, "learning_rate": 4.929881599267539e-07, "loss": 0.6491, "step": 5331 }, { "epoch": 0.68, "grad_norm": 0.6476102502359745, "learning_rate": 4.926325139546246e-07, "loss": 0.5714, "step": 5332 }, { "epoch": 0.68, "grad_norm": 0.6232819097893236, "learning_rate": 4.922769543818924e-07, "loss": 0.5471, "step": 5333 }, { "epoch": 0.68, "grad_norm": 0.7244851355407912, "learning_rate": 4.919214812691056e-07, "loss": 0.6057, "step": 5334 }, { "epoch": 0.68, "grad_norm": 0.9873731326871554, "learning_rate": 4.915660946767972e-07, "loss": 0.5867, "step": 5335 }, { "epoch": 0.68, "grad_norm": 0.7993046286504325, "learning_rate": 4.912107946654862e-07, "loss": 0.5991, "step": 5336 }, { "epoch": 0.68, "grad_norm": 0.7952390025935714, "learning_rate": 4.908555812956764e-07, "loss": 0.5952, "step": 5337 }, { "epoch": 0.68, "grad_norm": 0.6855435946785916, "learning_rate": 4.905004546278567e-07, "loss": 0.5639, "step": 5338 }, { "epoch": 0.68, "grad_norm": 0.8821705625877165, "learning_rate": 4.901454147225018e-07, "loss": 0.6427, "step": 5339 }, { "epoch": 0.68, "grad_norm": 0.7911148304304257, "learning_rate": 4.897904616400711e-07, "loss": 0.6082, "step": 5340 }, { "epoch": 0.68, "grad_norm": 0.7955060654750401, "learning_rate": 4.894355954410095e-07, "loss": 0.5583, "step": 5341 }, { "epoch": 0.68, "grad_norm": 0.731755458288174, "learning_rate": 4.890808161857468e-07, "loss": 0.5424, "step": 5342 }, { "epoch": 0.68, "grad_norm": 0.8486989880107244, "learning_rate": 4.887261239346986e-07, "loss": 0.5924, "step": 5343 }, { "epoch": 0.68, "grad_norm": 1.0712876951691928, "learning_rate": 4.88371518748265e-07, "loss": 0.6255, "step": 5344 }, { "epoch": 0.68, "grad_norm": 0.6678311957511555, "learning_rate": 4.880170006868316e-07, "loss": 0.5236, "step": 5345 }, { "epoch": 0.68, "grad_norm": 0.6848651438621817, "learning_rate": 4.876625698107691e-07, "loss": 0.5932, "step": 5346 }, { "epoch": 0.68, "grad_norm": 0.7594326502880127, "learning_rate": 4.873082261804336e-07, "loss": 0.5756, "step": 5347 }, { "epoch": 0.68, "grad_norm": 0.8984224184727355, "learning_rate": 4.869539698561661e-07, "loss": 0.6246, "step": 5348 }, { "epoch": 0.68, "grad_norm": 0.7187588824413186, "learning_rate": 4.865998008982926e-07, "loss": 0.5678, "step": 5349 }, { "epoch": 0.68, "grad_norm": 0.650651644887821, "learning_rate": 4.862457193671243e-07, "loss": 0.5338, "step": 5350 }, { "epoch": 0.68, "grad_norm": 0.7043223100513321, "learning_rate": 4.858917253229574e-07, "loss": 0.5058, "step": 5351 }, { "epoch": 0.68, "grad_norm": 0.6202547253881282, "learning_rate": 4.855378188260743e-07, "loss": 0.5137, "step": 5352 }, { "epoch": 0.68, "grad_norm": 0.7706827222787114, "learning_rate": 4.851839999367407e-07, "loss": 0.5549, "step": 5353 }, { "epoch": 0.68, "grad_norm": 0.6466321952568665, "learning_rate": 4.848302687152089e-07, "loss": 0.5025, "step": 5354 }, { "epoch": 0.68, "grad_norm": 0.8410774590558495, "learning_rate": 4.844766252217152e-07, "loss": 0.6136, "step": 5355 }, { "epoch": 0.68, "grad_norm": 0.7369558673607465, "learning_rate": 4.841230695164815e-07, "loss": 0.5931, "step": 5356 }, { "epoch": 0.68, "grad_norm": 0.8479113600681114, "learning_rate": 4.837696016597148e-07, "loss": 0.622, "step": 5357 }, { "epoch": 0.68, "grad_norm": 0.6616251657493748, "learning_rate": 4.834162217116069e-07, "loss": 0.5081, "step": 5358 }, { "epoch": 0.68, "grad_norm": 0.9767221266654141, "learning_rate": 4.830629297323346e-07, "loss": 0.6177, "step": 5359 }, { "epoch": 0.68, "grad_norm": 0.8424822991394464, "learning_rate": 4.827097257820601e-07, "loss": 0.5938, "step": 5360 }, { "epoch": 0.68, "grad_norm": 0.7770916356224592, "learning_rate": 4.823566099209301e-07, "loss": 0.5845, "step": 5361 }, { "epoch": 0.68, "grad_norm": 0.6222341504716099, "learning_rate": 4.820035822090767e-07, "loss": 0.485, "step": 5362 }, { "epoch": 0.68, "grad_norm": 0.6942105915552753, "learning_rate": 4.816506427066164e-07, "loss": 0.5289, "step": 5363 }, { "epoch": 0.68, "grad_norm": 0.6426510202102209, "learning_rate": 4.812977914736519e-07, "loss": 0.5284, "step": 5364 }, { "epoch": 0.68, "grad_norm": 0.6379116141685525, "learning_rate": 4.809450285702697e-07, "loss": 0.5211, "step": 5365 }, { "epoch": 0.68, "grad_norm": 0.7268946608784657, "learning_rate": 4.805923540565417e-07, "loss": 0.5552, "step": 5366 }, { "epoch": 0.68, "grad_norm": 0.70876899929458, "learning_rate": 4.802397679925246e-07, "loss": 0.5821, "step": 5367 }, { "epoch": 0.68, "grad_norm": 0.7440894802423124, "learning_rate": 4.798872704382602e-07, "loss": 0.6089, "step": 5368 }, { "epoch": 0.68, "grad_norm": 0.914052763542029, "learning_rate": 4.795348614537752e-07, "loss": 0.6564, "step": 5369 }, { "epoch": 0.68, "grad_norm": 0.8626181854178324, "learning_rate": 4.79182541099081e-07, "loss": 0.6446, "step": 5370 }, { "epoch": 0.68, "grad_norm": 0.6466558367009497, "learning_rate": 4.788303094341744e-07, "loss": 0.546, "step": 5371 }, { "epoch": 0.68, "grad_norm": 0.6993432650760381, "learning_rate": 4.784781665190365e-07, "loss": 0.5231, "step": 5372 }, { "epoch": 0.68, "grad_norm": 0.6505623513086882, "learning_rate": 4.781261124136337e-07, "loss": 0.5454, "step": 5373 }, { "epoch": 0.68, "grad_norm": 0.7258466623182719, "learning_rate": 4.777741471779171e-07, "loss": 0.5641, "step": 5374 }, { "epoch": 0.68, "grad_norm": 0.7849937377412379, "learning_rate": 4.774222708718226e-07, "loss": 0.6144, "step": 5375 }, { "epoch": 0.68, "grad_norm": 0.6637375345104587, "learning_rate": 4.770704835552713e-07, "loss": 0.5251, "step": 5376 }, { "epoch": 0.69, "grad_norm": 0.9213384543008514, "learning_rate": 4.767187852881689e-07, "loss": 0.6607, "step": 5377 }, { "epoch": 0.69, "grad_norm": 0.6176858218836799, "learning_rate": 4.763671761304058e-07, "loss": 0.5049, "step": 5378 }, { "epoch": 0.69, "grad_norm": 0.6585776331949535, "learning_rate": 4.7601565614185733e-07, "loss": 0.5306, "step": 5379 }, { "epoch": 0.69, "grad_norm": 0.826581327533941, "learning_rate": 4.7566422538238396e-07, "loss": 0.5775, "step": 5380 }, { "epoch": 0.69, "grad_norm": 0.7015746641579432, "learning_rate": 4.7531288391183045e-07, "loss": 0.5218, "step": 5381 }, { "epoch": 0.69, "grad_norm": 0.9698192069115962, "learning_rate": 4.7496163179002657e-07, "loss": 0.5877, "step": 5382 }, { "epoch": 0.69, "grad_norm": 0.6367217678719059, "learning_rate": 4.746104690767867e-07, "loss": 0.4858, "step": 5383 }, { "epoch": 0.69, "grad_norm": 0.8053692197725493, "learning_rate": 4.7425939583191086e-07, "loss": 0.6054, "step": 5384 }, { "epoch": 0.69, "grad_norm": 0.841630921104746, "learning_rate": 4.739084121151827e-07, "loss": 0.5425, "step": 5385 }, { "epoch": 0.69, "grad_norm": 0.825536499366827, "learning_rate": 4.7355751798637125e-07, "loss": 0.6113, "step": 5386 }, { "epoch": 0.69, "grad_norm": 0.731044592470668, "learning_rate": 4.7320671350523014e-07, "loss": 0.5158, "step": 5387 }, { "epoch": 0.69, "grad_norm": 0.7928247003372195, "learning_rate": 4.728559987314974e-07, "loss": 0.5618, "step": 5388 }, { "epoch": 0.69, "grad_norm": 0.8854642415833501, "learning_rate": 4.725053737248965e-07, "loss": 0.6243, "step": 5389 }, { "epoch": 0.69, "grad_norm": 0.6138392983921646, "learning_rate": 4.7215483854513503e-07, "loss": 0.4653, "step": 5390 }, { "epoch": 0.69, "grad_norm": 0.8706432280004185, "learning_rate": 4.7180439325190546e-07, "loss": 0.6316, "step": 5391 }, { "epoch": 0.69, "grad_norm": 0.9340318775890795, "learning_rate": 4.71454037904885e-07, "loss": 0.6265, "step": 5392 }, { "epoch": 0.69, "grad_norm": 0.7660005863966213, "learning_rate": 4.711037725637356e-07, "loss": 0.5089, "step": 5393 }, { "epoch": 0.69, "grad_norm": 0.7385013963002642, "learning_rate": 4.7075359728810373e-07, "loss": 0.5637, "step": 5394 }, { "epoch": 0.69, "grad_norm": 0.9181219030284592, "learning_rate": 4.704035121376205e-07, "loss": 0.6872, "step": 5395 }, { "epoch": 0.69, "grad_norm": 0.6058211069189818, "learning_rate": 4.70053517171902e-07, "loss": 0.537, "step": 5396 }, { "epoch": 0.69, "grad_norm": 0.7788836708133637, "learning_rate": 4.697036124505485e-07, "loss": 0.6061, "step": 5397 }, { "epoch": 0.69, "grad_norm": 0.6522445791673998, "learning_rate": 4.693537980331451e-07, "loss": 0.5134, "step": 5398 }, { "epoch": 0.69, "grad_norm": 0.6574731077984831, "learning_rate": 4.690040739792619e-07, "loss": 0.5385, "step": 5399 }, { "epoch": 0.69, "grad_norm": 0.6340058946059042, "learning_rate": 4.686544403484528e-07, "loss": 0.5056, "step": 5400 }, { "epoch": 0.69, "grad_norm": 0.8216824012672439, "learning_rate": 4.683048972002571e-07, "loss": 0.645, "step": 5401 }, { "epoch": 0.69, "grad_norm": 0.985222828142204, "learning_rate": 4.6795544459419833e-07, "loss": 0.6752, "step": 5402 }, { "epoch": 0.69, "grad_norm": 0.8274199509625679, "learning_rate": 4.676060825897844e-07, "loss": 0.5486, "step": 5403 }, { "epoch": 0.69, "grad_norm": 0.7895062043621063, "learning_rate": 4.672568112465083e-07, "loss": 0.517, "step": 5404 }, { "epoch": 0.69, "grad_norm": 0.710469230499792, "learning_rate": 4.669076306238471e-07, "loss": 0.5158, "step": 5405 }, { "epoch": 0.69, "grad_norm": 0.7545364115552494, "learning_rate": 4.6655854078126255e-07, "loss": 0.545, "step": 5406 }, { "epoch": 0.69, "grad_norm": 0.8152647831343788, "learning_rate": 4.662095417782014e-07, "loss": 0.5832, "step": 5407 }, { "epoch": 0.69, "grad_norm": 0.7791371657582434, "learning_rate": 4.658606336740941e-07, "loss": 0.6179, "step": 5408 }, { "epoch": 0.69, "grad_norm": 0.8236075044117962, "learning_rate": 4.6551181652835635e-07, "loss": 0.6195, "step": 5409 }, { "epoch": 0.69, "grad_norm": 0.8621228278848556, "learning_rate": 4.651630904003876e-07, "loss": 0.6138, "step": 5410 }, { "epoch": 0.69, "grad_norm": 0.6768093849047949, "learning_rate": 4.648144553495731e-07, "loss": 0.5115, "step": 5411 }, { "epoch": 0.69, "grad_norm": 0.7283713505965954, "learning_rate": 4.6446591143528125e-07, "loss": 0.5281, "step": 5412 }, { "epoch": 0.69, "grad_norm": 0.7995898360124369, "learning_rate": 4.6411745871686547e-07, "loss": 0.6187, "step": 5413 }, { "epoch": 0.69, "grad_norm": 0.7457992718961622, "learning_rate": 4.6376909725366385e-07, "loss": 0.5287, "step": 5414 }, { "epoch": 0.69, "grad_norm": 0.8039543359968708, "learning_rate": 4.634208271049984e-07, "loss": 0.5951, "step": 5415 }, { "epoch": 0.69, "grad_norm": 0.682833332912845, "learning_rate": 4.6307264833017614e-07, "loss": 0.5317, "step": 5416 }, { "epoch": 0.69, "grad_norm": 0.8726562211047633, "learning_rate": 4.627245609884882e-07, "loss": 0.6236, "step": 5417 }, { "epoch": 0.69, "grad_norm": 0.6876557906121669, "learning_rate": 4.623765651392102e-07, "loss": 0.5487, "step": 5418 }, { "epoch": 0.69, "grad_norm": 0.8346329905768244, "learning_rate": 4.620286608416023e-07, "loss": 0.6494, "step": 5419 }, { "epoch": 0.69, "grad_norm": 0.8903808785599667, "learning_rate": 4.616808481549088e-07, "loss": 0.6894, "step": 5420 }, { "epoch": 0.69, "grad_norm": 0.8858233931578466, "learning_rate": 4.6133312713835884e-07, "loss": 0.654, "step": 5421 }, { "epoch": 0.69, "grad_norm": 0.709617619202685, "learning_rate": 4.609854978511654e-07, "loss": 0.618, "step": 5422 }, { "epoch": 0.69, "grad_norm": 0.6515773590469973, "learning_rate": 4.606379603525263e-07, "loss": 0.5169, "step": 5423 }, { "epoch": 0.69, "grad_norm": 0.7719779062568372, "learning_rate": 4.602905147016235e-07, "loss": 0.6225, "step": 5424 }, { "epoch": 0.69, "grad_norm": 0.674537950314378, "learning_rate": 4.599431609576234e-07, "loss": 0.5827, "step": 5425 }, { "epoch": 0.69, "grad_norm": 0.8868170888717785, "learning_rate": 4.5959589917967677e-07, "loss": 0.6377, "step": 5426 }, { "epoch": 0.69, "grad_norm": 0.8509949939718056, "learning_rate": 4.592487294269186e-07, "loss": 0.6024, "step": 5427 }, { "epoch": 0.69, "grad_norm": 0.8585723877774838, "learning_rate": 4.589016517584683e-07, "loss": 0.5923, "step": 5428 }, { "epoch": 0.69, "grad_norm": 0.9555101806054861, "learning_rate": 4.5855466623342965e-07, "loss": 0.5156, "step": 5429 }, { "epoch": 0.69, "grad_norm": 0.7065212174768765, "learning_rate": 4.5820777291089054e-07, "loss": 0.5895, "step": 5430 }, { "epoch": 0.69, "grad_norm": 1.1458470540580727, "learning_rate": 4.578609718499233e-07, "loss": 0.6076, "step": 5431 }, { "epoch": 0.69, "grad_norm": 0.6417300910768817, "learning_rate": 4.5751426310958465e-07, "loss": 0.525, "step": 5432 }, { "epoch": 0.69, "grad_norm": 0.7317180485591868, "learning_rate": 4.571676467489154e-07, "loss": 0.5767, "step": 5433 }, { "epoch": 0.69, "grad_norm": 0.8181700208681539, "learning_rate": 4.568211228269407e-07, "loss": 0.6143, "step": 5434 }, { "epoch": 0.69, "grad_norm": 0.9302081049439518, "learning_rate": 4.5647469140267e-07, "loss": 0.6227, "step": 5435 }, { "epoch": 0.69, "grad_norm": 0.8865776394814306, "learning_rate": 4.5612835253509665e-07, "loss": 0.6302, "step": 5436 }, { "epoch": 0.69, "grad_norm": 0.6732611085819743, "learning_rate": 4.5578210628319915e-07, "loss": 0.5563, "step": 5437 }, { "epoch": 0.69, "grad_norm": 0.8192906377847118, "learning_rate": 4.554359527059394e-07, "loss": 0.6171, "step": 5438 }, { "epoch": 0.69, "grad_norm": 0.8966213269191172, "learning_rate": 4.5508989186226356e-07, "loss": 0.6291, "step": 5439 }, { "epoch": 0.69, "grad_norm": 0.8168979636280663, "learning_rate": 4.547439238111024e-07, "loss": 0.5361, "step": 5440 }, { "epoch": 0.69, "grad_norm": 0.6535345477706126, "learning_rate": 4.5439804861137055e-07, "loss": 0.5414, "step": 5441 }, { "epoch": 0.69, "grad_norm": 0.8215570384535594, "learning_rate": 4.540522663219669e-07, "loss": 0.6539, "step": 5442 }, { "epoch": 0.69, "grad_norm": 0.8151970345566367, "learning_rate": 4.537065770017747e-07, "loss": 0.6144, "step": 5443 }, { "epoch": 0.69, "grad_norm": 0.8846753475701035, "learning_rate": 4.5336098070966124e-07, "loss": 0.6216, "step": 5444 }, { "epoch": 0.69, "grad_norm": 0.729705915871295, "learning_rate": 4.5301547750447786e-07, "loss": 0.572, "step": 5445 }, { "epoch": 0.69, "grad_norm": 0.7217828875465638, "learning_rate": 4.526700674450603e-07, "loss": 0.5095, "step": 5446 }, { "epoch": 0.69, "grad_norm": 0.9496949765365021, "learning_rate": 4.523247505902282e-07, "loss": 0.6538, "step": 5447 }, { "epoch": 0.69, "grad_norm": 0.9126392855265738, "learning_rate": 4.519795269987854e-07, "loss": 0.6182, "step": 5448 }, { "epoch": 0.69, "grad_norm": 0.8371751953861961, "learning_rate": 4.5163439672951987e-07, "loss": 0.5764, "step": 5449 }, { "epoch": 0.69, "grad_norm": 0.7579529939217367, "learning_rate": 4.5128935984120384e-07, "loss": 0.604, "step": 5450 }, { "epoch": 0.69, "grad_norm": 0.6854682027542485, "learning_rate": 4.509444163925934e-07, "loss": 0.5506, "step": 5451 }, { "epoch": 0.69, "grad_norm": 0.7390644556190168, "learning_rate": 4.505995664424287e-07, "loss": 0.5809, "step": 5452 }, { "epoch": 0.69, "grad_norm": 0.7566369743146946, "learning_rate": 4.5025481004943435e-07, "loss": 0.5219, "step": 5453 }, { "epoch": 0.69, "grad_norm": 0.7995271557726705, "learning_rate": 4.499101472723186e-07, "loss": 0.538, "step": 5454 }, { "epoch": 0.69, "grad_norm": 0.8391448800061801, "learning_rate": 4.49565578169774e-07, "loss": 0.5668, "step": 5455 }, { "epoch": 0.7, "grad_norm": 0.6280776698791654, "learning_rate": 4.4922110280047697e-07, "loss": 0.5291, "step": 5456 }, { "epoch": 0.7, "grad_norm": 0.725429061941008, "learning_rate": 4.488767212230883e-07, "loss": 0.4987, "step": 5457 }, { "epoch": 0.7, "grad_norm": 0.784989057575474, "learning_rate": 4.4853243349625235e-07, "loss": 0.5771, "step": 5458 }, { "epoch": 0.7, "grad_norm": 0.8681769671128385, "learning_rate": 4.4818823967859786e-07, "loss": 0.6138, "step": 5459 }, { "epoch": 0.7, "grad_norm": 0.6212673666159269, "learning_rate": 4.4784413982873735e-07, "loss": 0.5826, "step": 5460 }, { "epoch": 0.7, "grad_norm": 0.8093977730511368, "learning_rate": 4.4750013400526755e-07, "loss": 0.6426, "step": 5461 }, { "epoch": 0.7, "grad_norm": 0.7435666575191491, "learning_rate": 4.4715622226676873e-07, "loss": 0.5513, "step": 5462 }, { "epoch": 0.7, "grad_norm": 0.9319125983948559, "learning_rate": 4.46812404671806e-07, "loss": 0.6438, "step": 5463 }, { "epoch": 0.7, "grad_norm": 0.9092075390017121, "learning_rate": 4.4646868127892777e-07, "loss": 0.6443, "step": 5464 }, { "epoch": 0.7, "grad_norm": 0.8003580713638484, "learning_rate": 4.461250521466665e-07, "loss": 0.598, "step": 5465 }, { "epoch": 0.7, "grad_norm": 0.7505898516711969, "learning_rate": 4.4578151733353866e-07, "loss": 0.5437, "step": 5466 }, { "epoch": 0.7, "grad_norm": 0.9278691649570422, "learning_rate": 4.4543807689804445e-07, "loss": 0.6703, "step": 5467 }, { "epoch": 0.7, "grad_norm": 0.6251566013535413, "learning_rate": 4.450947308986681e-07, "loss": 0.5165, "step": 5468 }, { "epoch": 0.7, "grad_norm": 0.7784289964980218, "learning_rate": 4.4475147939387845e-07, "loss": 0.5551, "step": 5469 }, { "epoch": 0.7, "grad_norm": 0.7046830964982311, "learning_rate": 4.444083224421274e-07, "loss": 0.5774, "step": 5470 }, { "epoch": 0.7, "grad_norm": 1.007966619535289, "learning_rate": 4.440652601018509e-07, "loss": 0.6578, "step": 5471 }, { "epoch": 0.7, "grad_norm": 0.8001453062216861, "learning_rate": 4.4372229243146885e-07, "loss": 0.6104, "step": 5472 }, { "epoch": 0.7, "grad_norm": 0.777214701326824, "learning_rate": 4.433794194893852e-07, "loss": 0.5306, "step": 5473 }, { "epoch": 0.7, "grad_norm": 0.8694563725406029, "learning_rate": 4.4303664133398755e-07, "loss": 0.6012, "step": 5474 }, { "epoch": 0.7, "grad_norm": 0.7123246974548884, "learning_rate": 4.4269395802364753e-07, "loss": 0.5779, "step": 5475 }, { "epoch": 0.7, "grad_norm": 0.8319572261155206, "learning_rate": 4.423513696167204e-07, "loss": 0.5454, "step": 5476 }, { "epoch": 0.7, "grad_norm": 0.6065292101247821, "learning_rate": 4.420088761715455e-07, "loss": 0.5134, "step": 5477 }, { "epoch": 0.7, "grad_norm": 0.8560047090675773, "learning_rate": 4.416664777464459e-07, "loss": 0.6129, "step": 5478 }, { "epoch": 0.7, "grad_norm": 0.7535040239512761, "learning_rate": 4.413241743997285e-07, "loss": 0.6001, "step": 5479 }, { "epoch": 0.7, "grad_norm": 0.9068458815192074, "learning_rate": 4.409819661896839e-07, "loss": 0.5758, "step": 5480 }, { "epoch": 0.7, "grad_norm": 0.7593317337484493, "learning_rate": 4.406398531745866e-07, "loss": 0.5953, "step": 5481 }, { "epoch": 0.7, "grad_norm": 0.833624812597557, "learning_rate": 4.4029783541269507e-07, "loss": 0.6255, "step": 5482 }, { "epoch": 0.7, "grad_norm": 0.8526117079446215, "learning_rate": 4.399559129622512e-07, "loss": 0.5308, "step": 5483 }, { "epoch": 0.7, "grad_norm": 0.8410899081206291, "learning_rate": 4.396140858814809e-07, "loss": 0.6614, "step": 5484 }, { "epoch": 0.7, "grad_norm": 1.0075001997508735, "learning_rate": 4.3927235422859355e-07, "loss": 0.6677, "step": 5485 }, { "epoch": 0.7, "grad_norm": 0.7921228782738354, "learning_rate": 4.389307180617827e-07, "loss": 0.5428, "step": 5486 }, { "epoch": 0.7, "grad_norm": 0.7628654098416533, "learning_rate": 4.3858917743922554e-07, "loss": 0.5724, "step": 5487 }, { "epoch": 0.7, "grad_norm": 0.6754606242335749, "learning_rate": 4.3824773241908255e-07, "loss": 0.5401, "step": 5488 }, { "epoch": 0.7, "grad_norm": 0.8325025122674288, "learning_rate": 4.3790638305949855e-07, "loss": 0.5876, "step": 5489 }, { "epoch": 0.7, "grad_norm": 0.7374963133273117, "learning_rate": 4.375651294186016e-07, "loss": 0.6136, "step": 5490 }, { "epoch": 0.7, "grad_norm": 0.738609331673596, "learning_rate": 4.3722397155450376e-07, "loss": 0.5348, "step": 5491 }, { "epoch": 0.7, "grad_norm": 0.8320151306912408, "learning_rate": 4.368829095253006e-07, "loss": 0.6434, "step": 5492 }, { "epoch": 0.7, "grad_norm": 0.7718077296972778, "learning_rate": 4.3654194338907137e-07, "loss": 0.541, "step": 5493 }, { "epoch": 0.7, "grad_norm": 0.6513105141051528, "learning_rate": 4.362010732038788e-07, "loss": 0.5369, "step": 5494 }, { "epoch": 0.7, "grad_norm": 0.7277128432539303, "learning_rate": 4.358602990277702e-07, "loss": 0.5627, "step": 5495 }, { "epoch": 0.7, "grad_norm": 0.8881761845421416, "learning_rate": 4.355196209187755e-07, "loss": 0.6361, "step": 5496 }, { "epoch": 0.7, "grad_norm": 0.763204549274549, "learning_rate": 4.351790389349087e-07, "loss": 0.5723, "step": 5497 }, { "epoch": 0.7, "grad_norm": 0.7201401145253701, "learning_rate": 4.348385531341673e-07, "loss": 0.5773, "step": 5498 }, { "epoch": 0.7, "grad_norm": 0.9060093358781092, "learning_rate": 4.344981635745324e-07, "loss": 0.6483, "step": 5499 }, { "epoch": 0.7, "grad_norm": 0.5965228711429188, "learning_rate": 4.34157870313969e-07, "loss": 0.5326, "step": 5500 }, { "epoch": 0.7, "grad_norm": 1.004376062188023, "learning_rate": 4.338176734104253e-07, "loss": 0.6063, "step": 5501 }, { "epoch": 0.7, "grad_norm": 0.9089965818285728, "learning_rate": 4.334775729218334e-07, "loss": 0.6732, "step": 5502 }, { "epoch": 0.7, "grad_norm": 0.800569772158204, "learning_rate": 4.331375689061089e-07, "loss": 0.6087, "step": 5503 }, { "epoch": 0.7, "grad_norm": 0.6734584341516239, "learning_rate": 4.327976614211508e-07, "loss": 0.5426, "step": 5504 }, { "epoch": 0.7, "grad_norm": 0.8396604902951967, "learning_rate": 4.32457850524842e-07, "loss": 0.6049, "step": 5505 }, { "epoch": 0.7, "grad_norm": 0.7400415508096883, "learning_rate": 4.3211813627504843e-07, "loss": 0.5582, "step": 5506 }, { "epoch": 0.7, "grad_norm": 0.8380971393410184, "learning_rate": 4.3177851872962035e-07, "loss": 0.6492, "step": 5507 }, { "epoch": 0.7, "grad_norm": 0.8236259890371715, "learning_rate": 4.314389979463907e-07, "loss": 0.6377, "step": 5508 }, { "epoch": 0.7, "grad_norm": 0.8305802797963977, "learning_rate": 4.3109957398317653e-07, "loss": 0.5864, "step": 5509 }, { "epoch": 0.7, "grad_norm": 1.1018208532116782, "learning_rate": 4.3076024689777813e-07, "loss": 0.6848, "step": 5510 }, { "epoch": 0.7, "grad_norm": 0.7469160916261077, "learning_rate": 4.3042101674797937e-07, "loss": 0.5668, "step": 5511 }, { "epoch": 0.7, "grad_norm": 0.753334978112115, "learning_rate": 4.3008188359154764e-07, "loss": 0.5494, "step": 5512 }, { "epoch": 0.7, "grad_norm": 0.7018113143756353, "learning_rate": 4.297428474862337e-07, "loss": 0.5151, "step": 5513 }, { "epoch": 0.7, "grad_norm": 0.7211559822408069, "learning_rate": 4.2940390848977203e-07, "loss": 0.5586, "step": 5514 }, { "epoch": 0.7, "grad_norm": 0.6394960443270393, "learning_rate": 4.290650666598803e-07, "loss": 0.5642, "step": 5515 }, { "epoch": 0.7, "grad_norm": 0.6831244535396285, "learning_rate": 4.287263220542597e-07, "loss": 0.5809, "step": 5516 }, { "epoch": 0.7, "grad_norm": 0.8256276629619962, "learning_rate": 4.283876747305951e-07, "loss": 0.5735, "step": 5517 }, { "epoch": 0.7, "grad_norm": 0.8141503761402799, "learning_rate": 4.2804912474655443e-07, "loss": 0.584, "step": 5518 }, { "epoch": 0.7, "grad_norm": 0.6692471322025692, "learning_rate": 4.277106721597892e-07, "loss": 0.5567, "step": 5519 }, { "epoch": 0.7, "grad_norm": 0.735135640863164, "learning_rate": 4.2737231702793455e-07, "loss": 0.5081, "step": 5520 }, { "epoch": 0.7, "grad_norm": 0.6381807141610216, "learning_rate": 4.2703405940860826e-07, "loss": 0.5094, "step": 5521 }, { "epoch": 0.7, "grad_norm": 0.6404782407849322, "learning_rate": 4.2669589935941284e-07, "loss": 0.534, "step": 5522 }, { "epoch": 0.7, "grad_norm": 0.9221702588493129, "learning_rate": 4.2635783693793324e-07, "loss": 0.5956, "step": 5523 }, { "epoch": 0.7, "grad_norm": 0.9063132128079254, "learning_rate": 4.2601987220173785e-07, "loss": 0.5846, "step": 5524 }, { "epoch": 0.7, "grad_norm": 0.7819918722991914, "learning_rate": 4.256820052083785e-07, "loss": 0.5443, "step": 5525 }, { "epoch": 0.7, "grad_norm": 0.8391613121642005, "learning_rate": 4.2534423601539047e-07, "loss": 0.6487, "step": 5526 }, { "epoch": 0.7, "grad_norm": 0.9948903162400459, "learning_rate": 4.250065646802923e-07, "loss": 0.6315, "step": 5527 }, { "epoch": 0.7, "grad_norm": 0.8522128341197969, "learning_rate": 4.24668991260586e-07, "loss": 0.6446, "step": 5528 }, { "epoch": 0.7, "grad_norm": 0.8546929810890971, "learning_rate": 4.243315158137566e-07, "loss": 0.5616, "step": 5529 }, { "epoch": 0.7, "grad_norm": 0.6860412444730833, "learning_rate": 4.2399413839727296e-07, "loss": 0.6015, "step": 5530 }, { "epoch": 0.7, "grad_norm": 0.7878612177956198, "learning_rate": 4.236568590685866e-07, "loss": 0.5659, "step": 5531 }, { "epoch": 0.7, "grad_norm": 0.7275369402110485, "learning_rate": 4.233196778851329e-07, "loss": 0.5439, "step": 5532 }, { "epoch": 0.7, "grad_norm": 0.840614643868386, "learning_rate": 4.229825949043303e-07, "loss": 0.5312, "step": 5533 }, { "epoch": 0.71, "grad_norm": 0.7060702643029093, "learning_rate": 4.2264561018358047e-07, "loss": 0.5083, "step": 5534 }, { "epoch": 0.71, "grad_norm": 0.8970001489253682, "learning_rate": 4.223087237802683e-07, "loss": 0.541, "step": 5535 }, { "epoch": 0.71, "grad_norm": 0.8826539833368219, "learning_rate": 4.2197193575176217e-07, "loss": 0.6611, "step": 5536 }, { "epoch": 0.71, "grad_norm": 0.6435859316896598, "learning_rate": 4.216352461554136e-07, "loss": 0.5372, "step": 5537 }, { "epoch": 0.71, "grad_norm": 0.8131539739702812, "learning_rate": 4.212986550485571e-07, "loss": 0.6648, "step": 5538 }, { "epoch": 0.71, "grad_norm": 0.896726324143933, "learning_rate": 4.209621624885109e-07, "loss": 0.6451, "step": 5539 }, { "epoch": 0.71, "grad_norm": 0.6782656362241735, "learning_rate": 4.20625768532576e-07, "loss": 0.5786, "step": 5540 }, { "epoch": 0.71, "grad_norm": 0.7944266615404366, "learning_rate": 4.2028947323803686e-07, "loss": 0.6402, "step": 5541 }, { "epoch": 0.71, "grad_norm": 0.8766927667934332, "learning_rate": 4.199532766621611e-07, "loss": 0.592, "step": 5542 }, { "epoch": 0.71, "grad_norm": 0.8541194918668754, "learning_rate": 4.1961717886219926e-07, "loss": 0.531, "step": 5543 }, { "epoch": 0.71, "grad_norm": 0.8632312428897135, "learning_rate": 4.192811798953857e-07, "loss": 0.668, "step": 5544 }, { "epoch": 0.71, "grad_norm": 0.7723574711996681, "learning_rate": 4.1894527981893713e-07, "loss": 0.5253, "step": 5545 }, { "epoch": 0.71, "grad_norm": 0.7540666300392412, "learning_rate": 4.1860947869005416e-07, "loss": 0.5755, "step": 5546 }, { "epoch": 0.71, "grad_norm": 3.576732516607456, "learning_rate": 4.182737765659197e-07, "loss": 0.6129, "step": 5547 }, { "epoch": 0.71, "grad_norm": 1.1285833861803778, "learning_rate": 4.17938173503701e-07, "loss": 0.5671, "step": 5548 }, { "epoch": 0.71, "grad_norm": 0.9817668805690377, "learning_rate": 4.176026695605476e-07, "loss": 0.6565, "step": 5549 }, { "epoch": 0.71, "grad_norm": 0.731479778961776, "learning_rate": 4.172672647935921e-07, "loss": 0.5108, "step": 5550 }, { "epoch": 0.71, "grad_norm": 0.6402725864570282, "learning_rate": 4.169319592599505e-07, "loss": 0.5348, "step": 5551 }, { "epoch": 0.71, "grad_norm": 0.6377211230997982, "learning_rate": 4.165967530167219e-07, "loss": 0.522, "step": 5552 }, { "epoch": 0.71, "grad_norm": 0.6580860332714137, "learning_rate": 4.162616461209886e-07, "loss": 0.552, "step": 5553 }, { "epoch": 0.71, "grad_norm": 0.8534743055359045, "learning_rate": 4.1592663862981534e-07, "loss": 0.5554, "step": 5554 }, { "epoch": 0.71, "grad_norm": 0.762920555317595, "learning_rate": 4.1559173060025086e-07, "loss": 0.5212, "step": 5555 }, { "epoch": 0.71, "grad_norm": 0.7513913814383733, "learning_rate": 4.152569220893264e-07, "loss": 0.4937, "step": 5556 }, { "epoch": 0.71, "grad_norm": 0.9096179858363339, "learning_rate": 4.149222131540562e-07, "loss": 0.6915, "step": 5557 }, { "epoch": 0.71, "grad_norm": 0.8283759306999988, "learning_rate": 4.145876038514378e-07, "loss": 0.5996, "step": 5558 }, { "epoch": 0.71, "grad_norm": 0.7923393508116303, "learning_rate": 4.142530942384517e-07, "loss": 0.6166, "step": 5559 }, { "epoch": 0.71, "grad_norm": 1.4006509383864563, "learning_rate": 4.139186843720613e-07, "loss": 0.5845, "step": 5560 }, { "epoch": 0.71, "grad_norm": 0.7081437891227408, "learning_rate": 4.1358437430921336e-07, "loss": 0.5648, "step": 5561 }, { "epoch": 0.71, "grad_norm": 0.8103297699357376, "learning_rate": 4.132501641068371e-07, "loss": 0.6215, "step": 5562 }, { "epoch": 0.71, "grad_norm": 1.0271387867929571, "learning_rate": 4.1291605382184525e-07, "loss": 0.6089, "step": 5563 }, { "epoch": 0.71, "grad_norm": 0.8478144195009228, "learning_rate": 4.1258204351113325e-07, "loss": 0.6546, "step": 5564 }, { "epoch": 0.71, "grad_norm": 0.7643265149216378, "learning_rate": 4.122481332315795e-07, "loss": 0.5944, "step": 5565 }, { "epoch": 0.71, "grad_norm": 1.0342018426447956, "learning_rate": 4.119143230400451e-07, "loss": 0.6221, "step": 5566 }, { "epoch": 0.71, "grad_norm": 0.6330615775923993, "learning_rate": 4.1158061299337534e-07, "loss": 0.5793, "step": 5567 }, { "epoch": 0.71, "grad_norm": 0.9337947529072448, "learning_rate": 4.112470031483971e-07, "loss": 0.6478, "step": 5568 }, { "epoch": 0.71, "grad_norm": 0.7803883003424512, "learning_rate": 4.1091349356192053e-07, "loss": 0.6011, "step": 5569 }, { "epoch": 0.71, "grad_norm": 0.6224534916134425, "learning_rate": 4.10580084290739e-07, "loss": 0.4646, "step": 5570 }, { "epoch": 0.71, "grad_norm": 0.6904933503810439, "learning_rate": 4.1024677539162855e-07, "loss": 0.5069, "step": 5571 }, { "epoch": 0.71, "grad_norm": 0.8430228862862325, "learning_rate": 4.0991356692134824e-07, "loss": 0.6169, "step": 5572 }, { "epoch": 0.71, "grad_norm": 0.7251585423754022, "learning_rate": 4.0958045893664005e-07, "loss": 0.559, "step": 5573 }, { "epoch": 0.71, "grad_norm": 0.787634014497346, "learning_rate": 4.092474514942288e-07, "loss": 0.5764, "step": 5574 }, { "epoch": 0.71, "grad_norm": 0.9395665278061174, "learning_rate": 4.089145446508221e-07, "loss": 0.6621, "step": 5575 }, { "epoch": 0.71, "grad_norm": 0.631691205020156, "learning_rate": 4.0858173846311053e-07, "loss": 0.5216, "step": 5576 }, { "epoch": 0.71, "grad_norm": 0.8114871879530118, "learning_rate": 4.082490329877676e-07, "loss": 0.5746, "step": 5577 }, { "epoch": 0.71, "grad_norm": 0.7738383667642961, "learning_rate": 4.0791642828144944e-07, "loss": 0.5957, "step": 5578 }, { "epoch": 0.71, "grad_norm": 0.8160265639013825, "learning_rate": 4.07583924400795e-07, "loss": 0.6197, "step": 5579 }, { "epoch": 0.71, "grad_norm": 0.6608551580797347, "learning_rate": 4.072515214024268e-07, "loss": 0.6036, "step": 5580 }, { "epoch": 0.71, "grad_norm": 0.7443339107534718, "learning_rate": 4.0691921934294937e-07, "loss": 0.5794, "step": 5581 }, { "epoch": 0.71, "grad_norm": 1.0708714061671945, "learning_rate": 4.0658701827895016e-07, "loss": 0.5967, "step": 5582 }, { "epoch": 0.71, "grad_norm": 0.8255139039207315, "learning_rate": 4.0625491826699953e-07, "loss": 0.5086, "step": 5583 }, { "epoch": 0.71, "grad_norm": 0.7097246845586394, "learning_rate": 4.0592291936365086e-07, "loss": 0.5201, "step": 5584 }, { "epoch": 0.71, "grad_norm": 0.7349182839850993, "learning_rate": 4.055910216254399e-07, "loss": 0.5859, "step": 5585 }, { "epoch": 0.71, "grad_norm": 0.9690631065422957, "learning_rate": 4.0525922510888546e-07, "loss": 0.6849, "step": 5586 }, { "epoch": 0.71, "grad_norm": 0.7225041063573439, "learning_rate": 4.0492752987048893e-07, "loss": 0.5281, "step": 5587 }, { "epoch": 0.71, "grad_norm": 0.6843323170409429, "learning_rate": 4.045959359667348e-07, "loss": 0.5059, "step": 5588 }, { "epoch": 0.71, "grad_norm": 1.011963930556679, "learning_rate": 4.0426444345408983e-07, "loss": 0.5892, "step": 5589 }, { "epoch": 0.71, "grad_norm": 0.7712460039729854, "learning_rate": 4.0393305238900374e-07, "loss": 0.5907, "step": 5590 }, { "epoch": 0.71, "grad_norm": 0.86221860675215, "learning_rate": 4.0360176282790925e-07, "loss": 0.6344, "step": 5591 }, { "epoch": 0.71, "grad_norm": 0.8832338119075592, "learning_rate": 4.0327057482722117e-07, "loss": 0.5771, "step": 5592 }, { "epoch": 0.71, "grad_norm": 0.6865511323517142, "learning_rate": 4.029394884433375e-07, "loss": 0.5319, "step": 5593 }, { "epoch": 0.71, "grad_norm": 0.6615191354260711, "learning_rate": 4.0260850373263887e-07, "loss": 0.5508, "step": 5594 }, { "epoch": 0.71, "grad_norm": 0.7890917712144613, "learning_rate": 4.022776207514884e-07, "loss": 0.5576, "step": 5595 }, { "epoch": 0.71, "grad_norm": 0.8532376133816559, "learning_rate": 4.0194683955623223e-07, "loss": 0.5468, "step": 5596 }, { "epoch": 0.71, "grad_norm": 0.7520894444895001, "learning_rate": 4.0161616020319866e-07, "loss": 0.4907, "step": 5597 }, { "epoch": 0.71, "grad_norm": 0.7219070237325335, "learning_rate": 4.012855827486992e-07, "loss": 0.5953, "step": 5598 }, { "epoch": 0.71, "grad_norm": 0.6857148720499368, "learning_rate": 4.009551072490276e-07, "loss": 0.5449, "step": 5599 }, { "epoch": 0.71, "grad_norm": 0.6500294845000614, "learning_rate": 4.006247337604605e-07, "loss": 0.5143, "step": 5600 }, { "epoch": 0.71, "grad_norm": 0.623466335763457, "learning_rate": 4.002944623392568e-07, "loss": 0.5327, "step": 5601 }, { "epoch": 0.71, "grad_norm": 0.9025375596739125, "learning_rate": 3.9996429304165856e-07, "loss": 0.6474, "step": 5602 }, { "epoch": 0.71, "grad_norm": 0.9046501915525249, "learning_rate": 3.996342259238902e-07, "loss": 0.652, "step": 5603 }, { "epoch": 0.71, "grad_norm": 0.7039723057634765, "learning_rate": 3.993042610421585e-07, "loss": 0.5535, "step": 5604 }, { "epoch": 0.71, "grad_norm": 0.6334470707881604, "learning_rate": 3.989743984526528e-07, "loss": 0.508, "step": 5605 }, { "epoch": 0.71, "grad_norm": 0.82091195721671, "learning_rate": 3.98644638211546e-07, "loss": 0.6107, "step": 5606 }, { "epoch": 0.71, "grad_norm": 0.8624439606559575, "learning_rate": 3.9831498037499243e-07, "loss": 0.6108, "step": 5607 }, { "epoch": 0.71, "grad_norm": 0.9094038244399927, "learning_rate": 3.9798542499912936e-07, "loss": 0.6297, "step": 5608 }, { "epoch": 0.71, "grad_norm": 1.0521058320113514, "learning_rate": 3.976559721400766e-07, "loss": 0.6478, "step": 5609 }, { "epoch": 0.71, "grad_norm": 0.8231717023383446, "learning_rate": 3.9732662185393674e-07, "loss": 0.6199, "step": 5610 }, { "epoch": 0.71, "grad_norm": 0.7926876986400526, "learning_rate": 3.969973741967946e-07, "loss": 0.5665, "step": 5611 }, { "epoch": 0.71, "grad_norm": 0.835191320991798, "learning_rate": 3.966682292247174e-07, "loss": 0.6034, "step": 5612 }, { "epoch": 0.72, "grad_norm": 0.8296815347919673, "learning_rate": 3.9633918699375544e-07, "loss": 0.6052, "step": 5613 }, { "epoch": 0.72, "grad_norm": 0.7360078426614185, "learning_rate": 3.9601024755994104e-07, "loss": 0.5072, "step": 5614 }, { "epoch": 0.72, "grad_norm": 0.8824243689174313, "learning_rate": 3.956814109792892e-07, "loss": 0.6522, "step": 5615 }, { "epoch": 0.72, "grad_norm": 0.6793761013810344, "learning_rate": 3.953526773077972e-07, "loss": 0.5409, "step": 5616 }, { "epoch": 0.72, "grad_norm": 0.642444997815709, "learning_rate": 3.950240466014452e-07, "loss": 0.4993, "step": 5617 }, { "epoch": 0.72, "grad_norm": 0.8816442534590868, "learning_rate": 3.9469551891619536e-07, "loss": 0.5663, "step": 5618 }, { "epoch": 0.72, "grad_norm": 0.6929419192053166, "learning_rate": 3.9436709430799277e-07, "loss": 0.5232, "step": 5619 }, { "epoch": 0.72, "grad_norm": 0.9947276098428101, "learning_rate": 3.9403877283276446e-07, "loss": 0.633, "step": 5620 }, { "epoch": 0.72, "grad_norm": 0.7480748335773182, "learning_rate": 3.937105545464203e-07, "loss": 0.5277, "step": 5621 }, { "epoch": 0.72, "grad_norm": 0.6698008638046612, "learning_rate": 3.933824395048525e-07, "loss": 0.5263, "step": 5622 }, { "epoch": 0.72, "grad_norm": 0.6977147704052601, "learning_rate": 3.930544277639355e-07, "loss": 0.5666, "step": 5623 }, { "epoch": 0.72, "grad_norm": 0.9917816910174254, "learning_rate": 3.9272651937952626e-07, "loss": 0.7003, "step": 5624 }, { "epoch": 0.72, "grad_norm": 0.8498837195360109, "learning_rate": 3.923987144074644e-07, "loss": 0.6543, "step": 5625 }, { "epoch": 0.72, "grad_norm": 0.6713859259844839, "learning_rate": 3.9207101290357145e-07, "loss": 0.5373, "step": 5626 }, { "epoch": 0.72, "grad_norm": 0.7737901209783398, "learning_rate": 3.917434149236516e-07, "loss": 0.5723, "step": 5627 }, { "epoch": 0.72, "grad_norm": 0.7753213536762295, "learning_rate": 3.9141592052349147e-07, "loss": 0.5504, "step": 5628 }, { "epoch": 0.72, "grad_norm": 0.6852210620905105, "learning_rate": 3.910885297588599e-07, "loss": 0.5383, "step": 5629 }, { "epoch": 0.72, "grad_norm": 0.6460473619354119, "learning_rate": 3.9076124268550815e-07, "loss": 0.5231, "step": 5630 }, { "epoch": 0.72, "grad_norm": 0.7334996676346494, "learning_rate": 3.9043405935916975e-07, "loss": 0.5924, "step": 5631 }, { "epoch": 0.72, "grad_norm": 1.1855916115819223, "learning_rate": 3.901069798355602e-07, "loss": 0.6142, "step": 5632 }, { "epoch": 0.72, "grad_norm": 0.9231902141472927, "learning_rate": 3.897800041703785e-07, "loss": 0.6079, "step": 5633 }, { "epoch": 0.72, "grad_norm": 0.6605595845266886, "learning_rate": 3.89453132419305e-07, "loss": 0.4868, "step": 5634 }, { "epoch": 0.72, "grad_norm": 0.7437356844070493, "learning_rate": 3.891263646380023e-07, "loss": 0.5119, "step": 5635 }, { "epoch": 0.72, "grad_norm": 0.9299997125364499, "learning_rate": 3.887997008821158e-07, "loss": 0.6432, "step": 5636 }, { "epoch": 0.72, "grad_norm": 0.7290486746367228, "learning_rate": 3.884731412072726e-07, "loss": 0.512, "step": 5637 }, { "epoch": 0.72, "grad_norm": 0.6625466735837077, "learning_rate": 3.881466856690828e-07, "loss": 0.5427, "step": 5638 }, { "epoch": 0.72, "grad_norm": 0.7947228523207731, "learning_rate": 3.878203343231381e-07, "loss": 0.5534, "step": 5639 }, { "epoch": 0.72, "grad_norm": 0.7598809309543411, "learning_rate": 3.8749408722501286e-07, "loss": 0.584, "step": 5640 }, { "epoch": 0.72, "grad_norm": 0.6799863980299155, "learning_rate": 3.8716794443026346e-07, "loss": 0.5211, "step": 5641 }, { "epoch": 0.72, "grad_norm": 0.7177061591114916, "learning_rate": 3.8684190599442877e-07, "loss": 0.5389, "step": 5642 }, { "epoch": 0.72, "grad_norm": 0.6774474055425095, "learning_rate": 3.8651597197302957e-07, "loss": 0.5144, "step": 5643 }, { "epoch": 0.72, "grad_norm": 0.7936497786347725, "learning_rate": 3.861901424215691e-07, "loss": 0.6249, "step": 5644 }, { "epoch": 0.72, "grad_norm": 0.7193171634884387, "learning_rate": 3.8586441739553286e-07, "loss": 0.5401, "step": 5645 }, { "epoch": 0.72, "grad_norm": 0.6407225657827852, "learning_rate": 3.8553879695038825e-07, "loss": 0.5302, "step": 5646 }, { "epoch": 0.72, "grad_norm": 0.7594564268589244, "learning_rate": 3.8521328114158514e-07, "loss": 0.5709, "step": 5647 }, { "epoch": 0.72, "grad_norm": 0.7235973825228644, "learning_rate": 3.848878700245555e-07, "loss": 0.5508, "step": 5648 }, { "epoch": 0.72, "grad_norm": 0.7082406274337057, "learning_rate": 3.845625636547133e-07, "loss": 0.5594, "step": 5649 }, { "epoch": 0.72, "grad_norm": 0.6849050190232984, "learning_rate": 3.8423736208745495e-07, "loss": 0.5544, "step": 5650 }, { "epoch": 0.72, "grad_norm": 0.7083126181878967, "learning_rate": 3.8391226537815893e-07, "loss": 0.5608, "step": 5651 }, { "epoch": 0.72, "grad_norm": 0.9165264121721303, "learning_rate": 3.8358727358218567e-07, "loss": 0.6017, "step": 5652 }, { "epoch": 0.72, "grad_norm": 0.6792139031008324, "learning_rate": 3.8326238675487796e-07, "loss": 0.5051, "step": 5653 }, { "epoch": 0.72, "grad_norm": 0.8783079974285088, "learning_rate": 3.829376049515608e-07, "loss": 0.6141, "step": 5654 }, { "epoch": 0.72, "grad_norm": 0.6910445849666734, "learning_rate": 3.826129282275409e-07, "loss": 0.5792, "step": 5655 }, { "epoch": 0.72, "grad_norm": 0.6264432886792263, "learning_rate": 3.8228835663810744e-07, "loss": 0.5001, "step": 5656 }, { "epoch": 0.72, "grad_norm": 0.6886425668490055, "learning_rate": 3.819638902385316e-07, "loss": 0.5326, "step": 5657 }, { "epoch": 0.72, "grad_norm": 0.7083541693510764, "learning_rate": 3.816395290840663e-07, "loss": 0.5718, "step": 5658 }, { "epoch": 0.72, "grad_norm": 0.8706110717209241, "learning_rate": 3.813152732299474e-07, "loss": 0.5923, "step": 5659 }, { "epoch": 0.72, "grad_norm": 0.6481811781065423, "learning_rate": 3.809911227313921e-07, "loss": 0.5564, "step": 5660 }, { "epoch": 0.72, "grad_norm": 0.8860276958758795, "learning_rate": 3.8066707764359984e-07, "loss": 0.6638, "step": 5661 }, { "epoch": 0.72, "grad_norm": 0.7065248016366197, "learning_rate": 3.803431380217521e-07, "loss": 0.5159, "step": 5662 }, { "epoch": 0.72, "grad_norm": 0.8184495735106552, "learning_rate": 3.800193039210123e-07, "loss": 0.5614, "step": 5663 }, { "epoch": 0.72, "grad_norm": 0.7456130862153767, "learning_rate": 3.796955753965263e-07, "loss": 0.5629, "step": 5664 }, { "epoch": 0.72, "grad_norm": 0.6494410600876533, "learning_rate": 3.7937195250342146e-07, "loss": 0.4446, "step": 5665 }, { "epoch": 0.72, "grad_norm": 0.8292052811304128, "learning_rate": 3.790484352968074e-07, "loss": 0.5864, "step": 5666 }, { "epoch": 0.72, "grad_norm": 0.943749969158664, "learning_rate": 3.787250238317755e-07, "loss": 0.6446, "step": 5667 }, { "epoch": 0.72, "grad_norm": 1.1305559908320273, "learning_rate": 3.7840171816339995e-07, "loss": 0.6887, "step": 5668 }, { "epoch": 0.72, "grad_norm": 0.9048990157465916, "learning_rate": 3.780785183467361e-07, "loss": 0.5743, "step": 5669 }, { "epoch": 0.72, "grad_norm": 1.0013800266330322, "learning_rate": 3.777554244368214e-07, "loss": 0.6507, "step": 5670 }, { "epoch": 0.72, "grad_norm": 0.6408286246853133, "learning_rate": 3.774324364886754e-07, "loss": 0.545, "step": 5671 }, { "epoch": 0.72, "grad_norm": 0.6987290941933705, "learning_rate": 3.7710955455729965e-07, "loss": 0.5518, "step": 5672 }, { "epoch": 0.72, "grad_norm": 0.6554024374966796, "learning_rate": 3.767867786976775e-07, "loss": 0.5338, "step": 5673 }, { "epoch": 0.72, "grad_norm": 0.6239037578107334, "learning_rate": 3.7646410896477434e-07, "loss": 0.5475, "step": 5674 }, { "epoch": 0.72, "grad_norm": 0.7884867587550346, "learning_rate": 3.761415454135375e-07, "loss": 0.5515, "step": 5675 }, { "epoch": 0.72, "grad_norm": 0.6726928476493904, "learning_rate": 3.758190880988962e-07, "loss": 0.5805, "step": 5676 }, { "epoch": 0.72, "grad_norm": 0.7442757405552903, "learning_rate": 3.7549673707576134e-07, "loss": 0.5993, "step": 5677 }, { "epoch": 0.72, "grad_norm": 0.9335524228198817, "learning_rate": 3.751744923990263e-07, "loss": 0.6677, "step": 5678 }, { "epoch": 0.72, "grad_norm": 0.8306350597170582, "learning_rate": 3.7485235412356587e-07, "loss": 0.5326, "step": 5679 }, { "epoch": 0.72, "grad_norm": 0.6374638959772995, "learning_rate": 3.7453032230423667e-07, "loss": 0.597, "step": 5680 }, { "epoch": 0.72, "grad_norm": 0.6991136645641728, "learning_rate": 3.7420839699587746e-07, "loss": 0.5833, "step": 5681 }, { "epoch": 0.72, "grad_norm": 0.6509100616241281, "learning_rate": 3.7388657825330896e-07, "loss": 0.538, "step": 5682 }, { "epoch": 0.72, "grad_norm": 0.741819997366788, "learning_rate": 3.735648661313332e-07, "loss": 0.5591, "step": 5683 }, { "epoch": 0.72, "grad_norm": 0.7237088359086035, "learning_rate": 3.7324326068473465e-07, "loss": 0.6578, "step": 5684 }, { "epoch": 0.72, "grad_norm": 0.6856475418533206, "learning_rate": 3.729217619682794e-07, "loss": 0.5173, "step": 5685 }, { "epoch": 0.72, "grad_norm": 0.9904712018620103, "learning_rate": 3.7260037003671507e-07, "loss": 0.5813, "step": 5686 }, { "epoch": 0.72, "grad_norm": 0.9435615051526234, "learning_rate": 3.7227908494477166e-07, "loss": 0.6004, "step": 5687 }, { "epoch": 0.72, "grad_norm": 0.9248242407688326, "learning_rate": 3.7195790674716043e-07, "loss": 0.6485, "step": 5688 }, { "epoch": 0.72, "grad_norm": 1.3706062483485777, "learning_rate": 3.7163683549857483e-07, "loss": 0.6197, "step": 5689 }, { "epoch": 0.72, "grad_norm": 0.8803934424241523, "learning_rate": 3.7131587125368957e-07, "loss": 0.6409, "step": 5690 }, { "epoch": 0.73, "grad_norm": 0.8302924323683479, "learning_rate": 3.7099501406716213e-07, "loss": 0.6899, "step": 5691 }, { "epoch": 0.73, "grad_norm": 0.6412812985269206, "learning_rate": 3.70674263993631e-07, "loss": 0.5179, "step": 5692 }, { "epoch": 0.73, "grad_norm": 0.7041495309602617, "learning_rate": 3.703536210877163e-07, "loss": 0.5592, "step": 5693 }, { "epoch": 0.73, "grad_norm": 0.7343601360002561, "learning_rate": 3.7003308540402043e-07, "loss": 0.623, "step": 5694 }, { "epoch": 0.73, "grad_norm": 1.265452661775937, "learning_rate": 3.6971265699712707e-07, "loss": 0.6617, "step": 5695 }, { "epoch": 0.73, "grad_norm": 1.6367484623399495, "learning_rate": 3.69392335921602e-07, "loss": 0.6298, "step": 5696 }, { "epoch": 0.73, "grad_norm": 0.6541144746138948, "learning_rate": 3.6907212223199245e-07, "loss": 0.5352, "step": 5697 }, { "epoch": 0.73, "grad_norm": 0.887057917053404, "learning_rate": 3.687520159828277e-07, "loss": 0.6013, "step": 5698 }, { "epoch": 0.73, "grad_norm": 0.7097280276076654, "learning_rate": 3.6843201722861815e-07, "loss": 0.5318, "step": 5699 }, { "epoch": 0.73, "grad_norm": 0.7642529179001789, "learning_rate": 3.6811212602385643e-07, "loss": 0.6056, "step": 5700 }, { "epoch": 0.73, "grad_norm": 0.7825105960744196, "learning_rate": 3.6779234242301684e-07, "loss": 0.4952, "step": 5701 }, { "epoch": 0.73, "grad_norm": 0.6646143998436757, "learning_rate": 3.6747266648055486e-07, "loss": 0.5654, "step": 5702 }, { "epoch": 0.73, "grad_norm": 0.7662180896345064, "learning_rate": 3.671530982509082e-07, "loss": 0.514, "step": 5703 }, { "epoch": 0.73, "grad_norm": 0.8918002016201372, "learning_rate": 3.6683363778849606e-07, "loss": 0.6056, "step": 5704 }, { "epoch": 0.73, "grad_norm": 0.6431338243137317, "learning_rate": 3.665142851477191e-07, "loss": 0.4868, "step": 5705 }, { "epoch": 0.73, "grad_norm": 0.7079225303547374, "learning_rate": 3.661950403829597e-07, "loss": 0.5162, "step": 5706 }, { "epoch": 0.73, "grad_norm": 0.693117849949462, "learning_rate": 3.65875903548582e-07, "loss": 0.5862, "step": 5707 }, { "epoch": 0.73, "grad_norm": 0.9528997946336036, "learning_rate": 3.655568746989317e-07, "loss": 0.563, "step": 5708 }, { "epoch": 0.73, "grad_norm": 0.8025995367302733, "learning_rate": 3.652379538883361e-07, "loss": 0.5413, "step": 5709 }, { "epoch": 0.73, "grad_norm": 0.9338997104986322, "learning_rate": 3.64919141171104e-07, "loss": 0.6396, "step": 5710 }, { "epoch": 0.73, "grad_norm": 0.9213992574521623, "learning_rate": 3.6460043660152597e-07, "loss": 0.6392, "step": 5711 }, { "epoch": 0.73, "grad_norm": 0.7497322852113424, "learning_rate": 3.64281840233874e-07, "loss": 0.5543, "step": 5712 }, { "epoch": 0.73, "grad_norm": 0.9883316551361626, "learning_rate": 3.639633521224018e-07, "loss": 0.6072, "step": 5713 }, { "epoch": 0.73, "grad_norm": 0.6022824292986073, "learning_rate": 3.636449723213445e-07, "loss": 0.5175, "step": 5714 }, { "epoch": 0.73, "grad_norm": 0.6988706816024985, "learning_rate": 3.63326700884919e-07, "loss": 0.5022, "step": 5715 }, { "epoch": 0.73, "grad_norm": 0.6423306824324638, "learning_rate": 3.630085378673231e-07, "loss": 0.552, "step": 5716 }, { "epoch": 0.73, "grad_norm": 0.7070433458716213, "learning_rate": 3.626904833227374e-07, "loss": 0.524, "step": 5717 }, { "epoch": 0.73, "grad_norm": 0.9211856589031331, "learning_rate": 3.62372537305323e-07, "loss": 0.5994, "step": 5718 }, { "epoch": 0.73, "grad_norm": 0.8412038039125522, "learning_rate": 3.6205469986922276e-07, "loss": 0.6806, "step": 5719 }, { "epoch": 0.73, "grad_norm": 0.9619305359062121, "learning_rate": 3.6173697106856105e-07, "loss": 0.5893, "step": 5720 }, { "epoch": 0.73, "grad_norm": 0.6670960280214673, "learning_rate": 3.614193509574438e-07, "loss": 0.4929, "step": 5721 }, { "epoch": 0.73, "grad_norm": 0.8134727560394401, "learning_rate": 3.611018395899584e-07, "loss": 0.5222, "step": 5722 }, { "epoch": 0.73, "grad_norm": 0.9100486479867437, "learning_rate": 3.607844370201737e-07, "loss": 0.6301, "step": 5723 }, { "epoch": 0.73, "grad_norm": 0.9654131959319229, "learning_rate": 3.604671433021401e-07, "loss": 0.6433, "step": 5724 }, { "epoch": 0.73, "grad_norm": 0.8745255679086882, "learning_rate": 3.601499584898894e-07, "loss": 0.6353, "step": 5725 }, { "epoch": 0.73, "grad_norm": 0.7874900499305247, "learning_rate": 3.5983288263743484e-07, "loss": 0.6016, "step": 5726 }, { "epoch": 0.73, "grad_norm": 0.92974620714117, "learning_rate": 3.5951591579877116e-07, "loss": 0.6301, "step": 5727 }, { "epoch": 0.73, "grad_norm": 1.095512968931211, "learning_rate": 3.5919905802787444e-07, "loss": 0.6404, "step": 5728 }, { "epoch": 0.73, "grad_norm": 0.6936820973871652, "learning_rate": 3.5888230937870235e-07, "loss": 0.4504, "step": 5729 }, { "epoch": 0.73, "grad_norm": 0.885761147455906, "learning_rate": 3.5856566990519386e-07, "loss": 0.6426, "step": 5730 }, { "epoch": 0.73, "grad_norm": 0.7578193556243144, "learning_rate": 3.582491396612695e-07, "loss": 0.5432, "step": 5731 }, { "epoch": 0.73, "grad_norm": 0.7692919709012693, "learning_rate": 3.5793271870083076e-07, "loss": 0.5671, "step": 5732 }, { "epoch": 0.73, "grad_norm": 0.705146091921639, "learning_rate": 3.576164070777611e-07, "loss": 0.5412, "step": 5733 }, { "epoch": 0.73, "grad_norm": 0.8251223016221925, "learning_rate": 3.5730020484592503e-07, "loss": 0.5869, "step": 5734 }, { "epoch": 0.73, "grad_norm": 0.8413417025311235, "learning_rate": 3.569841120591684e-07, "loss": 0.6398, "step": 5735 }, { "epoch": 0.73, "grad_norm": 0.7767730569925216, "learning_rate": 3.566681287713187e-07, "loss": 0.6751, "step": 5736 }, { "epoch": 0.73, "grad_norm": 0.9218267478738325, "learning_rate": 3.5635225503618436e-07, "loss": 0.5885, "step": 5737 }, { "epoch": 0.73, "grad_norm": 0.9371008718996421, "learning_rate": 3.560364909075556e-07, "loss": 0.6263, "step": 5738 }, { "epoch": 0.73, "grad_norm": 0.8505065826853668, "learning_rate": 3.557208364392037e-07, "loss": 0.6352, "step": 5739 }, { "epoch": 0.73, "grad_norm": 0.9971196099513305, "learning_rate": 3.554052916848813e-07, "loss": 0.6141, "step": 5740 }, { "epoch": 0.73, "grad_norm": 0.8669741693440715, "learning_rate": 3.550898566983224e-07, "loss": 0.5501, "step": 5741 }, { "epoch": 0.73, "grad_norm": 0.734184248733248, "learning_rate": 3.547745315332422e-07, "loss": 0.4984, "step": 5742 }, { "epoch": 0.73, "grad_norm": 0.9132682328556705, "learning_rate": 3.54459316243337e-07, "loss": 0.637, "step": 5743 }, { "epoch": 0.73, "grad_norm": 0.7293704124754273, "learning_rate": 3.541442108822855e-07, "loss": 0.5558, "step": 5744 }, { "epoch": 0.73, "grad_norm": 0.6583476480373075, "learning_rate": 3.538292155037463e-07, "loss": 0.5578, "step": 5745 }, { "epoch": 0.73, "grad_norm": 0.8273508700945823, "learning_rate": 3.5351433016136e-07, "loss": 0.6368, "step": 5746 }, { "epoch": 0.73, "grad_norm": 1.0801050890692236, "learning_rate": 3.5319955490874833e-07, "loss": 0.6997, "step": 5747 }, { "epoch": 0.73, "grad_norm": 0.6923928977206866, "learning_rate": 3.5288488979951405e-07, "loss": 0.5793, "step": 5748 }, { "epoch": 0.73, "grad_norm": 0.6475577225395311, "learning_rate": 3.525703348872414e-07, "loss": 0.5399, "step": 5749 }, { "epoch": 0.73, "grad_norm": 0.717391738841494, "learning_rate": 3.5225589022549587e-07, "loss": 0.5385, "step": 5750 }, { "epoch": 0.73, "grad_norm": 0.619170186234015, "learning_rate": 3.5194155586782405e-07, "loss": 0.4734, "step": 5751 }, { "epoch": 0.73, "grad_norm": 0.6865834786064441, "learning_rate": 3.5162733186775395e-07, "loss": 0.5155, "step": 5752 }, { "epoch": 0.73, "grad_norm": 0.8425316295286494, "learning_rate": 3.5131321827879445e-07, "loss": 0.6992, "step": 5753 }, { "epoch": 0.73, "grad_norm": 0.7089700642503015, "learning_rate": 3.50999215154436e-07, "loss": 0.5359, "step": 5754 }, { "epoch": 0.73, "grad_norm": 0.7548519832301605, "learning_rate": 3.5068532254814985e-07, "loss": 0.5743, "step": 5755 }, { "epoch": 0.73, "grad_norm": 0.8162225543428423, "learning_rate": 3.503715405133888e-07, "loss": 0.5814, "step": 5756 }, { "epoch": 0.73, "grad_norm": 0.6999851248255272, "learning_rate": 3.5005786910358657e-07, "loss": 0.6528, "step": 5757 }, { "epoch": 0.73, "grad_norm": 1.0286441846532688, "learning_rate": 3.497443083721583e-07, "loss": 0.6493, "step": 5758 }, { "epoch": 0.73, "grad_norm": 0.6056998477752776, "learning_rate": 3.4943085837249996e-07, "loss": 0.488, "step": 5759 }, { "epoch": 0.73, "grad_norm": 0.7336988272175029, "learning_rate": 3.4911751915798884e-07, "loss": 0.5099, "step": 5760 }, { "epoch": 0.73, "grad_norm": 0.7082676119891784, "learning_rate": 3.4880429078198336e-07, "loss": 0.541, "step": 5761 }, { "epoch": 0.73, "grad_norm": 0.6961277029557731, "learning_rate": 3.4849117329782307e-07, "loss": 0.6181, "step": 5762 }, { "epoch": 0.73, "grad_norm": 0.6360665241105075, "learning_rate": 3.481781667588286e-07, "loss": 0.5429, "step": 5763 }, { "epoch": 0.73, "grad_norm": 0.6546843052751988, "learning_rate": 3.478652712183017e-07, "loss": 0.5436, "step": 5764 }, { "epoch": 0.73, "grad_norm": 0.7534481495545311, "learning_rate": 3.475524867295251e-07, "loss": 0.532, "step": 5765 }, { "epoch": 0.73, "grad_norm": 0.9141756088033309, "learning_rate": 3.47239813345763e-07, "loss": 0.5733, "step": 5766 }, { "epoch": 0.73, "grad_norm": 0.6552895499153839, "learning_rate": 3.4692725112026023e-07, "loss": 0.5349, "step": 5767 }, { "epoch": 0.73, "grad_norm": 0.8989269405108575, "learning_rate": 3.46614800106243e-07, "loss": 0.6189, "step": 5768 }, { "epoch": 0.73, "grad_norm": 0.8157002812079178, "learning_rate": 3.4630246035691803e-07, "loss": 0.6272, "step": 5769 }, { "epoch": 0.74, "grad_norm": 0.765591443916113, "learning_rate": 3.4599023192547416e-07, "loss": 0.5348, "step": 5770 }, { "epoch": 0.74, "grad_norm": 0.6610493503221078, "learning_rate": 3.4567811486508036e-07, "loss": 0.5578, "step": 5771 }, { "epoch": 0.74, "grad_norm": 0.6154499474453796, "learning_rate": 3.45366109228887e-07, "loss": 0.5131, "step": 5772 }, { "epoch": 0.74, "grad_norm": 0.736466065843992, "learning_rate": 3.450542150700253e-07, "loss": 0.5905, "step": 5773 }, { "epoch": 0.74, "grad_norm": 0.8641074444330841, "learning_rate": 3.447424324416076e-07, "loss": 0.6174, "step": 5774 }, { "epoch": 0.74, "grad_norm": 0.9036901292081694, "learning_rate": 3.4443076139672686e-07, "loss": 0.5486, "step": 5775 }, { "epoch": 0.74, "grad_norm": 0.777909857157713, "learning_rate": 3.441192019884581e-07, "loss": 0.5217, "step": 5776 }, { "epoch": 0.74, "grad_norm": 0.6953340673619705, "learning_rate": 3.438077542698564e-07, "loss": 0.522, "step": 5777 }, { "epoch": 0.74, "grad_norm": 0.7925712093410265, "learning_rate": 3.434964182939579e-07, "loss": 0.5432, "step": 5778 }, { "epoch": 0.74, "grad_norm": 0.8698399505396617, "learning_rate": 3.4318519411378e-07, "loss": 0.6053, "step": 5779 }, { "epoch": 0.74, "grad_norm": 0.6562668687938756, "learning_rate": 3.4287408178232093e-07, "loss": 0.5674, "step": 5780 }, { "epoch": 0.74, "grad_norm": 0.6393016710646782, "learning_rate": 3.4256308135255986e-07, "loss": 0.5374, "step": 5781 }, { "epoch": 0.74, "grad_norm": 0.8482827901332342, "learning_rate": 3.422521928774569e-07, "loss": 0.6118, "step": 5782 }, { "epoch": 0.74, "grad_norm": 0.8137188698815405, "learning_rate": 3.41941416409953e-07, "loss": 0.5832, "step": 5783 }, { "epoch": 0.74, "grad_norm": 0.7175818979624727, "learning_rate": 3.4163075200297044e-07, "loss": 0.5306, "step": 5784 }, { "epoch": 0.74, "grad_norm": 0.6499453413626055, "learning_rate": 3.413201997094118e-07, "loss": 0.5078, "step": 5785 }, { "epoch": 0.74, "grad_norm": 0.8887045495644119, "learning_rate": 3.410097595821613e-07, "loss": 0.6071, "step": 5786 }, { "epoch": 0.74, "grad_norm": 0.8068510664159982, "learning_rate": 3.406994316740832e-07, "loss": 0.6095, "step": 5787 }, { "epoch": 0.74, "grad_norm": 0.5978211233773183, "learning_rate": 3.4038921603802353e-07, "loss": 0.5437, "step": 5788 }, { "epoch": 0.74, "grad_norm": 0.6397747946298286, "learning_rate": 3.4007911272680845e-07, "loss": 0.5287, "step": 5789 }, { "epoch": 0.74, "grad_norm": 0.743824166198371, "learning_rate": 3.397691217932456e-07, "loss": 0.5567, "step": 5790 }, { "epoch": 0.74, "grad_norm": 0.8610524241864733, "learning_rate": 3.39459243290123e-07, "loss": 0.5567, "step": 5791 }, { "epoch": 0.74, "grad_norm": 0.8834732216972989, "learning_rate": 3.3914947727020983e-07, "loss": 0.6245, "step": 5792 }, { "epoch": 0.74, "grad_norm": 0.7323000852870963, "learning_rate": 3.3883982378625587e-07, "loss": 0.5299, "step": 5793 }, { "epoch": 0.74, "grad_norm": 1.1178825406788375, "learning_rate": 3.385302828909922e-07, "loss": 0.6723, "step": 5794 }, { "epoch": 0.74, "grad_norm": 0.6663765502003052, "learning_rate": 3.3822085463713e-07, "loss": 0.5355, "step": 5795 }, { "epoch": 0.74, "grad_norm": 0.7275387942290371, "learning_rate": 3.37911539077362e-07, "loss": 0.5456, "step": 5796 }, { "epoch": 0.74, "grad_norm": 0.793264902488691, "learning_rate": 3.3760233626436143e-07, "loss": 0.6138, "step": 5797 }, { "epoch": 0.74, "grad_norm": 0.6357927591353114, "learning_rate": 3.3729324625078204e-07, "loss": 0.5409, "step": 5798 }, { "epoch": 0.74, "grad_norm": 0.6616743283827022, "learning_rate": 3.369842690892588e-07, "loss": 0.5123, "step": 5799 }, { "epoch": 0.74, "grad_norm": 0.6557755226080524, "learning_rate": 3.366754048324073e-07, "loss": 0.5258, "step": 5800 }, { "epoch": 0.74, "grad_norm": 0.7016171342251507, "learning_rate": 3.3636665353282356e-07, "loss": 0.5419, "step": 5801 }, { "epoch": 0.74, "grad_norm": 0.8264927178926433, "learning_rate": 3.360580152430853e-07, "loss": 0.5457, "step": 5802 }, { "epoch": 0.74, "grad_norm": 0.6757933884449984, "learning_rate": 3.3574949001575024e-07, "loss": 0.5131, "step": 5803 }, { "epoch": 0.74, "grad_norm": 0.6978315581337292, "learning_rate": 3.3544107790335686e-07, "loss": 0.5245, "step": 5804 }, { "epoch": 0.74, "grad_norm": 1.168345711241714, "learning_rate": 3.3513277895842464e-07, "loss": 0.5766, "step": 5805 }, { "epoch": 0.74, "grad_norm": 1.7072733393663226, "learning_rate": 3.3482459323345357e-07, "loss": 0.626, "step": 5806 }, { "epoch": 0.74, "grad_norm": 0.87929478218828, "learning_rate": 3.345165207809246e-07, "loss": 0.6377, "step": 5807 }, { "epoch": 0.74, "grad_norm": 0.7302395341845647, "learning_rate": 3.3420856165329914e-07, "loss": 0.5043, "step": 5808 }, { "epoch": 0.74, "grad_norm": 0.7562458096444351, "learning_rate": 3.339007159030195e-07, "loss": 0.6078, "step": 5809 }, { "epoch": 0.74, "grad_norm": 0.9176892728148279, "learning_rate": 3.3359298358250856e-07, "loss": 0.6179, "step": 5810 }, { "epoch": 0.74, "grad_norm": 1.0168648141318495, "learning_rate": 3.332853647441699e-07, "loss": 0.6196, "step": 5811 }, { "epoch": 0.74, "grad_norm": 0.8792832325335851, "learning_rate": 3.329778594403879e-07, "loss": 0.61, "step": 5812 }, { "epoch": 0.74, "grad_norm": 0.620818500133194, "learning_rate": 3.3267046772352736e-07, "loss": 0.5233, "step": 5813 }, { "epoch": 0.74, "grad_norm": 0.7956865772804173, "learning_rate": 3.32363189645934e-07, "loss": 0.5224, "step": 5814 }, { "epoch": 0.74, "grad_norm": 0.9712212396831633, "learning_rate": 3.3205602525993414e-07, "loss": 0.575, "step": 5815 }, { "epoch": 0.74, "grad_norm": 0.687942536881875, "learning_rate": 3.317489746178346e-07, "loss": 0.5423, "step": 5816 }, { "epoch": 0.74, "grad_norm": 0.790649606600395, "learning_rate": 3.314420377719228e-07, "loss": 0.5504, "step": 5817 }, { "epoch": 0.74, "grad_norm": 0.8911346697113687, "learning_rate": 3.3113521477446694e-07, "loss": 0.633, "step": 5818 }, { "epoch": 0.74, "grad_norm": 0.8655680726192567, "learning_rate": 3.308285056777159e-07, "loss": 0.6088, "step": 5819 }, { "epoch": 0.74, "grad_norm": 0.6209669470859536, "learning_rate": 3.30521910533899e-07, "loss": 0.5294, "step": 5820 }, { "epoch": 0.74, "grad_norm": 0.884016614082206, "learning_rate": 3.3021542939522604e-07, "loss": 0.5896, "step": 5821 }, { "epoch": 0.74, "grad_norm": 0.8238501434643635, "learning_rate": 3.299090623138878e-07, "loss": 0.6253, "step": 5822 }, { "epoch": 0.74, "grad_norm": 0.5901895001899615, "learning_rate": 3.296028093420553e-07, "loss": 0.5076, "step": 5823 }, { "epoch": 0.74, "grad_norm": 0.7141574395921789, "learning_rate": 3.2929667053188016e-07, "loss": 0.5554, "step": 5824 }, { "epoch": 0.74, "grad_norm": 0.8327205427871258, "learning_rate": 3.2899064593549473e-07, "loss": 0.5562, "step": 5825 }, { "epoch": 0.74, "grad_norm": 0.7349262171116876, "learning_rate": 3.2868473560501173e-07, "loss": 0.5904, "step": 5826 }, { "epoch": 0.74, "grad_norm": 0.8749755200595951, "learning_rate": 3.2837893959252427e-07, "loss": 0.5949, "step": 5827 }, { "epoch": 0.74, "grad_norm": 0.8812178432126165, "learning_rate": 3.2807325795010675e-07, "loss": 0.5829, "step": 5828 }, { "epoch": 0.74, "grad_norm": 0.6641614595818213, "learning_rate": 3.2776769072981334e-07, "loss": 0.5149, "step": 5829 }, { "epoch": 0.74, "grad_norm": 0.7229082763539239, "learning_rate": 3.27462237983679e-07, "loss": 0.5831, "step": 5830 }, { "epoch": 0.74, "grad_norm": 0.8685170072285386, "learning_rate": 3.2715689976371905e-07, "loss": 0.5929, "step": 5831 }, { "epoch": 0.74, "grad_norm": 0.8442555346061793, "learning_rate": 3.268516761219294e-07, "loss": 0.6283, "step": 5832 }, { "epoch": 0.74, "grad_norm": 0.9047127824991924, "learning_rate": 3.265465671102865e-07, "loss": 0.5944, "step": 5833 }, { "epoch": 0.74, "grad_norm": 0.7487992377221222, "learning_rate": 3.2624157278074725e-07, "loss": 0.6051, "step": 5834 }, { "epoch": 0.74, "grad_norm": 0.7440641737492663, "learning_rate": 3.2593669318524906e-07, "loss": 0.4919, "step": 5835 }, { "epoch": 0.74, "grad_norm": 0.699072335321257, "learning_rate": 3.256319283757096e-07, "loss": 0.6298, "step": 5836 }, { "epoch": 0.74, "grad_norm": 0.7779862057038283, "learning_rate": 3.253272784040273e-07, "loss": 0.5882, "step": 5837 }, { "epoch": 0.74, "grad_norm": 0.7093812299805684, "learning_rate": 3.2502274332208066e-07, "loss": 0.54, "step": 5838 }, { "epoch": 0.74, "grad_norm": 0.7921287549656686, "learning_rate": 3.247183231817291e-07, "loss": 0.5281, "step": 5839 }, { "epoch": 0.74, "grad_norm": 0.7431927855377036, "learning_rate": 3.2441401803481204e-07, "loss": 0.5662, "step": 5840 }, { "epoch": 0.74, "grad_norm": 0.7988834614200662, "learning_rate": 3.2410982793314943e-07, "loss": 0.5267, "step": 5841 }, { "epoch": 0.74, "grad_norm": 0.7196114429735521, "learning_rate": 3.2380575292854185e-07, "loss": 0.5484, "step": 5842 }, { "epoch": 0.74, "grad_norm": 0.9493327805246935, "learning_rate": 3.2350179307277003e-07, "loss": 0.6253, "step": 5843 }, { "epoch": 0.74, "grad_norm": 0.923116914420983, "learning_rate": 3.231979484175951e-07, "loss": 0.6609, "step": 5844 }, { "epoch": 0.74, "grad_norm": 0.7335184992438261, "learning_rate": 3.2289421901475866e-07, "loss": 0.5611, "step": 5845 }, { "epoch": 0.74, "grad_norm": 0.7473007903135083, "learning_rate": 3.2259060491598265e-07, "loss": 0.6196, "step": 5846 }, { "epoch": 0.74, "grad_norm": 0.6626600084839624, "learning_rate": 3.222871061729694e-07, "loss": 0.5469, "step": 5847 }, { "epoch": 0.75, "grad_norm": 0.9307967898959475, "learning_rate": 3.2198372283740174e-07, "loss": 0.6419, "step": 5848 }, { "epoch": 0.75, "grad_norm": 1.1239732910627858, "learning_rate": 3.216804549609424e-07, "loss": 0.6662, "step": 5849 }, { "epoch": 0.75, "grad_norm": 0.9860592383288433, "learning_rate": 3.21377302595235e-07, "loss": 0.6305, "step": 5850 }, { "epoch": 0.75, "grad_norm": 0.7037699287905361, "learning_rate": 3.2107426579190313e-07, "loss": 0.5379, "step": 5851 }, { "epoch": 0.75, "grad_norm": 0.8041110153243058, "learning_rate": 3.2077134460255085e-07, "loss": 0.6147, "step": 5852 }, { "epoch": 0.75, "grad_norm": 0.9277156991105695, "learning_rate": 3.2046853907876226e-07, "loss": 0.6315, "step": 5853 }, { "epoch": 0.75, "grad_norm": 0.6645947637164459, "learning_rate": 3.20165849272102e-07, "loss": 0.526, "step": 5854 }, { "epoch": 0.75, "grad_norm": 0.630829437090905, "learning_rate": 3.198632752341154e-07, "loss": 0.5276, "step": 5855 }, { "epoch": 0.75, "grad_norm": 0.9585301332652266, "learning_rate": 3.1956081701632744e-07, "loss": 0.6518, "step": 5856 }, { "epoch": 0.75, "grad_norm": 0.8075130004812773, "learning_rate": 3.192584746702435e-07, "loss": 0.5808, "step": 5857 }, { "epoch": 0.75, "grad_norm": 0.896658701161667, "learning_rate": 3.1895624824734944e-07, "loss": 0.6567, "step": 5858 }, { "epoch": 0.75, "grad_norm": 0.6993892189305252, "learning_rate": 3.186541377991113e-07, "loss": 0.572, "step": 5859 }, { "epoch": 0.75, "grad_norm": 0.9111677077086835, "learning_rate": 3.183521433769752e-07, "loss": 0.6346, "step": 5860 }, { "epoch": 0.75, "grad_norm": 0.8241060621320211, "learning_rate": 3.1805026503236786e-07, "loss": 0.5864, "step": 5861 }, { "epoch": 0.75, "grad_norm": 0.8453980519869486, "learning_rate": 3.1774850281669574e-07, "loss": 0.5797, "step": 5862 }, { "epoch": 0.75, "grad_norm": 0.6837030792523212, "learning_rate": 3.174468567813461e-07, "loss": 0.539, "step": 5863 }, { "epoch": 0.75, "grad_norm": 0.7145268606034808, "learning_rate": 3.171453269776858e-07, "loss": 0.523, "step": 5864 }, { "epoch": 0.75, "grad_norm": 0.873029284981752, "learning_rate": 3.168439134570625e-07, "loss": 0.602, "step": 5865 }, { "epoch": 0.75, "grad_norm": 0.9084347916469823, "learning_rate": 3.165426162708036e-07, "loss": 0.6118, "step": 5866 }, { "epoch": 0.75, "grad_norm": 0.7668492368718475, "learning_rate": 3.16241435470217e-07, "loss": 0.5235, "step": 5867 }, { "epoch": 0.75, "grad_norm": 0.9148247845735581, "learning_rate": 3.1594037110659055e-07, "loss": 0.6085, "step": 5868 }, { "epoch": 0.75, "grad_norm": 0.7505909157839912, "learning_rate": 3.156394232311924e-07, "loss": 0.5847, "step": 5869 }, { "epoch": 0.75, "grad_norm": 0.972669951420328, "learning_rate": 3.1533859189527056e-07, "loss": 0.6468, "step": 5870 }, { "epoch": 0.75, "grad_norm": 0.7073165524131723, "learning_rate": 3.1503787715005413e-07, "loss": 0.5613, "step": 5871 }, { "epoch": 0.75, "grad_norm": 0.7364884884168592, "learning_rate": 3.147372790467513e-07, "loss": 0.5884, "step": 5872 }, { "epoch": 0.75, "grad_norm": 0.8198146454217182, "learning_rate": 3.1443679763655085e-07, "loss": 0.6002, "step": 5873 }, { "epoch": 0.75, "grad_norm": 0.640736354809276, "learning_rate": 3.141364329706215e-07, "loss": 0.5498, "step": 5874 }, { "epoch": 0.75, "grad_norm": 0.8282411289472325, "learning_rate": 3.1383618510011256e-07, "loss": 0.6634, "step": 5875 }, { "epoch": 0.75, "grad_norm": 0.6898672165851946, "learning_rate": 3.135360540761527e-07, "loss": 0.543, "step": 5876 }, { "epoch": 0.75, "grad_norm": 0.7772202242358836, "learning_rate": 3.1323603994985136e-07, "loss": 0.484, "step": 5877 }, { "epoch": 0.75, "grad_norm": 0.7160851177015373, "learning_rate": 3.1293614277229783e-07, "loss": 0.5489, "step": 5878 }, { "epoch": 0.75, "grad_norm": 3.470634077254529, "learning_rate": 3.126363625945614e-07, "loss": 0.6202, "step": 5879 }, { "epoch": 0.75, "grad_norm": 0.6962478631085542, "learning_rate": 3.1233669946769146e-07, "loss": 0.5654, "step": 5880 }, { "epoch": 0.75, "grad_norm": 1.2372474509010094, "learning_rate": 3.1203715344271763e-07, "loss": 0.5885, "step": 5881 }, { "epoch": 0.75, "grad_norm": 0.6905374522776376, "learning_rate": 3.117377245706494e-07, "loss": 0.5601, "step": 5882 }, { "epoch": 0.75, "grad_norm": 0.6446096421491658, "learning_rate": 3.114384129024763e-07, "loss": 0.5369, "step": 5883 }, { "epoch": 0.75, "grad_norm": 0.8597559308851518, "learning_rate": 3.1113921848916824e-07, "loss": 0.5895, "step": 5884 }, { "epoch": 0.75, "grad_norm": 0.8772365787598089, "learning_rate": 3.108401413816747e-07, "loss": 0.6113, "step": 5885 }, { "epoch": 0.75, "grad_norm": 0.9534231117900916, "learning_rate": 3.105411816309252e-07, "loss": 0.6511, "step": 5886 }, { "epoch": 0.75, "grad_norm": 0.6426617550519783, "learning_rate": 3.1024233928783007e-07, "loss": 0.5189, "step": 5887 }, { "epoch": 0.75, "grad_norm": 0.7991458684372903, "learning_rate": 3.0994361440327865e-07, "loss": 0.6336, "step": 5888 }, { "epoch": 0.75, "grad_norm": 0.7863653407208486, "learning_rate": 3.096450070281408e-07, "loss": 0.559, "step": 5889 }, { "epoch": 0.75, "grad_norm": 0.8139991693851578, "learning_rate": 3.093465172132661e-07, "loss": 0.6072, "step": 5890 }, { "epoch": 0.75, "grad_norm": 0.7531112597954773, "learning_rate": 3.090481450094844e-07, "loss": 0.5563, "step": 5891 }, { "epoch": 0.75, "grad_norm": 0.7928777938975712, "learning_rate": 3.0874989046760537e-07, "loss": 0.6599, "step": 5892 }, { "epoch": 0.75, "grad_norm": 0.8305517739709349, "learning_rate": 3.0845175363841857e-07, "loss": 0.6036, "step": 5893 }, { "epoch": 0.75, "grad_norm": 0.8707600837671162, "learning_rate": 3.0815373457269354e-07, "loss": 0.5195, "step": 5894 }, { "epoch": 0.75, "grad_norm": 0.6826618122513075, "learning_rate": 3.0785583332118005e-07, "loss": 0.566, "step": 5895 }, { "epoch": 0.75, "grad_norm": 0.9580070604257871, "learning_rate": 3.075580499346073e-07, "loss": 0.6318, "step": 5896 }, { "epoch": 0.75, "grad_norm": 0.9024557428755139, "learning_rate": 3.07260384463685e-07, "loss": 0.643, "step": 5897 }, { "epoch": 0.75, "grad_norm": 0.9010132672597385, "learning_rate": 3.069628369591022e-07, "loss": 0.6662, "step": 5898 }, { "epoch": 0.75, "grad_norm": 0.8653428609624886, "learning_rate": 3.0666540747152825e-07, "loss": 0.6094, "step": 5899 }, { "epoch": 0.75, "grad_norm": 0.7567622702802866, "learning_rate": 3.063680960516123e-07, "loss": 0.5387, "step": 5900 }, { "epoch": 0.75, "grad_norm": 0.7058286972326075, "learning_rate": 3.060709027499835e-07, "loss": 0.5688, "step": 5901 }, { "epoch": 0.75, "grad_norm": 0.7099711315485172, "learning_rate": 3.0577382761725057e-07, "loss": 0.5554, "step": 5902 }, { "epoch": 0.75, "grad_norm": 1.3374451369285574, "learning_rate": 3.054768707040024e-07, "loss": 0.6636, "step": 5903 }, { "epoch": 0.75, "grad_norm": 0.7678088930422041, "learning_rate": 3.0518003206080764e-07, "loss": 0.5435, "step": 5904 }, { "epoch": 0.75, "grad_norm": 0.8057889743666774, "learning_rate": 3.048833117382149e-07, "loss": 0.6084, "step": 5905 }, { "epoch": 0.75, "grad_norm": 0.789402270116011, "learning_rate": 3.045867097867525e-07, "loss": 0.6166, "step": 5906 }, { "epoch": 0.75, "grad_norm": 0.6010092065464289, "learning_rate": 3.0429022625692855e-07, "loss": 0.521, "step": 5907 }, { "epoch": 0.75, "grad_norm": 0.7685883267794263, "learning_rate": 3.039938611992313e-07, "loss": 0.5646, "step": 5908 }, { "epoch": 0.75, "grad_norm": 0.7271137045588643, "learning_rate": 3.036976146641286e-07, "loss": 0.5217, "step": 5909 }, { "epoch": 0.75, "grad_norm": 0.8723033513420435, "learning_rate": 3.03401486702068e-07, "loss": 0.6198, "step": 5910 }, { "epoch": 0.75, "grad_norm": 0.6420631709705109, "learning_rate": 3.0310547736347716e-07, "loss": 0.5385, "step": 5911 }, { "epoch": 0.75, "grad_norm": 0.7285856950327538, "learning_rate": 3.02809586698763e-07, "loss": 0.508, "step": 5912 }, { "epoch": 0.75, "grad_norm": 0.9364729073239833, "learning_rate": 3.025138147583133e-07, "loss": 0.6238, "step": 5913 }, { "epoch": 0.75, "grad_norm": 0.672512677048132, "learning_rate": 3.022181615924945e-07, "loss": 0.5078, "step": 5914 }, { "epoch": 0.75, "grad_norm": 0.7286426086039016, "learning_rate": 3.0192262725165343e-07, "loss": 0.5476, "step": 5915 }, { "epoch": 0.75, "grad_norm": 0.6051222656158866, "learning_rate": 3.016272117861164e-07, "loss": 0.5213, "step": 5916 }, { "epoch": 0.75, "grad_norm": 0.7458263641943669, "learning_rate": 3.013319152461895e-07, "loss": 0.5218, "step": 5917 }, { "epoch": 0.75, "grad_norm": 0.5798087896752275, "learning_rate": 3.0103673768215885e-07, "loss": 0.5144, "step": 5918 }, { "epoch": 0.75, "grad_norm": 0.8593932169101195, "learning_rate": 3.0074167914429003e-07, "loss": 0.6413, "step": 5919 }, { "epoch": 0.75, "grad_norm": 1.1249442734507533, "learning_rate": 3.004467396828285e-07, "loss": 0.6472, "step": 5920 }, { "epoch": 0.75, "grad_norm": 1.1032764200996437, "learning_rate": 3.001519193479992e-07, "loss": 0.6142, "step": 5921 }, { "epoch": 0.75, "grad_norm": 0.7960672483162087, "learning_rate": 2.9985721819000707e-07, "loss": 0.5517, "step": 5922 }, { "epoch": 0.75, "grad_norm": 0.8905330011675561, "learning_rate": 2.9956263625903675e-07, "loss": 0.6225, "step": 5923 }, { "epoch": 0.75, "grad_norm": 0.7375109057317656, "learning_rate": 2.9926817360525226e-07, "loss": 0.5631, "step": 5924 }, { "epoch": 0.75, "grad_norm": 0.7315892574651534, "learning_rate": 2.989738302787977e-07, "loss": 0.5022, "step": 5925 }, { "epoch": 0.75, "grad_norm": 0.8902411162171869, "learning_rate": 2.986796063297966e-07, "loss": 0.6337, "step": 5926 }, { "epoch": 0.76, "grad_norm": 0.6648039128691491, "learning_rate": 2.983855018083522e-07, "loss": 0.5424, "step": 5927 }, { "epoch": 0.76, "grad_norm": 0.7350085097037302, "learning_rate": 2.980915167645476e-07, "loss": 0.5902, "step": 5928 }, { "epoch": 0.76, "grad_norm": 0.8940501390351756, "learning_rate": 2.977976512484451e-07, "loss": 0.6454, "step": 5929 }, { "epoch": 0.76, "grad_norm": 0.6993363739562425, "learning_rate": 2.9750390531008716e-07, "loss": 0.54, "step": 5930 }, { "epoch": 0.76, "grad_norm": 0.6200667335529357, "learning_rate": 2.972102789994956e-07, "loss": 0.5284, "step": 5931 }, { "epoch": 0.76, "grad_norm": 0.7822235653729906, "learning_rate": 2.969167723666719e-07, "loss": 0.5475, "step": 5932 }, { "epoch": 0.76, "grad_norm": 0.8330225515108399, "learning_rate": 2.966233854615973e-07, "loss": 0.6172, "step": 5933 }, { "epoch": 0.76, "grad_norm": 0.6636067570718531, "learning_rate": 2.963301183342323e-07, "loss": 0.5242, "step": 5934 }, { "epoch": 0.76, "grad_norm": 0.9121945330320309, "learning_rate": 2.960369710345174e-07, "loss": 0.6105, "step": 5935 }, { "epoch": 0.76, "grad_norm": 0.8167515517254055, "learning_rate": 2.9574394361237256e-07, "loss": 0.5645, "step": 5936 }, { "epoch": 0.76, "grad_norm": 0.812513448596765, "learning_rate": 2.9545103611769715e-07, "loss": 0.6111, "step": 5937 }, { "epoch": 0.76, "grad_norm": 0.9495300615855655, "learning_rate": 2.9515824860037007e-07, "loss": 0.616, "step": 5938 }, { "epoch": 0.76, "grad_norm": 0.6913689970084194, "learning_rate": 2.948655811102506e-07, "loss": 0.5371, "step": 5939 }, { "epoch": 0.76, "grad_norm": 0.8802937277625881, "learning_rate": 2.945730336971767e-07, "loss": 0.6405, "step": 5940 }, { "epoch": 0.76, "grad_norm": 0.7373584911815954, "learning_rate": 2.9428060641096597e-07, "loss": 0.514, "step": 5941 }, { "epoch": 0.76, "grad_norm": 0.7069808947771954, "learning_rate": 2.939882993014158e-07, "loss": 0.5611, "step": 5942 }, { "epoch": 0.76, "grad_norm": 0.8262990620670041, "learning_rate": 2.9369611241830317e-07, "loss": 0.6475, "step": 5943 }, { "epoch": 0.76, "grad_norm": 0.9805695536166312, "learning_rate": 2.934040458113843e-07, "loss": 0.6657, "step": 5944 }, { "epoch": 0.76, "grad_norm": 0.8416006490534759, "learning_rate": 2.931120995303953e-07, "loss": 0.6377, "step": 5945 }, { "epoch": 0.76, "grad_norm": 0.7024144332158757, "learning_rate": 2.928202736250514e-07, "loss": 0.561, "step": 5946 }, { "epoch": 0.76, "grad_norm": 0.8380097521976135, "learning_rate": 2.9252856814504744e-07, "loss": 0.6261, "step": 5947 }, { "epoch": 0.76, "grad_norm": 0.6676750358608934, "learning_rate": 2.9223698314005795e-07, "loss": 0.6036, "step": 5948 }, { "epoch": 0.76, "grad_norm": 0.7503804444841885, "learning_rate": 2.9194551865973693e-07, "loss": 0.5397, "step": 5949 }, { "epoch": 0.76, "grad_norm": 0.7185406873018652, "learning_rate": 2.9165417475371755e-07, "loss": 0.5518, "step": 5950 }, { "epoch": 0.76, "grad_norm": 0.9515600478127305, "learning_rate": 2.913629514716127e-07, "loss": 0.6256, "step": 5951 }, { "epoch": 0.76, "grad_norm": 0.8155721731434047, "learning_rate": 2.910718488630147e-07, "loss": 0.5559, "step": 5952 }, { "epoch": 0.76, "grad_norm": 0.8346433566380455, "learning_rate": 2.907808669774954e-07, "loss": 0.5992, "step": 5953 }, { "epoch": 0.76, "grad_norm": 0.8681765162946844, "learning_rate": 2.904900058646057e-07, "loss": 0.6027, "step": 5954 }, { "epoch": 0.76, "grad_norm": 0.8985961344220704, "learning_rate": 2.9019926557387655e-07, "loss": 0.6296, "step": 5955 }, { "epoch": 0.76, "grad_norm": 0.8250808935886176, "learning_rate": 2.899086461548177e-07, "loss": 0.6135, "step": 5956 }, { "epoch": 0.76, "grad_norm": 0.8543350021824618, "learning_rate": 2.896181476569187e-07, "loss": 0.5992, "step": 5957 }, { "epoch": 0.76, "grad_norm": 0.6290483916150993, "learning_rate": 2.8932777012964847e-07, "loss": 0.5285, "step": 5958 }, { "epoch": 0.76, "grad_norm": 0.7897683100273575, "learning_rate": 2.890375136224553e-07, "loss": 0.5494, "step": 5959 }, { "epoch": 0.76, "grad_norm": 0.6985844366504481, "learning_rate": 2.887473781847668e-07, "loss": 0.5162, "step": 5960 }, { "epoch": 0.76, "grad_norm": 0.8868476607320602, "learning_rate": 2.8845736386598996e-07, "loss": 0.6152, "step": 5961 }, { "epoch": 0.76, "grad_norm": 0.661612283398571, "learning_rate": 2.881674707155114e-07, "loss": 0.5509, "step": 5962 }, { "epoch": 0.76, "grad_norm": 0.6592782315845923, "learning_rate": 2.8787769878269663e-07, "loss": 0.5782, "step": 5963 }, { "epoch": 0.76, "grad_norm": 0.6621554586034782, "learning_rate": 2.87588048116891e-07, "loss": 0.5321, "step": 5964 }, { "epoch": 0.76, "grad_norm": 0.6302506449920465, "learning_rate": 2.8729851876741853e-07, "loss": 0.5387, "step": 5965 }, { "epoch": 0.76, "grad_norm": 0.7597814475514004, "learning_rate": 2.870091107835838e-07, "loss": 0.5877, "step": 5966 }, { "epoch": 0.76, "grad_norm": 0.8048621459678099, "learning_rate": 2.8671982421466955e-07, "loss": 0.5961, "step": 5967 }, { "epoch": 0.76, "grad_norm": 0.8260896335850648, "learning_rate": 2.864306591099385e-07, "loss": 0.6144, "step": 5968 }, { "epoch": 0.76, "grad_norm": 0.6679263097156994, "learning_rate": 2.8614161551863216e-07, "loss": 0.5776, "step": 5969 }, { "epoch": 0.76, "grad_norm": 0.8429044437378839, "learning_rate": 2.858526934899718e-07, "loss": 0.5991, "step": 5970 }, { "epoch": 0.76, "grad_norm": 0.6192099758622432, "learning_rate": 2.8556389307315777e-07, "loss": 0.5455, "step": 5971 }, { "epoch": 0.76, "grad_norm": 0.6656384729197151, "learning_rate": 2.8527521431736956e-07, "loss": 0.5413, "step": 5972 }, { "epoch": 0.76, "grad_norm": 0.8303735985887707, "learning_rate": 2.849866572717666e-07, "loss": 0.5686, "step": 5973 }, { "epoch": 0.76, "grad_norm": 0.6814074621153496, "learning_rate": 2.846982219854871e-07, "loss": 0.5108, "step": 5974 }, { "epoch": 0.76, "grad_norm": 0.9248419250513557, "learning_rate": 2.8440990850764837e-07, "loss": 0.6285, "step": 5975 }, { "epoch": 0.76, "grad_norm": 0.7171224594582833, "learning_rate": 2.841217168873472e-07, "loss": 0.5253, "step": 5976 }, { "epoch": 0.76, "grad_norm": 1.3774248509425957, "learning_rate": 2.8383364717365977e-07, "loss": 0.5233, "step": 5977 }, { "epoch": 0.76, "grad_norm": 0.8276614614456784, "learning_rate": 2.835456994156412e-07, "loss": 0.6004, "step": 5978 }, { "epoch": 0.76, "grad_norm": 0.7087019241257078, "learning_rate": 2.832578736623261e-07, "loss": 0.5144, "step": 5979 }, { "epoch": 0.76, "grad_norm": 0.6276359430334197, "learning_rate": 2.8297016996272815e-07, "loss": 0.5452, "step": 5980 }, { "epoch": 0.76, "grad_norm": 0.8217087958089072, "learning_rate": 2.8268258836584026e-07, "loss": 0.532, "step": 5981 }, { "epoch": 0.76, "grad_norm": 0.7237063868456727, "learning_rate": 2.8239512892063465e-07, "loss": 0.553, "step": 5982 }, { "epoch": 0.76, "grad_norm": 0.7978746153924494, "learning_rate": 2.821077916760626e-07, "loss": 0.511, "step": 5983 }, { "epoch": 0.76, "grad_norm": 0.801993817746113, "learning_rate": 2.8182057668105477e-07, "loss": 0.5745, "step": 5984 }, { "epoch": 0.76, "grad_norm": 0.9009017945670311, "learning_rate": 2.815334839845207e-07, "loss": 0.601, "step": 5985 }, { "epoch": 0.76, "grad_norm": 0.6382229369215264, "learning_rate": 2.8124651363534937e-07, "loss": 0.5638, "step": 5986 }, { "epoch": 0.76, "grad_norm": 0.6531849683090609, "learning_rate": 2.809596656824088e-07, "loss": 0.5314, "step": 5987 }, { "epoch": 0.76, "grad_norm": 0.858003054363273, "learning_rate": 2.8067294017454634e-07, "loss": 0.606, "step": 5988 }, { "epoch": 0.76, "grad_norm": 0.6534372121008329, "learning_rate": 2.803863371605883e-07, "loss": 0.5379, "step": 5989 }, { "epoch": 0.76, "grad_norm": 0.6780509901106644, "learning_rate": 2.800998566893401e-07, "loss": 0.567, "step": 5990 }, { "epoch": 0.76, "grad_norm": 0.7245963483016915, "learning_rate": 2.7981349880958647e-07, "loss": 0.6026, "step": 5991 }, { "epoch": 0.76, "grad_norm": 0.6834542212246184, "learning_rate": 2.7952726357009117e-07, "loss": 0.5741, "step": 5992 }, { "epoch": 0.76, "grad_norm": 0.7117522101772877, "learning_rate": 2.79241151019597e-07, "loss": 0.5602, "step": 5993 }, { "epoch": 0.76, "grad_norm": 0.7829435136749733, "learning_rate": 2.7895516120682604e-07, "loss": 0.5894, "step": 5994 }, { "epoch": 0.76, "grad_norm": 0.8552278471823893, "learning_rate": 2.7866929418047924e-07, "loss": 0.5855, "step": 5995 }, { "epoch": 0.76, "grad_norm": 0.7876571787609605, "learning_rate": 2.7838354998923696e-07, "loss": 0.6259, "step": 5996 }, { "epoch": 0.76, "grad_norm": 0.7468719952193309, "learning_rate": 2.7809792868175806e-07, "loss": 0.6015, "step": 5997 }, { "epoch": 0.76, "grad_norm": 0.6653389532943565, "learning_rate": 2.7781243030668146e-07, "loss": 0.5129, "step": 5998 }, { "epoch": 0.76, "grad_norm": 0.7302850748586067, "learning_rate": 2.775270549126243e-07, "loss": 0.5613, "step": 5999 }, { "epoch": 0.76, "grad_norm": 0.8012297022188196, "learning_rate": 2.772418025481831e-07, "loss": 0.5791, "step": 6000 }, { "epoch": 0.76, "grad_norm": 0.7712472965593518, "learning_rate": 2.7695667326193315e-07, "loss": 0.6132, "step": 6001 }, { "epoch": 0.76, "grad_norm": 0.9609163877266168, "learning_rate": 2.7667166710242917e-07, "loss": 0.621, "step": 6002 }, { "epoch": 0.76, "grad_norm": 0.7093871269858573, "learning_rate": 2.763867841182047e-07, "loss": 0.5344, "step": 6003 }, { "epoch": 0.76, "grad_norm": 0.8373493860580146, "learning_rate": 2.761020243577724e-07, "loss": 0.6002, "step": 6004 }, { "epoch": 0.77, "grad_norm": 0.8757701649812755, "learning_rate": 2.7581738786962383e-07, "loss": 0.6883, "step": 6005 }, { "epoch": 0.77, "grad_norm": 0.8850009422470035, "learning_rate": 2.755328747022296e-07, "loss": 0.6528, "step": 6006 }, { "epoch": 0.77, "grad_norm": 0.6119258812233408, "learning_rate": 2.752484849040394e-07, "loss": 0.5361, "step": 6007 }, { "epoch": 0.77, "grad_norm": 0.9181877129155992, "learning_rate": 2.7496421852348174e-07, "loss": 0.6422, "step": 6008 }, { "epoch": 0.77, "grad_norm": 0.9390595934923341, "learning_rate": 2.7468007560896436e-07, "loss": 0.6581, "step": 6009 }, { "epoch": 0.77, "grad_norm": 0.599311594049132, "learning_rate": 2.7439605620887373e-07, "loss": 0.4629, "step": 6010 }, { "epoch": 0.77, "grad_norm": 0.6323790413306916, "learning_rate": 2.7411216037157547e-07, "loss": 0.5128, "step": 6011 }, { "epoch": 0.77, "grad_norm": 0.8333163902664827, "learning_rate": 2.738283881454141e-07, "loss": 0.6086, "step": 6012 }, { "epoch": 0.77, "grad_norm": 0.7831915336004608, "learning_rate": 2.7354473957871296e-07, "loss": 0.6011, "step": 6013 }, { "epoch": 0.77, "grad_norm": 1.0086546721777832, "learning_rate": 2.7326121471977446e-07, "loss": 0.6588, "step": 6014 }, { "epoch": 0.77, "grad_norm": 0.6512889296463563, "learning_rate": 2.7297781361688013e-07, "loss": 0.5406, "step": 6015 }, { "epoch": 0.77, "grad_norm": 0.7056414248507044, "learning_rate": 2.7269453631828997e-07, "loss": 0.5633, "step": 6016 }, { "epoch": 0.77, "grad_norm": 0.685780711374309, "learning_rate": 2.724113828722433e-07, "loss": 0.5203, "step": 6017 }, { "epoch": 0.77, "grad_norm": 0.95818693964581, "learning_rate": 2.7212835332695816e-07, "loss": 0.6883, "step": 6018 }, { "epoch": 0.77, "grad_norm": 1.1733131071209184, "learning_rate": 2.7184544773063155e-07, "loss": 0.6331, "step": 6019 }, { "epoch": 0.77, "grad_norm": 0.8648006787176854, "learning_rate": 2.715626661314393e-07, "loss": 0.618, "step": 6020 }, { "epoch": 0.77, "grad_norm": 0.9174846417979671, "learning_rate": 2.712800085775362e-07, "loss": 0.6733, "step": 6021 }, { "epoch": 0.77, "grad_norm": 0.9044695136020101, "learning_rate": 2.7099747511705596e-07, "loss": 0.6042, "step": 6022 }, { "epoch": 0.77, "grad_norm": 0.7249793149771113, "learning_rate": 2.707150657981107e-07, "loss": 0.5477, "step": 6023 }, { "epoch": 0.77, "grad_norm": 0.9120084937602344, "learning_rate": 2.704327806687923e-07, "loss": 0.5806, "step": 6024 }, { "epoch": 0.77, "grad_norm": 0.7179680252188136, "learning_rate": 2.7015061977717077e-07, "loss": 0.5615, "step": 6025 }, { "epoch": 0.77, "grad_norm": 0.8767730419977466, "learning_rate": 2.698685831712952e-07, "loss": 0.5966, "step": 6026 }, { "epoch": 0.77, "grad_norm": 0.6030170582314912, "learning_rate": 2.695866708991936e-07, "loss": 0.5098, "step": 6027 }, { "epoch": 0.77, "grad_norm": 0.7998701878114364, "learning_rate": 2.6930488300887245e-07, "loss": 0.6114, "step": 6028 }, { "epoch": 0.77, "grad_norm": 0.5926202499380043, "learning_rate": 2.690232195483174e-07, "loss": 0.5378, "step": 6029 }, { "epoch": 0.77, "grad_norm": 0.7452987961358355, "learning_rate": 2.6874168056549287e-07, "loss": 0.5214, "step": 6030 }, { "epoch": 0.77, "grad_norm": 0.8779067594455152, "learning_rate": 2.6846026610834184e-07, "loss": 0.6497, "step": 6031 }, { "epoch": 0.77, "grad_norm": 0.8918113440799582, "learning_rate": 2.6817897622478635e-07, "loss": 0.6204, "step": 6032 }, { "epoch": 0.77, "grad_norm": 0.6555849519456907, "learning_rate": 2.678978109627272e-07, "loss": 0.5411, "step": 6033 }, { "epoch": 0.77, "grad_norm": 0.82334238102189, "learning_rate": 2.676167703700439e-07, "loss": 0.6289, "step": 6034 }, { "epoch": 0.77, "grad_norm": 0.9163059483655831, "learning_rate": 2.673358544945946e-07, "loss": 0.619, "step": 6035 }, { "epoch": 0.77, "grad_norm": 0.7993175819126311, "learning_rate": 2.670550633842166e-07, "loss": 0.5785, "step": 6036 }, { "epoch": 0.77, "grad_norm": 0.6472885092900198, "learning_rate": 2.6677439708672544e-07, "loss": 0.5259, "step": 6037 }, { "epoch": 0.77, "grad_norm": 0.673458237520553, "learning_rate": 2.664938556499158e-07, "loss": 0.5284, "step": 6038 }, { "epoch": 0.77, "grad_norm": 0.72758721732903, "learning_rate": 2.662134391215608e-07, "loss": 0.585, "step": 6039 }, { "epoch": 0.77, "grad_norm": 0.8546450319004737, "learning_rate": 2.6593314754941276e-07, "loss": 0.609, "step": 6040 }, { "epoch": 0.77, "grad_norm": 0.7430057513295224, "learning_rate": 2.6565298098120213e-07, "loss": 0.4881, "step": 6041 }, { "epoch": 0.77, "grad_norm": 1.0513023731653521, "learning_rate": 2.6537293946463846e-07, "loss": 0.6306, "step": 6042 }, { "epoch": 0.77, "grad_norm": 0.7072360416962058, "learning_rate": 2.6509302304741e-07, "loss": 0.5034, "step": 6043 }, { "epoch": 0.77, "grad_norm": 0.7029103184322868, "learning_rate": 2.648132317771834e-07, "loss": 0.5379, "step": 6044 }, { "epoch": 0.77, "grad_norm": 0.6968776175618545, "learning_rate": 2.6453356570160445e-07, "loss": 0.5981, "step": 6045 }, { "epoch": 0.77, "grad_norm": 0.9282120789079186, "learning_rate": 2.6425402486829706e-07, "loss": 0.5668, "step": 6046 }, { "epoch": 0.77, "grad_norm": 0.7024100666034648, "learning_rate": 2.639746093248644e-07, "loss": 0.5308, "step": 6047 }, { "epoch": 0.77, "grad_norm": 0.7797537546210808, "learning_rate": 2.636953191188879e-07, "loss": 0.6172, "step": 6048 }, { "epoch": 0.77, "grad_norm": 0.8477346694084857, "learning_rate": 2.634161542979275e-07, "loss": 0.6261, "step": 6049 }, { "epoch": 0.77, "grad_norm": 0.6226113146710814, "learning_rate": 2.631371149095226e-07, "loss": 0.5089, "step": 6050 }, { "epoch": 0.77, "grad_norm": 0.6862058312423902, "learning_rate": 2.628582010011905e-07, "loss": 0.5606, "step": 6051 }, { "epoch": 0.77, "grad_norm": 0.6596465176514164, "learning_rate": 2.625794126204273e-07, "loss": 0.5599, "step": 6052 }, { "epoch": 0.77, "grad_norm": 0.706557169153166, "learning_rate": 2.623007498147076e-07, "loss": 0.5908, "step": 6053 }, { "epoch": 0.77, "grad_norm": 1.2337620003495469, "learning_rate": 2.620222126314851e-07, "loss": 0.6388, "step": 6054 }, { "epoch": 0.77, "grad_norm": 0.8286792519104608, "learning_rate": 2.6174380111819136e-07, "loss": 0.6585, "step": 6055 }, { "epoch": 0.77, "grad_norm": 0.6660888805168419, "learning_rate": 2.614655153222374e-07, "loss": 0.5667, "step": 6056 }, { "epoch": 0.77, "grad_norm": 0.84946454983359, "learning_rate": 2.611873552910121e-07, "loss": 0.6307, "step": 6057 }, { "epoch": 0.77, "grad_norm": 0.6954618567056445, "learning_rate": 2.6090932107188334e-07, "loss": 0.567, "step": 6058 }, { "epoch": 0.77, "grad_norm": 0.6363999654044589, "learning_rate": 2.6063141271219733e-07, "loss": 0.5084, "step": 6059 }, { "epoch": 0.77, "grad_norm": 0.8863282394633892, "learning_rate": 2.603536302592791e-07, "loss": 0.6457, "step": 6060 }, { "epoch": 0.77, "grad_norm": 0.7716979753587137, "learning_rate": 2.6007597376043213e-07, "loss": 0.6087, "step": 6061 }, { "epoch": 0.77, "grad_norm": 0.8491321496740809, "learning_rate": 2.597984432629382e-07, "loss": 0.5783, "step": 6062 }, { "epoch": 0.77, "grad_norm": 0.7575038963855081, "learning_rate": 2.5952103881405816e-07, "loss": 0.5343, "step": 6063 }, { "epoch": 0.77, "grad_norm": 0.8790303573683372, "learning_rate": 2.592437604610309e-07, "loss": 0.5781, "step": 6064 }, { "epoch": 0.77, "grad_norm": 0.7175466448177668, "learning_rate": 2.589666082510741e-07, "loss": 0.5735, "step": 6065 }, { "epoch": 0.77, "grad_norm": 0.9008488225721069, "learning_rate": 2.586895822313838e-07, "loss": 0.6768, "step": 6066 }, { "epoch": 0.77, "grad_norm": 0.7813207459569818, "learning_rate": 2.584126824491348e-07, "loss": 0.5979, "step": 6067 }, { "epoch": 0.77, "grad_norm": 0.7536466706599362, "learning_rate": 2.581359089514802e-07, "loss": 0.5722, "step": 6068 }, { "epoch": 0.77, "grad_norm": 0.6098455091943951, "learning_rate": 2.578592617855516e-07, "loss": 0.5599, "step": 6069 }, { "epoch": 0.77, "grad_norm": 0.7379375632818016, "learning_rate": 2.5758274099845924e-07, "loss": 0.5646, "step": 6070 }, { "epoch": 0.77, "grad_norm": 1.1268492324067776, "learning_rate": 2.5730634663729155e-07, "loss": 0.5671, "step": 6071 }, { "epoch": 0.77, "grad_norm": 0.9876355433443764, "learning_rate": 2.5703007874911566e-07, "loss": 0.5809, "step": 6072 }, { "epoch": 0.77, "grad_norm": 0.8503826355065556, "learning_rate": 2.5675393738097695e-07, "loss": 0.5519, "step": 6073 }, { "epoch": 0.77, "grad_norm": 0.7633957151592401, "learning_rate": 2.564779225798999e-07, "loss": 0.5927, "step": 6074 }, { "epoch": 0.77, "grad_norm": 0.7498349488460568, "learning_rate": 2.5620203439288654e-07, "loss": 0.5553, "step": 6075 }, { "epoch": 0.77, "grad_norm": 0.7646240192835654, "learning_rate": 2.55926272866918e-07, "loss": 0.5602, "step": 6076 }, { "epoch": 0.77, "grad_norm": 0.6923670154849167, "learning_rate": 2.556506380489536e-07, "loss": 0.5248, "step": 6077 }, { "epoch": 0.77, "grad_norm": 0.5882822932529663, "learning_rate": 2.553751299859308e-07, "loss": 0.5038, "step": 6078 }, { "epoch": 0.77, "grad_norm": 0.7184045870360668, "learning_rate": 2.550997487247659e-07, "loss": 0.614, "step": 6079 }, { "epoch": 0.77, "grad_norm": 0.7396086174753792, "learning_rate": 2.5482449431235367e-07, "loss": 0.5546, "step": 6080 }, { "epoch": 0.77, "grad_norm": 0.839498982699552, "learning_rate": 2.5454936679556637e-07, "loss": 0.6348, "step": 6081 }, { "epoch": 0.77, "grad_norm": 1.2844836248116014, "learning_rate": 2.542743662212563e-07, "loss": 0.6028, "step": 6082 }, { "epoch": 0.77, "grad_norm": 0.8626961173870437, "learning_rate": 2.5399949263625264e-07, "loss": 0.6127, "step": 6083 }, { "epoch": 0.78, "grad_norm": 0.7169707515493922, "learning_rate": 2.537247460873637e-07, "loss": 0.5375, "step": 6084 }, { "epoch": 0.78, "grad_norm": 0.9177461478876875, "learning_rate": 2.5345012662137576e-07, "loss": 0.6239, "step": 6085 }, { "epoch": 0.78, "grad_norm": 0.86032458799023, "learning_rate": 2.5317563428505384e-07, "loss": 0.5523, "step": 6086 }, { "epoch": 0.78, "grad_norm": 0.6310966666878378, "learning_rate": 2.529012691251411e-07, "loss": 0.5294, "step": 6087 }, { "epoch": 0.78, "grad_norm": 0.8590898962341832, "learning_rate": 2.5262703118835893e-07, "loss": 0.6141, "step": 6088 }, { "epoch": 0.78, "grad_norm": 0.6990846512341045, "learning_rate": 2.5235292052140724e-07, "loss": 0.539, "step": 6089 }, { "epoch": 0.78, "grad_norm": 0.711640847688368, "learning_rate": 2.520789371709643e-07, "loss": 0.5281, "step": 6090 }, { "epoch": 0.78, "grad_norm": 0.9508934633138567, "learning_rate": 2.518050811836865e-07, "loss": 0.6899, "step": 6091 }, { "epoch": 0.78, "grad_norm": 0.8414401434256702, "learning_rate": 2.5153135260620884e-07, "loss": 0.5984, "step": 6092 }, { "epoch": 0.78, "grad_norm": 0.8030659867717226, "learning_rate": 2.512577514851443e-07, "loss": 0.5571, "step": 6093 }, { "epoch": 0.78, "grad_norm": 0.682602018310188, "learning_rate": 2.509842778670843e-07, "loss": 0.5477, "step": 6094 }, { "epoch": 0.78, "grad_norm": 0.7733376679539254, "learning_rate": 2.507109317985986e-07, "loss": 0.5766, "step": 6095 }, { "epoch": 0.78, "grad_norm": 0.653894245541389, "learning_rate": 2.504377133262352e-07, "loss": 0.5307, "step": 6096 }, { "epoch": 0.78, "grad_norm": 0.8025611776222085, "learning_rate": 2.501646224965204e-07, "loss": 0.5452, "step": 6097 }, { "epoch": 0.78, "grad_norm": 0.9064154088879603, "learning_rate": 2.4989165935595856e-07, "loss": 0.6348, "step": 6098 }, { "epoch": 0.78, "grad_norm": 0.9701378869557915, "learning_rate": 2.496188239510326e-07, "loss": 0.6102, "step": 6099 }, { "epoch": 0.78, "grad_norm": 0.8636391009582799, "learning_rate": 2.4934611632820355e-07, "loss": 0.6453, "step": 6100 }, { "epoch": 0.78, "grad_norm": 0.6444961434363945, "learning_rate": 2.490735365339106e-07, "loss": 0.5063, "step": 6101 }, { "epoch": 0.78, "grad_norm": 0.7861813193236913, "learning_rate": 2.488010846145713e-07, "loss": 0.5004, "step": 6102 }, { "epoch": 0.78, "grad_norm": 0.9246001674383162, "learning_rate": 2.4852876061658135e-07, "loss": 0.6255, "step": 6103 }, { "epoch": 0.78, "grad_norm": 0.6998986128582865, "learning_rate": 2.482565645863146e-07, "loss": 0.5556, "step": 6104 }, { "epoch": 0.78, "grad_norm": 0.7750874413452558, "learning_rate": 2.4798449657012346e-07, "loss": 0.5391, "step": 6105 }, { "epoch": 0.78, "grad_norm": 0.7330613515598562, "learning_rate": 2.47712556614338e-07, "loss": 0.6398, "step": 6106 }, { "epoch": 0.78, "grad_norm": 0.6800382695291657, "learning_rate": 2.474407447652669e-07, "loss": 0.5456, "step": 6107 }, { "epoch": 0.78, "grad_norm": 0.7945426239907315, "learning_rate": 2.4716906106919655e-07, "loss": 0.6407, "step": 6108 }, { "epoch": 0.78, "grad_norm": 1.0273469437952154, "learning_rate": 2.4689750557239254e-07, "loss": 0.6423, "step": 6109 }, { "epoch": 0.78, "grad_norm": 0.6693935614946517, "learning_rate": 2.466260783210975e-07, "loss": 0.5235, "step": 6110 }, { "epoch": 0.78, "grad_norm": 0.6390362376226462, "learning_rate": 2.463547793615327e-07, "loss": 0.6077, "step": 6111 }, { "epoch": 0.78, "grad_norm": 0.6704570518970413, "learning_rate": 2.460836087398975e-07, "loss": 0.4968, "step": 6112 }, { "epoch": 0.78, "grad_norm": 0.8912108939109336, "learning_rate": 2.4581256650236957e-07, "loss": 0.6156, "step": 6113 }, { "epoch": 0.78, "grad_norm": 0.8322900410594176, "learning_rate": 2.4554165269510464e-07, "loss": 0.6619, "step": 6114 }, { "epoch": 0.78, "grad_norm": 0.8037544824383916, "learning_rate": 2.452708673642362e-07, "loss": 0.5208, "step": 6115 }, { "epoch": 0.78, "grad_norm": 0.7923775818520773, "learning_rate": 2.450002105558763e-07, "loss": 0.5446, "step": 6116 }, { "epoch": 0.78, "grad_norm": 0.6541664350567258, "learning_rate": 2.4472968231611514e-07, "loss": 0.5657, "step": 6117 }, { "epoch": 0.78, "grad_norm": 0.7507058070968536, "learning_rate": 2.444592826910207e-07, "loss": 0.5654, "step": 6118 }, { "epoch": 0.78, "grad_norm": 0.8131987818553688, "learning_rate": 2.441890117266392e-07, "loss": 0.5368, "step": 6119 }, { "epoch": 0.78, "grad_norm": 0.8194160494509573, "learning_rate": 2.43918869468995e-07, "loss": 0.6116, "step": 6120 }, { "epoch": 0.78, "grad_norm": 0.5885947732279796, "learning_rate": 2.436488559640906e-07, "loss": 0.5304, "step": 6121 }, { "epoch": 0.78, "grad_norm": 0.912678274163651, "learning_rate": 2.433789712579064e-07, "loss": 0.6646, "step": 6122 }, { "epoch": 0.78, "grad_norm": 0.697436056537751, "learning_rate": 2.4310921539640096e-07, "loss": 0.5468, "step": 6123 }, { "epoch": 0.78, "grad_norm": 0.917269640770019, "learning_rate": 2.428395884255109e-07, "loss": 0.6514, "step": 6124 }, { "epoch": 0.78, "grad_norm": 0.6277692097972063, "learning_rate": 2.4257009039115086e-07, "loss": 0.5016, "step": 6125 }, { "epoch": 0.78, "grad_norm": 0.8851186465441103, "learning_rate": 2.4230072133921363e-07, "loss": 0.6733, "step": 6126 }, { "epoch": 0.78, "grad_norm": 0.7167326259486761, "learning_rate": 2.4203148131556994e-07, "loss": 0.5695, "step": 6127 }, { "epoch": 0.78, "grad_norm": 0.795848265873327, "learning_rate": 2.417623703660685e-07, "loss": 0.5531, "step": 6128 }, { "epoch": 0.78, "grad_norm": 0.8395441831281, "learning_rate": 2.414933885365361e-07, "loss": 0.6273, "step": 6129 }, { "epoch": 0.78, "grad_norm": 0.9083910954991953, "learning_rate": 2.412245358727776e-07, "loss": 0.6536, "step": 6130 }, { "epoch": 0.78, "grad_norm": 0.8073624926381848, "learning_rate": 2.4095581242057574e-07, "loss": 0.5776, "step": 6131 }, { "epoch": 0.78, "grad_norm": 0.966493862579685, "learning_rate": 2.406872182256913e-07, "loss": 0.6228, "step": 6132 }, { "epoch": 0.78, "grad_norm": 1.3744782102181836, "learning_rate": 2.404187533338632e-07, "loss": 0.6135, "step": 6133 }, { "epoch": 0.78, "grad_norm": 0.7040664936061835, "learning_rate": 2.401504177908079e-07, "loss": 0.4891, "step": 6134 }, { "epoch": 0.78, "grad_norm": 0.9007136033087073, "learning_rate": 2.3988221164222057e-07, "loss": 0.6053, "step": 6135 }, { "epoch": 0.78, "grad_norm": 0.8825921920360752, "learning_rate": 2.396141349337738e-07, "loss": 0.6231, "step": 6136 }, { "epoch": 0.78, "grad_norm": 0.8061405324686548, "learning_rate": 2.3934618771111827e-07, "loss": 0.6048, "step": 6137 }, { "epoch": 0.78, "grad_norm": 0.896511943011456, "learning_rate": 2.390783700198825e-07, "loss": 0.6417, "step": 6138 }, { "epoch": 0.78, "grad_norm": 0.6907283545552877, "learning_rate": 2.3881068190567313e-07, "loss": 0.5087, "step": 6139 }, { "epoch": 0.78, "grad_norm": 0.8643050289739771, "learning_rate": 2.3854312341407456e-07, "loss": 0.6739, "step": 6140 }, { "epoch": 0.78, "grad_norm": 1.1313963787201058, "learning_rate": 2.3827569459064921e-07, "loss": 0.6172, "step": 6141 }, { "epoch": 0.78, "grad_norm": 0.6790436340164293, "learning_rate": 2.3800839548093766e-07, "loss": 0.536, "step": 6142 }, { "epoch": 0.78, "grad_norm": 0.9025010247593205, "learning_rate": 2.3774122613045788e-07, "loss": 0.6232, "step": 6143 }, { "epoch": 0.78, "grad_norm": 0.6983096215804301, "learning_rate": 2.374741865847061e-07, "loss": 0.5871, "step": 6144 }, { "epoch": 0.78, "grad_norm": 0.8720582820253614, "learning_rate": 2.3720727688915643e-07, "loss": 0.6272, "step": 6145 }, { "epoch": 0.78, "grad_norm": 0.8103668726560552, "learning_rate": 2.369404970892609e-07, "loss": 0.6021, "step": 6146 }, { "epoch": 0.78, "grad_norm": 0.8380919811573346, "learning_rate": 2.3667384723044915e-07, "loss": 0.6051, "step": 6147 }, { "epoch": 0.78, "grad_norm": 0.7051538427291505, "learning_rate": 2.3640732735812884e-07, "loss": 0.5595, "step": 6148 }, { "epoch": 0.78, "grad_norm": 0.6753894229103087, "learning_rate": 2.361409375176857e-07, "loss": 0.603, "step": 6149 }, { "epoch": 0.78, "grad_norm": 0.8540110218099938, "learning_rate": 2.3587467775448312e-07, "loss": 0.6301, "step": 6150 }, { "epoch": 0.78, "grad_norm": 0.6839159268351425, "learning_rate": 2.3560854811386233e-07, "loss": 0.5673, "step": 6151 }, { "epoch": 0.78, "grad_norm": 0.849517657035319, "learning_rate": 2.3534254864114234e-07, "loss": 0.6076, "step": 6152 }, { "epoch": 0.78, "grad_norm": 0.601423182989811, "learning_rate": 2.3507667938162025e-07, "loss": 0.539, "step": 6153 }, { "epoch": 0.78, "grad_norm": 0.7347950639052361, "learning_rate": 2.348109403805707e-07, "loss": 0.6093, "step": 6154 }, { "epoch": 0.78, "grad_norm": 0.6234778762735239, "learning_rate": 2.345453316832463e-07, "loss": 0.5314, "step": 6155 }, { "epoch": 0.78, "grad_norm": 0.7080346633341409, "learning_rate": 2.3427985333487755e-07, "loss": 0.631, "step": 6156 }, { "epoch": 0.78, "grad_norm": 0.8627958933750759, "learning_rate": 2.3401450538067246e-07, "loss": 0.5575, "step": 6157 }, { "epoch": 0.78, "grad_norm": 0.7698302461478834, "learning_rate": 2.3374928786581717e-07, "loss": 0.6538, "step": 6158 }, { "epoch": 0.78, "grad_norm": 0.8191399467609689, "learning_rate": 2.3348420083547549e-07, "loss": 0.6162, "step": 6159 }, { "epoch": 0.78, "grad_norm": 0.7978954900859313, "learning_rate": 2.3321924433478845e-07, "loss": 0.58, "step": 6160 }, { "epoch": 0.78, "grad_norm": 1.0096037009488432, "learning_rate": 2.3295441840887632e-07, "loss": 0.6583, "step": 6161 }, { "epoch": 0.79, "grad_norm": 0.8077684486627513, "learning_rate": 2.3268972310283552e-07, "loss": 0.557, "step": 6162 }, { "epoch": 0.79, "grad_norm": 0.9059330619770233, "learning_rate": 2.3242515846174125e-07, "loss": 0.6176, "step": 6163 }, { "epoch": 0.79, "grad_norm": 0.8456246308072842, "learning_rate": 2.3216072453064583e-07, "loss": 0.6181, "step": 6164 }, { "epoch": 0.79, "grad_norm": 0.6662105081644466, "learning_rate": 2.3189642135457987e-07, "loss": 0.4938, "step": 6165 }, { "epoch": 0.79, "grad_norm": 0.6412952348471704, "learning_rate": 2.3163224897855116e-07, "loss": 0.4717, "step": 6166 }, { "epoch": 0.79, "grad_norm": 0.658051029100701, "learning_rate": 2.313682074475458e-07, "loss": 0.5605, "step": 6167 }, { "epoch": 0.79, "grad_norm": 0.6546807765105235, "learning_rate": 2.311042968065271e-07, "loss": 0.5024, "step": 6168 }, { "epoch": 0.79, "grad_norm": 0.7221144963752282, "learning_rate": 2.308405171004363e-07, "loss": 0.5603, "step": 6169 }, { "epoch": 0.79, "grad_norm": 0.6648297769418202, "learning_rate": 2.3057686837419243e-07, "loss": 0.5343, "step": 6170 }, { "epoch": 0.79, "grad_norm": 1.345693928460443, "learning_rate": 2.3031335067269208e-07, "loss": 0.6107, "step": 6171 }, { "epoch": 0.79, "grad_norm": 0.7682834688878059, "learning_rate": 2.3004996404080957e-07, "loss": 0.5876, "step": 6172 }, { "epoch": 0.79, "grad_norm": 0.9228110819314236, "learning_rate": 2.2978670852339698e-07, "loss": 0.6508, "step": 6173 }, { "epoch": 0.79, "grad_norm": 0.8414256310721255, "learning_rate": 2.2952358416528383e-07, "loss": 0.6273, "step": 6174 }, { "epoch": 0.79, "grad_norm": 0.825416034353555, "learning_rate": 2.2926059101127727e-07, "loss": 0.6615, "step": 6175 }, { "epoch": 0.79, "grad_norm": 0.7903043909293808, "learning_rate": 2.2899772910616276e-07, "loss": 0.6043, "step": 6176 }, { "epoch": 0.79, "grad_norm": 0.6344609516988309, "learning_rate": 2.287349984947028e-07, "loss": 0.5433, "step": 6177 }, { "epoch": 0.79, "grad_norm": 0.7786275728995095, "learning_rate": 2.2847239922163763e-07, "loss": 0.6226, "step": 6178 }, { "epoch": 0.79, "grad_norm": 0.8085348294502698, "learning_rate": 2.282099313316852e-07, "loss": 0.6045, "step": 6179 }, { "epoch": 0.79, "grad_norm": 0.8402381436845174, "learning_rate": 2.2794759486954095e-07, "loss": 0.6763, "step": 6180 }, { "epoch": 0.79, "grad_norm": 0.784085074016798, "learning_rate": 2.2768538987987808e-07, "loss": 0.6018, "step": 6181 }, { "epoch": 0.79, "grad_norm": 0.7082911759927489, "learning_rate": 2.2742331640734745e-07, "loss": 0.5078, "step": 6182 }, { "epoch": 0.79, "grad_norm": 0.6877394593846569, "learning_rate": 2.2716137449657734e-07, "loss": 0.5292, "step": 6183 }, { "epoch": 0.79, "grad_norm": 0.7131794318013999, "learning_rate": 2.268995641921738e-07, "loss": 0.5109, "step": 6184 }, { "epoch": 0.79, "grad_norm": 0.8070935178112008, "learning_rate": 2.2663788553872042e-07, "loss": 0.6405, "step": 6185 }, { "epoch": 0.79, "grad_norm": 0.8510749910399822, "learning_rate": 2.263763385807783e-07, "loss": 0.6401, "step": 6186 }, { "epoch": 0.79, "grad_norm": 0.7421056235210514, "learning_rate": 2.2611492336288617e-07, "loss": 0.5731, "step": 6187 }, { "epoch": 0.79, "grad_norm": 0.8394451344147188, "learning_rate": 2.258536399295603e-07, "loss": 0.6112, "step": 6188 }, { "epoch": 0.79, "grad_norm": 0.8608248262112019, "learning_rate": 2.2559248832529453e-07, "loss": 0.649, "step": 6189 }, { "epoch": 0.79, "grad_norm": 0.8231386492152521, "learning_rate": 2.2533146859456032e-07, "loss": 0.586, "step": 6190 }, { "epoch": 0.79, "grad_norm": 0.8864386376330439, "learning_rate": 2.2507058078180652e-07, "loss": 0.6393, "step": 6191 }, { "epoch": 0.79, "grad_norm": 0.8523150774911349, "learning_rate": 2.248098249314594e-07, "loss": 0.5791, "step": 6192 }, { "epoch": 0.79, "grad_norm": 0.7245519485811553, "learning_rate": 2.2454920108792352e-07, "loss": 0.5142, "step": 6193 }, { "epoch": 0.79, "grad_norm": 0.6229072817668845, "learning_rate": 2.2428870929558007e-07, "loss": 0.4774, "step": 6194 }, { "epoch": 0.79, "grad_norm": 0.615961047832533, "learning_rate": 2.2402834959878813e-07, "loss": 0.544, "step": 6195 }, { "epoch": 0.79, "grad_norm": 0.8719132548508182, "learning_rate": 2.2376812204188423e-07, "loss": 0.6309, "step": 6196 }, { "epoch": 0.79, "grad_norm": 0.6511790523559398, "learning_rate": 2.2350802666918245e-07, "loss": 0.5465, "step": 6197 }, { "epoch": 0.79, "grad_norm": 1.129917346950309, "learning_rate": 2.2324806352497427e-07, "loss": 0.5943, "step": 6198 }, { "epoch": 0.79, "grad_norm": 0.5643226897481541, "learning_rate": 2.2298823265352873e-07, "loss": 0.4993, "step": 6199 }, { "epoch": 0.79, "grad_norm": 0.5828763068031434, "learning_rate": 2.227285340990924e-07, "loss": 0.4714, "step": 6200 }, { "epoch": 0.79, "grad_norm": 0.8251071262766166, "learning_rate": 2.224689679058891e-07, "loss": 0.5888, "step": 6201 }, { "epoch": 0.79, "grad_norm": 0.8262014776427893, "learning_rate": 2.2220953411812026e-07, "loss": 0.6359, "step": 6202 }, { "epoch": 0.79, "grad_norm": 0.7236075754734544, "learning_rate": 2.2195023277996482e-07, "loss": 0.5711, "step": 6203 }, { "epoch": 0.79, "grad_norm": 0.7054642196908039, "learning_rate": 2.2169106393557914e-07, "loss": 0.5468, "step": 6204 }, { "epoch": 0.79, "grad_norm": 0.8547135533385697, "learning_rate": 2.2143202762909686e-07, "loss": 0.5628, "step": 6205 }, { "epoch": 0.79, "grad_norm": 0.8491345267036173, "learning_rate": 2.2117312390462916e-07, "loss": 0.6379, "step": 6206 }, { "epoch": 0.79, "grad_norm": 1.0459942755065024, "learning_rate": 2.209143528062648e-07, "loss": 0.6973, "step": 6207 }, { "epoch": 0.79, "grad_norm": 0.739616360893179, "learning_rate": 2.2065571437806962e-07, "loss": 0.5401, "step": 6208 }, { "epoch": 0.79, "grad_norm": 0.817552372591894, "learning_rate": 2.2039720866408717e-07, "loss": 0.6163, "step": 6209 }, { "epoch": 0.79, "grad_norm": 1.0250304468731926, "learning_rate": 2.2013883570833823e-07, "loss": 0.6374, "step": 6210 }, { "epoch": 0.79, "grad_norm": 0.7444594755446832, "learning_rate": 2.1988059555482098e-07, "loss": 0.5512, "step": 6211 }, { "epoch": 0.79, "grad_norm": 0.6644078161501535, "learning_rate": 2.1962248824751118e-07, "loss": 0.518, "step": 6212 }, { "epoch": 0.79, "grad_norm": 0.6448817598128918, "learning_rate": 2.1936451383036168e-07, "loss": 0.5224, "step": 6213 }, { "epoch": 0.79, "grad_norm": 0.7767852020030122, "learning_rate": 2.1910667234730284e-07, "loss": 0.5148, "step": 6214 }, { "epoch": 0.79, "grad_norm": 0.6387847716845088, "learning_rate": 2.188489638422425e-07, "loss": 0.5277, "step": 6215 }, { "epoch": 0.79, "grad_norm": 0.6310793953834269, "learning_rate": 2.1859138835906553e-07, "loss": 0.5051, "step": 6216 }, { "epoch": 0.79, "grad_norm": 0.654122115748672, "learning_rate": 2.1833394594163458e-07, "loss": 0.5397, "step": 6217 }, { "epoch": 0.79, "grad_norm": 0.8739973972298009, "learning_rate": 2.1807663663378918e-07, "loss": 0.6393, "step": 6218 }, { "epoch": 0.79, "grad_norm": 0.8104825042986271, "learning_rate": 2.178194604793463e-07, "loss": 0.5591, "step": 6219 }, { "epoch": 0.79, "grad_norm": 0.6499605015633183, "learning_rate": 2.1756241752210092e-07, "loss": 0.4538, "step": 6220 }, { "epoch": 0.79, "grad_norm": 1.019532995564683, "learning_rate": 2.1730550780582446e-07, "loss": 0.6201, "step": 6221 }, { "epoch": 0.79, "grad_norm": 0.7135091890960624, "learning_rate": 2.1704873137426593e-07, "loss": 0.5264, "step": 6222 }, { "epoch": 0.79, "grad_norm": 0.6391255040093908, "learning_rate": 2.1679208827115169e-07, "loss": 0.5279, "step": 6223 }, { "epoch": 0.79, "grad_norm": 0.7775088685578503, "learning_rate": 2.1653557854018555e-07, "loss": 0.6043, "step": 6224 }, { "epoch": 0.79, "grad_norm": 0.6772384287864399, "learning_rate": 2.1627920222504826e-07, "loss": 0.5552, "step": 6225 }, { "epoch": 0.79, "grad_norm": 0.771834559345236, "learning_rate": 2.1602295936939796e-07, "loss": 0.6346, "step": 6226 }, { "epoch": 0.79, "grad_norm": 0.8337009838122383, "learning_rate": 2.1576685001687045e-07, "loss": 0.6111, "step": 6227 }, { "epoch": 0.79, "grad_norm": 0.6756951104199925, "learning_rate": 2.1551087421107817e-07, "loss": 0.5199, "step": 6228 }, { "epoch": 0.79, "grad_norm": 0.8064353383610573, "learning_rate": 2.1525503199561135e-07, "loss": 0.6212, "step": 6229 }, { "epoch": 0.79, "grad_norm": 0.7793518603617083, "learning_rate": 2.1499932341403705e-07, "loss": 0.6158, "step": 6230 }, { "epoch": 0.79, "grad_norm": 0.6559695483889093, "learning_rate": 2.1474374850989996e-07, "loss": 0.4771, "step": 6231 }, { "epoch": 0.79, "grad_norm": 0.8343644003109523, "learning_rate": 2.1448830732672185e-07, "loss": 0.6676, "step": 6232 }, { "epoch": 0.79, "grad_norm": 0.7788103340204466, "learning_rate": 2.1423299990800149e-07, "loss": 0.5985, "step": 6233 }, { "epoch": 0.79, "grad_norm": 0.7170363534888302, "learning_rate": 2.1397782629721517e-07, "loss": 0.6061, "step": 6234 }, { "epoch": 0.79, "grad_norm": 0.806468822073873, "learning_rate": 2.137227865378164e-07, "loss": 0.6236, "step": 6235 }, { "epoch": 0.79, "grad_norm": 0.8513196987582275, "learning_rate": 2.1346788067323563e-07, "loss": 0.5906, "step": 6236 }, { "epoch": 0.79, "grad_norm": 0.8289682413831649, "learning_rate": 2.1321310874688082e-07, "loss": 0.5969, "step": 6237 }, { "epoch": 0.79, "grad_norm": 1.3137590899314386, "learning_rate": 2.129584708021368e-07, "loss": 0.6239, "step": 6238 }, { "epoch": 0.79, "grad_norm": 1.0789711622266045, "learning_rate": 2.1270396688236593e-07, "loss": 0.5637, "step": 6239 }, { "epoch": 0.79, "grad_norm": 0.6990102676105047, "learning_rate": 2.1244959703090748e-07, "loss": 0.5226, "step": 6240 }, { "epoch": 0.8, "grad_norm": 0.8457214667465854, "learning_rate": 2.1219536129107807e-07, "loss": 0.6199, "step": 6241 }, { "epoch": 0.8, "grad_norm": 0.797665763783231, "learning_rate": 2.1194125970617128e-07, "loss": 0.6065, "step": 6242 }, { "epoch": 0.8, "grad_norm": 0.8932407104696183, "learning_rate": 2.1168729231945814e-07, "loss": 0.6087, "step": 6243 }, { "epoch": 0.8, "grad_norm": 0.6819998509438194, "learning_rate": 2.1143345917418643e-07, "loss": 0.4935, "step": 6244 }, { "epoch": 0.8, "grad_norm": 0.937928650863166, "learning_rate": 2.111797603135811e-07, "loss": 0.5585, "step": 6245 }, { "epoch": 0.8, "grad_norm": 0.5516065324835596, "learning_rate": 2.1092619578084504e-07, "loss": 0.503, "step": 6246 }, { "epoch": 0.8, "grad_norm": 0.9594721420673439, "learning_rate": 2.1067276561915736e-07, "loss": 0.6298, "step": 6247 }, { "epoch": 0.8, "grad_norm": 0.5980685326717005, "learning_rate": 2.1041946987167447e-07, "loss": 0.5303, "step": 6248 }, { "epoch": 0.8, "grad_norm": 0.6756724552520457, "learning_rate": 2.1016630858153016e-07, "loss": 0.5465, "step": 6249 }, { "epoch": 0.8, "grad_norm": 0.7983669310966843, "learning_rate": 2.0991328179183498e-07, "loss": 0.6209, "step": 6250 }, { "epoch": 0.8, "grad_norm": 0.7678742555934108, "learning_rate": 2.0966038954567688e-07, "loss": 0.5971, "step": 6251 }, { "epoch": 0.8, "grad_norm": 0.9154015018958837, "learning_rate": 2.0940763188612077e-07, "loss": 0.6669, "step": 6252 }, { "epoch": 0.8, "grad_norm": 0.8220317850433128, "learning_rate": 2.0915500885620862e-07, "loss": 0.5726, "step": 6253 }, { "epoch": 0.8, "grad_norm": 0.6589395038463134, "learning_rate": 2.089025204989594e-07, "loss": 0.5232, "step": 6254 }, { "epoch": 0.8, "grad_norm": 0.6709481000249065, "learning_rate": 2.0865016685736947e-07, "loss": 0.5982, "step": 6255 }, { "epoch": 0.8, "grad_norm": 0.7788342113952555, "learning_rate": 2.083979479744118e-07, "loss": 0.5763, "step": 6256 }, { "epoch": 0.8, "grad_norm": 1.3512401019032951, "learning_rate": 2.0814586389303678e-07, "loss": 0.6627, "step": 6257 }, { "epoch": 0.8, "grad_norm": 0.6488245785651335, "learning_rate": 2.0789391465617168e-07, "loss": 0.5433, "step": 6258 }, { "epoch": 0.8, "grad_norm": 0.6617934735702057, "learning_rate": 2.0764210030672081e-07, "loss": 0.5228, "step": 6259 }, { "epoch": 0.8, "grad_norm": 0.7233697790640223, "learning_rate": 2.0739042088756553e-07, "loss": 0.587, "step": 6260 }, { "epoch": 0.8, "grad_norm": 0.9254888381235377, "learning_rate": 2.0713887644156424e-07, "loss": 0.5935, "step": 6261 }, { "epoch": 0.8, "grad_norm": 0.9100774967680485, "learning_rate": 2.0688746701155236e-07, "loss": 0.6123, "step": 6262 }, { "epoch": 0.8, "grad_norm": 0.6436387665579886, "learning_rate": 2.0663619264034214e-07, "loss": 0.5319, "step": 6263 }, { "epoch": 0.8, "grad_norm": 0.8590345538005468, "learning_rate": 2.063850533707232e-07, "loss": 0.6104, "step": 6264 }, { "epoch": 0.8, "grad_norm": 0.8463191713603788, "learning_rate": 2.0613404924546184e-07, "loss": 0.5961, "step": 6265 }, { "epoch": 0.8, "grad_norm": 0.8085234682305709, "learning_rate": 2.0588318030730144e-07, "loss": 0.5678, "step": 6266 }, { "epoch": 0.8, "grad_norm": 0.6914137412706343, "learning_rate": 2.0563244659896238e-07, "loss": 0.5121, "step": 6267 }, { "epoch": 0.8, "grad_norm": 0.6843283864341307, "learning_rate": 2.0538184816314208e-07, "loss": 0.5124, "step": 6268 }, { "epoch": 0.8, "grad_norm": 0.8946948998774575, "learning_rate": 2.051313850425147e-07, "loss": 0.6132, "step": 6269 }, { "epoch": 0.8, "grad_norm": 0.7428266860491495, "learning_rate": 2.0488105727973148e-07, "loss": 0.5607, "step": 6270 }, { "epoch": 0.8, "grad_norm": 0.8730111544453218, "learning_rate": 2.0463086491742055e-07, "loss": 0.6008, "step": 6271 }, { "epoch": 0.8, "grad_norm": 0.7252408618930705, "learning_rate": 2.0438080799818746e-07, "loss": 0.5374, "step": 6272 }, { "epoch": 0.8, "grad_norm": 0.8438369637444263, "learning_rate": 2.0413088656461408e-07, "loss": 0.5972, "step": 6273 }, { "epoch": 0.8, "grad_norm": 0.8131843149474692, "learning_rate": 2.0388110065925944e-07, "loss": 0.6137, "step": 6274 }, { "epoch": 0.8, "grad_norm": 0.8415116289003903, "learning_rate": 2.0363145032465944e-07, "loss": 0.6101, "step": 6275 }, { "epoch": 0.8, "grad_norm": 0.7714995326644103, "learning_rate": 2.0338193560332695e-07, "loss": 0.5445, "step": 6276 }, { "epoch": 0.8, "grad_norm": 0.7327847152386519, "learning_rate": 2.0313255653775141e-07, "loss": 0.5384, "step": 6277 }, { "epoch": 0.8, "grad_norm": 0.629290614789581, "learning_rate": 2.0288331317040008e-07, "loss": 0.5305, "step": 6278 }, { "epoch": 0.8, "grad_norm": 0.6621278271819153, "learning_rate": 2.0263420554371623e-07, "loss": 0.5472, "step": 6279 }, { "epoch": 0.8, "grad_norm": 0.7942750294566222, "learning_rate": 2.023852337001203e-07, "loss": 0.6171, "step": 6280 }, { "epoch": 0.8, "grad_norm": 0.6458812080589901, "learning_rate": 2.0213639768200953e-07, "loss": 0.5199, "step": 6281 }, { "epoch": 0.8, "grad_norm": 0.8938848159028191, "learning_rate": 2.0188769753175815e-07, "loss": 0.6954, "step": 6282 }, { "epoch": 0.8, "grad_norm": 0.8522091108964585, "learning_rate": 2.0163913329171722e-07, "loss": 0.5363, "step": 6283 }, { "epoch": 0.8, "grad_norm": 0.8090130223193264, "learning_rate": 2.0139070500421462e-07, "loss": 0.5808, "step": 6284 }, { "epoch": 0.8, "grad_norm": 0.8329550738956516, "learning_rate": 2.011424127115552e-07, "loss": 0.5909, "step": 6285 }, { "epoch": 0.8, "grad_norm": 0.6936498490632294, "learning_rate": 2.008942564560203e-07, "loss": 0.5191, "step": 6286 }, { "epoch": 0.8, "grad_norm": 0.7242559743663319, "learning_rate": 2.0064623627986865e-07, "loss": 0.5806, "step": 6287 }, { "epoch": 0.8, "grad_norm": 0.6389128592749685, "learning_rate": 2.003983522253352e-07, "loss": 0.5405, "step": 6288 }, { "epoch": 0.8, "grad_norm": 0.7194923453893851, "learning_rate": 2.0015060433463227e-07, "loss": 0.5223, "step": 6289 }, { "epoch": 0.8, "grad_norm": 0.7754058787819915, "learning_rate": 1.999029926499487e-07, "loss": 0.5452, "step": 6290 }, { "epoch": 0.8, "grad_norm": 0.7336404798764289, "learning_rate": 1.996555172134501e-07, "loss": 0.538, "step": 6291 }, { "epoch": 0.8, "grad_norm": 0.9305867929066064, "learning_rate": 1.9940817806727895e-07, "loss": 0.6457, "step": 6292 }, { "epoch": 0.8, "grad_norm": 0.7816832650370892, "learning_rate": 1.9916097525355458e-07, "loss": 0.6481, "step": 6293 }, { "epoch": 0.8, "grad_norm": 0.8856471665991816, "learning_rate": 1.9891390881437299e-07, "loss": 0.6572, "step": 6294 }, { "epoch": 0.8, "grad_norm": 0.7133195386681009, "learning_rate": 1.9866697879180715e-07, "loss": 0.5318, "step": 6295 }, { "epoch": 0.8, "grad_norm": 1.013253942911983, "learning_rate": 1.984201852279066e-07, "loss": 0.7022, "step": 6296 }, { "epoch": 0.8, "grad_norm": 0.8727206688441004, "learning_rate": 1.981735281646977e-07, "loss": 0.5987, "step": 6297 }, { "epoch": 0.8, "grad_norm": 0.7742667803597295, "learning_rate": 1.9792700764418345e-07, "loss": 0.5594, "step": 6298 }, { "epoch": 0.8, "grad_norm": 0.745606837374919, "learning_rate": 1.9768062370834405e-07, "loss": 0.5461, "step": 6299 }, { "epoch": 0.8, "grad_norm": 1.0833835682966506, "learning_rate": 1.9743437639913585e-07, "loss": 0.6158, "step": 6300 }, { "epoch": 0.8, "grad_norm": 0.7640600021136525, "learning_rate": 1.9718826575849224e-07, "loss": 0.6632, "step": 6301 }, { "epoch": 0.8, "grad_norm": 0.9434436905733974, "learning_rate": 1.9694229182832346e-07, "loss": 0.6293, "step": 6302 }, { "epoch": 0.8, "grad_norm": 0.6272984304251428, "learning_rate": 1.9669645465051589e-07, "loss": 0.4862, "step": 6303 }, { "epoch": 0.8, "grad_norm": 0.8349542597411452, "learning_rate": 1.9645075426693348e-07, "loss": 0.5976, "step": 6304 }, { "epoch": 0.8, "grad_norm": 0.7185382204153398, "learning_rate": 1.9620519071941643e-07, "loss": 0.5265, "step": 6305 }, { "epoch": 0.8, "grad_norm": 0.8431576955594597, "learning_rate": 1.9595976404978143e-07, "loss": 0.6224, "step": 6306 }, { "epoch": 0.8, "grad_norm": 0.8780822050580305, "learning_rate": 1.9571447429982224e-07, "loss": 0.6449, "step": 6307 }, { "epoch": 0.8, "grad_norm": 0.7396418814743158, "learning_rate": 1.9546932151130912e-07, "loss": 0.5831, "step": 6308 }, { "epoch": 0.8, "grad_norm": 0.7519152428086103, "learning_rate": 1.9522430572598892e-07, "loss": 0.5698, "step": 6309 }, { "epoch": 0.8, "grad_norm": 0.6843386158412604, "learning_rate": 1.9497942698558546e-07, "loss": 0.5245, "step": 6310 }, { "epoch": 0.8, "grad_norm": 0.7895614587897429, "learning_rate": 1.9473468533179892e-07, "loss": 0.6306, "step": 6311 }, { "epoch": 0.8, "grad_norm": 0.7587816135022525, "learning_rate": 1.9449008080630624e-07, "loss": 0.503, "step": 6312 }, { "epoch": 0.8, "grad_norm": 0.7672071734838551, "learning_rate": 1.9424561345076108e-07, "loss": 0.5462, "step": 6313 }, { "epoch": 0.8, "grad_norm": 0.9808399568330299, "learning_rate": 1.9400128330679355e-07, "loss": 0.6619, "step": 6314 }, { "epoch": 0.8, "grad_norm": 0.8715896927010516, "learning_rate": 1.9375709041601074e-07, "loss": 0.6793, "step": 6315 }, { "epoch": 0.8, "grad_norm": 0.8521171782075534, "learning_rate": 1.9351303481999614e-07, "loss": 0.6833, "step": 6316 }, { "epoch": 0.8, "grad_norm": 0.7482531927697557, "learning_rate": 1.9326911656030965e-07, "loss": 0.512, "step": 6317 }, { "epoch": 0.8, "grad_norm": 0.8575181625791382, "learning_rate": 1.9302533567848823e-07, "loss": 0.5831, "step": 6318 }, { "epoch": 0.81, "grad_norm": 0.6566070350060423, "learning_rate": 1.9278169221604513e-07, "loss": 0.523, "step": 6319 }, { "epoch": 0.81, "grad_norm": 0.9529699050001621, "learning_rate": 1.9253818621447026e-07, "loss": 0.6402, "step": 6320 }, { "epoch": 0.81, "grad_norm": 1.0139588522930159, "learning_rate": 1.9229481771523027e-07, "loss": 0.6214, "step": 6321 }, { "epoch": 0.81, "grad_norm": 0.7329070842248678, "learning_rate": 1.9205158675976808e-07, "loss": 0.5349, "step": 6322 }, { "epoch": 0.81, "grad_norm": 0.6931646080031605, "learning_rate": 1.9180849338950366e-07, "loss": 0.5474, "step": 6323 }, { "epoch": 0.81, "grad_norm": 0.8263718797189621, "learning_rate": 1.9156553764583305e-07, "loss": 0.6064, "step": 6324 }, { "epoch": 0.81, "grad_norm": 0.7814413757375455, "learning_rate": 1.9132271957012923e-07, "loss": 0.6105, "step": 6325 }, { "epoch": 0.81, "grad_norm": 0.8070805576527864, "learning_rate": 1.910800392037415e-07, "loss": 0.5949, "step": 6326 }, { "epoch": 0.81, "grad_norm": 0.6912477101000122, "learning_rate": 1.9083749658799585e-07, "loss": 0.5715, "step": 6327 }, { "epoch": 0.81, "grad_norm": 0.7748718444805102, "learning_rate": 1.9059509176419475e-07, "loss": 0.5671, "step": 6328 }, { "epoch": 0.81, "grad_norm": 0.8287448142592743, "learning_rate": 1.9035282477361714e-07, "loss": 0.5249, "step": 6329 }, { "epoch": 0.81, "grad_norm": 0.806782163536333, "learning_rate": 1.9011069565751836e-07, "loss": 0.638, "step": 6330 }, { "epoch": 0.81, "grad_norm": 0.9420943436691243, "learning_rate": 1.898687044571311e-07, "loss": 0.675, "step": 6331 }, { "epoch": 0.81, "grad_norm": 0.7196863345732111, "learning_rate": 1.896268512136635e-07, "loss": 0.5583, "step": 6332 }, { "epoch": 0.81, "grad_norm": 0.880855053144274, "learning_rate": 1.8938513596830073e-07, "loss": 0.6334, "step": 6333 }, { "epoch": 0.81, "grad_norm": 0.9545686648502021, "learning_rate": 1.8914355876220445e-07, "loss": 0.6407, "step": 6334 }, { "epoch": 0.81, "grad_norm": 0.7281200473455491, "learning_rate": 1.8890211963651258e-07, "loss": 0.5606, "step": 6335 }, { "epoch": 0.81, "grad_norm": 0.8711095709007972, "learning_rate": 1.8866081863233985e-07, "loss": 0.6629, "step": 6336 }, { "epoch": 0.81, "grad_norm": 0.7852680421749125, "learning_rate": 1.8841965579077724e-07, "loss": 0.5847, "step": 6337 }, { "epoch": 0.81, "grad_norm": 0.7730199525005151, "learning_rate": 1.8817863115289222e-07, "loss": 0.6129, "step": 6338 }, { "epoch": 0.81, "grad_norm": 0.6427158482924086, "learning_rate": 1.879377447597289e-07, "loss": 0.5347, "step": 6339 }, { "epoch": 0.81, "grad_norm": 0.6581761014726824, "learning_rate": 1.8769699665230754e-07, "loss": 0.5525, "step": 6340 }, { "epoch": 0.81, "grad_norm": 0.9869996411358212, "learning_rate": 1.8745638687162523e-07, "loss": 0.6665, "step": 6341 }, { "epoch": 0.81, "grad_norm": 0.9526859933328118, "learning_rate": 1.8721591545865512e-07, "loss": 0.6288, "step": 6342 }, { "epoch": 0.81, "grad_norm": 0.8641573246907084, "learning_rate": 1.869755824543472e-07, "loss": 0.6347, "step": 6343 }, { "epoch": 0.81, "grad_norm": 0.6036746107142822, "learning_rate": 1.867353878996275e-07, "loss": 0.4964, "step": 6344 }, { "epoch": 0.81, "grad_norm": 0.7271420038311592, "learning_rate": 1.8649533183539867e-07, "loss": 0.5888, "step": 6345 }, { "epoch": 0.81, "grad_norm": 0.8457423292929522, "learning_rate": 1.8625541430253988e-07, "loss": 0.62, "step": 6346 }, { "epoch": 0.81, "grad_norm": 0.8524668566132301, "learning_rate": 1.8601563534190645e-07, "loss": 0.6276, "step": 6347 }, { "epoch": 0.81, "grad_norm": 0.6489201474489246, "learning_rate": 1.8577599499433027e-07, "loss": 0.5255, "step": 6348 }, { "epoch": 0.81, "grad_norm": 0.7334057555958982, "learning_rate": 1.855364933006197e-07, "loss": 0.586, "step": 6349 }, { "epoch": 0.81, "grad_norm": 0.6961730046553802, "learning_rate": 1.8529713030155926e-07, "loss": 0.5236, "step": 6350 }, { "epoch": 0.81, "grad_norm": 0.7147010474141334, "learning_rate": 1.8505790603790993e-07, "loss": 0.5408, "step": 6351 }, { "epoch": 0.81, "grad_norm": 0.6164425135836364, "learning_rate": 1.848188205504093e-07, "loss": 0.5392, "step": 6352 }, { "epoch": 0.81, "grad_norm": 1.0597653165636454, "learning_rate": 1.8457987387977103e-07, "loss": 0.6279, "step": 6353 }, { "epoch": 0.81, "grad_norm": 0.624523829496321, "learning_rate": 1.843410660666852e-07, "loss": 0.5621, "step": 6354 }, { "epoch": 0.81, "grad_norm": 0.9573255030824929, "learning_rate": 1.841023971518184e-07, "loss": 0.6869, "step": 6355 }, { "epoch": 0.81, "grad_norm": 0.7037267213184512, "learning_rate": 1.8386386717581315e-07, "loss": 0.5891, "step": 6356 }, { "epoch": 0.81, "grad_norm": 0.7800208927624231, "learning_rate": 1.8362547617928915e-07, "loss": 0.5133, "step": 6357 }, { "epoch": 0.81, "grad_norm": 0.6584307138097006, "learning_rate": 1.833872242028416e-07, "loss": 0.5452, "step": 6358 }, { "epoch": 0.81, "grad_norm": 0.7216430296428064, "learning_rate": 1.8314911128704223e-07, "loss": 0.5622, "step": 6359 }, { "epoch": 0.81, "grad_norm": 0.6251242607662475, "learning_rate": 1.8291113747243947e-07, "loss": 0.5292, "step": 6360 }, { "epoch": 0.81, "grad_norm": 0.7087409223720543, "learning_rate": 1.8267330279955762e-07, "loss": 0.5252, "step": 6361 }, { "epoch": 0.81, "grad_norm": 0.867110843805794, "learning_rate": 1.824356073088974e-07, "loss": 0.6113, "step": 6362 }, { "epoch": 0.81, "grad_norm": 0.7110211789926288, "learning_rate": 1.82198051040936e-07, "loss": 0.5981, "step": 6363 }, { "epoch": 0.81, "grad_norm": 0.8016313328242031, "learning_rate": 1.8196063403612672e-07, "loss": 0.5634, "step": 6364 }, { "epoch": 0.81, "grad_norm": 1.01569137624723, "learning_rate": 1.817233563348991e-07, "loss": 0.6485, "step": 6365 }, { "epoch": 0.81, "grad_norm": 0.6591051211191011, "learning_rate": 1.8148621797765917e-07, "loss": 0.5895, "step": 6366 }, { "epoch": 0.81, "grad_norm": 0.7030828793386561, "learning_rate": 1.812492190047892e-07, "loss": 0.5617, "step": 6367 }, { "epoch": 0.81, "grad_norm": 0.6738638626550979, "learning_rate": 1.8101235945664738e-07, "loss": 0.5458, "step": 6368 }, { "epoch": 0.81, "grad_norm": 0.942073762382475, "learning_rate": 1.8077563937356876e-07, "loss": 0.6222, "step": 6369 }, { "epoch": 0.81, "grad_norm": 0.7136713015062975, "learning_rate": 1.8053905879586406e-07, "loss": 0.5724, "step": 6370 }, { "epoch": 0.81, "grad_norm": 0.591526390254191, "learning_rate": 1.803026177638205e-07, "loss": 0.4962, "step": 6371 }, { "epoch": 0.81, "grad_norm": 0.7220390551766422, "learning_rate": 1.8006631631770165e-07, "loss": 0.555, "step": 6372 }, { "epoch": 0.81, "grad_norm": 0.7602859269961497, "learning_rate": 1.7983015449774718e-07, "loss": 0.5702, "step": 6373 }, { "epoch": 0.81, "grad_norm": 0.6320896953743237, "learning_rate": 1.7959413234417287e-07, "loss": 0.4979, "step": 6374 }, { "epoch": 0.81, "grad_norm": 0.9495571312759755, "learning_rate": 1.7935824989717096e-07, "loss": 0.6251, "step": 6375 }, { "epoch": 0.81, "grad_norm": 0.7477847773011265, "learning_rate": 1.7912250719690958e-07, "loss": 0.5071, "step": 6376 }, { "epoch": 0.81, "grad_norm": 0.7600333163371802, "learning_rate": 1.7888690428353348e-07, "loss": 0.57, "step": 6377 }, { "epoch": 0.81, "grad_norm": 0.8884002058168086, "learning_rate": 1.7865144119716303e-07, "loss": 0.6447, "step": 6378 }, { "epoch": 0.81, "grad_norm": 0.6977790288722401, "learning_rate": 1.7841611797789567e-07, "loss": 0.5408, "step": 6379 }, { "epoch": 0.81, "grad_norm": 0.8262005274786918, "learning_rate": 1.7818093466580418e-07, "loss": 0.6134, "step": 6380 }, { "epoch": 0.81, "grad_norm": 0.9694590862128035, "learning_rate": 1.7794589130093784e-07, "loss": 0.6135, "step": 6381 }, { "epoch": 0.81, "grad_norm": 0.9078416562536212, "learning_rate": 1.7771098792332216e-07, "loss": 0.5155, "step": 6382 }, { "epoch": 0.81, "grad_norm": 0.7790278931943109, "learning_rate": 1.7747622457295864e-07, "loss": 0.4999, "step": 6383 }, { "epoch": 0.81, "grad_norm": 0.8656281436552976, "learning_rate": 1.7724160128982514e-07, "loss": 0.6482, "step": 6384 }, { "epoch": 0.81, "grad_norm": 0.8495498071544636, "learning_rate": 1.7700711811387548e-07, "loss": 0.5911, "step": 6385 }, { "epoch": 0.81, "grad_norm": 0.6620275459228138, "learning_rate": 1.7677277508503975e-07, "loss": 0.4804, "step": 6386 }, { "epoch": 0.81, "grad_norm": 1.1846555280971403, "learning_rate": 1.765385722432241e-07, "loss": 0.6611, "step": 6387 }, { "epoch": 0.81, "grad_norm": 0.927148551253297, "learning_rate": 1.7630450962831057e-07, "loss": 0.6511, "step": 6388 }, { "epoch": 0.81, "grad_norm": 0.8331121006933375, "learning_rate": 1.760705872801581e-07, "loss": 0.5948, "step": 6389 }, { "epoch": 0.81, "grad_norm": 0.639205670609675, "learning_rate": 1.75836805238601e-07, "loss": 0.5146, "step": 6390 }, { "epoch": 0.81, "grad_norm": 1.0216423260893694, "learning_rate": 1.7560316354344985e-07, "loss": 0.6268, "step": 6391 }, { "epoch": 0.81, "grad_norm": 0.6681394359426978, "learning_rate": 1.7536966223449157e-07, "loss": 0.5128, "step": 6392 }, { "epoch": 0.81, "grad_norm": 0.9974760248514055, "learning_rate": 1.751363013514887e-07, "loss": 0.5872, "step": 6393 }, { "epoch": 0.81, "grad_norm": 0.6670593548664926, "learning_rate": 1.749030809341805e-07, "loss": 0.5307, "step": 6394 }, { "epoch": 0.81, "grad_norm": 0.7544519465139703, "learning_rate": 1.746700010222817e-07, "loss": 0.5412, "step": 6395 }, { "epoch": 0.81, "grad_norm": 0.7160350778658963, "learning_rate": 1.7443706165548357e-07, "loss": 0.5142, "step": 6396 }, { "epoch": 0.81, "grad_norm": 0.6549785071593615, "learning_rate": 1.7420426287345314e-07, "loss": 0.5015, "step": 6397 }, { "epoch": 0.82, "grad_norm": 0.6642450313320617, "learning_rate": 1.7397160471583372e-07, "loss": 0.4854, "step": 6398 }, { "epoch": 0.82, "grad_norm": 0.7636945659730447, "learning_rate": 1.737390872222445e-07, "loss": 0.6147, "step": 6399 }, { "epoch": 0.82, "grad_norm": 0.8260526902724561, "learning_rate": 1.7350671043228072e-07, "loss": 0.6781, "step": 6400 }, { "epoch": 0.82, "grad_norm": 0.7191454276398482, "learning_rate": 1.732744743855139e-07, "loss": 0.5649, "step": 6401 }, { "epoch": 0.82, "grad_norm": 0.8297999983852089, "learning_rate": 1.7304237912149133e-07, "loss": 0.5537, "step": 6402 }, { "epoch": 0.82, "grad_norm": 0.9557534527789039, "learning_rate": 1.728104246797364e-07, "loss": 0.6836, "step": 6403 }, { "epoch": 0.82, "grad_norm": 0.9206662740081434, "learning_rate": 1.7257861109974858e-07, "loss": 0.5252, "step": 6404 }, { "epoch": 0.82, "grad_norm": 1.0026819383098646, "learning_rate": 1.7234693842100324e-07, "loss": 0.6116, "step": 6405 }, { "epoch": 0.82, "grad_norm": 0.7924675594308621, "learning_rate": 1.7211540668295178e-07, "loss": 0.6099, "step": 6406 }, { "epoch": 0.82, "grad_norm": 0.8181930154286083, "learning_rate": 1.7188401592502177e-07, "loss": 0.5612, "step": 6407 }, { "epoch": 0.82, "grad_norm": 0.8815405876265957, "learning_rate": 1.716527661866166e-07, "loss": 0.6692, "step": 6408 }, { "epoch": 0.82, "grad_norm": 0.9286025048840973, "learning_rate": 1.7142165750711578e-07, "loss": 0.6278, "step": 6409 }, { "epoch": 0.82, "grad_norm": 0.6329255217331994, "learning_rate": 1.7119068992587459e-07, "loss": 0.5136, "step": 6410 }, { "epoch": 0.82, "grad_norm": 0.7731308199990283, "learning_rate": 1.7095986348222435e-07, "loss": 0.6067, "step": 6411 }, { "epoch": 0.82, "grad_norm": 0.8612889388826246, "learning_rate": 1.707291782154725e-07, "loss": 0.6371, "step": 6412 }, { "epoch": 0.82, "grad_norm": 0.7644036995048867, "learning_rate": 1.704986341649024e-07, "loss": 0.5106, "step": 6413 }, { "epoch": 0.82, "grad_norm": 0.9343442730788272, "learning_rate": 1.7026823136977286e-07, "loss": 0.5797, "step": 6414 }, { "epoch": 0.82, "grad_norm": 0.7196132490374512, "learning_rate": 1.7003796986931973e-07, "loss": 0.5818, "step": 6415 }, { "epoch": 0.82, "grad_norm": 0.8957309653844695, "learning_rate": 1.6980784970275387e-07, "loss": 0.6632, "step": 6416 }, { "epoch": 0.82, "grad_norm": 0.8440408274405804, "learning_rate": 1.6957787090926236e-07, "loss": 0.6011, "step": 6417 }, { "epoch": 0.82, "grad_norm": 0.93792552823572, "learning_rate": 1.6934803352800808e-07, "loss": 0.6583, "step": 6418 }, { "epoch": 0.82, "grad_norm": 0.9322096866592263, "learning_rate": 1.691183375981301e-07, "loss": 0.5917, "step": 6419 }, { "epoch": 0.82, "grad_norm": 0.6503567276753359, "learning_rate": 1.688887831587431e-07, "loss": 0.5503, "step": 6420 }, { "epoch": 0.82, "grad_norm": 0.8316738841530569, "learning_rate": 1.6865937024893795e-07, "loss": 0.6626, "step": 6421 }, { "epoch": 0.82, "grad_norm": 0.6956798547432238, "learning_rate": 1.6843009890778114e-07, "loss": 0.5684, "step": 6422 }, { "epoch": 0.82, "grad_norm": 0.9562807844716208, "learning_rate": 1.6820096917431526e-07, "loss": 0.6184, "step": 6423 }, { "epoch": 0.82, "grad_norm": 0.7183818880857306, "learning_rate": 1.6797198108755872e-07, "loss": 0.589, "step": 6424 }, { "epoch": 0.82, "grad_norm": 0.66546701926384, "learning_rate": 1.677431346865057e-07, "loss": 0.4897, "step": 6425 }, { "epoch": 0.82, "grad_norm": 0.9082990924901301, "learning_rate": 1.6751443001012654e-07, "loss": 0.6684, "step": 6426 }, { "epoch": 0.82, "grad_norm": 0.6905442657991575, "learning_rate": 1.6728586709736726e-07, "loss": 0.5521, "step": 6427 }, { "epoch": 0.82, "grad_norm": 0.9785544715746913, "learning_rate": 1.6705744598714955e-07, "loss": 0.7008, "step": 6428 }, { "epoch": 0.82, "grad_norm": 0.9293403069099524, "learning_rate": 1.6682916671837122e-07, "loss": 0.6076, "step": 6429 }, { "epoch": 0.82, "grad_norm": 0.8678694414405309, "learning_rate": 1.6660102932990594e-07, "loss": 0.6377, "step": 6430 }, { "epoch": 0.82, "grad_norm": 0.7844578098871634, "learning_rate": 1.663730338606032e-07, "loss": 0.5752, "step": 6431 }, { "epoch": 0.82, "grad_norm": 0.8202003274152232, "learning_rate": 1.6614518034928793e-07, "loss": 0.5561, "step": 6432 }, { "epoch": 0.82, "grad_norm": 0.9173399870710137, "learning_rate": 1.6591746883476155e-07, "loss": 0.4989, "step": 6433 }, { "epoch": 0.82, "grad_norm": 0.8762911447371399, "learning_rate": 1.656898993558007e-07, "loss": 0.6048, "step": 6434 }, { "epoch": 0.82, "grad_norm": 0.7340270326810643, "learning_rate": 1.6546247195115835e-07, "loss": 0.5098, "step": 6435 }, { "epoch": 0.82, "grad_norm": 0.9272776908177466, "learning_rate": 1.6523518665956282e-07, "loss": 0.6135, "step": 6436 }, { "epoch": 0.82, "grad_norm": 0.6040889990067481, "learning_rate": 1.6500804351971841e-07, "loss": 0.4972, "step": 6437 }, { "epoch": 0.82, "grad_norm": 0.661345947261647, "learning_rate": 1.6478104257030534e-07, "loss": 0.559, "step": 6438 }, { "epoch": 0.82, "grad_norm": 0.649652803496156, "learning_rate": 1.6455418384997954e-07, "loss": 0.5231, "step": 6439 }, { "epoch": 0.82, "grad_norm": 0.7995945062024263, "learning_rate": 1.6432746739737258e-07, "loss": 0.5921, "step": 6440 }, { "epoch": 0.82, "grad_norm": 0.8492162415656811, "learning_rate": 1.6410089325109167e-07, "loss": 0.5771, "step": 6441 }, { "epoch": 0.82, "grad_norm": 0.9078490728373156, "learning_rate": 1.638744614497206e-07, "loss": 0.6423, "step": 6442 }, { "epoch": 0.82, "grad_norm": 0.7413654105852527, "learning_rate": 1.6364817203181802e-07, "loss": 0.5211, "step": 6443 }, { "epoch": 0.82, "grad_norm": 0.8239655648294082, "learning_rate": 1.6342202503591873e-07, "loss": 0.6064, "step": 6444 }, { "epoch": 0.82, "grad_norm": 0.6670570384021737, "learning_rate": 1.6319602050053315e-07, "loss": 0.5126, "step": 6445 }, { "epoch": 0.82, "grad_norm": 0.7648876064152663, "learning_rate": 1.6297015846414753e-07, "loss": 0.59, "step": 6446 }, { "epoch": 0.82, "grad_norm": 0.8368277373518951, "learning_rate": 1.627444389652237e-07, "loss": 0.5981, "step": 6447 }, { "epoch": 0.82, "grad_norm": 0.5976086122367198, "learning_rate": 1.6251886204219956e-07, "loss": 0.5213, "step": 6448 }, { "epoch": 0.82, "grad_norm": 0.8258949888052496, "learning_rate": 1.6229342773348842e-07, "loss": 0.5766, "step": 6449 }, { "epoch": 0.82, "grad_norm": 0.7984499235273745, "learning_rate": 1.6206813607747938e-07, "loss": 0.5326, "step": 6450 }, { "epoch": 0.82, "grad_norm": 0.8518211429643289, "learning_rate": 1.6184298711253718e-07, "loss": 0.5654, "step": 6451 }, { "epoch": 0.82, "grad_norm": 0.6397122071272141, "learning_rate": 1.616179808770024e-07, "loss": 0.5184, "step": 6452 }, { "epoch": 0.82, "grad_norm": 0.630496127055483, "learning_rate": 1.6139311740919137e-07, "loss": 0.4745, "step": 6453 }, { "epoch": 0.82, "grad_norm": 0.7713550866422154, "learning_rate": 1.6116839674739578e-07, "loss": 0.5725, "step": 6454 }, { "epoch": 0.82, "grad_norm": 0.9324817635530495, "learning_rate": 1.609438189298835e-07, "loss": 0.5728, "step": 6455 }, { "epoch": 0.82, "grad_norm": 0.9914071622430473, "learning_rate": 1.607193839948975e-07, "loss": 0.6457, "step": 6456 }, { "epoch": 0.82, "grad_norm": 0.655805911409877, "learning_rate": 1.6049509198065692e-07, "loss": 0.4996, "step": 6457 }, { "epoch": 0.82, "grad_norm": 0.6184139576260695, "learning_rate": 1.6027094292535625e-07, "loss": 0.5512, "step": 6458 }, { "epoch": 0.82, "grad_norm": 0.7214272013046492, "learning_rate": 1.6004693686716574e-07, "loss": 0.5581, "step": 6459 }, { "epoch": 0.82, "grad_norm": 0.8772221301785722, "learning_rate": 1.5982307384423132e-07, "loss": 0.5602, "step": 6460 }, { "epoch": 0.82, "grad_norm": 0.854963003663283, "learning_rate": 1.5959935389467448e-07, "loss": 0.6465, "step": 6461 }, { "epoch": 0.82, "grad_norm": 0.8809321236222114, "learning_rate": 1.5937577705659254e-07, "loss": 0.6256, "step": 6462 }, { "epoch": 0.82, "grad_norm": 0.7255865413800601, "learning_rate": 1.5915234336805815e-07, "loss": 0.5249, "step": 6463 }, { "epoch": 0.82, "grad_norm": 0.7992754982921081, "learning_rate": 1.589290528671199e-07, "loss": 0.6538, "step": 6464 }, { "epoch": 0.82, "grad_norm": 0.873305641435741, "learning_rate": 1.5870590559180164e-07, "loss": 0.6024, "step": 6465 }, { "epoch": 0.82, "grad_norm": 0.838081913665831, "learning_rate": 1.5848290158010313e-07, "loss": 0.6351, "step": 6466 }, { "epoch": 0.82, "grad_norm": 0.7088100619406694, "learning_rate": 1.5826004086999932e-07, "loss": 0.5718, "step": 6467 }, { "epoch": 0.82, "grad_norm": 0.8293577427819205, "learning_rate": 1.5803732349944166e-07, "loss": 0.5441, "step": 6468 }, { "epoch": 0.82, "grad_norm": 0.9314062763345221, "learning_rate": 1.5781474950635633e-07, "loss": 0.5891, "step": 6469 }, { "epoch": 0.82, "grad_norm": 0.6529738743956238, "learning_rate": 1.5759231892864533e-07, "loss": 0.5498, "step": 6470 }, { "epoch": 0.82, "grad_norm": 0.7630417774586676, "learning_rate": 1.5737003180418628e-07, "loss": 0.5154, "step": 6471 }, { "epoch": 0.82, "grad_norm": 0.6866345975506298, "learning_rate": 1.5714788817083235e-07, "loss": 0.5353, "step": 6472 }, { "epoch": 0.82, "grad_norm": 0.6653141842795357, "learning_rate": 1.5692588806641228e-07, "loss": 0.583, "step": 6473 }, { "epoch": 0.82, "grad_norm": 0.8473097530843544, "learning_rate": 1.5670403152873036e-07, "loss": 0.6193, "step": 6474 }, { "epoch": 0.82, "grad_norm": 0.7435850532859277, "learning_rate": 1.564823185955665e-07, "loss": 0.5124, "step": 6475 }, { "epoch": 0.83, "grad_norm": 0.7896586640866586, "learning_rate": 1.5626074930467615e-07, "loss": 0.5009, "step": 6476 }, { "epoch": 0.83, "grad_norm": 0.6976975356759486, "learning_rate": 1.5603932369379002e-07, "loss": 0.5352, "step": 6477 }, { "epoch": 0.83, "grad_norm": 0.8325220740811106, "learning_rate": 1.5581804180061487e-07, "loss": 0.6602, "step": 6478 }, { "epoch": 0.83, "grad_norm": 0.9408500210225593, "learning_rate": 1.555969036628324e-07, "loss": 0.6169, "step": 6479 }, { "epoch": 0.83, "grad_norm": 0.8963494881602688, "learning_rate": 1.553759093181004e-07, "loss": 0.646, "step": 6480 }, { "epoch": 0.83, "grad_norm": 0.9014680548878518, "learning_rate": 1.551550588040519e-07, "loss": 0.5986, "step": 6481 }, { "epoch": 0.83, "grad_norm": 0.6472376954796372, "learning_rate": 1.549343521582953e-07, "loss": 0.4855, "step": 6482 }, { "epoch": 0.83, "grad_norm": 0.681807805793271, "learning_rate": 1.5471378941841474e-07, "loss": 0.5761, "step": 6483 }, { "epoch": 0.83, "grad_norm": 0.7122307063924425, "learning_rate": 1.5449337062196975e-07, "loss": 0.5398, "step": 6484 }, { "epoch": 0.83, "grad_norm": 0.842584534863215, "learning_rate": 1.5427309580649527e-07, "loss": 0.5483, "step": 6485 }, { "epoch": 0.83, "grad_norm": 0.6091861360422927, "learning_rate": 1.5405296500950182e-07, "loss": 0.4722, "step": 6486 }, { "epoch": 0.83, "grad_norm": 0.687036081919355, "learning_rate": 1.538329782684754e-07, "loss": 0.5438, "step": 6487 }, { "epoch": 0.83, "grad_norm": 0.8466948947588971, "learning_rate": 1.5361313562087764e-07, "loss": 0.6259, "step": 6488 }, { "epoch": 0.83, "grad_norm": 0.6992040618921059, "learning_rate": 1.5339343710414508e-07, "loss": 0.5537, "step": 6489 }, { "epoch": 0.83, "grad_norm": 0.8639102897517498, "learning_rate": 1.5317388275569044e-07, "loss": 0.6229, "step": 6490 }, { "epoch": 0.83, "grad_norm": 0.649975156685462, "learning_rate": 1.529544726129014e-07, "loss": 0.5119, "step": 6491 }, { "epoch": 0.83, "grad_norm": 0.755327241357558, "learning_rate": 1.527352067131411e-07, "loss": 0.5791, "step": 6492 }, { "epoch": 0.83, "grad_norm": 0.7029121462507827, "learning_rate": 1.5251608509374847e-07, "loss": 0.5323, "step": 6493 }, { "epoch": 0.83, "grad_norm": 0.8877752542191736, "learning_rate": 1.5229710779203742e-07, "loss": 0.6282, "step": 6494 }, { "epoch": 0.83, "grad_norm": 0.9334873840229414, "learning_rate": 1.5207827484529768e-07, "loss": 0.6141, "step": 6495 }, { "epoch": 0.83, "grad_norm": 0.6687585141221396, "learning_rate": 1.518595862907942e-07, "loss": 0.5169, "step": 6496 }, { "epoch": 0.83, "grad_norm": 0.792020114956357, "learning_rate": 1.5164104216576723e-07, "loss": 0.5918, "step": 6497 }, { "epoch": 0.83, "grad_norm": 0.7956780566363902, "learning_rate": 1.5142264250743265e-07, "loss": 0.6191, "step": 6498 }, { "epoch": 0.83, "grad_norm": 0.8872366557514223, "learning_rate": 1.5120438735298135e-07, "loss": 0.6191, "step": 6499 }, { "epoch": 0.83, "grad_norm": 0.6983377532530025, "learning_rate": 1.5098627673958054e-07, "loss": 0.535, "step": 6500 }, { "epoch": 0.83, "grad_norm": 0.8204140662095852, "learning_rate": 1.507683107043718e-07, "loss": 0.5886, "step": 6501 }, { "epoch": 0.83, "grad_norm": 0.8971501688744877, "learning_rate": 1.5055048928447256e-07, "loss": 0.6626, "step": 6502 }, { "epoch": 0.83, "grad_norm": 0.8648254248438829, "learning_rate": 1.503328125169754e-07, "loss": 0.7023, "step": 6503 }, { "epoch": 0.83, "grad_norm": 0.8068553009722405, "learning_rate": 1.501152804389485e-07, "loss": 0.5585, "step": 6504 }, { "epoch": 0.83, "grad_norm": 0.6309762813601055, "learning_rate": 1.498978930874354e-07, "loss": 0.6116, "step": 6505 }, { "epoch": 0.83, "grad_norm": 0.6222580135887993, "learning_rate": 1.4968065049945478e-07, "loss": 0.4785, "step": 6506 }, { "epoch": 0.83, "grad_norm": 0.8481866242584207, "learning_rate": 1.4946355271200074e-07, "loss": 0.6318, "step": 6507 }, { "epoch": 0.83, "grad_norm": 0.9584963075347593, "learning_rate": 1.492465997620429e-07, "loss": 0.6556, "step": 6508 }, { "epoch": 0.83, "grad_norm": 1.6162561214865427, "learning_rate": 1.4902979168652607e-07, "loss": 0.5706, "step": 6509 }, { "epoch": 0.83, "grad_norm": 0.7648698987514884, "learning_rate": 1.4881312852237026e-07, "loss": 0.5659, "step": 6510 }, { "epoch": 0.83, "grad_norm": 0.7030439748636484, "learning_rate": 1.485966103064712e-07, "loss": 0.5465, "step": 6511 }, { "epoch": 0.83, "grad_norm": 0.667857675456393, "learning_rate": 1.4838023707569946e-07, "loss": 0.5491, "step": 6512 }, { "epoch": 0.83, "grad_norm": 0.7100249374472007, "learning_rate": 1.4816400886690127e-07, "loss": 0.5057, "step": 6513 }, { "epoch": 0.83, "grad_norm": 0.7514819565522591, "learning_rate": 1.4794792571689797e-07, "loss": 0.5868, "step": 6514 }, { "epoch": 0.83, "grad_norm": 0.7257981876811831, "learning_rate": 1.477319876624864e-07, "loss": 0.5318, "step": 6515 }, { "epoch": 0.83, "grad_norm": 0.8036673745491099, "learning_rate": 1.4751619474043843e-07, "loss": 0.6503, "step": 6516 }, { "epoch": 0.83, "grad_norm": 0.6741845464808413, "learning_rate": 1.4730054698750126e-07, "loss": 0.5379, "step": 6517 }, { "epoch": 0.83, "grad_norm": 0.7783101949147241, "learning_rate": 1.470850444403977e-07, "loss": 0.5397, "step": 6518 }, { "epoch": 0.83, "grad_norm": 0.8422940959410613, "learning_rate": 1.4686968713582549e-07, "loss": 0.5795, "step": 6519 }, { "epoch": 0.83, "grad_norm": 0.872014422020499, "learning_rate": 1.4665447511045758e-07, "loss": 0.7028, "step": 6520 }, { "epoch": 0.83, "grad_norm": 0.7123893456551749, "learning_rate": 1.464394084009426e-07, "loss": 0.5459, "step": 6521 }, { "epoch": 0.83, "grad_norm": 0.8817654664448331, "learning_rate": 1.46224487043904e-07, "loss": 0.6513, "step": 6522 }, { "epoch": 0.83, "grad_norm": 0.9435918283860236, "learning_rate": 1.4600971107594074e-07, "loss": 0.6788, "step": 6523 }, { "epoch": 0.83, "grad_norm": 0.9313283743227634, "learning_rate": 1.4579508053362677e-07, "loss": 0.6509, "step": 6524 }, { "epoch": 0.83, "grad_norm": 0.7585167595958451, "learning_rate": 1.4558059545351142e-07, "loss": 0.585, "step": 6525 }, { "epoch": 0.83, "grad_norm": 0.7899481298766209, "learning_rate": 1.4536625587211947e-07, "loss": 0.6273, "step": 6526 }, { "epoch": 0.83, "grad_norm": 0.8742309760882022, "learning_rate": 1.4515206182595073e-07, "loss": 0.5328, "step": 6527 }, { "epoch": 0.83, "grad_norm": 0.7876055280872883, "learning_rate": 1.4493801335148015e-07, "loss": 0.5582, "step": 6528 }, { "epoch": 0.83, "grad_norm": 0.777145513648088, "learning_rate": 1.4472411048515786e-07, "loss": 0.5226, "step": 6529 }, { "epoch": 0.83, "grad_norm": 0.736675628086677, "learning_rate": 1.4451035326340943e-07, "loss": 0.5286, "step": 6530 }, { "epoch": 0.83, "grad_norm": 0.7234362508432582, "learning_rate": 1.4429674172263538e-07, "loss": 0.5426, "step": 6531 }, { "epoch": 0.83, "grad_norm": 0.943305132926063, "learning_rate": 1.4408327589921155e-07, "loss": 0.6382, "step": 6532 }, { "epoch": 0.83, "grad_norm": 0.7693478465087069, "learning_rate": 1.438699558294889e-07, "loss": 0.5437, "step": 6533 }, { "epoch": 0.83, "grad_norm": 0.925944123753023, "learning_rate": 1.4365678154979376e-07, "loss": 0.6312, "step": 6534 }, { "epoch": 0.83, "grad_norm": 0.7165278339926276, "learning_rate": 1.4344375309642742e-07, "loss": 0.5477, "step": 6535 }, { "epoch": 0.83, "grad_norm": 0.8518564301080275, "learning_rate": 1.4323087050566628e-07, "loss": 0.6002, "step": 6536 }, { "epoch": 0.83, "grad_norm": 0.982185612662202, "learning_rate": 1.4301813381376226e-07, "loss": 0.6417, "step": 6537 }, { "epoch": 0.83, "grad_norm": 0.8347147269926177, "learning_rate": 1.4280554305694203e-07, "loss": 0.5943, "step": 6538 }, { "epoch": 0.83, "grad_norm": 0.814560850972755, "learning_rate": 1.4259309827140764e-07, "loss": 0.6051, "step": 6539 }, { "epoch": 0.83, "grad_norm": 1.113341492585925, "learning_rate": 1.4238079949333615e-07, "loss": 0.6408, "step": 6540 }, { "epoch": 0.83, "grad_norm": 0.8238855606112827, "learning_rate": 1.4216864675887997e-07, "loss": 0.5631, "step": 6541 }, { "epoch": 0.83, "grad_norm": 1.0253125235743323, "learning_rate": 1.4195664010416641e-07, "loss": 0.6234, "step": 6542 }, { "epoch": 0.83, "grad_norm": 0.649662714718655, "learning_rate": 1.4174477956529795e-07, "loss": 0.5368, "step": 6543 }, { "epoch": 0.83, "grad_norm": 0.8377916733057882, "learning_rate": 1.415330651783523e-07, "loss": 0.5323, "step": 6544 }, { "epoch": 0.83, "grad_norm": 0.9085764746768034, "learning_rate": 1.4132149697938221e-07, "loss": 0.6741, "step": 6545 }, { "epoch": 0.83, "grad_norm": 0.7606017187112979, "learning_rate": 1.4111007500441564e-07, "loss": 0.5638, "step": 6546 }, { "epoch": 0.83, "grad_norm": 0.789828875096549, "learning_rate": 1.4089879928945536e-07, "loss": 0.6151, "step": 6547 }, { "epoch": 0.83, "grad_norm": 0.8561631290638471, "learning_rate": 1.4068766987047952e-07, "loss": 0.6275, "step": 6548 }, { "epoch": 0.83, "grad_norm": 0.8636281879678054, "learning_rate": 1.4047668678344125e-07, "loss": 0.6171, "step": 6549 }, { "epoch": 0.83, "grad_norm": 0.7077571834976603, "learning_rate": 1.4026585006426884e-07, "loss": 0.5741, "step": 6550 }, { "epoch": 0.83, "grad_norm": 4.345115247163805, "learning_rate": 1.400551597488654e-07, "loss": 0.658, "step": 6551 }, { "epoch": 0.83, "grad_norm": 0.63129245352723, "learning_rate": 1.3984461587310925e-07, "loss": 0.4978, "step": 6552 }, { "epoch": 0.83, "grad_norm": 1.0860823177194958, "learning_rate": 1.3963421847285417e-07, "loss": 0.6435, "step": 6553 }, { "epoch": 0.83, "grad_norm": 0.8203381484315194, "learning_rate": 1.394239675839285e-07, "loss": 0.6694, "step": 6554 }, { "epoch": 0.84, "grad_norm": 0.6810333823363631, "learning_rate": 1.392138632421358e-07, "loss": 0.5704, "step": 6555 }, { "epoch": 0.84, "grad_norm": 0.7186016607021293, "learning_rate": 1.3900390548325447e-07, "loss": 0.5295, "step": 6556 }, { "epoch": 0.84, "grad_norm": 0.6262430513637797, "learning_rate": 1.3879409434303825e-07, "loss": 0.5101, "step": 6557 }, { "epoch": 0.84, "grad_norm": 0.9365582301978889, "learning_rate": 1.3858442985721586e-07, "loss": 0.6707, "step": 6558 }, { "epoch": 0.84, "grad_norm": 0.6925915340299651, "learning_rate": 1.3837491206149088e-07, "loss": 0.51, "step": 6559 }, { "epoch": 0.84, "grad_norm": 0.6342401201483552, "learning_rate": 1.3816554099154198e-07, "loss": 0.5631, "step": 6560 }, { "epoch": 0.84, "grad_norm": 0.6756196714090805, "learning_rate": 1.37956316683023e-07, "loss": 0.5132, "step": 6561 }, { "epoch": 0.84, "grad_norm": 0.8126883442128703, "learning_rate": 1.377472391715625e-07, "loss": 0.6157, "step": 6562 }, { "epoch": 0.84, "grad_norm": 0.8588731426247326, "learning_rate": 1.3753830849276428e-07, "loss": 0.6847, "step": 6563 }, { "epoch": 0.84, "grad_norm": 0.809003457832747, "learning_rate": 1.3732952468220716e-07, "loss": 0.6098, "step": 6564 }, { "epoch": 0.84, "grad_norm": 0.6594047518370252, "learning_rate": 1.3712088777544462e-07, "loss": 0.5286, "step": 6565 }, { "epoch": 0.84, "grad_norm": 0.8988460711217129, "learning_rate": 1.3691239780800557e-07, "loss": 0.6244, "step": 6566 }, { "epoch": 0.84, "grad_norm": 0.7264885947235417, "learning_rate": 1.367040548153936e-07, "loss": 0.594, "step": 6567 }, { "epoch": 0.84, "grad_norm": 0.6736899763373626, "learning_rate": 1.3649585883308734e-07, "loss": 0.5066, "step": 6568 }, { "epoch": 0.84, "grad_norm": 0.7554583611591057, "learning_rate": 1.362878098965403e-07, "loss": 0.4949, "step": 6569 }, { "epoch": 0.84, "grad_norm": 0.6796017835448974, "learning_rate": 1.360799080411813e-07, "loss": 0.6228, "step": 6570 }, { "epoch": 0.84, "grad_norm": 0.7803923320094418, "learning_rate": 1.3587215330241365e-07, "loss": 0.585, "step": 6571 }, { "epoch": 0.84, "grad_norm": 0.8553407627882796, "learning_rate": 1.3566454571561582e-07, "loss": 0.6361, "step": 6572 }, { "epoch": 0.84, "grad_norm": 0.8012576759831905, "learning_rate": 1.354570853161413e-07, "loss": 0.6001, "step": 6573 }, { "epoch": 0.84, "grad_norm": 0.731128933531591, "learning_rate": 1.3524977213931843e-07, "loss": 0.5697, "step": 6574 }, { "epoch": 0.84, "grad_norm": 0.7872161738737213, "learning_rate": 1.350426062204505e-07, "loss": 0.6165, "step": 6575 }, { "epoch": 0.84, "grad_norm": 0.6933003117401341, "learning_rate": 1.3483558759481561e-07, "loss": 0.5444, "step": 6576 }, { "epoch": 0.84, "grad_norm": 0.6910999712129957, "learning_rate": 1.3462871629766692e-07, "loss": 0.5722, "step": 6577 }, { "epoch": 0.84, "grad_norm": 1.0973078411389914, "learning_rate": 1.3442199236423225e-07, "loss": 0.5955, "step": 6578 }, { "epoch": 0.84, "grad_norm": 0.6472127559721503, "learning_rate": 1.3421541582971497e-07, "loss": 0.5018, "step": 6579 }, { "epoch": 0.84, "grad_norm": 0.8270865354894193, "learning_rate": 1.3400898672929272e-07, "loss": 0.6506, "step": 6580 }, { "epoch": 0.84, "grad_norm": 0.7976147252189003, "learning_rate": 1.338027050981183e-07, "loss": 0.5201, "step": 6581 }, { "epoch": 0.84, "grad_norm": 0.7405648149171674, "learning_rate": 1.3359657097131905e-07, "loss": 0.5778, "step": 6582 }, { "epoch": 0.84, "grad_norm": 0.6801264794094015, "learning_rate": 1.3339058438399752e-07, "loss": 0.5348, "step": 6583 }, { "epoch": 0.84, "grad_norm": 0.646340423713667, "learning_rate": 1.3318474537123137e-07, "loss": 0.567, "step": 6584 }, { "epoch": 0.84, "grad_norm": 0.7109740501792439, "learning_rate": 1.3297905396807273e-07, "loss": 0.5903, "step": 6585 }, { "epoch": 0.84, "grad_norm": 1.0232668221402046, "learning_rate": 1.3277351020954864e-07, "loss": 0.588, "step": 6586 }, { "epoch": 0.84, "grad_norm": 0.6259289048602668, "learning_rate": 1.3256811413066104e-07, "loss": 0.5054, "step": 6587 }, { "epoch": 0.84, "grad_norm": 0.791669412735896, "learning_rate": 1.3236286576638667e-07, "loss": 0.622, "step": 6588 }, { "epoch": 0.84, "grad_norm": 0.7194127365164742, "learning_rate": 1.3215776515167743e-07, "loss": 0.5322, "step": 6589 }, { "epoch": 0.84, "grad_norm": 0.9017032117481077, "learning_rate": 1.3195281232145961e-07, "loss": 0.6096, "step": 6590 }, { "epoch": 0.84, "grad_norm": 0.9121874485161451, "learning_rate": 1.317480073106345e-07, "loss": 0.6584, "step": 6591 }, { "epoch": 0.84, "grad_norm": 0.7457618736352661, "learning_rate": 1.3154335015407847e-07, "loss": 0.5421, "step": 6592 }, { "epoch": 0.84, "grad_norm": 0.9045701643052891, "learning_rate": 1.313388408866424e-07, "loss": 0.6081, "step": 6593 }, { "epoch": 0.84, "grad_norm": 0.9323097142890862, "learning_rate": 1.3113447954315205e-07, "loss": 0.6649, "step": 6594 }, { "epoch": 0.84, "grad_norm": 0.8931746042883778, "learning_rate": 1.3093026615840809e-07, "loss": 0.6516, "step": 6595 }, { "epoch": 0.84, "grad_norm": 0.7415344973990833, "learning_rate": 1.307262007671859e-07, "loss": 0.5884, "step": 6596 }, { "epoch": 0.84, "grad_norm": 0.7973027568681745, "learning_rate": 1.3052228340423565e-07, "loss": 0.5734, "step": 6597 }, { "epoch": 0.84, "grad_norm": 0.7061783740926056, "learning_rate": 1.303185141042824e-07, "loss": 0.5592, "step": 6598 }, { "epoch": 0.84, "grad_norm": 0.7744447772824435, "learning_rate": 1.3011489290202593e-07, "loss": 0.6237, "step": 6599 }, { "epoch": 0.84, "grad_norm": 0.6878177373660196, "learning_rate": 1.2991141983214072e-07, "loss": 0.5688, "step": 6600 }, { "epoch": 0.84, "grad_norm": 0.7995798396023718, "learning_rate": 1.2970809492927626e-07, "loss": 0.5606, "step": 6601 }, { "epoch": 0.84, "grad_norm": 0.902353775933625, "learning_rate": 1.2950491822805665e-07, "loss": 0.6286, "step": 6602 }, { "epoch": 0.84, "grad_norm": 0.7842112889824372, "learning_rate": 1.2930188976308065e-07, "loss": 0.5938, "step": 6603 }, { "epoch": 0.84, "grad_norm": 1.1880893590675192, "learning_rate": 1.2909900956892195e-07, "loss": 0.6885, "step": 6604 }, { "epoch": 0.84, "grad_norm": 0.7075721410512502, "learning_rate": 1.28896277680129e-07, "loss": 0.568, "step": 6605 }, { "epoch": 0.84, "grad_norm": 0.8686764695268824, "learning_rate": 1.286936941312249e-07, "loss": 0.615, "step": 6606 }, { "epoch": 0.84, "grad_norm": 0.6797875534778155, "learning_rate": 1.2849125895670732e-07, "loss": 0.539, "step": 6607 }, { "epoch": 0.84, "grad_norm": 0.9561620123645299, "learning_rate": 1.2828897219104918e-07, "loss": 0.6255, "step": 6608 }, { "epoch": 0.84, "grad_norm": 0.6576725219393901, "learning_rate": 1.2808683386869757e-07, "loss": 0.5053, "step": 6609 }, { "epoch": 0.84, "grad_norm": 0.8602697108693139, "learning_rate": 1.278848440240744e-07, "loss": 0.6381, "step": 6610 }, { "epoch": 0.84, "grad_norm": 0.7007164457034769, "learning_rate": 1.2768300269157694e-07, "loss": 0.5356, "step": 6611 }, { "epoch": 0.84, "grad_norm": 0.6900494935389696, "learning_rate": 1.2748130990557638e-07, "loss": 0.5185, "step": 6612 }, { "epoch": 0.84, "grad_norm": 0.9019610861536197, "learning_rate": 1.2727976570041888e-07, "loss": 0.5171, "step": 6613 }, { "epoch": 0.84, "grad_norm": 0.9003308091236472, "learning_rate": 1.2707837011042545e-07, "loss": 0.6327, "step": 6614 }, { "epoch": 0.84, "grad_norm": 0.6655901590600044, "learning_rate": 1.268771231698914e-07, "loss": 0.524, "step": 6615 }, { "epoch": 0.84, "grad_norm": 0.6637540631482273, "learning_rate": 1.2667602491308727e-07, "loss": 0.5023, "step": 6616 }, { "epoch": 0.84, "grad_norm": 0.8584162978628107, "learning_rate": 1.2647507537425784e-07, "loss": 0.6208, "step": 6617 }, { "epoch": 0.84, "grad_norm": 2.9482405812779877, "learning_rate": 1.2627427458762275e-07, "loss": 0.6399, "step": 6618 }, { "epoch": 0.84, "grad_norm": 0.8778367638183109, "learning_rate": 1.260736225873762e-07, "loss": 0.6317, "step": 6619 }, { "epoch": 0.84, "grad_norm": 0.7334119359015416, "learning_rate": 1.258731194076873e-07, "loss": 0.577, "step": 6620 }, { "epoch": 0.84, "grad_norm": 0.8178849640619678, "learning_rate": 1.2567276508269965e-07, "loss": 0.6157, "step": 6621 }, { "epoch": 0.84, "grad_norm": 0.7374329202968979, "learning_rate": 1.2547255964653137e-07, "loss": 0.5201, "step": 6622 }, { "epoch": 0.84, "grad_norm": 0.7011504105362326, "learning_rate": 1.2527250313327543e-07, "loss": 0.5415, "step": 6623 }, { "epoch": 0.84, "grad_norm": 0.7193264607394868, "learning_rate": 1.2507259557699933e-07, "loss": 0.5295, "step": 6624 }, { "epoch": 0.84, "grad_norm": 0.7737404606027619, "learning_rate": 1.2487283701174522e-07, "loss": 0.549, "step": 6625 }, { "epoch": 0.84, "grad_norm": 0.7787757083245738, "learning_rate": 1.2467322747153008e-07, "loss": 0.6091, "step": 6626 }, { "epoch": 0.84, "grad_norm": 0.7599066282218115, "learning_rate": 1.2447376699034518e-07, "loss": 0.5127, "step": 6627 }, { "epoch": 0.84, "grad_norm": 0.6251734130236115, "learning_rate": 1.2427445560215654e-07, "loss": 0.5376, "step": 6628 }, { "epoch": 0.84, "grad_norm": 0.9058772093256446, "learning_rate": 1.2407529334090494e-07, "loss": 0.6395, "step": 6629 }, { "epoch": 0.84, "grad_norm": 0.6487502377371414, "learning_rate": 1.2387628024050556e-07, "loss": 0.551, "step": 6630 }, { "epoch": 0.84, "grad_norm": 0.7243707741245649, "learning_rate": 1.236774163348482e-07, "loss": 0.51, "step": 6631 }, { "epoch": 0.84, "grad_norm": 0.8853909691891717, "learning_rate": 1.2347870165779739e-07, "loss": 0.6221, "step": 6632 }, { "epoch": 0.85, "grad_norm": 0.6470700735591604, "learning_rate": 1.2328013624319223e-07, "loss": 0.482, "step": 6633 }, { "epoch": 0.85, "grad_norm": 0.5932581835428871, "learning_rate": 1.2308172012484618e-07, "loss": 0.4663, "step": 6634 }, { "epoch": 0.85, "grad_norm": 0.7669032840159081, "learning_rate": 1.2288345333654758e-07, "loss": 0.5212, "step": 6635 }, { "epoch": 0.85, "grad_norm": 0.8486069859351337, "learning_rate": 1.2268533591205889e-07, "loss": 0.6351, "step": 6636 }, { "epoch": 0.85, "grad_norm": 0.7400250037729184, "learning_rate": 1.2248736788511794e-07, "loss": 0.5091, "step": 6637 }, { "epoch": 0.85, "grad_norm": 0.8382119795140021, "learning_rate": 1.2228954928943634e-07, "loss": 0.6198, "step": 6638 }, { "epoch": 0.85, "grad_norm": 0.889488081623315, "learning_rate": 1.2209188015870064e-07, "loss": 0.6795, "step": 6639 }, { "epoch": 0.85, "grad_norm": 0.953436924447045, "learning_rate": 1.2189436052657164e-07, "loss": 0.5815, "step": 6640 }, { "epoch": 0.85, "grad_norm": 0.7016326675850533, "learning_rate": 1.2169699042668514e-07, "loss": 0.5207, "step": 6641 }, { "epoch": 0.85, "grad_norm": 0.7439956264510875, "learning_rate": 1.2149976989265097e-07, "loss": 0.5763, "step": 6642 }, { "epoch": 0.85, "grad_norm": 0.9074550608377534, "learning_rate": 1.213026989580539e-07, "loss": 0.6486, "step": 6643 }, { "epoch": 0.85, "grad_norm": 0.8841208838313453, "learning_rate": 1.2110577765645303e-07, "loss": 0.6226, "step": 6644 }, { "epoch": 0.85, "grad_norm": 0.7875208315192488, "learning_rate": 1.2090900602138198e-07, "loss": 0.4861, "step": 6645 }, { "epoch": 0.85, "grad_norm": 0.7897848812241941, "learning_rate": 1.2071238408634875e-07, "loss": 0.5597, "step": 6646 }, { "epoch": 0.85, "grad_norm": 0.6734837743998734, "learning_rate": 1.2051591188483633e-07, "loss": 0.5519, "step": 6647 }, { "epoch": 0.85, "grad_norm": 0.6516856111343156, "learning_rate": 1.2031958945030164e-07, "loss": 0.5175, "step": 6648 }, { "epoch": 0.85, "grad_norm": 0.9139426363160901, "learning_rate": 1.2012341681617643e-07, "loss": 0.6112, "step": 6649 }, { "epoch": 0.85, "grad_norm": 0.8736849144145844, "learning_rate": 1.1992739401586682e-07, "loss": 0.6887, "step": 6650 }, { "epoch": 0.85, "grad_norm": 0.6141920765886194, "learning_rate": 1.1973152108275352e-07, "loss": 0.5306, "step": 6651 }, { "epoch": 0.85, "grad_norm": 0.954838920264758, "learning_rate": 1.1953579805019165e-07, "loss": 0.6632, "step": 6652 }, { "epoch": 0.85, "grad_norm": 1.0271555591763564, "learning_rate": 1.1934022495151065e-07, "loss": 0.6309, "step": 6653 }, { "epoch": 0.85, "grad_norm": 0.6437111020352964, "learning_rate": 1.1914480182001475e-07, "loss": 0.5644, "step": 6654 }, { "epoch": 0.85, "grad_norm": 0.6737005848950873, "learning_rate": 1.1894952868898245e-07, "loss": 0.5591, "step": 6655 }, { "epoch": 0.85, "grad_norm": 0.6195736235668116, "learning_rate": 1.1875440559166672e-07, "loss": 0.513, "step": 6656 }, { "epoch": 0.85, "grad_norm": 1.0443924884149405, "learning_rate": 1.1855943256129485e-07, "loss": 0.577, "step": 6657 }, { "epoch": 0.85, "grad_norm": 0.9480243516281474, "learning_rate": 1.1836460963106887e-07, "loss": 0.7036, "step": 6658 }, { "epoch": 0.85, "grad_norm": 0.8349313421226502, "learning_rate": 1.181699368341651e-07, "loss": 0.6673, "step": 6659 }, { "epoch": 0.85, "grad_norm": 0.6178351783855008, "learning_rate": 1.1797541420373424e-07, "loss": 0.5053, "step": 6660 }, { "epoch": 0.85, "grad_norm": 0.839456525068892, "learning_rate": 1.1778104177290149e-07, "loss": 0.5853, "step": 6661 }, { "epoch": 0.85, "grad_norm": 1.0201704392513333, "learning_rate": 1.1758681957476623e-07, "loss": 0.5908, "step": 6662 }, { "epoch": 0.85, "grad_norm": 0.8626420618788531, "learning_rate": 1.1739274764240282e-07, "loss": 0.5979, "step": 6663 }, { "epoch": 0.85, "grad_norm": 0.7453625560484749, "learning_rate": 1.1719882600885967e-07, "loss": 0.5746, "step": 6664 }, { "epoch": 0.85, "grad_norm": 0.6674614786355566, "learning_rate": 1.1700505470715938e-07, "loss": 0.5521, "step": 6665 }, { "epoch": 0.85, "grad_norm": 0.7203034476137303, "learning_rate": 1.1681143377029934e-07, "loss": 0.5599, "step": 6666 }, { "epoch": 0.85, "grad_norm": 0.9124766647135, "learning_rate": 1.1661796323125117e-07, "loss": 0.625, "step": 6667 }, { "epoch": 0.85, "grad_norm": 0.7487601393714207, "learning_rate": 1.1642464312296074e-07, "loss": 0.5108, "step": 6668 }, { "epoch": 0.85, "grad_norm": 0.6626124685098754, "learning_rate": 1.1623147347834871e-07, "loss": 0.5573, "step": 6669 }, { "epoch": 0.85, "grad_norm": 0.7639055273102708, "learning_rate": 1.1603845433030967e-07, "loss": 0.5677, "step": 6670 }, { "epoch": 0.85, "grad_norm": 0.7295418202806963, "learning_rate": 1.1584558571171277e-07, "loss": 0.5922, "step": 6671 }, { "epoch": 0.85, "grad_norm": 0.5666244146704841, "learning_rate": 1.1565286765540161e-07, "loss": 0.4978, "step": 6672 }, { "epoch": 0.85, "grad_norm": 0.6774337091699196, "learning_rate": 1.1546030019419396e-07, "loss": 0.5196, "step": 6673 }, { "epoch": 0.85, "grad_norm": 0.8641139739513589, "learning_rate": 1.1526788336088222e-07, "loss": 0.5155, "step": 6674 }, { "epoch": 0.85, "grad_norm": 0.8363027669486846, "learning_rate": 1.1507561718823277e-07, "loss": 0.6075, "step": 6675 }, { "epoch": 0.85, "grad_norm": 0.8787427159601146, "learning_rate": 1.1488350170898675e-07, "loss": 0.6419, "step": 6676 }, { "epoch": 0.85, "grad_norm": 0.8447805169335512, "learning_rate": 1.146915369558592e-07, "loss": 0.6523, "step": 6677 }, { "epoch": 0.85, "grad_norm": 0.8384500708112046, "learning_rate": 1.1449972296153987e-07, "loss": 0.6211, "step": 6678 }, { "epoch": 0.85, "grad_norm": 0.8155321378313181, "learning_rate": 1.1430805975869262e-07, "loss": 0.6251, "step": 6679 }, { "epoch": 0.85, "grad_norm": 0.911429919984612, "learning_rate": 1.1411654737995579e-07, "loss": 0.5734, "step": 6680 }, { "epoch": 0.85, "grad_norm": 0.746567988582566, "learning_rate": 1.1392518585794186e-07, "loss": 0.5465, "step": 6681 }, { "epoch": 0.85, "grad_norm": 0.6976705153159293, "learning_rate": 1.1373397522523742e-07, "loss": 0.5425, "step": 6682 }, { "epoch": 0.85, "grad_norm": 0.712378668253382, "learning_rate": 1.1354291551440431e-07, "loss": 0.5517, "step": 6683 }, { "epoch": 0.85, "grad_norm": 2.9491721928659422, "learning_rate": 1.1335200675797751e-07, "loss": 0.6098, "step": 6684 }, { "epoch": 0.85, "grad_norm": 0.9076368822175975, "learning_rate": 1.1316124898846702e-07, "loss": 0.5922, "step": 6685 }, { "epoch": 0.85, "grad_norm": 0.7155415984720567, "learning_rate": 1.1297064223835672e-07, "loss": 0.5271, "step": 6686 }, { "epoch": 0.85, "grad_norm": 0.7594468891339862, "learning_rate": 1.1278018654010513e-07, "loss": 0.5371, "step": 6687 }, { "epoch": 0.85, "grad_norm": 1.0190801826453102, "learning_rate": 1.1258988192614471e-07, "loss": 0.5833, "step": 6688 }, { "epoch": 0.85, "grad_norm": 0.7339736667083511, "learning_rate": 1.1239972842888246e-07, "loss": 0.5326, "step": 6689 }, { "epoch": 0.85, "grad_norm": 0.8338657461801464, "learning_rate": 1.1220972608069946e-07, "loss": 0.6, "step": 6690 }, { "epoch": 0.85, "grad_norm": 0.7041980556719474, "learning_rate": 1.1201987491395126e-07, "loss": 0.5693, "step": 6691 }, { "epoch": 0.85, "grad_norm": 0.8706531457370236, "learning_rate": 1.1183017496096736e-07, "loss": 0.6453, "step": 6692 }, { "epoch": 0.85, "grad_norm": 0.6890933788648442, "learning_rate": 1.1164062625405191e-07, "loss": 0.5388, "step": 6693 }, { "epoch": 0.85, "grad_norm": 0.8628548936245998, "learning_rate": 1.1145122882548263e-07, "loss": 0.6328, "step": 6694 }, { "epoch": 0.85, "grad_norm": 0.7744261864043575, "learning_rate": 1.1126198270751241e-07, "loss": 0.5908, "step": 6695 }, { "epoch": 0.85, "grad_norm": 0.7732611108477915, "learning_rate": 1.110728879323678e-07, "loss": 0.5533, "step": 6696 }, { "epoch": 0.85, "grad_norm": 0.6085845797745635, "learning_rate": 1.108839445322497e-07, "loss": 0.5499, "step": 6697 }, { "epoch": 0.85, "grad_norm": 0.8152828133194631, "learning_rate": 1.1069515253933292e-07, "loss": 0.5925, "step": 6698 }, { "epoch": 0.85, "grad_norm": 0.8523757693168615, "learning_rate": 1.1050651198576711e-07, "loss": 0.6044, "step": 6699 }, { "epoch": 0.85, "grad_norm": 0.9048013996429554, "learning_rate": 1.1031802290367553e-07, "loss": 0.6124, "step": 6700 }, { "epoch": 0.85, "grad_norm": 0.6768827886691475, "learning_rate": 1.1012968532515598e-07, "loss": 0.5274, "step": 6701 }, { "epoch": 0.85, "grad_norm": 0.7129570815656142, "learning_rate": 1.0994149928228047e-07, "loss": 0.5526, "step": 6702 }, { "epoch": 0.85, "grad_norm": 0.680973949174689, "learning_rate": 1.0975346480709512e-07, "loss": 0.5859, "step": 6703 }, { "epoch": 0.85, "grad_norm": 0.8397369873679229, "learning_rate": 1.0956558193162014e-07, "loss": 0.6224, "step": 6704 }, { "epoch": 0.85, "grad_norm": 0.9432661576334819, "learning_rate": 1.093778506878501e-07, "loss": 0.6018, "step": 6705 }, { "epoch": 0.85, "grad_norm": 0.6928328038403601, "learning_rate": 1.0919027110775358e-07, "loss": 0.4614, "step": 6706 }, { "epoch": 0.85, "grad_norm": 0.9027244651066589, "learning_rate": 1.0900284322327358e-07, "loss": 0.6275, "step": 6707 }, { "epoch": 0.85, "grad_norm": 0.8266825092053888, "learning_rate": 1.08815567066327e-07, "loss": 0.5362, "step": 6708 }, { "epoch": 0.85, "grad_norm": 0.8748823078427032, "learning_rate": 1.086284426688051e-07, "loss": 0.6195, "step": 6709 }, { "epoch": 0.85, "grad_norm": 0.6129085600296452, "learning_rate": 1.084414700625731e-07, "loss": 0.5331, "step": 6710 }, { "epoch": 0.85, "grad_norm": 0.9133326556204208, "learning_rate": 1.0825464927947059e-07, "loss": 0.6493, "step": 6711 }, { "epoch": 0.86, "grad_norm": 0.7360747300532534, "learning_rate": 1.0806798035131126e-07, "loss": 0.5375, "step": 6712 }, { "epoch": 0.86, "grad_norm": 0.8988102722728267, "learning_rate": 1.0788146330988269e-07, "loss": 0.651, "step": 6713 }, { "epoch": 0.86, "grad_norm": 0.7607042658100939, "learning_rate": 1.0769509818694689e-07, "loss": 0.5714, "step": 6714 }, { "epoch": 0.86, "grad_norm": 0.8142816991598115, "learning_rate": 1.0750888501423994e-07, "loss": 0.5754, "step": 6715 }, { "epoch": 0.86, "grad_norm": 0.6766259696647785, "learning_rate": 1.0732282382347202e-07, "loss": 0.517, "step": 6716 }, { "epoch": 0.86, "grad_norm": 0.9160529022852956, "learning_rate": 1.0713691464632735e-07, "loss": 0.6545, "step": 6717 }, { "epoch": 0.86, "grad_norm": 0.8144254014278861, "learning_rate": 1.0695115751446426e-07, "loss": 0.5743, "step": 6718 }, { "epoch": 0.86, "grad_norm": 0.7499536822694304, "learning_rate": 1.0676555245951535e-07, "loss": 0.5662, "step": 6719 }, { "epoch": 0.86, "grad_norm": 0.8724351031869012, "learning_rate": 1.0658009951308722e-07, "loss": 0.6212, "step": 6720 }, { "epoch": 0.86, "grad_norm": 0.9128834690329203, "learning_rate": 1.0639479870676038e-07, "loss": 0.6287, "step": 6721 }, { "epoch": 0.86, "grad_norm": 0.6103191872658422, "learning_rate": 1.0620965007208993e-07, "loss": 0.4862, "step": 6722 }, { "epoch": 0.86, "grad_norm": 0.6551613737967841, "learning_rate": 1.0602465364060475e-07, "loss": 0.5272, "step": 6723 }, { "epoch": 0.86, "grad_norm": 0.8371605329167151, "learning_rate": 1.0583980944380755e-07, "loss": 0.5837, "step": 6724 }, { "epoch": 0.86, "grad_norm": 0.7277728065936244, "learning_rate": 1.0565511751317558e-07, "loss": 0.529, "step": 6725 }, { "epoch": 0.86, "grad_norm": 0.7643517966513762, "learning_rate": 1.054705778801599e-07, "loss": 0.5615, "step": 6726 }, { "epoch": 0.86, "grad_norm": 0.9436863124961647, "learning_rate": 1.0528619057618571e-07, "loss": 0.6281, "step": 6727 }, { "epoch": 0.86, "grad_norm": 0.828605408227628, "learning_rate": 1.0510195563265212e-07, "loss": 0.5317, "step": 6728 }, { "epoch": 0.86, "grad_norm": 0.7401904632986045, "learning_rate": 1.0491787308093248e-07, "loss": 0.5529, "step": 6729 }, { "epoch": 0.86, "grad_norm": 0.7270841622546429, "learning_rate": 1.0473394295237426e-07, "loss": 0.5062, "step": 6730 }, { "epoch": 0.86, "grad_norm": 0.7055885270528672, "learning_rate": 1.0455016527829863e-07, "loss": 0.5582, "step": 6731 }, { "epoch": 0.86, "grad_norm": 0.7020935718856278, "learning_rate": 1.0436654009000123e-07, "loss": 0.544, "step": 6732 }, { "epoch": 0.86, "grad_norm": 0.6334362518632649, "learning_rate": 1.0418306741875138e-07, "loss": 0.5319, "step": 6733 }, { "epoch": 0.86, "grad_norm": 0.6615210119243968, "learning_rate": 1.0399974729579253e-07, "loss": 0.4932, "step": 6734 }, { "epoch": 0.86, "grad_norm": 1.0173080969906356, "learning_rate": 1.0381657975234237e-07, "loss": 0.5955, "step": 6735 }, { "epoch": 0.86, "grad_norm": 1.0273283002421458, "learning_rate": 1.0363356481959218e-07, "loss": 0.6401, "step": 6736 }, { "epoch": 0.86, "grad_norm": 0.8691917665086982, "learning_rate": 1.0345070252870769e-07, "loss": 0.6366, "step": 6737 }, { "epoch": 0.86, "grad_norm": 0.6528316075955349, "learning_rate": 1.0326799291082832e-07, "loss": 0.5443, "step": 6738 }, { "epoch": 0.86, "grad_norm": 1.009417440100602, "learning_rate": 1.0308543599706776e-07, "loss": 0.5954, "step": 6739 }, { "epoch": 0.86, "grad_norm": 0.7135821122723486, "learning_rate": 1.0290303181851345e-07, "loss": 0.5412, "step": 6740 }, { "epoch": 0.86, "grad_norm": 0.6629733017127923, "learning_rate": 1.027207804062269e-07, "loss": 0.5674, "step": 6741 }, { "epoch": 0.86, "grad_norm": 0.8109402686849455, "learning_rate": 1.025386817912437e-07, "loss": 0.6101, "step": 6742 }, { "epoch": 0.86, "grad_norm": 0.8884085208743441, "learning_rate": 1.0235673600457317e-07, "loss": 0.5541, "step": 6743 }, { "epoch": 0.86, "grad_norm": 0.7224869856825186, "learning_rate": 1.0217494307719909e-07, "loss": 0.5261, "step": 6744 }, { "epoch": 0.86, "grad_norm": 0.839018281235952, "learning_rate": 1.0199330304007859e-07, "loss": 0.5994, "step": 6745 }, { "epoch": 0.86, "grad_norm": 0.6687246683527613, "learning_rate": 1.0181181592414334e-07, "loss": 0.464, "step": 6746 }, { "epoch": 0.86, "grad_norm": 0.8836726903950467, "learning_rate": 1.0163048176029831e-07, "loss": 0.6691, "step": 6747 }, { "epoch": 0.86, "grad_norm": 0.8297961842309421, "learning_rate": 1.0144930057942336e-07, "loss": 0.646, "step": 6748 }, { "epoch": 0.86, "grad_norm": 0.6105724368319058, "learning_rate": 1.0126827241237134e-07, "loss": 0.5387, "step": 6749 }, { "epoch": 0.86, "grad_norm": 0.849690045802289, "learning_rate": 1.0108739728996973e-07, "loss": 0.6008, "step": 6750 }, { "epoch": 0.86, "grad_norm": 0.7515886197845002, "learning_rate": 1.0090667524301954e-07, "loss": 0.5837, "step": 6751 }, { "epoch": 0.86, "grad_norm": 0.6302776493671874, "learning_rate": 1.0072610630229583e-07, "loss": 0.5255, "step": 6752 }, { "epoch": 0.86, "grad_norm": 0.6942068817321367, "learning_rate": 1.0054569049854766e-07, "loss": 0.5685, "step": 6753 }, { "epoch": 0.86, "grad_norm": 0.8936073299368944, "learning_rate": 1.0036542786249802e-07, "loss": 0.6345, "step": 6754 }, { "epoch": 0.86, "grad_norm": 1.153845869124329, "learning_rate": 1.0018531842484368e-07, "loss": 0.6331, "step": 6755 }, { "epoch": 0.86, "grad_norm": 0.8380349440536838, "learning_rate": 1.0000536221625532e-07, "loss": 0.6178, "step": 6756 }, { "epoch": 0.86, "grad_norm": 0.7862464877506053, "learning_rate": 9.98255592673778e-08, "loss": 0.6437, "step": 6757 }, { "epoch": 0.86, "grad_norm": 0.9286609372073289, "learning_rate": 9.964590960882946e-08, "loss": 0.6507, "step": 6758 }, { "epoch": 0.86, "grad_norm": 0.6256742970285631, "learning_rate": 9.946641327120297e-08, "loss": 0.5031, "step": 6759 }, { "epoch": 0.86, "grad_norm": 0.6209429785739257, "learning_rate": 9.928707028506466e-08, "loss": 0.5255, "step": 6760 }, { "epoch": 0.86, "grad_norm": 0.8562337446806013, "learning_rate": 9.910788068095466e-08, "loss": 0.5645, "step": 6761 }, { "epoch": 0.86, "grad_norm": 0.9810535114074306, "learning_rate": 9.892884448938709e-08, "loss": 0.5959, "step": 6762 }, { "epoch": 0.86, "grad_norm": 0.8862165087264969, "learning_rate": 9.874996174085015e-08, "loss": 0.5996, "step": 6763 }, { "epoch": 0.86, "grad_norm": 0.8918420413990427, "learning_rate": 9.857123246580545e-08, "loss": 0.6678, "step": 6764 }, { "epoch": 0.86, "grad_norm": 0.7868093801462644, "learning_rate": 9.839265669468899e-08, "loss": 0.6026, "step": 6765 }, { "epoch": 0.86, "grad_norm": 0.8363795999689583, "learning_rate": 9.821423445791011e-08, "loss": 0.6248, "step": 6766 }, { "epoch": 0.86, "grad_norm": 0.6860195080010946, "learning_rate": 9.803596578585238e-08, "loss": 0.5215, "step": 6767 }, { "epoch": 0.86, "grad_norm": 0.854664158450517, "learning_rate": 9.785785070887309e-08, "loss": 0.6111, "step": 6768 }, { "epoch": 0.86, "grad_norm": 0.7564539389942752, "learning_rate": 9.767988925730331e-08, "loss": 0.4769, "step": 6769 }, { "epoch": 0.86, "grad_norm": 0.7328477552225422, "learning_rate": 9.750208146144822e-08, "loss": 0.5716, "step": 6770 }, { "epoch": 0.86, "grad_norm": 0.8957478061560773, "learning_rate": 9.732442735158641e-08, "loss": 0.6395, "step": 6771 }, { "epoch": 0.86, "grad_norm": 0.6577035334055743, "learning_rate": 9.714692695797066e-08, "loss": 0.5609, "step": 6772 }, { "epoch": 0.86, "grad_norm": 0.5869134142808156, "learning_rate": 9.696958031082714e-08, "loss": 0.5027, "step": 6773 }, { "epoch": 0.86, "grad_norm": 0.7181504617935077, "learning_rate": 9.679238744035656e-08, "loss": 0.5586, "step": 6774 }, { "epoch": 0.86, "grad_norm": 0.8398668452246594, "learning_rate": 9.661534837673291e-08, "loss": 0.5303, "step": 6775 }, { "epoch": 0.86, "grad_norm": 0.8391012936346779, "learning_rate": 9.643846315010406e-08, "loss": 0.6083, "step": 6776 }, { "epoch": 0.86, "grad_norm": 0.8112297265116198, "learning_rate": 9.626173179059171e-08, "loss": 0.6048, "step": 6777 }, { "epoch": 0.86, "grad_norm": 0.8721029422727922, "learning_rate": 9.608515432829134e-08, "loss": 0.6883, "step": 6778 }, { "epoch": 0.86, "grad_norm": 0.7728073598942554, "learning_rate": 9.590873079327222e-08, "loss": 0.6255, "step": 6779 }, { "epoch": 0.86, "grad_norm": 0.8533586624456085, "learning_rate": 9.573246121557754e-08, "loss": 0.5756, "step": 6780 }, { "epoch": 0.86, "grad_norm": 0.6284498892168462, "learning_rate": 9.555634562522408e-08, "loss": 0.5617, "step": 6781 }, { "epoch": 0.86, "grad_norm": 0.9408383467899323, "learning_rate": 9.538038405220262e-08, "loss": 0.6014, "step": 6782 }, { "epoch": 0.86, "grad_norm": 0.7705686234462577, "learning_rate": 9.52045765264774e-08, "loss": 0.5751, "step": 6783 }, { "epoch": 0.86, "grad_norm": 0.8099089307706886, "learning_rate": 9.50289230779866e-08, "loss": 0.6579, "step": 6784 }, { "epoch": 0.86, "grad_norm": 0.6284434496973932, "learning_rate": 9.485342373664241e-08, "loss": 0.5404, "step": 6785 }, { "epoch": 0.86, "grad_norm": 0.6383037304909641, "learning_rate": 9.467807853233046e-08, "loss": 0.5221, "step": 6786 }, { "epoch": 0.86, "grad_norm": 0.7100400672729043, "learning_rate": 9.450288749491008e-08, "loss": 0.604, "step": 6787 }, { "epoch": 0.86, "grad_norm": 0.7752669299867923, "learning_rate": 9.432785065421467e-08, "loss": 0.5676, "step": 6788 }, { "epoch": 0.86, "grad_norm": 0.6708949231566096, "learning_rate": 9.415296804005101e-08, "loss": 0.5785, "step": 6789 }, { "epoch": 0.87, "grad_norm": 0.6875327835454713, "learning_rate": 9.397823968219976e-08, "loss": 0.4991, "step": 6790 }, { "epoch": 0.87, "grad_norm": 0.7659104269082397, "learning_rate": 9.380366561041552e-08, "loss": 0.5743, "step": 6791 }, { "epoch": 0.87, "grad_norm": 0.5971162799968365, "learning_rate": 9.362924585442633e-08, "loss": 0.5374, "step": 6792 }, { "epoch": 0.87, "grad_norm": 0.6941701316312453, "learning_rate": 9.345498044393408e-08, "loss": 0.5088, "step": 6793 }, { "epoch": 0.87, "grad_norm": 0.7270743750489037, "learning_rate": 9.328086940861435e-08, "loss": 0.5094, "step": 6794 }, { "epoch": 0.87, "grad_norm": 0.7172624570677782, "learning_rate": 9.310691277811644e-08, "loss": 0.5975, "step": 6795 }, { "epoch": 0.87, "grad_norm": 0.6851460501216999, "learning_rate": 9.293311058206332e-08, "loss": 0.5365, "step": 6796 }, { "epoch": 0.87, "grad_norm": 0.9029254320862996, "learning_rate": 9.275946285005165e-08, "loss": 0.574, "step": 6797 }, { "epoch": 0.87, "grad_norm": 0.6902533354083347, "learning_rate": 9.258596961165199e-08, "loss": 0.5551, "step": 6798 }, { "epoch": 0.87, "grad_norm": 0.8687318222840329, "learning_rate": 9.241263089640827e-08, "loss": 0.5519, "step": 6799 }, { "epoch": 0.87, "grad_norm": 0.8312866618744909, "learning_rate": 9.223944673383832e-08, "loss": 0.6318, "step": 6800 }, { "epoch": 0.87, "grad_norm": 0.7180592716626668, "learning_rate": 9.206641715343367e-08, "loss": 0.5728, "step": 6801 }, { "epoch": 0.87, "grad_norm": 0.672076819008067, "learning_rate": 9.189354218465938e-08, "loss": 0.5678, "step": 6802 }, { "epoch": 0.87, "grad_norm": 1.1059381931601633, "learning_rate": 9.172082185695429e-08, "loss": 0.6648, "step": 6803 }, { "epoch": 0.87, "grad_norm": 0.7191162566734296, "learning_rate": 9.154825619973083e-08, "loss": 0.5593, "step": 6804 }, { "epoch": 0.87, "grad_norm": 0.9737008990219779, "learning_rate": 9.137584524237496e-08, "loss": 0.6572, "step": 6805 }, { "epoch": 0.87, "grad_norm": 0.6693208582118326, "learning_rate": 9.120358901424685e-08, "loss": 0.5508, "step": 6806 }, { "epoch": 0.87, "grad_norm": 0.684606707735644, "learning_rate": 9.10314875446797e-08, "loss": 0.5768, "step": 6807 }, { "epoch": 0.87, "grad_norm": 0.6354787066206922, "learning_rate": 9.085954086298076e-08, "loss": 0.5536, "step": 6808 }, { "epoch": 0.87, "grad_norm": 0.832107147400525, "learning_rate": 9.068774899843057e-08, "loss": 0.5872, "step": 6809 }, { "epoch": 0.87, "grad_norm": 0.733975620543821, "learning_rate": 9.051611198028364e-08, "loss": 0.5637, "step": 6810 }, { "epoch": 0.87, "grad_norm": 0.8387209005960767, "learning_rate": 9.034462983776791e-08, "loss": 0.6311, "step": 6811 }, { "epoch": 0.87, "grad_norm": 0.7780183170063538, "learning_rate": 9.017330260008493e-08, "loss": 0.573, "step": 6812 }, { "epoch": 0.87, "grad_norm": 0.7907845579268973, "learning_rate": 9.000213029641012e-08, "loss": 0.6162, "step": 6813 }, { "epoch": 0.87, "grad_norm": 0.6715061782753139, "learning_rate": 8.983111295589218e-08, "loss": 0.5145, "step": 6814 }, { "epoch": 0.87, "grad_norm": 0.6940391696848187, "learning_rate": 8.966025060765359e-08, "loss": 0.4955, "step": 6815 }, { "epoch": 0.87, "grad_norm": 0.7961286683178959, "learning_rate": 8.948954328079051e-08, "loss": 0.6167, "step": 6816 }, { "epoch": 0.87, "grad_norm": 0.7465720521570403, "learning_rate": 8.931899100437257e-08, "loss": 0.608, "step": 6817 }, { "epoch": 0.87, "grad_norm": 0.7664232984714339, "learning_rate": 8.914859380744311e-08, "loss": 0.5918, "step": 6818 }, { "epoch": 0.87, "grad_norm": 0.8825795416165956, "learning_rate": 8.89783517190189e-08, "loss": 0.6424, "step": 6819 }, { "epoch": 0.87, "grad_norm": 0.9360786729769102, "learning_rate": 8.880826476809034e-08, "loss": 0.5909, "step": 6820 }, { "epoch": 0.87, "grad_norm": 0.7633767306266623, "learning_rate": 8.863833298362167e-08, "loss": 0.5401, "step": 6821 }, { "epoch": 0.87, "grad_norm": 0.9749441124753508, "learning_rate": 8.846855639455042e-08, "loss": 0.6215, "step": 6822 }, { "epoch": 0.87, "grad_norm": 0.867090895690543, "learning_rate": 8.829893502978769e-08, "loss": 0.6568, "step": 6823 }, { "epoch": 0.87, "grad_norm": 0.7151972195190742, "learning_rate": 8.812946891821838e-08, "loss": 0.5422, "step": 6824 }, { "epoch": 0.87, "grad_norm": 0.7712009868711653, "learning_rate": 8.796015808870061e-08, "loss": 0.6041, "step": 6825 }, { "epoch": 0.87, "grad_norm": 0.6215407183641615, "learning_rate": 8.779100257006633e-08, "loss": 0.5468, "step": 6826 }, { "epoch": 0.87, "grad_norm": 0.6780762773168175, "learning_rate": 8.762200239112105e-08, "loss": 0.5593, "step": 6827 }, { "epoch": 0.87, "grad_norm": 0.7676743870246849, "learning_rate": 8.745315758064375e-08, "loss": 0.5213, "step": 6828 }, { "epoch": 0.87, "grad_norm": 0.8277305592600002, "learning_rate": 8.728446816738677e-08, "loss": 0.5945, "step": 6829 }, { "epoch": 0.87, "grad_norm": 0.8501984783227978, "learning_rate": 8.711593418007624e-08, "loss": 0.6046, "step": 6830 }, { "epoch": 0.87, "grad_norm": 0.8214772176370492, "learning_rate": 8.694755564741185e-08, "loss": 0.5359, "step": 6831 }, { "epoch": 0.87, "grad_norm": 0.9015815903611316, "learning_rate": 8.677933259806635e-08, "loss": 0.601, "step": 6832 }, { "epoch": 0.87, "grad_norm": 0.7653392459857699, "learning_rate": 8.661126506068695e-08, "loss": 0.6486, "step": 6833 }, { "epoch": 0.87, "grad_norm": 0.820007074501415, "learning_rate": 8.644335306389339e-08, "loss": 0.5817, "step": 6834 }, { "epoch": 0.87, "grad_norm": 0.6975463357538378, "learning_rate": 8.627559663627948e-08, "loss": 0.5298, "step": 6835 }, { "epoch": 0.87, "grad_norm": 0.6850010282215753, "learning_rate": 8.610799580641237e-08, "loss": 0.5475, "step": 6836 }, { "epoch": 0.87, "grad_norm": 0.7917599700550572, "learning_rate": 8.594055060283267e-08, "loss": 0.5761, "step": 6837 }, { "epoch": 0.87, "grad_norm": 1.21340983869454, "learning_rate": 8.577326105405458e-08, "loss": 0.6429, "step": 6838 }, { "epoch": 0.87, "grad_norm": 0.7849072325192115, "learning_rate": 8.560612718856586e-08, "loss": 0.565, "step": 6839 }, { "epoch": 0.87, "grad_norm": 0.7349181219805981, "learning_rate": 8.543914903482752e-08, "loss": 0.5591, "step": 6840 }, { "epoch": 0.87, "grad_norm": 0.7777222731112763, "learning_rate": 8.527232662127426e-08, "loss": 0.5123, "step": 6841 }, { "epoch": 0.87, "grad_norm": 0.8321514468362095, "learning_rate": 8.510565997631425e-08, "loss": 0.5706, "step": 6842 }, { "epoch": 0.87, "grad_norm": 1.0468228365570798, "learning_rate": 8.49391491283291e-08, "loss": 0.6007, "step": 6843 }, { "epoch": 0.87, "grad_norm": 0.8740319734554856, "learning_rate": 8.477279410567384e-08, "loss": 0.569, "step": 6844 }, { "epoch": 0.87, "grad_norm": 0.8861879983603689, "learning_rate": 8.460659493667687e-08, "loss": 0.662, "step": 6845 }, { "epoch": 0.87, "grad_norm": 0.9551669969880263, "learning_rate": 8.444055164964025e-08, "loss": 0.6242, "step": 6846 }, { "epoch": 0.87, "grad_norm": 0.6677460159827141, "learning_rate": 8.427466427283957e-08, "loss": 0.5507, "step": 6847 }, { "epoch": 0.87, "grad_norm": 0.8121108930165598, "learning_rate": 8.410893283452359e-08, "loss": 0.5806, "step": 6848 }, { "epoch": 0.87, "grad_norm": 0.703116842796417, "learning_rate": 8.394335736291459e-08, "loss": 0.592, "step": 6849 }, { "epoch": 0.87, "grad_norm": 0.8266183317464724, "learning_rate": 8.377793788620847e-08, "loss": 0.6302, "step": 6850 }, { "epoch": 0.87, "grad_norm": 0.8936519535434424, "learning_rate": 8.361267443257448e-08, "loss": 0.5631, "step": 6851 }, { "epoch": 0.87, "grad_norm": 0.6829197600005379, "learning_rate": 8.34475670301551e-08, "loss": 0.5748, "step": 6852 }, { "epoch": 0.87, "grad_norm": 0.8986446533029359, "learning_rate": 8.32826157070664e-08, "loss": 0.687, "step": 6853 }, { "epoch": 0.87, "grad_norm": 0.8186926384201073, "learning_rate": 8.311782049139804e-08, "loss": 0.6157, "step": 6854 }, { "epoch": 0.87, "grad_norm": 0.64727726731569, "learning_rate": 8.295318141121288e-08, "loss": 0.5528, "step": 6855 }, { "epoch": 0.87, "grad_norm": 0.7666734035984534, "learning_rate": 8.278869849454717e-08, "loss": 0.5799, "step": 6856 }, { "epoch": 0.87, "grad_norm": 0.7176478842717808, "learning_rate": 8.262437176941062e-08, "loss": 0.5072, "step": 6857 }, { "epoch": 0.87, "grad_norm": 0.6066560301355026, "learning_rate": 8.246020126378628e-08, "loss": 0.5247, "step": 6858 }, { "epoch": 0.87, "grad_norm": 0.7680876047187593, "learning_rate": 8.22961870056309e-08, "loss": 0.5427, "step": 6859 }, { "epoch": 0.87, "grad_norm": 0.8654548931119209, "learning_rate": 8.213232902287438e-08, "loss": 0.588, "step": 6860 }, { "epoch": 0.87, "grad_norm": 0.870994139970731, "learning_rate": 8.196862734341992e-08, "loss": 0.5837, "step": 6861 }, { "epoch": 0.87, "grad_norm": 0.7705763922204295, "learning_rate": 8.180508199514424e-08, "loss": 0.5334, "step": 6862 }, { "epoch": 0.87, "grad_norm": 0.7066147013413556, "learning_rate": 8.164169300589752e-08, "loss": 0.5706, "step": 6863 }, { "epoch": 0.87, "grad_norm": 0.6249259219526876, "learning_rate": 8.147846040350292e-08, "loss": 0.5289, "step": 6864 }, { "epoch": 0.87, "grad_norm": 0.8670330036495717, "learning_rate": 8.131538421575757e-08, "loss": 0.5499, "step": 6865 }, { "epoch": 0.87, "grad_norm": 0.7392985231468201, "learning_rate": 8.115246447043144e-08, "loss": 0.6194, "step": 6866 }, { "epoch": 0.87, "grad_norm": 0.7235977291327412, "learning_rate": 8.098970119526816e-08, "loss": 0.5477, "step": 6867 }, { "epoch": 0.87, "grad_norm": 0.6186889907820781, "learning_rate": 8.082709441798452e-08, "loss": 0.4982, "step": 6868 }, { "epoch": 0.88, "grad_norm": 0.6065518503945571, "learning_rate": 8.066464416627084e-08, "loss": 0.5618, "step": 6869 }, { "epoch": 0.88, "grad_norm": 0.8429031935760314, "learning_rate": 8.050235046779074e-08, "loss": 0.6179, "step": 6870 }, { "epoch": 0.88, "grad_norm": 1.1464909020852825, "learning_rate": 8.034021335018104e-08, "loss": 0.6275, "step": 6871 }, { "epoch": 0.88, "grad_norm": 0.6162559446109809, "learning_rate": 8.017823284105207e-08, "loss": 0.49, "step": 6872 }, { "epoch": 0.88, "grad_norm": 1.0452705994828997, "learning_rate": 8.001640896798744e-08, "loss": 0.6896, "step": 6873 }, { "epoch": 0.88, "grad_norm": 0.8626459048987661, "learning_rate": 7.985474175854401e-08, "loss": 0.6274, "step": 6874 }, { "epoch": 0.88, "grad_norm": 0.8007173766631358, "learning_rate": 7.969323124025207e-08, "loss": 0.5676, "step": 6875 }, { "epoch": 0.88, "grad_norm": 0.8178685703215661, "learning_rate": 7.953187744061517e-08, "loss": 0.6273, "step": 6876 }, { "epoch": 0.88, "grad_norm": 0.794724527709687, "learning_rate": 7.937068038711025e-08, "loss": 0.5413, "step": 6877 }, { "epoch": 0.88, "grad_norm": 0.7981443349812419, "learning_rate": 7.920964010718733e-08, "loss": 0.6435, "step": 6878 }, { "epoch": 0.88, "grad_norm": 0.8169239414073827, "learning_rate": 7.904875662826993e-08, "loss": 0.5827, "step": 6879 }, { "epoch": 0.88, "grad_norm": 0.8469438538860539, "learning_rate": 7.888802997775501e-08, "loss": 0.5744, "step": 6880 }, { "epoch": 0.88, "grad_norm": 0.9210301163912626, "learning_rate": 7.872746018301235e-08, "loss": 0.6209, "step": 6881 }, { "epoch": 0.88, "grad_norm": 0.7361719091158877, "learning_rate": 7.856704727138563e-08, "loss": 0.5594, "step": 6882 }, { "epoch": 0.88, "grad_norm": 0.866455375895114, "learning_rate": 7.840679127019123e-08, "loss": 0.6465, "step": 6883 }, { "epoch": 0.88, "grad_norm": 0.6579234938884513, "learning_rate": 7.824669220671909e-08, "loss": 0.5257, "step": 6884 }, { "epoch": 0.88, "grad_norm": 0.9855475943417021, "learning_rate": 7.808675010823262e-08, "loss": 0.6268, "step": 6885 }, { "epoch": 0.88, "grad_norm": 0.6715333014422652, "learning_rate": 7.792696500196816e-08, "loss": 0.5347, "step": 6886 }, { "epoch": 0.88, "grad_norm": 0.715290090561249, "learning_rate": 7.77673369151356e-08, "loss": 0.5348, "step": 6887 }, { "epoch": 0.88, "grad_norm": 0.9966692315370363, "learning_rate": 7.760786587491763e-08, "loss": 0.6532, "step": 6888 }, { "epoch": 0.88, "grad_norm": 0.6739805185981467, "learning_rate": 7.744855190847089e-08, "loss": 0.5344, "step": 6889 }, { "epoch": 0.88, "grad_norm": 1.1403019993341226, "learning_rate": 7.728939504292442e-08, "loss": 0.6209, "step": 6890 }, { "epoch": 0.88, "grad_norm": 0.8204945819940481, "learning_rate": 7.713039530538135e-08, "loss": 0.6173, "step": 6891 }, { "epoch": 0.88, "grad_norm": 0.7066961701393989, "learning_rate": 7.697155272291767e-08, "loss": 0.5282, "step": 6892 }, { "epoch": 0.88, "grad_norm": 1.3334245427667666, "learning_rate": 7.681286732258251e-08, "loss": 0.636, "step": 6893 }, { "epoch": 0.88, "grad_norm": 0.8053545368900017, "learning_rate": 7.66543391313983e-08, "loss": 0.6726, "step": 6894 }, { "epoch": 0.88, "grad_norm": 0.729481934191792, "learning_rate": 7.649596817636084e-08, "loss": 0.5351, "step": 6895 }, { "epoch": 0.88, "grad_norm": 0.6659848902818687, "learning_rate": 7.633775448443903e-08, "loss": 0.5576, "step": 6896 }, { "epoch": 0.88, "grad_norm": 0.9400457311828868, "learning_rate": 7.617969808257485e-08, "loss": 0.6463, "step": 6897 }, { "epoch": 0.88, "grad_norm": 0.9545068535770227, "learning_rate": 7.602179899768391e-08, "loss": 0.5893, "step": 6898 }, { "epoch": 0.88, "grad_norm": 0.981148363417466, "learning_rate": 7.586405725665468e-08, "loss": 0.6496, "step": 6899 }, { "epoch": 0.88, "grad_norm": 1.032086894043614, "learning_rate": 7.57064728863489e-08, "loss": 0.6571, "step": 6900 }, { "epoch": 0.88, "grad_norm": 0.786972767534941, "learning_rate": 7.554904591360156e-08, "loss": 0.6192, "step": 6901 }, { "epoch": 0.88, "grad_norm": 1.0593063026666267, "learning_rate": 7.539177636522077e-08, "loss": 0.6161, "step": 6902 }, { "epoch": 0.88, "grad_norm": 0.8062060563216312, "learning_rate": 7.523466426798808e-08, "loss": 0.5461, "step": 6903 }, { "epoch": 0.88, "grad_norm": 0.6133803446117289, "learning_rate": 7.507770964865789e-08, "loss": 0.5395, "step": 6904 }, { "epoch": 0.88, "grad_norm": 0.6903782765967106, "learning_rate": 7.492091253395793e-08, "loss": 0.5662, "step": 6905 }, { "epoch": 0.88, "grad_norm": 0.6716531382689335, "learning_rate": 7.476427295058918e-08, "loss": 0.4571, "step": 6906 }, { "epoch": 0.88, "grad_norm": 0.7151594756951869, "learning_rate": 7.460779092522573e-08, "loss": 0.5715, "step": 6907 }, { "epoch": 0.88, "grad_norm": 0.9219911491229557, "learning_rate": 7.445146648451483e-08, "loss": 0.6418, "step": 6908 }, { "epoch": 0.88, "grad_norm": 0.8375210506370196, "learning_rate": 7.429529965507686e-08, "loss": 0.6223, "step": 6909 }, { "epoch": 0.88, "grad_norm": 0.9321159735628902, "learning_rate": 7.413929046350542e-08, "loss": 0.6681, "step": 6910 }, { "epoch": 0.88, "grad_norm": 0.8602580715954953, "learning_rate": 7.398343893636738e-08, "loss": 0.6061, "step": 6911 }, { "epoch": 0.88, "grad_norm": 0.6394954138442802, "learning_rate": 7.38277451002024e-08, "loss": 0.5277, "step": 6912 }, { "epoch": 0.88, "grad_norm": 0.66555279040408, "learning_rate": 7.367220898152371e-08, "loss": 0.5411, "step": 6913 }, { "epoch": 0.88, "grad_norm": 0.9494891599745592, "learning_rate": 7.351683060681734e-08, "loss": 0.6659, "step": 6914 }, { "epoch": 0.88, "grad_norm": 0.9262798418832864, "learning_rate": 7.336161000254281e-08, "loss": 0.6533, "step": 6915 }, { "epoch": 0.88, "grad_norm": 0.9229669373747947, "learning_rate": 7.32065471951322e-08, "loss": 0.5959, "step": 6916 }, { "epoch": 0.88, "grad_norm": 0.7179226066448882, "learning_rate": 7.305164221099147e-08, "loss": 0.5648, "step": 6917 }, { "epoch": 0.88, "grad_norm": 0.650157866450246, "learning_rate": 7.289689507649921e-08, "loss": 0.528, "step": 6918 }, { "epoch": 0.88, "grad_norm": 0.648120408204685, "learning_rate": 7.274230581800722e-08, "loss": 0.54, "step": 6919 }, { "epoch": 0.88, "grad_norm": 0.7868036011208724, "learning_rate": 7.258787446184055e-08, "loss": 0.5845, "step": 6920 }, { "epoch": 0.88, "grad_norm": 0.8290873009807476, "learning_rate": 7.243360103429707e-08, "loss": 0.6417, "step": 6921 }, { "epoch": 0.88, "grad_norm": 0.7578942271529542, "learning_rate": 7.227948556164798e-08, "loss": 0.57, "step": 6922 }, { "epoch": 0.88, "grad_norm": 0.8918249702654337, "learning_rate": 7.212552807013761e-08, "loss": 0.6151, "step": 6923 }, { "epoch": 0.88, "grad_norm": 1.0562440358099203, "learning_rate": 7.197172858598333e-08, "loss": 0.5901, "step": 6924 }, { "epoch": 0.88, "grad_norm": 0.9166551971054305, "learning_rate": 7.181808713537552e-08, "loss": 0.6726, "step": 6925 }, { "epoch": 0.88, "grad_norm": 0.833281185013969, "learning_rate": 7.16646037444777e-08, "loss": 0.6054, "step": 6926 }, { "epoch": 0.88, "grad_norm": 0.8724139484318465, "learning_rate": 7.151127843942661e-08, "loss": 0.6755, "step": 6927 }, { "epoch": 0.88, "grad_norm": 0.8033149169612629, "learning_rate": 7.135811124633185e-08, "loss": 0.4989, "step": 6928 }, { "epoch": 0.88, "grad_norm": 0.9029011618830645, "learning_rate": 7.120510219127618e-08, "loss": 0.5228, "step": 6929 }, { "epoch": 0.88, "grad_norm": 0.6433894776261151, "learning_rate": 7.105225130031544e-08, "loss": 0.5469, "step": 6930 }, { "epoch": 0.88, "grad_norm": 0.7362709223353763, "learning_rate": 7.089955859947872e-08, "loss": 0.5882, "step": 6931 }, { "epoch": 0.88, "grad_norm": 0.7247743627710322, "learning_rate": 7.074702411476785e-08, "loss": 0.5566, "step": 6932 }, { "epoch": 0.88, "grad_norm": 0.6466446013557478, "learning_rate": 7.059464787215786e-08, "loss": 0.5219, "step": 6933 }, { "epoch": 0.88, "grad_norm": 0.6515325887976694, "learning_rate": 7.044242989759685e-08, "loss": 0.5661, "step": 6934 }, { "epoch": 0.88, "grad_norm": 0.7577275332162173, "learning_rate": 7.02903702170059e-08, "loss": 0.5311, "step": 6935 }, { "epoch": 0.88, "grad_norm": 0.6932903478452594, "learning_rate": 7.013846885627939e-08, "loss": 0.5552, "step": 6936 }, { "epoch": 0.88, "grad_norm": 0.7037976890395362, "learning_rate": 6.99867258412844e-08, "loss": 0.5222, "step": 6937 }, { "epoch": 0.88, "grad_norm": 0.7013182816015341, "learning_rate": 6.983514119786104e-08, "loss": 0.5652, "step": 6938 }, { "epoch": 0.88, "grad_norm": 0.9648116059149787, "learning_rate": 6.96837149518229e-08, "loss": 0.5932, "step": 6939 }, { "epoch": 0.88, "grad_norm": 0.8665016930562945, "learning_rate": 6.953244712895612e-08, "loss": 0.6361, "step": 6940 }, { "epoch": 0.88, "grad_norm": 0.7298476186018755, "learning_rate": 6.938133775501998e-08, "loss": 0.5444, "step": 6941 }, { "epoch": 0.88, "grad_norm": 0.8048790705659395, "learning_rate": 6.9230386855747e-08, "loss": 0.6153, "step": 6942 }, { "epoch": 0.88, "grad_norm": 0.9725276862344931, "learning_rate": 6.90795944568422e-08, "loss": 0.6696, "step": 6943 }, { "epoch": 0.88, "grad_norm": 0.6531194821576215, "learning_rate": 6.892896058398435e-08, "loss": 0.5341, "step": 6944 }, { "epoch": 0.88, "grad_norm": 0.5720547383127168, "learning_rate": 6.877848526282482e-08, "loss": 0.4684, "step": 6945 }, { "epoch": 0.88, "grad_norm": 0.6424773152354798, "learning_rate": 6.862816851898778e-08, "loss": 0.5028, "step": 6946 }, { "epoch": 0.89, "grad_norm": 0.732354076579313, "learning_rate": 6.847801037807066e-08, "loss": 0.5809, "step": 6947 }, { "epoch": 0.89, "grad_norm": 0.9890253360174299, "learning_rate": 6.832801086564377e-08, "loss": 0.578, "step": 6948 }, { "epoch": 0.89, "grad_norm": 0.9257025794569517, "learning_rate": 6.817817000725068e-08, "loss": 0.6644, "step": 6949 }, { "epoch": 0.89, "grad_norm": 0.8020415586958842, "learning_rate": 6.802848782840754e-08, "loss": 0.5971, "step": 6950 }, { "epoch": 0.89, "grad_norm": 0.8301822486951606, "learning_rate": 6.787896435460372e-08, "loss": 0.6396, "step": 6951 }, { "epoch": 0.89, "grad_norm": 0.68628990903234, "learning_rate": 6.772959961130153e-08, "loss": 0.5739, "step": 6952 }, { "epoch": 0.89, "grad_norm": 0.7055082842241646, "learning_rate": 6.758039362393609e-08, "loss": 0.4978, "step": 6953 }, { "epoch": 0.89, "grad_norm": 0.8264713731605237, "learning_rate": 6.743134641791581e-08, "loss": 0.6464, "step": 6954 }, { "epoch": 0.89, "grad_norm": 1.1122096246422692, "learning_rate": 6.728245801862187e-08, "loss": 0.7371, "step": 6955 }, { "epoch": 0.89, "grad_norm": 0.6698898490741051, "learning_rate": 6.71337284514083e-08, "loss": 0.4681, "step": 6956 }, { "epoch": 0.89, "grad_norm": 0.793084449984343, "learning_rate": 6.698515774160219e-08, "loss": 0.6508, "step": 6957 }, { "epoch": 0.89, "grad_norm": 0.950195459429806, "learning_rate": 6.683674591450372e-08, "loss": 0.5983, "step": 6958 }, { "epoch": 0.89, "grad_norm": 0.7011857713541121, "learning_rate": 6.66884929953857e-08, "loss": 0.5585, "step": 6959 }, { "epoch": 0.89, "grad_norm": 0.7370790662525254, "learning_rate": 6.654039900949426e-08, "loss": 0.6015, "step": 6960 }, { "epoch": 0.89, "grad_norm": 0.6586092218759189, "learning_rate": 6.639246398204812e-08, "loss": 0.5576, "step": 6961 }, { "epoch": 0.89, "grad_norm": 0.7761813259055178, "learning_rate": 6.624468793823912e-08, "loss": 0.5801, "step": 6962 }, { "epoch": 0.89, "grad_norm": 0.7705640854811164, "learning_rate": 6.60970709032319e-08, "loss": 0.5624, "step": 6963 }, { "epoch": 0.89, "grad_norm": 0.7753959052012914, "learning_rate": 6.594961290216427e-08, "loss": 0.5676, "step": 6964 }, { "epoch": 0.89, "grad_norm": 0.791167667817404, "learning_rate": 6.580231396014657e-08, "loss": 0.59, "step": 6965 }, { "epoch": 0.89, "grad_norm": 0.7919152950924248, "learning_rate": 6.565517410226251e-08, "loss": 0.5922, "step": 6966 }, { "epoch": 0.89, "grad_norm": 0.8313115429330323, "learning_rate": 6.550819335356839e-08, "loss": 0.6168, "step": 6967 }, { "epoch": 0.89, "grad_norm": 0.9077003998401888, "learning_rate": 6.53613717390934e-08, "loss": 0.6841, "step": 6968 }, { "epoch": 0.89, "grad_norm": 0.9032696126034437, "learning_rate": 6.521470928383965e-08, "loss": 0.5138, "step": 6969 }, { "epoch": 0.89, "grad_norm": 0.9068482657213722, "learning_rate": 6.506820601278262e-08, "loss": 0.6309, "step": 6970 }, { "epoch": 0.89, "grad_norm": 0.9267133061026424, "learning_rate": 6.492186195087002e-08, "loss": 0.602, "step": 6971 }, { "epoch": 0.89, "grad_norm": 0.8797253382735752, "learning_rate": 6.477567712302267e-08, "loss": 0.5649, "step": 6972 }, { "epoch": 0.89, "grad_norm": 0.7487443713547458, "learning_rate": 6.462965155413457e-08, "loss": 0.5517, "step": 6973 }, { "epoch": 0.89, "grad_norm": 0.7508024197982183, "learning_rate": 6.448378526907206e-08, "loss": 0.5297, "step": 6974 }, { "epoch": 0.89, "grad_norm": 0.8968118617383457, "learning_rate": 6.43380782926749e-08, "loss": 0.6843, "step": 6975 }, { "epoch": 0.89, "grad_norm": 0.8169930457748107, "learning_rate": 6.419253064975527e-08, "loss": 0.4929, "step": 6976 }, { "epoch": 0.89, "grad_norm": 0.6906877405065984, "learning_rate": 6.404714236509845e-08, "loss": 0.5425, "step": 6977 }, { "epoch": 0.89, "grad_norm": 0.5982670614112038, "learning_rate": 6.390191346346263e-08, "loss": 0.5457, "step": 6978 }, { "epoch": 0.89, "grad_norm": 0.6819306750645038, "learning_rate": 6.375684396957881e-08, "loss": 0.5602, "step": 6979 }, { "epoch": 0.89, "grad_norm": 0.6665020075968046, "learning_rate": 6.361193390815078e-08, "loss": 0.539, "step": 6980 }, { "epoch": 0.89, "grad_norm": 0.7364039475920091, "learning_rate": 6.346718330385503e-08, "loss": 0.5585, "step": 6981 }, { "epoch": 0.89, "grad_norm": 0.7455926190571361, "learning_rate": 6.33225921813414e-08, "loss": 0.5917, "step": 6982 }, { "epoch": 0.89, "grad_norm": 0.6836527738813388, "learning_rate": 6.317816056523195e-08, "loss": 0.5856, "step": 6983 }, { "epoch": 0.89, "grad_norm": 0.8991476596377391, "learning_rate": 6.303388848012215e-08, "loss": 0.6005, "step": 6984 }, { "epoch": 0.89, "grad_norm": 0.8854265293043774, "learning_rate": 6.288977595057975e-08, "loss": 0.6581, "step": 6985 }, { "epoch": 0.89, "grad_norm": 0.9572188658011584, "learning_rate": 6.274582300114584e-08, "loss": 0.5685, "step": 6986 }, { "epoch": 0.89, "grad_norm": 0.6462710448571821, "learning_rate": 6.260202965633387e-08, "loss": 0.5496, "step": 6987 }, { "epoch": 0.89, "grad_norm": 0.781748665267685, "learning_rate": 6.245839594063074e-08, "loss": 0.5519, "step": 6988 }, { "epoch": 0.89, "grad_norm": 0.8598895717648117, "learning_rate": 6.231492187849541e-08, "loss": 0.619, "step": 6989 }, { "epoch": 0.89, "grad_norm": 0.6034370443589493, "learning_rate": 6.217160749436023e-08, "loss": 0.4898, "step": 6990 }, { "epoch": 0.89, "grad_norm": 0.6773241990681657, "learning_rate": 6.202845281263014e-08, "loss": 0.5306, "step": 6991 }, { "epoch": 0.89, "grad_norm": 0.6741899411891884, "learning_rate": 6.188545785768274e-08, "loss": 0.5343, "step": 6992 }, { "epoch": 0.89, "grad_norm": 0.7714288834477202, "learning_rate": 6.174262265386865e-08, "loss": 0.5628, "step": 6993 }, { "epoch": 0.89, "grad_norm": 0.8146217862817768, "learning_rate": 6.159994722551121e-08, "loss": 0.57, "step": 6994 }, { "epoch": 0.89, "grad_norm": 0.7105403885842833, "learning_rate": 6.145743159690653e-08, "loss": 0.5453, "step": 6995 }, { "epoch": 0.89, "grad_norm": 0.6904829573549789, "learning_rate": 6.131507579232364e-08, "loss": 0.5643, "step": 6996 }, { "epoch": 0.89, "grad_norm": 0.8801054327049488, "learning_rate": 6.117287983600427e-08, "loss": 0.6681, "step": 6997 }, { "epoch": 0.89, "grad_norm": 0.8619721623384892, "learning_rate": 6.103084375216271e-08, "loss": 0.5772, "step": 6998 }, { "epoch": 0.89, "grad_norm": 0.9398662087432044, "learning_rate": 6.08889675649864e-08, "loss": 0.638, "step": 6999 }, { "epoch": 0.89, "grad_norm": 0.6969350419724655, "learning_rate": 6.074725129863534e-08, "loss": 0.5578, "step": 7000 }, { "epoch": 0.89, "grad_norm": 0.7070171214536989, "learning_rate": 6.060569497724222e-08, "loss": 0.596, "step": 7001 }, { "epoch": 0.89, "grad_norm": 0.7023650513547002, "learning_rate": 6.046429862491276e-08, "loss": 0.561, "step": 7002 }, { "epoch": 0.89, "grad_norm": 0.6639650128132786, "learning_rate": 6.032306226572525e-08, "loss": 0.5626, "step": 7003 }, { "epoch": 0.89, "grad_norm": 0.7198296153213678, "learning_rate": 6.01819859237308e-08, "loss": 0.5298, "step": 7004 }, { "epoch": 0.89, "grad_norm": 0.9616764860856934, "learning_rate": 6.004106962295319e-08, "loss": 0.6104, "step": 7005 }, { "epoch": 0.89, "grad_norm": 0.9494099441791194, "learning_rate": 5.9900313387389e-08, "loss": 0.6133, "step": 7006 }, { "epoch": 0.89, "grad_norm": 1.0206743623902785, "learning_rate": 5.975971724100748e-08, "loss": 0.6901, "step": 7007 }, { "epoch": 0.89, "grad_norm": 0.8288342593858744, "learning_rate": 5.961928120775084e-08, "loss": 0.5236, "step": 7008 }, { "epoch": 0.89, "grad_norm": 0.6030666047651422, "learning_rate": 5.947900531153371e-08, "loss": 0.5813, "step": 7009 }, { "epoch": 0.89, "grad_norm": 0.614061384405718, "learning_rate": 5.933888957624378e-08, "loss": 0.5164, "step": 7010 }, { "epoch": 0.89, "grad_norm": 0.7664054837683564, "learning_rate": 5.9198934025741164e-08, "loss": 0.6525, "step": 7011 }, { "epoch": 0.89, "grad_norm": 0.6988778252210806, "learning_rate": 5.905913868385892e-08, "loss": 0.5482, "step": 7012 }, { "epoch": 0.89, "grad_norm": 0.6528872784454941, "learning_rate": 5.8919503574402785e-08, "loss": 0.5366, "step": 7013 }, { "epoch": 0.89, "grad_norm": 0.7425451749060353, "learning_rate": 5.878002872115095e-08, "loss": 0.5561, "step": 7014 }, { "epoch": 0.89, "grad_norm": 0.7827920819464114, "learning_rate": 5.8640714147854766e-08, "loss": 0.5005, "step": 7015 }, { "epoch": 0.89, "grad_norm": 0.806860379693222, "learning_rate": 5.8501559878237904e-08, "loss": 0.6517, "step": 7016 }, { "epoch": 0.89, "grad_norm": 0.6331917739821395, "learning_rate": 5.836256593599698e-08, "loss": 0.5347, "step": 7017 }, { "epoch": 0.89, "grad_norm": 0.8435690262669994, "learning_rate": 5.8223732344801046e-08, "loss": 0.5854, "step": 7018 }, { "epoch": 0.89, "grad_norm": 0.67138523681544, "learning_rate": 5.80850591282922e-08, "loss": 0.5289, "step": 7019 }, { "epoch": 0.89, "grad_norm": 0.6205320590980302, "learning_rate": 5.794654631008511e-08, "loss": 0.4477, "step": 7020 }, { "epoch": 0.89, "grad_norm": 0.8250685349284711, "learning_rate": 5.780819391376679e-08, "loss": 0.642, "step": 7021 }, { "epoch": 0.89, "grad_norm": 0.8751999149495736, "learning_rate": 5.7670001962897396e-08, "loss": 0.5948, "step": 7022 }, { "epoch": 0.89, "grad_norm": 0.7425268216368743, "learning_rate": 5.753197048100955e-08, "loss": 0.5194, "step": 7023 }, { "epoch": 0.89, "grad_norm": 0.8262700752859442, "learning_rate": 5.739409949160867e-08, "loss": 0.6444, "step": 7024 }, { "epoch": 0.89, "grad_norm": 0.6358490444121793, "learning_rate": 5.725638901817254e-08, "loss": 0.5239, "step": 7025 }, { "epoch": 0.9, "grad_norm": 0.8395759610166903, "learning_rate": 5.711883908415194e-08, "loss": 0.5691, "step": 7026 }, { "epoch": 0.9, "grad_norm": 0.8892731931701809, "learning_rate": 5.698144971297003e-08, "loss": 0.6155, "step": 7027 }, { "epoch": 0.9, "grad_norm": 0.6750710652037517, "learning_rate": 5.68442209280231e-08, "loss": 0.506, "step": 7028 }, { "epoch": 0.9, "grad_norm": 0.6111345783371932, "learning_rate": 5.6707152752679567e-08, "loss": 0.4973, "step": 7029 }, { "epoch": 0.9, "grad_norm": 0.9993048765030773, "learning_rate": 5.6570245210280864e-08, "loss": 0.6385, "step": 7030 }, { "epoch": 0.9, "grad_norm": 0.7757039205668105, "learning_rate": 5.6433498324140795e-08, "loss": 0.5025, "step": 7031 }, { "epoch": 0.9, "grad_norm": 0.85497661377225, "learning_rate": 5.629691211754595e-08, "loss": 0.6327, "step": 7032 }, { "epoch": 0.9, "grad_norm": 1.0247603448137435, "learning_rate": 5.61604866137555e-08, "loss": 0.5397, "step": 7033 }, { "epoch": 0.9, "grad_norm": 0.6443613605825536, "learning_rate": 5.602422183600131e-08, "loss": 0.5259, "step": 7034 }, { "epoch": 0.9, "grad_norm": 1.0876592870216841, "learning_rate": 5.588811780748792e-08, "loss": 0.6284, "step": 7035 }, { "epoch": 0.9, "grad_norm": 0.7177688025976733, "learning_rate": 5.575217455139247e-08, "loss": 0.5266, "step": 7036 }, { "epoch": 0.9, "grad_norm": 0.6234408428229516, "learning_rate": 5.561639209086444e-08, "loss": 0.4917, "step": 7037 }, { "epoch": 0.9, "grad_norm": 0.8060715388877318, "learning_rate": 5.548077044902644e-08, "loss": 0.6093, "step": 7038 }, { "epoch": 0.9, "grad_norm": 0.7742599930981701, "learning_rate": 5.534530964897322e-08, "loss": 0.571, "step": 7039 }, { "epoch": 0.9, "grad_norm": 0.7436829975707513, "learning_rate": 5.521000971377243e-08, "loss": 0.5458, "step": 7040 }, { "epoch": 0.9, "grad_norm": 0.708445701556239, "learning_rate": 5.507487066646432e-08, "loss": 0.5171, "step": 7041 }, { "epoch": 0.9, "grad_norm": 0.7551362747512802, "learning_rate": 5.4939892530061485e-08, "loss": 0.5488, "step": 7042 }, { "epoch": 0.9, "grad_norm": 0.7865885025849543, "learning_rate": 5.480507532754941e-08, "loss": 0.5609, "step": 7043 }, { "epoch": 0.9, "grad_norm": 0.6160009906099309, "learning_rate": 5.4670419081886073e-08, "loss": 0.5023, "step": 7044 }, { "epoch": 0.9, "grad_norm": 0.7311021506957778, "learning_rate": 5.453592381600191e-08, "loss": 0.5586, "step": 7045 }, { "epoch": 0.9, "grad_norm": 0.6759879553861401, "learning_rate": 5.440158955280016e-08, "loss": 0.5435, "step": 7046 }, { "epoch": 0.9, "grad_norm": 0.8353157525552973, "learning_rate": 5.426741631515663e-08, "loss": 0.5944, "step": 7047 }, { "epoch": 0.9, "grad_norm": 0.7160200590554945, "learning_rate": 5.413340412591949e-08, "loss": 0.5461, "step": 7048 }, { "epoch": 0.9, "grad_norm": 0.9450780792153107, "learning_rate": 5.3999553007909594e-08, "loss": 0.6754, "step": 7049 }, { "epoch": 0.9, "grad_norm": 0.760357082744667, "learning_rate": 5.3865862983920595e-08, "loss": 0.5559, "step": 7050 }, { "epoch": 0.9, "grad_norm": 0.6081104902986652, "learning_rate": 5.373233407671829e-08, "loss": 0.5177, "step": 7051 }, { "epoch": 0.9, "grad_norm": 0.6930087126034485, "learning_rate": 5.359896630904137e-08, "loss": 0.5835, "step": 7052 }, { "epoch": 0.9, "grad_norm": 0.8777596102280375, "learning_rate": 5.34657597036009e-08, "loss": 0.6064, "step": 7053 }, { "epoch": 0.9, "grad_norm": 0.5989904712053076, "learning_rate": 5.33327142830805e-08, "loss": 0.4793, "step": 7054 }, { "epoch": 0.9, "grad_norm": 0.9133858191939173, "learning_rate": 5.319983007013673e-08, "loss": 0.6029, "step": 7055 }, { "epoch": 0.9, "grad_norm": 0.9920762105671919, "learning_rate": 5.306710708739814e-08, "loss": 0.6254, "step": 7056 }, { "epoch": 0.9, "grad_norm": 0.7416429024815674, "learning_rate": 5.293454535746622e-08, "loss": 0.5798, "step": 7057 }, { "epoch": 0.9, "grad_norm": 0.7535890438574102, "learning_rate": 5.280214490291479e-08, "loss": 0.5535, "step": 7058 }, { "epoch": 0.9, "grad_norm": 0.8825413650833511, "learning_rate": 5.266990574629016e-08, "loss": 0.6337, "step": 7059 }, { "epoch": 0.9, "grad_norm": 0.7062372806110339, "learning_rate": 5.253782791011141e-08, "loss": 0.5501, "step": 7060 }, { "epoch": 0.9, "grad_norm": 0.7367564663337032, "learning_rate": 5.2405911416870006e-08, "loss": 0.5477, "step": 7061 }, { "epoch": 0.9, "grad_norm": 0.8149490217147053, "learning_rate": 5.227415628902987e-08, "loss": 0.5408, "step": 7062 }, { "epoch": 0.9, "grad_norm": 0.8822065742107744, "learning_rate": 5.2142562549027604e-08, "loss": 0.6295, "step": 7063 }, { "epoch": 0.9, "grad_norm": 0.6190621102572367, "learning_rate": 5.201113021927217e-08, "loss": 0.4959, "step": 7064 }, { "epoch": 0.9, "grad_norm": 0.8501209122122887, "learning_rate": 5.187985932214523e-08, "loss": 0.6135, "step": 7065 }, { "epoch": 0.9, "grad_norm": 0.8778404556831634, "learning_rate": 5.1748749880000884e-08, "loss": 0.634, "step": 7066 }, { "epoch": 0.9, "grad_norm": 0.9739838137615773, "learning_rate": 5.161780191516552e-08, "loss": 0.5767, "step": 7067 }, { "epoch": 0.9, "grad_norm": 0.8158645640468021, "learning_rate": 5.148701544993839e-08, "loss": 0.6124, "step": 7068 }, { "epoch": 0.9, "grad_norm": 0.7288472685719366, "learning_rate": 5.135639050659091e-08, "loss": 0.5519, "step": 7069 }, { "epoch": 0.9, "grad_norm": 0.6568639135983344, "learning_rate": 5.122592710736728e-08, "loss": 0.5571, "step": 7070 }, { "epoch": 0.9, "grad_norm": 0.6537953585821406, "learning_rate": 5.1095625274483964e-08, "loss": 0.5061, "step": 7071 }, { "epoch": 0.9, "grad_norm": 0.7381449002199798, "learning_rate": 5.096548503013009e-08, "loss": 0.5413, "step": 7072 }, { "epoch": 0.9, "grad_norm": 0.6862588099764385, "learning_rate": 5.0835506396467165e-08, "loss": 0.5268, "step": 7073 }, { "epoch": 0.9, "grad_norm": 0.7055649282177653, "learning_rate": 5.070568939562914e-08, "loss": 0.5105, "step": 7074 }, { "epoch": 0.9, "grad_norm": 0.7475369890384103, "learning_rate": 5.057603404972255e-08, "loss": 0.5104, "step": 7075 }, { "epoch": 0.9, "grad_norm": 0.7185419618830547, "learning_rate": 5.04465403808263e-08, "loss": 0.6057, "step": 7076 }, { "epoch": 0.9, "grad_norm": 0.7070674943234772, "learning_rate": 5.0317208410991854e-08, "loss": 0.5505, "step": 7077 }, { "epoch": 0.9, "grad_norm": 0.6663443623184074, "learning_rate": 5.018803816224315e-08, "loss": 0.5695, "step": 7078 }, { "epoch": 0.9, "grad_norm": 0.882182931294623, "learning_rate": 5.005902965657638e-08, "loss": 0.6274, "step": 7079 }, { "epoch": 0.9, "grad_norm": 0.8666546667993995, "learning_rate": 4.993018291596041e-08, "loss": 0.6241, "step": 7080 }, { "epoch": 0.9, "grad_norm": 0.5910146516458711, "learning_rate": 4.980149796233657e-08, "loss": 0.4944, "step": 7081 }, { "epoch": 0.9, "grad_norm": 0.7272342859564757, "learning_rate": 4.967297481761856e-08, "loss": 0.618, "step": 7082 }, { "epoch": 0.9, "grad_norm": 0.8588081871627958, "learning_rate": 4.9544613503692546e-08, "loss": 0.6146, "step": 7083 }, { "epoch": 0.9, "grad_norm": 0.7477562946487415, "learning_rate": 4.9416414042417144e-08, "loss": 0.5459, "step": 7084 }, { "epoch": 0.9, "grad_norm": 0.6543054875937356, "learning_rate": 4.928837645562323e-08, "loss": 0.475, "step": 7085 }, { "epoch": 0.9, "grad_norm": 0.9231795196464616, "learning_rate": 4.916050076511435e-08, "loss": 0.657, "step": 7086 }, { "epoch": 0.9, "grad_norm": 0.8594975675436148, "learning_rate": 4.903278699266644e-08, "loss": 0.6339, "step": 7087 }, { "epoch": 0.9, "grad_norm": 0.8995687606157043, "learning_rate": 4.8905235160027644e-08, "loss": 0.6627, "step": 7088 }, { "epoch": 0.9, "grad_norm": 0.7371662714394093, "learning_rate": 4.877784528891904e-08, "loss": 0.5324, "step": 7089 }, { "epoch": 0.9, "grad_norm": 0.833185405578994, "learning_rate": 4.86506174010336e-08, "loss": 0.6103, "step": 7090 }, { "epoch": 0.9, "grad_norm": 0.9626452603802091, "learning_rate": 4.8523551518036884e-08, "loss": 0.658, "step": 7091 }, { "epoch": 0.9, "grad_norm": 0.7900499206002348, "learning_rate": 4.839664766156703e-08, "loss": 0.6127, "step": 7092 }, { "epoch": 0.9, "grad_norm": 0.7760255512577368, "learning_rate": 4.8269905853234315e-08, "loss": 0.5384, "step": 7093 }, { "epoch": 0.9, "grad_norm": 0.9667553356722294, "learning_rate": 4.814332611462157e-08, "loss": 0.5627, "step": 7094 }, { "epoch": 0.9, "grad_norm": 0.8588471544107891, "learning_rate": 4.801690846728401e-08, "loss": 0.5258, "step": 7095 }, { "epoch": 0.9, "grad_norm": 0.8708853159121567, "learning_rate": 4.78906529327493e-08, "loss": 0.5456, "step": 7096 }, { "epoch": 0.9, "grad_norm": 0.9616157916829089, "learning_rate": 4.7764559532517345e-08, "loss": 0.6465, "step": 7097 }, { "epoch": 0.9, "grad_norm": 0.817519178173823, "learning_rate": 4.7638628288060534e-08, "loss": 0.6502, "step": 7098 }, { "epoch": 0.9, "grad_norm": 0.8025424717523184, "learning_rate": 4.751285922082382e-08, "loss": 0.6346, "step": 7099 }, { "epoch": 0.9, "grad_norm": 0.7401134729771175, "learning_rate": 4.7387252352224186e-08, "loss": 0.5317, "step": 7100 }, { "epoch": 0.9, "grad_norm": 0.9694573693026775, "learning_rate": 4.726180770365118e-08, "loss": 0.6543, "step": 7101 }, { "epoch": 0.9, "grad_norm": 0.7251888551124789, "learning_rate": 4.713652529646684e-08, "loss": 0.5796, "step": 7102 }, { "epoch": 0.9, "grad_norm": 0.6199777705453603, "learning_rate": 4.70114051520053e-08, "loss": 0.4946, "step": 7103 }, { "epoch": 0.91, "grad_norm": 0.7119197393054936, "learning_rate": 4.688644729157332e-08, "loss": 0.5365, "step": 7104 }, { "epoch": 0.91, "grad_norm": 1.021404825701395, "learning_rate": 4.676165173644997e-08, "loss": 0.6352, "step": 7105 }, { "epoch": 0.91, "grad_norm": 0.6912348957742143, "learning_rate": 4.663701850788648e-08, "loss": 0.596, "step": 7106 }, { "epoch": 0.91, "grad_norm": 0.5989807492777921, "learning_rate": 4.651254762710677e-08, "loss": 0.5125, "step": 7107 }, { "epoch": 0.91, "grad_norm": 0.7177950720290377, "learning_rate": 4.638823911530676e-08, "loss": 0.5482, "step": 7108 }, { "epoch": 0.91, "grad_norm": 0.7485929148265179, "learning_rate": 4.6264092993654965e-08, "loss": 0.5596, "step": 7109 }, { "epoch": 0.91, "grad_norm": 0.748183996680338, "learning_rate": 4.614010928329226e-08, "loss": 0.5841, "step": 7110 }, { "epoch": 0.91, "grad_norm": 0.8976080435887559, "learning_rate": 4.601628800533164e-08, "loss": 0.6357, "step": 7111 }, { "epoch": 0.91, "grad_norm": 0.8661695501186198, "learning_rate": 4.5892629180858564e-08, "loss": 0.6162, "step": 7112 }, { "epoch": 0.91, "grad_norm": 0.632409699117834, "learning_rate": 4.576913283093098e-08, "loss": 0.5431, "step": 7113 }, { "epoch": 0.91, "grad_norm": 0.7109373222620569, "learning_rate": 4.564579897657894e-08, "loss": 0.6196, "step": 7114 }, { "epoch": 0.91, "grad_norm": 0.6735811584042484, "learning_rate": 4.55226276388051e-08, "loss": 0.5906, "step": 7115 }, { "epoch": 0.91, "grad_norm": 0.9699331907104476, "learning_rate": 4.5399618838584006e-08, "loss": 0.6472, "step": 7116 }, { "epoch": 0.91, "grad_norm": 0.8931269347818763, "learning_rate": 4.52767725968628e-08, "loss": 0.6587, "step": 7117 }, { "epoch": 0.91, "grad_norm": 0.81883981589009, "learning_rate": 4.5154088934561074e-08, "loss": 0.6021, "step": 7118 }, { "epoch": 0.91, "grad_norm": 0.7033671652305435, "learning_rate": 4.5031567872570455e-08, "loss": 0.5927, "step": 7119 }, { "epoch": 0.91, "grad_norm": 0.857634813047258, "learning_rate": 4.490920943175513e-08, "loss": 0.6488, "step": 7120 }, { "epoch": 0.91, "grad_norm": 0.6349022435150049, "learning_rate": 4.478701363295123e-08, "loss": 0.516, "step": 7121 }, { "epoch": 0.91, "grad_norm": 0.7306331520582102, "learning_rate": 4.4664980496967654e-08, "loss": 0.5355, "step": 7122 }, { "epoch": 0.91, "grad_norm": 0.8457295956274435, "learning_rate": 4.454311004458533e-08, "loss": 0.6086, "step": 7123 }, { "epoch": 0.91, "grad_norm": 0.7958149532172307, "learning_rate": 4.4421402296557445e-08, "loss": 0.5688, "step": 7124 }, { "epoch": 0.91, "grad_norm": 0.9381254527256889, "learning_rate": 4.429985727360963e-08, "loss": 0.5554, "step": 7125 }, { "epoch": 0.91, "grad_norm": 0.6978996014654406, "learning_rate": 4.417847499643967e-08, "loss": 0.5839, "step": 7126 }, { "epoch": 0.91, "grad_norm": 0.8493705016348178, "learning_rate": 4.4057255485717703e-08, "loss": 0.5929, "step": 7127 }, { "epoch": 0.91, "grad_norm": 0.9178695732280715, "learning_rate": 4.3936198762086204e-08, "loss": 0.6366, "step": 7128 }, { "epoch": 0.91, "grad_norm": 0.6174457365629217, "learning_rate": 4.381530484615992e-08, "loss": 0.5038, "step": 7129 }, { "epoch": 0.91, "grad_norm": 0.6500445749860795, "learning_rate": 4.36945737585257e-08, "loss": 0.5456, "step": 7130 }, { "epoch": 0.91, "grad_norm": 0.9482588019428619, "learning_rate": 4.3574005519742886e-08, "loss": 0.6176, "step": 7131 }, { "epoch": 0.91, "grad_norm": 0.6950817042258106, "learning_rate": 4.345360015034294e-08, "loss": 0.5068, "step": 7132 }, { "epoch": 0.91, "grad_norm": 0.8381910895617419, "learning_rate": 4.3333357670829686e-08, "loss": 0.6411, "step": 7133 }, { "epoch": 0.91, "grad_norm": 0.6348147078184392, "learning_rate": 4.3213278101679185e-08, "loss": 0.5313, "step": 7134 }, { "epoch": 0.91, "grad_norm": 0.7145672213947187, "learning_rate": 4.309336146333953e-08, "loss": 0.5708, "step": 7135 }, { "epoch": 0.91, "grad_norm": 0.8620921036337574, "learning_rate": 4.29736077762316e-08, "loss": 0.632, "step": 7136 }, { "epoch": 0.91, "grad_norm": 0.834679905708729, "learning_rate": 4.285401706074798e-08, "loss": 0.6311, "step": 7137 }, { "epoch": 0.91, "grad_norm": 1.0042730052916666, "learning_rate": 4.27345893372536e-08, "loss": 0.712, "step": 7138 }, { "epoch": 0.91, "grad_norm": 0.9379435061058117, "learning_rate": 4.261532462608608e-08, "loss": 0.5938, "step": 7139 }, { "epoch": 0.91, "grad_norm": 0.6124783480040105, "learning_rate": 4.249622294755484e-08, "loss": 0.5006, "step": 7140 }, { "epoch": 0.91, "grad_norm": 0.6456666245248436, "learning_rate": 4.237728432194165e-08, "loss": 0.5272, "step": 7141 }, { "epoch": 0.91, "grad_norm": 0.7507603412337811, "learning_rate": 4.2258508769500544e-08, "loss": 0.5754, "step": 7142 }, { "epoch": 0.91, "grad_norm": 0.6930989098774712, "learning_rate": 4.2139896310457664e-08, "loss": 0.5914, "step": 7143 }, { "epoch": 0.91, "grad_norm": 0.7154411481545742, "learning_rate": 4.202144696501142e-08, "loss": 0.6095, "step": 7144 }, { "epoch": 0.91, "grad_norm": 0.9725727757248239, "learning_rate": 4.190316075333278e-08, "loss": 0.5987, "step": 7145 }, { "epoch": 0.91, "grad_norm": 0.7052807949186115, "learning_rate": 4.1785037695564406e-08, "loss": 0.5113, "step": 7146 }, { "epoch": 0.91, "grad_norm": 0.7046130375006884, "learning_rate": 4.166707781182144e-08, "loss": 0.5749, "step": 7147 }, { "epoch": 0.91, "grad_norm": 0.891805321843176, "learning_rate": 4.154928112219136e-08, "loss": 0.5487, "step": 7148 }, { "epoch": 0.91, "grad_norm": 0.8229178893233402, "learning_rate": 4.143164764673368e-08, "loss": 0.6539, "step": 7149 }, { "epoch": 0.91, "grad_norm": 0.8147697102789799, "learning_rate": 4.131417740548005e-08, "loss": 0.6011, "step": 7150 }, { "epoch": 0.91, "grad_norm": 0.8910696341610276, "learning_rate": 4.119687041843445e-08, "loss": 0.6408, "step": 7151 }, { "epoch": 0.91, "grad_norm": 1.059194312791156, "learning_rate": 4.107972670557314e-08, "loss": 0.6251, "step": 7152 }, { "epoch": 0.91, "grad_norm": 0.7536534690818572, "learning_rate": 4.0962746286844484e-08, "loss": 0.4945, "step": 7153 }, { "epoch": 0.91, "grad_norm": 0.8423638996356531, "learning_rate": 4.084592918216878e-08, "loss": 0.6262, "step": 7154 }, { "epoch": 0.91, "grad_norm": 0.8618229194243596, "learning_rate": 4.072927541143911e-08, "loss": 0.6402, "step": 7155 }, { "epoch": 0.91, "grad_norm": 0.714774034112328, "learning_rate": 4.061278499452014e-08, "loss": 0.5442, "step": 7156 }, { "epoch": 0.91, "grad_norm": 0.8441440093708332, "learning_rate": 4.0496457951249007e-08, "loss": 0.6261, "step": 7157 }, { "epoch": 0.91, "grad_norm": 0.8114637488635104, "learning_rate": 4.038029430143519e-08, "loss": 0.5562, "step": 7158 }, { "epoch": 0.91, "grad_norm": 0.750838213921343, "learning_rate": 4.026429406485987e-08, "loss": 0.5349, "step": 7159 }, { "epoch": 0.91, "grad_norm": 0.7781925083901424, "learning_rate": 4.0148457261276915e-08, "loss": 0.5603, "step": 7160 }, { "epoch": 0.91, "grad_norm": 0.5867784781540418, "learning_rate": 4.003278391041198e-08, "loss": 0.524, "step": 7161 }, { "epoch": 0.91, "grad_norm": 0.7574011713934733, "learning_rate": 3.99172740319631e-08, "loss": 0.5695, "step": 7162 }, { "epoch": 0.91, "grad_norm": 0.8473887044771929, "learning_rate": 3.980192764560042e-08, "loss": 0.5997, "step": 7163 }, { "epoch": 0.91, "grad_norm": 0.9172164980396105, "learning_rate": 3.968674477096612e-08, "loss": 0.6173, "step": 7164 }, { "epoch": 0.91, "grad_norm": 0.6896181166092821, "learning_rate": 3.957172542767462e-08, "loss": 0.5707, "step": 7165 }, { "epoch": 0.91, "grad_norm": 0.656604934678135, "learning_rate": 3.9456869635312696e-08, "loss": 0.555, "step": 7166 }, { "epoch": 0.91, "grad_norm": 0.7506505139070786, "learning_rate": 3.9342177413439034e-08, "loss": 0.5362, "step": 7167 }, { "epoch": 0.91, "grad_norm": 0.853660700599245, "learning_rate": 3.922764878158458e-08, "loss": 0.5905, "step": 7168 }, { "epoch": 0.91, "grad_norm": 0.9314305138257395, "learning_rate": 3.911328375925216e-08, "loss": 0.6082, "step": 7169 }, { "epoch": 0.91, "grad_norm": 0.9067266282631755, "learning_rate": 3.8999082365917205e-08, "loss": 0.6039, "step": 7170 }, { "epoch": 0.91, "grad_norm": 0.9713194134395533, "learning_rate": 3.888504462102671e-08, "loss": 0.5715, "step": 7171 }, { "epoch": 0.91, "grad_norm": 0.7581396164467741, "learning_rate": 3.8771170544000476e-08, "loss": 0.5464, "step": 7172 }, { "epoch": 0.91, "grad_norm": 0.9723807679027652, "learning_rate": 3.865746015422977e-08, "loss": 0.6142, "step": 7173 }, { "epoch": 0.91, "grad_norm": 0.653068481439923, "learning_rate": 3.854391347107844e-08, "loss": 0.5012, "step": 7174 }, { "epoch": 0.91, "grad_norm": 0.9595688151240492, "learning_rate": 3.8430530513882234e-08, "loss": 0.6621, "step": 7175 }, { "epoch": 0.91, "grad_norm": 0.7064121205284901, "learning_rate": 3.831731130194915e-08, "loss": 0.5366, "step": 7176 }, { "epoch": 0.91, "grad_norm": 0.607973441060355, "learning_rate": 3.820425585455922e-08, "loss": 0.5553, "step": 7177 }, { "epoch": 0.91, "grad_norm": 0.7097374217166457, "learning_rate": 3.8091364190964594e-08, "loss": 0.5677, "step": 7178 }, { "epoch": 0.91, "grad_norm": 0.9128480656249681, "learning_rate": 3.797863633038956e-08, "loss": 0.6171, "step": 7179 }, { "epoch": 0.91, "grad_norm": 0.9570136409365261, "learning_rate": 3.7866072292030536e-08, "loss": 0.6547, "step": 7180 }, { "epoch": 0.91, "grad_norm": 0.8204851485444133, "learning_rate": 3.775367209505587e-08, "loss": 0.5881, "step": 7181 }, { "epoch": 0.91, "grad_norm": 0.8552027503442323, "learning_rate": 3.764143575860634e-08, "loss": 0.5816, "step": 7182 }, { "epoch": 0.92, "grad_norm": 0.8804620269774404, "learning_rate": 3.7529363301794456e-08, "loss": 0.595, "step": 7183 }, { "epoch": 0.92, "grad_norm": 0.9194297746200314, "learning_rate": 3.7417454743705055e-08, "loss": 0.6072, "step": 7184 }, { "epoch": 0.92, "grad_norm": 0.6583391070102755, "learning_rate": 3.7305710103395006e-08, "loss": 0.5236, "step": 7185 }, { "epoch": 0.92, "grad_norm": 0.9693601314804884, "learning_rate": 3.71941293998933e-08, "loss": 0.6088, "step": 7186 }, { "epoch": 0.92, "grad_norm": 0.714101203682966, "learning_rate": 3.708271265220087e-08, "loss": 0.5191, "step": 7187 }, { "epoch": 0.92, "grad_norm": 0.77974703696639, "learning_rate": 3.6971459879290845e-08, "loss": 0.5321, "step": 7188 }, { "epoch": 0.92, "grad_norm": 0.8239874487680002, "learning_rate": 3.6860371100108425e-08, "loss": 0.5488, "step": 7189 }, { "epoch": 0.92, "grad_norm": 0.8023767621208219, "learning_rate": 3.674944633357091e-08, "loss": 0.6049, "step": 7190 }, { "epoch": 0.92, "grad_norm": 0.7314401278371676, "learning_rate": 3.663868559856764e-08, "loss": 0.5922, "step": 7191 }, { "epoch": 0.92, "grad_norm": 0.7611048174198054, "learning_rate": 3.6528088913959975e-08, "loss": 0.4599, "step": 7192 }, { "epoch": 0.92, "grad_norm": 0.8167428607990136, "learning_rate": 3.64176562985814e-08, "loss": 0.5585, "step": 7193 }, { "epoch": 0.92, "grad_norm": 0.7146657342058549, "learning_rate": 3.630738777123743e-08, "loss": 0.5726, "step": 7194 }, { "epoch": 0.92, "grad_norm": 0.5956300473937473, "learning_rate": 3.61972833507056e-08, "loss": 0.534, "step": 7195 }, { "epoch": 0.92, "grad_norm": 0.7209344806653497, "learning_rate": 3.6087343055735685e-08, "loss": 0.5432, "step": 7196 }, { "epoch": 0.92, "grad_norm": 0.6448001863977473, "learning_rate": 3.597756690504916e-08, "loss": 0.5299, "step": 7197 }, { "epoch": 0.92, "grad_norm": 0.8827879915835674, "learning_rate": 3.586795491733996e-08, "loss": 0.6059, "step": 7198 }, { "epoch": 0.92, "grad_norm": 0.9292134261683238, "learning_rate": 3.575850711127371e-08, "loss": 0.6585, "step": 7199 }, { "epoch": 0.92, "grad_norm": 0.8468578807585195, "learning_rate": 3.564922350548838e-08, "loss": 0.6154, "step": 7200 }, { "epoch": 0.92, "grad_norm": 0.780823537727861, "learning_rate": 3.5540104118593764e-08, "loss": 0.6104, "step": 7201 }, { "epoch": 0.92, "grad_norm": 0.8792149806695043, "learning_rate": 3.5431148969171765e-08, "loss": 0.6205, "step": 7202 }, { "epoch": 0.92, "grad_norm": 0.6579195156185577, "learning_rate": 3.5322358075776215e-08, "loss": 0.562, "step": 7203 }, { "epoch": 0.92, "grad_norm": 0.8699845775597669, "learning_rate": 3.5213731456933184e-08, "loss": 0.5848, "step": 7204 }, { "epoch": 0.92, "grad_norm": 0.691209828793836, "learning_rate": 3.5105269131140644e-08, "loss": 0.5564, "step": 7205 }, { "epoch": 0.92, "grad_norm": 0.7016006248956878, "learning_rate": 3.4996971116868504e-08, "loss": 0.5791, "step": 7206 }, { "epoch": 0.92, "grad_norm": 0.7414726586664325, "learning_rate": 3.488883743255888e-08, "loss": 0.5418, "step": 7207 }, { "epoch": 0.92, "grad_norm": 0.8093075303033707, "learning_rate": 3.4780868096625836e-08, "loss": 0.5809, "step": 7208 }, { "epoch": 0.92, "grad_norm": 0.774431031285429, "learning_rate": 3.467306312745533e-08, "loss": 0.6164, "step": 7209 }, { "epoch": 0.92, "grad_norm": 0.8157691724781662, "learning_rate": 3.4565422543405445e-08, "loss": 0.6628, "step": 7210 }, { "epoch": 0.92, "grad_norm": 0.8773993062084237, "learning_rate": 3.44579463628063e-08, "loss": 0.6411, "step": 7211 }, { "epoch": 0.92, "grad_norm": 0.8227400561991806, "learning_rate": 3.4350634603959926e-08, "loss": 0.6238, "step": 7212 }, { "epoch": 0.92, "grad_norm": 0.7074201918671139, "learning_rate": 3.4243487285140484e-08, "loss": 0.5413, "step": 7213 }, { "epoch": 0.92, "grad_norm": 0.6848832491727511, "learning_rate": 3.413650442459392e-08, "loss": 0.5555, "step": 7214 }, { "epoch": 0.92, "grad_norm": 0.7250389367073469, "learning_rate": 3.4029686040538355e-08, "loss": 0.5534, "step": 7215 }, { "epoch": 0.92, "grad_norm": 0.7594176957296942, "learning_rate": 3.392303215116388e-08, "loss": 0.6051, "step": 7216 }, { "epoch": 0.92, "grad_norm": 0.9641137288820174, "learning_rate": 3.3816542774632525e-08, "loss": 0.6634, "step": 7217 }, { "epoch": 0.92, "grad_norm": 0.5833329912636238, "learning_rate": 3.371021792907824e-08, "loss": 0.4933, "step": 7218 }, { "epoch": 0.92, "grad_norm": 0.7892558823960883, "learning_rate": 3.360405763260721e-08, "loss": 0.5776, "step": 7219 }, { "epoch": 0.92, "grad_norm": 0.9206306921635786, "learning_rate": 3.349806190329729e-08, "loss": 0.6446, "step": 7220 }, { "epoch": 0.92, "grad_norm": 0.854440005895728, "learning_rate": 3.33922307591985e-08, "loss": 0.6386, "step": 7221 }, { "epoch": 0.92, "grad_norm": 0.7690595505962444, "learning_rate": 3.328656421833265e-08, "loss": 0.5581, "step": 7222 }, { "epoch": 0.92, "grad_norm": 0.7890436698791222, "learning_rate": 3.318106229869377e-08, "loss": 0.6469, "step": 7223 }, { "epoch": 0.92, "grad_norm": 0.7642773256452028, "learning_rate": 3.307572501824785e-08, "loss": 0.5162, "step": 7224 }, { "epoch": 0.92, "grad_norm": 0.6809035325164974, "learning_rate": 3.2970552394932514e-08, "loss": 0.5866, "step": 7225 }, { "epoch": 0.92, "grad_norm": 0.915155489499076, "learning_rate": 3.286554444665779e-08, "loss": 0.6369, "step": 7226 }, { "epoch": 0.92, "grad_norm": 0.9001421047089216, "learning_rate": 3.276070119130525e-08, "loss": 0.5881, "step": 7227 }, { "epoch": 0.92, "grad_norm": 0.8949228771138282, "learning_rate": 3.265602264672862e-08, "loss": 0.5898, "step": 7228 }, { "epoch": 0.92, "grad_norm": 0.8707282010304314, "learning_rate": 3.255150883075364e-08, "loss": 0.6089, "step": 7229 }, { "epoch": 0.92, "grad_norm": 0.7934597702914037, "learning_rate": 3.244715976117795e-08, "loss": 0.5883, "step": 7230 }, { "epoch": 0.92, "grad_norm": 0.7319047635306315, "learning_rate": 3.234297545577103e-08, "loss": 0.4796, "step": 7231 }, { "epoch": 0.92, "grad_norm": 0.6470208084072265, "learning_rate": 3.2238955932274436e-08, "loss": 0.5395, "step": 7232 }, { "epoch": 0.92, "grad_norm": 0.8728715111073796, "learning_rate": 3.213510120840157e-08, "loss": 0.5963, "step": 7233 }, { "epoch": 0.92, "grad_norm": 0.6100379460119404, "learning_rate": 3.203141130183784e-08, "loss": 0.4884, "step": 7234 }, { "epoch": 0.92, "grad_norm": 0.95594915873736, "learning_rate": 3.1927886230240565e-08, "loss": 0.6348, "step": 7235 }, { "epoch": 0.92, "grad_norm": 0.6372277462554713, "learning_rate": 3.182452601123886e-08, "loss": 0.5211, "step": 7236 }, { "epoch": 0.92, "grad_norm": 0.9355422071950238, "learning_rate": 3.1721330662434097e-08, "loss": 0.6102, "step": 7237 }, { "epoch": 0.92, "grad_norm": 0.6241549176875114, "learning_rate": 3.161830020139922e-08, "loss": 0.551, "step": 7238 }, { "epoch": 0.92, "grad_norm": 0.8099211821245752, "learning_rate": 3.151543464567929e-08, "loss": 0.6635, "step": 7239 }, { "epoch": 0.92, "grad_norm": 0.7190754261763039, "learning_rate": 3.141273401279121e-08, "loss": 0.5687, "step": 7240 }, { "epoch": 0.92, "grad_norm": 0.8301615452743462, "learning_rate": 3.1310198320223966e-08, "loss": 0.6394, "step": 7241 }, { "epoch": 0.92, "grad_norm": 0.9850435987507413, "learning_rate": 3.120782758543816e-08, "loss": 0.6667, "step": 7242 }, { "epoch": 0.92, "grad_norm": 0.9911629314843788, "learning_rate": 3.1105621825866404e-08, "loss": 0.6536, "step": 7243 }, { "epoch": 0.92, "grad_norm": 0.8571239668326772, "learning_rate": 3.100358105891354e-08, "loss": 0.6152, "step": 7244 }, { "epoch": 0.92, "grad_norm": 0.7551905552359298, "learning_rate": 3.0901705301955794e-08, "loss": 0.5224, "step": 7245 }, { "epoch": 0.92, "grad_norm": 1.0492874851314253, "learning_rate": 3.079999457234173e-08, "loss": 0.6088, "step": 7246 }, { "epoch": 0.92, "grad_norm": 0.8273582058139619, "learning_rate": 3.069844888739148e-08, "loss": 0.5396, "step": 7247 }, { "epoch": 0.92, "grad_norm": 0.8235138075880819, "learning_rate": 3.059706826439723e-08, "loss": 0.6047, "step": 7248 }, { "epoch": 0.92, "grad_norm": 0.8528860805385543, "learning_rate": 3.049585272062305e-08, "loss": 0.6051, "step": 7249 }, { "epoch": 0.92, "grad_norm": 0.705706960238756, "learning_rate": 3.0394802273305045e-08, "loss": 0.5418, "step": 7250 }, { "epoch": 0.92, "grad_norm": 0.596981970820245, "learning_rate": 3.0293916939650884e-08, "loss": 0.4724, "step": 7251 }, { "epoch": 0.92, "grad_norm": 0.8848243018426158, "learning_rate": 3.01931967368404e-08, "loss": 0.5155, "step": 7252 }, { "epoch": 0.92, "grad_norm": 0.8744242365468112, "learning_rate": 3.009264168202508e-08, "loss": 0.6706, "step": 7253 }, { "epoch": 0.92, "grad_norm": 0.7024447149278906, "learning_rate": 2.999225179232845e-08, "loss": 0.5425, "step": 7254 }, { "epoch": 0.92, "grad_norm": 0.8951284829159555, "learning_rate": 2.989202708484595e-08, "loss": 0.5822, "step": 7255 }, { "epoch": 0.92, "grad_norm": 0.7250759890585469, "learning_rate": 2.9791967576644594e-08, "loss": 0.6121, "step": 7256 }, { "epoch": 0.92, "grad_norm": 0.6234521575965822, "learning_rate": 2.969207328476375e-08, "loss": 0.5335, "step": 7257 }, { "epoch": 0.92, "grad_norm": 0.749795267394905, "learning_rate": 2.959234422621415e-08, "loss": 0.5508, "step": 7258 }, { "epoch": 0.92, "grad_norm": 0.8636073533020747, "learning_rate": 2.9492780417978758e-08, "loss": 0.6294, "step": 7259 }, { "epoch": 0.92, "grad_norm": 0.887876625054137, "learning_rate": 2.9393381877012123e-08, "loss": 0.6636, "step": 7260 }, { "epoch": 0.93, "grad_norm": 0.8324545958682875, "learning_rate": 2.9294148620240933e-08, "loss": 0.5188, "step": 7261 }, { "epoch": 0.93, "grad_norm": 0.7611168344149271, "learning_rate": 2.9195080664563442e-08, "loss": 0.5837, "step": 7262 }, { "epoch": 0.93, "grad_norm": 0.7661226206087224, "learning_rate": 2.9096178026849937e-08, "loss": 0.5137, "step": 7263 }, { "epoch": 0.93, "grad_norm": 0.8322838728884923, "learning_rate": 2.8997440723942503e-08, "loss": 0.6285, "step": 7264 }, { "epoch": 0.93, "grad_norm": 0.7744264146490837, "learning_rate": 2.8898868772655018e-08, "loss": 0.5542, "step": 7265 }, { "epoch": 0.93, "grad_norm": 0.7336767171354189, "learning_rate": 2.880046218977339e-08, "loss": 0.5468, "step": 7266 }, { "epoch": 0.93, "grad_norm": 0.7635367687773135, "learning_rate": 2.8702220992055214e-08, "loss": 0.5508, "step": 7267 }, { "epoch": 0.93, "grad_norm": 0.7487195728202541, "learning_rate": 2.860414519622989e-08, "loss": 0.5394, "step": 7268 }, { "epoch": 0.93, "grad_norm": 0.7158286138890791, "learning_rate": 2.8506234818998608e-08, "loss": 0.5556, "step": 7269 }, { "epoch": 0.93, "grad_norm": 0.6490072219759189, "learning_rate": 2.84084898770347e-08, "loss": 0.5256, "step": 7270 }, { "epoch": 0.93, "grad_norm": 0.8112796828001055, "learning_rate": 2.8310910386982966e-08, "loss": 0.5909, "step": 7271 }, { "epoch": 0.93, "grad_norm": 0.7572296856179627, "learning_rate": 2.821349636546011e-08, "loss": 0.5261, "step": 7272 }, { "epoch": 0.93, "grad_norm": 0.816570987913261, "learning_rate": 2.8116247829054972e-08, "loss": 0.5648, "step": 7273 }, { "epoch": 0.93, "grad_norm": 0.8919148018207494, "learning_rate": 2.8019164794327755e-08, "loss": 0.6664, "step": 7274 }, { "epoch": 0.93, "grad_norm": 0.7051256950320341, "learning_rate": 2.7922247277810673e-08, "loss": 0.5406, "step": 7275 }, { "epoch": 0.93, "grad_norm": 0.6680055891827361, "learning_rate": 2.782549529600786e-08, "loss": 0.5274, "step": 7276 }, { "epoch": 0.93, "grad_norm": 0.8542932690428376, "learning_rate": 2.7728908865395252e-08, "loss": 0.6911, "step": 7277 }, { "epoch": 0.93, "grad_norm": 0.6551618724016277, "learning_rate": 2.7632488002420353e-08, "loss": 0.5093, "step": 7278 }, { "epoch": 0.93, "grad_norm": 0.8320909840614118, "learning_rate": 2.7536232723502695e-08, "loss": 0.608, "step": 7279 }, { "epoch": 0.93, "grad_norm": 0.686917210990391, "learning_rate": 2.7440143045033614e-08, "loss": 0.5148, "step": 7280 }, { "epoch": 0.93, "grad_norm": 0.7453461359383318, "learning_rate": 2.734421898337613e-08, "loss": 0.5873, "step": 7281 }, { "epoch": 0.93, "grad_norm": 0.8993783897062503, "learning_rate": 2.724846055486507e-08, "loss": 0.5819, "step": 7282 }, { "epoch": 0.93, "grad_norm": 1.0555958199116768, "learning_rate": 2.7152867775807054e-08, "loss": 0.6366, "step": 7283 }, { "epoch": 0.93, "grad_norm": 0.8152790512325929, "learning_rate": 2.7057440662480724e-08, "loss": 0.6516, "step": 7284 }, { "epoch": 0.93, "grad_norm": 0.8240071192024564, "learning_rate": 2.6962179231136085e-08, "loss": 0.5548, "step": 7285 }, { "epoch": 0.93, "grad_norm": 0.6295839041812001, "learning_rate": 2.686708349799538e-08, "loss": 0.4942, "step": 7286 }, { "epoch": 0.93, "grad_norm": 0.9425429406384146, "learning_rate": 2.6772153479252322e-08, "loss": 0.6029, "step": 7287 }, { "epoch": 0.93, "grad_norm": 0.6957958570867699, "learning_rate": 2.667738919107243e-08, "loss": 0.5191, "step": 7288 }, { "epoch": 0.93, "grad_norm": 0.8516117539878484, "learning_rate": 2.6582790649593233e-08, "loss": 0.678, "step": 7289 }, { "epoch": 0.93, "grad_norm": 0.7935133376542478, "learning_rate": 2.648835787092385e-08, "loss": 0.5302, "step": 7290 }, { "epoch": 0.93, "grad_norm": 0.6482413083242479, "learning_rate": 2.6394090871144968e-08, "loss": 0.4846, "step": 7291 }, { "epoch": 0.93, "grad_norm": 0.5905827050214838, "learning_rate": 2.629998966630953e-08, "loss": 0.5223, "step": 7292 }, { "epoch": 0.93, "grad_norm": 0.6173549039822884, "learning_rate": 2.6206054272441934e-08, "loss": 0.4756, "step": 7293 }, { "epoch": 0.93, "grad_norm": 0.7609342824956519, "learning_rate": 2.6112284705538503e-08, "loss": 0.5816, "step": 7294 }, { "epoch": 0.93, "grad_norm": 0.7781689248715798, "learning_rate": 2.601868098156701e-08, "loss": 0.5823, "step": 7295 }, { "epoch": 0.93, "grad_norm": 0.9271994559550089, "learning_rate": 2.5925243116467265e-08, "loss": 0.6238, "step": 7296 }, { "epoch": 0.93, "grad_norm": 0.7379924298783651, "learning_rate": 2.5831971126150764e-08, "loss": 0.5769, "step": 7297 }, { "epoch": 0.93, "grad_norm": 0.5920473005317936, "learning_rate": 2.57388650265008e-08, "loss": 0.4842, "step": 7298 }, { "epoch": 0.93, "grad_norm": 0.9016036351096196, "learning_rate": 2.5645924833372356e-08, "loss": 0.6253, "step": 7299 }, { "epoch": 0.93, "grad_norm": 0.6805383060911628, "learning_rate": 2.5553150562592106e-08, "loss": 0.5866, "step": 7300 }, { "epoch": 0.93, "grad_norm": 0.8856669460180067, "learning_rate": 2.5460542229958746e-08, "loss": 0.5897, "step": 7301 }, { "epoch": 0.93, "grad_norm": 0.649128930799852, "learning_rate": 2.536809985124222e-08, "loss": 0.48, "step": 7302 }, { "epoch": 0.93, "grad_norm": 0.758725213366629, "learning_rate": 2.5275823442184707e-08, "loss": 0.5776, "step": 7303 }, { "epoch": 0.93, "grad_norm": 0.8264163011593813, "learning_rate": 2.5183713018499865e-08, "loss": 0.6253, "step": 7304 }, { "epoch": 0.93, "grad_norm": 0.7050846529919563, "learning_rate": 2.5091768595873143e-08, "loss": 0.5388, "step": 7305 }, { "epoch": 0.93, "grad_norm": 0.7560979029581031, "learning_rate": 2.4999990189961683e-08, "loss": 0.6035, "step": 7306 }, { "epoch": 0.93, "grad_norm": 0.7155094135096984, "learning_rate": 2.490837781639432e-08, "loss": 0.5835, "step": 7307 }, { "epoch": 0.93, "grad_norm": 0.8090863112793168, "learning_rate": 2.4816931490771797e-08, "loss": 0.6159, "step": 7308 }, { "epoch": 0.93, "grad_norm": 0.6639803448318626, "learning_rate": 2.4725651228666544e-08, "loss": 0.558, "step": 7309 }, { "epoch": 0.93, "grad_norm": 0.634450185349951, "learning_rate": 2.463453704562257e-08, "loss": 0.56, "step": 7310 }, { "epoch": 0.93, "grad_norm": 0.9012663735561001, "learning_rate": 2.4543588957155693e-08, "loss": 0.6496, "step": 7311 }, { "epoch": 0.93, "grad_norm": 0.7179294875779447, "learning_rate": 2.4452806978753292e-08, "loss": 0.5553, "step": 7312 }, { "epoch": 0.93, "grad_norm": 0.7318889642436464, "learning_rate": 2.4362191125874676e-08, "loss": 0.5641, "step": 7313 }, { "epoch": 0.93, "grad_norm": 0.629847707636561, "learning_rate": 2.427174141395083e-08, "loss": 0.4975, "step": 7314 }, { "epoch": 0.93, "grad_norm": 0.6811445202685522, "learning_rate": 2.4181457858384436e-08, "loss": 0.5409, "step": 7315 }, { "epoch": 0.93, "grad_norm": 0.8376259600602651, "learning_rate": 2.4091340474549636e-08, "loss": 0.5573, "step": 7316 }, { "epoch": 0.93, "grad_norm": 0.9537920904601983, "learning_rate": 2.4001389277792716e-08, "loss": 0.606, "step": 7317 }, { "epoch": 0.93, "grad_norm": 0.8517656877010927, "learning_rate": 2.3911604283431196e-08, "loss": 0.6274, "step": 7318 }, { "epoch": 0.93, "grad_norm": 0.8238777681106071, "learning_rate": 2.3821985506754737e-08, "loss": 0.5827, "step": 7319 }, { "epoch": 0.93, "grad_norm": 1.042579964028282, "learning_rate": 2.3732532963024466e-08, "loss": 0.6567, "step": 7320 }, { "epoch": 0.93, "grad_norm": 0.8152585007025988, "learning_rate": 2.364324666747297e-08, "loss": 0.6257, "step": 7321 }, { "epoch": 0.93, "grad_norm": 0.9035497507915733, "learning_rate": 2.3554126635305095e-08, "loss": 0.725, "step": 7322 }, { "epoch": 0.93, "grad_norm": 0.6135358852854472, "learning_rate": 2.3465172881696914e-08, "loss": 0.5534, "step": 7323 }, { "epoch": 0.93, "grad_norm": 0.7256928190829117, "learning_rate": 2.337638542179632e-08, "loss": 0.4633, "step": 7324 }, { "epoch": 0.93, "grad_norm": 0.6861522571682235, "learning_rate": 2.3287764270722765e-08, "loss": 0.5559, "step": 7325 }, { "epoch": 0.93, "grad_norm": 0.7282070536821227, "learning_rate": 2.319930944356774e-08, "loss": 0.501, "step": 7326 }, { "epoch": 0.93, "grad_norm": 0.917918311517255, "learning_rate": 2.3111020955394078e-08, "loss": 0.6442, "step": 7327 }, { "epoch": 0.93, "grad_norm": 0.7619179860137575, "learning_rate": 2.3022898821236424e-08, "loss": 0.5584, "step": 7328 }, { "epoch": 0.93, "grad_norm": 0.8653088917477231, "learning_rate": 2.2934943056101e-08, "loss": 0.6383, "step": 7329 }, { "epoch": 0.93, "grad_norm": 0.9468279249576645, "learning_rate": 2.2847153674965813e-08, "loss": 0.7024, "step": 7330 }, { "epoch": 0.93, "grad_norm": 0.7532869035839799, "learning_rate": 2.275953069278036e-08, "loss": 0.5766, "step": 7331 }, { "epoch": 0.93, "grad_norm": 0.7770475302388583, "learning_rate": 2.2672074124466145e-08, "loss": 0.5753, "step": 7332 }, { "epoch": 0.93, "grad_norm": 0.9552309094567915, "learning_rate": 2.2584783984915813e-08, "loss": 0.6889, "step": 7333 }, { "epoch": 0.93, "grad_norm": 0.9430881173078659, "learning_rate": 2.2497660288994137e-08, "loss": 0.681, "step": 7334 }, { "epoch": 0.93, "grad_norm": 0.7925380942108152, "learning_rate": 2.241070305153747e-08, "loss": 0.5865, "step": 7335 }, { "epoch": 0.93, "grad_norm": 0.8483727468173223, "learning_rate": 2.232391228735364e-08, "loss": 0.6069, "step": 7336 }, { "epoch": 0.93, "grad_norm": 0.9580201230614123, "learning_rate": 2.2237288011222156e-08, "loss": 0.6277, "step": 7337 }, { "epoch": 0.93, "grad_norm": 0.6540616035077086, "learning_rate": 2.2150830237894212e-08, "loss": 0.4598, "step": 7338 }, { "epoch": 0.93, "grad_norm": 0.5939019635070065, "learning_rate": 2.2064538982092818e-08, "loss": 0.5047, "step": 7339 }, { "epoch": 0.94, "grad_norm": 0.7803918132292064, "learning_rate": 2.1978414258512324e-08, "loss": 0.6045, "step": 7340 }, { "epoch": 0.94, "grad_norm": 0.8808798253134988, "learning_rate": 2.1892456081818888e-08, "loss": 0.6285, "step": 7341 }, { "epoch": 0.94, "grad_norm": 0.6342257557730554, "learning_rate": 2.180666446665036e-08, "loss": 0.478, "step": 7342 }, { "epoch": 0.94, "grad_norm": 0.9032675516702545, "learning_rate": 2.1721039427616162e-08, "loss": 0.6091, "step": 7343 }, { "epoch": 0.94, "grad_norm": 0.7420888302984697, "learning_rate": 2.1635580979297295e-08, "loss": 0.5302, "step": 7344 }, { "epoch": 0.94, "grad_norm": 0.7346921491147425, "learning_rate": 2.1550289136246458e-08, "loss": 0.5417, "step": 7345 }, { "epoch": 0.94, "grad_norm": 0.7045662810830867, "learning_rate": 2.1465163912988028e-08, "loss": 0.5171, "step": 7346 }, { "epoch": 0.94, "grad_norm": 0.7147259598666014, "learning_rate": 2.1380205324017852e-08, "loss": 0.5713, "step": 7347 }, { "epoch": 0.94, "grad_norm": 0.5889244485434171, "learning_rate": 2.1295413383803472e-08, "loss": 0.5493, "step": 7348 }, { "epoch": 0.94, "grad_norm": 0.7927584494338501, "learning_rate": 2.1210788106784116e-08, "loss": 0.5937, "step": 7349 }, { "epoch": 0.94, "grad_norm": 1.1904287972337695, "learning_rate": 2.1126329507370587e-08, "loss": 0.6513, "step": 7350 }, { "epoch": 0.94, "grad_norm": 0.7858485830915337, "learning_rate": 2.1042037599945382e-08, "loss": 0.5783, "step": 7351 }, { "epoch": 0.94, "grad_norm": 0.6998750538399491, "learning_rate": 2.0957912398862465e-08, "loss": 0.5638, "step": 7352 }, { "epoch": 0.94, "grad_norm": 0.7715934201734731, "learning_rate": 2.0873953918447374e-08, "loss": 0.5412, "step": 7353 }, { "epoch": 0.94, "grad_norm": 0.7332572984356304, "learning_rate": 2.079016217299756e-08, "loss": 0.5368, "step": 7354 }, { "epoch": 0.94, "grad_norm": 0.906684047198429, "learning_rate": 2.0706537176781725e-08, "loss": 0.6709, "step": 7355 }, { "epoch": 0.94, "grad_norm": 0.6968944961918091, "learning_rate": 2.062307894404047e-08, "loss": 0.4994, "step": 7356 }, { "epoch": 0.94, "grad_norm": 0.7297407488979746, "learning_rate": 2.053978748898577e-08, "loss": 0.5915, "step": 7357 }, { "epoch": 0.94, "grad_norm": 0.6533872329803413, "learning_rate": 2.0456662825801385e-08, "loss": 0.5079, "step": 7358 }, { "epoch": 0.94, "grad_norm": 0.9324718133578759, "learning_rate": 2.0373704968642436e-08, "loss": 0.564, "step": 7359 }, { "epoch": 0.94, "grad_norm": 0.7564071407681818, "learning_rate": 2.0290913931635734e-08, "loss": 0.5515, "step": 7360 }, { "epoch": 0.94, "grad_norm": 0.791927611658742, "learning_rate": 2.020828972888e-08, "loss": 0.5713, "step": 7361 }, { "epoch": 0.94, "grad_norm": 0.688344054909427, "learning_rate": 2.0125832374445096e-08, "loss": 0.5076, "step": 7362 }, { "epoch": 0.94, "grad_norm": 1.0923643566937382, "learning_rate": 2.0043541882372783e-08, "loss": 0.6203, "step": 7363 }, { "epoch": 0.94, "grad_norm": 0.8687605679872175, "learning_rate": 1.9961418266676078e-08, "loss": 0.6389, "step": 7364 }, { "epoch": 0.94, "grad_norm": 0.7001244065773278, "learning_rate": 1.9879461541339905e-08, "loss": 0.5755, "step": 7365 }, { "epoch": 0.94, "grad_norm": 0.7328643788992382, "learning_rate": 1.979767172032054e-08, "loss": 0.5902, "step": 7366 }, { "epoch": 0.94, "grad_norm": 0.6380921976386079, "learning_rate": 1.971604881754607e-08, "loss": 0.5541, "step": 7367 }, { "epoch": 0.94, "grad_norm": 0.7210434911134327, "learning_rate": 1.963459284691593e-08, "loss": 0.5525, "step": 7368 }, { "epoch": 0.94, "grad_norm": 0.8062029944399082, "learning_rate": 1.955330382230136e-08, "loss": 0.6188, "step": 7369 }, { "epoch": 0.94, "grad_norm": 1.2224542636201894, "learning_rate": 1.9472181757544838e-08, "loss": 0.6359, "step": 7370 }, { "epoch": 0.94, "grad_norm": 0.8517990928845706, "learning_rate": 1.9391226666460757e-08, "loss": 0.6318, "step": 7371 }, { "epoch": 0.94, "grad_norm": 0.8870327592080854, "learning_rate": 1.9310438562834765e-08, "loss": 0.6024, "step": 7372 }, { "epoch": 0.94, "grad_norm": 0.7804076023913208, "learning_rate": 1.92298174604244e-08, "loss": 0.6133, "step": 7373 }, { "epoch": 0.94, "grad_norm": 0.7633874061136199, "learning_rate": 1.914936337295847e-08, "loss": 0.5507, "step": 7374 }, { "epoch": 0.94, "grad_norm": 0.6976314768152295, "learning_rate": 1.9069076314137678e-08, "loss": 0.5203, "step": 7375 }, { "epoch": 0.94, "grad_norm": 0.7132493863241013, "learning_rate": 1.898895629763375e-08, "loss": 0.5458, "step": 7376 }, { "epoch": 0.94, "grad_norm": 0.6431605460472087, "learning_rate": 1.8909003337090557e-08, "loss": 0.483, "step": 7377 }, { "epoch": 0.94, "grad_norm": 0.6924352692329612, "learning_rate": 1.8829217446123203e-08, "loss": 0.5264, "step": 7378 }, { "epoch": 0.94, "grad_norm": 0.6651086187715306, "learning_rate": 1.8749598638318265e-08, "loss": 0.5175, "step": 7379 }, { "epoch": 0.94, "grad_norm": 0.8293932579453627, "learning_rate": 1.867014692723412e-08, "loss": 0.5979, "step": 7380 }, { "epoch": 0.94, "grad_norm": 1.5332156596110267, "learning_rate": 1.8590862326400612e-08, "loss": 0.6547, "step": 7381 }, { "epoch": 0.94, "grad_norm": 0.8448468623784054, "learning_rate": 1.8511744849318833e-08, "loss": 0.6288, "step": 7382 }, { "epoch": 0.94, "grad_norm": 0.7525873697326189, "learning_rate": 1.8432794509462003e-08, "loss": 0.5914, "step": 7383 }, { "epoch": 0.94, "grad_norm": 0.6062885852603496, "learning_rate": 1.8354011320274255e-08, "loss": 0.5123, "step": 7384 }, { "epoch": 0.94, "grad_norm": 0.7319035825840275, "learning_rate": 1.827539529517175e-08, "loss": 0.5747, "step": 7385 }, { "epoch": 0.94, "grad_norm": 0.8080882088267415, "learning_rate": 1.8196946447541882e-08, "loss": 0.6239, "step": 7386 }, { "epoch": 0.94, "grad_norm": 0.6648203856734201, "learning_rate": 1.8118664790743532e-08, "loss": 0.4993, "step": 7387 }, { "epoch": 0.94, "grad_norm": 0.7230985894853645, "learning_rate": 1.8040550338107585e-08, "loss": 0.5071, "step": 7388 }, { "epoch": 0.94, "grad_norm": 0.8253701678454294, "learning_rate": 1.7962603102935847e-08, "loss": 0.5434, "step": 7389 }, { "epoch": 0.94, "grad_norm": 0.6550046141942629, "learning_rate": 1.7884823098502035e-08, "loss": 0.4814, "step": 7390 }, { "epoch": 0.94, "grad_norm": 0.9027399667574526, "learning_rate": 1.7807210338051216e-08, "loss": 0.5575, "step": 7391 }, { "epoch": 0.94, "grad_norm": 0.697773544514756, "learning_rate": 1.7729764834800155e-08, "loss": 0.5795, "step": 7392 }, { "epoch": 0.94, "grad_norm": 0.7043387893402796, "learning_rate": 1.7652486601936745e-08, "loss": 0.5851, "step": 7393 }, { "epoch": 0.94, "grad_norm": 0.774481556592844, "learning_rate": 1.7575375652621016e-08, "loss": 0.6229, "step": 7394 }, { "epoch": 0.94, "grad_norm": 0.7548891288143791, "learning_rate": 1.749843199998391e-08, "loss": 0.5257, "step": 7395 }, { "epoch": 0.94, "grad_norm": 0.9761776761402281, "learning_rate": 1.7421655657128274e-08, "loss": 0.5469, "step": 7396 }, { "epoch": 0.94, "grad_norm": 0.9522549230260436, "learning_rate": 1.7345046637128213e-08, "loss": 0.5645, "step": 7397 }, { "epoch": 0.94, "grad_norm": 0.6655055146099254, "learning_rate": 1.726860495302951e-08, "loss": 0.5063, "step": 7398 }, { "epoch": 0.94, "grad_norm": 0.807614336920565, "learning_rate": 1.7192330617849415e-08, "loss": 0.5202, "step": 7399 }, { "epoch": 0.94, "grad_norm": 0.6579943421904548, "learning_rate": 1.7116223644576433e-08, "loss": 0.5387, "step": 7400 }, { "epoch": 0.94, "grad_norm": 0.8859530869317134, "learning_rate": 1.704028404617108e-08, "loss": 0.6233, "step": 7401 }, { "epoch": 0.94, "grad_norm": 0.7800016614422606, "learning_rate": 1.6964511835564908e-08, "loss": 0.5638, "step": 7402 }, { "epoch": 0.94, "grad_norm": 0.7031839129420778, "learning_rate": 1.688890702566126e-08, "loss": 0.5555, "step": 7403 }, { "epoch": 0.94, "grad_norm": 0.6573089507009723, "learning_rate": 1.681346962933472e-08, "loss": 0.5605, "step": 7404 }, { "epoch": 0.94, "grad_norm": 0.6060935506335959, "learning_rate": 1.6738199659431463e-08, "loss": 0.5208, "step": 7405 }, { "epoch": 0.94, "grad_norm": 0.9492646602240872, "learning_rate": 1.6663097128769343e-08, "loss": 0.6424, "step": 7406 }, { "epoch": 0.94, "grad_norm": 0.6295299022225685, "learning_rate": 1.6588162050137466e-08, "loss": 0.5073, "step": 7407 }, { "epoch": 0.94, "grad_norm": 0.72031449700312, "learning_rate": 1.65133944362964e-08, "loss": 0.5363, "step": 7408 }, { "epoch": 0.94, "grad_norm": 0.5847103223098346, "learning_rate": 1.643879429997841e-08, "loss": 0.5116, "step": 7409 }, { "epoch": 0.94, "grad_norm": 0.9150892898250279, "learning_rate": 1.6364361653887104e-08, "loss": 0.6601, "step": 7410 }, { "epoch": 0.94, "grad_norm": 0.8738454243198037, "learning_rate": 1.629009651069757e-08, "loss": 0.6274, "step": 7411 }, { "epoch": 0.94, "grad_norm": 0.8284571349550164, "learning_rate": 1.621599888305636e-08, "loss": 0.6604, "step": 7412 }, { "epoch": 0.94, "grad_norm": 0.6714369343398056, "learning_rate": 1.61420687835816e-08, "loss": 0.5306, "step": 7413 }, { "epoch": 0.94, "grad_norm": 0.6938815098184956, "learning_rate": 1.606830622486266e-08, "loss": 0.5666, "step": 7414 }, { "epoch": 0.94, "grad_norm": 0.7671555204626684, "learning_rate": 1.599471121946072e-08, "loss": 0.5117, "step": 7415 }, { "epoch": 0.94, "grad_norm": 0.8545314562719327, "learning_rate": 1.5921283779908202e-08, "loss": 0.5999, "step": 7416 }, { "epoch": 0.94, "grad_norm": 0.8685526562210228, "learning_rate": 1.5848023918708986e-08, "loss": 0.6141, "step": 7417 }, { "epoch": 0.95, "grad_norm": 1.0092263866324755, "learning_rate": 1.577493164833843e-08, "loss": 0.6168, "step": 7418 }, { "epoch": 0.95, "grad_norm": 0.8591542960214308, "learning_rate": 1.5702006981243355e-08, "loss": 0.6605, "step": 7419 }, { "epoch": 0.95, "grad_norm": 0.8004052873341032, "learning_rate": 1.562924992984227e-08, "loss": 0.6, "step": 7420 }, { "epoch": 0.95, "grad_norm": 0.7848706576290132, "learning_rate": 1.555666050652471e-08, "loss": 0.6131, "step": 7421 }, { "epoch": 0.95, "grad_norm": 0.8597694772581106, "learning_rate": 1.548423872365212e-08, "loss": 0.5943, "step": 7422 }, { "epoch": 0.95, "grad_norm": 0.7626861860910683, "learning_rate": 1.541198459355708e-08, "loss": 0.5937, "step": 7423 }, { "epoch": 0.95, "grad_norm": 0.7279878451756645, "learning_rate": 1.5339898128543637e-08, "loss": 0.4908, "step": 7424 }, { "epoch": 0.95, "grad_norm": 0.8642458864623317, "learning_rate": 1.5267979340887415e-08, "loss": 0.6659, "step": 7425 }, { "epoch": 0.95, "grad_norm": 0.822340669725819, "learning_rate": 1.5196228242835507e-08, "loss": 0.6116, "step": 7426 }, { "epoch": 0.95, "grad_norm": 0.8263607963159706, "learning_rate": 1.5124644846606248e-08, "loss": 0.6259, "step": 7427 }, { "epoch": 0.95, "grad_norm": 0.7760617981752432, "learning_rate": 1.505322916438967e-08, "loss": 0.5328, "step": 7428 }, { "epoch": 0.95, "grad_norm": 1.0307063617662702, "learning_rate": 1.498198120834704e-08, "loss": 0.6466, "step": 7429 }, { "epoch": 0.95, "grad_norm": 1.1231308044113038, "learning_rate": 1.4910900990611207e-08, "loss": 0.6534, "step": 7430 }, { "epoch": 0.95, "grad_norm": 0.7148018690501072, "learning_rate": 1.4839988523286384e-08, "loss": 0.56, "step": 7431 }, { "epoch": 0.95, "grad_norm": 0.9841373491236265, "learning_rate": 1.4769243818448129e-08, "loss": 0.6287, "step": 7432 }, { "epoch": 0.95, "grad_norm": 0.7792377440416043, "learning_rate": 1.4698666888143585e-08, "loss": 0.6155, "step": 7433 }, { "epoch": 0.95, "grad_norm": 0.666459001113147, "learning_rate": 1.4628257744391248e-08, "loss": 0.5487, "step": 7434 }, { "epoch": 0.95, "grad_norm": 0.7087115673400899, "learning_rate": 1.4558016399181084e-08, "loss": 0.4958, "step": 7435 }, { "epoch": 0.95, "grad_norm": 0.6818492188497828, "learning_rate": 1.4487942864474522e-08, "loss": 0.5873, "step": 7436 }, { "epoch": 0.95, "grad_norm": 0.6286302695212599, "learning_rate": 1.4418037152204354e-08, "loss": 0.4887, "step": 7437 }, { "epoch": 0.95, "grad_norm": 0.8460944440847032, "learning_rate": 1.4348299274274612e-08, "loss": 0.5765, "step": 7438 }, { "epoch": 0.95, "grad_norm": 0.8292750788786889, "learning_rate": 1.4278729242561127e-08, "loss": 0.637, "step": 7439 }, { "epoch": 0.95, "grad_norm": 0.678072001461441, "learning_rate": 1.420932706891087e-08, "loss": 0.5331, "step": 7440 }, { "epoch": 0.95, "grad_norm": 0.7772186134042712, "learning_rate": 1.4140092765142387e-08, "loss": 0.5564, "step": 7441 }, { "epoch": 0.95, "grad_norm": 0.6797351926525314, "learning_rate": 1.4071026343045355e-08, "loss": 0.5248, "step": 7442 }, { "epoch": 0.95, "grad_norm": 0.8103672034012288, "learning_rate": 1.400212781438126e-08, "loss": 0.6331, "step": 7443 }, { "epoch": 0.95, "grad_norm": 0.936394948464033, "learning_rate": 1.3933397190882823e-08, "loss": 0.6616, "step": 7444 }, { "epoch": 0.95, "grad_norm": 0.7610103144312518, "learning_rate": 1.3864834484253906e-08, "loss": 0.508, "step": 7445 }, { "epoch": 0.95, "grad_norm": 0.8859683214603586, "learning_rate": 1.3796439706170282e-08, "loss": 0.6462, "step": 7446 }, { "epoch": 0.95, "grad_norm": 0.6440572387053101, "learning_rate": 1.3728212868278855e-08, "loss": 0.5618, "step": 7447 }, { "epoch": 0.95, "grad_norm": 0.697272406276553, "learning_rate": 1.3660153982197775e-08, "loss": 0.5075, "step": 7448 }, { "epoch": 0.95, "grad_norm": 0.8296097730272598, "learning_rate": 1.3592263059516884e-08, "loss": 0.6706, "step": 7449 }, { "epoch": 0.95, "grad_norm": 0.6039206591322959, "learning_rate": 1.3524540111797378e-08, "loss": 0.5221, "step": 7450 }, { "epoch": 0.95, "grad_norm": 0.8865933372831674, "learning_rate": 1.3456985150571587e-08, "loss": 0.6587, "step": 7451 }, { "epoch": 0.95, "grad_norm": 0.7016480035513186, "learning_rate": 1.338959818734342e-08, "loss": 0.5517, "step": 7452 }, { "epoch": 0.95, "grad_norm": 0.9516630256351832, "learning_rate": 1.3322379233588365e-08, "loss": 0.58, "step": 7453 }, { "epoch": 0.95, "grad_norm": 0.9685653087456392, "learning_rate": 1.3255328300752932e-08, "loss": 0.6121, "step": 7454 }, { "epoch": 0.95, "grad_norm": 0.8724263003376227, "learning_rate": 1.318844540025521e-08, "loss": 0.5344, "step": 7455 }, { "epoch": 0.95, "grad_norm": 0.8628209379113889, "learning_rate": 1.3121730543484865e-08, "loss": 0.6213, "step": 7456 }, { "epoch": 0.95, "grad_norm": 0.8279215376848827, "learning_rate": 1.3055183741802478e-08, "loss": 0.6529, "step": 7457 }, { "epoch": 0.95, "grad_norm": 0.6928467558296452, "learning_rate": 1.2988805006540316e-08, "loss": 0.5445, "step": 7458 }, { "epoch": 0.95, "grad_norm": 0.8882408833985649, "learning_rate": 1.2922594349002114e-08, "loss": 0.6437, "step": 7459 }, { "epoch": 0.95, "grad_norm": 0.8568455764166591, "learning_rate": 1.2856551780462854e-08, "loss": 0.6094, "step": 7460 }, { "epoch": 0.95, "grad_norm": 0.8757168124385708, "learning_rate": 1.2790677312168763e-08, "loss": 0.5459, "step": 7461 }, { "epoch": 0.95, "grad_norm": 0.7134921079396285, "learning_rate": 1.2724970955337755e-08, "loss": 0.5288, "step": 7462 }, { "epoch": 0.95, "grad_norm": 0.745076102036001, "learning_rate": 1.2659432721158769e-08, "loss": 0.5164, "step": 7463 }, { "epoch": 0.95, "grad_norm": 2.316617603426175, "learning_rate": 1.2594062620792323e-08, "loss": 0.5783, "step": 7464 }, { "epoch": 0.95, "grad_norm": 0.6863625560447916, "learning_rate": 1.2528860665370288e-08, "loss": 0.5186, "step": 7465 }, { "epoch": 0.95, "grad_norm": 0.6393061656456104, "learning_rate": 1.2463826865995896e-08, "loss": 0.5161, "step": 7466 }, { "epoch": 0.95, "grad_norm": 1.0707017558819005, "learning_rate": 1.2398961233743733e-08, "loss": 0.6401, "step": 7467 }, { "epoch": 0.95, "grad_norm": 0.6396429205127209, "learning_rate": 1.2334263779659737e-08, "loss": 0.5309, "step": 7468 }, { "epoch": 0.95, "grad_norm": 0.9416409047348248, "learning_rate": 1.226973451476121e-08, "loss": 0.6542, "step": 7469 }, { "epoch": 0.95, "grad_norm": 0.8974829573647967, "learning_rate": 1.2205373450036694e-08, "loss": 0.6397, "step": 7470 }, { "epoch": 0.95, "grad_norm": 0.811570936155315, "learning_rate": 1.214118059644631e-08, "loss": 0.5829, "step": 7471 }, { "epoch": 0.95, "grad_norm": 0.6586064858591256, "learning_rate": 1.2077155964921536e-08, "loss": 0.5774, "step": 7472 }, { "epoch": 0.95, "grad_norm": 0.7827020231054449, "learning_rate": 1.2013299566364988e-08, "loss": 0.6318, "step": 7473 }, { "epoch": 0.95, "grad_norm": 0.7747931346889434, "learning_rate": 1.1949611411650628e-08, "loss": 0.5929, "step": 7474 }, { "epoch": 0.95, "grad_norm": 1.0526926416869506, "learning_rate": 1.1886091511624118e-08, "loss": 0.6061, "step": 7475 }, { "epoch": 0.95, "grad_norm": 0.6893402279171401, "learning_rate": 1.182273987710214e-08, "loss": 0.5609, "step": 7476 }, { "epoch": 0.95, "grad_norm": 0.7755770870514901, "learning_rate": 1.1759556518872726e-08, "loss": 0.5239, "step": 7477 }, { "epoch": 0.95, "grad_norm": 0.808182937599015, "learning_rate": 1.1696541447695495e-08, "loss": 0.5811, "step": 7478 }, { "epoch": 0.95, "grad_norm": 0.7554955483621599, "learning_rate": 1.1633694674301198e-08, "loss": 0.5583, "step": 7479 }, { "epoch": 0.95, "grad_norm": 0.6532446438972842, "learning_rate": 1.1571016209391938e-08, "loss": 0.5547, "step": 7480 }, { "epoch": 0.95, "grad_norm": 0.9858596140611197, "learning_rate": 1.1508506063641177e-08, "loss": 0.68, "step": 7481 }, { "epoch": 0.95, "grad_norm": 0.8526843507711517, "learning_rate": 1.1446164247693846e-08, "loss": 0.5612, "step": 7482 }, { "epoch": 0.95, "grad_norm": 0.8569036759493792, "learning_rate": 1.1383990772166118e-08, "loss": 0.6691, "step": 7483 }, { "epoch": 0.95, "grad_norm": 0.9169812059723932, "learning_rate": 1.1321985647645415e-08, "loss": 0.6925, "step": 7484 }, { "epoch": 0.95, "grad_norm": 0.6331171599888407, "learning_rate": 1.1260148884690512e-08, "loss": 0.5513, "step": 7485 }, { "epoch": 0.95, "grad_norm": 0.6837030869785222, "learning_rate": 1.1198480493831764e-08, "loss": 0.5164, "step": 7486 }, { "epoch": 0.95, "grad_norm": 0.7809302427240681, "learning_rate": 1.1136980485570436e-08, "loss": 0.5731, "step": 7487 }, { "epoch": 0.95, "grad_norm": 1.138571079313505, "learning_rate": 1.1075648870379484e-08, "loss": 0.6818, "step": 7488 }, { "epoch": 0.95, "grad_norm": 0.9838949559959377, "learning_rate": 1.1014485658702888e-08, "loss": 0.6787, "step": 7489 }, { "epoch": 0.95, "grad_norm": 1.0092771007766697, "learning_rate": 1.0953490860956316e-08, "loss": 0.6073, "step": 7490 }, { "epoch": 0.95, "grad_norm": 0.8794828659376731, "learning_rate": 1.0892664487526349e-08, "loss": 0.6543, "step": 7491 }, { "epoch": 0.95, "grad_norm": 0.8545104096145523, "learning_rate": 1.0832006548771255e-08, "loss": 0.5683, "step": 7492 }, { "epoch": 0.95, "grad_norm": 0.7436780616702736, "learning_rate": 1.0771517055020329e-08, "loss": 0.4984, "step": 7493 }, { "epoch": 0.95, "grad_norm": 0.7381212442322622, "learning_rate": 1.071119601657433e-08, "loss": 0.5462, "step": 7494 }, { "epoch": 0.95, "grad_norm": 0.6773485257548485, "learning_rate": 1.0651043443705265e-08, "loss": 0.5475, "step": 7495 }, { "epoch": 0.95, "grad_norm": 0.7800486506403884, "learning_rate": 1.0591059346656606e-08, "loss": 0.5794, "step": 7496 }, { "epoch": 0.96, "grad_norm": 0.6908874198785283, "learning_rate": 1.0531243735642958e-08, "loss": 0.5335, "step": 7497 }, { "epoch": 0.96, "grad_norm": 0.7456726693453808, "learning_rate": 1.0471596620850287e-08, "loss": 0.5766, "step": 7498 }, { "epoch": 0.96, "grad_norm": 0.609232105365008, "learning_rate": 1.0412118012435912e-08, "loss": 0.5124, "step": 7499 }, { "epoch": 0.96, "grad_norm": 2.709611044411409, "learning_rate": 1.0352807920528284e-08, "loss": 0.6198, "step": 7500 }, { "epoch": 0.96, "grad_norm": 0.8431788329165055, "learning_rate": 1.0293666355227437e-08, "loss": 0.595, "step": 7501 }, { "epoch": 0.96, "grad_norm": 0.6091891525029052, "learning_rate": 1.0234693326604648e-08, "loss": 0.523, "step": 7502 }, { "epoch": 0.96, "grad_norm": 0.7246699663512606, "learning_rate": 1.0175888844702108e-08, "loss": 0.5218, "step": 7503 }, { "epoch": 0.96, "grad_norm": 0.8068193393415852, "learning_rate": 1.0117252919533802e-08, "loss": 0.5332, "step": 7504 }, { "epoch": 0.96, "grad_norm": 0.6373922915593233, "learning_rate": 1.005878556108486e-08, "loss": 0.5682, "step": 7505 }, { "epoch": 0.96, "grad_norm": 0.7515995577292833, "learning_rate": 1.0000486779311645e-08, "loss": 0.5191, "step": 7506 }, { "epoch": 0.96, "grad_norm": 0.6966555053826615, "learning_rate": 9.942356584141775e-09, "loss": 0.577, "step": 7507 }, { "epoch": 0.96, "grad_norm": 0.7129370865897096, "learning_rate": 9.88439498547422e-09, "loss": 0.5538, "step": 7508 }, { "epoch": 0.96, "grad_norm": 0.8789884911484607, "learning_rate": 9.826601993179196e-09, "loss": 0.6099, "step": 7509 }, { "epoch": 0.96, "grad_norm": 0.8510842384671977, "learning_rate": 9.768977617098383e-09, "loss": 0.6461, "step": 7510 }, { "epoch": 0.96, "grad_norm": 0.9996106063734769, "learning_rate": 9.711521867044492e-09, "loss": 0.6423, "step": 7511 }, { "epoch": 0.96, "grad_norm": 0.8052995363976938, "learning_rate": 9.654234752801693e-09, "loss": 0.5758, "step": 7512 }, { "epoch": 0.96, "grad_norm": 0.7165004307117471, "learning_rate": 9.597116284125294e-09, "loss": 0.567, "step": 7513 }, { "epoch": 0.96, "grad_norm": 2.706665762132291, "learning_rate": 9.540166470742072e-09, "loss": 0.6398, "step": 7514 }, { "epoch": 0.96, "grad_norm": 0.8344278649040886, "learning_rate": 9.483385322350046e-09, "loss": 0.6218, "step": 7515 }, { "epoch": 0.96, "grad_norm": 0.7593176809706591, "learning_rate": 9.426772848618258e-09, "loss": 0.6078, "step": 7516 }, { "epoch": 0.96, "grad_norm": 0.9479314684841266, "learning_rate": 9.370329059187442e-09, "loss": 0.5904, "step": 7517 }, { "epoch": 0.96, "grad_norm": 1.608550619826079, "learning_rate": 9.314053963669244e-09, "loss": 0.6117, "step": 7518 }, { "epoch": 0.96, "grad_norm": 0.8107647270814977, "learning_rate": 9.257947571646774e-09, "loss": 0.6328, "step": 7519 }, { "epoch": 0.96, "grad_norm": 0.6796869017596302, "learning_rate": 9.20200989267439e-09, "loss": 0.5194, "step": 7520 }, { "epoch": 0.96, "grad_norm": 0.8902480216979246, "learning_rate": 9.146240936277583e-09, "loss": 0.6832, "step": 7521 }, { "epoch": 0.96, "grad_norm": 0.7409395001127745, "learning_rate": 9.090640711953312e-09, "loss": 0.5787, "step": 7522 }, { "epoch": 0.96, "grad_norm": 0.7028859756724618, "learning_rate": 9.035209229169783e-09, "loss": 0.5208, "step": 7523 }, { "epoch": 0.96, "grad_norm": 0.7086890186586934, "learning_rate": 8.97994649736622e-09, "loss": 0.508, "step": 7524 }, { "epoch": 0.96, "grad_norm": 0.708304875757451, "learning_rate": 8.92485252595343e-09, "loss": 0.5651, "step": 7525 }, { "epoch": 0.96, "grad_norm": 4.238797266998316, "learning_rate": 8.869927324313354e-09, "loss": 0.6069, "step": 7526 }, { "epoch": 0.96, "grad_norm": 0.8580278984963516, "learning_rate": 8.815170901799174e-09, "loss": 0.6286, "step": 7527 }, { "epoch": 0.96, "grad_norm": 0.9809102282323726, "learning_rate": 8.76058326773521e-09, "loss": 0.6391, "step": 7528 }, { "epoch": 0.96, "grad_norm": 0.8829039812636805, "learning_rate": 8.706164431417251e-09, "loss": 0.6717, "step": 7529 }, { "epoch": 0.96, "grad_norm": 0.6128742736129354, "learning_rate": 8.651914402112326e-09, "loss": 0.5345, "step": 7530 }, { "epoch": 0.96, "grad_norm": 0.9039532102247477, "learning_rate": 8.59783318905849e-09, "loss": 0.6712, "step": 7531 }, { "epoch": 0.96, "grad_norm": 0.9371011545992333, "learning_rate": 8.543920801465488e-09, "loss": 0.6179, "step": 7532 }, { "epoch": 0.96, "grad_norm": 0.8420040601590139, "learning_rate": 8.490177248513753e-09, "loss": 0.6019, "step": 7533 }, { "epoch": 0.96, "grad_norm": 0.7675033169086196, "learning_rate": 8.436602539355297e-09, "loss": 0.587, "step": 7534 }, { "epoch": 0.96, "grad_norm": 0.7892858097666121, "learning_rate": 8.383196683113491e-09, "loss": 0.6465, "step": 7535 }, { "epoch": 0.96, "grad_norm": 0.8587179331982684, "learning_rate": 8.329959688882614e-09, "loss": 0.6084, "step": 7536 }, { "epoch": 0.96, "grad_norm": 0.6881603006339427, "learning_rate": 8.276891565728528e-09, "loss": 0.5315, "step": 7537 }, { "epoch": 0.96, "grad_norm": 0.8182522569649632, "learning_rate": 8.22399232268811e-09, "loss": 0.6257, "step": 7538 }, { "epoch": 0.96, "grad_norm": 0.8143999278603906, "learning_rate": 8.171261968769606e-09, "loss": 0.5258, "step": 7539 }, { "epoch": 0.96, "grad_norm": 0.7768436738322156, "learning_rate": 8.11870051295227e-09, "loss": 0.5145, "step": 7540 }, { "epoch": 0.96, "grad_norm": 0.6696638197929685, "learning_rate": 8.066307964186947e-09, "loss": 0.517, "step": 7541 }, { "epoch": 0.96, "grad_norm": 0.8719131752139349, "learning_rate": 8.014084331395498e-09, "loss": 0.5838, "step": 7542 }, { "epoch": 0.96, "grad_norm": 0.5781892332883033, "learning_rate": 7.962029623471033e-09, "loss": 0.5079, "step": 7543 }, { "epoch": 0.96, "grad_norm": 0.8020540480226601, "learning_rate": 7.910143849278017e-09, "loss": 0.5469, "step": 7544 }, { "epoch": 0.96, "grad_norm": 0.7054626042109186, "learning_rate": 7.858427017651825e-09, "loss": 0.5356, "step": 7545 }, { "epoch": 0.96, "grad_norm": 0.7413876744199586, "learning_rate": 7.806879137399636e-09, "loss": 0.5502, "step": 7546 }, { "epoch": 0.96, "grad_norm": 0.6756865411673445, "learning_rate": 7.755500217299205e-09, "loss": 0.5704, "step": 7547 }, { "epoch": 0.96, "grad_norm": 0.8095412022910256, "learning_rate": 7.704290266099977e-09, "loss": 0.6823, "step": 7548 }, { "epoch": 0.96, "grad_norm": 0.8373607604943534, "learning_rate": 7.653249292522535e-09, "loss": 0.5535, "step": 7549 }, { "epoch": 0.96, "grad_norm": 0.935621437830354, "learning_rate": 7.602377305258478e-09, "loss": 0.6729, "step": 7550 }, { "epoch": 0.96, "grad_norm": 0.7419776401823711, "learning_rate": 7.55167431297088e-09, "loss": 0.5253, "step": 7551 }, { "epoch": 0.96, "grad_norm": 0.8696753973084435, "learning_rate": 7.501140324293943e-09, "loss": 0.5868, "step": 7552 }, { "epoch": 0.96, "grad_norm": 0.7322803708925193, "learning_rate": 7.450775347832893e-09, "loss": 0.5401, "step": 7553 }, { "epoch": 0.96, "grad_norm": 0.9341471986371082, "learning_rate": 7.400579392164541e-09, "loss": 0.6244, "step": 7554 }, { "epoch": 0.96, "grad_norm": 0.685579468148357, "learning_rate": 7.350552465836712e-09, "loss": 0.5639, "step": 7555 }, { "epoch": 0.96, "grad_norm": 0.7995344196590488, "learning_rate": 7.300694577368482e-09, "loss": 0.5198, "step": 7556 }, { "epoch": 0.96, "grad_norm": 0.86902026154038, "learning_rate": 7.2510057352500595e-09, "loss": 0.6428, "step": 7557 }, { "epoch": 0.96, "grad_norm": 0.8713701417272404, "learning_rate": 7.201485947943009e-09, "loss": 0.6575, "step": 7558 }, { "epoch": 0.96, "grad_norm": 0.7802405231514628, "learning_rate": 7.152135223879918e-09, "loss": 0.568, "step": 7559 }, { "epoch": 0.96, "grad_norm": 0.7373286111494213, "learning_rate": 7.102953571464953e-09, "loss": 0.5952, "step": 7560 }, { "epoch": 0.96, "grad_norm": 0.9750381066857418, "learning_rate": 7.053940999072971e-09, "loss": 0.6339, "step": 7561 }, { "epoch": 0.96, "grad_norm": 0.7696393262829982, "learning_rate": 7.005097515050296e-09, "loss": 0.5303, "step": 7562 }, { "epoch": 0.96, "grad_norm": 0.6953188160217638, "learning_rate": 6.956423127714717e-09, "loss": 0.5522, "step": 7563 }, { "epoch": 0.96, "grad_norm": 0.8061153277992281, "learning_rate": 6.907917845354827e-09, "loss": 0.6269, "step": 7564 }, { "epoch": 0.96, "grad_norm": 0.5999099740193771, "learning_rate": 6.8595816762304635e-09, "loss": 0.4523, "step": 7565 }, { "epoch": 0.96, "grad_norm": 0.7719315501949956, "learning_rate": 6.811414628572931e-09, "loss": 0.5961, "step": 7566 }, { "epoch": 0.96, "grad_norm": 0.6362304056432907, "learning_rate": 6.763416710584557e-09, "loss": 0.5383, "step": 7567 }, { "epoch": 0.96, "grad_norm": 0.7091126867296464, "learning_rate": 6.7155879304388045e-09, "loss": 0.571, "step": 7568 }, { "epoch": 0.96, "grad_norm": 0.665901378440214, "learning_rate": 6.66792829628049e-09, "loss": 0.5673, "step": 7569 }, { "epoch": 0.96, "grad_norm": 0.6725077902039263, "learning_rate": 6.620437816225566e-09, "loss": 0.5197, "step": 7570 }, { "epoch": 0.96, "grad_norm": 0.7286549194993233, "learning_rate": 6.573116498361009e-09, "loss": 0.5076, "step": 7571 }, { "epoch": 0.96, "grad_norm": 0.6323778390854087, "learning_rate": 6.525964350745372e-09, "loss": 0.5159, "step": 7572 }, { "epoch": 0.96, "grad_norm": 0.8544654773466921, "learning_rate": 6.478981381408011e-09, "loss": 0.6284, "step": 7573 }, { "epoch": 0.96, "grad_norm": 0.7153054579615711, "learning_rate": 6.432167598349747e-09, "loss": 0.5424, "step": 7574 }, { "epoch": 0.97, "grad_norm": 0.6533476176169017, "learning_rate": 6.385523009542426e-09, "loss": 0.5575, "step": 7575 }, { "epoch": 0.97, "grad_norm": 0.7119882130740285, "learning_rate": 6.339047622929139e-09, "loss": 0.5327, "step": 7576 }, { "epoch": 0.97, "grad_norm": 0.6642632343696933, "learning_rate": 6.292741446424111e-09, "loss": 0.5612, "step": 7577 }, { "epoch": 0.97, "grad_norm": 0.6621859589178055, "learning_rate": 6.24660448791281e-09, "loss": 0.5462, "step": 7578 }, { "epoch": 0.97, "grad_norm": 0.8249447035337744, "learning_rate": 6.200636755251954e-09, "loss": 0.6099, "step": 7579 }, { "epoch": 0.97, "grad_norm": 0.8682403610990475, "learning_rate": 6.1548382562693905e-09, "loss": 0.6382, "step": 7580 }, { "epoch": 0.97, "grad_norm": 0.6928112996598041, "learning_rate": 6.109208998763993e-09, "loss": 0.5973, "step": 7581 }, { "epoch": 0.97, "grad_norm": 0.693022665382722, "learning_rate": 6.06374899050599e-09, "loss": 0.5738, "step": 7582 }, { "epoch": 0.97, "grad_norm": 0.6516607857370383, "learning_rate": 6.018458239236857e-09, "loss": 0.5562, "step": 7583 }, { "epoch": 0.97, "grad_norm": 0.8842785760140708, "learning_rate": 5.973336752669089e-09, "loss": 0.4995, "step": 7584 }, { "epoch": 0.97, "grad_norm": 0.8075849493962141, "learning_rate": 5.9283845384863196e-09, "loss": 0.5766, "step": 7585 }, { "epoch": 0.97, "grad_norm": 0.6170562325150267, "learning_rate": 5.883601604343536e-09, "loss": 0.5061, "step": 7586 }, { "epoch": 0.97, "grad_norm": 0.7602226824331247, "learning_rate": 5.838987957866748e-09, "loss": 0.5354, "step": 7587 }, { "epoch": 0.97, "grad_norm": 0.9442038542114577, "learning_rate": 5.794543606653213e-09, "loss": 0.5684, "step": 7588 }, { "epoch": 0.97, "grad_norm": 0.7863367425134906, "learning_rate": 5.750268558271432e-09, "loss": 0.5711, "step": 7589 }, { "epoch": 0.97, "grad_norm": 0.6497559128150161, "learning_rate": 5.706162820260818e-09, "loss": 0.5574, "step": 7590 }, { "epoch": 0.97, "grad_norm": 0.8846944384559432, "learning_rate": 5.662226400132142e-09, "loss": 0.6085, "step": 7591 }, { "epoch": 0.97, "grad_norm": 0.9398363031465404, "learning_rate": 5.618459305367418e-09, "loss": 0.628, "step": 7592 }, { "epoch": 0.97, "grad_norm": 1.0153481351861122, "learning_rate": 5.574861543419685e-09, "loss": 0.6186, "step": 7593 }, { "epoch": 0.97, "grad_norm": 0.5649109335713043, "learning_rate": 5.531433121713225e-09, "loss": 0.4594, "step": 7594 }, { "epoch": 0.97, "grad_norm": 0.9364037205962281, "learning_rate": 5.488174047643346e-09, "loss": 0.5367, "step": 7595 }, { "epoch": 0.97, "grad_norm": 0.6014264577058027, "learning_rate": 5.44508432857671e-09, "loss": 0.55, "step": 7596 }, { "epoch": 0.97, "grad_norm": 0.9408193818242538, "learning_rate": 5.4021639718510036e-09, "loss": 0.6493, "step": 7597 }, { "epoch": 0.97, "grad_norm": 0.8171306413202737, "learning_rate": 5.359412984775158e-09, "loss": 0.6587, "step": 7598 }, { "epoch": 0.97, "grad_norm": 1.0144819978904633, "learning_rate": 5.316831374629127e-09, "loss": 0.5918, "step": 7599 }, { "epoch": 0.97, "grad_norm": 0.7954285966062634, "learning_rate": 5.274419148664222e-09, "loss": 0.5745, "step": 7600 }, { "epoch": 0.97, "grad_norm": 0.6878477292867142, "learning_rate": 5.232176314102776e-09, "loss": 0.5426, "step": 7601 }, { "epoch": 0.97, "grad_norm": 0.8673852009716464, "learning_rate": 5.1901028781382585e-09, "loss": 0.6206, "step": 7602 }, { "epoch": 0.97, "grad_norm": 0.9948198275492526, "learning_rate": 5.148198847935381e-09, "loss": 0.5714, "step": 7603 }, { "epoch": 0.97, "grad_norm": 0.9125179997256873, "learning_rate": 5.106464230629992e-09, "loss": 0.551, "step": 7604 }, { "epoch": 0.97, "grad_norm": 0.7157607717861895, "learning_rate": 5.064899033328962e-09, "loss": 0.5627, "step": 7605 }, { "epoch": 0.97, "grad_norm": 0.6682642272129844, "learning_rate": 5.023503263110407e-09, "loss": 0.4957, "step": 7606 }, { "epoch": 0.97, "grad_norm": 0.7953733617703158, "learning_rate": 4.982276927023799e-09, "loss": 0.5285, "step": 7607 }, { "epoch": 0.97, "grad_norm": 0.718964951000397, "learning_rate": 4.9412200320893e-09, "loss": 0.5832, "step": 7608 }, { "epoch": 0.97, "grad_norm": 0.6123040979142189, "learning_rate": 4.90033258529865e-09, "loss": 0.5128, "step": 7609 }, { "epoch": 0.97, "grad_norm": 0.9467179917810105, "learning_rate": 4.859614593614392e-09, "loss": 0.6767, "step": 7610 }, { "epoch": 0.97, "grad_norm": 0.6992169519985888, "learning_rate": 4.819066063970534e-09, "loss": 0.5393, "step": 7611 }, { "epoch": 0.97, "grad_norm": 2.131074199317081, "learning_rate": 4.778687003271997e-09, "loss": 0.6541, "step": 7612 }, { "epoch": 0.97, "grad_norm": 0.7338188373512529, "learning_rate": 4.738477418394948e-09, "loss": 0.5243, "step": 7613 }, { "epoch": 0.97, "grad_norm": 0.6583435644039286, "learning_rate": 4.698437316186577e-09, "loss": 0.5273, "step": 7614 }, { "epoch": 0.97, "grad_norm": 0.6466553645180572, "learning_rate": 4.6585667034654274e-09, "loss": 0.5048, "step": 7615 }, { "epoch": 0.97, "grad_norm": 0.5981060631320428, "learning_rate": 4.618865587020959e-09, "loss": 0.5504, "step": 7616 }, { "epoch": 0.97, "grad_norm": 0.6898142205021601, "learning_rate": 4.579333973613875e-09, "loss": 0.5743, "step": 7617 }, { "epoch": 0.97, "grad_norm": 0.6211126441222631, "learning_rate": 4.539971869976011e-09, "loss": 0.5334, "step": 7618 }, { "epoch": 0.97, "grad_norm": 0.6840162615366923, "learning_rate": 4.50077928281023e-09, "loss": 0.5295, "step": 7619 }, { "epoch": 0.97, "grad_norm": 0.7990232594480967, "learning_rate": 4.4617562187907465e-09, "loss": 0.528, "step": 7620 }, { "epoch": 0.97, "grad_norm": 0.686368405213289, "learning_rate": 4.422902684562801e-09, "loss": 0.5203, "step": 7621 }, { "epoch": 0.97, "grad_norm": 0.6491699685270039, "learning_rate": 4.384218686742547e-09, "loss": 0.5418, "step": 7622 }, { "epoch": 0.97, "grad_norm": 0.9100667459930547, "learning_rate": 4.345704231917713e-09, "loss": 0.6337, "step": 7623 }, { "epoch": 0.97, "grad_norm": 1.1719803524622097, "learning_rate": 4.30735932664672e-09, "loss": 0.6221, "step": 7624 }, { "epoch": 0.97, "grad_norm": 0.8048379569254922, "learning_rate": 4.2691839774594565e-09, "loss": 0.5924, "step": 7625 }, { "epoch": 0.97, "grad_norm": 0.7802571740658417, "learning_rate": 4.2311781908567216e-09, "loss": 0.5241, "step": 7626 }, { "epoch": 0.97, "grad_norm": 0.845511673519417, "learning_rate": 4.193341973310449e-09, "loss": 0.6649, "step": 7627 }, { "epoch": 0.97, "grad_norm": 0.6393972393716844, "learning_rate": 4.155675331263819e-09, "loss": 0.4927, "step": 7628 }, { "epoch": 0.97, "grad_norm": 0.6890336142953265, "learning_rate": 4.118178271131034e-09, "loss": 0.5183, "step": 7629 }, { "epoch": 0.97, "grad_norm": 0.8202212507536755, "learning_rate": 4.080850799297431e-09, "loss": 0.6735, "step": 7630 }, { "epoch": 0.97, "grad_norm": 0.647954104831195, "learning_rate": 4.043692922119591e-09, "loss": 0.5316, "step": 7631 }, { "epoch": 0.97, "grad_norm": 0.8259723120291497, "learning_rate": 4.006704645925007e-09, "loss": 0.6294, "step": 7632 }, { "epoch": 0.97, "grad_norm": 0.7872624547178528, "learning_rate": 3.969885977012533e-09, "loss": 0.5916, "step": 7633 }, { "epoch": 0.97, "grad_norm": 0.6404077507063713, "learning_rate": 3.933236921651817e-09, "loss": 0.5218, "step": 7634 }, { "epoch": 0.97, "grad_norm": 1.5060169456152206, "learning_rate": 3.896757486083868e-09, "loss": 0.5951, "step": 7635 }, { "epoch": 0.97, "grad_norm": 0.7093728253530815, "learning_rate": 3.860447676520828e-09, "loss": 0.5539, "step": 7636 }, { "epoch": 0.97, "grad_norm": 0.8092384093324037, "learning_rate": 3.824307499145862e-09, "loss": 0.5684, "step": 7637 }, { "epoch": 0.97, "grad_norm": 0.7659066425808306, "learning_rate": 3.7883369601132694e-09, "loss": 0.5319, "step": 7638 }, { "epoch": 0.97, "grad_norm": 0.7761458426071753, "learning_rate": 3.752536065548373e-09, "loss": 0.5269, "step": 7639 }, { "epoch": 0.97, "grad_norm": 0.7568130168088266, "learning_rate": 3.7169048215478504e-09, "loss": 0.61, "step": 7640 }, { "epoch": 0.97, "grad_norm": 0.6244437764699241, "learning_rate": 3.6814432341790713e-09, "loss": 0.4834, "step": 7641 }, { "epoch": 0.97, "grad_norm": 0.8281084506953066, "learning_rate": 3.6461513094810936e-09, "loss": 0.5916, "step": 7642 }, { "epoch": 0.97, "grad_norm": 0.8286724921222312, "learning_rate": 3.6110290534635547e-09, "loss": 0.6562, "step": 7643 }, { "epoch": 0.97, "grad_norm": 0.8075634307883415, "learning_rate": 3.576076472107448e-09, "loss": 0.5911, "step": 7644 }, { "epoch": 0.97, "grad_norm": 0.787500315394439, "learning_rate": 3.5412935713649004e-09, "loss": 0.5502, "step": 7645 }, { "epoch": 0.97, "grad_norm": 0.8787649689236644, "learning_rate": 3.506680357158953e-09, "loss": 0.6382, "step": 7646 }, { "epoch": 0.97, "grad_norm": 0.6937920361951332, "learning_rate": 3.4722368353840013e-09, "loss": 0.5517, "step": 7647 }, { "epoch": 0.97, "grad_norm": 0.6960367795578051, "learning_rate": 3.437963011905354e-09, "loss": 0.567, "step": 7648 }, { "epoch": 0.97, "grad_norm": 1.1229765833217076, "learning_rate": 3.4038588925594524e-09, "loss": 0.6191, "step": 7649 }, { "epoch": 0.97, "grad_norm": 0.8310550563839344, "learning_rate": 3.369924483153985e-09, "loss": 0.603, "step": 7650 }, { "epoch": 0.97, "grad_norm": 1.0452837904722785, "learning_rate": 3.336159789467552e-09, "loss": 0.6169, "step": 7651 }, { "epoch": 0.97, "grad_norm": 0.8207085435232901, "learning_rate": 3.3025648172498865e-09, "loss": 0.6807, "step": 7652 }, { "epoch": 0.97, "grad_norm": 1.7210016621085138, "learning_rate": 3.2691395722219685e-09, "loss": 0.6257, "step": 7653 }, { "epoch": 0.98, "grad_norm": 0.7735222422313226, "learning_rate": 3.2358840600756888e-09, "loss": 0.5507, "step": 7654 }, { "epoch": 0.98, "grad_norm": 0.9065330390227576, "learning_rate": 3.2027982864740733e-09, "loss": 0.6434, "step": 7655 }, { "epoch": 0.98, "grad_norm": 0.770361162034577, "learning_rate": 3.169882257051504e-09, "loss": 0.6266, "step": 7656 }, { "epoch": 0.98, "grad_norm": 0.6561794731907594, "learning_rate": 3.1371359774129414e-09, "loss": 0.487, "step": 7657 }, { "epoch": 0.98, "grad_norm": 0.7370926196644199, "learning_rate": 3.104559453134925e-09, "loss": 0.5795, "step": 7658 }, { "epoch": 0.98, "grad_norm": 0.7709353637612014, "learning_rate": 3.0721526897649064e-09, "loss": 0.5098, "step": 7659 }, { "epoch": 0.98, "grad_norm": 0.644998010024961, "learning_rate": 3.0399156928213596e-09, "loss": 0.5302, "step": 7660 }, { "epoch": 0.98, "grad_norm": 0.8908427809021644, "learning_rate": 3.007848467793894e-09, "loss": 0.5863, "step": 7661 }, { "epoch": 0.98, "grad_norm": 0.905303774682902, "learning_rate": 2.9759510201432526e-09, "loss": 0.6324, "step": 7662 }, { "epoch": 0.98, "grad_norm": 0.9907247459803514, "learning_rate": 2.9442233553012006e-09, "loss": 0.6295, "step": 7663 }, { "epoch": 0.98, "grad_norm": 0.99766259334281, "learning_rate": 2.9126654786706394e-09, "loss": 0.6072, "step": 7664 }, { "epoch": 0.98, "grad_norm": 0.8583573024255204, "learning_rate": 2.881277395625603e-09, "loss": 0.6481, "step": 7665 }, { "epoch": 0.98, "grad_norm": 0.7610332640630907, "learning_rate": 2.8500591115111493e-09, "loss": 0.5614, "step": 7666 }, { "epoch": 0.98, "grad_norm": 0.9271367726365909, "learning_rate": 2.8190106316432483e-09, "loss": 0.5837, "step": 7667 }, { "epoch": 0.98, "grad_norm": 0.6844169353815958, "learning_rate": 2.788131961309448e-09, "loss": 0.4852, "step": 7668 }, { "epoch": 0.98, "grad_norm": 0.8008408192047864, "learning_rate": 2.7574231057678753e-09, "loss": 0.5741, "step": 7669 }, { "epoch": 0.98, "grad_norm": 0.6076879576816461, "learning_rate": 2.7268840702479033e-09, "loss": 0.5035, "step": 7670 }, { "epoch": 0.98, "grad_norm": 0.8505519349028723, "learning_rate": 2.6965148599500387e-09, "loss": 0.6122, "step": 7671 }, { "epoch": 0.98, "grad_norm": 0.7036491110490439, "learning_rate": 2.666315480045922e-09, "loss": 0.5529, "step": 7672 }, { "epoch": 0.98, "grad_norm": 1.1616194954521126, "learning_rate": 2.6362859356781065e-09, "loss": 0.6597, "step": 7673 }, { "epoch": 0.98, "grad_norm": 0.6834273032154738, "learning_rate": 2.606426231960279e-09, "loss": 0.5729, "step": 7674 }, { "epoch": 0.98, "grad_norm": 0.9741503292856879, "learning_rate": 2.5767363739773728e-09, "loss": 0.6398, "step": 7675 }, { "epoch": 0.98, "grad_norm": 0.6662352293095339, "learning_rate": 2.547216366785121e-09, "loss": 0.5717, "step": 7676 }, { "epoch": 0.98, "grad_norm": 0.8953487963619526, "learning_rate": 2.5178662154106136e-09, "loss": 0.6119, "step": 7677 }, { "epoch": 0.98, "grad_norm": 0.8857828237363115, "learning_rate": 2.488685924851741e-09, "loss": 0.6418, "step": 7678 }, { "epoch": 0.98, "grad_norm": 0.7400300465617861, "learning_rate": 2.45967550007764e-09, "loss": 0.6139, "step": 7679 }, { "epoch": 0.98, "grad_norm": 0.6322052522698717, "learning_rate": 2.4308349460285817e-09, "loss": 0.5086, "step": 7680 }, { "epoch": 0.98, "grad_norm": 0.7535875327503674, "learning_rate": 2.4021642676156363e-09, "loss": 0.5437, "step": 7681 }, { "epoch": 0.98, "grad_norm": 0.9387460652472008, "learning_rate": 2.373663469721232e-09, "loss": 0.6471, "step": 7682 }, { "epoch": 0.98, "grad_norm": 0.8836071519759069, "learning_rate": 2.3453325571988203e-09, "loss": 0.6758, "step": 7683 }, { "epoch": 0.98, "grad_norm": 0.751826562558573, "learning_rate": 2.317171534872764e-09, "loss": 0.6335, "step": 7684 }, { "epoch": 0.98, "grad_norm": 0.8693007266561128, "learning_rate": 2.28918040753856e-09, "loss": 0.5482, "step": 7685 }, { "epoch": 0.98, "grad_norm": 0.6629854904796727, "learning_rate": 2.2613591799628408e-09, "loss": 0.53, "step": 7686 }, { "epoch": 0.98, "grad_norm": 0.8247883006609313, "learning_rate": 2.2337078568833713e-09, "loss": 0.5913, "step": 7687 }, { "epoch": 0.98, "grad_norm": 0.9207997536665475, "learning_rate": 2.2062264430087185e-09, "loss": 0.6562, "step": 7688 }, { "epoch": 0.98, "grad_norm": 0.9820487790399751, "learning_rate": 2.178914943018806e-09, "loss": 0.6132, "step": 7689 }, { "epoch": 0.98, "grad_norm": 0.7787798399632668, "learning_rate": 2.1517733615644686e-09, "loss": 0.6295, "step": 7690 }, { "epoch": 0.98, "grad_norm": 0.8993581695610413, "learning_rate": 2.1248017032675645e-09, "loss": 0.6496, "step": 7691 }, { "epoch": 0.98, "grad_norm": 1.4198671075398537, "learning_rate": 2.0979999727211985e-09, "loss": 0.6371, "step": 7692 }, { "epoch": 0.98, "grad_norm": 0.9520256670361313, "learning_rate": 2.071368174489274e-09, "loss": 0.6709, "step": 7693 }, { "epoch": 0.98, "grad_norm": 0.6894919582458224, "learning_rate": 2.0449063131070534e-09, "loss": 0.5331, "step": 7694 }, { "epoch": 0.98, "grad_norm": 0.906231865334116, "learning_rate": 2.0186143930807087e-09, "loss": 0.6682, "step": 7695 }, { "epoch": 0.98, "grad_norm": 0.8742463885006678, "learning_rate": 1.992492418887437e-09, "loss": 0.6608, "step": 7696 }, { "epoch": 0.98, "grad_norm": 0.6993170826483747, "learning_rate": 1.966540394975458e-09, "loss": 0.5378, "step": 7697 }, { "epoch": 0.98, "grad_norm": 0.6062064496637353, "learning_rate": 1.940758325764347e-09, "loss": 0.5204, "step": 7698 }, { "epoch": 0.98, "grad_norm": 1.1752873068564542, "learning_rate": 1.915146215644259e-09, "loss": 0.6338, "step": 7699 }, { "epoch": 0.98, "grad_norm": 0.6096760859842739, "learning_rate": 1.8897040689768164e-09, "loss": 0.5329, "step": 7700 }, { "epoch": 0.98, "grad_norm": 0.7857277761261512, "learning_rate": 1.8644318900945532e-09, "loss": 0.6288, "step": 7701 }, { "epoch": 0.98, "grad_norm": 3.2029585915958085, "learning_rate": 1.8393296833011386e-09, "loss": 0.6395, "step": 7702 }, { "epoch": 0.98, "grad_norm": 0.6362993007555012, "learning_rate": 1.8143974528710414e-09, "loss": 0.4728, "step": 7703 }, { "epoch": 0.98, "grad_norm": 0.7797341961480369, "learning_rate": 1.7896352030499772e-09, "loss": 0.5878, "step": 7704 }, { "epoch": 0.98, "grad_norm": 0.8110437963890815, "learning_rate": 1.7650429380547948e-09, "loss": 0.5823, "step": 7705 }, { "epoch": 0.98, "grad_norm": 0.7500184973892016, "learning_rate": 1.7406206620732556e-09, "loss": 0.54, "step": 7706 }, { "epoch": 0.98, "grad_norm": 0.8058649993254625, "learning_rate": 1.7163683792642546e-09, "loss": 0.5941, "step": 7707 }, { "epoch": 0.98, "grad_norm": 0.8857687890081204, "learning_rate": 1.692286093757711e-09, "loss": 0.5895, "step": 7708 }, { "epoch": 0.98, "grad_norm": 0.8903378301260115, "learning_rate": 1.6683738096544553e-09, "loss": 0.6421, "step": 7709 }, { "epoch": 0.98, "grad_norm": 0.8627915731427043, "learning_rate": 1.6446315310266746e-09, "loss": 0.5657, "step": 7710 }, { "epoch": 0.98, "grad_norm": 0.6371493798251553, "learning_rate": 1.6210592619173569e-09, "loss": 0.5156, "step": 7711 }, { "epoch": 0.98, "grad_norm": 0.6422608474991933, "learning_rate": 1.5976570063405136e-09, "loss": 0.573, "step": 7712 }, { "epoch": 0.98, "grad_norm": 0.9502088844667549, "learning_rate": 1.5744247682815126e-09, "loss": 0.6595, "step": 7713 }, { "epoch": 0.98, "grad_norm": 0.7767363529129933, "learning_rate": 1.5513625516964113e-09, "loss": 0.5963, "step": 7714 }, { "epoch": 0.98, "grad_norm": 0.9082009653060671, "learning_rate": 1.5284703605125126e-09, "loss": 0.6714, "step": 7715 }, { "epoch": 0.98, "grad_norm": 0.7720588542362435, "learning_rate": 1.5057481986280318e-09, "loss": 0.5769, "step": 7716 }, { "epoch": 0.98, "grad_norm": 0.9967270142968973, "learning_rate": 1.48319606991254e-09, "loss": 0.5964, "step": 7717 }, { "epoch": 0.98, "grad_norm": 0.7945730618890671, "learning_rate": 1.460813978206188e-09, "loss": 0.5834, "step": 7718 }, { "epoch": 0.98, "grad_norm": 0.8832620734287215, "learning_rate": 1.4386019273204819e-09, "loss": 0.6154, "step": 7719 }, { "epoch": 0.98, "grad_norm": 0.906122491245456, "learning_rate": 1.416559921037952e-09, "loss": 0.682, "step": 7720 }, { "epoch": 0.98, "grad_norm": 0.7870923371120171, "learning_rate": 1.3946879631121511e-09, "loss": 0.5618, "step": 7721 }, { "epoch": 0.98, "grad_norm": 0.9082631059173026, "learning_rate": 1.3729860572675446e-09, "loss": 0.5875, "step": 7722 }, { "epoch": 0.98, "grad_norm": 0.6666193874646723, "learning_rate": 1.3514542071998425e-09, "loss": 0.5145, "step": 7723 }, { "epoch": 0.98, "grad_norm": 0.7670320844392445, "learning_rate": 1.3300924165756678e-09, "loss": 0.6272, "step": 7724 }, { "epoch": 0.98, "grad_norm": 1.1991674558183119, "learning_rate": 1.3089006890325549e-09, "loss": 0.6411, "step": 7725 }, { "epoch": 0.98, "grad_norm": 0.895322372351799, "learning_rate": 1.287879028179395e-09, "loss": 0.6169, "step": 7726 }, { "epoch": 0.98, "grad_norm": 1.161419626608704, "learning_rate": 1.2670274375959911e-09, "loss": 0.5765, "step": 7727 }, { "epoch": 0.98, "grad_norm": 1.0821107551447402, "learning_rate": 1.2463459208331696e-09, "loss": 0.6794, "step": 7728 }, { "epoch": 0.98, "grad_norm": 0.7683768294915218, "learning_rate": 1.2258344814125577e-09, "loss": 0.5954, "step": 7729 }, { "epoch": 0.98, "grad_norm": 0.6318128180742039, "learning_rate": 1.2054931228272502e-09, "loss": 0.5592, "step": 7730 }, { "epoch": 0.98, "grad_norm": 0.7149259086210914, "learning_rate": 1.185321848541032e-09, "loss": 0.5495, "step": 7731 }, { "epoch": 0.99, "grad_norm": 0.7055943271727431, "learning_rate": 1.1653206619889334e-09, "loss": 0.5383, "step": 7732 }, { "epoch": 0.99, "grad_norm": 0.7022404021510396, "learning_rate": 1.145489566577007e-09, "loss": 0.5056, "step": 7733 }, { "epoch": 0.99, "grad_norm": 1.125695385968074, "learning_rate": 1.1258285656822186e-09, "loss": 0.682, "step": 7734 }, { "epoch": 0.99, "grad_norm": 0.7435298571653468, "learning_rate": 1.1063376626525567e-09, "loss": 0.5697, "step": 7735 }, { "epoch": 0.99, "grad_norm": 0.6841377407229922, "learning_rate": 1.087016860807255e-09, "loss": 0.5257, "step": 7736 }, { "epoch": 0.99, "grad_norm": 0.6312959323075776, "learning_rate": 1.0678661634363484e-09, "loss": 0.5139, "step": 7737 }, { "epoch": 0.99, "grad_norm": 0.7454446328939776, "learning_rate": 1.048885573801117e-09, "loss": 0.5308, "step": 7738 }, { "epoch": 0.99, "grad_norm": 0.930261987513385, "learning_rate": 1.0300750951335313e-09, "loss": 0.641, "step": 7739 }, { "epoch": 0.99, "grad_norm": 0.651838990915629, "learning_rate": 1.0114347306370286e-09, "loss": 0.5046, "step": 7740 }, { "epoch": 0.99, "grad_norm": 0.6633323430717634, "learning_rate": 9.929644834858474e-10, "loss": 0.5125, "step": 7741 }, { "epoch": 0.99, "grad_norm": 2.3962291920591583, "learning_rate": 9.7466435682525e-10, "loss": 0.6676, "step": 7742 }, { "epoch": 0.99, "grad_norm": 0.5905443934316996, "learning_rate": 9.565343537714098e-10, "loss": 0.4721, "step": 7743 }, { "epoch": 0.99, "grad_norm": 0.7652373649870421, "learning_rate": 9.385744774119687e-10, "loss": 0.5672, "step": 7744 }, { "epoch": 0.99, "grad_norm": 0.8355364862447022, "learning_rate": 9.207847308050354e-10, "loss": 0.5349, "step": 7745 }, { "epoch": 0.99, "grad_norm": 0.9812709719835333, "learning_rate": 9.031651169801868e-10, "loss": 0.6637, "step": 7746 }, { "epoch": 0.99, "grad_norm": 0.640559651817363, "learning_rate": 8.857156389378007e-10, "loss": 0.5059, "step": 7747 }, { "epoch": 0.99, "grad_norm": 0.7622410352315059, "learning_rate": 8.684362996492778e-10, "loss": 0.6082, "step": 7748 }, { "epoch": 0.99, "grad_norm": 0.759436239412125, "learning_rate": 8.513271020572644e-10, "loss": 0.5591, "step": 7749 }, { "epoch": 0.99, "grad_norm": 0.7087432165089936, "learning_rate": 8.343880490750965e-10, "loss": 0.5734, "step": 7750 }, { "epoch": 0.99, "grad_norm": 0.9115974543764354, "learning_rate": 8.176191435874668e-10, "loss": 0.684, "step": 7751 }, { "epoch": 0.99, "grad_norm": 0.8429322905559871, "learning_rate": 8.01020388449869e-10, "loss": 0.6412, "step": 7752 }, { "epoch": 0.99, "grad_norm": 0.7105458421511506, "learning_rate": 7.845917864889307e-10, "loss": 0.5514, "step": 7753 }, { "epoch": 0.99, "grad_norm": 0.7190693131106404, "learning_rate": 7.683333405023029e-10, "loss": 0.5358, "step": 7754 }, { "epoch": 0.99, "grad_norm": 0.6579933881946854, "learning_rate": 7.522450532585489e-10, "loss": 0.5174, "step": 7755 }, { "epoch": 0.99, "grad_norm": 0.8520475146054648, "learning_rate": 7.36326927497366e-10, "loss": 0.5876, "step": 7756 }, { "epoch": 0.99, "grad_norm": 0.8391052673828491, "learning_rate": 7.205789659294748e-10, "loss": 0.5632, "step": 7757 }, { "epoch": 0.99, "grad_norm": 0.687771013003076, "learning_rate": 7.050011712365078e-10, "loss": 0.5678, "step": 7758 }, { "epoch": 0.99, "grad_norm": 0.7231398444955385, "learning_rate": 6.895935460713431e-10, "loss": 0.5073, "step": 7759 }, { "epoch": 0.99, "grad_norm": 1.0439955976693296, "learning_rate": 6.743560930576597e-10, "loss": 0.5563, "step": 7760 }, { "epoch": 0.99, "grad_norm": 0.7748237796749509, "learning_rate": 6.592888147901599e-10, "loss": 0.6211, "step": 7761 }, { "epoch": 0.99, "grad_norm": 0.6697530504609222, "learning_rate": 6.44391713834791e-10, "loss": 0.5906, "step": 7762 }, { "epoch": 0.99, "grad_norm": 0.8173992007651356, "learning_rate": 6.296647927283016e-10, "loss": 0.5727, "step": 7763 }, { "epoch": 0.99, "grad_norm": 0.6995655725056337, "learning_rate": 6.151080539784637e-10, "loss": 0.5837, "step": 7764 }, { "epoch": 0.99, "grad_norm": 0.8340377363415067, "learning_rate": 6.007215000641829e-10, "loss": 0.5955, "step": 7765 }, { "epoch": 0.99, "grad_norm": 1.1362039877575774, "learning_rate": 5.865051334354998e-10, "loss": 0.5827, "step": 7766 }, { "epoch": 0.99, "grad_norm": 0.9204452107089917, "learning_rate": 5.724589565130333e-10, "loss": 0.7043, "step": 7767 }, { "epoch": 0.99, "grad_norm": 0.8969727321660015, "learning_rate": 5.585829716889812e-10, "loss": 0.6007, "step": 7768 }, { "epoch": 0.99, "grad_norm": 0.7245955990855155, "learning_rate": 5.4487718132612e-10, "loss": 0.596, "step": 7769 }, { "epoch": 0.99, "grad_norm": 0.7821150569561376, "learning_rate": 5.313415877583605e-10, "loss": 0.5417, "step": 7770 }, { "epoch": 0.99, "grad_norm": 0.7902372250417986, "learning_rate": 5.179761932907478e-10, "loss": 0.5379, "step": 7771 }, { "epoch": 0.99, "grad_norm": 0.8452380216172978, "learning_rate": 5.047810001992392e-10, "loss": 0.5909, "step": 7772 }, { "epoch": 0.99, "grad_norm": 0.6879838393543596, "learning_rate": 4.91756010730926e-10, "loss": 0.5234, "step": 7773 }, { "epoch": 0.99, "grad_norm": 0.811489517106738, "learning_rate": 4.789012271037008e-10, "loss": 0.631, "step": 7774 }, { "epoch": 0.99, "grad_norm": 0.8862776548730364, "learning_rate": 4.662166515067011e-10, "loss": 0.6016, "step": 7775 }, { "epoch": 0.99, "grad_norm": 0.8361010535139795, "learning_rate": 4.5370228609997727e-10, "loss": 0.6399, "step": 7776 }, { "epoch": 0.99, "grad_norm": 0.9146471098773087, "learning_rate": 4.413581330144911e-10, "loss": 0.6132, "step": 7777 }, { "epoch": 0.99, "grad_norm": 0.8103643952406688, "learning_rate": 4.291841943524499e-10, "loss": 0.5938, "step": 7778 }, { "epoch": 0.99, "grad_norm": 0.8131392206165737, "learning_rate": 4.1718047218686213e-10, "loss": 0.5584, "step": 7779 }, { "epoch": 0.99, "grad_norm": 0.6557486036260337, "learning_rate": 4.053469685617594e-10, "loss": 0.5386, "step": 7780 }, { "epoch": 0.99, "grad_norm": 0.8535844316312553, "learning_rate": 3.936836854925296e-10, "loss": 0.658, "step": 7781 }, { "epoch": 0.99, "grad_norm": 0.7168049015838625, "learning_rate": 3.821906249650286e-10, "loss": 0.5863, "step": 7782 }, { "epoch": 0.99, "grad_norm": 0.7211229890962785, "learning_rate": 3.7086778893657967e-10, "loss": 0.5311, "step": 7783 }, { "epoch": 0.99, "grad_norm": 0.6421714804378829, "learning_rate": 3.5971517933519604e-10, "loss": 0.494, "step": 7784 }, { "epoch": 0.99, "grad_norm": 0.6805517185959694, "learning_rate": 3.4873279806024725e-10, "loss": 0.5376, "step": 7785 }, { "epoch": 0.99, "grad_norm": 0.7969189570048373, "learning_rate": 3.37920646981793e-10, "loss": 0.607, "step": 7786 }, { "epoch": 0.99, "grad_norm": 0.8668643062043864, "learning_rate": 3.2727872794102716e-10, "loss": 0.6167, "step": 7787 }, { "epoch": 0.99, "grad_norm": 0.6634571073149956, "learning_rate": 3.168070427501668e-10, "loss": 0.5154, "step": 7788 }, { "epoch": 0.99, "grad_norm": 0.9179457487398366, "learning_rate": 3.0650559319245204e-10, "loss": 0.6571, "step": 7789 }, { "epoch": 0.99, "grad_norm": 0.9379168258139087, "learning_rate": 2.963743810221464e-10, "loss": 0.5888, "step": 7790 }, { "epoch": 0.99, "grad_norm": 0.8212681366255622, "learning_rate": 2.864134079644254e-10, "loss": 0.6102, "step": 7791 }, { "epoch": 0.99, "grad_norm": 0.7316386443460332, "learning_rate": 2.766226757154877e-10, "loss": 0.5935, "step": 7792 }, { "epoch": 0.99, "grad_norm": 0.6463410458828635, "learning_rate": 2.6700218594277737e-10, "loss": 0.5248, "step": 7793 }, { "epoch": 0.99, "grad_norm": 0.6446284100699337, "learning_rate": 2.575519402843174e-10, "loss": 0.578, "step": 7794 }, { "epoch": 0.99, "grad_norm": 0.6456447775135491, "learning_rate": 2.482719403495981e-10, "loss": 0.4896, "step": 7795 }, { "epoch": 0.99, "grad_norm": 0.9125556746929285, "learning_rate": 2.391621877189109e-10, "loss": 0.6629, "step": 7796 }, { "epoch": 0.99, "grad_norm": 0.8832089692283861, "learning_rate": 2.3022268394334855e-10, "loss": 0.6494, "step": 7797 }, { "epoch": 0.99, "grad_norm": 0.8066100811593482, "learning_rate": 2.214534305453597e-10, "loss": 0.5208, "step": 7798 }, { "epoch": 0.99, "grad_norm": 0.8096489835068553, "learning_rate": 2.1285442901830541e-10, "loss": 0.578, "step": 7799 }, { "epoch": 0.99, "grad_norm": 1.006701301731855, "learning_rate": 2.0442568082634782e-10, "loss": 0.6289, "step": 7800 }, { "epoch": 0.99, "grad_norm": 0.763797774964898, "learning_rate": 1.9616718740489425e-10, "loss": 0.5515, "step": 7801 }, { "epoch": 0.99, "grad_norm": 0.8841713862164309, "learning_rate": 1.8807895016037524e-10, "loss": 0.5832, "step": 7802 }, { "epoch": 0.99, "grad_norm": 0.597487272678739, "learning_rate": 1.8016097047002243e-10, "loss": 0.4738, "step": 7803 }, { "epoch": 0.99, "grad_norm": 0.8886282154411391, "learning_rate": 1.724132496822017e-10, "loss": 0.6397, "step": 7804 }, { "epoch": 0.99, "grad_norm": 0.6359561392149431, "learning_rate": 1.648357891161911e-10, "loss": 0.4994, "step": 7805 }, { "epoch": 0.99, "grad_norm": 0.7587363409585012, "learning_rate": 1.5742859006251386e-10, "loss": 0.5851, "step": 7806 }, { "epoch": 0.99, "grad_norm": 0.8215674682659871, "learning_rate": 1.5019165378249432e-10, "loss": 0.5476, "step": 7807 }, { "epoch": 0.99, "grad_norm": 0.9217323252068718, "learning_rate": 1.4312498150848006e-10, "loss": 0.5978, "step": 7808 }, { "epoch": 0.99, "grad_norm": 0.830744659980955, "learning_rate": 1.3622857444373082e-10, "loss": 0.5912, "step": 7809 }, { "epoch": 0.99, "grad_norm": 0.8300094806674186, "learning_rate": 1.295024337628625e-10, "loss": 0.5933, "step": 7810 }, { "epoch": 1.0, "grad_norm": 0.8814962220993272, "learning_rate": 1.2294656061107022e-10, "loss": 0.6512, "step": 7811 }, { "epoch": 1.0, "grad_norm": 0.6399356028053811, "learning_rate": 1.1656095610490523e-10, "loss": 0.5339, "step": 7812 }, { "epoch": 1.0, "grad_norm": 0.7287650404842321, "learning_rate": 1.1034562133171999e-10, "loss": 0.5567, "step": 7813 }, { "epoch": 1.0, "grad_norm": 0.8066223167633728, "learning_rate": 1.0430055734977904e-10, "loss": 0.6095, "step": 7814 }, { "epoch": 1.0, "grad_norm": 0.7740593184487227, "learning_rate": 9.842576518870327e-11, "loss": 0.5634, "step": 7815 }, { "epoch": 1.0, "grad_norm": 0.707158995805899, "learning_rate": 9.272124584880358e-11, "loss": 0.5644, "step": 7816 }, { "epoch": 1.0, "grad_norm": 0.6842630828829599, "learning_rate": 8.718700030152515e-11, "loss": 0.5464, "step": 7817 }, { "epoch": 1.0, "grad_norm": 0.8533620146819175, "learning_rate": 8.182302948933629e-11, "loss": 0.6548, "step": 7818 }, { "epoch": 1.0, "grad_norm": 0.7664416011039629, "learning_rate": 7.662933432550644e-11, "loss": 0.5899, "step": 7819 }, { "epoch": 1.0, "grad_norm": 0.6175570252393078, "learning_rate": 7.160591569466134e-11, "loss": 0.5415, "step": 7820 }, { "epoch": 1.0, "grad_norm": 1.11096689069913, "learning_rate": 6.67527744521168e-11, "loss": 0.6343, "step": 7821 }, { "epoch": 1.0, "grad_norm": 0.7960029206260274, "learning_rate": 6.206991142432283e-11, "loss": 0.6311, "step": 7822 }, { "epoch": 1.0, "grad_norm": 0.7914337769007089, "learning_rate": 5.7557327408863655e-11, "loss": 0.5674, "step": 7823 }, { "epoch": 1.0, "grad_norm": 0.7868433726722298, "learning_rate": 5.321502317390258e-11, "loss": 0.541, "step": 7824 }, { "epoch": 1.0, "grad_norm": 0.6164095389455341, "learning_rate": 4.9042999459181224e-11, "loss": 0.4973, "step": 7825 }, { "epoch": 1.0, "grad_norm": 0.8386565776760152, "learning_rate": 4.5041256974909236e-11, "loss": 0.6178, "step": 7826 }, { "epoch": 1.0, "grad_norm": 0.7972901883819165, "learning_rate": 4.120979640276356e-11, "loss": 0.5664, "step": 7827 }, { "epoch": 1.0, "grad_norm": 0.8440276444110991, "learning_rate": 3.754861839500023e-11, "loss": 0.555, "step": 7828 }, { "epoch": 1.0, "grad_norm": 0.618396863229862, "learning_rate": 3.405772357523151e-11, "loss": 0.5868, "step": 7829 }, { "epoch": 1.0, "grad_norm": 1.0686622682608307, "learning_rate": 3.0737112537870816e-11, "loss": 0.6117, "step": 7830 }, { "epoch": 1.0, "grad_norm": 0.6916540299878454, "learning_rate": 2.758678584835472e-11, "loss": 0.526, "step": 7831 }, { "epoch": 1.0, "grad_norm": 0.8215045800812389, "learning_rate": 2.4606744043143e-11, "loss": 0.6567, "step": 7832 }, { "epoch": 1.0, "grad_norm": 0.765299166724864, "learning_rate": 2.17969876297186e-11, "loss": 0.5321, "step": 7833 }, { "epoch": 1.0, "grad_norm": 0.6363264239762301, "learning_rate": 1.9157517086587638e-11, "loss": 0.5737, "step": 7834 }, { "epoch": 1.0, "grad_norm": 0.7716953401591824, "learning_rate": 1.668833286327942e-11, "loss": 0.536, "step": 7835 }, { "epoch": 1.0, "grad_norm": 0.7293217330316644, "learning_rate": 1.4389435380124382e-11, "loss": 0.5084, "step": 7836 }, { "epoch": 1.0, "grad_norm": 0.820382705806208, "learning_rate": 1.2260825028698186e-11, "loss": 0.6494, "step": 7837 }, { "epoch": 1.0, "grad_norm": 0.6894220363458476, "learning_rate": 1.0302502171488648e-11, "loss": 0.5902, "step": 7838 }, { "epoch": 1.0, "grad_norm": 0.9321954346199218, "learning_rate": 8.514467141895743e-12, "loss": 0.616, "step": 7839 }, { "epoch": 1.0, "grad_norm": 0.6768627957877051, "learning_rate": 6.896720244453647e-12, "loss": 0.5578, "step": 7840 }, { "epoch": 1.0, "grad_norm": 0.7034212835050537, "learning_rate": 5.449261754608692e-12, "loss": 0.5526, "step": 7841 }, { "epoch": 1.0, "grad_norm": 0.7680212927368978, "learning_rate": 4.172091918941412e-12, "loss": 0.5579, "step": 7842 }, { "epoch": 1.0, "grad_norm": 0.7864126355901061, "learning_rate": 3.065210954944497e-12, "loss": 0.5619, "step": 7843 }, { "epoch": 1.0, "grad_norm": 0.7162419939094142, "learning_rate": 2.1286190510227952e-12, "loss": 0.5462, "step": 7844 }, { "epoch": 1.0, "grad_norm": 0.8687403737639042, "learning_rate": 1.3623163666043324e-12, "loss": 0.6186, "step": 7845 }, { "epoch": 1.0, "grad_norm": 0.8735836295831937, "learning_rate": 7.663030323623587e-13, "loss": 0.6025, "step": 7846 }, { "epoch": 1.0, "grad_norm": 0.750286920792172, "learning_rate": 3.4057914966023613e-13, "loss": 0.5743, "step": 7847 }, { "epoch": 1.0, "grad_norm": 0.6733822382820582, "learning_rate": 8.514479099552829e-14, "loss": 0.5113, "step": 7848 }, { "epoch": 1.0, "grad_norm": 0.8236251711940762, "learning_rate": 0.0, "loss": 0.5915, "step": 7849 }, { "epoch": 1.0, "step": 7849, "total_flos": 4047659296358400.0, "train_loss": 0.5910339839694788, "train_runtime": 40587.4686, "train_samples_per_second": 24.755, "train_steps_per_second": 0.193 } ], "logging_steps": 1.0, "max_steps": 7849, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 60000, "total_flos": 4047659296358400.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }