{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999363016752659, "eval_steps": 500, "global_step": 7849, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 6.315755160919551, "learning_rate": 4.237288135593221e-08, "loss": 0.9479, "step": 1 }, { "epoch": 0.0, "grad_norm": 8.100218490677534, "learning_rate": 8.474576271186442e-08, "loss": 1.2069, "step": 2 }, { "epoch": 0.0, "grad_norm": 7.171566849825352, "learning_rate": 1.2711864406779662e-07, "loss": 1.1277, "step": 3 }, { "epoch": 0.0, "grad_norm": 5.584630662833674, "learning_rate": 1.6949152542372883e-07, "loss": 0.9206, "step": 4 }, { "epoch": 0.0, "grad_norm": 6.100846456380341, "learning_rate": 2.1186440677966102e-07, "loss": 0.9232, "step": 5 }, { "epoch": 0.0, "grad_norm": 5.4705123107118965, "learning_rate": 2.5423728813559323e-07, "loss": 0.8992, "step": 6 }, { "epoch": 0.0, "grad_norm": 6.295949465651534, "learning_rate": 2.966101694915255e-07, "loss": 1.0882, "step": 7 }, { "epoch": 0.0, "grad_norm": 6.023526451128331, "learning_rate": 3.3898305084745766e-07, "loss": 0.9273, "step": 8 }, { "epoch": 0.0, "grad_norm": 6.598934446265413, "learning_rate": 3.813559322033899e-07, "loss": 1.1617, "step": 9 }, { "epoch": 0.0, "grad_norm": 7.255483401248126, "learning_rate": 4.2372881355932204e-07, "loss": 1.106, "step": 10 }, { "epoch": 0.0, "grad_norm": 6.586857366208579, "learning_rate": 4.661016949152543e-07, "loss": 1.1648, "step": 11 }, { "epoch": 0.0, "grad_norm": 4.903139126054202, "learning_rate": 5.084745762711865e-07, "loss": 0.883, "step": 12 }, { "epoch": 0.0, "grad_norm": 6.202622651142046, "learning_rate": 5.508474576271187e-07, "loss": 1.1534, "step": 13 }, { "epoch": 0.0, "grad_norm": 5.512018510847586, "learning_rate": 5.93220338983051e-07, "loss": 1.1664, "step": 14 }, { "epoch": 0.0, "grad_norm": 4.499201174169701, "learning_rate": 6.355932203389831e-07, "loss": 0.8748, "step": 15 }, { "epoch": 0.0, "grad_norm": 3.9090400452428966, "learning_rate": 6.779661016949153e-07, "loss": 0.8748, "step": 16 }, { "epoch": 0.0, "grad_norm": 5.38152180767904, "learning_rate": 7.203389830508476e-07, "loss": 1.0645, "step": 17 }, { "epoch": 0.0, "grad_norm": 5.053943119085202, "learning_rate": 7.627118644067798e-07, "loss": 1.1386, "step": 18 }, { "epoch": 0.0, "grad_norm": 3.8066592158020813, "learning_rate": 8.050847457627118e-07, "loss": 0.8644, "step": 19 }, { "epoch": 0.0, "grad_norm": 5.401110354097955, "learning_rate": 8.474576271186441e-07, "loss": 1.0844, "step": 20 }, { "epoch": 0.0, "grad_norm": 4.559152082114432, "learning_rate": 8.898305084745763e-07, "loss": 1.0849, "step": 21 }, { "epoch": 0.0, "grad_norm": 2.8745037562094358, "learning_rate": 9.322033898305086e-07, "loss": 0.8012, "step": 22 }, { "epoch": 0.0, "grad_norm": 3.88624059061097, "learning_rate": 9.745762711864408e-07, "loss": 1.0375, "step": 23 }, { "epoch": 0.0, "grad_norm": 2.715535508413173, "learning_rate": 1.016949152542373e-06, "loss": 0.8254, "step": 24 }, { "epoch": 0.0, "grad_norm": 2.6402729503537405, "learning_rate": 1.059322033898305e-06, "loss": 0.8766, "step": 25 }, { "epoch": 0.0, "grad_norm": 2.8482238838073206, "learning_rate": 1.1016949152542374e-06, "loss": 0.9553, "step": 26 }, { "epoch": 0.0, "grad_norm": 3.0497968975133296, "learning_rate": 1.1440677966101696e-06, "loss": 0.9557, "step": 27 }, { "epoch": 0.0, "grad_norm": 2.2061093249472625, "learning_rate": 1.186440677966102e-06, "loss": 0.7813, "step": 28 }, { "epoch": 0.0, "grad_norm": 1.8067996733068905, "learning_rate": 1.228813559322034e-06, "loss": 0.7334, "step": 29 }, { "epoch": 0.0, "grad_norm": 2.298191883020086, "learning_rate": 1.2711864406779662e-06, "loss": 0.9672, "step": 30 }, { "epoch": 0.0, "grad_norm": 1.5705838510255912, "learning_rate": 1.3135593220338985e-06, "loss": 0.712, "step": 31 }, { "epoch": 0.0, "grad_norm": 1.6088494092491419, "learning_rate": 1.3559322033898307e-06, "loss": 0.7545, "step": 32 }, { "epoch": 0.0, "grad_norm": 1.4744942694005478, "learning_rate": 1.3983050847457628e-06, "loss": 0.7158, "step": 33 }, { "epoch": 0.0, "grad_norm": 1.3407350211239946, "learning_rate": 1.4406779661016951e-06, "loss": 0.653, "step": 34 }, { "epoch": 0.0, "grad_norm": 1.3950625631306801, "learning_rate": 1.4830508474576273e-06, "loss": 0.6612, "step": 35 }, { "epoch": 0.0, "grad_norm": 1.755463643787986, "learning_rate": 1.5254237288135596e-06, "loss": 0.9663, "step": 36 }, { "epoch": 0.0, "grad_norm": 1.6523762136186397, "learning_rate": 1.5677966101694915e-06, "loss": 0.8251, "step": 37 }, { "epoch": 0.0, "grad_norm": 1.2628654326647275, "learning_rate": 1.6101694915254237e-06, "loss": 0.6934, "step": 38 }, { "epoch": 0.0, "grad_norm": 1.5372606404011073, "learning_rate": 1.652542372881356e-06, "loss": 0.8577, "step": 39 }, { "epoch": 0.01, "grad_norm": 1.0473609509684376, "learning_rate": 1.6949152542372882e-06, "loss": 0.6877, "step": 40 }, { "epoch": 0.01, "grad_norm": 1.4049463579442723, "learning_rate": 1.7372881355932205e-06, "loss": 0.86, "step": 41 }, { "epoch": 0.01, "grad_norm": 1.0127261251945565, "learning_rate": 1.7796610169491526e-06, "loss": 0.745, "step": 42 }, { "epoch": 0.01, "grad_norm": 0.8838010865593211, "learning_rate": 1.8220338983050848e-06, "loss": 0.5871, "step": 43 }, { "epoch": 0.01, "grad_norm": 1.0609299135935981, "learning_rate": 1.8644067796610171e-06, "loss": 0.6314, "step": 44 }, { "epoch": 0.01, "grad_norm": 1.9159445100927852, "learning_rate": 1.9067796610169493e-06, "loss": 0.8977, "step": 45 }, { "epoch": 0.01, "grad_norm": 0.9773402968846524, "learning_rate": 1.9491525423728816e-06, "loss": 0.6562, "step": 46 }, { "epoch": 0.01, "grad_norm": 1.11450266217315, "learning_rate": 1.9915254237288137e-06, "loss": 0.6779, "step": 47 }, { "epoch": 0.01, "grad_norm": 1.5612110238194168, "learning_rate": 2.033898305084746e-06, "loss": 0.8962, "step": 48 }, { "epoch": 0.01, "grad_norm": 1.4987795309713536, "learning_rate": 2.076271186440678e-06, "loss": 0.8172, "step": 49 }, { "epoch": 0.01, "grad_norm": 1.2072667175753797, "learning_rate": 2.11864406779661e-06, "loss": 0.6323, "step": 50 }, { "epoch": 0.01, "grad_norm": 1.1535750512983887, "learning_rate": 2.1610169491525427e-06, "loss": 0.8327, "step": 51 }, { "epoch": 0.01, "grad_norm": 1.0199047844541156, "learning_rate": 2.203389830508475e-06, "loss": 0.5918, "step": 52 }, { "epoch": 0.01, "grad_norm": 0.8864849172154937, "learning_rate": 2.245762711864407e-06, "loss": 0.6398, "step": 53 }, { "epoch": 0.01, "grad_norm": 1.1514523578372728, "learning_rate": 2.288135593220339e-06, "loss": 0.7384, "step": 54 }, { "epoch": 0.01, "grad_norm": 1.1239926372958742, "learning_rate": 2.3305084745762712e-06, "loss": 0.6809, "step": 55 }, { "epoch": 0.01, "grad_norm": 1.4453255002614818, "learning_rate": 2.372881355932204e-06, "loss": 0.7847, "step": 56 }, { "epoch": 0.01, "grad_norm": 1.1403389296713031, "learning_rate": 2.415254237288136e-06, "loss": 0.8264, "step": 57 }, { "epoch": 0.01, "grad_norm": 0.9067072880055732, "learning_rate": 2.457627118644068e-06, "loss": 0.6831, "step": 58 }, { "epoch": 0.01, "grad_norm": 0.8498698865320297, "learning_rate": 2.5e-06, "loss": 0.654, "step": 59 }, { "epoch": 0.01, "grad_norm": 0.9265346653911731, "learning_rate": 2.5423728813559323e-06, "loss": 0.6422, "step": 60 }, { "epoch": 0.01, "grad_norm": 1.1596522463743175, "learning_rate": 2.5847457627118645e-06, "loss": 0.8155, "step": 61 }, { "epoch": 0.01, "grad_norm": 0.9474405179772677, "learning_rate": 2.627118644067797e-06, "loss": 0.6162, "step": 62 }, { "epoch": 0.01, "grad_norm": 0.9095862376115516, "learning_rate": 2.669491525423729e-06, "loss": 0.7255, "step": 63 }, { "epoch": 0.01, "grad_norm": 0.8318515654495965, "learning_rate": 2.7118644067796613e-06, "loss": 0.5899, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.1369539475488417, "learning_rate": 2.7542372881355934e-06, "loss": 0.731, "step": 65 }, { "epoch": 0.01, "grad_norm": 0.9723618370693417, "learning_rate": 2.7966101694915256e-06, "loss": 0.6611, "step": 66 }, { "epoch": 0.01, "grad_norm": 1.4000758610623976, "learning_rate": 2.838983050847458e-06, "loss": 0.8021, "step": 67 }, { "epoch": 0.01, "grad_norm": 0.8956344339444645, "learning_rate": 2.8813559322033903e-06, "loss": 0.6644, "step": 68 }, { "epoch": 0.01, "grad_norm": 0.8523212039125865, "learning_rate": 2.9237288135593224e-06, "loss": 0.6268, "step": 69 }, { "epoch": 0.01, "grad_norm": 1.0703861752332404, "learning_rate": 2.9661016949152545e-06, "loss": 0.7397, "step": 70 }, { "epoch": 0.01, "grad_norm": 0.9326192666797318, "learning_rate": 3.0084745762711862e-06, "loss": 0.6247, "step": 71 }, { "epoch": 0.01, "grad_norm": 0.8313311945247217, "learning_rate": 3.0508474576271192e-06, "loss": 0.5699, "step": 72 }, { "epoch": 0.01, "grad_norm": 1.0409291394971776, "learning_rate": 3.0932203389830514e-06, "loss": 0.8053, "step": 73 }, { "epoch": 0.01, "grad_norm": 1.159631053983387, "learning_rate": 3.135593220338983e-06, "loss": 0.7713, "step": 74 }, { "epoch": 0.01, "grad_norm": 1.0142393261515645, "learning_rate": 3.1779661016949152e-06, "loss": 0.7452, "step": 75 }, { "epoch": 0.01, "grad_norm": 1.1177153115205338, "learning_rate": 3.2203389830508473e-06, "loss": 0.6191, "step": 76 }, { "epoch": 0.01, "grad_norm": 0.934158324801368, "learning_rate": 3.26271186440678e-06, "loss": 0.5971, "step": 77 }, { "epoch": 0.01, "grad_norm": 0.8198664871974447, "learning_rate": 3.305084745762712e-06, "loss": 0.5894, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.12560097722721, "learning_rate": 3.347457627118644e-06, "loss": 0.7259, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.1372264114353359, "learning_rate": 3.3898305084745763e-06, "loss": 0.7235, "step": 80 }, { "epoch": 0.01, "grad_norm": 0.946308089908942, "learning_rate": 3.4322033898305084e-06, "loss": 0.7423, "step": 81 }, { "epoch": 0.01, "grad_norm": 0.768706538717993, "learning_rate": 3.474576271186441e-06, "loss": 0.5814, "step": 82 }, { "epoch": 0.01, "grad_norm": 0.7882263274857597, "learning_rate": 3.516949152542373e-06, "loss": 0.6227, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.0130484490917735, "learning_rate": 3.5593220338983053e-06, "loss": 0.6292, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.0625999194751543, "learning_rate": 3.6016949152542374e-06, "loss": 0.756, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.1806468563909829, "learning_rate": 3.6440677966101695e-06, "loss": 0.8181, "step": 86 }, { "epoch": 0.01, "grad_norm": 0.8915103932048728, "learning_rate": 3.686440677966102e-06, "loss": 0.7278, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.0544449375949836, "learning_rate": 3.7288135593220342e-06, "loss": 0.6745, "step": 88 }, { "epoch": 0.01, "grad_norm": 1.061924278121735, "learning_rate": 3.7711864406779664e-06, "loss": 0.762, "step": 89 }, { "epoch": 0.01, "grad_norm": 0.7838385105137881, "learning_rate": 3.8135593220338985e-06, "loss": 0.5319, "step": 90 }, { "epoch": 0.01, "grad_norm": 0.8940767055179175, "learning_rate": 3.8559322033898315e-06, "loss": 0.7681, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.0434976840215688, "learning_rate": 3.898305084745763e-06, "loss": 0.7914, "step": 92 }, { "epoch": 0.01, "grad_norm": 0.9909272239591096, "learning_rate": 3.940677966101695e-06, "loss": 0.7253, "step": 93 }, { "epoch": 0.01, "grad_norm": 0.7948844412832544, "learning_rate": 3.9830508474576275e-06, "loss": 0.7036, "step": 94 }, { "epoch": 0.01, "grad_norm": 0.9165645687951253, "learning_rate": 4.025423728813559e-06, "loss": 0.6991, "step": 95 }, { "epoch": 0.01, "grad_norm": 0.9924627256847344, "learning_rate": 4.067796610169492e-06, "loss": 0.7001, "step": 96 }, { "epoch": 0.01, "grad_norm": 0.7887320484875681, "learning_rate": 4.110169491525424e-06, "loss": 0.5986, "step": 97 }, { "epoch": 0.01, "grad_norm": 0.9114726726862187, "learning_rate": 4.152542372881356e-06, "loss": 0.6901, "step": 98 }, { "epoch": 0.01, "grad_norm": 0.8754590567958539, "learning_rate": 4.1949152542372886e-06, "loss": 0.6272, "step": 99 }, { "epoch": 0.01, "grad_norm": 0.7911143593574747, "learning_rate": 4.23728813559322e-06, "loss": 0.6387, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.1497787822708534, "learning_rate": 4.279661016949153e-06, "loss": 0.7806, "step": 101 }, { "epoch": 0.01, "grad_norm": 0.961636545453514, "learning_rate": 4.322033898305085e-06, "loss": 0.6654, "step": 102 }, { "epoch": 0.01, "grad_norm": 0.8643465998348661, "learning_rate": 4.364406779661017e-06, "loss": 0.761, "step": 103 }, { "epoch": 0.01, "grad_norm": 1.0623573430856974, "learning_rate": 4.40677966101695e-06, "loss": 0.7737, "step": 104 }, { "epoch": 0.01, "grad_norm": 0.7924110178523148, "learning_rate": 4.449152542372881e-06, "loss": 0.5905, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.0547457932502393, "learning_rate": 4.491525423728814e-06, "loss": 0.7155, "step": 106 }, { "epoch": 0.01, "grad_norm": 1.0021114670760642, "learning_rate": 4.5338983050847465e-06, "loss": 0.7308, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.1147786425678303, "learning_rate": 4.576271186440678e-06, "loss": 0.7139, "step": 108 }, { "epoch": 0.01, "grad_norm": 0.9188378962317331, "learning_rate": 4.618644067796611e-06, "loss": 0.722, "step": 109 }, { "epoch": 0.01, "grad_norm": 0.8417003882693744, "learning_rate": 4.6610169491525425e-06, "loss": 0.6588, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.9742289791982495, "learning_rate": 4.703389830508475e-06, "loss": 0.6528, "step": 111 }, { "epoch": 0.01, "grad_norm": 0.9680593640556735, "learning_rate": 4.745762711864408e-06, "loss": 0.6821, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.131304634234551, "learning_rate": 4.788135593220339e-06, "loss": 0.7428, "step": 113 }, { "epoch": 0.01, "grad_norm": 0.9588135285950953, "learning_rate": 4.830508474576272e-06, "loss": 0.6703, "step": 114 }, { "epoch": 0.01, "grad_norm": 0.9066996549067298, "learning_rate": 4.872881355932204e-06, "loss": 0.5918, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.833010228931143, "learning_rate": 4.915254237288136e-06, "loss": 0.6471, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.0443031332541632, "learning_rate": 4.957627118644069e-06, "loss": 0.7252, "step": 117 }, { "epoch": 0.02, "grad_norm": 1.1057503996909768, "learning_rate": 5e-06, "loss": 0.7125, "step": 118 }, { "epoch": 0.02, "grad_norm": 1.288078975820244, "learning_rate": 5.042372881355932e-06, "loss": 0.7089, "step": 119 }, { "epoch": 0.02, "grad_norm": 0.7787012931038324, "learning_rate": 5.084745762711865e-06, "loss": 0.614, "step": 120 }, { "epoch": 0.02, "grad_norm": 0.8671198361828785, "learning_rate": 5.127118644067796e-06, "loss": 0.6033, "step": 121 }, { "epoch": 0.02, "grad_norm": 1.0562366014378568, "learning_rate": 5.169491525423729e-06, "loss": 0.7258, "step": 122 }, { "epoch": 0.02, "grad_norm": 0.825424722924917, "learning_rate": 5.211864406779662e-06, "loss": 0.6106, "step": 123 }, { "epoch": 0.02, "grad_norm": 0.9535747173058154, "learning_rate": 5.254237288135594e-06, "loss": 0.6698, "step": 124 }, { "epoch": 0.02, "grad_norm": 0.9964029349528971, "learning_rate": 5.296610169491526e-06, "loss": 0.6864, "step": 125 }, { "epoch": 0.02, "grad_norm": 0.8496061929889926, "learning_rate": 5.338983050847458e-06, "loss": 0.6085, "step": 126 }, { "epoch": 0.02, "grad_norm": 0.8169307113798493, "learning_rate": 5.38135593220339e-06, "loss": 0.577, "step": 127 }, { "epoch": 0.02, "grad_norm": 0.8828226478416715, "learning_rate": 5.423728813559323e-06, "loss": 0.6536, "step": 128 }, { "epoch": 0.02, "grad_norm": 0.8111566368468357, "learning_rate": 5.466101694915254e-06, "loss": 0.5861, "step": 129 }, { "epoch": 0.02, "grad_norm": 0.8629999756794673, "learning_rate": 5.508474576271187e-06, "loss": 0.6798, "step": 130 }, { "epoch": 0.02, "grad_norm": 1.0649694021172096, "learning_rate": 5.550847457627119e-06, "loss": 0.7477, "step": 131 }, { "epoch": 0.02, "grad_norm": 1.2379080105519054, "learning_rate": 5.593220338983051e-06, "loss": 0.8184, "step": 132 }, { "epoch": 0.02, "grad_norm": 0.9008620998842783, "learning_rate": 5.635593220338984e-06, "loss": 0.5844, "step": 133 }, { "epoch": 0.02, "grad_norm": 1.057219847760786, "learning_rate": 5.677966101694916e-06, "loss": 0.6303, "step": 134 }, { "epoch": 0.02, "grad_norm": 0.8110716988102185, "learning_rate": 5.720338983050848e-06, "loss": 0.6138, "step": 135 }, { "epoch": 0.02, "grad_norm": 1.007863377115401, "learning_rate": 5.7627118644067805e-06, "loss": 0.7345, "step": 136 }, { "epoch": 0.02, "grad_norm": 1.0012050419482001, "learning_rate": 5.805084745762712e-06, "loss": 0.7099, "step": 137 }, { "epoch": 0.02, "grad_norm": 0.8503972784915824, "learning_rate": 5.847457627118645e-06, "loss": 0.5918, "step": 138 }, { "epoch": 0.02, "grad_norm": 1.203598531953053, "learning_rate": 5.8898305084745765e-06, "loss": 0.7367, "step": 139 }, { "epoch": 0.02, "grad_norm": 0.8131031453284981, "learning_rate": 5.932203389830509e-06, "loss": 0.5979, "step": 140 }, { "epoch": 0.02, "grad_norm": 1.1498567432766191, "learning_rate": 5.974576271186441e-06, "loss": 0.6825, "step": 141 }, { "epoch": 0.02, "grad_norm": 0.9748267037249195, "learning_rate": 6.0169491525423725e-06, "loss": 0.6309, "step": 142 }, { "epoch": 0.02, "grad_norm": 1.1147298161057293, "learning_rate": 6.059322033898306e-06, "loss": 0.7522, "step": 143 }, { "epoch": 0.02, "grad_norm": 0.7798848047498792, "learning_rate": 6.1016949152542385e-06, "loss": 0.5549, "step": 144 }, { "epoch": 0.02, "grad_norm": 0.9114141439174802, "learning_rate": 6.14406779661017e-06, "loss": 0.6766, "step": 145 }, { "epoch": 0.02, "grad_norm": 0.9897454653838555, "learning_rate": 6.186440677966103e-06, "loss": 0.7221, "step": 146 }, { "epoch": 0.02, "grad_norm": 0.9453356509228966, "learning_rate": 6.2288135593220344e-06, "loss": 0.6089, "step": 147 }, { "epoch": 0.02, "grad_norm": 1.1046986048460117, "learning_rate": 6.271186440677966e-06, "loss": 0.7237, "step": 148 }, { "epoch": 0.02, "grad_norm": 0.7679291507056515, "learning_rate": 6.313559322033899e-06, "loss": 0.5788, "step": 149 }, { "epoch": 0.02, "grad_norm": 0.8568576370735939, "learning_rate": 6.3559322033898304e-06, "loss": 0.6798, "step": 150 }, { "epoch": 0.02, "grad_norm": 0.8832945038337772, "learning_rate": 6.398305084745763e-06, "loss": 0.5891, "step": 151 }, { "epoch": 0.02, "grad_norm": 0.9761886742433304, "learning_rate": 6.440677966101695e-06, "loss": 0.719, "step": 152 }, { "epoch": 0.02, "grad_norm": 0.898779892810249, "learning_rate": 6.483050847457628e-06, "loss": 0.625, "step": 153 }, { "epoch": 0.02, "grad_norm": 0.8946339479763311, "learning_rate": 6.52542372881356e-06, "loss": 0.7351, "step": 154 }, { "epoch": 0.02, "grad_norm": 0.7873558089294436, "learning_rate": 6.567796610169492e-06, "loss": 0.6204, "step": 155 }, { "epoch": 0.02, "grad_norm": 0.8160069510964497, "learning_rate": 6.610169491525424e-06, "loss": 0.5863, "step": 156 }, { "epoch": 0.02, "grad_norm": 0.9217008221003499, "learning_rate": 6.652542372881357e-06, "loss": 0.64, "step": 157 }, { "epoch": 0.02, "grad_norm": 0.8146830461284279, "learning_rate": 6.694915254237288e-06, "loss": 0.6413, "step": 158 }, { "epoch": 0.02, "grad_norm": 1.119588204107743, "learning_rate": 6.737288135593221e-06, "loss": 0.7497, "step": 159 }, { "epoch": 0.02, "grad_norm": 0.8891169665144224, "learning_rate": 6.779661016949153e-06, "loss": 0.6159, "step": 160 }, { "epoch": 0.02, "grad_norm": 0.9544571651440298, "learning_rate": 6.822033898305085e-06, "loss": 0.6307, "step": 161 }, { "epoch": 0.02, "grad_norm": 0.9748541918685296, "learning_rate": 6.864406779661017e-06, "loss": 0.7042, "step": 162 }, { "epoch": 0.02, "grad_norm": 0.8928036088788414, "learning_rate": 6.90677966101695e-06, "loss": 0.5525, "step": 163 }, { "epoch": 0.02, "grad_norm": 0.9447750031112148, "learning_rate": 6.949152542372882e-06, "loss": 0.6309, "step": 164 }, { "epoch": 0.02, "grad_norm": 0.7293762637264555, "learning_rate": 6.9915254237288146e-06, "loss": 0.5824, "step": 165 }, { "epoch": 0.02, "grad_norm": 0.9935951614871427, "learning_rate": 7.033898305084746e-06, "loss": 0.7509, "step": 166 }, { "epoch": 0.02, "grad_norm": 1.2187072550538158, "learning_rate": 7.076271186440679e-06, "loss": 0.7195, "step": 167 }, { "epoch": 0.02, "grad_norm": 0.7743930446324943, "learning_rate": 7.1186440677966106e-06, "loss": 0.6039, "step": 168 }, { "epoch": 0.02, "grad_norm": 0.7757997144586963, "learning_rate": 7.161016949152543e-06, "loss": 0.5715, "step": 169 }, { "epoch": 0.02, "grad_norm": 0.9041618886185299, "learning_rate": 7.203389830508475e-06, "loss": 0.5617, "step": 170 }, { "epoch": 0.02, "grad_norm": 0.8781459413382111, "learning_rate": 7.2457627118644065e-06, "loss": 0.6267, "step": 171 }, { "epoch": 0.02, "grad_norm": 1.0213172173088207, "learning_rate": 7.288135593220339e-06, "loss": 0.7325, "step": 172 }, { "epoch": 0.02, "grad_norm": 1.001334402469566, "learning_rate": 7.3305084745762725e-06, "loss": 0.7242, "step": 173 }, { "epoch": 0.02, "grad_norm": 0.9066076828879898, "learning_rate": 7.372881355932204e-06, "loss": 0.5799, "step": 174 }, { "epoch": 0.02, "grad_norm": 1.011559773485519, "learning_rate": 7.415254237288137e-06, "loss": 0.6684, "step": 175 }, { "epoch": 0.02, "grad_norm": 1.1146404100163043, "learning_rate": 7.4576271186440685e-06, "loss": 0.7341, "step": 176 }, { "epoch": 0.02, "grad_norm": 1.0343761477564852, "learning_rate": 7.500000000000001e-06, "loss": 0.6357, "step": 177 }, { "epoch": 0.02, "grad_norm": 0.7725674905493598, "learning_rate": 7.542372881355933e-06, "loss": 0.575, "step": 178 }, { "epoch": 0.02, "grad_norm": 0.8621063821193666, "learning_rate": 7.5847457627118645e-06, "loss": 0.6182, "step": 179 }, { "epoch": 0.02, "grad_norm": 0.844374090282496, "learning_rate": 7.627118644067797e-06, "loss": 0.6298, "step": 180 }, { "epoch": 0.02, "grad_norm": 0.9800783922527415, "learning_rate": 7.66949152542373e-06, "loss": 0.623, "step": 181 }, { "epoch": 0.02, "grad_norm": 1.1236727397729043, "learning_rate": 7.711864406779663e-06, "loss": 0.7356, "step": 182 }, { "epoch": 0.02, "grad_norm": 0.9659078982971598, "learning_rate": 7.754237288135595e-06, "loss": 0.7238, "step": 183 }, { "epoch": 0.02, "grad_norm": 1.001387908180136, "learning_rate": 7.796610169491526e-06, "loss": 0.6177, "step": 184 }, { "epoch": 0.02, "grad_norm": 1.1512598543745955, "learning_rate": 7.838983050847458e-06, "loss": 0.7762, "step": 185 }, { "epoch": 0.02, "grad_norm": 0.9569207735160119, "learning_rate": 7.88135593220339e-06, "loss": 0.7724, "step": 186 }, { "epoch": 0.02, "grad_norm": 0.8822836126824283, "learning_rate": 7.923728813559323e-06, "loss": 0.586, "step": 187 }, { "epoch": 0.02, "grad_norm": 0.9410329153890431, "learning_rate": 7.966101694915255e-06, "loss": 0.6237, "step": 188 }, { "epoch": 0.02, "grad_norm": 0.8131163568514106, "learning_rate": 8.008474576271187e-06, "loss": 0.5999, "step": 189 }, { "epoch": 0.02, "grad_norm": 0.7878807001243879, "learning_rate": 8.050847457627118e-06, "loss": 0.6251, "step": 190 }, { "epoch": 0.02, "grad_norm": 1.3021336547354976, "learning_rate": 8.093220338983052e-06, "loss": 0.6943, "step": 191 }, { "epoch": 0.02, "grad_norm": 1.0357888044807833, "learning_rate": 8.135593220338983e-06, "loss": 0.7529, "step": 192 }, { "epoch": 0.02, "grad_norm": 0.9798088952833964, "learning_rate": 8.177966101694917e-06, "loss": 0.6445, "step": 193 }, { "epoch": 0.02, "grad_norm": 1.2025857531069202, "learning_rate": 8.220338983050849e-06, "loss": 0.7624, "step": 194 }, { "epoch": 0.02, "grad_norm": 1.2444329236365508, "learning_rate": 8.26271186440678e-06, "loss": 0.7065, "step": 195 }, { "epoch": 0.02, "grad_norm": 1.0099626060159643, "learning_rate": 8.305084745762712e-06, "loss": 0.6457, "step": 196 }, { "epoch": 0.03, "grad_norm": 0.8750853835711236, "learning_rate": 8.347457627118645e-06, "loss": 0.6198, "step": 197 }, { "epoch": 0.03, "grad_norm": 0.8943179057603692, "learning_rate": 8.389830508474577e-06, "loss": 0.5735, "step": 198 }, { "epoch": 0.03, "grad_norm": 1.020139596778078, "learning_rate": 8.432203389830509e-06, "loss": 0.6681, "step": 199 }, { "epoch": 0.03, "grad_norm": 1.2038740689317602, "learning_rate": 8.47457627118644e-06, "loss": 0.6539, "step": 200 }, { "epoch": 0.03, "grad_norm": 1.0491948555402582, "learning_rate": 8.516949152542372e-06, "loss": 0.7375, "step": 201 }, { "epoch": 0.03, "grad_norm": 1.0492347673279663, "learning_rate": 8.559322033898306e-06, "loss": 0.6445, "step": 202 }, { "epoch": 0.03, "grad_norm": 1.0718128205114044, "learning_rate": 8.601694915254239e-06, "loss": 0.7002, "step": 203 }, { "epoch": 0.03, "grad_norm": 0.734556758763377, "learning_rate": 8.64406779661017e-06, "loss": 0.5629, "step": 204 }, { "epoch": 0.03, "grad_norm": 0.7412973081240049, "learning_rate": 8.686440677966103e-06, "loss": 0.5947, "step": 205 }, { "epoch": 0.03, "grad_norm": 0.8180267124924756, "learning_rate": 8.728813559322034e-06, "loss": 0.5656, "step": 206 }, { "epoch": 0.03, "grad_norm": 0.8448009928921776, "learning_rate": 8.771186440677966e-06, "loss": 0.6197, "step": 207 }, { "epoch": 0.03, "grad_norm": 2.892668338028599, "learning_rate": 8.8135593220339e-06, "loss": 0.6715, "step": 208 }, { "epoch": 0.03, "grad_norm": 1.0712961761495488, "learning_rate": 8.855932203389831e-06, "loss": 0.7181, "step": 209 }, { "epoch": 0.03, "grad_norm": 0.8682510234712424, "learning_rate": 8.898305084745763e-06, "loss": 0.577, "step": 210 }, { "epoch": 0.03, "grad_norm": 0.8059324335317097, "learning_rate": 8.940677966101694e-06, "loss": 0.6141, "step": 211 }, { "epoch": 0.03, "grad_norm": 1.2501186941750848, "learning_rate": 8.983050847457628e-06, "loss": 0.6886, "step": 212 }, { "epoch": 0.03, "grad_norm": 0.9097074020678482, "learning_rate": 9.02542372881356e-06, "loss": 0.7408, "step": 213 }, { "epoch": 0.03, "grad_norm": 0.801724007745236, "learning_rate": 9.067796610169493e-06, "loss": 0.5711, "step": 214 }, { "epoch": 0.03, "grad_norm": 0.8516202040963996, "learning_rate": 9.110169491525425e-06, "loss": 0.6395, "step": 215 }, { "epoch": 0.03, "grad_norm": 0.9631488325325961, "learning_rate": 9.152542372881356e-06, "loss": 0.6496, "step": 216 }, { "epoch": 0.03, "grad_norm": 1.084435462085635, "learning_rate": 9.194915254237288e-06, "loss": 0.6989, "step": 217 }, { "epoch": 0.03, "grad_norm": 0.8363119000814339, "learning_rate": 9.237288135593222e-06, "loss": 0.5734, "step": 218 }, { "epoch": 0.03, "grad_norm": 0.9511490838269513, "learning_rate": 9.279661016949153e-06, "loss": 0.6169, "step": 219 }, { "epoch": 0.03, "grad_norm": 0.8895238466035947, "learning_rate": 9.322033898305085e-06, "loss": 0.6267, "step": 220 }, { "epoch": 0.03, "grad_norm": 1.1514987460633765, "learning_rate": 9.364406779661017e-06, "loss": 0.6641, "step": 221 }, { "epoch": 0.03, "grad_norm": 0.9565801841708472, "learning_rate": 9.40677966101695e-06, "loss": 0.6133, "step": 222 }, { "epoch": 0.03, "grad_norm": 0.8669101354574144, "learning_rate": 9.449152542372882e-06, "loss": 0.6412, "step": 223 }, { "epoch": 0.03, "grad_norm": 0.7342602405274954, "learning_rate": 9.491525423728815e-06, "loss": 0.6167, "step": 224 }, { "epoch": 0.03, "grad_norm": 0.7382289769982713, "learning_rate": 9.533898305084747e-06, "loss": 0.6358, "step": 225 }, { "epoch": 0.03, "grad_norm": 0.6923445266648418, "learning_rate": 9.576271186440679e-06, "loss": 0.4961, "step": 226 }, { "epoch": 0.03, "grad_norm": 0.7434130124115883, "learning_rate": 9.61864406779661e-06, "loss": 0.5386, "step": 227 }, { "epoch": 0.03, "grad_norm": 0.9396672981659955, "learning_rate": 9.661016949152544e-06, "loss": 0.6244, "step": 228 }, { "epoch": 0.03, "grad_norm": 0.8403025796295461, "learning_rate": 9.703389830508475e-06, "loss": 0.6751, "step": 229 }, { "epoch": 0.03, "grad_norm": 1.346216823414232, "learning_rate": 9.745762711864407e-06, "loss": 0.747, "step": 230 }, { "epoch": 0.03, "grad_norm": 1.0732661890075386, "learning_rate": 9.788135593220339e-06, "loss": 0.7309, "step": 231 }, { "epoch": 0.03, "grad_norm": 0.9194804417202522, "learning_rate": 9.830508474576272e-06, "loss": 0.5994, "step": 232 }, { "epoch": 0.03, "grad_norm": 0.8500279144644034, "learning_rate": 9.872881355932204e-06, "loss": 0.6503, "step": 233 }, { "epoch": 0.03, "grad_norm": 0.9479766715163666, "learning_rate": 9.915254237288137e-06, "loss": 0.7034, "step": 234 }, { "epoch": 0.03, "grad_norm": 1.024106341163967, "learning_rate": 9.957627118644069e-06, "loss": 0.64, "step": 235 }, { "epoch": 0.03, "grad_norm": 0.8773894145226389, "learning_rate": 1e-05, "loss": 0.6003, "step": 236 }, { "epoch": 0.03, "grad_norm": 0.9473859245843204, "learning_rate": 9.999999574276046e-06, "loss": 0.7454, "step": 237 }, { "epoch": 0.03, "grad_norm": 0.9577208668886873, "learning_rate": 9.999998297104252e-06, "loss": 0.6419, "step": 238 }, { "epoch": 0.03, "grad_norm": 0.8602749658395881, "learning_rate": 9.99999616848484e-06, "loss": 0.603, "step": 239 }, { "epoch": 0.03, "grad_norm": 1.0045808750730014, "learning_rate": 9.999993188418168e-06, "loss": 0.6388, "step": 240 }, { "epoch": 0.03, "grad_norm": 1.0650052995930719, "learning_rate": 9.999989356904746e-06, "loss": 0.7113, "step": 241 }, { "epoch": 0.03, "grad_norm": 0.8045082422482904, "learning_rate": 9.999984673945226e-06, "loss": 0.6069, "step": 242 }, { "epoch": 0.03, "grad_norm": 0.8436870361222776, "learning_rate": 9.999979139540406e-06, "loss": 0.645, "step": 243 }, { "epoch": 0.03, "grad_norm": 0.9947510382185858, "learning_rate": 9.999972753691227e-06, "loss": 0.7262, "step": 244 }, { "epoch": 0.03, "grad_norm": 2.742499938099767, "learning_rate": 9.999965516398778e-06, "loss": 0.7071, "step": 245 }, { "epoch": 0.03, "grad_norm": 0.9349758417642406, "learning_rate": 9.99995742766429e-06, "loss": 0.6056, "step": 246 }, { "epoch": 0.03, "grad_norm": 0.9643565158039966, "learning_rate": 9.999948487489144e-06, "loss": 0.6991, "step": 247 }, { "epoch": 0.03, "grad_norm": 1.041471885230738, "learning_rate": 9.999938695874857e-06, "loss": 0.7836, "step": 248 }, { "epoch": 0.03, "grad_norm": 0.9026920203140789, "learning_rate": 9.9999280528231e-06, "loss": 0.6217, "step": 249 }, { "epoch": 0.03, "grad_norm": 0.7483237548345875, "learning_rate": 9.999916558335685e-06, "loss": 0.5728, "step": 250 }, { "epoch": 0.03, "grad_norm": 0.7523195268586251, "learning_rate": 9.999904212414569e-06, "loss": 0.5335, "step": 251 }, { "epoch": 0.03, "grad_norm": 0.8561396872975037, "learning_rate": 9.999891015061853e-06, "loss": 0.6514, "step": 252 }, { "epoch": 0.03, "grad_norm": 0.7684453975305775, "learning_rate": 9.999876966279784e-06, "loss": 0.6109, "step": 253 }, { "epoch": 0.03, "grad_norm": 1.0075623622613596, "learning_rate": 9.999862066070759e-06, "loss": 0.6978, "step": 254 }, { "epoch": 0.03, "grad_norm": 0.9330297959886233, "learning_rate": 9.999846314437312e-06, "loss": 0.6024, "step": 255 }, { "epoch": 0.03, "grad_norm": 0.8153131674765485, "learning_rate": 9.999829711382125e-06, "loss": 0.58, "step": 256 }, { "epoch": 0.03, "grad_norm": 1.0285588577508944, "learning_rate": 9.999812256908025e-06, "loss": 0.6716, "step": 257 }, { "epoch": 0.03, "grad_norm": 1.2039006035827808, "learning_rate": 9.999793951017986e-06, "loss": 0.6966, "step": 258 }, { "epoch": 0.03, "grad_norm": 0.8965745425797729, "learning_rate": 9.999774793715126e-06, "loss": 0.7421, "step": 259 }, { "epoch": 0.03, "grad_norm": 0.917614617516794, "learning_rate": 9.999754785002705e-06, "loss": 0.679, "step": 260 }, { "epoch": 0.03, "grad_norm": 1.0362393643384793, "learning_rate": 9.999733924884132e-06, "loss": 0.7768, "step": 261 }, { "epoch": 0.03, "grad_norm": 1.0174413250119518, "learning_rate": 9.999712213362957e-06, "loss": 0.6778, "step": 262 }, { "epoch": 0.03, "grad_norm": 0.7670304828921283, "learning_rate": 9.999689650442878e-06, "loss": 0.5888, "step": 263 }, { "epoch": 0.03, "grad_norm": 1.0250762401374716, "learning_rate": 9.999666236127741e-06, "loss": 0.6437, "step": 264 }, { "epoch": 0.03, "grad_norm": 0.8724926110458359, "learning_rate": 9.999641970421528e-06, "loss": 0.6002, "step": 265 }, { "epoch": 0.03, "grad_norm": 0.7471948393811478, "learning_rate": 9.999616853328373e-06, "loss": 0.5884, "step": 266 }, { "epoch": 0.03, "grad_norm": 0.9431722393770324, "learning_rate": 9.999590884852554e-06, "loss": 0.6848, "step": 267 }, { "epoch": 0.03, "grad_norm": 0.8904762183461915, "learning_rate": 9.999564064998493e-06, "loss": 0.6573, "step": 268 }, { "epoch": 0.03, "grad_norm": 0.7787426366104326, "learning_rate": 9.999536393770757e-06, "loss": 0.6033, "step": 269 }, { "epoch": 0.03, "grad_norm": 0.7488704336463297, "learning_rate": 9.999507871174056e-06, "loss": 0.6111, "step": 270 }, { "epoch": 0.03, "grad_norm": 0.9063140732547219, "learning_rate": 9.999478497213252e-06, "loss": 0.7089, "step": 271 }, { "epoch": 0.03, "grad_norm": 1.1333482461657327, "learning_rate": 9.999448271893342e-06, "loss": 0.6964, "step": 272 }, { "epoch": 0.03, "grad_norm": 1.0304558799971852, "learning_rate": 9.999417195219476e-06, "loss": 0.6852, "step": 273 }, { "epoch": 0.03, "grad_norm": 0.7818934205810679, "learning_rate": 9.999385267196947e-06, "loss": 0.6542, "step": 274 }, { "epoch": 0.04, "grad_norm": 0.9892073103285758, "learning_rate": 9.999352487831187e-06, "loss": 0.626, "step": 275 }, { "epoch": 0.04, "grad_norm": 0.9616757237790202, "learning_rate": 9.999318857127783e-06, "loss": 0.6752, "step": 276 }, { "epoch": 0.04, "grad_norm": 0.9961448141781154, "learning_rate": 9.999284375092458e-06, "loss": 0.6269, "step": 277 }, { "epoch": 0.04, "grad_norm": 0.795420515321003, "learning_rate": 9.999249041731089e-06, "loss": 0.5838, "step": 278 }, { "epoch": 0.04, "grad_norm": 0.8826509457705606, "learning_rate": 9.999212857049688e-06, "loss": 0.6113, "step": 279 }, { "epoch": 0.04, "grad_norm": 0.9583078016380316, "learning_rate": 9.99917582105442e-06, "loss": 0.61, "step": 280 }, { "epoch": 0.04, "grad_norm": 0.8359376697608909, "learning_rate": 9.999137933751591e-06, "loss": 0.5932, "step": 281 }, { "epoch": 0.04, "grad_norm": 0.7554090887881599, "learning_rate": 9.99909919514765e-06, "loss": 0.6292, "step": 282 }, { "epoch": 0.04, "grad_norm": 0.7969393806474914, "learning_rate": 9.9990596052492e-06, "loss": 0.5801, "step": 283 }, { "epoch": 0.04, "grad_norm": 0.8339723105361531, "learning_rate": 9.999019164062976e-06, "loss": 0.5922, "step": 284 }, { "epoch": 0.04, "grad_norm": 1.0184188001217274, "learning_rate": 9.99897787159587e-06, "loss": 0.7114, "step": 285 }, { "epoch": 0.04, "grad_norm": 0.9345821403070972, "learning_rate": 9.99893572785491e-06, "loss": 0.6693, "step": 286 }, { "epoch": 0.04, "grad_norm": 0.9489832490319505, "learning_rate": 9.998892732847275e-06, "loss": 0.6747, "step": 287 }, { "epoch": 0.04, "grad_norm": 0.9491822975095311, "learning_rate": 9.998848886580283e-06, "loss": 0.6135, "step": 288 }, { "epoch": 0.04, "grad_norm": 1.0307820768893219, "learning_rate": 9.998804189061407e-06, "loss": 0.6866, "step": 289 }, { "epoch": 0.04, "grad_norm": 0.7608263414528763, "learning_rate": 9.998758640298253e-06, "loss": 0.5903, "step": 290 }, { "epoch": 0.04, "grad_norm": 0.8599073151312877, "learning_rate": 9.998712240298579e-06, "loss": 0.69, "step": 291 }, { "epoch": 0.04, "grad_norm": 0.7205713598023247, "learning_rate": 9.998664989070287e-06, "loss": 0.5683, "step": 292 }, { "epoch": 0.04, "grad_norm": 0.8110085756398755, "learning_rate": 9.998616886621424e-06, "loss": 0.6288, "step": 293 }, { "epoch": 0.04, "grad_norm": 0.9154328323405975, "learning_rate": 9.99856793296018e-06, "loss": 0.6759, "step": 294 }, { "epoch": 0.04, "grad_norm": 0.8200883833415538, "learning_rate": 9.99851812809489e-06, "loss": 0.61, "step": 295 }, { "epoch": 0.04, "grad_norm": 0.9288553336473484, "learning_rate": 9.998467472034039e-06, "loss": 0.6731, "step": 296 }, { "epoch": 0.04, "grad_norm": 1.0248926938844098, "learning_rate": 9.99841596478625e-06, "loss": 0.675, "step": 297 }, { "epoch": 0.04, "grad_norm": 0.9662476530548694, "learning_rate": 9.998363606360295e-06, "loss": 0.7635, "step": 298 }, { "epoch": 0.04, "grad_norm": 0.8337459948893531, "learning_rate": 9.998310396765092e-06, "loss": 0.6224, "step": 299 }, { "epoch": 0.04, "grad_norm": 0.8930266443405209, "learning_rate": 9.9982563360097e-06, "loss": 0.6268, "step": 300 }, { "epoch": 0.04, "grad_norm": 0.8621128050741431, "learning_rate": 9.998201424103325e-06, "loss": 0.5759, "step": 301 }, { "epoch": 0.04, "grad_norm": 1.218581113023825, "learning_rate": 9.998145661055318e-06, "loss": 0.6884, "step": 302 }, { "epoch": 0.04, "grad_norm": 0.8165414786659695, "learning_rate": 9.998089046875176e-06, "loss": 0.5984, "step": 303 }, { "epoch": 0.04, "grad_norm": 1.1533663305554112, "learning_rate": 9.998031581572538e-06, "loss": 0.6714, "step": 304 }, { "epoch": 0.04, "grad_norm": 0.8257686275582143, "learning_rate": 9.997973265157192e-06, "loss": 0.6049, "step": 305 }, { "epoch": 0.04, "grad_norm": 0.8418914276868043, "learning_rate": 9.997914097639066e-06, "loss": 0.6021, "step": 306 }, { "epoch": 0.04, "grad_norm": 1.169835228410195, "learning_rate": 9.997854079028239e-06, "loss": 0.7795, "step": 307 }, { "epoch": 0.04, "grad_norm": 0.8813017380551073, "learning_rate": 9.997793209334928e-06, "loss": 0.6428, "step": 308 }, { "epoch": 0.04, "grad_norm": 0.887480511846037, "learning_rate": 9.9977314885695e-06, "loss": 0.6193, "step": 309 }, { "epoch": 0.04, "grad_norm": 1.02812925541616, "learning_rate": 9.997668916742466e-06, "loss": 0.761, "step": 310 }, { "epoch": 0.04, "grad_norm": 0.7943587048634801, "learning_rate": 9.997605493864482e-06, "loss": 0.5408, "step": 311 }, { "epoch": 0.04, "grad_norm": 0.8382077658643962, "learning_rate": 9.997541219946346e-06, "loss": 0.5964, "step": 312 }, { "epoch": 0.04, "grad_norm": 0.9590898612777877, "learning_rate": 9.997476094999005e-06, "loss": 0.7203, "step": 313 }, { "epoch": 0.04, "grad_norm": 0.7771017969668923, "learning_rate": 9.997410119033548e-06, "loss": 0.6217, "step": 314 }, { "epoch": 0.04, "grad_norm": 0.9717986029539818, "learning_rate": 9.997343292061208e-06, "loss": 0.7661, "step": 315 }, { "epoch": 0.04, "grad_norm": 0.8374225704031762, "learning_rate": 9.99727561409337e-06, "loss": 0.6195, "step": 316 }, { "epoch": 0.04, "grad_norm": 0.7693484657466653, "learning_rate": 9.997207085141555e-06, "loss": 0.6024, "step": 317 }, { "epoch": 0.04, "grad_norm": 1.5543932190442702, "learning_rate": 9.997137705217435e-06, "loss": 0.7393, "step": 318 }, { "epoch": 0.04, "grad_norm": 0.9669158468522525, "learning_rate": 9.997067474332824e-06, "loss": 0.7073, "step": 319 }, { "epoch": 0.04, "grad_norm": 0.9043492810600272, "learning_rate": 9.99699639249968e-06, "loss": 0.6805, "step": 320 }, { "epoch": 0.04, "grad_norm": 0.7212983946702716, "learning_rate": 9.996924459730108e-06, "loss": 0.5934, "step": 321 }, { "epoch": 0.04, "grad_norm": 0.9636016130383295, "learning_rate": 9.996851676036358e-06, "loss": 0.615, "step": 322 }, { "epoch": 0.04, "grad_norm": 1.1145403303057233, "learning_rate": 9.996778041430826e-06, "loss": 0.6685, "step": 323 }, { "epoch": 0.04, "grad_norm": 2.647098903247357, "learning_rate": 9.99670355592605e-06, "loss": 0.7344, "step": 324 }, { "epoch": 0.04, "grad_norm": 0.8188894099687588, "learning_rate": 9.996628219534713e-06, "loss": 0.5963, "step": 325 }, { "epoch": 0.04, "grad_norm": 0.7971616385327054, "learning_rate": 9.996552032269643e-06, "loss": 0.5691, "step": 326 }, { "epoch": 0.04, "grad_norm": 0.7662516889475968, "learning_rate": 9.996474994143819e-06, "loss": 0.6288, "step": 327 }, { "epoch": 0.04, "grad_norm": 1.0796277220899329, "learning_rate": 9.996397105170353e-06, "loss": 0.6825, "step": 328 }, { "epoch": 0.04, "grad_norm": 0.8240757295568947, "learning_rate": 9.996318365362514e-06, "loss": 0.5904, "step": 329 }, { "epoch": 0.04, "grad_norm": 0.805990665340262, "learning_rate": 9.996238774733708e-06, "loss": 0.5812, "step": 330 }, { "epoch": 0.04, "grad_norm": 0.8234970501145048, "learning_rate": 9.99615833329749e-06, "loss": 0.5773, "step": 331 }, { "epoch": 0.04, "grad_norm": 0.9420760857449071, "learning_rate": 9.996077041067555e-06, "loss": 0.5468, "step": 332 }, { "epoch": 0.04, "grad_norm": 0.9809175860162507, "learning_rate": 9.995994898057752e-06, "loss": 0.6663, "step": 333 }, { "epoch": 0.04, "grad_norm": 0.942092634391354, "learning_rate": 9.995911904282063e-06, "loss": 0.6731, "step": 334 }, { "epoch": 0.04, "grad_norm": 1.1619584512173078, "learning_rate": 9.995828059754625e-06, "loss": 0.747, "step": 335 }, { "epoch": 0.04, "grad_norm": 0.9416861391157988, "learning_rate": 9.995743364489715e-06, "loss": 0.7163, "step": 336 }, { "epoch": 0.04, "grad_norm": 1.0041871482847478, "learning_rate": 9.995657818501754e-06, "loss": 0.7309, "step": 337 }, { "epoch": 0.04, "grad_norm": 0.8976708106014296, "learning_rate": 9.995571421805312e-06, "loss": 0.6954, "step": 338 }, { "epoch": 0.04, "grad_norm": 0.9970549295496145, "learning_rate": 9.9954841744151e-06, "loss": 0.7289, "step": 339 }, { "epoch": 0.04, "grad_norm": 1.1211833889866902, "learning_rate": 9.995396076345976e-06, "loss": 0.6742, "step": 340 }, { "epoch": 0.04, "grad_norm": 0.8830745823232112, "learning_rate": 9.995307127612942e-06, "loss": 0.612, "step": 341 }, { "epoch": 0.04, "grad_norm": 0.7209637307243821, "learning_rate": 9.995217328231144e-06, "loss": 0.614, "step": 342 }, { "epoch": 0.04, "grad_norm": 0.9340561683492581, "learning_rate": 9.995126678215874e-06, "loss": 0.7104, "step": 343 }, { "epoch": 0.04, "grad_norm": 0.9738537795595587, "learning_rate": 9.995035177582572e-06, "loss": 0.6642, "step": 344 }, { "epoch": 0.04, "grad_norm": 0.9440417239739113, "learning_rate": 9.994942826346817e-06, "loss": 0.6892, "step": 345 }, { "epoch": 0.04, "grad_norm": 0.7497514014731164, "learning_rate": 9.994849624524333e-06, "loss": 0.5784, "step": 346 }, { "epoch": 0.04, "grad_norm": 0.9106193950516023, "learning_rate": 9.994755572130996e-06, "loss": 0.7091, "step": 347 }, { "epoch": 0.04, "grad_norm": 1.053805118303543, "learning_rate": 9.994660669182818e-06, "loss": 0.62, "step": 348 }, { "epoch": 0.04, "grad_norm": 0.8778116627318417, "learning_rate": 9.994564915695965e-06, "loss": 0.6443, "step": 349 }, { "epoch": 0.04, "grad_norm": 1.0489939813220084, "learning_rate": 9.994468311686738e-06, "loss": 0.713, "step": 350 }, { "epoch": 0.04, "grad_norm": 0.938005710011667, "learning_rate": 9.99437085717159e-06, "loss": 0.7186, "step": 351 }, { "epoch": 0.04, "grad_norm": 0.9125421530777931, "learning_rate": 9.994272552167116e-06, "loss": 0.6405, "step": 352 }, { "epoch": 0.04, "grad_norm": 1.0192868039618457, "learning_rate": 9.994173396690055e-06, "loss": 0.7072, "step": 353 }, { "epoch": 0.05, "grad_norm": 1.017436335831891, "learning_rate": 9.994073390757296e-06, "loss": 0.6565, "step": 354 }, { "epoch": 0.05, "grad_norm": 0.9485778133461242, "learning_rate": 9.993972534385866e-06, "loss": 0.5994, "step": 355 }, { "epoch": 0.05, "grad_norm": 1.0398677839945885, "learning_rate": 9.993870827592937e-06, "loss": 0.7275, "step": 356 }, { "epoch": 0.05, "grad_norm": 0.8255473602037663, "learning_rate": 9.993768270395836e-06, "loss": 0.5959, "step": 357 }, { "epoch": 0.05, "grad_norm": 0.8258622309155117, "learning_rate": 9.99366486281202e-06, "loss": 0.5621, "step": 358 }, { "epoch": 0.05, "grad_norm": 0.8949061455906837, "learning_rate": 9.993560604859103e-06, "loss": 0.5521, "step": 359 }, { "epoch": 0.05, "grad_norm": 0.8527556019739144, "learning_rate": 9.993455496554838e-06, "loss": 0.58, "step": 360 }, { "epoch": 0.05, "grad_norm": 0.8789357795080626, "learning_rate": 9.993349537917123e-06, "loss": 0.6461, "step": 361 }, { "epoch": 0.05, "grad_norm": 0.8246177657611419, "learning_rate": 9.993242728964001e-06, "loss": 0.6181, "step": 362 }, { "epoch": 0.05, "grad_norm": 1.0375105332126155, "learning_rate": 9.993135069713663e-06, "loss": 0.6614, "step": 363 }, { "epoch": 0.05, "grad_norm": 0.8104965800012588, "learning_rate": 9.99302656018444e-06, "loss": 0.6015, "step": 364 }, { "epoch": 0.05, "grad_norm": 0.7596096101521556, "learning_rate": 9.992917200394812e-06, "loss": 0.5891, "step": 365 }, { "epoch": 0.05, "grad_norm": 1.1644348170500525, "learning_rate": 9.992806990363399e-06, "loss": 0.6752, "step": 366 }, { "epoch": 0.05, "grad_norm": 1.1436977846077838, "learning_rate": 9.99269593010897e-06, "loss": 0.7497, "step": 367 }, { "epoch": 0.05, "grad_norm": 0.7724466008975204, "learning_rate": 9.992584019650437e-06, "loss": 0.5779, "step": 368 }, { "epoch": 0.05, "grad_norm": 1.0039725391305003, "learning_rate": 9.992471259006861e-06, "loss": 0.6915, "step": 369 }, { "epoch": 0.05, "grad_norm": 0.8643750102532078, "learning_rate": 9.992357648197438e-06, "loss": 0.6565, "step": 370 }, { "epoch": 0.05, "grad_norm": 0.8769587422934415, "learning_rate": 9.992243187241519e-06, "loss": 0.6275, "step": 371 }, { "epoch": 0.05, "grad_norm": 0.999357823422199, "learning_rate": 9.992127876158593e-06, "loss": 0.588, "step": 372 }, { "epoch": 0.05, "grad_norm": 0.9815912258356045, "learning_rate": 9.992011714968299e-06, "loss": 0.6181, "step": 373 }, { "epoch": 0.05, "grad_norm": 0.9379284359788672, "learning_rate": 9.991894703690414e-06, "loss": 0.6335, "step": 374 }, { "epoch": 0.05, "grad_norm": 0.8802045143811273, "learning_rate": 9.991776842344866e-06, "loss": 0.6009, "step": 375 }, { "epoch": 0.05, "grad_norm": 0.7951951084578475, "learning_rate": 9.991658130951729e-06, "loss": 0.6046, "step": 376 }, { "epoch": 0.05, "grad_norm": 1.0391377129467918, "learning_rate": 9.991538569531211e-06, "loss": 0.6763, "step": 377 }, { "epoch": 0.05, "grad_norm": 1.3049012800130029, "learning_rate": 9.99141815810368e-06, "loss": 0.6774, "step": 378 }, { "epoch": 0.05, "grad_norm": 0.945194996853557, "learning_rate": 9.991296896689635e-06, "loss": 0.6464, "step": 379 }, { "epoch": 0.05, "grad_norm": 0.7312511061611014, "learning_rate": 9.991174785309727e-06, "loss": 0.5764, "step": 380 }, { "epoch": 0.05, "grad_norm": 1.2765573937218397, "learning_rate": 9.991051823984751e-06, "loss": 0.8081, "step": 381 }, { "epoch": 0.05, "grad_norm": 0.9244902302904036, "learning_rate": 9.990928012735646e-06, "loss": 0.605, "step": 382 }, { "epoch": 0.05, "grad_norm": 0.7325463852480184, "learning_rate": 9.990803351583496e-06, "loss": 0.5762, "step": 383 }, { "epoch": 0.05, "grad_norm": 0.994216536194795, "learning_rate": 9.990677840549528e-06, "loss": 0.7333, "step": 384 }, { "epoch": 0.05, "grad_norm": 0.8182606885795133, "learning_rate": 9.990551479655117e-06, "loss": 0.6297, "step": 385 }, { "epoch": 0.05, "grad_norm": 1.840992663713055, "learning_rate": 9.990424268921779e-06, "loss": 0.68, "step": 386 }, { "epoch": 0.05, "grad_norm": 1.0141039755293508, "learning_rate": 9.99029620837118e-06, "loss": 0.7713, "step": 387 }, { "epoch": 0.05, "grad_norm": 0.7619333670671702, "learning_rate": 9.990167298025123e-06, "loss": 0.5444, "step": 388 }, { "epoch": 0.05, "grad_norm": 0.766071906719017, "learning_rate": 9.990037537905563e-06, "loss": 0.5622, "step": 389 }, { "epoch": 0.05, "grad_norm": 0.9833012585342708, "learning_rate": 9.989906928034597e-06, "loss": 0.6846, "step": 390 }, { "epoch": 0.05, "grad_norm": 0.8688067861021305, "learning_rate": 9.989775468434467e-06, "loss": 0.6543, "step": 391 }, { "epoch": 0.05, "grad_norm": 0.9549303508672472, "learning_rate": 9.989643159127554e-06, "loss": 0.6923, "step": 392 }, { "epoch": 0.05, "grad_norm": 0.826282494354979, "learning_rate": 9.989510000136395e-06, "loss": 0.6737, "step": 393 }, { "epoch": 0.05, "grad_norm": 0.7552895110973618, "learning_rate": 9.989375991483664e-06, "loss": 0.6333, "step": 394 }, { "epoch": 0.05, "grad_norm": 0.8315473541600976, "learning_rate": 9.989241133192178e-06, "loss": 0.6379, "step": 395 }, { "epoch": 0.05, "grad_norm": 0.8378720067306025, "learning_rate": 9.989105425284907e-06, "loss": 0.6259, "step": 396 }, { "epoch": 0.05, "grad_norm": 1.0566130118550656, "learning_rate": 9.988968867784958e-06, "loss": 0.6804, "step": 397 }, { "epoch": 0.05, "grad_norm": 0.8419946463946731, "learning_rate": 9.988831460715584e-06, "loss": 0.6163, "step": 398 }, { "epoch": 0.05, "grad_norm": 0.834007810611835, "learning_rate": 9.988693204100185e-06, "loss": 0.6101, "step": 399 }, { "epoch": 0.05, "grad_norm": 0.8746465329802927, "learning_rate": 9.988554097962308e-06, "loss": 0.6724, "step": 400 }, { "epoch": 0.05, "grad_norm": 0.8639812892611826, "learning_rate": 9.988414142325637e-06, "loss": 0.6684, "step": 401 }, { "epoch": 0.05, "grad_norm": 0.9759578689345811, "learning_rate": 9.988273337214008e-06, "loss": 0.6557, "step": 402 }, { "epoch": 0.05, "grad_norm": 0.7408369216955119, "learning_rate": 9.988131682651394e-06, "loss": 0.6094, "step": 403 }, { "epoch": 0.05, "grad_norm": 1.0295894842371458, "learning_rate": 9.987989178661923e-06, "loss": 0.7053, "step": 404 }, { "epoch": 0.05, "grad_norm": 0.8860845771509531, "learning_rate": 9.987845825269858e-06, "loss": 0.6481, "step": 405 }, { "epoch": 0.05, "grad_norm": 0.7513098847924715, "learning_rate": 9.987701622499612e-06, "loss": 0.5924, "step": 406 }, { "epoch": 0.05, "grad_norm": 1.0426045003443716, "learning_rate": 9.987556570375743e-06, "loss": 0.6915, "step": 407 }, { "epoch": 0.05, "grad_norm": 0.8521945757805716, "learning_rate": 9.987410668922948e-06, "loss": 0.5732, "step": 408 }, { "epoch": 0.05, "grad_norm": 0.9585014676721031, "learning_rate": 9.987263918166075e-06, "loss": 0.6817, "step": 409 }, { "epoch": 0.05, "grad_norm": 1.0589047866327423, "learning_rate": 9.987116318130115e-06, "loss": 0.664, "step": 410 }, { "epoch": 0.05, "grad_norm": 0.7451349527570841, "learning_rate": 9.9869678688402e-06, "loss": 0.5642, "step": 411 }, { "epoch": 0.05, "grad_norm": 0.9152208995938579, "learning_rate": 9.98681857032161e-06, "loss": 0.5812, "step": 412 }, { "epoch": 0.05, "grad_norm": 0.9758349805011866, "learning_rate": 9.986668422599772e-06, "loss": 0.6745, "step": 413 }, { "epoch": 0.05, "grad_norm": 1.0349726333391913, "learning_rate": 9.986517425700251e-06, "loss": 0.7296, "step": 414 }, { "epoch": 0.05, "grad_norm": 0.841148207353582, "learning_rate": 9.986365579648762e-06, "loss": 0.5802, "step": 415 }, { "epoch": 0.05, "grad_norm": 0.8046401674836037, "learning_rate": 9.986212884471162e-06, "loss": 0.5903, "step": 416 }, { "epoch": 0.05, "grad_norm": 0.8075564935762903, "learning_rate": 9.986059340193453e-06, "loss": 0.6427, "step": 417 }, { "epoch": 0.05, "grad_norm": 0.8285220615452736, "learning_rate": 9.985904946841785e-06, "loss": 0.59, "step": 418 }, { "epoch": 0.05, "grad_norm": 0.8361566593322827, "learning_rate": 9.985749704442446e-06, "loss": 0.5477, "step": 419 }, { "epoch": 0.05, "grad_norm": 0.894200543261818, "learning_rate": 9.985593613021873e-06, "loss": 0.6825, "step": 420 }, { "epoch": 0.05, "grad_norm": 0.9402822126531339, "learning_rate": 9.985436672606647e-06, "loss": 0.7059, "step": 421 }, { "epoch": 0.05, "grad_norm": 0.9927059480094714, "learning_rate": 9.985278883223494e-06, "loss": 0.7266, "step": 422 }, { "epoch": 0.05, "grad_norm": 0.6917812724522772, "learning_rate": 9.985120244899285e-06, "loss": 0.5574, "step": 423 }, { "epoch": 0.05, "grad_norm": 0.7832286582978668, "learning_rate": 9.984960757661031e-06, "loss": 0.5573, "step": 424 }, { "epoch": 0.05, "grad_norm": 0.7987835239334181, "learning_rate": 9.984800421535893e-06, "loss": 0.6408, "step": 425 }, { "epoch": 0.05, "grad_norm": 0.7297111616191886, "learning_rate": 9.984639236551177e-06, "loss": 0.5983, "step": 426 }, { "epoch": 0.05, "grad_norm": 0.8390909979092573, "learning_rate": 9.984477202734327e-06, "loss": 0.5989, "step": 427 }, { "epoch": 0.05, "grad_norm": 0.9470731744767806, "learning_rate": 9.984314320112936e-06, "loss": 0.6401, "step": 428 }, { "epoch": 0.05, "grad_norm": 0.906598707603198, "learning_rate": 9.984150588714742e-06, "loss": 0.6463, "step": 429 }, { "epoch": 0.05, "grad_norm": 0.8337135665759178, "learning_rate": 9.98398600856763e-06, "loss": 0.6216, "step": 430 }, { "epoch": 0.05, "grad_norm": 1.0073563290493255, "learning_rate": 9.983820579699622e-06, "loss": 0.6583, "step": 431 }, { "epoch": 0.06, "grad_norm": 0.7707538345782596, "learning_rate": 9.983654302138892e-06, "loss": 0.5752, "step": 432 }, { "epoch": 0.06, "grad_norm": 0.9674923073232958, "learning_rate": 9.98348717591375e-06, "loss": 0.6957, "step": 433 }, { "epoch": 0.06, "grad_norm": 0.905475220676663, "learning_rate": 9.983319201052664e-06, "loss": 0.5946, "step": 434 }, { "epoch": 0.06, "grad_norm": 0.9246782522026172, "learning_rate": 9.983150377584231e-06, "loss": 0.6663, "step": 435 }, { "epoch": 0.06, "grad_norm": 0.8247841052316118, "learning_rate": 9.982980705537204e-06, "loss": 0.5997, "step": 436 }, { "epoch": 0.06, "grad_norm": 0.7650610923549515, "learning_rate": 9.982810184940475e-06, "loss": 0.557, "step": 437 }, { "epoch": 0.06, "grad_norm": 0.8039381465215515, "learning_rate": 9.98263881582308e-06, "loss": 0.6065, "step": 438 }, { "epoch": 0.06, "grad_norm": 1.0640664396586468, "learning_rate": 9.982466598214207e-06, "loss": 0.7222, "step": 439 }, { "epoch": 0.06, "grad_norm": 0.7784472993334844, "learning_rate": 9.982293532143177e-06, "loss": 0.6365, "step": 440 }, { "epoch": 0.06, "grad_norm": 1.2232123825374588, "learning_rate": 9.982119617639463e-06, "loss": 0.6252, "step": 441 }, { "epoch": 0.06, "grad_norm": 0.9452941580986269, "learning_rate": 9.981944854732684e-06, "loss": 0.6777, "step": 442 }, { "epoch": 0.06, "grad_norm": 0.8886308700699889, "learning_rate": 9.981769243452595e-06, "loss": 0.6608, "step": 443 }, { "epoch": 0.06, "grad_norm": 0.7735213545941916, "learning_rate": 9.981592783829106e-06, "loss": 0.6055, "step": 444 }, { "epoch": 0.06, "grad_norm": 1.0651443112878105, "learning_rate": 9.981415475892261e-06, "loss": 0.6116, "step": 445 }, { "epoch": 0.06, "grad_norm": 0.9515438486310432, "learning_rate": 9.981237319672258e-06, "loss": 0.6564, "step": 446 }, { "epoch": 0.06, "grad_norm": 0.8446593836206666, "learning_rate": 9.981058315199435e-06, "loss": 0.6036, "step": 447 }, { "epoch": 0.06, "grad_norm": 0.7648027821877263, "learning_rate": 9.980878462504271e-06, "loss": 0.5905, "step": 448 }, { "epoch": 0.06, "grad_norm": 0.9424532217216225, "learning_rate": 9.980697761617397e-06, "loss": 0.6954, "step": 449 }, { "epoch": 0.06, "grad_norm": 0.7914859399082917, "learning_rate": 9.980516212569582e-06, "loss": 0.5867, "step": 450 }, { "epoch": 0.06, "grad_norm": 0.7296700315535216, "learning_rate": 9.980333815391742e-06, "loss": 0.5423, "step": 451 }, { "epoch": 0.06, "grad_norm": 0.8157784098786468, "learning_rate": 9.980150570114939e-06, "loss": 0.5467, "step": 452 }, { "epoch": 0.06, "grad_norm": 0.9873444507617037, "learning_rate": 9.979966476770375e-06, "loss": 0.7114, "step": 453 }, { "epoch": 0.06, "grad_norm": 0.9925090296729879, "learning_rate": 9.979781535389404e-06, "loss": 0.6591, "step": 454 }, { "epoch": 0.06, "grad_norm": 0.7257964395187778, "learning_rate": 9.979595746003514e-06, "loss": 0.6221, "step": 455 }, { "epoch": 0.06, "grad_norm": 0.9116833056992497, "learning_rate": 9.979409108644345e-06, "loss": 0.6573, "step": 456 }, { "epoch": 0.06, "grad_norm": 0.8609637143918085, "learning_rate": 9.979221623343682e-06, "loss": 0.5552, "step": 457 }, { "epoch": 0.06, "grad_norm": 0.8324267584272339, "learning_rate": 9.979033290133449e-06, "loss": 0.6656, "step": 458 }, { "epoch": 0.06, "grad_norm": 0.8641787671699229, "learning_rate": 9.978844109045718e-06, "loss": 0.6022, "step": 459 }, { "epoch": 0.06, "grad_norm": 0.9705356247339784, "learning_rate": 9.978654080112704e-06, "loss": 0.6856, "step": 460 }, { "epoch": 0.06, "grad_norm": 1.0588435046428086, "learning_rate": 9.978463203366768e-06, "loss": 0.6653, "step": 461 }, { "epoch": 0.06, "grad_norm": 0.8165401999178358, "learning_rate": 9.978271478840412e-06, "loss": 0.6163, "step": 462 }, { "epoch": 0.06, "grad_norm": 0.895253816049949, "learning_rate": 9.978078906566289e-06, "loss": 0.6314, "step": 463 }, { "epoch": 0.06, "grad_norm": 1.3960561444274502, "learning_rate": 9.977885486577188e-06, "loss": 0.6623, "step": 464 }, { "epoch": 0.06, "grad_norm": 1.1365427772340655, "learning_rate": 9.977691218906048e-06, "loss": 0.6384, "step": 465 }, { "epoch": 0.06, "grad_norm": 0.7533101727198697, "learning_rate": 9.977496103585949e-06, "loss": 0.5776, "step": 466 }, { "epoch": 0.06, "grad_norm": 0.9827800301025504, "learning_rate": 9.977300140650121e-06, "loss": 0.7097, "step": 467 }, { "epoch": 0.06, "grad_norm": 0.8250401177590654, "learning_rate": 9.977103330131933e-06, "loss": 0.5915, "step": 468 }, { "epoch": 0.06, "grad_norm": 0.9309698276185471, "learning_rate": 9.976905672064897e-06, "loss": 0.678, "step": 469 }, { "epoch": 0.06, "grad_norm": 0.9348244603567505, "learning_rate": 9.976707166482674e-06, "loss": 0.6381, "step": 470 }, { "epoch": 0.06, "grad_norm": 0.7649254438122951, "learning_rate": 9.976507813419067e-06, "loss": 0.604, "step": 471 }, { "epoch": 0.06, "grad_norm": 1.1565357102438112, "learning_rate": 9.976307612908027e-06, "loss": 0.6867, "step": 472 }, { "epoch": 0.06, "grad_norm": 0.879665977717225, "learning_rate": 9.976106564983641e-06, "loss": 0.6238, "step": 473 }, { "epoch": 0.06, "grad_norm": 0.8888154006042165, "learning_rate": 9.975904669680148e-06, "loss": 0.6245, "step": 474 }, { "epoch": 0.06, "grad_norm": 0.8553969990167989, "learning_rate": 9.975701927031929e-06, "loss": 0.585, "step": 475 }, { "epoch": 0.06, "grad_norm": 1.1480627460703725, "learning_rate": 9.975498337073508e-06, "loss": 0.7234, "step": 476 }, { "epoch": 0.06, "grad_norm": 0.8510315847624589, "learning_rate": 9.975293899839555e-06, "loss": 0.5932, "step": 477 }, { "epoch": 0.06, "grad_norm": 0.959623216416685, "learning_rate": 9.975088615364883e-06, "loss": 0.6432, "step": 478 }, { "epoch": 0.06, "grad_norm": 0.9991654847289766, "learning_rate": 9.97488248368445e-06, "loss": 0.6237, "step": 479 }, { "epoch": 0.06, "grad_norm": 0.9434243787216019, "learning_rate": 9.974675504833356e-06, "loss": 0.7033, "step": 480 }, { "epoch": 0.06, "grad_norm": 0.8816702660460409, "learning_rate": 9.974467678846851e-06, "loss": 0.6907, "step": 481 }, { "epoch": 0.06, "grad_norm": 0.698224354471496, "learning_rate": 9.974259005760324e-06, "loss": 0.5637, "step": 482 }, { "epoch": 0.06, "grad_norm": 0.9431702468716794, "learning_rate": 9.97404948560931e-06, "loss": 0.6639, "step": 483 }, { "epoch": 0.06, "grad_norm": 1.0089455605187896, "learning_rate": 9.973839118429486e-06, "loss": 0.6859, "step": 484 }, { "epoch": 0.06, "grad_norm": 0.7341477743553783, "learning_rate": 9.97362790425668e-06, "loss": 0.6228, "step": 485 }, { "epoch": 0.06, "grad_norm": 0.788455171090866, "learning_rate": 9.973415843126854e-06, "loss": 0.5872, "step": 486 }, { "epoch": 0.06, "grad_norm": 0.989000580701188, "learning_rate": 9.973202935076125e-06, "loss": 0.7115, "step": 487 }, { "epoch": 0.06, "grad_norm": 0.8341745173777312, "learning_rate": 9.972989180140746e-06, "loss": 0.6161, "step": 488 }, { "epoch": 0.06, "grad_norm": 0.8408517299637173, "learning_rate": 9.972774578357118e-06, "loss": 0.603, "step": 489 }, { "epoch": 0.06, "grad_norm": 0.7641029908200004, "learning_rate": 9.972559129761784e-06, "loss": 0.597, "step": 490 }, { "epoch": 0.06, "grad_norm": 1.049208228809266, "learning_rate": 9.972342834391435e-06, "loss": 0.6909, "step": 491 }, { "epoch": 0.06, "grad_norm": 0.9352269185055221, "learning_rate": 9.972125692282902e-06, "loss": 0.6827, "step": 492 }, { "epoch": 0.06, "grad_norm": 1.027543683357878, "learning_rate": 9.971907703473164e-06, "loss": 0.6594, "step": 493 }, { "epoch": 0.06, "grad_norm": 0.8274700480961436, "learning_rate": 9.97168886799934e-06, "loss": 0.6168, "step": 494 }, { "epoch": 0.06, "grad_norm": 0.7663034374898712, "learning_rate": 9.971469185898697e-06, "loss": 0.5452, "step": 495 }, { "epoch": 0.06, "grad_norm": 0.7609980260288177, "learning_rate": 9.971248657208644e-06, "loss": 0.5837, "step": 496 }, { "epoch": 0.06, "grad_norm": 0.8611117285561701, "learning_rate": 9.971027281966735e-06, "loss": 0.6567, "step": 497 }, { "epoch": 0.06, "grad_norm": 1.0093881198913883, "learning_rate": 9.970805060210666e-06, "loss": 0.7164, "step": 498 }, { "epoch": 0.06, "grad_norm": 0.7038884292490618, "learning_rate": 9.970581991978283e-06, "loss": 0.5605, "step": 499 }, { "epoch": 0.06, "grad_norm": 0.8478959947775854, "learning_rate": 9.97035807730757e-06, "loss": 0.5768, "step": 500 }, { "epoch": 0.06, "grad_norm": 0.8660748011522885, "learning_rate": 9.970133316236656e-06, "loss": 0.6082, "step": 501 }, { "epoch": 0.06, "grad_norm": 0.970012780534241, "learning_rate": 9.969907708803816e-06, "loss": 0.6396, "step": 502 }, { "epoch": 0.06, "grad_norm": 0.7214940225898968, "learning_rate": 9.96968125504747e-06, "loss": 0.6538, "step": 503 }, { "epoch": 0.06, "grad_norm": 0.9216898815650768, "learning_rate": 9.969453955006182e-06, "loss": 0.6315, "step": 504 }, { "epoch": 0.06, "grad_norm": 0.9806306613287539, "learning_rate": 9.969225808718655e-06, "loss": 0.7167, "step": 505 }, { "epoch": 0.06, "grad_norm": 1.0923750163480554, "learning_rate": 9.968996816223741e-06, "loss": 0.7147, "step": 506 }, { "epoch": 0.06, "grad_norm": 0.8489171584194607, "learning_rate": 9.968766977560437e-06, "loss": 0.6071, "step": 507 }, { "epoch": 0.06, "grad_norm": 0.8024965268568347, "learning_rate": 9.968536292767881e-06, "loss": 0.5921, "step": 508 }, { "epoch": 0.06, "grad_norm": 0.9430179010466601, "learning_rate": 9.968304761885355e-06, "loss": 0.5994, "step": 509 }, { "epoch": 0.06, "grad_norm": 1.0330657022410388, "learning_rate": 9.96807238495229e-06, "loss": 0.7171, "step": 510 }, { "epoch": 0.07, "grad_norm": 0.7778810841555546, "learning_rate": 9.967839162008253e-06, "loss": 0.5984, "step": 511 }, { "epoch": 0.07, "grad_norm": 0.9431657191058711, "learning_rate": 9.96760509309296e-06, "loss": 0.6662, "step": 512 }, { "epoch": 0.07, "grad_norm": 0.756202180613133, "learning_rate": 9.967370178246274e-06, "loss": 0.5899, "step": 513 }, { "epoch": 0.07, "grad_norm": 0.802061654331361, "learning_rate": 9.967134417508196e-06, "loss": 0.5971, "step": 514 }, { "epoch": 0.07, "grad_norm": 0.9856204798795912, "learning_rate": 9.966897810918874e-06, "loss": 0.6816, "step": 515 }, { "epoch": 0.07, "grad_norm": 0.8534539607791247, "learning_rate": 9.966660358518599e-06, "loss": 0.5815, "step": 516 }, { "epoch": 0.07, "grad_norm": 0.9286042081053918, "learning_rate": 9.966422060347806e-06, "loss": 0.6834, "step": 517 }, { "epoch": 0.07, "grad_norm": 0.9700822499918169, "learning_rate": 9.966182916447077e-06, "loss": 0.7254, "step": 518 }, { "epoch": 0.07, "grad_norm": 0.9878118402578369, "learning_rate": 9.965942926857136e-06, "loss": 0.5882, "step": 519 }, { "epoch": 0.07, "grad_norm": 0.7579724665624524, "learning_rate": 9.965702091618848e-06, "loss": 0.6427, "step": 520 }, { "epoch": 0.07, "grad_norm": 0.7994295267037167, "learning_rate": 9.965460410773227e-06, "loss": 0.5647, "step": 521 }, { "epoch": 0.07, "grad_norm": 0.7749684827307922, "learning_rate": 9.965217884361427e-06, "loss": 0.6228, "step": 522 }, { "epoch": 0.07, "grad_norm": 0.7561752724398992, "learning_rate": 9.964974512424749e-06, "loss": 0.5752, "step": 523 }, { "epoch": 0.07, "grad_norm": 0.858260704256212, "learning_rate": 9.964730295004637e-06, "loss": 0.6834, "step": 524 }, { "epoch": 0.07, "grad_norm": 0.8504676037936518, "learning_rate": 9.964485232142676e-06, "loss": 0.611, "step": 525 }, { "epoch": 0.07, "grad_norm": 0.9507580212115982, "learning_rate": 9.964239323880602e-06, "loss": 0.6919, "step": 526 }, { "epoch": 0.07, "grad_norm": 0.926602669416903, "learning_rate": 9.963992570260286e-06, "loss": 0.7052, "step": 527 }, { "epoch": 0.07, "grad_norm": 0.7900489724282319, "learning_rate": 9.96374497132375e-06, "loss": 0.592, "step": 528 }, { "epoch": 0.07, "grad_norm": 0.8126066341296388, "learning_rate": 9.96349652711316e-06, "loss": 0.6392, "step": 529 }, { "epoch": 0.07, "grad_norm": 0.8378157404905768, "learning_rate": 9.963247237670816e-06, "loss": 0.629, "step": 530 }, { "epoch": 0.07, "grad_norm": 1.0248692233377634, "learning_rate": 9.962997103039177e-06, "loss": 0.618, "step": 531 }, { "epoch": 0.07, "grad_norm": 0.8909205812077798, "learning_rate": 9.962746123260835e-06, "loss": 0.6763, "step": 532 }, { "epoch": 0.07, "grad_norm": 0.8930382251743185, "learning_rate": 9.962494298378532e-06, "loss": 0.7251, "step": 533 }, { "epoch": 0.07, "grad_norm": 0.9299771533511041, "learning_rate": 9.962241628435146e-06, "loss": 0.6301, "step": 534 }, { "epoch": 0.07, "grad_norm": 0.7208837913193927, "learning_rate": 9.961988113473708e-06, "loss": 0.5523, "step": 535 }, { "epoch": 0.07, "grad_norm": 0.9322336634144031, "learning_rate": 9.961733753537388e-06, "loss": 0.6001, "step": 536 }, { "epoch": 0.07, "grad_norm": 0.9973221223155072, "learning_rate": 9.9614785486695e-06, "loss": 0.6837, "step": 537 }, { "epoch": 0.07, "grad_norm": 0.9298660693205547, "learning_rate": 9.961222498913506e-06, "loss": 0.715, "step": 538 }, { "epoch": 0.07, "grad_norm": 1.0199642534082014, "learning_rate": 9.960965604313003e-06, "loss": 0.5889, "step": 539 }, { "epoch": 0.07, "grad_norm": 0.9184903210157217, "learning_rate": 9.960707864911741e-06, "loss": 0.5917, "step": 540 }, { "epoch": 0.07, "grad_norm": 0.75524194232963, "learning_rate": 9.960449280753612e-06, "loss": 0.5755, "step": 541 }, { "epoch": 0.07, "grad_norm": 1.490969590131637, "learning_rate": 9.960189851882646e-06, "loss": 0.6529, "step": 542 }, { "epoch": 0.07, "grad_norm": 0.7892066654765252, "learning_rate": 9.959929578343024e-06, "loss": 0.5954, "step": 543 }, { "epoch": 0.07, "grad_norm": 0.7625967089780826, "learning_rate": 9.959668460179066e-06, "loss": 0.5567, "step": 544 }, { "epoch": 0.07, "grad_norm": 0.9337933156009779, "learning_rate": 9.95940649743524e-06, "loss": 0.6746, "step": 545 }, { "epoch": 0.07, "grad_norm": 0.8018923757576564, "learning_rate": 9.959143690156152e-06, "loss": 0.5953, "step": 546 }, { "epoch": 0.07, "grad_norm": 0.772375140157595, "learning_rate": 9.95888003838656e-06, "loss": 0.551, "step": 547 }, { "epoch": 0.07, "grad_norm": 0.7270472389387665, "learning_rate": 9.958615542171359e-06, "loss": 0.566, "step": 548 }, { "epoch": 0.07, "grad_norm": 0.7037495319898727, "learning_rate": 9.958350201555587e-06, "loss": 0.5796, "step": 549 }, { "epoch": 0.07, "grad_norm": 0.8365102880514173, "learning_rate": 9.958084016584432e-06, "loss": 0.6753, "step": 550 }, { "epoch": 0.07, "grad_norm": 0.8268714161478222, "learning_rate": 9.957816987303223e-06, "loss": 0.6888, "step": 551 }, { "epoch": 0.07, "grad_norm": 0.9462676729925515, "learning_rate": 9.957549113757433e-06, "loss": 0.6411, "step": 552 }, { "epoch": 0.07, "grad_norm": 0.7633213086191619, "learning_rate": 9.957280395992674e-06, "loss": 0.5946, "step": 553 }, { "epoch": 0.07, "grad_norm": 0.8291074079135722, "learning_rate": 9.957010834054707e-06, "loss": 0.6093, "step": 554 }, { "epoch": 0.07, "grad_norm": 0.7439977973378293, "learning_rate": 9.95674042798944e-06, "loss": 0.592, "step": 555 }, { "epoch": 0.07, "grad_norm": 1.031637133465603, "learning_rate": 9.956469177842914e-06, "loss": 0.7773, "step": 556 }, { "epoch": 0.07, "grad_norm": 0.7283800531091312, "learning_rate": 9.956197083661325e-06, "loss": 0.6187, "step": 557 }, { "epoch": 0.07, "grad_norm": 0.8848565692866931, "learning_rate": 9.955924145491005e-06, "loss": 0.6969, "step": 558 }, { "epoch": 0.07, "grad_norm": 0.8844647691938945, "learning_rate": 9.955650363378434e-06, "loss": 0.6591, "step": 559 }, { "epoch": 0.07, "grad_norm": 0.8726042709977126, "learning_rate": 9.955375737370234e-06, "loss": 0.5797, "step": 560 }, { "epoch": 0.07, "grad_norm": 0.7313265588369671, "learning_rate": 9.95510026751317e-06, "loss": 0.5666, "step": 561 }, { "epoch": 0.07, "grad_norm": 0.9607523313588162, "learning_rate": 9.954823953854152e-06, "loss": 0.6443, "step": 562 }, { "epoch": 0.07, "grad_norm": 0.8490946250137187, "learning_rate": 9.954546796440234e-06, "loss": 0.6299, "step": 563 }, { "epoch": 0.07, "grad_norm": 0.750610669112727, "learning_rate": 9.954268795318614e-06, "loss": 0.5952, "step": 564 }, { "epoch": 0.07, "grad_norm": 1.0445261825511276, "learning_rate": 9.95398995053663e-06, "loss": 0.7069, "step": 565 }, { "epoch": 0.07, "grad_norm": 1.2254757431723826, "learning_rate": 9.953710262141767e-06, "loss": 0.7476, "step": 566 }, { "epoch": 0.07, "grad_norm": 0.8103706528243231, "learning_rate": 9.953429730181653e-06, "loss": 0.6309, "step": 567 }, { "epoch": 0.07, "grad_norm": 0.8165130976313479, "learning_rate": 9.953148354704063e-06, "loss": 0.6171, "step": 568 }, { "epoch": 0.07, "grad_norm": 0.8806847613412369, "learning_rate": 9.95286613575691e-06, "loss": 0.5946, "step": 569 }, { "epoch": 0.07, "grad_norm": 0.908668295729378, "learning_rate": 9.952583073388251e-06, "loss": 0.7032, "step": 570 }, { "epoch": 0.07, "grad_norm": 0.8053027613304646, "learning_rate": 9.95229916764629e-06, "loss": 0.584, "step": 571 }, { "epoch": 0.07, "grad_norm": 0.93489348644883, "learning_rate": 9.952014418579375e-06, "loss": 0.6011, "step": 572 }, { "epoch": 0.07, "grad_norm": 1.0654744724998089, "learning_rate": 9.951728826235993e-06, "loss": 0.7225, "step": 573 }, { "epoch": 0.07, "grad_norm": 0.9128936705681024, "learning_rate": 9.951442390664778e-06, "loss": 0.6607, "step": 574 }, { "epoch": 0.07, "grad_norm": 0.7638159978482243, "learning_rate": 9.951155111914508e-06, "loss": 0.6214, "step": 575 }, { "epoch": 0.07, "grad_norm": 1.0592710862221595, "learning_rate": 9.950866990034105e-06, "loss": 0.7441, "step": 576 }, { "epoch": 0.07, "grad_norm": 0.7626376664673241, "learning_rate": 9.95057802507263e-06, "loss": 0.5457, "step": 577 }, { "epoch": 0.07, "grad_norm": 0.7101151695410787, "learning_rate": 9.950288217079292e-06, "loss": 0.5885, "step": 578 }, { "epoch": 0.07, "grad_norm": 0.8943339463215348, "learning_rate": 9.949997566103442e-06, "loss": 0.6849, "step": 579 }, { "epoch": 0.07, "grad_norm": 0.8531538235528026, "learning_rate": 9.949706072194575e-06, "loss": 0.6846, "step": 580 }, { "epoch": 0.07, "grad_norm": 0.8623475976212491, "learning_rate": 9.949413735402332e-06, "loss": 0.7354, "step": 581 }, { "epoch": 0.07, "grad_norm": 0.8640465576649092, "learning_rate": 9.94912055577649e-06, "loss": 0.6962, "step": 582 }, { "epoch": 0.07, "grad_norm": 0.9768915207729412, "learning_rate": 9.948826533366977e-06, "loss": 0.7436, "step": 583 }, { "epoch": 0.07, "grad_norm": 0.8315723975788107, "learning_rate": 9.948531668223865e-06, "loss": 0.5185, "step": 584 }, { "epoch": 0.07, "grad_norm": 1.3308345548950664, "learning_rate": 9.948235960397358e-06, "loss": 0.6698, "step": 585 }, { "epoch": 0.07, "grad_norm": 0.9270428971647223, "learning_rate": 9.94793940993782e-06, "loss": 0.5367, "step": 586 }, { "epoch": 0.07, "grad_norm": 0.7989050894598666, "learning_rate": 9.947642016895749e-06, "loss": 0.6386, "step": 587 }, { "epoch": 0.07, "grad_norm": 0.9777221564182457, "learning_rate": 9.947343781321786e-06, "loss": 0.6644, "step": 588 }, { "epoch": 0.08, "grad_norm": 0.76484889469589, "learning_rate": 9.947044703266718e-06, "loss": 0.6045, "step": 589 }, { "epoch": 0.08, "grad_norm": 0.93063026304871, "learning_rate": 9.946744782781474e-06, "loss": 0.6963, "step": 590 }, { "epoch": 0.08, "grad_norm": 0.8551049796434639, "learning_rate": 9.94644401991713e-06, "loss": 0.6274, "step": 591 }, { "epoch": 0.08, "grad_norm": 0.8134392412997804, "learning_rate": 9.9461424147249e-06, "loss": 0.5948, "step": 592 }, { "epoch": 0.08, "grad_norm": 0.896278160288477, "learning_rate": 9.945839967256146e-06, "loss": 0.7359, "step": 593 }, { "epoch": 0.08, "grad_norm": 0.9812497988712363, "learning_rate": 9.945536677562368e-06, "loss": 0.6704, "step": 594 }, { "epoch": 0.08, "grad_norm": 0.8209110223875445, "learning_rate": 9.94523254569522e-06, "loss": 0.6057, "step": 595 }, { "epoch": 0.08, "grad_norm": 0.6947022442188788, "learning_rate": 9.944927571706486e-06, "loss": 0.5691, "step": 596 }, { "epoch": 0.08, "grad_norm": 1.0114013030643787, "learning_rate": 9.944621755648105e-06, "loss": 0.6591, "step": 597 }, { "epoch": 0.08, "grad_norm": 0.7373920211522685, "learning_rate": 9.944315097572148e-06, "loss": 0.5652, "step": 598 }, { "epoch": 0.08, "grad_norm": 0.8274232703460835, "learning_rate": 9.944007597530842e-06, "loss": 0.6304, "step": 599 }, { "epoch": 0.08, "grad_norm": 0.922224773657226, "learning_rate": 9.943699255576549e-06, "loss": 0.6006, "step": 600 }, { "epoch": 0.08, "grad_norm": 0.8164923669342885, "learning_rate": 9.943390071761774e-06, "loss": 0.5697, "step": 601 }, { "epoch": 0.08, "grad_norm": 0.7756446463136811, "learning_rate": 9.94308004613917e-06, "loss": 0.5573, "step": 602 }, { "epoch": 0.08, "grad_norm": 0.9780185005685222, "learning_rate": 9.94276917876153e-06, "loss": 0.6268, "step": 603 }, { "epoch": 0.08, "grad_norm": 0.750615458466306, "learning_rate": 9.942457469681794e-06, "loss": 0.5706, "step": 604 }, { "epoch": 0.08, "grad_norm": 0.7668287276372201, "learning_rate": 9.94214491895304e-06, "loss": 0.6163, "step": 605 }, { "epoch": 0.08, "grad_norm": 0.7516588483787702, "learning_rate": 9.941831526628495e-06, "loss": 0.5768, "step": 606 }, { "epoch": 0.08, "grad_norm": 0.7837604787075109, "learning_rate": 9.941517292761523e-06, "loss": 0.577, "step": 607 }, { "epoch": 0.08, "grad_norm": 0.8484282516972161, "learning_rate": 9.941202217405637e-06, "loss": 0.6096, "step": 608 }, { "epoch": 0.08, "grad_norm": 0.7569221912628195, "learning_rate": 9.94088630061449e-06, "loss": 0.6125, "step": 609 }, { "epoch": 0.08, "grad_norm": 0.7038152962167636, "learning_rate": 9.940569542441881e-06, "loss": 0.5778, "step": 610 }, { "epoch": 0.08, "grad_norm": 0.9820651063216177, "learning_rate": 9.940251942941747e-06, "loss": 0.7132, "step": 611 }, { "epoch": 0.08, "grad_norm": 0.8446938223008503, "learning_rate": 9.939933502168175e-06, "loss": 0.5826, "step": 612 }, { "epoch": 0.08, "grad_norm": 0.9444598688114014, "learning_rate": 9.939614220175393e-06, "loss": 0.6982, "step": 613 }, { "epoch": 0.08, "grad_norm": 1.1620093172968176, "learning_rate": 9.939294097017768e-06, "loss": 0.6894, "step": 614 }, { "epoch": 0.08, "grad_norm": 0.9226729624115031, "learning_rate": 9.938973132749818e-06, "loss": 0.6751, "step": 615 }, { "epoch": 0.08, "grad_norm": 0.8861209902999666, "learning_rate": 9.938651327426196e-06, "loss": 0.7189, "step": 616 }, { "epoch": 0.08, "grad_norm": 0.730152321451675, "learning_rate": 9.938328681101703e-06, "loss": 0.5676, "step": 617 }, { "epoch": 0.08, "grad_norm": 1.18406401003547, "learning_rate": 9.938005193831282e-06, "loss": 0.6978, "step": 618 }, { "epoch": 0.08, "grad_norm": 0.7328855186267786, "learning_rate": 9.93768086567002e-06, "loss": 0.5415, "step": 619 }, { "epoch": 0.08, "grad_norm": 0.7631245160779453, "learning_rate": 9.93735569667315e-06, "loss": 0.6004, "step": 620 }, { "epoch": 0.08, "grad_norm": 1.1313154696333698, "learning_rate": 9.93702968689604e-06, "loss": 0.6476, "step": 621 }, { "epoch": 0.08, "grad_norm": 0.7750062025343325, "learning_rate": 9.936702836394208e-06, "loss": 0.6006, "step": 622 }, { "epoch": 0.08, "grad_norm": 1.074813819328632, "learning_rate": 9.936375145223313e-06, "loss": 0.6401, "step": 623 }, { "epoch": 0.08, "grad_norm": 0.6498973923795885, "learning_rate": 9.936046613439157e-06, "loss": 0.5269, "step": 624 }, { "epoch": 0.08, "grad_norm": 0.9313633736715803, "learning_rate": 9.935717241097686e-06, "loss": 0.698, "step": 625 }, { "epoch": 0.08, "grad_norm": 0.9218926344512484, "learning_rate": 9.93538702825499e-06, "loss": 0.6667, "step": 626 }, { "epoch": 0.08, "grad_norm": 0.7298388773146208, "learning_rate": 9.935055974967299e-06, "loss": 0.618, "step": 627 }, { "epoch": 0.08, "grad_norm": 0.845635745197338, "learning_rate": 9.93472408129099e-06, "loss": 0.6254, "step": 628 }, { "epoch": 0.08, "grad_norm": 0.6506275909397085, "learning_rate": 9.934391347282576e-06, "loss": 0.5231, "step": 629 }, { "epoch": 0.08, "grad_norm": 0.994721563230209, "learning_rate": 9.934057772998725e-06, "loss": 0.7173, "step": 630 }, { "epoch": 0.08, "grad_norm": 0.8938602928885192, "learning_rate": 9.933723358496236e-06, "loss": 0.6704, "step": 631 }, { "epoch": 0.08, "grad_norm": 0.8833262463756573, "learning_rate": 9.93338810383206e-06, "loss": 0.609, "step": 632 }, { "epoch": 0.08, "grad_norm": 0.9583228384857131, "learning_rate": 9.933052009063285e-06, "loss": 0.7423, "step": 633 }, { "epoch": 0.08, "grad_norm": 0.9040067970946756, "learning_rate": 9.932715074247144e-06, "loss": 0.6348, "step": 634 }, { "epoch": 0.08, "grad_norm": 0.7926578120685713, "learning_rate": 9.932377299441014e-06, "loss": 0.5922, "step": 635 }, { "epoch": 0.08, "grad_norm": 0.8436771748734967, "learning_rate": 9.932038684702417e-06, "loss": 0.5635, "step": 636 }, { "epoch": 0.08, "grad_norm": 0.8237469260465538, "learning_rate": 9.931699230089011e-06, "loss": 0.6019, "step": 637 }, { "epoch": 0.08, "grad_norm": 0.8090287198120906, "learning_rate": 9.931358935658606e-06, "loss": 0.5803, "step": 638 }, { "epoch": 0.08, "grad_norm": 0.9953598802689478, "learning_rate": 9.93101780146915e-06, "loss": 0.6923, "step": 639 }, { "epoch": 0.08, "grad_norm": 0.7630701341604019, "learning_rate": 9.930675827578731e-06, "loss": 0.5498, "step": 640 }, { "epoch": 0.08, "grad_norm": 0.7716374554508619, "learning_rate": 9.930333014045587e-06, "loss": 0.607, "step": 641 }, { "epoch": 0.08, "grad_norm": 0.8305878537378821, "learning_rate": 9.929989360928094e-06, "loss": 0.5926, "step": 642 }, { "epoch": 0.08, "grad_norm": 1.3823498431942076, "learning_rate": 9.929644868284773e-06, "loss": 0.6768, "step": 643 }, { "epoch": 0.08, "grad_norm": 0.7682614029356418, "learning_rate": 9.929299536174289e-06, "loss": 0.5367, "step": 644 }, { "epoch": 0.08, "grad_norm": 0.7677354413042392, "learning_rate": 9.928953364655447e-06, "loss": 0.5381, "step": 645 }, { "epoch": 0.08, "grad_norm": 0.7548625077982958, "learning_rate": 9.928606353787195e-06, "loss": 0.5819, "step": 646 }, { "epoch": 0.08, "grad_norm": 0.8086537365085523, "learning_rate": 9.928258503628627e-06, "loss": 0.6115, "step": 647 }, { "epoch": 0.08, "grad_norm": 0.7668718255159341, "learning_rate": 9.927909814238979e-06, "loss": 0.5757, "step": 648 }, { "epoch": 0.08, "grad_norm": 0.9101451122571584, "learning_rate": 9.927560285677628e-06, "loss": 0.676, "step": 649 }, { "epoch": 0.08, "grad_norm": 0.8663523860531017, "learning_rate": 9.927209918004095e-06, "loss": 0.6648, "step": 650 }, { "epoch": 0.08, "grad_norm": 0.9804079749783077, "learning_rate": 9.926858711278045e-06, "loss": 0.7192, "step": 651 }, { "epoch": 0.08, "grad_norm": 0.7713409436857132, "learning_rate": 9.926506665559283e-06, "loss": 0.612, "step": 652 }, { "epoch": 0.08, "grad_norm": 0.8105425218434699, "learning_rate": 9.926153780907761e-06, "loss": 0.6037, "step": 653 }, { "epoch": 0.08, "grad_norm": 0.9602434806882082, "learning_rate": 9.92580005738357e-06, "loss": 0.6451, "step": 654 }, { "epoch": 0.08, "grad_norm": 0.7355057116304701, "learning_rate": 9.925445495046945e-06, "loss": 0.5907, "step": 655 }, { "epoch": 0.08, "grad_norm": 0.922586656832762, "learning_rate": 9.925090093958265e-06, "loss": 0.6592, "step": 656 }, { "epoch": 0.08, "grad_norm": 0.8218299017388514, "learning_rate": 9.924733854178052e-06, "loss": 0.5916, "step": 657 }, { "epoch": 0.08, "grad_norm": 0.8246134875640933, "learning_rate": 9.92437677576697e-06, "loss": 0.5924, "step": 658 }, { "epoch": 0.08, "grad_norm": 0.8761614197107747, "learning_rate": 9.924018858785824e-06, "loss": 0.7165, "step": 659 }, { "epoch": 0.08, "grad_norm": 0.843966322355839, "learning_rate": 9.923660103295563e-06, "loss": 0.6611, "step": 660 }, { "epoch": 0.08, "grad_norm": 0.7858714903646885, "learning_rate": 9.923300509357283e-06, "loss": 0.6406, "step": 661 }, { "epoch": 0.08, "grad_norm": 0.7209605043597681, "learning_rate": 9.922940077032214e-06, "loss": 0.6448, "step": 662 }, { "epoch": 0.08, "grad_norm": 0.9623215583902911, "learning_rate": 9.92257880638174e-06, "loss": 0.7363, "step": 663 }, { "epoch": 0.08, "grad_norm": 0.9502695108766952, "learning_rate": 9.922216697467378e-06, "loss": 0.6628, "step": 664 }, { "epoch": 0.08, "grad_norm": 0.7806007967785518, "learning_rate": 9.92185375035079e-06, "loss": 0.54, "step": 665 }, { "epoch": 0.08, "grad_norm": 0.8806724514659502, "learning_rate": 9.921489965093785e-06, "loss": 0.6124, "step": 666 }, { "epoch": 0.08, "grad_norm": 1.1387755783997202, "learning_rate": 9.921125341758308e-06, "loss": 0.7379, "step": 667 }, { "epoch": 0.09, "grad_norm": 0.9299146349283249, "learning_rate": 9.920759880406456e-06, "loss": 0.6754, "step": 668 }, { "epoch": 0.09, "grad_norm": 0.8669405325971468, "learning_rate": 9.92039358110046e-06, "loss": 0.6673, "step": 669 }, { "epoch": 0.09, "grad_norm": 0.717965757974735, "learning_rate": 9.920026443902698e-06, "loss": 0.6088, "step": 670 }, { "epoch": 0.09, "grad_norm": 1.0134517646772696, "learning_rate": 9.919658468875689e-06, "loss": 0.7304, "step": 671 }, { "epoch": 0.09, "grad_norm": 0.8253834412205865, "learning_rate": 9.919289656082093e-06, "loss": 0.6032, "step": 672 }, { "epoch": 0.09, "grad_norm": 0.8908513498578015, "learning_rate": 9.91892000558472e-06, "loss": 0.6556, "step": 673 }, { "epoch": 0.09, "grad_norm": 0.692491090448771, "learning_rate": 9.918549517446513e-06, "loss": 0.5595, "step": 674 }, { "epoch": 0.09, "grad_norm": 0.7525718663539719, "learning_rate": 9.918178191730565e-06, "loss": 0.585, "step": 675 }, { "epoch": 0.09, "grad_norm": 0.708981498945706, "learning_rate": 9.917806028500109e-06, "loss": 0.5522, "step": 676 }, { "epoch": 0.09, "grad_norm": 0.8877344425437592, "learning_rate": 9.91743302781852e-06, "loss": 0.5998, "step": 677 }, { "epoch": 0.09, "grad_norm": 0.9016956231450495, "learning_rate": 9.917059189749314e-06, "loss": 0.6565, "step": 678 }, { "epoch": 0.09, "grad_norm": 0.6946495146054391, "learning_rate": 9.916684514356153e-06, "loss": 0.5896, "step": 679 }, { "epoch": 0.09, "grad_norm": 0.7027121668087554, "learning_rate": 9.916309001702844e-06, "loss": 0.5723, "step": 680 }, { "epoch": 0.09, "grad_norm": 0.9192736379936258, "learning_rate": 9.915932651853327e-06, "loss": 0.6864, "step": 681 }, { "epoch": 0.09, "grad_norm": 0.7098074011381186, "learning_rate": 9.915555464871694e-06, "loss": 0.5533, "step": 682 }, { "epoch": 0.09, "grad_norm": 0.8120449469522245, "learning_rate": 9.915177440822177e-06, "loss": 0.61, "step": 683 }, { "epoch": 0.09, "grad_norm": 0.7409337740136418, "learning_rate": 9.914798579769144e-06, "loss": 0.6112, "step": 684 }, { "epoch": 0.09, "grad_norm": 1.0142076692103514, "learning_rate": 9.914418881777118e-06, "loss": 0.6815, "step": 685 }, { "epoch": 0.09, "grad_norm": 0.8327428479309273, "learning_rate": 9.914038346910754e-06, "loss": 0.6142, "step": 686 }, { "epoch": 0.09, "grad_norm": 0.7391678451146036, "learning_rate": 9.913656975234853e-06, "loss": 0.5659, "step": 687 }, { "epoch": 0.09, "grad_norm": 0.6909004767866131, "learning_rate": 9.91327476681436e-06, "loss": 0.5616, "step": 688 }, { "epoch": 0.09, "grad_norm": 0.7291099800620955, "learning_rate": 9.91289172171436e-06, "loss": 0.5801, "step": 689 }, { "epoch": 0.09, "grad_norm": 0.8525452639489284, "learning_rate": 9.912507840000081e-06, "loss": 0.6405, "step": 690 }, { "epoch": 0.09, "grad_norm": 0.8416155895502007, "learning_rate": 9.912123121736896e-06, "loss": 0.6471, "step": 691 }, { "epoch": 0.09, "grad_norm": 0.9151314287094644, "learning_rate": 9.911737566990316e-06, "loss": 0.696, "step": 692 }, { "epoch": 0.09, "grad_norm": 0.7510500670706307, "learning_rate": 9.911351175826e-06, "loss": 0.579, "step": 693 }, { "epoch": 0.09, "grad_norm": 0.9476143372389542, "learning_rate": 9.910963948309744e-06, "loss": 0.6292, "step": 694 }, { "epoch": 0.09, "grad_norm": 0.7858968747578426, "learning_rate": 9.91057588450749e-06, "loss": 0.5726, "step": 695 }, { "epoch": 0.09, "grad_norm": 0.7324210337557129, "learning_rate": 9.910186984485321e-06, "loss": 0.5577, "step": 696 }, { "epoch": 0.09, "grad_norm": 0.7029992476164638, "learning_rate": 9.909797248309463e-06, "loss": 0.6156, "step": 697 }, { "epoch": 0.09, "grad_norm": 0.9636352807473783, "learning_rate": 9.909406676046282e-06, "loss": 0.6438, "step": 698 }, { "epoch": 0.09, "grad_norm": 0.8213434682319596, "learning_rate": 9.909015267762292e-06, "loss": 0.6516, "step": 699 }, { "epoch": 0.09, "grad_norm": 0.7998111431369197, "learning_rate": 9.908623023524143e-06, "loss": 0.5824, "step": 700 }, { "epoch": 0.09, "grad_norm": 0.8959014133293823, "learning_rate": 9.908229943398629e-06, "loss": 0.7201, "step": 701 }, { "epoch": 0.09, "grad_norm": 0.8667368634098876, "learning_rate": 9.90783602745269e-06, "loss": 0.5919, "step": 702 }, { "epoch": 0.09, "grad_norm": 0.9801267598627209, "learning_rate": 9.907441275753407e-06, "loss": 0.7418, "step": 703 }, { "epoch": 0.09, "grad_norm": 0.9703574617065216, "learning_rate": 9.907045688367998e-06, "loss": 0.6869, "step": 704 }, { "epoch": 0.09, "grad_norm": 0.8406415430843424, "learning_rate": 9.90664926536383e-06, "loss": 0.6407, "step": 705 }, { "epoch": 0.09, "grad_norm": 0.7629023886992453, "learning_rate": 9.906252006808411e-06, "loss": 0.5756, "step": 706 }, { "epoch": 0.09, "grad_norm": 1.0298108255803995, "learning_rate": 9.905853912769385e-06, "loss": 0.6129, "step": 707 }, { "epoch": 0.09, "grad_norm": 0.9282339551154486, "learning_rate": 9.90545498331455e-06, "loss": 0.6178, "step": 708 }, { "epoch": 0.09, "grad_norm": 0.7636587035815693, "learning_rate": 9.905055218511832e-06, "loss": 0.5936, "step": 709 }, { "epoch": 0.09, "grad_norm": 0.9119017883934157, "learning_rate": 9.904654618429312e-06, "loss": 0.6629, "step": 710 }, { "epoch": 0.09, "grad_norm": 0.8810501453237173, "learning_rate": 9.904253183135209e-06, "loss": 0.6775, "step": 711 }, { "epoch": 0.09, "grad_norm": 0.917049858842309, "learning_rate": 9.903850912697879e-06, "loss": 0.6797, "step": 712 }, { "epoch": 0.09, "grad_norm": 1.1023512708693004, "learning_rate": 9.903447807185827e-06, "loss": 0.6732, "step": 713 }, { "epoch": 0.09, "grad_norm": 0.8893156064284122, "learning_rate": 9.903043866667697e-06, "loss": 0.6614, "step": 714 }, { "epoch": 0.09, "grad_norm": 0.7901393067993758, "learning_rate": 9.902639091212276e-06, "loss": 0.6138, "step": 715 }, { "epoch": 0.09, "grad_norm": 0.7873768019751562, "learning_rate": 9.902233480888495e-06, "loss": 0.5492, "step": 716 }, { "epoch": 0.09, "grad_norm": 0.9237841453100953, "learning_rate": 9.901827035765422e-06, "loss": 0.685, "step": 717 }, { "epoch": 0.09, "grad_norm": 0.7998758371783121, "learning_rate": 9.90141975591227e-06, "loss": 0.5834, "step": 718 }, { "epoch": 0.09, "grad_norm": 0.6702074175253155, "learning_rate": 9.901011641398398e-06, "loss": 0.524, "step": 719 }, { "epoch": 0.09, "grad_norm": 0.8423372947256208, "learning_rate": 9.900602692293303e-06, "loss": 0.6597, "step": 720 }, { "epoch": 0.09, "grad_norm": 0.8275800935623158, "learning_rate": 9.90019290866662e-06, "loss": 0.5679, "step": 721 }, { "epoch": 0.09, "grad_norm": 0.7686967913973015, "learning_rate": 9.899782290588138e-06, "loss": 0.604, "step": 722 }, { "epoch": 0.09, "grad_norm": 0.857706491871605, "learning_rate": 9.899370838127775e-06, "loss": 0.6264, "step": 723 }, { "epoch": 0.09, "grad_norm": 0.7333631846418711, "learning_rate": 9.8989585513556e-06, "loss": 0.5393, "step": 724 }, { "epoch": 0.09, "grad_norm": 0.9838373877664329, "learning_rate": 9.898545430341821e-06, "loss": 0.7017, "step": 725 }, { "epoch": 0.09, "grad_norm": 0.974486654890674, "learning_rate": 9.898131475156788e-06, "loss": 0.739, "step": 726 }, { "epoch": 0.09, "grad_norm": 0.812168267768625, "learning_rate": 9.897716685870994e-06, "loss": 0.5399, "step": 727 }, { "epoch": 0.09, "grad_norm": 0.7579017390354547, "learning_rate": 9.89730106255507e-06, "loss": 0.5417, "step": 728 }, { "epoch": 0.09, "grad_norm": 0.9102695778461423, "learning_rate": 9.896884605279798e-06, "loss": 0.6113, "step": 729 }, { "epoch": 0.09, "grad_norm": 0.8304422211649709, "learning_rate": 9.89646731411609e-06, "loss": 0.6445, "step": 730 }, { "epoch": 0.09, "grad_norm": 0.9882610697091398, "learning_rate": 9.896049189135012e-06, "loss": 0.7118, "step": 731 }, { "epoch": 0.09, "grad_norm": 0.8567763171882996, "learning_rate": 9.895630230407764e-06, "loss": 0.5456, "step": 732 }, { "epoch": 0.09, "grad_norm": 0.9809733207959377, "learning_rate": 9.89521043800569e-06, "loss": 0.6782, "step": 733 }, { "epoch": 0.09, "grad_norm": 0.9884907501295046, "learning_rate": 9.894789812000273e-06, "loss": 0.6151, "step": 734 }, { "epoch": 0.09, "grad_norm": 0.7460169974638555, "learning_rate": 9.89436835246315e-06, "loss": 0.5616, "step": 735 }, { "epoch": 0.09, "grad_norm": 0.721660580584569, "learning_rate": 9.89394605946608e-06, "loss": 0.566, "step": 736 }, { "epoch": 0.09, "grad_norm": 0.7413767170113644, "learning_rate": 9.893522933080984e-06, "loss": 0.5614, "step": 737 }, { "epoch": 0.09, "grad_norm": 0.9417369873345561, "learning_rate": 9.893098973379912e-06, "loss": 0.6768, "step": 738 }, { "epoch": 0.09, "grad_norm": 0.8578953015537314, "learning_rate": 9.89267418043506e-06, "loss": 0.6425, "step": 739 }, { "epoch": 0.09, "grad_norm": 0.7480721188181566, "learning_rate": 9.892248554318769e-06, "loss": 0.5589, "step": 740 }, { "epoch": 0.09, "grad_norm": 0.8452017810861409, "learning_rate": 9.891822095103513e-06, "loss": 0.5668, "step": 741 }, { "epoch": 0.09, "grad_norm": 0.9039908276260701, "learning_rate": 9.89139480286192e-06, "loss": 0.573, "step": 742 }, { "epoch": 0.09, "grad_norm": 0.6779024497797658, "learning_rate": 9.890966677666749e-06, "loss": 0.5731, "step": 743 }, { "epoch": 0.09, "grad_norm": 0.6946726295782174, "learning_rate": 9.890537719590907e-06, "loss": 0.5142, "step": 744 }, { "epoch": 0.09, "grad_norm": 0.808765705975854, "learning_rate": 9.89010792870744e-06, "loss": 0.6342, "step": 745 }, { "epoch": 0.1, "grad_norm": 0.6628179380064558, "learning_rate": 9.889677305089536e-06, "loss": 0.4992, "step": 746 }, { "epoch": 0.1, "grad_norm": 1.0453292672839902, "learning_rate": 9.889245848810529e-06, "loss": 0.6636, "step": 747 }, { "epoch": 0.1, "grad_norm": 0.7621998662806297, "learning_rate": 9.888813559943891e-06, "loss": 0.612, "step": 748 }, { "epoch": 0.1, "grad_norm": 0.9565742612518761, "learning_rate": 9.888380438563232e-06, "loss": 0.6449, "step": 749 }, { "epoch": 0.1, "grad_norm": 0.8487784993806144, "learning_rate": 9.887946484742314e-06, "loss": 0.6162, "step": 750 }, { "epoch": 0.1, "grad_norm": 0.8597309250550237, "learning_rate": 9.88751169855503e-06, "loss": 0.6689, "step": 751 }, { "epoch": 0.1, "grad_norm": 0.6981979908490888, "learning_rate": 9.887076080075422e-06, "loss": 0.5867, "step": 752 }, { "epoch": 0.1, "grad_norm": 0.6548144431248555, "learning_rate": 9.88663962937767e-06, "loss": 0.5657, "step": 753 }, { "epoch": 0.1, "grad_norm": 0.8881334549929241, "learning_rate": 9.8862023465361e-06, "loss": 0.628, "step": 754 }, { "epoch": 0.1, "grad_norm": 0.9692547090827943, "learning_rate": 9.885764231625172e-06, "loss": 0.6624, "step": 755 }, { "epoch": 0.1, "grad_norm": 0.8393730405810584, "learning_rate": 9.885325284719495e-06, "loss": 0.6269, "step": 756 }, { "epoch": 0.1, "grad_norm": 0.7104667944001845, "learning_rate": 9.884885505893819e-06, "loss": 0.6165, "step": 757 }, { "epoch": 0.1, "grad_norm": 0.872908550944843, "learning_rate": 9.88444489522303e-06, "loss": 0.6199, "step": 758 }, { "epoch": 0.1, "grad_norm": 0.7429284831172942, "learning_rate": 9.884003452782162e-06, "loss": 0.5557, "step": 759 }, { "epoch": 0.1, "grad_norm": 0.8818667407877973, "learning_rate": 9.883561178646387e-06, "loss": 0.6332, "step": 760 }, { "epoch": 0.1, "grad_norm": 0.74189317405622, "learning_rate": 9.88311807289102e-06, "loss": 0.5621, "step": 761 }, { "epoch": 0.1, "grad_norm": 0.8038857385308639, "learning_rate": 9.882674135591516e-06, "loss": 0.6475, "step": 762 }, { "epoch": 0.1, "grad_norm": 0.694763821643562, "learning_rate": 9.882229366823475e-06, "loss": 0.5596, "step": 763 }, { "epoch": 0.1, "grad_norm": 0.8216853227179713, "learning_rate": 9.881783766662635e-06, "loss": 0.5651, "step": 764 }, { "epoch": 0.1, "grad_norm": 0.7505648721723389, "learning_rate": 9.881337335184879e-06, "loss": 0.5246, "step": 765 }, { "epoch": 0.1, "grad_norm": 0.8171642955631234, "learning_rate": 9.880890072466227e-06, "loss": 0.6126, "step": 766 }, { "epoch": 0.1, "grad_norm": 0.8696487145327768, "learning_rate": 9.880441978582844e-06, "loss": 0.687, "step": 767 }, { "epoch": 0.1, "grad_norm": 0.7238153115144139, "learning_rate": 9.879993053611037e-06, "loss": 0.5398, "step": 768 }, { "epoch": 0.1, "grad_norm": 0.9478120655667572, "learning_rate": 9.879543297627252e-06, "loss": 0.6493, "step": 769 }, { "epoch": 0.1, "grad_norm": 0.9027328102335653, "learning_rate": 9.879092710708078e-06, "loss": 0.599, "step": 770 }, { "epoch": 0.1, "grad_norm": 0.9284578286283937, "learning_rate": 9.878641292930247e-06, "loss": 0.6689, "step": 771 }, { "epoch": 0.1, "grad_norm": 0.8548342980449497, "learning_rate": 9.878189044370628e-06, "loss": 0.6352, "step": 772 }, { "epoch": 0.1, "grad_norm": 0.6962359676542369, "learning_rate": 9.877735965106234e-06, "loss": 0.5981, "step": 773 }, { "epoch": 0.1, "grad_norm": 0.7441704767941549, "learning_rate": 9.877282055214222e-06, "loss": 0.5656, "step": 774 }, { "epoch": 0.1, "grad_norm": 0.9518230764048553, "learning_rate": 9.876827314771889e-06, "loss": 0.6684, "step": 775 }, { "epoch": 0.1, "grad_norm": 0.8658989510790782, "learning_rate": 9.876371743856667e-06, "loss": 0.631, "step": 776 }, { "epoch": 0.1, "grad_norm": 0.8626920094777617, "learning_rate": 9.87591534254614e-06, "loss": 0.678, "step": 777 }, { "epoch": 0.1, "grad_norm": 0.7569179234065797, "learning_rate": 9.87545811091803e-06, "loss": 0.5777, "step": 778 }, { "epoch": 0.1, "grad_norm": 0.7667165817688688, "learning_rate": 9.875000049050193e-06, "loss": 0.6184, "step": 779 }, { "epoch": 0.1, "grad_norm": 1.0001633898335596, "learning_rate": 9.874541157020636e-06, "loss": 0.7321, "step": 780 }, { "epoch": 0.1, "grad_norm": 0.8045789331930552, "learning_rate": 9.8740814349075e-06, "loss": 0.6109, "step": 781 }, { "epoch": 0.1, "grad_norm": 0.7588845064764372, "learning_rate": 9.873620882789077e-06, "loss": 0.5983, "step": 782 }, { "epoch": 0.1, "grad_norm": 0.7544801666102019, "learning_rate": 9.87315950074379e-06, "loss": 0.5767, "step": 783 }, { "epoch": 0.1, "grad_norm": 0.9743045985283472, "learning_rate": 9.872697288850208e-06, "loss": 0.6516, "step": 784 }, { "epoch": 0.1, "grad_norm": 0.8479404444370521, "learning_rate": 9.87223424718704e-06, "loss": 0.5399, "step": 785 }, { "epoch": 0.1, "grad_norm": 0.9695049182869461, "learning_rate": 9.871770375833139e-06, "loss": 0.6951, "step": 786 }, { "epoch": 0.1, "grad_norm": 0.6634064398127952, "learning_rate": 9.871305674867497e-06, "loss": 0.5388, "step": 787 }, { "epoch": 0.1, "grad_norm": 0.8137494509652877, "learning_rate": 9.870840144369247e-06, "loss": 0.5487, "step": 788 }, { "epoch": 0.1, "grad_norm": 0.7223817338627702, "learning_rate": 9.870373784417664e-06, "loss": 0.6069, "step": 789 }, { "epoch": 0.1, "grad_norm": 0.9629317971760376, "learning_rate": 9.869906595092165e-06, "loss": 0.703, "step": 790 }, { "epoch": 0.1, "grad_norm": 1.0079896733283227, "learning_rate": 9.869438576472307e-06, "loss": 0.6507, "step": 791 }, { "epoch": 0.1, "grad_norm": 0.9078559674663124, "learning_rate": 9.86896972863779e-06, "loss": 0.6823, "step": 792 }, { "epoch": 0.1, "grad_norm": 0.7656748314841163, "learning_rate": 9.868500051668453e-06, "loss": 0.5989, "step": 793 }, { "epoch": 0.1, "grad_norm": 0.9071940990542837, "learning_rate": 9.868029545644275e-06, "loss": 0.6498, "step": 794 }, { "epoch": 0.1, "grad_norm": 0.9636092570882405, "learning_rate": 9.867558210645383e-06, "loss": 0.77, "step": 795 }, { "epoch": 0.1, "grad_norm": 0.8433778188273502, "learning_rate": 9.867086046752035e-06, "loss": 0.6403, "step": 796 }, { "epoch": 0.1, "grad_norm": 0.9317821873088836, "learning_rate": 9.866613054044638e-06, "loss": 0.6834, "step": 797 }, { "epoch": 0.1, "grad_norm": 0.7757055786101275, "learning_rate": 9.86613923260374e-06, "loss": 0.5763, "step": 798 }, { "epoch": 0.1, "grad_norm": 0.7242323135700258, "learning_rate": 9.865664582510024e-06, "loss": 0.5487, "step": 799 }, { "epoch": 0.1, "grad_norm": 0.7321232632336565, "learning_rate": 9.86518910384432e-06, "loss": 0.5557, "step": 800 }, { "epoch": 0.1, "grad_norm": 0.9355060804299221, "learning_rate": 9.864712796687598e-06, "loss": 0.656, "step": 801 }, { "epoch": 0.1, "grad_norm": 0.7276862239843759, "learning_rate": 9.864235661120966e-06, "loss": 0.5042, "step": 802 }, { "epoch": 0.1, "grad_norm": 0.9988728639970739, "learning_rate": 9.863757697225675e-06, "loss": 0.6934, "step": 803 }, { "epoch": 0.1, "grad_norm": 1.0564490011879095, "learning_rate": 9.86327890508312e-06, "loss": 0.697, "step": 804 }, { "epoch": 0.1, "grad_norm": 0.7895425899944992, "learning_rate": 9.862799284774834e-06, "loss": 0.5954, "step": 805 }, { "epoch": 0.1, "grad_norm": 1.0199665631266697, "learning_rate": 9.862318836382487e-06, "loss": 0.632, "step": 806 }, { "epoch": 0.1, "grad_norm": 0.8763614495723234, "learning_rate": 9.861837559987898e-06, "loss": 0.6926, "step": 807 }, { "epoch": 0.1, "grad_norm": 0.7360426015650645, "learning_rate": 9.861355455673024e-06, "loss": 0.586, "step": 808 }, { "epoch": 0.1, "grad_norm": 0.7586022371468344, "learning_rate": 9.860872523519962e-06, "loss": 0.5994, "step": 809 }, { "epoch": 0.1, "grad_norm": 1.0711323291671404, "learning_rate": 9.86038876361095e-06, "loss": 0.7022, "step": 810 }, { "epoch": 0.1, "grad_norm": 0.631180684540505, "learning_rate": 9.859904176028364e-06, "loss": 0.5343, "step": 811 }, { "epoch": 0.1, "grad_norm": 1.1013725126350422, "learning_rate": 9.859418760854726e-06, "loss": 0.687, "step": 812 }, { "epoch": 0.1, "grad_norm": 1.2576703117967731, "learning_rate": 9.858932518172701e-06, "loss": 0.7069, "step": 813 }, { "epoch": 0.1, "grad_norm": 0.933101646011243, "learning_rate": 9.858445448065086e-06, "loss": 0.7437, "step": 814 }, { "epoch": 0.1, "grad_norm": 0.7101302992080752, "learning_rate": 9.857957550614828e-06, "loss": 0.5372, "step": 815 }, { "epoch": 0.1, "grad_norm": 1.1048112345458718, "learning_rate": 9.857468825905007e-06, "loss": 0.6348, "step": 816 }, { "epoch": 0.1, "grad_norm": 0.7125169819096057, "learning_rate": 9.856979274018851e-06, "loss": 0.5432, "step": 817 }, { "epoch": 0.1, "grad_norm": 0.7520784771603117, "learning_rate": 9.856488895039725e-06, "loss": 0.5822, "step": 818 }, { "epoch": 0.1, "grad_norm": 0.7589155967095328, "learning_rate": 9.855997689051133e-06, "loss": 0.5628, "step": 819 }, { "epoch": 0.1, "grad_norm": 0.7577179625755758, "learning_rate": 9.855505656136726e-06, "loss": 0.5711, "step": 820 }, { "epoch": 0.1, "grad_norm": 1.1601702201883908, "learning_rate": 9.855012796380288e-06, "loss": 0.6733, "step": 821 }, { "epoch": 0.1, "grad_norm": 0.7271300461503057, "learning_rate": 9.854519109865752e-06, "loss": 0.5451, "step": 822 }, { "epoch": 0.1, "grad_norm": 0.8171650892152054, "learning_rate": 9.854024596677184e-06, "loss": 0.6004, "step": 823 }, { "epoch": 0.1, "grad_norm": 0.9927864718958673, "learning_rate": 9.853529256898797e-06, "loss": 0.6765, "step": 824 }, { "epoch": 0.11, "grad_norm": 0.8764586363158297, "learning_rate": 9.85303309061494e-06, "loss": 0.6576, "step": 825 }, { "epoch": 0.11, "grad_norm": 0.8268840784096786, "learning_rate": 9.852536097910106e-06, "loss": 0.5165, "step": 826 }, { "epoch": 0.11, "grad_norm": 0.9348262430461438, "learning_rate": 9.852038278868929e-06, "loss": 0.6729, "step": 827 }, { "epoch": 0.11, "grad_norm": 0.806380040487769, "learning_rate": 9.851539633576181e-06, "loss": 0.5675, "step": 828 }, { "epoch": 0.11, "grad_norm": 0.8787725180343682, "learning_rate": 9.851040162116779e-06, "loss": 0.5907, "step": 829 }, { "epoch": 0.11, "grad_norm": 0.8522407660506429, "learning_rate": 9.85053986457577e-06, "loss": 0.584, "step": 830 }, { "epoch": 0.11, "grad_norm": 0.8465694669618473, "learning_rate": 9.850038741038359e-06, "loss": 0.5721, "step": 831 }, { "epoch": 0.11, "grad_norm": 0.8621850695821015, "learning_rate": 9.849536791589875e-06, "loss": 0.6153, "step": 832 }, { "epoch": 0.11, "grad_norm": 0.7578180186210306, "learning_rate": 9.849034016315799e-06, "loss": 0.6304, "step": 833 }, { "epoch": 0.11, "grad_norm": 0.8566597011108543, "learning_rate": 9.848530415301748e-06, "loss": 0.5615, "step": 834 }, { "epoch": 0.11, "grad_norm": 0.7449314252925586, "learning_rate": 9.848025988633476e-06, "loss": 0.5538, "step": 835 }, { "epoch": 0.11, "grad_norm": 0.7898047349879547, "learning_rate": 9.847520736396885e-06, "loss": 0.641, "step": 836 }, { "epoch": 0.11, "grad_norm": 0.701933988336018, "learning_rate": 9.847014658678016e-06, "loss": 0.5408, "step": 837 }, { "epoch": 0.11, "grad_norm": 0.9429442665495136, "learning_rate": 9.846507755563044e-06, "loss": 0.6466, "step": 838 }, { "epoch": 0.11, "grad_norm": 0.9355697954917016, "learning_rate": 9.846000027138292e-06, "loss": 0.634, "step": 839 }, { "epoch": 0.11, "grad_norm": 1.0728623110408484, "learning_rate": 9.845491473490221e-06, "loss": 0.7414, "step": 840 }, { "epoch": 0.11, "grad_norm": 0.8166910649109188, "learning_rate": 9.844982094705433e-06, "loss": 0.5779, "step": 841 }, { "epoch": 0.11, "grad_norm": 0.8737318423935748, "learning_rate": 9.844471890870668e-06, "loss": 0.6269, "step": 842 }, { "epoch": 0.11, "grad_norm": 0.7338586418238993, "learning_rate": 9.84396086207281e-06, "loss": 0.5636, "step": 843 }, { "epoch": 0.11, "grad_norm": 0.7501059775882307, "learning_rate": 9.843449008398883e-06, "loss": 0.5934, "step": 844 }, { "epoch": 0.11, "grad_norm": 0.9017164094919561, "learning_rate": 9.842936329936045e-06, "loss": 0.5763, "step": 845 }, { "epoch": 0.11, "grad_norm": 0.6615896203996664, "learning_rate": 9.842422826771605e-06, "loss": 0.52, "step": 846 }, { "epoch": 0.11, "grad_norm": 0.9864792548753551, "learning_rate": 9.841908498993005e-06, "loss": 0.6916, "step": 847 }, { "epoch": 0.11, "grad_norm": 0.8356849710899119, "learning_rate": 9.84139334668783e-06, "loss": 0.626, "step": 848 }, { "epoch": 0.11, "grad_norm": 0.8603619971110945, "learning_rate": 9.840877369943806e-06, "loss": 0.5858, "step": 849 }, { "epoch": 0.11, "grad_norm": 1.0552369639591088, "learning_rate": 9.8403605688488e-06, "loss": 0.7282, "step": 850 }, { "epoch": 0.11, "grad_norm": 0.8398213414730651, "learning_rate": 9.839842943490811e-06, "loss": 0.5538, "step": 851 }, { "epoch": 0.11, "grad_norm": 0.9752097830125197, "learning_rate": 9.839324493957993e-06, "loss": 0.6833, "step": 852 }, { "epoch": 0.11, "grad_norm": 0.8183433100358727, "learning_rate": 9.838805220338628e-06, "loss": 0.615, "step": 853 }, { "epoch": 0.11, "grad_norm": 0.7559456200466549, "learning_rate": 9.838285122721145e-06, "loss": 0.5609, "step": 854 }, { "epoch": 0.11, "grad_norm": 0.9765798333547278, "learning_rate": 9.83776420119411e-06, "loss": 0.6724, "step": 855 }, { "epoch": 0.11, "grad_norm": 1.1344909109455168, "learning_rate": 9.837242455846232e-06, "loss": 0.7112, "step": 856 }, { "epoch": 0.11, "grad_norm": 0.7375559015233665, "learning_rate": 9.836719886766357e-06, "loss": 0.5447, "step": 857 }, { "epoch": 0.11, "grad_norm": 0.966985778646837, "learning_rate": 9.836196494043476e-06, "loss": 0.6375, "step": 858 }, { "epoch": 0.11, "grad_norm": 0.6824484636669631, "learning_rate": 9.835672277766713e-06, "loss": 0.5211, "step": 859 }, { "epoch": 0.11, "grad_norm": 0.9439544582968047, "learning_rate": 9.835147238025337e-06, "loss": 0.6153, "step": 860 }, { "epoch": 0.11, "grad_norm": 0.819608376113469, "learning_rate": 9.834621374908761e-06, "loss": 0.5687, "step": 861 }, { "epoch": 0.11, "grad_norm": 0.6926186871755378, "learning_rate": 9.834094688506533e-06, "loss": 0.5735, "step": 862 }, { "epoch": 0.11, "grad_norm": 0.9745182323192518, "learning_rate": 9.833567178908337e-06, "loss": 0.6365, "step": 863 }, { "epoch": 0.11, "grad_norm": 0.989031005255609, "learning_rate": 9.833038846204008e-06, "loss": 0.6627, "step": 864 }, { "epoch": 0.11, "grad_norm": 0.7080838838706122, "learning_rate": 9.832509690483516e-06, "loss": 0.5545, "step": 865 }, { "epoch": 0.11, "grad_norm": 1.107339068251972, "learning_rate": 9.831979711836966e-06, "loss": 0.6803, "step": 866 }, { "epoch": 0.11, "grad_norm": 1.3062900015879466, "learning_rate": 9.83144891035461e-06, "loss": 0.7197, "step": 867 }, { "epoch": 0.11, "grad_norm": 0.8497236986328994, "learning_rate": 9.830917286126838e-06, "loss": 0.7121, "step": 868 }, { "epoch": 0.11, "grad_norm": 0.7757920744231476, "learning_rate": 9.83038483924418e-06, "loss": 0.5507, "step": 869 }, { "epoch": 0.11, "grad_norm": 1.0615132639191454, "learning_rate": 9.829851569797308e-06, "loss": 0.6602, "step": 870 }, { "epoch": 0.11, "grad_norm": 0.956227306772144, "learning_rate": 9.829317477877031e-06, "loss": 0.5922, "step": 871 }, { "epoch": 0.11, "grad_norm": 0.9965856925151164, "learning_rate": 9.828782563574299e-06, "loss": 0.7016, "step": 872 }, { "epoch": 0.11, "grad_norm": 0.950627188504573, "learning_rate": 9.8282468269802e-06, "loss": 0.6849, "step": 873 }, { "epoch": 0.11, "grad_norm": 0.9460454498869647, "learning_rate": 9.827710268185969e-06, "loss": 0.7197, "step": 874 }, { "epoch": 0.11, "grad_norm": 1.019841546323306, "learning_rate": 9.827172887282974e-06, "loss": 0.7185, "step": 875 }, { "epoch": 0.11, "grad_norm": 0.889620093786492, "learning_rate": 9.826634684362724e-06, "loss": 0.6965, "step": 876 }, { "epoch": 0.11, "grad_norm": 0.7465778324800886, "learning_rate": 9.826095659516872e-06, "loss": 0.5581, "step": 877 }, { "epoch": 0.11, "grad_norm": 1.0637192538791145, "learning_rate": 9.825555812837207e-06, "loss": 0.6928, "step": 878 }, { "epoch": 0.11, "grad_norm": 0.7646975750362563, "learning_rate": 9.825015144415658e-06, "loss": 0.5791, "step": 879 }, { "epoch": 0.11, "grad_norm": 0.9466364333058634, "learning_rate": 9.824473654344297e-06, "loss": 0.7225, "step": 880 }, { "epoch": 0.11, "grad_norm": 1.0179728010770526, "learning_rate": 9.823931342715335e-06, "loss": 0.6604, "step": 881 }, { "epoch": 0.11, "grad_norm": 0.894929363812397, "learning_rate": 9.82338820962112e-06, "loss": 0.5946, "step": 882 }, { "epoch": 0.11, "grad_norm": 0.9527469721754865, "learning_rate": 9.822844255154143e-06, "loss": 0.6836, "step": 883 }, { "epoch": 0.11, "grad_norm": 0.7842014338538598, "learning_rate": 9.822299479407031e-06, "loss": 0.6258, "step": 884 }, { "epoch": 0.11, "grad_norm": 0.8438387077422554, "learning_rate": 9.82175388247256e-06, "loss": 0.5754, "step": 885 }, { "epoch": 0.11, "grad_norm": 0.995184189225101, "learning_rate": 9.821207464443631e-06, "loss": 0.7388, "step": 886 }, { "epoch": 0.11, "grad_norm": 0.818907499545714, "learning_rate": 9.8206602254133e-06, "loss": 0.6103, "step": 887 }, { "epoch": 0.11, "grad_norm": 0.7209778471552986, "learning_rate": 9.820112165474756e-06, "loss": 0.6015, "step": 888 }, { "epoch": 0.11, "grad_norm": 0.7540231073394064, "learning_rate": 9.819563284721323e-06, "loss": 0.5681, "step": 889 }, { "epoch": 0.11, "grad_norm": 0.9391713549848318, "learning_rate": 9.819013583246473e-06, "loss": 0.7238, "step": 890 }, { "epoch": 0.11, "grad_norm": 0.9053162761539533, "learning_rate": 9.818463061143813e-06, "loss": 0.6896, "step": 891 }, { "epoch": 0.11, "grad_norm": 1.0561921375057277, "learning_rate": 9.817911718507093e-06, "loss": 0.6872, "step": 892 }, { "epoch": 0.11, "grad_norm": 0.6846470415644579, "learning_rate": 9.817359555430201e-06, "loss": 0.5836, "step": 893 }, { "epoch": 0.11, "grad_norm": 0.7982779920786313, "learning_rate": 9.816806572007162e-06, "loss": 0.57, "step": 894 }, { "epoch": 0.11, "grad_norm": 0.9620989650618625, "learning_rate": 9.816252768332146e-06, "loss": 0.6669, "step": 895 }, { "epoch": 0.11, "grad_norm": 1.056540597627523, "learning_rate": 9.81569814449946e-06, "loss": 0.7202, "step": 896 }, { "epoch": 0.11, "grad_norm": 0.7390798962525975, "learning_rate": 9.815142700603546e-06, "loss": 0.5573, "step": 897 }, { "epoch": 0.11, "grad_norm": 1.1473763586635846, "learning_rate": 9.814586436738998e-06, "loss": 0.7548, "step": 898 }, { "epoch": 0.11, "grad_norm": 0.7025789120657356, "learning_rate": 9.814029353000536e-06, "loss": 0.5642, "step": 899 }, { "epoch": 0.11, "grad_norm": 0.9986804575352153, "learning_rate": 9.813471449483027e-06, "loss": 0.6569, "step": 900 }, { "epoch": 0.11, "grad_norm": 0.9814569058152091, "learning_rate": 9.812912726281475e-06, "loss": 0.6935, "step": 901 }, { "epoch": 0.11, "grad_norm": 1.1753092195010622, "learning_rate": 9.812353183491029e-06, "loss": 0.7441, "step": 902 }, { "epoch": 0.12, "grad_norm": 0.8935409273789665, "learning_rate": 9.81179282120697e-06, "loss": 0.6555, "step": 903 }, { "epoch": 0.12, "grad_norm": 0.7452936131510355, "learning_rate": 9.81123163952472e-06, "loss": 0.5895, "step": 904 }, { "epoch": 0.12, "grad_norm": 0.9321491826631151, "learning_rate": 9.810669638539849e-06, "loss": 0.6648, "step": 905 }, { "epoch": 0.12, "grad_norm": 0.8000600526917718, "learning_rate": 9.810106818348054e-06, "loss": 0.6396, "step": 906 }, { "epoch": 0.12, "grad_norm": 0.7797029623000101, "learning_rate": 9.809543179045178e-06, "loss": 0.5644, "step": 907 }, { "epoch": 0.12, "grad_norm": 0.8896708866855105, "learning_rate": 9.808978720727204e-06, "loss": 0.6514, "step": 908 }, { "epoch": 0.12, "grad_norm": 1.154650611722061, "learning_rate": 9.808413443490254e-06, "loss": 0.6867, "step": 909 }, { "epoch": 0.12, "grad_norm": 0.6915883733884495, "learning_rate": 9.80784734743059e-06, "loss": 0.5851, "step": 910 }, { "epoch": 0.12, "grad_norm": 0.9574323984000062, "learning_rate": 9.80728043264461e-06, "loss": 0.7087, "step": 911 }, { "epoch": 0.12, "grad_norm": 0.8863031933018034, "learning_rate": 9.806712699228852e-06, "loss": 0.7166, "step": 912 }, { "epoch": 0.12, "grad_norm": 8.996658414550817, "learning_rate": 9.80614414728e-06, "loss": 0.6715, "step": 913 }, { "epoch": 0.12, "grad_norm": 0.8143489316746981, "learning_rate": 9.805574776894867e-06, "loss": 0.6165, "step": 914 }, { "epoch": 0.12, "grad_norm": 1.0770707197919507, "learning_rate": 9.805004588170415e-06, "loss": 0.7589, "step": 915 }, { "epoch": 0.12, "grad_norm": 0.7684996000512531, "learning_rate": 9.804433581203741e-06, "loss": 0.5971, "step": 916 }, { "epoch": 0.12, "grad_norm": 0.8456081358683584, "learning_rate": 9.803861756092078e-06, "loss": 0.559, "step": 917 }, { "epoch": 0.12, "grad_norm": 1.0837457215584743, "learning_rate": 9.803289112932805e-06, "loss": 0.706, "step": 918 }, { "epoch": 0.12, "grad_norm": 0.9236713359625933, "learning_rate": 9.802715651823438e-06, "loss": 0.6298, "step": 919 }, { "epoch": 0.12, "grad_norm": 0.7318138955841155, "learning_rate": 9.802141372861628e-06, "loss": 0.5385, "step": 920 }, { "epoch": 0.12, "grad_norm": 1.0370023513206552, "learning_rate": 9.801566276145172e-06, "loss": 0.6184, "step": 921 }, { "epoch": 0.12, "grad_norm": 0.9136035452552146, "learning_rate": 9.800990361772e-06, "loss": 0.6461, "step": 922 }, { "epoch": 0.12, "grad_norm": 0.6927797318964658, "learning_rate": 9.800413629840187e-06, "loss": 0.555, "step": 923 }, { "epoch": 0.12, "grad_norm": 0.9253387844988584, "learning_rate": 9.799836080447943e-06, "loss": 0.6749, "step": 924 }, { "epoch": 0.12, "grad_norm": 0.7927433710973957, "learning_rate": 9.799257713693617e-06, "loss": 0.6099, "step": 925 }, { "epoch": 0.12, "grad_norm": 0.9807196104183729, "learning_rate": 9.798678529675702e-06, "loss": 0.6982, "step": 926 }, { "epoch": 0.12, "grad_norm": 1.0085773156376008, "learning_rate": 9.798098528492826e-06, "loss": 0.7441, "step": 927 }, { "epoch": 0.12, "grad_norm": 0.7927967848273345, "learning_rate": 9.797517710243756e-06, "loss": 0.5853, "step": 928 }, { "epoch": 0.12, "grad_norm": 1.069726899127923, "learning_rate": 9.796936075027401e-06, "loss": 0.6674, "step": 929 }, { "epoch": 0.12, "grad_norm": 0.783194424160951, "learning_rate": 9.796353622942807e-06, "loss": 0.5844, "step": 930 }, { "epoch": 0.12, "grad_norm": 0.6975517719419201, "learning_rate": 9.795770354089157e-06, "loss": 0.5501, "step": 931 }, { "epoch": 0.12, "grad_norm": 0.8645177028322643, "learning_rate": 9.79518626856578e-06, "loss": 0.6335, "step": 932 }, { "epoch": 0.12, "grad_norm": 1.0269379995644559, "learning_rate": 9.794601366472136e-06, "loss": 0.6961, "step": 933 }, { "epoch": 0.12, "grad_norm": 0.7705554334294366, "learning_rate": 9.794015647907828e-06, "loss": 0.6035, "step": 934 }, { "epoch": 0.12, "grad_norm": 0.7594167003974919, "learning_rate": 9.7934291129726e-06, "loss": 0.6144, "step": 935 }, { "epoch": 0.12, "grad_norm": 0.7408399246519117, "learning_rate": 9.792841761766333e-06, "loss": 0.5881, "step": 936 }, { "epoch": 0.12, "grad_norm": 0.7355307957508304, "learning_rate": 9.792253594389044e-06, "loss": 0.5657, "step": 937 }, { "epoch": 0.12, "grad_norm": 0.7755297683590929, "learning_rate": 9.791664610940893e-06, "loss": 0.6467, "step": 938 }, { "epoch": 0.12, "grad_norm": 0.7664237885590591, "learning_rate": 9.79107481152218e-06, "loss": 0.533, "step": 939 }, { "epoch": 0.12, "grad_norm": 0.7168882984652725, "learning_rate": 9.790484196233336e-06, "loss": 0.584, "step": 940 }, { "epoch": 0.12, "grad_norm": 0.7241817065915962, "learning_rate": 9.789892765174944e-06, "loss": 0.6039, "step": 941 }, { "epoch": 0.12, "grad_norm": 0.8749790333098371, "learning_rate": 9.789300518447713e-06, "loss": 0.664, "step": 942 }, { "epoch": 0.12, "grad_norm": 0.7044735322261202, "learning_rate": 9.788707456152498e-06, "loss": 0.5767, "step": 943 }, { "epoch": 0.12, "grad_norm": 0.970296065815631, "learning_rate": 9.788113578390293e-06, "loss": 0.6558, "step": 944 }, { "epoch": 0.12, "grad_norm": 0.7398011612547214, "learning_rate": 9.787518885262226e-06, "loss": 0.5542, "step": 945 }, { "epoch": 0.12, "grad_norm": 0.8183708747049194, "learning_rate": 9.78692337686957e-06, "loss": 0.6625, "step": 946 }, { "epoch": 0.12, "grad_norm": 0.7834175913686896, "learning_rate": 9.786327053313733e-06, "loss": 0.5959, "step": 947 }, { "epoch": 0.12, "grad_norm": 1.0816430807169304, "learning_rate": 9.785729914696261e-06, "loss": 0.6339, "step": 948 }, { "epoch": 0.12, "grad_norm": 1.0556911246687808, "learning_rate": 9.785131961118843e-06, "loss": 0.6434, "step": 949 }, { "epoch": 0.12, "grad_norm": 0.9440275994707563, "learning_rate": 9.784533192683303e-06, "loss": 0.6687, "step": 950 }, { "epoch": 0.12, "grad_norm": 0.6913959567580054, "learning_rate": 9.783933609491605e-06, "loss": 0.5648, "step": 951 }, { "epoch": 0.12, "grad_norm": 0.9497479875551993, "learning_rate": 9.783333211645853e-06, "loss": 0.6779, "step": 952 }, { "epoch": 0.12, "grad_norm": 1.4999423740102389, "learning_rate": 9.782731999248287e-06, "loss": 0.7535, "step": 953 }, { "epoch": 0.12, "grad_norm": 0.7762702970572679, "learning_rate": 9.782129972401286e-06, "loss": 0.5445, "step": 954 }, { "epoch": 0.12, "grad_norm": 0.7175732985264717, "learning_rate": 9.781527131207372e-06, "loss": 0.5621, "step": 955 }, { "epoch": 0.12, "grad_norm": 0.8913573472220098, "learning_rate": 9.780923475769202e-06, "loss": 0.6791, "step": 956 }, { "epoch": 0.12, "grad_norm": 0.9284652814579771, "learning_rate": 9.780319006189568e-06, "loss": 0.6466, "step": 957 }, { "epoch": 0.12, "grad_norm": 0.9185713718739321, "learning_rate": 9.779713722571413e-06, "loss": 0.6671, "step": 958 }, { "epoch": 0.12, "grad_norm": 0.8139619101491286, "learning_rate": 9.779107625017802e-06, "loss": 0.5951, "step": 959 }, { "epoch": 0.12, "grad_norm": 0.8664524718030011, "learning_rate": 9.778500713631953e-06, "loss": 0.6892, "step": 960 }, { "epoch": 0.12, "grad_norm": 0.8937139509912793, "learning_rate": 9.777892988517214e-06, "loss": 0.6415, "step": 961 }, { "epoch": 0.12, "grad_norm": 0.991570211257772, "learning_rate": 9.777284449777073e-06, "loss": 0.7085, "step": 962 }, { "epoch": 0.12, "grad_norm": 0.8168177573579105, "learning_rate": 9.776675097515162e-06, "loss": 0.6058, "step": 963 }, { "epoch": 0.12, "grad_norm": 0.6871005300940596, "learning_rate": 9.776064931835246e-06, "loss": 0.5371, "step": 964 }, { "epoch": 0.12, "grad_norm": 0.8165778920391509, "learning_rate": 9.775453952841225e-06, "loss": 0.5662, "step": 965 }, { "epoch": 0.12, "grad_norm": 0.8464232421113248, "learning_rate": 9.774842160637149e-06, "loss": 0.6342, "step": 966 }, { "epoch": 0.12, "grad_norm": 0.8658763667272781, "learning_rate": 9.774229555327195e-06, "loss": 0.6706, "step": 967 }, { "epoch": 0.12, "grad_norm": 0.7637882509715292, "learning_rate": 9.773616137015687e-06, "loss": 0.5882, "step": 968 }, { "epoch": 0.12, "grad_norm": 0.734113029645848, "learning_rate": 9.77300190580708e-06, "loss": 0.5914, "step": 969 }, { "epoch": 0.12, "grad_norm": 1.4586489261626505, "learning_rate": 9.772386861805976e-06, "loss": 0.6932, "step": 970 }, { "epoch": 0.12, "grad_norm": 2.6788801684356507, "learning_rate": 9.771771005117105e-06, "loss": 0.638, "step": 971 }, { "epoch": 0.12, "grad_norm": 0.9238597025655824, "learning_rate": 9.771154335845345e-06, "loss": 0.6903, "step": 972 }, { "epoch": 0.12, "grad_norm": 0.9639763461980686, "learning_rate": 9.770536854095709e-06, "loss": 0.6939, "step": 973 }, { "epoch": 0.12, "grad_norm": 0.731914543768533, "learning_rate": 9.769918559973345e-06, "loss": 0.521, "step": 974 }, { "epoch": 0.12, "grad_norm": 0.6916666229147426, "learning_rate": 9.76929945358354e-06, "loss": 0.5847, "step": 975 }, { "epoch": 0.12, "grad_norm": 0.9747492556513749, "learning_rate": 9.768679535031727e-06, "loss": 0.7139, "step": 976 }, { "epoch": 0.12, "grad_norm": 0.9273153089722869, "learning_rate": 9.768058804423469e-06, "loss": 0.6703, "step": 977 }, { "epoch": 0.12, "grad_norm": 1.072224791794499, "learning_rate": 9.767437261864468e-06, "loss": 0.6733, "step": 978 }, { "epoch": 0.12, "grad_norm": 0.7342018129204232, "learning_rate": 9.766814907460568e-06, "loss": 0.5607, "step": 979 }, { "epoch": 0.12, "grad_norm": 0.7804168063653115, "learning_rate": 9.766191741317752e-06, "loss": 0.6068, "step": 980 }, { "epoch": 0.12, "grad_norm": 0.9542109348711877, "learning_rate": 9.765567763542134e-06, "loss": 0.6467, "step": 981 }, { "epoch": 0.13, "grad_norm": 0.9321754241138277, "learning_rate": 9.764942974239974e-06, "loss": 0.6776, "step": 982 }, { "epoch": 0.13, "grad_norm": 0.7488462299163969, "learning_rate": 9.764317373517666e-06, "loss": 0.5895, "step": 983 }, { "epoch": 0.13, "grad_norm": 0.6984439554320233, "learning_rate": 9.763690961481744e-06, "loss": 0.553, "step": 984 }, { "epoch": 0.13, "grad_norm": 0.770024500901702, "learning_rate": 9.76306373823888e-06, "loss": 0.5555, "step": 985 }, { "epoch": 0.13, "grad_norm": 0.9338611833990433, "learning_rate": 9.76243570389588e-06, "loss": 0.6835, "step": 986 }, { "epoch": 0.13, "grad_norm": 0.7757083981569457, "learning_rate": 9.761806858559699e-06, "loss": 0.5928, "step": 987 }, { "epoch": 0.13, "grad_norm": 0.9439698032040154, "learning_rate": 9.761177202337414e-06, "loss": 0.7229, "step": 988 }, { "epoch": 0.13, "grad_norm": 0.8339692414501271, "learning_rate": 9.760546735336254e-06, "loss": 0.6523, "step": 989 }, { "epoch": 0.13, "grad_norm": 0.9450676285213673, "learning_rate": 9.759915457663581e-06, "loss": 0.6762, "step": 990 }, { "epoch": 0.13, "grad_norm": 0.8757514882704517, "learning_rate": 9.759283369426893e-06, "loss": 0.6528, "step": 991 }, { "epoch": 0.13, "grad_norm": 0.9245344168940118, "learning_rate": 9.758650470733829e-06, "loss": 0.6634, "step": 992 }, { "epoch": 0.13, "grad_norm": 0.9880739165135637, "learning_rate": 9.758016761692166e-06, "loss": 0.7216, "step": 993 }, { "epoch": 0.13, "grad_norm": 0.7660910515782134, "learning_rate": 9.757382242409816e-06, "loss": 0.583, "step": 994 }, { "epoch": 0.13, "grad_norm": 1.0359208438444523, "learning_rate": 9.756746912994832e-06, "loss": 0.7472, "step": 995 }, { "epoch": 0.13, "grad_norm": 1.1092944594479166, "learning_rate": 9.756110773555404e-06, "loss": 0.718, "step": 996 }, { "epoch": 0.13, "grad_norm": 0.94668456975757, "learning_rate": 9.75547382419986e-06, "loss": 0.6427, "step": 997 }, { "epoch": 0.13, "grad_norm": 0.8017944595849557, "learning_rate": 9.754836065036669e-06, "loss": 0.5633, "step": 998 }, { "epoch": 0.13, "grad_norm": 0.9561908986980844, "learning_rate": 9.754197496174429e-06, "loss": 0.6899, "step": 999 }, { "epoch": 0.13, "grad_norm": 0.7211650157357844, "learning_rate": 9.753558117721885e-06, "loss": 0.5579, "step": 1000 }, { "epoch": 0.13, "grad_norm": 0.7897919614351709, "learning_rate": 9.752917929787915e-06, "loss": 0.5281, "step": 1001 }, { "epoch": 0.13, "grad_norm": 0.7201909321133236, "learning_rate": 9.752276932481539e-06, "loss": 0.5745, "step": 1002 }, { "epoch": 0.13, "grad_norm": 0.9483731542265567, "learning_rate": 9.751635125911907e-06, "loss": 0.6339, "step": 1003 }, { "epoch": 0.13, "grad_norm": 0.9626046220155712, "learning_rate": 9.750992510188317e-06, "loss": 0.6568, "step": 1004 }, { "epoch": 0.13, "grad_norm": 0.7637031277927749, "learning_rate": 9.7503490854202e-06, "loss": 0.5791, "step": 1005 }, { "epoch": 0.13, "grad_norm": 0.976385085270203, "learning_rate": 9.74970485171712e-06, "loss": 0.6398, "step": 1006 }, { "epoch": 0.13, "grad_norm": 0.8391172780535088, "learning_rate": 9.749059809188785e-06, "loss": 0.5802, "step": 1007 }, { "epoch": 0.13, "grad_norm": 0.9220323658444436, "learning_rate": 9.748413957945043e-06, "loss": 0.6955, "step": 1008 }, { "epoch": 0.13, "grad_norm": 0.9578984673934916, "learning_rate": 9.74776729809587e-06, "loss": 0.6702, "step": 1009 }, { "epoch": 0.13, "grad_norm": 0.7191793711644885, "learning_rate": 9.747119829751389e-06, "loss": 0.5447, "step": 1010 }, { "epoch": 0.13, "grad_norm": 0.7480485905936854, "learning_rate": 9.746471553021856e-06, "loss": 0.573, "step": 1011 }, { "epoch": 0.13, "grad_norm": 0.8264376770169287, "learning_rate": 9.745822468017665e-06, "loss": 0.6534, "step": 1012 }, { "epoch": 0.13, "grad_norm": 0.9759465547374736, "learning_rate": 9.745172574849351e-06, "loss": 0.6313, "step": 1013 }, { "epoch": 0.13, "grad_norm": 1.1134486251567228, "learning_rate": 9.744521873627582e-06, "loss": 0.6856, "step": 1014 }, { "epoch": 0.13, "grad_norm": 1.1389548492991892, "learning_rate": 9.743870364463166e-06, "loss": 0.6529, "step": 1015 }, { "epoch": 0.13, "grad_norm": 0.882265415250145, "learning_rate": 9.743218047467046e-06, "loss": 0.6679, "step": 1016 }, { "epoch": 0.13, "grad_norm": 0.8827573084123569, "learning_rate": 9.742564922750309e-06, "loss": 0.6371, "step": 1017 }, { "epoch": 0.13, "grad_norm": 0.6788938172861039, "learning_rate": 9.741910990424173e-06, "loss": 0.5494, "step": 1018 }, { "epoch": 0.13, "grad_norm": 1.0172654869506512, "learning_rate": 9.741256250599997e-06, "loss": 0.6905, "step": 1019 }, { "epoch": 0.13, "grad_norm": 0.9582774285962188, "learning_rate": 9.740600703389274e-06, "loss": 0.7014, "step": 1020 }, { "epoch": 0.13, "grad_norm": 1.1862352499822466, "learning_rate": 9.73994434890364e-06, "loss": 0.7549, "step": 1021 }, { "epoch": 0.13, "grad_norm": 0.6761142935246048, "learning_rate": 9.739287187254864e-06, "loss": 0.5652, "step": 1022 }, { "epoch": 0.13, "grad_norm": 0.9980643274991466, "learning_rate": 9.73862921855485e-06, "loss": 0.6802, "step": 1023 }, { "epoch": 0.13, "grad_norm": 0.7314237102873798, "learning_rate": 9.737970442915653e-06, "loss": 0.6126, "step": 1024 }, { "epoch": 0.13, "grad_norm": 0.7040482478083115, "learning_rate": 9.737310860449444e-06, "loss": 0.5493, "step": 1025 }, { "epoch": 0.13, "grad_norm": 0.8643812791877247, "learning_rate": 9.73665047126855e-06, "loss": 0.6013, "step": 1026 }, { "epoch": 0.13, "grad_norm": 0.9441418949555234, "learning_rate": 9.735989275485428e-06, "loss": 0.6912, "step": 1027 }, { "epoch": 0.13, "grad_norm": 1.0338277339784687, "learning_rate": 9.73532727321267e-06, "loss": 0.6561, "step": 1028 }, { "epoch": 0.13, "grad_norm": 0.9062938706512511, "learning_rate": 9.734664464563011e-06, "loss": 0.6404, "step": 1029 }, { "epoch": 0.13, "grad_norm": 0.7982633464234161, "learning_rate": 9.734000849649317e-06, "loss": 0.5534, "step": 1030 }, { "epoch": 0.13, "grad_norm": 0.8492831011762463, "learning_rate": 9.7333364285846e-06, "loss": 0.6233, "step": 1031 }, { "epoch": 0.13, "grad_norm": 1.1000241608977386, "learning_rate": 9.732671201481997e-06, "loss": 0.6886, "step": 1032 }, { "epoch": 0.13, "grad_norm": 0.7073212107498936, "learning_rate": 9.732005168454795e-06, "loss": 0.5611, "step": 1033 }, { "epoch": 0.13, "grad_norm": 1.0494888540451057, "learning_rate": 9.73133832961641e-06, "loss": 0.6855, "step": 1034 }, { "epoch": 0.13, "grad_norm": 0.781161144930962, "learning_rate": 9.7306706850804e-06, "loss": 0.5918, "step": 1035 }, { "epoch": 0.13, "grad_norm": 0.9902122493389596, "learning_rate": 9.730002234960453e-06, "loss": 0.6479, "step": 1036 }, { "epoch": 0.13, "grad_norm": 0.737029365030554, "learning_rate": 9.729332979370405e-06, "loss": 0.6123, "step": 1037 }, { "epoch": 0.13, "grad_norm": 0.8339284811106819, "learning_rate": 9.728662918424218e-06, "loss": 0.672, "step": 1038 }, { "epoch": 0.13, "grad_norm": 0.8840243708359882, "learning_rate": 9.727992052235999e-06, "loss": 0.6238, "step": 1039 }, { "epoch": 0.13, "grad_norm": 0.7438545594674684, "learning_rate": 9.727320380919992e-06, "loss": 0.5543, "step": 1040 }, { "epoch": 0.13, "grad_norm": 0.7579648842249721, "learning_rate": 9.726647904590572e-06, "loss": 0.5899, "step": 1041 }, { "epoch": 0.13, "grad_norm": 0.8268839444898627, "learning_rate": 9.725974623362254e-06, "loss": 0.6403, "step": 1042 }, { "epoch": 0.13, "grad_norm": 0.8628467212515322, "learning_rate": 9.725300537349694e-06, "loss": 0.6215, "step": 1043 }, { "epoch": 0.13, "grad_norm": 0.9261329941212265, "learning_rate": 9.72462564666768e-06, "loss": 0.7053, "step": 1044 }, { "epoch": 0.13, "grad_norm": 0.7392891572898306, "learning_rate": 9.723949951431138e-06, "loss": 0.6042, "step": 1045 }, { "epoch": 0.13, "grad_norm": 0.9499640161631635, "learning_rate": 9.723273451755136e-06, "loss": 0.7164, "step": 1046 }, { "epoch": 0.13, "grad_norm": 1.0904090502528248, "learning_rate": 9.722596147754869e-06, "loss": 0.6382, "step": 1047 }, { "epoch": 0.13, "grad_norm": 0.7117735979327462, "learning_rate": 9.72191803954568e-06, "loss": 0.5743, "step": 1048 }, { "epoch": 0.13, "grad_norm": 1.0810130601145984, "learning_rate": 9.721239127243039e-06, "loss": 0.7218, "step": 1049 }, { "epoch": 0.13, "grad_norm": 1.0109747313607806, "learning_rate": 9.720559410962562e-06, "loss": 0.5746, "step": 1050 }, { "epoch": 0.13, "grad_norm": 0.7235026809880478, "learning_rate": 9.719878890819993e-06, "loss": 0.5635, "step": 1051 }, { "epoch": 0.13, "grad_norm": 0.6637332522030321, "learning_rate": 9.719197566931224e-06, "loss": 0.5365, "step": 1052 }, { "epoch": 0.13, "grad_norm": 0.7273680135982696, "learning_rate": 9.718515439412271e-06, "loss": 0.5412, "step": 1053 }, { "epoch": 0.13, "grad_norm": 0.9994433539312929, "learning_rate": 9.717832508379296e-06, "loss": 0.7267, "step": 1054 }, { "epoch": 0.13, "grad_norm": 0.7587576190483052, "learning_rate": 9.717148773948597e-06, "loss": 0.6008, "step": 1055 }, { "epoch": 0.13, "grad_norm": 0.6657828765042793, "learning_rate": 9.716464236236602e-06, "loss": 0.561, "step": 1056 }, { "epoch": 0.13, "grad_norm": 0.9822613120832019, "learning_rate": 9.715778895359886e-06, "loss": 0.721, "step": 1057 }, { "epoch": 0.13, "grad_norm": 0.913418529960109, "learning_rate": 9.715092751435151e-06, "loss": 0.6781, "step": 1058 }, { "epoch": 0.13, "grad_norm": 1.197345944173032, "learning_rate": 9.714405804579241e-06, "loss": 0.6978, "step": 1059 }, { "epoch": 0.14, "grad_norm": 0.8180931306214322, "learning_rate": 9.713718054909139e-06, "loss": 0.5931, "step": 1060 }, { "epoch": 0.14, "grad_norm": 0.8300969100386351, "learning_rate": 9.713029502541957e-06, "loss": 0.5673, "step": 1061 }, { "epoch": 0.14, "grad_norm": 0.8834312789834103, "learning_rate": 9.712340147594954e-06, "loss": 0.6409, "step": 1062 }, { "epoch": 0.14, "grad_norm": 0.8853772685576812, "learning_rate": 9.711649990185514e-06, "loss": 0.667, "step": 1063 }, { "epoch": 0.14, "grad_norm": 0.7104042442121519, "learning_rate": 9.710959030431167e-06, "loss": 0.533, "step": 1064 }, { "epoch": 0.14, "grad_norm": 0.7145865737843454, "learning_rate": 9.710267268449577e-06, "loss": 0.5199, "step": 1065 }, { "epoch": 0.14, "grad_norm": 0.776888570192149, "learning_rate": 9.70957470435854e-06, "loss": 0.5413, "step": 1066 }, { "epoch": 0.14, "grad_norm": 0.7498830641191208, "learning_rate": 9.708881338275995e-06, "loss": 0.617, "step": 1067 }, { "epoch": 0.14, "grad_norm": 0.8598604092731292, "learning_rate": 9.708187170320015e-06, "loss": 0.6306, "step": 1068 }, { "epoch": 0.14, "grad_norm": 0.956591081146534, "learning_rate": 9.707492200608811e-06, "loss": 0.68, "step": 1069 }, { "epoch": 0.14, "grad_norm": 0.7254653266397849, "learning_rate": 9.706796429260728e-06, "loss": 0.599, "step": 1070 }, { "epoch": 0.14, "grad_norm": 0.7433384368002686, "learning_rate": 9.706099856394246e-06, "loss": 0.5751, "step": 1071 }, { "epoch": 0.14, "grad_norm": 1.0059953111056341, "learning_rate": 9.705402482127986e-06, "loss": 0.5667, "step": 1072 }, { "epoch": 0.14, "grad_norm": 0.7208744575145551, "learning_rate": 9.704704306580705e-06, "loss": 0.4906, "step": 1073 }, { "epoch": 0.14, "grad_norm": 0.8653827368334114, "learning_rate": 9.704005329871295e-06, "loss": 0.6629, "step": 1074 }, { "epoch": 0.14, "grad_norm": 0.7497916614217609, "learning_rate": 9.703305552118782e-06, "loss": 0.5831, "step": 1075 }, { "epoch": 0.14, "grad_norm": 0.88776582778491, "learning_rate": 9.702604973442332e-06, "loss": 0.6538, "step": 1076 }, { "epoch": 0.14, "grad_norm": 0.9699490968295508, "learning_rate": 9.701903593961245e-06, "loss": 0.6528, "step": 1077 }, { "epoch": 0.14, "grad_norm": 0.7545594653859838, "learning_rate": 9.701201413794964e-06, "loss": 0.5888, "step": 1078 }, { "epoch": 0.14, "grad_norm": 0.7820740378328453, "learning_rate": 9.700498433063056e-06, "loss": 0.5955, "step": 1079 }, { "epoch": 0.14, "grad_norm": 0.8793005282937691, "learning_rate": 9.699794651885235e-06, "loss": 0.6599, "step": 1080 }, { "epoch": 0.14, "grad_norm": 0.924265123805297, "learning_rate": 9.699090070381348e-06, "loss": 0.5911, "step": 1081 }, { "epoch": 0.14, "grad_norm": 1.0259195837887995, "learning_rate": 9.698384688671374e-06, "loss": 0.685, "step": 1082 }, { "epoch": 0.14, "grad_norm": 0.8459164126899845, "learning_rate": 9.697678506875436e-06, "loss": 0.585, "step": 1083 }, { "epoch": 0.14, "grad_norm": 0.7689786376811757, "learning_rate": 9.696971525113789e-06, "loss": 0.5344, "step": 1084 }, { "epoch": 0.14, "grad_norm": 1.4437406001675164, "learning_rate": 9.696263743506825e-06, "loss": 0.6077, "step": 1085 }, { "epoch": 0.14, "grad_norm": 0.8605546054943277, "learning_rate": 9.695555162175069e-06, "loss": 0.5968, "step": 1086 }, { "epoch": 0.14, "grad_norm": 0.7742404794889682, "learning_rate": 9.694845781239188e-06, "loss": 0.6106, "step": 1087 }, { "epoch": 0.14, "grad_norm": 0.8577012986494054, "learning_rate": 9.69413560081998e-06, "loss": 0.5776, "step": 1088 }, { "epoch": 0.14, "grad_norm": 1.0295973009575785, "learning_rate": 9.693424621038384e-06, "loss": 0.7274, "step": 1089 }, { "epoch": 0.14, "grad_norm": 0.6782095924667602, "learning_rate": 9.692712842015469e-06, "loss": 0.517, "step": 1090 }, { "epoch": 0.14, "grad_norm": 0.7516306029792222, "learning_rate": 9.692000263872445e-06, "loss": 0.5644, "step": 1091 }, { "epoch": 0.14, "grad_norm": 0.9706041776408542, "learning_rate": 9.691286886730657e-06, "loss": 0.7394, "step": 1092 }, { "epoch": 0.14, "grad_norm": 1.06954227014014, "learning_rate": 9.690572710711588e-06, "loss": 0.7047, "step": 1093 }, { "epoch": 0.14, "grad_norm": 0.9821778866755354, "learning_rate": 9.689857735936849e-06, "loss": 0.628, "step": 1094 }, { "epoch": 0.14, "grad_norm": 0.6886707091719464, "learning_rate": 9.6891419625282e-06, "loss": 0.5254, "step": 1095 }, { "epoch": 0.14, "grad_norm": 0.7397853229300382, "learning_rate": 9.688425390607523e-06, "loss": 0.5499, "step": 1096 }, { "epoch": 0.14, "grad_norm": 0.8823471648877111, "learning_rate": 9.687708020296846e-06, "loss": 0.5849, "step": 1097 }, { "epoch": 0.14, "grad_norm": 0.9308471678744554, "learning_rate": 9.68698985171833e-06, "loss": 0.6343, "step": 1098 }, { "epoch": 0.14, "grad_norm": 1.0676986761338185, "learning_rate": 9.686270884994272e-06, "loss": 0.6127, "step": 1099 }, { "epoch": 0.14, "grad_norm": 0.969104487868543, "learning_rate": 9.685551120247103e-06, "loss": 0.6349, "step": 1100 }, { "epoch": 0.14, "grad_norm": 1.1454085753369472, "learning_rate": 9.68483055759939e-06, "loss": 0.6521, "step": 1101 }, { "epoch": 0.14, "grad_norm": 0.953063094752734, "learning_rate": 9.684109197173841e-06, "loss": 0.6181, "step": 1102 }, { "epoch": 0.14, "grad_norm": 0.8904523114796692, "learning_rate": 9.683387039093295e-06, "loss": 0.6284, "step": 1103 }, { "epoch": 0.14, "grad_norm": 0.8860311215614417, "learning_rate": 9.682664083480726e-06, "loss": 0.6905, "step": 1104 }, { "epoch": 0.14, "grad_norm": 0.7844583441293042, "learning_rate": 9.681940330459248e-06, "loss": 0.5937, "step": 1105 }, { "epoch": 0.14, "grad_norm": 0.8099289491219696, "learning_rate": 9.681215780152107e-06, "loss": 0.6305, "step": 1106 }, { "epoch": 0.14, "grad_norm": 0.6478141978785894, "learning_rate": 9.680490432682687e-06, "loss": 0.5326, "step": 1107 }, { "epoch": 0.14, "grad_norm": 0.9145285861436038, "learning_rate": 9.679764288174508e-06, "loss": 0.6394, "step": 1108 }, { "epoch": 0.14, "grad_norm": 0.6784913547196753, "learning_rate": 9.679037346751226e-06, "loss": 0.5186, "step": 1109 }, { "epoch": 0.14, "grad_norm": 0.8815448592817704, "learning_rate": 9.678309608536626e-06, "loss": 0.5811, "step": 1110 }, { "epoch": 0.14, "grad_norm": 0.7106822261694445, "learning_rate": 9.677581073654641e-06, "loss": 0.5921, "step": 1111 }, { "epoch": 0.14, "grad_norm": 0.9254064954950437, "learning_rate": 9.676851742229328e-06, "loss": 0.656, "step": 1112 }, { "epoch": 0.14, "grad_norm": 0.7159298742841554, "learning_rate": 9.676121614384889e-06, "loss": 0.562, "step": 1113 }, { "epoch": 0.14, "grad_norm": 0.9774645473746579, "learning_rate": 9.675390690245651e-06, "loss": 0.6846, "step": 1114 }, { "epoch": 0.14, "grad_norm": 0.7475468147852461, "learning_rate": 9.674658969936089e-06, "loss": 0.6124, "step": 1115 }, { "epoch": 0.14, "grad_norm": 1.0075918023127493, "learning_rate": 9.673926453580802e-06, "loss": 0.6877, "step": 1116 }, { "epoch": 0.14, "grad_norm": 0.7062696716317111, "learning_rate": 9.673193141304535e-06, "loss": 0.5099, "step": 1117 }, { "epoch": 0.14, "grad_norm": 1.0169237664621988, "learning_rate": 9.67245903323216e-06, "loss": 0.6672, "step": 1118 }, { "epoch": 0.14, "grad_norm": 0.6701822605912964, "learning_rate": 9.67172412948869e-06, "loss": 0.576, "step": 1119 }, { "epoch": 0.14, "grad_norm": 0.8420563298436072, "learning_rate": 9.670988430199269e-06, "loss": 0.6324, "step": 1120 }, { "epoch": 0.14, "grad_norm": 0.7481025134684111, "learning_rate": 9.67025193548918e-06, "loss": 0.5686, "step": 1121 }, { "epoch": 0.14, "grad_norm": 0.673254264784351, "learning_rate": 9.669514645483842e-06, "loss": 0.5544, "step": 1122 }, { "epoch": 0.14, "grad_norm": 1.0703485066815706, "learning_rate": 9.668776560308806e-06, "loss": 0.6644, "step": 1123 }, { "epoch": 0.14, "grad_norm": 0.6549672369818021, "learning_rate": 9.66803768008976e-06, "loss": 0.5379, "step": 1124 }, { "epoch": 0.14, "grad_norm": 0.6630891429727088, "learning_rate": 9.66729800495253e-06, "loss": 0.5412, "step": 1125 }, { "epoch": 0.14, "grad_norm": 0.9211641810654386, "learning_rate": 9.666557535023073e-06, "loss": 0.6177, "step": 1126 }, { "epoch": 0.14, "grad_norm": 1.2112562188855034, "learning_rate": 9.665816270427483e-06, "loss": 0.6545, "step": 1127 }, { "epoch": 0.14, "grad_norm": 0.996255440801724, "learning_rate": 9.66507421129199e-06, "loss": 0.667, "step": 1128 }, { "epoch": 0.14, "grad_norm": 0.7445179415592746, "learning_rate": 9.66433135774296e-06, "loss": 0.5892, "step": 1129 }, { "epoch": 0.14, "grad_norm": 0.8526749575390422, "learning_rate": 9.663587709906891e-06, "loss": 0.5533, "step": 1130 }, { "epoch": 0.14, "grad_norm": 0.7790559722425667, "learning_rate": 9.662843267910422e-06, "loss": 0.6401, "step": 1131 }, { "epoch": 0.14, "grad_norm": 1.0261150291855647, "learning_rate": 9.662098031880321e-06, "loss": 0.703, "step": 1132 }, { "epoch": 0.14, "grad_norm": 0.7651288290761387, "learning_rate": 9.661352001943494e-06, "loss": 0.5477, "step": 1133 }, { "epoch": 0.14, "grad_norm": 0.9821048154097823, "learning_rate": 9.660605178226983e-06, "loss": 0.7015, "step": 1134 }, { "epoch": 0.14, "grad_norm": 0.6986033135320731, "learning_rate": 9.659857560857963e-06, "loss": 0.5451, "step": 1135 }, { "epoch": 0.14, "grad_norm": 0.8463580272517893, "learning_rate": 9.659109149963748e-06, "loss": 0.5994, "step": 1136 }, { "epoch": 0.14, "grad_norm": 0.8270293495072578, "learning_rate": 9.658359945671782e-06, "loss": 0.6094, "step": 1137 }, { "epoch": 0.14, "grad_norm": 0.6989255274229249, "learning_rate": 9.657609948109648e-06, "loss": 0.5407, "step": 1138 }, { "epoch": 0.15, "grad_norm": 0.9548477427367056, "learning_rate": 9.656859157405063e-06, "loss": 0.6926, "step": 1139 }, { "epoch": 0.15, "grad_norm": 1.054333808008999, "learning_rate": 9.656107573685877e-06, "loss": 0.6763, "step": 1140 }, { "epoch": 0.15, "grad_norm": 0.9114501941058868, "learning_rate": 9.655355197080078e-06, "loss": 0.6377, "step": 1141 }, { "epoch": 0.15, "grad_norm": 0.9065384295139649, "learning_rate": 9.65460202771579e-06, "loss": 0.6585, "step": 1142 }, { "epoch": 0.15, "grad_norm": 0.8744252898688392, "learning_rate": 9.653848065721268e-06, "loss": 0.5465, "step": 1143 }, { "epoch": 0.15, "grad_norm": 0.8317512666969036, "learning_rate": 9.6530933112249e-06, "loss": 0.5598, "step": 1144 }, { "epoch": 0.15, "grad_norm": 0.7442850105164438, "learning_rate": 9.65233776435522e-06, "loss": 0.5808, "step": 1145 }, { "epoch": 0.15, "grad_norm": 0.8213509926674526, "learning_rate": 9.651581425240888e-06, "loss": 0.6625, "step": 1146 }, { "epoch": 0.15, "grad_norm": 0.8414158128669929, "learning_rate": 9.650824294010697e-06, "loss": 0.618, "step": 1147 }, { "epoch": 0.15, "grad_norm": 0.999969920526632, "learning_rate": 9.65006637079358e-06, "loss": 0.5781, "step": 1148 }, { "epoch": 0.15, "grad_norm": 0.842766238559155, "learning_rate": 9.649307655718605e-06, "loss": 0.5731, "step": 1149 }, { "epoch": 0.15, "grad_norm": 0.9281001688366916, "learning_rate": 9.64854814891497e-06, "loss": 0.619, "step": 1150 }, { "epoch": 0.15, "grad_norm": 0.6950859861173151, "learning_rate": 9.647787850512017e-06, "loss": 0.5583, "step": 1151 }, { "epoch": 0.15, "grad_norm": 1.035776547415945, "learning_rate": 9.647026760639212e-06, "loss": 0.7074, "step": 1152 }, { "epoch": 0.15, "grad_norm": 0.6491145986864243, "learning_rate": 9.646264879426161e-06, "loss": 0.5103, "step": 1153 }, { "epoch": 0.15, "grad_norm": 1.125381227607009, "learning_rate": 9.645502207002607e-06, "loss": 0.5963, "step": 1154 }, { "epoch": 0.15, "grad_norm": 0.9732219064604967, "learning_rate": 9.644738743498423e-06, "loss": 0.7124, "step": 1155 }, { "epoch": 0.15, "grad_norm": 0.8424016143749036, "learning_rate": 9.64397448904362e-06, "loss": 0.5485, "step": 1156 }, { "epoch": 0.15, "grad_norm": 0.8702349203298858, "learning_rate": 9.643209443768343e-06, "loss": 0.6251, "step": 1157 }, { "epoch": 0.15, "grad_norm": 1.0510521108240087, "learning_rate": 9.642443607802867e-06, "loss": 0.6843, "step": 1158 }, { "epoch": 0.15, "grad_norm": 0.8156081835652739, "learning_rate": 9.641676981277612e-06, "loss": 0.5847, "step": 1159 }, { "epoch": 0.15, "grad_norm": 0.8363072598497219, "learning_rate": 9.640909564323123e-06, "loss": 0.5835, "step": 1160 }, { "epoch": 0.15, "grad_norm": 0.7463112659389821, "learning_rate": 9.640141357070085e-06, "loss": 0.5928, "step": 1161 }, { "epoch": 0.15, "grad_norm": 0.8886616963084485, "learning_rate": 9.639372359649313e-06, "loss": 0.6857, "step": 1162 }, { "epoch": 0.15, "grad_norm": 0.8355239815009524, "learning_rate": 9.63860257219176e-06, "loss": 0.6026, "step": 1163 }, { "epoch": 0.15, "grad_norm": 0.6925469113905485, "learning_rate": 9.637831994828515e-06, "loss": 0.5464, "step": 1164 }, { "epoch": 0.15, "grad_norm": 0.7175132947005278, "learning_rate": 9.637060627690798e-06, "loss": 0.593, "step": 1165 }, { "epoch": 0.15, "grad_norm": 0.8518135673261236, "learning_rate": 9.636288470909964e-06, "loss": 0.6057, "step": 1166 }, { "epoch": 0.15, "grad_norm": 0.7355591355569097, "learning_rate": 9.635515524617505e-06, "loss": 0.5536, "step": 1167 }, { "epoch": 0.15, "grad_norm": 1.1557573645824277, "learning_rate": 9.634741788945044e-06, "loss": 0.6778, "step": 1168 }, { "epoch": 0.15, "grad_norm": 0.9569740068844284, "learning_rate": 9.63396726402434e-06, "loss": 0.6761, "step": 1169 }, { "epoch": 0.15, "grad_norm": 0.7362876019275366, "learning_rate": 9.633191949987288e-06, "loss": 0.5451, "step": 1170 }, { "epoch": 0.15, "grad_norm": 0.7998783587460482, "learning_rate": 9.632415846965915e-06, "loss": 0.5286, "step": 1171 }, { "epoch": 0.15, "grad_norm": 1.0378395289191231, "learning_rate": 9.631638955092383e-06, "loss": 0.7157, "step": 1172 }, { "epoch": 0.15, "grad_norm": 0.8621606932984904, "learning_rate": 9.630861274498988e-06, "loss": 0.64, "step": 1173 }, { "epoch": 0.15, "grad_norm": 0.934780351828164, "learning_rate": 9.630082805318164e-06, "loss": 0.6658, "step": 1174 }, { "epoch": 0.15, "grad_norm": 0.7837739866398066, "learning_rate": 9.629303547682474e-06, "loss": 0.5764, "step": 1175 }, { "epoch": 0.15, "grad_norm": 0.7628641187468533, "learning_rate": 9.628523501724615e-06, "loss": 0.5772, "step": 1176 }, { "epoch": 0.15, "grad_norm": 0.884087789020235, "learning_rate": 9.627742667577426e-06, "loss": 0.6771, "step": 1177 }, { "epoch": 0.15, "grad_norm": 0.9218733121321974, "learning_rate": 9.626961045373872e-06, "loss": 0.6625, "step": 1178 }, { "epoch": 0.15, "grad_norm": 0.7472406617081606, "learning_rate": 9.626178635247054e-06, "loss": 0.5881, "step": 1179 }, { "epoch": 0.15, "grad_norm": 0.8232236242235816, "learning_rate": 9.625395437330211e-06, "loss": 0.5835, "step": 1180 }, { "epoch": 0.15, "grad_norm": 0.6886164365483731, "learning_rate": 9.62461145175671e-06, "loss": 0.5571, "step": 1181 }, { "epoch": 0.15, "grad_norm": 0.7064570403578206, "learning_rate": 9.62382667866006e-06, "loss": 0.5474, "step": 1182 }, { "epoch": 0.15, "grad_norm": 0.869138090757438, "learning_rate": 9.623041118173896e-06, "loss": 0.6563, "step": 1183 }, { "epoch": 0.15, "grad_norm": 0.6795762825107204, "learning_rate": 9.622254770431994e-06, "loss": 0.5333, "step": 1184 }, { "epoch": 0.15, "grad_norm": 0.7212851449776427, "learning_rate": 9.621467635568255e-06, "loss": 0.6618, "step": 1185 }, { "epoch": 0.15, "grad_norm": 0.7538214840226601, "learning_rate": 9.620679713716728e-06, "loss": 0.5698, "step": 1186 }, { "epoch": 0.15, "grad_norm": 0.6892806362148062, "learning_rate": 9.619891005011582e-06, "loss": 0.6479, "step": 1187 }, { "epoch": 0.15, "grad_norm": 0.9848723775223823, "learning_rate": 9.619101509587125e-06, "loss": 0.7322, "step": 1188 }, { "epoch": 0.15, "grad_norm": 0.9732895235932517, "learning_rate": 9.618311227577806e-06, "loss": 0.6589, "step": 1189 }, { "epoch": 0.15, "grad_norm": 1.0724050384550887, "learning_rate": 9.617520159118197e-06, "loss": 0.6994, "step": 1190 }, { "epoch": 0.15, "grad_norm": 1.0181995790334541, "learning_rate": 9.616728304343008e-06, "loss": 0.7214, "step": 1191 }, { "epoch": 0.15, "grad_norm": 0.7228228092266846, "learning_rate": 9.615935663387088e-06, "loss": 0.5514, "step": 1192 }, { "epoch": 0.15, "grad_norm": 0.7450269701663594, "learning_rate": 9.615142236385412e-06, "loss": 0.5309, "step": 1193 }, { "epoch": 0.15, "grad_norm": 1.3238983007013911, "learning_rate": 9.614348023473094e-06, "loss": 0.6923, "step": 1194 }, { "epoch": 0.15, "grad_norm": 0.9288096349008675, "learning_rate": 9.613553024785379e-06, "loss": 0.6487, "step": 1195 }, { "epoch": 0.15, "grad_norm": 1.208575480176333, "learning_rate": 9.612757240457646e-06, "loss": 0.6699, "step": 1196 }, { "epoch": 0.15, "grad_norm": 0.8757025919147509, "learning_rate": 9.611960670625413e-06, "loss": 0.6494, "step": 1197 }, { "epoch": 0.15, "grad_norm": 0.8056622177242443, "learning_rate": 9.611163315424323e-06, "loss": 0.5831, "step": 1198 }, { "epoch": 0.15, "grad_norm": 0.8641291452915447, "learning_rate": 9.610365174990159e-06, "loss": 0.6328, "step": 1199 }, { "epoch": 0.15, "grad_norm": 0.7917733455120687, "learning_rate": 9.609566249458837e-06, "loss": 0.5737, "step": 1200 }, { "epoch": 0.15, "grad_norm": 0.9591997879783773, "learning_rate": 9.608766538966406e-06, "loss": 0.7323, "step": 1201 }, { "epoch": 0.15, "grad_norm": 0.750861223364628, "learning_rate": 9.607966043649047e-06, "loss": 0.5482, "step": 1202 }, { "epoch": 0.15, "grad_norm": 0.7101822686666331, "learning_rate": 9.607164763643074e-06, "loss": 0.5283, "step": 1203 }, { "epoch": 0.15, "grad_norm": 0.6666417525300337, "learning_rate": 9.606362699084939e-06, "loss": 0.5455, "step": 1204 }, { "epoch": 0.15, "grad_norm": 0.7032687074743561, "learning_rate": 9.605559850111227e-06, "loss": 0.551, "step": 1205 }, { "epoch": 0.15, "grad_norm": 0.7607037512055422, "learning_rate": 9.604756216858652e-06, "loss": 0.6034, "step": 1206 }, { "epoch": 0.15, "grad_norm": 0.7806540494720198, "learning_rate": 9.603951799464066e-06, "loss": 0.6233, "step": 1207 }, { "epoch": 0.15, "grad_norm": 1.0171141749292825, "learning_rate": 9.60314659806445e-06, "loss": 0.7097, "step": 1208 }, { "epoch": 0.15, "grad_norm": 0.6747970315407621, "learning_rate": 9.602340612796925e-06, "loss": 0.5566, "step": 1209 }, { "epoch": 0.15, "grad_norm": 0.758521470820783, "learning_rate": 9.60153384379874e-06, "loss": 0.6382, "step": 1210 }, { "epoch": 0.15, "grad_norm": 0.7822713992315741, "learning_rate": 9.60072629120728e-06, "loss": 0.5669, "step": 1211 }, { "epoch": 0.15, "grad_norm": 0.7808871287274356, "learning_rate": 9.599917955160065e-06, "loss": 0.6584, "step": 1212 }, { "epoch": 0.15, "grad_norm": 0.8656125058638835, "learning_rate": 9.599108835794742e-06, "loss": 0.6772, "step": 1213 }, { "epoch": 0.15, "grad_norm": 0.9191812405082243, "learning_rate": 9.598298933249096e-06, "loss": 0.6139, "step": 1214 }, { "epoch": 0.15, "grad_norm": 0.8571340533160847, "learning_rate": 9.597488247661048e-06, "loss": 0.6567, "step": 1215 }, { "epoch": 0.15, "grad_norm": 0.8026308537690293, "learning_rate": 9.596676779168647e-06, "loss": 0.6198, "step": 1216 }, { "epoch": 0.16, "grad_norm": 0.8233099851703983, "learning_rate": 9.59586452791008e-06, "loss": 0.6195, "step": 1217 }, { "epoch": 0.16, "grad_norm": 0.9362317565985335, "learning_rate": 9.59505149402366e-06, "loss": 0.7417, "step": 1218 }, { "epoch": 0.16, "grad_norm": 0.7583445333257788, "learning_rate": 9.594237677647844e-06, "loss": 0.6168, "step": 1219 }, { "epoch": 0.16, "grad_norm": 1.1582105541248802, "learning_rate": 9.593423078921214e-06, "loss": 0.6568, "step": 1220 }, { "epoch": 0.16, "grad_norm": 0.7023939603418193, "learning_rate": 9.592607697982485e-06, "loss": 0.5315, "step": 1221 }, { "epoch": 0.16, "grad_norm": 0.6786928111652569, "learning_rate": 9.591791534970514e-06, "loss": 0.5904, "step": 1222 }, { "epoch": 0.16, "grad_norm": 0.9867821033906398, "learning_rate": 9.59097459002428e-06, "loss": 0.6369, "step": 1223 }, { "epoch": 0.16, "grad_norm": 0.8624348907891688, "learning_rate": 9.590156863282901e-06, "loss": 0.6836, "step": 1224 }, { "epoch": 0.16, "grad_norm": 0.641528820287354, "learning_rate": 9.58933835488563e-06, "loss": 0.552, "step": 1225 }, { "epoch": 0.16, "grad_norm": 0.7724136707887936, "learning_rate": 9.588519064971847e-06, "loss": 0.6094, "step": 1226 }, { "epoch": 0.16, "grad_norm": 0.7523022956947315, "learning_rate": 9.58769899368107e-06, "loss": 0.5516, "step": 1227 }, { "epoch": 0.16, "grad_norm": 0.7636949449716391, "learning_rate": 9.58687814115295e-06, "loss": 0.5556, "step": 1228 }, { "epoch": 0.16, "grad_norm": 0.747497773473688, "learning_rate": 9.586056507527266e-06, "loss": 0.5844, "step": 1229 }, { "epoch": 0.16, "grad_norm": 0.8793282485212105, "learning_rate": 9.585234092943938e-06, "loss": 0.6807, "step": 1230 }, { "epoch": 0.16, "grad_norm": 0.7666083980324859, "learning_rate": 9.584410897543011e-06, "loss": 0.5529, "step": 1231 }, { "epoch": 0.16, "grad_norm": 0.8713840967095233, "learning_rate": 9.583586921464669e-06, "loss": 0.6026, "step": 1232 }, { "epoch": 0.16, "grad_norm": 0.9035009561916743, "learning_rate": 9.582762164849227e-06, "loss": 0.6771, "step": 1233 }, { "epoch": 0.16, "grad_norm": 0.7725721359950758, "learning_rate": 9.58193662783713e-06, "loss": 0.5566, "step": 1234 }, { "epoch": 0.16, "grad_norm": 0.7362983873862653, "learning_rate": 9.58111031056896e-06, "loss": 0.5788, "step": 1235 }, { "epoch": 0.16, "grad_norm": 0.8966396035795065, "learning_rate": 9.580283213185429e-06, "loss": 0.6745, "step": 1236 }, { "epoch": 0.16, "grad_norm": 0.9777643020445494, "learning_rate": 9.579455335827383e-06, "loss": 0.6537, "step": 1237 }, { "epoch": 0.16, "grad_norm": 0.770767692946826, "learning_rate": 9.578626678635802e-06, "loss": 0.564, "step": 1238 }, { "epoch": 0.16, "grad_norm": 0.8181996530156633, "learning_rate": 9.577797241751799e-06, "loss": 0.5622, "step": 1239 }, { "epoch": 0.16, "grad_norm": 0.8244602581987415, "learning_rate": 9.576967025316617e-06, "loss": 0.6193, "step": 1240 }, { "epoch": 0.16, "grad_norm": 0.8935014812877475, "learning_rate": 9.576136029471632e-06, "loss": 0.6623, "step": 1241 }, { "epoch": 0.16, "grad_norm": 0.8503335464566736, "learning_rate": 9.575304254358355e-06, "loss": 0.5882, "step": 1242 }, { "epoch": 0.16, "grad_norm": 0.783896665510255, "learning_rate": 9.57447170011843e-06, "loss": 0.5873, "step": 1243 }, { "epoch": 0.16, "grad_norm": 1.0204014653181868, "learning_rate": 9.573638366893629e-06, "loss": 0.7158, "step": 1244 }, { "epoch": 0.16, "grad_norm": 0.8902354987488832, "learning_rate": 9.572804254825863e-06, "loss": 0.6518, "step": 1245 }, { "epoch": 0.16, "grad_norm": 1.2378426385612038, "learning_rate": 9.571969364057172e-06, "loss": 0.7121, "step": 1246 }, { "epoch": 0.16, "grad_norm": 0.702352719161442, "learning_rate": 9.57113369472973e-06, "loss": 0.5464, "step": 1247 }, { "epoch": 0.16, "grad_norm": 0.7896264029247491, "learning_rate": 9.570297246985838e-06, "loss": 0.5968, "step": 1248 }, { "epoch": 0.16, "grad_norm": 0.8472804287380558, "learning_rate": 9.569460020967939e-06, "loss": 0.6041, "step": 1249 }, { "epoch": 0.16, "grad_norm": 0.98904717772101, "learning_rate": 9.568622016818604e-06, "loss": 0.6474, "step": 1250 }, { "epoch": 0.16, "grad_norm": 1.1273179779897353, "learning_rate": 9.567783234680534e-06, "loss": 0.5979, "step": 1251 }, { "epoch": 0.16, "grad_norm": 0.9400886228454184, "learning_rate": 9.566943674696567e-06, "loss": 0.6036, "step": 1252 }, { "epoch": 0.16, "grad_norm": 0.6921289897057276, "learning_rate": 9.56610333700967e-06, "loss": 0.5782, "step": 1253 }, { "epoch": 0.16, "grad_norm": 0.932953292240861, "learning_rate": 9.565262221762942e-06, "loss": 0.6507, "step": 1254 }, { "epoch": 0.16, "grad_norm": 0.7559671494446775, "learning_rate": 9.56442032909962e-06, "loss": 0.5761, "step": 1255 }, { "epoch": 0.16, "grad_norm": 0.6580896751907516, "learning_rate": 9.563577659163069e-06, "loss": 0.5418, "step": 1256 }, { "epoch": 0.16, "grad_norm": 1.095064655241494, "learning_rate": 9.562734212096783e-06, "loss": 0.733, "step": 1257 }, { "epoch": 0.16, "grad_norm": 1.061601469498517, "learning_rate": 9.561889988044396e-06, "loss": 0.6513, "step": 1258 }, { "epoch": 0.16, "grad_norm": 0.6710504504104681, "learning_rate": 9.56104498714967e-06, "loss": 0.5667, "step": 1259 }, { "epoch": 0.16, "grad_norm": 0.8084330586767373, "learning_rate": 9.560199209556498e-06, "loss": 0.5473, "step": 1260 }, { "epoch": 0.16, "grad_norm": 0.7853431439067693, "learning_rate": 9.55935265540891e-06, "loss": 0.5789, "step": 1261 }, { "epoch": 0.16, "grad_norm": 0.9051643593523143, "learning_rate": 9.558505324851063e-06, "loss": 0.6987, "step": 1262 }, { "epoch": 0.16, "grad_norm": 0.7178026246597083, "learning_rate": 9.55765721802725e-06, "loss": 0.6186, "step": 1263 }, { "epoch": 0.16, "grad_norm": 0.855279285925669, "learning_rate": 9.556808335081894e-06, "loss": 0.554, "step": 1264 }, { "epoch": 0.16, "grad_norm": 0.721062302262685, "learning_rate": 9.555958676159549e-06, "loss": 0.6089, "step": 1265 }, { "epoch": 0.16, "grad_norm": 1.0955402584854657, "learning_rate": 9.555108241404906e-06, "loss": 0.624, "step": 1266 }, { "epoch": 0.16, "grad_norm": 0.9105813138786871, "learning_rate": 9.554257030962786e-06, "loss": 0.6591, "step": 1267 }, { "epoch": 0.16, "grad_norm": 1.1865448796817364, "learning_rate": 9.553405044978138e-06, "loss": 0.6059, "step": 1268 }, { "epoch": 0.16, "grad_norm": 0.7758621117933575, "learning_rate": 9.552552283596049e-06, "loss": 0.5479, "step": 1269 }, { "epoch": 0.16, "grad_norm": 0.656123552463779, "learning_rate": 9.551698746961732e-06, "loss": 0.525, "step": 1270 }, { "epoch": 0.16, "grad_norm": 1.0307526875301047, "learning_rate": 9.55084443522054e-06, "loss": 0.6545, "step": 1271 }, { "epoch": 0.16, "grad_norm": 0.6740851988235872, "learning_rate": 9.549989348517949e-06, "loss": 0.5257, "step": 1272 }, { "epoch": 0.16, "grad_norm": 0.6725997419652654, "learning_rate": 9.549133486999575e-06, "loss": 0.5246, "step": 1273 }, { "epoch": 0.16, "grad_norm": 0.7537544187323888, "learning_rate": 9.54827685081116e-06, "loss": 0.5513, "step": 1274 }, { "epoch": 0.16, "grad_norm": 0.6948168622802673, "learning_rate": 9.547419440098582e-06, "loss": 0.5217, "step": 1275 }, { "epoch": 0.16, "grad_norm": 0.6915872697364935, "learning_rate": 9.546561255007847e-06, "loss": 0.5671, "step": 1276 }, { "epoch": 0.16, "grad_norm": 0.9430528685843382, "learning_rate": 9.545702295685097e-06, "loss": 0.6592, "step": 1277 }, { "epoch": 0.16, "grad_norm": 1.1604352641866587, "learning_rate": 9.544842562276602e-06, "loss": 0.6832, "step": 1278 }, { "epoch": 0.16, "grad_norm": 0.9445783621589375, "learning_rate": 9.543982054928766e-06, "loss": 0.627, "step": 1279 }, { "epoch": 0.16, "grad_norm": 0.863848836315885, "learning_rate": 9.543120773788126e-06, "loss": 0.666, "step": 1280 }, { "epoch": 0.16, "grad_norm": 0.8108027169148443, "learning_rate": 9.542258719001348e-06, "loss": 0.5592, "step": 1281 }, { "epoch": 0.16, "grad_norm": 0.9487478444979899, "learning_rate": 9.54139589071523e-06, "loss": 0.7096, "step": 1282 }, { "epoch": 0.16, "grad_norm": 0.9399022546125204, "learning_rate": 9.540532289076706e-06, "loss": 0.63, "step": 1283 }, { "epoch": 0.16, "grad_norm": 1.0395451375644704, "learning_rate": 9.539667914232832e-06, "loss": 0.6457, "step": 1284 }, { "epoch": 0.16, "grad_norm": 0.8584812975254499, "learning_rate": 9.538802766330809e-06, "loss": 0.6111, "step": 1285 }, { "epoch": 0.16, "grad_norm": 0.8812697105514897, "learning_rate": 9.53793684551796e-06, "loss": 0.6983, "step": 1286 }, { "epoch": 0.16, "grad_norm": 0.7534564534921276, "learning_rate": 9.537070151941741e-06, "loss": 0.5386, "step": 1287 }, { "epoch": 0.16, "grad_norm": 0.8743037647883589, "learning_rate": 9.536202685749743e-06, "loss": 0.6558, "step": 1288 }, { "epoch": 0.16, "grad_norm": 0.7103679472254497, "learning_rate": 9.535334447089685e-06, "loss": 0.5814, "step": 1289 }, { "epoch": 0.16, "grad_norm": 0.8371217587202098, "learning_rate": 9.534465436109418e-06, "loss": 0.5793, "step": 1290 }, { "epoch": 0.16, "grad_norm": 0.740545149772196, "learning_rate": 9.533595652956929e-06, "loss": 0.5435, "step": 1291 }, { "epoch": 0.16, "grad_norm": 0.8682122680849824, "learning_rate": 9.532725097780331e-06, "loss": 0.6506, "step": 1292 }, { "epoch": 0.16, "grad_norm": 0.9426054733014517, "learning_rate": 9.53185377072787e-06, "loss": 0.6164, "step": 1293 }, { "epoch": 0.16, "grad_norm": 1.0308009824249327, "learning_rate": 9.530981671947924e-06, "loss": 0.6619, "step": 1294 }, { "epoch": 0.16, "grad_norm": 0.7947818632864568, "learning_rate": 9.530108801589002e-06, "loss": 0.6304, "step": 1295 }, { "epoch": 0.17, "grad_norm": 0.764682880171863, "learning_rate": 9.529235159799746e-06, "loss": 0.6653, "step": 1296 }, { "epoch": 0.17, "grad_norm": 1.1214070766818558, "learning_rate": 9.528360746728928e-06, "loss": 0.6993, "step": 1297 }, { "epoch": 0.17, "grad_norm": 0.7562199877765657, "learning_rate": 9.52748556252545e-06, "loss": 0.6169, "step": 1298 }, { "epoch": 0.17, "grad_norm": 0.8306518017777018, "learning_rate": 9.526609607338348e-06, "loss": 0.5494, "step": 1299 }, { "epoch": 0.17, "grad_norm": 1.0674848113812165, "learning_rate": 9.525732881316789e-06, "loss": 0.7042, "step": 1300 }, { "epoch": 0.17, "grad_norm": 0.8407257477850494, "learning_rate": 9.524855384610068e-06, "loss": 0.6177, "step": 1301 }, { "epoch": 0.17, "grad_norm": 0.8408582982523104, "learning_rate": 9.523977117367614e-06, "loss": 0.6294, "step": 1302 }, { "epoch": 0.17, "grad_norm": 0.704748987946337, "learning_rate": 9.523098079738989e-06, "loss": 0.4795, "step": 1303 }, { "epoch": 0.17, "grad_norm": 1.2722567543723173, "learning_rate": 9.52221827187388e-06, "loss": 0.6661, "step": 1304 }, { "epoch": 0.17, "grad_norm": 0.8704658826792098, "learning_rate": 9.521337693922114e-06, "loss": 0.6194, "step": 1305 }, { "epoch": 0.17, "grad_norm": 0.7226552441015462, "learning_rate": 9.52045634603364e-06, "loss": 0.5709, "step": 1306 }, { "epoch": 0.17, "grad_norm": 0.6664619971419585, "learning_rate": 9.519574228358544e-06, "loss": 0.5347, "step": 1307 }, { "epoch": 0.17, "grad_norm": 0.9934761130127118, "learning_rate": 9.518691341047043e-06, "loss": 0.648, "step": 1308 }, { "epoch": 0.17, "grad_norm": 0.8571797360041568, "learning_rate": 9.517807684249481e-06, "loss": 0.6536, "step": 1309 }, { "epoch": 0.17, "grad_norm": 1.0939841004661959, "learning_rate": 9.516923258116336e-06, "loss": 0.681, "step": 1310 }, { "epoch": 0.17, "grad_norm": 0.7984360821842511, "learning_rate": 9.516038062798219e-06, "loss": 0.7158, "step": 1311 }, { "epoch": 0.17, "grad_norm": 0.8572021021466015, "learning_rate": 9.515152098445866e-06, "loss": 0.6399, "step": 1312 }, { "epoch": 0.17, "grad_norm": 0.9215105632257056, "learning_rate": 9.51426536521015e-06, "loss": 0.6357, "step": 1313 }, { "epoch": 0.17, "grad_norm": 0.8020917486611238, "learning_rate": 9.513377863242071e-06, "loss": 0.6523, "step": 1314 }, { "epoch": 0.17, "grad_norm": 0.9130889751057123, "learning_rate": 9.51248959269276e-06, "loss": 0.6568, "step": 1315 }, { "epoch": 0.17, "grad_norm": 0.8971737636326172, "learning_rate": 9.511600553713485e-06, "loss": 0.6094, "step": 1316 }, { "epoch": 0.17, "grad_norm": 1.035346530355026, "learning_rate": 9.510710746455636e-06, "loss": 0.6682, "step": 1317 }, { "epoch": 0.17, "grad_norm": 0.6889771668440428, "learning_rate": 9.509820171070741e-06, "loss": 0.5252, "step": 1318 }, { "epoch": 0.17, "grad_norm": 0.766535392284087, "learning_rate": 9.508928827710452e-06, "loss": 0.5695, "step": 1319 }, { "epoch": 0.17, "grad_norm": 1.0384726436607847, "learning_rate": 9.508036716526556e-06, "loss": 0.6701, "step": 1320 }, { "epoch": 0.17, "grad_norm": 0.9105623989806091, "learning_rate": 9.507143837670973e-06, "loss": 0.6165, "step": 1321 }, { "epoch": 0.17, "grad_norm": 0.7046746961580885, "learning_rate": 9.506250191295751e-06, "loss": 0.5403, "step": 1322 }, { "epoch": 0.17, "grad_norm": 0.8150641543105733, "learning_rate": 9.505355777553065e-06, "loss": 0.5389, "step": 1323 }, { "epoch": 0.17, "grad_norm": 0.7873057040910879, "learning_rate": 9.504460596595228e-06, "loss": 0.612, "step": 1324 }, { "epoch": 0.17, "grad_norm": 0.8925400447272772, "learning_rate": 9.503564648574678e-06, "loss": 0.6575, "step": 1325 }, { "epoch": 0.17, "grad_norm": 1.2570727146303182, "learning_rate": 9.502667933643985e-06, "loss": 0.6012, "step": 1326 }, { "epoch": 0.17, "grad_norm": 0.8297815118344147, "learning_rate": 9.501770451955855e-06, "loss": 0.626, "step": 1327 }, { "epoch": 0.17, "grad_norm": 1.0230441873015468, "learning_rate": 9.500872203663112e-06, "loss": 0.6949, "step": 1328 }, { "epoch": 0.17, "grad_norm": 0.7587205312937768, "learning_rate": 9.499973188918724e-06, "loss": 0.5581, "step": 1329 }, { "epoch": 0.17, "grad_norm": 1.0792922226017754, "learning_rate": 9.499073407875783e-06, "loss": 0.67, "step": 1330 }, { "epoch": 0.17, "grad_norm": 0.682004069953878, "learning_rate": 9.498172860687511e-06, "loss": 0.5813, "step": 1331 }, { "epoch": 0.17, "grad_norm": 0.8220654565512721, "learning_rate": 9.497271547507263e-06, "loss": 0.572, "step": 1332 }, { "epoch": 0.17, "grad_norm": 0.7103120261987019, "learning_rate": 9.496369468488522e-06, "loss": 0.5478, "step": 1333 }, { "epoch": 0.17, "grad_norm": 0.8726501575090189, "learning_rate": 9.495466623784903e-06, "loss": 0.6616, "step": 1334 }, { "epoch": 0.17, "grad_norm": 0.6891245386464124, "learning_rate": 9.494563013550151e-06, "loss": 0.5409, "step": 1335 }, { "epoch": 0.17, "grad_norm": 0.9539126902889865, "learning_rate": 9.493658637938143e-06, "loss": 0.6693, "step": 1336 }, { "epoch": 0.17, "grad_norm": 0.8299801617799516, "learning_rate": 9.492753497102884e-06, "loss": 0.6006, "step": 1337 }, { "epoch": 0.17, "grad_norm": 0.7813806787708597, "learning_rate": 9.491847591198509e-06, "loss": 0.5414, "step": 1338 }, { "epoch": 0.17, "grad_norm": 0.9879701067871018, "learning_rate": 9.490940920379286e-06, "loss": 0.6847, "step": 1339 }, { "epoch": 0.17, "grad_norm": 0.818481226282068, "learning_rate": 9.490033484799608e-06, "loss": 0.6516, "step": 1340 }, { "epoch": 0.17, "grad_norm": 0.8626262788636114, "learning_rate": 9.489125284614006e-06, "loss": 0.6466, "step": 1341 }, { "epoch": 0.17, "grad_norm": 1.2256309471064777, "learning_rate": 9.488216319977135e-06, "loss": 0.6807, "step": 1342 }, { "epoch": 0.17, "grad_norm": 0.7882644073183519, "learning_rate": 9.487306591043783e-06, "loss": 0.5856, "step": 1343 }, { "epoch": 0.17, "grad_norm": 0.6529156386967632, "learning_rate": 9.486396097968866e-06, "loss": 0.5762, "step": 1344 }, { "epoch": 0.17, "grad_norm": 0.6751147266659726, "learning_rate": 9.485484840907434e-06, "loss": 0.5548, "step": 1345 }, { "epoch": 0.17, "grad_norm": 0.7995661715136811, "learning_rate": 9.484572820014662e-06, "loss": 0.6517, "step": 1346 }, { "epoch": 0.17, "grad_norm": 0.6993121254898311, "learning_rate": 9.483660035445858e-06, "loss": 0.5523, "step": 1347 }, { "epoch": 0.17, "grad_norm": 0.7295088926293203, "learning_rate": 9.482746487356462e-06, "loss": 0.5845, "step": 1348 }, { "epoch": 0.17, "grad_norm": 1.1168786182956545, "learning_rate": 9.48183217590204e-06, "loss": 0.6238, "step": 1349 }, { "epoch": 0.17, "grad_norm": 1.0466771172496752, "learning_rate": 9.48091710123829e-06, "loss": 0.6458, "step": 1350 }, { "epoch": 0.17, "grad_norm": 0.8604326667107473, "learning_rate": 9.480001263521039e-06, "loss": 0.6148, "step": 1351 }, { "epoch": 0.17, "grad_norm": 0.8604001846611475, "learning_rate": 9.479084662906243e-06, "loss": 0.6519, "step": 1352 }, { "epoch": 0.17, "grad_norm": 0.9135423547478635, "learning_rate": 9.478167299549995e-06, "loss": 0.6636, "step": 1353 }, { "epoch": 0.17, "grad_norm": 0.8622045769363359, "learning_rate": 9.477249173608507e-06, "loss": 0.6096, "step": 1354 }, { "epoch": 0.17, "grad_norm": 0.8074485690598203, "learning_rate": 9.47633028523813e-06, "loss": 0.56, "step": 1355 }, { "epoch": 0.17, "grad_norm": 0.8264406834688497, "learning_rate": 9.475410634595339e-06, "loss": 0.6559, "step": 1356 }, { "epoch": 0.17, "grad_norm": 0.6960446781572884, "learning_rate": 9.474490221836741e-06, "loss": 0.611, "step": 1357 }, { "epoch": 0.17, "grad_norm": 0.8979341051489285, "learning_rate": 9.473569047119073e-06, "loss": 0.6103, "step": 1358 }, { "epoch": 0.17, "grad_norm": 0.7018476691732929, "learning_rate": 9.472647110599201e-06, "loss": 0.6043, "step": 1359 }, { "epoch": 0.17, "grad_norm": 0.716661844343984, "learning_rate": 9.471724412434122e-06, "loss": 0.5832, "step": 1360 }, { "epoch": 0.17, "grad_norm": 0.7050578556926759, "learning_rate": 9.470800952780963e-06, "loss": 0.5809, "step": 1361 }, { "epoch": 0.17, "grad_norm": 0.9556523897582249, "learning_rate": 9.469876731796976e-06, "loss": 0.7005, "step": 1362 }, { "epoch": 0.17, "grad_norm": 0.8066380348719958, "learning_rate": 9.468951749639552e-06, "loss": 0.5963, "step": 1363 }, { "epoch": 0.17, "grad_norm": 1.175657773107267, "learning_rate": 9.4680260064662e-06, "loss": 0.7185, "step": 1364 }, { "epoch": 0.17, "grad_norm": 1.0799387829964824, "learning_rate": 9.467099502434566e-06, "loss": 0.6695, "step": 1365 }, { "epoch": 0.17, "grad_norm": 1.4124809392199231, "learning_rate": 9.466172237702425e-06, "loss": 0.6987, "step": 1366 }, { "epoch": 0.17, "grad_norm": 0.8914422200422153, "learning_rate": 9.46524421242768e-06, "loss": 0.6439, "step": 1367 }, { "epoch": 0.17, "grad_norm": 0.7152675167335548, "learning_rate": 9.464315426768365e-06, "loss": 0.5595, "step": 1368 }, { "epoch": 0.17, "grad_norm": 0.9298567589824199, "learning_rate": 9.463385880882641e-06, "loss": 0.6938, "step": 1369 }, { "epoch": 0.17, "grad_norm": 1.1389939999792575, "learning_rate": 9.462455574928802e-06, "loss": 0.728, "step": 1370 }, { "epoch": 0.17, "grad_norm": 0.8292786689476357, "learning_rate": 9.461524509065266e-06, "loss": 0.6418, "step": 1371 }, { "epoch": 0.17, "grad_norm": 0.7443732277340811, "learning_rate": 9.460592683450587e-06, "loss": 0.5763, "step": 1372 }, { "epoch": 0.17, "grad_norm": 0.935505321703371, "learning_rate": 9.459660098243444e-06, "loss": 0.6357, "step": 1373 }, { "epoch": 0.18, "grad_norm": 0.8762314456229187, "learning_rate": 9.458726753602648e-06, "loss": 0.6572, "step": 1374 }, { "epoch": 0.18, "grad_norm": 0.8708782293166181, "learning_rate": 9.457792649687135e-06, "loss": 0.6781, "step": 1375 }, { "epoch": 0.18, "grad_norm": 0.8148322706339097, "learning_rate": 9.456857786655975e-06, "loss": 0.6513, "step": 1376 }, { "epoch": 0.18, "grad_norm": 0.7563675711411383, "learning_rate": 9.455922164668366e-06, "loss": 0.5445, "step": 1377 }, { "epoch": 0.18, "grad_norm": 0.7395653501631607, "learning_rate": 9.454985783883633e-06, "loss": 0.5728, "step": 1378 }, { "epoch": 0.18, "grad_norm": 0.6643466566999617, "learning_rate": 9.454048644461233e-06, "loss": 0.5562, "step": 1379 }, { "epoch": 0.18, "grad_norm": 0.8586519316683581, "learning_rate": 9.45311074656075e-06, "loss": 0.6678, "step": 1380 }, { "epoch": 0.18, "grad_norm": 0.7106282287523121, "learning_rate": 9.4521720903419e-06, "loss": 0.5403, "step": 1381 }, { "epoch": 0.18, "grad_norm": 0.7872496072655488, "learning_rate": 9.451232675964526e-06, "loss": 0.6083, "step": 1382 }, { "epoch": 0.18, "grad_norm": 0.6381917221280797, "learning_rate": 9.450292503588599e-06, "loss": 0.5332, "step": 1383 }, { "epoch": 0.18, "grad_norm": 0.9896206723666053, "learning_rate": 9.449351573374221e-06, "loss": 0.6607, "step": 1384 }, { "epoch": 0.18, "grad_norm": 1.487989563253944, "learning_rate": 9.448409885481623e-06, "loss": 0.6584, "step": 1385 }, { "epoch": 0.18, "grad_norm": 0.7021826800844788, "learning_rate": 9.447467440071165e-06, "loss": 0.5486, "step": 1386 }, { "epoch": 0.18, "grad_norm": 0.8628202573182997, "learning_rate": 9.446524237303337e-06, "loss": 0.565, "step": 1387 }, { "epoch": 0.18, "grad_norm": 0.6472861922924706, "learning_rate": 9.445580277338753e-06, "loss": 0.5045, "step": 1388 }, { "epoch": 0.18, "grad_norm": 0.9832946693572294, "learning_rate": 9.444635560338161e-06, "loss": 0.7067, "step": 1389 }, { "epoch": 0.18, "grad_norm": 0.870460099038199, "learning_rate": 9.443690086462438e-06, "loss": 0.6368, "step": 1390 }, { "epoch": 0.18, "grad_norm": 0.9355886593765449, "learning_rate": 9.442743855872588e-06, "loss": 0.7125, "step": 1391 }, { "epoch": 0.18, "grad_norm": 0.8267630267433251, "learning_rate": 9.441796868729743e-06, "loss": 0.6197, "step": 1392 }, { "epoch": 0.18, "grad_norm": 1.3247391721524735, "learning_rate": 9.440849125195165e-06, "loss": 0.6262, "step": 1393 }, { "epoch": 0.18, "grad_norm": 0.7697499940467353, "learning_rate": 9.439900625430245e-06, "loss": 0.564, "step": 1394 }, { "epoch": 0.18, "grad_norm": 0.731071347295179, "learning_rate": 9.438951369596502e-06, "loss": 0.5628, "step": 1395 }, { "epoch": 0.18, "grad_norm": 0.9867238236472955, "learning_rate": 9.438001357855589e-06, "loss": 0.6645, "step": 1396 }, { "epoch": 0.18, "grad_norm": 0.6913602809795198, "learning_rate": 9.437050590369277e-06, "loss": 0.5717, "step": 1397 }, { "epoch": 0.18, "grad_norm": 0.7644375256545279, "learning_rate": 9.436099067299476e-06, "loss": 0.5158, "step": 1398 }, { "epoch": 0.18, "grad_norm": 1.0514777782654239, "learning_rate": 9.435146788808217e-06, "loss": 0.7343, "step": 1399 }, { "epoch": 0.18, "grad_norm": 0.6504492789035554, "learning_rate": 9.434193755057666e-06, "loss": 0.5177, "step": 1400 }, { "epoch": 0.18, "grad_norm": 0.8861569283497982, "learning_rate": 9.433239966210113e-06, "loss": 0.6767, "step": 1401 }, { "epoch": 0.18, "grad_norm": 0.7366680027550628, "learning_rate": 9.432285422427979e-06, "loss": 0.5461, "step": 1402 }, { "epoch": 0.18, "grad_norm": 0.8129918274310444, "learning_rate": 9.431330123873813e-06, "loss": 0.5718, "step": 1403 }, { "epoch": 0.18, "grad_norm": 0.7986296907262868, "learning_rate": 9.430374070710293e-06, "loss": 0.6201, "step": 1404 }, { "epoch": 0.18, "grad_norm": 0.9412767660410114, "learning_rate": 9.429417263100222e-06, "loss": 0.7098, "step": 1405 }, { "epoch": 0.18, "grad_norm": 0.7980304664743195, "learning_rate": 9.428459701206538e-06, "loss": 0.5718, "step": 1406 }, { "epoch": 0.18, "grad_norm": 0.7384508780938681, "learning_rate": 9.427501385192302e-06, "loss": 0.509, "step": 1407 }, { "epoch": 0.18, "grad_norm": 0.7535811350858405, "learning_rate": 9.426542315220706e-06, "loss": 0.5938, "step": 1408 }, { "epoch": 0.18, "grad_norm": 0.880508787461307, "learning_rate": 9.425582491455068e-06, "loss": 0.6789, "step": 1409 }, { "epoch": 0.18, "grad_norm": 0.6830295549212533, "learning_rate": 9.424621914058837e-06, "loss": 0.5382, "step": 1410 }, { "epoch": 0.18, "grad_norm": 0.7009254136090947, "learning_rate": 9.423660583195591e-06, "loss": 0.5395, "step": 1411 }, { "epoch": 0.18, "grad_norm": 0.9452855425942293, "learning_rate": 9.422698499029032e-06, "loss": 0.6383, "step": 1412 }, { "epoch": 0.18, "grad_norm": 1.0505196889008737, "learning_rate": 9.421735661722993e-06, "loss": 0.7178, "step": 1413 }, { "epoch": 0.18, "grad_norm": 1.0373377940917945, "learning_rate": 9.420772071441437e-06, "loss": 0.6746, "step": 1414 }, { "epoch": 0.18, "grad_norm": 0.7321410015833552, "learning_rate": 9.419807728348454e-06, "loss": 0.5227, "step": 1415 }, { "epoch": 0.18, "grad_norm": 0.9335362591314534, "learning_rate": 9.418842632608258e-06, "loss": 0.7, "step": 1416 }, { "epoch": 0.18, "grad_norm": 0.8204885066714896, "learning_rate": 9.417876784385197e-06, "loss": 0.5958, "step": 1417 }, { "epoch": 0.18, "grad_norm": 0.8012454116399874, "learning_rate": 9.416910183843746e-06, "loss": 0.6025, "step": 1418 }, { "epoch": 0.18, "grad_norm": 0.7928185226323354, "learning_rate": 9.415942831148503e-06, "loss": 0.5419, "step": 1419 }, { "epoch": 0.18, "grad_norm": 1.5483265539463231, "learning_rate": 9.414974726464203e-06, "loss": 0.6869, "step": 1420 }, { "epoch": 0.18, "grad_norm": 0.900369485492935, "learning_rate": 9.414005869955702e-06, "loss": 0.6095, "step": 1421 }, { "epoch": 0.18, "grad_norm": 0.9433803283459624, "learning_rate": 9.413036261787987e-06, "loss": 0.6284, "step": 1422 }, { "epoch": 0.18, "grad_norm": 0.7701997617779117, "learning_rate": 9.41206590212617e-06, "loss": 0.5691, "step": 1423 }, { "epoch": 0.18, "grad_norm": 0.8970706584427929, "learning_rate": 9.411094791135494e-06, "loss": 0.6914, "step": 1424 }, { "epoch": 0.18, "grad_norm": 0.6680764281951888, "learning_rate": 9.410122928981332e-06, "loss": 0.5313, "step": 1425 }, { "epoch": 0.18, "grad_norm": 0.9388747901596838, "learning_rate": 9.409150315829177e-06, "loss": 0.6981, "step": 1426 }, { "epoch": 0.18, "grad_norm": 0.7466058230171558, "learning_rate": 9.408176951844657e-06, "loss": 0.553, "step": 1427 }, { "epoch": 0.18, "grad_norm": 0.867009821561423, "learning_rate": 9.407202837193527e-06, "loss": 0.6286, "step": 1428 }, { "epoch": 0.18, "grad_norm": 0.9830153456580065, "learning_rate": 9.406227972041668e-06, "loss": 0.7027, "step": 1429 }, { "epoch": 0.18, "grad_norm": 0.9238394132248928, "learning_rate": 9.405252356555089e-06, "loss": 0.6758, "step": 1430 }, { "epoch": 0.18, "grad_norm": 0.9919844551003053, "learning_rate": 9.404275990899927e-06, "loss": 0.6995, "step": 1431 }, { "epoch": 0.18, "grad_norm": 1.00166222259337, "learning_rate": 9.403298875242448e-06, "loss": 0.7228, "step": 1432 }, { "epoch": 0.18, "grad_norm": 0.9935346146699167, "learning_rate": 9.402321009749043e-06, "loss": 0.6908, "step": 1433 }, { "epoch": 0.18, "grad_norm": 0.9314062931808536, "learning_rate": 9.401342394586234e-06, "loss": 0.6446, "step": 1434 }, { "epoch": 0.18, "grad_norm": 0.8467541347200406, "learning_rate": 9.400363029920667e-06, "loss": 0.5836, "step": 1435 }, { "epoch": 0.18, "grad_norm": 0.8297993023382004, "learning_rate": 9.399382915919119e-06, "loss": 0.61, "step": 1436 }, { "epoch": 0.18, "grad_norm": 0.7607304868667946, "learning_rate": 9.398402052748492e-06, "loss": 0.5807, "step": 1437 }, { "epoch": 0.18, "grad_norm": 1.0092052694956086, "learning_rate": 9.39742044057582e-06, "loss": 0.6518, "step": 1438 }, { "epoch": 0.18, "grad_norm": 0.800278530033686, "learning_rate": 9.396438079568257e-06, "loss": 0.5804, "step": 1439 }, { "epoch": 0.18, "grad_norm": 0.724408868218778, "learning_rate": 9.395454969893092e-06, "loss": 0.5308, "step": 1440 }, { "epoch": 0.18, "grad_norm": 0.6459185893983147, "learning_rate": 9.394471111717735e-06, "loss": 0.5805, "step": 1441 }, { "epoch": 0.18, "grad_norm": 1.2806999126351153, "learning_rate": 9.393486505209731e-06, "loss": 0.7055, "step": 1442 }, { "epoch": 0.18, "grad_norm": 0.8090690580736293, "learning_rate": 9.392501150536747e-06, "loss": 0.6058, "step": 1443 }, { "epoch": 0.18, "grad_norm": 0.8876635050002758, "learning_rate": 9.391515047866575e-06, "loss": 0.6519, "step": 1444 }, { "epoch": 0.18, "grad_norm": 0.6573797049659426, "learning_rate": 9.390528197367143e-06, "loss": 0.5477, "step": 1445 }, { "epoch": 0.18, "grad_norm": 0.7708956511975157, "learning_rate": 9.389540599206497e-06, "loss": 0.5361, "step": 1446 }, { "epoch": 0.18, "grad_norm": 0.9360230370143557, "learning_rate": 9.388552253552818e-06, "loss": 0.6693, "step": 1447 }, { "epoch": 0.18, "grad_norm": 0.970158527075951, "learning_rate": 9.387563160574411e-06, "loss": 0.6898, "step": 1448 }, { "epoch": 0.18, "grad_norm": 0.8756441277447434, "learning_rate": 9.386573320439706e-06, "loss": 0.6457, "step": 1449 }, { "epoch": 0.18, "grad_norm": 0.8956500569817887, "learning_rate": 9.385582733317264e-06, "loss": 0.7085, "step": 1450 }, { "epoch": 0.18, "grad_norm": 0.9998782305202425, "learning_rate": 9.38459139937577e-06, "loss": 0.7146, "step": 1451 }, { "epoch": 0.18, "grad_norm": 0.7230468564212061, "learning_rate": 9.38359931878404e-06, "loss": 0.5747, "step": 1452 }, { "epoch": 0.19, "grad_norm": 0.713890040657229, "learning_rate": 9.382606491711015e-06, "loss": 0.5823, "step": 1453 }, { "epoch": 0.19, "grad_norm": 0.867719149009511, "learning_rate": 9.38161291832576e-06, "loss": 0.6558, "step": 1454 }, { "epoch": 0.19, "grad_norm": 0.8203425675832203, "learning_rate": 9.380618598797473e-06, "loss": 0.5727, "step": 1455 }, { "epoch": 0.19, "grad_norm": 0.7142825890240405, "learning_rate": 9.379623533295476e-06, "loss": 0.5146, "step": 1456 }, { "epoch": 0.19, "grad_norm": 0.6592433207121559, "learning_rate": 9.378627721989218e-06, "loss": 0.5358, "step": 1457 }, { "epoch": 0.19, "grad_norm": 0.9731278707589606, "learning_rate": 9.377631165048276e-06, "loss": 0.6394, "step": 1458 }, { "epoch": 0.19, "grad_norm": 0.8385952273910086, "learning_rate": 9.37663386264235e-06, "loss": 0.5822, "step": 1459 }, { "epoch": 0.19, "grad_norm": 0.7254346473306581, "learning_rate": 9.375635814941276e-06, "loss": 0.553, "step": 1460 }, { "epoch": 0.19, "grad_norm": 0.7584583063743576, "learning_rate": 9.374637022115005e-06, "loss": 0.5965, "step": 1461 }, { "epoch": 0.19, "grad_norm": 0.7261967805197976, "learning_rate": 9.373637484333625e-06, "loss": 0.6309, "step": 1462 }, { "epoch": 0.19, "grad_norm": 1.03802615713192, "learning_rate": 9.372637201767345e-06, "loss": 0.6953, "step": 1463 }, { "epoch": 0.19, "grad_norm": 0.6553971967453723, "learning_rate": 9.371636174586503e-06, "loss": 0.5668, "step": 1464 }, { "epoch": 0.19, "grad_norm": 0.6941242968879597, "learning_rate": 9.370634402961565e-06, "loss": 0.5796, "step": 1465 }, { "epoch": 0.19, "grad_norm": 0.8691198834770186, "learning_rate": 9.36963188706312e-06, "loss": 0.602, "step": 1466 }, { "epoch": 0.19, "grad_norm": 0.951613735756575, "learning_rate": 9.368628627061886e-06, "loss": 0.6696, "step": 1467 }, { "epoch": 0.19, "grad_norm": 0.8382284889157989, "learning_rate": 9.367624623128711e-06, "loss": 0.6489, "step": 1468 }, { "epoch": 0.19, "grad_norm": 0.7626337112380835, "learning_rate": 9.366619875434563e-06, "loss": 0.6022, "step": 1469 }, { "epoch": 0.19, "grad_norm": 0.8098200177881252, "learning_rate": 9.365614384150543e-06, "loss": 0.6116, "step": 1470 }, { "epoch": 0.19, "grad_norm": 0.7226955687707713, "learning_rate": 9.364608149447873e-06, "loss": 0.587, "step": 1471 }, { "epoch": 0.19, "grad_norm": 0.9743341693176809, "learning_rate": 9.363601171497906e-06, "loss": 0.6565, "step": 1472 }, { "epoch": 0.19, "grad_norm": 0.9084918718709991, "learning_rate": 9.362593450472118e-06, "loss": 0.7229, "step": 1473 }, { "epoch": 0.19, "grad_norm": 0.9738504066676951, "learning_rate": 9.361584986542116e-06, "loss": 0.7066, "step": 1474 }, { "epoch": 0.19, "grad_norm": 1.3025922797655187, "learning_rate": 9.360575779879628e-06, "loss": 0.6047, "step": 1475 }, { "epoch": 0.19, "grad_norm": 0.8598603129016218, "learning_rate": 9.359565830656514e-06, "loss": 0.6249, "step": 1476 }, { "epoch": 0.19, "grad_norm": 0.717159892697812, "learning_rate": 9.358555139044755e-06, "loss": 0.4972, "step": 1477 }, { "epoch": 0.19, "grad_norm": 0.812296559753277, "learning_rate": 9.357543705216465e-06, "loss": 0.5865, "step": 1478 }, { "epoch": 0.19, "grad_norm": 0.91581960766695, "learning_rate": 9.356531529343878e-06, "loss": 0.6833, "step": 1479 }, { "epoch": 0.19, "grad_norm": 0.8470751419432861, "learning_rate": 9.355518611599357e-06, "loss": 0.6275, "step": 1480 }, { "epoch": 0.19, "grad_norm": 0.7002160091450838, "learning_rate": 9.354504952155392e-06, "loss": 0.5048, "step": 1481 }, { "epoch": 0.19, "grad_norm": 0.7655283760417843, "learning_rate": 9.353490551184597e-06, "loss": 0.595, "step": 1482 }, { "epoch": 0.19, "grad_norm": 0.8797984194132432, "learning_rate": 9.352475408859718e-06, "loss": 0.6278, "step": 1483 }, { "epoch": 0.19, "grad_norm": 0.9037980559426629, "learning_rate": 9.351459525353618e-06, "loss": 0.6017, "step": 1484 }, { "epoch": 0.19, "grad_norm": 0.7717744164747941, "learning_rate": 9.350442900839297e-06, "loss": 0.5593, "step": 1485 }, { "epoch": 0.19, "grad_norm": 1.0467950755337083, "learning_rate": 9.349425535489871e-06, "loss": 0.658, "step": 1486 }, { "epoch": 0.19, "grad_norm": 0.9252749318662788, "learning_rate": 9.348407429478589e-06, "loss": 0.6179, "step": 1487 }, { "epoch": 0.19, "grad_norm": 0.7601673282213051, "learning_rate": 9.347388582978822e-06, "loss": 0.5189, "step": 1488 }, { "epoch": 0.19, "grad_norm": 0.8738311803154118, "learning_rate": 9.346368996164072e-06, "loss": 0.652, "step": 1489 }, { "epoch": 0.19, "grad_norm": 1.0064289054746205, "learning_rate": 9.345348669207961e-06, "loss": 0.6466, "step": 1490 }, { "epoch": 0.19, "grad_norm": 0.8673882636723294, "learning_rate": 9.34432760228424e-06, "loss": 0.6283, "step": 1491 }, { "epoch": 0.19, "grad_norm": 0.8999332216547168, "learning_rate": 9.343305795566788e-06, "loss": 0.7124, "step": 1492 }, { "epoch": 0.19, "grad_norm": 0.797703822431825, "learning_rate": 9.342283249229608e-06, "loss": 0.5313, "step": 1493 }, { "epoch": 0.19, "grad_norm": 0.7253846052120263, "learning_rate": 9.341259963446827e-06, "loss": 0.6194, "step": 1494 }, { "epoch": 0.19, "grad_norm": 0.7344136739618077, "learning_rate": 9.340235938392703e-06, "loss": 0.6112, "step": 1495 }, { "epoch": 0.19, "grad_norm": 0.7522858647091321, "learning_rate": 9.339211174241613e-06, "loss": 0.6099, "step": 1496 }, { "epoch": 0.19, "grad_norm": 0.8406951207448761, "learning_rate": 9.338185671168067e-06, "loss": 0.7125, "step": 1497 }, { "epoch": 0.19, "grad_norm": 0.8489361295417559, "learning_rate": 9.337159429346697e-06, "loss": 0.6684, "step": 1498 }, { "epoch": 0.19, "grad_norm": 0.6619646370906785, "learning_rate": 9.336132448952258e-06, "loss": 0.583, "step": 1499 }, { "epoch": 0.19, "grad_norm": 0.942165444345931, "learning_rate": 9.335104730159637e-06, "loss": 0.6481, "step": 1500 }, { "epoch": 0.19, "grad_norm": 0.7299514233441786, "learning_rate": 9.334076273143843e-06, "loss": 0.5574, "step": 1501 }, { "epoch": 0.19, "grad_norm": 0.9408891725261744, "learning_rate": 9.333047078080013e-06, "loss": 0.6766, "step": 1502 }, { "epoch": 0.19, "grad_norm": 0.7670756329385489, "learning_rate": 9.332017145143407e-06, "loss": 0.5757, "step": 1503 }, { "epoch": 0.19, "grad_norm": 0.8372802244388492, "learning_rate": 9.33098647450941e-06, "loss": 0.6382, "step": 1504 }, { "epoch": 0.19, "grad_norm": 0.8219203876378051, "learning_rate": 9.329955066353538e-06, "loss": 0.6065, "step": 1505 }, { "epoch": 0.19, "grad_norm": 0.7913249540779064, "learning_rate": 9.328922920851426e-06, "loss": 0.6177, "step": 1506 }, { "epoch": 0.19, "grad_norm": 1.0051561912811227, "learning_rate": 9.32789003817884e-06, "loss": 0.6596, "step": 1507 }, { "epoch": 0.19, "grad_norm": 0.7105315374674107, "learning_rate": 9.326856418511668e-06, "loss": 0.6082, "step": 1508 }, { "epoch": 0.19, "grad_norm": 0.6807468876100213, "learning_rate": 9.325822062025924e-06, "loss": 0.5192, "step": 1509 }, { "epoch": 0.19, "grad_norm": 0.8061319371190606, "learning_rate": 9.32478696889775e-06, "loss": 0.6076, "step": 1510 }, { "epoch": 0.19, "grad_norm": 1.2858866682190981, "learning_rate": 9.32375113930341e-06, "loss": 0.7408, "step": 1511 }, { "epoch": 0.19, "grad_norm": 0.7252193360405388, "learning_rate": 9.322714573419296e-06, "loss": 0.6354, "step": 1512 }, { "epoch": 0.19, "grad_norm": 0.79000626868656, "learning_rate": 9.321677271421923e-06, "loss": 0.6011, "step": 1513 }, { "epoch": 0.19, "grad_norm": 0.861675515987892, "learning_rate": 9.320639233487934e-06, "loss": 0.6566, "step": 1514 }, { "epoch": 0.19, "grad_norm": 0.9193408449032802, "learning_rate": 9.319600459794097e-06, "loss": 0.6488, "step": 1515 }, { "epoch": 0.19, "grad_norm": 0.9942805917249699, "learning_rate": 9.318560950517298e-06, "loss": 0.6469, "step": 1516 }, { "epoch": 0.19, "grad_norm": 0.8442956214935163, "learning_rate": 9.317520705834566e-06, "loss": 0.5933, "step": 1517 }, { "epoch": 0.19, "grad_norm": 0.7672275061678321, "learning_rate": 9.316479725923034e-06, "loss": 0.5912, "step": 1518 }, { "epoch": 0.19, "grad_norm": 0.7771469973618885, "learning_rate": 9.315438010959974e-06, "loss": 0.591, "step": 1519 }, { "epoch": 0.19, "grad_norm": 0.9186584532423744, "learning_rate": 9.314395561122778e-06, "loss": 0.704, "step": 1520 }, { "epoch": 0.19, "grad_norm": 0.922574073865881, "learning_rate": 9.313352376588966e-06, "loss": 0.7353, "step": 1521 }, { "epoch": 0.19, "grad_norm": 0.9866940849240626, "learning_rate": 9.31230845753618e-06, "loss": 0.739, "step": 1522 }, { "epoch": 0.19, "grad_norm": 0.7401484272040525, "learning_rate": 9.31126380414219e-06, "loss": 0.583, "step": 1523 }, { "epoch": 0.19, "grad_norm": 0.7426936246810004, "learning_rate": 9.310218416584887e-06, "loss": 0.6274, "step": 1524 }, { "epoch": 0.19, "grad_norm": 0.7480885517503246, "learning_rate": 9.309172295042291e-06, "loss": 0.6283, "step": 1525 }, { "epoch": 0.19, "grad_norm": 0.7414999595938194, "learning_rate": 9.308125439692546e-06, "loss": 0.5747, "step": 1526 }, { "epoch": 0.19, "grad_norm": 0.8200381512798903, "learning_rate": 9.307077850713922e-06, "loss": 0.5237, "step": 1527 }, { "epoch": 0.19, "grad_norm": 0.696438969130969, "learning_rate": 9.30602952828481e-06, "loss": 0.5271, "step": 1528 }, { "epoch": 0.19, "grad_norm": 0.8900412487177588, "learning_rate": 9.304980472583729e-06, "loss": 0.601, "step": 1529 }, { "epoch": 0.19, "grad_norm": 0.9933262422355958, "learning_rate": 9.303930683789322e-06, "loss": 0.6783, "step": 1530 }, { "epoch": 0.2, "grad_norm": 0.8104814466678987, "learning_rate": 9.302880162080358e-06, "loss": 0.6113, "step": 1531 }, { "epoch": 0.2, "grad_norm": 1.105396965825193, "learning_rate": 9.30182890763573e-06, "loss": 0.6562, "step": 1532 }, { "epoch": 0.2, "grad_norm": 0.872229444170377, "learning_rate": 9.300776920634454e-06, "loss": 0.5773, "step": 1533 }, { "epoch": 0.2, "grad_norm": 0.818441395272117, "learning_rate": 9.299724201255676e-06, "loss": 0.5879, "step": 1534 }, { "epoch": 0.2, "grad_norm": 0.7809104368939722, "learning_rate": 9.298670749678657e-06, "loss": 0.5796, "step": 1535 }, { "epoch": 0.2, "grad_norm": 0.7604625908175191, "learning_rate": 9.297616566082796e-06, "loss": 0.604, "step": 1536 }, { "epoch": 0.2, "grad_norm": 0.7545217657442671, "learning_rate": 9.296561650647605e-06, "loss": 0.6624, "step": 1537 }, { "epoch": 0.2, "grad_norm": 0.8019431217621308, "learning_rate": 9.295506003552725e-06, "loss": 0.6174, "step": 1538 }, { "epoch": 0.2, "grad_norm": 0.718438041337846, "learning_rate": 9.294449624977923e-06, "loss": 0.5909, "step": 1539 }, { "epoch": 0.2, "grad_norm": 0.7164170654957996, "learning_rate": 9.29339251510309e-06, "loss": 0.5872, "step": 1540 }, { "epoch": 0.2, "grad_norm": 1.028751660001763, "learning_rate": 9.292334674108239e-06, "loss": 0.6968, "step": 1541 }, { "epoch": 0.2, "grad_norm": 0.8817943770725851, "learning_rate": 9.291276102173512e-06, "loss": 0.6208, "step": 1542 }, { "epoch": 0.2, "grad_norm": 0.775162344489918, "learning_rate": 9.290216799479169e-06, "loss": 0.5801, "step": 1543 }, { "epoch": 0.2, "grad_norm": 2.1747453556725396, "learning_rate": 9.289156766205601e-06, "loss": 0.6815, "step": 1544 }, { "epoch": 0.2, "grad_norm": 0.9637029153848131, "learning_rate": 9.28809600253332e-06, "loss": 0.6674, "step": 1545 }, { "epoch": 0.2, "grad_norm": 0.843118713672653, "learning_rate": 9.287034508642962e-06, "loss": 0.6359, "step": 1546 }, { "epoch": 0.2, "grad_norm": 0.7437236261442519, "learning_rate": 9.285972284715291e-06, "loss": 0.6044, "step": 1547 }, { "epoch": 0.2, "grad_norm": 0.6883973872906871, "learning_rate": 9.28490933093119e-06, "loss": 0.5307, "step": 1548 }, { "epoch": 0.2, "grad_norm": 0.7291908889033247, "learning_rate": 9.28384564747167e-06, "loss": 0.5675, "step": 1549 }, { "epoch": 0.2, "grad_norm": 1.0589610166955687, "learning_rate": 9.282781234517863e-06, "loss": 0.6647, "step": 1550 }, { "epoch": 0.2, "grad_norm": 0.8798381364021589, "learning_rate": 9.281716092251033e-06, "loss": 0.6854, "step": 1551 }, { "epoch": 0.2, "grad_norm": 1.0137580050664556, "learning_rate": 9.280650220852556e-06, "loss": 0.5418, "step": 1552 }, { "epoch": 0.2, "grad_norm": 0.8788401563020618, "learning_rate": 9.279583620503942e-06, "loss": 0.6015, "step": 1553 }, { "epoch": 0.2, "grad_norm": 0.7614720491735161, "learning_rate": 9.278516291386823e-06, "loss": 0.6122, "step": 1554 }, { "epoch": 0.2, "grad_norm": 1.1049815457556047, "learning_rate": 9.277448233682953e-06, "loss": 0.6349, "step": 1555 }, { "epoch": 0.2, "grad_norm": 0.7226597521231889, "learning_rate": 9.27637944757421e-06, "loss": 0.6001, "step": 1556 }, { "epoch": 0.2, "grad_norm": 0.7922304003103005, "learning_rate": 9.2753099332426e-06, "loss": 0.6968, "step": 1557 }, { "epoch": 0.2, "grad_norm": 0.6425191668751903, "learning_rate": 9.274239690870247e-06, "loss": 0.6281, "step": 1558 }, { "epoch": 0.2, "grad_norm": 0.6448625054268583, "learning_rate": 9.273168720639403e-06, "loss": 0.5568, "step": 1559 }, { "epoch": 0.2, "grad_norm": 0.738643651642791, "learning_rate": 9.272097022732444e-06, "loss": 0.566, "step": 1560 }, { "epoch": 0.2, "grad_norm": 0.8697136168268381, "learning_rate": 9.271024597331868e-06, "loss": 0.6514, "step": 1561 }, { "epoch": 0.2, "grad_norm": 0.8536718043641645, "learning_rate": 9.269951444620298e-06, "loss": 0.6756, "step": 1562 }, { "epoch": 0.2, "grad_norm": 0.773309672526406, "learning_rate": 9.26887756478048e-06, "loss": 0.5816, "step": 1563 }, { "epoch": 0.2, "grad_norm": 0.739379961373571, "learning_rate": 9.267802957995288e-06, "loss": 0.5694, "step": 1564 }, { "epoch": 0.2, "grad_norm": 0.759744457689315, "learning_rate": 9.266727624447713e-06, "loss": 0.5789, "step": 1565 }, { "epoch": 0.2, "grad_norm": 0.8303819792975942, "learning_rate": 9.265651564320874e-06, "loss": 0.6046, "step": 1566 }, { "epoch": 0.2, "grad_norm": 0.9527546143783323, "learning_rate": 9.264574777798012e-06, "loss": 0.6698, "step": 1567 }, { "epoch": 0.2, "grad_norm": 0.732659884433006, "learning_rate": 9.263497265062495e-06, "loss": 0.6422, "step": 1568 }, { "epoch": 0.2, "grad_norm": 1.0661966462269492, "learning_rate": 9.262419026297808e-06, "loss": 0.6183, "step": 1569 }, { "epoch": 0.2, "grad_norm": 0.8858284874157666, "learning_rate": 9.26134006168757e-06, "loss": 0.6316, "step": 1570 }, { "epoch": 0.2, "grad_norm": 0.9242246793291119, "learning_rate": 9.26026037141551e-06, "loss": 0.6513, "step": 1571 }, { "epoch": 0.2, "grad_norm": 0.9055532390060402, "learning_rate": 9.259179955665494e-06, "loss": 0.5904, "step": 1572 }, { "epoch": 0.2, "grad_norm": 0.841539947337721, "learning_rate": 9.258098814621504e-06, "loss": 0.6753, "step": 1573 }, { "epoch": 0.2, "grad_norm": 1.00011037120198, "learning_rate": 9.257016948467645e-06, "loss": 0.5826, "step": 1574 }, { "epoch": 0.2, "grad_norm": 1.0750876747016627, "learning_rate": 9.25593435738815e-06, "loss": 0.6633, "step": 1575 }, { "epoch": 0.2, "grad_norm": 0.7224467352543059, "learning_rate": 9.25485104156737e-06, "loss": 0.5668, "step": 1576 }, { "epoch": 0.2, "grad_norm": 0.7844720425299436, "learning_rate": 9.253767001189786e-06, "loss": 0.5961, "step": 1577 }, { "epoch": 0.2, "grad_norm": 0.8788938058359038, "learning_rate": 9.252682236439997e-06, "loss": 0.5804, "step": 1578 }, { "epoch": 0.2, "grad_norm": 0.9190541094053893, "learning_rate": 9.251596747502727e-06, "loss": 0.6116, "step": 1579 }, { "epoch": 0.2, "grad_norm": 0.9083903462274635, "learning_rate": 9.250510534562825e-06, "loss": 0.73, "step": 1580 }, { "epoch": 0.2, "grad_norm": 0.8906140248633336, "learning_rate": 9.249423597805259e-06, "loss": 0.6472, "step": 1581 }, { "epoch": 0.2, "grad_norm": 0.8347708668401195, "learning_rate": 9.248335937415123e-06, "loss": 0.6227, "step": 1582 }, { "epoch": 0.2, "grad_norm": 0.9770089252885229, "learning_rate": 9.247247553577638e-06, "loss": 0.679, "step": 1583 }, { "epoch": 0.2, "grad_norm": 0.8986923003889647, "learning_rate": 9.246158446478142e-06, "loss": 0.6786, "step": 1584 }, { "epoch": 0.2, "grad_norm": 0.6988598417055969, "learning_rate": 9.245068616302097e-06, "loss": 0.5696, "step": 1585 }, { "epoch": 0.2, "grad_norm": 0.7718127107832289, "learning_rate": 9.243978063235094e-06, "loss": 0.5774, "step": 1586 }, { "epoch": 0.2, "grad_norm": 0.6919991601864117, "learning_rate": 9.242886787462839e-06, "loss": 0.5694, "step": 1587 }, { "epoch": 0.2, "grad_norm": 0.72027523637707, "learning_rate": 9.241794789171165e-06, "loss": 0.5833, "step": 1588 }, { "epoch": 0.2, "grad_norm": 0.9107096864147478, "learning_rate": 9.240702068546031e-06, "loss": 0.6604, "step": 1589 }, { "epoch": 0.2, "grad_norm": 0.7746850364965003, "learning_rate": 9.239608625773514e-06, "loss": 0.5806, "step": 1590 }, { "epoch": 0.2, "grad_norm": 0.9129621240403727, "learning_rate": 9.238514461039814e-06, "loss": 0.6693, "step": 1591 }, { "epoch": 0.2, "grad_norm": 0.9149265552176682, "learning_rate": 9.237419574531258e-06, "loss": 0.6662, "step": 1592 }, { "epoch": 0.2, "grad_norm": 0.758431309483103, "learning_rate": 9.236323966434296e-06, "loss": 0.587, "step": 1593 }, { "epoch": 0.2, "grad_norm": 0.7210470304216425, "learning_rate": 9.235227636935493e-06, "loss": 0.579, "step": 1594 }, { "epoch": 0.2, "grad_norm": 0.8891628584429903, "learning_rate": 9.234130586221547e-06, "loss": 0.6658, "step": 1595 }, { "epoch": 0.2, "grad_norm": 0.7837551821738634, "learning_rate": 9.233032814479274e-06, "loss": 0.586, "step": 1596 }, { "epoch": 0.2, "grad_norm": 0.8054776919415938, "learning_rate": 9.231934321895612e-06, "loss": 0.6173, "step": 1597 }, { "epoch": 0.2, "grad_norm": 0.7998421405052893, "learning_rate": 9.230835108657622e-06, "loss": 0.6281, "step": 1598 }, { "epoch": 0.2, "grad_norm": 0.834651104824456, "learning_rate": 9.229735174952492e-06, "loss": 0.6568, "step": 1599 }, { "epoch": 0.2, "grad_norm": 0.8726927003821472, "learning_rate": 9.228634520967524e-06, "loss": 0.6238, "step": 1600 }, { "epoch": 0.2, "grad_norm": 0.6707846465045022, "learning_rate": 9.227533146890151e-06, "loss": 0.5032, "step": 1601 }, { "epoch": 0.2, "grad_norm": 0.9350381129189336, "learning_rate": 9.226431052907928e-06, "loss": 0.7071, "step": 1602 }, { "epoch": 0.2, "grad_norm": 0.8920358796328383, "learning_rate": 9.225328239208524e-06, "loss": 0.6769, "step": 1603 }, { "epoch": 0.2, "grad_norm": 0.8886833812133887, "learning_rate": 9.22422470597974e-06, "loss": 0.6678, "step": 1604 }, { "epoch": 0.2, "grad_norm": 0.9323716529954269, "learning_rate": 9.223120453409497e-06, "loss": 0.6494, "step": 1605 }, { "epoch": 0.2, "grad_norm": 0.928100267784525, "learning_rate": 9.222015481685838e-06, "loss": 0.6367, "step": 1606 }, { "epoch": 0.2, "grad_norm": 1.5399558746976043, "learning_rate": 9.220909790996927e-06, "loss": 0.6123, "step": 1607 }, { "epoch": 0.2, "grad_norm": 0.6509323967942054, "learning_rate": 9.219803381531052e-06, "loss": 0.585, "step": 1608 }, { "epoch": 0.2, "grad_norm": 0.9086364129417434, "learning_rate": 9.218696253476621e-06, "loss": 0.6239, "step": 1609 }, { "epoch": 0.21, "grad_norm": 0.9596573688039909, "learning_rate": 9.217588407022169e-06, "loss": 0.6381, "step": 1610 }, { "epoch": 0.21, "grad_norm": 1.232707657005479, "learning_rate": 9.21647984235635e-06, "loss": 0.636, "step": 1611 }, { "epoch": 0.21, "grad_norm": 0.6594714692610681, "learning_rate": 9.21537055966794e-06, "loss": 0.5623, "step": 1612 }, { "epoch": 0.21, "grad_norm": 0.737598094075262, "learning_rate": 9.21426055914584e-06, "loss": 0.5699, "step": 1613 }, { "epoch": 0.21, "grad_norm": 0.7689082783417887, "learning_rate": 9.21314984097907e-06, "loss": 0.5382, "step": 1614 }, { "epoch": 0.21, "grad_norm": 0.8047690448365722, "learning_rate": 9.212038405356775e-06, "loss": 0.6147, "step": 1615 }, { "epoch": 0.21, "grad_norm": 0.796643699226512, "learning_rate": 9.21092625246822e-06, "loss": 0.5642, "step": 1616 }, { "epoch": 0.21, "grad_norm": 0.8466573833017538, "learning_rate": 9.209813382502793e-06, "loss": 0.6775, "step": 1617 }, { "epoch": 0.21, "grad_norm": 0.7409986778646053, "learning_rate": 9.208699795650005e-06, "loss": 0.5849, "step": 1618 }, { "epoch": 0.21, "grad_norm": 0.9326774107754888, "learning_rate": 9.207585492099487e-06, "loss": 0.6519, "step": 1619 }, { "epoch": 0.21, "grad_norm": 0.7115929101705958, "learning_rate": 9.206470472040994e-06, "loss": 0.596, "step": 1620 }, { "epoch": 0.21, "grad_norm": 0.7739898060274144, "learning_rate": 9.205354735664402e-06, "loss": 0.5937, "step": 1621 }, { "epoch": 0.21, "grad_norm": 0.672379987028906, "learning_rate": 9.20423828315971e-06, "loss": 0.5775, "step": 1622 }, { "epoch": 0.21, "grad_norm": 0.9324137991561445, "learning_rate": 9.203121114717039e-06, "loss": 0.6453, "step": 1623 }, { "epoch": 0.21, "grad_norm": 0.7412879055921451, "learning_rate": 9.202003230526628e-06, "loss": 0.5937, "step": 1624 }, { "epoch": 0.21, "grad_norm": 0.8740019331205825, "learning_rate": 9.200884630778846e-06, "loss": 0.5968, "step": 1625 }, { "epoch": 0.21, "grad_norm": 0.970542072675399, "learning_rate": 9.199765315664174e-06, "loss": 0.6984, "step": 1626 }, { "epoch": 0.21, "grad_norm": 0.8887041774449264, "learning_rate": 9.19864528537322e-06, "loss": 0.705, "step": 1627 }, { "epoch": 0.21, "grad_norm": 0.6978549815955, "learning_rate": 9.197524540096717e-06, "loss": 0.545, "step": 1628 }, { "epoch": 0.21, "grad_norm": 0.7511206735398799, "learning_rate": 9.196403080025513e-06, "loss": 0.5219, "step": 1629 }, { "epoch": 0.21, "grad_norm": 0.7507031381515049, "learning_rate": 9.195280905350585e-06, "loss": 0.5712, "step": 1630 }, { "epoch": 0.21, "grad_norm": 1.0106114340176788, "learning_rate": 9.194158016263022e-06, "loss": 0.5972, "step": 1631 }, { "epoch": 0.21, "grad_norm": 1.0284939054733948, "learning_rate": 9.193034412954045e-06, "loss": 0.7125, "step": 1632 }, { "epoch": 0.21, "grad_norm": 1.3734633990939342, "learning_rate": 9.191910095614988e-06, "loss": 0.6419, "step": 1633 }, { "epoch": 0.21, "grad_norm": 0.7741141269209555, "learning_rate": 9.190785064437316e-06, "loss": 0.6414, "step": 1634 }, { "epoch": 0.21, "grad_norm": 0.6746760975869747, "learning_rate": 9.189659319612605e-06, "loss": 0.5929, "step": 1635 }, { "epoch": 0.21, "grad_norm": 0.7608116551985806, "learning_rate": 9.18853286133256e-06, "loss": 0.5565, "step": 1636 }, { "epoch": 0.21, "grad_norm": 0.7455855580757017, "learning_rate": 9.187405689789004e-06, "loss": 0.5991, "step": 1637 }, { "epoch": 0.21, "grad_norm": 0.972677599332281, "learning_rate": 9.186277805173883e-06, "loss": 0.65, "step": 1638 }, { "epoch": 0.21, "grad_norm": 1.0057386471776937, "learning_rate": 9.185149207679263e-06, "loss": 0.6243, "step": 1639 }, { "epoch": 0.21, "grad_norm": 1.064579559959113, "learning_rate": 9.184019897497336e-06, "loss": 0.6059, "step": 1640 }, { "epoch": 0.21, "grad_norm": 0.7814696682955395, "learning_rate": 9.182889874820407e-06, "loss": 0.559, "step": 1641 }, { "epoch": 0.21, "grad_norm": 0.9138967319158429, "learning_rate": 9.181759139840911e-06, "loss": 0.6777, "step": 1642 }, { "epoch": 0.21, "grad_norm": 0.730233423577774, "learning_rate": 9.180627692751399e-06, "loss": 0.6225, "step": 1643 }, { "epoch": 0.21, "grad_norm": 0.7141906934945305, "learning_rate": 9.179495533744543e-06, "loss": 0.5305, "step": 1644 }, { "epoch": 0.21, "grad_norm": 0.946653839003957, "learning_rate": 9.178362663013138e-06, "loss": 0.6481, "step": 1645 }, { "epoch": 0.21, "grad_norm": 0.9400709446865988, "learning_rate": 9.177229080750104e-06, "loss": 0.6749, "step": 1646 }, { "epoch": 0.21, "grad_norm": 0.7072012908197184, "learning_rate": 9.176094787148476e-06, "loss": 0.5231, "step": 1647 }, { "epoch": 0.21, "grad_norm": 0.9977195913128668, "learning_rate": 9.174959782401408e-06, "loss": 0.6293, "step": 1648 }, { "epoch": 0.21, "grad_norm": 0.7188404747338388, "learning_rate": 9.173824066702189e-06, "loss": 0.5823, "step": 1649 }, { "epoch": 0.21, "grad_norm": 0.7602720468912013, "learning_rate": 9.17268764024421e-06, "loss": 0.559, "step": 1650 }, { "epoch": 0.21, "grad_norm": 0.8279132093175969, "learning_rate": 9.171550503220999e-06, "loss": 0.6642, "step": 1651 }, { "epoch": 0.21, "grad_norm": 0.6879512022624757, "learning_rate": 9.170412655826194e-06, "loss": 0.5689, "step": 1652 }, { "epoch": 0.21, "grad_norm": 0.9531262221201743, "learning_rate": 9.169274098253563e-06, "loss": 0.6646, "step": 1653 }, { "epoch": 0.21, "grad_norm": 1.0268354778495947, "learning_rate": 9.168134830696986e-06, "loss": 0.6945, "step": 1654 }, { "epoch": 0.21, "grad_norm": 0.8427359424288263, "learning_rate": 9.16699485335047e-06, "loss": 0.6899, "step": 1655 }, { "epoch": 0.21, "grad_norm": 0.6398606680437324, "learning_rate": 9.165854166408144e-06, "loss": 0.5371, "step": 1656 }, { "epoch": 0.21, "grad_norm": 0.8065401688150998, "learning_rate": 9.164712770064254e-06, "loss": 0.6532, "step": 1657 }, { "epoch": 0.21, "grad_norm": 0.8182513753790843, "learning_rate": 9.163570664513166e-06, "loss": 0.7089, "step": 1658 }, { "epoch": 0.21, "grad_norm": 0.8535790071663967, "learning_rate": 9.162427849949367e-06, "loss": 0.6781, "step": 1659 }, { "epoch": 0.21, "grad_norm": 0.8844564779764534, "learning_rate": 9.161284326567472e-06, "loss": 0.6756, "step": 1660 }, { "epoch": 0.21, "grad_norm": 0.8595583674542, "learning_rate": 9.160140094562208e-06, "loss": 0.6069, "step": 1661 }, { "epoch": 0.21, "grad_norm": 0.6414844032512167, "learning_rate": 9.158995154128425e-06, "loss": 0.5205, "step": 1662 }, { "epoch": 0.21, "grad_norm": 0.8286912256625293, "learning_rate": 9.157849505461095e-06, "loss": 0.5599, "step": 1663 }, { "epoch": 0.21, "grad_norm": 0.6441001181128698, "learning_rate": 9.156703148755311e-06, "loss": 0.5708, "step": 1664 }, { "epoch": 0.21, "grad_norm": 0.8654583278911271, "learning_rate": 9.155556084206285e-06, "loss": 0.6391, "step": 1665 }, { "epoch": 0.21, "grad_norm": 0.6150761946345886, "learning_rate": 9.15440831200935e-06, "loss": 0.5328, "step": 1666 }, { "epoch": 0.21, "grad_norm": 1.0571055738759445, "learning_rate": 9.15325983235996e-06, "loss": 0.6307, "step": 1667 }, { "epoch": 0.21, "grad_norm": 0.9494184518754794, "learning_rate": 9.152110645453689e-06, "loss": 0.64, "step": 1668 }, { "epoch": 0.21, "grad_norm": 0.6707018124037974, "learning_rate": 9.150960751486231e-06, "loss": 0.5204, "step": 1669 }, { "epoch": 0.21, "grad_norm": 0.9007994494048571, "learning_rate": 9.149810150653401e-06, "loss": 0.707, "step": 1670 }, { "epoch": 0.21, "grad_norm": 0.6784877630337013, "learning_rate": 9.148658843151135e-06, "loss": 0.5799, "step": 1671 }, { "epoch": 0.21, "grad_norm": 0.9809801270704389, "learning_rate": 9.14750682917549e-06, "loss": 0.6248, "step": 1672 }, { "epoch": 0.21, "grad_norm": 0.7054687903373112, "learning_rate": 9.14635410892264e-06, "loss": 0.5414, "step": 1673 }, { "epoch": 0.21, "grad_norm": 1.0399084742103415, "learning_rate": 9.14520068258888e-06, "loss": 0.6032, "step": 1674 }, { "epoch": 0.21, "grad_norm": 0.8591677486606304, "learning_rate": 9.144046550370628e-06, "loss": 0.6036, "step": 1675 }, { "epoch": 0.21, "grad_norm": 0.7084094942979969, "learning_rate": 9.142891712464422e-06, "loss": 0.5596, "step": 1676 }, { "epoch": 0.21, "grad_norm": 0.7105250801983417, "learning_rate": 9.141736169066917e-06, "loss": 0.5632, "step": 1677 }, { "epoch": 0.21, "grad_norm": 0.6914877695214648, "learning_rate": 9.140579920374892e-06, "loss": 0.5886, "step": 1678 }, { "epoch": 0.21, "grad_norm": 1.2709845408500606, "learning_rate": 9.139422966585242e-06, "loss": 0.6392, "step": 1679 }, { "epoch": 0.21, "grad_norm": 0.7384326267337248, "learning_rate": 9.138265307894985e-06, "loss": 0.5864, "step": 1680 }, { "epoch": 0.21, "grad_norm": 0.6790450115939309, "learning_rate": 9.137106944501258e-06, "loss": 0.5476, "step": 1681 }, { "epoch": 0.21, "grad_norm": 0.7258679594378835, "learning_rate": 9.13594787660132e-06, "loss": 0.5925, "step": 1682 }, { "epoch": 0.21, "grad_norm": 0.8256179175482496, "learning_rate": 9.134788104392545e-06, "loss": 0.5882, "step": 1683 }, { "epoch": 0.21, "grad_norm": 0.730446393041975, "learning_rate": 9.13362762807243e-06, "loss": 0.5312, "step": 1684 }, { "epoch": 0.21, "grad_norm": 0.8232794625624785, "learning_rate": 9.132466447838598e-06, "loss": 0.6386, "step": 1685 }, { "epoch": 0.21, "grad_norm": 0.7983654532685514, "learning_rate": 9.131304563888779e-06, "loss": 0.5708, "step": 1686 }, { "epoch": 0.21, "grad_norm": 0.732272239705348, "learning_rate": 9.130141976420833e-06, "loss": 0.5916, "step": 1687 }, { "epoch": 0.22, "grad_norm": 0.7224169664842395, "learning_rate": 9.128978685632735e-06, "loss": 0.5226, "step": 1688 }, { "epoch": 0.22, "grad_norm": 1.235211970309404, "learning_rate": 9.127814691722583e-06, "loss": 0.6462, "step": 1689 }, { "epoch": 0.22, "grad_norm": 0.7541626413456836, "learning_rate": 9.126649994888592e-06, "loss": 0.6395, "step": 1690 }, { "epoch": 0.22, "grad_norm": 0.6406834135485577, "learning_rate": 9.125484595329098e-06, "loss": 0.5465, "step": 1691 }, { "epoch": 0.22, "grad_norm": 0.726974134855615, "learning_rate": 9.124318493242556e-06, "loss": 0.6037, "step": 1692 }, { "epoch": 0.22, "grad_norm": 0.704239500325948, "learning_rate": 9.123151688827542e-06, "loss": 0.5871, "step": 1693 }, { "epoch": 0.22, "grad_norm": 0.7566892783724908, "learning_rate": 9.12198418228275e-06, "loss": 0.56, "step": 1694 }, { "epoch": 0.22, "grad_norm": 0.9994032720300379, "learning_rate": 9.120815973806996e-06, "loss": 0.6398, "step": 1695 }, { "epoch": 0.22, "grad_norm": 0.8762560320159125, "learning_rate": 9.11964706359921e-06, "loss": 0.7044, "step": 1696 }, { "epoch": 0.22, "grad_norm": 0.7967810767753024, "learning_rate": 9.118477451858448e-06, "loss": 0.5665, "step": 1697 }, { "epoch": 0.22, "grad_norm": 0.7243554820040298, "learning_rate": 9.117307138783881e-06, "loss": 0.5448, "step": 1698 }, { "epoch": 0.22, "grad_norm": 1.275838034418713, "learning_rate": 9.116136124574803e-06, "loss": 0.6344, "step": 1699 }, { "epoch": 0.22, "grad_norm": 0.8811931568828208, "learning_rate": 9.114964409430624e-06, "loss": 0.6334, "step": 1700 }, { "epoch": 0.22, "grad_norm": 1.0282201244574742, "learning_rate": 9.113791993550876e-06, "loss": 0.6758, "step": 1701 }, { "epoch": 0.22, "grad_norm": 0.845582938833086, "learning_rate": 9.112618877135208e-06, "loss": 0.6672, "step": 1702 }, { "epoch": 0.22, "grad_norm": 0.7532367719484728, "learning_rate": 9.11144506038339e-06, "loss": 0.615, "step": 1703 }, { "epoch": 0.22, "grad_norm": 0.6218901306944322, "learning_rate": 9.110270543495312e-06, "loss": 0.5041, "step": 1704 }, { "epoch": 0.22, "grad_norm": 0.8728065675683571, "learning_rate": 9.10909532667098e-06, "loss": 0.6425, "step": 1705 }, { "epoch": 0.22, "grad_norm": 0.630190568494268, "learning_rate": 9.107919410110522e-06, "loss": 0.5451, "step": 1706 }, { "epoch": 0.22, "grad_norm": 0.9244379329995707, "learning_rate": 9.106742794014186e-06, "loss": 0.7089, "step": 1707 }, { "epoch": 0.22, "grad_norm": 0.7055678750403788, "learning_rate": 9.105565478582335e-06, "loss": 0.5625, "step": 1708 }, { "epoch": 0.22, "grad_norm": 0.7722422835886537, "learning_rate": 9.104387464015453e-06, "loss": 0.5857, "step": 1709 }, { "epoch": 0.22, "grad_norm": 0.805357366465741, "learning_rate": 9.103208750514147e-06, "loss": 0.6148, "step": 1710 }, { "epoch": 0.22, "grad_norm": 0.9550107373176977, "learning_rate": 9.102029338279138e-06, "loss": 0.6898, "step": 1711 }, { "epoch": 0.22, "grad_norm": 1.0599975987628432, "learning_rate": 9.100849227511266e-06, "loss": 0.6153, "step": 1712 }, { "epoch": 0.22, "grad_norm": 1.0974351460383835, "learning_rate": 9.099668418411494e-06, "loss": 0.6296, "step": 1713 }, { "epoch": 0.22, "grad_norm": 0.6993288675937175, "learning_rate": 9.0984869111809e-06, "loss": 0.5713, "step": 1714 }, { "epoch": 0.22, "grad_norm": 0.9196388164774751, "learning_rate": 9.097304706020682e-06, "loss": 0.6892, "step": 1715 }, { "epoch": 0.22, "grad_norm": 0.7456097217629195, "learning_rate": 9.096121803132158e-06, "loss": 0.5695, "step": 1716 }, { "epoch": 0.22, "grad_norm": 0.6867136241361311, "learning_rate": 9.094938202716764e-06, "loss": 0.553, "step": 1717 }, { "epoch": 0.22, "grad_norm": 0.7778179548021698, "learning_rate": 9.093753904976056e-06, "loss": 0.5442, "step": 1718 }, { "epoch": 0.22, "grad_norm": 0.81808374340776, "learning_rate": 9.092568910111705e-06, "loss": 0.619, "step": 1719 }, { "epoch": 0.22, "grad_norm": 0.9386670540754333, "learning_rate": 9.091383218325506e-06, "loss": 0.6255, "step": 1720 }, { "epoch": 0.22, "grad_norm": 0.8158233269671321, "learning_rate": 9.090196829819368e-06, "loss": 0.548, "step": 1721 }, { "epoch": 0.22, "grad_norm": 0.8769296571815728, "learning_rate": 9.08900974479532e-06, "loss": 0.6549, "step": 1722 }, { "epoch": 0.22, "grad_norm": 0.7363202803849038, "learning_rate": 9.087821963455514e-06, "loss": 0.6063, "step": 1723 }, { "epoch": 0.22, "grad_norm": 0.7527555082525598, "learning_rate": 9.086633486002213e-06, "loss": 0.608, "step": 1724 }, { "epoch": 0.22, "grad_norm": 0.7091510522164963, "learning_rate": 9.085444312637804e-06, "loss": 0.5448, "step": 1725 }, { "epoch": 0.22, "grad_norm": 0.6896993880892248, "learning_rate": 9.08425444356479e-06, "loss": 0.6033, "step": 1726 }, { "epoch": 0.22, "grad_norm": 0.9764830673685948, "learning_rate": 9.083063878985793e-06, "loss": 0.6537, "step": 1727 }, { "epoch": 0.22, "grad_norm": 0.785031296757777, "learning_rate": 9.081872619103556e-06, "loss": 0.6464, "step": 1728 }, { "epoch": 0.22, "grad_norm": 0.8211053894022589, "learning_rate": 9.080680664120935e-06, "loss": 0.6085, "step": 1729 }, { "epoch": 0.22, "grad_norm": 0.6683698084612498, "learning_rate": 9.07948801424091e-06, "loss": 0.5627, "step": 1730 }, { "epoch": 0.22, "grad_norm": 0.7623640773008702, "learning_rate": 9.078294669666577e-06, "loss": 0.5943, "step": 1731 }, { "epoch": 0.22, "grad_norm": 0.8322766288219557, "learning_rate": 9.077100630601147e-06, "loss": 0.5815, "step": 1732 }, { "epoch": 0.22, "grad_norm": 0.7214948388412612, "learning_rate": 9.075905897247955e-06, "loss": 0.5807, "step": 1733 }, { "epoch": 0.22, "grad_norm": 0.809341957344064, "learning_rate": 9.074710469810453e-06, "loss": 0.5881, "step": 1734 }, { "epoch": 0.22, "grad_norm": 0.8104278580302012, "learning_rate": 9.073514348492204e-06, "loss": 0.6027, "step": 1735 }, { "epoch": 0.22, "grad_norm": 0.7238055896556168, "learning_rate": 9.072317533496905e-06, "loss": 0.5938, "step": 1736 }, { "epoch": 0.22, "grad_norm": 0.7911169611676274, "learning_rate": 9.07112002502835e-06, "loss": 0.6824, "step": 1737 }, { "epoch": 0.22, "grad_norm": 0.7938304964870317, "learning_rate": 9.06992182329047e-06, "loss": 0.5913, "step": 1738 }, { "epoch": 0.22, "grad_norm": 0.7636631258455324, "learning_rate": 9.068722928487302e-06, "loss": 0.5965, "step": 1739 }, { "epoch": 0.22, "grad_norm": 0.6691466688006485, "learning_rate": 9.067523340823007e-06, "loss": 0.5975, "step": 1740 }, { "epoch": 0.22, "grad_norm": 1.0070475167764037, "learning_rate": 9.066323060501865e-06, "loss": 0.6221, "step": 1741 }, { "epoch": 0.22, "grad_norm": 0.9511724200450111, "learning_rate": 9.065122087728267e-06, "loss": 0.7474, "step": 1742 }, { "epoch": 0.22, "grad_norm": 0.7252248964130821, "learning_rate": 9.063920422706727e-06, "loss": 0.551, "step": 1743 }, { "epoch": 0.22, "grad_norm": 1.1211443025301424, "learning_rate": 9.062718065641875e-06, "loss": 0.6351, "step": 1744 }, { "epoch": 0.22, "grad_norm": 0.7876488197922967, "learning_rate": 9.061515016738464e-06, "loss": 0.6354, "step": 1745 }, { "epoch": 0.22, "grad_norm": 0.7531567610715258, "learning_rate": 9.060311276201356e-06, "loss": 0.571, "step": 1746 }, { "epoch": 0.22, "grad_norm": 0.8503792231372225, "learning_rate": 9.059106844235539e-06, "loss": 0.6851, "step": 1747 }, { "epoch": 0.22, "grad_norm": 0.8001217877679286, "learning_rate": 9.057901721046113e-06, "loss": 0.5749, "step": 1748 }, { "epoch": 0.22, "grad_norm": 1.0012384941941663, "learning_rate": 9.056695906838302e-06, "loss": 0.6708, "step": 1749 }, { "epoch": 0.22, "grad_norm": 0.8541094011939935, "learning_rate": 9.055489401817438e-06, "loss": 0.656, "step": 1750 }, { "epoch": 0.22, "grad_norm": 0.8357890172107632, "learning_rate": 9.054282206188978e-06, "loss": 0.6269, "step": 1751 }, { "epoch": 0.22, "grad_norm": 0.6866886328457728, "learning_rate": 9.053074320158497e-06, "loss": 0.5281, "step": 1752 }, { "epoch": 0.22, "grad_norm": 0.7532696921676845, "learning_rate": 9.051865743931683e-06, "loss": 0.5627, "step": 1753 }, { "epoch": 0.22, "grad_norm": 0.9603772805197949, "learning_rate": 9.050656477714345e-06, "loss": 0.728, "step": 1754 }, { "epoch": 0.22, "grad_norm": 0.7283825730725442, "learning_rate": 9.049446521712408e-06, "loss": 0.511, "step": 1755 }, { "epoch": 0.22, "grad_norm": 0.8273130154245885, "learning_rate": 9.048235876131917e-06, "loss": 0.5645, "step": 1756 }, { "epoch": 0.22, "grad_norm": 0.9765117143848513, "learning_rate": 9.047024541179029e-06, "loss": 0.662, "step": 1757 }, { "epoch": 0.22, "grad_norm": 0.9010999202200949, "learning_rate": 9.045812517060023e-06, "loss": 0.6461, "step": 1758 }, { "epoch": 0.22, "grad_norm": 0.8374348905093684, "learning_rate": 9.044599803981294e-06, "loss": 0.6681, "step": 1759 }, { "epoch": 0.22, "grad_norm": 0.9809866375278119, "learning_rate": 9.043386402149355e-06, "loss": 0.6289, "step": 1760 }, { "epoch": 0.22, "grad_norm": 0.8456193304173618, "learning_rate": 9.042172311770835e-06, "loss": 0.6256, "step": 1761 }, { "epoch": 0.22, "grad_norm": 0.7710525567112971, "learning_rate": 9.040957533052483e-06, "loss": 0.6517, "step": 1762 }, { "epoch": 0.22, "grad_norm": 0.7273969252066462, "learning_rate": 9.03974206620116e-06, "loss": 0.564, "step": 1763 }, { "epoch": 0.22, "grad_norm": 0.6965816596919657, "learning_rate": 9.03852591142385e-06, "loss": 0.5681, "step": 1764 }, { "epoch": 0.22, "grad_norm": 0.7580748481208671, "learning_rate": 9.03730906892765e-06, "loss": 0.5378, "step": 1765 }, { "epoch": 0.22, "grad_norm": 0.7221588641200527, "learning_rate": 9.036091538919776e-06, "loss": 0.5671, "step": 1766 }, { "epoch": 0.23, "grad_norm": 0.9371739324533304, "learning_rate": 9.03487332160756e-06, "loss": 0.6793, "step": 1767 }, { "epoch": 0.23, "grad_norm": 0.6651522889103836, "learning_rate": 9.033654417198452e-06, "loss": 0.5774, "step": 1768 }, { "epoch": 0.23, "grad_norm": 0.7734725954649293, "learning_rate": 9.03243482590002e-06, "loss": 0.5849, "step": 1769 }, { "epoch": 0.23, "grad_norm": 0.7610915536999594, "learning_rate": 9.031214547919946e-06, "loss": 0.5784, "step": 1770 }, { "epoch": 0.23, "grad_norm": 0.803952299480627, "learning_rate": 9.029993583466033e-06, "loss": 0.546, "step": 1771 }, { "epoch": 0.23, "grad_norm": 0.8000546888386627, "learning_rate": 9.028771932746195e-06, "loss": 0.5937, "step": 1772 }, { "epoch": 0.23, "grad_norm": 0.8291186462310567, "learning_rate": 9.02754959596847e-06, "loss": 0.6171, "step": 1773 }, { "epoch": 0.23, "grad_norm": 1.0146855901019032, "learning_rate": 9.026326573341006e-06, "loss": 0.6356, "step": 1774 }, { "epoch": 0.23, "grad_norm": 0.702740851301099, "learning_rate": 9.025102865072072e-06, "loss": 0.6019, "step": 1775 }, { "epoch": 0.23, "grad_norm": 0.9512756159800014, "learning_rate": 9.023878471370056e-06, "loss": 0.6472, "step": 1776 }, { "epoch": 0.23, "grad_norm": 0.9773728277386027, "learning_rate": 9.022653392443455e-06, "loss": 0.6328, "step": 1777 }, { "epoch": 0.23, "grad_norm": 0.878590570708946, "learning_rate": 9.02142762850089e-06, "loss": 0.6452, "step": 1778 }, { "epoch": 0.23, "grad_norm": 0.8192464553656723, "learning_rate": 9.020201179751094e-06, "loss": 0.6292, "step": 1779 }, { "epoch": 0.23, "grad_norm": 0.7253603418913679, "learning_rate": 9.018974046402918e-06, "loss": 0.5598, "step": 1780 }, { "epoch": 0.23, "grad_norm": 0.7347599244829282, "learning_rate": 9.017746228665332e-06, "loss": 0.5605, "step": 1781 }, { "epoch": 0.23, "grad_norm": 0.6342052185363972, "learning_rate": 9.01651772674742e-06, "loss": 0.5442, "step": 1782 }, { "epoch": 0.23, "grad_norm": 1.3426891750251602, "learning_rate": 9.015288540858384e-06, "loss": 0.6333, "step": 1783 }, { "epoch": 0.23, "grad_norm": 0.753102034929604, "learning_rate": 9.01405867120754e-06, "loss": 0.5563, "step": 1784 }, { "epoch": 0.23, "grad_norm": 0.842818485641212, "learning_rate": 9.012828118004322e-06, "loss": 0.6807, "step": 1785 }, { "epoch": 0.23, "grad_norm": 0.8012205485276382, "learning_rate": 9.011596881458282e-06, "loss": 0.5676, "step": 1786 }, { "epoch": 0.23, "grad_norm": 0.7302957077071275, "learning_rate": 9.010364961779084e-06, "loss": 0.6211, "step": 1787 }, { "epoch": 0.23, "grad_norm": 0.6700622046011556, "learning_rate": 9.009132359176514e-06, "loss": 0.5806, "step": 1788 }, { "epoch": 0.23, "grad_norm": 0.8229276054442922, "learning_rate": 9.00789907386047e-06, "loss": 0.6697, "step": 1789 }, { "epoch": 0.23, "grad_norm": 0.6765304334818132, "learning_rate": 9.006665106040967e-06, "loss": 0.5546, "step": 1790 }, { "epoch": 0.23, "grad_norm": 0.8205021457066586, "learning_rate": 9.005430455928137e-06, "loss": 0.5767, "step": 1791 }, { "epoch": 0.23, "grad_norm": 0.8226513884576943, "learning_rate": 9.004195123732229e-06, "loss": 0.6665, "step": 1792 }, { "epoch": 0.23, "grad_norm": 0.5984631525126797, "learning_rate": 9.002959109663607e-06, "loss": 0.4969, "step": 1793 }, { "epoch": 0.23, "grad_norm": 0.7540334707310535, "learning_rate": 9.001722413932749e-06, "loss": 0.5493, "step": 1794 }, { "epoch": 0.23, "grad_norm": 0.7725749445519982, "learning_rate": 9.000485036750258e-06, "loss": 0.5964, "step": 1795 }, { "epoch": 0.23, "grad_norm": 0.7516895027875931, "learning_rate": 8.99924697832684e-06, "loss": 0.5661, "step": 1796 }, { "epoch": 0.23, "grad_norm": 0.6852269325010806, "learning_rate": 8.998008238873323e-06, "loss": 0.5695, "step": 1797 }, { "epoch": 0.23, "grad_norm": 0.7228251665479638, "learning_rate": 8.996768818600657e-06, "loss": 0.5431, "step": 1798 }, { "epoch": 0.23, "grad_norm": 0.9622573445433384, "learning_rate": 8.995528717719899e-06, "loss": 0.6439, "step": 1799 }, { "epoch": 0.23, "grad_norm": 0.9910610099206466, "learning_rate": 8.994287936442226e-06, "loss": 0.6857, "step": 1800 }, { "epoch": 0.23, "grad_norm": 0.7672007423183251, "learning_rate": 8.993046474978927e-06, "loss": 0.6335, "step": 1801 }, { "epoch": 0.23, "grad_norm": 0.6598043353365445, "learning_rate": 8.991804333541414e-06, "loss": 0.5456, "step": 1802 }, { "epoch": 0.23, "grad_norm": 1.486939874109273, "learning_rate": 8.990561512341209e-06, "loss": 0.6917, "step": 1803 }, { "epoch": 0.23, "grad_norm": 0.6741282238255342, "learning_rate": 8.989318011589953e-06, "loss": 0.5372, "step": 1804 }, { "epoch": 0.23, "grad_norm": 0.9392212109917977, "learning_rate": 8.988073831499399e-06, "loss": 0.6905, "step": 1805 }, { "epoch": 0.23, "grad_norm": 0.8339190627582693, "learning_rate": 8.986828972281419e-06, "loss": 0.5738, "step": 1806 }, { "epoch": 0.23, "grad_norm": 0.7192273804772419, "learning_rate": 8.985583434148e-06, "loss": 0.5996, "step": 1807 }, { "epoch": 0.23, "grad_norm": 0.9992670727070457, "learning_rate": 8.984337217311243e-06, "loss": 0.6253, "step": 1808 }, { "epoch": 0.23, "grad_norm": 0.7980599567684804, "learning_rate": 8.983090321983366e-06, "loss": 0.6055, "step": 1809 }, { "epoch": 0.23, "grad_norm": 0.8102957500803778, "learning_rate": 8.981842748376703e-06, "loss": 0.641, "step": 1810 }, { "epoch": 0.23, "grad_norm": 1.159963884646134, "learning_rate": 8.980594496703704e-06, "loss": 0.7301, "step": 1811 }, { "epoch": 0.23, "grad_norm": 0.8683511509988018, "learning_rate": 8.97934556717693e-06, "loss": 0.6774, "step": 1812 }, { "epoch": 0.23, "grad_norm": 0.6996519858164988, "learning_rate": 8.978095960009063e-06, "loss": 0.5469, "step": 1813 }, { "epoch": 0.23, "grad_norm": 0.7021957454306138, "learning_rate": 8.976845675412898e-06, "loss": 0.5955, "step": 1814 }, { "epoch": 0.23, "grad_norm": 0.8300163218034267, "learning_rate": 8.975594713601344e-06, "loss": 0.6219, "step": 1815 }, { "epoch": 0.23, "grad_norm": 0.8599336208706447, "learning_rate": 8.974343074787428e-06, "loss": 0.6434, "step": 1816 }, { "epoch": 0.23, "grad_norm": 0.7045302061021801, "learning_rate": 8.973090759184292e-06, "loss": 0.5583, "step": 1817 }, { "epoch": 0.23, "grad_norm": 0.8920187062229696, "learning_rate": 8.971837767005189e-06, "loss": 0.6269, "step": 1818 }, { "epoch": 0.23, "grad_norm": 0.776925122013056, "learning_rate": 8.970584098463495e-06, "loss": 0.5931, "step": 1819 }, { "epoch": 0.23, "grad_norm": 0.7999708058542344, "learning_rate": 8.969329753772694e-06, "loss": 0.5413, "step": 1820 }, { "epoch": 0.23, "grad_norm": 0.9039186862186455, "learning_rate": 8.968074733146386e-06, "loss": 0.6863, "step": 1821 }, { "epoch": 0.23, "grad_norm": 0.8842326024923454, "learning_rate": 8.966819036798292e-06, "loss": 0.6523, "step": 1822 }, { "epoch": 0.23, "grad_norm": 0.8641127840864393, "learning_rate": 8.96556266494224e-06, "loss": 0.6172, "step": 1823 }, { "epoch": 0.23, "grad_norm": 0.7052621432635187, "learning_rate": 8.96430561779218e-06, "loss": 0.5831, "step": 1824 }, { "epoch": 0.23, "grad_norm": 0.803996299174675, "learning_rate": 8.963047895562174e-06, "loss": 0.6715, "step": 1825 }, { "epoch": 0.23, "grad_norm": 0.82726587738202, "learning_rate": 8.961789498466398e-06, "loss": 0.634, "step": 1826 }, { "epoch": 0.23, "grad_norm": 0.7321807215769772, "learning_rate": 8.960530426719143e-06, "loss": 0.55, "step": 1827 }, { "epoch": 0.23, "grad_norm": 0.8171572146780561, "learning_rate": 8.959270680534817e-06, "loss": 0.6361, "step": 1828 }, { "epoch": 0.23, "grad_norm": 0.7719496052869266, "learning_rate": 8.958010260127943e-06, "loss": 0.5752, "step": 1829 }, { "epoch": 0.23, "grad_norm": 0.6335785748131367, "learning_rate": 8.956749165713154e-06, "loss": 0.5056, "step": 1830 }, { "epoch": 0.23, "grad_norm": 0.7282550491907417, "learning_rate": 8.955487397505203e-06, "loss": 0.5757, "step": 1831 }, { "epoch": 0.23, "grad_norm": 0.7236594269106582, "learning_rate": 8.954224955718957e-06, "loss": 0.5752, "step": 1832 }, { "epoch": 0.23, "grad_norm": 1.784011160239139, "learning_rate": 8.952961840569397e-06, "loss": 0.635, "step": 1833 }, { "epoch": 0.23, "grad_norm": 0.8652267571371319, "learning_rate": 8.951698052271617e-06, "loss": 0.6014, "step": 1834 }, { "epoch": 0.23, "grad_norm": 0.7044958461739276, "learning_rate": 8.950433591040826e-06, "loss": 0.5222, "step": 1835 }, { "epoch": 0.23, "grad_norm": 0.7727336242658484, "learning_rate": 8.94916845709235e-06, "loss": 0.5573, "step": 1836 }, { "epoch": 0.23, "grad_norm": 0.8327714610213779, "learning_rate": 8.94790265064163e-06, "loss": 0.6279, "step": 1837 }, { "epoch": 0.23, "grad_norm": 0.8263280956362629, "learning_rate": 8.946636171904214e-06, "loss": 0.6368, "step": 1838 }, { "epoch": 0.23, "grad_norm": 0.7042401870432945, "learning_rate": 8.945369021095775e-06, "loss": 0.5422, "step": 1839 }, { "epoch": 0.23, "grad_norm": 0.7334952707005559, "learning_rate": 8.944101198432095e-06, "loss": 0.553, "step": 1840 }, { "epoch": 0.23, "grad_norm": 0.8826243897435279, "learning_rate": 8.94283270412907e-06, "loss": 0.6735, "step": 1841 }, { "epoch": 0.23, "grad_norm": 0.651128765798537, "learning_rate": 8.941563538402711e-06, "loss": 0.525, "step": 1842 }, { "epoch": 0.23, "grad_norm": 0.6543518611513667, "learning_rate": 8.940293701469145e-06, "loss": 0.4693, "step": 1843 }, { "epoch": 0.23, "grad_norm": 0.7884774038528798, "learning_rate": 8.939023193544611e-06, "loss": 0.6058, "step": 1844 }, { "epoch": 0.24, "grad_norm": 0.7757565788220621, "learning_rate": 8.937752014845465e-06, "loss": 0.5616, "step": 1845 }, { "epoch": 0.24, "grad_norm": 0.9395952151613353, "learning_rate": 8.936480165588174e-06, "loss": 0.6875, "step": 1846 }, { "epoch": 0.24, "grad_norm": 0.830597098331696, "learning_rate": 8.935207645989318e-06, "loss": 0.6265, "step": 1847 }, { "epoch": 0.24, "grad_norm": 0.7788980540091662, "learning_rate": 8.933934456265599e-06, "loss": 0.5913, "step": 1848 }, { "epoch": 0.24, "grad_norm": 0.8883618040930412, "learning_rate": 8.932660596633824e-06, "loss": 0.7046, "step": 1849 }, { "epoch": 0.24, "grad_norm": 0.7456816563417507, "learning_rate": 8.931386067310919e-06, "loss": 0.5248, "step": 1850 }, { "epoch": 0.24, "grad_norm": 0.8936548297518376, "learning_rate": 8.930110868513927e-06, "loss": 0.6754, "step": 1851 }, { "epoch": 0.24, "grad_norm": 0.7254390140125193, "learning_rate": 8.928835000459994e-06, "loss": 0.556, "step": 1852 }, { "epoch": 0.24, "grad_norm": 0.8332523834069967, "learning_rate": 8.927558463366393e-06, "loss": 0.621, "step": 1853 }, { "epoch": 0.24, "grad_norm": 0.7278124760817184, "learning_rate": 8.9262812574505e-06, "loss": 0.5773, "step": 1854 }, { "epoch": 0.24, "grad_norm": 0.8343111125701007, "learning_rate": 8.925003382929816e-06, "loss": 0.6167, "step": 1855 }, { "epoch": 0.24, "grad_norm": 0.7610900922685739, "learning_rate": 8.923724840021945e-06, "loss": 0.4841, "step": 1856 }, { "epoch": 0.24, "grad_norm": 0.8924361275608047, "learning_rate": 8.92244562894461e-06, "loss": 0.6835, "step": 1857 }, { "epoch": 0.24, "grad_norm": 0.9062770758503514, "learning_rate": 8.921165749915647e-06, "loss": 0.7286, "step": 1858 }, { "epoch": 0.24, "grad_norm": 0.9391657111707795, "learning_rate": 8.919885203153012e-06, "loss": 0.7292, "step": 1859 }, { "epoch": 0.24, "grad_norm": 0.691653049080268, "learning_rate": 8.918603988874758e-06, "loss": 0.5435, "step": 1860 }, { "epoch": 0.24, "grad_norm": 0.6736304768430776, "learning_rate": 8.917322107299073e-06, "loss": 0.5695, "step": 1861 }, { "epoch": 0.24, "grad_norm": 1.079970504137458, "learning_rate": 8.916039558644244e-06, "loss": 0.7181, "step": 1862 }, { "epoch": 0.24, "grad_norm": 0.7031316216922165, "learning_rate": 8.91475634312867e-06, "loss": 0.5656, "step": 1863 }, { "epoch": 0.24, "grad_norm": 0.970306401055872, "learning_rate": 8.913472460970878e-06, "loss": 0.6201, "step": 1864 }, { "epoch": 0.24, "grad_norm": 0.7128104168366728, "learning_rate": 8.912187912389496e-06, "loss": 0.5623, "step": 1865 }, { "epoch": 0.24, "grad_norm": 0.7749996497785987, "learning_rate": 8.91090269760327e-06, "loss": 0.6453, "step": 1866 }, { "epoch": 0.24, "grad_norm": 0.830955404676633, "learning_rate": 8.909616816831056e-06, "loss": 0.6385, "step": 1867 }, { "epoch": 0.24, "grad_norm": 0.9974152088693675, "learning_rate": 8.90833027029183e-06, "loss": 0.6511, "step": 1868 }, { "epoch": 0.24, "grad_norm": 0.7839959446154744, "learning_rate": 8.907043058204674e-06, "loss": 0.5792, "step": 1869 }, { "epoch": 0.24, "grad_norm": 0.6862329878214277, "learning_rate": 8.905755180788788e-06, "loss": 0.5807, "step": 1870 }, { "epoch": 0.24, "grad_norm": 2.30580512635051, "learning_rate": 8.904466638263488e-06, "loss": 0.6334, "step": 1871 }, { "epoch": 0.24, "grad_norm": 0.8340907851346976, "learning_rate": 8.903177430848193e-06, "loss": 0.6501, "step": 1872 }, { "epoch": 0.24, "grad_norm": 0.72020565012702, "learning_rate": 8.901887558762446e-06, "loss": 0.5641, "step": 1873 }, { "epoch": 0.24, "grad_norm": 0.8439023503821602, "learning_rate": 8.900597022225896e-06, "loss": 0.5621, "step": 1874 }, { "epoch": 0.24, "grad_norm": 0.770583617352573, "learning_rate": 8.89930582145831e-06, "loss": 0.5813, "step": 1875 }, { "epoch": 0.24, "grad_norm": 0.8259131544552497, "learning_rate": 8.898013956679566e-06, "loss": 0.5932, "step": 1876 }, { "epoch": 0.24, "grad_norm": 0.7522258289376651, "learning_rate": 8.896721428109654e-06, "loss": 0.5499, "step": 1877 }, { "epoch": 0.24, "grad_norm": 0.6484770788920831, "learning_rate": 8.895428235968677e-06, "loss": 0.5207, "step": 1878 }, { "epoch": 0.24, "grad_norm": 0.9698414328940885, "learning_rate": 8.894134380476856e-06, "loss": 0.6768, "step": 1879 }, { "epoch": 0.24, "grad_norm": 0.7966621324568566, "learning_rate": 8.892839861854517e-06, "loss": 0.6589, "step": 1880 }, { "epoch": 0.24, "grad_norm": 0.7419699525830253, "learning_rate": 8.891544680322104e-06, "loss": 0.5834, "step": 1881 }, { "epoch": 0.24, "grad_norm": 0.902361146062035, "learning_rate": 8.890248836100177e-06, "loss": 0.6265, "step": 1882 }, { "epoch": 0.24, "grad_norm": 0.749160813780667, "learning_rate": 8.8889523294094e-06, "loss": 0.598, "step": 1883 }, { "epoch": 0.24, "grad_norm": 1.0104706830256829, "learning_rate": 8.887655160470556e-06, "loss": 0.7211, "step": 1884 }, { "epoch": 0.24, "grad_norm": 0.8059422574537072, "learning_rate": 8.886357329504538e-06, "loss": 0.5739, "step": 1885 }, { "epoch": 0.24, "grad_norm": 0.9080984138864188, "learning_rate": 8.885058836732357e-06, "loss": 0.6633, "step": 1886 }, { "epoch": 0.24, "grad_norm": 0.8006087752635738, "learning_rate": 8.883759682375128e-06, "loss": 0.6584, "step": 1887 }, { "epoch": 0.24, "grad_norm": 0.8747630444494514, "learning_rate": 8.882459866654087e-06, "loss": 0.6383, "step": 1888 }, { "epoch": 0.24, "grad_norm": 0.7102603370532257, "learning_rate": 8.88115938979058e-06, "loss": 0.5734, "step": 1889 }, { "epoch": 0.24, "grad_norm": 0.6597827751743804, "learning_rate": 8.879858252006059e-06, "loss": 0.5551, "step": 1890 }, { "epoch": 0.24, "grad_norm": 0.8325595763530709, "learning_rate": 8.8785564535221e-06, "loss": 0.6503, "step": 1891 }, { "epoch": 0.24, "grad_norm": 0.7679174860187776, "learning_rate": 8.877253994560381e-06, "loss": 0.5856, "step": 1892 }, { "epoch": 0.24, "grad_norm": 1.0062543563982924, "learning_rate": 8.875950875342702e-06, "loss": 0.705, "step": 1893 }, { "epoch": 0.24, "grad_norm": 0.8122397657471703, "learning_rate": 8.87464709609097e-06, "loss": 0.6204, "step": 1894 }, { "epoch": 0.24, "grad_norm": 0.9111254883782234, "learning_rate": 8.8733426570272e-06, "loss": 0.6752, "step": 1895 }, { "epoch": 0.24, "grad_norm": 0.7652815044790452, "learning_rate": 8.872037558373528e-06, "loss": 0.5467, "step": 1896 }, { "epoch": 0.24, "grad_norm": 0.6484927033413499, "learning_rate": 8.870731800352199e-06, "loss": 0.5948, "step": 1897 }, { "epoch": 0.24, "grad_norm": 0.9539929020240348, "learning_rate": 8.869425383185571e-06, "loss": 0.7258, "step": 1898 }, { "epoch": 0.24, "grad_norm": 0.756187742469807, "learning_rate": 8.86811830709611e-06, "loss": 0.5285, "step": 1899 }, { "epoch": 0.24, "grad_norm": 0.7696346106774211, "learning_rate": 8.866810572306398e-06, "loss": 0.5931, "step": 1900 }, { "epoch": 0.24, "grad_norm": 0.6536423669380937, "learning_rate": 8.865502179039132e-06, "loss": 0.506, "step": 1901 }, { "epoch": 0.24, "grad_norm": 0.706241097957143, "learning_rate": 8.864193127517114e-06, "loss": 0.5268, "step": 1902 }, { "epoch": 0.24, "grad_norm": 0.6289214904022691, "learning_rate": 8.862883417963264e-06, "loss": 0.5271, "step": 1903 }, { "epoch": 0.24, "grad_norm": 0.6635196321111894, "learning_rate": 8.861573050600611e-06, "loss": 0.5283, "step": 1904 }, { "epoch": 0.24, "grad_norm": 0.7934363408931292, "learning_rate": 8.860262025652296e-06, "loss": 0.6488, "step": 1905 }, { "epoch": 0.24, "grad_norm": 1.0827572303624151, "learning_rate": 8.858950343341574e-06, "loss": 0.61, "step": 1906 }, { "epoch": 0.24, "grad_norm": 0.8718578971625196, "learning_rate": 8.857638003891812e-06, "loss": 0.6515, "step": 1907 }, { "epoch": 0.24, "grad_norm": 0.7607392679233425, "learning_rate": 8.856325007526486e-06, "loss": 0.594, "step": 1908 }, { "epoch": 0.24, "grad_norm": 0.6656428915012981, "learning_rate": 8.855011354469186e-06, "loss": 0.5829, "step": 1909 }, { "epoch": 0.24, "grad_norm": 0.8437637996622123, "learning_rate": 8.853697044943614e-06, "loss": 0.6929, "step": 1910 }, { "epoch": 0.24, "grad_norm": 0.6464399577651874, "learning_rate": 8.852382079173583e-06, "loss": 0.5059, "step": 1911 }, { "epoch": 0.24, "grad_norm": 1.0195887543999693, "learning_rate": 8.851066457383017e-06, "loss": 0.672, "step": 1912 }, { "epoch": 0.24, "grad_norm": 0.8240469623607751, "learning_rate": 8.849750179795953e-06, "loss": 0.6511, "step": 1913 }, { "epoch": 0.24, "grad_norm": 0.8413518626973495, "learning_rate": 8.848433246636541e-06, "loss": 0.5762, "step": 1914 }, { "epoch": 0.24, "grad_norm": 0.9879940685040388, "learning_rate": 8.84711565812904e-06, "loss": 0.6609, "step": 1915 }, { "epoch": 0.24, "grad_norm": 1.0832942220529356, "learning_rate": 8.84579741449782e-06, "loss": 0.6648, "step": 1916 }, { "epoch": 0.24, "grad_norm": 0.9730298180017065, "learning_rate": 8.844478515967367e-06, "loss": 0.6809, "step": 1917 }, { "epoch": 0.24, "grad_norm": 0.8061031482338935, "learning_rate": 8.843158962762273e-06, "loss": 0.6742, "step": 1918 }, { "epoch": 0.24, "grad_norm": 0.9104671072644314, "learning_rate": 8.841838755107246e-06, "loss": 0.6817, "step": 1919 }, { "epoch": 0.24, "grad_norm": 0.7060884077682582, "learning_rate": 8.840517893227101e-06, "loss": 0.5717, "step": 1920 }, { "epoch": 0.24, "grad_norm": 0.7810269809370137, "learning_rate": 8.839196377346772e-06, "loss": 0.5964, "step": 1921 }, { "epoch": 0.24, "grad_norm": 0.9504857012050918, "learning_rate": 8.837874207691296e-06, "loss": 0.6704, "step": 1922 }, { "epoch": 0.24, "grad_norm": 0.7884607914060926, "learning_rate": 8.836551384485823e-06, "loss": 0.6114, "step": 1923 }, { "epoch": 0.25, "grad_norm": 0.8739752002779261, "learning_rate": 8.83522790795562e-06, "loss": 0.6485, "step": 1924 }, { "epoch": 0.25, "grad_norm": 0.867517280808017, "learning_rate": 8.833903778326058e-06, "loss": 0.6386, "step": 1925 }, { "epoch": 0.25, "grad_norm": 0.8877451867182851, "learning_rate": 8.832578995822625e-06, "loss": 0.5696, "step": 1926 }, { "epoch": 0.25, "grad_norm": 0.7559521946462631, "learning_rate": 8.831253560670916e-06, "loss": 0.6256, "step": 1927 }, { "epoch": 0.25, "grad_norm": 0.6354190981571656, "learning_rate": 8.82992747309664e-06, "loss": 0.5828, "step": 1928 }, { "epoch": 0.25, "grad_norm": 0.7795912661459384, "learning_rate": 8.828600733325614e-06, "loss": 0.5932, "step": 1929 }, { "epoch": 0.25, "grad_norm": 0.7028386677922696, "learning_rate": 8.827273341583772e-06, "loss": 0.5791, "step": 1930 }, { "epoch": 0.25, "grad_norm": 0.6564266314487655, "learning_rate": 8.825945298097148e-06, "loss": 0.5675, "step": 1931 }, { "epoch": 0.25, "grad_norm": 0.7149073789063699, "learning_rate": 8.8246166030919e-06, "loss": 0.4908, "step": 1932 }, { "epoch": 0.25, "grad_norm": 0.8483383277559516, "learning_rate": 8.823287256794291e-06, "loss": 0.5936, "step": 1933 }, { "epoch": 0.25, "grad_norm": 0.9041161346428437, "learning_rate": 8.82195725943069e-06, "loss": 0.653, "step": 1934 }, { "epoch": 0.25, "grad_norm": 0.7650128872232571, "learning_rate": 8.820626611227585e-06, "loss": 0.5333, "step": 1935 }, { "epoch": 0.25, "grad_norm": 0.89738252844998, "learning_rate": 8.819295312411574e-06, "loss": 0.6814, "step": 1936 }, { "epoch": 0.25, "grad_norm": 0.9622346150592043, "learning_rate": 8.817963363209357e-06, "loss": 0.6191, "step": 1937 }, { "epoch": 0.25, "grad_norm": 0.7158658712391445, "learning_rate": 8.816630763847756e-06, "loss": 0.5994, "step": 1938 }, { "epoch": 0.25, "grad_norm": 0.9612611211488961, "learning_rate": 8.815297514553698e-06, "loss": 0.6603, "step": 1939 }, { "epoch": 0.25, "grad_norm": 0.8063463112974308, "learning_rate": 8.81396361555422e-06, "loss": 0.6003, "step": 1940 }, { "epoch": 0.25, "grad_norm": 0.6659642173391825, "learning_rate": 8.81262906707647e-06, "loss": 0.5648, "step": 1941 }, { "epoch": 0.25, "grad_norm": 0.7847860001456343, "learning_rate": 8.811293869347712e-06, "loss": 0.5353, "step": 1942 }, { "epoch": 0.25, "grad_norm": 0.8425879345472, "learning_rate": 8.809958022595312e-06, "loss": 0.6594, "step": 1943 }, { "epoch": 0.25, "grad_norm": 0.8463724987506398, "learning_rate": 8.808621527046754e-06, "loss": 0.6396, "step": 1944 }, { "epoch": 0.25, "grad_norm": 0.6834593236146296, "learning_rate": 8.807284382929629e-06, "loss": 0.5694, "step": 1945 }, { "epoch": 0.25, "grad_norm": 0.8389541406839688, "learning_rate": 8.805946590471635e-06, "loss": 0.5769, "step": 1946 }, { "epoch": 0.25, "grad_norm": 0.6989983001587766, "learning_rate": 8.804608149900588e-06, "loss": 0.6015, "step": 1947 }, { "epoch": 0.25, "grad_norm": 0.7093666612422437, "learning_rate": 8.80326906144441e-06, "loss": 0.5425, "step": 1948 }, { "epoch": 0.25, "grad_norm": 0.6224127019888014, "learning_rate": 8.80192932533113e-06, "loss": 0.5313, "step": 1949 }, { "epoch": 0.25, "grad_norm": 0.6918084500758087, "learning_rate": 8.800588941788897e-06, "loss": 0.5208, "step": 1950 }, { "epoch": 0.25, "grad_norm": 0.7703729385959307, "learning_rate": 8.79924791104596e-06, "loss": 0.5499, "step": 1951 }, { "epoch": 0.25, "grad_norm": 0.7450610941849798, "learning_rate": 8.797906233330686e-06, "loss": 0.6096, "step": 1952 }, { "epoch": 0.25, "grad_norm": 0.928819808935517, "learning_rate": 8.796563908871545e-06, "loss": 0.6055, "step": 1953 }, { "epoch": 0.25, "grad_norm": 0.6956657056066134, "learning_rate": 8.795220937897122e-06, "loss": 0.5585, "step": 1954 }, { "epoch": 0.25, "grad_norm": 0.6905355248613142, "learning_rate": 8.793877320636114e-06, "loss": 0.6038, "step": 1955 }, { "epoch": 0.25, "grad_norm": 0.7962590954490573, "learning_rate": 8.792533057317321e-06, "loss": 0.6896, "step": 1956 }, { "epoch": 0.25, "grad_norm": 1.0255821883870986, "learning_rate": 8.79118814816966e-06, "loss": 0.6259, "step": 1957 }, { "epoch": 0.25, "grad_norm": 1.1322157432206734, "learning_rate": 8.789842593422152e-06, "loss": 0.6198, "step": 1958 }, { "epoch": 0.25, "grad_norm": 0.7255377115872316, "learning_rate": 8.788496393303933e-06, "loss": 0.6005, "step": 1959 }, { "epoch": 0.25, "grad_norm": 0.7339380270816392, "learning_rate": 8.787149548044247e-06, "loss": 0.5793, "step": 1960 }, { "epoch": 0.25, "grad_norm": 0.7054192692519413, "learning_rate": 8.785802057872447e-06, "loss": 0.5719, "step": 1961 }, { "epoch": 0.25, "grad_norm": 1.3124111360469808, "learning_rate": 8.784453923017996e-06, "loss": 0.6837, "step": 1962 }, { "epoch": 0.25, "grad_norm": 0.9634097673238025, "learning_rate": 8.783105143710469e-06, "loss": 0.6715, "step": 1963 }, { "epoch": 0.25, "grad_norm": 0.8045861810672543, "learning_rate": 8.781755720179548e-06, "loss": 0.5531, "step": 1964 }, { "epoch": 0.25, "grad_norm": 0.7543958285659339, "learning_rate": 8.780405652655027e-06, "loss": 0.5886, "step": 1965 }, { "epoch": 0.25, "grad_norm": 0.9289003264134142, "learning_rate": 8.779054941366805e-06, "loss": 0.6553, "step": 1966 }, { "epoch": 0.25, "grad_norm": 0.827454154354535, "learning_rate": 8.777703586544897e-06, "loss": 0.6391, "step": 1967 }, { "epoch": 0.25, "grad_norm": 0.9853509429499995, "learning_rate": 8.776351588419425e-06, "loss": 0.6532, "step": 1968 }, { "epoch": 0.25, "grad_norm": 0.6937018570118536, "learning_rate": 8.774998947220619e-06, "loss": 0.5313, "step": 1969 }, { "epoch": 0.25, "grad_norm": 0.9342839057222853, "learning_rate": 8.77364566317882e-06, "loss": 0.6544, "step": 1970 }, { "epoch": 0.25, "grad_norm": 0.6729214109393628, "learning_rate": 8.772291736524478e-06, "loss": 0.5335, "step": 1971 }, { "epoch": 0.25, "grad_norm": 0.8305344686297439, "learning_rate": 8.770937167488154e-06, "loss": 0.598, "step": 1972 }, { "epoch": 0.25, "grad_norm": 0.6595802411010885, "learning_rate": 8.769581956300512e-06, "loss": 0.4895, "step": 1973 }, { "epoch": 0.25, "grad_norm": 0.8545399367197936, "learning_rate": 8.768226103192336e-06, "loss": 0.6485, "step": 1974 }, { "epoch": 0.25, "grad_norm": 0.7915246781934338, "learning_rate": 8.766869608394512e-06, "loss": 0.5833, "step": 1975 }, { "epoch": 0.25, "grad_norm": 0.93530170660416, "learning_rate": 8.765512472138037e-06, "loss": 0.6544, "step": 1976 }, { "epoch": 0.25, "grad_norm": 0.8310656400882053, "learning_rate": 8.764154694654016e-06, "loss": 0.6418, "step": 1977 }, { "epoch": 0.25, "grad_norm": 0.7014259729144102, "learning_rate": 8.762796276173667e-06, "loss": 0.5694, "step": 1978 }, { "epoch": 0.25, "grad_norm": 1.0598943972449064, "learning_rate": 8.76143721692831e-06, "loss": 0.6319, "step": 1979 }, { "epoch": 0.25, "grad_norm": 0.6898015713549526, "learning_rate": 8.760077517149384e-06, "loss": 0.5441, "step": 1980 }, { "epoch": 0.25, "grad_norm": 0.8634276901857205, "learning_rate": 8.758717177068427e-06, "loss": 0.62, "step": 1981 }, { "epoch": 0.25, "grad_norm": 0.6935936183248117, "learning_rate": 8.757356196917095e-06, "loss": 0.5618, "step": 1982 }, { "epoch": 0.25, "grad_norm": 0.9987592371884148, "learning_rate": 8.755994576927145e-06, "loss": 0.6239, "step": 1983 }, { "epoch": 0.25, "grad_norm": 0.7415630301483688, "learning_rate": 8.754632317330448e-06, "loss": 0.582, "step": 1984 }, { "epoch": 0.25, "grad_norm": 0.8938029349833673, "learning_rate": 8.753269418358983e-06, "loss": 0.6617, "step": 1985 }, { "epoch": 0.25, "grad_norm": 0.8725319532925707, "learning_rate": 8.751905880244837e-06, "loss": 0.5492, "step": 1986 }, { "epoch": 0.25, "grad_norm": 0.7396266314568015, "learning_rate": 8.750541703220208e-06, "loss": 0.5339, "step": 1987 }, { "epoch": 0.25, "grad_norm": 0.809179527782143, "learning_rate": 8.749176887517399e-06, "loss": 0.6409, "step": 1988 }, { "epoch": 0.25, "grad_norm": 1.0185237417094761, "learning_rate": 8.747811433368825e-06, "loss": 0.6568, "step": 1989 }, { "epoch": 0.25, "grad_norm": 0.8025513635725298, "learning_rate": 8.746445341007009e-06, "loss": 0.6102, "step": 1990 }, { "epoch": 0.25, "grad_norm": 1.071151409477144, "learning_rate": 8.745078610664578e-06, "loss": 0.7174, "step": 1991 }, { "epoch": 0.25, "grad_norm": 0.7697855789527388, "learning_rate": 8.74371124257428e-06, "loss": 0.5793, "step": 1992 }, { "epoch": 0.25, "grad_norm": 0.6946729035521078, "learning_rate": 8.742343236968956e-06, "loss": 0.5093, "step": 1993 }, { "epoch": 0.25, "grad_norm": 1.1491730457924645, "learning_rate": 8.740974594081569e-06, "loss": 0.6336, "step": 1994 }, { "epoch": 0.25, "grad_norm": 0.7804596962556815, "learning_rate": 8.73960531414518e-06, "loss": 0.6033, "step": 1995 }, { "epoch": 0.25, "grad_norm": 0.7455237343297116, "learning_rate": 8.738235397392964e-06, "loss": 0.563, "step": 1996 }, { "epoch": 0.25, "grad_norm": 0.6483947551739727, "learning_rate": 8.736864844058206e-06, "loss": 0.5164, "step": 1997 }, { "epoch": 0.25, "grad_norm": 0.9117178337068256, "learning_rate": 8.735493654374296e-06, "loss": 0.6814, "step": 1998 }, { "epoch": 0.25, "grad_norm": 0.8589054560981413, "learning_rate": 8.734121828574731e-06, "loss": 0.5597, "step": 1999 }, { "epoch": 0.25, "grad_norm": 0.8987904440844824, "learning_rate": 8.73274936689312e-06, "loss": 0.7028, "step": 2000 }, { "epoch": 0.25, "grad_norm": 1.1207140563730151, "learning_rate": 8.73137626956318e-06, "loss": 0.6755, "step": 2001 }, { "epoch": 0.26, "grad_norm": 0.6509964449240633, "learning_rate": 8.730002536818736e-06, "loss": 0.5016, "step": 2002 }, { "epoch": 0.26, "grad_norm": 0.7609586453415649, "learning_rate": 8.728628168893717e-06, "loss": 0.5753, "step": 2003 }, { "epoch": 0.26, "grad_norm": 1.0883921733033437, "learning_rate": 8.727253166022168e-06, "loss": 0.6415, "step": 2004 }, { "epoch": 0.26, "grad_norm": 0.8460363062499621, "learning_rate": 8.725877528438234e-06, "loss": 0.6809, "step": 2005 }, { "epoch": 0.26, "grad_norm": 0.8701782027194154, "learning_rate": 8.72450125637617e-06, "loss": 0.6428, "step": 2006 }, { "epoch": 0.26, "grad_norm": 0.7733324420419228, "learning_rate": 8.723124350070347e-06, "loss": 0.5572, "step": 2007 }, { "epoch": 0.26, "grad_norm": 0.7843190467587263, "learning_rate": 8.721746809755235e-06, "loss": 0.5808, "step": 2008 }, { "epoch": 0.26, "grad_norm": 0.6977752975021547, "learning_rate": 8.72036863566541e-06, "loss": 0.5221, "step": 2009 }, { "epoch": 0.26, "grad_norm": 0.6500606511990463, "learning_rate": 8.71898982803557e-06, "loss": 0.5569, "step": 2010 }, { "epoch": 0.26, "grad_norm": 0.8051197501810271, "learning_rate": 8.717610387100501e-06, "loss": 0.5888, "step": 2011 }, { "epoch": 0.26, "grad_norm": 1.1629330578255817, "learning_rate": 8.716230313095116e-06, "loss": 0.64, "step": 2012 }, { "epoch": 0.26, "grad_norm": 1.0711681351535367, "learning_rate": 8.714849606254424e-06, "loss": 0.6834, "step": 2013 }, { "epoch": 0.26, "grad_norm": 0.6166175941373604, "learning_rate": 8.713468266813545e-06, "loss": 0.5019, "step": 2014 }, { "epoch": 0.26, "grad_norm": 0.9143238490482865, "learning_rate": 8.712086295007707e-06, "loss": 0.6894, "step": 2015 }, { "epoch": 0.26, "grad_norm": 0.7229066480285856, "learning_rate": 8.710703691072245e-06, "loss": 0.5706, "step": 2016 }, { "epoch": 0.26, "grad_norm": 0.9226781049553117, "learning_rate": 8.7093204552426e-06, "loss": 0.6209, "step": 2017 }, { "epoch": 0.26, "grad_norm": 0.765747774540585, "learning_rate": 8.707936587754328e-06, "loss": 0.6107, "step": 2018 }, { "epoch": 0.26, "grad_norm": 1.0083131241533017, "learning_rate": 8.706552088843083e-06, "loss": 0.6685, "step": 2019 }, { "epoch": 0.26, "grad_norm": 0.7511748652686437, "learning_rate": 8.70516695874463e-06, "loss": 0.5829, "step": 2020 }, { "epoch": 0.26, "grad_norm": 0.7984665351737803, "learning_rate": 8.703781197694846e-06, "loss": 0.5806, "step": 2021 }, { "epoch": 0.26, "grad_norm": 0.795803786220064, "learning_rate": 8.70239480592971e-06, "loss": 0.6181, "step": 2022 }, { "epoch": 0.26, "grad_norm": 0.8257924125457154, "learning_rate": 8.70100778368531e-06, "loss": 0.6598, "step": 2023 }, { "epoch": 0.26, "grad_norm": 0.9263239734883454, "learning_rate": 8.699620131197841e-06, "loss": 0.717, "step": 2024 }, { "epoch": 0.26, "grad_norm": 0.876756335859248, "learning_rate": 8.698231848703607e-06, "loss": 0.6539, "step": 2025 }, { "epoch": 0.26, "grad_norm": 0.6942243270989207, "learning_rate": 8.696842936439014e-06, "loss": 0.5172, "step": 2026 }, { "epoch": 0.26, "grad_norm": 0.8696010987635775, "learning_rate": 8.695453394640585e-06, "loss": 0.5492, "step": 2027 }, { "epoch": 0.26, "grad_norm": 0.9299807553823802, "learning_rate": 8.69406322354494e-06, "loss": 0.5929, "step": 2028 }, { "epoch": 0.26, "grad_norm": 0.6878120117457226, "learning_rate": 8.692672423388815e-06, "loss": 0.5435, "step": 2029 }, { "epoch": 0.26, "grad_norm": 0.9066210113148992, "learning_rate": 8.691280994409044e-06, "loss": 0.7152, "step": 2030 }, { "epoch": 0.26, "grad_norm": 0.6301839089769938, "learning_rate": 8.689888936842577e-06, "loss": 0.536, "step": 2031 }, { "epoch": 0.26, "grad_norm": 0.7807011748418788, "learning_rate": 8.688496250926462e-06, "loss": 0.5718, "step": 2032 }, { "epoch": 0.26, "grad_norm": 0.8825215192963675, "learning_rate": 8.687102936897865e-06, "loss": 0.6461, "step": 2033 }, { "epoch": 0.26, "grad_norm": 0.8887339265334426, "learning_rate": 8.685708994994049e-06, "loss": 0.6902, "step": 2034 }, { "epoch": 0.26, "grad_norm": 0.8185013271988214, "learning_rate": 8.684314425452387e-06, "loss": 0.606, "step": 2035 }, { "epoch": 0.26, "grad_norm": 0.6625731532622935, "learning_rate": 8.682919228510363e-06, "loss": 0.5619, "step": 2036 }, { "epoch": 0.26, "grad_norm": 0.8317143721022817, "learning_rate": 8.681523404405563e-06, "loss": 0.5546, "step": 2037 }, { "epoch": 0.26, "grad_norm": 0.8095247090684597, "learning_rate": 8.68012695337568e-06, "loss": 0.6133, "step": 2038 }, { "epoch": 0.26, "grad_norm": 0.8171315384294896, "learning_rate": 8.678729875658515e-06, "loss": 0.6712, "step": 2039 }, { "epoch": 0.26, "grad_norm": 0.7197046499482802, "learning_rate": 8.67733217149198e-06, "loss": 0.5778, "step": 2040 }, { "epoch": 0.26, "grad_norm": 0.7384342032029247, "learning_rate": 8.675933841114084e-06, "loss": 0.527, "step": 2041 }, { "epoch": 0.26, "grad_norm": 0.7967624935348813, "learning_rate": 8.674534884762952e-06, "loss": 0.6029, "step": 2042 }, { "epoch": 0.26, "grad_norm": 0.832874454259647, "learning_rate": 8.67313530267681e-06, "loss": 0.5684, "step": 2043 }, { "epoch": 0.26, "grad_norm": 0.7255863271746238, "learning_rate": 8.671735095093991e-06, "loss": 0.5276, "step": 2044 }, { "epoch": 0.26, "grad_norm": 0.6575818971835546, "learning_rate": 8.670334262252937e-06, "loss": 0.5409, "step": 2045 }, { "epoch": 0.26, "grad_norm": 0.8841140027179337, "learning_rate": 8.668932804392198e-06, "loss": 0.6146, "step": 2046 }, { "epoch": 0.26, "grad_norm": 0.9824928661209361, "learning_rate": 8.667530721750423e-06, "loss": 0.6324, "step": 2047 }, { "epoch": 0.26, "grad_norm": 0.9411398889212877, "learning_rate": 8.666128014566375e-06, "loss": 0.6287, "step": 2048 }, { "epoch": 0.26, "grad_norm": 0.7561122228860881, "learning_rate": 8.664724683078918e-06, "loss": 0.5325, "step": 2049 }, { "epoch": 0.26, "grad_norm": 0.859191991494865, "learning_rate": 8.663320727527027e-06, "loss": 0.6567, "step": 2050 }, { "epoch": 0.26, "grad_norm": 0.728821202243004, "learning_rate": 8.661916148149782e-06, "loss": 0.57, "step": 2051 }, { "epoch": 0.26, "grad_norm": 0.6767639622734398, "learning_rate": 8.660510945186365e-06, "loss": 0.5133, "step": 2052 }, { "epoch": 0.26, "grad_norm": 0.9279525090801648, "learning_rate": 8.659105118876068e-06, "loss": 0.6907, "step": 2053 }, { "epoch": 0.26, "grad_norm": 1.2235500374261368, "learning_rate": 8.657698669458292e-06, "loss": 0.6988, "step": 2054 }, { "epoch": 0.26, "grad_norm": 0.902774933098236, "learning_rate": 8.656291597172537e-06, "loss": 0.6603, "step": 2055 }, { "epoch": 0.26, "grad_norm": 0.7326495475483635, "learning_rate": 8.654883902258414e-06, "loss": 0.5314, "step": 2056 }, { "epoch": 0.26, "grad_norm": 0.9244288847282367, "learning_rate": 8.653475584955638e-06, "loss": 0.6417, "step": 2057 }, { "epoch": 0.26, "grad_norm": 1.0393911334648622, "learning_rate": 8.652066645504033e-06, "loss": 0.7027, "step": 2058 }, { "epoch": 0.26, "grad_norm": 0.7509214752343563, "learning_rate": 8.650657084143525e-06, "loss": 0.5611, "step": 2059 }, { "epoch": 0.26, "grad_norm": 0.6756456299191473, "learning_rate": 8.649246901114145e-06, "loss": 0.5491, "step": 2060 }, { "epoch": 0.26, "grad_norm": 0.8305792834618921, "learning_rate": 8.64783609665604e-06, "loss": 0.603, "step": 2061 }, { "epoch": 0.26, "grad_norm": 0.7344523217111673, "learning_rate": 8.646424671009448e-06, "loss": 0.6001, "step": 2062 }, { "epoch": 0.26, "grad_norm": 0.5961418564054419, "learning_rate": 8.645012624414722e-06, "loss": 0.4929, "step": 2063 }, { "epoch": 0.26, "grad_norm": 0.8494504540561819, "learning_rate": 8.643599957112321e-06, "loss": 0.6588, "step": 2064 }, { "epoch": 0.26, "grad_norm": 0.6721975809201963, "learning_rate": 8.642186669342804e-06, "loss": 0.5578, "step": 2065 }, { "epoch": 0.26, "grad_norm": 1.0692361988277472, "learning_rate": 8.640772761346844e-06, "loss": 0.6489, "step": 2066 }, { "epoch": 0.26, "grad_norm": 0.6710279541943218, "learning_rate": 8.639358233365211e-06, "loss": 0.5862, "step": 2067 }, { "epoch": 0.26, "grad_norm": 0.740578187892062, "learning_rate": 8.637943085638785e-06, "loss": 0.6169, "step": 2068 }, { "epoch": 0.26, "grad_norm": 0.7688921713149206, "learning_rate": 8.636527318408552e-06, "loss": 0.6221, "step": 2069 }, { "epoch": 0.26, "grad_norm": 0.639425782952498, "learning_rate": 8.635110931915602e-06, "loss": 0.4929, "step": 2070 }, { "epoch": 0.26, "grad_norm": 0.7070039780932834, "learning_rate": 8.63369392640113e-06, "loss": 0.5593, "step": 2071 }, { "epoch": 0.26, "grad_norm": 0.7868259360319192, "learning_rate": 8.632276302106437e-06, "loss": 0.5261, "step": 2072 }, { "epoch": 0.26, "grad_norm": 0.6942834473668794, "learning_rate": 8.63085805927293e-06, "loss": 0.5338, "step": 2073 }, { "epoch": 0.26, "grad_norm": 0.7524201881278602, "learning_rate": 8.629439198142124e-06, "loss": 0.591, "step": 2074 }, { "epoch": 0.26, "grad_norm": 0.8374464490401248, "learning_rate": 8.628019718955633e-06, "loss": 0.566, "step": 2075 }, { "epoch": 0.26, "grad_norm": 0.6701416033371944, "learning_rate": 8.626599621955179e-06, "loss": 0.5474, "step": 2076 }, { "epoch": 0.26, "grad_norm": 0.7375947444405496, "learning_rate": 8.625178907382593e-06, "loss": 0.5714, "step": 2077 }, { "epoch": 0.26, "grad_norm": 0.8588690128028347, "learning_rate": 8.623757575479803e-06, "loss": 0.6954, "step": 2078 }, { "epoch": 0.26, "grad_norm": 0.7770740039038444, "learning_rate": 8.622335626488852e-06, "loss": 0.5761, "step": 2079 }, { "epoch": 0.26, "grad_norm": 0.7126757195708782, "learning_rate": 8.62091306065188e-06, "loss": 0.5358, "step": 2080 }, { "epoch": 0.27, "grad_norm": 0.6357528170915606, "learning_rate": 8.619489878211138e-06, "loss": 0.5538, "step": 2081 }, { "epoch": 0.27, "grad_norm": 0.773575886183575, "learning_rate": 8.618066079408976e-06, "loss": 0.5655, "step": 2082 }, { "epoch": 0.27, "grad_norm": 1.103543041083038, "learning_rate": 8.616641664487856e-06, "loss": 0.621, "step": 2083 }, { "epoch": 0.27, "grad_norm": 0.6786409522469372, "learning_rate": 8.615216633690336e-06, "loss": 0.5249, "step": 2084 }, { "epoch": 0.27, "grad_norm": 0.709366941217034, "learning_rate": 8.613790987259085e-06, "loss": 0.5657, "step": 2085 }, { "epoch": 0.27, "grad_norm": 0.9936592780503476, "learning_rate": 8.612364725436879e-06, "loss": 0.6814, "step": 2086 }, { "epoch": 0.27, "grad_norm": 0.8070679307666362, "learning_rate": 8.610937848466593e-06, "loss": 0.5997, "step": 2087 }, { "epoch": 0.27, "grad_norm": 0.7741105837673501, "learning_rate": 8.60951035659121e-06, "loss": 0.6954, "step": 2088 }, { "epoch": 0.27, "grad_norm": 0.8598441994349721, "learning_rate": 8.608082250053817e-06, "loss": 0.5753, "step": 2089 }, { "epoch": 0.27, "grad_norm": 0.6871093253471288, "learning_rate": 8.606653529097606e-06, "loss": 0.5978, "step": 2090 }, { "epoch": 0.27, "grad_norm": 0.8402978610523177, "learning_rate": 8.60522419396587e-06, "loss": 0.6622, "step": 2091 }, { "epoch": 0.27, "grad_norm": 0.8025997635649196, "learning_rate": 8.603794244902017e-06, "loss": 0.5746, "step": 2092 }, { "epoch": 0.27, "grad_norm": 0.8079648866775898, "learning_rate": 8.602363682149546e-06, "loss": 0.5526, "step": 2093 }, { "epoch": 0.27, "grad_norm": 0.7848806863731498, "learning_rate": 8.600932505952068e-06, "loss": 0.6087, "step": 2094 }, { "epoch": 0.27, "grad_norm": 0.9165143762221796, "learning_rate": 8.599500716553301e-06, "loss": 0.6672, "step": 2095 }, { "epoch": 0.27, "grad_norm": 0.7098452875125559, "learning_rate": 8.598068314197059e-06, "loss": 0.5533, "step": 2096 }, { "epoch": 0.27, "grad_norm": 0.7106573150898514, "learning_rate": 8.596635299127269e-06, "loss": 0.5544, "step": 2097 }, { "epoch": 0.27, "grad_norm": 0.6275054786560426, "learning_rate": 8.595201671587956e-06, "loss": 0.5001, "step": 2098 }, { "epoch": 0.27, "grad_norm": 0.9032591792337562, "learning_rate": 8.593767431823255e-06, "loss": 0.6973, "step": 2099 }, { "epoch": 0.27, "grad_norm": 0.9832893791400691, "learning_rate": 8.592332580077398e-06, "loss": 0.6853, "step": 2100 }, { "epoch": 0.27, "grad_norm": 0.7983767797769051, "learning_rate": 8.590897116594727e-06, "loss": 0.618, "step": 2101 }, { "epoch": 0.27, "grad_norm": 0.8611346675740268, "learning_rate": 8.589461041619687e-06, "loss": 0.5636, "step": 2102 }, { "epoch": 0.27, "grad_norm": 0.8881373293479193, "learning_rate": 8.588024355396827e-06, "loss": 0.6137, "step": 2103 }, { "epoch": 0.27, "grad_norm": 0.9065098589149884, "learning_rate": 8.586587058170799e-06, "loss": 0.634, "step": 2104 }, { "epoch": 0.27, "grad_norm": 0.9678113091015726, "learning_rate": 8.585149150186359e-06, "loss": 0.6795, "step": 2105 }, { "epoch": 0.27, "grad_norm": 0.6585278408268294, "learning_rate": 8.583710631688369e-06, "loss": 0.4703, "step": 2106 }, { "epoch": 0.27, "grad_norm": 0.666075991773359, "learning_rate": 8.582271502921794e-06, "loss": 0.5236, "step": 2107 }, { "epoch": 0.27, "grad_norm": 0.8308239244690797, "learning_rate": 8.5808317641317e-06, "loss": 0.696, "step": 2108 }, { "epoch": 0.27, "grad_norm": 0.6109566217595789, "learning_rate": 8.579391415563263e-06, "loss": 0.4739, "step": 2109 }, { "epoch": 0.27, "grad_norm": 0.6665019209388863, "learning_rate": 8.57795045746176e-06, "loss": 0.552, "step": 2110 }, { "epoch": 0.27, "grad_norm": 0.8794364432256047, "learning_rate": 8.576508890072565e-06, "loss": 0.6399, "step": 2111 }, { "epoch": 0.27, "grad_norm": 0.7373689901029139, "learning_rate": 8.575066713641168e-06, "loss": 0.5586, "step": 2112 }, { "epoch": 0.27, "grad_norm": 1.1756731041277932, "learning_rate": 8.573623928413152e-06, "loss": 0.6884, "step": 2113 }, { "epoch": 0.27, "grad_norm": 0.7926003558780015, "learning_rate": 8.572180534634214e-06, "loss": 0.5712, "step": 2114 }, { "epoch": 0.27, "grad_norm": 0.8698030506537611, "learning_rate": 8.570736532550142e-06, "loss": 0.5424, "step": 2115 }, { "epoch": 0.27, "grad_norm": 0.8811133398906865, "learning_rate": 8.569291922406841e-06, "loss": 0.688, "step": 2116 }, { "epoch": 0.27, "grad_norm": 0.8866401031040106, "learning_rate": 8.56784670445031e-06, "loss": 0.65, "step": 2117 }, { "epoch": 0.27, "grad_norm": 0.6260009011465755, "learning_rate": 8.566400878926653e-06, "loss": 0.5443, "step": 2118 }, { "epoch": 0.27, "grad_norm": 0.8172623392123525, "learning_rate": 8.564954446082083e-06, "loss": 0.5753, "step": 2119 }, { "epoch": 0.27, "grad_norm": 0.8622881918556228, "learning_rate": 8.56350740616291e-06, "loss": 0.625, "step": 2120 }, { "epoch": 0.27, "grad_norm": 0.9047210862684324, "learning_rate": 8.56205975941555e-06, "loss": 0.6794, "step": 2121 }, { "epoch": 0.27, "grad_norm": 0.7415236097481217, "learning_rate": 8.560611506086518e-06, "loss": 0.576, "step": 2122 }, { "epoch": 0.27, "grad_norm": 0.8138831101181325, "learning_rate": 8.559162646422445e-06, "loss": 0.6395, "step": 2123 }, { "epoch": 0.27, "grad_norm": 0.8191648361935001, "learning_rate": 8.557713180670052e-06, "loss": 0.5721, "step": 2124 }, { "epoch": 0.27, "grad_norm": 0.7113374776305473, "learning_rate": 8.556263109076168e-06, "loss": 0.6063, "step": 2125 }, { "epoch": 0.27, "grad_norm": 1.070745353392222, "learning_rate": 8.554812431887725e-06, "loss": 0.6671, "step": 2126 }, { "epoch": 0.27, "grad_norm": 0.9631473166536872, "learning_rate": 8.55336114935176e-06, "loss": 0.5968, "step": 2127 }, { "epoch": 0.27, "grad_norm": 0.8063882176780597, "learning_rate": 8.551909261715408e-06, "loss": 0.6017, "step": 2128 }, { "epoch": 0.27, "grad_norm": 1.3713162586413101, "learning_rate": 8.550456769225914e-06, "loss": 0.6019, "step": 2129 }, { "epoch": 0.27, "grad_norm": 0.895627671462431, "learning_rate": 8.549003672130619e-06, "loss": 0.7221, "step": 2130 }, { "epoch": 0.27, "grad_norm": 1.115825569332588, "learning_rate": 8.547549970676972e-06, "loss": 0.7126, "step": 2131 }, { "epoch": 0.27, "grad_norm": 0.6744296830114518, "learning_rate": 8.546095665112524e-06, "loss": 0.5879, "step": 2132 }, { "epoch": 0.27, "grad_norm": 1.0542291389317007, "learning_rate": 8.544640755684927e-06, "loss": 0.6625, "step": 2133 }, { "epoch": 0.27, "grad_norm": 0.7067083329411854, "learning_rate": 8.543185242641938e-06, "loss": 0.5762, "step": 2134 }, { "epoch": 0.27, "grad_norm": 0.8899550250092108, "learning_rate": 8.541729126231414e-06, "loss": 0.6177, "step": 2135 }, { "epoch": 0.27, "grad_norm": 0.9358455833625566, "learning_rate": 8.540272406701316e-06, "loss": 0.6917, "step": 2136 }, { "epoch": 0.27, "grad_norm": 0.8019144856821593, "learning_rate": 8.53881508429971e-06, "loss": 0.5475, "step": 2137 }, { "epoch": 0.27, "grad_norm": 0.7309143066553038, "learning_rate": 8.537357159274764e-06, "loss": 0.5758, "step": 2138 }, { "epoch": 0.27, "grad_norm": 0.8548437747811373, "learning_rate": 8.535898631874744e-06, "loss": 0.6729, "step": 2139 }, { "epoch": 0.27, "grad_norm": 0.7278671687332745, "learning_rate": 8.534439502348025e-06, "loss": 0.5547, "step": 2140 }, { "epoch": 0.27, "grad_norm": 0.8136868999320196, "learning_rate": 8.53297977094308e-06, "loss": 0.6425, "step": 2141 }, { "epoch": 0.27, "grad_norm": 0.7242295356994636, "learning_rate": 8.531519437908485e-06, "loss": 0.5474, "step": 2142 }, { "epoch": 0.27, "grad_norm": 0.6885692419793408, "learning_rate": 8.530058503492921e-06, "loss": 0.539, "step": 2143 }, { "epoch": 0.27, "grad_norm": 0.7968254199191971, "learning_rate": 8.528596967945172e-06, "loss": 0.6923, "step": 2144 }, { "epoch": 0.27, "grad_norm": 0.8958611999211049, "learning_rate": 8.527134831514116e-06, "loss": 0.6885, "step": 2145 }, { "epoch": 0.27, "grad_norm": 0.9463412415335809, "learning_rate": 8.525672094448746e-06, "loss": 0.5787, "step": 2146 }, { "epoch": 0.27, "grad_norm": 1.0415556795944747, "learning_rate": 8.524208756998148e-06, "loss": 0.5712, "step": 2147 }, { "epoch": 0.27, "grad_norm": 0.9288608356670949, "learning_rate": 8.522744819411515e-06, "loss": 0.6482, "step": 2148 }, { "epoch": 0.27, "grad_norm": 0.8879779258522612, "learning_rate": 8.52128028193814e-06, "loss": 0.6896, "step": 2149 }, { "epoch": 0.27, "grad_norm": 0.7540301814467247, "learning_rate": 8.519815144827415e-06, "loss": 0.5766, "step": 2150 }, { "epoch": 0.27, "grad_norm": 0.8111217497985278, "learning_rate": 8.51834940832884e-06, "loss": 0.612, "step": 2151 }, { "epoch": 0.27, "grad_norm": 0.7946179201424982, "learning_rate": 8.516883072692015e-06, "loss": 0.5573, "step": 2152 }, { "epoch": 0.27, "grad_norm": 1.0863149604567819, "learning_rate": 8.51541613816664e-06, "loss": 0.6631, "step": 2153 }, { "epoch": 0.27, "grad_norm": 1.006336809524803, "learning_rate": 8.513948605002523e-06, "loss": 0.6602, "step": 2154 }, { "epoch": 0.27, "grad_norm": 0.779702080676902, "learning_rate": 8.512480473449564e-06, "loss": 0.6007, "step": 2155 }, { "epoch": 0.27, "grad_norm": 0.7109553376450497, "learning_rate": 8.511011743757776e-06, "loss": 0.5873, "step": 2156 }, { "epoch": 0.27, "grad_norm": 0.9299938710168193, "learning_rate": 8.509542416177264e-06, "loss": 0.6237, "step": 2157 }, { "epoch": 0.27, "grad_norm": 0.7855393275143485, "learning_rate": 8.508072490958239e-06, "loss": 0.5287, "step": 2158 }, { "epoch": 0.28, "grad_norm": 0.7615302768685427, "learning_rate": 8.506601968351018e-06, "loss": 0.5529, "step": 2159 }, { "epoch": 0.28, "grad_norm": 1.2983792695241654, "learning_rate": 8.505130848606012e-06, "loss": 0.6771, "step": 2160 }, { "epoch": 0.28, "grad_norm": 1.0047322485772034, "learning_rate": 8.50365913197374e-06, "loss": 0.6538, "step": 2161 }, { "epoch": 0.28, "grad_norm": 0.814560739020013, "learning_rate": 8.502186818704818e-06, "loss": 0.641, "step": 2162 }, { "epoch": 0.28, "grad_norm": 0.8877892445975503, "learning_rate": 8.500713909049964e-06, "loss": 0.6746, "step": 2163 }, { "epoch": 0.28, "grad_norm": 1.1054896961186658, "learning_rate": 8.499240403260005e-06, "loss": 0.6849, "step": 2164 }, { "epoch": 0.28, "grad_norm": 0.757828476419521, "learning_rate": 8.49776630158586e-06, "loss": 0.573, "step": 2165 }, { "epoch": 0.28, "grad_norm": 0.8272076603238785, "learning_rate": 8.49629160427855e-06, "loss": 0.6607, "step": 2166 }, { "epoch": 0.28, "grad_norm": 1.0791597327946632, "learning_rate": 8.494816311589207e-06, "loss": 0.6825, "step": 2167 }, { "epoch": 0.28, "grad_norm": 0.7144652410692978, "learning_rate": 8.493340423769054e-06, "loss": 0.5706, "step": 2168 }, { "epoch": 0.28, "grad_norm": 0.9188422557919373, "learning_rate": 8.491863941069418e-06, "loss": 0.6679, "step": 2169 }, { "epoch": 0.28, "grad_norm": 0.7159994207143344, "learning_rate": 8.490386863741734e-06, "loss": 0.5178, "step": 2170 }, { "epoch": 0.28, "grad_norm": 0.858370155632041, "learning_rate": 8.488909192037528e-06, "loss": 0.526, "step": 2171 }, { "epoch": 0.28, "grad_norm": 0.9027915665559302, "learning_rate": 8.487430926208434e-06, "loss": 0.6456, "step": 2172 }, { "epoch": 0.28, "grad_norm": 0.6498804864319441, "learning_rate": 8.485952066506184e-06, "loss": 0.5328, "step": 2173 }, { "epoch": 0.28, "grad_norm": 0.8150303077014934, "learning_rate": 8.484472613182616e-06, "loss": 0.6359, "step": 2174 }, { "epoch": 0.28, "grad_norm": 0.830216916423939, "learning_rate": 8.482992566489662e-06, "loss": 0.632, "step": 2175 }, { "epoch": 0.28, "grad_norm": 0.6720845793888305, "learning_rate": 8.481511926679359e-06, "loss": 0.5683, "step": 2176 }, { "epoch": 0.28, "grad_norm": 0.8320690779942326, "learning_rate": 8.480030694003845e-06, "loss": 0.6374, "step": 2177 }, { "epoch": 0.28, "grad_norm": 0.7880540618495274, "learning_rate": 8.478548868715357e-06, "loss": 0.6154, "step": 2178 }, { "epoch": 0.28, "grad_norm": 0.8951410434168142, "learning_rate": 8.47706645106624e-06, "loss": 0.7151, "step": 2179 }, { "epoch": 0.28, "grad_norm": 0.8925758755852837, "learning_rate": 8.475583441308928e-06, "loss": 0.6223, "step": 2180 }, { "epoch": 0.28, "grad_norm": 0.6324310257727276, "learning_rate": 8.474099839695964e-06, "loss": 0.5392, "step": 2181 }, { "epoch": 0.28, "grad_norm": 0.7507939165391605, "learning_rate": 8.47261564647999e-06, "loss": 0.5969, "step": 2182 }, { "epoch": 0.28, "grad_norm": 0.787797717060103, "learning_rate": 8.471130861913749e-06, "loss": 0.5623, "step": 2183 }, { "epoch": 0.28, "grad_norm": 0.8845384114704159, "learning_rate": 8.469645486250084e-06, "loss": 0.6375, "step": 2184 }, { "epoch": 0.28, "grad_norm": 0.7383754094764808, "learning_rate": 8.468159519741938e-06, "loss": 0.5635, "step": 2185 }, { "epoch": 0.28, "grad_norm": 0.8098315815047168, "learning_rate": 8.46667296264236e-06, "loss": 0.6853, "step": 2186 }, { "epoch": 0.28, "grad_norm": 0.9490789806280994, "learning_rate": 8.46518581520449e-06, "loss": 0.6579, "step": 2187 }, { "epoch": 0.28, "grad_norm": 0.7422561054979356, "learning_rate": 8.463698077681577e-06, "loss": 0.5551, "step": 2188 }, { "epoch": 0.28, "grad_norm": 0.7858395228715994, "learning_rate": 8.462209750326963e-06, "loss": 0.6412, "step": 2189 }, { "epoch": 0.28, "grad_norm": 0.9364773322415074, "learning_rate": 8.4607208333941e-06, "loss": 0.6497, "step": 2190 }, { "epoch": 0.28, "grad_norm": 0.7143508810113824, "learning_rate": 8.459231327136532e-06, "loss": 0.5127, "step": 2191 }, { "epoch": 0.28, "grad_norm": 1.2191503626928628, "learning_rate": 8.457741231807907e-06, "loss": 0.7118, "step": 2192 }, { "epoch": 0.28, "grad_norm": 0.7736365530016299, "learning_rate": 8.456250547661973e-06, "loss": 0.608, "step": 2193 }, { "epoch": 0.28, "grad_norm": 0.8706011162297675, "learning_rate": 8.454759274952577e-06, "loss": 0.5982, "step": 2194 }, { "epoch": 0.28, "grad_norm": 0.7150820882839987, "learning_rate": 8.453267413933669e-06, "loss": 0.5272, "step": 2195 }, { "epoch": 0.28, "grad_norm": 0.6566668156139139, "learning_rate": 8.451774964859296e-06, "loss": 0.5797, "step": 2196 }, { "epoch": 0.28, "grad_norm": 0.7185485218865221, "learning_rate": 8.450281927983607e-06, "loss": 0.5722, "step": 2197 }, { "epoch": 0.28, "grad_norm": 0.7063778632695681, "learning_rate": 8.44878830356085e-06, "loss": 0.5434, "step": 2198 }, { "epoch": 0.28, "grad_norm": 0.8246634333512559, "learning_rate": 8.447294091845373e-06, "loss": 0.65, "step": 2199 }, { "epoch": 0.28, "grad_norm": 0.6656324357775885, "learning_rate": 8.445799293091628e-06, "loss": 0.5146, "step": 2200 }, { "epoch": 0.28, "grad_norm": 0.7478585890620928, "learning_rate": 8.44430390755416e-06, "loss": 0.532, "step": 2201 }, { "epoch": 0.28, "grad_norm": 0.7583049251737343, "learning_rate": 8.44280793548762e-06, "loss": 0.5526, "step": 2202 }, { "epoch": 0.28, "grad_norm": 0.6890454817297638, "learning_rate": 8.441311377146754e-06, "loss": 0.5538, "step": 2203 }, { "epoch": 0.28, "grad_norm": 0.7254490833623586, "learning_rate": 8.439814232786413e-06, "loss": 0.553, "step": 2204 }, { "epoch": 0.28, "grad_norm": 0.7564763081740711, "learning_rate": 8.438316502661544e-06, "loss": 0.5945, "step": 2205 }, { "epoch": 0.28, "grad_norm": 0.7738617156740418, "learning_rate": 8.436818187027193e-06, "loss": 0.5651, "step": 2206 }, { "epoch": 0.28, "grad_norm": 0.9692124796333613, "learning_rate": 8.435319286138511e-06, "loss": 0.6351, "step": 2207 }, { "epoch": 0.28, "grad_norm": 0.86735897283853, "learning_rate": 8.433819800250744e-06, "loss": 0.667, "step": 2208 }, { "epoch": 0.28, "grad_norm": 1.2284460624112894, "learning_rate": 8.432319729619237e-06, "loss": 0.6638, "step": 2209 }, { "epoch": 0.28, "grad_norm": 0.6791999343574601, "learning_rate": 8.430819074499437e-06, "loss": 0.5608, "step": 2210 }, { "epoch": 0.28, "grad_norm": 0.7325113456201207, "learning_rate": 8.429317835146894e-06, "loss": 0.5556, "step": 2211 }, { "epoch": 0.28, "grad_norm": 0.6633786406712496, "learning_rate": 8.427816011817248e-06, "loss": 0.606, "step": 2212 }, { "epoch": 0.28, "grad_norm": 0.777425623423544, "learning_rate": 8.426313604766244e-06, "loss": 0.6582, "step": 2213 }, { "epoch": 0.28, "grad_norm": 0.8435551586757459, "learning_rate": 8.42481061424973e-06, "loss": 0.6145, "step": 2214 }, { "epoch": 0.28, "grad_norm": 0.697428677223934, "learning_rate": 8.423307040523647e-06, "loss": 0.5103, "step": 2215 }, { "epoch": 0.28, "grad_norm": 0.7217355041361375, "learning_rate": 8.421802883844039e-06, "loss": 0.584, "step": 2216 }, { "epoch": 0.28, "grad_norm": 0.7151265216951087, "learning_rate": 8.42029814446705e-06, "loss": 0.5571, "step": 2217 }, { "epoch": 0.28, "grad_norm": 1.0484622953667317, "learning_rate": 8.418792822648916e-06, "loss": 0.6418, "step": 2218 }, { "epoch": 0.28, "grad_norm": 0.722841475289307, "learning_rate": 8.417286918645983e-06, "loss": 0.5698, "step": 2219 }, { "epoch": 0.28, "grad_norm": 0.8564957188519151, "learning_rate": 8.41578043271469e-06, "loss": 0.7363, "step": 2220 }, { "epoch": 0.28, "grad_norm": 0.9328001249275764, "learning_rate": 8.414273365111573e-06, "loss": 0.6782, "step": 2221 }, { "epoch": 0.28, "grad_norm": 0.7156457025596012, "learning_rate": 8.412765716093273e-06, "loss": 0.5452, "step": 2222 }, { "epoch": 0.28, "grad_norm": 0.8842615329605965, "learning_rate": 8.411257485916521e-06, "loss": 0.6599, "step": 2223 }, { "epoch": 0.28, "grad_norm": 0.7209672383047154, "learning_rate": 8.409748674838162e-06, "loss": 0.5264, "step": 2224 }, { "epoch": 0.28, "grad_norm": 1.2792306948369405, "learning_rate": 8.408239283115125e-06, "loss": 0.649, "step": 2225 }, { "epoch": 0.28, "grad_norm": 0.7170581910545252, "learning_rate": 8.406729311004445e-06, "loss": 0.5572, "step": 2226 }, { "epoch": 0.28, "grad_norm": 1.1869669307617312, "learning_rate": 8.405218758763254e-06, "loss": 0.5861, "step": 2227 }, { "epoch": 0.28, "grad_norm": 0.9366676348224308, "learning_rate": 8.403707626648782e-06, "loss": 0.6616, "step": 2228 }, { "epoch": 0.28, "grad_norm": 0.9093745631113076, "learning_rate": 8.402195914918364e-06, "loss": 0.6157, "step": 2229 }, { "epoch": 0.28, "grad_norm": 0.9514606703634301, "learning_rate": 8.400683623829424e-06, "loss": 0.7011, "step": 2230 }, { "epoch": 0.28, "grad_norm": 0.849618490958349, "learning_rate": 8.399170753639491e-06, "loss": 0.6317, "step": 2231 }, { "epoch": 0.28, "grad_norm": 0.8293474873591513, "learning_rate": 8.39765730460619e-06, "loss": 0.6104, "step": 2232 }, { "epoch": 0.28, "grad_norm": 0.7281890059835165, "learning_rate": 8.396143276987248e-06, "loss": 0.5497, "step": 2233 }, { "epoch": 0.28, "grad_norm": 0.6626105277162515, "learning_rate": 8.394628671040486e-06, "loss": 0.5255, "step": 2234 }, { "epoch": 0.28, "grad_norm": 0.7165942482190107, "learning_rate": 8.393113487023828e-06, "loss": 0.5581, "step": 2235 }, { "epoch": 0.28, "grad_norm": 0.7369761403757858, "learning_rate": 8.39159772519529e-06, "loss": 0.5464, "step": 2236 }, { "epoch": 0.28, "grad_norm": 0.8380325066830904, "learning_rate": 8.390081385812993e-06, "loss": 0.6802, "step": 2237 }, { "epoch": 0.29, "grad_norm": 0.7614642265833558, "learning_rate": 8.388564469135155e-06, "loss": 0.5304, "step": 2238 }, { "epoch": 0.29, "grad_norm": 0.811008603365971, "learning_rate": 8.38704697542009e-06, "loss": 0.6395, "step": 2239 }, { "epoch": 0.29, "grad_norm": 0.8465238608175868, "learning_rate": 8.385528904926209e-06, "loss": 0.5597, "step": 2240 }, { "epoch": 0.29, "grad_norm": 0.7472055324454421, "learning_rate": 8.384010257912026e-06, "loss": 0.5564, "step": 2241 }, { "epoch": 0.29, "grad_norm": 0.7080499019280277, "learning_rate": 8.382491034636151e-06, "loss": 0.5721, "step": 2242 }, { "epoch": 0.29, "grad_norm": 0.6533341600262631, "learning_rate": 8.380971235357292e-06, "loss": 0.5327, "step": 2243 }, { "epoch": 0.29, "grad_norm": 0.939988898220049, "learning_rate": 8.379450860334253e-06, "loss": 0.6453, "step": 2244 }, { "epoch": 0.29, "grad_norm": 0.80635138992417, "learning_rate": 8.377929909825942e-06, "loss": 0.5354, "step": 2245 }, { "epoch": 0.29, "grad_norm": 0.9563518196570222, "learning_rate": 8.376408384091354e-06, "loss": 0.6526, "step": 2246 }, { "epoch": 0.29, "grad_norm": 0.9389607629729652, "learning_rate": 8.374886283389597e-06, "loss": 0.6261, "step": 2247 }, { "epoch": 0.29, "grad_norm": 0.8393685822603515, "learning_rate": 8.373363607979865e-06, "loss": 0.5596, "step": 2248 }, { "epoch": 0.29, "grad_norm": 0.7701859541160611, "learning_rate": 8.371840358121452e-06, "loss": 0.5583, "step": 2249 }, { "epoch": 0.29, "grad_norm": 0.6863315195615804, "learning_rate": 8.370316534073754e-06, "loss": 0.5918, "step": 2250 }, { "epoch": 0.29, "grad_norm": 0.7404503845856089, "learning_rate": 8.368792136096265e-06, "loss": 0.5826, "step": 2251 }, { "epoch": 0.29, "grad_norm": 1.1365211872584071, "learning_rate": 8.367267164448569e-06, "loss": 0.6445, "step": 2252 }, { "epoch": 0.29, "grad_norm": 0.6686324976583061, "learning_rate": 8.365741619390353e-06, "loss": 0.573, "step": 2253 }, { "epoch": 0.29, "grad_norm": 0.6669577025316342, "learning_rate": 8.364215501181405e-06, "loss": 0.56, "step": 2254 }, { "epoch": 0.29, "grad_norm": 0.7743203676057054, "learning_rate": 8.362688810081605e-06, "loss": 0.5571, "step": 2255 }, { "epoch": 0.29, "grad_norm": 0.6463048592606979, "learning_rate": 8.361161546350934e-06, "loss": 0.4956, "step": 2256 }, { "epoch": 0.29, "grad_norm": 0.6923095161337977, "learning_rate": 8.359633710249467e-06, "loss": 0.5083, "step": 2257 }, { "epoch": 0.29, "grad_norm": 0.70794842662976, "learning_rate": 8.35810530203738e-06, "loss": 0.6492, "step": 2258 }, { "epoch": 0.29, "grad_norm": 0.9378970275223311, "learning_rate": 8.356576321974943e-06, "loss": 0.5743, "step": 2259 }, { "epoch": 0.29, "grad_norm": 0.9333887998052925, "learning_rate": 8.355046770322528e-06, "loss": 0.6174, "step": 2260 }, { "epoch": 0.29, "grad_norm": 0.944990552343333, "learning_rate": 8.353516647340601e-06, "loss": 0.6071, "step": 2261 }, { "epoch": 0.29, "grad_norm": 0.7379723372631333, "learning_rate": 8.351985953289725e-06, "loss": 0.5888, "step": 2262 }, { "epoch": 0.29, "grad_norm": 1.0192604728391195, "learning_rate": 8.350454688430563e-06, "loss": 0.6379, "step": 2263 }, { "epoch": 0.29, "grad_norm": 0.8257305988462342, "learning_rate": 8.348922853023872e-06, "loss": 0.6037, "step": 2264 }, { "epoch": 0.29, "grad_norm": 0.650025901664581, "learning_rate": 8.347390447330507e-06, "loss": 0.5053, "step": 2265 }, { "epoch": 0.29, "grad_norm": 0.8250901334042201, "learning_rate": 8.345857471611423e-06, "loss": 0.5287, "step": 2266 }, { "epoch": 0.29, "grad_norm": 0.7989820808565055, "learning_rate": 8.344323926127666e-06, "loss": 0.5452, "step": 2267 }, { "epoch": 0.29, "grad_norm": 0.8819463352133603, "learning_rate": 8.342789811140388e-06, "loss": 0.5485, "step": 2268 }, { "epoch": 0.29, "grad_norm": 0.7577632775211289, "learning_rate": 8.341255126910827e-06, "loss": 0.5688, "step": 2269 }, { "epoch": 0.29, "grad_norm": 0.9011162908581771, "learning_rate": 8.33971987370033e-06, "loss": 0.655, "step": 2270 }, { "epoch": 0.29, "grad_norm": 0.9716433692526733, "learning_rate": 8.33818405177033e-06, "loss": 0.6465, "step": 2271 }, { "epoch": 0.29, "grad_norm": 0.8084445685435369, "learning_rate": 8.336647661382364e-06, "loss": 0.6368, "step": 2272 }, { "epoch": 0.29, "grad_norm": 0.7236691898606522, "learning_rate": 8.335110702798061e-06, "loss": 0.5251, "step": 2273 }, { "epoch": 0.29, "grad_norm": 1.1478440821584148, "learning_rate": 8.333573176279151e-06, "loss": 0.5994, "step": 2274 }, { "epoch": 0.29, "grad_norm": 0.7554411600406318, "learning_rate": 8.332035082087457e-06, "loss": 0.5746, "step": 2275 }, { "epoch": 0.29, "grad_norm": 0.9094963471762483, "learning_rate": 8.330496420484904e-06, "loss": 0.6002, "step": 2276 }, { "epoch": 0.29, "grad_norm": 0.8301530304303779, "learning_rate": 8.328957191733504e-06, "loss": 0.6467, "step": 2277 }, { "epoch": 0.29, "grad_norm": 0.8254058430689047, "learning_rate": 8.327417396095377e-06, "loss": 0.647, "step": 2278 }, { "epoch": 0.29, "grad_norm": 0.8778719242595915, "learning_rate": 8.325877033832732e-06, "loss": 0.6989, "step": 2279 }, { "epoch": 0.29, "grad_norm": 1.0124746902448545, "learning_rate": 8.324336105207878e-06, "loss": 0.6239, "step": 2280 }, { "epoch": 0.29, "grad_norm": 0.8055130580747742, "learning_rate": 8.322794610483216e-06, "loss": 0.6248, "step": 2281 }, { "epoch": 0.29, "grad_norm": 1.1166269802304396, "learning_rate": 8.321252549921248e-06, "loss": 0.6788, "step": 2282 }, { "epoch": 0.29, "grad_norm": 0.7585865734303134, "learning_rate": 8.319709923784573e-06, "loss": 0.5829, "step": 2283 }, { "epoch": 0.29, "grad_norm": 0.8856231181748737, "learning_rate": 8.318166732335884e-06, "loss": 0.6253, "step": 2284 }, { "epoch": 0.29, "grad_norm": 0.8972869238426218, "learning_rate": 8.316622975837966e-06, "loss": 0.694, "step": 2285 }, { "epoch": 0.29, "grad_norm": 0.9705902192043009, "learning_rate": 8.315078654553708e-06, "loss": 0.6195, "step": 2286 }, { "epoch": 0.29, "grad_norm": 0.7017308734996244, "learning_rate": 8.313533768746091e-06, "loss": 0.5384, "step": 2287 }, { "epoch": 0.29, "grad_norm": 1.0124319702531217, "learning_rate": 8.311988318678194e-06, "loss": 0.6089, "step": 2288 }, { "epoch": 0.29, "grad_norm": 0.7586416027830107, "learning_rate": 8.310442304613191e-06, "loss": 0.5994, "step": 2289 }, { "epoch": 0.29, "grad_norm": 0.8293707163254753, "learning_rate": 8.308895726814351e-06, "loss": 0.6004, "step": 2290 }, { "epoch": 0.29, "grad_norm": 0.7028920180702602, "learning_rate": 8.30734858554504e-06, "loss": 0.5458, "step": 2291 }, { "epoch": 0.29, "grad_norm": 0.7471238512523348, "learning_rate": 8.305800881068723e-06, "loss": 0.6993, "step": 2292 }, { "epoch": 0.29, "grad_norm": 0.8885433476333644, "learning_rate": 8.304252613648951e-06, "loss": 0.6521, "step": 2293 }, { "epoch": 0.29, "grad_norm": 0.9279617129950012, "learning_rate": 8.302703783549386e-06, "loss": 0.7125, "step": 2294 }, { "epoch": 0.29, "grad_norm": 0.8296039701038359, "learning_rate": 8.301154391033772e-06, "loss": 0.6176, "step": 2295 }, { "epoch": 0.29, "grad_norm": 0.965010297378682, "learning_rate": 8.299604436365959e-06, "loss": 0.5779, "step": 2296 }, { "epoch": 0.29, "grad_norm": 0.693672460330611, "learning_rate": 8.298053919809883e-06, "loss": 0.5199, "step": 2297 }, { "epoch": 0.29, "grad_norm": 0.833706684496561, "learning_rate": 8.296502841629583e-06, "loss": 0.5847, "step": 2298 }, { "epoch": 0.29, "grad_norm": 0.8793859250764897, "learning_rate": 8.294951202089193e-06, "loss": 0.6586, "step": 2299 }, { "epoch": 0.29, "grad_norm": 0.754715350764544, "learning_rate": 8.293399001452941e-06, "loss": 0.5095, "step": 2300 }, { "epoch": 0.29, "grad_norm": 0.8577411749327027, "learning_rate": 8.291846239985149e-06, "loss": 0.5888, "step": 2301 }, { "epoch": 0.29, "grad_norm": 1.0214445655733677, "learning_rate": 8.290292917950236e-06, "loss": 0.7071, "step": 2302 }, { "epoch": 0.29, "grad_norm": 0.7875576995868492, "learning_rate": 8.288739035612717e-06, "loss": 0.6436, "step": 2303 }, { "epoch": 0.29, "grad_norm": 0.6641594132293381, "learning_rate": 8.287184593237201e-06, "loss": 0.5595, "step": 2304 }, { "epoch": 0.29, "grad_norm": 0.7055590548372608, "learning_rate": 8.285629591088395e-06, "loss": 0.5394, "step": 2305 }, { "epoch": 0.29, "grad_norm": 0.8464940831198519, "learning_rate": 8.2840740294311e-06, "loss": 0.5961, "step": 2306 }, { "epoch": 0.29, "grad_norm": 0.642164613007127, "learning_rate": 8.28251790853021e-06, "loss": 0.4882, "step": 2307 }, { "epoch": 0.29, "grad_norm": 0.788710077963095, "learning_rate": 8.280961228650717e-06, "loss": 0.5749, "step": 2308 }, { "epoch": 0.29, "grad_norm": 0.6686451690887508, "learning_rate": 8.279403990057709e-06, "loss": 0.5973, "step": 2309 }, { "epoch": 0.29, "grad_norm": 0.8393608167533704, "learning_rate": 8.277846193016365e-06, "loss": 0.617, "step": 2310 }, { "epoch": 0.29, "grad_norm": 0.8723931358823335, "learning_rate": 8.276287837791964e-06, "loss": 0.5834, "step": 2311 }, { "epoch": 0.29, "grad_norm": 0.7358226441137731, "learning_rate": 8.274728924649875e-06, "loss": 0.5282, "step": 2312 }, { "epoch": 0.29, "grad_norm": 0.7070045408010385, "learning_rate": 8.273169453855567e-06, "loss": 0.5765, "step": 2313 }, { "epoch": 0.29, "grad_norm": 0.6809552001776816, "learning_rate": 8.271609425674599e-06, "loss": 0.5126, "step": 2314 }, { "epoch": 0.29, "grad_norm": 0.673713676762806, "learning_rate": 8.27004884037263e-06, "loss": 0.5998, "step": 2315 }, { "epoch": 0.3, "grad_norm": 0.7309875676978831, "learning_rate": 8.268487698215411e-06, "loss": 0.5706, "step": 2316 }, { "epoch": 0.3, "grad_norm": 0.7287597480529917, "learning_rate": 8.266925999468788e-06, "loss": 0.5583, "step": 2317 }, { "epoch": 0.3, "grad_norm": 0.6670359682652354, "learning_rate": 8.265363744398701e-06, "loss": 0.5383, "step": 2318 }, { "epoch": 0.3, "grad_norm": 0.6799576113854798, "learning_rate": 8.263800933271186e-06, "loss": 0.51, "step": 2319 }, { "epoch": 0.3, "grad_norm": 0.662289019802724, "learning_rate": 8.262237566352376e-06, "loss": 0.5019, "step": 2320 }, { "epoch": 0.3, "grad_norm": 0.8462811630433921, "learning_rate": 8.260673643908495e-06, "loss": 0.6381, "step": 2321 }, { "epoch": 0.3, "grad_norm": 0.6189393677821143, "learning_rate": 8.25910916620586e-06, "loss": 0.5138, "step": 2322 }, { "epoch": 0.3, "grad_norm": 0.7925747174293012, "learning_rate": 8.257544133510887e-06, "loss": 0.5927, "step": 2323 }, { "epoch": 0.3, "grad_norm": 1.0187331849280385, "learning_rate": 8.255978546090085e-06, "loss": 0.6674, "step": 2324 }, { "epoch": 0.3, "grad_norm": 0.7066273633781068, "learning_rate": 8.254412404210056e-06, "loss": 0.5002, "step": 2325 }, { "epoch": 0.3, "grad_norm": 0.6916764410112194, "learning_rate": 8.252845708137502e-06, "loss": 0.5348, "step": 2326 }, { "epoch": 0.3, "grad_norm": 0.8730779608962944, "learning_rate": 8.25127845813921e-06, "loss": 0.6412, "step": 2327 }, { "epoch": 0.3, "grad_norm": 0.8922911818776277, "learning_rate": 8.249710654482067e-06, "loss": 0.6298, "step": 2328 }, { "epoch": 0.3, "grad_norm": 1.2167202797266854, "learning_rate": 8.248142297433058e-06, "loss": 0.6156, "step": 2329 }, { "epoch": 0.3, "grad_norm": 0.764427728420986, "learning_rate": 8.246573387259253e-06, "loss": 0.581, "step": 2330 }, { "epoch": 0.3, "grad_norm": 0.795355208072967, "learning_rate": 8.245003924227821e-06, "loss": 0.593, "step": 2331 }, { "epoch": 0.3, "grad_norm": 0.6713290959598207, "learning_rate": 8.243433908606029e-06, "loss": 0.5503, "step": 2332 }, { "epoch": 0.3, "grad_norm": 0.7684664533688154, "learning_rate": 8.24186334066123e-06, "loss": 0.6085, "step": 2333 }, { "epoch": 0.3, "grad_norm": 0.6375726576552677, "learning_rate": 8.240292220660881e-06, "loss": 0.5144, "step": 2334 }, { "epoch": 0.3, "grad_norm": 0.9105157625967314, "learning_rate": 8.238720548872523e-06, "loss": 0.6389, "step": 2335 }, { "epoch": 0.3, "grad_norm": 0.8387875591422956, "learning_rate": 8.237148325563795e-06, "loss": 0.6228, "step": 2336 }, { "epoch": 0.3, "grad_norm": 0.6983564368234124, "learning_rate": 8.23557555100243e-06, "loss": 0.56, "step": 2337 }, { "epoch": 0.3, "grad_norm": 0.7232134892748823, "learning_rate": 8.234002225456259e-06, "loss": 0.6053, "step": 2338 }, { "epoch": 0.3, "grad_norm": 0.7311475337497146, "learning_rate": 8.232428349193201e-06, "loss": 0.5067, "step": 2339 }, { "epoch": 0.3, "grad_norm": 0.8301871923589819, "learning_rate": 8.23085392248127e-06, "loss": 0.6399, "step": 2340 }, { "epoch": 0.3, "grad_norm": 1.1605622126682777, "learning_rate": 8.229278945588574e-06, "loss": 0.6874, "step": 2341 }, { "epoch": 0.3, "grad_norm": 0.7515228924479411, "learning_rate": 8.227703418783315e-06, "loss": 0.5694, "step": 2342 }, { "epoch": 0.3, "grad_norm": 0.7367554868434087, "learning_rate": 8.226127342333791e-06, "loss": 0.5751, "step": 2343 }, { "epoch": 0.3, "grad_norm": 1.1195425747080676, "learning_rate": 8.224550716508391e-06, "loss": 0.7148, "step": 2344 }, { "epoch": 0.3, "grad_norm": 0.9707320690293301, "learning_rate": 8.222973541575596e-06, "loss": 0.6403, "step": 2345 }, { "epoch": 0.3, "grad_norm": 0.7795332197889939, "learning_rate": 8.221395817803983e-06, "loss": 0.5996, "step": 2346 }, { "epoch": 0.3, "grad_norm": 0.799105812375165, "learning_rate": 8.219817545462223e-06, "loss": 0.6068, "step": 2347 }, { "epoch": 0.3, "grad_norm": 1.0960130367747722, "learning_rate": 8.218238724819079e-06, "loss": 0.6576, "step": 2348 }, { "epoch": 0.3, "grad_norm": 0.758546164841495, "learning_rate": 8.216659356143409e-06, "loss": 0.6209, "step": 2349 }, { "epoch": 0.3, "grad_norm": 0.9558347850765019, "learning_rate": 8.215079439704159e-06, "loss": 0.6516, "step": 2350 }, { "epoch": 0.3, "grad_norm": 0.8539978001923682, "learning_rate": 8.213498975770376e-06, "loss": 0.5873, "step": 2351 }, { "epoch": 0.3, "grad_norm": 0.8760203002805484, "learning_rate": 8.211917964611197e-06, "loss": 0.7309, "step": 2352 }, { "epoch": 0.3, "grad_norm": 0.9270815012110396, "learning_rate": 8.210336406495846e-06, "loss": 0.5724, "step": 2353 }, { "epoch": 0.3, "grad_norm": 0.7070308310026626, "learning_rate": 8.208754301693653e-06, "loss": 0.5629, "step": 2354 }, { "epoch": 0.3, "grad_norm": 0.912194533535536, "learning_rate": 8.207171650474031e-06, "loss": 0.6649, "step": 2355 }, { "epoch": 0.3, "grad_norm": 0.8121180500589926, "learning_rate": 8.20558845310649e-06, "loss": 0.559, "step": 2356 }, { "epoch": 0.3, "grad_norm": 0.9841246901195542, "learning_rate": 8.20400470986063e-06, "loss": 0.6554, "step": 2357 }, { "epoch": 0.3, "grad_norm": 0.9320436045071452, "learning_rate": 8.202420421006145e-06, "loss": 0.6312, "step": 2358 }, { "epoch": 0.3, "grad_norm": 0.6420075735054956, "learning_rate": 8.200835586812827e-06, "loss": 0.5281, "step": 2359 }, { "epoch": 0.3, "grad_norm": 0.882644943412692, "learning_rate": 8.199250207550553e-06, "loss": 0.6597, "step": 2360 }, { "epoch": 0.3, "grad_norm": 0.8829214326658736, "learning_rate": 8.1976642834893e-06, "loss": 0.6419, "step": 2361 }, { "epoch": 0.3, "grad_norm": 0.7359545705589885, "learning_rate": 8.196077814899133e-06, "loss": 0.6393, "step": 2362 }, { "epoch": 0.3, "grad_norm": 0.8197324154581176, "learning_rate": 8.194490802050207e-06, "loss": 0.7147, "step": 2363 }, { "epoch": 0.3, "grad_norm": 0.7461906058265823, "learning_rate": 8.192903245212783e-06, "loss": 0.5423, "step": 2364 }, { "epoch": 0.3, "grad_norm": 0.7084348334767367, "learning_rate": 8.191315144657195e-06, "loss": 0.5623, "step": 2365 }, { "epoch": 0.3, "grad_norm": 0.7559982367656779, "learning_rate": 8.189726500653885e-06, "loss": 0.6456, "step": 2366 }, { "epoch": 0.3, "grad_norm": 0.8946669145049603, "learning_rate": 8.188137313473385e-06, "loss": 0.7122, "step": 2367 }, { "epoch": 0.3, "grad_norm": 1.1765562511966037, "learning_rate": 8.186547583386313e-06, "loss": 0.6626, "step": 2368 }, { "epoch": 0.3, "grad_norm": 0.7738727324336859, "learning_rate": 8.184957310663383e-06, "loss": 0.6152, "step": 2369 }, { "epoch": 0.3, "grad_norm": 0.93837278706296, "learning_rate": 8.183366495575407e-06, "loss": 0.6255, "step": 2370 }, { "epoch": 0.3, "grad_norm": 0.7344891325672279, "learning_rate": 8.18177513839328e-06, "loss": 0.6352, "step": 2371 }, { "epoch": 0.3, "grad_norm": 0.8200370488893685, "learning_rate": 8.180183239387993e-06, "loss": 0.6523, "step": 2372 }, { "epoch": 0.3, "grad_norm": 0.8367676183361057, "learning_rate": 8.178590798830632e-06, "loss": 0.6063, "step": 2373 }, { "epoch": 0.3, "grad_norm": 0.679871202151405, "learning_rate": 8.176997816992371e-06, "loss": 0.594, "step": 2374 }, { "epoch": 0.3, "grad_norm": 0.8633656632553506, "learning_rate": 8.175404294144482e-06, "loss": 0.6361, "step": 2375 }, { "epoch": 0.3, "grad_norm": 0.7516707451458304, "learning_rate": 8.173810230558321e-06, "loss": 0.5853, "step": 2376 }, { "epoch": 0.3, "grad_norm": 0.6695076153640627, "learning_rate": 8.172215626505342e-06, "loss": 0.5495, "step": 2377 }, { "epoch": 0.3, "grad_norm": 0.7809538479366334, "learning_rate": 8.170620482257091e-06, "loss": 0.5839, "step": 2378 }, { "epoch": 0.3, "grad_norm": 0.6874530502127046, "learning_rate": 8.169024798085203e-06, "loss": 0.5773, "step": 2379 }, { "epoch": 0.3, "grad_norm": 0.7855193159842875, "learning_rate": 8.167428574261405e-06, "loss": 0.6299, "step": 2380 }, { "epoch": 0.3, "grad_norm": 0.8657778906017942, "learning_rate": 8.16583181105752e-06, "loss": 0.6432, "step": 2381 }, { "epoch": 0.3, "grad_norm": 0.6966905917563424, "learning_rate": 8.16423450874546e-06, "loss": 0.5877, "step": 2382 }, { "epoch": 0.3, "grad_norm": 0.6483608972281321, "learning_rate": 8.162636667597227e-06, "loss": 0.5026, "step": 2383 }, { "epoch": 0.3, "grad_norm": 0.8628910982855723, "learning_rate": 8.161038287884917e-06, "loss": 0.6635, "step": 2384 }, { "epoch": 0.3, "grad_norm": 0.784810443655044, "learning_rate": 8.15943936988072e-06, "loss": 0.5554, "step": 2385 }, { "epoch": 0.3, "grad_norm": 0.8347137420278369, "learning_rate": 8.15783991385691e-06, "loss": 0.6538, "step": 2386 }, { "epoch": 0.3, "grad_norm": 0.7984138701956977, "learning_rate": 8.156239920085863e-06, "loss": 0.5875, "step": 2387 }, { "epoch": 0.3, "grad_norm": 0.7000855657456476, "learning_rate": 8.154639388840037e-06, "loss": 0.5287, "step": 2388 }, { "epoch": 0.3, "grad_norm": 0.9287400096514483, "learning_rate": 8.15303832039199e-06, "loss": 0.6628, "step": 2389 }, { "epoch": 0.3, "grad_norm": 0.7305778435529804, "learning_rate": 8.151436715014365e-06, "loss": 0.5841, "step": 2390 }, { "epoch": 0.3, "grad_norm": 0.87603672255926, "learning_rate": 8.149834572979898e-06, "loss": 0.5822, "step": 2391 }, { "epoch": 0.3, "grad_norm": 0.7445847748529795, "learning_rate": 8.148231894561419e-06, "loss": 0.5502, "step": 2392 }, { "epoch": 0.3, "grad_norm": 0.709503236081038, "learning_rate": 8.146628680031846e-06, "loss": 0.5739, "step": 2393 }, { "epoch": 0.3, "grad_norm": 0.691261548060571, "learning_rate": 8.145024929664189e-06, "loss": 0.5187, "step": 2394 }, { "epoch": 0.31, "grad_norm": 0.6685156980087661, "learning_rate": 8.143420643731552e-06, "loss": 0.6101, "step": 2395 }, { "epoch": 0.31, "grad_norm": 0.7061501973708891, "learning_rate": 8.141815822507127e-06, "loss": 0.5506, "step": 2396 }, { "epoch": 0.31, "grad_norm": 0.797025716621591, "learning_rate": 8.1402104662642e-06, "loss": 0.6619, "step": 2397 }, { "epoch": 0.31, "grad_norm": 0.8455375934747812, "learning_rate": 8.138604575276143e-06, "loss": 0.695, "step": 2398 }, { "epoch": 0.31, "grad_norm": 0.7057355020350179, "learning_rate": 8.136998149816426e-06, "loss": 0.5822, "step": 2399 }, { "epoch": 0.31, "grad_norm": 0.6883007978932285, "learning_rate": 8.135391190158604e-06, "loss": 0.5306, "step": 2400 }, { "epoch": 0.31, "grad_norm": 1.1434335361647263, "learning_rate": 8.133783696576328e-06, "loss": 0.7271, "step": 2401 }, { "epoch": 0.31, "grad_norm": 0.9714201781879716, "learning_rate": 8.132175669343334e-06, "loss": 0.7127, "step": 2402 }, { "epoch": 0.31, "grad_norm": 0.6714920337965887, "learning_rate": 8.130567108733456e-06, "loss": 0.5244, "step": 2403 }, { "epoch": 0.31, "grad_norm": 0.7969258703829071, "learning_rate": 8.128958015020615e-06, "loss": 0.5628, "step": 2404 }, { "epoch": 0.31, "grad_norm": 0.6185889595083937, "learning_rate": 8.127348388478817e-06, "loss": 0.4908, "step": 2405 }, { "epoch": 0.31, "grad_norm": 0.6214940054709104, "learning_rate": 8.125738229382172e-06, "loss": 0.5299, "step": 2406 }, { "epoch": 0.31, "grad_norm": 0.6481716959167375, "learning_rate": 8.124127538004868e-06, "loss": 0.5694, "step": 2407 }, { "epoch": 0.31, "grad_norm": 0.8864681221105619, "learning_rate": 8.122516314621194e-06, "loss": 0.6452, "step": 2408 }, { "epoch": 0.31, "grad_norm": 0.6381478772664615, "learning_rate": 8.120904559505521e-06, "loss": 0.5594, "step": 2409 }, { "epoch": 0.31, "grad_norm": 0.6471198915573602, "learning_rate": 8.119292272932313e-06, "loss": 0.5577, "step": 2410 }, { "epoch": 0.31, "grad_norm": 0.9268059780946492, "learning_rate": 8.117679455176129e-06, "loss": 0.5967, "step": 2411 }, { "epoch": 0.31, "grad_norm": 0.671799763037132, "learning_rate": 8.116066106511614e-06, "loss": 0.5659, "step": 2412 }, { "epoch": 0.31, "grad_norm": 0.8725333658708626, "learning_rate": 8.114452227213504e-06, "loss": 0.6023, "step": 2413 }, { "epoch": 0.31, "grad_norm": 0.7179522017545644, "learning_rate": 8.112837817556622e-06, "loss": 0.5396, "step": 2414 }, { "epoch": 0.31, "grad_norm": 0.956163501847583, "learning_rate": 8.111222877815893e-06, "loss": 0.6173, "step": 2415 }, { "epoch": 0.31, "grad_norm": 0.6409828894773908, "learning_rate": 8.10960740826632e-06, "loss": 0.5101, "step": 2416 }, { "epoch": 0.31, "grad_norm": 0.73718109085478, "learning_rate": 8.107991409183e-06, "loss": 0.5397, "step": 2417 }, { "epoch": 0.31, "grad_norm": 0.7972367992417654, "learning_rate": 8.106374880841122e-06, "loss": 0.609, "step": 2418 }, { "epoch": 0.31, "grad_norm": 1.0962035078179866, "learning_rate": 8.104757823515963e-06, "loss": 0.6826, "step": 2419 }, { "epoch": 0.31, "grad_norm": 0.8999997635617961, "learning_rate": 8.103140237482893e-06, "loss": 0.607, "step": 2420 }, { "epoch": 0.31, "grad_norm": 0.7633029670947203, "learning_rate": 8.10152212301737e-06, "loss": 0.5786, "step": 2421 }, { "epoch": 0.31, "grad_norm": 0.8641991505547325, "learning_rate": 8.09990348039494e-06, "loss": 0.6185, "step": 2422 }, { "epoch": 0.31, "grad_norm": 0.6753976871296726, "learning_rate": 8.09828430989124e-06, "loss": 0.5593, "step": 2423 }, { "epoch": 0.31, "grad_norm": 1.3453134465895997, "learning_rate": 8.096664611782e-06, "loss": 0.6555, "step": 2424 }, { "epoch": 0.31, "grad_norm": 0.6283961223166129, "learning_rate": 8.09504438634304e-06, "loss": 0.5187, "step": 2425 }, { "epoch": 0.31, "grad_norm": 0.8093742672212454, "learning_rate": 8.093423633850264e-06, "loss": 0.6538, "step": 2426 }, { "epoch": 0.31, "grad_norm": 0.7708124981369415, "learning_rate": 8.09180235457967e-06, "loss": 0.5891, "step": 2427 }, { "epoch": 0.31, "grad_norm": 0.8635398702545797, "learning_rate": 8.090180548807345e-06, "loss": 0.6664, "step": 2428 }, { "epoch": 0.31, "grad_norm": 0.8315911275206549, "learning_rate": 8.088558216809466e-06, "loss": 0.6638, "step": 2429 }, { "epoch": 0.31, "grad_norm": 0.9211971888802181, "learning_rate": 8.086935358862298e-06, "loss": 0.7258, "step": 2430 }, { "epoch": 0.31, "grad_norm": 0.6950157196344704, "learning_rate": 8.085311975242197e-06, "loss": 0.5816, "step": 2431 }, { "epoch": 0.31, "grad_norm": 0.8014085377556148, "learning_rate": 8.083688066225612e-06, "loss": 0.6427, "step": 2432 }, { "epoch": 0.31, "grad_norm": 1.5068086702278343, "learning_rate": 8.082063632089074e-06, "loss": 0.6234, "step": 2433 }, { "epoch": 0.31, "grad_norm": 1.0546594702262562, "learning_rate": 8.080438673109208e-06, "loss": 0.5803, "step": 2434 }, { "epoch": 0.31, "grad_norm": 0.7247851083692188, "learning_rate": 8.078813189562726e-06, "loss": 0.5556, "step": 2435 }, { "epoch": 0.31, "grad_norm": 0.9504107091406003, "learning_rate": 8.077187181726435e-06, "loss": 0.6036, "step": 2436 }, { "epoch": 0.31, "grad_norm": 0.7090271250440942, "learning_rate": 8.075560649877225e-06, "loss": 0.5441, "step": 2437 }, { "epoch": 0.31, "grad_norm": 0.9825085742197898, "learning_rate": 8.073933594292076e-06, "loss": 0.6299, "step": 2438 }, { "epoch": 0.31, "grad_norm": 0.8955139156672639, "learning_rate": 8.07230601524806e-06, "loss": 0.6486, "step": 2439 }, { "epoch": 0.31, "grad_norm": 1.1949433903896163, "learning_rate": 8.070677913022337e-06, "loss": 0.675, "step": 2440 }, { "epoch": 0.31, "grad_norm": 0.7250620937795847, "learning_rate": 8.069049287892156e-06, "loss": 0.5646, "step": 2441 }, { "epoch": 0.31, "grad_norm": 0.6894576515771151, "learning_rate": 8.067420140134853e-06, "loss": 0.5368, "step": 2442 }, { "epoch": 0.31, "grad_norm": 0.9022798191370169, "learning_rate": 8.065790470027858e-06, "loss": 0.651, "step": 2443 }, { "epoch": 0.31, "grad_norm": 0.6533579034497073, "learning_rate": 8.064160277848683e-06, "loss": 0.5669, "step": 2444 }, { "epoch": 0.31, "grad_norm": 0.8932472826635292, "learning_rate": 8.062529563874936e-06, "loss": 0.6869, "step": 2445 }, { "epoch": 0.31, "grad_norm": 0.8594734332609194, "learning_rate": 8.06089832838431e-06, "loss": 0.6371, "step": 2446 }, { "epoch": 0.31, "grad_norm": 0.8717000748731434, "learning_rate": 8.059266571654587e-06, "loss": 0.5834, "step": 2447 }, { "epoch": 0.31, "grad_norm": 0.6808450695348428, "learning_rate": 8.057634293963638e-06, "loss": 0.5526, "step": 2448 }, { "epoch": 0.31, "grad_norm": 0.7191729396321469, "learning_rate": 8.056001495589423e-06, "loss": 0.55, "step": 2449 }, { "epoch": 0.31, "grad_norm": 0.757146767073832, "learning_rate": 8.054368176809989e-06, "loss": 0.5379, "step": 2450 }, { "epoch": 0.31, "grad_norm": 0.637281541931795, "learning_rate": 8.052734337903475e-06, "loss": 0.5413, "step": 2451 }, { "epoch": 0.31, "grad_norm": 0.9473006707620221, "learning_rate": 8.051099979148107e-06, "loss": 0.688, "step": 2452 }, { "epoch": 0.31, "grad_norm": 0.9958919657111568, "learning_rate": 8.049465100822199e-06, "loss": 0.6938, "step": 2453 }, { "epoch": 0.31, "grad_norm": 0.6531987531351056, "learning_rate": 8.047829703204153e-06, "loss": 0.5455, "step": 2454 }, { "epoch": 0.31, "grad_norm": 0.7334787860884254, "learning_rate": 8.04619378657246e-06, "loss": 0.6432, "step": 2455 }, { "epoch": 0.31, "grad_norm": 0.7826736634056337, "learning_rate": 8.044557351205701e-06, "loss": 0.5645, "step": 2456 }, { "epoch": 0.31, "grad_norm": 0.8053293519694975, "learning_rate": 8.042920397382545e-06, "loss": 0.5477, "step": 2457 }, { "epoch": 0.31, "grad_norm": 0.9279243409733561, "learning_rate": 8.041282925381744e-06, "loss": 0.6708, "step": 2458 }, { "epoch": 0.31, "grad_norm": 0.7008979599487158, "learning_rate": 8.039644935482146e-06, "loss": 0.5515, "step": 2459 }, { "epoch": 0.31, "grad_norm": 0.8258988150015644, "learning_rate": 8.03800642796268e-06, "loss": 0.5415, "step": 2460 }, { "epoch": 0.31, "grad_norm": 0.7278754952623744, "learning_rate": 8.03636740310237e-06, "loss": 0.5939, "step": 2461 }, { "epoch": 0.31, "grad_norm": 0.8589164665755079, "learning_rate": 8.034727861180325e-06, "loss": 0.6438, "step": 2462 }, { "epoch": 0.31, "grad_norm": 0.6572701827942246, "learning_rate": 8.033087802475738e-06, "loss": 0.5033, "step": 2463 }, { "epoch": 0.31, "grad_norm": 0.6961925136844825, "learning_rate": 8.031447227267899e-06, "loss": 0.5616, "step": 2464 }, { "epoch": 0.31, "grad_norm": 0.729922186668422, "learning_rate": 8.029806135836179e-06, "loss": 0.5667, "step": 2465 }, { "epoch": 0.31, "grad_norm": 0.6895181293323871, "learning_rate": 8.028164528460038e-06, "loss": 0.5536, "step": 2466 }, { "epoch": 0.31, "grad_norm": 0.8285958684695105, "learning_rate": 8.026522405419024e-06, "loss": 0.6401, "step": 2467 }, { "epoch": 0.31, "grad_norm": 0.8209750437508495, "learning_rate": 8.024879766992775e-06, "loss": 0.5852, "step": 2468 }, { "epoch": 0.31, "grad_norm": 0.8493622450233307, "learning_rate": 8.023236613461015e-06, "loss": 0.6564, "step": 2469 }, { "epoch": 0.31, "grad_norm": 0.6537449367996405, "learning_rate": 8.021592945103556e-06, "loss": 0.5613, "step": 2470 }, { "epoch": 0.31, "grad_norm": 0.696603681320776, "learning_rate": 8.019948762200296e-06, "loss": 0.6168, "step": 2471 }, { "epoch": 0.31, "grad_norm": 0.7477846669270151, "learning_rate": 8.018304065031224e-06, "loss": 0.5684, "step": 2472 }, { "epoch": 0.32, "grad_norm": 0.7825913723700246, "learning_rate": 8.016658853876414e-06, "loss": 0.6334, "step": 2473 }, { "epoch": 0.32, "grad_norm": 0.6292176457888397, "learning_rate": 8.015013129016028e-06, "loss": 0.5221, "step": 2474 }, { "epoch": 0.32, "grad_norm": 0.8695789854112458, "learning_rate": 8.013366890730317e-06, "loss": 0.6467, "step": 2475 }, { "epoch": 0.32, "grad_norm": 0.829989412277105, "learning_rate": 8.011720139299616e-06, "loss": 0.7213, "step": 2476 }, { "epoch": 0.32, "grad_norm": 0.9624281026563432, "learning_rate": 8.010072875004353e-06, "loss": 0.6433, "step": 2477 }, { "epoch": 0.32, "grad_norm": 0.7952852016003795, "learning_rate": 8.00842509812504e-06, "loss": 0.5972, "step": 2478 }, { "epoch": 0.32, "grad_norm": 0.6316703587172406, "learning_rate": 8.00677680894227e-06, "loss": 0.505, "step": 2479 }, { "epoch": 0.32, "grad_norm": 0.80696919028627, "learning_rate": 8.005128007736735e-06, "loss": 0.6106, "step": 2480 }, { "epoch": 0.32, "grad_norm": 0.8837760032650265, "learning_rate": 8.003478694789209e-06, "loss": 0.6366, "step": 2481 }, { "epoch": 0.32, "grad_norm": 0.8860760198477845, "learning_rate": 8.00182887038055e-06, "loss": 0.7186, "step": 2482 }, { "epoch": 0.32, "grad_norm": 0.7813272915667994, "learning_rate": 8.00017853479171e-06, "loss": 0.5827, "step": 2483 }, { "epoch": 0.32, "grad_norm": 0.9872855712179546, "learning_rate": 7.998527688303717e-06, "loss": 0.6395, "step": 2484 }, { "epoch": 0.32, "grad_norm": 0.7117307247343383, "learning_rate": 7.9968763311977e-06, "loss": 0.5503, "step": 2485 }, { "epoch": 0.32, "grad_norm": 0.7423196946569927, "learning_rate": 7.995224463754864e-06, "loss": 0.5885, "step": 2486 }, { "epoch": 0.32, "grad_norm": 0.8474004325991016, "learning_rate": 7.993572086256505e-06, "loss": 0.5756, "step": 2487 }, { "epoch": 0.32, "grad_norm": 0.7914307702218254, "learning_rate": 7.991919198984008e-06, "loss": 0.6561, "step": 2488 }, { "epoch": 0.32, "grad_norm": 0.6602603759540381, "learning_rate": 7.990265802218841e-06, "loss": 0.5606, "step": 2489 }, { "epoch": 0.32, "grad_norm": 0.7346298851190962, "learning_rate": 7.98861189624256e-06, "loss": 0.6043, "step": 2490 }, { "epoch": 0.32, "grad_norm": 0.9204659928277439, "learning_rate": 7.986957481336807e-06, "loss": 0.5506, "step": 2491 }, { "epoch": 0.32, "grad_norm": 0.8077814019873388, "learning_rate": 7.985302557783313e-06, "loss": 0.6049, "step": 2492 }, { "epoch": 0.32, "grad_norm": 0.9393025971159807, "learning_rate": 7.983647125863896e-06, "loss": 0.6196, "step": 2493 }, { "epoch": 0.32, "grad_norm": 0.7132024985184515, "learning_rate": 7.981991185860455e-06, "loss": 0.6178, "step": 2494 }, { "epoch": 0.32, "grad_norm": 0.6914206090420442, "learning_rate": 7.980334738054981e-06, "loss": 0.5919, "step": 2495 }, { "epoch": 0.32, "grad_norm": 0.79141234078157, "learning_rate": 7.97867778272955e-06, "loss": 0.5487, "step": 2496 }, { "epoch": 0.32, "grad_norm": 0.8503196578459087, "learning_rate": 7.977020320166326e-06, "loss": 0.6473, "step": 2497 }, { "epoch": 0.32, "grad_norm": 0.9325666661039893, "learning_rate": 7.975362350647555e-06, "loss": 0.6439, "step": 2498 }, { "epoch": 0.32, "grad_norm": 0.6462498372332367, "learning_rate": 7.973703874455572e-06, "loss": 0.567, "step": 2499 }, { "epoch": 0.32, "grad_norm": 0.7122219694881514, "learning_rate": 7.972044891872801e-06, "loss": 0.5, "step": 2500 }, { "epoch": 0.32, "grad_norm": 0.7732411768919837, "learning_rate": 7.970385403181747e-06, "loss": 0.5749, "step": 2501 }, { "epoch": 0.32, "grad_norm": 0.8991509340178808, "learning_rate": 7.968725408665002e-06, "loss": 0.5696, "step": 2502 }, { "epoch": 0.32, "grad_norm": 0.7761067203987014, "learning_rate": 7.967064908605248e-06, "loss": 0.5586, "step": 2503 }, { "epoch": 0.32, "grad_norm": 0.8454747433864903, "learning_rate": 7.965403903285254e-06, "loss": 0.584, "step": 2504 }, { "epoch": 0.32, "grad_norm": 0.6238616540530499, "learning_rate": 7.963742392987866e-06, "loss": 0.5403, "step": 2505 }, { "epoch": 0.32, "grad_norm": 0.8425217019040718, "learning_rate": 7.962080377996025e-06, "loss": 0.5984, "step": 2506 }, { "epoch": 0.32, "grad_norm": 0.8158533627725938, "learning_rate": 7.960417858592754e-06, "loss": 0.6689, "step": 2507 }, { "epoch": 0.32, "grad_norm": 0.9281872264549673, "learning_rate": 7.958754835061163e-06, "loss": 0.6559, "step": 2508 }, { "epoch": 0.32, "grad_norm": 0.8827825259015066, "learning_rate": 7.957091307684448e-06, "loss": 0.6167, "step": 2509 }, { "epoch": 0.32, "grad_norm": 0.8150032148072945, "learning_rate": 7.95542727674589e-06, "loss": 0.6545, "step": 2510 }, { "epoch": 0.32, "grad_norm": 0.6698875166365278, "learning_rate": 7.953762742528857e-06, "loss": 0.528, "step": 2511 }, { "epoch": 0.32, "grad_norm": 0.7163075212287794, "learning_rate": 7.952097705316801e-06, "loss": 0.6209, "step": 2512 }, { "epoch": 0.32, "grad_norm": 0.7621034156605405, "learning_rate": 7.95043216539326e-06, "loss": 0.5259, "step": 2513 }, { "epoch": 0.32, "grad_norm": 0.6826220118007611, "learning_rate": 7.948766123041857e-06, "loss": 0.5081, "step": 2514 }, { "epoch": 0.32, "grad_norm": 0.8563664187016866, "learning_rate": 7.947099578546306e-06, "loss": 0.6523, "step": 2515 }, { "epoch": 0.32, "grad_norm": 0.8670941949433756, "learning_rate": 7.945432532190399e-06, "loss": 0.6752, "step": 2516 }, { "epoch": 0.32, "grad_norm": 0.877482645838485, "learning_rate": 7.943764984258016e-06, "loss": 0.6668, "step": 2517 }, { "epoch": 0.32, "grad_norm": 0.899598297456225, "learning_rate": 7.942096935033123e-06, "loss": 0.655, "step": 2518 }, { "epoch": 0.32, "grad_norm": 0.7832144233710502, "learning_rate": 7.940428384799774e-06, "loss": 0.6215, "step": 2519 }, { "epoch": 0.32, "grad_norm": 0.7310364127174431, "learning_rate": 7.938759333842104e-06, "loss": 0.6088, "step": 2520 }, { "epoch": 0.32, "grad_norm": 0.7229043869381659, "learning_rate": 7.937089782444336e-06, "loss": 0.5604, "step": 2521 }, { "epoch": 0.32, "grad_norm": 0.960971123990552, "learning_rate": 7.935419730890777e-06, "loss": 0.6554, "step": 2522 }, { "epoch": 0.32, "grad_norm": 0.7503397463291318, "learning_rate": 7.933749179465818e-06, "loss": 0.6154, "step": 2523 }, { "epoch": 0.32, "grad_norm": 0.9152919278244538, "learning_rate": 7.932078128453935e-06, "loss": 0.6879, "step": 2524 }, { "epoch": 0.32, "grad_norm": 0.8546445546531586, "learning_rate": 7.930406578139696e-06, "loss": 0.6837, "step": 2525 }, { "epoch": 0.32, "grad_norm": 0.8380134771721715, "learning_rate": 7.928734528807743e-06, "loss": 0.6548, "step": 2526 }, { "epoch": 0.32, "grad_norm": 0.7924546971997317, "learning_rate": 7.927061980742813e-06, "loss": 0.5924, "step": 2527 }, { "epoch": 0.32, "grad_norm": 0.6821382187546627, "learning_rate": 7.925388934229722e-06, "loss": 0.524, "step": 2528 }, { "epoch": 0.32, "grad_norm": 0.6883932018359311, "learning_rate": 7.92371538955337e-06, "loss": 0.5638, "step": 2529 }, { "epoch": 0.32, "grad_norm": 0.8231348135440547, "learning_rate": 7.922041346998746e-06, "loss": 0.5555, "step": 2530 }, { "epoch": 0.32, "grad_norm": 0.60726826564693, "learning_rate": 7.920366806850924e-06, "loss": 0.5225, "step": 2531 }, { "epoch": 0.32, "grad_norm": 0.7899820717684318, "learning_rate": 7.918691769395059e-06, "loss": 0.5634, "step": 2532 }, { "epoch": 0.32, "grad_norm": 0.8267961106501716, "learning_rate": 7.91701623491639e-06, "loss": 0.6542, "step": 2533 }, { "epoch": 0.32, "grad_norm": 0.7075588782026214, "learning_rate": 7.915340203700248e-06, "loss": 0.5534, "step": 2534 }, { "epoch": 0.32, "grad_norm": 0.7568078402070907, "learning_rate": 7.91366367603204e-06, "loss": 0.6049, "step": 2535 }, { "epoch": 0.32, "grad_norm": 0.6559061209506393, "learning_rate": 7.911986652197263e-06, "loss": 0.507, "step": 2536 }, { "epoch": 0.32, "grad_norm": 0.8298837424790314, "learning_rate": 7.910309132481497e-06, "loss": 0.6374, "step": 2537 }, { "epoch": 0.32, "grad_norm": 0.8974549372589856, "learning_rate": 7.908631117170402e-06, "loss": 0.5655, "step": 2538 }, { "epoch": 0.32, "grad_norm": 0.6551685153596557, "learning_rate": 7.906952606549732e-06, "loss": 0.5331, "step": 2539 }, { "epoch": 0.32, "grad_norm": 0.8996817778918511, "learning_rate": 7.905273600905317e-06, "loss": 0.5992, "step": 2540 }, { "epoch": 0.32, "grad_norm": 0.6614138677097244, "learning_rate": 7.903594100523074e-06, "loss": 0.4518, "step": 2541 }, { "epoch": 0.32, "grad_norm": 0.6637871928608828, "learning_rate": 7.901914105689007e-06, "loss": 0.5384, "step": 2542 }, { "epoch": 0.32, "grad_norm": 0.7325192268408454, "learning_rate": 7.900233616689196e-06, "loss": 0.5604, "step": 2543 }, { "epoch": 0.32, "grad_norm": 0.7146358875951913, "learning_rate": 7.898552633809819e-06, "loss": 0.5827, "step": 2544 }, { "epoch": 0.32, "grad_norm": 0.7205279077807724, "learning_rate": 7.896871157337123e-06, "loss": 0.5487, "step": 2545 }, { "epoch": 0.32, "grad_norm": 0.7275630420373732, "learning_rate": 7.895189187557447e-06, "loss": 0.5678, "step": 2546 }, { "epoch": 0.32, "grad_norm": 0.6610505797029942, "learning_rate": 7.893506724757215e-06, "loss": 0.5356, "step": 2547 }, { "epoch": 0.32, "grad_norm": 0.6632201705253028, "learning_rate": 7.891823769222935e-06, "loss": 0.5015, "step": 2548 }, { "epoch": 0.32, "grad_norm": 0.7311937552921745, "learning_rate": 7.890140321241191e-06, "loss": 0.5673, "step": 2549 }, { "epoch": 0.32, "grad_norm": 0.5856291982631545, "learning_rate": 7.88845638109866e-06, "loss": 0.496, "step": 2550 }, { "epoch": 0.32, "grad_norm": 0.8342177773865391, "learning_rate": 7.886771949082099e-06, "loss": 0.5613, "step": 2551 }, { "epoch": 0.33, "grad_norm": 0.9125768272986223, "learning_rate": 7.885087025478349e-06, "loss": 0.6657, "step": 2552 }, { "epoch": 0.33, "grad_norm": 0.6725524406232308, "learning_rate": 7.883401610574338e-06, "loss": 0.5629, "step": 2553 }, { "epoch": 0.33, "grad_norm": 0.8386107467603523, "learning_rate": 7.881715704657069e-06, "loss": 0.6623, "step": 2554 }, { "epoch": 0.33, "grad_norm": 0.6312653752267728, "learning_rate": 7.880029308013636e-06, "loss": 0.4883, "step": 2555 }, { "epoch": 0.33, "grad_norm": 0.9468639063815736, "learning_rate": 7.878342420931218e-06, "loss": 0.648, "step": 2556 }, { "epoch": 0.33, "grad_norm": 0.6443327226938073, "learning_rate": 7.87665504369707e-06, "loss": 0.5272, "step": 2557 }, { "epoch": 0.33, "grad_norm": 0.6679444751410026, "learning_rate": 7.87496717659854e-06, "loss": 0.4959, "step": 2558 }, { "epoch": 0.33, "grad_norm": 0.7396494512111418, "learning_rate": 7.873278819923047e-06, "loss": 0.548, "step": 2559 }, { "epoch": 0.33, "grad_norm": 0.8451742426384206, "learning_rate": 7.871589973958108e-06, "loss": 0.6472, "step": 2560 }, { "epoch": 0.33, "grad_norm": 0.8730460960016027, "learning_rate": 7.869900638991312e-06, "loss": 0.6132, "step": 2561 }, { "epoch": 0.33, "grad_norm": 0.7674033136995915, "learning_rate": 7.868210815310333e-06, "loss": 0.6321, "step": 2562 }, { "epoch": 0.33, "grad_norm": 0.7491348633229841, "learning_rate": 7.866520503202936e-06, "loss": 0.5676, "step": 2563 }, { "epoch": 0.33, "grad_norm": 0.8737321680038387, "learning_rate": 7.86482970295696e-06, "loss": 0.6832, "step": 2564 }, { "epoch": 0.33, "grad_norm": 0.8625761542615646, "learning_rate": 7.863138414860329e-06, "loss": 0.6435, "step": 2565 }, { "epoch": 0.33, "grad_norm": 0.6893637920448848, "learning_rate": 7.861446639201055e-06, "loss": 0.5706, "step": 2566 }, { "epoch": 0.33, "grad_norm": 0.8259452774939636, "learning_rate": 7.85975437626723e-06, "loss": 0.647, "step": 2567 }, { "epoch": 0.33, "grad_norm": 0.7399999907724197, "learning_rate": 7.858061626347027e-06, "loss": 0.53, "step": 2568 }, { "epoch": 0.33, "grad_norm": 0.8105529923507633, "learning_rate": 7.856368389728703e-06, "loss": 0.6058, "step": 2569 }, { "epoch": 0.33, "grad_norm": 0.7992308674734614, "learning_rate": 7.8546746667006e-06, "loss": 0.6668, "step": 2570 }, { "epoch": 0.33, "grad_norm": 0.7824888784709066, "learning_rate": 7.852980457551141e-06, "loss": 0.553, "step": 2571 }, { "epoch": 0.33, "grad_norm": 0.8133884195215355, "learning_rate": 7.851285762568832e-06, "loss": 0.5796, "step": 2572 }, { "epoch": 0.33, "grad_norm": 0.6657023880623612, "learning_rate": 7.849590582042264e-06, "loss": 0.521, "step": 2573 }, { "epoch": 0.33, "grad_norm": 0.6955755630905265, "learning_rate": 7.847894916260105e-06, "loss": 0.5466, "step": 2574 }, { "epoch": 0.33, "grad_norm": 0.7488901507444516, "learning_rate": 7.84619876551111e-06, "loss": 0.6626, "step": 2575 }, { "epoch": 0.33, "grad_norm": 0.6487639796479855, "learning_rate": 7.844502130084118e-06, "loss": 0.4951, "step": 2576 }, { "epoch": 0.33, "grad_norm": 0.6568164520554413, "learning_rate": 7.842805010268048e-06, "loss": 0.5083, "step": 2577 }, { "epoch": 0.33, "grad_norm": 0.9332953661221384, "learning_rate": 7.8411074063519e-06, "loss": 0.666, "step": 2578 }, { "epoch": 0.33, "grad_norm": 0.9396683036746047, "learning_rate": 7.83940931862476e-06, "loss": 0.6999, "step": 2579 }, { "epoch": 0.33, "grad_norm": 0.6311110308948953, "learning_rate": 7.837710747375791e-06, "loss": 0.5263, "step": 2580 }, { "epoch": 0.33, "grad_norm": 0.9121499150221932, "learning_rate": 7.836011692894248e-06, "loss": 0.607, "step": 2581 }, { "epoch": 0.33, "grad_norm": 0.891271255627214, "learning_rate": 7.834312155469457e-06, "loss": 0.6441, "step": 2582 }, { "epoch": 0.33, "grad_norm": 0.7215152006747193, "learning_rate": 7.832612135390834e-06, "loss": 0.5639, "step": 2583 }, { "epoch": 0.33, "grad_norm": 0.6482651857803328, "learning_rate": 7.830911632947874e-06, "loss": 0.5489, "step": 2584 }, { "epoch": 0.33, "grad_norm": 0.9852562284290151, "learning_rate": 7.829210648430156e-06, "loss": 0.6084, "step": 2585 }, { "epoch": 0.33, "grad_norm": 0.6424167107507144, "learning_rate": 7.82750918212734e-06, "loss": 0.5218, "step": 2586 }, { "epoch": 0.33, "grad_norm": 0.7007343923974474, "learning_rate": 7.825807234329164e-06, "loss": 0.5362, "step": 2587 }, { "epoch": 0.33, "grad_norm": 0.7282085946018152, "learning_rate": 7.824104805325456e-06, "loss": 0.5064, "step": 2588 }, { "epoch": 0.33, "grad_norm": 0.8006967154599658, "learning_rate": 7.822401895406122e-06, "loss": 0.4994, "step": 2589 }, { "epoch": 0.33, "grad_norm": 0.8297582044554498, "learning_rate": 7.820698504861148e-06, "loss": 0.6315, "step": 2590 }, { "epoch": 0.33, "grad_norm": 0.6279406260961634, "learning_rate": 7.818994633980606e-06, "loss": 0.4887, "step": 2591 }, { "epoch": 0.33, "grad_norm": 0.7815953777160795, "learning_rate": 7.817290283054645e-06, "loss": 0.5239, "step": 2592 }, { "epoch": 0.33, "grad_norm": 0.6659167228692938, "learning_rate": 7.8155854523735e-06, "loss": 0.5618, "step": 2593 }, { "epoch": 0.33, "grad_norm": 0.9146085296280144, "learning_rate": 7.813880142227484e-06, "loss": 0.5996, "step": 2594 }, { "epoch": 0.33, "grad_norm": 0.7767742804006268, "learning_rate": 7.812174352906994e-06, "loss": 0.6261, "step": 2595 }, { "epoch": 0.33, "grad_norm": 0.9300097972902812, "learning_rate": 7.810468084702509e-06, "loss": 0.5839, "step": 2596 }, { "epoch": 0.33, "grad_norm": 0.7701663458094374, "learning_rate": 7.80876133790459e-06, "loss": 0.5762, "step": 2597 }, { "epoch": 0.33, "grad_norm": 0.9072707067662261, "learning_rate": 7.807054112803873e-06, "loss": 0.647, "step": 2598 }, { "epoch": 0.33, "grad_norm": 0.7365818612922479, "learning_rate": 7.805346409691087e-06, "loss": 0.5958, "step": 2599 }, { "epoch": 0.33, "grad_norm": 1.2363445052865716, "learning_rate": 7.803638228857033e-06, "loss": 0.6707, "step": 2600 }, { "epoch": 0.33, "grad_norm": 0.7119821784150594, "learning_rate": 7.801929570592596e-06, "loss": 0.5439, "step": 2601 }, { "epoch": 0.33, "grad_norm": 0.6598534839689683, "learning_rate": 7.800220435188745e-06, "loss": 0.5131, "step": 2602 }, { "epoch": 0.33, "grad_norm": 0.8441879458884096, "learning_rate": 7.798510822936524e-06, "loss": 0.5649, "step": 2603 }, { "epoch": 0.33, "grad_norm": 0.7967374297291847, "learning_rate": 7.796800734127067e-06, "loss": 0.6435, "step": 2604 }, { "epoch": 0.33, "grad_norm": 0.6684031028252342, "learning_rate": 7.79509016905158e-06, "loss": 0.5081, "step": 2605 }, { "epoch": 0.33, "grad_norm": 0.6965217701833712, "learning_rate": 7.793379128001357e-06, "loss": 0.5189, "step": 2606 }, { "epoch": 0.33, "grad_norm": 0.7937954420407329, "learning_rate": 7.79166761126777e-06, "loss": 0.6644, "step": 2607 }, { "epoch": 0.33, "grad_norm": 0.7879431334860658, "learning_rate": 7.789955619142274e-06, "loss": 0.5937, "step": 2608 }, { "epoch": 0.33, "grad_norm": 0.9502952661475601, "learning_rate": 7.788243151916398e-06, "loss": 0.7026, "step": 2609 }, { "epoch": 0.33, "grad_norm": 0.7153132251224847, "learning_rate": 7.786530209881764e-06, "loss": 0.5298, "step": 2610 }, { "epoch": 0.33, "grad_norm": 0.6663791801858784, "learning_rate": 7.784816793330061e-06, "loss": 0.5836, "step": 2611 }, { "epoch": 0.33, "grad_norm": 0.8051280634364077, "learning_rate": 7.783102902553074e-06, "loss": 0.5348, "step": 2612 }, { "epoch": 0.33, "grad_norm": 0.7296436912052862, "learning_rate": 7.781388537842655e-06, "loss": 0.6022, "step": 2613 }, { "epoch": 0.33, "grad_norm": 0.5982526136725665, "learning_rate": 7.779673699490745e-06, "loss": 0.5316, "step": 2614 }, { "epoch": 0.33, "grad_norm": 0.8859201612994138, "learning_rate": 7.777958387789363e-06, "loss": 0.6874, "step": 2615 }, { "epoch": 0.33, "grad_norm": 0.741578824655153, "learning_rate": 7.776242603030607e-06, "loss": 0.589, "step": 2616 }, { "epoch": 0.33, "grad_norm": 0.7815747806402055, "learning_rate": 7.77452634550666e-06, "loss": 0.5679, "step": 2617 }, { "epoch": 0.33, "grad_norm": 0.9514552926162047, "learning_rate": 7.772809615509778e-06, "loss": 0.636, "step": 2618 }, { "epoch": 0.33, "grad_norm": 0.9527660278391786, "learning_rate": 7.77109241333231e-06, "loss": 0.5986, "step": 2619 }, { "epoch": 0.33, "grad_norm": 0.7058014015975158, "learning_rate": 7.76937473926667e-06, "loss": 0.5832, "step": 2620 }, { "epoch": 0.33, "grad_norm": 0.7616638271468806, "learning_rate": 7.767656593605361e-06, "loss": 0.5832, "step": 2621 }, { "epoch": 0.33, "grad_norm": 0.8496370494565307, "learning_rate": 7.765937976640971e-06, "loss": 0.6752, "step": 2622 }, { "epoch": 0.33, "grad_norm": 0.6780181517885135, "learning_rate": 7.764218888666157e-06, "loss": 0.5719, "step": 2623 }, { "epoch": 0.33, "grad_norm": 0.917307437254972, "learning_rate": 7.762499329973665e-06, "loss": 0.5332, "step": 2624 }, { "epoch": 0.33, "grad_norm": 0.6286513399673823, "learning_rate": 7.760779300856316e-06, "loss": 0.5466, "step": 2625 }, { "epoch": 0.33, "grad_norm": 0.7623334595597904, "learning_rate": 7.759058801607014e-06, "loss": 0.5539, "step": 2626 }, { "epoch": 0.33, "grad_norm": 0.6519166843010614, "learning_rate": 7.75733783251874e-06, "loss": 0.5794, "step": 2627 }, { "epoch": 0.33, "grad_norm": 0.7068296690458556, "learning_rate": 7.755616393884562e-06, "loss": 0.5674, "step": 2628 }, { "epoch": 0.33, "grad_norm": 0.7175341821364184, "learning_rate": 7.753894485997617e-06, "loss": 0.5667, "step": 2629 }, { "epoch": 0.34, "grad_norm": 0.7641779419448566, "learning_rate": 7.752172109151132e-06, "loss": 0.5459, "step": 2630 }, { "epoch": 0.34, "grad_norm": 0.7954156821645183, "learning_rate": 7.750449263638409e-06, "loss": 0.6731, "step": 2631 }, { "epoch": 0.34, "grad_norm": 0.7725568454526498, "learning_rate": 7.74872594975283e-06, "loss": 0.6309, "step": 2632 }, { "epoch": 0.34, "grad_norm": 0.872619278687467, "learning_rate": 7.747002167787858e-06, "loss": 0.5876, "step": 2633 }, { "epoch": 0.34, "grad_norm": 0.7117154358653245, "learning_rate": 7.745277918037035e-06, "loss": 0.55, "step": 2634 }, { "epoch": 0.34, "grad_norm": 0.585826210737778, "learning_rate": 7.743553200793983e-06, "loss": 0.5113, "step": 2635 }, { "epoch": 0.34, "grad_norm": 1.0611140337280847, "learning_rate": 7.741828016352402e-06, "loss": 0.6573, "step": 2636 }, { "epoch": 0.34, "grad_norm": 0.7773519884842081, "learning_rate": 7.740102365006075e-06, "loss": 0.5905, "step": 2637 }, { "epoch": 0.34, "grad_norm": 0.8298155813853204, "learning_rate": 7.73837624704886e-06, "loss": 0.6691, "step": 2638 }, { "epoch": 0.34, "grad_norm": 0.8438407192665045, "learning_rate": 7.7366496627747e-06, "loss": 0.6326, "step": 2639 }, { "epoch": 0.34, "grad_norm": 0.9777680300044581, "learning_rate": 7.734922612477611e-06, "loss": 0.5894, "step": 2640 }, { "epoch": 0.34, "grad_norm": 0.7666751165110746, "learning_rate": 7.733195096451696e-06, "loss": 0.5562, "step": 2641 }, { "epoch": 0.34, "grad_norm": 0.9476262775001394, "learning_rate": 7.731467114991128e-06, "loss": 0.6626, "step": 2642 }, { "epoch": 0.34, "grad_norm": 0.8878137461203477, "learning_rate": 7.729738668390167e-06, "loss": 0.7162, "step": 2643 }, { "epoch": 0.34, "grad_norm": 0.8375041323196158, "learning_rate": 7.728009756943149e-06, "loss": 0.5794, "step": 2644 }, { "epoch": 0.34, "grad_norm": 0.6922060112985475, "learning_rate": 7.726280380944489e-06, "loss": 0.562, "step": 2645 }, { "epoch": 0.34, "grad_norm": 0.6551729313080378, "learning_rate": 7.724550540688683e-06, "loss": 0.5369, "step": 2646 }, { "epoch": 0.34, "grad_norm": 0.6891749134127959, "learning_rate": 7.722820236470302e-06, "loss": 0.558, "step": 2647 }, { "epoch": 0.34, "grad_norm": 0.726680985435801, "learning_rate": 7.721089468584005e-06, "loss": 0.5829, "step": 2648 }, { "epoch": 0.34, "grad_norm": 0.8354331783022282, "learning_rate": 7.719358237324516e-06, "loss": 0.5797, "step": 2649 }, { "epoch": 0.34, "grad_norm": 0.8946148413226056, "learning_rate": 7.717626542986652e-06, "loss": 0.6946, "step": 2650 }, { "epoch": 0.34, "grad_norm": 0.7937513114640031, "learning_rate": 7.715894385865299e-06, "loss": 0.6061, "step": 2651 }, { "epoch": 0.34, "grad_norm": 0.7332761043630776, "learning_rate": 7.714161766255425e-06, "loss": 0.5246, "step": 2652 }, { "epoch": 0.34, "grad_norm": 0.7397786301881935, "learning_rate": 7.712428684452079e-06, "loss": 0.6019, "step": 2653 }, { "epoch": 0.34, "grad_norm": 0.77137686310079, "learning_rate": 7.710695140750384e-06, "loss": 0.5535, "step": 2654 }, { "epoch": 0.34, "grad_norm": 0.7307719167737979, "learning_rate": 7.708961135445549e-06, "loss": 0.5648, "step": 2655 }, { "epoch": 0.34, "grad_norm": 0.8138713320314339, "learning_rate": 7.707226668832854e-06, "loss": 0.6749, "step": 2656 }, { "epoch": 0.34, "grad_norm": 0.7127461887665362, "learning_rate": 7.705491741207661e-06, "loss": 0.5622, "step": 2657 }, { "epoch": 0.34, "grad_norm": 0.7307288235631265, "learning_rate": 7.703756352865408e-06, "loss": 0.5328, "step": 2658 }, { "epoch": 0.34, "grad_norm": 0.7257445412833696, "learning_rate": 7.702020504101618e-06, "loss": 0.6048, "step": 2659 }, { "epoch": 0.34, "grad_norm": 0.715546508454551, "learning_rate": 7.700284195211883e-06, "loss": 0.5824, "step": 2660 }, { "epoch": 0.34, "grad_norm": 0.7443185337086762, "learning_rate": 7.698547426491883e-06, "loss": 0.5566, "step": 2661 }, { "epoch": 0.34, "grad_norm": 0.7393450034687399, "learning_rate": 7.69681019823737e-06, "loss": 0.6106, "step": 2662 }, { "epoch": 0.34, "grad_norm": 0.7490774446747605, "learning_rate": 7.695072510744174e-06, "loss": 0.6337, "step": 2663 }, { "epoch": 0.34, "grad_norm": 0.726061307116932, "learning_rate": 7.693334364308208e-06, "loss": 0.5574, "step": 2664 }, { "epoch": 0.34, "grad_norm": 0.8610484167206152, "learning_rate": 7.691595759225457e-06, "loss": 0.6321, "step": 2665 }, { "epoch": 0.34, "grad_norm": 1.1912650606538906, "learning_rate": 7.68985669579199e-06, "loss": 0.5986, "step": 2666 }, { "epoch": 0.34, "grad_norm": 0.6857360125297636, "learning_rate": 7.688117174303951e-06, "loss": 0.5652, "step": 2667 }, { "epoch": 0.34, "grad_norm": 0.8003695506270507, "learning_rate": 7.68637719505756e-06, "loss": 0.6612, "step": 2668 }, { "epoch": 0.34, "grad_norm": 0.7283821617771326, "learning_rate": 7.68463675834912e-06, "loss": 0.5464, "step": 2669 }, { "epoch": 0.34, "grad_norm": 0.8199876486185335, "learning_rate": 7.68289586447501e-06, "loss": 0.5653, "step": 2670 }, { "epoch": 0.34, "grad_norm": 0.7057179652499728, "learning_rate": 7.68115451373168e-06, "loss": 0.5521, "step": 2671 }, { "epoch": 0.34, "grad_norm": 0.8235515349311006, "learning_rate": 7.679412706415672e-06, "loss": 0.6092, "step": 2672 }, { "epoch": 0.34, "grad_norm": 0.8636819956537833, "learning_rate": 7.677670442823594e-06, "loss": 0.6093, "step": 2673 }, { "epoch": 0.34, "grad_norm": 0.7080627750988319, "learning_rate": 7.675927723252134e-06, "loss": 0.5801, "step": 2674 }, { "epoch": 0.34, "grad_norm": 0.8708700839746261, "learning_rate": 7.674184547998063e-06, "loss": 0.6093, "step": 2675 }, { "epoch": 0.34, "grad_norm": 0.8913291843622851, "learning_rate": 7.672440917358221e-06, "loss": 0.6604, "step": 2676 }, { "epoch": 0.34, "grad_norm": 0.9449274162922053, "learning_rate": 7.670696831629531e-06, "loss": 0.6289, "step": 2677 }, { "epoch": 0.34, "grad_norm": 0.6919000466214699, "learning_rate": 7.668952291108996e-06, "loss": 0.5046, "step": 2678 }, { "epoch": 0.34, "grad_norm": 0.7020000035327029, "learning_rate": 7.667207296093688e-06, "loss": 0.5862, "step": 2679 }, { "epoch": 0.34, "grad_norm": 0.88971465457254, "learning_rate": 7.665461846880766e-06, "loss": 0.6598, "step": 2680 }, { "epoch": 0.34, "grad_norm": 0.7005520549385662, "learning_rate": 7.663715943767461e-06, "loss": 0.5893, "step": 2681 }, { "epoch": 0.34, "grad_norm": 0.6739883084933995, "learning_rate": 7.661969587051079e-06, "loss": 0.5181, "step": 2682 }, { "epoch": 0.34, "grad_norm": 0.9180342638820376, "learning_rate": 7.66022277702901e-06, "loss": 0.7337, "step": 2683 }, { "epoch": 0.34, "grad_norm": 0.7918545103119662, "learning_rate": 7.658475513998716e-06, "loss": 0.5704, "step": 2684 }, { "epoch": 0.34, "grad_norm": 0.8428144287265061, "learning_rate": 7.656727798257737e-06, "loss": 0.6585, "step": 2685 }, { "epoch": 0.34, "grad_norm": 0.6611212100572257, "learning_rate": 7.65497963010369e-06, "loss": 0.5521, "step": 2686 }, { "epoch": 0.34, "grad_norm": 0.767935941411705, "learning_rate": 7.653231009834276e-06, "loss": 0.56, "step": 2687 }, { "epoch": 0.34, "grad_norm": 0.7576161741805942, "learning_rate": 7.651481937747258e-06, "loss": 0.5223, "step": 2688 }, { "epoch": 0.34, "grad_norm": 0.7571483621672602, "learning_rate": 7.649732414140491e-06, "loss": 0.5856, "step": 2689 }, { "epoch": 0.34, "grad_norm": 0.8427817003990717, "learning_rate": 7.647982439311898e-06, "loss": 0.5714, "step": 2690 }, { "epoch": 0.34, "grad_norm": 0.7665876080198591, "learning_rate": 7.646232013559482e-06, "loss": 0.6659, "step": 2691 }, { "epoch": 0.34, "grad_norm": 0.8484365654735585, "learning_rate": 7.644481137181323e-06, "loss": 0.64, "step": 2692 }, { "epoch": 0.34, "grad_norm": 0.6733679821941454, "learning_rate": 7.642729810475576e-06, "loss": 0.5212, "step": 2693 }, { "epoch": 0.34, "grad_norm": 0.8771972006029759, "learning_rate": 7.640978033740473e-06, "loss": 0.5415, "step": 2694 }, { "epoch": 0.34, "grad_norm": 0.9257431839033528, "learning_rate": 7.639225807274327e-06, "loss": 0.6464, "step": 2695 }, { "epoch": 0.34, "grad_norm": 0.9159739177431039, "learning_rate": 7.637473131375517e-06, "loss": 0.6526, "step": 2696 }, { "epoch": 0.34, "grad_norm": 0.7275415689700523, "learning_rate": 7.635720006342513e-06, "loss": 0.6221, "step": 2697 }, { "epoch": 0.34, "grad_norm": 0.8171859241625635, "learning_rate": 7.63396643247385e-06, "loss": 0.6199, "step": 2698 }, { "epoch": 0.34, "grad_norm": 0.8177450800477294, "learning_rate": 7.632212410068142e-06, "loss": 0.6168, "step": 2699 }, { "epoch": 0.34, "grad_norm": 0.7171593163823642, "learning_rate": 7.630457939424086e-06, "loss": 0.5828, "step": 2700 }, { "epoch": 0.34, "grad_norm": 0.8668162516708091, "learning_rate": 7.6287030208404446e-06, "loss": 0.6431, "step": 2701 }, { "epoch": 0.34, "grad_norm": 0.8757720485570994, "learning_rate": 7.626947654616066e-06, "loss": 0.6018, "step": 2702 }, { "epoch": 0.34, "grad_norm": 0.6997164772348926, "learning_rate": 7.625191841049868e-06, "loss": 0.62, "step": 2703 }, { "epoch": 0.34, "grad_norm": 0.812722705207381, "learning_rate": 7.623435580440849e-06, "loss": 0.6075, "step": 2704 }, { "epoch": 0.34, "grad_norm": 1.391656340481501, "learning_rate": 7.6216788730880806e-06, "loss": 0.6465, "step": 2705 }, { "epoch": 0.34, "grad_norm": 0.7922823100985256, "learning_rate": 7.619921719290715e-06, "loss": 0.6256, "step": 2706 }, { "epoch": 0.34, "grad_norm": 0.6967721956254086, "learning_rate": 7.618164119347973e-06, "loss": 0.5802, "step": 2707 }, { "epoch": 0.34, "grad_norm": 0.7735384700934813, "learning_rate": 7.616406073559157e-06, "loss": 0.6124, "step": 2708 }, { "epoch": 0.35, "grad_norm": 0.7328953130306884, "learning_rate": 7.614647582223645e-06, "loss": 0.5486, "step": 2709 }, { "epoch": 0.35, "grad_norm": 0.7580614630146327, "learning_rate": 7.612888645640888e-06, "loss": 0.6099, "step": 2710 }, { "epoch": 0.35, "grad_norm": 0.7422254501911365, "learning_rate": 7.611129264110415e-06, "loss": 0.5647, "step": 2711 }, { "epoch": 0.35, "grad_norm": 0.8964542805292356, "learning_rate": 7.6093694379318315e-06, "loss": 0.6675, "step": 2712 }, { "epoch": 0.35, "grad_norm": 0.9092064325050946, "learning_rate": 7.607609167404817e-06, "loss": 0.6781, "step": 2713 }, { "epoch": 0.35, "grad_norm": 0.79206317898653, "learning_rate": 7.605848452829128e-06, "loss": 0.6226, "step": 2714 }, { "epoch": 0.35, "grad_norm": 0.7478648086548831, "learning_rate": 7.604087294504594e-06, "loss": 0.5787, "step": 2715 }, { "epoch": 0.35, "grad_norm": 0.8645050906102298, "learning_rate": 7.602325692731124e-06, "loss": 0.6491, "step": 2716 }, { "epoch": 0.35, "grad_norm": 0.7465382285852344, "learning_rate": 7.6005636478086984e-06, "loss": 0.5789, "step": 2717 }, { "epoch": 0.35, "grad_norm": 0.7159849857769799, "learning_rate": 7.598801160037377e-06, "loss": 0.5498, "step": 2718 }, { "epoch": 0.35, "grad_norm": 0.7605018557718445, "learning_rate": 7.597038229717292e-06, "loss": 0.5538, "step": 2719 }, { "epoch": 0.35, "grad_norm": 0.9528717637188856, "learning_rate": 7.595274857148651e-06, "loss": 0.6247, "step": 2720 }, { "epoch": 0.35, "grad_norm": 0.6593706026499795, "learning_rate": 7.59351104263174e-06, "loss": 0.5774, "step": 2721 }, { "epoch": 0.35, "grad_norm": 0.6359761904607262, "learning_rate": 7.591746786466918e-06, "loss": 0.5323, "step": 2722 }, { "epoch": 0.35, "grad_norm": 0.7614280946412803, "learning_rate": 7.589982088954619e-06, "loss": 0.568, "step": 2723 }, { "epoch": 0.35, "grad_norm": 0.7848791632750642, "learning_rate": 7.588216950395352e-06, "loss": 0.5552, "step": 2724 }, { "epoch": 0.35, "grad_norm": 0.8317204207936151, "learning_rate": 7.5864513710897e-06, "loss": 0.6109, "step": 2725 }, { "epoch": 0.35, "grad_norm": 0.6667625430147582, "learning_rate": 7.584685351338328e-06, "loss": 0.5427, "step": 2726 }, { "epoch": 0.35, "grad_norm": 1.1569563083416816, "learning_rate": 7.582918891441966e-06, "loss": 0.5475, "step": 2727 }, { "epoch": 0.35, "grad_norm": 0.7892831427043902, "learning_rate": 7.5811519917014275e-06, "loss": 0.5906, "step": 2728 }, { "epoch": 0.35, "grad_norm": 0.6176416801938055, "learning_rate": 7.579384652417593e-06, "loss": 0.5425, "step": 2729 }, { "epoch": 0.35, "grad_norm": 0.7099653290600364, "learning_rate": 7.577616873891425e-06, "loss": 0.5331, "step": 2730 }, { "epoch": 0.35, "grad_norm": 0.7664426581160024, "learning_rate": 7.575848656423957e-06, "loss": 0.6165, "step": 2731 }, { "epoch": 0.35, "grad_norm": 0.7404908163675877, "learning_rate": 7.574080000316298e-06, "loss": 0.5557, "step": 2732 }, { "epoch": 0.35, "grad_norm": 0.6935629556588265, "learning_rate": 7.57231090586963e-06, "loss": 0.5074, "step": 2733 }, { "epoch": 0.35, "grad_norm": 0.6362762852049509, "learning_rate": 7.570541373385214e-06, "loss": 0.5104, "step": 2734 }, { "epoch": 0.35, "grad_norm": 0.7821687552125114, "learning_rate": 7.568771403164381e-06, "loss": 0.5898, "step": 2735 }, { "epoch": 0.35, "grad_norm": 0.7937424086711169, "learning_rate": 7.56700099550854e-06, "loss": 0.6487, "step": 2736 }, { "epoch": 0.35, "grad_norm": 0.7678714627836957, "learning_rate": 7.565230150719173e-06, "loss": 0.7374, "step": 2737 }, { "epoch": 0.35, "grad_norm": 0.6299692120238257, "learning_rate": 7.563458869097833e-06, "loss": 0.5106, "step": 2738 }, { "epoch": 0.35, "grad_norm": 0.774927246316752, "learning_rate": 7.5616871509461554e-06, "loss": 0.5603, "step": 2739 }, { "epoch": 0.35, "grad_norm": 0.8094760484704159, "learning_rate": 7.559914996565845e-06, "loss": 0.6185, "step": 2740 }, { "epoch": 0.35, "grad_norm": 0.7135069867572567, "learning_rate": 7.5581424062586775e-06, "loss": 0.5388, "step": 2741 }, { "epoch": 0.35, "grad_norm": 0.7143339361036312, "learning_rate": 7.556369380326509e-06, "loss": 0.5359, "step": 2742 }, { "epoch": 0.35, "grad_norm": 0.6756601431276299, "learning_rate": 7.554595919071268e-06, "loss": 0.5613, "step": 2743 }, { "epoch": 0.35, "grad_norm": 0.8097902147122563, "learning_rate": 7.552822022794954e-06, "loss": 0.6186, "step": 2744 }, { "epoch": 0.35, "grad_norm": 0.6419991556514685, "learning_rate": 7.551047691799647e-06, "loss": 0.5343, "step": 2745 }, { "epoch": 0.35, "grad_norm": 0.7184236465573266, "learning_rate": 7.549272926387493e-06, "loss": 0.5848, "step": 2746 }, { "epoch": 0.35, "grad_norm": 0.9904900295460088, "learning_rate": 7.547497726860717e-06, "loss": 0.65, "step": 2747 }, { "epoch": 0.35, "grad_norm": 0.8037002343582437, "learning_rate": 7.54572209352162e-06, "loss": 0.6202, "step": 2748 }, { "epoch": 0.35, "grad_norm": 0.8032833193111435, "learning_rate": 7.54394602667257e-06, "loss": 0.559, "step": 2749 }, { "epoch": 0.35, "grad_norm": 0.7930011601371217, "learning_rate": 7.542169526616014e-06, "loss": 0.599, "step": 2750 }, { "epoch": 0.35, "grad_norm": 0.7249984697972786, "learning_rate": 7.540392593654473e-06, "loss": 0.6266, "step": 2751 }, { "epoch": 0.35, "grad_norm": 0.6278352392284934, "learning_rate": 7.538615228090539e-06, "loss": 0.5312, "step": 2752 }, { "epoch": 0.35, "grad_norm": 0.7400362046125013, "learning_rate": 7.536837430226878e-06, "loss": 0.5651, "step": 2753 }, { "epoch": 0.35, "grad_norm": 0.7442681454334864, "learning_rate": 7.53505920036623e-06, "loss": 0.513, "step": 2754 }, { "epoch": 0.35, "grad_norm": 0.8024840202920478, "learning_rate": 7.533280538811412e-06, "loss": 0.6104, "step": 2755 }, { "epoch": 0.35, "grad_norm": 0.8495531730016921, "learning_rate": 7.53150144586531e-06, "loss": 0.6881, "step": 2756 }, { "epoch": 0.35, "grad_norm": 0.8709920391686855, "learning_rate": 7.529721921830884e-06, "loss": 0.6733, "step": 2757 }, { "epoch": 0.35, "grad_norm": 0.728838250694351, "learning_rate": 7.52794196701117e-06, "loss": 0.5883, "step": 2758 }, { "epoch": 0.35, "grad_norm": 0.9153913788268614, "learning_rate": 7.526161581709274e-06, "loss": 0.5702, "step": 2759 }, { "epoch": 0.35, "grad_norm": 0.8399246142832262, "learning_rate": 7.524380766228379e-06, "loss": 0.6209, "step": 2760 }, { "epoch": 0.35, "grad_norm": 0.7878379884857731, "learning_rate": 7.522599520871737e-06, "loss": 0.6188, "step": 2761 }, { "epoch": 0.35, "grad_norm": 0.7164761220948352, "learning_rate": 7.5208178459426785e-06, "loss": 0.5234, "step": 2762 }, { "epoch": 0.35, "grad_norm": 0.807324703744647, "learning_rate": 7.5190357417446e-06, "loss": 0.6468, "step": 2763 }, { "epoch": 0.35, "grad_norm": 0.8131748586399106, "learning_rate": 7.51725320858098e-06, "loss": 0.6327, "step": 2764 }, { "epoch": 0.35, "grad_norm": 0.7531929692127107, "learning_rate": 7.515470246755363e-06, "loss": 0.5862, "step": 2765 }, { "epoch": 0.35, "grad_norm": 0.6796607539272923, "learning_rate": 7.513686856571367e-06, "loss": 0.5318, "step": 2766 }, { "epoch": 0.35, "grad_norm": 0.6910802424104073, "learning_rate": 7.5119030383326885e-06, "loss": 0.5495, "step": 2767 }, { "epoch": 0.35, "grad_norm": 0.6968499155138721, "learning_rate": 7.5101187923430906e-06, "loss": 0.6042, "step": 2768 }, { "epoch": 0.35, "grad_norm": 0.7758256024486233, "learning_rate": 7.5083341189064126e-06, "loss": 0.6205, "step": 2769 }, { "epoch": 0.35, "grad_norm": 0.8950642908825718, "learning_rate": 7.5065490183265665e-06, "loss": 0.6357, "step": 2770 }, { "epoch": 0.35, "grad_norm": 0.7071771462489088, "learning_rate": 7.5047634909075365e-06, "loss": 0.5672, "step": 2771 }, { "epoch": 0.35, "grad_norm": 0.9758817932018523, "learning_rate": 7.502977536953376e-06, "loss": 0.6574, "step": 2772 }, { "epoch": 0.35, "grad_norm": 0.8241684574713491, "learning_rate": 7.501191156768218e-06, "loss": 0.5251, "step": 2773 }, { "epoch": 0.35, "grad_norm": 0.7882206503512899, "learning_rate": 7.499404350656264e-06, "loss": 0.6108, "step": 2774 }, { "epoch": 0.35, "grad_norm": 0.8151266626473438, "learning_rate": 7.4976171189217875e-06, "loss": 0.722, "step": 2775 }, { "epoch": 0.35, "grad_norm": 0.8404012937128001, "learning_rate": 7.495829461869134e-06, "loss": 0.6414, "step": 2776 }, { "epoch": 0.35, "grad_norm": 0.7365351816341162, "learning_rate": 7.4940413798027255e-06, "loss": 0.5924, "step": 2777 }, { "epoch": 0.35, "grad_norm": 0.6975033031649471, "learning_rate": 7.492252873027054e-06, "loss": 0.5568, "step": 2778 }, { "epoch": 0.35, "grad_norm": 0.6434841359262528, "learning_rate": 7.490463941846681e-06, "loss": 0.5301, "step": 2779 }, { "epoch": 0.35, "grad_norm": 0.9067545162989353, "learning_rate": 7.488674586566245e-06, "loss": 0.6515, "step": 2780 }, { "epoch": 0.35, "grad_norm": 0.7846673484292362, "learning_rate": 7.486884807490451e-06, "loss": 0.6605, "step": 2781 }, { "epoch": 0.35, "grad_norm": 0.6596953844332776, "learning_rate": 7.485094604924085e-06, "loss": 0.5675, "step": 2782 }, { "epoch": 0.35, "grad_norm": 1.139512770587114, "learning_rate": 7.483303979171996e-06, "loss": 0.6852, "step": 2783 }, { "epoch": 0.35, "grad_norm": 0.6699009441477491, "learning_rate": 7.481512930539112e-06, "loss": 0.5497, "step": 2784 }, { "epoch": 0.35, "grad_norm": 0.809741275490068, "learning_rate": 7.4797214593304265e-06, "loss": 0.6738, "step": 2785 }, { "epoch": 0.35, "grad_norm": 0.7771526737241754, "learning_rate": 7.477929565851011e-06, "loss": 0.578, "step": 2786 }, { "epoch": 0.36, "grad_norm": 0.704105988508471, "learning_rate": 7.476137250406006e-06, "loss": 0.5494, "step": 2787 }, { "epoch": 0.36, "grad_norm": 0.9720871354255579, "learning_rate": 7.474344513300622e-06, "loss": 0.661, "step": 2788 }, { "epoch": 0.36, "grad_norm": 0.7935676464818128, "learning_rate": 7.4725513548401455e-06, "loss": 0.5307, "step": 2789 }, { "epoch": 0.36, "grad_norm": 0.736125146135366, "learning_rate": 7.470757775329932e-06, "loss": 0.546, "step": 2790 }, { "epoch": 0.36, "grad_norm": 0.6434049693512166, "learning_rate": 7.468963775075409e-06, "loss": 0.5257, "step": 2791 }, { "epoch": 0.36, "grad_norm": 0.7486314378965022, "learning_rate": 7.467169354382078e-06, "loss": 0.5787, "step": 2792 }, { "epoch": 0.36, "grad_norm": 0.7830765573065516, "learning_rate": 7.465374513555508e-06, "loss": 0.6406, "step": 2793 }, { "epoch": 0.36, "grad_norm": 0.7416752147609592, "learning_rate": 7.463579252901341e-06, "loss": 0.6001, "step": 2794 }, { "epoch": 0.36, "grad_norm": 0.7932544895583734, "learning_rate": 7.461783572725294e-06, "loss": 0.5725, "step": 2795 }, { "epoch": 0.36, "grad_norm": 0.8456748438592332, "learning_rate": 7.459987473333151e-06, "loss": 0.7023, "step": 2796 }, { "epoch": 0.36, "grad_norm": 0.7125644413675578, "learning_rate": 7.45819095503077e-06, "loss": 0.5158, "step": 2797 }, { "epoch": 0.36, "grad_norm": 0.9033570139068714, "learning_rate": 7.456394018124078e-06, "loss": 0.7051, "step": 2798 }, { "epoch": 0.36, "grad_norm": 1.2770720614207984, "learning_rate": 7.454596662919076e-06, "loss": 0.6321, "step": 2799 }, { "epoch": 0.36, "grad_norm": 0.7295604218527582, "learning_rate": 7.452798889721832e-06, "loss": 0.5698, "step": 2800 }, { "epoch": 0.36, "grad_norm": 0.9898411589870634, "learning_rate": 7.451000698838491e-06, "loss": 0.6618, "step": 2801 }, { "epoch": 0.36, "grad_norm": 0.9119650963677739, "learning_rate": 7.449202090575267e-06, "loss": 0.5417, "step": 2802 }, { "epoch": 0.36, "grad_norm": 0.7104281812219381, "learning_rate": 7.447403065238441e-06, "loss": 0.5384, "step": 2803 }, { "epoch": 0.36, "grad_norm": 0.7205310094790782, "learning_rate": 7.4456036231343695e-06, "loss": 0.5506, "step": 2804 }, { "epoch": 0.36, "grad_norm": 0.9059139551531954, "learning_rate": 7.443803764569481e-06, "loss": 0.6102, "step": 2805 }, { "epoch": 0.36, "grad_norm": 0.9067937407859735, "learning_rate": 7.442003489850269e-06, "loss": 0.6438, "step": 2806 }, { "epoch": 0.36, "grad_norm": 0.7852243507239536, "learning_rate": 7.440202799283305e-06, "loss": 0.6652, "step": 2807 }, { "epoch": 0.36, "grad_norm": 0.7721161633766079, "learning_rate": 7.438401693175225e-06, "loss": 0.5445, "step": 2808 }, { "epoch": 0.36, "grad_norm": 0.8972149443350992, "learning_rate": 7.436600171832739e-06, "loss": 0.5881, "step": 2809 }, { "epoch": 0.36, "grad_norm": 0.7626009422103235, "learning_rate": 7.434798235562628e-06, "loss": 0.5941, "step": 2810 }, { "epoch": 0.36, "grad_norm": 0.8047749854107277, "learning_rate": 7.432995884671744e-06, "loss": 0.6144, "step": 2811 }, { "epoch": 0.36, "grad_norm": 1.4678456824114459, "learning_rate": 7.4311931194670085e-06, "loss": 0.6417, "step": 2812 }, { "epoch": 0.36, "grad_norm": 0.6604353020511844, "learning_rate": 7.429389940255412e-06, "loss": 0.5274, "step": 2813 }, { "epoch": 0.36, "grad_norm": 0.8529674817005919, "learning_rate": 7.427586347344017e-06, "loss": 0.6289, "step": 2814 }, { "epoch": 0.36, "grad_norm": 0.8013202131373539, "learning_rate": 7.425782341039957e-06, "loss": 0.596, "step": 2815 }, { "epoch": 0.36, "grad_norm": 0.6834557147795206, "learning_rate": 7.4239779216504386e-06, "loss": 0.5578, "step": 2816 }, { "epoch": 0.36, "grad_norm": 0.7633287135547377, "learning_rate": 7.422173089482732e-06, "loss": 0.5902, "step": 2817 }, { "epoch": 0.36, "grad_norm": 0.9158569476634496, "learning_rate": 7.420367844844181e-06, "loss": 0.6439, "step": 2818 }, { "epoch": 0.36, "grad_norm": 0.8827029514428754, "learning_rate": 7.418562188042202e-06, "loss": 0.5777, "step": 2819 }, { "epoch": 0.36, "grad_norm": 0.8158684938546891, "learning_rate": 7.416756119384278e-06, "loss": 0.6359, "step": 2820 }, { "epoch": 0.36, "grad_norm": 0.7436591213506316, "learning_rate": 7.414949639177965e-06, "loss": 0.6018, "step": 2821 }, { "epoch": 0.36, "grad_norm": 0.792487060961103, "learning_rate": 7.413142747730888e-06, "loss": 0.5646, "step": 2822 }, { "epoch": 0.36, "grad_norm": 1.1969064272659133, "learning_rate": 7.411335445350739e-06, "loss": 0.6117, "step": 2823 }, { "epoch": 0.36, "grad_norm": 0.8680410099423544, "learning_rate": 7.409527732345286e-06, "loss": 0.6409, "step": 2824 }, { "epoch": 0.36, "grad_norm": 0.9210056257054146, "learning_rate": 7.407719609022364e-06, "loss": 0.6288, "step": 2825 }, { "epoch": 0.36, "grad_norm": 0.8640229418824921, "learning_rate": 7.405911075689873e-06, "loss": 0.7012, "step": 2826 }, { "epoch": 0.36, "grad_norm": 0.7635452820306983, "learning_rate": 7.404102132655793e-06, "loss": 0.5999, "step": 2827 }, { "epoch": 0.36, "grad_norm": 0.7809114951757538, "learning_rate": 7.402292780228164e-06, "loss": 0.6313, "step": 2828 }, { "epoch": 0.36, "grad_norm": 0.8133700076302167, "learning_rate": 7.400483018715102e-06, "loss": 0.5951, "step": 2829 }, { "epoch": 0.36, "grad_norm": 0.885618376770053, "learning_rate": 7.39867284842479e-06, "loss": 0.6154, "step": 2830 }, { "epoch": 0.36, "grad_norm": 0.6922785924001391, "learning_rate": 7.396862269665481e-06, "loss": 0.5355, "step": 2831 }, { "epoch": 0.36, "grad_norm": 0.8011579540325787, "learning_rate": 7.395051282745498e-06, "loss": 0.6297, "step": 2832 }, { "epoch": 0.36, "grad_norm": 0.9363355576810274, "learning_rate": 7.393239887973233e-06, "loss": 0.6647, "step": 2833 }, { "epoch": 0.36, "grad_norm": 0.6980248750014387, "learning_rate": 7.391428085657149e-06, "loss": 0.5771, "step": 2834 }, { "epoch": 0.36, "grad_norm": 0.9281539487708826, "learning_rate": 7.389615876105773e-06, "loss": 0.6259, "step": 2835 }, { "epoch": 0.36, "grad_norm": 0.6357150834755935, "learning_rate": 7.387803259627708e-06, "loss": 0.5345, "step": 2836 }, { "epoch": 0.36, "grad_norm": 0.8004713244179662, "learning_rate": 7.385990236531626e-06, "loss": 0.661, "step": 2837 }, { "epoch": 0.36, "grad_norm": 0.9092593636169148, "learning_rate": 7.384176807126263e-06, "loss": 0.5931, "step": 2838 }, { "epoch": 0.36, "grad_norm": 0.8641341664554995, "learning_rate": 7.382362971720428e-06, "loss": 0.5792, "step": 2839 }, { "epoch": 0.36, "grad_norm": 0.9384934754070131, "learning_rate": 7.380548730622998e-06, "loss": 0.6101, "step": 2840 }, { "epoch": 0.36, "grad_norm": 0.8993188027728529, "learning_rate": 7.3787340841429204e-06, "loss": 0.6773, "step": 2841 }, { "epoch": 0.36, "grad_norm": 0.8216448945664718, "learning_rate": 7.376919032589209e-06, "loss": 0.6545, "step": 2842 }, { "epoch": 0.36, "grad_norm": 0.9490359401112175, "learning_rate": 7.375103576270949e-06, "loss": 0.6019, "step": 2843 }, { "epoch": 0.36, "grad_norm": 0.7424189196626539, "learning_rate": 7.373287715497294e-06, "loss": 0.5986, "step": 2844 }, { "epoch": 0.36, "grad_norm": 0.8599509592323239, "learning_rate": 7.371471450577466e-06, "loss": 0.6992, "step": 2845 }, { "epoch": 0.36, "grad_norm": 0.6385025053064425, "learning_rate": 7.369654781820754e-06, "loss": 0.5393, "step": 2846 }, { "epoch": 0.36, "grad_norm": 1.0448492836003456, "learning_rate": 7.36783770953652e-06, "loss": 0.6766, "step": 2847 }, { "epoch": 0.36, "grad_norm": 0.8375821318025848, "learning_rate": 7.366020234034194e-06, "loss": 0.6283, "step": 2848 }, { "epoch": 0.36, "grad_norm": 0.6567250037850638, "learning_rate": 7.364202355623269e-06, "loss": 0.5475, "step": 2849 }, { "epoch": 0.36, "grad_norm": 1.0388567737752157, "learning_rate": 7.362384074613313e-06, "loss": 0.7117, "step": 2850 }, { "epoch": 0.36, "grad_norm": 1.201138998696804, "learning_rate": 7.360565391313961e-06, "loss": 0.6528, "step": 2851 }, { "epoch": 0.36, "grad_norm": 0.7902597459524227, "learning_rate": 7.358746306034914e-06, "loss": 0.5434, "step": 2852 }, { "epoch": 0.36, "grad_norm": 0.7008384269371887, "learning_rate": 7.356926819085946e-06, "loss": 0.55, "step": 2853 }, { "epoch": 0.36, "grad_norm": 0.7799511169318565, "learning_rate": 7.355106930776894e-06, "loss": 0.6287, "step": 2854 }, { "epoch": 0.36, "grad_norm": 0.7599687231764983, "learning_rate": 7.353286641417667e-06, "loss": 0.6309, "step": 2855 }, { "epoch": 0.36, "grad_norm": 0.990798707368135, "learning_rate": 7.351465951318243e-06, "loss": 0.6853, "step": 2856 }, { "epoch": 0.36, "grad_norm": 0.679323887927624, "learning_rate": 7.349644860788663e-06, "loss": 0.5334, "step": 2857 }, { "epoch": 0.36, "grad_norm": 0.8631613997105384, "learning_rate": 7.347823370139042e-06, "loss": 0.6657, "step": 2858 }, { "epoch": 0.36, "grad_norm": 0.6484232606824437, "learning_rate": 7.346001479679561e-06, "loss": 0.5463, "step": 2859 }, { "epoch": 0.36, "grad_norm": 0.6429189109587077, "learning_rate": 7.344179189720468e-06, "loss": 0.5535, "step": 2860 }, { "epoch": 0.36, "grad_norm": 0.6226618855258182, "learning_rate": 7.342356500572081e-06, "loss": 0.5343, "step": 2861 }, { "epoch": 0.36, "grad_norm": 0.9298565980352995, "learning_rate": 7.340533412544784e-06, "loss": 0.6588, "step": 2862 }, { "epoch": 0.36, "grad_norm": 1.151576818973023, "learning_rate": 7.338709925949031e-06, "loss": 0.6652, "step": 2863 }, { "epoch": 0.36, "grad_norm": 0.7102917109206107, "learning_rate": 7.336886041095342e-06, "loss": 0.532, "step": 2864 }, { "epoch": 0.36, "grad_norm": 0.8113055560444782, "learning_rate": 7.335061758294304e-06, "loss": 0.641, "step": 2865 }, { "epoch": 0.37, "grad_norm": 0.6465447535820217, "learning_rate": 7.3332370778565765e-06, "loss": 0.5574, "step": 2866 }, { "epoch": 0.37, "grad_norm": 0.6648166402403834, "learning_rate": 7.331412000092882e-06, "loss": 0.5098, "step": 2867 }, { "epoch": 0.37, "grad_norm": 0.7895565954237493, "learning_rate": 7.329586525314013e-06, "loss": 0.5931, "step": 2868 }, { "epoch": 0.37, "grad_norm": 0.7414527944765841, "learning_rate": 7.327760653830827e-06, "loss": 0.5588, "step": 2869 }, { "epoch": 0.37, "grad_norm": 0.6896143202134559, "learning_rate": 7.325934385954253e-06, "loss": 0.6228, "step": 2870 }, { "epoch": 0.37, "grad_norm": 0.6476335485398594, "learning_rate": 7.3241077219952835e-06, "loss": 0.505, "step": 2871 }, { "epoch": 0.37, "grad_norm": 0.9408007461651444, "learning_rate": 7.322280662264982e-06, "loss": 0.6682, "step": 2872 }, { "epoch": 0.37, "grad_norm": 0.9091270413358847, "learning_rate": 7.3204532070744786e-06, "loss": 0.5484, "step": 2873 }, { "epoch": 0.37, "grad_norm": 0.7436389426827509, "learning_rate": 7.318625356734966e-06, "loss": 0.5657, "step": 2874 }, { "epoch": 0.37, "grad_norm": 0.8365383818303793, "learning_rate": 7.31679711155771e-06, "loss": 0.6025, "step": 2875 }, { "epoch": 0.37, "grad_norm": 0.6783044647999716, "learning_rate": 7.314968471854045e-06, "loss": 0.5881, "step": 2876 }, { "epoch": 0.37, "grad_norm": 0.8138430098964579, "learning_rate": 7.313139437935363e-06, "loss": 0.7131, "step": 2877 }, { "epoch": 0.37, "grad_norm": 0.6648183022913393, "learning_rate": 7.311310010113136e-06, "loss": 0.5293, "step": 2878 }, { "epoch": 0.37, "grad_norm": 0.8661903257434097, "learning_rate": 7.309480188698891e-06, "loss": 0.6796, "step": 2879 }, { "epoch": 0.37, "grad_norm": 0.7955237115574195, "learning_rate": 7.3076499740042304e-06, "loss": 0.5831, "step": 2880 }, { "epoch": 0.37, "grad_norm": 0.7005981900877916, "learning_rate": 7.30581936634082e-06, "loss": 0.5195, "step": 2881 }, { "epoch": 0.37, "grad_norm": 0.7125403564423088, "learning_rate": 7.303988366020395e-06, "loss": 0.5774, "step": 2882 }, { "epoch": 0.37, "grad_norm": 0.7966605262025139, "learning_rate": 7.3021569733547525e-06, "loss": 0.5672, "step": 2883 }, { "epoch": 0.37, "grad_norm": 0.6408581123666159, "learning_rate": 7.300325188655762e-06, "loss": 0.5491, "step": 2884 }, { "epoch": 0.37, "grad_norm": 0.6729588686163938, "learning_rate": 7.298493012235356e-06, "loss": 0.5882, "step": 2885 }, { "epoch": 0.37, "grad_norm": 0.8267576202984049, "learning_rate": 7.296660444405535e-06, "loss": 0.6398, "step": 2886 }, { "epoch": 0.37, "grad_norm": 0.7966558166896005, "learning_rate": 7.2948274854783675e-06, "loss": 0.5778, "step": 2887 }, { "epoch": 0.37, "grad_norm": 0.8561943791015211, "learning_rate": 7.292994135765986e-06, "loss": 0.6957, "step": 2888 }, { "epoch": 0.37, "grad_norm": 0.6623849906121865, "learning_rate": 7.291160395580592e-06, "loss": 0.5477, "step": 2889 }, { "epoch": 0.37, "grad_norm": 0.7343714851941893, "learning_rate": 7.2893262652344525e-06, "loss": 0.5849, "step": 2890 }, { "epoch": 0.37, "grad_norm": 0.7873218477879738, "learning_rate": 7.287491745039899e-06, "loss": 0.6311, "step": 2891 }, { "epoch": 0.37, "grad_norm": 0.839586644181958, "learning_rate": 7.285656835309331e-06, "loss": 0.6209, "step": 2892 }, { "epoch": 0.37, "grad_norm": 0.8396634681771563, "learning_rate": 7.283821536355217e-06, "loss": 0.6205, "step": 2893 }, { "epoch": 0.37, "grad_norm": 0.8624888896223887, "learning_rate": 7.281985848490088e-06, "loss": 0.6439, "step": 2894 }, { "epoch": 0.37, "grad_norm": 0.6606634156738503, "learning_rate": 7.280149772026543e-06, "loss": 0.5578, "step": 2895 }, { "epoch": 0.37, "grad_norm": 0.6403054329211018, "learning_rate": 7.278313307277246e-06, "loss": 0.601, "step": 2896 }, { "epoch": 0.37, "grad_norm": 0.8262560993345395, "learning_rate": 7.276476454554929e-06, "loss": 0.5662, "step": 2897 }, { "epoch": 0.37, "grad_norm": 0.708835024781072, "learning_rate": 7.2746392141723855e-06, "loss": 0.5806, "step": 2898 }, { "epoch": 0.37, "grad_norm": 0.7789276040053643, "learning_rate": 7.272801586442483e-06, "loss": 0.6009, "step": 2899 }, { "epoch": 0.37, "grad_norm": 0.6546894783018286, "learning_rate": 7.270963571678147e-06, "loss": 0.5633, "step": 2900 }, { "epoch": 0.37, "grad_norm": 0.7976681468572578, "learning_rate": 7.269125170192374e-06, "loss": 0.5243, "step": 2901 }, { "epoch": 0.37, "grad_norm": 0.9280675913614975, "learning_rate": 7.267286382298223e-06, "loss": 0.6668, "step": 2902 }, { "epoch": 0.37, "grad_norm": 0.7159934928959768, "learning_rate": 7.265447208308823e-06, "loss": 0.5767, "step": 2903 }, { "epoch": 0.37, "grad_norm": 0.8084192931223748, "learning_rate": 7.2636076485373645e-06, "loss": 0.6601, "step": 2904 }, { "epoch": 0.37, "grad_norm": 0.7838871066062002, "learning_rate": 7.261767703297106e-06, "loss": 0.5528, "step": 2905 }, { "epoch": 0.37, "grad_norm": 0.7651466819633849, "learning_rate": 7.25992737290137e-06, "loss": 0.5644, "step": 2906 }, { "epoch": 0.37, "grad_norm": 0.7475776654515275, "learning_rate": 7.2580866576635455e-06, "loss": 0.5759, "step": 2907 }, { "epoch": 0.37, "grad_norm": 0.97313575108032, "learning_rate": 7.256245557897087e-06, "loss": 0.6327, "step": 2908 }, { "epoch": 0.37, "grad_norm": 0.7019378626324887, "learning_rate": 7.2544040739155175e-06, "loss": 0.5846, "step": 2909 }, { "epoch": 0.37, "grad_norm": 0.7030979096116602, "learning_rate": 7.252562206032419e-06, "loss": 0.5168, "step": 2910 }, { "epoch": 0.37, "grad_norm": 0.9477283408724446, "learning_rate": 7.250719954561445e-06, "loss": 0.6217, "step": 2911 }, { "epoch": 0.37, "grad_norm": 0.8233896715207245, "learning_rate": 7.24887731981631e-06, "loss": 0.6198, "step": 2912 }, { "epoch": 0.37, "grad_norm": 0.7470560269126937, "learning_rate": 7.247034302110796e-06, "loss": 0.5991, "step": 2913 }, { "epoch": 0.37, "grad_norm": 0.7791388102540987, "learning_rate": 7.24519090175875e-06, "loss": 0.6573, "step": 2914 }, { "epoch": 0.37, "grad_norm": 0.9035331631095982, "learning_rate": 7.243347119074083e-06, "loss": 0.6181, "step": 2915 }, { "epoch": 0.37, "grad_norm": 0.6795602372065908, "learning_rate": 7.241502954370774e-06, "loss": 0.5515, "step": 2916 }, { "epoch": 0.37, "grad_norm": 0.6716714562267163, "learning_rate": 7.239658407962862e-06, "loss": 0.5555, "step": 2917 }, { "epoch": 0.37, "grad_norm": 0.9917691432338173, "learning_rate": 7.2378134801644575e-06, "loss": 0.6633, "step": 2918 }, { "epoch": 0.37, "grad_norm": 0.7413779978681652, "learning_rate": 7.235968171289729e-06, "loss": 0.561, "step": 2919 }, { "epoch": 0.37, "grad_norm": 0.7652866883129045, "learning_rate": 7.234122481652916e-06, "loss": 0.5384, "step": 2920 }, { "epoch": 0.37, "grad_norm": 0.9967114250440547, "learning_rate": 7.232276411568319e-06, "loss": 0.6926, "step": 2921 }, { "epoch": 0.37, "grad_norm": 0.7318013497361314, "learning_rate": 7.230429961350305e-06, "loss": 0.5927, "step": 2922 }, { "epoch": 0.37, "grad_norm": 0.717276143424521, "learning_rate": 7.228583131313304e-06, "loss": 0.5191, "step": 2923 }, { "epoch": 0.37, "grad_norm": 0.7155339257913605, "learning_rate": 7.226735921771815e-06, "loss": 0.558, "step": 2924 }, { "epoch": 0.37, "grad_norm": 0.7376311403813524, "learning_rate": 7.224888333040394e-06, "loss": 0.6196, "step": 2925 }, { "epoch": 0.37, "grad_norm": 0.6774890991823183, "learning_rate": 7.2230403654336715e-06, "loss": 0.5225, "step": 2926 }, { "epoch": 0.37, "grad_norm": 0.7249595976177491, "learning_rate": 7.221192019266332e-06, "loss": 0.5864, "step": 2927 }, { "epoch": 0.37, "grad_norm": 0.7036284081710583, "learning_rate": 7.219343294853133e-06, "loss": 0.5383, "step": 2928 }, { "epoch": 0.37, "grad_norm": 0.6407480774273943, "learning_rate": 7.2174941925088915e-06, "loss": 0.5635, "step": 2929 }, { "epoch": 0.37, "grad_norm": 0.9036098755703411, "learning_rate": 7.215644712548491e-06, "loss": 0.639, "step": 2930 }, { "epoch": 0.37, "grad_norm": 0.8668411908393069, "learning_rate": 7.213794855286877e-06, "loss": 0.6745, "step": 2931 }, { "epoch": 0.37, "grad_norm": 0.8156597128858952, "learning_rate": 7.211944621039065e-06, "loss": 0.6251, "step": 2932 }, { "epoch": 0.37, "grad_norm": 0.7649428027318845, "learning_rate": 7.210094010120124e-06, "loss": 0.5865, "step": 2933 }, { "epoch": 0.37, "grad_norm": 0.6756427023930736, "learning_rate": 7.208243022845199e-06, "loss": 0.5635, "step": 2934 }, { "epoch": 0.37, "grad_norm": 0.9203702234857714, "learning_rate": 7.206391659529494e-06, "loss": 0.6179, "step": 2935 }, { "epoch": 0.37, "grad_norm": 0.8042192424233775, "learning_rate": 7.204539920488275e-06, "loss": 0.5802, "step": 2936 }, { "epoch": 0.37, "grad_norm": 0.9942020614469318, "learning_rate": 7.202687806036874e-06, "loss": 0.6994, "step": 2937 }, { "epoch": 0.37, "grad_norm": 0.7070935986584366, "learning_rate": 7.200835316490687e-06, "loss": 0.5932, "step": 2938 }, { "epoch": 0.37, "grad_norm": 0.8814074724996127, "learning_rate": 7.198982452165174e-06, "loss": 0.6463, "step": 2939 }, { "epoch": 0.37, "grad_norm": 0.6661383112331489, "learning_rate": 7.197129213375858e-06, "loss": 0.5369, "step": 2940 }, { "epoch": 0.37, "grad_norm": 0.8050566964842777, "learning_rate": 7.195275600438328e-06, "loss": 0.6541, "step": 2941 }, { "epoch": 0.37, "grad_norm": 0.856675902555081, "learning_rate": 7.193421613668231e-06, "loss": 0.7227, "step": 2942 }, { "epoch": 0.37, "grad_norm": 0.9589320120285684, "learning_rate": 7.1915672533812855e-06, "loss": 0.6704, "step": 2943 }, { "epoch": 0.38, "grad_norm": 0.80216088362574, "learning_rate": 7.189712519893267e-06, "loss": 0.5809, "step": 2944 }, { "epoch": 0.38, "grad_norm": 0.7093480385216203, "learning_rate": 7.1878574135200204e-06, "loss": 0.5389, "step": 2945 }, { "epoch": 0.38, "grad_norm": 0.6413752649400862, "learning_rate": 7.186001934577448e-06, "loss": 0.5348, "step": 2946 }, { "epoch": 0.38, "grad_norm": 0.6690118492461999, "learning_rate": 7.184146083381519e-06, "loss": 0.6138, "step": 2947 }, { "epoch": 0.38, "grad_norm": 0.7668010369648064, "learning_rate": 7.182289860248269e-06, "loss": 0.543, "step": 2948 }, { "epoch": 0.38, "grad_norm": 0.8273713241864384, "learning_rate": 7.180433265493788e-06, "loss": 0.6426, "step": 2949 }, { "epoch": 0.38, "grad_norm": 0.8591190933924621, "learning_rate": 7.178576299434239e-06, "loss": 0.6759, "step": 2950 }, { "epoch": 0.38, "grad_norm": 0.6911908997598203, "learning_rate": 7.176718962385842e-06, "loss": 0.581, "step": 2951 }, { "epoch": 0.38, "grad_norm": 0.8646514389701477, "learning_rate": 7.174861254664882e-06, "loss": 0.6688, "step": 2952 }, { "epoch": 0.38, "grad_norm": 0.6958365536625196, "learning_rate": 7.173003176587708e-06, "loss": 0.5448, "step": 2953 }, { "epoch": 0.38, "grad_norm": 0.6588055074421009, "learning_rate": 7.171144728470731e-06, "loss": 0.5518, "step": 2954 }, { "epoch": 0.38, "grad_norm": 0.8246773105027293, "learning_rate": 7.169285910630427e-06, "loss": 0.5615, "step": 2955 }, { "epoch": 0.38, "grad_norm": 0.757300244285086, "learning_rate": 7.167426723383331e-06, "loss": 0.5736, "step": 2956 }, { "epoch": 0.38, "grad_norm": 0.8375233005988142, "learning_rate": 7.165567167046044e-06, "loss": 0.605, "step": 2957 }, { "epoch": 0.38, "grad_norm": 0.6707128095407431, "learning_rate": 7.16370724193523e-06, "loss": 0.558, "step": 2958 }, { "epoch": 0.38, "grad_norm": 0.7536820099014473, "learning_rate": 7.161846948367613e-06, "loss": 0.5662, "step": 2959 }, { "epoch": 0.38, "grad_norm": 0.9359011354478194, "learning_rate": 7.159986286659983e-06, "loss": 0.7473, "step": 2960 }, { "epoch": 0.38, "grad_norm": 0.90758992217512, "learning_rate": 7.158125257129192e-06, "loss": 0.6664, "step": 2961 }, { "epoch": 0.38, "grad_norm": 0.9845409959856751, "learning_rate": 7.156263860092152e-06, "loss": 0.6374, "step": 2962 }, { "epoch": 0.38, "grad_norm": 0.8893635388579256, "learning_rate": 7.15440209586584e-06, "loss": 0.6815, "step": 2963 }, { "epoch": 0.38, "grad_norm": 0.8687825036573555, "learning_rate": 7.152539964767295e-06, "loss": 0.6312, "step": 2964 }, { "epoch": 0.38, "grad_norm": 0.803700756845121, "learning_rate": 7.150677467113621e-06, "loss": 0.5827, "step": 2965 }, { "epoch": 0.38, "grad_norm": 0.7049773100509169, "learning_rate": 7.148814603221978e-06, "loss": 0.5445, "step": 2966 }, { "epoch": 0.38, "grad_norm": 0.7573555676458228, "learning_rate": 7.1469513734095955e-06, "loss": 0.6377, "step": 2967 }, { "epoch": 0.38, "grad_norm": 0.7777461335177506, "learning_rate": 7.14508777799376e-06, "loss": 0.5789, "step": 2968 }, { "epoch": 0.38, "grad_norm": 0.7135669149533928, "learning_rate": 7.143223817291825e-06, "loss": 0.559, "step": 2969 }, { "epoch": 0.38, "grad_norm": 1.0027785554945337, "learning_rate": 7.1413594916212e-06, "loss": 0.6783, "step": 2970 }, { "epoch": 0.38, "grad_norm": 0.9135368783533783, "learning_rate": 7.139494801299363e-06, "loss": 0.6151, "step": 2971 }, { "epoch": 0.38, "grad_norm": 1.69814885290027, "learning_rate": 7.13762974664385e-06, "loss": 0.6754, "step": 2972 }, { "epoch": 0.38, "grad_norm": 0.9057520587358571, "learning_rate": 7.135764327972261e-06, "loss": 0.6243, "step": 2973 }, { "epoch": 0.38, "grad_norm": 0.7018201853203253, "learning_rate": 7.133898545602257e-06, "loss": 0.5772, "step": 2974 }, { "epoch": 0.38, "grad_norm": 0.7735526587878031, "learning_rate": 7.132032399851562e-06, "loss": 0.6132, "step": 2975 }, { "epoch": 0.38, "grad_norm": 0.8228602001091586, "learning_rate": 7.130165891037959e-06, "loss": 0.6294, "step": 2976 }, { "epoch": 0.38, "grad_norm": 0.6759064904761289, "learning_rate": 7.128299019479298e-06, "loss": 0.4742, "step": 2977 }, { "epoch": 0.38, "grad_norm": 0.7559012628964766, "learning_rate": 7.126431785493486e-06, "loss": 0.5535, "step": 2978 }, { "epoch": 0.38, "grad_norm": 0.7869832363043596, "learning_rate": 7.124564189398495e-06, "loss": 0.5967, "step": 2979 }, { "epoch": 0.38, "grad_norm": 0.7895781880525654, "learning_rate": 7.122696231512355e-06, "loss": 0.5846, "step": 2980 }, { "epoch": 0.38, "grad_norm": 0.8597787266529932, "learning_rate": 7.120827912153161e-06, "loss": 0.5833, "step": 2981 }, { "epoch": 0.38, "grad_norm": 0.843680876080643, "learning_rate": 7.1189592316390685e-06, "loss": 0.7085, "step": 2982 }, { "epoch": 0.38, "grad_norm": 0.8383654361633295, "learning_rate": 7.117090190288294e-06, "loss": 0.643, "step": 2983 }, { "epoch": 0.38, "grad_norm": 0.6872504404666091, "learning_rate": 7.115220788419116e-06, "loss": 0.6007, "step": 2984 }, { "epoch": 0.38, "grad_norm": 0.8326014425177376, "learning_rate": 7.113351026349874e-06, "loss": 0.6527, "step": 2985 }, { "epoch": 0.38, "grad_norm": 0.8221375517019802, "learning_rate": 7.111480904398968e-06, "loss": 0.638, "step": 2986 }, { "epoch": 0.38, "grad_norm": 0.6888734970277014, "learning_rate": 7.109610422884863e-06, "loss": 0.5807, "step": 2987 }, { "epoch": 0.38, "grad_norm": 0.8722924451178358, "learning_rate": 7.107739582126079e-06, "loss": 0.6522, "step": 2988 }, { "epoch": 0.38, "grad_norm": 0.6622645554734401, "learning_rate": 7.105868382441203e-06, "loss": 0.4967, "step": 2989 }, { "epoch": 0.38, "grad_norm": 0.7541751695646751, "learning_rate": 7.10399682414888e-06, "loss": 0.5876, "step": 2990 }, { "epoch": 0.38, "grad_norm": 0.701777870712533, "learning_rate": 7.102124907567817e-06, "loss": 0.5317, "step": 2991 }, { "epoch": 0.38, "grad_norm": 0.6615256579651861, "learning_rate": 7.100252633016782e-06, "loss": 0.5595, "step": 2992 }, { "epoch": 0.38, "grad_norm": 0.8252558841635048, "learning_rate": 7.098380000814604e-06, "loss": 0.6203, "step": 2993 }, { "epoch": 0.38, "grad_norm": 0.862270861427249, "learning_rate": 7.096507011280174e-06, "loss": 0.6588, "step": 2994 }, { "epoch": 0.38, "grad_norm": 0.6865980275228062, "learning_rate": 7.0946336647324395e-06, "loss": 0.5632, "step": 2995 }, { "epoch": 0.38, "grad_norm": 0.8258391478606953, "learning_rate": 7.092759961490415e-06, "loss": 0.6238, "step": 2996 }, { "epoch": 0.38, "grad_norm": 0.8701166937030154, "learning_rate": 7.09088590187317e-06, "loss": 0.6292, "step": 2997 }, { "epoch": 0.38, "grad_norm": 0.8037308928188226, "learning_rate": 7.089011486199839e-06, "loss": 0.5649, "step": 2998 }, { "epoch": 0.38, "grad_norm": 0.9051578679306854, "learning_rate": 7.087136714789615e-06, "loss": 0.568, "step": 2999 }, { "epoch": 0.38, "grad_norm": 0.7969085168896913, "learning_rate": 7.085261587961754e-06, "loss": 0.5811, "step": 3000 }, { "epoch": 0.38, "grad_norm": 0.8198312366243342, "learning_rate": 7.083386106035566e-06, "loss": 0.684, "step": 3001 }, { "epoch": 0.38, "grad_norm": 0.7210405659644403, "learning_rate": 7.081510269330431e-06, "loss": 0.5961, "step": 3002 }, { "epoch": 0.38, "grad_norm": 0.9613916846184977, "learning_rate": 7.079634078165779e-06, "loss": 0.6756, "step": 3003 }, { "epoch": 0.38, "grad_norm": 0.6785757163276998, "learning_rate": 7.077757532861111e-06, "loss": 0.5701, "step": 3004 }, { "epoch": 0.38, "grad_norm": 0.8426624974124951, "learning_rate": 7.075880633735981e-06, "loss": 0.6101, "step": 3005 }, { "epoch": 0.38, "grad_norm": 0.7403011533148561, "learning_rate": 7.074003381110004e-06, "loss": 0.5289, "step": 3006 }, { "epoch": 0.38, "grad_norm": 0.8080912879893964, "learning_rate": 7.07212577530286e-06, "loss": 0.5245, "step": 3007 }, { "epoch": 0.38, "grad_norm": 0.7603773416727762, "learning_rate": 7.070247816634282e-06, "loss": 0.6549, "step": 3008 }, { "epoch": 0.38, "grad_norm": 0.6176757502518891, "learning_rate": 7.068369505424071e-06, "loss": 0.5248, "step": 3009 }, { "epoch": 0.38, "grad_norm": 0.8684138846327495, "learning_rate": 7.06649084199208e-06, "loss": 0.6572, "step": 3010 }, { "epoch": 0.38, "grad_norm": 0.7587533120760415, "learning_rate": 7.064611826658226e-06, "loss": 0.565, "step": 3011 }, { "epoch": 0.38, "grad_norm": 1.211897124888826, "learning_rate": 7.062732459742488e-06, "loss": 0.7096, "step": 3012 }, { "epoch": 0.38, "grad_norm": 0.7770537992997335, "learning_rate": 7.060852741564902e-06, "loss": 0.5265, "step": 3013 }, { "epoch": 0.38, "grad_norm": 0.7649425612924766, "learning_rate": 7.0589726724455635e-06, "loss": 0.5549, "step": 3014 }, { "epoch": 0.38, "grad_norm": 0.8725765030621405, "learning_rate": 7.057092252704629e-06, "loss": 0.6385, "step": 3015 }, { "epoch": 0.38, "grad_norm": 0.9611956530411195, "learning_rate": 7.055211482662314e-06, "loss": 0.6128, "step": 3016 }, { "epoch": 0.38, "grad_norm": 0.7014431993207055, "learning_rate": 7.053330362638895e-06, "loss": 0.5523, "step": 3017 }, { "epoch": 0.38, "grad_norm": 0.7096150088676013, "learning_rate": 7.051448892954707e-06, "loss": 0.558, "step": 3018 }, { "epoch": 0.38, "grad_norm": 0.692644979852246, "learning_rate": 7.0495670739301435e-06, "loss": 0.5391, "step": 3019 }, { "epoch": 0.38, "grad_norm": 0.6333665728426101, "learning_rate": 7.04768490588566e-06, "loss": 0.5205, "step": 3020 }, { "epoch": 0.38, "grad_norm": 0.7845396951098721, "learning_rate": 7.045802389141769e-06, "loss": 0.5915, "step": 3021 }, { "epoch": 0.38, "grad_norm": 0.8729908592073734, "learning_rate": 7.043919524019045e-06, "loss": 0.666, "step": 3022 }, { "epoch": 0.39, "grad_norm": 0.7697630140273541, "learning_rate": 7.042036310838121e-06, "loss": 0.623, "step": 3023 }, { "epoch": 0.39, "grad_norm": 0.6887642417593378, "learning_rate": 7.040152749919684e-06, "loss": 0.5462, "step": 3024 }, { "epoch": 0.39, "grad_norm": 0.8556995734980393, "learning_rate": 7.03826884158449e-06, "loss": 0.6267, "step": 3025 }, { "epoch": 0.39, "grad_norm": 0.7395549806069585, "learning_rate": 7.036384586153347e-06, "loss": 0.5459, "step": 3026 }, { "epoch": 0.39, "grad_norm": 0.8450834446911724, "learning_rate": 7.0344999839471215e-06, "loss": 0.6404, "step": 3027 }, { "epoch": 0.39, "grad_norm": 0.7110333313762182, "learning_rate": 7.032615035286746e-06, "loss": 0.5624, "step": 3028 }, { "epoch": 0.39, "grad_norm": 0.6904359686086238, "learning_rate": 7.0307297404932054e-06, "loss": 0.5581, "step": 3029 }, { "epoch": 0.39, "grad_norm": 0.7725053120319834, "learning_rate": 7.028844099887545e-06, "loss": 0.6169, "step": 3030 }, { "epoch": 0.39, "grad_norm": 1.1161044161946523, "learning_rate": 7.026958113790872e-06, "loss": 0.5549, "step": 3031 }, { "epoch": 0.39, "grad_norm": 0.8500349776705105, "learning_rate": 7.0250717825243475e-06, "loss": 0.6614, "step": 3032 }, { "epoch": 0.39, "grad_norm": 0.6471553978570826, "learning_rate": 7.0231851064091964e-06, "loss": 0.5426, "step": 3033 }, { "epoch": 0.39, "grad_norm": 0.7874830851454462, "learning_rate": 7.021298085766699e-06, "loss": 0.6075, "step": 3034 }, { "epoch": 0.39, "grad_norm": 0.7465750236562145, "learning_rate": 7.019410720918195e-06, "loss": 0.5118, "step": 3035 }, { "epoch": 0.39, "grad_norm": 0.8756801487259661, "learning_rate": 7.017523012185084e-06, "loss": 0.6584, "step": 3036 }, { "epoch": 0.39, "grad_norm": 0.8806619712880692, "learning_rate": 7.015634959888822e-06, "loss": 0.6879, "step": 3037 }, { "epoch": 0.39, "grad_norm": 0.7409501660262899, "learning_rate": 7.013746564350925e-06, "loss": 0.5824, "step": 3038 }, { "epoch": 0.39, "grad_norm": 0.6864269455368546, "learning_rate": 7.011857825892967e-06, "loss": 0.5815, "step": 3039 }, { "epoch": 0.39, "grad_norm": 0.7403561125378395, "learning_rate": 7.009968744836581e-06, "loss": 0.5183, "step": 3040 }, { "epoch": 0.39, "grad_norm": 0.7044296311078336, "learning_rate": 7.0080793215034575e-06, "loss": 0.5316, "step": 3041 }, { "epoch": 0.39, "grad_norm": 0.85886460842654, "learning_rate": 7.006189556215346e-06, "loss": 0.5988, "step": 3042 }, { "epoch": 0.39, "grad_norm": 0.7751996573303258, "learning_rate": 7.004299449294052e-06, "loss": 0.6244, "step": 3043 }, { "epoch": 0.39, "grad_norm": 0.6418762495525675, "learning_rate": 7.002409001061443e-06, "loss": 0.4667, "step": 3044 }, { "epoch": 0.39, "grad_norm": 0.8914649877271704, "learning_rate": 7.000518211839442e-06, "loss": 0.6646, "step": 3045 }, { "epoch": 0.39, "grad_norm": 0.6789236560892044, "learning_rate": 6.998627081950031e-06, "loss": 0.5189, "step": 3046 }, { "epoch": 0.39, "grad_norm": 0.7058168389931408, "learning_rate": 6.99673561171525e-06, "loss": 0.6283, "step": 3047 }, { "epoch": 0.39, "grad_norm": 0.7630030832539432, "learning_rate": 6.994843801457195e-06, "loss": 0.5512, "step": 3048 }, { "epoch": 0.39, "grad_norm": 0.7953126952077831, "learning_rate": 6.992951651498023e-06, "loss": 0.5903, "step": 3049 }, { "epoch": 0.39, "grad_norm": 0.8646574910949234, "learning_rate": 6.9910591621599475e-06, "loss": 0.5875, "step": 3050 }, { "epoch": 0.39, "grad_norm": 1.045980948661036, "learning_rate": 6.9891663337652406e-06, "loss": 0.664, "step": 3051 }, { "epoch": 0.39, "grad_norm": 0.722237762580419, "learning_rate": 6.987273166636228e-06, "loss": 0.5038, "step": 3052 }, { "epoch": 0.39, "grad_norm": 1.1306625855487964, "learning_rate": 6.9853796610952996e-06, "loss": 0.6601, "step": 3053 }, { "epoch": 0.39, "grad_norm": 0.6150494391570147, "learning_rate": 6.983485817464898e-06, "loss": 0.5215, "step": 3054 }, { "epoch": 0.39, "grad_norm": 0.6399278371217657, "learning_rate": 6.981591636067525e-06, "loss": 0.531, "step": 3055 }, { "epoch": 0.39, "grad_norm": 0.977642676846304, "learning_rate": 6.979697117225741e-06, "loss": 0.6353, "step": 3056 }, { "epoch": 0.39, "grad_norm": 0.8147357744747524, "learning_rate": 6.9778022612621634e-06, "loss": 0.5678, "step": 3057 }, { "epoch": 0.39, "grad_norm": 0.7627617329262633, "learning_rate": 6.975907068499463e-06, "loss": 0.5923, "step": 3058 }, { "epoch": 0.39, "grad_norm": 0.7276258102130468, "learning_rate": 6.9740115392603755e-06, "loss": 0.6029, "step": 3059 }, { "epoch": 0.39, "grad_norm": 0.7416330702505679, "learning_rate": 6.972115673867688e-06, "loss": 0.6355, "step": 3060 }, { "epoch": 0.39, "grad_norm": 0.7778534726874673, "learning_rate": 6.970219472644245e-06, "loss": 0.5545, "step": 3061 }, { "epoch": 0.39, "grad_norm": 0.8056824387964002, "learning_rate": 6.968322935912952e-06, "loss": 0.6321, "step": 3062 }, { "epoch": 0.39, "grad_norm": 0.8631674383596292, "learning_rate": 6.96642606399677e-06, "loss": 0.6895, "step": 3063 }, { "epoch": 0.39, "grad_norm": 0.8182784743662215, "learning_rate": 6.964528857218716e-06, "loss": 0.7138, "step": 3064 }, { "epoch": 0.39, "grad_norm": 0.7573460239146297, "learning_rate": 6.962631315901861e-06, "loss": 0.5998, "step": 3065 }, { "epoch": 0.39, "grad_norm": 0.7837527974687174, "learning_rate": 6.960733440369341e-06, "loss": 0.6131, "step": 3066 }, { "epoch": 0.39, "grad_norm": 0.7597360214399947, "learning_rate": 6.958835230944343e-06, "loss": 0.553, "step": 3067 }, { "epoch": 0.39, "grad_norm": 0.7856010228720411, "learning_rate": 6.9569366879501135e-06, "loss": 0.5427, "step": 3068 }, { "epoch": 0.39, "grad_norm": 0.5964918127289016, "learning_rate": 6.955037811709951e-06, "loss": 0.5037, "step": 3069 }, { "epoch": 0.39, "grad_norm": 0.7511345645795674, "learning_rate": 6.9531386025472185e-06, "loss": 0.5628, "step": 3070 }, { "epoch": 0.39, "grad_norm": 0.6575948986607947, "learning_rate": 6.951239060785328e-06, "loss": 0.5468, "step": 3071 }, { "epoch": 0.39, "grad_norm": 0.6612274073302472, "learning_rate": 6.949339186747754e-06, "loss": 0.5976, "step": 3072 }, { "epoch": 0.39, "grad_norm": 0.8792149834096752, "learning_rate": 6.947438980758024e-06, "loss": 0.674, "step": 3073 }, { "epoch": 0.39, "grad_norm": 0.7321475312312102, "learning_rate": 6.945538443139724e-06, "loss": 0.5579, "step": 3074 }, { "epoch": 0.39, "grad_norm": 0.8522100475420138, "learning_rate": 6.943637574216495e-06, "loss": 0.5569, "step": 3075 }, { "epoch": 0.39, "grad_norm": 0.6551523813967494, "learning_rate": 6.9417363743120385e-06, "loss": 0.524, "step": 3076 }, { "epoch": 0.39, "grad_norm": 0.989402426249599, "learning_rate": 6.939834843750104e-06, "loss": 0.6502, "step": 3077 }, { "epoch": 0.39, "grad_norm": 0.7951736653668456, "learning_rate": 6.937932982854505e-06, "loss": 0.6458, "step": 3078 }, { "epoch": 0.39, "grad_norm": 0.8513931480773506, "learning_rate": 6.936030791949109e-06, "loss": 0.5716, "step": 3079 }, { "epoch": 0.39, "grad_norm": 0.8654912063052131, "learning_rate": 6.9341282713578375e-06, "loss": 0.6142, "step": 3080 }, { "epoch": 0.39, "grad_norm": 0.6190609515458507, "learning_rate": 6.932225421404672e-06, "loss": 0.5135, "step": 3081 }, { "epoch": 0.39, "grad_norm": 0.7160370389087627, "learning_rate": 6.930322242413646e-06, "loss": 0.5766, "step": 3082 }, { "epoch": 0.39, "grad_norm": 0.8354032388748516, "learning_rate": 6.928418734708853e-06, "loss": 0.5804, "step": 3083 }, { "epoch": 0.39, "grad_norm": 0.5921561514179442, "learning_rate": 6.9265148986144395e-06, "loss": 0.5094, "step": 3084 }, { "epoch": 0.39, "grad_norm": 0.8469265363888865, "learning_rate": 6.9246107344546084e-06, "loss": 0.5703, "step": 3085 }, { "epoch": 0.39, "grad_norm": 0.8162632074634929, "learning_rate": 6.922706242553619e-06, "loss": 0.5606, "step": 3086 }, { "epoch": 0.39, "grad_norm": 0.7893259432336682, "learning_rate": 6.920801423235788e-06, "loss": 0.5926, "step": 3087 }, { "epoch": 0.39, "grad_norm": 0.7208294458872869, "learning_rate": 6.918896276825485e-06, "loss": 0.5843, "step": 3088 }, { "epoch": 0.39, "grad_norm": 0.969547270473866, "learning_rate": 6.916990803647138e-06, "loss": 0.6715, "step": 3089 }, { "epoch": 0.39, "grad_norm": 0.7333606861879639, "learning_rate": 6.915085004025228e-06, "loss": 0.5656, "step": 3090 }, { "epoch": 0.39, "grad_norm": 0.7243864518832337, "learning_rate": 6.913178878284292e-06, "loss": 0.5723, "step": 3091 }, { "epoch": 0.39, "grad_norm": 0.7733485646023331, "learning_rate": 6.911272426748925e-06, "loss": 0.58, "step": 3092 }, { "epoch": 0.39, "grad_norm": 0.8290283947921372, "learning_rate": 6.909365649743776e-06, "loss": 0.5705, "step": 3093 }, { "epoch": 0.39, "grad_norm": 0.8644394568238655, "learning_rate": 6.907458547593548e-06, "loss": 0.6574, "step": 3094 }, { "epoch": 0.39, "grad_norm": 0.658465575996566, "learning_rate": 6.9055511206230005e-06, "loss": 0.5469, "step": 3095 }, { "epoch": 0.39, "grad_norm": 0.8493209645300295, "learning_rate": 6.90364336915695e-06, "loss": 0.6327, "step": 3096 }, { "epoch": 0.39, "grad_norm": 0.92504291754693, "learning_rate": 6.901735293520265e-06, "loss": 0.557, "step": 3097 }, { "epoch": 0.39, "grad_norm": 0.6396218634339993, "learning_rate": 6.8998268940378725e-06, "loss": 0.5226, "step": 3098 }, { "epoch": 0.39, "grad_norm": 0.8850390742956666, "learning_rate": 6.897918171034751e-06, "loss": 0.638, "step": 3099 }, { "epoch": 0.39, "grad_norm": 0.8775568763224451, "learning_rate": 6.896009124835939e-06, "loss": 0.6182, "step": 3100 }, { "epoch": 0.4, "grad_norm": 0.7875971341491276, "learning_rate": 6.894099755766522e-06, "loss": 0.5804, "step": 3101 }, { "epoch": 0.4, "grad_norm": 0.8304681731763327, "learning_rate": 6.892190064151651e-06, "loss": 0.6095, "step": 3102 }, { "epoch": 0.4, "grad_norm": 0.6532507961761137, "learning_rate": 6.890280050316527e-06, "loss": 0.5862, "step": 3103 }, { "epoch": 0.4, "grad_norm": 0.6184833781789275, "learning_rate": 6.8883697145864e-06, "loss": 0.5317, "step": 3104 }, { "epoch": 0.4, "grad_norm": 0.7444624422064668, "learning_rate": 6.886459057286585e-06, "loss": 0.6293, "step": 3105 }, { "epoch": 0.4, "grad_norm": 0.738324939179867, "learning_rate": 6.884548078742446e-06, "loss": 0.5459, "step": 3106 }, { "epoch": 0.4, "grad_norm": 0.8875548189622121, "learning_rate": 6.882636779279401e-06, "loss": 0.6171, "step": 3107 }, { "epoch": 0.4, "grad_norm": 0.8910214080001175, "learning_rate": 6.8807251592229276e-06, "loss": 0.6506, "step": 3108 }, { "epoch": 0.4, "grad_norm": 0.7878766016929267, "learning_rate": 6.878813218898552e-06, "loss": 0.6775, "step": 3109 }, { "epoch": 0.4, "grad_norm": 0.943945346482362, "learning_rate": 6.876900958631858e-06, "loss": 0.66, "step": 3110 }, { "epoch": 0.4, "grad_norm": 0.6527262444038725, "learning_rate": 6.874988378748484e-06, "loss": 0.5501, "step": 3111 }, { "epoch": 0.4, "grad_norm": 0.6653928895498751, "learning_rate": 6.873075479574123e-06, "loss": 0.578, "step": 3112 }, { "epoch": 0.4, "grad_norm": 0.8089927798537804, "learning_rate": 6.871162261434522e-06, "loss": 0.5665, "step": 3113 }, { "epoch": 0.4, "grad_norm": 0.6763072751204212, "learning_rate": 6.869248724655481e-06, "loss": 0.5076, "step": 3114 }, { "epoch": 0.4, "grad_norm": 0.826047655370385, "learning_rate": 6.867334869562856e-06, "loss": 0.6616, "step": 3115 }, { "epoch": 0.4, "grad_norm": 0.595283618930961, "learning_rate": 6.865420696482556e-06, "loss": 0.4852, "step": 3116 }, { "epoch": 0.4, "grad_norm": 0.7854427702934368, "learning_rate": 6.863506205740545e-06, "loss": 0.6097, "step": 3117 }, { "epoch": 0.4, "grad_norm": 0.8498000284568843, "learning_rate": 6.861591397662841e-06, "loss": 0.6396, "step": 3118 }, { "epoch": 0.4, "grad_norm": 0.6949193333856496, "learning_rate": 6.8596762725755174e-06, "loss": 0.5008, "step": 3119 }, { "epoch": 0.4, "grad_norm": 0.7310086649370663, "learning_rate": 6.857760830804698e-06, "loss": 0.5324, "step": 3120 }, { "epoch": 0.4, "grad_norm": 0.8035859394807092, "learning_rate": 6.855845072676561e-06, "loss": 0.5492, "step": 3121 }, { "epoch": 0.4, "grad_norm": 0.8315343140182654, "learning_rate": 6.853928998517345e-06, "loss": 0.5999, "step": 3122 }, { "epoch": 0.4, "grad_norm": 0.9001319960101497, "learning_rate": 6.852012608653333e-06, "loss": 0.6652, "step": 3123 }, { "epoch": 0.4, "grad_norm": 0.7925692888896836, "learning_rate": 6.8500959034108685e-06, "loss": 0.6486, "step": 3124 }, { "epoch": 0.4, "grad_norm": 0.793768615901906, "learning_rate": 6.8481788831163456e-06, "loss": 0.6117, "step": 3125 }, { "epoch": 0.4, "grad_norm": 0.6864897108882693, "learning_rate": 6.8462615480962135e-06, "loss": 0.565, "step": 3126 }, { "epoch": 0.4, "grad_norm": 0.9300878639242727, "learning_rate": 6.844343898676974e-06, "loss": 0.6411, "step": 3127 }, { "epoch": 0.4, "grad_norm": 0.7449617326643326, "learning_rate": 6.842425935185182e-06, "loss": 0.6166, "step": 3128 }, { "epoch": 0.4, "grad_norm": 0.8810017578701566, "learning_rate": 6.840507657947448e-06, "loss": 0.6504, "step": 3129 }, { "epoch": 0.4, "grad_norm": 0.8958492042116712, "learning_rate": 6.8385890672904335e-06, "loss": 0.6901, "step": 3130 }, { "epoch": 0.4, "grad_norm": 0.7009467942245847, "learning_rate": 6.836670163540855e-06, "loss": 0.5546, "step": 3131 }, { "epoch": 0.4, "grad_norm": 0.7918060625086003, "learning_rate": 6.834750947025483e-06, "loss": 0.7054, "step": 3132 }, { "epoch": 0.4, "grad_norm": 0.8232011534323697, "learning_rate": 6.832831418071138e-06, "loss": 0.6045, "step": 3133 }, { "epoch": 0.4, "grad_norm": 0.6564152366542801, "learning_rate": 6.8309115770046986e-06, "loss": 0.5502, "step": 3134 }, { "epoch": 0.4, "grad_norm": 1.321622089122117, "learning_rate": 6.828991424153091e-06, "loss": 0.6455, "step": 3135 }, { "epoch": 0.4, "grad_norm": 0.8484232465784994, "learning_rate": 6.827070959843298e-06, "loss": 0.6354, "step": 3136 }, { "epoch": 0.4, "grad_norm": 0.7288481206610785, "learning_rate": 6.825150184402355e-06, "loss": 0.5481, "step": 3137 }, { "epoch": 0.4, "grad_norm": 0.9364694896455233, "learning_rate": 6.823229098157349e-06, "loss": 0.6366, "step": 3138 }, { "epoch": 0.4, "grad_norm": 0.6479771240738635, "learning_rate": 6.8213077014354225e-06, "loss": 0.5284, "step": 3139 }, { "epoch": 0.4, "grad_norm": 0.8417858710911886, "learning_rate": 6.81938599456377e-06, "loss": 0.5908, "step": 3140 }, { "epoch": 0.4, "grad_norm": 0.8163345219001725, "learning_rate": 6.817463977869635e-06, "loss": 0.5524, "step": 3141 }, { "epoch": 0.4, "grad_norm": 0.6774325183800121, "learning_rate": 6.81554165168032e-06, "loss": 0.5332, "step": 3142 }, { "epoch": 0.4, "grad_norm": 1.1067585780349518, "learning_rate": 6.813619016323173e-06, "loss": 0.6145, "step": 3143 }, { "epoch": 0.4, "grad_norm": 0.6624982518864575, "learning_rate": 6.8116960721256045e-06, "loss": 0.4881, "step": 3144 }, { "epoch": 0.4, "grad_norm": 0.7622283633145275, "learning_rate": 6.809772819415068e-06, "loss": 0.5974, "step": 3145 }, { "epoch": 0.4, "grad_norm": 0.6784652595666629, "learning_rate": 6.8078492585190745e-06, "loss": 0.5697, "step": 3146 }, { "epoch": 0.4, "grad_norm": 0.7587648289455629, "learning_rate": 6.805925389765187e-06, "loss": 0.554, "step": 3147 }, { "epoch": 0.4, "grad_norm": 0.6714839990117176, "learning_rate": 6.8040012134810195e-06, "loss": 0.5742, "step": 3148 }, { "epoch": 0.4, "grad_norm": 0.9266579357296365, "learning_rate": 6.802076729994239e-06, "loss": 0.6707, "step": 3149 }, { "epoch": 0.4, "grad_norm": 0.6987203582529693, "learning_rate": 6.800151939632566e-06, "loss": 0.5117, "step": 3150 }, { "epoch": 0.4, "grad_norm": 0.7686564940678177, "learning_rate": 6.798226842723771e-06, "loss": 0.6231, "step": 3151 }, { "epoch": 0.4, "grad_norm": 0.8959511075127252, "learning_rate": 6.796301439595679e-06, "loss": 0.6638, "step": 3152 }, { "epoch": 0.4, "grad_norm": 0.6326212021463321, "learning_rate": 6.794375730576164e-06, "loss": 0.5458, "step": 3153 }, { "epoch": 0.4, "grad_norm": 0.7906085799100441, "learning_rate": 6.792449715993157e-06, "loss": 0.5985, "step": 3154 }, { "epoch": 0.4, "grad_norm": 0.7422869145777646, "learning_rate": 6.7905233961746355e-06, "loss": 0.5693, "step": 3155 }, { "epoch": 0.4, "grad_norm": 0.765429292131019, "learning_rate": 6.788596771448634e-06, "loss": 0.6078, "step": 3156 }, { "epoch": 0.4, "grad_norm": 0.7580896572283728, "learning_rate": 6.786669842143236e-06, "loss": 0.5373, "step": 3157 }, { "epoch": 0.4, "grad_norm": 0.9719416053772618, "learning_rate": 6.784742608586577e-06, "loss": 0.5904, "step": 3158 }, { "epoch": 0.4, "grad_norm": 0.8082674605117065, "learning_rate": 6.782815071106845e-06, "loss": 0.5575, "step": 3159 }, { "epoch": 0.4, "grad_norm": 0.7208816270972793, "learning_rate": 6.780887230032281e-06, "loss": 0.5805, "step": 3160 }, { "epoch": 0.4, "grad_norm": 0.777265275770535, "learning_rate": 6.778959085691173e-06, "loss": 0.6106, "step": 3161 }, { "epoch": 0.4, "grad_norm": 0.7120851283795944, "learning_rate": 6.777030638411868e-06, "loss": 0.5555, "step": 3162 }, { "epoch": 0.4, "grad_norm": 0.7330718151822807, "learning_rate": 6.775101888522756e-06, "loss": 0.5327, "step": 3163 }, { "epoch": 0.4, "grad_norm": 0.7822749721695045, "learning_rate": 6.773172836352286e-06, "loss": 0.5624, "step": 3164 }, { "epoch": 0.4, "grad_norm": 0.8139946793221429, "learning_rate": 6.771243482228955e-06, "loss": 0.6261, "step": 3165 }, { "epoch": 0.4, "grad_norm": 0.6741856946053867, "learning_rate": 6.769313826481312e-06, "loss": 0.5016, "step": 3166 }, { "epoch": 0.4, "grad_norm": 0.6408114836203054, "learning_rate": 6.767383869437957e-06, "loss": 0.5221, "step": 3167 }, { "epoch": 0.4, "grad_norm": 0.9087656316098638, "learning_rate": 6.765453611427542e-06, "loss": 0.6566, "step": 3168 }, { "epoch": 0.4, "grad_norm": 0.8002643323377501, "learning_rate": 6.763523052778768e-06, "loss": 0.551, "step": 3169 }, { "epoch": 0.4, "grad_norm": 0.9425315303180866, "learning_rate": 6.76159219382039e-06, "loss": 0.5786, "step": 3170 }, { "epoch": 0.4, "grad_norm": 0.8154677819308739, "learning_rate": 6.759661034881214e-06, "loss": 0.6527, "step": 3171 }, { "epoch": 0.4, "grad_norm": 0.7606896127080065, "learning_rate": 6.7577295762900955e-06, "loss": 0.5681, "step": 3172 }, { "epoch": 0.4, "grad_norm": 0.8168234133648932, "learning_rate": 6.755797818375943e-06, "loss": 0.6529, "step": 3173 }, { "epoch": 0.4, "grad_norm": 0.7869517117903847, "learning_rate": 6.7538657614677126e-06, "loss": 0.5873, "step": 3174 }, { "epoch": 0.4, "grad_norm": 0.7060304129302237, "learning_rate": 6.7519334058944164e-06, "loss": 0.574, "step": 3175 }, { "epoch": 0.4, "grad_norm": 0.8240468508270222, "learning_rate": 6.750000751985112e-06, "loss": 0.7005, "step": 3176 }, { "epoch": 0.4, "grad_norm": 0.9855506761178785, "learning_rate": 6.74806780006891e-06, "loss": 0.6806, "step": 3177 }, { "epoch": 0.4, "grad_norm": 0.7414134873010033, "learning_rate": 6.746134550474974e-06, "loss": 0.5949, "step": 3178 }, { "epoch": 0.4, "grad_norm": 0.8196916582070289, "learning_rate": 6.7442010035325145e-06, "loss": 0.598, "step": 3179 }, { "epoch": 0.41, "grad_norm": 0.7835816954025843, "learning_rate": 6.742267159570796e-06, "loss": 0.608, "step": 3180 }, { "epoch": 0.41, "grad_norm": 0.6880365226354992, "learning_rate": 6.74033301891913e-06, "loss": 0.5792, "step": 3181 }, { "epoch": 0.41, "grad_norm": 0.8101969525469045, "learning_rate": 6.738398581906882e-06, "loss": 0.6056, "step": 3182 }, { "epoch": 0.41, "grad_norm": 0.6730490399273185, "learning_rate": 6.736463848863466e-06, "loss": 0.5552, "step": 3183 }, { "epoch": 0.41, "grad_norm": 0.6877744571552816, "learning_rate": 6.734528820118347e-06, "loss": 0.4969, "step": 3184 }, { "epoch": 0.41, "grad_norm": 0.8892511414983605, "learning_rate": 6.732593496001038e-06, "loss": 0.638, "step": 3185 }, { "epoch": 0.41, "grad_norm": 0.7853927907555196, "learning_rate": 6.730657876841109e-06, "loss": 0.5874, "step": 3186 }, { "epoch": 0.41, "grad_norm": 0.6989471215876331, "learning_rate": 6.728721962968173e-06, "loss": 0.541, "step": 3187 }, { "epoch": 0.41, "grad_norm": 0.905229393985051, "learning_rate": 6.726785754711897e-06, "loss": 0.6402, "step": 3188 }, { "epoch": 0.41, "grad_norm": 0.8800627840978599, "learning_rate": 6.724849252401995e-06, "loss": 0.6725, "step": 3189 }, { "epoch": 0.41, "grad_norm": 1.5064218391673445, "learning_rate": 6.722912456368236e-06, "loss": 0.6043, "step": 3190 }, { "epoch": 0.41, "grad_norm": 1.1388228133498755, "learning_rate": 6.720975366940434e-06, "loss": 0.6612, "step": 3191 }, { "epoch": 0.41, "grad_norm": 0.8481654457044425, "learning_rate": 6.719037984448457e-06, "loss": 0.6518, "step": 3192 }, { "epoch": 0.41, "grad_norm": 0.8249185178484697, "learning_rate": 6.71710030922222e-06, "loss": 0.6168, "step": 3193 }, { "epoch": 0.41, "grad_norm": 0.7432676805376397, "learning_rate": 6.715162341591687e-06, "loss": 0.6568, "step": 3194 }, { "epoch": 0.41, "grad_norm": 0.8706312118773447, "learning_rate": 6.713224081886877e-06, "loss": 0.5694, "step": 3195 }, { "epoch": 0.41, "grad_norm": 0.9438094996926238, "learning_rate": 6.711285530437855e-06, "loss": 0.6302, "step": 3196 }, { "epoch": 0.41, "grad_norm": 0.5750488512150372, "learning_rate": 6.709346687574735e-06, "loss": 0.4629, "step": 3197 }, { "epoch": 0.41, "grad_norm": 0.8274666511416832, "learning_rate": 6.7074075536276795e-06, "loss": 0.6379, "step": 3198 }, { "epoch": 0.41, "grad_norm": 0.6714846933104798, "learning_rate": 6.705468128926907e-06, "loss": 0.51, "step": 3199 }, { "epoch": 0.41, "grad_norm": 0.933527083577643, "learning_rate": 6.7035284138026805e-06, "loss": 0.6875, "step": 3200 }, { "epoch": 0.41, "grad_norm": 0.8556577020225966, "learning_rate": 6.7015884085853114e-06, "loss": 0.5819, "step": 3201 }, { "epoch": 0.41, "grad_norm": 0.8434954864676036, "learning_rate": 6.699648113605164e-06, "loss": 0.6641, "step": 3202 }, { "epoch": 0.41, "grad_norm": 0.8088761939282472, "learning_rate": 6.697707529192648e-06, "loss": 0.557, "step": 3203 }, { "epoch": 0.41, "grad_norm": 0.63629947691648, "learning_rate": 6.695766655678229e-06, "loss": 0.5523, "step": 3204 }, { "epoch": 0.41, "grad_norm": 0.6983738726228311, "learning_rate": 6.693825493392412e-06, "loss": 0.571, "step": 3205 }, { "epoch": 0.41, "grad_norm": 0.6849477062333319, "learning_rate": 6.691884042665761e-06, "loss": 0.5244, "step": 3206 }, { "epoch": 0.41, "grad_norm": 0.6167514872185222, "learning_rate": 6.689942303828883e-06, "loss": 0.5291, "step": 3207 }, { "epoch": 0.41, "grad_norm": 0.7602023220591377, "learning_rate": 6.688000277212436e-06, "loss": 0.5538, "step": 3208 }, { "epoch": 0.41, "grad_norm": 0.7131202697258787, "learning_rate": 6.686057963147129e-06, "loss": 0.5583, "step": 3209 }, { "epoch": 0.41, "grad_norm": 0.877502324096378, "learning_rate": 6.684115361963715e-06, "loss": 0.6692, "step": 3210 }, { "epoch": 0.41, "grad_norm": 0.7791310511627406, "learning_rate": 6.682172473993e-06, "loss": 0.5799, "step": 3211 }, { "epoch": 0.41, "grad_norm": 0.7326601545534721, "learning_rate": 6.680229299565838e-06, "loss": 0.6027, "step": 3212 }, { "epoch": 0.41, "grad_norm": 0.7525460036744519, "learning_rate": 6.67828583901313e-06, "loss": 0.618, "step": 3213 }, { "epoch": 0.41, "grad_norm": 0.812527373634474, "learning_rate": 6.676342092665827e-06, "loss": 0.6174, "step": 3214 }, { "epoch": 0.41, "grad_norm": 0.8206434490483032, "learning_rate": 6.674398060854931e-06, "loss": 0.6305, "step": 3215 }, { "epoch": 0.41, "grad_norm": 0.8654762681317059, "learning_rate": 6.672453743911489e-06, "loss": 0.6324, "step": 3216 }, { "epoch": 0.41, "grad_norm": 0.8078821289059402, "learning_rate": 6.670509142166597e-06, "loss": 0.5754, "step": 3217 }, { "epoch": 0.41, "grad_norm": 0.928748710887052, "learning_rate": 6.668564255951402e-06, "loss": 0.641, "step": 3218 }, { "epoch": 0.41, "grad_norm": 1.2234072340891011, "learning_rate": 6.666619085597097e-06, "loss": 0.7043, "step": 3219 }, { "epoch": 0.41, "grad_norm": 0.7736439174914075, "learning_rate": 6.664673631434925e-06, "loss": 0.5805, "step": 3220 }, { "epoch": 0.41, "grad_norm": 0.774103313814348, "learning_rate": 6.662727893796176e-06, "loss": 0.5273, "step": 3221 }, { "epoch": 0.41, "grad_norm": 0.9769044715362657, "learning_rate": 6.6607818730121876e-06, "loss": 0.6268, "step": 3222 }, { "epoch": 0.41, "grad_norm": 0.6377835877454072, "learning_rate": 6.65883556941435e-06, "loss": 0.5199, "step": 3223 }, { "epoch": 0.41, "grad_norm": 0.683265051113322, "learning_rate": 6.656888983334095e-06, "loss": 0.5405, "step": 3224 }, { "epoch": 0.41, "grad_norm": 0.9362381677066479, "learning_rate": 6.654942115102908e-06, "loss": 0.6376, "step": 3225 }, { "epoch": 0.41, "grad_norm": 0.7305770537409942, "learning_rate": 6.6529949650523195e-06, "loss": 0.5437, "step": 3226 }, { "epoch": 0.41, "grad_norm": 0.9851928756111706, "learning_rate": 6.65104753351391e-06, "loss": 0.6024, "step": 3227 }, { "epoch": 0.41, "grad_norm": 0.7763081241754975, "learning_rate": 6.649099820819305e-06, "loss": 0.5895, "step": 3228 }, { "epoch": 0.41, "grad_norm": 0.8984791885716891, "learning_rate": 6.647151827300181e-06, "loss": 0.6462, "step": 3229 }, { "epoch": 0.41, "grad_norm": 0.6651638815892492, "learning_rate": 6.645203553288262e-06, "loss": 0.5904, "step": 3230 }, { "epoch": 0.41, "grad_norm": 0.730746526323111, "learning_rate": 6.643254999115316e-06, "loss": 0.593, "step": 3231 }, { "epoch": 0.41, "grad_norm": 0.7611130732725617, "learning_rate": 6.641306165113164e-06, "loss": 0.5295, "step": 3232 }, { "epoch": 0.41, "grad_norm": 0.9163102768906297, "learning_rate": 6.639357051613671e-06, "loss": 0.651, "step": 3233 }, { "epoch": 0.41, "grad_norm": 0.6690449005103866, "learning_rate": 6.637407658948749e-06, "loss": 0.5268, "step": 3234 }, { "epoch": 0.41, "grad_norm": 0.6694288100444343, "learning_rate": 6.635457987450364e-06, "loss": 0.5868, "step": 3235 }, { "epoch": 0.41, "grad_norm": 1.1820348620862644, "learning_rate": 6.633508037450519e-06, "loss": 0.6284, "step": 3236 }, { "epoch": 0.41, "grad_norm": 1.0090864571388356, "learning_rate": 6.631557809281275e-06, "loss": 0.7001, "step": 3237 }, { "epoch": 0.41, "grad_norm": 0.7752070413352607, "learning_rate": 6.6296073032747325e-06, "loss": 0.6348, "step": 3238 }, { "epoch": 0.41, "grad_norm": 0.6734603115598812, "learning_rate": 6.627656519763044e-06, "loss": 0.5358, "step": 3239 }, { "epoch": 0.41, "grad_norm": 0.9471781638965864, "learning_rate": 6.625705459078407e-06, "loss": 0.6525, "step": 3240 }, { "epoch": 0.41, "grad_norm": 0.6999674640456416, "learning_rate": 6.623754121553065e-06, "loss": 0.5641, "step": 3241 }, { "epoch": 0.41, "grad_norm": 0.8919684115933687, "learning_rate": 6.621802507519314e-06, "loss": 0.5666, "step": 3242 }, { "epoch": 0.41, "grad_norm": 0.7821390216633386, "learning_rate": 6.619850617309491e-06, "loss": 0.6135, "step": 3243 }, { "epoch": 0.41, "grad_norm": 0.7809944463346813, "learning_rate": 6.617898451255984e-06, "loss": 0.6113, "step": 3244 }, { "epoch": 0.41, "grad_norm": 0.8128425655159538, "learning_rate": 6.6159460096912254e-06, "loss": 0.6486, "step": 3245 }, { "epoch": 0.41, "grad_norm": 0.7135439547230367, "learning_rate": 6.613993292947697e-06, "loss": 0.5485, "step": 3246 }, { "epoch": 0.41, "grad_norm": 0.6700150938470216, "learning_rate": 6.612040301357925e-06, "loss": 0.5958, "step": 3247 }, { "epoch": 0.41, "grad_norm": 0.8256094375685624, "learning_rate": 6.6100870352544835e-06, "loss": 0.5116, "step": 3248 }, { "epoch": 0.41, "grad_norm": 0.8378796317064201, "learning_rate": 6.608133494969993e-06, "loss": 0.6825, "step": 3249 }, { "epoch": 0.41, "grad_norm": 0.7573670340308144, "learning_rate": 6.606179680837122e-06, "loss": 0.5677, "step": 3250 }, { "epoch": 0.41, "grad_norm": 0.6109933129894055, "learning_rate": 6.604225593188585e-06, "loss": 0.5067, "step": 3251 }, { "epoch": 0.41, "grad_norm": 0.7893168109840055, "learning_rate": 6.602271232357142e-06, "loss": 0.6396, "step": 3252 }, { "epoch": 0.41, "grad_norm": 0.8049400645892669, "learning_rate": 6.6003165986756015e-06, "loss": 0.6086, "step": 3253 }, { "epoch": 0.41, "grad_norm": 0.8224229186815304, "learning_rate": 6.598361692476813e-06, "loss": 0.5751, "step": 3254 }, { "epoch": 0.41, "grad_norm": 0.6918534181976074, "learning_rate": 6.5964065140936825e-06, "loss": 0.5364, "step": 3255 }, { "epoch": 0.41, "grad_norm": 0.7556663170682492, "learning_rate": 6.594451063859154e-06, "loss": 0.595, "step": 3256 }, { "epoch": 0.41, "grad_norm": 0.8494250958296593, "learning_rate": 6.592495342106219e-06, "loss": 0.691, "step": 3257 }, { "epoch": 0.42, "grad_norm": 0.8384121881684764, "learning_rate": 6.5905393491679185e-06, "loss": 0.6377, "step": 3258 }, { "epoch": 0.42, "grad_norm": 0.7236954015988676, "learning_rate": 6.5885830853773365e-06, "loss": 0.5868, "step": 3259 }, { "epoch": 0.42, "grad_norm": 0.7435237522385246, "learning_rate": 6.586626551067605e-06, "loss": 0.5769, "step": 3260 }, { "epoch": 0.42, "grad_norm": 0.8433511768074987, "learning_rate": 6.584669746571902e-06, "loss": 0.6555, "step": 3261 }, { "epoch": 0.42, "grad_norm": 0.6734027508286957, "learning_rate": 6.582712672223449e-06, "loss": 0.5369, "step": 3262 }, { "epoch": 0.42, "grad_norm": 0.8051481259987183, "learning_rate": 6.580755328355517e-06, "loss": 0.5823, "step": 3263 }, { "epoch": 0.42, "grad_norm": 0.6553404341817208, "learning_rate": 6.578797715301422e-06, "loss": 0.5357, "step": 3264 }, { "epoch": 0.42, "grad_norm": 0.9108072738939391, "learning_rate": 6.576839833394522e-06, "loss": 0.6937, "step": 3265 }, { "epoch": 0.42, "grad_norm": 0.7943175938690936, "learning_rate": 6.574881682968226e-06, "loss": 0.6032, "step": 3266 }, { "epoch": 0.42, "grad_norm": 0.7722330966012264, "learning_rate": 6.572923264355987e-06, "loss": 0.5858, "step": 3267 }, { "epoch": 0.42, "grad_norm": 0.8251817943039168, "learning_rate": 6.570964577891303e-06, "loss": 0.6818, "step": 3268 }, { "epoch": 0.42, "grad_norm": 0.7406711665473202, "learning_rate": 6.569005623907717e-06, "loss": 0.5626, "step": 3269 }, { "epoch": 0.42, "grad_norm": 0.7480439126692651, "learning_rate": 6.567046402738818e-06, "loss": 0.5739, "step": 3270 }, { "epoch": 0.42, "grad_norm": 0.6859661587202633, "learning_rate": 6.565086914718244e-06, "loss": 0.5747, "step": 3271 }, { "epoch": 0.42, "grad_norm": 0.9746964468080379, "learning_rate": 6.563127160179672e-06, "loss": 0.6827, "step": 3272 }, { "epoch": 0.42, "grad_norm": 0.76062411408561, "learning_rate": 6.56116713945683e-06, "loss": 0.5569, "step": 3273 }, { "epoch": 0.42, "grad_norm": 0.6387530834451175, "learning_rate": 6.559206852883487e-06, "loss": 0.5463, "step": 3274 }, { "epoch": 0.42, "grad_norm": 0.7755272404170621, "learning_rate": 6.557246300793462e-06, "loss": 0.6121, "step": 3275 }, { "epoch": 0.42, "grad_norm": 0.6536370141932599, "learning_rate": 6.555285483520615e-06, "loss": 0.5257, "step": 3276 }, { "epoch": 0.42, "grad_norm": 0.727177598312349, "learning_rate": 6.553324401398853e-06, "loss": 0.5832, "step": 3277 }, { "epoch": 0.42, "grad_norm": 0.8894720674093568, "learning_rate": 6.551363054762128e-06, "loss": 0.595, "step": 3278 }, { "epoch": 0.42, "grad_norm": 0.659201630162912, "learning_rate": 6.549401443944436e-06, "loss": 0.5, "step": 3279 }, { "epoch": 0.42, "grad_norm": 0.6681382593528573, "learning_rate": 6.54743956927982e-06, "loss": 0.5729, "step": 3280 }, { "epoch": 0.42, "grad_norm": 0.6739172559160317, "learning_rate": 6.5454774311023676e-06, "loss": 0.5589, "step": 3281 }, { "epoch": 0.42, "grad_norm": 0.7622521489361416, "learning_rate": 6.543515029746208e-06, "loss": 0.5779, "step": 3282 }, { "epoch": 0.42, "grad_norm": 1.030900091439409, "learning_rate": 6.541552365545518e-06, "loss": 0.6408, "step": 3283 }, { "epoch": 0.42, "grad_norm": 0.8581221801617982, "learning_rate": 6.5395894388345215e-06, "loss": 0.6784, "step": 3284 }, { "epoch": 0.42, "grad_norm": 0.7754308508905653, "learning_rate": 6.5376262499474814e-06, "loss": 0.5708, "step": 3285 }, { "epoch": 0.42, "grad_norm": 0.6681913473673582, "learning_rate": 6.535662799218712e-06, "loss": 0.5517, "step": 3286 }, { "epoch": 0.42, "grad_norm": 0.7848583184110279, "learning_rate": 6.533699086982566e-06, "loss": 0.631, "step": 3287 }, { "epoch": 0.42, "grad_norm": 0.7394092222256954, "learning_rate": 6.531735113573442e-06, "loss": 0.5935, "step": 3288 }, { "epoch": 0.42, "grad_norm": 0.8655978981898818, "learning_rate": 6.529770879325787e-06, "loss": 0.615, "step": 3289 }, { "epoch": 0.42, "grad_norm": 0.8389382955357877, "learning_rate": 6.527806384574088e-06, "loss": 0.6685, "step": 3290 }, { "epoch": 0.42, "grad_norm": 0.7119620542518433, "learning_rate": 6.525841629652878e-06, "loss": 0.533, "step": 3291 }, { "epoch": 0.42, "grad_norm": 0.6721143699624627, "learning_rate": 6.523876614896735e-06, "loss": 0.5628, "step": 3292 }, { "epoch": 0.42, "grad_norm": 0.6497921544202933, "learning_rate": 6.521911340640279e-06, "loss": 0.5232, "step": 3293 }, { "epoch": 0.42, "grad_norm": 0.7619317433627457, "learning_rate": 6.519945807218178e-06, "loss": 0.5049, "step": 3294 }, { "epoch": 0.42, "grad_norm": 0.8092639201925059, "learning_rate": 6.51798001496514e-06, "loss": 0.6662, "step": 3295 }, { "epoch": 0.42, "grad_norm": 0.6759734252859513, "learning_rate": 6.516013964215919e-06, "loss": 0.5307, "step": 3296 }, { "epoch": 0.42, "grad_norm": 0.8361731753538412, "learning_rate": 6.514047655305314e-06, "loss": 0.6509, "step": 3297 }, { "epoch": 0.42, "grad_norm": 0.8467167305197648, "learning_rate": 6.5120810885681676e-06, "loss": 0.6301, "step": 3298 }, { "epoch": 0.42, "grad_norm": 0.8954484946169039, "learning_rate": 6.510114264339364e-06, "loss": 0.5608, "step": 3299 }, { "epoch": 0.42, "grad_norm": 1.1282540994970105, "learning_rate": 6.508147182953834e-06, "loss": 0.5877, "step": 3300 }, { "epoch": 0.42, "grad_norm": 0.7537054709330002, "learning_rate": 6.506179844746549e-06, "loss": 0.6493, "step": 3301 }, { "epoch": 0.42, "grad_norm": 0.7565455803528692, "learning_rate": 6.504212250052529e-06, "loss": 0.5631, "step": 3302 }, { "epoch": 0.42, "grad_norm": 0.6349168407238737, "learning_rate": 6.5022443992068344e-06, "loss": 0.4754, "step": 3303 }, { "epoch": 0.42, "grad_norm": 0.8856287452165285, "learning_rate": 6.500276292544567e-06, "loss": 0.7341, "step": 3304 }, { "epoch": 0.42, "grad_norm": 0.7118507733962567, "learning_rate": 6.498307930400878e-06, "loss": 0.5452, "step": 3305 }, { "epoch": 0.42, "grad_norm": 0.6796914620581587, "learning_rate": 6.496339313110958e-06, "loss": 0.5468, "step": 3306 }, { "epoch": 0.42, "grad_norm": 0.8632277497757777, "learning_rate": 6.49437044101004e-06, "loss": 0.6439, "step": 3307 }, { "epoch": 0.42, "grad_norm": 0.9885064830006294, "learning_rate": 6.492401314433404e-06, "loss": 0.6547, "step": 3308 }, { "epoch": 0.42, "grad_norm": 0.7076459076699314, "learning_rate": 6.490431933716373e-06, "loss": 0.5183, "step": 3309 }, { "epoch": 0.42, "grad_norm": 0.6985180715895505, "learning_rate": 6.488462299194311e-06, "loss": 0.4945, "step": 3310 }, { "epoch": 0.42, "grad_norm": 0.8221277645352243, "learning_rate": 6.4864924112026265e-06, "loss": 0.6279, "step": 3311 }, { "epoch": 0.42, "grad_norm": 0.659295492727098, "learning_rate": 6.48452227007677e-06, "loss": 0.5417, "step": 3312 }, { "epoch": 0.42, "grad_norm": 0.7079286192522138, "learning_rate": 6.4825518761522354e-06, "loss": 0.5624, "step": 3313 }, { "epoch": 0.42, "grad_norm": 0.7416336802491766, "learning_rate": 6.480581229764563e-06, "loss": 0.5624, "step": 3314 }, { "epoch": 0.42, "grad_norm": 0.5957354324484512, "learning_rate": 6.4786103312493305e-06, "loss": 0.4875, "step": 3315 }, { "epoch": 0.42, "grad_norm": 0.6026476741783775, "learning_rate": 6.476639180942165e-06, "loss": 0.5253, "step": 3316 }, { "epoch": 0.42, "grad_norm": 0.8939101132024457, "learning_rate": 6.474667779178728e-06, "loss": 0.6539, "step": 3317 }, { "epoch": 0.42, "grad_norm": 0.9139456793076355, "learning_rate": 6.472696126294733e-06, "loss": 0.6894, "step": 3318 }, { "epoch": 0.42, "grad_norm": 0.882269295476322, "learning_rate": 6.470724222625929e-06, "loss": 0.6457, "step": 3319 }, { "epoch": 0.42, "grad_norm": 0.7659107548275412, "learning_rate": 6.468752068508112e-06, "loss": 0.5761, "step": 3320 }, { "epoch": 0.42, "grad_norm": 0.7347419684856539, "learning_rate": 6.46677966427712e-06, "loss": 0.6477, "step": 3321 }, { "epoch": 0.42, "grad_norm": 0.8403944827663434, "learning_rate": 6.464807010268831e-06, "loss": 0.6459, "step": 3322 }, { "epoch": 0.42, "grad_norm": 0.7556831214070445, "learning_rate": 6.462834106819168e-06, "loss": 0.528, "step": 3323 }, { "epoch": 0.42, "grad_norm": 0.7648250583968272, "learning_rate": 6.460860954264096e-06, "loss": 0.5894, "step": 3324 }, { "epoch": 0.42, "grad_norm": 0.8058757702569989, "learning_rate": 6.458887552939624e-06, "loss": 0.6275, "step": 3325 }, { "epoch": 0.42, "grad_norm": 0.7227942204377473, "learning_rate": 6.4569139031818e-06, "loss": 0.5899, "step": 3326 }, { "epoch": 0.42, "grad_norm": 0.8873906739070164, "learning_rate": 6.454940005326715e-06, "loss": 0.6917, "step": 3327 }, { "epoch": 0.42, "grad_norm": 0.7138882900276078, "learning_rate": 6.452965859710506e-06, "loss": 0.5414, "step": 3328 }, { "epoch": 0.42, "grad_norm": 0.7871329931534004, "learning_rate": 6.450991466669347e-06, "loss": 0.6504, "step": 3329 }, { "epoch": 0.42, "grad_norm": 0.767741618757116, "learning_rate": 6.4490168265394595e-06, "loss": 0.5878, "step": 3330 }, { "epoch": 0.42, "grad_norm": 0.952277623646695, "learning_rate": 6.4470419396571e-06, "loss": 0.6844, "step": 3331 }, { "epoch": 0.42, "grad_norm": 0.7524692268715799, "learning_rate": 6.445066806358575e-06, "loss": 0.5322, "step": 3332 }, { "epoch": 0.42, "grad_norm": 0.8826213828307394, "learning_rate": 6.4430914269802266e-06, "loss": 0.561, "step": 3333 }, { "epoch": 0.42, "grad_norm": 0.7756530450059665, "learning_rate": 6.441115801858442e-06, "loss": 0.5603, "step": 3334 }, { "epoch": 0.42, "grad_norm": 1.11808653159282, "learning_rate": 6.43913993132965e-06, "loss": 0.6623, "step": 3335 }, { "epoch": 0.42, "grad_norm": 0.706092270802816, "learning_rate": 6.437163815730321e-06, "loss": 0.5891, "step": 3336 }, { "epoch": 0.43, "grad_norm": 0.9657030209033668, "learning_rate": 6.435187455396966e-06, "loss": 0.6943, "step": 3337 }, { "epoch": 0.43, "grad_norm": 0.6363654338641853, "learning_rate": 6.43321085066614e-06, "loss": 0.5245, "step": 3338 }, { "epoch": 0.43, "grad_norm": 0.6787614067552249, "learning_rate": 6.431234001874435e-06, "loss": 0.539, "step": 3339 }, { "epoch": 0.43, "grad_norm": 0.6755614114080194, "learning_rate": 6.429256909358491e-06, "loss": 0.5731, "step": 3340 }, { "epoch": 0.43, "grad_norm": 0.6462196449743945, "learning_rate": 6.427279573454985e-06, "loss": 0.5647, "step": 3341 }, { "epoch": 0.43, "grad_norm": 0.6656767428094686, "learning_rate": 6.4253019945006375e-06, "loss": 0.5521, "step": 3342 }, { "epoch": 0.43, "grad_norm": 0.8294814349002999, "learning_rate": 6.423324172832209e-06, "loss": 0.605, "step": 3343 }, { "epoch": 0.43, "grad_norm": 0.6274353004528871, "learning_rate": 6.421346108786503e-06, "loss": 0.5693, "step": 3344 }, { "epoch": 0.43, "grad_norm": 0.8493797436396938, "learning_rate": 6.4193678027003605e-06, "loss": 0.6726, "step": 3345 }, { "epoch": 0.43, "grad_norm": 0.680754667365435, "learning_rate": 6.417389254910669e-06, "loss": 0.56, "step": 3346 }, { "epoch": 0.43, "grad_norm": 0.8288774622844942, "learning_rate": 6.415410465754353e-06, "loss": 0.6532, "step": 3347 }, { "epoch": 0.43, "grad_norm": 0.645400257288062, "learning_rate": 6.41343143556838e-06, "loss": 0.5221, "step": 3348 }, { "epoch": 0.43, "grad_norm": 0.8546403108356332, "learning_rate": 6.41145216468976e-06, "loss": 0.6671, "step": 3349 }, { "epoch": 0.43, "grad_norm": 0.6901347228111183, "learning_rate": 6.409472653455539e-06, "loss": 0.5282, "step": 3350 }, { "epoch": 0.43, "grad_norm": 0.7821090471580274, "learning_rate": 6.40749290220281e-06, "loss": 0.6305, "step": 3351 }, { "epoch": 0.43, "grad_norm": 0.8824899382906244, "learning_rate": 6.405512911268702e-06, "loss": 0.705, "step": 3352 }, { "epoch": 0.43, "grad_norm": 0.8471521120975053, "learning_rate": 6.403532680990389e-06, "loss": 0.6539, "step": 3353 }, { "epoch": 0.43, "grad_norm": 0.8162824895821622, "learning_rate": 6.401552211705082e-06, "loss": 0.6476, "step": 3354 }, { "epoch": 0.43, "grad_norm": 0.690591417763809, "learning_rate": 6.399571503750034e-06, "loss": 0.5559, "step": 3355 }, { "epoch": 0.43, "grad_norm": 0.6625257415758892, "learning_rate": 6.39759055746254e-06, "loss": 0.5546, "step": 3356 }, { "epoch": 0.43, "grad_norm": 0.7010552369601221, "learning_rate": 6.395609373179934e-06, "loss": 0.5756, "step": 3357 }, { "epoch": 0.43, "grad_norm": 0.922348560184355, "learning_rate": 6.3936279512395896e-06, "loss": 0.6047, "step": 3358 }, { "epoch": 0.43, "grad_norm": 0.6125535300753597, "learning_rate": 6.3916462919789255e-06, "loss": 0.5127, "step": 3359 }, { "epoch": 0.43, "grad_norm": 0.9984193379668501, "learning_rate": 6.389664395735395e-06, "loss": 0.6656, "step": 3360 }, { "epoch": 0.43, "grad_norm": 0.6765207552568084, "learning_rate": 6.387682262846498e-06, "loss": 0.5235, "step": 3361 }, { "epoch": 0.43, "grad_norm": 0.7356430724978521, "learning_rate": 6.385699893649766e-06, "loss": 0.549, "step": 3362 }, { "epoch": 0.43, "grad_norm": 0.7855497414438462, "learning_rate": 6.383717288482778e-06, "loss": 0.5793, "step": 3363 }, { "epoch": 0.43, "grad_norm": 0.6289425045496839, "learning_rate": 6.381734447683152e-06, "loss": 0.5347, "step": 3364 }, { "epoch": 0.43, "grad_norm": 0.720860354892167, "learning_rate": 6.379751371588545e-06, "loss": 0.5343, "step": 3365 }, { "epoch": 0.43, "grad_norm": 0.6207474409677359, "learning_rate": 6.377768060536652e-06, "loss": 0.4902, "step": 3366 }, { "epoch": 0.43, "grad_norm": 0.9166928641723509, "learning_rate": 6.375784514865213e-06, "loss": 0.6777, "step": 3367 }, { "epoch": 0.43, "grad_norm": 0.6994130833180454, "learning_rate": 6.373800734912002e-06, "loss": 0.5226, "step": 3368 }, { "epoch": 0.43, "grad_norm": 0.8994609275271955, "learning_rate": 6.3718167210148406e-06, "loss": 0.594, "step": 3369 }, { "epoch": 0.43, "grad_norm": 0.8162041282330985, "learning_rate": 6.369832473511582e-06, "loss": 0.5705, "step": 3370 }, { "epoch": 0.43, "grad_norm": 0.8803012764198941, "learning_rate": 6.367847992740124e-06, "loss": 0.6564, "step": 3371 }, { "epoch": 0.43, "grad_norm": 0.8266403969820719, "learning_rate": 6.3658632790384026e-06, "loss": 0.5699, "step": 3372 }, { "epoch": 0.43, "grad_norm": 0.7893194416831567, "learning_rate": 6.363878332744395e-06, "loss": 0.6486, "step": 3373 }, { "epoch": 0.43, "grad_norm": 0.7913241913923671, "learning_rate": 6.361893154196116e-06, "loss": 0.6453, "step": 3374 }, { "epoch": 0.43, "grad_norm": 0.7243517419311286, "learning_rate": 6.35990774373162e-06, "loss": 0.644, "step": 3375 }, { "epoch": 0.43, "grad_norm": 0.6772032902068967, "learning_rate": 6.357922101689004e-06, "loss": 0.562, "step": 3376 }, { "epoch": 0.43, "grad_norm": 0.7607506820521205, "learning_rate": 6.3559362284064e-06, "loss": 0.5552, "step": 3377 }, { "epoch": 0.43, "grad_norm": 0.6513314158573268, "learning_rate": 6.353950124221982e-06, "loss": 0.5318, "step": 3378 }, { "epoch": 0.43, "grad_norm": 0.7386738028267484, "learning_rate": 6.351963789473965e-06, "loss": 0.5461, "step": 3379 }, { "epoch": 0.43, "grad_norm": 0.8219645023411236, "learning_rate": 6.349977224500598e-06, "loss": 0.6774, "step": 3380 }, { "epoch": 0.43, "grad_norm": 0.7122036293136335, "learning_rate": 6.347990429640171e-06, "loss": 0.5348, "step": 3381 }, { "epoch": 0.43, "grad_norm": 0.8694200500595037, "learning_rate": 6.34600340523102e-06, "loss": 0.6679, "step": 3382 }, { "epoch": 0.43, "grad_norm": 0.7834177254505951, "learning_rate": 6.344016151611512e-06, "loss": 0.6534, "step": 3383 }, { "epoch": 0.43, "grad_norm": 0.9621036791079701, "learning_rate": 6.342028669120055e-06, "loss": 0.6365, "step": 3384 }, { "epoch": 0.43, "grad_norm": 0.6445637695576225, "learning_rate": 6.340040958095096e-06, "loss": 0.5114, "step": 3385 }, { "epoch": 0.43, "grad_norm": 0.7002892693212692, "learning_rate": 6.338053018875123e-06, "loss": 0.5668, "step": 3386 }, { "epoch": 0.43, "grad_norm": 0.6427012913203892, "learning_rate": 6.3360648517986605e-06, "loss": 0.5089, "step": 3387 }, { "epoch": 0.43, "grad_norm": 0.8309286469572716, "learning_rate": 6.334076457204273e-06, "loss": 0.5901, "step": 3388 }, { "epoch": 0.43, "grad_norm": 0.8904877536179938, "learning_rate": 6.332087835430563e-06, "loss": 0.6104, "step": 3389 }, { "epoch": 0.43, "grad_norm": 0.6848042494746492, "learning_rate": 6.330098986816171e-06, "loss": 0.5645, "step": 3390 }, { "epoch": 0.43, "grad_norm": 0.9170523402598549, "learning_rate": 6.32810991169978e-06, "loss": 0.5533, "step": 3391 }, { "epoch": 0.43, "grad_norm": 0.8303882046461772, "learning_rate": 6.326120610420107e-06, "loss": 0.6303, "step": 3392 }, { "epoch": 0.43, "grad_norm": 0.739424781398264, "learning_rate": 6.324131083315909e-06, "loss": 0.5918, "step": 3393 }, { "epoch": 0.43, "grad_norm": 0.8269463540364971, "learning_rate": 6.322141330725981e-06, "loss": 0.5703, "step": 3394 }, { "epoch": 0.43, "grad_norm": 0.8718846837649499, "learning_rate": 6.320151352989159e-06, "loss": 0.6841, "step": 3395 }, { "epoch": 0.43, "grad_norm": 0.9390321651150065, "learning_rate": 6.318161150444315e-06, "loss": 0.6061, "step": 3396 }, { "epoch": 0.43, "grad_norm": 0.8404559344639178, "learning_rate": 6.31617072343036e-06, "loss": 0.6101, "step": 3397 }, { "epoch": 0.43, "grad_norm": 0.8958865869468765, "learning_rate": 6.314180072286243e-06, "loss": 0.6653, "step": 3398 }, { "epoch": 0.43, "grad_norm": 0.6204165929426746, "learning_rate": 6.312189197350951e-06, "loss": 0.5059, "step": 3399 }, { "epoch": 0.43, "grad_norm": 0.8980621324416335, "learning_rate": 6.310198098963508e-06, "loss": 0.6779, "step": 3400 }, { "epoch": 0.43, "grad_norm": 0.7141825189434798, "learning_rate": 6.308206777462978e-06, "loss": 0.5351, "step": 3401 }, { "epoch": 0.43, "grad_norm": 0.9631986788553436, "learning_rate": 6.306215233188464e-06, "loss": 0.6848, "step": 3402 }, { "epoch": 0.43, "grad_norm": 0.732145689973894, "learning_rate": 6.304223466479102e-06, "loss": 0.5444, "step": 3403 }, { "epoch": 0.43, "grad_norm": 0.7909167756925085, "learning_rate": 6.302231477674072e-06, "loss": 0.6223, "step": 3404 }, { "epoch": 0.43, "grad_norm": 0.957639817787375, "learning_rate": 6.300239267112589e-06, "loss": 0.5978, "step": 3405 }, { "epoch": 0.43, "grad_norm": 0.95503846064445, "learning_rate": 6.298246835133903e-06, "loss": 0.6449, "step": 3406 }, { "epoch": 0.43, "grad_norm": 0.7192761248867364, "learning_rate": 6.296254182077306e-06, "loss": 0.5416, "step": 3407 }, { "epoch": 0.43, "grad_norm": 0.8972144061795673, "learning_rate": 6.294261308282126e-06, "loss": 0.6646, "step": 3408 }, { "epoch": 0.43, "grad_norm": 0.9425585174614167, "learning_rate": 6.292268214087729e-06, "loss": 0.6346, "step": 3409 }, { "epoch": 0.43, "grad_norm": 0.873319820796118, "learning_rate": 6.290274899833517e-06, "loss": 0.5603, "step": 3410 }, { "epoch": 0.43, "grad_norm": 0.9847792236937688, "learning_rate": 6.288281365858933e-06, "loss": 0.6632, "step": 3411 }, { "epoch": 0.43, "grad_norm": 0.8235799173340377, "learning_rate": 6.286287612503454e-06, "loss": 0.6541, "step": 3412 }, { "epoch": 0.43, "grad_norm": 0.7281333806282612, "learning_rate": 6.284293640106593e-06, "loss": 0.5721, "step": 3413 }, { "epoch": 0.43, "grad_norm": 0.7333538500503372, "learning_rate": 6.282299449007907e-06, "loss": 0.5071, "step": 3414 }, { "epoch": 0.44, "grad_norm": 0.8151021698022166, "learning_rate": 6.280305039546983e-06, "loss": 0.5713, "step": 3415 }, { "epoch": 0.44, "grad_norm": 0.849450270845276, "learning_rate": 6.2783104120634484e-06, "loss": 0.5955, "step": 3416 }, { "epoch": 0.44, "grad_norm": 0.6026603441416055, "learning_rate": 6.276315566896969e-06, "loss": 0.5136, "step": 3417 }, { "epoch": 0.44, "grad_norm": 0.9111912869427533, "learning_rate": 6.274320504387245e-06, "loss": 0.5972, "step": 3418 }, { "epoch": 0.44, "grad_norm": 0.9638892838407052, "learning_rate": 6.2723252248740144e-06, "loss": 0.6435, "step": 3419 }, { "epoch": 0.44, "grad_norm": 0.79699660822151, "learning_rate": 6.270329728697052e-06, "loss": 0.5577, "step": 3420 }, { "epoch": 0.44, "grad_norm": 0.8060204312928541, "learning_rate": 6.268334016196172e-06, "loss": 0.6052, "step": 3421 }, { "epoch": 0.44, "grad_norm": 0.8386635644919266, "learning_rate": 6.2663380877112225e-06, "loss": 0.5949, "step": 3422 }, { "epoch": 0.44, "grad_norm": 0.7819148664666169, "learning_rate": 6.264341943582088e-06, "loss": 0.6193, "step": 3423 }, { "epoch": 0.44, "grad_norm": 0.6867306027453859, "learning_rate": 6.262345584148694e-06, "loss": 0.5386, "step": 3424 }, { "epoch": 0.44, "grad_norm": 0.9066875185878596, "learning_rate": 6.260349009750997e-06, "loss": 0.5989, "step": 3425 }, { "epoch": 0.44, "grad_norm": 0.6457118958726119, "learning_rate": 6.258352220728995e-06, "loss": 0.5305, "step": 3426 }, { "epoch": 0.44, "grad_norm": 0.7885289603227443, "learning_rate": 6.256355217422718e-06, "loss": 0.5986, "step": 3427 }, { "epoch": 0.44, "grad_norm": 0.6247774194476489, "learning_rate": 6.254358000172237e-06, "loss": 0.5434, "step": 3428 }, { "epoch": 0.44, "grad_norm": 0.6663443836330664, "learning_rate": 6.252360569317656e-06, "loss": 0.5354, "step": 3429 }, { "epoch": 0.44, "grad_norm": 0.7236541518246997, "learning_rate": 6.2503629251991175e-06, "loss": 0.5963, "step": 3430 }, { "epoch": 0.44, "grad_norm": 0.7827945251263246, "learning_rate": 6.248365068156799e-06, "loss": 0.6051, "step": 3431 }, { "epoch": 0.44, "grad_norm": 0.7711966207571743, "learning_rate": 6.246366998530914e-06, "loss": 0.5494, "step": 3432 }, { "epoch": 0.44, "grad_norm": 0.6320784557289187, "learning_rate": 6.244368716661714e-06, "loss": 0.4818, "step": 3433 }, { "epoch": 0.44, "grad_norm": 0.861065143332177, "learning_rate": 6.242370222889485e-06, "loss": 0.6408, "step": 3434 }, { "epoch": 0.44, "grad_norm": 0.8819630192489708, "learning_rate": 6.240371517554551e-06, "loss": 0.6368, "step": 3435 }, { "epoch": 0.44, "grad_norm": 0.6440729660424318, "learning_rate": 6.238372600997268e-06, "loss": 0.4757, "step": 3436 }, { "epoch": 0.44, "grad_norm": 0.6787641672683281, "learning_rate": 6.236373473558033e-06, "loss": 0.6229, "step": 3437 }, { "epoch": 0.44, "grad_norm": 0.8124451914813133, "learning_rate": 6.234374135577276e-06, "loss": 0.6297, "step": 3438 }, { "epoch": 0.44, "grad_norm": 0.6803340316552556, "learning_rate": 6.232374587395464e-06, "loss": 0.5954, "step": 3439 }, { "epoch": 0.44, "grad_norm": 0.7906846307228111, "learning_rate": 6.230374829353097e-06, "loss": 0.6785, "step": 3440 }, { "epoch": 0.44, "grad_norm": 0.8365586694505374, "learning_rate": 6.228374861790716e-06, "loss": 0.6049, "step": 3441 }, { "epoch": 0.44, "grad_norm": 0.6486857884705961, "learning_rate": 6.226374685048893e-06, "loss": 0.543, "step": 3442 }, { "epoch": 0.44, "grad_norm": 0.8592707801027841, "learning_rate": 6.224374299468237e-06, "loss": 0.6745, "step": 3443 }, { "epoch": 0.44, "grad_norm": 0.6773468169413863, "learning_rate": 6.222373705389394e-06, "loss": 0.5703, "step": 3444 }, { "epoch": 0.44, "grad_norm": 0.6473954419876244, "learning_rate": 6.220372903153042e-06, "loss": 0.5055, "step": 3445 }, { "epoch": 0.44, "grad_norm": 0.7308717183475844, "learning_rate": 6.218371893099901e-06, "loss": 0.5543, "step": 3446 }, { "epoch": 0.44, "grad_norm": 0.7599032713463516, "learning_rate": 6.216370675570718e-06, "loss": 0.6492, "step": 3447 }, { "epoch": 0.44, "grad_norm": 0.7425814312916679, "learning_rate": 6.214369250906281e-06, "loss": 0.5869, "step": 3448 }, { "epoch": 0.44, "grad_norm": 0.7837146390059074, "learning_rate": 6.212367619447409e-06, "loss": 0.6351, "step": 3449 }, { "epoch": 0.44, "grad_norm": 0.7578870373314055, "learning_rate": 6.210365781534965e-06, "loss": 0.5805, "step": 3450 }, { "epoch": 0.44, "grad_norm": 0.7432446663353631, "learning_rate": 6.208363737509837e-06, "loss": 0.5496, "step": 3451 }, { "epoch": 0.44, "grad_norm": 0.8906430499601756, "learning_rate": 6.206361487712954e-06, "loss": 0.6802, "step": 3452 }, { "epoch": 0.44, "grad_norm": 0.7684925724382321, "learning_rate": 6.204359032485276e-06, "loss": 0.5581, "step": 3453 }, { "epoch": 0.44, "grad_norm": 0.7727500789615049, "learning_rate": 6.202356372167804e-06, "loss": 0.5574, "step": 3454 }, { "epoch": 0.44, "grad_norm": 0.668092793539768, "learning_rate": 6.200353507101566e-06, "loss": 0.5629, "step": 3455 }, { "epoch": 0.44, "grad_norm": 0.6625074376548281, "learning_rate": 6.198350437627631e-06, "loss": 0.5524, "step": 3456 }, { "epoch": 0.44, "grad_norm": 0.6894049680306608, "learning_rate": 6.196347164087101e-06, "loss": 0.5352, "step": 3457 }, { "epoch": 0.44, "grad_norm": 0.7509889646163803, "learning_rate": 6.1943436868211125e-06, "loss": 0.5783, "step": 3458 }, { "epoch": 0.44, "grad_norm": 0.6650465814104387, "learning_rate": 6.192340006170836e-06, "loss": 0.4934, "step": 3459 }, { "epoch": 0.44, "grad_norm": 0.798882358357191, "learning_rate": 6.1903361224774795e-06, "loss": 0.6602, "step": 3460 }, { "epoch": 0.44, "grad_norm": 0.7390406362853507, "learning_rate": 6.188332036082281e-06, "loss": 0.5621, "step": 3461 }, { "epoch": 0.44, "grad_norm": 0.7529336428422556, "learning_rate": 6.186327747326518e-06, "loss": 0.6065, "step": 3462 }, { "epoch": 0.44, "grad_norm": 0.7058463708579121, "learning_rate": 6.184323256551498e-06, "loss": 0.5965, "step": 3463 }, { "epoch": 0.44, "grad_norm": 0.7549298935127112, "learning_rate": 6.182318564098565e-06, "loss": 0.5849, "step": 3464 }, { "epoch": 0.44, "grad_norm": 0.726983957325392, "learning_rate": 6.180313670309098e-06, "loss": 0.5603, "step": 3465 }, { "epoch": 0.44, "grad_norm": 0.8024173155728049, "learning_rate": 6.17830857552451e-06, "loss": 0.6692, "step": 3466 }, { "epoch": 0.44, "grad_norm": 1.1304922894097105, "learning_rate": 6.176303280086246e-06, "loss": 0.6522, "step": 3467 }, { "epoch": 0.44, "grad_norm": 0.6998991038356294, "learning_rate": 6.17429778433579e-06, "loss": 0.5295, "step": 3468 }, { "epoch": 0.44, "grad_norm": 0.7902387494963926, "learning_rate": 6.172292088614654e-06, "loss": 0.6315, "step": 3469 }, { "epoch": 0.44, "grad_norm": 0.9392059131825539, "learning_rate": 6.170286193264388e-06, "loss": 0.7119, "step": 3470 }, { "epoch": 0.44, "grad_norm": 0.8974577064555453, "learning_rate": 6.168280098626576e-06, "loss": 0.6494, "step": 3471 }, { "epoch": 0.44, "grad_norm": 0.6901552558902487, "learning_rate": 6.166273805042834e-06, "loss": 0.5736, "step": 3472 }, { "epoch": 0.44, "grad_norm": 0.6521369446437644, "learning_rate": 6.1642673128548125e-06, "loss": 0.5279, "step": 3473 }, { "epoch": 0.44, "grad_norm": 0.7331700511723845, "learning_rate": 6.162260622404197e-06, "loss": 0.5311, "step": 3474 }, { "epoch": 0.44, "grad_norm": 0.7277537223983758, "learning_rate": 6.160253734032706e-06, "loss": 0.5502, "step": 3475 }, { "epoch": 0.44, "grad_norm": 0.8679190623864572, "learning_rate": 6.158246648082092e-06, "loss": 0.5371, "step": 3476 }, { "epoch": 0.44, "grad_norm": 0.9594178210859036, "learning_rate": 6.1562393648941395e-06, "loss": 0.6422, "step": 3477 }, { "epoch": 0.44, "grad_norm": 0.9163289569881978, "learning_rate": 6.154231884810669e-06, "loss": 0.6952, "step": 3478 }, { "epoch": 0.44, "grad_norm": 0.6283955424196445, "learning_rate": 6.152224208173533e-06, "loss": 0.5313, "step": 3479 }, { "epoch": 0.44, "grad_norm": 0.6533490352790934, "learning_rate": 6.150216335324619e-06, "loss": 0.5438, "step": 3480 }, { "epoch": 0.44, "grad_norm": 0.82758793010938, "learning_rate": 6.148208266605845e-06, "loss": 0.5395, "step": 3481 }, { "epoch": 0.44, "grad_norm": 0.6167459873546352, "learning_rate": 6.146200002359166e-06, "loss": 0.5223, "step": 3482 }, { "epoch": 0.44, "grad_norm": 0.6970961302424825, "learning_rate": 6.144191542926567e-06, "loss": 0.5578, "step": 3483 }, { "epoch": 0.44, "grad_norm": 0.8276963716690393, "learning_rate": 6.142182888650069e-06, "loss": 0.5246, "step": 3484 }, { "epoch": 0.44, "grad_norm": 0.7320010005430326, "learning_rate": 6.1401740398717244e-06, "loss": 0.547, "step": 3485 }, { "epoch": 0.44, "grad_norm": 0.6787153450105404, "learning_rate": 6.138164996933619e-06, "loss": 0.5548, "step": 3486 }, { "epoch": 0.44, "grad_norm": 0.7637242521512037, "learning_rate": 6.136155760177871e-06, "loss": 0.5794, "step": 3487 }, { "epoch": 0.44, "grad_norm": 0.7691962063488876, "learning_rate": 6.134146329946634e-06, "loss": 0.6215, "step": 3488 }, { "epoch": 0.44, "grad_norm": 0.7946886967264549, "learning_rate": 6.132136706582092e-06, "loss": 0.6434, "step": 3489 }, { "epoch": 0.44, "grad_norm": 0.8027517099848307, "learning_rate": 6.130126890426464e-06, "loss": 0.6437, "step": 3490 }, { "epoch": 0.44, "grad_norm": 0.668247338188106, "learning_rate": 6.128116881822e-06, "loss": 0.5752, "step": 3491 }, { "epoch": 0.44, "grad_norm": 0.7353780788653977, "learning_rate": 6.126106681110983e-06, "loss": 0.639, "step": 3492 }, { "epoch": 0.44, "grad_norm": 0.6923545859244065, "learning_rate": 6.12409628863573e-06, "loss": 0.5658, "step": 3493 }, { "epoch": 0.45, "grad_norm": 0.7263948063964855, "learning_rate": 6.122085704738591e-06, "loss": 0.526, "step": 3494 }, { "epoch": 0.45, "grad_norm": 0.9644986152593055, "learning_rate": 6.120074929761945e-06, "loss": 0.6848, "step": 3495 }, { "epoch": 0.45, "grad_norm": 0.8983537765060773, "learning_rate": 6.118063964048209e-06, "loss": 0.6447, "step": 3496 }, { "epoch": 0.45, "grad_norm": 0.7597788384948722, "learning_rate": 6.1160528079398265e-06, "loss": 0.5619, "step": 3497 }, { "epoch": 0.45, "grad_norm": 0.7889025067585369, "learning_rate": 6.114041461779278e-06, "loss": 0.6753, "step": 3498 }, { "epoch": 0.45, "grad_norm": 0.7463419371673947, "learning_rate": 6.112029925909074e-06, "loss": 0.5794, "step": 3499 }, { "epoch": 0.45, "grad_norm": 0.8118772213049076, "learning_rate": 6.110018200671759e-06, "loss": 0.6497, "step": 3500 }, { "epoch": 0.45, "grad_norm": 0.9611855445993419, "learning_rate": 6.108006286409909e-06, "loss": 0.6602, "step": 3501 }, { "epoch": 0.45, "grad_norm": 0.8162934389970316, "learning_rate": 6.105994183466131e-06, "loss": 0.5714, "step": 3502 }, { "epoch": 0.45, "grad_norm": 0.8014887738897735, "learning_rate": 6.103981892183066e-06, "loss": 0.5224, "step": 3503 }, { "epoch": 0.45, "grad_norm": 0.7646375086847529, "learning_rate": 6.101969412903386e-06, "loss": 0.5377, "step": 3504 }, { "epoch": 0.45, "grad_norm": 0.7568546424930852, "learning_rate": 6.099956745969794e-06, "loss": 0.6212, "step": 3505 }, { "epoch": 0.45, "grad_norm": 0.8655740577414859, "learning_rate": 6.097943891725029e-06, "loss": 0.6782, "step": 3506 }, { "epoch": 0.45, "grad_norm": 0.6090674379090918, "learning_rate": 6.095930850511857e-06, "loss": 0.5136, "step": 3507 }, { "epoch": 0.45, "grad_norm": 0.8328219945031706, "learning_rate": 6.093917622673079e-06, "loss": 0.6468, "step": 3508 }, { "epoch": 0.45, "grad_norm": 0.8695867484489413, "learning_rate": 6.091904208551525e-06, "loss": 0.6391, "step": 3509 }, { "epoch": 0.45, "grad_norm": 0.6817016831374344, "learning_rate": 6.08989060849006e-06, "loss": 0.5745, "step": 3510 }, { "epoch": 0.45, "grad_norm": 0.8323457265388752, "learning_rate": 6.08787682283158e-06, "loss": 0.6416, "step": 3511 }, { "epoch": 0.45, "grad_norm": 0.903831229927447, "learning_rate": 6.085862851919009e-06, "loss": 0.6574, "step": 3512 }, { "epoch": 0.45, "grad_norm": 0.8432976116986718, "learning_rate": 6.083848696095308e-06, "loss": 0.5857, "step": 3513 }, { "epoch": 0.45, "grad_norm": 0.9373099001804219, "learning_rate": 6.081834355703464e-06, "loss": 0.6941, "step": 3514 }, { "epoch": 0.45, "grad_norm": 0.7065550748271121, "learning_rate": 6.079819831086502e-06, "loss": 0.5065, "step": 3515 }, { "epoch": 0.45, "grad_norm": 0.8233747924960846, "learning_rate": 6.07780512258747e-06, "loss": 0.6472, "step": 3516 }, { "epoch": 0.45, "grad_norm": 0.8131299143327212, "learning_rate": 6.075790230549456e-06, "loss": 0.5998, "step": 3517 }, { "epoch": 0.45, "grad_norm": 0.7168539855919053, "learning_rate": 6.073775155315572e-06, "loss": 0.5982, "step": 3518 }, { "epoch": 0.45, "grad_norm": 0.8659408625404263, "learning_rate": 6.071759897228966e-06, "loss": 0.6074, "step": 3519 }, { "epoch": 0.45, "grad_norm": 0.8790918891615503, "learning_rate": 6.069744456632815e-06, "loss": 0.6684, "step": 3520 }, { "epoch": 0.45, "grad_norm": 0.7471126658622433, "learning_rate": 6.067728833870326e-06, "loss": 0.4933, "step": 3521 }, { "epoch": 0.45, "grad_norm": 0.7094652301624043, "learning_rate": 6.065713029284741e-06, "loss": 0.5737, "step": 3522 }, { "epoch": 0.45, "grad_norm": 0.6730662547245935, "learning_rate": 6.063697043219331e-06, "loss": 0.518, "step": 3523 }, { "epoch": 0.45, "grad_norm": 0.8879363140401194, "learning_rate": 6.061680876017395e-06, "loss": 0.7149, "step": 3524 }, { "epoch": 0.45, "grad_norm": 1.0623829632061756, "learning_rate": 6.059664528022267e-06, "loss": 0.6261, "step": 3525 }, { "epoch": 0.45, "grad_norm": 0.7835680848472724, "learning_rate": 6.057647999577308e-06, "loss": 0.5941, "step": 3526 }, { "epoch": 0.45, "grad_norm": 0.9069880232593553, "learning_rate": 6.055631291025915e-06, "loss": 0.6029, "step": 3527 }, { "epoch": 0.45, "grad_norm": 0.9621275008705984, "learning_rate": 6.05361440271151e-06, "loss": 0.5603, "step": 3528 }, { "epoch": 0.45, "grad_norm": 1.063859495364726, "learning_rate": 6.051597334977548e-06, "loss": 0.689, "step": 3529 }, { "epoch": 0.45, "grad_norm": 0.6157204691487683, "learning_rate": 6.049580088167516e-06, "loss": 0.5488, "step": 3530 }, { "epoch": 0.45, "grad_norm": 0.9396825773868592, "learning_rate": 6.047562662624929e-06, "loss": 0.6223, "step": 3531 }, { "epoch": 0.45, "grad_norm": 0.8865459072128999, "learning_rate": 6.045545058693334e-06, "loss": 0.7247, "step": 3532 }, { "epoch": 0.45, "grad_norm": 0.8405742156976307, "learning_rate": 6.043527276716308e-06, "loss": 0.6303, "step": 3533 }, { "epoch": 0.45, "grad_norm": 0.7471576610798449, "learning_rate": 6.041509317037457e-06, "loss": 0.6235, "step": 3534 }, { "epoch": 0.45, "grad_norm": 0.7565404706543675, "learning_rate": 6.039491180000421e-06, "loss": 0.6127, "step": 3535 }, { "epoch": 0.45, "grad_norm": 1.2189207973463183, "learning_rate": 6.037472865948867e-06, "loss": 0.6186, "step": 3536 }, { "epoch": 0.45, "grad_norm": 0.6560022488743839, "learning_rate": 6.035454375226492e-06, "loss": 0.5372, "step": 3537 }, { "epoch": 0.45, "grad_norm": 0.6975606129924329, "learning_rate": 6.033435708177023e-06, "loss": 0.5889, "step": 3538 }, { "epoch": 0.45, "grad_norm": 0.8169788988518024, "learning_rate": 6.031416865144219e-06, "loss": 0.6314, "step": 3539 }, { "epoch": 0.45, "grad_norm": 0.7501275138246684, "learning_rate": 6.02939784647187e-06, "loss": 0.5622, "step": 3540 }, { "epoch": 0.45, "grad_norm": 0.7102187583015673, "learning_rate": 6.02737865250379e-06, "loss": 0.5458, "step": 3541 }, { "epoch": 0.45, "grad_norm": 0.8597501825146079, "learning_rate": 6.025359283583831e-06, "loss": 0.5504, "step": 3542 }, { "epoch": 0.45, "grad_norm": 0.7867146017004523, "learning_rate": 6.023339740055865e-06, "loss": 0.5491, "step": 3543 }, { "epoch": 0.45, "grad_norm": 0.7323822664437999, "learning_rate": 6.0213200222638044e-06, "loss": 0.5192, "step": 3544 }, { "epoch": 0.45, "grad_norm": 0.8149291308931363, "learning_rate": 6.019300130551584e-06, "loss": 0.6214, "step": 3545 }, { "epoch": 0.45, "grad_norm": 0.7467867566560927, "learning_rate": 6.0172800652631706e-06, "loss": 0.5965, "step": 3546 }, { "epoch": 0.45, "grad_norm": 0.7830340190980437, "learning_rate": 6.015259826742558e-06, "loss": 0.6661, "step": 3547 }, { "epoch": 0.45, "grad_norm": 0.7917803039558575, "learning_rate": 6.013239415333776e-06, "loss": 0.6149, "step": 3548 }, { "epoch": 0.45, "grad_norm": 0.6567729714115069, "learning_rate": 6.011218831380876e-06, "loss": 0.5462, "step": 3549 }, { "epoch": 0.45, "grad_norm": 0.7646356726247474, "learning_rate": 6.009198075227944e-06, "loss": 0.6005, "step": 3550 }, { "epoch": 0.45, "grad_norm": 0.7154563381946574, "learning_rate": 6.0071771472190945e-06, "loss": 0.5355, "step": 3551 }, { "epoch": 0.45, "grad_norm": 0.796096215999717, "learning_rate": 6.005156047698469e-06, "loss": 0.6325, "step": 3552 }, { "epoch": 0.45, "grad_norm": 0.6272674578695968, "learning_rate": 6.00313477701024e-06, "loss": 0.5258, "step": 3553 }, { "epoch": 0.45, "grad_norm": 0.7569466487664999, "learning_rate": 6.001113335498609e-06, "loss": 0.5954, "step": 3554 }, { "epoch": 0.45, "grad_norm": 0.758959025451322, "learning_rate": 5.999091723507807e-06, "loss": 0.5895, "step": 3555 }, { "epoch": 0.45, "grad_norm": 0.7970607749674867, "learning_rate": 5.997069941382093e-06, "loss": 0.6455, "step": 3556 }, { "epoch": 0.45, "grad_norm": 0.7594809189802804, "learning_rate": 5.995047989465755e-06, "loss": 0.5598, "step": 3557 }, { "epoch": 0.45, "grad_norm": 0.6312521034906191, "learning_rate": 5.993025868103111e-06, "loss": 0.5183, "step": 3558 }, { "epoch": 0.45, "grad_norm": 0.9532511883000315, "learning_rate": 5.9910035776385075e-06, "loss": 0.5961, "step": 3559 }, { "epoch": 0.45, "grad_norm": 0.8288996676251822, "learning_rate": 5.988981118416317e-06, "loss": 0.6242, "step": 3560 }, { "epoch": 0.45, "grad_norm": 0.7732535077261793, "learning_rate": 5.986958490780946e-06, "loss": 0.5715, "step": 3561 }, { "epoch": 0.45, "grad_norm": 0.8134869982776157, "learning_rate": 5.984935695076826e-06, "loss": 0.6379, "step": 3562 }, { "epoch": 0.45, "grad_norm": 0.8345727494486916, "learning_rate": 5.982912731648418e-06, "loss": 0.6459, "step": 3563 }, { "epoch": 0.45, "grad_norm": 0.8724717029035415, "learning_rate": 5.980889600840213e-06, "loss": 0.5759, "step": 3564 }, { "epoch": 0.45, "grad_norm": 0.8496740271347862, "learning_rate": 5.978866302996726e-06, "loss": 0.6059, "step": 3565 }, { "epoch": 0.45, "grad_norm": 0.8211642048893611, "learning_rate": 5.976842838462508e-06, "loss": 0.6547, "step": 3566 }, { "epoch": 0.45, "grad_norm": 0.6050944808059421, "learning_rate": 5.974819207582128e-06, "loss": 0.5166, "step": 3567 }, { "epoch": 0.45, "grad_norm": 0.6430056041447777, "learning_rate": 5.9727954107001954e-06, "loss": 0.5545, "step": 3568 }, { "epoch": 0.45, "grad_norm": 0.8209134745003611, "learning_rate": 5.970771448161338e-06, "loss": 0.6467, "step": 3569 }, { "epoch": 0.45, "grad_norm": 0.7266106273924304, "learning_rate": 5.968747320310216e-06, "loss": 0.5014, "step": 3570 }, { "epoch": 0.45, "grad_norm": 0.6973875445855897, "learning_rate": 5.966723027491518e-06, "loss": 0.6057, "step": 3571 }, { "epoch": 0.46, "grad_norm": 0.9649722368068999, "learning_rate": 5.964698570049959e-06, "loss": 0.6122, "step": 3572 }, { "epoch": 0.46, "grad_norm": 0.9742032066653912, "learning_rate": 5.962673948330284e-06, "loss": 0.623, "step": 3573 }, { "epoch": 0.46, "grad_norm": 0.7682866362668807, "learning_rate": 5.960649162677266e-06, "loss": 0.5726, "step": 3574 }, { "epoch": 0.46, "grad_norm": 0.6804961138309907, "learning_rate": 5.958624213435702e-06, "loss": 0.5019, "step": 3575 }, { "epoch": 0.46, "grad_norm": 0.8143525669200827, "learning_rate": 5.956599100950421e-06, "loss": 0.6582, "step": 3576 }, { "epoch": 0.46, "grad_norm": 0.7860851262431328, "learning_rate": 5.954573825566279e-06, "loss": 0.5912, "step": 3577 }, { "epoch": 0.46, "grad_norm": 0.9165967135411455, "learning_rate": 5.95254838762816e-06, "loss": 0.6606, "step": 3578 }, { "epoch": 0.46, "grad_norm": 0.7400573386980802, "learning_rate": 5.950522787480975e-06, "loss": 0.5749, "step": 3579 }, { "epoch": 0.46, "grad_norm": 0.6843701652023471, "learning_rate": 5.94849702546966e-06, "loss": 0.5082, "step": 3580 }, { "epoch": 0.46, "grad_norm": 0.8108917329924451, "learning_rate": 5.946471101939184e-06, "loss": 0.5671, "step": 3581 }, { "epoch": 0.46, "grad_norm": 0.7649257744135524, "learning_rate": 5.9444450172345395e-06, "loss": 0.5747, "step": 3582 }, { "epoch": 0.46, "grad_norm": 0.7988511007666568, "learning_rate": 5.942418771700747e-06, "loss": 0.6589, "step": 3583 }, { "epoch": 0.46, "grad_norm": 0.8680266578569742, "learning_rate": 5.940392365682857e-06, "loss": 0.6409, "step": 3584 }, { "epoch": 0.46, "grad_norm": 0.6526629781748411, "learning_rate": 5.938365799525942e-06, "loss": 0.5161, "step": 3585 }, { "epoch": 0.46, "grad_norm": 0.7125939111667973, "learning_rate": 5.936339073575109e-06, "loss": 0.5344, "step": 3586 }, { "epoch": 0.46, "grad_norm": 0.9008213988987009, "learning_rate": 5.934312188175486e-06, "loss": 0.5582, "step": 3587 }, { "epoch": 0.46, "grad_norm": 0.7812539747149474, "learning_rate": 5.932285143672231e-06, "loss": 0.5783, "step": 3588 }, { "epoch": 0.46, "grad_norm": 0.8865067131235267, "learning_rate": 5.930257940410529e-06, "loss": 0.6586, "step": 3589 }, { "epoch": 0.46, "grad_norm": 0.965208857151136, "learning_rate": 5.928230578735589e-06, "loss": 0.6186, "step": 3590 }, { "epoch": 0.46, "grad_norm": 0.8038857255786516, "learning_rate": 5.926203058992652e-06, "loss": 0.537, "step": 3591 }, { "epoch": 0.46, "grad_norm": 0.6253481993289098, "learning_rate": 5.924175381526985e-06, "loss": 0.5141, "step": 3592 }, { "epoch": 0.46, "grad_norm": 0.7419573127139674, "learning_rate": 5.922147546683878e-06, "loss": 0.564, "step": 3593 }, { "epoch": 0.46, "grad_norm": 0.7691705754525756, "learning_rate": 5.920119554808651e-06, "loss": 0.6022, "step": 3594 }, { "epoch": 0.46, "grad_norm": 0.9148032511283726, "learning_rate": 5.918091406246649e-06, "loss": 0.5691, "step": 3595 }, { "epoch": 0.46, "grad_norm": 0.7702565412128316, "learning_rate": 5.916063101343245e-06, "loss": 0.5514, "step": 3596 }, { "epoch": 0.46, "grad_norm": 0.6798873863850917, "learning_rate": 5.914034640443838e-06, "loss": 0.5357, "step": 3597 }, { "epoch": 0.46, "grad_norm": 0.7089581364282972, "learning_rate": 5.912006023893855e-06, "loss": 0.519, "step": 3598 }, { "epoch": 0.46, "grad_norm": 0.7303638554687092, "learning_rate": 5.909977252038747e-06, "loss": 0.5549, "step": 3599 }, { "epoch": 0.46, "grad_norm": 0.6741791288689153, "learning_rate": 5.907948325223993e-06, "loss": 0.4961, "step": 3600 }, { "epoch": 0.46, "grad_norm": 0.8632256466442529, "learning_rate": 5.905919243795099e-06, "loss": 0.6696, "step": 3601 }, { "epoch": 0.46, "grad_norm": 0.8726088211824585, "learning_rate": 5.903890008097594e-06, "loss": 0.6737, "step": 3602 }, { "epoch": 0.46, "grad_norm": 0.6866104334058141, "learning_rate": 5.9018606184770365e-06, "loss": 0.5441, "step": 3603 }, { "epoch": 0.46, "grad_norm": 0.9807599591662677, "learning_rate": 5.8998310752790135e-06, "loss": 0.5523, "step": 3604 }, { "epoch": 0.46, "grad_norm": 0.83283830030592, "learning_rate": 5.897801378849132e-06, "loss": 0.6965, "step": 3605 }, { "epoch": 0.46, "grad_norm": 0.7485611876684435, "learning_rate": 5.895771529533028e-06, "loss": 0.6036, "step": 3606 }, { "epoch": 0.46, "grad_norm": 0.6532469783350606, "learning_rate": 5.893741527676366e-06, "loss": 0.5709, "step": 3607 }, { "epoch": 0.46, "grad_norm": 0.6491188015727933, "learning_rate": 5.891711373624832e-06, "loss": 0.5138, "step": 3608 }, { "epoch": 0.46, "grad_norm": 0.8034476961357356, "learning_rate": 5.889681067724141e-06, "loss": 0.59, "step": 3609 }, { "epoch": 0.46, "grad_norm": 0.8370396089461789, "learning_rate": 5.8876506103200325e-06, "loss": 0.5926, "step": 3610 }, { "epoch": 0.46, "grad_norm": 0.746726257539987, "learning_rate": 5.885620001758273e-06, "loss": 0.5696, "step": 3611 }, { "epoch": 0.46, "grad_norm": 0.9319247931733304, "learning_rate": 5.883589242384653e-06, "loss": 0.6088, "step": 3612 }, { "epoch": 0.46, "grad_norm": 0.739532771285927, "learning_rate": 5.881558332544991e-06, "loss": 0.5481, "step": 3613 }, { "epoch": 0.46, "grad_norm": 0.8861879647178765, "learning_rate": 5.879527272585128e-06, "loss": 0.6978, "step": 3614 }, { "epoch": 0.46, "grad_norm": 0.8324688284164715, "learning_rate": 5.877496062850934e-06, "loss": 0.6547, "step": 3615 }, { "epoch": 0.46, "grad_norm": 0.8422378637422568, "learning_rate": 5.875464703688303e-06, "loss": 0.6437, "step": 3616 }, { "epoch": 0.46, "grad_norm": 0.6164181540392585, "learning_rate": 5.873433195443152e-06, "loss": 0.5136, "step": 3617 }, { "epoch": 0.46, "grad_norm": 0.7213991595217203, "learning_rate": 5.871401538461428e-06, "loss": 0.56, "step": 3618 }, { "epoch": 0.46, "grad_norm": 0.6862213664480401, "learning_rate": 5.869369733089099e-06, "loss": 0.5537, "step": 3619 }, { "epoch": 0.46, "grad_norm": 0.6691614913578832, "learning_rate": 5.867337779672162e-06, "loss": 0.582, "step": 3620 }, { "epoch": 0.46, "grad_norm": 0.980547989571627, "learning_rate": 5.865305678556638e-06, "loss": 0.5774, "step": 3621 }, { "epoch": 0.46, "grad_norm": 0.6774321067152484, "learning_rate": 5.863273430088571e-06, "loss": 0.5179, "step": 3622 }, { "epoch": 0.46, "grad_norm": 0.7428194786787314, "learning_rate": 5.861241034614032e-06, "loss": 0.5455, "step": 3623 }, { "epoch": 0.46, "grad_norm": 0.6530955408777839, "learning_rate": 5.859208492479118e-06, "loss": 0.5359, "step": 3624 }, { "epoch": 0.46, "grad_norm": 0.7003092039834383, "learning_rate": 5.857175804029949e-06, "loss": 0.5771, "step": 3625 }, { "epoch": 0.46, "grad_norm": 0.8069255311729716, "learning_rate": 5.8551429696126695e-06, "loss": 0.6595, "step": 3626 }, { "epoch": 0.46, "grad_norm": 0.7190154324593653, "learning_rate": 5.853109989573451e-06, "loss": 0.5661, "step": 3627 }, { "epoch": 0.46, "grad_norm": 0.929649690606898, "learning_rate": 5.8510768642584904e-06, "loss": 0.6358, "step": 3628 }, { "epoch": 0.46, "grad_norm": 0.6993286664300414, "learning_rate": 5.8490435940140055e-06, "loss": 0.5727, "step": 3629 }, { "epoch": 0.46, "grad_norm": 0.716654918268465, "learning_rate": 5.847010179186241e-06, "loss": 0.5707, "step": 3630 }, { "epoch": 0.46, "grad_norm": 0.7015636461180884, "learning_rate": 5.844976620121467e-06, "loss": 0.5473, "step": 3631 }, { "epoch": 0.46, "grad_norm": 0.6588378491063771, "learning_rate": 5.842942917165978e-06, "loss": 0.5246, "step": 3632 }, { "epoch": 0.46, "grad_norm": 0.7050502995673144, "learning_rate": 5.840909070666092e-06, "loss": 0.5319, "step": 3633 }, { "epoch": 0.46, "grad_norm": 0.6964475889484161, "learning_rate": 5.838875080968153e-06, "loss": 0.5193, "step": 3634 }, { "epoch": 0.46, "grad_norm": 0.6001425658259114, "learning_rate": 5.836840948418525e-06, "loss": 0.513, "step": 3635 }, { "epoch": 0.46, "grad_norm": 0.8071858862935555, "learning_rate": 5.834806673363604e-06, "loss": 0.6224, "step": 3636 }, { "epoch": 0.46, "grad_norm": 0.7774387032493759, "learning_rate": 5.832772256149802e-06, "loss": 0.5276, "step": 3637 }, { "epoch": 0.46, "grad_norm": 0.7016516287517406, "learning_rate": 5.830737697123561e-06, "loss": 0.5535, "step": 3638 }, { "epoch": 0.46, "grad_norm": 0.8270310853968084, "learning_rate": 5.828702996631344e-06, "loss": 0.6636, "step": 3639 }, { "epoch": 0.46, "grad_norm": 0.7536480354992842, "learning_rate": 5.82666815501964e-06, "loss": 0.5495, "step": 3640 }, { "epoch": 0.46, "grad_norm": 0.7281284638789438, "learning_rate": 5.824633172634963e-06, "loss": 0.5387, "step": 3641 }, { "epoch": 0.46, "grad_norm": 0.9235042834152511, "learning_rate": 5.822598049823845e-06, "loss": 0.7081, "step": 3642 }, { "epoch": 0.46, "grad_norm": 0.8715182184684162, "learning_rate": 5.820562786932851e-06, "loss": 0.6445, "step": 3643 }, { "epoch": 0.46, "grad_norm": 0.8064603342498802, "learning_rate": 5.818527384308564e-06, "loss": 0.6191, "step": 3644 }, { "epoch": 0.46, "grad_norm": 0.8411031428683867, "learning_rate": 5.816491842297587e-06, "loss": 0.5927, "step": 3645 }, { "epoch": 0.46, "grad_norm": 0.8069680573343667, "learning_rate": 5.814456161246557e-06, "loss": 0.6584, "step": 3646 }, { "epoch": 0.46, "grad_norm": 0.8145084948138652, "learning_rate": 5.812420341502128e-06, "loss": 0.6138, "step": 3647 }, { "epoch": 0.46, "grad_norm": 0.8024624359240449, "learning_rate": 5.81038438341098e-06, "loss": 0.5643, "step": 3648 }, { "epoch": 0.46, "grad_norm": 0.865736303200125, "learning_rate": 5.808348287319813e-06, "loss": 0.5542, "step": 3649 }, { "epoch": 0.46, "grad_norm": 0.9799358162718407, "learning_rate": 5.806312053575353e-06, "loss": 0.6078, "step": 3650 }, { "epoch": 0.47, "grad_norm": 0.7188163395401812, "learning_rate": 5.804275682524352e-06, "loss": 0.5836, "step": 3651 }, { "epoch": 0.47, "grad_norm": 0.926139714498065, "learning_rate": 5.80223917451358e-06, "loss": 0.6583, "step": 3652 }, { "epoch": 0.47, "grad_norm": 0.8419297860870829, "learning_rate": 5.8002025298898345e-06, "loss": 0.6676, "step": 3653 }, { "epoch": 0.47, "grad_norm": 0.7609251570265859, "learning_rate": 5.798165748999935e-06, "loss": 0.537, "step": 3654 }, { "epoch": 0.47, "grad_norm": 0.7383794341613403, "learning_rate": 5.796128832190723e-06, "loss": 0.5641, "step": 3655 }, { "epoch": 0.47, "grad_norm": 1.0003546097222928, "learning_rate": 5.794091779809066e-06, "loss": 0.6535, "step": 3656 }, { "epoch": 0.47, "grad_norm": 0.8619725279897564, "learning_rate": 5.79205459220185e-06, "loss": 0.6203, "step": 3657 }, { "epoch": 0.47, "grad_norm": 0.8293298685765608, "learning_rate": 5.790017269715989e-06, "loss": 0.6131, "step": 3658 }, { "epoch": 0.47, "grad_norm": 0.8474196137794393, "learning_rate": 5.787979812698418e-06, "loss": 0.6504, "step": 3659 }, { "epoch": 0.47, "grad_norm": 2.746441019142809, "learning_rate": 5.785942221496093e-06, "loss": 0.6754, "step": 3660 }, { "epoch": 0.47, "grad_norm": 0.9270722329816211, "learning_rate": 5.783904496455997e-06, "loss": 0.6662, "step": 3661 }, { "epoch": 0.47, "grad_norm": 0.692865919234672, "learning_rate": 5.78186663792513e-06, "loss": 0.5406, "step": 3662 }, { "epoch": 0.47, "grad_norm": 0.8327666950022071, "learning_rate": 5.779828646250522e-06, "loss": 0.652, "step": 3663 }, { "epoch": 0.47, "grad_norm": 0.7862746699614179, "learning_rate": 5.7777905217792184e-06, "loss": 0.6654, "step": 3664 }, { "epoch": 0.47, "grad_norm": 0.6382482925009612, "learning_rate": 5.775752264858292e-06, "loss": 0.5361, "step": 3665 }, { "epoch": 0.47, "grad_norm": 0.7728858052443739, "learning_rate": 5.773713875834837e-06, "loss": 0.6085, "step": 3666 }, { "epoch": 0.47, "grad_norm": 0.7020843335874097, "learning_rate": 5.77167535505597e-06, "loss": 0.6171, "step": 3667 }, { "epoch": 0.47, "grad_norm": 0.6462785973893841, "learning_rate": 5.76963670286883e-06, "loss": 0.5527, "step": 3668 }, { "epoch": 0.47, "grad_norm": 0.7726695942855915, "learning_rate": 5.7675979196205755e-06, "loss": 0.6092, "step": 3669 }, { "epoch": 0.47, "grad_norm": 0.7688289089436985, "learning_rate": 5.765559005658393e-06, "loss": 0.6061, "step": 3670 }, { "epoch": 0.47, "grad_norm": 0.8096084056787889, "learning_rate": 5.763519961329487e-06, "loss": 0.6468, "step": 3671 }, { "epoch": 0.47, "grad_norm": 0.7428833079841595, "learning_rate": 5.761480786981086e-06, "loss": 0.5737, "step": 3672 }, { "epoch": 0.47, "grad_norm": 0.8306835704735783, "learning_rate": 5.75944148296044e-06, "loss": 0.6355, "step": 3673 }, { "epoch": 0.47, "grad_norm": 0.7059843220118877, "learning_rate": 5.757402049614821e-06, "loss": 0.4727, "step": 3674 }, { "epoch": 0.47, "grad_norm": 0.962968332672543, "learning_rate": 5.755362487291525e-06, "loss": 0.6483, "step": 3675 }, { "epoch": 0.47, "grad_norm": 0.8336546228762126, "learning_rate": 5.753322796337864e-06, "loss": 0.5706, "step": 3676 }, { "epoch": 0.47, "grad_norm": 0.8543824541382932, "learning_rate": 5.751282977101181e-06, "loss": 0.5984, "step": 3677 }, { "epoch": 0.47, "grad_norm": 0.728187292212504, "learning_rate": 5.7492430299288335e-06, "loss": 0.5305, "step": 3678 }, { "epoch": 0.47, "grad_norm": 0.6553315642963978, "learning_rate": 5.747202955168204e-06, "loss": 0.5587, "step": 3679 }, { "epoch": 0.47, "grad_norm": 0.8134879986716608, "learning_rate": 5.745162753166696e-06, "loss": 0.6057, "step": 3680 }, { "epoch": 0.47, "grad_norm": 0.6850167887067071, "learning_rate": 5.743122424271732e-06, "loss": 0.4995, "step": 3681 }, { "epoch": 0.47, "grad_norm": 0.6567386437740192, "learning_rate": 5.741081968830762e-06, "loss": 0.5408, "step": 3682 }, { "epoch": 0.47, "grad_norm": 0.7803618123945372, "learning_rate": 5.739041387191253e-06, "loss": 0.6484, "step": 3683 }, { "epoch": 0.47, "grad_norm": 0.6474000448146338, "learning_rate": 5.737000679700696e-06, "loss": 0.5322, "step": 3684 }, { "epoch": 0.47, "grad_norm": 0.8754652193278403, "learning_rate": 5.7349598467066e-06, "loss": 0.6128, "step": 3685 }, { "epoch": 0.47, "grad_norm": 0.8896300820246065, "learning_rate": 5.7329188885565e-06, "loss": 0.64, "step": 3686 }, { "epoch": 0.47, "grad_norm": 0.7158370670589107, "learning_rate": 5.730877805597947e-06, "loss": 0.5523, "step": 3687 }, { "epoch": 0.47, "grad_norm": 0.702051930924746, "learning_rate": 5.72883659817852e-06, "loss": 0.5614, "step": 3688 }, { "epoch": 0.47, "grad_norm": 0.8428446053224009, "learning_rate": 5.726795266645813e-06, "loss": 0.5947, "step": 3689 }, { "epoch": 0.47, "grad_norm": 0.8403297534339824, "learning_rate": 5.724753811347442e-06, "loss": 0.6163, "step": 3690 }, { "epoch": 0.47, "grad_norm": 0.828982464796016, "learning_rate": 5.7227122326310495e-06, "loss": 0.6314, "step": 3691 }, { "epoch": 0.47, "grad_norm": 5.744373009461017, "learning_rate": 5.7206705308442925e-06, "loss": 0.6471, "step": 3692 }, { "epoch": 0.47, "grad_norm": 0.6206768764700785, "learning_rate": 5.718628706334851e-06, "loss": 0.4867, "step": 3693 }, { "epoch": 0.47, "grad_norm": 0.7250433412225785, "learning_rate": 5.716586759450428e-06, "loss": 0.5746, "step": 3694 }, { "epoch": 0.47, "grad_norm": 0.8780566759722332, "learning_rate": 5.7145446905387445e-06, "loss": 0.6113, "step": 3695 }, { "epoch": 0.47, "grad_norm": 0.7808163405403723, "learning_rate": 5.712502499947545e-06, "loss": 0.5113, "step": 3696 }, { "epoch": 0.47, "grad_norm": 0.746959986955119, "learning_rate": 5.710460188024593e-06, "loss": 0.4809, "step": 3697 }, { "epoch": 0.47, "grad_norm": 0.7000890412654375, "learning_rate": 5.708417755117672e-06, "loss": 0.5736, "step": 3698 }, { "epoch": 0.47, "grad_norm": 0.8515011529441295, "learning_rate": 5.706375201574588e-06, "loss": 0.5765, "step": 3699 }, { "epoch": 0.47, "grad_norm": 0.9734989516968692, "learning_rate": 5.704332527743166e-06, "loss": 0.6231, "step": 3700 }, { "epoch": 0.47, "grad_norm": 0.7890209810814113, "learning_rate": 5.70228973397125e-06, "loss": 0.6893, "step": 3701 }, { "epoch": 0.47, "grad_norm": 0.8471272572479925, "learning_rate": 5.700246820606711e-06, "loss": 0.6464, "step": 3702 }, { "epoch": 0.47, "grad_norm": 0.6482022032900072, "learning_rate": 5.698203787997434e-06, "loss": 0.4692, "step": 3703 }, { "epoch": 0.47, "grad_norm": 0.763134369242582, "learning_rate": 5.6961606364913246e-06, "loss": 0.6232, "step": 3704 }, { "epoch": 0.47, "grad_norm": 0.6657332618017631, "learning_rate": 5.694117366436312e-06, "loss": 0.5883, "step": 3705 }, { "epoch": 0.47, "grad_norm": 0.8109451359751612, "learning_rate": 5.692073978180342e-06, "loss": 0.6369, "step": 3706 }, { "epoch": 0.47, "grad_norm": 0.7031437301592737, "learning_rate": 5.690030472071385e-06, "loss": 0.5359, "step": 3707 }, { "epoch": 0.47, "grad_norm": 0.6981914501462341, "learning_rate": 5.687986848457427e-06, "loss": 0.5198, "step": 3708 }, { "epoch": 0.47, "grad_norm": 0.8266180031887741, "learning_rate": 5.685943107686476e-06, "loss": 0.6243, "step": 3709 }, { "epoch": 0.47, "grad_norm": 0.7214386634363655, "learning_rate": 5.68389925010656e-06, "loss": 0.5538, "step": 3710 }, { "epoch": 0.47, "grad_norm": 0.8108786760433367, "learning_rate": 5.681855276065725e-06, "loss": 0.6193, "step": 3711 }, { "epoch": 0.47, "grad_norm": 0.7756792116709265, "learning_rate": 5.679811185912041e-06, "loss": 0.5724, "step": 3712 }, { "epoch": 0.47, "grad_norm": 0.6840163115464162, "learning_rate": 5.677766979993593e-06, "loss": 0.5462, "step": 3713 }, { "epoch": 0.47, "grad_norm": 0.7608859089870624, "learning_rate": 5.675722658658491e-06, "loss": 0.645, "step": 3714 }, { "epoch": 0.47, "grad_norm": 0.7715700043634773, "learning_rate": 5.67367822225486e-06, "loss": 0.6193, "step": 3715 }, { "epoch": 0.47, "grad_norm": 0.9998495163177669, "learning_rate": 5.671633671130845e-06, "loss": 0.6676, "step": 3716 }, { "epoch": 0.47, "grad_norm": 0.8124420524609709, "learning_rate": 5.669589005634613e-06, "loss": 0.6133, "step": 3717 }, { "epoch": 0.47, "grad_norm": 0.700959602686, "learning_rate": 5.66754422611435e-06, "loss": 0.5528, "step": 3718 }, { "epoch": 0.47, "grad_norm": 0.8339189274409947, "learning_rate": 5.665499332918258e-06, "loss": 0.6655, "step": 3719 }, { "epoch": 0.47, "grad_norm": 0.6197495959599929, "learning_rate": 5.663454326394564e-06, "loss": 0.5088, "step": 3720 }, { "epoch": 0.47, "grad_norm": 0.9491850234070007, "learning_rate": 5.661409206891511e-06, "loss": 0.6651, "step": 3721 }, { "epoch": 0.47, "grad_norm": 0.7665697388040785, "learning_rate": 5.65936397475736e-06, "loss": 0.5678, "step": 3722 }, { "epoch": 0.47, "grad_norm": 0.698943193700711, "learning_rate": 5.657318630340391e-06, "loss": 0.5073, "step": 3723 }, { "epoch": 0.47, "grad_norm": 0.7364758846080469, "learning_rate": 5.65527317398891e-06, "loss": 0.5439, "step": 3724 }, { "epoch": 0.47, "grad_norm": 0.9065666002066101, "learning_rate": 5.653227606051233e-06, "loss": 0.5655, "step": 3725 }, { "epoch": 0.47, "grad_norm": 0.7155611546844571, "learning_rate": 5.651181926875699e-06, "loss": 0.5664, "step": 3726 }, { "epoch": 0.47, "grad_norm": 0.8047046981373644, "learning_rate": 5.649136136810669e-06, "loss": 0.658, "step": 3727 }, { "epoch": 0.47, "grad_norm": 0.8810795909433238, "learning_rate": 5.647090236204516e-06, "loss": 0.6619, "step": 3728 }, { "epoch": 0.48, "grad_norm": 0.8406971967581554, "learning_rate": 5.645044225405638e-06, "loss": 0.6436, "step": 3729 }, { "epoch": 0.48, "grad_norm": 0.6474976185117174, "learning_rate": 5.642998104762447e-06, "loss": 0.5148, "step": 3730 }, { "epoch": 0.48, "grad_norm": 0.658248919949886, "learning_rate": 5.6409518746233785e-06, "loss": 0.5838, "step": 3731 }, { "epoch": 0.48, "grad_norm": 0.9169710061876848, "learning_rate": 5.6389055353368826e-06, "loss": 0.6476, "step": 3732 }, { "epoch": 0.48, "grad_norm": 1.18774585626679, "learning_rate": 5.63685908725143e-06, "loss": 0.6249, "step": 3733 }, { "epoch": 0.48, "grad_norm": 0.8975256633068401, "learning_rate": 5.634812530715511e-06, "loss": 0.6952, "step": 3734 }, { "epoch": 0.48, "grad_norm": 0.6350085756192091, "learning_rate": 5.63276586607763e-06, "loss": 0.5354, "step": 3735 }, { "epoch": 0.48, "grad_norm": 0.882510878299978, "learning_rate": 5.630719093686314e-06, "loss": 0.6703, "step": 3736 }, { "epoch": 0.48, "grad_norm": 0.9157707863830172, "learning_rate": 5.628672213890109e-06, "loss": 0.6799, "step": 3737 }, { "epoch": 0.48, "grad_norm": 0.7780857816730271, "learning_rate": 5.626625227037574e-06, "loss": 0.5319, "step": 3738 }, { "epoch": 0.48, "grad_norm": 0.7691684915914921, "learning_rate": 5.624578133477291e-06, "loss": 0.6242, "step": 3739 }, { "epoch": 0.48, "grad_norm": 0.6847686824295655, "learning_rate": 5.6225309335578595e-06, "loss": 0.5954, "step": 3740 }, { "epoch": 0.48, "grad_norm": 0.6943218906562972, "learning_rate": 5.620483627627895e-06, "loss": 0.5649, "step": 3741 }, { "epoch": 0.48, "grad_norm": 0.8789568332691308, "learning_rate": 5.618436216036033e-06, "loss": 0.6696, "step": 3742 }, { "epoch": 0.48, "grad_norm": 0.7092032572782793, "learning_rate": 5.616388699130925e-06, "loss": 0.5506, "step": 3743 }, { "epoch": 0.48, "grad_norm": 0.7687916989324135, "learning_rate": 5.614341077261243e-06, "loss": 0.6322, "step": 3744 }, { "epoch": 0.48, "grad_norm": 0.6988407095155904, "learning_rate": 5.6122933507756775e-06, "loss": 0.5843, "step": 3745 }, { "epoch": 0.48, "grad_norm": 0.7417827124668309, "learning_rate": 5.610245520022933e-06, "loss": 0.5817, "step": 3746 }, { "epoch": 0.48, "grad_norm": 0.8245333582737887, "learning_rate": 5.608197585351732e-06, "loss": 0.5937, "step": 3747 }, { "epoch": 0.48, "grad_norm": 0.8124088065892567, "learning_rate": 5.60614954711082e-06, "loss": 0.6652, "step": 3748 }, { "epoch": 0.48, "grad_norm": 0.7706309684712072, "learning_rate": 5.604101405648955e-06, "loss": 0.6225, "step": 3749 }, { "epoch": 0.48, "grad_norm": 0.8296362414086256, "learning_rate": 5.602053161314913e-06, "loss": 0.6552, "step": 3750 }, { "epoch": 0.48, "grad_norm": 0.8039178612599128, "learning_rate": 5.6000048144574914e-06, "loss": 0.6294, "step": 3751 }, { "epoch": 0.48, "grad_norm": 0.7064968717210245, "learning_rate": 5.5979563654255e-06, "loss": 0.5353, "step": 3752 }, { "epoch": 0.48, "grad_norm": 0.8593535427876091, "learning_rate": 5.595907814567768e-06, "loss": 0.6637, "step": 3753 }, { "epoch": 0.48, "grad_norm": 0.7354470629092521, "learning_rate": 5.593859162233145e-06, "loss": 0.6243, "step": 3754 }, { "epoch": 0.48, "grad_norm": 0.8225093057451864, "learning_rate": 5.5918104087704925e-06, "loss": 0.593, "step": 3755 }, { "epoch": 0.48, "grad_norm": 0.7065847780582107, "learning_rate": 5.589761554528692e-06, "loss": 0.6409, "step": 3756 }, { "epoch": 0.48, "grad_norm": 0.8257092559323711, "learning_rate": 5.587712599856644e-06, "loss": 0.6515, "step": 3757 }, { "epoch": 0.48, "grad_norm": 0.8230713519589216, "learning_rate": 5.585663545103263e-06, "loss": 0.5718, "step": 3758 }, { "epoch": 0.48, "grad_norm": 0.7767947931328489, "learning_rate": 5.583614390617481e-06, "loss": 0.6196, "step": 3759 }, { "epoch": 0.48, "grad_norm": 0.6540839347134032, "learning_rate": 5.581565136748248e-06, "loss": 0.5232, "step": 3760 }, { "epoch": 0.48, "grad_norm": 0.6320646035950085, "learning_rate": 5.579515783844532e-06, "loss": 0.4825, "step": 3761 }, { "epoch": 0.48, "grad_norm": 0.75485151863857, "learning_rate": 5.577466332255316e-06, "loss": 0.5334, "step": 3762 }, { "epoch": 0.48, "grad_norm": 0.6606289169897692, "learning_rate": 5.575416782329598e-06, "loss": 0.5083, "step": 3763 }, { "epoch": 0.48, "grad_norm": 0.7666985575707803, "learning_rate": 5.573367134416397e-06, "loss": 0.5701, "step": 3764 }, { "epoch": 0.48, "grad_norm": 0.8263293495191297, "learning_rate": 5.5713173888647456e-06, "loss": 0.5969, "step": 3765 }, { "epoch": 0.48, "grad_norm": 0.7483891732093688, "learning_rate": 5.569267546023696e-06, "loss": 0.6352, "step": 3766 }, { "epoch": 0.48, "grad_norm": 0.7130565347986373, "learning_rate": 5.5672176062423145e-06, "loss": 0.5474, "step": 3767 }, { "epoch": 0.48, "grad_norm": 0.7917777041224456, "learning_rate": 5.565167569869683e-06, "loss": 0.6348, "step": 3768 }, { "epoch": 0.48, "grad_norm": 0.9286400057524099, "learning_rate": 5.563117437254902e-06, "loss": 0.716, "step": 3769 }, { "epoch": 0.48, "grad_norm": 0.7352809625167775, "learning_rate": 5.561067208747089e-06, "loss": 0.5747, "step": 3770 }, { "epoch": 0.48, "grad_norm": 0.656574420159467, "learning_rate": 5.559016884695374e-06, "loss": 0.5745, "step": 3771 }, { "epoch": 0.48, "grad_norm": 0.788370359305701, "learning_rate": 5.556966465448907e-06, "loss": 0.6095, "step": 3772 }, { "epoch": 0.48, "grad_norm": 0.8243774848418189, "learning_rate": 5.554915951356854e-06, "loss": 0.6384, "step": 3773 }, { "epoch": 0.48, "grad_norm": 0.8239082845827175, "learning_rate": 5.552865342768397e-06, "loss": 0.6129, "step": 3774 }, { "epoch": 0.48, "grad_norm": 0.6731821369513035, "learning_rate": 5.55081464003273e-06, "loss": 0.4963, "step": 3775 }, { "epoch": 0.48, "grad_norm": 0.7018677559325273, "learning_rate": 5.54876384349907e-06, "loss": 0.5437, "step": 3776 }, { "epoch": 0.48, "grad_norm": 0.7747102564655425, "learning_rate": 5.546712953516643e-06, "loss": 0.5551, "step": 3777 }, { "epoch": 0.48, "grad_norm": 0.7403891080268005, "learning_rate": 5.544661970434696e-06, "loss": 0.6009, "step": 3778 }, { "epoch": 0.48, "grad_norm": 0.78713833005115, "learning_rate": 5.54261089460249e-06, "loss": 0.5962, "step": 3779 }, { "epoch": 0.48, "grad_norm": 0.851454242443393, "learning_rate": 5.540559726369301e-06, "loss": 0.667, "step": 3780 }, { "epoch": 0.48, "grad_norm": 0.8756948364494134, "learning_rate": 5.5385084660844225e-06, "loss": 0.6565, "step": 3781 }, { "epoch": 0.48, "grad_norm": 0.7583746715174433, "learning_rate": 5.536457114097163e-06, "loss": 0.5506, "step": 3782 }, { "epoch": 0.48, "grad_norm": 0.8351209786717441, "learning_rate": 5.534405670756845e-06, "loss": 0.6051, "step": 3783 }, { "epoch": 0.48, "grad_norm": 0.8032521502231091, "learning_rate": 5.5323541364128085e-06, "loss": 0.6124, "step": 3784 }, { "epoch": 0.48, "grad_norm": 0.8057952881010237, "learning_rate": 5.530302511414407e-06, "loss": 0.6433, "step": 3785 }, { "epoch": 0.48, "grad_norm": 0.6034973781951312, "learning_rate": 5.5282507961110146e-06, "loss": 0.5121, "step": 3786 }, { "epoch": 0.48, "grad_norm": 0.7045495916877996, "learning_rate": 5.526198990852014e-06, "loss": 0.6005, "step": 3787 }, { "epoch": 0.48, "grad_norm": 0.7881856535656534, "learning_rate": 5.524147095986808e-06, "loss": 0.6133, "step": 3788 }, { "epoch": 0.48, "grad_norm": 0.7077175798428397, "learning_rate": 5.522095111864813e-06, "loss": 0.5731, "step": 3789 }, { "epoch": 0.48, "grad_norm": 0.8790488114558154, "learning_rate": 5.5200430388354575e-06, "loss": 0.6433, "step": 3790 }, { "epoch": 0.48, "grad_norm": 0.7401839731625945, "learning_rate": 5.517990877248192e-06, "loss": 0.6429, "step": 3791 }, { "epoch": 0.48, "grad_norm": 0.7820097556621365, "learning_rate": 5.515938627452475e-06, "loss": 0.5196, "step": 3792 }, { "epoch": 0.48, "grad_norm": 0.7716335975875197, "learning_rate": 5.513886289797785e-06, "loss": 0.5808, "step": 3793 }, { "epoch": 0.48, "grad_norm": 0.6753117916435982, "learning_rate": 5.511833864633615e-06, "loss": 0.5428, "step": 3794 }, { "epoch": 0.48, "grad_norm": 0.6823198521790625, "learning_rate": 5.509781352309468e-06, "loss": 0.5685, "step": 3795 }, { "epoch": 0.48, "grad_norm": 1.1811555544161623, "learning_rate": 5.507728753174868e-06, "loss": 0.6959, "step": 3796 }, { "epoch": 0.48, "grad_norm": 0.6995180925161227, "learning_rate": 5.505676067579352e-06, "loss": 0.541, "step": 3797 }, { "epoch": 0.48, "grad_norm": 0.6959581559943104, "learning_rate": 5.503623295872468e-06, "loss": 0.5404, "step": 3798 }, { "epoch": 0.48, "grad_norm": 0.6682403949262287, "learning_rate": 5.5015704384037845e-06, "loss": 0.5605, "step": 3799 }, { "epoch": 0.48, "grad_norm": 0.8349675473208893, "learning_rate": 5.49951749552288e-06, "loss": 0.5985, "step": 3800 }, { "epoch": 0.48, "grad_norm": 0.6385448061715642, "learning_rate": 5.497464467579351e-06, "loss": 0.5018, "step": 3801 }, { "epoch": 0.48, "grad_norm": 0.895833405280133, "learning_rate": 5.495411354922806e-06, "loss": 0.6358, "step": 3802 }, { "epoch": 0.48, "grad_norm": 0.7391780279163028, "learning_rate": 5.493358157902867e-06, "loss": 0.5718, "step": 3803 }, { "epoch": 0.48, "grad_norm": 0.7170843813999257, "learning_rate": 5.491304876869175e-06, "loss": 0.531, "step": 3804 }, { "epoch": 0.48, "grad_norm": 0.6868656287426803, "learning_rate": 5.48925151217138e-06, "loss": 0.5787, "step": 3805 }, { "epoch": 0.48, "grad_norm": 0.6632978398798797, "learning_rate": 5.487198064159149e-06, "loss": 0.508, "step": 3806 }, { "epoch": 0.48, "grad_norm": 0.7827899347295381, "learning_rate": 5.485144533182164e-06, "loss": 0.5992, "step": 3807 }, { "epoch": 0.49, "grad_norm": 0.8217609233260615, "learning_rate": 5.483090919590119e-06, "loss": 0.6143, "step": 3808 }, { "epoch": 0.49, "grad_norm": 0.7670942768175887, "learning_rate": 5.481037223732724e-06, "loss": 0.54, "step": 3809 }, { "epoch": 0.49, "grad_norm": 0.8146122915050482, "learning_rate": 5.4789834459597006e-06, "loss": 0.636, "step": 3810 }, { "epoch": 0.49, "grad_norm": 0.7870990608877836, "learning_rate": 5.476929586620786e-06, "loss": 0.6198, "step": 3811 }, { "epoch": 0.49, "grad_norm": 0.8919310737708784, "learning_rate": 5.4748756460657304e-06, "loss": 0.6187, "step": 3812 }, { "epoch": 0.49, "grad_norm": 0.6856255791581137, "learning_rate": 5.472821624644301e-06, "loss": 0.5791, "step": 3813 }, { "epoch": 0.49, "grad_norm": 0.8707993824599897, "learning_rate": 5.470767522706274e-06, "loss": 0.6232, "step": 3814 }, { "epoch": 0.49, "grad_norm": 0.6780172687583828, "learning_rate": 5.468713340601441e-06, "loss": 0.5409, "step": 3815 }, { "epoch": 0.49, "grad_norm": 0.764302449066546, "learning_rate": 5.4666590786796115e-06, "loss": 0.599, "step": 3816 }, { "epoch": 0.49, "grad_norm": 0.9392956569368222, "learning_rate": 5.464604737290602e-06, "loss": 0.6255, "step": 3817 }, { "epoch": 0.49, "grad_norm": 0.7641492922132964, "learning_rate": 5.462550316784246e-06, "loss": 0.6148, "step": 3818 }, { "epoch": 0.49, "grad_norm": 0.7938853659027516, "learning_rate": 5.46049581751039e-06, "loss": 0.5942, "step": 3819 }, { "epoch": 0.49, "grad_norm": 0.7277916557270209, "learning_rate": 5.458441239818893e-06, "loss": 0.6048, "step": 3820 }, { "epoch": 0.49, "grad_norm": 0.7843583052764689, "learning_rate": 5.45638658405963e-06, "loss": 0.6311, "step": 3821 }, { "epoch": 0.49, "grad_norm": 0.7348089455768977, "learning_rate": 5.454331850582485e-06, "loss": 0.541, "step": 3822 }, { "epoch": 0.49, "grad_norm": 0.6537547719923894, "learning_rate": 5.4522770397373605e-06, "loss": 0.4931, "step": 3823 }, { "epoch": 0.49, "grad_norm": 0.8041038312236473, "learning_rate": 5.450222151874166e-06, "loss": 0.6436, "step": 3824 }, { "epoch": 0.49, "grad_norm": 0.8592330235699351, "learning_rate": 5.448167187342831e-06, "loss": 0.648, "step": 3825 }, { "epoch": 0.49, "grad_norm": 1.0003520613880121, "learning_rate": 5.446112146493292e-06, "loss": 0.6357, "step": 3826 }, { "epoch": 0.49, "grad_norm": 0.7606521319132613, "learning_rate": 5.444057029675503e-06, "loss": 0.6044, "step": 3827 }, { "epoch": 0.49, "grad_norm": 0.9011213144168897, "learning_rate": 5.442001837239427e-06, "loss": 0.6733, "step": 3828 }, { "epoch": 0.49, "grad_norm": 0.6389917283686845, "learning_rate": 5.439946569535043e-06, "loss": 0.5717, "step": 3829 }, { "epoch": 0.49, "grad_norm": 0.7747034608140921, "learning_rate": 5.437891226912342e-06, "loss": 0.59, "step": 3830 }, { "epoch": 0.49, "grad_norm": 0.7045123141173107, "learning_rate": 5.435835809721326e-06, "loss": 0.5806, "step": 3831 }, { "epoch": 0.49, "grad_norm": 0.7782449551892204, "learning_rate": 5.433780318312013e-06, "loss": 0.5608, "step": 3832 }, { "epoch": 0.49, "grad_norm": 0.7209084161895974, "learning_rate": 5.4317247530344305e-06, "loss": 0.6196, "step": 3833 }, { "epoch": 0.49, "grad_norm": 0.8260617435213631, "learning_rate": 5.429669114238621e-06, "loss": 0.6048, "step": 3834 }, { "epoch": 0.49, "grad_norm": 0.9573921073955799, "learning_rate": 5.427613402274637e-06, "loss": 0.6328, "step": 3835 }, { "epoch": 0.49, "grad_norm": 0.8305749950914513, "learning_rate": 5.425557617492546e-06, "loss": 0.6535, "step": 3836 }, { "epoch": 0.49, "grad_norm": 0.8060792540361774, "learning_rate": 5.423501760242425e-06, "loss": 0.6649, "step": 3837 }, { "epoch": 0.49, "grad_norm": 0.8337030013744501, "learning_rate": 5.4214458308743674e-06, "loss": 0.6251, "step": 3838 }, { "epoch": 0.49, "grad_norm": 0.7064456100201285, "learning_rate": 5.419389829738475e-06, "loss": 0.541, "step": 3839 }, { "epoch": 0.49, "grad_norm": 0.8077887818439091, "learning_rate": 5.417333757184864e-06, "loss": 0.6126, "step": 3840 }, { "epoch": 0.49, "grad_norm": 0.7944744784863351, "learning_rate": 5.415277613563661e-06, "loss": 0.5822, "step": 3841 }, { "epoch": 0.49, "grad_norm": 0.7359226148266003, "learning_rate": 5.413221399225007e-06, "loss": 0.5364, "step": 3842 }, { "epoch": 0.49, "grad_norm": 0.8207786284640401, "learning_rate": 5.411165114519054e-06, "loss": 0.6316, "step": 3843 }, { "epoch": 0.49, "grad_norm": 0.761351414277866, "learning_rate": 5.409108759795966e-06, "loss": 0.5533, "step": 3844 }, { "epoch": 0.49, "grad_norm": 0.8620611040195203, "learning_rate": 5.4070523354059175e-06, "loss": 0.6326, "step": 3845 }, { "epoch": 0.49, "grad_norm": 0.8067164645720434, "learning_rate": 5.404995841699097e-06, "loss": 0.6788, "step": 3846 }, { "epoch": 0.49, "grad_norm": 0.8938055268343883, "learning_rate": 5.402939279025705e-06, "loss": 0.5842, "step": 3847 }, { "epoch": 0.49, "grad_norm": 0.6327188598711672, "learning_rate": 5.40088264773595e-06, "loss": 0.4597, "step": 3848 }, { "epoch": 0.49, "grad_norm": 0.8038640277830794, "learning_rate": 5.398825948180058e-06, "loss": 0.6055, "step": 3849 }, { "epoch": 0.49, "grad_norm": 0.6903374320625769, "learning_rate": 5.396769180708261e-06, "loss": 0.5273, "step": 3850 }, { "epoch": 0.49, "grad_norm": 0.7052734244364458, "learning_rate": 5.3947123456708065e-06, "loss": 0.5497, "step": 3851 }, { "epoch": 0.49, "grad_norm": 0.7319679782592236, "learning_rate": 5.39265544341795e-06, "loss": 0.6111, "step": 3852 }, { "epoch": 0.49, "grad_norm": 0.6286419621787235, "learning_rate": 5.390598474299964e-06, "loss": 0.547, "step": 3853 }, { "epoch": 0.49, "grad_norm": 0.6495615993824007, "learning_rate": 5.388541438667126e-06, "loss": 0.5687, "step": 3854 }, { "epoch": 0.49, "grad_norm": 0.8190019595768467, "learning_rate": 5.386484336869728e-06, "loss": 0.6582, "step": 3855 }, { "epoch": 0.49, "grad_norm": 0.6144310144825965, "learning_rate": 5.384427169258075e-06, "loss": 0.5553, "step": 3856 }, { "epoch": 0.49, "grad_norm": 0.7190871083020112, "learning_rate": 5.382369936182481e-06, "loss": 0.5912, "step": 3857 }, { "epoch": 0.49, "grad_norm": 0.7019300242830034, "learning_rate": 5.380312637993269e-06, "loss": 0.5412, "step": 3858 }, { "epoch": 0.49, "grad_norm": 0.6354299612327892, "learning_rate": 5.3782552750407765e-06, "loss": 0.5138, "step": 3859 }, { "epoch": 0.49, "grad_norm": 0.6702103895728937, "learning_rate": 5.376197847675353e-06, "loss": 0.6079, "step": 3860 }, { "epoch": 0.49, "grad_norm": 0.696995486897744, "learning_rate": 5.374140356247354e-06, "loss": 0.4903, "step": 3861 }, { "epoch": 0.49, "grad_norm": 0.7737769559787069, "learning_rate": 5.37208280110715e-06, "loss": 0.6091, "step": 3862 }, { "epoch": 0.49, "grad_norm": 0.8294111635474973, "learning_rate": 5.370025182605122e-06, "loss": 0.6243, "step": 3863 }, { "epoch": 0.49, "grad_norm": 0.793935295795038, "learning_rate": 5.36796750109166e-06, "loss": 0.5845, "step": 3864 }, { "epoch": 0.49, "grad_norm": 0.691177235719243, "learning_rate": 5.365909756917165e-06, "loss": 0.4849, "step": 3865 }, { "epoch": 0.49, "grad_norm": 0.773087409054588, "learning_rate": 5.363851950432052e-06, "loss": 0.5812, "step": 3866 }, { "epoch": 0.49, "grad_norm": 0.8831648602839782, "learning_rate": 5.361794081986741e-06, "loss": 0.6536, "step": 3867 }, { "epoch": 0.49, "grad_norm": 0.7150638310158256, "learning_rate": 5.3597361519316685e-06, "loss": 0.5346, "step": 3868 }, { "epoch": 0.49, "grad_norm": 0.7623917603921809, "learning_rate": 5.357678160617276e-06, "loss": 0.5575, "step": 3869 }, { "epoch": 0.49, "grad_norm": 1.1889003241608633, "learning_rate": 5.355620108394018e-06, "loss": 0.6372, "step": 3870 }, { "epoch": 0.49, "grad_norm": 0.830736440456483, "learning_rate": 5.353561995612362e-06, "loss": 0.6866, "step": 3871 }, { "epoch": 0.49, "grad_norm": 0.80264355673953, "learning_rate": 5.351503822622781e-06, "loss": 0.6549, "step": 3872 }, { "epoch": 0.49, "grad_norm": 0.8737518549648303, "learning_rate": 5.349445589775761e-06, "loss": 0.6902, "step": 3873 }, { "epoch": 0.49, "grad_norm": 0.6790303833821116, "learning_rate": 5.347387297421798e-06, "loss": 0.5769, "step": 3874 }, { "epoch": 0.49, "grad_norm": 0.7295110762831493, "learning_rate": 5.345328945911397e-06, "loss": 0.5727, "step": 3875 }, { "epoch": 0.49, "grad_norm": 0.8017729015067593, "learning_rate": 5.343270535595074e-06, "loss": 0.587, "step": 3876 }, { "epoch": 0.49, "grad_norm": 0.7476190672272506, "learning_rate": 5.341212066823356e-06, "loss": 0.5732, "step": 3877 }, { "epoch": 0.49, "grad_norm": 0.6405887601608288, "learning_rate": 5.339153539946777e-06, "loss": 0.5507, "step": 3878 }, { "epoch": 0.49, "grad_norm": 0.7271641143245126, "learning_rate": 5.337094955315882e-06, "loss": 0.5139, "step": 3879 }, { "epoch": 0.49, "grad_norm": 0.7242961103500672, "learning_rate": 5.33503631328123e-06, "loss": 0.6501, "step": 3880 }, { "epoch": 0.49, "grad_norm": 0.8466096687254789, "learning_rate": 5.3329776141933835e-06, "loss": 0.6748, "step": 3881 }, { "epoch": 0.49, "grad_norm": 0.8880092196494722, "learning_rate": 5.330918858402918e-06, "loss": 0.6523, "step": 3882 }, { "epoch": 0.49, "grad_norm": 0.5984737574694566, "learning_rate": 5.328860046260418e-06, "loss": 0.5417, "step": 3883 }, { "epoch": 0.49, "grad_norm": 0.6077989362682091, "learning_rate": 5.326801178116478e-06, "loss": 0.5563, "step": 3884 }, { "epoch": 0.49, "grad_norm": 0.8019998428435321, "learning_rate": 5.324742254321703e-06, "loss": 0.6176, "step": 3885 }, { "epoch": 0.5, "grad_norm": 0.6628284387210959, "learning_rate": 5.322683275226705e-06, "loss": 0.5714, "step": 3886 }, { "epoch": 0.5, "grad_norm": 0.9531470850931998, "learning_rate": 5.3206242411821075e-06, "loss": 0.6002, "step": 3887 }, { "epoch": 0.5, "grad_norm": 0.8075602127205689, "learning_rate": 5.318565152538541e-06, "loss": 0.589, "step": 3888 }, { "epoch": 0.5, "grad_norm": 0.8293248742864335, "learning_rate": 5.316506009646648e-06, "loss": 0.6868, "step": 3889 }, { "epoch": 0.5, "grad_norm": 0.728632371409959, "learning_rate": 5.314446812857079e-06, "loss": 0.5762, "step": 3890 }, { "epoch": 0.5, "grad_norm": 0.736799829621883, "learning_rate": 5.312387562520494e-06, "loss": 0.5123, "step": 3891 }, { "epoch": 0.5, "grad_norm": 0.8132818832545302, "learning_rate": 5.31032825898756e-06, "loss": 0.6795, "step": 3892 }, { "epoch": 0.5, "grad_norm": 0.6425187666420311, "learning_rate": 5.308268902608959e-06, "loss": 0.5505, "step": 3893 }, { "epoch": 0.5, "grad_norm": 0.7895670791913938, "learning_rate": 5.306209493735373e-06, "loss": 0.6207, "step": 3894 }, { "epoch": 0.5, "grad_norm": 0.8313051619781185, "learning_rate": 5.304150032717502e-06, "loss": 0.5407, "step": 3895 }, { "epoch": 0.5, "grad_norm": 0.6736623302126086, "learning_rate": 5.302090519906048e-06, "loss": 0.5638, "step": 3896 }, { "epoch": 0.5, "grad_norm": 0.7223814770812205, "learning_rate": 5.300030955651726e-06, "loss": 0.6144, "step": 3897 }, { "epoch": 0.5, "grad_norm": 0.6612983729165133, "learning_rate": 5.297971340305258e-06, "loss": 0.517, "step": 3898 }, { "epoch": 0.5, "grad_norm": 0.911059572482929, "learning_rate": 5.295911674217374e-06, "loss": 0.6769, "step": 3899 }, { "epoch": 0.5, "grad_norm": 0.798125451736803, "learning_rate": 5.2938519577388155e-06, "loss": 0.5625, "step": 3900 }, { "epoch": 0.5, "grad_norm": 2.2277441434243586, "learning_rate": 5.291792191220331e-06, "loss": 0.6652, "step": 3901 }, { "epoch": 0.5, "grad_norm": 0.7965989597396412, "learning_rate": 5.289732375012675e-06, "loss": 0.6189, "step": 3902 }, { "epoch": 0.5, "grad_norm": 0.7284296983113183, "learning_rate": 5.287672509466614e-06, "loss": 0.5619, "step": 3903 }, { "epoch": 0.5, "grad_norm": 0.7153552105582052, "learning_rate": 5.285612594932922e-06, "loss": 0.5161, "step": 3904 }, { "epoch": 0.5, "grad_norm": 0.7823567827528392, "learning_rate": 5.283552631762381e-06, "loss": 0.625, "step": 3905 }, { "epoch": 0.5, "grad_norm": 0.917451724370807, "learning_rate": 5.281492620305779e-06, "loss": 0.6016, "step": 3906 }, { "epoch": 0.5, "grad_norm": 0.9107262281377362, "learning_rate": 5.279432560913918e-06, "loss": 0.6555, "step": 3907 }, { "epoch": 0.5, "grad_norm": 0.7420049487936115, "learning_rate": 5.277372453937602e-06, "loss": 0.5721, "step": 3908 }, { "epoch": 0.5, "grad_norm": 0.7589901278008877, "learning_rate": 5.275312299727647e-06, "loss": 0.5808, "step": 3909 }, { "epoch": 0.5, "grad_norm": 0.7160711210356638, "learning_rate": 5.273252098634875e-06, "loss": 0.6231, "step": 3910 }, { "epoch": 0.5, "grad_norm": 1.04335638137264, "learning_rate": 5.271191851010119e-06, "loss": 0.6267, "step": 3911 }, { "epoch": 0.5, "grad_norm": 0.8458939871562218, "learning_rate": 5.2691315572042155e-06, "loss": 0.6284, "step": 3912 }, { "epoch": 0.5, "grad_norm": 0.7248244845522537, "learning_rate": 5.26707121756801e-06, "loss": 0.5666, "step": 3913 }, { "epoch": 0.5, "grad_norm": 0.8704400287327139, "learning_rate": 5.26501083245236e-06, "loss": 0.667, "step": 3914 }, { "epoch": 0.5, "grad_norm": 0.6763817543633236, "learning_rate": 5.262950402208127e-06, "loss": 0.492, "step": 3915 }, { "epoch": 0.5, "grad_norm": 0.7897791455427242, "learning_rate": 5.2608899271861765e-06, "loss": 0.6871, "step": 3916 }, { "epoch": 0.5, "grad_norm": 0.8215969220604853, "learning_rate": 5.258829407737392e-06, "loss": 0.6357, "step": 3917 }, { "epoch": 0.5, "grad_norm": 0.7954659706004132, "learning_rate": 5.2567688442126555e-06, "loss": 0.5363, "step": 3918 }, { "epoch": 0.5, "grad_norm": 0.8153359060392984, "learning_rate": 5.25470823696286e-06, "loss": 0.6415, "step": 3919 }, { "epoch": 0.5, "grad_norm": 0.6230678789586339, "learning_rate": 5.252647586338905e-06, "loss": 0.5272, "step": 3920 }, { "epoch": 0.5, "grad_norm": 0.7671432237615076, "learning_rate": 5.250586892691698e-06, "loss": 0.6401, "step": 3921 }, { "epoch": 0.5, "grad_norm": 0.7208126738008377, "learning_rate": 5.2485261563721535e-06, "loss": 0.5089, "step": 3922 }, { "epoch": 0.5, "grad_norm": 0.7145080882827488, "learning_rate": 5.246465377731195e-06, "loss": 0.5437, "step": 3923 }, { "epoch": 0.5, "grad_norm": 0.6905274673987536, "learning_rate": 5.244404557119749e-06, "loss": 0.5508, "step": 3924 }, { "epoch": 0.5, "grad_norm": 0.8106874109486311, "learning_rate": 5.242343694888753e-06, "loss": 0.614, "step": 3925 }, { "epoch": 0.5, "grad_norm": 0.6957217086755179, "learning_rate": 5.240282791389151e-06, "loss": 0.5402, "step": 3926 }, { "epoch": 0.5, "grad_norm": 0.630372581445786, "learning_rate": 5.238221846971892e-06, "loss": 0.4862, "step": 3927 }, { "epoch": 0.5, "grad_norm": 0.7731970985067274, "learning_rate": 5.236160861987935e-06, "loss": 0.583, "step": 3928 }, { "epoch": 0.5, "grad_norm": 1.0071684811360497, "learning_rate": 5.234099836788245e-06, "loss": 0.615, "step": 3929 }, { "epoch": 0.5, "grad_norm": 0.9171601406914759, "learning_rate": 5.23203877172379e-06, "loss": 0.6445, "step": 3930 }, { "epoch": 0.5, "grad_norm": 0.6087526357967677, "learning_rate": 5.229977667145549e-06, "loss": 0.5302, "step": 3931 }, { "epoch": 0.5, "grad_norm": 0.7878824035891563, "learning_rate": 5.2279165234045085e-06, "loss": 0.6143, "step": 3932 }, { "epoch": 0.5, "grad_norm": 0.5930690498674795, "learning_rate": 5.225855340851657e-06, "loss": 0.4817, "step": 3933 }, { "epoch": 0.5, "grad_norm": 0.7331913905266806, "learning_rate": 5.223794119837996e-06, "loss": 0.6417, "step": 3934 }, { "epoch": 0.5, "grad_norm": 0.7410897486652996, "learning_rate": 5.221732860714525e-06, "loss": 0.6344, "step": 3935 }, { "epoch": 0.5, "grad_norm": 0.6356543120402645, "learning_rate": 5.21967156383226e-06, "loss": 0.5281, "step": 3936 }, { "epoch": 0.5, "grad_norm": 0.9882021085591083, "learning_rate": 5.217610229542216e-06, "loss": 0.6515, "step": 3937 }, { "epoch": 0.5, "grad_norm": 0.6596919112488022, "learning_rate": 5.2155488581954176e-06, "loss": 0.5049, "step": 3938 }, { "epoch": 0.5, "grad_norm": 0.853979833761869, "learning_rate": 5.213487450142892e-06, "loss": 0.6217, "step": 3939 }, { "epoch": 0.5, "grad_norm": 0.8151279619920629, "learning_rate": 5.2114260057356794e-06, "loss": 0.5773, "step": 3940 }, { "epoch": 0.5, "grad_norm": 0.9487051719291467, "learning_rate": 5.209364525324819e-06, "loss": 0.6526, "step": 3941 }, { "epoch": 0.5, "grad_norm": 0.7740052051441528, "learning_rate": 5.207303009261363e-06, "loss": 0.5138, "step": 3942 }, { "epoch": 0.5, "grad_norm": 0.8509034195940681, "learning_rate": 5.205241457896364e-06, "loss": 0.5932, "step": 3943 }, { "epoch": 0.5, "grad_norm": 0.8272147455548723, "learning_rate": 5.2031798715808824e-06, "loss": 0.6764, "step": 3944 }, { "epoch": 0.5, "grad_norm": 0.8485951340071919, "learning_rate": 5.2011182506659856e-06, "loss": 0.6642, "step": 3945 }, { "epoch": 0.5, "grad_norm": 0.8327932607190893, "learning_rate": 5.199056595502745e-06, "loss": 0.6205, "step": 3946 }, { "epoch": 0.5, "grad_norm": 0.6901673640092363, "learning_rate": 5.196994906442241e-06, "loss": 0.5469, "step": 3947 }, { "epoch": 0.5, "grad_norm": 0.8409752221760088, "learning_rate": 5.194933183835556e-06, "loss": 0.6335, "step": 3948 }, { "epoch": 0.5, "grad_norm": 0.6610313833884395, "learning_rate": 5.192871428033782e-06, "loss": 0.5382, "step": 3949 }, { "epoch": 0.5, "grad_norm": 0.7261820462916982, "learning_rate": 5.190809639388012e-06, "loss": 0.5379, "step": 3950 }, { "epoch": 0.5, "grad_norm": 0.7112033018841851, "learning_rate": 5.1887478182493474e-06, "loss": 0.5569, "step": 3951 }, { "epoch": 0.5, "grad_norm": 0.9168185585416618, "learning_rate": 5.186685964968896e-06, "loss": 0.6542, "step": 3952 }, { "epoch": 0.5, "grad_norm": 0.8977435848892936, "learning_rate": 5.184624079897771e-06, "loss": 0.6782, "step": 3953 }, { "epoch": 0.5, "grad_norm": 0.775057102507285, "learning_rate": 5.182562163387088e-06, "loss": 0.6172, "step": 3954 }, { "epoch": 0.5, "grad_norm": 0.7438685462146467, "learning_rate": 5.1805002157879704e-06, "loss": 0.5664, "step": 3955 }, { "epoch": 0.5, "grad_norm": 0.7787555138334914, "learning_rate": 5.1784382374515464e-06, "loss": 0.5688, "step": 3956 }, { "epoch": 0.5, "grad_norm": 0.7980526028213298, "learning_rate": 5.17637622872895e-06, "loss": 0.6386, "step": 3957 }, { "epoch": 0.5, "grad_norm": 0.9173565476943023, "learning_rate": 5.174314189971319e-06, "loss": 0.6318, "step": 3958 }, { "epoch": 0.5, "grad_norm": 0.7140606973586655, "learning_rate": 5.172252121529797e-06, "loss": 0.5956, "step": 3959 }, { "epoch": 0.5, "grad_norm": 0.819159583163676, "learning_rate": 5.170190023755534e-06, "loss": 0.5257, "step": 3960 }, { "epoch": 0.5, "grad_norm": 0.6222897543955713, "learning_rate": 5.168127896999683e-06, "loss": 0.5028, "step": 3961 }, { "epoch": 0.5, "grad_norm": 0.8116308612826724, "learning_rate": 5.166065741613402e-06, "loss": 0.6755, "step": 3962 }, { "epoch": 0.5, "grad_norm": 0.8079616096038881, "learning_rate": 5.164003557947855e-06, "loss": 0.6514, "step": 3963 }, { "epoch": 0.51, "grad_norm": 0.6325325189809083, "learning_rate": 5.161941346354211e-06, "loss": 0.5586, "step": 3964 }, { "epoch": 0.51, "grad_norm": 0.7598035162863783, "learning_rate": 5.159879107183643e-06, "loss": 0.5387, "step": 3965 }, { "epoch": 0.51, "grad_norm": 0.8244041636725825, "learning_rate": 5.157816840787328e-06, "loss": 0.5731, "step": 3966 }, { "epoch": 0.51, "grad_norm": 0.7636594459504319, "learning_rate": 5.155754547516451e-06, "loss": 0.5652, "step": 3967 }, { "epoch": 0.51, "grad_norm": 0.7687513322591288, "learning_rate": 5.153692227722195e-06, "loss": 0.6231, "step": 3968 }, { "epoch": 0.51, "grad_norm": 0.6365027273253177, "learning_rate": 5.151629881755754e-06, "loss": 0.5229, "step": 3969 }, { "epoch": 0.51, "grad_norm": 0.6660627820368301, "learning_rate": 5.149567509968325e-06, "loss": 0.5635, "step": 3970 }, { "epoch": 0.51, "grad_norm": 0.8977479084855744, "learning_rate": 5.147505112711106e-06, "loss": 0.6474, "step": 3971 }, { "epoch": 0.51, "grad_norm": 0.6242025286809467, "learning_rate": 5.1454426903353035e-06, "loss": 0.4784, "step": 3972 }, { "epoch": 0.51, "grad_norm": 0.668758006157135, "learning_rate": 5.143380243192128e-06, "loss": 0.5372, "step": 3973 }, { "epoch": 0.51, "grad_norm": 0.6521677659179348, "learning_rate": 5.141317771632789e-06, "loss": 0.5372, "step": 3974 }, { "epoch": 0.51, "grad_norm": 0.7291272260705304, "learning_rate": 5.139255276008506e-06, "loss": 0.6172, "step": 3975 }, { "epoch": 0.51, "grad_norm": 0.7316764635573091, "learning_rate": 5.1371927566704995e-06, "loss": 0.5432, "step": 3976 }, { "epoch": 0.51, "grad_norm": 0.7201679313009776, "learning_rate": 5.135130213969996e-06, "loss": 0.5819, "step": 3977 }, { "epoch": 0.51, "grad_norm": 0.808797444175203, "learning_rate": 5.133067648258227e-06, "loss": 0.5254, "step": 3978 }, { "epoch": 0.51, "grad_norm": 0.838837036615046, "learning_rate": 5.131005059886421e-06, "loss": 0.6337, "step": 3979 }, { "epoch": 0.51, "grad_norm": 0.7475109946373851, "learning_rate": 5.128942449205819e-06, "loss": 0.6117, "step": 3980 }, { "epoch": 0.51, "grad_norm": 0.8451944146525131, "learning_rate": 5.12687981656766e-06, "loss": 0.6911, "step": 3981 }, { "epoch": 0.51, "grad_norm": 0.6693917000374486, "learning_rate": 5.124817162323192e-06, "loss": 0.5234, "step": 3982 }, { "epoch": 0.51, "grad_norm": 0.7726321828292547, "learning_rate": 5.122754486823661e-06, "loss": 0.4918, "step": 3983 }, { "epoch": 0.51, "grad_norm": 0.6928701631120032, "learning_rate": 5.12069179042032e-06, "loss": 0.5528, "step": 3984 }, { "epoch": 0.51, "grad_norm": 0.7047977773124475, "learning_rate": 5.118629073464424e-06, "loss": 0.5386, "step": 3985 }, { "epoch": 0.51, "grad_norm": 0.6385016494572069, "learning_rate": 5.116566336307233e-06, "loss": 0.5165, "step": 3986 }, { "epoch": 0.51, "grad_norm": 0.775412533633928, "learning_rate": 5.11450357930001e-06, "loss": 0.5892, "step": 3987 }, { "epoch": 0.51, "grad_norm": 0.8511416255850703, "learning_rate": 5.112440802794019e-06, "loss": 0.5522, "step": 3988 }, { "epoch": 0.51, "grad_norm": 0.7634150618667501, "learning_rate": 5.110378007140532e-06, "loss": 0.5609, "step": 3989 }, { "epoch": 0.51, "grad_norm": 0.8048193148634439, "learning_rate": 5.108315192690819e-06, "loss": 0.5602, "step": 3990 }, { "epoch": 0.51, "grad_norm": 0.8709184298970912, "learning_rate": 5.106252359796158e-06, "loss": 0.655, "step": 3991 }, { "epoch": 0.51, "grad_norm": 0.6233364534175336, "learning_rate": 5.104189508807826e-06, "loss": 0.5237, "step": 3992 }, { "epoch": 0.51, "grad_norm": 0.7559971459861031, "learning_rate": 5.102126640077108e-06, "loss": 0.6135, "step": 3993 }, { "epoch": 0.51, "grad_norm": 0.9911347383831909, "learning_rate": 5.100063753955284e-06, "loss": 0.6973, "step": 3994 }, { "epoch": 0.51, "grad_norm": 0.8274758276903637, "learning_rate": 5.0980008507936475e-06, "loss": 0.5894, "step": 3995 }, { "epoch": 0.51, "grad_norm": 0.6534841013227789, "learning_rate": 5.095937930943486e-06, "loss": 0.5459, "step": 3996 }, { "epoch": 0.51, "grad_norm": 0.6049952907772924, "learning_rate": 5.093874994756095e-06, "loss": 0.518, "step": 3997 }, { "epoch": 0.51, "grad_norm": 0.653584927243782, "learning_rate": 5.091812042582771e-06, "loss": 0.5568, "step": 3998 }, { "epoch": 0.51, "grad_norm": 0.7597314378676668, "learning_rate": 5.089749074774811e-06, "loss": 0.6313, "step": 3999 }, { "epoch": 0.51, "grad_norm": 0.8448182292971246, "learning_rate": 5.08768609168352e-06, "loss": 0.5565, "step": 4000 }, { "epoch": 0.51, "grad_norm": 0.7368078283415932, "learning_rate": 5.085623093660201e-06, "loss": 0.5917, "step": 4001 }, { "epoch": 0.51, "grad_norm": 0.8385262255143276, "learning_rate": 5.08356008105616e-06, "loss": 0.6545, "step": 4002 }, { "epoch": 0.51, "grad_norm": 0.8479285076706935, "learning_rate": 5.08149705422271e-06, "loss": 0.6013, "step": 4003 }, { "epoch": 0.51, "grad_norm": 0.8424998573119318, "learning_rate": 5.079434013511159e-06, "loss": 0.6561, "step": 4004 }, { "epoch": 0.51, "grad_norm": 0.6573910500537073, "learning_rate": 5.077370959272823e-06, "loss": 0.5385, "step": 4005 }, { "epoch": 0.51, "grad_norm": 0.6955431099832903, "learning_rate": 5.07530789185902e-06, "loss": 0.5317, "step": 4006 }, { "epoch": 0.51, "grad_norm": 0.7157400746031785, "learning_rate": 5.073244811621064e-06, "loss": 0.5331, "step": 4007 }, { "epoch": 0.51, "grad_norm": 0.8321937434217676, "learning_rate": 5.071181718910283e-06, "loss": 0.5805, "step": 4008 }, { "epoch": 0.51, "grad_norm": 0.7235452608282076, "learning_rate": 5.069118614077996e-06, "loss": 0.4927, "step": 4009 }, { "epoch": 0.51, "grad_norm": 0.7483213653726687, "learning_rate": 5.067055497475529e-06, "loss": 0.5403, "step": 4010 }, { "epoch": 0.51, "grad_norm": 0.7078573562250018, "learning_rate": 5.064992369454209e-06, "loss": 0.4875, "step": 4011 }, { "epoch": 0.51, "grad_norm": 0.7502973305168704, "learning_rate": 5.062929230365367e-06, "loss": 0.5196, "step": 4012 }, { "epoch": 0.51, "grad_norm": 0.7040531099196715, "learning_rate": 5.060866080560332e-06, "loss": 0.545, "step": 4013 }, { "epoch": 0.51, "grad_norm": 0.7487422882476922, "learning_rate": 5.058802920390437e-06, "loss": 0.5427, "step": 4014 }, { "epoch": 0.51, "grad_norm": 0.6754838839546053, "learning_rate": 5.056739750207017e-06, "loss": 0.5902, "step": 4015 }, { "epoch": 0.51, "grad_norm": 0.7864563497390438, "learning_rate": 5.054676570361409e-06, "loss": 0.5393, "step": 4016 }, { "epoch": 0.51, "grad_norm": 0.7313864239976106, "learning_rate": 5.0526133812049505e-06, "loss": 0.5961, "step": 4017 }, { "epoch": 0.51, "grad_norm": 1.024762915717553, "learning_rate": 5.0505501830889814e-06, "loss": 0.6347, "step": 4018 }, { "epoch": 0.51, "grad_norm": 2.2535746641185765, "learning_rate": 5.048486976364843e-06, "loss": 0.6139, "step": 4019 }, { "epoch": 0.51, "grad_norm": 1.0739846441391503, "learning_rate": 5.046423761383878e-06, "loss": 0.6202, "step": 4020 }, { "epoch": 0.51, "grad_norm": 0.8000080541817809, "learning_rate": 5.0443605384974296e-06, "loss": 0.6333, "step": 4021 }, { "epoch": 0.51, "grad_norm": 0.6463793909051258, "learning_rate": 5.0422973080568435e-06, "loss": 0.5135, "step": 4022 }, { "epoch": 0.51, "grad_norm": 0.7829491687888811, "learning_rate": 5.040234070413465e-06, "loss": 0.6345, "step": 4023 }, { "epoch": 0.51, "grad_norm": 0.6191956525147597, "learning_rate": 5.038170825918646e-06, "loss": 0.5401, "step": 4024 }, { "epoch": 0.51, "grad_norm": 0.9346662196034498, "learning_rate": 5.036107574923732e-06, "loss": 0.6128, "step": 4025 }, { "epoch": 0.51, "grad_norm": 0.6952086486395497, "learning_rate": 5.034044317780074e-06, "loss": 0.5532, "step": 4026 }, { "epoch": 0.51, "grad_norm": 0.9625641551589321, "learning_rate": 5.031981054839025e-06, "loss": 0.6646, "step": 4027 }, { "epoch": 0.51, "grad_norm": 0.6335104692879268, "learning_rate": 5.029917786451933e-06, "loss": 0.5411, "step": 4028 }, { "epoch": 0.51, "grad_norm": 0.7783873207886681, "learning_rate": 5.027854512970156e-06, "loss": 0.5774, "step": 4029 }, { "epoch": 0.51, "grad_norm": 0.6910372647508887, "learning_rate": 5.025791234745045e-06, "loss": 0.5559, "step": 4030 }, { "epoch": 0.51, "grad_norm": 0.8510773266034817, "learning_rate": 5.023727952127955e-06, "loss": 0.6604, "step": 4031 }, { "epoch": 0.51, "grad_norm": 0.8000334262615656, "learning_rate": 5.021664665470242e-06, "loss": 0.662, "step": 4032 }, { "epoch": 0.51, "grad_norm": 0.7307104036133295, "learning_rate": 5.019601375123261e-06, "loss": 0.5373, "step": 4033 }, { "epoch": 0.51, "grad_norm": 0.83977828553352, "learning_rate": 5.017538081438371e-06, "loss": 0.6239, "step": 4034 }, { "epoch": 0.51, "grad_norm": 0.6962055082394998, "learning_rate": 5.0154747847669296e-06, "loss": 0.5451, "step": 4035 }, { "epoch": 0.51, "grad_norm": 0.6816986305335738, "learning_rate": 5.0134114854602915e-06, "loss": 0.57, "step": 4036 }, { "epoch": 0.51, "grad_norm": 0.7454360993146433, "learning_rate": 5.011348183869818e-06, "loss": 0.535, "step": 4037 }, { "epoch": 0.51, "grad_norm": 0.7795950484634164, "learning_rate": 5.009284880346867e-06, "loss": 0.5475, "step": 4038 }, { "epoch": 0.51, "grad_norm": 0.7770615466112519, "learning_rate": 5.007221575242798e-06, "loss": 0.624, "step": 4039 }, { "epoch": 0.51, "grad_norm": 0.7679775422861651, "learning_rate": 5.0051582689089695e-06, "loss": 0.5998, "step": 4040 }, { "epoch": 0.51, "grad_norm": 0.8314630627406452, "learning_rate": 5.003094961696742e-06, "loss": 0.5632, "step": 4041 }, { "epoch": 0.51, "grad_norm": 0.9268488565529598, "learning_rate": 5.001031653957475e-06, "loss": 0.6281, "step": 4042 }, { "epoch": 0.52, "grad_norm": 0.748857160195666, "learning_rate": 4.998968346042526e-06, "loss": 0.6174, "step": 4043 }, { "epoch": 0.52, "grad_norm": 0.8810042213745155, "learning_rate": 4.996905038303258e-06, "loss": 0.5827, "step": 4044 }, { "epoch": 0.52, "grad_norm": 0.7684608887343929, "learning_rate": 4.9948417310910305e-06, "loss": 0.5226, "step": 4045 }, { "epoch": 0.52, "grad_norm": 0.9062057746167201, "learning_rate": 4.992778424757203e-06, "loss": 0.6814, "step": 4046 }, { "epoch": 0.52, "grad_norm": 0.7155257495629138, "learning_rate": 4.990715119653133e-06, "loss": 0.5959, "step": 4047 }, { "epoch": 0.52, "grad_norm": 0.7682000075359732, "learning_rate": 4.988651816130183e-06, "loss": 0.543, "step": 4048 }, { "epoch": 0.52, "grad_norm": 0.7386597792514806, "learning_rate": 4.986588514539709e-06, "loss": 0.549, "step": 4049 }, { "epoch": 0.52, "grad_norm": 0.7158236979402866, "learning_rate": 4.984525215233073e-06, "loss": 0.527, "step": 4050 }, { "epoch": 0.52, "grad_norm": 0.7074382393112482, "learning_rate": 4.9824619185616306e-06, "loss": 0.5111, "step": 4051 }, { "epoch": 0.52, "grad_norm": 0.7774182121636132, "learning_rate": 4.980398624876739e-06, "loss": 0.5791, "step": 4052 }, { "epoch": 0.52, "grad_norm": 0.6800399341005464, "learning_rate": 4.978335334529759e-06, "loss": 0.5469, "step": 4053 }, { "epoch": 0.52, "grad_norm": 0.8545608272312367, "learning_rate": 4.976272047872046e-06, "loss": 0.5345, "step": 4054 }, { "epoch": 0.52, "grad_norm": 0.9342929609182672, "learning_rate": 4.974208765254956e-06, "loss": 0.6256, "step": 4055 }, { "epoch": 0.52, "grad_norm": 1.101260152172225, "learning_rate": 4.972145487029845e-06, "loss": 0.7008, "step": 4056 }, { "epoch": 0.52, "grad_norm": 0.739271931462951, "learning_rate": 4.9700822135480666e-06, "loss": 0.5344, "step": 4057 }, { "epoch": 0.52, "grad_norm": 0.6450279287802339, "learning_rate": 4.968018945160976e-06, "loss": 0.5501, "step": 4058 }, { "epoch": 0.52, "grad_norm": 0.6967132575754956, "learning_rate": 4.965955682219926e-06, "loss": 0.5568, "step": 4059 }, { "epoch": 0.52, "grad_norm": 0.6796344480069014, "learning_rate": 4.9638924250762685e-06, "loss": 0.5643, "step": 4060 }, { "epoch": 0.52, "grad_norm": 0.900453374746162, "learning_rate": 4.961829174081355e-06, "loss": 0.5704, "step": 4061 }, { "epoch": 0.52, "grad_norm": 0.7243762624036385, "learning_rate": 4.959765929586535e-06, "loss": 0.5977, "step": 4062 }, { "epoch": 0.52, "grad_norm": 0.8218576654450279, "learning_rate": 4.957702691943158e-06, "loss": 0.6586, "step": 4063 }, { "epoch": 0.52, "grad_norm": 0.6362813540723148, "learning_rate": 4.955639461502572e-06, "loss": 0.5356, "step": 4064 }, { "epoch": 0.52, "grad_norm": 0.8889743155565952, "learning_rate": 4.953576238616124e-06, "loss": 0.6222, "step": 4065 }, { "epoch": 0.52, "grad_norm": 0.9365538256952871, "learning_rate": 4.951513023635158e-06, "loss": 0.6536, "step": 4066 }, { "epoch": 0.52, "grad_norm": 0.8165656205255383, "learning_rate": 4.949449816911019e-06, "loss": 0.6798, "step": 4067 }, { "epoch": 0.52, "grad_norm": 0.6560026407121607, "learning_rate": 4.94738661879505e-06, "loss": 0.5158, "step": 4068 }, { "epoch": 0.52, "grad_norm": 0.6847872701073555, "learning_rate": 4.945323429638592e-06, "loss": 0.5346, "step": 4069 }, { "epoch": 0.52, "grad_norm": 0.7298798212031871, "learning_rate": 4.9432602497929845e-06, "loss": 0.5085, "step": 4070 }, { "epoch": 0.52, "grad_norm": 0.787514214704033, "learning_rate": 4.941197079609565e-06, "loss": 0.6202, "step": 4071 }, { "epoch": 0.52, "grad_norm": 0.7622857318901765, "learning_rate": 4.93913391943967e-06, "loss": 0.6352, "step": 4072 }, { "epoch": 0.52, "grad_norm": 0.8887235871144169, "learning_rate": 4.937070769634635e-06, "loss": 0.6245, "step": 4073 }, { "epoch": 0.52, "grad_norm": 0.752672774301807, "learning_rate": 4.935007630545793e-06, "loss": 0.5261, "step": 4074 }, { "epoch": 0.52, "grad_norm": 0.7244996374002073, "learning_rate": 4.932944502524472e-06, "loss": 0.5714, "step": 4075 }, { "epoch": 0.52, "grad_norm": 0.6936494046491752, "learning_rate": 4.930881385922006e-06, "loss": 0.5683, "step": 4076 }, { "epoch": 0.52, "grad_norm": 0.8723750629791313, "learning_rate": 4.928818281089719e-06, "loss": 0.6662, "step": 4077 }, { "epoch": 0.52, "grad_norm": 0.855739373273639, "learning_rate": 4.926755188378937e-06, "loss": 0.6208, "step": 4078 }, { "epoch": 0.52, "grad_norm": 0.7514287348334113, "learning_rate": 4.924692108140983e-06, "loss": 0.6109, "step": 4079 }, { "epoch": 0.52, "grad_norm": 0.7919236261826393, "learning_rate": 4.922629040727178e-06, "loss": 0.6223, "step": 4080 }, { "epoch": 0.52, "grad_norm": 0.6330712334112418, "learning_rate": 4.920565986488843e-06, "loss": 0.5371, "step": 4081 }, { "epoch": 0.52, "grad_norm": 0.6656315723042943, "learning_rate": 4.918502945777292e-06, "loss": 0.5302, "step": 4082 }, { "epoch": 0.52, "grad_norm": 0.8731822284215348, "learning_rate": 4.9164399189438406e-06, "loss": 0.6296, "step": 4083 }, { "epoch": 0.52, "grad_norm": 0.692620820443639, "learning_rate": 4.914376906339801e-06, "loss": 0.525, "step": 4084 }, { "epoch": 0.52, "grad_norm": 0.739448994586261, "learning_rate": 4.912313908316481e-06, "loss": 0.5613, "step": 4085 }, { "epoch": 0.52, "grad_norm": 0.6844504504137746, "learning_rate": 4.91025092522519e-06, "loss": 0.5387, "step": 4086 }, { "epoch": 0.52, "grad_norm": 0.7086653042114045, "learning_rate": 4.908187957417231e-06, "loss": 0.5402, "step": 4087 }, { "epoch": 0.52, "grad_norm": 1.2915150228772212, "learning_rate": 4.906125005243906e-06, "loss": 0.6434, "step": 4088 }, { "epoch": 0.52, "grad_norm": 1.0018152581955475, "learning_rate": 4.904062069056515e-06, "loss": 0.6857, "step": 4089 }, { "epoch": 0.52, "grad_norm": 0.8192208571986294, "learning_rate": 4.901999149206354e-06, "loss": 0.5908, "step": 4090 }, { "epoch": 0.52, "grad_norm": 0.7035363698070811, "learning_rate": 4.899936246044717e-06, "loss": 0.5796, "step": 4091 }, { "epoch": 0.52, "grad_norm": 0.6165506434615436, "learning_rate": 4.897873359922895e-06, "loss": 0.4763, "step": 4092 }, { "epoch": 0.52, "grad_norm": 0.8200177504226985, "learning_rate": 4.895810491192175e-06, "loss": 0.6052, "step": 4093 }, { "epoch": 0.52, "grad_norm": 0.8229489488822884, "learning_rate": 4.893747640203844e-06, "loss": 0.6062, "step": 4094 }, { "epoch": 0.52, "grad_norm": 0.652692222138843, "learning_rate": 4.891684807309183e-06, "loss": 0.5482, "step": 4095 }, { "epoch": 0.52, "grad_norm": 0.8795349466324024, "learning_rate": 4.88962199285947e-06, "loss": 0.6674, "step": 4096 }, { "epoch": 0.52, "grad_norm": 0.7052708115141737, "learning_rate": 4.887559197205982e-06, "loss": 0.5504, "step": 4097 }, { "epoch": 0.52, "grad_norm": 0.6918782353209547, "learning_rate": 4.8854964206999925e-06, "loss": 0.5698, "step": 4098 }, { "epoch": 0.52, "grad_norm": 0.7323308689131311, "learning_rate": 4.883433663692768e-06, "loss": 0.5483, "step": 4099 }, { "epoch": 0.52, "grad_norm": 0.7550902263089855, "learning_rate": 4.8813709265355766e-06, "loss": 0.5846, "step": 4100 }, { "epoch": 0.52, "grad_norm": 0.8750850486718925, "learning_rate": 4.879308209579681e-06, "loss": 0.6008, "step": 4101 }, { "epoch": 0.52, "grad_norm": 0.839526391064823, "learning_rate": 4.87724551317634e-06, "loss": 0.5571, "step": 4102 }, { "epoch": 0.52, "grad_norm": 0.7771312600937295, "learning_rate": 4.87518283767681e-06, "loss": 0.5953, "step": 4103 }, { "epoch": 0.52, "grad_norm": 0.8337602557345495, "learning_rate": 4.873120183432341e-06, "loss": 0.5363, "step": 4104 }, { "epoch": 0.52, "grad_norm": 0.9783892627739132, "learning_rate": 4.871057550794182e-06, "loss": 0.6017, "step": 4105 }, { "epoch": 0.52, "grad_norm": 0.6867520589318331, "learning_rate": 4.8689949401135805e-06, "loss": 0.5719, "step": 4106 }, { "epoch": 0.52, "grad_norm": 0.8579573019105908, "learning_rate": 4.866932351741776e-06, "loss": 0.5899, "step": 4107 }, { "epoch": 0.52, "grad_norm": 0.7553796620952964, "learning_rate": 4.8648697860300045e-06, "loss": 0.5752, "step": 4108 }, { "epoch": 0.52, "grad_norm": 0.6501571168851404, "learning_rate": 4.862807243329502e-06, "loss": 0.4775, "step": 4109 }, { "epoch": 0.52, "grad_norm": 0.7502625536494554, "learning_rate": 4.8607447239914966e-06, "loss": 0.4845, "step": 4110 }, { "epoch": 0.52, "grad_norm": 1.1260136222716974, "learning_rate": 4.858682228367213e-06, "loss": 0.642, "step": 4111 }, { "epoch": 0.52, "grad_norm": 0.9334970014842207, "learning_rate": 4.856619756807874e-06, "loss": 0.5651, "step": 4112 }, { "epoch": 0.52, "grad_norm": 0.8255293683964169, "learning_rate": 4.854557309664697e-06, "loss": 0.6214, "step": 4113 }, { "epoch": 0.52, "grad_norm": 0.6042962114920335, "learning_rate": 4.852494887288895e-06, "loss": 0.5141, "step": 4114 }, { "epoch": 0.52, "grad_norm": 0.9514881845680646, "learning_rate": 4.850432490031677e-06, "loss": 0.6715, "step": 4115 }, { "epoch": 0.52, "grad_norm": 1.1647363006443063, "learning_rate": 4.848370118244247e-06, "loss": 0.6361, "step": 4116 }, { "epoch": 0.52, "grad_norm": 0.7391372996014577, "learning_rate": 4.846307772277808e-06, "loss": 0.5305, "step": 4117 }, { "epoch": 0.52, "grad_norm": 0.8531997857899264, "learning_rate": 4.844245452483551e-06, "loss": 0.658, "step": 4118 }, { "epoch": 0.52, "grad_norm": 0.6608306684098285, "learning_rate": 4.842183159212673e-06, "loss": 0.5569, "step": 4119 }, { "epoch": 0.52, "grad_norm": 0.7316685742795973, "learning_rate": 4.840120892816359e-06, "loss": 0.5802, "step": 4120 }, { "epoch": 0.53, "grad_norm": 0.8848493996257852, "learning_rate": 4.838058653645791e-06, "loss": 0.6008, "step": 4121 }, { "epoch": 0.53, "grad_norm": 0.681842390475123, "learning_rate": 4.835996442052147e-06, "loss": 0.5788, "step": 4122 }, { "epoch": 0.53, "grad_norm": 0.7990535059080215, "learning_rate": 4.8339342583866e-06, "loss": 0.6447, "step": 4123 }, { "epoch": 0.53, "grad_norm": 0.9179589949333559, "learning_rate": 4.831872103000321e-06, "loss": 0.6416, "step": 4124 }, { "epoch": 0.53, "grad_norm": 0.6833154187757796, "learning_rate": 4.829809976244469e-06, "loss": 0.5572, "step": 4125 }, { "epoch": 0.53, "grad_norm": 0.6647438949110751, "learning_rate": 4.8277478784702056e-06, "loss": 0.56, "step": 4126 }, { "epoch": 0.53, "grad_norm": 0.9807225034051716, "learning_rate": 4.825685810028683e-06, "loss": 0.6181, "step": 4127 }, { "epoch": 0.53, "grad_norm": 0.808616393443888, "learning_rate": 4.823623771271053e-06, "loss": 0.6407, "step": 4128 }, { "epoch": 0.53, "grad_norm": 0.9570981678184455, "learning_rate": 4.821561762548456e-06, "loss": 0.6153, "step": 4129 }, { "epoch": 0.53, "grad_norm": 0.6445356911638195, "learning_rate": 4.819499784212032e-06, "loss": 0.5625, "step": 4130 }, { "epoch": 0.53, "grad_norm": 0.6331674292315457, "learning_rate": 4.817437836612914e-06, "loss": 0.5242, "step": 4131 }, { "epoch": 0.53, "grad_norm": 0.7534944480206083, "learning_rate": 4.8153759201022305e-06, "loss": 0.6274, "step": 4132 }, { "epoch": 0.53, "grad_norm": 0.8580250627691435, "learning_rate": 4.813314035031105e-06, "loss": 0.6101, "step": 4133 }, { "epoch": 0.53, "grad_norm": 0.7862684191226618, "learning_rate": 4.811252181750654e-06, "loss": 0.5187, "step": 4134 }, { "epoch": 0.53, "grad_norm": 0.6926710365606015, "learning_rate": 4.809190360611991e-06, "loss": 0.5261, "step": 4135 }, { "epoch": 0.53, "grad_norm": 0.8000754993874579, "learning_rate": 4.807128571966221e-06, "loss": 0.6078, "step": 4136 }, { "epoch": 0.53, "grad_norm": 0.8498234927990753, "learning_rate": 4.805066816164446e-06, "loss": 0.6682, "step": 4137 }, { "epoch": 0.53, "grad_norm": 0.6575167296419226, "learning_rate": 4.803005093557761e-06, "loss": 0.5142, "step": 4138 }, { "epoch": 0.53, "grad_norm": 0.7581219805452567, "learning_rate": 4.800943404497257e-06, "loss": 0.5524, "step": 4139 }, { "epoch": 0.53, "grad_norm": 0.8137812092162776, "learning_rate": 4.798881749334017e-06, "loss": 0.5301, "step": 4140 }, { "epoch": 0.53, "grad_norm": 0.7244622361576895, "learning_rate": 4.79682012841912e-06, "loss": 0.5535, "step": 4141 }, { "epoch": 0.53, "grad_norm": 0.6706564469193905, "learning_rate": 4.794758542103639e-06, "loss": 0.5473, "step": 4142 }, { "epoch": 0.53, "grad_norm": 0.9196743076637054, "learning_rate": 4.792696990738639e-06, "loss": 0.5995, "step": 4143 }, { "epoch": 0.53, "grad_norm": 1.0376612947006751, "learning_rate": 4.790635474675183e-06, "loss": 0.6848, "step": 4144 }, { "epoch": 0.53, "grad_norm": 0.7442264398578571, "learning_rate": 4.788573994264321e-06, "loss": 0.5235, "step": 4145 }, { "epoch": 0.53, "grad_norm": 0.7879620646661468, "learning_rate": 4.786512549857109e-06, "loss": 0.5406, "step": 4146 }, { "epoch": 0.53, "grad_norm": 0.7363211574087645, "learning_rate": 4.784451141804584e-06, "loss": 0.5993, "step": 4147 }, { "epoch": 0.53, "grad_norm": 0.6084256955526857, "learning_rate": 4.782389770457785e-06, "loss": 0.5174, "step": 4148 }, { "epoch": 0.53, "grad_norm": 0.8886147830023661, "learning_rate": 4.7803284361677405e-06, "loss": 0.6087, "step": 4149 }, { "epoch": 0.53, "grad_norm": 0.8320970327971176, "learning_rate": 4.778267139285474e-06, "loss": 0.6966, "step": 4150 }, { "epoch": 0.53, "grad_norm": 0.8453912392042926, "learning_rate": 4.776205880162005e-06, "loss": 0.6263, "step": 4151 }, { "epoch": 0.53, "grad_norm": 0.7209206546214638, "learning_rate": 4.774144659148343e-06, "loss": 0.5564, "step": 4152 }, { "epoch": 0.53, "grad_norm": 0.8315453701652941, "learning_rate": 4.772083476595492e-06, "loss": 0.5745, "step": 4153 }, { "epoch": 0.53, "grad_norm": 0.7994989416708732, "learning_rate": 4.770022332854451e-06, "loss": 0.636, "step": 4154 }, { "epoch": 0.53, "grad_norm": 0.6321210320547559, "learning_rate": 4.767961228276211e-06, "loss": 0.4907, "step": 4155 }, { "epoch": 0.53, "grad_norm": 0.7873716729677722, "learning_rate": 4.765900163211756e-06, "loss": 0.617, "step": 4156 }, { "epoch": 0.53, "grad_norm": 0.7058287296756292, "learning_rate": 4.763839138012065e-06, "loss": 0.5601, "step": 4157 }, { "epoch": 0.53, "grad_norm": 0.80190079540078, "learning_rate": 4.7617781530281085e-06, "loss": 0.6593, "step": 4158 }, { "epoch": 0.53, "grad_norm": 0.7599906031130926, "learning_rate": 4.75971720861085e-06, "loss": 0.5067, "step": 4159 }, { "epoch": 0.53, "grad_norm": 0.8914924312843188, "learning_rate": 4.757656305111248e-06, "loss": 0.6147, "step": 4160 }, { "epoch": 0.53, "grad_norm": 0.8646992925410405, "learning_rate": 4.755595442880253e-06, "loss": 0.6275, "step": 4161 }, { "epoch": 0.53, "grad_norm": 0.6821930676796342, "learning_rate": 4.753534622268807e-06, "loss": 0.5324, "step": 4162 }, { "epoch": 0.53, "grad_norm": 0.6402092434188056, "learning_rate": 4.7514738436278465e-06, "loss": 0.4989, "step": 4163 }, { "epoch": 0.53, "grad_norm": 0.8822096973442393, "learning_rate": 4.749413107308302e-06, "loss": 0.6481, "step": 4164 }, { "epoch": 0.53, "grad_norm": 0.8880714360486498, "learning_rate": 4.747352413661095e-06, "loss": 0.6724, "step": 4165 }, { "epoch": 0.53, "grad_norm": 0.5886914657939684, "learning_rate": 4.745291763037141e-06, "loss": 0.5034, "step": 4166 }, { "epoch": 0.53, "grad_norm": 0.7872696460328779, "learning_rate": 4.7432311557873444e-06, "loss": 0.6334, "step": 4167 }, { "epoch": 0.53, "grad_norm": 0.7871243209929952, "learning_rate": 4.741170592262608e-06, "loss": 0.6144, "step": 4168 }, { "epoch": 0.53, "grad_norm": 0.8299558773385124, "learning_rate": 4.739110072813823e-06, "loss": 0.6586, "step": 4169 }, { "epoch": 0.53, "grad_norm": 0.7560711007689411, "learning_rate": 4.737049597791875e-06, "loss": 0.6227, "step": 4170 }, { "epoch": 0.53, "grad_norm": 0.7621097540827885, "learning_rate": 4.7349891675476404e-06, "loss": 0.6528, "step": 4171 }, { "epoch": 0.53, "grad_norm": 0.7226277006442361, "learning_rate": 4.732928782431991e-06, "loss": 0.5208, "step": 4172 }, { "epoch": 0.53, "grad_norm": 0.7703314032836279, "learning_rate": 4.730868442795786e-06, "loss": 0.671, "step": 4173 }, { "epoch": 0.53, "grad_norm": 0.7643178458922305, "learning_rate": 4.728808148989882e-06, "loss": 0.6007, "step": 4174 }, { "epoch": 0.53, "grad_norm": 0.7069434530417544, "learning_rate": 4.726747901365125e-06, "loss": 0.5177, "step": 4175 }, { "epoch": 0.53, "grad_norm": 0.8840323008373085, "learning_rate": 4.724687700272355e-06, "loss": 0.6241, "step": 4176 }, { "epoch": 0.53, "grad_norm": 0.8524444547142161, "learning_rate": 4.7226275460623985e-06, "loss": 0.6308, "step": 4177 }, { "epoch": 0.53, "grad_norm": 0.7282595187109793, "learning_rate": 4.7205674390860834e-06, "loss": 0.5175, "step": 4178 }, { "epoch": 0.53, "grad_norm": 0.7263627008817285, "learning_rate": 4.718507379694222e-06, "loss": 0.534, "step": 4179 }, { "epoch": 0.53, "grad_norm": 0.6248345499944433, "learning_rate": 4.716447368237621e-06, "loss": 0.5183, "step": 4180 }, { "epoch": 0.53, "grad_norm": 0.6897538293194313, "learning_rate": 4.71438740506708e-06, "loss": 0.5253, "step": 4181 }, { "epoch": 0.53, "grad_norm": 0.8227693698540856, "learning_rate": 4.712327490533387e-06, "loss": 0.5827, "step": 4182 }, { "epoch": 0.53, "grad_norm": 0.6922792568525376, "learning_rate": 4.710267624987326e-06, "loss": 0.5837, "step": 4183 }, { "epoch": 0.53, "grad_norm": 0.9211946554064792, "learning_rate": 4.7082078087796706e-06, "loss": 0.6284, "step": 4184 }, { "epoch": 0.53, "grad_norm": 0.7742322407013416, "learning_rate": 4.706148042261185e-06, "loss": 0.5746, "step": 4185 }, { "epoch": 0.53, "grad_norm": 0.6622490015545, "learning_rate": 4.7040883257826266e-06, "loss": 0.5731, "step": 4186 }, { "epoch": 0.53, "grad_norm": 0.7216346112681995, "learning_rate": 4.702028659694744e-06, "loss": 0.6083, "step": 4187 }, { "epoch": 0.53, "grad_norm": 0.6494127389970505, "learning_rate": 4.699969044348276e-06, "loss": 0.5231, "step": 4188 }, { "epoch": 0.53, "grad_norm": 0.7268336785111906, "learning_rate": 4.697909480093954e-06, "loss": 0.5199, "step": 4189 }, { "epoch": 0.53, "grad_norm": 0.6600914464233625, "learning_rate": 4.6958499672824996e-06, "loss": 0.5412, "step": 4190 }, { "epoch": 0.53, "grad_norm": 0.9927621903345056, "learning_rate": 4.693790506264627e-06, "loss": 0.6956, "step": 4191 }, { "epoch": 0.53, "grad_norm": 0.9086227675597718, "learning_rate": 4.691731097391043e-06, "loss": 0.6723, "step": 4192 }, { "epoch": 0.53, "grad_norm": 0.8441032132839217, "learning_rate": 4.6896717410124404e-06, "loss": 0.6084, "step": 4193 }, { "epoch": 0.53, "grad_norm": 0.7710085844145217, "learning_rate": 4.687612437479508e-06, "loss": 0.5634, "step": 4194 }, { "epoch": 0.53, "grad_norm": 0.9517107933846263, "learning_rate": 4.685553187142923e-06, "loss": 0.6268, "step": 4195 }, { "epoch": 0.53, "grad_norm": 0.9225947092171035, "learning_rate": 4.683493990353354e-06, "loss": 0.6226, "step": 4196 }, { "epoch": 0.53, "grad_norm": 0.7977823772216441, "learning_rate": 4.681434847461461e-06, "loss": 0.6272, "step": 4197 }, { "epoch": 0.53, "grad_norm": 0.6397954052173853, "learning_rate": 4.679375758817894e-06, "loss": 0.53, "step": 4198 }, { "epoch": 0.53, "grad_norm": 0.758027755916074, "learning_rate": 4.677316724773296e-06, "loss": 0.5419, "step": 4199 }, { "epoch": 0.54, "grad_norm": 0.805995659523113, "learning_rate": 4.675257745678298e-06, "loss": 0.5214, "step": 4200 }, { "epoch": 0.54, "grad_norm": 0.7623516212334143, "learning_rate": 4.673198821883523e-06, "loss": 0.5943, "step": 4201 }, { "epoch": 0.54, "grad_norm": 0.6844731682817229, "learning_rate": 4.671139953739584e-06, "loss": 0.5775, "step": 4202 }, { "epoch": 0.54, "grad_norm": 0.6400697253229971, "learning_rate": 4.669081141597083e-06, "loss": 0.538, "step": 4203 }, { "epoch": 0.54, "grad_norm": 0.6232927825322789, "learning_rate": 4.667022385806618e-06, "loss": 0.5355, "step": 4204 }, { "epoch": 0.54, "grad_norm": 0.8829371692400296, "learning_rate": 4.664963686718772e-06, "loss": 0.5787, "step": 4205 }, { "epoch": 0.54, "grad_norm": 0.7526636968478414, "learning_rate": 4.662905044684119e-06, "loss": 0.5598, "step": 4206 }, { "epoch": 0.54, "grad_norm": 0.8491422713028841, "learning_rate": 4.660846460053225e-06, "loss": 0.5434, "step": 4207 }, { "epoch": 0.54, "grad_norm": 0.7709862400718848, "learning_rate": 4.6587879331766465e-06, "loss": 0.5639, "step": 4208 }, { "epoch": 0.54, "grad_norm": 0.9292947724551804, "learning_rate": 4.6567294644049275e-06, "loss": 0.5923, "step": 4209 }, { "epoch": 0.54, "grad_norm": 0.6984239894360695, "learning_rate": 4.654671054088604e-06, "loss": 0.568, "step": 4210 }, { "epoch": 0.54, "grad_norm": 0.5855105801908731, "learning_rate": 4.652612702578203e-06, "loss": 0.5205, "step": 4211 }, { "epoch": 0.54, "grad_norm": 0.975030417128812, "learning_rate": 4.65055441022424e-06, "loss": 0.6098, "step": 4212 }, { "epoch": 0.54, "grad_norm": 0.712596075126336, "learning_rate": 4.648496177377221e-06, "loss": 0.6229, "step": 4213 }, { "epoch": 0.54, "grad_norm": 0.6613009493405987, "learning_rate": 4.64643800438764e-06, "loss": 0.4978, "step": 4214 }, { "epoch": 0.54, "grad_norm": 0.6615734185842508, "learning_rate": 4.644379891605983e-06, "loss": 0.5352, "step": 4215 }, { "epoch": 0.54, "grad_norm": 1.0248420183593918, "learning_rate": 4.642321839382726e-06, "loss": 0.586, "step": 4216 }, { "epoch": 0.54, "grad_norm": 0.6693688003932176, "learning_rate": 4.640263848068333e-06, "loss": 0.5078, "step": 4217 }, { "epoch": 0.54, "grad_norm": 1.0003929349676586, "learning_rate": 4.63820591801326e-06, "loss": 0.5979, "step": 4218 }, { "epoch": 0.54, "grad_norm": 0.6612912943875484, "learning_rate": 4.63614804956795e-06, "loss": 0.5316, "step": 4219 }, { "epoch": 0.54, "grad_norm": 0.6997957341277706, "learning_rate": 4.634090243082837e-06, "loss": 0.5513, "step": 4220 }, { "epoch": 0.54, "grad_norm": 0.7981191881569423, "learning_rate": 4.632032498908343e-06, "loss": 0.5346, "step": 4221 }, { "epoch": 0.54, "grad_norm": 0.7398219267470332, "learning_rate": 4.62997481739488e-06, "loss": 0.6506, "step": 4222 }, { "epoch": 0.54, "grad_norm": 0.6775551987599228, "learning_rate": 4.627917198892852e-06, "loss": 0.5305, "step": 4223 }, { "epoch": 0.54, "grad_norm": 0.7484709186870059, "learning_rate": 4.625859643752648e-06, "loss": 0.5805, "step": 4224 }, { "epoch": 0.54, "grad_norm": 0.663959302220853, "learning_rate": 4.62380215232465e-06, "loss": 0.5636, "step": 4225 }, { "epoch": 0.54, "grad_norm": 0.8098130906079022, "learning_rate": 4.621744724959225e-06, "loss": 0.5691, "step": 4226 }, { "epoch": 0.54, "grad_norm": 0.8157104117008908, "learning_rate": 4.619687362006734e-06, "loss": 0.6222, "step": 4227 }, { "epoch": 0.54, "grad_norm": 0.7426790421916588, "learning_rate": 4.617630063817522e-06, "loss": 0.4971, "step": 4228 }, { "epoch": 0.54, "grad_norm": 0.7699024957101612, "learning_rate": 4.615572830741926e-06, "loss": 0.5253, "step": 4229 }, { "epoch": 0.54, "grad_norm": 0.8602413979088466, "learning_rate": 4.613515663130273e-06, "loss": 0.5974, "step": 4230 }, { "epoch": 0.54, "grad_norm": 0.6950353640061578, "learning_rate": 4.611458561332876e-06, "loss": 0.5366, "step": 4231 }, { "epoch": 0.54, "grad_norm": 0.877582513364586, "learning_rate": 4.609401525700039e-06, "loss": 0.5723, "step": 4232 }, { "epoch": 0.54, "grad_norm": 0.8956010969356248, "learning_rate": 4.607344556582052e-06, "loss": 0.6101, "step": 4233 }, { "epoch": 0.54, "grad_norm": 0.7394549044676947, "learning_rate": 4.605287654329197e-06, "loss": 0.5248, "step": 4234 }, { "epoch": 0.54, "grad_norm": 0.8387306343856894, "learning_rate": 4.603230819291742e-06, "loss": 0.5788, "step": 4235 }, { "epoch": 0.54, "grad_norm": 0.7233643904928372, "learning_rate": 4.601174051819945e-06, "loss": 0.5437, "step": 4236 }, { "epoch": 0.54, "grad_norm": 0.7533710005842715, "learning_rate": 4.599117352264052e-06, "loss": 0.5077, "step": 4237 }, { "epoch": 0.54, "grad_norm": 0.7965672752490813, "learning_rate": 4.597060720974298e-06, "loss": 0.5525, "step": 4238 }, { "epoch": 0.54, "grad_norm": 0.70387357182827, "learning_rate": 4.595004158300904e-06, "loss": 0.5491, "step": 4239 }, { "epoch": 0.54, "grad_norm": 0.8188663480296382, "learning_rate": 4.592947664594085e-06, "loss": 0.68, "step": 4240 }, { "epoch": 0.54, "grad_norm": 0.8396903003478393, "learning_rate": 4.590891240204037e-06, "loss": 0.6159, "step": 4241 }, { "epoch": 0.54, "grad_norm": 0.968591130232146, "learning_rate": 4.588834885480946e-06, "loss": 0.6472, "step": 4242 }, { "epoch": 0.54, "grad_norm": 0.8045201236616198, "learning_rate": 4.586778600774994e-06, "loss": 0.5603, "step": 4243 }, { "epoch": 0.54, "grad_norm": 0.6976429329288849, "learning_rate": 4.5847223864363406e-06, "loss": 0.5553, "step": 4244 }, { "epoch": 0.54, "grad_norm": 0.8433699324387849, "learning_rate": 4.582666242815138e-06, "loss": 0.6371, "step": 4245 }, { "epoch": 0.54, "grad_norm": 0.7927997737236845, "learning_rate": 4.580610170261526e-06, "loss": 0.6021, "step": 4246 }, { "epoch": 0.54, "grad_norm": 0.7848123587632546, "learning_rate": 4.578554169125633e-06, "loss": 0.6351, "step": 4247 }, { "epoch": 0.54, "grad_norm": 0.8202415591065428, "learning_rate": 4.576498239757574e-06, "loss": 0.6087, "step": 4248 }, { "epoch": 0.54, "grad_norm": 0.6976202333638307, "learning_rate": 4.5744423825074545e-06, "loss": 0.5412, "step": 4249 }, { "epoch": 0.54, "grad_norm": 0.8654372934442717, "learning_rate": 4.572386597725363e-06, "loss": 0.6182, "step": 4250 }, { "epoch": 0.54, "grad_norm": 0.7821929778206282, "learning_rate": 4.5703308857613795e-06, "loss": 0.549, "step": 4251 }, { "epoch": 0.54, "grad_norm": 0.8559460081716068, "learning_rate": 4.568275246965569e-06, "loss": 0.6377, "step": 4252 }, { "epoch": 0.54, "grad_norm": 0.6575443589344072, "learning_rate": 4.5662196816879866e-06, "loss": 0.535, "step": 4253 }, { "epoch": 0.54, "grad_norm": 0.6187164243379435, "learning_rate": 4.564164190278674e-06, "loss": 0.5234, "step": 4254 }, { "epoch": 0.54, "grad_norm": 0.7225818388747259, "learning_rate": 4.562108773087659e-06, "loss": 0.5264, "step": 4255 }, { "epoch": 0.54, "grad_norm": 0.6340733376867966, "learning_rate": 4.560053430464958e-06, "loss": 0.5075, "step": 4256 }, { "epoch": 0.54, "grad_norm": 0.7316111365959566, "learning_rate": 4.557998162760574e-06, "loss": 0.5818, "step": 4257 }, { "epoch": 0.54, "grad_norm": 0.7981144418366527, "learning_rate": 4.555942970324498e-06, "loss": 0.5854, "step": 4258 }, { "epoch": 0.54, "grad_norm": 0.7182520923699315, "learning_rate": 4.553887853506709e-06, "loss": 0.5827, "step": 4259 }, { "epoch": 0.54, "grad_norm": 0.6450647639668683, "learning_rate": 4.551832812657171e-06, "loss": 0.5216, "step": 4260 }, { "epoch": 0.54, "grad_norm": 0.641867659700773, "learning_rate": 4.549777848125834e-06, "loss": 0.5416, "step": 4261 }, { "epoch": 0.54, "grad_norm": 0.9670865181996074, "learning_rate": 4.54772296026264e-06, "loss": 0.6679, "step": 4262 }, { "epoch": 0.54, "grad_norm": 0.684732296961073, "learning_rate": 4.545668149417515e-06, "loss": 0.56, "step": 4263 }, { "epoch": 0.54, "grad_norm": 0.6449500738330984, "learning_rate": 4.543613415940371e-06, "loss": 0.5345, "step": 4264 }, { "epoch": 0.54, "grad_norm": 1.1918378576783348, "learning_rate": 4.541558760181108e-06, "loss": 0.6161, "step": 4265 }, { "epoch": 0.54, "grad_norm": 0.7378955636160488, "learning_rate": 4.53950418248961e-06, "loss": 0.5991, "step": 4266 }, { "epoch": 0.54, "grad_norm": 0.7680941347583717, "learning_rate": 4.537449683215754e-06, "loss": 0.5975, "step": 4267 }, { "epoch": 0.54, "grad_norm": 0.871898713417721, "learning_rate": 4.535395262709398e-06, "loss": 0.6081, "step": 4268 }, { "epoch": 0.54, "grad_norm": 0.8247345272844328, "learning_rate": 4.5333409213203885e-06, "loss": 0.6472, "step": 4269 }, { "epoch": 0.54, "grad_norm": 0.9495176533479497, "learning_rate": 4.531286659398558e-06, "loss": 0.6238, "step": 4270 }, { "epoch": 0.54, "grad_norm": 0.6644655768280376, "learning_rate": 4.529232477293727e-06, "loss": 0.5453, "step": 4271 }, { "epoch": 0.54, "grad_norm": 0.7292651735979029, "learning_rate": 4.527178375355701e-06, "loss": 0.5272, "step": 4272 }, { "epoch": 0.54, "grad_norm": 0.8402002434211814, "learning_rate": 4.525124353934271e-06, "loss": 0.5777, "step": 4273 }, { "epoch": 0.54, "grad_norm": 0.8825379302849763, "learning_rate": 4.523070413379216e-06, "loss": 0.6504, "step": 4274 }, { "epoch": 0.54, "grad_norm": 0.8167405971518923, "learning_rate": 4.521016554040301e-06, "loss": 0.6795, "step": 4275 }, { "epoch": 0.54, "grad_norm": 0.8976887544850193, "learning_rate": 4.518962776267278e-06, "loss": 0.6136, "step": 4276 }, { "epoch": 0.54, "grad_norm": 0.8124866265258778, "learning_rate": 4.516909080409882e-06, "loss": 0.6481, "step": 4277 }, { "epoch": 0.55, "grad_norm": 0.6936794794611817, "learning_rate": 4.514855466817837e-06, "loss": 0.5548, "step": 4278 }, { "epoch": 0.55, "grad_norm": 0.7250953358677796, "learning_rate": 4.512801935840852e-06, "loss": 0.527, "step": 4279 }, { "epoch": 0.55, "grad_norm": 0.7268198807137963, "learning_rate": 4.510748487828621e-06, "loss": 0.5515, "step": 4280 }, { "epoch": 0.55, "grad_norm": 0.6980211816613745, "learning_rate": 4.508695123130827e-06, "loss": 0.5493, "step": 4281 }, { "epoch": 0.55, "grad_norm": 0.7026408716876937, "learning_rate": 4.506641842097134e-06, "loss": 0.5328, "step": 4282 }, { "epoch": 0.55, "grad_norm": 0.8673192725498058, "learning_rate": 4.5045886450771954e-06, "loss": 0.6134, "step": 4283 }, { "epoch": 0.55, "grad_norm": 0.8659812536920948, "learning_rate": 4.5025355324206495e-06, "loss": 0.6315, "step": 4284 }, { "epoch": 0.55, "grad_norm": 0.8689227197290147, "learning_rate": 4.5004825044771205e-06, "loss": 0.6875, "step": 4285 }, { "epoch": 0.55, "grad_norm": 0.6850586781361362, "learning_rate": 4.498429561596217e-06, "loss": 0.5257, "step": 4286 }, { "epoch": 0.55, "grad_norm": 0.837268635347408, "learning_rate": 4.4963767041275336e-06, "loss": 0.6089, "step": 4287 }, { "epoch": 0.55, "grad_norm": 0.835248129339369, "learning_rate": 4.49432393242065e-06, "loss": 0.6253, "step": 4288 }, { "epoch": 0.55, "grad_norm": 0.8530761989285995, "learning_rate": 4.492271246825133e-06, "loss": 0.6113, "step": 4289 }, { "epoch": 0.55, "grad_norm": 0.8664589337880827, "learning_rate": 4.490218647690534e-06, "loss": 0.6804, "step": 4290 }, { "epoch": 0.55, "grad_norm": 0.8726275259486077, "learning_rate": 4.488166135366388e-06, "loss": 0.6084, "step": 4291 }, { "epoch": 0.55, "grad_norm": 0.7980753409698471, "learning_rate": 4.4861137102022165e-06, "loss": 0.4971, "step": 4292 }, { "epoch": 0.55, "grad_norm": 0.8014274827688181, "learning_rate": 4.484061372547527e-06, "loss": 0.6543, "step": 4293 }, { "epoch": 0.55, "grad_norm": 0.892402177590191, "learning_rate": 4.482009122751811e-06, "loss": 0.6047, "step": 4294 }, { "epoch": 0.55, "grad_norm": 0.9968851672742536, "learning_rate": 4.479956961164544e-06, "loss": 0.6578, "step": 4295 }, { "epoch": 0.55, "grad_norm": 0.631712436125498, "learning_rate": 4.47790488813519e-06, "loss": 0.5045, "step": 4296 }, { "epoch": 0.55, "grad_norm": 0.7964008122827447, "learning_rate": 4.4758529040131934e-06, "loss": 0.6333, "step": 4297 }, { "epoch": 0.55, "grad_norm": 0.6713894780671378, "learning_rate": 4.473801009147987e-06, "loss": 0.5069, "step": 4298 }, { "epoch": 0.55, "grad_norm": 0.7975667380015566, "learning_rate": 4.471749203888987e-06, "loss": 0.5779, "step": 4299 }, { "epoch": 0.55, "grad_norm": 0.8390345092664563, "learning_rate": 4.4696974885855945e-06, "loss": 0.6623, "step": 4300 }, { "epoch": 0.55, "grad_norm": 0.6421372468223091, "learning_rate": 4.467645863587193e-06, "loss": 0.4968, "step": 4301 }, { "epoch": 0.55, "grad_norm": 0.8953854383897983, "learning_rate": 4.465594329243157e-06, "loss": 0.6599, "step": 4302 }, { "epoch": 0.55, "grad_norm": 0.8752159435031448, "learning_rate": 4.463542885902839e-06, "loss": 0.6304, "step": 4303 }, { "epoch": 0.55, "grad_norm": 0.7839989703639463, "learning_rate": 4.461491533915579e-06, "loss": 0.5016, "step": 4304 }, { "epoch": 0.55, "grad_norm": 0.665687292643882, "learning_rate": 4.4594402736307e-06, "loss": 0.4915, "step": 4305 }, { "epoch": 0.55, "grad_norm": 0.7011905377440351, "learning_rate": 4.457389105397511e-06, "loss": 0.5429, "step": 4306 }, { "epoch": 0.55, "grad_norm": 0.8399208018122137, "learning_rate": 4.455338029565306e-06, "loss": 0.6619, "step": 4307 }, { "epoch": 0.55, "grad_norm": 0.8548219064805802, "learning_rate": 4.4532870464833585e-06, "loss": 0.6475, "step": 4308 }, { "epoch": 0.55, "grad_norm": 0.6642099735601585, "learning_rate": 4.451236156500932e-06, "loss": 0.5323, "step": 4309 }, { "epoch": 0.55, "grad_norm": 0.8238343367257817, "learning_rate": 4.449185359967271e-06, "loss": 0.7038, "step": 4310 }, { "epoch": 0.55, "grad_norm": 0.7450769461965905, "learning_rate": 4.447134657231605e-06, "loss": 0.5496, "step": 4311 }, { "epoch": 0.55, "grad_norm": 0.7836224243294251, "learning_rate": 4.445084048643147e-06, "loss": 0.5544, "step": 4312 }, { "epoch": 0.55, "grad_norm": 0.8155307267419714, "learning_rate": 4.443033534551094e-06, "loss": 0.6542, "step": 4313 }, { "epoch": 0.55, "grad_norm": 0.7207284168490987, "learning_rate": 4.440983115304627e-06, "loss": 0.5948, "step": 4314 }, { "epoch": 0.55, "grad_norm": 0.9337629737871321, "learning_rate": 4.438932791252913e-06, "loss": 0.7644, "step": 4315 }, { "epoch": 0.55, "grad_norm": 0.9049565928982354, "learning_rate": 4.436882562745099e-06, "loss": 0.6371, "step": 4316 }, { "epoch": 0.55, "grad_norm": 0.8313283436625327, "learning_rate": 4.434832430130319e-06, "loss": 0.6796, "step": 4317 }, { "epoch": 0.55, "grad_norm": 0.872529747207513, "learning_rate": 4.432782393757687e-06, "loss": 0.6456, "step": 4318 }, { "epoch": 0.55, "grad_norm": 0.898426694309625, "learning_rate": 4.430732453976305e-06, "loss": 0.594, "step": 4319 }, { "epoch": 0.55, "grad_norm": 0.7241863911057442, "learning_rate": 4.428682611135255e-06, "loss": 0.6491, "step": 4320 }, { "epoch": 0.55, "grad_norm": 0.6795563495998798, "learning_rate": 4.426632865583605e-06, "loss": 0.5932, "step": 4321 }, { "epoch": 0.55, "grad_norm": 0.8702003187395987, "learning_rate": 4.424583217670405e-06, "loss": 0.6853, "step": 4322 }, { "epoch": 0.55, "grad_norm": 0.819095505720782, "learning_rate": 4.4225336677446875e-06, "loss": 0.5969, "step": 4323 }, { "epoch": 0.55, "grad_norm": 0.7352696441835315, "learning_rate": 4.42048421615547e-06, "loss": 0.6246, "step": 4324 }, { "epoch": 0.55, "grad_norm": 0.7780170497139071, "learning_rate": 4.418434863251754e-06, "loss": 0.5706, "step": 4325 }, { "epoch": 0.55, "grad_norm": 0.9237510905134435, "learning_rate": 4.416385609382522e-06, "loss": 0.6308, "step": 4326 }, { "epoch": 0.55, "grad_norm": 0.7634558482389361, "learning_rate": 4.414336454896739e-06, "loss": 0.6874, "step": 4327 }, { "epoch": 0.55, "grad_norm": 0.8966712398318945, "learning_rate": 4.412287400143358e-06, "loss": 0.5595, "step": 4328 }, { "epoch": 0.55, "grad_norm": 0.8721660430327461, "learning_rate": 4.41023844547131e-06, "loss": 0.7074, "step": 4329 }, { "epoch": 0.55, "grad_norm": 0.798209988546998, "learning_rate": 4.40818959122951e-06, "loss": 0.6548, "step": 4330 }, { "epoch": 0.55, "grad_norm": 0.6215329889662746, "learning_rate": 4.406140837766858e-06, "loss": 0.5668, "step": 4331 }, { "epoch": 0.55, "grad_norm": 0.7887135662123005, "learning_rate": 4.404092185432234e-06, "loss": 0.639, "step": 4332 }, { "epoch": 0.55, "grad_norm": 1.1032231106437858, "learning_rate": 4.4020436345745035e-06, "loss": 0.6213, "step": 4333 }, { "epoch": 0.55, "grad_norm": 0.6681236292645945, "learning_rate": 4.399995185542512e-06, "loss": 0.5561, "step": 4334 }, { "epoch": 0.55, "grad_norm": 0.5892457216553397, "learning_rate": 4.3979468386850884e-06, "loss": 0.5103, "step": 4335 }, { "epoch": 0.55, "grad_norm": 0.7951539859174098, "learning_rate": 4.395898594351048e-06, "loss": 0.6113, "step": 4336 }, { "epoch": 0.55, "grad_norm": 0.9406727358474832, "learning_rate": 4.393850452889182e-06, "loss": 0.6753, "step": 4337 }, { "epoch": 0.55, "grad_norm": 0.9938822358645597, "learning_rate": 4.39180241464827e-06, "loss": 0.7064, "step": 4338 }, { "epoch": 0.55, "grad_norm": 0.6620203618799757, "learning_rate": 4.389754479977071e-06, "loss": 0.5229, "step": 4339 }, { "epoch": 0.55, "grad_norm": 0.8543077631730749, "learning_rate": 4.387706649224324e-06, "loss": 0.6289, "step": 4340 }, { "epoch": 0.55, "grad_norm": 0.6652160346965879, "learning_rate": 4.385658922738758e-06, "loss": 0.5096, "step": 4341 }, { "epoch": 0.55, "grad_norm": 0.7520137932327667, "learning_rate": 4.3836113008690776e-06, "loss": 0.6489, "step": 4342 }, { "epoch": 0.55, "grad_norm": 0.8398007389218389, "learning_rate": 4.38156378396397e-06, "loss": 0.6537, "step": 4343 }, { "epoch": 0.55, "grad_norm": 0.7955293278487024, "learning_rate": 4.379516372372108e-06, "loss": 0.6084, "step": 4344 }, { "epoch": 0.55, "grad_norm": 0.8455922624241003, "learning_rate": 4.377469066442143e-06, "loss": 0.6375, "step": 4345 }, { "epoch": 0.55, "grad_norm": 0.8594604085067442, "learning_rate": 4.375421866522711e-06, "loss": 0.5891, "step": 4346 }, { "epoch": 0.55, "grad_norm": 0.6950798836528627, "learning_rate": 4.373374772962428e-06, "loss": 0.556, "step": 4347 }, { "epoch": 0.55, "grad_norm": 0.7154165337187443, "learning_rate": 4.371327786109891e-06, "loss": 0.5628, "step": 4348 }, { "epoch": 0.55, "grad_norm": 0.663563750111381, "learning_rate": 4.369280906313686e-06, "loss": 0.4827, "step": 4349 }, { "epoch": 0.55, "grad_norm": 0.7803592721361096, "learning_rate": 4.36723413392237e-06, "loss": 0.5781, "step": 4350 }, { "epoch": 0.55, "grad_norm": 0.899832149414895, "learning_rate": 4.36518746928449e-06, "loss": 0.6147, "step": 4351 }, { "epoch": 0.55, "grad_norm": 0.9273090795043808, "learning_rate": 4.3631409127485696e-06, "loss": 0.6419, "step": 4352 }, { "epoch": 0.55, "grad_norm": 0.8563376047287822, "learning_rate": 4.361094464663118e-06, "loss": 0.6328, "step": 4353 }, { "epoch": 0.55, "grad_norm": 2.6377745341816885, "learning_rate": 4.359048125376622e-06, "loss": 0.6369, "step": 4354 }, { "epoch": 0.55, "grad_norm": 0.7840415007860636, "learning_rate": 4.3570018952375535e-06, "loss": 0.5988, "step": 4355 }, { "epoch": 0.55, "grad_norm": 0.631625727697405, "learning_rate": 4.3549557745943635e-06, "loss": 0.4872, "step": 4356 }, { "epoch": 0.56, "grad_norm": 0.683577231303051, "learning_rate": 4.352909763795485e-06, "loss": 0.5207, "step": 4357 }, { "epoch": 0.56, "grad_norm": 0.8484486050390335, "learning_rate": 4.350863863189333e-06, "loss": 0.6315, "step": 4358 }, { "epoch": 0.56, "grad_norm": 0.7162493208386852, "learning_rate": 4.3488180731243e-06, "loss": 0.5671, "step": 4359 }, { "epoch": 0.56, "grad_norm": 0.8012950055965488, "learning_rate": 4.346772393948767e-06, "loss": 0.5329, "step": 4360 }, { "epoch": 0.56, "grad_norm": 0.7140012722654178, "learning_rate": 4.34472682601109e-06, "loss": 0.5461, "step": 4361 }, { "epoch": 0.56, "grad_norm": 0.773054766575202, "learning_rate": 4.342681369659608e-06, "loss": 0.5389, "step": 4362 }, { "epoch": 0.56, "grad_norm": 0.8367063710005508, "learning_rate": 4.340636025242641e-06, "loss": 0.6522, "step": 4363 }, { "epoch": 0.56, "grad_norm": 0.9129576504524181, "learning_rate": 4.33859079310849e-06, "loss": 0.5837, "step": 4364 }, { "epoch": 0.56, "grad_norm": 0.8391651759626918, "learning_rate": 4.336545673605435e-06, "loss": 0.5885, "step": 4365 }, { "epoch": 0.56, "grad_norm": 0.9764985524117383, "learning_rate": 4.334500667081742e-06, "loss": 0.6235, "step": 4366 }, { "epoch": 0.56, "grad_norm": 0.999775309604442, "learning_rate": 4.332455773885651e-06, "loss": 0.6399, "step": 4367 }, { "epoch": 0.56, "grad_norm": 0.7513709360270177, "learning_rate": 4.330410994365388e-06, "loss": 0.6117, "step": 4368 }, { "epoch": 0.56, "grad_norm": 0.6964864276009453, "learning_rate": 4.3283663288691555e-06, "loss": 0.5478, "step": 4369 }, { "epoch": 0.56, "grad_norm": 0.8829931466511156, "learning_rate": 4.326321777745142e-06, "loss": 0.6725, "step": 4370 }, { "epoch": 0.56, "grad_norm": 0.8564211092020534, "learning_rate": 4.32427734134151e-06, "loss": 0.6497, "step": 4371 }, { "epoch": 0.56, "grad_norm": 0.8081600591293844, "learning_rate": 4.322233020006407e-06, "loss": 0.498, "step": 4372 }, { "epoch": 0.56, "grad_norm": 0.7116148782644764, "learning_rate": 4.32018881408796e-06, "loss": 0.5756, "step": 4373 }, { "epoch": 0.56, "grad_norm": 0.7435320597155147, "learning_rate": 4.318144723934276e-06, "loss": 0.5371, "step": 4374 }, { "epoch": 0.56, "grad_norm": 0.831035640799675, "learning_rate": 4.316100749893443e-06, "loss": 0.6098, "step": 4375 }, { "epoch": 0.56, "grad_norm": 0.822788003880369, "learning_rate": 4.3140568923135265e-06, "loss": 0.6174, "step": 4376 }, { "epoch": 0.56, "grad_norm": 0.7033814927788082, "learning_rate": 4.312013151542575e-06, "loss": 0.557, "step": 4377 }, { "epoch": 0.56, "grad_norm": 0.820223218945259, "learning_rate": 4.309969527928616e-06, "loss": 0.678, "step": 4378 }, { "epoch": 0.56, "grad_norm": 0.8075742775220537, "learning_rate": 4.307926021819659e-06, "loss": 0.6013, "step": 4379 }, { "epoch": 0.56, "grad_norm": 0.828809443171805, "learning_rate": 4.30588263356369e-06, "loss": 0.5721, "step": 4380 }, { "epoch": 0.56, "grad_norm": 0.8607363203360329, "learning_rate": 4.303839363508677e-06, "loss": 0.706, "step": 4381 }, { "epoch": 0.56, "grad_norm": 0.982740544125849, "learning_rate": 4.301796212002568e-06, "loss": 0.6997, "step": 4382 }, { "epoch": 0.56, "grad_norm": 0.7879554640148345, "learning_rate": 4.29975317939329e-06, "loss": 0.5897, "step": 4383 }, { "epoch": 0.56, "grad_norm": 0.8628538771348504, "learning_rate": 4.297710266028751e-06, "loss": 0.6441, "step": 4384 }, { "epoch": 0.56, "grad_norm": 0.8058890579576748, "learning_rate": 4.295667472256836e-06, "loss": 0.6415, "step": 4385 }, { "epoch": 0.56, "grad_norm": 0.6715754210015439, "learning_rate": 4.293624798425414e-06, "loss": 0.5201, "step": 4386 }, { "epoch": 0.56, "grad_norm": 0.8451684900489934, "learning_rate": 4.291582244882329e-06, "loss": 0.5653, "step": 4387 }, { "epoch": 0.56, "grad_norm": 0.8188198937442785, "learning_rate": 4.289539811975408e-06, "loss": 0.6201, "step": 4388 }, { "epoch": 0.56, "grad_norm": 0.775092320627096, "learning_rate": 4.287497500052456e-06, "loss": 0.6265, "step": 4389 }, { "epoch": 0.56, "grad_norm": 0.7084126950507942, "learning_rate": 4.285455309461256e-06, "loss": 0.534, "step": 4390 }, { "epoch": 0.56, "grad_norm": 0.900616535504914, "learning_rate": 4.283413240549574e-06, "loss": 0.6659, "step": 4391 }, { "epoch": 0.56, "grad_norm": 0.8208774478706352, "learning_rate": 4.2813712936651506e-06, "loss": 0.6328, "step": 4392 }, { "epoch": 0.56, "grad_norm": 0.782433050104911, "learning_rate": 4.27932946915571e-06, "loss": 0.6253, "step": 4393 }, { "epoch": 0.56, "grad_norm": 0.6842266867942965, "learning_rate": 4.277287767368952e-06, "loss": 0.5875, "step": 4394 }, { "epoch": 0.56, "grad_norm": 0.7663245092156283, "learning_rate": 4.275246188652559e-06, "loss": 0.6032, "step": 4395 }, { "epoch": 0.56, "grad_norm": 0.6752798482582338, "learning_rate": 4.2732047333541895e-06, "loss": 0.524, "step": 4396 }, { "epoch": 0.56, "grad_norm": 0.9313095502623406, "learning_rate": 4.271163401821482e-06, "loss": 0.6069, "step": 4397 }, { "epoch": 0.56, "grad_norm": 0.8019830734169505, "learning_rate": 4.269122194402054e-06, "loss": 0.6264, "step": 4398 }, { "epoch": 0.56, "grad_norm": 0.8457642123745607, "learning_rate": 4.267081111443501e-06, "loss": 0.5841, "step": 4399 }, { "epoch": 0.56, "grad_norm": 0.6067332774236164, "learning_rate": 4.2650401532934005e-06, "loss": 0.4988, "step": 4400 }, { "epoch": 0.56, "grad_norm": 0.6746976738983865, "learning_rate": 4.262999320299306e-06, "loss": 0.6042, "step": 4401 }, { "epoch": 0.56, "grad_norm": 0.9593765997588732, "learning_rate": 4.260958612808748e-06, "loss": 0.6084, "step": 4402 }, { "epoch": 0.56, "grad_norm": 0.9496935712497462, "learning_rate": 4.258918031169239e-06, "loss": 0.6593, "step": 4403 }, { "epoch": 0.56, "grad_norm": 0.7501932363545909, "learning_rate": 4.256877575728269e-06, "loss": 0.5859, "step": 4404 }, { "epoch": 0.56, "grad_norm": 0.6681831403497059, "learning_rate": 4.254837246833307e-06, "loss": 0.5585, "step": 4405 }, { "epoch": 0.56, "grad_norm": 0.8352713747574878, "learning_rate": 4.252797044831797e-06, "loss": 0.5999, "step": 4406 }, { "epoch": 0.56, "grad_norm": 1.0033806169981134, "learning_rate": 4.250756970071167e-06, "loss": 0.6508, "step": 4407 }, { "epoch": 0.56, "grad_norm": 0.7415248405937985, "learning_rate": 4.24871702289882e-06, "loss": 0.5617, "step": 4408 }, { "epoch": 0.56, "grad_norm": 0.6800380634485673, "learning_rate": 4.246677203662137e-06, "loss": 0.5526, "step": 4409 }, { "epoch": 0.56, "grad_norm": 0.6752305504198819, "learning_rate": 4.2446375127084775e-06, "loss": 0.5597, "step": 4410 }, { "epoch": 0.56, "grad_norm": 0.860725233434424, "learning_rate": 4.24259795038518e-06, "loss": 0.6521, "step": 4411 }, { "epoch": 0.56, "grad_norm": 0.6410028005612902, "learning_rate": 4.2405585170395605e-06, "loss": 0.5173, "step": 4412 }, { "epoch": 0.56, "grad_norm": 0.597169745449015, "learning_rate": 4.2385192130189145e-06, "loss": 0.5476, "step": 4413 }, { "epoch": 0.56, "grad_norm": 0.6978752809713807, "learning_rate": 4.236480038670514e-06, "loss": 0.5287, "step": 4414 }, { "epoch": 0.56, "grad_norm": 0.8754603037400586, "learning_rate": 4.234440994341608e-06, "loss": 0.5991, "step": 4415 }, { "epoch": 0.56, "grad_norm": 0.6800498802828441, "learning_rate": 4.232402080379426e-06, "loss": 0.4805, "step": 4416 }, { "epoch": 0.56, "grad_norm": 0.8957000909929884, "learning_rate": 4.230363297131173e-06, "loss": 0.6186, "step": 4417 }, { "epoch": 0.56, "grad_norm": 0.8366311584683085, "learning_rate": 4.228324644944032e-06, "loss": 0.6803, "step": 4418 }, { "epoch": 0.56, "grad_norm": 0.7781682995555774, "learning_rate": 4.2262861241651635e-06, "loss": 0.623, "step": 4419 }, { "epoch": 0.56, "grad_norm": 0.8073638100477304, "learning_rate": 4.224247735141709e-06, "loss": 0.5973, "step": 4420 }, { "epoch": 0.56, "grad_norm": 0.9235060005182137, "learning_rate": 4.222209478220783e-06, "loss": 0.5849, "step": 4421 }, { "epoch": 0.56, "grad_norm": 0.6945462825103523, "learning_rate": 4.220171353749481e-06, "loss": 0.5358, "step": 4422 }, { "epoch": 0.56, "grad_norm": 0.7069245518762349, "learning_rate": 4.218133362074872e-06, "loss": 0.5561, "step": 4423 }, { "epoch": 0.56, "grad_norm": 0.7018026728986374, "learning_rate": 4.216095503544006e-06, "loss": 0.5183, "step": 4424 }, { "epoch": 0.56, "grad_norm": 0.7844546732012173, "learning_rate": 4.214057778503908e-06, "loss": 0.5966, "step": 4425 }, { "epoch": 0.56, "grad_norm": 0.8848703650925667, "learning_rate": 4.212020187301584e-06, "loss": 0.6518, "step": 4426 }, { "epoch": 0.56, "grad_norm": 0.6448911228036215, "learning_rate": 4.209982730284012e-06, "loss": 0.5281, "step": 4427 }, { "epoch": 0.56, "grad_norm": 0.6361294523369796, "learning_rate": 4.2079454077981516e-06, "loss": 0.5347, "step": 4428 }, { "epoch": 0.56, "grad_norm": 0.7748772916124865, "learning_rate": 4.205908220190938e-06, "loss": 0.581, "step": 4429 }, { "epoch": 0.56, "grad_norm": 0.8100475158397745, "learning_rate": 4.203871167809279e-06, "loss": 0.5431, "step": 4430 }, { "epoch": 0.56, "grad_norm": 0.8065475573830592, "learning_rate": 4.201834251000068e-06, "loss": 0.6559, "step": 4431 }, { "epoch": 0.56, "grad_norm": 0.7141958428718586, "learning_rate": 4.199797470110168e-06, "loss": 0.52, "step": 4432 }, { "epoch": 0.56, "grad_norm": 0.9433393396723571, "learning_rate": 4.197760825486423e-06, "loss": 0.5945, "step": 4433 }, { "epoch": 0.56, "grad_norm": 0.6421198110735689, "learning_rate": 4.195724317475651e-06, "loss": 0.4773, "step": 4434 }, { "epoch": 0.57, "grad_norm": 0.7918566182329413, "learning_rate": 4.193687946424649e-06, "loss": 0.6136, "step": 4435 }, { "epoch": 0.57, "grad_norm": 0.795441187698482, "learning_rate": 4.1916517126801905e-06, "loss": 0.6243, "step": 4436 }, { "epoch": 0.57, "grad_norm": 0.6914072720914571, "learning_rate": 4.189615616589024e-06, "loss": 0.5451, "step": 4437 }, { "epoch": 0.57, "grad_norm": 0.6738469166382922, "learning_rate": 4.1875796584978725e-06, "loss": 0.4631, "step": 4438 }, { "epoch": 0.57, "grad_norm": 0.6544194302610971, "learning_rate": 4.185543838753444e-06, "loss": 0.5412, "step": 4439 }, { "epoch": 0.57, "grad_norm": 0.7756213319689607, "learning_rate": 4.183508157702415e-06, "loss": 0.5198, "step": 4440 }, { "epoch": 0.57, "grad_norm": 0.8754822293947353, "learning_rate": 4.18147261569144e-06, "loss": 0.6343, "step": 4441 }, { "epoch": 0.57, "grad_norm": 0.7879180220291384, "learning_rate": 4.1794372130671515e-06, "loss": 0.5442, "step": 4442 }, { "epoch": 0.57, "grad_norm": 0.9695887602577561, "learning_rate": 4.177401950176156e-06, "loss": 0.6694, "step": 4443 }, { "epoch": 0.57, "grad_norm": 1.0284798486476336, "learning_rate": 4.175366827365041e-06, "loss": 0.5992, "step": 4444 }, { "epoch": 0.57, "grad_norm": 0.7250527780498202, "learning_rate": 4.173331844980362e-06, "loss": 0.5693, "step": 4445 }, { "epoch": 0.57, "grad_norm": 0.7051701526998766, "learning_rate": 4.171297003368658e-06, "loss": 0.5209, "step": 4446 }, { "epoch": 0.57, "grad_norm": 0.800320758976769, "learning_rate": 4.169262302876443e-06, "loss": 0.6316, "step": 4447 }, { "epoch": 0.57, "grad_norm": 0.7064458499898304, "learning_rate": 4.167227743850201e-06, "loss": 0.5428, "step": 4448 }, { "epoch": 0.57, "grad_norm": 0.7263998875079912, "learning_rate": 4.165193326636397e-06, "loss": 0.5444, "step": 4449 }, { "epoch": 0.57, "grad_norm": 0.7264158283065071, "learning_rate": 4.163159051581475e-06, "loss": 0.5255, "step": 4450 }, { "epoch": 0.57, "grad_norm": 0.7908346881017806, "learning_rate": 4.161124919031848e-06, "loss": 0.6021, "step": 4451 }, { "epoch": 0.57, "grad_norm": 0.8809202758921088, "learning_rate": 4.159090929333907e-06, "loss": 0.6834, "step": 4452 }, { "epoch": 0.57, "grad_norm": 0.640701368942295, "learning_rate": 4.157057082834022e-06, "loss": 0.5602, "step": 4453 }, { "epoch": 0.57, "grad_norm": 0.8437108871495803, "learning_rate": 4.1550233798785335e-06, "loss": 0.602, "step": 4454 }, { "epoch": 0.57, "grad_norm": 0.8765935116171523, "learning_rate": 4.1529898208137604e-06, "loss": 0.6847, "step": 4455 }, { "epoch": 0.57, "grad_norm": 0.8698150831041311, "learning_rate": 4.150956405985996e-06, "loss": 0.6159, "step": 4456 }, { "epoch": 0.57, "grad_norm": 0.9008177394925169, "learning_rate": 4.14892313574151e-06, "loss": 0.613, "step": 4457 }, { "epoch": 0.57, "grad_norm": 0.8694715830956062, "learning_rate": 4.146890010426549e-06, "loss": 0.644, "step": 4458 }, { "epoch": 0.57, "grad_norm": 0.8256535438576119, "learning_rate": 4.1448570303873305e-06, "loss": 0.6722, "step": 4459 }, { "epoch": 0.57, "grad_norm": 0.772849376790894, "learning_rate": 4.142824195970052e-06, "loss": 0.6084, "step": 4460 }, { "epoch": 0.57, "grad_norm": 0.829374809555207, "learning_rate": 4.140791507520883e-06, "loss": 0.6482, "step": 4461 }, { "epoch": 0.57, "grad_norm": 0.7964565266071935, "learning_rate": 4.1387589653859686e-06, "loss": 0.6163, "step": 4462 }, { "epoch": 0.57, "grad_norm": 0.678221986857363, "learning_rate": 4.13672656991143e-06, "loss": 0.5125, "step": 4463 }, { "epoch": 0.57, "grad_norm": 0.7854990822333536, "learning_rate": 4.134694321443363e-06, "loss": 0.624, "step": 4464 }, { "epoch": 0.57, "grad_norm": 0.6059465308241474, "learning_rate": 4.132662220327838e-06, "loss": 0.5079, "step": 4465 }, { "epoch": 0.57, "grad_norm": 0.9156875796548971, "learning_rate": 4.130630266910902e-06, "loss": 0.6074, "step": 4466 }, { "epoch": 0.57, "grad_norm": 0.7225565365590488, "learning_rate": 4.128598461538574e-06, "loss": 0.5731, "step": 4467 }, { "epoch": 0.57, "grad_norm": 0.7980905953476304, "learning_rate": 4.1265668045568495e-06, "loss": 0.61, "step": 4468 }, { "epoch": 0.57, "grad_norm": 0.7936195448809921, "learning_rate": 4.124535296311699e-06, "loss": 0.6294, "step": 4469 }, { "epoch": 0.57, "grad_norm": 0.6857743393778188, "learning_rate": 4.122503937149066e-06, "loss": 0.5671, "step": 4470 }, { "epoch": 0.57, "grad_norm": 0.8509053202680728, "learning_rate": 4.120472727414872e-06, "loss": 0.617, "step": 4471 }, { "epoch": 0.57, "grad_norm": 0.6936505336685249, "learning_rate": 4.11844166745501e-06, "loss": 0.5831, "step": 4472 }, { "epoch": 0.57, "grad_norm": 0.6665547368207002, "learning_rate": 4.1164107576153475e-06, "loss": 0.5106, "step": 4473 }, { "epoch": 0.57, "grad_norm": 0.7040820145719161, "learning_rate": 4.1143799982417276e-06, "loss": 0.4656, "step": 4474 }, { "epoch": 0.57, "grad_norm": 1.03498755693866, "learning_rate": 4.112349389679968e-06, "loss": 0.6186, "step": 4475 }, { "epoch": 0.57, "grad_norm": 0.6291052651966531, "learning_rate": 4.11031893227586e-06, "loss": 0.4835, "step": 4476 }, { "epoch": 0.57, "grad_norm": 1.0405536087578553, "learning_rate": 4.108288626375169e-06, "loss": 0.6228, "step": 4477 }, { "epoch": 0.57, "grad_norm": 0.8495624840035347, "learning_rate": 4.106258472323635e-06, "loss": 0.5264, "step": 4478 }, { "epoch": 0.57, "grad_norm": 0.9311194202820979, "learning_rate": 4.104228470466973e-06, "loss": 0.6418, "step": 4479 }, { "epoch": 0.57, "grad_norm": 0.7403730629951322, "learning_rate": 4.1021986211508695e-06, "loss": 0.5616, "step": 4480 }, { "epoch": 0.57, "grad_norm": 0.8997188331422484, "learning_rate": 4.100168924720989e-06, "loss": 0.6169, "step": 4481 }, { "epoch": 0.57, "grad_norm": 0.8682297629657586, "learning_rate": 4.098139381522964e-06, "loss": 0.6557, "step": 4482 }, { "epoch": 0.57, "grad_norm": 0.667710364478011, "learning_rate": 4.096109991902407e-06, "loss": 0.4878, "step": 4483 }, { "epoch": 0.57, "grad_norm": 0.6826195080322754, "learning_rate": 4.094080756204904e-06, "loss": 0.5451, "step": 4484 }, { "epoch": 0.57, "grad_norm": 0.9368518362681154, "learning_rate": 4.092051674776008e-06, "loss": 0.5787, "step": 4485 }, { "epoch": 0.57, "grad_norm": 0.8312111914194017, "learning_rate": 4.090022747961254e-06, "loss": 0.6435, "step": 4486 }, { "epoch": 0.57, "grad_norm": 0.7570072202531515, "learning_rate": 4.087993976106146e-06, "loss": 0.5958, "step": 4487 }, { "epoch": 0.57, "grad_norm": 0.6610870430350286, "learning_rate": 4.085965359556163e-06, "loss": 0.4931, "step": 4488 }, { "epoch": 0.57, "grad_norm": 0.6291958819416069, "learning_rate": 4.083936898656756e-06, "loss": 0.5615, "step": 4489 }, { "epoch": 0.57, "grad_norm": 0.9192216162257776, "learning_rate": 4.081908593753353e-06, "loss": 0.6523, "step": 4490 }, { "epoch": 0.57, "grad_norm": 0.7016562543049941, "learning_rate": 4.079880445191351e-06, "loss": 0.5255, "step": 4491 }, { "epoch": 0.57, "grad_norm": 0.7144108847668628, "learning_rate": 4.0778524533161235e-06, "loss": 0.5683, "step": 4492 }, { "epoch": 0.57, "grad_norm": 0.8957817686677506, "learning_rate": 4.075824618473016e-06, "loss": 0.6105, "step": 4493 }, { "epoch": 0.57, "grad_norm": 0.8456968990107605, "learning_rate": 4.0737969410073485e-06, "loss": 0.634, "step": 4494 }, { "epoch": 0.57, "grad_norm": 0.8938367414031712, "learning_rate": 4.071769421264413e-06, "loss": 0.5933, "step": 4495 }, { "epoch": 0.57, "grad_norm": 0.6861271589415892, "learning_rate": 4.0697420595894735e-06, "loss": 0.4993, "step": 4496 }, { "epoch": 0.57, "grad_norm": 0.7872958407726934, "learning_rate": 4.067714856327771e-06, "loss": 0.5295, "step": 4497 }, { "epoch": 0.57, "grad_norm": 0.8401578552600698, "learning_rate": 4.065687811824516e-06, "loss": 0.6081, "step": 4498 }, { "epoch": 0.57, "grad_norm": 0.6303556128274284, "learning_rate": 4.0636609264248926e-06, "loss": 0.5833, "step": 4499 }, { "epoch": 0.57, "grad_norm": 0.7449710690106373, "learning_rate": 4.061634200474059e-06, "loss": 0.5435, "step": 4500 }, { "epoch": 0.57, "grad_norm": 0.6394832923598611, "learning_rate": 4.059607634317146e-06, "loss": 0.5495, "step": 4501 }, { "epoch": 0.57, "grad_norm": 0.6202441411313765, "learning_rate": 4.057581228299254e-06, "loss": 0.539, "step": 4502 }, { "epoch": 0.57, "grad_norm": 0.839989360612665, "learning_rate": 4.055554982765463e-06, "loss": 0.6995, "step": 4503 }, { "epoch": 0.57, "grad_norm": 0.8550292360545774, "learning_rate": 4.053528898060818e-06, "loss": 0.5956, "step": 4504 }, { "epoch": 0.57, "grad_norm": 0.777474829516731, "learning_rate": 4.051502974530341e-06, "loss": 0.528, "step": 4505 }, { "epoch": 0.57, "grad_norm": 0.6639671499491367, "learning_rate": 4.049477212519028e-06, "loss": 0.5206, "step": 4506 }, { "epoch": 0.57, "grad_norm": 0.6337231399568349, "learning_rate": 4.047451612371841e-06, "loss": 0.5314, "step": 4507 }, { "epoch": 0.57, "grad_norm": 0.8099585713264955, "learning_rate": 4.045426174433722e-06, "loss": 0.5947, "step": 4508 }, { "epoch": 0.57, "grad_norm": 1.0325086468550524, "learning_rate": 4.043400899049581e-06, "loss": 0.6728, "step": 4509 }, { "epoch": 0.57, "grad_norm": 0.8121420007393971, "learning_rate": 4.041375786564299e-06, "loss": 0.5988, "step": 4510 }, { "epoch": 0.57, "grad_norm": 0.5994445604614119, "learning_rate": 4.039350837322736e-06, "loss": 0.4898, "step": 4511 }, { "epoch": 0.57, "grad_norm": 0.7664533881416199, "learning_rate": 4.037326051669717e-06, "loss": 0.5529, "step": 4512 }, { "epoch": 0.57, "grad_norm": 0.7339234094451136, "learning_rate": 4.0353014299500425e-06, "loss": 0.541, "step": 4513 }, { "epoch": 0.58, "grad_norm": 0.834292436273063, "learning_rate": 4.033276972508484e-06, "loss": 0.685, "step": 4514 }, { "epoch": 0.58, "grad_norm": 0.6373930343283828, "learning_rate": 4.031252679689785e-06, "loss": 0.5358, "step": 4515 }, { "epoch": 0.58, "grad_norm": 0.6723670265566082, "learning_rate": 4.029228551838664e-06, "loss": 0.5836, "step": 4516 }, { "epoch": 0.58, "grad_norm": 0.8286246667984236, "learning_rate": 4.027204589299805e-06, "loss": 0.5702, "step": 4517 }, { "epoch": 0.58, "grad_norm": 0.7497012728183629, "learning_rate": 4.0251807924178725e-06, "loss": 0.5202, "step": 4518 }, { "epoch": 0.58, "grad_norm": 0.8300544722357185, "learning_rate": 4.023157161537494e-06, "loss": 0.649, "step": 4519 }, { "epoch": 0.58, "grad_norm": 0.8741021675353209, "learning_rate": 4.021133697003275e-06, "loss": 0.6232, "step": 4520 }, { "epoch": 0.58, "grad_norm": 0.8095469614215342, "learning_rate": 4.019110399159789e-06, "loss": 0.6644, "step": 4521 }, { "epoch": 0.58, "grad_norm": 0.7679272331147581, "learning_rate": 4.017087268351583e-06, "loss": 0.5326, "step": 4522 }, { "epoch": 0.58, "grad_norm": 0.808127212469725, "learning_rate": 4.0150643049231744e-06, "loss": 0.552, "step": 4523 }, { "epoch": 0.58, "grad_norm": 0.8087816256788897, "learning_rate": 4.013041509219055e-06, "loss": 0.6077, "step": 4524 }, { "epoch": 0.58, "grad_norm": 0.7055910271872686, "learning_rate": 4.011018881583684e-06, "loss": 0.4965, "step": 4525 }, { "epoch": 0.58, "grad_norm": 0.7706770867690466, "learning_rate": 4.008996422361495e-06, "loss": 0.6467, "step": 4526 }, { "epoch": 0.58, "grad_norm": 0.7721696724865775, "learning_rate": 4.006974131896891e-06, "loss": 0.581, "step": 4527 }, { "epoch": 0.58, "grad_norm": 0.7927763138443585, "learning_rate": 4.004952010534247e-06, "loss": 0.6204, "step": 4528 }, { "epoch": 0.58, "grad_norm": 0.8948284011904157, "learning_rate": 4.002930058617909e-06, "loss": 0.6197, "step": 4529 }, { "epoch": 0.58, "grad_norm": 0.821729284960434, "learning_rate": 4.000908276492195e-06, "loss": 0.653, "step": 4530 }, { "epoch": 0.58, "grad_norm": 0.6143805701987523, "learning_rate": 3.9988866645013925e-06, "loss": 0.4926, "step": 4531 }, { "epoch": 0.58, "grad_norm": 0.8311299352373979, "learning_rate": 3.996865222989762e-06, "loss": 0.615, "step": 4532 }, { "epoch": 0.58, "grad_norm": 0.8094146121329434, "learning_rate": 3.994843952301533e-06, "loss": 0.6042, "step": 4533 }, { "epoch": 0.58, "grad_norm": 0.8648595247341165, "learning_rate": 3.992822852780908e-06, "loss": 0.65, "step": 4534 }, { "epoch": 0.58, "grad_norm": 0.896037340784818, "learning_rate": 3.9908019247720584e-06, "loss": 0.635, "step": 4535 }, { "epoch": 0.58, "grad_norm": 0.6806823867919641, "learning_rate": 3.988781168619125e-06, "loss": 0.5329, "step": 4536 }, { "epoch": 0.58, "grad_norm": 0.6895522545326527, "learning_rate": 3.986760584666226e-06, "loss": 0.5124, "step": 4537 }, { "epoch": 0.58, "grad_norm": 0.632482621101068, "learning_rate": 3.984740173257443e-06, "loss": 0.5045, "step": 4538 }, { "epoch": 0.58, "grad_norm": 0.7743533515950725, "learning_rate": 3.982719934736832e-06, "loss": 0.5986, "step": 4539 }, { "epoch": 0.58, "grad_norm": 0.8637524743488536, "learning_rate": 3.9806998694484185e-06, "loss": 0.6008, "step": 4540 }, { "epoch": 0.58, "grad_norm": 0.7504590681424177, "learning_rate": 3.978679977736197e-06, "loss": 0.5439, "step": 4541 }, { "epoch": 0.58, "grad_norm": 0.6844718007071564, "learning_rate": 3.976660259944136e-06, "loss": 0.5508, "step": 4542 }, { "epoch": 0.58, "grad_norm": 0.8007898019090697, "learning_rate": 3.974640716416172e-06, "loss": 0.5395, "step": 4543 }, { "epoch": 0.58, "grad_norm": 0.853710962479426, "learning_rate": 3.972621347496212e-06, "loss": 0.6688, "step": 4544 }, { "epoch": 0.58, "grad_norm": 0.7813937002883821, "learning_rate": 3.970602153528133e-06, "loss": 0.5704, "step": 4545 }, { "epoch": 0.58, "grad_norm": 0.7037140207088858, "learning_rate": 3.968583134855782e-06, "loss": 0.5257, "step": 4546 }, { "epoch": 0.58, "grad_norm": 0.6949108202317474, "learning_rate": 3.966564291822979e-06, "loss": 0.5454, "step": 4547 }, { "epoch": 0.58, "grad_norm": 0.6074549916156692, "learning_rate": 3.9645456247735116e-06, "loss": 0.4829, "step": 4548 }, { "epoch": 0.58, "grad_norm": 0.6722261071294441, "learning_rate": 3.962527134051135e-06, "loss": 0.5357, "step": 4549 }, { "epoch": 0.58, "grad_norm": 0.7552968197412698, "learning_rate": 3.96050881999958e-06, "loss": 0.567, "step": 4550 }, { "epoch": 0.58, "grad_norm": 0.7285588351863173, "learning_rate": 3.958490682962543e-06, "loss": 0.4673, "step": 4551 }, { "epoch": 0.58, "grad_norm": 0.6788614589431907, "learning_rate": 3.956472723283693e-06, "loss": 0.5365, "step": 4552 }, { "epoch": 0.58, "grad_norm": 0.9948833611993003, "learning_rate": 3.954454941306667e-06, "loss": 0.6932, "step": 4553 }, { "epoch": 0.58, "grad_norm": 0.6456013077423708, "learning_rate": 3.952437337375072e-06, "loss": 0.4828, "step": 4554 }, { "epoch": 0.58, "grad_norm": 0.712819145937391, "learning_rate": 3.9504199118324845e-06, "loss": 0.5281, "step": 4555 }, { "epoch": 0.58, "grad_norm": 0.9378994491712823, "learning_rate": 3.948402665022452e-06, "loss": 0.6541, "step": 4556 }, { "epoch": 0.58, "grad_norm": 0.85946904511879, "learning_rate": 3.94638559728849e-06, "loss": 0.6165, "step": 4557 }, { "epoch": 0.58, "grad_norm": 0.8486353526538747, "learning_rate": 3.944368708974086e-06, "loss": 0.6487, "step": 4558 }, { "epoch": 0.58, "grad_norm": 0.6359170716143816, "learning_rate": 3.9423520004226916e-06, "loss": 0.547, "step": 4559 }, { "epoch": 0.58, "grad_norm": 0.647376383410015, "learning_rate": 3.940335471977733e-06, "loss": 0.5506, "step": 4560 }, { "epoch": 0.58, "grad_norm": 1.0956950623384702, "learning_rate": 3.938319123982605e-06, "loss": 0.5261, "step": 4561 }, { "epoch": 0.58, "grad_norm": 0.9461874589705112, "learning_rate": 3.93630295678067e-06, "loss": 0.6038, "step": 4562 }, { "epoch": 0.58, "grad_norm": 0.8790905280523886, "learning_rate": 3.934286970715259e-06, "loss": 0.6461, "step": 4563 }, { "epoch": 0.58, "grad_norm": 0.7887680735411222, "learning_rate": 3.932271166129674e-06, "loss": 0.5799, "step": 4564 }, { "epoch": 0.58, "grad_norm": 0.86126683838304, "learning_rate": 3.930255543367187e-06, "loss": 0.5982, "step": 4565 }, { "epoch": 0.58, "grad_norm": 0.8141051902610317, "learning_rate": 3.928240102771036e-06, "loss": 0.5239, "step": 4566 }, { "epoch": 0.58, "grad_norm": 1.046501032184945, "learning_rate": 3.926224844684429e-06, "loss": 0.6756, "step": 4567 }, { "epoch": 0.58, "grad_norm": 0.6638123677878982, "learning_rate": 3.924209769450545e-06, "loss": 0.5314, "step": 4568 }, { "epoch": 0.58, "grad_norm": 0.9880581748134359, "learning_rate": 3.92219487741253e-06, "loss": 0.7062, "step": 4569 }, { "epoch": 0.58, "grad_norm": 0.8314958282450264, "learning_rate": 3.920180168913499e-06, "loss": 0.5755, "step": 4570 }, { "epoch": 0.58, "grad_norm": 0.7342076988686367, "learning_rate": 3.9181656442965356e-06, "loss": 0.5916, "step": 4571 }, { "epoch": 0.58, "grad_norm": 0.6965393431030459, "learning_rate": 3.916151303904693e-06, "loss": 0.5236, "step": 4572 }, { "epoch": 0.58, "grad_norm": 0.706958773422459, "learning_rate": 3.914137148080991e-06, "loss": 0.5486, "step": 4573 }, { "epoch": 0.58, "grad_norm": 0.8366107787131191, "learning_rate": 3.9121231771684205e-06, "loss": 0.5979, "step": 4574 }, { "epoch": 0.58, "grad_norm": 0.9636515129332259, "learning_rate": 3.91010939150994e-06, "loss": 0.6533, "step": 4575 }, { "epoch": 0.58, "grad_norm": 0.7284865865019958, "learning_rate": 3.9080957914484756e-06, "loss": 0.568, "step": 4576 }, { "epoch": 0.58, "grad_norm": 0.962932449502878, "learning_rate": 3.906082377326922e-06, "loss": 0.6931, "step": 4577 }, { "epoch": 0.58, "grad_norm": 0.6857622066104377, "learning_rate": 3.9040691494881445e-06, "loss": 0.5394, "step": 4578 }, { "epoch": 0.58, "grad_norm": 0.8038457589699521, "learning_rate": 3.902056108274972e-06, "loss": 0.543, "step": 4579 }, { "epoch": 0.58, "grad_norm": 0.82037980807607, "learning_rate": 3.900043254030207e-06, "loss": 0.6635, "step": 4580 }, { "epoch": 0.58, "grad_norm": 0.6766786092908463, "learning_rate": 3.898030587096615e-06, "loss": 0.5371, "step": 4581 }, { "epoch": 0.58, "grad_norm": 0.8474691008789372, "learning_rate": 3.8960181078169355e-06, "loss": 0.629, "step": 4582 }, { "epoch": 0.58, "grad_norm": 0.6418338157282201, "learning_rate": 3.89400581653387e-06, "loss": 0.545, "step": 4583 }, { "epoch": 0.58, "grad_norm": 1.3789183579528237, "learning_rate": 3.891993713590093e-06, "loss": 0.6892, "step": 4584 }, { "epoch": 0.58, "grad_norm": 0.7522069662142868, "learning_rate": 3.889981799328242e-06, "loss": 0.5264, "step": 4585 }, { "epoch": 0.58, "grad_norm": 0.6971872882571089, "learning_rate": 3.887970074090928e-06, "loss": 0.5322, "step": 4586 }, { "epoch": 0.58, "grad_norm": 0.6600219331143145, "learning_rate": 3.885958538220724e-06, "loss": 0.5353, "step": 4587 }, { "epoch": 0.58, "grad_norm": 0.7505581797861914, "learning_rate": 3.883947192060176e-06, "loss": 0.576, "step": 4588 }, { "epoch": 0.58, "grad_norm": 0.9315520977338261, "learning_rate": 3.8819360359517936e-06, "loss": 0.6647, "step": 4589 }, { "epoch": 0.58, "grad_norm": 0.9030072230898907, "learning_rate": 3.8799250702380565e-06, "loss": 0.6471, "step": 4590 }, { "epoch": 0.58, "grad_norm": 0.6306693346595053, "learning_rate": 3.877914295261411e-06, "loss": 0.5247, "step": 4591 }, { "epoch": 0.59, "grad_norm": 0.6777270719991172, "learning_rate": 3.875903711364271e-06, "loss": 0.5738, "step": 4592 }, { "epoch": 0.59, "grad_norm": 0.7870825650535681, "learning_rate": 3.873893318889019e-06, "loss": 0.5917, "step": 4593 }, { "epoch": 0.59, "grad_norm": 0.6608604149554316, "learning_rate": 3.8718831181780015e-06, "loss": 0.5092, "step": 4594 }, { "epoch": 0.59, "grad_norm": 0.7626094806052015, "learning_rate": 3.869873109573537e-06, "loss": 0.5792, "step": 4595 }, { "epoch": 0.59, "grad_norm": 0.8066414470826253, "learning_rate": 3.867863293417909e-06, "loss": 0.6802, "step": 4596 }, { "epoch": 0.59, "grad_norm": 0.7414355852365169, "learning_rate": 3.865853670053368e-06, "loss": 0.5681, "step": 4597 }, { "epoch": 0.59, "grad_norm": 0.7070149058058068, "learning_rate": 3.86384423982213e-06, "loss": 0.542, "step": 4598 }, { "epoch": 0.59, "grad_norm": 0.7060132998178545, "learning_rate": 3.861835003066383e-06, "loss": 0.5763, "step": 4599 }, { "epoch": 0.59, "grad_norm": 0.6679425118377987, "learning_rate": 3.859825960128277e-06, "loss": 0.5571, "step": 4600 }, { "epoch": 0.59, "grad_norm": 0.6982893625550693, "learning_rate": 3.857817111349932e-06, "loss": 0.5489, "step": 4601 }, { "epoch": 0.59, "grad_norm": 0.6738943386830591, "learning_rate": 3.855808457073434e-06, "loss": 0.5376, "step": 4602 }, { "epoch": 0.59, "grad_norm": 0.7562338356194049, "learning_rate": 3.853799997640836e-06, "loss": 0.5156, "step": 4603 }, { "epoch": 0.59, "grad_norm": 0.8299941146368418, "learning_rate": 3.8517917333941554e-06, "loss": 0.6037, "step": 4604 }, { "epoch": 0.59, "grad_norm": 0.7066288611132916, "learning_rate": 3.8497836646753826e-06, "loss": 0.513, "step": 4605 }, { "epoch": 0.59, "grad_norm": 0.7717550489671091, "learning_rate": 3.847775791826468e-06, "loss": 0.608, "step": 4606 }, { "epoch": 0.59, "grad_norm": 0.6906201239731287, "learning_rate": 3.8457681151893315e-06, "loss": 0.5773, "step": 4607 }, { "epoch": 0.59, "grad_norm": 0.9282421546294841, "learning_rate": 3.843760635105861e-06, "loss": 0.6021, "step": 4608 }, { "epoch": 0.59, "grad_norm": 0.7750930057114463, "learning_rate": 3.841753351917909e-06, "loss": 0.5486, "step": 4609 }, { "epoch": 0.59, "grad_norm": 0.8456932131654048, "learning_rate": 3.839746265967295e-06, "loss": 0.647, "step": 4610 }, { "epoch": 0.59, "grad_norm": 0.8985318556376093, "learning_rate": 3.837739377595804e-06, "loss": 0.6597, "step": 4611 }, { "epoch": 0.59, "grad_norm": 0.759034253056153, "learning_rate": 3.835732687145189e-06, "loss": 0.5717, "step": 4612 }, { "epoch": 0.59, "grad_norm": 0.6056031392324246, "learning_rate": 3.833726194957168e-06, "loss": 0.5248, "step": 4613 }, { "epoch": 0.59, "grad_norm": 0.7926161510391273, "learning_rate": 3.831719901373426e-06, "loss": 0.5793, "step": 4614 }, { "epoch": 0.59, "grad_norm": 0.7834046507953263, "learning_rate": 3.829713806735613e-06, "loss": 0.5877, "step": 4615 }, { "epoch": 0.59, "grad_norm": 0.8392907713347888, "learning_rate": 3.827707911385348e-06, "loss": 0.5155, "step": 4616 }, { "epoch": 0.59, "grad_norm": 0.669347155304091, "learning_rate": 3.825702215664212e-06, "loss": 0.5324, "step": 4617 }, { "epoch": 0.59, "grad_norm": 0.7501519524702971, "learning_rate": 3.8236967199137546e-06, "loss": 0.5442, "step": 4618 }, { "epoch": 0.59, "grad_norm": 0.6389394535667118, "learning_rate": 3.821691424475492e-06, "loss": 0.5189, "step": 4619 }, { "epoch": 0.59, "grad_norm": 0.7239492452737994, "learning_rate": 3.8196863296909035e-06, "loss": 0.6101, "step": 4620 }, { "epoch": 0.59, "grad_norm": 0.6756528275476881, "learning_rate": 3.817681435901436e-06, "loss": 0.5359, "step": 4621 }, { "epoch": 0.59, "grad_norm": 0.9951737071097115, "learning_rate": 3.815676743448504e-06, "loss": 0.6635, "step": 4622 }, { "epoch": 0.59, "grad_norm": 0.7063950037658637, "learning_rate": 3.8136722526734837e-06, "loss": 0.5435, "step": 4623 }, { "epoch": 0.59, "grad_norm": 0.7982005672170439, "learning_rate": 3.81166796391772e-06, "loss": 0.6796, "step": 4624 }, { "epoch": 0.59, "grad_norm": 0.7428193680315647, "learning_rate": 3.8096638775225226e-06, "loss": 0.5335, "step": 4625 }, { "epoch": 0.59, "grad_norm": 0.7715154433110496, "learning_rate": 3.8076599938291652e-06, "loss": 0.5542, "step": 4626 }, { "epoch": 0.59, "grad_norm": 0.7798028238396731, "learning_rate": 3.805656313178889e-06, "loss": 0.6276, "step": 4627 }, { "epoch": 0.59, "grad_norm": 0.6196570671219331, "learning_rate": 3.803652835912901e-06, "loss": 0.5211, "step": 4628 }, { "epoch": 0.59, "grad_norm": 0.6441885082942713, "learning_rate": 3.8016495623723715e-06, "loss": 0.5431, "step": 4629 }, { "epoch": 0.59, "grad_norm": 0.8861784866832928, "learning_rate": 3.7996464928984365e-06, "loss": 0.6413, "step": 4630 }, { "epoch": 0.59, "grad_norm": 0.893278252797954, "learning_rate": 3.797643627832199e-06, "loss": 0.6715, "step": 4631 }, { "epoch": 0.59, "grad_norm": 0.8204789881194132, "learning_rate": 3.795640967514726e-06, "loss": 0.522, "step": 4632 }, { "epoch": 0.59, "grad_norm": 0.9456295565789163, "learning_rate": 3.793638512287049e-06, "loss": 0.6476, "step": 4633 }, { "epoch": 0.59, "grad_norm": 0.907713088082132, "learning_rate": 3.7916362624901643e-06, "loss": 0.6215, "step": 4634 }, { "epoch": 0.59, "grad_norm": 0.6440151261356034, "learning_rate": 3.7896342184650363e-06, "loss": 0.5188, "step": 4635 }, { "epoch": 0.59, "grad_norm": 0.7843371391700882, "learning_rate": 3.7876323805525917e-06, "loss": 0.5082, "step": 4636 }, { "epoch": 0.59, "grad_norm": 0.7105943494472945, "learning_rate": 3.7856307490937228e-06, "loss": 0.5924, "step": 4637 }, { "epoch": 0.59, "grad_norm": 0.7352941509804366, "learning_rate": 3.7836293244292853e-06, "loss": 0.5724, "step": 4638 }, { "epoch": 0.59, "grad_norm": 0.6709134273438605, "learning_rate": 3.7816281069001026e-06, "loss": 0.5087, "step": 4639 }, { "epoch": 0.59, "grad_norm": 0.7024961024364729, "learning_rate": 3.7796270968469595e-06, "loss": 0.5335, "step": 4640 }, { "epoch": 0.59, "grad_norm": 0.7754288675854669, "learning_rate": 3.7776262946106086e-06, "loss": 0.5844, "step": 4641 }, { "epoch": 0.59, "grad_norm": 0.8400529841645835, "learning_rate": 3.7756257005317655e-06, "loss": 0.5998, "step": 4642 }, { "epoch": 0.59, "grad_norm": 0.6636386635724815, "learning_rate": 3.7736253149511095e-06, "loss": 0.4426, "step": 4643 }, { "epoch": 0.59, "grad_norm": 0.6966823459396856, "learning_rate": 3.771625138209286e-06, "loss": 0.5491, "step": 4644 }, { "epoch": 0.59, "grad_norm": 0.879565056877042, "learning_rate": 3.7696251706469044e-06, "loss": 0.6567, "step": 4645 }, { "epoch": 0.59, "grad_norm": 0.6371345073659821, "learning_rate": 3.7676254126045384e-06, "loss": 0.5631, "step": 4646 }, { "epoch": 0.59, "grad_norm": 0.782881297588299, "learning_rate": 3.7656258644227256e-06, "loss": 0.5461, "step": 4647 }, { "epoch": 0.59, "grad_norm": 0.8501299172349109, "learning_rate": 3.7636265264419684e-06, "loss": 0.6774, "step": 4648 }, { "epoch": 0.59, "grad_norm": 0.729508395051032, "learning_rate": 3.7616273990027343e-06, "loss": 0.5737, "step": 4649 }, { "epoch": 0.59, "grad_norm": 0.9136969319057946, "learning_rate": 3.7596284824454522e-06, "loss": 0.6399, "step": 4650 }, { "epoch": 0.59, "grad_norm": 0.8765847610149756, "learning_rate": 3.7576297771105176e-06, "loss": 0.6602, "step": 4651 }, { "epoch": 0.59, "grad_norm": 0.8372393351581214, "learning_rate": 3.755631283338287e-06, "loss": 0.6276, "step": 4652 }, { "epoch": 0.59, "grad_norm": 0.759296036998442, "learning_rate": 3.753633001469087e-06, "loss": 0.5784, "step": 4653 }, { "epoch": 0.59, "grad_norm": 0.9399949063765041, "learning_rate": 3.7516349318432022e-06, "loss": 0.6788, "step": 4654 }, { "epoch": 0.59, "grad_norm": 0.6104913000524868, "learning_rate": 3.7496370748008837e-06, "loss": 0.5438, "step": 4655 }, { "epoch": 0.59, "grad_norm": 0.8440338008111886, "learning_rate": 3.7476394306823443e-06, "loss": 0.6089, "step": 4656 }, { "epoch": 0.59, "grad_norm": 0.8037526191815576, "learning_rate": 3.745641999827764e-06, "loss": 0.5764, "step": 4657 }, { "epoch": 0.59, "grad_norm": 0.8958301210932439, "learning_rate": 3.7436447825772827e-06, "loss": 0.5668, "step": 4658 }, { "epoch": 0.59, "grad_norm": 0.8646863037424776, "learning_rate": 3.741647779271006e-06, "loss": 0.6235, "step": 4659 }, { "epoch": 0.59, "grad_norm": 0.9298616874351711, "learning_rate": 3.739650990249003e-06, "loss": 0.6228, "step": 4660 }, { "epoch": 0.59, "grad_norm": 0.844443054430272, "learning_rate": 3.737654415851307e-06, "loss": 0.6376, "step": 4661 }, { "epoch": 0.59, "grad_norm": 0.8534129319078696, "learning_rate": 3.7356580564179125e-06, "loss": 0.6048, "step": 4662 }, { "epoch": 0.59, "grad_norm": 0.7368352764416887, "learning_rate": 3.7336619122887784e-06, "loss": 0.5357, "step": 4663 }, { "epoch": 0.59, "grad_norm": 0.6922930551973379, "learning_rate": 3.7316659838038293e-06, "loss": 0.5609, "step": 4664 }, { "epoch": 0.59, "grad_norm": 0.9932109609537716, "learning_rate": 3.7296702713029493e-06, "loss": 0.542, "step": 4665 }, { "epoch": 0.59, "grad_norm": 0.9321548113401201, "learning_rate": 3.727674775125987e-06, "loss": 0.6282, "step": 4666 }, { "epoch": 0.59, "grad_norm": 0.888255319414392, "learning_rate": 3.7256794956127564e-06, "loss": 0.6004, "step": 4667 }, { "epoch": 0.59, "grad_norm": 0.7902035241214458, "learning_rate": 3.7236844331030316e-06, "loss": 0.5519, "step": 4668 }, { "epoch": 0.59, "grad_norm": 0.6890897742740402, "learning_rate": 3.721689587936551e-06, "loss": 0.5673, "step": 4669 }, { "epoch": 0.59, "grad_norm": 0.8579854987054736, "learning_rate": 3.7196949604530174e-06, "loss": 0.6468, "step": 4670 }, { "epoch": 0.6, "grad_norm": 0.9347135475111792, "learning_rate": 3.7177005509920925e-06, "loss": 0.66, "step": 4671 }, { "epoch": 0.6, "grad_norm": 0.8330090276775827, "learning_rate": 3.715706359893406e-06, "loss": 0.6495, "step": 4672 }, { "epoch": 0.6, "grad_norm": 0.8124484948509604, "learning_rate": 3.7137123874965462e-06, "loss": 0.537, "step": 4673 }, { "epoch": 0.6, "grad_norm": 0.7229605628048922, "learning_rate": 3.711718634141067e-06, "loss": 0.5305, "step": 4674 }, { "epoch": 0.6, "grad_norm": 0.7190834794394976, "learning_rate": 3.7097251001664824e-06, "loss": 0.5458, "step": 4675 }, { "epoch": 0.6, "grad_norm": 0.7924097644349875, "learning_rate": 3.707731785912272e-06, "loss": 0.5758, "step": 4676 }, { "epoch": 0.6, "grad_norm": 0.8025604062291016, "learning_rate": 3.705738691717875e-06, "loss": 0.6324, "step": 4677 }, { "epoch": 0.6, "grad_norm": 2.0967434466102395, "learning_rate": 3.703745817922696e-06, "loss": 0.6176, "step": 4678 }, { "epoch": 0.6, "grad_norm": 0.6557323406313835, "learning_rate": 3.701753164866098e-06, "loss": 0.5483, "step": 4679 }, { "epoch": 0.6, "grad_norm": 0.6099912962178521, "learning_rate": 3.699760732887413e-06, "loss": 0.5105, "step": 4680 }, { "epoch": 0.6, "grad_norm": 0.6938329244872425, "learning_rate": 3.6977685223259285e-06, "loss": 0.5101, "step": 4681 }, { "epoch": 0.6, "grad_norm": 0.6415441312676198, "learning_rate": 3.6957765335208983e-06, "loss": 0.5614, "step": 4682 }, { "epoch": 0.6, "grad_norm": 0.5911340111928757, "learning_rate": 3.6937847668115377e-06, "loss": 0.4684, "step": 4683 }, { "epoch": 0.6, "grad_norm": 0.6856324722840981, "learning_rate": 3.691793222537023e-06, "loss": 0.5009, "step": 4684 }, { "epoch": 0.6, "grad_norm": 0.7456400266005688, "learning_rate": 3.6898019010364934e-06, "loss": 0.5343, "step": 4685 }, { "epoch": 0.6, "grad_norm": 0.6818814852539161, "learning_rate": 3.6878108026490505e-06, "loss": 0.5622, "step": 4686 }, { "epoch": 0.6, "grad_norm": 0.82951989671035, "learning_rate": 3.685819927713758e-06, "loss": 0.5651, "step": 4687 }, { "epoch": 0.6, "grad_norm": 0.6892505032378883, "learning_rate": 3.6838292765696403e-06, "loss": 0.5902, "step": 4688 }, { "epoch": 0.6, "grad_norm": 0.7555375188398477, "learning_rate": 3.6818388495556856e-06, "loss": 0.5895, "step": 4689 }, { "epoch": 0.6, "grad_norm": 0.6415078040758984, "learning_rate": 3.679848647010842e-06, "loss": 0.5481, "step": 4690 }, { "epoch": 0.6, "grad_norm": 0.7633098209620045, "learning_rate": 3.6778586692740205e-06, "loss": 0.5728, "step": 4691 }, { "epoch": 0.6, "grad_norm": 0.7461049811139693, "learning_rate": 3.675868916684093e-06, "loss": 0.5494, "step": 4692 }, { "epoch": 0.6, "grad_norm": 0.8512081032029335, "learning_rate": 3.673879389579895e-06, "loss": 0.5674, "step": 4693 }, { "epoch": 0.6, "grad_norm": 0.6281200941907052, "learning_rate": 3.671890088300222e-06, "loss": 0.5172, "step": 4694 }, { "epoch": 0.6, "grad_norm": 0.7935366897133953, "learning_rate": 3.66990101318383e-06, "loss": 0.5418, "step": 4695 }, { "epoch": 0.6, "grad_norm": 0.6703064248489337, "learning_rate": 3.6679121645694392e-06, "loss": 0.5116, "step": 4696 }, { "epoch": 0.6, "grad_norm": 0.9829618803020502, "learning_rate": 3.6659235427957286e-06, "loss": 0.6556, "step": 4697 }, { "epoch": 0.6, "grad_norm": 0.8332846806246345, "learning_rate": 3.663935148201341e-06, "loss": 0.5956, "step": 4698 }, { "epoch": 0.6, "grad_norm": 0.675293595226352, "learning_rate": 3.6619469811248786e-06, "loss": 0.5396, "step": 4699 }, { "epoch": 0.6, "grad_norm": 0.7926402427488014, "learning_rate": 3.659959041904905e-06, "loss": 0.6698, "step": 4700 }, { "epoch": 0.6, "grad_norm": 0.6435200795986242, "learning_rate": 3.657971330879947e-06, "loss": 0.5134, "step": 4701 }, { "epoch": 0.6, "grad_norm": 0.8759783237199781, "learning_rate": 3.655983848388489e-06, "loss": 0.5587, "step": 4702 }, { "epoch": 0.6, "grad_norm": 0.5970316159027351, "learning_rate": 3.6539965947689804e-06, "loss": 0.492, "step": 4703 }, { "epoch": 0.6, "grad_norm": 0.8362960272663376, "learning_rate": 3.652009570359829e-06, "loss": 0.6446, "step": 4704 }, { "epoch": 0.6, "grad_norm": 0.6712304329028754, "learning_rate": 3.6500227754994046e-06, "loss": 0.5997, "step": 4705 }, { "epoch": 0.6, "grad_norm": 0.7885645641086384, "learning_rate": 3.648036210526037e-06, "loss": 0.579, "step": 4706 }, { "epoch": 0.6, "grad_norm": 0.8104668961366499, "learning_rate": 3.646049875778019e-06, "loss": 0.6482, "step": 4707 }, { "epoch": 0.6, "grad_norm": 0.8037702546272149, "learning_rate": 3.644063771593601e-06, "loss": 0.5602, "step": 4708 }, { "epoch": 0.6, "grad_norm": 0.8399692509109183, "learning_rate": 3.6420778983109974e-06, "loss": 0.6097, "step": 4709 }, { "epoch": 0.6, "grad_norm": 0.9974522283518326, "learning_rate": 3.640092256268381e-06, "loss": 0.6135, "step": 4710 }, { "epoch": 0.6, "grad_norm": 0.8759427678970045, "learning_rate": 3.638106845803886e-06, "loss": 0.6302, "step": 4711 }, { "epoch": 0.6, "grad_norm": 0.8720126711295777, "learning_rate": 3.6361216672556067e-06, "loss": 0.6297, "step": 4712 }, { "epoch": 0.6, "grad_norm": 0.6748568877463499, "learning_rate": 3.6341367209615987e-06, "loss": 0.5398, "step": 4713 }, { "epoch": 0.6, "grad_norm": 0.6485845582787974, "learning_rate": 3.6321520072598775e-06, "loss": 0.514, "step": 4714 }, { "epoch": 0.6, "grad_norm": 0.8883938647630483, "learning_rate": 3.6301675264884197e-06, "loss": 0.5815, "step": 4715 }, { "epoch": 0.6, "grad_norm": 0.7587650053415198, "learning_rate": 3.628183278985161e-06, "loss": 0.5781, "step": 4716 }, { "epoch": 0.6, "grad_norm": 0.818713374105233, "learning_rate": 3.6261992650879986e-06, "loss": 0.6759, "step": 4717 }, { "epoch": 0.6, "grad_norm": 0.6274169080295308, "learning_rate": 3.6242154851347885e-06, "loss": 0.5247, "step": 4718 }, { "epoch": 0.6, "grad_norm": 0.8868389399481289, "learning_rate": 3.6222319394633488e-06, "loss": 0.6506, "step": 4719 }, { "epoch": 0.6, "grad_norm": 0.7376488209664734, "learning_rate": 3.6202486284114567e-06, "loss": 0.5648, "step": 4720 }, { "epoch": 0.6, "grad_norm": 1.063400957229389, "learning_rate": 3.6182655523168493e-06, "loss": 0.6769, "step": 4721 }, { "epoch": 0.6, "grad_norm": 0.8040217481290075, "learning_rate": 3.6162827115172234e-06, "loss": 0.6563, "step": 4722 }, { "epoch": 0.6, "grad_norm": 0.6530126408569679, "learning_rate": 3.614300106350236e-06, "loss": 0.525, "step": 4723 }, { "epoch": 0.6, "grad_norm": 0.9042997592322346, "learning_rate": 3.6123177371535044e-06, "loss": 0.6736, "step": 4724 }, { "epoch": 0.6, "grad_norm": 0.933224436955365, "learning_rate": 3.6103356042646054e-06, "loss": 0.5819, "step": 4725 }, { "epoch": 0.6, "grad_norm": 0.9131729759955876, "learning_rate": 3.608353708021076e-06, "loss": 0.6167, "step": 4726 }, { "epoch": 0.6, "grad_norm": 0.6846960226708735, "learning_rate": 3.6063720487604113e-06, "loss": 0.5191, "step": 4727 }, { "epoch": 0.6, "grad_norm": 0.7923886627651665, "learning_rate": 3.604390626820069e-06, "loss": 0.6139, "step": 4728 }, { "epoch": 0.6, "grad_norm": 0.6206508519151634, "learning_rate": 3.6024094425374625e-06, "loss": 0.485, "step": 4729 }, { "epoch": 0.6, "grad_norm": 0.6615512670131243, "learning_rate": 3.6004284962499686e-06, "loss": 0.5389, "step": 4730 }, { "epoch": 0.6, "grad_norm": 0.8409763260067968, "learning_rate": 3.5984477882949198e-06, "loss": 0.6355, "step": 4731 }, { "epoch": 0.6, "grad_norm": 0.7543320709288874, "learning_rate": 3.596467319009613e-06, "loss": 0.529, "step": 4732 }, { "epoch": 0.6, "grad_norm": 0.7972509596251136, "learning_rate": 3.5944870887312993e-06, "loss": 0.631, "step": 4733 }, { "epoch": 0.6, "grad_norm": 0.6931952200348175, "learning_rate": 3.592507097797192e-06, "loss": 0.5464, "step": 4734 }, { "epoch": 0.6, "grad_norm": 0.84730794076543, "learning_rate": 3.5905273465444627e-06, "loss": 0.6511, "step": 4735 }, { "epoch": 0.6, "grad_norm": 1.0341919405731168, "learning_rate": 3.5885478353102428e-06, "loss": 0.6207, "step": 4736 }, { "epoch": 0.6, "grad_norm": 0.6754062150535651, "learning_rate": 3.5865685644316217e-06, "loss": 0.5452, "step": 4737 }, { "epoch": 0.6, "grad_norm": 0.846293908241235, "learning_rate": 3.584589534245649e-06, "loss": 0.6017, "step": 4738 }, { "epoch": 0.6, "grad_norm": 0.874981888161896, "learning_rate": 3.5826107450893337e-06, "loss": 0.613, "step": 4739 }, { "epoch": 0.6, "grad_norm": 0.8135745020609386, "learning_rate": 3.5806321972996416e-06, "loss": 0.5901, "step": 4740 }, { "epoch": 0.6, "grad_norm": 0.7981840046548233, "learning_rate": 3.5786538912135004e-06, "loss": 0.6247, "step": 4741 }, { "epoch": 0.6, "grad_norm": 0.8895879720504103, "learning_rate": 3.576675827167793e-06, "loss": 0.6475, "step": 4742 }, { "epoch": 0.6, "grad_norm": 0.898437566901641, "learning_rate": 3.5746980054993646e-06, "loss": 0.6062, "step": 4743 }, { "epoch": 0.6, "grad_norm": 0.8722425470485958, "learning_rate": 3.5727204265450165e-06, "loss": 0.6639, "step": 4744 }, { "epoch": 0.6, "grad_norm": 0.6293242240164223, "learning_rate": 3.5707430906415108e-06, "loss": 0.5502, "step": 4745 }, { "epoch": 0.6, "grad_norm": 0.7281008507815523, "learning_rate": 3.568765998125567e-06, "loss": 0.5515, "step": 4746 }, { "epoch": 0.6, "grad_norm": 0.7322492096029514, "learning_rate": 3.5667891493338633e-06, "loss": 0.599, "step": 4747 }, { "epoch": 0.6, "grad_norm": 0.8091610492308802, "learning_rate": 3.564812544603036e-06, "loss": 0.6756, "step": 4748 }, { "epoch": 0.61, "grad_norm": 0.9074481453523645, "learning_rate": 3.5628361842696813e-06, "loss": 0.6557, "step": 4749 }, { "epoch": 0.61, "grad_norm": 1.0181248975968313, "learning_rate": 3.560860068670352e-06, "loss": 0.6977, "step": 4750 }, { "epoch": 0.61, "grad_norm": 0.7629335920540325, "learning_rate": 3.55888419814156e-06, "loss": 0.5502, "step": 4751 }, { "epoch": 0.61, "grad_norm": 0.6844872403287836, "learning_rate": 3.556908573019776e-06, "loss": 0.4857, "step": 4752 }, { "epoch": 0.61, "grad_norm": 0.746712634132732, "learning_rate": 3.5549331936414276e-06, "loss": 0.5988, "step": 4753 }, { "epoch": 0.61, "grad_norm": 0.681013144387057, "learning_rate": 3.5529580603428997e-06, "loss": 0.596, "step": 4754 }, { "epoch": 0.61, "grad_norm": 0.6851246791868632, "learning_rate": 3.5509831734605414e-06, "loss": 0.561, "step": 4755 }, { "epoch": 0.61, "grad_norm": 0.8228783616223908, "learning_rate": 3.549008533330652e-06, "loss": 0.6276, "step": 4756 }, { "epoch": 0.61, "grad_norm": 0.6257032998750763, "learning_rate": 3.547034140289494e-06, "loss": 0.468, "step": 4757 }, { "epoch": 0.61, "grad_norm": 0.9414331319022593, "learning_rate": 3.5450599946732846e-06, "loss": 0.641, "step": 4758 }, { "epoch": 0.61, "grad_norm": 0.6463721066780047, "learning_rate": 3.543086096818201e-06, "loss": 0.5247, "step": 4759 }, { "epoch": 0.61, "grad_norm": 0.7686275151203134, "learning_rate": 3.5411124470603763e-06, "loss": 0.5471, "step": 4760 }, { "epoch": 0.61, "grad_norm": 0.7552823240815967, "learning_rate": 3.539139045735904e-06, "loss": 0.5285, "step": 4761 }, { "epoch": 0.61, "grad_norm": 0.8300872376137409, "learning_rate": 3.5371658931808327e-06, "loss": 0.6134, "step": 4762 }, { "epoch": 0.61, "grad_norm": 0.789759722416413, "learning_rate": 3.5351929897311698e-06, "loss": 0.5262, "step": 4763 }, { "epoch": 0.61, "grad_norm": 0.6988512924983046, "learning_rate": 3.5332203357228813e-06, "loss": 0.556, "step": 4764 }, { "epoch": 0.61, "grad_norm": 0.717569948873118, "learning_rate": 3.5312479314918878e-06, "loss": 0.575, "step": 4765 }, { "epoch": 0.61, "grad_norm": 0.7087328481554697, "learning_rate": 3.5292757773740717e-06, "loss": 0.5661, "step": 4766 }, { "epoch": 0.61, "grad_norm": 0.6237070500470364, "learning_rate": 3.5273038737052676e-06, "loss": 0.4752, "step": 4767 }, { "epoch": 0.61, "grad_norm": 1.0817877986738431, "learning_rate": 3.525332220821272e-06, "loss": 0.7145, "step": 4768 }, { "epoch": 0.61, "grad_norm": 0.7802200775419803, "learning_rate": 3.523360819057836e-06, "loss": 0.5728, "step": 4769 }, { "epoch": 0.61, "grad_norm": 0.7168916473892792, "learning_rate": 3.5213896687506686e-06, "loss": 0.5682, "step": 4770 }, { "epoch": 0.61, "grad_norm": 0.7133472910923415, "learning_rate": 3.5194187702354377e-06, "loss": 0.5635, "step": 4771 }, { "epoch": 0.61, "grad_norm": 0.7420869093246777, "learning_rate": 3.517448123847764e-06, "loss": 0.5375, "step": 4772 }, { "epoch": 0.61, "grad_norm": 0.7052125350552413, "learning_rate": 3.5154777299232312e-06, "loss": 0.5782, "step": 4773 }, { "epoch": 0.61, "grad_norm": 0.7947565903628313, "learning_rate": 3.5135075887973747e-06, "loss": 0.603, "step": 4774 }, { "epoch": 0.61, "grad_norm": 0.6601721262691077, "learning_rate": 3.51153770080569e-06, "loss": 0.5774, "step": 4775 }, { "epoch": 0.61, "grad_norm": 0.7798491189633278, "learning_rate": 3.509568066283628e-06, "loss": 0.5489, "step": 4776 }, { "epoch": 0.61, "grad_norm": 0.8049342102593532, "learning_rate": 3.5075986855665955e-06, "loss": 0.6427, "step": 4777 }, { "epoch": 0.61, "grad_norm": 0.6480478624632887, "learning_rate": 3.5056295589899615e-06, "loss": 0.513, "step": 4778 }, { "epoch": 0.61, "grad_norm": 1.0040250923365985, "learning_rate": 3.5036606868890444e-06, "loss": 0.6429, "step": 4779 }, { "epoch": 0.61, "grad_norm": 0.7627620983937304, "learning_rate": 3.501692069599123e-06, "loss": 0.5413, "step": 4780 }, { "epoch": 0.61, "grad_norm": 0.9708481647245439, "learning_rate": 3.4997237074554334e-06, "loss": 0.6539, "step": 4781 }, { "epoch": 0.61, "grad_norm": 0.8584156691406621, "learning_rate": 3.4977556007931672e-06, "loss": 0.557, "step": 4782 }, { "epoch": 0.61, "grad_norm": 0.7553294821132691, "learning_rate": 3.495787749947471e-06, "loss": 0.5992, "step": 4783 }, { "epoch": 0.61, "grad_norm": 0.7935100057445086, "learning_rate": 3.493820155253451e-06, "loss": 0.5411, "step": 4784 }, { "epoch": 0.61, "grad_norm": 0.7093719651309593, "learning_rate": 3.4918528170461673e-06, "loss": 0.553, "step": 4785 }, { "epoch": 0.61, "grad_norm": 0.6253463183542285, "learning_rate": 3.489885735660637e-06, "loss": 0.5021, "step": 4786 }, { "epoch": 0.61, "grad_norm": 0.6509738502962583, "learning_rate": 3.4879189114318337e-06, "loss": 0.4994, "step": 4787 }, { "epoch": 0.61, "grad_norm": 0.7038564590150723, "learning_rate": 3.4859523446946865e-06, "loss": 0.494, "step": 4788 }, { "epoch": 0.61, "grad_norm": 0.6423683584764396, "learning_rate": 3.4839860357840826e-06, "loss": 0.5628, "step": 4789 }, { "epoch": 0.61, "grad_norm": 0.7389609376070936, "learning_rate": 3.482019985034861e-06, "loss": 0.5342, "step": 4790 }, { "epoch": 0.61, "grad_norm": 0.7882749867642856, "learning_rate": 3.480054192781823e-06, "loss": 0.5598, "step": 4791 }, { "epoch": 0.61, "grad_norm": 0.7938943532587981, "learning_rate": 3.478088659359722e-06, "loss": 0.5907, "step": 4792 }, { "epoch": 0.61, "grad_norm": 0.8265812136373645, "learning_rate": 3.4761233851032674e-06, "loss": 0.6368, "step": 4793 }, { "epoch": 0.61, "grad_norm": 0.7894155094085734, "learning_rate": 3.474158370347124e-06, "loss": 0.6046, "step": 4794 }, { "epoch": 0.61, "grad_norm": 0.7746740583737033, "learning_rate": 3.472193615425914e-06, "loss": 0.5565, "step": 4795 }, { "epoch": 0.61, "grad_norm": 0.945757961316094, "learning_rate": 3.4702291206742145e-06, "loss": 0.6428, "step": 4796 }, { "epoch": 0.61, "grad_norm": 0.7672917925716772, "learning_rate": 3.468264886426559e-06, "loss": 0.5558, "step": 4797 }, { "epoch": 0.61, "grad_norm": 0.9071892592053592, "learning_rate": 3.466300913017436e-06, "loss": 0.6626, "step": 4798 }, { "epoch": 0.61, "grad_norm": 0.8473330708568858, "learning_rate": 3.464337200781289e-06, "loss": 0.5902, "step": 4799 }, { "epoch": 0.61, "grad_norm": 0.8493850563212643, "learning_rate": 3.462373750052519e-06, "loss": 0.616, "step": 4800 }, { "epoch": 0.61, "grad_norm": 0.6763096686896363, "learning_rate": 3.46041056116548e-06, "loss": 0.5392, "step": 4801 }, { "epoch": 0.61, "grad_norm": 0.6518321999835311, "learning_rate": 3.4584476344544836e-06, "loss": 0.5771, "step": 4802 }, { "epoch": 0.61, "grad_norm": 0.9043667572066351, "learning_rate": 3.456484970253794e-06, "loss": 0.6055, "step": 4803 }, { "epoch": 0.61, "grad_norm": 0.7155071234151839, "learning_rate": 3.4545225688976345e-06, "loss": 0.549, "step": 4804 }, { "epoch": 0.61, "grad_norm": 0.8478511896925388, "learning_rate": 3.452560430720181e-06, "loss": 0.5928, "step": 4805 }, { "epoch": 0.61, "grad_norm": 0.9009720535558737, "learning_rate": 3.4505985560555657e-06, "loss": 0.6368, "step": 4806 }, { "epoch": 0.61, "grad_norm": 0.9320451120311418, "learning_rate": 3.448636945237874e-06, "loss": 0.5805, "step": 4807 }, { "epoch": 0.61, "grad_norm": 0.7005472627380441, "learning_rate": 3.4466755986011484e-06, "loss": 0.5557, "step": 4808 }, { "epoch": 0.61, "grad_norm": 0.8033978323764367, "learning_rate": 3.444714516479386e-06, "loss": 0.6297, "step": 4809 }, { "epoch": 0.61, "grad_norm": 1.206176008969584, "learning_rate": 3.442753699206539e-06, "loss": 0.6441, "step": 4810 }, { "epoch": 0.61, "grad_norm": 0.7966491427373297, "learning_rate": 3.440793147116514e-06, "loss": 0.5994, "step": 4811 }, { "epoch": 0.61, "grad_norm": 0.7596099901560068, "learning_rate": 3.438832860543172e-06, "loss": 0.5521, "step": 4812 }, { "epoch": 0.61, "grad_norm": 1.0325413646549357, "learning_rate": 3.4368728398203297e-06, "loss": 0.6488, "step": 4813 }, { "epoch": 0.61, "grad_norm": 0.7157274817519422, "learning_rate": 3.434913085281758e-06, "loss": 0.5207, "step": 4814 }, { "epoch": 0.61, "grad_norm": 0.7016189231796639, "learning_rate": 3.432953597261183e-06, "loss": 0.5589, "step": 4815 }, { "epoch": 0.61, "grad_norm": 1.1038534068692278, "learning_rate": 3.4309943760922844e-06, "loss": 0.659, "step": 4816 }, { "epoch": 0.61, "grad_norm": 0.7399367983175158, "learning_rate": 3.4290354221086985e-06, "loss": 0.5412, "step": 4817 }, { "epoch": 0.61, "grad_norm": 0.7766395142912038, "learning_rate": 3.4270767356440137e-06, "loss": 0.5371, "step": 4818 }, { "epoch": 0.61, "grad_norm": 0.8171759037871216, "learning_rate": 3.4251183170317746e-06, "loss": 0.648, "step": 4819 }, { "epoch": 0.61, "grad_norm": 0.765875795243998, "learning_rate": 3.4231601666054793e-06, "loss": 0.521, "step": 4820 }, { "epoch": 0.61, "grad_norm": 0.7135420020722786, "learning_rate": 3.4212022846985805e-06, "loss": 0.5164, "step": 4821 }, { "epoch": 0.61, "grad_norm": 0.7769823514206529, "learning_rate": 3.419244671644484e-06, "loss": 0.5462, "step": 4822 }, { "epoch": 0.61, "grad_norm": 0.6670391683974994, "learning_rate": 3.4172873277765515e-06, "loss": 0.5995, "step": 4823 }, { "epoch": 0.61, "grad_norm": 0.9756655126366315, "learning_rate": 3.4153302534280994e-06, "loss": 0.6389, "step": 4824 }, { "epoch": 0.61, "grad_norm": 0.7274549141871021, "learning_rate": 3.4133734489323957e-06, "loss": 0.5256, "step": 4825 }, { "epoch": 0.61, "grad_norm": 0.6844493289585716, "learning_rate": 3.411416914622665e-06, "loss": 0.5266, "step": 4826 }, { "epoch": 0.61, "grad_norm": 0.7214241116810347, "learning_rate": 3.4094606508320827e-06, "loss": 0.5598, "step": 4827 }, { "epoch": 0.62, "grad_norm": 0.6434347799421288, "learning_rate": 3.4075046578937826e-06, "loss": 0.4939, "step": 4828 }, { "epoch": 0.62, "grad_norm": 0.7057183476839789, "learning_rate": 3.405548936140847e-06, "loss": 0.5691, "step": 4829 }, { "epoch": 0.62, "grad_norm": 0.7440769219138041, "learning_rate": 3.403593485906318e-06, "loss": 0.5462, "step": 4830 }, { "epoch": 0.62, "grad_norm": 0.8069058421927559, "learning_rate": 3.401638307523187e-06, "loss": 0.6443, "step": 4831 }, { "epoch": 0.62, "grad_norm": 0.8133869617537681, "learning_rate": 3.3996834013244006e-06, "loss": 0.5978, "step": 4832 }, { "epoch": 0.62, "grad_norm": 0.7080877040370535, "learning_rate": 3.3977287676428594e-06, "loss": 0.5852, "step": 4833 }, { "epoch": 0.62, "grad_norm": 0.7470867081227013, "learning_rate": 3.3957744068114168e-06, "loss": 0.523, "step": 4834 }, { "epoch": 0.62, "grad_norm": 0.8536832094165678, "learning_rate": 3.3938203191628795e-06, "loss": 0.6343, "step": 4835 }, { "epoch": 0.62, "grad_norm": 0.7998581367089739, "learning_rate": 3.391866505030009e-06, "loss": 0.5595, "step": 4836 }, { "epoch": 0.62, "grad_norm": 1.0619694756094007, "learning_rate": 3.3899129647455194e-06, "loss": 0.6412, "step": 4837 }, { "epoch": 0.62, "grad_norm": 1.0017191695412018, "learning_rate": 3.3879596986420782e-06, "loss": 0.6416, "step": 4838 }, { "epoch": 0.62, "grad_norm": 0.8679015960493628, "learning_rate": 3.3860067070523057e-06, "loss": 0.6365, "step": 4839 }, { "epoch": 0.62, "grad_norm": 0.7598776484629107, "learning_rate": 3.3840539903087766e-06, "loss": 0.5379, "step": 4840 }, { "epoch": 0.62, "grad_norm": 0.8279308677174302, "learning_rate": 3.3821015487440184e-06, "loss": 0.6146, "step": 4841 }, { "epoch": 0.62, "grad_norm": 0.6849055828479812, "learning_rate": 3.380149382690511e-06, "loss": 0.5576, "step": 4842 }, { "epoch": 0.62, "grad_norm": 0.7739495777487878, "learning_rate": 3.378197492480688e-06, "loss": 0.6603, "step": 4843 }, { "epoch": 0.62, "grad_norm": 0.8283635070337946, "learning_rate": 3.3762458784469367e-06, "loss": 0.5969, "step": 4844 }, { "epoch": 0.62, "grad_norm": 0.7790218080956149, "learning_rate": 3.374294540921596e-06, "loss": 0.62, "step": 4845 }, { "epoch": 0.62, "grad_norm": 0.6751833785401478, "learning_rate": 3.372343480236959e-06, "loss": 0.5433, "step": 4846 }, { "epoch": 0.62, "grad_norm": 0.7016284353913876, "learning_rate": 3.37039269672527e-06, "loss": 0.5176, "step": 4847 }, { "epoch": 0.62, "grad_norm": 0.8392908861156689, "learning_rate": 3.368442190718728e-06, "loss": 0.6295, "step": 4848 }, { "epoch": 0.62, "grad_norm": 0.6836045134325753, "learning_rate": 3.366491962549483e-06, "loss": 0.5127, "step": 4849 }, { "epoch": 0.62, "grad_norm": 0.6597338883743055, "learning_rate": 3.3645420125496398e-06, "loss": 0.5217, "step": 4850 }, { "epoch": 0.62, "grad_norm": 0.813766879527436, "learning_rate": 3.362592341051253e-06, "loss": 0.5392, "step": 4851 }, { "epoch": 0.62, "grad_norm": 0.8460313155741838, "learning_rate": 3.360642948386332e-06, "loss": 0.6343, "step": 4852 }, { "epoch": 0.62, "grad_norm": 0.7923539545087678, "learning_rate": 3.3586938348868393e-06, "loss": 0.5164, "step": 4853 }, { "epoch": 0.62, "grad_norm": 0.702408947430897, "learning_rate": 3.356745000884687e-06, "loss": 0.5502, "step": 4854 }, { "epoch": 0.62, "grad_norm": 0.6612461276502262, "learning_rate": 3.354796446711739e-06, "loss": 0.5071, "step": 4855 }, { "epoch": 0.62, "grad_norm": 0.8038853273832788, "learning_rate": 3.352848172699819e-06, "loss": 0.5776, "step": 4856 }, { "epoch": 0.62, "grad_norm": 1.042661789026824, "learning_rate": 3.3509001791806957e-06, "loss": 0.5893, "step": 4857 }, { "epoch": 0.62, "grad_norm": 0.9172990715480969, "learning_rate": 3.3489524664860912e-06, "loss": 0.6139, "step": 4858 }, { "epoch": 0.62, "grad_norm": 0.7188960921970509, "learning_rate": 3.3470050349476814e-06, "loss": 0.6237, "step": 4859 }, { "epoch": 0.62, "grad_norm": 0.7113900775815523, "learning_rate": 3.345057884897093e-06, "loss": 0.5448, "step": 4860 }, { "epoch": 0.62, "grad_norm": 0.8496930468829204, "learning_rate": 3.3431110166659055e-06, "loss": 0.6478, "step": 4861 }, { "epoch": 0.62, "grad_norm": 0.8532105449038324, "learning_rate": 3.3411644305856507e-06, "loss": 0.6977, "step": 4862 }, { "epoch": 0.62, "grad_norm": 0.7631110646799267, "learning_rate": 3.339218126987812e-06, "loss": 0.567, "step": 4863 }, { "epoch": 0.62, "grad_norm": 0.6766010557626443, "learning_rate": 3.337272106203825e-06, "loss": 0.5492, "step": 4864 }, { "epoch": 0.62, "grad_norm": 0.8998840420935064, "learning_rate": 3.335326368565075e-06, "loss": 0.627, "step": 4865 }, { "epoch": 0.62, "grad_norm": 0.858024881401816, "learning_rate": 3.333380914402903e-06, "loss": 0.5879, "step": 4866 }, { "epoch": 0.62, "grad_norm": 0.7614518312974131, "learning_rate": 3.3314357440485977e-06, "loss": 0.55, "step": 4867 }, { "epoch": 0.62, "grad_norm": 0.8000254000280547, "learning_rate": 3.3294908578334027e-06, "loss": 0.6017, "step": 4868 }, { "epoch": 0.62, "grad_norm": 0.6982594416499359, "learning_rate": 3.327546256088512e-06, "loss": 0.5669, "step": 4869 }, { "epoch": 0.62, "grad_norm": 0.6725220476244151, "learning_rate": 3.3256019391450696e-06, "loss": 0.5649, "step": 4870 }, { "epoch": 0.62, "grad_norm": 0.989083363135568, "learning_rate": 3.3236579073341735e-06, "loss": 0.6282, "step": 4871 }, { "epoch": 0.62, "grad_norm": 0.6353686708705769, "learning_rate": 3.3217141609868713e-06, "loss": 0.5435, "step": 4872 }, { "epoch": 0.62, "grad_norm": 0.8655108665798321, "learning_rate": 3.319770700434164e-06, "loss": 0.6262, "step": 4873 }, { "epoch": 0.62, "grad_norm": 0.8672443351181393, "learning_rate": 3.3178275260070002e-06, "loss": 0.5959, "step": 4874 }, { "epoch": 0.62, "grad_norm": 0.7880321689007912, "learning_rate": 3.315884638036285e-06, "loss": 0.591, "step": 4875 }, { "epoch": 0.62, "grad_norm": 0.865815556073217, "learning_rate": 3.3139420368528713e-06, "loss": 0.612, "step": 4876 }, { "epoch": 0.62, "grad_norm": 0.8418796330647859, "learning_rate": 3.3119997227875635e-06, "loss": 0.6577, "step": 4877 }, { "epoch": 0.62, "grad_norm": 0.9258534237585381, "learning_rate": 3.3100576961711173e-06, "loss": 0.6396, "step": 4878 }, { "epoch": 0.62, "grad_norm": 0.7562459828733519, "learning_rate": 3.3081159573342404e-06, "loss": 0.5109, "step": 4879 }, { "epoch": 0.62, "grad_norm": 1.17661149627926, "learning_rate": 3.306174506607589e-06, "loss": 0.6467, "step": 4880 }, { "epoch": 0.62, "grad_norm": 0.6524699833054226, "learning_rate": 3.3042333443217733e-06, "loss": 0.5507, "step": 4881 }, { "epoch": 0.62, "grad_norm": 0.8411939304649696, "learning_rate": 3.302292470807353e-06, "loss": 0.6535, "step": 4882 }, { "epoch": 0.62, "grad_norm": 0.9588206798839688, "learning_rate": 3.300351886394838e-06, "loss": 0.5941, "step": 4883 }, { "epoch": 0.62, "grad_norm": 0.6656219379954181, "learning_rate": 3.2984115914146902e-06, "loss": 0.5588, "step": 4884 }, { "epoch": 0.62, "grad_norm": 0.7143392009954862, "learning_rate": 3.296471586197321e-06, "loss": 0.5988, "step": 4885 }, { "epoch": 0.62, "grad_norm": 0.892175453269238, "learning_rate": 3.294531871073094e-06, "loss": 0.6374, "step": 4886 }, { "epoch": 0.62, "grad_norm": 0.9017819338410041, "learning_rate": 3.2925924463723213e-06, "loss": 0.6911, "step": 4887 }, { "epoch": 0.62, "grad_norm": 0.8813805627169389, "learning_rate": 3.290653312425267e-06, "loss": 0.6316, "step": 4888 }, { "epoch": 0.62, "grad_norm": 0.7540193800724198, "learning_rate": 3.2887144695621464e-06, "loss": 0.5318, "step": 4889 }, { "epoch": 0.62, "grad_norm": 0.8585230468011072, "learning_rate": 3.286775918113123e-06, "loss": 0.6621, "step": 4890 }, { "epoch": 0.62, "grad_norm": 0.8573584847627042, "learning_rate": 3.2848376584083134e-06, "loss": 0.606, "step": 4891 }, { "epoch": 0.62, "grad_norm": 0.7666590412801827, "learning_rate": 3.282899690777782e-06, "loss": 0.5697, "step": 4892 }, { "epoch": 0.62, "grad_norm": 0.6413491102721429, "learning_rate": 3.280962015551544e-06, "loss": 0.5733, "step": 4893 }, { "epoch": 0.62, "grad_norm": 0.6642561091882658, "learning_rate": 3.279024633059567e-06, "loss": 0.5173, "step": 4894 }, { "epoch": 0.62, "grad_norm": 0.6479034186218848, "learning_rate": 3.277087543631765e-06, "loss": 0.4998, "step": 4895 }, { "epoch": 0.62, "grad_norm": 0.6952032316172522, "learning_rate": 3.275150747598006e-06, "loss": 0.5558, "step": 4896 }, { "epoch": 0.62, "grad_norm": 0.7282713974997099, "learning_rate": 3.273214245288105e-06, "loss": 0.5682, "step": 4897 }, { "epoch": 0.62, "grad_norm": 0.6436940871354166, "learning_rate": 3.2712780370318287e-06, "loss": 0.5269, "step": 4898 }, { "epoch": 0.62, "grad_norm": 1.0035014834444884, "learning_rate": 3.269342123158892e-06, "loss": 0.6074, "step": 4899 }, { "epoch": 0.62, "grad_norm": 0.7674967010196748, "learning_rate": 3.2674065039989634e-06, "loss": 0.5995, "step": 4900 }, { "epoch": 0.62, "grad_norm": 0.9119643740731648, "learning_rate": 3.265471179881655e-06, "loss": 0.6521, "step": 4901 }, { "epoch": 0.62, "grad_norm": 0.673955547997138, "learning_rate": 3.263536151136536e-06, "loss": 0.547, "step": 4902 }, { "epoch": 0.62, "grad_norm": 0.8012164055101305, "learning_rate": 3.26160141809312e-06, "loss": 0.575, "step": 4903 }, { "epoch": 0.62, "grad_norm": 0.6713412322603758, "learning_rate": 3.259666981080871e-06, "loss": 0.53, "step": 4904 }, { "epoch": 0.62, "grad_norm": 0.7579530035280362, "learning_rate": 3.257732840429206e-06, "loss": 0.579, "step": 4905 }, { "epoch": 0.63, "grad_norm": 0.9239961923841928, "learning_rate": 3.2557989964674864e-06, "loss": 0.6146, "step": 4906 }, { "epoch": 0.63, "grad_norm": 0.80694798824872, "learning_rate": 3.2538654495250277e-06, "loss": 0.6353, "step": 4907 }, { "epoch": 0.63, "grad_norm": 0.7222510736191212, "learning_rate": 3.251932199931091e-06, "loss": 0.5571, "step": 4908 }, { "epoch": 0.63, "grad_norm": 0.7502950042134441, "learning_rate": 3.24999924801489e-06, "loss": 0.5256, "step": 4909 }, { "epoch": 0.63, "grad_norm": 0.8608629489315914, "learning_rate": 3.2480665941055856e-06, "loss": 0.6357, "step": 4910 }, { "epoch": 0.63, "grad_norm": 0.7418677042212336, "learning_rate": 3.2461342385322882e-06, "loss": 0.5605, "step": 4911 }, { "epoch": 0.63, "grad_norm": 0.7832289109113795, "learning_rate": 3.2442021816240587e-06, "loss": 0.5425, "step": 4912 }, { "epoch": 0.63, "grad_norm": 0.8632173099569125, "learning_rate": 3.2422704237099058e-06, "loss": 0.6517, "step": 4913 }, { "epoch": 0.63, "grad_norm": 0.835718632633242, "learning_rate": 3.2403389651187865e-06, "loss": 0.6461, "step": 4914 }, { "epoch": 0.63, "grad_norm": 0.6475646693020294, "learning_rate": 3.2384078061796107e-06, "loss": 0.5699, "step": 4915 }, { "epoch": 0.63, "grad_norm": 0.6756160597692056, "learning_rate": 3.236476947221234e-06, "loss": 0.5497, "step": 4916 }, { "epoch": 0.63, "grad_norm": 0.7360523277398411, "learning_rate": 3.2345463885724603e-06, "loss": 0.5519, "step": 4917 }, { "epoch": 0.63, "grad_norm": 0.7857838372051005, "learning_rate": 3.2326161305620445e-06, "loss": 0.5953, "step": 4918 }, { "epoch": 0.63, "grad_norm": 1.0867414981901475, "learning_rate": 3.230686173518689e-06, "loss": 0.5561, "step": 4919 }, { "epoch": 0.63, "grad_norm": 0.8334901216951245, "learning_rate": 3.2287565177710457e-06, "loss": 0.6191, "step": 4920 }, { "epoch": 0.63, "grad_norm": 0.8252590404172756, "learning_rate": 3.226827163647715e-06, "loss": 0.54, "step": 4921 }, { "epoch": 0.63, "grad_norm": 0.6342083779239805, "learning_rate": 3.224898111477245e-06, "loss": 0.4946, "step": 4922 }, { "epoch": 0.63, "grad_norm": 0.8927706406972433, "learning_rate": 3.222969361588134e-06, "loss": 0.6247, "step": 4923 }, { "epoch": 0.63, "grad_norm": 0.8496782475841955, "learning_rate": 3.2210409143088283e-06, "loss": 0.5323, "step": 4924 }, { "epoch": 0.63, "grad_norm": 0.7886095191936603, "learning_rate": 3.219112769967721e-06, "loss": 0.5769, "step": 4925 }, { "epoch": 0.63, "grad_norm": 0.7990749223210991, "learning_rate": 3.2171849288931563e-06, "loss": 0.6367, "step": 4926 }, { "epoch": 0.63, "grad_norm": 0.9538821994478781, "learning_rate": 3.2152573914134233e-06, "loss": 0.6341, "step": 4927 }, { "epoch": 0.63, "grad_norm": 0.6930772956881712, "learning_rate": 3.2133301578567647e-06, "loss": 0.5206, "step": 4928 }, { "epoch": 0.63, "grad_norm": 0.7413004598599584, "learning_rate": 3.2114032285513673e-06, "loss": 0.5647, "step": 4929 }, { "epoch": 0.63, "grad_norm": 0.6464996730049951, "learning_rate": 3.209476603825366e-06, "loss": 0.5477, "step": 4930 }, { "epoch": 0.63, "grad_norm": 0.7615829799789813, "learning_rate": 3.207550284006846e-06, "loss": 0.5265, "step": 4931 }, { "epoch": 0.63, "grad_norm": 0.8651244088782334, "learning_rate": 3.205624269423838e-06, "loss": 0.6537, "step": 4932 }, { "epoch": 0.63, "grad_norm": 0.8193347925799067, "learning_rate": 3.2036985604043246e-06, "loss": 0.5612, "step": 4933 }, { "epoch": 0.63, "grad_norm": 0.8387643501603282, "learning_rate": 3.201773157276232e-06, "loss": 0.5935, "step": 4934 }, { "epoch": 0.63, "grad_norm": 0.8157075502071942, "learning_rate": 3.1998480603674367e-06, "loss": 0.562, "step": 4935 }, { "epoch": 0.63, "grad_norm": 0.8148379058600491, "learning_rate": 3.1979232700057635e-06, "loss": 0.7118, "step": 4936 }, { "epoch": 0.63, "grad_norm": 0.6905417656518047, "learning_rate": 3.195998786518983e-06, "loss": 0.5101, "step": 4937 }, { "epoch": 0.63, "grad_norm": 0.6652861432019276, "learning_rate": 3.1940746102348154e-06, "loss": 0.575, "step": 4938 }, { "epoch": 0.63, "grad_norm": 0.723374555182776, "learning_rate": 3.192150741480927e-06, "loss": 0.5252, "step": 4939 }, { "epoch": 0.63, "grad_norm": 0.8947954162078751, "learning_rate": 3.1902271805849333e-06, "loss": 0.6618, "step": 4940 }, { "epoch": 0.63, "grad_norm": 0.6768322076991062, "learning_rate": 3.188303927874397e-06, "loss": 0.5688, "step": 4941 }, { "epoch": 0.63, "grad_norm": 0.9061095380882681, "learning_rate": 3.186380983676828e-06, "loss": 0.6529, "step": 4942 }, { "epoch": 0.63, "grad_norm": 0.8159919985696781, "learning_rate": 3.1844583483196832e-06, "loss": 0.6602, "step": 4943 }, { "epoch": 0.63, "grad_norm": 0.8603149980940482, "learning_rate": 3.182536022130368e-06, "loss": 0.6139, "step": 4944 }, { "epoch": 0.63, "grad_norm": 0.7509759478324181, "learning_rate": 3.1806140054362334e-06, "loss": 0.5302, "step": 4945 }, { "epoch": 0.63, "grad_norm": 0.665792640991831, "learning_rate": 3.1786922985645796e-06, "loss": 0.547, "step": 4946 }, { "epoch": 0.63, "grad_norm": 0.7434816450208102, "learning_rate": 3.176770901842653e-06, "loss": 0.5891, "step": 4947 }, { "epoch": 0.63, "grad_norm": 0.7910232360326743, "learning_rate": 3.1748498155976483e-06, "loss": 0.5843, "step": 4948 }, { "epoch": 0.63, "grad_norm": 0.7675773313527777, "learning_rate": 3.172929040156705e-06, "loss": 0.6172, "step": 4949 }, { "epoch": 0.63, "grad_norm": 0.678461169401386, "learning_rate": 3.1710085758469115e-06, "loss": 0.5332, "step": 4950 }, { "epoch": 0.63, "grad_norm": 0.7168108556638194, "learning_rate": 3.169088422995304e-06, "loss": 0.6055, "step": 4951 }, { "epoch": 0.63, "grad_norm": 0.8828118134190684, "learning_rate": 3.1671685819288633e-06, "loss": 0.7033, "step": 4952 }, { "epoch": 0.63, "grad_norm": 0.9027804332632563, "learning_rate": 3.1652490529745184e-06, "loss": 0.6137, "step": 4953 }, { "epoch": 0.63, "grad_norm": 0.7898009879510415, "learning_rate": 3.1633298364591457e-06, "loss": 0.6138, "step": 4954 }, { "epoch": 0.63, "grad_norm": 0.8787990749261377, "learning_rate": 3.1614109327095677e-06, "loss": 0.6449, "step": 4955 }, { "epoch": 0.63, "grad_norm": 0.8364346039271051, "learning_rate": 3.159492342052554e-06, "loss": 0.6083, "step": 4956 }, { "epoch": 0.63, "grad_norm": 0.7947598014173956, "learning_rate": 3.157574064814819e-06, "loss": 0.5677, "step": 4957 }, { "epoch": 0.63, "grad_norm": 0.7103119835086947, "learning_rate": 3.155656101323028e-06, "loss": 0.5601, "step": 4958 }, { "epoch": 0.63, "grad_norm": 0.9016593715419259, "learning_rate": 3.1537384519037873e-06, "loss": 0.6377, "step": 4959 }, { "epoch": 0.63, "grad_norm": 0.7496533370530231, "learning_rate": 3.1518211168836544e-06, "loss": 0.5295, "step": 4960 }, { "epoch": 0.63, "grad_norm": 0.8323363027335362, "learning_rate": 3.1499040965891315e-06, "loss": 0.6071, "step": 4961 }, { "epoch": 0.63, "grad_norm": 0.8202449582037884, "learning_rate": 3.1479873913466676e-06, "loss": 0.6298, "step": 4962 }, { "epoch": 0.63, "grad_norm": 1.249544773907663, "learning_rate": 3.1460710014826555e-06, "loss": 0.6427, "step": 4963 }, { "epoch": 0.63, "grad_norm": 0.8824146843396334, "learning_rate": 3.1441549273234382e-06, "loss": 0.6531, "step": 4964 }, { "epoch": 0.63, "grad_norm": 0.7998270664811491, "learning_rate": 3.1422391691953036e-06, "loss": 0.5805, "step": 4965 }, { "epoch": 0.63, "grad_norm": 0.8811540580169402, "learning_rate": 3.1403237274244834e-06, "loss": 0.6774, "step": 4966 }, { "epoch": 0.63, "grad_norm": 1.0553376679768194, "learning_rate": 3.1384086023371597e-06, "loss": 0.6827, "step": 4967 }, { "epoch": 0.63, "grad_norm": 0.8419507838284247, "learning_rate": 3.136493794259456e-06, "loss": 0.6646, "step": 4968 }, { "epoch": 0.63, "grad_norm": 0.8423246887675496, "learning_rate": 3.1345793035174452e-06, "loss": 0.6069, "step": 4969 }, { "epoch": 0.63, "grad_norm": 0.8934151390438726, "learning_rate": 3.1326651304371457e-06, "loss": 0.5994, "step": 4970 }, { "epoch": 0.63, "grad_norm": 0.7114782230046063, "learning_rate": 3.1307512753445206e-06, "loss": 0.4949, "step": 4971 }, { "epoch": 0.63, "grad_norm": 0.7161345759540172, "learning_rate": 3.128837738565479e-06, "loss": 0.5715, "step": 4972 }, { "epoch": 0.63, "grad_norm": 0.8830345859693479, "learning_rate": 3.126924520425877e-06, "loss": 0.6008, "step": 4973 }, { "epoch": 0.63, "grad_norm": 0.8658218617705198, "learning_rate": 3.1250116212515157e-06, "loss": 0.5512, "step": 4974 }, { "epoch": 0.63, "grad_norm": 0.6528126243296772, "learning_rate": 3.123099041368143e-06, "loss": 0.5359, "step": 4975 }, { "epoch": 0.63, "grad_norm": 0.7308498210634851, "learning_rate": 3.1211867811014492e-06, "loss": 0.5417, "step": 4976 }, { "epoch": 0.63, "grad_norm": 0.6643036815370214, "learning_rate": 3.1192748407770733e-06, "loss": 0.5519, "step": 4977 }, { "epoch": 0.63, "grad_norm": 0.6806872322796154, "learning_rate": 3.117363220720599e-06, "loss": 0.5, "step": 4978 }, { "epoch": 0.63, "grad_norm": 1.4402475571102153, "learning_rate": 3.1154519212575552e-06, "loss": 0.6614, "step": 4979 }, { "epoch": 0.63, "grad_norm": 0.7795024490442468, "learning_rate": 3.1135409427134156e-06, "loss": 0.5519, "step": 4980 }, { "epoch": 0.63, "grad_norm": 0.7548319278432761, "learning_rate": 3.1116302854136003e-06, "loss": 0.5551, "step": 4981 }, { "epoch": 0.63, "grad_norm": 0.8129771989783109, "learning_rate": 3.109719949683475e-06, "loss": 0.6457, "step": 4982 }, { "epoch": 0.63, "grad_norm": 0.7920356526417051, "learning_rate": 3.1078099358483495e-06, "loss": 0.5976, "step": 4983 }, { "epoch": 0.63, "grad_norm": 0.7644943534496127, "learning_rate": 3.105900244233479e-06, "loss": 0.5793, "step": 4984 }, { "epoch": 0.64, "grad_norm": 0.8842847966267777, "learning_rate": 3.1039908751640634e-06, "loss": 0.6217, "step": 4985 }, { "epoch": 0.64, "grad_norm": 0.6164995935996114, "learning_rate": 3.1020818289652504e-06, "loss": 0.4674, "step": 4986 }, { "epoch": 0.64, "grad_norm": 0.6714869291699325, "learning_rate": 3.10017310596213e-06, "loss": 0.5495, "step": 4987 }, { "epoch": 0.64, "grad_norm": 0.6814550791435244, "learning_rate": 3.0982647064797367e-06, "loss": 0.5597, "step": 4988 }, { "epoch": 0.64, "grad_norm": 0.7395183397217501, "learning_rate": 3.0963566308430516e-06, "loss": 0.6085, "step": 4989 }, { "epoch": 0.64, "grad_norm": 0.8114143907821454, "learning_rate": 3.0944488793770007e-06, "loss": 0.5521, "step": 4990 }, { "epoch": 0.64, "grad_norm": 0.8431355899480643, "learning_rate": 3.0925414524064535e-06, "loss": 0.5513, "step": 4991 }, { "epoch": 0.64, "grad_norm": 0.8476631471221191, "learning_rate": 3.0906343502562253e-06, "loss": 0.6426, "step": 4992 }, { "epoch": 0.64, "grad_norm": 0.7735019108505419, "learning_rate": 3.088727573251076e-06, "loss": 0.6104, "step": 4993 }, { "epoch": 0.64, "grad_norm": 0.7070841213477257, "learning_rate": 3.0868211217157086e-06, "loss": 0.5235, "step": 4994 }, { "epoch": 0.64, "grad_norm": 0.7606065290230307, "learning_rate": 3.0849149959747737e-06, "loss": 0.5724, "step": 4995 }, { "epoch": 0.64, "grad_norm": 0.6398751491300726, "learning_rate": 3.083009196352863e-06, "loss": 0.5391, "step": 4996 }, { "epoch": 0.64, "grad_norm": 0.8130704705420126, "learning_rate": 3.0811037231745156e-06, "loss": 0.5633, "step": 4997 }, { "epoch": 0.64, "grad_norm": 1.0557818325543082, "learning_rate": 3.0791985767642135e-06, "loss": 0.6917, "step": 4998 }, { "epoch": 0.64, "grad_norm": 0.745421372588127, "learning_rate": 3.077293757446381e-06, "loss": 0.5791, "step": 4999 }, { "epoch": 0.64, "grad_norm": 0.7845346987460242, "learning_rate": 3.0753892655453932e-06, "loss": 0.5146, "step": 5000 }, { "epoch": 0.64, "grad_norm": 0.8042884766722785, "learning_rate": 3.0734851013855626e-06, "loss": 0.5469, "step": 5001 }, { "epoch": 0.64, "grad_norm": 1.066151029808255, "learning_rate": 3.0715812652911486e-06, "loss": 0.6093, "step": 5002 }, { "epoch": 0.64, "grad_norm": 0.7760575017502817, "learning_rate": 3.0696777575863557e-06, "loss": 0.5855, "step": 5003 }, { "epoch": 0.64, "grad_norm": 0.6491191009117421, "learning_rate": 3.06777457859533e-06, "loss": 0.5024, "step": 5004 }, { "epoch": 0.64, "grad_norm": 0.8394753303812418, "learning_rate": 3.065871728642164e-06, "loss": 0.6361, "step": 5005 }, { "epoch": 0.64, "grad_norm": 0.9293200388729286, "learning_rate": 3.063969208050893e-06, "loss": 0.6418, "step": 5006 }, { "epoch": 0.64, "grad_norm": 0.8738802466288204, "learning_rate": 3.062067017145497e-06, "loss": 0.5916, "step": 5007 }, { "epoch": 0.64, "grad_norm": 0.7516143034017935, "learning_rate": 3.0601651562498978e-06, "loss": 0.5158, "step": 5008 }, { "epoch": 0.64, "grad_norm": 0.7991567658735562, "learning_rate": 3.058263625687964e-06, "loss": 0.6048, "step": 5009 }, { "epoch": 0.64, "grad_norm": 0.8175059542807093, "learning_rate": 3.0563624257835055e-06, "loss": 0.5799, "step": 5010 }, { "epoch": 0.64, "grad_norm": 0.7887745475538136, "learning_rate": 3.0544615568602776e-06, "loss": 0.6553, "step": 5011 }, { "epoch": 0.64, "grad_norm": 0.6725125103204541, "learning_rate": 3.0525610192419774e-06, "loss": 0.5309, "step": 5012 }, { "epoch": 0.64, "grad_norm": 0.8771590516858645, "learning_rate": 3.0506608132522476e-06, "loss": 0.625, "step": 5013 }, { "epoch": 0.64, "grad_norm": 0.9509894144134095, "learning_rate": 3.048760939214673e-06, "loss": 0.5932, "step": 5014 }, { "epoch": 0.64, "grad_norm": 1.1715137299169884, "learning_rate": 3.046861397452784e-06, "loss": 0.6358, "step": 5015 }, { "epoch": 0.64, "grad_norm": 0.8515079900548325, "learning_rate": 3.0449621882900505e-06, "loss": 0.6258, "step": 5016 }, { "epoch": 0.64, "grad_norm": 0.7106745518456697, "learning_rate": 3.0430633120498886e-06, "loss": 0.5917, "step": 5017 }, { "epoch": 0.64, "grad_norm": 0.9922685448593384, "learning_rate": 3.041164769055658e-06, "loss": 0.6985, "step": 5018 }, { "epoch": 0.64, "grad_norm": 0.8122359582056352, "learning_rate": 3.03926655963066e-06, "loss": 0.5868, "step": 5019 }, { "epoch": 0.64, "grad_norm": 0.7345459321467698, "learning_rate": 3.0373686840981396e-06, "loss": 0.5774, "step": 5020 }, { "epoch": 0.64, "grad_norm": 0.8952754988396652, "learning_rate": 3.035471142781287e-06, "loss": 0.6047, "step": 5021 }, { "epoch": 0.64, "grad_norm": 0.8278630861754526, "learning_rate": 3.0335739360032314e-06, "loss": 0.601, "step": 5022 }, { "epoch": 0.64, "grad_norm": 0.6433925822755447, "learning_rate": 3.0316770640870487e-06, "loss": 0.5083, "step": 5023 }, { "epoch": 0.64, "grad_norm": 0.6709492167969169, "learning_rate": 3.0297805273557563e-06, "loss": 0.5209, "step": 5024 }, { "epoch": 0.64, "grad_norm": 0.6971545777564855, "learning_rate": 3.0278843261323133e-06, "loss": 0.5183, "step": 5025 }, { "epoch": 0.64, "grad_norm": 1.0938457062993858, "learning_rate": 3.025988460739625e-06, "loss": 0.6174, "step": 5026 }, { "epoch": 0.64, "grad_norm": 0.8599379391062648, "learning_rate": 3.0240929315005375e-06, "loss": 0.6501, "step": 5027 }, { "epoch": 0.64, "grad_norm": 0.690843161365665, "learning_rate": 3.0221977387378386e-06, "loss": 0.5162, "step": 5028 }, { "epoch": 0.64, "grad_norm": 1.0328366809191933, "learning_rate": 3.02030288277426e-06, "loss": 0.6362, "step": 5029 }, { "epoch": 0.64, "grad_norm": 0.7278862276601206, "learning_rate": 3.018408363932476e-06, "loss": 0.5708, "step": 5030 }, { "epoch": 0.64, "grad_norm": 0.7966026810228043, "learning_rate": 3.016514182535104e-06, "loss": 0.5437, "step": 5031 }, { "epoch": 0.64, "grad_norm": 0.8116526771916039, "learning_rate": 3.0146203389047025e-06, "loss": 0.6232, "step": 5032 }, { "epoch": 0.64, "grad_norm": 0.6372993699838145, "learning_rate": 3.0127268333637738e-06, "loss": 0.5224, "step": 5033 }, { "epoch": 0.64, "grad_norm": 0.749541015071615, "learning_rate": 3.010833666234762e-06, "loss": 0.6246, "step": 5034 }, { "epoch": 0.64, "grad_norm": 0.7418348963897718, "learning_rate": 3.008940837840054e-06, "loss": 0.6061, "step": 5035 }, { "epoch": 0.64, "grad_norm": 0.657103502277192, "learning_rate": 3.0070483485019786e-06, "loss": 0.5691, "step": 5036 }, { "epoch": 0.64, "grad_norm": 0.6423930074719663, "learning_rate": 3.0051561985428073e-06, "loss": 0.5671, "step": 5037 }, { "epoch": 0.64, "grad_norm": 0.7647084416249336, "learning_rate": 3.0032643882847523e-06, "loss": 0.5772, "step": 5038 }, { "epoch": 0.64, "grad_norm": 0.8130263159828129, "learning_rate": 3.0013729180499707e-06, "loss": 0.6369, "step": 5039 }, { "epoch": 0.64, "grad_norm": 0.7939018654042358, "learning_rate": 2.9994817881605596e-06, "loss": 0.5443, "step": 5040 }, { "epoch": 0.64, "grad_norm": 0.7839391237657964, "learning_rate": 2.9975909989385587e-06, "loss": 0.5705, "step": 5041 }, { "epoch": 0.64, "grad_norm": 0.6741102622215539, "learning_rate": 2.99570055070595e-06, "loss": 0.528, "step": 5042 }, { "epoch": 0.64, "grad_norm": 0.8343447808665908, "learning_rate": 2.993810443784657e-06, "loss": 0.5974, "step": 5043 }, { "epoch": 0.64, "grad_norm": 0.7152191321668764, "learning_rate": 2.991920678496545e-06, "loss": 0.5714, "step": 5044 }, { "epoch": 0.64, "grad_norm": 0.6686741233385336, "learning_rate": 2.9900312551634216e-06, "loss": 0.5167, "step": 5045 }, { "epoch": 0.64, "grad_norm": 0.7503890990194079, "learning_rate": 2.9881421741070354e-06, "loss": 0.5904, "step": 5046 }, { "epoch": 0.64, "grad_norm": 0.8002828364337375, "learning_rate": 2.986253435649078e-06, "loss": 0.5152, "step": 5047 }, { "epoch": 0.64, "grad_norm": 0.8562790460800754, "learning_rate": 2.9843650401111807e-06, "loss": 0.6189, "step": 5048 }, { "epoch": 0.64, "grad_norm": 0.7761964296400664, "learning_rate": 2.9824769878149184e-06, "loss": 0.5531, "step": 5049 }, { "epoch": 0.64, "grad_norm": 0.6825114718682199, "learning_rate": 2.9805892790818074e-06, "loss": 0.5801, "step": 5050 }, { "epoch": 0.64, "grad_norm": 0.8428221778968118, "learning_rate": 2.978701914233303e-06, "loss": 0.6601, "step": 5051 }, { "epoch": 0.64, "grad_norm": 0.9128095881527932, "learning_rate": 2.9768148935908052e-06, "loss": 0.575, "step": 5052 }, { "epoch": 0.64, "grad_norm": 0.6162211458638216, "learning_rate": 2.9749282174756546e-06, "loss": 0.5089, "step": 5053 }, { "epoch": 0.64, "grad_norm": 1.0453328813565408, "learning_rate": 2.97304188620913e-06, "loss": 0.665, "step": 5054 }, { "epoch": 0.64, "grad_norm": 0.9949130244666881, "learning_rate": 2.971155900112457e-06, "loss": 0.613, "step": 5055 }, { "epoch": 0.64, "grad_norm": 0.8479103619003527, "learning_rate": 2.9692702595067975e-06, "loss": 0.5922, "step": 5056 }, { "epoch": 0.64, "grad_norm": 0.8064943202314837, "learning_rate": 2.967384964713256e-06, "loss": 0.5505, "step": 5057 }, { "epoch": 0.64, "grad_norm": 0.7629521567300589, "learning_rate": 2.9655000160528784e-06, "loss": 0.6124, "step": 5058 }, { "epoch": 0.64, "grad_norm": 0.787166679951818, "learning_rate": 2.9636154138466545e-06, "loss": 0.5565, "step": 5059 }, { "epoch": 0.64, "grad_norm": 0.627286840427744, "learning_rate": 2.9617311584155104e-06, "loss": 0.5607, "step": 5060 }, { "epoch": 0.64, "grad_norm": 0.7728065791669109, "learning_rate": 2.9598472500803153e-06, "loss": 0.5735, "step": 5061 }, { "epoch": 0.64, "grad_norm": 0.7847844195127908, "learning_rate": 2.95796368916188e-06, "loss": 0.5311, "step": 5062 }, { "epoch": 0.65, "grad_norm": 0.7905854136442236, "learning_rate": 2.9560804759809547e-06, "loss": 0.6023, "step": 5063 }, { "epoch": 0.65, "grad_norm": 0.767859920261755, "learning_rate": 2.9541976108582303e-06, "loss": 0.5773, "step": 5064 }, { "epoch": 0.65, "grad_norm": 0.8606071803912296, "learning_rate": 2.952315094114341e-06, "loss": 0.5858, "step": 5065 }, { "epoch": 0.65, "grad_norm": 0.7999038678294265, "learning_rate": 2.9504329260698573e-06, "loss": 0.5702, "step": 5066 }, { "epoch": 0.65, "grad_norm": 0.8830145480605472, "learning_rate": 2.9485511070452945e-06, "loss": 0.611, "step": 5067 }, { "epoch": 0.65, "grad_norm": 0.8365157062235715, "learning_rate": 2.9466696373611058e-06, "loss": 0.64, "step": 5068 }, { "epoch": 0.65, "grad_norm": 0.6936025799379858, "learning_rate": 2.9447885173376865e-06, "loss": 0.5583, "step": 5069 }, { "epoch": 0.65, "grad_norm": 0.6403350582941926, "learning_rate": 2.9429077472953715e-06, "loss": 0.4369, "step": 5070 }, { "epoch": 0.65, "grad_norm": 0.7912088043822698, "learning_rate": 2.9410273275544373e-06, "loss": 0.6167, "step": 5071 }, { "epoch": 0.65, "grad_norm": 0.7151672215093456, "learning_rate": 2.939147258435099e-06, "loss": 0.5516, "step": 5072 }, { "epoch": 0.65, "grad_norm": 0.8734463608373647, "learning_rate": 2.9372675402575122e-06, "loss": 0.5185, "step": 5073 }, { "epoch": 0.65, "grad_norm": 0.7575540456026442, "learning_rate": 2.9353881733417745e-06, "loss": 0.5712, "step": 5074 }, { "epoch": 0.65, "grad_norm": 0.8736440726166469, "learning_rate": 2.933509158007921e-06, "loss": 0.6262, "step": 5075 }, { "epoch": 0.65, "grad_norm": 0.7239375898253136, "learning_rate": 2.9316304945759298e-06, "loss": 0.5117, "step": 5076 }, { "epoch": 0.65, "grad_norm": 0.7370688651680586, "learning_rate": 2.9297521833657177e-06, "loss": 0.5242, "step": 5077 }, { "epoch": 0.65, "grad_norm": 0.878141668602577, "learning_rate": 2.9278742246971408e-06, "loss": 0.625, "step": 5078 }, { "epoch": 0.65, "grad_norm": 0.7011608883277275, "learning_rate": 2.9259966188899964e-06, "loss": 0.5304, "step": 5079 }, { "epoch": 0.65, "grad_norm": 0.6966904076748, "learning_rate": 2.9241193662640204e-06, "loss": 0.5495, "step": 5080 }, { "epoch": 0.65, "grad_norm": 0.6417231886938327, "learning_rate": 2.9222424671388907e-06, "loss": 0.511, "step": 5081 }, { "epoch": 0.65, "grad_norm": 0.693462911417472, "learning_rate": 2.9203659218342224e-06, "loss": 0.4913, "step": 5082 }, { "epoch": 0.65, "grad_norm": 0.7900963828228083, "learning_rate": 2.9184897306695714e-06, "loss": 0.5753, "step": 5083 }, { "epoch": 0.65, "grad_norm": 0.8148514879713444, "learning_rate": 2.916613893964435e-06, "loss": 0.5988, "step": 5084 }, { "epoch": 0.65, "grad_norm": 0.7124544774085725, "learning_rate": 2.9147384120382484e-06, "loss": 0.515, "step": 5085 }, { "epoch": 0.65, "grad_norm": 0.6997178029566059, "learning_rate": 2.9128632852103854e-06, "loss": 0.573, "step": 5086 }, { "epoch": 0.65, "grad_norm": 1.1315708795820967, "learning_rate": 2.9109885138001614e-06, "loss": 0.658, "step": 5087 }, { "epoch": 0.65, "grad_norm": 0.9176413053769573, "learning_rate": 2.909114098126831e-06, "loss": 0.6357, "step": 5088 }, { "epoch": 0.65, "grad_norm": 0.8206812379899895, "learning_rate": 2.9072400385095865e-06, "loss": 0.5782, "step": 5089 }, { "epoch": 0.65, "grad_norm": 0.8723181580950474, "learning_rate": 2.9053663352675613e-06, "loss": 0.6193, "step": 5090 }, { "epoch": 0.65, "grad_norm": 0.7064501080282872, "learning_rate": 2.9034929887198274e-06, "loss": 0.5065, "step": 5091 }, { "epoch": 0.65, "grad_norm": 0.8481158192491489, "learning_rate": 2.9016199991853966e-06, "loss": 0.6742, "step": 5092 }, { "epoch": 0.65, "grad_norm": 1.552527435384719, "learning_rate": 2.899747366983219e-06, "loss": 0.6098, "step": 5093 }, { "epoch": 0.65, "grad_norm": 0.6385767356469423, "learning_rate": 2.8978750924321843e-06, "loss": 0.6009, "step": 5094 }, { "epoch": 0.65, "grad_norm": 0.7869850340271355, "learning_rate": 2.896003175851122e-06, "loss": 0.6265, "step": 5095 }, { "epoch": 0.65, "grad_norm": 0.6742842764932138, "learning_rate": 2.8941316175587993e-06, "loss": 0.4911, "step": 5096 }, { "epoch": 0.65, "grad_norm": 0.7678884596916175, "learning_rate": 2.8922604178739234e-06, "loss": 0.6269, "step": 5097 }, { "epoch": 0.65, "grad_norm": 0.8435298422656635, "learning_rate": 2.8903895771151402e-06, "loss": 0.6004, "step": 5098 }, { "epoch": 0.65, "grad_norm": 0.9192396522506059, "learning_rate": 2.8885190956010333e-06, "loss": 0.6282, "step": 5099 }, { "epoch": 0.65, "grad_norm": 0.9091860817951859, "learning_rate": 2.8866489736501284e-06, "loss": 0.6585, "step": 5100 }, { "epoch": 0.65, "grad_norm": 0.7386786305836628, "learning_rate": 2.884779211580886e-06, "loss": 0.5952, "step": 5101 }, { "epoch": 0.65, "grad_norm": 0.9187346348948497, "learning_rate": 2.8829098097117083e-06, "loss": 0.6652, "step": 5102 }, { "epoch": 0.65, "grad_norm": 1.4714466866827551, "learning_rate": 2.8810407683609314e-06, "loss": 0.6128, "step": 5103 }, { "epoch": 0.65, "grad_norm": 0.659600892770533, "learning_rate": 2.879172087846839e-06, "loss": 0.5762, "step": 5104 }, { "epoch": 0.65, "grad_norm": 0.8581151843891774, "learning_rate": 2.8773037684876454e-06, "loss": 0.5678, "step": 5105 }, { "epoch": 0.65, "grad_norm": 0.8545377593903204, "learning_rate": 2.8754358106015057e-06, "loss": 0.6054, "step": 5106 }, { "epoch": 0.65, "grad_norm": 0.6498541467492196, "learning_rate": 2.8735682145065143e-06, "loss": 0.5229, "step": 5107 }, { "epoch": 0.65, "grad_norm": 0.6756064156040719, "learning_rate": 2.8717009805207026e-06, "loss": 0.5367, "step": 5108 }, { "epoch": 0.65, "grad_norm": 0.6937353110813358, "learning_rate": 2.869834108962042e-06, "loss": 0.5493, "step": 5109 }, { "epoch": 0.65, "grad_norm": 0.7869415419065402, "learning_rate": 2.8679676001484397e-06, "loss": 0.6606, "step": 5110 }, { "epoch": 0.65, "grad_norm": 0.6309322839766587, "learning_rate": 2.8661014543977446e-06, "loss": 0.5132, "step": 5111 }, { "epoch": 0.65, "grad_norm": 0.6402842922667611, "learning_rate": 2.864235672027741e-06, "loss": 0.5028, "step": 5112 }, { "epoch": 0.65, "grad_norm": 0.8171540989138597, "learning_rate": 2.8623702533561514e-06, "loss": 0.6219, "step": 5113 }, { "epoch": 0.65, "grad_norm": 0.6338841548721944, "learning_rate": 2.860505198700639e-06, "loss": 0.5244, "step": 5114 }, { "epoch": 0.65, "grad_norm": 0.8542887720948676, "learning_rate": 2.8586405083788016e-06, "loss": 0.6742, "step": 5115 }, { "epoch": 0.65, "grad_norm": 0.7872656446119819, "learning_rate": 2.8567761827081777e-06, "loss": 0.5628, "step": 5116 }, { "epoch": 0.65, "grad_norm": 0.6402900019101818, "learning_rate": 2.854912222006241e-06, "loss": 0.5048, "step": 5117 }, { "epoch": 0.65, "grad_norm": 0.8761378112092568, "learning_rate": 2.8530486265904057e-06, "loss": 0.6786, "step": 5118 }, { "epoch": 0.65, "grad_norm": 1.1071052112602673, "learning_rate": 2.8511853967780234e-06, "loss": 0.6572, "step": 5119 }, { "epoch": 0.65, "grad_norm": 0.7029030689748211, "learning_rate": 2.849322532886381e-06, "loss": 0.5525, "step": 5120 }, { "epoch": 0.65, "grad_norm": 0.9909517105724016, "learning_rate": 2.847460035232706e-06, "loss": 0.5844, "step": 5121 }, { "epoch": 0.65, "grad_norm": 0.9442490804991749, "learning_rate": 2.845597904134162e-06, "loss": 0.6525, "step": 5122 }, { "epoch": 0.65, "grad_norm": 0.6494560441160583, "learning_rate": 2.8437361399078502e-06, "loss": 0.5463, "step": 5123 }, { "epoch": 0.65, "grad_norm": 0.6197108843004744, "learning_rate": 2.8418747428708103e-06, "loss": 0.491, "step": 5124 }, { "epoch": 0.65, "grad_norm": 0.6602929038792663, "learning_rate": 2.8400137133400185e-06, "loss": 0.5374, "step": 5125 }, { "epoch": 0.65, "grad_norm": 0.7969911576003924, "learning_rate": 2.8381530516323887e-06, "loss": 0.6665, "step": 5126 }, { "epoch": 0.65, "grad_norm": 0.7634927541488229, "learning_rate": 2.836292758064772e-06, "loss": 0.5918, "step": 5127 }, { "epoch": 0.65, "grad_norm": 0.8797409090262036, "learning_rate": 2.8344328329539573e-06, "loss": 0.5925, "step": 5128 }, { "epoch": 0.65, "grad_norm": 0.819838885097571, "learning_rate": 2.832573276616669e-06, "loss": 0.6143, "step": 5129 }, { "epoch": 0.65, "grad_norm": 0.8976985230923279, "learning_rate": 2.8307140893695728e-06, "loss": 0.5931, "step": 5130 }, { "epoch": 0.65, "grad_norm": 0.887058680958711, "learning_rate": 2.8288552715292683e-06, "loss": 0.5915, "step": 5131 }, { "epoch": 0.65, "grad_norm": 0.6930251114140492, "learning_rate": 2.8269968234122922e-06, "loss": 0.5194, "step": 5132 }, { "epoch": 0.65, "grad_norm": 0.8679975696069272, "learning_rate": 2.8251387453351185e-06, "loss": 0.6218, "step": 5133 }, { "epoch": 0.65, "grad_norm": 0.6601588682863456, "learning_rate": 2.8232810376141593e-06, "loss": 0.5032, "step": 5134 }, { "epoch": 0.65, "grad_norm": 0.8106950176466404, "learning_rate": 2.821423700565763e-06, "loss": 0.6254, "step": 5135 }, { "epoch": 0.65, "grad_norm": 0.721201954560906, "learning_rate": 2.8195667345062135e-06, "loss": 0.525, "step": 5136 }, { "epoch": 0.65, "grad_norm": 0.6640771490332805, "learning_rate": 2.817710139751734e-06, "loss": 0.5378, "step": 5137 }, { "epoch": 0.65, "grad_norm": 0.9350091056659592, "learning_rate": 2.8158539166184816e-06, "loss": 0.6324, "step": 5138 }, { "epoch": 0.65, "grad_norm": 1.2512755449795776, "learning_rate": 2.813998065422554e-06, "loss": 0.6373, "step": 5139 }, { "epoch": 0.65, "grad_norm": 0.73420194955367, "learning_rate": 2.8121425864799825e-06, "loss": 0.5597, "step": 5140 }, { "epoch": 0.65, "grad_norm": 0.7815903484537784, "learning_rate": 2.810287480106735e-06, "loss": 0.6386, "step": 5141 }, { "epoch": 0.66, "grad_norm": 0.7277170027847618, "learning_rate": 2.8084327466187174e-06, "loss": 0.5702, "step": 5142 }, { "epoch": 0.66, "grad_norm": 0.9241973326162606, "learning_rate": 2.8065783863317715e-06, "loss": 0.6356, "step": 5143 }, { "epoch": 0.66, "grad_norm": 0.7116264821011563, "learning_rate": 2.8047243995616757e-06, "loss": 0.5791, "step": 5144 }, { "epoch": 0.66, "grad_norm": 0.7644541838097464, "learning_rate": 2.8028707866241443e-06, "loss": 0.5706, "step": 5145 }, { "epoch": 0.66, "grad_norm": 0.8130506833695966, "learning_rate": 2.8010175478348282e-06, "loss": 0.5468, "step": 5146 }, { "epoch": 0.66, "grad_norm": 0.6912794421788537, "learning_rate": 2.7991646835093155e-06, "loss": 0.5142, "step": 5147 }, { "epoch": 0.66, "grad_norm": 0.7977654033615409, "learning_rate": 2.7973121939631285e-06, "loss": 0.5409, "step": 5148 }, { "epoch": 0.66, "grad_norm": 0.8529239898813732, "learning_rate": 2.7954600795117276e-06, "loss": 0.6337, "step": 5149 }, { "epoch": 0.66, "grad_norm": 0.822981421300624, "learning_rate": 2.793608340470508e-06, "loss": 0.6254, "step": 5150 }, { "epoch": 0.66, "grad_norm": 0.7289860090207538, "learning_rate": 2.7917569771548027e-06, "loss": 0.5408, "step": 5151 }, { "epoch": 0.66, "grad_norm": 0.856365915073162, "learning_rate": 2.7899059898798785e-06, "loss": 0.6415, "step": 5152 }, { "epoch": 0.66, "grad_norm": 0.6612251014781776, "learning_rate": 2.78805537896094e-06, "loss": 0.5362, "step": 5153 }, { "epoch": 0.66, "grad_norm": 1.0021774016020024, "learning_rate": 2.786205144713126e-06, "loss": 0.6744, "step": 5154 }, { "epoch": 0.66, "grad_norm": 0.68576567882279, "learning_rate": 2.7843552874515135e-06, "loss": 0.5536, "step": 5155 }, { "epoch": 0.66, "grad_norm": 0.9577772696794191, "learning_rate": 2.7825058074911106e-06, "loss": 0.5977, "step": 5156 }, { "epoch": 0.66, "grad_norm": 0.6508401860666763, "learning_rate": 2.780656705146869e-06, "loss": 0.4906, "step": 5157 }, { "epoch": 0.66, "grad_norm": 0.8000308020498672, "learning_rate": 2.778807980733669e-06, "loss": 0.6598, "step": 5158 }, { "epoch": 0.66, "grad_norm": 0.8554737868033759, "learning_rate": 2.7769596345663306e-06, "loss": 0.6263, "step": 5159 }, { "epoch": 0.66, "grad_norm": 0.8441062403901539, "learning_rate": 2.7751116669596067e-06, "loss": 0.6486, "step": 5160 }, { "epoch": 0.66, "grad_norm": 0.8629860566627963, "learning_rate": 2.773264078228185e-06, "loss": 0.6245, "step": 5161 }, { "epoch": 0.66, "grad_norm": 0.7806912403688684, "learning_rate": 2.7714168686866947e-06, "loss": 0.6154, "step": 5162 }, { "epoch": 0.66, "grad_norm": 0.8886078858809823, "learning_rate": 2.7695700386496947e-06, "loss": 0.61, "step": 5163 }, { "epoch": 0.66, "grad_norm": 1.007021275010515, "learning_rate": 2.7677235884316812e-06, "loss": 0.5861, "step": 5164 }, { "epoch": 0.66, "grad_norm": 0.6877992839372186, "learning_rate": 2.7658775183470845e-06, "loss": 0.528, "step": 5165 }, { "epoch": 0.66, "grad_norm": 0.6803157516403499, "learning_rate": 2.7640318287102714e-06, "loss": 0.53, "step": 5166 }, { "epoch": 0.66, "grad_norm": 0.6897655848117805, "learning_rate": 2.7621865198355437e-06, "loss": 0.5049, "step": 5167 }, { "epoch": 0.66, "grad_norm": 0.9584201558024223, "learning_rate": 2.760341592037138e-06, "loss": 0.611, "step": 5168 }, { "epoch": 0.66, "grad_norm": 0.7005281413971542, "learning_rate": 2.758497045629227e-06, "loss": 0.5854, "step": 5169 }, { "epoch": 0.66, "grad_norm": 0.8207485264357269, "learning_rate": 2.7566528809259175e-06, "loss": 0.6173, "step": 5170 }, { "epoch": 0.66, "grad_norm": 0.7866032594797326, "learning_rate": 2.7548090982412507e-06, "loss": 0.6176, "step": 5171 }, { "epoch": 0.66, "grad_norm": 0.8424207618641728, "learning_rate": 2.752965697889205e-06, "loss": 0.5814, "step": 5172 }, { "epoch": 0.66, "grad_norm": 0.6853146914536142, "learning_rate": 2.751122680183691e-06, "loss": 0.5239, "step": 5173 }, { "epoch": 0.66, "grad_norm": 0.7500586682933541, "learning_rate": 2.749280045438556e-06, "loss": 0.5538, "step": 5174 }, { "epoch": 0.66, "grad_norm": 0.6663553236807322, "learning_rate": 2.7474377939675813e-06, "loss": 0.5339, "step": 5175 }, { "epoch": 0.66, "grad_norm": 0.7242520750398093, "learning_rate": 2.7455959260844833e-06, "loss": 0.5136, "step": 5176 }, { "epoch": 0.66, "grad_norm": 0.8178106857860368, "learning_rate": 2.7437544421029132e-06, "loss": 0.5971, "step": 5177 }, { "epoch": 0.66, "grad_norm": 0.902020897353715, "learning_rate": 2.741913342336456e-06, "loss": 0.6598, "step": 5178 }, { "epoch": 0.66, "grad_norm": 0.9053732133687181, "learning_rate": 2.740072627098632e-06, "loss": 0.6298, "step": 5179 }, { "epoch": 0.66, "grad_norm": 0.680032971338818, "learning_rate": 2.738232296702896e-06, "loss": 0.5297, "step": 5180 }, { "epoch": 0.66, "grad_norm": 0.8043774415528638, "learning_rate": 2.7363923514626368e-06, "loss": 0.5773, "step": 5181 }, { "epoch": 0.66, "grad_norm": 0.6672367262272082, "learning_rate": 2.7345527916911785e-06, "loss": 0.5455, "step": 5182 }, { "epoch": 0.66, "grad_norm": 0.7299318295239927, "learning_rate": 2.7327136177017777e-06, "loss": 0.5322, "step": 5183 }, { "epoch": 0.66, "grad_norm": 0.7621754528774594, "learning_rate": 2.730874829807628e-06, "loss": 0.5954, "step": 5184 }, { "epoch": 0.66, "grad_norm": 0.8544827069777828, "learning_rate": 2.729036428321855e-06, "loss": 0.6621, "step": 5185 }, { "epoch": 0.66, "grad_norm": 0.6929800893447793, "learning_rate": 2.727198413557519e-06, "loss": 0.509, "step": 5186 }, { "epoch": 0.66, "grad_norm": 0.8062805436810909, "learning_rate": 2.7253607858276166e-06, "loss": 0.5968, "step": 5187 }, { "epoch": 0.66, "grad_norm": 0.7393975822423453, "learning_rate": 2.7235235454450726e-06, "loss": 0.5816, "step": 5188 }, { "epoch": 0.66, "grad_norm": 0.6804203910252329, "learning_rate": 2.7216866927227547e-06, "loss": 0.4719, "step": 5189 }, { "epoch": 0.66, "grad_norm": 0.7466036692471355, "learning_rate": 2.7198502279734572e-06, "loss": 0.5636, "step": 5190 }, { "epoch": 0.66, "grad_norm": 0.755967473355392, "learning_rate": 2.7180141515099124e-06, "loss": 0.6153, "step": 5191 }, { "epoch": 0.66, "grad_norm": 0.7249784899300633, "learning_rate": 2.716178463644783e-06, "loss": 0.5196, "step": 5192 }, { "epoch": 0.66, "grad_norm": 0.6896874198975248, "learning_rate": 2.7143431646906693e-06, "loss": 0.5267, "step": 5193 }, { "epoch": 0.66, "grad_norm": 0.6242336184929248, "learning_rate": 2.712508254960103e-06, "loss": 0.5061, "step": 5194 }, { "epoch": 0.66, "grad_norm": 0.8642473176780114, "learning_rate": 2.71067373476555e-06, "loss": 0.6466, "step": 5195 }, { "epoch": 0.66, "grad_norm": 0.7998894403126326, "learning_rate": 2.7088396044194087e-06, "loss": 0.5867, "step": 5196 }, { "epoch": 0.66, "grad_norm": 0.8542562645589561, "learning_rate": 2.7070058642340146e-06, "loss": 0.645, "step": 5197 }, { "epoch": 0.66, "grad_norm": 0.8944803801071172, "learning_rate": 2.705172514521634e-06, "loss": 0.578, "step": 5198 }, { "epoch": 0.66, "grad_norm": 0.65550469517289, "learning_rate": 2.7033395555944664e-06, "loss": 0.5166, "step": 5199 }, { "epoch": 0.66, "grad_norm": 0.7627583236404839, "learning_rate": 2.7015069877646462e-06, "loss": 0.5435, "step": 5200 }, { "epoch": 0.66, "grad_norm": 0.9197477231974598, "learning_rate": 2.6996748113442397e-06, "loss": 0.6493, "step": 5201 }, { "epoch": 0.66, "grad_norm": 0.7925732192573283, "learning_rate": 2.697843026645249e-06, "loss": 0.4922, "step": 5202 }, { "epoch": 0.66, "grad_norm": 0.7236804362081727, "learning_rate": 2.696011633979607e-06, "loss": 0.5287, "step": 5203 }, { "epoch": 0.66, "grad_norm": 0.8518712947721799, "learning_rate": 2.694180633659181e-06, "loss": 0.5865, "step": 5204 }, { "epoch": 0.66, "grad_norm": 0.8176553594601641, "learning_rate": 2.692350025995771e-06, "loss": 0.5766, "step": 5205 }, { "epoch": 0.66, "grad_norm": 0.7266282703374228, "learning_rate": 2.6905198113011103e-06, "loss": 0.5066, "step": 5206 }, { "epoch": 0.66, "grad_norm": 0.7695846170219701, "learning_rate": 2.688689989886867e-06, "loss": 0.6189, "step": 5207 }, { "epoch": 0.66, "grad_norm": 0.7964515495984981, "learning_rate": 2.6868605620646383e-06, "loss": 0.5802, "step": 5208 }, { "epoch": 0.66, "grad_norm": 0.8680629696218501, "learning_rate": 2.6850315281459583e-06, "loss": 0.6504, "step": 5209 }, { "epoch": 0.66, "grad_norm": 0.7167740949045319, "learning_rate": 2.6832028884422913e-06, "loss": 0.5239, "step": 5210 }, { "epoch": 0.66, "grad_norm": 0.7818288681704455, "learning_rate": 2.681374643265037e-06, "loss": 0.5317, "step": 5211 }, { "epoch": 0.66, "grad_norm": 0.9651876787100336, "learning_rate": 2.6795467929255248e-06, "loss": 0.6323, "step": 5212 }, { "epoch": 0.66, "grad_norm": 0.8719896724909121, "learning_rate": 2.6777193377350196e-06, "loss": 0.6158, "step": 5213 }, { "epoch": 0.66, "grad_norm": 0.8655860041581656, "learning_rate": 2.6758922780047157e-06, "loss": 0.5818, "step": 5214 }, { "epoch": 0.66, "grad_norm": 0.6499668073058882, "learning_rate": 2.6740656140457478e-06, "loss": 0.512, "step": 5215 }, { "epoch": 0.66, "grad_norm": 0.7013168277868185, "learning_rate": 2.672239346169173e-06, "loss": 0.5509, "step": 5216 }, { "epoch": 0.66, "grad_norm": 1.059345314738029, "learning_rate": 2.6704134746859877e-06, "loss": 0.6556, "step": 5217 }, { "epoch": 0.66, "grad_norm": 0.9008223241379106, "learning_rate": 2.6685879999071184e-06, "loss": 0.6521, "step": 5218 }, { "epoch": 0.66, "grad_norm": 0.6815671362886792, "learning_rate": 2.666762922143424e-06, "loss": 0.5591, "step": 5219 }, { "epoch": 0.67, "grad_norm": 0.9576166895333427, "learning_rate": 2.6649382417056956e-06, "loss": 0.6651, "step": 5220 }, { "epoch": 0.67, "grad_norm": 0.8380433757412757, "learning_rate": 2.6631139589046595e-06, "loss": 0.5987, "step": 5221 }, { "epoch": 0.67, "grad_norm": 0.8832024017823825, "learning_rate": 2.6612900740509702e-06, "loss": 0.6165, "step": 5222 }, { "epoch": 0.67, "grad_norm": 0.7457261250272638, "learning_rate": 2.659466587455217e-06, "loss": 0.6025, "step": 5223 }, { "epoch": 0.67, "grad_norm": 0.8229459147850618, "learning_rate": 2.65764349942792e-06, "loss": 0.6468, "step": 5224 }, { "epoch": 0.67, "grad_norm": 0.7007632226870129, "learning_rate": 2.6558208102795322e-06, "loss": 0.5745, "step": 5225 }, { "epoch": 0.67, "grad_norm": 0.7521041410076824, "learning_rate": 2.65399852032044e-06, "loss": 0.5232, "step": 5226 }, { "epoch": 0.67, "grad_norm": 0.7332808431620648, "learning_rate": 2.6521766298609586e-06, "loss": 0.5684, "step": 5227 }, { "epoch": 0.67, "grad_norm": 0.6820348364164988, "learning_rate": 2.650355139211338e-06, "loss": 0.5252, "step": 5228 }, { "epoch": 0.67, "grad_norm": 0.8973651355313759, "learning_rate": 2.648534048681759e-06, "loss": 0.6312, "step": 5229 }, { "epoch": 0.67, "grad_norm": 0.9593551190644433, "learning_rate": 2.6467133585823334e-06, "loss": 0.6532, "step": 5230 }, { "epoch": 0.67, "grad_norm": 0.8515754673470484, "learning_rate": 2.644893069223107e-06, "loss": 0.6279, "step": 5231 }, { "epoch": 0.67, "grad_norm": 0.8483891426338936, "learning_rate": 2.643073180914055e-06, "loss": 0.5926, "step": 5232 }, { "epoch": 0.67, "grad_norm": 0.6915501624086446, "learning_rate": 2.641253693965087e-06, "loss": 0.5239, "step": 5233 }, { "epoch": 0.67, "grad_norm": 0.747231630935088, "learning_rate": 2.639434608686041e-06, "loss": 0.5496, "step": 5234 }, { "epoch": 0.67, "grad_norm": 0.8164066920757889, "learning_rate": 2.6376159253866888e-06, "loss": 0.6494, "step": 5235 }, { "epoch": 0.67, "grad_norm": 0.8764125422257726, "learning_rate": 2.635797644376734e-06, "loss": 0.6638, "step": 5236 }, { "epoch": 0.67, "grad_norm": 0.7379309562064749, "learning_rate": 2.6339797659658097e-06, "loss": 0.5821, "step": 5237 }, { "epoch": 0.67, "grad_norm": 0.9269195931587623, "learning_rate": 2.6321622904634826e-06, "loss": 0.6451, "step": 5238 }, { "epoch": 0.67, "grad_norm": 0.6678045720233075, "learning_rate": 2.6303452181792486e-06, "loss": 0.5508, "step": 5239 }, { "epoch": 0.67, "grad_norm": 0.9214877741918989, "learning_rate": 2.6285285494225365e-06, "loss": 0.6372, "step": 5240 }, { "epoch": 0.67, "grad_norm": 0.7706855728317539, "learning_rate": 2.6267122845027084e-06, "loss": 0.5649, "step": 5241 }, { "epoch": 0.67, "grad_norm": 0.6460768354717873, "learning_rate": 2.6248964237290526e-06, "loss": 0.4874, "step": 5242 }, { "epoch": 0.67, "grad_norm": 0.662705613300476, "learning_rate": 2.623080967410793e-06, "loss": 0.52, "step": 5243 }, { "epoch": 0.67, "grad_norm": 0.7990598437806741, "learning_rate": 2.621265915857081e-06, "loss": 0.6706, "step": 5244 }, { "epoch": 0.67, "grad_norm": 0.9225338268173324, "learning_rate": 2.619451269377003e-06, "loss": 0.6473, "step": 5245 }, { "epoch": 0.67, "grad_norm": 0.8533217697505727, "learning_rate": 2.617637028279573e-06, "loss": 0.7123, "step": 5246 }, { "epoch": 0.67, "grad_norm": 0.873994190015143, "learning_rate": 2.615823192873738e-06, "loss": 0.6195, "step": 5247 }, { "epoch": 0.67, "grad_norm": 1.0139458032267308, "learning_rate": 2.614009763468375e-06, "loss": 0.6711, "step": 5248 }, { "epoch": 0.67, "grad_norm": 0.809710035291973, "learning_rate": 2.6121967403722924e-06, "loss": 0.5229, "step": 5249 }, { "epoch": 0.67, "grad_norm": 0.6422406968118504, "learning_rate": 2.610384123894229e-06, "loss": 0.5363, "step": 5250 }, { "epoch": 0.67, "grad_norm": 0.6570753282383198, "learning_rate": 2.6085719143428547e-06, "loss": 0.5341, "step": 5251 }, { "epoch": 0.67, "grad_norm": 0.6103822686860066, "learning_rate": 2.6067601120267695e-06, "loss": 0.5225, "step": 5252 }, { "epoch": 0.67, "grad_norm": 0.7695996376033003, "learning_rate": 2.604948717254504e-06, "loss": 0.6001, "step": 5253 }, { "epoch": 0.67, "grad_norm": 0.7139670666644147, "learning_rate": 2.603137730334521e-06, "loss": 0.5494, "step": 5254 }, { "epoch": 0.67, "grad_norm": 0.9943704861211224, "learning_rate": 2.601327151575212e-06, "loss": 0.66, "step": 5255 }, { "epoch": 0.67, "grad_norm": 0.6826620922904162, "learning_rate": 2.5995169812849e-06, "loss": 0.5032, "step": 5256 }, { "epoch": 0.67, "grad_norm": 0.8977199972802585, "learning_rate": 2.5977072197718378e-06, "loss": 0.6674, "step": 5257 }, { "epoch": 0.67, "grad_norm": 0.6665536624720048, "learning_rate": 2.595897867344209e-06, "loss": 0.5717, "step": 5258 }, { "epoch": 0.67, "grad_norm": 0.8001280344675189, "learning_rate": 2.5940889243101288e-06, "loss": 0.5854, "step": 5259 }, { "epoch": 0.67, "grad_norm": 0.6987764600203765, "learning_rate": 2.5922803909776395e-06, "loss": 0.5331, "step": 5260 }, { "epoch": 0.67, "grad_norm": 0.8417843250563803, "learning_rate": 2.590472267654714e-06, "loss": 0.5828, "step": 5261 }, { "epoch": 0.67, "grad_norm": 0.7679209891435838, "learning_rate": 2.588664554649261e-06, "loss": 0.5427, "step": 5262 }, { "epoch": 0.67, "grad_norm": 0.701675621922384, "learning_rate": 2.586857252269114e-06, "loss": 0.5174, "step": 5263 }, { "epoch": 0.67, "grad_norm": 1.1556914796524849, "learning_rate": 2.5850503608220357e-06, "loss": 0.5997, "step": 5264 }, { "epoch": 0.67, "grad_norm": 0.7293964470323818, "learning_rate": 2.5832438806157235e-06, "loss": 0.5521, "step": 5265 }, { "epoch": 0.67, "grad_norm": 0.8560915706260345, "learning_rate": 2.5814378119578e-06, "loss": 0.6487, "step": 5266 }, { "epoch": 0.67, "grad_norm": 0.8777483202910402, "learning_rate": 2.5796321551558207e-06, "loss": 0.5826, "step": 5267 }, { "epoch": 0.67, "grad_norm": 0.711247152771286, "learning_rate": 2.5778269105172704e-06, "loss": 0.5564, "step": 5268 }, { "epoch": 0.67, "grad_norm": 0.866180313828373, "learning_rate": 2.576022078349563e-06, "loss": 0.6673, "step": 5269 }, { "epoch": 0.67, "grad_norm": 1.0275463129549856, "learning_rate": 2.574217658960043e-06, "loss": 0.6882, "step": 5270 }, { "epoch": 0.67, "grad_norm": 0.8071827298527652, "learning_rate": 2.5724136526559844e-06, "loss": 0.5799, "step": 5271 }, { "epoch": 0.67, "grad_norm": 0.7586171614296182, "learning_rate": 2.5706100597445883e-06, "loss": 0.5682, "step": 5272 }, { "epoch": 0.67, "grad_norm": 0.8427413360315162, "learning_rate": 2.5688068805329915e-06, "loss": 0.6191, "step": 5273 }, { "epoch": 0.67, "grad_norm": 0.7337929045199099, "learning_rate": 2.567004115328255e-06, "loss": 0.6035, "step": 5274 }, { "epoch": 0.67, "grad_norm": 0.9438880339452042, "learning_rate": 2.5652017644373706e-06, "loss": 0.5848, "step": 5275 }, { "epoch": 0.67, "grad_norm": 0.9769047683541847, "learning_rate": 2.563399828167261e-06, "loss": 0.6538, "step": 5276 }, { "epoch": 0.67, "grad_norm": 0.8829275409617017, "learning_rate": 2.5615983068247756e-06, "loss": 0.6033, "step": 5277 }, { "epoch": 0.67, "grad_norm": 1.3160186572457457, "learning_rate": 2.5597972007166962e-06, "loss": 0.6373, "step": 5278 }, { "epoch": 0.67, "grad_norm": 0.8258390241947977, "learning_rate": 2.5579965101497306e-06, "loss": 0.6359, "step": 5279 }, { "epoch": 0.67, "grad_norm": 1.0385353606902328, "learning_rate": 2.556196235430519e-06, "loss": 0.6075, "step": 5280 }, { "epoch": 0.67, "grad_norm": 0.7646205983824458, "learning_rate": 2.55439637686563e-06, "loss": 0.541, "step": 5281 }, { "epoch": 0.67, "grad_norm": 0.6097114060467259, "learning_rate": 2.5525969347615597e-06, "loss": 0.5082, "step": 5282 }, { "epoch": 0.67, "grad_norm": 0.8189481212915716, "learning_rate": 2.550797909424734e-06, "loss": 0.6647, "step": 5283 }, { "epoch": 0.67, "grad_norm": 0.7689881245083069, "learning_rate": 2.548999301161509e-06, "loss": 0.5887, "step": 5284 }, { "epoch": 0.67, "grad_norm": 0.9122215422869405, "learning_rate": 2.5472011102781685e-06, "loss": 0.7127, "step": 5285 }, { "epoch": 0.67, "grad_norm": 0.6515502180777499, "learning_rate": 2.5454033370809263e-06, "loss": 0.5224, "step": 5286 }, { "epoch": 0.67, "grad_norm": 0.8691432677847745, "learning_rate": 2.5436059818759228e-06, "loss": 0.6267, "step": 5287 }, { "epoch": 0.67, "grad_norm": 0.8713768790040591, "learning_rate": 2.541809044969231e-06, "loss": 0.6434, "step": 5288 }, { "epoch": 0.67, "grad_norm": 0.7830431389617342, "learning_rate": 2.5400125266668497e-06, "loss": 0.6034, "step": 5289 }, { "epoch": 0.67, "grad_norm": 1.0200552558680356, "learning_rate": 2.538216427274707e-06, "loss": 0.6176, "step": 5290 }, { "epoch": 0.67, "grad_norm": 0.7371042727602204, "learning_rate": 2.5364207470986602e-06, "loss": 0.5725, "step": 5291 }, { "epoch": 0.67, "grad_norm": 0.7577744632607916, "learning_rate": 2.5346254864444943e-06, "loss": 0.629, "step": 5292 }, { "epoch": 0.67, "grad_norm": 0.7466468977963334, "learning_rate": 2.5328306456179243e-06, "loss": 0.5683, "step": 5293 }, { "epoch": 0.67, "grad_norm": 2.5179946780505507, "learning_rate": 2.5310362249245925e-06, "loss": 0.6534, "step": 5294 }, { "epoch": 0.67, "grad_norm": 0.7053951999306056, "learning_rate": 2.5292422246700697e-06, "loss": 0.553, "step": 5295 }, { "epoch": 0.67, "grad_norm": 0.7006661402395418, "learning_rate": 2.5274486451598566e-06, "loss": 0.5853, "step": 5296 }, { "epoch": 0.67, "grad_norm": 0.9067466621332471, "learning_rate": 2.5256554866993806e-06, "loss": 0.5923, "step": 5297 }, { "epoch": 0.67, "grad_norm": 0.9191279986740406, "learning_rate": 2.523862749593997e-06, "loss": 0.6119, "step": 5298 }, { "epoch": 0.68, "grad_norm": 0.8216816541969479, "learning_rate": 2.522070434148989e-06, "loss": 0.6189, "step": 5299 }, { "epoch": 0.68, "grad_norm": 0.9552477996825173, "learning_rate": 2.5202785406695735e-06, "loss": 0.4926, "step": 5300 }, { "epoch": 0.68, "grad_norm": 0.8363393654229209, "learning_rate": 2.5184870694608885e-06, "loss": 0.6114, "step": 5301 }, { "epoch": 0.68, "grad_norm": 0.6484358948002191, "learning_rate": 2.5166960208280034e-06, "loss": 0.5039, "step": 5302 }, { "epoch": 0.68, "grad_norm": 0.6915461009566738, "learning_rate": 2.5149053950759158e-06, "loss": 0.5289, "step": 5303 }, { "epoch": 0.68, "grad_norm": 0.7191270531355878, "learning_rate": 2.5131151925095494e-06, "loss": 0.5573, "step": 5304 }, { "epoch": 0.68, "grad_norm": 1.0582117147089085, "learning_rate": 2.5113254134337573e-06, "loss": 0.6053, "step": 5305 }, { "epoch": 0.68, "grad_norm": 0.710298610894339, "learning_rate": 2.5095360581533206e-06, "loss": 0.5284, "step": 5306 }, { "epoch": 0.68, "grad_norm": 0.7029492979667713, "learning_rate": 2.507747126972948e-06, "loss": 0.5334, "step": 5307 }, { "epoch": 0.68, "grad_norm": 1.1488064095390504, "learning_rate": 2.5059586201972753e-06, "loss": 0.6311, "step": 5308 }, { "epoch": 0.68, "grad_norm": 0.7779439577540818, "learning_rate": 2.5041705381308668e-06, "loss": 0.4788, "step": 5309 }, { "epoch": 0.68, "grad_norm": 0.9933332791254801, "learning_rate": 2.5023828810782146e-06, "loss": 0.5873, "step": 5310 }, { "epoch": 0.68, "grad_norm": 0.8513073559435539, "learning_rate": 2.500595649343738e-06, "loss": 0.6416, "step": 5311 }, { "epoch": 0.68, "grad_norm": 0.7639063991240539, "learning_rate": 2.498808843231783e-06, "loss": 0.5688, "step": 5312 }, { "epoch": 0.68, "grad_norm": 0.788341030157502, "learning_rate": 2.497022463046625e-06, "loss": 0.5406, "step": 5313 }, { "epoch": 0.68, "grad_norm": 0.7494752350933808, "learning_rate": 2.4952365090924655e-06, "loss": 0.597, "step": 5314 }, { "epoch": 0.68, "grad_norm": 0.9327386042471393, "learning_rate": 2.4934509816734347e-06, "loss": 0.6244, "step": 5315 }, { "epoch": 0.68, "grad_norm": 0.71932882645165, "learning_rate": 2.4916658810935883e-06, "loss": 0.5138, "step": 5316 }, { "epoch": 0.68, "grad_norm": 0.6806515348120529, "learning_rate": 2.4898812076569107e-06, "loss": 0.56, "step": 5317 }, { "epoch": 0.68, "grad_norm": 0.7666257793189124, "learning_rate": 2.4880969616673136e-06, "loss": 0.5445, "step": 5318 }, { "epoch": 0.68, "grad_norm": 0.7277506922752934, "learning_rate": 2.486313143428634e-06, "loss": 0.5564, "step": 5319 }, { "epoch": 0.68, "grad_norm": 0.7166121764710645, "learning_rate": 2.48452975324464e-06, "loss": 0.5502, "step": 5320 }, { "epoch": 0.68, "grad_norm": 0.6892566640871824, "learning_rate": 2.4827467914190225e-06, "loss": 0.5342, "step": 5321 }, { "epoch": 0.68, "grad_norm": 0.8367229406763523, "learning_rate": 2.480964258255402e-06, "loss": 0.6368, "step": 5322 }, { "epoch": 0.68, "grad_norm": 0.9321918716234291, "learning_rate": 2.479182154057325e-06, "loss": 0.6727, "step": 5323 }, { "epoch": 0.68, "grad_norm": 0.8156121481571531, "learning_rate": 2.4774004791282653e-06, "loss": 0.5807, "step": 5324 }, { "epoch": 0.68, "grad_norm": 0.6726538973396666, "learning_rate": 2.4756192337716223e-06, "loss": 0.5095, "step": 5325 }, { "epoch": 0.68, "grad_norm": 0.8660779631221062, "learning_rate": 2.4738384182907264e-06, "loss": 0.5562, "step": 5326 }, { "epoch": 0.68, "grad_norm": 0.9454378469220245, "learning_rate": 2.4720580329888303e-06, "loss": 0.6312, "step": 5327 }, { "epoch": 0.68, "grad_norm": 0.7752034688525052, "learning_rate": 2.470278078169116e-06, "loss": 0.5346, "step": 5328 }, { "epoch": 0.68, "grad_norm": 0.693389887716643, "learning_rate": 2.4684985541346907e-06, "loss": 0.5288, "step": 5329 }, { "epoch": 0.68, "grad_norm": 0.7441921617972328, "learning_rate": 2.466719461188588e-06, "loss": 0.5628, "step": 5330 }, { "epoch": 0.68, "grad_norm": 0.9819225087527707, "learning_rate": 2.4649407996337697e-06, "loss": 0.6428, "step": 5331 }, { "epoch": 0.68, "grad_norm": 0.6675731053647536, "learning_rate": 2.463162569773123e-06, "loss": 0.5688, "step": 5332 }, { "epoch": 0.68, "grad_norm": 0.6435252461158815, "learning_rate": 2.4613847719094624e-06, "loss": 0.5398, "step": 5333 }, { "epoch": 0.68, "grad_norm": 0.7685754160739873, "learning_rate": 2.459607406345528e-06, "loss": 0.6097, "step": 5334 }, { "epoch": 0.68, "grad_norm": 0.8653335017815035, "learning_rate": 2.457830473383986e-06, "loss": 0.585, "step": 5335 }, { "epoch": 0.68, "grad_norm": 0.8109551090664087, "learning_rate": 2.4560539733274315e-06, "loss": 0.5861, "step": 5336 }, { "epoch": 0.68, "grad_norm": 0.791868389071689, "learning_rate": 2.4542779064783822e-06, "loss": 0.5948, "step": 5337 }, { "epoch": 0.68, "grad_norm": 0.6937719785335831, "learning_rate": 2.452502273139284e-06, "loss": 0.5633, "step": 5338 }, { "epoch": 0.68, "grad_norm": 0.8404216725002247, "learning_rate": 2.4507270736125094e-06, "loss": 0.6407, "step": 5339 }, { "epoch": 0.68, "grad_norm": 0.7938091361582823, "learning_rate": 2.448952308200356e-06, "loss": 0.6101, "step": 5340 }, { "epoch": 0.68, "grad_norm": 0.8191442134545885, "learning_rate": 2.4471779772050473e-06, "loss": 0.5575, "step": 5341 }, { "epoch": 0.68, "grad_norm": 0.7403353315547542, "learning_rate": 2.4454040809287342e-06, "loss": 0.5348, "step": 5342 }, { "epoch": 0.68, "grad_norm": 0.8599438179354085, "learning_rate": 2.4436306196734933e-06, "loss": 0.595, "step": 5343 }, { "epoch": 0.68, "grad_norm": 0.8825854790144705, "learning_rate": 2.4418575937413254e-06, "loss": 0.6291, "step": 5344 }, { "epoch": 0.68, "grad_norm": 0.68301862050764, "learning_rate": 2.4400850034341582e-06, "loss": 0.5152, "step": 5345 }, { "epoch": 0.68, "grad_norm": 0.6788624255747951, "learning_rate": 2.438312849053846e-06, "loss": 0.5825, "step": 5346 }, { "epoch": 0.68, "grad_norm": 0.7380081419150237, "learning_rate": 2.4365411309021687e-06, "loss": 0.5666, "step": 5347 }, { "epoch": 0.68, "grad_norm": 0.9596195705507765, "learning_rate": 2.434769849280831e-06, "loss": 0.6246, "step": 5348 }, { "epoch": 0.68, "grad_norm": 0.7342138494967124, "learning_rate": 2.432999004491463e-06, "loss": 0.5612, "step": 5349 }, { "epoch": 0.68, "grad_norm": 0.6667667016439291, "learning_rate": 2.431228596835622e-06, "loss": 0.5267, "step": 5350 }, { "epoch": 0.68, "grad_norm": 0.7303715038111931, "learning_rate": 2.4294586266147874e-06, "loss": 0.4997, "step": 5351 }, { "epoch": 0.68, "grad_norm": 0.6386560850597269, "learning_rate": 2.4276890941303716e-06, "loss": 0.5104, "step": 5352 }, { "epoch": 0.68, "grad_norm": 0.7536038991335989, "learning_rate": 2.425919999683704e-06, "loss": 0.5472, "step": 5353 }, { "epoch": 0.68, "grad_norm": 0.6570526316974628, "learning_rate": 2.424151343576045e-06, "loss": 0.4995, "step": 5354 }, { "epoch": 0.68, "grad_norm": 0.8340767752903652, "learning_rate": 2.4223831261085764e-06, "loss": 0.6018, "step": 5355 }, { "epoch": 0.68, "grad_norm": 0.7104125961206265, "learning_rate": 2.420615347582408e-06, "loss": 0.5863, "step": 5356 }, { "epoch": 0.68, "grad_norm": 0.8732035373587792, "learning_rate": 2.418848008298574e-06, "loss": 0.6282, "step": 5357 }, { "epoch": 0.68, "grad_norm": 0.6314505691052757, "learning_rate": 2.417081108558035e-06, "loss": 0.5066, "step": 5358 }, { "epoch": 0.68, "grad_norm": 0.897846263399217, "learning_rate": 2.4153146486616735e-06, "loss": 0.6146, "step": 5359 }, { "epoch": 0.68, "grad_norm": 0.7812882208015591, "learning_rate": 2.4135486289103004e-06, "loss": 0.5895, "step": 5360 }, { "epoch": 0.68, "grad_norm": 0.7737994567895307, "learning_rate": 2.411783049604651e-06, "loss": 0.5777, "step": 5361 }, { "epoch": 0.68, "grad_norm": 0.6040148231421012, "learning_rate": 2.4100179110453837e-06, "loss": 0.4791, "step": 5362 }, { "epoch": 0.68, "grad_norm": 0.7016395308207667, "learning_rate": 2.408253213533082e-06, "loss": 0.5269, "step": 5363 }, { "epoch": 0.68, "grad_norm": 0.6626890016453932, "learning_rate": 2.4064889573682597e-06, "loss": 0.5264, "step": 5364 }, { "epoch": 0.68, "grad_norm": 0.6480264204053937, "learning_rate": 2.4047251428513485e-06, "loss": 0.5171, "step": 5365 }, { "epoch": 0.68, "grad_norm": 0.7389679512333903, "learning_rate": 2.4029617702827084e-06, "loss": 0.5549, "step": 5366 }, { "epoch": 0.68, "grad_norm": 0.7278065098474038, "learning_rate": 2.4011988399626234e-06, "loss": 0.5757, "step": 5367 }, { "epoch": 0.68, "grad_norm": 0.7669481115886791, "learning_rate": 2.399436352191301e-06, "loss": 0.6056, "step": 5368 }, { "epoch": 0.68, "grad_norm": 0.8422950530889523, "learning_rate": 2.3976743072688762e-06, "loss": 0.6551, "step": 5369 }, { "epoch": 0.68, "grad_norm": 0.8229577730294655, "learning_rate": 2.3959127054954055e-06, "loss": 0.6399, "step": 5370 }, { "epoch": 0.68, "grad_norm": 0.6441364149994218, "learning_rate": 2.394151547170872e-06, "loss": 0.5392, "step": 5371 }, { "epoch": 0.68, "grad_norm": 0.7067806606288211, "learning_rate": 2.392390832595183e-06, "loss": 0.5188, "step": 5372 }, { "epoch": 0.68, "grad_norm": 0.6278798983084514, "learning_rate": 2.3906305620681685e-06, "loss": 0.5416, "step": 5373 }, { "epoch": 0.68, "grad_norm": 0.7145489780461023, "learning_rate": 2.3888707358895857e-06, "loss": 0.5592, "step": 5374 }, { "epoch": 0.68, "grad_norm": 0.7929315338798346, "learning_rate": 2.3871113543591135e-06, "loss": 0.6039, "step": 5375 }, { "epoch": 0.68, "grad_norm": 0.6540776364410453, "learning_rate": 2.385352417776357e-06, "loss": 0.5167, "step": 5376 }, { "epoch": 0.69, "grad_norm": 0.8526947533821853, "learning_rate": 2.3835939264408448e-06, "loss": 0.6577, "step": 5377 }, { "epoch": 0.69, "grad_norm": 0.6223658010552587, "learning_rate": 2.3818358806520294e-06, "loss": 0.4936, "step": 5378 }, { "epoch": 0.69, "grad_norm": 0.675765142899162, "learning_rate": 2.380078280709287e-06, "loss": 0.5254, "step": 5379 }, { "epoch": 0.69, "grad_norm": 0.8321936800602324, "learning_rate": 2.37832112691192e-06, "loss": 0.5635, "step": 5380 }, { "epoch": 0.69, "grad_norm": 0.6760457101828767, "learning_rate": 2.3765644195591523e-06, "loss": 0.51, "step": 5381 }, { "epoch": 0.69, "grad_norm": 0.925354546983763, "learning_rate": 2.3748081589501332e-06, "loss": 0.5816, "step": 5382 }, { "epoch": 0.69, "grad_norm": 0.6534995243192381, "learning_rate": 2.373052345383934e-06, "loss": 0.4827, "step": 5383 }, { "epoch": 0.69, "grad_norm": 0.7723883868607132, "learning_rate": 2.3712969791595545e-06, "loss": 0.6011, "step": 5384 }, { "epoch": 0.69, "grad_norm": 0.8063777701054817, "learning_rate": 2.369542060575914e-06, "loss": 0.5332, "step": 5385 }, { "epoch": 0.69, "grad_norm": 0.8498449762263667, "learning_rate": 2.3677875899318563e-06, "loss": 0.6023, "step": 5386 }, { "epoch": 0.69, "grad_norm": 0.7524225015616848, "learning_rate": 2.366033567526151e-06, "loss": 0.5127, "step": 5387 }, { "epoch": 0.69, "grad_norm": 0.7729641200368167, "learning_rate": 2.3642799936574874e-06, "loss": 0.5518, "step": 5388 }, { "epoch": 0.69, "grad_norm": 0.886761729221374, "learning_rate": 2.362526868624483e-06, "loss": 0.6214, "step": 5389 }, { "epoch": 0.69, "grad_norm": 0.6299389765409734, "learning_rate": 2.3607741927256755e-06, "loss": 0.4619, "step": 5390 }, { "epoch": 0.69, "grad_norm": 0.8738654729510181, "learning_rate": 2.3590219662595276e-06, "loss": 0.6325, "step": 5391 }, { "epoch": 0.69, "grad_norm": 0.8228660083102083, "learning_rate": 2.3572701895244252e-06, "loss": 0.6307, "step": 5392 }, { "epoch": 0.69, "grad_norm": 0.7887710694370695, "learning_rate": 2.3555188628186783e-06, "loss": 0.5075, "step": 5393 }, { "epoch": 0.69, "grad_norm": 0.7574997946229495, "learning_rate": 2.353767986440519e-06, "loss": 0.5575, "step": 5394 }, { "epoch": 0.69, "grad_norm": 0.896120589584835, "learning_rate": 2.3520175606881027e-06, "loss": 0.6828, "step": 5395 }, { "epoch": 0.69, "grad_norm": 0.6178984751225683, "learning_rate": 2.3502675858595104e-06, "loss": 0.5313, "step": 5396 }, { "epoch": 0.69, "grad_norm": 1.0551036824662303, "learning_rate": 2.3485180622527425e-06, "loss": 0.6067, "step": 5397 }, { "epoch": 0.69, "grad_norm": 0.6721172700713032, "learning_rate": 2.346768990165726e-06, "loss": 0.5041, "step": 5398 }, { "epoch": 0.69, "grad_norm": 0.6817065179056067, "learning_rate": 2.3450203698963096e-06, "loss": 0.537, "step": 5399 }, { "epoch": 0.69, "grad_norm": 0.6322783417417094, "learning_rate": 2.3432722017422642e-06, "loss": 0.4949, "step": 5400 }, { "epoch": 0.69, "grad_norm": 0.8564157233654893, "learning_rate": 2.341524486001286e-06, "loss": 0.6461, "step": 5401 }, { "epoch": 0.69, "grad_norm": 0.9013760904396483, "learning_rate": 2.3397772229709918e-06, "loss": 0.6791, "step": 5402 }, { "epoch": 0.69, "grad_norm": 0.8223743532631766, "learning_rate": 2.3380304129489222e-06, "loss": 0.544, "step": 5403 }, { "epoch": 0.69, "grad_norm": 0.7741006386300008, "learning_rate": 2.3362840562325416e-06, "loss": 0.5124, "step": 5404 }, { "epoch": 0.69, "grad_norm": 0.6707223499857118, "learning_rate": 2.3345381531192356e-06, "loss": 0.5051, "step": 5405 }, { "epoch": 0.69, "grad_norm": 0.7844729973188754, "learning_rate": 2.3327927039063132e-06, "loss": 0.5407, "step": 5406 }, { "epoch": 0.69, "grad_norm": 0.8094001757424992, "learning_rate": 2.331047708891007e-06, "loss": 0.587, "step": 5407 }, { "epoch": 0.69, "grad_norm": 0.8582538697112428, "learning_rate": 2.329303168370471e-06, "loss": 0.6154, "step": 5408 }, { "epoch": 0.69, "grad_norm": 0.8423328587794595, "learning_rate": 2.327559082641782e-06, "loss": 0.6138, "step": 5409 }, { "epoch": 0.69, "grad_norm": 0.7746988149780637, "learning_rate": 2.3258154520019384e-06, "loss": 0.6068, "step": 5410 }, { "epoch": 0.69, "grad_norm": 0.6718025721432145, "learning_rate": 2.324072276747866e-06, "loss": 0.5067, "step": 5411 }, { "epoch": 0.69, "grad_norm": 0.7085910569633641, "learning_rate": 2.3223295571764066e-06, "loss": 0.5204, "step": 5412 }, { "epoch": 0.69, "grad_norm": 1.147222270471566, "learning_rate": 2.3205872935843276e-06, "loss": 0.6157, "step": 5413 }, { "epoch": 0.69, "grad_norm": 0.7520969433114628, "learning_rate": 2.3188454862683195e-06, "loss": 0.5227, "step": 5414 }, { "epoch": 0.69, "grad_norm": 0.7507322232511526, "learning_rate": 2.3171041355249924e-06, "loss": 0.5929, "step": 5415 }, { "epoch": 0.69, "grad_norm": 0.6759347146284236, "learning_rate": 2.3153632416508807e-06, "loss": 0.5305, "step": 5416 }, { "epoch": 0.69, "grad_norm": 0.8243327845475505, "learning_rate": 2.3136228049424414e-06, "loss": 0.6164, "step": 5417 }, { "epoch": 0.69, "grad_norm": 0.7824745703095208, "learning_rate": 2.311882825696051e-06, "loss": 0.5448, "step": 5418 }, { "epoch": 0.69, "grad_norm": 0.7636758783251725, "learning_rate": 2.310143304208012e-06, "loss": 0.6491, "step": 5419 }, { "epoch": 0.69, "grad_norm": 0.9273589794626783, "learning_rate": 2.308404240774544e-06, "loss": 0.6899, "step": 5420 }, { "epoch": 0.69, "grad_norm": 0.8420805490318296, "learning_rate": 2.3066656356917945e-06, "loss": 0.653, "step": 5421 }, { "epoch": 0.69, "grad_norm": 0.7673069020917275, "learning_rate": 2.304927489255827e-06, "loss": 0.6179, "step": 5422 }, { "epoch": 0.69, "grad_norm": 0.6622773162898108, "learning_rate": 2.303189801762632e-06, "loss": 0.5102, "step": 5423 }, { "epoch": 0.69, "grad_norm": 0.7859261737358603, "learning_rate": 2.301452573508118e-06, "loss": 0.6175, "step": 5424 }, { "epoch": 0.69, "grad_norm": 0.6777604156252802, "learning_rate": 2.2997158047881175e-06, "loss": 0.5784, "step": 5425 }, { "epoch": 0.69, "grad_norm": 0.992417508911669, "learning_rate": 2.297979495898384e-06, "loss": 0.6385, "step": 5426 }, { "epoch": 0.69, "grad_norm": 1.0902541315610164, "learning_rate": 2.2962436471345933e-06, "loss": 0.6005, "step": 5427 }, { "epoch": 0.69, "grad_norm": 0.8109023685581865, "learning_rate": 2.2945082587923416e-06, "loss": 0.5942, "step": 5428 }, { "epoch": 0.69, "grad_norm": 0.8583208956967828, "learning_rate": 2.2927733311671486e-06, "loss": 0.5033, "step": 5429 }, { "epoch": 0.69, "grad_norm": 0.6944448285715461, "learning_rate": 2.291038864554453e-06, "loss": 0.5801, "step": 5430 }, { "epoch": 0.69, "grad_norm": 0.9278828512087278, "learning_rate": 2.289304859249617e-06, "loss": 0.6054, "step": 5431 }, { "epoch": 0.69, "grad_norm": 0.6582588346169828, "learning_rate": 2.2875713155479233e-06, "loss": 0.5181, "step": 5432 }, { "epoch": 0.69, "grad_norm": 0.7181118928320781, "learning_rate": 2.2858382337445774e-06, "loss": 0.57, "step": 5433 }, { "epoch": 0.69, "grad_norm": 0.7694876357311476, "learning_rate": 2.284105614134704e-06, "loss": 0.6105, "step": 5434 }, { "epoch": 0.69, "grad_norm": 0.9694382948828406, "learning_rate": 2.2823734570133506e-06, "loss": 0.6232, "step": 5435 }, { "epoch": 0.69, "grad_norm": 0.8574219637739839, "learning_rate": 2.2806417626754833e-06, "loss": 0.6288, "step": 5436 }, { "epoch": 0.69, "grad_norm": 0.6720394743273487, "learning_rate": 2.2789105314159958e-06, "loss": 0.5513, "step": 5437 }, { "epoch": 0.69, "grad_norm": 0.8032699678936319, "learning_rate": 2.277179763529697e-06, "loss": 0.6172, "step": 5438 }, { "epoch": 0.69, "grad_norm": 0.814665701234805, "learning_rate": 2.275449459311318e-06, "loss": 0.6312, "step": 5439 }, { "epoch": 0.69, "grad_norm": 0.7958682646619236, "learning_rate": 2.273719619055512e-06, "loss": 0.5246, "step": 5440 }, { "epoch": 0.69, "grad_norm": 0.6825506657840111, "learning_rate": 2.271990243056853e-06, "loss": 0.5402, "step": 5441 }, { "epoch": 0.69, "grad_norm": 0.8453369251157237, "learning_rate": 2.270261331609835e-06, "loss": 0.648, "step": 5442 }, { "epoch": 0.69, "grad_norm": 0.791894998758287, "learning_rate": 2.268532885008874e-06, "loss": 0.609, "step": 5443 }, { "epoch": 0.69, "grad_norm": 0.8574678061964685, "learning_rate": 2.2668049035483066e-06, "loss": 0.6243, "step": 5444 }, { "epoch": 0.69, "grad_norm": 0.751011965024508, "learning_rate": 2.2650773875223897e-06, "loss": 0.5689, "step": 5445 }, { "epoch": 0.69, "grad_norm": 0.7384988250916608, "learning_rate": 2.263350337225302e-06, "loss": 0.5029, "step": 5446 }, { "epoch": 0.69, "grad_norm": 0.9307559361448564, "learning_rate": 2.2616237529511416e-06, "loss": 0.6537, "step": 5447 }, { "epoch": 0.69, "grad_norm": 0.840607566116256, "learning_rate": 2.2598976349939274e-06, "loss": 0.6159, "step": 5448 }, { "epoch": 0.69, "grad_norm": 0.8193371530842894, "learning_rate": 2.2581719836475994e-06, "loss": 0.5749, "step": 5449 }, { "epoch": 0.69, "grad_norm": 0.7346374058338675, "learning_rate": 2.2564467992060197e-06, "loss": 0.5946, "step": 5450 }, { "epoch": 0.69, "grad_norm": 0.6884897738854778, "learning_rate": 2.2547220819629673e-06, "loss": 0.5408, "step": 5451 }, { "epoch": 0.69, "grad_norm": 0.7352881447111629, "learning_rate": 2.2529978322121437e-06, "loss": 0.5771, "step": 5452 }, { "epoch": 0.69, "grad_norm": 0.7543286695585357, "learning_rate": 2.251274050247172e-06, "loss": 0.5166, "step": 5453 }, { "epoch": 0.69, "grad_norm": 0.786852291708415, "learning_rate": 2.2495507363615933e-06, "loss": 0.5326, "step": 5454 }, { "epoch": 0.69, "grad_norm": 0.8003772149051532, "learning_rate": 2.2478278908488706e-06, "loss": 0.5575, "step": 5455 }, { "epoch": 0.7, "grad_norm": 0.6445187805096474, "learning_rate": 2.246105514002385e-06, "loss": 0.5247, "step": 5456 }, { "epoch": 0.7, "grad_norm": 0.7267553436671903, "learning_rate": 2.244383606115442e-06, "loss": 0.4903, "step": 5457 }, { "epoch": 0.7, "grad_norm": 0.7476051197273879, "learning_rate": 2.242662167481262e-06, "loss": 0.571, "step": 5458 }, { "epoch": 0.7, "grad_norm": 0.8233348815554917, "learning_rate": 2.2409411983929896e-06, "loss": 0.6096, "step": 5459 }, { "epoch": 0.7, "grad_norm": 0.6425241998175133, "learning_rate": 2.239220699143687e-06, "loss": 0.5715, "step": 5460 }, { "epoch": 0.7, "grad_norm": 0.8118638863170009, "learning_rate": 2.237500670026338e-06, "loss": 0.6421, "step": 5461 }, { "epoch": 0.7, "grad_norm": 0.7328324508731832, "learning_rate": 2.235781111333844e-06, "loss": 0.5437, "step": 5462 }, { "epoch": 0.7, "grad_norm": 0.848002311094021, "learning_rate": 2.2340620233590305e-06, "loss": 0.6428, "step": 5463 }, { "epoch": 0.7, "grad_norm": 0.9349523828157077, "learning_rate": 2.2323434063946393e-06, "loss": 0.6424, "step": 5464 }, { "epoch": 0.7, "grad_norm": 0.7746754400526813, "learning_rate": 2.2306252607333328e-06, "loss": 0.5952, "step": 5465 }, { "epoch": 0.7, "grad_norm": 0.7244380053129705, "learning_rate": 2.2289075866676934e-06, "loss": 0.5357, "step": 5466 }, { "epoch": 0.7, "grad_norm": 0.8212711232419224, "learning_rate": 2.2271903844902227e-06, "loss": 0.6646, "step": 5467 }, { "epoch": 0.7, "grad_norm": 0.6371127234918188, "learning_rate": 2.225473654493341e-06, "loss": 0.5084, "step": 5468 }, { "epoch": 0.7, "grad_norm": 0.7929704056355397, "learning_rate": 2.2237573969693926e-06, "loss": 0.545, "step": 5469 }, { "epoch": 0.7, "grad_norm": 0.7039795280299358, "learning_rate": 2.2220416122106374e-06, "loss": 0.5714, "step": 5470 }, { "epoch": 0.7, "grad_norm": 0.9149845528810331, "learning_rate": 2.2203263005092547e-06, "loss": 0.6597, "step": 5471 }, { "epoch": 0.7, "grad_norm": 0.8047719505540758, "learning_rate": 2.2186114621573445e-06, "loss": 0.609, "step": 5472 }, { "epoch": 0.7, "grad_norm": 0.7727057797183778, "learning_rate": 2.216897097446926e-06, "loss": 0.5249, "step": 5473 }, { "epoch": 0.7, "grad_norm": 0.8220198655219986, "learning_rate": 2.215183206669938e-06, "loss": 0.6012, "step": 5474 }, { "epoch": 0.7, "grad_norm": 0.7265727539917279, "learning_rate": 2.2134697901182377e-06, "loss": 0.5682, "step": 5475 }, { "epoch": 0.7, "grad_norm": 0.903222193124084, "learning_rate": 2.211756848083602e-06, "loss": 0.5433, "step": 5476 }, { "epoch": 0.7, "grad_norm": 0.627560511157613, "learning_rate": 2.2100443808577277e-06, "loss": 0.508, "step": 5477 }, { "epoch": 0.7, "grad_norm": 0.8675273771548871, "learning_rate": 2.20833238873223e-06, "loss": 0.6126, "step": 5478 }, { "epoch": 0.7, "grad_norm": 0.7465687450008791, "learning_rate": 2.2066208719986427e-06, "loss": 0.6016, "step": 5479 }, { "epoch": 0.7, "grad_norm": 0.7589702958589158, "learning_rate": 2.2049098309484195e-06, "loss": 0.5619, "step": 5480 }, { "epoch": 0.7, "grad_norm": 0.7436676254364487, "learning_rate": 2.2031992658729332e-06, "loss": 0.5846, "step": 5481 }, { "epoch": 0.7, "grad_norm": 0.8179437459808657, "learning_rate": 2.2014891770634754e-06, "loss": 0.6236, "step": 5482 }, { "epoch": 0.7, "grad_norm": 0.819481852039836, "learning_rate": 2.1997795648112565e-06, "loss": 0.5212, "step": 5483 }, { "epoch": 0.7, "grad_norm": 0.8062307125898023, "learning_rate": 2.1980704294074046e-06, "loss": 0.6598, "step": 5484 }, { "epoch": 0.7, "grad_norm": 0.8736678231296808, "learning_rate": 2.196361771142968e-06, "loss": 0.6641, "step": 5485 }, { "epoch": 0.7, "grad_norm": 0.797231043228781, "learning_rate": 2.1946535903089137e-06, "loss": 0.5354, "step": 5486 }, { "epoch": 0.7, "grad_norm": 0.730620179718931, "learning_rate": 2.192945887196128e-06, "loss": 0.562, "step": 5487 }, { "epoch": 0.7, "grad_norm": 0.687500333445565, "learning_rate": 2.191238662095413e-06, "loss": 0.5318, "step": 5488 }, { "epoch": 0.7, "grad_norm": 0.8008827186949273, "learning_rate": 2.189531915297493e-06, "loss": 0.5731, "step": 5489 }, { "epoch": 0.7, "grad_norm": 0.7450514056367135, "learning_rate": 2.187825647093008e-06, "loss": 0.606, "step": 5490 }, { "epoch": 0.7, "grad_norm": 0.7562351279620019, "learning_rate": 2.186119857772519e-06, "loss": 0.5262, "step": 5491 }, { "epoch": 0.7, "grad_norm": 0.777023742649139, "learning_rate": 2.1844145476265034e-06, "loss": 0.6402, "step": 5492 }, { "epoch": 0.7, "grad_norm": 0.7630075972679621, "learning_rate": 2.182709716945357e-06, "loss": 0.5389, "step": 5493 }, { "epoch": 0.7, "grad_norm": 0.6683080550086076, "learning_rate": 2.1810053660193942e-06, "loss": 0.5323, "step": 5494 }, { "epoch": 0.7, "grad_norm": 0.7387737613325633, "learning_rate": 2.179301495138851e-06, "loss": 0.5573, "step": 5495 }, { "epoch": 0.7, "grad_norm": 0.9339325229210625, "learning_rate": 2.1775981045938777e-06, "loss": 0.6318, "step": 5496 }, { "epoch": 0.7, "grad_norm": 0.6991522936648061, "learning_rate": 2.175895194674544e-06, "loss": 0.5574, "step": 5497 }, { "epoch": 0.7, "grad_norm": 0.7268789330806732, "learning_rate": 2.1741927656708367e-06, "loss": 0.5712, "step": 5498 }, { "epoch": 0.7, "grad_norm": 1.0366084791100336, "learning_rate": 2.172490817872662e-06, "loss": 0.6499, "step": 5499 }, { "epoch": 0.7, "grad_norm": 0.6085721067846119, "learning_rate": 2.1707893515698453e-06, "loss": 0.5262, "step": 5500 }, { "epoch": 0.7, "grad_norm": 0.8753175546708015, "learning_rate": 2.1690883670521268e-06, "loss": 0.6083, "step": 5501 }, { "epoch": 0.7, "grad_norm": 0.8832062765984533, "learning_rate": 2.1673878646091673e-06, "loss": 0.6709, "step": 5502 }, { "epoch": 0.7, "grad_norm": 0.8024769155615417, "learning_rate": 2.1656878445305444e-06, "loss": 0.6096, "step": 5503 }, { "epoch": 0.7, "grad_norm": 0.6736036244314383, "learning_rate": 2.163988307105754e-06, "loss": 0.5355, "step": 5504 }, { "epoch": 0.7, "grad_norm": 0.7993273577780864, "learning_rate": 2.16228925262421e-06, "loss": 0.6, "step": 5505 }, { "epoch": 0.7, "grad_norm": 0.768486974951675, "learning_rate": 2.1605906813752425e-06, "loss": 0.5565, "step": 5506 }, { "epoch": 0.7, "grad_norm": 0.8202716943392994, "learning_rate": 2.158892593648102e-06, "loss": 0.6415, "step": 5507 }, { "epoch": 0.7, "grad_norm": 1.119292165601142, "learning_rate": 2.157194989731954e-06, "loss": 0.6379, "step": 5508 }, { "epoch": 0.7, "grad_norm": 0.7937688272521658, "learning_rate": 2.155497869915883e-06, "loss": 0.5848, "step": 5509 }, { "epoch": 0.7, "grad_norm": 0.8435610489060712, "learning_rate": 2.153801234488891e-06, "loss": 0.6862, "step": 5510 }, { "epoch": 0.7, "grad_norm": 0.7357321420284718, "learning_rate": 2.1521050837398973e-06, "loss": 0.5584, "step": 5511 }, { "epoch": 0.7, "grad_norm": 0.74735447879491, "learning_rate": 2.1504094179577384e-06, "loss": 0.547, "step": 5512 }, { "epoch": 0.7, "grad_norm": 0.7012296461408165, "learning_rate": 2.1487142374311687e-06, "loss": 0.5101, "step": 5513 }, { "epoch": 0.7, "grad_norm": 0.725785321295535, "learning_rate": 2.1470195424488604e-06, "loss": 0.5499, "step": 5514 }, { "epoch": 0.7, "grad_norm": 0.6491729376208991, "learning_rate": 2.145325333299402e-06, "loss": 0.5594, "step": 5515 }, { "epoch": 0.7, "grad_norm": 0.7031354977511042, "learning_rate": 2.143631610271299e-06, "loss": 0.5744, "step": 5516 }, { "epoch": 0.7, "grad_norm": 0.7673997683983097, "learning_rate": 2.141938373652976e-06, "loss": 0.5722, "step": 5517 }, { "epoch": 0.7, "grad_norm": 0.8458977155063527, "learning_rate": 2.1402456237327723e-06, "loss": 0.5795, "step": 5518 }, { "epoch": 0.7, "grad_norm": 0.6702997935290794, "learning_rate": 2.138553360798946e-06, "loss": 0.55, "step": 5519 }, { "epoch": 0.7, "grad_norm": 0.7565775237821801, "learning_rate": 2.136861585139673e-06, "loss": 0.5039, "step": 5520 }, { "epoch": 0.7, "grad_norm": 0.6399293201317746, "learning_rate": 2.1351702970430416e-06, "loss": 0.506, "step": 5521 }, { "epoch": 0.7, "grad_norm": 0.6324718045819099, "learning_rate": 2.1334794967970645e-06, "loss": 0.5257, "step": 5522 }, { "epoch": 0.7, "grad_norm": 1.5075733951081995, "learning_rate": 2.1317891846896667e-06, "loss": 0.5919, "step": 5523 }, { "epoch": 0.7, "grad_norm": 0.9518755364849981, "learning_rate": 2.1300993610086896e-06, "loss": 0.5867, "step": 5524 }, { "epoch": 0.7, "grad_norm": 0.7968632079044731, "learning_rate": 2.1284100260418925e-06, "loss": 0.5386, "step": 5525 }, { "epoch": 0.7, "grad_norm": 0.8826189390342716, "learning_rate": 2.1267211800769526e-06, "loss": 0.6519, "step": 5526 }, { "epoch": 0.7, "grad_norm": 1.1235859108089383, "learning_rate": 2.1250328234014616e-06, "loss": 0.6338, "step": 5527 }, { "epoch": 0.7, "grad_norm": 0.8493132102038925, "learning_rate": 2.12334495630293e-06, "loss": 0.6474, "step": 5528 }, { "epoch": 0.7, "grad_norm": 0.8510494293402592, "learning_rate": 2.1216575790687832e-06, "loss": 0.555, "step": 5529 }, { "epoch": 0.7, "grad_norm": 0.7007531141533185, "learning_rate": 2.119970691986365e-06, "loss": 0.5941, "step": 5530 }, { "epoch": 0.7, "grad_norm": 0.8317231529744354, "learning_rate": 2.118284295342933e-06, "loss": 0.5655, "step": 5531 }, { "epoch": 0.7, "grad_norm": 0.7215323265558325, "learning_rate": 2.1165983894256647e-06, "loss": 0.539, "step": 5532 }, { "epoch": 0.7, "grad_norm": 0.8459321081897421, "learning_rate": 2.1149129745216517e-06, "loss": 0.5281, "step": 5533 }, { "epoch": 0.71, "grad_norm": 0.6855031396466958, "learning_rate": 2.1132280509179028e-06, "loss": 0.4985, "step": 5534 }, { "epoch": 0.71, "grad_norm": 1.018494057790836, "learning_rate": 2.111543618901342e-06, "loss": 0.5436, "step": 5535 }, { "epoch": 0.71, "grad_norm": 0.8204985499815014, "learning_rate": 2.109859678758811e-06, "loss": 0.6598, "step": 5536 }, { "epoch": 0.71, "grad_norm": 0.6676819925208591, "learning_rate": 2.108176230777068e-06, "loss": 0.538, "step": 5537 }, { "epoch": 0.71, "grad_norm": 0.742157236254718, "learning_rate": 2.106493275242786e-06, "loss": 0.6618, "step": 5538 }, { "epoch": 0.71, "grad_norm": 3.230737151565162, "learning_rate": 2.104810812442555e-06, "loss": 0.6408, "step": 5539 }, { "epoch": 0.71, "grad_norm": 0.7185964595205173, "learning_rate": 2.1031288426628804e-06, "loss": 0.5732, "step": 5540 }, { "epoch": 0.71, "grad_norm": 0.8169661986251882, "learning_rate": 2.1014473661901847e-06, "loss": 0.64, "step": 5541 }, { "epoch": 0.71, "grad_norm": 0.861008560614642, "learning_rate": 2.0997663833108055e-06, "loss": 0.5901, "step": 5542 }, { "epoch": 0.71, "grad_norm": 0.7666558973572319, "learning_rate": 2.0980858943109968e-06, "loss": 0.5228, "step": 5543 }, { "epoch": 0.71, "grad_norm": 0.8360299141662741, "learning_rate": 2.0964058994769285e-06, "loss": 0.6622, "step": 5544 }, { "epoch": 0.71, "grad_norm": 0.7996590397674925, "learning_rate": 2.0947263990946858e-06, "loss": 0.5166, "step": 5545 }, { "epoch": 0.71, "grad_norm": 0.7308603994951394, "learning_rate": 2.093047393450271e-06, "loss": 0.5705, "step": 5546 }, { "epoch": 0.71, "grad_norm": 0.8530056607536854, "learning_rate": 2.0913688828295985e-06, "loss": 0.6107, "step": 5547 }, { "epoch": 0.71, "grad_norm": 0.8375968253809218, "learning_rate": 2.089690867518505e-06, "loss": 0.5616, "step": 5548 }, { "epoch": 0.71, "grad_norm": 1.0095294866767712, "learning_rate": 2.088013347802738e-06, "loss": 0.656, "step": 5549 }, { "epoch": 0.71, "grad_norm": 0.7156805700817284, "learning_rate": 2.086336323967961e-06, "loss": 0.5055, "step": 5550 }, { "epoch": 0.71, "grad_norm": 0.6319153617280628, "learning_rate": 2.0846597962997527e-06, "loss": 0.5303, "step": 5551 }, { "epoch": 0.71, "grad_norm": 0.6406284759411545, "learning_rate": 2.08298376508361e-06, "loss": 0.5136, "step": 5552 }, { "epoch": 0.71, "grad_norm": 0.65868663834691, "learning_rate": 2.081308230604943e-06, "loss": 0.5432, "step": 5553 }, { "epoch": 0.71, "grad_norm": 0.8308231429315793, "learning_rate": 2.079633193149077e-06, "loss": 0.5445, "step": 5554 }, { "epoch": 0.71, "grad_norm": 0.7480304537907051, "learning_rate": 2.0779586530012546e-06, "loss": 0.5122, "step": 5555 }, { "epoch": 0.71, "grad_norm": 0.7185200318295508, "learning_rate": 2.0762846104466322e-06, "loss": 0.4888, "step": 5556 }, { "epoch": 0.71, "grad_norm": 0.9503475403761206, "learning_rate": 2.074611065770281e-06, "loss": 0.6851, "step": 5557 }, { "epoch": 0.71, "grad_norm": 0.8489818545682053, "learning_rate": 2.072938019257189e-06, "loss": 0.6, "step": 5558 }, { "epoch": 0.71, "grad_norm": 0.8315218311277328, "learning_rate": 2.0712654711922585e-06, "loss": 0.6163, "step": 5559 }, { "epoch": 0.71, "grad_norm": 0.8100270151300474, "learning_rate": 2.069593421860307e-06, "loss": 0.5831, "step": 5560 }, { "epoch": 0.71, "grad_norm": 0.6704463448884791, "learning_rate": 2.067921871546067e-06, "loss": 0.5537, "step": 5561 }, { "epoch": 0.71, "grad_norm": 0.787622333937641, "learning_rate": 2.066250820534186e-06, "loss": 0.6215, "step": 5562 }, { "epoch": 0.71, "grad_norm": 0.8523658204505838, "learning_rate": 2.0645802691092266e-06, "loss": 0.6071, "step": 5563 }, { "epoch": 0.71, "grad_norm": 0.8735155891505851, "learning_rate": 2.0629102175556666e-06, "loss": 0.6556, "step": 5564 }, { "epoch": 0.71, "grad_norm": 0.7920113011253676, "learning_rate": 2.0612406661578977e-06, "loss": 0.5863, "step": 5565 }, { "epoch": 0.71, "grad_norm": 1.0166283796513007, "learning_rate": 2.059571615200226e-06, "loss": 0.6218, "step": 5566 }, { "epoch": 0.71, "grad_norm": 0.6532332449733946, "learning_rate": 2.057903064966877e-06, "loss": 0.5728, "step": 5567 }, { "epoch": 0.71, "grad_norm": 0.8758672351536236, "learning_rate": 2.056235015741986e-06, "loss": 0.6422, "step": 5568 }, { "epoch": 0.71, "grad_norm": 0.7692196061873529, "learning_rate": 2.054567467809603e-06, "loss": 0.6013, "step": 5569 }, { "epoch": 0.71, "grad_norm": 0.6311065798340466, "learning_rate": 2.052900421453695e-06, "loss": 0.4586, "step": 5570 }, { "epoch": 0.71, "grad_norm": 0.6751948614991886, "learning_rate": 2.051233876958143e-06, "loss": 0.4977, "step": 5571 }, { "epoch": 0.71, "grad_norm": 0.857990397550982, "learning_rate": 2.0495678346067414e-06, "loss": 0.6146, "step": 5572 }, { "epoch": 0.71, "grad_norm": 0.7374865128137168, "learning_rate": 2.0479022946832007e-06, "loss": 0.5535, "step": 5573 }, { "epoch": 0.71, "grad_norm": 0.7762294594216456, "learning_rate": 2.0462372574711442e-06, "loss": 0.5657, "step": 5574 }, { "epoch": 0.71, "grad_norm": 0.9246031622653919, "learning_rate": 2.044572723254111e-06, "loss": 0.6678, "step": 5575 }, { "epoch": 0.71, "grad_norm": 0.6492410073858308, "learning_rate": 2.042908692315553e-06, "loss": 0.517, "step": 5576 }, { "epoch": 0.71, "grad_norm": 0.8818051855540839, "learning_rate": 2.041245164938838e-06, "loss": 0.5703, "step": 5577 }, { "epoch": 0.71, "grad_norm": 0.7563978191187196, "learning_rate": 2.0395821414072473e-06, "loss": 0.5898, "step": 5578 }, { "epoch": 0.71, "grad_norm": 0.7980143254065868, "learning_rate": 2.0379196220039754e-06, "loss": 0.6189, "step": 5579 }, { "epoch": 0.71, "grad_norm": 0.6657208467267434, "learning_rate": 2.036257607012134e-06, "loss": 0.5971, "step": 5580 }, { "epoch": 0.71, "grad_norm": 0.7319788423600943, "learning_rate": 2.034596096714747e-06, "loss": 0.5718, "step": 5581 }, { "epoch": 0.71, "grad_norm": 0.7167999258294124, "learning_rate": 2.0329350913947508e-06, "loss": 0.5946, "step": 5582 }, { "epoch": 0.71, "grad_norm": 0.7362578288789929, "learning_rate": 2.031274591334998e-06, "loss": 0.5051, "step": 5583 }, { "epoch": 0.71, "grad_norm": 0.7259309659847831, "learning_rate": 2.0296145968182546e-06, "loss": 0.5102, "step": 5584 }, { "epoch": 0.71, "grad_norm": 0.7489182315593272, "learning_rate": 2.0279551081272e-06, "loss": 0.584, "step": 5585 }, { "epoch": 0.71, "grad_norm": 1.0315748556547173, "learning_rate": 2.0262961255444273e-06, "loss": 0.6764, "step": 5586 }, { "epoch": 0.71, "grad_norm": 0.7428832516608075, "learning_rate": 2.024637649352445e-06, "loss": 0.5302, "step": 5587 }, { "epoch": 0.71, "grad_norm": 0.6554302218803151, "learning_rate": 2.0229796798336744e-06, "loss": 0.4961, "step": 5588 }, { "epoch": 0.71, "grad_norm": 0.9014089633908532, "learning_rate": 2.0213222172704496e-06, "loss": 0.5902, "step": 5589 }, { "epoch": 0.71, "grad_norm": 0.7657701621051626, "learning_rate": 2.019665261945019e-06, "loss": 0.5819, "step": 5590 }, { "epoch": 0.71, "grad_norm": 0.8488592972026056, "learning_rate": 2.0180088141395465e-06, "loss": 0.6349, "step": 5591 }, { "epoch": 0.71, "grad_norm": 0.8566977598272854, "learning_rate": 2.016352874136106e-06, "loss": 0.5623, "step": 5592 }, { "epoch": 0.71, "grad_norm": 0.7285906131668988, "learning_rate": 2.0146974422166878e-06, "loss": 0.5278, "step": 5593 }, { "epoch": 0.71, "grad_norm": 0.6994999894113406, "learning_rate": 2.0130425186631945e-06, "loss": 0.5438, "step": 5594 }, { "epoch": 0.71, "grad_norm": 0.849390029680417, "learning_rate": 2.0113881037574423e-06, "loss": 0.557, "step": 5595 }, { "epoch": 0.71, "grad_norm": 0.7981814800297028, "learning_rate": 2.009734197781161e-06, "loss": 0.5414, "step": 5596 }, { "epoch": 0.71, "grad_norm": 0.7446727470435076, "learning_rate": 2.0080808010159937e-06, "loss": 0.4848, "step": 5597 }, { "epoch": 0.71, "grad_norm": 0.7596826154645443, "learning_rate": 2.006427913743496e-06, "loss": 0.5934, "step": 5598 }, { "epoch": 0.71, "grad_norm": 0.7004442288235581, "learning_rate": 2.004775536245138e-06, "loss": 0.5386, "step": 5599 }, { "epoch": 0.71, "grad_norm": 0.6614905921355728, "learning_rate": 2.0031236688023026e-06, "loss": 0.5077, "step": 5600 }, { "epoch": 0.71, "grad_norm": 0.6425782973179972, "learning_rate": 2.0014723116962842e-06, "loss": 0.5273, "step": 5601 }, { "epoch": 0.71, "grad_norm": 0.7871629829663782, "learning_rate": 1.999821465208293e-06, "loss": 0.6436, "step": 5602 }, { "epoch": 0.71, "grad_norm": 0.8854727654619043, "learning_rate": 1.9981711296194513e-06, "loss": 0.6489, "step": 5603 }, { "epoch": 0.71, "grad_norm": 0.7322818852661492, "learning_rate": 1.996521305210793e-06, "loss": 0.5484, "step": 5604 }, { "epoch": 0.71, "grad_norm": 0.6440229071395928, "learning_rate": 1.9948719922632644e-06, "loss": 0.5019, "step": 5605 }, { "epoch": 0.71, "grad_norm": 0.7984422841313952, "learning_rate": 1.9932231910577306e-06, "loss": 0.6088, "step": 5606 }, { "epoch": 0.71, "grad_norm": 0.8190166531322095, "learning_rate": 1.9915749018749626e-06, "loss": 0.6077, "step": 5607 }, { "epoch": 0.71, "grad_norm": 0.8885474322932339, "learning_rate": 1.989927124995647e-06, "loss": 0.6272, "step": 5608 }, { "epoch": 0.71, "grad_norm": 1.086076821709225, "learning_rate": 1.9882798607003835e-06, "loss": 0.6467, "step": 5609 }, { "epoch": 0.71, "grad_norm": 0.7926130013670906, "learning_rate": 1.9866331092696837e-06, "loss": 0.6205, "step": 5610 }, { "epoch": 0.71, "grad_norm": 0.7642917304703144, "learning_rate": 1.9849868709839733e-06, "loss": 0.5554, "step": 5611 }, { "epoch": 0.71, "grad_norm": 0.8194738401249654, "learning_rate": 1.9833411461235873e-06, "loss": 0.6008, "step": 5612 }, { "epoch": 0.72, "grad_norm": 0.7958188348710266, "learning_rate": 1.9816959349687777e-06, "loss": 0.6011, "step": 5613 }, { "epoch": 0.72, "grad_norm": 0.9097094828494999, "learning_rate": 1.9800512377997053e-06, "loss": 0.5062, "step": 5614 }, { "epoch": 0.72, "grad_norm": 0.8362254123356185, "learning_rate": 1.9784070548964463e-06, "loss": 0.6502, "step": 5615 }, { "epoch": 0.72, "grad_norm": 0.6584584796883908, "learning_rate": 1.9767633865389864e-06, "loss": 0.5307, "step": 5616 }, { "epoch": 0.72, "grad_norm": 0.6677739618748387, "learning_rate": 1.975120233007226e-06, "loss": 0.4968, "step": 5617 }, { "epoch": 0.72, "grad_norm": 0.8563519816298458, "learning_rate": 1.973477594580977e-06, "loss": 0.5608, "step": 5618 }, { "epoch": 0.72, "grad_norm": 0.6777861919470805, "learning_rate": 1.971835471539964e-06, "loss": 0.5156, "step": 5619 }, { "epoch": 0.72, "grad_norm": 0.9524677825433081, "learning_rate": 1.9701938641638225e-06, "loss": 0.6285, "step": 5620 }, { "epoch": 0.72, "grad_norm": 0.7441502782741423, "learning_rate": 1.968552772732102e-06, "loss": 0.5198, "step": 5621 }, { "epoch": 0.72, "grad_norm": 0.6748969466907258, "learning_rate": 1.966912197524263e-06, "loss": 0.5212, "step": 5622 }, { "epoch": 0.72, "grad_norm": 0.701968446008239, "learning_rate": 1.9652721388196776e-06, "loss": 0.558, "step": 5623 }, { "epoch": 0.72, "grad_norm": 0.9340363911102243, "learning_rate": 1.9636325968976315e-06, "loss": 0.6975, "step": 5624 }, { "epoch": 0.72, "grad_norm": 0.8550216187316212, "learning_rate": 1.9619935720373224e-06, "loss": 0.6497, "step": 5625 }, { "epoch": 0.72, "grad_norm": 0.6786021756697905, "learning_rate": 1.9603550645178577e-06, "loss": 0.5339, "step": 5626 }, { "epoch": 0.72, "grad_norm": 0.7556227767922549, "learning_rate": 1.958717074618258e-06, "loss": 0.563, "step": 5627 }, { "epoch": 0.72, "grad_norm": 0.721231781529318, "learning_rate": 1.9570796026174576e-06, "loss": 0.542, "step": 5628 }, { "epoch": 0.72, "grad_norm": 0.6924333771489697, "learning_rate": 1.9554426487942997e-06, "loss": 0.5345, "step": 5629 }, { "epoch": 0.72, "grad_norm": 0.6630747797073369, "learning_rate": 1.953806213427541e-06, "loss": 0.5132, "step": 5630 }, { "epoch": 0.72, "grad_norm": 0.7456758873212999, "learning_rate": 1.952170296795849e-06, "loss": 0.5844, "step": 5631 }, { "epoch": 0.72, "grad_norm": 0.969336563163838, "learning_rate": 1.950534899177801e-06, "loss": 0.6127, "step": 5632 }, { "epoch": 0.72, "grad_norm": 0.8299354123958859, "learning_rate": 1.9489000208518925e-06, "loss": 0.5936, "step": 5633 }, { "epoch": 0.72, "grad_norm": 0.6670978958187256, "learning_rate": 1.947265662096525e-06, "loss": 0.4823, "step": 5634 }, { "epoch": 0.72, "grad_norm": 0.7462990298055201, "learning_rate": 1.945631823190012e-06, "loss": 0.5005, "step": 5635 }, { "epoch": 0.72, "grad_norm": 0.8779756079317077, "learning_rate": 1.9439985044105793e-06, "loss": 0.6414, "step": 5636 }, { "epoch": 0.72, "grad_norm": 0.7222123319899453, "learning_rate": 1.9423657060363633e-06, "loss": 0.5052, "step": 5637 }, { "epoch": 0.72, "grad_norm": 0.6755407658938087, "learning_rate": 1.940733428345414e-06, "loss": 0.5368, "step": 5638 }, { "epoch": 0.72, "grad_norm": 0.7882931307561672, "learning_rate": 1.939101671615691e-06, "loss": 0.5438, "step": 5639 }, { "epoch": 0.72, "grad_norm": 0.7707635336120291, "learning_rate": 1.9374704361250644e-06, "loss": 0.5806, "step": 5640 }, { "epoch": 0.72, "grad_norm": 0.6867621924471001, "learning_rate": 1.9358397221513174e-06, "loss": 0.5147, "step": 5641 }, { "epoch": 0.72, "grad_norm": 0.7290548431310014, "learning_rate": 1.934209529972144e-06, "loss": 0.5321, "step": 5642 }, { "epoch": 0.72, "grad_norm": 0.6836180611078975, "learning_rate": 1.932579859865148e-06, "loss": 0.5111, "step": 5643 }, { "epoch": 0.72, "grad_norm": 0.7699511169386786, "learning_rate": 1.9309507121078457e-06, "loss": 0.6183, "step": 5644 }, { "epoch": 0.72, "grad_norm": 0.7310472148076651, "learning_rate": 1.9293220869776647e-06, "loss": 0.5365, "step": 5645 }, { "epoch": 0.72, "grad_norm": 0.646314109956493, "learning_rate": 1.9276939847519416e-06, "loss": 0.5234, "step": 5646 }, { "epoch": 0.72, "grad_norm": 0.7498776915702167, "learning_rate": 1.926066405707926e-06, "loss": 0.5675, "step": 5647 }, { "epoch": 0.72, "grad_norm": 0.7276113774197578, "learning_rate": 1.924439350122778e-06, "loss": 0.5472, "step": 5648 }, { "epoch": 0.72, "grad_norm": 0.7224335701416914, "learning_rate": 1.922812818273567e-06, "loss": 0.5524, "step": 5649 }, { "epoch": 0.72, "grad_norm": 0.7038676456571389, "learning_rate": 1.921186810437275e-06, "loss": 0.5461, "step": 5650 }, { "epoch": 0.72, "grad_norm": 0.7120893238045858, "learning_rate": 1.919561326890795e-06, "loss": 0.5564, "step": 5651 }, { "epoch": 0.72, "grad_norm": 0.9231313078999004, "learning_rate": 1.9179363679109286e-06, "loss": 0.5981, "step": 5652 }, { "epoch": 0.72, "grad_norm": 0.6492092364277938, "learning_rate": 1.9163119337743903e-06, "loss": 0.4958, "step": 5653 }, { "epoch": 0.72, "grad_norm": 0.8497709413425779, "learning_rate": 1.914688024757804e-06, "loss": 0.6097, "step": 5654 }, { "epoch": 0.72, "grad_norm": 0.6825637718435527, "learning_rate": 1.9130646411377046e-06, "loss": 0.572, "step": 5655 }, { "epoch": 0.72, "grad_norm": 0.6366616117666464, "learning_rate": 1.9114417831905376e-06, "loss": 0.4942, "step": 5656 }, { "epoch": 0.72, "grad_norm": 0.6868144409208246, "learning_rate": 1.9098194511926583e-06, "loss": 0.5264, "step": 5657 }, { "epoch": 0.72, "grad_norm": 0.7174046254360706, "learning_rate": 1.9081976454203317e-06, "loss": 0.5613, "step": 5658 }, { "epoch": 0.72, "grad_norm": 0.8514493739215985, "learning_rate": 1.9065763661497371e-06, "loss": 0.5912, "step": 5659 }, { "epoch": 0.72, "grad_norm": 0.6692516374925389, "learning_rate": 1.9049556136569609e-06, "loss": 0.5523, "step": 5660 }, { "epoch": 0.72, "grad_norm": 0.8433657183931635, "learning_rate": 1.9033353882179995e-06, "loss": 0.667, "step": 5661 }, { "epoch": 0.72, "grad_norm": 0.736451547530937, "learning_rate": 1.9017156901087608e-06, "loss": 0.5108, "step": 5662 }, { "epoch": 0.72, "grad_norm": 0.7895222103376313, "learning_rate": 1.9000965196050619e-06, "loss": 0.5521, "step": 5663 }, { "epoch": 0.72, "grad_norm": 0.7386615692539416, "learning_rate": 1.8984778769826318e-06, "loss": 0.5559, "step": 5664 }, { "epoch": 0.72, "grad_norm": 0.6428037286901456, "learning_rate": 1.8968597625171076e-06, "loss": 0.4365, "step": 5665 }, { "epoch": 0.72, "grad_norm": 0.8420620419361232, "learning_rate": 1.8952421764840374e-06, "loss": 0.5757, "step": 5666 }, { "epoch": 0.72, "grad_norm": 1.7627420573034267, "learning_rate": 1.8936251191588779e-06, "loss": 0.6409, "step": 5667 }, { "epoch": 0.72, "grad_norm": 1.0303500950206863, "learning_rate": 1.892008590817e-06, "loss": 0.6819, "step": 5668 }, { "epoch": 0.72, "grad_norm": 0.8173379566460758, "learning_rate": 1.8903925917336807e-06, "loss": 0.5641, "step": 5669 }, { "epoch": 0.72, "grad_norm": 0.8963285199313336, "learning_rate": 1.8887771221841073e-06, "loss": 0.6493, "step": 5670 }, { "epoch": 0.72, "grad_norm": 0.6614650994531897, "learning_rate": 1.8871621824433772e-06, "loss": 0.5373, "step": 5671 }, { "epoch": 0.72, "grad_norm": 0.7086523071473418, "learning_rate": 1.8855477727864985e-06, "loss": 0.544, "step": 5672 }, { "epoch": 0.72, "grad_norm": 0.6693291638232884, "learning_rate": 1.8839338934883877e-06, "loss": 0.5207, "step": 5673 }, { "epoch": 0.72, "grad_norm": 0.6381550060212869, "learning_rate": 1.8823205448238718e-06, "loss": 0.5446, "step": 5674 }, { "epoch": 0.72, "grad_norm": 0.7632165870249248, "learning_rate": 1.8807077270676877e-06, "loss": 0.5499, "step": 5675 }, { "epoch": 0.72, "grad_norm": 0.6845272769349781, "learning_rate": 1.879095440494481e-06, "loss": 0.5752, "step": 5676 }, { "epoch": 0.72, "grad_norm": 0.78280223419142, "learning_rate": 1.877483685378807e-06, "loss": 0.5965, "step": 5677 }, { "epoch": 0.72, "grad_norm": 0.8671549301470479, "learning_rate": 1.8758724619951318e-06, "loss": 0.6622, "step": 5678 }, { "epoch": 0.72, "grad_norm": 0.7945552051326982, "learning_rate": 1.8742617706178294e-06, "loss": 0.5273, "step": 5679 }, { "epoch": 0.72, "grad_norm": 0.6470168702476706, "learning_rate": 1.8726516115211834e-06, "loss": 0.5889, "step": 5680 }, { "epoch": 0.72, "grad_norm": 0.7128489351494735, "learning_rate": 1.8710419849793876e-06, "loss": 0.5777, "step": 5681 }, { "epoch": 0.72, "grad_norm": 0.6499759199787295, "learning_rate": 1.869432891266545e-06, "loss": 0.5316, "step": 5682 }, { "epoch": 0.72, "grad_norm": 0.7553758782877432, "learning_rate": 1.8678243306566662e-06, "loss": 0.5493, "step": 5683 }, { "epoch": 0.72, "grad_norm": 0.7197721144686456, "learning_rate": 1.8662163034236736e-06, "loss": 0.6589, "step": 5684 }, { "epoch": 0.72, "grad_norm": 0.6913415922646856, "learning_rate": 1.8646088098413972e-06, "loss": 0.5104, "step": 5685 }, { "epoch": 0.72, "grad_norm": 0.8584374625102356, "learning_rate": 1.8630018501835757e-06, "loss": 0.5788, "step": 5686 }, { "epoch": 0.72, "grad_norm": 0.8061167069357875, "learning_rate": 1.8613954247238587e-06, "loss": 0.5948, "step": 5687 }, { "epoch": 0.72, "grad_norm": 0.9396029534574472, "learning_rate": 1.8597895337358025e-06, "loss": 0.6431, "step": 5688 }, { "epoch": 0.72, "grad_norm": 0.9824609418155691, "learning_rate": 1.8581841774928744e-06, "loss": 0.6141, "step": 5689 }, { "epoch": 0.72, "grad_norm": 0.8989922645950449, "learning_rate": 1.856579356268448e-06, "loss": 0.6437, "step": 5690 }, { "epoch": 0.73, "grad_norm": 0.8252181128944357, "learning_rate": 1.854975070335811e-06, "loss": 0.689, "step": 5691 }, { "epoch": 0.73, "grad_norm": 0.6453941500709436, "learning_rate": 1.8533713199681552e-06, "loss": 0.5137, "step": 5692 }, { "epoch": 0.73, "grad_norm": 0.7042843962620681, "learning_rate": 1.8517681054385816e-06, "loss": 0.5498, "step": 5693 }, { "epoch": 0.73, "grad_norm": 0.7308239953310444, "learning_rate": 1.8501654270201026e-06, "loss": 0.6134, "step": 5694 }, { "epoch": 0.73, "grad_norm": 1.0543878889686091, "learning_rate": 1.8485632849856355e-06, "loss": 0.66, "step": 5695 }, { "epoch": 0.73, "grad_norm": 0.8490842457356459, "learning_rate": 1.8469616796080103e-06, "loss": 0.6255, "step": 5696 }, { "epoch": 0.73, "grad_norm": 0.6715084651778989, "learning_rate": 1.8453606111599625e-06, "loss": 0.5309, "step": 5697 }, { "epoch": 0.73, "grad_norm": 0.8075334364885477, "learning_rate": 1.8437600799141387e-06, "loss": 0.5936, "step": 5698 }, { "epoch": 0.73, "grad_norm": 0.7190862689231354, "learning_rate": 1.8421600861430911e-06, "loss": 0.5231, "step": 5699 }, { "epoch": 0.73, "grad_norm": 0.774945186795371, "learning_rate": 1.8405606301192825e-06, "loss": 0.6023, "step": 5700 }, { "epoch": 0.73, "grad_norm": 0.7826488562850418, "learning_rate": 1.8389617121150844e-06, "loss": 0.4893, "step": 5701 }, { "epoch": 0.73, "grad_norm": 0.6639905414765804, "learning_rate": 1.8373633324027745e-06, "loss": 0.556, "step": 5702 }, { "epoch": 0.73, "grad_norm": 0.7772825976539545, "learning_rate": 1.8357654912545414e-06, "loss": 0.504, "step": 5703 }, { "epoch": 0.73, "grad_norm": 0.8717133441858241, "learning_rate": 1.8341681889424806e-06, "loss": 0.6046, "step": 5704 }, { "epoch": 0.73, "grad_norm": 0.6533406953403205, "learning_rate": 1.8325714257385956e-06, "loss": 0.4838, "step": 5705 }, { "epoch": 0.73, "grad_norm": 0.8867190079623544, "learning_rate": 1.8309752019147986e-06, "loss": 0.5165, "step": 5706 }, { "epoch": 0.73, "grad_norm": 0.6930943075971878, "learning_rate": 1.8293795177429103e-06, "loss": 0.5758, "step": 5707 }, { "epoch": 0.73, "grad_norm": 0.8771940293318368, "learning_rate": 1.8277843734946588e-06, "loss": 0.5621, "step": 5708 }, { "epoch": 0.73, "grad_norm": 0.8204142256420287, "learning_rate": 1.8261897694416807e-06, "loss": 0.5392, "step": 5709 }, { "epoch": 0.73, "grad_norm": 0.853316182461607, "learning_rate": 1.8245957058555203e-06, "loss": 0.6382, "step": 5710 }, { "epoch": 0.73, "grad_norm": 0.9145768400884016, "learning_rate": 1.82300218300763e-06, "loss": 0.6386, "step": 5711 }, { "epoch": 0.73, "grad_norm": 0.7537921452643934, "learning_rate": 1.8214092011693702e-06, "loss": 0.5485, "step": 5712 }, { "epoch": 0.73, "grad_norm": 0.8968244608196801, "learning_rate": 1.8198167606120092e-06, "loss": 0.6039, "step": 5713 }, { "epoch": 0.73, "grad_norm": 0.6181273218749963, "learning_rate": 1.8182248616067228e-06, "loss": 0.5118, "step": 5714 }, { "epoch": 0.73, "grad_norm": 0.7146249506856605, "learning_rate": 1.8166335044245953e-06, "loss": 0.4944, "step": 5715 }, { "epoch": 0.73, "grad_norm": 0.64756477037695, "learning_rate": 1.8150426893366157e-06, "loss": 0.5466, "step": 5716 }, { "epoch": 0.73, "grad_norm": 0.6875259049621283, "learning_rate": 1.8134524166136875e-06, "loss": 0.5196, "step": 5717 }, { "epoch": 0.73, "grad_norm": 0.8439288454434887, "learning_rate": 1.8118626865266153e-06, "loss": 0.5971, "step": 5718 }, { "epoch": 0.73, "grad_norm": 1.012448265122255, "learning_rate": 1.8102734993461141e-06, "loss": 0.6801, "step": 5719 }, { "epoch": 0.73, "grad_norm": 1.0681786169429894, "learning_rate": 1.8086848553428054e-06, "loss": 0.5871, "step": 5720 }, { "epoch": 0.73, "grad_norm": 0.6564929858831655, "learning_rate": 1.8070967547872192e-06, "loss": 0.4878, "step": 5721 }, { "epoch": 0.73, "grad_norm": 0.8320255830579747, "learning_rate": 1.8055091979497924e-06, "loss": 0.5218, "step": 5722 }, { "epoch": 0.73, "grad_norm": 0.9695415907881699, "learning_rate": 1.8039221851008686e-06, "loss": 0.635, "step": 5723 }, { "epoch": 0.73, "grad_norm": 1.1058337372771763, "learning_rate": 1.8023357165107008e-06, "loss": 0.6417, "step": 5724 }, { "epoch": 0.73, "grad_norm": 0.7997477782431707, "learning_rate": 1.800749792449447e-06, "loss": 0.6322, "step": 5725 }, { "epoch": 0.73, "grad_norm": 0.8079857867508752, "learning_rate": 1.7991644131871744e-06, "loss": 0.5932, "step": 5726 }, { "epoch": 0.73, "grad_norm": 1.0541914978638376, "learning_rate": 1.7975795789938561e-06, "loss": 0.6307, "step": 5727 }, { "epoch": 0.73, "grad_norm": 0.8555694261747371, "learning_rate": 1.7959952901393724e-06, "loss": 0.6441, "step": 5728 }, { "epoch": 0.73, "grad_norm": 0.6775536377373326, "learning_rate": 1.7944115468935119e-06, "loss": 0.4459, "step": 5729 }, { "epoch": 0.73, "grad_norm": 0.8360650694277018, "learning_rate": 1.7928283495259696e-06, "loss": 0.6437, "step": 5730 }, { "epoch": 0.73, "grad_norm": 0.7423201172380518, "learning_rate": 1.7912456983063475e-06, "loss": 0.5349, "step": 5731 }, { "epoch": 0.73, "grad_norm": 0.7736263417287377, "learning_rate": 1.789663593504154e-06, "loss": 0.5603, "step": 5732 }, { "epoch": 0.73, "grad_norm": 0.7030553003782025, "learning_rate": 1.7880820353888057e-06, "loss": 0.5337, "step": 5733 }, { "epoch": 0.73, "grad_norm": 0.8423427223986165, "learning_rate": 1.7865010242296254e-06, "loss": 0.5849, "step": 5734 }, { "epoch": 0.73, "grad_norm": 0.8761016272769074, "learning_rate": 1.7849205602958424e-06, "loss": 0.6388, "step": 5735 }, { "epoch": 0.73, "grad_norm": 0.7893869665333877, "learning_rate": 1.7833406438565937e-06, "loss": 0.6712, "step": 5736 }, { "epoch": 0.73, "grad_norm": 0.9779568753960978, "learning_rate": 1.781761275180922e-06, "loss": 0.5843, "step": 5737 }, { "epoch": 0.73, "grad_norm": 0.9062874964850393, "learning_rate": 1.7801824545377783e-06, "loss": 0.6251, "step": 5738 }, { "epoch": 0.73, "grad_norm": 0.8577072889913769, "learning_rate": 1.7786041821960188e-06, "loss": 0.6355, "step": 5739 }, { "epoch": 0.73, "grad_norm": 1.5446367191344814, "learning_rate": 1.7770264584244069e-06, "loss": 0.6138, "step": 5740 }, { "epoch": 0.73, "grad_norm": 0.8252681625676862, "learning_rate": 1.7754492834916121e-06, "loss": 0.5407, "step": 5741 }, { "epoch": 0.73, "grad_norm": 0.7339013238026564, "learning_rate": 1.7738726576662112e-06, "loss": 0.4918, "step": 5742 }, { "epoch": 0.73, "grad_norm": 0.8831005128725294, "learning_rate": 1.7722965812166854e-06, "loss": 0.6366, "step": 5743 }, { "epoch": 0.73, "grad_norm": 0.7612358345724639, "learning_rate": 1.7707210544114278e-06, "loss": 0.5482, "step": 5744 }, { "epoch": 0.73, "grad_norm": 0.6624729109266706, "learning_rate": 1.769146077518732e-06, "loss": 0.5508, "step": 5745 }, { "epoch": 0.73, "grad_norm": 0.8404092433164742, "learning_rate": 1.7675716508068003e-06, "loss": 0.6385, "step": 5746 }, { "epoch": 0.73, "grad_norm": 0.8979638683332845, "learning_rate": 1.7659977745437417e-06, "loss": 0.6972, "step": 5747 }, { "epoch": 0.73, "grad_norm": 0.6900946937007272, "learning_rate": 1.7644244489975704e-06, "loss": 0.5708, "step": 5748 }, { "epoch": 0.73, "grad_norm": 0.6618594413151697, "learning_rate": 1.7628516744362073e-06, "loss": 0.5279, "step": 5749 }, { "epoch": 0.73, "grad_norm": 0.6863226774325621, "learning_rate": 1.7612794511274794e-06, "loss": 0.5272, "step": 5750 }, { "epoch": 0.73, "grad_norm": 0.6169892224350209, "learning_rate": 1.7597077793391205e-06, "loss": 0.4689, "step": 5751 }, { "epoch": 0.73, "grad_norm": 0.6776845520065562, "learning_rate": 1.75813665933877e-06, "loss": 0.5045, "step": 5752 }, { "epoch": 0.73, "grad_norm": 1.0031560179275587, "learning_rate": 1.7565660913939726e-06, "loss": 0.7, "step": 5753 }, { "epoch": 0.73, "grad_norm": 0.7101435983485632, "learning_rate": 1.7549960757721802e-06, "loss": 0.5284, "step": 5754 }, { "epoch": 0.73, "grad_norm": 0.727339995607302, "learning_rate": 1.7534266127407496e-06, "loss": 0.559, "step": 5755 }, { "epoch": 0.73, "grad_norm": 0.8269970566953763, "learning_rate": 1.7518577025669441e-06, "loss": 0.5777, "step": 5756 }, { "epoch": 0.73, "grad_norm": 0.6925025714193013, "learning_rate": 1.7502893455179331e-06, "loss": 0.6489, "step": 5757 }, { "epoch": 0.73, "grad_norm": 1.1668924560953013, "learning_rate": 1.7487215418607917e-06, "loss": 0.6427, "step": 5758 }, { "epoch": 0.73, "grad_norm": 0.6083411271345088, "learning_rate": 1.7471542918625e-06, "loss": 0.4806, "step": 5759 }, { "epoch": 0.73, "grad_norm": 0.7524038320361798, "learning_rate": 1.7455875957899443e-06, "loss": 0.5043, "step": 5760 }, { "epoch": 0.73, "grad_norm": 0.6976115314462221, "learning_rate": 1.744021453909917e-06, "loss": 0.5355, "step": 5761 }, { "epoch": 0.73, "grad_norm": 0.6862618541635623, "learning_rate": 1.7424558664891156e-06, "loss": 0.6099, "step": 5762 }, { "epoch": 0.73, "grad_norm": 0.6536895970040975, "learning_rate": 1.7408908337941432e-06, "loss": 0.5352, "step": 5763 }, { "epoch": 0.73, "grad_norm": 0.6439201376713182, "learning_rate": 1.7393263560915085e-06, "loss": 0.5349, "step": 5764 }, { "epoch": 0.73, "grad_norm": 0.7055432760920843, "learning_rate": 1.7377624336476256e-06, "loss": 0.5227, "step": 5765 }, { "epoch": 0.73, "grad_norm": 0.8276041983160516, "learning_rate": 1.7361990667288153e-06, "loss": 0.5727, "step": 5766 }, { "epoch": 0.73, "grad_norm": 0.653954611591255, "learning_rate": 1.7346362556013013e-06, "loss": 0.5261, "step": 5767 }, { "epoch": 0.73, "grad_norm": 0.8435987621458022, "learning_rate": 1.7330740005312151e-06, "loss": 0.615, "step": 5768 }, { "epoch": 0.73, "grad_norm": 0.8791167489564625, "learning_rate": 1.7315123017845902e-06, "loss": 0.6315, "step": 5769 }, { "epoch": 0.74, "grad_norm": 0.7381109354667061, "learning_rate": 1.729951159627371e-06, "loss": 0.526, "step": 5770 }, { "epoch": 0.74, "grad_norm": 0.6695472719059816, "learning_rate": 1.728390574325402e-06, "loss": 0.5532, "step": 5771 }, { "epoch": 0.74, "grad_norm": 0.6350351883777356, "learning_rate": 1.7268305461444352e-06, "loss": 0.5114, "step": 5772 }, { "epoch": 0.74, "grad_norm": 0.7082825651756639, "learning_rate": 1.7252710753501268e-06, "loss": 0.5876, "step": 5773 }, { "epoch": 0.74, "grad_norm": 0.838483890857876, "learning_rate": 1.7237121622080383e-06, "loss": 0.6192, "step": 5774 }, { "epoch": 0.74, "grad_norm": 0.8239243086931671, "learning_rate": 1.7221538069836347e-06, "loss": 0.545, "step": 5775 }, { "epoch": 0.74, "grad_norm": 0.7780272279761498, "learning_rate": 1.7205960099422908e-06, "loss": 0.5177, "step": 5776 }, { "epoch": 0.74, "grad_norm": 0.6732864942160461, "learning_rate": 1.7190387713492824e-06, "loss": 0.5144, "step": 5777 }, { "epoch": 0.74, "grad_norm": 0.8146989571067235, "learning_rate": 1.71748209146979e-06, "loss": 0.5332, "step": 5778 }, { "epoch": 0.74, "grad_norm": 0.8156694451793075, "learning_rate": 1.7159259705689003e-06, "loss": 0.6011, "step": 5779 }, { "epoch": 0.74, "grad_norm": 0.6732985813150366, "learning_rate": 1.7143704089116049e-06, "loss": 0.5575, "step": 5780 }, { "epoch": 0.74, "grad_norm": 0.6638620859450522, "learning_rate": 1.7128154067627995e-06, "loss": 0.5308, "step": 5781 }, { "epoch": 0.74, "grad_norm": 0.854219047096941, "learning_rate": 1.7112609643872845e-06, "loss": 0.6044, "step": 5782 }, { "epoch": 0.74, "grad_norm": 0.8103742896785017, "learning_rate": 1.7097070820497653e-06, "loss": 0.576, "step": 5783 }, { "epoch": 0.74, "grad_norm": 0.6812733400886559, "learning_rate": 1.7081537600148523e-06, "loss": 0.5203, "step": 5784 }, { "epoch": 0.74, "grad_norm": 0.6586724672293538, "learning_rate": 1.7066009985470593e-06, "loss": 0.5031, "step": 5785 }, { "epoch": 0.74, "grad_norm": 0.8067857053702998, "learning_rate": 1.7050487979108065e-06, "loss": 0.6057, "step": 5786 }, { "epoch": 0.74, "grad_norm": 0.758185917171002, "learning_rate": 1.7034971583704163e-06, "loss": 0.6087, "step": 5787 }, { "epoch": 0.74, "grad_norm": 0.6078008010678202, "learning_rate": 1.7019460801901179e-06, "loss": 0.5376, "step": 5788 }, { "epoch": 0.74, "grad_norm": 0.6386876011916381, "learning_rate": 1.7003955636340424e-06, "loss": 0.5219, "step": 5789 }, { "epoch": 0.74, "grad_norm": 0.7789709881968113, "learning_rate": 1.698845608966228e-06, "loss": 0.5509, "step": 5790 }, { "epoch": 0.74, "grad_norm": 0.9005609015028593, "learning_rate": 1.6972962164506153e-06, "loss": 0.55, "step": 5791 }, { "epoch": 0.74, "grad_norm": 0.8565180971952367, "learning_rate": 1.6957473863510493e-06, "loss": 0.6239, "step": 5792 }, { "epoch": 0.74, "grad_norm": 0.7091351922628039, "learning_rate": 1.6941991189312795e-06, "loss": 0.5219, "step": 5793 }, { "epoch": 0.74, "grad_norm": 2.020578913620658, "learning_rate": 1.6926514144549611e-06, "loss": 0.6762, "step": 5794 }, { "epoch": 0.74, "grad_norm": 0.6613993607452004, "learning_rate": 1.6911042731856503e-06, "loss": 0.5246, "step": 5795 }, { "epoch": 0.74, "grad_norm": 0.7469482789831973, "learning_rate": 1.6895576953868104e-06, "loss": 0.5373, "step": 5796 }, { "epoch": 0.74, "grad_norm": 0.7846629958867347, "learning_rate": 1.6880116813218072e-06, "loss": 0.6102, "step": 5797 }, { "epoch": 0.74, "grad_norm": 0.6577856588113831, "learning_rate": 1.6864662312539105e-06, "loss": 0.5333, "step": 5798 }, { "epoch": 0.74, "grad_norm": 0.6675088541919377, "learning_rate": 1.6849213454462942e-06, "loss": 0.5055, "step": 5799 }, { "epoch": 0.74, "grad_norm": 0.6759524192481948, "learning_rate": 1.6833770241620367e-06, "loss": 0.5166, "step": 5800 }, { "epoch": 0.74, "grad_norm": 0.6859695600609146, "learning_rate": 1.681833267664118e-06, "loss": 0.5366, "step": 5801 }, { "epoch": 0.74, "grad_norm": 0.8344824306968963, "learning_rate": 1.6802900762154268e-06, "loss": 0.5418, "step": 5802 }, { "epoch": 0.74, "grad_norm": 0.6873472538429511, "learning_rate": 1.6787474500787515e-06, "loss": 0.5015, "step": 5803 }, { "epoch": 0.74, "grad_norm": 0.6886186064694745, "learning_rate": 1.6772053895167845e-06, "loss": 0.5161, "step": 5804 }, { "epoch": 0.74, "grad_norm": 0.8980129813270382, "learning_rate": 1.6756638947921234e-06, "loss": 0.5764, "step": 5805 }, { "epoch": 0.74, "grad_norm": 0.9400516390856912, "learning_rate": 1.6741229661672681e-06, "loss": 0.6248, "step": 5806 }, { "epoch": 0.74, "grad_norm": 0.8740214578619746, "learning_rate": 1.6725826039046233e-06, "loss": 0.6338, "step": 5807 }, { "epoch": 0.74, "grad_norm": 0.7322193819624436, "learning_rate": 1.6710428082664959e-06, "loss": 0.4966, "step": 5808 }, { "epoch": 0.74, "grad_norm": 0.7879048999557077, "learning_rate": 1.6695035795150977e-06, "loss": 0.6082, "step": 5809 }, { "epoch": 0.74, "grad_norm": 0.9262489171718701, "learning_rate": 1.667964917912543e-06, "loss": 0.6156, "step": 5810 }, { "epoch": 0.74, "grad_norm": 0.868157802716121, "learning_rate": 1.6664268237208496e-06, "loss": 0.6124, "step": 5811 }, { "epoch": 0.74, "grad_norm": 0.9629571231606969, "learning_rate": 1.6648892972019398e-06, "loss": 0.6101, "step": 5812 }, { "epoch": 0.74, "grad_norm": 0.630257986452402, "learning_rate": 1.663352338617637e-06, "loss": 0.5182, "step": 5813 }, { "epoch": 0.74, "grad_norm": 0.7725192520495178, "learning_rate": 1.6618159482296704e-06, "loss": 0.5162, "step": 5814 }, { "epoch": 0.74, "grad_norm": 0.7957401317794008, "learning_rate": 1.6602801262996709e-06, "loss": 0.5767, "step": 5815 }, { "epoch": 0.74, "grad_norm": 0.6924976568408082, "learning_rate": 1.658744873089173e-06, "loss": 0.5333, "step": 5816 }, { "epoch": 0.74, "grad_norm": 0.8876856529141748, "learning_rate": 1.6572101888596142e-06, "loss": 0.5505, "step": 5817 }, { "epoch": 0.74, "grad_norm": 0.8824769208240716, "learning_rate": 1.6556760738723349e-06, "loss": 0.6322, "step": 5818 }, { "epoch": 0.74, "grad_norm": 0.8568894790784245, "learning_rate": 1.6541425283885798e-06, "loss": 0.6057, "step": 5819 }, { "epoch": 0.74, "grad_norm": 0.629991114696536, "learning_rate": 1.6526095526694953e-06, "loss": 0.5234, "step": 5820 }, { "epoch": 0.74, "grad_norm": 0.8710601492173604, "learning_rate": 1.6510771469761306e-06, "loss": 0.5908, "step": 5821 }, { "epoch": 0.74, "grad_norm": 0.78861392849983, "learning_rate": 1.6495453115694393e-06, "loss": 0.6233, "step": 5822 }, { "epoch": 0.74, "grad_norm": 0.6002065357793965, "learning_rate": 1.6480140467102768e-06, "loss": 0.5008, "step": 5823 }, { "epoch": 0.74, "grad_norm": 0.740915385089809, "learning_rate": 1.646483352659401e-06, "loss": 0.5474, "step": 5824 }, { "epoch": 0.74, "grad_norm": 0.8145236738881111, "learning_rate": 1.6449532296774739e-06, "loss": 0.5498, "step": 5825 }, { "epoch": 0.74, "grad_norm": 0.7441417133206782, "learning_rate": 1.643423678025059e-06, "loss": 0.5837, "step": 5826 }, { "epoch": 0.74, "grad_norm": 0.7976850832151852, "learning_rate": 1.6418946979626216e-06, "loss": 0.5967, "step": 5827 }, { "epoch": 0.74, "grad_norm": 0.8760592741905344, "learning_rate": 1.640366289750534e-06, "loss": 0.5785, "step": 5828 }, { "epoch": 0.74, "grad_norm": 0.6810828055330408, "learning_rate": 1.638838453649067e-06, "loss": 0.5136, "step": 5829 }, { "epoch": 0.74, "grad_norm": 0.737418911972885, "learning_rate": 1.6373111899183953e-06, "loss": 0.5738, "step": 5830 }, { "epoch": 0.74, "grad_norm": 1.1547721275838845, "learning_rate": 1.6357844988185957e-06, "loss": 0.5899, "step": 5831 }, { "epoch": 0.74, "grad_norm": 0.8466645058848514, "learning_rate": 1.6342583806096473e-06, "loss": 0.6259, "step": 5832 }, { "epoch": 0.74, "grad_norm": 0.9192298182706128, "learning_rate": 1.6327328355514327e-06, "loss": 0.593, "step": 5833 }, { "epoch": 0.74, "grad_norm": 0.7362650260598587, "learning_rate": 1.6312078639037365e-06, "loss": 0.6052, "step": 5834 }, { "epoch": 0.74, "grad_norm": 0.7216085822459143, "learning_rate": 1.6296834659262457e-06, "loss": 0.4819, "step": 5835 }, { "epoch": 0.74, "grad_norm": 0.9298581391264136, "learning_rate": 1.6281596418785485e-06, "loss": 0.6282, "step": 5836 }, { "epoch": 0.74, "grad_norm": 0.7903572998676937, "learning_rate": 1.6266363920201366e-06, "loss": 0.5822, "step": 5837 }, { "epoch": 0.74, "grad_norm": 0.7132577999422386, "learning_rate": 1.6251137166104036e-06, "loss": 0.5308, "step": 5838 }, { "epoch": 0.74, "grad_norm": 0.813344706262403, "learning_rate": 1.6235916159086457e-06, "loss": 0.5209, "step": 5839 }, { "epoch": 0.74, "grad_norm": 0.7344273276920951, "learning_rate": 1.6220700901740604e-06, "loss": 0.5567, "step": 5840 }, { "epoch": 0.74, "grad_norm": 0.7681406061228656, "learning_rate": 1.6205491396657474e-06, "loss": 0.515, "step": 5841 }, { "epoch": 0.74, "grad_norm": 0.7275369113520247, "learning_rate": 1.6190287646427094e-06, "loss": 0.5396, "step": 5842 }, { "epoch": 0.74, "grad_norm": 0.8913274326521137, "learning_rate": 1.6175089653638504e-06, "loss": 0.6263, "step": 5843 }, { "epoch": 0.74, "grad_norm": 0.8751026922070858, "learning_rate": 1.6159897420879755e-06, "loss": 0.6564, "step": 5844 }, { "epoch": 0.74, "grad_norm": 0.7515926410062148, "learning_rate": 1.6144710950737935e-06, "loss": 0.5512, "step": 5845 }, { "epoch": 0.74, "grad_norm": 0.7405113135735025, "learning_rate": 1.6129530245799136e-06, "loss": 0.612, "step": 5846 }, { "epoch": 0.74, "grad_norm": 0.666004061993584, "learning_rate": 1.6114355308648472e-06, "loss": 0.5377, "step": 5847 }, { "epoch": 0.75, "grad_norm": 0.8863925692265253, "learning_rate": 1.6099186141870088e-06, "loss": 0.633, "step": 5848 }, { "epoch": 0.75, "grad_norm": 0.9424480768504067, "learning_rate": 1.6084022748047123e-06, "loss": 0.6714, "step": 5849 }, { "epoch": 0.75, "grad_norm": 0.9512953886730859, "learning_rate": 1.6068865129761752e-06, "loss": 0.6288, "step": 5850 }, { "epoch": 0.75, "grad_norm": 0.6975746137973988, "learning_rate": 1.6053713289595158e-06, "loss": 0.5315, "step": 5851 }, { "epoch": 0.75, "grad_norm": 1.1515504508982928, "learning_rate": 1.6038567230127545e-06, "loss": 0.6141, "step": 5852 }, { "epoch": 0.75, "grad_norm": 0.826371570181474, "learning_rate": 1.6023426953938115e-06, "loss": 0.6346, "step": 5853 }, { "epoch": 0.75, "grad_norm": 0.6818203518131212, "learning_rate": 1.6008292463605101e-06, "loss": 0.5213, "step": 5854 }, { "epoch": 0.75, "grad_norm": 0.6759748798150119, "learning_rate": 1.5993163761705772e-06, "loss": 0.522, "step": 5855 }, { "epoch": 0.75, "grad_norm": 0.8699457605382849, "learning_rate": 1.5978040850816373e-06, "loss": 0.6489, "step": 5856 }, { "epoch": 0.75, "grad_norm": 0.7831870937471817, "learning_rate": 1.5962923733512176e-06, "loss": 0.5737, "step": 5857 }, { "epoch": 0.75, "grad_norm": 0.8874041282179453, "learning_rate": 1.5947812412367474e-06, "loss": 0.6549, "step": 5858 }, { "epoch": 0.75, "grad_norm": 0.7027488829052079, "learning_rate": 1.5932706889955568e-06, "loss": 0.5686, "step": 5859 }, { "epoch": 0.75, "grad_norm": 0.8169543931240841, "learning_rate": 1.5917607168848764e-06, "loss": 0.6414, "step": 5860 }, { "epoch": 0.75, "grad_norm": 0.805539168386371, "learning_rate": 1.5902513251618396e-06, "loss": 0.5789, "step": 5861 }, { "epoch": 0.75, "grad_norm": 0.7464710005873254, "learning_rate": 1.5887425140834789e-06, "loss": 0.5739, "step": 5862 }, { "epoch": 0.75, "grad_norm": 0.6791323714145837, "learning_rate": 1.5872342839067305e-06, "loss": 0.5299, "step": 5863 }, { "epoch": 0.75, "grad_norm": 0.7407387624007086, "learning_rate": 1.5857266348884293e-06, "loss": 0.5183, "step": 5864 }, { "epoch": 0.75, "grad_norm": 0.8076478341283453, "learning_rate": 1.5842195672853128e-06, "loss": 0.5978, "step": 5865 }, { "epoch": 0.75, "grad_norm": 0.845242395335814, "learning_rate": 1.582713081354018e-06, "loss": 0.6081, "step": 5866 }, { "epoch": 0.75, "grad_norm": 0.7530378201272134, "learning_rate": 1.581207177351085e-06, "loss": 0.5156, "step": 5867 }, { "epoch": 0.75, "grad_norm": 0.8217957689627217, "learning_rate": 1.5797018555329529e-06, "loss": 0.6061, "step": 5868 }, { "epoch": 0.75, "grad_norm": 0.7530159261899385, "learning_rate": 1.5781971161559622e-06, "loss": 0.5697, "step": 5869 }, { "epoch": 0.75, "grad_norm": 0.9183465822912796, "learning_rate": 1.576692959476353e-06, "loss": 0.6437, "step": 5870 }, { "epoch": 0.75, "grad_norm": 0.6952600641197307, "learning_rate": 1.575189385750271e-06, "loss": 0.5488, "step": 5871 }, { "epoch": 0.75, "grad_norm": 0.7156962069325696, "learning_rate": 1.5736863952337567e-06, "loss": 0.5772, "step": 5872 }, { "epoch": 0.75, "grad_norm": 0.8216101625083886, "learning_rate": 1.5721839881827545e-06, "loss": 0.5956, "step": 5873 }, { "epoch": 0.75, "grad_norm": 0.6596400202999854, "learning_rate": 1.5706821648531078e-06, "loss": 0.5409, "step": 5874 }, { "epoch": 0.75, "grad_norm": 0.8283781922036126, "learning_rate": 1.569180925500563e-06, "loss": 0.6615, "step": 5875 }, { "epoch": 0.75, "grad_norm": 0.7008414768364053, "learning_rate": 1.5676802703807636e-06, "loss": 0.5372, "step": 5876 }, { "epoch": 0.75, "grad_norm": 0.7706524995832714, "learning_rate": 1.566180199749257e-06, "loss": 0.4792, "step": 5877 }, { "epoch": 0.75, "grad_norm": 0.7168011977678853, "learning_rate": 1.5646807138614895e-06, "loss": 0.5409, "step": 5878 }, { "epoch": 0.75, "grad_norm": 0.7735466749437391, "learning_rate": 1.5631818129728072e-06, "loss": 0.6144, "step": 5879 }, { "epoch": 0.75, "grad_norm": 0.6971762911929578, "learning_rate": 1.5616834973384576e-06, "loss": 0.5534, "step": 5880 }, { "epoch": 0.75, "grad_norm": 0.8229319732104531, "learning_rate": 1.5601857672135883e-06, "loss": 0.5871, "step": 5881 }, { "epoch": 0.75, "grad_norm": 0.6943147442626812, "learning_rate": 1.5586886228532472e-06, "loss": 0.5531, "step": 5882 }, { "epoch": 0.75, "grad_norm": 0.6520514089849233, "learning_rate": 1.5571920645123818e-06, "loss": 0.5313, "step": 5883 }, { "epoch": 0.75, "grad_norm": 3.822397362966487, "learning_rate": 1.5556960924458415e-06, "loss": 0.5891, "step": 5884 }, { "epoch": 0.75, "grad_norm": 0.8114589771046263, "learning_rate": 1.5542007069083737e-06, "loss": 0.6081, "step": 5885 }, { "epoch": 0.75, "grad_norm": 0.8840467383353776, "learning_rate": 1.5527059081546263e-06, "loss": 0.6484, "step": 5886 }, { "epoch": 0.75, "grad_norm": 0.6515757287267319, "learning_rate": 1.5512116964391505e-06, "loss": 0.5125, "step": 5887 }, { "epoch": 0.75, "grad_norm": 1.045024667893443, "learning_rate": 1.5497180720163936e-06, "loss": 0.6362, "step": 5888 }, { "epoch": 0.75, "grad_norm": 0.7897407381559436, "learning_rate": 1.5482250351407042e-06, "loss": 0.5537, "step": 5889 }, { "epoch": 0.75, "grad_norm": 0.8069402945209845, "learning_rate": 1.5467325860663307e-06, "loss": 0.6056, "step": 5890 }, { "epoch": 0.75, "grad_norm": 0.7421858959320319, "learning_rate": 1.5452407250474223e-06, "loss": 0.5444, "step": 5891 }, { "epoch": 0.75, "grad_norm": 0.7855589233418996, "learning_rate": 1.5437494523380269e-06, "loss": 0.6613, "step": 5892 }, { "epoch": 0.75, "grad_norm": 0.8890677963066336, "learning_rate": 1.542258768192093e-06, "loss": 0.5946, "step": 5893 }, { "epoch": 0.75, "grad_norm": 0.8994250368885927, "learning_rate": 1.540768672863468e-06, "loss": 0.5169, "step": 5894 }, { "epoch": 0.75, "grad_norm": 0.6814967753869515, "learning_rate": 1.5392791666059003e-06, "loss": 0.5616, "step": 5895 }, { "epoch": 0.75, "grad_norm": 0.8906932723136545, "learning_rate": 1.5377902496730367e-06, "loss": 0.6279, "step": 5896 }, { "epoch": 0.75, "grad_norm": 0.9241740406517359, "learning_rate": 1.536301922318425e-06, "loss": 0.6415, "step": 5897 }, { "epoch": 0.75, "grad_norm": 0.8572185864943535, "learning_rate": 1.5348141847955112e-06, "loss": 0.6624, "step": 5898 }, { "epoch": 0.75, "grad_norm": 0.8152249955762612, "learning_rate": 1.5333270373576414e-06, "loss": 0.6076, "step": 5899 }, { "epoch": 0.75, "grad_norm": 0.7544383590950826, "learning_rate": 1.5318404802580617e-06, "loss": 0.5318, "step": 5900 }, { "epoch": 0.75, "grad_norm": 0.6936164892634805, "learning_rate": 1.5303545137499177e-06, "loss": 0.5508, "step": 5901 }, { "epoch": 0.75, "grad_norm": 0.7047536383014844, "learning_rate": 1.528869138086253e-06, "loss": 0.546, "step": 5902 }, { "epoch": 0.75, "grad_norm": 0.9822219715093419, "learning_rate": 1.5273843535200122e-06, "loss": 0.663, "step": 5903 }, { "epoch": 0.75, "grad_norm": 0.7570505699788797, "learning_rate": 1.5259001603040385e-06, "loss": 0.5377, "step": 5904 }, { "epoch": 0.75, "grad_norm": 1.0753220874579519, "learning_rate": 1.5244165586910748e-06, "loss": 0.6086, "step": 5905 }, { "epoch": 0.75, "grad_norm": 0.759454448041383, "learning_rate": 1.5229335489337628e-06, "loss": 0.6145, "step": 5906 }, { "epoch": 0.75, "grad_norm": 0.6250259531135143, "learning_rate": 1.5214511312846431e-06, "loss": 0.5148, "step": 5907 }, { "epoch": 0.75, "grad_norm": 0.7600710998725176, "learning_rate": 1.5199693059961567e-06, "loss": 0.5607, "step": 5908 }, { "epoch": 0.75, "grad_norm": 0.7350211532263026, "learning_rate": 1.518488073320643e-06, "loss": 0.516, "step": 5909 }, { "epoch": 0.75, "grad_norm": 0.8691960863679342, "learning_rate": 1.5170074335103401e-06, "loss": 0.6209, "step": 5910 }, { "epoch": 0.75, "grad_norm": 0.6507197621832734, "learning_rate": 1.515527386817386e-06, "loss": 0.5324, "step": 5911 }, { "epoch": 0.75, "grad_norm": 0.7250007694966768, "learning_rate": 1.5140479334938153e-06, "loss": 0.4998, "step": 5912 }, { "epoch": 0.75, "grad_norm": 0.9191419850429818, "learning_rate": 1.5125690737915666e-06, "loss": 0.6246, "step": 5913 }, { "epoch": 0.75, "grad_norm": 0.6594747996901251, "learning_rate": 1.5110908079624726e-06, "loss": 0.4985, "step": 5914 }, { "epoch": 0.75, "grad_norm": 0.7284651782080697, "learning_rate": 1.5096131362582673e-06, "loss": 0.5401, "step": 5915 }, { "epoch": 0.75, "grad_norm": 0.6094309389910929, "learning_rate": 1.508136058930582e-06, "loss": 0.5135, "step": 5916 }, { "epoch": 0.75, "grad_norm": 0.7395913441951243, "learning_rate": 1.5066595762309478e-06, "loss": 0.5096, "step": 5917 }, { "epoch": 0.75, "grad_norm": 0.5903688396352945, "learning_rate": 1.5051836884107946e-06, "loss": 0.5078, "step": 5918 }, { "epoch": 0.75, "grad_norm": 0.8216335941071214, "learning_rate": 1.5037083957214505e-06, "loss": 0.64, "step": 5919 }, { "epoch": 0.75, "grad_norm": 0.9325850357672525, "learning_rate": 1.5022336984141428e-06, "loss": 0.6432, "step": 5920 }, { "epoch": 0.75, "grad_norm": 0.9158450409814138, "learning_rate": 1.5007595967399962e-06, "loss": 0.606, "step": 5921 }, { "epoch": 0.75, "grad_norm": 0.8026594844365942, "learning_rate": 1.4992860909500357e-06, "loss": 0.5377, "step": 5922 }, { "epoch": 0.75, "grad_norm": 0.8382445201843135, "learning_rate": 1.4978131812951841e-06, "loss": 0.6215, "step": 5923 }, { "epoch": 0.75, "grad_norm": 0.7805690601597003, "learning_rate": 1.4963408680262614e-06, "loss": 0.5586, "step": 5924 }, { "epoch": 0.75, "grad_norm": 0.7354570274878476, "learning_rate": 1.494869151393989e-06, "loss": 0.493, "step": 5925 }, { "epoch": 0.75, "grad_norm": 0.9198073858996956, "learning_rate": 1.4933980316489832e-06, "loss": 0.6305, "step": 5926 }, { "epoch": 0.76, "grad_norm": 0.6319337327826817, "learning_rate": 1.4919275090417613e-06, "loss": 0.5297, "step": 5927 }, { "epoch": 0.76, "grad_norm": 0.7483555945780317, "learning_rate": 1.490457583822738e-06, "loss": 0.5809, "step": 5928 }, { "epoch": 0.76, "grad_norm": 0.8055112074954867, "learning_rate": 1.4889882562422258e-06, "loss": 0.6433, "step": 5929 }, { "epoch": 0.76, "grad_norm": 0.6800242159719299, "learning_rate": 1.4875195265504362e-06, "loss": 0.5297, "step": 5930 }, { "epoch": 0.76, "grad_norm": 0.6365067854231993, "learning_rate": 1.486051394997478e-06, "loss": 0.5221, "step": 5931 }, { "epoch": 0.76, "grad_norm": 0.7467987320092495, "learning_rate": 1.4845838618333597e-06, "loss": 0.5397, "step": 5932 }, { "epoch": 0.76, "grad_norm": 0.855320509955299, "learning_rate": 1.4831169273079866e-06, "loss": 0.6086, "step": 5933 }, { "epoch": 0.76, "grad_norm": 0.6708267092816687, "learning_rate": 1.4816505916711615e-06, "loss": 0.5175, "step": 5934 }, { "epoch": 0.76, "grad_norm": 0.8761132120671505, "learning_rate": 1.4801848551725872e-06, "loss": 0.6094, "step": 5935 }, { "epoch": 0.76, "grad_norm": 0.7997342589857858, "learning_rate": 1.478719718061863e-06, "loss": 0.5497, "step": 5936 }, { "epoch": 0.76, "grad_norm": 0.7381074437220135, "learning_rate": 1.477255180588486e-06, "loss": 0.6094, "step": 5937 }, { "epoch": 0.76, "grad_norm": 0.8855118778747625, "learning_rate": 1.4757912430018506e-06, "loss": 0.6141, "step": 5938 }, { "epoch": 0.76, "grad_norm": 0.6947700741175422, "learning_rate": 1.4743279055512532e-06, "loss": 0.5302, "step": 5939 }, { "epoch": 0.76, "grad_norm": 0.869442066721817, "learning_rate": 1.4728651684858835e-06, "loss": 0.6398, "step": 5940 }, { "epoch": 0.76, "grad_norm": 0.7379399231132954, "learning_rate": 1.4714030320548301e-06, "loss": 0.5057, "step": 5941 }, { "epoch": 0.76, "grad_norm": 0.7324847319394786, "learning_rate": 1.469941496507079e-06, "loss": 0.553, "step": 5942 }, { "epoch": 0.76, "grad_norm": 0.8215193094263105, "learning_rate": 1.468480562091516e-06, "loss": 0.6425, "step": 5943 }, { "epoch": 0.76, "grad_norm": 0.9034475166490202, "learning_rate": 1.4670202290569218e-06, "loss": 0.6544, "step": 5944 }, { "epoch": 0.76, "grad_norm": 0.8489302259656883, "learning_rate": 1.4655604976519767e-06, "loss": 0.6398, "step": 5945 }, { "epoch": 0.76, "grad_norm": 0.7457195425457996, "learning_rate": 1.4641013681252569e-06, "loss": 0.556, "step": 5946 }, { "epoch": 0.76, "grad_norm": 0.8095512196505711, "learning_rate": 1.4626428407252374e-06, "loss": 0.6222, "step": 5947 }, { "epoch": 0.76, "grad_norm": 0.6767086932911917, "learning_rate": 1.4611849157002899e-06, "loss": 0.5918, "step": 5948 }, { "epoch": 0.76, "grad_norm": 0.7454314079690785, "learning_rate": 1.4597275932986848e-06, "loss": 0.5301, "step": 5949 }, { "epoch": 0.76, "grad_norm": 0.7258126404629276, "learning_rate": 1.458270873768588e-06, "loss": 0.5468, "step": 5950 }, { "epoch": 0.76, "grad_norm": 2.525755271359152, "learning_rate": 1.4568147573580637e-06, "loss": 0.6212, "step": 5951 }, { "epoch": 0.76, "grad_norm": 0.8272288146500956, "learning_rate": 1.4553592443150739e-06, "loss": 0.5482, "step": 5952 }, { "epoch": 0.76, "grad_norm": 0.8266659542980566, "learning_rate": 1.453904334887477e-06, "loss": 0.5841, "step": 5953 }, { "epoch": 0.76, "grad_norm": 0.801343697910673, "learning_rate": 1.4524500293230287e-06, "loss": 0.5963, "step": 5954 }, { "epoch": 0.76, "grad_norm": 0.8870717941376552, "learning_rate": 1.4509963278693828e-06, "loss": 0.6297, "step": 5955 }, { "epoch": 0.76, "grad_norm": 0.8306591252887179, "learning_rate": 1.4495432307740885e-06, "loss": 0.6093, "step": 5956 }, { "epoch": 0.76, "grad_norm": 0.8138944828651896, "learning_rate": 1.4480907382845938e-06, "loss": 0.587, "step": 5957 }, { "epoch": 0.76, "grad_norm": 0.640960443830883, "learning_rate": 1.4466388506482425e-06, "loss": 0.5209, "step": 5958 }, { "epoch": 0.76, "grad_norm": 0.7898516414805914, "learning_rate": 1.4451875681122767e-06, "loss": 0.5377, "step": 5959 }, { "epoch": 0.76, "grad_norm": 0.7042045011578976, "learning_rate": 1.443736890923834e-06, "loss": 0.51, "step": 5960 }, { "epoch": 0.76, "grad_norm": 1.162246867809616, "learning_rate": 1.44228681932995e-06, "loss": 0.6127, "step": 5961 }, { "epoch": 0.76, "grad_norm": 0.6716421491730815, "learning_rate": 1.4408373535775572e-06, "loss": 0.5425, "step": 5962 }, { "epoch": 0.76, "grad_norm": 0.6633333032048389, "learning_rate": 1.4393884939134833e-06, "loss": 0.5727, "step": 5963 }, { "epoch": 0.76, "grad_norm": 0.6608236926032384, "learning_rate": 1.437940240584455e-06, "loss": 0.5267, "step": 5964 }, { "epoch": 0.76, "grad_norm": 0.6330701673801944, "learning_rate": 1.4364925938370926e-06, "loss": 0.5338, "step": 5965 }, { "epoch": 0.76, "grad_norm": 0.7413039913488412, "learning_rate": 1.4350455539179192e-06, "loss": 0.5799, "step": 5966 }, { "epoch": 0.76, "grad_norm": 0.7739790891707063, "learning_rate": 1.433599121073348e-06, "loss": 0.5901, "step": 5967 }, { "epoch": 0.76, "grad_norm": 0.8051481132758258, "learning_rate": 1.4321532955496925e-06, "loss": 0.6134, "step": 5968 }, { "epoch": 0.76, "grad_norm": 0.6783678526297705, "learning_rate": 1.430708077593161e-06, "loss": 0.5712, "step": 5969 }, { "epoch": 0.76, "grad_norm": 0.7877745873297851, "learning_rate": 1.429263467449859e-06, "loss": 0.6028, "step": 5970 }, { "epoch": 0.76, "grad_norm": 0.6191267615792932, "learning_rate": 1.427819465365789e-06, "loss": 0.5352, "step": 5971 }, { "epoch": 0.76, "grad_norm": 0.6849466786355246, "learning_rate": 1.426376071586848e-06, "loss": 0.5357, "step": 5972 }, { "epoch": 0.76, "grad_norm": 0.7603943447595155, "learning_rate": 1.4249332863588334e-06, "loss": 0.5669, "step": 5973 }, { "epoch": 0.76, "grad_norm": 0.7021987874269323, "learning_rate": 1.4234911099274357e-06, "loss": 0.506, "step": 5974 }, { "epoch": 0.76, "grad_norm": 0.8086945062698163, "learning_rate": 1.4220495425382418e-06, "loss": 0.624, "step": 5975 }, { "epoch": 0.76, "grad_norm": 0.7139204168377902, "learning_rate": 1.4206085844367361e-06, "loss": 0.5175, "step": 5976 }, { "epoch": 0.76, "grad_norm": 0.6419213776173835, "learning_rate": 1.419168235868299e-06, "loss": 0.514, "step": 5977 }, { "epoch": 0.76, "grad_norm": 0.7990206802172736, "learning_rate": 1.4177284970782063e-06, "loss": 0.5991, "step": 5978 }, { "epoch": 0.76, "grad_norm": 0.7537870528281434, "learning_rate": 1.4162893683116307e-06, "loss": 0.5055, "step": 5979 }, { "epoch": 0.76, "grad_norm": 0.6320668823822558, "learning_rate": 1.414850849813641e-06, "loss": 0.5349, "step": 5980 }, { "epoch": 0.76, "grad_norm": 0.8177142881926991, "learning_rate": 1.4134129418292013e-06, "loss": 0.5151, "step": 5981 }, { "epoch": 0.76, "grad_norm": 0.7432383161836048, "learning_rate": 1.4119756446031736e-06, "loss": 0.5462, "step": 5982 }, { "epoch": 0.76, "grad_norm": 0.7794214011334519, "learning_rate": 1.410538958380313e-06, "loss": 0.4998, "step": 5983 }, { "epoch": 0.76, "grad_norm": 0.9262414925714734, "learning_rate": 1.4091028834052739e-06, "loss": 0.5719, "step": 5984 }, { "epoch": 0.76, "grad_norm": 0.8293905770682239, "learning_rate": 1.4076674199226037e-06, "loss": 0.6042, "step": 5985 }, { "epoch": 0.76, "grad_norm": 0.6385102480554473, "learning_rate": 1.406232568176747e-06, "loss": 0.5557, "step": 5986 }, { "epoch": 0.76, "grad_norm": 0.6534442623344828, "learning_rate": 1.4047983284120442e-06, "loss": 0.5199, "step": 5987 }, { "epoch": 0.76, "grad_norm": 0.7950501398759092, "learning_rate": 1.403364700872732e-06, "loss": 0.6034, "step": 5988 }, { "epoch": 0.76, "grad_norm": 0.6783707693013732, "learning_rate": 1.4019316858029418e-06, "loss": 0.5231, "step": 5989 }, { "epoch": 0.76, "grad_norm": 0.6888939247448539, "learning_rate": 1.4004992834467006e-06, "loss": 0.5571, "step": 5990 }, { "epoch": 0.76, "grad_norm": 0.7163480834841157, "learning_rate": 1.3990674940479327e-06, "loss": 0.5919, "step": 5991 }, { "epoch": 0.76, "grad_norm": 0.6999541991361229, "learning_rate": 1.397636317850456e-06, "loss": 0.5646, "step": 5992 }, { "epoch": 0.76, "grad_norm": 0.7198008743943831, "learning_rate": 1.396205755097985e-06, "loss": 0.5538, "step": 5993 }, { "epoch": 0.76, "grad_norm": 0.8863364329317801, "learning_rate": 1.3947758060341304e-06, "loss": 0.5875, "step": 5994 }, { "epoch": 0.76, "grad_norm": 0.7468557020009613, "learning_rate": 1.3933464709023964e-06, "loss": 0.5804, "step": 5995 }, { "epoch": 0.76, "grad_norm": 0.8034963130296258, "learning_rate": 1.391917749946185e-06, "loss": 0.6229, "step": 5996 }, { "epoch": 0.76, "grad_norm": 0.8163806876327102, "learning_rate": 1.3904896434087905e-06, "loss": 0.5968, "step": 5997 }, { "epoch": 0.76, "grad_norm": 0.6789758301208867, "learning_rate": 1.3890621515334073e-06, "loss": 0.5049, "step": 5998 }, { "epoch": 0.76, "grad_norm": 0.7316420049627483, "learning_rate": 1.3876352745631216e-06, "loss": 0.5539, "step": 5999 }, { "epoch": 0.76, "grad_norm": 0.8103186910531326, "learning_rate": 1.3862090127409156e-06, "loss": 0.5707, "step": 6000 }, { "epoch": 0.76, "grad_norm": 0.7505970066378608, "learning_rate": 1.384783366309666e-06, "loss": 0.6098, "step": 6001 }, { "epoch": 0.76, "grad_norm": 0.9009395804911083, "learning_rate": 1.383358335512146e-06, "loss": 0.6242, "step": 6002 }, { "epoch": 0.76, "grad_norm": 0.6995677412940682, "learning_rate": 1.3819339205910237e-06, "loss": 0.5266, "step": 6003 }, { "epoch": 0.76, "grad_norm": 0.8429682614789905, "learning_rate": 1.3805101217888623e-06, "loss": 0.5928, "step": 6004 }, { "epoch": 0.77, "grad_norm": 1.9386287022319724, "learning_rate": 1.3790869393481193e-06, "loss": 0.6884, "step": 6005 }, { "epoch": 0.77, "grad_norm": 0.8655067538626704, "learning_rate": 1.3776643735111484e-06, "loss": 0.6475, "step": 6006 }, { "epoch": 0.77, "grad_norm": 0.6201921386862851, "learning_rate": 1.3762424245201972e-06, "loss": 0.529, "step": 6007 }, { "epoch": 0.77, "grad_norm": 0.9047269047415928, "learning_rate": 1.374821092617409e-06, "loss": 0.634, "step": 6008 }, { "epoch": 0.77, "grad_norm": 0.9033827818889651, "learning_rate": 1.3734003780448218e-06, "loss": 0.6544, "step": 6009 }, { "epoch": 0.77, "grad_norm": 0.6054456249163662, "learning_rate": 1.371980281044369e-06, "loss": 0.4544, "step": 6010 }, { "epoch": 0.77, "grad_norm": 0.6426130083170906, "learning_rate": 1.3705608018578776e-06, "loss": 0.5057, "step": 6011 }, { "epoch": 0.77, "grad_norm": 0.8335041603346685, "learning_rate": 1.3691419407270707e-06, "loss": 0.6079, "step": 6012 }, { "epoch": 0.77, "grad_norm": 0.8116543473206832, "learning_rate": 1.3677236978935648e-06, "loss": 0.5973, "step": 6013 }, { "epoch": 0.77, "grad_norm": 1.14300805950097, "learning_rate": 1.3663060735988725e-06, "loss": 0.6529, "step": 6014 }, { "epoch": 0.77, "grad_norm": 0.6419717599229467, "learning_rate": 1.3648890680844007e-06, "loss": 0.5269, "step": 6015 }, { "epoch": 0.77, "grad_norm": 0.6714725660150257, "learning_rate": 1.3634726815914501e-06, "loss": 0.5533, "step": 6016 }, { "epoch": 0.77, "grad_norm": 0.6730175978283861, "learning_rate": 1.3620569143612166e-06, "loss": 0.5096, "step": 6017 }, { "epoch": 0.77, "grad_norm": 0.9308954989709286, "learning_rate": 1.3606417666347909e-06, "loss": 0.6835, "step": 6018 }, { "epoch": 0.77, "grad_norm": 1.054057802258776, "learning_rate": 1.359227238653158e-06, "loss": 0.6338, "step": 6019 }, { "epoch": 0.77, "grad_norm": 0.8259535783866477, "learning_rate": 1.3578133306571966e-06, "loss": 0.6177, "step": 6020 }, { "epoch": 0.77, "grad_norm": 0.9192245356074534, "learning_rate": 1.3564000428876812e-06, "loss": 0.673, "step": 6021 }, { "epoch": 0.77, "grad_norm": 0.8488838025352896, "learning_rate": 1.3549873755852799e-06, "loss": 0.6068, "step": 6022 }, { "epoch": 0.77, "grad_norm": 0.6994533586336426, "learning_rate": 1.3535753289905535e-06, "loss": 0.5368, "step": 6023 }, { "epoch": 0.77, "grad_norm": 0.7897552446075208, "learning_rate": 1.3521639033439614e-06, "loss": 0.5736, "step": 6024 }, { "epoch": 0.77, "grad_norm": 0.7008637813195776, "learning_rate": 1.3507530988858541e-06, "loss": 0.5559, "step": 6025 }, { "epoch": 0.77, "grad_norm": 0.9772917942084419, "learning_rate": 1.3493429158564764e-06, "loss": 0.5947, "step": 6026 }, { "epoch": 0.77, "grad_norm": 0.6172940347657915, "learning_rate": 1.3479333544959682e-06, "loss": 0.5022, "step": 6027 }, { "epoch": 0.77, "grad_norm": 0.9065971768357095, "learning_rate": 1.3465244150443624e-06, "loss": 0.6059, "step": 6028 }, { "epoch": 0.77, "grad_norm": 0.6067229747553409, "learning_rate": 1.3451160977415872e-06, "loss": 0.5293, "step": 6029 }, { "epoch": 0.77, "grad_norm": 0.7250477177365416, "learning_rate": 1.3437084028274643e-06, "loss": 0.5155, "step": 6030 }, { "epoch": 0.77, "grad_norm": 0.8577505396725205, "learning_rate": 1.3423013305417093e-06, "loss": 0.6494, "step": 6031 }, { "epoch": 0.77, "grad_norm": 0.897424399888634, "learning_rate": 1.340894881123932e-06, "loss": 0.6197, "step": 6032 }, { "epoch": 0.77, "grad_norm": 0.6553612072707354, "learning_rate": 1.3394890548136363e-06, "loss": 0.5366, "step": 6033 }, { "epoch": 0.77, "grad_norm": 1.499327744828209, "learning_rate": 1.3380838518502198e-06, "loss": 0.6286, "step": 6034 }, { "epoch": 0.77, "grad_norm": 1.0300526700137789, "learning_rate": 1.3366792724729733e-06, "loss": 0.6168, "step": 6035 }, { "epoch": 0.77, "grad_norm": 0.8075927598443222, "learning_rate": 1.335275316921083e-06, "loss": 0.5699, "step": 6036 }, { "epoch": 0.77, "grad_norm": 0.6597717410366899, "learning_rate": 1.3338719854336275e-06, "loss": 0.5163, "step": 6037 }, { "epoch": 0.77, "grad_norm": 0.6768126377782689, "learning_rate": 1.332469278249579e-06, "loss": 0.5212, "step": 6038 }, { "epoch": 0.77, "grad_norm": 0.733601860471724, "learning_rate": 1.3310671956078041e-06, "loss": 0.5743, "step": 6039 }, { "epoch": 0.77, "grad_norm": 0.8306856898122218, "learning_rate": 1.329665737747064e-06, "loss": 0.6057, "step": 6040 }, { "epoch": 0.77, "grad_norm": 0.7573699753924524, "learning_rate": 1.3282649049060108e-06, "loss": 0.4816, "step": 6041 }, { "epoch": 0.77, "grad_norm": 0.9017700823777901, "learning_rate": 1.3268646973231925e-06, "loss": 0.6299, "step": 6042 }, { "epoch": 0.77, "grad_norm": 0.7422807308761529, "learning_rate": 1.32546511523705e-06, "loss": 0.4954, "step": 6043 }, { "epoch": 0.77, "grad_norm": 0.6916212253163068, "learning_rate": 1.324066158885917e-06, "loss": 0.5329, "step": 6044 }, { "epoch": 0.77, "grad_norm": 0.7149522420996964, "learning_rate": 1.3226678285080224e-06, "loss": 0.59, "step": 6045 }, { "epoch": 0.77, "grad_norm": 0.8108642728190536, "learning_rate": 1.3212701243414856e-06, "loss": 0.565, "step": 6046 }, { "epoch": 0.77, "grad_norm": 0.6915305812833428, "learning_rate": 1.319873046624322e-06, "loss": 0.5244, "step": 6047 }, { "epoch": 0.77, "grad_norm": 0.7590747643299829, "learning_rate": 1.3184765955944395e-06, "loss": 0.6153, "step": 6048 }, { "epoch": 0.77, "grad_norm": 0.8422047896311641, "learning_rate": 1.3170807714896379e-06, "loss": 0.6248, "step": 6049 }, { "epoch": 0.77, "grad_norm": 0.6379720530900855, "learning_rate": 1.3156855745476133e-06, "loss": 0.5017, "step": 6050 }, { "epoch": 0.77, "grad_norm": 0.6845356019667418, "learning_rate": 1.3142910050059527e-06, "loss": 0.5532, "step": 6051 }, { "epoch": 0.77, "grad_norm": 0.6888578405648416, "learning_rate": 1.3128970631021365e-06, "loss": 0.5561, "step": 6052 }, { "epoch": 0.77, "grad_norm": 0.6884702522586105, "learning_rate": 1.3115037490735383e-06, "loss": 0.5815, "step": 6053 }, { "epoch": 0.77, "grad_norm": 0.8503523753587275, "learning_rate": 1.3101110631574255e-06, "loss": 0.6372, "step": 6054 }, { "epoch": 0.77, "grad_norm": 1.0259526281667457, "learning_rate": 1.3087190055909572e-06, "loss": 0.6539, "step": 6055 }, { "epoch": 0.77, "grad_norm": 0.6809943919954881, "learning_rate": 1.307327576611187e-06, "loss": 0.5552, "step": 6056 }, { "epoch": 0.77, "grad_norm": 0.917301632475324, "learning_rate": 1.3059367764550607e-06, "loss": 0.6307, "step": 6057 }, { "epoch": 0.77, "grad_norm": 0.7059629439276569, "learning_rate": 1.304546605359417e-06, "loss": 0.5603, "step": 6058 }, { "epoch": 0.77, "grad_norm": 0.6432175903378218, "learning_rate": 1.303157063560987e-06, "loss": 0.503, "step": 6059 }, { "epoch": 0.77, "grad_norm": 0.8778449999656432, "learning_rate": 1.3017681512963958e-06, "loss": 0.6396, "step": 6060 }, { "epoch": 0.77, "grad_norm": 1.1758783907972818, "learning_rate": 1.3003798688021606e-06, "loss": 0.6056, "step": 6061 }, { "epoch": 0.77, "grad_norm": 0.8032840443700658, "learning_rate": 1.2989922163146913e-06, "loss": 0.5792, "step": 6062 }, { "epoch": 0.77, "grad_norm": 0.8573378852709852, "learning_rate": 1.297605194070291e-06, "loss": 0.5287, "step": 6063 }, { "epoch": 0.77, "grad_norm": 0.8955307700350046, "learning_rate": 1.2962188023051548e-06, "loss": 0.5728, "step": 6064 }, { "epoch": 0.77, "grad_norm": 0.7312917274314831, "learning_rate": 1.2948330412553707e-06, "loss": 0.5635, "step": 6065 }, { "epoch": 0.77, "grad_norm": 1.1079459711111121, "learning_rate": 1.2934479111569192e-06, "loss": 0.6707, "step": 6066 }, { "epoch": 0.77, "grad_norm": 0.7913647319789372, "learning_rate": 1.2920634122456744e-06, "loss": 0.587, "step": 6067 }, { "epoch": 0.77, "grad_norm": 0.7641170184102855, "learning_rate": 1.290679544757401e-06, "loss": 0.5632, "step": 6068 }, { "epoch": 0.77, "grad_norm": 0.6107530012059154, "learning_rate": 1.2892963089277582e-06, "loss": 0.5461, "step": 6069 }, { "epoch": 0.77, "grad_norm": 0.8022260519961586, "learning_rate": 1.2879137049922963e-06, "loss": 0.5554, "step": 6070 }, { "epoch": 0.77, "grad_norm": 0.8562519136056977, "learning_rate": 1.286531733186458e-06, "loss": 0.5647, "step": 6071 }, { "epoch": 0.77, "grad_norm": 0.7715188925563033, "learning_rate": 1.2851503937455785e-06, "loss": 0.5768, "step": 6072 }, { "epoch": 0.77, "grad_norm": 0.9621914822730264, "learning_rate": 1.283769686904885e-06, "loss": 0.5481, "step": 6073 }, { "epoch": 0.77, "grad_norm": 0.7536268707998498, "learning_rate": 1.2823896128994994e-06, "loss": 0.5855, "step": 6074 }, { "epoch": 0.77, "grad_norm": 0.7458044684248323, "learning_rate": 1.281010171964433e-06, "loss": 0.5403, "step": 6075 }, { "epoch": 0.77, "grad_norm": 0.7783001638164412, "learning_rate": 1.2796313643345904e-06, "loss": 0.5517, "step": 6076 }, { "epoch": 0.77, "grad_norm": 0.6916703893905545, "learning_rate": 1.278253190244768e-06, "loss": 0.5168, "step": 6077 }, { "epoch": 0.77, "grad_norm": 0.6027453925260882, "learning_rate": 1.276875649929654e-06, "loss": 0.5027, "step": 6078 }, { "epoch": 0.77, "grad_norm": 0.7189046753105177, "learning_rate": 1.2754987436238297e-06, "loss": 0.604, "step": 6079 }, { "epoch": 0.77, "grad_norm": 0.7314290936631835, "learning_rate": 1.2741224715617684e-06, "loss": 0.5448, "step": 6080 }, { "epoch": 0.77, "grad_norm": 0.9882015105342422, "learning_rate": 1.272746833977832e-06, "loss": 0.6289, "step": 6081 }, { "epoch": 0.77, "grad_norm": 1.1733530989755303, "learning_rate": 1.2713718311062817e-06, "loss": 0.5985, "step": 6082 }, { "epoch": 0.77, "grad_norm": 0.8205227298903993, "learning_rate": 1.2699974631812633e-06, "loss": 0.6079, "step": 6083 }, { "epoch": 0.78, "grad_norm": 0.8755323457447933, "learning_rate": 1.2686237304368188e-06, "loss": 0.5269, "step": 6084 }, { "epoch": 0.78, "grad_norm": 0.8397324892310829, "learning_rate": 1.267250633106879e-06, "loss": 0.619, "step": 6085 }, { "epoch": 0.78, "grad_norm": 0.8917042817078477, "learning_rate": 1.2658781714252693e-06, "loss": 0.5459, "step": 6086 }, { "epoch": 0.78, "grad_norm": 0.6471386347382809, "learning_rate": 1.2645063456257055e-06, "loss": 0.5207, "step": 6087 }, { "epoch": 0.78, "grad_norm": 0.8245716489820528, "learning_rate": 1.2631351559417947e-06, "loss": 0.6071, "step": 6088 }, { "epoch": 0.78, "grad_norm": 0.6960216053843672, "learning_rate": 1.2617646026070362e-06, "loss": 0.5307, "step": 6089 }, { "epoch": 0.78, "grad_norm": 0.7124612574024674, "learning_rate": 1.2603946858548215e-06, "loss": 0.5178, "step": 6090 }, { "epoch": 0.78, "grad_norm": 0.9009129538684394, "learning_rate": 1.2590254059184326e-06, "loss": 0.6917, "step": 6091 }, { "epoch": 0.78, "grad_norm": 0.831602385656947, "learning_rate": 1.2576567630310444e-06, "loss": 0.5843, "step": 6092 }, { "epoch": 0.78, "grad_norm": 0.7843621436884425, "learning_rate": 1.2562887574257216e-06, "loss": 0.5481, "step": 6093 }, { "epoch": 0.78, "grad_norm": 0.6475240337273345, "learning_rate": 1.2549213893354217e-06, "loss": 0.5362, "step": 6094 }, { "epoch": 0.78, "grad_norm": 0.7665872381775558, "learning_rate": 1.253554658992993e-06, "loss": 0.5648, "step": 6095 }, { "epoch": 0.78, "grad_norm": 0.6887918095354533, "learning_rate": 1.2521885666311762e-06, "loss": 0.5227, "step": 6096 }, { "epoch": 0.78, "grad_norm": 0.772910762175124, "learning_rate": 1.250823112482602e-06, "loss": 0.5388, "step": 6097 }, { "epoch": 0.78, "grad_norm": 0.8809272504283993, "learning_rate": 1.2494582967797931e-06, "loss": 0.6369, "step": 6098 }, { "epoch": 0.78, "grad_norm": 0.8148488024848227, "learning_rate": 1.2480941197551632e-06, "loss": 0.6084, "step": 6099 }, { "epoch": 0.78, "grad_norm": 0.8672361455040535, "learning_rate": 1.246730581641018e-06, "loss": 0.6437, "step": 6100 }, { "epoch": 0.78, "grad_norm": 0.6500809168369878, "learning_rate": 1.2453676826695531e-06, "loss": 0.4997, "step": 6101 }, { "epoch": 0.78, "grad_norm": 0.8634292658895205, "learning_rate": 1.2440054230728566e-06, "loss": 0.4912, "step": 6102 }, { "epoch": 0.78, "grad_norm": 0.9513206620762039, "learning_rate": 1.2426438030829069e-06, "loss": 0.6235, "step": 6103 }, { "epoch": 0.78, "grad_norm": 0.7008496110883633, "learning_rate": 1.2412828229315732e-06, "loss": 0.5486, "step": 6104 }, { "epoch": 0.78, "grad_norm": 0.7969223573520062, "learning_rate": 1.2399224828506174e-06, "loss": 0.5301, "step": 6105 }, { "epoch": 0.78, "grad_norm": 0.6922187346291012, "learning_rate": 1.23856278307169e-06, "loss": 0.6377, "step": 6106 }, { "epoch": 0.78, "grad_norm": 0.6771003714311855, "learning_rate": 1.2372037238263347e-06, "loss": 0.5347, "step": 6107 }, { "epoch": 0.78, "grad_norm": 0.7944169316741196, "learning_rate": 1.235845305345983e-06, "loss": 0.6368, "step": 6108 }, { "epoch": 0.78, "grad_norm": 0.9537648243619598, "learning_rate": 1.2344875278619627e-06, "loss": 0.6433, "step": 6109 }, { "epoch": 0.78, "grad_norm": 0.683887256780697, "learning_rate": 1.2331303916054877e-06, "loss": 0.5161, "step": 6110 }, { "epoch": 0.78, "grad_norm": 0.6484298106276131, "learning_rate": 1.2317738968076637e-06, "loss": 0.5986, "step": 6111 }, { "epoch": 0.78, "grad_norm": 0.668913991741974, "learning_rate": 1.2304180436994879e-06, "loss": 0.4905, "step": 6112 }, { "epoch": 0.78, "grad_norm": 0.9075030069616917, "learning_rate": 1.2290628325118481e-06, "loss": 0.6123, "step": 6113 }, { "epoch": 0.78, "grad_norm": 0.820641780130014, "learning_rate": 1.2277082634755234e-06, "loss": 0.6551, "step": 6114 }, { "epoch": 0.78, "grad_norm": 0.7252927123222411, "learning_rate": 1.226354336821181e-06, "loss": 0.5099, "step": 6115 }, { "epoch": 0.78, "grad_norm": 0.7932228393447641, "learning_rate": 1.2250010527793816e-06, "loss": 0.5421, "step": 6116 }, { "epoch": 0.78, "grad_norm": 0.6619852549126756, "learning_rate": 1.2236484115805758e-06, "loss": 0.559, "step": 6117 }, { "epoch": 0.78, "grad_norm": 0.7134715300268349, "learning_rate": 1.2222964134551035e-06, "loss": 0.553, "step": 6118 }, { "epoch": 0.78, "grad_norm": 0.8310633908193823, "learning_rate": 1.220945058633196e-06, "loss": 0.5238, "step": 6119 }, { "epoch": 0.78, "grad_norm": 0.8964118519201913, "learning_rate": 1.219594347344975e-06, "loss": 0.6094, "step": 6120 }, { "epoch": 0.78, "grad_norm": 0.59983042072799, "learning_rate": 1.218244279820453e-06, "loss": 0.5219, "step": 6121 }, { "epoch": 0.78, "grad_norm": 0.8496722176316378, "learning_rate": 1.216894856289532e-06, "loss": 0.663, "step": 6122 }, { "epoch": 0.78, "grad_norm": 0.7057927658769098, "learning_rate": 1.2155460769820048e-06, "loss": 0.5365, "step": 6123 }, { "epoch": 0.78, "grad_norm": 0.8619764085141581, "learning_rate": 1.2141979421275546e-06, "loss": 0.6533, "step": 6124 }, { "epoch": 0.78, "grad_norm": 0.6375408180022438, "learning_rate": 1.2128504519557543e-06, "loss": 0.4946, "step": 6125 }, { "epoch": 0.78, "grad_norm": 0.8717052967470488, "learning_rate": 1.2115036066960683e-06, "loss": 0.6652, "step": 6126 }, { "epoch": 0.78, "grad_norm": 0.7345593500747517, "learning_rate": 1.2101574065778498e-06, "loss": 0.5657, "step": 6127 }, { "epoch": 0.78, "grad_norm": 0.8940010362256054, "learning_rate": 1.2088118518303427e-06, "loss": 0.5514, "step": 6128 }, { "epoch": 0.78, "grad_norm": 0.8366021031673786, "learning_rate": 1.2074669426826806e-06, "loss": 0.6265, "step": 6129 }, { "epoch": 0.78, "grad_norm": 0.8790434193697267, "learning_rate": 1.206122679363888e-06, "loss": 0.6509, "step": 6130 }, { "epoch": 0.78, "grad_norm": 0.7848583893105114, "learning_rate": 1.2047790621028787e-06, "loss": 0.5662, "step": 6131 }, { "epoch": 0.78, "grad_norm": 0.9662591986593232, "learning_rate": 1.2034360911284565e-06, "loss": 0.6134, "step": 6132 }, { "epoch": 0.78, "grad_norm": 0.9108502169622835, "learning_rate": 1.202093766669316e-06, "loss": 0.6101, "step": 6133 }, { "epoch": 0.78, "grad_norm": 0.7168246131007312, "learning_rate": 1.2007520889540397e-06, "loss": 0.4855, "step": 6134 }, { "epoch": 0.78, "grad_norm": 0.8138976553363829, "learning_rate": 1.1994110582111028e-06, "loss": 0.6007, "step": 6135 }, { "epoch": 0.78, "grad_norm": 0.81512892558438, "learning_rate": 1.1980706746688692e-06, "loss": 0.6187, "step": 6136 }, { "epoch": 0.78, "grad_norm": 0.8751543934466348, "learning_rate": 1.1967309385555915e-06, "loss": 0.6029, "step": 6137 }, { "epoch": 0.78, "grad_norm": 0.8723820464892906, "learning_rate": 1.1953918500994128e-06, "loss": 0.6412, "step": 6138 }, { "epoch": 0.78, "grad_norm": 0.6816850317062958, "learning_rate": 1.1940534095283656e-06, "loss": 0.4984, "step": 6139 }, { "epoch": 0.78, "grad_norm": 0.831417523076056, "learning_rate": 1.192715617070373e-06, "loss": 0.6724, "step": 6140 }, { "epoch": 0.78, "grad_norm": 0.9538029269609437, "learning_rate": 1.1913784729532462e-06, "loss": 0.6131, "step": 6141 }, { "epoch": 0.78, "grad_norm": 0.6938557371563928, "learning_rate": 1.1900419774046884e-06, "loss": 0.5261, "step": 6142 }, { "epoch": 0.78, "grad_norm": 0.8159928255872908, "learning_rate": 1.1887061306522896e-06, "loss": 0.6192, "step": 6143 }, { "epoch": 0.78, "grad_norm": 0.7190714993213324, "learning_rate": 1.1873709329235305e-06, "loss": 0.5821, "step": 6144 }, { "epoch": 0.78, "grad_norm": 0.808213704877347, "learning_rate": 1.1860363844457822e-06, "loss": 0.6252, "step": 6145 }, { "epoch": 0.78, "grad_norm": 0.7697640215178182, "learning_rate": 1.1847024854463047e-06, "loss": 0.6021, "step": 6146 }, { "epoch": 0.78, "grad_norm": 1.0060601802753137, "learning_rate": 1.1833692361522459e-06, "loss": 0.5979, "step": 6147 }, { "epoch": 0.78, "grad_norm": 0.7098569053601365, "learning_rate": 1.1820366367906445e-06, "loss": 0.5517, "step": 6148 }, { "epoch": 0.78, "grad_norm": 0.679607499865355, "learning_rate": 1.1807046875884286e-06, "loss": 0.5961, "step": 6149 }, { "epoch": 0.78, "grad_norm": 0.8495271095191237, "learning_rate": 1.1793733887724156e-06, "loss": 0.6253, "step": 6150 }, { "epoch": 0.78, "grad_norm": 0.696843863261326, "learning_rate": 1.1780427405693118e-06, "loss": 0.557, "step": 6151 }, { "epoch": 0.78, "grad_norm": 0.8612366803768845, "learning_rate": 1.1767127432057117e-06, "loss": 0.6076, "step": 6152 }, { "epoch": 0.78, "grad_norm": 0.614657376907369, "learning_rate": 1.1753833969081014e-06, "loss": 0.5288, "step": 6153 }, { "epoch": 0.78, "grad_norm": 0.7378628876834291, "learning_rate": 1.1740547019028537e-06, "loss": 0.6064, "step": 6154 }, { "epoch": 0.78, "grad_norm": 0.6604945264808391, "learning_rate": 1.1727266584162317e-06, "loss": 0.5242, "step": 6155 }, { "epoch": 0.78, "grad_norm": 0.7281271477432399, "learning_rate": 1.171399266674388e-06, "loss": 0.6234, "step": 6156 }, { "epoch": 0.78, "grad_norm": 0.852274518881763, "learning_rate": 1.1700725269033624e-06, "loss": 0.5503, "step": 6157 }, { "epoch": 0.78, "grad_norm": 0.7497403266504802, "learning_rate": 1.168746439329086e-06, "loss": 0.6508, "step": 6158 }, { "epoch": 0.78, "grad_norm": 0.8439379266791349, "learning_rate": 1.1674210041773776e-06, "loss": 0.6128, "step": 6159 }, { "epoch": 0.78, "grad_norm": 0.8195612851552496, "learning_rate": 1.1660962216739425e-06, "loss": 0.5712, "step": 6160 }, { "epoch": 0.78, "grad_norm": 0.988188847067404, "learning_rate": 1.1647720920443816e-06, "loss": 0.655, "step": 6161 }, { "epoch": 0.79, "grad_norm": 0.7851582447545722, "learning_rate": 1.1634486155141778e-06, "loss": 0.5413, "step": 6162 }, { "epoch": 0.79, "grad_norm": 0.9486162560621779, "learning_rate": 1.1621257923087065e-06, "loss": 0.6147, "step": 6163 }, { "epoch": 0.79, "grad_norm": 0.9150414034760967, "learning_rate": 1.1608036226532292e-06, "loss": 0.6156, "step": 6164 }, { "epoch": 0.79, "grad_norm": 0.6821148013297105, "learning_rate": 1.1594821067728995e-06, "loss": 0.4879, "step": 6165 }, { "epoch": 0.79, "grad_norm": 0.6288667761698284, "learning_rate": 1.158161244892756e-06, "loss": 0.4646, "step": 6166 }, { "epoch": 0.79, "grad_norm": 0.6656312099453875, "learning_rate": 1.1568410372377293e-06, "loss": 0.5492, "step": 6167 }, { "epoch": 0.79, "grad_norm": 0.6785429332951738, "learning_rate": 1.1555214840326355e-06, "loss": 0.496, "step": 6168 }, { "epoch": 0.79, "grad_norm": 0.7134932405240115, "learning_rate": 1.1542025855021815e-06, "loss": 0.5491, "step": 6169 }, { "epoch": 0.79, "grad_norm": 0.6603433432560484, "learning_rate": 1.1528843418709623e-06, "loss": 0.5253, "step": 6170 }, { "epoch": 0.79, "grad_norm": 0.8306929964653622, "learning_rate": 1.1515667533634607e-06, "loss": 0.61, "step": 6171 }, { "epoch": 0.79, "grad_norm": 0.7710547110438372, "learning_rate": 1.150249820204048e-06, "loss": 0.58, "step": 6172 }, { "epoch": 0.79, "grad_norm": 0.9072139986073352, "learning_rate": 1.148933542616985e-06, "loss": 0.6498, "step": 6173 }, { "epoch": 0.79, "grad_norm": 0.7795215662855888, "learning_rate": 1.1476179208264194e-06, "loss": 0.623, "step": 6174 }, { "epoch": 0.79, "grad_norm": 0.8515107013953397, "learning_rate": 1.1463029550563865e-06, "loss": 0.662, "step": 6175 }, { "epoch": 0.79, "grad_norm": 0.7988210288360347, "learning_rate": 1.144988645530814e-06, "loss": 0.5976, "step": 6176 }, { "epoch": 0.79, "grad_norm": 0.6430245707432374, "learning_rate": 1.1436749924735142e-06, "loss": 0.535, "step": 6177 }, { "epoch": 0.79, "grad_norm": 0.8144922769304165, "learning_rate": 1.1423619961081884e-06, "loss": 0.621, "step": 6178 }, { "epoch": 0.79, "grad_norm": 0.7654573309480317, "learning_rate": 1.1410496566584261e-06, "loss": 0.5927, "step": 6179 }, { "epoch": 0.79, "grad_norm": 0.8609749546291151, "learning_rate": 1.1397379743477049e-06, "loss": 0.6713, "step": 6180 }, { "epoch": 0.79, "grad_norm": 0.7678564312715521, "learning_rate": 1.1384269493993904e-06, "loss": 0.595, "step": 6181 }, { "epoch": 0.79, "grad_norm": 0.7057751995240582, "learning_rate": 1.1371165820367374e-06, "loss": 0.4971, "step": 6182 }, { "epoch": 0.79, "grad_norm": 0.6928187456092068, "learning_rate": 1.1358068724828869e-06, "loss": 0.5229, "step": 6183 }, { "epoch": 0.79, "grad_norm": 0.6949700436378984, "learning_rate": 1.1344978209608693e-06, "loss": 0.5015, "step": 6184 }, { "epoch": 0.79, "grad_norm": 0.7633083830399727, "learning_rate": 1.1331894276936023e-06, "loss": 0.638, "step": 6185 }, { "epoch": 0.79, "grad_norm": 0.8868990583661809, "learning_rate": 1.1318816929038917e-06, "loss": 0.6378, "step": 6186 }, { "epoch": 0.79, "grad_norm": 0.7482377697075585, "learning_rate": 1.1305746168144311e-06, "loss": 0.5627, "step": 6187 }, { "epoch": 0.79, "grad_norm": 0.8106571710188469, "learning_rate": 1.1292681996478016e-06, "loss": 0.6062, "step": 6188 }, { "epoch": 0.79, "grad_norm": 0.881086958854107, "learning_rate": 1.1279624416264728e-06, "loss": 0.6472, "step": 6189 }, { "epoch": 0.79, "grad_norm": 0.9418688480993389, "learning_rate": 1.1266573429728017e-06, "loss": 0.5853, "step": 6190 }, { "epoch": 0.79, "grad_norm": 0.8565746535553717, "learning_rate": 1.1253529039090328e-06, "loss": 0.6326, "step": 6191 }, { "epoch": 0.79, "grad_norm": 0.8673629367375326, "learning_rate": 1.124049124657297e-06, "loss": 0.5691, "step": 6192 }, { "epoch": 0.79, "grad_norm": 0.7131959255254904, "learning_rate": 1.1227460054396177e-06, "loss": 0.5023, "step": 6193 }, { "epoch": 0.79, "grad_norm": 0.6438808413111474, "learning_rate": 1.1214435464779006e-06, "loss": 0.4696, "step": 6194 }, { "epoch": 0.79, "grad_norm": 0.629225317410022, "learning_rate": 1.1201417479939409e-06, "loss": 0.5351, "step": 6195 }, { "epoch": 0.79, "grad_norm": 0.9578969302653552, "learning_rate": 1.1188406102094212e-06, "loss": 0.6284, "step": 6196 }, { "epoch": 0.79, "grad_norm": 0.6630072380433006, "learning_rate": 1.1175401333459125e-06, "loss": 0.5373, "step": 6197 }, { "epoch": 0.79, "grad_norm": 0.9070015214670604, "learning_rate": 1.1162403176248715e-06, "loss": 0.5891, "step": 6198 }, { "epoch": 0.79, "grad_norm": 0.5866880757374822, "learning_rate": 1.1149411632676438e-06, "loss": 0.4916, "step": 6199 }, { "epoch": 0.79, "grad_norm": 0.58782424096755, "learning_rate": 1.1136426704954622e-06, "loss": 0.4622, "step": 6200 }, { "epoch": 0.79, "grad_norm": 0.8056465797658702, "learning_rate": 1.1123448395294456e-06, "loss": 0.5852, "step": 6201 }, { "epoch": 0.79, "grad_norm": 0.8645160049101256, "learning_rate": 1.1110476705906015e-06, "loss": 0.6357, "step": 6202 }, { "epoch": 0.79, "grad_norm": 0.731765507538455, "learning_rate": 1.1097511638998242e-06, "loss": 0.5594, "step": 6203 }, { "epoch": 0.79, "grad_norm": 0.7261411816489569, "learning_rate": 1.1084553196778958e-06, "loss": 0.5415, "step": 6204 }, { "epoch": 0.79, "grad_norm": 0.848535599717028, "learning_rate": 1.1071601381454845e-06, "loss": 0.5527, "step": 6205 }, { "epoch": 0.79, "grad_norm": 0.8321029980681731, "learning_rate": 1.105865619523146e-06, "loss": 0.6308, "step": 6206 }, { "epoch": 0.79, "grad_norm": 0.9830634768572302, "learning_rate": 1.1045717640313242e-06, "loss": 0.6878, "step": 6207 }, { "epoch": 0.79, "grad_norm": 0.7405648641021086, "learning_rate": 1.1032785718903483e-06, "loss": 0.5296, "step": 6208 }, { "epoch": 0.79, "grad_norm": 0.7601487978703901, "learning_rate": 1.101986043320436e-06, "loss": 0.6108, "step": 6209 }, { "epoch": 0.79, "grad_norm": 0.8406239980286435, "learning_rate": 1.1006941785416913e-06, "loss": 0.6328, "step": 6210 }, { "epoch": 0.79, "grad_norm": 0.7530948194857948, "learning_rate": 1.099402977774105e-06, "loss": 0.541, "step": 6211 }, { "epoch": 0.79, "grad_norm": 0.6766893753807784, "learning_rate": 1.098112441237556e-06, "loss": 0.5138, "step": 6212 }, { "epoch": 0.79, "grad_norm": 0.6519095171781057, "learning_rate": 1.0968225691518087e-06, "loss": 0.513, "step": 6213 }, { "epoch": 0.79, "grad_norm": 0.8115321958296825, "learning_rate": 1.0955333617365143e-06, "loss": 0.5079, "step": 6214 }, { "epoch": 0.79, "grad_norm": 0.6460120675259842, "learning_rate": 1.0942448192112126e-06, "loss": 0.5183, "step": 6215 }, { "epoch": 0.79, "grad_norm": 0.6380077886955908, "learning_rate": 1.0929569417953279e-06, "loss": 0.4961, "step": 6216 }, { "epoch": 0.79, "grad_norm": 0.6552424049911305, "learning_rate": 1.091669729708173e-06, "loss": 0.5324, "step": 6217 }, { "epoch": 0.79, "grad_norm": 0.9788464200653787, "learning_rate": 1.090383183168946e-06, "loss": 0.6374, "step": 6218 }, { "epoch": 0.79, "grad_norm": 0.8082605448167149, "learning_rate": 1.0890973023967316e-06, "loss": 0.5505, "step": 6219 }, { "epoch": 0.79, "grad_norm": 0.6598846093612981, "learning_rate": 1.0878120876105048e-06, "loss": 0.448, "step": 6220 }, { "epoch": 0.79, "grad_norm": 0.9606216679552523, "learning_rate": 1.0865275390291224e-06, "loss": 0.6129, "step": 6221 }, { "epoch": 0.79, "grad_norm": 0.7190480441961784, "learning_rate": 1.0852436568713298e-06, "loss": 0.5212, "step": 6222 }, { "epoch": 0.79, "grad_norm": 0.6236530283726269, "learning_rate": 1.0839604413557586e-06, "loss": 0.5166, "step": 6223 }, { "epoch": 0.79, "grad_norm": 0.8125470559413996, "learning_rate": 1.082677892700928e-06, "loss": 0.6029, "step": 6224 }, { "epoch": 0.79, "grad_norm": 0.7001503441112323, "learning_rate": 1.0813960111252414e-06, "loss": 0.549, "step": 6225 }, { "epoch": 0.79, "grad_norm": 0.7816738434130006, "learning_rate": 1.0801147968469899e-06, "loss": 0.635, "step": 6226 }, { "epoch": 0.79, "grad_norm": 0.7835350219471248, "learning_rate": 1.0788342500843523e-06, "loss": 0.6075, "step": 6227 }, { "epoch": 0.79, "grad_norm": 0.6754054734801362, "learning_rate": 1.077554371055391e-06, "loss": 0.5141, "step": 6228 }, { "epoch": 0.79, "grad_norm": 0.9462088514474958, "learning_rate": 1.0762751599780568e-06, "loss": 0.6178, "step": 6229 }, { "epoch": 0.79, "grad_norm": 0.9549454473405544, "learning_rate": 1.0749966170701853e-06, "loss": 0.6146, "step": 6230 }, { "epoch": 0.79, "grad_norm": 0.6648093085411874, "learning_rate": 1.0737187425495e-06, "loss": 0.4686, "step": 6231 }, { "epoch": 0.79, "grad_norm": 0.8424240595325632, "learning_rate": 1.0724415366336094e-06, "loss": 0.6633, "step": 6232 }, { "epoch": 0.79, "grad_norm": 0.8061080345778929, "learning_rate": 1.0711649995400074e-06, "loss": 0.5888, "step": 6233 }, { "epoch": 0.79, "grad_norm": 0.7234579542316615, "learning_rate": 1.069889131486076e-06, "loss": 0.6015, "step": 6234 }, { "epoch": 0.79, "grad_norm": 0.853396978446328, "learning_rate": 1.068613932689082e-06, "loss": 0.6162, "step": 6235 }, { "epoch": 0.79, "grad_norm": 0.8172353131822218, "learning_rate": 1.0673394033661783e-06, "loss": 0.5776, "step": 6236 }, { "epoch": 0.79, "grad_norm": 0.8205851146633035, "learning_rate": 1.0660655437344041e-06, "loss": 0.594, "step": 6237 }, { "epoch": 0.79, "grad_norm": 1.011073369699019, "learning_rate": 1.0647923540106842e-06, "loss": 0.6244, "step": 6238 }, { "epoch": 0.79, "grad_norm": 0.9431651575566694, "learning_rate": 1.0635198344118298e-06, "loss": 0.563, "step": 6239 }, { "epoch": 0.79, "grad_norm": 0.7203332072907978, "learning_rate": 1.0622479851545376e-06, "loss": 0.5172, "step": 6240 }, { "epoch": 0.8, "grad_norm": 1.5330732846790553, "learning_rate": 1.0609768064553906e-06, "loss": 0.6169, "step": 6241 }, { "epoch": 0.8, "grad_norm": 0.7922575471273464, "learning_rate": 1.0597062985308565e-06, "loss": 0.6055, "step": 6242 }, { "epoch": 0.8, "grad_norm": 0.8960165388719186, "learning_rate": 1.058436461597291e-06, "loss": 0.607, "step": 6243 }, { "epoch": 0.8, "grad_norm": 0.7009826156987234, "learning_rate": 1.0571672958709323e-06, "loss": 0.4863, "step": 6244 }, { "epoch": 0.8, "grad_norm": 0.8613762557827476, "learning_rate": 1.0558988015679056e-06, "loss": 0.5474, "step": 6245 }, { "epoch": 0.8, "grad_norm": 0.5716277494162099, "learning_rate": 1.0546309789042252e-06, "loss": 0.4953, "step": 6246 }, { "epoch": 0.8, "grad_norm": 0.8906396133121085, "learning_rate": 1.053363828095787e-06, "loss": 0.6278, "step": 6247 }, { "epoch": 0.8, "grad_norm": 0.614837720931292, "learning_rate": 1.0520973493583725e-06, "loss": 0.527, "step": 6248 }, { "epoch": 0.8, "grad_norm": 0.6737021852045152, "learning_rate": 1.0508315429076509e-06, "loss": 0.5392, "step": 6249 }, { "epoch": 0.8, "grad_norm": 0.8202359192341226, "learning_rate": 1.049566408959175e-06, "loss": 0.6121, "step": 6250 }, { "epoch": 0.8, "grad_norm": 0.7362648230998704, "learning_rate": 1.0483019477283845e-06, "loss": 0.596, "step": 6251 }, { "epoch": 0.8, "grad_norm": 0.8718961809706266, "learning_rate": 1.047038159430604e-06, "loss": 0.6618, "step": 6252 }, { "epoch": 0.8, "grad_norm": 0.8422450533487897, "learning_rate": 1.0457750442810433e-06, "loss": 0.5744, "step": 6253 }, { "epoch": 0.8, "grad_norm": 0.6586926569265514, "learning_rate": 1.0445126024947971e-06, "loss": 0.5133, "step": 6254 }, { "epoch": 0.8, "grad_norm": 0.684533044519361, "learning_rate": 1.0432508342868475e-06, "loss": 0.5882, "step": 6255 }, { "epoch": 0.8, "grad_norm": 0.7911646447510978, "learning_rate": 1.041989739872059e-06, "loss": 0.5693, "step": 6256 }, { "epoch": 0.8, "grad_norm": 1.2598379082550029, "learning_rate": 1.0407293194651841e-06, "loss": 0.6607, "step": 6257 }, { "epoch": 0.8, "grad_norm": 0.7961612572007144, "learning_rate": 1.0394695732808586e-06, "loss": 0.5328, "step": 6258 }, { "epoch": 0.8, "grad_norm": 0.6672435034180854, "learning_rate": 1.0382105015336042e-06, "loss": 0.5146, "step": 6259 }, { "epoch": 0.8, "grad_norm": 0.7198639090313526, "learning_rate": 1.0369521044378278e-06, "loss": 0.5718, "step": 6260 }, { "epoch": 0.8, "grad_norm": 0.8923439659012693, "learning_rate": 1.0356943822078213e-06, "loss": 0.5887, "step": 6261 }, { "epoch": 0.8, "grad_norm": 0.995128344461852, "learning_rate": 1.034437335057762e-06, "loss": 0.6101, "step": 6262 }, { "epoch": 0.8, "grad_norm": 0.6566195268725807, "learning_rate": 1.033180963201711e-06, "loss": 0.5229, "step": 6263 }, { "epoch": 0.8, "grad_norm": 0.820809713397241, "learning_rate": 1.031925266853616e-06, "loss": 0.6099, "step": 6264 }, { "epoch": 0.8, "grad_norm": 0.7802241174866805, "learning_rate": 1.0306702462273093e-06, "loss": 0.5923, "step": 6265 }, { "epoch": 0.8, "grad_norm": 0.8156660113492312, "learning_rate": 1.0294159015365073e-06, "loss": 0.5571, "step": 6266 }, { "epoch": 0.8, "grad_norm": 0.682182596830582, "learning_rate": 1.0281622329948122e-06, "loss": 0.5005, "step": 6267 }, { "epoch": 0.8, "grad_norm": 0.6995287117136341, "learning_rate": 1.0269092408157104e-06, "loss": 0.502, "step": 6268 }, { "epoch": 0.8, "grad_norm": 0.8567453344893206, "learning_rate": 1.0256569252125736e-06, "loss": 0.6122, "step": 6269 }, { "epoch": 0.8, "grad_norm": 0.7311172055675106, "learning_rate": 1.0244052863986575e-06, "loss": 0.5522, "step": 6270 }, { "epoch": 0.8, "grad_norm": 0.8766170310779489, "learning_rate": 1.0231543245871028e-06, "loss": 0.5918, "step": 6271 }, { "epoch": 0.8, "grad_norm": 0.713738796145529, "learning_rate": 1.0219040399909375e-06, "loss": 0.5271, "step": 6272 }, { "epoch": 0.8, "grad_norm": 0.9568214224760472, "learning_rate": 1.0206544328230705e-06, "loss": 0.5951, "step": 6273 }, { "epoch": 0.8, "grad_norm": 0.8079660719584748, "learning_rate": 1.0194055032962973e-06, "loss": 0.6152, "step": 6274 }, { "epoch": 0.8, "grad_norm": 0.832200353042896, "learning_rate": 1.0181572516232973e-06, "loss": 0.6097, "step": 6275 }, { "epoch": 0.8, "grad_norm": 0.7485972629506407, "learning_rate": 1.0169096780166348e-06, "loss": 0.5347, "step": 6276 }, { "epoch": 0.8, "grad_norm": 0.7655463644938711, "learning_rate": 1.0156627826887572e-06, "loss": 0.5326, "step": 6277 }, { "epoch": 0.8, "grad_norm": 0.6501270495038894, "learning_rate": 1.0144165658520006e-06, "loss": 0.5254, "step": 6278 }, { "epoch": 0.8, "grad_norm": 0.6745780888093885, "learning_rate": 1.0131710277185814e-06, "loss": 0.537, "step": 6279 }, { "epoch": 0.8, "grad_norm": 0.7763817561059477, "learning_rate": 1.0119261685006016e-06, "loss": 0.6129, "step": 6280 }, { "epoch": 0.8, "grad_norm": 0.6481494702559437, "learning_rate": 1.0106819884100478e-06, "loss": 0.5146, "step": 6281 }, { "epoch": 0.8, "grad_norm": 0.8744103792126252, "learning_rate": 1.0094384876587909e-06, "loss": 0.6936, "step": 6282 }, { "epoch": 0.8, "grad_norm": 0.8081329439939369, "learning_rate": 1.0081956664585863e-06, "loss": 0.5273, "step": 6283 }, { "epoch": 0.8, "grad_norm": 0.8218241318554043, "learning_rate": 1.0069535250210733e-06, "loss": 0.5723, "step": 6284 }, { "epoch": 0.8, "grad_norm": 0.8323508997339945, "learning_rate": 1.005712063557776e-06, "loss": 0.5859, "step": 6285 }, { "epoch": 0.8, "grad_norm": 0.6833483456005509, "learning_rate": 1.0044712822801018e-06, "loss": 0.5094, "step": 6286 }, { "epoch": 0.8, "grad_norm": 0.7376026633247432, "learning_rate": 1.0032311813993433e-06, "loss": 0.5705, "step": 6287 }, { "epoch": 0.8, "grad_norm": 0.6637827017026487, "learning_rate": 1.001991761126676e-06, "loss": 0.531, "step": 6288 }, { "epoch": 0.8, "grad_norm": 0.7060537044332207, "learning_rate": 1.0007530216731614e-06, "loss": 0.5112, "step": 6289 }, { "epoch": 0.8, "grad_norm": 0.7486848973427744, "learning_rate": 9.995149632497435e-07, "loss": 0.5348, "step": 6290 }, { "epoch": 0.8, "grad_norm": 0.713976176966678, "learning_rate": 9.982775860672507e-07, "loss": 0.5323, "step": 6291 }, { "epoch": 0.8, "grad_norm": 0.9228682757431986, "learning_rate": 9.97040890336395e-07, "loss": 0.6437, "step": 6292 }, { "epoch": 0.8, "grad_norm": 0.8681026717342937, "learning_rate": 9.95804876267773e-07, "loss": 0.6467, "step": 6293 }, { "epoch": 0.8, "grad_norm": 0.891720177211184, "learning_rate": 9.94569544071865e-07, "loss": 0.6566, "step": 6294 }, { "epoch": 0.8, "grad_norm": 0.7155541886546622, "learning_rate": 9.933348939590358e-07, "loss": 0.5291, "step": 6295 }, { "epoch": 0.8, "grad_norm": 0.8106697195823868, "learning_rate": 9.921009261395332e-07, "loss": 0.702, "step": 6296 }, { "epoch": 0.8, "grad_norm": 0.8378337582728091, "learning_rate": 9.908676408234884e-07, "loss": 0.5963, "step": 6297 }, { "epoch": 0.8, "grad_norm": 0.7856225166543965, "learning_rate": 9.896350382209175e-07, "loss": 0.5445, "step": 6298 }, { "epoch": 0.8, "grad_norm": 0.6981942904144177, "learning_rate": 9.884031185417204e-07, "loss": 0.534, "step": 6299 }, { "epoch": 0.8, "grad_norm": 1.6213183746973765, "learning_rate": 9.871718819956794e-07, "loss": 0.6102, "step": 6300 }, { "epoch": 0.8, "grad_norm": 0.7772848459838978, "learning_rate": 9.859413287924613e-07, "loss": 0.6649, "step": 6301 }, { "epoch": 0.8, "grad_norm": 0.9619917126053887, "learning_rate": 9.847114591416174e-07, "loss": 0.6294, "step": 6302 }, { "epoch": 0.8, "grad_norm": 0.6182236137916943, "learning_rate": 9.834822732525795e-07, "loss": 0.4744, "step": 6303 }, { "epoch": 0.8, "grad_norm": 0.7906434671275144, "learning_rate": 9.822537713346674e-07, "loss": 0.5943, "step": 6304 }, { "epoch": 0.8, "grad_norm": 0.6999346251123362, "learning_rate": 9.810259535970822e-07, "loss": 0.5183, "step": 6305 }, { "epoch": 0.8, "grad_norm": 0.8120641418406452, "learning_rate": 9.797988202489073e-07, "loss": 0.6163, "step": 6306 }, { "epoch": 0.8, "grad_norm": 0.9170177494928352, "learning_rate": 9.785723714991113e-07, "loss": 0.6439, "step": 6307 }, { "epoch": 0.8, "grad_norm": 0.7407648585684432, "learning_rate": 9.773466075565457e-07, "loss": 0.5794, "step": 6308 }, { "epoch": 0.8, "grad_norm": 0.7570388545399602, "learning_rate": 9.761215286299448e-07, "loss": 0.5619, "step": 6309 }, { "epoch": 0.8, "grad_norm": 0.7003609097323054, "learning_rate": 9.748971349279273e-07, "loss": 0.5164, "step": 6310 }, { "epoch": 0.8, "grad_norm": 0.7846353207242666, "learning_rate": 9.736734266589949e-07, "loss": 0.633, "step": 6311 }, { "epoch": 0.8, "grad_norm": 0.7442210558441857, "learning_rate": 9.724504040315313e-07, "loss": 0.4941, "step": 6312 }, { "epoch": 0.8, "grad_norm": 0.7851932250667001, "learning_rate": 9.712280672538056e-07, "loss": 0.5389, "step": 6313 }, { "epoch": 0.8, "grad_norm": 1.0628178993798705, "learning_rate": 9.700064165339679e-07, "loss": 0.6568, "step": 6314 }, { "epoch": 0.8, "grad_norm": 0.8270627781034178, "learning_rate": 9.687854520800538e-07, "loss": 0.6764, "step": 6315 }, { "epoch": 0.8, "grad_norm": 0.8703573663692341, "learning_rate": 9.67565174099981e-07, "loss": 0.6785, "step": 6316 }, { "epoch": 0.8, "grad_norm": 0.7432694516472188, "learning_rate": 9.663455828015483e-07, "loss": 0.5056, "step": 6317 }, { "epoch": 0.8, "grad_norm": 0.8401794413763661, "learning_rate": 9.651266783924413e-07, "loss": 0.5762, "step": 6318 }, { "epoch": 0.81, "grad_norm": 0.6502974963213034, "learning_rate": 9.639084610802258e-07, "loss": 0.5132, "step": 6319 }, { "epoch": 0.81, "grad_norm": 0.9041666141911302, "learning_rate": 9.626909310723514e-07, "loss": 0.6339, "step": 6320 }, { "epoch": 0.81, "grad_norm": 0.9661907743730896, "learning_rate": 9.614740885761514e-07, "loss": 0.6125, "step": 6321 }, { "epoch": 0.81, "grad_norm": 0.6989020740784724, "learning_rate": 9.602579337988405e-07, "loss": 0.5244, "step": 6322 }, { "epoch": 0.81, "grad_norm": 0.6942587172255824, "learning_rate": 9.590424669475185e-07, "loss": 0.5369, "step": 6323 }, { "epoch": 0.81, "grad_norm": 0.8215775919701693, "learning_rate": 9.578276882291654e-07, "loss": 0.5938, "step": 6324 }, { "epoch": 0.81, "grad_norm": 0.7763457187065593, "learning_rate": 9.566135978506463e-07, "loss": 0.6062, "step": 6325 }, { "epoch": 0.81, "grad_norm": 0.8439439083412723, "learning_rate": 9.554001960187076e-07, "loss": 0.5933, "step": 6326 }, { "epoch": 0.81, "grad_norm": 0.6865102763966442, "learning_rate": 9.541874829399794e-07, "loss": 0.5598, "step": 6327 }, { "epoch": 0.81, "grad_norm": 0.792086123196357, "learning_rate": 9.529754588209739e-07, "loss": 0.5553, "step": 6328 }, { "epoch": 0.81, "grad_norm": 0.7952880125025787, "learning_rate": 9.517641238680858e-07, "loss": 0.5129, "step": 6329 }, { "epoch": 0.81, "grad_norm": 0.9459410360699414, "learning_rate": 9.505534782875919e-07, "loss": 0.6355, "step": 6330 }, { "epoch": 0.81, "grad_norm": 1.0417818703953796, "learning_rate": 9.493435222856556e-07, "loss": 0.6733, "step": 6331 }, { "epoch": 0.81, "grad_norm": 0.7685259581956614, "learning_rate": 9.481342560683177e-07, "loss": 0.5507, "step": 6332 }, { "epoch": 0.81, "grad_norm": 0.8153557506737833, "learning_rate": 9.469256798415039e-07, "loss": 0.6318, "step": 6333 }, { "epoch": 0.81, "grad_norm": 0.905984596158335, "learning_rate": 9.457177938110224e-07, "loss": 0.6344, "step": 6334 }, { "epoch": 0.81, "grad_norm": 0.713504844088086, "learning_rate": 9.445105981825631e-07, "loss": 0.5502, "step": 6335 }, { "epoch": 0.81, "grad_norm": 0.8501016308531509, "learning_rate": 9.433040931616994e-07, "loss": 0.6587, "step": 6336 }, { "epoch": 0.81, "grad_norm": 0.8894184030141193, "learning_rate": 9.420982789538863e-07, "loss": 0.5844, "step": 6337 }, { "epoch": 0.81, "grad_norm": 0.8014474699975243, "learning_rate": 9.408931557644612e-07, "loss": 0.5991, "step": 6338 }, { "epoch": 0.81, "grad_norm": 0.6616724922259084, "learning_rate": 9.396887237986446e-07, "loss": 0.5244, "step": 6339 }, { "epoch": 0.81, "grad_norm": 0.670124248743178, "learning_rate": 9.384849832615378e-07, "loss": 0.5429, "step": 6340 }, { "epoch": 0.81, "grad_norm": 0.9663526142345502, "learning_rate": 9.372819343581263e-07, "loss": 0.6628, "step": 6341 }, { "epoch": 0.81, "grad_norm": 1.1966347654919072, "learning_rate": 9.360795772932757e-07, "loss": 0.6262, "step": 6342 }, { "epoch": 0.81, "grad_norm": 0.8772406751189397, "learning_rate": 9.348779122717361e-07, "loss": 0.6318, "step": 6343 }, { "epoch": 0.81, "grad_norm": 0.6198695705419238, "learning_rate": 9.336769394981377e-07, "loss": 0.4884, "step": 6344 }, { "epoch": 0.81, "grad_norm": 0.7309703264407184, "learning_rate": 9.324766591769935e-07, "loss": 0.5811, "step": 6345 }, { "epoch": 0.81, "grad_norm": 0.7704208564997415, "learning_rate": 9.312770715126995e-07, "loss": 0.6154, "step": 6346 }, { "epoch": 0.81, "grad_norm": 0.8255495150700635, "learning_rate": 9.300781767095324e-07, "loss": 0.6275, "step": 6347 }, { "epoch": 0.81, "grad_norm": 0.6469598942949039, "learning_rate": 9.288799749716515e-07, "loss": 0.5169, "step": 6348 }, { "epoch": 0.81, "grad_norm": 0.7209148556957957, "learning_rate": 9.276824665030987e-07, "loss": 0.5758, "step": 6349 }, { "epoch": 0.81, "grad_norm": 0.6903977614344166, "learning_rate": 9.264856515077963e-07, "loss": 0.5184, "step": 6350 }, { "epoch": 0.81, "grad_norm": 0.7610622064284739, "learning_rate": 9.252895301895498e-07, "loss": 0.5348, "step": 6351 }, { "epoch": 0.81, "grad_norm": 0.6234572038760733, "learning_rate": 9.240941027520467e-07, "loss": 0.5325, "step": 6352 }, { "epoch": 0.81, "grad_norm": 0.9358115177267461, "learning_rate": 9.228993693988552e-07, "loss": 0.6189, "step": 6353 }, { "epoch": 0.81, "grad_norm": 0.6530056342410644, "learning_rate": 9.217053303334261e-07, "loss": 0.5526, "step": 6354 }, { "epoch": 0.81, "grad_norm": 0.9130393891859884, "learning_rate": 9.205119857590922e-07, "loss": 0.6836, "step": 6355 }, { "epoch": 0.81, "grad_norm": 0.7188109948012957, "learning_rate": 9.193193358790659e-07, "loss": 0.5856, "step": 6356 }, { "epoch": 0.81, "grad_norm": 0.7572151585042509, "learning_rate": 9.181273808964458e-07, "loss": 0.5072, "step": 6357 }, { "epoch": 0.81, "grad_norm": 0.6728016940172079, "learning_rate": 9.16936121014208e-07, "loss": 0.5378, "step": 6358 }, { "epoch": 0.81, "grad_norm": 0.7294785169408458, "learning_rate": 9.157455564352113e-07, "loss": 0.5511, "step": 6359 }, { "epoch": 0.81, "grad_norm": 0.6325989437731061, "learning_rate": 9.145556873621975e-07, "loss": 0.5211, "step": 6360 }, { "epoch": 0.81, "grad_norm": 0.6986411665808339, "learning_rate": 9.133665139977882e-07, "loss": 0.5113, "step": 6361 }, { "epoch": 0.81, "grad_norm": 0.8420509759587462, "learning_rate": 9.121780365444872e-07, "loss": 0.6081, "step": 6362 }, { "epoch": 0.81, "grad_norm": 0.7028574459494599, "learning_rate": 9.109902552046801e-07, "loss": 0.5869, "step": 6363 }, { "epoch": 0.81, "grad_norm": 0.7961708199236058, "learning_rate": 9.098031701806337e-07, "loss": 0.5573, "step": 6364 }, { "epoch": 0.81, "grad_norm": 0.927958601548571, "learning_rate": 9.086167816744956e-07, "loss": 0.6459, "step": 6365 }, { "epoch": 0.81, "grad_norm": 0.6727780101487731, "learning_rate": 9.074310898882959e-07, "loss": 0.5791, "step": 6366 }, { "epoch": 0.81, "grad_norm": 0.6969774042320578, "learning_rate": 9.06246095023946e-07, "loss": 0.5567, "step": 6367 }, { "epoch": 0.81, "grad_norm": 0.6832756967576599, "learning_rate": 9.050617972832371e-07, "loss": 0.5323, "step": 6368 }, { "epoch": 0.81, "grad_norm": 0.873415656932259, "learning_rate": 9.03878196867844e-07, "loss": 0.6124, "step": 6369 }, { "epoch": 0.81, "grad_norm": 0.7211061479204822, "learning_rate": 9.026952939793205e-07, "loss": 0.5625, "step": 6370 }, { "epoch": 0.81, "grad_norm": 0.6046508241330434, "learning_rate": 9.015130888191026e-07, "loss": 0.4904, "step": 6371 }, { "epoch": 0.81, "grad_norm": 0.7240367374676969, "learning_rate": 9.003315815885083e-07, "loss": 0.5416, "step": 6372 }, { "epoch": 0.81, "grad_norm": 0.7552968507889684, "learning_rate": 8.99150772488736e-07, "loss": 0.5648, "step": 6373 }, { "epoch": 0.81, "grad_norm": 0.6444843294447101, "learning_rate": 8.979706617208644e-07, "loss": 0.4902, "step": 6374 }, { "epoch": 0.81, "grad_norm": 0.8652895633378936, "learning_rate": 8.96791249485855e-07, "loss": 0.6235, "step": 6375 }, { "epoch": 0.81, "grad_norm": 0.7362087482833665, "learning_rate": 8.95612535984548e-07, "loss": 0.4955, "step": 6376 }, { "epoch": 0.81, "grad_norm": 0.7442295393655132, "learning_rate": 8.944345214176675e-07, "loss": 0.5593, "step": 6377 }, { "epoch": 0.81, "grad_norm": 0.8884109501699472, "learning_rate": 8.932572059858152e-07, "loss": 0.6404, "step": 6378 }, { "epoch": 0.81, "grad_norm": 0.710718206757313, "learning_rate": 8.920805898894785e-07, "loss": 0.5346, "step": 6379 }, { "epoch": 0.81, "grad_norm": 0.7705635707168244, "learning_rate": 8.909046733290211e-07, "loss": 0.606, "step": 6380 }, { "epoch": 0.81, "grad_norm": 0.9028447540402339, "learning_rate": 8.897294565046894e-07, "loss": 0.6082, "step": 6381 }, { "epoch": 0.81, "grad_norm": 0.8651942490622496, "learning_rate": 8.885549396166109e-07, "loss": 0.5092, "step": 6382 }, { "epoch": 0.81, "grad_norm": 0.7637105023210538, "learning_rate": 8.873811228647933e-07, "loss": 0.4897, "step": 6383 }, { "epoch": 0.81, "grad_norm": 0.8446746908123323, "learning_rate": 8.862080064491258e-07, "loss": 0.6442, "step": 6384 }, { "epoch": 0.81, "grad_norm": 0.7506576838512039, "learning_rate": 8.850355905693775e-07, "loss": 0.5856, "step": 6385 }, { "epoch": 0.81, "grad_norm": 0.6526432601773813, "learning_rate": 8.838638754251988e-07, "loss": 0.4714, "step": 6386 }, { "epoch": 0.81, "grad_norm": 1.2469464106570451, "learning_rate": 8.826928612161206e-07, "loss": 0.6614, "step": 6387 }, { "epoch": 0.81, "grad_norm": 0.9555022354121456, "learning_rate": 8.815225481415529e-07, "loss": 0.65, "step": 6388 }, { "epoch": 0.81, "grad_norm": 0.8031010869418164, "learning_rate": 8.803529364007907e-07, "loss": 0.5896, "step": 6389 }, { "epoch": 0.81, "grad_norm": 0.6506408573927392, "learning_rate": 8.791840261930051e-07, "loss": 0.5076, "step": 6390 }, { "epoch": 0.81, "grad_norm": 0.7958142341409334, "learning_rate": 8.780158177172493e-07, "loss": 0.6211, "step": 6391 }, { "epoch": 0.81, "grad_norm": 0.6992844880535665, "learning_rate": 8.76848311172458e-07, "loss": 0.5018, "step": 6392 }, { "epoch": 0.81, "grad_norm": 0.8464495817244385, "learning_rate": 8.756815067574437e-07, "loss": 0.5834, "step": 6393 }, { "epoch": 0.81, "grad_norm": 0.6769978154615174, "learning_rate": 8.745154046709025e-07, "loss": 0.522, "step": 6394 }, { "epoch": 0.81, "grad_norm": 0.7282635362494032, "learning_rate": 8.733500051114085e-07, "loss": 0.5223, "step": 6395 }, { "epoch": 0.81, "grad_norm": 0.7064724956371685, "learning_rate": 8.721853082774179e-07, "loss": 0.5072, "step": 6396 }, { "epoch": 0.81, "grad_norm": 0.6736115477866378, "learning_rate": 8.710213143672658e-07, "loss": 0.4898, "step": 6397 }, { "epoch": 0.82, "grad_norm": 0.6872224234036635, "learning_rate": 8.698580235791687e-07, "loss": 0.4781, "step": 6398 }, { "epoch": 0.82, "grad_norm": 0.7645808313815857, "learning_rate": 8.686954361112226e-07, "loss": 0.6034, "step": 6399 }, { "epoch": 0.82, "grad_norm": 0.8205171128663795, "learning_rate": 8.675335521614036e-07, "loss": 0.675, "step": 6400 }, { "epoch": 0.82, "grad_norm": 0.6973765243887022, "learning_rate": 8.663723719275696e-07, "loss": 0.5552, "step": 6401 }, { "epoch": 0.82, "grad_norm": 0.8716466453068313, "learning_rate": 8.652118956074568e-07, "loss": 0.5486, "step": 6402 }, { "epoch": 0.82, "grad_norm": 0.910889051339716, "learning_rate": 8.640521233986821e-07, "loss": 0.6764, "step": 6403 }, { "epoch": 0.82, "grad_norm": 0.897066922239198, "learning_rate": 8.62893055498743e-07, "loss": 0.5139, "step": 6404 }, { "epoch": 0.82, "grad_norm": 0.9734767889047309, "learning_rate": 8.617346921050162e-07, "loss": 0.6108, "step": 6405 }, { "epoch": 0.82, "grad_norm": 0.7916151181150848, "learning_rate": 8.60577033414759e-07, "loss": 0.6009, "step": 6406 }, { "epoch": 0.82, "grad_norm": 0.8315242732182044, "learning_rate": 8.59420079625109e-07, "loss": 0.5479, "step": 6407 }, { "epoch": 0.82, "grad_norm": 1.2298868688925846, "learning_rate": 8.582638309330832e-07, "loss": 0.6719, "step": 6408 }, { "epoch": 0.82, "grad_norm": 0.8750952535243524, "learning_rate": 8.57108287535579e-07, "loss": 0.6203, "step": 6409 }, { "epoch": 0.82, "grad_norm": 0.6481336043044734, "learning_rate": 8.559534496293731e-07, "loss": 0.5055, "step": 6410 }, { "epoch": 0.82, "grad_norm": 0.7258832671269168, "learning_rate": 8.547993174111219e-07, "loss": 0.5998, "step": 6411 }, { "epoch": 0.82, "grad_norm": 0.8904719368762815, "learning_rate": 8.536458910773626e-07, "loss": 0.6317, "step": 6412 }, { "epoch": 0.82, "grad_norm": 0.7792863333282625, "learning_rate": 8.524931708245121e-07, "loss": 0.5018, "step": 6413 }, { "epoch": 0.82, "grad_norm": 1.0350283676168204, "learning_rate": 8.513411568488645e-07, "loss": 0.5705, "step": 6414 }, { "epoch": 0.82, "grad_norm": 0.7300904436997702, "learning_rate": 8.501898493465987e-07, "loss": 0.5725, "step": 6415 }, { "epoch": 0.82, "grad_norm": 0.9430342021084578, "learning_rate": 8.490392485137694e-07, "loss": 0.6605, "step": 6416 }, { "epoch": 0.82, "grad_norm": 0.8221835254769719, "learning_rate": 8.478893545463118e-07, "loss": 0.5958, "step": 6417 }, { "epoch": 0.82, "grad_norm": 0.9080898966147469, "learning_rate": 8.467401676400405e-07, "loss": 0.6525, "step": 6418 }, { "epoch": 0.82, "grad_norm": 0.9063293526838265, "learning_rate": 8.455916879906506e-07, "loss": 0.5777, "step": 6419 }, { "epoch": 0.82, "grad_norm": 0.6812565107290276, "learning_rate": 8.444439157937157e-07, "loss": 0.5403, "step": 6420 }, { "epoch": 0.82, "grad_norm": 0.8449181236722197, "learning_rate": 8.432968512446899e-07, "loss": 0.6618, "step": 6421 }, { "epoch": 0.82, "grad_norm": 0.7151088648532885, "learning_rate": 8.421504945389058e-07, "loss": 0.5615, "step": 6422 }, { "epoch": 0.82, "grad_norm": 0.8819943013699523, "learning_rate": 8.410048458715763e-07, "loss": 0.611, "step": 6423 }, { "epoch": 0.82, "grad_norm": 0.7398960907214395, "learning_rate": 8.398599054377938e-07, "loss": 0.5817, "step": 6424 }, { "epoch": 0.82, "grad_norm": 0.6737527039318283, "learning_rate": 8.387156734325286e-07, "loss": 0.4806, "step": 6425 }, { "epoch": 0.82, "grad_norm": 0.8898809918516477, "learning_rate": 8.375721500506329e-07, "loss": 0.6666, "step": 6426 }, { "epoch": 0.82, "grad_norm": 0.705381848449586, "learning_rate": 8.364293354868364e-07, "loss": 0.5413, "step": 6427 }, { "epoch": 0.82, "grad_norm": 0.9201966811073016, "learning_rate": 8.352872299357478e-07, "loss": 0.6921, "step": 6428 }, { "epoch": 0.82, "grad_norm": 0.9764504006838134, "learning_rate": 8.341458335918562e-07, "loss": 0.6028, "step": 6429 }, { "epoch": 0.82, "grad_norm": 0.8930701182556481, "learning_rate": 8.330051466495298e-07, "loss": 0.6304, "step": 6430 }, { "epoch": 0.82, "grad_norm": 0.8090905722357825, "learning_rate": 8.318651693030161e-07, "loss": 0.5732, "step": 6431 }, { "epoch": 0.82, "grad_norm": 0.7990617014192907, "learning_rate": 8.307259017464397e-07, "loss": 0.5524, "step": 6432 }, { "epoch": 0.82, "grad_norm": 0.8723261145153438, "learning_rate": 8.295873441738078e-07, "loss": 0.4804, "step": 6433 }, { "epoch": 0.82, "grad_norm": 0.9497407108176819, "learning_rate": 8.284494967790036e-07, "loss": 0.6029, "step": 6434 }, { "epoch": 0.82, "grad_norm": 0.7487588504033302, "learning_rate": 8.27312359755792e-07, "loss": 0.5049, "step": 6435 }, { "epoch": 0.82, "grad_norm": 0.9875815722822804, "learning_rate": 8.261759332978142e-07, "loss": 0.6082, "step": 6436 }, { "epoch": 0.82, "grad_norm": 0.602675938569681, "learning_rate": 8.250402175985922e-07, "loss": 0.4879, "step": 6437 }, { "epoch": 0.82, "grad_norm": 0.6970143125962176, "learning_rate": 8.239052128515268e-07, "loss": 0.5487, "step": 6438 }, { "epoch": 0.82, "grad_norm": 0.6684524885507424, "learning_rate": 8.227709192498979e-07, "loss": 0.5187, "step": 6439 }, { "epoch": 0.82, "grad_norm": 0.7580073133390448, "learning_rate": 8.21637336986863e-07, "loss": 0.5891, "step": 6440 }, { "epoch": 0.82, "grad_norm": 0.8224837185440056, "learning_rate": 8.205044662554584e-07, "loss": 0.565, "step": 6441 }, { "epoch": 0.82, "grad_norm": 0.8587563777130872, "learning_rate": 8.193723072486032e-07, "loss": 0.6328, "step": 6442 }, { "epoch": 0.82, "grad_norm": 0.7415384257076991, "learning_rate": 8.182408601590902e-07, "loss": 0.5113, "step": 6443 }, { "epoch": 0.82, "grad_norm": 0.828380262117733, "learning_rate": 8.171101251795937e-07, "loss": 0.6017, "step": 6444 }, { "epoch": 0.82, "grad_norm": 0.6839197399037709, "learning_rate": 8.159801025026659e-07, "loss": 0.5056, "step": 6445 }, { "epoch": 0.82, "grad_norm": 0.7660586088529385, "learning_rate": 8.148507923207377e-07, "loss": 0.5758, "step": 6446 }, { "epoch": 0.82, "grad_norm": 0.777553505297608, "learning_rate": 8.137221948261187e-07, "loss": 0.5946, "step": 6447 }, { "epoch": 0.82, "grad_norm": 0.6120111706389496, "learning_rate": 8.125943102109979e-07, "loss": 0.5102, "step": 6448 }, { "epoch": 0.82, "grad_norm": 0.8154463983584009, "learning_rate": 8.114671386674422e-07, "loss": 0.5613, "step": 6449 }, { "epoch": 0.82, "grad_norm": 0.7925579121584788, "learning_rate": 8.103406803873969e-07, "loss": 0.5276, "step": 6450 }, { "epoch": 0.82, "grad_norm": 0.8180563185991548, "learning_rate": 8.092149355626861e-07, "loss": 0.5566, "step": 6451 }, { "epoch": 0.82, "grad_norm": 0.6520611315975993, "learning_rate": 8.080899043850121e-07, "loss": 0.5128, "step": 6452 }, { "epoch": 0.82, "grad_norm": 0.6409915168930695, "learning_rate": 8.06965587045957e-07, "loss": 0.4664, "step": 6453 }, { "epoch": 0.82, "grad_norm": 0.7785096698611864, "learning_rate": 8.058419837369791e-07, "loss": 0.5641, "step": 6454 }, { "epoch": 0.82, "grad_norm": 1.2059408329842793, "learning_rate": 8.047190946494176e-07, "loss": 0.5678, "step": 6455 }, { "epoch": 0.82, "grad_norm": 0.8636108318229886, "learning_rate": 8.035969199744875e-07, "loss": 0.6432, "step": 6456 }, { "epoch": 0.82, "grad_norm": 0.6784763867394926, "learning_rate": 8.024754599032847e-07, "loss": 0.4905, "step": 6457 }, { "epoch": 0.82, "grad_norm": 0.6312876033399646, "learning_rate": 8.013547146267814e-07, "loss": 0.5438, "step": 6458 }, { "epoch": 0.82, "grad_norm": 0.7317142855710564, "learning_rate": 8.002346843358289e-07, "loss": 0.5523, "step": 6459 }, { "epoch": 0.82, "grad_norm": 0.897920036628498, "learning_rate": 7.991153692211568e-07, "loss": 0.5509, "step": 6460 }, { "epoch": 0.82, "grad_norm": 0.8568066928443241, "learning_rate": 7.979967694733726e-07, "loss": 0.6415, "step": 6461 }, { "epoch": 0.82, "grad_norm": 0.892723819171723, "learning_rate": 7.968788852829629e-07, "loss": 0.6203, "step": 6462 }, { "epoch": 0.82, "grad_norm": 0.7037148952275748, "learning_rate": 7.957617168402909e-07, "loss": 0.5094, "step": 6463 }, { "epoch": 0.82, "grad_norm": 0.8011052057685065, "learning_rate": 7.946452643355995e-07, "loss": 0.6489, "step": 6464 }, { "epoch": 0.82, "grad_norm": 0.908649020884252, "learning_rate": 7.935295279590082e-07, "loss": 0.6019, "step": 6465 }, { "epoch": 0.82, "grad_norm": 0.8202694790046764, "learning_rate": 7.924145079005158e-07, "loss": 0.6325, "step": 6466 }, { "epoch": 0.82, "grad_norm": 0.6918177701681298, "learning_rate": 7.913002043499968e-07, "loss": 0.5543, "step": 6467 }, { "epoch": 0.82, "grad_norm": 0.8514098585170228, "learning_rate": 7.901866174972084e-07, "loss": 0.5362, "step": 6468 }, { "epoch": 0.82, "grad_norm": 0.9216796790734681, "learning_rate": 7.890737475317816e-07, "loss": 0.5834, "step": 6469 }, { "epoch": 0.82, "grad_norm": 0.6654423347433402, "learning_rate": 7.879615946432268e-07, "loss": 0.5403, "step": 6470 }, { "epoch": 0.82, "grad_norm": 0.7535850823083603, "learning_rate": 7.868501590209315e-07, "loss": 0.5056, "step": 6471 }, { "epoch": 0.82, "grad_norm": 0.6950152086840601, "learning_rate": 7.857394408541619e-07, "loss": 0.5256, "step": 6472 }, { "epoch": 0.82, "grad_norm": 0.6618727857489061, "learning_rate": 7.846294403320614e-07, "loss": 0.5756, "step": 6473 }, { "epoch": 0.82, "grad_norm": 0.8479021801667956, "learning_rate": 7.835201576436518e-07, "loss": 0.6183, "step": 6474 }, { "epoch": 0.82, "grad_norm": 0.7812017074292636, "learning_rate": 7.824115929778325e-07, "loss": 0.5057, "step": 6475 }, { "epoch": 0.83, "grad_norm": 0.7723323111542498, "learning_rate": 7.813037465233808e-07, "loss": 0.4913, "step": 6476 }, { "epoch": 0.83, "grad_norm": 0.6851069637669087, "learning_rate": 7.801966184689502e-07, "loss": 0.5185, "step": 6477 }, { "epoch": 0.83, "grad_norm": 0.8930163809330414, "learning_rate": 7.790902090030745e-07, "loss": 0.6458, "step": 6478 }, { "epoch": 0.83, "grad_norm": 0.8950594834668176, "learning_rate": 7.779845183141622e-07, "loss": 0.6127, "step": 6479 }, { "epoch": 0.83, "grad_norm": 0.8542798718681037, "learning_rate": 7.768795465905021e-07, "loss": 0.6416, "step": 6480 }, { "epoch": 0.83, "grad_norm": 0.9204954413015148, "learning_rate": 7.757752940202596e-07, "loss": 0.5953, "step": 6481 }, { "epoch": 0.83, "grad_norm": 0.6560953156263014, "learning_rate": 7.746717607914766e-07, "loss": 0.478, "step": 6482 }, { "epoch": 0.83, "grad_norm": 0.6999493642693491, "learning_rate": 7.735689470920738e-07, "loss": 0.5704, "step": 6483 }, { "epoch": 0.83, "grad_norm": 0.7103873071471618, "learning_rate": 7.724668531098489e-07, "loss": 0.5308, "step": 6484 }, { "epoch": 0.83, "grad_norm": 0.8182183839372367, "learning_rate": 7.713654790324765e-07, "loss": 0.5327, "step": 6485 }, { "epoch": 0.83, "grad_norm": 0.6088936193910871, "learning_rate": 7.702648250475092e-07, "loss": 0.4662, "step": 6486 }, { "epoch": 0.83, "grad_norm": 0.6945340773217124, "learning_rate": 7.691648913423772e-07, "loss": 0.5371, "step": 6487 }, { "epoch": 0.83, "grad_norm": 0.8052921052230595, "learning_rate": 7.680656781043882e-07, "loss": 0.6224, "step": 6488 }, { "epoch": 0.83, "grad_norm": 0.6811345915823981, "learning_rate": 7.669671855207256e-07, "loss": 0.5459, "step": 6489 }, { "epoch": 0.83, "grad_norm": 0.8879446267811603, "learning_rate": 7.658694137784522e-07, "loss": 0.626, "step": 6490 }, { "epoch": 0.83, "grad_norm": 0.6489813321023465, "learning_rate": 7.64772363064507e-07, "loss": 0.5046, "step": 6491 }, { "epoch": 0.83, "grad_norm": 0.7594974198505549, "learning_rate": 7.636760335657056e-07, "loss": 0.5707, "step": 6492 }, { "epoch": 0.83, "grad_norm": 0.7211531903587437, "learning_rate": 7.625804254687425e-07, "loss": 0.5205, "step": 6493 }, { "epoch": 0.83, "grad_norm": 0.8114582002808618, "learning_rate": 7.614855389601872e-07, "loss": 0.6189, "step": 6494 }, { "epoch": 0.83, "grad_norm": 0.9914081689024536, "learning_rate": 7.603913742264885e-07, "loss": 0.6116, "step": 6495 }, { "epoch": 0.83, "grad_norm": 0.6756113126695669, "learning_rate": 7.592979314539711e-07, "loss": 0.509, "step": 6496 }, { "epoch": 0.83, "grad_norm": 0.7881046685308296, "learning_rate": 7.582052108288363e-07, "loss": 0.5875, "step": 6497 }, { "epoch": 0.83, "grad_norm": 0.7854487899893072, "learning_rate": 7.571132125371633e-07, "loss": 0.6147, "step": 6498 }, { "epoch": 0.83, "grad_norm": 0.8426822431831505, "learning_rate": 7.560219367649069e-07, "loss": 0.6141, "step": 6499 }, { "epoch": 0.83, "grad_norm": 0.7097615281401756, "learning_rate": 7.549313836979027e-07, "loss": 0.5226, "step": 6500 }, { "epoch": 0.83, "grad_norm": 0.8762150764731828, "learning_rate": 7.538415535218591e-07, "loss": 0.5852, "step": 6501 }, { "epoch": 0.83, "grad_norm": 0.9341001814991424, "learning_rate": 7.527524464223629e-07, "loss": 0.6607, "step": 6502 }, { "epoch": 0.83, "grad_norm": 0.8474017520442968, "learning_rate": 7.51664062584877e-07, "loss": 0.702, "step": 6503 }, { "epoch": 0.83, "grad_norm": 0.8612114281507735, "learning_rate": 7.505764021947426e-07, "loss": 0.5472, "step": 6504 }, { "epoch": 0.83, "grad_norm": 0.6497753113001647, "learning_rate": 7.494894654371771e-07, "loss": 0.6018, "step": 6505 }, { "epoch": 0.83, "grad_norm": 0.6210852166384915, "learning_rate": 7.484032524972739e-07, "loss": 0.4705, "step": 6506 }, { "epoch": 0.83, "grad_norm": 0.9260109906457434, "learning_rate": 7.473177635600037e-07, "loss": 0.6305, "step": 6507 }, { "epoch": 0.83, "grad_norm": 0.7667925016785997, "learning_rate": 7.462329988102147e-07, "loss": 0.6503, "step": 6508 }, { "epoch": 0.83, "grad_norm": 0.7437470904870618, "learning_rate": 7.451489584326304e-07, "loss": 0.5631, "step": 6509 }, { "epoch": 0.83, "grad_norm": 0.7458368296289911, "learning_rate": 7.440656426118515e-07, "loss": 0.5599, "step": 6510 }, { "epoch": 0.83, "grad_norm": 0.6960985561781085, "learning_rate": 7.42983051532356e-07, "loss": 0.5344, "step": 6511 }, { "epoch": 0.83, "grad_norm": 0.6847519213495826, "learning_rate": 7.419011853784974e-07, "loss": 0.5393, "step": 6512 }, { "epoch": 0.83, "grad_norm": 0.6739700716475066, "learning_rate": 7.408200443345065e-07, "loss": 0.4921, "step": 6513 }, { "epoch": 0.83, "grad_norm": 0.7677987528044851, "learning_rate": 7.397396285844898e-07, "loss": 0.5781, "step": 6514 }, { "epoch": 0.83, "grad_norm": 0.6881564934431853, "learning_rate": 7.386599383124321e-07, "loss": 0.523, "step": 6515 }, { "epoch": 0.83, "grad_norm": 0.7891656762827007, "learning_rate": 7.375809737021922e-07, "loss": 0.6497, "step": 6516 }, { "epoch": 0.83, "grad_norm": 0.6748799703713955, "learning_rate": 7.365027349375065e-07, "loss": 0.5322, "step": 6517 }, { "epoch": 0.83, "grad_norm": 0.8133659434698493, "learning_rate": 7.354252222019887e-07, "loss": 0.5289, "step": 6518 }, { "epoch": 0.83, "grad_norm": 0.8721716971664203, "learning_rate": 7.343484356791275e-07, "loss": 0.5698, "step": 6519 }, { "epoch": 0.83, "grad_norm": 0.8306903272994864, "learning_rate": 7.33272375552288e-07, "loss": 0.705, "step": 6520 }, { "epoch": 0.83, "grad_norm": 0.7301242419997787, "learning_rate": 7.321970420047131e-07, "loss": 0.5408, "step": 6521 }, { "epoch": 0.83, "grad_norm": 0.851566074482624, "learning_rate": 7.311224352195201e-07, "loss": 0.6491, "step": 6522 }, { "epoch": 0.83, "grad_norm": 0.8994519614635279, "learning_rate": 7.300485553797037e-07, "loss": 0.6776, "step": 6523 }, { "epoch": 0.83, "grad_norm": 0.9604227114442911, "learning_rate": 7.289754026681339e-07, "loss": 0.6479, "step": 6524 }, { "epoch": 0.83, "grad_norm": 0.7741664738252255, "learning_rate": 7.279029772675572e-07, "loss": 0.5746, "step": 6525 }, { "epoch": 0.83, "grad_norm": 0.9252040164173223, "learning_rate": 7.268312793605975e-07, "loss": 0.6237, "step": 6526 }, { "epoch": 0.83, "grad_norm": 0.8382225232199589, "learning_rate": 7.257603091297538e-07, "loss": 0.5213, "step": 6527 }, { "epoch": 0.83, "grad_norm": 0.7815582144118317, "learning_rate": 7.246900667574009e-07, "loss": 0.5466, "step": 6528 }, { "epoch": 0.83, "grad_norm": 0.772197561483472, "learning_rate": 7.236205524257894e-07, "loss": 0.5132, "step": 6529 }, { "epoch": 0.83, "grad_norm": 0.7428604057593859, "learning_rate": 7.225517663170473e-07, "loss": 0.5197, "step": 6530 }, { "epoch": 0.83, "grad_norm": 0.7127185987053908, "learning_rate": 7.214837086131771e-07, "loss": 0.5333, "step": 6531 }, { "epoch": 0.83, "grad_norm": 0.903382842631152, "learning_rate": 7.204163794960578e-07, "loss": 0.6325, "step": 6532 }, { "epoch": 0.83, "grad_norm": 0.7398743569942173, "learning_rate": 7.193497791474446e-07, "loss": 0.5368, "step": 6533 }, { "epoch": 0.83, "grad_norm": 0.9228566647147254, "learning_rate": 7.182839077489689e-07, "loss": 0.6272, "step": 6534 }, { "epoch": 0.83, "grad_norm": 0.7204189569886309, "learning_rate": 7.172187654821372e-07, "loss": 0.5373, "step": 6535 }, { "epoch": 0.83, "grad_norm": 0.8046945371198233, "learning_rate": 7.161543525283315e-07, "loss": 0.6012, "step": 6536 }, { "epoch": 0.83, "grad_norm": 0.9334020107419905, "learning_rate": 7.150906690688114e-07, "loss": 0.6378, "step": 6537 }, { "epoch": 0.83, "grad_norm": 0.8252522540635658, "learning_rate": 7.140277152847102e-07, "loss": 0.5813, "step": 6538 }, { "epoch": 0.83, "grad_norm": 0.8441720435695788, "learning_rate": 7.129654913570383e-07, "loss": 0.6096, "step": 6539 }, { "epoch": 0.83, "grad_norm": 1.046191285128024, "learning_rate": 7.119039974666809e-07, "loss": 0.6358, "step": 6540 }, { "epoch": 0.83, "grad_norm": 0.7897050072671297, "learning_rate": 7.108432337943999e-07, "loss": 0.5614, "step": 6541 }, { "epoch": 0.83, "grad_norm": 0.925825742956098, "learning_rate": 7.097832005208322e-07, "loss": 0.6194, "step": 6542 }, { "epoch": 0.83, "grad_norm": 0.6639325753349582, "learning_rate": 7.087238978264899e-07, "loss": 0.525, "step": 6543 }, { "epoch": 0.83, "grad_norm": 0.8458210571181917, "learning_rate": 7.076653258917615e-07, "loss": 0.5256, "step": 6544 }, { "epoch": 0.83, "grad_norm": 0.8970400226029809, "learning_rate": 7.066074848969112e-07, "loss": 0.6703, "step": 6545 }, { "epoch": 0.83, "grad_norm": 0.7288540836577638, "learning_rate": 7.055503750220783e-07, "loss": 0.5601, "step": 6546 }, { "epoch": 0.83, "grad_norm": 0.7880073716352396, "learning_rate": 7.044939964472769e-07, "loss": 0.6089, "step": 6547 }, { "epoch": 0.83, "grad_norm": 0.807433246236488, "learning_rate": 7.034383493523978e-07, "loss": 0.6235, "step": 6548 }, { "epoch": 0.83, "grad_norm": 0.8460268456842512, "learning_rate": 7.023834339172064e-07, "loss": 0.6064, "step": 6549 }, { "epoch": 0.83, "grad_norm": 0.7127437865258521, "learning_rate": 7.013292503213443e-07, "loss": 0.564, "step": 6550 }, { "epoch": 0.83, "grad_norm": 0.8372618732884886, "learning_rate": 7.002757987443271e-07, "loss": 0.6523, "step": 6551 }, { "epoch": 0.83, "grad_norm": 0.6510529742660217, "learning_rate": 6.992230793655464e-07, "loss": 0.4872, "step": 6552 }, { "epoch": 0.83, "grad_norm": 0.8825271137113972, "learning_rate": 6.98171092364271e-07, "loss": 0.6358, "step": 6553 }, { "epoch": 0.83, "grad_norm": 0.9186629204336472, "learning_rate": 6.971198379196425e-07, "loss": 0.6642, "step": 6554 }, { "epoch": 0.84, "grad_norm": 0.6899110144642793, "learning_rate": 6.960693162106791e-07, "loss": 0.5621, "step": 6555 }, { "epoch": 0.84, "grad_norm": 0.7517082236620101, "learning_rate": 6.950195274162725e-07, "loss": 0.5229, "step": 6556 }, { "epoch": 0.84, "grad_norm": 0.63533970528908, "learning_rate": 6.939704717151913e-07, "loss": 0.5014, "step": 6557 }, { "epoch": 0.84, "grad_norm": 0.8511302440395219, "learning_rate": 6.929221492860794e-07, "loss": 0.6717, "step": 6558 }, { "epoch": 0.84, "grad_norm": 0.6902038290606284, "learning_rate": 6.918745603074545e-07, "loss": 0.5023, "step": 6559 }, { "epoch": 0.84, "grad_norm": 0.6495942208233723, "learning_rate": 6.9082770495771e-07, "loss": 0.5581, "step": 6560 }, { "epoch": 0.84, "grad_norm": 0.6920938888084776, "learning_rate": 6.89781583415115e-07, "loss": 0.5082, "step": 6561 }, { "epoch": 0.84, "grad_norm": 0.8274598308488191, "learning_rate": 6.887361958578126e-07, "loss": 0.6074, "step": 6562 }, { "epoch": 0.84, "grad_norm": 0.8229797456862055, "learning_rate": 6.876915424638214e-07, "loss": 0.6803, "step": 6563 }, { "epoch": 0.84, "grad_norm": 0.7435217812970396, "learning_rate": 6.866476234110358e-07, "loss": 0.6069, "step": 6564 }, { "epoch": 0.84, "grad_norm": 0.6633564205218362, "learning_rate": 6.856044388772232e-07, "loss": 0.5188, "step": 6565 }, { "epoch": 0.84, "grad_norm": 0.9011349377109444, "learning_rate": 6.845619890400279e-07, "loss": 0.6222, "step": 6566 }, { "epoch": 0.84, "grad_norm": 0.7242900348331709, "learning_rate": 6.83520274076968e-07, "loss": 0.5769, "step": 6567 }, { "epoch": 0.84, "grad_norm": 0.7172579274798075, "learning_rate": 6.824792941654367e-07, "loss": 0.5016, "step": 6568 }, { "epoch": 0.84, "grad_norm": 0.7334040085187365, "learning_rate": 6.814390494827017e-07, "loss": 0.4865, "step": 6569 }, { "epoch": 0.84, "grad_norm": 0.6922043380499469, "learning_rate": 6.803995402059066e-07, "loss": 0.6136, "step": 6570 }, { "epoch": 0.84, "grad_norm": 0.8507336565683287, "learning_rate": 6.793607665120683e-07, "loss": 0.5847, "step": 6571 }, { "epoch": 0.84, "grad_norm": 0.8544148798495429, "learning_rate": 6.783227285780791e-07, "loss": 0.632, "step": 6572 }, { "epoch": 0.84, "grad_norm": 0.8094824740712867, "learning_rate": 6.772854265807067e-07, "loss": 0.5908, "step": 6573 }, { "epoch": 0.84, "grad_norm": 0.7314383141767523, "learning_rate": 6.762488606965923e-07, "loss": 0.5619, "step": 6574 }, { "epoch": 0.84, "grad_norm": 0.9592265697085839, "learning_rate": 6.752130311022526e-07, "loss": 0.6096, "step": 6575 }, { "epoch": 0.84, "grad_norm": 0.7136288729789788, "learning_rate": 6.741779379740782e-07, "loss": 0.5345, "step": 6576 }, { "epoch": 0.84, "grad_norm": 0.6801727801386589, "learning_rate": 6.731435814883347e-07, "loss": 0.5616, "step": 6577 }, { "epoch": 0.84, "grad_norm": 0.9150043542854669, "learning_rate": 6.721099618211613e-07, "loss": 0.5897, "step": 6578 }, { "epoch": 0.84, "grad_norm": 0.6573435031350073, "learning_rate": 6.710770791485749e-07, "loss": 0.494, "step": 6579 }, { "epoch": 0.84, "grad_norm": 0.8458907566928735, "learning_rate": 6.700449336464637e-07, "loss": 0.6492, "step": 6580 }, { "epoch": 0.84, "grad_norm": 0.7891506612026545, "learning_rate": 6.690135254905916e-07, "loss": 0.507, "step": 6581 }, { "epoch": 0.84, "grad_norm": 0.7363962078087404, "learning_rate": 6.679828548565953e-07, "loss": 0.5688, "step": 6582 }, { "epoch": 0.84, "grad_norm": 0.7483263661502465, "learning_rate": 6.669529219199877e-07, "loss": 0.5266, "step": 6583 }, { "epoch": 0.84, "grad_norm": 0.6614566801717008, "learning_rate": 6.659237268561569e-07, "loss": 0.5625, "step": 6584 }, { "epoch": 0.84, "grad_norm": 0.6997752087560764, "learning_rate": 6.648952698403638e-07, "loss": 0.5789, "step": 6585 }, { "epoch": 0.84, "grad_norm": 0.8622418962488968, "learning_rate": 6.638675510477433e-07, "loss": 0.5844, "step": 6586 }, { "epoch": 0.84, "grad_norm": 0.6496142281971781, "learning_rate": 6.628405706533053e-07, "loss": 0.4949, "step": 6587 }, { "epoch": 0.84, "grad_norm": 0.7752862159838645, "learning_rate": 6.618143288319334e-07, "loss": 0.62, "step": 6588 }, { "epoch": 0.84, "grad_norm": 0.6884560532238665, "learning_rate": 6.607888257583872e-07, "loss": 0.5267, "step": 6589 }, { "epoch": 0.84, "grad_norm": 0.9632589696241081, "learning_rate": 6.597640616072982e-07, "loss": 0.6133, "step": 6590 }, { "epoch": 0.84, "grad_norm": 1.175798276460779, "learning_rate": 6.587400365531727e-07, "loss": 0.6573, "step": 6591 }, { "epoch": 0.84, "grad_norm": 0.6957212247020323, "learning_rate": 6.577167507703924e-07, "loss": 0.5223, "step": 6592 }, { "epoch": 0.84, "grad_norm": 0.8352043556627344, "learning_rate": 6.56694204433212e-07, "loss": 0.6036, "step": 6593 }, { "epoch": 0.84, "grad_norm": 0.9007777722190855, "learning_rate": 6.556723977157603e-07, "loss": 0.6608, "step": 6594 }, { "epoch": 0.84, "grad_norm": 1.2455559278980968, "learning_rate": 6.546513307920405e-07, "loss": 0.6454, "step": 6595 }, { "epoch": 0.84, "grad_norm": 0.895344559397274, "learning_rate": 6.536310038359295e-07, "loss": 0.5853, "step": 6596 }, { "epoch": 0.84, "grad_norm": 0.8147482627636214, "learning_rate": 6.526114170211784e-07, "loss": 0.5642, "step": 6597 }, { "epoch": 0.84, "grad_norm": 0.7078279820985721, "learning_rate": 6.515925705214121e-07, "loss": 0.5498, "step": 6598 }, { "epoch": 0.84, "grad_norm": 0.7088968242192337, "learning_rate": 6.505744645101297e-07, "loss": 0.6174, "step": 6599 }, { "epoch": 0.84, "grad_norm": 0.6904135820440285, "learning_rate": 6.495570991607037e-07, "loss": 0.5551, "step": 6600 }, { "epoch": 0.84, "grad_norm": 0.7862387716656356, "learning_rate": 6.485404746463814e-07, "loss": 0.5577, "step": 6601 }, { "epoch": 0.84, "grad_norm": 0.864439197272147, "learning_rate": 6.475245911402834e-07, "loss": 0.6271, "step": 6602 }, { "epoch": 0.84, "grad_norm": 0.9083952512629536, "learning_rate": 6.465094488154033e-07, "loss": 0.5907, "step": 6603 }, { "epoch": 0.84, "grad_norm": 1.025873277259067, "learning_rate": 6.454950478446098e-07, "loss": 0.6752, "step": 6604 }, { "epoch": 0.84, "grad_norm": 0.7064691761211881, "learning_rate": 6.444813884006451e-07, "loss": 0.5554, "step": 6605 }, { "epoch": 0.84, "grad_norm": 0.8670253134946831, "learning_rate": 6.434684706561245e-07, "loss": 0.6097, "step": 6606 }, { "epoch": 0.84, "grad_norm": 0.685954258627498, "learning_rate": 6.424562947835367e-07, "loss": 0.5271, "step": 6607 }, { "epoch": 0.84, "grad_norm": 0.8960855454330223, "learning_rate": 6.41444860955246e-07, "loss": 0.6234, "step": 6608 }, { "epoch": 0.84, "grad_norm": 0.6920440160616677, "learning_rate": 6.404341693434879e-07, "loss": 0.495, "step": 6609 }, { "epoch": 0.84, "grad_norm": 0.8065475664869005, "learning_rate": 6.394242201203721e-07, "loss": 0.6373, "step": 6610 }, { "epoch": 0.84, "grad_norm": 0.6925968971186459, "learning_rate": 6.384150134578848e-07, "loss": 0.5267, "step": 6611 }, { "epoch": 0.84, "grad_norm": 0.755069660541801, "learning_rate": 6.374065495278819e-07, "loss": 0.5097, "step": 6612 }, { "epoch": 0.84, "grad_norm": 0.8191009070321031, "learning_rate": 6.363988285020945e-07, "loss": 0.5046, "step": 6613 }, { "epoch": 0.84, "grad_norm": 0.8960369799273593, "learning_rate": 6.353918505521273e-07, "loss": 0.6315, "step": 6614 }, { "epoch": 0.84, "grad_norm": 0.6764562784266911, "learning_rate": 6.343856158494571e-07, "loss": 0.5137, "step": 6615 }, { "epoch": 0.84, "grad_norm": 0.6454672083155628, "learning_rate": 6.333801245654364e-07, "loss": 0.4892, "step": 6616 }, { "epoch": 0.84, "grad_norm": 0.8045098208261016, "learning_rate": 6.323753768712892e-07, "loss": 0.621, "step": 6617 }, { "epoch": 0.84, "grad_norm": 0.8393630167875734, "learning_rate": 6.313713729381138e-07, "loss": 0.6323, "step": 6618 }, { "epoch": 0.84, "grad_norm": 0.9187812000656891, "learning_rate": 6.303681129368811e-07, "loss": 0.6311, "step": 6619 }, { "epoch": 0.84, "grad_norm": 0.7491693734229161, "learning_rate": 6.293655970384365e-07, "loss": 0.5697, "step": 6620 }, { "epoch": 0.84, "grad_norm": 0.808694753028369, "learning_rate": 6.283638254134982e-07, "loss": 0.6031, "step": 6621 }, { "epoch": 0.84, "grad_norm": 0.6929546853512615, "learning_rate": 6.273627982326569e-07, "loss": 0.5125, "step": 6622 }, { "epoch": 0.84, "grad_norm": 0.712977412043284, "learning_rate": 6.263625156663771e-07, "loss": 0.5327, "step": 6623 }, { "epoch": 0.84, "grad_norm": 0.7497393177219862, "learning_rate": 6.253629778849968e-07, "loss": 0.5211, "step": 6624 }, { "epoch": 0.84, "grad_norm": 0.7829935478188985, "learning_rate": 6.243641850587262e-07, "loss": 0.5412, "step": 6625 }, { "epoch": 0.84, "grad_norm": 0.8015704685460228, "learning_rate": 6.233661373576505e-07, "loss": 0.5993, "step": 6626 }, { "epoch": 0.84, "grad_norm": 0.7311230006799819, "learning_rate": 6.22368834951726e-07, "loss": 0.5025, "step": 6627 }, { "epoch": 0.84, "grad_norm": 0.6522736470894342, "learning_rate": 6.213722780107829e-07, "loss": 0.5278, "step": 6628 }, { "epoch": 0.84, "grad_norm": 0.9495278766822307, "learning_rate": 6.203764667045248e-07, "loss": 0.6341, "step": 6629 }, { "epoch": 0.84, "grad_norm": 0.6600076316837938, "learning_rate": 6.193814012025278e-07, "loss": 0.5431, "step": 6630 }, { "epoch": 0.84, "grad_norm": 0.7223962732789876, "learning_rate": 6.18387081674241e-07, "loss": 0.5033, "step": 6631 }, { "epoch": 0.84, "grad_norm": 1.0115853671943351, "learning_rate": 6.173935082889871e-07, "loss": 0.618, "step": 6632 }, { "epoch": 0.85, "grad_norm": 0.6169789762530803, "learning_rate": 6.164006812159612e-07, "loss": 0.4767, "step": 6633 }, { "epoch": 0.85, "grad_norm": 0.6001650551054903, "learning_rate": 6.15408600624231e-07, "loss": 0.4583, "step": 6634 }, { "epoch": 0.85, "grad_norm": 0.7520824242428634, "learning_rate": 6.14417266682738e-07, "loss": 0.5102, "step": 6635 }, { "epoch": 0.85, "grad_norm": 0.8362768901726602, "learning_rate": 6.134266795602944e-07, "loss": 0.6298, "step": 6636 }, { "epoch": 0.85, "grad_norm": 0.7315763207232844, "learning_rate": 6.124368394255898e-07, "loss": 0.5012, "step": 6637 }, { "epoch": 0.85, "grad_norm": 0.852498104191264, "learning_rate": 6.114477464471819e-07, "loss": 0.6188, "step": 6638 }, { "epoch": 0.85, "grad_norm": 0.9094063787593737, "learning_rate": 6.104594007935033e-07, "loss": 0.6702, "step": 6639 }, { "epoch": 0.85, "grad_norm": 0.8265869620461759, "learning_rate": 6.094718026328583e-07, "loss": 0.5774, "step": 6640 }, { "epoch": 0.85, "grad_norm": 0.6946989349757569, "learning_rate": 6.084849521334257e-07, "loss": 0.5109, "step": 6641 }, { "epoch": 0.85, "grad_norm": 0.7640435512437086, "learning_rate": 6.07498849463255e-07, "loss": 0.5647, "step": 6642 }, { "epoch": 0.85, "grad_norm": 0.9579411548752673, "learning_rate": 6.065134947902695e-07, "loss": 0.645, "step": 6643 }, { "epoch": 0.85, "grad_norm": 0.8216598148726845, "learning_rate": 6.055288882822652e-07, "loss": 0.6178, "step": 6644 }, { "epoch": 0.85, "grad_norm": 0.9543165896804842, "learning_rate": 6.0454503010691e-07, "loss": 0.4812, "step": 6645 }, { "epoch": 0.85, "grad_norm": 0.9689780840641171, "learning_rate": 6.035619204317439e-07, "loss": 0.5543, "step": 6646 }, { "epoch": 0.85, "grad_norm": 0.6761672107852109, "learning_rate": 6.025795594241817e-07, "loss": 0.5438, "step": 6647 }, { "epoch": 0.85, "grad_norm": 0.6635736510253682, "learning_rate": 6.015979472515082e-07, "loss": 0.5071, "step": 6648 }, { "epoch": 0.85, "grad_norm": 0.9002661217661749, "learning_rate": 6.006170840808823e-07, "loss": 0.6033, "step": 6649 }, { "epoch": 0.85, "grad_norm": 1.5050216595890071, "learning_rate": 5.996369700793342e-07, "loss": 0.688, "step": 6650 }, { "epoch": 0.85, "grad_norm": 0.630109754315328, "learning_rate": 5.986576054137677e-07, "loss": 0.5178, "step": 6651 }, { "epoch": 0.85, "grad_norm": 0.9155447520534753, "learning_rate": 5.976789902509583e-07, "loss": 0.6613, "step": 6652 }, { "epoch": 0.85, "grad_norm": 0.9820046030654918, "learning_rate": 5.967011247575532e-07, "loss": 0.6213, "step": 6653 }, { "epoch": 0.85, "grad_norm": 0.6558362385868037, "learning_rate": 5.957240091000738e-07, "loss": 0.5553, "step": 6654 }, { "epoch": 0.85, "grad_norm": 0.6903335376409027, "learning_rate": 5.947476434449123e-07, "loss": 0.5505, "step": 6655 }, { "epoch": 0.85, "grad_norm": 0.64420493720284, "learning_rate": 5.937720279583337e-07, "loss": 0.5081, "step": 6656 }, { "epoch": 0.85, "grad_norm": 0.8145204462810093, "learning_rate": 5.927971628064744e-07, "loss": 0.5657, "step": 6657 }, { "epoch": 0.85, "grad_norm": 1.001511706551358, "learning_rate": 5.918230481553444e-07, "loss": 0.6998, "step": 6658 }, { "epoch": 0.85, "grad_norm": 0.8172986374485259, "learning_rate": 5.908496841708255e-07, "loss": 0.6622, "step": 6659 }, { "epoch": 0.85, "grad_norm": 0.6178531025454447, "learning_rate": 5.898770710186714e-07, "loss": 0.4946, "step": 6660 }, { "epoch": 0.85, "grad_norm": 0.7907835344553366, "learning_rate": 5.889052088645075e-07, "loss": 0.5728, "step": 6661 }, { "epoch": 0.85, "grad_norm": 0.8495992604157405, "learning_rate": 5.879340978738313e-07, "loss": 0.5893, "step": 6662 }, { "epoch": 0.85, "grad_norm": 0.8712011458641244, "learning_rate": 5.869637382120142e-07, "loss": 0.5844, "step": 6663 }, { "epoch": 0.85, "grad_norm": 0.7367028112167259, "learning_rate": 5.859941300442984e-07, "loss": 0.5627, "step": 6664 }, { "epoch": 0.85, "grad_norm": 0.6850367441887795, "learning_rate": 5.850252735357969e-07, "loss": 0.543, "step": 6665 }, { "epoch": 0.85, "grad_norm": 0.8034512080275277, "learning_rate": 5.840571688514968e-07, "loss": 0.5555, "step": 6666 }, { "epoch": 0.85, "grad_norm": 1.0041028325002337, "learning_rate": 5.83089816156256e-07, "loss": 0.6227, "step": 6667 }, { "epoch": 0.85, "grad_norm": 0.7423828439667035, "learning_rate": 5.821232156148038e-07, "loss": 0.5036, "step": 6668 }, { "epoch": 0.85, "grad_norm": 0.6867344045954835, "learning_rate": 5.811573673917437e-07, "loss": 0.5458, "step": 6669 }, { "epoch": 0.85, "grad_norm": 0.7532799713461582, "learning_rate": 5.801922716515484e-07, "loss": 0.5531, "step": 6670 }, { "epoch": 0.85, "grad_norm": 0.7337520098242731, "learning_rate": 5.792279285585639e-07, "loss": 0.5768, "step": 6671 }, { "epoch": 0.85, "grad_norm": 0.5865350997259149, "learning_rate": 5.782643382770081e-07, "loss": 0.4887, "step": 6672 }, { "epoch": 0.85, "grad_norm": 0.6961869592594258, "learning_rate": 5.773015009709699e-07, "loss": 0.5084, "step": 6673 }, { "epoch": 0.85, "grad_norm": 0.8443426684067711, "learning_rate": 5.763394168044112e-07, "loss": 0.5076, "step": 6674 }, { "epoch": 0.85, "grad_norm": 0.8130278404048547, "learning_rate": 5.75378085941164e-07, "loss": 0.6074, "step": 6675 }, { "epoch": 0.85, "grad_norm": 0.8791188186089853, "learning_rate": 5.744175085449338e-07, "loss": 0.6357, "step": 6676 }, { "epoch": 0.85, "grad_norm": 0.8281016205869466, "learning_rate": 5.734576847792961e-07, "loss": 0.6446, "step": 6677 }, { "epoch": 0.85, "grad_norm": 0.783230280814367, "learning_rate": 5.724986148076995e-07, "loss": 0.6188, "step": 6678 }, { "epoch": 0.85, "grad_norm": 0.7720714095568783, "learning_rate": 5.715402987934632e-07, "loss": 0.6254, "step": 6679 }, { "epoch": 0.85, "grad_norm": 0.9144659141459703, "learning_rate": 5.70582736899779e-07, "loss": 0.5646, "step": 6680 }, { "epoch": 0.85, "grad_norm": 0.6811205167451434, "learning_rate": 5.696259292897094e-07, "loss": 0.5407, "step": 6681 }, { "epoch": 0.85, "grad_norm": 0.7029733085662122, "learning_rate": 5.686698761261872e-07, "loss": 0.5328, "step": 6682 }, { "epoch": 0.85, "grad_norm": 0.7271438792959343, "learning_rate": 5.677145775720216e-07, "loss": 0.5423, "step": 6683 }, { "epoch": 0.85, "grad_norm": 0.8482020211725333, "learning_rate": 5.667600337898876e-07, "loss": 0.61, "step": 6684 }, { "epoch": 0.85, "grad_norm": 0.8934355627262323, "learning_rate": 5.658062449423352e-07, "loss": 0.5782, "step": 6685 }, { "epoch": 0.85, "grad_norm": 0.732886356591622, "learning_rate": 5.648532111917837e-07, "loss": 0.5205, "step": 6686 }, { "epoch": 0.85, "grad_norm": 0.7406662787341809, "learning_rate": 5.639009327005257e-07, "loss": 0.5273, "step": 6687 }, { "epoch": 0.85, "grad_norm": 0.8336562427239234, "learning_rate": 5.629494096307236e-07, "loss": 0.5765, "step": 6688 }, { "epoch": 0.85, "grad_norm": 0.7316241170428661, "learning_rate": 5.619986421444124e-07, "loss": 0.5216, "step": 6689 }, { "epoch": 0.85, "grad_norm": 0.8118423680253526, "learning_rate": 5.610486304034973e-07, "loss": 0.5909, "step": 6690 }, { "epoch": 0.85, "grad_norm": 0.6992098729325374, "learning_rate": 5.600993745697564e-07, "loss": 0.5521, "step": 6691 }, { "epoch": 0.85, "grad_norm": 0.9295298419127371, "learning_rate": 5.591508748048369e-07, "loss": 0.6369, "step": 6692 }, { "epoch": 0.85, "grad_norm": 0.701918935071087, "learning_rate": 5.582031312702596e-07, "loss": 0.5326, "step": 6693 }, { "epoch": 0.85, "grad_norm": 0.8317144429426071, "learning_rate": 5.572561441274132e-07, "loss": 0.6267, "step": 6694 }, { "epoch": 0.85, "grad_norm": 0.7718526690059179, "learning_rate": 5.563099135375622e-07, "loss": 0.5816, "step": 6695 }, { "epoch": 0.85, "grad_norm": 0.7734736046367386, "learning_rate": 5.55364439661839e-07, "loss": 0.5425, "step": 6696 }, { "epoch": 0.85, "grad_norm": 0.6256423017637482, "learning_rate": 5.544197226612486e-07, "loss": 0.5377, "step": 6697 }, { "epoch": 0.85, "grad_norm": 0.8050142198012837, "learning_rate": 5.534757626966647e-07, "loss": 0.5776, "step": 6698 }, { "epoch": 0.85, "grad_norm": 0.9749231815572074, "learning_rate": 5.525325599288356e-07, "loss": 0.6004, "step": 6699 }, { "epoch": 0.85, "grad_norm": 0.9218144609161856, "learning_rate": 5.515901145183778e-07, "loss": 0.6094, "step": 6700 }, { "epoch": 0.85, "grad_norm": 0.6859431604713325, "learning_rate": 5.5064842662578e-07, "loss": 0.5202, "step": 6701 }, { "epoch": 0.85, "grad_norm": 0.6858568460525114, "learning_rate": 5.497074964114024e-07, "loss": 0.5397, "step": 6702 }, { "epoch": 0.85, "grad_norm": 0.680589784458555, "learning_rate": 5.487673240354757e-07, "loss": 0.5687, "step": 6703 }, { "epoch": 0.85, "grad_norm": 0.7866750202681904, "learning_rate": 5.478279096581008e-07, "loss": 0.6138, "step": 6704 }, { "epoch": 0.85, "grad_norm": 0.8416185289132421, "learning_rate": 5.468892534392506e-07, "loss": 0.6023, "step": 6705 }, { "epoch": 0.85, "grad_norm": 0.6983827388076834, "learning_rate": 5.45951355538768e-07, "loss": 0.4549, "step": 6706 }, { "epoch": 0.85, "grad_norm": 0.9529964173405592, "learning_rate": 5.450142161163679e-07, "loss": 0.623, "step": 6707 }, { "epoch": 0.85, "grad_norm": 0.7934857079047395, "learning_rate": 5.440778353316351e-07, "loss": 0.5279, "step": 6708 }, { "epoch": 0.85, "grad_norm": 1.0550014155465883, "learning_rate": 5.431422133440256e-07, "loss": 0.617, "step": 6709 }, { "epoch": 0.85, "grad_norm": 0.6292006488676265, "learning_rate": 5.422073503128655e-07, "loss": 0.5203, "step": 6710 }, { "epoch": 0.85, "grad_norm": 0.9030709829990416, "learning_rate": 5.412732463973531e-07, "loss": 0.6435, "step": 6711 }, { "epoch": 0.86, "grad_norm": 0.7470482344475862, "learning_rate": 5.403399017565563e-07, "loss": 0.5252, "step": 6712 }, { "epoch": 0.86, "grad_norm": 0.8329525197801656, "learning_rate": 5.394073165494135e-07, "loss": 0.6394, "step": 6713 }, { "epoch": 0.86, "grad_norm": 0.7564476104651021, "learning_rate": 5.384754909347345e-07, "loss": 0.555, "step": 6714 }, { "epoch": 0.86, "grad_norm": 0.7557044998004324, "learning_rate": 5.375444250711997e-07, "loss": 0.5702, "step": 6715 }, { "epoch": 0.86, "grad_norm": 0.6666910710711206, "learning_rate": 5.366141191173602e-07, "loss": 0.5073, "step": 6716 }, { "epoch": 0.86, "grad_norm": 0.8077592649107482, "learning_rate": 5.356845732316368e-07, "loss": 0.6512, "step": 6717 }, { "epoch": 0.86, "grad_norm": 0.7297283234079667, "learning_rate": 5.347557875723214e-07, "loss": 0.5567, "step": 6718 }, { "epoch": 0.86, "grad_norm": 0.7379214844064317, "learning_rate": 5.338277622975769e-07, "loss": 0.5573, "step": 6719 }, { "epoch": 0.86, "grad_norm": 0.7973404610329959, "learning_rate": 5.329004975654361e-07, "loss": 0.6162, "step": 6720 }, { "epoch": 0.86, "grad_norm": 0.8953338522024854, "learning_rate": 5.31973993533802e-07, "loss": 0.6205, "step": 6721 }, { "epoch": 0.86, "grad_norm": 0.624479714645648, "learning_rate": 5.310482503604497e-07, "loss": 0.4784, "step": 6722 }, { "epoch": 0.86, "grad_norm": 0.6572203102259121, "learning_rate": 5.301232682030239e-07, "loss": 0.5187, "step": 6723 }, { "epoch": 0.86, "grad_norm": 0.746509996698656, "learning_rate": 5.291990472190378e-07, "loss": 0.5764, "step": 6724 }, { "epoch": 0.86, "grad_norm": 0.7252402771027143, "learning_rate": 5.28275587565878e-07, "loss": 0.5205, "step": 6725 }, { "epoch": 0.86, "grad_norm": 0.769748151715065, "learning_rate": 5.273528894007995e-07, "loss": 0.5533, "step": 6726 }, { "epoch": 0.86, "grad_norm": 0.9482714920919001, "learning_rate": 5.264309528809286e-07, "loss": 0.6273, "step": 6727 }, { "epoch": 0.86, "grad_norm": 0.8245267925991313, "learning_rate": 5.255097781632606e-07, "loss": 0.5199, "step": 6728 }, { "epoch": 0.86, "grad_norm": 0.7754770650072839, "learning_rate": 5.245893654046625e-07, "loss": 0.5463, "step": 6729 }, { "epoch": 0.86, "grad_norm": 0.7053218324500792, "learning_rate": 5.236697147618713e-07, "loss": 0.4901, "step": 6730 }, { "epoch": 0.86, "grad_norm": 0.681869034081981, "learning_rate": 5.227508263914933e-07, "loss": 0.5462, "step": 6731 }, { "epoch": 0.86, "grad_norm": 0.6961163291499552, "learning_rate": 5.218327004500062e-07, "loss": 0.5314, "step": 6732 }, { "epoch": 0.86, "grad_norm": 0.6538120026732325, "learning_rate": 5.209153370937569e-07, "loss": 0.5247, "step": 6733 }, { "epoch": 0.86, "grad_norm": 0.6701074681331374, "learning_rate": 5.199987364789627e-07, "loss": 0.4855, "step": 6734 }, { "epoch": 0.86, "grad_norm": 0.8418016593954959, "learning_rate": 5.190828987617119e-07, "loss": 0.5844, "step": 6735 }, { "epoch": 0.86, "grad_norm": 1.0377313602386256, "learning_rate": 5.18167824097961e-07, "loss": 0.6338, "step": 6736 }, { "epoch": 0.86, "grad_norm": 0.8535672143176243, "learning_rate": 5.172535126435385e-07, "loss": 0.6308, "step": 6737 }, { "epoch": 0.86, "grad_norm": 0.6670861902778317, "learning_rate": 5.163399645541417e-07, "loss": 0.5323, "step": 6738 }, { "epoch": 0.86, "grad_norm": 0.7787532767284215, "learning_rate": 5.154271799853389e-07, "loss": 0.5894, "step": 6739 }, { "epoch": 0.86, "grad_norm": 0.7249869752081559, "learning_rate": 5.145151590925673e-07, "loss": 0.5289, "step": 6740 }, { "epoch": 0.86, "grad_norm": 0.6668592754131172, "learning_rate": 5.136039020311345e-07, "loss": 0.5567, "step": 6741 }, { "epoch": 0.86, "grad_norm": 0.8118886276253745, "learning_rate": 5.126934089562186e-07, "loss": 0.6115, "step": 6742 }, { "epoch": 0.86, "grad_norm": 0.9056117908349427, "learning_rate": 5.11783680022866e-07, "loss": 0.5527, "step": 6743 }, { "epoch": 0.86, "grad_norm": 0.7347999822566383, "learning_rate": 5.108747153859956e-07, "loss": 0.5184, "step": 6744 }, { "epoch": 0.86, "grad_norm": 0.8355829946032209, "learning_rate": 5.099665152003929e-07, "loss": 0.5964, "step": 6745 }, { "epoch": 0.86, "grad_norm": 0.6416412954058793, "learning_rate": 5.090590796207168e-07, "loss": 0.4566, "step": 6746 }, { "epoch": 0.86, "grad_norm": 0.8396672141426126, "learning_rate": 5.081524088014916e-07, "loss": 0.6658, "step": 6747 }, { "epoch": 0.86, "grad_norm": 0.8610716348412157, "learning_rate": 5.072465028971168e-07, "loss": 0.6407, "step": 6748 }, { "epoch": 0.86, "grad_norm": 0.6193157597771248, "learning_rate": 5.063413620618568e-07, "loss": 0.5334, "step": 6749 }, { "epoch": 0.86, "grad_norm": 0.8897653668198241, "learning_rate": 5.054369864498487e-07, "loss": 0.5912, "step": 6750 }, { "epoch": 0.86, "grad_norm": 0.7508538914643151, "learning_rate": 5.045333762150978e-07, "loss": 0.5816, "step": 6751 }, { "epoch": 0.86, "grad_norm": 0.6397178648767955, "learning_rate": 5.036305315114792e-07, "loss": 0.5154, "step": 6752 }, { "epoch": 0.86, "grad_norm": 0.7066585449650707, "learning_rate": 5.027284524927384e-07, "loss": 0.5581, "step": 6753 }, { "epoch": 0.86, "grad_norm": 0.8739775262682591, "learning_rate": 5.018271393124901e-07, "loss": 0.6349, "step": 6754 }, { "epoch": 0.86, "grad_norm": 1.4116853772701352, "learning_rate": 5.009265921242185e-07, "loss": 0.6274, "step": 6755 }, { "epoch": 0.86, "grad_norm": 0.8242199581327988, "learning_rate": 5.000268110812767e-07, "loss": 0.6128, "step": 6756 }, { "epoch": 0.86, "grad_norm": 0.7677744210600361, "learning_rate": 4.99127796336889e-07, "loss": 0.6398, "step": 6757 }, { "epoch": 0.86, "grad_norm": 0.911019440437457, "learning_rate": 4.982295480441473e-07, "loss": 0.6458, "step": 6758 }, { "epoch": 0.86, "grad_norm": 0.6492105159862692, "learning_rate": 4.97332066356015e-07, "loss": 0.4959, "step": 6759 }, { "epoch": 0.86, "grad_norm": 0.6192634049603718, "learning_rate": 4.964353514253234e-07, "loss": 0.517, "step": 6760 }, { "epoch": 0.86, "grad_norm": 0.8562076206174601, "learning_rate": 4.955394034047734e-07, "loss": 0.5531, "step": 6761 }, { "epoch": 0.86, "grad_norm": 0.9545415698057342, "learning_rate": 4.946442224469356e-07, "loss": 0.5951, "step": 6762 }, { "epoch": 0.86, "grad_norm": 0.8673426410389913, "learning_rate": 4.937498087042508e-07, "loss": 0.5969, "step": 6763 }, { "epoch": 0.86, "grad_norm": 0.8770459392939213, "learning_rate": 4.928561623290273e-07, "loss": 0.664, "step": 6764 }, { "epoch": 0.86, "grad_norm": 0.8525983029934454, "learning_rate": 4.919632834734451e-07, "loss": 0.5981, "step": 6765 }, { "epoch": 0.86, "grad_norm": 0.8186084441320477, "learning_rate": 4.910711722895506e-07, "loss": 0.6199, "step": 6766 }, { "epoch": 0.86, "grad_norm": 0.711732577811161, "learning_rate": 4.901798289292619e-07, "loss": 0.5136, "step": 6767 }, { "epoch": 0.86, "grad_norm": 0.8118998111549554, "learning_rate": 4.892892535443655e-07, "loss": 0.6057, "step": 6768 }, { "epoch": 0.86, "grad_norm": 0.7495231699115711, "learning_rate": 4.883994462865166e-07, "loss": 0.4684, "step": 6769 }, { "epoch": 0.86, "grad_norm": 0.7256228022868758, "learning_rate": 4.875104073072412e-07, "loss": 0.566, "step": 6770 }, { "epoch": 0.86, "grad_norm": 0.8726281530599873, "learning_rate": 4.866221367579321e-07, "loss": 0.6354, "step": 6771 }, { "epoch": 0.86, "grad_norm": 0.6618751822917529, "learning_rate": 4.857346347898534e-07, "loss": 0.5518, "step": 6772 }, { "epoch": 0.86, "grad_norm": 0.5953706836835683, "learning_rate": 4.848479015541357e-07, "loss": 0.4961, "step": 6773 }, { "epoch": 0.86, "grad_norm": 0.7022808077604614, "learning_rate": 4.839619372017829e-07, "loss": 0.5512, "step": 6774 }, { "epoch": 0.86, "grad_norm": 0.838539482973408, "learning_rate": 4.830767418836646e-07, "loss": 0.521, "step": 6775 }, { "epoch": 0.86, "grad_norm": 0.8278782733605144, "learning_rate": 4.821923157505204e-07, "loss": 0.605, "step": 6776 }, { "epoch": 0.86, "grad_norm": 0.8249562991141697, "learning_rate": 4.813086589529587e-07, "loss": 0.5902, "step": 6777 }, { "epoch": 0.86, "grad_norm": 0.9229908607233321, "learning_rate": 4.804257716414568e-07, "loss": 0.6813, "step": 6778 }, { "epoch": 0.86, "grad_norm": 0.7503060863534825, "learning_rate": 4.795436539663611e-07, "loss": 0.6196, "step": 6779 }, { "epoch": 0.86, "grad_norm": 0.9066843622351747, "learning_rate": 4.786623060778878e-07, "loss": 0.5739, "step": 6780 }, { "epoch": 0.86, "grad_norm": 0.6401371007213178, "learning_rate": 4.777817281261205e-07, "loss": 0.5523, "step": 6781 }, { "epoch": 0.86, "grad_norm": 0.9687108354645209, "learning_rate": 4.769019202610131e-07, "loss": 0.5962, "step": 6782 }, { "epoch": 0.86, "grad_norm": 0.7686738272549012, "learning_rate": 4.7602288263238704e-07, "loss": 0.5638, "step": 6783 }, { "epoch": 0.86, "grad_norm": 0.913255053557828, "learning_rate": 4.7514461538993306e-07, "loss": 0.6536, "step": 6784 }, { "epoch": 0.86, "grad_norm": 0.6467988572123885, "learning_rate": 4.7426711868321206e-07, "loss": 0.5335, "step": 6785 }, { "epoch": 0.86, "grad_norm": 0.636413510811312, "learning_rate": 4.733903926616523e-07, "loss": 0.5066, "step": 6786 }, { "epoch": 0.86, "grad_norm": 0.7325449776923049, "learning_rate": 4.725144374745505e-07, "loss": 0.5856, "step": 6787 }, { "epoch": 0.86, "grad_norm": 0.7791334682511725, "learning_rate": 4.716392532710734e-07, "loss": 0.5576, "step": 6788 }, { "epoch": 0.86, "grad_norm": 0.6791214542215358, "learning_rate": 4.7076484020025515e-07, "loss": 0.5726, "step": 6789 }, { "epoch": 0.87, "grad_norm": 0.6854480019351055, "learning_rate": 4.6989119841099885e-07, "loss": 0.4893, "step": 6790 }, { "epoch": 0.87, "grad_norm": 0.7958724188131521, "learning_rate": 4.6901832805207767e-07, "loss": 0.5659, "step": 6791 }, { "epoch": 0.87, "grad_norm": 0.6175377429627246, "learning_rate": 4.6814622927213173e-07, "loss": 0.5306, "step": 6792 }, { "epoch": 0.87, "grad_norm": 0.7080386859850634, "learning_rate": 4.6727490221967045e-07, "loss": 0.4996, "step": 6793 }, { "epoch": 0.87, "grad_norm": 0.7368830841915116, "learning_rate": 4.664043470430718e-07, "loss": 0.5, "step": 6794 }, { "epoch": 0.87, "grad_norm": 0.7737900367302412, "learning_rate": 4.655345638905823e-07, "loss": 0.594, "step": 6795 }, { "epoch": 0.87, "grad_norm": 0.6922047848659214, "learning_rate": 4.646655529103167e-07, "loss": 0.5209, "step": 6796 }, { "epoch": 0.87, "grad_norm": 0.8008251346769502, "learning_rate": 4.6379731425025833e-07, "loss": 0.5682, "step": 6797 }, { "epoch": 0.87, "grad_norm": 0.6835830699019857, "learning_rate": 4.6292984805826005e-07, "loss": 0.5429, "step": 6798 }, { "epoch": 0.87, "grad_norm": 0.8126309441794485, "learning_rate": 4.620631544820414e-07, "loss": 0.5396, "step": 6799 }, { "epoch": 0.87, "grad_norm": 0.8292290049068698, "learning_rate": 4.6119723366919165e-07, "loss": 0.6184, "step": 6800 }, { "epoch": 0.87, "grad_norm": 0.695727262243584, "learning_rate": 4.6033208576716837e-07, "loss": 0.5559, "step": 6801 }, { "epoch": 0.87, "grad_norm": 0.686152965410741, "learning_rate": 4.59467710923297e-07, "loss": 0.5584, "step": 6802 }, { "epoch": 0.87, "grad_norm": 0.869218102651399, "learning_rate": 4.586041092847715e-07, "loss": 0.6628, "step": 6803 }, { "epoch": 0.87, "grad_norm": 0.7033760378930044, "learning_rate": 4.5774128099865423e-07, "loss": 0.5452, "step": 6804 }, { "epoch": 0.87, "grad_norm": 0.8817873099132584, "learning_rate": 4.5687922621187486e-07, "loss": 0.6602, "step": 6805 }, { "epoch": 0.87, "grad_norm": 0.6778791026065074, "learning_rate": 4.560179450712343e-07, "loss": 0.5421, "step": 6806 }, { "epoch": 0.87, "grad_norm": 0.6867619275953373, "learning_rate": 4.551574377233986e-07, "loss": 0.5645, "step": 6807 }, { "epoch": 0.87, "grad_norm": 0.6442054288426311, "learning_rate": 4.5429770431490384e-07, "loss": 0.544, "step": 6808 }, { "epoch": 0.87, "grad_norm": 0.7733245367956972, "learning_rate": 4.534387449921529e-07, "loss": 0.5778, "step": 6809 }, { "epoch": 0.87, "grad_norm": 0.7175828112212366, "learning_rate": 4.5258055990141824e-07, "loss": 0.5527, "step": 6810 }, { "epoch": 0.87, "grad_norm": 0.8169946273011858, "learning_rate": 4.5172314918883966e-07, "loss": 0.6252, "step": 6811 }, { "epoch": 0.87, "grad_norm": 0.7653382954104186, "learning_rate": 4.508665130004247e-07, "loss": 0.5589, "step": 6812 }, { "epoch": 0.87, "grad_norm": 0.7825437685036122, "learning_rate": 4.500106514820507e-07, "loss": 0.6105, "step": 6813 }, { "epoch": 0.87, "grad_norm": 0.703887035643475, "learning_rate": 4.49155564779461e-07, "loss": 0.5048, "step": 6814 }, { "epoch": 0.87, "grad_norm": 0.6794530379352032, "learning_rate": 4.48301253038268e-07, "loss": 0.4855, "step": 6815 }, { "epoch": 0.87, "grad_norm": 0.7483641478404777, "learning_rate": 4.474477164039526e-07, "loss": 0.6125, "step": 6816 }, { "epoch": 0.87, "grad_norm": 0.7479259027806663, "learning_rate": 4.465949550218629e-07, "loss": 0.5981, "step": 6817 }, { "epoch": 0.87, "grad_norm": 0.7668018295572554, "learning_rate": 4.457429690372156e-07, "loss": 0.5848, "step": 6818 }, { "epoch": 0.87, "grad_norm": 0.7771909454271351, "learning_rate": 4.4489175859509457e-07, "loss": 0.6349, "step": 6819 }, { "epoch": 0.87, "grad_norm": 0.8611013648956578, "learning_rate": 4.4404132384045175e-07, "loss": 0.5848, "step": 6820 }, { "epoch": 0.87, "grad_norm": 0.7529478721056361, "learning_rate": 4.4319166491810837e-07, "loss": 0.5377, "step": 6821 }, { "epoch": 0.87, "grad_norm": 0.7823556060885173, "learning_rate": 4.423427819727522e-07, "loss": 0.61, "step": 6822 }, { "epoch": 0.87, "grad_norm": 0.8724258091935903, "learning_rate": 4.414946751489385e-07, "loss": 0.6478, "step": 6823 }, { "epoch": 0.87, "grad_norm": 0.7172657941061589, "learning_rate": 4.4064734459109196e-07, "loss": 0.5273, "step": 6824 }, { "epoch": 0.87, "grad_norm": 0.8003943794910167, "learning_rate": 4.398007904435031e-07, "loss": 0.5955, "step": 6825 }, { "epoch": 0.87, "grad_norm": 0.6323228053391567, "learning_rate": 4.389550128503317e-07, "loss": 0.5389, "step": 6826 }, { "epoch": 0.87, "grad_norm": 0.6960791593406801, "learning_rate": 4.381100119556053e-07, "loss": 0.5472, "step": 6827 }, { "epoch": 0.87, "grad_norm": 0.7696280108354582, "learning_rate": 4.3726578790321883e-07, "loss": 0.5116, "step": 6828 }, { "epoch": 0.87, "grad_norm": 0.7935042196525566, "learning_rate": 4.3642234083693394e-07, "loss": 0.5808, "step": 6829 }, { "epoch": 0.87, "grad_norm": 0.9761111474908527, "learning_rate": 4.355796709003812e-07, "loss": 0.6043, "step": 6830 }, { "epoch": 0.87, "grad_norm": 0.7815814672097576, "learning_rate": 4.347377782370593e-07, "loss": 0.5275, "step": 6831 }, { "epoch": 0.87, "grad_norm": 0.9070702883255608, "learning_rate": 4.3389666299033187e-07, "loss": 0.5954, "step": 6832 }, { "epoch": 0.87, "grad_norm": 0.7346600417424236, "learning_rate": 4.330563253034348e-07, "loss": 0.6451, "step": 6833 }, { "epoch": 0.87, "grad_norm": 0.7604800538695748, "learning_rate": 4.32216765319467e-07, "loss": 0.5662, "step": 6834 }, { "epoch": 0.87, "grad_norm": 0.7100245554364574, "learning_rate": 4.3137798318139743e-07, "loss": 0.5223, "step": 6835 }, { "epoch": 0.87, "grad_norm": 0.6917532187602273, "learning_rate": 4.305399790320619e-07, "loss": 0.5409, "step": 6836 }, { "epoch": 0.87, "grad_norm": 0.7800980219393966, "learning_rate": 4.297027530141634e-07, "loss": 0.5641, "step": 6837 }, { "epoch": 0.87, "grad_norm": 1.4123768643865868, "learning_rate": 4.2886630527027296e-07, "loss": 0.6374, "step": 6838 }, { "epoch": 0.87, "grad_norm": 0.7812681699758023, "learning_rate": 4.2803063594282933e-07, "loss": 0.5542, "step": 6839 }, { "epoch": 0.87, "grad_norm": 0.7374181134282078, "learning_rate": 4.271957451741376e-07, "loss": 0.5465, "step": 6840 }, { "epoch": 0.87, "grad_norm": 0.7627086107356028, "learning_rate": 4.2636163310637136e-07, "loss": 0.5041, "step": 6841 }, { "epoch": 0.87, "grad_norm": 0.8017589177021514, "learning_rate": 4.255282998815713e-07, "loss": 0.559, "step": 6842 }, { "epoch": 0.87, "grad_norm": 4.392871557551146, "learning_rate": 4.246957456416456e-07, "loss": 0.5955, "step": 6843 }, { "epoch": 0.87, "grad_norm": 0.8788631862402585, "learning_rate": 4.2386397052836924e-07, "loss": 0.5684, "step": 6844 }, { "epoch": 0.87, "grad_norm": 0.8175707495151532, "learning_rate": 4.230329746833844e-07, "loss": 0.6572, "step": 6845 }, { "epoch": 0.87, "grad_norm": 1.076979857917219, "learning_rate": 4.2220275824820136e-07, "loss": 0.6191, "step": 6846 }, { "epoch": 0.87, "grad_norm": 0.6767655043363255, "learning_rate": 4.2137332136419796e-07, "loss": 0.5426, "step": 6847 }, { "epoch": 0.87, "grad_norm": 0.8756275194949769, "learning_rate": 4.2054466417261797e-07, "loss": 0.5679, "step": 6848 }, { "epoch": 0.87, "grad_norm": 0.6903222954808087, "learning_rate": 4.1971678681457294e-07, "loss": 0.5825, "step": 6849 }, { "epoch": 0.87, "grad_norm": 1.0013682367500893, "learning_rate": 4.1888968943104236e-07, "loss": 0.6232, "step": 6850 }, { "epoch": 0.87, "grad_norm": 0.9861620341408214, "learning_rate": 4.180633721628724e-07, "loss": 0.5604, "step": 6851 }, { "epoch": 0.87, "grad_norm": 0.6825068009700247, "learning_rate": 4.1723783515077555e-07, "loss": 0.5647, "step": 6852 }, { "epoch": 0.87, "grad_norm": 0.865924622991732, "learning_rate": 4.164130785353321e-07, "loss": 0.6838, "step": 6853 }, { "epoch": 0.87, "grad_norm": 0.846563986788896, "learning_rate": 4.1558910245699027e-07, "loss": 0.6136, "step": 6854 }, { "epoch": 0.87, "grad_norm": 0.6599117250326576, "learning_rate": 4.1476590705606445e-07, "loss": 0.5418, "step": 6855 }, { "epoch": 0.87, "grad_norm": 0.738252561715404, "learning_rate": 4.139434924727359e-07, "loss": 0.5662, "step": 6856 }, { "epoch": 0.87, "grad_norm": 0.7003117425747266, "learning_rate": 4.131218588470531e-07, "loss": 0.4961, "step": 6857 }, { "epoch": 0.87, "grad_norm": 0.6164644232338188, "learning_rate": 4.1230100631893147e-07, "loss": 0.5168, "step": 6858 }, { "epoch": 0.87, "grad_norm": 0.7704882681491474, "learning_rate": 4.1148093502815456e-07, "loss": 0.5307, "step": 6859 }, { "epoch": 0.87, "grad_norm": 0.8390704993099708, "learning_rate": 4.106616451143719e-07, "loss": 0.5816, "step": 6860 }, { "epoch": 0.87, "grad_norm": 0.878284303444981, "learning_rate": 4.0984313671709964e-07, "loss": 0.5812, "step": 6861 }, { "epoch": 0.87, "grad_norm": 0.7511873474742895, "learning_rate": 4.0902540997572126e-07, "loss": 0.5249, "step": 6862 }, { "epoch": 0.87, "grad_norm": 0.7069526483100713, "learning_rate": 4.082084650294876e-07, "loss": 0.5638, "step": 6863 }, { "epoch": 0.87, "grad_norm": 0.6295548390546648, "learning_rate": 4.0739230201751467e-07, "loss": 0.5217, "step": 6864 }, { "epoch": 0.87, "grad_norm": 0.8218383948225362, "learning_rate": 4.0657692107878786e-07, "loss": 0.5297, "step": 6865 }, { "epoch": 0.87, "grad_norm": 0.716755321479113, "learning_rate": 4.057623223521573e-07, "loss": 0.6145, "step": 6866 }, { "epoch": 0.87, "grad_norm": 0.7018049628331753, "learning_rate": 4.0494850597634085e-07, "loss": 0.5328, "step": 6867 }, { "epoch": 0.87, "grad_norm": 0.6346296975776975, "learning_rate": 4.0413547208992266e-07, "loss": 0.4875, "step": 6868 }, { "epoch": 0.88, "grad_norm": 0.6149852180625857, "learning_rate": 4.0332322083135423e-07, "loss": 0.5551, "step": 6869 }, { "epoch": 0.88, "grad_norm": 0.7891505455240773, "learning_rate": 4.025117523389538e-07, "loss": 0.61, "step": 6870 }, { "epoch": 0.88, "grad_norm": 0.9327963460010891, "learning_rate": 4.0170106675090525e-07, "loss": 0.6267, "step": 6871 }, { "epoch": 0.88, "grad_norm": 0.6265477901565917, "learning_rate": 4.008911642052604e-07, "loss": 0.4838, "step": 6872 }, { "epoch": 0.88, "grad_norm": 0.9771155422037211, "learning_rate": 4.0008204483993727e-07, "loss": 0.6863, "step": 6873 }, { "epoch": 0.88, "grad_norm": 0.7949880738497038, "learning_rate": 3.992737087927201e-07, "loss": 0.6243, "step": 6874 }, { "epoch": 0.88, "grad_norm": 0.7942470456138682, "learning_rate": 3.984661562012604e-07, "loss": 0.5645, "step": 6875 }, { "epoch": 0.88, "grad_norm": 0.8241258590694551, "learning_rate": 3.9765938720307597e-07, "loss": 0.622, "step": 6876 }, { "epoch": 0.88, "grad_norm": 0.7746195041475429, "learning_rate": 3.9685340193555134e-07, "loss": 0.5296, "step": 6877 }, { "epoch": 0.88, "grad_norm": 0.7994063499442703, "learning_rate": 3.9604820053593674e-07, "loss": 0.6392, "step": 6878 }, { "epoch": 0.88, "grad_norm": 0.846324670541516, "learning_rate": 3.952437831413497e-07, "loss": 0.5781, "step": 6879 }, { "epoch": 0.88, "grad_norm": 0.837393186171623, "learning_rate": 3.944401498887751e-07, "loss": 0.5723, "step": 6880 }, { "epoch": 0.88, "grad_norm": 1.0279746020972238, "learning_rate": 3.936373009150618e-07, "loss": 0.6141, "step": 6881 }, { "epoch": 0.88, "grad_norm": 0.7405639040079075, "learning_rate": 3.9283523635692824e-07, "loss": 0.5521, "step": 6882 }, { "epoch": 0.88, "grad_norm": 0.7662720425572059, "learning_rate": 3.920339563509562e-07, "loss": 0.6346, "step": 6883 }, { "epoch": 0.88, "grad_norm": 0.661707733285121, "learning_rate": 3.912334610335955e-07, "loss": 0.5168, "step": 6884 }, { "epoch": 0.88, "grad_norm": 0.8866194093469825, "learning_rate": 3.904337505411632e-07, "loss": 0.6181, "step": 6885 }, { "epoch": 0.88, "grad_norm": 0.6639006598345805, "learning_rate": 3.8963482500984084e-07, "loss": 0.5233, "step": 6886 }, { "epoch": 0.88, "grad_norm": 0.7089907406100479, "learning_rate": 3.88836684575678e-07, "loss": 0.5224, "step": 6887 }, { "epoch": 0.88, "grad_norm": 0.9709052690857305, "learning_rate": 3.880393293745882e-07, "loss": 0.6476, "step": 6888 }, { "epoch": 0.88, "grad_norm": 0.7078290522664243, "learning_rate": 3.872427595423545e-07, "loss": 0.5246, "step": 6889 }, { "epoch": 0.88, "grad_norm": 0.7773499366125478, "learning_rate": 3.8644697521462217e-07, "loss": 0.6141, "step": 6890 }, { "epoch": 0.88, "grad_norm": 0.7890174550710611, "learning_rate": 3.856519765269068e-07, "loss": 0.61, "step": 6891 }, { "epoch": 0.88, "grad_norm": 0.7002798302807485, "learning_rate": 3.8485776361458837e-07, "loss": 0.5213, "step": 6892 }, { "epoch": 0.88, "grad_norm": 0.7727571508671105, "learning_rate": 3.8406433661291264e-07, "loss": 0.6327, "step": 6893 }, { "epoch": 0.88, "grad_norm": 0.7869232079443725, "learning_rate": 3.832716956569915e-07, "loss": 0.6688, "step": 6894 }, { "epoch": 0.88, "grad_norm": 0.750352468157192, "learning_rate": 3.8247984088180425e-07, "loss": 0.5299, "step": 6895 }, { "epoch": 0.88, "grad_norm": 0.6735478717148347, "learning_rate": 3.8168877242219517e-07, "loss": 0.547, "step": 6896 }, { "epoch": 0.88, "grad_norm": 0.9261104360253203, "learning_rate": 3.808984904128743e-07, "loss": 0.6403, "step": 6897 }, { "epoch": 0.88, "grad_norm": 0.9402187707065872, "learning_rate": 3.8010899498841956e-07, "loss": 0.5862, "step": 6898 }, { "epoch": 0.88, "grad_norm": 0.9271254088548732, "learning_rate": 3.7932028628327346e-07, "loss": 0.6467, "step": 6899 }, { "epoch": 0.88, "grad_norm": 0.9380125441395577, "learning_rate": 3.785323644317446e-07, "loss": 0.6566, "step": 6900 }, { "epoch": 0.88, "grad_norm": 0.7674381775107388, "learning_rate": 3.7774522956800786e-07, "loss": 0.6122, "step": 6901 }, { "epoch": 0.88, "grad_norm": 0.9153304736106843, "learning_rate": 3.7695888182610385e-07, "loss": 0.6076, "step": 6902 }, { "epoch": 0.88, "grad_norm": 0.7997953495084293, "learning_rate": 3.7617332133994046e-07, "loss": 0.5367, "step": 6903 }, { "epoch": 0.88, "grad_norm": 0.6222969877308744, "learning_rate": 3.753885482432895e-07, "loss": 0.5279, "step": 6904 }, { "epoch": 0.88, "grad_norm": 0.7001504885983985, "learning_rate": 3.746045626697897e-07, "loss": 0.5526, "step": 6905 }, { "epoch": 0.88, "grad_norm": 0.6752696819991477, "learning_rate": 3.738213647529459e-07, "loss": 0.4499, "step": 6906 }, { "epoch": 0.88, "grad_norm": 0.709618406756323, "learning_rate": 3.730389546261287e-07, "loss": 0.5586, "step": 6907 }, { "epoch": 0.88, "grad_norm": 0.9123097924192907, "learning_rate": 3.7225733242257424e-07, "loss": 0.6448, "step": 6908 }, { "epoch": 0.88, "grad_norm": 0.9488789912478209, "learning_rate": 3.714764982753843e-07, "loss": 0.6166, "step": 6909 }, { "epoch": 0.88, "grad_norm": 1.2451797262702748, "learning_rate": 3.7069645231752713e-07, "loss": 0.6686, "step": 6910 }, { "epoch": 0.88, "grad_norm": 0.8699991128307344, "learning_rate": 3.6991719468183696e-07, "loss": 0.6018, "step": 6911 }, { "epoch": 0.88, "grad_norm": 0.6653320708322982, "learning_rate": 3.6913872550101206e-07, "loss": 0.5214, "step": 6912 }, { "epoch": 0.88, "grad_norm": 0.6648400282397972, "learning_rate": 3.683610449076186e-07, "loss": 0.5315, "step": 6913 }, { "epoch": 0.88, "grad_norm": 0.8988892255364088, "learning_rate": 3.6758415303408677e-07, "loss": 0.6633, "step": 6914 }, { "epoch": 0.88, "grad_norm": 0.8331837088938187, "learning_rate": 3.6680805001271413e-07, "loss": 0.6502, "step": 6915 }, { "epoch": 0.88, "grad_norm": 0.8522064293592045, "learning_rate": 3.66032735975661e-07, "loss": 0.5894, "step": 6916 }, { "epoch": 0.88, "grad_norm": 0.7267399888318924, "learning_rate": 3.652582110549574e-07, "loss": 0.556, "step": 6917 }, { "epoch": 0.88, "grad_norm": 0.6525631118985582, "learning_rate": 3.6448447538249607e-07, "loss": 0.5178, "step": 6918 }, { "epoch": 0.88, "grad_norm": 0.6678621811405187, "learning_rate": 3.6371152909003617e-07, "loss": 0.5311, "step": 6919 }, { "epoch": 0.88, "grad_norm": 0.7755896507457781, "learning_rate": 3.6293937230920286e-07, "loss": 0.5775, "step": 6920 }, { "epoch": 0.88, "grad_norm": 0.7233229254905214, "learning_rate": 3.621680051714854e-07, "loss": 0.6355, "step": 6921 }, { "epoch": 0.88, "grad_norm": 0.7517800436879831, "learning_rate": 3.6139742780823993e-07, "loss": 0.5584, "step": 6922 }, { "epoch": 0.88, "grad_norm": 0.8555129232744482, "learning_rate": 3.606276403506881e-07, "loss": 0.6097, "step": 6923 }, { "epoch": 0.88, "grad_norm": 0.9443308726560262, "learning_rate": 3.598586429299167e-07, "loss": 0.5809, "step": 6924 }, { "epoch": 0.88, "grad_norm": 1.090612087503481, "learning_rate": 3.5909043567687764e-07, "loss": 0.6676, "step": 6925 }, { "epoch": 0.88, "grad_norm": 0.824867716377936, "learning_rate": 3.583230187223885e-07, "loss": 0.603, "step": 6926 }, { "epoch": 0.88, "grad_norm": 0.8694607584208969, "learning_rate": 3.575563921971331e-07, "loss": 0.6721, "step": 6927 }, { "epoch": 0.88, "grad_norm": 0.7461706664584211, "learning_rate": 3.5679055623165924e-07, "loss": 0.4813, "step": 6928 }, { "epoch": 0.88, "grad_norm": 0.8876390230410423, "learning_rate": 3.5602551095638094e-07, "loss": 0.5108, "step": 6929 }, { "epoch": 0.88, "grad_norm": 0.6559274253866437, "learning_rate": 3.5526125650157725e-07, "loss": 0.533, "step": 6930 }, { "epoch": 0.88, "grad_norm": 0.7457679782258849, "learning_rate": 3.544977929973936e-07, "loss": 0.5776, "step": 6931 }, { "epoch": 0.88, "grad_norm": 0.702490934007531, "learning_rate": 3.537351205738393e-07, "loss": 0.5473, "step": 6932 }, { "epoch": 0.88, "grad_norm": 0.6445776896524193, "learning_rate": 3.529732393607893e-07, "loss": 0.51, "step": 6933 }, { "epoch": 0.88, "grad_norm": 0.6670299732439671, "learning_rate": 3.522121494879843e-07, "loss": 0.5552, "step": 6934 }, { "epoch": 0.88, "grad_norm": 0.7736167503730588, "learning_rate": 3.5145185108502954e-07, "loss": 0.5255, "step": 6935 }, { "epoch": 0.88, "grad_norm": 0.7171381500724084, "learning_rate": 3.50692344281397e-07, "loss": 0.5473, "step": 6936 }, { "epoch": 0.88, "grad_norm": 0.7235075676515516, "learning_rate": 3.4993362920642203e-07, "loss": 0.5106, "step": 6937 }, { "epoch": 0.88, "grad_norm": 0.6993561792287761, "learning_rate": 3.491757059893053e-07, "loss": 0.554, "step": 6938 }, { "epoch": 0.88, "grad_norm": 0.9627727313690821, "learning_rate": 3.4841857475911454e-07, "loss": 0.5873, "step": 6939 }, { "epoch": 0.88, "grad_norm": 0.9825846628224079, "learning_rate": 3.4766223564478063e-07, "loss": 0.6347, "step": 6940 }, { "epoch": 0.88, "grad_norm": 0.7347986735468995, "learning_rate": 3.469066887750999e-07, "loss": 0.5362, "step": 6941 }, { "epoch": 0.88, "grad_norm": 0.8234358645824681, "learning_rate": 3.461519342787351e-07, "loss": 0.6099, "step": 6942 }, { "epoch": 0.88, "grad_norm": 0.9191586072061311, "learning_rate": 3.45397972284211e-07, "loss": 0.6638, "step": 6943 }, { "epoch": 0.88, "grad_norm": 0.6503457217687219, "learning_rate": 3.4464480291992177e-07, "loss": 0.5246, "step": 6944 }, { "epoch": 0.88, "grad_norm": 0.582986678206098, "learning_rate": 3.4389242631412413e-07, "loss": 0.4604, "step": 6945 }, { "epoch": 0.88, "grad_norm": 0.637010140064547, "learning_rate": 3.4314084259493897e-07, "loss": 0.4951, "step": 6946 }, { "epoch": 0.89, "grad_norm": 0.7290929898169166, "learning_rate": 3.4239005189035335e-07, "loss": 0.5715, "step": 6947 }, { "epoch": 0.89, "grad_norm": 1.27325381155192, "learning_rate": 3.416400543282189e-07, "loss": 0.571, "step": 6948 }, { "epoch": 0.89, "grad_norm": 0.8251831808010429, "learning_rate": 3.408908500362534e-07, "loss": 0.6635, "step": 6949 }, { "epoch": 0.89, "grad_norm": 0.8581039923204896, "learning_rate": 3.401424391420377e-07, "loss": 0.5924, "step": 6950 }, { "epoch": 0.89, "grad_norm": 0.7975402178987853, "learning_rate": 3.393948217730186e-07, "loss": 0.6368, "step": 6951 }, { "epoch": 0.89, "grad_norm": 0.6974268376522019, "learning_rate": 3.386479980565077e-07, "loss": 0.5624, "step": 6952 }, { "epoch": 0.89, "grad_norm": 0.6936500310076437, "learning_rate": 3.379019681196805e-07, "loss": 0.4911, "step": 6953 }, { "epoch": 0.89, "grad_norm": 0.9422566394040627, "learning_rate": 3.371567320895791e-07, "loss": 0.6401, "step": 6954 }, { "epoch": 0.89, "grad_norm": 1.7677258658613064, "learning_rate": 3.364122900931094e-07, "loss": 0.7253, "step": 6955 }, { "epoch": 0.89, "grad_norm": 0.6747384078911947, "learning_rate": 3.3566864225704156e-07, "loss": 0.4613, "step": 6956 }, { "epoch": 0.89, "grad_norm": 0.7914947855322968, "learning_rate": 3.34925788708011e-07, "loss": 0.6493, "step": 6957 }, { "epoch": 0.89, "grad_norm": 0.8611323040500779, "learning_rate": 3.3418372957251864e-07, "loss": 0.5927, "step": 6958 }, { "epoch": 0.89, "grad_norm": 0.6892243682656503, "learning_rate": 3.3344246497692856e-07, "loss": 0.5473, "step": 6959 }, { "epoch": 0.89, "grad_norm": 0.8071568528194664, "learning_rate": 3.3270199504747137e-07, "loss": 0.5912, "step": 6960 }, { "epoch": 0.89, "grad_norm": 0.6489081725854973, "learning_rate": 3.319623199102406e-07, "loss": 0.5496, "step": 6961 }, { "epoch": 0.89, "grad_norm": 0.7645147360763048, "learning_rate": 3.312234396911956e-07, "loss": 0.578, "step": 6962 }, { "epoch": 0.89, "grad_norm": 0.7636841622102487, "learning_rate": 3.304853545161596e-07, "loss": 0.5495, "step": 6963 }, { "epoch": 0.89, "grad_norm": 0.7478663981948215, "learning_rate": 3.297480645108214e-07, "loss": 0.5565, "step": 6964 }, { "epoch": 0.89, "grad_norm": 0.7910201536702967, "learning_rate": 3.290115698007329e-07, "loss": 0.5828, "step": 6965 }, { "epoch": 0.89, "grad_norm": 0.7601905653887429, "learning_rate": 3.282758705113126e-07, "loss": 0.5878, "step": 6966 }, { "epoch": 0.89, "grad_norm": 0.7885189484176697, "learning_rate": 3.27540966767842e-07, "loss": 0.6145, "step": 6967 }, { "epoch": 0.89, "grad_norm": 0.8083902611325703, "learning_rate": 3.26806858695467e-07, "loss": 0.6785, "step": 6968 }, { "epoch": 0.89, "grad_norm": 0.9553180885231035, "learning_rate": 3.260735464191983e-07, "loss": 0.5001, "step": 6969 }, { "epoch": 0.89, "grad_norm": 0.8958323072326773, "learning_rate": 3.2534103006391314e-07, "loss": 0.6256, "step": 6970 }, { "epoch": 0.89, "grad_norm": 0.8858237745038109, "learning_rate": 3.246093097543501e-07, "loss": 0.5929, "step": 6971 }, { "epoch": 0.89, "grad_norm": 0.8546518752288892, "learning_rate": 3.238783856151134e-07, "loss": 0.5647, "step": 6972 }, { "epoch": 0.89, "grad_norm": 0.732904673898036, "learning_rate": 3.2314825777067294e-07, "loss": 0.5361, "step": 6973 }, { "epoch": 0.89, "grad_norm": 0.742062478261483, "learning_rate": 3.2241892634536034e-07, "loss": 0.5175, "step": 6974 }, { "epoch": 0.89, "grad_norm": 0.9138286581049283, "learning_rate": 3.2169039146337457e-07, "loss": 0.6797, "step": 6975 }, { "epoch": 0.89, "grad_norm": 0.7558495500258164, "learning_rate": 3.209626532487764e-07, "loss": 0.4705, "step": 6976 }, { "epoch": 0.89, "grad_norm": 0.710825217787473, "learning_rate": 3.202357118254923e-07, "loss": 0.5359, "step": 6977 }, { "epoch": 0.89, "grad_norm": 0.605834096758279, "learning_rate": 3.195095673173132e-07, "loss": 0.5336, "step": 6978 }, { "epoch": 0.89, "grad_norm": 0.6864356020580893, "learning_rate": 3.187842198478941e-07, "loss": 0.5513, "step": 6979 }, { "epoch": 0.89, "grad_norm": 0.6759669168563152, "learning_rate": 3.18059669540754e-07, "loss": 0.5288, "step": 6980 }, { "epoch": 0.89, "grad_norm": 0.7045823353040241, "learning_rate": 3.173359165192752e-07, "loss": 0.549, "step": 6981 }, { "epoch": 0.89, "grad_norm": 0.7323056226383078, "learning_rate": 3.16612960906707e-07, "loss": 0.5864, "step": 6982 }, { "epoch": 0.89, "grad_norm": 0.7001829028875254, "learning_rate": 3.158908028261598e-07, "loss": 0.5756, "step": 6983 }, { "epoch": 0.89, "grad_norm": 1.4567425588892098, "learning_rate": 3.1516944240061077e-07, "loss": 0.5913, "step": 6984 }, { "epoch": 0.89, "grad_norm": 0.8593649698855941, "learning_rate": 3.1444887975289886e-07, "loss": 0.6559, "step": 6985 }, { "epoch": 0.89, "grad_norm": 0.8825695459196331, "learning_rate": 3.1372911500572923e-07, "loss": 0.5615, "step": 6986 }, { "epoch": 0.89, "grad_norm": 0.6486826066326216, "learning_rate": 3.130101482816694e-07, "loss": 0.5393, "step": 6987 }, { "epoch": 0.89, "grad_norm": 0.7771684236717392, "learning_rate": 3.1229197970315374e-07, "loss": 0.5436, "step": 6988 }, { "epoch": 0.89, "grad_norm": 0.8971593602959002, "learning_rate": 3.115746093924771e-07, "loss": 0.6208, "step": 6989 }, { "epoch": 0.89, "grad_norm": 0.6243847288722311, "learning_rate": 3.108580374718012e-07, "loss": 0.4787, "step": 6990 }, { "epoch": 0.89, "grad_norm": 0.6895107679319626, "learning_rate": 3.1014226406315076e-07, "loss": 0.5202, "step": 6991 }, { "epoch": 0.89, "grad_norm": 0.6869952310809828, "learning_rate": 3.094272892884137e-07, "loss": 0.5258, "step": 6992 }, { "epoch": 0.89, "grad_norm": 0.7981868981637653, "learning_rate": 3.087131132693433e-07, "loss": 0.5602, "step": 6993 }, { "epoch": 0.89, "grad_norm": 0.8797551499257745, "learning_rate": 3.0799973612755607e-07, "loss": 0.5667, "step": 6994 }, { "epoch": 0.89, "grad_norm": 0.7329088767484716, "learning_rate": 3.0728715798453266e-07, "loss": 0.5361, "step": 6995 }, { "epoch": 0.89, "grad_norm": 0.6915315346563154, "learning_rate": 3.0657537896161824e-07, "loss": 0.5534, "step": 6996 }, { "epoch": 0.89, "grad_norm": 0.8777997273576092, "learning_rate": 3.0586439918002143e-07, "loss": 0.6639, "step": 6997 }, { "epoch": 0.89, "grad_norm": 0.8835129072842285, "learning_rate": 3.051542187608136e-07, "loss": 0.5706, "step": 6998 }, { "epoch": 0.89, "grad_norm": 0.8822879692217039, "learning_rate": 3.0444483782493207e-07, "loss": 0.6302, "step": 6999 }, { "epoch": 0.89, "grad_norm": 0.7144881672330305, "learning_rate": 3.0373625649317674e-07, "loss": 0.5518, "step": 7000 }, { "epoch": 0.89, "grad_norm": 0.712558302776765, "learning_rate": 3.0302847488621114e-07, "loss": 0.5903, "step": 7001 }, { "epoch": 0.89, "grad_norm": 0.7100044367656192, "learning_rate": 3.023214931245638e-07, "loss": 0.5521, "step": 7002 }, { "epoch": 0.89, "grad_norm": 0.6749515165421132, "learning_rate": 3.016153113286263e-07, "loss": 0.5523, "step": 7003 }, { "epoch": 0.89, "grad_norm": 0.7636478222843134, "learning_rate": 3.0090992961865404e-07, "loss": 0.5154, "step": 7004 }, { "epoch": 0.89, "grad_norm": 0.8319166412829024, "learning_rate": 3.00205348114766e-07, "loss": 0.6102, "step": 7005 }, { "epoch": 0.89, "grad_norm": 0.8699956819100007, "learning_rate": 2.99501566936945e-07, "loss": 0.6081, "step": 7006 }, { "epoch": 0.89, "grad_norm": 0.9130595218705513, "learning_rate": 2.987985862050374e-07, "loss": 0.6893, "step": 7007 }, { "epoch": 0.89, "grad_norm": 0.8666271146032645, "learning_rate": 2.9809640603875423e-07, "loss": 0.5186, "step": 7008 }, { "epoch": 0.89, "grad_norm": 0.6086223377233909, "learning_rate": 2.973950265576686e-07, "loss": 0.5689, "step": 7009 }, { "epoch": 0.89, "grad_norm": 0.6261903218404566, "learning_rate": 2.966944478812189e-07, "loss": 0.5089, "step": 7010 }, { "epoch": 0.89, "grad_norm": 0.7795110203080939, "learning_rate": 2.9599467012870585e-07, "loss": 0.6468, "step": 7011 }, { "epoch": 0.89, "grad_norm": 0.7562381923264567, "learning_rate": 2.9529569341929465e-07, "loss": 0.5411, "step": 7012 }, { "epoch": 0.89, "grad_norm": 0.6648073384045954, "learning_rate": 2.9459751787201396e-07, "loss": 0.5316, "step": 7013 }, { "epoch": 0.89, "grad_norm": 0.7704531930552118, "learning_rate": 2.939001436057548e-07, "loss": 0.5426, "step": 7014 }, { "epoch": 0.89, "grad_norm": 0.7559707076164255, "learning_rate": 2.932035707392739e-07, "loss": 0.4867, "step": 7015 }, { "epoch": 0.89, "grad_norm": 0.8009764462752249, "learning_rate": 2.9250779939118954e-07, "loss": 0.6485, "step": 7016 }, { "epoch": 0.89, "grad_norm": 0.6360209916378563, "learning_rate": 2.9181282967998493e-07, "loss": 0.525, "step": 7017 }, { "epoch": 0.89, "grad_norm": 1.216844001749365, "learning_rate": 2.9111866172400527e-07, "loss": 0.5708, "step": 7018 }, { "epoch": 0.89, "grad_norm": 0.6724771473420179, "learning_rate": 2.9042529564146104e-07, "loss": 0.5188, "step": 7019 }, { "epoch": 0.89, "grad_norm": 0.6107138625887601, "learning_rate": 2.8973273155042556e-07, "loss": 0.4381, "step": 7020 }, { "epoch": 0.89, "grad_norm": 0.8667014730760305, "learning_rate": 2.89040969568834e-07, "loss": 0.6348, "step": 7021 }, { "epoch": 0.89, "grad_norm": 0.8609136351307967, "learning_rate": 2.88350009814487e-07, "loss": 0.5861, "step": 7022 }, { "epoch": 0.89, "grad_norm": 0.7231364731422838, "learning_rate": 2.876598524050478e-07, "loss": 0.5114, "step": 7023 }, { "epoch": 0.89, "grad_norm": 0.7918572201065193, "learning_rate": 2.869704974580434e-07, "loss": 0.64, "step": 7024 }, { "epoch": 0.89, "grad_norm": 0.677159760913107, "learning_rate": 2.862819450908627e-07, "loss": 0.5171, "step": 7025 }, { "epoch": 0.9, "grad_norm": 0.8175295099283835, "learning_rate": 2.8559419542075973e-07, "loss": 0.5664, "step": 7026 }, { "epoch": 0.9, "grad_norm": 0.8599328960190116, "learning_rate": 2.8490724856485017e-07, "loss": 0.6107, "step": 7027 }, { "epoch": 0.9, "grad_norm": 0.6916106104323896, "learning_rate": 2.842211046401155e-07, "loss": 0.4957, "step": 7028 }, { "epoch": 0.9, "grad_norm": 0.6372831000949382, "learning_rate": 2.8353576376339786e-07, "loss": 0.4869, "step": 7029 }, { "epoch": 0.9, "grad_norm": 0.8402321887312569, "learning_rate": 2.8285122605140433e-07, "loss": 0.6356, "step": 7030 }, { "epoch": 0.9, "grad_norm": 0.709162788295609, "learning_rate": 2.82167491620704e-07, "loss": 0.4921, "step": 7031 }, { "epoch": 0.9, "grad_norm": 1.2340712363566009, "learning_rate": 2.8148456058772977e-07, "loss": 0.6281, "step": 7032 }, { "epoch": 0.9, "grad_norm": 1.0184218311929112, "learning_rate": 2.808024330687775e-07, "loss": 0.5297, "step": 7033 }, { "epoch": 0.9, "grad_norm": 0.6485840936874373, "learning_rate": 2.8012110918000657e-07, "loss": 0.5183, "step": 7034 }, { "epoch": 0.9, "grad_norm": 0.9654053535424876, "learning_rate": 2.7944058903743964e-07, "loss": 0.6208, "step": 7035 }, { "epoch": 0.9, "grad_norm": 0.7202838090490248, "learning_rate": 2.787608727569624e-07, "loss": 0.5151, "step": 7036 }, { "epoch": 0.9, "grad_norm": 0.6263902949244659, "learning_rate": 2.7808196045432224e-07, "loss": 0.4849, "step": 7037 }, { "epoch": 0.9, "grad_norm": 0.8266628551698149, "learning_rate": 2.774038522451322e-07, "loss": 0.6024, "step": 7038 }, { "epoch": 0.9, "grad_norm": 0.7833067891524704, "learning_rate": 2.767265482448661e-07, "loss": 0.5607, "step": 7039 }, { "epoch": 0.9, "grad_norm": 0.7353334391585161, "learning_rate": 2.760500485688622e-07, "loss": 0.5339, "step": 7040 }, { "epoch": 0.9, "grad_norm": 0.6841793443135431, "learning_rate": 2.7537435333232165e-07, "loss": 0.5078, "step": 7041 }, { "epoch": 0.9, "grad_norm": 0.7670098693714952, "learning_rate": 2.746994626503074e-07, "loss": 0.5406, "step": 7042 }, { "epoch": 0.9, "grad_norm": 0.7998903518140569, "learning_rate": 2.740253766377471e-07, "loss": 0.5527, "step": 7043 }, { "epoch": 0.9, "grad_norm": 0.6328764137402102, "learning_rate": 2.733520954094304e-07, "loss": 0.4937, "step": 7044 }, { "epoch": 0.9, "grad_norm": 0.7104193587876197, "learning_rate": 2.726796190800096e-07, "loss": 0.5558, "step": 7045 }, { "epoch": 0.9, "grad_norm": 0.6689463628416755, "learning_rate": 2.720079477640009e-07, "loss": 0.5306, "step": 7046 }, { "epoch": 0.9, "grad_norm": 0.9270457056764949, "learning_rate": 2.713370815757832e-07, "loss": 0.5915, "step": 7047 }, { "epoch": 0.9, "grad_norm": 0.7134306407832558, "learning_rate": 2.706670206295975e-07, "loss": 0.5354, "step": 7048 }, { "epoch": 0.9, "grad_norm": 0.968517769346105, "learning_rate": 2.69997765039548e-07, "loss": 0.6725, "step": 7049 }, { "epoch": 0.9, "grad_norm": 0.7462057343588644, "learning_rate": 2.69329314919603e-07, "loss": 0.5452, "step": 7050 }, { "epoch": 0.9, "grad_norm": 0.6084785648605658, "learning_rate": 2.6866167038359146e-07, "loss": 0.5083, "step": 7051 }, { "epoch": 0.9, "grad_norm": 0.6947343598459573, "learning_rate": 2.679948315452069e-07, "loss": 0.5775, "step": 7052 }, { "epoch": 0.9, "grad_norm": 0.8446069298115492, "learning_rate": 2.673287985180045e-07, "loss": 0.6017, "step": 7053 }, { "epoch": 0.9, "grad_norm": 0.6175663214902423, "learning_rate": 2.6666357141540255e-07, "loss": 0.4735, "step": 7054 }, { "epoch": 0.9, "grad_norm": 0.8468003612838216, "learning_rate": 2.6599915035068367e-07, "loss": 0.5983, "step": 7055 }, { "epoch": 0.9, "grad_norm": 0.9358853973868989, "learning_rate": 2.653355354369908e-07, "loss": 0.6184, "step": 7056 }, { "epoch": 0.9, "grad_norm": 0.7350697162970625, "learning_rate": 2.6467272678733114e-07, "loss": 0.5699, "step": 7057 }, { "epoch": 0.9, "grad_norm": 0.757707184325397, "learning_rate": 2.6401072451457397e-07, "loss": 0.5448, "step": 7058 }, { "epoch": 0.9, "grad_norm": 0.8562357487747339, "learning_rate": 2.633495287314508e-07, "loss": 0.6307, "step": 7059 }, { "epoch": 0.9, "grad_norm": 0.7088049314602112, "learning_rate": 2.626891395505571e-07, "loss": 0.5397, "step": 7060 }, { "epoch": 0.9, "grad_norm": 0.7305906116858653, "learning_rate": 2.620295570843501e-07, "loss": 0.5357, "step": 7061 }, { "epoch": 0.9, "grad_norm": 0.8256628776120437, "learning_rate": 2.6137078144514937e-07, "loss": 0.5285, "step": 7062 }, { "epoch": 0.9, "grad_norm": 0.857129435342946, "learning_rate": 2.6071281274513805e-07, "loss": 0.6252, "step": 7063 }, { "epoch": 0.9, "grad_norm": 0.6269493699640977, "learning_rate": 2.6005565109636086e-07, "loss": 0.4871, "step": 7064 }, { "epoch": 0.9, "grad_norm": 0.7914612364642583, "learning_rate": 2.5939929661072615e-07, "loss": 0.6056, "step": 7065 }, { "epoch": 0.9, "grad_norm": 1.0079455349570206, "learning_rate": 2.587437494000045e-07, "loss": 0.6326, "step": 7066 }, { "epoch": 0.9, "grad_norm": 1.3023054964119716, "learning_rate": 2.580890095758276e-07, "loss": 0.5742, "step": 7067 }, { "epoch": 0.9, "grad_norm": 0.7769674375256881, "learning_rate": 2.5743507724969196e-07, "loss": 0.6068, "step": 7068 }, { "epoch": 0.9, "grad_norm": 0.7533057660642877, "learning_rate": 2.5678195253295455e-07, "loss": 0.5454, "step": 7069 }, { "epoch": 0.9, "grad_norm": 0.6712511631210043, "learning_rate": 2.5612963553683646e-07, "loss": 0.5452, "step": 7070 }, { "epoch": 0.9, "grad_norm": 0.6758664301975796, "learning_rate": 2.554781263724199e-07, "loss": 0.5027, "step": 7071 }, { "epoch": 0.9, "grad_norm": 0.7390186762902936, "learning_rate": 2.548274251506505e-07, "loss": 0.5291, "step": 7072 }, { "epoch": 0.9, "grad_norm": 0.6643365682069159, "learning_rate": 2.541775319823359e-07, "loss": 0.5136, "step": 7073 }, { "epoch": 0.9, "grad_norm": 0.6939284519542831, "learning_rate": 2.535284469781457e-07, "loss": 0.4921, "step": 7074 }, { "epoch": 0.9, "grad_norm": 0.7192443709846023, "learning_rate": 2.5288017024861275e-07, "loss": 0.495, "step": 7075 }, { "epoch": 0.9, "grad_norm": 0.7236574478596427, "learning_rate": 2.522327019041315e-07, "loss": 0.592, "step": 7076 }, { "epoch": 0.9, "grad_norm": 0.7095519864309726, "learning_rate": 2.515860420549593e-07, "loss": 0.5323, "step": 7077 }, { "epoch": 0.9, "grad_norm": 0.664579149964256, "learning_rate": 2.509401908112158e-07, "loss": 0.5595, "step": 7078 }, { "epoch": 0.9, "grad_norm": 0.9287731271909274, "learning_rate": 2.5029514828288194e-07, "loss": 0.6237, "step": 7079 }, { "epoch": 0.9, "grad_norm": 0.9682793347658527, "learning_rate": 2.4965091457980204e-07, "loss": 0.6218, "step": 7080 }, { "epoch": 0.9, "grad_norm": 0.6187793924499251, "learning_rate": 2.490074898116829e-07, "loss": 0.4864, "step": 7081 }, { "epoch": 0.9, "grad_norm": 0.752378765205412, "learning_rate": 2.4836487408809284e-07, "loss": 0.6115, "step": 7082 }, { "epoch": 0.9, "grad_norm": 0.9326199565186433, "learning_rate": 2.4772306751846273e-07, "loss": 0.6125, "step": 7083 }, { "epoch": 0.9, "grad_norm": 0.767021366413531, "learning_rate": 2.470820702120857e-07, "loss": 0.535, "step": 7084 }, { "epoch": 0.9, "grad_norm": 0.6465448790357634, "learning_rate": 2.4644188227811615e-07, "loss": 0.4647, "step": 7085 }, { "epoch": 0.9, "grad_norm": 0.8689702530254806, "learning_rate": 2.458025038255718e-07, "loss": 0.6542, "step": 7086 }, { "epoch": 0.9, "grad_norm": 1.0249396177769077, "learning_rate": 2.4516393496333225e-07, "loss": 0.6297, "step": 7087 }, { "epoch": 0.9, "grad_norm": 0.8459240902913864, "learning_rate": 2.4452617580013827e-07, "loss": 0.6601, "step": 7088 }, { "epoch": 0.9, "grad_norm": 0.7201875641647064, "learning_rate": 2.4388922644459525e-07, "loss": 0.5253, "step": 7089 }, { "epoch": 0.9, "grad_norm": 0.7910108479186113, "learning_rate": 2.4325308700516805e-07, "loss": 0.6053, "step": 7090 }, { "epoch": 0.9, "grad_norm": 0.8791270265381143, "learning_rate": 2.4261775759018445e-07, "loss": 0.6523, "step": 7091 }, { "epoch": 0.9, "grad_norm": 0.8256320099237336, "learning_rate": 2.419832383078352e-07, "loss": 0.6083, "step": 7092 }, { "epoch": 0.9, "grad_norm": 0.7394375310809276, "learning_rate": 2.413495292661716e-07, "loss": 0.5249, "step": 7093 }, { "epoch": 0.9, "grad_norm": 0.9538122054240401, "learning_rate": 2.407166305731079e-07, "loss": 0.5523, "step": 7094 }, { "epoch": 0.9, "grad_norm": 0.8663068288282182, "learning_rate": 2.400845423364201e-07, "loss": 0.5097, "step": 7095 }, { "epoch": 0.9, "grad_norm": 0.8748138919881248, "learning_rate": 2.3945326466374653e-07, "loss": 0.5417, "step": 7096 }, { "epoch": 0.9, "grad_norm": 0.8982500360953076, "learning_rate": 2.3882279766258676e-07, "loss": 0.6382, "step": 7097 }, { "epoch": 0.9, "grad_norm": 0.795692760464459, "learning_rate": 2.381931414403027e-07, "loss": 0.6448, "step": 7098 }, { "epoch": 0.9, "grad_norm": 1.1017615614465852, "learning_rate": 2.3756429610411913e-07, "loss": 0.6318, "step": 7099 }, { "epoch": 0.9, "grad_norm": 0.8856021951495591, "learning_rate": 2.3693626176112096e-07, "loss": 0.5187, "step": 7100 }, { "epoch": 0.9, "grad_norm": 0.9442964311471541, "learning_rate": 2.3630903851825593e-07, "loss": 0.6505, "step": 7101 }, { "epoch": 0.9, "grad_norm": 0.7271985109025816, "learning_rate": 2.356826264823342e-07, "loss": 0.5709, "step": 7102 }, { "epoch": 0.9, "grad_norm": 0.6359329365587, "learning_rate": 2.3505702576002654e-07, "loss": 0.4809, "step": 7103 }, { "epoch": 0.91, "grad_norm": 0.7253713559731307, "learning_rate": 2.3443223645786662e-07, "loss": 0.5283, "step": 7104 }, { "epoch": 0.91, "grad_norm": 0.8485637511844898, "learning_rate": 2.3380825868224987e-07, "loss": 0.628, "step": 7105 }, { "epoch": 0.91, "grad_norm": 0.6863163524885713, "learning_rate": 2.3318509253943245e-07, "loss": 0.5776, "step": 7106 }, { "epoch": 0.91, "grad_norm": 0.6164989318007248, "learning_rate": 2.3256273813553387e-07, "loss": 0.5022, "step": 7107 }, { "epoch": 0.91, "grad_norm": 0.713443878747001, "learning_rate": 2.3194119557653384e-07, "loss": 0.5374, "step": 7108 }, { "epoch": 0.91, "grad_norm": 0.7591216189472731, "learning_rate": 2.3132046496827486e-07, "loss": 0.5584, "step": 7109 }, { "epoch": 0.91, "grad_norm": 0.747974880105595, "learning_rate": 2.3070054641646134e-07, "loss": 0.5796, "step": 7110 }, { "epoch": 0.91, "grad_norm": 0.8951868454865017, "learning_rate": 2.3008144002665823e-07, "loss": 0.6346, "step": 7111 }, { "epoch": 0.91, "grad_norm": 0.8349950816077631, "learning_rate": 2.2946314590429287e-07, "loss": 0.6016, "step": 7112 }, { "epoch": 0.91, "grad_norm": 0.6399477910833015, "learning_rate": 2.288456641546549e-07, "loss": 0.5322, "step": 7113 }, { "epoch": 0.91, "grad_norm": 0.6932096174340461, "learning_rate": 2.2822899488289474e-07, "loss": 0.6073, "step": 7114 }, { "epoch": 0.91, "grad_norm": 0.6732670674436677, "learning_rate": 2.2761313819402553e-07, "loss": 0.5779, "step": 7115 }, { "epoch": 0.91, "grad_norm": 1.2724381653920382, "learning_rate": 2.2699809419292007e-07, "loss": 0.6472, "step": 7116 }, { "epoch": 0.91, "grad_norm": 0.9303540914468582, "learning_rate": 2.2638386298431404e-07, "loss": 0.6572, "step": 7117 }, { "epoch": 0.91, "grad_norm": 0.7968642434156843, "learning_rate": 2.257704446728054e-07, "loss": 0.5871, "step": 7118 }, { "epoch": 0.91, "grad_norm": 0.7161511395365222, "learning_rate": 2.251578393628523e-07, "loss": 0.5825, "step": 7119 }, { "epoch": 0.91, "grad_norm": 0.8062276010121023, "learning_rate": 2.245460471587757e-07, "loss": 0.6411, "step": 7120 }, { "epoch": 0.91, "grad_norm": 0.6448369603234269, "learning_rate": 2.2393506816475618e-07, "loss": 0.5076, "step": 7121 }, { "epoch": 0.91, "grad_norm": 0.7975509266431865, "learning_rate": 2.2332490248483828e-07, "loss": 0.5272, "step": 7122 }, { "epoch": 0.91, "grad_norm": 0.8450583049842941, "learning_rate": 2.2271555022292669e-07, "loss": 0.6058, "step": 7123 }, { "epoch": 0.91, "grad_norm": 0.8186238837891533, "learning_rate": 2.2210701148278724e-07, "loss": 0.5575, "step": 7124 }, { "epoch": 0.91, "grad_norm": 0.8552212947485776, "learning_rate": 2.2149928636804817e-07, "loss": 0.5459, "step": 7125 }, { "epoch": 0.91, "grad_norm": 0.7051197562685936, "learning_rate": 2.2089237498219839e-07, "loss": 0.5758, "step": 7126 }, { "epoch": 0.91, "grad_norm": 2.235319009836637, "learning_rate": 2.2028627742858855e-07, "loss": 0.5888, "step": 7127 }, { "epoch": 0.91, "grad_norm": 0.8552803463757278, "learning_rate": 2.1968099381043106e-07, "loss": 0.6369, "step": 7128 }, { "epoch": 0.91, "grad_norm": 0.6312595287701854, "learning_rate": 2.190765242307996e-07, "loss": 0.4955, "step": 7129 }, { "epoch": 0.91, "grad_norm": 0.6625882948581738, "learning_rate": 2.1847286879262852e-07, "loss": 0.5401, "step": 7130 }, { "epoch": 0.91, "grad_norm": 0.8906231153802382, "learning_rate": 2.1787002759871446e-07, "loss": 0.6107, "step": 7131 }, { "epoch": 0.91, "grad_norm": 0.6984324581123562, "learning_rate": 2.1726800075171473e-07, "loss": 0.4947, "step": 7132 }, { "epoch": 0.91, "grad_norm": 0.8061261657145004, "learning_rate": 2.1666678835414846e-07, "loss": 0.629, "step": 7133 }, { "epoch": 0.91, "grad_norm": 0.6426059207406561, "learning_rate": 2.1606639050839596e-07, "loss": 0.5209, "step": 7134 }, { "epoch": 0.91, "grad_norm": 0.734655949442228, "learning_rate": 2.1546680731669767e-07, "loss": 0.5614, "step": 7135 }, { "epoch": 0.91, "grad_norm": 1.028804723195199, "learning_rate": 2.1486803888115805e-07, "loss": 0.626, "step": 7136 }, { "epoch": 0.91, "grad_norm": 0.8773926003625261, "learning_rate": 2.1427008530373994e-07, "loss": 0.6251, "step": 7137 }, { "epoch": 0.91, "grad_norm": 0.9811830901944624, "learning_rate": 2.1367294668626803e-07, "loss": 0.7057, "step": 7138 }, { "epoch": 0.91, "grad_norm": 0.8689753685764708, "learning_rate": 2.1307662313043043e-07, "loss": 0.5909, "step": 7139 }, { "epoch": 0.91, "grad_norm": 0.6305367710103641, "learning_rate": 2.1248111473777422e-07, "loss": 0.4956, "step": 7140 }, { "epoch": 0.91, "grad_norm": 0.6734045563812018, "learning_rate": 2.118864216097083e-07, "loss": 0.5158, "step": 7141 }, { "epoch": 0.91, "grad_norm": 0.7333230002556563, "learning_rate": 2.1129254384750275e-07, "loss": 0.563, "step": 7142 }, { "epoch": 0.91, "grad_norm": 0.7174059188948292, "learning_rate": 2.1069948155228838e-07, "loss": 0.5815, "step": 7143 }, { "epoch": 0.91, "grad_norm": 0.7389695046596599, "learning_rate": 2.1010723482505712e-07, "loss": 0.6053, "step": 7144 }, { "epoch": 0.91, "grad_norm": 0.829117219036659, "learning_rate": 2.0951580376666393e-07, "loss": 0.5968, "step": 7145 }, { "epoch": 0.91, "grad_norm": 0.6973702716805741, "learning_rate": 2.0892518847782205e-07, "loss": 0.5013, "step": 7146 }, { "epoch": 0.91, "grad_norm": 0.7223751531504962, "learning_rate": 2.0833538905910723e-07, "loss": 0.5644, "step": 7147 }, { "epoch": 0.91, "grad_norm": 0.8771362530749554, "learning_rate": 2.0774640561095682e-07, "loss": 0.5349, "step": 7148 }, { "epoch": 0.91, "grad_norm": 0.8448018995415694, "learning_rate": 2.0715823823366844e-07, "loss": 0.6504, "step": 7149 }, { "epoch": 0.91, "grad_norm": 0.8098850175375887, "learning_rate": 2.0657088702740024e-07, "loss": 0.5978, "step": 7150 }, { "epoch": 0.91, "grad_norm": 0.9019805800373341, "learning_rate": 2.0598435209217228e-07, "loss": 0.6332, "step": 7151 }, { "epoch": 0.91, "grad_norm": 0.8902668900601771, "learning_rate": 2.053986335278657e-07, "loss": 0.6241, "step": 7152 }, { "epoch": 0.91, "grad_norm": 0.7405242224551115, "learning_rate": 2.0481373143422244e-07, "loss": 0.4857, "step": 7153 }, { "epoch": 0.91, "grad_norm": 0.8218848980735411, "learning_rate": 2.0422964591084394e-07, "loss": 0.6267, "step": 7154 }, { "epoch": 0.91, "grad_norm": 0.8089294191267027, "learning_rate": 2.036463770571956e-07, "loss": 0.6374, "step": 7155 }, { "epoch": 0.91, "grad_norm": 0.7234180045107353, "learning_rate": 2.0306392497260075e-07, "loss": 0.5363, "step": 7156 }, { "epoch": 0.91, "grad_norm": 0.8849831781816686, "learning_rate": 2.0248228975624506e-07, "loss": 0.622, "step": 7157 }, { "epoch": 0.91, "grad_norm": 0.8085358743725332, "learning_rate": 2.0190147150717598e-07, "loss": 0.5461, "step": 7158 }, { "epoch": 0.91, "grad_norm": 0.752844305249223, "learning_rate": 2.013214703242994e-07, "loss": 0.5222, "step": 7159 }, { "epoch": 0.91, "grad_norm": 0.7678663736220976, "learning_rate": 2.007422863063846e-07, "loss": 0.5523, "step": 7160 }, { "epoch": 0.91, "grad_norm": 0.6080486702035511, "learning_rate": 2.0016391955205993e-07, "loss": 0.5184, "step": 7161 }, { "epoch": 0.91, "grad_norm": 0.7833129578078779, "learning_rate": 1.9958637015981552e-07, "loss": 0.5631, "step": 7162 }, { "epoch": 0.91, "grad_norm": 0.803368956510164, "learning_rate": 1.9900963822800212e-07, "loss": 0.5957, "step": 7163 }, { "epoch": 0.91, "grad_norm": 0.9315691973525265, "learning_rate": 1.9843372385483062e-07, "loss": 0.6144, "step": 7164 }, { "epoch": 0.91, "grad_norm": 0.6501987968813367, "learning_rate": 1.9785862713837312e-07, "loss": 0.557, "step": 7165 }, { "epoch": 0.91, "grad_norm": 0.6644622950023554, "learning_rate": 1.972843481765635e-07, "loss": 0.5413, "step": 7166 }, { "epoch": 0.91, "grad_norm": 0.7703229252873747, "learning_rate": 1.967108870671952e-07, "loss": 0.5275, "step": 7167 }, { "epoch": 0.91, "grad_norm": 0.845235080616675, "learning_rate": 1.961382439079229e-07, "loss": 0.5838, "step": 7168 }, { "epoch": 0.91, "grad_norm": 0.9493000787658042, "learning_rate": 1.955664187962608e-07, "loss": 0.6042, "step": 7169 }, { "epoch": 0.91, "grad_norm": 0.7990135664535877, "learning_rate": 1.9499541182958604e-07, "loss": 0.6013, "step": 7170 }, { "epoch": 0.91, "grad_norm": 0.9409366215620388, "learning_rate": 1.9442522310513355e-07, "loss": 0.5653, "step": 7171 }, { "epoch": 0.91, "grad_norm": 0.7415705644774591, "learning_rate": 1.9385585272000241e-07, "loss": 0.5289, "step": 7172 }, { "epoch": 0.91, "grad_norm": 0.8663803473444577, "learning_rate": 1.9328730077114889e-07, "loss": 0.6092, "step": 7173 }, { "epoch": 0.91, "grad_norm": 0.6466471897218875, "learning_rate": 1.9271956735539222e-07, "loss": 0.491, "step": 7174 }, { "epoch": 0.91, "grad_norm": 0.9657893941012731, "learning_rate": 1.9215265256941117e-07, "loss": 0.6575, "step": 7175 }, { "epoch": 0.91, "grad_norm": 0.7102076254729801, "learning_rate": 1.915865565097458e-07, "loss": 0.528, "step": 7176 }, { "epoch": 0.91, "grad_norm": 0.6270302751772797, "learning_rate": 1.9102127927279613e-07, "loss": 0.5478, "step": 7177 }, { "epoch": 0.91, "grad_norm": 0.721118094924588, "learning_rate": 1.9045682095482298e-07, "loss": 0.5566, "step": 7178 }, { "epoch": 0.91, "grad_norm": 0.9338382777522265, "learning_rate": 1.898931816519478e-07, "loss": 0.6155, "step": 7179 }, { "epoch": 0.91, "grad_norm": 0.828588403972346, "learning_rate": 1.8933036146015271e-07, "loss": 0.6461, "step": 7180 }, { "epoch": 0.91, "grad_norm": 0.8703385692153343, "learning_rate": 1.8876836047527937e-07, "loss": 0.5794, "step": 7181 }, { "epoch": 0.91, "grad_norm": 0.8499247307989124, "learning_rate": 1.8820717879303173e-07, "loss": 0.5769, "step": 7182 }, { "epoch": 0.92, "grad_norm": 0.8346166661850465, "learning_rate": 1.876468165089723e-07, "loss": 0.5915, "step": 7183 }, { "epoch": 0.92, "grad_norm": 0.9179178226621876, "learning_rate": 1.8708727371852531e-07, "loss": 0.6033, "step": 7184 }, { "epoch": 0.92, "grad_norm": 0.6874965214042009, "learning_rate": 1.8652855051697504e-07, "loss": 0.5139, "step": 7185 }, { "epoch": 0.92, "grad_norm": 0.8641984261272758, "learning_rate": 1.8597064699946654e-07, "loss": 0.6015, "step": 7186 }, { "epoch": 0.92, "grad_norm": 0.6971071038458619, "learning_rate": 1.8541356326100436e-07, "loss": 0.5064, "step": 7187 }, { "epoch": 0.92, "grad_norm": 0.7535879680846994, "learning_rate": 1.8485729939645425e-07, "loss": 0.523, "step": 7188 }, { "epoch": 0.92, "grad_norm": 0.7943957796459852, "learning_rate": 1.8430185550054214e-07, "loss": 0.5439, "step": 7189 }, { "epoch": 0.92, "grad_norm": 0.7762325154146982, "learning_rate": 1.8374723166785456e-07, "loss": 0.6019, "step": 7190 }, { "epoch": 0.92, "grad_norm": 0.7455010974633227, "learning_rate": 1.8319342799283824e-07, "loss": 0.5829, "step": 7191 }, { "epoch": 0.92, "grad_norm": 0.7681783232511509, "learning_rate": 1.826404445697999e-07, "loss": 0.4493, "step": 7192 }, { "epoch": 0.92, "grad_norm": 0.7840365994342315, "learning_rate": 1.8208828149290702e-07, "loss": 0.5468, "step": 7193 }, { "epoch": 0.92, "grad_norm": 0.7118399932943732, "learning_rate": 1.8153693885618718e-07, "loss": 0.5628, "step": 7194 }, { "epoch": 0.92, "grad_norm": 0.6103364414898095, "learning_rate": 1.80986416753528e-07, "loss": 0.5268, "step": 7195 }, { "epoch": 0.92, "grad_norm": 0.7428047166651373, "learning_rate": 1.8043671527867846e-07, "loss": 0.5388, "step": 7196 }, { "epoch": 0.92, "grad_norm": 0.6431049238974033, "learning_rate": 1.7988783452524583e-07, "loss": 0.5188, "step": 7197 }, { "epoch": 0.92, "grad_norm": 0.8226918770684055, "learning_rate": 1.7933977458669982e-07, "loss": 0.6034, "step": 7198 }, { "epoch": 0.92, "grad_norm": 0.9584408885605662, "learning_rate": 1.7879253555636855e-07, "loss": 0.6481, "step": 7199 }, { "epoch": 0.92, "grad_norm": 0.8108181407093012, "learning_rate": 1.7824611752744193e-07, "loss": 0.6093, "step": 7200 }, { "epoch": 0.92, "grad_norm": 0.7789346752929458, "learning_rate": 1.7770052059296882e-07, "loss": 0.5973, "step": 7201 }, { "epoch": 0.92, "grad_norm": 0.8033982393375146, "learning_rate": 1.7715574484585885e-07, "loss": 0.6144, "step": 7202 }, { "epoch": 0.92, "grad_norm": 0.6650795973550269, "learning_rate": 1.766117903788811e-07, "loss": 0.549, "step": 7203 }, { "epoch": 0.92, "grad_norm": 0.9048984414670657, "learning_rate": 1.7606865728466592e-07, "loss": 0.5862, "step": 7204 }, { "epoch": 0.92, "grad_norm": 0.6989273872268915, "learning_rate": 1.7552634565570325e-07, "loss": 0.544, "step": 7205 }, { "epoch": 0.92, "grad_norm": 0.800429379818265, "learning_rate": 1.7498485558434252e-07, "loss": 0.5694, "step": 7206 }, { "epoch": 0.92, "grad_norm": 0.733207102092401, "learning_rate": 1.7444418716279443e-07, "loss": 0.5328, "step": 7207 }, { "epoch": 0.92, "grad_norm": 0.8065312252122538, "learning_rate": 1.739043404831292e-07, "loss": 0.5699, "step": 7208 }, { "epoch": 0.92, "grad_norm": 0.7543913815098466, "learning_rate": 1.7336531563727664e-07, "loss": 0.6131, "step": 7209 }, { "epoch": 0.92, "grad_norm": 0.8908361515944642, "learning_rate": 1.7282711271702723e-07, "loss": 0.6604, "step": 7210 }, { "epoch": 0.92, "grad_norm": 0.9352180956734087, "learning_rate": 1.7228973181403153e-07, "loss": 0.6345, "step": 7211 }, { "epoch": 0.92, "grad_norm": 0.8337364430948481, "learning_rate": 1.7175317301979967e-07, "loss": 0.6209, "step": 7212 }, { "epoch": 0.92, "grad_norm": 0.7236231476194895, "learning_rate": 1.7121743642570244e-07, "loss": 0.5255, "step": 7213 }, { "epoch": 0.92, "grad_norm": 0.6804012606244086, "learning_rate": 1.7068252212296966e-07, "loss": 0.5429, "step": 7214 }, { "epoch": 0.92, "grad_norm": 0.7443964473025415, "learning_rate": 1.7014843020269178e-07, "loss": 0.5419, "step": 7215 }, { "epoch": 0.92, "grad_norm": 0.7612930878922145, "learning_rate": 1.696151607558194e-07, "loss": 0.6044, "step": 7216 }, { "epoch": 0.92, "grad_norm": 1.0337458171805556, "learning_rate": 1.6908271387316267e-07, "loss": 0.6532, "step": 7217 }, { "epoch": 0.92, "grad_norm": 0.6108009146553584, "learning_rate": 1.6855108964539124e-07, "loss": 0.4885, "step": 7218 }, { "epoch": 0.92, "grad_norm": 0.7919708868274475, "learning_rate": 1.6802028816303606e-07, "loss": 0.5667, "step": 7219 }, { "epoch": 0.92, "grad_norm": 0.9418347945434669, "learning_rate": 1.6749030951648647e-07, "loss": 0.6393, "step": 7220 }, { "epoch": 0.92, "grad_norm": 0.9298620630681734, "learning_rate": 1.6696115379599255e-07, "loss": 0.635, "step": 7221 }, { "epoch": 0.92, "grad_norm": 0.7577334959722177, "learning_rate": 1.6643282109166326e-07, "loss": 0.5552, "step": 7222 }, { "epoch": 0.92, "grad_norm": 0.8401159965497546, "learning_rate": 1.659053114934689e-07, "loss": 0.643, "step": 7223 }, { "epoch": 0.92, "grad_norm": 0.7632485871117627, "learning_rate": 1.6537862509123924e-07, "loss": 0.5098, "step": 7224 }, { "epoch": 0.92, "grad_norm": 0.6836376357237847, "learning_rate": 1.648527619746626e-07, "loss": 0.5736, "step": 7225 }, { "epoch": 0.92, "grad_norm": 0.856336635320358, "learning_rate": 1.6432772223328898e-07, "loss": 0.6342, "step": 7226 }, { "epoch": 0.92, "grad_norm": 0.8174785267813208, "learning_rate": 1.6380350595652628e-07, "loss": 0.582, "step": 7227 }, { "epoch": 0.92, "grad_norm": 0.8619754409539745, "learning_rate": 1.632801132336431e-07, "loss": 0.5851, "step": 7228 }, { "epoch": 0.92, "grad_norm": 0.8209396215900154, "learning_rate": 1.627575441537682e-07, "loss": 0.6033, "step": 7229 }, { "epoch": 0.92, "grad_norm": 0.7839055606645394, "learning_rate": 1.622357988058898e-07, "loss": 0.5837, "step": 7230 }, { "epoch": 0.92, "grad_norm": 0.7393864397438376, "learning_rate": 1.6171487727885515e-07, "loss": 0.473, "step": 7231 }, { "epoch": 0.92, "grad_norm": 0.6820447291478476, "learning_rate": 1.611947796613722e-07, "loss": 0.5325, "step": 7232 }, { "epoch": 0.92, "grad_norm": 0.8375674756471989, "learning_rate": 1.6067550604200787e-07, "loss": 0.5889, "step": 7233 }, { "epoch": 0.92, "grad_norm": 0.6278770645329156, "learning_rate": 1.601570565091892e-07, "loss": 0.4811, "step": 7234 }, { "epoch": 0.92, "grad_norm": 0.9216177944482743, "learning_rate": 1.5963943115120284e-07, "loss": 0.6323, "step": 7235 }, { "epoch": 0.92, "grad_norm": 0.662379481099137, "learning_rate": 1.5912263005619433e-07, "loss": 0.5101, "step": 7236 }, { "epoch": 0.92, "grad_norm": 1.0434824488140055, "learning_rate": 1.5860665331217052e-07, "loss": 0.6077, "step": 7237 }, { "epoch": 0.92, "grad_norm": 0.6463294416645726, "learning_rate": 1.580915010069961e-07, "loss": 0.5412, "step": 7238 }, { "epoch": 0.92, "grad_norm": 0.79385198066356, "learning_rate": 1.575771732283965e-07, "loss": 0.66, "step": 7239 }, { "epoch": 0.92, "grad_norm": 0.811795868922921, "learning_rate": 1.5706367006395607e-07, "loss": 0.5595, "step": 7240 }, { "epoch": 0.92, "grad_norm": 0.9237649889918583, "learning_rate": 1.5655099160111986e-07, "loss": 0.6342, "step": 7241 }, { "epoch": 0.92, "grad_norm": 0.9513765092354893, "learning_rate": 1.5603913792719083e-07, "loss": 0.6577, "step": 7242 }, { "epoch": 0.92, "grad_norm": 0.8941049062976215, "learning_rate": 1.5552810912933202e-07, "loss": 0.6472, "step": 7243 }, { "epoch": 0.92, "grad_norm": 0.762418050132673, "learning_rate": 1.5501790529456772e-07, "loss": 0.6102, "step": 7244 }, { "epoch": 0.92, "grad_norm": 0.7413108397970571, "learning_rate": 1.54508526509779e-07, "loss": 0.5173, "step": 7245 }, { "epoch": 0.92, "grad_norm": 0.9122060565943907, "learning_rate": 1.5399997286170865e-07, "loss": 0.5982, "step": 7246 }, { "epoch": 0.92, "grad_norm": 0.7603507605809376, "learning_rate": 1.5349224443695742e-07, "loss": 0.5359, "step": 7247 }, { "epoch": 0.92, "grad_norm": 0.8170087182992468, "learning_rate": 1.5298534132198616e-07, "loss": 0.5981, "step": 7248 }, { "epoch": 0.92, "grad_norm": 0.7791241193311133, "learning_rate": 1.5247926360311528e-07, "loss": 0.6004, "step": 7249 }, { "epoch": 0.92, "grad_norm": 0.7312295761256361, "learning_rate": 1.5197401136652523e-07, "loss": 0.5328, "step": 7250 }, { "epoch": 0.92, "grad_norm": 0.6092954745272051, "learning_rate": 1.5146958469825446e-07, "loss": 0.4666, "step": 7251 }, { "epoch": 0.92, "grad_norm": 0.8649372402444326, "learning_rate": 1.50965983684202e-07, "loss": 0.5055, "step": 7252 }, { "epoch": 0.92, "grad_norm": 0.8306144127054994, "learning_rate": 1.5046320841012541e-07, "loss": 0.6647, "step": 7253 }, { "epoch": 0.92, "grad_norm": 0.7027455000400481, "learning_rate": 1.4996125896164226e-07, "loss": 0.5335, "step": 7254 }, { "epoch": 0.92, "grad_norm": 0.8171187402406849, "learning_rate": 1.4946013542422976e-07, "loss": 0.5794, "step": 7255 }, { "epoch": 0.92, "grad_norm": 0.7163602134643401, "learning_rate": 1.4895983788322298e-07, "loss": 0.5958, "step": 7256 }, { "epoch": 0.92, "grad_norm": 0.6363625728689332, "learning_rate": 1.4846036642381878e-07, "loss": 0.5228, "step": 7257 }, { "epoch": 0.92, "grad_norm": 0.7820200790663094, "learning_rate": 1.4796172113107076e-07, "loss": 0.5387, "step": 7258 }, { "epoch": 0.92, "grad_norm": 0.8966288364832229, "learning_rate": 1.474639020898938e-07, "loss": 0.6284, "step": 7259 }, { "epoch": 0.92, "grad_norm": 1.7253105896826562, "learning_rate": 1.4696690938506063e-07, "loss": 0.663, "step": 7260 }, { "epoch": 0.93, "grad_norm": 0.8053078621884595, "learning_rate": 1.4647074310120467e-07, "loss": 0.5073, "step": 7261 }, { "epoch": 0.93, "grad_norm": 0.7558488047770283, "learning_rate": 1.4597540332281724e-07, "loss": 0.5786, "step": 7262 }, { "epoch": 0.93, "grad_norm": 0.8031110169382346, "learning_rate": 1.454808901342497e-07, "loss": 0.5034, "step": 7263 }, { "epoch": 0.93, "grad_norm": 0.7999272606864025, "learning_rate": 1.4498720361971251e-07, "loss": 0.6249, "step": 7264 }, { "epoch": 0.93, "grad_norm": 0.7622721677428819, "learning_rate": 1.444943438632751e-07, "loss": 0.5436, "step": 7265 }, { "epoch": 0.93, "grad_norm": 0.7103629644539494, "learning_rate": 1.4400231094886696e-07, "loss": 0.5383, "step": 7266 }, { "epoch": 0.93, "grad_norm": 0.7517678667871269, "learning_rate": 1.435111049602761e-07, "loss": 0.5413, "step": 7267 }, { "epoch": 0.93, "grad_norm": 0.7716418733229863, "learning_rate": 1.4302072598114947e-07, "loss": 0.5301, "step": 7268 }, { "epoch": 0.93, "grad_norm": 0.7210693409852775, "learning_rate": 1.4253117409499307e-07, "loss": 0.5437, "step": 7269 }, { "epoch": 0.93, "grad_norm": 0.6810966492044397, "learning_rate": 1.4204244938517353e-07, "loss": 0.5174, "step": 7270 }, { "epoch": 0.93, "grad_norm": 0.7882773009113102, "learning_rate": 1.4155455193491485e-07, "loss": 0.5773, "step": 7271 }, { "epoch": 0.93, "grad_norm": 0.7625913996140953, "learning_rate": 1.4106748182730057e-07, "loss": 0.5178, "step": 7272 }, { "epoch": 0.93, "grad_norm": 0.7765444837546138, "learning_rate": 1.4058123914527489e-07, "loss": 0.5614, "step": 7273 }, { "epoch": 0.93, "grad_norm": 0.8589502591680025, "learning_rate": 1.4009582397163878e-07, "loss": 0.6607, "step": 7274 }, { "epoch": 0.93, "grad_norm": 0.6871440011206116, "learning_rate": 1.3961123638905338e-07, "loss": 0.5279, "step": 7275 }, { "epoch": 0.93, "grad_norm": 0.6566957303391037, "learning_rate": 1.3912747648003933e-07, "loss": 0.5171, "step": 7276 }, { "epoch": 0.93, "grad_norm": 0.8791288500475686, "learning_rate": 1.3864454432697626e-07, "loss": 0.6833, "step": 7277 }, { "epoch": 0.93, "grad_norm": 0.6856361885371615, "learning_rate": 1.3816244001210177e-07, "loss": 0.4997, "step": 7278 }, { "epoch": 0.93, "grad_norm": 0.8135724637012066, "learning_rate": 1.376811636175135e-07, "loss": 0.6011, "step": 7279 }, { "epoch": 0.93, "grad_norm": 0.695346015847785, "learning_rate": 1.372007152251681e-07, "loss": 0.5035, "step": 7280 }, { "epoch": 0.93, "grad_norm": 0.7426620018324094, "learning_rate": 1.3672109491688068e-07, "loss": 0.5772, "step": 7281 }, { "epoch": 0.93, "grad_norm": 0.874240425056211, "learning_rate": 1.3624230277432538e-07, "loss": 0.5638, "step": 7282 }, { "epoch": 0.93, "grad_norm": 0.8266477060745181, "learning_rate": 1.357643388790353e-07, "loss": 0.6329, "step": 7283 }, { "epoch": 0.93, "grad_norm": 0.8310711632642739, "learning_rate": 1.3528720331240363e-07, "loss": 0.6437, "step": 7284 }, { "epoch": 0.93, "grad_norm": 0.7857312126715882, "learning_rate": 1.3481089615568044e-07, "loss": 0.542, "step": 7285 }, { "epoch": 0.93, "grad_norm": 0.6357937854081518, "learning_rate": 1.3433541748997692e-07, "loss": 0.4875, "step": 7286 }, { "epoch": 0.93, "grad_norm": 0.8116090325592704, "learning_rate": 1.3386076739626164e-07, "loss": 0.5983, "step": 7287 }, { "epoch": 0.93, "grad_norm": 0.7126544222392629, "learning_rate": 1.3338694595536218e-07, "loss": 0.5082, "step": 7288 }, { "epoch": 0.93, "grad_norm": 0.8862799074073311, "learning_rate": 1.3291395324796618e-07, "loss": 0.6758, "step": 7289 }, { "epoch": 0.93, "grad_norm": 0.8850685983174424, "learning_rate": 1.3244178935461926e-07, "loss": 0.5197, "step": 7290 }, { "epoch": 0.93, "grad_norm": 0.6416868278447686, "learning_rate": 1.3197045435572487e-07, "loss": 0.4744, "step": 7291 }, { "epoch": 0.93, "grad_norm": 0.6065229160659318, "learning_rate": 1.3149994833154768e-07, "loss": 0.515, "step": 7292 }, { "epoch": 0.93, "grad_norm": 0.6255487609177446, "learning_rate": 1.310302713622097e-07, "loss": 0.4644, "step": 7293 }, { "epoch": 0.93, "grad_norm": 0.7671393331462449, "learning_rate": 1.3056142352769253e-07, "loss": 0.5709, "step": 7294 }, { "epoch": 0.93, "grad_norm": 0.8288261003774176, "learning_rate": 1.3009340490783507e-07, "loss": 0.5772, "step": 7295 }, { "epoch": 0.93, "grad_norm": 0.8965150342414727, "learning_rate": 1.2962621558233635e-07, "loss": 0.6201, "step": 7296 }, { "epoch": 0.93, "grad_norm": 0.7358651322956123, "learning_rate": 1.2915985563075383e-07, "loss": 0.5704, "step": 7297 }, { "epoch": 0.93, "grad_norm": 0.5996505589400662, "learning_rate": 1.2869432513250403e-07, "loss": 0.4781, "step": 7298 }, { "epoch": 0.93, "grad_norm": 0.8826346651961483, "learning_rate": 1.282296241668618e-07, "loss": 0.6141, "step": 7299 }, { "epoch": 0.93, "grad_norm": 0.6917842156335661, "learning_rate": 1.2776575281296055e-07, "loss": 0.5725, "step": 7300 }, { "epoch": 0.93, "grad_norm": 0.8483391325350825, "learning_rate": 1.2730271114979375e-07, "loss": 0.5803, "step": 7301 }, { "epoch": 0.93, "grad_norm": 0.662984599520145, "learning_rate": 1.268404992562111e-07, "loss": 0.4692, "step": 7302 }, { "epoch": 0.93, "grad_norm": 0.716657251695465, "learning_rate": 1.2637911721092356e-07, "loss": 0.5637, "step": 7303 }, { "epoch": 0.93, "grad_norm": 1.0023802468834073, "learning_rate": 1.2591856509249935e-07, "loss": 0.6214, "step": 7304 }, { "epoch": 0.93, "grad_norm": 0.6850418100402297, "learning_rate": 1.2545884297936572e-07, "loss": 0.5286, "step": 7305 }, { "epoch": 0.93, "grad_norm": 0.7705106271550255, "learning_rate": 1.2499995094980843e-07, "loss": 0.5944, "step": 7306 }, { "epoch": 0.93, "grad_norm": 0.7177916983823144, "learning_rate": 1.245418890819716e-07, "loss": 0.5682, "step": 7307 }, { "epoch": 0.93, "grad_norm": 0.8325556302563488, "learning_rate": 1.24084657453859e-07, "loss": 0.6101, "step": 7308 }, { "epoch": 0.93, "grad_norm": 0.6735129378054524, "learning_rate": 1.2362825614333275e-07, "loss": 0.5494, "step": 7309 }, { "epoch": 0.93, "grad_norm": 0.6406439998324431, "learning_rate": 1.2317268522811286e-07, "loss": 0.5456, "step": 7310 }, { "epoch": 0.93, "grad_norm": 0.8626057098563875, "learning_rate": 1.2271794478577847e-07, "loss": 0.6458, "step": 7311 }, { "epoch": 0.93, "grad_norm": 0.7293757717691276, "learning_rate": 1.222640348937665e-07, "loss": 0.543, "step": 7312 }, { "epoch": 0.93, "grad_norm": 0.7205390701467687, "learning_rate": 1.218109556293734e-07, "loss": 0.5554, "step": 7313 }, { "epoch": 0.93, "grad_norm": 0.6394006074449649, "learning_rate": 1.2135870706975416e-07, "loss": 0.488, "step": 7314 }, { "epoch": 0.93, "grad_norm": 0.6875498888448462, "learning_rate": 1.209072892919222e-07, "loss": 0.5286, "step": 7315 }, { "epoch": 0.93, "grad_norm": 0.8722385122980011, "learning_rate": 1.204567023727482e-07, "loss": 0.556, "step": 7316 }, { "epoch": 0.93, "grad_norm": 0.8141119333855233, "learning_rate": 1.200069463889636e-07, "loss": 0.6031, "step": 7317 }, { "epoch": 0.93, "grad_norm": 0.8181392856626931, "learning_rate": 1.19558021417156e-07, "loss": 0.6232, "step": 7318 }, { "epoch": 0.93, "grad_norm": 0.8190091678953136, "learning_rate": 1.191099275337737e-07, "loss": 0.579, "step": 7319 }, { "epoch": 0.93, "grad_norm": 1.0702031458544996, "learning_rate": 1.1866266481512234e-07, "loss": 0.6566, "step": 7320 }, { "epoch": 0.93, "grad_norm": 0.9393952041689086, "learning_rate": 1.1821623333736487e-07, "loss": 0.6161, "step": 7321 }, { "epoch": 0.93, "grad_norm": 0.892329138021765, "learning_rate": 1.1777063317652549e-07, "loss": 0.7195, "step": 7322 }, { "epoch": 0.93, "grad_norm": 0.6428168154385282, "learning_rate": 1.1732586440848459e-07, "loss": 0.5451, "step": 7323 }, { "epoch": 0.93, "grad_norm": 0.7467473564336984, "learning_rate": 1.168819271089816e-07, "loss": 0.4558, "step": 7324 }, { "epoch": 0.93, "grad_norm": 0.6996962877786095, "learning_rate": 1.1643882135361384e-07, "loss": 0.5489, "step": 7325 }, { "epoch": 0.93, "grad_norm": 0.729384449939817, "learning_rate": 1.159965472178387e-07, "loss": 0.4909, "step": 7326 }, { "epoch": 0.93, "grad_norm": 0.9210812763635579, "learning_rate": 1.155551047769704e-07, "loss": 0.6422, "step": 7327 }, { "epoch": 0.93, "grad_norm": 0.7522939144579935, "learning_rate": 1.1511449410618214e-07, "loss": 0.547, "step": 7328 }, { "epoch": 0.93, "grad_norm": 0.9392851914349472, "learning_rate": 1.14674715280505e-07, "loss": 0.6285, "step": 7329 }, { "epoch": 0.93, "grad_norm": 0.8490034260807524, "learning_rate": 1.1423576837482908e-07, "loss": 0.6993, "step": 7330 }, { "epoch": 0.93, "grad_norm": 0.7692444173967874, "learning_rate": 1.1379765346390181e-07, "loss": 0.5675, "step": 7331 }, { "epoch": 0.93, "grad_norm": 0.7618326757462582, "learning_rate": 1.1336037062233075e-07, "loss": 0.5529, "step": 7332 }, { "epoch": 0.93, "grad_norm": 0.9470098883084807, "learning_rate": 1.1292391992457907e-07, "loss": 0.6862, "step": 7333 }, { "epoch": 0.93, "grad_norm": 0.9390875430241059, "learning_rate": 1.124883014449707e-07, "loss": 0.6755, "step": 7334 }, { "epoch": 0.93, "grad_norm": 0.8053956607457149, "learning_rate": 1.1205351525768737e-07, "loss": 0.5763, "step": 7335 }, { "epoch": 0.93, "grad_norm": 0.8040419476669075, "learning_rate": 1.1161956143676822e-07, "loss": 0.6063, "step": 7336 }, { "epoch": 0.93, "grad_norm": 0.8922416387459771, "learning_rate": 1.1118644005611079e-07, "loss": 0.62, "step": 7337 }, { "epoch": 0.93, "grad_norm": 0.6470085687473525, "learning_rate": 1.1075415118947108e-07, "loss": 0.4567, "step": 7338 }, { "epoch": 0.93, "grad_norm": 0.6174593204703737, "learning_rate": 1.103226949104641e-07, "loss": 0.4962, "step": 7339 }, { "epoch": 0.94, "grad_norm": 0.7269615687151015, "learning_rate": 1.0989207129256163e-07, "loss": 0.5989, "step": 7340 }, { "epoch": 0.94, "grad_norm": 0.8502106075474133, "learning_rate": 1.0946228040909445e-07, "loss": 0.6283, "step": 7341 }, { "epoch": 0.94, "grad_norm": 0.6486284023748357, "learning_rate": 1.0903332233325181e-07, "loss": 0.4695, "step": 7342 }, { "epoch": 0.94, "grad_norm": 0.7905882462156122, "learning_rate": 1.0860519713808082e-07, "loss": 0.601, "step": 7343 }, { "epoch": 0.94, "grad_norm": 0.7528406797193351, "learning_rate": 1.081779048964865e-07, "loss": 0.5193, "step": 7344 }, { "epoch": 0.94, "grad_norm": 0.7405114275518584, "learning_rate": 1.077514456812323e-07, "loss": 0.532, "step": 7345 }, { "epoch": 0.94, "grad_norm": 0.73006527424087, "learning_rate": 1.0732581956494015e-07, "loss": 0.5076, "step": 7346 }, { "epoch": 0.94, "grad_norm": 0.7338725281136042, "learning_rate": 1.0690102662008928e-07, "loss": 0.5611, "step": 7347 }, { "epoch": 0.94, "grad_norm": 0.5958310173568875, "learning_rate": 1.0647706691901738e-07, "loss": 0.5405, "step": 7348 }, { "epoch": 0.94, "grad_norm": 0.7392212911350581, "learning_rate": 1.060539405339206e-07, "loss": 0.5911, "step": 7349 }, { "epoch": 0.94, "grad_norm": 0.9405858773290489, "learning_rate": 1.0563164753685296e-07, "loss": 0.6476, "step": 7350 }, { "epoch": 0.94, "grad_norm": 0.7958212301674082, "learning_rate": 1.0521018799972693e-07, "loss": 0.5629, "step": 7351 }, { "epoch": 0.94, "grad_norm": 0.7013041877092918, "learning_rate": 1.0478956199431233e-07, "loss": 0.5491, "step": 7352 }, { "epoch": 0.94, "grad_norm": 0.7769767823289179, "learning_rate": 1.0436976959223688e-07, "loss": 0.5288, "step": 7353 }, { "epoch": 0.94, "grad_norm": 0.7095681781276898, "learning_rate": 1.0395081086498781e-07, "loss": 0.5226, "step": 7354 }, { "epoch": 0.94, "grad_norm": 0.8854674966142587, "learning_rate": 1.0353268588390863e-07, "loss": 0.6602, "step": 7355 }, { "epoch": 0.94, "grad_norm": 0.6555574370439939, "learning_rate": 1.0311539472020238e-07, "loss": 0.4829, "step": 7356 }, { "epoch": 0.94, "grad_norm": 0.7159034068352127, "learning_rate": 1.0269893744492887e-07, "loss": 0.5749, "step": 7357 }, { "epoch": 0.94, "grad_norm": 0.666760197261771, "learning_rate": 1.0228331412900693e-07, "loss": 0.4956, "step": 7358 }, { "epoch": 0.94, "grad_norm": 0.9035166141226554, "learning_rate": 1.0186852484321218e-07, "loss": 0.5535, "step": 7359 }, { "epoch": 0.94, "grad_norm": 0.7517025558418778, "learning_rate": 1.0145456965817868e-07, "loss": 0.5361, "step": 7360 }, { "epoch": 0.94, "grad_norm": 0.7588075554988031, "learning_rate": 1.0104144864440002e-07, "loss": 0.5532, "step": 7361 }, { "epoch": 0.94, "grad_norm": 0.6815012408779785, "learning_rate": 1.0062916187222549e-07, "loss": 0.488, "step": 7362 }, { "epoch": 0.94, "grad_norm": 1.0222338230754746, "learning_rate": 1.0021770941186393e-07, "loss": 0.6169, "step": 7363 }, { "epoch": 0.94, "grad_norm": 0.8091819969509825, "learning_rate": 9.98070913333804e-08, "loss": 0.6313, "step": 7364 }, { "epoch": 0.94, "grad_norm": 0.7173543941400221, "learning_rate": 9.939730770669953e-08, "loss": 0.5647, "step": 7365 }, { "epoch": 0.94, "grad_norm": 0.7284321572404366, "learning_rate": 9.898835860160272e-08, "loss": 0.5762, "step": 7366 }, { "epoch": 0.94, "grad_norm": 0.6400188578213188, "learning_rate": 9.858024408773036e-08, "loss": 0.5446, "step": 7367 }, { "epoch": 0.94, "grad_norm": 0.7736441661010852, "learning_rate": 9.817296423457968e-08, "loss": 0.5464, "step": 7368 }, { "epoch": 0.94, "grad_norm": 0.8063318126842265, "learning_rate": 9.776651911150681e-08, "loss": 0.6151, "step": 7369 }, { "epoch": 0.94, "grad_norm": 0.9714943737295673, "learning_rate": 9.73609087877242e-08, "loss": 0.635, "step": 7370 }, { "epoch": 0.94, "grad_norm": 0.9055985251724772, "learning_rate": 9.69561333323038e-08, "loss": 0.6246, "step": 7371 }, { "epoch": 0.94, "grad_norm": 0.7802595122283081, "learning_rate": 9.655219281417382e-08, "loss": 0.5992, "step": 7372 }, { "epoch": 0.94, "grad_norm": 0.7642877634701126, "learning_rate": 9.614908730212202e-08, "loss": 0.6061, "step": 7373 }, { "epoch": 0.94, "grad_norm": 0.7614338289147581, "learning_rate": 9.574681686479237e-08, "loss": 0.5357, "step": 7374 }, { "epoch": 0.94, "grad_norm": 0.7084461885573448, "learning_rate": 9.53453815706884e-08, "loss": 0.5143, "step": 7375 }, { "epoch": 0.94, "grad_norm": 0.7294685552871213, "learning_rate": 9.494478148816877e-08, "loss": 0.5369, "step": 7376 }, { "epoch": 0.94, "grad_norm": 0.6528965384956722, "learning_rate": 9.454501668545279e-08, "loss": 0.4736, "step": 7377 }, { "epoch": 0.94, "grad_norm": 0.6911121271333714, "learning_rate": 9.414608723061602e-08, "loss": 0.5119, "step": 7378 }, { "epoch": 0.94, "grad_norm": 0.6698477467275437, "learning_rate": 9.374799319159134e-08, "loss": 0.5089, "step": 7379 }, { "epoch": 0.94, "grad_norm": 0.858908076645722, "learning_rate": 9.335073463617062e-08, "loss": 0.5925, "step": 7380 }, { "epoch": 0.94, "grad_norm": 0.9747074207088203, "learning_rate": 9.295431163200308e-08, "loss": 0.6454, "step": 7381 }, { "epoch": 0.94, "grad_norm": 0.9200028770155853, "learning_rate": 9.255872424659418e-08, "loss": 0.6253, "step": 7382 }, { "epoch": 0.94, "grad_norm": 0.7470864329520069, "learning_rate": 9.216397254731002e-08, "loss": 0.581, "step": 7383 }, { "epoch": 0.94, "grad_norm": 0.623530278497163, "learning_rate": 9.177005660137129e-08, "loss": 0.5032, "step": 7384 }, { "epoch": 0.94, "grad_norm": 0.7311882987243897, "learning_rate": 9.137697647585875e-08, "loss": 0.5648, "step": 7385 }, { "epoch": 0.94, "grad_norm": 0.7830188672111648, "learning_rate": 9.098473223770943e-08, "loss": 0.6198, "step": 7386 }, { "epoch": 0.94, "grad_norm": 0.6719440031138225, "learning_rate": 9.059332395371768e-08, "loss": 0.4882, "step": 7387 }, { "epoch": 0.94, "grad_norm": 0.736446488086922, "learning_rate": 9.020275169053793e-08, "loss": 0.4992, "step": 7388 }, { "epoch": 0.94, "grad_norm": 0.8108198488966872, "learning_rate": 8.981301551467924e-08, "loss": 0.5367, "step": 7389 }, { "epoch": 0.94, "grad_norm": 0.6705747796265014, "learning_rate": 8.942411549251018e-08, "loss": 0.4735, "step": 7390 }, { "epoch": 0.94, "grad_norm": 0.9262665393606445, "learning_rate": 8.903605169025609e-08, "loss": 0.5458, "step": 7391 }, { "epoch": 0.94, "grad_norm": 0.6807210799599605, "learning_rate": 8.864882417400078e-08, "loss": 0.5708, "step": 7392 }, { "epoch": 0.94, "grad_norm": 0.7040951620549809, "learning_rate": 8.826243300968374e-08, "loss": 0.5729, "step": 7393 }, { "epoch": 0.94, "grad_norm": 0.7551867210440104, "learning_rate": 8.787687826310509e-08, "loss": 0.6155, "step": 7394 }, { "epoch": 0.94, "grad_norm": 0.7681511535557327, "learning_rate": 8.749215999991956e-08, "loss": 0.5192, "step": 7395 }, { "epoch": 0.94, "grad_norm": 0.8799553300687178, "learning_rate": 8.710827828564139e-08, "loss": 0.5438, "step": 7396 }, { "epoch": 0.94, "grad_norm": 0.8819019453552943, "learning_rate": 8.672523318564108e-08, "loss": 0.5513, "step": 7397 }, { "epoch": 0.94, "grad_norm": 0.6555108477083655, "learning_rate": 8.634302476514755e-08, "loss": 0.4952, "step": 7398 }, { "epoch": 0.94, "grad_norm": 0.8049771846196506, "learning_rate": 8.596165308924708e-08, "loss": 0.4966, "step": 7399 }, { "epoch": 0.94, "grad_norm": 0.6445621320653898, "learning_rate": 8.558111822288217e-08, "loss": 0.5245, "step": 7400 }, { "epoch": 0.94, "grad_norm": 0.8071006753641993, "learning_rate": 8.520142023085543e-08, "loss": 0.6203, "step": 7401 }, { "epoch": 0.94, "grad_norm": 0.7892589034145806, "learning_rate": 8.482255917782455e-08, "loss": 0.557, "step": 7402 }, { "epoch": 0.94, "grad_norm": 0.6910387541917993, "learning_rate": 8.444453512830631e-08, "loss": 0.5416, "step": 7403 }, { "epoch": 0.94, "grad_norm": 0.659051561439889, "learning_rate": 8.406734814667361e-08, "loss": 0.5465, "step": 7404 }, { "epoch": 0.94, "grad_norm": 0.6302135302637807, "learning_rate": 8.369099829715733e-08, "loss": 0.5138, "step": 7405 }, { "epoch": 0.94, "grad_norm": 1.021620722476716, "learning_rate": 8.331548564384672e-08, "loss": 0.6353, "step": 7406 }, { "epoch": 0.94, "grad_norm": 0.6548546798751134, "learning_rate": 8.294081025068734e-08, "loss": 0.4992, "step": 7407 }, { "epoch": 0.94, "grad_norm": 0.7814362816415003, "learning_rate": 8.256697218148202e-08, "loss": 0.5288, "step": 7408 }, { "epoch": 0.94, "grad_norm": 0.6080749573520178, "learning_rate": 8.219397149989206e-08, "loss": 0.5044, "step": 7409 }, { "epoch": 0.94, "grad_norm": 0.8875353285515969, "learning_rate": 8.182180826943554e-08, "loss": 0.6534, "step": 7410 }, { "epoch": 0.94, "grad_norm": 0.9955848566920733, "learning_rate": 8.145048255348786e-08, "loss": 0.6272, "step": 7411 }, { "epoch": 0.94, "grad_norm": 1.2900601951864894, "learning_rate": 8.10799944152818e-08, "loss": 0.6536, "step": 7412 }, { "epoch": 0.94, "grad_norm": 0.6691976740484893, "learning_rate": 8.071034391790799e-08, "loss": 0.5216, "step": 7413 }, { "epoch": 0.94, "grad_norm": 0.6992770355368774, "learning_rate": 8.034153112431331e-08, "loss": 0.5573, "step": 7414 }, { "epoch": 0.94, "grad_norm": 0.7573448448022497, "learning_rate": 7.997355609730361e-08, "loss": 0.5036, "step": 7415 }, { "epoch": 0.94, "grad_norm": 0.7895226861238228, "learning_rate": 7.960641889954102e-08, "loss": 0.5915, "step": 7416 }, { "epoch": 0.94, "grad_norm": 0.8569780606468798, "learning_rate": 7.924011959354494e-08, "loss": 0.6104, "step": 7417 }, { "epoch": 0.95, "grad_norm": 0.8948860917878033, "learning_rate": 7.887465824169216e-08, "loss": 0.609, "step": 7418 }, { "epoch": 0.95, "grad_norm": 0.8587982446316301, "learning_rate": 7.851003490621678e-08, "loss": 0.6538, "step": 7419 }, { "epoch": 0.95, "grad_norm": 0.8061055937651449, "learning_rate": 7.814624964921136e-08, "loss": 0.592, "step": 7420 }, { "epoch": 0.95, "grad_norm": 0.8501248495231092, "learning_rate": 7.778330253262357e-08, "loss": 0.6031, "step": 7421 }, { "epoch": 0.95, "grad_norm": 0.7508365902515983, "learning_rate": 7.742119361826061e-08, "loss": 0.5953, "step": 7422 }, { "epoch": 0.95, "grad_norm": 0.769294198884973, "learning_rate": 7.705992296778542e-08, "loss": 0.5824, "step": 7423 }, { "epoch": 0.95, "grad_norm": 0.7137904186482988, "learning_rate": 7.66994906427182e-08, "loss": 0.4806, "step": 7424 }, { "epoch": 0.95, "grad_norm": 0.8894916387203268, "learning_rate": 7.63398967044371e-08, "loss": 0.6612, "step": 7425 }, { "epoch": 0.95, "grad_norm": 0.7751909187222735, "learning_rate": 7.598114121417754e-08, "loss": 0.6082, "step": 7426 }, { "epoch": 0.95, "grad_norm": 0.8879137173213574, "learning_rate": 7.562322423303125e-08, "loss": 0.6166, "step": 7427 }, { "epoch": 0.95, "grad_norm": 0.8549700924191678, "learning_rate": 7.526614582194836e-08, "loss": 0.5266, "step": 7428 }, { "epoch": 0.95, "grad_norm": 0.9274097809705091, "learning_rate": 7.490990604173521e-08, "loss": 0.6383, "step": 7429 }, { "epoch": 0.95, "grad_norm": 0.8741603042536286, "learning_rate": 7.455450495305606e-08, "loss": 0.6504, "step": 7430 }, { "epoch": 0.95, "grad_norm": 0.7201206310120831, "learning_rate": 7.419994261643192e-08, "loss": 0.5486, "step": 7431 }, { "epoch": 0.95, "grad_norm": 0.9767964118496452, "learning_rate": 7.384621909224066e-08, "loss": 0.6272, "step": 7432 }, { "epoch": 0.95, "grad_norm": 0.7223315650826927, "learning_rate": 7.349333444071793e-08, "loss": 0.6089, "step": 7433 }, { "epoch": 0.95, "grad_norm": 0.6780399729649443, "learning_rate": 7.314128872195626e-08, "loss": 0.5385, "step": 7434 }, { "epoch": 0.95, "grad_norm": 0.707306368940346, "learning_rate": 7.279008199590543e-08, "loss": 0.486, "step": 7435 }, { "epoch": 0.95, "grad_norm": 0.714139965985434, "learning_rate": 7.243971432237263e-08, "loss": 0.5739, "step": 7436 }, { "epoch": 0.95, "grad_norm": 0.6162192494164707, "learning_rate": 7.209018576102178e-08, "loss": 0.4752, "step": 7437 }, { "epoch": 0.95, "grad_norm": 0.8631658155881962, "learning_rate": 7.174149637137307e-08, "loss": 0.5679, "step": 7438 }, { "epoch": 0.95, "grad_norm": 0.7819558887318622, "learning_rate": 7.139364621280564e-08, "loss": 0.6359, "step": 7439 }, { "epoch": 0.95, "grad_norm": 0.6684400294528868, "learning_rate": 7.104663534455436e-08, "loss": 0.5222, "step": 7440 }, { "epoch": 0.95, "grad_norm": 0.7666833899696217, "learning_rate": 7.070046382571194e-08, "loss": 0.5476, "step": 7441 }, { "epoch": 0.95, "grad_norm": 0.6993628436058983, "learning_rate": 7.035513171522678e-08, "loss": 0.5163, "step": 7442 }, { "epoch": 0.95, "grad_norm": 0.8518681412246044, "learning_rate": 7.00106390719063e-08, "loss": 0.626, "step": 7443 }, { "epoch": 0.95, "grad_norm": 0.8753522895777665, "learning_rate": 6.966698595441413e-08, "loss": 0.6539, "step": 7444 }, { "epoch": 0.95, "grad_norm": 0.7745927228490971, "learning_rate": 6.932417242126954e-08, "loss": 0.5003, "step": 7445 }, { "epoch": 0.95, "grad_norm": 0.859209627688157, "learning_rate": 6.898219853085142e-08, "loss": 0.6418, "step": 7446 }, { "epoch": 0.95, "grad_norm": 0.648242136691597, "learning_rate": 6.864106434139428e-08, "loss": 0.5473, "step": 7447 }, { "epoch": 0.95, "grad_norm": 0.6810104864839329, "learning_rate": 6.830076991098889e-08, "loss": 0.4944, "step": 7448 }, { "epoch": 0.95, "grad_norm": 0.8989056436038333, "learning_rate": 6.796131529758443e-08, "loss": 0.6665, "step": 7449 }, { "epoch": 0.95, "grad_norm": 0.6133260543909741, "learning_rate": 6.762270055898689e-08, "loss": 0.5133, "step": 7450 }, { "epoch": 0.95, "grad_norm": 0.895883922095325, "learning_rate": 6.728492575285795e-08, "loss": 0.6544, "step": 7451 }, { "epoch": 0.95, "grad_norm": 0.7175767020902932, "learning_rate": 6.694799093671712e-08, "loss": 0.5446, "step": 7452 }, { "epoch": 0.95, "grad_norm": 0.8772505057973679, "learning_rate": 6.661189616794184e-08, "loss": 0.5742, "step": 7453 }, { "epoch": 0.95, "grad_norm": 0.8528062763836372, "learning_rate": 6.627664150376467e-08, "loss": 0.605, "step": 7454 }, { "epoch": 0.95, "grad_norm": 0.9298121551861412, "learning_rate": 6.594222700127605e-08, "loss": 0.5195, "step": 7455 }, { "epoch": 0.95, "grad_norm": 1.0489493131058054, "learning_rate": 6.560865271742434e-08, "loss": 0.6109, "step": 7456 }, { "epoch": 0.95, "grad_norm": 0.8531231142070393, "learning_rate": 6.52759187090124e-08, "loss": 0.6511, "step": 7457 }, { "epoch": 0.95, "grad_norm": 0.7094782016869042, "learning_rate": 6.494402503270159e-08, "loss": 0.5327, "step": 7458 }, { "epoch": 0.95, "grad_norm": 0.778488280364918, "learning_rate": 6.461297174501058e-08, "loss": 0.6375, "step": 7459 }, { "epoch": 0.95, "grad_norm": 0.9649809611221075, "learning_rate": 6.428275890231428e-08, "loss": 0.6085, "step": 7460 }, { "epoch": 0.95, "grad_norm": 0.8384867714665719, "learning_rate": 6.395338656084382e-08, "loss": 0.5417, "step": 7461 }, { "epoch": 0.95, "grad_norm": 0.7172016219462297, "learning_rate": 6.362485477668878e-08, "loss": 0.5185, "step": 7462 }, { "epoch": 0.95, "grad_norm": 0.734621564244927, "learning_rate": 6.329716360579386e-08, "loss": 0.5063, "step": 7463 }, { "epoch": 0.95, "grad_norm": 0.8585544568767859, "learning_rate": 6.297031310396162e-08, "loss": 0.5736, "step": 7464 }, { "epoch": 0.95, "grad_norm": 0.6924314831354269, "learning_rate": 6.264430332685145e-08, "loss": 0.5083, "step": 7465 }, { "epoch": 0.95, "grad_norm": 0.631501622783633, "learning_rate": 6.231913432997949e-08, "loss": 0.5091, "step": 7466 }, { "epoch": 0.95, "grad_norm": 0.9919905000100189, "learning_rate": 6.199480616871867e-08, "loss": 0.6368, "step": 7467 }, { "epoch": 0.95, "grad_norm": 0.6427426645437417, "learning_rate": 6.167131889829869e-08, "loss": 0.5226, "step": 7468 }, { "epoch": 0.95, "grad_norm": 0.9290370498472447, "learning_rate": 6.134867257380606e-08, "loss": 0.6453, "step": 7469 }, { "epoch": 0.95, "grad_norm": 0.8110301863562172, "learning_rate": 6.102686725018348e-08, "loss": 0.6369, "step": 7470 }, { "epoch": 0.95, "grad_norm": 0.6910169488205844, "learning_rate": 6.070590298223156e-08, "loss": 0.5709, "step": 7471 }, { "epoch": 0.95, "grad_norm": 0.6731087264860027, "learning_rate": 6.03857798246077e-08, "loss": 0.5665, "step": 7472 }, { "epoch": 0.95, "grad_norm": 0.8943346689620795, "learning_rate": 6.006649783182494e-08, "loss": 0.6289, "step": 7473 }, { "epoch": 0.95, "grad_norm": 0.8167250978026973, "learning_rate": 5.974805705825315e-08, "loss": 0.5844, "step": 7474 }, { "epoch": 0.95, "grad_norm": 0.984246334537608, "learning_rate": 5.9430457558120604e-08, "loss": 0.5991, "step": 7475 }, { "epoch": 0.95, "grad_norm": 0.6799249381001408, "learning_rate": 5.9113699385510703e-08, "loss": 0.5534, "step": 7476 }, { "epoch": 0.95, "grad_norm": 0.7261401602783657, "learning_rate": 5.879778259436364e-08, "loss": 0.5101, "step": 7477 }, { "epoch": 0.95, "grad_norm": 0.8532129506866355, "learning_rate": 5.8482707238477485e-08, "loss": 0.5765, "step": 7478 }, { "epoch": 0.95, "grad_norm": 0.806207622017008, "learning_rate": 5.8168473371505994e-08, "loss": 0.5542, "step": 7479 }, { "epoch": 0.95, "grad_norm": 0.6545822991323784, "learning_rate": 5.785508104695969e-08, "loss": 0.5447, "step": 7480 }, { "epoch": 0.95, "grad_norm": 0.9957514847587225, "learning_rate": 5.754253031820589e-08, "loss": 0.6748, "step": 7481 }, { "epoch": 0.95, "grad_norm": 0.7380907981824266, "learning_rate": 5.723082123846924e-08, "loss": 0.5567, "step": 7482 }, { "epoch": 0.95, "grad_norm": 0.9386289161624838, "learning_rate": 5.69199538608306e-08, "loss": 0.6698, "step": 7483 }, { "epoch": 0.95, "grad_norm": 0.9054781923651454, "learning_rate": 5.660992823822708e-08, "loss": 0.6854, "step": 7484 }, { "epoch": 0.95, "grad_norm": 0.6393259310632982, "learning_rate": 5.630074442345257e-08, "loss": 0.5401, "step": 7485 }, { "epoch": 0.95, "grad_norm": 0.7125136364076405, "learning_rate": 5.599240246915882e-08, "loss": 0.5059, "step": 7486 }, { "epoch": 0.95, "grad_norm": 0.9879095274100037, "learning_rate": 5.5684902427852184e-08, "loss": 0.5688, "step": 7487 }, { "epoch": 0.95, "grad_norm": 0.8698599708981681, "learning_rate": 5.537824435189742e-08, "loss": 0.6757, "step": 7488 }, { "epoch": 0.95, "grad_norm": 1.055570003412716, "learning_rate": 5.507242829351445e-08, "loss": 0.6763, "step": 7489 }, { "epoch": 0.95, "grad_norm": 0.8004758143381725, "learning_rate": 5.476745430478159e-08, "loss": 0.6024, "step": 7490 }, { "epoch": 0.95, "grad_norm": 1.1545071889606615, "learning_rate": 5.446332243763175e-08, "loss": 0.6503, "step": 7491 }, { "epoch": 0.95, "grad_norm": 0.8329510466577515, "learning_rate": 5.416003274385628e-08, "loss": 0.5604, "step": 7492 }, { "epoch": 0.95, "grad_norm": 0.7050925062683882, "learning_rate": 5.385758527510165e-08, "loss": 0.4843, "step": 7493 }, { "epoch": 0.95, "grad_norm": 0.797023627805345, "learning_rate": 5.355598008287166e-08, "loss": 0.5367, "step": 7494 }, { "epoch": 0.95, "grad_norm": 0.6770177519989335, "learning_rate": 5.325521721852634e-08, "loss": 0.5337, "step": 7495 }, { "epoch": 0.95, "grad_norm": 0.7650629487914867, "learning_rate": 5.2955296733283035e-08, "loss": 0.5682, "step": 7496 }, { "epoch": 0.96, "grad_norm": 0.6991932344743291, "learning_rate": 5.2656218678214796e-08, "loss": 0.5234, "step": 7497 }, { "epoch": 0.96, "grad_norm": 0.7406186254989756, "learning_rate": 5.235798310425144e-08, "loss": 0.567, "step": 7498 }, { "epoch": 0.96, "grad_norm": 0.6333883574602763, "learning_rate": 5.2060590062179564e-08, "loss": 0.5034, "step": 7499 }, { "epoch": 0.96, "grad_norm": 0.8411842986062696, "learning_rate": 5.1764039602641425e-08, "loss": 0.6154, "step": 7500 }, { "epoch": 0.96, "grad_norm": 0.8398522524183845, "learning_rate": 5.1468331776137194e-08, "loss": 0.5931, "step": 7501 }, { "epoch": 0.96, "grad_norm": 0.6295971104189083, "learning_rate": 5.117346663302325e-08, "loss": 0.5134, "step": 7502 }, { "epoch": 0.96, "grad_norm": 0.7133737735344948, "learning_rate": 5.0879444223510545e-08, "loss": 0.5133, "step": 7503 }, { "epoch": 0.96, "grad_norm": 0.785016914148015, "learning_rate": 5.058626459766902e-08, "loss": 0.5233, "step": 7504 }, { "epoch": 0.96, "grad_norm": 0.6561288306975454, "learning_rate": 5.02939278054243e-08, "loss": 0.5598, "step": 7505 }, { "epoch": 0.96, "grad_norm": 0.7467025471001423, "learning_rate": 5.000243389655823e-08, "loss": 0.5059, "step": 7506 }, { "epoch": 0.96, "grad_norm": 0.6969993780799142, "learning_rate": 4.9711782920708884e-08, "loss": 0.5592, "step": 7507 }, { "epoch": 0.96, "grad_norm": 0.7316437964232073, "learning_rate": 4.9421974927371106e-08, "loss": 0.5455, "step": 7508 }, { "epoch": 0.96, "grad_norm": 0.8792334846283368, "learning_rate": 4.913300996589598e-08, "loss": 0.6052, "step": 7509 }, { "epoch": 0.96, "grad_norm": 0.8550110046030751, "learning_rate": 4.8844888085491924e-08, "loss": 0.6383, "step": 7510 }, { "epoch": 0.96, "grad_norm": 0.9171351622488054, "learning_rate": 4.855760933522247e-08, "loss": 0.6374, "step": 7511 }, { "epoch": 0.96, "grad_norm": 0.8955492983908454, "learning_rate": 4.827117376400847e-08, "loss": 0.5689, "step": 7512 }, { "epoch": 0.96, "grad_norm": 0.7459772028562747, "learning_rate": 4.798558142062648e-08, "loss": 0.5588, "step": 7513 }, { "epoch": 0.96, "grad_norm": 0.8590116947538903, "learning_rate": 4.770083235371037e-08, "loss": 0.6369, "step": 7514 }, { "epoch": 0.96, "grad_norm": 0.8198050199769966, "learning_rate": 4.741692661175023e-08, "loss": 0.6195, "step": 7515 }, { "epoch": 0.96, "grad_norm": 0.7873992295593397, "learning_rate": 4.7133864243091296e-08, "loss": 0.5965, "step": 7516 }, { "epoch": 0.96, "grad_norm": 0.9160868531555808, "learning_rate": 4.685164529593722e-08, "loss": 0.5691, "step": 7517 }, { "epoch": 0.96, "grad_norm": 0.8000447445083542, "learning_rate": 4.657026981834623e-08, "loss": 0.6108, "step": 7518 }, { "epoch": 0.96, "grad_norm": 0.8085491423626913, "learning_rate": 4.628973785823387e-08, "loss": 0.6165, "step": 7519 }, { "epoch": 0.96, "grad_norm": 0.6795877964477249, "learning_rate": 4.6010049463371953e-08, "loss": 0.512, "step": 7520 }, { "epoch": 0.96, "grad_norm": 0.975881642312962, "learning_rate": 4.573120468138792e-08, "loss": 0.6768, "step": 7521 }, { "epoch": 0.96, "grad_norm": 0.751163012352079, "learning_rate": 4.5453203559766566e-08, "loss": 0.5635, "step": 7522 }, { "epoch": 0.96, "grad_norm": 0.7118732092834387, "learning_rate": 4.517604614584892e-08, "loss": 0.5145, "step": 7523 }, { "epoch": 0.96, "grad_norm": 0.6823053069108216, "learning_rate": 4.489973248683111e-08, "loss": 0.4954, "step": 7524 }, { "epoch": 0.96, "grad_norm": 0.7182483845788249, "learning_rate": 4.462426262976716e-08, "loss": 0.5542, "step": 7525 }, { "epoch": 0.96, "grad_norm": 0.8170515846596358, "learning_rate": 4.434963662156677e-08, "loss": 0.6004, "step": 7526 }, { "epoch": 0.96, "grad_norm": 2.3205693102806864, "learning_rate": 4.407585450899587e-08, "loss": 0.6224, "step": 7527 }, { "epoch": 0.96, "grad_norm": 0.8456537587567948, "learning_rate": 4.380291633867606e-08, "loss": 0.6312, "step": 7528 }, { "epoch": 0.96, "grad_norm": 0.9061800139308304, "learning_rate": 4.353082215708626e-08, "loss": 0.6636, "step": 7529 }, { "epoch": 0.96, "grad_norm": 0.6291401034723934, "learning_rate": 4.3259572010561635e-08, "loss": 0.5266, "step": 7530 }, { "epoch": 0.96, "grad_norm": 0.9078034208417431, "learning_rate": 4.2989165945292453e-08, "loss": 0.6589, "step": 7531 }, { "epoch": 0.96, "grad_norm": 0.8683726750442915, "learning_rate": 4.2719604007327445e-08, "loss": 0.6134, "step": 7532 }, { "epoch": 0.96, "grad_norm": 0.8111394987920763, "learning_rate": 4.2450886242568767e-08, "loss": 0.5968, "step": 7533 }, { "epoch": 0.96, "grad_norm": 0.7948536402146265, "learning_rate": 4.2183012696776494e-08, "loss": 0.5738, "step": 7534 }, { "epoch": 0.96, "grad_norm": 0.8439442302535923, "learning_rate": 4.191598341556746e-08, "loss": 0.6332, "step": 7535 }, { "epoch": 0.96, "grad_norm": 0.8414604968394939, "learning_rate": 4.164979844441308e-08, "loss": 0.6055, "step": 7536 }, { "epoch": 0.96, "grad_norm": 0.6850448926497814, "learning_rate": 4.138445782864264e-08, "loss": 0.5203, "step": 7537 }, { "epoch": 0.96, "grad_norm": 0.7083976371231931, "learning_rate": 4.111996161344056e-08, "loss": 0.6204, "step": 7538 }, { "epoch": 0.96, "grad_norm": 0.8068246758263687, "learning_rate": 4.085630984384803e-08, "loss": 0.5136, "step": 7539 }, { "epoch": 0.96, "grad_norm": 0.7421138418611758, "learning_rate": 4.0593502564761354e-08, "loss": 0.5004, "step": 7540 }, { "epoch": 0.96, "grad_norm": 0.680298252757217, "learning_rate": 4.033153982093474e-08, "loss": 0.5057, "step": 7541 }, { "epoch": 0.96, "grad_norm": 0.892761612727089, "learning_rate": 4.00704216569775e-08, "loss": 0.5793, "step": 7542 }, { "epoch": 0.96, "grad_norm": 0.5991013086495907, "learning_rate": 3.9810148117355175e-08, "loss": 0.4995, "step": 7543 }, { "epoch": 0.96, "grad_norm": 0.7813304815649467, "learning_rate": 3.955071924639009e-08, "loss": 0.535, "step": 7544 }, { "epoch": 0.96, "grad_norm": 0.7147271161143031, "learning_rate": 3.929213508825913e-08, "loss": 0.528, "step": 7545 }, { "epoch": 0.96, "grad_norm": 0.7271019639566279, "learning_rate": 3.903439568699818e-08, "loss": 0.5357, "step": 7546 }, { "epoch": 0.96, "grad_norm": 0.6924238607904464, "learning_rate": 3.877750108649603e-08, "loss": 0.5599, "step": 7547 }, { "epoch": 0.96, "grad_norm": 0.8227709696525372, "learning_rate": 3.8521451330499894e-08, "loss": 0.6816, "step": 7548 }, { "epoch": 0.96, "grad_norm": 0.8139320579366447, "learning_rate": 3.8266246462612675e-08, "loss": 0.5432, "step": 7549 }, { "epoch": 0.96, "grad_norm": 0.8977256524792877, "learning_rate": 3.8011886526292394e-08, "loss": 0.6683, "step": 7550 }, { "epoch": 0.96, "grad_norm": 0.7313993538405529, "learning_rate": 3.77583715648544e-08, "loss": 0.5152, "step": 7551 }, { "epoch": 0.96, "grad_norm": 1.1248106578866355, "learning_rate": 3.7505701621469714e-08, "loss": 0.582, "step": 7552 }, { "epoch": 0.96, "grad_norm": 0.7232042933147221, "learning_rate": 3.725387673916448e-08, "loss": 0.5303, "step": 7553 }, { "epoch": 0.96, "grad_norm": 0.8607725374247168, "learning_rate": 3.700289696082271e-08, "loss": 0.622, "step": 7554 }, { "epoch": 0.96, "grad_norm": 0.6810540978016121, "learning_rate": 3.675276232918357e-08, "loss": 0.554, "step": 7555 }, { "epoch": 0.96, "grad_norm": 0.7691704879145688, "learning_rate": 3.6503472886842415e-08, "loss": 0.5098, "step": 7556 }, { "epoch": 0.96, "grad_norm": 0.7989962994326033, "learning_rate": 3.6255028676250305e-08, "loss": 0.6368, "step": 7557 }, { "epoch": 0.96, "grad_norm": 0.954583259642225, "learning_rate": 3.600742973971505e-08, "loss": 0.6488, "step": 7558 }, { "epoch": 0.96, "grad_norm": 0.7879537975219921, "learning_rate": 3.57606761193996e-08, "loss": 0.5595, "step": 7559 }, { "epoch": 0.96, "grad_norm": 0.7481630461900315, "learning_rate": 3.5514767857324774e-08, "loss": 0.5969, "step": 7560 }, { "epoch": 0.96, "grad_norm": 1.2515041888048102, "learning_rate": 3.526970499536486e-08, "loss": 0.6275, "step": 7561 }, { "epoch": 0.96, "grad_norm": 0.7774979304075383, "learning_rate": 3.5025487575251485e-08, "loss": 0.5216, "step": 7562 }, { "epoch": 0.96, "grad_norm": 0.6939490686347481, "learning_rate": 3.4782115638573586e-08, "loss": 0.54, "step": 7563 }, { "epoch": 0.96, "grad_norm": 0.7695443028938527, "learning_rate": 3.453958922677414e-08, "loss": 0.621, "step": 7564 }, { "epoch": 0.96, "grad_norm": 0.6020808565150131, "learning_rate": 3.4297908381152324e-08, "loss": 0.4413, "step": 7565 }, { "epoch": 0.96, "grad_norm": 0.76437625223097, "learning_rate": 3.405707314286466e-08, "loss": 0.5897, "step": 7566 }, { "epoch": 0.96, "grad_norm": 0.6480458169495762, "learning_rate": 3.3817083552922793e-08, "loss": 0.5276, "step": 7567 }, { "epoch": 0.96, "grad_norm": 0.7104413272447241, "learning_rate": 3.357793965219402e-08, "loss": 0.5646, "step": 7568 }, { "epoch": 0.96, "grad_norm": 0.6836036129920416, "learning_rate": 3.3339641481402455e-08, "loss": 0.5546, "step": 7569 }, { "epoch": 0.96, "grad_norm": 0.7126474655394591, "learning_rate": 3.3102189081127835e-08, "loss": 0.5127, "step": 7570 }, { "epoch": 0.96, "grad_norm": 0.7887757503717319, "learning_rate": 3.286558249180505e-08, "loss": 0.4984, "step": 7571 }, { "epoch": 0.96, "grad_norm": 0.6364014173268464, "learning_rate": 3.262982175372686e-08, "loss": 0.5045, "step": 7572 }, { "epoch": 0.96, "grad_norm": 0.9592387600886884, "learning_rate": 3.239490690704006e-08, "loss": 0.6247, "step": 7573 }, { "epoch": 0.96, "grad_norm": 0.6965653049718208, "learning_rate": 3.216083799174874e-08, "loss": 0.5304, "step": 7574 }, { "epoch": 0.97, "grad_norm": 0.6726101160073129, "learning_rate": 3.1927615047712135e-08, "loss": 0.5497, "step": 7575 }, { "epoch": 0.97, "grad_norm": 0.7061453200551265, "learning_rate": 3.16952381146457e-08, "loss": 0.5149, "step": 7576 }, { "epoch": 0.97, "grad_norm": 0.6775519593995715, "learning_rate": 3.146370723212056e-08, "loss": 0.5449, "step": 7577 }, { "epoch": 0.97, "grad_norm": 0.6943310331760596, "learning_rate": 3.1233022439564055e-08, "loss": 0.5371, "step": 7578 }, { "epoch": 0.97, "grad_norm": 0.8425427798288586, "learning_rate": 3.100318377625977e-08, "loss": 0.6013, "step": 7579 }, { "epoch": 0.97, "grad_norm": 0.9160785143760598, "learning_rate": 3.0774191281346956e-08, "loss": 0.6379, "step": 7580 }, { "epoch": 0.97, "grad_norm": 0.703069754176784, "learning_rate": 3.054604499381997e-08, "loss": 0.5836, "step": 7581 }, { "epoch": 0.97, "grad_norm": 0.7052626811005301, "learning_rate": 3.0318744952529954e-08, "loss": 0.5668, "step": 7582 }, { "epoch": 0.97, "grad_norm": 0.6703817169944095, "learning_rate": 3.0092291196184285e-08, "loss": 0.5498, "step": 7583 }, { "epoch": 0.97, "grad_norm": 0.7093961310763612, "learning_rate": 2.986668376334545e-08, "loss": 0.4832, "step": 7584 }, { "epoch": 0.97, "grad_norm": 0.8194420737638893, "learning_rate": 2.96419226924316e-08, "loss": 0.5692, "step": 7585 }, { "epoch": 0.97, "grad_norm": 0.6302911122407827, "learning_rate": 2.941800802171768e-08, "loss": 0.4973, "step": 7586 }, { "epoch": 0.97, "grad_norm": 0.7758949579106059, "learning_rate": 2.9194939789333743e-08, "loss": 0.5273, "step": 7587 }, { "epoch": 0.97, "grad_norm": 0.8939951709116091, "learning_rate": 2.8972718033266068e-08, "loss": 0.5628, "step": 7588 }, { "epoch": 0.97, "grad_norm": 0.7439240101071243, "learning_rate": 2.875134279135716e-08, "loss": 0.5666, "step": 7589 }, { "epoch": 0.97, "grad_norm": 0.6641650024767523, "learning_rate": 2.8530814101304094e-08, "loss": 0.5479, "step": 7590 }, { "epoch": 0.97, "grad_norm": 0.8909326656246606, "learning_rate": 2.8311132000660712e-08, "loss": 0.6051, "step": 7591 }, { "epoch": 0.97, "grad_norm": 0.9336998038701715, "learning_rate": 2.8092296526837092e-08, "loss": 0.6334, "step": 7592 }, { "epoch": 0.97, "grad_norm": 0.9265661366575673, "learning_rate": 2.7874307717098425e-08, "loss": 0.6137, "step": 7593 }, { "epoch": 0.97, "grad_norm": 0.57553316797106, "learning_rate": 2.7657165608566128e-08, "loss": 0.4506, "step": 7594 }, { "epoch": 0.97, "grad_norm": 0.8949490207016785, "learning_rate": 2.7440870238216733e-08, "loss": 0.5264, "step": 7595 }, { "epoch": 0.97, "grad_norm": 0.6224418763795442, "learning_rate": 2.7225421642883554e-08, "loss": 0.5402, "step": 7596 }, { "epoch": 0.97, "grad_norm": 0.9481257507679741, "learning_rate": 2.7010819859255022e-08, "loss": 0.6386, "step": 7597 }, { "epoch": 0.97, "grad_norm": 0.8198017356079316, "learning_rate": 2.6797064923875792e-08, "loss": 0.654, "step": 7598 }, { "epoch": 0.97, "grad_norm": 0.9115867113271215, "learning_rate": 2.6584156873145638e-08, "loss": 0.583, "step": 7599 }, { "epoch": 0.97, "grad_norm": 0.8080440274525793, "learning_rate": 2.6372095743321115e-08, "loss": 0.5628, "step": 7600 }, { "epoch": 0.97, "grad_norm": 0.6894831506206119, "learning_rate": 2.6160881570513884e-08, "loss": 0.5348, "step": 7601 }, { "epoch": 0.97, "grad_norm": 0.9214827202214521, "learning_rate": 2.5950514390691296e-08, "loss": 0.6196, "step": 7602 }, { "epoch": 0.97, "grad_norm": 1.4383952745109176, "learning_rate": 2.574099423967691e-08, "loss": 0.5653, "step": 7603 }, { "epoch": 0.97, "grad_norm": 0.9832497162835971, "learning_rate": 2.5532321153149962e-08, "loss": 0.5451, "step": 7604 }, { "epoch": 0.97, "grad_norm": 0.760822492437758, "learning_rate": 2.5324495166644813e-08, "loss": 0.5494, "step": 7605 }, { "epoch": 0.97, "grad_norm": 0.6849650968792051, "learning_rate": 2.5117516315552038e-08, "loss": 0.4845, "step": 7606 }, { "epoch": 0.97, "grad_norm": 0.7551381448446217, "learning_rate": 2.4911384635118996e-08, "loss": 0.5166, "step": 7607 }, { "epoch": 0.97, "grad_norm": 0.6942683914485269, "learning_rate": 2.47061001604465e-08, "loss": 0.5681, "step": 7608 }, { "epoch": 0.97, "grad_norm": 0.6164606785817738, "learning_rate": 2.4501662926493252e-08, "loss": 0.5015, "step": 7609 }, { "epoch": 0.97, "grad_norm": 0.933629322704433, "learning_rate": 2.429807296807196e-08, "loss": 0.6731, "step": 7610 }, { "epoch": 0.97, "grad_norm": 0.7075975356508541, "learning_rate": 2.409533031985267e-08, "loss": 0.528, "step": 7611 }, { "epoch": 0.97, "grad_norm": 0.9408293318238553, "learning_rate": 2.389343501635999e-08, "loss": 0.6494, "step": 7612 }, { "epoch": 0.97, "grad_norm": 0.739327088547856, "learning_rate": 2.3692387091974744e-08, "loss": 0.512, "step": 7613 }, { "epoch": 0.97, "grad_norm": 0.6672513485985363, "learning_rate": 2.3492186580932884e-08, "loss": 0.5183, "step": 7614 }, { "epoch": 0.97, "grad_norm": 0.6385530075485687, "learning_rate": 2.329283351732714e-08, "loss": 0.4924, "step": 7615 }, { "epoch": 0.97, "grad_norm": 0.6117564201872002, "learning_rate": 2.30943279351048e-08, "loss": 0.5451, "step": 7616 }, { "epoch": 0.97, "grad_norm": 0.7053936823477176, "learning_rate": 2.2896669868069378e-08, "loss": 0.5679, "step": 7617 }, { "epoch": 0.97, "grad_norm": 0.6139386673899543, "learning_rate": 2.269985934988006e-08, "loss": 0.5171, "step": 7618 }, { "epoch": 0.97, "grad_norm": 0.6924980335519657, "learning_rate": 2.2503896414051153e-08, "loss": 0.5184, "step": 7619 }, { "epoch": 0.97, "grad_norm": 0.7836532609520706, "learning_rate": 2.2308781093953736e-08, "loss": 0.5199, "step": 7620 }, { "epoch": 0.97, "grad_norm": 0.6822585771271885, "learning_rate": 2.211451342281401e-08, "loss": 0.5106, "step": 7621 }, { "epoch": 0.97, "grad_norm": 0.6498117269238174, "learning_rate": 2.1921093433712737e-08, "loss": 0.527, "step": 7622 }, { "epoch": 0.97, "grad_norm": 0.8263831589742552, "learning_rate": 2.1728521159588566e-08, "loss": 0.6286, "step": 7623 }, { "epoch": 0.97, "grad_norm": 0.9478364554102605, "learning_rate": 2.1536796633233603e-08, "loss": 0.6193, "step": 7624 }, { "epoch": 0.97, "grad_norm": 0.8074256039605504, "learning_rate": 2.1345919887297282e-08, "loss": 0.5908, "step": 7625 }, { "epoch": 0.97, "grad_norm": 0.7687298793058737, "learning_rate": 2.115589095428361e-08, "loss": 0.5159, "step": 7626 }, { "epoch": 0.97, "grad_norm": 0.8953012025285586, "learning_rate": 2.0966709866552247e-08, "loss": 0.6637, "step": 7627 }, { "epoch": 0.97, "grad_norm": 0.6510354904056129, "learning_rate": 2.07783766563191e-08, "loss": 0.485, "step": 7628 }, { "epoch": 0.97, "grad_norm": 0.6771934372010611, "learning_rate": 2.0590891355655172e-08, "loss": 0.5039, "step": 7629 }, { "epoch": 0.97, "grad_norm": 0.8862268120628601, "learning_rate": 2.0404253996487156e-08, "loss": 0.6675, "step": 7630 }, { "epoch": 0.97, "grad_norm": 0.6549563332423868, "learning_rate": 2.0218464610597956e-08, "loss": 0.5177, "step": 7631 }, { "epoch": 0.97, "grad_norm": 0.815536908391313, "learning_rate": 2.003352322962504e-08, "loss": 0.6283, "step": 7632 }, { "epoch": 0.97, "grad_norm": 0.8024763610746151, "learning_rate": 1.9849429885062664e-08, "loss": 0.5837, "step": 7633 }, { "epoch": 0.97, "grad_norm": 0.6538272674063264, "learning_rate": 1.9666184608259086e-08, "loss": 0.512, "step": 7634 }, { "epoch": 0.97, "grad_norm": 0.9747424145626216, "learning_rate": 1.9483787430419345e-08, "loss": 0.5918, "step": 7635 }, { "epoch": 0.97, "grad_norm": 0.7269490009815838, "learning_rate": 1.9302238382604143e-08, "loss": 0.5391, "step": 7636 }, { "epoch": 0.97, "grad_norm": 0.7955981522535058, "learning_rate": 1.9121537495729313e-08, "loss": 0.553, "step": 7637 }, { "epoch": 0.97, "grad_norm": 0.7532457359610292, "learning_rate": 1.894168480056635e-08, "loss": 0.5258, "step": 7638 }, { "epoch": 0.97, "grad_norm": 0.7786838948230345, "learning_rate": 1.8762680327741867e-08, "loss": 0.5158, "step": 7639 }, { "epoch": 0.97, "grad_norm": 0.7705090147624136, "learning_rate": 1.8584524107739254e-08, "loss": 0.6039, "step": 7640 }, { "epoch": 0.97, "grad_norm": 0.6271619679119295, "learning_rate": 1.840721617089536e-08, "loss": 0.4755, "step": 7641 }, { "epoch": 0.97, "grad_norm": 0.8242831530757934, "learning_rate": 1.823075654740547e-08, "loss": 0.5875, "step": 7642 }, { "epoch": 0.97, "grad_norm": 0.9118442549462102, "learning_rate": 1.8055145267317777e-08, "loss": 0.6527, "step": 7643 }, { "epoch": 0.97, "grad_norm": 0.8089574031526846, "learning_rate": 1.788038236053724e-08, "loss": 0.5842, "step": 7644 }, { "epoch": 0.97, "grad_norm": 1.0375731049153643, "learning_rate": 1.7706467856824506e-08, "loss": 0.5502, "step": 7645 }, { "epoch": 0.97, "grad_norm": 0.8947846294822129, "learning_rate": 1.753340178579477e-08, "loss": 0.6349, "step": 7646 }, { "epoch": 0.97, "grad_norm": 0.7154147934546784, "learning_rate": 1.7361184176920008e-08, "loss": 0.5444, "step": 7647 }, { "epoch": 0.97, "grad_norm": 0.7088735427201656, "learning_rate": 1.718981505952677e-08, "loss": 0.5615, "step": 7648 }, { "epoch": 0.97, "grad_norm": 0.9218429625809412, "learning_rate": 1.7019294462797265e-08, "loss": 0.6155, "step": 7649 }, { "epoch": 0.97, "grad_norm": 0.8195366290720348, "learning_rate": 1.6849622415769928e-08, "loss": 0.6005, "step": 7650 }, { "epoch": 0.97, "grad_norm": 0.8740285290283316, "learning_rate": 1.6680798947337762e-08, "loss": 0.6114, "step": 7651 }, { "epoch": 0.97, "grad_norm": 0.8197519517165034, "learning_rate": 1.6512824086249435e-08, "loss": 0.6723, "step": 7652 }, { "epoch": 0.97, "grad_norm": 0.848332027468816, "learning_rate": 1.6345697861109843e-08, "loss": 0.6257, "step": 7653 }, { "epoch": 0.98, "grad_norm": 0.7602282230177202, "learning_rate": 1.6179420300378447e-08, "loss": 0.5368, "step": 7654 }, { "epoch": 0.98, "grad_norm": 6.523336022225611, "learning_rate": 1.6013991432370368e-08, "loss": 0.6458, "step": 7655 }, { "epoch": 0.98, "grad_norm": 0.7357873236779066, "learning_rate": 1.584941128525752e-08, "loss": 0.6184, "step": 7656 }, { "epoch": 0.98, "grad_norm": 0.6648318920546201, "learning_rate": 1.568567988706471e-08, "loss": 0.4788, "step": 7657 }, { "epoch": 0.98, "grad_norm": 0.7455860455840642, "learning_rate": 1.5522797265674628e-08, "loss": 0.5713, "step": 7658 }, { "epoch": 0.98, "grad_norm": 0.767895085522298, "learning_rate": 1.5360763448824533e-08, "loss": 0.5014, "step": 7659 }, { "epoch": 0.98, "grad_norm": 0.6436561928192716, "learning_rate": 1.51995784641068e-08, "loss": 0.5185, "step": 7660 }, { "epoch": 0.98, "grad_norm": 0.946774318535707, "learning_rate": 1.5039242338969474e-08, "loss": 0.5827, "step": 7661 }, { "epoch": 0.98, "grad_norm": 0.8146163446823751, "learning_rate": 1.4879755100716265e-08, "loss": 0.6285, "step": 7662 }, { "epoch": 0.98, "grad_norm": 0.963849630989354, "learning_rate": 1.4721116776506005e-08, "loss": 0.6304, "step": 7663 }, { "epoch": 0.98, "grad_norm": 1.050497809462164, "learning_rate": 1.45633273933532e-08, "loss": 0.6008, "step": 7664 }, { "epoch": 0.98, "grad_norm": 0.8619927286062997, "learning_rate": 1.4406386978128017e-08, "loss": 0.6373, "step": 7665 }, { "epoch": 0.98, "grad_norm": 0.754761807069326, "learning_rate": 1.4250295557555748e-08, "loss": 0.553, "step": 7666 }, { "epoch": 0.98, "grad_norm": 0.8762840585867455, "learning_rate": 1.4095053158216244e-08, "loss": 0.578, "step": 7667 }, { "epoch": 0.98, "grad_norm": 0.6853056973897615, "learning_rate": 1.3940659806547242e-08, "loss": 0.4753, "step": 7668 }, { "epoch": 0.98, "grad_norm": 0.7746630447024893, "learning_rate": 1.3787115528839379e-08, "loss": 0.5713, "step": 7669 }, { "epoch": 0.98, "grad_norm": 0.6237274074845542, "learning_rate": 1.363442035123952e-08, "loss": 0.495, "step": 7670 }, { "epoch": 0.98, "grad_norm": 0.8055965862715443, "learning_rate": 1.3482574299750195e-08, "loss": 0.6074, "step": 7671 }, { "epoch": 0.98, "grad_norm": 0.7166271483017809, "learning_rate": 1.3331577400229611e-08, "loss": 0.5492, "step": 7672 }, { "epoch": 0.98, "grad_norm": 0.9617096777816452, "learning_rate": 1.3181429678390534e-08, "loss": 0.6534, "step": 7673 }, { "epoch": 0.98, "grad_norm": 0.6914267583377445, "learning_rate": 1.3032131159801397e-08, "loss": 0.5591, "step": 7674 }, { "epoch": 0.98, "grad_norm": 0.9030150608544938, "learning_rate": 1.2883681869886866e-08, "loss": 0.6313, "step": 7675 }, { "epoch": 0.98, "grad_norm": 0.6728825040994538, "learning_rate": 1.2736081833925607e-08, "loss": 0.5605, "step": 7676 }, { "epoch": 0.98, "grad_norm": 0.8455372774306043, "learning_rate": 1.258933107705307e-08, "loss": 0.6027, "step": 7677 }, { "epoch": 0.98, "grad_norm": 0.8132978841711667, "learning_rate": 1.2443429624258707e-08, "loss": 0.6431, "step": 7678 }, { "epoch": 0.98, "grad_norm": 0.7470156413722059, "learning_rate": 1.2298377500388204e-08, "loss": 0.6046, "step": 7679 }, { "epoch": 0.98, "grad_norm": 0.6282495803286285, "learning_rate": 1.2154174730142908e-08, "loss": 0.4967, "step": 7680 }, { "epoch": 0.98, "grad_norm": 0.7577861601460566, "learning_rate": 1.2010821338078183e-08, "loss": 0.5325, "step": 7681 }, { "epoch": 0.98, "grad_norm": 0.8544789700296355, "learning_rate": 1.1868317348606163e-08, "loss": 0.6396, "step": 7682 }, { "epoch": 0.98, "grad_norm": 0.9000443677134143, "learning_rate": 1.1726662785994103e-08, "loss": 0.6668, "step": 7683 }, { "epoch": 0.98, "grad_norm": 0.7819674662950078, "learning_rate": 1.1585857674363821e-08, "loss": 0.6322, "step": 7684 }, { "epoch": 0.98, "grad_norm": 0.8640397434417426, "learning_rate": 1.1445902037692801e-08, "loss": 0.5418, "step": 7685 }, { "epoch": 0.98, "grad_norm": 0.6942189142401901, "learning_rate": 1.1306795899814205e-08, "loss": 0.5229, "step": 7686 }, { "epoch": 0.98, "grad_norm": 0.8252149382811961, "learning_rate": 1.1168539284416858e-08, "loss": 0.5897, "step": 7687 }, { "epoch": 0.98, "grad_norm": 0.9370495691978186, "learning_rate": 1.1031132215043593e-08, "loss": 0.6481, "step": 7688 }, { "epoch": 0.98, "grad_norm": 0.9468573208554496, "learning_rate": 1.089457471509403e-08, "loss": 0.6029, "step": 7689 }, { "epoch": 0.98, "grad_norm": 0.7799486706748221, "learning_rate": 1.0758866807822344e-08, "loss": 0.6144, "step": 7690 }, { "epoch": 0.98, "grad_norm": 0.9564609198009918, "learning_rate": 1.0624008516337825e-08, "loss": 0.6432, "step": 7691 }, { "epoch": 0.98, "grad_norm": 0.9950670412142393, "learning_rate": 1.0489999863605994e-08, "loss": 0.6406, "step": 7692 }, { "epoch": 0.98, "grad_norm": 0.9867067801806185, "learning_rate": 1.0356840872446373e-08, "loss": 0.6618, "step": 7693 }, { "epoch": 0.98, "grad_norm": 0.691692246098567, "learning_rate": 1.0224531565535267e-08, "loss": 0.5272, "step": 7694 }, { "epoch": 0.98, "grad_norm": 0.9529959200558118, "learning_rate": 1.0093071965403544e-08, "loss": 0.6626, "step": 7695 }, { "epoch": 0.98, "grad_norm": 0.8631900570623116, "learning_rate": 9.962462094437186e-09, "loss": 0.6504, "step": 7696 }, { "epoch": 0.98, "grad_norm": 0.7051312279628598, "learning_rate": 9.83270197487729e-09, "loss": 0.5292, "step": 7697 }, { "epoch": 0.98, "grad_norm": 0.624240652680998, "learning_rate": 9.703791628821735e-09, "loss": 0.5128, "step": 7698 }, { "epoch": 0.98, "grad_norm": 1.0862893000426475, "learning_rate": 9.575731078221295e-09, "loss": 0.628, "step": 7699 }, { "epoch": 0.98, "grad_norm": 0.6151464782986507, "learning_rate": 9.448520344884082e-09, "loss": 0.5222, "step": 7700 }, { "epoch": 0.98, "grad_norm": 0.8302844615144636, "learning_rate": 9.322159450472768e-09, "loss": 0.618, "step": 7701 }, { "epoch": 0.98, "grad_norm": 0.9629715401901966, "learning_rate": 9.196648416505694e-09, "loss": 0.638, "step": 7702 }, { "epoch": 0.98, "grad_norm": 0.6284057979685642, "learning_rate": 9.071987264355209e-09, "loss": 0.4622, "step": 7703 }, { "epoch": 0.98, "grad_norm": 0.7946363867564209, "learning_rate": 8.948176015249887e-09, "loss": 0.5813, "step": 7704 }, { "epoch": 0.98, "grad_norm": 0.8240984944842134, "learning_rate": 8.825214690273976e-09, "loss": 0.5794, "step": 7705 }, { "epoch": 0.98, "grad_norm": 0.7708128742040896, "learning_rate": 8.703103310366279e-09, "loss": 0.53, "step": 7706 }, { "epoch": 0.98, "grad_norm": 0.7930533939687292, "learning_rate": 8.581841896321274e-09, "loss": 0.5901, "step": 7707 }, { "epoch": 0.98, "grad_norm": 0.8868068286654145, "learning_rate": 8.461430468788557e-09, "loss": 0.588, "step": 7708 }, { "epoch": 0.98, "grad_norm": 0.8641175805955916, "learning_rate": 8.341869048272278e-09, "loss": 0.6371, "step": 7709 }, { "epoch": 0.98, "grad_norm": 0.8669576713465519, "learning_rate": 8.223157655133374e-09, "loss": 0.5611, "step": 7710 }, { "epoch": 0.98, "grad_norm": 0.6462270456572182, "learning_rate": 8.105296309586786e-09, "loss": 0.5056, "step": 7711 }, { "epoch": 0.98, "grad_norm": 0.6507443854245548, "learning_rate": 7.988285031702569e-09, "loss": 0.5629, "step": 7712 }, { "epoch": 0.98, "grad_norm": 0.9160757491823722, "learning_rate": 7.872123841407564e-09, "loss": 0.6535, "step": 7713 }, { "epoch": 0.98, "grad_norm": 0.7848714663431117, "learning_rate": 7.756812758482058e-09, "loss": 0.5844, "step": 7714 }, { "epoch": 0.98, "grad_norm": 0.9968209939293037, "learning_rate": 7.642351802562564e-09, "loss": 0.6688, "step": 7715 }, { "epoch": 0.98, "grad_norm": 0.7743367133853505, "learning_rate": 7.52874099314016e-09, "loss": 0.5677, "step": 7716 }, { "epoch": 0.98, "grad_norm": 0.985855860243906, "learning_rate": 7.4159803495627014e-09, "loss": 0.5944, "step": 7717 }, { "epoch": 0.98, "grad_norm": 0.7769300759622173, "learning_rate": 7.3040698910309405e-09, "loss": 0.5758, "step": 7718 }, { "epoch": 0.98, "grad_norm": 0.8238070914861223, "learning_rate": 7.1930096366024104e-09, "loss": 0.6125, "step": 7719 }, { "epoch": 0.98, "grad_norm": 0.8968157418227001, "learning_rate": 7.08279960518976e-09, "loss": 0.6793, "step": 7720 }, { "epoch": 0.98, "grad_norm": 0.7954210920795329, "learning_rate": 6.9734398155607565e-09, "loss": 0.5517, "step": 7721 }, { "epoch": 0.98, "grad_norm": 0.9231282029029846, "learning_rate": 6.864930286337723e-09, "loss": 0.5853, "step": 7722 }, { "epoch": 0.98, "grad_norm": 0.6722337777132225, "learning_rate": 6.757271035999213e-09, "loss": 0.5089, "step": 7723 }, { "epoch": 0.98, "grad_norm": 0.7967805112616456, "learning_rate": 6.650462082878339e-09, "loss": 0.6144, "step": 7724 }, { "epoch": 0.98, "grad_norm": 0.8618017600310541, "learning_rate": 6.544503445162776e-09, "loss": 0.641, "step": 7725 }, { "epoch": 0.98, "grad_norm": 0.8572291298679268, "learning_rate": 6.439395140896976e-09, "loss": 0.6132, "step": 7726 }, { "epoch": 0.98, "grad_norm": 0.8126804757304016, "learning_rate": 6.3351371879799565e-09, "loss": 0.5717, "step": 7727 }, { "epoch": 0.98, "grad_norm": 0.8912996518047204, "learning_rate": 6.2317296041658485e-09, "loss": 0.6713, "step": 7728 }, { "epoch": 0.98, "grad_norm": 0.7494857653277093, "learning_rate": 6.129172407062789e-09, "loss": 0.5811, "step": 7729 }, { "epoch": 0.98, "grad_norm": 0.6505078175920673, "learning_rate": 6.027465614136252e-09, "loss": 0.5485, "step": 7730 }, { "epoch": 0.98, "grad_norm": 0.7161590675565519, "learning_rate": 5.9266092427051614e-09, "loss": 0.5381, "step": 7731 }, { "epoch": 0.99, "grad_norm": 0.727954679948721, "learning_rate": 5.826603309944667e-09, "loss": 0.5307, "step": 7732 }, { "epoch": 0.99, "grad_norm": 0.7296040349339669, "learning_rate": 5.727447832885036e-09, "loss": 0.4967, "step": 7733 }, { "epoch": 0.99, "grad_norm": 0.8287125089496792, "learning_rate": 5.6291428284110934e-09, "loss": 0.6687, "step": 7734 }, { "epoch": 0.99, "grad_norm": 0.7386894344806016, "learning_rate": 5.531688313262784e-09, "loss": 0.5609, "step": 7735 }, { "epoch": 0.99, "grad_norm": 0.6992615364351703, "learning_rate": 5.435084304036276e-09, "loss": 0.521, "step": 7736 }, { "epoch": 0.99, "grad_norm": 0.645566685331598, "learning_rate": 5.339330817181743e-09, "loss": 0.5036, "step": 7737 }, { "epoch": 0.99, "grad_norm": 0.7415998513341338, "learning_rate": 5.244427869005586e-09, "loss": 0.5239, "step": 7738 }, { "epoch": 0.99, "grad_norm": 0.8696586769269208, "learning_rate": 5.150375475667657e-09, "loss": 0.6374, "step": 7739 }, { "epoch": 0.99, "grad_norm": 0.6585700511068008, "learning_rate": 5.0571736531851435e-09, "loss": 0.4942, "step": 7740 }, { "epoch": 0.99, "grad_norm": 0.6818694657527492, "learning_rate": 4.964822417429238e-09, "loss": 0.5017, "step": 7741 }, { "epoch": 0.99, "grad_norm": 0.9738124329608369, "learning_rate": 4.87332178412625e-09, "loss": 0.6612, "step": 7742 }, { "epoch": 0.99, "grad_norm": 0.5989502745504444, "learning_rate": 4.7826717688570504e-09, "loss": 0.465, "step": 7743 }, { "epoch": 0.99, "grad_norm": 0.7610246981313702, "learning_rate": 4.692872387059844e-09, "loss": 0.5602, "step": 7744 }, { "epoch": 0.99, "grad_norm": 0.7948919264223075, "learning_rate": 4.603923654025177e-09, "loss": 0.524, "step": 7745 }, { "epoch": 0.99, "grad_norm": 0.9546635423186289, "learning_rate": 4.515825584900935e-09, "loss": 0.6557, "step": 7746 }, { "epoch": 0.99, "grad_norm": 0.6425175540783005, "learning_rate": 4.428578194689004e-09, "loss": 0.4951, "step": 7747 }, { "epoch": 0.99, "grad_norm": 0.7739844154182344, "learning_rate": 4.3421814982463896e-09, "loss": 0.5958, "step": 7748 }, { "epoch": 0.99, "grad_norm": 0.7354734804398161, "learning_rate": 4.256635510286322e-09, "loss": 0.5468, "step": 7749 }, { "epoch": 0.99, "grad_norm": 0.7094949902187434, "learning_rate": 4.171940245375483e-09, "loss": 0.5637, "step": 7750 }, { "epoch": 0.99, "grad_norm": 0.9152832736516887, "learning_rate": 4.0880957179373345e-09, "loss": 0.6787, "step": 7751 }, { "epoch": 0.99, "grad_norm": 0.8337972076499247, "learning_rate": 4.005101942249345e-09, "loss": 0.6361, "step": 7752 }, { "epoch": 0.99, "grad_norm": 0.7116564328879181, "learning_rate": 3.922958932444654e-09, "loss": 0.5385, "step": 7753 }, { "epoch": 0.99, "grad_norm": 0.7469676186804468, "learning_rate": 3.841666702511515e-09, "loss": 0.5242, "step": 7754 }, { "epoch": 0.99, "grad_norm": 0.6889087565562085, "learning_rate": 3.761225266292745e-09, "loss": 0.5099, "step": 7755 }, { "epoch": 0.99, "grad_norm": 0.8503511104789901, "learning_rate": 3.6816346374868307e-09, "loss": 0.5719, "step": 7756 }, { "epoch": 0.99, "grad_norm": 0.8357174587071159, "learning_rate": 3.602894829647374e-09, "loss": 0.5505, "step": 7757 }, { "epoch": 0.99, "grad_norm": 0.7224718547138523, "learning_rate": 3.5250058561825397e-09, "loss": 0.5524, "step": 7758 }, { "epoch": 0.99, "grad_norm": 0.6978070506328778, "learning_rate": 3.447967730356716e-09, "loss": 0.4938, "step": 7759 }, { "epoch": 0.99, "grad_norm": 0.88907344584442, "learning_rate": 3.371780465288299e-09, "loss": 0.5532, "step": 7760 }, { "epoch": 0.99, "grad_norm": 0.8917303071815923, "learning_rate": 3.2964440739508e-09, "loss": 0.6157, "step": 7761 }, { "epoch": 0.99, "grad_norm": 0.6840176788264855, "learning_rate": 3.221958569173955e-09, "loss": 0.5724, "step": 7762 }, { "epoch": 0.99, "grad_norm": 0.7945173049507899, "learning_rate": 3.148323963641509e-09, "loss": 0.5738, "step": 7763 }, { "epoch": 0.99, "grad_norm": 0.6977984037923312, "learning_rate": 3.0755402698923185e-09, "loss": 0.5739, "step": 7764 }, { "epoch": 0.99, "grad_norm": 0.8102152513572651, "learning_rate": 3.0036075003209152e-09, "loss": 0.5876, "step": 7765 }, { "epoch": 0.99, "grad_norm": 0.8342865402711346, "learning_rate": 2.9325256671774992e-09, "loss": 0.5828, "step": 7766 }, { "epoch": 0.99, "grad_norm": 0.8904558550414632, "learning_rate": 2.862294782565167e-09, "loss": 0.6963, "step": 7767 }, { "epoch": 0.99, "grad_norm": 0.8758157724870405, "learning_rate": 2.792914858444906e-09, "loss": 0.5964, "step": 7768 }, { "epoch": 0.99, "grad_norm": 0.7425124539882405, "learning_rate": 2.7243859066306e-09, "loss": 0.5852, "step": 7769 }, { "epoch": 0.99, "grad_norm": 0.7460310516973931, "learning_rate": 2.6567079387918026e-09, "loss": 0.5309, "step": 7770 }, { "epoch": 0.99, "grad_norm": 0.7889244025296429, "learning_rate": 2.5898809664537395e-09, "loss": 0.5286, "step": 7771 }, { "epoch": 0.99, "grad_norm": 0.8218427560324548, "learning_rate": 2.5239050009961962e-09, "loss": 0.5818, "step": 7772 }, { "epoch": 0.99, "grad_norm": 0.7004383419013205, "learning_rate": 2.4587800536546304e-09, "loss": 0.5161, "step": 7773 }, { "epoch": 0.99, "grad_norm": 0.8498689634209067, "learning_rate": 2.3945061355185038e-09, "loss": 0.626, "step": 7774 }, { "epoch": 0.99, "grad_norm": 0.794196644507347, "learning_rate": 2.3310832575335065e-09, "loss": 0.5921, "step": 7775 }, { "epoch": 0.99, "grad_norm": 0.7973033067988463, "learning_rate": 2.2685114304998868e-09, "loss": 0.637, "step": 7776 }, { "epoch": 0.99, "grad_norm": 0.8177026760959344, "learning_rate": 2.2067906650724556e-09, "loss": 0.6068, "step": 7777 }, { "epoch": 0.99, "grad_norm": 0.8371703718276913, "learning_rate": 2.14592097176225e-09, "loss": 0.5878, "step": 7778 }, { "epoch": 0.99, "grad_norm": 0.7940912155992044, "learning_rate": 2.085902360934311e-09, "loss": 0.5465, "step": 7779 }, { "epoch": 0.99, "grad_norm": 0.6542199991423644, "learning_rate": 2.0267348428087974e-09, "loss": 0.5269, "step": 7780 }, { "epoch": 0.99, "grad_norm": 0.8336999965186498, "learning_rate": 1.9684184274626482e-09, "loss": 0.6512, "step": 7781 }, { "epoch": 0.99, "grad_norm": 0.7203921560481334, "learning_rate": 1.910953124825143e-09, "loss": 0.5767, "step": 7782 }, { "epoch": 0.99, "grad_norm": 0.7152695576658792, "learning_rate": 1.8543389446828986e-09, "loss": 0.5201, "step": 7783 }, { "epoch": 0.99, "grad_norm": 0.6492815330947064, "learning_rate": 1.7985758966759802e-09, "loss": 0.49, "step": 7784 }, { "epoch": 0.99, "grad_norm": 0.672969385685014, "learning_rate": 1.7436639903012364e-09, "loss": 0.5268, "step": 7785 }, { "epoch": 0.99, "grad_norm": 0.870950256196372, "learning_rate": 1.6896032349089653e-09, "loss": 0.6033, "step": 7786 }, { "epoch": 0.99, "grad_norm": 0.8307803805180253, "learning_rate": 1.636393639705136e-09, "loss": 0.6131, "step": 7787 }, { "epoch": 0.99, "grad_norm": 0.685915269680486, "learning_rate": 1.5840352137508342e-09, "loss": 0.5046, "step": 7788 }, { "epoch": 0.99, "grad_norm": 1.2242661160829826, "learning_rate": 1.5325279659622605e-09, "loss": 0.6561, "step": 7789 }, { "epoch": 0.99, "grad_norm": 0.8246040025098534, "learning_rate": 1.4818719051107323e-09, "loss": 0.5787, "step": 7790 }, { "epoch": 0.99, "grad_norm": 0.8654962246155131, "learning_rate": 1.432067039822127e-09, "loss": 0.6085, "step": 7791 }, { "epoch": 0.99, "grad_norm": 0.7440648543329675, "learning_rate": 1.3831133785774387e-09, "loss": 0.5854, "step": 7792 }, { "epoch": 0.99, "grad_norm": 0.6533064685289787, "learning_rate": 1.3350109297138868e-09, "loss": 0.5154, "step": 7793 }, { "epoch": 0.99, "grad_norm": 0.6605468557127613, "learning_rate": 1.287759701421587e-09, "loss": 0.5663, "step": 7794 }, { "epoch": 0.99, "grad_norm": 0.6640633822597499, "learning_rate": 1.2413597017479907e-09, "loss": 0.4827, "step": 7795 }, { "epoch": 0.99, "grad_norm": 0.9687083555099592, "learning_rate": 1.1958109385945549e-09, "loss": 0.6545, "step": 7796 }, { "epoch": 0.99, "grad_norm": 0.9359954040840036, "learning_rate": 1.1511134197167429e-09, "loss": 0.6436, "step": 7797 }, { "epoch": 0.99, "grad_norm": 0.8085712408662021, "learning_rate": 1.1072671527267986e-09, "loss": 0.5153, "step": 7798 }, { "epoch": 0.99, "grad_norm": 0.7900497162608829, "learning_rate": 1.0642721450915272e-09, "loss": 0.57, "step": 7799 }, { "epoch": 0.99, "grad_norm": 1.0073835613094655, "learning_rate": 1.0221284041317392e-09, "loss": 0.6231, "step": 7800 }, { "epoch": 0.99, "grad_norm": 0.7447281939135997, "learning_rate": 9.808359370244713e-10, "loss": 0.5375, "step": 7801 }, { "epoch": 0.99, "grad_norm": 1.2691562437176505, "learning_rate": 9.403947508018763e-10, "loss": 0.5765, "step": 7802 }, { "epoch": 0.99, "grad_norm": 0.603492045493172, "learning_rate": 9.008048523501123e-10, "loss": 0.4676, "step": 7803 }, { "epoch": 0.99, "grad_norm": 0.8399561608212126, "learning_rate": 8.620662484110087e-10, "loss": 0.6406, "step": 7804 }, { "epoch": 0.99, "grad_norm": 0.6660470559642359, "learning_rate": 8.241789455809557e-10, "loss": 0.487, "step": 7805 }, { "epoch": 0.99, "grad_norm": 0.7526997649275916, "learning_rate": 7.871429503125694e-10, "loss": 0.5727, "step": 7806 }, { "epoch": 0.99, "grad_norm": 0.8386335849922518, "learning_rate": 7.509582689124717e-10, "loss": 0.5359, "step": 7807 }, { "epoch": 0.99, "grad_norm": 0.8791408182547873, "learning_rate": 7.156249075424004e-10, "loss": 0.5908, "step": 7808 }, { "epoch": 0.99, "grad_norm": 0.8814804310839345, "learning_rate": 6.811428722186542e-10, "loss": 0.5871, "step": 7809 }, { "epoch": 0.99, "grad_norm": 0.8069524659368271, "learning_rate": 6.475121688143127e-10, "loss": 0.5866, "step": 7810 }, { "epoch": 1.0, "grad_norm": 1.0796992262489185, "learning_rate": 6.147328030553512e-10, "loss": 0.6439, "step": 7811 }, { "epoch": 1.0, "grad_norm": 0.6471025574360484, "learning_rate": 5.828047805245263e-10, "loss": 0.5278, "step": 7812 }, { "epoch": 1.0, "grad_norm": 0.7213080347264894, "learning_rate": 5.517281066586e-10, "loss": 0.5464, "step": 7813 }, { "epoch": 1.0, "grad_norm": 0.7544696732992185, "learning_rate": 5.215027867488954e-10, "loss": 0.5973, "step": 7814 }, { "epoch": 1.0, "grad_norm": 0.7980019980413136, "learning_rate": 4.921288259435164e-10, "loss": 0.5612, "step": 7815 }, { "epoch": 1.0, "grad_norm": 0.7209293917073583, "learning_rate": 4.63606229244018e-10, "loss": 0.5556, "step": 7816 }, { "epoch": 1.0, "grad_norm": 0.6936144025753888, "learning_rate": 4.359350015076258e-10, "loss": 0.5407, "step": 7817 }, { "epoch": 1.0, "grad_norm": 0.8459246286115272, "learning_rate": 4.0911514744668146e-10, "loss": 0.6508, "step": 7818 }, { "epoch": 1.0, "grad_norm": 0.728279364160396, "learning_rate": 3.8314667162753226e-10, "loss": 0.5827, "step": 7819 }, { "epoch": 1.0, "grad_norm": 0.6240523422549289, "learning_rate": 3.5802957847330675e-10, "loss": 0.5268, "step": 7820 }, { "epoch": 1.0, "grad_norm": 1.1505071053576004, "learning_rate": 3.33763872260584e-10, "loss": 0.6181, "step": 7821 }, { "epoch": 1.0, "grad_norm": 0.781740776424288, "learning_rate": 3.103495571216142e-10, "loss": 0.6263, "step": 7822 }, { "epoch": 1.0, "grad_norm": 0.7154723188236493, "learning_rate": 2.877866370443183e-10, "loss": 0.5604, "step": 7823 }, { "epoch": 1.0, "grad_norm": 0.7804506360670698, "learning_rate": 2.6607511586951295e-10, "loss": 0.531, "step": 7824 }, { "epoch": 1.0, "grad_norm": 0.6245231106333156, "learning_rate": 2.4521499729590617e-10, "loss": 0.4899, "step": 7825 }, { "epoch": 1.0, "grad_norm": 0.8060527603216245, "learning_rate": 2.252062848745462e-10, "loss": 0.6094, "step": 7826 }, { "epoch": 1.0, "grad_norm": 0.7712624981343347, "learning_rate": 2.0604898201381784e-10, "loss": 0.5505, "step": 7827 }, { "epoch": 1.0, "grad_norm": 0.8325203915473648, "learning_rate": 1.8774309197500117e-10, "loss": 0.5425, "step": 7828 }, { "epoch": 1.0, "grad_norm": 0.6362454048618623, "learning_rate": 1.7028861787615757e-10, "loss": 0.5784, "step": 7829 }, { "epoch": 1.0, "grad_norm": 0.9404219665387779, "learning_rate": 1.5368556268935407e-10, "loss": 0.6056, "step": 7830 }, { "epoch": 1.0, "grad_norm": 0.65911909680477, "learning_rate": 1.3793392924177362e-10, "loss": 0.5173, "step": 7831 }, { "epoch": 1.0, "grad_norm": 1.0428669071958223, "learning_rate": 1.2303372021571502e-10, "loss": 0.6511, "step": 7832 }, { "epoch": 1.0, "grad_norm": 0.7835975398135873, "learning_rate": 1.0898493814859301e-10, "loss": 0.5284, "step": 7833 }, { "epoch": 1.0, "grad_norm": 0.6754899777119601, "learning_rate": 9.57875854329382e-11, "loss": 0.5642, "step": 7834 }, { "epoch": 1.0, "grad_norm": 0.7817867229811551, "learning_rate": 8.34416643163971e-11, "loss": 0.5251, "step": 7835 }, { "epoch": 1.0, "grad_norm": 0.7628512553475276, "learning_rate": 7.194717690062191e-11, "loss": 0.5022, "step": 7836 }, { "epoch": 1.0, "grad_norm": 0.8942812694349074, "learning_rate": 6.130412514349094e-11, "loss": 0.6451, "step": 7837 }, { "epoch": 1.0, "grad_norm": 0.7026924685199204, "learning_rate": 5.151251085744324e-11, "loss": 0.5829, "step": 7838 }, { "epoch": 1.0, "grad_norm": 0.8938957575622153, "learning_rate": 4.2572335709478717e-11, "loss": 0.6132, "step": 7839 }, { "epoch": 1.0, "grad_norm": 0.6940844492508118, "learning_rate": 3.4483601222268237e-11, "loss": 0.5458, "step": 7840 }, { "epoch": 1.0, "grad_norm": 0.7017763326458094, "learning_rate": 2.724630877304346e-11, "loss": 0.538, "step": 7841 }, { "epoch": 1.0, "grad_norm": 0.7365141654995742, "learning_rate": 2.0860459594707062e-11, "loss": 0.5385, "step": 7842 }, { "epoch": 1.0, "grad_norm": 0.7958613214386577, "learning_rate": 1.532605477472249e-11, "loss": 0.5561, "step": 7843 }, { "epoch": 1.0, "grad_norm": 0.7231877128825722, "learning_rate": 1.0643095255113978e-11, "loss": 0.5354, "step": 7844 }, { "epoch": 1.0, "grad_norm": 0.9451208547386291, "learning_rate": 6.811581833021663e-12, "loss": 0.6171, "step": 7845 }, { "epoch": 1.0, "grad_norm": 0.9173825027155778, "learning_rate": 3.831515161811794e-12, "loss": 0.5922, "step": 7846 }, { "epoch": 1.0, "grad_norm": 0.748525232596623, "learning_rate": 1.702895748301181e-12, "loss": 0.5628, "step": 7847 }, { "epoch": 1.0, "grad_norm": 0.6750411804468949, "learning_rate": 4.257239549776415e-13, "loss": 0.4978, "step": 7848 }, { "epoch": 1.0, "grad_norm": 0.8171339809221412, "learning_rate": 0.0, "loss": 0.5818, "step": 7849 }, { "epoch": 1.0, "step": 7849, "total_flos": 4047659296358400.0, "train_loss": 0.5952112748412087, "train_runtime": 40810.2508, "train_samples_per_second": 24.62, "train_steps_per_second": 0.192 } ], "logging_steps": 1.0, "max_steps": 7849, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "total_flos": 4047659296358400.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }