{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.005163240866168, "eval_steps": 500, "global_step": 3200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000628227025541355, "grad_norm": 37.5, "learning_rate": 2.5000000000000004e-07, "loss": 1.9002, "step": 2 }, { "epoch": 0.00125645405108271, "grad_norm": 5.65625, "learning_rate": 5.000000000000001e-07, "loss": 1.6234, "step": 4 }, { "epoch": 0.001884681076624065, "grad_norm": 5.15625, "learning_rate": 7.5e-07, "loss": 1.6223, "step": 6 }, { "epoch": 0.00251290810216542, "grad_norm": 4.8125, "learning_rate": 1.0000000000000002e-06, "loss": 1.6607, "step": 8 }, { "epoch": 0.003141135127706775, "grad_norm": 5.03125, "learning_rate": 1.25e-06, "loss": 1.6644, "step": 10 }, { "epoch": 0.00376936215324813, "grad_norm": 5.71875, "learning_rate": 1.5e-06, "loss": 1.8307, "step": 12 }, { "epoch": 0.004397589178789485, "grad_norm": 5.28125, "learning_rate": 1.75e-06, "loss": 1.623, "step": 14 }, { "epoch": 0.00502581620433084, "grad_norm": 3.625, "learning_rate": 2.0000000000000003e-06, "loss": 1.6915, "step": 16 }, { "epoch": 0.005654043229872195, "grad_norm": 3.609375, "learning_rate": 2.25e-06, "loss": 1.8222, "step": 18 }, { "epoch": 0.00628227025541355, "grad_norm": 3.21875, "learning_rate": 2.5e-06, "loss": 1.6702, "step": 20 }, { "epoch": 0.006910497280954905, "grad_norm": 2.140625, "learning_rate": 2.7500000000000004e-06, "loss": 1.6118, "step": 22 }, { "epoch": 0.00753872430649626, "grad_norm": 2.890625, "learning_rate": 3e-06, "loss": 1.6499, "step": 24 }, { "epoch": 0.008166951332037615, "grad_norm": 2.421875, "learning_rate": 3.2500000000000002e-06, "loss": 1.4785, "step": 26 }, { "epoch": 0.00879517835757897, "grad_norm": 2.421875, "learning_rate": 3.5e-06, "loss": 1.7235, "step": 28 }, { "epoch": 0.009423405383120325, "grad_norm": 2.25, "learning_rate": 3.7500000000000005e-06, "loss": 1.6245, "step": 30 }, { "epoch": 0.01005163240866168, "grad_norm": 1.6640625, "learning_rate": 4.000000000000001e-06, "loss": 1.5486, "step": 32 }, { "epoch": 0.010679859434203035, "grad_norm": 2.09375, "learning_rate": 4.25e-06, "loss": 1.4448, "step": 34 }, { "epoch": 0.01130808645974439, "grad_norm": 1.5078125, "learning_rate": 4.5e-06, "loss": 1.48, "step": 36 }, { "epoch": 0.011936313485285744, "grad_norm": 1.6796875, "learning_rate": 4.75e-06, "loss": 1.5128, "step": 38 }, { "epoch": 0.0125645405108271, "grad_norm": 1.5234375, "learning_rate": 5e-06, "loss": 1.4774, "step": 40 }, { "epoch": 0.013192767536368456, "grad_norm": 1.546875, "learning_rate": 5.2500000000000006e-06, "loss": 1.3692, "step": 42 }, { "epoch": 0.01382099456190981, "grad_norm": 1.34375, "learning_rate": 5.500000000000001e-06, "loss": 1.5056, "step": 44 }, { "epoch": 0.014449221587451166, "grad_norm": 1.21875, "learning_rate": 5.75e-06, "loss": 1.4744, "step": 46 }, { "epoch": 0.01507744861299252, "grad_norm": 1.0078125, "learning_rate": 6e-06, "loss": 1.5776, "step": 48 }, { "epoch": 0.015705675638533874, "grad_norm": 1.1015625, "learning_rate": 6.25e-06, "loss": 1.486, "step": 50 }, { "epoch": 0.01633390266407523, "grad_norm": 0.85546875, "learning_rate": 6.5000000000000004e-06, "loss": 1.5541, "step": 52 }, { "epoch": 0.016962129689616585, "grad_norm": 0.984375, "learning_rate": 6.750000000000001e-06, "loss": 1.3999, "step": 54 }, { "epoch": 0.01759035671515794, "grad_norm": 0.96875, "learning_rate": 7e-06, "loss": 1.4455, "step": 56 }, { "epoch": 0.018218583740699294, "grad_norm": 1.0546875, "learning_rate": 7.25e-06, "loss": 1.5221, "step": 58 }, { "epoch": 0.01884681076624065, "grad_norm": 0.88671875, "learning_rate": 7.500000000000001e-06, "loss": 1.4798, "step": 60 }, { "epoch": 0.019475037791782005, "grad_norm": 0.9140625, "learning_rate": 7.75e-06, "loss": 1.4334, "step": 62 }, { "epoch": 0.02010326481732336, "grad_norm": 0.98046875, "learning_rate": 8.000000000000001e-06, "loss": 1.3293, "step": 64 }, { "epoch": 0.020731491842864717, "grad_norm": 0.83984375, "learning_rate": 8.25e-06, "loss": 1.4174, "step": 66 }, { "epoch": 0.02135971886840607, "grad_norm": 0.859375, "learning_rate": 8.5e-06, "loss": 1.4177, "step": 68 }, { "epoch": 0.021987945893947425, "grad_norm": 0.84765625, "learning_rate": 8.750000000000001e-06, "loss": 1.3708, "step": 70 }, { "epoch": 0.02261617291948878, "grad_norm": 0.90234375, "learning_rate": 9e-06, "loss": 1.4062, "step": 72 }, { "epoch": 0.023244399945030136, "grad_norm": 0.83984375, "learning_rate": 9.250000000000001e-06, "loss": 1.3829, "step": 74 }, { "epoch": 0.02387262697057149, "grad_norm": 0.9375, "learning_rate": 9.5e-06, "loss": 1.3441, "step": 76 }, { "epoch": 0.024500853996112845, "grad_norm": 0.90234375, "learning_rate": 9.75e-06, "loss": 1.5625, "step": 78 }, { "epoch": 0.0251290810216542, "grad_norm": 0.81640625, "learning_rate": 1e-05, "loss": 1.4504, "step": 80 }, { "epoch": 0.025757308047195556, "grad_norm": 0.83984375, "learning_rate": 1.025e-05, "loss": 1.4672, "step": 82 }, { "epoch": 0.026385535072736912, "grad_norm": 0.82421875, "learning_rate": 1.0500000000000001e-05, "loss": 1.4405, "step": 84 }, { "epoch": 0.027013762098278264, "grad_norm": 1.0546875, "learning_rate": 1.075e-05, "loss": 1.3557, "step": 86 }, { "epoch": 0.02764198912381962, "grad_norm": 0.90234375, "learning_rate": 1.1000000000000001e-05, "loss": 1.4309, "step": 88 }, { "epoch": 0.028270216149360976, "grad_norm": 0.78125, "learning_rate": 1.125e-05, "loss": 1.3528, "step": 90 }, { "epoch": 0.02889844317490233, "grad_norm": 0.96875, "learning_rate": 1.15e-05, "loss": 1.4093, "step": 92 }, { "epoch": 0.029526670200443684, "grad_norm": 0.87890625, "learning_rate": 1.1750000000000001e-05, "loss": 1.4324, "step": 94 }, { "epoch": 0.03015489722598504, "grad_norm": 0.875, "learning_rate": 1.2e-05, "loss": 1.4622, "step": 96 }, { "epoch": 0.030783124251526395, "grad_norm": 0.85546875, "learning_rate": 1.2250000000000001e-05, "loss": 1.5166, "step": 98 }, { "epoch": 0.03141135127706775, "grad_norm": 0.76953125, "learning_rate": 1.25e-05, "loss": 1.4729, "step": 100 }, { "epoch": 0.032039578302609104, "grad_norm": 0.8828125, "learning_rate": 1.275e-05, "loss": 1.4201, "step": 102 }, { "epoch": 0.03266780532815046, "grad_norm": 0.97265625, "learning_rate": 1.3000000000000001e-05, "loss": 1.3646, "step": 104 }, { "epoch": 0.033296032353691815, "grad_norm": 0.859375, "learning_rate": 1.325e-05, "loss": 1.3105, "step": 106 }, { "epoch": 0.03392425937923317, "grad_norm": 0.78125, "learning_rate": 1.3500000000000001e-05, "loss": 1.5302, "step": 108 }, { "epoch": 0.03455248640477453, "grad_norm": 0.875, "learning_rate": 1.375e-05, "loss": 1.3979, "step": 110 }, { "epoch": 0.03518071343031588, "grad_norm": 0.796875, "learning_rate": 1.4e-05, "loss": 1.3961, "step": 112 }, { "epoch": 0.03580894045585724, "grad_norm": 0.796875, "learning_rate": 1.425e-05, "loss": 1.3645, "step": 114 }, { "epoch": 0.03643716748139859, "grad_norm": 0.7421875, "learning_rate": 1.45e-05, "loss": 1.306, "step": 116 }, { "epoch": 0.03706539450693994, "grad_norm": 0.8828125, "learning_rate": 1.4750000000000003e-05, "loss": 1.3799, "step": 118 }, { "epoch": 0.0376936215324813, "grad_norm": 0.73828125, "learning_rate": 1.5000000000000002e-05, "loss": 1.3281, "step": 120 }, { "epoch": 0.038321848558022654, "grad_norm": 0.87890625, "learning_rate": 1.525e-05, "loss": 1.4052, "step": 122 }, { "epoch": 0.03895007558356401, "grad_norm": 0.8203125, "learning_rate": 1.55e-05, "loss": 1.4946, "step": 124 }, { "epoch": 0.039578302609105366, "grad_norm": 0.80859375, "learning_rate": 1.575e-05, "loss": 1.4292, "step": 126 }, { "epoch": 0.04020652963464672, "grad_norm": 1.0078125, "learning_rate": 1.6000000000000003e-05, "loss": 1.4858, "step": 128 }, { "epoch": 0.04083475666018808, "grad_norm": 0.9765625, "learning_rate": 1.6250000000000002e-05, "loss": 1.2745, "step": 130 }, { "epoch": 0.04146298368572943, "grad_norm": 0.8046875, "learning_rate": 1.65e-05, "loss": 1.4684, "step": 132 }, { "epoch": 0.04209121071127078, "grad_norm": 0.80859375, "learning_rate": 1.675e-05, "loss": 1.4275, "step": 134 }, { "epoch": 0.04271943773681214, "grad_norm": 0.90625, "learning_rate": 1.7e-05, "loss": 1.2831, "step": 136 }, { "epoch": 0.043347664762353494, "grad_norm": 0.953125, "learning_rate": 1.7250000000000003e-05, "loss": 1.445, "step": 138 }, { "epoch": 0.04397589178789485, "grad_norm": 0.80859375, "learning_rate": 1.7500000000000002e-05, "loss": 1.3457, "step": 140 }, { "epoch": 0.044604118813436205, "grad_norm": 0.8046875, "learning_rate": 1.775e-05, "loss": 1.3961, "step": 142 }, { "epoch": 0.04523234583897756, "grad_norm": 0.75, "learning_rate": 1.8e-05, "loss": 1.2985, "step": 144 }, { "epoch": 0.04586057286451892, "grad_norm": 0.81640625, "learning_rate": 1.825e-05, "loss": 1.3075, "step": 146 }, { "epoch": 0.04648879989006027, "grad_norm": 0.76953125, "learning_rate": 1.8500000000000002e-05, "loss": 1.3602, "step": 148 }, { "epoch": 0.04711702691560163, "grad_norm": 0.8828125, "learning_rate": 1.8750000000000002e-05, "loss": 1.4481, "step": 150 }, { "epoch": 0.04774525394114298, "grad_norm": 0.80078125, "learning_rate": 1.9e-05, "loss": 1.409, "step": 152 }, { "epoch": 0.04837348096668433, "grad_norm": 0.80859375, "learning_rate": 1.925e-05, "loss": 1.357, "step": 154 }, { "epoch": 0.04900170799222569, "grad_norm": 0.77734375, "learning_rate": 1.95e-05, "loss": 1.2841, "step": 156 }, { "epoch": 0.049629935017767045, "grad_norm": 0.7421875, "learning_rate": 1.9750000000000002e-05, "loss": 1.4336, "step": 158 }, { "epoch": 0.0502581620433084, "grad_norm": 0.9609375, "learning_rate": 2e-05, "loss": 1.3853, "step": 160 }, { "epoch": 0.050886389068849756, "grad_norm": 0.89453125, "learning_rate": 1.9997461123452876e-05, "loss": 1.3465, "step": 162 }, { "epoch": 0.05151461609439111, "grad_norm": 0.84375, "learning_rate": 1.9994922246905744e-05, "loss": 1.3669, "step": 164 }, { "epoch": 0.05214284311993247, "grad_norm": 0.7421875, "learning_rate": 1.999238337035862e-05, "loss": 1.6046, "step": 166 }, { "epoch": 0.052771070145473824, "grad_norm": 0.81640625, "learning_rate": 1.998984449381149e-05, "loss": 1.2993, "step": 168 }, { "epoch": 0.05339929717101517, "grad_norm": 0.79296875, "learning_rate": 1.9987305617264362e-05, "loss": 1.4495, "step": 170 }, { "epoch": 0.05402752419655653, "grad_norm": 0.7734375, "learning_rate": 1.9984766740717233e-05, "loss": 1.3141, "step": 172 }, { "epoch": 0.054655751222097884, "grad_norm": 0.71484375, "learning_rate": 1.9982227864170108e-05, "loss": 1.4852, "step": 174 }, { "epoch": 0.05528397824763924, "grad_norm": 0.875, "learning_rate": 1.997968898762298e-05, "loss": 1.4228, "step": 176 }, { "epoch": 0.055912205273180596, "grad_norm": 0.80078125, "learning_rate": 1.997715011107585e-05, "loss": 1.5617, "step": 178 }, { "epoch": 0.05654043229872195, "grad_norm": 0.82421875, "learning_rate": 1.9974611234528722e-05, "loss": 1.2938, "step": 180 }, { "epoch": 0.05716865932426331, "grad_norm": 0.7421875, "learning_rate": 1.9972072357981597e-05, "loss": 1.469, "step": 182 }, { "epoch": 0.05779688634980466, "grad_norm": 0.82421875, "learning_rate": 1.9969533481434465e-05, "loss": 1.41, "step": 184 }, { "epoch": 0.05842511337534602, "grad_norm": 0.7265625, "learning_rate": 1.996699460488734e-05, "loss": 1.4414, "step": 186 }, { "epoch": 0.05905334040088737, "grad_norm": 0.8828125, "learning_rate": 1.996445572834021e-05, "loss": 1.2717, "step": 188 }, { "epoch": 0.059681567426428724, "grad_norm": 0.7890625, "learning_rate": 1.9961916851793083e-05, "loss": 1.3179, "step": 190 }, { "epoch": 0.06030979445197008, "grad_norm": 0.88671875, "learning_rate": 1.9959377975245954e-05, "loss": 1.4353, "step": 192 }, { "epoch": 0.060938021477511435, "grad_norm": 0.79296875, "learning_rate": 1.995683909869883e-05, "loss": 1.4721, "step": 194 }, { "epoch": 0.06156624850305279, "grad_norm": 0.7890625, "learning_rate": 1.99543002221517e-05, "loss": 1.4394, "step": 196 }, { "epoch": 0.06219447552859415, "grad_norm": 0.765625, "learning_rate": 1.995176134560457e-05, "loss": 1.4004, "step": 198 }, { "epoch": 0.0628227025541355, "grad_norm": 0.703125, "learning_rate": 1.9949222469057443e-05, "loss": 1.3159, "step": 200 }, { "epoch": 0.06345092957967685, "grad_norm": 0.7890625, "learning_rate": 1.9946683592510318e-05, "loss": 1.408, "step": 202 }, { "epoch": 0.06407915660521821, "grad_norm": 0.77734375, "learning_rate": 1.994414471596319e-05, "loss": 1.362, "step": 204 }, { "epoch": 0.06470738363075956, "grad_norm": 0.7421875, "learning_rate": 1.994160583941606e-05, "loss": 1.321, "step": 206 }, { "epoch": 0.06533561065630092, "grad_norm": 0.76953125, "learning_rate": 1.9939066962868932e-05, "loss": 1.3576, "step": 208 }, { "epoch": 0.06596383768184227, "grad_norm": 0.77734375, "learning_rate": 1.9936528086321803e-05, "loss": 1.4552, "step": 210 }, { "epoch": 0.06659206470738363, "grad_norm": 0.875, "learning_rate": 1.9933989209774675e-05, "loss": 1.3144, "step": 212 }, { "epoch": 0.06722029173292499, "grad_norm": 0.8515625, "learning_rate": 1.993145033322755e-05, "loss": 1.3575, "step": 214 }, { "epoch": 0.06784851875846634, "grad_norm": 0.828125, "learning_rate": 1.992891145668042e-05, "loss": 1.25, "step": 216 }, { "epoch": 0.0684767457840077, "grad_norm": 0.75, "learning_rate": 1.9926372580133292e-05, "loss": 1.4611, "step": 218 }, { "epoch": 0.06910497280954905, "grad_norm": 0.73828125, "learning_rate": 1.9923833703586163e-05, "loss": 1.2994, "step": 220 }, { "epoch": 0.06973319983509041, "grad_norm": 0.9375, "learning_rate": 1.9921294827039038e-05, "loss": 1.2697, "step": 222 }, { "epoch": 0.07036142686063176, "grad_norm": 0.81640625, "learning_rate": 1.991875595049191e-05, "loss": 1.4699, "step": 224 }, { "epoch": 0.07098965388617312, "grad_norm": 0.92578125, "learning_rate": 1.991621707394478e-05, "loss": 1.4276, "step": 226 }, { "epoch": 0.07161788091171448, "grad_norm": 0.74609375, "learning_rate": 1.9913678197397652e-05, "loss": 1.4052, "step": 228 }, { "epoch": 0.07224610793725583, "grad_norm": 0.8203125, "learning_rate": 1.9911139320850527e-05, "loss": 1.3744, "step": 230 }, { "epoch": 0.07287433496279717, "grad_norm": 0.87109375, "learning_rate": 1.9908600444303395e-05, "loss": 1.363, "step": 232 }, { "epoch": 0.07350256198833853, "grad_norm": 0.859375, "learning_rate": 1.990606156775627e-05, "loss": 1.421, "step": 234 }, { "epoch": 0.07413078901387989, "grad_norm": 0.76171875, "learning_rate": 1.990352269120914e-05, "loss": 1.5131, "step": 236 }, { "epoch": 0.07475901603942124, "grad_norm": 1.5625, "learning_rate": 1.9900983814662013e-05, "loss": 1.3155, "step": 238 }, { "epoch": 0.0753872430649626, "grad_norm": 0.78125, "learning_rate": 1.9898444938114884e-05, "loss": 1.3595, "step": 240 }, { "epoch": 0.07601547009050395, "grad_norm": 0.7890625, "learning_rate": 1.989590606156776e-05, "loss": 1.3706, "step": 242 }, { "epoch": 0.07664369711604531, "grad_norm": 0.72265625, "learning_rate": 1.989336718502063e-05, "loss": 1.3058, "step": 244 }, { "epoch": 0.07727192414158666, "grad_norm": 0.71484375, "learning_rate": 1.98908283084735e-05, "loss": 1.3404, "step": 246 }, { "epoch": 0.07790015116712802, "grad_norm": 0.9453125, "learning_rate": 1.9888289431926376e-05, "loss": 1.235, "step": 248 }, { "epoch": 0.07852837819266938, "grad_norm": 0.84765625, "learning_rate": 1.9885750555379248e-05, "loss": 1.3668, "step": 250 }, { "epoch": 0.07915660521821073, "grad_norm": 0.71875, "learning_rate": 1.988321167883212e-05, "loss": 1.3602, "step": 252 }, { "epoch": 0.07978483224375209, "grad_norm": 0.828125, "learning_rate": 1.988067280228499e-05, "loss": 1.3833, "step": 254 }, { "epoch": 0.08041305926929344, "grad_norm": 0.796875, "learning_rate": 1.9878133925737865e-05, "loss": 1.4476, "step": 256 }, { "epoch": 0.0810412862948348, "grad_norm": 0.68359375, "learning_rate": 1.9875595049190733e-05, "loss": 1.4111, "step": 258 }, { "epoch": 0.08166951332037616, "grad_norm": 0.88671875, "learning_rate": 1.9873056172643608e-05, "loss": 1.3636, "step": 260 }, { "epoch": 0.08229774034591751, "grad_norm": 0.78515625, "learning_rate": 1.987051729609648e-05, "loss": 1.2524, "step": 262 }, { "epoch": 0.08292596737145887, "grad_norm": 0.9609375, "learning_rate": 1.986797841954935e-05, "loss": 1.4048, "step": 264 }, { "epoch": 0.08355419439700022, "grad_norm": 0.7109375, "learning_rate": 1.9865439543002222e-05, "loss": 1.3619, "step": 266 }, { "epoch": 0.08418242142254156, "grad_norm": 0.796875, "learning_rate": 1.9862900666455097e-05, "loss": 1.4125, "step": 268 }, { "epoch": 0.08481064844808292, "grad_norm": 0.6875, "learning_rate": 1.986036178990797e-05, "loss": 1.4536, "step": 270 }, { "epoch": 0.08543887547362428, "grad_norm": 0.8125, "learning_rate": 1.985782291336084e-05, "loss": 1.392, "step": 272 }, { "epoch": 0.08606710249916563, "grad_norm": 0.77734375, "learning_rate": 1.985528403681371e-05, "loss": 1.393, "step": 274 }, { "epoch": 0.08669532952470699, "grad_norm": 0.91015625, "learning_rate": 1.9852745160266586e-05, "loss": 1.3635, "step": 276 }, { "epoch": 0.08732355655024834, "grad_norm": 0.75, "learning_rate": 1.9850206283719454e-05, "loss": 1.4626, "step": 278 }, { "epoch": 0.0879517835757897, "grad_norm": 0.8671875, "learning_rate": 1.984766740717233e-05, "loss": 1.3507, "step": 280 }, { "epoch": 0.08858001060133106, "grad_norm": 0.83203125, "learning_rate": 1.98451285306252e-05, "loss": 1.4432, "step": 282 }, { "epoch": 0.08920823762687241, "grad_norm": 0.83203125, "learning_rate": 1.984258965407807e-05, "loss": 1.3932, "step": 284 }, { "epoch": 0.08983646465241377, "grad_norm": 0.8203125, "learning_rate": 1.9840050777530943e-05, "loss": 1.391, "step": 286 }, { "epoch": 0.09046469167795512, "grad_norm": 0.7109375, "learning_rate": 1.9837511900983818e-05, "loss": 1.4163, "step": 288 }, { "epoch": 0.09109291870349648, "grad_norm": 1.171875, "learning_rate": 1.983497302443669e-05, "loss": 1.4135, "step": 290 }, { "epoch": 0.09172114572903783, "grad_norm": 0.8515625, "learning_rate": 1.983243414788956e-05, "loss": 1.4099, "step": 292 }, { "epoch": 0.09234937275457919, "grad_norm": 0.76171875, "learning_rate": 1.982989527134243e-05, "loss": 1.2512, "step": 294 }, { "epoch": 0.09297759978012055, "grad_norm": 0.734375, "learning_rate": 1.9827356394795306e-05, "loss": 1.255, "step": 296 }, { "epoch": 0.0936058268056619, "grad_norm": 0.87109375, "learning_rate": 1.9824817518248174e-05, "loss": 1.2295, "step": 298 }, { "epoch": 0.09423405383120326, "grad_norm": 0.765625, "learning_rate": 1.982227864170105e-05, "loss": 1.4514, "step": 300 }, { "epoch": 0.09486228085674461, "grad_norm": 0.8828125, "learning_rate": 1.981973976515392e-05, "loss": 1.3137, "step": 302 }, { "epoch": 0.09549050788228595, "grad_norm": 0.86328125, "learning_rate": 1.9817200888606792e-05, "loss": 1.3511, "step": 304 }, { "epoch": 0.09611873490782731, "grad_norm": 0.85546875, "learning_rate": 1.9814662012059663e-05, "loss": 1.3035, "step": 306 }, { "epoch": 0.09674696193336867, "grad_norm": 0.734375, "learning_rate": 1.9812123135512538e-05, "loss": 1.4151, "step": 308 }, { "epoch": 0.09737518895891002, "grad_norm": 0.79296875, "learning_rate": 1.980958425896541e-05, "loss": 1.3819, "step": 310 }, { "epoch": 0.09800341598445138, "grad_norm": 0.76953125, "learning_rate": 1.980704538241828e-05, "loss": 1.3212, "step": 312 }, { "epoch": 0.09863164300999273, "grad_norm": 0.86328125, "learning_rate": 1.9804506505871152e-05, "loss": 1.4313, "step": 314 }, { "epoch": 0.09925987003553409, "grad_norm": 0.828125, "learning_rate": 1.9801967629324027e-05, "loss": 1.4021, "step": 316 }, { "epoch": 0.09988809706107545, "grad_norm": 0.7578125, "learning_rate": 1.97994287527769e-05, "loss": 1.3191, "step": 318 }, { "epoch": 0.1005163240866168, "grad_norm": 0.734375, "learning_rate": 1.979688987622977e-05, "loss": 1.3138, "step": 320 }, { "epoch": 0.10114455111215816, "grad_norm": 0.78125, "learning_rate": 1.979435099968264e-05, "loss": 1.3881, "step": 322 }, { "epoch": 0.10177277813769951, "grad_norm": 0.7265625, "learning_rate": 1.9791812123135513e-05, "loss": 1.1786, "step": 324 }, { "epoch": 0.10240100516324087, "grad_norm": 0.73828125, "learning_rate": 1.9789273246588384e-05, "loss": 1.246, "step": 326 }, { "epoch": 0.10302923218878222, "grad_norm": 0.8515625, "learning_rate": 1.978673437004126e-05, "loss": 1.359, "step": 328 }, { "epoch": 0.10365745921432358, "grad_norm": 0.91796875, "learning_rate": 1.978419549349413e-05, "loss": 1.271, "step": 330 }, { "epoch": 0.10428568623986494, "grad_norm": 0.75, "learning_rate": 1.9781656616947e-05, "loss": 1.4137, "step": 332 }, { "epoch": 0.10491391326540629, "grad_norm": 0.75390625, "learning_rate": 1.9779117740399876e-05, "loss": 1.3471, "step": 334 }, { "epoch": 0.10554214029094765, "grad_norm": 0.70703125, "learning_rate": 1.9776578863852748e-05, "loss": 1.4421, "step": 336 }, { "epoch": 0.10617036731648899, "grad_norm": 0.73828125, "learning_rate": 1.977403998730562e-05, "loss": 1.2823, "step": 338 }, { "epoch": 0.10679859434203035, "grad_norm": 0.76171875, "learning_rate": 1.977150111075849e-05, "loss": 1.463, "step": 340 }, { "epoch": 0.1074268213675717, "grad_norm": 0.74609375, "learning_rate": 1.9768962234211365e-05, "loss": 1.2987, "step": 342 }, { "epoch": 0.10805504839311306, "grad_norm": 0.88671875, "learning_rate": 1.9766423357664237e-05, "loss": 1.4113, "step": 344 }, { "epoch": 0.10868327541865441, "grad_norm": 0.7578125, "learning_rate": 1.9763884481117108e-05, "loss": 1.4153, "step": 346 }, { "epoch": 0.10931150244419577, "grad_norm": 0.703125, "learning_rate": 1.976134560456998e-05, "loss": 1.3976, "step": 348 }, { "epoch": 0.10993972946973712, "grad_norm": 0.78515625, "learning_rate": 1.975880672802285e-05, "loss": 1.395, "step": 350 }, { "epoch": 0.11056795649527848, "grad_norm": 0.83984375, "learning_rate": 1.9756267851475722e-05, "loss": 1.533, "step": 352 }, { "epoch": 0.11119618352081984, "grad_norm": 0.796875, "learning_rate": 1.9753728974928597e-05, "loss": 1.3265, "step": 354 }, { "epoch": 0.11182441054636119, "grad_norm": 0.76171875, "learning_rate": 1.9751190098381468e-05, "loss": 1.4088, "step": 356 }, { "epoch": 0.11245263757190255, "grad_norm": 0.8671875, "learning_rate": 1.974865122183434e-05, "loss": 1.4432, "step": 358 }, { "epoch": 0.1130808645974439, "grad_norm": 0.984375, "learning_rate": 1.974611234528721e-05, "loss": 1.2292, "step": 360 }, { "epoch": 0.11370909162298526, "grad_norm": 0.73828125, "learning_rate": 1.9743573468740086e-05, "loss": 1.3708, "step": 362 }, { "epoch": 0.11433731864852661, "grad_norm": 0.73046875, "learning_rate": 1.9741034592192957e-05, "loss": 1.2918, "step": 364 }, { "epoch": 0.11496554567406797, "grad_norm": 0.79296875, "learning_rate": 1.973849571564583e-05, "loss": 1.4335, "step": 366 }, { "epoch": 0.11559377269960933, "grad_norm": 0.78515625, "learning_rate": 1.97359568390987e-05, "loss": 1.2187, "step": 368 }, { "epoch": 0.11622199972515068, "grad_norm": 10.625, "learning_rate": 1.9733417962551575e-05, "loss": 1.2494, "step": 370 }, { "epoch": 0.11685022675069204, "grad_norm": 0.75, "learning_rate": 1.9730879086004443e-05, "loss": 1.2348, "step": 372 }, { "epoch": 0.11747845377623338, "grad_norm": 0.6953125, "learning_rate": 1.9728340209457317e-05, "loss": 1.3933, "step": 374 }, { "epoch": 0.11810668080177474, "grad_norm": 0.8515625, "learning_rate": 1.972580133291019e-05, "loss": 1.445, "step": 376 }, { "epoch": 0.11873490782731609, "grad_norm": 0.8515625, "learning_rate": 1.972326245636306e-05, "loss": 1.3521, "step": 378 }, { "epoch": 0.11936313485285745, "grad_norm": 0.69921875, "learning_rate": 1.972072357981593e-05, "loss": 1.4533, "step": 380 }, { "epoch": 0.1199913618783988, "grad_norm": 0.78515625, "learning_rate": 1.9718184703268806e-05, "loss": 1.2909, "step": 382 }, { "epoch": 0.12061958890394016, "grad_norm": 0.8203125, "learning_rate": 1.9715645826721678e-05, "loss": 1.5502, "step": 384 }, { "epoch": 0.12124781592948151, "grad_norm": 0.84765625, "learning_rate": 1.971310695017455e-05, "loss": 1.3525, "step": 386 }, { "epoch": 0.12187604295502287, "grad_norm": 0.796875, "learning_rate": 1.971056807362742e-05, "loss": 1.5028, "step": 388 }, { "epoch": 0.12250426998056423, "grad_norm": 6.21875, "learning_rate": 1.9708029197080295e-05, "loss": 1.3943, "step": 390 }, { "epoch": 0.12313249700610558, "grad_norm": 0.765625, "learning_rate": 1.9705490320533163e-05, "loss": 1.5042, "step": 392 }, { "epoch": 0.12376072403164694, "grad_norm": 0.79296875, "learning_rate": 1.9702951443986038e-05, "loss": 1.3527, "step": 394 }, { "epoch": 0.1243889510571883, "grad_norm": 0.734375, "learning_rate": 1.970041256743891e-05, "loss": 1.5268, "step": 396 }, { "epoch": 0.12501717808272964, "grad_norm": 0.77734375, "learning_rate": 1.969787369089178e-05, "loss": 1.2923, "step": 398 }, { "epoch": 0.125645405108271, "grad_norm": 0.85546875, "learning_rate": 1.9695334814344652e-05, "loss": 1.4865, "step": 400 }, { "epoch": 0.12627363213381235, "grad_norm": 0.828125, "learning_rate": 1.9692795937797527e-05, "loss": 1.2926, "step": 402 }, { "epoch": 0.1269018591593537, "grad_norm": 0.796875, "learning_rate": 1.96902570612504e-05, "loss": 1.3763, "step": 404 }, { "epoch": 0.12753008618489506, "grad_norm": 0.77734375, "learning_rate": 1.968771818470327e-05, "loss": 1.4208, "step": 406 }, { "epoch": 0.12815831321043641, "grad_norm": 0.8515625, "learning_rate": 1.968517930815614e-05, "loss": 1.2802, "step": 408 }, { "epoch": 0.12878654023597777, "grad_norm": 0.7578125, "learning_rate": 1.9682640431609016e-05, "loss": 1.3137, "step": 410 }, { "epoch": 0.12941476726151913, "grad_norm": 0.734375, "learning_rate": 1.9680101555061887e-05, "loss": 1.2313, "step": 412 }, { "epoch": 0.13004299428706048, "grad_norm": 0.73828125, "learning_rate": 1.967756267851476e-05, "loss": 1.3286, "step": 414 }, { "epoch": 0.13067122131260184, "grad_norm": 0.86328125, "learning_rate": 1.967502380196763e-05, "loss": 1.3544, "step": 416 }, { "epoch": 0.1312994483381432, "grad_norm": 0.796875, "learning_rate": 1.96724849254205e-05, "loss": 1.4726, "step": 418 }, { "epoch": 0.13192767536368455, "grad_norm": 0.71875, "learning_rate": 1.9669946048873376e-05, "loss": 1.3215, "step": 420 }, { "epoch": 0.1325559023892259, "grad_norm": 0.78515625, "learning_rate": 1.9667407172326248e-05, "loss": 1.5521, "step": 422 }, { "epoch": 0.13318412941476726, "grad_norm": 0.796875, "learning_rate": 1.966486829577912e-05, "loss": 1.3127, "step": 424 }, { "epoch": 0.13381235644030862, "grad_norm": 0.8359375, "learning_rate": 1.966232941923199e-05, "loss": 1.2696, "step": 426 }, { "epoch": 0.13444058346584997, "grad_norm": 0.8046875, "learning_rate": 1.9659790542684865e-05, "loss": 1.2138, "step": 428 }, { "epoch": 0.13506881049139133, "grad_norm": 0.7890625, "learning_rate": 1.9657251666137736e-05, "loss": 1.4204, "step": 430 }, { "epoch": 0.13569703751693268, "grad_norm": 0.75390625, "learning_rate": 1.9654712789590608e-05, "loss": 1.2865, "step": 432 }, { "epoch": 0.13632526454247404, "grad_norm": 0.73828125, "learning_rate": 1.965217391304348e-05, "loss": 1.2856, "step": 434 }, { "epoch": 0.1369534915680154, "grad_norm": 0.73046875, "learning_rate": 1.9649635036496354e-05, "loss": 1.4284, "step": 436 }, { "epoch": 0.13758171859355675, "grad_norm": 0.73828125, "learning_rate": 1.9647096159949225e-05, "loss": 1.3569, "step": 438 }, { "epoch": 0.1382099456190981, "grad_norm": 0.671875, "learning_rate": 1.9644557283402097e-05, "loss": 1.3295, "step": 440 }, { "epoch": 0.13883817264463946, "grad_norm": 0.7421875, "learning_rate": 1.9642018406854968e-05, "loss": 1.2948, "step": 442 }, { "epoch": 0.13946639967018082, "grad_norm": 0.79296875, "learning_rate": 1.963947953030784e-05, "loss": 1.4097, "step": 444 }, { "epoch": 0.14009462669572217, "grad_norm": 1.0703125, "learning_rate": 1.963694065376071e-05, "loss": 1.3674, "step": 446 }, { "epoch": 0.14072285372126353, "grad_norm": 0.8671875, "learning_rate": 1.9634401777213586e-05, "loss": 1.4544, "step": 448 }, { "epoch": 0.14135108074680489, "grad_norm": 0.72265625, "learning_rate": 1.9631862900666457e-05, "loss": 1.3385, "step": 450 }, { "epoch": 0.14197930777234624, "grad_norm": 0.75390625, "learning_rate": 1.962932402411933e-05, "loss": 1.3962, "step": 452 }, { "epoch": 0.1426075347978876, "grad_norm": 0.80078125, "learning_rate": 1.96267851475722e-05, "loss": 1.3257, "step": 454 }, { "epoch": 0.14323576182342895, "grad_norm": 0.8359375, "learning_rate": 1.9624246271025075e-05, "loss": 1.3572, "step": 456 }, { "epoch": 0.1438639888489703, "grad_norm": 0.73046875, "learning_rate": 1.9621707394477946e-05, "loss": 1.5115, "step": 458 }, { "epoch": 0.14449221587451166, "grad_norm": 0.7578125, "learning_rate": 1.9619168517930817e-05, "loss": 1.3532, "step": 460 }, { "epoch": 0.14512044290005302, "grad_norm": 0.8046875, "learning_rate": 1.961662964138369e-05, "loss": 1.3612, "step": 462 }, { "epoch": 0.14574866992559435, "grad_norm": 0.73828125, "learning_rate": 1.9614090764836564e-05, "loss": 1.3881, "step": 464 }, { "epoch": 0.1463768969511357, "grad_norm": 0.94140625, "learning_rate": 1.961155188828943e-05, "loss": 1.534, "step": 466 }, { "epoch": 0.14700512397667706, "grad_norm": 0.71484375, "learning_rate": 1.9609013011742306e-05, "loss": 1.4607, "step": 468 }, { "epoch": 0.14763335100221842, "grad_norm": 0.72265625, "learning_rate": 1.9606474135195178e-05, "loss": 1.3466, "step": 470 }, { "epoch": 0.14826157802775977, "grad_norm": 0.7109375, "learning_rate": 1.960393525864805e-05, "loss": 1.3187, "step": 472 }, { "epoch": 0.14888980505330113, "grad_norm": 0.77734375, "learning_rate": 1.960139638210092e-05, "loss": 1.36, "step": 474 }, { "epoch": 0.14951803207884248, "grad_norm": 0.859375, "learning_rate": 1.9598857505553795e-05, "loss": 1.2185, "step": 476 }, { "epoch": 0.15014625910438384, "grad_norm": 0.71484375, "learning_rate": 1.9596318629006667e-05, "loss": 1.4085, "step": 478 }, { "epoch": 0.1507744861299252, "grad_norm": 0.98046875, "learning_rate": 1.9593779752459538e-05, "loss": 1.3917, "step": 480 }, { "epoch": 0.15140271315546655, "grad_norm": 0.7421875, "learning_rate": 1.959124087591241e-05, "loss": 1.3497, "step": 482 }, { "epoch": 0.1520309401810079, "grad_norm": 0.76171875, "learning_rate": 1.9588701999365284e-05, "loss": 1.3855, "step": 484 }, { "epoch": 0.15265916720654926, "grad_norm": 0.81640625, "learning_rate": 1.9586163122818152e-05, "loss": 1.4071, "step": 486 }, { "epoch": 0.15328739423209062, "grad_norm": 0.80859375, "learning_rate": 1.9583624246271027e-05, "loss": 1.2817, "step": 488 }, { "epoch": 0.15391562125763197, "grad_norm": 0.75, "learning_rate": 1.9581085369723898e-05, "loss": 1.3758, "step": 490 }, { "epoch": 0.15454384828317333, "grad_norm": 0.78125, "learning_rate": 1.957854649317677e-05, "loss": 1.4021, "step": 492 }, { "epoch": 0.15517207530871469, "grad_norm": 0.75390625, "learning_rate": 1.957600761662964e-05, "loss": 1.4163, "step": 494 }, { "epoch": 0.15580030233425604, "grad_norm": 0.7890625, "learning_rate": 1.9573468740082516e-05, "loss": 1.3127, "step": 496 }, { "epoch": 0.1564285293597974, "grad_norm": 0.79296875, "learning_rate": 1.9570929863535387e-05, "loss": 1.2817, "step": 498 }, { "epoch": 0.15705675638533875, "grad_norm": 0.76171875, "learning_rate": 1.956839098698826e-05, "loss": 1.3561, "step": 500 }, { "epoch": 0.1576849834108801, "grad_norm": 0.76171875, "learning_rate": 1.956585211044113e-05, "loss": 1.317, "step": 502 }, { "epoch": 0.15831321043642146, "grad_norm": 1.3671875, "learning_rate": 1.9563313233894005e-05, "loss": 1.4507, "step": 504 }, { "epoch": 0.15894143746196282, "grad_norm": 1.0703125, "learning_rate": 1.9560774357346876e-05, "loss": 1.3715, "step": 506 }, { "epoch": 0.15956966448750418, "grad_norm": 0.765625, "learning_rate": 1.9558235480799747e-05, "loss": 1.3502, "step": 508 }, { "epoch": 0.16019789151304553, "grad_norm": 0.72265625, "learning_rate": 1.9555696604252622e-05, "loss": 1.4822, "step": 510 }, { "epoch": 0.1608261185385869, "grad_norm": 0.75, "learning_rate": 1.955315772770549e-05, "loss": 1.2966, "step": 512 }, { "epoch": 0.16145434556412824, "grad_norm": 0.80078125, "learning_rate": 1.9550618851158365e-05, "loss": 1.2823, "step": 514 }, { "epoch": 0.1620825725896696, "grad_norm": 0.87109375, "learning_rate": 1.9548079974611236e-05, "loss": 1.3542, "step": 516 }, { "epoch": 0.16271079961521095, "grad_norm": 0.72265625, "learning_rate": 1.9545541098064108e-05, "loss": 1.2341, "step": 518 }, { "epoch": 0.1633390266407523, "grad_norm": 0.90625, "learning_rate": 1.954300222151698e-05, "loss": 1.4864, "step": 520 }, { "epoch": 0.16396725366629367, "grad_norm": 0.80078125, "learning_rate": 1.9540463344969854e-05, "loss": 1.4035, "step": 522 }, { "epoch": 0.16459548069183502, "grad_norm": 0.7734375, "learning_rate": 1.9537924468422725e-05, "loss": 1.3945, "step": 524 }, { "epoch": 0.16522370771737638, "grad_norm": 0.94921875, "learning_rate": 1.9535385591875597e-05, "loss": 1.3526, "step": 526 }, { "epoch": 0.16585193474291773, "grad_norm": 0.71484375, "learning_rate": 1.9532846715328468e-05, "loss": 1.4006, "step": 528 }, { "epoch": 0.1664801617684591, "grad_norm": 0.703125, "learning_rate": 1.9530307838781343e-05, "loss": 1.3114, "step": 530 }, { "epoch": 0.16710838879400045, "grad_norm": 0.75390625, "learning_rate": 1.9527768962234214e-05, "loss": 1.4793, "step": 532 }, { "epoch": 0.16773661581954177, "grad_norm": 0.7734375, "learning_rate": 1.9525230085687086e-05, "loss": 1.3226, "step": 534 }, { "epoch": 0.16836484284508313, "grad_norm": 0.90625, "learning_rate": 1.9522691209139957e-05, "loss": 1.3562, "step": 536 }, { "epoch": 0.16899306987062448, "grad_norm": 0.91796875, "learning_rate": 1.952015233259283e-05, "loss": 1.3007, "step": 538 }, { "epoch": 0.16962129689616584, "grad_norm": 0.69140625, "learning_rate": 1.95176134560457e-05, "loss": 1.4449, "step": 540 }, { "epoch": 0.1702495239217072, "grad_norm": 0.74609375, "learning_rate": 1.9515074579498575e-05, "loss": 1.3835, "step": 542 }, { "epoch": 0.17087775094724855, "grad_norm": 0.74609375, "learning_rate": 1.9512535702951446e-05, "loss": 1.4734, "step": 544 }, { "epoch": 0.1715059779727899, "grad_norm": 0.7109375, "learning_rate": 1.9509996826404317e-05, "loss": 1.4759, "step": 546 }, { "epoch": 0.17213420499833126, "grad_norm": 0.6953125, "learning_rate": 1.950745794985719e-05, "loss": 1.3622, "step": 548 }, { "epoch": 0.17276243202387262, "grad_norm": 0.703125, "learning_rate": 1.9504919073310063e-05, "loss": 1.5, "step": 550 }, { "epoch": 0.17339065904941398, "grad_norm": 0.765625, "learning_rate": 1.9502380196762935e-05, "loss": 1.4584, "step": 552 }, { "epoch": 0.17401888607495533, "grad_norm": 0.78125, "learning_rate": 1.9499841320215806e-05, "loss": 1.1847, "step": 554 }, { "epoch": 0.1746471131004967, "grad_norm": 0.8046875, "learning_rate": 1.9497302443668678e-05, "loss": 1.4887, "step": 556 }, { "epoch": 0.17527534012603804, "grad_norm": 0.703125, "learning_rate": 1.9494763567121552e-05, "loss": 1.3411, "step": 558 }, { "epoch": 0.1759035671515794, "grad_norm": 0.8125, "learning_rate": 1.949222469057442e-05, "loss": 1.4525, "step": 560 }, { "epoch": 0.17653179417712075, "grad_norm": 0.78125, "learning_rate": 1.9489685814027295e-05, "loss": 1.2743, "step": 562 }, { "epoch": 0.1771600212026621, "grad_norm": 0.77734375, "learning_rate": 1.9487146937480167e-05, "loss": 1.385, "step": 564 }, { "epoch": 0.17778824822820347, "grad_norm": 0.89453125, "learning_rate": 1.9484608060933038e-05, "loss": 1.3988, "step": 566 }, { "epoch": 0.17841647525374482, "grad_norm": 0.78125, "learning_rate": 1.948206918438591e-05, "loss": 1.2637, "step": 568 }, { "epoch": 0.17904470227928618, "grad_norm": 0.73046875, "learning_rate": 1.9479530307838784e-05, "loss": 1.3832, "step": 570 }, { "epoch": 0.17967292930482753, "grad_norm": 0.83984375, "learning_rate": 1.9476991431291655e-05, "loss": 1.367, "step": 572 }, { "epoch": 0.1803011563303689, "grad_norm": 0.85546875, "learning_rate": 1.9474452554744527e-05, "loss": 1.3174, "step": 574 }, { "epoch": 0.18092938335591024, "grad_norm": 0.6875, "learning_rate": 1.9471913678197398e-05, "loss": 1.2966, "step": 576 }, { "epoch": 0.1815576103814516, "grad_norm": 0.796875, "learning_rate": 1.9469374801650273e-05, "loss": 1.4582, "step": 578 }, { "epoch": 0.18218583740699296, "grad_norm": 0.69921875, "learning_rate": 1.946683592510314e-05, "loss": 1.3229, "step": 580 }, { "epoch": 0.1828140644325343, "grad_norm": 0.734375, "learning_rate": 1.9464297048556016e-05, "loss": 1.2895, "step": 582 }, { "epoch": 0.18344229145807567, "grad_norm": 0.73046875, "learning_rate": 1.9461758172008887e-05, "loss": 1.5382, "step": 584 }, { "epoch": 0.18407051848361702, "grad_norm": 0.92578125, "learning_rate": 1.945921929546176e-05, "loss": 1.4349, "step": 586 }, { "epoch": 0.18469874550915838, "grad_norm": 0.828125, "learning_rate": 1.945668041891463e-05, "loss": 1.3861, "step": 588 }, { "epoch": 0.18532697253469974, "grad_norm": 0.76953125, "learning_rate": 1.9454141542367505e-05, "loss": 1.2897, "step": 590 }, { "epoch": 0.1859551995602411, "grad_norm": 0.8671875, "learning_rate": 1.9451602665820376e-05, "loss": 1.3362, "step": 592 }, { "epoch": 0.18658342658578245, "grad_norm": 0.8046875, "learning_rate": 1.9449063789273247e-05, "loss": 1.3954, "step": 594 }, { "epoch": 0.1872116536113238, "grad_norm": 0.734375, "learning_rate": 1.9446524912726122e-05, "loss": 1.3541, "step": 596 }, { "epoch": 0.18783988063686516, "grad_norm": 0.80859375, "learning_rate": 1.9443986036178994e-05, "loss": 1.4498, "step": 598 }, { "epoch": 0.18846810766240651, "grad_norm": 0.83203125, "learning_rate": 1.9441447159631865e-05, "loss": 1.3767, "step": 600 }, { "epoch": 0.18909633468794787, "grad_norm": 0.9453125, "learning_rate": 1.9438908283084736e-05, "loss": 1.245, "step": 602 }, { "epoch": 0.18972456171348923, "grad_norm": 0.66015625, "learning_rate": 1.943636940653761e-05, "loss": 1.4371, "step": 604 }, { "epoch": 0.19035278873903055, "grad_norm": 0.7890625, "learning_rate": 1.943383052999048e-05, "loss": 1.3194, "step": 606 }, { "epoch": 0.1909810157645719, "grad_norm": 0.7421875, "learning_rate": 1.9431291653443354e-05, "loss": 1.3339, "step": 608 }, { "epoch": 0.19160924279011327, "grad_norm": 0.7578125, "learning_rate": 1.9428752776896225e-05, "loss": 1.3773, "step": 610 }, { "epoch": 0.19223746981565462, "grad_norm": 0.7265625, "learning_rate": 1.9426213900349097e-05, "loss": 1.3456, "step": 612 }, { "epoch": 0.19286569684119598, "grad_norm": 1.015625, "learning_rate": 1.9423675023801968e-05, "loss": 1.3713, "step": 614 }, { "epoch": 0.19349392386673733, "grad_norm": 0.73828125, "learning_rate": 1.9421136147254843e-05, "loss": 1.4541, "step": 616 }, { "epoch": 0.1941221508922787, "grad_norm": 0.71484375, "learning_rate": 1.9418597270707714e-05, "loss": 1.4132, "step": 618 }, { "epoch": 0.19475037791782004, "grad_norm": 0.90234375, "learning_rate": 1.9416058394160586e-05, "loss": 1.461, "step": 620 }, { "epoch": 0.1953786049433614, "grad_norm": 0.765625, "learning_rate": 1.9413519517613457e-05, "loss": 1.2741, "step": 622 }, { "epoch": 0.19600683196890276, "grad_norm": 0.75, "learning_rate": 1.9410980641066332e-05, "loss": 1.4783, "step": 624 }, { "epoch": 0.1966350589944441, "grad_norm": 0.9296875, "learning_rate": 1.94084417645192e-05, "loss": 1.3676, "step": 626 }, { "epoch": 0.19726328601998547, "grad_norm": 0.83984375, "learning_rate": 1.9405902887972074e-05, "loss": 1.2958, "step": 628 }, { "epoch": 0.19789151304552682, "grad_norm": 0.8125, "learning_rate": 1.9403364011424946e-05, "loss": 1.3159, "step": 630 }, { "epoch": 0.19851974007106818, "grad_norm": 0.7578125, "learning_rate": 1.9400825134877817e-05, "loss": 1.3835, "step": 632 }, { "epoch": 0.19914796709660953, "grad_norm": 0.890625, "learning_rate": 1.939828625833069e-05, "loss": 1.2554, "step": 634 }, { "epoch": 0.1997761941221509, "grad_norm": 0.671875, "learning_rate": 1.9395747381783563e-05, "loss": 1.3815, "step": 636 }, { "epoch": 0.20040442114769225, "grad_norm": 0.78125, "learning_rate": 1.9393208505236435e-05, "loss": 1.4323, "step": 638 }, { "epoch": 0.2010326481732336, "grad_norm": 0.7265625, "learning_rate": 1.9390669628689306e-05, "loss": 1.3292, "step": 640 }, { "epoch": 0.20166087519877496, "grad_norm": 0.82421875, "learning_rate": 1.9388130752142178e-05, "loss": 1.2865, "step": 642 }, { "epoch": 0.20228910222431631, "grad_norm": 1.015625, "learning_rate": 1.9385591875595052e-05, "loss": 1.3822, "step": 644 }, { "epoch": 0.20291732924985767, "grad_norm": 0.75, "learning_rate": 1.9383052999047924e-05, "loss": 1.3657, "step": 646 }, { "epoch": 0.20354555627539903, "grad_norm": 0.82421875, "learning_rate": 1.9380514122500795e-05, "loss": 1.3554, "step": 648 }, { "epoch": 0.20417378330094038, "grad_norm": 0.75, "learning_rate": 1.9377975245953666e-05, "loss": 1.331, "step": 650 }, { "epoch": 0.20480201032648174, "grad_norm": 0.96484375, "learning_rate": 1.9375436369406538e-05, "loss": 1.3798, "step": 652 }, { "epoch": 0.2054302373520231, "grad_norm": 0.80078125, "learning_rate": 1.937289749285941e-05, "loss": 1.4126, "step": 654 }, { "epoch": 0.20605846437756445, "grad_norm": 0.7265625, "learning_rate": 1.9370358616312284e-05, "loss": 1.5412, "step": 656 }, { "epoch": 0.2066866914031058, "grad_norm": 0.6875, "learning_rate": 1.9367819739765155e-05, "loss": 1.4367, "step": 658 }, { "epoch": 0.20731491842864716, "grad_norm": 0.8828125, "learning_rate": 1.9365280863218027e-05, "loss": 1.3944, "step": 660 }, { "epoch": 0.20794314545418852, "grad_norm": 0.74609375, "learning_rate": 1.9362741986670898e-05, "loss": 1.4311, "step": 662 }, { "epoch": 0.20857137247972987, "grad_norm": 0.73046875, "learning_rate": 1.9360203110123773e-05, "loss": 1.452, "step": 664 }, { "epoch": 0.20919959950527123, "grad_norm": 0.73828125, "learning_rate": 1.9357664233576644e-05, "loss": 1.3529, "step": 666 }, { "epoch": 0.20982782653081258, "grad_norm": 0.703125, "learning_rate": 1.9355125357029516e-05, "loss": 1.3444, "step": 668 }, { "epoch": 0.21045605355635394, "grad_norm": 0.70703125, "learning_rate": 1.9352586480482387e-05, "loss": 1.352, "step": 670 }, { "epoch": 0.2110842805818953, "grad_norm": 0.81640625, "learning_rate": 1.9350047603935262e-05, "loss": 1.455, "step": 672 }, { "epoch": 0.21171250760743665, "grad_norm": 0.7578125, "learning_rate": 1.934750872738813e-05, "loss": 1.2581, "step": 674 }, { "epoch": 0.21234073463297798, "grad_norm": 0.8515625, "learning_rate": 1.9344969850841005e-05, "loss": 1.3224, "step": 676 }, { "epoch": 0.21296896165851933, "grad_norm": 0.6875, "learning_rate": 1.9342430974293876e-05, "loss": 1.4604, "step": 678 }, { "epoch": 0.2135971886840607, "grad_norm": 0.75, "learning_rate": 1.9339892097746747e-05, "loss": 1.2345, "step": 680 }, { "epoch": 0.21422541570960205, "grad_norm": 0.70703125, "learning_rate": 1.9337353221199622e-05, "loss": 1.4289, "step": 682 }, { "epoch": 0.2148536427351434, "grad_norm": 0.875, "learning_rate": 1.9334814344652494e-05, "loss": 1.4216, "step": 684 }, { "epoch": 0.21548186976068476, "grad_norm": 0.796875, "learning_rate": 1.9332275468105365e-05, "loss": 1.4541, "step": 686 }, { "epoch": 0.2161100967862261, "grad_norm": 0.83203125, "learning_rate": 1.9329736591558236e-05, "loss": 1.3089, "step": 688 }, { "epoch": 0.21673832381176747, "grad_norm": 0.8828125, "learning_rate": 1.932719771501111e-05, "loss": 1.3822, "step": 690 }, { "epoch": 0.21736655083730883, "grad_norm": 0.78125, "learning_rate": 1.9324658838463982e-05, "loss": 1.2839, "step": 692 }, { "epoch": 0.21799477786285018, "grad_norm": 0.7421875, "learning_rate": 1.9322119961916854e-05, "loss": 1.2813, "step": 694 }, { "epoch": 0.21862300488839154, "grad_norm": 0.7265625, "learning_rate": 1.9319581085369725e-05, "loss": 1.3437, "step": 696 }, { "epoch": 0.2192512319139329, "grad_norm": 0.69140625, "learning_rate": 1.93170422088226e-05, "loss": 1.3649, "step": 698 }, { "epoch": 0.21987945893947425, "grad_norm": 0.70703125, "learning_rate": 1.9314503332275468e-05, "loss": 1.3949, "step": 700 }, { "epoch": 0.2205076859650156, "grad_norm": 0.9921875, "learning_rate": 1.9311964455728343e-05, "loss": 1.3488, "step": 702 }, { "epoch": 0.22113591299055696, "grad_norm": 0.8671875, "learning_rate": 1.9309425579181214e-05, "loss": 1.268, "step": 704 }, { "epoch": 0.22176414001609832, "grad_norm": 0.875, "learning_rate": 1.9306886702634085e-05, "loss": 1.3855, "step": 706 }, { "epoch": 0.22239236704163967, "grad_norm": 0.765625, "learning_rate": 1.9304347826086957e-05, "loss": 1.2877, "step": 708 }, { "epoch": 0.22302059406718103, "grad_norm": 0.7734375, "learning_rate": 1.930180894953983e-05, "loss": 1.3324, "step": 710 }, { "epoch": 0.22364882109272238, "grad_norm": 0.75390625, "learning_rate": 1.9299270072992703e-05, "loss": 1.3602, "step": 712 }, { "epoch": 0.22427704811826374, "grad_norm": 0.76171875, "learning_rate": 1.9296731196445574e-05, "loss": 1.3006, "step": 714 }, { "epoch": 0.2249052751438051, "grad_norm": 0.7578125, "learning_rate": 1.9294192319898446e-05, "loss": 1.3667, "step": 716 }, { "epoch": 0.22553350216934645, "grad_norm": 0.70703125, "learning_rate": 1.929165344335132e-05, "loss": 1.3536, "step": 718 }, { "epoch": 0.2261617291948878, "grad_norm": 0.73828125, "learning_rate": 1.928911456680419e-05, "loss": 1.4343, "step": 720 }, { "epoch": 0.22678995622042916, "grad_norm": 0.76171875, "learning_rate": 1.9286575690257063e-05, "loss": 1.355, "step": 722 }, { "epoch": 0.22741818324597052, "grad_norm": 0.88671875, "learning_rate": 1.9284036813709935e-05, "loss": 1.3999, "step": 724 }, { "epoch": 0.22804641027151187, "grad_norm": 0.9140625, "learning_rate": 1.9281497937162806e-05, "loss": 1.3638, "step": 726 }, { "epoch": 0.22867463729705323, "grad_norm": 0.7265625, "learning_rate": 1.9278959060615677e-05, "loss": 1.2724, "step": 728 }, { "epoch": 0.22930286432259459, "grad_norm": 1.0, "learning_rate": 1.9276420184068552e-05, "loss": 1.3783, "step": 730 }, { "epoch": 0.22993109134813594, "grad_norm": 0.7578125, "learning_rate": 1.9273881307521424e-05, "loss": 1.2429, "step": 732 }, { "epoch": 0.2305593183736773, "grad_norm": 0.76171875, "learning_rate": 1.9271342430974295e-05, "loss": 1.4618, "step": 734 }, { "epoch": 0.23118754539921865, "grad_norm": 0.70703125, "learning_rate": 1.9268803554427166e-05, "loss": 1.3145, "step": 736 }, { "epoch": 0.23181577242476, "grad_norm": 0.74609375, "learning_rate": 1.926626467788004e-05, "loss": 1.3562, "step": 738 }, { "epoch": 0.23244399945030136, "grad_norm": 0.7734375, "learning_rate": 1.9263725801332913e-05, "loss": 1.3047, "step": 740 }, { "epoch": 0.23307222647584272, "grad_norm": 0.765625, "learning_rate": 1.9261186924785784e-05, "loss": 1.4534, "step": 742 }, { "epoch": 0.23370045350138408, "grad_norm": 0.84375, "learning_rate": 1.9258648048238655e-05, "loss": 1.3435, "step": 744 }, { "epoch": 0.23432868052692543, "grad_norm": 0.75, "learning_rate": 1.9256109171691527e-05, "loss": 1.3951, "step": 746 }, { "epoch": 0.23495690755246676, "grad_norm": 0.68359375, "learning_rate": 1.9253570295144398e-05, "loss": 1.3799, "step": 748 }, { "epoch": 0.23558513457800812, "grad_norm": 0.8046875, "learning_rate": 1.9251031418597273e-05, "loss": 1.5794, "step": 750 }, { "epoch": 0.23621336160354947, "grad_norm": 0.73828125, "learning_rate": 1.9248492542050144e-05, "loss": 1.3543, "step": 752 }, { "epoch": 0.23684158862909083, "grad_norm": 0.71484375, "learning_rate": 1.9245953665503016e-05, "loss": 1.2956, "step": 754 }, { "epoch": 0.23746981565463218, "grad_norm": 0.8359375, "learning_rate": 1.9243414788955887e-05, "loss": 1.2537, "step": 756 }, { "epoch": 0.23809804268017354, "grad_norm": 0.83203125, "learning_rate": 1.9240875912408762e-05, "loss": 1.3696, "step": 758 }, { "epoch": 0.2387262697057149, "grad_norm": 0.8359375, "learning_rate": 1.9238337035861633e-05, "loss": 1.4097, "step": 760 }, { "epoch": 0.23935449673125625, "grad_norm": 1.015625, "learning_rate": 1.9235798159314505e-05, "loss": 1.307, "step": 762 }, { "epoch": 0.2399827237567976, "grad_norm": 0.91015625, "learning_rate": 1.923325928276738e-05, "loss": 1.2294, "step": 764 }, { "epoch": 0.24061095078233896, "grad_norm": 0.796875, "learning_rate": 1.923072040622025e-05, "loss": 1.3091, "step": 766 }, { "epoch": 0.24123917780788032, "grad_norm": 0.859375, "learning_rate": 1.9228181529673122e-05, "loss": 1.3432, "step": 768 }, { "epoch": 0.24186740483342167, "grad_norm": 0.80078125, "learning_rate": 1.9225642653125993e-05, "loss": 1.3201, "step": 770 }, { "epoch": 0.24249563185896303, "grad_norm": 0.80078125, "learning_rate": 1.9223103776578865e-05, "loss": 1.4521, "step": 772 }, { "epoch": 0.24312385888450438, "grad_norm": 0.95703125, "learning_rate": 1.9220564900031736e-05, "loss": 1.4254, "step": 774 }, { "epoch": 0.24375208591004574, "grad_norm": 0.8125, "learning_rate": 1.921802602348461e-05, "loss": 1.3347, "step": 776 }, { "epoch": 0.2443803129355871, "grad_norm": 0.7890625, "learning_rate": 1.9215487146937482e-05, "loss": 1.2956, "step": 778 }, { "epoch": 0.24500853996112845, "grad_norm": 0.7734375, "learning_rate": 1.9212948270390354e-05, "loss": 1.3128, "step": 780 }, { "epoch": 0.2456367669866698, "grad_norm": 0.84375, "learning_rate": 1.9210409393843225e-05, "loss": 1.3065, "step": 782 }, { "epoch": 0.24626499401221116, "grad_norm": 1.015625, "learning_rate": 1.92078705172961e-05, "loss": 1.2968, "step": 784 }, { "epoch": 0.24689322103775252, "grad_norm": 0.87890625, "learning_rate": 1.920533164074897e-05, "loss": 1.3041, "step": 786 }, { "epoch": 0.24752144806329388, "grad_norm": 0.8046875, "learning_rate": 1.9202792764201843e-05, "loss": 1.4266, "step": 788 }, { "epoch": 0.24814967508883523, "grad_norm": 0.68359375, "learning_rate": 1.9200253887654714e-05, "loss": 1.4958, "step": 790 }, { "epoch": 0.2487779021143766, "grad_norm": 0.9375, "learning_rate": 1.919771501110759e-05, "loss": 1.4217, "step": 792 }, { "epoch": 0.24940612913991794, "grad_norm": 0.70703125, "learning_rate": 1.9195176134560457e-05, "loss": 1.3905, "step": 794 }, { "epoch": 0.25003435616545927, "grad_norm": 0.8203125, "learning_rate": 1.919263725801333e-05, "loss": 1.3715, "step": 796 }, { "epoch": 0.25066258319100065, "grad_norm": 0.9296875, "learning_rate": 1.9190098381466203e-05, "loss": 1.4086, "step": 798 }, { "epoch": 0.251290810216542, "grad_norm": 0.84375, "learning_rate": 1.9187559504919074e-05, "loss": 1.4157, "step": 800 }, { "epoch": 0.25191903724208337, "grad_norm": 0.7109375, "learning_rate": 1.9185020628371946e-05, "loss": 1.2557, "step": 802 }, { "epoch": 0.2525472642676247, "grad_norm": 0.7734375, "learning_rate": 1.918248175182482e-05, "loss": 1.3713, "step": 804 }, { "epoch": 0.2531754912931661, "grad_norm": 0.7265625, "learning_rate": 1.9179942875277692e-05, "loss": 1.3549, "step": 806 }, { "epoch": 0.2538037183187074, "grad_norm": 0.7734375, "learning_rate": 1.9177403998730563e-05, "loss": 1.4184, "step": 808 }, { "epoch": 0.2544319453442488, "grad_norm": 0.828125, "learning_rate": 1.9174865122183435e-05, "loss": 1.3112, "step": 810 }, { "epoch": 0.2550601723697901, "grad_norm": 0.7421875, "learning_rate": 1.917232624563631e-05, "loss": 1.3818, "step": 812 }, { "epoch": 0.2556883993953315, "grad_norm": 0.796875, "learning_rate": 1.9169787369089177e-05, "loss": 1.4245, "step": 814 }, { "epoch": 0.25631662642087283, "grad_norm": 0.91015625, "learning_rate": 1.9167248492542052e-05, "loss": 1.3986, "step": 816 }, { "epoch": 0.2569448534464142, "grad_norm": 0.7421875, "learning_rate": 1.9164709615994924e-05, "loss": 1.3054, "step": 818 }, { "epoch": 0.25757308047195554, "grad_norm": 0.8046875, "learning_rate": 1.9162170739447795e-05, "loss": 1.3303, "step": 820 }, { "epoch": 0.2582013074974969, "grad_norm": 0.77734375, "learning_rate": 1.9159631862900666e-05, "loss": 1.3877, "step": 822 }, { "epoch": 0.25882953452303825, "grad_norm": 0.8359375, "learning_rate": 1.915709298635354e-05, "loss": 1.3464, "step": 824 }, { "epoch": 0.25945776154857964, "grad_norm": 0.74609375, "learning_rate": 1.9154554109806412e-05, "loss": 1.4358, "step": 826 }, { "epoch": 0.26008598857412096, "grad_norm": 0.73046875, "learning_rate": 1.9152015233259284e-05, "loss": 1.2982, "step": 828 }, { "epoch": 0.26071421559966235, "grad_norm": 0.796875, "learning_rate": 1.9149476356712155e-05, "loss": 1.398, "step": 830 }, { "epoch": 0.2613424426252037, "grad_norm": 0.69921875, "learning_rate": 1.914693748016503e-05, "loss": 1.2641, "step": 832 }, { "epoch": 0.26197066965074506, "grad_norm": 0.88671875, "learning_rate": 1.91443986036179e-05, "loss": 1.3669, "step": 834 }, { "epoch": 0.2625988966762864, "grad_norm": 0.796875, "learning_rate": 1.9141859727070773e-05, "loss": 1.3182, "step": 836 }, { "epoch": 0.26322712370182777, "grad_norm": 0.734375, "learning_rate": 1.9139320850523644e-05, "loss": 1.3939, "step": 838 }, { "epoch": 0.2638553507273691, "grad_norm": 0.66796875, "learning_rate": 1.9136781973976516e-05, "loss": 1.4948, "step": 840 }, { "epoch": 0.2644835777529105, "grad_norm": 0.88671875, "learning_rate": 1.9134243097429387e-05, "loss": 1.34, "step": 842 }, { "epoch": 0.2651118047784518, "grad_norm": 0.890625, "learning_rate": 1.913170422088226e-05, "loss": 1.3576, "step": 844 }, { "epoch": 0.2657400318039932, "grad_norm": 0.71875, "learning_rate": 1.9129165344335133e-05, "loss": 1.3366, "step": 846 }, { "epoch": 0.2663682588295345, "grad_norm": 0.8359375, "learning_rate": 1.9126626467788004e-05, "loss": 1.4665, "step": 848 }, { "epoch": 0.2669964858550759, "grad_norm": 0.69140625, "learning_rate": 1.912408759124088e-05, "loss": 1.4036, "step": 850 }, { "epoch": 0.26762471288061723, "grad_norm": 0.73046875, "learning_rate": 1.912154871469375e-05, "loss": 1.2714, "step": 852 }, { "epoch": 0.26825293990615856, "grad_norm": 0.71875, "learning_rate": 1.9119009838146622e-05, "loss": 1.3858, "step": 854 }, { "epoch": 0.26888116693169994, "grad_norm": 0.734375, "learning_rate": 1.9116470961599493e-05, "loss": 1.4882, "step": 856 }, { "epoch": 0.26950939395724127, "grad_norm": 0.77734375, "learning_rate": 1.9113932085052368e-05, "loss": 1.2592, "step": 858 }, { "epoch": 0.27013762098278266, "grad_norm": 0.75, "learning_rate": 1.911139320850524e-05, "loss": 1.4349, "step": 860 }, { "epoch": 0.270765848008324, "grad_norm": 0.921875, "learning_rate": 1.910885433195811e-05, "loss": 1.2003, "step": 862 }, { "epoch": 0.27139407503386537, "grad_norm": 0.703125, "learning_rate": 1.9106315455410982e-05, "loss": 1.4485, "step": 864 }, { "epoch": 0.2720223020594067, "grad_norm": 0.78125, "learning_rate": 1.9103776578863854e-05, "loss": 1.2389, "step": 866 }, { "epoch": 0.2726505290849481, "grad_norm": 0.75, "learning_rate": 1.9101237702316725e-05, "loss": 1.4348, "step": 868 }, { "epoch": 0.2732787561104894, "grad_norm": 0.78125, "learning_rate": 1.90986988257696e-05, "loss": 1.4559, "step": 870 }, { "epoch": 0.2739069831360308, "grad_norm": 0.796875, "learning_rate": 1.909615994922247e-05, "loss": 1.4004, "step": 872 }, { "epoch": 0.2745352101615721, "grad_norm": 0.8046875, "learning_rate": 1.9093621072675343e-05, "loss": 1.3105, "step": 874 }, { "epoch": 0.2751634371871135, "grad_norm": 0.78125, "learning_rate": 1.9091082196128214e-05, "loss": 1.2796, "step": 876 }, { "epoch": 0.27579166421265483, "grad_norm": 0.74609375, "learning_rate": 1.908854331958109e-05, "loss": 1.4628, "step": 878 }, { "epoch": 0.2764198912381962, "grad_norm": 0.71484375, "learning_rate": 1.908600444303396e-05, "loss": 1.3618, "step": 880 }, { "epoch": 0.27704811826373754, "grad_norm": 0.73828125, "learning_rate": 1.908346556648683e-05, "loss": 1.3635, "step": 882 }, { "epoch": 0.2776763452892789, "grad_norm": 0.69921875, "learning_rate": 1.9080926689939703e-05, "loss": 1.3921, "step": 884 }, { "epoch": 0.27830457231482025, "grad_norm": 0.70703125, "learning_rate": 1.9078387813392578e-05, "loss": 1.3431, "step": 886 }, { "epoch": 0.27893279934036164, "grad_norm": 0.796875, "learning_rate": 1.9075848936845446e-05, "loss": 1.3725, "step": 888 }, { "epoch": 0.27956102636590296, "grad_norm": 0.6640625, "learning_rate": 1.907331006029832e-05, "loss": 1.2754, "step": 890 }, { "epoch": 0.28018925339144435, "grad_norm": 0.99609375, "learning_rate": 1.9070771183751192e-05, "loss": 1.1762, "step": 892 }, { "epoch": 0.2808174804169857, "grad_norm": 0.80859375, "learning_rate": 1.9068232307204063e-05, "loss": 1.301, "step": 894 }, { "epoch": 0.28144570744252706, "grad_norm": 0.68359375, "learning_rate": 1.9065693430656935e-05, "loss": 1.2999, "step": 896 }, { "epoch": 0.2820739344680684, "grad_norm": 0.76171875, "learning_rate": 1.906315455410981e-05, "loss": 1.355, "step": 898 }, { "epoch": 0.28270216149360977, "grad_norm": 0.71875, "learning_rate": 1.906061567756268e-05, "loss": 1.4332, "step": 900 }, { "epoch": 0.2833303885191511, "grad_norm": 0.96875, "learning_rate": 1.9058076801015552e-05, "loss": 1.4116, "step": 902 }, { "epoch": 0.2839586155446925, "grad_norm": 0.8203125, "learning_rate": 1.9055537924468423e-05, "loss": 1.3064, "step": 904 }, { "epoch": 0.2845868425702338, "grad_norm": 0.81640625, "learning_rate": 1.9052999047921298e-05, "loss": 1.5111, "step": 906 }, { "epoch": 0.2852150695957752, "grad_norm": 0.8125, "learning_rate": 1.9050460171374166e-05, "loss": 1.2457, "step": 908 }, { "epoch": 0.2858432966213165, "grad_norm": 0.78125, "learning_rate": 1.904792129482704e-05, "loss": 1.346, "step": 910 }, { "epoch": 0.2864715236468579, "grad_norm": 0.75390625, "learning_rate": 1.9045382418279912e-05, "loss": 1.3722, "step": 912 }, { "epoch": 0.28709975067239923, "grad_norm": 0.8203125, "learning_rate": 1.9042843541732784e-05, "loss": 1.3245, "step": 914 }, { "epoch": 0.2877279776979406, "grad_norm": 0.87109375, "learning_rate": 1.9040304665185655e-05, "loss": 1.42, "step": 916 }, { "epoch": 0.28835620472348195, "grad_norm": 0.83984375, "learning_rate": 1.903776578863853e-05, "loss": 1.405, "step": 918 }, { "epoch": 0.28898443174902333, "grad_norm": 0.703125, "learning_rate": 1.90352269120914e-05, "loss": 1.3066, "step": 920 }, { "epoch": 0.28961265877456466, "grad_norm": 0.8046875, "learning_rate": 1.9032688035544273e-05, "loss": 1.3226, "step": 922 }, { "epoch": 0.29024088580010604, "grad_norm": 0.875, "learning_rate": 1.9030149158997144e-05, "loss": 1.1937, "step": 924 }, { "epoch": 0.29086911282564737, "grad_norm": 0.78125, "learning_rate": 1.902761028245002e-05, "loss": 1.3474, "step": 926 }, { "epoch": 0.2914973398511887, "grad_norm": 0.76171875, "learning_rate": 1.9025071405902887e-05, "loss": 1.3306, "step": 928 }, { "epoch": 0.2921255668767301, "grad_norm": 0.89453125, "learning_rate": 1.902253252935576e-05, "loss": 1.3498, "step": 930 }, { "epoch": 0.2927537939022714, "grad_norm": 0.92578125, "learning_rate": 1.9019993652808633e-05, "loss": 1.4435, "step": 932 }, { "epoch": 0.2933820209278128, "grad_norm": 0.75, "learning_rate": 1.9017454776261504e-05, "loss": 1.3682, "step": 934 }, { "epoch": 0.2940102479533541, "grad_norm": 0.8203125, "learning_rate": 1.901491589971438e-05, "loss": 1.3391, "step": 936 }, { "epoch": 0.2946384749788955, "grad_norm": 0.7109375, "learning_rate": 1.901237702316725e-05, "loss": 1.5098, "step": 938 }, { "epoch": 0.29526670200443683, "grad_norm": 0.7109375, "learning_rate": 1.9009838146620122e-05, "loss": 1.4432, "step": 940 }, { "epoch": 0.2958949290299782, "grad_norm": 0.703125, "learning_rate": 1.9007299270072993e-05, "loss": 1.3789, "step": 942 }, { "epoch": 0.29652315605551954, "grad_norm": 0.703125, "learning_rate": 1.9004760393525868e-05, "loss": 1.2943, "step": 944 }, { "epoch": 0.2971513830810609, "grad_norm": 0.8671875, "learning_rate": 1.900222151697874e-05, "loss": 1.3753, "step": 946 }, { "epoch": 0.29777961010660225, "grad_norm": 0.70703125, "learning_rate": 1.899968264043161e-05, "loss": 1.3704, "step": 948 }, { "epoch": 0.29840783713214364, "grad_norm": 0.78515625, "learning_rate": 1.8997143763884482e-05, "loss": 1.4176, "step": 950 }, { "epoch": 0.29903606415768497, "grad_norm": 0.7890625, "learning_rate": 1.8994604887337357e-05, "loss": 1.2448, "step": 952 }, { "epoch": 0.29966429118322635, "grad_norm": 0.7421875, "learning_rate": 1.8992066010790225e-05, "loss": 1.2357, "step": 954 }, { "epoch": 0.3002925182087677, "grad_norm": 0.72265625, "learning_rate": 1.89895271342431e-05, "loss": 1.4002, "step": 956 }, { "epoch": 0.30092074523430906, "grad_norm": 0.796875, "learning_rate": 1.898698825769597e-05, "loss": 1.3756, "step": 958 }, { "epoch": 0.3015489722598504, "grad_norm": 0.75390625, "learning_rate": 1.8984449381148842e-05, "loss": 1.2851, "step": 960 }, { "epoch": 0.3021771992853918, "grad_norm": 0.79296875, "learning_rate": 1.8981910504601714e-05, "loss": 1.3339, "step": 962 }, { "epoch": 0.3028054263109331, "grad_norm": 0.6953125, "learning_rate": 1.897937162805459e-05, "loss": 1.4284, "step": 964 }, { "epoch": 0.3034336533364745, "grad_norm": 0.83203125, "learning_rate": 1.897683275150746e-05, "loss": 1.3142, "step": 966 }, { "epoch": 0.3040618803620158, "grad_norm": 0.76953125, "learning_rate": 1.897429387496033e-05, "loss": 1.4217, "step": 968 }, { "epoch": 0.3046901073875572, "grad_norm": 0.7890625, "learning_rate": 1.8971754998413203e-05, "loss": 1.4308, "step": 970 }, { "epoch": 0.3053183344130985, "grad_norm": 0.75390625, "learning_rate": 1.8969216121866078e-05, "loss": 1.2463, "step": 972 }, { "epoch": 0.3059465614386399, "grad_norm": 0.72265625, "learning_rate": 1.896667724531895e-05, "loss": 1.3149, "step": 974 }, { "epoch": 0.30657478846418124, "grad_norm": 0.91796875, "learning_rate": 1.896413836877182e-05, "loss": 1.3623, "step": 976 }, { "epoch": 0.3072030154897226, "grad_norm": 0.69921875, "learning_rate": 1.8961599492224692e-05, "loss": 1.5585, "step": 978 }, { "epoch": 0.30783124251526395, "grad_norm": 0.71875, "learning_rate": 1.8959060615677563e-05, "loss": 1.2155, "step": 980 }, { "epoch": 0.30845946954080533, "grad_norm": 0.703125, "learning_rate": 1.8956521739130434e-05, "loss": 1.384, "step": 982 }, { "epoch": 0.30908769656634666, "grad_norm": 0.890625, "learning_rate": 1.895398286258331e-05, "loss": 1.2724, "step": 984 }, { "epoch": 0.30971592359188804, "grad_norm": 0.75, "learning_rate": 1.895144398603618e-05, "loss": 1.3504, "step": 986 }, { "epoch": 0.31034415061742937, "grad_norm": 0.7578125, "learning_rate": 1.8948905109489052e-05, "loss": 1.3144, "step": 988 }, { "epoch": 0.31097237764297075, "grad_norm": 0.71484375, "learning_rate": 1.8946366232941923e-05, "loss": 1.3399, "step": 990 }, { "epoch": 0.3116006046685121, "grad_norm": 0.796875, "learning_rate": 1.8943827356394798e-05, "loss": 1.3355, "step": 992 }, { "epoch": 0.31222883169405347, "grad_norm": 0.78125, "learning_rate": 1.894128847984767e-05, "loss": 1.2823, "step": 994 }, { "epoch": 0.3128570587195948, "grad_norm": 0.78125, "learning_rate": 1.893874960330054e-05, "loss": 1.4969, "step": 996 }, { "epoch": 0.3134852857451361, "grad_norm": 0.7890625, "learning_rate": 1.8936210726753412e-05, "loss": 1.3046, "step": 998 }, { "epoch": 0.3141135127706775, "grad_norm": 0.7109375, "learning_rate": 1.8933671850206287e-05, "loss": 1.4317, "step": 1000 }, { "epoch": 0.31474173979621883, "grad_norm": 0.71875, "learning_rate": 1.8931132973659155e-05, "loss": 1.3786, "step": 1002 }, { "epoch": 0.3153699668217602, "grad_norm": 0.73046875, "learning_rate": 1.892859409711203e-05, "loss": 1.3259, "step": 1004 }, { "epoch": 0.31599819384730155, "grad_norm": 0.72265625, "learning_rate": 1.89260552205649e-05, "loss": 1.3619, "step": 1006 }, { "epoch": 0.31662642087284293, "grad_norm": 0.7578125, "learning_rate": 1.8923516344017773e-05, "loss": 1.4299, "step": 1008 }, { "epoch": 0.31725464789838426, "grad_norm": 0.78515625, "learning_rate": 1.8920977467470644e-05, "loss": 1.389, "step": 1010 }, { "epoch": 0.31788287492392564, "grad_norm": 0.8046875, "learning_rate": 1.891843859092352e-05, "loss": 1.3459, "step": 1012 }, { "epoch": 0.31851110194946697, "grad_norm": 0.765625, "learning_rate": 1.891589971437639e-05, "loss": 1.4309, "step": 1014 }, { "epoch": 0.31913932897500835, "grad_norm": 0.76953125, "learning_rate": 1.891336083782926e-05, "loss": 1.3712, "step": 1016 }, { "epoch": 0.3197675560005497, "grad_norm": 0.80859375, "learning_rate": 1.8910821961282133e-05, "loss": 1.3044, "step": 1018 }, { "epoch": 0.32039578302609106, "grad_norm": 0.6796875, "learning_rate": 1.8908283084735008e-05, "loss": 1.3589, "step": 1020 }, { "epoch": 0.3210240100516324, "grad_norm": 0.69140625, "learning_rate": 1.890574420818788e-05, "loss": 1.2593, "step": 1022 }, { "epoch": 0.3216522370771738, "grad_norm": 0.87109375, "learning_rate": 1.890320533164075e-05, "loss": 1.3657, "step": 1024 }, { "epoch": 0.3222804641027151, "grad_norm": 0.6796875, "learning_rate": 1.8900666455093625e-05, "loss": 1.2129, "step": 1026 }, { "epoch": 0.3229086911282565, "grad_norm": 0.71875, "learning_rate": 1.8898127578546493e-05, "loss": 1.09, "step": 1028 }, { "epoch": 0.3235369181537978, "grad_norm": 0.8671875, "learning_rate": 1.8895588701999368e-05, "loss": 1.3569, "step": 1030 }, { "epoch": 0.3241651451793392, "grad_norm": 0.78515625, "learning_rate": 1.889304982545224e-05, "loss": 1.4419, "step": 1032 }, { "epoch": 0.3247933722048805, "grad_norm": 0.7578125, "learning_rate": 1.889051094890511e-05, "loss": 1.3802, "step": 1034 }, { "epoch": 0.3254215992304219, "grad_norm": 0.75390625, "learning_rate": 1.8887972072357982e-05, "loss": 1.312, "step": 1036 }, { "epoch": 0.32604982625596324, "grad_norm": 0.74609375, "learning_rate": 1.8885433195810857e-05, "loss": 1.4378, "step": 1038 }, { "epoch": 0.3266780532815046, "grad_norm": 0.83203125, "learning_rate": 1.8882894319263728e-05, "loss": 1.2541, "step": 1040 }, { "epoch": 0.32730628030704595, "grad_norm": 0.7421875, "learning_rate": 1.88803554427166e-05, "loss": 1.3656, "step": 1042 }, { "epoch": 0.32793450733258733, "grad_norm": 0.7578125, "learning_rate": 1.887781656616947e-05, "loss": 1.4039, "step": 1044 }, { "epoch": 0.32856273435812866, "grad_norm": 0.72265625, "learning_rate": 1.8875277689622346e-05, "loss": 1.3563, "step": 1046 }, { "epoch": 0.32919096138367004, "grad_norm": 0.88671875, "learning_rate": 1.8872738813075214e-05, "loss": 1.285, "step": 1048 }, { "epoch": 0.3298191884092114, "grad_norm": 0.84375, "learning_rate": 1.887019993652809e-05, "loss": 1.2465, "step": 1050 }, { "epoch": 0.33044741543475276, "grad_norm": 0.92578125, "learning_rate": 1.886766105998096e-05, "loss": 1.2184, "step": 1052 }, { "epoch": 0.3310756424602941, "grad_norm": 0.69921875, "learning_rate": 1.886512218343383e-05, "loss": 1.3098, "step": 1054 }, { "epoch": 0.33170386948583547, "grad_norm": 0.76171875, "learning_rate": 1.8862583306886703e-05, "loss": 1.318, "step": 1056 }, { "epoch": 0.3323320965113768, "grad_norm": 0.91015625, "learning_rate": 1.8860044430339577e-05, "loss": 1.2984, "step": 1058 }, { "epoch": 0.3329603235369182, "grad_norm": 0.78515625, "learning_rate": 1.885750555379245e-05, "loss": 1.4075, "step": 1060 }, { "epoch": 0.3335885505624595, "grad_norm": 0.94140625, "learning_rate": 1.885496667724532e-05, "loss": 1.354, "step": 1062 }, { "epoch": 0.3342167775880009, "grad_norm": 0.74609375, "learning_rate": 1.885242780069819e-05, "loss": 1.2434, "step": 1064 }, { "epoch": 0.3348450046135422, "grad_norm": 0.8359375, "learning_rate": 1.8849888924151066e-05, "loss": 1.4308, "step": 1066 }, { "epoch": 0.33547323163908355, "grad_norm": 0.8984375, "learning_rate": 1.8847350047603938e-05, "loss": 1.2561, "step": 1068 }, { "epoch": 0.33610145866462493, "grad_norm": 0.875, "learning_rate": 1.884481117105681e-05, "loss": 1.4753, "step": 1070 }, { "epoch": 0.33672968569016626, "grad_norm": 0.69921875, "learning_rate": 1.884227229450968e-05, "loss": 1.369, "step": 1072 }, { "epoch": 0.33735791271570764, "grad_norm": 0.76171875, "learning_rate": 1.8839733417962552e-05, "loss": 1.4776, "step": 1074 }, { "epoch": 0.33798613974124897, "grad_norm": 0.73046875, "learning_rate": 1.8837194541415423e-05, "loss": 1.3619, "step": 1076 }, { "epoch": 0.33861436676679035, "grad_norm": 0.77734375, "learning_rate": 1.8834655664868298e-05, "loss": 1.2684, "step": 1078 }, { "epoch": 0.3392425937923317, "grad_norm": 0.7421875, "learning_rate": 1.883211678832117e-05, "loss": 1.4172, "step": 1080 }, { "epoch": 0.33987082081787306, "grad_norm": 0.890625, "learning_rate": 1.882957791177404e-05, "loss": 1.501, "step": 1082 }, { "epoch": 0.3404990478434144, "grad_norm": 0.82421875, "learning_rate": 1.8827039035226912e-05, "loss": 1.4823, "step": 1084 }, { "epoch": 0.3411272748689558, "grad_norm": 0.8828125, "learning_rate": 1.8824500158679787e-05, "loss": 1.2784, "step": 1086 }, { "epoch": 0.3417555018944971, "grad_norm": 0.76171875, "learning_rate": 1.882196128213266e-05, "loss": 1.359, "step": 1088 }, { "epoch": 0.3423837289200385, "grad_norm": 0.79296875, "learning_rate": 1.881942240558553e-05, "loss": 1.2725, "step": 1090 }, { "epoch": 0.3430119559455798, "grad_norm": 0.76171875, "learning_rate": 1.88168835290384e-05, "loss": 1.2185, "step": 1092 }, { "epoch": 0.3436401829711212, "grad_norm": 0.703125, "learning_rate": 1.8814344652491276e-05, "loss": 1.3709, "step": 1094 }, { "epoch": 0.3442684099966625, "grad_norm": 0.79296875, "learning_rate": 1.8811805775944144e-05, "loss": 1.4139, "step": 1096 }, { "epoch": 0.3448966370222039, "grad_norm": 0.69921875, "learning_rate": 1.880926689939702e-05, "loss": 1.5253, "step": 1098 }, { "epoch": 0.34552486404774524, "grad_norm": 0.72265625, "learning_rate": 1.880672802284989e-05, "loss": 1.2929, "step": 1100 }, { "epoch": 0.3461530910732866, "grad_norm": 0.90625, "learning_rate": 1.880418914630276e-05, "loss": 1.3314, "step": 1102 }, { "epoch": 0.34678131809882795, "grad_norm": 0.70703125, "learning_rate": 1.8801650269755633e-05, "loss": 1.1409, "step": 1104 }, { "epoch": 0.34740954512436933, "grad_norm": 0.765625, "learning_rate": 1.8799111393208508e-05, "loss": 1.4453, "step": 1106 }, { "epoch": 0.34803777214991066, "grad_norm": 0.671875, "learning_rate": 1.879657251666138e-05, "loss": 1.3495, "step": 1108 }, { "epoch": 0.34866599917545205, "grad_norm": 0.77734375, "learning_rate": 1.879403364011425e-05, "loss": 1.3406, "step": 1110 }, { "epoch": 0.3492942262009934, "grad_norm": 0.85546875, "learning_rate": 1.8791494763567125e-05, "loss": 1.2358, "step": 1112 }, { "epoch": 0.34992245322653476, "grad_norm": 0.83984375, "learning_rate": 1.8788955887019997e-05, "loss": 1.3972, "step": 1114 }, { "epoch": 0.3505506802520761, "grad_norm": 0.72265625, "learning_rate": 1.8786417010472868e-05, "loss": 1.3597, "step": 1116 }, { "epoch": 0.35117890727761747, "grad_norm": 0.66015625, "learning_rate": 1.878387813392574e-05, "loss": 1.3003, "step": 1118 }, { "epoch": 0.3518071343031588, "grad_norm": 0.86328125, "learning_rate": 1.8781339257378614e-05, "loss": 1.2663, "step": 1120 }, { "epoch": 0.3524353613287002, "grad_norm": 0.73828125, "learning_rate": 1.8778800380831482e-05, "loss": 1.4089, "step": 1122 }, { "epoch": 0.3530635883542415, "grad_norm": 0.828125, "learning_rate": 1.8776261504284357e-05, "loss": 1.3793, "step": 1124 }, { "epoch": 0.3536918153797829, "grad_norm": 0.796875, "learning_rate": 1.8773722627737228e-05, "loss": 1.4041, "step": 1126 }, { "epoch": 0.3543200424053242, "grad_norm": 0.8046875, "learning_rate": 1.87711837511901e-05, "loss": 1.252, "step": 1128 }, { "epoch": 0.3549482694308656, "grad_norm": 0.76953125, "learning_rate": 1.876864487464297e-05, "loss": 1.3771, "step": 1130 }, { "epoch": 0.35557649645640693, "grad_norm": 0.86328125, "learning_rate": 1.8766105998095846e-05, "loss": 1.2952, "step": 1132 }, { "epoch": 0.3562047234819483, "grad_norm": 0.7734375, "learning_rate": 1.8763567121548717e-05, "loss": 1.2377, "step": 1134 }, { "epoch": 0.35683295050748964, "grad_norm": 0.78125, "learning_rate": 1.876102824500159e-05, "loss": 1.429, "step": 1136 }, { "epoch": 0.35746117753303097, "grad_norm": 0.7734375, "learning_rate": 1.875848936845446e-05, "loss": 1.3617, "step": 1138 }, { "epoch": 0.35808940455857236, "grad_norm": 0.7109375, "learning_rate": 1.8755950491907335e-05, "loss": 1.4136, "step": 1140 }, { "epoch": 0.3587176315841137, "grad_norm": 0.80859375, "learning_rate": 1.8753411615360203e-05, "loss": 1.2859, "step": 1142 }, { "epoch": 0.35934585860965507, "grad_norm": 0.6796875, "learning_rate": 1.8750872738813077e-05, "loss": 1.2145, "step": 1144 }, { "epoch": 0.3599740856351964, "grad_norm": 0.70703125, "learning_rate": 1.874833386226595e-05, "loss": 1.294, "step": 1146 }, { "epoch": 0.3606023126607378, "grad_norm": 0.8203125, "learning_rate": 1.874579498571882e-05, "loss": 1.1749, "step": 1148 }, { "epoch": 0.3612305396862791, "grad_norm": 0.75, "learning_rate": 1.874325610917169e-05, "loss": 1.2759, "step": 1150 }, { "epoch": 0.3618587667118205, "grad_norm": 0.76953125, "learning_rate": 1.8740717232624566e-05, "loss": 1.2798, "step": 1152 }, { "epoch": 0.3624869937373618, "grad_norm": 0.83203125, "learning_rate": 1.8738178356077438e-05, "loss": 1.3493, "step": 1154 }, { "epoch": 0.3631152207629032, "grad_norm": 0.76953125, "learning_rate": 1.873563947953031e-05, "loss": 1.4311, "step": 1156 }, { "epoch": 0.36374344778844453, "grad_norm": 0.765625, "learning_rate": 1.873310060298318e-05, "loss": 1.2613, "step": 1158 }, { "epoch": 0.3643716748139859, "grad_norm": 1.0078125, "learning_rate": 1.8730561726436055e-05, "loss": 1.3474, "step": 1160 }, { "epoch": 0.36499990183952724, "grad_norm": 0.7109375, "learning_rate": 1.8728022849888923e-05, "loss": 1.4257, "step": 1162 }, { "epoch": 0.3656281288650686, "grad_norm": 0.78125, "learning_rate": 1.8725483973341798e-05, "loss": 1.3411, "step": 1164 }, { "epoch": 0.36625635589060995, "grad_norm": 0.734375, "learning_rate": 1.872294509679467e-05, "loss": 1.3309, "step": 1166 }, { "epoch": 0.36688458291615134, "grad_norm": 0.8984375, "learning_rate": 1.872040622024754e-05, "loss": 1.4467, "step": 1168 }, { "epoch": 0.36751280994169266, "grad_norm": 0.8515625, "learning_rate": 1.8717867343700412e-05, "loss": 1.2754, "step": 1170 }, { "epoch": 0.36814103696723405, "grad_norm": 0.7890625, "learning_rate": 1.8715328467153287e-05, "loss": 1.4556, "step": 1172 }, { "epoch": 0.3687692639927754, "grad_norm": 0.84375, "learning_rate": 1.871278959060616e-05, "loss": 1.3598, "step": 1174 }, { "epoch": 0.36939749101831676, "grad_norm": 0.6875, "learning_rate": 1.871025071405903e-05, "loss": 1.2428, "step": 1176 }, { "epoch": 0.3700257180438581, "grad_norm": 0.8046875, "learning_rate": 1.87077118375119e-05, "loss": 1.3761, "step": 1178 }, { "epoch": 0.37065394506939947, "grad_norm": 0.78515625, "learning_rate": 1.8705172960964776e-05, "loss": 1.3929, "step": 1180 }, { "epoch": 0.3712821720949408, "grad_norm": 0.8671875, "learning_rate": 1.8702634084417647e-05, "loss": 1.2633, "step": 1182 }, { "epoch": 0.3719103991204822, "grad_norm": 0.828125, "learning_rate": 1.870009520787052e-05, "loss": 1.4286, "step": 1184 }, { "epoch": 0.3725386261460235, "grad_norm": 0.7734375, "learning_rate": 1.869755633132339e-05, "loss": 1.2967, "step": 1186 }, { "epoch": 0.3731668531715649, "grad_norm": 1.015625, "learning_rate": 1.869501745477626e-05, "loss": 1.3566, "step": 1188 }, { "epoch": 0.3737950801971062, "grad_norm": 0.71875, "learning_rate": 1.8692478578229133e-05, "loss": 1.3837, "step": 1190 }, { "epoch": 0.3744233072226476, "grad_norm": 0.9296875, "learning_rate": 1.8689939701682008e-05, "loss": 1.369, "step": 1192 }, { "epoch": 0.37505153424818893, "grad_norm": 0.71484375, "learning_rate": 1.868740082513488e-05, "loss": 1.4206, "step": 1194 }, { "epoch": 0.3756797612737303, "grad_norm": 0.75390625, "learning_rate": 1.868486194858775e-05, "loss": 1.3345, "step": 1196 }, { "epoch": 0.37630798829927165, "grad_norm": 0.84765625, "learning_rate": 1.8682323072040625e-05, "loss": 1.3843, "step": 1198 }, { "epoch": 0.37693621532481303, "grad_norm": 0.71484375, "learning_rate": 1.8679784195493496e-05, "loss": 1.4273, "step": 1200 }, { "epoch": 0.37756444235035436, "grad_norm": 0.7734375, "learning_rate": 1.8677245318946368e-05, "loss": 1.3729, "step": 1202 }, { "epoch": 0.37819266937589574, "grad_norm": 1.15625, "learning_rate": 1.867470644239924e-05, "loss": 1.1632, "step": 1204 }, { "epoch": 0.37882089640143707, "grad_norm": 0.6796875, "learning_rate": 1.8672167565852114e-05, "loss": 1.3493, "step": 1206 }, { "epoch": 0.37944912342697845, "grad_norm": 0.7578125, "learning_rate": 1.8669628689304985e-05, "loss": 1.3056, "step": 1208 }, { "epoch": 0.3800773504525198, "grad_norm": 0.7265625, "learning_rate": 1.8667089812757857e-05, "loss": 1.414, "step": 1210 }, { "epoch": 0.3807055774780611, "grad_norm": 0.8359375, "learning_rate": 1.8664550936210728e-05, "loss": 1.33, "step": 1212 }, { "epoch": 0.3813338045036025, "grad_norm": 0.80859375, "learning_rate": 1.86620120596636e-05, "loss": 1.378, "step": 1214 }, { "epoch": 0.3819620315291438, "grad_norm": 0.95703125, "learning_rate": 1.865947318311647e-05, "loss": 1.2628, "step": 1216 }, { "epoch": 0.3825902585546852, "grad_norm": 0.73046875, "learning_rate": 1.8656934306569346e-05, "loss": 1.2875, "step": 1218 }, { "epoch": 0.38321848558022653, "grad_norm": 0.78515625, "learning_rate": 1.8654395430022217e-05, "loss": 1.3463, "step": 1220 }, { "epoch": 0.3838467126057679, "grad_norm": 0.80078125, "learning_rate": 1.865185655347509e-05, "loss": 1.3272, "step": 1222 }, { "epoch": 0.38447493963130924, "grad_norm": 0.71484375, "learning_rate": 1.864931767692796e-05, "loss": 1.3908, "step": 1224 }, { "epoch": 0.3851031666568506, "grad_norm": 0.6796875, "learning_rate": 1.8646778800380835e-05, "loss": 1.3235, "step": 1226 }, { "epoch": 0.38573139368239195, "grad_norm": 0.74609375, "learning_rate": 1.8644239923833706e-05, "loss": 1.2354, "step": 1228 }, { "epoch": 0.38635962070793334, "grad_norm": 0.88671875, "learning_rate": 1.8641701047286577e-05, "loss": 1.2592, "step": 1230 }, { "epoch": 0.38698784773347467, "grad_norm": 0.7265625, "learning_rate": 1.863916217073945e-05, "loss": 1.3272, "step": 1232 }, { "epoch": 0.38761607475901605, "grad_norm": 0.77734375, "learning_rate": 1.8636623294192323e-05, "loss": 1.2147, "step": 1234 }, { "epoch": 0.3882443017845574, "grad_norm": 0.7734375, "learning_rate": 1.863408441764519e-05, "loss": 1.3168, "step": 1236 }, { "epoch": 0.38887252881009876, "grad_norm": 0.73828125, "learning_rate": 1.8631545541098066e-05, "loss": 1.2581, "step": 1238 }, { "epoch": 0.3895007558356401, "grad_norm": 0.84375, "learning_rate": 1.8629006664550938e-05, "loss": 1.404, "step": 1240 }, { "epoch": 0.3901289828611815, "grad_norm": 0.79296875, "learning_rate": 1.862646778800381e-05, "loss": 1.3546, "step": 1242 }, { "epoch": 0.3907572098867228, "grad_norm": 0.74609375, "learning_rate": 1.862392891145668e-05, "loss": 1.2896, "step": 1244 }, { "epoch": 0.3913854369122642, "grad_norm": 0.74609375, "learning_rate": 1.8621390034909555e-05, "loss": 1.3196, "step": 1246 }, { "epoch": 0.3920136639378055, "grad_norm": 0.72265625, "learning_rate": 1.8618851158362427e-05, "loss": 1.3084, "step": 1248 }, { "epoch": 0.3926418909633469, "grad_norm": 0.75390625, "learning_rate": 1.8616312281815298e-05, "loss": 1.2459, "step": 1250 }, { "epoch": 0.3932701179888882, "grad_norm": 0.73828125, "learning_rate": 1.861377340526817e-05, "loss": 1.3642, "step": 1252 }, { "epoch": 0.3938983450144296, "grad_norm": 0.9140625, "learning_rate": 1.8611234528721044e-05, "loss": 1.2232, "step": 1254 }, { "epoch": 0.39452657203997094, "grad_norm": 0.6875, "learning_rate": 1.8608695652173912e-05, "loss": 1.2384, "step": 1256 }, { "epoch": 0.3951547990655123, "grad_norm": 0.6640625, "learning_rate": 1.8606156775626787e-05, "loss": 1.3031, "step": 1258 }, { "epoch": 0.39578302609105365, "grad_norm": 0.67578125, "learning_rate": 1.8603617899079658e-05, "loss": 1.3142, "step": 1260 }, { "epoch": 0.39641125311659503, "grad_norm": 0.875, "learning_rate": 1.860107902253253e-05, "loss": 1.2851, "step": 1262 }, { "epoch": 0.39703948014213636, "grad_norm": 0.73828125, "learning_rate": 1.85985401459854e-05, "loss": 1.3063, "step": 1264 }, { "epoch": 0.39766770716767774, "grad_norm": 0.7578125, "learning_rate": 1.8596001269438276e-05, "loss": 1.4062, "step": 1266 }, { "epoch": 0.39829593419321907, "grad_norm": 0.78125, "learning_rate": 1.8593462392891147e-05, "loss": 1.2698, "step": 1268 }, { "epoch": 0.39892416121876045, "grad_norm": 0.6796875, "learning_rate": 1.859092351634402e-05, "loss": 1.3242, "step": 1270 }, { "epoch": 0.3995523882443018, "grad_norm": 0.70703125, "learning_rate": 1.858838463979689e-05, "loss": 1.3655, "step": 1272 }, { "epoch": 0.40018061526984317, "grad_norm": 0.75, "learning_rate": 1.8585845763249765e-05, "loss": 1.259, "step": 1274 }, { "epoch": 0.4008088422953845, "grad_norm": 0.8984375, "learning_rate": 1.8583306886702636e-05, "loss": 1.2373, "step": 1276 }, { "epoch": 0.4014370693209259, "grad_norm": 0.75390625, "learning_rate": 1.8580768010155507e-05, "loss": 1.3231, "step": 1278 }, { "epoch": 0.4020652963464672, "grad_norm": 0.7421875, "learning_rate": 1.8578229133608382e-05, "loss": 1.3715, "step": 1280 }, { "epoch": 0.40269352337200853, "grad_norm": 0.91015625, "learning_rate": 1.857569025706125e-05, "loss": 1.4227, "step": 1282 }, { "epoch": 0.4033217503975499, "grad_norm": 0.72265625, "learning_rate": 1.8573151380514125e-05, "loss": 1.4352, "step": 1284 }, { "epoch": 0.40394997742309124, "grad_norm": 0.8359375, "learning_rate": 1.8570612503966996e-05, "loss": 1.3358, "step": 1286 }, { "epoch": 0.40457820444863263, "grad_norm": 0.7734375, "learning_rate": 1.8568073627419868e-05, "loss": 1.3508, "step": 1288 }, { "epoch": 0.40520643147417396, "grad_norm": 0.94921875, "learning_rate": 1.856553475087274e-05, "loss": 1.4527, "step": 1290 }, { "epoch": 0.40583465849971534, "grad_norm": 0.68359375, "learning_rate": 1.8562995874325614e-05, "loss": 1.4456, "step": 1292 }, { "epoch": 0.40646288552525667, "grad_norm": 0.90625, "learning_rate": 1.8560456997778485e-05, "loss": 1.3093, "step": 1294 }, { "epoch": 0.40709111255079805, "grad_norm": 0.74609375, "learning_rate": 1.8557918121231357e-05, "loss": 1.4534, "step": 1296 }, { "epoch": 0.4077193395763394, "grad_norm": 0.9609375, "learning_rate": 1.8555379244684228e-05, "loss": 1.2337, "step": 1298 }, { "epoch": 0.40834756660188076, "grad_norm": 0.71875, "learning_rate": 1.8552840368137103e-05, "loss": 1.212, "step": 1300 }, { "epoch": 0.4089757936274221, "grad_norm": 0.70703125, "learning_rate": 1.8550301491589974e-05, "loss": 1.3673, "step": 1302 }, { "epoch": 0.4096040206529635, "grad_norm": 0.6875, "learning_rate": 1.8547762615042846e-05, "loss": 1.3345, "step": 1304 }, { "epoch": 0.4102322476785048, "grad_norm": 0.70703125, "learning_rate": 1.8545223738495717e-05, "loss": 1.3542, "step": 1306 }, { "epoch": 0.4108604747040462, "grad_norm": 0.828125, "learning_rate": 1.854268486194859e-05, "loss": 1.4953, "step": 1308 }, { "epoch": 0.4114887017295875, "grad_norm": 0.7421875, "learning_rate": 1.854014598540146e-05, "loss": 1.4254, "step": 1310 }, { "epoch": 0.4121169287551289, "grad_norm": 0.71875, "learning_rate": 1.8537607108854335e-05, "loss": 1.3089, "step": 1312 }, { "epoch": 0.4127451557806702, "grad_norm": 0.73046875, "learning_rate": 1.8535068232307206e-05, "loss": 1.3985, "step": 1314 }, { "epoch": 0.4133733828062116, "grad_norm": 0.828125, "learning_rate": 1.8532529355760077e-05, "loss": 1.45, "step": 1316 }, { "epoch": 0.41400160983175294, "grad_norm": 0.71875, "learning_rate": 1.852999047921295e-05, "loss": 1.472, "step": 1318 }, { "epoch": 0.4146298368572943, "grad_norm": 0.69140625, "learning_rate": 1.8527451602665823e-05, "loss": 1.4135, "step": 1320 }, { "epoch": 0.41525806388283565, "grad_norm": 0.76171875, "learning_rate": 1.8524912726118695e-05, "loss": 1.2985, "step": 1322 }, { "epoch": 0.41588629090837703, "grad_norm": 0.84375, "learning_rate": 1.8522373849571566e-05, "loss": 1.292, "step": 1324 }, { "epoch": 0.41651451793391836, "grad_norm": 0.73046875, "learning_rate": 1.8519834973024438e-05, "loss": 1.3459, "step": 1326 }, { "epoch": 0.41714274495945974, "grad_norm": 0.72265625, "learning_rate": 1.8517296096477312e-05, "loss": 1.3259, "step": 1328 }, { "epoch": 0.41777097198500107, "grad_norm": 0.70703125, "learning_rate": 1.851475721993018e-05, "loss": 1.3027, "step": 1330 }, { "epoch": 0.41839919901054246, "grad_norm": 0.671875, "learning_rate": 1.8512218343383055e-05, "loss": 1.3385, "step": 1332 }, { "epoch": 0.4190274260360838, "grad_norm": 0.7109375, "learning_rate": 1.8509679466835926e-05, "loss": 1.3775, "step": 1334 }, { "epoch": 0.41965565306162517, "grad_norm": 0.79296875, "learning_rate": 1.8507140590288798e-05, "loss": 1.1561, "step": 1336 }, { "epoch": 0.4202838800871665, "grad_norm": 0.8125, "learning_rate": 1.850460171374167e-05, "loss": 1.2644, "step": 1338 }, { "epoch": 0.4209121071127079, "grad_norm": 0.72265625, "learning_rate": 1.8502062837194544e-05, "loss": 1.3686, "step": 1340 }, { "epoch": 0.4215403341382492, "grad_norm": 0.79296875, "learning_rate": 1.8499523960647415e-05, "loss": 1.4161, "step": 1342 }, { "epoch": 0.4221685611637906, "grad_norm": 0.796875, "learning_rate": 1.8496985084100287e-05, "loss": 1.3431, "step": 1344 }, { "epoch": 0.4227967881893319, "grad_norm": 0.80859375, "learning_rate": 1.8494446207553158e-05, "loss": 1.3203, "step": 1346 }, { "epoch": 0.4234250152148733, "grad_norm": 0.8359375, "learning_rate": 1.8491907331006033e-05, "loss": 1.3866, "step": 1348 }, { "epoch": 0.42405324224041463, "grad_norm": 0.73828125, "learning_rate": 1.84893684544589e-05, "loss": 1.307, "step": 1350 }, { "epoch": 0.42468146926595596, "grad_norm": 0.75390625, "learning_rate": 1.8486829577911776e-05, "loss": 1.3054, "step": 1352 }, { "epoch": 0.42530969629149734, "grad_norm": 0.73828125, "learning_rate": 1.8484290701364647e-05, "loss": 1.263, "step": 1354 }, { "epoch": 0.42593792331703867, "grad_norm": 0.7421875, "learning_rate": 1.848175182481752e-05, "loss": 1.2961, "step": 1356 }, { "epoch": 0.42656615034258005, "grad_norm": 0.70703125, "learning_rate": 1.847921294827039e-05, "loss": 1.386, "step": 1358 }, { "epoch": 0.4271943773681214, "grad_norm": 0.79296875, "learning_rate": 1.8476674071723265e-05, "loss": 1.2587, "step": 1360 }, { "epoch": 0.42782260439366276, "grad_norm": 0.80078125, "learning_rate": 1.8474135195176136e-05, "loss": 1.3613, "step": 1362 }, { "epoch": 0.4284508314192041, "grad_norm": 0.734375, "learning_rate": 1.8471596318629007e-05, "loss": 1.4578, "step": 1364 }, { "epoch": 0.4290790584447455, "grad_norm": 0.75, "learning_rate": 1.8469057442081882e-05, "loss": 1.4915, "step": 1366 }, { "epoch": 0.4297072854702868, "grad_norm": 0.984375, "learning_rate": 1.8466518565534754e-05, "loss": 1.2513, "step": 1368 }, { "epoch": 0.4303355124958282, "grad_norm": 0.78125, "learning_rate": 1.8463979688987625e-05, "loss": 1.3317, "step": 1370 }, { "epoch": 0.4309637395213695, "grad_norm": 0.76171875, "learning_rate": 1.8461440812440496e-05, "loss": 1.3281, "step": 1372 }, { "epoch": 0.4315919665469109, "grad_norm": 0.89453125, "learning_rate": 1.845890193589337e-05, "loss": 1.2836, "step": 1374 }, { "epoch": 0.4322201935724522, "grad_norm": 0.96875, "learning_rate": 1.845636305934624e-05, "loss": 1.3258, "step": 1376 }, { "epoch": 0.4328484205979936, "grad_norm": 0.703125, "learning_rate": 1.8453824182799114e-05, "loss": 1.3192, "step": 1378 }, { "epoch": 0.43347664762353494, "grad_norm": 0.7890625, "learning_rate": 1.8451285306251985e-05, "loss": 1.2383, "step": 1380 }, { "epoch": 0.4341048746490763, "grad_norm": 0.6953125, "learning_rate": 1.8448746429704857e-05, "loss": 1.4198, "step": 1382 }, { "epoch": 0.43473310167461765, "grad_norm": 0.84375, "learning_rate": 1.8446207553157728e-05, "loss": 1.3262, "step": 1384 }, { "epoch": 0.43536132870015903, "grad_norm": 0.90234375, "learning_rate": 1.8443668676610603e-05, "loss": 1.3783, "step": 1386 }, { "epoch": 0.43598955572570036, "grad_norm": 0.8046875, "learning_rate": 1.8441129800063474e-05, "loss": 1.3803, "step": 1388 }, { "epoch": 0.43661778275124175, "grad_norm": 0.8359375, "learning_rate": 1.8438590923516346e-05, "loss": 1.2537, "step": 1390 }, { "epoch": 0.4372460097767831, "grad_norm": 0.74609375, "learning_rate": 1.8436052046969217e-05, "loss": 1.4251, "step": 1392 }, { "epoch": 0.43787423680232446, "grad_norm": 0.80078125, "learning_rate": 1.843351317042209e-05, "loss": 1.3708, "step": 1394 }, { "epoch": 0.4385024638278658, "grad_norm": 0.81640625, "learning_rate": 1.8430974293874963e-05, "loss": 1.3983, "step": 1396 }, { "epoch": 0.43913069085340717, "grad_norm": 0.703125, "learning_rate": 1.8428435417327834e-05, "loss": 1.3208, "step": 1398 }, { "epoch": 0.4397589178789485, "grad_norm": 0.6484375, "learning_rate": 1.8425896540780706e-05, "loss": 1.2447, "step": 1400 }, { "epoch": 0.4403871449044899, "grad_norm": 0.7265625, "learning_rate": 1.8423357664233577e-05, "loss": 1.4995, "step": 1402 }, { "epoch": 0.4410153719300312, "grad_norm": 0.69140625, "learning_rate": 1.842081878768645e-05, "loss": 1.2333, "step": 1404 }, { "epoch": 0.4416435989555726, "grad_norm": 0.72265625, "learning_rate": 1.8418279911139323e-05, "loss": 1.438, "step": 1406 }, { "epoch": 0.4422718259811139, "grad_norm": 0.6796875, "learning_rate": 1.8415741034592195e-05, "loss": 1.3648, "step": 1408 }, { "epoch": 0.4429000530066553, "grad_norm": 0.87890625, "learning_rate": 1.8413202158045066e-05, "loss": 1.3982, "step": 1410 }, { "epoch": 0.44352828003219663, "grad_norm": 0.7734375, "learning_rate": 1.8410663281497937e-05, "loss": 1.2714, "step": 1412 }, { "epoch": 0.444156507057738, "grad_norm": 0.66015625, "learning_rate": 1.8408124404950812e-05, "loss": 1.3464, "step": 1414 }, { "epoch": 0.44478473408327934, "grad_norm": 0.671875, "learning_rate": 1.8405585528403684e-05, "loss": 1.3379, "step": 1416 }, { "epoch": 0.4454129611088207, "grad_norm": 0.73046875, "learning_rate": 1.8403046651856555e-05, "loss": 1.3022, "step": 1418 }, { "epoch": 0.44604118813436205, "grad_norm": 0.765625, "learning_rate": 1.8400507775309426e-05, "loss": 1.3677, "step": 1420 }, { "epoch": 0.4466694151599034, "grad_norm": 0.6796875, "learning_rate": 1.83979688987623e-05, "loss": 1.3101, "step": 1422 }, { "epoch": 0.44729764218544477, "grad_norm": 0.94140625, "learning_rate": 1.839543002221517e-05, "loss": 1.2118, "step": 1424 }, { "epoch": 0.4479258692109861, "grad_norm": 2.84375, "learning_rate": 1.8392891145668044e-05, "loss": 1.2927, "step": 1426 }, { "epoch": 0.4485540962365275, "grad_norm": 0.88671875, "learning_rate": 1.8390352269120915e-05, "loss": 1.4683, "step": 1428 }, { "epoch": 0.4491823232620688, "grad_norm": 0.75, "learning_rate": 1.8387813392573787e-05, "loss": 1.2949, "step": 1430 }, { "epoch": 0.4498105502876102, "grad_norm": 0.75, "learning_rate": 1.8385274516026658e-05, "loss": 1.3789, "step": 1432 }, { "epoch": 0.4504387773131515, "grad_norm": 0.7265625, "learning_rate": 1.8382735639479533e-05, "loss": 1.3308, "step": 1434 }, { "epoch": 0.4510670043386929, "grad_norm": 0.78125, "learning_rate": 1.8380196762932404e-05, "loss": 1.3221, "step": 1436 }, { "epoch": 0.45169523136423423, "grad_norm": 0.703125, "learning_rate": 1.8377657886385276e-05, "loss": 1.353, "step": 1438 }, { "epoch": 0.4523234583897756, "grad_norm": 0.84765625, "learning_rate": 1.8375119009838147e-05, "loss": 1.2386, "step": 1440 }, { "epoch": 0.45295168541531694, "grad_norm": 0.70703125, "learning_rate": 1.8372580133291022e-05, "loss": 1.5192, "step": 1442 }, { "epoch": 0.4535799124408583, "grad_norm": 0.7890625, "learning_rate": 1.837004125674389e-05, "loss": 1.4076, "step": 1444 }, { "epoch": 0.45420813946639965, "grad_norm": 0.6953125, "learning_rate": 1.8367502380196765e-05, "loss": 1.4394, "step": 1446 }, { "epoch": 0.45483636649194104, "grad_norm": 0.96484375, "learning_rate": 1.836496350364964e-05, "loss": 1.3238, "step": 1448 }, { "epoch": 0.45546459351748236, "grad_norm": 0.75390625, "learning_rate": 1.8362424627102507e-05, "loss": 1.2685, "step": 1450 }, { "epoch": 0.45609282054302375, "grad_norm": 0.7890625, "learning_rate": 1.8359885750555382e-05, "loss": 1.3883, "step": 1452 }, { "epoch": 0.4567210475685651, "grad_norm": 0.71484375, "learning_rate": 1.8357346874008253e-05, "loss": 1.3735, "step": 1454 }, { "epoch": 0.45734927459410646, "grad_norm": 0.7265625, "learning_rate": 1.8354807997461125e-05, "loss": 1.4432, "step": 1456 }, { "epoch": 0.4579775016196478, "grad_norm": 0.8046875, "learning_rate": 1.8352269120913996e-05, "loss": 1.3395, "step": 1458 }, { "epoch": 0.45860572864518917, "grad_norm": 0.78515625, "learning_rate": 1.834973024436687e-05, "loss": 1.2355, "step": 1460 }, { "epoch": 0.4592339556707305, "grad_norm": 0.703125, "learning_rate": 1.8347191367819742e-05, "loss": 1.4257, "step": 1462 }, { "epoch": 0.4598621826962719, "grad_norm": 0.78515625, "learning_rate": 1.8344652491272614e-05, "loss": 1.4014, "step": 1464 }, { "epoch": 0.4604904097218132, "grad_norm": 0.66015625, "learning_rate": 1.8342113614725485e-05, "loss": 1.4452, "step": 1466 }, { "epoch": 0.4611186367473546, "grad_norm": 0.7578125, "learning_rate": 1.833957473817836e-05, "loss": 1.2609, "step": 1468 }, { "epoch": 0.4617468637728959, "grad_norm": 3.109375, "learning_rate": 1.8337035861631228e-05, "loss": 1.3392, "step": 1470 }, { "epoch": 0.4623750907984373, "grad_norm": 0.8359375, "learning_rate": 1.8334496985084103e-05, "loss": 1.4992, "step": 1472 }, { "epoch": 0.46300331782397863, "grad_norm": 0.71875, "learning_rate": 1.8331958108536974e-05, "loss": 1.3606, "step": 1474 }, { "epoch": 0.46363154484952, "grad_norm": 0.73046875, "learning_rate": 1.8329419231989845e-05, "loss": 1.4007, "step": 1476 }, { "epoch": 0.46425977187506134, "grad_norm": 0.71484375, "learning_rate": 1.8326880355442717e-05, "loss": 1.3096, "step": 1478 }, { "epoch": 0.46488799890060273, "grad_norm": 0.75, "learning_rate": 1.832434147889559e-05, "loss": 1.3873, "step": 1480 }, { "epoch": 0.46551622592614406, "grad_norm": 0.75390625, "learning_rate": 1.8321802602348463e-05, "loss": 1.3962, "step": 1482 }, { "epoch": 0.46614445295168544, "grad_norm": 1.1171875, "learning_rate": 1.8319263725801334e-05, "loss": 1.3512, "step": 1484 }, { "epoch": 0.46677267997722677, "grad_norm": 0.671875, "learning_rate": 1.8316724849254206e-05, "loss": 1.3546, "step": 1486 }, { "epoch": 0.46740090700276815, "grad_norm": 0.76953125, "learning_rate": 1.831418597270708e-05, "loss": 1.3739, "step": 1488 }, { "epoch": 0.4680291340283095, "grad_norm": 0.71875, "learning_rate": 1.831164709615995e-05, "loss": 1.3045, "step": 1490 }, { "epoch": 0.46865736105385086, "grad_norm": 0.72265625, "learning_rate": 1.8309108219612823e-05, "loss": 1.385, "step": 1492 }, { "epoch": 0.4692855880793922, "grad_norm": 0.765625, "learning_rate": 1.8306569343065695e-05, "loss": 1.282, "step": 1494 }, { "epoch": 0.4699138151049335, "grad_norm": 0.671875, "learning_rate": 1.8304030466518566e-05, "loss": 1.5008, "step": 1496 }, { "epoch": 0.4705420421304749, "grad_norm": 0.78515625, "learning_rate": 1.8301491589971437e-05, "loss": 1.422, "step": 1498 }, { "epoch": 0.47117026915601623, "grad_norm": 0.70703125, "learning_rate": 1.8298952713424312e-05, "loss": 1.3419, "step": 1500 }, { "epoch": 0.4717984961815576, "grad_norm": 0.72265625, "learning_rate": 1.8296413836877184e-05, "loss": 1.3878, "step": 1502 }, { "epoch": 0.47242672320709894, "grad_norm": 0.7109375, "learning_rate": 1.8293874960330055e-05, "loss": 1.4378, "step": 1504 }, { "epoch": 0.4730549502326403, "grad_norm": 0.69140625, "learning_rate": 1.8291336083782926e-05, "loss": 1.4115, "step": 1506 }, { "epoch": 0.47368317725818165, "grad_norm": 0.75, "learning_rate": 1.82887972072358e-05, "loss": 1.2909, "step": 1508 }, { "epoch": 0.47431140428372304, "grad_norm": 0.74609375, "learning_rate": 1.8286258330688672e-05, "loss": 1.3813, "step": 1510 }, { "epoch": 0.47493963130926437, "grad_norm": 0.7890625, "learning_rate": 1.8283719454141544e-05, "loss": 1.3018, "step": 1512 }, { "epoch": 0.47556785833480575, "grad_norm": 0.7109375, "learning_rate": 1.8281180577594415e-05, "loss": 1.228, "step": 1514 }, { "epoch": 0.4761960853603471, "grad_norm": 0.65625, "learning_rate": 1.8278641701047287e-05, "loss": 1.3985, "step": 1516 }, { "epoch": 0.47682431238588846, "grad_norm": 0.67578125, "learning_rate": 1.8276102824500158e-05, "loss": 1.4065, "step": 1518 }, { "epoch": 0.4774525394114298, "grad_norm": 0.7421875, "learning_rate": 1.8273563947953033e-05, "loss": 1.34, "step": 1520 }, { "epoch": 0.47808076643697117, "grad_norm": 0.73046875, "learning_rate": 1.8271025071405904e-05, "loss": 1.3451, "step": 1522 }, { "epoch": 0.4787089934625125, "grad_norm": 0.75, "learning_rate": 1.8268486194858776e-05, "loss": 1.3477, "step": 1524 }, { "epoch": 0.4793372204880539, "grad_norm": 0.734375, "learning_rate": 1.8265947318311647e-05, "loss": 1.3247, "step": 1526 }, { "epoch": 0.4799654475135952, "grad_norm": 0.73046875, "learning_rate": 1.8263408441764522e-05, "loss": 1.21, "step": 1528 }, { "epoch": 0.4805936745391366, "grad_norm": 0.71875, "learning_rate": 1.8260869565217393e-05, "loss": 1.3398, "step": 1530 }, { "epoch": 0.4812219015646779, "grad_norm": 0.734375, "learning_rate": 1.8258330688670264e-05, "loss": 1.3262, "step": 1532 }, { "epoch": 0.4818501285902193, "grad_norm": 0.75, "learning_rate": 1.825579181212314e-05, "loss": 1.4908, "step": 1534 }, { "epoch": 0.48247835561576063, "grad_norm": 0.7890625, "learning_rate": 1.825325293557601e-05, "loss": 1.3113, "step": 1536 }, { "epoch": 0.483106582641302, "grad_norm": 0.6640625, "learning_rate": 1.8250714059028882e-05, "loss": 1.2718, "step": 1538 }, { "epoch": 0.48373480966684335, "grad_norm": 0.7265625, "learning_rate": 1.8248175182481753e-05, "loss": 1.406, "step": 1540 }, { "epoch": 0.48436303669238473, "grad_norm": 0.6953125, "learning_rate": 1.8245636305934625e-05, "loss": 1.3577, "step": 1542 }, { "epoch": 0.48499126371792606, "grad_norm": 0.7421875, "learning_rate": 1.8243097429387496e-05, "loss": 1.3054, "step": 1544 }, { "epoch": 0.48561949074346744, "grad_norm": 0.78515625, "learning_rate": 1.824055855284037e-05, "loss": 1.3842, "step": 1546 }, { "epoch": 0.48624771776900877, "grad_norm": 0.76953125, "learning_rate": 1.8238019676293242e-05, "loss": 1.3789, "step": 1548 }, { "epoch": 0.48687594479455015, "grad_norm": 0.65625, "learning_rate": 1.8235480799746114e-05, "loss": 1.3439, "step": 1550 }, { "epoch": 0.4875041718200915, "grad_norm": 0.69921875, "learning_rate": 1.8232941923198985e-05, "loss": 1.3715, "step": 1552 }, { "epoch": 0.48813239884563286, "grad_norm": 0.70703125, "learning_rate": 1.823040304665186e-05, "loss": 1.4506, "step": 1554 }, { "epoch": 0.4887606258711742, "grad_norm": 0.69140625, "learning_rate": 1.822786417010473e-05, "loss": 1.4064, "step": 1556 }, { "epoch": 0.4893888528967156, "grad_norm": 0.7578125, "learning_rate": 1.8225325293557603e-05, "loss": 1.3322, "step": 1558 }, { "epoch": 0.4900170799222569, "grad_norm": 0.76953125, "learning_rate": 1.8222786417010474e-05, "loss": 1.3075, "step": 1560 }, { "epoch": 0.4906453069477983, "grad_norm": 0.703125, "learning_rate": 1.822024754046335e-05, "loss": 1.4187, "step": 1562 }, { "epoch": 0.4912735339733396, "grad_norm": 0.8984375, "learning_rate": 1.8217708663916217e-05, "loss": 1.3365, "step": 1564 }, { "epoch": 0.49190176099888094, "grad_norm": 0.7578125, "learning_rate": 1.821516978736909e-05, "loss": 1.3593, "step": 1566 }, { "epoch": 0.4925299880244223, "grad_norm": 0.7421875, "learning_rate": 1.8212630910821963e-05, "loss": 1.298, "step": 1568 }, { "epoch": 0.49315821504996366, "grad_norm": 0.71875, "learning_rate": 1.8210092034274834e-05, "loss": 1.4256, "step": 1570 }, { "epoch": 0.49378644207550504, "grad_norm": 0.7890625, "learning_rate": 1.8207553157727706e-05, "loss": 1.4808, "step": 1572 }, { "epoch": 0.49441466910104637, "grad_norm": 0.875, "learning_rate": 1.820501428118058e-05, "loss": 1.4233, "step": 1574 }, { "epoch": 0.49504289612658775, "grad_norm": 0.7890625, "learning_rate": 1.8202475404633452e-05, "loss": 1.3273, "step": 1576 }, { "epoch": 0.4956711231521291, "grad_norm": 0.73828125, "learning_rate": 1.8199936528086323e-05, "loss": 1.3384, "step": 1578 }, { "epoch": 0.49629935017767046, "grad_norm": 0.7421875, "learning_rate": 1.8197397651539195e-05, "loss": 1.4455, "step": 1580 }, { "epoch": 0.4969275772032118, "grad_norm": 0.7421875, "learning_rate": 1.819485877499207e-05, "loss": 1.3235, "step": 1582 }, { "epoch": 0.4975558042287532, "grad_norm": 0.73828125, "learning_rate": 1.8192319898444937e-05, "loss": 1.3602, "step": 1584 }, { "epoch": 0.4981840312542945, "grad_norm": 0.890625, "learning_rate": 1.8189781021897812e-05, "loss": 1.2892, "step": 1586 }, { "epoch": 0.4988122582798359, "grad_norm": 0.85546875, "learning_rate": 1.8187242145350684e-05, "loss": 1.2329, "step": 1588 }, { "epoch": 0.4994404853053772, "grad_norm": 0.68359375, "learning_rate": 1.8184703268803555e-05, "loss": 1.2801, "step": 1590 }, { "epoch": 0.5000687123309185, "grad_norm": 0.76171875, "learning_rate": 1.8182164392256426e-05, "loss": 1.3857, "step": 1592 }, { "epoch": 0.50069693935646, "grad_norm": 0.8671875, "learning_rate": 1.81796255157093e-05, "loss": 1.3113, "step": 1594 }, { "epoch": 0.5013251663820013, "grad_norm": 0.78125, "learning_rate": 1.8177086639162172e-05, "loss": 1.4523, "step": 1596 }, { "epoch": 0.5019533934075426, "grad_norm": 0.77734375, "learning_rate": 1.8174547762615044e-05, "loss": 1.3314, "step": 1598 }, { "epoch": 0.502581620433084, "grad_norm": 0.796875, "learning_rate": 1.8172008886067915e-05, "loss": 1.3618, "step": 1600 }, { "epoch": 0.5032098474586254, "grad_norm": 0.8203125, "learning_rate": 1.816947000952079e-05, "loss": 1.3553, "step": 1602 }, { "epoch": 0.5038380744841667, "grad_norm": 0.671875, "learning_rate": 1.816693113297366e-05, "loss": 1.4201, "step": 1604 }, { "epoch": 0.5044663015097081, "grad_norm": 0.8125, "learning_rate": 1.8164392256426533e-05, "loss": 1.309, "step": 1606 }, { "epoch": 0.5050945285352494, "grad_norm": 0.7734375, "learning_rate": 1.8161853379879404e-05, "loss": 1.3145, "step": 1608 }, { "epoch": 0.5057227555607908, "grad_norm": 0.87890625, "learning_rate": 1.8159314503332275e-05, "loss": 1.3546, "step": 1610 }, { "epoch": 0.5063509825863322, "grad_norm": 0.7109375, "learning_rate": 1.8156775626785147e-05, "loss": 1.2818, "step": 1612 }, { "epoch": 0.5069792096118735, "grad_norm": 0.796875, "learning_rate": 1.815423675023802e-05, "loss": 1.4176, "step": 1614 }, { "epoch": 0.5076074366374148, "grad_norm": 0.734375, "learning_rate": 1.8151697873690893e-05, "loss": 1.3501, "step": 1616 }, { "epoch": 0.5082356636629562, "grad_norm": 0.74609375, "learning_rate": 1.8149158997143764e-05, "loss": 1.3265, "step": 1618 }, { "epoch": 0.5088638906884976, "grad_norm": 0.79296875, "learning_rate": 1.814662012059664e-05, "loss": 1.333, "step": 1620 }, { "epoch": 0.5094921177140389, "grad_norm": 0.7265625, "learning_rate": 1.814408124404951e-05, "loss": 1.2086, "step": 1622 }, { "epoch": 0.5101203447395802, "grad_norm": 0.8046875, "learning_rate": 1.8141542367502382e-05, "loss": 1.2181, "step": 1624 }, { "epoch": 0.5107485717651217, "grad_norm": 0.72265625, "learning_rate": 1.8139003490955253e-05, "loss": 1.3269, "step": 1626 }, { "epoch": 0.511376798790663, "grad_norm": 0.67578125, "learning_rate": 1.8136464614408128e-05, "loss": 1.2733, "step": 1628 }, { "epoch": 0.5120050258162043, "grad_norm": 0.69921875, "learning_rate": 1.8133925737861e-05, "loss": 1.253, "step": 1630 }, { "epoch": 0.5126332528417457, "grad_norm": 0.71484375, "learning_rate": 1.813138686131387e-05, "loss": 1.511, "step": 1632 }, { "epoch": 0.5132614798672871, "grad_norm": 0.671875, "learning_rate": 1.8128847984766742e-05, "loss": 1.2451, "step": 1634 }, { "epoch": 0.5138897068928284, "grad_norm": 0.66015625, "learning_rate": 1.8126309108219614e-05, "loss": 1.2587, "step": 1636 }, { "epoch": 0.5145179339183698, "grad_norm": 0.875, "learning_rate": 1.8123770231672485e-05, "loss": 1.301, "step": 1638 }, { "epoch": 0.5151461609439111, "grad_norm": 0.8046875, "learning_rate": 1.812123135512536e-05, "loss": 1.4174, "step": 1640 }, { "epoch": 0.5157743879694524, "grad_norm": 0.7265625, "learning_rate": 1.811869247857823e-05, "loss": 1.2725, "step": 1642 }, { "epoch": 0.5164026149949938, "grad_norm": 0.81640625, "learning_rate": 1.8116153602031103e-05, "loss": 1.3744, "step": 1644 }, { "epoch": 0.5170308420205352, "grad_norm": 0.734375, "learning_rate": 1.8113614725483974e-05, "loss": 1.2455, "step": 1646 }, { "epoch": 0.5176590690460765, "grad_norm": 0.68359375, "learning_rate": 1.811107584893685e-05, "loss": 1.4318, "step": 1648 }, { "epoch": 0.5182872960716178, "grad_norm": 0.80859375, "learning_rate": 1.810853697238972e-05, "loss": 1.3426, "step": 1650 }, { "epoch": 0.5189155230971593, "grad_norm": 0.73046875, "learning_rate": 1.810599809584259e-05, "loss": 1.1767, "step": 1652 }, { "epoch": 0.5195437501227006, "grad_norm": 0.73046875, "learning_rate": 1.8103459219295463e-05, "loss": 1.2447, "step": 1654 }, { "epoch": 0.5201719771482419, "grad_norm": 0.87890625, "learning_rate": 1.8100920342748338e-05, "loss": 1.3263, "step": 1656 }, { "epoch": 0.5208002041737833, "grad_norm": 0.74609375, "learning_rate": 1.8098381466201206e-05, "loss": 1.3857, "step": 1658 }, { "epoch": 0.5214284311993247, "grad_norm": 0.7421875, "learning_rate": 1.809584258965408e-05, "loss": 1.4577, "step": 1660 }, { "epoch": 0.522056658224866, "grad_norm": 0.77734375, "learning_rate": 1.8093303713106952e-05, "loss": 1.4192, "step": 1662 }, { "epoch": 0.5226848852504073, "grad_norm": 0.72265625, "learning_rate": 1.8090764836559823e-05, "loss": 1.2375, "step": 1664 }, { "epoch": 0.5233131122759487, "grad_norm": 0.7578125, "learning_rate": 1.8088225960012695e-05, "loss": 1.3199, "step": 1666 }, { "epoch": 0.5239413393014901, "grad_norm": 0.859375, "learning_rate": 1.808568708346557e-05, "loss": 1.3239, "step": 1668 }, { "epoch": 0.5245695663270314, "grad_norm": 0.63671875, "learning_rate": 1.808314820691844e-05, "loss": 1.4124, "step": 1670 }, { "epoch": 0.5251977933525728, "grad_norm": 0.8046875, "learning_rate": 1.8080609330371312e-05, "loss": 1.3539, "step": 1672 }, { "epoch": 0.5258260203781141, "grad_norm": 0.75, "learning_rate": 1.8078070453824183e-05, "loss": 1.2979, "step": 1674 }, { "epoch": 0.5264542474036555, "grad_norm": 0.67578125, "learning_rate": 1.8075531577277058e-05, "loss": 1.3582, "step": 1676 }, { "epoch": 0.5270824744291969, "grad_norm": 0.7890625, "learning_rate": 1.8072992700729926e-05, "loss": 1.2866, "step": 1678 }, { "epoch": 0.5277107014547382, "grad_norm": 0.83984375, "learning_rate": 1.80704538241828e-05, "loss": 1.1929, "step": 1680 }, { "epoch": 0.5283389284802795, "grad_norm": 0.6875, "learning_rate": 1.8067914947635672e-05, "loss": 1.2081, "step": 1682 }, { "epoch": 0.528967155505821, "grad_norm": 0.67578125, "learning_rate": 1.8065376071088544e-05, "loss": 1.4058, "step": 1684 }, { "epoch": 0.5295953825313623, "grad_norm": 0.69140625, "learning_rate": 1.8062837194541415e-05, "loss": 1.3689, "step": 1686 }, { "epoch": 0.5302236095569036, "grad_norm": 0.7734375, "learning_rate": 1.806029831799429e-05, "loss": 1.2963, "step": 1688 }, { "epoch": 0.530851836582445, "grad_norm": 0.76953125, "learning_rate": 1.805775944144716e-05, "loss": 1.3622, "step": 1690 }, { "epoch": 0.5314800636079864, "grad_norm": 0.765625, "learning_rate": 1.8055220564900033e-05, "loss": 1.2601, "step": 1692 }, { "epoch": 0.5321082906335277, "grad_norm": 0.78515625, "learning_rate": 1.8052681688352904e-05, "loss": 1.2963, "step": 1694 }, { "epoch": 0.532736517659069, "grad_norm": 0.7109375, "learning_rate": 1.805014281180578e-05, "loss": 1.3702, "step": 1696 }, { "epoch": 0.5333647446846104, "grad_norm": 0.734375, "learning_rate": 1.804760393525865e-05, "loss": 1.2802, "step": 1698 }, { "epoch": 0.5339929717101518, "grad_norm": 1.0078125, "learning_rate": 1.804506505871152e-05, "loss": 1.2703, "step": 1700 }, { "epoch": 0.5346211987356931, "grad_norm": 0.71875, "learning_rate": 1.8042526182164393e-05, "loss": 1.2858, "step": 1702 }, { "epoch": 0.5352494257612345, "grad_norm": 1.03125, "learning_rate": 1.8039987305617264e-05, "loss": 1.2983, "step": 1704 }, { "epoch": 0.5358776527867758, "grad_norm": 0.71484375, "learning_rate": 1.803744842907014e-05, "loss": 1.4192, "step": 1706 }, { "epoch": 0.5365058798123171, "grad_norm": 0.71484375, "learning_rate": 1.803490955252301e-05, "loss": 1.4011, "step": 1708 }, { "epoch": 0.5371341068378586, "grad_norm": 0.6796875, "learning_rate": 1.8032370675975882e-05, "loss": 1.3894, "step": 1710 }, { "epoch": 0.5377623338633999, "grad_norm": 0.75390625, "learning_rate": 1.8029831799428753e-05, "loss": 1.5168, "step": 1712 }, { "epoch": 0.5383905608889412, "grad_norm": 1.4609375, "learning_rate": 1.8027292922881628e-05, "loss": 1.3708, "step": 1714 }, { "epoch": 0.5390187879144825, "grad_norm": 0.76953125, "learning_rate": 1.80247540463345e-05, "loss": 1.3817, "step": 1716 }, { "epoch": 0.539647014940024, "grad_norm": 0.7578125, "learning_rate": 1.802221516978737e-05, "loss": 1.3174, "step": 1718 }, { "epoch": 0.5402752419655653, "grad_norm": 0.73828125, "learning_rate": 1.8019676293240242e-05, "loss": 1.3609, "step": 1720 }, { "epoch": 0.5409034689911066, "grad_norm": 0.734375, "learning_rate": 1.8017137416693117e-05, "loss": 1.4835, "step": 1722 }, { "epoch": 0.541531696016648, "grad_norm": 0.69921875, "learning_rate": 1.801459854014599e-05, "loss": 1.5052, "step": 1724 }, { "epoch": 0.5421599230421894, "grad_norm": 0.72265625, "learning_rate": 1.801205966359886e-05, "loss": 1.3482, "step": 1726 }, { "epoch": 0.5427881500677307, "grad_norm": 0.79296875, "learning_rate": 1.800952078705173e-05, "loss": 1.21, "step": 1728 }, { "epoch": 0.5434163770932721, "grad_norm": 0.75390625, "learning_rate": 1.8006981910504602e-05, "loss": 1.3702, "step": 1730 }, { "epoch": 0.5440446041188134, "grad_norm": 0.7578125, "learning_rate": 1.8004443033957474e-05, "loss": 1.4266, "step": 1732 }, { "epoch": 0.5446728311443548, "grad_norm": 0.671875, "learning_rate": 1.800190415741035e-05, "loss": 1.339, "step": 1734 }, { "epoch": 0.5453010581698962, "grad_norm": 0.74609375, "learning_rate": 1.799936528086322e-05, "loss": 1.3851, "step": 1736 }, { "epoch": 0.5459292851954375, "grad_norm": 0.69140625, "learning_rate": 1.799682640431609e-05, "loss": 1.4017, "step": 1738 }, { "epoch": 0.5465575122209788, "grad_norm": 0.734375, "learning_rate": 1.7994287527768963e-05, "loss": 1.2933, "step": 1740 }, { "epoch": 0.5471857392465203, "grad_norm": 0.74609375, "learning_rate": 1.7991748651221838e-05, "loss": 1.3104, "step": 1742 }, { "epoch": 0.5478139662720616, "grad_norm": 0.65625, "learning_rate": 1.798920977467471e-05, "loss": 1.231, "step": 1744 }, { "epoch": 0.5484421932976029, "grad_norm": 0.7578125, "learning_rate": 1.798667089812758e-05, "loss": 1.4584, "step": 1746 }, { "epoch": 0.5490704203231442, "grad_norm": 0.75390625, "learning_rate": 1.798413202158045e-05, "loss": 1.2988, "step": 1748 }, { "epoch": 0.5496986473486857, "grad_norm": 0.7578125, "learning_rate": 1.7981593145033326e-05, "loss": 1.2553, "step": 1750 }, { "epoch": 0.550326874374227, "grad_norm": 0.75, "learning_rate": 1.7979054268486194e-05, "loss": 1.3824, "step": 1752 }, { "epoch": 0.5509551013997683, "grad_norm": 0.765625, "learning_rate": 1.797651539193907e-05, "loss": 1.4831, "step": 1754 }, { "epoch": 0.5515833284253097, "grad_norm": 0.8046875, "learning_rate": 1.797397651539194e-05, "loss": 1.3839, "step": 1756 }, { "epoch": 0.5522115554508511, "grad_norm": 0.7578125, "learning_rate": 1.7971437638844812e-05, "loss": 1.4556, "step": 1758 }, { "epoch": 0.5528397824763924, "grad_norm": 0.67578125, "learning_rate": 1.7968898762297683e-05, "loss": 1.3564, "step": 1760 }, { "epoch": 0.5534680095019338, "grad_norm": 0.81640625, "learning_rate": 1.7966359885750558e-05, "loss": 1.4027, "step": 1762 }, { "epoch": 0.5540962365274751, "grad_norm": 0.93359375, "learning_rate": 1.796382100920343e-05, "loss": 1.3738, "step": 1764 }, { "epoch": 0.5547244635530165, "grad_norm": 0.8203125, "learning_rate": 1.79612821326563e-05, "loss": 1.4116, "step": 1766 }, { "epoch": 0.5553526905785579, "grad_norm": 0.9140625, "learning_rate": 1.7958743256109172e-05, "loss": 1.4383, "step": 1768 }, { "epoch": 0.5559809176040992, "grad_norm": 0.76171875, "learning_rate": 1.7956204379562047e-05, "loss": 1.2174, "step": 1770 }, { "epoch": 0.5566091446296405, "grad_norm": 0.75390625, "learning_rate": 1.7953665503014915e-05, "loss": 1.2893, "step": 1772 }, { "epoch": 0.557237371655182, "grad_norm": 0.796875, "learning_rate": 1.795112662646779e-05, "loss": 1.291, "step": 1774 }, { "epoch": 0.5578655986807233, "grad_norm": 0.82421875, "learning_rate": 1.794858774992066e-05, "loss": 1.4798, "step": 1776 }, { "epoch": 0.5584938257062646, "grad_norm": 0.9296875, "learning_rate": 1.7946048873373533e-05, "loss": 1.2961, "step": 1778 }, { "epoch": 0.5591220527318059, "grad_norm": 2.1875, "learning_rate": 1.7943509996826404e-05, "loss": 1.3342, "step": 1780 }, { "epoch": 0.5597502797573473, "grad_norm": 0.890625, "learning_rate": 1.794097112027928e-05, "loss": 1.2582, "step": 1782 }, { "epoch": 0.5603785067828887, "grad_norm": 0.66796875, "learning_rate": 1.793843224373215e-05, "loss": 1.3106, "step": 1784 }, { "epoch": 0.56100673380843, "grad_norm": 0.8125, "learning_rate": 1.793589336718502e-05, "loss": 1.3369, "step": 1786 }, { "epoch": 0.5616349608339714, "grad_norm": 0.859375, "learning_rate": 1.7933354490637893e-05, "loss": 1.2346, "step": 1788 }, { "epoch": 0.5622631878595127, "grad_norm": 0.76171875, "learning_rate": 1.7930815614090768e-05, "loss": 1.2644, "step": 1790 }, { "epoch": 0.5628914148850541, "grad_norm": 0.8359375, "learning_rate": 1.792827673754364e-05, "loss": 1.3247, "step": 1792 }, { "epoch": 0.5635196419105954, "grad_norm": 0.7734375, "learning_rate": 1.792573786099651e-05, "loss": 1.2764, "step": 1794 }, { "epoch": 0.5641478689361368, "grad_norm": 0.71484375, "learning_rate": 1.7923198984449385e-05, "loss": 1.2428, "step": 1796 }, { "epoch": 0.5647760959616781, "grad_norm": 0.80078125, "learning_rate": 1.7920660107902253e-05, "loss": 1.4744, "step": 1798 }, { "epoch": 0.5654043229872195, "grad_norm": 0.7421875, "learning_rate": 1.7918121231355128e-05, "loss": 1.3754, "step": 1800 }, { "epoch": 0.5660325500127609, "grad_norm": 0.8828125, "learning_rate": 1.7915582354808e-05, "loss": 1.3084, "step": 1802 }, { "epoch": 0.5666607770383022, "grad_norm": 0.75, "learning_rate": 1.791304347826087e-05, "loss": 1.3269, "step": 1804 }, { "epoch": 0.5672890040638435, "grad_norm": 0.7265625, "learning_rate": 1.7910504601713742e-05, "loss": 1.3141, "step": 1806 }, { "epoch": 0.567917231089385, "grad_norm": 0.90234375, "learning_rate": 1.7907965725166617e-05, "loss": 1.1482, "step": 1808 }, { "epoch": 0.5685454581149263, "grad_norm": 0.69921875, "learning_rate": 1.7905426848619488e-05, "loss": 1.3093, "step": 1810 }, { "epoch": 0.5691736851404676, "grad_norm": 0.6640625, "learning_rate": 1.790288797207236e-05, "loss": 1.4742, "step": 1812 }, { "epoch": 0.569801912166009, "grad_norm": 0.8203125, "learning_rate": 1.790034909552523e-05, "loss": 1.3429, "step": 1814 }, { "epoch": 0.5704301391915504, "grad_norm": 0.77734375, "learning_rate": 1.7897810218978106e-05, "loss": 1.3247, "step": 1816 }, { "epoch": 0.5710583662170917, "grad_norm": 0.6875, "learning_rate": 1.7895271342430974e-05, "loss": 1.386, "step": 1818 }, { "epoch": 0.571686593242633, "grad_norm": 0.6796875, "learning_rate": 1.789273246588385e-05, "loss": 1.3501, "step": 1820 }, { "epoch": 0.5723148202681744, "grad_norm": 0.73828125, "learning_rate": 1.789019358933672e-05, "loss": 1.2759, "step": 1822 }, { "epoch": 0.5729430472937158, "grad_norm": 0.78515625, "learning_rate": 1.788765471278959e-05, "loss": 1.2834, "step": 1824 }, { "epoch": 0.5735712743192571, "grad_norm": 0.765625, "learning_rate": 1.7885115836242463e-05, "loss": 1.3764, "step": 1826 }, { "epoch": 0.5741995013447985, "grad_norm": 0.80859375, "learning_rate": 1.7882576959695337e-05, "loss": 1.2428, "step": 1828 }, { "epoch": 0.5748277283703398, "grad_norm": 0.78125, "learning_rate": 1.788003808314821e-05, "loss": 1.3577, "step": 1830 }, { "epoch": 0.5754559553958812, "grad_norm": 0.94921875, "learning_rate": 1.787749920660108e-05, "loss": 1.2091, "step": 1832 }, { "epoch": 0.5760841824214226, "grad_norm": 0.83203125, "learning_rate": 1.787496033005395e-05, "loss": 1.4, "step": 1834 }, { "epoch": 0.5767124094469639, "grad_norm": 0.7109375, "learning_rate": 1.7872421453506826e-05, "loss": 1.3621, "step": 1836 }, { "epoch": 0.5773406364725052, "grad_norm": 0.828125, "learning_rate": 1.7869882576959698e-05, "loss": 1.3756, "step": 1838 }, { "epoch": 0.5779688634980467, "grad_norm": 0.68359375, "learning_rate": 1.786734370041257e-05, "loss": 1.3658, "step": 1840 }, { "epoch": 0.578597090523588, "grad_norm": 0.7109375, "learning_rate": 1.786480482386544e-05, "loss": 1.2812, "step": 1842 }, { "epoch": 0.5792253175491293, "grad_norm": 0.73828125, "learning_rate": 1.7862265947318312e-05, "loss": 1.4921, "step": 1844 }, { "epoch": 0.5798535445746706, "grad_norm": 0.77734375, "learning_rate": 1.7859727070771183e-05, "loss": 1.3042, "step": 1846 }, { "epoch": 0.5804817716002121, "grad_norm": 1.203125, "learning_rate": 1.7857188194224058e-05, "loss": 1.1429, "step": 1848 }, { "epoch": 0.5811099986257534, "grad_norm": 0.73046875, "learning_rate": 1.785464931767693e-05, "loss": 1.4471, "step": 1850 }, { "epoch": 0.5817382256512947, "grad_norm": 0.6953125, "learning_rate": 1.78521104411298e-05, "loss": 1.3808, "step": 1852 }, { "epoch": 0.5823664526768361, "grad_norm": 0.94140625, "learning_rate": 1.7849571564582672e-05, "loss": 1.3266, "step": 1854 }, { "epoch": 0.5829946797023774, "grad_norm": 0.68359375, "learning_rate": 1.7847032688035547e-05, "loss": 1.4399, "step": 1856 }, { "epoch": 0.5836229067279188, "grad_norm": 0.75, "learning_rate": 1.784449381148842e-05, "loss": 1.2884, "step": 1858 }, { "epoch": 0.5842511337534602, "grad_norm": 0.6796875, "learning_rate": 1.784195493494129e-05, "loss": 1.3308, "step": 1860 }, { "epoch": 0.5848793607790015, "grad_norm": 0.7890625, "learning_rate": 1.783941605839416e-05, "loss": 1.3215, "step": 1862 }, { "epoch": 0.5855075878045428, "grad_norm": 0.8671875, "learning_rate": 1.7836877181847036e-05, "loss": 1.4684, "step": 1864 }, { "epoch": 0.5861358148300843, "grad_norm": 0.6875, "learning_rate": 1.7834338305299904e-05, "loss": 1.293, "step": 1866 }, { "epoch": 0.5867640418556256, "grad_norm": 0.7578125, "learning_rate": 1.783179942875278e-05, "loss": 1.2667, "step": 1868 }, { "epoch": 0.5873922688811669, "grad_norm": 0.76953125, "learning_rate": 1.782926055220565e-05, "loss": 1.3243, "step": 1870 }, { "epoch": 0.5880204959067082, "grad_norm": 0.79296875, "learning_rate": 1.782672167565852e-05, "loss": 1.2651, "step": 1872 }, { "epoch": 0.5886487229322497, "grad_norm": 0.69921875, "learning_rate": 1.7824182799111393e-05, "loss": 1.2973, "step": 1874 }, { "epoch": 0.589276949957791, "grad_norm": 0.73828125, "learning_rate": 1.7821643922564268e-05, "loss": 1.2823, "step": 1876 }, { "epoch": 0.5899051769833323, "grad_norm": 0.94921875, "learning_rate": 1.781910504601714e-05, "loss": 1.415, "step": 1878 }, { "epoch": 0.5905334040088737, "grad_norm": 0.76171875, "learning_rate": 1.781656616947001e-05, "loss": 1.2477, "step": 1880 }, { "epoch": 0.5911616310344151, "grad_norm": 0.80078125, "learning_rate": 1.7814027292922885e-05, "loss": 1.2649, "step": 1882 }, { "epoch": 0.5917898580599564, "grad_norm": 0.64453125, "learning_rate": 1.7811488416375756e-05, "loss": 1.3797, "step": 1884 }, { "epoch": 0.5924180850854978, "grad_norm": 0.75390625, "learning_rate": 1.7808949539828628e-05, "loss": 1.3717, "step": 1886 }, { "epoch": 0.5930463121110391, "grad_norm": 0.70703125, "learning_rate": 1.78064106632815e-05, "loss": 1.2677, "step": 1888 }, { "epoch": 0.5936745391365805, "grad_norm": 0.78515625, "learning_rate": 1.7803871786734374e-05, "loss": 1.4157, "step": 1890 }, { "epoch": 0.5943027661621219, "grad_norm": 0.6875, "learning_rate": 1.7801332910187242e-05, "loss": 1.2478, "step": 1892 }, { "epoch": 0.5949309931876632, "grad_norm": 0.73046875, "learning_rate": 1.7798794033640117e-05, "loss": 1.3108, "step": 1894 }, { "epoch": 0.5955592202132045, "grad_norm": 0.75, "learning_rate": 1.7796255157092988e-05, "loss": 1.3043, "step": 1896 }, { "epoch": 0.596187447238746, "grad_norm": 0.703125, "learning_rate": 1.779371628054586e-05, "loss": 1.259, "step": 1898 }, { "epoch": 0.5968156742642873, "grad_norm": 0.83203125, "learning_rate": 1.779117740399873e-05, "loss": 1.2671, "step": 1900 }, { "epoch": 0.5974439012898286, "grad_norm": 0.7734375, "learning_rate": 1.7788638527451606e-05, "loss": 1.3808, "step": 1902 }, { "epoch": 0.5980721283153699, "grad_norm": 0.8828125, "learning_rate": 1.7786099650904477e-05, "loss": 1.3359, "step": 1904 }, { "epoch": 0.5987003553409114, "grad_norm": 0.8359375, "learning_rate": 1.778356077435735e-05, "loss": 1.3205, "step": 1906 }, { "epoch": 0.5993285823664527, "grad_norm": 0.73828125, "learning_rate": 1.778102189781022e-05, "loss": 1.3357, "step": 1908 }, { "epoch": 0.599956809391994, "grad_norm": 0.76953125, "learning_rate": 1.7778483021263095e-05, "loss": 1.3952, "step": 1910 }, { "epoch": 0.6005850364175354, "grad_norm": 0.828125, "learning_rate": 1.7775944144715963e-05, "loss": 1.2332, "step": 1912 }, { "epoch": 0.6012132634430768, "grad_norm": 0.828125, "learning_rate": 1.7773405268168837e-05, "loss": 1.3406, "step": 1914 }, { "epoch": 0.6018414904686181, "grad_norm": 0.71875, "learning_rate": 1.777086639162171e-05, "loss": 1.3423, "step": 1916 }, { "epoch": 0.6024697174941595, "grad_norm": 0.74609375, "learning_rate": 1.776832751507458e-05, "loss": 1.3578, "step": 1918 }, { "epoch": 0.6030979445197008, "grad_norm": 0.65625, "learning_rate": 1.776578863852745e-05, "loss": 1.3513, "step": 1920 }, { "epoch": 0.6037261715452421, "grad_norm": 0.8203125, "learning_rate": 1.7763249761980326e-05, "loss": 1.2171, "step": 1922 }, { "epoch": 0.6043543985707835, "grad_norm": 0.72265625, "learning_rate": 1.7760710885433198e-05, "loss": 1.3335, "step": 1924 }, { "epoch": 0.6049826255963249, "grad_norm": 0.83203125, "learning_rate": 1.775817200888607e-05, "loss": 1.3946, "step": 1926 }, { "epoch": 0.6056108526218662, "grad_norm": 0.76953125, "learning_rate": 1.775563313233894e-05, "loss": 1.2521, "step": 1928 }, { "epoch": 0.6062390796474075, "grad_norm": 0.78125, "learning_rate": 1.7753094255791815e-05, "loss": 1.4663, "step": 1930 }, { "epoch": 0.606867306672949, "grad_norm": 0.71484375, "learning_rate": 1.7750555379244687e-05, "loss": 1.1201, "step": 1932 }, { "epoch": 0.6074955336984903, "grad_norm": 0.78515625, "learning_rate": 1.7748016502697558e-05, "loss": 1.4028, "step": 1934 }, { "epoch": 0.6081237607240316, "grad_norm": 0.7734375, "learning_rate": 1.774547762615043e-05, "loss": 1.2642, "step": 1936 }, { "epoch": 0.608751987749573, "grad_norm": 0.76953125, "learning_rate": 1.77429387496033e-05, "loss": 1.2945, "step": 1938 }, { "epoch": 0.6093802147751144, "grad_norm": 0.76953125, "learning_rate": 1.7740399873056172e-05, "loss": 1.265, "step": 1940 }, { "epoch": 0.6100084418006557, "grad_norm": 0.859375, "learning_rate": 1.7737860996509047e-05, "loss": 1.4148, "step": 1942 }, { "epoch": 0.610636668826197, "grad_norm": 0.66796875, "learning_rate": 1.7735322119961918e-05, "loss": 1.2506, "step": 1944 }, { "epoch": 0.6112648958517384, "grad_norm": 0.90234375, "learning_rate": 1.773278324341479e-05, "loss": 1.3281, "step": 1946 }, { "epoch": 0.6118931228772798, "grad_norm": 0.7109375, "learning_rate": 1.773024436686766e-05, "loss": 1.273, "step": 1948 }, { "epoch": 0.6125213499028211, "grad_norm": 0.75, "learning_rate": 1.7727705490320536e-05, "loss": 1.2533, "step": 1950 }, { "epoch": 0.6131495769283625, "grad_norm": 0.77734375, "learning_rate": 1.7725166613773407e-05, "loss": 1.2262, "step": 1952 }, { "epoch": 0.6137778039539038, "grad_norm": 0.72265625, "learning_rate": 1.772262773722628e-05, "loss": 1.2834, "step": 1954 }, { "epoch": 0.6144060309794452, "grad_norm": 0.6796875, "learning_rate": 1.772008886067915e-05, "loss": 1.286, "step": 1956 }, { "epoch": 0.6150342580049866, "grad_norm": 0.7265625, "learning_rate": 1.7717549984132025e-05, "loss": 1.2474, "step": 1958 }, { "epoch": 0.6156624850305279, "grad_norm": 0.71484375, "learning_rate": 1.7715011107584893e-05, "loss": 1.345, "step": 1960 }, { "epoch": 0.6162907120560692, "grad_norm": 0.79296875, "learning_rate": 1.7712472231037767e-05, "loss": 1.2253, "step": 1962 }, { "epoch": 0.6169189390816107, "grad_norm": 0.76953125, "learning_rate": 1.770993335449064e-05, "loss": 1.3628, "step": 1964 }, { "epoch": 0.617547166107152, "grad_norm": 0.76953125, "learning_rate": 1.770739447794351e-05, "loss": 1.2676, "step": 1966 }, { "epoch": 0.6181753931326933, "grad_norm": 0.72265625, "learning_rate": 1.7704855601396385e-05, "loss": 1.2463, "step": 1968 }, { "epoch": 0.6188036201582346, "grad_norm": 0.7109375, "learning_rate": 1.7702316724849256e-05, "loss": 1.3617, "step": 1970 }, { "epoch": 0.6194318471837761, "grad_norm": 0.83984375, "learning_rate": 1.7699777848302128e-05, "loss": 1.4785, "step": 1972 }, { "epoch": 0.6200600742093174, "grad_norm": 0.76953125, "learning_rate": 1.7697238971755e-05, "loss": 1.2933, "step": 1974 }, { "epoch": 0.6206883012348587, "grad_norm": 0.70703125, "learning_rate": 1.7694700095207874e-05, "loss": 1.4211, "step": 1976 }, { "epoch": 0.6213165282604001, "grad_norm": 0.734375, "learning_rate": 1.7692161218660745e-05, "loss": 1.4411, "step": 1978 }, { "epoch": 0.6219447552859415, "grad_norm": 0.70703125, "learning_rate": 1.7689622342113617e-05, "loss": 1.2598, "step": 1980 }, { "epoch": 0.6225729823114828, "grad_norm": 0.71875, "learning_rate": 1.7687083465566488e-05, "loss": 1.2098, "step": 1982 }, { "epoch": 0.6232012093370242, "grad_norm": 0.69921875, "learning_rate": 1.7684544589019363e-05, "loss": 1.3236, "step": 1984 }, { "epoch": 0.6238294363625655, "grad_norm": 0.73046875, "learning_rate": 1.768200571247223e-05, "loss": 1.3541, "step": 1986 }, { "epoch": 0.6244576633881069, "grad_norm": 0.84765625, "learning_rate": 1.7679466835925106e-05, "loss": 1.2746, "step": 1988 }, { "epoch": 0.6250858904136483, "grad_norm": 0.86328125, "learning_rate": 1.7676927959377977e-05, "loss": 1.3703, "step": 1990 }, { "epoch": 0.6257141174391896, "grad_norm": 0.80859375, "learning_rate": 1.767438908283085e-05, "loss": 1.2673, "step": 1992 }, { "epoch": 0.6263423444647309, "grad_norm": 0.88671875, "learning_rate": 1.767185020628372e-05, "loss": 1.2734, "step": 1994 }, { "epoch": 0.6269705714902722, "grad_norm": 0.8125, "learning_rate": 1.7669311329736595e-05, "loss": 1.2994, "step": 1996 }, { "epoch": 0.6275987985158137, "grad_norm": 0.84765625, "learning_rate": 1.7666772453189466e-05, "loss": 1.2314, "step": 1998 }, { "epoch": 0.628227025541355, "grad_norm": 0.71875, "learning_rate": 1.7664233576642337e-05, "loss": 1.3692, "step": 2000 }, { "epoch": 0.6288552525668963, "grad_norm": 0.703125, "learning_rate": 1.766169470009521e-05, "loss": 1.1083, "step": 2002 }, { "epoch": 0.6294834795924377, "grad_norm": 0.71875, "learning_rate": 1.7659155823548083e-05, "loss": 1.3513, "step": 2004 }, { "epoch": 0.6301117066179791, "grad_norm": 0.71875, "learning_rate": 1.765661694700095e-05, "loss": 1.2768, "step": 2006 }, { "epoch": 0.6307399336435204, "grad_norm": 0.77734375, "learning_rate": 1.7654078070453826e-05, "loss": 1.399, "step": 2008 }, { "epoch": 0.6313681606690618, "grad_norm": 0.7734375, "learning_rate": 1.7651539193906698e-05, "loss": 1.3596, "step": 2010 }, { "epoch": 0.6319963876946031, "grad_norm": 0.99609375, "learning_rate": 1.764900031735957e-05, "loss": 1.3298, "step": 2012 }, { "epoch": 0.6326246147201445, "grad_norm": 0.81640625, "learning_rate": 1.764646144081244e-05, "loss": 1.3194, "step": 2014 }, { "epoch": 0.6332528417456859, "grad_norm": 0.78125, "learning_rate": 1.7643922564265315e-05, "loss": 1.2478, "step": 2016 }, { "epoch": 0.6338810687712272, "grad_norm": 0.78125, "learning_rate": 1.7641383687718187e-05, "loss": 1.285, "step": 2018 }, { "epoch": 0.6345092957967685, "grad_norm": 0.75, "learning_rate": 1.7638844811171058e-05, "loss": 1.4251, "step": 2020 }, { "epoch": 0.63513752282231, "grad_norm": 0.97265625, "learning_rate": 1.763630593462393e-05, "loss": 1.281, "step": 2022 }, { "epoch": 0.6357657498478513, "grad_norm": 0.859375, "learning_rate": 1.7633767058076804e-05, "loss": 1.3546, "step": 2024 }, { "epoch": 0.6363939768733926, "grad_norm": 0.6796875, "learning_rate": 1.7631228181529672e-05, "loss": 1.2134, "step": 2026 }, { "epoch": 0.6370222038989339, "grad_norm": 0.7734375, "learning_rate": 1.7628689304982547e-05, "loss": 1.352, "step": 2028 }, { "epoch": 0.6376504309244754, "grad_norm": 0.69140625, "learning_rate": 1.7626150428435418e-05, "loss": 1.3923, "step": 2030 }, { "epoch": 0.6382786579500167, "grad_norm": 0.69140625, "learning_rate": 1.762361155188829e-05, "loss": 1.3658, "step": 2032 }, { "epoch": 0.638906884975558, "grad_norm": 0.96875, "learning_rate": 1.762107267534116e-05, "loss": 1.2421, "step": 2034 }, { "epoch": 0.6395351120010994, "grad_norm": 0.71875, "learning_rate": 1.7618533798794036e-05, "loss": 1.3427, "step": 2036 }, { "epoch": 0.6401633390266408, "grad_norm": 0.8515625, "learning_rate": 1.7615994922246907e-05, "loss": 1.3342, "step": 2038 }, { "epoch": 0.6407915660521821, "grad_norm": 0.7578125, "learning_rate": 1.761345604569978e-05, "loss": 1.4221, "step": 2040 }, { "epoch": 0.6414197930777235, "grad_norm": 0.73046875, "learning_rate": 1.761091716915265e-05, "loss": 1.3898, "step": 2042 }, { "epoch": 0.6420480201032648, "grad_norm": 0.703125, "learning_rate": 1.7608378292605525e-05, "loss": 1.5576, "step": 2044 }, { "epoch": 0.6426762471288062, "grad_norm": 0.79296875, "learning_rate": 1.7605839416058396e-05, "loss": 1.3117, "step": 2046 }, { "epoch": 0.6433044741543475, "grad_norm": 0.76171875, "learning_rate": 1.7603300539511267e-05, "loss": 1.2932, "step": 2048 }, { "epoch": 0.6439327011798889, "grad_norm": 0.7734375, "learning_rate": 1.7600761662964142e-05, "loss": 1.2463, "step": 2050 }, { "epoch": 0.6445609282054302, "grad_norm": 0.703125, "learning_rate": 1.759822278641701e-05, "loss": 1.3657, "step": 2052 }, { "epoch": 0.6451891552309716, "grad_norm": 0.7734375, "learning_rate": 1.7595683909869885e-05, "loss": 1.4386, "step": 2054 }, { "epoch": 0.645817382256513, "grad_norm": 0.80078125, "learning_rate": 1.7593145033322756e-05, "loss": 1.3022, "step": 2056 }, { "epoch": 0.6464456092820543, "grad_norm": 1.0546875, "learning_rate": 1.7590606156775628e-05, "loss": 1.5185, "step": 2058 }, { "epoch": 0.6470738363075956, "grad_norm": 0.890625, "learning_rate": 1.75880672802285e-05, "loss": 1.1322, "step": 2060 }, { "epoch": 0.647702063333137, "grad_norm": 0.671875, "learning_rate": 1.7585528403681374e-05, "loss": 1.3653, "step": 2062 }, { "epoch": 0.6483302903586784, "grad_norm": 0.81640625, "learning_rate": 1.7582989527134245e-05, "loss": 1.2649, "step": 2064 }, { "epoch": 0.6489585173842197, "grad_norm": 0.7578125, "learning_rate": 1.7580450650587117e-05, "loss": 1.4218, "step": 2066 }, { "epoch": 0.649586744409761, "grad_norm": 0.84375, "learning_rate": 1.7577911774039988e-05, "loss": 1.42, "step": 2068 }, { "epoch": 0.6502149714353024, "grad_norm": 0.83984375, "learning_rate": 1.7575372897492863e-05, "loss": 1.2207, "step": 2070 }, { "epoch": 0.6508431984608438, "grad_norm": 0.73046875, "learning_rate": 1.7572834020945734e-05, "loss": 1.3358, "step": 2072 }, { "epoch": 0.6514714254863851, "grad_norm": 0.921875, "learning_rate": 1.7570295144398606e-05, "loss": 1.2739, "step": 2074 }, { "epoch": 0.6520996525119265, "grad_norm": 0.8125, "learning_rate": 1.7567756267851477e-05, "loss": 1.3296, "step": 2076 }, { "epoch": 0.6527278795374678, "grad_norm": 0.76171875, "learning_rate": 1.756521739130435e-05, "loss": 1.3499, "step": 2078 }, { "epoch": 0.6533561065630092, "grad_norm": 0.78515625, "learning_rate": 1.756267851475722e-05, "loss": 1.3431, "step": 2080 }, { "epoch": 0.6539843335885506, "grad_norm": 0.6796875, "learning_rate": 1.7560139638210094e-05, "loss": 1.4784, "step": 2082 }, { "epoch": 0.6546125606140919, "grad_norm": 0.7265625, "learning_rate": 1.7557600761662966e-05, "loss": 1.2925, "step": 2084 }, { "epoch": 0.6552407876396332, "grad_norm": 0.828125, "learning_rate": 1.7555061885115837e-05, "loss": 1.2877, "step": 2086 }, { "epoch": 0.6558690146651747, "grad_norm": 0.74609375, "learning_rate": 1.755252300856871e-05, "loss": 1.3971, "step": 2088 }, { "epoch": 0.656497241690716, "grad_norm": 0.69921875, "learning_rate": 1.7549984132021583e-05, "loss": 1.2842, "step": 2090 }, { "epoch": 0.6571254687162573, "grad_norm": 0.734375, "learning_rate": 1.7547445255474455e-05, "loss": 1.1632, "step": 2092 }, { "epoch": 0.6577536957417986, "grad_norm": 0.828125, "learning_rate": 1.7544906378927326e-05, "loss": 1.3903, "step": 2094 }, { "epoch": 0.6583819227673401, "grad_norm": 0.7421875, "learning_rate": 1.7542367502380198e-05, "loss": 1.4009, "step": 2096 }, { "epoch": 0.6590101497928814, "grad_norm": 0.75390625, "learning_rate": 1.7539828625833072e-05, "loss": 1.353, "step": 2098 }, { "epoch": 0.6596383768184227, "grad_norm": 0.7421875, "learning_rate": 1.753728974928594e-05, "loss": 1.4069, "step": 2100 }, { "epoch": 0.6602666038439641, "grad_norm": 0.828125, "learning_rate": 1.7534750872738815e-05, "loss": 1.2694, "step": 2102 }, { "epoch": 0.6608948308695055, "grad_norm": 0.6796875, "learning_rate": 1.7532211996191686e-05, "loss": 1.2923, "step": 2104 }, { "epoch": 0.6615230578950468, "grad_norm": 0.75390625, "learning_rate": 1.7529673119644558e-05, "loss": 1.2756, "step": 2106 }, { "epoch": 0.6621512849205882, "grad_norm": 0.7421875, "learning_rate": 1.752713424309743e-05, "loss": 1.4151, "step": 2108 }, { "epoch": 0.6627795119461295, "grad_norm": 0.71875, "learning_rate": 1.7524595366550304e-05, "loss": 1.3067, "step": 2110 }, { "epoch": 0.6634077389716709, "grad_norm": 0.70703125, "learning_rate": 1.7522056490003175e-05, "loss": 1.3295, "step": 2112 }, { "epoch": 0.6640359659972123, "grad_norm": 0.7421875, "learning_rate": 1.7519517613456047e-05, "loss": 1.3994, "step": 2114 }, { "epoch": 0.6646641930227536, "grad_norm": 0.79296875, "learning_rate": 1.7516978736908918e-05, "loss": 1.3512, "step": 2116 }, { "epoch": 0.6652924200482949, "grad_norm": 0.71484375, "learning_rate": 1.7514439860361793e-05, "loss": 1.2393, "step": 2118 }, { "epoch": 0.6659206470738364, "grad_norm": 0.7734375, "learning_rate": 1.751190098381466e-05, "loss": 1.2977, "step": 2120 }, { "epoch": 0.6665488740993777, "grad_norm": 0.73828125, "learning_rate": 1.7509362107267536e-05, "loss": 1.4039, "step": 2122 }, { "epoch": 0.667177101124919, "grad_norm": 0.7265625, "learning_rate": 1.7506823230720407e-05, "loss": 1.3294, "step": 2124 }, { "epoch": 0.6678053281504603, "grad_norm": 0.69921875, "learning_rate": 1.750428435417328e-05, "loss": 1.2816, "step": 2126 }, { "epoch": 0.6684335551760018, "grad_norm": 0.75, "learning_rate": 1.750174547762615e-05, "loss": 1.3298, "step": 2128 }, { "epoch": 0.6690617822015431, "grad_norm": 0.6796875, "learning_rate": 1.7499206601079025e-05, "loss": 1.3823, "step": 2130 }, { "epoch": 0.6696900092270844, "grad_norm": 0.72265625, "learning_rate": 1.7496667724531896e-05, "loss": 1.2973, "step": 2132 }, { "epoch": 0.6703182362526258, "grad_norm": 0.67578125, "learning_rate": 1.7494128847984767e-05, "loss": 1.3873, "step": 2134 }, { "epoch": 0.6709464632781671, "grad_norm": 0.71484375, "learning_rate": 1.7491589971437642e-05, "loss": 1.3746, "step": 2136 }, { "epoch": 0.6715746903037085, "grad_norm": 0.71875, "learning_rate": 1.7489051094890514e-05, "loss": 1.2803, "step": 2138 }, { "epoch": 0.6722029173292499, "grad_norm": 0.78515625, "learning_rate": 1.7486512218343385e-05, "loss": 1.3632, "step": 2140 }, { "epoch": 0.6728311443547912, "grad_norm": 0.75, "learning_rate": 1.7483973341796256e-05, "loss": 1.3377, "step": 2142 }, { "epoch": 0.6734593713803325, "grad_norm": 0.69921875, "learning_rate": 1.748143446524913e-05, "loss": 1.2896, "step": 2144 }, { "epoch": 0.674087598405874, "grad_norm": 0.890625, "learning_rate": 1.7478895588702e-05, "loss": 1.2543, "step": 2146 }, { "epoch": 0.6747158254314153, "grad_norm": 0.87109375, "learning_rate": 1.7476356712154874e-05, "loss": 1.2882, "step": 2148 }, { "epoch": 0.6753440524569566, "grad_norm": 0.86328125, "learning_rate": 1.7473817835607745e-05, "loss": 1.3234, "step": 2150 }, { "epoch": 0.6759722794824979, "grad_norm": 0.75390625, "learning_rate": 1.7471278959060617e-05, "loss": 1.2965, "step": 2152 }, { "epoch": 0.6766005065080394, "grad_norm": 0.67578125, "learning_rate": 1.7468740082513488e-05, "loss": 1.4172, "step": 2154 }, { "epoch": 0.6772287335335807, "grad_norm": 0.73828125, "learning_rate": 1.7466201205966363e-05, "loss": 1.3369, "step": 2156 }, { "epoch": 0.677856960559122, "grad_norm": 0.7265625, "learning_rate": 1.7463662329419234e-05, "loss": 1.3239, "step": 2158 }, { "epoch": 0.6784851875846634, "grad_norm": 0.7265625, "learning_rate": 1.7461123452872105e-05, "loss": 1.2926, "step": 2160 }, { "epoch": 0.6791134146102048, "grad_norm": 0.83203125, "learning_rate": 1.7458584576324977e-05, "loss": 1.3588, "step": 2162 }, { "epoch": 0.6797416416357461, "grad_norm": 0.87109375, "learning_rate": 1.745604569977785e-05, "loss": 1.1972, "step": 2164 }, { "epoch": 0.6803698686612875, "grad_norm": 0.71484375, "learning_rate": 1.7453506823230723e-05, "loss": 1.2391, "step": 2166 }, { "epoch": 0.6809980956868288, "grad_norm": 0.82421875, "learning_rate": 1.7450967946683594e-05, "loss": 1.3438, "step": 2168 }, { "epoch": 0.6816263227123702, "grad_norm": 0.72265625, "learning_rate": 1.7448429070136466e-05, "loss": 1.4117, "step": 2170 }, { "epoch": 0.6822545497379116, "grad_norm": 0.79296875, "learning_rate": 1.7445890193589337e-05, "loss": 1.388, "step": 2172 }, { "epoch": 0.6828827767634529, "grad_norm": 0.6875, "learning_rate": 1.744335131704221e-05, "loss": 1.3602, "step": 2174 }, { "epoch": 0.6835110037889942, "grad_norm": 1.0546875, "learning_rate": 1.7440812440495083e-05, "loss": 1.2999, "step": 2176 }, { "epoch": 0.6841392308145356, "grad_norm": 0.828125, "learning_rate": 1.7438273563947955e-05, "loss": 1.3296, "step": 2178 }, { "epoch": 0.684767457840077, "grad_norm": 0.78125, "learning_rate": 1.7435734687400826e-05, "loss": 1.302, "step": 2180 }, { "epoch": 0.6853956848656183, "grad_norm": 0.73046875, "learning_rate": 1.7433195810853697e-05, "loss": 1.321, "step": 2182 }, { "epoch": 0.6860239118911596, "grad_norm": 0.78515625, "learning_rate": 1.7430656934306572e-05, "loss": 1.3628, "step": 2184 }, { "epoch": 0.6866521389167011, "grad_norm": 0.8671875, "learning_rate": 1.7428118057759444e-05, "loss": 1.3183, "step": 2186 }, { "epoch": 0.6872803659422424, "grad_norm": 0.921875, "learning_rate": 1.7425579181212315e-05, "loss": 1.4956, "step": 2188 }, { "epoch": 0.6879085929677837, "grad_norm": 0.7265625, "learning_rate": 1.7423040304665186e-05, "loss": 1.4264, "step": 2190 }, { "epoch": 0.688536819993325, "grad_norm": 0.765625, "learning_rate": 1.742050142811806e-05, "loss": 1.3176, "step": 2192 }, { "epoch": 0.6891650470188665, "grad_norm": 0.78515625, "learning_rate": 1.741796255157093e-05, "loss": 1.3268, "step": 2194 }, { "epoch": 0.6897932740444078, "grad_norm": 1.046875, "learning_rate": 1.7415423675023804e-05, "loss": 1.346, "step": 2196 }, { "epoch": 0.6904215010699492, "grad_norm": 0.80078125, "learning_rate": 1.7412884798476675e-05, "loss": 1.3614, "step": 2198 }, { "epoch": 0.6910497280954905, "grad_norm": 0.7265625, "learning_rate": 1.7410345921929547e-05, "loss": 1.2779, "step": 2200 }, { "epoch": 0.6916779551210319, "grad_norm": 0.7265625, "learning_rate": 1.7407807045382418e-05, "loss": 1.2913, "step": 2202 }, { "epoch": 0.6923061821465732, "grad_norm": 0.86328125, "learning_rate": 1.7405268168835293e-05, "loss": 1.3669, "step": 2204 }, { "epoch": 0.6929344091721146, "grad_norm": 0.71484375, "learning_rate": 1.7402729292288164e-05, "loss": 1.2992, "step": 2206 }, { "epoch": 0.6935626361976559, "grad_norm": 0.80078125, "learning_rate": 1.7400190415741036e-05, "loss": 1.3157, "step": 2208 }, { "epoch": 0.6941908632231972, "grad_norm": 0.828125, "learning_rate": 1.7397651539193907e-05, "loss": 1.1712, "step": 2210 }, { "epoch": 0.6948190902487387, "grad_norm": 0.78515625, "learning_rate": 1.7395112662646782e-05, "loss": 1.1813, "step": 2212 }, { "epoch": 0.69544731727428, "grad_norm": 0.76171875, "learning_rate": 1.739257378609965e-05, "loss": 1.3688, "step": 2214 }, { "epoch": 0.6960755442998213, "grad_norm": 0.9453125, "learning_rate": 1.7390034909552525e-05, "loss": 1.3554, "step": 2216 }, { "epoch": 0.6967037713253627, "grad_norm": 0.87890625, "learning_rate": 1.7387496033005396e-05, "loss": 1.3605, "step": 2218 }, { "epoch": 0.6973319983509041, "grad_norm": 0.890625, "learning_rate": 1.7384957156458267e-05, "loss": 1.2138, "step": 2220 }, { "epoch": 0.6979602253764454, "grad_norm": 0.8515625, "learning_rate": 1.7382418279911142e-05, "loss": 1.309, "step": 2222 }, { "epoch": 0.6985884524019867, "grad_norm": 0.78515625, "learning_rate": 1.7379879403364013e-05, "loss": 1.2578, "step": 2224 }, { "epoch": 0.6992166794275281, "grad_norm": 0.78515625, "learning_rate": 1.7377340526816885e-05, "loss": 1.3457, "step": 2226 }, { "epoch": 0.6998449064530695, "grad_norm": 0.7890625, "learning_rate": 1.7374801650269756e-05, "loss": 1.2938, "step": 2228 }, { "epoch": 0.7004731334786108, "grad_norm": 0.8984375, "learning_rate": 1.737226277372263e-05, "loss": 1.284, "step": 2230 }, { "epoch": 0.7011013605041522, "grad_norm": 0.72265625, "learning_rate": 1.7369723897175502e-05, "loss": 1.415, "step": 2232 }, { "epoch": 0.7017295875296935, "grad_norm": 0.94921875, "learning_rate": 1.7367185020628374e-05, "loss": 1.2291, "step": 2234 }, { "epoch": 0.7023578145552349, "grad_norm": 0.74609375, "learning_rate": 1.7364646144081245e-05, "loss": 1.374, "step": 2236 }, { "epoch": 0.7029860415807763, "grad_norm": 0.6953125, "learning_rate": 1.736210726753412e-05, "loss": 1.4697, "step": 2238 }, { "epoch": 0.7036142686063176, "grad_norm": 0.71484375, "learning_rate": 1.7359568390986988e-05, "loss": 1.2784, "step": 2240 }, { "epoch": 0.7042424956318589, "grad_norm": 0.73828125, "learning_rate": 1.7357029514439863e-05, "loss": 1.2381, "step": 2242 }, { "epoch": 0.7048707226574004, "grad_norm": 0.78125, "learning_rate": 1.7354490637892734e-05, "loss": 1.2173, "step": 2244 }, { "epoch": 0.7054989496829417, "grad_norm": 0.77734375, "learning_rate": 1.7351951761345605e-05, "loss": 1.2839, "step": 2246 }, { "epoch": 0.706127176708483, "grad_norm": 0.6953125, "learning_rate": 1.7349412884798477e-05, "loss": 1.3768, "step": 2248 }, { "epoch": 0.7067554037340243, "grad_norm": 0.81640625, "learning_rate": 1.734687400825135e-05, "loss": 1.3607, "step": 2250 }, { "epoch": 0.7073836307595658, "grad_norm": 0.703125, "learning_rate": 1.7344335131704223e-05, "loss": 1.3943, "step": 2252 }, { "epoch": 0.7080118577851071, "grad_norm": 0.6875, "learning_rate": 1.7341796255157094e-05, "loss": 1.4092, "step": 2254 }, { "epoch": 0.7086400848106484, "grad_norm": 0.75, "learning_rate": 1.7339257378609966e-05, "loss": 1.2429, "step": 2256 }, { "epoch": 0.7092683118361898, "grad_norm": 0.7109375, "learning_rate": 1.733671850206284e-05, "loss": 1.458, "step": 2258 }, { "epoch": 0.7098965388617312, "grad_norm": 0.67578125, "learning_rate": 1.7334179625515712e-05, "loss": 1.3227, "step": 2260 }, { "epoch": 0.7105247658872725, "grad_norm": 0.79296875, "learning_rate": 1.7331640748968583e-05, "loss": 1.4453, "step": 2262 }, { "epoch": 0.7111529929128139, "grad_norm": 0.74609375, "learning_rate": 1.7329101872421455e-05, "loss": 1.2725, "step": 2264 }, { "epoch": 0.7117812199383552, "grad_norm": 0.74609375, "learning_rate": 1.7326562995874326e-05, "loss": 1.2165, "step": 2266 }, { "epoch": 0.7124094469638966, "grad_norm": 0.76171875, "learning_rate": 1.7324024119327197e-05, "loss": 1.4287, "step": 2268 }, { "epoch": 0.713037673989438, "grad_norm": 0.6484375, "learning_rate": 1.7321485242780072e-05, "loss": 1.2783, "step": 2270 }, { "epoch": 0.7136659010149793, "grad_norm": 0.6875, "learning_rate": 1.7318946366232944e-05, "loss": 1.3641, "step": 2272 }, { "epoch": 0.7142941280405206, "grad_norm": 0.7109375, "learning_rate": 1.7316407489685815e-05, "loss": 1.3313, "step": 2274 }, { "epoch": 0.7149223550660619, "grad_norm": 0.68359375, "learning_rate": 1.7313868613138686e-05, "loss": 1.3461, "step": 2276 }, { "epoch": 0.7155505820916034, "grad_norm": 0.6953125, "learning_rate": 1.731132973659156e-05, "loss": 1.3159, "step": 2278 }, { "epoch": 0.7161788091171447, "grad_norm": 0.765625, "learning_rate": 1.7308790860044432e-05, "loss": 1.2575, "step": 2280 }, { "epoch": 0.716807036142686, "grad_norm": 0.65625, "learning_rate": 1.7306251983497304e-05, "loss": 1.3816, "step": 2282 }, { "epoch": 0.7174352631682274, "grad_norm": 0.71875, "learning_rate": 1.7303713106950175e-05, "loss": 1.4548, "step": 2284 }, { "epoch": 0.7180634901937688, "grad_norm": 0.83984375, "learning_rate": 1.730117423040305e-05, "loss": 1.2777, "step": 2286 }, { "epoch": 0.7186917172193101, "grad_norm": 0.7421875, "learning_rate": 1.7298635353855918e-05, "loss": 1.3142, "step": 2288 }, { "epoch": 0.7193199442448515, "grad_norm": 0.7890625, "learning_rate": 1.7296096477308793e-05, "loss": 1.2618, "step": 2290 }, { "epoch": 0.7199481712703928, "grad_norm": 0.70703125, "learning_rate": 1.7293557600761664e-05, "loss": 1.3586, "step": 2292 }, { "epoch": 0.7205763982959342, "grad_norm": 0.77734375, "learning_rate": 1.7291018724214536e-05, "loss": 1.2284, "step": 2294 }, { "epoch": 0.7212046253214756, "grad_norm": 0.76953125, "learning_rate": 1.7288479847667407e-05, "loss": 1.3143, "step": 2296 }, { "epoch": 0.7218328523470169, "grad_norm": 0.73828125, "learning_rate": 1.728594097112028e-05, "loss": 1.2988, "step": 2298 }, { "epoch": 0.7224610793725582, "grad_norm": 0.78125, "learning_rate": 1.7283402094573153e-05, "loss": 1.3754, "step": 2300 }, { "epoch": 0.7230893063980997, "grad_norm": 0.69140625, "learning_rate": 1.7280863218026024e-05, "loss": 1.3633, "step": 2302 }, { "epoch": 0.723717533423641, "grad_norm": 0.71875, "learning_rate": 1.7278324341478896e-05, "loss": 1.4773, "step": 2304 }, { "epoch": 0.7243457604491823, "grad_norm": 0.77734375, "learning_rate": 1.727578546493177e-05, "loss": 1.351, "step": 2306 }, { "epoch": 0.7249739874747236, "grad_norm": 0.73828125, "learning_rate": 1.7273246588384642e-05, "loss": 1.3577, "step": 2308 }, { "epoch": 0.7256022145002651, "grad_norm": 0.81640625, "learning_rate": 1.7270707711837513e-05, "loss": 1.2883, "step": 2310 }, { "epoch": 0.7262304415258064, "grad_norm": 0.67578125, "learning_rate": 1.7268168835290388e-05, "loss": 1.4049, "step": 2312 }, { "epoch": 0.7268586685513477, "grad_norm": 0.6796875, "learning_rate": 1.7265629958743256e-05, "loss": 1.3443, "step": 2314 }, { "epoch": 0.7274868955768891, "grad_norm": 0.90234375, "learning_rate": 1.726309108219613e-05, "loss": 1.2131, "step": 2316 }, { "epoch": 0.7281151226024305, "grad_norm": 0.71875, "learning_rate": 1.7260552205649002e-05, "loss": 1.353, "step": 2318 }, { "epoch": 0.7287433496279718, "grad_norm": 0.73828125, "learning_rate": 1.7258013329101874e-05, "loss": 1.2911, "step": 2320 }, { "epoch": 0.7293715766535132, "grad_norm": 0.90625, "learning_rate": 1.7255474452554745e-05, "loss": 1.3567, "step": 2322 }, { "epoch": 0.7299998036790545, "grad_norm": 0.70703125, "learning_rate": 1.725293557600762e-05, "loss": 1.3589, "step": 2324 }, { "epoch": 0.7306280307045959, "grad_norm": 0.87109375, "learning_rate": 1.725039669946049e-05, "loss": 1.3734, "step": 2326 }, { "epoch": 0.7312562577301372, "grad_norm": 0.6484375, "learning_rate": 1.7247857822913363e-05, "loss": 1.3007, "step": 2328 }, { "epoch": 0.7318844847556786, "grad_norm": 0.80859375, "learning_rate": 1.7245318946366234e-05, "loss": 1.3652, "step": 2330 }, { "epoch": 0.7325127117812199, "grad_norm": 0.72265625, "learning_rate": 1.724278006981911e-05, "loss": 1.3929, "step": 2332 }, { "epoch": 0.7331409388067613, "grad_norm": 0.6640625, "learning_rate": 1.7240241193271977e-05, "loss": 1.2893, "step": 2334 }, { "epoch": 0.7337691658323027, "grad_norm": 0.77734375, "learning_rate": 1.723770231672485e-05, "loss": 1.4986, "step": 2336 }, { "epoch": 0.734397392857844, "grad_norm": 0.6875, "learning_rate": 1.7235163440177723e-05, "loss": 1.3224, "step": 2338 }, { "epoch": 0.7350256198833853, "grad_norm": 0.77734375, "learning_rate": 1.7232624563630594e-05, "loss": 1.422, "step": 2340 }, { "epoch": 0.7356538469089268, "grad_norm": 0.703125, "learning_rate": 1.7230085687083466e-05, "loss": 1.4021, "step": 2342 }, { "epoch": 0.7362820739344681, "grad_norm": 0.67578125, "learning_rate": 1.722754681053634e-05, "loss": 1.3948, "step": 2344 }, { "epoch": 0.7369103009600094, "grad_norm": 0.73046875, "learning_rate": 1.7225007933989212e-05, "loss": 1.2958, "step": 2346 }, { "epoch": 0.7375385279855508, "grad_norm": 0.734375, "learning_rate": 1.7222469057442083e-05, "loss": 1.2972, "step": 2348 }, { "epoch": 0.7381667550110921, "grad_norm": 0.68359375, "learning_rate": 1.7219930180894955e-05, "loss": 1.3356, "step": 2350 }, { "epoch": 0.7387949820366335, "grad_norm": 0.82421875, "learning_rate": 1.721739130434783e-05, "loss": 1.2247, "step": 2352 }, { "epoch": 0.7394232090621748, "grad_norm": 0.70703125, "learning_rate": 1.7214852427800697e-05, "loss": 1.3243, "step": 2354 }, { "epoch": 0.7400514360877162, "grad_norm": 0.7265625, "learning_rate": 1.7212313551253572e-05, "loss": 1.4064, "step": 2356 }, { "epoch": 0.7406796631132575, "grad_norm": 0.77734375, "learning_rate": 1.7209774674706443e-05, "loss": 1.4806, "step": 2358 }, { "epoch": 0.7413078901387989, "grad_norm": 0.85546875, "learning_rate": 1.7207235798159315e-05, "loss": 1.3769, "step": 2360 }, { "epoch": 0.7419361171643403, "grad_norm": 0.71875, "learning_rate": 1.7204696921612186e-05, "loss": 1.2256, "step": 2362 }, { "epoch": 0.7425643441898816, "grad_norm": 0.78125, "learning_rate": 1.720215804506506e-05, "loss": 1.389, "step": 2364 }, { "epoch": 0.7431925712154229, "grad_norm": 0.6796875, "learning_rate": 1.7199619168517932e-05, "loss": 1.4362, "step": 2366 }, { "epoch": 0.7438207982409644, "grad_norm": 0.8984375, "learning_rate": 1.7197080291970804e-05, "loss": 1.4191, "step": 2368 }, { "epoch": 0.7444490252665057, "grad_norm": 0.7265625, "learning_rate": 1.7194541415423675e-05, "loss": 1.3115, "step": 2370 }, { "epoch": 0.745077252292047, "grad_norm": 0.7578125, "learning_rate": 1.719200253887655e-05, "loss": 1.4019, "step": 2372 }, { "epoch": 0.7457054793175883, "grad_norm": 0.734375, "learning_rate": 1.718946366232942e-05, "loss": 1.3587, "step": 2374 }, { "epoch": 0.7463337063431298, "grad_norm": 0.87109375, "learning_rate": 1.7186924785782293e-05, "loss": 1.3749, "step": 2376 }, { "epoch": 0.7469619333686711, "grad_norm": 0.6875, "learning_rate": 1.7184385909235164e-05, "loss": 1.3042, "step": 2378 }, { "epoch": 0.7475901603942124, "grad_norm": 0.73828125, "learning_rate": 1.7181847032688035e-05, "loss": 1.2356, "step": 2380 }, { "epoch": 0.7482183874197538, "grad_norm": 0.7734375, "learning_rate": 1.7179308156140907e-05, "loss": 1.2864, "step": 2382 }, { "epoch": 0.7488466144452952, "grad_norm": 0.69921875, "learning_rate": 1.717676927959378e-05, "loss": 1.3995, "step": 2384 }, { "epoch": 0.7494748414708365, "grad_norm": 0.78125, "learning_rate": 1.7174230403046653e-05, "loss": 1.2924, "step": 2386 }, { "epoch": 0.7501030684963779, "grad_norm": 0.81640625, "learning_rate": 1.7171691526499524e-05, "loss": 1.2801, "step": 2388 }, { "epoch": 0.7507312955219192, "grad_norm": 0.7890625, "learning_rate": 1.7169152649952396e-05, "loss": 1.2726, "step": 2390 }, { "epoch": 0.7513595225474606, "grad_norm": 0.734375, "learning_rate": 1.716661377340527e-05, "loss": 1.35, "step": 2392 }, { "epoch": 0.751987749573002, "grad_norm": 0.796875, "learning_rate": 1.7164074896858142e-05, "loss": 1.2783, "step": 2394 }, { "epoch": 0.7526159765985433, "grad_norm": 0.78515625, "learning_rate": 1.7161536020311013e-05, "loss": 1.3665, "step": 2396 }, { "epoch": 0.7532442036240846, "grad_norm": 0.98046875, "learning_rate": 1.7158997143763888e-05, "loss": 1.3679, "step": 2398 }, { "epoch": 0.7538724306496261, "grad_norm": 0.78515625, "learning_rate": 1.715645826721676e-05, "loss": 1.3874, "step": 2400 }, { "epoch": 0.7545006576751674, "grad_norm": 0.75390625, "learning_rate": 1.715391939066963e-05, "loss": 1.2631, "step": 2402 }, { "epoch": 0.7551288847007087, "grad_norm": 0.796875, "learning_rate": 1.7151380514122502e-05, "loss": 1.2159, "step": 2404 }, { "epoch": 0.75575711172625, "grad_norm": 0.74609375, "learning_rate": 1.7148841637575374e-05, "loss": 1.3067, "step": 2406 }, { "epoch": 0.7563853387517915, "grad_norm": 0.7109375, "learning_rate": 1.7146302761028245e-05, "loss": 1.3503, "step": 2408 }, { "epoch": 0.7570135657773328, "grad_norm": 0.7421875, "learning_rate": 1.714376388448112e-05, "loss": 1.369, "step": 2410 }, { "epoch": 0.7576417928028741, "grad_norm": 0.88671875, "learning_rate": 1.714122500793399e-05, "loss": 1.2634, "step": 2412 }, { "epoch": 0.7582700198284155, "grad_norm": 0.75390625, "learning_rate": 1.7138686131386862e-05, "loss": 1.2631, "step": 2414 }, { "epoch": 0.7588982468539569, "grad_norm": 0.72265625, "learning_rate": 1.7136147254839734e-05, "loss": 1.33, "step": 2416 }, { "epoch": 0.7595264738794982, "grad_norm": 0.72265625, "learning_rate": 1.713360837829261e-05, "loss": 1.3229, "step": 2418 }, { "epoch": 0.7601547009050396, "grad_norm": 1.1640625, "learning_rate": 1.713106950174548e-05, "loss": 1.2857, "step": 2420 }, { "epoch": 0.7607829279305809, "grad_norm": 0.875, "learning_rate": 1.712853062519835e-05, "loss": 1.3812, "step": 2422 }, { "epoch": 0.7614111549561222, "grad_norm": 0.6953125, "learning_rate": 1.7125991748651223e-05, "loss": 1.3809, "step": 2424 }, { "epoch": 0.7620393819816637, "grad_norm": 0.7890625, "learning_rate": 1.7123452872104098e-05, "loss": 1.3366, "step": 2426 }, { "epoch": 0.762667609007205, "grad_norm": 0.77734375, "learning_rate": 1.7120913995556966e-05, "loss": 1.3628, "step": 2428 }, { "epoch": 0.7632958360327463, "grad_norm": 0.8046875, "learning_rate": 1.711837511900984e-05, "loss": 1.3394, "step": 2430 }, { "epoch": 0.7639240630582876, "grad_norm": 0.7265625, "learning_rate": 1.7115836242462712e-05, "loss": 1.4378, "step": 2432 }, { "epoch": 0.7645522900838291, "grad_norm": 0.7890625, "learning_rate": 1.7113297365915583e-05, "loss": 1.1978, "step": 2434 }, { "epoch": 0.7651805171093704, "grad_norm": 0.75, "learning_rate": 1.7110758489368454e-05, "loss": 1.2939, "step": 2436 }, { "epoch": 0.7658087441349117, "grad_norm": 0.7109375, "learning_rate": 1.710821961282133e-05, "loss": 1.3248, "step": 2438 }, { "epoch": 0.7664369711604531, "grad_norm": 0.7578125, "learning_rate": 1.71056807362742e-05, "loss": 1.2087, "step": 2440 }, { "epoch": 0.7670651981859945, "grad_norm": 0.81640625, "learning_rate": 1.7103141859727072e-05, "loss": 1.1633, "step": 2442 }, { "epoch": 0.7676934252115358, "grad_norm": 1.078125, "learning_rate": 1.7100602983179943e-05, "loss": 1.2432, "step": 2444 }, { "epoch": 0.7683216522370772, "grad_norm": 0.75390625, "learning_rate": 1.7098064106632818e-05, "loss": 1.3272, "step": 2446 }, { "epoch": 0.7689498792626185, "grad_norm": 0.78125, "learning_rate": 1.7095525230085686e-05, "loss": 1.2589, "step": 2448 }, { "epoch": 0.7695781062881599, "grad_norm": 0.71484375, "learning_rate": 1.709298635353856e-05, "loss": 1.375, "step": 2450 }, { "epoch": 0.7702063333137013, "grad_norm": 0.72265625, "learning_rate": 1.7090447476991432e-05, "loss": 1.2817, "step": 2452 }, { "epoch": 0.7708345603392426, "grad_norm": 0.75390625, "learning_rate": 1.7087908600444304e-05, "loss": 1.2879, "step": 2454 }, { "epoch": 0.7714627873647839, "grad_norm": 1.234375, "learning_rate": 1.7085369723897175e-05, "loss": 1.2573, "step": 2456 }, { "epoch": 0.7720910143903253, "grad_norm": 0.7890625, "learning_rate": 1.708283084735005e-05, "loss": 1.343, "step": 2458 }, { "epoch": 0.7727192414158667, "grad_norm": 0.7109375, "learning_rate": 1.708029197080292e-05, "loss": 1.357, "step": 2460 }, { "epoch": 0.773347468441408, "grad_norm": 0.74609375, "learning_rate": 1.7077753094255793e-05, "loss": 1.3493, "step": 2462 }, { "epoch": 0.7739756954669493, "grad_norm": 0.78515625, "learning_rate": 1.7075214217708664e-05, "loss": 1.2568, "step": 2464 }, { "epoch": 0.7746039224924908, "grad_norm": 0.73828125, "learning_rate": 1.707267534116154e-05, "loss": 1.3476, "step": 2466 }, { "epoch": 0.7752321495180321, "grad_norm": 0.76171875, "learning_rate": 1.707013646461441e-05, "loss": 1.2797, "step": 2468 }, { "epoch": 0.7758603765435734, "grad_norm": 0.75390625, "learning_rate": 1.706759758806728e-05, "loss": 1.3368, "step": 2470 }, { "epoch": 0.7764886035691148, "grad_norm": 0.671875, "learning_rate": 1.7065058711520153e-05, "loss": 1.2807, "step": 2472 }, { "epoch": 0.7771168305946562, "grad_norm": 0.71484375, "learning_rate": 1.7062519834973024e-05, "loss": 1.5012, "step": 2474 }, { "epoch": 0.7777450576201975, "grad_norm": 0.734375, "learning_rate": 1.7059980958425896e-05, "loss": 1.3204, "step": 2476 }, { "epoch": 0.7783732846457388, "grad_norm": 0.8046875, "learning_rate": 1.705744208187877e-05, "loss": 1.3475, "step": 2478 }, { "epoch": 0.7790015116712802, "grad_norm": 0.796875, "learning_rate": 1.7054903205331642e-05, "loss": 1.2051, "step": 2480 }, { "epoch": 0.7796297386968216, "grad_norm": 0.68359375, "learning_rate": 1.7052364328784513e-05, "loss": 1.3502, "step": 2482 }, { "epoch": 0.780257965722363, "grad_norm": 0.9140625, "learning_rate": 1.7049825452237388e-05, "loss": 1.2337, "step": 2484 }, { "epoch": 0.7808861927479043, "grad_norm": 0.77734375, "learning_rate": 1.704728657569026e-05, "loss": 1.3524, "step": 2486 }, { "epoch": 0.7815144197734456, "grad_norm": 0.82421875, "learning_rate": 1.704474769914313e-05, "loss": 1.3843, "step": 2488 }, { "epoch": 0.7821426467989869, "grad_norm": 0.6953125, "learning_rate": 1.7042208822596002e-05, "loss": 1.3905, "step": 2490 }, { "epoch": 0.7827708738245284, "grad_norm": 0.69921875, "learning_rate": 1.7039669946048877e-05, "loss": 1.3168, "step": 2492 }, { "epoch": 0.7833991008500697, "grad_norm": 0.79296875, "learning_rate": 1.7037131069501748e-05, "loss": 1.233, "step": 2494 }, { "epoch": 0.784027327875611, "grad_norm": 0.77734375, "learning_rate": 1.703459219295462e-05, "loss": 1.3278, "step": 2496 }, { "epoch": 0.7846555549011524, "grad_norm": 0.6953125, "learning_rate": 1.703205331640749e-05, "loss": 1.2751, "step": 2498 }, { "epoch": 0.7852837819266938, "grad_norm": 0.796875, "learning_rate": 1.7029514439860362e-05, "loss": 1.3463, "step": 2500 }, { "epoch": 0.7859120089522351, "grad_norm": 0.80859375, "learning_rate": 1.7026975563313234e-05, "loss": 1.2921, "step": 2502 }, { "epoch": 0.7865402359777764, "grad_norm": 0.71484375, "learning_rate": 1.702443668676611e-05, "loss": 1.1402, "step": 2504 }, { "epoch": 0.7871684630033178, "grad_norm": 1.125, "learning_rate": 1.702189781021898e-05, "loss": 1.2382, "step": 2506 }, { "epoch": 0.7877966900288592, "grad_norm": 0.63671875, "learning_rate": 1.701935893367185e-05, "loss": 1.3848, "step": 2508 }, { "epoch": 0.7884249170544005, "grad_norm": 0.7578125, "learning_rate": 1.7016820057124723e-05, "loss": 1.2577, "step": 2510 }, { "epoch": 0.7890531440799419, "grad_norm": 0.74609375, "learning_rate": 1.7014281180577597e-05, "loss": 1.4976, "step": 2512 }, { "epoch": 0.7896813711054832, "grad_norm": 0.65234375, "learning_rate": 1.701174230403047e-05, "loss": 1.3051, "step": 2514 }, { "epoch": 0.7903095981310246, "grad_norm": 0.75, "learning_rate": 1.700920342748334e-05, "loss": 1.3637, "step": 2516 }, { "epoch": 0.790937825156566, "grad_norm": 0.828125, "learning_rate": 1.700666455093621e-05, "loss": 1.2335, "step": 2518 }, { "epoch": 0.7915660521821073, "grad_norm": 0.73828125, "learning_rate": 1.7004125674389086e-05, "loss": 1.2534, "step": 2520 }, { "epoch": 0.7921942792076486, "grad_norm": 0.78515625, "learning_rate": 1.7001586797841954e-05, "loss": 1.4272, "step": 2522 }, { "epoch": 0.7928225062331901, "grad_norm": 0.66796875, "learning_rate": 1.699904792129483e-05, "loss": 1.2296, "step": 2524 }, { "epoch": 0.7934507332587314, "grad_norm": 0.765625, "learning_rate": 1.69965090447477e-05, "loss": 1.3799, "step": 2526 }, { "epoch": 0.7940789602842727, "grad_norm": 0.625, "learning_rate": 1.6993970168200572e-05, "loss": 1.4241, "step": 2528 }, { "epoch": 0.794707187309814, "grad_norm": 0.8125, "learning_rate": 1.6991431291653443e-05, "loss": 1.2411, "step": 2530 }, { "epoch": 0.7953354143353555, "grad_norm": 1.078125, "learning_rate": 1.6988892415106318e-05, "loss": 1.3962, "step": 2532 }, { "epoch": 0.7959636413608968, "grad_norm": 0.8828125, "learning_rate": 1.698635353855919e-05, "loss": 1.3154, "step": 2534 }, { "epoch": 0.7965918683864381, "grad_norm": 0.62890625, "learning_rate": 1.698381466201206e-05, "loss": 1.3236, "step": 2536 }, { "epoch": 0.7972200954119795, "grad_norm": 0.80859375, "learning_rate": 1.6981275785464932e-05, "loss": 1.2605, "step": 2538 }, { "epoch": 0.7978483224375209, "grad_norm": 0.7578125, "learning_rate": 1.6978736908917807e-05, "loss": 1.2216, "step": 2540 }, { "epoch": 0.7984765494630622, "grad_norm": 0.6875, "learning_rate": 1.6976198032370675e-05, "loss": 1.3394, "step": 2542 }, { "epoch": 0.7991047764886036, "grad_norm": 0.73828125, "learning_rate": 1.697365915582355e-05, "loss": 1.331, "step": 2544 }, { "epoch": 0.7997330035141449, "grad_norm": 0.72265625, "learning_rate": 1.697112027927642e-05, "loss": 1.3703, "step": 2546 }, { "epoch": 0.8003612305396863, "grad_norm": 0.828125, "learning_rate": 1.6968581402729293e-05, "loss": 1.3128, "step": 2548 }, { "epoch": 0.8009894575652277, "grad_norm": 0.8125, "learning_rate": 1.6966042526182164e-05, "loss": 1.278, "step": 2550 }, { "epoch": 0.801617684590769, "grad_norm": 0.65625, "learning_rate": 1.696350364963504e-05, "loss": 1.3876, "step": 2552 }, { "epoch": 0.8022459116163103, "grad_norm": 0.71484375, "learning_rate": 1.696096477308791e-05, "loss": 1.2858, "step": 2554 }, { "epoch": 0.8028741386418518, "grad_norm": 0.6953125, "learning_rate": 1.695842589654078e-05, "loss": 1.412, "step": 2556 }, { "epoch": 0.8035023656673931, "grad_norm": 0.7109375, "learning_rate": 1.6955887019993653e-05, "loss": 1.4499, "step": 2558 }, { "epoch": 0.8041305926929344, "grad_norm": 0.9140625, "learning_rate": 1.6953348143446528e-05, "loss": 1.291, "step": 2560 }, { "epoch": 0.8047588197184757, "grad_norm": 0.90625, "learning_rate": 1.69508092668994e-05, "loss": 1.4154, "step": 2562 }, { "epoch": 0.8053870467440171, "grad_norm": 0.82421875, "learning_rate": 1.694827039035227e-05, "loss": 1.4474, "step": 2564 }, { "epoch": 0.8060152737695585, "grad_norm": 0.79296875, "learning_rate": 1.6945731513805145e-05, "loss": 1.3263, "step": 2566 }, { "epoch": 0.8066435007950998, "grad_norm": 0.84375, "learning_rate": 1.6943192637258013e-05, "loss": 1.3238, "step": 2568 }, { "epoch": 0.8072717278206412, "grad_norm": 0.83984375, "learning_rate": 1.6940653760710888e-05, "loss": 1.4225, "step": 2570 }, { "epoch": 0.8078999548461825, "grad_norm": 0.70703125, "learning_rate": 1.693811488416376e-05, "loss": 1.2038, "step": 2572 }, { "epoch": 0.8085281818717239, "grad_norm": 0.8359375, "learning_rate": 1.693557600761663e-05, "loss": 1.1913, "step": 2574 }, { "epoch": 0.8091564088972653, "grad_norm": 0.76953125, "learning_rate": 1.6933037131069502e-05, "loss": 1.3431, "step": 2576 }, { "epoch": 0.8097846359228066, "grad_norm": 0.87890625, "learning_rate": 1.6930498254522377e-05, "loss": 1.3336, "step": 2578 }, { "epoch": 0.8104128629483479, "grad_norm": 0.87890625, "learning_rate": 1.6927959377975248e-05, "loss": 1.2205, "step": 2580 }, { "epoch": 0.8110410899738894, "grad_norm": 0.69921875, "learning_rate": 1.692542050142812e-05, "loss": 1.3004, "step": 2582 }, { "epoch": 0.8116693169994307, "grad_norm": 0.75390625, "learning_rate": 1.692288162488099e-05, "loss": 1.3125, "step": 2584 }, { "epoch": 0.812297544024972, "grad_norm": 0.6953125, "learning_rate": 1.6920342748333866e-05, "loss": 1.4572, "step": 2586 }, { "epoch": 0.8129257710505133, "grad_norm": 0.74609375, "learning_rate": 1.6917803871786737e-05, "loss": 1.2809, "step": 2588 }, { "epoch": 0.8135539980760548, "grad_norm": 0.66796875, "learning_rate": 1.691526499523961e-05, "loss": 1.2979, "step": 2590 }, { "epoch": 0.8141822251015961, "grad_norm": 0.890625, "learning_rate": 1.691272611869248e-05, "loss": 1.3751, "step": 2592 }, { "epoch": 0.8148104521271374, "grad_norm": 0.8125, "learning_rate": 1.691018724214535e-05, "loss": 1.3556, "step": 2594 }, { "epoch": 0.8154386791526788, "grad_norm": 0.734375, "learning_rate": 1.6907648365598223e-05, "loss": 1.2648, "step": 2596 }, { "epoch": 0.8160669061782202, "grad_norm": 0.77734375, "learning_rate": 1.6905109489051097e-05, "loss": 1.3499, "step": 2598 }, { "epoch": 0.8166951332037615, "grad_norm": 0.8359375, "learning_rate": 1.690257061250397e-05, "loss": 1.3424, "step": 2600 }, { "epoch": 0.8173233602293029, "grad_norm": 0.72265625, "learning_rate": 1.690003173595684e-05, "loss": 1.3746, "step": 2602 }, { "epoch": 0.8179515872548442, "grad_norm": 0.78515625, "learning_rate": 1.689749285940971e-05, "loss": 1.3152, "step": 2604 }, { "epoch": 0.8185798142803856, "grad_norm": 0.7109375, "learning_rate": 1.6894953982862586e-05, "loss": 1.3755, "step": 2606 }, { "epoch": 0.819208041305927, "grad_norm": 0.84765625, "learning_rate": 1.6892415106315458e-05, "loss": 1.2247, "step": 2608 }, { "epoch": 0.8198362683314683, "grad_norm": 0.69921875, "learning_rate": 1.688987622976833e-05, "loss": 1.4328, "step": 2610 }, { "epoch": 0.8204644953570096, "grad_norm": 0.6796875, "learning_rate": 1.68873373532212e-05, "loss": 1.2965, "step": 2612 }, { "epoch": 0.821092722382551, "grad_norm": 0.91015625, "learning_rate": 1.6884798476674075e-05, "loss": 1.2175, "step": 2614 }, { "epoch": 0.8217209494080924, "grad_norm": 0.8828125, "learning_rate": 1.6882259600126943e-05, "loss": 1.1868, "step": 2616 }, { "epoch": 0.8223491764336337, "grad_norm": 0.9296875, "learning_rate": 1.6879720723579818e-05, "loss": 1.331, "step": 2618 }, { "epoch": 0.822977403459175, "grad_norm": 0.69140625, "learning_rate": 1.687718184703269e-05, "loss": 1.3342, "step": 2620 }, { "epoch": 0.8236056304847165, "grad_norm": 0.68359375, "learning_rate": 1.687464297048556e-05, "loss": 1.3036, "step": 2622 }, { "epoch": 0.8242338575102578, "grad_norm": 0.75390625, "learning_rate": 1.6872104093938432e-05, "loss": 1.2481, "step": 2624 }, { "epoch": 0.8248620845357991, "grad_norm": 0.703125, "learning_rate": 1.6869565217391307e-05, "loss": 1.3175, "step": 2626 }, { "epoch": 0.8254903115613405, "grad_norm": 0.97265625, "learning_rate": 1.686702634084418e-05, "loss": 1.3181, "step": 2628 }, { "epoch": 0.8261185385868819, "grad_norm": 0.7421875, "learning_rate": 1.686448746429705e-05, "loss": 1.3106, "step": 2630 }, { "epoch": 0.8267467656124232, "grad_norm": 0.82421875, "learning_rate": 1.686194858774992e-05, "loss": 1.3216, "step": 2632 }, { "epoch": 0.8273749926379645, "grad_norm": 0.85546875, "learning_rate": 1.6859409711202796e-05, "loss": 1.3221, "step": 2634 }, { "epoch": 0.8280032196635059, "grad_norm": 0.7734375, "learning_rate": 1.6856870834655664e-05, "loss": 1.3614, "step": 2636 }, { "epoch": 0.8286314466890472, "grad_norm": 0.7890625, "learning_rate": 1.685433195810854e-05, "loss": 1.3956, "step": 2638 }, { "epoch": 0.8292596737145886, "grad_norm": 0.6875, "learning_rate": 1.685179308156141e-05, "loss": 1.1662, "step": 2640 }, { "epoch": 0.82988790074013, "grad_norm": 0.76953125, "learning_rate": 1.684925420501428e-05, "loss": 1.2505, "step": 2642 }, { "epoch": 0.8305161277656713, "grad_norm": 0.86328125, "learning_rate": 1.6846715328467153e-05, "loss": 1.251, "step": 2644 }, { "epoch": 0.8311443547912126, "grad_norm": 0.78515625, "learning_rate": 1.6844176451920028e-05, "loss": 1.398, "step": 2646 }, { "epoch": 0.8317725818167541, "grad_norm": 0.79296875, "learning_rate": 1.68416375753729e-05, "loss": 1.2618, "step": 2648 }, { "epoch": 0.8324008088422954, "grad_norm": 0.66796875, "learning_rate": 1.683909869882577e-05, "loss": 1.3516, "step": 2650 }, { "epoch": 0.8330290358678367, "grad_norm": 0.74609375, "learning_rate": 1.6836559822278645e-05, "loss": 1.4359, "step": 2652 }, { "epoch": 0.833657262893378, "grad_norm": 0.703125, "learning_rate": 1.6834020945731516e-05, "loss": 1.3158, "step": 2654 }, { "epoch": 0.8342854899189195, "grad_norm": 0.81640625, "learning_rate": 1.6831482069184388e-05, "loss": 1.2849, "step": 2656 }, { "epoch": 0.8349137169444608, "grad_norm": 0.734375, "learning_rate": 1.682894319263726e-05, "loss": 1.4921, "step": 2658 }, { "epoch": 0.8355419439700021, "grad_norm": 0.94921875, "learning_rate": 1.6826404316090134e-05, "loss": 1.2774, "step": 2660 }, { "epoch": 0.8361701709955435, "grad_norm": 0.78125, "learning_rate": 1.6823865439543002e-05, "loss": 1.3282, "step": 2662 }, { "epoch": 0.8367983980210849, "grad_norm": 0.75, "learning_rate": 1.6821326562995877e-05, "loss": 1.2604, "step": 2664 }, { "epoch": 0.8374266250466262, "grad_norm": 0.75390625, "learning_rate": 1.6818787686448748e-05, "loss": 1.2322, "step": 2666 }, { "epoch": 0.8380548520721676, "grad_norm": 0.75, "learning_rate": 1.681624880990162e-05, "loss": 1.3847, "step": 2668 }, { "epoch": 0.8386830790977089, "grad_norm": 1.0, "learning_rate": 1.681370993335449e-05, "loss": 1.2521, "step": 2670 }, { "epoch": 0.8393113061232503, "grad_norm": 0.73046875, "learning_rate": 1.6811171056807366e-05, "loss": 1.4187, "step": 2672 }, { "epoch": 0.8399395331487917, "grad_norm": 0.7109375, "learning_rate": 1.6808632180260237e-05, "loss": 1.195, "step": 2674 }, { "epoch": 0.840567760174333, "grad_norm": 1.015625, "learning_rate": 1.680609330371311e-05, "loss": 1.3454, "step": 2676 }, { "epoch": 0.8411959871998743, "grad_norm": 0.78515625, "learning_rate": 1.680355442716598e-05, "loss": 1.453, "step": 2678 }, { "epoch": 0.8418242142254158, "grad_norm": 0.68359375, "learning_rate": 1.6801015550618855e-05, "loss": 1.4218, "step": 2680 }, { "epoch": 0.8424524412509571, "grad_norm": 0.85546875, "learning_rate": 1.6798476674071723e-05, "loss": 1.4194, "step": 2682 }, { "epoch": 0.8430806682764984, "grad_norm": 0.80859375, "learning_rate": 1.6795937797524597e-05, "loss": 1.3225, "step": 2684 }, { "epoch": 0.8437088953020397, "grad_norm": 0.7578125, "learning_rate": 1.679339892097747e-05, "loss": 1.205, "step": 2686 }, { "epoch": 0.8443371223275812, "grad_norm": 1.046875, "learning_rate": 1.679086004443034e-05, "loss": 1.425, "step": 2688 }, { "epoch": 0.8449653493531225, "grad_norm": 0.6875, "learning_rate": 1.678832116788321e-05, "loss": 1.3743, "step": 2690 }, { "epoch": 0.8455935763786638, "grad_norm": 0.83203125, "learning_rate": 1.6785782291336086e-05, "loss": 1.2462, "step": 2692 }, { "epoch": 0.8462218034042052, "grad_norm": 0.671875, "learning_rate": 1.6783243414788958e-05, "loss": 1.3989, "step": 2694 }, { "epoch": 0.8468500304297466, "grad_norm": 0.76953125, "learning_rate": 1.678070453824183e-05, "loss": 1.4101, "step": 2696 }, { "epoch": 0.8474782574552879, "grad_norm": 0.71484375, "learning_rate": 1.67781656616947e-05, "loss": 1.2639, "step": 2698 }, { "epoch": 0.8481064844808293, "grad_norm": 0.79296875, "learning_rate": 1.6775626785147575e-05, "loss": 1.3388, "step": 2700 }, { "epoch": 0.8487347115063706, "grad_norm": 0.78515625, "learning_rate": 1.6773087908600447e-05, "loss": 1.363, "step": 2702 }, { "epoch": 0.8493629385319119, "grad_norm": 0.828125, "learning_rate": 1.6770549032053318e-05, "loss": 1.2831, "step": 2704 }, { "epoch": 0.8499911655574534, "grad_norm": 0.7109375, "learning_rate": 1.676801015550619e-05, "loss": 1.2638, "step": 2706 }, { "epoch": 0.8506193925829947, "grad_norm": 0.6875, "learning_rate": 1.676547127895906e-05, "loss": 1.3733, "step": 2708 }, { "epoch": 0.851247619608536, "grad_norm": 0.6796875, "learning_rate": 1.6762932402411932e-05, "loss": 1.3726, "step": 2710 }, { "epoch": 0.8518758466340773, "grad_norm": 0.73828125, "learning_rate": 1.6760393525864807e-05, "loss": 1.3406, "step": 2712 }, { "epoch": 0.8525040736596188, "grad_norm": 0.69921875, "learning_rate": 1.6757854649317678e-05, "loss": 1.4331, "step": 2714 }, { "epoch": 0.8531323006851601, "grad_norm": 0.7890625, "learning_rate": 1.675531577277055e-05, "loss": 1.302, "step": 2716 }, { "epoch": 0.8537605277107014, "grad_norm": 0.79296875, "learning_rate": 1.675277689622342e-05, "loss": 1.3428, "step": 2718 }, { "epoch": 0.8543887547362428, "grad_norm": 0.765625, "learning_rate": 1.6750238019676296e-05, "loss": 1.2827, "step": 2720 }, { "epoch": 0.8550169817617842, "grad_norm": 0.67578125, "learning_rate": 1.6747699143129167e-05, "loss": 1.2744, "step": 2722 }, { "epoch": 0.8556452087873255, "grad_norm": 0.75, "learning_rate": 1.674516026658204e-05, "loss": 1.2999, "step": 2724 }, { "epoch": 0.8562734358128669, "grad_norm": 0.9765625, "learning_rate": 1.674262139003491e-05, "loss": 1.2288, "step": 2726 }, { "epoch": 0.8569016628384082, "grad_norm": 0.7109375, "learning_rate": 1.6740082513487785e-05, "loss": 1.3101, "step": 2728 }, { "epoch": 0.8575298898639496, "grad_norm": 0.71484375, "learning_rate": 1.6737543636940653e-05, "loss": 1.309, "step": 2730 }, { "epoch": 0.858158116889491, "grad_norm": 0.69921875, "learning_rate": 1.6735004760393527e-05, "loss": 1.3683, "step": 2732 }, { "epoch": 0.8587863439150323, "grad_norm": 1.015625, "learning_rate": 1.67324658838464e-05, "loss": 1.2708, "step": 2734 }, { "epoch": 0.8594145709405736, "grad_norm": 0.7578125, "learning_rate": 1.672992700729927e-05, "loss": 1.5443, "step": 2736 }, { "epoch": 0.860042797966115, "grad_norm": 0.73046875, "learning_rate": 1.6727388130752145e-05, "loss": 1.3305, "step": 2738 }, { "epoch": 0.8606710249916564, "grad_norm": 0.86328125, "learning_rate": 1.6724849254205016e-05, "loss": 1.3512, "step": 2740 }, { "epoch": 0.8612992520171977, "grad_norm": 0.73828125, "learning_rate": 1.6722310377657888e-05, "loss": 1.3854, "step": 2742 }, { "epoch": 0.861927479042739, "grad_norm": 0.75390625, "learning_rate": 1.671977150111076e-05, "loss": 1.2901, "step": 2744 }, { "epoch": 0.8625557060682805, "grad_norm": 0.68359375, "learning_rate": 1.6717232624563634e-05, "loss": 1.3502, "step": 2746 }, { "epoch": 0.8631839330938218, "grad_norm": 0.7578125, "learning_rate": 1.6714693748016505e-05, "loss": 1.1293, "step": 2748 }, { "epoch": 0.8638121601193631, "grad_norm": 0.74609375, "learning_rate": 1.6712154871469377e-05, "loss": 1.3325, "step": 2750 }, { "epoch": 0.8644403871449045, "grad_norm": 0.7890625, "learning_rate": 1.6709615994922248e-05, "loss": 1.4138, "step": 2752 }, { "epoch": 0.8650686141704459, "grad_norm": 0.69140625, "learning_rate": 1.6707077118375123e-05, "loss": 1.2818, "step": 2754 }, { "epoch": 0.8656968411959872, "grad_norm": 0.73046875, "learning_rate": 1.670453824182799e-05, "loss": 1.2926, "step": 2756 }, { "epoch": 0.8663250682215285, "grad_norm": 0.6953125, "learning_rate": 1.6701999365280866e-05, "loss": 1.3686, "step": 2758 }, { "epoch": 0.8669532952470699, "grad_norm": 0.8359375, "learning_rate": 1.6699460488733737e-05, "loss": 1.2924, "step": 2760 }, { "epoch": 0.8675815222726113, "grad_norm": 0.78515625, "learning_rate": 1.669692161218661e-05, "loss": 1.4022, "step": 2762 }, { "epoch": 0.8682097492981526, "grad_norm": 0.8359375, "learning_rate": 1.669438273563948e-05, "loss": 1.429, "step": 2764 }, { "epoch": 0.868837976323694, "grad_norm": 0.7890625, "learning_rate": 1.6691843859092355e-05, "loss": 1.2911, "step": 2766 }, { "epoch": 0.8694662033492353, "grad_norm": 0.73046875, "learning_rate": 1.6689304982545226e-05, "loss": 1.4, "step": 2768 }, { "epoch": 0.8700944303747767, "grad_norm": 0.88671875, "learning_rate": 1.6686766105998097e-05, "loss": 1.3409, "step": 2770 }, { "epoch": 0.8707226574003181, "grad_norm": 1.0390625, "learning_rate": 1.668422722945097e-05, "loss": 1.2781, "step": 2772 }, { "epoch": 0.8713508844258594, "grad_norm": 0.8359375, "learning_rate": 1.6681688352903843e-05, "loss": 1.3083, "step": 2774 }, { "epoch": 0.8719791114514007, "grad_norm": 0.73046875, "learning_rate": 1.667914947635671e-05, "loss": 1.2491, "step": 2776 }, { "epoch": 0.872607338476942, "grad_norm": 0.67578125, "learning_rate": 1.6676610599809586e-05, "loss": 1.3156, "step": 2778 }, { "epoch": 0.8732355655024835, "grad_norm": 0.8515625, "learning_rate": 1.6674071723262458e-05, "loss": 1.2403, "step": 2780 }, { "epoch": 0.8738637925280248, "grad_norm": 0.74609375, "learning_rate": 1.667153284671533e-05, "loss": 1.4226, "step": 2782 }, { "epoch": 0.8744920195535661, "grad_norm": 0.84765625, "learning_rate": 1.66689939701682e-05, "loss": 1.2981, "step": 2784 }, { "epoch": 0.8751202465791075, "grad_norm": 0.6953125, "learning_rate": 1.6666455093621075e-05, "loss": 1.256, "step": 2786 }, { "epoch": 0.8757484736046489, "grad_norm": 0.734375, "learning_rate": 1.6663916217073946e-05, "loss": 1.255, "step": 2788 }, { "epoch": 0.8763767006301902, "grad_norm": 0.7265625, "learning_rate": 1.6661377340526818e-05, "loss": 1.2185, "step": 2790 }, { "epoch": 0.8770049276557316, "grad_norm": 0.6640625, "learning_rate": 1.665883846397969e-05, "loss": 1.4315, "step": 2792 }, { "epoch": 0.8776331546812729, "grad_norm": 0.703125, "learning_rate": 1.6656299587432564e-05, "loss": 1.4531, "step": 2794 }, { "epoch": 0.8782613817068143, "grad_norm": 0.8828125, "learning_rate": 1.6653760710885435e-05, "loss": 1.2937, "step": 2796 }, { "epoch": 0.8788896087323557, "grad_norm": 0.9375, "learning_rate": 1.6651221834338307e-05, "loss": 1.2382, "step": 2798 }, { "epoch": 0.879517835757897, "grad_norm": 0.8515625, "learning_rate": 1.6648682957791178e-05, "loss": 1.2398, "step": 2800 }, { "epoch": 0.8801460627834383, "grad_norm": 0.7890625, "learning_rate": 1.664614408124405e-05, "loss": 1.3117, "step": 2802 }, { "epoch": 0.8807742898089798, "grad_norm": 0.88671875, "learning_rate": 1.664360520469692e-05, "loss": 1.35, "step": 2804 }, { "epoch": 0.8814025168345211, "grad_norm": 0.8125, "learning_rate": 1.6641066328149796e-05, "loss": 1.4186, "step": 2806 }, { "epoch": 0.8820307438600624, "grad_norm": 0.67578125, "learning_rate": 1.6638527451602667e-05, "loss": 1.2733, "step": 2808 }, { "epoch": 0.8826589708856037, "grad_norm": 0.734375, "learning_rate": 1.663598857505554e-05, "loss": 1.312, "step": 2810 }, { "epoch": 0.8832871979111452, "grad_norm": 0.76171875, "learning_rate": 1.663344969850841e-05, "loss": 1.2711, "step": 2812 }, { "epoch": 0.8839154249366865, "grad_norm": 0.7265625, "learning_rate": 1.6630910821961285e-05, "loss": 1.3649, "step": 2814 }, { "epoch": 0.8845436519622278, "grad_norm": 0.734375, "learning_rate": 1.6628371945414156e-05, "loss": 1.5247, "step": 2816 }, { "epoch": 0.8851718789877692, "grad_norm": 0.76171875, "learning_rate": 1.6625833068867027e-05, "loss": 1.2794, "step": 2818 }, { "epoch": 0.8858001060133106, "grad_norm": 0.68359375, "learning_rate": 1.66232941923199e-05, "loss": 1.2475, "step": 2820 }, { "epoch": 0.8864283330388519, "grad_norm": 0.8359375, "learning_rate": 1.6620755315772774e-05, "loss": 1.1927, "step": 2822 }, { "epoch": 0.8870565600643933, "grad_norm": 0.8828125, "learning_rate": 1.6618216439225645e-05, "loss": 1.2817, "step": 2824 }, { "epoch": 0.8876847870899346, "grad_norm": 0.72265625, "learning_rate": 1.6615677562678516e-05, "loss": 1.3958, "step": 2826 }, { "epoch": 0.888313014115476, "grad_norm": 0.69140625, "learning_rate": 1.6613138686131388e-05, "loss": 1.4729, "step": 2828 }, { "epoch": 0.8889412411410174, "grad_norm": 0.87109375, "learning_rate": 1.661059980958426e-05, "loss": 1.2705, "step": 2830 }, { "epoch": 0.8895694681665587, "grad_norm": 0.73828125, "learning_rate": 1.6608060933037134e-05, "loss": 1.4104, "step": 2832 }, { "epoch": 0.8901976951921, "grad_norm": 0.78125, "learning_rate": 1.6605522056490005e-05, "loss": 1.3625, "step": 2834 }, { "epoch": 0.8908259222176415, "grad_norm": 0.8046875, "learning_rate": 1.6602983179942877e-05, "loss": 1.3754, "step": 2836 }, { "epoch": 0.8914541492431828, "grad_norm": 0.7578125, "learning_rate": 1.6600444303395748e-05, "loss": 1.2763, "step": 2838 }, { "epoch": 0.8920823762687241, "grad_norm": 0.6953125, "learning_rate": 1.6597905426848623e-05, "loss": 1.1745, "step": 2840 }, { "epoch": 0.8927106032942654, "grad_norm": 0.75390625, "learning_rate": 1.6595366550301494e-05, "loss": 1.2782, "step": 2842 }, { "epoch": 0.8933388303198068, "grad_norm": 0.796875, "learning_rate": 1.6592827673754366e-05, "loss": 1.3032, "step": 2844 }, { "epoch": 0.8939670573453482, "grad_norm": 0.7265625, "learning_rate": 1.6590288797207237e-05, "loss": 1.4176, "step": 2846 }, { "epoch": 0.8945952843708895, "grad_norm": 0.71875, "learning_rate": 1.658774992066011e-05, "loss": 1.3804, "step": 2848 }, { "epoch": 0.8952235113964309, "grad_norm": 0.6796875, "learning_rate": 1.658521104411298e-05, "loss": 1.3173, "step": 2850 }, { "epoch": 0.8958517384219722, "grad_norm": 0.86328125, "learning_rate": 1.6582672167565854e-05, "loss": 1.1673, "step": 2852 }, { "epoch": 0.8964799654475136, "grad_norm": 0.69921875, "learning_rate": 1.6580133291018726e-05, "loss": 1.3201, "step": 2854 }, { "epoch": 0.897108192473055, "grad_norm": 0.6953125, "learning_rate": 1.6577594414471597e-05, "loss": 1.3234, "step": 2856 }, { "epoch": 0.8977364194985963, "grad_norm": 2.359375, "learning_rate": 1.657505553792447e-05, "loss": 1.4672, "step": 2858 }, { "epoch": 0.8983646465241376, "grad_norm": 0.78515625, "learning_rate": 1.6572516661377343e-05, "loss": 1.3377, "step": 2860 }, { "epoch": 0.898992873549679, "grad_norm": 0.71484375, "learning_rate": 1.6569977784830215e-05, "loss": 1.2545, "step": 2862 }, { "epoch": 0.8996211005752204, "grad_norm": 0.8984375, "learning_rate": 1.6567438908283086e-05, "loss": 1.2684, "step": 2864 }, { "epoch": 0.9002493276007617, "grad_norm": 0.7421875, "learning_rate": 1.6564900031735957e-05, "loss": 1.2329, "step": 2866 }, { "epoch": 0.900877554626303, "grad_norm": 0.6796875, "learning_rate": 1.6562361155188832e-05, "loss": 1.4101, "step": 2868 }, { "epoch": 0.9015057816518445, "grad_norm": 0.7578125, "learning_rate": 1.65598222786417e-05, "loss": 1.2965, "step": 2870 }, { "epoch": 0.9021340086773858, "grad_norm": 0.90234375, "learning_rate": 1.6557283402094575e-05, "loss": 1.331, "step": 2872 }, { "epoch": 0.9027622357029271, "grad_norm": 0.765625, "learning_rate": 1.6554744525547446e-05, "loss": 1.4061, "step": 2874 }, { "epoch": 0.9033904627284685, "grad_norm": 0.76953125, "learning_rate": 1.6552205649000318e-05, "loss": 1.3482, "step": 2876 }, { "epoch": 0.9040186897540099, "grad_norm": 0.68359375, "learning_rate": 1.654966677245319e-05, "loss": 1.3822, "step": 2878 }, { "epoch": 0.9046469167795512, "grad_norm": 0.79296875, "learning_rate": 1.6547127895906064e-05, "loss": 1.2013, "step": 2880 }, { "epoch": 0.9052751438050926, "grad_norm": 0.75390625, "learning_rate": 1.6544589019358935e-05, "loss": 1.2415, "step": 2882 }, { "epoch": 0.9059033708306339, "grad_norm": 0.8984375, "learning_rate": 1.6542050142811807e-05, "loss": 1.3142, "step": 2884 }, { "epoch": 0.9065315978561753, "grad_norm": 0.7734375, "learning_rate": 1.6539511266264678e-05, "loss": 1.3292, "step": 2886 }, { "epoch": 0.9071598248817166, "grad_norm": 0.7421875, "learning_rate": 1.6536972389717553e-05, "loss": 1.3243, "step": 2888 }, { "epoch": 0.907788051907258, "grad_norm": 0.75, "learning_rate": 1.6534433513170424e-05, "loss": 1.2548, "step": 2890 }, { "epoch": 0.9084162789327993, "grad_norm": 0.78515625, "learning_rate": 1.6531894636623296e-05, "loss": 1.3526, "step": 2892 }, { "epoch": 0.9090445059583407, "grad_norm": 0.7890625, "learning_rate": 1.6529355760076167e-05, "loss": 1.3198, "step": 2894 }, { "epoch": 0.9096727329838821, "grad_norm": 0.6875, "learning_rate": 1.652681688352904e-05, "loss": 1.0987, "step": 2896 }, { "epoch": 0.9103009600094234, "grad_norm": 0.7890625, "learning_rate": 1.652427800698191e-05, "loss": 1.2387, "step": 2898 }, { "epoch": 0.9109291870349647, "grad_norm": 0.71484375, "learning_rate": 1.6521739130434785e-05, "loss": 1.1774, "step": 2900 }, { "epoch": 0.9115574140605062, "grad_norm": 0.78515625, "learning_rate": 1.6519200253887656e-05, "loss": 1.2341, "step": 2902 }, { "epoch": 0.9121856410860475, "grad_norm": 0.796875, "learning_rate": 1.6516661377340527e-05, "loss": 1.2046, "step": 2904 }, { "epoch": 0.9128138681115888, "grad_norm": 0.7890625, "learning_rate": 1.65141225007934e-05, "loss": 1.477, "step": 2906 }, { "epoch": 0.9134420951371302, "grad_norm": 0.7109375, "learning_rate": 1.6511583624246273e-05, "loss": 1.4045, "step": 2908 }, { "epoch": 0.9140703221626716, "grad_norm": 0.7734375, "learning_rate": 1.6509044747699145e-05, "loss": 1.2798, "step": 2910 }, { "epoch": 0.9146985491882129, "grad_norm": 0.703125, "learning_rate": 1.6506505871152016e-05, "loss": 1.3729, "step": 2912 }, { "epoch": 0.9153267762137542, "grad_norm": 0.84375, "learning_rate": 1.650396699460489e-05, "loss": 1.3434, "step": 2914 }, { "epoch": 0.9159550032392956, "grad_norm": 0.73046875, "learning_rate": 1.650142811805776e-05, "loss": 1.3866, "step": 2916 }, { "epoch": 0.9165832302648369, "grad_norm": 0.71875, "learning_rate": 1.6498889241510634e-05, "loss": 1.2233, "step": 2918 }, { "epoch": 0.9172114572903783, "grad_norm": 0.95703125, "learning_rate": 1.6496350364963505e-05, "loss": 1.2631, "step": 2920 }, { "epoch": 0.9178396843159197, "grad_norm": 0.6640625, "learning_rate": 1.6493811488416377e-05, "loss": 1.3768, "step": 2922 }, { "epoch": 0.918467911341461, "grad_norm": 0.84375, "learning_rate": 1.6491272611869248e-05, "loss": 1.2722, "step": 2924 }, { "epoch": 0.9190961383670023, "grad_norm": 0.83203125, "learning_rate": 1.6488733735322123e-05, "loss": 1.2799, "step": 2926 }, { "epoch": 0.9197243653925438, "grad_norm": 0.859375, "learning_rate": 1.6486194858774994e-05, "loss": 1.2571, "step": 2928 }, { "epoch": 0.9203525924180851, "grad_norm": 0.71875, "learning_rate": 1.6483655982227865e-05, "loss": 1.2148, "step": 2930 }, { "epoch": 0.9209808194436264, "grad_norm": 0.74609375, "learning_rate": 1.6481117105680737e-05, "loss": 1.3129, "step": 2932 }, { "epoch": 0.9216090464691677, "grad_norm": 0.71484375, "learning_rate": 1.647857822913361e-05, "loss": 1.2683, "step": 2934 }, { "epoch": 0.9222372734947092, "grad_norm": 0.703125, "learning_rate": 1.6476039352586483e-05, "loss": 1.356, "step": 2936 }, { "epoch": 0.9228655005202505, "grad_norm": 0.74609375, "learning_rate": 1.6473500476039354e-05, "loss": 1.2901, "step": 2938 }, { "epoch": 0.9234937275457918, "grad_norm": 0.68359375, "learning_rate": 1.6470961599492226e-05, "loss": 1.4158, "step": 2940 }, { "epoch": 0.9241219545713332, "grad_norm": 0.796875, "learning_rate": 1.6468422722945097e-05, "loss": 1.2391, "step": 2942 }, { "epoch": 0.9247501815968746, "grad_norm": 0.74609375, "learning_rate": 1.646588384639797e-05, "loss": 1.3964, "step": 2944 }, { "epoch": 0.9253784086224159, "grad_norm": 0.71875, "learning_rate": 1.6463344969850843e-05, "loss": 1.3187, "step": 2946 }, { "epoch": 0.9260066356479573, "grad_norm": 0.6640625, "learning_rate": 1.6460806093303715e-05, "loss": 1.3794, "step": 2948 }, { "epoch": 0.9266348626734986, "grad_norm": 0.69140625, "learning_rate": 1.6458267216756586e-05, "loss": 1.3897, "step": 2950 }, { "epoch": 0.92726308969904, "grad_norm": 0.73046875, "learning_rate": 1.6455728340209457e-05, "loss": 1.2514, "step": 2952 }, { "epoch": 0.9278913167245814, "grad_norm": 0.7265625, "learning_rate": 1.6453189463662332e-05, "loss": 1.2275, "step": 2954 }, { "epoch": 0.9285195437501227, "grad_norm": 0.8828125, "learning_rate": 1.6450650587115204e-05, "loss": 1.3661, "step": 2956 }, { "epoch": 0.929147770775664, "grad_norm": 0.703125, "learning_rate": 1.6448111710568075e-05, "loss": 1.3095, "step": 2958 }, { "epoch": 0.9297759978012055, "grad_norm": 0.80859375, "learning_rate": 1.6445572834020946e-05, "loss": 1.4244, "step": 2960 }, { "epoch": 0.9304042248267468, "grad_norm": 0.69921875, "learning_rate": 1.644303395747382e-05, "loss": 1.3683, "step": 2962 }, { "epoch": 0.9310324518522881, "grad_norm": 0.71875, "learning_rate": 1.644049508092669e-05, "loss": 1.512, "step": 2964 }, { "epoch": 0.9316606788778294, "grad_norm": 0.80859375, "learning_rate": 1.6437956204379564e-05, "loss": 1.4732, "step": 2966 }, { "epoch": 0.9322889059033709, "grad_norm": 0.734375, "learning_rate": 1.6435417327832435e-05, "loss": 1.34, "step": 2968 }, { "epoch": 0.9329171329289122, "grad_norm": 0.77734375, "learning_rate": 1.6432878451285307e-05, "loss": 1.2436, "step": 2970 }, { "epoch": 0.9335453599544535, "grad_norm": 0.7421875, "learning_rate": 1.6430339574738178e-05, "loss": 1.3719, "step": 2972 }, { "epoch": 0.9341735869799949, "grad_norm": 0.79296875, "learning_rate": 1.6427800698191053e-05, "loss": 1.3081, "step": 2974 }, { "epoch": 0.9348018140055363, "grad_norm": 0.74609375, "learning_rate": 1.6425261821643924e-05, "loss": 1.3141, "step": 2976 }, { "epoch": 0.9354300410310776, "grad_norm": 0.73828125, "learning_rate": 1.6422722945096796e-05, "loss": 1.3385, "step": 2978 }, { "epoch": 0.936058268056619, "grad_norm": 0.73046875, "learning_rate": 1.6420184068549667e-05, "loss": 1.3452, "step": 2980 }, { "epoch": 0.9366864950821603, "grad_norm": 0.80078125, "learning_rate": 1.6417645192002542e-05, "loss": 1.2058, "step": 2982 }, { "epoch": 0.9373147221077017, "grad_norm": 0.75390625, "learning_rate": 1.641510631545541e-05, "loss": 1.3226, "step": 2984 }, { "epoch": 0.937942949133243, "grad_norm": 0.7109375, "learning_rate": 1.6412567438908284e-05, "loss": 1.362, "step": 2986 }, { "epoch": 0.9385711761587844, "grad_norm": 0.734375, "learning_rate": 1.6410028562361156e-05, "loss": 1.2904, "step": 2988 }, { "epoch": 0.9391994031843257, "grad_norm": 0.875, "learning_rate": 1.6407489685814027e-05, "loss": 1.3076, "step": 2990 }, { "epoch": 0.939827630209867, "grad_norm": 1.2890625, "learning_rate": 1.64049508092669e-05, "loss": 1.2634, "step": 2992 }, { "epoch": 0.9404558572354085, "grad_norm": 0.77734375, "learning_rate": 1.6402411932719773e-05, "loss": 1.3545, "step": 2994 }, { "epoch": 0.9410840842609498, "grad_norm": 0.73046875, "learning_rate": 1.6399873056172645e-05, "loss": 1.3319, "step": 2996 }, { "epoch": 0.9417123112864911, "grad_norm": 0.75, "learning_rate": 1.6397334179625516e-05, "loss": 1.3534, "step": 2998 }, { "epoch": 0.9423405383120325, "grad_norm": 0.68359375, "learning_rate": 1.639479530307839e-05, "loss": 1.2353, "step": 3000 }, { "epoch": 0.9429687653375739, "grad_norm": 0.68359375, "learning_rate": 1.6392256426531262e-05, "loss": 1.2108, "step": 3002 }, { "epoch": 0.9435969923631152, "grad_norm": 0.71875, "learning_rate": 1.6389717549984134e-05, "loss": 1.2961, "step": 3004 }, { "epoch": 0.9442252193886566, "grad_norm": 0.734375, "learning_rate": 1.6387178673437005e-05, "loss": 1.2746, "step": 3006 }, { "epoch": 0.9448534464141979, "grad_norm": 0.72265625, "learning_rate": 1.638463979688988e-05, "loss": 1.2231, "step": 3008 }, { "epoch": 0.9454816734397393, "grad_norm": 0.85546875, "learning_rate": 1.6382100920342748e-05, "loss": 1.4304, "step": 3010 }, { "epoch": 0.9461099004652807, "grad_norm": 0.87890625, "learning_rate": 1.6379562043795623e-05, "loss": 1.336, "step": 3012 }, { "epoch": 0.946738127490822, "grad_norm": 0.78125, "learning_rate": 1.6377023167248494e-05, "loss": 1.2532, "step": 3014 }, { "epoch": 0.9473663545163633, "grad_norm": 0.73046875, "learning_rate": 1.6374484290701365e-05, "loss": 1.3438, "step": 3016 }, { "epoch": 0.9479945815419047, "grad_norm": 0.765625, "learning_rate": 1.6371945414154237e-05, "loss": 1.3412, "step": 3018 }, { "epoch": 0.9486228085674461, "grad_norm": 0.7578125, "learning_rate": 1.636940653760711e-05, "loss": 1.2943, "step": 3020 }, { "epoch": 0.9492510355929874, "grad_norm": 0.71484375, "learning_rate": 1.6366867661059983e-05, "loss": 1.3679, "step": 3022 }, { "epoch": 0.9498792626185287, "grad_norm": 0.6953125, "learning_rate": 1.6364328784512854e-05, "loss": 1.2899, "step": 3024 }, { "epoch": 0.9505074896440702, "grad_norm": 0.87109375, "learning_rate": 1.6361789907965726e-05, "loss": 1.4329, "step": 3026 }, { "epoch": 0.9511357166696115, "grad_norm": 0.6875, "learning_rate": 1.63592510314186e-05, "loss": 1.2883, "step": 3028 }, { "epoch": 0.9517639436951528, "grad_norm": 0.66796875, "learning_rate": 1.6356712154871472e-05, "loss": 1.2225, "step": 3030 }, { "epoch": 0.9523921707206942, "grad_norm": 0.796875, "learning_rate": 1.6354173278324343e-05, "loss": 1.3128, "step": 3032 }, { "epoch": 0.9530203977462356, "grad_norm": 0.8828125, "learning_rate": 1.6351634401777215e-05, "loss": 1.3125, "step": 3034 }, { "epoch": 0.9536486247717769, "grad_norm": 0.7265625, "learning_rate": 1.6349095525230086e-05, "loss": 1.3294, "step": 3036 }, { "epoch": 0.9542768517973182, "grad_norm": 0.79296875, "learning_rate": 1.6346556648682957e-05, "loss": 1.2799, "step": 3038 }, { "epoch": 0.9549050788228596, "grad_norm": 0.703125, "learning_rate": 1.6344017772135832e-05, "loss": 1.3259, "step": 3040 }, { "epoch": 0.955533305848401, "grad_norm": 0.66796875, "learning_rate": 1.6341478895588704e-05, "loss": 1.2925, "step": 3042 }, { "epoch": 0.9561615328739423, "grad_norm": 0.81640625, "learning_rate": 1.6338940019041575e-05, "loss": 1.4347, "step": 3044 }, { "epoch": 0.9567897598994837, "grad_norm": 0.71875, "learning_rate": 1.6336401142494446e-05, "loss": 1.1746, "step": 3046 }, { "epoch": 0.957417986925025, "grad_norm": 0.8671875, "learning_rate": 1.633386226594732e-05, "loss": 1.3328, "step": 3048 }, { "epoch": 0.9580462139505664, "grad_norm": 0.6953125, "learning_rate": 1.6331323389400192e-05, "loss": 1.3283, "step": 3050 }, { "epoch": 0.9586744409761078, "grad_norm": 0.67578125, "learning_rate": 1.6328784512853064e-05, "loss": 1.4036, "step": 3052 }, { "epoch": 0.9593026680016491, "grad_norm": 0.83984375, "learning_rate": 1.6326245636305935e-05, "loss": 1.2255, "step": 3054 }, { "epoch": 0.9599308950271904, "grad_norm": 0.7265625, "learning_rate": 1.632370675975881e-05, "loss": 1.2382, "step": 3056 }, { "epoch": 0.9605591220527318, "grad_norm": 0.72265625, "learning_rate": 1.6321167883211678e-05, "loss": 1.311, "step": 3058 }, { "epoch": 0.9611873490782732, "grad_norm": 0.7109375, "learning_rate": 1.6318629006664553e-05, "loss": 1.2916, "step": 3060 }, { "epoch": 0.9618155761038145, "grad_norm": 0.734375, "learning_rate": 1.6316090130117424e-05, "loss": 1.2996, "step": 3062 }, { "epoch": 0.9624438031293558, "grad_norm": 0.6796875, "learning_rate": 1.6313551253570295e-05, "loss": 1.3253, "step": 3064 }, { "epoch": 0.9630720301548972, "grad_norm": 0.66796875, "learning_rate": 1.6311012377023167e-05, "loss": 1.3582, "step": 3066 }, { "epoch": 0.9637002571804386, "grad_norm": 0.671875, "learning_rate": 1.630847350047604e-05, "loss": 1.3637, "step": 3068 }, { "epoch": 0.9643284842059799, "grad_norm": 0.80859375, "learning_rate": 1.6305934623928913e-05, "loss": 1.2882, "step": 3070 }, { "epoch": 0.9649567112315213, "grad_norm": 1.1171875, "learning_rate": 1.6303395747381784e-05, "loss": 1.2281, "step": 3072 }, { "epoch": 0.9655849382570626, "grad_norm": 0.80078125, "learning_rate": 1.6300856870834656e-05, "loss": 1.3915, "step": 3074 }, { "epoch": 0.966213165282604, "grad_norm": 0.75, "learning_rate": 1.629831799428753e-05, "loss": 1.401, "step": 3076 }, { "epoch": 0.9668413923081454, "grad_norm": 0.73828125, "learning_rate": 1.62957791177404e-05, "loss": 1.2698, "step": 3078 }, { "epoch": 0.9674696193336867, "grad_norm": 0.75, "learning_rate": 1.6293240241193273e-05, "loss": 1.3833, "step": 3080 }, { "epoch": 0.968097846359228, "grad_norm": 0.84375, "learning_rate": 1.6290701364646148e-05, "loss": 1.2167, "step": 3082 }, { "epoch": 0.9687260733847695, "grad_norm": 0.6875, "learning_rate": 1.6288162488099016e-05, "loss": 1.3872, "step": 3084 }, { "epoch": 0.9693543004103108, "grad_norm": 0.65625, "learning_rate": 1.628562361155189e-05, "loss": 1.1942, "step": 3086 }, { "epoch": 0.9699825274358521, "grad_norm": 0.91015625, "learning_rate": 1.6283084735004762e-05, "loss": 1.3442, "step": 3088 }, { "epoch": 0.9706107544613934, "grad_norm": 0.68359375, "learning_rate": 1.6280545858457634e-05, "loss": 1.1895, "step": 3090 }, { "epoch": 0.9712389814869349, "grad_norm": 0.765625, "learning_rate": 1.6278006981910505e-05, "loss": 1.2817, "step": 3092 }, { "epoch": 0.9718672085124762, "grad_norm": 0.85546875, "learning_rate": 1.627546810536338e-05, "loss": 1.3469, "step": 3094 }, { "epoch": 0.9724954355380175, "grad_norm": 0.76171875, "learning_rate": 1.627292922881625e-05, "loss": 1.3717, "step": 3096 }, { "epoch": 0.9731236625635589, "grad_norm": 0.6953125, "learning_rate": 1.6270390352269123e-05, "loss": 1.3015, "step": 3098 }, { "epoch": 0.9737518895891003, "grad_norm": 0.703125, "learning_rate": 1.6267851475721994e-05, "loss": 1.4265, "step": 3100 }, { "epoch": 0.9743801166146416, "grad_norm": 0.80859375, "learning_rate": 1.626531259917487e-05, "loss": 1.2424, "step": 3102 }, { "epoch": 0.975008343640183, "grad_norm": 0.8125, "learning_rate": 1.6262773722627737e-05, "loss": 1.3252, "step": 3104 }, { "epoch": 0.9756365706657243, "grad_norm": 0.87890625, "learning_rate": 1.626023484608061e-05, "loss": 1.3464, "step": 3106 }, { "epoch": 0.9762647976912657, "grad_norm": 0.81640625, "learning_rate": 1.6257695969533483e-05, "loss": 1.3007, "step": 3108 }, { "epoch": 0.9768930247168071, "grad_norm": 0.90234375, "learning_rate": 1.6255157092986354e-05, "loss": 1.2549, "step": 3110 }, { "epoch": 0.9775212517423484, "grad_norm": 0.76171875, "learning_rate": 1.6252618216439226e-05, "loss": 1.3299, "step": 3112 }, { "epoch": 0.9781494787678897, "grad_norm": 0.77734375, "learning_rate": 1.62500793398921e-05, "loss": 1.3118, "step": 3114 }, { "epoch": 0.9787777057934312, "grad_norm": 0.765625, "learning_rate": 1.6247540463344972e-05, "loss": 1.3114, "step": 3116 }, { "epoch": 0.9794059328189725, "grad_norm": 0.66796875, "learning_rate": 1.6245001586797843e-05, "loss": 1.2729, "step": 3118 }, { "epoch": 0.9800341598445138, "grad_norm": 0.69921875, "learning_rate": 1.6242462710250715e-05, "loss": 1.3769, "step": 3120 }, { "epoch": 0.9806623868700551, "grad_norm": 0.65234375, "learning_rate": 1.623992383370359e-05, "loss": 1.2581, "step": 3122 }, { "epoch": 0.9812906138955966, "grad_norm": 0.73046875, "learning_rate": 1.623738495715646e-05, "loss": 1.3578, "step": 3124 }, { "epoch": 0.9819188409211379, "grad_norm": 0.67578125, "learning_rate": 1.6234846080609332e-05, "loss": 1.3055, "step": 3126 }, { "epoch": 0.9825470679466792, "grad_norm": 1.3125, "learning_rate": 1.6232307204062203e-05, "loss": 1.2103, "step": 3128 }, { "epoch": 0.9831752949722206, "grad_norm": 0.79296875, "learning_rate": 1.6229768327515075e-05, "loss": 1.3818, "step": 3130 }, { "epoch": 0.9838035219977619, "grad_norm": 0.703125, "learning_rate": 1.6227229450967946e-05, "loss": 1.3531, "step": 3132 }, { "epoch": 0.9844317490233033, "grad_norm": 0.73046875, "learning_rate": 1.622469057442082e-05, "loss": 1.2813, "step": 3134 }, { "epoch": 0.9850599760488447, "grad_norm": 0.7578125, "learning_rate": 1.6222151697873692e-05, "loss": 1.3331, "step": 3136 }, { "epoch": 0.985688203074386, "grad_norm": 0.72265625, "learning_rate": 1.6219612821326564e-05, "loss": 1.3681, "step": 3138 }, { "epoch": 0.9863164300999273, "grad_norm": 0.7421875, "learning_rate": 1.6217073944779435e-05, "loss": 1.3545, "step": 3140 }, { "epoch": 0.9869446571254687, "grad_norm": 0.703125, "learning_rate": 1.621453506823231e-05, "loss": 1.392, "step": 3142 }, { "epoch": 0.9875728841510101, "grad_norm": 0.79296875, "learning_rate": 1.621199619168518e-05, "loss": 1.3232, "step": 3144 }, { "epoch": 0.9882011111765514, "grad_norm": 0.7578125, "learning_rate": 1.6209457315138053e-05, "loss": 1.2144, "step": 3146 }, { "epoch": 0.9888293382020927, "grad_norm": 0.6796875, "learning_rate": 1.6206918438590924e-05, "loss": 1.3129, "step": 3148 }, { "epoch": 0.9894575652276342, "grad_norm": 0.74609375, "learning_rate": 1.62043795620438e-05, "loss": 1.3398, "step": 3150 }, { "epoch": 0.9900857922531755, "grad_norm": 0.79296875, "learning_rate": 1.6201840685496667e-05, "loss": 1.3094, "step": 3152 }, { "epoch": 0.9907140192787168, "grad_norm": 0.66796875, "learning_rate": 1.619930180894954e-05, "loss": 1.436, "step": 3154 }, { "epoch": 0.9913422463042582, "grad_norm": 0.828125, "learning_rate": 1.6196762932402413e-05, "loss": 1.4225, "step": 3156 }, { "epoch": 0.9919704733297996, "grad_norm": 0.76953125, "learning_rate": 1.6194224055855284e-05, "loss": 1.2521, "step": 3158 }, { "epoch": 0.9925987003553409, "grad_norm": 0.7265625, "learning_rate": 1.6191685179308156e-05, "loss": 1.2926, "step": 3160 }, { "epoch": 0.9932269273808823, "grad_norm": 0.66796875, "learning_rate": 1.618914630276103e-05, "loss": 1.222, "step": 3162 }, { "epoch": 0.9938551544064236, "grad_norm": 0.87890625, "learning_rate": 1.6186607426213902e-05, "loss": 1.3083, "step": 3164 }, { "epoch": 0.994483381431965, "grad_norm": 0.66015625, "learning_rate": 1.6184068549666773e-05, "loss": 1.3349, "step": 3166 }, { "epoch": 0.9951116084575063, "grad_norm": 0.66015625, "learning_rate": 1.6181529673119648e-05, "loss": 1.306, "step": 3168 }, { "epoch": 0.9957398354830477, "grad_norm": 0.796875, "learning_rate": 1.617899079657252e-05, "loss": 1.2053, "step": 3170 }, { "epoch": 0.996368062508589, "grad_norm": 0.91015625, "learning_rate": 1.617645192002539e-05, "loss": 1.2241, "step": 3172 }, { "epoch": 0.9969962895341304, "grad_norm": 0.78125, "learning_rate": 1.6173913043478262e-05, "loss": 1.1861, "step": 3174 }, { "epoch": 0.9976245165596718, "grad_norm": 0.6953125, "learning_rate": 1.6171374166931137e-05, "loss": 1.3181, "step": 3176 }, { "epoch": 0.9982527435852131, "grad_norm": 0.7265625, "learning_rate": 1.6168835290384005e-05, "loss": 1.2654, "step": 3178 }, { "epoch": 0.9988809706107544, "grad_norm": 0.77734375, "learning_rate": 1.616629641383688e-05, "loss": 1.435, "step": 3180 }, { "epoch": 0.9995091976362959, "grad_norm": 0.75, "learning_rate": 1.616375753728975e-05, "loss": 1.4596, "step": 3182 }, { "epoch": 1.000137424661837, "grad_norm": 0.68359375, "learning_rate": 1.6161218660742622e-05, "loss": 1.3658, "step": 3184 }, { "epoch": 1.0007656516873786, "grad_norm": 0.67578125, "learning_rate": 1.6158679784195494e-05, "loss": 1.278, "step": 3186 }, { "epoch": 1.00139387871292, "grad_norm": 0.6640625, "learning_rate": 1.615614090764837e-05, "loss": 1.3165, "step": 3188 }, { "epoch": 1.0020221057384613, "grad_norm": 0.68359375, "learning_rate": 1.615360203110124e-05, "loss": 1.292, "step": 3190 }, { "epoch": 1.0026503327640026, "grad_norm": 0.7109375, "learning_rate": 1.615106315455411e-05, "loss": 1.2257, "step": 3192 }, { "epoch": 1.003278559789544, "grad_norm": 0.78125, "learning_rate": 1.6148524278006983e-05, "loss": 1.256, "step": 3194 }, { "epoch": 1.0039067868150853, "grad_norm": 0.83984375, "learning_rate": 1.6145985401459858e-05, "loss": 1.2126, "step": 3196 }, { "epoch": 1.0045350138406266, "grad_norm": 0.75390625, "learning_rate": 1.6143446524912726e-05, "loss": 1.2712, "step": 3198 }, { "epoch": 1.005163240866168, "grad_norm": 0.76953125, "learning_rate": 1.61409076483656e-05, "loss": 1.1845, "step": 3200 } ], "logging_steps": 2, "max_steps": 15915, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.666866260954317e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }