{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 768801, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9967481832099594e-05, "loss": 6.7212, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.993496366419919e-05, "loss": 6.2046, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.9902445496298784e-05, "loss": 6.1099, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.986992732839838e-05, "loss": 6.0605, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.9837409160497974e-05, "loss": 6.0447, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.980489099259757e-05, "loss": 6.0184, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.9772372824697164e-05, "loss": 5.9684, "step": 3500 }, { "epoch": 0.02, "learning_rate": 4.9739854656796756e-05, "loss": 5.9596, "step": 4000 }, { "epoch": 0.02, "learning_rate": 4.970733648889635e-05, "loss": 5.9604, "step": 4500 }, { "epoch": 0.02, "learning_rate": 4.967481832099594e-05, "loss": 5.9346, "step": 5000 }, { "epoch": 0.02, "learning_rate": 4.964230015309554e-05, "loss": 5.9298, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.960978198519513e-05, "loss": 5.9189, "step": 6000 }, { "epoch": 0.03, "learning_rate": 4.957726381729472e-05, "loss": 5.8765, "step": 6500 }, { "epoch": 0.03, "learning_rate": 4.954474564939432e-05, "loss": 5.8812, "step": 7000 }, { "epoch": 0.03, "learning_rate": 4.951222748149391e-05, "loss": 5.8845, "step": 7500 }, { "epoch": 0.03, "learning_rate": 4.947970931359351e-05, "loss": 5.867, "step": 8000 }, { "epoch": 0.03, "learning_rate": 4.94471911456931e-05, "loss": 5.8744, "step": 8500 }, { "epoch": 0.04, "learning_rate": 4.94146729777927e-05, "loss": 5.8506, "step": 9000 }, { "epoch": 0.04, "learning_rate": 4.938215480989229e-05, "loss": 5.8343, "step": 9500 }, { "epoch": 0.04, "learning_rate": 4.934963664199188e-05, "loss": 5.829, "step": 10000 }, { "epoch": 0.04, "learning_rate": 4.931711847409148e-05, "loss": 5.8303, "step": 10500 }, { "epoch": 0.04, "learning_rate": 4.928460030619107e-05, "loss": 5.8147, "step": 11000 }, { "epoch": 0.04, "learning_rate": 4.9252082138290664e-05, "loss": 5.8124, "step": 11500 }, { "epoch": 0.05, "learning_rate": 4.9219563970390256e-05, "loss": 5.8069, "step": 12000 }, { "epoch": 0.05, "learning_rate": 4.9187045802489855e-05, "loss": 5.8026, "step": 12500 }, { "epoch": 0.05, "learning_rate": 4.9154527634589446e-05, "loss": 5.8412, "step": 13000 }, { "epoch": 0.05, "learning_rate": 4.912200946668904e-05, "loss": 5.8021, "step": 13500 }, { "epoch": 0.05, "learning_rate": 4.9089491298788636e-05, "loss": 5.7984, "step": 14000 }, { "epoch": 0.06, "learning_rate": 4.905697313088823e-05, "loss": 5.7936, "step": 14500 }, { "epoch": 0.06, "learning_rate": 4.9024454962987826e-05, "loss": 5.7868, "step": 15000 }, { "epoch": 0.06, "learning_rate": 4.899193679508742e-05, "loss": 5.7637, "step": 15500 }, { "epoch": 0.06, "learning_rate": 4.895941862718701e-05, "loss": 5.7895, "step": 16000 }, { "epoch": 0.06, "learning_rate": 4.892690045928661e-05, "loss": 5.7957, "step": 16500 }, { "epoch": 0.07, "learning_rate": 4.88943822913862e-05, "loss": 5.7764, "step": 17000 }, { "epoch": 0.07, "learning_rate": 4.88618641234858e-05, "loss": 5.7723, "step": 17500 }, { "epoch": 0.07, "learning_rate": 4.882934595558539e-05, "loss": 5.763, "step": 18000 }, { "epoch": 0.07, "learning_rate": 4.879682778768498e-05, "loss": 5.7695, "step": 18500 }, { "epoch": 0.07, "learning_rate": 4.876430961978457e-05, "loss": 5.768, "step": 19000 }, { "epoch": 0.08, "learning_rate": 4.8731791451884165e-05, "loss": 5.7776, "step": 19500 }, { "epoch": 0.08, "learning_rate": 4.869927328398376e-05, "loss": 5.7606, "step": 20000 }, { "epoch": 0.08, "learning_rate": 4.8666755116083355e-05, "loss": 5.7308, "step": 20500 }, { "epoch": 0.08, "learning_rate": 4.863423694818295e-05, "loss": 5.7412, "step": 21000 }, { "epoch": 0.08, "learning_rate": 4.8601718780282545e-05, "loss": 5.7627, "step": 21500 }, { "epoch": 0.09, "learning_rate": 4.8569200612382136e-05, "loss": 5.7449, "step": 22000 }, { "epoch": 0.09, "learning_rate": 4.8536682444481735e-05, "loss": 5.7572, "step": 22500 }, { "epoch": 0.09, "learning_rate": 4.8504164276581326e-05, "loss": 5.7505, "step": 23000 }, { "epoch": 0.09, "learning_rate": 4.8471646108680925e-05, "loss": 5.7188, "step": 23500 }, { "epoch": 0.09, "learning_rate": 4.8439127940780517e-05, "loss": 5.7193, "step": 24000 }, { "epoch": 0.1, "learning_rate": 4.8406609772880115e-05, "loss": 5.7303, "step": 24500 }, { "epoch": 0.1, "learning_rate": 4.8374091604979707e-05, "loss": 5.7279, "step": 25000 }, { "epoch": 0.1, "learning_rate": 4.83415734370793e-05, "loss": 5.716, "step": 25500 }, { "epoch": 0.1, "learning_rate": 4.83090552691789e-05, "loss": 5.7237, "step": 26000 }, { "epoch": 0.1, "learning_rate": 4.827653710127849e-05, "loss": 5.7276, "step": 26500 }, { "epoch": 0.11, "learning_rate": 4.824401893337808e-05, "loss": 5.7271, "step": 27000 }, { "epoch": 0.11, "learning_rate": 4.821150076547767e-05, "loss": 5.7384, "step": 27500 }, { "epoch": 0.11, "learning_rate": 4.817898259757727e-05, "loss": 5.6949, "step": 28000 }, { "epoch": 0.11, "learning_rate": 4.814646442967686e-05, "loss": 5.7068, "step": 28500 }, { "epoch": 0.11, "learning_rate": 4.811394626177645e-05, "loss": 5.7147, "step": 29000 }, { "epoch": 0.12, "learning_rate": 4.808142809387605e-05, "loss": 5.7132, "step": 29500 }, { "epoch": 0.12, "learning_rate": 4.804890992597564e-05, "loss": 5.7256, "step": 30000 }, { "epoch": 0.12, "learning_rate": 4.801639175807524e-05, "loss": 5.7195, "step": 30500 }, { "epoch": 0.12, "learning_rate": 4.7983873590174833e-05, "loss": 5.7022, "step": 31000 }, { "epoch": 0.12, "learning_rate": 4.7951355422274425e-05, "loss": 5.6978, "step": 31500 }, { "epoch": 0.12, "learning_rate": 4.7918837254374024e-05, "loss": 5.674, "step": 32000 }, { "epoch": 0.13, "learning_rate": 4.7886319086473615e-05, "loss": 5.7044, "step": 32500 }, { "epoch": 0.13, "learning_rate": 4.7853800918573214e-05, "loss": 5.7076, "step": 33000 }, { "epoch": 0.13, "learning_rate": 4.7821282750672805e-05, "loss": 5.7146, "step": 33500 }, { "epoch": 0.13, "learning_rate": 4.77887645827724e-05, "loss": 5.6924, "step": 34000 }, { "epoch": 0.13, "learning_rate": 4.775624641487199e-05, "loss": 5.6972, "step": 34500 }, { "epoch": 0.14, "learning_rate": 4.772372824697158e-05, "loss": 5.6929, "step": 35000 }, { "epoch": 0.14, "learning_rate": 4.769121007907118e-05, "loss": 5.6553, "step": 35500 }, { "epoch": 0.14, "learning_rate": 4.765869191117077e-05, "loss": 5.6857, "step": 36000 }, { "epoch": 0.14, "learning_rate": 4.762617374327037e-05, "loss": 5.7021, "step": 36500 }, { "epoch": 0.14, "learning_rate": 4.759365557536996e-05, "loss": 5.695, "step": 37000 }, { "epoch": 0.15, "learning_rate": 4.756113740746956e-05, "loss": 5.7066, "step": 37500 }, { "epoch": 0.15, "learning_rate": 4.752861923956915e-05, "loss": 5.6919, "step": 38000 }, { "epoch": 0.15, "learning_rate": 4.749610107166874e-05, "loss": 5.6823, "step": 38500 }, { "epoch": 0.15, "learning_rate": 4.746358290376834e-05, "loss": 5.6699, "step": 39000 }, { "epoch": 0.15, "learning_rate": 4.743106473586793e-05, "loss": 5.6837, "step": 39500 }, { "epoch": 0.16, "learning_rate": 4.739854656796753e-05, "loss": 5.6773, "step": 40000 }, { "epoch": 0.16, "learning_rate": 4.736602840006712e-05, "loss": 5.6796, "step": 40500 }, { "epoch": 0.16, "learning_rate": 4.7333510232166714e-05, "loss": 5.6663, "step": 41000 }, { "epoch": 0.16, "learning_rate": 4.7300992064266305e-05, "loss": 5.6548, "step": 41500 }, { "epoch": 0.16, "learning_rate": 4.72684738963659e-05, "loss": 5.6853, "step": 42000 }, { "epoch": 0.17, "learning_rate": 4.7235955728465495e-05, "loss": 5.691, "step": 42500 }, { "epoch": 0.17, "learning_rate": 4.720343756056509e-05, "loss": 5.657, "step": 43000 }, { "epoch": 0.17, "learning_rate": 4.7170919392664686e-05, "loss": 5.6815, "step": 43500 }, { "epoch": 0.17, "learning_rate": 4.713840122476428e-05, "loss": 5.6602, "step": 44000 }, { "epoch": 0.17, "learning_rate": 4.710588305686387e-05, "loss": 5.6691, "step": 44500 }, { "epoch": 0.18, "learning_rate": 4.707336488896347e-05, "loss": 5.6767, "step": 45000 }, { "epoch": 0.18, "learning_rate": 4.704084672106306e-05, "loss": 5.6499, "step": 45500 }, { "epoch": 0.18, "learning_rate": 4.700832855316266e-05, "loss": 5.669, "step": 46000 }, { "epoch": 0.18, "learning_rate": 4.697581038526225e-05, "loss": 5.6641, "step": 46500 }, { "epoch": 0.18, "learning_rate": 4.694329221736185e-05, "loss": 5.6509, "step": 47000 }, { "epoch": 0.19, "learning_rate": 4.691077404946144e-05, "loss": 5.6501, "step": 47500 }, { "epoch": 0.19, "learning_rate": 4.687825588156103e-05, "loss": 5.6611, "step": 48000 }, { "epoch": 0.19, "learning_rate": 4.684573771366062e-05, "loss": 5.6626, "step": 48500 }, { "epoch": 0.19, "learning_rate": 4.681321954576022e-05, "loss": 5.6591, "step": 49000 }, { "epoch": 0.19, "learning_rate": 4.678070137785981e-05, "loss": 5.6519, "step": 49500 }, { "epoch": 0.2, "learning_rate": 4.6748183209959404e-05, "loss": 5.6552, "step": 50000 }, { "epoch": 0.2, "learning_rate": 4.6715665042058996e-05, "loss": 5.6542, "step": 50500 }, { "epoch": 0.2, "learning_rate": 4.6683146874158594e-05, "loss": 5.6521, "step": 51000 }, { "epoch": 0.2, "learning_rate": 4.6650628706258186e-05, "loss": 5.6631, "step": 51500 }, { "epoch": 0.2, "learning_rate": 4.6618110538357784e-05, "loss": 5.6296, "step": 52000 }, { "epoch": 0.2, "learning_rate": 4.6585592370457376e-05, "loss": 5.6541, "step": 52500 }, { "epoch": 0.21, "learning_rate": 4.6553074202556974e-05, "loss": 5.6373, "step": 53000 }, { "epoch": 0.21, "learning_rate": 4.6520556034656566e-05, "loss": 5.6358, "step": 53500 }, { "epoch": 0.21, "learning_rate": 4.648803786675616e-05, "loss": 5.6654, "step": 54000 }, { "epoch": 0.21, "learning_rate": 4.6455519698855756e-05, "loss": 5.6481, "step": 54500 }, { "epoch": 0.21, "learning_rate": 4.642300153095535e-05, "loss": 5.6545, "step": 55000 }, { "epoch": 0.22, "learning_rate": 4.6390483363054946e-05, "loss": 5.6379, "step": 55500 }, { "epoch": 0.22, "learning_rate": 4.635796519515454e-05, "loss": 5.6537, "step": 56000 }, { "epoch": 0.22, "learning_rate": 4.632544702725413e-05, "loss": 5.6611, "step": 56500 }, { "epoch": 0.22, "learning_rate": 4.629292885935372e-05, "loss": 5.6639, "step": 57000 }, { "epoch": 0.22, "learning_rate": 4.626041069145331e-05, "loss": 5.6579, "step": 57500 }, { "epoch": 0.23, "learning_rate": 4.622789252355291e-05, "loss": 5.6714, "step": 58000 }, { "epoch": 0.23, "learning_rate": 4.61953743556525e-05, "loss": 5.6345, "step": 58500 }, { "epoch": 0.23, "learning_rate": 4.61628561877521e-05, "loss": 5.6457, "step": 59000 }, { "epoch": 0.23, "learning_rate": 4.613033801985169e-05, "loss": 5.6613, "step": 59500 }, { "epoch": 0.23, "learning_rate": 4.6097819851951284e-05, "loss": 5.6517, "step": 60000 }, { "epoch": 0.24, "learning_rate": 4.606530168405088e-05, "loss": 5.6328, "step": 60500 }, { "epoch": 0.24, "learning_rate": 4.6032783516150474e-05, "loss": 5.6311, "step": 61000 }, { "epoch": 0.24, "learning_rate": 4.600026534825007e-05, "loss": 5.6228, "step": 61500 }, { "epoch": 0.24, "learning_rate": 4.5967747180349665e-05, "loss": 5.6283, "step": 62000 }, { "epoch": 0.24, "learning_rate": 4.593522901244926e-05, "loss": 5.656, "step": 62500 }, { "epoch": 0.25, "learning_rate": 4.5902710844548855e-05, "loss": 5.6389, "step": 63000 }, { "epoch": 0.25, "learning_rate": 4.5870192676648446e-05, "loss": 5.6361, "step": 63500 }, { "epoch": 0.25, "learning_rate": 4.583767450874804e-05, "loss": 5.6305, "step": 64000 }, { "epoch": 0.25, "learning_rate": 4.580515634084763e-05, "loss": 5.6367, "step": 64500 }, { "epoch": 0.25, "learning_rate": 4.577263817294723e-05, "loss": 5.6393, "step": 65000 }, { "epoch": 0.26, "learning_rate": 4.574012000504682e-05, "loss": 5.6073, "step": 65500 }, { "epoch": 0.26, "learning_rate": 4.570760183714641e-05, "loss": 5.6139, "step": 66000 }, { "epoch": 0.26, "learning_rate": 4.567508366924601e-05, "loss": 5.619, "step": 66500 }, { "epoch": 0.26, "learning_rate": 4.56425655013456e-05, "loss": 5.6307, "step": 67000 }, { "epoch": 0.26, "learning_rate": 4.56100473334452e-05, "loss": 5.634, "step": 67500 }, { "epoch": 0.27, "learning_rate": 4.557752916554479e-05, "loss": 5.6234, "step": 68000 }, { "epoch": 0.27, "learning_rate": 4.554501099764439e-05, "loss": 5.6219, "step": 68500 }, { "epoch": 0.27, "learning_rate": 4.551249282974398e-05, "loss": 5.6326, "step": 69000 }, { "epoch": 0.27, "learning_rate": 4.547997466184357e-05, "loss": 5.6293, "step": 69500 }, { "epoch": 0.27, "learning_rate": 4.544745649394317e-05, "loss": 5.6153, "step": 70000 }, { "epoch": 0.28, "learning_rate": 4.541493832604276e-05, "loss": 5.6193, "step": 70500 }, { "epoch": 0.28, "learning_rate": 4.5382420158142355e-05, "loss": 5.62, "step": 71000 }, { "epoch": 0.28, "learning_rate": 4.5349901990241946e-05, "loss": 5.6276, "step": 71500 }, { "epoch": 0.28, "learning_rate": 4.5317383822341545e-05, "loss": 5.6065, "step": 72000 }, { "epoch": 0.28, "learning_rate": 4.5284865654441136e-05, "loss": 5.6008, "step": 72500 }, { "epoch": 0.28, "learning_rate": 4.525234748654073e-05, "loss": 5.624, "step": 73000 }, { "epoch": 0.29, "learning_rate": 4.5219829318640327e-05, "loss": 5.6111, "step": 73500 }, { "epoch": 0.29, "learning_rate": 4.518731115073992e-05, "loss": 5.6309, "step": 74000 }, { "epoch": 0.29, "learning_rate": 4.5154792982839517e-05, "loss": 5.6356, "step": 74500 }, { "epoch": 0.29, "learning_rate": 4.512227481493911e-05, "loss": 5.6045, "step": 75000 }, { "epoch": 0.29, "learning_rate": 4.50897566470387e-05, "loss": 5.6079, "step": 75500 }, { "epoch": 0.3, "learning_rate": 4.50572384791383e-05, "loss": 5.609, "step": 76000 }, { "epoch": 0.3, "learning_rate": 4.502472031123789e-05, "loss": 5.6262, "step": 76500 }, { "epoch": 0.3, "learning_rate": 4.499220214333749e-05, "loss": 5.5966, "step": 77000 }, { "epoch": 0.3, "learning_rate": 4.495968397543708e-05, "loss": 5.598, "step": 77500 }, { "epoch": 0.3, "learning_rate": 4.492716580753668e-05, "loss": 5.618, "step": 78000 }, { "epoch": 0.31, "learning_rate": 4.489464763963627e-05, "loss": 5.6125, "step": 78500 }, { "epoch": 0.31, "learning_rate": 4.486212947173586e-05, "loss": 5.6182, "step": 79000 }, { "epoch": 0.31, "learning_rate": 4.482961130383545e-05, "loss": 5.5995, "step": 79500 }, { "epoch": 0.31, "learning_rate": 4.4797093135935045e-05, "loss": 5.6079, "step": 80000 }, { "epoch": 0.31, "learning_rate": 4.4764574968034643e-05, "loss": 5.6138, "step": 80500 }, { "epoch": 0.32, "learning_rate": 4.4732056800134235e-05, "loss": 5.6076, "step": 81000 }, { "epoch": 0.32, "learning_rate": 4.4699538632233834e-05, "loss": 5.598, "step": 81500 }, { "epoch": 0.32, "learning_rate": 4.4667020464333425e-05, "loss": 5.6038, "step": 82000 }, { "epoch": 0.32, "learning_rate": 4.463450229643302e-05, "loss": 5.5977, "step": 82500 }, { "epoch": 0.32, "learning_rate": 4.4601984128532615e-05, "loss": 5.621, "step": 83000 }, { "epoch": 0.33, "learning_rate": 4.456946596063221e-05, "loss": 5.6076, "step": 83500 }, { "epoch": 0.33, "learning_rate": 4.4536947792731805e-05, "loss": 5.6156, "step": 84000 }, { "epoch": 0.33, "learning_rate": 4.45044296248314e-05, "loss": 5.6229, "step": 84500 }, { "epoch": 0.33, "learning_rate": 4.447191145693099e-05, "loss": 5.5821, "step": 85000 }, { "epoch": 0.33, "learning_rate": 4.443939328903059e-05, "loss": 5.5937, "step": 85500 }, { "epoch": 0.34, "learning_rate": 4.440687512113018e-05, "loss": 5.5981, "step": 86000 }, { "epoch": 0.34, "learning_rate": 4.437435695322977e-05, "loss": 5.6175, "step": 86500 }, { "epoch": 0.34, "learning_rate": 4.434183878532936e-05, "loss": 5.5978, "step": 87000 }, { "epoch": 0.34, "learning_rate": 4.430932061742896e-05, "loss": 5.5959, "step": 87500 }, { "epoch": 0.34, "learning_rate": 4.427680244952855e-05, "loss": 5.5779, "step": 88000 }, { "epoch": 0.35, "learning_rate": 4.4244284281628144e-05, "loss": 5.6002, "step": 88500 }, { "epoch": 0.35, "learning_rate": 4.421176611372774e-05, "loss": 5.6177, "step": 89000 }, { "epoch": 0.35, "learning_rate": 4.4179247945827334e-05, "loss": 5.6038, "step": 89500 }, { "epoch": 0.35, "learning_rate": 4.414672977792693e-05, "loss": 5.6033, "step": 90000 }, { "epoch": 0.35, "learning_rate": 4.4114211610026524e-05, "loss": 5.6066, "step": 90500 }, { "epoch": 0.36, "learning_rate": 4.408169344212612e-05, "loss": 5.605, "step": 91000 }, { "epoch": 0.36, "learning_rate": 4.4049175274225714e-05, "loss": 5.613, "step": 91500 }, { "epoch": 0.36, "learning_rate": 4.4016657106325305e-05, "loss": 5.5924, "step": 92000 }, { "epoch": 0.36, "learning_rate": 4.3984138938424904e-05, "loss": 5.5983, "step": 92500 }, { "epoch": 0.36, "learning_rate": 4.3951620770524496e-05, "loss": 5.5947, "step": 93000 }, { "epoch": 0.36, "learning_rate": 4.391910260262409e-05, "loss": 5.5848, "step": 93500 }, { "epoch": 0.37, "learning_rate": 4.388658443472368e-05, "loss": 5.5853, "step": 94000 }, { "epoch": 0.37, "learning_rate": 4.385406626682327e-05, "loss": 5.5949, "step": 94500 }, { "epoch": 0.37, "learning_rate": 4.382154809892287e-05, "loss": 5.5795, "step": 95000 }, { "epoch": 0.37, "learning_rate": 4.378902993102246e-05, "loss": 5.5979, "step": 95500 }, { "epoch": 0.37, "learning_rate": 4.375651176312206e-05, "loss": 5.5873, "step": 96000 }, { "epoch": 0.38, "learning_rate": 4.372399359522165e-05, "loss": 5.6173, "step": 96500 }, { "epoch": 0.38, "learning_rate": 4.369147542732125e-05, "loss": 5.6014, "step": 97000 }, { "epoch": 0.38, "learning_rate": 4.365895725942084e-05, "loss": 5.597, "step": 97500 }, { "epoch": 0.38, "learning_rate": 4.362643909152043e-05, "loss": 5.5682, "step": 98000 }, { "epoch": 0.38, "learning_rate": 4.359392092362003e-05, "loss": 5.5819, "step": 98500 }, { "epoch": 0.39, "learning_rate": 4.356140275571962e-05, "loss": 5.5784, "step": 99000 }, { "epoch": 0.39, "learning_rate": 4.352888458781922e-05, "loss": 5.5703, "step": 99500 }, { "epoch": 0.39, "learning_rate": 4.349636641991881e-05, "loss": 5.5705, "step": 100000 }, { "epoch": 0.39, "learning_rate": 4.3463848252018404e-05, "loss": 5.5742, "step": 100500 }, { "epoch": 0.39, "learning_rate": 4.3431330084118e-05, "loss": 5.6006, "step": 101000 }, { "epoch": 0.4, "learning_rate": 4.3398811916217594e-05, "loss": 5.5678, "step": 101500 }, { "epoch": 0.4, "learning_rate": 4.3366293748317186e-05, "loss": 5.5967, "step": 102000 }, { "epoch": 0.4, "learning_rate": 4.333377558041678e-05, "loss": 5.5938, "step": 102500 }, { "epoch": 0.4, "learning_rate": 4.3301257412516376e-05, "loss": 5.5844, "step": 103000 }, { "epoch": 0.4, "learning_rate": 4.326873924461597e-05, "loss": 5.5882, "step": 103500 }, { "epoch": 0.41, "learning_rate": 4.323622107671556e-05, "loss": 5.5708, "step": 104000 }, { "epoch": 0.41, "learning_rate": 4.320370290881516e-05, "loss": 5.5687, "step": 104500 }, { "epoch": 0.41, "learning_rate": 4.317118474091475e-05, "loss": 5.5928, "step": 105000 }, { "epoch": 0.41, "learning_rate": 4.313866657301435e-05, "loss": 5.5926, "step": 105500 }, { "epoch": 0.41, "learning_rate": 4.310614840511394e-05, "loss": 5.577, "step": 106000 }, { "epoch": 0.42, "learning_rate": 4.307363023721354e-05, "loss": 5.5642, "step": 106500 }, { "epoch": 0.42, "learning_rate": 4.304111206931313e-05, "loss": 5.5927, "step": 107000 }, { "epoch": 0.42, "learning_rate": 4.300859390141272e-05, "loss": 5.5954, "step": 107500 }, { "epoch": 0.42, "learning_rate": 4.297607573351232e-05, "loss": 5.5789, "step": 108000 }, { "epoch": 0.42, "learning_rate": 4.294355756561191e-05, "loss": 5.5541, "step": 108500 }, { "epoch": 0.43, "learning_rate": 4.29110393977115e-05, "loss": 5.5594, "step": 109000 }, { "epoch": 0.43, "learning_rate": 4.2878521229811094e-05, "loss": 5.5868, "step": 109500 }, { "epoch": 0.43, "learning_rate": 4.2846003061910686e-05, "loss": 5.5632, "step": 110000 }, { "epoch": 0.43, "learning_rate": 4.2813484894010284e-05, "loss": 5.5895, "step": 110500 }, { "epoch": 0.43, "learning_rate": 4.2780966726109876e-05, "loss": 5.594, "step": 111000 }, { "epoch": 0.44, "learning_rate": 4.2748448558209475e-05, "loss": 5.5825, "step": 111500 }, { "epoch": 0.44, "learning_rate": 4.2715930390309066e-05, "loss": 5.588, "step": 112000 }, { "epoch": 0.44, "learning_rate": 4.2683412222408665e-05, "loss": 5.5711, "step": 112500 }, { "epoch": 0.44, "learning_rate": 4.2650894054508256e-05, "loss": 5.5995, "step": 113000 }, { "epoch": 0.44, "learning_rate": 4.261837588660785e-05, "loss": 5.571, "step": 113500 }, { "epoch": 0.44, "learning_rate": 4.2585857718707446e-05, "loss": 5.5743, "step": 114000 }, { "epoch": 0.45, "learning_rate": 4.255333955080704e-05, "loss": 5.5766, "step": 114500 }, { "epoch": 0.45, "learning_rate": 4.2520821382906636e-05, "loss": 5.5705, "step": 115000 }, { "epoch": 0.45, "learning_rate": 4.248830321500623e-05, "loss": 5.5684, "step": 115500 }, { "epoch": 0.45, "learning_rate": 4.245578504710582e-05, "loss": 5.5593, "step": 116000 }, { "epoch": 0.45, "learning_rate": 4.242326687920541e-05, "loss": 5.5969, "step": 116500 }, { "epoch": 0.46, "learning_rate": 4.2390748711305e-05, "loss": 5.5724, "step": 117000 }, { "epoch": 0.46, "learning_rate": 4.23582305434046e-05, "loss": 5.5601, "step": 117500 }, { "epoch": 0.46, "learning_rate": 4.232571237550419e-05, "loss": 5.572, "step": 118000 }, { "epoch": 0.46, "learning_rate": 4.229319420760379e-05, "loss": 5.5733, "step": 118500 }, { "epoch": 0.46, "learning_rate": 4.226067603970338e-05, "loss": 5.5782, "step": 119000 }, { "epoch": 0.47, "learning_rate": 4.2228157871802975e-05, "loss": 5.5429, "step": 119500 }, { "epoch": 0.47, "learning_rate": 4.219563970390257e-05, "loss": 5.5873, "step": 120000 }, { "epoch": 0.47, "learning_rate": 4.2163121536002165e-05, "loss": 5.5707, "step": 120500 }, { "epoch": 0.47, "learning_rate": 4.213060336810176e-05, "loss": 5.5741, "step": 121000 }, { "epoch": 0.47, "learning_rate": 4.2098085200201355e-05, "loss": 5.5847, "step": 121500 }, { "epoch": 0.48, "learning_rate": 4.206556703230095e-05, "loss": 5.5845, "step": 122000 }, { "epoch": 0.48, "learning_rate": 4.2033048864400545e-05, "loss": 5.5769, "step": 122500 }, { "epoch": 0.48, "learning_rate": 4.2000530696500137e-05, "loss": 5.5777, "step": 123000 }, { "epoch": 0.48, "learning_rate": 4.1968012528599735e-05, "loss": 5.5624, "step": 123500 }, { "epoch": 0.48, "learning_rate": 4.1935494360699327e-05, "loss": 5.5753, "step": 124000 }, { "epoch": 0.49, "learning_rate": 4.190297619279892e-05, "loss": 5.572, "step": 124500 }, { "epoch": 0.49, "learning_rate": 4.187045802489851e-05, "loss": 5.5825, "step": 125000 }, { "epoch": 0.49, "learning_rate": 4.183793985699811e-05, "loss": 5.5775, "step": 125500 }, { "epoch": 0.49, "learning_rate": 4.18054216890977e-05, "loss": 5.5575, "step": 126000 }, { "epoch": 0.49, "learning_rate": 4.177290352119729e-05, "loss": 5.5643, "step": 126500 }, { "epoch": 0.5, "learning_rate": 4.174038535329689e-05, "loss": 5.5624, "step": 127000 }, { "epoch": 0.5, "learning_rate": 4.170786718539648e-05, "loss": 5.57, "step": 127500 }, { "epoch": 0.5, "learning_rate": 4.167534901749608e-05, "loss": 5.581, "step": 128000 }, { "epoch": 0.5, "learning_rate": 4.164283084959567e-05, "loss": 5.5746, "step": 128500 }, { "epoch": 0.5, "learning_rate": 4.161031268169526e-05, "loss": 5.5536, "step": 129000 }, { "epoch": 0.51, "learning_rate": 4.157779451379486e-05, "loss": 5.5706, "step": 129500 }, { "epoch": 0.51, "learning_rate": 4.1545276345894453e-05, "loss": 5.5287, "step": 130000 }, { "epoch": 0.51, "learning_rate": 4.151275817799405e-05, "loss": 5.5688, "step": 130500 }, { "epoch": 0.51, "learning_rate": 4.1480240010093644e-05, "loss": 5.5577, "step": 131000 }, { "epoch": 0.51, "learning_rate": 4.1447721842193235e-05, "loss": 5.5561, "step": 131500 }, { "epoch": 0.52, "learning_rate": 4.141520367429283e-05, "loss": 5.5381, "step": 132000 }, { "epoch": 0.52, "learning_rate": 4.138268550639242e-05, "loss": 5.5509, "step": 132500 }, { "epoch": 0.52, "learning_rate": 4.135016733849202e-05, "loss": 5.5589, "step": 133000 }, { "epoch": 0.52, "learning_rate": 4.131764917059161e-05, "loss": 5.5433, "step": 133500 }, { "epoch": 0.52, "learning_rate": 4.128513100269121e-05, "loss": 5.5809, "step": 134000 }, { "epoch": 0.52, "learning_rate": 4.12526128347908e-05, "loss": 5.5478, "step": 134500 }, { "epoch": 0.53, "learning_rate": 4.122009466689039e-05, "loss": 5.5663, "step": 135000 }, { "epoch": 0.53, "learning_rate": 4.118757649898999e-05, "loss": 5.529, "step": 135500 }, { "epoch": 0.53, "learning_rate": 4.115505833108958e-05, "loss": 5.5764, "step": 136000 }, { "epoch": 0.53, "learning_rate": 4.112254016318918e-05, "loss": 5.5561, "step": 136500 }, { "epoch": 0.53, "learning_rate": 4.109002199528877e-05, "loss": 5.5781, "step": 137000 }, { "epoch": 0.54, "learning_rate": 4.105750382738837e-05, "loss": 5.572, "step": 137500 }, { "epoch": 0.54, "learning_rate": 4.102498565948796e-05, "loss": 5.5631, "step": 138000 }, { "epoch": 0.54, "learning_rate": 4.099246749158755e-05, "loss": 5.5649, "step": 138500 }, { "epoch": 0.54, "learning_rate": 4.0959949323687144e-05, "loss": 5.5553, "step": 139000 }, { "epoch": 0.54, "learning_rate": 4.0927431155786735e-05, "loss": 5.5324, "step": 139500 }, { "epoch": 0.55, "learning_rate": 4.0894912987886334e-05, "loss": 5.5562, "step": 140000 }, { "epoch": 0.55, "learning_rate": 4.0862394819985925e-05, "loss": 5.5345, "step": 140500 }, { "epoch": 0.55, "learning_rate": 4.0829876652085524e-05, "loss": 5.5513, "step": 141000 }, { "epoch": 0.55, "learning_rate": 4.0797358484185115e-05, "loss": 5.5401, "step": 141500 }, { "epoch": 0.55, "learning_rate": 4.076484031628471e-05, "loss": 5.5566, "step": 142000 }, { "epoch": 0.56, "learning_rate": 4.0732322148384306e-05, "loss": 5.538, "step": 142500 }, { "epoch": 0.56, "learning_rate": 4.06998039804839e-05, "loss": 5.5495, "step": 143000 }, { "epoch": 0.56, "learning_rate": 4.0667285812583496e-05, "loss": 5.5551, "step": 143500 }, { "epoch": 0.56, "learning_rate": 4.063476764468309e-05, "loss": 5.5617, "step": 144000 }, { "epoch": 0.56, "learning_rate": 4.060224947678268e-05, "loss": 5.5654, "step": 144500 }, { "epoch": 0.57, "learning_rate": 4.056973130888228e-05, "loss": 5.5485, "step": 145000 }, { "epoch": 0.57, "learning_rate": 4.053721314098187e-05, "loss": 5.5328, "step": 145500 }, { "epoch": 0.57, "learning_rate": 4.050469497308146e-05, "loss": 5.5312, "step": 146000 }, { "epoch": 0.57, "learning_rate": 4.047217680518106e-05, "loss": 5.5306, "step": 146500 }, { "epoch": 0.57, "learning_rate": 4.043965863728065e-05, "loss": 5.5662, "step": 147000 }, { "epoch": 0.58, "learning_rate": 4.040714046938024e-05, "loss": 5.531, "step": 147500 }, { "epoch": 0.58, "learning_rate": 4.0374622301479834e-05, "loss": 5.5477, "step": 148000 }, { "epoch": 0.58, "learning_rate": 4.034210413357943e-05, "loss": 5.5436, "step": 148500 }, { "epoch": 0.58, "learning_rate": 4.0309585965679024e-05, "loss": 5.5617, "step": 149000 }, { "epoch": 0.58, "learning_rate": 4.027706779777862e-05, "loss": 5.5636, "step": 149500 }, { "epoch": 0.59, "learning_rate": 4.0244549629878214e-05, "loss": 5.5282, "step": 150000 }, { "epoch": 0.59, "learning_rate": 4.021203146197781e-05, "loss": 5.55, "step": 150500 }, { "epoch": 0.59, "learning_rate": 4.0179513294077404e-05, "loss": 5.5378, "step": 151000 }, { "epoch": 0.59, "learning_rate": 4.0146995126176996e-05, "loss": 5.5463, "step": 151500 }, { "epoch": 0.59, "learning_rate": 4.0114476958276594e-05, "loss": 5.5292, "step": 152000 }, { "epoch": 0.6, "learning_rate": 4.0081958790376186e-05, "loss": 5.5282, "step": 152500 }, { "epoch": 0.6, "learning_rate": 4.0049440622475784e-05, "loss": 5.5575, "step": 153000 }, { "epoch": 0.6, "learning_rate": 4.0016922454575376e-05, "loss": 5.5299, "step": 153500 }, { "epoch": 0.6, "learning_rate": 3.998440428667497e-05, "loss": 5.55, "step": 154000 }, { "epoch": 0.6, "learning_rate": 3.995188611877456e-05, "loss": 5.5286, "step": 154500 }, { "epoch": 0.6, "learning_rate": 3.991936795087415e-05, "loss": 5.5405, "step": 155000 }, { "epoch": 0.61, "learning_rate": 3.988684978297375e-05, "loss": 5.5244, "step": 155500 }, { "epoch": 0.61, "learning_rate": 3.985433161507334e-05, "loss": 5.5208, "step": 156000 }, { "epoch": 0.61, "learning_rate": 3.982181344717294e-05, "loss": 5.5472, "step": 156500 }, { "epoch": 0.61, "learning_rate": 3.978929527927253e-05, "loss": 5.5582, "step": 157000 }, { "epoch": 0.61, "learning_rate": 3.975677711137212e-05, "loss": 5.5539, "step": 157500 }, { "epoch": 0.62, "learning_rate": 3.972425894347172e-05, "loss": 5.5463, "step": 158000 }, { "epoch": 0.62, "learning_rate": 3.969174077557131e-05, "loss": 5.5498, "step": 158500 }, { "epoch": 0.62, "learning_rate": 3.965922260767091e-05, "loss": 5.5299, "step": 159000 }, { "epoch": 0.62, "learning_rate": 3.96267044397705e-05, "loss": 5.5384, "step": 159500 }, { "epoch": 0.62, "learning_rate": 3.95941862718701e-05, "loss": 5.5288, "step": 160000 }, { "epoch": 0.63, "learning_rate": 3.956166810396969e-05, "loss": 5.5213, "step": 160500 }, { "epoch": 0.63, "learning_rate": 3.9529149936069285e-05, "loss": 5.5386, "step": 161000 }, { "epoch": 0.63, "learning_rate": 3.9496631768168876e-05, "loss": 5.5413, "step": 161500 }, { "epoch": 0.63, "learning_rate": 3.946411360026847e-05, "loss": 5.5387, "step": 162000 }, { "epoch": 0.63, "learning_rate": 3.9431595432368066e-05, "loss": 5.5342, "step": 162500 }, { "epoch": 0.64, "learning_rate": 3.939907726446766e-05, "loss": 5.5485, "step": 163000 }, { "epoch": 0.64, "learning_rate": 3.936655909656725e-05, "loss": 5.532, "step": 163500 }, { "epoch": 0.64, "learning_rate": 3.933404092866685e-05, "loss": 5.518, "step": 164000 }, { "epoch": 0.64, "learning_rate": 3.930152276076644e-05, "loss": 5.5319, "step": 164500 }, { "epoch": 0.64, "learning_rate": 3.926900459286604e-05, "loss": 5.5335, "step": 165000 }, { "epoch": 0.65, "learning_rate": 3.923648642496563e-05, "loss": 5.5488, "step": 165500 }, { "epoch": 0.65, "learning_rate": 3.920396825706523e-05, "loss": 5.5252, "step": 166000 }, { "epoch": 0.65, "learning_rate": 3.917145008916482e-05, "loss": 5.5141, "step": 166500 }, { "epoch": 0.65, "learning_rate": 3.913893192126441e-05, "loss": 5.5417, "step": 167000 }, { "epoch": 0.65, "learning_rate": 3.910641375336401e-05, "loss": 5.5355, "step": 167500 }, { "epoch": 0.66, "learning_rate": 3.90738955854636e-05, "loss": 5.5415, "step": 168000 }, { "epoch": 0.66, "learning_rate": 3.904137741756319e-05, "loss": 5.534, "step": 168500 }, { "epoch": 0.66, "learning_rate": 3.9008859249662785e-05, "loss": 5.5257, "step": 169000 }, { "epoch": 0.66, "learning_rate": 3.897634108176238e-05, "loss": 5.5221, "step": 169500 }, { "epoch": 0.66, "learning_rate": 3.8943822913861975e-05, "loss": 5.5364, "step": 170000 }, { "epoch": 0.67, "learning_rate": 3.8911304745961566e-05, "loss": 5.5296, "step": 170500 }, { "epoch": 0.67, "learning_rate": 3.8878786578061165e-05, "loss": 5.5197, "step": 171000 }, { "epoch": 0.67, "learning_rate": 3.8846268410160756e-05, "loss": 5.5615, "step": 171500 }, { "epoch": 0.67, "learning_rate": 3.8813750242260355e-05, "loss": 5.5354, "step": 172000 }, { "epoch": 0.67, "learning_rate": 3.8781232074359947e-05, "loss": 5.5192, "step": 172500 }, { "epoch": 0.68, "learning_rate": 3.874871390645954e-05, "loss": 5.5497, "step": 173000 }, { "epoch": 0.68, "learning_rate": 3.8716195738559137e-05, "loss": 5.5419, "step": 173500 }, { "epoch": 0.68, "learning_rate": 3.868367757065873e-05, "loss": 5.5366, "step": 174000 }, { "epoch": 0.68, "learning_rate": 3.865115940275833e-05, "loss": 5.5031, "step": 174500 }, { "epoch": 0.68, "learning_rate": 3.861864123485792e-05, "loss": 5.5425, "step": 175000 }, { "epoch": 0.68, "learning_rate": 3.858612306695752e-05, "loss": 5.5195, "step": 175500 }, { "epoch": 0.69, "learning_rate": 3.855360489905711e-05, "loss": 5.5235, "step": 176000 }, { "epoch": 0.69, "learning_rate": 3.85210867311567e-05, "loss": 5.5299, "step": 176500 }, { "epoch": 0.69, "learning_rate": 3.848856856325629e-05, "loss": 5.5233, "step": 177000 }, { "epoch": 0.69, "learning_rate": 3.845605039535588e-05, "loss": 5.5386, "step": 177500 }, { "epoch": 0.69, "learning_rate": 3.842353222745548e-05, "loss": 5.5233, "step": 178000 }, { "epoch": 0.7, "learning_rate": 3.839101405955507e-05, "loss": 5.5128, "step": 178500 }, { "epoch": 0.7, "learning_rate": 3.8358495891654665e-05, "loss": 5.5286, "step": 179000 }, { "epoch": 0.7, "learning_rate": 3.8325977723754263e-05, "loss": 5.5022, "step": 179500 }, { "epoch": 0.7, "learning_rate": 3.8293459555853855e-05, "loss": 5.523, "step": 180000 }, { "epoch": 0.7, "learning_rate": 3.8260941387953454e-05, "loss": 5.5229, "step": 180500 }, { "epoch": 0.71, "learning_rate": 3.8228423220053045e-05, "loss": 5.5352, "step": 181000 }, { "epoch": 0.71, "learning_rate": 3.8195905052152644e-05, "loss": 5.5312, "step": 181500 }, { "epoch": 0.71, "learning_rate": 3.8163386884252235e-05, "loss": 5.5209, "step": 182000 }, { "epoch": 0.71, "learning_rate": 3.813086871635183e-05, "loss": 5.5273, "step": 182500 }, { "epoch": 0.71, "learning_rate": 3.8098350548451425e-05, "loss": 5.5092, "step": 183000 }, { "epoch": 0.72, "learning_rate": 3.806583238055102e-05, "loss": 5.5154, "step": 183500 }, { "epoch": 0.72, "learning_rate": 3.803331421265061e-05, "loss": 5.5246, "step": 184000 }, { "epoch": 0.72, "learning_rate": 3.80007960447502e-05, "loss": 5.5372, "step": 184500 }, { "epoch": 0.72, "learning_rate": 3.79682778768498e-05, "loss": 5.5258, "step": 185000 }, { "epoch": 0.72, "learning_rate": 3.793575970894939e-05, "loss": 5.5236, "step": 185500 }, { "epoch": 0.73, "learning_rate": 3.790324154104898e-05, "loss": 5.4988, "step": 186000 }, { "epoch": 0.73, "learning_rate": 3.787072337314858e-05, "loss": 5.5234, "step": 186500 }, { "epoch": 0.73, "learning_rate": 3.783820520524817e-05, "loss": 5.5285, "step": 187000 }, { "epoch": 0.73, "learning_rate": 3.780568703734777e-05, "loss": 5.5015, "step": 187500 }, { "epoch": 0.73, "learning_rate": 3.777316886944736e-05, "loss": 5.5222, "step": 188000 }, { "epoch": 0.74, "learning_rate": 3.7740650701546954e-05, "loss": 5.5113, "step": 188500 }, { "epoch": 0.74, "learning_rate": 3.770813253364655e-05, "loss": 5.524, "step": 189000 }, { "epoch": 0.74, "learning_rate": 3.7675614365746144e-05, "loss": 5.5357, "step": 189500 }, { "epoch": 0.74, "learning_rate": 3.764309619784574e-05, "loss": 5.5221, "step": 190000 }, { "epoch": 0.74, "learning_rate": 3.7610578029945334e-05, "loss": 5.5226, "step": 190500 }, { "epoch": 0.75, "learning_rate": 3.7578059862044925e-05, "loss": 5.5204, "step": 191000 }, { "epoch": 0.75, "learning_rate": 3.754554169414452e-05, "loss": 5.5201, "step": 191500 }, { "epoch": 0.75, "learning_rate": 3.751302352624411e-05, "loss": 5.521, "step": 192000 }, { "epoch": 0.75, "learning_rate": 3.748050535834371e-05, "loss": 5.5334, "step": 192500 }, { "epoch": 0.75, "learning_rate": 3.74479871904433e-05, "loss": 5.4979, "step": 193000 }, { "epoch": 0.76, "learning_rate": 3.74154690225429e-05, "loss": 5.5098, "step": 193500 }, { "epoch": 0.76, "learning_rate": 3.738295085464249e-05, "loss": 5.5429, "step": 194000 }, { "epoch": 0.76, "learning_rate": 3.735043268674209e-05, "loss": 5.506, "step": 194500 }, { "epoch": 0.76, "learning_rate": 3.731791451884168e-05, "loss": 5.5116, "step": 195000 }, { "epoch": 0.76, "learning_rate": 3.728539635094127e-05, "loss": 5.5075, "step": 195500 }, { "epoch": 0.76, "learning_rate": 3.725287818304087e-05, "loss": 5.5159, "step": 196000 }, { "epoch": 0.77, "learning_rate": 3.722036001514046e-05, "loss": 5.5192, "step": 196500 }, { "epoch": 0.77, "learning_rate": 3.718784184724006e-05, "loss": 5.5109, "step": 197000 }, { "epoch": 0.77, "learning_rate": 3.715532367933965e-05, "loss": 5.496, "step": 197500 }, { "epoch": 0.77, "learning_rate": 3.712280551143924e-05, "loss": 5.5329, "step": 198000 }, { "epoch": 0.77, "learning_rate": 3.709028734353884e-05, "loss": 5.4942, "step": 198500 }, { "epoch": 0.78, "learning_rate": 3.705776917563843e-05, "loss": 5.5314, "step": 199000 }, { "epoch": 0.78, "learning_rate": 3.7025251007738024e-05, "loss": 5.5117, "step": 199500 }, { "epoch": 0.78, "learning_rate": 3.6992732839837616e-05, "loss": 5.5068, "step": 200000 }, { "epoch": 0.78, "learning_rate": 3.6960214671937214e-05, "loss": 5.5034, "step": 200500 }, { "epoch": 0.78, "learning_rate": 3.6927696504036806e-05, "loss": 5.5165, "step": 201000 }, { "epoch": 0.79, "learning_rate": 3.68951783361364e-05, "loss": 5.5228, "step": 201500 }, { "epoch": 0.79, "learning_rate": 3.6862660168235996e-05, "loss": 5.522, "step": 202000 }, { "epoch": 0.79, "learning_rate": 3.683014200033559e-05, "loss": 5.5127, "step": 202500 }, { "epoch": 0.79, "learning_rate": 3.6797623832435186e-05, "loss": 5.498, "step": 203000 }, { "epoch": 0.79, "learning_rate": 3.676510566453478e-05, "loss": 5.5175, "step": 203500 }, { "epoch": 0.8, "learning_rate": 3.6732587496634376e-05, "loss": 5.4934, "step": 204000 }, { "epoch": 0.8, "learning_rate": 3.670006932873397e-05, "loss": 5.5252, "step": 204500 }, { "epoch": 0.8, "learning_rate": 3.666755116083356e-05, "loss": 5.4944, "step": 205000 }, { "epoch": 0.8, "learning_rate": 3.663503299293316e-05, "loss": 5.4928, "step": 205500 }, { "epoch": 0.8, "learning_rate": 3.660251482503275e-05, "loss": 5.4842, "step": 206000 }, { "epoch": 0.81, "learning_rate": 3.656999665713234e-05, "loss": 5.4963, "step": 206500 }, { "epoch": 0.81, "learning_rate": 3.653747848923193e-05, "loss": 5.4839, "step": 207000 }, { "epoch": 0.81, "learning_rate": 3.6504960321331524e-05, "loss": 5.5047, "step": 207500 }, { "epoch": 0.81, "learning_rate": 3.647244215343112e-05, "loss": 5.5055, "step": 208000 }, { "epoch": 0.81, "learning_rate": 3.6439923985530714e-05, "loss": 5.467, "step": 208500 }, { "epoch": 0.82, "learning_rate": 3.640740581763031e-05, "loss": 5.4914, "step": 209000 }, { "epoch": 0.82, "learning_rate": 3.6374887649729904e-05, "loss": 5.4803, "step": 209500 }, { "epoch": 0.82, "learning_rate": 3.63423694818295e-05, "loss": 5.4874, "step": 210000 }, { "epoch": 0.82, "learning_rate": 3.6309851313929095e-05, "loss": 5.4186, "step": 210500 }, { "epoch": 0.82, "learning_rate": 3.6277333146028686e-05, "loss": 5.3267, "step": 211000 }, { "epoch": 0.83, "learning_rate": 3.6244814978128285e-05, "loss": 5.2962, "step": 211500 }, { "epoch": 0.83, "learning_rate": 3.6212296810227876e-05, "loss": 5.2441, "step": 212000 }, { "epoch": 0.83, "learning_rate": 3.6179778642327475e-05, "loss": 5.1665, "step": 212500 }, { "epoch": 0.83, "learning_rate": 3.6147260474427066e-05, "loss": 5.1096, "step": 213000 }, { "epoch": 0.83, "learning_rate": 3.611474230652666e-05, "loss": 5.0509, "step": 213500 }, { "epoch": 0.84, "learning_rate": 3.608222413862625e-05, "loss": 5.0168, "step": 214000 }, { "epoch": 0.84, "learning_rate": 3.604970597072584e-05, "loss": 4.9811, "step": 214500 }, { "epoch": 0.84, "learning_rate": 3.601718780282544e-05, "loss": 4.9535, "step": 215000 }, { "epoch": 0.84, "learning_rate": 3.598466963492503e-05, "loss": 4.9113, "step": 215500 }, { "epoch": 0.84, "learning_rate": 3.595215146702463e-05, "loss": 4.8715, "step": 216000 }, { "epoch": 0.84, "learning_rate": 3.591963329912422e-05, "loss": 4.8401, "step": 216500 }, { "epoch": 0.85, "learning_rate": 3.588711513122381e-05, "loss": 4.8103, "step": 217000 }, { "epoch": 0.85, "learning_rate": 3.585459696332341e-05, "loss": 4.7615, "step": 217500 }, { "epoch": 0.85, "learning_rate": 3.5822078795423e-05, "loss": 4.7477, "step": 218000 }, { "epoch": 0.85, "learning_rate": 3.57895606275226e-05, "loss": 4.6929, "step": 218500 }, { "epoch": 0.85, "learning_rate": 3.575704245962219e-05, "loss": 4.6674, "step": 219000 }, { "epoch": 0.86, "learning_rate": 3.572452429172179e-05, "loss": 4.6552, "step": 219500 }, { "epoch": 0.86, "learning_rate": 3.569200612382138e-05, "loss": 4.5982, "step": 220000 }, { "epoch": 0.86, "learning_rate": 3.5659487955920975e-05, "loss": 4.4937, "step": 220500 }, { "epoch": 0.86, "learning_rate": 3.562696978802057e-05, "loss": 4.3654, "step": 221000 }, { "epoch": 0.86, "learning_rate": 3.5594451620120165e-05, "loss": 4.249, "step": 221500 }, { "epoch": 0.87, "learning_rate": 3.5561933452219757e-05, "loss": 4.122, "step": 222000 }, { "epoch": 0.87, "learning_rate": 3.552941528431935e-05, "loss": 3.9966, "step": 222500 }, { "epoch": 0.87, "learning_rate": 3.549689711641894e-05, "loss": 3.8664, "step": 223000 }, { "epoch": 0.87, "learning_rate": 3.546437894851854e-05, "loss": 3.7561, "step": 223500 }, { "epoch": 0.87, "learning_rate": 3.543186078061813e-05, "loss": 3.6266, "step": 224000 }, { "epoch": 0.88, "learning_rate": 3.539934261271773e-05, "loss": 3.5324, "step": 224500 }, { "epoch": 0.88, "learning_rate": 3.536682444481732e-05, "loss": 3.439, "step": 225000 }, { "epoch": 0.88, "learning_rate": 3.533430627691692e-05, "loss": 3.3637, "step": 225500 }, { "epoch": 0.88, "learning_rate": 3.530178810901651e-05, "loss": 3.2801, "step": 226000 }, { "epoch": 0.88, "learning_rate": 3.52692699411161e-05, "loss": 3.1894, "step": 226500 }, { "epoch": 0.89, "learning_rate": 3.52367517732157e-05, "loss": 3.1034, "step": 227000 }, { "epoch": 0.89, "learning_rate": 3.520423360531529e-05, "loss": 3.0252, "step": 227500 }, { "epoch": 0.89, "learning_rate": 3.517171543741489e-05, "loss": 2.9289, "step": 228000 }, { "epoch": 0.89, "learning_rate": 3.513919726951448e-05, "loss": 2.8581, "step": 228500 }, { "epoch": 0.89, "learning_rate": 3.5106679101614073e-05, "loss": 2.8208, "step": 229000 }, { "epoch": 0.9, "learning_rate": 3.5074160933713665e-05, "loss": 2.7686, "step": 229500 }, { "epoch": 0.9, "learning_rate": 3.504164276581326e-05, "loss": 2.7256, "step": 230000 }, { "epoch": 0.9, "learning_rate": 3.5009124597912855e-05, "loss": 2.6784, "step": 230500 }, { "epoch": 0.9, "learning_rate": 3.497660643001245e-05, "loss": 2.6355, "step": 231000 }, { "epoch": 0.9, "learning_rate": 3.4944088262112045e-05, "loss": 2.6078, "step": 231500 }, { "epoch": 0.91, "learning_rate": 3.491157009421164e-05, "loss": 2.575, "step": 232000 }, { "epoch": 0.91, "learning_rate": 3.487905192631123e-05, "loss": 2.5266, "step": 232500 }, { "epoch": 0.91, "learning_rate": 3.484653375841083e-05, "loss": 2.5291, "step": 233000 }, { "epoch": 0.91, "learning_rate": 3.481401559051042e-05, "loss": 2.4816, "step": 233500 }, { "epoch": 0.91, "learning_rate": 3.478149742261002e-05, "loss": 2.4666, "step": 234000 }, { "epoch": 0.92, "learning_rate": 3.474897925470961e-05, "loss": 2.4444, "step": 234500 }, { "epoch": 0.92, "learning_rate": 3.471646108680921e-05, "loss": 2.4253, "step": 235000 }, { "epoch": 0.92, "learning_rate": 3.46839429189088e-05, "loss": 2.403, "step": 235500 }, { "epoch": 0.92, "learning_rate": 3.465142475100839e-05, "loss": 2.3755, "step": 236000 }, { "epoch": 0.92, "learning_rate": 3.461890658310798e-05, "loss": 2.35, "step": 236500 }, { "epoch": 0.92, "learning_rate": 3.4586388415207574e-05, "loss": 2.3292, "step": 237000 }, { "epoch": 0.93, "learning_rate": 3.455387024730717e-05, "loss": 2.3076, "step": 237500 }, { "epoch": 0.93, "learning_rate": 3.4521352079406764e-05, "loss": 2.2888, "step": 238000 }, { "epoch": 0.93, "learning_rate": 3.448883391150636e-05, "loss": 2.267, "step": 238500 }, { "epoch": 0.93, "learning_rate": 3.4456315743605954e-05, "loss": 2.2615, "step": 239000 }, { "epoch": 0.93, "learning_rate": 3.4423797575705545e-05, "loss": 2.2433, "step": 239500 }, { "epoch": 0.94, "learning_rate": 3.4391279407805144e-05, "loss": 2.2348, "step": 240000 }, { "epoch": 0.94, "learning_rate": 3.4358761239904735e-05, "loss": 2.2083, "step": 240500 }, { "epoch": 0.94, "learning_rate": 3.4326243072004334e-05, "loss": 2.2022, "step": 241000 }, { "epoch": 0.94, "learning_rate": 3.4293724904103926e-05, "loss": 2.2007, "step": 241500 }, { "epoch": 0.94, "learning_rate": 3.426120673620352e-05, "loss": 2.1813, "step": 242000 }, { "epoch": 0.95, "learning_rate": 3.4228688568303116e-05, "loss": 2.157, "step": 242500 }, { "epoch": 0.95, "learning_rate": 3.419617040040271e-05, "loss": 2.1462, "step": 243000 }, { "epoch": 0.95, "learning_rate": 3.41636522325023e-05, "loss": 2.1346, "step": 243500 }, { "epoch": 0.95, "learning_rate": 3.41311340646019e-05, "loss": 2.1271, "step": 244000 }, { "epoch": 0.95, "learning_rate": 3.409861589670149e-05, "loss": 2.1027, "step": 244500 }, { "epoch": 0.96, "learning_rate": 3.406609772880108e-05, "loss": 2.1023, "step": 245000 }, { "epoch": 0.96, "learning_rate": 3.403357956090067e-05, "loss": 2.0849, "step": 245500 }, { "epoch": 0.96, "learning_rate": 3.400106139300027e-05, "loss": 2.0843, "step": 246000 }, { "epoch": 0.96, "learning_rate": 3.396854322509986e-05, "loss": 2.0666, "step": 246500 }, { "epoch": 0.96, "learning_rate": 3.393602505719946e-05, "loss": 2.0578, "step": 247000 }, { "epoch": 0.97, "learning_rate": 3.390350688929905e-05, "loss": 2.0446, "step": 247500 }, { "epoch": 0.97, "learning_rate": 3.3870988721398644e-05, "loss": 2.0486, "step": 248000 }, { "epoch": 0.97, "learning_rate": 3.383847055349824e-05, "loss": 2.0247, "step": 248500 }, { "epoch": 0.97, "learning_rate": 3.3805952385597834e-05, "loss": 2.0286, "step": 249000 }, { "epoch": 0.97, "learning_rate": 3.377343421769743e-05, "loss": 2.0144, "step": 249500 }, { "epoch": 0.98, "learning_rate": 3.3740916049797024e-05, "loss": 2.0024, "step": 250000 }, { "epoch": 0.98, "learning_rate": 3.370839788189662e-05, "loss": 1.9928, "step": 250500 }, { "epoch": 0.98, "learning_rate": 3.3675879713996214e-05, "loss": 1.9791, "step": 251000 }, { "epoch": 0.98, "learning_rate": 3.3643361546095806e-05, "loss": 1.9735, "step": 251500 }, { "epoch": 0.98, "learning_rate": 3.36108433781954e-05, "loss": 1.9699, "step": 252000 }, { "epoch": 0.99, "learning_rate": 3.357832521029499e-05, "loss": 1.9683, "step": 252500 }, { "epoch": 0.99, "learning_rate": 3.354580704239459e-05, "loss": 1.9456, "step": 253000 }, { "epoch": 0.99, "learning_rate": 3.351328887449418e-05, "loss": 1.9365, "step": 253500 }, { "epoch": 0.99, "learning_rate": 3.348077070659378e-05, "loss": 1.9347, "step": 254000 }, { "epoch": 0.99, "learning_rate": 3.344825253869337e-05, "loss": 1.9325, "step": 254500 }, { "epoch": 1.0, "learning_rate": 3.341573437079296e-05, "loss": 1.9314, "step": 255000 }, { "epoch": 1.0, "learning_rate": 3.338321620289256e-05, "loss": 1.9199, "step": 255500 }, { "epoch": 1.0, "learning_rate": 3.335069803499215e-05, "loss": 1.905, "step": 256000 }, { "epoch": 1.0, "learning_rate": 3.331817986709175e-05, "loss": 1.9004, "step": 256500 }, { "epoch": 1.0, "learning_rate": 3.328566169919134e-05, "loss": 1.8919, "step": 257000 }, { "epoch": 1.0, "learning_rate": 3.325314353129093e-05, "loss": 1.8801, "step": 257500 }, { "epoch": 1.01, "learning_rate": 3.322062536339053e-05, "loss": 1.8772, "step": 258000 }, { "epoch": 1.01, "learning_rate": 3.318810719549012e-05, "loss": 1.8774, "step": 258500 }, { "epoch": 1.01, "learning_rate": 3.3155589027589714e-05, "loss": 1.8752, "step": 259000 }, { "epoch": 1.01, "learning_rate": 3.3123070859689306e-05, "loss": 1.852, "step": 259500 }, { "epoch": 1.01, "learning_rate": 3.3090552691788905e-05, "loss": 1.8492, "step": 260000 }, { "epoch": 1.02, "learning_rate": 3.3058034523888496e-05, "loss": 1.8562, "step": 260500 }, { "epoch": 1.02, "learning_rate": 3.302551635598809e-05, "loss": 1.843, "step": 261000 }, { "epoch": 1.02, "learning_rate": 3.2992998188087686e-05, "loss": 1.8321, "step": 261500 }, { "epoch": 1.02, "learning_rate": 3.296048002018728e-05, "loss": 1.8233, "step": 262000 }, { "epoch": 1.02, "learning_rate": 3.2927961852286876e-05, "loss": 1.8265, "step": 262500 }, { "epoch": 1.03, "learning_rate": 3.289544368438647e-05, "loss": 1.818, "step": 263000 }, { "epoch": 1.03, "learning_rate": 3.2862925516486066e-05, "loss": 1.8186, "step": 263500 }, { "epoch": 1.03, "learning_rate": 3.283040734858566e-05, "loss": 1.7974, "step": 264000 }, { "epoch": 1.03, "learning_rate": 3.279788918068525e-05, "loss": 1.7867, "step": 264500 }, { "epoch": 1.03, "learning_rate": 3.276537101278485e-05, "loss": 1.7938, "step": 265000 }, { "epoch": 1.04, "learning_rate": 3.273285284488444e-05, "loss": 1.7877, "step": 265500 }, { "epoch": 1.04, "learning_rate": 3.270033467698403e-05, "loss": 1.7923, "step": 266000 }, { "epoch": 1.04, "learning_rate": 3.266781650908362e-05, "loss": 1.7794, "step": 266500 }, { "epoch": 1.04, "learning_rate": 3.263529834118322e-05, "loss": 1.7809, "step": 267000 }, { "epoch": 1.04, "learning_rate": 3.260278017328281e-05, "loss": 1.7654, "step": 267500 }, { "epoch": 1.05, "learning_rate": 3.2570262005382405e-05, "loss": 1.7569, "step": 268000 }, { "epoch": 1.05, "learning_rate": 3.2537743837482e-05, "loss": 1.7653, "step": 268500 }, { "epoch": 1.05, "learning_rate": 3.2505225669581595e-05, "loss": 1.7554, "step": 269000 }, { "epoch": 1.05, "learning_rate": 3.247270750168119e-05, "loss": 1.7461, "step": 269500 }, { "epoch": 1.05, "learning_rate": 3.2440189333780785e-05, "loss": 1.7386, "step": 270000 }, { "epoch": 1.06, "learning_rate": 3.2407671165880376e-05, "loss": 1.738, "step": 270500 }, { "epoch": 1.06, "learning_rate": 3.2375152997979975e-05, "loss": 1.7343, "step": 271000 }, { "epoch": 1.06, "learning_rate": 3.2342634830079567e-05, "loss": 1.7334, "step": 271500 }, { "epoch": 1.06, "learning_rate": 3.2310116662179165e-05, "loss": 1.7163, "step": 272000 }, { "epoch": 1.06, "learning_rate": 3.2277598494278757e-05, "loss": 1.7176, "step": 272500 }, { "epoch": 1.07, "learning_rate": 3.2245080326378355e-05, "loss": 1.7203, "step": 273000 }, { "epoch": 1.07, "learning_rate": 3.221256215847795e-05, "loss": 1.7172, "step": 273500 }, { "epoch": 1.07, "learning_rate": 3.218004399057754e-05, "loss": 1.703, "step": 274000 }, { "epoch": 1.07, "learning_rate": 3.214752582267713e-05, "loss": 1.6993, "step": 274500 }, { "epoch": 1.07, "learning_rate": 3.211500765477672e-05, "loss": 1.6958, "step": 275000 }, { "epoch": 1.08, "learning_rate": 3.208248948687632e-05, "loss": 1.6965, "step": 275500 }, { "epoch": 1.08, "learning_rate": 3.204997131897591e-05, "loss": 1.6939, "step": 276000 }, { "epoch": 1.08, "learning_rate": 3.20174531510755e-05, "loss": 1.6792, "step": 276500 }, { "epoch": 1.08, "learning_rate": 3.19849349831751e-05, "loss": 1.6956, "step": 277000 }, { "epoch": 1.08, "learning_rate": 3.195241681527469e-05, "loss": 1.6888, "step": 277500 }, { "epoch": 1.08, "learning_rate": 3.191989864737429e-05, "loss": 1.6803, "step": 278000 }, { "epoch": 1.09, "learning_rate": 3.1887380479473883e-05, "loss": 1.6753, "step": 278500 }, { "epoch": 1.09, "learning_rate": 3.185486231157348e-05, "loss": 1.6731, "step": 279000 }, { "epoch": 1.09, "learning_rate": 3.1822344143673074e-05, "loss": 1.6584, "step": 279500 }, { "epoch": 1.09, "learning_rate": 3.1789825975772665e-05, "loss": 1.6632, "step": 280000 }, { "epoch": 1.09, "learning_rate": 3.1757307807872264e-05, "loss": 1.6526, "step": 280500 }, { "epoch": 1.1, "learning_rate": 3.1724789639971855e-05, "loss": 1.6596, "step": 281000 }, { "epoch": 1.1, "learning_rate": 3.169227147207145e-05, "loss": 1.6466, "step": 281500 }, { "epoch": 1.1, "learning_rate": 3.165975330417104e-05, "loss": 1.6503, "step": 282000 }, { "epoch": 1.1, "learning_rate": 3.162723513627063e-05, "loss": 1.6462, "step": 282500 }, { "epoch": 1.1, "learning_rate": 3.159471696837023e-05, "loss": 1.6459, "step": 283000 }, { "epoch": 1.11, "learning_rate": 3.156219880046982e-05, "loss": 1.6391, "step": 283500 }, { "epoch": 1.11, "learning_rate": 3.152968063256942e-05, "loss": 1.6301, "step": 284000 }, { "epoch": 1.11, "learning_rate": 3.149716246466901e-05, "loss": 1.627, "step": 284500 }, { "epoch": 1.11, "learning_rate": 3.146464429676861e-05, "loss": 1.6331, "step": 285000 }, { "epoch": 1.11, "learning_rate": 3.14321261288682e-05, "loss": 1.6254, "step": 285500 }, { "epoch": 1.12, "learning_rate": 3.139960796096779e-05, "loss": 1.6201, "step": 286000 }, { "epoch": 1.12, "learning_rate": 3.136708979306739e-05, "loss": 1.6213, "step": 286500 }, { "epoch": 1.12, "learning_rate": 3.133457162516698e-05, "loss": 1.6158, "step": 287000 }, { "epoch": 1.12, "learning_rate": 3.130205345726658e-05, "loss": 1.6142, "step": 287500 }, { "epoch": 1.12, "learning_rate": 3.126953528936617e-05, "loss": 1.6048, "step": 288000 }, { "epoch": 1.13, "learning_rate": 3.1237017121465764e-05, "loss": 1.6069, "step": 288500 }, { "epoch": 1.13, "learning_rate": 3.1204498953565355e-05, "loss": 1.601, "step": 289000 }, { "epoch": 1.13, "learning_rate": 3.117198078566495e-05, "loss": 1.6069, "step": 289500 }, { "epoch": 1.13, "learning_rate": 3.1139462617764545e-05, "loss": 1.5992, "step": 290000 }, { "epoch": 1.13, "learning_rate": 3.110694444986414e-05, "loss": 1.6011, "step": 290500 }, { "epoch": 1.14, "learning_rate": 3.1074426281963736e-05, "loss": 1.5995, "step": 291000 }, { "epoch": 1.14, "learning_rate": 3.104190811406333e-05, "loss": 1.5854, "step": 291500 }, { "epoch": 1.14, "learning_rate": 3.100938994616292e-05, "loss": 1.5855, "step": 292000 }, { "epoch": 1.14, "learning_rate": 3.097687177826252e-05, "loss": 1.58, "step": 292500 }, { "epoch": 1.14, "learning_rate": 3.094435361036211e-05, "loss": 1.5733, "step": 293000 }, { "epoch": 1.15, "learning_rate": 3.091183544246171e-05, "loss": 1.5734, "step": 293500 }, { "epoch": 1.15, "learning_rate": 3.08793172745613e-05, "loss": 1.5749, "step": 294000 }, { "epoch": 1.15, "learning_rate": 3.08467991066609e-05, "loss": 1.5793, "step": 294500 }, { "epoch": 1.15, "learning_rate": 3.081428093876049e-05, "loss": 1.57, "step": 295000 }, { "epoch": 1.15, "learning_rate": 3.078176277086008e-05, "loss": 1.559, "step": 295500 }, { "epoch": 1.16, "learning_rate": 3.074924460295968e-05, "loss": 1.5605, "step": 296000 }, { "epoch": 1.16, "learning_rate": 3.071672643505927e-05, "loss": 1.5528, "step": 296500 }, { "epoch": 1.16, "learning_rate": 3.068420826715886e-05, "loss": 1.5572, "step": 297000 }, { "epoch": 1.16, "learning_rate": 3.0651690099258454e-05, "loss": 1.5568, "step": 297500 }, { "epoch": 1.16, "learning_rate": 3.061917193135805e-05, "loss": 1.556, "step": 298000 }, { "epoch": 1.16, "learning_rate": 3.0586653763457644e-05, "loss": 1.5599, "step": 298500 }, { "epoch": 1.17, "learning_rate": 3.0554135595557236e-05, "loss": 1.5531, "step": 299000 }, { "epoch": 1.17, "learning_rate": 3.0521617427656834e-05, "loss": 1.5557, "step": 299500 }, { "epoch": 1.17, "learning_rate": 3.0489099259756426e-05, "loss": 1.5438, "step": 300000 }, { "epoch": 1.17, "learning_rate": 3.0456581091856024e-05, "loss": 1.5526, "step": 300500 }, { "epoch": 1.17, "learning_rate": 3.0424062923955616e-05, "loss": 1.5352, "step": 301000 }, { "epoch": 1.18, "learning_rate": 3.0391544756055208e-05, "loss": 1.5359, "step": 301500 }, { "epoch": 1.18, "learning_rate": 3.0359026588154803e-05, "loss": 1.5354, "step": 302000 }, { "epoch": 1.18, "learning_rate": 3.0326508420254394e-05, "loss": 1.5295, "step": 302500 }, { "epoch": 1.18, "learning_rate": 3.0293990252353993e-05, "loss": 1.5296, "step": 303000 }, { "epoch": 1.18, "learning_rate": 3.0261472084453584e-05, "loss": 1.5247, "step": 303500 }, { "epoch": 1.19, "learning_rate": 3.0228953916553183e-05, "loss": 1.5268, "step": 304000 }, { "epoch": 1.19, "learning_rate": 3.0196435748652774e-05, "loss": 1.5214, "step": 304500 }, { "epoch": 1.19, "learning_rate": 3.0163917580752366e-05, "loss": 1.5246, "step": 305000 }, { "epoch": 1.19, "learning_rate": 3.0131399412851964e-05, "loss": 1.5274, "step": 305500 }, { "epoch": 1.19, "learning_rate": 3.0098881244951556e-05, "loss": 1.5078, "step": 306000 }, { "epoch": 1.2, "learning_rate": 3.006636307705115e-05, "loss": 1.5175, "step": 306500 }, { "epoch": 1.2, "learning_rate": 3.0033844909150743e-05, "loss": 1.5166, "step": 307000 }, { "epoch": 1.2, "learning_rate": 3.000132674125034e-05, "loss": 1.5126, "step": 307500 }, { "epoch": 1.2, "learning_rate": 2.9968808573349933e-05, "loss": 1.5188, "step": 308000 }, { "epoch": 1.2, "learning_rate": 2.9936290405449524e-05, "loss": 1.5015, "step": 308500 }, { "epoch": 1.21, "learning_rate": 2.9903772237549123e-05, "loss": 1.505, "step": 309000 }, { "epoch": 1.21, "learning_rate": 2.9871254069648715e-05, "loss": 1.4986, "step": 309500 }, { "epoch": 1.21, "learning_rate": 2.983873590174831e-05, "loss": 1.4994, "step": 310000 }, { "epoch": 1.21, "learning_rate": 2.98062177338479e-05, "loss": 1.4982, "step": 310500 }, { "epoch": 1.21, "learning_rate": 2.9773699565947493e-05, "loss": 1.4874, "step": 311000 }, { "epoch": 1.22, "learning_rate": 2.974118139804709e-05, "loss": 1.4915, "step": 311500 }, { "epoch": 1.22, "learning_rate": 2.9708663230146683e-05, "loss": 1.4996, "step": 312000 }, { "epoch": 1.22, "learning_rate": 2.967614506224628e-05, "loss": 1.4948, "step": 312500 }, { "epoch": 1.22, "learning_rate": 2.9643626894345873e-05, "loss": 1.4842, "step": 313000 }, { "epoch": 1.22, "learning_rate": 2.9611108726445468e-05, "loss": 1.4798, "step": 313500 }, { "epoch": 1.23, "learning_rate": 2.957859055854506e-05, "loss": 1.491, "step": 314000 }, { "epoch": 1.23, "learning_rate": 2.954607239064465e-05, "loss": 1.4821, "step": 314500 }, { "epoch": 1.23, "learning_rate": 2.951355422274425e-05, "loss": 1.4841, "step": 315000 }, { "epoch": 1.23, "learning_rate": 2.948103605484384e-05, "loss": 1.4746, "step": 315500 }, { "epoch": 1.23, "learning_rate": 2.944851788694344e-05, "loss": 1.4767, "step": 316000 }, { "epoch": 1.24, "learning_rate": 2.941599971904303e-05, "loss": 1.4775, "step": 316500 }, { "epoch": 1.24, "learning_rate": 2.9383481551142623e-05, "loss": 1.4777, "step": 317000 }, { "epoch": 1.24, "learning_rate": 2.9350963383242218e-05, "loss": 1.4718, "step": 317500 }, { "epoch": 1.24, "learning_rate": 2.931844521534181e-05, "loss": 1.4654, "step": 318000 }, { "epoch": 1.24, "learning_rate": 2.9285927047441408e-05, "loss": 1.4667, "step": 318500 }, { "epoch": 1.24, "learning_rate": 2.9253408879541e-05, "loss": 1.4718, "step": 319000 }, { "epoch": 1.25, "learning_rate": 2.9220890711640598e-05, "loss": 1.4644, "step": 319500 }, { "epoch": 1.25, "learning_rate": 2.918837254374019e-05, "loss": 1.4597, "step": 320000 }, { "epoch": 1.25, "learning_rate": 2.915585437583978e-05, "loss": 1.4532, "step": 320500 }, { "epoch": 1.25, "learning_rate": 2.9123336207939377e-05, "loss": 1.4635, "step": 321000 }, { "epoch": 1.25, "learning_rate": 2.9090818040038968e-05, "loss": 1.4501, "step": 321500 }, { "epoch": 1.26, "learning_rate": 2.9058299872138567e-05, "loss": 1.4521, "step": 322000 }, { "epoch": 1.26, "learning_rate": 2.9025781704238158e-05, "loss": 1.4588, "step": 322500 }, { "epoch": 1.26, "learning_rate": 2.8993263536337757e-05, "loss": 1.4562, "step": 323000 }, { "epoch": 1.26, "learning_rate": 2.896074536843735e-05, "loss": 1.4555, "step": 323500 }, { "epoch": 1.26, "learning_rate": 2.892822720053694e-05, "loss": 1.4483, "step": 324000 }, { "epoch": 1.27, "learning_rate": 2.8895709032636535e-05, "loss": 1.4548, "step": 324500 }, { "epoch": 1.27, "learning_rate": 2.8863190864736127e-05, "loss": 1.4491, "step": 325000 }, { "epoch": 1.27, "learning_rate": 2.8830672696835725e-05, "loss": 1.4398, "step": 325500 }, { "epoch": 1.27, "learning_rate": 2.8798154528935317e-05, "loss": 1.4409, "step": 326000 }, { "epoch": 1.27, "learning_rate": 2.876563636103491e-05, "loss": 1.4472, "step": 326500 }, { "epoch": 1.28, "learning_rate": 2.8733118193134507e-05, "loss": 1.4316, "step": 327000 }, { "epoch": 1.28, "learning_rate": 2.87006000252341e-05, "loss": 1.4255, "step": 327500 }, { "epoch": 1.28, "learning_rate": 2.8668081857333693e-05, "loss": 1.4308, "step": 328000 }, { "epoch": 1.28, "learning_rate": 2.863556368943329e-05, "loss": 1.4355, "step": 328500 }, { "epoch": 1.28, "learning_rate": 2.8603045521532884e-05, "loss": 1.4294, "step": 329000 }, { "epoch": 1.29, "learning_rate": 2.8570527353632475e-05, "loss": 1.4267, "step": 329500 }, { "epoch": 1.29, "learning_rate": 2.8538009185732067e-05, "loss": 1.4333, "step": 330000 }, { "epoch": 1.29, "learning_rate": 2.8505491017831665e-05, "loss": 1.4254, "step": 330500 }, { "epoch": 1.29, "learning_rate": 2.8472972849931257e-05, "loss": 1.4281, "step": 331000 }, { "epoch": 1.29, "learning_rate": 2.8440454682030855e-05, "loss": 1.426, "step": 331500 }, { "epoch": 1.3, "learning_rate": 2.8407936514130447e-05, "loss": 1.4294, "step": 332000 }, { "epoch": 1.3, "learning_rate": 2.8375418346230042e-05, "loss": 1.4208, "step": 332500 }, { "epoch": 1.3, "learning_rate": 2.8342900178329634e-05, "loss": 1.421, "step": 333000 }, { "epoch": 1.3, "learning_rate": 2.8310382010429225e-05, "loss": 1.4226, "step": 333500 }, { "epoch": 1.3, "learning_rate": 2.8277863842528824e-05, "loss": 1.417, "step": 334000 }, { "epoch": 1.31, "learning_rate": 2.8245345674628415e-05, "loss": 1.4166, "step": 334500 }, { "epoch": 1.31, "learning_rate": 2.8212827506728014e-05, "loss": 1.4207, "step": 335000 }, { "epoch": 1.31, "learning_rate": 2.8180309338827605e-05, "loss": 1.408, "step": 335500 }, { "epoch": 1.31, "learning_rate": 2.8147791170927197e-05, "loss": 1.412, "step": 336000 }, { "epoch": 1.31, "learning_rate": 2.8115273003026792e-05, "loss": 1.412, "step": 336500 }, { "epoch": 1.32, "learning_rate": 2.8082754835126384e-05, "loss": 1.4137, "step": 337000 }, { "epoch": 1.32, "learning_rate": 2.8050236667225982e-05, "loss": 1.4032, "step": 337500 }, { "epoch": 1.32, "learning_rate": 2.8017718499325574e-05, "loss": 1.4022, "step": 338000 }, { "epoch": 1.32, "learning_rate": 2.7985200331425172e-05, "loss": 1.4038, "step": 338500 }, { "epoch": 1.32, "learning_rate": 2.7952682163524764e-05, "loss": 1.3994, "step": 339000 }, { "epoch": 1.32, "learning_rate": 2.7920163995624355e-05, "loss": 1.3996, "step": 339500 }, { "epoch": 1.33, "learning_rate": 2.788764582772395e-05, "loss": 1.3937, "step": 340000 }, { "epoch": 1.33, "learning_rate": 2.7855127659823542e-05, "loss": 1.3994, "step": 340500 }, { "epoch": 1.33, "learning_rate": 2.782260949192314e-05, "loss": 1.4008, "step": 341000 }, { "epoch": 1.33, "learning_rate": 2.7790091324022732e-05, "loss": 1.399, "step": 341500 }, { "epoch": 1.33, "learning_rate": 2.775757315612233e-05, "loss": 1.398, "step": 342000 }, { "epoch": 1.34, "learning_rate": 2.7725054988221922e-05, "loss": 1.3928, "step": 342500 }, { "epoch": 1.34, "learning_rate": 2.7692536820321514e-05, "loss": 1.3855, "step": 343000 }, { "epoch": 1.34, "learning_rate": 2.766001865242111e-05, "loss": 1.394, "step": 343500 }, { "epoch": 1.34, "learning_rate": 2.76275004845207e-05, "loss": 1.3863, "step": 344000 }, { "epoch": 1.34, "learning_rate": 2.75949823166203e-05, "loss": 1.3915, "step": 344500 }, { "epoch": 1.35, "learning_rate": 2.756246414871989e-05, "loss": 1.392, "step": 345000 }, { "epoch": 1.35, "learning_rate": 2.7529945980819482e-05, "loss": 1.389, "step": 345500 }, { "epoch": 1.35, "learning_rate": 2.749742781291908e-05, "loss": 1.3836, "step": 346000 }, { "epoch": 1.35, "learning_rate": 2.7464909645018672e-05, "loss": 1.3852, "step": 346500 }, { "epoch": 1.35, "learning_rate": 2.7432391477118267e-05, "loss": 1.3736, "step": 347000 }, { "epoch": 1.36, "learning_rate": 2.739987330921786e-05, "loss": 1.3818, "step": 347500 }, { "epoch": 1.36, "learning_rate": 2.7367355141317457e-05, "loss": 1.3816, "step": 348000 }, { "epoch": 1.36, "learning_rate": 2.733483697341705e-05, "loss": 1.3766, "step": 348500 }, { "epoch": 1.36, "learning_rate": 2.730231880551664e-05, "loss": 1.3835, "step": 349000 }, { "epoch": 1.36, "learning_rate": 2.726980063761624e-05, "loss": 1.3843, "step": 349500 }, { "epoch": 1.37, "learning_rate": 2.723728246971583e-05, "loss": 1.3723, "step": 350000 }, { "epoch": 1.37, "learning_rate": 2.7204764301815426e-05, "loss": 1.3802, "step": 350500 }, { "epoch": 1.37, "learning_rate": 2.7172246133915018e-05, "loss": 1.3747, "step": 351000 }, { "epoch": 1.37, "learning_rate": 2.7139727966014616e-05, "loss": 1.3662, "step": 351500 }, { "epoch": 1.37, "learning_rate": 2.7107209798114208e-05, "loss": 1.3712, "step": 352000 }, { "epoch": 1.38, "learning_rate": 2.70746916302138e-05, "loss": 1.3805, "step": 352500 }, { "epoch": 1.38, "learning_rate": 2.7042173462313398e-05, "loss": 1.3647, "step": 353000 }, { "epoch": 1.38, "learning_rate": 2.700965529441299e-05, "loss": 1.3731, "step": 353500 }, { "epoch": 1.38, "learning_rate": 2.6977137126512588e-05, "loss": 1.3663, "step": 354000 }, { "epoch": 1.38, "learning_rate": 2.694461895861218e-05, "loss": 1.3559, "step": 354500 }, { "epoch": 1.39, "learning_rate": 2.691210079071177e-05, "loss": 1.3642, "step": 355000 }, { "epoch": 1.39, "learning_rate": 2.6879582622811366e-05, "loss": 1.3679, "step": 355500 }, { "epoch": 1.39, "learning_rate": 2.6847064454910958e-05, "loss": 1.3576, "step": 356000 }, { "epoch": 1.39, "learning_rate": 2.6814546287010556e-05, "loss": 1.3536, "step": 356500 }, { "epoch": 1.39, "learning_rate": 2.6782028119110148e-05, "loss": 1.3635, "step": 357000 }, { "epoch": 1.4, "learning_rate": 2.6749509951209746e-05, "loss": 1.3622, "step": 357500 }, { "epoch": 1.4, "learning_rate": 2.6716991783309338e-05, "loss": 1.3594, "step": 358000 }, { "epoch": 1.4, "learning_rate": 2.668447361540893e-05, "loss": 1.3559, "step": 358500 }, { "epoch": 1.4, "learning_rate": 2.6651955447508525e-05, "loss": 1.351, "step": 359000 }, { "epoch": 1.4, "learning_rate": 2.6619437279608116e-05, "loss": 1.3582, "step": 359500 }, { "epoch": 1.4, "learning_rate": 2.6586919111707715e-05, "loss": 1.3567, "step": 360000 }, { "epoch": 1.41, "learning_rate": 2.6554400943807306e-05, "loss": 1.3417, "step": 360500 }, { "epoch": 1.41, "learning_rate": 2.6521882775906898e-05, "loss": 1.3467, "step": 361000 }, { "epoch": 1.41, "learning_rate": 2.6489364608006496e-05, "loss": 1.3623, "step": 361500 }, { "epoch": 1.41, "learning_rate": 2.6456846440106088e-05, "loss": 1.3495, "step": 362000 }, { "epoch": 1.41, "learning_rate": 2.6424328272205683e-05, "loss": 1.3514, "step": 362500 }, { "epoch": 1.42, "learning_rate": 2.6391810104305275e-05, "loss": 1.342, "step": 363000 }, { "epoch": 1.42, "learning_rate": 2.6359291936404873e-05, "loss": 1.3363, "step": 363500 }, { "epoch": 1.42, "learning_rate": 2.6326773768504465e-05, "loss": 1.3467, "step": 364000 }, { "epoch": 1.42, "learning_rate": 2.6294255600604056e-05, "loss": 1.345, "step": 364500 }, { "epoch": 1.42, "learning_rate": 2.6261737432703655e-05, "loss": 1.3486, "step": 365000 }, { "epoch": 1.43, "learning_rate": 2.6229219264803246e-05, "loss": 1.3505, "step": 365500 }, { "epoch": 1.43, "learning_rate": 2.619670109690284e-05, "loss": 1.3397, "step": 366000 }, { "epoch": 1.43, "learning_rate": 2.6164182929002433e-05, "loss": 1.3381, "step": 366500 }, { "epoch": 1.43, "learning_rate": 2.613166476110203e-05, "loss": 1.3367, "step": 367000 }, { "epoch": 1.43, "learning_rate": 2.6099146593201623e-05, "loss": 1.3375, "step": 367500 }, { "epoch": 1.44, "learning_rate": 2.6066628425301215e-05, "loss": 1.3405, "step": 368000 }, { "epoch": 1.44, "learning_rate": 2.6034110257400813e-05, "loss": 1.3384, "step": 368500 }, { "epoch": 1.44, "learning_rate": 2.6001592089500405e-05, "loss": 1.3357, "step": 369000 }, { "epoch": 1.44, "learning_rate": 2.59690739216e-05, "loss": 1.3349, "step": 369500 }, { "epoch": 1.44, "learning_rate": 2.593655575369959e-05, "loss": 1.3386, "step": 370000 }, { "epoch": 1.45, "learning_rate": 2.5904037585799183e-05, "loss": 1.3269, "step": 370500 }, { "epoch": 1.45, "learning_rate": 2.587151941789878e-05, "loss": 1.3368, "step": 371000 }, { "epoch": 1.45, "learning_rate": 2.5839001249998373e-05, "loss": 1.3378, "step": 371500 }, { "epoch": 1.45, "learning_rate": 2.580648308209797e-05, "loss": 1.3308, "step": 372000 }, { "epoch": 1.45, "learning_rate": 2.5773964914197563e-05, "loss": 1.3222, "step": 372500 }, { "epoch": 1.46, "learning_rate": 2.574144674629716e-05, "loss": 1.3313, "step": 373000 }, { "epoch": 1.46, "learning_rate": 2.570892857839675e-05, "loss": 1.3274, "step": 373500 }, { "epoch": 1.46, "learning_rate": 2.567641041049634e-05, "loss": 1.3282, "step": 374000 }, { "epoch": 1.46, "learning_rate": 2.564389224259594e-05, "loss": 1.3198, "step": 374500 }, { "epoch": 1.46, "learning_rate": 2.561137407469553e-05, "loss": 1.319, "step": 375000 }, { "epoch": 1.47, "learning_rate": 2.557885590679513e-05, "loss": 1.3279, "step": 375500 }, { "epoch": 1.47, "learning_rate": 2.5546337738894722e-05, "loss": 1.3206, "step": 376000 }, { "epoch": 1.47, "learning_rate": 2.5513819570994317e-05, "loss": 1.3237, "step": 376500 }, { "epoch": 1.47, "learning_rate": 2.5481301403093912e-05, "loss": 1.317, "step": 377000 }, { "epoch": 1.47, "learning_rate": 2.5448783235193503e-05, "loss": 1.3168, "step": 377500 }, { "epoch": 1.48, "learning_rate": 2.54162650672931e-05, "loss": 1.3219, "step": 378000 }, { "epoch": 1.48, "learning_rate": 2.538374689939269e-05, "loss": 1.316, "step": 378500 }, { "epoch": 1.48, "learning_rate": 2.535122873149229e-05, "loss": 1.3099, "step": 379000 }, { "epoch": 1.48, "learning_rate": 2.531871056359188e-05, "loss": 1.3184, "step": 379500 }, { "epoch": 1.48, "learning_rate": 2.5286192395691472e-05, "loss": 1.32, "step": 380000 }, { "epoch": 1.48, "learning_rate": 2.525367422779107e-05, "loss": 1.3118, "step": 380500 }, { "epoch": 1.49, "learning_rate": 2.5221156059890662e-05, "loss": 1.301, "step": 381000 }, { "epoch": 1.49, "learning_rate": 2.5188637891990257e-05, "loss": 1.3119, "step": 381500 }, { "epoch": 1.49, "learning_rate": 2.515611972408985e-05, "loss": 1.3115, "step": 382000 }, { "epoch": 1.49, "learning_rate": 2.5123601556189447e-05, "loss": 1.3155, "step": 382500 }, { "epoch": 1.49, "learning_rate": 2.509108338828904e-05, "loss": 1.318, "step": 383000 }, { "epoch": 1.5, "learning_rate": 2.505856522038863e-05, "loss": 1.3038, "step": 383500 }, { "epoch": 1.5, "learning_rate": 2.502604705248823e-05, "loss": 1.3007, "step": 384000 }, { "epoch": 1.5, "learning_rate": 2.499352888458782e-05, "loss": 1.3096, "step": 384500 }, { "epoch": 1.5, "learning_rate": 2.4961010716687412e-05, "loss": 1.2961, "step": 385000 }, { "epoch": 1.5, "learning_rate": 2.4928492548787007e-05, "loss": 1.3118, "step": 385500 }, { "epoch": 1.51, "learning_rate": 2.4895974380886602e-05, "loss": 1.3076, "step": 386000 }, { "epoch": 1.51, "learning_rate": 2.4863456212986197e-05, "loss": 1.3118, "step": 386500 }, { "epoch": 1.51, "learning_rate": 2.4830938045085792e-05, "loss": 1.2965, "step": 387000 }, { "epoch": 1.51, "learning_rate": 2.4798419877185387e-05, "loss": 1.302, "step": 387500 }, { "epoch": 1.51, "learning_rate": 2.476590170928498e-05, "loss": 1.3042, "step": 388000 }, { "epoch": 1.52, "learning_rate": 2.473338354138457e-05, "loss": 1.3032, "step": 388500 }, { "epoch": 1.52, "learning_rate": 2.4700865373484165e-05, "loss": 1.3031, "step": 389000 }, { "epoch": 1.52, "learning_rate": 2.466834720558376e-05, "loss": 1.2966, "step": 389500 }, { "epoch": 1.52, "learning_rate": 2.4635829037683356e-05, "loss": 1.3008, "step": 390000 }, { "epoch": 1.52, "learning_rate": 2.460331086978295e-05, "loss": 1.2896, "step": 390500 }, { "epoch": 1.53, "learning_rate": 2.4570792701882542e-05, "loss": 1.2999, "step": 391000 }, { "epoch": 1.53, "learning_rate": 2.4538274533982137e-05, "loss": 1.3017, "step": 391500 }, { "epoch": 1.53, "learning_rate": 2.4505756366081732e-05, "loss": 1.2969, "step": 392000 }, { "epoch": 1.53, "learning_rate": 2.4473238198181324e-05, "loss": 1.297, "step": 392500 }, { "epoch": 1.53, "learning_rate": 2.444072003028092e-05, "loss": 1.2959, "step": 393000 }, { "epoch": 1.54, "learning_rate": 2.4408201862380514e-05, "loss": 1.3034, "step": 393500 }, { "epoch": 1.54, "learning_rate": 2.4375683694480106e-05, "loss": 1.285, "step": 394000 }, { "epoch": 1.54, "learning_rate": 2.43431655265797e-05, "loss": 1.2913, "step": 394500 }, { "epoch": 1.54, "learning_rate": 2.4310647358679296e-05, "loss": 1.2806, "step": 395000 }, { "epoch": 1.54, "learning_rate": 2.427812919077889e-05, "loss": 1.2842, "step": 395500 }, { "epoch": 1.55, "learning_rate": 2.4245611022878482e-05, "loss": 1.2776, "step": 396000 }, { "epoch": 1.55, "learning_rate": 2.4213092854978077e-05, "loss": 1.2867, "step": 396500 }, { "epoch": 1.55, "learning_rate": 2.4180574687077672e-05, "loss": 1.2906, "step": 397000 }, { "epoch": 1.55, "learning_rate": 2.4148056519177264e-05, "loss": 1.2907, "step": 397500 }, { "epoch": 1.55, "learning_rate": 2.411553835127686e-05, "loss": 1.2786, "step": 398000 }, { "epoch": 1.56, "learning_rate": 2.4083020183376454e-05, "loss": 1.2943, "step": 398500 }, { "epoch": 1.56, "learning_rate": 2.405050201547605e-05, "loss": 1.2758, "step": 399000 }, { "epoch": 1.56, "learning_rate": 2.401798384757564e-05, "loss": 1.2839, "step": 399500 }, { "epoch": 1.56, "learning_rate": 2.3985465679675236e-05, "loss": 1.2819, "step": 400000 }, { "epoch": 1.56, "learning_rate": 2.3952947511774828e-05, "loss": 1.2791, "step": 400500 }, { "epoch": 1.56, "learning_rate": 2.3920429343874423e-05, "loss": 1.2753, "step": 401000 }, { "epoch": 1.57, "learning_rate": 2.3887911175974018e-05, "loss": 1.2791, "step": 401500 }, { "epoch": 1.57, "learning_rate": 2.3855393008073613e-05, "loss": 1.2691, "step": 402000 }, { "epoch": 1.57, "learning_rate": 2.3822874840173208e-05, "loss": 1.278, "step": 402500 }, { "epoch": 1.57, "learning_rate": 2.3790356672272803e-05, "loss": 1.2769, "step": 403000 }, { "epoch": 1.57, "learning_rate": 2.3757838504372394e-05, "loss": 1.282, "step": 403500 }, { "epoch": 1.58, "learning_rate": 2.3725320336471986e-05, "loss": 1.272, "step": 404000 }, { "epoch": 1.58, "learning_rate": 2.369280216857158e-05, "loss": 1.2694, "step": 404500 }, { "epoch": 1.58, "learning_rate": 2.3660284000671176e-05, "loss": 1.2681, "step": 405000 }, { "epoch": 1.58, "learning_rate": 2.362776583277077e-05, "loss": 1.2774, "step": 405500 }, { "epoch": 1.58, "learning_rate": 2.3595247664870366e-05, "loss": 1.2685, "step": 406000 }, { "epoch": 1.59, "learning_rate": 2.3562729496969958e-05, "loss": 1.2703, "step": 406500 }, { "epoch": 1.59, "learning_rate": 2.3530211329069553e-05, "loss": 1.277, "step": 407000 }, { "epoch": 1.59, "learning_rate": 2.3497693161169144e-05, "loss": 1.2666, "step": 407500 }, { "epoch": 1.59, "learning_rate": 2.346517499326874e-05, "loss": 1.2656, "step": 408000 }, { "epoch": 1.59, "learning_rate": 2.3432656825368335e-05, "loss": 1.2666, "step": 408500 }, { "epoch": 1.6, "learning_rate": 2.340013865746793e-05, "loss": 1.2639, "step": 409000 }, { "epoch": 1.6, "learning_rate": 2.3367620489567525e-05, "loss": 1.2686, "step": 409500 }, { "epoch": 1.6, "learning_rate": 2.3335102321667116e-05, "loss": 1.2689, "step": 410000 }, { "epoch": 1.6, "learning_rate": 2.330258415376671e-05, "loss": 1.2643, "step": 410500 }, { "epoch": 1.6, "learning_rate": 2.3270065985866303e-05, "loss": 1.2647, "step": 411000 }, { "epoch": 1.61, "learning_rate": 2.3237547817965898e-05, "loss": 1.2546, "step": 411500 }, { "epoch": 1.61, "learning_rate": 2.3205029650065493e-05, "loss": 1.2731, "step": 412000 }, { "epoch": 1.61, "learning_rate": 2.3172511482165088e-05, "loss": 1.2624, "step": 412500 }, { "epoch": 1.61, "learning_rate": 2.313999331426468e-05, "loss": 1.2578, "step": 413000 }, { "epoch": 1.61, "learning_rate": 2.3107475146364275e-05, "loss": 1.2609, "step": 413500 }, { "epoch": 1.62, "learning_rate": 2.307495697846387e-05, "loss": 1.2538, "step": 414000 }, { "epoch": 1.62, "learning_rate": 2.3042438810563465e-05, "loss": 1.258, "step": 414500 }, { "epoch": 1.62, "learning_rate": 2.3009920642663056e-05, "loss": 1.2518, "step": 415000 }, { "epoch": 1.62, "learning_rate": 2.297740247476265e-05, "loss": 1.2589, "step": 415500 }, { "epoch": 1.62, "learning_rate": 2.2944884306862243e-05, "loss": 1.2574, "step": 416000 }, { "epoch": 1.63, "learning_rate": 2.2912366138961838e-05, "loss": 1.2568, "step": 416500 }, { "epoch": 1.63, "learning_rate": 2.2879847971061433e-05, "loss": 1.2668, "step": 417000 }, { "epoch": 1.63, "learning_rate": 2.2847329803161028e-05, "loss": 1.2583, "step": 417500 }, { "epoch": 1.63, "learning_rate": 2.2814811635260623e-05, "loss": 1.2515, "step": 418000 }, { "epoch": 1.63, "learning_rate": 2.2782293467360215e-05, "loss": 1.2536, "step": 418500 }, { "epoch": 1.64, "learning_rate": 2.2749775299459806e-05, "loss": 1.2568, "step": 419000 }, { "epoch": 1.64, "learning_rate": 2.27172571315594e-05, "loss": 1.2572, "step": 419500 }, { "epoch": 1.64, "learning_rate": 2.2684738963658997e-05, "loss": 1.2556, "step": 420000 }, { "epoch": 1.64, "learning_rate": 2.265222079575859e-05, "loss": 1.2529, "step": 420500 }, { "epoch": 1.64, "learning_rate": 2.2619702627858187e-05, "loss": 1.2514, "step": 421000 }, { "epoch": 1.64, "learning_rate": 2.258718445995778e-05, "loss": 1.2515, "step": 421500 }, { "epoch": 1.65, "learning_rate": 2.2554666292057373e-05, "loss": 1.2482, "step": 422000 }, { "epoch": 1.65, "learning_rate": 2.2522148124156965e-05, "loss": 1.25, "step": 422500 }, { "epoch": 1.65, "learning_rate": 2.248962995625656e-05, "loss": 1.2518, "step": 423000 }, { "epoch": 1.65, "learning_rate": 2.2457111788356155e-05, "loss": 1.2528, "step": 423500 }, { "epoch": 1.65, "learning_rate": 2.242459362045575e-05, "loss": 1.2491, "step": 424000 }, { "epoch": 1.66, "learning_rate": 2.2392075452555345e-05, "loss": 1.2438, "step": 424500 }, { "epoch": 1.66, "learning_rate": 2.235955728465494e-05, "loss": 1.2541, "step": 425000 }, { "epoch": 1.66, "learning_rate": 2.2327039116754532e-05, "loss": 1.2482, "step": 425500 }, { "epoch": 1.66, "learning_rate": 2.2294520948854127e-05, "loss": 1.26, "step": 426000 }, { "epoch": 1.66, "learning_rate": 2.226200278095372e-05, "loss": 1.2491, "step": 426500 }, { "epoch": 1.67, "learning_rate": 2.2229484613053313e-05, "loss": 1.2455, "step": 427000 }, { "epoch": 1.67, "learning_rate": 2.219696644515291e-05, "loss": 1.2415, "step": 427500 }, { "epoch": 1.67, "learning_rate": 2.2164448277252504e-05, "loss": 1.2429, "step": 428000 }, { "epoch": 1.67, "learning_rate": 2.2131930109352095e-05, "loss": 1.2357, "step": 428500 }, { "epoch": 1.67, "learning_rate": 2.209941194145169e-05, "loss": 1.2391, "step": 429000 }, { "epoch": 1.68, "learning_rate": 2.2066893773551285e-05, "loss": 1.234, "step": 429500 }, { "epoch": 1.68, "learning_rate": 2.2034375605650877e-05, "loss": 1.2468, "step": 430000 }, { "epoch": 1.68, "learning_rate": 2.2001857437750472e-05, "loss": 1.2346, "step": 430500 }, { "epoch": 1.68, "learning_rate": 2.1969339269850067e-05, "loss": 1.2501, "step": 431000 }, { "epoch": 1.68, "learning_rate": 2.1936821101949662e-05, "loss": 1.2325, "step": 431500 }, { "epoch": 1.69, "learning_rate": 2.1904302934049254e-05, "loss": 1.2407, "step": 432000 }, { "epoch": 1.69, "learning_rate": 2.187178476614885e-05, "loss": 1.2381, "step": 432500 }, { "epoch": 1.69, "learning_rate": 2.1839266598248444e-05, "loss": 1.239, "step": 433000 }, { "epoch": 1.69, "learning_rate": 2.1806748430348035e-05, "loss": 1.2338, "step": 433500 }, { "epoch": 1.69, "learning_rate": 2.177423026244763e-05, "loss": 1.2336, "step": 434000 }, { "epoch": 1.7, "learning_rate": 2.1741712094547225e-05, "loss": 1.2403, "step": 434500 }, { "epoch": 1.7, "learning_rate": 2.1709193926646817e-05, "loss": 1.2359, "step": 435000 }, { "epoch": 1.7, "learning_rate": 2.1676675758746412e-05, "loss": 1.2263, "step": 435500 }, { "epoch": 1.7, "learning_rate": 2.1644157590846007e-05, "loss": 1.2363, "step": 436000 }, { "epoch": 1.7, "learning_rate": 2.1611639422945602e-05, "loss": 1.2334, "step": 436500 }, { "epoch": 1.71, "learning_rate": 2.1579121255045194e-05, "loss": 1.2317, "step": 437000 }, { "epoch": 1.71, "learning_rate": 2.154660308714479e-05, "loss": 1.2421, "step": 437500 }, { "epoch": 1.71, "learning_rate": 2.151408491924438e-05, "loss": 1.2329, "step": 438000 }, { "epoch": 1.71, "learning_rate": 2.1481566751343975e-05, "loss": 1.2237, "step": 438500 }, { "epoch": 1.71, "learning_rate": 2.144904858344357e-05, "loss": 1.2316, "step": 439000 }, { "epoch": 1.72, "learning_rate": 2.1416530415543166e-05, "loss": 1.2239, "step": 439500 }, { "epoch": 1.72, "learning_rate": 2.138401224764276e-05, "loss": 1.2344, "step": 440000 }, { "epoch": 1.72, "learning_rate": 2.1351494079742356e-05, "loss": 1.2243, "step": 440500 }, { "epoch": 1.72, "learning_rate": 2.1318975911841947e-05, "loss": 1.2329, "step": 441000 }, { "epoch": 1.72, "learning_rate": 2.128645774394154e-05, "loss": 1.2228, "step": 441500 }, { "epoch": 1.72, "learning_rate": 2.1253939576041134e-05, "loss": 1.2296, "step": 442000 }, { "epoch": 1.73, "learning_rate": 2.122142140814073e-05, "loss": 1.22, "step": 442500 }, { "epoch": 1.73, "learning_rate": 2.1188903240240324e-05, "loss": 1.2317, "step": 443000 }, { "epoch": 1.73, "learning_rate": 2.115638507233992e-05, "loss": 1.2299, "step": 443500 }, { "epoch": 1.73, "learning_rate": 2.1123866904439514e-05, "loss": 1.2361, "step": 444000 }, { "epoch": 1.73, "learning_rate": 2.1091348736539106e-05, "loss": 1.2282, "step": 444500 }, { "epoch": 1.74, "learning_rate": 2.1058830568638697e-05, "loss": 1.2263, "step": 445000 }, { "epoch": 1.74, "learning_rate": 2.1026312400738292e-05, "loss": 1.218, "step": 445500 }, { "epoch": 1.74, "learning_rate": 2.0993794232837887e-05, "loss": 1.228, "step": 446000 }, { "epoch": 1.74, "learning_rate": 2.0961276064937482e-05, "loss": 1.2248, "step": 446500 }, { "epoch": 1.74, "learning_rate": 2.0928757897037077e-05, "loss": 1.2275, "step": 447000 }, { "epoch": 1.75, "learning_rate": 2.089623972913667e-05, "loss": 1.2223, "step": 447500 }, { "epoch": 1.75, "learning_rate": 2.0863721561236264e-05, "loss": 1.2224, "step": 448000 }, { "epoch": 1.75, "learning_rate": 2.0831203393335856e-05, "loss": 1.2217, "step": 448500 }, { "epoch": 1.75, "learning_rate": 2.079868522543545e-05, "loss": 1.2208, "step": 449000 }, { "epoch": 1.75, "learning_rate": 2.0766167057535046e-05, "loss": 1.217, "step": 449500 }, { "epoch": 1.76, "learning_rate": 2.073364888963464e-05, "loss": 1.214, "step": 450000 }, { "epoch": 1.76, "learning_rate": 2.0701130721734233e-05, "loss": 1.2142, "step": 450500 }, { "epoch": 1.76, "learning_rate": 2.0668612553833828e-05, "loss": 1.2072, "step": 451000 }, { "epoch": 1.76, "learning_rate": 2.0636094385933423e-05, "loss": 1.2121, "step": 451500 }, { "epoch": 1.76, "learning_rate": 2.0603576218033018e-05, "loss": 1.2185, "step": 452000 }, { "epoch": 1.77, "learning_rate": 2.057105805013261e-05, "loss": 1.2129, "step": 452500 }, { "epoch": 1.77, "learning_rate": 2.0538539882232204e-05, "loss": 1.2091, "step": 453000 }, { "epoch": 1.77, "learning_rate": 2.05060217143318e-05, "loss": 1.2087, "step": 453500 }, { "epoch": 1.77, "learning_rate": 2.047350354643139e-05, "loss": 1.2247, "step": 454000 }, { "epoch": 1.77, "learning_rate": 2.0440985378530986e-05, "loss": 1.2131, "step": 454500 }, { "epoch": 1.78, "learning_rate": 2.040846721063058e-05, "loss": 1.2172, "step": 455000 }, { "epoch": 1.78, "learning_rate": 2.0375949042730176e-05, "loss": 1.2044, "step": 455500 }, { "epoch": 1.78, "learning_rate": 2.0343430874829768e-05, "loss": 1.2159, "step": 456000 }, { "epoch": 1.78, "learning_rate": 2.0310912706929363e-05, "loss": 1.2087, "step": 456500 }, { "epoch": 1.78, "learning_rate": 2.0278394539028954e-05, "loss": 1.2067, "step": 457000 }, { "epoch": 1.79, "learning_rate": 2.024587637112855e-05, "loss": 1.1992, "step": 457500 }, { "epoch": 1.79, "learning_rate": 2.0213358203228144e-05, "loss": 1.202, "step": 458000 }, { "epoch": 1.79, "learning_rate": 2.018084003532774e-05, "loss": 1.2078, "step": 458500 }, { "epoch": 1.79, "learning_rate": 2.0148321867427335e-05, "loss": 1.2087, "step": 459000 }, { "epoch": 1.79, "learning_rate": 2.0115803699526926e-05, "loss": 1.2119, "step": 459500 }, { "epoch": 1.8, "learning_rate": 2.0083285531626518e-05, "loss": 1.2056, "step": 460000 }, { "epoch": 1.8, "learning_rate": 2.0050767363726113e-05, "loss": 1.1988, "step": 460500 }, { "epoch": 1.8, "learning_rate": 2.0018249195825708e-05, "loss": 1.2137, "step": 461000 }, { "epoch": 1.8, "learning_rate": 1.9985731027925303e-05, "loss": 1.2125, "step": 461500 }, { "epoch": 1.8, "learning_rate": 1.9953212860024898e-05, "loss": 1.2043, "step": 462000 }, { "epoch": 1.8, "learning_rate": 1.9920694692124493e-05, "loss": 1.2074, "step": 462500 }, { "epoch": 1.81, "learning_rate": 1.9888176524224085e-05, "loss": 1.2036, "step": 463000 }, { "epoch": 1.81, "learning_rate": 1.985565835632368e-05, "loss": 1.2005, "step": 463500 }, { "epoch": 1.81, "learning_rate": 1.982314018842327e-05, "loss": 1.2057, "step": 464000 }, { "epoch": 1.81, "learning_rate": 1.9790622020522866e-05, "loss": 1.2043, "step": 464500 }, { "epoch": 1.81, "learning_rate": 1.975810385262246e-05, "loss": 1.2036, "step": 465000 }, { "epoch": 1.82, "learning_rate": 1.9725585684722056e-05, "loss": 1.2022, "step": 465500 }, { "epoch": 1.82, "learning_rate": 1.969306751682165e-05, "loss": 1.2003, "step": 466000 }, { "epoch": 1.82, "learning_rate": 1.9660549348921243e-05, "loss": 1.1992, "step": 466500 }, { "epoch": 1.82, "learning_rate": 1.9628031181020838e-05, "loss": 1.2011, "step": 467000 }, { "epoch": 1.82, "learning_rate": 1.959551301312043e-05, "loss": 1.1997, "step": 467500 }, { "epoch": 1.83, "learning_rate": 1.9562994845220025e-05, "loss": 1.2033, "step": 468000 }, { "epoch": 1.83, "learning_rate": 1.953047667731962e-05, "loss": 1.199, "step": 468500 }, { "epoch": 1.83, "learning_rate": 1.9497958509419215e-05, "loss": 1.1972, "step": 469000 }, { "epoch": 1.83, "learning_rate": 1.9465440341518807e-05, "loss": 1.1979, "step": 469500 }, { "epoch": 1.83, "learning_rate": 1.94329221736184e-05, "loss": 1.2027, "step": 470000 }, { "epoch": 1.84, "learning_rate": 1.9400404005717997e-05, "loss": 1.1959, "step": 470500 }, { "epoch": 1.84, "learning_rate": 1.9367885837817588e-05, "loss": 1.1962, "step": 471000 }, { "epoch": 1.84, "learning_rate": 1.9335367669917183e-05, "loss": 1.1978, "step": 471500 }, { "epoch": 1.84, "learning_rate": 1.930284950201678e-05, "loss": 1.2043, "step": 472000 }, { "epoch": 1.84, "learning_rate": 1.927033133411637e-05, "loss": 1.1901, "step": 472500 }, { "epoch": 1.85, "learning_rate": 1.9237813166215965e-05, "loss": 1.2003, "step": 473000 }, { "epoch": 1.85, "learning_rate": 1.920529499831556e-05, "loss": 1.1963, "step": 473500 }, { "epoch": 1.85, "learning_rate": 1.9172776830415155e-05, "loss": 1.197, "step": 474000 }, { "epoch": 1.85, "learning_rate": 1.914025866251475e-05, "loss": 1.1969, "step": 474500 }, { "epoch": 1.85, "learning_rate": 1.9107740494614342e-05, "loss": 1.1896, "step": 475000 }, { "epoch": 1.86, "learning_rate": 1.9075222326713933e-05, "loss": 1.1928, "step": 475500 }, { "epoch": 1.86, "learning_rate": 1.904270415881353e-05, "loss": 1.1887, "step": 476000 }, { "epoch": 1.86, "learning_rate": 1.9010185990913123e-05, "loss": 1.1895, "step": 476500 }, { "epoch": 1.86, "learning_rate": 1.897766782301272e-05, "loss": 1.1926, "step": 477000 }, { "epoch": 1.86, "learning_rate": 1.8945149655112314e-05, "loss": 1.1861, "step": 477500 }, { "epoch": 1.87, "learning_rate": 1.891263148721191e-05, "loss": 1.2007, "step": 478000 }, { "epoch": 1.87, "learning_rate": 1.88801133193115e-05, "loss": 1.1893, "step": 478500 }, { "epoch": 1.87, "learning_rate": 1.8847595151411092e-05, "loss": 1.1894, "step": 479000 }, { "epoch": 1.87, "learning_rate": 1.8815076983510687e-05, "loss": 1.1954, "step": 479500 }, { "epoch": 1.87, "learning_rate": 1.8782558815610282e-05, "loss": 1.186, "step": 480000 }, { "epoch": 1.87, "learning_rate": 1.8750040647709877e-05, "loss": 1.1876, "step": 480500 }, { "epoch": 1.88, "learning_rate": 1.8717522479809472e-05, "loss": 1.19, "step": 481000 }, { "epoch": 1.88, "learning_rate": 1.8685004311909067e-05, "loss": 1.1834, "step": 481500 }, { "epoch": 1.88, "learning_rate": 1.865248614400866e-05, "loss": 1.1836, "step": 482000 }, { "epoch": 1.88, "learning_rate": 1.861996797610825e-05, "loss": 1.1895, "step": 482500 }, { "epoch": 1.88, "learning_rate": 1.8587449808207845e-05, "loss": 1.1894, "step": 483000 }, { "epoch": 1.89, "learning_rate": 1.855493164030744e-05, "loss": 1.1848, "step": 483500 }, { "epoch": 1.89, "learning_rate": 1.8522413472407035e-05, "loss": 1.1855, "step": 484000 }, { "epoch": 1.89, "learning_rate": 1.848989530450663e-05, "loss": 1.1856, "step": 484500 }, { "epoch": 1.89, "learning_rate": 1.8457377136606222e-05, "loss": 1.1802, "step": 485000 }, { "epoch": 1.89, "learning_rate": 1.8424858968705817e-05, "loss": 1.1805, "step": 485500 }, { "epoch": 1.9, "learning_rate": 1.8392340800805412e-05, "loss": 1.1837, "step": 486000 }, { "epoch": 1.9, "learning_rate": 1.8359822632905004e-05, "loss": 1.1772, "step": 486500 }, { "epoch": 1.9, "learning_rate": 1.83273044650046e-05, "loss": 1.1849, "step": 487000 }, { "epoch": 1.9, "learning_rate": 1.8294786297104194e-05, "loss": 1.1802, "step": 487500 }, { "epoch": 1.9, "learning_rate": 1.826226812920379e-05, "loss": 1.1749, "step": 488000 }, { "epoch": 1.91, "learning_rate": 1.822974996130338e-05, "loss": 1.1786, "step": 488500 }, { "epoch": 1.91, "learning_rate": 1.8197231793402976e-05, "loss": 1.182, "step": 489000 }, { "epoch": 1.91, "learning_rate": 1.816471362550257e-05, "loss": 1.1811, "step": 489500 }, { "epoch": 1.91, "learning_rate": 1.8132195457602162e-05, "loss": 1.1782, "step": 490000 }, { "epoch": 1.91, "learning_rate": 1.8099677289701757e-05, "loss": 1.1785, "step": 490500 }, { "epoch": 1.92, "learning_rate": 1.8067159121801352e-05, "loss": 1.1802, "step": 491000 }, { "epoch": 1.92, "learning_rate": 1.8034640953900944e-05, "loss": 1.1783, "step": 491500 }, { "epoch": 1.92, "learning_rate": 1.800212278600054e-05, "loss": 1.1862, "step": 492000 }, { "epoch": 1.92, "learning_rate": 1.7969604618100134e-05, "loss": 1.1749, "step": 492500 }, { "epoch": 1.92, "learning_rate": 1.793708645019973e-05, "loss": 1.1816, "step": 493000 }, { "epoch": 1.93, "learning_rate": 1.790456828229932e-05, "loss": 1.175, "step": 493500 }, { "epoch": 1.93, "learning_rate": 1.7872050114398916e-05, "loss": 1.178, "step": 494000 }, { "epoch": 1.93, "learning_rate": 1.7839531946498507e-05, "loss": 1.1739, "step": 494500 }, { "epoch": 1.93, "learning_rate": 1.7807013778598102e-05, "loss": 1.1811, "step": 495000 }, { "epoch": 1.93, "learning_rate": 1.7774495610697697e-05, "loss": 1.1812, "step": 495500 }, { "epoch": 1.94, "learning_rate": 1.7741977442797292e-05, "loss": 1.1772, "step": 496000 }, { "epoch": 1.94, "learning_rate": 1.7709459274896887e-05, "loss": 1.1761, "step": 496500 }, { "epoch": 1.94, "learning_rate": 1.767694110699648e-05, "loss": 1.1731, "step": 497000 }, { "epoch": 1.94, "learning_rate": 1.764442293909607e-05, "loss": 1.1655, "step": 497500 }, { "epoch": 1.94, "learning_rate": 1.7611904771195666e-05, "loss": 1.1715, "step": 498000 }, { "epoch": 1.95, "learning_rate": 1.757938660329526e-05, "loss": 1.1777, "step": 498500 }, { "epoch": 1.95, "learning_rate": 1.7546868435394856e-05, "loss": 1.172, "step": 499000 }, { "epoch": 1.95, "learning_rate": 1.751435026749445e-05, "loss": 1.1716, "step": 499500 }, { "epoch": 1.95, "learning_rate": 1.7481832099594046e-05, "loss": 1.1733, "step": 500000 }, { "epoch": 1.95, "learning_rate": 1.744931393169364e-05, "loss": 1.1745, "step": 500500 }, { "epoch": 1.95, "learning_rate": 1.7416795763793233e-05, "loss": 1.1722, "step": 501000 }, { "epoch": 1.96, "learning_rate": 1.7384277595892824e-05, "loss": 1.1628, "step": 501500 }, { "epoch": 1.96, "learning_rate": 1.735175942799242e-05, "loss": 1.1659, "step": 502000 }, { "epoch": 1.96, "learning_rate": 1.7319241260092014e-05, "loss": 1.1742, "step": 502500 }, { "epoch": 1.96, "learning_rate": 1.728672309219161e-05, "loss": 1.1674, "step": 503000 }, { "epoch": 1.96, "learning_rate": 1.7254204924291204e-05, "loss": 1.1704, "step": 503500 }, { "epoch": 1.97, "learning_rate": 1.7221686756390796e-05, "loss": 1.164, "step": 504000 }, { "epoch": 1.97, "learning_rate": 1.718916858849039e-05, "loss": 1.1759, "step": 504500 }, { "epoch": 1.97, "learning_rate": 1.7156650420589983e-05, "loss": 1.1658, "step": 505000 }, { "epoch": 1.97, "learning_rate": 1.7124132252689578e-05, "loss": 1.164, "step": 505500 }, { "epoch": 1.97, "learning_rate": 1.7091614084789173e-05, "loss": 1.1667, "step": 506000 }, { "epoch": 1.98, "learning_rate": 1.7059095916888768e-05, "loss": 1.1729, "step": 506500 }, { "epoch": 1.98, "learning_rate": 1.702657774898836e-05, "loss": 1.1668, "step": 507000 }, { "epoch": 1.98, "learning_rate": 1.6994059581087954e-05, "loss": 1.1709, "step": 507500 }, { "epoch": 1.98, "learning_rate": 1.696154141318755e-05, "loss": 1.1647, "step": 508000 }, { "epoch": 1.98, "learning_rate": 1.692902324528714e-05, "loss": 1.1599, "step": 508500 }, { "epoch": 1.99, "learning_rate": 1.6896505077386736e-05, "loss": 1.1652, "step": 509000 }, { "epoch": 1.99, "learning_rate": 1.686398690948633e-05, "loss": 1.1587, "step": 509500 }, { "epoch": 1.99, "learning_rate": 1.6831468741585926e-05, "loss": 1.1691, "step": 510000 }, { "epoch": 1.99, "learning_rate": 1.6798950573685518e-05, "loss": 1.1643, "step": 510500 }, { "epoch": 1.99, "learning_rate": 1.6766432405785113e-05, "loss": 1.1606, "step": 511000 }, { "epoch": 2.0, "learning_rate": 1.6733914237884708e-05, "loss": 1.1571, "step": 511500 }, { "epoch": 2.0, "learning_rate": 1.6701396069984303e-05, "loss": 1.1628, "step": 512000 }, { "epoch": 2.0, "learning_rate": 1.6668877902083895e-05, "loss": 1.1619, "step": 512500 }, { "epoch": 2.0, "learning_rate": 1.663635973418349e-05, "loss": 1.1597, "step": 513000 }, { "epoch": 2.0, "learning_rate": 1.660384156628308e-05, "loss": 1.1605, "step": 513500 }, { "epoch": 2.01, "learning_rate": 1.6571323398382676e-05, "loss": 1.1593, "step": 514000 }, { "epoch": 2.01, "learning_rate": 1.653880523048227e-05, "loss": 1.167, "step": 514500 }, { "epoch": 2.01, "learning_rate": 1.6506287062581866e-05, "loss": 1.1579, "step": 515000 }, { "epoch": 2.01, "learning_rate": 1.647376889468146e-05, "loss": 1.1611, "step": 515500 }, { "epoch": 2.01, "learning_rate": 1.6441250726781053e-05, "loss": 1.1498, "step": 516000 }, { "epoch": 2.02, "learning_rate": 1.6408732558880645e-05, "loss": 1.1566, "step": 516500 }, { "epoch": 2.02, "learning_rate": 1.637621439098024e-05, "loss": 1.1596, "step": 517000 }, { "epoch": 2.02, "learning_rate": 1.6343696223079835e-05, "loss": 1.1514, "step": 517500 }, { "epoch": 2.02, "learning_rate": 1.631117805517943e-05, "loss": 1.1478, "step": 518000 }, { "epoch": 2.02, "learning_rate": 1.6278659887279025e-05, "loss": 1.1547, "step": 518500 }, { "epoch": 2.03, "learning_rate": 1.624614171937862e-05, "loss": 1.1621, "step": 519000 }, { "epoch": 2.03, "learning_rate": 1.621362355147821e-05, "loss": 1.1557, "step": 519500 }, { "epoch": 2.03, "learning_rate": 1.6181105383577803e-05, "loss": 1.1481, "step": 520000 }, { "epoch": 2.03, "learning_rate": 1.6148587215677398e-05, "loss": 1.1573, "step": 520500 }, { "epoch": 2.03, "learning_rate": 1.6116069047776993e-05, "loss": 1.1651, "step": 521000 }, { "epoch": 2.03, "learning_rate": 1.608355087987659e-05, "loss": 1.1524, "step": 521500 }, { "epoch": 2.04, "learning_rate": 1.6051032711976183e-05, "loss": 1.151, "step": 522000 }, { "epoch": 2.04, "learning_rate": 1.601851454407578e-05, "loss": 1.1594, "step": 522500 }, { "epoch": 2.04, "learning_rate": 1.598599637617537e-05, "loss": 1.1506, "step": 523000 }, { "epoch": 2.04, "learning_rate": 1.5953478208274965e-05, "loss": 1.1606, "step": 523500 }, { "epoch": 2.04, "learning_rate": 1.5920960040374557e-05, "loss": 1.1546, "step": 524000 }, { "epoch": 2.05, "learning_rate": 1.5888441872474152e-05, "loss": 1.1559, "step": 524500 }, { "epoch": 2.05, "learning_rate": 1.5855923704573747e-05, "loss": 1.1504, "step": 525000 }, { "epoch": 2.05, "learning_rate": 1.5823405536673342e-05, "loss": 1.1538, "step": 525500 }, { "epoch": 2.05, "learning_rate": 1.5790887368772933e-05, "loss": 1.1498, "step": 526000 }, { "epoch": 2.05, "learning_rate": 1.575836920087253e-05, "loss": 1.1576, "step": 526500 }, { "epoch": 2.06, "learning_rate": 1.5725851032972124e-05, "loss": 1.1574, "step": 527000 }, { "epoch": 2.06, "learning_rate": 1.5693332865071715e-05, "loss": 1.1434, "step": 527500 }, { "epoch": 2.06, "learning_rate": 1.566081469717131e-05, "loss": 1.1451, "step": 528000 }, { "epoch": 2.06, "learning_rate": 1.5628296529270905e-05, "loss": 1.1555, "step": 528500 }, { "epoch": 2.06, "learning_rate": 1.5595778361370497e-05, "loss": 1.1537, "step": 529000 }, { "epoch": 2.07, "learning_rate": 1.5563260193470092e-05, "loss": 1.153, "step": 529500 }, { "epoch": 2.07, "learning_rate": 1.5530742025569687e-05, "loss": 1.1526, "step": 530000 }, { "epoch": 2.07, "learning_rate": 1.5498223857669282e-05, "loss": 1.1589, "step": 530500 }, { "epoch": 2.07, "learning_rate": 1.5465705689768874e-05, "loss": 1.1471, "step": 531000 }, { "epoch": 2.07, "learning_rate": 1.543318752186847e-05, "loss": 1.1536, "step": 531500 }, { "epoch": 2.08, "learning_rate": 1.540066935396806e-05, "loss": 1.1498, "step": 532000 }, { "epoch": 2.08, "learning_rate": 1.5368151186067655e-05, "loss": 1.1515, "step": 532500 }, { "epoch": 2.08, "learning_rate": 1.533563301816725e-05, "loss": 1.1455, "step": 533000 }, { "epoch": 2.08, "learning_rate": 1.5303114850266845e-05, "loss": 1.1417, "step": 533500 }, { "epoch": 2.08, "learning_rate": 1.527059668236644e-05, "loss": 1.1472, "step": 534000 }, { "epoch": 2.09, "learning_rate": 1.5238078514466034e-05, "loss": 1.1419, "step": 534500 }, { "epoch": 2.09, "learning_rate": 1.5205560346565629e-05, "loss": 1.1446, "step": 535000 }, { "epoch": 2.09, "learning_rate": 1.517304217866522e-05, "loss": 1.1467, "step": 535500 }, { "epoch": 2.09, "learning_rate": 1.5140524010764814e-05, "loss": 1.1511, "step": 536000 }, { "epoch": 2.09, "learning_rate": 1.5108005842864409e-05, "loss": 1.1476, "step": 536500 }, { "epoch": 2.1, "learning_rate": 1.5075487674964004e-05, "loss": 1.1456, "step": 537000 }, { "epoch": 2.1, "learning_rate": 1.5042969507063597e-05, "loss": 1.146, "step": 537500 }, { "epoch": 2.1, "learning_rate": 1.5010451339163192e-05, "loss": 1.1491, "step": 538000 }, { "epoch": 2.1, "learning_rate": 1.4977933171262784e-05, "loss": 1.1476, "step": 538500 }, { "epoch": 2.1, "learning_rate": 1.4945415003362379e-05, "loss": 1.1413, "step": 539000 }, { "epoch": 2.11, "learning_rate": 1.4912896835461972e-05, "loss": 1.1439, "step": 539500 }, { "epoch": 2.11, "learning_rate": 1.4880378667561567e-05, "loss": 1.1463, "step": 540000 }, { "epoch": 2.11, "learning_rate": 1.4847860499661162e-05, "loss": 1.1464, "step": 540500 }, { "epoch": 2.11, "learning_rate": 1.4815342331760756e-05, "loss": 1.1368, "step": 541000 }, { "epoch": 2.11, "learning_rate": 1.4782824163860349e-05, "loss": 1.1439, "step": 541500 }, { "epoch": 2.11, "learning_rate": 1.4750305995959942e-05, "loss": 1.1426, "step": 542000 }, { "epoch": 2.12, "learning_rate": 1.4717787828059537e-05, "loss": 1.1357, "step": 542500 }, { "epoch": 2.12, "learning_rate": 1.4685269660159132e-05, "loss": 1.1489, "step": 543000 }, { "epoch": 2.12, "learning_rate": 1.4652751492258726e-05, "loss": 1.1383, "step": 543500 }, { "epoch": 2.12, "learning_rate": 1.462023332435832e-05, "loss": 1.1401, "step": 544000 }, { "epoch": 2.12, "learning_rate": 1.4587715156457916e-05, "loss": 1.1387, "step": 544500 }, { "epoch": 2.13, "learning_rate": 1.4555196988557507e-05, "loss": 1.1428, "step": 545000 }, { "epoch": 2.13, "learning_rate": 1.45226788206571e-05, "loss": 1.1404, "step": 545500 }, { "epoch": 2.13, "learning_rate": 1.4490160652756696e-05, "loss": 1.1366, "step": 546000 }, { "epoch": 2.13, "learning_rate": 1.445764248485629e-05, "loss": 1.1338, "step": 546500 }, { "epoch": 2.13, "learning_rate": 1.4425124316955884e-05, "loss": 1.1323, "step": 547000 }, { "epoch": 2.14, "learning_rate": 1.439260614905548e-05, "loss": 1.1413, "step": 547500 }, { "epoch": 2.14, "learning_rate": 1.4360087981155071e-05, "loss": 1.1357, "step": 548000 }, { "epoch": 2.14, "learning_rate": 1.4327569813254666e-05, "loss": 1.1352, "step": 548500 }, { "epoch": 2.14, "learning_rate": 1.429505164535426e-05, "loss": 1.1406, "step": 549000 }, { "epoch": 2.14, "learning_rate": 1.4262533477453854e-05, "loss": 1.1359, "step": 549500 }, { "epoch": 2.15, "learning_rate": 1.423001530955345e-05, "loss": 1.1319, "step": 550000 }, { "epoch": 2.15, "learning_rate": 1.4197497141653043e-05, "loss": 1.1335, "step": 550500 }, { "epoch": 2.15, "learning_rate": 1.4164978973752634e-05, "loss": 1.1293, "step": 551000 }, { "epoch": 2.15, "learning_rate": 1.413246080585223e-05, "loss": 1.1322, "step": 551500 }, { "epoch": 2.15, "learning_rate": 1.4099942637951824e-05, "loss": 1.135, "step": 552000 }, { "epoch": 2.16, "learning_rate": 1.4067424470051418e-05, "loss": 1.1328, "step": 552500 }, { "epoch": 2.16, "learning_rate": 1.4034906302151013e-05, "loss": 1.1359, "step": 553000 }, { "epoch": 2.16, "learning_rate": 1.4002388134250608e-05, "loss": 1.1387, "step": 553500 }, { "epoch": 2.16, "learning_rate": 1.39698699663502e-05, "loss": 1.1368, "step": 554000 }, { "epoch": 2.16, "learning_rate": 1.3937351798449794e-05, "loss": 1.1342, "step": 554500 }, { "epoch": 2.17, "learning_rate": 1.3904833630549388e-05, "loss": 1.1309, "step": 555000 }, { "epoch": 2.17, "learning_rate": 1.3872315462648983e-05, "loss": 1.1427, "step": 555500 }, { "epoch": 2.17, "learning_rate": 1.3839797294748578e-05, "loss": 1.1333, "step": 556000 }, { "epoch": 2.17, "learning_rate": 1.3807279126848171e-05, "loss": 1.1328, "step": 556500 }, { "epoch": 2.17, "learning_rate": 1.3774760958947766e-05, "loss": 1.1394, "step": 557000 }, { "epoch": 2.18, "learning_rate": 1.3742242791047358e-05, "loss": 1.1368, "step": 557500 }, { "epoch": 2.18, "learning_rate": 1.3709724623146953e-05, "loss": 1.1344, "step": 558000 }, { "epoch": 2.18, "learning_rate": 1.3677206455246546e-05, "loss": 1.1375, "step": 558500 }, { "epoch": 2.18, "learning_rate": 1.3644688287346141e-05, "loss": 1.1308, "step": 559000 }, { "epoch": 2.18, "learning_rate": 1.3612170119445736e-05, "loss": 1.1339, "step": 559500 }, { "epoch": 2.19, "learning_rate": 1.357965195154533e-05, "loss": 1.1252, "step": 560000 }, { "epoch": 2.19, "learning_rate": 1.3547133783644921e-05, "loss": 1.1296, "step": 560500 }, { "epoch": 2.19, "learning_rate": 1.3514615615744516e-05, "loss": 1.1386, "step": 561000 }, { "epoch": 2.19, "learning_rate": 1.3482097447844111e-05, "loss": 1.1379, "step": 561500 }, { "epoch": 2.19, "learning_rate": 1.3449579279943705e-05, "loss": 1.1262, "step": 562000 }, { "epoch": 2.19, "learning_rate": 1.34170611120433e-05, "loss": 1.1237, "step": 562500 }, { "epoch": 2.2, "learning_rate": 1.3384542944142895e-05, "loss": 1.1263, "step": 563000 }, { "epoch": 2.2, "learning_rate": 1.3352024776242486e-05, "loss": 1.1255, "step": 563500 }, { "epoch": 2.2, "learning_rate": 1.331950660834208e-05, "loss": 1.1362, "step": 564000 }, { "epoch": 2.2, "learning_rate": 1.3286988440441675e-05, "loss": 1.1202, "step": 564500 }, { "epoch": 2.2, "learning_rate": 1.325447027254127e-05, "loss": 1.1261, "step": 565000 }, { "epoch": 2.21, "learning_rate": 1.3221952104640865e-05, "loss": 1.128, "step": 565500 }, { "epoch": 2.21, "learning_rate": 1.3189433936740458e-05, "loss": 1.1336, "step": 566000 }, { "epoch": 2.21, "learning_rate": 1.3156915768840053e-05, "loss": 1.1263, "step": 566500 }, { "epoch": 2.21, "learning_rate": 1.3124397600939645e-05, "loss": 1.1338, "step": 567000 }, { "epoch": 2.21, "learning_rate": 1.309187943303924e-05, "loss": 1.1248, "step": 567500 }, { "epoch": 2.22, "learning_rate": 1.3059361265138833e-05, "loss": 1.1261, "step": 568000 }, { "epoch": 2.22, "learning_rate": 1.3026843097238428e-05, "loss": 1.1328, "step": 568500 }, { "epoch": 2.22, "learning_rate": 1.2994324929338023e-05, "loss": 1.1213, "step": 569000 }, { "epoch": 2.22, "learning_rate": 1.2961806761437617e-05, "loss": 1.1279, "step": 569500 }, { "epoch": 2.22, "learning_rate": 1.2929288593537208e-05, "loss": 1.1243, "step": 570000 }, { "epoch": 2.23, "learning_rate": 1.2896770425636803e-05, "loss": 1.1201, "step": 570500 }, { "epoch": 2.23, "learning_rate": 1.2864252257736398e-05, "loss": 1.1245, "step": 571000 }, { "epoch": 2.23, "learning_rate": 1.2831734089835992e-05, "loss": 1.1251, "step": 571500 }, { "epoch": 2.23, "learning_rate": 1.2799215921935587e-05, "loss": 1.1285, "step": 572000 }, { "epoch": 2.23, "learning_rate": 1.2766697754035182e-05, "loss": 1.1276, "step": 572500 }, { "epoch": 2.24, "learning_rate": 1.2734179586134773e-05, "loss": 1.1199, "step": 573000 }, { "epoch": 2.24, "learning_rate": 1.2701661418234367e-05, "loss": 1.1215, "step": 573500 }, { "epoch": 2.24, "learning_rate": 1.2669143250333962e-05, "loss": 1.1203, "step": 574000 }, { "epoch": 2.24, "learning_rate": 1.2636625082433557e-05, "loss": 1.1168, "step": 574500 }, { "epoch": 2.24, "learning_rate": 1.260410691453315e-05, "loss": 1.1223, "step": 575000 }, { "epoch": 2.25, "learning_rate": 1.2571588746632745e-05, "loss": 1.117, "step": 575500 }, { "epoch": 2.25, "learning_rate": 1.2539070578732337e-05, "loss": 1.1251, "step": 576000 }, { "epoch": 2.25, "learning_rate": 1.2506552410831932e-05, "loss": 1.1169, "step": 576500 }, { "epoch": 2.25, "learning_rate": 1.2474034242931527e-05, "loss": 1.1183, "step": 577000 }, { "epoch": 2.25, "learning_rate": 1.244151607503112e-05, "loss": 1.1251, "step": 577500 }, { "epoch": 2.26, "learning_rate": 1.2408997907130715e-05, "loss": 1.1188, "step": 578000 }, { "epoch": 2.26, "learning_rate": 1.2376479739230309e-05, "loss": 1.1198, "step": 578500 }, { "epoch": 2.26, "learning_rate": 1.2343961571329902e-05, "loss": 1.1304, "step": 579000 }, { "epoch": 2.26, "learning_rate": 1.2311443403429497e-05, "loss": 1.121, "step": 579500 }, { "epoch": 2.26, "learning_rate": 1.227892523552909e-05, "loss": 1.1215, "step": 580000 }, { "epoch": 2.27, "learning_rate": 1.2246407067628685e-05, "loss": 1.1213, "step": 580500 }, { "epoch": 2.27, "learning_rate": 1.2213888899728279e-05, "loss": 1.1229, "step": 581000 }, { "epoch": 2.27, "learning_rate": 1.2181370731827872e-05, "loss": 1.1167, "step": 581500 }, { "epoch": 2.27, "learning_rate": 1.2148852563927467e-05, "loss": 1.1308, "step": 582000 }, { "epoch": 2.27, "learning_rate": 1.211633439602706e-05, "loss": 1.1189, "step": 582500 }, { "epoch": 2.27, "learning_rate": 1.2083816228126654e-05, "loss": 1.1161, "step": 583000 }, { "epoch": 2.28, "learning_rate": 1.2051298060226249e-05, "loss": 1.1188, "step": 583500 }, { "epoch": 2.28, "learning_rate": 1.2018779892325844e-05, "loss": 1.1157, "step": 584000 }, { "epoch": 2.28, "learning_rate": 1.1986261724425437e-05, "loss": 1.1145, "step": 584500 }, { "epoch": 2.28, "learning_rate": 1.195374355652503e-05, "loss": 1.1179, "step": 585000 }, { "epoch": 2.28, "learning_rate": 1.1921225388624625e-05, "loss": 1.1155, "step": 585500 }, { "epoch": 2.29, "learning_rate": 1.188870722072422e-05, "loss": 1.1277, "step": 586000 }, { "epoch": 2.29, "learning_rate": 1.1856189052823812e-05, "loss": 1.1162, "step": 586500 }, { "epoch": 2.29, "learning_rate": 1.1823670884923407e-05, "loss": 1.122, "step": 587000 }, { "epoch": 2.29, "learning_rate": 1.1791152717023002e-05, "loss": 1.1188, "step": 587500 }, { "epoch": 2.29, "learning_rate": 1.1758634549122596e-05, "loss": 1.1187, "step": 588000 }, { "epoch": 2.3, "learning_rate": 1.1726116381222189e-05, "loss": 1.108, "step": 588500 }, { "epoch": 2.3, "learning_rate": 1.1693598213321784e-05, "loss": 1.1199, "step": 589000 }, { "epoch": 2.3, "learning_rate": 1.1661080045421377e-05, "loss": 1.1125, "step": 589500 }, { "epoch": 2.3, "learning_rate": 1.1628561877520972e-05, "loss": 1.117, "step": 590000 }, { "epoch": 2.3, "learning_rate": 1.1596043709620566e-05, "loss": 1.1159, "step": 590500 }, { "epoch": 2.31, "learning_rate": 1.1563525541720159e-05, "loss": 1.1172, "step": 591000 }, { "epoch": 2.31, "learning_rate": 1.1531007373819754e-05, "loss": 1.1136, "step": 591500 }, { "epoch": 2.31, "learning_rate": 1.1498489205919347e-05, "loss": 1.1146, "step": 592000 }, { "epoch": 2.31, "learning_rate": 1.146597103801894e-05, "loss": 1.111, "step": 592500 }, { "epoch": 2.31, "learning_rate": 1.1433452870118536e-05, "loss": 1.1129, "step": 593000 }, { "epoch": 2.32, "learning_rate": 1.140093470221813e-05, "loss": 1.1069, "step": 593500 }, { "epoch": 2.32, "learning_rate": 1.1368416534317722e-05, "loss": 1.1168, "step": 594000 }, { "epoch": 2.32, "learning_rate": 1.1335898366417317e-05, "loss": 1.1157, "step": 594500 }, { "epoch": 2.32, "learning_rate": 1.1303380198516912e-05, "loss": 1.1111, "step": 595000 }, { "epoch": 2.32, "learning_rate": 1.1270862030616506e-05, "loss": 1.1084, "step": 595500 }, { "epoch": 2.33, "learning_rate": 1.1238343862716099e-05, "loss": 1.1132, "step": 596000 }, { "epoch": 2.33, "learning_rate": 1.1205825694815694e-05, "loss": 1.1139, "step": 596500 }, { "epoch": 2.33, "learning_rate": 1.117330752691529e-05, "loss": 1.11, "step": 597000 }, { "epoch": 2.33, "learning_rate": 1.1140789359014883e-05, "loss": 1.1162, "step": 597500 }, { "epoch": 2.33, "learning_rate": 1.1108271191114476e-05, "loss": 1.11, "step": 598000 }, { "epoch": 2.34, "learning_rate": 1.1075753023214071e-05, "loss": 1.1106, "step": 598500 }, { "epoch": 2.34, "learning_rate": 1.1043234855313664e-05, "loss": 1.1054, "step": 599000 }, { "epoch": 2.34, "learning_rate": 1.1010716687413258e-05, "loss": 1.1091, "step": 599500 }, { "epoch": 2.34, "learning_rate": 1.0978198519512853e-05, "loss": 1.1067, "step": 600000 }, { "epoch": 2.34, "learning_rate": 1.0945680351612446e-05, "loss": 1.1033, "step": 600500 }, { "epoch": 2.35, "learning_rate": 1.0913162183712041e-05, "loss": 1.1133, "step": 601000 }, { "epoch": 2.35, "learning_rate": 1.0880644015811634e-05, "loss": 1.1101, "step": 601500 }, { "epoch": 2.35, "learning_rate": 1.0848125847911228e-05, "loss": 1.1105, "step": 602000 }, { "epoch": 2.35, "learning_rate": 1.0815607680010823e-05, "loss": 1.1083, "step": 602500 }, { "epoch": 2.35, "learning_rate": 1.0783089512110418e-05, "loss": 1.1077, "step": 603000 }, { "epoch": 2.35, "learning_rate": 1.075057134421001e-05, "loss": 1.1051, "step": 603500 }, { "epoch": 2.36, "learning_rate": 1.0718053176309604e-05, "loss": 1.1064, "step": 604000 }, { "epoch": 2.36, "learning_rate": 1.06855350084092e-05, "loss": 1.1054, "step": 604500 }, { "epoch": 2.36, "learning_rate": 1.0653016840508793e-05, "loss": 1.1107, "step": 605000 }, { "epoch": 2.36, "learning_rate": 1.0620498672608386e-05, "loss": 1.1031, "step": 605500 }, { "epoch": 2.36, "learning_rate": 1.0587980504707981e-05, "loss": 1.1097, "step": 606000 }, { "epoch": 2.37, "learning_rate": 1.0555462336807576e-05, "loss": 1.1074, "step": 606500 }, { "epoch": 2.37, "learning_rate": 1.0522944168907168e-05, "loss": 1.1144, "step": 607000 }, { "epoch": 2.37, "learning_rate": 1.0490426001006763e-05, "loss": 1.1106, "step": 607500 }, { "epoch": 2.37, "learning_rate": 1.0457907833106358e-05, "loss": 1.1091, "step": 608000 }, { "epoch": 2.37, "learning_rate": 1.0425389665205951e-05, "loss": 1.1057, "step": 608500 }, { "epoch": 2.38, "learning_rate": 1.0392871497305545e-05, "loss": 1.1066, "step": 609000 }, { "epoch": 2.38, "learning_rate": 1.036035332940514e-05, "loss": 1.108, "step": 609500 }, { "epoch": 2.38, "learning_rate": 1.0327835161504733e-05, "loss": 1.1104, "step": 610000 }, { "epoch": 2.38, "learning_rate": 1.0295316993604328e-05, "loss": 1.1158, "step": 610500 }, { "epoch": 2.38, "learning_rate": 1.0262798825703921e-05, "loss": 1.109, "step": 611000 }, { "epoch": 2.39, "learning_rate": 1.0230280657803515e-05, "loss": 1.1011, "step": 611500 }, { "epoch": 2.39, "learning_rate": 1.019776248990311e-05, "loss": 1.0989, "step": 612000 }, { "epoch": 2.39, "learning_rate": 1.0165244322002703e-05, "loss": 1.109, "step": 612500 }, { "epoch": 2.39, "learning_rate": 1.0132726154102296e-05, "loss": 1.0999, "step": 613000 }, { "epoch": 2.39, "learning_rate": 1.0100207986201891e-05, "loss": 1.1091, "step": 613500 }, { "epoch": 2.4, "learning_rate": 1.0067689818301486e-05, "loss": 1.1033, "step": 614000 }, { "epoch": 2.4, "learning_rate": 1.003517165040108e-05, "loss": 1.1087, "step": 614500 }, { "epoch": 2.4, "learning_rate": 1.0002653482500673e-05, "loss": 1.0964, "step": 615000 }, { "epoch": 2.4, "learning_rate": 9.970135314600268e-06, "loss": 1.0946, "step": 615500 }, { "epoch": 2.4, "learning_rate": 9.937617146699861e-06, "loss": 1.0994, "step": 616000 }, { "epoch": 2.41, "learning_rate": 9.905098978799455e-06, "loss": 1.1078, "step": 616500 }, { "epoch": 2.41, "learning_rate": 9.87258081089905e-06, "loss": 1.101, "step": 617000 }, { "epoch": 2.41, "learning_rate": 9.840062642998645e-06, "loss": 1.1085, "step": 617500 }, { "epoch": 2.41, "learning_rate": 9.807544475098238e-06, "loss": 1.1083, "step": 618000 }, { "epoch": 2.41, "learning_rate": 9.775026307197832e-06, "loss": 1.1069, "step": 618500 }, { "epoch": 2.42, "learning_rate": 9.742508139297427e-06, "loss": 1.1041, "step": 619000 }, { "epoch": 2.42, "learning_rate": 9.70998997139702e-06, "loss": 1.1056, "step": 619500 }, { "epoch": 2.42, "learning_rate": 9.677471803496615e-06, "loss": 1.1013, "step": 620000 }, { "epoch": 2.42, "learning_rate": 9.644953635596208e-06, "loss": 1.1007, "step": 620500 }, { "epoch": 2.42, "learning_rate": 9.612435467695802e-06, "loss": 1.1092, "step": 621000 }, { "epoch": 2.43, "learning_rate": 9.579917299795397e-06, "loss": 1.104, "step": 621500 }, { "epoch": 2.43, "learning_rate": 9.54739913189499e-06, "loss": 1.098, "step": 622000 }, { "epoch": 2.43, "learning_rate": 9.514880963994583e-06, "loss": 1.0999, "step": 622500 }, { "epoch": 2.43, "learning_rate": 9.482362796094178e-06, "loss": 1.0956, "step": 623000 }, { "epoch": 2.43, "learning_rate": 9.449844628193773e-06, "loss": 1.1052, "step": 623500 }, { "epoch": 2.43, "learning_rate": 9.417326460293365e-06, "loss": 1.1057, "step": 624000 }, { "epoch": 2.44, "learning_rate": 9.38480829239296e-06, "loss": 1.0962, "step": 624500 }, { "epoch": 2.44, "learning_rate": 9.352290124492555e-06, "loss": 1.103, "step": 625000 }, { "epoch": 2.44, "learning_rate": 9.319771956592148e-06, "loss": 1.1092, "step": 625500 }, { "epoch": 2.44, "learning_rate": 9.287253788691742e-06, "loss": 1.0881, "step": 626000 }, { "epoch": 2.44, "learning_rate": 9.254735620791337e-06, "loss": 1.0999, "step": 626500 }, { "epoch": 2.45, "learning_rate": 9.22221745289093e-06, "loss": 1.1054, "step": 627000 }, { "epoch": 2.45, "learning_rate": 9.189699284990525e-06, "loss": 1.0941, "step": 627500 }, { "epoch": 2.45, "learning_rate": 9.157181117090119e-06, "loss": 1.1027, "step": 628000 }, { "epoch": 2.45, "learning_rate": 9.124662949189712e-06, "loss": 1.0986, "step": 628500 }, { "epoch": 2.45, "learning_rate": 9.092144781289307e-06, "loss": 1.0928, "step": 629000 }, { "epoch": 2.46, "learning_rate": 9.0596266133889e-06, "loss": 1.0906, "step": 629500 }, { "epoch": 2.46, "learning_rate": 9.027108445488495e-06, "loss": 1.0925, "step": 630000 }, { "epoch": 2.46, "learning_rate": 8.994590277588089e-06, "loss": 1.0926, "step": 630500 }, { "epoch": 2.46, "learning_rate": 8.962072109687684e-06, "loss": 1.0927, "step": 631000 }, { "epoch": 2.46, "learning_rate": 8.929553941787277e-06, "loss": 1.1047, "step": 631500 }, { "epoch": 2.47, "learning_rate": 8.89703577388687e-06, "loss": 1.0932, "step": 632000 }, { "epoch": 2.47, "learning_rate": 8.864517605986465e-06, "loss": 1.099, "step": 632500 }, { "epoch": 2.47, "learning_rate": 8.83199943808606e-06, "loss": 1.0946, "step": 633000 }, { "epoch": 2.47, "learning_rate": 8.799481270185652e-06, "loss": 1.1017, "step": 633500 }, { "epoch": 2.47, "learning_rate": 8.766963102285247e-06, "loss": 1.1008, "step": 634000 }, { "epoch": 2.48, "learning_rate": 8.734444934384842e-06, "loss": 1.0916, "step": 634500 }, { "epoch": 2.48, "learning_rate": 8.701926766484435e-06, "loss": 1.0949, "step": 635000 }, { "epoch": 2.48, "learning_rate": 8.669408598584029e-06, "loss": 1.0958, "step": 635500 }, { "epoch": 2.48, "learning_rate": 8.636890430683624e-06, "loss": 1.1001, "step": 636000 }, { "epoch": 2.48, "learning_rate": 8.604372262783217e-06, "loss": 1.0931, "step": 636500 }, { "epoch": 2.49, "learning_rate": 8.57185409488281e-06, "loss": 1.0975, "step": 637000 }, { "epoch": 2.49, "learning_rate": 8.539335926982406e-06, "loss": 1.1003, "step": 637500 }, { "epoch": 2.49, "learning_rate": 8.506817759081999e-06, "loss": 1.0929, "step": 638000 }, { "epoch": 2.49, "learning_rate": 8.474299591181594e-06, "loss": 1.0987, "step": 638500 }, { "epoch": 2.49, "learning_rate": 8.441781423281187e-06, "loss": 1.0915, "step": 639000 }, { "epoch": 2.5, "learning_rate": 8.40926325538078e-06, "loss": 1.0907, "step": 639500 }, { "epoch": 2.5, "learning_rate": 8.376745087480376e-06, "loss": 1.0959, "step": 640000 }, { "epoch": 2.5, "learning_rate": 8.34422691957997e-06, "loss": 1.094, "step": 640500 }, { "epoch": 2.5, "learning_rate": 8.311708751679564e-06, "loss": 1.0932, "step": 641000 }, { "epoch": 2.5, "learning_rate": 8.279190583779157e-06, "loss": 1.0887, "step": 641500 }, { "epoch": 2.51, "learning_rate": 8.246672415878752e-06, "loss": 1.0996, "step": 642000 }, { "epoch": 2.51, "learning_rate": 8.214154247978346e-06, "loss": 1.0919, "step": 642500 }, { "epoch": 2.51, "learning_rate": 8.181636080077939e-06, "loss": 1.0899, "step": 643000 }, { "epoch": 2.51, "learning_rate": 8.149117912177534e-06, "loss": 1.0899, "step": 643500 }, { "epoch": 2.51, "learning_rate": 8.116599744277129e-06, "loss": 1.0939, "step": 644000 }, { "epoch": 2.51, "learning_rate": 8.084081576376722e-06, "loss": 1.0915, "step": 644500 }, { "epoch": 2.52, "learning_rate": 8.051563408476316e-06, "loss": 1.0901, "step": 645000 }, { "epoch": 2.52, "learning_rate": 8.01904524057591e-06, "loss": 1.0844, "step": 645500 }, { "epoch": 2.52, "learning_rate": 7.986527072675504e-06, "loss": 1.0808, "step": 646000 }, { "epoch": 2.52, "learning_rate": 7.954008904775098e-06, "loss": 1.0843, "step": 646500 }, { "epoch": 2.52, "learning_rate": 7.921490736874693e-06, "loss": 1.0931, "step": 647000 }, { "epoch": 2.53, "learning_rate": 7.888972568974286e-06, "loss": 1.0874, "step": 647500 }, { "epoch": 2.53, "learning_rate": 7.856454401073881e-06, "loss": 1.0893, "step": 648000 }, { "epoch": 2.53, "learning_rate": 7.823936233173474e-06, "loss": 1.0879, "step": 648500 }, { "epoch": 2.53, "learning_rate": 7.791418065273068e-06, "loss": 1.1044, "step": 649000 }, { "epoch": 2.53, "learning_rate": 7.758899897372663e-06, "loss": 1.0898, "step": 649500 }, { "epoch": 2.54, "learning_rate": 7.726381729472256e-06, "loss": 1.0876, "step": 650000 }, { "epoch": 2.54, "learning_rate": 7.69386356157185e-06, "loss": 1.0968, "step": 650500 }, { "epoch": 2.54, "learning_rate": 7.661345393671444e-06, "loss": 1.087, "step": 651000 }, { "epoch": 2.54, "learning_rate": 7.6288272257710385e-06, "loss": 1.0867, "step": 651500 }, { "epoch": 2.54, "learning_rate": 7.5963090578706336e-06, "loss": 1.0834, "step": 652000 }, { "epoch": 2.55, "learning_rate": 7.563790889970226e-06, "loss": 1.0882, "step": 652500 }, { "epoch": 2.55, "learning_rate": 7.531272722069821e-06, "loss": 1.0883, "step": 653000 }, { "epoch": 2.55, "learning_rate": 7.498754554169415e-06, "loss": 1.0811, "step": 653500 }, { "epoch": 2.55, "learning_rate": 7.466236386269009e-06, "loss": 1.0852, "step": 654000 }, { "epoch": 2.55, "learning_rate": 7.433718218368603e-06, "loss": 1.0924, "step": 654500 }, { "epoch": 2.56, "learning_rate": 7.401200050468198e-06, "loss": 1.0938, "step": 655000 }, { "epoch": 2.56, "learning_rate": 7.36868188256779e-06, "loss": 1.0888, "step": 655500 }, { "epoch": 2.56, "learning_rate": 7.336163714667385e-06, "loss": 1.0905, "step": 656000 }, { "epoch": 2.56, "learning_rate": 7.3036455467669795e-06, "loss": 1.0882, "step": 656500 }, { "epoch": 2.56, "learning_rate": 7.271127378866573e-06, "loss": 1.0871, "step": 657000 }, { "epoch": 2.57, "learning_rate": 7.238609210966167e-06, "loss": 1.0855, "step": 657500 }, { "epoch": 2.57, "learning_rate": 7.206091043065761e-06, "loss": 1.0888, "step": 658000 }, { "epoch": 2.57, "learning_rate": 7.173572875165355e-06, "loss": 1.0819, "step": 658500 }, { "epoch": 2.57, "learning_rate": 7.141054707264949e-06, "loss": 1.089, "step": 659000 }, { "epoch": 2.57, "learning_rate": 7.108536539364544e-06, "loss": 1.0899, "step": 659500 }, { "epoch": 2.58, "learning_rate": 7.076018371464137e-06, "loss": 1.0841, "step": 660000 }, { "epoch": 2.58, "learning_rate": 7.043500203563731e-06, "loss": 1.0852, "step": 660500 }, { "epoch": 2.58, "learning_rate": 7.0109820356633255e-06, "loss": 1.0779, "step": 661000 }, { "epoch": 2.58, "learning_rate": 6.978463867762919e-06, "loss": 1.084, "step": 661500 }, { "epoch": 2.58, "learning_rate": 6.945945699862513e-06, "loss": 1.0835, "step": 662000 }, { "epoch": 2.59, "learning_rate": 6.913427531962108e-06, "loss": 1.0893, "step": 662500 }, { "epoch": 2.59, "learning_rate": 6.880909364061702e-06, "loss": 1.0897, "step": 663000 }, { "epoch": 2.59, "learning_rate": 6.848391196161296e-06, "loss": 1.0888, "step": 663500 }, { "epoch": 2.59, "learning_rate": 6.81587302826089e-06, "loss": 1.0907, "step": 664000 }, { "epoch": 2.59, "learning_rate": 6.783354860360484e-06, "loss": 1.0827, "step": 664500 }, { "epoch": 2.59, "learning_rate": 6.750836692460077e-06, "loss": 1.0874, "step": 665000 }, { "epoch": 2.6, "learning_rate": 6.718318524559672e-06, "loss": 1.0859, "step": 665500 }, { "epoch": 2.6, "learning_rate": 6.6858003566592665e-06, "loss": 1.0768, "step": 666000 }, { "epoch": 2.6, "learning_rate": 6.65328218875886e-06, "loss": 1.084, "step": 666500 }, { "epoch": 2.6, "learning_rate": 6.620764020858454e-06, "loss": 1.0814, "step": 667000 }, { "epoch": 2.6, "learning_rate": 6.588245852958048e-06, "loss": 1.0858, "step": 667500 }, { "epoch": 2.61, "learning_rate": 6.5557276850576416e-06, "loss": 1.0886, "step": 668000 }, { "epoch": 2.61, "learning_rate": 6.523209517157236e-06, "loss": 1.077, "step": 668500 }, { "epoch": 2.61, "learning_rate": 6.490691349256831e-06, "loss": 1.0803, "step": 669000 }, { "epoch": 2.61, "learning_rate": 6.458173181356423e-06, "loss": 1.0757, "step": 669500 }, { "epoch": 2.61, "learning_rate": 6.425655013456018e-06, "loss": 1.0778, "step": 670000 }, { "epoch": 2.62, "learning_rate": 6.3931368455556125e-06, "loss": 1.0805, "step": 670500 }, { "epoch": 2.62, "learning_rate": 6.360618677655206e-06, "loss": 1.0889, "step": 671000 }, { "epoch": 2.62, "learning_rate": 6.3281005097548e-06, "loss": 1.0801, "step": 671500 }, { "epoch": 2.62, "learning_rate": 6.295582341854395e-06, "loss": 1.0801, "step": 672000 }, { "epoch": 2.62, "learning_rate": 6.2630641739539876e-06, "loss": 1.0843, "step": 672500 }, { "epoch": 2.63, "learning_rate": 6.230546006053583e-06, "loss": 1.0856, "step": 673000 }, { "epoch": 2.63, "learning_rate": 6.198027838153177e-06, "loss": 1.0809, "step": 673500 }, { "epoch": 2.63, "learning_rate": 6.16550967025277e-06, "loss": 1.0775, "step": 674000 }, { "epoch": 2.63, "learning_rate": 6.132991502352364e-06, "loss": 1.0819, "step": 674500 }, { "epoch": 2.63, "learning_rate": 6.1004733344519585e-06, "loss": 1.0753, "step": 675000 }, { "epoch": 2.64, "learning_rate": 6.067955166551553e-06, "loss": 1.0901, "step": 675500 }, { "epoch": 2.64, "learning_rate": 6.035436998651146e-06, "loss": 1.0796, "step": 676000 }, { "epoch": 2.64, "learning_rate": 6.002918830750741e-06, "loss": 1.0775, "step": 676500 }, { "epoch": 2.64, "learning_rate": 5.970400662850334e-06, "loss": 1.0789, "step": 677000 }, { "epoch": 2.64, "learning_rate": 5.937882494949929e-06, "loss": 1.0898, "step": 677500 }, { "epoch": 2.65, "learning_rate": 5.905364327049523e-06, "loss": 1.0753, "step": 678000 }, { "epoch": 2.65, "learning_rate": 5.872846159149117e-06, "loss": 1.0732, "step": 678500 }, { "epoch": 2.65, "learning_rate": 5.840327991248711e-06, "loss": 1.0776, "step": 679000 }, { "epoch": 2.65, "learning_rate": 5.807809823348305e-06, "loss": 1.0794, "step": 679500 }, { "epoch": 2.65, "learning_rate": 5.775291655447899e-06, "loss": 1.0765, "step": 680000 }, { "epoch": 2.66, "learning_rate": 5.742773487547493e-06, "loss": 1.0737, "step": 680500 }, { "epoch": 2.66, "learning_rate": 5.710255319647087e-06, "loss": 1.0817, "step": 681000 }, { "epoch": 2.66, "learning_rate": 5.677737151746681e-06, "loss": 1.0755, "step": 681500 }, { "epoch": 2.66, "learning_rate": 5.645218983846275e-06, "loss": 1.0853, "step": 682000 }, { "epoch": 2.66, "learning_rate": 5.612700815945869e-06, "loss": 1.0847, "step": 682500 }, { "epoch": 2.67, "learning_rate": 5.580182648045464e-06, "loss": 1.086, "step": 683000 }, { "epoch": 2.67, "learning_rate": 5.547664480145057e-06, "loss": 1.0752, "step": 683500 }, { "epoch": 2.67, "learning_rate": 5.515146312244651e-06, "loss": 1.0851, "step": 684000 }, { "epoch": 2.67, "learning_rate": 5.4826281443442455e-06, "loss": 1.0743, "step": 684500 }, { "epoch": 2.67, "learning_rate": 5.45010997644384e-06, "loss": 1.0807, "step": 685000 }, { "epoch": 2.67, "learning_rate": 5.417591808543433e-06, "loss": 1.0754, "step": 685500 }, { "epoch": 2.68, "learning_rate": 5.385073640643028e-06, "loss": 1.0702, "step": 686000 }, { "epoch": 2.68, "learning_rate": 5.352555472742621e-06, "loss": 1.0796, "step": 686500 }, { "epoch": 2.68, "learning_rate": 5.3200373048422156e-06, "loss": 1.0742, "step": 687000 }, { "epoch": 2.68, "learning_rate": 5.28751913694181e-06, "loss": 1.0724, "step": 687500 }, { "epoch": 2.68, "learning_rate": 5.255000969041404e-06, "loss": 1.0806, "step": 688000 }, { "epoch": 2.69, "learning_rate": 5.222482801140998e-06, "loss": 1.0784, "step": 688500 }, { "epoch": 2.69, "learning_rate": 5.1899646332405914e-06, "loss": 1.0779, "step": 689000 }, { "epoch": 2.69, "learning_rate": 5.157446465340186e-06, "loss": 1.0747, "step": 689500 }, { "epoch": 2.69, "learning_rate": 5.12492829743978e-06, "loss": 1.0713, "step": 690000 }, { "epoch": 2.69, "learning_rate": 5.092410129539374e-06, "loss": 1.0813, "step": 690500 }, { "epoch": 2.7, "learning_rate": 5.059891961638967e-06, "loss": 1.0787, "step": 691000 }, { "epoch": 2.7, "learning_rate": 5.027373793738562e-06, "loss": 1.0704, "step": 691500 }, { "epoch": 2.7, "learning_rate": 4.994855625838156e-06, "loss": 1.0789, "step": 692000 }, { "epoch": 2.7, "learning_rate": 4.96233745793775e-06, "loss": 1.0808, "step": 692500 }, { "epoch": 2.7, "learning_rate": 4.929819290037344e-06, "loss": 1.077, "step": 693000 }, { "epoch": 2.71, "learning_rate": 4.897301122136938e-06, "loss": 1.0727, "step": 693500 }, { "epoch": 2.71, "learning_rate": 4.8647829542365325e-06, "loss": 1.0761, "step": 694000 }, { "epoch": 2.71, "learning_rate": 4.832264786336127e-06, "loss": 1.072, "step": 694500 }, { "epoch": 2.71, "learning_rate": 4.79974661843572e-06, "loss": 1.077, "step": 695000 }, { "epoch": 2.71, "learning_rate": 4.767228450535314e-06, "loss": 1.0719, "step": 695500 }, { "epoch": 2.72, "learning_rate": 4.734710282634908e-06, "loss": 1.0732, "step": 696000 }, { "epoch": 2.72, "learning_rate": 4.7021921147345025e-06, "loss": 1.0697, "step": 696500 }, { "epoch": 2.72, "learning_rate": 4.669673946834097e-06, "loss": 1.0658, "step": 697000 }, { "epoch": 2.72, "learning_rate": 4.63715577893369e-06, "loss": 1.0835, "step": 697500 }, { "epoch": 2.72, "learning_rate": 4.604637611033284e-06, "loss": 1.0741, "step": 698000 }, { "epoch": 2.73, "learning_rate": 4.5721194431328784e-06, "loss": 1.0666, "step": 698500 }, { "epoch": 2.73, "learning_rate": 4.539601275232473e-06, "loss": 1.0721, "step": 699000 }, { "epoch": 2.73, "learning_rate": 4.507083107332067e-06, "loss": 1.0756, "step": 699500 }, { "epoch": 2.73, "learning_rate": 4.474564939431661e-06, "loss": 1.0732, "step": 700000 }, { "epoch": 2.73, "learning_rate": 4.442046771531254e-06, "loss": 1.0761, "step": 700500 }, { "epoch": 2.74, "learning_rate": 4.409528603630849e-06, "loss": 1.0742, "step": 701000 }, { "epoch": 2.74, "learning_rate": 4.377010435730443e-06, "loss": 1.0779, "step": 701500 }, { "epoch": 2.74, "learning_rate": 4.344492267830037e-06, "loss": 1.072, "step": 702000 }, { "epoch": 2.74, "learning_rate": 4.311974099929631e-06, "loss": 1.0762, "step": 702500 }, { "epoch": 2.74, "learning_rate": 4.279455932029225e-06, "loss": 1.0707, "step": 703000 }, { "epoch": 2.75, "learning_rate": 4.246937764128819e-06, "loss": 1.0772, "step": 703500 }, { "epoch": 2.75, "learning_rate": 4.214419596228413e-06, "loss": 1.0619, "step": 704000 }, { "epoch": 2.75, "learning_rate": 4.181901428328007e-06, "loss": 1.0868, "step": 704500 }, { "epoch": 2.75, "learning_rate": 4.149383260427601e-06, "loss": 1.0695, "step": 705000 }, { "epoch": 2.75, "learning_rate": 4.116865092527195e-06, "loss": 1.0613, "step": 705500 }, { "epoch": 2.75, "learning_rate": 4.084346924626789e-06, "loss": 1.0673, "step": 706000 }, { "epoch": 2.76, "learning_rate": 4.051828756726384e-06, "loss": 1.0672, "step": 706500 }, { "epoch": 2.76, "learning_rate": 4.019310588825977e-06, "loss": 1.067, "step": 707000 }, { "epoch": 2.76, "learning_rate": 3.986792420925571e-06, "loss": 1.0703, "step": 707500 }, { "epoch": 2.76, "learning_rate": 3.9542742530251654e-06, "loss": 1.0661, "step": 708000 }, { "epoch": 2.76, "learning_rate": 3.92175608512476e-06, "loss": 1.0697, "step": 708500 }, { "epoch": 2.77, "learning_rate": 3.889237917224353e-06, "loss": 1.072, "step": 709000 }, { "epoch": 2.77, "learning_rate": 3.856719749323948e-06, "loss": 1.0708, "step": 709500 }, { "epoch": 2.77, "learning_rate": 3.824201581423541e-06, "loss": 1.0715, "step": 710000 }, { "epoch": 2.77, "learning_rate": 3.791683413523135e-06, "loss": 1.0731, "step": 710500 }, { "epoch": 2.77, "learning_rate": 3.7591652456227297e-06, "loss": 1.0731, "step": 711000 }, { "epoch": 2.78, "learning_rate": 3.7266470777223235e-06, "loss": 1.0659, "step": 711500 }, { "epoch": 2.78, "learning_rate": 3.694128909821918e-06, "loss": 1.0744, "step": 712000 }, { "epoch": 2.78, "learning_rate": 3.661610741921512e-06, "loss": 1.0691, "step": 712500 }, { "epoch": 2.78, "learning_rate": 3.6290925740211056e-06, "loss": 1.078, "step": 713000 }, { "epoch": 2.78, "learning_rate": 3.5965744061207e-06, "loss": 1.0713, "step": 713500 }, { "epoch": 2.79, "learning_rate": 3.564056238220294e-06, "loss": 1.0709, "step": 714000 }, { "epoch": 2.79, "learning_rate": 3.5315380703198877e-06, "loss": 1.0647, "step": 714500 }, { "epoch": 2.79, "learning_rate": 3.499019902419482e-06, "loss": 1.0637, "step": 715000 }, { "epoch": 2.79, "learning_rate": 3.466501734519076e-06, "loss": 1.0696, "step": 715500 }, { "epoch": 2.79, "learning_rate": 3.43398356661867e-06, "loss": 1.0796, "step": 716000 }, { "epoch": 2.8, "learning_rate": 3.401465398718264e-06, "loss": 1.0665, "step": 716500 }, { "epoch": 2.8, "learning_rate": 3.368947230817858e-06, "loss": 1.0692, "step": 717000 }, { "epoch": 2.8, "learning_rate": 3.3364290629174524e-06, "loss": 1.0693, "step": 717500 }, { "epoch": 2.8, "learning_rate": 3.303910895017046e-06, "loss": 1.0715, "step": 718000 }, { "epoch": 2.8, "learning_rate": 3.27139272711664e-06, "loss": 1.0652, "step": 718500 }, { "epoch": 2.81, "learning_rate": 3.2388745592162346e-06, "loss": 1.0846, "step": 719000 }, { "epoch": 2.81, "learning_rate": 3.2063563913158283e-06, "loss": 1.0657, "step": 719500 }, { "epoch": 2.81, "learning_rate": 3.173838223415422e-06, "loss": 1.0693, "step": 720000 }, { "epoch": 2.81, "learning_rate": 3.1413200555150167e-06, "loss": 1.0736, "step": 720500 }, { "epoch": 2.81, "learning_rate": 3.1088018876146104e-06, "loss": 1.0636, "step": 721000 }, { "epoch": 2.82, "learning_rate": 3.0762837197142046e-06, "loss": 1.069, "step": 721500 }, { "epoch": 2.82, "learning_rate": 3.043765551813799e-06, "loss": 1.072, "step": 722000 }, { "epoch": 2.82, "learning_rate": 3.0112473839133926e-06, "loss": 1.0663, "step": 722500 }, { "epoch": 2.82, "learning_rate": 2.9787292160129868e-06, "loss": 1.072, "step": 723000 }, { "epoch": 2.82, "learning_rate": 2.9462110481125805e-06, "loss": 1.0611, "step": 723500 }, { "epoch": 2.83, "learning_rate": 2.9136928802121747e-06, "loss": 1.0741, "step": 724000 }, { "epoch": 2.83, "learning_rate": 2.8811747123117685e-06, "loss": 1.0712, "step": 724500 }, { "epoch": 2.83, "learning_rate": 2.8486565444113627e-06, "loss": 1.07, "step": 725000 }, { "epoch": 2.83, "learning_rate": 2.816138376510957e-06, "loss": 1.0655, "step": 725500 }, { "epoch": 2.83, "learning_rate": 2.7836202086105506e-06, "loss": 1.0663, "step": 726000 }, { "epoch": 2.83, "learning_rate": 2.751102040710145e-06, "loss": 1.0648, "step": 726500 }, { "epoch": 2.84, "learning_rate": 2.718583872809739e-06, "loss": 1.068, "step": 727000 }, { "epoch": 2.84, "learning_rate": 2.686065704909333e-06, "loss": 1.0661, "step": 727500 }, { "epoch": 2.84, "learning_rate": 2.653547537008927e-06, "loss": 1.0593, "step": 728000 }, { "epoch": 2.84, "learning_rate": 2.621029369108521e-06, "loss": 1.0652, "step": 728500 }, { "epoch": 2.84, "learning_rate": 2.5885112012081153e-06, "loss": 1.0744, "step": 729000 }, { "epoch": 2.85, "learning_rate": 2.5559930333077095e-06, "loss": 1.0704, "step": 729500 }, { "epoch": 2.85, "learning_rate": 2.5234748654073033e-06, "loss": 1.0706, "step": 730000 }, { "epoch": 2.85, "learning_rate": 2.490956697506897e-06, "loss": 1.0681, "step": 730500 }, { "epoch": 2.85, "learning_rate": 2.458438529606491e-06, "loss": 1.078, "step": 731000 }, { "epoch": 2.85, "learning_rate": 2.425920361706085e-06, "loss": 1.0632, "step": 731500 }, { "epoch": 2.86, "learning_rate": 2.393402193805679e-06, "loss": 1.0671, "step": 732000 }, { "epoch": 2.86, "learning_rate": 2.3608840259052733e-06, "loss": 1.0681, "step": 732500 }, { "epoch": 2.86, "learning_rate": 2.3283658580048675e-06, "loss": 1.0653, "step": 733000 }, { "epoch": 2.86, "learning_rate": 2.2958476901044613e-06, "loss": 1.0697, "step": 733500 }, { "epoch": 2.86, "learning_rate": 2.2633295222040555e-06, "loss": 1.0691, "step": 734000 }, { "epoch": 2.87, "learning_rate": 2.2308113543036497e-06, "loss": 1.0584, "step": 734500 }, { "epoch": 2.87, "learning_rate": 2.198293186403244e-06, "loss": 1.0656, "step": 735000 }, { "epoch": 2.87, "learning_rate": 2.1657750185028376e-06, "loss": 1.0707, "step": 735500 }, { "epoch": 2.87, "learning_rate": 2.133256850602432e-06, "loss": 1.062, "step": 736000 }, { "epoch": 2.87, "learning_rate": 2.100738682702026e-06, "loss": 1.0601, "step": 736500 }, { "epoch": 2.88, "learning_rate": 2.0682205148016197e-06, "loss": 1.0657, "step": 737000 }, { "epoch": 2.88, "learning_rate": 2.035702346901214e-06, "loss": 1.0618, "step": 737500 }, { "epoch": 2.88, "learning_rate": 2.0031841790008077e-06, "loss": 1.0607, "step": 738000 }, { "epoch": 2.88, "learning_rate": 1.970666011100402e-06, "loss": 1.0701, "step": 738500 }, { "epoch": 2.88, "learning_rate": 1.9381478431999956e-06, "loss": 1.0678, "step": 739000 }, { "epoch": 2.89, "learning_rate": 1.90562967529959e-06, "loss": 1.071, "step": 739500 }, { "epoch": 2.89, "learning_rate": 1.873111507399184e-06, "loss": 1.0649, "step": 740000 }, { "epoch": 2.89, "learning_rate": 1.8405933394987782e-06, "loss": 1.0644, "step": 740500 }, { "epoch": 2.89, "learning_rate": 1.808075171598372e-06, "loss": 1.0663, "step": 741000 }, { "epoch": 2.89, "learning_rate": 1.7755570036979661e-06, "loss": 1.0682, "step": 741500 }, { "epoch": 2.9, "learning_rate": 1.7430388357975603e-06, "loss": 1.0641, "step": 742000 }, { "epoch": 2.9, "learning_rate": 1.710520667897154e-06, "loss": 1.0597, "step": 742500 }, { "epoch": 2.9, "learning_rate": 1.6780024999967483e-06, "loss": 1.0585, "step": 743000 }, { "epoch": 2.9, "learning_rate": 1.6454843320963425e-06, "loss": 1.0633, "step": 743500 }, { "epoch": 2.9, "learning_rate": 1.6129661641959364e-06, "loss": 1.0659, "step": 744000 }, { "epoch": 2.91, "learning_rate": 1.5804479962955302e-06, "loss": 1.0535, "step": 744500 }, { "epoch": 2.91, "learning_rate": 1.5479298283951244e-06, "loss": 1.0733, "step": 745000 }, { "epoch": 2.91, "learning_rate": 1.5154116604947186e-06, "loss": 1.0663, "step": 745500 }, { "epoch": 2.91, "learning_rate": 1.4828934925943125e-06, "loss": 1.0696, "step": 746000 }, { "epoch": 2.91, "learning_rate": 1.4503753246939065e-06, "loss": 1.0688, "step": 746500 }, { "epoch": 2.91, "learning_rate": 1.4178571567935007e-06, "loss": 1.0593, "step": 747000 }, { "epoch": 2.92, "learning_rate": 1.3853389888930947e-06, "loss": 1.0683, "step": 747500 }, { "epoch": 2.92, "learning_rate": 1.3528208209926887e-06, "loss": 1.0631, "step": 748000 }, { "epoch": 2.92, "learning_rate": 1.3203026530922826e-06, "loss": 1.0726, "step": 748500 }, { "epoch": 2.92, "learning_rate": 1.2877844851918768e-06, "loss": 1.0675, "step": 749000 }, { "epoch": 2.92, "learning_rate": 1.2552663172914708e-06, "loss": 1.0617, "step": 749500 }, { "epoch": 2.93, "learning_rate": 1.222748149391065e-06, "loss": 1.0617, "step": 750000 }, { "epoch": 2.93, "learning_rate": 1.190229981490659e-06, "loss": 1.0654, "step": 750500 }, { "epoch": 2.93, "learning_rate": 1.157711813590253e-06, "loss": 1.0681, "step": 751000 }, { "epoch": 2.93, "learning_rate": 1.1251936456898469e-06, "loss": 1.0669, "step": 751500 }, { "epoch": 2.93, "learning_rate": 1.0926754777894409e-06, "loss": 1.0634, "step": 752000 }, { "epoch": 2.94, "learning_rate": 1.060157309889035e-06, "loss": 1.0596, "step": 752500 }, { "epoch": 2.94, "learning_rate": 1.027639141988629e-06, "loss": 1.0609, "step": 753000 }, { "epoch": 2.94, "learning_rate": 9.951209740882232e-07, "loss": 1.0641, "step": 753500 }, { "epoch": 2.94, "learning_rate": 9.626028061878172e-07, "loss": 1.0555, "step": 754000 }, { "epoch": 2.94, "learning_rate": 9.300846382874113e-07, "loss": 1.0574, "step": 754500 }, { "epoch": 2.95, "learning_rate": 8.975664703870052e-07, "loss": 1.0654, "step": 755000 }, { "epoch": 2.95, "learning_rate": 8.650483024865994e-07, "loss": 1.0592, "step": 755500 }, { "epoch": 2.95, "learning_rate": 8.325301345861933e-07, "loss": 1.057, "step": 756000 }, { "epoch": 2.95, "learning_rate": 8.000119666857873e-07, "loss": 1.0593, "step": 756500 }, { "epoch": 2.95, "learning_rate": 7.674937987853815e-07, "loss": 1.0655, "step": 757000 }, { "epoch": 2.96, "learning_rate": 7.349756308849755e-07, "loss": 1.071, "step": 757500 }, { "epoch": 2.96, "learning_rate": 7.024574629845695e-07, "loss": 1.0546, "step": 758000 }, { "epoch": 2.96, "learning_rate": 6.699392950841635e-07, "loss": 1.0573, "step": 758500 }, { "epoch": 2.96, "learning_rate": 6.374211271837576e-07, "loss": 1.0652, "step": 759000 }, { "epoch": 2.96, "learning_rate": 6.049029592833516e-07, "loss": 1.065, "step": 759500 }, { "epoch": 2.97, "learning_rate": 5.723847913829456e-07, "loss": 1.071, "step": 760000 }, { "epoch": 2.97, "learning_rate": 5.398666234825397e-07, "loss": 1.0628, "step": 760500 }, { "epoch": 2.97, "learning_rate": 5.073484555821338e-07, "loss": 1.0618, "step": 761000 }, { "epoch": 2.97, "learning_rate": 4.748302876817278e-07, "loss": 1.0604, "step": 761500 }, { "epoch": 2.97, "learning_rate": 4.423121197813219e-07, "loss": 1.0612, "step": 762000 }, { "epoch": 2.98, "learning_rate": 4.097939518809159e-07, "loss": 1.0581, "step": 762500 }, { "epoch": 2.98, "learning_rate": 3.7727578398050994e-07, "loss": 1.0603, "step": 763000 }, { "epoch": 2.98, "learning_rate": 3.4475761608010396e-07, "loss": 1.0605, "step": 763500 }, { "epoch": 2.98, "learning_rate": 3.1223944817969804e-07, "loss": 1.0618, "step": 764000 }, { "epoch": 2.98, "learning_rate": 2.7972128027929207e-07, "loss": 1.064, "step": 764500 }, { "epoch": 2.99, "learning_rate": 2.472031123788861e-07, "loss": 1.0619, "step": 765000 }, { "epoch": 2.99, "learning_rate": 2.1468494447848013e-07, "loss": 1.0642, "step": 765500 }, { "epoch": 2.99, "learning_rate": 1.8216677657807418e-07, "loss": 1.0553, "step": 766000 }, { "epoch": 2.99, "learning_rate": 1.4964860867766823e-07, "loss": 1.0646, "step": 766500 }, { "epoch": 2.99, "learning_rate": 1.1713044077726226e-07, "loss": 1.0582, "step": 767000 }, { "epoch": 2.99, "learning_rate": 8.46122728768563e-08, "loss": 1.057, "step": 767500 }, { "epoch": 3.0, "learning_rate": 5.209410497645035e-08, "loss": 1.0583, "step": 768000 }, { "epoch": 3.0, "learning_rate": 1.9575937076044387e-08, "loss": 1.0635, "step": 768500 }, { "epoch": 3.0, "step": 768801, "total_flos": 6.47787050209493e+18, "train_loss": 2.538804254183028, "train_runtime": 274559.0377, "train_samples_per_second": 89.604, "train_steps_per_second": 2.8 } ], "max_steps": 768801, "num_train_epochs": 3, "total_flos": 6.47787050209493e+18, "trial_name": null, "trial_params": null }