diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9247 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "global_step": 768801, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9967481832099594e-05, + "loss": 6.7212, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 4.993496366419919e-05, + "loss": 6.2046, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9902445496298784e-05, + "loss": 6.1099, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 4.986992732839838e-05, + "loss": 6.0605, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9837409160497974e-05, + "loss": 6.0447, + "step": 2500 + }, + { + "epoch": 0.01, + "learning_rate": 4.980489099259757e-05, + "loss": 6.0184, + "step": 3000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9772372824697164e-05, + "loss": 5.9684, + "step": 3500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9739854656796756e-05, + "loss": 5.9596, + "step": 4000 + }, + { + "epoch": 0.02, + "learning_rate": 4.970733648889635e-05, + "loss": 5.9604, + "step": 4500 + }, + { + "epoch": 0.02, + "learning_rate": 4.967481832099594e-05, + "loss": 5.9346, + "step": 5000 + }, + { + "epoch": 0.02, + "learning_rate": 4.964230015309554e-05, + "loss": 5.9298, + "step": 5500 + }, + { + "epoch": 0.02, + "learning_rate": 4.960978198519513e-05, + "loss": 5.9189, + "step": 6000 + }, + { + "epoch": 0.03, + "learning_rate": 4.957726381729472e-05, + "loss": 5.8765, + "step": 6500 + }, + { + "epoch": 0.03, + "learning_rate": 4.954474564939432e-05, + "loss": 5.8812, + "step": 7000 + }, + { + "epoch": 0.03, + "learning_rate": 4.951222748149391e-05, + "loss": 5.8845, + "step": 7500 + }, + { + "epoch": 0.03, + "learning_rate": 4.947970931359351e-05, + "loss": 5.867, + "step": 8000 + }, + { + "epoch": 0.03, + "learning_rate": 4.94471911456931e-05, + "loss": 5.8744, + "step": 8500 + }, + { + "epoch": 0.04, + "learning_rate": 4.94146729777927e-05, + "loss": 5.8506, + "step": 9000 + }, + { + "epoch": 0.04, + "learning_rate": 4.938215480989229e-05, + "loss": 5.8343, + "step": 9500 + }, + { + "epoch": 0.04, + "learning_rate": 4.934963664199188e-05, + "loss": 5.829, + "step": 10000 + }, + { + "epoch": 0.04, + "learning_rate": 4.931711847409148e-05, + "loss": 5.8303, + "step": 10500 + }, + { + "epoch": 0.04, + "learning_rate": 4.928460030619107e-05, + "loss": 5.8147, + "step": 11000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9252082138290664e-05, + "loss": 5.8124, + "step": 11500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9219563970390256e-05, + "loss": 5.8069, + "step": 12000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9187045802489855e-05, + "loss": 5.8026, + "step": 12500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9154527634589446e-05, + "loss": 5.8412, + "step": 13000 + }, + { + "epoch": 0.05, + "learning_rate": 4.912200946668904e-05, + "loss": 5.8021, + "step": 13500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9089491298788636e-05, + "loss": 5.7984, + "step": 14000 + }, + { + "epoch": 0.06, + "learning_rate": 4.905697313088823e-05, + "loss": 5.7936, + "step": 14500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9024454962987826e-05, + "loss": 5.7868, + "step": 15000 + }, + { + "epoch": 0.06, + "learning_rate": 4.899193679508742e-05, + "loss": 5.7637, + "step": 15500 + }, + { + "epoch": 0.06, + "learning_rate": 4.895941862718701e-05, + "loss": 5.7895, + "step": 16000 + }, + { + "epoch": 0.06, + "learning_rate": 4.892690045928661e-05, + "loss": 5.7957, + "step": 16500 + }, + { + "epoch": 0.07, + "learning_rate": 4.88943822913862e-05, + "loss": 5.7764, + "step": 17000 + }, + { + "epoch": 0.07, + "learning_rate": 4.88618641234858e-05, + "loss": 5.7723, + "step": 17500 + }, + { + "epoch": 0.07, + "learning_rate": 4.882934595558539e-05, + "loss": 5.763, + "step": 18000 + }, + { + "epoch": 0.07, + "learning_rate": 4.879682778768498e-05, + "loss": 5.7695, + "step": 18500 + }, + { + "epoch": 0.07, + "learning_rate": 4.876430961978457e-05, + "loss": 5.768, + "step": 19000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8731791451884165e-05, + "loss": 5.7776, + "step": 19500 + }, + { + "epoch": 0.08, + "learning_rate": 4.869927328398376e-05, + "loss": 5.7606, + "step": 20000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8666755116083355e-05, + "loss": 5.7308, + "step": 20500 + }, + { + "epoch": 0.08, + "learning_rate": 4.863423694818295e-05, + "loss": 5.7412, + "step": 21000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8601718780282545e-05, + "loss": 5.7627, + "step": 21500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8569200612382136e-05, + "loss": 5.7449, + "step": 22000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8536682444481735e-05, + "loss": 5.7572, + "step": 22500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8504164276581326e-05, + "loss": 5.7505, + "step": 23000 + }, + { + "epoch": 0.09, + "learning_rate": 4.8471646108680925e-05, + "loss": 5.7188, + "step": 23500 + }, + { + "epoch": 0.09, + "learning_rate": 4.8439127940780517e-05, + "loss": 5.7193, + "step": 24000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8406609772880115e-05, + "loss": 5.7303, + "step": 24500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8374091604979707e-05, + "loss": 5.7279, + "step": 25000 + }, + { + "epoch": 0.1, + "learning_rate": 4.83415734370793e-05, + "loss": 5.716, + "step": 25500 + }, + { + "epoch": 0.1, + "learning_rate": 4.83090552691789e-05, + "loss": 5.7237, + "step": 26000 + }, + { + "epoch": 0.1, + "learning_rate": 4.827653710127849e-05, + "loss": 5.7276, + "step": 26500 + }, + { + "epoch": 0.11, + "learning_rate": 4.824401893337808e-05, + "loss": 5.7271, + "step": 27000 + }, + { + "epoch": 0.11, + "learning_rate": 4.821150076547767e-05, + "loss": 5.7384, + "step": 27500 + }, + { + "epoch": 0.11, + "learning_rate": 4.817898259757727e-05, + "loss": 5.6949, + "step": 28000 + }, + { + "epoch": 0.11, + "learning_rate": 4.814646442967686e-05, + "loss": 5.7068, + "step": 28500 + }, + { + "epoch": 0.11, + "learning_rate": 4.811394626177645e-05, + "loss": 5.7147, + "step": 29000 + }, + { + "epoch": 0.12, + "learning_rate": 4.808142809387605e-05, + "loss": 5.7132, + "step": 29500 + }, + { + "epoch": 0.12, + "learning_rate": 4.804890992597564e-05, + "loss": 5.7256, + "step": 30000 + }, + { + "epoch": 0.12, + "learning_rate": 4.801639175807524e-05, + "loss": 5.7195, + "step": 30500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7983873590174833e-05, + "loss": 5.7022, + "step": 31000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7951355422274425e-05, + "loss": 5.6978, + "step": 31500 + }, + { + "epoch": 0.12, + "learning_rate": 4.7918837254374024e-05, + "loss": 5.674, + "step": 32000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7886319086473615e-05, + "loss": 5.7044, + "step": 32500 + }, + { + "epoch": 0.13, + "learning_rate": 4.7853800918573214e-05, + "loss": 5.7076, + "step": 33000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7821282750672805e-05, + "loss": 5.7146, + "step": 33500 + }, + { + "epoch": 0.13, + "learning_rate": 4.77887645827724e-05, + "loss": 5.6924, + "step": 34000 + }, + { + "epoch": 0.13, + "learning_rate": 4.775624641487199e-05, + "loss": 5.6972, + "step": 34500 + }, + { + "epoch": 0.14, + "learning_rate": 4.772372824697158e-05, + "loss": 5.6929, + "step": 35000 + }, + { + "epoch": 0.14, + "learning_rate": 4.769121007907118e-05, + "loss": 5.6553, + "step": 35500 + }, + { + "epoch": 0.14, + "learning_rate": 4.765869191117077e-05, + "loss": 5.6857, + "step": 36000 + }, + { + "epoch": 0.14, + "learning_rate": 4.762617374327037e-05, + "loss": 5.7021, + "step": 36500 + }, + { + "epoch": 0.14, + "learning_rate": 4.759365557536996e-05, + "loss": 5.695, + "step": 37000 + }, + { + "epoch": 0.15, + "learning_rate": 4.756113740746956e-05, + "loss": 5.7066, + "step": 37500 + }, + { + "epoch": 0.15, + "learning_rate": 4.752861923956915e-05, + "loss": 5.6919, + "step": 38000 + }, + { + "epoch": 0.15, + "learning_rate": 4.749610107166874e-05, + "loss": 5.6823, + "step": 38500 + }, + { + "epoch": 0.15, + "learning_rate": 4.746358290376834e-05, + "loss": 5.6699, + "step": 39000 + }, + { + "epoch": 0.15, + "learning_rate": 4.743106473586793e-05, + "loss": 5.6837, + "step": 39500 + }, + { + "epoch": 0.16, + "learning_rate": 4.739854656796753e-05, + "loss": 5.6773, + "step": 40000 + }, + { + "epoch": 0.16, + "learning_rate": 4.736602840006712e-05, + "loss": 5.6796, + "step": 40500 + }, + { + "epoch": 0.16, + "learning_rate": 4.7333510232166714e-05, + "loss": 5.6663, + "step": 41000 + }, + { + "epoch": 0.16, + "learning_rate": 4.7300992064266305e-05, + "loss": 5.6548, + "step": 41500 + }, + { + "epoch": 0.16, + "learning_rate": 4.72684738963659e-05, + "loss": 5.6853, + "step": 42000 + }, + { + "epoch": 0.17, + "learning_rate": 4.7235955728465495e-05, + "loss": 5.691, + "step": 42500 + }, + { + "epoch": 0.17, + "learning_rate": 4.720343756056509e-05, + "loss": 5.657, + "step": 43000 + }, + { + "epoch": 0.17, + "learning_rate": 4.7170919392664686e-05, + "loss": 5.6815, + "step": 43500 + }, + { + "epoch": 0.17, + "learning_rate": 4.713840122476428e-05, + "loss": 5.6602, + "step": 44000 + }, + { + "epoch": 0.17, + "learning_rate": 4.710588305686387e-05, + "loss": 5.6691, + "step": 44500 + }, + { + "epoch": 0.18, + "learning_rate": 4.707336488896347e-05, + "loss": 5.6767, + "step": 45000 + }, + { + "epoch": 0.18, + "learning_rate": 4.704084672106306e-05, + "loss": 5.6499, + "step": 45500 + }, + { + "epoch": 0.18, + "learning_rate": 4.700832855316266e-05, + "loss": 5.669, + "step": 46000 + }, + { + "epoch": 0.18, + "learning_rate": 4.697581038526225e-05, + "loss": 5.6641, + "step": 46500 + }, + { + "epoch": 0.18, + "learning_rate": 4.694329221736185e-05, + "loss": 5.6509, + "step": 47000 + }, + { + "epoch": 0.19, + "learning_rate": 4.691077404946144e-05, + "loss": 5.6501, + "step": 47500 + }, + { + "epoch": 0.19, + "learning_rate": 4.687825588156103e-05, + "loss": 5.6611, + "step": 48000 + }, + { + "epoch": 0.19, + "learning_rate": 4.684573771366062e-05, + "loss": 5.6626, + "step": 48500 + }, + { + "epoch": 0.19, + "learning_rate": 4.681321954576022e-05, + "loss": 5.6591, + "step": 49000 + }, + { + "epoch": 0.19, + "learning_rate": 4.678070137785981e-05, + "loss": 5.6519, + "step": 49500 + }, + { + "epoch": 0.2, + "learning_rate": 4.6748183209959404e-05, + "loss": 5.6552, + "step": 50000 + }, + { + "epoch": 0.2, + "learning_rate": 4.6715665042058996e-05, + "loss": 5.6542, + "step": 50500 + }, + { + "epoch": 0.2, + "learning_rate": 4.6683146874158594e-05, + "loss": 5.6521, + "step": 51000 + }, + { + "epoch": 0.2, + "learning_rate": 4.6650628706258186e-05, + "loss": 5.6631, + "step": 51500 + }, + { + "epoch": 0.2, + "learning_rate": 4.6618110538357784e-05, + "loss": 5.6296, + "step": 52000 + }, + { + "epoch": 0.2, + "learning_rate": 4.6585592370457376e-05, + "loss": 5.6541, + "step": 52500 + }, + { + "epoch": 0.21, + "learning_rate": 4.6553074202556974e-05, + "loss": 5.6373, + "step": 53000 + }, + { + "epoch": 0.21, + "learning_rate": 4.6520556034656566e-05, + "loss": 5.6358, + "step": 53500 + }, + { + "epoch": 0.21, + "learning_rate": 4.648803786675616e-05, + "loss": 5.6654, + "step": 54000 + }, + { + "epoch": 0.21, + "learning_rate": 4.6455519698855756e-05, + "loss": 5.6481, + "step": 54500 + }, + { + "epoch": 0.21, + "learning_rate": 4.642300153095535e-05, + "loss": 5.6545, + "step": 55000 + }, + { + "epoch": 0.22, + "learning_rate": 4.6390483363054946e-05, + "loss": 5.6379, + "step": 55500 + }, + { + "epoch": 0.22, + "learning_rate": 4.635796519515454e-05, + "loss": 5.6537, + "step": 56000 + }, + { + "epoch": 0.22, + "learning_rate": 4.632544702725413e-05, + "loss": 5.6611, + "step": 56500 + }, + { + "epoch": 0.22, + "learning_rate": 4.629292885935372e-05, + "loss": 5.6639, + "step": 57000 + }, + { + "epoch": 0.22, + "learning_rate": 4.626041069145331e-05, + "loss": 5.6579, + "step": 57500 + }, + { + "epoch": 0.23, + "learning_rate": 4.622789252355291e-05, + "loss": 5.6714, + "step": 58000 + }, + { + "epoch": 0.23, + "learning_rate": 4.61953743556525e-05, + "loss": 5.6345, + "step": 58500 + }, + { + "epoch": 0.23, + "learning_rate": 4.61628561877521e-05, + "loss": 5.6457, + "step": 59000 + }, + { + "epoch": 0.23, + "learning_rate": 4.613033801985169e-05, + "loss": 5.6613, + "step": 59500 + }, + { + "epoch": 0.23, + "learning_rate": 4.6097819851951284e-05, + "loss": 5.6517, + "step": 60000 + }, + { + "epoch": 0.24, + "learning_rate": 4.606530168405088e-05, + "loss": 5.6328, + "step": 60500 + }, + { + "epoch": 0.24, + "learning_rate": 4.6032783516150474e-05, + "loss": 5.6311, + "step": 61000 + }, + { + "epoch": 0.24, + "learning_rate": 4.600026534825007e-05, + "loss": 5.6228, + "step": 61500 + }, + { + "epoch": 0.24, + "learning_rate": 4.5967747180349665e-05, + "loss": 5.6283, + "step": 62000 + }, + { + "epoch": 0.24, + "learning_rate": 4.593522901244926e-05, + "loss": 5.656, + "step": 62500 + }, + { + "epoch": 0.25, + "learning_rate": 4.5902710844548855e-05, + "loss": 5.6389, + "step": 63000 + }, + { + "epoch": 0.25, + "learning_rate": 4.5870192676648446e-05, + "loss": 5.6361, + "step": 63500 + }, + { + "epoch": 0.25, + "learning_rate": 4.583767450874804e-05, + "loss": 5.6305, + "step": 64000 + }, + { + "epoch": 0.25, + "learning_rate": 4.580515634084763e-05, + "loss": 5.6367, + "step": 64500 + }, + { + "epoch": 0.25, + "learning_rate": 4.577263817294723e-05, + "loss": 5.6393, + "step": 65000 + }, + { + "epoch": 0.26, + "learning_rate": 4.574012000504682e-05, + "loss": 5.6073, + "step": 65500 + }, + { + "epoch": 0.26, + "learning_rate": 4.570760183714641e-05, + "loss": 5.6139, + "step": 66000 + }, + { + "epoch": 0.26, + "learning_rate": 4.567508366924601e-05, + "loss": 5.619, + "step": 66500 + }, + { + "epoch": 0.26, + "learning_rate": 4.56425655013456e-05, + "loss": 5.6307, + "step": 67000 + }, + { + "epoch": 0.26, + "learning_rate": 4.56100473334452e-05, + "loss": 5.634, + "step": 67500 + }, + { + "epoch": 0.27, + "learning_rate": 4.557752916554479e-05, + "loss": 5.6234, + "step": 68000 + }, + { + "epoch": 0.27, + "learning_rate": 4.554501099764439e-05, + "loss": 5.6219, + "step": 68500 + }, + { + "epoch": 0.27, + "learning_rate": 4.551249282974398e-05, + "loss": 5.6326, + "step": 69000 + }, + { + "epoch": 0.27, + "learning_rate": 4.547997466184357e-05, + "loss": 5.6293, + "step": 69500 + }, + { + "epoch": 0.27, + "learning_rate": 4.544745649394317e-05, + "loss": 5.6153, + "step": 70000 + }, + { + "epoch": 0.28, + "learning_rate": 4.541493832604276e-05, + "loss": 5.6193, + "step": 70500 + }, + { + "epoch": 0.28, + "learning_rate": 4.5382420158142355e-05, + "loss": 5.62, + "step": 71000 + }, + { + "epoch": 0.28, + "learning_rate": 4.5349901990241946e-05, + "loss": 5.6276, + "step": 71500 + }, + { + "epoch": 0.28, + "learning_rate": 4.5317383822341545e-05, + "loss": 5.6065, + "step": 72000 + }, + { + "epoch": 0.28, + "learning_rate": 4.5284865654441136e-05, + "loss": 5.6008, + "step": 72500 + }, + { + "epoch": 0.28, + "learning_rate": 4.525234748654073e-05, + "loss": 5.624, + "step": 73000 + }, + { + "epoch": 0.29, + "learning_rate": 4.5219829318640327e-05, + "loss": 5.6111, + "step": 73500 + }, + { + "epoch": 0.29, + "learning_rate": 4.518731115073992e-05, + "loss": 5.6309, + "step": 74000 + }, + { + "epoch": 0.29, + "learning_rate": 4.5154792982839517e-05, + "loss": 5.6356, + "step": 74500 + }, + { + "epoch": 0.29, + "learning_rate": 4.512227481493911e-05, + "loss": 5.6045, + "step": 75000 + }, + { + "epoch": 0.29, + "learning_rate": 4.50897566470387e-05, + "loss": 5.6079, + "step": 75500 + }, + { + "epoch": 0.3, + "learning_rate": 4.50572384791383e-05, + "loss": 5.609, + "step": 76000 + }, + { + "epoch": 0.3, + "learning_rate": 4.502472031123789e-05, + "loss": 5.6262, + "step": 76500 + }, + { + "epoch": 0.3, + "learning_rate": 4.499220214333749e-05, + "loss": 5.5966, + "step": 77000 + }, + { + "epoch": 0.3, + "learning_rate": 4.495968397543708e-05, + "loss": 5.598, + "step": 77500 + }, + { + "epoch": 0.3, + "learning_rate": 4.492716580753668e-05, + "loss": 5.618, + "step": 78000 + }, + { + "epoch": 0.31, + "learning_rate": 4.489464763963627e-05, + "loss": 5.6125, + "step": 78500 + }, + { + "epoch": 0.31, + "learning_rate": 4.486212947173586e-05, + "loss": 5.6182, + "step": 79000 + }, + { + "epoch": 0.31, + "learning_rate": 4.482961130383545e-05, + "loss": 5.5995, + "step": 79500 + }, + { + "epoch": 0.31, + "learning_rate": 4.4797093135935045e-05, + "loss": 5.6079, + "step": 80000 + }, + { + "epoch": 0.31, + "learning_rate": 4.4764574968034643e-05, + "loss": 5.6138, + "step": 80500 + }, + { + "epoch": 0.32, + "learning_rate": 4.4732056800134235e-05, + "loss": 5.6076, + "step": 81000 + }, + { + "epoch": 0.32, + "learning_rate": 4.4699538632233834e-05, + "loss": 5.598, + "step": 81500 + }, + { + "epoch": 0.32, + "learning_rate": 4.4667020464333425e-05, + "loss": 5.6038, + "step": 82000 + }, + { + "epoch": 0.32, + "learning_rate": 4.463450229643302e-05, + "loss": 5.5977, + "step": 82500 + }, + { + "epoch": 0.32, + "learning_rate": 4.4601984128532615e-05, + "loss": 5.621, + "step": 83000 + }, + { + "epoch": 0.33, + "learning_rate": 4.456946596063221e-05, + "loss": 5.6076, + "step": 83500 + }, + { + "epoch": 0.33, + "learning_rate": 4.4536947792731805e-05, + "loss": 5.6156, + "step": 84000 + }, + { + "epoch": 0.33, + "learning_rate": 4.45044296248314e-05, + "loss": 5.6229, + "step": 84500 + }, + { + "epoch": 0.33, + "learning_rate": 4.447191145693099e-05, + "loss": 5.5821, + "step": 85000 + }, + { + "epoch": 0.33, + "learning_rate": 4.443939328903059e-05, + "loss": 5.5937, + "step": 85500 + }, + { + "epoch": 0.34, + "learning_rate": 4.440687512113018e-05, + "loss": 5.5981, + "step": 86000 + }, + { + "epoch": 0.34, + "learning_rate": 4.437435695322977e-05, + "loss": 5.6175, + "step": 86500 + }, + { + "epoch": 0.34, + "learning_rate": 4.434183878532936e-05, + "loss": 5.5978, + "step": 87000 + }, + { + "epoch": 0.34, + "learning_rate": 4.430932061742896e-05, + "loss": 5.5959, + "step": 87500 + }, + { + "epoch": 0.34, + "learning_rate": 4.427680244952855e-05, + "loss": 5.5779, + "step": 88000 + }, + { + "epoch": 0.35, + "learning_rate": 4.4244284281628144e-05, + "loss": 5.6002, + "step": 88500 + }, + { + "epoch": 0.35, + "learning_rate": 4.421176611372774e-05, + "loss": 5.6177, + "step": 89000 + }, + { + "epoch": 0.35, + "learning_rate": 4.4179247945827334e-05, + "loss": 5.6038, + "step": 89500 + }, + { + "epoch": 0.35, + "learning_rate": 4.414672977792693e-05, + "loss": 5.6033, + "step": 90000 + }, + { + "epoch": 0.35, + "learning_rate": 4.4114211610026524e-05, + "loss": 5.6066, + "step": 90500 + }, + { + "epoch": 0.36, + "learning_rate": 4.408169344212612e-05, + "loss": 5.605, + "step": 91000 + }, + { + "epoch": 0.36, + "learning_rate": 4.4049175274225714e-05, + "loss": 5.613, + "step": 91500 + }, + { + "epoch": 0.36, + "learning_rate": 4.4016657106325305e-05, + "loss": 5.5924, + "step": 92000 + }, + { + "epoch": 0.36, + "learning_rate": 4.3984138938424904e-05, + "loss": 5.5983, + "step": 92500 + }, + { + "epoch": 0.36, + "learning_rate": 4.3951620770524496e-05, + "loss": 5.5947, + "step": 93000 + }, + { + "epoch": 0.36, + "learning_rate": 4.391910260262409e-05, + "loss": 5.5848, + "step": 93500 + }, + { + "epoch": 0.37, + "learning_rate": 4.388658443472368e-05, + "loss": 5.5853, + "step": 94000 + }, + { + "epoch": 0.37, + "learning_rate": 4.385406626682327e-05, + "loss": 5.5949, + "step": 94500 + }, + { + "epoch": 0.37, + "learning_rate": 4.382154809892287e-05, + "loss": 5.5795, + "step": 95000 + }, + { + "epoch": 0.37, + "learning_rate": 4.378902993102246e-05, + "loss": 5.5979, + "step": 95500 + }, + { + "epoch": 0.37, + "learning_rate": 4.375651176312206e-05, + "loss": 5.5873, + "step": 96000 + }, + { + "epoch": 0.38, + "learning_rate": 4.372399359522165e-05, + "loss": 5.6173, + "step": 96500 + }, + { + "epoch": 0.38, + "learning_rate": 4.369147542732125e-05, + "loss": 5.6014, + "step": 97000 + }, + { + "epoch": 0.38, + "learning_rate": 4.365895725942084e-05, + "loss": 5.597, + "step": 97500 + }, + { + "epoch": 0.38, + "learning_rate": 4.362643909152043e-05, + "loss": 5.5682, + "step": 98000 + }, + { + "epoch": 0.38, + "learning_rate": 4.359392092362003e-05, + "loss": 5.5819, + "step": 98500 + }, + { + "epoch": 0.39, + "learning_rate": 4.356140275571962e-05, + "loss": 5.5784, + "step": 99000 + }, + { + "epoch": 0.39, + "learning_rate": 4.352888458781922e-05, + "loss": 5.5703, + "step": 99500 + }, + { + "epoch": 0.39, + "learning_rate": 4.349636641991881e-05, + "loss": 5.5705, + "step": 100000 + }, + { + "epoch": 0.39, + "learning_rate": 4.3463848252018404e-05, + "loss": 5.5742, + "step": 100500 + }, + { + "epoch": 0.39, + "learning_rate": 4.3431330084118e-05, + "loss": 5.6006, + "step": 101000 + }, + { + "epoch": 0.4, + "learning_rate": 4.3398811916217594e-05, + "loss": 5.5678, + "step": 101500 + }, + { + "epoch": 0.4, + "learning_rate": 4.3366293748317186e-05, + "loss": 5.5967, + "step": 102000 + }, + { + "epoch": 0.4, + "learning_rate": 4.333377558041678e-05, + "loss": 5.5938, + "step": 102500 + }, + { + "epoch": 0.4, + "learning_rate": 4.3301257412516376e-05, + "loss": 5.5844, + "step": 103000 + }, + { + "epoch": 0.4, + "learning_rate": 4.326873924461597e-05, + "loss": 5.5882, + "step": 103500 + }, + { + "epoch": 0.41, + "learning_rate": 4.323622107671556e-05, + "loss": 5.5708, + "step": 104000 + }, + { + "epoch": 0.41, + "learning_rate": 4.320370290881516e-05, + "loss": 5.5687, + "step": 104500 + }, + { + "epoch": 0.41, + "learning_rate": 4.317118474091475e-05, + "loss": 5.5928, + "step": 105000 + }, + { + "epoch": 0.41, + "learning_rate": 4.313866657301435e-05, + "loss": 5.5926, + "step": 105500 + }, + { + "epoch": 0.41, + "learning_rate": 4.310614840511394e-05, + "loss": 5.577, + "step": 106000 + }, + { + "epoch": 0.42, + "learning_rate": 4.307363023721354e-05, + "loss": 5.5642, + "step": 106500 + }, + { + "epoch": 0.42, + "learning_rate": 4.304111206931313e-05, + "loss": 5.5927, + "step": 107000 + }, + { + "epoch": 0.42, + "learning_rate": 4.300859390141272e-05, + "loss": 5.5954, + "step": 107500 + }, + { + "epoch": 0.42, + "learning_rate": 4.297607573351232e-05, + "loss": 5.5789, + "step": 108000 + }, + { + "epoch": 0.42, + "learning_rate": 4.294355756561191e-05, + "loss": 5.5541, + "step": 108500 + }, + { + "epoch": 0.43, + "learning_rate": 4.29110393977115e-05, + "loss": 5.5594, + "step": 109000 + }, + { + "epoch": 0.43, + "learning_rate": 4.2878521229811094e-05, + "loss": 5.5868, + "step": 109500 + }, + { + "epoch": 0.43, + "learning_rate": 4.2846003061910686e-05, + "loss": 5.5632, + "step": 110000 + }, + { + "epoch": 0.43, + "learning_rate": 4.2813484894010284e-05, + "loss": 5.5895, + "step": 110500 + }, + { + "epoch": 0.43, + "learning_rate": 4.2780966726109876e-05, + "loss": 5.594, + "step": 111000 + }, + { + "epoch": 0.44, + "learning_rate": 4.2748448558209475e-05, + "loss": 5.5825, + "step": 111500 + }, + { + "epoch": 0.44, + "learning_rate": 4.2715930390309066e-05, + "loss": 5.588, + "step": 112000 + }, + { + "epoch": 0.44, + "learning_rate": 4.2683412222408665e-05, + "loss": 5.5711, + "step": 112500 + }, + { + "epoch": 0.44, + "learning_rate": 4.2650894054508256e-05, + "loss": 5.5995, + "step": 113000 + }, + { + "epoch": 0.44, + "learning_rate": 4.261837588660785e-05, + "loss": 5.571, + "step": 113500 + }, + { + "epoch": 0.44, + "learning_rate": 4.2585857718707446e-05, + "loss": 5.5743, + "step": 114000 + }, + { + "epoch": 0.45, + "learning_rate": 4.255333955080704e-05, + "loss": 5.5766, + "step": 114500 + }, + { + "epoch": 0.45, + "learning_rate": 4.2520821382906636e-05, + "loss": 5.5705, + "step": 115000 + }, + { + "epoch": 0.45, + "learning_rate": 4.248830321500623e-05, + "loss": 5.5684, + "step": 115500 + }, + { + "epoch": 0.45, + "learning_rate": 4.245578504710582e-05, + "loss": 5.5593, + "step": 116000 + }, + { + "epoch": 0.45, + "learning_rate": 4.242326687920541e-05, + "loss": 5.5969, + "step": 116500 + }, + { + "epoch": 0.46, + "learning_rate": 4.2390748711305e-05, + "loss": 5.5724, + "step": 117000 + }, + { + "epoch": 0.46, + "learning_rate": 4.23582305434046e-05, + "loss": 5.5601, + "step": 117500 + }, + { + "epoch": 0.46, + "learning_rate": 4.232571237550419e-05, + "loss": 5.572, + "step": 118000 + }, + { + "epoch": 0.46, + "learning_rate": 4.229319420760379e-05, + "loss": 5.5733, + "step": 118500 + }, + { + "epoch": 0.46, + "learning_rate": 4.226067603970338e-05, + "loss": 5.5782, + "step": 119000 + }, + { + "epoch": 0.47, + "learning_rate": 4.2228157871802975e-05, + "loss": 5.5429, + "step": 119500 + }, + { + "epoch": 0.47, + "learning_rate": 4.219563970390257e-05, + "loss": 5.5873, + "step": 120000 + }, + { + "epoch": 0.47, + "learning_rate": 4.2163121536002165e-05, + "loss": 5.5707, + "step": 120500 + }, + { + "epoch": 0.47, + "learning_rate": 4.213060336810176e-05, + "loss": 5.5741, + "step": 121000 + }, + { + "epoch": 0.47, + "learning_rate": 4.2098085200201355e-05, + "loss": 5.5847, + "step": 121500 + }, + { + "epoch": 0.48, + "learning_rate": 4.206556703230095e-05, + "loss": 5.5845, + "step": 122000 + }, + { + "epoch": 0.48, + "learning_rate": 4.2033048864400545e-05, + "loss": 5.5769, + "step": 122500 + }, + { + "epoch": 0.48, + "learning_rate": 4.2000530696500137e-05, + "loss": 5.5777, + "step": 123000 + }, + { + "epoch": 0.48, + "learning_rate": 4.1968012528599735e-05, + "loss": 5.5624, + "step": 123500 + }, + { + "epoch": 0.48, + "learning_rate": 4.1935494360699327e-05, + "loss": 5.5753, + "step": 124000 + }, + { + "epoch": 0.49, + "learning_rate": 4.190297619279892e-05, + "loss": 5.572, + "step": 124500 + }, + { + "epoch": 0.49, + "learning_rate": 4.187045802489851e-05, + "loss": 5.5825, + "step": 125000 + }, + { + "epoch": 0.49, + "learning_rate": 4.183793985699811e-05, + "loss": 5.5775, + "step": 125500 + }, + { + "epoch": 0.49, + "learning_rate": 4.18054216890977e-05, + "loss": 5.5575, + "step": 126000 + }, + { + "epoch": 0.49, + "learning_rate": 4.177290352119729e-05, + "loss": 5.5643, + "step": 126500 + }, + { + "epoch": 0.5, + "learning_rate": 4.174038535329689e-05, + "loss": 5.5624, + "step": 127000 + }, + { + "epoch": 0.5, + "learning_rate": 4.170786718539648e-05, + "loss": 5.57, + "step": 127500 + }, + { + "epoch": 0.5, + "learning_rate": 4.167534901749608e-05, + "loss": 5.581, + "step": 128000 + }, + { + "epoch": 0.5, + "learning_rate": 4.164283084959567e-05, + "loss": 5.5746, + "step": 128500 + }, + { + "epoch": 0.5, + "learning_rate": 4.161031268169526e-05, + "loss": 5.5536, + "step": 129000 + }, + { + "epoch": 0.51, + "learning_rate": 4.157779451379486e-05, + "loss": 5.5706, + "step": 129500 + }, + { + "epoch": 0.51, + "learning_rate": 4.1545276345894453e-05, + "loss": 5.5287, + "step": 130000 + }, + { + "epoch": 0.51, + "learning_rate": 4.151275817799405e-05, + "loss": 5.5688, + "step": 130500 + }, + { + "epoch": 0.51, + "learning_rate": 4.1480240010093644e-05, + "loss": 5.5577, + "step": 131000 + }, + { + "epoch": 0.51, + "learning_rate": 4.1447721842193235e-05, + "loss": 5.5561, + "step": 131500 + }, + { + "epoch": 0.52, + "learning_rate": 4.141520367429283e-05, + "loss": 5.5381, + "step": 132000 + }, + { + "epoch": 0.52, + "learning_rate": 4.138268550639242e-05, + "loss": 5.5509, + "step": 132500 + }, + { + "epoch": 0.52, + "learning_rate": 4.135016733849202e-05, + "loss": 5.5589, + "step": 133000 + }, + { + "epoch": 0.52, + "learning_rate": 4.131764917059161e-05, + "loss": 5.5433, + "step": 133500 + }, + { + "epoch": 0.52, + "learning_rate": 4.128513100269121e-05, + "loss": 5.5809, + "step": 134000 + }, + { + "epoch": 0.52, + "learning_rate": 4.12526128347908e-05, + "loss": 5.5478, + "step": 134500 + }, + { + "epoch": 0.53, + "learning_rate": 4.122009466689039e-05, + "loss": 5.5663, + "step": 135000 + }, + { + "epoch": 0.53, + "learning_rate": 4.118757649898999e-05, + "loss": 5.529, + "step": 135500 + }, + { + "epoch": 0.53, + "learning_rate": 4.115505833108958e-05, + "loss": 5.5764, + "step": 136000 + }, + { + "epoch": 0.53, + "learning_rate": 4.112254016318918e-05, + "loss": 5.5561, + "step": 136500 + }, + { + "epoch": 0.53, + "learning_rate": 4.109002199528877e-05, + "loss": 5.5781, + "step": 137000 + }, + { + "epoch": 0.54, + "learning_rate": 4.105750382738837e-05, + "loss": 5.572, + "step": 137500 + }, + { + "epoch": 0.54, + "learning_rate": 4.102498565948796e-05, + "loss": 5.5631, + "step": 138000 + }, + { + "epoch": 0.54, + "learning_rate": 4.099246749158755e-05, + "loss": 5.5649, + "step": 138500 + }, + { + "epoch": 0.54, + "learning_rate": 4.0959949323687144e-05, + "loss": 5.5553, + "step": 139000 + }, + { + "epoch": 0.54, + "learning_rate": 4.0927431155786735e-05, + "loss": 5.5324, + "step": 139500 + }, + { + "epoch": 0.55, + "learning_rate": 4.0894912987886334e-05, + "loss": 5.5562, + "step": 140000 + }, + { + "epoch": 0.55, + "learning_rate": 4.0862394819985925e-05, + "loss": 5.5345, + "step": 140500 + }, + { + "epoch": 0.55, + "learning_rate": 4.0829876652085524e-05, + "loss": 5.5513, + "step": 141000 + }, + { + "epoch": 0.55, + "learning_rate": 4.0797358484185115e-05, + "loss": 5.5401, + "step": 141500 + }, + { + "epoch": 0.55, + "learning_rate": 4.076484031628471e-05, + "loss": 5.5566, + "step": 142000 + }, + { + "epoch": 0.56, + "learning_rate": 4.0732322148384306e-05, + "loss": 5.538, + "step": 142500 + }, + { + "epoch": 0.56, + "learning_rate": 4.06998039804839e-05, + "loss": 5.5495, + "step": 143000 + }, + { + "epoch": 0.56, + "learning_rate": 4.0667285812583496e-05, + "loss": 5.5551, + "step": 143500 + }, + { + "epoch": 0.56, + "learning_rate": 4.063476764468309e-05, + "loss": 5.5617, + "step": 144000 + }, + { + "epoch": 0.56, + "learning_rate": 4.060224947678268e-05, + "loss": 5.5654, + "step": 144500 + }, + { + "epoch": 0.57, + "learning_rate": 4.056973130888228e-05, + "loss": 5.5485, + "step": 145000 + }, + { + "epoch": 0.57, + "learning_rate": 4.053721314098187e-05, + "loss": 5.5328, + "step": 145500 + }, + { + "epoch": 0.57, + "learning_rate": 4.050469497308146e-05, + "loss": 5.5312, + "step": 146000 + }, + { + "epoch": 0.57, + "learning_rate": 4.047217680518106e-05, + "loss": 5.5306, + "step": 146500 + }, + { + "epoch": 0.57, + "learning_rate": 4.043965863728065e-05, + "loss": 5.5662, + "step": 147000 + }, + { + "epoch": 0.58, + "learning_rate": 4.040714046938024e-05, + "loss": 5.531, + "step": 147500 + }, + { + "epoch": 0.58, + "learning_rate": 4.0374622301479834e-05, + "loss": 5.5477, + "step": 148000 + }, + { + "epoch": 0.58, + "learning_rate": 4.034210413357943e-05, + "loss": 5.5436, + "step": 148500 + }, + { + "epoch": 0.58, + "learning_rate": 4.0309585965679024e-05, + "loss": 5.5617, + "step": 149000 + }, + { + "epoch": 0.58, + "learning_rate": 4.027706779777862e-05, + "loss": 5.5636, + "step": 149500 + }, + { + "epoch": 0.59, + "learning_rate": 4.0244549629878214e-05, + "loss": 5.5282, + "step": 150000 + }, + { + "epoch": 0.59, + "learning_rate": 4.021203146197781e-05, + "loss": 5.55, + "step": 150500 + }, + { + "epoch": 0.59, + "learning_rate": 4.0179513294077404e-05, + "loss": 5.5378, + "step": 151000 + }, + { + "epoch": 0.59, + "learning_rate": 4.0146995126176996e-05, + "loss": 5.5463, + "step": 151500 + }, + { + "epoch": 0.59, + "learning_rate": 4.0114476958276594e-05, + "loss": 5.5292, + "step": 152000 + }, + { + "epoch": 0.6, + "learning_rate": 4.0081958790376186e-05, + "loss": 5.5282, + "step": 152500 + }, + { + "epoch": 0.6, + "learning_rate": 4.0049440622475784e-05, + "loss": 5.5575, + "step": 153000 + }, + { + "epoch": 0.6, + "learning_rate": 4.0016922454575376e-05, + "loss": 5.5299, + "step": 153500 + }, + { + "epoch": 0.6, + "learning_rate": 3.998440428667497e-05, + "loss": 5.55, + "step": 154000 + }, + { + "epoch": 0.6, + "learning_rate": 3.995188611877456e-05, + "loss": 5.5286, + "step": 154500 + }, + { + "epoch": 0.6, + "learning_rate": 3.991936795087415e-05, + "loss": 5.5405, + "step": 155000 + }, + { + "epoch": 0.61, + "learning_rate": 3.988684978297375e-05, + "loss": 5.5244, + "step": 155500 + }, + { + "epoch": 0.61, + "learning_rate": 3.985433161507334e-05, + "loss": 5.5208, + "step": 156000 + }, + { + "epoch": 0.61, + "learning_rate": 3.982181344717294e-05, + "loss": 5.5472, + "step": 156500 + }, + { + "epoch": 0.61, + "learning_rate": 3.978929527927253e-05, + "loss": 5.5582, + "step": 157000 + }, + { + "epoch": 0.61, + "learning_rate": 3.975677711137212e-05, + "loss": 5.5539, + "step": 157500 + }, + { + "epoch": 0.62, + "learning_rate": 3.972425894347172e-05, + "loss": 5.5463, + "step": 158000 + }, + { + "epoch": 0.62, + "learning_rate": 3.969174077557131e-05, + "loss": 5.5498, + "step": 158500 + }, + { + "epoch": 0.62, + "learning_rate": 3.965922260767091e-05, + "loss": 5.5299, + "step": 159000 + }, + { + "epoch": 0.62, + "learning_rate": 3.96267044397705e-05, + "loss": 5.5384, + "step": 159500 + }, + { + "epoch": 0.62, + "learning_rate": 3.95941862718701e-05, + "loss": 5.5288, + "step": 160000 + }, + { + "epoch": 0.63, + "learning_rate": 3.956166810396969e-05, + "loss": 5.5213, + "step": 160500 + }, + { + "epoch": 0.63, + "learning_rate": 3.9529149936069285e-05, + "loss": 5.5386, + "step": 161000 + }, + { + "epoch": 0.63, + "learning_rate": 3.9496631768168876e-05, + "loss": 5.5413, + "step": 161500 + }, + { + "epoch": 0.63, + "learning_rate": 3.946411360026847e-05, + "loss": 5.5387, + "step": 162000 + }, + { + "epoch": 0.63, + "learning_rate": 3.9431595432368066e-05, + "loss": 5.5342, + "step": 162500 + }, + { + "epoch": 0.64, + "learning_rate": 3.939907726446766e-05, + "loss": 5.5485, + "step": 163000 + }, + { + "epoch": 0.64, + "learning_rate": 3.936655909656725e-05, + "loss": 5.532, + "step": 163500 + }, + { + "epoch": 0.64, + "learning_rate": 3.933404092866685e-05, + "loss": 5.518, + "step": 164000 + }, + { + "epoch": 0.64, + "learning_rate": 3.930152276076644e-05, + "loss": 5.5319, + "step": 164500 + }, + { + "epoch": 0.64, + "learning_rate": 3.926900459286604e-05, + "loss": 5.5335, + "step": 165000 + }, + { + "epoch": 0.65, + "learning_rate": 3.923648642496563e-05, + "loss": 5.5488, + "step": 165500 + }, + { + "epoch": 0.65, + "learning_rate": 3.920396825706523e-05, + "loss": 5.5252, + "step": 166000 + }, + { + "epoch": 0.65, + "learning_rate": 3.917145008916482e-05, + "loss": 5.5141, + "step": 166500 + }, + { + "epoch": 0.65, + "learning_rate": 3.913893192126441e-05, + "loss": 5.5417, + "step": 167000 + }, + { + "epoch": 0.65, + "learning_rate": 3.910641375336401e-05, + "loss": 5.5355, + "step": 167500 + }, + { + "epoch": 0.66, + "learning_rate": 3.90738955854636e-05, + "loss": 5.5415, + "step": 168000 + }, + { + "epoch": 0.66, + "learning_rate": 3.904137741756319e-05, + "loss": 5.534, + "step": 168500 + }, + { + "epoch": 0.66, + "learning_rate": 3.9008859249662785e-05, + "loss": 5.5257, + "step": 169000 + }, + { + "epoch": 0.66, + "learning_rate": 3.897634108176238e-05, + "loss": 5.5221, + "step": 169500 + }, + { + "epoch": 0.66, + "learning_rate": 3.8943822913861975e-05, + "loss": 5.5364, + "step": 170000 + }, + { + "epoch": 0.67, + "learning_rate": 3.8911304745961566e-05, + "loss": 5.5296, + "step": 170500 + }, + { + "epoch": 0.67, + "learning_rate": 3.8878786578061165e-05, + "loss": 5.5197, + "step": 171000 + }, + { + "epoch": 0.67, + "learning_rate": 3.8846268410160756e-05, + "loss": 5.5615, + "step": 171500 + }, + { + "epoch": 0.67, + "learning_rate": 3.8813750242260355e-05, + "loss": 5.5354, + "step": 172000 + }, + { + "epoch": 0.67, + "learning_rate": 3.8781232074359947e-05, + "loss": 5.5192, + "step": 172500 + }, + { + "epoch": 0.68, + "learning_rate": 3.874871390645954e-05, + "loss": 5.5497, + "step": 173000 + }, + { + "epoch": 0.68, + "learning_rate": 3.8716195738559137e-05, + "loss": 5.5419, + "step": 173500 + }, + { + "epoch": 0.68, + "learning_rate": 3.868367757065873e-05, + "loss": 5.5366, + "step": 174000 + }, + { + "epoch": 0.68, + "learning_rate": 3.865115940275833e-05, + "loss": 5.5031, + "step": 174500 + }, + { + "epoch": 0.68, + "learning_rate": 3.861864123485792e-05, + "loss": 5.5425, + "step": 175000 + }, + { + "epoch": 0.68, + "learning_rate": 3.858612306695752e-05, + "loss": 5.5195, + "step": 175500 + }, + { + "epoch": 0.69, + "learning_rate": 3.855360489905711e-05, + "loss": 5.5235, + "step": 176000 + }, + { + "epoch": 0.69, + "learning_rate": 3.85210867311567e-05, + "loss": 5.5299, + "step": 176500 + }, + { + "epoch": 0.69, + "learning_rate": 3.848856856325629e-05, + "loss": 5.5233, + "step": 177000 + }, + { + "epoch": 0.69, + "learning_rate": 3.845605039535588e-05, + "loss": 5.5386, + "step": 177500 + }, + { + "epoch": 0.69, + "learning_rate": 3.842353222745548e-05, + "loss": 5.5233, + "step": 178000 + }, + { + "epoch": 0.7, + "learning_rate": 3.839101405955507e-05, + "loss": 5.5128, + "step": 178500 + }, + { + "epoch": 0.7, + "learning_rate": 3.8358495891654665e-05, + "loss": 5.5286, + "step": 179000 + }, + { + "epoch": 0.7, + "learning_rate": 3.8325977723754263e-05, + "loss": 5.5022, + "step": 179500 + }, + { + "epoch": 0.7, + "learning_rate": 3.8293459555853855e-05, + "loss": 5.523, + "step": 180000 + }, + { + "epoch": 0.7, + "learning_rate": 3.8260941387953454e-05, + "loss": 5.5229, + "step": 180500 + }, + { + "epoch": 0.71, + "learning_rate": 3.8228423220053045e-05, + "loss": 5.5352, + "step": 181000 + }, + { + "epoch": 0.71, + "learning_rate": 3.8195905052152644e-05, + "loss": 5.5312, + "step": 181500 + }, + { + "epoch": 0.71, + "learning_rate": 3.8163386884252235e-05, + "loss": 5.5209, + "step": 182000 + }, + { + "epoch": 0.71, + "learning_rate": 3.813086871635183e-05, + "loss": 5.5273, + "step": 182500 + }, + { + "epoch": 0.71, + "learning_rate": 3.8098350548451425e-05, + "loss": 5.5092, + "step": 183000 + }, + { + "epoch": 0.72, + "learning_rate": 3.806583238055102e-05, + "loss": 5.5154, + "step": 183500 + }, + { + "epoch": 0.72, + "learning_rate": 3.803331421265061e-05, + "loss": 5.5246, + "step": 184000 + }, + { + "epoch": 0.72, + "learning_rate": 3.80007960447502e-05, + "loss": 5.5372, + "step": 184500 + }, + { + "epoch": 0.72, + "learning_rate": 3.79682778768498e-05, + "loss": 5.5258, + "step": 185000 + }, + { + "epoch": 0.72, + "learning_rate": 3.793575970894939e-05, + "loss": 5.5236, + "step": 185500 + }, + { + "epoch": 0.73, + "learning_rate": 3.790324154104898e-05, + "loss": 5.4988, + "step": 186000 + }, + { + "epoch": 0.73, + "learning_rate": 3.787072337314858e-05, + "loss": 5.5234, + "step": 186500 + }, + { + "epoch": 0.73, + "learning_rate": 3.783820520524817e-05, + "loss": 5.5285, + "step": 187000 + }, + { + "epoch": 0.73, + "learning_rate": 3.780568703734777e-05, + "loss": 5.5015, + "step": 187500 + }, + { + "epoch": 0.73, + "learning_rate": 3.777316886944736e-05, + "loss": 5.5222, + "step": 188000 + }, + { + "epoch": 0.74, + "learning_rate": 3.7740650701546954e-05, + "loss": 5.5113, + "step": 188500 + }, + { + "epoch": 0.74, + "learning_rate": 3.770813253364655e-05, + "loss": 5.524, + "step": 189000 + }, + { + "epoch": 0.74, + "learning_rate": 3.7675614365746144e-05, + "loss": 5.5357, + "step": 189500 + }, + { + "epoch": 0.74, + "learning_rate": 3.764309619784574e-05, + "loss": 5.5221, + "step": 190000 + }, + { + "epoch": 0.74, + "learning_rate": 3.7610578029945334e-05, + "loss": 5.5226, + "step": 190500 + }, + { + "epoch": 0.75, + "learning_rate": 3.7578059862044925e-05, + "loss": 5.5204, + "step": 191000 + }, + { + "epoch": 0.75, + "learning_rate": 3.754554169414452e-05, + "loss": 5.5201, + "step": 191500 + }, + { + "epoch": 0.75, + "learning_rate": 3.751302352624411e-05, + "loss": 5.521, + "step": 192000 + }, + { + "epoch": 0.75, + "learning_rate": 3.748050535834371e-05, + "loss": 5.5334, + "step": 192500 + }, + { + "epoch": 0.75, + "learning_rate": 3.74479871904433e-05, + "loss": 5.4979, + "step": 193000 + }, + { + "epoch": 0.76, + "learning_rate": 3.74154690225429e-05, + "loss": 5.5098, + "step": 193500 + }, + { + "epoch": 0.76, + "learning_rate": 3.738295085464249e-05, + "loss": 5.5429, + "step": 194000 + }, + { + "epoch": 0.76, + "learning_rate": 3.735043268674209e-05, + "loss": 5.506, + "step": 194500 + }, + { + "epoch": 0.76, + "learning_rate": 3.731791451884168e-05, + "loss": 5.5116, + "step": 195000 + }, + { + "epoch": 0.76, + "learning_rate": 3.728539635094127e-05, + "loss": 5.5075, + "step": 195500 + }, + { + "epoch": 0.76, + "learning_rate": 3.725287818304087e-05, + "loss": 5.5159, + "step": 196000 + }, + { + "epoch": 0.77, + "learning_rate": 3.722036001514046e-05, + "loss": 5.5192, + "step": 196500 + }, + { + "epoch": 0.77, + "learning_rate": 3.718784184724006e-05, + "loss": 5.5109, + "step": 197000 + }, + { + "epoch": 0.77, + "learning_rate": 3.715532367933965e-05, + "loss": 5.496, + "step": 197500 + }, + { + "epoch": 0.77, + "learning_rate": 3.712280551143924e-05, + "loss": 5.5329, + "step": 198000 + }, + { + "epoch": 0.77, + "learning_rate": 3.709028734353884e-05, + "loss": 5.4942, + "step": 198500 + }, + { + "epoch": 0.78, + "learning_rate": 3.705776917563843e-05, + "loss": 5.5314, + "step": 199000 + }, + { + "epoch": 0.78, + "learning_rate": 3.7025251007738024e-05, + "loss": 5.5117, + "step": 199500 + }, + { + "epoch": 0.78, + "learning_rate": 3.6992732839837616e-05, + "loss": 5.5068, + "step": 200000 + }, + { + "epoch": 0.78, + "learning_rate": 3.6960214671937214e-05, + "loss": 5.5034, + "step": 200500 + }, + { + "epoch": 0.78, + "learning_rate": 3.6927696504036806e-05, + "loss": 5.5165, + "step": 201000 + }, + { + "epoch": 0.79, + "learning_rate": 3.68951783361364e-05, + "loss": 5.5228, + "step": 201500 + }, + { + "epoch": 0.79, + "learning_rate": 3.6862660168235996e-05, + "loss": 5.522, + "step": 202000 + }, + { + "epoch": 0.79, + "learning_rate": 3.683014200033559e-05, + "loss": 5.5127, + "step": 202500 + }, + { + "epoch": 0.79, + "learning_rate": 3.6797623832435186e-05, + "loss": 5.498, + "step": 203000 + }, + { + "epoch": 0.79, + "learning_rate": 3.676510566453478e-05, + "loss": 5.5175, + "step": 203500 + }, + { + "epoch": 0.8, + "learning_rate": 3.6732587496634376e-05, + "loss": 5.4934, + "step": 204000 + }, + { + "epoch": 0.8, + "learning_rate": 3.670006932873397e-05, + "loss": 5.5252, + "step": 204500 + }, + { + "epoch": 0.8, + "learning_rate": 3.666755116083356e-05, + "loss": 5.4944, + "step": 205000 + }, + { + "epoch": 0.8, + "learning_rate": 3.663503299293316e-05, + "loss": 5.4928, + "step": 205500 + }, + { + "epoch": 0.8, + "learning_rate": 3.660251482503275e-05, + "loss": 5.4842, + "step": 206000 + }, + { + "epoch": 0.81, + "learning_rate": 3.656999665713234e-05, + "loss": 5.4963, + "step": 206500 + }, + { + "epoch": 0.81, + "learning_rate": 3.653747848923193e-05, + "loss": 5.4839, + "step": 207000 + }, + { + "epoch": 0.81, + "learning_rate": 3.6504960321331524e-05, + "loss": 5.5047, + "step": 207500 + }, + { + "epoch": 0.81, + "learning_rate": 3.647244215343112e-05, + "loss": 5.5055, + "step": 208000 + }, + { + "epoch": 0.81, + "learning_rate": 3.6439923985530714e-05, + "loss": 5.467, + "step": 208500 + }, + { + "epoch": 0.82, + "learning_rate": 3.640740581763031e-05, + "loss": 5.4914, + "step": 209000 + }, + { + "epoch": 0.82, + "learning_rate": 3.6374887649729904e-05, + "loss": 5.4803, + "step": 209500 + }, + { + "epoch": 0.82, + "learning_rate": 3.63423694818295e-05, + "loss": 5.4874, + "step": 210000 + }, + { + "epoch": 0.82, + "learning_rate": 3.6309851313929095e-05, + "loss": 5.4186, + "step": 210500 + }, + { + "epoch": 0.82, + "learning_rate": 3.6277333146028686e-05, + "loss": 5.3267, + "step": 211000 + }, + { + "epoch": 0.83, + "learning_rate": 3.6244814978128285e-05, + "loss": 5.2962, + "step": 211500 + }, + { + "epoch": 0.83, + "learning_rate": 3.6212296810227876e-05, + "loss": 5.2441, + "step": 212000 + }, + { + "epoch": 0.83, + "learning_rate": 3.6179778642327475e-05, + "loss": 5.1665, + "step": 212500 + }, + { + "epoch": 0.83, + "learning_rate": 3.6147260474427066e-05, + "loss": 5.1096, + "step": 213000 + }, + { + "epoch": 0.83, + "learning_rate": 3.611474230652666e-05, + "loss": 5.0509, + "step": 213500 + }, + { + "epoch": 0.84, + "learning_rate": 3.608222413862625e-05, + "loss": 5.0168, + "step": 214000 + }, + { + "epoch": 0.84, + "learning_rate": 3.604970597072584e-05, + "loss": 4.9811, + "step": 214500 + }, + { + "epoch": 0.84, + "learning_rate": 3.601718780282544e-05, + "loss": 4.9535, + "step": 215000 + }, + { + "epoch": 0.84, + "learning_rate": 3.598466963492503e-05, + "loss": 4.9113, + "step": 215500 + }, + { + "epoch": 0.84, + "learning_rate": 3.595215146702463e-05, + "loss": 4.8715, + "step": 216000 + }, + { + "epoch": 0.84, + "learning_rate": 3.591963329912422e-05, + "loss": 4.8401, + "step": 216500 + }, + { + "epoch": 0.85, + "learning_rate": 3.588711513122381e-05, + "loss": 4.8103, + "step": 217000 + }, + { + "epoch": 0.85, + "learning_rate": 3.585459696332341e-05, + "loss": 4.7615, + "step": 217500 + }, + { + "epoch": 0.85, + "learning_rate": 3.5822078795423e-05, + "loss": 4.7477, + "step": 218000 + }, + { + "epoch": 0.85, + "learning_rate": 3.57895606275226e-05, + "loss": 4.6929, + "step": 218500 + }, + { + "epoch": 0.85, + "learning_rate": 3.575704245962219e-05, + "loss": 4.6674, + "step": 219000 + }, + { + "epoch": 0.86, + "learning_rate": 3.572452429172179e-05, + "loss": 4.6552, + "step": 219500 + }, + { + "epoch": 0.86, + "learning_rate": 3.569200612382138e-05, + "loss": 4.5982, + "step": 220000 + }, + { + "epoch": 0.86, + "learning_rate": 3.5659487955920975e-05, + "loss": 4.4937, + "step": 220500 + }, + { + "epoch": 0.86, + "learning_rate": 3.562696978802057e-05, + "loss": 4.3654, + "step": 221000 + }, + { + "epoch": 0.86, + "learning_rate": 3.5594451620120165e-05, + "loss": 4.249, + "step": 221500 + }, + { + "epoch": 0.87, + "learning_rate": 3.5561933452219757e-05, + "loss": 4.122, + "step": 222000 + }, + { + "epoch": 0.87, + "learning_rate": 3.552941528431935e-05, + "loss": 3.9966, + "step": 222500 + }, + { + "epoch": 0.87, + "learning_rate": 3.549689711641894e-05, + "loss": 3.8664, + "step": 223000 + }, + { + "epoch": 0.87, + "learning_rate": 3.546437894851854e-05, + "loss": 3.7561, + "step": 223500 + }, + { + "epoch": 0.87, + "learning_rate": 3.543186078061813e-05, + "loss": 3.6266, + "step": 224000 + }, + { + "epoch": 0.88, + "learning_rate": 3.539934261271773e-05, + "loss": 3.5324, + "step": 224500 + }, + { + "epoch": 0.88, + "learning_rate": 3.536682444481732e-05, + "loss": 3.439, + "step": 225000 + }, + { + "epoch": 0.88, + "learning_rate": 3.533430627691692e-05, + "loss": 3.3637, + "step": 225500 + }, + { + "epoch": 0.88, + "learning_rate": 3.530178810901651e-05, + "loss": 3.2801, + "step": 226000 + }, + { + "epoch": 0.88, + "learning_rate": 3.52692699411161e-05, + "loss": 3.1894, + "step": 226500 + }, + { + "epoch": 0.89, + "learning_rate": 3.52367517732157e-05, + "loss": 3.1034, + "step": 227000 + }, + { + "epoch": 0.89, + "learning_rate": 3.520423360531529e-05, + "loss": 3.0252, + "step": 227500 + }, + { + "epoch": 0.89, + "learning_rate": 3.517171543741489e-05, + "loss": 2.9289, + "step": 228000 + }, + { + "epoch": 0.89, + "learning_rate": 3.513919726951448e-05, + "loss": 2.8581, + "step": 228500 + }, + { + "epoch": 0.89, + "learning_rate": 3.5106679101614073e-05, + "loss": 2.8208, + "step": 229000 + }, + { + "epoch": 0.9, + "learning_rate": 3.5074160933713665e-05, + "loss": 2.7686, + "step": 229500 + }, + { + "epoch": 0.9, + "learning_rate": 3.504164276581326e-05, + "loss": 2.7256, + "step": 230000 + }, + { + "epoch": 0.9, + "learning_rate": 3.5009124597912855e-05, + "loss": 2.6784, + "step": 230500 + }, + { + "epoch": 0.9, + "learning_rate": 3.497660643001245e-05, + "loss": 2.6355, + "step": 231000 + }, + { + "epoch": 0.9, + "learning_rate": 3.4944088262112045e-05, + "loss": 2.6078, + "step": 231500 + }, + { + "epoch": 0.91, + "learning_rate": 3.491157009421164e-05, + "loss": 2.575, + "step": 232000 + }, + { + "epoch": 0.91, + "learning_rate": 3.487905192631123e-05, + "loss": 2.5266, + "step": 232500 + }, + { + "epoch": 0.91, + "learning_rate": 3.484653375841083e-05, + "loss": 2.5291, + "step": 233000 + }, + { + "epoch": 0.91, + "learning_rate": 3.481401559051042e-05, + "loss": 2.4816, + "step": 233500 + }, + { + "epoch": 0.91, + "learning_rate": 3.478149742261002e-05, + "loss": 2.4666, + "step": 234000 + }, + { + "epoch": 0.92, + "learning_rate": 3.474897925470961e-05, + "loss": 2.4444, + "step": 234500 + }, + { + "epoch": 0.92, + "learning_rate": 3.471646108680921e-05, + "loss": 2.4253, + "step": 235000 + }, + { + "epoch": 0.92, + "learning_rate": 3.46839429189088e-05, + "loss": 2.403, + "step": 235500 + }, + { + "epoch": 0.92, + "learning_rate": 3.465142475100839e-05, + "loss": 2.3755, + "step": 236000 + }, + { + "epoch": 0.92, + "learning_rate": 3.461890658310798e-05, + "loss": 2.35, + "step": 236500 + }, + { + "epoch": 0.92, + "learning_rate": 3.4586388415207574e-05, + "loss": 2.3292, + "step": 237000 + }, + { + "epoch": 0.93, + "learning_rate": 3.455387024730717e-05, + "loss": 2.3076, + "step": 237500 + }, + { + "epoch": 0.93, + "learning_rate": 3.4521352079406764e-05, + "loss": 2.2888, + "step": 238000 + }, + { + "epoch": 0.93, + "learning_rate": 3.448883391150636e-05, + "loss": 2.267, + "step": 238500 + }, + { + "epoch": 0.93, + "learning_rate": 3.4456315743605954e-05, + "loss": 2.2615, + "step": 239000 + }, + { + "epoch": 0.93, + "learning_rate": 3.4423797575705545e-05, + "loss": 2.2433, + "step": 239500 + }, + { + "epoch": 0.94, + "learning_rate": 3.4391279407805144e-05, + "loss": 2.2348, + "step": 240000 + }, + { + "epoch": 0.94, + "learning_rate": 3.4358761239904735e-05, + "loss": 2.2083, + "step": 240500 + }, + { + "epoch": 0.94, + "learning_rate": 3.4326243072004334e-05, + "loss": 2.2022, + "step": 241000 + }, + { + "epoch": 0.94, + "learning_rate": 3.4293724904103926e-05, + "loss": 2.2007, + "step": 241500 + }, + { + "epoch": 0.94, + "learning_rate": 3.426120673620352e-05, + "loss": 2.1813, + "step": 242000 + }, + { + "epoch": 0.95, + "learning_rate": 3.4228688568303116e-05, + "loss": 2.157, + "step": 242500 + }, + { + "epoch": 0.95, + "learning_rate": 3.419617040040271e-05, + "loss": 2.1462, + "step": 243000 + }, + { + "epoch": 0.95, + "learning_rate": 3.41636522325023e-05, + "loss": 2.1346, + "step": 243500 + }, + { + "epoch": 0.95, + "learning_rate": 3.41311340646019e-05, + "loss": 2.1271, + "step": 244000 + }, + { + "epoch": 0.95, + "learning_rate": 3.409861589670149e-05, + "loss": 2.1027, + "step": 244500 + }, + { + "epoch": 0.96, + "learning_rate": 3.406609772880108e-05, + "loss": 2.1023, + "step": 245000 + }, + { + "epoch": 0.96, + "learning_rate": 3.403357956090067e-05, + "loss": 2.0849, + "step": 245500 + }, + { + "epoch": 0.96, + "learning_rate": 3.400106139300027e-05, + "loss": 2.0843, + "step": 246000 + }, + { + "epoch": 0.96, + "learning_rate": 3.396854322509986e-05, + "loss": 2.0666, + "step": 246500 + }, + { + "epoch": 0.96, + "learning_rate": 3.393602505719946e-05, + "loss": 2.0578, + "step": 247000 + }, + { + "epoch": 0.97, + "learning_rate": 3.390350688929905e-05, + "loss": 2.0446, + "step": 247500 + }, + { + "epoch": 0.97, + "learning_rate": 3.3870988721398644e-05, + "loss": 2.0486, + "step": 248000 + }, + { + "epoch": 0.97, + "learning_rate": 3.383847055349824e-05, + "loss": 2.0247, + "step": 248500 + }, + { + "epoch": 0.97, + "learning_rate": 3.3805952385597834e-05, + "loss": 2.0286, + "step": 249000 + }, + { + "epoch": 0.97, + "learning_rate": 3.377343421769743e-05, + "loss": 2.0144, + "step": 249500 + }, + { + "epoch": 0.98, + "learning_rate": 3.3740916049797024e-05, + "loss": 2.0024, + "step": 250000 + }, + { + "epoch": 0.98, + "learning_rate": 3.370839788189662e-05, + "loss": 1.9928, + "step": 250500 + }, + { + "epoch": 0.98, + "learning_rate": 3.3675879713996214e-05, + "loss": 1.9791, + "step": 251000 + }, + { + "epoch": 0.98, + "learning_rate": 3.3643361546095806e-05, + "loss": 1.9735, + "step": 251500 + }, + { + "epoch": 0.98, + "learning_rate": 3.36108433781954e-05, + "loss": 1.9699, + "step": 252000 + }, + { + "epoch": 0.99, + "learning_rate": 3.357832521029499e-05, + "loss": 1.9683, + "step": 252500 + }, + { + "epoch": 0.99, + "learning_rate": 3.354580704239459e-05, + "loss": 1.9456, + "step": 253000 + }, + { + "epoch": 0.99, + "learning_rate": 3.351328887449418e-05, + "loss": 1.9365, + "step": 253500 + }, + { + "epoch": 0.99, + "learning_rate": 3.348077070659378e-05, + "loss": 1.9347, + "step": 254000 + }, + { + "epoch": 0.99, + "learning_rate": 3.344825253869337e-05, + "loss": 1.9325, + "step": 254500 + }, + { + "epoch": 1.0, + "learning_rate": 3.341573437079296e-05, + "loss": 1.9314, + "step": 255000 + }, + { + "epoch": 1.0, + "learning_rate": 3.338321620289256e-05, + "loss": 1.9199, + "step": 255500 + }, + { + "epoch": 1.0, + "learning_rate": 3.335069803499215e-05, + "loss": 1.905, + "step": 256000 + }, + { + "epoch": 1.0, + "learning_rate": 3.331817986709175e-05, + "loss": 1.9004, + "step": 256500 + }, + { + "epoch": 1.0, + "learning_rate": 3.328566169919134e-05, + "loss": 1.8919, + "step": 257000 + }, + { + "epoch": 1.0, + "learning_rate": 3.325314353129093e-05, + "loss": 1.8801, + "step": 257500 + }, + { + "epoch": 1.01, + "learning_rate": 3.322062536339053e-05, + "loss": 1.8772, + "step": 258000 + }, + { + "epoch": 1.01, + "learning_rate": 3.318810719549012e-05, + "loss": 1.8774, + "step": 258500 + }, + { + "epoch": 1.01, + "learning_rate": 3.3155589027589714e-05, + "loss": 1.8752, + "step": 259000 + }, + { + "epoch": 1.01, + "learning_rate": 3.3123070859689306e-05, + "loss": 1.852, + "step": 259500 + }, + { + "epoch": 1.01, + "learning_rate": 3.3090552691788905e-05, + "loss": 1.8492, + "step": 260000 + }, + { + "epoch": 1.02, + "learning_rate": 3.3058034523888496e-05, + "loss": 1.8562, + "step": 260500 + }, + { + "epoch": 1.02, + "learning_rate": 3.302551635598809e-05, + "loss": 1.843, + "step": 261000 + }, + { + "epoch": 1.02, + "learning_rate": 3.2992998188087686e-05, + "loss": 1.8321, + "step": 261500 + }, + { + "epoch": 1.02, + "learning_rate": 3.296048002018728e-05, + "loss": 1.8233, + "step": 262000 + }, + { + "epoch": 1.02, + "learning_rate": 3.2927961852286876e-05, + "loss": 1.8265, + "step": 262500 + }, + { + "epoch": 1.03, + "learning_rate": 3.289544368438647e-05, + "loss": 1.818, + "step": 263000 + }, + { + "epoch": 1.03, + "learning_rate": 3.2862925516486066e-05, + "loss": 1.8186, + "step": 263500 + }, + { + "epoch": 1.03, + "learning_rate": 3.283040734858566e-05, + "loss": 1.7974, + "step": 264000 + }, + { + "epoch": 1.03, + "learning_rate": 3.279788918068525e-05, + "loss": 1.7867, + "step": 264500 + }, + { + "epoch": 1.03, + "learning_rate": 3.276537101278485e-05, + "loss": 1.7938, + "step": 265000 + }, + { + "epoch": 1.04, + "learning_rate": 3.273285284488444e-05, + "loss": 1.7877, + "step": 265500 + }, + { + "epoch": 1.04, + "learning_rate": 3.270033467698403e-05, + "loss": 1.7923, + "step": 266000 + }, + { + "epoch": 1.04, + "learning_rate": 3.266781650908362e-05, + "loss": 1.7794, + "step": 266500 + }, + { + "epoch": 1.04, + "learning_rate": 3.263529834118322e-05, + "loss": 1.7809, + "step": 267000 + }, + { + "epoch": 1.04, + "learning_rate": 3.260278017328281e-05, + "loss": 1.7654, + "step": 267500 + }, + { + "epoch": 1.05, + "learning_rate": 3.2570262005382405e-05, + "loss": 1.7569, + "step": 268000 + }, + { + "epoch": 1.05, + "learning_rate": 3.2537743837482e-05, + "loss": 1.7653, + "step": 268500 + }, + { + "epoch": 1.05, + "learning_rate": 3.2505225669581595e-05, + "loss": 1.7554, + "step": 269000 + }, + { + "epoch": 1.05, + "learning_rate": 3.247270750168119e-05, + "loss": 1.7461, + "step": 269500 + }, + { + "epoch": 1.05, + "learning_rate": 3.2440189333780785e-05, + "loss": 1.7386, + "step": 270000 + }, + { + "epoch": 1.06, + "learning_rate": 3.2407671165880376e-05, + "loss": 1.738, + "step": 270500 + }, + { + "epoch": 1.06, + "learning_rate": 3.2375152997979975e-05, + "loss": 1.7343, + "step": 271000 + }, + { + "epoch": 1.06, + "learning_rate": 3.2342634830079567e-05, + "loss": 1.7334, + "step": 271500 + }, + { + "epoch": 1.06, + "learning_rate": 3.2310116662179165e-05, + "loss": 1.7163, + "step": 272000 + }, + { + "epoch": 1.06, + "learning_rate": 3.2277598494278757e-05, + "loss": 1.7176, + "step": 272500 + }, + { + "epoch": 1.07, + "learning_rate": 3.2245080326378355e-05, + "loss": 1.7203, + "step": 273000 + }, + { + "epoch": 1.07, + "learning_rate": 3.221256215847795e-05, + "loss": 1.7172, + "step": 273500 + }, + { + "epoch": 1.07, + "learning_rate": 3.218004399057754e-05, + "loss": 1.703, + "step": 274000 + }, + { + "epoch": 1.07, + "learning_rate": 3.214752582267713e-05, + "loss": 1.6993, + "step": 274500 + }, + { + "epoch": 1.07, + "learning_rate": 3.211500765477672e-05, + "loss": 1.6958, + "step": 275000 + }, + { + "epoch": 1.08, + "learning_rate": 3.208248948687632e-05, + "loss": 1.6965, + "step": 275500 + }, + { + "epoch": 1.08, + "learning_rate": 3.204997131897591e-05, + "loss": 1.6939, + "step": 276000 + }, + { + "epoch": 1.08, + "learning_rate": 3.20174531510755e-05, + "loss": 1.6792, + "step": 276500 + }, + { + "epoch": 1.08, + "learning_rate": 3.19849349831751e-05, + "loss": 1.6956, + "step": 277000 + }, + { + "epoch": 1.08, + "learning_rate": 3.195241681527469e-05, + "loss": 1.6888, + "step": 277500 + }, + { + "epoch": 1.08, + "learning_rate": 3.191989864737429e-05, + "loss": 1.6803, + "step": 278000 + }, + { + "epoch": 1.09, + "learning_rate": 3.1887380479473883e-05, + "loss": 1.6753, + "step": 278500 + }, + { + "epoch": 1.09, + "learning_rate": 3.185486231157348e-05, + "loss": 1.6731, + "step": 279000 + }, + { + "epoch": 1.09, + "learning_rate": 3.1822344143673074e-05, + "loss": 1.6584, + "step": 279500 + }, + { + "epoch": 1.09, + "learning_rate": 3.1789825975772665e-05, + "loss": 1.6632, + "step": 280000 + }, + { + "epoch": 1.09, + "learning_rate": 3.1757307807872264e-05, + "loss": 1.6526, + "step": 280500 + }, + { + "epoch": 1.1, + "learning_rate": 3.1724789639971855e-05, + "loss": 1.6596, + "step": 281000 + }, + { + "epoch": 1.1, + "learning_rate": 3.169227147207145e-05, + "loss": 1.6466, + "step": 281500 + }, + { + "epoch": 1.1, + "learning_rate": 3.165975330417104e-05, + "loss": 1.6503, + "step": 282000 + }, + { + "epoch": 1.1, + "learning_rate": 3.162723513627063e-05, + "loss": 1.6462, + "step": 282500 + }, + { + "epoch": 1.1, + "learning_rate": 3.159471696837023e-05, + "loss": 1.6459, + "step": 283000 + }, + { + "epoch": 1.11, + "learning_rate": 3.156219880046982e-05, + "loss": 1.6391, + "step": 283500 + }, + { + "epoch": 1.11, + "learning_rate": 3.152968063256942e-05, + "loss": 1.6301, + "step": 284000 + }, + { + "epoch": 1.11, + "learning_rate": 3.149716246466901e-05, + "loss": 1.627, + "step": 284500 + }, + { + "epoch": 1.11, + "learning_rate": 3.146464429676861e-05, + "loss": 1.6331, + "step": 285000 + }, + { + "epoch": 1.11, + "learning_rate": 3.14321261288682e-05, + "loss": 1.6254, + "step": 285500 + }, + { + "epoch": 1.12, + "learning_rate": 3.139960796096779e-05, + "loss": 1.6201, + "step": 286000 + }, + { + "epoch": 1.12, + "learning_rate": 3.136708979306739e-05, + "loss": 1.6213, + "step": 286500 + }, + { + "epoch": 1.12, + "learning_rate": 3.133457162516698e-05, + "loss": 1.6158, + "step": 287000 + }, + { + "epoch": 1.12, + "learning_rate": 3.130205345726658e-05, + "loss": 1.6142, + "step": 287500 + }, + { + "epoch": 1.12, + "learning_rate": 3.126953528936617e-05, + "loss": 1.6048, + "step": 288000 + }, + { + "epoch": 1.13, + "learning_rate": 3.1237017121465764e-05, + "loss": 1.6069, + "step": 288500 + }, + { + "epoch": 1.13, + "learning_rate": 3.1204498953565355e-05, + "loss": 1.601, + "step": 289000 + }, + { + "epoch": 1.13, + "learning_rate": 3.117198078566495e-05, + "loss": 1.6069, + "step": 289500 + }, + { + "epoch": 1.13, + "learning_rate": 3.1139462617764545e-05, + "loss": 1.5992, + "step": 290000 + }, + { + "epoch": 1.13, + "learning_rate": 3.110694444986414e-05, + "loss": 1.6011, + "step": 290500 + }, + { + "epoch": 1.14, + "learning_rate": 3.1074426281963736e-05, + "loss": 1.5995, + "step": 291000 + }, + { + "epoch": 1.14, + "learning_rate": 3.104190811406333e-05, + "loss": 1.5854, + "step": 291500 + }, + { + "epoch": 1.14, + "learning_rate": 3.100938994616292e-05, + "loss": 1.5855, + "step": 292000 + }, + { + "epoch": 1.14, + "learning_rate": 3.097687177826252e-05, + "loss": 1.58, + "step": 292500 + }, + { + "epoch": 1.14, + "learning_rate": 3.094435361036211e-05, + "loss": 1.5733, + "step": 293000 + }, + { + "epoch": 1.15, + "learning_rate": 3.091183544246171e-05, + "loss": 1.5734, + "step": 293500 + }, + { + "epoch": 1.15, + "learning_rate": 3.08793172745613e-05, + "loss": 1.5749, + "step": 294000 + }, + { + "epoch": 1.15, + "learning_rate": 3.08467991066609e-05, + "loss": 1.5793, + "step": 294500 + }, + { + "epoch": 1.15, + "learning_rate": 3.081428093876049e-05, + "loss": 1.57, + "step": 295000 + }, + { + "epoch": 1.15, + "learning_rate": 3.078176277086008e-05, + "loss": 1.559, + "step": 295500 + }, + { + "epoch": 1.16, + "learning_rate": 3.074924460295968e-05, + "loss": 1.5605, + "step": 296000 + }, + { + "epoch": 1.16, + "learning_rate": 3.071672643505927e-05, + "loss": 1.5528, + "step": 296500 + }, + { + "epoch": 1.16, + "learning_rate": 3.068420826715886e-05, + "loss": 1.5572, + "step": 297000 + }, + { + "epoch": 1.16, + "learning_rate": 3.0651690099258454e-05, + "loss": 1.5568, + "step": 297500 + }, + { + "epoch": 1.16, + "learning_rate": 3.061917193135805e-05, + "loss": 1.556, + "step": 298000 + }, + { + "epoch": 1.16, + "learning_rate": 3.0586653763457644e-05, + "loss": 1.5599, + "step": 298500 + }, + { + "epoch": 1.17, + "learning_rate": 3.0554135595557236e-05, + "loss": 1.5531, + "step": 299000 + }, + { + "epoch": 1.17, + "learning_rate": 3.0521617427656834e-05, + "loss": 1.5557, + "step": 299500 + }, + { + "epoch": 1.17, + "learning_rate": 3.0489099259756426e-05, + "loss": 1.5438, + "step": 300000 + }, + { + "epoch": 1.17, + "learning_rate": 3.0456581091856024e-05, + "loss": 1.5526, + "step": 300500 + }, + { + "epoch": 1.17, + "learning_rate": 3.0424062923955616e-05, + "loss": 1.5352, + "step": 301000 + }, + { + "epoch": 1.18, + "learning_rate": 3.0391544756055208e-05, + "loss": 1.5359, + "step": 301500 + }, + { + "epoch": 1.18, + "learning_rate": 3.0359026588154803e-05, + "loss": 1.5354, + "step": 302000 + }, + { + "epoch": 1.18, + "learning_rate": 3.0326508420254394e-05, + "loss": 1.5295, + "step": 302500 + }, + { + "epoch": 1.18, + "learning_rate": 3.0293990252353993e-05, + "loss": 1.5296, + "step": 303000 + }, + { + "epoch": 1.18, + "learning_rate": 3.0261472084453584e-05, + "loss": 1.5247, + "step": 303500 + }, + { + "epoch": 1.19, + "learning_rate": 3.0228953916553183e-05, + "loss": 1.5268, + "step": 304000 + }, + { + "epoch": 1.19, + "learning_rate": 3.0196435748652774e-05, + "loss": 1.5214, + "step": 304500 + }, + { + "epoch": 1.19, + "learning_rate": 3.0163917580752366e-05, + "loss": 1.5246, + "step": 305000 + }, + { + "epoch": 1.19, + "learning_rate": 3.0131399412851964e-05, + "loss": 1.5274, + "step": 305500 + }, + { + "epoch": 1.19, + "learning_rate": 3.0098881244951556e-05, + "loss": 1.5078, + "step": 306000 + }, + { + "epoch": 1.2, + "learning_rate": 3.006636307705115e-05, + "loss": 1.5175, + "step": 306500 + }, + { + "epoch": 1.2, + "learning_rate": 3.0033844909150743e-05, + "loss": 1.5166, + "step": 307000 + }, + { + "epoch": 1.2, + "learning_rate": 3.000132674125034e-05, + "loss": 1.5126, + "step": 307500 + }, + { + "epoch": 1.2, + "learning_rate": 2.9968808573349933e-05, + "loss": 1.5188, + "step": 308000 + }, + { + "epoch": 1.2, + "learning_rate": 2.9936290405449524e-05, + "loss": 1.5015, + "step": 308500 + }, + { + "epoch": 1.21, + "learning_rate": 2.9903772237549123e-05, + "loss": 1.505, + "step": 309000 + }, + { + "epoch": 1.21, + "learning_rate": 2.9871254069648715e-05, + "loss": 1.4986, + "step": 309500 + }, + { + "epoch": 1.21, + "learning_rate": 2.983873590174831e-05, + "loss": 1.4994, + "step": 310000 + }, + { + "epoch": 1.21, + "learning_rate": 2.98062177338479e-05, + "loss": 1.4982, + "step": 310500 + }, + { + "epoch": 1.21, + "learning_rate": 2.9773699565947493e-05, + "loss": 1.4874, + "step": 311000 + }, + { + "epoch": 1.22, + "learning_rate": 2.974118139804709e-05, + "loss": 1.4915, + "step": 311500 + }, + { + "epoch": 1.22, + "learning_rate": 2.9708663230146683e-05, + "loss": 1.4996, + "step": 312000 + }, + { + "epoch": 1.22, + "learning_rate": 2.967614506224628e-05, + "loss": 1.4948, + "step": 312500 + }, + { + "epoch": 1.22, + "learning_rate": 2.9643626894345873e-05, + "loss": 1.4842, + "step": 313000 + }, + { + "epoch": 1.22, + "learning_rate": 2.9611108726445468e-05, + "loss": 1.4798, + "step": 313500 + }, + { + "epoch": 1.23, + "learning_rate": 2.957859055854506e-05, + "loss": 1.491, + "step": 314000 + }, + { + "epoch": 1.23, + "learning_rate": 2.954607239064465e-05, + "loss": 1.4821, + "step": 314500 + }, + { + "epoch": 1.23, + "learning_rate": 2.951355422274425e-05, + "loss": 1.4841, + "step": 315000 + }, + { + "epoch": 1.23, + "learning_rate": 2.948103605484384e-05, + "loss": 1.4746, + "step": 315500 + }, + { + "epoch": 1.23, + "learning_rate": 2.944851788694344e-05, + "loss": 1.4767, + "step": 316000 + }, + { + "epoch": 1.24, + "learning_rate": 2.941599971904303e-05, + "loss": 1.4775, + "step": 316500 + }, + { + "epoch": 1.24, + "learning_rate": 2.9383481551142623e-05, + "loss": 1.4777, + "step": 317000 + }, + { + "epoch": 1.24, + "learning_rate": 2.9350963383242218e-05, + "loss": 1.4718, + "step": 317500 + }, + { + "epoch": 1.24, + "learning_rate": 2.931844521534181e-05, + "loss": 1.4654, + "step": 318000 + }, + { + "epoch": 1.24, + "learning_rate": 2.9285927047441408e-05, + "loss": 1.4667, + "step": 318500 + }, + { + "epoch": 1.24, + "learning_rate": 2.9253408879541e-05, + "loss": 1.4718, + "step": 319000 + }, + { + "epoch": 1.25, + "learning_rate": 2.9220890711640598e-05, + "loss": 1.4644, + "step": 319500 + }, + { + "epoch": 1.25, + "learning_rate": 2.918837254374019e-05, + "loss": 1.4597, + "step": 320000 + }, + { + "epoch": 1.25, + "learning_rate": 2.915585437583978e-05, + "loss": 1.4532, + "step": 320500 + }, + { + "epoch": 1.25, + "learning_rate": 2.9123336207939377e-05, + "loss": 1.4635, + "step": 321000 + }, + { + "epoch": 1.25, + "learning_rate": 2.9090818040038968e-05, + "loss": 1.4501, + "step": 321500 + }, + { + "epoch": 1.26, + "learning_rate": 2.9058299872138567e-05, + "loss": 1.4521, + "step": 322000 + }, + { + "epoch": 1.26, + "learning_rate": 2.9025781704238158e-05, + "loss": 1.4588, + "step": 322500 + }, + { + "epoch": 1.26, + "learning_rate": 2.8993263536337757e-05, + "loss": 1.4562, + "step": 323000 + }, + { + "epoch": 1.26, + "learning_rate": 2.896074536843735e-05, + "loss": 1.4555, + "step": 323500 + }, + { + "epoch": 1.26, + "learning_rate": 2.892822720053694e-05, + "loss": 1.4483, + "step": 324000 + }, + { + "epoch": 1.27, + "learning_rate": 2.8895709032636535e-05, + "loss": 1.4548, + "step": 324500 + }, + { + "epoch": 1.27, + "learning_rate": 2.8863190864736127e-05, + "loss": 1.4491, + "step": 325000 + }, + { + "epoch": 1.27, + "learning_rate": 2.8830672696835725e-05, + "loss": 1.4398, + "step": 325500 + }, + { + "epoch": 1.27, + "learning_rate": 2.8798154528935317e-05, + "loss": 1.4409, + "step": 326000 + }, + { + "epoch": 1.27, + "learning_rate": 2.876563636103491e-05, + "loss": 1.4472, + "step": 326500 + }, + { + "epoch": 1.28, + "learning_rate": 2.8733118193134507e-05, + "loss": 1.4316, + "step": 327000 + }, + { + "epoch": 1.28, + "learning_rate": 2.87006000252341e-05, + "loss": 1.4255, + "step": 327500 + }, + { + "epoch": 1.28, + "learning_rate": 2.8668081857333693e-05, + "loss": 1.4308, + "step": 328000 + }, + { + "epoch": 1.28, + "learning_rate": 2.863556368943329e-05, + "loss": 1.4355, + "step": 328500 + }, + { + "epoch": 1.28, + "learning_rate": 2.8603045521532884e-05, + "loss": 1.4294, + "step": 329000 + }, + { + "epoch": 1.29, + "learning_rate": 2.8570527353632475e-05, + "loss": 1.4267, + "step": 329500 + }, + { + "epoch": 1.29, + "learning_rate": 2.8538009185732067e-05, + "loss": 1.4333, + "step": 330000 + }, + { + "epoch": 1.29, + "learning_rate": 2.8505491017831665e-05, + "loss": 1.4254, + "step": 330500 + }, + { + "epoch": 1.29, + "learning_rate": 2.8472972849931257e-05, + "loss": 1.4281, + "step": 331000 + }, + { + "epoch": 1.29, + "learning_rate": 2.8440454682030855e-05, + "loss": 1.426, + "step": 331500 + }, + { + "epoch": 1.3, + "learning_rate": 2.8407936514130447e-05, + "loss": 1.4294, + "step": 332000 + }, + { + "epoch": 1.3, + "learning_rate": 2.8375418346230042e-05, + "loss": 1.4208, + "step": 332500 + }, + { + "epoch": 1.3, + "learning_rate": 2.8342900178329634e-05, + "loss": 1.421, + "step": 333000 + }, + { + "epoch": 1.3, + "learning_rate": 2.8310382010429225e-05, + "loss": 1.4226, + "step": 333500 + }, + { + "epoch": 1.3, + "learning_rate": 2.8277863842528824e-05, + "loss": 1.417, + "step": 334000 + }, + { + "epoch": 1.31, + "learning_rate": 2.8245345674628415e-05, + "loss": 1.4166, + "step": 334500 + }, + { + "epoch": 1.31, + "learning_rate": 2.8212827506728014e-05, + "loss": 1.4207, + "step": 335000 + }, + { + "epoch": 1.31, + "learning_rate": 2.8180309338827605e-05, + "loss": 1.408, + "step": 335500 + }, + { + "epoch": 1.31, + "learning_rate": 2.8147791170927197e-05, + "loss": 1.412, + "step": 336000 + }, + { + "epoch": 1.31, + "learning_rate": 2.8115273003026792e-05, + "loss": 1.412, + "step": 336500 + }, + { + "epoch": 1.32, + "learning_rate": 2.8082754835126384e-05, + "loss": 1.4137, + "step": 337000 + }, + { + "epoch": 1.32, + "learning_rate": 2.8050236667225982e-05, + "loss": 1.4032, + "step": 337500 + }, + { + "epoch": 1.32, + "learning_rate": 2.8017718499325574e-05, + "loss": 1.4022, + "step": 338000 + }, + { + "epoch": 1.32, + "learning_rate": 2.7985200331425172e-05, + "loss": 1.4038, + "step": 338500 + }, + { + "epoch": 1.32, + "learning_rate": 2.7952682163524764e-05, + "loss": 1.3994, + "step": 339000 + }, + { + "epoch": 1.32, + "learning_rate": 2.7920163995624355e-05, + "loss": 1.3996, + "step": 339500 + }, + { + "epoch": 1.33, + "learning_rate": 2.788764582772395e-05, + "loss": 1.3937, + "step": 340000 + }, + { + "epoch": 1.33, + "learning_rate": 2.7855127659823542e-05, + "loss": 1.3994, + "step": 340500 + }, + { + "epoch": 1.33, + "learning_rate": 2.782260949192314e-05, + "loss": 1.4008, + "step": 341000 + }, + { + "epoch": 1.33, + "learning_rate": 2.7790091324022732e-05, + "loss": 1.399, + "step": 341500 + }, + { + "epoch": 1.33, + "learning_rate": 2.775757315612233e-05, + "loss": 1.398, + "step": 342000 + }, + { + "epoch": 1.34, + "learning_rate": 2.7725054988221922e-05, + "loss": 1.3928, + "step": 342500 + }, + { + "epoch": 1.34, + "learning_rate": 2.7692536820321514e-05, + "loss": 1.3855, + "step": 343000 + }, + { + "epoch": 1.34, + "learning_rate": 2.766001865242111e-05, + "loss": 1.394, + "step": 343500 + }, + { + "epoch": 1.34, + "learning_rate": 2.76275004845207e-05, + "loss": 1.3863, + "step": 344000 + }, + { + "epoch": 1.34, + "learning_rate": 2.75949823166203e-05, + "loss": 1.3915, + "step": 344500 + }, + { + "epoch": 1.35, + "learning_rate": 2.756246414871989e-05, + "loss": 1.392, + "step": 345000 + }, + { + "epoch": 1.35, + "learning_rate": 2.7529945980819482e-05, + "loss": 1.389, + "step": 345500 + }, + { + "epoch": 1.35, + "learning_rate": 2.749742781291908e-05, + "loss": 1.3836, + "step": 346000 + }, + { + "epoch": 1.35, + "learning_rate": 2.7464909645018672e-05, + "loss": 1.3852, + "step": 346500 + }, + { + "epoch": 1.35, + "learning_rate": 2.7432391477118267e-05, + "loss": 1.3736, + "step": 347000 + }, + { + "epoch": 1.36, + "learning_rate": 2.739987330921786e-05, + "loss": 1.3818, + "step": 347500 + }, + { + "epoch": 1.36, + "learning_rate": 2.7367355141317457e-05, + "loss": 1.3816, + "step": 348000 + }, + { + "epoch": 1.36, + "learning_rate": 2.733483697341705e-05, + "loss": 1.3766, + "step": 348500 + }, + { + "epoch": 1.36, + "learning_rate": 2.730231880551664e-05, + "loss": 1.3835, + "step": 349000 + }, + { + "epoch": 1.36, + "learning_rate": 2.726980063761624e-05, + "loss": 1.3843, + "step": 349500 + }, + { + "epoch": 1.37, + "learning_rate": 2.723728246971583e-05, + "loss": 1.3723, + "step": 350000 + }, + { + "epoch": 1.37, + "learning_rate": 2.7204764301815426e-05, + "loss": 1.3802, + "step": 350500 + }, + { + "epoch": 1.37, + "learning_rate": 2.7172246133915018e-05, + "loss": 1.3747, + "step": 351000 + }, + { + "epoch": 1.37, + "learning_rate": 2.7139727966014616e-05, + "loss": 1.3662, + "step": 351500 + }, + { + "epoch": 1.37, + "learning_rate": 2.7107209798114208e-05, + "loss": 1.3712, + "step": 352000 + }, + { + "epoch": 1.38, + "learning_rate": 2.70746916302138e-05, + "loss": 1.3805, + "step": 352500 + }, + { + "epoch": 1.38, + "learning_rate": 2.7042173462313398e-05, + "loss": 1.3647, + "step": 353000 + }, + { + "epoch": 1.38, + "learning_rate": 2.700965529441299e-05, + "loss": 1.3731, + "step": 353500 + }, + { + "epoch": 1.38, + "learning_rate": 2.6977137126512588e-05, + "loss": 1.3663, + "step": 354000 + }, + { + "epoch": 1.38, + "learning_rate": 2.694461895861218e-05, + "loss": 1.3559, + "step": 354500 + }, + { + "epoch": 1.39, + "learning_rate": 2.691210079071177e-05, + "loss": 1.3642, + "step": 355000 + }, + { + "epoch": 1.39, + "learning_rate": 2.6879582622811366e-05, + "loss": 1.3679, + "step": 355500 + }, + { + "epoch": 1.39, + "learning_rate": 2.6847064454910958e-05, + "loss": 1.3576, + "step": 356000 + }, + { + "epoch": 1.39, + "learning_rate": 2.6814546287010556e-05, + "loss": 1.3536, + "step": 356500 + }, + { + "epoch": 1.39, + "learning_rate": 2.6782028119110148e-05, + "loss": 1.3635, + "step": 357000 + }, + { + "epoch": 1.4, + "learning_rate": 2.6749509951209746e-05, + "loss": 1.3622, + "step": 357500 + }, + { + "epoch": 1.4, + "learning_rate": 2.6716991783309338e-05, + "loss": 1.3594, + "step": 358000 + }, + { + "epoch": 1.4, + "learning_rate": 2.668447361540893e-05, + "loss": 1.3559, + "step": 358500 + }, + { + "epoch": 1.4, + "learning_rate": 2.6651955447508525e-05, + "loss": 1.351, + "step": 359000 + }, + { + "epoch": 1.4, + "learning_rate": 2.6619437279608116e-05, + "loss": 1.3582, + "step": 359500 + }, + { + "epoch": 1.4, + "learning_rate": 2.6586919111707715e-05, + "loss": 1.3567, + "step": 360000 + }, + { + "epoch": 1.41, + "learning_rate": 2.6554400943807306e-05, + "loss": 1.3417, + "step": 360500 + }, + { + "epoch": 1.41, + "learning_rate": 2.6521882775906898e-05, + "loss": 1.3467, + "step": 361000 + }, + { + "epoch": 1.41, + "learning_rate": 2.6489364608006496e-05, + "loss": 1.3623, + "step": 361500 + }, + { + "epoch": 1.41, + "learning_rate": 2.6456846440106088e-05, + "loss": 1.3495, + "step": 362000 + }, + { + "epoch": 1.41, + "learning_rate": 2.6424328272205683e-05, + "loss": 1.3514, + "step": 362500 + }, + { + "epoch": 1.42, + "learning_rate": 2.6391810104305275e-05, + "loss": 1.342, + "step": 363000 + }, + { + "epoch": 1.42, + "learning_rate": 2.6359291936404873e-05, + "loss": 1.3363, + "step": 363500 + }, + { + "epoch": 1.42, + "learning_rate": 2.6326773768504465e-05, + "loss": 1.3467, + "step": 364000 + }, + { + "epoch": 1.42, + "learning_rate": 2.6294255600604056e-05, + "loss": 1.345, + "step": 364500 + }, + { + "epoch": 1.42, + "learning_rate": 2.6261737432703655e-05, + "loss": 1.3486, + "step": 365000 + }, + { + "epoch": 1.43, + "learning_rate": 2.6229219264803246e-05, + "loss": 1.3505, + "step": 365500 + }, + { + "epoch": 1.43, + "learning_rate": 2.619670109690284e-05, + "loss": 1.3397, + "step": 366000 + }, + { + "epoch": 1.43, + "learning_rate": 2.6164182929002433e-05, + "loss": 1.3381, + "step": 366500 + }, + { + "epoch": 1.43, + "learning_rate": 2.613166476110203e-05, + "loss": 1.3367, + "step": 367000 + }, + { + "epoch": 1.43, + "learning_rate": 2.6099146593201623e-05, + "loss": 1.3375, + "step": 367500 + }, + { + "epoch": 1.44, + "learning_rate": 2.6066628425301215e-05, + "loss": 1.3405, + "step": 368000 + }, + { + "epoch": 1.44, + "learning_rate": 2.6034110257400813e-05, + "loss": 1.3384, + "step": 368500 + }, + { + "epoch": 1.44, + "learning_rate": 2.6001592089500405e-05, + "loss": 1.3357, + "step": 369000 + }, + { + "epoch": 1.44, + "learning_rate": 2.59690739216e-05, + "loss": 1.3349, + "step": 369500 + }, + { + "epoch": 1.44, + "learning_rate": 2.593655575369959e-05, + "loss": 1.3386, + "step": 370000 + }, + { + "epoch": 1.45, + "learning_rate": 2.5904037585799183e-05, + "loss": 1.3269, + "step": 370500 + }, + { + "epoch": 1.45, + "learning_rate": 2.587151941789878e-05, + "loss": 1.3368, + "step": 371000 + }, + { + "epoch": 1.45, + "learning_rate": 2.5839001249998373e-05, + "loss": 1.3378, + "step": 371500 + }, + { + "epoch": 1.45, + "learning_rate": 2.580648308209797e-05, + "loss": 1.3308, + "step": 372000 + }, + { + "epoch": 1.45, + "learning_rate": 2.5773964914197563e-05, + "loss": 1.3222, + "step": 372500 + }, + { + "epoch": 1.46, + "learning_rate": 2.574144674629716e-05, + "loss": 1.3313, + "step": 373000 + }, + { + "epoch": 1.46, + "learning_rate": 2.570892857839675e-05, + "loss": 1.3274, + "step": 373500 + }, + { + "epoch": 1.46, + "learning_rate": 2.567641041049634e-05, + "loss": 1.3282, + "step": 374000 + }, + { + "epoch": 1.46, + "learning_rate": 2.564389224259594e-05, + "loss": 1.3198, + "step": 374500 + }, + { + "epoch": 1.46, + "learning_rate": 2.561137407469553e-05, + "loss": 1.319, + "step": 375000 + }, + { + "epoch": 1.47, + "learning_rate": 2.557885590679513e-05, + "loss": 1.3279, + "step": 375500 + }, + { + "epoch": 1.47, + "learning_rate": 2.5546337738894722e-05, + "loss": 1.3206, + "step": 376000 + }, + { + "epoch": 1.47, + "learning_rate": 2.5513819570994317e-05, + "loss": 1.3237, + "step": 376500 + }, + { + "epoch": 1.47, + "learning_rate": 2.5481301403093912e-05, + "loss": 1.317, + "step": 377000 + }, + { + "epoch": 1.47, + "learning_rate": 2.5448783235193503e-05, + "loss": 1.3168, + "step": 377500 + }, + { + "epoch": 1.48, + "learning_rate": 2.54162650672931e-05, + "loss": 1.3219, + "step": 378000 + }, + { + "epoch": 1.48, + "learning_rate": 2.538374689939269e-05, + "loss": 1.316, + "step": 378500 + }, + { + "epoch": 1.48, + "learning_rate": 2.535122873149229e-05, + "loss": 1.3099, + "step": 379000 + }, + { + "epoch": 1.48, + "learning_rate": 2.531871056359188e-05, + "loss": 1.3184, + "step": 379500 + }, + { + "epoch": 1.48, + "learning_rate": 2.5286192395691472e-05, + "loss": 1.32, + "step": 380000 + }, + { + "epoch": 1.48, + "learning_rate": 2.525367422779107e-05, + "loss": 1.3118, + "step": 380500 + }, + { + "epoch": 1.49, + "learning_rate": 2.5221156059890662e-05, + "loss": 1.301, + "step": 381000 + }, + { + "epoch": 1.49, + "learning_rate": 2.5188637891990257e-05, + "loss": 1.3119, + "step": 381500 + }, + { + "epoch": 1.49, + "learning_rate": 2.515611972408985e-05, + "loss": 1.3115, + "step": 382000 + }, + { + "epoch": 1.49, + "learning_rate": 2.5123601556189447e-05, + "loss": 1.3155, + "step": 382500 + }, + { + "epoch": 1.49, + "learning_rate": 2.509108338828904e-05, + "loss": 1.318, + "step": 383000 + }, + { + "epoch": 1.5, + "learning_rate": 2.505856522038863e-05, + "loss": 1.3038, + "step": 383500 + }, + { + "epoch": 1.5, + "learning_rate": 2.502604705248823e-05, + "loss": 1.3007, + "step": 384000 + }, + { + "epoch": 1.5, + "learning_rate": 2.499352888458782e-05, + "loss": 1.3096, + "step": 384500 + }, + { + "epoch": 1.5, + "learning_rate": 2.4961010716687412e-05, + "loss": 1.2961, + "step": 385000 + }, + { + "epoch": 1.5, + "learning_rate": 2.4928492548787007e-05, + "loss": 1.3118, + "step": 385500 + }, + { + "epoch": 1.51, + "learning_rate": 2.4895974380886602e-05, + "loss": 1.3076, + "step": 386000 + }, + { + "epoch": 1.51, + "learning_rate": 2.4863456212986197e-05, + "loss": 1.3118, + "step": 386500 + }, + { + "epoch": 1.51, + "learning_rate": 2.4830938045085792e-05, + "loss": 1.2965, + "step": 387000 + }, + { + "epoch": 1.51, + "learning_rate": 2.4798419877185387e-05, + "loss": 1.302, + "step": 387500 + }, + { + "epoch": 1.51, + "learning_rate": 2.476590170928498e-05, + "loss": 1.3042, + "step": 388000 + }, + { + "epoch": 1.52, + "learning_rate": 2.473338354138457e-05, + "loss": 1.3032, + "step": 388500 + }, + { + "epoch": 1.52, + "learning_rate": 2.4700865373484165e-05, + "loss": 1.3031, + "step": 389000 + }, + { + "epoch": 1.52, + "learning_rate": 2.466834720558376e-05, + "loss": 1.2966, + "step": 389500 + }, + { + "epoch": 1.52, + "learning_rate": 2.4635829037683356e-05, + "loss": 1.3008, + "step": 390000 + }, + { + "epoch": 1.52, + "learning_rate": 2.460331086978295e-05, + "loss": 1.2896, + "step": 390500 + }, + { + "epoch": 1.53, + "learning_rate": 2.4570792701882542e-05, + "loss": 1.2999, + "step": 391000 + }, + { + "epoch": 1.53, + "learning_rate": 2.4538274533982137e-05, + "loss": 1.3017, + "step": 391500 + }, + { + "epoch": 1.53, + "learning_rate": 2.4505756366081732e-05, + "loss": 1.2969, + "step": 392000 + }, + { + "epoch": 1.53, + "learning_rate": 2.4473238198181324e-05, + "loss": 1.297, + "step": 392500 + }, + { + "epoch": 1.53, + "learning_rate": 2.444072003028092e-05, + "loss": 1.2959, + "step": 393000 + }, + { + "epoch": 1.54, + "learning_rate": 2.4408201862380514e-05, + "loss": 1.3034, + "step": 393500 + }, + { + "epoch": 1.54, + "learning_rate": 2.4375683694480106e-05, + "loss": 1.285, + "step": 394000 + }, + { + "epoch": 1.54, + "learning_rate": 2.43431655265797e-05, + "loss": 1.2913, + "step": 394500 + }, + { + "epoch": 1.54, + "learning_rate": 2.4310647358679296e-05, + "loss": 1.2806, + "step": 395000 + }, + { + "epoch": 1.54, + "learning_rate": 2.427812919077889e-05, + "loss": 1.2842, + "step": 395500 + }, + { + "epoch": 1.55, + "learning_rate": 2.4245611022878482e-05, + "loss": 1.2776, + "step": 396000 + }, + { + "epoch": 1.55, + "learning_rate": 2.4213092854978077e-05, + "loss": 1.2867, + "step": 396500 + }, + { + "epoch": 1.55, + "learning_rate": 2.4180574687077672e-05, + "loss": 1.2906, + "step": 397000 + }, + { + "epoch": 1.55, + "learning_rate": 2.4148056519177264e-05, + "loss": 1.2907, + "step": 397500 + }, + { + "epoch": 1.55, + "learning_rate": 2.411553835127686e-05, + "loss": 1.2786, + "step": 398000 + }, + { + "epoch": 1.56, + "learning_rate": 2.4083020183376454e-05, + "loss": 1.2943, + "step": 398500 + }, + { + "epoch": 1.56, + "learning_rate": 2.405050201547605e-05, + "loss": 1.2758, + "step": 399000 + }, + { + "epoch": 1.56, + "learning_rate": 2.401798384757564e-05, + "loss": 1.2839, + "step": 399500 + }, + { + "epoch": 1.56, + "learning_rate": 2.3985465679675236e-05, + "loss": 1.2819, + "step": 400000 + }, + { + "epoch": 1.56, + "learning_rate": 2.3952947511774828e-05, + "loss": 1.2791, + "step": 400500 + }, + { + "epoch": 1.56, + "learning_rate": 2.3920429343874423e-05, + "loss": 1.2753, + "step": 401000 + }, + { + "epoch": 1.57, + "learning_rate": 2.3887911175974018e-05, + "loss": 1.2791, + "step": 401500 + }, + { + "epoch": 1.57, + "learning_rate": 2.3855393008073613e-05, + "loss": 1.2691, + "step": 402000 + }, + { + "epoch": 1.57, + "learning_rate": 2.3822874840173208e-05, + "loss": 1.278, + "step": 402500 + }, + { + "epoch": 1.57, + "learning_rate": 2.3790356672272803e-05, + "loss": 1.2769, + "step": 403000 + }, + { + "epoch": 1.57, + "learning_rate": 2.3757838504372394e-05, + "loss": 1.282, + "step": 403500 + }, + { + "epoch": 1.58, + "learning_rate": 2.3725320336471986e-05, + "loss": 1.272, + "step": 404000 + }, + { + "epoch": 1.58, + "learning_rate": 2.369280216857158e-05, + "loss": 1.2694, + "step": 404500 + }, + { + "epoch": 1.58, + "learning_rate": 2.3660284000671176e-05, + "loss": 1.2681, + "step": 405000 + }, + { + "epoch": 1.58, + "learning_rate": 2.362776583277077e-05, + "loss": 1.2774, + "step": 405500 + }, + { + "epoch": 1.58, + "learning_rate": 2.3595247664870366e-05, + "loss": 1.2685, + "step": 406000 + }, + { + "epoch": 1.59, + "learning_rate": 2.3562729496969958e-05, + "loss": 1.2703, + "step": 406500 + }, + { + "epoch": 1.59, + "learning_rate": 2.3530211329069553e-05, + "loss": 1.277, + "step": 407000 + }, + { + "epoch": 1.59, + "learning_rate": 2.3497693161169144e-05, + "loss": 1.2666, + "step": 407500 + }, + { + "epoch": 1.59, + "learning_rate": 2.346517499326874e-05, + "loss": 1.2656, + "step": 408000 + }, + { + "epoch": 1.59, + "learning_rate": 2.3432656825368335e-05, + "loss": 1.2666, + "step": 408500 + }, + { + "epoch": 1.6, + "learning_rate": 2.340013865746793e-05, + "loss": 1.2639, + "step": 409000 + }, + { + "epoch": 1.6, + "learning_rate": 2.3367620489567525e-05, + "loss": 1.2686, + "step": 409500 + }, + { + "epoch": 1.6, + "learning_rate": 2.3335102321667116e-05, + "loss": 1.2689, + "step": 410000 + }, + { + "epoch": 1.6, + "learning_rate": 2.330258415376671e-05, + "loss": 1.2643, + "step": 410500 + }, + { + "epoch": 1.6, + "learning_rate": 2.3270065985866303e-05, + "loss": 1.2647, + "step": 411000 + }, + { + "epoch": 1.61, + "learning_rate": 2.3237547817965898e-05, + "loss": 1.2546, + "step": 411500 + }, + { + "epoch": 1.61, + "learning_rate": 2.3205029650065493e-05, + "loss": 1.2731, + "step": 412000 + }, + { + "epoch": 1.61, + "learning_rate": 2.3172511482165088e-05, + "loss": 1.2624, + "step": 412500 + }, + { + "epoch": 1.61, + "learning_rate": 2.313999331426468e-05, + "loss": 1.2578, + "step": 413000 + }, + { + "epoch": 1.61, + "learning_rate": 2.3107475146364275e-05, + "loss": 1.2609, + "step": 413500 + }, + { + "epoch": 1.62, + "learning_rate": 2.307495697846387e-05, + "loss": 1.2538, + "step": 414000 + }, + { + "epoch": 1.62, + "learning_rate": 2.3042438810563465e-05, + "loss": 1.258, + "step": 414500 + }, + { + "epoch": 1.62, + "learning_rate": 2.3009920642663056e-05, + "loss": 1.2518, + "step": 415000 + }, + { + "epoch": 1.62, + "learning_rate": 2.297740247476265e-05, + "loss": 1.2589, + "step": 415500 + }, + { + "epoch": 1.62, + "learning_rate": 2.2944884306862243e-05, + "loss": 1.2574, + "step": 416000 + }, + { + "epoch": 1.63, + "learning_rate": 2.2912366138961838e-05, + "loss": 1.2568, + "step": 416500 + }, + { + "epoch": 1.63, + "learning_rate": 2.2879847971061433e-05, + "loss": 1.2668, + "step": 417000 + }, + { + "epoch": 1.63, + "learning_rate": 2.2847329803161028e-05, + "loss": 1.2583, + "step": 417500 + }, + { + "epoch": 1.63, + "learning_rate": 2.2814811635260623e-05, + "loss": 1.2515, + "step": 418000 + }, + { + "epoch": 1.63, + "learning_rate": 2.2782293467360215e-05, + "loss": 1.2536, + "step": 418500 + }, + { + "epoch": 1.64, + "learning_rate": 2.2749775299459806e-05, + "loss": 1.2568, + "step": 419000 + }, + { + "epoch": 1.64, + "learning_rate": 2.27172571315594e-05, + "loss": 1.2572, + "step": 419500 + }, + { + "epoch": 1.64, + "learning_rate": 2.2684738963658997e-05, + "loss": 1.2556, + "step": 420000 + }, + { + "epoch": 1.64, + "learning_rate": 2.265222079575859e-05, + "loss": 1.2529, + "step": 420500 + }, + { + "epoch": 1.64, + "learning_rate": 2.2619702627858187e-05, + "loss": 1.2514, + "step": 421000 + }, + { + "epoch": 1.64, + "learning_rate": 2.258718445995778e-05, + "loss": 1.2515, + "step": 421500 + }, + { + "epoch": 1.65, + "learning_rate": 2.2554666292057373e-05, + "loss": 1.2482, + "step": 422000 + }, + { + "epoch": 1.65, + "learning_rate": 2.2522148124156965e-05, + "loss": 1.25, + "step": 422500 + }, + { + "epoch": 1.65, + "learning_rate": 2.248962995625656e-05, + "loss": 1.2518, + "step": 423000 + }, + { + "epoch": 1.65, + "learning_rate": 2.2457111788356155e-05, + "loss": 1.2528, + "step": 423500 + }, + { + "epoch": 1.65, + "learning_rate": 2.242459362045575e-05, + "loss": 1.2491, + "step": 424000 + }, + { + "epoch": 1.66, + "learning_rate": 2.2392075452555345e-05, + "loss": 1.2438, + "step": 424500 + }, + { + "epoch": 1.66, + "learning_rate": 2.235955728465494e-05, + "loss": 1.2541, + "step": 425000 + }, + { + "epoch": 1.66, + "learning_rate": 2.2327039116754532e-05, + "loss": 1.2482, + "step": 425500 + }, + { + "epoch": 1.66, + "learning_rate": 2.2294520948854127e-05, + "loss": 1.26, + "step": 426000 + }, + { + "epoch": 1.66, + "learning_rate": 2.226200278095372e-05, + "loss": 1.2491, + "step": 426500 + }, + { + "epoch": 1.67, + "learning_rate": 2.2229484613053313e-05, + "loss": 1.2455, + "step": 427000 + }, + { + "epoch": 1.67, + "learning_rate": 2.219696644515291e-05, + "loss": 1.2415, + "step": 427500 + }, + { + "epoch": 1.67, + "learning_rate": 2.2164448277252504e-05, + "loss": 1.2429, + "step": 428000 + }, + { + "epoch": 1.67, + "learning_rate": 2.2131930109352095e-05, + "loss": 1.2357, + "step": 428500 + }, + { + "epoch": 1.67, + "learning_rate": 2.209941194145169e-05, + "loss": 1.2391, + "step": 429000 + }, + { + "epoch": 1.68, + "learning_rate": 2.2066893773551285e-05, + "loss": 1.234, + "step": 429500 + }, + { + "epoch": 1.68, + "learning_rate": 2.2034375605650877e-05, + "loss": 1.2468, + "step": 430000 + }, + { + "epoch": 1.68, + "learning_rate": 2.2001857437750472e-05, + "loss": 1.2346, + "step": 430500 + }, + { + "epoch": 1.68, + "learning_rate": 2.1969339269850067e-05, + "loss": 1.2501, + "step": 431000 + }, + { + "epoch": 1.68, + "learning_rate": 2.1936821101949662e-05, + "loss": 1.2325, + "step": 431500 + }, + { + "epoch": 1.69, + "learning_rate": 2.1904302934049254e-05, + "loss": 1.2407, + "step": 432000 + }, + { + "epoch": 1.69, + "learning_rate": 2.187178476614885e-05, + "loss": 1.2381, + "step": 432500 + }, + { + "epoch": 1.69, + "learning_rate": 2.1839266598248444e-05, + "loss": 1.239, + "step": 433000 + }, + { + "epoch": 1.69, + "learning_rate": 2.1806748430348035e-05, + "loss": 1.2338, + "step": 433500 + }, + { + "epoch": 1.69, + "learning_rate": 2.177423026244763e-05, + "loss": 1.2336, + "step": 434000 + }, + { + "epoch": 1.7, + "learning_rate": 2.1741712094547225e-05, + "loss": 1.2403, + "step": 434500 + }, + { + "epoch": 1.7, + "learning_rate": 2.1709193926646817e-05, + "loss": 1.2359, + "step": 435000 + }, + { + "epoch": 1.7, + "learning_rate": 2.1676675758746412e-05, + "loss": 1.2263, + "step": 435500 + }, + { + "epoch": 1.7, + "learning_rate": 2.1644157590846007e-05, + "loss": 1.2363, + "step": 436000 + }, + { + "epoch": 1.7, + "learning_rate": 2.1611639422945602e-05, + "loss": 1.2334, + "step": 436500 + }, + { + "epoch": 1.71, + "learning_rate": 2.1579121255045194e-05, + "loss": 1.2317, + "step": 437000 + }, + { + "epoch": 1.71, + "learning_rate": 2.154660308714479e-05, + "loss": 1.2421, + "step": 437500 + }, + { + "epoch": 1.71, + "learning_rate": 2.151408491924438e-05, + "loss": 1.2329, + "step": 438000 + }, + { + "epoch": 1.71, + "learning_rate": 2.1481566751343975e-05, + "loss": 1.2237, + "step": 438500 + }, + { + "epoch": 1.71, + "learning_rate": 2.144904858344357e-05, + "loss": 1.2316, + "step": 439000 + }, + { + "epoch": 1.72, + "learning_rate": 2.1416530415543166e-05, + "loss": 1.2239, + "step": 439500 + }, + { + "epoch": 1.72, + "learning_rate": 2.138401224764276e-05, + "loss": 1.2344, + "step": 440000 + }, + { + "epoch": 1.72, + "learning_rate": 2.1351494079742356e-05, + "loss": 1.2243, + "step": 440500 + }, + { + "epoch": 1.72, + "learning_rate": 2.1318975911841947e-05, + "loss": 1.2329, + "step": 441000 + }, + { + "epoch": 1.72, + "learning_rate": 2.128645774394154e-05, + "loss": 1.2228, + "step": 441500 + }, + { + "epoch": 1.72, + "learning_rate": 2.1253939576041134e-05, + "loss": 1.2296, + "step": 442000 + }, + { + "epoch": 1.73, + "learning_rate": 2.122142140814073e-05, + "loss": 1.22, + "step": 442500 + }, + { + "epoch": 1.73, + "learning_rate": 2.1188903240240324e-05, + "loss": 1.2317, + "step": 443000 + }, + { + "epoch": 1.73, + "learning_rate": 2.115638507233992e-05, + "loss": 1.2299, + "step": 443500 + }, + { + "epoch": 1.73, + "learning_rate": 2.1123866904439514e-05, + "loss": 1.2361, + "step": 444000 + }, + { + "epoch": 1.73, + "learning_rate": 2.1091348736539106e-05, + "loss": 1.2282, + "step": 444500 + }, + { + "epoch": 1.74, + "learning_rate": 2.1058830568638697e-05, + "loss": 1.2263, + "step": 445000 + }, + { + "epoch": 1.74, + "learning_rate": 2.1026312400738292e-05, + "loss": 1.218, + "step": 445500 + }, + { + "epoch": 1.74, + "learning_rate": 2.0993794232837887e-05, + "loss": 1.228, + "step": 446000 + }, + { + "epoch": 1.74, + "learning_rate": 2.0961276064937482e-05, + "loss": 1.2248, + "step": 446500 + }, + { + "epoch": 1.74, + "learning_rate": 2.0928757897037077e-05, + "loss": 1.2275, + "step": 447000 + }, + { + "epoch": 1.75, + "learning_rate": 2.089623972913667e-05, + "loss": 1.2223, + "step": 447500 + }, + { + "epoch": 1.75, + "learning_rate": 2.0863721561236264e-05, + "loss": 1.2224, + "step": 448000 + }, + { + "epoch": 1.75, + "learning_rate": 2.0831203393335856e-05, + "loss": 1.2217, + "step": 448500 + }, + { + "epoch": 1.75, + "learning_rate": 2.079868522543545e-05, + "loss": 1.2208, + "step": 449000 + }, + { + "epoch": 1.75, + "learning_rate": 2.0766167057535046e-05, + "loss": 1.217, + "step": 449500 + }, + { + "epoch": 1.76, + "learning_rate": 2.073364888963464e-05, + "loss": 1.214, + "step": 450000 + }, + { + "epoch": 1.76, + "learning_rate": 2.0701130721734233e-05, + "loss": 1.2142, + "step": 450500 + }, + { + "epoch": 1.76, + "learning_rate": 2.0668612553833828e-05, + "loss": 1.2072, + "step": 451000 + }, + { + "epoch": 1.76, + "learning_rate": 2.0636094385933423e-05, + "loss": 1.2121, + "step": 451500 + }, + { + "epoch": 1.76, + "learning_rate": 2.0603576218033018e-05, + "loss": 1.2185, + "step": 452000 + }, + { + "epoch": 1.77, + "learning_rate": 2.057105805013261e-05, + "loss": 1.2129, + "step": 452500 + }, + { + "epoch": 1.77, + "learning_rate": 2.0538539882232204e-05, + "loss": 1.2091, + "step": 453000 + }, + { + "epoch": 1.77, + "learning_rate": 2.05060217143318e-05, + "loss": 1.2087, + "step": 453500 + }, + { + "epoch": 1.77, + "learning_rate": 2.047350354643139e-05, + "loss": 1.2247, + "step": 454000 + }, + { + "epoch": 1.77, + "learning_rate": 2.0440985378530986e-05, + "loss": 1.2131, + "step": 454500 + }, + { + "epoch": 1.78, + "learning_rate": 2.040846721063058e-05, + "loss": 1.2172, + "step": 455000 + }, + { + "epoch": 1.78, + "learning_rate": 2.0375949042730176e-05, + "loss": 1.2044, + "step": 455500 + }, + { + "epoch": 1.78, + "learning_rate": 2.0343430874829768e-05, + "loss": 1.2159, + "step": 456000 + }, + { + "epoch": 1.78, + "learning_rate": 2.0310912706929363e-05, + "loss": 1.2087, + "step": 456500 + }, + { + "epoch": 1.78, + "learning_rate": 2.0278394539028954e-05, + "loss": 1.2067, + "step": 457000 + }, + { + "epoch": 1.79, + "learning_rate": 2.024587637112855e-05, + "loss": 1.1992, + "step": 457500 + }, + { + "epoch": 1.79, + "learning_rate": 2.0213358203228144e-05, + "loss": 1.202, + "step": 458000 + }, + { + "epoch": 1.79, + "learning_rate": 2.018084003532774e-05, + "loss": 1.2078, + "step": 458500 + }, + { + "epoch": 1.79, + "learning_rate": 2.0148321867427335e-05, + "loss": 1.2087, + "step": 459000 + }, + { + "epoch": 1.79, + "learning_rate": 2.0115803699526926e-05, + "loss": 1.2119, + "step": 459500 + }, + { + "epoch": 1.8, + "learning_rate": 2.0083285531626518e-05, + "loss": 1.2056, + "step": 460000 + }, + { + "epoch": 1.8, + "learning_rate": 2.0050767363726113e-05, + "loss": 1.1988, + "step": 460500 + }, + { + "epoch": 1.8, + "learning_rate": 2.0018249195825708e-05, + "loss": 1.2137, + "step": 461000 + }, + { + "epoch": 1.8, + "learning_rate": 1.9985731027925303e-05, + "loss": 1.2125, + "step": 461500 + }, + { + "epoch": 1.8, + "learning_rate": 1.9953212860024898e-05, + "loss": 1.2043, + "step": 462000 + }, + { + "epoch": 1.8, + "learning_rate": 1.9920694692124493e-05, + "loss": 1.2074, + "step": 462500 + }, + { + "epoch": 1.81, + "learning_rate": 1.9888176524224085e-05, + "loss": 1.2036, + "step": 463000 + }, + { + "epoch": 1.81, + "learning_rate": 1.985565835632368e-05, + "loss": 1.2005, + "step": 463500 + }, + { + "epoch": 1.81, + "learning_rate": 1.982314018842327e-05, + "loss": 1.2057, + "step": 464000 + }, + { + "epoch": 1.81, + "learning_rate": 1.9790622020522866e-05, + "loss": 1.2043, + "step": 464500 + }, + { + "epoch": 1.81, + "learning_rate": 1.975810385262246e-05, + "loss": 1.2036, + "step": 465000 + }, + { + "epoch": 1.82, + "learning_rate": 1.9725585684722056e-05, + "loss": 1.2022, + "step": 465500 + }, + { + "epoch": 1.82, + "learning_rate": 1.969306751682165e-05, + "loss": 1.2003, + "step": 466000 + }, + { + "epoch": 1.82, + "learning_rate": 1.9660549348921243e-05, + "loss": 1.1992, + "step": 466500 + }, + { + "epoch": 1.82, + "learning_rate": 1.9628031181020838e-05, + "loss": 1.2011, + "step": 467000 + }, + { + "epoch": 1.82, + "learning_rate": 1.959551301312043e-05, + "loss": 1.1997, + "step": 467500 + }, + { + "epoch": 1.83, + "learning_rate": 1.9562994845220025e-05, + "loss": 1.2033, + "step": 468000 + }, + { + "epoch": 1.83, + "learning_rate": 1.953047667731962e-05, + "loss": 1.199, + "step": 468500 + }, + { + "epoch": 1.83, + "learning_rate": 1.9497958509419215e-05, + "loss": 1.1972, + "step": 469000 + }, + { + "epoch": 1.83, + "learning_rate": 1.9465440341518807e-05, + "loss": 1.1979, + "step": 469500 + }, + { + "epoch": 1.83, + "learning_rate": 1.94329221736184e-05, + "loss": 1.2027, + "step": 470000 + }, + { + "epoch": 1.84, + "learning_rate": 1.9400404005717997e-05, + "loss": 1.1959, + "step": 470500 + }, + { + "epoch": 1.84, + "learning_rate": 1.9367885837817588e-05, + "loss": 1.1962, + "step": 471000 + }, + { + "epoch": 1.84, + "learning_rate": 1.9335367669917183e-05, + "loss": 1.1978, + "step": 471500 + }, + { + "epoch": 1.84, + "learning_rate": 1.930284950201678e-05, + "loss": 1.2043, + "step": 472000 + }, + { + "epoch": 1.84, + "learning_rate": 1.927033133411637e-05, + "loss": 1.1901, + "step": 472500 + }, + { + "epoch": 1.85, + "learning_rate": 1.9237813166215965e-05, + "loss": 1.2003, + "step": 473000 + }, + { + "epoch": 1.85, + "learning_rate": 1.920529499831556e-05, + "loss": 1.1963, + "step": 473500 + }, + { + "epoch": 1.85, + "learning_rate": 1.9172776830415155e-05, + "loss": 1.197, + "step": 474000 + }, + { + "epoch": 1.85, + "learning_rate": 1.914025866251475e-05, + "loss": 1.1969, + "step": 474500 + }, + { + "epoch": 1.85, + "learning_rate": 1.9107740494614342e-05, + "loss": 1.1896, + "step": 475000 + }, + { + "epoch": 1.86, + "learning_rate": 1.9075222326713933e-05, + "loss": 1.1928, + "step": 475500 + }, + { + "epoch": 1.86, + "learning_rate": 1.904270415881353e-05, + "loss": 1.1887, + "step": 476000 + }, + { + "epoch": 1.86, + "learning_rate": 1.9010185990913123e-05, + "loss": 1.1895, + "step": 476500 + }, + { + "epoch": 1.86, + "learning_rate": 1.897766782301272e-05, + "loss": 1.1926, + "step": 477000 + }, + { + "epoch": 1.86, + "learning_rate": 1.8945149655112314e-05, + "loss": 1.1861, + "step": 477500 + }, + { + "epoch": 1.87, + "learning_rate": 1.891263148721191e-05, + "loss": 1.2007, + "step": 478000 + }, + { + "epoch": 1.87, + "learning_rate": 1.88801133193115e-05, + "loss": 1.1893, + "step": 478500 + }, + { + "epoch": 1.87, + "learning_rate": 1.8847595151411092e-05, + "loss": 1.1894, + "step": 479000 + }, + { + "epoch": 1.87, + "learning_rate": 1.8815076983510687e-05, + "loss": 1.1954, + "step": 479500 + }, + { + "epoch": 1.87, + "learning_rate": 1.8782558815610282e-05, + "loss": 1.186, + "step": 480000 + }, + { + "epoch": 1.87, + "learning_rate": 1.8750040647709877e-05, + "loss": 1.1876, + "step": 480500 + }, + { + "epoch": 1.88, + "learning_rate": 1.8717522479809472e-05, + "loss": 1.19, + "step": 481000 + }, + { + "epoch": 1.88, + "learning_rate": 1.8685004311909067e-05, + "loss": 1.1834, + "step": 481500 + }, + { + "epoch": 1.88, + "learning_rate": 1.865248614400866e-05, + "loss": 1.1836, + "step": 482000 + }, + { + "epoch": 1.88, + "learning_rate": 1.861996797610825e-05, + "loss": 1.1895, + "step": 482500 + }, + { + "epoch": 1.88, + "learning_rate": 1.8587449808207845e-05, + "loss": 1.1894, + "step": 483000 + }, + { + "epoch": 1.89, + "learning_rate": 1.855493164030744e-05, + "loss": 1.1848, + "step": 483500 + }, + { + "epoch": 1.89, + "learning_rate": 1.8522413472407035e-05, + "loss": 1.1855, + "step": 484000 + }, + { + "epoch": 1.89, + "learning_rate": 1.848989530450663e-05, + "loss": 1.1856, + "step": 484500 + }, + { + "epoch": 1.89, + "learning_rate": 1.8457377136606222e-05, + "loss": 1.1802, + "step": 485000 + }, + { + "epoch": 1.89, + "learning_rate": 1.8424858968705817e-05, + "loss": 1.1805, + "step": 485500 + }, + { + "epoch": 1.9, + "learning_rate": 1.8392340800805412e-05, + "loss": 1.1837, + "step": 486000 + }, + { + "epoch": 1.9, + "learning_rate": 1.8359822632905004e-05, + "loss": 1.1772, + "step": 486500 + }, + { + "epoch": 1.9, + "learning_rate": 1.83273044650046e-05, + "loss": 1.1849, + "step": 487000 + }, + { + "epoch": 1.9, + "learning_rate": 1.8294786297104194e-05, + "loss": 1.1802, + "step": 487500 + }, + { + "epoch": 1.9, + "learning_rate": 1.826226812920379e-05, + "loss": 1.1749, + "step": 488000 + }, + { + "epoch": 1.91, + "learning_rate": 1.822974996130338e-05, + "loss": 1.1786, + "step": 488500 + }, + { + "epoch": 1.91, + "learning_rate": 1.8197231793402976e-05, + "loss": 1.182, + "step": 489000 + }, + { + "epoch": 1.91, + "learning_rate": 1.816471362550257e-05, + "loss": 1.1811, + "step": 489500 + }, + { + "epoch": 1.91, + "learning_rate": 1.8132195457602162e-05, + "loss": 1.1782, + "step": 490000 + }, + { + "epoch": 1.91, + "learning_rate": 1.8099677289701757e-05, + "loss": 1.1785, + "step": 490500 + }, + { + "epoch": 1.92, + "learning_rate": 1.8067159121801352e-05, + "loss": 1.1802, + "step": 491000 + }, + { + "epoch": 1.92, + "learning_rate": 1.8034640953900944e-05, + "loss": 1.1783, + "step": 491500 + }, + { + "epoch": 1.92, + "learning_rate": 1.800212278600054e-05, + "loss": 1.1862, + "step": 492000 + }, + { + "epoch": 1.92, + "learning_rate": 1.7969604618100134e-05, + "loss": 1.1749, + "step": 492500 + }, + { + "epoch": 1.92, + "learning_rate": 1.793708645019973e-05, + "loss": 1.1816, + "step": 493000 + }, + { + "epoch": 1.93, + "learning_rate": 1.790456828229932e-05, + "loss": 1.175, + "step": 493500 + }, + { + "epoch": 1.93, + "learning_rate": 1.7872050114398916e-05, + "loss": 1.178, + "step": 494000 + }, + { + "epoch": 1.93, + "learning_rate": 1.7839531946498507e-05, + "loss": 1.1739, + "step": 494500 + }, + { + "epoch": 1.93, + "learning_rate": 1.7807013778598102e-05, + "loss": 1.1811, + "step": 495000 + }, + { + "epoch": 1.93, + "learning_rate": 1.7774495610697697e-05, + "loss": 1.1812, + "step": 495500 + }, + { + "epoch": 1.94, + "learning_rate": 1.7741977442797292e-05, + "loss": 1.1772, + "step": 496000 + }, + { + "epoch": 1.94, + "learning_rate": 1.7709459274896887e-05, + "loss": 1.1761, + "step": 496500 + }, + { + "epoch": 1.94, + "learning_rate": 1.767694110699648e-05, + "loss": 1.1731, + "step": 497000 + }, + { + "epoch": 1.94, + "learning_rate": 1.764442293909607e-05, + "loss": 1.1655, + "step": 497500 + }, + { + "epoch": 1.94, + "learning_rate": 1.7611904771195666e-05, + "loss": 1.1715, + "step": 498000 + }, + { + "epoch": 1.95, + "learning_rate": 1.757938660329526e-05, + "loss": 1.1777, + "step": 498500 + }, + { + "epoch": 1.95, + "learning_rate": 1.7546868435394856e-05, + "loss": 1.172, + "step": 499000 + }, + { + "epoch": 1.95, + "learning_rate": 1.751435026749445e-05, + "loss": 1.1716, + "step": 499500 + }, + { + "epoch": 1.95, + "learning_rate": 1.7481832099594046e-05, + "loss": 1.1733, + "step": 500000 + }, + { + "epoch": 1.95, + "learning_rate": 1.744931393169364e-05, + "loss": 1.1745, + "step": 500500 + }, + { + "epoch": 1.95, + "learning_rate": 1.7416795763793233e-05, + "loss": 1.1722, + "step": 501000 + }, + { + "epoch": 1.96, + "learning_rate": 1.7384277595892824e-05, + "loss": 1.1628, + "step": 501500 + }, + { + "epoch": 1.96, + "learning_rate": 1.735175942799242e-05, + "loss": 1.1659, + "step": 502000 + }, + { + "epoch": 1.96, + "learning_rate": 1.7319241260092014e-05, + "loss": 1.1742, + "step": 502500 + }, + { + "epoch": 1.96, + "learning_rate": 1.728672309219161e-05, + "loss": 1.1674, + "step": 503000 + }, + { + "epoch": 1.96, + "learning_rate": 1.7254204924291204e-05, + "loss": 1.1704, + "step": 503500 + }, + { + "epoch": 1.97, + "learning_rate": 1.7221686756390796e-05, + "loss": 1.164, + "step": 504000 + }, + { + "epoch": 1.97, + "learning_rate": 1.718916858849039e-05, + "loss": 1.1759, + "step": 504500 + }, + { + "epoch": 1.97, + "learning_rate": 1.7156650420589983e-05, + "loss": 1.1658, + "step": 505000 + }, + { + "epoch": 1.97, + "learning_rate": 1.7124132252689578e-05, + "loss": 1.164, + "step": 505500 + }, + { + "epoch": 1.97, + "learning_rate": 1.7091614084789173e-05, + "loss": 1.1667, + "step": 506000 + }, + { + "epoch": 1.98, + "learning_rate": 1.7059095916888768e-05, + "loss": 1.1729, + "step": 506500 + }, + { + "epoch": 1.98, + "learning_rate": 1.702657774898836e-05, + "loss": 1.1668, + "step": 507000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6994059581087954e-05, + "loss": 1.1709, + "step": 507500 + }, + { + "epoch": 1.98, + "learning_rate": 1.696154141318755e-05, + "loss": 1.1647, + "step": 508000 + }, + { + "epoch": 1.98, + "learning_rate": 1.692902324528714e-05, + "loss": 1.1599, + "step": 508500 + }, + { + "epoch": 1.99, + "learning_rate": 1.6896505077386736e-05, + "loss": 1.1652, + "step": 509000 + }, + { + "epoch": 1.99, + "learning_rate": 1.686398690948633e-05, + "loss": 1.1587, + "step": 509500 + }, + { + "epoch": 1.99, + "learning_rate": 1.6831468741585926e-05, + "loss": 1.1691, + "step": 510000 + }, + { + "epoch": 1.99, + "learning_rate": 1.6798950573685518e-05, + "loss": 1.1643, + "step": 510500 + }, + { + "epoch": 1.99, + "learning_rate": 1.6766432405785113e-05, + "loss": 1.1606, + "step": 511000 + }, + { + "epoch": 2.0, + "learning_rate": 1.6733914237884708e-05, + "loss": 1.1571, + "step": 511500 + }, + { + "epoch": 2.0, + "learning_rate": 1.6701396069984303e-05, + "loss": 1.1628, + "step": 512000 + }, + { + "epoch": 2.0, + "learning_rate": 1.6668877902083895e-05, + "loss": 1.1619, + "step": 512500 + }, + { + "epoch": 2.0, + "learning_rate": 1.663635973418349e-05, + "loss": 1.1597, + "step": 513000 + }, + { + "epoch": 2.0, + "learning_rate": 1.660384156628308e-05, + "loss": 1.1605, + "step": 513500 + }, + { + "epoch": 2.01, + "learning_rate": 1.6571323398382676e-05, + "loss": 1.1593, + "step": 514000 + }, + { + "epoch": 2.01, + "learning_rate": 1.653880523048227e-05, + "loss": 1.167, + "step": 514500 + }, + { + "epoch": 2.01, + "learning_rate": 1.6506287062581866e-05, + "loss": 1.1579, + "step": 515000 + }, + { + "epoch": 2.01, + "learning_rate": 1.647376889468146e-05, + "loss": 1.1611, + "step": 515500 + }, + { + "epoch": 2.01, + "learning_rate": 1.6441250726781053e-05, + "loss": 1.1498, + "step": 516000 + }, + { + "epoch": 2.02, + "learning_rate": 1.6408732558880645e-05, + "loss": 1.1566, + "step": 516500 + }, + { + "epoch": 2.02, + "learning_rate": 1.637621439098024e-05, + "loss": 1.1596, + "step": 517000 + }, + { + "epoch": 2.02, + "learning_rate": 1.6343696223079835e-05, + "loss": 1.1514, + "step": 517500 + }, + { + "epoch": 2.02, + "learning_rate": 1.631117805517943e-05, + "loss": 1.1478, + "step": 518000 + }, + { + "epoch": 2.02, + "learning_rate": 1.6278659887279025e-05, + "loss": 1.1547, + "step": 518500 + }, + { + "epoch": 2.03, + "learning_rate": 1.624614171937862e-05, + "loss": 1.1621, + "step": 519000 + }, + { + "epoch": 2.03, + "learning_rate": 1.621362355147821e-05, + "loss": 1.1557, + "step": 519500 + }, + { + "epoch": 2.03, + "learning_rate": 1.6181105383577803e-05, + "loss": 1.1481, + "step": 520000 + }, + { + "epoch": 2.03, + "learning_rate": 1.6148587215677398e-05, + "loss": 1.1573, + "step": 520500 + }, + { + "epoch": 2.03, + "learning_rate": 1.6116069047776993e-05, + "loss": 1.1651, + "step": 521000 + }, + { + "epoch": 2.03, + "learning_rate": 1.608355087987659e-05, + "loss": 1.1524, + "step": 521500 + }, + { + "epoch": 2.04, + "learning_rate": 1.6051032711976183e-05, + "loss": 1.151, + "step": 522000 + }, + { + "epoch": 2.04, + "learning_rate": 1.601851454407578e-05, + "loss": 1.1594, + "step": 522500 + }, + { + "epoch": 2.04, + "learning_rate": 1.598599637617537e-05, + "loss": 1.1506, + "step": 523000 + }, + { + "epoch": 2.04, + "learning_rate": 1.5953478208274965e-05, + "loss": 1.1606, + "step": 523500 + }, + { + "epoch": 2.04, + "learning_rate": 1.5920960040374557e-05, + "loss": 1.1546, + "step": 524000 + }, + { + "epoch": 2.05, + "learning_rate": 1.5888441872474152e-05, + "loss": 1.1559, + "step": 524500 + }, + { + "epoch": 2.05, + "learning_rate": 1.5855923704573747e-05, + "loss": 1.1504, + "step": 525000 + }, + { + "epoch": 2.05, + "learning_rate": 1.5823405536673342e-05, + "loss": 1.1538, + "step": 525500 + }, + { + "epoch": 2.05, + "learning_rate": 1.5790887368772933e-05, + "loss": 1.1498, + "step": 526000 + }, + { + "epoch": 2.05, + "learning_rate": 1.575836920087253e-05, + "loss": 1.1576, + "step": 526500 + }, + { + "epoch": 2.06, + "learning_rate": 1.5725851032972124e-05, + "loss": 1.1574, + "step": 527000 + }, + { + "epoch": 2.06, + "learning_rate": 1.5693332865071715e-05, + "loss": 1.1434, + "step": 527500 + }, + { + "epoch": 2.06, + "learning_rate": 1.566081469717131e-05, + "loss": 1.1451, + "step": 528000 + }, + { + "epoch": 2.06, + "learning_rate": 1.5628296529270905e-05, + "loss": 1.1555, + "step": 528500 + }, + { + "epoch": 2.06, + "learning_rate": 1.5595778361370497e-05, + "loss": 1.1537, + "step": 529000 + }, + { + "epoch": 2.07, + "learning_rate": 1.5563260193470092e-05, + "loss": 1.153, + "step": 529500 + }, + { + "epoch": 2.07, + "learning_rate": 1.5530742025569687e-05, + "loss": 1.1526, + "step": 530000 + }, + { + "epoch": 2.07, + "learning_rate": 1.5498223857669282e-05, + "loss": 1.1589, + "step": 530500 + }, + { + "epoch": 2.07, + "learning_rate": 1.5465705689768874e-05, + "loss": 1.1471, + "step": 531000 + }, + { + "epoch": 2.07, + "learning_rate": 1.543318752186847e-05, + "loss": 1.1536, + "step": 531500 + }, + { + "epoch": 2.08, + "learning_rate": 1.540066935396806e-05, + "loss": 1.1498, + "step": 532000 + }, + { + "epoch": 2.08, + "learning_rate": 1.5368151186067655e-05, + "loss": 1.1515, + "step": 532500 + }, + { + "epoch": 2.08, + "learning_rate": 1.533563301816725e-05, + "loss": 1.1455, + "step": 533000 + }, + { + "epoch": 2.08, + "learning_rate": 1.5303114850266845e-05, + "loss": 1.1417, + "step": 533500 + }, + { + "epoch": 2.08, + "learning_rate": 1.527059668236644e-05, + "loss": 1.1472, + "step": 534000 + }, + { + "epoch": 2.09, + "learning_rate": 1.5238078514466034e-05, + "loss": 1.1419, + "step": 534500 + }, + { + "epoch": 2.09, + "learning_rate": 1.5205560346565629e-05, + "loss": 1.1446, + "step": 535000 + }, + { + "epoch": 2.09, + "learning_rate": 1.517304217866522e-05, + "loss": 1.1467, + "step": 535500 + }, + { + "epoch": 2.09, + "learning_rate": 1.5140524010764814e-05, + "loss": 1.1511, + "step": 536000 + }, + { + "epoch": 2.09, + "learning_rate": 1.5108005842864409e-05, + "loss": 1.1476, + "step": 536500 + }, + { + "epoch": 2.1, + "learning_rate": 1.5075487674964004e-05, + "loss": 1.1456, + "step": 537000 + }, + { + "epoch": 2.1, + "learning_rate": 1.5042969507063597e-05, + "loss": 1.146, + "step": 537500 + }, + { + "epoch": 2.1, + "learning_rate": 1.5010451339163192e-05, + "loss": 1.1491, + "step": 538000 + }, + { + "epoch": 2.1, + "learning_rate": 1.4977933171262784e-05, + "loss": 1.1476, + "step": 538500 + }, + { + "epoch": 2.1, + "learning_rate": 1.4945415003362379e-05, + "loss": 1.1413, + "step": 539000 + }, + { + "epoch": 2.11, + "learning_rate": 1.4912896835461972e-05, + "loss": 1.1439, + "step": 539500 + }, + { + "epoch": 2.11, + "learning_rate": 1.4880378667561567e-05, + "loss": 1.1463, + "step": 540000 + }, + { + "epoch": 2.11, + "learning_rate": 1.4847860499661162e-05, + "loss": 1.1464, + "step": 540500 + }, + { + "epoch": 2.11, + "learning_rate": 1.4815342331760756e-05, + "loss": 1.1368, + "step": 541000 + }, + { + "epoch": 2.11, + "learning_rate": 1.4782824163860349e-05, + "loss": 1.1439, + "step": 541500 + }, + { + "epoch": 2.11, + "learning_rate": 1.4750305995959942e-05, + "loss": 1.1426, + "step": 542000 + }, + { + "epoch": 2.12, + "learning_rate": 1.4717787828059537e-05, + "loss": 1.1357, + "step": 542500 + }, + { + "epoch": 2.12, + "learning_rate": 1.4685269660159132e-05, + "loss": 1.1489, + "step": 543000 + }, + { + "epoch": 2.12, + "learning_rate": 1.4652751492258726e-05, + "loss": 1.1383, + "step": 543500 + }, + { + "epoch": 2.12, + "learning_rate": 1.462023332435832e-05, + "loss": 1.1401, + "step": 544000 + }, + { + "epoch": 2.12, + "learning_rate": 1.4587715156457916e-05, + "loss": 1.1387, + "step": 544500 + }, + { + "epoch": 2.13, + "learning_rate": 1.4555196988557507e-05, + "loss": 1.1428, + "step": 545000 + }, + { + "epoch": 2.13, + "learning_rate": 1.45226788206571e-05, + "loss": 1.1404, + "step": 545500 + }, + { + "epoch": 2.13, + "learning_rate": 1.4490160652756696e-05, + "loss": 1.1366, + "step": 546000 + }, + { + "epoch": 2.13, + "learning_rate": 1.445764248485629e-05, + "loss": 1.1338, + "step": 546500 + }, + { + "epoch": 2.13, + "learning_rate": 1.4425124316955884e-05, + "loss": 1.1323, + "step": 547000 + }, + { + "epoch": 2.14, + "learning_rate": 1.439260614905548e-05, + "loss": 1.1413, + "step": 547500 + }, + { + "epoch": 2.14, + "learning_rate": 1.4360087981155071e-05, + "loss": 1.1357, + "step": 548000 + }, + { + "epoch": 2.14, + "learning_rate": 1.4327569813254666e-05, + "loss": 1.1352, + "step": 548500 + }, + { + "epoch": 2.14, + "learning_rate": 1.429505164535426e-05, + "loss": 1.1406, + "step": 549000 + }, + { + "epoch": 2.14, + "learning_rate": 1.4262533477453854e-05, + "loss": 1.1359, + "step": 549500 + }, + { + "epoch": 2.15, + "learning_rate": 1.423001530955345e-05, + "loss": 1.1319, + "step": 550000 + }, + { + "epoch": 2.15, + "learning_rate": 1.4197497141653043e-05, + "loss": 1.1335, + "step": 550500 + }, + { + "epoch": 2.15, + "learning_rate": 1.4164978973752634e-05, + "loss": 1.1293, + "step": 551000 + }, + { + "epoch": 2.15, + "learning_rate": 1.413246080585223e-05, + "loss": 1.1322, + "step": 551500 + }, + { + "epoch": 2.15, + "learning_rate": 1.4099942637951824e-05, + "loss": 1.135, + "step": 552000 + }, + { + "epoch": 2.16, + "learning_rate": 1.4067424470051418e-05, + "loss": 1.1328, + "step": 552500 + }, + { + "epoch": 2.16, + "learning_rate": 1.4034906302151013e-05, + "loss": 1.1359, + "step": 553000 + }, + { + "epoch": 2.16, + "learning_rate": 1.4002388134250608e-05, + "loss": 1.1387, + "step": 553500 + }, + { + "epoch": 2.16, + "learning_rate": 1.39698699663502e-05, + "loss": 1.1368, + "step": 554000 + }, + { + "epoch": 2.16, + "learning_rate": 1.3937351798449794e-05, + "loss": 1.1342, + "step": 554500 + }, + { + "epoch": 2.17, + "learning_rate": 1.3904833630549388e-05, + "loss": 1.1309, + "step": 555000 + }, + { + "epoch": 2.17, + "learning_rate": 1.3872315462648983e-05, + "loss": 1.1427, + "step": 555500 + }, + { + "epoch": 2.17, + "learning_rate": 1.3839797294748578e-05, + "loss": 1.1333, + "step": 556000 + }, + { + "epoch": 2.17, + "learning_rate": 1.3807279126848171e-05, + "loss": 1.1328, + "step": 556500 + }, + { + "epoch": 2.17, + "learning_rate": 1.3774760958947766e-05, + "loss": 1.1394, + "step": 557000 + }, + { + "epoch": 2.18, + "learning_rate": 1.3742242791047358e-05, + "loss": 1.1368, + "step": 557500 + }, + { + "epoch": 2.18, + "learning_rate": 1.3709724623146953e-05, + "loss": 1.1344, + "step": 558000 + }, + { + "epoch": 2.18, + "learning_rate": 1.3677206455246546e-05, + "loss": 1.1375, + "step": 558500 + }, + { + "epoch": 2.18, + "learning_rate": 1.3644688287346141e-05, + "loss": 1.1308, + "step": 559000 + }, + { + "epoch": 2.18, + "learning_rate": 1.3612170119445736e-05, + "loss": 1.1339, + "step": 559500 + }, + { + "epoch": 2.19, + "learning_rate": 1.357965195154533e-05, + "loss": 1.1252, + "step": 560000 + }, + { + "epoch": 2.19, + "learning_rate": 1.3547133783644921e-05, + "loss": 1.1296, + "step": 560500 + }, + { + "epoch": 2.19, + "learning_rate": 1.3514615615744516e-05, + "loss": 1.1386, + "step": 561000 + }, + { + "epoch": 2.19, + "learning_rate": 1.3482097447844111e-05, + "loss": 1.1379, + "step": 561500 + }, + { + "epoch": 2.19, + "learning_rate": 1.3449579279943705e-05, + "loss": 1.1262, + "step": 562000 + }, + { + "epoch": 2.19, + "learning_rate": 1.34170611120433e-05, + "loss": 1.1237, + "step": 562500 + }, + { + "epoch": 2.2, + "learning_rate": 1.3384542944142895e-05, + "loss": 1.1263, + "step": 563000 + }, + { + "epoch": 2.2, + "learning_rate": 1.3352024776242486e-05, + "loss": 1.1255, + "step": 563500 + }, + { + "epoch": 2.2, + "learning_rate": 1.331950660834208e-05, + "loss": 1.1362, + "step": 564000 + }, + { + "epoch": 2.2, + "learning_rate": 1.3286988440441675e-05, + "loss": 1.1202, + "step": 564500 + }, + { + "epoch": 2.2, + "learning_rate": 1.325447027254127e-05, + "loss": 1.1261, + "step": 565000 + }, + { + "epoch": 2.21, + "learning_rate": 1.3221952104640865e-05, + "loss": 1.128, + "step": 565500 + }, + { + "epoch": 2.21, + "learning_rate": 1.3189433936740458e-05, + "loss": 1.1336, + "step": 566000 + }, + { + "epoch": 2.21, + "learning_rate": 1.3156915768840053e-05, + "loss": 1.1263, + "step": 566500 + }, + { + "epoch": 2.21, + "learning_rate": 1.3124397600939645e-05, + "loss": 1.1338, + "step": 567000 + }, + { + "epoch": 2.21, + "learning_rate": 1.309187943303924e-05, + "loss": 1.1248, + "step": 567500 + }, + { + "epoch": 2.22, + "learning_rate": 1.3059361265138833e-05, + "loss": 1.1261, + "step": 568000 + }, + { + "epoch": 2.22, + "learning_rate": 1.3026843097238428e-05, + "loss": 1.1328, + "step": 568500 + }, + { + "epoch": 2.22, + "learning_rate": 1.2994324929338023e-05, + "loss": 1.1213, + "step": 569000 + }, + { + "epoch": 2.22, + "learning_rate": 1.2961806761437617e-05, + "loss": 1.1279, + "step": 569500 + }, + { + "epoch": 2.22, + "learning_rate": 1.2929288593537208e-05, + "loss": 1.1243, + "step": 570000 + }, + { + "epoch": 2.23, + "learning_rate": 1.2896770425636803e-05, + "loss": 1.1201, + "step": 570500 + }, + { + "epoch": 2.23, + "learning_rate": 1.2864252257736398e-05, + "loss": 1.1245, + "step": 571000 + }, + { + "epoch": 2.23, + "learning_rate": 1.2831734089835992e-05, + "loss": 1.1251, + "step": 571500 + }, + { + "epoch": 2.23, + "learning_rate": 1.2799215921935587e-05, + "loss": 1.1285, + "step": 572000 + }, + { + "epoch": 2.23, + "learning_rate": 1.2766697754035182e-05, + "loss": 1.1276, + "step": 572500 + }, + { + "epoch": 2.24, + "learning_rate": 1.2734179586134773e-05, + "loss": 1.1199, + "step": 573000 + }, + { + "epoch": 2.24, + "learning_rate": 1.2701661418234367e-05, + "loss": 1.1215, + "step": 573500 + }, + { + "epoch": 2.24, + "learning_rate": 1.2669143250333962e-05, + "loss": 1.1203, + "step": 574000 + }, + { + "epoch": 2.24, + "learning_rate": 1.2636625082433557e-05, + "loss": 1.1168, + "step": 574500 + }, + { + "epoch": 2.24, + "learning_rate": 1.260410691453315e-05, + "loss": 1.1223, + "step": 575000 + }, + { + "epoch": 2.25, + "learning_rate": 1.2571588746632745e-05, + "loss": 1.117, + "step": 575500 + }, + { + "epoch": 2.25, + "learning_rate": 1.2539070578732337e-05, + "loss": 1.1251, + "step": 576000 + }, + { + "epoch": 2.25, + "learning_rate": 1.2506552410831932e-05, + "loss": 1.1169, + "step": 576500 + }, + { + "epoch": 2.25, + "learning_rate": 1.2474034242931527e-05, + "loss": 1.1183, + "step": 577000 + }, + { + "epoch": 2.25, + "learning_rate": 1.244151607503112e-05, + "loss": 1.1251, + "step": 577500 + }, + { + "epoch": 2.26, + "learning_rate": 1.2408997907130715e-05, + "loss": 1.1188, + "step": 578000 + }, + { + "epoch": 2.26, + "learning_rate": 1.2376479739230309e-05, + "loss": 1.1198, + "step": 578500 + }, + { + "epoch": 2.26, + "learning_rate": 1.2343961571329902e-05, + "loss": 1.1304, + "step": 579000 + }, + { + "epoch": 2.26, + "learning_rate": 1.2311443403429497e-05, + "loss": 1.121, + "step": 579500 + }, + { + "epoch": 2.26, + "learning_rate": 1.227892523552909e-05, + "loss": 1.1215, + "step": 580000 + }, + { + "epoch": 2.27, + "learning_rate": 1.2246407067628685e-05, + "loss": 1.1213, + "step": 580500 + }, + { + "epoch": 2.27, + "learning_rate": 1.2213888899728279e-05, + "loss": 1.1229, + "step": 581000 + }, + { + "epoch": 2.27, + "learning_rate": 1.2181370731827872e-05, + "loss": 1.1167, + "step": 581500 + }, + { + "epoch": 2.27, + "learning_rate": 1.2148852563927467e-05, + "loss": 1.1308, + "step": 582000 + }, + { + "epoch": 2.27, + "learning_rate": 1.211633439602706e-05, + "loss": 1.1189, + "step": 582500 + }, + { + "epoch": 2.27, + "learning_rate": 1.2083816228126654e-05, + "loss": 1.1161, + "step": 583000 + }, + { + "epoch": 2.28, + "learning_rate": 1.2051298060226249e-05, + "loss": 1.1188, + "step": 583500 + }, + { + "epoch": 2.28, + "learning_rate": 1.2018779892325844e-05, + "loss": 1.1157, + "step": 584000 + }, + { + "epoch": 2.28, + "learning_rate": 1.1986261724425437e-05, + "loss": 1.1145, + "step": 584500 + }, + { + "epoch": 2.28, + "learning_rate": 1.195374355652503e-05, + "loss": 1.1179, + "step": 585000 + }, + { + "epoch": 2.28, + "learning_rate": 1.1921225388624625e-05, + "loss": 1.1155, + "step": 585500 + }, + { + "epoch": 2.29, + "learning_rate": 1.188870722072422e-05, + "loss": 1.1277, + "step": 586000 + }, + { + "epoch": 2.29, + "learning_rate": 1.1856189052823812e-05, + "loss": 1.1162, + "step": 586500 + }, + { + "epoch": 2.29, + "learning_rate": 1.1823670884923407e-05, + "loss": 1.122, + "step": 587000 + }, + { + "epoch": 2.29, + "learning_rate": 1.1791152717023002e-05, + "loss": 1.1188, + "step": 587500 + }, + { + "epoch": 2.29, + "learning_rate": 1.1758634549122596e-05, + "loss": 1.1187, + "step": 588000 + }, + { + "epoch": 2.3, + "learning_rate": 1.1726116381222189e-05, + "loss": 1.108, + "step": 588500 + }, + { + "epoch": 2.3, + "learning_rate": 1.1693598213321784e-05, + "loss": 1.1199, + "step": 589000 + }, + { + "epoch": 2.3, + "learning_rate": 1.1661080045421377e-05, + "loss": 1.1125, + "step": 589500 + }, + { + "epoch": 2.3, + "learning_rate": 1.1628561877520972e-05, + "loss": 1.117, + "step": 590000 + }, + { + "epoch": 2.3, + "learning_rate": 1.1596043709620566e-05, + "loss": 1.1159, + "step": 590500 + }, + { + "epoch": 2.31, + "learning_rate": 1.1563525541720159e-05, + "loss": 1.1172, + "step": 591000 + }, + { + "epoch": 2.31, + "learning_rate": 1.1531007373819754e-05, + "loss": 1.1136, + "step": 591500 + }, + { + "epoch": 2.31, + "learning_rate": 1.1498489205919347e-05, + "loss": 1.1146, + "step": 592000 + }, + { + "epoch": 2.31, + "learning_rate": 1.146597103801894e-05, + "loss": 1.111, + "step": 592500 + }, + { + "epoch": 2.31, + "learning_rate": 1.1433452870118536e-05, + "loss": 1.1129, + "step": 593000 + }, + { + "epoch": 2.32, + "learning_rate": 1.140093470221813e-05, + "loss": 1.1069, + "step": 593500 + }, + { + "epoch": 2.32, + "learning_rate": 1.1368416534317722e-05, + "loss": 1.1168, + "step": 594000 + }, + { + "epoch": 2.32, + "learning_rate": 1.1335898366417317e-05, + "loss": 1.1157, + "step": 594500 + }, + { + "epoch": 2.32, + "learning_rate": 1.1303380198516912e-05, + "loss": 1.1111, + "step": 595000 + }, + { + "epoch": 2.32, + "learning_rate": 1.1270862030616506e-05, + "loss": 1.1084, + "step": 595500 + }, + { + "epoch": 2.33, + "learning_rate": 1.1238343862716099e-05, + "loss": 1.1132, + "step": 596000 + }, + { + "epoch": 2.33, + "learning_rate": 1.1205825694815694e-05, + "loss": 1.1139, + "step": 596500 + }, + { + "epoch": 2.33, + "learning_rate": 1.117330752691529e-05, + "loss": 1.11, + "step": 597000 + }, + { + "epoch": 2.33, + "learning_rate": 1.1140789359014883e-05, + "loss": 1.1162, + "step": 597500 + }, + { + "epoch": 2.33, + "learning_rate": 1.1108271191114476e-05, + "loss": 1.11, + "step": 598000 + }, + { + "epoch": 2.34, + "learning_rate": 1.1075753023214071e-05, + "loss": 1.1106, + "step": 598500 + }, + { + "epoch": 2.34, + "learning_rate": 1.1043234855313664e-05, + "loss": 1.1054, + "step": 599000 + }, + { + "epoch": 2.34, + "learning_rate": 1.1010716687413258e-05, + "loss": 1.1091, + "step": 599500 + }, + { + "epoch": 2.34, + "learning_rate": 1.0978198519512853e-05, + "loss": 1.1067, + "step": 600000 + }, + { + "epoch": 2.34, + "learning_rate": 1.0945680351612446e-05, + "loss": 1.1033, + "step": 600500 + }, + { + "epoch": 2.35, + "learning_rate": 1.0913162183712041e-05, + "loss": 1.1133, + "step": 601000 + }, + { + "epoch": 2.35, + "learning_rate": 1.0880644015811634e-05, + "loss": 1.1101, + "step": 601500 + }, + { + "epoch": 2.35, + "learning_rate": 1.0848125847911228e-05, + "loss": 1.1105, + "step": 602000 + }, + { + "epoch": 2.35, + "learning_rate": 1.0815607680010823e-05, + "loss": 1.1083, + "step": 602500 + }, + { + "epoch": 2.35, + "learning_rate": 1.0783089512110418e-05, + "loss": 1.1077, + "step": 603000 + }, + { + "epoch": 2.35, + "learning_rate": 1.075057134421001e-05, + "loss": 1.1051, + "step": 603500 + }, + { + "epoch": 2.36, + "learning_rate": 1.0718053176309604e-05, + "loss": 1.1064, + "step": 604000 + }, + { + "epoch": 2.36, + "learning_rate": 1.06855350084092e-05, + "loss": 1.1054, + "step": 604500 + }, + { + "epoch": 2.36, + "learning_rate": 1.0653016840508793e-05, + "loss": 1.1107, + "step": 605000 + }, + { + "epoch": 2.36, + "learning_rate": 1.0620498672608386e-05, + "loss": 1.1031, + "step": 605500 + }, + { + "epoch": 2.36, + "learning_rate": 1.0587980504707981e-05, + "loss": 1.1097, + "step": 606000 + }, + { + "epoch": 2.37, + "learning_rate": 1.0555462336807576e-05, + "loss": 1.1074, + "step": 606500 + }, + { + "epoch": 2.37, + "learning_rate": 1.0522944168907168e-05, + "loss": 1.1144, + "step": 607000 + }, + { + "epoch": 2.37, + "learning_rate": 1.0490426001006763e-05, + "loss": 1.1106, + "step": 607500 + }, + { + "epoch": 2.37, + "learning_rate": 1.0457907833106358e-05, + "loss": 1.1091, + "step": 608000 + }, + { + "epoch": 2.37, + "learning_rate": 1.0425389665205951e-05, + "loss": 1.1057, + "step": 608500 + }, + { + "epoch": 2.38, + "learning_rate": 1.0392871497305545e-05, + "loss": 1.1066, + "step": 609000 + }, + { + "epoch": 2.38, + "learning_rate": 1.036035332940514e-05, + "loss": 1.108, + "step": 609500 + }, + { + "epoch": 2.38, + "learning_rate": 1.0327835161504733e-05, + "loss": 1.1104, + "step": 610000 + }, + { + "epoch": 2.38, + "learning_rate": 1.0295316993604328e-05, + "loss": 1.1158, + "step": 610500 + }, + { + "epoch": 2.38, + "learning_rate": 1.0262798825703921e-05, + "loss": 1.109, + "step": 611000 + }, + { + "epoch": 2.39, + "learning_rate": 1.0230280657803515e-05, + "loss": 1.1011, + "step": 611500 + }, + { + "epoch": 2.39, + "learning_rate": 1.019776248990311e-05, + "loss": 1.0989, + "step": 612000 + }, + { + "epoch": 2.39, + "learning_rate": 1.0165244322002703e-05, + "loss": 1.109, + "step": 612500 + }, + { + "epoch": 2.39, + "learning_rate": 1.0132726154102296e-05, + "loss": 1.0999, + "step": 613000 + }, + { + "epoch": 2.39, + "learning_rate": 1.0100207986201891e-05, + "loss": 1.1091, + "step": 613500 + }, + { + "epoch": 2.4, + "learning_rate": 1.0067689818301486e-05, + "loss": 1.1033, + "step": 614000 + }, + { + "epoch": 2.4, + "learning_rate": 1.003517165040108e-05, + "loss": 1.1087, + "step": 614500 + }, + { + "epoch": 2.4, + "learning_rate": 1.0002653482500673e-05, + "loss": 1.0964, + "step": 615000 + }, + { + "epoch": 2.4, + "learning_rate": 9.970135314600268e-06, + "loss": 1.0946, + "step": 615500 + }, + { + "epoch": 2.4, + "learning_rate": 9.937617146699861e-06, + "loss": 1.0994, + "step": 616000 + }, + { + "epoch": 2.41, + "learning_rate": 9.905098978799455e-06, + "loss": 1.1078, + "step": 616500 + }, + { + "epoch": 2.41, + "learning_rate": 9.87258081089905e-06, + "loss": 1.101, + "step": 617000 + }, + { + "epoch": 2.41, + "learning_rate": 9.840062642998645e-06, + "loss": 1.1085, + "step": 617500 + }, + { + "epoch": 2.41, + "learning_rate": 9.807544475098238e-06, + "loss": 1.1083, + "step": 618000 + }, + { + "epoch": 2.41, + "learning_rate": 9.775026307197832e-06, + "loss": 1.1069, + "step": 618500 + }, + { + "epoch": 2.42, + "learning_rate": 9.742508139297427e-06, + "loss": 1.1041, + "step": 619000 + }, + { + "epoch": 2.42, + "learning_rate": 9.70998997139702e-06, + "loss": 1.1056, + "step": 619500 + }, + { + "epoch": 2.42, + "learning_rate": 9.677471803496615e-06, + "loss": 1.1013, + "step": 620000 + }, + { + "epoch": 2.42, + "learning_rate": 9.644953635596208e-06, + "loss": 1.1007, + "step": 620500 + }, + { + "epoch": 2.42, + "learning_rate": 9.612435467695802e-06, + "loss": 1.1092, + "step": 621000 + }, + { + "epoch": 2.43, + "learning_rate": 9.579917299795397e-06, + "loss": 1.104, + "step": 621500 + }, + { + "epoch": 2.43, + "learning_rate": 9.54739913189499e-06, + "loss": 1.098, + "step": 622000 + }, + { + "epoch": 2.43, + "learning_rate": 9.514880963994583e-06, + "loss": 1.0999, + "step": 622500 + }, + { + "epoch": 2.43, + "learning_rate": 9.482362796094178e-06, + "loss": 1.0956, + "step": 623000 + }, + { + "epoch": 2.43, + "learning_rate": 9.449844628193773e-06, + "loss": 1.1052, + "step": 623500 + }, + { + "epoch": 2.43, + "learning_rate": 9.417326460293365e-06, + "loss": 1.1057, + "step": 624000 + }, + { + "epoch": 2.44, + "learning_rate": 9.38480829239296e-06, + "loss": 1.0962, + "step": 624500 + }, + { + "epoch": 2.44, + "learning_rate": 9.352290124492555e-06, + "loss": 1.103, + "step": 625000 + }, + { + "epoch": 2.44, + "learning_rate": 9.319771956592148e-06, + "loss": 1.1092, + "step": 625500 + }, + { + "epoch": 2.44, + "learning_rate": 9.287253788691742e-06, + "loss": 1.0881, + "step": 626000 + }, + { + "epoch": 2.44, + "learning_rate": 9.254735620791337e-06, + "loss": 1.0999, + "step": 626500 + }, + { + "epoch": 2.45, + "learning_rate": 9.22221745289093e-06, + "loss": 1.1054, + "step": 627000 + }, + { + "epoch": 2.45, + "learning_rate": 9.189699284990525e-06, + "loss": 1.0941, + "step": 627500 + }, + { + "epoch": 2.45, + "learning_rate": 9.157181117090119e-06, + "loss": 1.1027, + "step": 628000 + }, + { + "epoch": 2.45, + "learning_rate": 9.124662949189712e-06, + "loss": 1.0986, + "step": 628500 + }, + { + "epoch": 2.45, + "learning_rate": 9.092144781289307e-06, + "loss": 1.0928, + "step": 629000 + }, + { + "epoch": 2.46, + "learning_rate": 9.0596266133889e-06, + "loss": 1.0906, + "step": 629500 + }, + { + "epoch": 2.46, + "learning_rate": 9.027108445488495e-06, + "loss": 1.0925, + "step": 630000 + }, + { + "epoch": 2.46, + "learning_rate": 8.994590277588089e-06, + "loss": 1.0926, + "step": 630500 + }, + { + "epoch": 2.46, + "learning_rate": 8.962072109687684e-06, + "loss": 1.0927, + "step": 631000 + }, + { + "epoch": 2.46, + "learning_rate": 8.929553941787277e-06, + "loss": 1.1047, + "step": 631500 + }, + { + "epoch": 2.47, + "learning_rate": 8.89703577388687e-06, + "loss": 1.0932, + "step": 632000 + }, + { + "epoch": 2.47, + "learning_rate": 8.864517605986465e-06, + "loss": 1.099, + "step": 632500 + }, + { + "epoch": 2.47, + "learning_rate": 8.83199943808606e-06, + "loss": 1.0946, + "step": 633000 + }, + { + "epoch": 2.47, + "learning_rate": 8.799481270185652e-06, + "loss": 1.1017, + "step": 633500 + }, + { + "epoch": 2.47, + "learning_rate": 8.766963102285247e-06, + "loss": 1.1008, + "step": 634000 + }, + { + "epoch": 2.48, + "learning_rate": 8.734444934384842e-06, + "loss": 1.0916, + "step": 634500 + }, + { + "epoch": 2.48, + "learning_rate": 8.701926766484435e-06, + "loss": 1.0949, + "step": 635000 + }, + { + "epoch": 2.48, + "learning_rate": 8.669408598584029e-06, + "loss": 1.0958, + "step": 635500 + }, + { + "epoch": 2.48, + "learning_rate": 8.636890430683624e-06, + "loss": 1.1001, + "step": 636000 + }, + { + "epoch": 2.48, + "learning_rate": 8.604372262783217e-06, + "loss": 1.0931, + "step": 636500 + }, + { + "epoch": 2.49, + "learning_rate": 8.57185409488281e-06, + "loss": 1.0975, + "step": 637000 + }, + { + "epoch": 2.49, + "learning_rate": 8.539335926982406e-06, + "loss": 1.1003, + "step": 637500 + }, + { + "epoch": 2.49, + "learning_rate": 8.506817759081999e-06, + "loss": 1.0929, + "step": 638000 + }, + { + "epoch": 2.49, + "learning_rate": 8.474299591181594e-06, + "loss": 1.0987, + "step": 638500 + }, + { + "epoch": 2.49, + "learning_rate": 8.441781423281187e-06, + "loss": 1.0915, + "step": 639000 + }, + { + "epoch": 2.5, + "learning_rate": 8.40926325538078e-06, + "loss": 1.0907, + "step": 639500 + }, + { + "epoch": 2.5, + "learning_rate": 8.376745087480376e-06, + "loss": 1.0959, + "step": 640000 + }, + { + "epoch": 2.5, + "learning_rate": 8.34422691957997e-06, + "loss": 1.094, + "step": 640500 + }, + { + "epoch": 2.5, + "learning_rate": 8.311708751679564e-06, + "loss": 1.0932, + "step": 641000 + }, + { + "epoch": 2.5, + "learning_rate": 8.279190583779157e-06, + "loss": 1.0887, + "step": 641500 + }, + { + "epoch": 2.51, + "learning_rate": 8.246672415878752e-06, + "loss": 1.0996, + "step": 642000 + }, + { + "epoch": 2.51, + "learning_rate": 8.214154247978346e-06, + "loss": 1.0919, + "step": 642500 + }, + { + "epoch": 2.51, + "learning_rate": 8.181636080077939e-06, + "loss": 1.0899, + "step": 643000 + }, + { + "epoch": 2.51, + "learning_rate": 8.149117912177534e-06, + "loss": 1.0899, + "step": 643500 + }, + { + "epoch": 2.51, + "learning_rate": 8.116599744277129e-06, + "loss": 1.0939, + "step": 644000 + }, + { + "epoch": 2.51, + "learning_rate": 8.084081576376722e-06, + "loss": 1.0915, + "step": 644500 + }, + { + "epoch": 2.52, + "learning_rate": 8.051563408476316e-06, + "loss": 1.0901, + "step": 645000 + }, + { + "epoch": 2.52, + "learning_rate": 8.01904524057591e-06, + "loss": 1.0844, + "step": 645500 + }, + { + "epoch": 2.52, + "learning_rate": 7.986527072675504e-06, + "loss": 1.0808, + "step": 646000 + }, + { + "epoch": 2.52, + "learning_rate": 7.954008904775098e-06, + "loss": 1.0843, + "step": 646500 + }, + { + "epoch": 2.52, + "learning_rate": 7.921490736874693e-06, + "loss": 1.0931, + "step": 647000 + }, + { + "epoch": 2.53, + "learning_rate": 7.888972568974286e-06, + "loss": 1.0874, + "step": 647500 + }, + { + "epoch": 2.53, + "learning_rate": 7.856454401073881e-06, + "loss": 1.0893, + "step": 648000 + }, + { + "epoch": 2.53, + "learning_rate": 7.823936233173474e-06, + "loss": 1.0879, + "step": 648500 + }, + { + "epoch": 2.53, + "learning_rate": 7.791418065273068e-06, + "loss": 1.1044, + "step": 649000 + }, + { + "epoch": 2.53, + "learning_rate": 7.758899897372663e-06, + "loss": 1.0898, + "step": 649500 + }, + { + "epoch": 2.54, + "learning_rate": 7.726381729472256e-06, + "loss": 1.0876, + "step": 650000 + }, + { + "epoch": 2.54, + "learning_rate": 7.69386356157185e-06, + "loss": 1.0968, + "step": 650500 + }, + { + "epoch": 2.54, + "learning_rate": 7.661345393671444e-06, + "loss": 1.087, + "step": 651000 + }, + { + "epoch": 2.54, + "learning_rate": 7.6288272257710385e-06, + "loss": 1.0867, + "step": 651500 + }, + { + "epoch": 2.54, + "learning_rate": 7.5963090578706336e-06, + "loss": 1.0834, + "step": 652000 + }, + { + "epoch": 2.55, + "learning_rate": 7.563790889970226e-06, + "loss": 1.0882, + "step": 652500 + }, + { + "epoch": 2.55, + "learning_rate": 7.531272722069821e-06, + "loss": 1.0883, + "step": 653000 + }, + { + "epoch": 2.55, + "learning_rate": 7.498754554169415e-06, + "loss": 1.0811, + "step": 653500 + }, + { + "epoch": 2.55, + "learning_rate": 7.466236386269009e-06, + "loss": 1.0852, + "step": 654000 + }, + { + "epoch": 2.55, + "learning_rate": 7.433718218368603e-06, + "loss": 1.0924, + "step": 654500 + }, + { + "epoch": 2.56, + "learning_rate": 7.401200050468198e-06, + "loss": 1.0938, + "step": 655000 + }, + { + "epoch": 2.56, + "learning_rate": 7.36868188256779e-06, + "loss": 1.0888, + "step": 655500 + }, + { + "epoch": 2.56, + "learning_rate": 7.336163714667385e-06, + "loss": 1.0905, + "step": 656000 + }, + { + "epoch": 2.56, + "learning_rate": 7.3036455467669795e-06, + "loss": 1.0882, + "step": 656500 + }, + { + "epoch": 2.56, + "learning_rate": 7.271127378866573e-06, + "loss": 1.0871, + "step": 657000 + }, + { + "epoch": 2.57, + "learning_rate": 7.238609210966167e-06, + "loss": 1.0855, + "step": 657500 + }, + { + "epoch": 2.57, + "learning_rate": 7.206091043065761e-06, + "loss": 1.0888, + "step": 658000 + }, + { + "epoch": 2.57, + "learning_rate": 7.173572875165355e-06, + "loss": 1.0819, + "step": 658500 + }, + { + "epoch": 2.57, + "learning_rate": 7.141054707264949e-06, + "loss": 1.089, + "step": 659000 + }, + { + "epoch": 2.57, + "learning_rate": 7.108536539364544e-06, + "loss": 1.0899, + "step": 659500 + }, + { + "epoch": 2.58, + "learning_rate": 7.076018371464137e-06, + "loss": 1.0841, + "step": 660000 + }, + { + "epoch": 2.58, + "learning_rate": 7.043500203563731e-06, + "loss": 1.0852, + "step": 660500 + }, + { + "epoch": 2.58, + "learning_rate": 7.0109820356633255e-06, + "loss": 1.0779, + "step": 661000 + }, + { + "epoch": 2.58, + "learning_rate": 6.978463867762919e-06, + "loss": 1.084, + "step": 661500 + }, + { + "epoch": 2.58, + "learning_rate": 6.945945699862513e-06, + "loss": 1.0835, + "step": 662000 + }, + { + "epoch": 2.59, + "learning_rate": 6.913427531962108e-06, + "loss": 1.0893, + "step": 662500 + }, + { + "epoch": 2.59, + "learning_rate": 6.880909364061702e-06, + "loss": 1.0897, + "step": 663000 + }, + { + "epoch": 2.59, + "learning_rate": 6.848391196161296e-06, + "loss": 1.0888, + "step": 663500 + }, + { + "epoch": 2.59, + "learning_rate": 6.81587302826089e-06, + "loss": 1.0907, + "step": 664000 + }, + { + "epoch": 2.59, + "learning_rate": 6.783354860360484e-06, + "loss": 1.0827, + "step": 664500 + }, + { + "epoch": 2.59, + "learning_rate": 6.750836692460077e-06, + "loss": 1.0874, + "step": 665000 + }, + { + "epoch": 2.6, + "learning_rate": 6.718318524559672e-06, + "loss": 1.0859, + "step": 665500 + }, + { + "epoch": 2.6, + "learning_rate": 6.6858003566592665e-06, + "loss": 1.0768, + "step": 666000 + }, + { + "epoch": 2.6, + "learning_rate": 6.65328218875886e-06, + "loss": 1.084, + "step": 666500 + }, + { + "epoch": 2.6, + "learning_rate": 6.620764020858454e-06, + "loss": 1.0814, + "step": 667000 + }, + { + "epoch": 2.6, + "learning_rate": 6.588245852958048e-06, + "loss": 1.0858, + "step": 667500 + }, + { + "epoch": 2.61, + "learning_rate": 6.5557276850576416e-06, + "loss": 1.0886, + "step": 668000 + }, + { + "epoch": 2.61, + "learning_rate": 6.523209517157236e-06, + "loss": 1.077, + "step": 668500 + }, + { + "epoch": 2.61, + "learning_rate": 6.490691349256831e-06, + "loss": 1.0803, + "step": 669000 + }, + { + "epoch": 2.61, + "learning_rate": 6.458173181356423e-06, + "loss": 1.0757, + "step": 669500 + }, + { + "epoch": 2.61, + "learning_rate": 6.425655013456018e-06, + "loss": 1.0778, + "step": 670000 + }, + { + "epoch": 2.62, + "learning_rate": 6.3931368455556125e-06, + "loss": 1.0805, + "step": 670500 + }, + { + "epoch": 2.62, + "learning_rate": 6.360618677655206e-06, + "loss": 1.0889, + "step": 671000 + }, + { + "epoch": 2.62, + "learning_rate": 6.3281005097548e-06, + "loss": 1.0801, + "step": 671500 + }, + { + "epoch": 2.62, + "learning_rate": 6.295582341854395e-06, + "loss": 1.0801, + "step": 672000 + }, + { + "epoch": 2.62, + "learning_rate": 6.2630641739539876e-06, + "loss": 1.0843, + "step": 672500 + }, + { + "epoch": 2.63, + "learning_rate": 6.230546006053583e-06, + "loss": 1.0856, + "step": 673000 + }, + { + "epoch": 2.63, + "learning_rate": 6.198027838153177e-06, + "loss": 1.0809, + "step": 673500 + }, + { + "epoch": 2.63, + "learning_rate": 6.16550967025277e-06, + "loss": 1.0775, + "step": 674000 + }, + { + "epoch": 2.63, + "learning_rate": 6.132991502352364e-06, + "loss": 1.0819, + "step": 674500 + }, + { + "epoch": 2.63, + "learning_rate": 6.1004733344519585e-06, + "loss": 1.0753, + "step": 675000 + }, + { + "epoch": 2.64, + "learning_rate": 6.067955166551553e-06, + "loss": 1.0901, + "step": 675500 + }, + { + "epoch": 2.64, + "learning_rate": 6.035436998651146e-06, + "loss": 1.0796, + "step": 676000 + }, + { + "epoch": 2.64, + "learning_rate": 6.002918830750741e-06, + "loss": 1.0775, + "step": 676500 + }, + { + "epoch": 2.64, + "learning_rate": 5.970400662850334e-06, + "loss": 1.0789, + "step": 677000 + }, + { + "epoch": 2.64, + "learning_rate": 5.937882494949929e-06, + "loss": 1.0898, + "step": 677500 + }, + { + "epoch": 2.65, + "learning_rate": 5.905364327049523e-06, + "loss": 1.0753, + "step": 678000 + }, + { + "epoch": 2.65, + "learning_rate": 5.872846159149117e-06, + "loss": 1.0732, + "step": 678500 + }, + { + "epoch": 2.65, + "learning_rate": 5.840327991248711e-06, + "loss": 1.0776, + "step": 679000 + }, + { + "epoch": 2.65, + "learning_rate": 5.807809823348305e-06, + "loss": 1.0794, + "step": 679500 + }, + { + "epoch": 2.65, + "learning_rate": 5.775291655447899e-06, + "loss": 1.0765, + "step": 680000 + }, + { + "epoch": 2.66, + "learning_rate": 5.742773487547493e-06, + "loss": 1.0737, + "step": 680500 + }, + { + "epoch": 2.66, + "learning_rate": 5.710255319647087e-06, + "loss": 1.0817, + "step": 681000 + }, + { + "epoch": 2.66, + "learning_rate": 5.677737151746681e-06, + "loss": 1.0755, + "step": 681500 + }, + { + "epoch": 2.66, + "learning_rate": 5.645218983846275e-06, + "loss": 1.0853, + "step": 682000 + }, + { + "epoch": 2.66, + "learning_rate": 5.612700815945869e-06, + "loss": 1.0847, + "step": 682500 + }, + { + "epoch": 2.67, + "learning_rate": 5.580182648045464e-06, + "loss": 1.086, + "step": 683000 + }, + { + "epoch": 2.67, + "learning_rate": 5.547664480145057e-06, + "loss": 1.0752, + "step": 683500 + }, + { + "epoch": 2.67, + "learning_rate": 5.515146312244651e-06, + "loss": 1.0851, + "step": 684000 + }, + { + "epoch": 2.67, + "learning_rate": 5.4826281443442455e-06, + "loss": 1.0743, + "step": 684500 + }, + { + "epoch": 2.67, + "learning_rate": 5.45010997644384e-06, + "loss": 1.0807, + "step": 685000 + }, + { + "epoch": 2.67, + "learning_rate": 5.417591808543433e-06, + "loss": 1.0754, + "step": 685500 + }, + { + "epoch": 2.68, + "learning_rate": 5.385073640643028e-06, + "loss": 1.0702, + "step": 686000 + }, + { + "epoch": 2.68, + "learning_rate": 5.352555472742621e-06, + "loss": 1.0796, + "step": 686500 + }, + { + "epoch": 2.68, + "learning_rate": 5.3200373048422156e-06, + "loss": 1.0742, + "step": 687000 + }, + { + "epoch": 2.68, + "learning_rate": 5.28751913694181e-06, + "loss": 1.0724, + "step": 687500 + }, + { + "epoch": 2.68, + "learning_rate": 5.255000969041404e-06, + "loss": 1.0806, + "step": 688000 + }, + { + "epoch": 2.69, + "learning_rate": 5.222482801140998e-06, + "loss": 1.0784, + "step": 688500 + }, + { + "epoch": 2.69, + "learning_rate": 5.1899646332405914e-06, + "loss": 1.0779, + "step": 689000 + }, + { + "epoch": 2.69, + "learning_rate": 5.157446465340186e-06, + "loss": 1.0747, + "step": 689500 + }, + { + "epoch": 2.69, + "learning_rate": 5.12492829743978e-06, + "loss": 1.0713, + "step": 690000 + }, + { + "epoch": 2.69, + "learning_rate": 5.092410129539374e-06, + "loss": 1.0813, + "step": 690500 + }, + { + "epoch": 2.7, + "learning_rate": 5.059891961638967e-06, + "loss": 1.0787, + "step": 691000 + }, + { + "epoch": 2.7, + "learning_rate": 5.027373793738562e-06, + "loss": 1.0704, + "step": 691500 + }, + { + "epoch": 2.7, + "learning_rate": 4.994855625838156e-06, + "loss": 1.0789, + "step": 692000 + }, + { + "epoch": 2.7, + "learning_rate": 4.96233745793775e-06, + "loss": 1.0808, + "step": 692500 + }, + { + "epoch": 2.7, + "learning_rate": 4.929819290037344e-06, + "loss": 1.077, + "step": 693000 + }, + { + "epoch": 2.71, + "learning_rate": 4.897301122136938e-06, + "loss": 1.0727, + "step": 693500 + }, + { + "epoch": 2.71, + "learning_rate": 4.8647829542365325e-06, + "loss": 1.0761, + "step": 694000 + }, + { + "epoch": 2.71, + "learning_rate": 4.832264786336127e-06, + "loss": 1.072, + "step": 694500 + }, + { + "epoch": 2.71, + "learning_rate": 4.79974661843572e-06, + "loss": 1.077, + "step": 695000 + }, + { + "epoch": 2.71, + "learning_rate": 4.767228450535314e-06, + "loss": 1.0719, + "step": 695500 + }, + { + "epoch": 2.72, + "learning_rate": 4.734710282634908e-06, + "loss": 1.0732, + "step": 696000 + }, + { + "epoch": 2.72, + "learning_rate": 4.7021921147345025e-06, + "loss": 1.0697, + "step": 696500 + }, + { + "epoch": 2.72, + "learning_rate": 4.669673946834097e-06, + "loss": 1.0658, + "step": 697000 + }, + { + "epoch": 2.72, + "learning_rate": 4.63715577893369e-06, + "loss": 1.0835, + "step": 697500 + }, + { + "epoch": 2.72, + "learning_rate": 4.604637611033284e-06, + "loss": 1.0741, + "step": 698000 + }, + { + "epoch": 2.73, + "learning_rate": 4.5721194431328784e-06, + "loss": 1.0666, + "step": 698500 + }, + { + "epoch": 2.73, + "learning_rate": 4.539601275232473e-06, + "loss": 1.0721, + "step": 699000 + }, + { + "epoch": 2.73, + "learning_rate": 4.507083107332067e-06, + "loss": 1.0756, + "step": 699500 + }, + { + "epoch": 2.73, + "learning_rate": 4.474564939431661e-06, + "loss": 1.0732, + "step": 700000 + }, + { + "epoch": 2.73, + "learning_rate": 4.442046771531254e-06, + "loss": 1.0761, + "step": 700500 + }, + { + "epoch": 2.74, + "learning_rate": 4.409528603630849e-06, + "loss": 1.0742, + "step": 701000 + }, + { + "epoch": 2.74, + "learning_rate": 4.377010435730443e-06, + "loss": 1.0779, + "step": 701500 + }, + { + "epoch": 2.74, + "learning_rate": 4.344492267830037e-06, + "loss": 1.072, + "step": 702000 + }, + { + "epoch": 2.74, + "learning_rate": 4.311974099929631e-06, + "loss": 1.0762, + "step": 702500 + }, + { + "epoch": 2.74, + "learning_rate": 4.279455932029225e-06, + "loss": 1.0707, + "step": 703000 + }, + { + "epoch": 2.75, + "learning_rate": 4.246937764128819e-06, + "loss": 1.0772, + "step": 703500 + }, + { + "epoch": 2.75, + "learning_rate": 4.214419596228413e-06, + "loss": 1.0619, + "step": 704000 + }, + { + "epoch": 2.75, + "learning_rate": 4.181901428328007e-06, + "loss": 1.0868, + "step": 704500 + }, + { + "epoch": 2.75, + "learning_rate": 4.149383260427601e-06, + "loss": 1.0695, + "step": 705000 + }, + { + "epoch": 2.75, + "learning_rate": 4.116865092527195e-06, + "loss": 1.0613, + "step": 705500 + }, + { + "epoch": 2.75, + "learning_rate": 4.084346924626789e-06, + "loss": 1.0673, + "step": 706000 + }, + { + "epoch": 2.76, + "learning_rate": 4.051828756726384e-06, + "loss": 1.0672, + "step": 706500 + }, + { + "epoch": 2.76, + "learning_rate": 4.019310588825977e-06, + "loss": 1.067, + "step": 707000 + }, + { + "epoch": 2.76, + "learning_rate": 3.986792420925571e-06, + "loss": 1.0703, + "step": 707500 + }, + { + "epoch": 2.76, + "learning_rate": 3.9542742530251654e-06, + "loss": 1.0661, + "step": 708000 + }, + { + "epoch": 2.76, + "learning_rate": 3.92175608512476e-06, + "loss": 1.0697, + "step": 708500 + }, + { + "epoch": 2.77, + "learning_rate": 3.889237917224353e-06, + "loss": 1.072, + "step": 709000 + }, + { + "epoch": 2.77, + "learning_rate": 3.856719749323948e-06, + "loss": 1.0708, + "step": 709500 + }, + { + "epoch": 2.77, + "learning_rate": 3.824201581423541e-06, + "loss": 1.0715, + "step": 710000 + }, + { + "epoch": 2.77, + "learning_rate": 3.791683413523135e-06, + "loss": 1.0731, + "step": 710500 + }, + { + "epoch": 2.77, + "learning_rate": 3.7591652456227297e-06, + "loss": 1.0731, + "step": 711000 + }, + { + "epoch": 2.78, + "learning_rate": 3.7266470777223235e-06, + "loss": 1.0659, + "step": 711500 + }, + { + "epoch": 2.78, + "learning_rate": 3.694128909821918e-06, + "loss": 1.0744, + "step": 712000 + }, + { + "epoch": 2.78, + "learning_rate": 3.661610741921512e-06, + "loss": 1.0691, + "step": 712500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6290925740211056e-06, + "loss": 1.078, + "step": 713000 + }, + { + "epoch": 2.78, + "learning_rate": 3.5965744061207e-06, + "loss": 1.0713, + "step": 713500 + }, + { + "epoch": 2.79, + "learning_rate": 3.564056238220294e-06, + "loss": 1.0709, + "step": 714000 + }, + { + "epoch": 2.79, + "learning_rate": 3.5315380703198877e-06, + "loss": 1.0647, + "step": 714500 + }, + { + "epoch": 2.79, + "learning_rate": 3.499019902419482e-06, + "loss": 1.0637, + "step": 715000 + }, + { + "epoch": 2.79, + "learning_rate": 3.466501734519076e-06, + "loss": 1.0696, + "step": 715500 + }, + { + "epoch": 2.79, + "learning_rate": 3.43398356661867e-06, + "loss": 1.0796, + "step": 716000 + }, + { + "epoch": 2.8, + "learning_rate": 3.401465398718264e-06, + "loss": 1.0665, + "step": 716500 + }, + { + "epoch": 2.8, + "learning_rate": 3.368947230817858e-06, + "loss": 1.0692, + "step": 717000 + }, + { + "epoch": 2.8, + "learning_rate": 3.3364290629174524e-06, + "loss": 1.0693, + "step": 717500 + }, + { + "epoch": 2.8, + "learning_rate": 3.303910895017046e-06, + "loss": 1.0715, + "step": 718000 + }, + { + "epoch": 2.8, + "learning_rate": 3.27139272711664e-06, + "loss": 1.0652, + "step": 718500 + }, + { + "epoch": 2.81, + "learning_rate": 3.2388745592162346e-06, + "loss": 1.0846, + "step": 719000 + }, + { + "epoch": 2.81, + "learning_rate": 3.2063563913158283e-06, + "loss": 1.0657, + "step": 719500 + }, + { + "epoch": 2.81, + "learning_rate": 3.173838223415422e-06, + "loss": 1.0693, + "step": 720000 + }, + { + "epoch": 2.81, + "learning_rate": 3.1413200555150167e-06, + "loss": 1.0736, + "step": 720500 + }, + { + "epoch": 2.81, + "learning_rate": 3.1088018876146104e-06, + "loss": 1.0636, + "step": 721000 + }, + { + "epoch": 2.82, + "learning_rate": 3.0762837197142046e-06, + "loss": 1.069, + "step": 721500 + }, + { + "epoch": 2.82, + "learning_rate": 3.043765551813799e-06, + "loss": 1.072, + "step": 722000 + }, + { + "epoch": 2.82, + "learning_rate": 3.0112473839133926e-06, + "loss": 1.0663, + "step": 722500 + }, + { + "epoch": 2.82, + "learning_rate": 2.9787292160129868e-06, + "loss": 1.072, + "step": 723000 + }, + { + "epoch": 2.82, + "learning_rate": 2.9462110481125805e-06, + "loss": 1.0611, + "step": 723500 + }, + { + "epoch": 2.83, + "learning_rate": 2.9136928802121747e-06, + "loss": 1.0741, + "step": 724000 + }, + { + "epoch": 2.83, + "learning_rate": 2.8811747123117685e-06, + "loss": 1.0712, + "step": 724500 + }, + { + "epoch": 2.83, + "learning_rate": 2.8486565444113627e-06, + "loss": 1.07, + "step": 725000 + }, + { + "epoch": 2.83, + "learning_rate": 2.816138376510957e-06, + "loss": 1.0655, + "step": 725500 + }, + { + "epoch": 2.83, + "learning_rate": 2.7836202086105506e-06, + "loss": 1.0663, + "step": 726000 + }, + { + "epoch": 2.83, + "learning_rate": 2.751102040710145e-06, + "loss": 1.0648, + "step": 726500 + }, + { + "epoch": 2.84, + "learning_rate": 2.718583872809739e-06, + "loss": 1.068, + "step": 727000 + }, + { + "epoch": 2.84, + "learning_rate": 2.686065704909333e-06, + "loss": 1.0661, + "step": 727500 + }, + { + "epoch": 2.84, + "learning_rate": 2.653547537008927e-06, + "loss": 1.0593, + "step": 728000 + }, + { + "epoch": 2.84, + "learning_rate": 2.621029369108521e-06, + "loss": 1.0652, + "step": 728500 + }, + { + "epoch": 2.84, + "learning_rate": 2.5885112012081153e-06, + "loss": 1.0744, + "step": 729000 + }, + { + "epoch": 2.85, + "learning_rate": 2.5559930333077095e-06, + "loss": 1.0704, + "step": 729500 + }, + { + "epoch": 2.85, + "learning_rate": 2.5234748654073033e-06, + "loss": 1.0706, + "step": 730000 + }, + { + "epoch": 2.85, + "learning_rate": 2.490956697506897e-06, + "loss": 1.0681, + "step": 730500 + }, + { + "epoch": 2.85, + "learning_rate": 2.458438529606491e-06, + "loss": 1.078, + "step": 731000 + }, + { + "epoch": 2.85, + "learning_rate": 2.425920361706085e-06, + "loss": 1.0632, + "step": 731500 + }, + { + "epoch": 2.86, + "learning_rate": 2.393402193805679e-06, + "loss": 1.0671, + "step": 732000 + }, + { + "epoch": 2.86, + "learning_rate": 2.3608840259052733e-06, + "loss": 1.0681, + "step": 732500 + }, + { + "epoch": 2.86, + "learning_rate": 2.3283658580048675e-06, + "loss": 1.0653, + "step": 733000 + }, + { + "epoch": 2.86, + "learning_rate": 2.2958476901044613e-06, + "loss": 1.0697, + "step": 733500 + }, + { + "epoch": 2.86, + "learning_rate": 2.2633295222040555e-06, + "loss": 1.0691, + "step": 734000 + }, + { + "epoch": 2.87, + "learning_rate": 2.2308113543036497e-06, + "loss": 1.0584, + "step": 734500 + }, + { + "epoch": 2.87, + "learning_rate": 2.198293186403244e-06, + "loss": 1.0656, + "step": 735000 + }, + { + "epoch": 2.87, + "learning_rate": 2.1657750185028376e-06, + "loss": 1.0707, + "step": 735500 + }, + { + "epoch": 2.87, + "learning_rate": 2.133256850602432e-06, + "loss": 1.062, + "step": 736000 + }, + { + "epoch": 2.87, + "learning_rate": 2.100738682702026e-06, + "loss": 1.0601, + "step": 736500 + }, + { + "epoch": 2.88, + "learning_rate": 2.0682205148016197e-06, + "loss": 1.0657, + "step": 737000 + }, + { + "epoch": 2.88, + "learning_rate": 2.035702346901214e-06, + "loss": 1.0618, + "step": 737500 + }, + { + "epoch": 2.88, + "learning_rate": 2.0031841790008077e-06, + "loss": 1.0607, + "step": 738000 + }, + { + "epoch": 2.88, + "learning_rate": 1.970666011100402e-06, + "loss": 1.0701, + "step": 738500 + }, + { + "epoch": 2.88, + "learning_rate": 1.9381478431999956e-06, + "loss": 1.0678, + "step": 739000 + }, + { + "epoch": 2.89, + "learning_rate": 1.90562967529959e-06, + "loss": 1.071, + "step": 739500 + }, + { + "epoch": 2.89, + "learning_rate": 1.873111507399184e-06, + "loss": 1.0649, + "step": 740000 + }, + { + "epoch": 2.89, + "learning_rate": 1.8405933394987782e-06, + "loss": 1.0644, + "step": 740500 + }, + { + "epoch": 2.89, + "learning_rate": 1.808075171598372e-06, + "loss": 1.0663, + "step": 741000 + }, + { + "epoch": 2.89, + "learning_rate": 1.7755570036979661e-06, + "loss": 1.0682, + "step": 741500 + }, + { + "epoch": 2.9, + "learning_rate": 1.7430388357975603e-06, + "loss": 1.0641, + "step": 742000 + }, + { + "epoch": 2.9, + "learning_rate": 1.710520667897154e-06, + "loss": 1.0597, + "step": 742500 + }, + { + "epoch": 2.9, + "learning_rate": 1.6780024999967483e-06, + "loss": 1.0585, + "step": 743000 + }, + { + "epoch": 2.9, + "learning_rate": 1.6454843320963425e-06, + "loss": 1.0633, + "step": 743500 + }, + { + "epoch": 2.9, + "learning_rate": 1.6129661641959364e-06, + "loss": 1.0659, + "step": 744000 + }, + { + "epoch": 2.91, + "learning_rate": 1.5804479962955302e-06, + "loss": 1.0535, + "step": 744500 + }, + { + "epoch": 2.91, + "learning_rate": 1.5479298283951244e-06, + "loss": 1.0733, + "step": 745000 + }, + { + "epoch": 2.91, + "learning_rate": 1.5154116604947186e-06, + "loss": 1.0663, + "step": 745500 + }, + { + "epoch": 2.91, + "learning_rate": 1.4828934925943125e-06, + "loss": 1.0696, + "step": 746000 + }, + { + "epoch": 2.91, + "learning_rate": 1.4503753246939065e-06, + "loss": 1.0688, + "step": 746500 + }, + { + "epoch": 2.91, + "learning_rate": 1.4178571567935007e-06, + "loss": 1.0593, + "step": 747000 + }, + { + "epoch": 2.92, + "learning_rate": 1.3853389888930947e-06, + "loss": 1.0683, + "step": 747500 + }, + { + "epoch": 2.92, + "learning_rate": 1.3528208209926887e-06, + "loss": 1.0631, + "step": 748000 + }, + { + "epoch": 2.92, + "learning_rate": 1.3203026530922826e-06, + "loss": 1.0726, + "step": 748500 + }, + { + "epoch": 2.92, + "learning_rate": 1.2877844851918768e-06, + "loss": 1.0675, + "step": 749000 + }, + { + "epoch": 2.92, + "learning_rate": 1.2552663172914708e-06, + "loss": 1.0617, + "step": 749500 + }, + { + "epoch": 2.93, + "learning_rate": 1.222748149391065e-06, + "loss": 1.0617, + "step": 750000 + }, + { + "epoch": 2.93, + "learning_rate": 1.190229981490659e-06, + "loss": 1.0654, + "step": 750500 + }, + { + "epoch": 2.93, + "learning_rate": 1.157711813590253e-06, + "loss": 1.0681, + "step": 751000 + }, + { + "epoch": 2.93, + "learning_rate": 1.1251936456898469e-06, + "loss": 1.0669, + "step": 751500 + }, + { + "epoch": 2.93, + "learning_rate": 1.0926754777894409e-06, + "loss": 1.0634, + "step": 752000 + }, + { + "epoch": 2.94, + "learning_rate": 1.060157309889035e-06, + "loss": 1.0596, + "step": 752500 + }, + { + "epoch": 2.94, + "learning_rate": 1.027639141988629e-06, + "loss": 1.0609, + "step": 753000 + }, + { + "epoch": 2.94, + "learning_rate": 9.951209740882232e-07, + "loss": 1.0641, + "step": 753500 + }, + { + "epoch": 2.94, + "learning_rate": 9.626028061878172e-07, + "loss": 1.0555, + "step": 754000 + }, + { + "epoch": 2.94, + "learning_rate": 9.300846382874113e-07, + "loss": 1.0574, + "step": 754500 + }, + { + "epoch": 2.95, + "learning_rate": 8.975664703870052e-07, + "loss": 1.0654, + "step": 755000 + }, + { + "epoch": 2.95, + "learning_rate": 8.650483024865994e-07, + "loss": 1.0592, + "step": 755500 + }, + { + "epoch": 2.95, + "learning_rate": 8.325301345861933e-07, + "loss": 1.057, + "step": 756000 + }, + { + "epoch": 2.95, + "learning_rate": 8.000119666857873e-07, + "loss": 1.0593, + "step": 756500 + }, + { + "epoch": 2.95, + "learning_rate": 7.674937987853815e-07, + "loss": 1.0655, + "step": 757000 + }, + { + "epoch": 2.96, + "learning_rate": 7.349756308849755e-07, + "loss": 1.071, + "step": 757500 + }, + { + "epoch": 2.96, + "learning_rate": 7.024574629845695e-07, + "loss": 1.0546, + "step": 758000 + }, + { + "epoch": 2.96, + "learning_rate": 6.699392950841635e-07, + "loss": 1.0573, + "step": 758500 + }, + { + "epoch": 2.96, + "learning_rate": 6.374211271837576e-07, + "loss": 1.0652, + "step": 759000 + }, + { + "epoch": 2.96, + "learning_rate": 6.049029592833516e-07, + "loss": 1.065, + "step": 759500 + }, + { + "epoch": 2.97, + "learning_rate": 5.723847913829456e-07, + "loss": 1.071, + "step": 760000 + }, + { + "epoch": 2.97, + "learning_rate": 5.398666234825397e-07, + "loss": 1.0628, + "step": 760500 + }, + { + "epoch": 2.97, + "learning_rate": 5.073484555821338e-07, + "loss": 1.0618, + "step": 761000 + }, + { + "epoch": 2.97, + "learning_rate": 4.748302876817278e-07, + "loss": 1.0604, + "step": 761500 + }, + { + "epoch": 2.97, + "learning_rate": 4.423121197813219e-07, + "loss": 1.0612, + "step": 762000 + }, + { + "epoch": 2.98, + "learning_rate": 4.097939518809159e-07, + "loss": 1.0581, + "step": 762500 + }, + { + "epoch": 2.98, + "learning_rate": 3.7727578398050994e-07, + "loss": 1.0603, + "step": 763000 + }, + { + "epoch": 2.98, + "learning_rate": 3.4475761608010396e-07, + "loss": 1.0605, + "step": 763500 + }, + { + "epoch": 2.98, + "learning_rate": 3.1223944817969804e-07, + "loss": 1.0618, + "step": 764000 + }, + { + "epoch": 2.98, + "learning_rate": 2.7972128027929207e-07, + "loss": 1.064, + "step": 764500 + }, + { + "epoch": 2.99, + "learning_rate": 2.472031123788861e-07, + "loss": 1.0619, + "step": 765000 + }, + { + "epoch": 2.99, + "learning_rate": 2.1468494447848013e-07, + "loss": 1.0642, + "step": 765500 + }, + { + "epoch": 2.99, + "learning_rate": 1.8216677657807418e-07, + "loss": 1.0553, + "step": 766000 + }, + { + "epoch": 2.99, + "learning_rate": 1.4964860867766823e-07, + "loss": 1.0646, + "step": 766500 + }, + { + "epoch": 2.99, + "learning_rate": 1.1713044077726226e-07, + "loss": 1.0582, + "step": 767000 + }, + { + "epoch": 2.99, + "learning_rate": 8.46122728768563e-08, + "loss": 1.057, + "step": 767500 + }, + { + "epoch": 3.0, + "learning_rate": 5.209410497645035e-08, + "loss": 1.0583, + "step": 768000 + }, + { + "epoch": 3.0, + "learning_rate": 1.9575937076044387e-08, + "loss": 1.0635, + "step": 768500 + }, + { + "epoch": 3.0, + "step": 768801, + "total_flos": 6.47787050209493e+18, + "train_loss": 2.538804254183028, + "train_runtime": 274559.0377, + "train_samples_per_second": 89.604, + "train_steps_per_second": 2.8 + } + ], + "max_steps": 768801, + "num_train_epochs": 3, + "total_flos": 6.47787050209493e+18, + "trial_name": null, + "trial_params": null +}