diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,12025 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 74.79454103570761, + "global_step": 200001, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.04, + "learning_rate": 5e-06, + "loss": 10.2769, + "step": 100 + }, + { + "epoch": 0.07, + "learning_rate": 1e-05, + "loss": 9.1431, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 1.5e-05, + "loss": 8.0625, + "step": 300 + }, + { + "epoch": 0.15, + "learning_rate": 2e-05, + "loss": 7.1226, + "step": 400 + }, + { + "epoch": 0.19, + "learning_rate": 2.5e-05, + "loss": 6.6198, + "step": 500 + }, + { + "epoch": 0.22, + "learning_rate": 3e-05, + "loss": 6.393, + "step": 600 + }, + { + "epoch": 0.26, + "learning_rate": 3.5000000000000004e-05, + "loss": 6.2631, + "step": 700 + }, + { + "epoch": 0.3, + "learning_rate": 4e-05, + "loss": 6.1591, + "step": 800 + }, + { + "epoch": 0.34, + "learning_rate": 4.4999999999999996e-05, + "loss": 6.0731, + "step": 900 + }, + { + "epoch": 0.37, + "learning_rate": 5e-05, + "loss": 5.9878, + "step": 1000 + }, + { + "epoch": 0.41, + "learning_rate": 5.5e-05, + "loss": 5.9197, + "step": 1100 + }, + { + "epoch": 0.45, + "learning_rate": 6e-05, + "loss": 5.8554, + "step": 1200 + }, + { + "epoch": 0.49, + "learning_rate": 6.500000000000001e-05, + "loss": 5.7992, + "step": 1300 + }, + { + "epoch": 0.52, + "learning_rate": 7.000000000000001e-05, + "loss": 5.7512, + "step": 1400 + }, + { + "epoch": 0.56, + "learning_rate": 7.5e-05, + "loss": 5.7052, + "step": 1500 + }, + { + "epoch": 0.6, + "learning_rate": 8e-05, + "loss": 5.6635, + "step": 1600 + }, + { + "epoch": 0.64, + "learning_rate": 8.5e-05, + "loss": 5.6218, + "step": 1700 + }, + { + "epoch": 0.67, + "learning_rate": 8.999999999999999e-05, + "loss": 5.5903, + "step": 1800 + }, + { + "epoch": 0.71, + "learning_rate": 9.5e-05, + "loss": 5.5598, + "step": 1900 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001, + "loss": 5.5297, + "step": 2000 + }, + { + "epoch": 0.79, + "learning_rate": 0.000105, + "loss": 5.5019, + "step": 2100 + }, + { + "epoch": 0.82, + "learning_rate": 0.00011, + "loss": 5.4745, + "step": 2200 + }, + { + "epoch": 0.86, + "learning_rate": 0.000115, + "loss": 5.4513, + "step": 2300 + }, + { + "epoch": 0.9, + "learning_rate": 0.00012, + "loss": 5.4302, + "step": 2400 + }, + { + "epoch": 0.93, + "learning_rate": 0.000125, + "loss": 5.408, + "step": 2500 + }, + { + "epoch": 0.97, + "learning_rate": 0.00013000000000000002, + "loss": 5.3916, + "step": 2600 + }, + { + "epoch": 1.01, + "learning_rate": 0.000135, + "loss": 5.4024, + "step": 2700 + }, + { + "epoch": 1.05, + "learning_rate": 0.00014000000000000001, + "loss": 5.3568, + "step": 2800 + }, + { + "epoch": 1.08, + "learning_rate": 0.000145, + "loss": 5.338, + "step": 2900 + }, + { + "epoch": 1.12, + "learning_rate": 0.00015, + "loss": 5.3311, + "step": 3000 + }, + { + "epoch": 1.16, + "learning_rate": 0.000155, + "loss": 5.3137, + "step": 3100 + }, + { + "epoch": 1.2, + "learning_rate": 0.00016, + "loss": 5.2984, + "step": 3200 + }, + { + "epoch": 1.23, + "learning_rate": 0.000165, + "loss": 5.2857, + "step": 3300 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017, + "loss": 5.2744, + "step": 3400 + }, + { + "epoch": 1.31, + "learning_rate": 0.000175, + "loss": 5.2631, + "step": 3500 + }, + { + "epoch": 1.35, + "learning_rate": 0.00017999999999999998, + "loss": 5.2573, + "step": 3600 + }, + { + "epoch": 1.38, + "learning_rate": 0.000185, + "loss": 5.2485, + "step": 3700 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019, + "loss": 5.2358, + "step": 3800 + }, + { + "epoch": 1.46, + "learning_rate": 0.00019500000000000002, + "loss": 5.23, + "step": 3900 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002, + "loss": 5.2214, + "step": 4000 + }, + { + "epoch": 1.53, + "learning_rate": 0.000205, + "loss": 5.2122, + "step": 4100 + }, + { + "epoch": 1.57, + "learning_rate": 0.00021, + "loss": 5.2077, + "step": 4200 + }, + { + "epoch": 1.61, + "learning_rate": 0.000215, + "loss": 5.2, + "step": 4300 + }, + { + "epoch": 1.65, + "learning_rate": 0.00022, + "loss": 5.1948, + "step": 4400 + }, + { + "epoch": 1.68, + "learning_rate": 0.00022500000000000002, + "loss": 5.1821, + "step": 4500 + }, + { + "epoch": 1.72, + "learning_rate": 0.00023, + "loss": 5.1826, + "step": 4600 + }, + { + "epoch": 1.76, + "learning_rate": 0.000235, + "loss": 5.176, + "step": 4700 + }, + { + "epoch": 1.79, + "learning_rate": 0.00024, + "loss": 5.1708, + "step": 4800 + }, + { + "epoch": 1.83, + "learning_rate": 0.000245, + "loss": 5.0859, + "step": 4900 + }, + { + "epoch": 1.87, + "learning_rate": 0.00025, + "loss": 4.9407, + "step": 5000 + }, + { + "epoch": 1.91, + "learning_rate": 0.000255, + "loss": 4.8315, + "step": 5100 + }, + { + "epoch": 1.94, + "learning_rate": 0.00026000000000000003, + "loss": 4.6961, + "step": 5200 + }, + { + "epoch": 1.98, + "learning_rate": 0.00026500000000000004, + "loss": 4.5345, + "step": 5300 + }, + { + "epoch": 2.02, + "learning_rate": 0.00027, + "loss": 4.3013, + "step": 5400 + }, + { + "epoch": 2.06, + "learning_rate": 0.000275, + "loss": 3.8932, + "step": 5500 + }, + { + "epoch": 2.09, + "learning_rate": 0.00028000000000000003, + "loss": 3.5304, + "step": 5600 + }, + { + "epoch": 2.13, + "learning_rate": 0.000285, + "loss": 3.1432, + "step": 5700 + }, + { + "epoch": 2.17, + "learning_rate": 0.00029, + "loss": 2.8713, + "step": 5800 + }, + { + "epoch": 2.21, + "learning_rate": 0.000295, + "loss": 2.6971, + "step": 5900 + }, + { + "epoch": 2.24, + "learning_rate": 0.0003, + "loss": 2.5736, + "step": 6000 + }, + { + "epoch": 2.28, + "learning_rate": 0.000305, + "loss": 2.4816, + "step": 6100 + }, + { + "epoch": 2.32, + "learning_rate": 0.00031, + "loss": 2.4073, + "step": 6200 + }, + { + "epoch": 2.36, + "learning_rate": 0.000315, + "loss": 2.3448, + "step": 6300 + }, + { + "epoch": 2.39, + "learning_rate": 0.00032, + "loss": 2.2917, + "step": 6400 + }, + { + "epoch": 2.43, + "learning_rate": 0.00032500000000000004, + "loss": 2.244, + "step": 6500 + }, + { + "epoch": 2.47, + "learning_rate": 0.00033, + "loss": 2.2047, + "step": 6600 + }, + { + "epoch": 2.51, + "learning_rate": 0.000335, + "loss": 2.1707, + "step": 6700 + }, + { + "epoch": 2.54, + "learning_rate": 0.00034, + "loss": 2.1401, + "step": 6800 + }, + { + "epoch": 2.58, + "learning_rate": 0.000345, + "loss": 2.1109, + "step": 6900 + }, + { + "epoch": 2.62, + "learning_rate": 0.00035, + "loss": 2.079, + "step": 7000 + }, + { + "epoch": 2.66, + "learning_rate": 0.000355, + "loss": 2.0593, + "step": 7100 + }, + { + "epoch": 2.69, + "learning_rate": 0.00035999999999999997, + "loss": 2.0377, + "step": 7200 + }, + { + "epoch": 2.73, + "learning_rate": 0.000365, + "loss": 2.0127, + "step": 7300 + }, + { + "epoch": 2.77, + "learning_rate": 0.00037, + "loss": 1.9964, + "step": 7400 + }, + { + "epoch": 2.8, + "learning_rate": 0.000375, + "loss": 1.9722, + "step": 7500 + }, + { + "epoch": 2.84, + "learning_rate": 0.00038, + "loss": 1.9608, + "step": 7600 + }, + { + "epoch": 2.88, + "learning_rate": 0.00038500000000000003, + "loss": 1.9453, + "step": 7700 + }, + { + "epoch": 2.92, + "learning_rate": 0.00039000000000000005, + "loss": 1.9231, + "step": 7800 + }, + { + "epoch": 2.95, + "learning_rate": 0.000395, + "loss": 1.9114, + "step": 7900 + }, + { + "epoch": 2.99, + "learning_rate": 0.0004, + "loss": 1.9029, + "step": 8000 + }, + { + "epoch": 3.03, + "learning_rate": 0.00040500000000000003, + "loss": 1.8942, + "step": 8100 + }, + { + "epoch": 3.07, + "learning_rate": 0.00041, + "loss": 1.8709, + "step": 8200 + }, + { + "epoch": 3.1, + "learning_rate": 0.000415, + "loss": 1.8589, + "step": 8300 + }, + { + "epoch": 3.14, + "learning_rate": 0.00042, + "loss": 1.8475, + "step": 8400 + }, + { + "epoch": 3.18, + "learning_rate": 0.000425, + "loss": 1.8435, + "step": 8500 + }, + { + "epoch": 3.22, + "learning_rate": 0.00043, + "loss": 1.833, + "step": 8600 + }, + { + "epoch": 3.25, + "learning_rate": 0.000435, + "loss": 1.8249, + "step": 8700 + }, + { + "epoch": 3.29, + "learning_rate": 0.00044, + "loss": 1.811, + "step": 8800 + }, + { + "epoch": 3.33, + "learning_rate": 0.00044500000000000003, + "loss": 1.8013, + "step": 8900 + }, + { + "epoch": 3.37, + "learning_rate": 0.00045000000000000004, + "loss": 1.7965, + "step": 9000 + }, + { + "epoch": 3.4, + "learning_rate": 0.000455, + "loss": 1.791, + "step": 9100 + }, + { + "epoch": 3.44, + "learning_rate": 0.00046, + "loss": 1.7794, + "step": 9200 + }, + { + "epoch": 3.48, + "learning_rate": 0.000465, + "loss": 1.778, + "step": 9300 + }, + { + "epoch": 3.52, + "learning_rate": 0.00047, + "loss": 1.7656, + "step": 9400 + }, + { + "epoch": 3.55, + "learning_rate": 0.000475, + "loss": 1.7596, + "step": 9500 + }, + { + "epoch": 3.59, + "learning_rate": 0.00048, + "loss": 1.7561, + "step": 9600 + }, + { + "epoch": 3.63, + "learning_rate": 0.00048499999999999997, + "loss": 1.7486, + "step": 9700 + }, + { + "epoch": 3.66, + "learning_rate": 0.00049, + "loss": 1.7425, + "step": 9800 + }, + { + "epoch": 3.7, + "learning_rate": 0.000495, + "loss": 1.7347, + "step": 9900 + }, + { + "epoch": 3.74, + "learning_rate": 0.0005, + "loss": 1.7352, + "step": 10000 + }, + { + "epoch": 3.78, + "learning_rate": 0.0004997368421052632, + "loss": 1.7255, + "step": 10100 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004994736842105263, + "loss": 1.72, + "step": 10200 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004992105263157895, + "loss": 1.7136, + "step": 10300 + }, + { + "epoch": 3.89, + "learning_rate": 0.0004989473684210527, + "loss": 1.7037, + "step": 10400 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004986842105263158, + "loss": 1.7013, + "step": 10500 + }, + { + "epoch": 3.96, + "learning_rate": 0.000498421052631579, + "loss": 1.6961, + "step": 10600 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004981578947368422, + "loss": 1.6953, + "step": 10700 + }, + { + "epoch": 4.04, + "learning_rate": 0.0004978947368421053, + "loss": 1.6793, + "step": 10800 + }, + { + "epoch": 4.08, + "learning_rate": 0.0004976315789473685, + "loss": 1.6719, + "step": 10900 + }, + { + "epoch": 4.11, + "learning_rate": 0.0004973684210526315, + "loss": 1.672, + "step": 11000 + }, + { + "epoch": 4.15, + "learning_rate": 0.0004971052631578947, + "loss": 1.6683, + "step": 11100 + }, + { + "epoch": 4.19, + "learning_rate": 0.0004968421052631579, + "loss": 1.6603, + "step": 11200 + }, + { + "epoch": 4.23, + "learning_rate": 0.000496578947368421, + "loss": 1.655, + "step": 11300 + }, + { + "epoch": 4.26, + "learning_rate": 0.0004963157894736842, + "loss": 1.6493, + "step": 11400 + }, + { + "epoch": 4.3, + "learning_rate": 0.0004960526315789473, + "loss": 1.6436, + "step": 11500 + }, + { + "epoch": 4.34, + "learning_rate": 0.0004957894736842105, + "loss": 1.6445, + "step": 11600 + }, + { + "epoch": 4.38, + "learning_rate": 0.0004955263157894737, + "loss": 1.6418, + "step": 11700 + }, + { + "epoch": 4.41, + "learning_rate": 0.0004952631578947369, + "loss": 1.6302, + "step": 11800 + }, + { + "epoch": 4.45, + "learning_rate": 0.000495, + "loss": 1.6286, + "step": 11900 + }, + { + "epoch": 4.49, + "learning_rate": 0.0004947368421052632, + "loss": 1.6257, + "step": 12000 + }, + { + "epoch": 4.52, + "learning_rate": 0.0004944736842105264, + "loss": 1.6218, + "step": 12100 + }, + { + "epoch": 4.56, + "learning_rate": 0.0004942105263157895, + "loss": 1.617, + "step": 12200 + }, + { + "epoch": 4.6, + "learning_rate": 0.0004939473684210527, + "loss": 1.6124, + "step": 12300 + }, + { + "epoch": 4.64, + "learning_rate": 0.0004936842105263158, + "loss": 1.61, + "step": 12400 + }, + { + "epoch": 4.67, + "learning_rate": 0.000493421052631579, + "loss": 1.6082, + "step": 12500 + }, + { + "epoch": 4.71, + "learning_rate": 0.0004931578947368422, + "loss": 1.6016, + "step": 12600 + }, + { + "epoch": 4.75, + "learning_rate": 0.0004928947368421052, + "loss": 1.595, + "step": 12700 + }, + { + "epoch": 4.79, + "learning_rate": 0.0004926315789473684, + "loss": 1.5895, + "step": 12800 + }, + { + "epoch": 4.82, + "learning_rate": 0.0004923684210526315, + "loss": 1.5889, + "step": 12900 + }, + { + "epoch": 4.86, + "learning_rate": 0.0004921052631578947, + "loss": 1.5904, + "step": 13000 + }, + { + "epoch": 4.9, + "learning_rate": 0.0004918421052631579, + "loss": 1.5844, + "step": 13100 + }, + { + "epoch": 4.94, + "learning_rate": 0.000491578947368421, + "loss": 1.5834, + "step": 13200 + }, + { + "epoch": 4.97, + "learning_rate": 0.0004913157894736842, + "loss": 1.5764, + "step": 13300 + }, + { + "epoch": 5.01, + "learning_rate": 0.0004910526315789474, + "loss": 1.5851, + "step": 13400 + }, + { + "epoch": 5.05, + "learning_rate": 0.0004907894736842106, + "loss": 1.5645, + "step": 13500 + }, + { + "epoch": 5.09, + "learning_rate": 0.0004905263157894737, + "loss": 1.5664, + "step": 13600 + }, + { + "epoch": 5.12, + "learning_rate": 0.0004902631578947369, + "loss": 1.5612, + "step": 13700 + }, + { + "epoch": 5.16, + "learning_rate": 0.00049, + "loss": 1.5556, + "step": 13800 + }, + { + "epoch": 5.2, + "learning_rate": 0.0004897368421052632, + "loss": 1.5569, + "step": 13900 + }, + { + "epoch": 5.24, + "learning_rate": 0.0004894736842105264, + "loss": 1.5517, + "step": 14000 + }, + { + "epoch": 5.27, + "learning_rate": 0.0004892105263157895, + "loss": 1.551, + "step": 14100 + }, + { + "epoch": 5.31, + "learning_rate": 0.0004889473684210527, + "loss": 1.5531, + "step": 14200 + }, + { + "epoch": 5.35, + "learning_rate": 0.0004886842105263158, + "loss": 1.5484, + "step": 14300 + }, + { + "epoch": 5.39, + "learning_rate": 0.000488421052631579, + "loss": 1.5433, + "step": 14400 + }, + { + "epoch": 5.42, + "learning_rate": 0.00048815789473684215, + "loss": 1.5421, + "step": 14500 + }, + { + "epoch": 5.46, + "learning_rate": 0.0004878947368421053, + "loss": 1.537, + "step": 14600 + }, + { + "epoch": 5.5, + "learning_rate": 0.00048763157894736843, + "loss": 1.5367, + "step": 14700 + }, + { + "epoch": 5.53, + "learning_rate": 0.0004873684210526316, + "loss": 1.5338, + "step": 14800 + }, + { + "epoch": 5.57, + "learning_rate": 0.0004871052631578948, + "loss": 1.5333, + "step": 14900 + }, + { + "epoch": 5.61, + "learning_rate": 0.0004868421052631579, + "loss": 1.5317, + "step": 15000 + }, + { + "epoch": 5.65, + "learning_rate": 0.00048657894736842106, + "loss": 1.5269, + "step": 15100 + }, + { + "epoch": 5.68, + "learning_rate": 0.0004863157894736842, + "loss": 1.5218, + "step": 15200 + }, + { + "epoch": 5.72, + "learning_rate": 0.00048605263157894735, + "loss": 1.5188, + "step": 15300 + }, + { + "epoch": 5.76, + "learning_rate": 0.00048578947368421054, + "loss": 1.5241, + "step": 15400 + }, + { + "epoch": 5.8, + "learning_rate": 0.0004855263157894737, + "loss": 1.5173, + "step": 15500 + }, + { + "epoch": 5.83, + "learning_rate": 0.00048526315789473683, + "loss": 1.517, + "step": 15600 + }, + { + "epoch": 5.87, + "learning_rate": 0.00048499999999999997, + "loss": 1.5195, + "step": 15700 + }, + { + "epoch": 5.91, + "learning_rate": 0.00048473684210526317, + "loss": 1.5087, + "step": 15800 + }, + { + "epoch": 5.95, + "learning_rate": 0.00048447368421052637, + "loss": 1.5081, + "step": 15900 + }, + { + "epoch": 5.98, + "learning_rate": 0.0004842105263157895, + "loss": 1.5087, + "step": 16000 + }, + { + "epoch": 6.02, + "learning_rate": 0.00048394736842105265, + "loss": 1.51, + "step": 16100 + }, + { + "epoch": 6.06, + "learning_rate": 0.0004836842105263158, + "loss": 1.4973, + "step": 16200 + }, + { + "epoch": 6.1, + "learning_rate": 0.000483421052631579, + "loss": 1.497, + "step": 16300 + }, + { + "epoch": 6.13, + "learning_rate": 0.00048315789473684213, + "loss": 1.4946, + "step": 16400 + }, + { + "epoch": 6.17, + "learning_rate": 0.0004828947368421053, + "loss": 1.4965, + "step": 16500 + }, + { + "epoch": 6.21, + "learning_rate": 0.0004826315789473684, + "loss": 1.4927, + "step": 16600 + }, + { + "epoch": 6.25, + "learning_rate": 0.00048236842105263156, + "loss": 1.4936, + "step": 16700 + }, + { + "epoch": 6.28, + "learning_rate": 0.00048210526315789476, + "loss": 1.4885, + "step": 16800 + }, + { + "epoch": 6.32, + "learning_rate": 0.0004818421052631579, + "loss": 1.4893, + "step": 16900 + }, + { + "epoch": 6.36, + "learning_rate": 0.00048157894736842105, + "loss": 1.4883, + "step": 17000 + }, + { + "epoch": 6.39, + "learning_rate": 0.0004813157894736842, + "loss": 1.4872, + "step": 17100 + }, + { + "epoch": 6.43, + "learning_rate": 0.00048105263157894733, + "loss": 1.4842, + "step": 17200 + }, + { + "epoch": 6.47, + "learning_rate": 0.00048078947368421053, + "loss": 1.4841, + "step": 17300 + }, + { + "epoch": 6.51, + "learning_rate": 0.0004805263157894737, + "loss": 1.4773, + "step": 17400 + }, + { + "epoch": 6.54, + "learning_rate": 0.00048026315789473687, + "loss": 1.4785, + "step": 17500 + }, + { + "epoch": 6.58, + "learning_rate": 0.00048, + "loss": 1.4772, + "step": 17600 + }, + { + "epoch": 6.62, + "learning_rate": 0.0004797368421052632, + "loss": 1.473, + "step": 17700 + }, + { + "epoch": 6.66, + "learning_rate": 0.00047947368421052635, + "loss": 1.4755, + "step": 17800 + }, + { + "epoch": 6.69, + "learning_rate": 0.0004792105263157895, + "loss": 1.4791, + "step": 17900 + }, + { + "epoch": 6.73, + "learning_rate": 0.00047894736842105264, + "loss": 1.4738, + "step": 18000 + }, + { + "epoch": 6.77, + "learning_rate": 0.0004786842105263158, + "loss": 1.473, + "step": 18100 + }, + { + "epoch": 6.81, + "learning_rate": 0.000478421052631579, + "loss": 1.4702, + "step": 18200 + }, + { + "epoch": 6.84, + "learning_rate": 0.0004781578947368421, + "loss": 1.4685, + "step": 18300 + }, + { + "epoch": 6.88, + "learning_rate": 0.00047789473684210526, + "loss": 1.466, + "step": 18400 + }, + { + "epoch": 6.92, + "learning_rate": 0.0004776315789473684, + "loss": 1.4656, + "step": 18500 + }, + { + "epoch": 6.96, + "learning_rate": 0.00047736842105263155, + "loss": 1.4647, + "step": 18600 + }, + { + "epoch": 6.99, + "learning_rate": 0.00047710526315789475, + "loss": 1.4604, + "step": 18700 + }, + { + "epoch": 7.03, + "learning_rate": 0.0004768421052631579, + "loss": 1.4625, + "step": 18800 + }, + { + "epoch": 7.07, + "learning_rate": 0.00047657894736842103, + "loss": 1.4563, + "step": 18900 + }, + { + "epoch": 7.11, + "learning_rate": 0.0004763157894736842, + "loss": 1.4567, + "step": 19000 + }, + { + "epoch": 7.14, + "learning_rate": 0.00047605263157894743, + "loss": 1.4529, + "step": 19100 + }, + { + "epoch": 7.18, + "learning_rate": 0.00047578947368421057, + "loss": 1.454, + "step": 19200 + }, + { + "epoch": 7.22, + "learning_rate": 0.0004755263157894737, + "loss": 1.4524, + "step": 19300 + }, + { + "epoch": 7.26, + "learning_rate": 0.00047526315789473686, + "loss": 1.4492, + "step": 19400 + }, + { + "epoch": 7.29, + "learning_rate": 0.000475, + "loss": 1.4467, + "step": 19500 + }, + { + "epoch": 7.33, + "learning_rate": 0.0004747368421052632, + "loss": 1.4468, + "step": 19600 + }, + { + "epoch": 7.37, + "learning_rate": 0.00047447368421052634, + "loss": 1.4459, + "step": 19700 + }, + { + "epoch": 7.4, + "learning_rate": 0.0004742105263157895, + "loss": 1.4444, + "step": 19800 + }, + { + "epoch": 7.44, + "learning_rate": 0.0004739473684210526, + "loss": 1.442, + "step": 19900 + }, + { + "epoch": 7.48, + "learning_rate": 0.00047368421052631577, + "loss": 1.4422, + "step": 20000 + }, + { + "epoch": 7.52, + "learning_rate": 0.00047342105263157897, + "loss": 1.4389, + "step": 20100 + }, + { + "epoch": 7.55, + "learning_rate": 0.0004731578947368421, + "loss": 1.4449, + "step": 20200 + }, + { + "epoch": 7.59, + "learning_rate": 0.00047289473684210525, + "loss": 1.4429, + "step": 20300 + }, + { + "epoch": 7.63, + "learning_rate": 0.0004726315789473684, + "loss": 1.4364, + "step": 20400 + }, + { + "epoch": 7.67, + "learning_rate": 0.0004723684210526316, + "loss": 1.4394, + "step": 20500 + }, + { + "epoch": 7.7, + "learning_rate": 0.00047210526315789473, + "loss": 1.4358, + "step": 20600 + }, + { + "epoch": 7.74, + "learning_rate": 0.00047184210526315793, + "loss": 1.4336, + "step": 20700 + }, + { + "epoch": 7.78, + "learning_rate": 0.0004715789473684211, + "loss": 1.4316, + "step": 20800 + }, + { + "epoch": 7.82, + "learning_rate": 0.0004713157894736842, + "loss": 1.4372, + "step": 20900 + }, + { + "epoch": 7.85, + "learning_rate": 0.0004710526315789474, + "loss": 1.4308, + "step": 21000 + }, + { + "epoch": 7.89, + "learning_rate": 0.00047078947368421056, + "loss": 1.4287, + "step": 21100 + }, + { + "epoch": 7.93, + "learning_rate": 0.0004705263157894737, + "loss": 1.4278, + "step": 21200 + }, + { + "epoch": 7.97, + "learning_rate": 0.00047026315789473684, + "loss": 1.4325, + "step": 21300 + }, + { + "epoch": 8.0, + "learning_rate": 0.00047, + "loss": 1.4303, + "step": 21400 + }, + { + "epoch": 8.04, + "learning_rate": 0.0004697368421052632, + "loss": 1.4242, + "step": 21500 + }, + { + "epoch": 8.08, + "learning_rate": 0.0004694736842105263, + "loss": 1.424, + "step": 21600 + }, + { + "epoch": 8.12, + "learning_rate": 0.00046921052631578947, + "loss": 1.4191, + "step": 21700 + }, + { + "epoch": 8.15, + "learning_rate": 0.0004689473684210526, + "loss": 1.419, + "step": 21800 + }, + { + "epoch": 8.19, + "learning_rate": 0.0004686842105263158, + "loss": 1.4185, + "step": 21900 + }, + { + "epoch": 8.23, + "learning_rate": 0.00046842105263157895, + "loss": 1.4213, + "step": 22000 + }, + { + "epoch": 8.26, + "learning_rate": 0.0004681578947368421, + "loss": 1.4203, + "step": 22100 + }, + { + "epoch": 8.3, + "learning_rate": 0.00046789473684210524, + "loss": 1.4147, + "step": 22200 + }, + { + "epoch": 8.34, + "learning_rate": 0.0004676315789473684, + "loss": 1.4129, + "step": 22300 + }, + { + "epoch": 8.38, + "learning_rate": 0.00046736842105263163, + "loss": 1.4099, + "step": 22400 + }, + { + "epoch": 8.41, + "learning_rate": 0.0004671052631578948, + "loss": 1.4147, + "step": 22500 + }, + { + "epoch": 8.45, + "learning_rate": 0.0004668421052631579, + "loss": 1.4136, + "step": 22600 + }, + { + "epoch": 8.49, + "learning_rate": 0.00046657894736842106, + "loss": 1.406, + "step": 22700 + }, + { + "epoch": 8.53, + "learning_rate": 0.0004663157894736842, + "loss": 1.4063, + "step": 22800 + }, + { + "epoch": 8.56, + "learning_rate": 0.0004660526315789474, + "loss": 1.4048, + "step": 22900 + }, + { + "epoch": 8.6, + "learning_rate": 0.00046578947368421054, + "loss": 1.405, + "step": 23000 + }, + { + "epoch": 8.64, + "learning_rate": 0.0004655263157894737, + "loss": 1.4049, + "step": 23100 + }, + { + "epoch": 8.68, + "learning_rate": 0.00046526315789473683, + "loss": 1.4077, + "step": 23200 + }, + { + "epoch": 8.71, + "learning_rate": 0.000465, + "loss": 1.4017, + "step": 23300 + }, + { + "epoch": 8.75, + "learning_rate": 0.00046473684210526317, + "loss": 1.4077, + "step": 23400 + }, + { + "epoch": 8.79, + "learning_rate": 0.0004644736842105263, + "loss": 1.4015, + "step": 23500 + }, + { + "epoch": 8.83, + "learning_rate": 0.00046421052631578946, + "loss": 1.4028, + "step": 23600 + }, + { + "epoch": 8.86, + "learning_rate": 0.0004639473684210526, + "loss": 1.3991, + "step": 23700 + }, + { + "epoch": 8.9, + "learning_rate": 0.0004636842105263158, + "loss": 1.4002, + "step": 23800 + }, + { + "epoch": 8.94, + "learning_rate": 0.00046342105263157894, + "loss": 1.3981, + "step": 23900 + }, + { + "epoch": 8.98, + "learning_rate": 0.00046315789473684214, + "loss": 1.3977, + "step": 24000 + }, + { + "epoch": 9.01, + "learning_rate": 0.0004628947368421053, + "loss": 1.4059, + "step": 24100 + }, + { + "epoch": 9.05, + "learning_rate": 0.0004626315789473684, + "loss": 1.3919, + "step": 24200 + }, + { + "epoch": 9.09, + "learning_rate": 0.0004623684210526316, + "loss": 1.3951, + "step": 24300 + }, + { + "epoch": 9.12, + "learning_rate": 0.00046210526315789476, + "loss": 1.3904, + "step": 24400 + }, + { + "epoch": 9.16, + "learning_rate": 0.0004618421052631579, + "loss": 1.3904, + "step": 24500 + }, + { + "epoch": 9.2, + "learning_rate": 0.00046157894736842105, + "loss": 1.3907, + "step": 24600 + }, + { + "epoch": 9.24, + "learning_rate": 0.00046131578947368425, + "loss": 1.3887, + "step": 24700 + }, + { + "epoch": 9.27, + "learning_rate": 0.0004610526315789474, + "loss": 1.389, + "step": 24800 + }, + { + "epoch": 9.31, + "learning_rate": 0.00046078947368421053, + "loss": 1.3876, + "step": 24900 + }, + { + "epoch": 9.35, + "learning_rate": 0.0004605263157894737, + "loss": 1.384, + "step": 25000 + }, + { + "epoch": 9.39, + "learning_rate": 0.00041611111111111113, + "loss": 1.3801, + "step": 25100 + }, + { + "epoch": 9.42, + "learning_rate": 0.00041555555555555557, + "loss": 1.3787, + "step": 25200 + }, + { + "epoch": 9.46, + "learning_rate": 0.000415, + "loss": 1.3744, + "step": 25300 + }, + { + "epoch": 9.5, + "learning_rate": 0.00041444444444444444, + "loss": 1.3753, + "step": 25400 + }, + { + "epoch": 9.54, + "learning_rate": 0.0004138888888888889, + "loss": 1.3728, + "step": 25500 + }, + { + "epoch": 9.57, + "learning_rate": 0.0004133333333333333, + "loss": 1.3707, + "step": 25600 + }, + { + "epoch": 9.61, + "learning_rate": 0.0004127777777777778, + "loss": 1.368, + "step": 25700 + }, + { + "epoch": 9.65, + "learning_rate": 0.00041222222222222224, + "loss": 1.368, + "step": 25800 + }, + { + "epoch": 9.69, + "learning_rate": 0.0004116666666666667, + "loss": 1.3685, + "step": 25900 + }, + { + "epoch": 9.72, + "learning_rate": 0.0004111111111111111, + "loss": 1.3691, + "step": 26000 + }, + { + "epoch": 9.76, + "learning_rate": 0.00041055555555555555, + "loss": 1.3667, + "step": 26100 + }, + { + "epoch": 9.8, + "learning_rate": 0.00041, + "loss": 1.3653, + "step": 26200 + }, + { + "epoch": 9.84, + "learning_rate": 0.00040944444444444443, + "loss": 1.365, + "step": 26300 + }, + { + "epoch": 9.87, + "learning_rate": 0.0004088888888888889, + "loss": 1.3606, + "step": 26400 + }, + { + "epoch": 9.91, + "learning_rate": 0.00040833333333333336, + "loss": 1.3617, + "step": 26500 + }, + { + "epoch": 9.95, + "learning_rate": 0.0004077777777777778, + "loss": 1.3634, + "step": 26600 + }, + { + "epoch": 9.98, + "learning_rate": 0.00040722222222222223, + "loss": 1.3618, + "step": 26700 + }, + { + "epoch": 10.02, + "learning_rate": 0.00040666666666666667, + "loss": 1.3629, + "step": 26800 + }, + { + "epoch": 10.06, + "learning_rate": 0.0004061111111111111, + "loss": 1.3543, + "step": 26900 + }, + { + "epoch": 10.1, + "learning_rate": 0.00040555555555555554, + "loss": 1.3543, + "step": 27000 + }, + { + "epoch": 10.13, + "learning_rate": 0.00040500000000000003, + "loss": 1.3501, + "step": 27100 + }, + { + "epoch": 10.17, + "learning_rate": 0.00040444444444444447, + "loss": 1.351, + "step": 27200 + }, + { + "epoch": 10.21, + "learning_rate": 0.0004038888888888889, + "loss": 1.3533, + "step": 27300 + }, + { + "epoch": 10.25, + "learning_rate": 0.00040333333333333334, + "loss": 1.3538, + "step": 27400 + }, + { + "epoch": 10.28, + "learning_rate": 0.0004027777777777778, + "loss": 1.3501, + "step": 27500 + }, + { + "epoch": 10.32, + "learning_rate": 0.0004022222222222222, + "loss": 1.3455, + "step": 27600 + }, + { + "epoch": 10.36, + "learning_rate": 0.00040166666666666665, + "loss": 1.3508, + "step": 27700 + }, + { + "epoch": 10.4, + "learning_rate": 0.0004011111111111111, + "loss": 1.3541, + "step": 27800 + }, + { + "epoch": 10.43, + "learning_rate": 0.0004005555555555556, + "loss": 1.3469, + "step": 27900 + }, + { + "epoch": 10.47, + "learning_rate": 0.0004, + "loss": 1.3451, + "step": 28000 + }, + { + "epoch": 10.51, + "learning_rate": 0.00039944444444444446, + "loss": 1.347, + "step": 28100 + }, + { + "epoch": 10.55, + "learning_rate": 0.0003988888888888889, + "loss": 1.346, + "step": 28200 + }, + { + "epoch": 10.58, + "learning_rate": 0.00039833333333333333, + "loss": 1.3448, + "step": 28300 + }, + { + "epoch": 10.62, + "learning_rate": 0.00039777777777777777, + "loss": 1.3432, + "step": 28400 + }, + { + "epoch": 10.66, + "learning_rate": 0.0003972222222222222, + "loss": 1.3433, + "step": 28500 + }, + { + "epoch": 10.7, + "learning_rate": 0.0003966666666666667, + "loss": 1.346, + "step": 28600 + }, + { + "epoch": 10.73, + "learning_rate": 0.00039611111111111113, + "loss": 1.3442, + "step": 28700 + }, + { + "epoch": 10.77, + "learning_rate": 0.00039555555555555557, + "loss": 1.3413, + "step": 28800 + }, + { + "epoch": 10.81, + "learning_rate": 0.000395, + "loss": 1.3413, + "step": 28900 + }, + { + "epoch": 10.85, + "learning_rate": 0.00039444444444444444, + "loss": 1.3389, + "step": 29000 + }, + { + "epoch": 10.88, + "learning_rate": 0.00039388888888888893, + "loss": 1.3406, + "step": 29100 + }, + { + "epoch": 10.92, + "learning_rate": 0.0003933333333333333, + "loss": 1.3392, + "step": 29200 + }, + { + "epoch": 10.96, + "learning_rate": 0.0003927777777777778, + "loss": 1.3355, + "step": 29300 + }, + { + "epoch": 10.99, + "learning_rate": 0.00039222222222222225, + "loss": 1.3386, + "step": 29400 + }, + { + "epoch": 11.03, + "learning_rate": 0.0003916666666666667, + "loss": 1.3402, + "step": 29500 + }, + { + "epoch": 11.07, + "learning_rate": 0.0003911111111111111, + "loss": 1.3317, + "step": 29600 + }, + { + "epoch": 11.11, + "learning_rate": 0.00039055555555555556, + "loss": 1.3314, + "step": 29700 + }, + { + "epoch": 11.14, + "learning_rate": 0.00039000000000000005, + "loss": 1.3319, + "step": 29800 + }, + { + "epoch": 11.18, + "learning_rate": 0.00038944444444444443, + "loss": 1.3309, + "step": 29900 + }, + { + "epoch": 11.22, + "learning_rate": 0.0003888888888888889, + "loss": 1.33, + "step": 30000 + }, + { + "epoch": 11.26, + "learning_rate": 0.0003883333333333333, + "loss": 1.3311, + "step": 30100 + }, + { + "epoch": 11.29, + "learning_rate": 0.0003877777777777778, + "loss": 1.3316, + "step": 30200 + }, + { + "epoch": 11.33, + "learning_rate": 0.00038722222222222223, + "loss": 1.3262, + "step": 30300 + }, + { + "epoch": 11.37, + "learning_rate": 0.00038666666666666667, + "loss": 1.3283, + "step": 30400 + }, + { + "epoch": 11.41, + "learning_rate": 0.00038611111111111116, + "loss": 1.3257, + "step": 30500 + }, + { + "epoch": 11.44, + "learning_rate": 0.00038555555555555554, + "loss": 1.3267, + "step": 30600 + }, + { + "epoch": 11.48, + "learning_rate": 0.00038500000000000003, + "loss": 1.3248, + "step": 30700 + }, + { + "epoch": 11.52, + "learning_rate": 0.0003844444444444444, + "loss": 1.3224, + "step": 30800 + }, + { + "epoch": 11.56, + "learning_rate": 0.0003838888888888889, + "loss": 1.3207, + "step": 30900 + }, + { + "epoch": 11.59, + "learning_rate": 0.00038333333333333334, + "loss": 1.3215, + "step": 31000 + }, + { + "epoch": 11.63, + "learning_rate": 0.0003827777777777778, + "loss": 1.3217, + "step": 31100 + }, + { + "epoch": 11.67, + "learning_rate": 0.0003822222222222223, + "loss": 1.3202, + "step": 31200 + }, + { + "epoch": 11.71, + "learning_rate": 0.00038166666666666666, + "loss": 1.3222, + "step": 31300 + }, + { + "epoch": 11.74, + "learning_rate": 0.00038111111111111115, + "loss": 1.3199, + "step": 31400 + }, + { + "epoch": 11.78, + "learning_rate": 0.00038055555555555553, + "loss": 1.3203, + "step": 31500 + }, + { + "epoch": 11.82, + "learning_rate": 0.00038, + "loss": 1.3194, + "step": 31600 + }, + { + "epoch": 11.85, + "learning_rate": 0.0003794444444444444, + "loss": 1.3188, + "step": 31700 + }, + { + "epoch": 11.89, + "learning_rate": 0.0003788888888888889, + "loss": 1.3192, + "step": 31800 + }, + { + "epoch": 11.93, + "learning_rate": 0.0003783333333333334, + "loss": 1.3171, + "step": 31900 + }, + { + "epoch": 11.97, + "learning_rate": 0.00037777777777777777, + "loss": 1.3215, + "step": 32000 + }, + { + "epoch": 12.0, + "learning_rate": 0.00037722222222222226, + "loss": 1.3216, + "step": 32100 + }, + { + "epoch": 12.04, + "learning_rate": 0.00037666666666666664, + "loss": 1.3123, + "step": 32200 + }, + { + "epoch": 12.08, + "learning_rate": 0.00037611111111111113, + "loss": 1.3093, + "step": 32300 + }, + { + "epoch": 12.12, + "learning_rate": 0.0003755555555555555, + "loss": 1.3136, + "step": 32400 + }, + { + "epoch": 12.15, + "learning_rate": 0.000375, + "loss": 1.3114, + "step": 32500 + }, + { + "epoch": 12.19, + "learning_rate": 0.0003744444444444445, + "loss": 1.3089, + "step": 32600 + }, + { + "epoch": 12.23, + "learning_rate": 0.0003738888888888889, + "loss": 1.3109, + "step": 32700 + }, + { + "epoch": 12.27, + "learning_rate": 0.0003733333333333334, + "loss": 1.3082, + "step": 32800 + }, + { + "epoch": 12.3, + "learning_rate": 0.00037277777777777776, + "loss": 1.3104, + "step": 32900 + }, + { + "epoch": 12.34, + "learning_rate": 0.00037222222222222225, + "loss": 1.3101, + "step": 33000 + }, + { + "epoch": 12.38, + "learning_rate": 0.00037166666666666663, + "loss": 1.3074, + "step": 33100 + }, + { + "epoch": 12.42, + "learning_rate": 0.0003711111111111111, + "loss": 1.3055, + "step": 33200 + }, + { + "epoch": 12.45, + "learning_rate": 0.0003705555555555556, + "loss": 1.305, + "step": 33300 + }, + { + "epoch": 12.49, + "learning_rate": 0.00037, + "loss": 1.3077, + "step": 33400 + }, + { + "epoch": 12.53, + "learning_rate": 0.0003694444444444445, + "loss": 1.3065, + "step": 33500 + }, + { + "epoch": 12.57, + "learning_rate": 0.00036888888888888887, + "loss": 1.3052, + "step": 33600 + }, + { + "epoch": 12.6, + "learning_rate": 0.00036833333333333336, + "loss": 1.3078, + "step": 33700 + }, + { + "epoch": 12.64, + "learning_rate": 0.00036777777777777774, + "loss": 1.3038, + "step": 33800 + }, + { + "epoch": 12.68, + "learning_rate": 0.00036722222222222223, + "loss": 1.302, + "step": 33900 + }, + { + "epoch": 12.71, + "learning_rate": 0.00036666666666666667, + "loss": 1.3034, + "step": 34000 + }, + { + "epoch": 12.75, + "learning_rate": 0.0003661111111111111, + "loss": 1.3002, + "step": 34100 + }, + { + "epoch": 12.79, + "learning_rate": 0.0003655555555555556, + "loss": 1.3046, + "step": 34200 + }, + { + "epoch": 12.83, + "learning_rate": 0.000365, + "loss": 1.3343, + "step": 34300 + }, + { + "epoch": 12.86, + "learning_rate": 0.00036444444444444447, + "loss": 1.3163, + "step": 34400 + }, + { + "epoch": 12.9, + "learning_rate": 0.00036388888888888886, + "loss": 1.3062, + "step": 34500 + }, + { + "epoch": 12.94, + "learning_rate": 0.00036333333333333335, + "loss": 1.3042, + "step": 34600 + }, + { + "epoch": 12.98, + "learning_rate": 0.0003627777777777778, + "loss": 1.3036, + "step": 34700 + }, + { + "epoch": 13.01, + "learning_rate": 0.0003622222222222222, + "loss": 1.3137, + "step": 34800 + }, + { + "epoch": 13.05, + "learning_rate": 0.0003616666666666667, + "loss": 1.2952, + "step": 34900 + }, + { + "epoch": 13.09, + "learning_rate": 0.0003611111111111111, + "loss": 1.2961, + "step": 35000 + }, + { + "epoch": 13.13, + "learning_rate": 0.0003605555555555556, + "loss": 1.2932, + "step": 35100 + }, + { + "epoch": 13.16, + "learning_rate": 0.00035999999999999997, + "loss": 1.2936, + "step": 35200 + }, + { + "epoch": 13.2, + "learning_rate": 0.00035944444444444446, + "loss": 1.2966, + "step": 35300 + }, + { + "epoch": 13.24, + "learning_rate": 0.0003588888888888889, + "loss": 1.2942, + "step": 35400 + }, + { + "epoch": 13.28, + "learning_rate": 0.00035833333333333333, + "loss": 1.2914, + "step": 35500 + }, + { + "epoch": 13.31, + "learning_rate": 0.00035777777777777777, + "loss": 1.296, + "step": 35600 + }, + { + "epoch": 13.35, + "learning_rate": 0.0003572222222222222, + "loss": 1.2945, + "step": 35700 + }, + { + "epoch": 13.39, + "learning_rate": 0.0003566666666666667, + "loss": 1.2878, + "step": 35800 + }, + { + "epoch": 13.43, + "learning_rate": 0.0003561111111111111, + "loss": 1.2926, + "step": 35900 + }, + { + "epoch": 13.46, + "learning_rate": 0.00035555555555555557, + "loss": 1.2918, + "step": 36000 + }, + { + "epoch": 13.5, + "learning_rate": 0.000355, + "loss": 1.2888, + "step": 36100 + }, + { + "epoch": 13.54, + "learning_rate": 0.00035444444444444445, + "loss": 1.2903, + "step": 36200 + }, + { + "epoch": 13.58, + "learning_rate": 0.0003538888888888889, + "loss": 1.3005, + "step": 36300 + }, + { + "epoch": 13.61, + "learning_rate": 0.0003533333333333333, + "loss": 1.2875, + "step": 36400 + }, + { + "epoch": 13.65, + "learning_rate": 0.0003527777777777778, + "loss": 1.2918, + "step": 36500 + }, + { + "epoch": 13.69, + "learning_rate": 0.00035222222222222225, + "loss": 1.2885, + "step": 36600 + }, + { + "epoch": 13.72, + "learning_rate": 0.0003516666666666667, + "loss": 1.2856, + "step": 36700 + }, + { + "epoch": 13.76, + "learning_rate": 0.0003511111111111111, + "loss": 1.2884, + "step": 36800 + }, + { + "epoch": 13.8, + "learning_rate": 0.00035055555555555556, + "loss": 1.2922, + "step": 36900 + }, + { + "epoch": 13.84, + "learning_rate": 0.00035, + "loss": 1.2875, + "step": 37000 + }, + { + "epoch": 13.87, + "learning_rate": 0.00034944444444444443, + "loss": 1.2874, + "step": 37100 + }, + { + "epoch": 13.91, + "learning_rate": 0.0003488888888888889, + "loss": 1.2858, + "step": 37200 + }, + { + "epoch": 13.95, + "learning_rate": 0.00034833333333333336, + "loss": 1.2849, + "step": 37300 + }, + { + "epoch": 13.99, + "learning_rate": 0.0003477777777777778, + "loss": 1.2803, + "step": 37400 + }, + { + "epoch": 14.02, + "learning_rate": 0.00034722222222222224, + "loss": 1.2903, + "step": 37500 + }, + { + "epoch": 14.06, + "learning_rate": 0.00034666666666666667, + "loss": 1.2778, + "step": 37600 + }, + { + "epoch": 14.1, + "learning_rate": 0.0003461111111111111, + "loss": 1.2795, + "step": 37700 + }, + { + "epoch": 14.14, + "learning_rate": 0.00034555555555555555, + "loss": 1.277, + "step": 37800 + }, + { + "epoch": 14.17, + "learning_rate": 0.000345, + "loss": 1.2798, + "step": 37900 + }, + { + "epoch": 14.21, + "learning_rate": 0.0003444444444444445, + "loss": 1.2795, + "step": 38000 + }, + { + "epoch": 14.25, + "learning_rate": 0.0003438888888888889, + "loss": 1.2772, + "step": 38100 + }, + { + "epoch": 14.29, + "learning_rate": 0.00034333333333333335, + "loss": 1.2754, + "step": 38200 + }, + { + "epoch": 14.32, + "learning_rate": 0.0003427777777777778, + "loss": 1.2762, + "step": 38300 + }, + { + "epoch": 14.36, + "learning_rate": 0.0003422222222222222, + "loss": 1.2732, + "step": 38400 + }, + { + "epoch": 14.4, + "learning_rate": 0.00034166666666666666, + "loss": 1.2793, + "step": 38500 + }, + { + "epoch": 14.44, + "learning_rate": 0.0003411111111111111, + "loss": 1.2751, + "step": 38600 + }, + { + "epoch": 14.47, + "learning_rate": 0.0003405555555555556, + "loss": 1.2753, + "step": 38700 + }, + { + "epoch": 14.51, + "learning_rate": 0.00034, + "loss": 1.2758, + "step": 38800 + }, + { + "epoch": 14.55, + "learning_rate": 0.00033944444444444446, + "loss": 1.2734, + "step": 38900 + }, + { + "epoch": 14.58, + "learning_rate": 0.0003388888888888889, + "loss": 1.2723, + "step": 39000 + }, + { + "epoch": 14.62, + "learning_rate": 0.00033833333333333334, + "loss": 1.2746, + "step": 39100 + }, + { + "epoch": 14.66, + "learning_rate": 0.00033777777777777777, + "loss": 1.275, + "step": 39200 + }, + { + "epoch": 14.7, + "learning_rate": 0.0003372222222222222, + "loss": 1.2711, + "step": 39300 + }, + { + "epoch": 14.73, + "learning_rate": 0.0003366666666666667, + "loss": 1.2688, + "step": 39400 + }, + { + "epoch": 14.77, + "learning_rate": 0.00033611111111111114, + "loss": 1.2713, + "step": 39500 + }, + { + "epoch": 14.81, + "learning_rate": 0.0003355555555555556, + "loss": 1.2725, + "step": 39600 + }, + { + "epoch": 14.85, + "learning_rate": 0.000335, + "loss": 1.2679, + "step": 39700 + }, + { + "epoch": 14.88, + "learning_rate": 0.00033444444444444445, + "loss": 1.2748, + "step": 39800 + }, + { + "epoch": 14.92, + "learning_rate": 0.0003338888888888889, + "loss": 1.2708, + "step": 39900 + }, + { + "epoch": 14.96, + "learning_rate": 0.0003333333333333333, + "loss": 1.2706, + "step": 40000 + }, + { + "epoch": 15.0, + "learning_rate": 0.0003327777777777778, + "loss": 1.2667, + "step": 40100 + }, + { + "epoch": 15.03, + "learning_rate": 0.0003322222222222222, + "loss": 1.2703, + "step": 40200 + }, + { + "epoch": 15.07, + "learning_rate": 0.0003316666666666667, + "loss": 1.264, + "step": 40300 + }, + { + "epoch": 15.11, + "learning_rate": 0.0003311111111111111, + "loss": 1.262, + "step": 40400 + }, + { + "epoch": 15.15, + "learning_rate": 0.00033055555555555556, + "loss": 1.2645, + "step": 40500 + }, + { + "epoch": 15.18, + "learning_rate": 0.00033, + "loss": 1.2603, + "step": 40600 + }, + { + "epoch": 15.22, + "learning_rate": 0.00032944444444444444, + "loss": 1.2648, + "step": 40700 + }, + { + "epoch": 15.26, + "learning_rate": 0.0003288888888888889, + "loss": 1.2644, + "step": 40800 + }, + { + "epoch": 15.3, + "learning_rate": 0.0003283333333333333, + "loss": 1.2624, + "step": 40900 + }, + { + "epoch": 15.33, + "learning_rate": 0.0003277777777777778, + "loss": 1.2623, + "step": 41000 + }, + { + "epoch": 15.37, + "learning_rate": 0.00032722222222222224, + "loss": 1.2616, + "step": 41100 + }, + { + "epoch": 15.41, + "learning_rate": 0.0003266666666666667, + "loss": 1.2652, + "step": 41200 + }, + { + "epoch": 15.44, + "learning_rate": 0.0003261111111111111, + "loss": 1.2621, + "step": 41300 + }, + { + "epoch": 15.48, + "learning_rate": 0.00032555555555555555, + "loss": 1.26, + "step": 41400 + }, + { + "epoch": 15.52, + "learning_rate": 0.00032500000000000004, + "loss": 1.259, + "step": 41500 + }, + { + "epoch": 15.56, + "learning_rate": 0.0003244444444444444, + "loss": 1.2648, + "step": 41600 + }, + { + "epoch": 15.59, + "learning_rate": 0.0003238888888888889, + "loss": 1.2615, + "step": 41700 + }, + { + "epoch": 15.63, + "learning_rate": 0.0003233333333333333, + "loss": 1.261, + "step": 41800 + }, + { + "epoch": 15.67, + "learning_rate": 0.0003227777777777778, + "loss": 1.2573, + "step": 41900 + }, + { + "epoch": 15.71, + "learning_rate": 0.0003222222222222222, + "loss": 1.2604, + "step": 42000 + }, + { + "epoch": 15.74, + "learning_rate": 0.00032166666666666666, + "loss": 1.2574, + "step": 42100 + }, + { + "epoch": 15.78, + "learning_rate": 0.00032111111111111115, + "loss": 1.255, + "step": 42200 + }, + { + "epoch": 15.82, + "learning_rate": 0.00032055555555555554, + "loss": 1.2575, + "step": 42300 + }, + { + "epoch": 15.86, + "learning_rate": 0.00032, + "loss": 1.2556, + "step": 42400 + }, + { + "epoch": 15.89, + "learning_rate": 0.0003194444444444444, + "loss": 1.2547, + "step": 42500 + }, + { + "epoch": 15.93, + "learning_rate": 0.0003188888888888889, + "loss": 1.2567, + "step": 42600 + }, + { + "epoch": 15.97, + "learning_rate": 0.00031833333333333334, + "loss": 1.2552, + "step": 42700 + }, + { + "epoch": 16.01, + "learning_rate": 0.0003177777777777778, + "loss": 1.2637, + "step": 42800 + }, + { + "epoch": 16.04, + "learning_rate": 0.00031722222222222227, + "loss": 1.2495, + "step": 42900 + }, + { + "epoch": 16.08, + "learning_rate": 0.00031666666666666665, + "loss": 1.2485, + "step": 43000 + }, + { + "epoch": 16.12, + "learning_rate": 0.00031611111111111114, + "loss": 1.2513, + "step": 43100 + }, + { + "epoch": 16.16, + "learning_rate": 0.0003155555555555555, + "loss": 1.2494, + "step": 43200 + }, + { + "epoch": 16.19, + "learning_rate": 0.000315, + "loss": 1.253, + "step": 43300 + }, + { + "epoch": 16.23, + "learning_rate": 0.0003144444444444445, + "loss": 1.2543, + "step": 43400 + }, + { + "epoch": 16.27, + "learning_rate": 0.0003138888888888889, + "loss": 1.2488, + "step": 43500 + }, + { + "epoch": 16.31, + "learning_rate": 0.0003133333333333334, + "loss": 1.2479, + "step": 43600 + }, + { + "epoch": 16.34, + "learning_rate": 0.00031277777777777776, + "loss": 1.2481, + "step": 43700 + }, + { + "epoch": 16.38, + "learning_rate": 0.00031222222222222225, + "loss": 1.2528, + "step": 43800 + }, + { + "epoch": 16.42, + "learning_rate": 0.00031166666666666663, + "loss": 1.2445, + "step": 43900 + }, + { + "epoch": 16.45, + "learning_rate": 0.0003111111111111111, + "loss": 1.2485, + "step": 44000 + }, + { + "epoch": 16.49, + "learning_rate": 0.0003105555555555555, + "loss": 1.2495, + "step": 44100 + }, + { + "epoch": 16.53, + "learning_rate": 0.00031, + "loss": 1.2483, + "step": 44200 + }, + { + "epoch": 16.57, + "learning_rate": 0.0003094444444444445, + "loss": 1.2411, + "step": 44300 + }, + { + "epoch": 16.6, + "learning_rate": 0.0003088888888888889, + "loss": 1.2498, + "step": 44400 + }, + { + "epoch": 16.64, + "learning_rate": 0.00030833333333333337, + "loss": 1.2436, + "step": 44500 + }, + { + "epoch": 16.68, + "learning_rate": 0.00030777777777777775, + "loss": 1.2457, + "step": 44600 + }, + { + "epoch": 16.72, + "learning_rate": 0.00030722222222222224, + "loss": 1.2442, + "step": 44700 + }, + { + "epoch": 16.75, + "learning_rate": 0.0003066666666666667, + "loss": 1.2459, + "step": 44800 + }, + { + "epoch": 16.79, + "learning_rate": 0.0003061111111111111, + "loss": 1.2394, + "step": 44900 + }, + { + "epoch": 16.83, + "learning_rate": 0.0003055555555555556, + "loss": 1.2418, + "step": 45000 + }, + { + "epoch": 16.87, + "learning_rate": 0.000305, + "loss": 1.244, + "step": 45100 + }, + { + "epoch": 16.9, + "learning_rate": 0.0003044444444444445, + "loss": 1.2428, + "step": 45200 + }, + { + "epoch": 16.94, + "learning_rate": 0.00030388888888888886, + "loss": 1.2445, + "step": 45300 + }, + { + "epoch": 16.98, + "learning_rate": 0.00030333333333333335, + "loss": 1.2447, + "step": 45400 + }, + { + "epoch": 17.02, + "learning_rate": 0.0003027777777777778, + "loss": 1.2505, + "step": 45500 + }, + { + "epoch": 17.05, + "learning_rate": 0.0003022222222222222, + "loss": 1.2316, + "step": 45600 + }, + { + "epoch": 17.09, + "learning_rate": 0.0003016666666666667, + "loss": 1.2389, + "step": 45700 + }, + { + "epoch": 17.13, + "learning_rate": 0.0003011111111111111, + "loss": 1.2364, + "step": 45800 + }, + { + "epoch": 17.17, + "learning_rate": 0.0003005555555555556, + "loss": 1.2389, + "step": 45900 + }, + { + "epoch": 17.2, + "learning_rate": 0.0003, + "loss": 1.2379, + "step": 46000 + }, + { + "epoch": 17.24, + "learning_rate": 0.00029944444444444446, + "loss": 1.236, + "step": 46100 + }, + { + "epoch": 17.28, + "learning_rate": 0.0002988888888888889, + "loss": 1.2386, + "step": 46200 + }, + { + "epoch": 17.31, + "learning_rate": 0.00029833333333333334, + "loss": 1.2364, + "step": 46300 + }, + { + "epoch": 17.35, + "learning_rate": 0.0002977777777777778, + "loss": 1.2365, + "step": 46400 + }, + { + "epoch": 17.39, + "learning_rate": 0.0002972222222222222, + "loss": 1.2332, + "step": 46500 + }, + { + "epoch": 17.43, + "learning_rate": 0.0002966666666666667, + "loss": 1.2385, + "step": 46600 + }, + { + "epoch": 17.46, + "learning_rate": 0.0002961111111111111, + "loss": 1.2334, + "step": 46700 + }, + { + "epoch": 17.5, + "learning_rate": 0.0002955555555555556, + "loss": 1.2361, + "step": 46800 + }, + { + "epoch": 17.54, + "learning_rate": 0.000295, + "loss": 1.2319, + "step": 46900 + }, + { + "epoch": 17.58, + "learning_rate": 0.00029444444444444445, + "loss": 1.2394, + "step": 47000 + }, + { + "epoch": 17.61, + "learning_rate": 0.0002938888888888889, + "loss": 1.232, + "step": 47100 + }, + { + "epoch": 17.65, + "learning_rate": 0.0002933333333333333, + "loss": 1.2349, + "step": 47200 + }, + { + "epoch": 17.69, + "learning_rate": 0.0002927777777777778, + "loss": 1.2358, + "step": 47300 + }, + { + "epoch": 17.73, + "learning_rate": 0.0002922222222222222, + "loss": 1.2308, + "step": 47400 + }, + { + "epoch": 17.76, + "learning_rate": 0.0002916666666666667, + "loss": 1.2316, + "step": 47500 + }, + { + "epoch": 17.8, + "learning_rate": 0.00029111111111111113, + "loss": 1.2309, + "step": 47600 + }, + { + "epoch": 17.84, + "learning_rate": 0.00029055555555555556, + "loss": 1.2327, + "step": 47700 + }, + { + "epoch": 17.88, + "learning_rate": 0.00029, + "loss": 1.2279, + "step": 47800 + }, + { + "epoch": 17.91, + "learning_rate": 0.00028944444444444444, + "loss": 1.2307, + "step": 47900 + }, + { + "epoch": 17.95, + "learning_rate": 0.0002888888888888889, + "loss": 1.23, + "step": 48000 + }, + { + "epoch": 17.99, + "learning_rate": 0.0002883333333333333, + "loss": 1.2274, + "step": 48100 + }, + { + "epoch": 18.03, + "learning_rate": 0.0002877777777777778, + "loss": 1.2322, + "step": 48200 + }, + { + "epoch": 18.06, + "learning_rate": 0.00028722222222222224, + "loss": 1.2269, + "step": 48300 + }, + { + "epoch": 18.1, + "learning_rate": 0.0002866666666666667, + "loss": 1.2264, + "step": 48400 + }, + { + "epoch": 18.14, + "learning_rate": 0.0002861111111111111, + "loss": 1.2255, + "step": 48500 + }, + { + "epoch": 18.17, + "learning_rate": 0.00028555555555555555, + "loss": 1.2243, + "step": 48600 + }, + { + "epoch": 18.21, + "learning_rate": 0.000285, + "loss": 1.223, + "step": 48700 + }, + { + "epoch": 18.25, + "learning_rate": 0.0002844444444444444, + "loss": 1.2232, + "step": 48800 + }, + { + "epoch": 18.29, + "learning_rate": 0.0002838888888888889, + "loss": 1.2238, + "step": 48900 + }, + { + "epoch": 18.32, + "learning_rate": 0.00028333333333333335, + "loss": 1.2239, + "step": 49000 + }, + { + "epoch": 18.36, + "learning_rate": 0.0002827777777777778, + "loss": 1.2212, + "step": 49100 + }, + { + "epoch": 18.4, + "learning_rate": 0.00028222222222222223, + "loss": 1.2234, + "step": 49200 + }, + { + "epoch": 18.44, + "learning_rate": 0.00028166666666666666, + "loss": 1.2267, + "step": 49300 + }, + { + "epoch": 18.47, + "learning_rate": 0.0002811111111111111, + "loss": 1.2196, + "step": 49400 + }, + { + "epoch": 18.51, + "learning_rate": 0.00028055555555555554, + "loss": 1.2264, + "step": 49500 + }, + { + "epoch": 18.55, + "learning_rate": 0.00028000000000000003, + "loss": 1.2256, + "step": 49600 + }, + { + "epoch": 18.59, + "learning_rate": 0.00027944444444444447, + "loss": 1.2208, + "step": 49700 + }, + { + "epoch": 18.62, + "learning_rate": 0.0002788888888888889, + "loss": 1.2228, + "step": 49800 + }, + { + "epoch": 18.66, + "learning_rate": 0.00027833333333333334, + "loss": 1.2245, + "step": 49900 + }, + { + "epoch": 18.7, + "learning_rate": 0.0002777777777777778, + "loss": 1.2251, + "step": 50000 + }, + { + "epoch": 18.74, + "learning_rate": 0.0002772222222222222, + "loss": 1.2167, + "step": 50100 + }, + { + "epoch": 18.77, + "learning_rate": 0.00027666666666666665, + "loss": 1.2187, + "step": 50200 + }, + { + "epoch": 18.81, + "learning_rate": 0.0002761111111111111, + "loss": 1.2226, + "step": 50300 + }, + { + "epoch": 18.85, + "learning_rate": 0.0002755555555555556, + "loss": 1.2198, + "step": 50400 + }, + { + "epoch": 18.89, + "learning_rate": 0.000275, + "loss": 1.2221, + "step": 50500 + }, + { + "epoch": 18.92, + "learning_rate": 0.00027444444444444445, + "loss": 1.219, + "step": 50600 + }, + { + "epoch": 18.96, + "learning_rate": 0.0002738888888888889, + "loss": 1.2193, + "step": 50700 + }, + { + "epoch": 19.0, + "learning_rate": 0.00027333333333333333, + "loss": 1.2178, + "step": 50800 + }, + { + "epoch": 19.04, + "learning_rate": 0.00027277777777777776, + "loss": 1.2218, + "step": 50900 + }, + { + "epoch": 19.07, + "learning_rate": 0.0002722222222222222, + "loss": 1.2159, + "step": 51000 + }, + { + "epoch": 19.11, + "learning_rate": 0.0002716666666666667, + "loss": 1.2122, + "step": 51100 + }, + { + "epoch": 19.15, + "learning_rate": 0.00027111111111111113, + "loss": 1.2179, + "step": 51200 + }, + { + "epoch": 19.18, + "learning_rate": 0.00027055555555555557, + "loss": 1.2161, + "step": 51300 + }, + { + "epoch": 19.22, + "learning_rate": 0.00027, + "loss": 1.2144, + "step": 51400 + }, + { + "epoch": 19.26, + "learning_rate": 0.00026944444444444444, + "loss": 1.2154, + "step": 51500 + }, + { + "epoch": 19.3, + "learning_rate": 0.00026888888888888893, + "loss": 1.2101, + "step": 51600 + }, + { + "epoch": 19.33, + "learning_rate": 0.0002683333333333333, + "loss": 1.2083, + "step": 51700 + }, + { + "epoch": 19.37, + "learning_rate": 0.0002677777777777778, + "loss": 1.2219, + "step": 51800 + }, + { + "epoch": 19.41, + "learning_rate": 0.00026722222222222224, + "loss": 1.2137, + "step": 51900 + }, + { + "epoch": 19.45, + "learning_rate": 0.0002666666666666667, + "loss": 1.2127, + "step": 52000 + }, + { + "epoch": 19.48, + "learning_rate": 0.0002661111111111111, + "loss": 1.2112, + "step": 52100 + }, + { + "epoch": 19.52, + "learning_rate": 0.00026555555555555555, + "loss": 1.2102, + "step": 52200 + }, + { + "epoch": 19.56, + "learning_rate": 0.00026500000000000004, + "loss": 1.2115, + "step": 52300 + }, + { + "epoch": 19.6, + "learning_rate": 0.00026444444444444443, + "loss": 1.2088, + "step": 52400 + }, + { + "epoch": 19.63, + "learning_rate": 0.0002638888888888889, + "loss": 1.2135, + "step": 52500 + }, + { + "epoch": 19.67, + "learning_rate": 0.0002633333333333333, + "loss": 1.2133, + "step": 52600 + }, + { + "epoch": 19.71, + "learning_rate": 0.0002627777777777778, + "loss": 1.2094, + "step": 52700 + }, + { + "epoch": 19.75, + "learning_rate": 0.00026222222222222223, + "loss": 1.2067, + "step": 52800 + }, + { + "epoch": 19.78, + "learning_rate": 0.00026166666666666667, + "loss": 1.2097, + "step": 52900 + }, + { + "epoch": 19.82, + "learning_rate": 0.00026111111111111116, + "loss": 1.2099, + "step": 53000 + }, + { + "epoch": 19.86, + "learning_rate": 0.00026055555555555554, + "loss": 1.2097, + "step": 53100 + }, + { + "epoch": 19.9, + "learning_rate": 0.00026000000000000003, + "loss": 1.2044, + "step": 53200 + }, + { + "epoch": 19.93, + "learning_rate": 0.0002594444444444444, + "loss": 1.2092, + "step": 53300 + }, + { + "epoch": 19.97, + "learning_rate": 0.0002588888888888889, + "loss": 1.2061, + "step": 53400 + }, + { + "epoch": 20.01, + "learning_rate": 0.00025833333333333334, + "loss": 1.2142, + "step": 53500 + }, + { + "epoch": 20.04, + "learning_rate": 0.0002577777777777778, + "loss": 1.2038, + "step": 53600 + }, + { + "epoch": 20.08, + "learning_rate": 0.00025722222222222227, + "loss": 1.2003, + "step": 53700 + }, + { + "epoch": 20.12, + "learning_rate": 0.00025666666666666665, + "loss": 1.2033, + "step": 53800 + }, + { + "epoch": 20.16, + "learning_rate": 0.00025611111111111114, + "loss": 1.2045, + "step": 53900 + }, + { + "epoch": 20.19, + "learning_rate": 0.00025555555555555553, + "loss": 1.1993, + "step": 54000 + }, + { + "epoch": 20.23, + "learning_rate": 0.000255, + "loss": 1.204, + "step": 54100 + }, + { + "epoch": 20.27, + "learning_rate": 0.0002544444444444444, + "loss": 1.2039, + "step": 54200 + }, + { + "epoch": 20.31, + "learning_rate": 0.0002538888888888889, + "loss": 1.2004, + "step": 54300 + }, + { + "epoch": 20.34, + "learning_rate": 0.0002533333333333334, + "loss": 1.2046, + "step": 54400 + }, + { + "epoch": 20.38, + "learning_rate": 0.00025277777777777777, + "loss": 1.2048, + "step": 54500 + }, + { + "epoch": 20.42, + "learning_rate": 0.00025222222222222226, + "loss": 1.1981, + "step": 54600 + }, + { + "epoch": 20.46, + "learning_rate": 0.00025166666666666664, + "loss": 1.2021, + "step": 54700 + }, + { + "epoch": 20.49, + "learning_rate": 0.00025111111111111113, + "loss": 1.201, + "step": 54800 + }, + { + "epoch": 20.53, + "learning_rate": 0.0002505555555555555, + "loss": 1.2027, + "step": 54900 + }, + { + "epoch": 20.57, + "learning_rate": 0.00025, + "loss": 1.201, + "step": 55000 + }, + { + "epoch": 20.61, + "learning_rate": 0.00024944444444444444, + "loss": 1.1973, + "step": 55100 + }, + { + "epoch": 20.64, + "learning_rate": 0.0002488888888888889, + "loss": 1.1996, + "step": 55200 + }, + { + "epoch": 20.68, + "learning_rate": 0.0002483333333333333, + "loss": 1.1996, + "step": 55300 + }, + { + "epoch": 20.72, + "learning_rate": 0.0002477777777777778, + "loss": 1.2022, + "step": 55400 + }, + { + "epoch": 20.76, + "learning_rate": 0.00024722222222222224, + "loss": 1.1971, + "step": 55500 + }, + { + "epoch": 20.79, + "learning_rate": 0.0002466666666666667, + "loss": 1.1992, + "step": 55600 + }, + { + "epoch": 20.83, + "learning_rate": 0.0002461111111111111, + "loss": 1.1997, + "step": 55700 + }, + { + "epoch": 20.87, + "learning_rate": 0.00024555555555555556, + "loss": 1.2, + "step": 55800 + }, + { + "epoch": 20.9, + "learning_rate": 0.000245, + "loss": 1.1995, + "step": 55900 + }, + { + "epoch": 20.94, + "learning_rate": 0.00024444444444444443, + "loss": 1.1969, + "step": 56000 + }, + { + "epoch": 20.98, + "learning_rate": 0.0002438888888888889, + "loss": 1.1957, + "step": 56100 + }, + { + "epoch": 21.02, + "learning_rate": 0.00024333333333333336, + "loss": 1.2026, + "step": 56200 + }, + { + "epoch": 21.05, + "learning_rate": 0.0002427777777777778, + "loss": 1.1926, + "step": 56300 + }, + { + "epoch": 21.09, + "learning_rate": 0.00024222222222222223, + "loss": 1.1964, + "step": 56400 + }, + { + "epoch": 21.13, + "learning_rate": 0.00024166666666666667, + "loss": 1.1918, + "step": 56500 + }, + { + "epoch": 21.17, + "learning_rate": 0.0002411111111111111, + "loss": 1.1913, + "step": 56600 + }, + { + "epoch": 21.2, + "learning_rate": 0.00024055555555555554, + "loss": 1.1956, + "step": 56700 + }, + { + "epoch": 21.24, + "learning_rate": 0.00024, + "loss": 1.1932, + "step": 56800 + }, + { + "epoch": 21.28, + "learning_rate": 0.00023944444444444444, + "loss": 1.194, + "step": 56900 + }, + { + "epoch": 21.32, + "learning_rate": 0.0002388888888888889, + "loss": 1.1918, + "step": 57000 + }, + { + "epoch": 21.35, + "learning_rate": 0.00023833333333333334, + "loss": 1.1879, + "step": 57100 + }, + { + "epoch": 21.39, + "learning_rate": 0.00023777777777777778, + "loss": 1.1894, + "step": 57200 + }, + { + "epoch": 21.43, + "learning_rate": 0.00023722222222222222, + "loss": 1.1909, + "step": 57300 + }, + { + "epoch": 21.47, + "learning_rate": 0.00023666666666666668, + "loss": 1.1917, + "step": 57400 + }, + { + "epoch": 21.5, + "learning_rate": 0.00023611111111111112, + "loss": 1.1908, + "step": 57500 + }, + { + "epoch": 21.54, + "learning_rate": 0.00023555555555555556, + "loss": 1.1908, + "step": 57600 + }, + { + "epoch": 21.58, + "learning_rate": 0.000235, + "loss": 1.1924, + "step": 57700 + }, + { + "epoch": 21.62, + "learning_rate": 0.00023444444444444446, + "loss": 1.1887, + "step": 57800 + }, + { + "epoch": 21.65, + "learning_rate": 0.0002338888888888889, + "loss": 1.1903, + "step": 57900 + }, + { + "epoch": 21.69, + "learning_rate": 0.00023333333333333333, + "loss": 1.1868, + "step": 58000 + }, + { + "epoch": 21.73, + "learning_rate": 0.0002327777777777778, + "loss": 1.1889, + "step": 58100 + }, + { + "epoch": 21.77, + "learning_rate": 0.00023222222222222223, + "loss": 1.1854, + "step": 58200 + }, + { + "epoch": 21.8, + "learning_rate": 0.00023166666666666667, + "loss": 1.1857, + "step": 58300 + }, + { + "epoch": 21.84, + "learning_rate": 0.0002311111111111111, + "loss": 1.1878, + "step": 58400 + }, + { + "epoch": 21.88, + "learning_rate": 0.00023055555555555557, + "loss": 1.1873, + "step": 58500 + }, + { + "epoch": 21.91, + "learning_rate": 0.00023, + "loss": 1.1867, + "step": 58600 + }, + { + "epoch": 21.95, + "learning_rate": 0.00022944444444444444, + "loss": 1.1854, + "step": 58700 + }, + { + "epoch": 21.99, + "learning_rate": 0.0002288888888888889, + "loss": 1.1867, + "step": 58800 + }, + { + "epoch": 22.03, + "learning_rate": 0.00022833333333333334, + "loss": 1.1906, + "step": 58900 + }, + { + "epoch": 22.06, + "learning_rate": 0.00022777777777777778, + "loss": 1.1831, + "step": 59000 + }, + { + "epoch": 22.1, + "learning_rate": 0.00022722222222222222, + "loss": 1.1849, + "step": 59100 + }, + { + "epoch": 22.14, + "learning_rate": 0.00022666666666666666, + "loss": 1.1792, + "step": 59200 + }, + { + "epoch": 22.18, + "learning_rate": 0.00022611111111111112, + "loss": 1.1823, + "step": 59300 + }, + { + "epoch": 22.21, + "learning_rate": 0.00022555555555555556, + "loss": 1.1839, + "step": 59400 + }, + { + "epoch": 22.25, + "learning_rate": 0.00022500000000000002, + "loss": 1.184, + "step": 59500 + }, + { + "epoch": 22.29, + "learning_rate": 0.00022444444444444446, + "loss": 1.1813, + "step": 59600 + }, + { + "epoch": 22.33, + "learning_rate": 0.0002238888888888889, + "loss": 1.1858, + "step": 59700 + }, + { + "epoch": 22.36, + "learning_rate": 0.00022333333333333333, + "loss": 1.1788, + "step": 59800 + }, + { + "epoch": 22.4, + "learning_rate": 0.00022277777777777777, + "loss": 1.1813, + "step": 59900 + }, + { + "epoch": 22.44, + "learning_rate": 0.0002222222222222222, + "loss": 1.1816, + "step": 60000 + }, + { + "epoch": 22.48, + "learning_rate": 0.00022166666666666667, + "loss": 1.1821, + "step": 60100 + }, + { + "epoch": 22.51, + "learning_rate": 0.00022111111111111113, + "loss": 1.1807, + "step": 60200 + }, + { + "epoch": 22.55, + "learning_rate": 0.00022055555555555557, + "loss": 1.1768, + "step": 60300 + }, + { + "epoch": 22.59, + "learning_rate": 0.00022, + "loss": 1.1811, + "step": 60400 + }, + { + "epoch": 22.63, + "learning_rate": 0.00021944444444444444, + "loss": 1.1799, + "step": 60500 + }, + { + "epoch": 22.66, + "learning_rate": 0.00021888888888888888, + "loss": 1.1784, + "step": 60600 + }, + { + "epoch": 22.7, + "learning_rate": 0.00021833333333333332, + "loss": 1.1788, + "step": 60700 + }, + { + "epoch": 22.74, + "learning_rate": 0.00021777777777777776, + "loss": 1.1769, + "step": 60800 + }, + { + "epoch": 22.77, + "learning_rate": 0.00021722222222222225, + "loss": 1.178, + "step": 60900 + }, + { + "epoch": 22.81, + "learning_rate": 0.00021666666666666668, + "loss": 1.1784, + "step": 61000 + }, + { + "epoch": 22.85, + "learning_rate": 0.00021611111111111112, + "loss": 1.1775, + "step": 61100 + }, + { + "epoch": 22.89, + "learning_rate": 0.00021555555555555556, + "loss": 1.1813, + "step": 61200 + }, + { + "epoch": 22.92, + "learning_rate": 0.000215, + "loss": 1.1793, + "step": 61300 + }, + { + "epoch": 22.96, + "learning_rate": 0.00021444444444444443, + "loss": 1.1745, + "step": 61400 + }, + { + "epoch": 23.0, + "learning_rate": 0.0002138888888888889, + "loss": 1.1788, + "step": 61500 + }, + { + "epoch": 23.04, + "learning_rate": 0.00021333333333333336, + "loss": 1.1811, + "step": 61600 + }, + { + "epoch": 23.07, + "learning_rate": 0.0002127777777777778, + "loss": 1.1697, + "step": 61700 + }, + { + "epoch": 23.11, + "learning_rate": 0.00021222222222222223, + "loss": 1.1751, + "step": 61800 + }, + { + "epoch": 23.15, + "learning_rate": 0.00021166666666666667, + "loss": 1.1714, + "step": 61900 + }, + { + "epoch": 23.19, + "learning_rate": 0.0002111111111111111, + "loss": 1.1754, + "step": 62000 + }, + { + "epoch": 23.22, + "learning_rate": 0.00021055555555555554, + "loss": 1.1711, + "step": 62100 + }, + { + "epoch": 23.26, + "learning_rate": 0.00021, + "loss": 1.1712, + "step": 62200 + }, + { + "epoch": 23.3, + "learning_rate": 0.00020944444444444445, + "loss": 1.1751, + "step": 62300 + }, + { + "epoch": 23.34, + "learning_rate": 0.0002088888888888889, + "loss": 1.1733, + "step": 62400 + }, + { + "epoch": 23.37, + "learning_rate": 0.00020833333333333335, + "loss": 1.1722, + "step": 62500 + }, + { + "epoch": 23.41, + "learning_rate": 0.00020777777777777778, + "loss": 1.1707, + "step": 62600 + }, + { + "epoch": 23.45, + "learning_rate": 0.00020722222222222222, + "loss": 1.1731, + "step": 62700 + }, + { + "epoch": 23.49, + "learning_rate": 0.00020666666666666666, + "loss": 1.1697, + "step": 62800 + }, + { + "epoch": 23.52, + "learning_rate": 0.00020611111111111112, + "loss": 1.171, + "step": 62900 + }, + { + "epoch": 23.56, + "learning_rate": 0.00020555555555555556, + "loss": 1.1706, + "step": 63000 + }, + { + "epoch": 23.6, + "learning_rate": 0.000205, + "loss": 1.1687, + "step": 63100 + }, + { + "epoch": 23.63, + "learning_rate": 0.00020444444444444446, + "loss": 1.1685, + "step": 63200 + }, + { + "epoch": 23.67, + "learning_rate": 0.0002038888888888889, + "loss": 1.1707, + "step": 63300 + }, + { + "epoch": 23.71, + "learning_rate": 0.00020333333333333333, + "loss": 1.1681, + "step": 63400 + }, + { + "epoch": 23.75, + "learning_rate": 0.00020277777777777777, + "loss": 1.1708, + "step": 63500 + }, + { + "epoch": 23.78, + "learning_rate": 0.00020222222222222223, + "loss": 1.1662, + "step": 63600 + }, + { + "epoch": 23.82, + "learning_rate": 0.00020166666666666667, + "loss": 1.1678, + "step": 63700 + }, + { + "epoch": 23.86, + "learning_rate": 0.0002011111111111111, + "loss": 1.17, + "step": 63800 + }, + { + "epoch": 23.9, + "learning_rate": 0.00020055555555555555, + "loss": 1.1663, + "step": 63900 + }, + { + "epoch": 23.93, + "learning_rate": 0.0002, + "loss": 1.1686, + "step": 64000 + }, + { + "epoch": 23.97, + "learning_rate": 0.00019944444444444445, + "loss": 1.168, + "step": 64100 + }, + { + "epoch": 24.01, + "learning_rate": 0.00019888888888888888, + "loss": 1.1734, + "step": 64200 + }, + { + "epoch": 24.05, + "learning_rate": 0.00019833333333333335, + "loss": 1.1634, + "step": 64300 + }, + { + "epoch": 24.08, + "learning_rate": 0.00019777777777777778, + "loss": 1.1634, + "step": 64400 + }, + { + "epoch": 24.12, + "learning_rate": 0.00019722222222222222, + "loss": 1.164, + "step": 64500 + }, + { + "epoch": 24.16, + "learning_rate": 0.00019666666666666666, + "loss": 1.1616, + "step": 64600 + }, + { + "epoch": 24.2, + "learning_rate": 0.00019611111111111112, + "loss": 1.1627, + "step": 64700 + }, + { + "epoch": 24.23, + "learning_rate": 0.00019555555555555556, + "loss": 1.1626, + "step": 64800 + }, + { + "epoch": 24.27, + "learning_rate": 0.00019500000000000002, + "loss": 1.1621, + "step": 64900 + }, + { + "epoch": 24.31, + "learning_rate": 0.00019444444444444446, + "loss": 1.1624, + "step": 65000 + }, + { + "epoch": 24.35, + "learning_rate": 0.0001938888888888889, + "loss": 1.1614, + "step": 65100 + }, + { + "epoch": 24.38, + "learning_rate": 0.00019333333333333333, + "loss": 1.1624, + "step": 65200 + }, + { + "epoch": 24.42, + "learning_rate": 0.00019277777777777777, + "loss": 1.1644, + "step": 65300 + }, + { + "epoch": 24.46, + "learning_rate": 0.0001922222222222222, + "loss": 1.1624, + "step": 65400 + }, + { + "epoch": 24.5, + "learning_rate": 0.00019166666666666667, + "loss": 1.1629, + "step": 65500 + }, + { + "epoch": 24.53, + "learning_rate": 0.00019111111111111114, + "loss": 1.1592, + "step": 65600 + }, + { + "epoch": 24.57, + "learning_rate": 0.00019055555555555557, + "loss": 1.1603, + "step": 65700 + }, + { + "epoch": 24.61, + "learning_rate": 0.00019, + "loss": 1.1595, + "step": 65800 + }, + { + "epoch": 24.64, + "learning_rate": 0.00018944444444444445, + "loss": 1.1612, + "step": 65900 + }, + { + "epoch": 24.68, + "learning_rate": 0.00018888888888888888, + "loss": 1.1603, + "step": 66000 + }, + { + "epoch": 24.72, + "learning_rate": 0.00018833333333333332, + "loss": 1.1602, + "step": 66100 + }, + { + "epoch": 24.76, + "learning_rate": 0.00018777777777777776, + "loss": 1.1628, + "step": 66200 + }, + { + "epoch": 24.79, + "learning_rate": 0.00018722222222222225, + "loss": 1.1579, + "step": 66300 + }, + { + "epoch": 24.83, + "learning_rate": 0.0001866666666666667, + "loss": 1.1594, + "step": 66400 + }, + { + "epoch": 24.87, + "learning_rate": 0.00018611111111111112, + "loss": 1.1592, + "step": 66500 + }, + { + "epoch": 24.91, + "learning_rate": 0.00018555555555555556, + "loss": 1.1599, + "step": 66600 + }, + { + "epoch": 24.94, + "learning_rate": 0.000185, + "loss": 1.1574, + "step": 66700 + }, + { + "epoch": 24.98, + "learning_rate": 0.00018444444444444443, + "loss": 1.1575, + "step": 66800 + }, + { + "epoch": 25.02, + "learning_rate": 0.00018388888888888887, + "loss": 1.1605, + "step": 66900 + }, + { + "epoch": 25.06, + "learning_rate": 0.00018333333333333334, + "loss": 1.1531, + "step": 67000 + }, + { + "epoch": 25.09, + "learning_rate": 0.0001827777777777778, + "loss": 1.1529, + "step": 67100 + }, + { + "epoch": 25.13, + "learning_rate": 0.00018222222222222224, + "loss": 1.1537, + "step": 67200 + }, + { + "epoch": 25.17, + "learning_rate": 0.00018166666666666667, + "loss": 1.1524, + "step": 67300 + }, + { + "epoch": 25.21, + "learning_rate": 0.0001811111111111111, + "loss": 1.1539, + "step": 67400 + }, + { + "epoch": 25.24, + "learning_rate": 0.00018055555555555555, + "loss": 1.1549, + "step": 67500 + }, + { + "epoch": 25.28, + "learning_rate": 0.00017999999999999998, + "loss": 1.1574, + "step": 67600 + }, + { + "epoch": 25.32, + "learning_rate": 0.00017944444444444445, + "loss": 1.1538, + "step": 67700 + }, + { + "epoch": 25.36, + "learning_rate": 0.00017888888888888889, + "loss": 1.1521, + "step": 67800 + }, + { + "epoch": 25.39, + "learning_rate": 0.00017833333333333335, + "loss": 1.1518, + "step": 67900 + }, + { + "epoch": 25.43, + "learning_rate": 0.00017777777777777779, + "loss": 1.1547, + "step": 68000 + }, + { + "epoch": 25.47, + "learning_rate": 0.00017722222222222222, + "loss": 1.1524, + "step": 68100 + }, + { + "epoch": 25.5, + "learning_rate": 0.00017666666666666666, + "loss": 1.1514, + "step": 68200 + }, + { + "epoch": 25.54, + "learning_rate": 0.00017611111111111112, + "loss": 1.1512, + "step": 68300 + }, + { + "epoch": 25.58, + "learning_rate": 0.00017555555555555556, + "loss": 1.1508, + "step": 68400 + }, + { + "epoch": 25.62, + "learning_rate": 0.000175, + "loss": 1.1526, + "step": 68500 + }, + { + "epoch": 25.65, + "learning_rate": 0.00017444444444444446, + "loss": 1.147, + "step": 68600 + }, + { + "epoch": 25.69, + "learning_rate": 0.0001738888888888889, + "loss": 1.1497, + "step": 68700 + }, + { + "epoch": 25.73, + "learning_rate": 0.00017333333333333334, + "loss": 1.1527, + "step": 68800 + }, + { + "epoch": 25.77, + "learning_rate": 0.00017277777777777777, + "loss": 1.1492, + "step": 68900 + }, + { + "epoch": 25.8, + "learning_rate": 0.00017222222222222224, + "loss": 1.1521, + "step": 69000 + }, + { + "epoch": 25.84, + "learning_rate": 0.00017166666666666667, + "loss": 1.1517, + "step": 69100 + }, + { + "epoch": 25.88, + "learning_rate": 0.0001711111111111111, + "loss": 1.147, + "step": 69200 + }, + { + "epoch": 25.92, + "learning_rate": 0.00017055555555555555, + "loss": 1.1506, + "step": 69300 + }, + { + "epoch": 25.95, + "learning_rate": 0.00017, + "loss": 1.1496, + "step": 69400 + }, + { + "epoch": 25.99, + "learning_rate": 0.00016944444444444445, + "loss": 1.1478, + "step": 69500 + }, + { + "epoch": 26.03, + "learning_rate": 0.00016888888888888889, + "loss": 1.1513, + "step": 69600 + }, + { + "epoch": 26.07, + "learning_rate": 0.00016833333333333335, + "loss": 1.1459, + "step": 69700 + }, + { + "epoch": 26.1, + "learning_rate": 0.0001677777777777778, + "loss": 1.1443, + "step": 69800 + }, + { + "epoch": 26.14, + "learning_rate": 0.00016722222222222222, + "loss": 1.1453, + "step": 69900 + }, + { + "epoch": 26.18, + "learning_rate": 0.00016666666666666666, + "loss": 1.1438, + "step": 70000 + }, + { + "epoch": 26.22, + "learning_rate": 0.0001661111111111111, + "loss": 1.1429, + "step": 70100 + }, + { + "epoch": 26.25, + "learning_rate": 0.00016555555555555556, + "loss": 1.1465, + "step": 70200 + }, + { + "epoch": 26.29, + "learning_rate": 0.000165, + "loss": 1.1436, + "step": 70300 + }, + { + "epoch": 26.33, + "learning_rate": 0.00016444444444444446, + "loss": 1.1424, + "step": 70400 + }, + { + "epoch": 26.36, + "learning_rate": 0.0001638888888888889, + "loss": 1.1434, + "step": 70500 + }, + { + "epoch": 26.4, + "learning_rate": 0.00016333333333333334, + "loss": 1.1441, + "step": 70600 + }, + { + "epoch": 26.44, + "learning_rate": 0.00016277777777777777, + "loss": 1.1443, + "step": 70700 + }, + { + "epoch": 26.48, + "learning_rate": 0.0001622222222222222, + "loss": 1.1446, + "step": 70800 + }, + { + "epoch": 26.51, + "learning_rate": 0.00016166666666666665, + "loss": 1.144, + "step": 70900 + }, + { + "epoch": 26.55, + "learning_rate": 0.0001611111111111111, + "loss": 1.1429, + "step": 71000 + }, + { + "epoch": 26.59, + "learning_rate": 0.00016055555555555558, + "loss": 1.1435, + "step": 71100 + }, + { + "epoch": 26.63, + "learning_rate": 0.00016, + "loss": 1.1425, + "step": 71200 + }, + { + "epoch": 26.66, + "learning_rate": 0.00015944444444444445, + "loss": 1.1417, + "step": 71300 + }, + { + "epoch": 26.7, + "learning_rate": 0.0001588888888888889, + "loss": 1.1432, + "step": 71400 + }, + { + "epoch": 26.74, + "learning_rate": 0.00015833333333333332, + "loss": 1.1405, + "step": 71500 + }, + { + "epoch": 26.78, + "learning_rate": 0.00015777777777777776, + "loss": 1.1425, + "step": 71600 + }, + { + "epoch": 26.81, + "learning_rate": 0.00015722222222222225, + "loss": 1.1438, + "step": 71700 + }, + { + "epoch": 26.85, + "learning_rate": 0.0001566666666666667, + "loss": 1.1402, + "step": 71800 + }, + { + "epoch": 26.89, + "learning_rate": 0.00015611111111111113, + "loss": 1.1424, + "step": 71900 + }, + { + "epoch": 26.93, + "learning_rate": 0.00015555555555555556, + "loss": 1.1396, + "step": 72000 + }, + { + "epoch": 26.96, + "learning_rate": 0.000155, + "loss": 1.138, + "step": 72100 + }, + { + "epoch": 27.0, + "learning_rate": 0.00015444444444444444, + "loss": 1.1486, + "step": 72200 + }, + { + "epoch": 27.04, + "learning_rate": 0.00015388888888888887, + "loss": 1.1363, + "step": 72300 + }, + { + "epoch": 27.08, + "learning_rate": 0.00015333333333333334, + "loss": 1.1342, + "step": 72400 + }, + { + "epoch": 27.11, + "learning_rate": 0.0001527777777777778, + "loss": 1.1333, + "step": 72500 + }, + { + "epoch": 27.15, + "learning_rate": 0.00015222222222222224, + "loss": 1.1354, + "step": 72600 + }, + { + "epoch": 27.19, + "learning_rate": 0.00015166666666666668, + "loss": 1.1376, + "step": 72700 + }, + { + "epoch": 27.23, + "learning_rate": 0.0001511111111111111, + "loss": 1.1336, + "step": 72800 + }, + { + "epoch": 27.26, + "learning_rate": 0.00015055555555555555, + "loss": 1.1364, + "step": 72900 + }, + { + "epoch": 27.3, + "learning_rate": 0.00015, + "loss": 1.137, + "step": 73000 + }, + { + "epoch": 27.34, + "learning_rate": 0.00014944444444444445, + "loss": 1.1346, + "step": 73100 + }, + { + "epoch": 27.37, + "learning_rate": 0.0001488888888888889, + "loss": 1.1353, + "step": 73200 + }, + { + "epoch": 27.41, + "learning_rate": 0.00014833333333333335, + "loss": 1.1372, + "step": 73300 + }, + { + "epoch": 27.45, + "learning_rate": 0.0001477777777777778, + "loss": 1.1362, + "step": 73400 + }, + { + "epoch": 27.49, + "learning_rate": 0.00014722222222222223, + "loss": 1.1354, + "step": 73500 + }, + { + "epoch": 27.52, + "learning_rate": 0.00014666666666666666, + "loss": 1.1357, + "step": 73600 + }, + { + "epoch": 27.56, + "learning_rate": 0.0001461111111111111, + "loss": 1.1349, + "step": 73700 + }, + { + "epoch": 27.6, + "learning_rate": 0.00014555555555555556, + "loss": 1.1386, + "step": 73800 + }, + { + "epoch": 27.64, + "learning_rate": 0.000145, + "loss": 1.1308, + "step": 73900 + }, + { + "epoch": 27.67, + "learning_rate": 0.00014444444444444444, + "loss": 1.1322, + "step": 74000 + }, + { + "epoch": 27.71, + "learning_rate": 0.0001438888888888889, + "loss": 1.1363, + "step": 74100 + }, + { + "epoch": 27.75, + "learning_rate": 0.00014333333333333334, + "loss": 1.1335, + "step": 74200 + }, + { + "epoch": 27.79, + "learning_rate": 0.00014277777777777778, + "loss": 1.1334, + "step": 74300 + }, + { + "epoch": 27.82, + "learning_rate": 0.0001422222222222222, + "loss": 1.1361, + "step": 74400 + }, + { + "epoch": 27.86, + "learning_rate": 0.00014166666666666668, + "loss": 1.1352, + "step": 74500 + }, + { + "epoch": 27.9, + "learning_rate": 0.00014111111111111111, + "loss": 1.1318, + "step": 74600 + }, + { + "epoch": 27.94, + "learning_rate": 0.00014055555555555555, + "loss": 1.1346, + "step": 74700 + }, + { + "epoch": 27.97, + "learning_rate": 0.00014000000000000001, + "loss": 1.1355, + "step": 74800 + }, + { + "epoch": 28.01, + "learning_rate": 0.00013944444444444445, + "loss": 1.1346, + "step": 74900 + }, + { + "epoch": 28.05, + "learning_rate": 0.0001388888888888889, + "loss": 1.1273, + "step": 75000 + }, + { + "epoch": 28.09, + "learning_rate": 0.00013833333333333333, + "loss": 1.1258, + "step": 75100 + }, + { + "epoch": 28.12, + "learning_rate": 0.0001377777777777778, + "loss": 1.1285, + "step": 75200 + }, + { + "epoch": 28.16, + "learning_rate": 0.00013722222222222223, + "loss": 1.127, + "step": 75300 + }, + { + "epoch": 28.2, + "learning_rate": 0.00013666666666666666, + "loss": 1.1305, + "step": 75400 + }, + { + "epoch": 28.23, + "learning_rate": 0.0001361111111111111, + "loss": 1.1296, + "step": 75500 + }, + { + "epoch": 28.27, + "learning_rate": 0.00013555555555555556, + "loss": 1.127, + "step": 75600 + }, + { + "epoch": 28.31, + "learning_rate": 0.000135, + "loss": 1.1279, + "step": 75700 + }, + { + "epoch": 28.35, + "learning_rate": 0.00013444444444444447, + "loss": 1.1275, + "step": 75800 + }, + { + "epoch": 28.38, + "learning_rate": 0.0001338888888888889, + "loss": 1.1277, + "step": 75900 + }, + { + "epoch": 28.42, + "learning_rate": 0.00013333333333333334, + "loss": 1.1275, + "step": 76000 + }, + { + "epoch": 28.46, + "learning_rate": 0.00013277777777777778, + "loss": 1.1282, + "step": 76100 + }, + { + "epoch": 28.5, + "learning_rate": 0.00013222222222222221, + "loss": 1.126, + "step": 76200 + }, + { + "epoch": 28.53, + "learning_rate": 0.00013166666666666665, + "loss": 1.1297, + "step": 76300 + }, + { + "epoch": 28.57, + "learning_rate": 0.00013111111111111111, + "loss": 1.1275, + "step": 76400 + }, + { + "epoch": 28.61, + "learning_rate": 0.00013055555555555558, + "loss": 1.1299, + "step": 76500 + }, + { + "epoch": 28.65, + "learning_rate": 0.00013000000000000002, + "loss": 1.1285, + "step": 76600 + }, + { + "epoch": 28.68, + "learning_rate": 0.00012944444444444445, + "loss": 1.1254, + "step": 76700 + }, + { + "epoch": 28.72, + "learning_rate": 0.0001288888888888889, + "loss": 1.1227, + "step": 76800 + }, + { + "epoch": 28.76, + "learning_rate": 0.00012833333333333333, + "loss": 1.1278, + "step": 76900 + }, + { + "epoch": 28.8, + "learning_rate": 0.00012777777777777776, + "loss": 1.1211, + "step": 77000 + }, + { + "epoch": 28.83, + "learning_rate": 0.0001272222222222222, + "loss": 1.1255, + "step": 77100 + }, + { + "epoch": 28.87, + "learning_rate": 0.0001266666666666667, + "loss": 1.1229, + "step": 77200 + }, + { + "epoch": 28.91, + "learning_rate": 0.00012611111111111113, + "loss": 1.1236, + "step": 77300 + }, + { + "epoch": 28.95, + "learning_rate": 0.00012555555555555557, + "loss": 1.1227, + "step": 77400 + }, + { + "epoch": 28.98, + "learning_rate": 0.000125, + "loss": 1.1209, + "step": 77500 + }, + { + "epoch": 29.02, + "learning_rate": 0.00012444444444444444, + "loss": 1.1309, + "step": 77600 + }, + { + "epoch": 29.06, + "learning_rate": 0.0001238888888888889, + "loss": 1.1172, + "step": 77700 + }, + { + "epoch": 29.09, + "learning_rate": 0.00012333333333333334, + "loss": 1.1183, + "step": 77800 + }, + { + "epoch": 29.13, + "learning_rate": 0.00012277777777777778, + "loss": 1.1227, + "step": 77900 + }, + { + "epoch": 29.17, + "learning_rate": 0.00012222222222222221, + "loss": 1.1187, + "step": 78000 + }, + { + "epoch": 29.21, + "learning_rate": 0.00012166666666666668, + "loss": 1.1191, + "step": 78100 + }, + { + "epoch": 29.24, + "learning_rate": 0.00012111111111111112, + "loss": 1.1209, + "step": 78200 + }, + { + "epoch": 29.28, + "learning_rate": 0.00012055555555555555, + "loss": 1.1231, + "step": 78300 + }, + { + "epoch": 29.32, + "learning_rate": 0.00012, + "loss": 1.1187, + "step": 78400 + }, + { + "epoch": 29.36, + "learning_rate": 0.00011944444444444445, + "loss": 1.1213, + "step": 78500 + }, + { + "epoch": 29.39, + "learning_rate": 0.00011888888888888889, + "loss": 1.1208, + "step": 78600 + }, + { + "epoch": 29.43, + "learning_rate": 0.00011833333333333334, + "loss": 1.1196, + "step": 78700 + }, + { + "epoch": 29.47, + "learning_rate": 0.00011777777777777778, + "loss": 1.1206, + "step": 78800 + }, + { + "epoch": 29.51, + "learning_rate": 0.00011722222222222223, + "loss": 1.1197, + "step": 78900 + }, + { + "epoch": 29.54, + "learning_rate": 0.00011666666666666667, + "loss": 1.1198, + "step": 79000 + }, + { + "epoch": 29.58, + "learning_rate": 0.00011611111111111112, + "loss": 1.1184, + "step": 79100 + }, + { + "epoch": 29.62, + "learning_rate": 0.00011555555555555555, + "loss": 1.1181, + "step": 79200 + }, + { + "epoch": 29.66, + "learning_rate": 0.000115, + "loss": 1.1159, + "step": 79300 + }, + { + "epoch": 29.69, + "learning_rate": 0.00011444444444444445, + "loss": 1.1174, + "step": 79400 + }, + { + "epoch": 29.73, + "learning_rate": 0.00011388888888888889, + "loss": 1.1199, + "step": 79500 + }, + { + "epoch": 29.77, + "learning_rate": 0.00011333333333333333, + "loss": 1.1163, + "step": 79600 + }, + { + "epoch": 29.81, + "learning_rate": 0.00011277777777777778, + "loss": 1.123, + "step": 79700 + }, + { + "epoch": 29.84, + "learning_rate": 0.00011222222222222223, + "loss": 1.1165, + "step": 79800 + }, + { + "epoch": 29.88, + "learning_rate": 0.00011166666666666667, + "loss": 1.1155, + "step": 79900 + }, + { + "epoch": 29.92, + "learning_rate": 0.0001111111111111111, + "loss": 1.1125, + "step": 80000 + }, + { + "epoch": 29.95, + "learning_rate": 0.00011055555555555557, + "loss": 1.1135, + "step": 80100 + }, + { + "epoch": 29.99, + "learning_rate": 0.00011, + "loss": 1.1162, + "step": 80200 + }, + { + "epoch": 30.03, + "learning_rate": 0.00010944444444444444, + "loss": 1.1166, + "step": 80300 + }, + { + "epoch": 30.07, + "learning_rate": 0.00010888888888888888, + "loss": 1.1105, + "step": 80400 + }, + { + "epoch": 30.1, + "learning_rate": 0.00010833333333333334, + "loss": 1.1109, + "step": 80500 + }, + { + "epoch": 30.14, + "learning_rate": 0.00010777777777777778, + "loss": 1.1119, + "step": 80600 + }, + { + "epoch": 30.18, + "learning_rate": 0.00010722222222222222, + "loss": 1.1156, + "step": 80700 + }, + { + "epoch": 30.22, + "learning_rate": 0.00010666666666666668, + "loss": 1.1136, + "step": 80800 + }, + { + "epoch": 30.25, + "learning_rate": 0.00010611111111111112, + "loss": 1.1115, + "step": 80900 + }, + { + "epoch": 30.29, + "learning_rate": 0.00010555555555555555, + "loss": 1.1109, + "step": 81000 + }, + { + "epoch": 30.33, + "learning_rate": 0.000105, + "loss": 1.1141, + "step": 81100 + }, + { + "epoch": 30.37, + "learning_rate": 0.00010444444444444445, + "loss": 1.1129, + "step": 81200 + }, + { + "epoch": 30.4, + "learning_rate": 0.00010388888888888889, + "loss": 1.1155, + "step": 81300 + }, + { + "epoch": 30.44, + "learning_rate": 0.00010333333333333333, + "loss": 1.1117, + "step": 81400 + }, + { + "epoch": 30.48, + "learning_rate": 0.00010277777777777778, + "loss": 1.1097, + "step": 81500 + }, + { + "epoch": 30.52, + "learning_rate": 0.00010222222222222223, + "loss": 1.1097, + "step": 81600 + }, + { + "epoch": 30.55, + "learning_rate": 0.00010166666666666667, + "loss": 1.1089, + "step": 81700 + }, + { + "epoch": 30.59, + "learning_rate": 0.00010111111111111112, + "loss": 1.1124, + "step": 81800 + }, + { + "epoch": 30.63, + "learning_rate": 0.00010055555555555555, + "loss": 1.1077, + "step": 81900 + }, + { + "epoch": 30.67, + "learning_rate": 0.0001, + "loss": 1.1085, + "step": 82000 + }, + { + "epoch": 30.7, + "learning_rate": 9.944444444444444e-05, + "loss": 1.1097, + "step": 82100 + }, + { + "epoch": 30.74, + "learning_rate": 9.888888888888889e-05, + "loss": 1.112, + "step": 82200 + }, + { + "epoch": 30.78, + "learning_rate": 9.833333333333333e-05, + "loss": 1.1099, + "step": 82300 + }, + { + "epoch": 30.82, + "learning_rate": 9.777777777777778e-05, + "loss": 1.1106, + "step": 82400 + }, + { + "epoch": 30.85, + "learning_rate": 9.722222222222223e-05, + "loss": 1.1099, + "step": 82500 + }, + { + "epoch": 30.89, + "learning_rate": 9.666666666666667e-05, + "loss": 1.1086, + "step": 82600 + }, + { + "epoch": 30.93, + "learning_rate": 9.61111111111111e-05, + "loss": 1.1101, + "step": 82700 + }, + { + "epoch": 30.96, + "learning_rate": 9.555555555555557e-05, + "loss": 1.1089, + "step": 82800 + }, + { + "epoch": 31.0, + "learning_rate": 9.5e-05, + "loss": 1.1116, + "step": 82900 + }, + { + "epoch": 31.04, + "learning_rate": 9.444444444444444e-05, + "loss": 1.1062, + "step": 83000 + }, + { + "epoch": 31.08, + "learning_rate": 9.388888888888888e-05, + "loss": 1.1082, + "step": 83100 + }, + { + "epoch": 31.11, + "learning_rate": 9.333333333333334e-05, + "loss": 1.104, + "step": 83200 + }, + { + "epoch": 31.15, + "learning_rate": 9.277777777777778e-05, + "loss": 1.1039, + "step": 83300 + }, + { + "epoch": 31.19, + "learning_rate": 9.222222222222222e-05, + "loss": 1.1027, + "step": 83400 + }, + { + "epoch": 31.23, + "learning_rate": 9.166666666666667e-05, + "loss": 1.1049, + "step": 83500 + }, + { + "epoch": 31.26, + "learning_rate": 9.111111111111112e-05, + "loss": 1.1089, + "step": 83600 + }, + { + "epoch": 31.3, + "learning_rate": 9.055555555555556e-05, + "loss": 1.1082, + "step": 83700 + }, + { + "epoch": 31.34, + "learning_rate": 8.999999999999999e-05, + "loss": 1.1019, + "step": 83800 + }, + { + "epoch": 31.38, + "learning_rate": 8.944444444444444e-05, + "loss": 1.1034, + "step": 83900 + }, + { + "epoch": 31.41, + "learning_rate": 8.888888888888889e-05, + "loss": 1.1034, + "step": 84000 + }, + { + "epoch": 31.45, + "learning_rate": 8.833333333333333e-05, + "loss": 1.1002, + "step": 84100 + }, + { + "epoch": 31.49, + "learning_rate": 8.777777777777778e-05, + "loss": 1.1032, + "step": 84200 + }, + { + "epoch": 31.53, + "learning_rate": 8.722222222222223e-05, + "loss": 1.1081, + "step": 84300 + }, + { + "epoch": 31.56, + "learning_rate": 8.666666666666667e-05, + "loss": 1.0995, + "step": 84400 + }, + { + "epoch": 31.6, + "learning_rate": 8.611111111111112e-05, + "loss": 1.104, + "step": 84500 + }, + { + "epoch": 31.64, + "learning_rate": 8.555555555555556e-05, + "loss": 1.1015, + "step": 84600 + }, + { + "epoch": 31.68, + "learning_rate": 8.5e-05, + "loss": 1.1022, + "step": 84700 + }, + { + "epoch": 31.71, + "learning_rate": 8.444444444444444e-05, + "loss": 1.1016, + "step": 84800 + }, + { + "epoch": 31.75, + "learning_rate": 8.38888888888889e-05, + "loss": 1.1008, + "step": 84900 + }, + { + "epoch": 31.79, + "learning_rate": 8.333333333333333e-05, + "loss": 1.102, + "step": 85000 + }, + { + "epoch": 31.82, + "learning_rate": 8.277777777777778e-05, + "loss": 1.0997, + "step": 85100 + }, + { + "epoch": 31.86, + "learning_rate": 8.222222222222223e-05, + "loss": 1.1036, + "step": 85200 + }, + { + "epoch": 31.9, + "learning_rate": 8.166666666666667e-05, + "loss": 1.1021, + "step": 85300 + }, + { + "epoch": 31.94, + "learning_rate": 8.11111111111111e-05, + "loss": 1.0999, + "step": 85400 + }, + { + "epoch": 31.97, + "learning_rate": 8.055555555555556e-05, + "loss": 1.0988, + "step": 85500 + }, + { + "epoch": 32.01, + "learning_rate": 8e-05, + "loss": 1.1106, + "step": 85600 + }, + { + "epoch": 32.05, + "learning_rate": 7.944444444444444e-05, + "loss": 1.1002, + "step": 85700 + }, + { + "epoch": 32.09, + "learning_rate": 7.888888888888888e-05, + "loss": 1.0966, + "step": 85800 + }, + { + "epoch": 32.12, + "learning_rate": 7.833333333333334e-05, + "loss": 1.0942, + "step": 85900 + }, + { + "epoch": 32.16, + "learning_rate": 7.777777777777778e-05, + "loss": 1.0986, + "step": 86000 + }, + { + "epoch": 32.2, + "learning_rate": 7.722222222222222e-05, + "loss": 1.0963, + "step": 86100 + }, + { + "epoch": 32.24, + "learning_rate": 7.666666666666667e-05, + "loss": 1.0951, + "step": 86200 + }, + { + "epoch": 32.27, + "learning_rate": 7.611111111111112e-05, + "loss": 1.0958, + "step": 86300 + }, + { + "epoch": 32.31, + "learning_rate": 7.555555555555556e-05, + "loss": 1.095, + "step": 86400 + }, + { + "epoch": 32.35, + "learning_rate": 7.5e-05, + "loss": 1.1004, + "step": 86500 + }, + { + "epoch": 32.39, + "learning_rate": 7.444444444444444e-05, + "loss": 1.0977, + "step": 86600 + }, + { + "epoch": 32.42, + "learning_rate": 7.38888888888889e-05, + "loss": 1.0984, + "step": 86700 + }, + { + "epoch": 32.46, + "learning_rate": 7.333333333333333e-05, + "loss": 1.1005, + "step": 86800 + }, + { + "epoch": 32.5, + "learning_rate": 7.277777777777778e-05, + "loss": 1.0948, + "step": 86900 + }, + { + "epoch": 32.54, + "learning_rate": 7.222222222222222e-05, + "loss": 1.0953, + "step": 87000 + }, + { + "epoch": 32.57, + "learning_rate": 7.166666666666667e-05, + "loss": 1.0967, + "step": 87100 + }, + { + "epoch": 32.61, + "learning_rate": 7.11111111111111e-05, + "loss": 1.0986, + "step": 87200 + }, + { + "epoch": 32.65, + "learning_rate": 7.055555555555556e-05, + "loss": 1.0938, + "step": 87300 + }, + { + "epoch": 32.68, + "learning_rate": 7.000000000000001e-05, + "loss": 1.0934, + "step": 87400 + }, + { + "epoch": 32.72, + "learning_rate": 6.944444444444444e-05, + "loss": 1.0925, + "step": 87500 + }, + { + "epoch": 32.76, + "learning_rate": 6.88888888888889e-05, + "loss": 1.0951, + "step": 87600 + }, + { + "epoch": 32.8, + "learning_rate": 6.833333333333333e-05, + "loss": 1.0926, + "step": 87700 + }, + { + "epoch": 32.83, + "learning_rate": 6.777777777777778e-05, + "loss": 1.0941, + "step": 87800 + }, + { + "epoch": 32.87, + "learning_rate": 6.722222222222223e-05, + "loss": 1.0952, + "step": 87900 + }, + { + "epoch": 32.91, + "learning_rate": 6.666666666666667e-05, + "loss": 1.0938, + "step": 88000 + }, + { + "epoch": 32.95, + "learning_rate": 6.611111111111111e-05, + "loss": 1.0968, + "step": 88100 + }, + { + "epoch": 32.98, + "learning_rate": 6.555555555555556e-05, + "loss": 1.0935, + "step": 88200 + }, + { + "epoch": 33.02, + "learning_rate": 6.500000000000001e-05, + "loss": 1.0984, + "step": 88300 + }, + { + "epoch": 33.06, + "learning_rate": 6.444444444444444e-05, + "loss": 1.0893, + "step": 88400 + }, + { + "epoch": 33.1, + "learning_rate": 6.388888888888888e-05, + "loss": 1.0907, + "step": 88500 + }, + { + "epoch": 33.13, + "learning_rate": 6.333333333333335e-05, + "loss": 1.0879, + "step": 88600 + }, + { + "epoch": 33.17, + "learning_rate": 6.277777777777778e-05, + "loss": 1.0905, + "step": 88700 + }, + { + "epoch": 33.21, + "learning_rate": 6.222222222222222e-05, + "loss": 1.0884, + "step": 88800 + }, + { + "epoch": 33.25, + "learning_rate": 6.166666666666667e-05, + "loss": 1.0897, + "step": 88900 + }, + { + "epoch": 33.28, + "learning_rate": 6.111111111111111e-05, + "loss": 1.093, + "step": 89000 + }, + { + "epoch": 33.32, + "learning_rate": 6.055555555555556e-05, + "loss": 1.0889, + "step": 89100 + }, + { + "epoch": 33.36, + "learning_rate": 6e-05, + "loss": 1.0903, + "step": 89200 + }, + { + "epoch": 33.4, + "learning_rate": 5.9444444444444445e-05, + "loss": 1.0895, + "step": 89300 + }, + { + "epoch": 33.43, + "learning_rate": 5.888888888888889e-05, + "loss": 1.0928, + "step": 89400 + }, + { + "epoch": 33.47, + "learning_rate": 5.833333333333333e-05, + "loss": 1.0909, + "step": 89500 + }, + { + "epoch": 33.51, + "learning_rate": 5.7777777777777776e-05, + "loss": 1.0889, + "step": 89600 + }, + { + "epoch": 33.55, + "learning_rate": 5.722222222222223e-05, + "loss": 1.0913, + "step": 89700 + }, + { + "epoch": 33.58, + "learning_rate": 5.6666666666666664e-05, + "loss": 1.09, + "step": 89800 + }, + { + "epoch": 33.62, + "learning_rate": 5.6111111111111114e-05, + "loss": 1.0908, + "step": 89900 + }, + { + "epoch": 33.66, + "learning_rate": 5.555555555555555e-05, + "loss": 1.0879, + "step": 90000 + }, + { + "epoch": 33.69, + "learning_rate": 5.5e-05, + "loss": 1.0862, + "step": 90100 + }, + { + "epoch": 33.73, + "learning_rate": 5.444444444444444e-05, + "loss": 1.086, + "step": 90200 + }, + { + "epoch": 33.77, + "learning_rate": 5.388888888888889e-05, + "loss": 1.0897, + "step": 90300 + }, + { + "epoch": 33.81, + "learning_rate": 5.333333333333334e-05, + "loss": 1.0888, + "step": 90400 + }, + { + "epoch": 33.84, + "learning_rate": 5.277777777777778e-05, + "loss": 1.0892, + "step": 90500 + }, + { + "epoch": 33.88, + "learning_rate": 5.222222222222223e-05, + "loss": 1.0858, + "step": 90600 + }, + { + "epoch": 33.92, + "learning_rate": 5.1666666666666664e-05, + "loss": 1.0874, + "step": 90700 + }, + { + "epoch": 33.96, + "learning_rate": 5.1111111111111115e-05, + "loss": 1.0864, + "step": 90800 + }, + { + "epoch": 33.99, + "learning_rate": 5.055555555555556e-05, + "loss": 1.088, + "step": 90900 + }, + { + "epoch": 34.03, + "learning_rate": 5e-05, + "loss": 1.0882, + "step": 91000 + }, + { + "epoch": 34.07, + "learning_rate": 4.9444444444444446e-05, + "loss": 1.0818, + "step": 91100 + }, + { + "epoch": 34.11, + "learning_rate": 4.888888888888889e-05, + "loss": 1.0873, + "step": 91200 + }, + { + "epoch": 34.14, + "learning_rate": 4.8333333333333334e-05, + "loss": 1.0823, + "step": 91300 + }, + { + "epoch": 34.18, + "learning_rate": 4.7777777777777784e-05, + "loss": 1.0852, + "step": 91400 + }, + { + "epoch": 34.22, + "learning_rate": 4.722222222222222e-05, + "loss": 1.0845, + "step": 91500 + }, + { + "epoch": 34.26, + "learning_rate": 4.666666666666667e-05, + "loss": 1.0864, + "step": 91600 + }, + { + "epoch": 34.29, + "learning_rate": 4.611111111111111e-05, + "loss": 1.0839, + "step": 91700 + }, + { + "epoch": 34.33, + "learning_rate": 4.555555555555556e-05, + "loss": 1.0821, + "step": 91800 + }, + { + "epoch": 34.37, + "learning_rate": 4.4999999999999996e-05, + "loss": 1.0843, + "step": 91900 + }, + { + "epoch": 34.41, + "learning_rate": 4.4444444444444447e-05, + "loss": 1.0788, + "step": 92000 + }, + { + "epoch": 34.44, + "learning_rate": 4.388888888888889e-05, + "loss": 1.0852, + "step": 92100 + }, + { + "epoch": 34.48, + "learning_rate": 4.3333333333333334e-05, + "loss": 1.0842, + "step": 92200 + }, + { + "epoch": 34.52, + "learning_rate": 4.277777777777778e-05, + "loss": 1.0815, + "step": 92300 + }, + { + "epoch": 34.55, + "learning_rate": 4.222222222222222e-05, + "loss": 1.0848, + "step": 92400 + }, + { + "epoch": 34.59, + "learning_rate": 4.1666666666666665e-05, + "loss": 1.0835, + "step": 92500 + }, + { + "epoch": 34.63, + "learning_rate": 4.1111111111111116e-05, + "loss": 1.0838, + "step": 92600 + }, + { + "epoch": 34.67, + "learning_rate": 4.055555555555555e-05, + "loss": 1.0812, + "step": 92700 + }, + { + "epoch": 34.7, + "learning_rate": 4e-05, + "loss": 1.0794, + "step": 92800 + }, + { + "epoch": 34.74, + "learning_rate": 3.944444444444444e-05, + "loss": 1.0808, + "step": 92900 + }, + { + "epoch": 34.78, + "learning_rate": 3.888888888888889e-05, + "loss": 1.084, + "step": 93000 + }, + { + "epoch": 34.82, + "learning_rate": 3.8333333333333334e-05, + "loss": 1.0825, + "step": 93100 + }, + { + "epoch": 34.85, + "learning_rate": 3.777777777777778e-05, + "loss": 1.0788, + "step": 93200 + }, + { + "epoch": 34.89, + "learning_rate": 3.722222222222222e-05, + "loss": 1.0812, + "step": 93300 + }, + { + "epoch": 34.93, + "learning_rate": 3.6666666666666666e-05, + "loss": 1.0804, + "step": 93400 + }, + { + "epoch": 34.97, + "learning_rate": 3.611111111111111e-05, + "loss": 1.0809, + "step": 93500 + }, + { + "epoch": 35.0, + "learning_rate": 3.555555555555555e-05, + "loss": 1.0839, + "step": 93600 + }, + { + "epoch": 35.04, + "learning_rate": 3.5000000000000004e-05, + "loss": 1.0794, + "step": 93700 + }, + { + "epoch": 35.08, + "learning_rate": 3.444444444444445e-05, + "loss": 1.0783, + "step": 93800 + }, + { + "epoch": 35.12, + "learning_rate": 3.388888888888889e-05, + "loss": 1.0794, + "step": 93900 + }, + { + "epoch": 35.15, + "learning_rate": 3.3333333333333335e-05, + "loss": 1.0768, + "step": 94000 + }, + { + "epoch": 35.19, + "learning_rate": 3.277777777777778e-05, + "loss": 1.082, + "step": 94100 + }, + { + "epoch": 35.23, + "learning_rate": 3.222222222222222e-05, + "loss": 1.0793, + "step": 94200 + }, + { + "epoch": 35.27, + "learning_rate": 3.166666666666667e-05, + "loss": 1.0776, + "step": 94300 + }, + { + "epoch": 35.3, + "learning_rate": 3.111111111111111e-05, + "loss": 1.0769, + "step": 94400 + }, + { + "epoch": 35.34, + "learning_rate": 3.0555555555555554e-05, + "loss": 1.0783, + "step": 94500 + }, + { + "epoch": 35.38, + "learning_rate": 3e-05, + "loss": 1.0774, + "step": 94600 + }, + { + "epoch": 35.42, + "learning_rate": 2.9444444444444445e-05, + "loss": 1.0772, + "step": 94700 + }, + { + "epoch": 35.45, + "learning_rate": 2.8888888888888888e-05, + "loss": 1.0791, + "step": 94800 + }, + { + "epoch": 35.49, + "learning_rate": 2.8333333333333332e-05, + "loss": 1.0766, + "step": 94900 + }, + { + "epoch": 35.53, + "learning_rate": 2.7777777777777776e-05, + "loss": 1.0755, + "step": 95000 + }, + { + "epoch": 35.56, + "learning_rate": 2.722222222222222e-05, + "loss": 1.0744, + "step": 95100 + }, + { + "epoch": 35.6, + "learning_rate": 2.666666666666667e-05, + "loss": 1.0775, + "step": 95200 + }, + { + "epoch": 35.64, + "learning_rate": 2.6111111111111114e-05, + "loss": 1.0723, + "step": 95300 + }, + { + "epoch": 35.68, + "learning_rate": 2.5555555555555557e-05, + "loss": 1.0763, + "step": 95400 + }, + { + "epoch": 35.71, + "learning_rate": 2.5e-05, + "loss": 1.0765, + "step": 95500 + }, + { + "epoch": 35.75, + "learning_rate": 2.4444444444444445e-05, + "loss": 1.0735, + "step": 95600 + }, + { + "epoch": 35.79, + "learning_rate": 2.3888888888888892e-05, + "loss": 1.0765, + "step": 95700 + }, + { + "epoch": 35.83, + "learning_rate": 2.3333333333333336e-05, + "loss": 1.0743, + "step": 95800 + }, + { + "epoch": 35.86, + "learning_rate": 2.277777777777778e-05, + "loss": 1.0742, + "step": 95900 + }, + { + "epoch": 35.9, + "learning_rate": 2.2222222222222223e-05, + "loss": 1.0755, + "step": 96000 + }, + { + "epoch": 35.94, + "learning_rate": 2.1666666666666667e-05, + "loss": 1.0747, + "step": 96100 + }, + { + "epoch": 35.98, + "learning_rate": 2.111111111111111e-05, + "loss": 1.0753, + "step": 96200 + }, + { + "epoch": 36.01, + "learning_rate": 2.0555555555555558e-05, + "loss": 1.0784, + "step": 96300 + }, + { + "epoch": 36.05, + "learning_rate": 2e-05, + "loss": 1.0724, + "step": 96400 + }, + { + "epoch": 36.09, + "learning_rate": 1.9444444444444445e-05, + "loss": 1.0725, + "step": 96500 + }, + { + "epoch": 36.13, + "learning_rate": 1.888888888888889e-05, + "loss": 1.0723, + "step": 96600 + }, + { + "epoch": 36.16, + "learning_rate": 1.8333333333333333e-05, + "loss": 1.073, + "step": 96700 + }, + { + "epoch": 36.2, + "learning_rate": 1.7777777777777777e-05, + "loss": 1.0736, + "step": 96800 + }, + { + "epoch": 36.24, + "learning_rate": 1.7222222222222224e-05, + "loss": 1.0724, + "step": 96900 + }, + { + "epoch": 36.28, + "learning_rate": 1.6666666666666667e-05, + "loss": 1.075, + "step": 97000 + }, + { + "epoch": 36.31, + "learning_rate": 1.611111111111111e-05, + "loss": 1.0739, + "step": 97100 + }, + { + "epoch": 36.35, + "learning_rate": 1.5555555555555555e-05, + "loss": 1.0721, + "step": 97200 + }, + { + "epoch": 36.39, + "learning_rate": 1.5e-05, + "loss": 1.0707, + "step": 97300 + }, + { + "epoch": 36.42, + "learning_rate": 1.4444444444444444e-05, + "loss": 1.0714, + "step": 97400 + }, + { + "epoch": 36.46, + "learning_rate": 1.3888888888888888e-05, + "loss": 1.0712, + "step": 97500 + }, + { + "epoch": 36.5, + "learning_rate": 1.3333333333333335e-05, + "loss": 1.0741, + "step": 97600 + }, + { + "epoch": 36.54, + "learning_rate": 1.2777777777777779e-05, + "loss": 1.0715, + "step": 97700 + }, + { + "epoch": 36.57, + "learning_rate": 1.2222222222222222e-05, + "loss": 1.0717, + "step": 97800 + }, + { + "epoch": 36.61, + "learning_rate": 1.1666666666666668e-05, + "loss": 1.0703, + "step": 97900 + }, + { + "epoch": 36.65, + "learning_rate": 1.1111111111111112e-05, + "loss": 1.0683, + "step": 98000 + }, + { + "epoch": 36.69, + "learning_rate": 1.0555555555555555e-05, + "loss": 1.069, + "step": 98100 + }, + { + "epoch": 36.72, + "learning_rate": 1e-05, + "loss": 1.0751, + "step": 98200 + }, + { + "epoch": 36.76, + "learning_rate": 9.444444444444445e-06, + "loss": 1.0719, + "step": 98300 + }, + { + "epoch": 36.8, + "learning_rate": 8.888888888888888e-06, + "loss": 1.0701, + "step": 98400 + }, + { + "epoch": 36.84, + "learning_rate": 8.333333333333334e-06, + "loss": 1.0719, + "step": 98500 + }, + { + "epoch": 36.87, + "learning_rate": 7.777777777777777e-06, + "loss": 1.0692, + "step": 98600 + }, + { + "epoch": 36.91, + "learning_rate": 7.222222222222222e-06, + "loss": 1.0716, + "step": 98700 + }, + { + "epoch": 36.95, + "learning_rate": 6.6666666666666675e-06, + "loss": 1.0712, + "step": 98800 + }, + { + "epoch": 36.99, + "learning_rate": 6.111111111111111e-06, + "loss": 1.0695, + "step": 98900 + }, + { + "epoch": 37.02, + "learning_rate": 5.555555555555556e-06, + "loss": 1.0749, + "step": 99000 + }, + { + "epoch": 37.06, + "learning_rate": 5e-06, + "loss": 1.0682, + "step": 99100 + }, + { + "epoch": 37.1, + "learning_rate": 4.444444444444444e-06, + "loss": 1.0721, + "step": 99200 + }, + { + "epoch": 37.14, + "learning_rate": 3.888888888888889e-06, + "loss": 1.069, + "step": 99300 + }, + { + "epoch": 37.17, + "learning_rate": 3.3333333333333337e-06, + "loss": 1.0687, + "step": 99400 + }, + { + "epoch": 37.21, + "learning_rate": 2.777777777777778e-06, + "loss": 1.0688, + "step": 99500 + }, + { + "epoch": 37.25, + "learning_rate": 2.222222222222222e-06, + "loss": 1.0694, + "step": 99600 + }, + { + "epoch": 37.28, + "learning_rate": 1.6666666666666669e-06, + "loss": 1.0671, + "step": 99700 + }, + { + "epoch": 37.32, + "learning_rate": 1.111111111111111e-06, + "loss": 1.0672, + "step": 99800 + }, + { + "epoch": 37.36, + "learning_rate": 5.555555555555555e-07, + "loss": 1.0654, + "step": 99900 + }, + { + "epoch": 37.4, + "learning_rate": 0.0, + "loss": 1.0688, + "step": 100000 + }, + { + "epoch": 37.43, + "learning_rate": 0.00026289473684210524, + "loss": 1.1059, + "step": 100100 + }, + { + "epoch": 37.47, + "learning_rate": 0.00026263157894736844, + "loss": 1.1269, + "step": 100200 + }, + { + "epoch": 37.51, + "learning_rate": 0.0002623684210526316, + "loss": 1.1303, + "step": 100300 + }, + { + "epoch": 37.55, + "learning_rate": 0.0002621052631578947, + "loss": 1.1346, + "step": 100400 + }, + { + "epoch": 37.58, + "learning_rate": 0.00026184210526315787, + "loss": 1.1416, + "step": 100500 + }, + { + "epoch": 37.62, + "learning_rate": 0.00026157894736842107, + "loss": 1.1439, + "step": 100600 + }, + { + "epoch": 37.66, + "learning_rate": 0.0002613157894736842, + "loss": 1.1447, + "step": 100700 + }, + { + "epoch": 37.7, + "learning_rate": 0.00026105263157894735, + "loss": 1.1446, + "step": 100800 + }, + { + "epoch": 37.73, + "learning_rate": 0.0002607894736842105, + "loss": 1.1452, + "step": 100900 + }, + { + "epoch": 37.77, + "learning_rate": 0.0002605263157894737, + "loss": 1.1472, + "step": 101000 + }, + { + "epoch": 37.81, + "learning_rate": 0.0002602631578947369, + "loss": 1.149, + "step": 101100 + }, + { + "epoch": 37.85, + "learning_rate": 0.00026000000000000003, + "loss": 1.1538, + "step": 101200 + }, + { + "epoch": 37.88, + "learning_rate": 0.0002597368421052632, + "loss": 1.149, + "step": 101300 + }, + { + "epoch": 37.92, + "learning_rate": 0.0002594736842105263, + "loss": 1.1556, + "step": 101400 + }, + { + "epoch": 37.96, + "learning_rate": 0.00025921052631578946, + "loss": 1.1521, + "step": 101500 + }, + { + "epoch": 38.0, + "learning_rate": 0.00025894736842105266, + "loss": 1.1535, + "step": 101600 + }, + { + "epoch": 38.03, + "learning_rate": 0.0002586842105263158, + "loss": 1.1576, + "step": 101700 + }, + { + "epoch": 38.07, + "learning_rate": 0.00025842105263157894, + "loss": 1.1513, + "step": 101800 + }, + { + "epoch": 38.11, + "learning_rate": 0.0002581578947368421, + "loss": 1.1513, + "step": 101900 + }, + { + "epoch": 38.15, + "learning_rate": 0.0002578947368421053, + "loss": 1.1516, + "step": 102000 + }, + { + "epoch": 38.18, + "learning_rate": 0.0002576315789473684, + "loss": 1.152, + "step": 102100 + }, + { + "epoch": 38.22, + "learning_rate": 0.00025736842105263157, + "loss": 1.153, + "step": 102200 + }, + { + "epoch": 38.26, + "learning_rate": 0.0002571052631578947, + "loss": 1.1558, + "step": 102300 + }, + { + "epoch": 38.29, + "learning_rate": 0.00025684210526315786, + "loss": 1.1527, + "step": 102400 + }, + { + "epoch": 38.33, + "learning_rate": 0.00025657894736842105, + "loss": 1.1557, + "step": 102500 + }, + { + "epoch": 38.37, + "learning_rate": 0.00025631578947368425, + "loss": 1.1532, + "step": 102600 + }, + { + "epoch": 38.41, + "learning_rate": 0.0002560526315789474, + "loss": 1.1515, + "step": 102700 + }, + { + "epoch": 38.44, + "learning_rate": 0.00025578947368421054, + "loss": 1.1515, + "step": 102800 + }, + { + "epoch": 38.48, + "learning_rate": 0.0002555263157894737, + "loss": 1.156, + "step": 102900 + }, + { + "epoch": 38.52, + "learning_rate": 0.0002552631578947369, + "loss": 1.1539, + "step": 103000 + }, + { + "epoch": 38.56, + "learning_rate": 0.000255, + "loss": 1.151, + "step": 103100 + }, + { + "epoch": 38.59, + "learning_rate": 0.00025473684210526316, + "loss": 1.1533, + "step": 103200 + }, + { + "epoch": 38.63, + "learning_rate": 0.0002544736842105263, + "loss": 1.1538, + "step": 103300 + }, + { + "epoch": 38.67, + "learning_rate": 0.0002542105263157895, + "loss": 1.1567, + "step": 103400 + }, + { + "epoch": 38.71, + "learning_rate": 0.00025394736842105264, + "loss": 1.1575, + "step": 103500 + }, + { + "epoch": 38.74, + "learning_rate": 0.0002536842105263158, + "loss": 1.1549, + "step": 103600 + }, + { + "epoch": 38.78, + "learning_rate": 0.00025342105263157893, + "loss": 1.1582, + "step": 103700 + }, + { + "epoch": 38.82, + "learning_rate": 0.0002531578947368421, + "loss": 1.1554, + "step": 103800 + }, + { + "epoch": 38.86, + "learning_rate": 0.00025289473684210527, + "loss": 1.1547, + "step": 103900 + }, + { + "epoch": 38.89, + "learning_rate": 0.0002526315789473684, + "loss": 1.1584, + "step": 104000 + }, + { + "epoch": 38.93, + "learning_rate": 0.00025236842105263156, + "loss": 1.1595, + "step": 104100 + }, + { + "epoch": 38.97, + "learning_rate": 0.00025210526315789475, + "loss": 1.1589, + "step": 104200 + }, + { + "epoch": 39.01, + "learning_rate": 0.0002518421052631579, + "loss": 1.1614, + "step": 104300 + }, + { + "epoch": 39.04, + "learning_rate": 0.0002515789473684211, + "loss": 1.1519, + "step": 104400 + }, + { + "epoch": 39.08, + "learning_rate": 0.00025131578947368424, + "loss": 1.1525, + "step": 104500 + }, + { + "epoch": 39.12, + "learning_rate": 0.0002510526315789474, + "loss": 1.1524, + "step": 104600 + }, + { + "epoch": 39.15, + "learning_rate": 0.0002507894736842105, + "loss": 1.1555, + "step": 104700 + }, + { + "epoch": 39.19, + "learning_rate": 0.0002505263157894737, + "loss": 1.1548, + "step": 104800 + }, + { + "epoch": 39.23, + "learning_rate": 0.00025026315789473686, + "loss": 1.1541, + "step": 104900 + }, + { + "epoch": 39.27, + "learning_rate": 0.00025, + "loss": 1.1527, + "step": 105000 + }, + { + "epoch": 39.3, + "learning_rate": 0.00024973684210526315, + "loss": 1.154, + "step": 105100 + }, + { + "epoch": 39.34, + "learning_rate": 0.00024947368421052635, + "loss": 1.1538, + "step": 105200 + }, + { + "epoch": 39.38, + "learning_rate": 0.0002492105263157895, + "loss": 1.154, + "step": 105300 + }, + { + "epoch": 39.42, + "learning_rate": 0.00024894736842105263, + "loss": 1.1523, + "step": 105400 + }, + { + "epoch": 39.45, + "learning_rate": 0.0002486842105263158, + "loss": 1.1553, + "step": 105500 + }, + { + "epoch": 39.49, + "learning_rate": 0.00024842105263157897, + "loss": 1.1565, + "step": 105600 + }, + { + "epoch": 39.53, + "learning_rate": 0.0002481578947368421, + "loss": 1.1535, + "step": 105700 + }, + { + "epoch": 39.57, + "learning_rate": 0.00024789473684210526, + "loss": 1.1564, + "step": 105800 + }, + { + "epoch": 39.6, + "learning_rate": 0.00024763157894736845, + "loss": 1.1558, + "step": 105900 + }, + { + "epoch": 39.64, + "learning_rate": 0.0002473684210526316, + "loss": 1.1538, + "step": 106000 + }, + { + "epoch": 39.68, + "learning_rate": 0.00024710526315789474, + "loss": 1.1571, + "step": 106100 + }, + { + "epoch": 39.72, + "learning_rate": 0.0002468421052631579, + "loss": 1.1573, + "step": 106200 + }, + { + "epoch": 39.75, + "learning_rate": 0.0002465789473684211, + "loss": 1.1535, + "step": 106300 + }, + { + "epoch": 39.79, + "learning_rate": 0.0002463157894736842, + "loss": 1.1542, + "step": 106400 + }, + { + "epoch": 39.83, + "learning_rate": 0.00024605263157894737, + "loss": 1.1519, + "step": 106500 + }, + { + "epoch": 39.87, + "learning_rate": 0.0002457894736842105, + "loss": 1.1534, + "step": 106600 + }, + { + "epoch": 39.9, + "learning_rate": 0.0002455263157894737, + "loss": 1.1508, + "step": 106700 + }, + { + "epoch": 39.94, + "learning_rate": 0.00024526315789473685, + "loss": 1.1543, + "step": 106800 + }, + { + "epoch": 39.98, + "learning_rate": 0.000245, + "loss": 1.1545, + "step": 106900 + }, + { + "epoch": 40.01, + "learning_rate": 0.0002447368421052632, + "loss": 1.1544, + "step": 107000 + }, + { + "epoch": 40.05, + "learning_rate": 0.00024447368421052633, + "loss": 1.1492, + "step": 107100 + }, + { + "epoch": 40.09, + "learning_rate": 0.0002442105263157895, + "loss": 1.152, + "step": 107200 + }, + { + "epoch": 40.13, + "learning_rate": 0.00024394736842105265, + "loss": 1.1503, + "step": 107300 + }, + { + "epoch": 40.16, + "learning_rate": 0.0002436842105263158, + "loss": 1.1506, + "step": 107400 + }, + { + "epoch": 40.2, + "learning_rate": 0.00024342105263157896, + "loss": 1.1509, + "step": 107500 + }, + { + "epoch": 40.24, + "learning_rate": 0.0002431578947368421, + "loss": 1.1511, + "step": 107600 + }, + { + "epoch": 40.28, + "learning_rate": 0.00024289473684210527, + "loss": 1.1493, + "step": 107700 + }, + { + "epoch": 40.31, + "learning_rate": 0.00024263157894736841, + "loss": 1.151, + "step": 107800 + }, + { + "epoch": 40.35, + "learning_rate": 0.00024236842105263158, + "loss": 1.1537, + "step": 107900 + }, + { + "epoch": 40.39, + "learning_rate": 0.00024210526315789475, + "loss": 1.1519, + "step": 108000 + }, + { + "epoch": 40.43, + "learning_rate": 0.0002418421052631579, + "loss": 1.1534, + "step": 108100 + }, + { + "epoch": 40.46, + "learning_rate": 0.00024157894736842107, + "loss": 1.1511, + "step": 108200 + }, + { + "epoch": 40.5, + "learning_rate": 0.0002413157894736842, + "loss": 1.1546, + "step": 108300 + }, + { + "epoch": 40.54, + "learning_rate": 0.00024105263157894738, + "loss": 1.1518, + "step": 108400 + }, + { + "epoch": 40.58, + "learning_rate": 0.00024078947368421052, + "loss": 1.1538, + "step": 108500 + }, + { + "epoch": 40.61, + "learning_rate": 0.00024052631578947367, + "loss": 1.1513, + "step": 108600 + }, + { + "epoch": 40.65, + "learning_rate": 0.00024026315789473686, + "loss": 1.1466, + "step": 108700 + }, + { + "epoch": 40.69, + "learning_rate": 0.00024, + "loss": 1.151, + "step": 108800 + }, + { + "epoch": 40.73, + "learning_rate": 0.00023973684210526318, + "loss": 1.1535, + "step": 108900 + }, + { + "epoch": 40.76, + "learning_rate": 0.00023947368421052632, + "loss": 1.1516, + "step": 109000 + }, + { + "epoch": 40.8, + "learning_rate": 0.0002392105263157895, + "loss": 1.1516, + "step": 109100 + }, + { + "epoch": 40.84, + "learning_rate": 0.00023894736842105263, + "loss": 1.1523, + "step": 109200 + }, + { + "epoch": 40.87, + "learning_rate": 0.00023868421052631577, + "loss": 1.1517, + "step": 109300 + }, + { + "epoch": 40.91, + "learning_rate": 0.00023842105263157895, + "loss": 1.1508, + "step": 109400 + }, + { + "epoch": 40.95, + "learning_rate": 0.0002381578947368421, + "loss": 1.1529, + "step": 109500 + }, + { + "epoch": 40.99, + "learning_rate": 0.00023789473684210529, + "loss": 1.1531, + "step": 109600 + }, + { + "epoch": 41.02, + "learning_rate": 0.00023763157894736843, + "loss": 1.1527, + "step": 109700 + }, + { + "epoch": 41.06, + "learning_rate": 0.0002373684210526316, + "loss": 1.1456, + "step": 109800 + }, + { + "epoch": 41.1, + "learning_rate": 0.00023710526315789474, + "loss": 1.1491, + "step": 109900 + }, + { + "epoch": 41.14, + "learning_rate": 0.00023684210526315788, + "loss": 1.1463, + "step": 110000 + }, + { + "epoch": 41.17, + "learning_rate": 0.00023657894736842105, + "loss": 1.1486, + "step": 110100 + }, + { + "epoch": 41.21, + "learning_rate": 0.0002363157894736842, + "loss": 1.1481, + "step": 110200 + }, + { + "epoch": 41.25, + "learning_rate": 0.00023605263157894737, + "loss": 1.1483, + "step": 110300 + }, + { + "epoch": 41.29, + "learning_rate": 0.00023578947368421054, + "loss": 1.1478, + "step": 110400 + }, + { + "epoch": 41.32, + "learning_rate": 0.0002355263157894737, + "loss": 1.1514, + "step": 110500 + }, + { + "epoch": 41.36, + "learning_rate": 0.00023526315789473685, + "loss": 1.1476, + "step": 110600 + }, + { + "epoch": 41.4, + "learning_rate": 0.000235, + "loss": 1.1474, + "step": 110700 + }, + { + "epoch": 41.44, + "learning_rate": 0.00023473684210526316, + "loss": 1.1475, + "step": 110800 + }, + { + "epoch": 41.47, + "learning_rate": 0.0002344736842105263, + "loss": 1.1456, + "step": 110900 + }, + { + "epoch": 41.51, + "learning_rate": 0.00023421052631578948, + "loss": 1.1478, + "step": 111000 + }, + { + "epoch": 41.55, + "learning_rate": 0.00023394736842105262, + "loss": 1.1482, + "step": 111100 + }, + { + "epoch": 41.59, + "learning_rate": 0.00023368421052631582, + "loss": 1.1426, + "step": 111200 + }, + { + "epoch": 41.62, + "learning_rate": 0.00023342105263157896, + "loss": 1.1472, + "step": 111300 + }, + { + "epoch": 41.66, + "learning_rate": 0.0002331578947368421, + "loss": 1.1486, + "step": 111400 + }, + { + "epoch": 41.7, + "learning_rate": 0.00023289473684210527, + "loss": 1.1452, + "step": 111500 + }, + { + "epoch": 41.74, + "learning_rate": 0.00023263157894736841, + "loss": 1.1501, + "step": 111600 + }, + { + "epoch": 41.77, + "learning_rate": 0.00023236842105263158, + "loss": 1.1473, + "step": 111700 + }, + { + "epoch": 41.81, + "learning_rate": 0.00023210526315789473, + "loss": 1.1454, + "step": 111800 + }, + { + "epoch": 41.85, + "learning_rate": 0.0002318421052631579, + "loss": 1.1464, + "step": 111900 + }, + { + "epoch": 41.88, + "learning_rate": 0.00023157894736842107, + "loss": 1.1466, + "step": 112000 + }, + { + "epoch": 41.92, + "learning_rate": 0.0002313157894736842, + "loss": 1.1459, + "step": 112100 + }, + { + "epoch": 41.96, + "learning_rate": 0.00023105263157894738, + "loss": 1.1454, + "step": 112200 + }, + { + "epoch": 42.0, + "learning_rate": 0.00023078947368421052, + "loss": 1.1471, + "step": 112300 + }, + { + "epoch": 42.03, + "learning_rate": 0.0002305263157894737, + "loss": 1.1486, + "step": 112400 + }, + { + "epoch": 42.07, + "learning_rate": 0.00023026315789473684, + "loss": 1.143, + "step": 112500 + }, + { + "epoch": 42.11, + "learning_rate": 0.00023, + "loss": 1.1375, + "step": 112600 + }, + { + "epoch": 42.15, + "learning_rate": 0.00022973684210526315, + "loss": 1.1434, + "step": 112700 + }, + { + "epoch": 42.18, + "learning_rate": 0.00022947368421052632, + "loss": 1.1454, + "step": 112800 + }, + { + "epoch": 42.22, + "learning_rate": 0.0002292105263157895, + "loss": 1.1425, + "step": 112900 + }, + { + "epoch": 42.26, + "learning_rate": 0.00022894736842105263, + "loss": 1.1455, + "step": 113000 + }, + { + "epoch": 42.3, + "learning_rate": 0.0002286842105263158, + "loss": 1.1413, + "step": 113100 + }, + { + "epoch": 42.33, + "learning_rate": 0.00022842105263157895, + "loss": 1.141, + "step": 113200 + }, + { + "epoch": 42.37, + "learning_rate": 0.00022815789473684212, + "loss": 1.1455, + "step": 113300 + }, + { + "epoch": 42.41, + "learning_rate": 0.00022789473684210526, + "loss": 1.1436, + "step": 113400 + }, + { + "epoch": 42.45, + "learning_rate": 0.00022763157894736843, + "loss": 1.1412, + "step": 113500 + }, + { + "epoch": 42.48, + "learning_rate": 0.0002273684210526316, + "loss": 1.144, + "step": 113600 + }, + { + "epoch": 42.52, + "learning_rate": 0.00022710526315789474, + "loss": 1.1434, + "step": 113700 + }, + { + "epoch": 42.56, + "learning_rate": 0.0002268421052631579, + "loss": 1.1438, + "step": 113800 + }, + { + "epoch": 42.6, + "learning_rate": 0.00022657894736842105, + "loss": 1.1451, + "step": 113900 + }, + { + "epoch": 42.63, + "learning_rate": 0.00022631578947368422, + "loss": 1.1433, + "step": 114000 + }, + { + "epoch": 42.67, + "learning_rate": 0.00022605263157894737, + "loss": 1.1418, + "step": 114100 + }, + { + "epoch": 42.71, + "learning_rate": 0.00022578947368421054, + "loss": 1.1429, + "step": 114200 + }, + { + "epoch": 42.74, + "learning_rate": 0.00022552631578947368, + "loss": 1.1471, + "step": 114300 + }, + { + "epoch": 42.78, + "learning_rate": 0.00022526315789473682, + "loss": 1.1397, + "step": 114400 + }, + { + "epoch": 42.82, + "learning_rate": 0.00022500000000000002, + "loss": 1.1401, + "step": 114500 + }, + { + "epoch": 42.86, + "learning_rate": 0.00022473684210526316, + "loss": 1.1405, + "step": 114600 + }, + { + "epoch": 42.89, + "learning_rate": 0.00022447368421052633, + "loss": 1.1435, + "step": 114700 + }, + { + "epoch": 42.93, + "learning_rate": 0.00022421052631578948, + "loss": 1.1395, + "step": 114800 + }, + { + "epoch": 42.97, + "learning_rate": 0.00022394736842105265, + "loss": 1.1414, + "step": 114900 + }, + { + "epoch": 43.01, + "learning_rate": 0.0002236842105263158, + "loss": 1.1457, + "step": 115000 + }, + { + "epoch": 43.04, + "learning_rate": 0.00022342105263157893, + "loss": 1.1396, + "step": 115100 + }, + { + "epoch": 43.08, + "learning_rate": 0.0002231578947368421, + "loss": 1.1339, + "step": 115200 + }, + { + "epoch": 43.12, + "learning_rate": 0.00022289473684210527, + "loss": 1.1366, + "step": 115300 + }, + { + "epoch": 43.16, + "learning_rate": 0.00022263157894736844, + "loss": 1.142, + "step": 115400 + }, + { + "epoch": 43.19, + "learning_rate": 0.00022236842105263159, + "loss": 1.138, + "step": 115500 + }, + { + "epoch": 43.23, + "learning_rate": 0.00022210526315789476, + "loss": 1.1372, + "step": 115600 + }, + { + "epoch": 43.27, + "learning_rate": 0.0002218421052631579, + "loss": 1.1377, + "step": 115700 + }, + { + "epoch": 43.31, + "learning_rate": 0.00022157894736842104, + "loss": 1.1382, + "step": 115800 + }, + { + "epoch": 43.34, + "learning_rate": 0.0002213157894736842, + "loss": 1.1375, + "step": 115900 + }, + { + "epoch": 43.38, + "learning_rate": 0.00022105263157894735, + "loss": 1.1372, + "step": 116000 + }, + { + "epoch": 43.42, + "learning_rate": 0.00022078947368421055, + "loss": 1.1373, + "step": 116100 + }, + { + "epoch": 43.46, + "learning_rate": 0.0002205263157894737, + "loss": 1.1357, + "step": 116200 + }, + { + "epoch": 43.49, + "learning_rate": 0.00022026315789473686, + "loss": 1.1357, + "step": 116300 + }, + { + "epoch": 43.53, + "learning_rate": 0.00022, + "loss": 1.1406, + "step": 116400 + }, + { + "epoch": 43.57, + "learning_rate": 0.00021973684210526315, + "loss": 1.1385, + "step": 116500 + }, + { + "epoch": 43.6, + "learning_rate": 0.00021947368421052632, + "loss": 1.1415, + "step": 116600 + }, + { + "epoch": 43.64, + "learning_rate": 0.00021921052631578946, + "loss": 1.1387, + "step": 116700 + }, + { + "epoch": 43.68, + "learning_rate": 0.00021894736842105263, + "loss": 1.1385, + "step": 116800 + }, + { + "epoch": 43.72, + "learning_rate": 0.0002186842105263158, + "loss": 1.1372, + "step": 116900 + }, + { + "epoch": 43.75, + "learning_rate": 0.00021842105263157897, + "loss": 1.1343, + "step": 117000 + }, + { + "epoch": 43.79, + "learning_rate": 0.00021815789473684212, + "loss": 1.1374, + "step": 117100 + }, + { + "epoch": 43.83, + "learning_rate": 0.00021789473684210526, + "loss": 1.1398, + "step": 117200 + }, + { + "epoch": 43.87, + "learning_rate": 0.00021763157894736843, + "loss": 1.1385, + "step": 117300 + }, + { + "epoch": 43.9, + "learning_rate": 0.00021736842105263157, + "loss": 1.1394, + "step": 117400 + }, + { + "epoch": 43.94, + "learning_rate": 0.00021710526315789474, + "loss": 1.1401, + "step": 117500 + }, + { + "epoch": 43.98, + "learning_rate": 0.00021684210526315789, + "loss": 1.1371, + "step": 117600 + }, + { + "epoch": 44.02, + "learning_rate": 0.00021657894736842108, + "loss": 1.1424, + "step": 117700 + }, + { + "epoch": 44.05, + "learning_rate": 0.00021631578947368423, + "loss": 1.1344, + "step": 117800 + }, + { + "epoch": 44.09, + "learning_rate": 0.00021605263157894737, + "loss": 1.1324, + "step": 117900 + }, + { + "epoch": 44.13, + "learning_rate": 0.00021578947368421054, + "loss": 1.1361, + "step": 118000 + }, + { + "epoch": 44.17, + "learning_rate": 0.00021552631578947368, + "loss": 1.1341, + "step": 118100 + }, + { + "epoch": 44.2, + "learning_rate": 0.00021526315789473685, + "loss": 1.1312, + "step": 118200 + }, + { + "epoch": 44.24, + "learning_rate": 0.000215, + "loss": 1.1307, + "step": 118300 + }, + { + "epoch": 44.28, + "learning_rate": 0.00021473684210526316, + "loss": 1.1305, + "step": 118400 + }, + { + "epoch": 44.32, + "learning_rate": 0.0002144736842105263, + "loss": 1.134, + "step": 118500 + }, + { + "epoch": 44.35, + "learning_rate": 0.00021421052631578948, + "loss": 1.1339, + "step": 118600 + }, + { + "epoch": 44.39, + "learning_rate": 0.00021394736842105265, + "loss": 1.1359, + "step": 118700 + }, + { + "epoch": 44.43, + "learning_rate": 0.0002136842105263158, + "loss": 1.1354, + "step": 118800 + }, + { + "epoch": 44.47, + "learning_rate": 0.00021342105263157896, + "loss": 1.1349, + "step": 118900 + }, + { + "epoch": 44.5, + "learning_rate": 0.0002131578947368421, + "loss": 1.1335, + "step": 119000 + }, + { + "epoch": 44.54, + "learning_rate": 0.00021289473684210527, + "loss": 1.1335, + "step": 119100 + }, + { + "epoch": 44.58, + "learning_rate": 0.00021263157894736842, + "loss": 1.1416, + "step": 119200 + }, + { + "epoch": 44.61, + "learning_rate": 0.00021236842105263156, + "loss": 1.1344, + "step": 119300 + }, + { + "epoch": 44.65, + "learning_rate": 0.00021210526315789476, + "loss": 1.1337, + "step": 119400 + }, + { + "epoch": 44.69, + "learning_rate": 0.0002118421052631579, + "loss": 1.1299, + "step": 119500 + }, + { + "epoch": 44.73, + "learning_rate": 0.00021157894736842107, + "loss": 1.1337, + "step": 119600 + }, + { + "epoch": 44.76, + "learning_rate": 0.0002113157894736842, + "loss": 1.1338, + "step": 119700 + }, + { + "epoch": 44.8, + "learning_rate": 0.00021105263157894738, + "loss": 1.1312, + "step": 119800 + }, + { + "epoch": 44.84, + "learning_rate": 0.00021078947368421053, + "loss": 1.1329, + "step": 119900 + }, + { + "epoch": 44.88, + "learning_rate": 0.00021052631578947367, + "loss": 1.1367, + "step": 120000 + }, + { + "epoch": 44.91, + "learning_rate": 0.00021026315789473684, + "loss": 1.1346, + "step": 120100 + }, + { + "epoch": 44.95, + "learning_rate": 0.00021, + "loss": 1.1327, + "step": 120200 + }, + { + "epoch": 44.99, + "learning_rate": 0.00020973684210526318, + "loss": 1.1302, + "step": 120300 + }, + { + "epoch": 45.03, + "learning_rate": 0.00020947368421052632, + "loss": 1.1346, + "step": 120400 + }, + { + "epoch": 45.06, + "learning_rate": 0.0002092105263157895, + "loss": 1.1296, + "step": 120500 + }, + { + "epoch": 45.1, + "learning_rate": 0.00020894736842105263, + "loss": 1.1282, + "step": 120600 + }, + { + "epoch": 45.14, + "learning_rate": 0.00020868421052631578, + "loss": 1.1292, + "step": 120700 + }, + { + "epoch": 45.18, + "learning_rate": 0.00020842105263157895, + "loss": 1.1305, + "step": 120800 + }, + { + "epoch": 45.21, + "learning_rate": 0.0002081578947368421, + "loss": 1.1277, + "step": 120900 + }, + { + "epoch": 45.25, + "learning_rate": 0.0002078947368421053, + "loss": 1.1272, + "step": 121000 + }, + { + "epoch": 45.29, + "learning_rate": 0.00020763157894736843, + "loss": 1.1322, + "step": 121100 + }, + { + "epoch": 45.33, + "learning_rate": 0.0002073684210526316, + "loss": 1.1284, + "step": 121200 + }, + { + "epoch": 45.36, + "learning_rate": 0.00020710526315789474, + "loss": 1.1294, + "step": 121300 + }, + { + "epoch": 45.4, + "learning_rate": 0.0002068421052631579, + "loss": 1.1273, + "step": 121400 + }, + { + "epoch": 45.44, + "learning_rate": 0.00020657894736842106, + "loss": 1.1271, + "step": 121500 + }, + { + "epoch": 45.47, + "learning_rate": 0.0002063157894736842, + "loss": 1.1282, + "step": 121600 + }, + { + "epoch": 45.51, + "learning_rate": 0.00020605263157894737, + "loss": 1.1299, + "step": 121700 + }, + { + "epoch": 45.55, + "learning_rate": 0.00020578947368421054, + "loss": 1.1283, + "step": 121800 + }, + { + "epoch": 45.59, + "learning_rate": 0.0002055263157894737, + "loss": 1.1275, + "step": 121900 + }, + { + "epoch": 45.62, + "learning_rate": 0.00020526315789473685, + "loss": 1.1315, + "step": 122000 + }, + { + "epoch": 45.66, + "learning_rate": 0.000205, + "loss": 1.1278, + "step": 122100 + }, + { + "epoch": 45.7, + "learning_rate": 0.00020473684210526317, + "loss": 1.1295, + "step": 122200 + }, + { + "epoch": 45.74, + "learning_rate": 0.0002044736842105263, + "loss": 1.1314, + "step": 122300 + }, + { + "epoch": 45.77, + "learning_rate": 0.00020421052631578948, + "loss": 1.128, + "step": 122400 + }, + { + "epoch": 45.81, + "learning_rate": 0.00020394736842105262, + "loss": 1.1263, + "step": 122500 + }, + { + "epoch": 45.85, + "learning_rate": 0.00020368421052631582, + "loss": 1.1234, + "step": 122600 + }, + { + "epoch": 45.89, + "learning_rate": 0.00020342105263157896, + "loss": 1.128, + "step": 122700 + }, + { + "epoch": 45.92, + "learning_rate": 0.0002031578947368421, + "loss": 1.1291, + "step": 122800 + }, + { + "epoch": 45.96, + "learning_rate": 0.00020289473684210527, + "loss": 1.1292, + "step": 122900 + }, + { + "epoch": 46.0, + "learning_rate": 0.00020263157894736842, + "loss": 1.1246, + "step": 123000 + }, + { + "epoch": 46.04, + "learning_rate": 0.0002023684210526316, + "loss": 1.1327, + "step": 123100 + }, + { + "epoch": 46.07, + "learning_rate": 0.00020210526315789473, + "loss": 1.1204, + "step": 123200 + }, + { + "epoch": 46.11, + "learning_rate": 0.0002018421052631579, + "loss": 1.1275, + "step": 123300 + }, + { + "epoch": 46.15, + "learning_rate": 0.00020157894736842104, + "loss": 1.1235, + "step": 123400 + }, + { + "epoch": 46.19, + "learning_rate": 0.00020131578947368421, + "loss": 1.1244, + "step": 123500 + }, + { + "epoch": 46.22, + "learning_rate": 0.00020105263157894738, + "loss": 1.1239, + "step": 123600 + }, + { + "epoch": 46.26, + "learning_rate": 0.00020078947368421053, + "loss": 1.1252, + "step": 123700 + }, + { + "epoch": 46.3, + "learning_rate": 0.0002005263157894737, + "loss": 1.1249, + "step": 123800 + }, + { + "epoch": 46.34, + "learning_rate": 0.00020026315789473684, + "loss": 1.1221, + "step": 123900 + }, + { + "epoch": 46.37, + "learning_rate": 0.0002, + "loss": 1.1267, + "step": 124000 + }, + { + "epoch": 46.41, + "learning_rate": 0.00019973684210526315, + "loss": 1.1246, + "step": 124100 + }, + { + "epoch": 46.45, + "learning_rate": 0.0001994736842105263, + "loss": 1.1262, + "step": 124200 + }, + { + "epoch": 46.48, + "learning_rate": 0.0001992105263157895, + "loss": 1.1257, + "step": 124300 + }, + { + "epoch": 46.52, + "learning_rate": 0.00019894736842105264, + "loss": 1.1236, + "step": 124400 + }, + { + "epoch": 46.56, + "learning_rate": 0.0001986842105263158, + "loss": 1.1255, + "step": 124500 + }, + { + "epoch": 46.6, + "learning_rate": 0.00019842105263157895, + "loss": 1.1282, + "step": 124600 + }, + { + "epoch": 46.63, + "learning_rate": 0.00019815789473684212, + "loss": 1.1227, + "step": 124700 + }, + { + "epoch": 46.67, + "learning_rate": 0.00019789473684210526, + "loss": 1.1252, + "step": 124800 + }, + { + "epoch": 46.71, + "learning_rate": 0.0001976315789473684, + "loss": 1.1229, + "step": 124900 + }, + { + "epoch": 46.75, + "learning_rate": 0.00019736842105263157, + "loss": 1.1237, + "step": 125000 + }, + { + "epoch": 46.78, + "learning_rate": 0.00019710526315789474, + "loss": 1.1217, + "step": 125100 + }, + { + "epoch": 46.82, + "learning_rate": 0.00019684210526315791, + "loss": 1.1219, + "step": 125200 + }, + { + "epoch": 46.86, + "learning_rate": 0.00019657894736842106, + "loss": 1.1234, + "step": 125300 + }, + { + "epoch": 46.9, + "learning_rate": 0.00019631578947368423, + "loss": 1.1233, + "step": 125400 + }, + { + "epoch": 46.93, + "learning_rate": 0.00019605263157894737, + "loss": 1.1198, + "step": 125500 + }, + { + "epoch": 46.97, + "learning_rate": 0.0001957894736842105, + "loss": 1.1238, + "step": 125600 + }, + { + "epoch": 47.01, + "learning_rate": 0.00019552631578947368, + "loss": 1.1341, + "step": 125700 + }, + { + "epoch": 47.05, + "learning_rate": 0.00019526315789473683, + "loss": 1.1211, + "step": 125800 + }, + { + "epoch": 47.08, + "learning_rate": 0.00019500000000000002, + "loss": 1.1213, + "step": 125900 + }, + { + "epoch": 47.12, + "learning_rate": 0.00019473684210526317, + "loss": 1.1205, + "step": 126000 + }, + { + "epoch": 47.16, + "learning_rate": 0.00019447368421052634, + "loss": 1.1181, + "step": 126100 + }, + { + "epoch": 47.2, + "learning_rate": 0.00019421052631578948, + "loss": 1.118, + "step": 126200 + }, + { + "epoch": 47.23, + "learning_rate": 0.00019394736842105262, + "loss": 1.1193, + "step": 126300 + }, + { + "epoch": 47.27, + "learning_rate": 0.0001936842105263158, + "loss": 1.1216, + "step": 126400 + }, + { + "epoch": 47.31, + "learning_rate": 0.00019342105263157894, + "loss": 1.1221, + "step": 126500 + }, + { + "epoch": 47.34, + "learning_rate": 0.0001931578947368421, + "loss": 1.1226, + "step": 126600 + }, + { + "epoch": 47.38, + "learning_rate": 0.00019289473684210525, + "loss": 1.1211, + "step": 126700 + }, + { + "epoch": 47.42, + "learning_rate": 0.00019263157894736845, + "loss": 1.1194, + "step": 126800 + }, + { + "epoch": 47.46, + "learning_rate": 0.0001923684210526316, + "loss": 1.1192, + "step": 126900 + }, + { + "epoch": 47.49, + "learning_rate": 0.00019210526315789473, + "loss": 1.1191, + "step": 127000 + }, + { + "epoch": 47.53, + "learning_rate": 0.0001918421052631579, + "loss": 1.1181, + "step": 127100 + }, + { + "epoch": 47.57, + "learning_rate": 0.00019157894736842104, + "loss": 1.1217, + "step": 127200 + }, + { + "epoch": 47.61, + "learning_rate": 0.00019131578947368421, + "loss": 1.1175, + "step": 127300 + }, + { + "epoch": 47.64, + "learning_rate": 0.00019105263157894736, + "loss": 1.1186, + "step": 127400 + }, + { + "epoch": 47.68, + "learning_rate": 0.00019078947368421053, + "loss": 1.1204, + "step": 127500 + }, + { + "epoch": 47.72, + "learning_rate": 0.0001905263157894737, + "loss": 1.1218, + "step": 127600 + }, + { + "epoch": 47.76, + "learning_rate": 0.00019026315789473684, + "loss": 1.1205, + "step": 127700 + }, + { + "epoch": 47.79, + "learning_rate": 0.00019, + "loss": 1.1204, + "step": 127800 + }, + { + "epoch": 47.83, + "learning_rate": 0.00018973684210526315, + "loss": 1.1189, + "step": 127900 + }, + { + "epoch": 47.87, + "learning_rate": 0.00018947368421052632, + "loss": 1.1177, + "step": 128000 + }, + { + "epoch": 47.91, + "learning_rate": 0.00018921052631578947, + "loss": 1.1215, + "step": 128100 + }, + { + "epoch": 47.94, + "learning_rate": 0.00018894736842105264, + "loss": 1.1202, + "step": 128200 + }, + { + "epoch": 47.98, + "learning_rate": 0.00018868421052631578, + "loss": 1.1197, + "step": 128300 + }, + { + "epoch": 48.02, + "learning_rate": 0.00018842105263157895, + "loss": 1.1234, + "step": 128400 + }, + { + "epoch": 48.06, + "learning_rate": 0.00018815789473684212, + "loss": 1.1161, + "step": 128500 + }, + { + "epoch": 48.09, + "learning_rate": 0.00018789473684210526, + "loss": 1.1169, + "step": 128600 + }, + { + "epoch": 48.13, + "learning_rate": 0.00018763157894736843, + "loss": 1.1157, + "step": 128700 + }, + { + "epoch": 48.17, + "learning_rate": 0.00018736842105263158, + "loss": 1.114, + "step": 128800 + }, + { + "epoch": 48.2, + "learning_rate": 0.00018710526315789475, + "loss": 1.1164, + "step": 128900 + }, + { + "epoch": 48.24, + "learning_rate": 0.0001868421052631579, + "loss": 1.1126, + "step": 129000 + }, + { + "epoch": 48.28, + "learning_rate": 0.00018657894736842106, + "loss": 1.1171, + "step": 129100 + }, + { + "epoch": 48.32, + "learning_rate": 0.00018631578947368423, + "loss": 1.1181, + "step": 129200 + }, + { + "epoch": 48.35, + "learning_rate": 0.00018605263157894737, + "loss": 1.1174, + "step": 129300 + }, + { + "epoch": 48.39, + "learning_rate": 0.00018578947368421054, + "loss": 1.1141, + "step": 129400 + }, + { + "epoch": 48.43, + "learning_rate": 0.00018552631578947368, + "loss": 1.1118, + "step": 129500 + }, + { + "epoch": 48.47, + "learning_rate": 0.00018526315789473685, + "loss": 1.1152, + "step": 129600 + }, + { + "epoch": 48.5, + "learning_rate": 0.000185, + "loss": 1.1153, + "step": 129700 + }, + { + "epoch": 48.54, + "learning_rate": 0.00018473684210526317, + "loss": 1.1143, + "step": 129800 + }, + { + "epoch": 48.58, + "learning_rate": 0.0001844736842105263, + "loss": 1.1117, + "step": 129900 + }, + { + "epoch": 48.62, + "learning_rate": 0.00018421052631578948, + "loss": 1.1195, + "step": 130000 + }, + { + "epoch": 48.65, + "learning_rate": 0.00018394736842105265, + "loss": 1.1153, + "step": 130100 + }, + { + "epoch": 48.69, + "learning_rate": 0.0001836842105263158, + "loss": 1.1142, + "step": 130200 + }, + { + "epoch": 48.73, + "learning_rate": 0.00018342105263157896, + "loss": 1.1124, + "step": 130300 + }, + { + "epoch": 48.77, + "learning_rate": 0.0001831578947368421, + "loss": 1.118, + "step": 130400 + }, + { + "epoch": 48.8, + "learning_rate": 0.00018289473684210528, + "loss": 1.1149, + "step": 130500 + }, + { + "epoch": 48.84, + "learning_rate": 0.00018263157894736842, + "loss": 1.1127, + "step": 130600 + }, + { + "epoch": 48.88, + "learning_rate": 0.00018236842105263156, + "loss": 1.1138, + "step": 130700 + }, + { + "epoch": 48.92, + "learning_rate": 0.00018210526315789476, + "loss": 1.118, + "step": 130800 + }, + { + "epoch": 48.95, + "learning_rate": 0.0001818421052631579, + "loss": 1.1125, + "step": 130900 + }, + { + "epoch": 48.99, + "learning_rate": 0.00018157894736842107, + "loss": 1.1168, + "step": 131000 + }, + { + "epoch": 49.03, + "learning_rate": 0.00018131578947368422, + "loss": 1.1163, + "step": 131100 + }, + { + "epoch": 49.07, + "learning_rate": 0.00018105263157894739, + "loss": 1.1119, + "step": 131200 + }, + { + "epoch": 49.1, + "learning_rate": 0.00018078947368421053, + "loss": 1.1087, + "step": 131300 + }, + { + "epoch": 49.14, + "learning_rate": 0.00018052631578947367, + "loss": 1.1065, + "step": 131400 + }, + { + "epoch": 49.18, + "learning_rate": 0.00018026315789473684, + "loss": 1.108, + "step": 131500 + }, + { + "epoch": 49.21, + "learning_rate": 0.00017999999999999998, + "loss": 1.1118, + "step": 131600 + }, + { + "epoch": 49.25, + "learning_rate": 0.00017973684210526318, + "loss": 1.109, + "step": 131700 + }, + { + "epoch": 49.29, + "learning_rate": 0.00017947368421052632, + "loss": 1.1123, + "step": 131800 + }, + { + "epoch": 49.33, + "learning_rate": 0.0001792105263157895, + "loss": 1.11, + "step": 131900 + }, + { + "epoch": 49.36, + "learning_rate": 0.00017894736842105264, + "loss": 1.1092, + "step": 132000 + }, + { + "epoch": 49.4, + "learning_rate": 0.00017868421052631578, + "loss": 1.1135, + "step": 132100 + }, + { + "epoch": 49.44, + "learning_rate": 0.00017842105263157895, + "loss": 1.1108, + "step": 132200 + }, + { + "epoch": 49.48, + "learning_rate": 0.0001781578947368421, + "loss": 1.1122, + "step": 132300 + }, + { + "epoch": 49.51, + "learning_rate": 0.00017789473684210526, + "loss": 1.1104, + "step": 132400 + }, + { + "epoch": 49.55, + "learning_rate": 0.00017763157894736843, + "loss": 1.1157, + "step": 132500 + }, + { + "epoch": 49.59, + "learning_rate": 0.0001773684210526316, + "loss": 1.1118, + "step": 132600 + }, + { + "epoch": 49.63, + "learning_rate": 0.00017710526315789475, + "loss": 1.1077, + "step": 132700 + }, + { + "epoch": 49.66, + "learning_rate": 0.0001768421052631579, + "loss": 1.1078, + "step": 132800 + }, + { + "epoch": 49.7, + "learning_rate": 0.00017657894736842106, + "loss": 1.1132, + "step": 132900 + }, + { + "epoch": 49.74, + "learning_rate": 0.0001763157894736842, + "loss": 1.1107, + "step": 133000 + }, + { + "epoch": 49.78, + "learning_rate": 0.00017605263157894737, + "loss": 1.1095, + "step": 133100 + }, + { + "epoch": 49.81, + "learning_rate": 0.00017578947368421052, + "loss": 1.1103, + "step": 133200 + }, + { + "epoch": 49.85, + "learning_rate": 0.0001755263157894737, + "loss": 1.1128, + "step": 133300 + }, + { + "epoch": 49.89, + "learning_rate": 0.00017526315789473686, + "loss": 1.1077, + "step": 133400 + }, + { + "epoch": 49.93, + "learning_rate": 0.000175, + "loss": 1.1111, + "step": 133500 + }, + { + "epoch": 49.96, + "learning_rate": 0.00017473684210526317, + "loss": 1.1069, + "step": 133600 + }, + { + "epoch": 50.0, + "learning_rate": 0.0001744736842105263, + "loss": 1.1077, + "step": 133700 + }, + { + "epoch": 50.04, + "learning_rate": 0.00017421052631578948, + "loss": 1.1125, + "step": 133800 + }, + { + "epoch": 50.07, + "learning_rate": 0.00017394736842105262, + "loss": 1.1069, + "step": 133900 + }, + { + "epoch": 50.11, + "learning_rate": 0.0001736842105263158, + "loss": 1.1044, + "step": 134000 + }, + { + "epoch": 50.15, + "learning_rate": 0.00017342105263157896, + "loss": 1.1047, + "step": 134100 + }, + { + "epoch": 50.19, + "learning_rate": 0.0001731578947368421, + "loss": 1.1071, + "step": 134200 + }, + { + "epoch": 50.22, + "learning_rate": 0.00017289473684210528, + "loss": 1.1068, + "step": 134300 + }, + { + "epoch": 50.26, + "learning_rate": 0.00017263157894736842, + "loss": 1.1074, + "step": 134400 + }, + { + "epoch": 50.3, + "learning_rate": 0.0001723684210526316, + "loss": 1.105, + "step": 134500 + }, + { + "epoch": 50.34, + "learning_rate": 0.00017210526315789473, + "loss": 1.1074, + "step": 134600 + }, + { + "epoch": 50.37, + "learning_rate": 0.0001718421052631579, + "loss": 1.1067, + "step": 134700 + }, + { + "epoch": 50.41, + "learning_rate": 0.00017157894736842105, + "loss": 1.1054, + "step": 134800 + }, + { + "epoch": 50.45, + "learning_rate": 0.0001713157894736842, + "loss": 1.1087, + "step": 134900 + }, + { + "epoch": 50.49, + "learning_rate": 0.00017105263157894739, + "loss": 1.1037, + "step": 135000 + }, + { + "epoch": 50.52, + "learning_rate": 0.00017078947368421053, + "loss": 1.1052, + "step": 135100 + }, + { + "epoch": 50.56, + "learning_rate": 0.0001705263157894737, + "loss": 1.1086, + "step": 135200 + }, + { + "epoch": 50.6, + "learning_rate": 0.00017026315789473684, + "loss": 1.1066, + "step": 135300 + }, + { + "epoch": 50.64, + "learning_rate": 0.00017, + "loss": 1.1086, + "step": 135400 + }, + { + "epoch": 50.67, + "learning_rate": 0.00016973684210526316, + "loss": 1.1059, + "step": 135500 + }, + { + "epoch": 50.71, + "learning_rate": 0.0001694736842105263, + "loss": 1.1066, + "step": 135600 + }, + { + "epoch": 50.75, + "learning_rate": 0.00016921052631578947, + "loss": 1.1048, + "step": 135700 + }, + { + "epoch": 50.79, + "learning_rate": 0.00016894736842105264, + "loss": 1.1068, + "step": 135800 + }, + { + "epoch": 50.82, + "learning_rate": 0.0001686842105263158, + "loss": 1.103, + "step": 135900 + }, + { + "epoch": 50.86, + "learning_rate": 0.00016842105263157895, + "loss": 1.1026, + "step": 136000 + }, + { + "epoch": 50.9, + "learning_rate": 0.00016815789473684212, + "loss": 1.1075, + "step": 136100 + }, + { + "epoch": 50.93, + "learning_rate": 0.00016789473684210526, + "loss": 1.1043, + "step": 136200 + }, + { + "epoch": 50.97, + "learning_rate": 0.0001676315789473684, + "loss": 1.1071, + "step": 136300 + }, + { + "epoch": 51.01, + "learning_rate": 0.00016736842105263158, + "loss": 1.1114, + "step": 136400 + }, + { + "epoch": 51.05, + "learning_rate": 0.00016710526315789472, + "loss": 1.0996, + "step": 136500 + }, + { + "epoch": 51.08, + "learning_rate": 0.00016684210526315792, + "loss": 1.1027, + "step": 136600 + }, + { + "epoch": 51.12, + "learning_rate": 0.00016657894736842106, + "loss": 1.1027, + "step": 136700 + }, + { + "epoch": 51.16, + "learning_rate": 0.00016631578947368423, + "loss": 1.1043, + "step": 136800 + }, + { + "epoch": 51.2, + "learning_rate": 0.00016605263157894737, + "loss": 1.1021, + "step": 136900 + }, + { + "epoch": 51.23, + "learning_rate": 0.00016578947368421052, + "loss": 1.0988, + "step": 137000 + }, + { + "epoch": 51.27, + "learning_rate": 0.00016552631578947369, + "loss": 1.0999, + "step": 137100 + }, + { + "epoch": 51.31, + "learning_rate": 0.00016526315789473683, + "loss": 1.1028, + "step": 137200 + }, + { + "epoch": 51.35, + "learning_rate": 0.000165, + "loss": 1.1025, + "step": 137300 + }, + { + "epoch": 51.38, + "learning_rate": 0.00016473684210526317, + "loss": 1.1005, + "step": 137400 + }, + { + "epoch": 51.42, + "learning_rate": 0.00016447368421052634, + "loss": 1.1003, + "step": 137500 + }, + { + "epoch": 51.46, + "learning_rate": 0.00016421052631578948, + "loss": 1.1005, + "step": 137600 + }, + { + "epoch": 51.5, + "learning_rate": 0.00016394736842105263, + "loss": 1.1003, + "step": 137700 + }, + { + "epoch": 51.53, + "learning_rate": 0.0001636842105263158, + "loss": 1.1032, + "step": 137800 + }, + { + "epoch": 51.57, + "learning_rate": 0.00016342105263157894, + "loss": 1.1027, + "step": 137900 + }, + { + "epoch": 51.61, + "learning_rate": 0.0001631578947368421, + "loss": 1.102, + "step": 138000 + }, + { + "epoch": 51.65, + "learning_rate": 0.00016289473684210525, + "loss": 1.0996, + "step": 138100 + }, + { + "epoch": 51.68, + "learning_rate": 0.00016263157894736845, + "loss": 1.1009, + "step": 138200 + }, + { + "epoch": 51.72, + "learning_rate": 0.0001623684210526316, + "loss": 1.1014, + "step": 138300 + }, + { + "epoch": 51.76, + "learning_rate": 0.00016210526315789473, + "loss": 1.1025, + "step": 138400 + }, + { + "epoch": 51.79, + "learning_rate": 0.0001618421052631579, + "loss": 1.0997, + "step": 138500 + }, + { + "epoch": 51.83, + "learning_rate": 0.00016157894736842105, + "loss": 1.1029, + "step": 138600 + }, + { + "epoch": 51.87, + "learning_rate": 0.00016131578947368422, + "loss": 1.1045, + "step": 138700 + }, + { + "epoch": 51.91, + "learning_rate": 0.00016105263157894736, + "loss": 1.103, + "step": 138800 + }, + { + "epoch": 51.94, + "learning_rate": 0.00016078947368421053, + "loss": 1.099, + "step": 138900 + }, + { + "epoch": 51.98, + "learning_rate": 0.0001605263157894737, + "loss": 1.0998, + "step": 139000 + }, + { + "epoch": 52.02, + "learning_rate": 0.00016026315789473684, + "loss": 1.103, + "step": 139100 + }, + { + "epoch": 52.06, + "learning_rate": 0.00016, + "loss": 1.0982, + "step": 139200 + }, + { + "epoch": 52.09, + "learning_rate": 0.00015973684210526316, + "loss": 1.0975, + "step": 139300 + }, + { + "epoch": 52.13, + "learning_rate": 0.00015947368421052633, + "loss": 1.0981, + "step": 139400 + }, + { + "epoch": 52.17, + "learning_rate": 0.00015921052631578947, + "loss": 1.0989, + "step": 139500 + }, + { + "epoch": 52.21, + "learning_rate": 0.00015894736842105264, + "loss": 1.0985, + "step": 139600 + }, + { + "epoch": 52.24, + "learning_rate": 0.00015868421052631578, + "loss": 1.0977, + "step": 139700 + }, + { + "epoch": 52.28, + "learning_rate": 0.00015842105263157892, + "loss": 1.0966, + "step": 139800 + }, + { + "epoch": 52.32, + "learning_rate": 0.00015815789473684212, + "loss": 1.098, + "step": 139900 + }, + { + "epoch": 52.36, + "learning_rate": 0.00015789473684210527, + "loss": 1.0939, + "step": 140000 + }, + { + "epoch": 52.39, + "learning_rate": 0.00015763157894736844, + "loss": 1.0951, + "step": 140100 + }, + { + "epoch": 52.43, + "learning_rate": 0.00015736842105263158, + "loss": 1.0979, + "step": 140200 + }, + { + "epoch": 52.47, + "learning_rate": 0.00015710526315789475, + "loss": 1.0985, + "step": 140300 + }, + { + "epoch": 52.51, + "learning_rate": 0.0001568421052631579, + "loss": 1.0971, + "step": 140400 + }, + { + "epoch": 52.54, + "learning_rate": 0.00015657894736842103, + "loss": 1.0961, + "step": 140500 + }, + { + "epoch": 52.58, + "learning_rate": 0.0001563157894736842, + "loss": 1.0978, + "step": 140600 + }, + { + "epoch": 52.62, + "learning_rate": 0.00015605263157894737, + "loss": 1.0953, + "step": 140700 + }, + { + "epoch": 52.66, + "learning_rate": 0.00015578947368421054, + "loss": 1.0977, + "step": 140800 + }, + { + "epoch": 52.69, + "learning_rate": 0.0001555263157894737, + "loss": 1.0979, + "step": 140900 + }, + { + "epoch": 52.73, + "learning_rate": 0.00015526315789473686, + "loss": 1.0952, + "step": 141000 + }, + { + "epoch": 52.77, + "learning_rate": 0.000155, + "loss": 1.0987, + "step": 141100 + }, + { + "epoch": 52.8, + "learning_rate": 0.00015473684210526314, + "loss": 1.0992, + "step": 141200 + }, + { + "epoch": 52.84, + "learning_rate": 0.0001544736842105263, + "loss": 1.0926, + "step": 141300 + }, + { + "epoch": 52.88, + "learning_rate": 0.00015421052631578946, + "loss": 1.0964, + "step": 141400 + }, + { + "epoch": 52.92, + "learning_rate": 0.00015394736842105265, + "loss": 1.0946, + "step": 141500 + }, + { + "epoch": 52.95, + "learning_rate": 0.0001536842105263158, + "loss": 1.0996, + "step": 141600 + }, + { + "epoch": 52.99, + "learning_rate": 0.00015342105263157897, + "loss": 1.0962, + "step": 141700 + }, + { + "epoch": 53.03, + "learning_rate": 0.0001531578947368421, + "loss": 1.0983, + "step": 141800 + }, + { + "epoch": 53.07, + "learning_rate": 0.00015289473684210525, + "loss": 1.0905, + "step": 141900 + }, + { + "epoch": 53.1, + "learning_rate": 0.00015263157894736842, + "loss": 1.0931, + "step": 142000 + }, + { + "epoch": 53.14, + "learning_rate": 0.00015236842105263156, + "loss": 1.0946, + "step": 142100 + }, + { + "epoch": 53.18, + "learning_rate": 0.00015210526315789473, + "loss": 1.0962, + "step": 142200 + }, + { + "epoch": 53.22, + "learning_rate": 0.0001518421052631579, + "loss": 1.0924, + "step": 142300 + }, + { + "epoch": 53.25, + "learning_rate": 0.00015157894736842108, + "loss": 1.0948, + "step": 142400 + }, + { + "epoch": 53.29, + "learning_rate": 0.00015131578947368422, + "loss": 1.0933, + "step": 142500 + }, + { + "epoch": 53.33, + "learning_rate": 0.00015105263157894736, + "loss": 1.0948, + "step": 142600 + }, + { + "epoch": 53.37, + "learning_rate": 0.00015078947368421053, + "loss": 1.093, + "step": 142700 + }, + { + "epoch": 53.4, + "learning_rate": 0.00015052631578947367, + "loss": 1.094, + "step": 142800 + }, + { + "epoch": 53.44, + "learning_rate": 0.00015026315789473684, + "loss": 1.0982, + "step": 142900 + }, + { + "epoch": 53.48, + "learning_rate": 0.00015, + "loss": 1.0937, + "step": 143000 + }, + { + "epoch": 53.52, + "learning_rate": 0.00014973684210526318, + "loss": 1.0956, + "step": 143100 + }, + { + "epoch": 53.55, + "learning_rate": 0.00014947368421052633, + "loss": 1.0937, + "step": 143200 + }, + { + "epoch": 53.59, + "learning_rate": 0.00014921052631578947, + "loss": 1.0906, + "step": 143300 + }, + { + "epoch": 53.63, + "learning_rate": 0.00014894736842105264, + "loss": 1.0904, + "step": 143400 + }, + { + "epoch": 53.66, + "learning_rate": 0.00014868421052631578, + "loss": 1.0892, + "step": 143500 + }, + { + "epoch": 53.7, + "learning_rate": 0.00014842105263157895, + "loss": 1.0928, + "step": 143600 + }, + { + "epoch": 53.74, + "learning_rate": 0.0001481578947368421, + "loss": 1.0916, + "step": 143700 + }, + { + "epoch": 53.78, + "learning_rate": 0.00014789473684210527, + "loss": 1.094, + "step": 143800 + }, + { + "epoch": 53.81, + "learning_rate": 0.0001476315789473684, + "loss": 1.0913, + "step": 143900 + }, + { + "epoch": 53.85, + "learning_rate": 0.00014736842105263158, + "loss": 1.0908, + "step": 144000 + }, + { + "epoch": 53.89, + "learning_rate": 0.00014710526315789475, + "loss": 1.0909, + "step": 144100 + }, + { + "epoch": 53.93, + "learning_rate": 0.0001468421052631579, + "loss": 1.0936, + "step": 144200 + }, + { + "epoch": 53.96, + "learning_rate": 0.00014657894736842106, + "loss": 1.0932, + "step": 144300 + }, + { + "epoch": 54.0, + "learning_rate": 0.0001463157894736842, + "loss": 1.1001, + "step": 144400 + }, + { + "epoch": 54.04, + "learning_rate": 0.00014605263157894737, + "loss": 1.0875, + "step": 144500 + }, + { + "epoch": 54.08, + "learning_rate": 0.00014578947368421052, + "loss": 1.0868, + "step": 144600 + }, + { + "epoch": 54.11, + "learning_rate": 0.0001455263157894737, + "loss": 1.0879, + "step": 144700 + }, + { + "epoch": 54.15, + "learning_rate": 0.00014526315789473686, + "loss": 1.088, + "step": 144800 + }, + { + "epoch": 54.19, + "learning_rate": 0.000145, + "loss": 1.0886, + "step": 144900 + }, + { + "epoch": 54.23, + "learning_rate": 0.00014473684210526317, + "loss": 1.0897, + "step": 145000 + }, + { + "epoch": 54.26, + "learning_rate": 0.00014447368421052631, + "loss": 1.091, + "step": 145100 + }, + { + "epoch": 54.3, + "learning_rate": 0.00014421052631578948, + "loss": 1.0889, + "step": 145200 + }, + { + "epoch": 54.34, + "learning_rate": 0.00014394736842105263, + "loss": 1.0895, + "step": 145300 + }, + { + "epoch": 54.38, + "learning_rate": 0.0001436842105263158, + "loss": 1.0869, + "step": 145400 + }, + { + "epoch": 54.41, + "learning_rate": 0.00014342105263157894, + "loss": 1.0913, + "step": 145500 + }, + { + "epoch": 54.45, + "learning_rate": 0.0001431578947368421, + "loss": 1.0896, + "step": 145600 + }, + { + "epoch": 54.49, + "learning_rate": 0.00014289473684210528, + "loss": 1.0877, + "step": 145700 + }, + { + "epoch": 54.52, + "learning_rate": 0.00014263157894736842, + "loss": 1.0891, + "step": 145800 + }, + { + "epoch": 54.56, + "learning_rate": 0.0001423684210526316, + "loss": 1.0922, + "step": 145900 + }, + { + "epoch": 54.6, + "learning_rate": 0.00014210526315789474, + "loss": 1.0908, + "step": 146000 + }, + { + "epoch": 54.64, + "learning_rate": 0.0001418421052631579, + "loss": 1.0851, + "step": 146100 + }, + { + "epoch": 54.67, + "learning_rate": 0.00014157894736842105, + "loss": 1.091, + "step": 146200 + }, + { + "epoch": 54.71, + "learning_rate": 0.0001413157894736842, + "loss": 1.0902, + "step": 146300 + }, + { + "epoch": 54.75, + "learning_rate": 0.0001410526315789474, + "loss": 1.0858, + "step": 146400 + }, + { + "epoch": 54.79, + "learning_rate": 0.00014078947368421053, + "loss": 1.0897, + "step": 146500 + }, + { + "epoch": 54.82, + "learning_rate": 0.0001405263157894737, + "loss": 1.0896, + "step": 146600 + }, + { + "epoch": 54.86, + "learning_rate": 0.00014026315789473684, + "loss": 1.0879, + "step": 146700 + }, + { + "epoch": 54.9, + "learning_rate": 0.00014000000000000001, + "loss": 1.0868, + "step": 146800 + }, + { + "epoch": 54.94, + "learning_rate": 0.00013973684210526316, + "loss": 1.087, + "step": 146900 + }, + { + "epoch": 54.97, + "learning_rate": 0.0001394736842105263, + "loss": 1.0872, + "step": 147000 + }, + { + "epoch": 55.01, + "learning_rate": 0.00013921052631578947, + "loss": 1.0933, + "step": 147100 + }, + { + "epoch": 55.05, + "learning_rate": 0.00013894736842105264, + "loss": 1.0839, + "step": 147200 + }, + { + "epoch": 55.09, + "learning_rate": 0.0001386842105263158, + "loss": 1.0824, + "step": 147300 + }, + { + "epoch": 55.12, + "learning_rate": 0.00013842105263157895, + "loss": 1.0868, + "step": 147400 + }, + { + "epoch": 55.16, + "learning_rate": 0.00013815789473684212, + "loss": 1.0843, + "step": 147500 + }, + { + "epoch": 55.2, + "learning_rate": 0.00013789473684210527, + "loss": 1.0854, + "step": 147600 + }, + { + "epoch": 55.24, + "learning_rate": 0.0001376315789473684, + "loss": 1.0847, + "step": 147700 + }, + { + "epoch": 55.27, + "learning_rate": 0.00013736842105263158, + "loss": 1.0852, + "step": 147800 + }, + { + "epoch": 55.31, + "learning_rate": 0.00013710526315789472, + "loss": 1.0863, + "step": 147900 + }, + { + "epoch": 55.35, + "learning_rate": 0.00013684210526315792, + "loss": 1.0825, + "step": 148000 + }, + { + "epoch": 55.39, + "learning_rate": 0.00013657894736842106, + "loss": 1.0849, + "step": 148100 + }, + { + "epoch": 55.42, + "learning_rate": 0.00013631578947368423, + "loss": 1.0845, + "step": 148200 + }, + { + "epoch": 55.46, + "learning_rate": 0.00013605263157894738, + "loss": 1.0839, + "step": 148300 + }, + { + "epoch": 55.5, + "learning_rate": 0.00013578947368421052, + "loss": 1.0857, + "step": 148400 + }, + { + "epoch": 55.53, + "learning_rate": 0.0001355263157894737, + "loss": 1.0835, + "step": 148500 + }, + { + "epoch": 55.57, + "learning_rate": 0.00013526315789473683, + "loss": 1.0816, + "step": 148600 + }, + { + "epoch": 55.61, + "learning_rate": 0.000135, + "loss": 1.0826, + "step": 148700 + }, + { + "epoch": 55.65, + "learning_rate": 0.00013473684210526314, + "loss": 1.0853, + "step": 148800 + }, + { + "epoch": 55.68, + "learning_rate": 0.00013447368421052634, + "loss": 1.0853, + "step": 148900 + }, + { + "epoch": 55.72, + "learning_rate": 0.00013421052631578948, + "loss": 1.0859, + "step": 149000 + }, + { + "epoch": 55.76, + "learning_rate": 0.00013394736842105263, + "loss": 1.0852, + "step": 149100 + }, + { + "epoch": 55.8, + "learning_rate": 0.0001336842105263158, + "loss": 1.0814, + "step": 149200 + }, + { + "epoch": 55.83, + "learning_rate": 0.00013342105263157894, + "loss": 1.0793, + "step": 149300 + }, + { + "epoch": 55.87, + "learning_rate": 0.0001331578947368421, + "loss": 1.0802, + "step": 149400 + }, + { + "epoch": 55.91, + "learning_rate": 0.00013289473684210525, + "loss": 1.0825, + "step": 149500 + }, + { + "epoch": 55.95, + "learning_rate": 0.00013263157894736842, + "loss": 1.0839, + "step": 149600 + }, + { + "epoch": 55.98, + "learning_rate": 0.0001323684210526316, + "loss": 1.085, + "step": 149700 + }, + { + "epoch": 56.02, + "learning_rate": 0.00013210526315789474, + "loss": 1.086, + "step": 149800 + }, + { + "epoch": 56.06, + "learning_rate": 0.0001318421052631579, + "loss": 1.0833, + "step": 149900 + }, + { + "epoch": 56.1, + "learning_rate": 0.00013157894736842105, + "loss": 1.0807, + "step": 150000 + }, + { + "epoch": 56.13, + "learning_rate": 0.00013131578947368422, + "loss": 1.0792, + "step": 150100 + }, + { + "epoch": 56.17, + "learning_rate": 0.00013105263157894736, + "loss": 1.0811, + "step": 150200 + }, + { + "epoch": 56.21, + "learning_rate": 0.00013078947368421053, + "loss": 1.0812, + "step": 150300 + }, + { + "epoch": 56.25, + "learning_rate": 0.00013052631578947368, + "loss": 1.0785, + "step": 150400 + }, + { + "epoch": 56.28, + "learning_rate": 0.00013026315789473685, + "loss": 1.0814, + "step": 150500 + }, + { + "epoch": 56.32, + "learning_rate": 0.00013000000000000002, + "loss": 1.0808, + "step": 150600 + }, + { + "epoch": 56.36, + "learning_rate": 0.00012973684210526316, + "loss": 1.0788, + "step": 150700 + }, + { + "epoch": 56.39, + "learning_rate": 0.00012947368421052633, + "loss": 1.0761, + "step": 150800 + }, + { + "epoch": 56.43, + "learning_rate": 0.00012921052631578947, + "loss": 1.0817, + "step": 150900 + }, + { + "epoch": 56.47, + "learning_rate": 0.00012894736842105264, + "loss": 1.0802, + "step": 151000 + }, + { + "epoch": 56.51, + "learning_rate": 0.00012868421052631578, + "loss": 1.0768, + "step": 151100 + }, + { + "epoch": 56.54, + "learning_rate": 0.00012842105263157893, + "loss": 1.0795, + "step": 151200 + }, + { + "epoch": 56.58, + "learning_rate": 0.00012815789473684212, + "loss": 1.0857, + "step": 151300 + }, + { + "epoch": 56.62, + "learning_rate": 0.00012789473684210527, + "loss": 1.0796, + "step": 151400 + }, + { + "epoch": 56.66, + "learning_rate": 0.00012763157894736844, + "loss": 1.0815, + "step": 151500 + }, + { + "epoch": 56.69, + "learning_rate": 0.00012736842105263158, + "loss": 1.0795, + "step": 151600 + }, + { + "epoch": 56.73, + "learning_rate": 0.00012710526315789475, + "loss": 1.0782, + "step": 151700 + }, + { + "epoch": 56.77, + "learning_rate": 0.0001268421052631579, + "loss": 1.0781, + "step": 151800 + }, + { + "epoch": 56.81, + "learning_rate": 0.00012657894736842104, + "loss": 1.0825, + "step": 151900 + }, + { + "epoch": 56.84, + "learning_rate": 0.0001263157894736842, + "loss": 1.0808, + "step": 152000 + }, + { + "epoch": 56.88, + "learning_rate": 0.00012605263157894738, + "loss": 1.0795, + "step": 152100 + }, + { + "epoch": 56.92, + "learning_rate": 0.00012578947368421055, + "loss": 1.0802, + "step": 152200 + }, + { + "epoch": 56.96, + "learning_rate": 0.0001255263157894737, + "loss": 1.08, + "step": 152300 + }, + { + "epoch": 56.99, + "learning_rate": 0.00012526315789473686, + "loss": 1.0782, + "step": 152400 + }, + { + "epoch": 57.03, + "learning_rate": 0.000125, + "loss": 1.0813, + "step": 152500 + }, + { + "epoch": 57.07, + "learning_rate": 0.00012473684210526317, + "loss": 1.08, + "step": 152600 + }, + { + "epoch": 57.11, + "learning_rate": 0.00012447368421052632, + "loss": 1.0749, + "step": 152700 + }, + { + "epoch": 57.14, + "learning_rate": 0.00012421052631578949, + "loss": 1.0765, + "step": 152800 + }, + { + "epoch": 57.18, + "learning_rate": 0.00012394736842105263, + "loss": 1.0759, + "step": 152900 + }, + { + "epoch": 57.22, + "learning_rate": 0.0001236842105263158, + "loss": 1.0736, + "step": 153000 + }, + { + "epoch": 57.26, + "learning_rate": 0.00012342105263157894, + "loss": 1.0772, + "step": 153100 + }, + { + "epoch": 57.29, + "learning_rate": 0.0001231578947368421, + "loss": 1.0763, + "step": 153200 + }, + { + "epoch": 57.33, + "learning_rate": 0.00012289473684210525, + "loss": 1.0771, + "step": 153300 + }, + { + "epoch": 57.37, + "learning_rate": 0.00012263157894736842, + "loss": 1.0745, + "step": 153400 + }, + { + "epoch": 57.4, + "learning_rate": 0.0001223684210526316, + "loss": 1.0791, + "step": 153500 + }, + { + "epoch": 57.44, + "learning_rate": 0.00012210526315789474, + "loss": 1.0738, + "step": 153600 + }, + { + "epoch": 57.48, + "learning_rate": 0.0001218421052631579, + "loss": 1.0757, + "step": 153700 + }, + { + "epoch": 57.52, + "learning_rate": 0.00012157894736842105, + "loss": 1.0767, + "step": 153800 + }, + { + "epoch": 57.55, + "learning_rate": 0.00012131578947368421, + "loss": 1.0791, + "step": 153900 + }, + { + "epoch": 57.59, + "learning_rate": 0.00012105263157894738, + "loss": 1.076, + "step": 154000 + }, + { + "epoch": 57.63, + "learning_rate": 0.00012078947368421053, + "loss": 1.0793, + "step": 154100 + }, + { + "epoch": 57.67, + "learning_rate": 0.00012052631578947369, + "loss": 1.0743, + "step": 154200 + }, + { + "epoch": 57.7, + "learning_rate": 0.00012026315789473683, + "loss": 1.0768, + "step": 154300 + }, + { + "epoch": 57.74, + "learning_rate": 0.00012, + "loss": 1.0749, + "step": 154400 + }, + { + "epoch": 57.78, + "learning_rate": 0.00011973684210526316, + "loss": 1.076, + "step": 154500 + }, + { + "epoch": 57.82, + "learning_rate": 0.00011947368421052632, + "loss": 1.0751, + "step": 154600 + }, + { + "epoch": 57.85, + "learning_rate": 0.00011921052631578947, + "loss": 1.0739, + "step": 154700 + }, + { + "epoch": 57.89, + "learning_rate": 0.00011894736842105264, + "loss": 1.077, + "step": 154800 + }, + { + "epoch": 57.93, + "learning_rate": 0.0001186842105263158, + "loss": 1.0748, + "step": 154900 + }, + { + "epoch": 57.97, + "learning_rate": 0.00011842105263157894, + "loss": 1.0752, + "step": 155000 + }, + { + "epoch": 58.0, + "learning_rate": 0.0001181578947368421, + "loss": 1.082, + "step": 155100 + }, + { + "epoch": 58.04, + "learning_rate": 0.00011789473684210527, + "loss": 1.0708, + "step": 155200 + }, + { + "epoch": 58.08, + "learning_rate": 0.00011763157894736843, + "loss": 1.0671, + "step": 155300 + }, + { + "epoch": 58.12, + "learning_rate": 0.00011736842105263158, + "loss": 1.0712, + "step": 155400 + }, + { + "epoch": 58.15, + "learning_rate": 0.00011710526315789474, + "loss": 1.0732, + "step": 155500 + }, + { + "epoch": 58.19, + "learning_rate": 0.00011684210526315791, + "loss": 1.0737, + "step": 155600 + }, + { + "epoch": 58.23, + "learning_rate": 0.00011657894736842105, + "loss": 1.0727, + "step": 155700 + }, + { + "epoch": 58.26, + "learning_rate": 0.00011631578947368421, + "loss": 1.0748, + "step": 155800 + }, + { + "epoch": 58.3, + "learning_rate": 0.00011605263157894736, + "loss": 1.0692, + "step": 155900 + }, + { + "epoch": 58.34, + "learning_rate": 0.00011578947368421053, + "loss": 1.0705, + "step": 156000 + }, + { + "epoch": 58.38, + "learning_rate": 0.00011552631578947369, + "loss": 1.0709, + "step": 156100 + }, + { + "epoch": 58.41, + "learning_rate": 0.00011526315789473685, + "loss": 1.0718, + "step": 156200 + }, + { + "epoch": 58.45, + "learning_rate": 0.000115, + "loss": 1.0715, + "step": 156300 + }, + { + "epoch": 58.49, + "learning_rate": 0.00011473684210526316, + "loss": 1.0723, + "step": 156400 + }, + { + "epoch": 58.53, + "learning_rate": 0.00011447368421052632, + "loss": 1.0728, + "step": 156500 + }, + { + "epoch": 58.56, + "learning_rate": 0.00011421052631578947, + "loss": 1.0723, + "step": 156600 + }, + { + "epoch": 58.6, + "learning_rate": 0.00011394736842105263, + "loss": 1.0693, + "step": 156700 + }, + { + "epoch": 58.64, + "learning_rate": 0.0001136842105263158, + "loss": 1.073, + "step": 156800 + }, + { + "epoch": 58.68, + "learning_rate": 0.00011342105263157896, + "loss": 1.0743, + "step": 156900 + }, + { + "epoch": 58.71, + "learning_rate": 0.00011315789473684211, + "loss": 1.0715, + "step": 157000 + }, + { + "epoch": 58.75, + "learning_rate": 0.00011289473684210527, + "loss": 1.0734, + "step": 157100 + }, + { + "epoch": 58.79, + "learning_rate": 0.00011263157894736841, + "loss": 1.0731, + "step": 157200 + }, + { + "epoch": 58.83, + "learning_rate": 0.00011236842105263158, + "loss": 1.0711, + "step": 157300 + }, + { + "epoch": 58.86, + "learning_rate": 0.00011210526315789474, + "loss": 1.0694, + "step": 157400 + }, + { + "epoch": 58.9, + "learning_rate": 0.0001118421052631579, + "loss": 1.0706, + "step": 157500 + }, + { + "epoch": 58.94, + "learning_rate": 0.00011157894736842105, + "loss": 1.0729, + "step": 157600 + }, + { + "epoch": 58.98, + "learning_rate": 0.00011131578947368422, + "loss": 1.0704, + "step": 157700 + }, + { + "epoch": 59.01, + "learning_rate": 0.00011105263157894738, + "loss": 1.0741, + "step": 157800 + }, + { + "epoch": 59.05, + "learning_rate": 0.00011078947368421052, + "loss": 1.0697, + "step": 157900 + }, + { + "epoch": 59.09, + "learning_rate": 0.00011052631578947368, + "loss": 1.0676, + "step": 158000 + }, + { + "epoch": 59.12, + "learning_rate": 0.00011026315789473685, + "loss": 1.0689, + "step": 158100 + }, + { + "epoch": 59.16, + "learning_rate": 0.00011, + "loss": 1.0674, + "step": 158200 + }, + { + "epoch": 59.2, + "learning_rate": 0.00010973684210526316, + "loss": 1.0707, + "step": 158300 + }, + { + "epoch": 59.24, + "learning_rate": 0.00010947368421052632, + "loss": 1.0691, + "step": 158400 + }, + { + "epoch": 59.27, + "learning_rate": 0.00010921052631578949, + "loss": 1.0657, + "step": 158500 + }, + { + "epoch": 59.31, + "learning_rate": 0.00010894736842105263, + "loss": 1.0676, + "step": 158600 + }, + { + "epoch": 59.35, + "learning_rate": 0.00010868421052631579, + "loss": 1.0678, + "step": 158700 + }, + { + "epoch": 59.39, + "learning_rate": 0.00010842105263157894, + "loss": 1.0683, + "step": 158800 + }, + { + "epoch": 59.42, + "learning_rate": 0.00010815789473684211, + "loss": 1.0638, + "step": 158900 + }, + { + "epoch": 59.46, + "learning_rate": 0.00010789473684210527, + "loss": 1.0722, + "step": 159000 + }, + { + "epoch": 59.5, + "learning_rate": 0.00010763157894736843, + "loss": 1.0673, + "step": 159100 + }, + { + "epoch": 59.54, + "learning_rate": 0.00010736842105263158, + "loss": 1.0659, + "step": 159200 + }, + { + "epoch": 59.57, + "learning_rate": 0.00010710526315789474, + "loss": 1.0665, + "step": 159300 + }, + { + "epoch": 59.61, + "learning_rate": 0.0001068421052631579, + "loss": 1.0662, + "step": 159400 + }, + { + "epoch": 59.65, + "learning_rate": 0.00010657894736842105, + "loss": 1.0676, + "step": 159500 + }, + { + "epoch": 59.69, + "learning_rate": 0.00010631578947368421, + "loss": 1.0677, + "step": 159600 + }, + { + "epoch": 59.72, + "learning_rate": 0.00010605263157894738, + "loss": 1.0668, + "step": 159700 + }, + { + "epoch": 59.76, + "learning_rate": 0.00010578947368421053, + "loss": 1.0661, + "step": 159800 + }, + { + "epoch": 59.8, + "learning_rate": 0.00010552631578947369, + "loss": 1.0705, + "step": 159900 + }, + { + "epoch": 59.84, + "learning_rate": 0.00010526315789473683, + "loss": 1.0675, + "step": 160000 + }, + { + "epoch": 59.87, + "learning_rate": 0.000105, + "loss": 1.0695, + "step": 160100 + }, + { + "epoch": 59.91, + "learning_rate": 0.00010473684210526316, + "loss": 1.0655, + "step": 160200 + }, + { + "epoch": 59.95, + "learning_rate": 0.00010447368421052632, + "loss": 1.0679, + "step": 160300 + }, + { + "epoch": 59.98, + "learning_rate": 0.00010421052631578947, + "loss": 1.0667, + "step": 160400 + }, + { + "epoch": 60.02, + "learning_rate": 0.00010394736842105264, + "loss": 1.0693, + "step": 160500 + }, + { + "epoch": 60.06, + "learning_rate": 0.0001036842105263158, + "loss": 1.0631, + "step": 160600 + }, + { + "epoch": 60.1, + "learning_rate": 0.00010342105263157894, + "loss": 1.0619, + "step": 160700 + }, + { + "epoch": 60.13, + "learning_rate": 0.0001031578947368421, + "loss": 1.0652, + "step": 160800 + }, + { + "epoch": 60.17, + "learning_rate": 0.00010289473684210527, + "loss": 1.0627, + "step": 160900 + }, + { + "epoch": 60.21, + "learning_rate": 0.00010263157894736843, + "loss": 1.0658, + "step": 161000 + }, + { + "epoch": 60.25, + "learning_rate": 0.00010236842105263158, + "loss": 1.0642, + "step": 161100 + }, + { + "epoch": 60.28, + "learning_rate": 0.00010210526315789474, + "loss": 1.0665, + "step": 161200 + }, + { + "epoch": 60.32, + "learning_rate": 0.00010184210526315791, + "loss": 1.0673, + "step": 161300 + }, + { + "epoch": 60.36, + "learning_rate": 0.00010157894736842105, + "loss": 1.0696, + "step": 161400 + }, + { + "epoch": 60.4, + "learning_rate": 0.00010131578947368421, + "loss": 1.065, + "step": 161500 + }, + { + "epoch": 60.43, + "learning_rate": 0.00010105263157894737, + "loss": 1.0633, + "step": 161600 + }, + { + "epoch": 60.47, + "learning_rate": 0.00010078947368421052, + "loss": 1.0618, + "step": 161700 + }, + { + "epoch": 60.51, + "learning_rate": 0.00010052631578947369, + "loss": 1.0662, + "step": 161800 + }, + { + "epoch": 60.55, + "learning_rate": 0.00010026315789473685, + "loss": 1.0637, + "step": 161900 + }, + { + "epoch": 60.58, + "learning_rate": 0.0001, + "loss": 1.0604, + "step": 162000 + }, + { + "epoch": 60.62, + "learning_rate": 9.973684210526315e-05, + "loss": 1.0604, + "step": 162100 + }, + { + "epoch": 60.66, + "learning_rate": 9.947368421052632e-05, + "loss": 1.0634, + "step": 162200 + }, + { + "epoch": 60.7, + "learning_rate": 9.921052631578947e-05, + "loss": 1.0608, + "step": 162300 + }, + { + "epoch": 60.73, + "learning_rate": 9.894736842105263e-05, + "loss": 1.0651, + "step": 162400 + }, + { + "epoch": 60.77, + "learning_rate": 9.868421052631579e-05, + "loss": 1.0639, + "step": 162500 + }, + { + "epoch": 60.81, + "learning_rate": 9.842105263157896e-05, + "loss": 1.0665, + "step": 162600 + }, + { + "epoch": 60.85, + "learning_rate": 9.815789473684211e-05, + "loss": 1.0665, + "step": 162700 + }, + { + "epoch": 60.88, + "learning_rate": 9.789473684210526e-05, + "loss": 1.0644, + "step": 162800 + }, + { + "epoch": 60.92, + "learning_rate": 9.763157894736841e-05, + "loss": 1.063, + "step": 162900 + }, + { + "epoch": 60.96, + "learning_rate": 9.736842105263158e-05, + "loss": 1.0627, + "step": 163000 + }, + { + "epoch": 60.99, + "learning_rate": 9.710526315789474e-05, + "loss": 1.0623, + "step": 163100 + }, + { + "epoch": 61.03, + "learning_rate": 9.68421052631579e-05, + "loss": 1.0722, + "step": 163200 + }, + { + "epoch": 61.07, + "learning_rate": 9.657894736842105e-05, + "loss": 1.0582, + "step": 163300 + }, + { + "epoch": 61.11, + "learning_rate": 9.631578947368422e-05, + "loss": 1.0605, + "step": 163400 + }, + { + "epoch": 61.14, + "learning_rate": 9.605263157894737e-05, + "loss": 1.0618, + "step": 163500 + }, + { + "epoch": 61.18, + "learning_rate": 9.578947368421052e-05, + "loss": 1.0613, + "step": 163600 + }, + { + "epoch": 61.22, + "learning_rate": 9.552631578947368e-05, + "loss": 1.0601, + "step": 163700 + }, + { + "epoch": 61.26, + "learning_rate": 9.526315789473685e-05, + "loss": 1.0635, + "step": 163800 + }, + { + "epoch": 61.29, + "learning_rate": 9.5e-05, + "loss": 1.0583, + "step": 163900 + }, + { + "epoch": 61.33, + "learning_rate": 9.473684210526316e-05, + "loss": 1.0622, + "step": 164000 + }, + { + "epoch": 61.37, + "learning_rate": 9.447368421052632e-05, + "loss": 1.0647, + "step": 164100 + }, + { + "epoch": 61.41, + "learning_rate": 9.421052631578947e-05, + "loss": 1.0605, + "step": 164200 + }, + { + "epoch": 61.44, + "learning_rate": 9.394736842105263e-05, + "loss": 1.0636, + "step": 164300 + }, + { + "epoch": 61.48, + "learning_rate": 9.368421052631579e-05, + "loss": 1.0578, + "step": 164400 + }, + { + "epoch": 61.52, + "learning_rate": 9.342105263157894e-05, + "loss": 1.0608, + "step": 164500 + }, + { + "epoch": 61.56, + "learning_rate": 9.315789473684211e-05, + "loss": 1.0621, + "step": 164600 + }, + { + "epoch": 61.59, + "learning_rate": 9.289473684210527e-05, + "loss": 1.0562, + "step": 164700 + }, + { + "epoch": 61.63, + "learning_rate": 9.263157894736843e-05, + "loss": 1.06, + "step": 164800 + }, + { + "epoch": 61.67, + "learning_rate": 9.236842105263158e-05, + "loss": 1.0589, + "step": 164900 + }, + { + "epoch": 61.71, + "learning_rate": 9.210526315789474e-05, + "loss": 1.0623, + "step": 165000 + }, + { + "epoch": 61.74, + "learning_rate": 9.18421052631579e-05, + "loss": 1.0593, + "step": 165100 + }, + { + "epoch": 61.78, + "learning_rate": 9.157894736842105e-05, + "loss": 1.0608, + "step": 165200 + }, + { + "epoch": 61.82, + "learning_rate": 9.131578947368421e-05, + "loss": 1.0565, + "step": 165300 + }, + { + "epoch": 61.85, + "learning_rate": 9.105263157894738e-05, + "loss": 1.0597, + "step": 165400 + }, + { + "epoch": 61.89, + "learning_rate": 9.078947368421054e-05, + "loss": 1.0618, + "step": 165500 + }, + { + "epoch": 61.93, + "learning_rate": 9.052631578947369e-05, + "loss": 1.0587, + "step": 165600 + }, + { + "epoch": 61.97, + "learning_rate": 9.026315789473684e-05, + "loss": 1.0569, + "step": 165700 + }, + { + "epoch": 62.0, + "learning_rate": 8.999999999999999e-05, + "loss": 1.0663, + "step": 165800 + }, + { + "epoch": 62.04, + "learning_rate": 8.973684210526316e-05, + "loss": 1.0524, + "step": 165900 + }, + { + "epoch": 62.08, + "learning_rate": 8.947368421052632e-05, + "loss": 1.0573, + "step": 166000 + }, + { + "epoch": 62.12, + "learning_rate": 8.921052631578948e-05, + "loss": 1.0554, + "step": 166100 + }, + { + "epoch": 62.15, + "learning_rate": 8.894736842105263e-05, + "loss": 1.059, + "step": 166200 + }, + { + "epoch": 62.19, + "learning_rate": 8.86842105263158e-05, + "loss": 1.0551, + "step": 166300 + }, + { + "epoch": 62.23, + "learning_rate": 8.842105263157894e-05, + "loss": 1.0565, + "step": 166400 + }, + { + "epoch": 62.27, + "learning_rate": 8.81578947368421e-05, + "loss": 1.0585, + "step": 166500 + }, + { + "epoch": 62.3, + "learning_rate": 8.789473684210526e-05, + "loss": 1.0556, + "step": 166600 + }, + { + "epoch": 62.34, + "learning_rate": 8.763157894736843e-05, + "loss": 1.0543, + "step": 166700 + }, + { + "epoch": 62.38, + "learning_rate": 8.736842105263158e-05, + "loss": 1.0566, + "step": 166800 + }, + { + "epoch": 62.42, + "learning_rate": 8.710526315789474e-05, + "loss": 1.058, + "step": 166900 + }, + { + "epoch": 62.45, + "learning_rate": 8.68421052631579e-05, + "loss": 1.0562, + "step": 167000 + }, + { + "epoch": 62.49, + "learning_rate": 8.657894736842105e-05, + "loss": 1.0589, + "step": 167100 + }, + { + "epoch": 62.53, + "learning_rate": 8.631578947368421e-05, + "loss": 1.0546, + "step": 167200 + }, + { + "epoch": 62.57, + "learning_rate": 8.605263157894737e-05, + "loss": 1.055, + "step": 167300 + }, + { + "epoch": 62.6, + "learning_rate": 8.578947368421052e-05, + "loss": 1.0566, + "step": 167400 + }, + { + "epoch": 62.64, + "learning_rate": 8.552631578947369e-05, + "loss": 1.0563, + "step": 167500 + }, + { + "epoch": 62.68, + "learning_rate": 8.526315789473685e-05, + "loss": 1.0587, + "step": 167600 + }, + { + "epoch": 62.71, + "learning_rate": 8.5e-05, + "loss": 1.0578, + "step": 167700 + }, + { + "epoch": 62.75, + "learning_rate": 8.473684210526315e-05, + "loss": 1.0535, + "step": 167800 + }, + { + "epoch": 62.79, + "learning_rate": 8.447368421052632e-05, + "loss": 1.0527, + "step": 167900 + }, + { + "epoch": 62.83, + "learning_rate": 8.421052631578948e-05, + "loss": 1.0546, + "step": 168000 + }, + { + "epoch": 62.86, + "learning_rate": 8.394736842105263e-05, + "loss": 1.0543, + "step": 168100 + }, + { + "epoch": 62.9, + "learning_rate": 8.368421052631579e-05, + "loss": 1.0562, + "step": 168200 + }, + { + "epoch": 62.94, + "learning_rate": 8.342105263157896e-05, + "loss": 1.0578, + "step": 168300 + }, + { + "epoch": 62.98, + "learning_rate": 8.315789473684212e-05, + "loss": 1.0579, + "step": 168400 + }, + { + "epoch": 63.01, + "learning_rate": 8.289473684210526e-05, + "loss": 1.0604, + "step": 168500 + }, + { + "epoch": 63.05, + "learning_rate": 8.263157894736841e-05, + "loss": 1.0547, + "step": 168600 + }, + { + "epoch": 63.09, + "learning_rate": 8.236842105263158e-05, + "loss": 1.0531, + "step": 168700 + }, + { + "epoch": 63.13, + "learning_rate": 8.210526315789474e-05, + "loss": 1.0547, + "step": 168800 + }, + { + "epoch": 63.16, + "learning_rate": 8.18421052631579e-05, + "loss": 1.0509, + "step": 168900 + }, + { + "epoch": 63.2, + "learning_rate": 8.157894736842105e-05, + "loss": 1.0528, + "step": 169000 + }, + { + "epoch": 63.24, + "learning_rate": 8.131578947368422e-05, + "loss": 1.0534, + "step": 169100 + }, + { + "epoch": 63.28, + "learning_rate": 8.105263157894737e-05, + "loss": 1.049, + "step": 169200 + }, + { + "epoch": 63.31, + "learning_rate": 8.078947368421052e-05, + "loss": 1.0526, + "step": 169300 + }, + { + "epoch": 63.35, + "learning_rate": 8.052631578947368e-05, + "loss": 1.052, + "step": 169400 + }, + { + "epoch": 63.39, + "learning_rate": 8.026315789473685e-05, + "loss": 1.0539, + "step": 169500 + }, + { + "epoch": 63.43, + "learning_rate": 8e-05, + "loss": 1.0525, + "step": 169600 + }, + { + "epoch": 63.46, + "learning_rate": 7.973684210526316e-05, + "loss": 1.0532, + "step": 169700 + }, + { + "epoch": 63.5, + "learning_rate": 7.947368421052632e-05, + "loss": 1.0504, + "step": 169800 + }, + { + "epoch": 63.54, + "learning_rate": 7.921052631578946e-05, + "loss": 1.0497, + "step": 169900 + }, + { + "epoch": 63.58, + "learning_rate": 7.894736842105263e-05, + "loss": 1.0514, + "step": 170000 + }, + { + "epoch": 63.61, + "learning_rate": 7.868421052631579e-05, + "loss": 1.0527, + "step": 170100 + }, + { + "epoch": 63.65, + "learning_rate": 7.842105263157895e-05, + "loss": 1.0498, + "step": 170200 + }, + { + "epoch": 63.69, + "learning_rate": 7.81578947368421e-05, + "loss": 1.0498, + "step": 170300 + }, + { + "epoch": 63.72, + "learning_rate": 7.789473684210527e-05, + "loss": 1.0524, + "step": 170400 + }, + { + "epoch": 63.76, + "learning_rate": 7.763157894736843e-05, + "loss": 1.0573, + "step": 170500 + }, + { + "epoch": 63.8, + "learning_rate": 7.736842105263157e-05, + "loss": 1.0533, + "step": 170600 + }, + { + "epoch": 63.84, + "learning_rate": 7.710526315789473e-05, + "loss": 1.0513, + "step": 170700 + }, + { + "epoch": 63.87, + "learning_rate": 7.68421052631579e-05, + "loss": 1.0501, + "step": 170800 + }, + { + "epoch": 63.91, + "learning_rate": 7.657894736842105e-05, + "loss": 1.052, + "step": 170900 + }, + { + "epoch": 63.95, + "learning_rate": 7.631578947368421e-05, + "loss": 1.0528, + "step": 171000 + }, + { + "epoch": 63.99, + "learning_rate": 7.605263157894737e-05, + "loss": 1.0516, + "step": 171100 + }, + { + "epoch": 64.02, + "learning_rate": 7.578947368421054e-05, + "loss": 1.0559, + "step": 171200 + }, + { + "epoch": 64.06, + "learning_rate": 7.552631578947368e-05, + "loss": 1.0487, + "step": 171300 + }, + { + "epoch": 64.1, + "learning_rate": 7.526315789473684e-05, + "loss": 1.0471, + "step": 171400 + }, + { + "epoch": 64.14, + "learning_rate": 7.5e-05, + "loss": 1.0488, + "step": 171500 + }, + { + "epoch": 64.17, + "learning_rate": 7.473684210526316e-05, + "loss": 1.0499, + "step": 171600 + }, + { + "epoch": 64.21, + "learning_rate": 7.447368421052632e-05, + "loss": 1.0487, + "step": 171700 + }, + { + "epoch": 64.25, + "learning_rate": 7.421052631578948e-05, + "loss": 1.0494, + "step": 171800 + }, + { + "epoch": 64.29, + "learning_rate": 7.394736842105263e-05, + "loss": 1.0472, + "step": 171900 + }, + { + "epoch": 64.32, + "learning_rate": 7.368421052631579e-05, + "loss": 1.05, + "step": 172000 + }, + { + "epoch": 64.36, + "learning_rate": 7.342105263157895e-05, + "loss": 1.0473, + "step": 172100 + }, + { + "epoch": 64.4, + "learning_rate": 7.31578947368421e-05, + "loss": 1.0478, + "step": 172200 + }, + { + "epoch": 64.44, + "learning_rate": 7.289473684210526e-05, + "loss": 1.05, + "step": 172300 + }, + { + "epoch": 64.47, + "learning_rate": 7.263157894736843e-05, + "loss": 1.0475, + "step": 172400 + }, + { + "epoch": 64.51, + "learning_rate": 7.236842105263159e-05, + "loss": 1.0461, + "step": 172500 + }, + { + "epoch": 64.55, + "learning_rate": 7.210526315789474e-05, + "loss": 1.0467, + "step": 172600 + }, + { + "epoch": 64.58, + "learning_rate": 7.18421052631579e-05, + "loss": 1.0495, + "step": 172700 + }, + { + "epoch": 64.62, + "learning_rate": 7.157894736842105e-05, + "loss": 1.0519, + "step": 172800 + }, + { + "epoch": 64.66, + "learning_rate": 7.131578947368421e-05, + "loss": 1.053, + "step": 172900 + }, + { + "epoch": 64.7, + "learning_rate": 7.105263157894737e-05, + "loss": 1.0444, + "step": 173000 + }, + { + "epoch": 64.73, + "learning_rate": 7.078947368421052e-05, + "loss": 1.0478, + "step": 173100 + }, + { + "epoch": 64.77, + "learning_rate": 7.05263157894737e-05, + "loss": 1.0477, + "step": 173200 + }, + { + "epoch": 64.81, + "learning_rate": 7.026315789473685e-05, + "loss": 1.0462, + "step": 173300 + }, + { + "epoch": 64.85, + "learning_rate": 7.000000000000001e-05, + "loss": 1.0529, + "step": 173400 + }, + { + "epoch": 64.88, + "learning_rate": 6.973684210526315e-05, + "loss": 1.0502, + "step": 173500 + }, + { + "epoch": 64.92, + "learning_rate": 6.947368421052632e-05, + "loss": 1.0494, + "step": 173600 + }, + { + "epoch": 64.96, + "learning_rate": 6.921052631578948e-05, + "loss": 1.0464, + "step": 173700 + }, + { + "epoch": 65.0, + "learning_rate": 6.894736842105263e-05, + "loss": 1.0447, + "step": 173800 + }, + { + "epoch": 65.03, + "learning_rate": 6.868421052631579e-05, + "loss": 1.0476, + "step": 173900 + }, + { + "epoch": 65.07, + "learning_rate": 6.842105263157896e-05, + "loss": 1.0486, + "step": 174000 + }, + { + "epoch": 65.11, + "learning_rate": 6.815789473684212e-05, + "loss": 1.044, + "step": 174100 + }, + { + "epoch": 65.15, + "learning_rate": 6.789473684210526e-05, + "loss": 1.0466, + "step": 174200 + }, + { + "epoch": 65.18, + "learning_rate": 6.763157894736842e-05, + "loss": 1.0408, + "step": 174300 + }, + { + "epoch": 65.22, + "learning_rate": 6.736842105263157e-05, + "loss": 1.0478, + "step": 174400 + }, + { + "epoch": 65.26, + "learning_rate": 6.710526315789474e-05, + "loss": 1.0446, + "step": 174500 + }, + { + "epoch": 65.3, + "learning_rate": 6.68421052631579e-05, + "loss": 1.0471, + "step": 174600 + }, + { + "epoch": 65.33, + "learning_rate": 6.657894736842106e-05, + "loss": 1.047, + "step": 174700 + }, + { + "epoch": 65.37, + "learning_rate": 6.631578947368421e-05, + "loss": 1.0452, + "step": 174800 + }, + { + "epoch": 65.41, + "learning_rate": 6.605263157894737e-05, + "loss": 1.045, + "step": 174900 + }, + { + "epoch": 65.44, + "learning_rate": 6.578947368421052e-05, + "loss": 1.0466, + "step": 175000 + }, + { + "epoch": 65.48, + "learning_rate": 6.552631578947368e-05, + "loss": 1.0462, + "step": 175100 + }, + { + "epoch": 65.52, + "learning_rate": 6.526315789473684e-05, + "loss": 1.0427, + "step": 175200 + }, + { + "epoch": 65.56, + "learning_rate": 6.500000000000001e-05, + "loss": 1.0449, + "step": 175300 + }, + { + "epoch": 65.59, + "learning_rate": 6.473684210526316e-05, + "loss": 1.0468, + "step": 175400 + }, + { + "epoch": 65.63, + "learning_rate": 6.447368421052632e-05, + "loss": 1.0466, + "step": 175500 + }, + { + "epoch": 65.67, + "learning_rate": 6.421052631578946e-05, + "loss": 1.0443, + "step": 175600 + }, + { + "epoch": 65.71, + "learning_rate": 6.394736842105263e-05, + "loss": 1.0463, + "step": 175700 + }, + { + "epoch": 65.74, + "learning_rate": 6.368421052631579e-05, + "loss": 1.0433, + "step": 175800 + }, + { + "epoch": 65.78, + "learning_rate": 6.342105263157895e-05, + "loss": 1.0441, + "step": 175900 + }, + { + "epoch": 65.82, + "learning_rate": 6.31578947368421e-05, + "loss": 1.0453, + "step": 176000 + }, + { + "epoch": 65.86, + "learning_rate": 6.289473684210527e-05, + "loss": 1.0459, + "step": 176100 + }, + { + "epoch": 65.89, + "learning_rate": 6.263157894736843e-05, + "loss": 1.0447, + "step": 176200 + }, + { + "epoch": 65.93, + "learning_rate": 6.236842105263159e-05, + "loss": 1.0451, + "step": 176300 + }, + { + "epoch": 65.97, + "learning_rate": 6.210526315789474e-05, + "loss": 1.0418, + "step": 176400 + }, + { + "epoch": 66.01, + "learning_rate": 6.18421052631579e-05, + "loss": 1.0505, + "step": 176500 + }, + { + "epoch": 66.04, + "learning_rate": 6.157894736842106e-05, + "loss": 1.0445, + "step": 176600 + }, + { + "epoch": 66.08, + "learning_rate": 6.131578947368421e-05, + "loss": 1.0397, + "step": 176700 + }, + { + "epoch": 66.12, + "learning_rate": 6.105263157894737e-05, + "loss": 1.0408, + "step": 176800 + }, + { + "epoch": 66.16, + "learning_rate": 6.0789473684210525e-05, + "loss": 1.0449, + "step": 176900 + }, + { + "epoch": 66.19, + "learning_rate": 6.052631578947369e-05, + "loss": 1.0453, + "step": 177000 + }, + { + "epoch": 66.23, + "learning_rate": 6.0263157894736845e-05, + "loss": 1.0419, + "step": 177100 + }, + { + "epoch": 66.27, + "learning_rate": 6e-05, + "loss": 1.0371, + "step": 177200 + }, + { + "epoch": 66.31, + "learning_rate": 5.973684210526316e-05, + "loss": 1.041, + "step": 177300 + }, + { + "epoch": 66.34, + "learning_rate": 5.947368421052632e-05, + "loss": 1.0418, + "step": 177400 + }, + { + "epoch": 66.38, + "learning_rate": 5.921052631578947e-05, + "loss": 1.0413, + "step": 177500 + }, + { + "epoch": 66.42, + "learning_rate": 5.8947368421052634e-05, + "loss": 1.0426, + "step": 177600 + }, + { + "epoch": 66.45, + "learning_rate": 5.868421052631579e-05, + "loss": 1.0427, + "step": 177700 + }, + { + "epoch": 66.49, + "learning_rate": 5.8421052631578954e-05, + "loss": 1.042, + "step": 177800 + }, + { + "epoch": 66.53, + "learning_rate": 5.8157894736842104e-05, + "loss": 1.0425, + "step": 177900 + }, + { + "epoch": 66.57, + "learning_rate": 5.789473684210527e-05, + "loss": 1.041, + "step": 178000 + }, + { + "epoch": 66.6, + "learning_rate": 5.7631578947368423e-05, + "loss": 1.042, + "step": 178100 + }, + { + "epoch": 66.64, + "learning_rate": 5.736842105263158e-05, + "loss": 1.0379, + "step": 178200 + }, + { + "epoch": 66.68, + "learning_rate": 5.7105263157894736e-05, + "loss": 1.0401, + "step": 178300 + }, + { + "epoch": 66.72, + "learning_rate": 5.68421052631579e-05, + "loss": 1.0431, + "step": 178400 + }, + { + "epoch": 66.75, + "learning_rate": 5.6578947368421056e-05, + "loss": 1.0394, + "step": 178500 + }, + { + "epoch": 66.79, + "learning_rate": 5.6315789473684206e-05, + "loss": 1.0426, + "step": 178600 + }, + { + "epoch": 66.83, + "learning_rate": 5.605263157894737e-05, + "loss": 1.0433, + "step": 178700 + }, + { + "epoch": 66.87, + "learning_rate": 5.5789473684210526e-05, + "loss": 1.0434, + "step": 178800 + }, + { + "epoch": 66.9, + "learning_rate": 5.552631578947369e-05, + "loss": 1.0388, + "step": 178900 + }, + { + "epoch": 66.94, + "learning_rate": 5.526315789473684e-05, + "loss": 1.041, + "step": 179000 + }, + { + "epoch": 66.98, + "learning_rate": 5.5e-05, + "loss": 1.0414, + "step": 179100 + }, + { + "epoch": 67.02, + "learning_rate": 5.473684210526316e-05, + "loss": 1.0458, + "step": 179200 + }, + { + "epoch": 67.05, + "learning_rate": 5.4473684210526315e-05, + "loss": 1.0394, + "step": 179300 + }, + { + "epoch": 67.09, + "learning_rate": 5.421052631578947e-05, + "loss": 1.0395, + "step": 179400 + }, + { + "epoch": 67.13, + "learning_rate": 5.3947368421052635e-05, + "loss": 1.0368, + "step": 179500 + }, + { + "epoch": 67.17, + "learning_rate": 5.368421052631579e-05, + "loss": 1.0392, + "step": 179600 + }, + { + "epoch": 67.2, + "learning_rate": 5.342105263157895e-05, + "loss": 1.0382, + "step": 179700 + }, + { + "epoch": 67.24, + "learning_rate": 5.3157894736842104e-05, + "loss": 1.0377, + "step": 179800 + }, + { + "epoch": 67.28, + "learning_rate": 5.289473684210527e-05, + "loss": 1.0397, + "step": 179900 + }, + { + "epoch": 67.31, + "learning_rate": 5.263157894736842e-05, + "loss": 1.0356, + "step": 180000 + }, + { + "epoch": 67.35, + "learning_rate": 5.236842105263158e-05, + "loss": 1.0383, + "step": 180100 + }, + { + "epoch": 67.39, + "learning_rate": 5.210526315789474e-05, + "loss": 1.0362, + "step": 180200 + }, + { + "epoch": 67.43, + "learning_rate": 5.18421052631579e-05, + "loss": 1.0388, + "step": 180300 + }, + { + "epoch": 67.46, + "learning_rate": 5.157894736842105e-05, + "loss": 1.039, + "step": 180400 + }, + { + "epoch": 67.5, + "learning_rate": 5.131578947368421e-05, + "loss": 1.0396, + "step": 180500 + }, + { + "epoch": 67.54, + "learning_rate": 5.105263157894737e-05, + "loss": 1.0379, + "step": 180600 + }, + { + "epoch": 67.58, + "learning_rate": 5.0789473684210526e-05, + "loss": 1.0353, + "step": 180700 + }, + { + "epoch": 67.61, + "learning_rate": 5.052631578947368e-05, + "loss": 1.039, + "step": 180800 + }, + { + "epoch": 67.65, + "learning_rate": 5.0263157894736846e-05, + "loss": 1.0369, + "step": 180900 + }, + { + "epoch": 67.69, + "learning_rate": 5e-05, + "loss": 1.0371, + "step": 181000 + }, + { + "epoch": 67.73, + "learning_rate": 4.973684210526316e-05, + "loss": 1.0387, + "step": 181100 + }, + { + "epoch": 67.76, + "learning_rate": 4.9473684210526315e-05, + "loss": 1.0402, + "step": 181200 + }, + { + "epoch": 67.8, + "learning_rate": 4.921052631578948e-05, + "loss": 1.0385, + "step": 181300 + }, + { + "epoch": 67.84, + "learning_rate": 4.894736842105263e-05, + "loss": 1.0349, + "step": 181400 + }, + { + "epoch": 67.88, + "learning_rate": 4.868421052631579e-05, + "loss": 1.0362, + "step": 181500 + }, + { + "epoch": 67.91, + "learning_rate": 4.842105263157895e-05, + "loss": 1.0398, + "step": 181600 + }, + { + "epoch": 67.95, + "learning_rate": 4.815789473684211e-05, + "loss": 1.0386, + "step": 181700 + }, + { + "epoch": 67.99, + "learning_rate": 4.789473684210526e-05, + "loss": 1.0365, + "step": 181800 + }, + { + "epoch": 68.03, + "learning_rate": 4.7631578947368424e-05, + "loss": 1.037, + "step": 181900 + }, + { + "epoch": 68.06, + "learning_rate": 4.736842105263158e-05, + "loss": 1.0344, + "step": 182000 + }, + { + "epoch": 68.1, + "learning_rate": 4.710526315789474e-05, + "loss": 1.0362, + "step": 182100 + }, + { + "epoch": 68.14, + "learning_rate": 4.6842105263157894e-05, + "loss": 1.0321, + "step": 182200 + }, + { + "epoch": 68.17, + "learning_rate": 4.657894736842106e-05, + "loss": 1.0373, + "step": 182300 + }, + { + "epoch": 68.21, + "learning_rate": 4.6315789473684214e-05, + "loss": 1.0349, + "step": 182400 + }, + { + "epoch": 68.25, + "learning_rate": 4.605263157894737e-05, + "loss": 1.0335, + "step": 182500 + }, + { + "epoch": 68.29, + "learning_rate": 4.5789473684210527e-05, + "loss": 1.0359, + "step": 182600 + }, + { + "epoch": 68.32, + "learning_rate": 4.552631578947369e-05, + "loss": 1.0341, + "step": 182700 + }, + { + "epoch": 68.36, + "learning_rate": 4.5263157894736846e-05, + "loss": 1.0377, + "step": 182800 + }, + { + "epoch": 68.4, + "learning_rate": 4.4999999999999996e-05, + "loss": 1.038, + "step": 182900 + }, + { + "epoch": 68.44, + "learning_rate": 4.473684210526316e-05, + "loss": 1.0351, + "step": 183000 + }, + { + "epoch": 68.47, + "learning_rate": 4.4473684210526316e-05, + "loss": 1.0358, + "step": 183100 + }, + { + "epoch": 68.51, + "learning_rate": 4.421052631578947e-05, + "loss": 1.0336, + "step": 183200 + }, + { + "epoch": 68.55, + "learning_rate": 4.394736842105263e-05, + "loss": 1.0358, + "step": 183300 + }, + { + "epoch": 68.59, + "learning_rate": 4.368421052631579e-05, + "loss": 1.0336, + "step": 183400 + }, + { + "epoch": 68.62, + "learning_rate": 4.342105263157895e-05, + "loss": 1.0368, + "step": 183500 + }, + { + "epoch": 68.66, + "learning_rate": 4.3157894736842105e-05, + "loss": 1.0374, + "step": 183600 + }, + { + "epoch": 68.7, + "learning_rate": 4.289473684210526e-05, + "loss": 1.0363, + "step": 183700 + }, + { + "epoch": 68.74, + "learning_rate": 4.2631578947368425e-05, + "loss": 1.0351, + "step": 183800 + }, + { + "epoch": 68.77, + "learning_rate": 4.2368421052631575e-05, + "loss": 1.0325, + "step": 183900 + }, + { + "epoch": 68.81, + "learning_rate": 4.210526315789474e-05, + "loss": 1.0359, + "step": 184000 + }, + { + "epoch": 68.85, + "learning_rate": 4.1842105263157894e-05, + "loss": 1.0327, + "step": 184100 + }, + { + "epoch": 68.89, + "learning_rate": 4.157894736842106e-05, + "loss": 1.0319, + "step": 184200 + }, + { + "epoch": 68.92, + "learning_rate": 4.131578947368421e-05, + "loss": 1.0332, + "step": 184300 + }, + { + "epoch": 68.96, + "learning_rate": 4.105263157894737e-05, + "loss": 1.0341, + "step": 184400 + }, + { + "epoch": 69.0, + "learning_rate": 4.078947368421053e-05, + "loss": 1.0318, + "step": 184500 + }, + { + "epoch": 69.04, + "learning_rate": 4.0526315789473684e-05, + "loss": 1.0382, + "step": 184600 + }, + { + "epoch": 69.07, + "learning_rate": 4.026315789473684e-05, + "loss": 1.0329, + "step": 184700 + }, + { + "epoch": 69.11, + "learning_rate": 4e-05, + "loss": 1.0322, + "step": 184800 + }, + { + "epoch": 69.15, + "learning_rate": 3.973684210526316e-05, + "loss": 1.0318, + "step": 184900 + }, + { + "epoch": 69.18, + "learning_rate": 3.9473684210526316e-05, + "loss": 1.0319, + "step": 185000 + }, + { + "epoch": 69.22, + "learning_rate": 3.921052631578947e-05, + "loss": 1.032, + "step": 185100 + }, + { + "epoch": 69.26, + "learning_rate": 3.8947368421052636e-05, + "loss": 1.0304, + "step": 185200 + }, + { + "epoch": 69.3, + "learning_rate": 3.8684210526315786e-05, + "loss": 1.0341, + "step": 185300 + }, + { + "epoch": 69.33, + "learning_rate": 3.842105263157895e-05, + "loss": 1.034, + "step": 185400 + }, + { + "epoch": 69.37, + "learning_rate": 3.8157894736842105e-05, + "loss": 1.0343, + "step": 185500 + }, + { + "epoch": 69.41, + "learning_rate": 3.789473684210527e-05, + "loss": 1.0309, + "step": 185600 + }, + { + "epoch": 69.45, + "learning_rate": 3.763157894736842e-05, + "loss": 1.0292, + "step": 185700 + }, + { + "epoch": 69.48, + "learning_rate": 3.736842105263158e-05, + "loss": 1.0294, + "step": 185800 + }, + { + "epoch": 69.52, + "learning_rate": 3.710526315789474e-05, + "loss": 1.0259, + "step": 185900 + }, + { + "epoch": 69.56, + "learning_rate": 3.6842105263157895e-05, + "loss": 1.0318, + "step": 186000 + }, + { + "epoch": 69.6, + "learning_rate": 3.657894736842105e-05, + "loss": 1.0326, + "step": 186100 + }, + { + "epoch": 69.63, + "learning_rate": 3.6315789473684214e-05, + "loss": 1.031, + "step": 186200 + }, + { + "epoch": 69.67, + "learning_rate": 3.605263157894737e-05, + "loss": 1.0294, + "step": 186300 + }, + { + "epoch": 69.71, + "learning_rate": 3.578947368421053e-05, + "loss": 1.0303, + "step": 186400 + }, + { + "epoch": 69.75, + "learning_rate": 3.5526315789473684e-05, + "loss": 1.0321, + "step": 186500 + }, + { + "epoch": 69.78, + "learning_rate": 3.526315789473685e-05, + "loss": 1.0296, + "step": 186600 + }, + { + "epoch": 69.82, + "learning_rate": 3.5000000000000004e-05, + "loss": 1.0322, + "step": 186700 + }, + { + "epoch": 69.86, + "learning_rate": 3.473684210526316e-05, + "loss": 1.0286, + "step": 186800 + }, + { + "epoch": 69.9, + "learning_rate": 3.447368421052632e-05, + "loss": 1.0307, + "step": 186900 + }, + { + "epoch": 69.93, + "learning_rate": 3.421052631578948e-05, + "loss": 1.0308, + "step": 187000 + }, + { + "epoch": 69.97, + "learning_rate": 3.394736842105263e-05, + "loss": 1.0319, + "step": 187100 + }, + { + "epoch": 70.01, + "learning_rate": 3.3684210526315786e-05, + "loss": 1.0335, + "step": 187200 + }, + { + "epoch": 70.04, + "learning_rate": 3.342105263157895e-05, + "loss": 1.0306, + "step": 187300 + }, + { + "epoch": 70.08, + "learning_rate": 3.3157894736842106e-05, + "loss": 1.0271, + "step": 187400 + }, + { + "epoch": 70.12, + "learning_rate": 3.289473684210526e-05, + "loss": 1.0281, + "step": 187500 + }, + { + "epoch": 70.16, + "learning_rate": 3.263157894736842e-05, + "loss": 1.0296, + "step": 187600 + }, + { + "epoch": 70.19, + "learning_rate": 3.236842105263158e-05, + "loss": 1.0279, + "step": 187700 + }, + { + "epoch": 70.23, + "learning_rate": 3.210526315789473e-05, + "loss": 1.029, + "step": 187800 + }, + { + "epoch": 70.27, + "learning_rate": 3.1842105263157895e-05, + "loss": 1.0288, + "step": 187900 + }, + { + "epoch": 70.31, + "learning_rate": 3.157894736842105e-05, + "loss": 1.0283, + "step": 188000 + }, + { + "epoch": 70.34, + "learning_rate": 3.1315789473684215e-05, + "loss": 1.0284, + "step": 188100 + }, + { + "epoch": 70.38, + "learning_rate": 3.105263157894737e-05, + "loss": 1.0245, + "step": 188200 + }, + { + "epoch": 70.42, + "learning_rate": 3.078947368421053e-05, + "loss": 1.0298, + "step": 188300 + }, + { + "epoch": 70.46, + "learning_rate": 3.0526315789473684e-05, + "loss": 1.0267, + "step": 188400 + }, + { + "epoch": 70.49, + "learning_rate": 3.0263157894736844e-05, + "loss": 1.0256, + "step": 188500 + }, + { + "epoch": 70.53, + "learning_rate": 3e-05, + "loss": 1.0291, + "step": 188600 + }, + { + "epoch": 70.57, + "learning_rate": 2.973684210526316e-05, + "loss": 1.0269, + "step": 188700 + }, + { + "epoch": 70.61, + "learning_rate": 2.9473684210526317e-05, + "loss": 1.0276, + "step": 188800 + }, + { + "epoch": 70.64, + "learning_rate": 2.9210526315789477e-05, + "loss": 1.03, + "step": 188900 + }, + { + "epoch": 70.68, + "learning_rate": 2.8947368421052634e-05, + "loss": 1.0286, + "step": 189000 + }, + { + "epoch": 70.72, + "learning_rate": 2.868421052631579e-05, + "loss": 1.0277, + "step": 189100 + }, + { + "epoch": 70.76, + "learning_rate": 2.842105263157895e-05, + "loss": 1.0288, + "step": 189200 + }, + { + "epoch": 70.79, + "learning_rate": 2.8157894736842103e-05, + "loss": 1.028, + "step": 189300 + }, + { + "epoch": 70.83, + "learning_rate": 2.7894736842105263e-05, + "loss": 1.0264, + "step": 189400 + }, + { + "epoch": 70.87, + "learning_rate": 2.763157894736842e-05, + "loss": 1.0263, + "step": 189500 + }, + { + "epoch": 70.9, + "learning_rate": 2.736842105263158e-05, + "loss": 1.0269, + "step": 189600 + }, + { + "epoch": 70.94, + "learning_rate": 2.7105263157894736e-05, + "loss": 1.0272, + "step": 189700 + }, + { + "epoch": 70.98, + "learning_rate": 2.6842105263157896e-05, + "loss": 1.0309, + "step": 189800 + }, + { + "epoch": 71.02, + "learning_rate": 2.6578947368421052e-05, + "loss": 1.0341, + "step": 189900 + }, + { + "epoch": 71.05, + "learning_rate": 2.631578947368421e-05, + "loss": 1.0242, + "step": 190000 + }, + { + "epoch": 71.09, + "learning_rate": 2.605263157894737e-05, + "loss": 1.0255, + "step": 190100 + }, + { + "epoch": 71.13, + "learning_rate": 2.5789473684210525e-05, + "loss": 1.021, + "step": 190200 + }, + { + "epoch": 71.17, + "learning_rate": 2.5526315789473685e-05, + "loss": 1.0281, + "step": 190300 + }, + { + "epoch": 71.2, + "learning_rate": 2.526315789473684e-05, + "loss": 1.0254, + "step": 190400 + }, + { + "epoch": 71.24, + "learning_rate": 2.5e-05, + "loss": 1.0225, + "step": 190500 + }, + { + "epoch": 71.28, + "learning_rate": 2.4736842105263158e-05, + "loss": 1.0265, + "step": 190600 + }, + { + "epoch": 71.32, + "learning_rate": 2.4473684210526314e-05, + "loss": 1.0269, + "step": 190700 + }, + { + "epoch": 71.35, + "learning_rate": 2.4210526315789474e-05, + "loss": 1.0285, + "step": 190800 + }, + { + "epoch": 71.39, + "learning_rate": 2.394736842105263e-05, + "loss": 1.0241, + "step": 190900 + }, + { + "epoch": 71.43, + "learning_rate": 2.368421052631579e-05, + "loss": 1.0242, + "step": 191000 + }, + { + "epoch": 71.47, + "learning_rate": 2.3421052631578947e-05, + "loss": 1.0243, + "step": 191100 + }, + { + "epoch": 71.5, + "learning_rate": 2.3157894736842107e-05, + "loss": 1.0261, + "step": 191200 + }, + { + "epoch": 71.54, + "learning_rate": 2.2894736842105263e-05, + "loss": 1.0289, + "step": 191300 + }, + { + "epoch": 71.58, + "learning_rate": 2.2631578947368423e-05, + "loss": 1.0237, + "step": 191400 + }, + { + "epoch": 71.62, + "learning_rate": 2.236842105263158e-05, + "loss": 1.0262, + "step": 191500 + }, + { + "epoch": 71.65, + "learning_rate": 2.2105263157894736e-05, + "loss": 1.0249, + "step": 191600 + }, + { + "epoch": 71.69, + "learning_rate": 2.1842105263157896e-05, + "loss": 1.0255, + "step": 191700 + }, + { + "epoch": 71.73, + "learning_rate": 2.1578947368421053e-05, + "loss": 1.0242, + "step": 191800 + }, + { + "epoch": 71.77, + "learning_rate": 2.1315789473684212e-05, + "loss": 1.0227, + "step": 191900 + }, + { + "epoch": 71.8, + "learning_rate": 2.105263157894737e-05, + "loss": 1.0239, + "step": 192000 + }, + { + "epoch": 71.84, + "learning_rate": 2.078947368421053e-05, + "loss": 1.0227, + "step": 192100 + }, + { + "epoch": 71.88, + "learning_rate": 2.0526315789473685e-05, + "loss": 1.0271, + "step": 192200 + }, + { + "epoch": 71.91, + "learning_rate": 2.0263157894736842e-05, + "loss": 1.0233, + "step": 192300 + }, + { + "epoch": 71.95, + "learning_rate": 2e-05, + "loss": 1.0243, + "step": 192400 + }, + { + "epoch": 71.99, + "learning_rate": 1.9736842105263158e-05, + "loss": 1.0216, + "step": 192500 + }, + { + "epoch": 72.03, + "learning_rate": 1.9473684210526318e-05, + "loss": 1.0283, + "step": 192600 + }, + { + "epoch": 72.06, + "learning_rate": 1.9210526315789474e-05, + "loss": 1.0226, + "step": 192700 + }, + { + "epoch": 72.1, + "learning_rate": 1.8947368421052634e-05, + "loss": 1.0209, + "step": 192800 + }, + { + "epoch": 72.14, + "learning_rate": 1.868421052631579e-05, + "loss": 1.0217, + "step": 192900 + }, + { + "epoch": 72.18, + "learning_rate": 1.8421052631578947e-05, + "loss": 1.0206, + "step": 193000 + }, + { + "epoch": 72.21, + "learning_rate": 1.8157894736842107e-05, + "loss": 1.0235, + "step": 193100 + }, + { + "epoch": 72.25, + "learning_rate": 1.7894736842105264e-05, + "loss": 1.024, + "step": 193200 + }, + { + "epoch": 72.29, + "learning_rate": 1.7631578947368424e-05, + "loss": 1.0225, + "step": 193300 + }, + { + "epoch": 72.33, + "learning_rate": 1.736842105263158e-05, + "loss": 1.0217, + "step": 193400 + }, + { + "epoch": 72.36, + "learning_rate": 1.710526315789474e-05, + "loss": 1.0238, + "step": 193500 + }, + { + "epoch": 72.4, + "learning_rate": 1.6842105263157893e-05, + "loss": 1.0222, + "step": 193600 + }, + { + "epoch": 72.44, + "learning_rate": 1.6578947368421053e-05, + "loss": 1.0198, + "step": 193700 + }, + { + "epoch": 72.48, + "learning_rate": 1.631578947368421e-05, + "loss": 1.0228, + "step": 193800 + }, + { + "epoch": 72.51, + "learning_rate": 1.6052631578947366e-05, + "loss": 1.0217, + "step": 193900 + }, + { + "epoch": 72.55, + "learning_rate": 1.5789473684210526e-05, + "loss": 1.021, + "step": 194000 + }, + { + "epoch": 72.59, + "learning_rate": 1.5526315789473686e-05, + "loss": 1.0239, + "step": 194100 + }, + { + "epoch": 72.63, + "learning_rate": 1.5263157894736842e-05, + "loss": 1.0197, + "step": 194200 + }, + { + "epoch": 72.66, + "learning_rate": 1.5e-05, + "loss": 1.0192, + "step": 194300 + }, + { + "epoch": 72.7, + "learning_rate": 1.4736842105263159e-05, + "loss": 1.025, + "step": 194400 + }, + { + "epoch": 72.74, + "learning_rate": 1.4473684210526317e-05, + "loss": 1.023, + "step": 194500 + }, + { + "epoch": 72.77, + "learning_rate": 1.4210526315789475e-05, + "loss": 1.018, + "step": 194600 + }, + { + "epoch": 72.81, + "learning_rate": 1.3947368421052631e-05, + "loss": 1.0233, + "step": 194700 + }, + { + "epoch": 72.85, + "learning_rate": 1.368421052631579e-05, + "loss": 1.0205, + "step": 194800 + }, + { + "epoch": 72.89, + "learning_rate": 1.3421052631578948e-05, + "loss": 1.0253, + "step": 194900 + }, + { + "epoch": 72.92, + "learning_rate": 1.3157894736842104e-05, + "loss": 1.0193, + "step": 195000 + }, + { + "epoch": 72.96, + "learning_rate": 1.2894736842105262e-05, + "loss": 1.0221, + "step": 195100 + }, + { + "epoch": 73.0, + "learning_rate": 1.263157894736842e-05, + "loss": 1.0221, + "step": 195200 + }, + { + "epoch": 73.04, + "learning_rate": 1.2368421052631579e-05, + "loss": 1.0233, + "step": 195300 + }, + { + "epoch": 73.07, + "learning_rate": 1.2105263157894737e-05, + "loss": 1.0181, + "step": 195400 + }, + { + "epoch": 73.11, + "learning_rate": 1.1842105263157895e-05, + "loss": 1.0205, + "step": 195500 + }, + { + "epoch": 73.15, + "learning_rate": 1.1578947368421053e-05, + "loss": 1.0207, + "step": 195600 + }, + { + "epoch": 73.19, + "learning_rate": 1.1315789473684212e-05, + "loss": 1.0226, + "step": 195700 + }, + { + "epoch": 73.22, + "learning_rate": 1.1052631578947368e-05, + "loss": 1.0195, + "step": 195800 + }, + { + "epoch": 73.26, + "learning_rate": 1.0789473684210526e-05, + "loss": 1.0189, + "step": 195900 + }, + { + "epoch": 73.3, + "learning_rate": 1.0526315789473684e-05, + "loss": 1.021, + "step": 196000 + }, + { + "epoch": 73.34, + "learning_rate": 1.0263157894736843e-05, + "loss": 1.0205, + "step": 196100 + }, + { + "epoch": 73.37, + "learning_rate": 1e-05, + "loss": 1.0199, + "step": 196200 + }, + { + "epoch": 73.41, + "learning_rate": 9.736842105263159e-06, + "loss": 1.0202, + "step": 196300 + }, + { + "epoch": 73.45, + "learning_rate": 9.473684210526317e-06, + "loss": 1.0172, + "step": 196400 + }, + { + "epoch": 73.49, + "learning_rate": 9.210526315789474e-06, + "loss": 1.0191, + "step": 196500 + }, + { + "epoch": 73.52, + "learning_rate": 8.947368421052632e-06, + "loss": 1.0216, + "step": 196600 + }, + { + "epoch": 73.56, + "learning_rate": 8.68421052631579e-06, + "loss": 1.0197, + "step": 196700 + }, + { + "epoch": 73.6, + "learning_rate": 8.421052631578947e-06, + "loss": 1.0202, + "step": 196800 + }, + { + "epoch": 73.63, + "learning_rate": 8.157894736842105e-06, + "loss": 1.0219, + "step": 196900 + }, + { + "epoch": 73.67, + "learning_rate": 7.894736842105263e-06, + "loss": 1.0178, + "step": 197000 + }, + { + "epoch": 73.71, + "learning_rate": 7.631578947368421e-06, + "loss": 1.023, + "step": 197100 + }, + { + "epoch": 73.75, + "learning_rate": 7.368421052631579e-06, + "loss": 1.0181, + "step": 197200 + }, + { + "epoch": 73.78, + "learning_rate": 7.1052631578947375e-06, + "loss": 1.0208, + "step": 197300 + }, + { + "epoch": 73.82, + "learning_rate": 6.842105263157895e-06, + "loss": 1.0191, + "step": 197400 + }, + { + "epoch": 73.86, + "learning_rate": 6.578947368421052e-06, + "loss": 1.0193, + "step": 197500 + }, + { + "epoch": 73.9, + "learning_rate": 6.31578947368421e-06, + "loss": 1.0187, + "step": 197600 + }, + { + "epoch": 73.93, + "learning_rate": 6.0526315789473685e-06, + "loss": 1.0192, + "step": 197700 + }, + { + "epoch": 73.97, + "learning_rate": 5.789473684210527e-06, + "loss": 1.02, + "step": 197800 + }, + { + "epoch": 74.01, + "learning_rate": 5.526315789473684e-06, + "loss": 1.0228, + "step": 197900 + }, + { + "epoch": 74.05, + "learning_rate": 5.263157894736842e-06, + "loss": 1.0162, + "step": 198000 + }, + { + "epoch": 74.08, + "learning_rate": 5e-06, + "loss": 1.0187, + "step": 198100 + }, + { + "epoch": 74.12, + "learning_rate": 4.736842105263159e-06, + "loss": 1.0157, + "step": 198200 + }, + { + "epoch": 74.16, + "learning_rate": 4.473684210526316e-06, + "loss": 1.0182, + "step": 198300 + }, + { + "epoch": 74.2, + "learning_rate": 4.210526315789473e-06, + "loss": 1.0197, + "step": 198400 + }, + { + "epoch": 74.23, + "learning_rate": 3.9473684210526315e-06, + "loss": 1.0176, + "step": 198500 + }, + { + "epoch": 74.27, + "learning_rate": 3.6842105263157896e-06, + "loss": 1.0183, + "step": 198600 + }, + { + "epoch": 74.31, + "learning_rate": 3.4210526315789474e-06, + "loss": 1.019, + "step": 198700 + }, + { + "epoch": 74.35, + "learning_rate": 3.157894736842105e-06, + "loss": 1.0174, + "step": 198800 + }, + { + "epoch": 74.38, + "learning_rate": 2.8947368421052634e-06, + "loss": 1.0188, + "step": 198900 + }, + { + "epoch": 74.42, + "learning_rate": 2.631578947368421e-06, + "loss": 1.0175, + "step": 199000 + }, + { + "epoch": 74.46, + "learning_rate": 2.3684210526315793e-06, + "loss": 1.0187, + "step": 199100 + }, + { + "epoch": 74.5, + "learning_rate": 2.1052631578947366e-06, + "loss": 1.0183, + "step": 199200 + }, + { + "epoch": 74.53, + "learning_rate": 1.8421052631578948e-06, + "loss": 1.0195, + "step": 199300 + }, + { + "epoch": 74.57, + "learning_rate": 1.5789473684210526e-06, + "loss": 1.0219, + "step": 199400 + }, + { + "epoch": 74.61, + "learning_rate": 1.3157894736842106e-06, + "loss": 1.0212, + "step": 199500 + }, + { + "epoch": 74.64, + "learning_rate": 1.0526315789473683e-06, + "loss": 1.0155, + "step": 199600 + }, + { + "epoch": 74.68, + "learning_rate": 7.894736842105263e-07, + "loss": 1.0178, + "step": 199700 + }, + { + "epoch": 74.72, + "learning_rate": 5.263157894736842e-07, + "loss": 1.0159, + "step": 199800 + }, + { + "epoch": 74.76, + "learning_rate": 2.631578947368421e-07, + "loss": 1.0171, + "step": 199900 + }, + { + "epoch": 74.79, + "learning_rate": 0.0, + "loss": 1.0182, + "step": 200000 + }, + { + "epoch": 74.79, + "step": 200001, + "total_flos": 5.392567541842457e+19, + "train_loss": 5.0799270787734195e-06, + "train_runtime": 2733.1068, + "train_samples_per_second": 149866.079, + "train_steps_per_second": 73.177 + } + ], + "max_steps": 200000, + "num_train_epochs": 75, + "total_flos": 5.392567541842457e+19, + "trial_name": null, + "trial_params": null +}