diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5716 @@ +{ + "best_metric": 0.4023992419242859, + "best_model_checkpoint": "/scratch/s3545881/dumped/translation/byt5/3124577/checkpoint-6600", + "epoch": 6.7736185383244205, + "global_step": 7600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 2.5e-06, + "loss": 9.97, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 5e-06, + "loss": 9.212, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 7.5e-06, + "loss": 7.8584, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 1e-05, + "loss": 6.3664, + "step": 40 + }, + { + "epoch": 0.04, + "learning_rate": 1.25e-05, + "loss": 4.6717, + "step": 50 + }, + { + "epoch": 0.05, + "learning_rate": 1.5e-05, + "loss": 3.7212, + "step": 60 + }, + { + "epoch": 0.06, + "learning_rate": 1.7500000000000002e-05, + "loss": 3.113, + "step": 70 + }, + { + "epoch": 0.07, + "learning_rate": 2e-05, + "loss": 2.6835, + "step": 80 + }, + { + "epoch": 0.08, + "learning_rate": 2.2499999999999998e-05, + "loss": 2.4711, + "step": 90 + }, + { + "epoch": 0.09, + "learning_rate": 2.5e-05, + "loss": 2.3731, + "step": 100 + }, + { + "epoch": 0.1, + "learning_rate": 2.75e-05, + "loss": 2.1419, + "step": 110 + }, + { + "epoch": 0.11, + "learning_rate": 3e-05, + "loss": 2.037, + "step": 120 + }, + { + "epoch": 0.12, + "learning_rate": 3.2500000000000004e-05, + "loss": 1.9651, + "step": 130 + }, + { + "epoch": 0.12, + "learning_rate": 3.5000000000000004e-05, + "loss": 1.8411, + "step": 140 + }, + { + "epoch": 0.13, + "learning_rate": 3.75e-05, + "loss": 1.7802, + "step": 150 + }, + { + "epoch": 0.14, + "learning_rate": 4e-05, + "loss": 1.7039, + "step": 160 + }, + { + "epoch": 0.15, + "learning_rate": 4.25e-05, + "loss": 1.6342, + "step": 170 + }, + { + "epoch": 0.16, + "learning_rate": 4.4999999999999996e-05, + "loss": 1.5809, + "step": 180 + }, + { + "epoch": 0.17, + "learning_rate": 4.75e-05, + "loss": 1.5851, + "step": 190 + }, + { + "epoch": 0.18, + "learning_rate": 5e-05, + "loss": 1.4769, + "step": 200 + }, + { + "epoch": 0.18, + "eval_bp": 1.0, + "eval_counts": [ + 17716, + 9437, + 5749, + 3729 + ], + "eval_loss": 0.7528842687606812, + "eval_precisions": [ + 5.2827443075418365, + 2.843172108773854, + 1.7501240521049284, + 1.1471304818624797 + ], + "eval_ref_len": 41379, + "eval_runtime": 1613.712, + "eval_samples_per_second": 2.146, + "eval_score": 2.3433442773872275, + "eval_steps_per_second": 2.146, + "eval_sys_len": 335356, + "eval_totals": [ + 335356, + 331918, + 328491, + 325072 + ], + "step": 200 + }, + { + "epoch": 0.19, + "learning_rate": 5.25e-05, + "loss": 1.5189, + "step": 210 + }, + { + "epoch": 0.2, + "learning_rate": 5.5e-05, + "loss": 1.442, + "step": 220 + }, + { + "epoch": 0.2, + "learning_rate": 5.75e-05, + "loss": 1.4614, + "step": 230 + }, + { + "epoch": 0.21, + "learning_rate": 6e-05, + "loss": 1.372, + "step": 240 + }, + { + "epoch": 0.22, + "learning_rate": 6.25e-05, + "loss": 1.382, + "step": 250 + }, + { + "epoch": 0.23, + "learning_rate": 6.500000000000001e-05, + "loss": 1.3639, + "step": 260 + }, + { + "epoch": 0.24, + "learning_rate": 6.75e-05, + "loss": 1.418, + "step": 270 + }, + { + "epoch": 0.25, + "learning_rate": 7.000000000000001e-05, + "loss": 1.3478, + "step": 280 + }, + { + "epoch": 0.26, + "learning_rate": 7.25e-05, + "loss": 1.3029, + "step": 290 + }, + { + "epoch": 0.27, + "learning_rate": 7.5e-05, + "loss": 1.2836, + "step": 300 + }, + { + "epoch": 0.28, + "learning_rate": 7.75e-05, + "loss": 1.2925, + "step": 310 + }, + { + "epoch": 0.29, + "learning_rate": 8e-05, + "loss": 1.278, + "step": 320 + }, + { + "epoch": 0.29, + "learning_rate": 8.25e-05, + "loss": 1.2818, + "step": 330 + }, + { + "epoch": 0.3, + "learning_rate": 8.5e-05, + "loss": 1.2765, + "step": 340 + }, + { + "epoch": 0.31, + "learning_rate": 8.75e-05, + "loss": 1.2529, + "step": 350 + }, + { + "epoch": 0.32, + "learning_rate": 8.999999999999999e-05, + "loss": 1.2395, + "step": 360 + }, + { + "epoch": 0.33, + "learning_rate": 9.25e-05, + "loss": 1.2002, + "step": 370 + }, + { + "epoch": 0.34, + "learning_rate": 9.5e-05, + "loss": 1.2723, + "step": 380 + }, + { + "epoch": 0.35, + "learning_rate": 9.750000000000001e-05, + "loss": 1.2499, + "step": 390 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001, + "loss": 1.2356, + "step": 400 + }, + { + "epoch": 0.36, + "eval_bp": 1.0, + "eval_counts": [ + 23700, + 14605, + 9895, + 7121 + ], + "eval_loss": 0.602267324924469, + "eval_precisions": [ + 34.1729990050899, + 22.165394363418372, + 15.848482421718588, + 12.06335761477215 + ], + "eval_ref_len": 41379, + "eval_runtime": 852.9342, + "eval_samples_per_second": 4.06, + "eval_score": 19.50758960232416, + "eval_steps_per_second": 4.06, + "eval_sys_len": 69353, + "eval_totals": [ + 69353, + 65891, + 62435, + 59030 + ], + "step": 400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001025, + "loss": 1.2241, + "step": 410 + }, + { + "epoch": 0.37, + "learning_rate": 0.000105, + "loss": 1.209, + "step": 420 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001075, + "loss": 1.1935, + "step": 430 + }, + { + "epoch": 0.39, + "learning_rate": 0.00011, + "loss": 1.1674, + "step": 440 + }, + { + "epoch": 0.4, + "learning_rate": 0.00011250000000000001, + "loss": 1.2343, + "step": 450 + }, + { + "epoch": 0.41, + "learning_rate": 0.000115, + "loss": 1.1924, + "step": 460 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001175, + "loss": 1.1366, + "step": 470 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012, + "loss": 1.1779, + "step": 480 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001225, + "loss": 1.1417, + "step": 490 + }, + { + "epoch": 0.45, + "learning_rate": 0.000125, + "loss": 1.1594, + "step": 500 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001275, + "loss": 1.1466, + "step": 510 + }, + { + "epoch": 0.46, + "learning_rate": 0.00013000000000000002, + "loss": 1.1283, + "step": 520 + }, + { + "epoch": 0.47, + "learning_rate": 0.00013250000000000002, + "loss": 1.0987, + "step": 530 + }, + { + "epoch": 0.48, + "learning_rate": 0.000135, + "loss": 1.1215, + "step": 540 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001375, + "loss": 1.1662, + "step": 550 + }, + { + "epoch": 0.5, + "learning_rate": 0.00014000000000000001, + "loss": 1.1227, + "step": 560 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001425, + "loss": 1.1494, + "step": 570 + }, + { + "epoch": 0.52, + "learning_rate": 0.000145, + "loss": 1.1005, + "step": 580 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001475, + "loss": 1.1506, + "step": 590 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015, + "loss": 1.0997, + "step": 600 + }, + { + "epoch": 0.53, + "eval_bp": 1.0, + "eval_counts": [ + 25462, + 16355, + 11456, + 8410 + ], + "eval_loss": 0.5470613241195679, + "eval_precisions": [ + 46.03091385700081, + 31.541695595155442, + 23.670888691447818, + 18.699693156045715 + ], + "eval_ref_len": 41379, + "eval_runtime": 585.2022, + "eval_samples_per_second": 5.918, + "eval_score": 28.313653908246078, + "eval_steps_per_second": 5.918, + "eval_sys_len": 55315, + "eval_totals": [ + 55315, + 51852, + 48397, + 44974 + ], + "step": 600 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001525, + "loss": 1.1135, + "step": 610 + }, + { + "epoch": 0.55, + "learning_rate": 0.000155, + "loss": 1.1367, + "step": 620 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001575, + "loss": 1.0833, + "step": 630 + }, + { + "epoch": 0.57, + "learning_rate": 0.00016, + "loss": 1.104, + "step": 640 + }, + { + "epoch": 0.58, + "learning_rate": 0.00016250000000000002, + "loss": 1.0879, + "step": 650 + }, + { + "epoch": 0.59, + "learning_rate": 0.000165, + "loss": 1.0817, + "step": 660 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001675, + "loss": 1.0712, + "step": 670 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017, + "loss": 1.1092, + "step": 680 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001725, + "loss": 1.0784, + "step": 690 + }, + { + "epoch": 0.62, + "learning_rate": 0.000175, + "loss": 1.0826, + "step": 700 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001775, + "loss": 1.0979, + "step": 710 + }, + { + "epoch": 0.64, + "learning_rate": 0.00017999999999999998, + "loss": 1.0701, + "step": 720 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001825, + "loss": 1.0797, + "step": 730 + }, + { + "epoch": 0.66, + "learning_rate": 0.000185, + "loss": 1.0733, + "step": 740 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001875, + "loss": 1.0547, + "step": 750 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019, + "loss": 1.0681, + "step": 760 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019250000000000002, + "loss": 1.0683, + "step": 770 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019500000000000002, + "loss": 1.0525, + "step": 780 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001975, + "loss": 1.0493, + "step": 790 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002, + "loss": 1.0861, + "step": 800 + }, + { + "epoch": 0.71, + "eval_bp": 1.0, + "eval_counts": [ + 26335, + 17460, + 12480, + 9234 + ], + "eval_loss": 0.5133824944496155, + "eval_precisions": [ + 50.52956751985878, + 35.885314972767446, + 27.611841231912916, + 22.09989708733217 + ], + "eval_ref_len": 41379, + "eval_runtime": 535.368, + "eval_samples_per_second": 6.468, + "eval_score": 32.43299018324529, + "eval_steps_per_second": 6.468, + "eval_sys_len": 52118, + "eval_totals": [ + 52118, + 48655, + 45198, + 41783 + ], + "step": 800 + }, + { + "epoch": 0.72, + "learning_rate": 0.00020250000000000002, + "loss": 1.0747, + "step": 810 + }, + { + "epoch": 0.73, + "learning_rate": 0.000205, + "loss": 1.0245, + "step": 820 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002075, + "loss": 1.086, + "step": 830 + }, + { + "epoch": 0.75, + "learning_rate": 0.00021, + "loss": 1.0777, + "step": 840 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002125, + "loss": 1.0618, + "step": 850 + }, + { + "epoch": 0.77, + "learning_rate": 0.000215, + "loss": 1.0328, + "step": 860 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002175, + "loss": 1.0533, + "step": 870 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022, + "loss": 1.0574, + "step": 880 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022250000000000001, + "loss": 1.0408, + "step": 890 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022500000000000002, + "loss": 1.0357, + "step": 900 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002275, + "loss": 1.0146, + "step": 910 + }, + { + "epoch": 0.82, + "learning_rate": 0.00023, + "loss": 0.9986, + "step": 920 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002325, + "loss": 1.0326, + "step": 930 + }, + { + "epoch": 0.84, + "learning_rate": 0.000235, + "loss": 1.0253, + "step": 940 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002375, + "loss": 0.9881, + "step": 950 + }, + { + "epoch": 0.86, + "learning_rate": 0.00024, + "loss": 1.0384, + "step": 960 + }, + { + "epoch": 0.86, + "learning_rate": 0.00024249999999999999, + "loss": 1.0008, + "step": 970 + }, + { + "epoch": 0.87, + "learning_rate": 0.000245, + "loss": 0.9963, + "step": 980 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002475, + "loss": 0.9914, + "step": 990 + }, + { + "epoch": 0.89, + "learning_rate": 0.00025, + "loss": 1.0244, + "step": 1000 + }, + { + "epoch": 0.89, + "eval_bp": 1.0, + "eval_counts": [ + 26618, + 17909, + 12938, + 9646 + ], + "eval_loss": 0.4954930245876312, + "eval_precisions": [ + 51.9588514318062, + 37.49319599715279, + 29.198167497912483, + 23.586658841940533 + ], + "eval_ref_len": 41379, + "eval_runtime": 549.0088, + "eval_samples_per_second": 6.308, + "eval_score": 34.03364828263135, + "eval_steps_per_second": 6.308, + "eval_sys_len": 51229, + "eval_totals": [ + 51229, + 47766, + 44311, + 40896 + ], + "step": 1000 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002525, + "loss": 1.0132, + "step": 1010 + }, + { + "epoch": 0.91, + "learning_rate": 0.000255, + "loss": 0.9777, + "step": 1020 + }, + { + "epoch": 0.92, + "learning_rate": 0.0002575, + "loss": 0.9896, + "step": 1030 + }, + { + "epoch": 0.93, + "learning_rate": 0.00026000000000000003, + "loss": 0.9835, + "step": 1040 + }, + { + "epoch": 0.94, + "learning_rate": 0.00026250000000000004, + "loss": 0.9828, + "step": 1050 + }, + { + "epoch": 0.94, + "learning_rate": 0.00026500000000000004, + "loss": 0.9894, + "step": 1060 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002675, + "loss": 0.9767, + "step": 1070 + }, + { + "epoch": 0.96, + "learning_rate": 0.00027, + "loss": 0.9637, + "step": 1080 + }, + { + "epoch": 0.97, + "learning_rate": 0.0002725, + "loss": 0.9551, + "step": 1090 + }, + { + "epoch": 0.98, + "learning_rate": 0.000275, + "loss": 0.9858, + "step": 1100 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002775, + "loss": 1.015, + "step": 1110 + }, + { + "epoch": 1.0, + "learning_rate": 0.00028000000000000003, + "loss": 0.9669, + "step": 1120 + }, + { + "epoch": 1.01, + "learning_rate": 0.0002825, + "loss": 0.954, + "step": 1130 + }, + { + "epoch": 1.02, + "learning_rate": 0.000285, + "loss": 0.9628, + "step": 1140 + }, + { + "epoch": 1.02, + "learning_rate": 0.0002875, + "loss": 0.9235, + "step": 1150 + }, + { + "epoch": 1.03, + "learning_rate": 0.00029, + "loss": 0.9764, + "step": 1160 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002925, + "loss": 0.9347, + "step": 1170 + }, + { + "epoch": 1.05, + "learning_rate": 0.000295, + "loss": 0.9473, + "step": 1180 + }, + { + "epoch": 1.06, + "learning_rate": 0.00029749999999999997, + "loss": 0.9664, + "step": 1190 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003, + "loss": 0.9572, + "step": 1200 + }, + { + "epoch": 1.07, + "eval_bp": 1.0, + "eval_counts": [ + 27385, + 18748, + 13686, + 10276 + ], + "eval_loss": 0.47938892245292664, + "eval_precisions": [ + 60.447201130143036, + 44.80772448077245, + 35.65362371698015, + 29.386027624467385 + ], + "eval_ref_len": 41379, + "eval_runtime": 508.8009, + "eval_samples_per_second": 6.806, + "eval_score": 41.04340766403553, + "eval_steps_per_second": 6.806, + "eval_sys_len": 45304, + "eval_totals": [ + 45304, + 41841, + 38386, + 34969 + ], + "step": 1200 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003025, + "loss": 0.9521, + "step": 1210 + }, + { + "epoch": 1.09, + "learning_rate": 0.000305, + "loss": 0.9554, + "step": 1220 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003075, + "loss": 0.934, + "step": 1230 + }, + { + "epoch": 1.11, + "learning_rate": 0.00031, + "loss": 0.9731, + "step": 1240 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003125, + "loss": 0.9477, + "step": 1250 + }, + { + "epoch": 1.12, + "learning_rate": 0.000315, + "loss": 0.9497, + "step": 1260 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003175, + "loss": 0.8888, + "step": 1270 + }, + { + "epoch": 1.14, + "learning_rate": 0.00032, + "loss": 0.9484, + "step": 1280 + }, + { + "epoch": 1.15, + "learning_rate": 0.00032250000000000003, + "loss": 0.9342, + "step": 1290 + }, + { + "epoch": 1.16, + "learning_rate": 0.00032500000000000004, + "loss": 0.8897, + "step": 1300 + }, + { + "epoch": 1.17, + "learning_rate": 0.00032750000000000005, + "loss": 0.9814, + "step": 1310 + }, + { + "epoch": 1.18, + "learning_rate": 0.00033, + "loss": 0.9506, + "step": 1320 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003325, + "loss": 0.9298, + "step": 1330 + }, + { + "epoch": 1.19, + "learning_rate": 0.000335, + "loss": 0.938, + "step": 1340 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003375, + "loss": 0.951, + "step": 1350 + }, + { + "epoch": 1.21, + "learning_rate": 0.00034, + "loss": 0.9584, + "step": 1360 + }, + { + "epoch": 1.22, + "learning_rate": 0.00034250000000000003, + "loss": 0.9679, + "step": 1370 + }, + { + "epoch": 1.23, + "learning_rate": 0.000345, + "loss": 0.9592, + "step": 1380 + }, + { + "epoch": 1.24, + "learning_rate": 0.0003475, + "loss": 0.9307, + "step": 1390 + }, + { + "epoch": 1.25, + "learning_rate": 0.00035, + "loss": 0.9061, + "step": 1400 + }, + { + "epoch": 1.25, + "eval_bp": 1.0, + "eval_counts": [ + 27662, + 18944, + 13768, + 10241 + ], + "eval_loss": 0.48935189843177795, + "eval_precisions": [ + 65.6882999691292, + 49.01676671496585, + 39.120304597374556, + 32.218586799219786 + ], + "eval_ref_len": 41379, + "eval_runtime": 411.2631, + "eval_samples_per_second": 8.42, + "eval_score": 44.88335879592958, + "eval_steps_per_second": 8.42, + "eval_sys_len": 42111, + "eval_totals": [ + 42111, + 38648, + 35194, + 31786 + ], + "step": 1400 + }, + { + "epoch": 1.26, + "learning_rate": 0.0003525, + "loss": 0.9391, + "step": 1410 + }, + { + "epoch": 1.27, + "learning_rate": 0.000355, + "loss": 0.9575, + "step": 1420 + }, + { + "epoch": 1.27, + "learning_rate": 0.0003575, + "loss": 0.9054, + "step": 1430 + }, + { + "epoch": 1.28, + "learning_rate": 0.00035999999999999997, + "loss": 0.9524, + "step": 1440 + }, + { + "epoch": 1.29, + "learning_rate": 0.0003625, + "loss": 0.9298, + "step": 1450 + }, + { + "epoch": 1.3, + "learning_rate": 0.000365, + "loss": 0.9706, + "step": 1460 + }, + { + "epoch": 1.31, + "learning_rate": 0.0003675, + "loss": 0.955, + "step": 1470 + }, + { + "epoch": 1.32, + "learning_rate": 0.00037, + "loss": 0.9234, + "step": 1480 + }, + { + "epoch": 1.33, + "learning_rate": 0.0003725, + "loss": 0.9129, + "step": 1490 + }, + { + "epoch": 1.34, + "learning_rate": 0.000375, + "loss": 0.9137, + "step": 1500 + }, + { + "epoch": 1.35, + "learning_rate": 0.0003775, + "loss": 0.9047, + "step": 1510 + }, + { + "epoch": 1.35, + "learning_rate": 0.00038, + "loss": 0.9261, + "step": 1520 + }, + { + "epoch": 1.36, + "learning_rate": 0.00038250000000000003, + "loss": 0.876, + "step": 1530 + }, + { + "epoch": 1.37, + "learning_rate": 0.00038500000000000003, + "loss": 0.9451, + "step": 1540 + }, + { + "epoch": 1.38, + "learning_rate": 0.00038750000000000004, + "loss": 0.9117, + "step": 1550 + }, + { + "epoch": 1.39, + "learning_rate": 0.00039000000000000005, + "loss": 0.9202, + "step": 1560 + }, + { + "epoch": 1.4, + "learning_rate": 0.0003925, + "loss": 0.9203, + "step": 1570 + }, + { + "epoch": 1.41, + "learning_rate": 0.000395, + "loss": 0.9688, + "step": 1580 + }, + { + "epoch": 1.42, + "learning_rate": 0.0003975, + "loss": 0.9265, + "step": 1590 + }, + { + "epoch": 1.43, + "learning_rate": 0.0004, + "loss": 0.9381, + "step": 1600 + }, + { + "epoch": 1.43, + "eval_bp": 1.0, + "eval_counts": [ + 27320, + 18959, + 14016, + 10644 + ], + "eval_loss": 0.4612188935279846, + "eval_precisions": [ + 58.260294713496684, + 43.654156113285744, + 35.06191369606004, + 29.116174740815712 + ], + "eval_ref_len": 41379, + "eval_runtime": 502.6767, + "eval_samples_per_second": 6.889, + "eval_score": 40.14135718782399, + "eval_steps_per_second": 6.889, + "eval_sys_len": 46893, + "eval_totals": [ + 46893, + 43430, + 39975, + 36557 + ], + "step": 1600 + }, + { + "epoch": 1.43, + "learning_rate": 0.0004025, + "loss": 0.9329, + "step": 1610 + }, + { + "epoch": 1.44, + "learning_rate": 0.00040500000000000003, + "loss": 0.9324, + "step": 1620 + }, + { + "epoch": 1.45, + "learning_rate": 0.0004075, + "loss": 0.927, + "step": 1630 + }, + { + "epoch": 1.46, + "learning_rate": 0.00041, + "loss": 0.9037, + "step": 1640 + }, + { + "epoch": 1.47, + "learning_rate": 0.0004125, + "loss": 0.8935, + "step": 1650 + }, + { + "epoch": 1.48, + "learning_rate": 0.000415, + "loss": 0.9072, + "step": 1660 + }, + { + "epoch": 1.49, + "learning_rate": 0.0004175, + "loss": 0.8702, + "step": 1670 + }, + { + "epoch": 1.5, + "learning_rate": 0.00042, + "loss": 0.9744, + "step": 1680 + }, + { + "epoch": 1.51, + "learning_rate": 0.00042249999999999997, + "loss": 0.8935, + "step": 1690 + }, + { + "epoch": 1.52, + "learning_rate": 0.000425, + "loss": 0.9277, + "step": 1700 + }, + { + "epoch": 1.52, + "learning_rate": 0.0004275, + "loss": 0.8846, + "step": 1710 + }, + { + "epoch": 1.53, + "learning_rate": 0.00043, + "loss": 0.8766, + "step": 1720 + }, + { + "epoch": 1.54, + "learning_rate": 0.0004325, + "loss": 0.8836, + "step": 1730 + }, + { + "epoch": 1.55, + "learning_rate": 0.000435, + "loss": 0.921, + "step": 1740 + }, + { + "epoch": 1.56, + "learning_rate": 0.0004375, + "loss": 0.9319, + "step": 1750 + }, + { + "epoch": 1.57, + "learning_rate": 0.00044, + "loss": 0.9127, + "step": 1760 + }, + { + "epoch": 1.58, + "learning_rate": 0.0004425, + "loss": 0.9045, + "step": 1770 + }, + { + "epoch": 1.59, + "learning_rate": 0.00044500000000000003, + "loss": 0.918, + "step": 1780 + }, + { + "epoch": 1.6, + "learning_rate": 0.00044750000000000004, + "loss": 0.9119, + "step": 1790 + }, + { + "epoch": 1.6, + "learning_rate": 0.00045000000000000004, + "loss": 0.8815, + "step": 1800 + }, + { + "epoch": 1.6, + "eval_bp": 1.0, + "eval_counts": [ + 28644, + 20158, + 15010, + 11444 + ], + "eval_loss": 0.45668816566467285, + "eval_precisions": [ + 68.8309503784693, + 52.8360243237576, + 43.25897746267796, + 36.58099987213911 + ], + "eval_ref_len": 41379, + "eval_runtime": 377.0997, + "eval_samples_per_second": 9.183, + "eval_score": 48.97917095230454, + "eval_steps_per_second": 9.183, + "eval_sys_len": 41615, + "eval_totals": [ + 41615, + 38152, + 34698, + 31284 + ], + "step": 1800 + }, + { + "epoch": 1.61, + "learning_rate": 0.00045250000000000005, + "loss": 0.8805, + "step": 1810 + }, + { + "epoch": 1.62, + "learning_rate": 0.000455, + "loss": 0.8728, + "step": 1820 + }, + { + "epoch": 1.63, + "learning_rate": 0.0004575, + "loss": 0.885, + "step": 1830 + }, + { + "epoch": 1.64, + "learning_rate": 0.00046, + "loss": 0.9132, + "step": 1840 + }, + { + "epoch": 1.65, + "learning_rate": 0.0004625, + "loss": 0.8883, + "step": 1850 + }, + { + "epoch": 1.66, + "learning_rate": 0.000465, + "loss": 0.8669, + "step": 1860 + }, + { + "epoch": 1.67, + "learning_rate": 0.00046750000000000003, + "loss": 0.9288, + "step": 1870 + }, + { + "epoch": 1.68, + "learning_rate": 0.00047, + "loss": 0.8605, + "step": 1880 + }, + { + "epoch": 1.68, + "learning_rate": 0.0004725, + "loss": 0.8763, + "step": 1890 + }, + { + "epoch": 1.69, + "learning_rate": 0.000475, + "loss": 0.8821, + "step": 1900 + }, + { + "epoch": 1.7, + "learning_rate": 0.0004775, + "loss": 0.8632, + "step": 1910 + }, + { + "epoch": 1.71, + "learning_rate": 0.00048, + "loss": 0.8763, + "step": 1920 + }, + { + "epoch": 1.72, + "learning_rate": 0.0004825, + "loss": 0.8936, + "step": 1930 + }, + { + "epoch": 1.73, + "learning_rate": 0.00048499999999999997, + "loss": 0.9197, + "step": 1940 + }, + { + "epoch": 1.74, + "learning_rate": 0.0004875, + "loss": 0.8868, + "step": 1950 + }, + { + "epoch": 1.75, + "learning_rate": 0.00049, + "loss": 0.8543, + "step": 1960 + }, + { + "epoch": 1.76, + "learning_rate": 0.0004925, + "loss": 0.8487, + "step": 1970 + }, + { + "epoch": 1.76, + "learning_rate": 0.000495, + "loss": 0.8654, + "step": 1980 + }, + { + "epoch": 1.77, + "learning_rate": 0.0004975, + "loss": 0.878, + "step": 1990 + }, + { + "epoch": 1.78, + "learning_rate": 0.0005, + "loss": 0.8889, + "step": 2000 + }, + { + "epoch": 1.78, + "eval_bp": 1.0, + "eval_counts": [ + 28627, + 20189, + 15089, + 11523 + ], + "eval_loss": 0.45155465602874756, + "eval_precisions": [ + 68.39401758409785, + 52.58510666006824, + 43.185460789925585, + 36.551942902458364 + ], + "eval_ref_len": 41379, + "eval_runtime": 367.2279, + "eval_samples_per_second": 9.43, + "eval_score": 48.812631909516725, + "eval_steps_per_second": 9.43, + "eval_sys_len": 41856, + "eval_totals": [ + 41856, + 38393, + 34940, + 31525 + ], + "step": 2000 + }, + { + "epoch": 1.79, + "learning_rate": 0.0005024999999999999, + "loss": 0.8704, + "step": 2010 + }, + { + "epoch": 1.8, + "learning_rate": 0.000505, + "loss": 0.8952, + "step": 2020 + }, + { + "epoch": 1.81, + "learning_rate": 0.0005074999999999999, + "loss": 0.8621, + "step": 2030 + }, + { + "epoch": 1.82, + "learning_rate": 0.00051, + "loss": 0.8965, + "step": 2040 + }, + { + "epoch": 1.83, + "learning_rate": 0.0005124999999999999, + "loss": 0.8919, + "step": 2050 + }, + { + "epoch": 1.84, + "learning_rate": 0.000515, + "loss": 0.888, + "step": 2060 + }, + { + "epoch": 1.84, + "learning_rate": 0.0005175, + "loss": 0.8876, + "step": 2070 + }, + { + "epoch": 1.85, + "learning_rate": 0.0005200000000000001, + "loss": 0.8368, + "step": 2080 + }, + { + "epoch": 1.86, + "learning_rate": 0.0005225, + "loss": 0.8681, + "step": 2090 + }, + { + "epoch": 1.87, + "learning_rate": 0.0005250000000000001, + "loss": 0.9107, + "step": 2100 + }, + { + "epoch": 1.88, + "learning_rate": 0.0005275, + "loss": 0.9227, + "step": 2110 + }, + { + "epoch": 1.89, + "learning_rate": 0.0005300000000000001, + "loss": 0.9045, + "step": 2120 + }, + { + "epoch": 1.9, + "learning_rate": 0.0005325, + "loss": 0.8744, + "step": 2130 + }, + { + "epoch": 1.91, + "learning_rate": 0.000535, + "loss": 0.9088, + "step": 2140 + }, + { + "epoch": 1.92, + "learning_rate": 0.0005375, + "loss": 0.8861, + "step": 2150 + }, + { + "epoch": 1.93, + "learning_rate": 0.00054, + "loss": 0.8542, + "step": 2160 + }, + { + "epoch": 1.93, + "learning_rate": 0.0005425, + "loss": 0.915, + "step": 2170 + }, + { + "epoch": 1.94, + "learning_rate": 0.000545, + "loss": 0.8839, + "step": 2180 + }, + { + "epoch": 1.95, + "learning_rate": 0.0005475, + "loss": 0.8698, + "step": 2190 + }, + { + "epoch": 1.96, + "learning_rate": 0.00055, + "loss": 0.8598, + "step": 2200 + }, + { + "epoch": 1.96, + "eval_bp": 1.0, + "eval_counts": [ + 27573, + 19348, + 14395, + 10963 + ], + "eval_loss": 0.4470996558666229, + "eval_precisions": [ + 55.13717805151176, + 41.56837469115909, + 33.40449725013343, + 27.63061723416589 + ], + "eval_ref_len": 41379, + "eval_runtime": 526.7436, + "eval_samples_per_second": 6.574, + "eval_score": 38.137373171080895, + "eval_steps_per_second": 6.574, + "eval_sys_len": 50008, + "eval_totals": [ + 50008, + 46545, + 43093, + 39677 + ], + "step": 2200 + }, + { + "epoch": 1.97, + "learning_rate": 0.0005525, + "loss": 0.8728, + "step": 2210 + }, + { + "epoch": 1.98, + "learning_rate": 0.000555, + "loss": 0.9058, + "step": 2220 + }, + { + "epoch": 1.99, + "learning_rate": 0.0005575, + "loss": 0.8776, + "step": 2230 + }, + { + "epoch": 2.0, + "learning_rate": 0.0005600000000000001, + "loss": 0.8669, + "step": 2240 + }, + { + "epoch": 2.01, + "learning_rate": 0.0005625000000000001, + "loss": 0.8562, + "step": 2250 + }, + { + "epoch": 2.01, + "learning_rate": 0.000565, + "loss": 0.7894, + "step": 2260 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005675, + "loss": 0.832, + "step": 2270 + }, + { + "epoch": 2.03, + "learning_rate": 0.00057, + "loss": 0.8437, + "step": 2280 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005725, + "loss": 0.8788, + "step": 2290 + }, + { + "epoch": 2.05, + "learning_rate": 0.000575, + "loss": 0.8238, + "step": 2300 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005775, + "loss": 0.8248, + "step": 2310 + }, + { + "epoch": 2.07, + "learning_rate": 0.00058, + "loss": 0.8653, + "step": 2320 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005825, + "loss": 0.8257, + "step": 2330 + }, + { + "epoch": 2.09, + "learning_rate": 0.000585, + "loss": 0.8608, + "step": 2340 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005875, + "loss": 0.8483, + "step": 2350 + }, + { + "epoch": 2.1, + "learning_rate": 0.00059, + "loss": 0.8503, + "step": 2360 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005925, + "loss": 0.8306, + "step": 2370 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005949999999999999, + "loss": 0.8413, + "step": 2380 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005975, + "loss": 0.8448, + "step": 2390 + }, + { + "epoch": 2.14, + "learning_rate": 0.0006, + "loss": 0.8693, + "step": 2400 + }, + { + "epoch": 2.14, + "eval_bp": 1.0, + "eval_counts": [ + 27425, + 19232, + 14323, + 10947 + ], + "eval_loss": 0.44469693303108215, + "eval_precisions": [ + 52.27294386734013, + 39.24737765805477, + 31.448708940804497, + 25.99002849002849 + ], + "eval_ref_len": 41379, + "eval_runtime": 633.5059, + "eval_samples_per_second": 5.466, + "eval_score": 35.98523101021763, + "eval_steps_per_second": 5.466, + "eval_sys_len": 52465, + "eval_totals": [ + 52465, + 49002, + 45544, + 42120 + ], + "step": 2400 + }, + { + "epoch": 2.15, + "learning_rate": 0.0006025000000000001, + "loss": 0.8079, + "step": 2410 + }, + { + "epoch": 2.16, + "learning_rate": 0.000605, + "loss": 0.8713, + "step": 2420 + }, + { + "epoch": 2.17, + "learning_rate": 0.0006075000000000001, + "loss": 0.8728, + "step": 2430 + }, + { + "epoch": 2.17, + "learning_rate": 0.00061, + "loss": 0.8336, + "step": 2440 + }, + { + "epoch": 2.18, + "learning_rate": 0.0006125000000000001, + "loss": 0.8426, + "step": 2450 + }, + { + "epoch": 2.19, + "learning_rate": 0.000615, + "loss": 0.8462, + "step": 2460 + }, + { + "epoch": 2.2, + "learning_rate": 0.0006175000000000001, + "loss": 0.8412, + "step": 2470 + }, + { + "epoch": 2.21, + "learning_rate": 0.00062, + "loss": 0.8114, + "step": 2480 + }, + { + "epoch": 2.22, + "learning_rate": 0.0006225000000000001, + "loss": 0.826, + "step": 2490 + }, + { + "epoch": 2.23, + "learning_rate": 0.000625, + "loss": 0.8248, + "step": 2500 + }, + { + "epoch": 2.24, + "learning_rate": 0.0006274999999999999, + "loss": 0.8524, + "step": 2510 + }, + { + "epoch": 2.25, + "learning_rate": 0.00063, + "loss": 0.8695, + "step": 2520 + }, + { + "epoch": 2.25, + "learning_rate": 0.0006324999999999999, + "loss": 0.826, + "step": 2530 + }, + { + "epoch": 2.26, + "learning_rate": 0.000635, + "loss": 0.8171, + "step": 2540 + }, + { + "epoch": 2.27, + "learning_rate": 0.0006374999999999999, + "loss": 0.8296, + "step": 2550 + }, + { + "epoch": 2.28, + "learning_rate": 0.00064, + "loss": 0.8438, + "step": 2560 + }, + { + "epoch": 2.29, + "learning_rate": 0.0006425, + "loss": 0.8327, + "step": 2570 + }, + { + "epoch": 2.3, + "learning_rate": 0.0006450000000000001, + "loss": 0.8357, + "step": 2580 + }, + { + "epoch": 2.31, + "learning_rate": 0.0006475, + "loss": 0.8652, + "step": 2590 + }, + { + "epoch": 2.32, + "learning_rate": 0.0006500000000000001, + "loss": 0.8196, + "step": 2600 + }, + { + "epoch": 2.32, + "eval_bp": 1.0, + "eval_counts": [ + 28513, + 20270, + 15211, + 11701 + ], + "eval_loss": 0.43805596232414246, + "eval_precisions": [ + 66.24459829933554, + 51.21402764092069, + 42.11356903568759, + 35.78834684202477 + ], + "eval_ref_len": 41379, + "eval_runtime": 409.6535, + "eval_samples_per_second": 8.453, + "eval_score": 47.55276474687039, + "eval_steps_per_second": 8.453, + "eval_sys_len": 43042, + "eval_totals": [ + 43042, + 39579, + 36119, + 32695 + ], + "step": 2600 + }, + { + "epoch": 2.33, + "learning_rate": 0.0006525, + "loss": 0.8485, + "step": 2610 + }, + { + "epoch": 2.34, + "learning_rate": 0.0006550000000000001, + "loss": 0.8067, + "step": 2620 + }, + { + "epoch": 2.34, + "learning_rate": 0.0006575, + "loss": 0.8607, + "step": 2630 + }, + { + "epoch": 2.35, + "learning_rate": 0.00066, + "loss": 0.8362, + "step": 2640 + }, + { + "epoch": 2.36, + "learning_rate": 0.0006625, + "loss": 0.8426, + "step": 2650 + }, + { + "epoch": 2.37, + "learning_rate": 0.000665, + "loss": 0.8327, + "step": 2660 + }, + { + "epoch": 2.38, + "learning_rate": 0.0006675, + "loss": 0.8404, + "step": 2670 + }, + { + "epoch": 2.39, + "learning_rate": 0.00067, + "loss": 0.8353, + "step": 2680 + }, + { + "epoch": 2.4, + "learning_rate": 0.0006725, + "loss": 0.8576, + "step": 2690 + }, + { + "epoch": 2.41, + "learning_rate": 0.000675, + "loss": 0.8357, + "step": 2700 + }, + { + "epoch": 2.42, + "learning_rate": 0.0006775, + "loss": 0.8328, + "step": 2710 + }, + { + "epoch": 2.42, + "learning_rate": 0.00068, + "loss": 0.8499, + "step": 2720 + }, + { + "epoch": 2.43, + "learning_rate": 0.0006825000000000001, + "loss": 0.8171, + "step": 2730 + }, + { + "epoch": 2.44, + "learning_rate": 0.0006850000000000001, + "loss": 0.8372, + "step": 2740 + }, + { + "epoch": 2.45, + "learning_rate": 0.0006875, + "loss": 0.8542, + "step": 2750 + }, + { + "epoch": 2.46, + "learning_rate": 0.00069, + "loss": 0.8209, + "step": 2760 + }, + { + "epoch": 2.47, + "learning_rate": 0.0006925, + "loss": 0.8048, + "step": 2770 + }, + { + "epoch": 2.48, + "learning_rate": 0.000695, + "loss": 0.8162, + "step": 2780 + }, + { + "epoch": 2.49, + "learning_rate": 0.0006975, + "loss": 0.8293, + "step": 2790 + }, + { + "epoch": 2.5, + "learning_rate": 0.0007, + "loss": 0.8423, + "step": 2800 + }, + { + "epoch": 2.5, + "eval_bp": 1.0, + "eval_counts": [ + 28571, + 20383, + 15384, + 11858 + ], + "eval_loss": 0.43379348516464233, + "eval_precisions": [ + 65.67744011769574, + 50.907864831789006, + 42.046572646769434, + 35.745937961595274 + ], + "eval_ref_len": 41379, + "eval_runtime": 414.063, + "eval_samples_per_second": 8.363, + "eval_score": 47.346687432735244, + "eval_steps_per_second": 8.363, + "eval_sys_len": 43502, + "eval_totals": [ + 43502, + 40039, + 36588, + 33173 + ], + "step": 2800 + }, + { + "epoch": 2.5, + "learning_rate": 0.0007025, + "loss": 0.8276, + "step": 2810 + }, + { + "epoch": 2.51, + "learning_rate": 0.000705, + "loss": 0.8094, + "step": 2820 + }, + { + "epoch": 2.52, + "learning_rate": 0.0007075, + "loss": 0.8435, + "step": 2830 + }, + { + "epoch": 2.53, + "learning_rate": 0.00071, + "loss": 0.8609, + "step": 2840 + }, + { + "epoch": 2.54, + "learning_rate": 0.0007125, + "loss": 0.8234, + "step": 2850 + }, + { + "epoch": 2.55, + "learning_rate": 0.000715, + "loss": 0.8359, + "step": 2860 + }, + { + "epoch": 2.56, + "learning_rate": 0.0007175, + "loss": 0.844, + "step": 2870 + }, + { + "epoch": 2.57, + "learning_rate": 0.0007199999999999999, + "loss": 0.8147, + "step": 2880 + }, + { + "epoch": 2.58, + "learning_rate": 0.0007225, + "loss": 0.8016, + "step": 2890 + }, + { + "epoch": 2.58, + "learning_rate": 0.000725, + "loss": 0.8507, + "step": 2900 + }, + { + "epoch": 2.59, + "learning_rate": 0.0007275000000000001, + "loss": 0.8298, + "step": 2910 + }, + { + "epoch": 2.6, + "learning_rate": 0.00073, + "loss": 0.8564, + "step": 2920 + }, + { + "epoch": 2.61, + "learning_rate": 0.0007325000000000001, + "loss": 0.79, + "step": 2930 + }, + { + "epoch": 2.62, + "learning_rate": 0.000735, + "loss": 0.8021, + "step": 2940 + }, + { + "epoch": 2.63, + "learning_rate": 0.0007375000000000001, + "loss": 0.8208, + "step": 2950 + }, + { + "epoch": 2.64, + "learning_rate": 0.00074, + "loss": 0.8673, + "step": 2960 + }, + { + "epoch": 2.65, + "learning_rate": 0.0007425000000000001, + "loss": 0.8469, + "step": 2970 + }, + { + "epoch": 2.66, + "learning_rate": 0.000745, + "loss": 0.8596, + "step": 2980 + }, + { + "epoch": 2.66, + "learning_rate": 0.0007475000000000001, + "loss": 0.8321, + "step": 2990 + }, + { + "epoch": 2.67, + "learning_rate": 0.00075, + "loss": 0.7965, + "step": 3000 + }, + { + "epoch": 2.67, + "eval_bp": 1.0, + "eval_counts": [ + 27367, + 19330, + 14468, + 11113 + ], + "eval_loss": 0.433325856924057, + "eval_precisions": [ + 47.69681231155341, + 35.853396149423155, + 28.673352094811527, + 23.624574829931973 + ], + "eval_ref_len": 41379, + "eval_runtime": 723.5054, + "eval_samples_per_second": 4.786, + "eval_score": 32.80692539007455, + "eval_steps_per_second": 4.786, + "eval_sys_len": 57377, + "eval_totals": [ + 57377, + 53914, + 50458, + 47040 + ], + "step": 3000 + }, + { + "epoch": 2.68, + "learning_rate": 0.0007524999999999999, + "loss": 0.8643, + "step": 3010 + }, + { + "epoch": 2.69, + "learning_rate": 0.000755, + "loss": 0.8288, + "step": 3020 + }, + { + "epoch": 2.7, + "learning_rate": 0.0007574999999999999, + "loss": 0.8361, + "step": 3030 + }, + { + "epoch": 2.71, + "learning_rate": 0.00076, + "loss": 0.8157, + "step": 3040 + }, + { + "epoch": 2.72, + "learning_rate": 0.0007624999999999999, + "loss": 0.8668, + "step": 3050 + }, + { + "epoch": 2.73, + "learning_rate": 0.0007650000000000001, + "loss": 0.8379, + "step": 3060 + }, + { + "epoch": 2.74, + "learning_rate": 0.0007675, + "loss": 0.8419, + "step": 3070 + }, + { + "epoch": 2.75, + "learning_rate": 0.0007700000000000001, + "loss": 0.8147, + "step": 3080 + }, + { + "epoch": 2.75, + "learning_rate": 0.0007725, + "loss": 0.8318, + "step": 3090 + }, + { + "epoch": 2.76, + "learning_rate": 0.0007750000000000001, + "loss": 0.7908, + "step": 3100 + }, + { + "epoch": 2.77, + "learning_rate": 0.0007775, + "loss": 0.8331, + "step": 3110 + }, + { + "epoch": 2.78, + "learning_rate": 0.0007800000000000001, + "loss": 0.8459, + "step": 3120 + }, + { + "epoch": 2.79, + "learning_rate": 0.0007825, + "loss": 0.8177, + "step": 3130 + }, + { + "epoch": 2.8, + "learning_rate": 0.000785, + "loss": 0.8354, + "step": 3140 + }, + { + "epoch": 2.81, + "learning_rate": 0.0007875, + "loss": 0.8164, + "step": 3150 + }, + { + "epoch": 2.82, + "learning_rate": 0.00079, + "loss": 0.8422, + "step": 3160 + }, + { + "epoch": 2.83, + "learning_rate": 0.0007925, + "loss": 0.783, + "step": 3170 + }, + { + "epoch": 2.83, + "learning_rate": 0.000795, + "loss": 0.8223, + "step": 3180 + }, + { + "epoch": 2.84, + "learning_rate": 0.0007975, + "loss": 0.8012, + "step": 3190 + }, + { + "epoch": 2.85, + "learning_rate": 0.0008, + "loss": 0.8108, + "step": 3200 + }, + { + "epoch": 2.85, + "eval_bp": 1.0, + "eval_counts": [ + 28853, + 20613, + 15542, + 11972 + ], + "eval_loss": 0.4307364523410797, + "eval_precisions": [ + 68.28465944052634, + 53.138614627104225, + 43.98347294543808, + 37.508615828059405 + ], + "eval_ref_len": 41379, + "eval_runtime": 423.5558, + "eval_samples_per_second": 8.176, + "eval_score": 49.463914513865696, + "eval_steps_per_second": 8.176, + "eval_sys_len": 42254, + "eval_totals": [ + 42254, + 38791, + 35336, + 31918 + ], + "step": 3200 + }, + { + "epoch": 2.86, + "learning_rate": 0.0008025, + "loss": 0.8126, + "step": 3210 + }, + { + "epoch": 2.87, + "learning_rate": 0.000805, + "loss": 0.8282, + "step": 3220 + }, + { + "epoch": 2.88, + "learning_rate": 0.0008075000000000001, + "loss": 0.8188, + "step": 3230 + }, + { + "epoch": 2.89, + "learning_rate": 0.0008100000000000001, + "loss": 0.7631, + "step": 3240 + }, + { + "epoch": 2.9, + "learning_rate": 0.0008125000000000001, + "loss": 0.8016, + "step": 3250 + }, + { + "epoch": 2.91, + "learning_rate": 0.000815, + "loss": 0.8385, + "step": 3260 + }, + { + "epoch": 2.91, + "learning_rate": 0.0008175, + "loss": 0.8138, + "step": 3270 + }, + { + "epoch": 2.92, + "learning_rate": 0.00082, + "loss": 0.7849, + "step": 3280 + }, + { + "epoch": 2.93, + "learning_rate": 0.0008225, + "loss": 0.8189, + "step": 3290 + }, + { + "epoch": 2.94, + "learning_rate": 0.000825, + "loss": 0.8385, + "step": 3300 + }, + { + "epoch": 2.95, + "learning_rate": 0.0008275, + "loss": 0.7924, + "step": 3310 + }, + { + "epoch": 2.96, + "learning_rate": 0.00083, + "loss": 0.8227, + "step": 3320 + }, + { + "epoch": 2.97, + "learning_rate": 0.0008325, + "loss": 0.8135, + "step": 3330 + }, + { + "epoch": 2.98, + "learning_rate": 0.000835, + "loss": 0.7983, + "step": 3340 + }, + { + "epoch": 2.99, + "learning_rate": 0.0008375, + "loss": 0.8417, + "step": 3350 + }, + { + "epoch": 2.99, + "learning_rate": 0.00084, + "loss": 0.8202, + "step": 3360 + }, + { + "epoch": 3.0, + "learning_rate": 0.0008425, + "loss": 0.8205, + "step": 3370 + }, + { + "epoch": 3.01, + "learning_rate": 0.0008449999999999999, + "loss": 0.7563, + "step": 3380 + }, + { + "epoch": 3.02, + "learning_rate": 0.0008475000000000001, + "loss": 0.8087, + "step": 3390 + }, + { + "epoch": 3.03, + "learning_rate": 0.00085, + "loss": 0.7227, + "step": 3400 + }, + { + "epoch": 3.03, + "eval_bp": 1.0, + "eval_counts": [ + 28393, + 20333, + 15377, + 11915 + ], + "eval_loss": 0.4285724461078644, + "eval_precisions": [ + 66.42103539429668, + 51.75898584665513, + 42.91774819280471, + 36.76447900274615 + ], + "eval_ref_len": 41379, + "eval_runtime": 379.2685, + "eval_samples_per_second": 9.131, + "eval_score": 48.260207590301434, + "eval_steps_per_second": 9.131, + "eval_sys_len": 42747, + "eval_totals": [ + 42747, + 39284, + 35829, + 32409 + ], + "step": 3400 + }, + { + "epoch": 3.04, + "learning_rate": 0.0008525000000000001, + "loss": 0.7961, + "step": 3410 + }, + { + "epoch": 3.05, + "learning_rate": 0.000855, + "loss": 0.8006, + "step": 3420 + }, + { + "epoch": 3.06, + "learning_rate": 0.0008575000000000001, + "loss": 0.7477, + "step": 3430 + }, + { + "epoch": 3.07, + "learning_rate": 0.00086, + "loss": 0.7639, + "step": 3440 + }, + { + "epoch": 3.07, + "learning_rate": 0.0008625000000000001, + "loss": 0.7835, + "step": 3450 + }, + { + "epoch": 3.08, + "learning_rate": 0.000865, + "loss": 0.7763, + "step": 3460 + }, + { + "epoch": 3.09, + "learning_rate": 0.0008675000000000001, + "loss": 0.8046, + "step": 3470 + }, + { + "epoch": 3.1, + "learning_rate": 0.00087, + "loss": 0.7509, + "step": 3480 + }, + { + "epoch": 3.11, + "learning_rate": 0.0008725000000000001, + "loss": 0.781, + "step": 3490 + }, + { + "epoch": 3.12, + "learning_rate": 0.000875, + "loss": 0.7989, + "step": 3500 + }, + { + "epoch": 3.13, + "learning_rate": 0.0008774999999999999, + "loss": 0.8082, + "step": 3510 + }, + { + "epoch": 3.14, + "learning_rate": 0.00088, + "loss": 0.7752, + "step": 3520 + }, + { + "epoch": 3.15, + "learning_rate": 0.0008824999999999999, + "loss": 0.7655, + "step": 3530 + }, + { + "epoch": 3.16, + "learning_rate": 0.000885, + "loss": 0.8013, + "step": 3540 + }, + { + "epoch": 3.16, + "learning_rate": 0.0008874999999999999, + "loss": 0.7823, + "step": 3550 + }, + { + "epoch": 3.17, + "learning_rate": 0.0008900000000000001, + "loss": 0.8154, + "step": 3560 + }, + { + "epoch": 3.18, + "learning_rate": 0.0008925, + "loss": 0.7932, + "step": 3570 + }, + { + "epoch": 3.19, + "learning_rate": 0.0008950000000000001, + "loss": 0.7951, + "step": 3580 + }, + { + "epoch": 3.2, + "learning_rate": 0.0008975, + "loss": 0.7676, + "step": 3590 + }, + { + "epoch": 3.21, + "learning_rate": 0.0009000000000000001, + "loss": 0.7851, + "step": 3600 + }, + { + "epoch": 3.21, + "eval_bp": 1.0, + "eval_counts": [ + 28905, + 20753, + 15720, + 12165 + ], + "eval_loss": 0.4306156039237976, + "eval_precisions": [ + 68.80668428193958, + 53.83956830799564, + 44.78887685908029, + 38.38508140855736 + ], + "eval_ref_len": 41379, + "eval_runtime": 368.5897, + "eval_samples_per_second": 9.395, + "eval_score": 50.23613389703178, + "eval_steps_per_second": 9.395, + "eval_sys_len": 42009, + "eval_totals": [ + 42009, + 38546, + 35098, + 31692 + ], + "step": 3600 + }, + { + "epoch": 3.22, + "learning_rate": 0.0009025, + "loss": 0.8031, + "step": 3610 + }, + { + "epoch": 3.23, + "learning_rate": 0.0009050000000000001, + "loss": 0.8191, + "step": 3620 + }, + { + "epoch": 3.24, + "learning_rate": 0.0009075, + "loss": 0.7885, + "step": 3630 + }, + { + "epoch": 3.24, + "learning_rate": 0.00091, + "loss": 0.8008, + "step": 3640 + }, + { + "epoch": 3.25, + "learning_rate": 0.0009125, + "loss": 0.8004, + "step": 3650 + }, + { + "epoch": 3.26, + "learning_rate": 0.000915, + "loss": 0.8011, + "step": 3660 + }, + { + "epoch": 3.27, + "learning_rate": 0.0009175, + "loss": 0.8327, + "step": 3670 + }, + { + "epoch": 3.28, + "learning_rate": 0.00092, + "loss": 0.7965, + "step": 3680 + }, + { + "epoch": 3.29, + "learning_rate": 0.0009225, + "loss": 0.8453, + "step": 3690 + }, + { + "epoch": 3.3, + "learning_rate": 0.000925, + "loss": 0.7944, + "step": 3700 + }, + { + "epoch": 3.31, + "learning_rate": 0.0009275, + "loss": 0.7959, + "step": 3710 + }, + { + "epoch": 3.32, + "learning_rate": 0.00093, + "loss": 0.7691, + "step": 3720 + }, + { + "epoch": 3.32, + "learning_rate": 0.0009325000000000001, + "loss": 0.805, + "step": 3730 + }, + { + "epoch": 3.33, + "learning_rate": 0.0009350000000000001, + "loss": 0.7473, + "step": 3740 + }, + { + "epoch": 3.34, + "learning_rate": 0.0009375, + "loss": 0.7917, + "step": 3750 + }, + { + "epoch": 3.35, + "learning_rate": 0.00094, + "loss": 0.8023, + "step": 3760 + }, + { + "epoch": 3.36, + "learning_rate": 0.0009425, + "loss": 0.7892, + "step": 3770 + }, + { + "epoch": 3.37, + "learning_rate": 0.000945, + "loss": 0.785, + "step": 3780 + }, + { + "epoch": 3.38, + "learning_rate": 0.0009475, + "loss": 0.792, + "step": 3790 + }, + { + "epoch": 3.39, + "learning_rate": 0.00095, + "loss": 0.7561, + "step": 3800 + }, + { + "epoch": 3.39, + "eval_bp": 0.9827426623868254, + "eval_counts": [ + 28866, + 20722, + 15692, + 12126 + ], + "eval_loss": 0.42703622579574585, + "eval_precisions": [ + 70.9744043667478, + 55.692324231348096, + 46.4851734455076, + 39.96177168468231 + ], + "eval_ref_len": 41379, + "eval_runtime": 313.0426, + "eval_samples_per_second": 11.062, + "eval_score": 51.15686029358077, + "eval_steps_per_second": 11.062, + "eval_sys_len": 40671, + "eval_totals": [ + 40671, + 37208, + 33757, + 30344 + ], + "step": 3800 + }, + { + "epoch": 3.4, + "learning_rate": 0.0009525, + "loss": 0.8078, + "step": 3810 + }, + { + "epoch": 3.4, + "learning_rate": 0.000955, + "loss": 0.8172, + "step": 3820 + }, + { + "epoch": 3.41, + "learning_rate": 0.0009575, + "loss": 0.7645, + "step": 3830 + }, + { + "epoch": 3.42, + "learning_rate": 0.00096, + "loss": 0.7616, + "step": 3840 + }, + { + "epoch": 3.43, + "learning_rate": 0.0009625, + "loss": 0.7658, + "step": 3850 + }, + { + "epoch": 3.44, + "learning_rate": 0.000965, + "loss": 0.7459, + "step": 3860 + }, + { + "epoch": 3.45, + "learning_rate": 0.0009675, + "loss": 0.7666, + "step": 3870 + }, + { + "epoch": 3.46, + "learning_rate": 0.0009699999999999999, + "loss": 0.7894, + "step": 3880 + }, + { + "epoch": 3.47, + "learning_rate": 0.0009725000000000001, + "loss": 0.7572, + "step": 3890 + }, + { + "epoch": 3.48, + "learning_rate": 0.000975, + "loss": 0.7827, + "step": 3900 + }, + { + "epoch": 3.48, + "learning_rate": 0.0009775, + "loss": 0.8275, + "step": 3910 + }, + { + "epoch": 3.49, + "learning_rate": 0.00098, + "loss": 0.7848, + "step": 3920 + }, + { + "epoch": 3.5, + "learning_rate": 0.0009825, + "loss": 0.755, + "step": 3930 + }, + { + "epoch": 3.51, + "learning_rate": 0.000985, + "loss": 0.7603, + "step": 3940 + }, + { + "epoch": 3.52, + "learning_rate": 0.0009875, + "loss": 0.7703, + "step": 3950 + }, + { + "epoch": 3.53, + "learning_rate": 0.00099, + "loss": 0.7523, + "step": 3960 + }, + { + "epoch": 3.54, + "learning_rate": 0.0009925000000000001, + "loss": 0.8317, + "step": 3970 + }, + { + "epoch": 3.55, + "learning_rate": 0.000995, + "loss": 0.7548, + "step": 3980 + }, + { + "epoch": 3.56, + "learning_rate": 0.0009975000000000001, + "loss": 0.8015, + "step": 3990 + }, + { + "epoch": 3.57, + "learning_rate": 0.001, + "loss": 0.8206, + "step": 4000 + }, + { + "epoch": 3.57, + "eval_bp": 1.0, + "eval_counts": [ + 27552, + 19510, + 14673, + 11285 + ], + "eval_loss": 0.4267815351486206, + "eval_precisions": [ + 57.07538375489404, + 43.53938852934613, + 35.480594849474066, + 29.74825359166996 + ], + "eval_ref_len": 41379, + "eval_runtime": 541.444, + "eval_samples_per_second": 6.396, + "eval_score": 40.24351285668535, + "eval_steps_per_second": 6.396, + "eval_sys_len": 48273, + "eval_totals": [ + 48273, + 44810, + 41355, + 37935 + ], + "step": 4000 + }, + { + "epoch": 3.57, + "learning_rate": 0.001, + "loss": 0.78, + "step": 4010 + }, + { + "epoch": 3.58, + "learning_rate": 0.001, + "loss": 0.8166, + "step": 4020 + }, + { + "epoch": 3.59, + "learning_rate": 0.001, + "loss": 0.756, + "step": 4030 + }, + { + "epoch": 3.6, + "learning_rate": 0.001, + "loss": 0.777, + "step": 4040 + }, + { + "epoch": 3.61, + "learning_rate": 0.001, + "loss": 0.8195, + "step": 4050 + }, + { + "epoch": 3.62, + "learning_rate": 0.001, + "loss": 0.7597, + "step": 4060 + }, + { + "epoch": 3.63, + "learning_rate": 0.001, + "loss": 0.7881, + "step": 4070 + }, + { + "epoch": 3.64, + "learning_rate": 0.001, + "loss": 0.7737, + "step": 4080 + }, + { + "epoch": 3.65, + "learning_rate": 0.001, + "loss": 0.8094, + "step": 4090 + }, + { + "epoch": 3.65, + "learning_rate": 0.001, + "loss": 0.8164, + "step": 4100 + }, + { + "epoch": 3.66, + "learning_rate": 0.001, + "loss": 0.7642, + "step": 4110 + }, + { + "epoch": 3.67, + "learning_rate": 0.001, + "loss": 0.7943, + "step": 4120 + }, + { + "epoch": 3.68, + "learning_rate": 0.001, + "loss": 0.8135, + "step": 4130 + }, + { + "epoch": 3.69, + "learning_rate": 0.001, + "loss": 0.7656, + "step": 4140 + }, + { + "epoch": 3.7, + "learning_rate": 0.001, + "loss": 0.8099, + "step": 4150 + }, + { + "epoch": 3.71, + "learning_rate": 0.001, + "loss": 0.7723, + "step": 4160 + }, + { + "epoch": 3.72, + "learning_rate": 0.001, + "loss": 0.8023, + "step": 4170 + }, + { + "epoch": 3.73, + "learning_rate": 0.001, + "loss": 0.7847, + "step": 4180 + }, + { + "epoch": 3.73, + "learning_rate": 0.001, + "loss": 0.7681, + "step": 4190 + }, + { + "epoch": 3.74, + "learning_rate": 0.001, + "loss": 0.7777, + "step": 4200 + }, + { + "epoch": 3.74, + "eval_bp": 1.0, + "eval_counts": [ + 29035, + 20912, + 15887, + 12288 + ], + "eval_loss": 0.41903844475746155, + "eval_precisions": [ + 68.37717542330971, + 53.62051282051282, + 44.69545646363764, + 38.24939301500343 + ], + "eval_ref_len": 41379, + "eval_runtime": 387.8999, + "eval_samples_per_second": 8.928, + "eval_score": 50.03599162186965, + "eval_steps_per_second": 8.928, + "eval_sys_len": 42463, + "eval_totals": [ + 42463, + 39000, + 35545, + 32126 + ], + "step": 4200 + }, + { + "epoch": 3.75, + "learning_rate": 0.001, + "loss": 0.7665, + "step": 4210 + }, + { + "epoch": 3.76, + "learning_rate": 0.001, + "loss": 0.8047, + "step": 4220 + }, + { + "epoch": 3.77, + "learning_rate": 0.001, + "loss": 0.781, + "step": 4230 + }, + { + "epoch": 3.78, + "learning_rate": 0.001, + "loss": 0.7649, + "step": 4240 + }, + { + "epoch": 3.79, + "learning_rate": 0.001, + "loss": 0.7847, + "step": 4250 + }, + { + "epoch": 3.8, + "learning_rate": 0.001, + "loss": 0.7721, + "step": 4260 + }, + { + "epoch": 3.81, + "learning_rate": 0.001, + "loss": 0.758, + "step": 4270 + }, + { + "epoch": 3.81, + "learning_rate": 0.001, + "loss": 0.7738, + "step": 4280 + }, + { + "epoch": 3.82, + "learning_rate": 0.001, + "loss": 0.7786, + "step": 4290 + }, + { + "epoch": 3.83, + "learning_rate": 0.001, + "loss": 0.8155, + "step": 4300 + }, + { + "epoch": 3.84, + "learning_rate": 0.001, + "loss": 0.7831, + "step": 4310 + }, + { + "epoch": 3.85, + "learning_rate": 0.001, + "loss": 0.7836, + "step": 4320 + }, + { + "epoch": 3.86, + "learning_rate": 0.001, + "loss": 0.756, + "step": 4330 + }, + { + "epoch": 3.87, + "learning_rate": 0.001, + "loss": 0.7785, + "step": 4340 + }, + { + "epoch": 3.88, + "learning_rate": 0.001, + "loss": 0.8093, + "step": 4350 + }, + { + "epoch": 3.89, + "learning_rate": 0.001, + "loss": 0.7747, + "step": 4360 + }, + { + "epoch": 3.89, + "learning_rate": 0.001, + "loss": 0.78, + "step": 4370 + }, + { + "epoch": 3.9, + "learning_rate": 0.001, + "loss": 0.7726, + "step": 4380 + }, + { + "epoch": 3.91, + "learning_rate": 0.001, + "loss": 0.8045, + "step": 4390 + }, + { + "epoch": 3.92, + "learning_rate": 0.001, + "loss": 0.7241, + "step": 4400 + }, + { + "epoch": 3.92, + "eval_bp": 1.0, + "eval_counts": [ + 28666, + 20580, + 15602, + 12089 + ], + "eval_loss": 0.41805002093315125, + "eval_precisions": [ + 67.87743890888426, + 53.08364930743635, + 44.174523627509274, + 37.8894251864853 + ], + "eval_ref_len": 41379, + "eval_runtime": 376.5087, + "eval_samples_per_second": 9.198, + "eval_score": 49.55574706503557, + "eval_steps_per_second": 9.198, + "eval_sys_len": 42232, + "eval_totals": [ + 42232, + 38769, + 35319, + 31906 + ], + "step": 4400 + }, + { + "epoch": 3.93, + "learning_rate": 0.001, + "loss": 0.7529, + "step": 4410 + }, + { + "epoch": 3.94, + "learning_rate": 0.001, + "loss": 0.7707, + "step": 4420 + }, + { + "epoch": 3.95, + "learning_rate": 0.001, + "loss": 0.7616, + "step": 4430 + }, + { + "epoch": 3.96, + "learning_rate": 0.001, + "loss": 0.7536, + "step": 4440 + }, + { + "epoch": 3.97, + "learning_rate": 0.001, + "loss": 0.7813, + "step": 4450 + }, + { + "epoch": 3.98, + "learning_rate": 0.001, + "loss": 0.8368, + "step": 4460 + }, + { + "epoch": 3.98, + "learning_rate": 0.001, + "loss": 0.8251, + "step": 4470 + }, + { + "epoch": 3.99, + "learning_rate": 0.001, + "loss": 0.7924, + "step": 4480 + }, + { + "epoch": 4.0, + "learning_rate": 0.001, + "loss": 0.7777, + "step": 4490 + }, + { + "epoch": 4.01, + "learning_rate": 0.001, + "loss": 0.7578, + "step": 4500 + }, + { + "epoch": 4.02, + "learning_rate": 0.001, + "loss": 0.675, + "step": 4510 + }, + { + "epoch": 4.03, + "learning_rate": 0.001, + "loss": 0.7593, + "step": 4520 + }, + { + "epoch": 4.04, + "learning_rate": 0.001, + "loss": 0.7106, + "step": 4530 + }, + { + "epoch": 4.05, + "learning_rate": 0.001, + "loss": 0.7564, + "step": 4540 + }, + { + "epoch": 4.06, + "learning_rate": 0.001, + "loss": 0.7051, + "step": 4550 + }, + { + "epoch": 4.06, + "learning_rate": 0.001, + "loss": 0.724, + "step": 4560 + }, + { + "epoch": 4.07, + "learning_rate": 0.001, + "loss": 0.7047, + "step": 4570 + }, + { + "epoch": 4.08, + "learning_rate": 0.001, + "loss": 0.7605, + "step": 4580 + }, + { + "epoch": 4.09, + "learning_rate": 0.001, + "loss": 0.7314, + "step": 4590 + }, + { + "epoch": 4.1, + "learning_rate": 0.001, + "loss": 0.7592, + "step": 4600 + }, + { + "epoch": 4.1, + "eval_bp": 1.0, + "eval_counts": [ + 28474, + 20378, + 15394, + 11847 + ], + "eval_loss": 0.4237304925918579, + "eval_precisions": [ + 56.65791149315505, + 43.54924881926784, + 35.520790068761826, + 29.678340598226363 + ], + "eval_ref_len": 41379, + "eval_runtime": 584.8688, + "eval_samples_per_second": 5.921, + "eval_score": 40.15973749581189, + "eval_steps_per_second": 5.921, + "eval_sys_len": 50256, + "eval_totals": [ + 50256, + 46793, + 43338, + 39918 + ], + "step": 4600 + }, + { + "epoch": 4.11, + "learning_rate": 0.001, + "loss": 0.7484, + "step": 4610 + }, + { + "epoch": 4.12, + "learning_rate": 0.001, + "loss": 0.6936, + "step": 4620 + }, + { + "epoch": 4.13, + "learning_rate": 0.001, + "loss": 0.7071, + "step": 4630 + }, + { + "epoch": 4.14, + "learning_rate": 0.001, + "loss": 0.7151, + "step": 4640 + }, + { + "epoch": 4.14, + "learning_rate": 0.001, + "loss": 0.7612, + "step": 4650 + }, + { + "epoch": 4.15, + "learning_rate": 0.001, + "loss": 0.7635, + "step": 4660 + }, + { + "epoch": 4.16, + "learning_rate": 0.001, + "loss": 0.7501, + "step": 4670 + }, + { + "epoch": 4.17, + "learning_rate": 0.001, + "loss": 0.7595, + "step": 4680 + }, + { + "epoch": 4.18, + "learning_rate": 0.001, + "loss": 0.7207, + "step": 4690 + }, + { + "epoch": 4.19, + "learning_rate": 0.001, + "loss": 0.7126, + "step": 4700 + }, + { + "epoch": 4.2, + "learning_rate": 0.001, + "loss": 0.7357, + "step": 4710 + }, + { + "epoch": 4.21, + "learning_rate": 0.001, + "loss": 0.7577, + "step": 4720 + }, + { + "epoch": 4.22, + "learning_rate": 0.001, + "loss": 0.7425, + "step": 4730 + }, + { + "epoch": 4.22, + "learning_rate": 0.001, + "loss": 0.7373, + "step": 4740 + }, + { + "epoch": 4.23, + "learning_rate": 0.001, + "loss": 0.7281, + "step": 4750 + }, + { + "epoch": 4.24, + "learning_rate": 0.001, + "loss": 0.7504, + "step": 4760 + }, + { + "epoch": 4.25, + "learning_rate": 0.001, + "loss": 0.7454, + "step": 4770 + }, + { + "epoch": 4.26, + "learning_rate": 0.001, + "loss": 0.7439, + "step": 4780 + }, + { + "epoch": 4.27, + "learning_rate": 0.001, + "loss": 0.7349, + "step": 4790 + }, + { + "epoch": 4.28, + "learning_rate": 0.001, + "loss": 0.7379, + "step": 4800 + }, + { + "epoch": 4.28, + "eval_bp": 1.0, + "eval_counts": [ + 27615, + 19598, + 14752, + 11330 + ], + "eval_loss": 0.4184766411781311, + "eval_precisions": [ + 50.95676563393796, + 38.63197319140548, + 31.2072941126695, + 25.83985221337834 + ], + "eval_ref_len": 41379, + "eval_runtime": 631.7986, + "eval_samples_per_second": 5.481, + "eval_score": 35.49553457595366, + "eval_steps_per_second": 5.481, + "eval_sys_len": 54193, + "eval_totals": [ + 54193, + 50730, + 47271, + 43847 + ], + "step": 4800 + }, + { + "epoch": 4.29, + "learning_rate": 0.001, + "loss": 0.7364, + "step": 4810 + }, + { + "epoch": 4.3, + "learning_rate": 0.001, + "loss": 0.7491, + "step": 4820 + }, + { + "epoch": 4.3, + "learning_rate": 0.001, + "loss": 0.7557, + "step": 4830 + }, + { + "epoch": 4.31, + "learning_rate": 0.001, + "loss": 0.7362, + "step": 4840 + }, + { + "epoch": 4.32, + "learning_rate": 0.001, + "loss": 0.7544, + "step": 4850 + }, + { + "epoch": 4.33, + "learning_rate": 0.001, + "loss": 0.7076, + "step": 4860 + }, + { + "epoch": 4.34, + "learning_rate": 0.001, + "loss": 0.7698, + "step": 4870 + }, + { + "epoch": 4.35, + "learning_rate": 0.001, + "loss": 0.7554, + "step": 4880 + }, + { + "epoch": 4.36, + "learning_rate": 0.001, + "loss": 0.7318, + "step": 4890 + }, + { + "epoch": 4.37, + "learning_rate": 0.001, + "loss": 0.7667, + "step": 4900 + }, + { + "epoch": 4.38, + "learning_rate": 0.001, + "loss": 0.7311, + "step": 4910 + }, + { + "epoch": 4.39, + "learning_rate": 0.001, + "loss": 0.7199, + "step": 4920 + }, + { + "epoch": 4.39, + "learning_rate": 0.001, + "loss": 0.7221, + "step": 4930 + }, + { + "epoch": 4.4, + "learning_rate": 0.001, + "loss": 0.744, + "step": 4940 + }, + { + "epoch": 4.41, + "learning_rate": 0.001, + "loss": 0.721, + "step": 4950 + }, + { + "epoch": 4.42, + "learning_rate": 0.001, + "loss": 0.7239, + "step": 4960 + }, + { + "epoch": 4.43, + "learning_rate": 0.001, + "loss": 0.7995, + "step": 4970 + }, + { + "epoch": 4.44, + "learning_rate": 0.001, + "loss": 0.7436, + "step": 4980 + }, + { + "epoch": 4.45, + "learning_rate": 0.001, + "loss": 0.7742, + "step": 4990 + }, + { + "epoch": 4.46, + "learning_rate": 0.001, + "loss": 0.7488, + "step": 5000 + }, + { + "epoch": 4.46, + "eval_bp": 1.0, + "eval_counts": [ + 28036, + 19955, + 14999, + 11506 + ], + "eval_loss": 0.41945788264274597, + "eval_precisions": [ + 58.80773587280279, + 45.1358259256746, + 36.801040312093626, + 30.811664836783333 + ], + "eval_ref_len": 41379, + "eval_runtime": 519.6429, + "eval_samples_per_second": 6.664, + "eval_score": 41.65170448536966, + "eval_steps_per_second": 6.664, + "eval_sys_len": 47674, + "eval_totals": [ + 47674, + 44211, + 40757, + 37343 + ], + "step": 5000 + }, + { + "epoch": 4.47, + "learning_rate": 0.001, + "loss": 0.7725, + "step": 5010 + }, + { + "epoch": 4.47, + "learning_rate": 0.001, + "loss": 0.7873, + "step": 5020 + }, + { + "epoch": 4.48, + "learning_rate": 0.001, + "loss": 0.6723, + "step": 5030 + }, + { + "epoch": 4.49, + "learning_rate": 0.001, + "loss": 0.7106, + "step": 5040 + }, + { + "epoch": 4.5, + "learning_rate": 0.001, + "loss": 0.7208, + "step": 5050 + }, + { + "epoch": 4.51, + "learning_rate": 0.001, + "loss": 0.7665, + "step": 5060 + }, + { + "epoch": 4.52, + "learning_rate": 0.001, + "loss": 0.7777, + "step": 5070 + }, + { + "epoch": 4.53, + "learning_rate": 0.001, + "loss": 0.7249, + "step": 5080 + }, + { + "epoch": 4.54, + "learning_rate": 0.001, + "loss": 0.7591, + "step": 5090 + }, + { + "epoch": 4.55, + "learning_rate": 0.001, + "loss": 0.7441, + "step": 5100 + }, + { + "epoch": 4.55, + "learning_rate": 0.001, + "loss": 0.7468, + "step": 5110 + }, + { + "epoch": 4.56, + "learning_rate": 0.001, + "loss": 0.7277, + "step": 5120 + }, + { + "epoch": 4.57, + "learning_rate": 0.001, + "loss": 0.7283, + "step": 5130 + }, + { + "epoch": 4.58, + "learning_rate": 0.001, + "loss": 0.7538, + "step": 5140 + }, + { + "epoch": 4.59, + "learning_rate": 0.001, + "loss": 0.7294, + "step": 5150 + }, + { + "epoch": 4.6, + "learning_rate": 0.001, + "loss": 0.7511, + "step": 5160 + }, + { + "epoch": 4.61, + "learning_rate": 0.001, + "loss": 0.7919, + "step": 5170 + }, + { + "epoch": 4.62, + "learning_rate": 0.001, + "loss": 0.7084, + "step": 5180 + }, + { + "epoch": 4.63, + "learning_rate": 0.001, + "loss": 0.7445, + "step": 5190 + }, + { + "epoch": 4.63, + "learning_rate": 0.001, + "loss": 0.7782, + "step": 5200 + }, + { + "epoch": 4.63, + "eval_bp": 1.0, + "eval_counts": [ + 28268, + 20306, + 15368, + 11874 + ], + "eval_loss": 0.4119018316268921, + "eval_precisions": [ + 65.29311220954405, + 50.98039215686274, + 42.248797250859106, + 36.02876475407349 + ], + "eval_ref_len": 41379, + "eval_runtime": 400.7856, + "eval_samples_per_second": 8.641, + "eval_score": 47.44424785372734, + "eval_steps_per_second": 8.641, + "eval_sys_len": 43294, + "eval_totals": [ + 43294, + 39831, + 36375, + 32957 + ], + "step": 5200 + }, + { + "epoch": 4.64, + "learning_rate": 0.001, + "loss": 0.7814, + "step": 5210 + }, + { + "epoch": 4.65, + "learning_rate": 0.001, + "loss": 0.763, + "step": 5220 + }, + { + "epoch": 4.66, + "learning_rate": 0.001, + "loss": 0.7003, + "step": 5230 + }, + { + "epoch": 4.67, + "learning_rate": 0.001, + "loss": 0.7538, + "step": 5240 + }, + { + "epoch": 4.68, + "learning_rate": 0.001, + "loss": 0.6957, + "step": 5250 + }, + { + "epoch": 4.69, + "learning_rate": 0.001, + "loss": 0.7334, + "step": 5260 + }, + { + "epoch": 4.7, + "learning_rate": 0.001, + "loss": 0.7609, + "step": 5270 + }, + { + "epoch": 4.71, + "learning_rate": 0.001, + "loss": 0.7183, + "step": 5280 + }, + { + "epoch": 4.71, + "learning_rate": 0.001, + "loss": 0.7249, + "step": 5290 + }, + { + "epoch": 4.72, + "learning_rate": 0.001, + "loss": 0.7514, + "step": 5300 + }, + { + "epoch": 4.73, + "learning_rate": 0.001, + "loss": 0.7246, + "step": 5310 + }, + { + "epoch": 4.74, + "learning_rate": 0.001, + "loss": 0.7183, + "step": 5320 + }, + { + "epoch": 4.75, + "learning_rate": 0.001, + "loss": 0.7347, + "step": 5330 + }, + { + "epoch": 4.76, + "learning_rate": 0.001, + "loss": 0.7779, + "step": 5340 + }, + { + "epoch": 4.77, + "learning_rate": 0.001, + "loss": 0.7029, + "step": 5350 + }, + { + "epoch": 4.78, + "learning_rate": 0.001, + "loss": 0.7365, + "step": 5360 + }, + { + "epoch": 4.79, + "learning_rate": 0.001, + "loss": 0.7072, + "step": 5370 + }, + { + "epoch": 4.8, + "learning_rate": 0.001, + "loss": 0.7292, + "step": 5380 + }, + { + "epoch": 4.8, + "learning_rate": 0.001, + "loss": 0.7763, + "step": 5390 + }, + { + "epoch": 4.81, + "learning_rate": 0.001, + "loss": 0.7358, + "step": 5400 + }, + { + "epoch": 4.81, + "eval_bp": 1.0, + "eval_counts": [ + 28311, + 20157, + 15171, + 11654 + ], + "eval_loss": 0.4105658531188965, + "eval_precisions": [ + 61.92528107091299, + 47.703230386936454, + 39.0985000773156, + 32.9339286723563 + ], + "eval_ref_len": 41379, + "eval_runtime": 468.4336, + "eval_samples_per_second": 7.393, + "eval_score": 44.16262233243579, + "eval_steps_per_second": 7.393, + "eval_sys_len": 45718, + "eval_totals": [ + 45718, + 42255, + 38802, + 35386 + ], + "step": 5400 + }, + { + "epoch": 4.82, + "learning_rate": 0.001, + "loss": 0.7607, + "step": 5410 + }, + { + "epoch": 4.83, + "learning_rate": 0.001, + "loss": 0.7039, + "step": 5420 + }, + { + "epoch": 4.84, + "learning_rate": 0.001, + "loss": 0.7188, + "step": 5430 + }, + { + "epoch": 4.85, + "learning_rate": 0.001, + "loss": 0.7493, + "step": 5440 + }, + { + "epoch": 4.86, + "learning_rate": 0.001, + "loss": 0.741, + "step": 5450 + }, + { + "epoch": 4.87, + "learning_rate": 0.001, + "loss": 0.7424, + "step": 5460 + }, + { + "epoch": 4.88, + "learning_rate": 0.001, + "loss": 0.7076, + "step": 5470 + }, + { + "epoch": 4.88, + "learning_rate": 0.001, + "loss": 0.7188, + "step": 5480 + }, + { + "epoch": 4.89, + "learning_rate": 0.001, + "loss": 0.7182, + "step": 5490 + }, + { + "epoch": 4.9, + "learning_rate": 0.001, + "loss": 0.7285, + "step": 5500 + }, + { + "epoch": 4.91, + "learning_rate": 0.001, + "loss": 0.7231, + "step": 5510 + }, + { + "epoch": 4.92, + "learning_rate": 0.001, + "loss": 0.718, + "step": 5520 + }, + { + "epoch": 4.93, + "learning_rate": 0.001, + "loss": 0.7232, + "step": 5530 + }, + { + "epoch": 4.94, + "learning_rate": 0.001, + "loss": 0.7678, + "step": 5540 + }, + { + "epoch": 4.95, + "learning_rate": 0.001, + "loss": 0.7502, + "step": 5550 + }, + { + "epoch": 4.96, + "learning_rate": 0.001, + "loss": 0.742, + "step": 5560 + }, + { + "epoch": 4.96, + "learning_rate": 0.001, + "loss": 0.7435, + "step": 5570 + }, + { + "epoch": 4.97, + "learning_rate": 0.001, + "loss": 0.7489, + "step": 5580 + }, + { + "epoch": 4.98, + "learning_rate": 0.001, + "loss": 0.7244, + "step": 5590 + }, + { + "epoch": 4.99, + "learning_rate": 0.001, + "loss": 0.7587, + "step": 5600 + }, + { + "epoch": 4.99, + "eval_bp": 1.0, + "eval_counts": [ + 28615, + 20649, + 15678, + 12143 + ], + "eval_loss": 0.4097524583339691, + "eval_precisions": [ + 62.32711114983337, + 48.64540143234075, + 40.209279064399475, + 34.13448023837634 + ], + "eval_ref_len": 41379, + "eval_runtime": 437.6071, + "eval_samples_per_second": 7.913, + "eval_score": 45.16578809030254, + "eval_steps_per_second": 7.913, + "eval_sys_len": 45911, + "eval_totals": [ + 45911, + 42448, + 38991, + 35574 + ], + "step": 5600 + }, + { + "epoch": 5.0, + "learning_rate": 0.001, + "loss": 0.7357, + "step": 5610 + }, + { + "epoch": 5.01, + "learning_rate": 0.001, + "loss": 0.6867, + "step": 5620 + }, + { + "epoch": 5.02, + "learning_rate": 0.001, + "loss": 0.6787, + "step": 5630 + }, + { + "epoch": 5.03, + "learning_rate": 0.001, + "loss": 0.7284, + "step": 5640 + }, + { + "epoch": 5.04, + "learning_rate": 0.001, + "loss": 0.6993, + "step": 5650 + }, + { + "epoch": 5.04, + "learning_rate": 0.001, + "loss": 0.7037, + "step": 5660 + }, + { + "epoch": 5.05, + "learning_rate": 0.001, + "loss": 0.6966, + "step": 5670 + }, + { + "epoch": 5.06, + "learning_rate": 0.001, + "loss": 0.71, + "step": 5680 + }, + { + "epoch": 5.07, + "learning_rate": 0.001, + "loss": 0.6992, + "step": 5690 + }, + { + "epoch": 5.08, + "learning_rate": 0.001, + "loss": 0.6615, + "step": 5700 + }, + { + "epoch": 5.09, + "learning_rate": 0.001, + "loss": 0.6818, + "step": 5710 + }, + { + "epoch": 5.1, + "learning_rate": 0.001, + "loss": 0.6957, + "step": 5720 + }, + { + "epoch": 5.11, + "learning_rate": 0.001, + "loss": 0.6872, + "step": 5730 + }, + { + "epoch": 5.12, + "learning_rate": 0.001, + "loss": 0.6464, + "step": 5740 + }, + { + "epoch": 5.12, + "learning_rate": 0.001, + "loss": 0.6667, + "step": 5750 + }, + { + "epoch": 5.13, + "learning_rate": 0.001, + "loss": 0.7373, + "step": 5760 + }, + { + "epoch": 5.14, + "learning_rate": 0.001, + "loss": 0.692, + "step": 5770 + }, + { + "epoch": 5.15, + "learning_rate": 0.001, + "loss": 0.7055, + "step": 5780 + }, + { + "epoch": 5.16, + "learning_rate": 0.001, + "loss": 0.7528, + "step": 5790 + }, + { + "epoch": 5.17, + "learning_rate": 0.001, + "loss": 0.6792, + "step": 5800 + }, + { + "epoch": 5.17, + "eval_bp": 1.0, + "eval_counts": [ + 27651, + 19813, + 15002, + 11593 + ], + "eval_loss": 0.4170142412185669, + "eval_precisions": [ + 62.127305817062485, + 48.2725855179807, + 39.915921668795235, + 33.93735362997658 + ], + "eval_ref_len": 41379, + "eval_runtime": 414.7853, + "eval_samples_per_second": 8.349, + "eval_score": 44.89539574981961, + "eval_steps_per_second": 8.349, + "eval_sys_len": 44507, + "eval_totals": [ + 44507, + 41044, + 37584, + 34160 + ], + "step": 5800 + }, + { + "epoch": 5.18, + "learning_rate": 0.001, + "loss": 0.715, + "step": 5810 + }, + { + "epoch": 5.19, + "learning_rate": 0.001, + "loss": 0.6674, + "step": 5820 + }, + { + "epoch": 5.2, + "learning_rate": 0.001, + "loss": 0.6976, + "step": 5830 + }, + { + "epoch": 5.2, + "learning_rate": 0.001, + "loss": 0.6825, + "step": 5840 + }, + { + "epoch": 5.21, + "learning_rate": 0.001, + "loss": 0.6884, + "step": 5850 + }, + { + "epoch": 5.22, + "learning_rate": 0.001, + "loss": 0.6909, + "step": 5860 + }, + { + "epoch": 5.23, + "learning_rate": 0.001, + "loss": 0.6839, + "step": 5870 + }, + { + "epoch": 5.24, + "learning_rate": 0.001, + "loss": 0.7047, + "step": 5880 + }, + { + "epoch": 5.25, + "learning_rate": 0.001, + "loss": 0.7109, + "step": 5890 + }, + { + "epoch": 5.26, + "learning_rate": 0.001, + "loss": 0.7006, + "step": 5900 + }, + { + "epoch": 5.27, + "learning_rate": 0.001, + "loss": 0.7107, + "step": 5910 + }, + { + "epoch": 5.28, + "learning_rate": 0.001, + "loss": 0.6975, + "step": 5920 + }, + { + "epoch": 5.29, + "learning_rate": 0.001, + "loss": 0.7228, + "step": 5930 + }, + { + "epoch": 5.29, + "learning_rate": 0.001, + "loss": 0.6847, + "step": 5940 + }, + { + "epoch": 5.3, + "learning_rate": 0.001, + "loss": 0.693, + "step": 5950 + }, + { + "epoch": 5.31, + "learning_rate": 0.001, + "loss": 0.6744, + "step": 5960 + }, + { + "epoch": 5.32, + "learning_rate": 0.001, + "loss": 0.7602, + "step": 5970 + }, + { + "epoch": 5.33, + "learning_rate": 0.001, + "loss": 0.6582, + "step": 5980 + }, + { + "epoch": 5.34, + "learning_rate": 0.001, + "loss": 0.6723, + "step": 5990 + }, + { + "epoch": 5.35, + "learning_rate": 0.001, + "loss": 0.6927, + "step": 6000 + }, + { + "epoch": 5.35, + "eval_bp": 1.0, + "eval_counts": [ + 28469, + 20488, + 15559, + 12021 + ], + "eval_loss": 0.4135335087776184, + "eval_precisions": [ + 65.44296813939589, + 51.170109143585, + 42.533008939066725, + 36.24713544807623 + ], + "eval_ref_len": 41379, + "eval_runtime": 396.1878, + "eval_samples_per_second": 8.741, + "eval_score": 47.667215417697754, + "eval_steps_per_second": 8.741, + "eval_sys_len": 43502, + "eval_totals": [ + 43502, + 40039, + 36581, + 33164 + ], + "step": 6000 + }, + { + "epoch": 5.36, + "learning_rate": 0.001, + "loss": 0.7059, + "step": 6010 + }, + { + "epoch": 5.37, + "learning_rate": 0.001, + "loss": 0.6644, + "step": 6020 + }, + { + "epoch": 5.37, + "learning_rate": 0.001, + "loss": 0.6709, + "step": 6030 + }, + { + "epoch": 5.38, + "learning_rate": 0.001, + "loss": 0.6865, + "step": 6040 + }, + { + "epoch": 5.39, + "learning_rate": 0.001, + "loss": 0.6894, + "step": 6050 + }, + { + "epoch": 5.4, + "learning_rate": 0.001, + "loss": 0.6821, + "step": 6060 + }, + { + "epoch": 5.41, + "learning_rate": 0.001, + "loss": 0.7082, + "step": 6070 + }, + { + "epoch": 5.42, + "learning_rate": 0.001, + "loss": 0.694, + "step": 6080 + }, + { + "epoch": 5.43, + "learning_rate": 0.001, + "loss": 0.7056, + "step": 6090 + }, + { + "epoch": 5.44, + "learning_rate": 0.001, + "loss": 0.7194, + "step": 6100 + }, + { + "epoch": 5.45, + "learning_rate": 0.001, + "loss": 0.6988, + "step": 6110 + }, + { + "epoch": 5.45, + "learning_rate": 0.001, + "loss": 0.7332, + "step": 6120 + }, + { + "epoch": 5.46, + "learning_rate": 0.001, + "loss": 0.6843, + "step": 6130 + }, + { + "epoch": 5.47, + "learning_rate": 0.001, + "loss": 0.7271, + "step": 6140 + }, + { + "epoch": 5.48, + "learning_rate": 0.001, + "loss": 0.7122, + "step": 6150 + }, + { + "epoch": 5.49, + "learning_rate": 0.001, + "loss": 0.7164, + "step": 6160 + }, + { + "epoch": 5.5, + "learning_rate": 0.001, + "loss": 0.7547, + "step": 6170 + }, + { + "epoch": 5.51, + "learning_rate": 0.001, + "loss": 0.6799, + "step": 6180 + }, + { + "epoch": 5.52, + "learning_rate": 0.001, + "loss": 0.7032, + "step": 6190 + }, + { + "epoch": 5.53, + "learning_rate": 0.001, + "loss": 0.6984, + "step": 6200 + }, + { + "epoch": 5.53, + "eval_bp": 1.0, + "eval_counts": [ + 28063, + 20180, + 15363, + 11911 + ], + "eval_loss": 0.4087334871292114, + "eval_precisions": [ + 59.575416622439235, + 46.239860684661565, + 38.22402468152866, + 32.38620914677253 + ], + "eval_ref_len": 41379, + "eval_runtime": 443.9736, + "eval_samples_per_second": 7.8, + "eval_score": 42.97293668517703, + "eval_steps_per_second": 7.8, + "eval_sys_len": 47105, + "eval_totals": [ + 47105, + 43642, + 40192, + 36778 + ], + "step": 6200 + }, + { + "epoch": 5.53, + "learning_rate": 0.001, + "loss": 0.7198, + "step": 6210 + }, + { + "epoch": 5.54, + "learning_rate": 0.001, + "loss": 0.6764, + "step": 6220 + }, + { + "epoch": 5.55, + "learning_rate": 0.001, + "loss": 0.7239, + "step": 6230 + }, + { + "epoch": 5.56, + "learning_rate": 0.001, + "loss": 0.6859, + "step": 6240 + }, + { + "epoch": 5.57, + "learning_rate": 0.001, + "loss": 0.7203, + "step": 6250 + }, + { + "epoch": 5.58, + "learning_rate": 0.001, + "loss": 0.7088, + "step": 6260 + }, + { + "epoch": 5.59, + "learning_rate": 0.001, + "loss": 0.689, + "step": 6270 + }, + { + "epoch": 5.6, + "learning_rate": 0.001, + "loss": 0.6946, + "step": 6280 + }, + { + "epoch": 5.61, + "learning_rate": 0.001, + "loss": 0.6679, + "step": 6290 + }, + { + "epoch": 5.61, + "learning_rate": 0.001, + "loss": 0.7237, + "step": 6300 + }, + { + "epoch": 5.62, + "learning_rate": 0.001, + "loss": 0.7139, + "step": 6310 + }, + { + "epoch": 5.63, + "learning_rate": 0.001, + "loss": 0.7018, + "step": 6320 + }, + { + "epoch": 5.64, + "learning_rate": 0.001, + "loss": 0.7296, + "step": 6330 + }, + { + "epoch": 5.65, + "learning_rate": 0.001, + "loss": 0.6959, + "step": 6340 + }, + { + "epoch": 5.66, + "learning_rate": 0.001, + "loss": 0.7384, + "step": 6350 + }, + { + "epoch": 5.67, + "learning_rate": 0.001, + "loss": 0.7346, + "step": 6360 + }, + { + "epoch": 5.68, + "learning_rate": 0.001, + "loss": 0.7235, + "step": 6370 + }, + { + "epoch": 5.69, + "learning_rate": 0.001, + "loss": 0.7139, + "step": 6380 + }, + { + "epoch": 5.7, + "learning_rate": 0.001, + "loss": 0.7185, + "step": 6390 + }, + { + "epoch": 5.7, + "learning_rate": 0.001, + "loss": 0.7465, + "step": 6400 + }, + { + "epoch": 5.7, + "eval_bp": 1.0, + "eval_counts": [ + 28601, + 20618, + 15662, + 12099 + ], + "eval_loss": 0.4099283218383789, + "eval_precisions": [ + 58.90676168310918, + 45.726325127522735, + 37.62009992313605, + 31.665314454709623 + ], + "eval_ref_len": 41379, + "eval_runtime": 502.1342, + "eval_samples_per_second": 6.897, + "eval_score": 42.32371679075985, + "eval_steps_per_second": 6.897, + "eval_sys_len": 48553, + "eval_totals": [ + 48553, + 45090, + 41632, + 38209 + ], + "step": 6400 + }, + { + "epoch": 5.71, + "learning_rate": 0.001, + "loss": 0.7045, + "step": 6410 + }, + { + "epoch": 5.72, + "learning_rate": 0.001, + "loss": 0.664, + "step": 6420 + }, + { + "epoch": 5.73, + "learning_rate": 0.001, + "loss": 0.7009, + "step": 6430 + }, + { + "epoch": 5.74, + "learning_rate": 0.001, + "loss": 0.7025, + "step": 6440 + }, + { + "epoch": 5.75, + "learning_rate": 0.001, + "loss": 0.7353, + "step": 6450 + }, + { + "epoch": 5.76, + "learning_rate": 0.001, + "loss": 0.6896, + "step": 6460 + }, + { + "epoch": 5.77, + "learning_rate": 0.001, + "loss": 0.6899, + "step": 6470 + }, + { + "epoch": 5.78, + "learning_rate": 0.001, + "loss": 0.6815, + "step": 6480 + }, + { + "epoch": 5.78, + "learning_rate": 0.001, + "loss": 0.704, + "step": 6490 + }, + { + "epoch": 5.79, + "learning_rate": 0.001, + "loss": 0.7097, + "step": 6500 + }, + { + "epoch": 5.8, + "learning_rate": 0.001, + "loss": 0.6945, + "step": 6510 + }, + { + "epoch": 5.81, + "learning_rate": 0.001, + "loss": 0.7193, + "step": 6520 + }, + { + "epoch": 5.82, + "learning_rate": 0.001, + "loss": 0.7148, + "step": 6530 + }, + { + "epoch": 5.83, + "learning_rate": 0.001, + "loss": 0.7248, + "step": 6540 + }, + { + "epoch": 5.84, + "learning_rate": 0.001, + "loss": 0.6939, + "step": 6550 + }, + { + "epoch": 5.85, + "learning_rate": 0.001, + "loss": 0.6806, + "step": 6560 + }, + { + "epoch": 5.86, + "learning_rate": 0.001, + "loss": 0.692, + "step": 6570 + }, + { + "epoch": 5.86, + "learning_rate": 0.001, + "loss": 0.7103, + "step": 6580 + }, + { + "epoch": 5.87, + "learning_rate": 0.001, + "loss": 0.7486, + "step": 6590 + }, + { + "epoch": 5.88, + "learning_rate": 0.001, + "loss": 0.726, + "step": 6600 + }, + { + "epoch": 5.88, + "eval_bp": 1.0, + "eval_counts": [ + 28944, + 21003, + 16051, + 12511 + ], + "eval_loss": 0.4023992419242859, + "eval_precisions": [ + 65.1891891891892, + 51.30566480201285, + 42.819794584500464, + 36.71930030523597 + ], + "eval_ref_len": 41379, + "eval_runtime": 409.3565, + "eval_samples_per_second": 8.46, + "eval_score": 47.88725784349983, + "eval_steps_per_second": 8.46, + "eval_sys_len": 44400, + "eval_totals": [ + 44400, + 40937, + 37485, + 34072 + ], + "step": 6600 + }, + { + "epoch": 5.89, + "learning_rate": 0.001, + "loss": 0.7087, + "step": 6610 + }, + { + "epoch": 5.9, + "learning_rate": 0.001, + "loss": 0.7224, + "step": 6620 + }, + { + "epoch": 5.91, + "learning_rate": 0.001, + "loss": 0.676, + "step": 6630 + }, + { + "epoch": 5.92, + "learning_rate": 0.001, + "loss": 0.6951, + "step": 6640 + }, + { + "epoch": 5.93, + "learning_rate": 0.001, + "loss": 0.7293, + "step": 6650 + }, + { + "epoch": 5.94, + "learning_rate": 0.001, + "loss": 0.734, + "step": 6660 + }, + { + "epoch": 5.94, + "learning_rate": 0.001, + "loss": 0.6968, + "step": 6670 + }, + { + "epoch": 5.95, + "learning_rate": 0.001, + "loss": 0.6946, + "step": 6680 + }, + { + "epoch": 5.96, + "learning_rate": 0.001, + "loss": 0.6691, + "step": 6690 + }, + { + "epoch": 5.97, + "learning_rate": 0.001, + "loss": 0.7026, + "step": 6700 + }, + { + "epoch": 5.98, + "learning_rate": 0.001, + "loss": 0.7264, + "step": 6710 + }, + { + "epoch": 5.99, + "learning_rate": 0.001, + "loss": 0.7092, + "step": 6720 + }, + { + "epoch": 6.0, + "learning_rate": 0.001, + "loss": 0.635, + "step": 6730 + }, + { + "epoch": 6.01, + "learning_rate": 0.001, + "loss": 0.6688, + "step": 6740 + }, + { + "epoch": 6.02, + "learning_rate": 0.001, + "loss": 0.6522, + "step": 6750 + }, + { + "epoch": 6.02, + "learning_rate": 0.001, + "loss": 0.6694, + "step": 6760 + }, + { + "epoch": 6.03, + "learning_rate": 0.001, + "loss": 0.6707, + "step": 6770 + }, + { + "epoch": 6.04, + "learning_rate": 0.001, + "loss": 0.6127, + "step": 6780 + }, + { + "epoch": 6.05, + "learning_rate": 0.001, + "loss": 0.6742, + "step": 6790 + }, + { + "epoch": 6.06, + "learning_rate": 0.001, + "loss": 0.6481, + "step": 6800 + }, + { + "epoch": 6.06, + "eval_bp": 1.0, + "eval_counts": [ + 28255, + 20328, + 15387, + 11879 + ], + "eval_loss": 0.407896488904953, + "eval_precisions": [ + 61.8677468797898, + 48.16262705238468, + 39.704288589564946, + 33.61251803853881 + ], + "eval_ref_len": 41379, + "eval_runtime": 451.1476, + "eval_samples_per_second": 7.676, + "eval_score": 44.655836980011564, + "eval_steps_per_second": 7.676, + "eval_sys_len": 45670, + "eval_totals": [ + 45670, + 42207, + 38754, + 35341 + ], + "step": 6800 + }, + { + "epoch": 6.07, + "learning_rate": 0.001, + "loss": 0.6797, + "step": 6810 + }, + { + "epoch": 6.08, + "learning_rate": 0.001, + "loss": 0.6612, + "step": 6820 + }, + { + "epoch": 6.09, + "learning_rate": 0.001, + "loss": 0.6498, + "step": 6830 + }, + { + "epoch": 6.1, + "learning_rate": 0.001, + "loss": 0.6577, + "step": 6840 + }, + { + "epoch": 6.11, + "learning_rate": 0.001, + "loss": 0.6477, + "step": 6850 + }, + { + "epoch": 6.11, + "learning_rate": 0.001, + "loss": 0.6514, + "step": 6860 + }, + { + "epoch": 6.12, + "learning_rate": 0.001, + "loss": 0.6663, + "step": 6870 + }, + { + "epoch": 6.13, + "learning_rate": 0.001, + "loss": 0.6274, + "step": 6880 + }, + { + "epoch": 6.14, + "learning_rate": 0.001, + "loss": 0.6409, + "step": 6890 + }, + { + "epoch": 6.15, + "learning_rate": 0.001, + "loss": 0.644, + "step": 6900 + }, + { + "epoch": 6.16, + "learning_rate": 0.001, + "loss": 0.6545, + "step": 6910 + }, + { + "epoch": 6.17, + "learning_rate": 0.001, + "loss": 0.679, + "step": 6920 + }, + { + "epoch": 6.18, + "learning_rate": 0.001, + "loss": 0.6643, + "step": 6930 + }, + { + "epoch": 6.19, + "learning_rate": 0.001, + "loss": 0.6624, + "step": 6940 + }, + { + "epoch": 6.19, + "learning_rate": 0.001, + "loss": 0.6418, + "step": 6950 + }, + { + "epoch": 6.2, + "learning_rate": 0.001, + "loss": 0.6474, + "step": 6960 + }, + { + "epoch": 6.21, + "learning_rate": 0.001, + "loss": 0.6754, + "step": 6970 + }, + { + "epoch": 6.22, + "learning_rate": 0.001, + "loss": 0.687, + "step": 6980 + }, + { + "epoch": 6.23, + "learning_rate": 0.001, + "loss": 0.6862, + "step": 6990 + }, + { + "epoch": 6.24, + "learning_rate": 0.001, + "loss": 0.6861, + "step": 7000 + }, + { + "epoch": 6.24, + "eval_bp": 1.0, + "eval_counts": [ + 28664, + 20760, + 15826, + 12278 + ], + "eval_loss": 0.41178786754608154, + "eval_precisions": [ + 58.025466102552684, + 45.19331243469175, + 37.256055933520095, + 31.434496530889167 + ], + "eval_ref_len": 41379, + "eval_runtime": 530.1653, + "eval_samples_per_second": 6.532, + "eval_score": 41.862392893483864, + "eval_steps_per_second": 6.532, + "eval_sys_len": 49399, + "eval_totals": [ + 49399, + 45936, + 42479, + 39059 + ], + "step": 7000 + }, + { + "epoch": 6.25, + "learning_rate": 0.001, + "loss": 0.6054, + "step": 7010 + }, + { + "epoch": 6.26, + "learning_rate": 0.001, + "loss": 0.6685, + "step": 7020 + }, + { + "epoch": 6.27, + "learning_rate": 0.001, + "loss": 0.6526, + "step": 7030 + }, + { + "epoch": 6.27, + "learning_rate": 0.001, + "loss": 0.6443, + "step": 7040 + }, + { + "epoch": 6.28, + "learning_rate": 0.001, + "loss": 0.6378, + "step": 7050 + }, + { + "epoch": 6.29, + "learning_rate": 0.001, + "loss": 0.6647, + "step": 7060 + }, + { + "epoch": 6.3, + "learning_rate": 0.001, + "loss": 0.644, + "step": 7070 + }, + { + "epoch": 6.31, + "learning_rate": 0.001, + "loss": 0.7003, + "step": 7080 + }, + { + "epoch": 6.32, + "learning_rate": 0.001, + "loss": 0.6819, + "step": 7090 + }, + { + "epoch": 6.33, + "learning_rate": 0.001, + "loss": 0.6532, + "step": 7100 + }, + { + "epoch": 6.34, + "learning_rate": 0.001, + "loss": 0.6615, + "step": 7110 + }, + { + "epoch": 6.35, + "learning_rate": 0.001, + "loss": 0.6749, + "step": 7120 + }, + { + "epoch": 6.35, + "learning_rate": 0.001, + "loss": 0.6733, + "step": 7130 + }, + { + "epoch": 6.36, + "learning_rate": 0.001, + "loss": 0.6569, + "step": 7140 + }, + { + "epoch": 6.37, + "learning_rate": 0.001, + "loss": 0.6881, + "step": 7150 + }, + { + "epoch": 6.38, + "learning_rate": 0.001, + "loss": 0.6938, + "step": 7160 + }, + { + "epoch": 6.39, + "learning_rate": 0.001, + "loss": 0.6505, + "step": 7170 + }, + { + "epoch": 6.4, + "learning_rate": 0.001, + "loss": 0.6582, + "step": 7180 + }, + { + "epoch": 6.41, + "learning_rate": 0.001, + "loss": 0.6855, + "step": 7190 + }, + { + "epoch": 6.42, + "learning_rate": 0.001, + "loss": 0.6636, + "step": 7200 + }, + { + "epoch": 6.42, + "eval_bp": 1.0, + "eval_counts": [ + 28586, + 20643, + 15705, + 12185 + ], + "eval_loss": 0.4073503613471985, + "eval_precisions": [ + 63.47789398885262, + 49.658407505412555, + 41.19884575026233, + 35.1071799008874 + ], + "eval_ref_len": 41379, + "eval_runtime": 433.3648, + "eval_samples_per_second": 7.991, + "eval_score": 46.20873428545001, + "eval_steps_per_second": 7.991, + "eval_sys_len": 45033, + "eval_totals": [ + 45033, + 41570, + 38120, + 34708 + ], + "step": 7200 + }, + { + "epoch": 6.43, + "learning_rate": 0.001, + "loss": 0.6498, + "step": 7210 + }, + { + "epoch": 6.43, + "learning_rate": 0.001, + "loss": 0.6536, + "step": 7220 + }, + { + "epoch": 6.44, + "learning_rate": 0.001, + "loss": 0.6747, + "step": 7230 + }, + { + "epoch": 6.45, + "learning_rate": 0.001, + "loss": 0.6563, + "step": 7240 + }, + { + "epoch": 6.46, + "learning_rate": 0.001, + "loss": 0.6853, + "step": 7250 + }, + { + "epoch": 6.47, + "learning_rate": 0.001, + "loss": 0.6742, + "step": 7260 + }, + { + "epoch": 6.48, + "learning_rate": 0.001, + "loss": 0.666, + "step": 7270 + }, + { + "epoch": 6.49, + "learning_rate": 0.001, + "loss": 0.6618, + "step": 7280 + }, + { + "epoch": 6.5, + "learning_rate": 0.001, + "loss": 0.6849, + "step": 7290 + }, + { + "epoch": 6.51, + "learning_rate": 0.001, + "loss": 0.6696, + "step": 7300 + }, + { + "epoch": 6.52, + "learning_rate": 0.001, + "loss": 0.6524, + "step": 7310 + }, + { + "epoch": 6.52, + "learning_rate": 0.001, + "loss": 0.683, + "step": 7320 + }, + { + "epoch": 6.53, + "learning_rate": 0.001, + "loss": 0.679, + "step": 7330 + }, + { + "epoch": 6.54, + "learning_rate": 0.001, + "loss": 0.6418, + "step": 7340 + }, + { + "epoch": 6.55, + "learning_rate": 0.001, + "loss": 0.6403, + "step": 7350 + }, + { + "epoch": 6.56, + "learning_rate": 0.001, + "loss": 0.681, + "step": 7360 + }, + { + "epoch": 6.57, + "learning_rate": 0.001, + "loss": 0.6369, + "step": 7370 + }, + { + "epoch": 6.58, + "learning_rate": 0.001, + "loss": 0.6728, + "step": 7380 + }, + { + "epoch": 6.59, + "learning_rate": 0.001, + "loss": 0.6444, + "step": 7390 + }, + { + "epoch": 6.6, + "learning_rate": 0.001, + "loss": 0.6733, + "step": 7400 + }, + { + "epoch": 6.6, + "eval_bp": 1.0, + "eval_counts": [ + 29063, + 21171, + 16212, + 12650 + ], + "eval_loss": 0.40429091453552246, + "eval_precisions": [ + 56.77476069544833, + 44.358539191652525, + 36.620736390332056, + 30.966952264381884 + ], + "eval_ref_len": 41379, + "eval_runtime": 459.9316, + "eval_samples_per_second": 7.529, + "eval_score": 41.109246958331305, + "eval_steps_per_second": 7.529, + "eval_sys_len": 51190, + "eval_totals": [ + 51190, + 47727, + 44270, + 40850 + ], + "step": 7400 + }, + { + "epoch": 6.6, + "learning_rate": 0.001, + "loss": 0.6469, + "step": 7410 + }, + { + "epoch": 6.61, + "learning_rate": 0.001, + "loss": 0.6875, + "step": 7420 + }, + { + "epoch": 6.62, + "learning_rate": 0.001, + "loss": 0.6756, + "step": 7430 + }, + { + "epoch": 6.63, + "learning_rate": 0.001, + "loss": 0.688, + "step": 7440 + }, + { + "epoch": 6.64, + "learning_rate": 0.001, + "loss": 0.6725, + "step": 7450 + }, + { + "epoch": 6.65, + "learning_rate": 0.001, + "loss": 0.6597, + "step": 7460 + }, + { + "epoch": 6.66, + "learning_rate": 0.001, + "loss": 0.7135, + "step": 7470 + }, + { + "epoch": 6.67, + "learning_rate": 0.001, + "loss": 0.6923, + "step": 7480 + }, + { + "epoch": 6.68, + "learning_rate": 0.001, + "loss": 0.6674, + "step": 7490 + }, + { + "epoch": 6.68, + "learning_rate": 0.001, + "loss": 0.6743, + "step": 7500 + }, + { + "epoch": 6.69, + "learning_rate": 0.001, + "loss": 0.6706, + "step": 7510 + }, + { + "epoch": 6.7, + "learning_rate": 0.001, + "loss": 0.7095, + "step": 7520 + }, + { + "epoch": 6.71, + "learning_rate": 0.001, + "loss": 0.6835, + "step": 7530 + }, + { + "epoch": 6.72, + "learning_rate": 0.001, + "loss": 0.6735, + "step": 7540 + }, + { + "epoch": 6.73, + "learning_rate": 0.001, + "loss": 0.6785, + "step": 7550 + }, + { + "epoch": 6.74, + "learning_rate": 0.001, + "loss": 0.6595, + "step": 7560 + }, + { + "epoch": 6.75, + "learning_rate": 0.001, + "loss": 0.6841, + "step": 7570 + }, + { + "epoch": 6.76, + "learning_rate": 0.001, + "loss": 0.7051, + "step": 7580 + }, + { + "epoch": 6.76, + "learning_rate": 0.001, + "loss": 0.6606, + "step": 7590 + }, + { + "epoch": 6.77, + "learning_rate": 0.001, + "loss": 0.7023, + "step": 7600 + }, + { + "epoch": 6.77, + "eval_bp": 1.0, + "eval_counts": [ + 28473, + 20549, + 15671, + 12152 + ], + "eval_loss": 0.4036000967025757, + "eval_precisions": [ + 63.1624481465871, + 49.37764321414841, + 41.066561844863735, + 34.97985031663788 + ], + "eval_ref_len": 41379, + "eval_runtime": 429.7544, + "eval_samples_per_second": 8.058, + "eval_score": 46.00699850503643, + "eval_steps_per_second": 8.058, + "eval_sys_len": 45079, + "eval_totals": [ + 45079, + 41616, + 38160, + 34740 + ], + "step": 7600 + } + ], + "max_steps": 280500, + "num_train_epochs": 250, + "total_flos": 9.355029219235507e+16, + "trial_name": null, + "trial_params": null +}