{ "best_metric": 1.5756924152374268, "best_model_checkpoint": "/scratch/s3545881/dumped/translation/mt5/3086466/checkpoint-2800", "epoch": 2.7092404450895016, "global_step": 2800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.5e-06, "loss": 37.0498, "step": 10 }, { "epoch": 0.02, "learning_rate": 5e-06, "loss": 37.4476, "step": 20 }, { "epoch": 0.03, "learning_rate": 7.5e-06, "loss": 36.636, "step": 30 }, { "epoch": 0.04, "learning_rate": 1e-05, "loss": 35.662, "step": 40 }, { "epoch": 0.05, "learning_rate": 1.25e-05, "loss": 34.4439, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.5e-05, "loss": 33.4065, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.7500000000000002e-05, "loss": 32.8662, "step": 70 }, { "epoch": 0.08, "learning_rate": 2e-05, "loss": 32.7596, "step": 80 }, { "epoch": 0.09, "learning_rate": 2.2499999999999998e-05, "loss": 31.2127, "step": 90 }, { "epoch": 0.1, "learning_rate": 2.5e-05, "loss": 30.3467, "step": 100 }, { "epoch": 0.11, "learning_rate": 2.75e-05, "loss": 28.3883, "step": 110 }, { "epoch": 0.12, "learning_rate": 3e-05, "loss": 26.9506, "step": 120 }, { "epoch": 0.13, "learning_rate": 3.2500000000000004e-05, "loss": 24.2034, "step": 130 }, { "epoch": 0.14, "learning_rate": 3.5000000000000004e-05, "loss": 22.9662, "step": 140 }, { "epoch": 0.15, "learning_rate": 3.75e-05, "loss": 20.9191, "step": 150 }, { "epoch": 0.15, "learning_rate": 4e-05, "loss": 18.8087, "step": 160 }, { "epoch": 0.16, "learning_rate": 4.25e-05, "loss": 17.9818, "step": 170 }, { "epoch": 0.17, "learning_rate": 4.4999999999999996e-05, "loss": 16.0516, "step": 180 }, { "epoch": 0.18, "learning_rate": 4.75e-05, "loss": 13.4505, "step": 190 }, { "epoch": 0.19, "learning_rate": 5e-05, "loss": 12.6675, "step": 200 }, { "epoch": 0.19, "eval_bp": 1.0, "eval_counts": [ 3907, 91, 14, 6 ], "eval_loss": 8.447338104248047, "eval_precisions": [ 0.5502692897061474, 0.012879411906237882, 0.0019911394295385534, 0.0008574956553553462 ], "eval_ref_len": 39265, "eval_runtime": 1345.1117, "eval_samples_per_second": 2.575, "eval_score": 0.010488206448671048, "eval_steps_per_second": 2.575, "eval_sys_len": 710016, "eval_totals": [ 710016, 706554, 703115, 699712 ], "step": 200 }, { "epoch": 0.2, "learning_rate": 5.25e-05, "loss": 11.941, "step": 210 }, { "epoch": 0.21, "learning_rate": 5.5e-05, "loss": 10.961, "step": 220 }, { "epoch": 0.22, "learning_rate": 5.75e-05, "loss": 10.3499, "step": 230 }, { "epoch": 0.23, "learning_rate": 6e-05, "loss": 9.5122, "step": 240 }, { "epoch": 0.24, "learning_rate": 6.25e-05, "loss": 9.1592, "step": 250 }, { "epoch": 0.25, "learning_rate": 6.500000000000001e-05, "loss": 8.7164, "step": 260 }, { "epoch": 0.26, "learning_rate": 6.75e-05, "loss": 8.4345, "step": 270 }, { "epoch": 0.27, "learning_rate": 7.000000000000001e-05, "loss": 8.0707, "step": 280 }, { "epoch": 0.28, "learning_rate": 7.25e-05, "loss": 7.959, "step": 290 }, { "epoch": 0.29, "learning_rate": 7.5e-05, "loss": 7.661, "step": 300 }, { "epoch": 0.3, "learning_rate": 7.75e-05, "loss": 7.7047, "step": 310 }, { "epoch": 0.31, "learning_rate": 8e-05, "loss": 7.6889, "step": 320 }, { "epoch": 0.32, "learning_rate": 8.25e-05, "loss": 7.7815, "step": 330 }, { "epoch": 0.33, "learning_rate": 8.5e-05, "loss": 8.0411, "step": 340 }, { "epoch": 0.34, "learning_rate": 8.75e-05, "loss": 7.8518, "step": 350 }, { "epoch": 0.35, "learning_rate": 8.999999999999999e-05, "loss": 7.7647, "step": 360 }, { "epoch": 0.36, "learning_rate": 9.25e-05, "loss": 7.4841, "step": 370 }, { "epoch": 0.37, "learning_rate": 9.5e-05, "loss": 7.2992, "step": 380 }, { "epoch": 0.38, "learning_rate": 9.750000000000001e-05, "loss": 7.2381, "step": 390 }, { "epoch": 0.39, "learning_rate": 0.0001, "loss": 7.1215, "step": 400 }, { "epoch": 0.39, "eval_bp": 1.0, "eval_counts": [ 3135, 982, 295, 61 ], "eval_loss": 6.286175727844238, "eval_precisions": [ 0.9876939944424491, 0.31213545788873104, 0.09449404046907482, 0.019671708213744397 ], "eval_ref_len": 39265, "eval_runtime": 1325.4365, "eval_samples_per_second": 2.613, "eval_score": 0.15472233223389328, "eval_steps_per_second": 2.613, "eval_sys_len": 317406, "eval_totals": [ 317406, 314607, 312189, 310090 ], "step": 400 }, { "epoch": 0.4, "learning_rate": 0.0001025, "loss": 7.0884, "step": 410 }, { "epoch": 0.41, "learning_rate": 0.000105, "loss": 7.0528, "step": 420 }, { "epoch": 0.42, "learning_rate": 0.0001075, "loss": 7.0151, "step": 430 }, { "epoch": 0.43, "learning_rate": 0.00011, "loss": 6.7428, "step": 440 }, { "epoch": 0.44, "learning_rate": 0.00011250000000000001, "loss": 6.5513, "step": 450 }, { "epoch": 0.45, "learning_rate": 0.000115, "loss": 6.4033, "step": 460 }, { "epoch": 0.45, "learning_rate": 0.0001175, "loss": 6.0151, "step": 470 }, { "epoch": 0.46, "learning_rate": 0.00012, "loss": 5.5711, "step": 480 }, { "epoch": 0.47, "learning_rate": 0.0001225, "loss": 5.0243, "step": 490 }, { "epoch": 0.48, "learning_rate": 0.000125, "loss": 4.8831, "step": 500 }, { "epoch": 0.49, "learning_rate": 0.0001275, "loss": 4.636, "step": 510 }, { "epoch": 0.5, "learning_rate": 0.00013000000000000002, "loss": 4.5649, "step": 520 }, { "epoch": 0.51, "learning_rate": 0.00013250000000000002, "loss": 4.1967, "step": 530 }, { "epoch": 0.52, "learning_rate": 0.000135, "loss": 4.4032, "step": 540 }, { "epoch": 0.53, "learning_rate": 0.0001375, "loss": 4.3111, "step": 550 }, { "epoch": 0.54, "learning_rate": 0.00014000000000000001, "loss": 4.2533, "step": 560 }, { "epoch": 0.55, "learning_rate": 0.0001425, "loss": 4.2514, "step": 570 }, { "epoch": 0.56, "learning_rate": 0.000145, "loss": 4.1326, "step": 580 }, { "epoch": 0.57, "learning_rate": 0.0001475, "loss": 4.0604, "step": 590 }, { "epoch": 0.58, "learning_rate": 0.00015, "loss": 4.0115, "step": 600 }, { "epoch": 0.58, "eval_bp": 1.0, "eval_counts": [ 22228, 12576, 7955, 5383 ], "eval_loss": 2.3380367755889893, "eval_precisions": [ 45.45975130890052, 27.68032047190368, 18.94273127753304, 13.947402513278922 ], "eval_ref_len": 39265, "eval_runtime": 572.0306, "eval_samples_per_second": 6.054, "eval_score": 24.01228700570531, "eval_steps_per_second": 6.054, "eval_sys_len": 48896, "eval_totals": [ 48896, 45433, 41995, 38595 ], "step": 600 }, { "epoch": 0.59, "learning_rate": 0.0001525, "loss": 3.9441, "step": 610 }, { "epoch": 0.6, "learning_rate": 0.000155, "loss": 3.9152, "step": 620 }, { "epoch": 0.61, "learning_rate": 0.0001575, "loss": 3.9774, "step": 630 }, { "epoch": 0.62, "learning_rate": 0.00016, "loss": 3.7911, "step": 640 }, { "epoch": 0.63, "learning_rate": 0.00016250000000000002, "loss": 3.8047, "step": 650 }, { "epoch": 0.64, "learning_rate": 0.000165, "loss": 3.8829, "step": 660 }, { "epoch": 0.65, "learning_rate": 0.0001675, "loss": 3.6344, "step": 670 }, { "epoch": 0.66, "learning_rate": 0.00017, "loss": 3.8138, "step": 680 }, { "epoch": 0.67, "learning_rate": 0.0001725, "loss": 3.7081, "step": 690 }, { "epoch": 0.68, "learning_rate": 0.000175, "loss": 3.7708, "step": 700 }, { "epoch": 0.69, "learning_rate": 0.0001775, "loss": 3.7171, "step": 710 }, { "epoch": 0.7, "learning_rate": 0.00017999999999999998, "loss": 3.5893, "step": 720 }, { "epoch": 0.71, "learning_rate": 0.0001825, "loss": 3.5494, "step": 730 }, { "epoch": 0.72, "learning_rate": 0.000185, "loss": 3.6416, "step": 740 }, { "epoch": 0.73, "learning_rate": 0.0001875, "loss": 3.6174, "step": 750 }, { "epoch": 0.74, "learning_rate": 0.00019, "loss": 3.6616, "step": 760 }, { "epoch": 0.75, "learning_rate": 0.00019250000000000002, "loss": 3.456, "step": 770 }, { "epoch": 0.75, "learning_rate": 0.00019500000000000002, "loss": 3.6063, "step": 780 }, { "epoch": 0.76, "learning_rate": 0.0001975, "loss": 3.6423, "step": 790 }, { "epoch": 0.77, "learning_rate": 0.0002, "loss": 3.6089, "step": 800 }, { "epoch": 0.77, "eval_bp": 1.0, "eval_counts": [ 22303, 12687, 7909, 5220 ], "eval_loss": 2.027892589569092, "eval_precisions": [ 51.82164598726707, 32.05811749842072, 21.88494424306151, 15.94282572842221 ], "eval_ref_len": 39265, "eval_runtime": 511.1135, "eval_samples_per_second": 6.775, "eval_score": 27.592432621043134, "eval_steps_per_second": 6.775, "eval_sys_len": 43038, "eval_totals": [ 43038, 39575, 36139, 32742 ], "step": 800 }, { "epoch": 0.78, "learning_rate": 0.00020250000000000002, "loss": 3.4158, "step": 810 }, { "epoch": 0.79, "learning_rate": 0.000205, "loss": 3.409, "step": 820 }, { "epoch": 0.8, "learning_rate": 0.0002075, "loss": 3.5841, "step": 830 }, { "epoch": 0.81, "learning_rate": 0.00021, "loss": 3.4777, "step": 840 }, { "epoch": 0.82, "learning_rate": 0.0002125, "loss": 3.4107, "step": 850 }, { "epoch": 0.83, "learning_rate": 0.000215, "loss": 3.3874, "step": 860 }, { "epoch": 0.84, "learning_rate": 0.0002175, "loss": 3.4342, "step": 870 }, { "epoch": 0.85, "learning_rate": 0.00022, "loss": 3.4097, "step": 880 }, { "epoch": 0.86, "learning_rate": 0.00022250000000000001, "loss": 3.3245, "step": 890 }, { "epoch": 0.87, "learning_rate": 0.00022500000000000002, "loss": 3.3971, "step": 900 }, { "epoch": 0.88, "learning_rate": 0.0002275, "loss": 3.2777, "step": 910 }, { "epoch": 0.89, "learning_rate": 0.00023, "loss": 3.2984, "step": 920 }, { "epoch": 0.9, "learning_rate": 0.0002325, "loss": 3.2537, "step": 930 }, { "epoch": 0.91, "learning_rate": 0.000235, "loss": 3.3175, "step": 940 }, { "epoch": 0.92, "learning_rate": 0.0002375, "loss": 3.287, "step": 950 }, { "epoch": 0.93, "learning_rate": 0.00024, "loss": 3.2052, "step": 960 }, { "epoch": 0.94, "learning_rate": 0.00024249999999999999, "loss": 3.2984, "step": 970 }, { "epoch": 0.95, "learning_rate": 0.000245, "loss": 3.137, "step": 980 }, { "epoch": 0.96, "learning_rate": 0.0002475, "loss": 3.2062, "step": 990 }, { "epoch": 0.97, "learning_rate": 0.00025, "loss": 3.3031, "step": 1000 }, { "epoch": 0.97, "eval_bp": 1.0, "eval_counts": [ 22475, 12870, 8033, 5319 ], "eval_loss": 1.880346655845642, "eval_precisions": [ 52.89355392906733, 32.97632468996618, 22.57031271950774, 16.520172686896295 ], "eval_ref_len": 39265, "eval_runtime": 493.392, "eval_samples_per_second": 7.019, "eval_score": 28.39810035019441, "eval_steps_per_second": 7.019, "eval_sys_len": 42491, "eval_totals": [ 42491, 39028, 35591, 32197 ], "step": 1000 }, { "epoch": 0.98, "learning_rate": 0.0002525, "loss": 3.3379, "step": 1010 }, { "epoch": 0.99, "learning_rate": 0.000255, "loss": 3.3144, "step": 1020 }, { "epoch": 1.0, "learning_rate": 0.0002575, "loss": 3.3039, "step": 1030 }, { "epoch": 1.01, "learning_rate": 0.00026000000000000003, "loss": 3.2099, "step": 1040 }, { "epoch": 1.02, "learning_rate": 0.00026250000000000004, "loss": 3.203, "step": 1050 }, { "epoch": 1.03, "learning_rate": 0.00026500000000000004, "loss": 3.027, "step": 1060 }, { "epoch": 1.04, "learning_rate": 0.0002675, "loss": 3.0334, "step": 1070 }, { "epoch": 1.04, "learning_rate": 0.00027, "loss": 3.1445, "step": 1080 }, { "epoch": 1.05, "learning_rate": 0.0002725, "loss": 3.0927, "step": 1090 }, { "epoch": 1.06, "learning_rate": 0.000275, "loss": 3.1355, "step": 1100 }, { "epoch": 1.07, "learning_rate": 0.0002775, "loss": 2.9411, "step": 1110 }, { "epoch": 1.08, "learning_rate": 0.00028000000000000003, "loss": 3.0683, "step": 1120 }, { "epoch": 1.09, "learning_rate": 0.0002825, "loss": 3.0704, "step": 1130 }, { "epoch": 1.1, "learning_rate": 0.000285, "loss": 3.1367, "step": 1140 }, { "epoch": 1.11, "learning_rate": 0.0002875, "loss": 3.1829, "step": 1150 }, { "epoch": 1.12, "learning_rate": 0.00029, "loss": 3.101, "step": 1160 }, { "epoch": 1.13, "learning_rate": 0.0002925, "loss": 3.0338, "step": 1170 }, { "epoch": 1.14, "learning_rate": 0.000295, "loss": 3.0992, "step": 1180 }, { "epoch": 1.15, "learning_rate": 0.00029749999999999997, "loss": 3.1358, "step": 1190 }, { "epoch": 1.16, "learning_rate": 0.0003, "loss": 3.0265, "step": 1200 }, { "epoch": 1.16, "eval_bp": 1.0, "eval_counts": [ 22069, 12741, 8000, 5303 ], "eval_loss": 1.8022379875183105, "eval_precisions": [ 49.50426200089726, 30.98718291704161, 21.2286055459732, 15.46019066499519 ], "eval_ref_len": 39265, "eval_runtime": 683.7176, "eval_samples_per_second": 5.065, "eval_score": 26.637302937486467, "eval_steps_per_second": 5.065, "eval_sys_len": 44580, "eval_totals": [ 44580, 41117, 37685, 34301 ], "step": 1200 }, { "epoch": 1.17, "learning_rate": 0.0003025, "loss": 2.9454, "step": 1210 }, { "epoch": 1.18, "learning_rate": 0.000305, "loss": 3.0222, "step": 1220 }, { "epoch": 1.19, "learning_rate": 0.0003075, "loss": 3.08, "step": 1230 }, { "epoch": 1.2, "learning_rate": 0.00031, "loss": 2.9951, "step": 1240 }, { "epoch": 1.21, "learning_rate": 0.0003125, "loss": 2.9722, "step": 1250 }, { "epoch": 1.22, "learning_rate": 0.000315, "loss": 2.9865, "step": 1260 }, { "epoch": 1.23, "learning_rate": 0.0003175, "loss": 2.9643, "step": 1270 }, { "epoch": 1.24, "learning_rate": 0.00032, "loss": 2.9591, "step": 1280 }, { "epoch": 1.25, "learning_rate": 0.00032250000000000003, "loss": 2.9174, "step": 1290 }, { "epoch": 1.26, "learning_rate": 0.00032500000000000004, "loss": 2.9484, "step": 1300 }, { "epoch": 1.27, "learning_rate": 0.00032750000000000005, "loss": 2.9186, "step": 1310 }, { "epoch": 1.28, "learning_rate": 0.00033, "loss": 2.9222, "step": 1320 }, { "epoch": 1.29, "learning_rate": 0.0003325, "loss": 2.9231, "step": 1330 }, { "epoch": 1.3, "learning_rate": 0.000335, "loss": 2.8752, "step": 1340 }, { "epoch": 1.31, "learning_rate": 0.0003375, "loss": 2.8569, "step": 1350 }, { "epoch": 1.32, "learning_rate": 0.00034, "loss": 3.0137, "step": 1360 }, { "epoch": 1.33, "learning_rate": 0.00034250000000000003, "loss": 2.9199, "step": 1370 }, { "epoch": 1.34, "learning_rate": 0.000345, "loss": 2.9726, "step": 1380 }, { "epoch": 1.34, "learning_rate": 0.0003475, "loss": 2.906, "step": 1390 }, { "epoch": 1.35, "learning_rate": 0.00035, "loss": 2.9055, "step": 1400 }, { "epoch": 1.35, "eval_bp": 1.0, "eval_counts": [ 23204, 13680, 8622, 5764 ], "eval_loss": 1.7418313026428223, "eval_precisions": [ 54.13147949423786, 34.697035026758314, 23.951330629479415, 17.671224477282482 ], "eval_ref_len": 39265, "eval_runtime": 544.287, "eval_samples_per_second": 6.362, "eval_score": 29.859644843289843, "eval_steps_per_second": 6.362, "eval_sys_len": 42866, "eval_totals": [ 42866, 39427, 35998, 32618 ], "step": 1400 }, { "epoch": 1.36, "learning_rate": 0.0003525, "loss": 2.8119, "step": 1410 }, { "epoch": 1.37, "learning_rate": 0.000355, "loss": 3.0053, "step": 1420 }, { "epoch": 1.38, "learning_rate": 0.0003575, "loss": 2.8639, "step": 1430 }, { "epoch": 1.39, "learning_rate": 0.00035999999999999997, "loss": 2.9254, "step": 1440 }, { "epoch": 1.4, "learning_rate": 0.0003625, "loss": 2.9477, "step": 1450 }, { "epoch": 1.41, "learning_rate": 0.000365, "loss": 3.0299, "step": 1460 }, { "epoch": 1.42, "learning_rate": 0.0003675, "loss": 2.8582, "step": 1470 }, { "epoch": 1.43, "learning_rate": 0.00037, "loss": 2.9433, "step": 1480 }, { "epoch": 1.44, "learning_rate": 0.0003725, "loss": 2.856, "step": 1490 }, { "epoch": 1.45, "learning_rate": 0.000375, "loss": 2.7648, "step": 1500 }, { "epoch": 1.46, "learning_rate": 0.0003775, "loss": 2.842, "step": 1510 }, { "epoch": 1.47, "learning_rate": 0.00038, "loss": 2.7609, "step": 1520 }, { "epoch": 1.48, "learning_rate": 0.00038250000000000003, "loss": 2.9978, "step": 1530 }, { "epoch": 1.49, "learning_rate": 0.00038500000000000003, "loss": 2.8184, "step": 1540 }, { "epoch": 1.5, "learning_rate": 0.00038750000000000004, "loss": 2.8703, "step": 1550 }, { "epoch": 1.51, "learning_rate": 0.00039000000000000005, "loss": 2.7835, "step": 1560 }, { "epoch": 1.52, "learning_rate": 0.0003925, "loss": 2.8729, "step": 1570 }, { "epoch": 1.53, "learning_rate": 0.000395, "loss": 2.8479, "step": 1580 }, { "epoch": 1.54, "learning_rate": 0.0003975, "loss": 2.8868, "step": 1590 }, { "epoch": 1.55, "learning_rate": 0.0004, "loss": 2.829, "step": 1600 }, { "epoch": 1.55, "eval_bp": 1.0, "eval_counts": [ 23892, 14346, 9260, 6235 ], "eval_loss": 1.7061405181884766, "eval_precisions": [ 57.79110831599826, 37.873227909923706, 26.900618772332454, 20.09993552546744 ], "eval_ref_len": 39265, "eval_runtime": 422.3766, "eval_samples_per_second": 8.199, "eval_score": 32.98280367671718, "eval_steps_per_second": 8.199, "eval_sys_len": 41342, "eval_totals": [ 41342, 37879, 34423, 31020 ], "step": 1600 }, { "epoch": 1.56, "learning_rate": 0.0004025, "loss": 2.7557, "step": 1610 }, { "epoch": 1.57, "learning_rate": 0.00040500000000000003, "loss": 2.8196, "step": 1620 }, { "epoch": 1.58, "learning_rate": 0.0004075, "loss": 2.6888, "step": 1630 }, { "epoch": 1.59, "learning_rate": 0.00041, "loss": 2.8251, "step": 1640 }, { "epoch": 1.6, "learning_rate": 0.0004125, "loss": 2.8098, "step": 1650 }, { "epoch": 1.61, "learning_rate": 0.000415, "loss": 2.8685, "step": 1660 }, { "epoch": 1.62, "learning_rate": 0.0004175, "loss": 2.7419, "step": 1670 }, { "epoch": 1.63, "learning_rate": 0.00042, "loss": 2.8122, "step": 1680 }, { "epoch": 1.64, "learning_rate": 0.00042249999999999997, "loss": 2.8728, "step": 1690 }, { "epoch": 1.64, "learning_rate": 0.000425, "loss": 2.8772, "step": 1700 }, { "epoch": 1.65, "learning_rate": 0.0004275, "loss": 2.7859, "step": 1710 }, { "epoch": 1.66, "learning_rate": 0.00043, "loss": 2.8488, "step": 1720 }, { "epoch": 1.67, "learning_rate": 0.0004325, "loss": 2.6726, "step": 1730 }, { "epoch": 1.68, "learning_rate": 0.000435, "loss": 2.8862, "step": 1740 }, { "epoch": 1.69, "learning_rate": 0.0004375, "loss": 2.7678, "step": 1750 }, { "epoch": 1.7, "learning_rate": 0.00044, "loss": 2.8049, "step": 1760 }, { "epoch": 1.71, "learning_rate": 0.0004425, "loss": 2.7327, "step": 1770 }, { "epoch": 1.72, "learning_rate": 0.00044500000000000003, "loss": 2.791, "step": 1780 }, { "epoch": 1.73, "learning_rate": 0.00044750000000000004, "loss": 2.7355, "step": 1790 }, { "epoch": 1.74, "learning_rate": 0.00045000000000000004, "loss": 2.7519, "step": 1800 }, { "epoch": 1.74, "eval_bp": 1.0, "eval_counts": [ 22400, 13234, 8438, 5686 ], "eval_loss": 1.6596581935882568, "eval_precisions": [ 51.272660684856255, 32.89993784959602, 22.93246364995244, 17.015291618038724 ], "eval_ref_len": 39265, "eval_runtime": 755.4171, "eval_samples_per_second": 4.584, "eval_score": 28.483461117844286, "eval_steps_per_second": 4.584, "eval_sys_len": 43688, "eval_totals": [ 43688, 40225, 36795, 33417 ], "step": 1800 }, { "epoch": 1.75, "learning_rate": 0.00045250000000000005, "loss": 2.8522, "step": 1810 }, { "epoch": 1.76, "learning_rate": 0.000455, "loss": 2.8933, "step": 1820 }, { "epoch": 1.77, "learning_rate": 0.0004575, "loss": 2.7223, "step": 1830 }, { "epoch": 1.78, "learning_rate": 0.00046, "loss": 2.7376, "step": 1840 }, { "epoch": 1.79, "learning_rate": 0.0004625, "loss": 2.6918, "step": 1850 }, { "epoch": 1.8, "learning_rate": 0.000465, "loss": 2.8248, "step": 1860 }, { "epoch": 1.81, "learning_rate": 0.00046750000000000003, "loss": 2.8406, "step": 1870 }, { "epoch": 1.82, "learning_rate": 0.00047, "loss": 2.6332, "step": 1880 }, { "epoch": 1.83, "learning_rate": 0.0004725, "loss": 2.8686, "step": 1890 }, { "epoch": 1.84, "learning_rate": 0.000475, "loss": 2.5939, "step": 1900 }, { "epoch": 1.85, "learning_rate": 0.0004775, "loss": 2.7392, "step": 1910 }, { "epoch": 1.86, "learning_rate": 0.00048, "loss": 2.7951, "step": 1920 }, { "epoch": 1.87, "learning_rate": 0.0004825, "loss": 2.7636, "step": 1930 }, { "epoch": 1.88, "learning_rate": 0.00048499999999999997, "loss": 2.8033, "step": 1940 }, { "epoch": 1.89, "learning_rate": 0.0004875, "loss": 2.7528, "step": 1950 }, { "epoch": 1.9, "learning_rate": 0.00049, "loss": 2.71, "step": 1960 }, { "epoch": 1.91, "learning_rate": 0.0004925, "loss": 2.7453, "step": 1970 }, { "epoch": 1.92, "learning_rate": 0.000495, "loss": 2.8172, "step": 1980 }, { "epoch": 1.93, "learning_rate": 0.0004975, "loss": 2.8219, "step": 1990 }, { "epoch": 1.94, "learning_rate": 0.0005, "loss": 2.6677, "step": 2000 }, { "epoch": 1.94, "eval_bp": 1.0, "eval_counts": [ 22981, 13820, 8891, 6002 ], "eval_loss": 1.6349472999572754, "eval_precisions": [ 57.158135601651495, 37.612606482867484, 26.690081652257444, 20.05211813443806 ], "eval_ref_len": 39265, "eval_runtime": 477.7754, "eval_samples_per_second": 7.248, "eval_score": 32.751440814753295, "eval_steps_per_second": 7.248, "eval_sys_len": 40206, "eval_totals": [ 40206, 36743, 33312, 29932 ], "step": 2000 }, { "epoch": 1.94, "learning_rate": 0.0005024999999999999, "loss": 2.7718, "step": 2010 }, { "epoch": 1.95, "learning_rate": 0.000505, "loss": 2.6684, "step": 2020 }, { "epoch": 1.96, "learning_rate": 0.0005074999999999999, "loss": 2.8251, "step": 2030 }, { "epoch": 1.97, "learning_rate": 0.00051, "loss": 2.6859, "step": 2040 }, { "epoch": 1.98, "learning_rate": 0.0005124999999999999, "loss": 2.7663, "step": 2050 }, { "epoch": 1.99, "learning_rate": 0.000515, "loss": 2.5372, "step": 2060 }, { "epoch": 2.0, "learning_rate": 0.0005175, "loss": 2.714, "step": 2070 }, { "epoch": 2.01, "learning_rate": 0.0005200000000000001, "loss": 2.4456, "step": 2080 }, { "epoch": 2.02, "learning_rate": 0.0005225, "loss": 2.4832, "step": 2090 }, { "epoch": 2.03, "learning_rate": 0.0005250000000000001, "loss": 2.498, "step": 2100 }, { "epoch": 2.04, "learning_rate": 0.0005275, "loss": 2.5938, "step": 2110 }, { "epoch": 2.05, "learning_rate": 0.0005300000000000001, "loss": 2.5316, "step": 2120 }, { "epoch": 2.06, "learning_rate": 0.0005325, "loss": 2.47, "step": 2130 }, { "epoch": 2.07, "learning_rate": 0.000535, "loss": 2.3812, "step": 2140 }, { "epoch": 2.08, "learning_rate": 0.0005375, "loss": 2.528, "step": 2150 }, { "epoch": 2.09, "learning_rate": 0.00054, "loss": 2.4435, "step": 2160 }, { "epoch": 2.1, "learning_rate": 0.0005425, "loss": 2.4662, "step": 2170 }, { "epoch": 2.11, "learning_rate": 0.000545, "loss": 2.4676, "step": 2180 }, { "epoch": 2.12, "learning_rate": 0.0005475, "loss": 2.5348, "step": 2190 }, { "epoch": 2.13, "learning_rate": 0.00055, "loss": 2.4479, "step": 2200 }, { "epoch": 2.13, "eval_bp": 1.0, "eval_counts": [ 24046, 14704, 9579, 6510 ], "eval_loss": 1.636635661125183, "eval_precisions": [ 61.04905047222504, 40.929714683368125, 29.480195734465884, 22.361144505890838 ], "eval_ref_len": 39265, "eval_runtime": 347.8604, "eval_samples_per_second": 9.955, "eval_score": 35.824942780304305, "eval_steps_per_second": 9.955, "eval_sys_len": 39388, "eval_totals": [ 39388, 35925, 32493, 29113 ], "step": 2200 }, { "epoch": 2.14, "learning_rate": 0.0005525, "loss": 2.4267, "step": 2210 }, { "epoch": 2.15, "learning_rate": 0.000555, "loss": 2.3329, "step": 2220 }, { "epoch": 2.16, "learning_rate": 0.0005575, "loss": 2.531, "step": 2230 }, { "epoch": 2.17, "learning_rate": 0.0005600000000000001, "loss": 2.5151, "step": 2240 }, { "epoch": 2.18, "learning_rate": 0.0005625000000000001, "loss": 2.5409, "step": 2250 }, { "epoch": 2.19, "learning_rate": 0.000565, "loss": 2.4443, "step": 2260 }, { "epoch": 2.2, "learning_rate": 0.0005675, "loss": 2.4513, "step": 2270 }, { "epoch": 2.21, "learning_rate": 0.00057, "loss": 2.385, "step": 2280 }, { "epoch": 2.22, "learning_rate": 0.0005725, "loss": 2.4345, "step": 2290 }, { "epoch": 2.23, "learning_rate": 0.000575, "loss": 2.5575, "step": 2300 }, { "epoch": 2.24, "learning_rate": 0.0005775, "loss": 2.4997, "step": 2310 }, { "epoch": 2.24, "learning_rate": 0.00058, "loss": 2.4726, "step": 2320 }, { "epoch": 2.25, "learning_rate": 0.0005825, "loss": 2.4253, "step": 2330 }, { "epoch": 2.26, "learning_rate": 0.000585, "loss": 2.4573, "step": 2340 }, { "epoch": 2.27, "learning_rate": 0.0005875, "loss": 2.4581, "step": 2350 }, { "epoch": 2.28, "learning_rate": 0.00059, "loss": 2.5404, "step": 2360 }, { "epoch": 2.29, "learning_rate": 0.0005925, "loss": 2.5728, "step": 2370 }, { "epoch": 2.3, "learning_rate": 0.0005949999999999999, "loss": 2.4639, "step": 2380 }, { "epoch": 2.31, "learning_rate": 0.0005975, "loss": 2.6251, "step": 2390 }, { "epoch": 2.32, "learning_rate": 0.0006, "loss": 2.4606, "step": 2400 }, { "epoch": 2.32, "eval_bp": 1.0, "eval_counts": [ 22981, 13834, 8940, 6036 ], "eval_loss": 1.6156156063079834, "eval_precisions": [ 52.36283266496537, 34.22139764996908, 24.18438565167992, 17.986233201227687 ], "eval_ref_len": 39265, "eval_runtime": 623.2248, "eval_samples_per_second": 5.557, "eval_score": 29.713173488652338, "eval_steps_per_second": 5.557, "eval_sys_len": 43888, "eval_totals": [ 43888, 40425, 36966, 33559 ], "step": 2400 }, { "epoch": 2.33, "learning_rate": 0.0006025000000000001, "loss": 2.5344, "step": 2410 }, { "epoch": 2.34, "learning_rate": 0.000605, "loss": 2.5511, "step": 2420 }, { "epoch": 2.35, "learning_rate": 0.0006075000000000001, "loss": 2.4595, "step": 2430 }, { "epoch": 2.36, "learning_rate": 0.00061, "loss": 2.4675, "step": 2440 }, { "epoch": 2.37, "learning_rate": 0.0006125000000000001, "loss": 2.4186, "step": 2450 }, { "epoch": 2.38, "learning_rate": 0.000615, "loss": 2.3482, "step": 2460 }, { "epoch": 2.39, "learning_rate": 0.0006175000000000001, "loss": 2.4653, "step": 2470 }, { "epoch": 2.4, "learning_rate": 0.00062, "loss": 2.5461, "step": 2480 }, { "epoch": 2.41, "learning_rate": 0.0006225000000000001, "loss": 2.4725, "step": 2490 }, { "epoch": 2.42, "learning_rate": 0.000625, "loss": 2.4625, "step": 2500 }, { "epoch": 2.43, "learning_rate": 0.0006274999999999999, "loss": 2.576, "step": 2510 }, { "epoch": 2.44, "learning_rate": 0.00063, "loss": 2.5898, "step": 2520 }, { "epoch": 2.45, "learning_rate": 0.0006324999999999999, "loss": 2.4081, "step": 2530 }, { "epoch": 2.46, "learning_rate": 0.000635, "loss": 2.3564, "step": 2540 }, { "epoch": 2.47, "learning_rate": 0.0006374999999999999, "loss": 2.3773, "step": 2550 }, { "epoch": 2.48, "learning_rate": 0.00064, "loss": 2.4186, "step": 2560 }, { "epoch": 2.49, "learning_rate": 0.0006425, "loss": 2.4812, "step": 2570 }, { "epoch": 2.5, "learning_rate": 0.0006450000000000001, "loss": 2.5489, "step": 2580 }, { "epoch": 2.51, "learning_rate": 0.0006475, "loss": 2.4855, "step": 2590 }, { "epoch": 2.52, "learning_rate": 0.0006500000000000001, "loss": 2.4259, "step": 2600 }, { "epoch": 2.52, "eval_bp": 1.0, "eval_counts": [ 24252, 14861, 9670, 6560 ], "eval_loss": 1.5984951257705688, "eval_precisions": [ 60.03416095254598, 40.23663832782802, 28.86136397552604, 21.777379411081235 ], "eval_ref_len": 39265, "eval_runtime": 371.9049, "eval_samples_per_second": 9.312, "eval_score": 35.10230625697626, "eval_steps_per_second": 9.312, "eval_sys_len": 40397, "eval_totals": [ 40397, 36934, 33505, 30123 ], "step": 2600 }, { "epoch": 2.53, "learning_rate": 0.0006525, "loss": 2.4609, "step": 2610 }, { "epoch": 2.54, "learning_rate": 0.0006550000000000001, "loss": 2.4513, "step": 2620 }, { "epoch": 2.54, "learning_rate": 0.0006575, "loss": 2.4666, "step": 2630 }, { "epoch": 2.55, "learning_rate": 0.00066, "loss": 2.5221, "step": 2640 }, { "epoch": 2.56, "learning_rate": 0.0006625, "loss": 2.4292, "step": 2650 }, { "epoch": 2.57, "learning_rate": 0.000665, "loss": 2.4118, "step": 2660 }, { "epoch": 2.58, "learning_rate": 0.0006675, "loss": 2.5148, "step": 2670 }, { "epoch": 2.59, "learning_rate": 0.00067, "loss": 2.5603, "step": 2680 }, { "epoch": 2.6, "learning_rate": 0.0006725, "loss": 2.4776, "step": 2690 }, { "epoch": 2.61, "learning_rate": 0.000675, "loss": 2.4421, "step": 2700 }, { "epoch": 2.62, "learning_rate": 0.0006775, "loss": 2.5626, "step": 2710 }, { "epoch": 2.63, "learning_rate": 0.00068, "loss": 2.4886, "step": 2720 }, { "epoch": 2.64, "learning_rate": 0.0006825000000000001, "loss": 2.5124, "step": 2730 }, { "epoch": 2.65, "learning_rate": 0.0006850000000000001, "loss": 2.4808, "step": 2740 }, { "epoch": 2.66, "learning_rate": 0.0006875, "loss": 2.4922, "step": 2750 }, { "epoch": 2.67, "learning_rate": 0.00069, "loss": 2.5108, "step": 2760 }, { "epoch": 2.68, "learning_rate": 0.0006925, "loss": 2.4229, "step": 2770 }, { "epoch": 2.69, "learning_rate": 0.000695, "loss": 2.3996, "step": 2780 }, { "epoch": 2.7, "learning_rate": 0.0006975, "loss": 2.56, "step": 2790 }, { "epoch": 2.71, "learning_rate": 0.0007, "loss": 2.4794, "step": 2800 }, { "epoch": 2.71, "eval_bp": 1.0, "eval_counts": [ 24440, 15095, 9863, 6691 ], "eval_loss": 1.5756924152374268, "eval_precisions": [ 61.18412817624233, 41.3765692670358, 29.839954013251443, 22.549118727462677 ], "eval_ref_len": 39265, "eval_runtime": 376.0541, "eval_samples_per_second": 9.209, "eval_score": 36.12686234680497, "eval_steps_per_second": 9.209, "eval_sys_len": 39945, "eval_totals": [ 39945, 36482, 33053, 29673 ], "step": 2800 } ], "max_steps": 258250, "num_train_epochs": 250, "total_flos": 4.09482327561216e+16, "trial_name": null, "trial_params": null }