sedrickkeh's picture
Training in progress, epoch 2
e43253c verified
{"current_steps": 10, "total_steps": 2004, "loss": 0.7782, "learning_rate": 5e-06, "epoch": 0.014967259120673527, "percentage": 0.5, "elapsed_time": "0:07:27", "remaining_time": "1 day, 0:48:41"}
{"current_steps": 20, "total_steps": 2004, "loss": 0.6964, "learning_rate": 5e-06, "epoch": 0.029934518241347054, "percentage": 1.0, "elapsed_time": "0:14:51", "remaining_time": "1 day, 0:34:44"}
{"current_steps": 30, "total_steps": 2004, "loss": 0.6801, "learning_rate": 5e-06, "epoch": 0.04490177736202058, "percentage": 1.5, "elapsed_time": "0:22:15", "remaining_time": "1 day, 0:24:59"}
{"current_steps": 40, "total_steps": 2004, "loss": 0.6742, "learning_rate": 5e-06, "epoch": 0.05986903648269411, "percentage": 2.0, "elapsed_time": "0:29:39", "remaining_time": "1 day, 0:16:33"}
{"current_steps": 50, "total_steps": 2004, "loss": 0.6556, "learning_rate": 5e-06, "epoch": 0.07483629560336763, "percentage": 2.5, "elapsed_time": "0:37:04", "remaining_time": "1 day, 0:08:34"}
{"current_steps": 60, "total_steps": 2004, "loss": 0.6652, "learning_rate": 5e-06, "epoch": 0.08980355472404115, "percentage": 2.99, "elapsed_time": "0:44:28", "remaining_time": "1 day, 0:00:48"}
{"current_steps": 70, "total_steps": 2004, "loss": 0.6606, "learning_rate": 5e-06, "epoch": 0.10477081384471469, "percentage": 3.49, "elapsed_time": "0:51:52", "remaining_time": "23:53:10"}
{"current_steps": 80, "total_steps": 2004, "loss": 0.6575, "learning_rate": 5e-06, "epoch": 0.11973807296538821, "percentage": 3.99, "elapsed_time": "0:59:16", "remaining_time": "23:45:33"}
{"current_steps": 90, "total_steps": 2004, "loss": 0.6513, "learning_rate": 5e-06, "epoch": 0.13470533208606175, "percentage": 4.49, "elapsed_time": "1:06:40", "remaining_time": "23:38:00"}
{"current_steps": 100, "total_steps": 2004, "loss": 0.6523, "learning_rate": 5e-06, "epoch": 0.14967259120673526, "percentage": 4.99, "elapsed_time": "1:14:05", "remaining_time": "23:30:36"}
{"current_steps": 110, "total_steps": 2004, "loss": 0.6433, "learning_rate": 5e-06, "epoch": 0.1646398503274088, "percentage": 5.49, "elapsed_time": "1:21:29", "remaining_time": "23:23:06"}
{"current_steps": 120, "total_steps": 2004, "loss": 0.6484, "learning_rate": 5e-06, "epoch": 0.1796071094480823, "percentage": 5.99, "elapsed_time": "1:28:53", "remaining_time": "23:15:33"}
{"current_steps": 130, "total_steps": 2004, "loss": 0.6465, "learning_rate": 5e-06, "epoch": 0.19457436856875585, "percentage": 6.49, "elapsed_time": "1:36:17", "remaining_time": "23:08:00"}
{"current_steps": 140, "total_steps": 2004, "loss": 0.6399, "learning_rate": 5e-06, "epoch": 0.20954162768942938, "percentage": 6.99, "elapsed_time": "1:43:41", "remaining_time": "23:00:30"}
{"current_steps": 150, "total_steps": 2004, "loss": 0.6465, "learning_rate": 5e-06, "epoch": 0.2245088868101029, "percentage": 7.49, "elapsed_time": "1:51:05", "remaining_time": "22:53:00"}
{"current_steps": 160, "total_steps": 2004, "loss": 0.6439, "learning_rate": 5e-06, "epoch": 0.23947614593077643, "percentage": 7.98, "elapsed_time": "1:58:29", "remaining_time": "22:45:32"}
{"current_steps": 170, "total_steps": 2004, "loss": 0.6338, "learning_rate": 5e-06, "epoch": 0.25444340505144997, "percentage": 8.48, "elapsed_time": "2:05:52", "remaining_time": "22:38:02"}
{"current_steps": 180, "total_steps": 2004, "loss": 0.639, "learning_rate": 5e-06, "epoch": 0.2694106641721235, "percentage": 8.98, "elapsed_time": "2:13:16", "remaining_time": "22:30:33"}
{"current_steps": 190, "total_steps": 2004, "loss": 0.644, "learning_rate": 5e-06, "epoch": 0.284377923292797, "percentage": 9.48, "elapsed_time": "2:20:40", "remaining_time": "22:23:06"}
{"current_steps": 200, "total_steps": 2004, "loss": 0.6324, "learning_rate": 5e-06, "epoch": 0.2993451824134705, "percentage": 9.98, "elapsed_time": "2:28:04", "remaining_time": "22:15:39"}
{"current_steps": 210, "total_steps": 2004, "loss": 0.6353, "learning_rate": 5e-06, "epoch": 0.31431244153414406, "percentage": 10.48, "elapsed_time": "2:35:28", "remaining_time": "22:08:14"}
{"current_steps": 220, "total_steps": 2004, "loss": 0.6336, "learning_rate": 5e-06, "epoch": 0.3292797006548176, "percentage": 10.98, "elapsed_time": "2:42:53", "remaining_time": "22:00:50"}
{"current_steps": 230, "total_steps": 2004, "loss": 0.6354, "learning_rate": 5e-06, "epoch": 0.34424695977549113, "percentage": 11.48, "elapsed_time": "2:50:17", "remaining_time": "21:53:26"}
{"current_steps": 240, "total_steps": 2004, "loss": 0.631, "learning_rate": 5e-06, "epoch": 0.3592142188961646, "percentage": 11.98, "elapsed_time": "2:57:41", "remaining_time": "21:46:03"}
{"current_steps": 250, "total_steps": 2004, "loss": 0.6324, "learning_rate": 5e-06, "epoch": 0.37418147801683815, "percentage": 12.48, "elapsed_time": "3:05:06", "remaining_time": "21:38:41"}
{"current_steps": 260, "total_steps": 2004, "loss": 0.6341, "learning_rate": 5e-06, "epoch": 0.3891487371375117, "percentage": 12.97, "elapsed_time": "3:12:30", "remaining_time": "21:31:17"}
{"current_steps": 270, "total_steps": 2004, "loss": 0.6279, "learning_rate": 5e-06, "epoch": 0.40411599625818523, "percentage": 13.47, "elapsed_time": "3:19:54", "remaining_time": "21:23:54"}
{"current_steps": 280, "total_steps": 2004, "loss": 0.6265, "learning_rate": 5e-06, "epoch": 0.41908325537885877, "percentage": 13.97, "elapsed_time": "3:27:19", "remaining_time": "21:16:31"}
{"current_steps": 290, "total_steps": 2004, "loss": 0.6326, "learning_rate": 5e-06, "epoch": 0.43405051449953225, "percentage": 14.47, "elapsed_time": "3:34:43", "remaining_time": "21:09:07"}
{"current_steps": 300, "total_steps": 2004, "loss": 0.6251, "learning_rate": 5e-06, "epoch": 0.4490177736202058, "percentage": 14.97, "elapsed_time": "3:42:07", "remaining_time": "21:01:42"}
{"current_steps": 310, "total_steps": 2004, "loss": 0.6314, "learning_rate": 5e-06, "epoch": 0.4639850327408793, "percentage": 15.47, "elapsed_time": "3:49:32", "remaining_time": "20:54:19"}
{"current_steps": 320, "total_steps": 2004, "loss": 0.6246, "learning_rate": 5e-06, "epoch": 0.47895229186155286, "percentage": 15.97, "elapsed_time": "3:56:56", "remaining_time": "20:46:55"}
{"current_steps": 330, "total_steps": 2004, "loss": 0.617, "learning_rate": 5e-06, "epoch": 0.4939195509822264, "percentage": 16.47, "elapsed_time": "4:04:21", "remaining_time": "20:39:32"}
{"current_steps": 340, "total_steps": 2004, "loss": 0.6283, "learning_rate": 5e-06, "epoch": 0.5088868101028999, "percentage": 16.97, "elapsed_time": "4:11:45", "remaining_time": "20:32:08"}
{"current_steps": 350, "total_steps": 2004, "loss": 0.6231, "learning_rate": 5e-06, "epoch": 0.5238540692235735, "percentage": 17.47, "elapsed_time": "4:19:09", "remaining_time": "20:24:43"}
{"current_steps": 360, "total_steps": 2004, "loss": 0.6252, "learning_rate": 5e-06, "epoch": 0.538821328344247, "percentage": 17.96, "elapsed_time": "4:26:34", "remaining_time": "20:17:19"}
{"current_steps": 370, "total_steps": 2004, "loss": 0.6276, "learning_rate": 5e-06, "epoch": 0.5537885874649204, "percentage": 18.46, "elapsed_time": "4:33:58", "remaining_time": "20:09:56"}
{"current_steps": 380, "total_steps": 2004, "loss": 0.6161, "learning_rate": 5e-06, "epoch": 0.568755846585594, "percentage": 18.96, "elapsed_time": "4:41:22", "remaining_time": "20:02:32"}
{"current_steps": 390, "total_steps": 2004, "loss": 0.6319, "learning_rate": 5e-06, "epoch": 0.5837231057062675, "percentage": 19.46, "elapsed_time": "4:48:47", "remaining_time": "19:55:08"}
{"current_steps": 400, "total_steps": 2004, "loss": 0.6283, "learning_rate": 5e-06, "epoch": 0.598690364826941, "percentage": 19.96, "elapsed_time": "4:56:11", "remaining_time": "19:47:45"}
{"current_steps": 410, "total_steps": 2004, "loss": 0.6242, "learning_rate": 5e-06, "epoch": 0.6136576239476146, "percentage": 20.46, "elapsed_time": "5:03:36", "remaining_time": "19:40:22"}
{"current_steps": 420, "total_steps": 2004, "loss": 0.6229, "learning_rate": 5e-06, "epoch": 0.6286248830682881, "percentage": 20.96, "elapsed_time": "5:11:00", "remaining_time": "19:32:58"}
{"current_steps": 430, "total_steps": 2004, "loss": 0.6222, "learning_rate": 5e-06, "epoch": 0.6435921421889617, "percentage": 21.46, "elapsed_time": "5:18:25", "remaining_time": "19:25:33"}
{"current_steps": 440, "total_steps": 2004, "loss": 0.6214, "learning_rate": 5e-06, "epoch": 0.6585594013096352, "percentage": 21.96, "elapsed_time": "5:25:49", "remaining_time": "19:18:08"}
{"current_steps": 450, "total_steps": 2004, "loss": 0.6257, "learning_rate": 5e-06, "epoch": 0.6735266604303087, "percentage": 22.46, "elapsed_time": "5:33:13", "remaining_time": "19:10:44"}
{"current_steps": 460, "total_steps": 2004, "loss": 0.6239, "learning_rate": 5e-06, "epoch": 0.6884939195509823, "percentage": 22.95, "elapsed_time": "5:40:37", "remaining_time": "19:03:19"}
{"current_steps": 470, "total_steps": 2004, "loss": 0.6278, "learning_rate": 5e-06, "epoch": 0.7034611786716558, "percentage": 23.45, "elapsed_time": "5:48:02", "remaining_time": "18:55:55"}
{"current_steps": 480, "total_steps": 2004, "loss": 0.6195, "learning_rate": 5e-06, "epoch": 0.7184284377923292, "percentage": 23.95, "elapsed_time": "5:55:26", "remaining_time": "18:48:32"}
{"current_steps": 490, "total_steps": 2004, "loss": 0.6261, "learning_rate": 5e-06, "epoch": 0.7333956969130028, "percentage": 24.45, "elapsed_time": "6:02:51", "remaining_time": "18:41:08"}
{"current_steps": 500, "total_steps": 2004, "loss": 0.6147, "learning_rate": 5e-06, "epoch": 0.7483629560336763, "percentage": 24.95, "elapsed_time": "6:10:15", "remaining_time": "18:33:44"}
{"current_steps": 510, "total_steps": 2004, "loss": 0.6156, "learning_rate": 5e-06, "epoch": 0.7633302151543498, "percentage": 25.45, "elapsed_time": "6:17:40", "remaining_time": "18:26:21"}
{"current_steps": 520, "total_steps": 2004, "loss": 0.6218, "learning_rate": 5e-06, "epoch": 0.7782974742750234, "percentage": 25.95, "elapsed_time": "6:25:04", "remaining_time": "18:18:57"}
{"current_steps": 530, "total_steps": 2004, "loss": 0.6255, "learning_rate": 5e-06, "epoch": 0.7932647333956969, "percentage": 26.45, "elapsed_time": "6:32:29", "remaining_time": "18:11:33"}
{"current_steps": 540, "total_steps": 2004, "loss": 0.6285, "learning_rate": 5e-06, "epoch": 0.8082319925163705, "percentage": 26.95, "elapsed_time": "6:39:53", "remaining_time": "18:04:09"}
{"current_steps": 550, "total_steps": 2004, "loss": 0.6156, "learning_rate": 5e-06, "epoch": 0.823199251637044, "percentage": 27.45, "elapsed_time": "6:47:17", "remaining_time": "17:56:45"}
{"current_steps": 560, "total_steps": 2004, "loss": 0.6183, "learning_rate": 5e-06, "epoch": 0.8381665107577175, "percentage": 27.94, "elapsed_time": "6:54:42", "remaining_time": "17:49:20"}
{"current_steps": 570, "total_steps": 2004, "loss": 0.6148, "learning_rate": 5e-06, "epoch": 0.8531337698783911, "percentage": 28.44, "elapsed_time": "7:02:06", "remaining_time": "17:41:56"}
{"current_steps": 580, "total_steps": 2004, "loss": 0.6126, "learning_rate": 5e-06, "epoch": 0.8681010289990645, "percentage": 28.94, "elapsed_time": "7:09:31", "remaining_time": "17:34:32"}
{"current_steps": 590, "total_steps": 2004, "loss": 0.6147, "learning_rate": 5e-06, "epoch": 0.883068288119738, "percentage": 29.44, "elapsed_time": "7:16:55", "remaining_time": "17:27:08"}
{"current_steps": 600, "total_steps": 2004, "loss": 0.6152, "learning_rate": 5e-06, "epoch": 0.8980355472404116, "percentage": 29.94, "elapsed_time": "7:24:19", "remaining_time": "17:19:44"}
{"current_steps": 610, "total_steps": 2004, "loss": 0.6167, "learning_rate": 5e-06, "epoch": 0.9130028063610851, "percentage": 30.44, "elapsed_time": "7:31:44", "remaining_time": "17:12:19"}
{"current_steps": 620, "total_steps": 2004, "loss": 0.6106, "learning_rate": 5e-06, "epoch": 0.9279700654817586, "percentage": 30.94, "elapsed_time": "7:39:08", "remaining_time": "17:04:55"}
{"current_steps": 630, "total_steps": 2004, "loss": 0.6176, "learning_rate": 5e-06, "epoch": 0.9429373246024322, "percentage": 31.44, "elapsed_time": "7:46:32", "remaining_time": "16:57:30"}
{"current_steps": 640, "total_steps": 2004, "loss": 0.6137, "learning_rate": 5e-06, "epoch": 0.9579045837231057, "percentage": 31.94, "elapsed_time": "7:53:56", "remaining_time": "16:50:06"}
{"current_steps": 650, "total_steps": 2004, "loss": 0.6022, "learning_rate": 5e-06, "epoch": 0.9728718428437793, "percentage": 32.44, "elapsed_time": "8:01:21", "remaining_time": "16:42:42"}
{"current_steps": 660, "total_steps": 2004, "loss": 0.616, "learning_rate": 5e-06, "epoch": 0.9878391019644528, "percentage": 32.93, "elapsed_time": "8:08:45", "remaining_time": "16:35:17"}
{"current_steps": 668, "total_steps": 2004, "eval_loss": 0.6200858950614929, "epoch": 0.9998129092609915, "percentage": 33.33, "elapsed_time": "8:23:22", "remaining_time": "16:46:44"}
{"current_steps": 670, "total_steps": 2004, "loss": 0.6487, "learning_rate": 5e-06, "epoch": 1.0028063610851263, "percentage": 33.43, "elapsed_time": "8:26:02", "remaining_time": "16:47:33"}
{"current_steps": 680, "total_steps": 2004, "loss": 0.545, "learning_rate": 5e-06, "epoch": 1.0177736202057999, "percentage": 33.93, "elapsed_time": "8:33:26", "remaining_time": "16:39:42"}
{"current_steps": 690, "total_steps": 2004, "loss": 0.5337, "learning_rate": 5e-06, "epoch": 1.0327408793264734, "percentage": 34.43, "elapsed_time": "8:40:51", "remaining_time": "16:31:53"}
{"current_steps": 700, "total_steps": 2004, "loss": 0.5325, "learning_rate": 5e-06, "epoch": 1.047708138447147, "percentage": 34.93, "elapsed_time": "8:48:15", "remaining_time": "16:24:03"}
{"current_steps": 710, "total_steps": 2004, "loss": 0.5318, "learning_rate": 5e-06, "epoch": 1.0626753975678205, "percentage": 35.43, "elapsed_time": "8:55:39", "remaining_time": "16:16:15"}
{"current_steps": 720, "total_steps": 2004, "loss": 0.5295, "learning_rate": 5e-06, "epoch": 1.077642656688494, "percentage": 35.93, "elapsed_time": "9:03:03", "remaining_time": "16:08:28"}
{"current_steps": 730, "total_steps": 2004, "loss": 0.5317, "learning_rate": 5e-06, "epoch": 1.0926099158091676, "percentage": 36.43, "elapsed_time": "9:10:28", "remaining_time": "16:00:40"}
{"current_steps": 740, "total_steps": 2004, "loss": 0.5315, "learning_rate": 5e-06, "epoch": 1.1075771749298409, "percentage": 36.93, "elapsed_time": "9:17:52", "remaining_time": "15:52:54"}
{"current_steps": 750, "total_steps": 2004, "loss": 0.533, "learning_rate": 5e-06, "epoch": 1.1225444340505144, "percentage": 37.43, "elapsed_time": "9:25:16", "remaining_time": "15:45:08"}
{"current_steps": 760, "total_steps": 2004, "loss": 0.5316, "learning_rate": 5e-06, "epoch": 1.137511693171188, "percentage": 37.92, "elapsed_time": "9:32:40", "remaining_time": "15:37:22"}
{"current_steps": 770, "total_steps": 2004, "loss": 0.5359, "learning_rate": 5e-06, "epoch": 1.1524789522918615, "percentage": 38.42, "elapsed_time": "9:40:04", "remaining_time": "15:29:37"}
{"current_steps": 780, "total_steps": 2004, "loss": 0.5335, "learning_rate": 5e-06, "epoch": 1.167446211412535, "percentage": 38.92, "elapsed_time": "9:47:29", "remaining_time": "15:21:54"}
{"current_steps": 790, "total_steps": 2004, "loss": 0.5353, "learning_rate": 5e-06, "epoch": 1.1824134705332086, "percentage": 39.42, "elapsed_time": "9:54:53", "remaining_time": "15:14:10"}
{"current_steps": 800, "total_steps": 2004, "loss": 0.534, "learning_rate": 5e-06, "epoch": 1.197380729653882, "percentage": 39.92, "elapsed_time": "10:02:17", "remaining_time": "15:06:27"}
{"current_steps": 810, "total_steps": 2004, "loss": 0.5342, "learning_rate": 5e-06, "epoch": 1.2123479887745556, "percentage": 40.42, "elapsed_time": "10:09:41", "remaining_time": "14:58:44"}
{"current_steps": 820, "total_steps": 2004, "loss": 0.5406, "learning_rate": 5e-06, "epoch": 1.2273152478952292, "percentage": 40.92, "elapsed_time": "10:17:06", "remaining_time": "14:51:02"}
{"current_steps": 830, "total_steps": 2004, "loss": 0.5402, "learning_rate": 5e-06, "epoch": 1.2422825070159027, "percentage": 41.42, "elapsed_time": "10:24:30", "remaining_time": "14:43:20"}
{"current_steps": 840, "total_steps": 2004, "loss": 0.5421, "learning_rate": 5e-06, "epoch": 1.2572497661365762, "percentage": 41.92, "elapsed_time": "10:31:54", "remaining_time": "14:35:38"}
{"current_steps": 850, "total_steps": 2004, "loss": 0.5341, "learning_rate": 5e-06, "epoch": 1.2722170252572498, "percentage": 42.42, "elapsed_time": "10:39:18", "remaining_time": "14:27:57"}
{"current_steps": 860, "total_steps": 2004, "loss": 0.5396, "learning_rate": 5e-06, "epoch": 1.2871842843779233, "percentage": 42.91, "elapsed_time": "10:46:42", "remaining_time": "14:20:16"}
{"current_steps": 870, "total_steps": 2004, "loss": 0.5401, "learning_rate": 5e-06, "epoch": 1.3021515434985969, "percentage": 43.41, "elapsed_time": "10:54:06", "remaining_time": "14:12:36"}
{"current_steps": 880, "total_steps": 2004, "loss": 0.5379, "learning_rate": 5e-06, "epoch": 1.3171188026192704, "percentage": 43.91, "elapsed_time": "11:01:31", "remaining_time": "14:04:56"}
{"current_steps": 890, "total_steps": 2004, "loss": 0.5338, "learning_rate": 5e-06, "epoch": 1.332086061739944, "percentage": 44.41, "elapsed_time": "11:08:55", "remaining_time": "13:57:16"}
{"current_steps": 900, "total_steps": 2004, "loss": 0.5341, "learning_rate": 5e-06, "epoch": 1.3470533208606175, "percentage": 44.91, "elapsed_time": "11:16:19", "remaining_time": "13:49:37"}
{"current_steps": 910, "total_steps": 2004, "loss": 0.5465, "learning_rate": 5e-06, "epoch": 1.362020579981291, "percentage": 45.41, "elapsed_time": "11:23:43", "remaining_time": "13:41:58"}
{"current_steps": 920, "total_steps": 2004, "loss": 0.5419, "learning_rate": 5e-06, "epoch": 1.3769878391019645, "percentage": 45.91, "elapsed_time": "11:31:07", "remaining_time": "13:34:19"}
{"current_steps": 930, "total_steps": 2004, "loss": 0.5431, "learning_rate": 5e-06, "epoch": 1.3919550982226379, "percentage": 46.41, "elapsed_time": "11:38:31", "remaining_time": "13:26:41"}
{"current_steps": 940, "total_steps": 2004, "loss": 0.5432, "learning_rate": 5e-06, "epoch": 1.4069223573433116, "percentage": 46.91, "elapsed_time": "11:45:56", "remaining_time": "13:19:03"}
{"current_steps": 950, "total_steps": 2004, "loss": 0.5419, "learning_rate": 5e-06, "epoch": 1.421889616463985, "percentage": 47.41, "elapsed_time": "11:53:20", "remaining_time": "13:11:26"}
{"current_steps": 960, "total_steps": 2004, "loss": 0.5491, "learning_rate": 5e-06, "epoch": 1.4368568755846587, "percentage": 47.9, "elapsed_time": "12:00:44", "remaining_time": "13:03:48"}
{"current_steps": 970, "total_steps": 2004, "loss": 0.5508, "learning_rate": 5e-06, "epoch": 1.451824134705332, "percentage": 48.4, "elapsed_time": "12:08:09", "remaining_time": "12:56:11"}
{"current_steps": 980, "total_steps": 2004, "loss": 0.5394, "learning_rate": 5e-06, "epoch": 1.4667913938260055, "percentage": 48.9, "elapsed_time": "12:15:33", "remaining_time": "12:48:35"}
{"current_steps": 990, "total_steps": 2004, "loss": 0.5439, "learning_rate": 5e-06, "epoch": 1.481758652946679, "percentage": 49.4, "elapsed_time": "12:22:57", "remaining_time": "12:40:58"}
{"current_steps": 1000, "total_steps": 2004, "loss": 0.5382, "learning_rate": 5e-06, "epoch": 1.4967259120673526, "percentage": 49.9, "elapsed_time": "12:30:22", "remaining_time": "12:33:22"}
{"current_steps": 1010, "total_steps": 2004, "loss": 0.5421, "learning_rate": 5e-06, "epoch": 1.5116931711880262, "percentage": 50.4, "elapsed_time": "12:37:46", "remaining_time": "12:25:46"}
{"current_steps": 1020, "total_steps": 2004, "loss": 0.5465, "learning_rate": 5e-06, "epoch": 1.5266604303086997, "percentage": 50.9, "elapsed_time": "12:45:10", "remaining_time": "12:18:10"}
{"current_steps": 1030, "total_steps": 2004, "loss": 0.5403, "learning_rate": 5e-06, "epoch": 1.5416276894293732, "percentage": 51.4, "elapsed_time": "12:52:35", "remaining_time": "12:10:34"}
{"current_steps": 1040, "total_steps": 2004, "loss": 0.5451, "learning_rate": 5e-06, "epoch": 1.5565949485500468, "percentage": 51.9, "elapsed_time": "12:59:59", "remaining_time": "12:02:59"}
{"current_steps": 1050, "total_steps": 2004, "loss": 0.5405, "learning_rate": 5e-06, "epoch": 1.5715622076707203, "percentage": 52.4, "elapsed_time": "13:07:23", "remaining_time": "11:55:24"}
{"current_steps": 1060, "total_steps": 2004, "loss": 0.5462, "learning_rate": 5e-06, "epoch": 1.5865294667913938, "percentage": 52.89, "elapsed_time": "13:14:48", "remaining_time": "11:47:49"}
{"current_steps": 1070, "total_steps": 2004, "loss": 0.5455, "learning_rate": 5e-06, "epoch": 1.6014967259120674, "percentage": 53.39, "elapsed_time": "13:22:12", "remaining_time": "11:40:14"}
{"current_steps": 1080, "total_steps": 2004, "loss": 0.5487, "learning_rate": 5e-06, "epoch": 1.616463985032741, "percentage": 53.89, "elapsed_time": "13:29:36", "remaining_time": "11:32:39"}
{"current_steps": 1090, "total_steps": 2004, "loss": 0.5449, "learning_rate": 5e-06, "epoch": 1.6314312441534145, "percentage": 54.39, "elapsed_time": "13:37:00", "remaining_time": "11:25:05"}
{"current_steps": 1100, "total_steps": 2004, "loss": 0.5485, "learning_rate": 5e-06, "epoch": 1.646398503274088, "percentage": 54.89, "elapsed_time": "13:44:24", "remaining_time": "11:17:30"}
{"current_steps": 1110, "total_steps": 2004, "loss": 0.5378, "learning_rate": 5e-06, "epoch": 1.6613657623947615, "percentage": 55.39, "elapsed_time": "13:51:48", "remaining_time": "11:09:56"}
{"current_steps": 1120, "total_steps": 2004, "loss": 0.5366, "learning_rate": 5e-06, "epoch": 1.6763330215154348, "percentage": 55.89, "elapsed_time": "13:59:12", "remaining_time": "11:02:22"}
{"current_steps": 1130, "total_steps": 2004, "loss": 0.5378, "learning_rate": 5e-06, "epoch": 1.6913002806361086, "percentage": 56.39, "elapsed_time": "14:06:36", "remaining_time": "10:54:48"}
{"current_steps": 1140, "total_steps": 2004, "loss": 0.5406, "learning_rate": 5e-06, "epoch": 1.706267539756782, "percentage": 56.89, "elapsed_time": "14:14:01", "remaining_time": "10:47:15"}
{"current_steps": 1150, "total_steps": 2004, "loss": 0.5381, "learning_rate": 5e-06, "epoch": 1.7212347988774557, "percentage": 57.39, "elapsed_time": "14:21:25", "remaining_time": "10:39:42"}
{"current_steps": 1160, "total_steps": 2004, "loss": 0.5418, "learning_rate": 5e-06, "epoch": 1.736202057998129, "percentage": 57.88, "elapsed_time": "14:28:49", "remaining_time": "10:32:09"}
{"current_steps": 1170, "total_steps": 2004, "loss": 0.5403, "learning_rate": 5e-06, "epoch": 1.7511693171188027, "percentage": 58.38, "elapsed_time": "14:36:14", "remaining_time": "10:24:36"}
{"current_steps": 1180, "total_steps": 2004, "loss": 0.5469, "learning_rate": 5e-06, "epoch": 1.766136576239476, "percentage": 58.88, "elapsed_time": "14:43:38", "remaining_time": "10:17:03"}
{"current_steps": 1190, "total_steps": 2004, "loss": 0.5467, "learning_rate": 5e-06, "epoch": 1.7811038353601498, "percentage": 59.38, "elapsed_time": "14:51:02", "remaining_time": "10:09:30"}
{"current_steps": 1200, "total_steps": 2004, "loss": 0.5421, "learning_rate": 5e-06, "epoch": 1.7960710944808231, "percentage": 59.88, "elapsed_time": "14:58:26", "remaining_time": "10:01:57"}
{"current_steps": 1210, "total_steps": 2004, "loss": 0.5488, "learning_rate": 5e-06, "epoch": 1.8110383536014967, "percentage": 60.38, "elapsed_time": "15:05:50", "remaining_time": "9:54:24"}
{"current_steps": 1220, "total_steps": 2004, "loss": 0.5363, "learning_rate": 5e-06, "epoch": 1.8260056127221702, "percentage": 60.88, "elapsed_time": "15:13:14", "remaining_time": "9:46:52"}
{"current_steps": 1230, "total_steps": 2004, "loss": 0.54, "learning_rate": 5e-06, "epoch": 1.8409728718428437, "percentage": 61.38, "elapsed_time": "15:20:39", "remaining_time": "9:39:20"}
{"current_steps": 1240, "total_steps": 2004, "loss": 0.5417, "learning_rate": 5e-06, "epoch": 1.8559401309635173, "percentage": 61.88, "elapsed_time": "15:28:03", "remaining_time": "9:31:48"}
{"current_steps": 1250, "total_steps": 2004, "loss": 0.5469, "learning_rate": 5e-06, "epoch": 1.8709073900841908, "percentage": 62.38, "elapsed_time": "15:35:27", "remaining_time": "9:24:16"}
{"current_steps": 1260, "total_steps": 2004, "loss": 0.5376, "learning_rate": 5e-06, "epoch": 1.8858746492048644, "percentage": 62.87, "elapsed_time": "15:42:51", "remaining_time": "9:16:44"}
{"current_steps": 1270, "total_steps": 2004, "loss": 0.5439, "learning_rate": 5e-06, "epoch": 1.900841908325538, "percentage": 63.37, "elapsed_time": "15:50:16", "remaining_time": "9:09:12"}
{"current_steps": 1280, "total_steps": 2004, "loss": 0.5444, "learning_rate": 5e-06, "epoch": 1.9158091674462114, "percentage": 63.87, "elapsed_time": "15:57:40", "remaining_time": "9:01:41"}
{"current_steps": 1290, "total_steps": 2004, "loss": 0.5419, "learning_rate": 5e-06, "epoch": 1.930776426566885, "percentage": 64.37, "elapsed_time": "16:05:05", "remaining_time": "8:54:09"}
{"current_steps": 1300, "total_steps": 2004, "loss": 0.5433, "learning_rate": 5e-06, "epoch": 1.9457436856875585, "percentage": 64.87, "elapsed_time": "16:12:29", "remaining_time": "8:46:38"}
{"current_steps": 1310, "total_steps": 2004, "loss": 0.5503, "learning_rate": 5e-06, "epoch": 1.960710944808232, "percentage": 65.37, "elapsed_time": "16:19:53", "remaining_time": "8:39:07"}
{"current_steps": 1320, "total_steps": 2004, "loss": 0.5438, "learning_rate": 5e-06, "epoch": 1.9756782039289056, "percentage": 65.87, "elapsed_time": "16:27:17", "remaining_time": "8:31:35"}
{"current_steps": 1330, "total_steps": 2004, "loss": 0.5488, "learning_rate": 5e-06, "epoch": 1.990645463049579, "percentage": 66.37, "elapsed_time": "16:34:42", "remaining_time": "8:24:04"}
{"current_steps": 1336, "total_steps": 2004, "eval_loss": 0.6217324733734131, "epoch": 1.999625818521983, "percentage": 66.67, "elapsed_time": "16:47:54", "remaining_time": "8:23:57"}
{"current_steps": 1340, "total_steps": 2004, "loss": 0.5617, "learning_rate": 5e-06, "epoch": 2.0056127221702527, "percentage": 66.87, "elapsed_time": "16:52:06", "remaining_time": "8:21:31"}
{"current_steps": 1350, "total_steps": 2004, "loss": 0.4541, "learning_rate": 5e-06, "epoch": 2.020579981290926, "percentage": 67.37, "elapsed_time": "16:59:30", "remaining_time": "8:13:53"}
{"current_steps": 1360, "total_steps": 2004, "loss": 0.443, "learning_rate": 5e-06, "epoch": 2.0355472404115997, "percentage": 67.86, "elapsed_time": "17:06:55", "remaining_time": "8:06:16"}
{"current_steps": 1370, "total_steps": 2004, "loss": 0.4414, "learning_rate": 5e-06, "epoch": 2.050514499532273, "percentage": 68.36, "elapsed_time": "17:14:20", "remaining_time": "7:58:39"}
{"current_steps": 1380, "total_steps": 2004, "loss": 0.4524, "learning_rate": 5e-06, "epoch": 2.065481758652947, "percentage": 68.86, "elapsed_time": "17:21:44", "remaining_time": "7:51:03"}
{"current_steps": 1390, "total_steps": 2004, "loss": 0.4531, "learning_rate": 5e-06, "epoch": 2.08044901777362, "percentage": 69.36, "elapsed_time": "17:29:09", "remaining_time": "7:43:26"}
{"current_steps": 1400, "total_steps": 2004, "loss": 0.4517, "learning_rate": 5e-06, "epoch": 2.095416276894294, "percentage": 69.86, "elapsed_time": "17:36:33", "remaining_time": "7:35:49"}
{"current_steps": 1410, "total_steps": 2004, "loss": 0.4551, "learning_rate": 5e-06, "epoch": 2.110383536014967, "percentage": 70.36, "elapsed_time": "17:43:58", "remaining_time": "7:28:13"}
{"current_steps": 1420, "total_steps": 2004, "loss": 0.4516, "learning_rate": 5e-06, "epoch": 2.125350795135641, "percentage": 70.86, "elapsed_time": "17:51:23", "remaining_time": "7:20:37"}
{"current_steps": 1430, "total_steps": 2004, "loss": 0.4585, "learning_rate": 5e-06, "epoch": 2.1403180542563143, "percentage": 71.36, "elapsed_time": "17:58:47", "remaining_time": "7:13:01"}
{"current_steps": 1440, "total_steps": 2004, "loss": 0.4559, "learning_rate": 5e-06, "epoch": 2.155285313376988, "percentage": 71.86, "elapsed_time": "18:06:12", "remaining_time": "7:05:25"}
{"current_steps": 1450, "total_steps": 2004, "loss": 0.4568, "learning_rate": 5e-06, "epoch": 2.1702525724976613, "percentage": 72.36, "elapsed_time": "18:13:37", "remaining_time": "6:57:50"}
{"current_steps": 1460, "total_steps": 2004, "loss": 0.4586, "learning_rate": 5e-06, "epoch": 2.185219831618335, "percentage": 72.85, "elapsed_time": "18:21:01", "remaining_time": "6:50:14"}
{"current_steps": 1470, "total_steps": 2004, "loss": 0.457, "learning_rate": 5e-06, "epoch": 2.2001870907390084, "percentage": 73.35, "elapsed_time": "18:28:26", "remaining_time": "6:42:39"}
{"current_steps": 1480, "total_steps": 2004, "loss": 0.4601, "learning_rate": 5e-06, "epoch": 2.2151543498596817, "percentage": 73.85, "elapsed_time": "18:35:50", "remaining_time": "6:35:04"}
{"current_steps": 1490, "total_steps": 2004, "loss": 0.4642, "learning_rate": 5e-06, "epoch": 2.2301216089803555, "percentage": 74.35, "elapsed_time": "18:43:14", "remaining_time": "6:27:29"}
{"current_steps": 1500, "total_steps": 2004, "loss": 0.4581, "learning_rate": 5e-06, "epoch": 2.245088868101029, "percentage": 74.85, "elapsed_time": "18:50:39", "remaining_time": "6:19:54"}
{"current_steps": 1510, "total_steps": 2004, "loss": 0.4603, "learning_rate": 5e-06, "epoch": 2.2600561272217026, "percentage": 75.35, "elapsed_time": "18:58:03", "remaining_time": "6:12:19"}
{"current_steps": 1520, "total_steps": 2004, "loss": 0.453, "learning_rate": 5e-06, "epoch": 2.275023386342376, "percentage": 75.85, "elapsed_time": "19:05:28", "remaining_time": "6:04:44"}
{"current_steps": 1530, "total_steps": 2004, "loss": 0.4579, "learning_rate": 5e-06, "epoch": 2.2899906454630496, "percentage": 76.35, "elapsed_time": "19:12:53", "remaining_time": "5:57:10"}
{"current_steps": 1540, "total_steps": 2004, "loss": 0.4645, "learning_rate": 5e-06, "epoch": 2.304957904583723, "percentage": 76.85, "elapsed_time": "19:20:17", "remaining_time": "5:49:35"}
{"current_steps": 1550, "total_steps": 2004, "loss": 0.4581, "learning_rate": 5e-06, "epoch": 2.3199251637043967, "percentage": 77.35, "elapsed_time": "19:27:42", "remaining_time": "5:42:01"}
{"current_steps": 1560, "total_steps": 2004, "loss": 0.4599, "learning_rate": 5e-06, "epoch": 2.33489242282507, "percentage": 77.84, "elapsed_time": "19:35:06", "remaining_time": "5:34:27"}
{"current_steps": 1570, "total_steps": 2004, "loss": 0.4638, "learning_rate": 5e-06, "epoch": 2.349859681945744, "percentage": 78.34, "elapsed_time": "19:42:30", "remaining_time": "5:26:53"}
{"current_steps": 1580, "total_steps": 2004, "loss": 0.4585, "learning_rate": 5e-06, "epoch": 2.364826941066417, "percentage": 78.84, "elapsed_time": "19:49:55", "remaining_time": "5:19:19"}
{"current_steps": 1590, "total_steps": 2004, "loss": 0.4659, "learning_rate": 5e-06, "epoch": 2.379794200187091, "percentage": 79.34, "elapsed_time": "19:57:19", "remaining_time": "5:11:45"}
{"current_steps": 1600, "total_steps": 2004, "loss": 0.4603, "learning_rate": 5e-06, "epoch": 2.394761459307764, "percentage": 79.84, "elapsed_time": "20:04:44", "remaining_time": "5:04:11"}
{"current_steps": 1610, "total_steps": 2004, "loss": 0.4657, "learning_rate": 5e-06, "epoch": 2.409728718428438, "percentage": 80.34, "elapsed_time": "20:12:08", "remaining_time": "4:56:38"}
{"current_steps": 1620, "total_steps": 2004, "loss": 0.4739, "learning_rate": 5e-06, "epoch": 2.4246959775491113, "percentage": 80.84, "elapsed_time": "20:19:33", "remaining_time": "4:49:04"}
{"current_steps": 1630, "total_steps": 2004, "loss": 0.4682, "learning_rate": 5e-06, "epoch": 2.439663236669785, "percentage": 81.34, "elapsed_time": "20:26:57", "remaining_time": "4:41:31"}
{"current_steps": 1640, "total_steps": 2004, "loss": 0.4675, "learning_rate": 5e-06, "epoch": 2.4546304957904583, "percentage": 81.84, "elapsed_time": "20:34:21", "remaining_time": "4:33:58"}
{"current_steps": 1650, "total_steps": 2004, "loss": 0.4688, "learning_rate": 5e-06, "epoch": 2.469597754911132, "percentage": 82.34, "elapsed_time": "20:41:46", "remaining_time": "4:26:25"}
{"current_steps": 1660, "total_steps": 2004, "loss": 0.4673, "learning_rate": 5e-06, "epoch": 2.4845650140318054, "percentage": 82.83, "elapsed_time": "20:49:11", "remaining_time": "4:18:52"}
{"current_steps": 1670, "total_steps": 2004, "loss": 0.4647, "learning_rate": 5e-06, "epoch": 2.4995322731524787, "percentage": 83.33, "elapsed_time": "20:56:35", "remaining_time": "4:11:19"}
{"current_steps": 1680, "total_steps": 2004, "loss": 0.4663, "learning_rate": 5e-06, "epoch": 2.5144995322731525, "percentage": 83.83, "elapsed_time": "21:04:00", "remaining_time": "4:03:46"}
{"current_steps": 1690, "total_steps": 2004, "loss": 0.4674, "learning_rate": 5e-06, "epoch": 2.5294667913938262, "percentage": 84.33, "elapsed_time": "21:11:25", "remaining_time": "3:56:13"}
{"current_steps": 1700, "total_steps": 2004, "loss": 0.4702, "learning_rate": 5e-06, "epoch": 2.5444340505144996, "percentage": 84.83, "elapsed_time": "21:18:49", "remaining_time": "3:48:41"}
{"current_steps": 1710, "total_steps": 2004, "loss": 0.4669, "learning_rate": 5e-06, "epoch": 2.559401309635173, "percentage": 85.33, "elapsed_time": "21:26:13", "remaining_time": "3:41:08"}
{"current_steps": 1720, "total_steps": 2004, "loss": 0.467, "learning_rate": 5e-06, "epoch": 2.5743685687558466, "percentage": 85.83, "elapsed_time": "21:33:37", "remaining_time": "3:33:35"}
{"current_steps": 1730, "total_steps": 2004, "loss": 0.469, "learning_rate": 5e-06, "epoch": 2.58933582787652, "percentage": 86.33, "elapsed_time": "21:41:01", "remaining_time": "3:26:03"}
{"current_steps": 1740, "total_steps": 2004, "loss": 0.4617, "learning_rate": 5e-06, "epoch": 2.6043030869971937, "percentage": 86.83, "elapsed_time": "21:48:26", "remaining_time": "3:18:31"}
{"current_steps": 1750, "total_steps": 2004, "loss": 0.4657, "learning_rate": 5e-06, "epoch": 2.619270346117867, "percentage": 87.33, "elapsed_time": "21:55:50", "remaining_time": "3:10:59"}
{"current_steps": 1760, "total_steps": 2004, "loss": 0.4614, "learning_rate": 5e-06, "epoch": 2.634237605238541, "percentage": 87.82, "elapsed_time": "22:03:14", "remaining_time": "3:03:26"}
{"current_steps": 1770, "total_steps": 2004, "loss": 0.4658, "learning_rate": 5e-06, "epoch": 2.649204864359214, "percentage": 88.32, "elapsed_time": "22:10:38", "remaining_time": "2:55:54"}
{"current_steps": 1780, "total_steps": 2004, "loss": 0.4732, "learning_rate": 5e-06, "epoch": 2.664172123479888, "percentage": 88.82, "elapsed_time": "22:18:02", "remaining_time": "2:48:22"}
{"current_steps": 1790, "total_steps": 2004, "loss": 0.4716, "learning_rate": 5e-06, "epoch": 2.679139382600561, "percentage": 89.32, "elapsed_time": "22:25:27", "remaining_time": "2:40:51"}
{"current_steps": 1800, "total_steps": 2004, "loss": 0.473, "learning_rate": 5e-06, "epoch": 2.694106641721235, "percentage": 89.82, "elapsed_time": "22:32:51", "remaining_time": "2:33:19"}
{"current_steps": 1810, "total_steps": 2004, "loss": 0.4676, "learning_rate": 5e-06, "epoch": 2.7090739008419082, "percentage": 90.32, "elapsed_time": "22:40:15", "remaining_time": "2:25:47"}
{"current_steps": 1820, "total_steps": 2004, "loss": 0.465, "learning_rate": 5e-06, "epoch": 2.724041159962582, "percentage": 90.82, "elapsed_time": "22:47:39", "remaining_time": "2:18:16"}
{"current_steps": 1830, "total_steps": 2004, "loss": 0.4662, "learning_rate": 5e-06, "epoch": 2.7390084190832553, "percentage": 91.32, "elapsed_time": "22:55:04", "remaining_time": "2:10:44"}
{"current_steps": 1840, "total_steps": 2004, "loss": 0.4688, "learning_rate": 5e-06, "epoch": 2.753975678203929, "percentage": 91.82, "elapsed_time": "23:02:28", "remaining_time": "2:03:13"}
{"current_steps": 1850, "total_steps": 2004, "loss": 0.4769, "learning_rate": 5e-06, "epoch": 2.7689429373246024, "percentage": 92.32, "elapsed_time": "23:09:52", "remaining_time": "1:55:41"}
{"current_steps": 1860, "total_steps": 2004, "loss": 0.47, "learning_rate": 5e-06, "epoch": 2.7839101964452757, "percentage": 92.81, "elapsed_time": "23:17:16", "remaining_time": "1:48:10"}
{"current_steps": 1870, "total_steps": 2004, "loss": 0.4776, "learning_rate": 5e-06, "epoch": 2.7988774555659495, "percentage": 93.31, "elapsed_time": "23:24:41", "remaining_time": "1:40:39"}
{"current_steps": 1880, "total_steps": 2004, "loss": 0.4649, "learning_rate": 5e-06, "epoch": 2.8138447146866232, "percentage": 93.81, "elapsed_time": "23:32:05", "remaining_time": "1:33:08"}
{"current_steps": 1890, "total_steps": 2004, "loss": 0.4706, "learning_rate": 5e-06, "epoch": 2.8288119738072965, "percentage": 94.31, "elapsed_time": "23:39:29", "remaining_time": "1:25:37"}
{"current_steps": 1900, "total_steps": 2004, "loss": 0.4715, "learning_rate": 5e-06, "epoch": 2.84377923292797, "percentage": 94.81, "elapsed_time": "23:46:53", "remaining_time": "1:18:06"}
{"current_steps": 1910, "total_steps": 2004, "loss": 0.474, "learning_rate": 5e-06, "epoch": 2.8587464920486436, "percentage": 95.31, "elapsed_time": "23:54:18", "remaining_time": "1:10:35"}
{"current_steps": 1920, "total_steps": 2004, "loss": 0.4709, "learning_rate": 5e-06, "epoch": 2.8737137511693174, "percentage": 95.81, "elapsed_time": "1 day, 0:01:42", "remaining_time": "1:03:04"}
{"current_steps": 1930, "total_steps": 2004, "loss": 0.473, "learning_rate": 5e-06, "epoch": 2.8886810102899907, "percentage": 96.31, "elapsed_time": "1 day, 0:09:07", "remaining_time": "0:55:33"}
{"current_steps": 1940, "total_steps": 2004, "loss": 0.4663, "learning_rate": 5e-06, "epoch": 2.903648269410664, "percentage": 96.81, "elapsed_time": "1 day, 0:16:31", "remaining_time": "0:48:03"}
{"current_steps": 1950, "total_steps": 2004, "loss": 0.4767, "learning_rate": 5e-06, "epoch": 2.9186155285313378, "percentage": 97.31, "elapsed_time": "1 day, 0:23:55", "remaining_time": "0:40:32"}
{"current_steps": 1960, "total_steps": 2004, "loss": 0.4719, "learning_rate": 5e-06, "epoch": 2.933582787652011, "percentage": 97.8, "elapsed_time": "1 day, 0:31:20", "remaining_time": "0:33:01"}
{"current_steps": 1970, "total_steps": 2004, "loss": 0.4748, "learning_rate": 5e-06, "epoch": 2.948550046772685, "percentage": 98.3, "elapsed_time": "1 day, 0:38:44", "remaining_time": "0:25:31"}
{"current_steps": 1980, "total_steps": 2004, "loss": 0.4777, "learning_rate": 5e-06, "epoch": 2.963517305893358, "percentage": 98.8, "elapsed_time": "1 day, 0:46:09", "remaining_time": "0:18:00"}
{"current_steps": 1990, "total_steps": 2004, "loss": 0.4719, "learning_rate": 5e-06, "epoch": 2.978484565014032, "percentage": 99.3, "elapsed_time": "1 day, 0:53:33", "remaining_time": "0:10:30"}
{"current_steps": 2000, "total_steps": 2004, "loss": 0.4829, "learning_rate": 5e-06, "epoch": 2.9934518241347052, "percentage": 99.8, "elapsed_time": "1 day, 1:00:57", "remaining_time": "0:03:00"}
{"current_steps": 2004, "total_steps": 2004, "eval_loss": 0.655114471912384, "epoch": 2.999438727782975, "percentage": 100.0, "elapsed_time": "1 day, 1:13:54", "remaining_time": "0:00:00"}
{"current_steps": 2004, "total_steps": 2004, "epoch": 2.999438727782975, "percentage": 100.0, "elapsed_time": "1 day, 1:15:36", "remaining_time": "0:00:00"}