oh-dcft-v3.1-llama-3.1-405b / trainer_log.jsonl
gsmyrnis's picture
Training in progress, epoch 3
b84621f verified
{"current_steps": 10, "total_steps": 1266, "loss": 0.5563, "lr": 5e-06, "epoch": 0.023696682464454975, "percentage": 0.79, "elapsed_time": "0:01:00", "remaining_time": "2:06:41"}
{"current_steps": 20, "total_steps": 1266, "loss": 0.4838, "lr": 5e-06, "epoch": 0.04739336492890995, "percentage": 1.58, "elapsed_time": "0:02:00", "remaining_time": "2:05:11"}
{"current_steps": 30, "total_steps": 1266, "loss": 0.4503, "lr": 5e-06, "epoch": 0.07109004739336493, "percentage": 2.37, "elapsed_time": "0:03:00", "remaining_time": "2:03:49"}
{"current_steps": 40, "total_steps": 1266, "loss": 0.429, "lr": 5e-06, "epoch": 0.0947867298578199, "percentage": 3.16, "elapsed_time": "0:03:59", "remaining_time": "2:02:08"}
{"current_steps": 50, "total_steps": 1266, "loss": 0.4108, "lr": 5e-06, "epoch": 0.11848341232227488, "percentage": 3.95, "elapsed_time": "0:04:58", "remaining_time": "2:00:58"}
{"current_steps": 60, "total_steps": 1266, "loss": 0.4018, "lr": 5e-06, "epoch": 0.14218009478672985, "percentage": 4.74, "elapsed_time": "0:05:57", "remaining_time": "1:59:54"}
{"current_steps": 70, "total_steps": 1266, "loss": 0.3908, "lr": 5e-06, "epoch": 0.16587677725118483, "percentage": 5.53, "elapsed_time": "0:06:57", "remaining_time": "1:58:49"}
{"current_steps": 80, "total_steps": 1266, "loss": 0.3833, "lr": 5e-06, "epoch": 0.1895734597156398, "percentage": 6.32, "elapsed_time": "0:07:55", "remaining_time": "1:57:36"}
{"current_steps": 90, "total_steps": 1266, "loss": 0.3777, "lr": 5e-06, "epoch": 0.2132701421800948, "percentage": 7.11, "elapsed_time": "0:08:55", "remaining_time": "1:56:36"}
{"current_steps": 100, "total_steps": 1266, "loss": 0.3704, "lr": 5e-06, "epoch": 0.23696682464454977, "percentage": 7.9, "elapsed_time": "0:09:55", "remaining_time": "1:55:46"}
{"current_steps": 110, "total_steps": 1266, "loss": 0.3662, "lr": 5e-06, "epoch": 0.26066350710900477, "percentage": 8.69, "elapsed_time": "0:10:54", "remaining_time": "1:54:36"}
{"current_steps": 120, "total_steps": 1266, "loss": 0.3658, "lr": 5e-06, "epoch": 0.2843601895734597, "percentage": 9.48, "elapsed_time": "0:11:53", "remaining_time": "1:53:33"}
{"current_steps": 130, "total_steps": 1266, "loss": 0.3629, "lr": 5e-06, "epoch": 0.3080568720379147, "percentage": 10.27, "elapsed_time": "0:12:53", "remaining_time": "1:52:40"}
{"current_steps": 140, "total_steps": 1266, "loss": 0.3652, "lr": 5e-06, "epoch": 0.33175355450236965, "percentage": 11.06, "elapsed_time": "0:13:51", "remaining_time": "1:51:30"}
{"current_steps": 150, "total_steps": 1266, "loss": 0.3531, "lr": 5e-06, "epoch": 0.35545023696682465, "percentage": 11.85, "elapsed_time": "0:14:52", "remaining_time": "1:50:36"}
{"current_steps": 160, "total_steps": 1266, "loss": 0.3498, "lr": 5e-06, "epoch": 0.3791469194312796, "percentage": 12.64, "elapsed_time": "0:15:51", "remaining_time": "1:49:35"}
{"current_steps": 170, "total_steps": 1266, "loss": 0.3507, "lr": 5e-06, "epoch": 0.4028436018957346, "percentage": 13.43, "elapsed_time": "0:16:49", "remaining_time": "1:48:30"}
{"current_steps": 180, "total_steps": 1266, "loss": 0.348, "lr": 5e-06, "epoch": 0.4265402843601896, "percentage": 14.22, "elapsed_time": "0:17:49", "remaining_time": "1:47:35"}
{"current_steps": 190, "total_steps": 1266, "loss": 0.3476, "lr": 5e-06, "epoch": 0.45023696682464454, "percentage": 15.01, "elapsed_time": "0:18:48", "remaining_time": "1:46:29"}
{"current_steps": 200, "total_steps": 1266, "loss": 0.3427, "lr": 5e-06, "epoch": 0.47393364928909953, "percentage": 15.8, "elapsed_time": "0:19:48", "remaining_time": "1:45:34"}
{"current_steps": 210, "total_steps": 1266, "loss": 0.3447, "lr": 5e-06, "epoch": 0.4976303317535545, "percentage": 16.59, "elapsed_time": "0:20:47", "remaining_time": "1:44:33"}
{"current_steps": 220, "total_steps": 1266, "loss": 0.3408, "lr": 5e-06, "epoch": 0.5213270142180095, "percentage": 17.38, "elapsed_time": "0:21:46", "remaining_time": "1:43:30"}
{"current_steps": 230, "total_steps": 1266, "loss": 0.341, "lr": 5e-06, "epoch": 0.5450236966824644, "percentage": 18.17, "elapsed_time": "0:22:46", "remaining_time": "1:42:35"}
{"current_steps": 240, "total_steps": 1266, "loss": 0.3335, "lr": 5e-06, "epoch": 0.5687203791469194, "percentage": 18.96, "elapsed_time": "0:23:44", "remaining_time": "1:41:31"}
{"current_steps": 250, "total_steps": 1266, "loss": 0.3345, "lr": 5e-06, "epoch": 0.5924170616113744, "percentage": 19.75, "elapsed_time": "0:24:44", "remaining_time": "1:40:33"}
{"current_steps": 260, "total_steps": 1266, "loss": 0.3308, "lr": 5e-06, "epoch": 0.6161137440758294, "percentage": 20.54, "elapsed_time": "0:25:43", "remaining_time": "1:39:31"}
{"current_steps": 270, "total_steps": 1266, "loss": 0.3361, "lr": 5e-06, "epoch": 0.6398104265402843, "percentage": 21.33, "elapsed_time": "0:26:42", "remaining_time": "1:38:32"}
{"current_steps": 280, "total_steps": 1266, "loss": 0.3326, "lr": 5e-06, "epoch": 0.6635071090047393, "percentage": 22.12, "elapsed_time": "0:27:42", "remaining_time": "1:37:33"}
{"current_steps": 290, "total_steps": 1266, "loss": 0.3302, "lr": 5e-06, "epoch": 0.6872037914691943, "percentage": 22.91, "elapsed_time": "0:28:41", "remaining_time": "1:36:34"}
{"current_steps": 300, "total_steps": 1266, "loss": 0.3289, "lr": 5e-06, "epoch": 0.7109004739336493, "percentage": 23.7, "elapsed_time": "0:29:40", "remaining_time": "1:35:33"}
{"current_steps": 310, "total_steps": 1266, "loss": 0.3316, "lr": 5e-06, "epoch": 0.7345971563981043, "percentage": 24.49, "elapsed_time": "0:30:39", "remaining_time": "1:34:33"}
{"current_steps": 320, "total_steps": 1266, "loss": 0.326, "lr": 5e-06, "epoch": 0.7582938388625592, "percentage": 25.28, "elapsed_time": "0:31:38", "remaining_time": "1:33:32"}
{"current_steps": 330, "total_steps": 1266, "loss": 0.3296, "lr": 5e-06, "epoch": 0.7819905213270142, "percentage": 26.07, "elapsed_time": "0:32:36", "remaining_time": "1:32:29"}
{"current_steps": 340, "total_steps": 1266, "loss": 0.3292, "lr": 5e-06, "epoch": 0.8056872037914692, "percentage": 26.86, "elapsed_time": "0:33:34", "remaining_time": "1:31:27"}
{"current_steps": 350, "total_steps": 1266, "loss": 0.3294, "lr": 5e-06, "epoch": 0.8293838862559242, "percentage": 27.65, "elapsed_time": "0:34:33", "remaining_time": "1:30:25"}
{"current_steps": 360, "total_steps": 1266, "loss": 0.3302, "lr": 5e-06, "epoch": 0.8530805687203792, "percentage": 28.44, "elapsed_time": "0:35:32", "remaining_time": "1:29:26"}
{"current_steps": 370, "total_steps": 1266, "loss": 0.3273, "lr": 5e-06, "epoch": 0.8767772511848341, "percentage": 29.23, "elapsed_time": "0:36:32", "remaining_time": "1:28:28"}
{"current_steps": 380, "total_steps": 1266, "loss": 0.3192, "lr": 5e-06, "epoch": 0.9004739336492891, "percentage": 30.02, "elapsed_time": "0:37:30", "remaining_time": "1:27:27"}
{"current_steps": 390, "total_steps": 1266, "loss": 0.3214, "lr": 5e-06, "epoch": 0.9241706161137441, "percentage": 30.81, "elapsed_time": "0:38:29", "remaining_time": "1:26:27"}
{"current_steps": 400, "total_steps": 1266, "loss": 0.3204, "lr": 5e-06, "epoch": 0.9478672985781991, "percentage": 31.6, "elapsed_time": "0:39:27", "remaining_time": "1:25:26"}
{"current_steps": 410, "total_steps": 1266, "loss": 0.3214, "lr": 5e-06, "epoch": 0.9715639810426541, "percentage": 32.39, "elapsed_time": "0:40:26", "remaining_time": "1:24:26"}
{"current_steps": 420, "total_steps": 1266, "loss": 0.3182, "lr": 5e-06, "epoch": 0.995260663507109, "percentage": 33.18, "elapsed_time": "0:41:26", "remaining_time": "1:23:28"}
{"current_steps": 422, "total_steps": 1266, "eval_loss": 0.3187981843948364, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:42:18", "remaining_time": "1:24:37"}
{"current_steps": 430, "total_steps": 1266, "loss": 0.287, "lr": 5e-06, "epoch": 1.018957345971564, "percentage": 33.97, "elapsed_time": "0:44:35", "remaining_time": "1:26:42"}
{"current_steps": 440, "total_steps": 1266, "loss": 0.2762, "lr": 5e-06, "epoch": 1.042654028436019, "percentage": 34.76, "elapsed_time": "0:45:34", "remaining_time": "1:25:33"}
{"current_steps": 450, "total_steps": 1266, "loss": 0.2734, "lr": 5e-06, "epoch": 1.066350710900474, "percentage": 35.55, "elapsed_time": "0:46:32", "remaining_time": "1:24:24"}
{"current_steps": 460, "total_steps": 1266, "loss": 0.2769, "lr": 5e-06, "epoch": 1.0900473933649288, "percentage": 36.33, "elapsed_time": "0:47:30", "remaining_time": "1:23:15"}
{"current_steps": 470, "total_steps": 1266, "loss": 0.273, "lr": 5e-06, "epoch": 1.113744075829384, "percentage": 37.12, "elapsed_time": "0:48:29", "remaining_time": "1:22:07"}
{"current_steps": 480, "total_steps": 1266, "loss": 0.2763, "lr": 5e-06, "epoch": 1.1374407582938388, "percentage": 37.91, "elapsed_time": "0:49:28", "remaining_time": "1:21:00"}
{"current_steps": 490, "total_steps": 1266, "loss": 0.2779, "lr": 5e-06, "epoch": 1.161137440758294, "percentage": 38.7, "elapsed_time": "0:50:27", "remaining_time": "1:19:54"}
{"current_steps": 500, "total_steps": 1266, "loss": 0.2745, "lr": 5e-06, "epoch": 1.1848341232227488, "percentage": 39.49, "elapsed_time": "0:51:27", "remaining_time": "1:18:50"}
{"current_steps": 510, "total_steps": 1266, "loss": 0.2754, "lr": 5e-06, "epoch": 1.2085308056872037, "percentage": 40.28, "elapsed_time": "0:52:25", "remaining_time": "1:17:43"}
{"current_steps": 520, "total_steps": 1266, "loss": 0.2731, "lr": 5e-06, "epoch": 1.2322274881516588, "percentage": 41.07, "elapsed_time": "0:53:24", "remaining_time": "1:16:37"}
{"current_steps": 530, "total_steps": 1266, "loss": 0.2739, "lr": 5e-06, "epoch": 1.2559241706161137, "percentage": 41.86, "elapsed_time": "0:54:23", "remaining_time": "1:15:31"}
{"current_steps": 540, "total_steps": 1266, "loss": 0.2714, "lr": 5e-06, "epoch": 1.2796208530805688, "percentage": 42.65, "elapsed_time": "0:55:22", "remaining_time": "1:14:26"}
{"current_steps": 550, "total_steps": 1266, "loss": 0.2737, "lr": 5e-06, "epoch": 1.3033175355450237, "percentage": 43.44, "elapsed_time": "0:56:20", "remaining_time": "1:13:20"}
{"current_steps": 560, "total_steps": 1266, "loss": 0.2751, "lr": 5e-06, "epoch": 1.3270142180094786, "percentage": 44.23, "elapsed_time": "0:57:18", "remaining_time": "1:12:15"}
{"current_steps": 570, "total_steps": 1266, "loss": 0.2745, "lr": 5e-06, "epoch": 1.3507109004739337, "percentage": 45.02, "elapsed_time": "0:58:16", "remaining_time": "1:11:09"}
{"current_steps": 580, "total_steps": 1266, "loss": 0.2734, "lr": 5e-06, "epoch": 1.3744075829383886, "percentage": 45.81, "elapsed_time": "0:59:15", "remaining_time": "1:10:05"}
{"current_steps": 590, "total_steps": 1266, "loss": 0.2738, "lr": 5e-06, "epoch": 1.3981042654028437, "percentage": 46.6, "elapsed_time": "1:00:13", "remaining_time": "1:09:00"}
{"current_steps": 600, "total_steps": 1266, "loss": 0.2744, "lr": 5e-06, "epoch": 1.4218009478672986, "percentage": 47.39, "elapsed_time": "1:01:12", "remaining_time": "1:07:56"}
{"current_steps": 610, "total_steps": 1266, "loss": 0.2721, "lr": 5e-06, "epoch": 1.4454976303317535, "percentage": 48.18, "elapsed_time": "1:02:12", "remaining_time": "1:06:53"}
{"current_steps": 620, "total_steps": 1266, "loss": 0.2732, "lr": 5e-06, "epoch": 1.4691943127962086, "percentage": 48.97, "elapsed_time": "1:03:11", "remaining_time": "1:05:50"}
{"current_steps": 630, "total_steps": 1266, "loss": 0.2725, "lr": 5e-06, "epoch": 1.4928909952606635, "percentage": 49.76, "elapsed_time": "1:04:10", "remaining_time": "1:04:47"}
{"current_steps": 640, "total_steps": 1266, "loss": 0.2665, "lr": 5e-06, "epoch": 1.5165876777251186, "percentage": 50.55, "elapsed_time": "1:05:10", "remaining_time": "1:03:45"}
{"current_steps": 650, "total_steps": 1266, "loss": 0.2697, "lr": 5e-06, "epoch": 1.5402843601895735, "percentage": 51.34, "elapsed_time": "1:06:08", "remaining_time": "1:02:41"}
{"current_steps": 660, "total_steps": 1266, "loss": 0.2673, "lr": 5e-06, "epoch": 1.5639810426540284, "percentage": 52.13, "elapsed_time": "1:07:08", "remaining_time": "1:01:39"}
{"current_steps": 670, "total_steps": 1266, "loss": 0.274, "lr": 5e-06, "epoch": 1.5876777251184833, "percentage": 52.92, "elapsed_time": "1:08:08", "remaining_time": "1:00:36"}
{"current_steps": 680, "total_steps": 1266, "loss": 0.2702, "lr": 5e-06, "epoch": 1.6113744075829384, "percentage": 53.71, "elapsed_time": "1:09:06", "remaining_time": "0:59:33"}
{"current_steps": 690, "total_steps": 1266, "loss": 0.2695, "lr": 5e-06, "epoch": 1.6350710900473935, "percentage": 54.5, "elapsed_time": "1:10:05", "remaining_time": "0:58:30"}
{"current_steps": 700, "total_steps": 1266, "loss": 0.2686, "lr": 5e-06, "epoch": 1.6587677725118484, "percentage": 55.29, "elapsed_time": "1:11:03", "remaining_time": "0:57:27"}
{"current_steps": 710, "total_steps": 1266, "loss": 0.2707, "lr": 5e-06, "epoch": 1.6824644549763033, "percentage": 56.08, "elapsed_time": "1:12:01", "remaining_time": "0:56:24"}
{"current_steps": 720, "total_steps": 1266, "loss": 0.2669, "lr": 5e-06, "epoch": 1.7061611374407581, "percentage": 56.87, "elapsed_time": "1:13:00", "remaining_time": "0:55:21"}
{"current_steps": 730, "total_steps": 1266, "loss": 0.2658, "lr": 5e-06, "epoch": 1.7298578199052133, "percentage": 57.66, "elapsed_time": "1:13:58", "remaining_time": "0:54:19"}
{"current_steps": 740, "total_steps": 1266, "loss": 0.2687, "lr": 5e-06, "epoch": 1.7535545023696684, "percentage": 58.45, "elapsed_time": "1:14:57", "remaining_time": "0:53:16"}
{"current_steps": 750, "total_steps": 1266, "loss": 0.2704, "lr": 5e-06, "epoch": 1.7772511848341233, "percentage": 59.24, "elapsed_time": "1:15:55", "remaining_time": "0:52:14"}
{"current_steps": 760, "total_steps": 1266, "loss": 0.2673, "lr": 5e-06, "epoch": 1.8009478672985781, "percentage": 60.03, "elapsed_time": "1:16:55", "remaining_time": "0:51:13"}
{"current_steps": 770, "total_steps": 1266, "loss": 0.2693, "lr": 5e-06, "epoch": 1.824644549763033, "percentage": 60.82, "elapsed_time": "1:17:54", "remaining_time": "0:50:11"}
{"current_steps": 780, "total_steps": 1266, "loss": 0.2652, "lr": 5e-06, "epoch": 1.8483412322274881, "percentage": 61.61, "elapsed_time": "1:18:53", "remaining_time": "0:49:09"}
{"current_steps": 790, "total_steps": 1266, "loss": 0.264, "lr": 5e-06, "epoch": 1.8720379146919433, "percentage": 62.4, "elapsed_time": "1:19:52", "remaining_time": "0:48:07"}
{"current_steps": 800, "total_steps": 1266, "loss": 0.2707, "lr": 5e-06, "epoch": 1.8957345971563981, "percentage": 63.19, "elapsed_time": "1:20:50", "remaining_time": "0:47:05"}
{"current_steps": 810, "total_steps": 1266, "loss": 0.2645, "lr": 5e-06, "epoch": 1.919431279620853, "percentage": 63.98, "elapsed_time": "1:21:48", "remaining_time": "0:46:03"}
{"current_steps": 820, "total_steps": 1266, "loss": 0.2627, "lr": 5e-06, "epoch": 1.943127962085308, "percentage": 64.77, "elapsed_time": "1:22:47", "remaining_time": "0:45:01"}
{"current_steps": 830, "total_steps": 1266, "loss": 0.2663, "lr": 5e-06, "epoch": 1.966824644549763, "percentage": 65.56, "elapsed_time": "1:23:45", "remaining_time": "0:43:59"}
{"current_steps": 840, "total_steps": 1266, "loss": 0.2676, "lr": 5e-06, "epoch": 1.9905213270142181, "percentage": 66.35, "elapsed_time": "1:24:44", "remaining_time": "0:42:58"}
{"current_steps": 844, "total_steps": 1266, "eval_loss": 0.30145883560180664, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "1:25:50", "remaining_time": "0:42:55"}
{"current_steps": 850, "total_steps": 1266, "loss": 0.2369, "lr": 5e-06, "epoch": 2.014218009478673, "percentage": 67.14, "elapsed_time": "1:28:00", "remaining_time": "0:43:04"}
{"current_steps": 860, "total_steps": 1266, "loss": 0.2195, "lr": 5e-06, "epoch": 2.037914691943128, "percentage": 67.93, "elapsed_time": "1:28:58", "remaining_time": "0:42:00"}
{"current_steps": 870, "total_steps": 1266, "loss": 0.2199, "lr": 5e-06, "epoch": 2.061611374407583, "percentage": 68.72, "elapsed_time": "1:29:59", "remaining_time": "0:40:57"}
{"current_steps": 880, "total_steps": 1266, "loss": 0.2204, "lr": 5e-06, "epoch": 2.085308056872038, "percentage": 69.51, "elapsed_time": "1:30:57", "remaining_time": "0:39:53"}
{"current_steps": 890, "total_steps": 1266, "loss": 0.2207, "lr": 5e-06, "epoch": 2.109004739336493, "percentage": 70.3, "elapsed_time": "1:31:56", "remaining_time": "0:38:50"}
{"current_steps": 900, "total_steps": 1266, "loss": 0.2205, "lr": 5e-06, "epoch": 2.132701421800948, "percentage": 71.09, "elapsed_time": "1:32:55", "remaining_time": "0:37:47"}
{"current_steps": 910, "total_steps": 1266, "loss": 0.2202, "lr": 5e-06, "epoch": 2.156398104265403, "percentage": 71.88, "elapsed_time": "1:33:53", "remaining_time": "0:36:43"}
{"current_steps": 920, "total_steps": 1266, "loss": 0.2213, "lr": 5e-06, "epoch": 2.1800947867298577, "percentage": 72.67, "elapsed_time": "1:34:51", "remaining_time": "0:35:40"}
{"current_steps": 930, "total_steps": 1266, "loss": 0.2237, "lr": 5e-06, "epoch": 2.2037914691943126, "percentage": 73.46, "elapsed_time": "1:35:50", "remaining_time": "0:34:37"}
{"current_steps": 940, "total_steps": 1266, "loss": 0.2196, "lr": 5e-06, "epoch": 2.227488151658768, "percentage": 74.25, "elapsed_time": "1:36:48", "remaining_time": "0:33:34"}
{"current_steps": 950, "total_steps": 1266, "loss": 0.2213, "lr": 5e-06, "epoch": 2.251184834123223, "percentage": 75.04, "elapsed_time": "1:37:48", "remaining_time": "0:32:32"}
{"current_steps": 960, "total_steps": 1266, "loss": 0.22, "lr": 5e-06, "epoch": 2.2748815165876777, "percentage": 75.83, "elapsed_time": "1:38:46", "remaining_time": "0:31:29"}
{"current_steps": 970, "total_steps": 1266, "loss": 0.2209, "lr": 5e-06, "epoch": 2.2985781990521326, "percentage": 76.62, "elapsed_time": "1:39:45", "remaining_time": "0:30:26"}
{"current_steps": 980, "total_steps": 1266, "loss": 0.225, "lr": 5e-06, "epoch": 2.322274881516588, "percentage": 77.41, "elapsed_time": "1:40:44", "remaining_time": "0:29:24"}
{"current_steps": 990, "total_steps": 1266, "loss": 0.2236, "lr": 5e-06, "epoch": 2.345971563981043, "percentage": 78.2, "elapsed_time": "1:41:43", "remaining_time": "0:28:21"}
{"current_steps": 1000, "total_steps": 1266, "loss": 0.2216, "lr": 5e-06, "epoch": 2.3696682464454977, "percentage": 78.99, "elapsed_time": "1:42:42", "remaining_time": "0:27:19"}
{"current_steps": 1010, "total_steps": 1266, "loss": 0.2233, "lr": 5e-06, "epoch": 2.3933649289099526, "percentage": 79.78, "elapsed_time": "1:43:40", "remaining_time": "0:26:16"}
{"current_steps": 1020, "total_steps": 1266, "loss": 0.2228, "lr": 5e-06, "epoch": 2.4170616113744074, "percentage": 80.57, "elapsed_time": "1:44:38", "remaining_time": "0:25:14"}
{"current_steps": 1030, "total_steps": 1266, "loss": 0.2226, "lr": 5e-06, "epoch": 2.4407582938388623, "percentage": 81.36, "elapsed_time": "1:45:36", "remaining_time": "0:24:11"}
{"current_steps": 1040, "total_steps": 1266, "loss": 0.2211, "lr": 5e-06, "epoch": 2.4644549763033177, "percentage": 82.15, "elapsed_time": "1:46:35", "remaining_time": "0:23:09"}
{"current_steps": 1050, "total_steps": 1266, "loss": 0.2225, "lr": 5e-06, "epoch": 2.4881516587677726, "percentage": 82.94, "elapsed_time": "1:47:34", "remaining_time": "0:22:07"}
{"current_steps": 1060, "total_steps": 1266, "loss": 0.2207, "lr": 5e-06, "epoch": 2.5118483412322274, "percentage": 83.73, "elapsed_time": "1:48:33", "remaining_time": "0:21:05"}
{"current_steps": 1070, "total_steps": 1266, "loss": 0.2224, "lr": 5e-06, "epoch": 2.5355450236966823, "percentage": 84.52, "elapsed_time": "1:49:32", "remaining_time": "0:20:03"}
{"current_steps": 1080, "total_steps": 1266, "loss": 0.2193, "lr": 5e-06, "epoch": 2.5592417061611377, "percentage": 85.31, "elapsed_time": "1:50:32", "remaining_time": "0:19:02"}
{"current_steps": 1090, "total_steps": 1266, "loss": 0.2215, "lr": 5e-06, "epoch": 2.5829383886255926, "percentage": 86.1, "elapsed_time": "1:51:30", "remaining_time": "0:18:00"}
{"current_steps": 1100, "total_steps": 1266, "loss": 0.2239, "lr": 5e-06, "epoch": 2.6066350710900474, "percentage": 86.89, "elapsed_time": "1:52:29", "remaining_time": "0:16:58"}
{"current_steps": 1110, "total_steps": 1266, "loss": 0.225, "lr": 5e-06, "epoch": 2.6303317535545023, "percentage": 87.68, "elapsed_time": "1:53:27", "remaining_time": "0:15:56"}
{"current_steps": 1120, "total_steps": 1266, "loss": 0.2217, "lr": 5e-06, "epoch": 2.654028436018957, "percentage": 88.47, "elapsed_time": "1:54:25", "remaining_time": "0:14:54"}
{"current_steps": 1130, "total_steps": 1266, "loss": 0.2223, "lr": 5e-06, "epoch": 2.677725118483412, "percentage": 89.26, "elapsed_time": "1:55:23", "remaining_time": "0:13:53"}
{"current_steps": 1140, "total_steps": 1266, "loss": 0.2202, "lr": 5e-06, "epoch": 2.7014218009478674, "percentage": 90.05, "elapsed_time": "1:56:22", "remaining_time": "0:12:51"}
{"current_steps": 1150, "total_steps": 1266, "loss": 0.2203, "lr": 5e-06, "epoch": 2.7251184834123223, "percentage": 90.84, "elapsed_time": "1:57:20", "remaining_time": "0:11:50"}
{"current_steps": 1160, "total_steps": 1266, "loss": 0.2195, "lr": 5e-06, "epoch": 2.748815165876777, "percentage": 91.63, "elapsed_time": "1:58:19", "remaining_time": "0:10:48"}
{"current_steps": 1170, "total_steps": 1266, "loss": 0.2201, "lr": 5e-06, "epoch": 2.772511848341232, "percentage": 92.42, "elapsed_time": "1:59:18", "remaining_time": "0:09:47"}
{"current_steps": 1180, "total_steps": 1266, "loss": 0.2241, "lr": 5e-06, "epoch": 2.7962085308056874, "percentage": 93.21, "elapsed_time": "2:00:17", "remaining_time": "0:08:46"}
{"current_steps": 1190, "total_steps": 1266, "loss": 0.2185, "lr": 5e-06, "epoch": 2.8199052132701423, "percentage": 94.0, "elapsed_time": "2:01:15", "remaining_time": "0:07:44"}
{"current_steps": 1200, "total_steps": 1266, "loss": 0.2218, "lr": 5e-06, "epoch": 2.843601895734597, "percentage": 94.79, "elapsed_time": "2:02:14", "remaining_time": "0:06:43"}
{"current_steps": 1210, "total_steps": 1266, "loss": 0.2211, "lr": 5e-06, "epoch": 2.867298578199052, "percentage": 95.58, "elapsed_time": "2:03:12", "remaining_time": "0:05:42"}
{"current_steps": 1220, "total_steps": 1266, "loss": 0.2194, "lr": 5e-06, "epoch": 2.890995260663507, "percentage": 96.37, "elapsed_time": "2:04:10", "remaining_time": "0:04:40"}
{"current_steps": 1230, "total_steps": 1266, "loss": 0.2258, "lr": 5e-06, "epoch": 2.914691943127962, "percentage": 97.16, "elapsed_time": "2:05:09", "remaining_time": "0:03:39"}
{"current_steps": 1240, "total_steps": 1266, "loss": 0.2218, "lr": 5e-06, "epoch": 2.938388625592417, "percentage": 97.95, "elapsed_time": "2:06:07", "remaining_time": "0:02:38"}
{"current_steps": 1250, "total_steps": 1266, "loss": 0.2211, "lr": 5e-06, "epoch": 2.962085308056872, "percentage": 98.74, "elapsed_time": "2:07:07", "remaining_time": "0:01:37"}
{"current_steps": 1260, "total_steps": 1266, "loss": 0.2241, "lr": 5e-06, "epoch": 2.985781990521327, "percentage": 99.53, "elapsed_time": "2:08:39", "remaining_time": "0:00:36"}
{"current_steps": 1266, "total_steps": 1266, "eval_loss": 0.30088362097740173, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "2:11:24", "remaining_time": "0:00:00"}
{"current_steps": 1266, "total_steps": 1266, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "2:13:03", "remaining_time": "0:00:00"}