ben81828's picture
Training in progress, step 716
d80e3a3 verified
{"current_steps": 5, "total_steps": 716, "loss": 2.9908, "lr": 6.944444444444445e-06, "epoch": 0.013961605584642234, "percentage": 0.7, "elapsed_time": "0:03:15", "remaining_time": "7:43:27", "throughput": 398.59, "total_tokens": 77944}
{"current_steps": 10, "total_steps": 716, "loss": 3.0071, "lr": 1.388888888888889e-05, "epoch": 0.027923211169284468, "percentage": 1.4, "elapsed_time": "0:05:11", "remaining_time": "6:06:44", "throughput": 500.18, "total_tokens": 155896}
{"current_steps": 15, "total_steps": 716, "loss": 2.354, "lr": 2.0833333333333336e-05, "epoch": 0.041884816753926704, "percentage": 2.09, "elapsed_time": "0:07:08", "remaining_time": "5:33:43", "throughput": 545.89, "total_tokens": 233896}
{"current_steps": 20, "total_steps": 716, "loss": 1.2959, "lr": 2.777777777777778e-05, "epoch": 0.055846422338568937, "percentage": 2.79, "elapsed_time": "0:09:04", "remaining_time": "5:15:46", "throughput": 572.78, "total_tokens": 311840}
{"current_steps": 25, "total_steps": 716, "loss": 1.0206, "lr": 3.472222222222222e-05, "epoch": 0.06980802792321117, "percentage": 3.49, "elapsed_time": "0:11:01", "remaining_time": "5:04:39", "throughput": 589.45, "total_tokens": 389816}
{"current_steps": 30, "total_steps": 716, "loss": 0.9285, "lr": 4.166666666666667e-05, "epoch": 0.08376963350785341, "percentage": 4.19, "elapsed_time": "0:12:57", "remaining_time": "4:56:12", "throughput": 601.89, "total_tokens": 467808}
{"current_steps": 35, "total_steps": 716, "loss": 0.9052, "lr": 4.8611111111111115e-05, "epoch": 0.09773123909249563, "percentage": 4.89, "elapsed_time": "0:14:51", "remaining_time": "4:49:15", "throughput": 611.87, "total_tokens": 545776}
{"current_steps": 40, "total_steps": 716, "loss": 0.929, "lr": 5.555555555555556e-05, "epoch": 0.11169284467713787, "percentage": 5.59, "elapsed_time": "0:16:48", "remaining_time": "4:43:55", "throughput": 618.78, "total_tokens": 623744}
{"current_steps": 45, "total_steps": 716, "loss": 0.9076, "lr": 6.25e-05, "epoch": 0.1256544502617801, "percentage": 6.28, "elapsed_time": "0:18:43", "remaining_time": "4:39:18", "throughput": 624.36, "total_tokens": 701720}
{"current_steps": 50, "total_steps": 716, "loss": 0.9039, "lr": 6.944444444444444e-05, "epoch": 0.13961605584642234, "percentage": 6.98, "elapsed_time": "0:20:37", "remaining_time": "4:34:45", "throughput": 630.02, "total_tokens": 779728}
{"current_steps": 50, "total_steps": 716, "eval_loss": 0.9039102792739868, "epoch": 0.13961605584642234, "percentage": 6.98, "elapsed_time": "0:21:52", "remaining_time": "4:51:23", "throughput": 594.04, "total_tokens": 779728}
{"current_steps": 55, "total_steps": 716, "loss": 0.8983, "lr": 7.638888888888889e-05, "epoch": 0.15357766143106458, "percentage": 7.68, "elapsed_time": "0:23:55", "remaining_time": "4:47:37", "throughput": 597.32, "total_tokens": 857728}
{"current_steps": 60, "total_steps": 716, "loss": 0.9115, "lr": 8.333333333333334e-05, "epoch": 0.16753926701570682, "percentage": 8.38, "elapsed_time": "0:25:50", "remaining_time": "4:42:35", "throughput": 603.37, "total_tokens": 935680}
{"current_steps": 65, "total_steps": 716, "loss": 0.9022, "lr": 9.027777777777779e-05, "epoch": 0.18150087260034903, "percentage": 9.08, "elapsed_time": "0:27:45", "remaining_time": "4:38:01", "throughput": 608.6, "total_tokens": 1013664}
{"current_steps": 70, "total_steps": 716, "loss": 0.8981, "lr": 9.722222222222223e-05, "epoch": 0.19546247818499127, "percentage": 9.78, "elapsed_time": "0:29:40", "remaining_time": "4:33:53", "throughput": 613.04, "total_tokens": 1091656}
{"current_steps": 75, "total_steps": 716, "loss": 0.9067, "lr": 9.999464569905628e-05, "epoch": 0.2094240837696335, "percentage": 10.47, "elapsed_time": "0:31:34", "remaining_time": "4:29:52", "throughput": 617.35, "total_tokens": 1169664}
{"current_steps": 80, "total_steps": 716, "loss": 0.9075, "lr": 9.99619291237835e-05, "epoch": 0.22338568935427575, "percentage": 11.17, "elapsed_time": "0:33:29", "remaining_time": "4:26:17", "throughput": 620.81, "total_tokens": 1247672}
{"current_steps": 85, "total_steps": 716, "loss": 0.8964, "lr": 9.989949002448076e-05, "epoch": 0.23734729493891799, "percentage": 11.87, "elapsed_time": "0:35:24", "remaining_time": "4:22:54", "throughput": 623.85, "total_tokens": 1325640}
{"current_steps": 90, "total_steps": 716, "loss": 0.9128, "lr": 9.980736554638366e-05, "epoch": 0.2513089005235602, "percentage": 12.57, "elapsed_time": "0:37:18", "remaining_time": "4:19:32", "throughput": 626.98, "total_tokens": 1403688}
{"current_steps": 95, "total_steps": 716, "loss": 0.8991, "lr": 9.968561049466214e-05, "epoch": 0.26527050610820246, "percentage": 13.27, "elapsed_time": "0:39:13", "remaining_time": "4:16:27", "throughput": 629.44, "total_tokens": 1481664}
{"current_steps": 100, "total_steps": 716, "loss": 0.9033, "lr": 9.953429730181653e-05, "epoch": 0.2792321116928447, "percentage": 13.97, "elapsed_time": "0:41:08", "remaining_time": "4:13:23", "throughput": 631.9, "total_tokens": 1559632}
{"current_steps": 100, "total_steps": 716, "eval_loss": 0.9009457230567932, "epoch": 0.2792321116928447, "percentage": 13.97, "elapsed_time": "0:41:55", "remaining_time": "4:18:15", "throughput": 620.01, "total_tokens": 1559632}
{"current_steps": 105, "total_steps": 716, "loss": 0.902, "lr": 9.935351598458742e-05, "epoch": 0.2931937172774869, "percentage": 14.66, "elapsed_time": "0:43:57", "remaining_time": "4:15:46", "throughput": 620.95, "total_tokens": 1637592}
{"current_steps": 110, "total_steps": 716, "loss": 0.903, "lr": 9.914337409040418e-05, "epoch": 0.30715532286212915, "percentage": 15.36, "elapsed_time": "0:45:51", "remaining_time": "4:12:40", "throughput": 623.43, "total_tokens": 1715592}
{"current_steps": 115, "total_steps": 716, "loss": 0.9014, "lr": 9.890399663340478e-05, "epoch": 0.32111692844677137, "percentage": 16.06, "elapsed_time": "0:47:45", "remaining_time": "4:09:35", "throughput": 625.92, "total_tokens": 1793544}
{"current_steps": 120, "total_steps": 716, "loss": 0.8966, "lr": 9.863552602006435e-05, "epoch": 0.33507853403141363, "percentage": 16.76, "elapsed_time": "0:49:40", "remaining_time": "4:06:43", "throughput": 627.93, "total_tokens": 1871520}
{"current_steps": 125, "total_steps": 716, "loss": 0.9032, "lr": 9.83381219644771e-05, "epoch": 0.34904013961605584, "percentage": 17.46, "elapsed_time": "0:51:35", "remaining_time": "4:03:54", "throughput": 629.81, "total_tokens": 1949488}
{"current_steps": 130, "total_steps": 716, "loss": 0.8919, "lr": 9.801196139334195e-05, "epoch": 0.36300174520069806, "percentage": 18.16, "elapsed_time": "0:53:28", "remaining_time": "4:01:04", "throughput": 631.84, "total_tokens": 2027488}
{"current_steps": 135, "total_steps": 716, "loss": 0.9025, "lr": 9.765723834070804e-05, "epoch": 0.3769633507853403, "percentage": 18.85, "elapsed_time": "0:55:23", "remaining_time": "3:58:25", "throughput": 633.4, "total_tokens": 2105424}
{"current_steps": 140, "total_steps": 716, "loss": 0.9001, "lr": 9.72741638325434e-05, "epoch": 0.39092495636998253, "percentage": 19.55, "elapsed_time": "0:57:17", "remaining_time": "3:55:41", "throughput": 635.23, "total_tokens": 2183432}
{"current_steps": 145, "total_steps": 716, "loss": 0.9007, "lr": 9.686296576119471e-05, "epoch": 0.4048865619546248, "percentage": 20.25, "elapsed_time": "0:59:10", "remaining_time": "3:53:02", "throughput": 636.88, "total_tokens": 2261408}
{"current_steps": 150, "total_steps": 716, "loss": 0.9001, "lr": 9.642388874981347e-05, "epoch": 0.418848167539267, "percentage": 20.95, "elapsed_time": "1:01:04", "remaining_time": "3:50:26", "throughput": 638.42, "total_tokens": 2339368}
{"current_steps": 150, "total_steps": 716, "eval_loss": 0.8987511396408081, "epoch": 0.418848167539267, "percentage": 20.95, "elapsed_time": "1:01:50", "remaining_time": "3:53:21", "throughput": 630.43, "total_tokens": 2339368}
{"current_steps": 155, "total_steps": 716, "loss": 0.8974, "lr": 9.595719400682881e-05, "epoch": 0.4328097731239092, "percentage": 21.65, "elapsed_time": "1:03:50", "remaining_time": "3:51:02", "throughput": 631.14, "total_tokens": 2417328}
{"current_steps": 160, "total_steps": 716, "loss": 0.895, "lr": 9.546315917055361e-05, "epoch": 0.4467713787085515, "percentage": 22.35, "elapsed_time": "1:05:43", "remaining_time": "3:48:23", "throughput": 632.77, "total_tokens": 2495328}
{"current_steps": 165, "total_steps": 716, "loss": 0.8993, "lr": 9.494207814401672e-05, "epoch": 0.4607329842931937, "percentage": 23.04, "elapsed_time": "1:07:37", "remaining_time": "3:45:49", "throughput": 634.22, "total_tokens": 2573264}
{"current_steps": 170, "total_steps": 716, "loss": 0.9011, "lr": 9.439426092011875e-05, "epoch": 0.47469458987783597, "percentage": 23.74, "elapsed_time": "1:09:29", "remaining_time": "3:43:12", "throughput": 635.81, "total_tokens": 2651200}
{"current_steps": 175, "total_steps": 716, "loss": 0.8943, "lr": 9.382003339721652e-05, "epoch": 0.4886561954624782, "percentage": 24.44, "elapsed_time": "1:11:23", "remaining_time": "3:40:42", "throughput": 637.13, "total_tokens": 2729208}
{"current_steps": 180, "total_steps": 716, "loss": 0.9074, "lr": 9.321973718524472e-05, "epoch": 0.5026178010471204, "percentage": 25.14, "elapsed_time": "1:13:16", "remaining_time": "3:38:10", "throughput": 638.57, "total_tokens": 2807176}
{"current_steps": 185, "total_steps": 716, "loss": 0.8979, "lr": 9.25937294024912e-05, "epoch": 0.5165794066317626, "percentage": 25.84, "elapsed_time": "1:15:09", "remaining_time": "3:35:44", "throughput": 639.75, "total_tokens": 2885136}
{"current_steps": 190, "total_steps": 716, "loss": 0.8908, "lr": 9.194238246314599e-05, "epoch": 0.5305410122164049, "percentage": 26.54, "elapsed_time": "1:17:03", "remaining_time": "3:33:20", "throughput": 640.86, "total_tokens": 2963120}
{"current_steps": 195, "total_steps": 716, "loss": 0.8922, "lr": 9.126608385575076e-05, "epoch": 0.5445026178010471, "percentage": 27.23, "elapsed_time": "1:18:56", "remaining_time": "3:30:54", "throughput": 642.09, "total_tokens": 3041096}
{"current_steps": 200, "total_steps": 716, "loss": 0.902, "lr": 9.056523591268064e-05, "epoch": 0.5584642233856894, "percentage": 27.93, "elapsed_time": "1:20:50", "remaining_time": "3:28:33", "throughput": 643.09, "total_tokens": 3119064}
{"current_steps": 200, "total_steps": 716, "eval_loss": 0.9003660678863525, "epoch": 0.5584642233856894, "percentage": 27.93, "elapsed_time": "1:21:36", "remaining_time": "3:30:33", "throughput": 636.98, "total_tokens": 3119064}
{"current_steps": 205, "total_steps": 716, "loss": 0.9016, "lr": 8.984025557079523e-05, "epoch": 0.5724258289703316, "percentage": 28.63, "elapsed_time": "1:23:35", "remaining_time": "3:28:21", "throughput": 637.44, "total_tokens": 3197048}
{"current_steps": 210, "total_steps": 716, "loss": 0.907, "lr": 8.90915741234015e-05, "epoch": 0.5863874345549738, "percentage": 29.33, "elapsed_time": "1:25:27", "remaining_time": "3:25:55", "throughput": 638.67, "total_tokens": 3275024}
{"current_steps": 215, "total_steps": 716, "loss": 0.8966, "lr": 8.831963696367581e-05, "epoch": 0.6003490401396161, "percentage": 30.03, "elapsed_time": "1:27:21", "remaining_time": "3:23:34", "throughput": 639.67, "total_tokens": 3353024}
{"current_steps": 220, "total_steps": 716, "loss": 0.9031, "lr": 8.752490331969807e-05, "epoch": 0.6143106457242583, "percentage": 30.73, "elapsed_time": "1:29:14", "remaining_time": "3:21:12", "throughput": 640.74, "total_tokens": 3430936}
{"current_steps": 225, "total_steps": 716, "loss": 0.9012, "lr": 8.670784598125533e-05, "epoch": 0.6282722513089005, "percentage": 31.42, "elapsed_time": "1:31:07", "remaining_time": "3:18:52", "throughput": 641.72, "total_tokens": 3508920}
{"current_steps": 230, "total_steps": 716, "loss": 0.8936, "lr": 8.586895101857747e-05, "epoch": 0.6422338568935427, "percentage": 32.12, "elapsed_time": "1:33:02", "remaining_time": "3:16:35", "throughput": 642.57, "total_tokens": 3586920}
{"current_steps": 235, "total_steps": 716, "loss": 0.9042, "lr": 8.500871749317243e-05, "epoch": 0.6561954624781849, "percentage": 32.82, "elapsed_time": "1:34:54", "remaining_time": "3:14:15", "throughput": 643.57, "total_tokens": 3664896}
{"current_steps": 240, "total_steps": 716, "loss": 0.9034, "lr": 8.412765716093272e-05, "epoch": 0.6701570680628273, "percentage": 33.52, "elapsed_time": "1:36:48", "remaining_time": "3:12:00", "throughput": 644.36, "total_tokens": 3742832}
{"current_steps": 245, "total_steps": 716, "loss": 0.8969, "lr": 8.322629416769006e-05, "epoch": 0.6841186736474695, "percentage": 34.22, "elapsed_time": "1:38:42", "remaining_time": "3:09:45", "throughput": 645.17, "total_tokens": 3820792}
{"current_steps": 250, "total_steps": 716, "loss": 0.8933, "lr": 8.230516473739935e-05, "epoch": 0.6980802792321117, "percentage": 34.92, "elapsed_time": "1:40:34", "remaining_time": "3:07:28", "throughput": 646.06, "total_tokens": 3898784}
{"current_steps": 250, "total_steps": 716, "eval_loss": 0.9052047729492188, "epoch": 0.6980802792321117, "percentage": 34.92, "elapsed_time": "1:41:21", "remaining_time": "3:08:55", "throughput": 641.12, "total_tokens": 3898784}
{"current_steps": 255, "total_steps": 716, "loss": 0.9079, "lr": 8.1364816853137e-05, "epoch": 0.7120418848167539, "percentage": 35.61, "elapsed_time": "1:43:20", "remaining_time": "3:06:48", "throughput": 641.42, "total_tokens": 3976824}
{"current_steps": 260, "total_steps": 716, "loss": 0.9044, "lr": 8.040580993110404e-05, "epoch": 0.7260034904013961, "percentage": 36.31, "elapsed_time": "1:45:14", "remaining_time": "3:04:33", "throughput": 642.18, "total_tokens": 4054752}
{"current_steps": 265, "total_steps": 716, "loss": 0.895, "lr": 7.942871448782748e-05, "epoch": 0.7399650959860384, "percentage": 37.01, "elapsed_time": "1:47:06", "remaining_time": "3:02:17", "throughput": 643.03, "total_tokens": 4132664}
{"current_steps": 270, "total_steps": 716, "loss": 0.8984, "lr": 7.843411180075794e-05, "epoch": 0.7539267015706806, "percentage": 37.71, "elapsed_time": "1:49:00", "remaining_time": "3:00:04", "throughput": 643.77, "total_tokens": 4210656}
{"current_steps": 275, "total_steps": 716, "loss": 0.904, "lr": 7.742259356246593e-05, "epoch": 0.7678883071553229, "percentage": 38.41, "elapsed_time": "1:50:53", "remaining_time": "2:57:49", "throughput": 644.59, "total_tokens": 4288664}
{"current_steps": 280, "total_steps": 716, "loss": 0.8973, "lr": 7.639476152864162e-05, "epoch": 0.7818499127399651, "percentage": 39.11, "elapsed_time": "1:52:47", "remaining_time": "2:55:38", "throughput": 645.22, "total_tokens": 4366608}
{"current_steps": 285, "total_steps": 716, "loss": 0.9018, "lr": 7.535122716010849e-05, "epoch": 0.7958115183246073, "percentage": 39.8, "elapsed_time": "1:54:42", "remaining_time": "2:53:27", "throughput": 645.82, "total_tokens": 4444568}
{"current_steps": 290, "total_steps": 716, "loss": 0.8886, "lr": 7.42926112590631e-05, "epoch": 0.8097731239092496, "percentage": 40.5, "elapsed_time": "1:56:34", "remaining_time": "2:51:14", "throughput": 646.57, "total_tokens": 4522512}
{"current_steps": 295, "total_steps": 716, "loss": 0.9002, "lr": 7.321954359975776e-05, "epoch": 0.8237347294938918, "percentage": 41.2, "elapsed_time": "1:58:28", "remaining_time": "2:49:05", "throughput": 647.15, "total_tokens": 4600504}
{"current_steps": 300, "total_steps": 716, "loss": 0.897, "lr": 7.21326625538456e-05, "epoch": 0.837696335078534, "percentage": 41.9, "elapsed_time": "2:00:22", "remaining_time": "2:46:54", "throughput": 647.79, "total_tokens": 4678472}
{"current_steps": 300, "total_steps": 716, "eval_loss": 0.9003945589065552, "epoch": 0.837696335078534, "percentage": 41.9, "elapsed_time": "2:01:08", "remaining_time": "2:47:58", "throughput": 643.67, "total_tokens": 4678472}
{"current_steps": 305, "total_steps": 716, "loss": 0.9088, "lr": 7.103261471061116e-05, "epoch": 0.8516579406631762, "percentage": 42.6, "elapsed_time": "2:03:07", "remaining_time": "2:45:55", "throughput": 643.82, "total_tokens": 4756440}
{"current_steps": 310, "total_steps": 716, "loss": 0.899, "lr": 6.992005449231208e-05, "epoch": 0.8656195462478184, "percentage": 43.3, "elapsed_time": "2:05:01", "remaining_time": "2:43:44", "throughput": 644.48, "total_tokens": 4834424}
{"current_steps": 315, "total_steps": 716, "loss": 0.905, "lr": 6.879564376486114e-05, "epoch": 0.8795811518324608, "percentage": 43.99, "elapsed_time": "2:06:53", "remaining_time": "2:41:32", "throughput": 645.19, "total_tokens": 4912376}
{"current_steps": 320, "total_steps": 716, "loss": 0.8968, "lr": 6.76600514440799e-05, "epoch": 0.893542757417103, "percentage": 44.69, "elapsed_time": "2:08:48", "remaining_time": "2:39:23", "throughput": 645.74, "total_tokens": 4990328}
{"current_steps": 325, "total_steps": 716, "loss": 0.8916, "lr": 6.651395309775837e-05, "epoch": 0.9075043630017452, "percentage": 45.39, "elapsed_time": "2:10:41", "remaining_time": "2:37:14", "throughput": 646.32, "total_tokens": 5068304}
{"current_steps": 330, "total_steps": 716, "loss": 0.8937, "lr": 6.535803054375738e-05, "epoch": 0.9214659685863874, "percentage": 46.09, "elapsed_time": "2:12:34", "remaining_time": "2:35:04", "throughput": 646.98, "total_tokens": 5146272}
{"current_steps": 335, "total_steps": 716, "loss": 0.8965, "lr": 6.419297144439283e-05, "epoch": 0.9354275741710296, "percentage": 46.79, "elapsed_time": "2:14:28", "remaining_time": "2:32:56", "throughput": 647.5, "total_tokens": 5224232}
{"current_steps": 340, "total_steps": 716, "loss": 0.8957, "lr": 6.301946889734302e-05, "epoch": 0.9493891797556719, "percentage": 47.49, "elapsed_time": "2:16:21", "remaining_time": "2:30:47", "throughput": 648.08, "total_tokens": 5302200}
{"current_steps": 345, "total_steps": 716, "loss": 0.9025, "lr": 6.183822102332234e-05, "epoch": 0.9633507853403142, "percentage": 48.18, "elapsed_time": "2:18:14", "remaining_time": "2:28:39", "throughput": 648.63, "total_tokens": 5380168}
{"current_steps": 350, "total_steps": 716, "loss": 0.8997, "lr": 6.064993055076698e-05, "epoch": 0.9773123909249564, "percentage": 48.88, "elapsed_time": "2:20:08", "remaining_time": "2:26:32", "throughput": 649.13, "total_tokens": 5458104}
{"current_steps": 350, "total_steps": 716, "eval_loss": 0.9016226530075073, "epoch": 0.9773123909249564, "percentage": 48.88, "elapsed_time": "2:20:54", "remaining_time": "2:27:21", "throughput": 645.57, "total_tokens": 5458104}
{"current_steps": 355, "total_steps": 716, "loss": 0.902, "lr": 5.945530439777923e-05, "epoch": 0.9912739965095986, "percentage": 49.58, "elapsed_time": "2:22:53", "remaining_time": "2:25:18", "throughput": 645.72, "total_tokens": 5536072}
{"current_steps": 360, "total_steps": 716, "loss": 0.7347, "lr": 5.8255053251579616e-05, "epoch": 1.0027923211169285, "percentage": 50.28, "elapsed_time": "2:24:27", "remaining_time": "2:22:51", "throughput": 646.11, "total_tokens": 5600392}
{"current_steps": 365, "total_steps": 716, "loss": 0.8899, "lr": 5.704989114571648e-05, "epoch": 1.0167539267015706, "percentage": 50.98, "elapsed_time": "2:26:21", "remaining_time": "2:20:44", "throughput": 646.67, "total_tokens": 5678424}
{"current_steps": 370, "total_steps": 716, "loss": 0.8929, "lr": 5.5840535035285025e-05, "epoch": 1.030715532286213, "percentage": 51.68, "elapsed_time": "2:28:14", "remaining_time": "2:18:37", "throughput": 647.18, "total_tokens": 5756400}
{"current_steps": 375, "total_steps": 716, "loss": 0.8904, "lr": 5.4627704370408236e-05, "epoch": 1.0446771378708553, "percentage": 52.37, "elapsed_time": "2:30:08", "remaining_time": "2:16:31", "throughput": 647.63, "total_tokens": 5834352}
{"current_steps": 380, "total_steps": 716, "loss": 0.8964, "lr": 5.341212066823355e-05, "epoch": 1.0586387434554974, "percentage": 53.07, "elapsed_time": "2:32:01", "remaining_time": "2:14:25", "throughput": 648.19, "total_tokens": 5912320}
{"current_steps": 385, "total_steps": 716, "loss": 0.8843, "lr": 5.219450708369977e-05, "epoch": 1.0726003490401397, "percentage": 53.77, "elapsed_time": "2:33:55", "remaining_time": "2:12:20", "throughput": 648.61, "total_tokens": 5990312}
{"current_steps": 390, "total_steps": 716, "loss": 0.8879, "lr": 5.0975587979329734e-05, "epoch": 1.0865619546247818, "percentage": 54.47, "elapsed_time": "2:35:49", "remaining_time": "2:10:14", "throughput": 649.08, "total_tokens": 6068280}
{"current_steps": 395, "total_steps": 716, "loss": 0.8816, "lr": 4.9756088494304504e-05, "epoch": 1.100523560209424, "percentage": 55.17, "elapsed_time": "2:37:41", "remaining_time": "2:08:09", "throughput": 649.61, "total_tokens": 6146288}
{"current_steps": 400, "total_steps": 716, "loss": 0.9109, "lr": 4.853673411307564e-05, "epoch": 1.1144851657940662, "percentage": 55.87, "elapsed_time": "2:39:35", "remaining_time": "2:06:04", "throughput": 650.02, "total_tokens": 6224248}
{"current_steps": 400, "total_steps": 716, "eval_loss": 0.8960007429122925, "epoch": 1.1144851657940662, "percentage": 55.87, "elapsed_time": "2:40:21", "remaining_time": "2:06:41", "throughput": 646.88, "total_tokens": 6224248}
{"current_steps": 405, "total_steps": 716, "loss": 0.8631, "lr": 4.731825023377192e-05, "epoch": 1.1284467713787085, "percentage": 56.56, "elapsed_time": "2:42:21", "remaining_time": "2:04:40", "throughput": 646.93, "total_tokens": 6302208}
{"current_steps": 410, "total_steps": 716, "loss": 0.8722, "lr": 4.610136173665751e-05, "epoch": 1.1424083769633508, "percentage": 57.26, "elapsed_time": "2:44:15", "remaining_time": "2:02:35", "throughput": 647.39, "total_tokens": 6380096}
{"current_steps": 415, "total_steps": 716, "loss": 0.864, "lr": 4.4886792552898286e-05, "epoch": 1.156369982547993, "percentage": 57.96, "elapsed_time": "2:46:09", "remaining_time": "2:00:30", "throughput": 647.8, "total_tokens": 6458096}
{"current_steps": 420, "total_steps": 716, "loss": 0.8446, "lr": 4.367526523389253e-05, "epoch": 1.1703315881326353, "percentage": 58.66, "elapsed_time": "2:48:01", "remaining_time": "1:58:25", "throughput": 648.3, "total_tokens": 6536064}
{"current_steps": 425, "total_steps": 716, "loss": 0.8576, "lr": 4.24675005214227e-05, "epoch": 1.1842931937172776, "percentage": 59.36, "elapsed_time": "2:49:55", "remaining_time": "1:56:20", "throughput": 648.73, "total_tokens": 6614048}
{"current_steps": 430, "total_steps": 716, "loss": 0.8715, "lr": 4.1264216918883656e-05, "epoch": 1.1982547993019197, "percentage": 60.06, "elapsed_time": "2:51:48", "remaining_time": "1:54:16", "throughput": 649.14, "total_tokens": 6691984}
{"current_steps": 435, "total_steps": 716, "loss": 0.8708, "lr": 4.006613026384249e-05, "epoch": 1.212216404886562, "percentage": 60.75, "elapsed_time": "2:53:42", "remaining_time": "1:52:12", "throughput": 649.58, "total_tokens": 6769984}
{"current_steps": 440, "total_steps": 716, "loss": 0.8546, "lr": 3.887395330218429e-05, "epoch": 1.2261780104712041, "percentage": 61.45, "elapsed_time": "2:55:35", "remaining_time": "1:50:08", "throughput": 649.98, "total_tokens": 6847976}
{"current_steps": 445, "total_steps": 716, "loss": 0.8592, "lr": 3.768839526409718e-05, "epoch": 1.2401396160558464, "percentage": 62.15, "elapsed_time": "2:57:28", "remaining_time": "1:48:04", "throughput": 650.41, "total_tokens": 6925944}
{"current_steps": 450, "total_steps": 716, "loss": 0.8127, "lr": 3.651016144214878e-05, "epoch": 1.2541012216404885, "percentage": 62.85, "elapsed_time": "2:59:21", "remaining_time": "1:46:01", "throughput": 650.83, "total_tokens": 7003904}
{"current_steps": 450, "total_steps": 716, "eval_loss": 0.8821887373924255, "epoch": 1.2541012216404885, "percentage": 62.85, "elapsed_time": "3:00:07", "remaining_time": "1:46:28", "throughput": 648.03, "total_tokens": 7003904}
{"current_steps": 455, "total_steps": 716, "loss": 0.837, "lr": 3.533995277170532e-05, "epoch": 1.2680628272251309, "percentage": 63.55, "elapsed_time": "3:02:07", "remaining_time": "1:44:28", "throughput": 648.1, "total_tokens": 7081856}
{"current_steps": 460, "total_steps": 716, "loss": 0.8631, "lr": 3.4178465413942625e-05, "epoch": 1.2820244328097732, "percentage": 64.25, "elapsed_time": "3:03:59", "remaining_time": "1:42:23", "throughput": 648.56, "total_tokens": 7159776}
{"current_steps": 465, "total_steps": 716, "loss": 0.8511, "lr": 3.3026390341697576e-05, "epoch": 1.2959860383944153, "percentage": 64.94, "elapsed_time": "3:05:53", "remaining_time": "1:40:20", "throughput": 648.9, "total_tokens": 7237720}
{"current_steps": 470, "total_steps": 716, "loss": 0.8439, "lr": 3.188441292840587e-05, "epoch": 1.3099476439790576, "percentage": 65.64, "elapsed_time": "3:07:48", "remaining_time": "1:38:17", "throughput": 649.25, "total_tokens": 7315704}
{"current_steps": 475, "total_steps": 716, "loss": 0.872, "lr": 3.075321254037112e-05, "epoch": 1.3239092495637, "percentage": 66.34, "elapsed_time": "3:09:40", "remaining_time": "1:36:14", "throughput": 649.67, "total_tokens": 7393672}
{"current_steps": 480, "total_steps": 716, "loss": 0.8397, "lr": 2.963346213260737e-05, "epoch": 1.337870855148342, "percentage": 67.04, "elapsed_time": "3:11:34", "remaining_time": "1:34:11", "throughput": 650.02, "total_tokens": 7471632}
{"current_steps": 485, "total_steps": 716, "loss": 0.8254, "lr": 2.8525827848495913e-05, "epoch": 1.3518324607329844, "percentage": 67.74, "elapsed_time": "3:13:27", "remaining_time": "1:32:08", "throughput": 650.41, "total_tokens": 7549624}
{"current_steps": 490, "total_steps": 716, "loss": 0.8236, "lr": 2.743096862349427e-05, "epoch": 1.3657940663176265, "percentage": 68.44, "elapsed_time": "3:15:20", "remaining_time": "1:30:05", "throughput": 650.79, "total_tokens": 7627568}
{"current_steps": 495, "total_steps": 716, "loss": 0.8561, "lr": 2.6349535793133196e-05, "epoch": 1.3797556719022688, "percentage": 69.13, "elapsed_time": "3:17:14", "remaining_time": "1:28:03", "throughput": 651.11, "total_tokens": 7705512}
{"current_steps": 500, "total_steps": 716, "loss": 0.8198, "lr": 2.5282172705535013e-05, "epoch": 1.3937172774869109, "percentage": 69.83, "elapsed_time": "3:19:06", "remaining_time": "1:26:01", "throughput": 651.51, "total_tokens": 7783528}
{"current_steps": 500, "total_steps": 716, "eval_loss": 0.846021294593811, "epoch": 1.3937172774869109, "percentage": 69.83, "elapsed_time": "3:19:53", "remaining_time": "1:26:21", "throughput": 648.99, "total_tokens": 7783528}
{"current_steps": 505, "total_steps": 716, "loss": 0.8498, "lr": 2.4229514338683458e-05, "epoch": 1.4076788830715532, "percentage": 70.53, "elapsed_time": "3:21:51", "remaining_time": "1:24:20", "throughput": 649.07, "total_tokens": 7861512}
{"current_steps": 510, "total_steps": 716, "loss": 0.8195, "lr": 2.3192186922673186e-05, "epoch": 1.4216404886561955, "percentage": 71.23, "elapsed_time": "3:23:45", "remaining_time": "1:22:18", "throughput": 649.43, "total_tokens": 7939480}
{"current_steps": 515, "total_steps": 716, "loss": 0.8428, "lr": 2.2170807567163294e-05, "epoch": 1.4356020942408376, "percentage": 71.93, "elapsed_time": "3:25:37", "remaining_time": "1:20:15", "throughput": 649.83, "total_tokens": 8017496}
{"current_steps": 520, "total_steps": 716, "loss": 0.8534, "lr": 2.1165983894256647e-05, "epoch": 1.44956369982548, "percentage": 72.63, "elapsed_time": "3:27:30", "remaining_time": "1:18:12", "throughput": 650.2, "total_tokens": 8095504}
{"current_steps": 525, "total_steps": 716, "loss": 0.8113, "lr": 2.0178313677023425e-05, "epoch": 1.4635253054101223, "percentage": 73.32, "elapsed_time": "3:29:24", "remaining_time": "1:16:10", "throughput": 650.53, "total_tokens": 8173440}
{"current_steps": 530, "total_steps": 716, "loss": 0.8325, "lr": 1.9208384483883817e-05, "epoch": 1.4774869109947644, "percentage": 74.02, "elapsed_time": "3:31:16", "remaining_time": "1:14:08", "throughput": 650.91, "total_tokens": 8251400}
{"current_steps": 535, "total_steps": 716, "loss": 0.8158, "lr": 1.8256773329061567e-05, "epoch": 1.4914485165794067, "percentage": 74.72, "elapsed_time": "3:33:10", "remaining_time": "1:12:07", "throughput": 651.23, "total_tokens": 8329384}
{"current_steps": 540, "total_steps": 716, "loss": 0.8183, "lr": 1.732404632931625e-05, "epoch": 1.505410122164049, "percentage": 75.42, "elapsed_time": "3:35:02", "remaining_time": "1:10:05", "throughput": 651.6, "total_tokens": 8407384}
{"current_steps": 545, "total_steps": 716, "loss": 0.8364, "lr": 1.6410758367158385e-05, "epoch": 1.5193717277486911, "percentage": 76.12, "elapsed_time": "3:36:56", "remaining_time": "1:08:03", "throughput": 651.91, "total_tokens": 8485328}
{"current_steps": 550, "total_steps": 716, "loss": 0.832, "lr": 1.5517452760747975e-05, "epoch": 1.5333333333333332, "percentage": 76.82, "elapsed_time": "3:38:49", "remaining_time": "1:06:02", "throughput": 652.2, "total_tokens": 8563264}
{"current_steps": 550, "total_steps": 716, "eval_loss": 0.8187811374664307, "epoch": 1.5333333333333332, "percentage": 76.82, "elapsed_time": "3:39:36", "remaining_time": "1:06:16", "throughput": 649.9, "total_tokens": 8563264}
{"current_steps": 555, "total_steps": 716, "loss": 0.8445, "lr": 1.4644660940672627e-05, "epoch": 1.5472949389179755, "percentage": 77.51, "elapsed_time": "3:41:35", "remaining_time": "1:04:17", "throughput": 649.92, "total_tokens": 8641240}
{"current_steps": 560, "total_steps": 716, "loss": 0.8372, "lr": 1.3792902133797692e-05, "epoch": 1.5612565445026179, "percentage": 78.21, "elapsed_time": "3:43:30", "remaining_time": "1:02:15", "throughput": 650.16, "total_tokens": 8719256}
{"current_steps": 565, "total_steps": 716, "loss": 0.8107, "lr": 1.2962683054376373e-05, "epoch": 1.57521815008726, "percentage": 78.91, "elapsed_time": "3:45:25", "remaining_time": "1:00:14", "throughput": 650.41, "total_tokens": 8797240}
{"current_steps": 570, "total_steps": 716, "loss": 0.8472, "lr": 1.2154497602603703e-05, "epoch": 1.5891797556719023, "percentage": 79.61, "elapsed_time": "3:47:19", "remaining_time": "0:58:13", "throughput": 650.7, "total_tokens": 8875208}
{"current_steps": 575, "total_steps": 716, "loss": 0.8221, "lr": 1.13688265707936e-05, "epoch": 1.6031413612565446, "percentage": 80.31, "elapsed_time": "3:49:14", "remaining_time": "0:56:12", "throughput": 650.93, "total_tokens": 8953176}
{"current_steps": 580, "total_steps": 716, "loss": 0.8076, "lr": 1.060613735735384e-05, "epoch": 1.6171029668411867, "percentage": 81.01, "elapsed_time": "3:51:07", "remaining_time": "0:54:11", "throughput": 651.24, "total_tokens": 9031192}
{"current_steps": 585, "total_steps": 716, "loss": 0.7987, "lr": 9.86688368872919e-06, "epoch": 1.6310645724258288, "percentage": 81.7, "elapsed_time": "3:53:02", "remaining_time": "0:52:11", "throughput": 651.47, "total_tokens": 9109184}
{"current_steps": 590, "total_steps": 716, "loss": 0.7814, "lr": 9.151505349477902e-06, "epoch": 1.6450261780104714, "percentage": 82.4, "elapsed_time": "3:54:57", "remaining_time": "0:50:10", "throughput": 651.7, "total_tokens": 9187136}
{"current_steps": 595, "total_steps": 716, "loss": 0.7907, "lr": 8.460427920642423e-06, "epoch": 1.6589877835951135, "percentage": 83.1, "elapsed_time": "3:56:50", "remaining_time": "0:48:09", "throughput": 651.99, "total_tokens": 9265112}
{"current_steps": 600, "total_steps": 716, "loss": 0.786, "lr": 7.794062526569734e-06, "epoch": 1.6729493891797556, "percentage": 83.8, "elapsed_time": "3:58:45", "remaining_time": "0:46:09", "throughput": 652.2, "total_tokens": 9343120}
{"current_steps": 600, "total_steps": 716, "eval_loss": 0.8021153211593628, "epoch": 1.6729493891797556, "percentage": 83.8, "elapsed_time": "3:59:31", "remaining_time": "0:46:18", "throughput": 650.09, "total_tokens": 9343120}
{"current_steps": 605, "total_steps": 716, "loss": 0.7702, "lr": 7.152805590332079e-06, "epoch": 1.6869109947643979, "percentage": 84.5, "elapsed_time": "4:01:32", "remaining_time": "0:44:18", "throughput": 650.07, "total_tokens": 9421080}
{"current_steps": 610, "total_steps": 716, "loss": 0.7447, "lr": 6.53703859789348e-06, "epoch": 1.7008726003490402, "percentage": 85.2, "elapsed_time": "4:03:26", "remaining_time": "0:42:18", "throughput": 650.34, "total_tokens": 9499048}
{"current_steps": 615, "total_steps": 716, "loss": 0.7943, "lr": 5.947127871162456e-06, "epoch": 1.7148342059336823, "percentage": 85.89, "elapsed_time": "4:05:20", "remaining_time": "0:40:17", "throughput": 650.58, "total_tokens": 9577048}
{"current_steps": 620, "total_steps": 716, "loss": 0.7784, "lr": 5.383424350065824e-06, "epoch": 1.7287958115183246, "percentage": 86.59, "elapsed_time": "4:07:14", "remaining_time": "0:38:16", "throughput": 650.85, "total_tokens": 9655032}
{"current_steps": 625, "total_steps": 716, "loss": 0.8188, "lr": 4.846263383773364e-06, "epoch": 1.742757417102967, "percentage": 87.29, "elapsed_time": "4:09:09", "remaining_time": "0:36:16", "throughput": 651.08, "total_tokens": 9733000}
{"current_steps": 630, "total_steps": 716, "loss": 0.7514, "lr": 4.335964531197401e-06, "epoch": 1.756719022687609, "percentage": 87.99, "elapsed_time": "4:11:03", "remaining_time": "0:34:16", "throughput": 651.31, "total_tokens": 9810984}
{"current_steps": 635, "total_steps": 716, "loss": 0.8165, "lr": 3.8528313708861174e-06, "epoch": 1.7706806282722511, "percentage": 88.69, "elapsed_time": "4:12:57", "remaining_time": "0:32:15", "throughput": 651.57, "total_tokens": 9888984}
{"current_steps": 640, "total_steps": 716, "loss": 0.7778, "lr": 3.397151320423647e-06, "epoch": 1.7846422338568937, "percentage": 89.39, "elapsed_time": "4:14:52", "remaining_time": "0:30:15", "throughput": 651.78, "total_tokens": 9966984}
{"current_steps": 645, "total_steps": 716, "loss": 0.7745, "lr": 2.9691954654443355e-06, "epoch": 1.7986038394415358, "percentage": 90.08, "elapsed_time": "4:16:46", "remaining_time": "0:28:15", "throughput": 651.99, "total_tokens": 10044928}
{"current_steps": 650, "total_steps": 716, "loss": 0.8312, "lr": 2.5692183983629713e-06, "epoch": 1.812565445026178, "percentage": 90.78, "elapsed_time": "4:18:40", "remaining_time": "0:26:15", "throughput": 652.24, "total_tokens": 10122936}
{"current_steps": 650, "total_steps": 716, "eval_loss": 0.7986289858818054, "epoch": 1.812565445026178, "percentage": 90.78, "elapsed_time": "4:19:26", "remaining_time": "0:26:20", "throughput": 650.29, "total_tokens": 10122936}
{"current_steps": 655, "total_steps": 716, "loss": 0.794, "lr": 2.197458066916891e-06, "epoch": 1.8265270506108202, "percentage": 91.48, "elapsed_time": "4:21:26", "remaining_time": "0:24:20", "throughput": 650.28, "total_tokens": 10200848}
{"current_steps": 660, "total_steps": 716, "loss": 0.8129, "lr": 1.8541356326100433e-06, "epoch": 1.8404886561954625, "percentage": 92.18, "elapsed_time": "4:23:21", "remaining_time": "0:22:20", "throughput": 650.52, "total_tokens": 10278848}
{"current_steps": 665, "total_steps": 716, "loss": 0.8057, "lr": 1.5394553391432143e-06, "epoch": 1.8544502617801046, "percentage": 92.88, "elapsed_time": "4:25:16", "remaining_time": "0:20:20", "throughput": 650.69, "total_tokens": 10356800}
{"current_steps": 670, "total_steps": 716, "loss": 0.7678, "lr": 1.2536043909088191e-06, "epoch": 1.868411867364747, "percentage": 93.58, "elapsed_time": "4:27:11", "remaining_time": "0:18:20", "throughput": 650.87, "total_tokens": 10434768}
{"current_steps": 695, "total_steps": 716, "loss": 0.7823, "lr": 2.62136057095258e-07, "epoch": 1.9382198952879581, "percentage": 97.07, "elapsed_time": "4:36:59", "remaining_time": "0:08:22", "throughput": 651.32, "total_tokens": 10824664}
{"current_steps": 700, "total_steps": 716, "loss": 0.7797, "lr": 1.5222552920138856e-07, "epoch": 1.9521815008726002, "percentage": 97.77, "elapsed_time": "4:38:53", "remaining_time": "0:06:22", "throughput": 651.55, "total_tokens": 10902632}
{"current_steps": 700, "total_steps": 716, "eval_loss": 0.794740617275238, "epoch": 1.9521815008726002, "percentage": 97.77, "elapsed_time": "4:39:39", "remaining_time": "0:06:23", "throughput": 649.74, "total_tokens": 10902632}
{"current_steps": 705, "total_steps": 716, "loss": 0.8029, "lr": 7.196961434052796e-08, "epoch": 1.9661431064572426, "percentage": 98.46, "elapsed_time": "4:41:39", "remaining_time": "0:04:23", "throughput": 649.75, "total_tokens": 10980576}
{"current_steps": 710, "total_steps": 716, "loss": 0.7942, "lr": 2.1416057033352144e-08, "epoch": 1.9801047120418849, "percentage": 99.16, "elapsed_time": "4:43:34", "remaining_time": "0:02:23", "throughput": 649.97, "total_tokens": 11058552}
{"current_steps": 715, "total_steps": 716, "loss": 0.7964, "lr": 5.949317655462583e-10, "epoch": 1.994066317626527, "percentage": 99.86, "elapsed_time": "4:45:26", "remaining_time": "0:00:23", "throughput": 650.25, "total_tokens": 11136520}
{"current_steps": 716, "total_steps": 716, "epoch": 1.9968586387434555, "percentage": 100.0, "elapsed_time": "4:45:54", "remaining_time": "0:00:00", "throughput": 650.12, "total_tokens": 11152104}