k1h0's picture
Upload folder using huggingface_hub
29e2abb verified
{"current_steps": 1, "total_steps": 67, "loss": 1.5801, "lr": 4.997252228714279e-05, "epoch": 0.014787430683918669, "percentage": 1.49, "elapsed_time": "0:02:41", "remaining_time": "2:57:48", "throughput": 9730.57, "total_tokens": 1572864}
{"current_steps": 2, "total_steps": 67, "loss": 1.3696, "lr": 4.9890149550547454e-05, "epoch": 0.029574861367837338, "percentage": 2.99, "elapsed_time": "0:05:14", "remaining_time": "2:50:27", "throughput": 9996.29, "total_tokens": 3145728}
{"current_steps": 3, "total_steps": 67, "loss": 1.2964, "lr": 4.9753062863366276e-05, "epoch": 0.04436229205175601, "percentage": 4.48, "elapsed_time": "0:07:48", "remaining_time": "2:46:24", "throughput": 10081.95, "total_tokens": 4718592}
{"current_steps": 4, "total_steps": 67, "loss": 1.2465, "lr": 4.95615635718894e-05, "epoch": 0.059149722735674676, "percentage": 5.97, "elapsed_time": "0:10:20", "remaining_time": "2:43:00", "throughput": 10131.27, "total_tokens": 6291456}
{"current_steps": 5, "total_steps": 67, "loss": 1.1284, "lr": 4.931607263312032e-05, "epoch": 0.07393715341959335, "percentage": 7.46, "elapsed_time": "0:12:53", "remaining_time": "2:39:54", "throughput": 10163.81, "total_tokens": 7864320}
{"current_steps": 6, "total_steps": 67, "loss": 1.0455, "lr": 4.9017129689421e-05, "epoch": 0.08872458410351201, "percentage": 8.96, "elapsed_time": "0:15:26", "remaining_time": "2:37:01", "throughput": 10183.83, "total_tokens": 9437184}
{"current_steps": 7, "total_steps": 67, "loss": 0.985, "lr": 4.8665391882260856e-05, "epoch": 0.10351201478743069, "percentage": 10.45, "elapsed_time": "0:17:59", "remaining_time": "2:34:12", "throughput": 10199.69, "total_tokens": 11010048}
{"current_steps": 8, "total_steps": 67, "loss": 0.957, "lr": 4.8261632407677174e-05, "epoch": 0.11829944547134935, "percentage": 11.94, "elapsed_time": "0:20:32", "remaining_time": "2:31:30", "throughput": 10208.81, "total_tokens": 12582912}
{"current_steps": 9, "total_steps": 67, "loss": 0.9121, "lr": 4.780673881662242e-05, "epoch": 0.133086876155268, "percentage": 13.43, "elapsed_time": "0:23:05", "remaining_time": "2:28:50", "throughput": 10215.43, "total_tokens": 14155776}
{"current_steps": 10, "total_steps": 67, "loss": 0.8857, "lr": 4.730171106393466e-05, "epoch": 0.1478743068391867, "percentage": 14.93, "elapsed_time": "0:25:38", "remaining_time": "2:26:12", "throughput": 10220.33, "total_tokens": 15728640}
{"current_steps": 11, "total_steps": 67, "loss": 0.8871, "lr": 4.674765931021976e-05, "epoch": 0.16266173752310537, "percentage": 16.42, "elapsed_time": "0:28:12", "remaining_time": "2:23:34", "throughput": 10224.6, "total_tokens": 17301504}
{"current_steps": 12, "total_steps": 67, "loss": 0.8668, "lr": 4.614580148147744e-05, "epoch": 0.17744916820702403, "percentage": 17.91, "elapsed_time": "0:30:45", "remaining_time": "2:20:57", "throughput": 10228.07, "total_tokens": 18874368}
{"current_steps": 13, "total_steps": 67, "loss": 0.8264, "lr": 4.5497460591835615e-05, "epoch": 0.1922365988909427, "percentage": 19.4, "elapsed_time": "0:33:18", "remaining_time": "2:18:21", "throughput": 10231.09, "total_tokens": 20447232}
{"current_steps": 14, "total_steps": 67, "loss": 0.8316, "lr": 4.480406183527823e-05, "epoch": 0.20702402957486138, "percentage": 20.9, "elapsed_time": "0:35:51", "remaining_time": "2:15:46", "throughput": 10233.42, "total_tokens": 22020096}
{"current_steps": 15, "total_steps": 67, "loss": 0.8214, "lr": 4.406712945275955e-05, "epoch": 0.22181146025878004, "percentage": 22.39, "elapsed_time": "0:38:24", "remaining_time": "2:13:10", "throughput": 10236.31, "total_tokens": 23592960}
{"current_steps": 16, "total_steps": 67, "loss": 0.8074, "lr": 4.328828338159173e-05, "epoch": 0.2365988909426987, "percentage": 23.88, "elapsed_time": "0:40:57", "remaining_time": "2:10:34", "throughput": 10238.37, "total_tokens": 25165824}
{"current_steps": 17, "total_steps": 67, "loss": 0.7947, "lr": 4.2469235694471043e-05, "epoch": 0.2513863216266174, "percentage": 25.37, "elapsed_time": "0:43:31", "remaining_time": "2:08:00", "throughput": 10239.79, "total_tokens": 26738688}
{"current_steps": 18, "total_steps": 67, "loss": 0.7868, "lr": 4.161178683597054e-05, "epoch": 0.266173752310536, "percentage": 26.87, "elapsed_time": "0:46:04", "remaining_time": "2:05:25", "throughput": 10240.87, "total_tokens": 28311552}
{"current_steps": 19, "total_steps": 67, "loss": 0.7657, "lr": 4.071782166477213e-05, "epoch": 0.2809611829944547, "percentage": 28.36, "elapsed_time": "0:48:37", "remaining_time": "2:02:51", "throughput": 10241.86, "total_tokens": 29884416}
{"current_steps": 20, "total_steps": 67, "loss": 0.7821, "lr": 3.978930531033807e-05, "epoch": 0.2957486136783734, "percentage": 29.85, "elapsed_time": "0:51:11", "remaining_time": "2:00:17", "throughput": 10242.44, "total_tokens": 31457280}
{"current_steps": 21, "total_steps": 67, "loss": 0.764, "lr": 3.882827885312999e-05, "epoch": 0.31053604436229204, "percentage": 31.34, "elapsed_time": "0:53:44", "remaining_time": "1:57:43", "throughput": 10242.8, "total_tokens": 33030144}
{"current_steps": 22, "total_steps": 67, "loss": 0.7781, "lr": 3.783685483787105e-05, "epoch": 0.32532347504621073, "percentage": 32.84, "elapsed_time": "0:56:18", "remaining_time": "1:55:09", "throughput": 10243.48, "total_tokens": 34603008}
{"current_steps": 23, "total_steps": 67, "loss": 0.7663, "lr": 3.681721262971413e-05, "epoch": 0.34011090573012936, "percentage": 34.33, "elapsed_time": "0:58:51", "remaining_time": "1:52:35", "throughput": 10244.34, "total_tokens": 36175872}
{"current_steps": 24, "total_steps": 67, "loss": 0.7303, "lr": 3.5771593623524265e-05, "epoch": 0.35489833641404805, "percentage": 35.82, "elapsed_time": "1:01:24", "remaining_time": "1:50:01", "throughput": 10245.23, "total_tokens": 37748736}
{"current_steps": 25, "total_steps": 67, "loss": 0.7626, "lr": 3.4702296316806244e-05, "epoch": 0.36968576709796674, "percentage": 37.31, "elapsed_time": "1:03:57", "remaining_time": "1:47:27", "throughput": 10245.49, "total_tokens": 39321600}
{"current_steps": 26, "total_steps": 67, "loss": 0.7709, "lr": 3.361167125710832e-05, "epoch": 0.3844731977818854, "percentage": 38.81, "elapsed_time": "1:06:31", "remaining_time": "1:44:53", "throughput": 10246.02, "total_tokens": 40894464}
{"current_steps": 27, "total_steps": 67, "loss": 0.7638, "lr": 3.2502115875008524e-05, "epoch": 0.39926062846580407, "percentage": 40.3, "elapsed_time": "1:09:04", "remaining_time": "1:42:20", "throughput": 10246.34, "total_tokens": 42467328}
{"current_steps": 28, "total_steps": 67, "loss": 0.7561, "lr": 3.1376069214041913e-05, "epoch": 0.41404805914972276, "percentage": 41.79, "elapsed_time": "1:11:38", "remaining_time": "1:39:46", "throughput": 10246.39, "total_tokens": 44040192}
{"current_steps": 29, "total_steps": 67, "loss": 0.7372, "lr": 3.0236006569153617e-05, "epoch": 0.4288354898336414, "percentage": 43.28, "elapsed_time": "1:14:11", "remaining_time": "1:37:12", "throughput": 10246.9, "total_tokens": 45613056}
{"current_steps": 30, "total_steps": 67, "loss": 0.7281, "lr": 2.9084434045463255e-05, "epoch": 0.4436229205175601, "percentage": 44.78, "elapsed_time": "1:16:45", "remaining_time": "1:34:39", "throughput": 10246.62, "total_tokens": 47185920}
{"current_steps": 31, "total_steps": 67, "loss": 0.7601, "lr": 2.792388304930207e-05, "epoch": 0.4584103512014787, "percentage": 46.27, "elapsed_time": "1:19:18", "remaining_time": "1:32:06", "throughput": 10246.53, "total_tokens": 48758784}
{"current_steps": 32, "total_steps": 67, "loss": 0.7296, "lr": 2.6756904723632324e-05, "epoch": 0.4731977818853974, "percentage": 47.76, "elapsed_time": "1:21:51", "remaining_time": "1:29:32", "throughput": 10246.68, "total_tokens": 50331648}
{"current_steps": 33, "total_steps": 67, "loss": 0.741, "lr": 2.5586064340081516e-05, "epoch": 0.4879852125693161, "percentage": 49.25, "elapsed_time": "1:24:25", "remaining_time": "1:26:59", "throughput": 10246.53, "total_tokens": 51904512}
{"current_steps": 34, "total_steps": 67, "loss": 0.7251, "lr": 2.441393565991849e-05, "epoch": 0.5027726432532348, "percentage": 50.75, "elapsed_time": "1:26:59", "remaining_time": "1:24:25", "throughput": 10246.23, "total_tokens": 53477376}
{"current_steps": 35, "total_steps": 67, "loss": 0.7385, "lr": 2.3243095276367685e-05, "epoch": 0.5175600739371534, "percentage": 52.24, "elapsed_time": "1:29:32", "remaining_time": "1:21:52", "throughput": 10246.0, "total_tokens": 55050240}
{"current_steps": 36, "total_steps": 67, "loss": 0.746, "lr": 2.207611695069794e-05, "epoch": 0.532347504621072, "percentage": 53.73, "elapsed_time": "1:32:06", "remaining_time": "1:19:18", "throughput": 10245.76, "total_tokens": 56623104}
{"current_steps": 37, "total_steps": 67, "loss": 0.7315, "lr": 2.0915565954536744e-05, "epoch": 0.5471349353049908, "percentage": 55.22, "elapsed_time": "1:34:40", "remaining_time": "1:16:45", "throughput": 10245.41, "total_tokens": 58195968}
{"current_steps": 38, "total_steps": 67, "loss": 0.7267, "lr": 1.9763993430846395e-05, "epoch": 0.5619223659889094, "percentage": 56.72, "elapsed_time": "1:37:13", "remaining_time": "1:14:12", "throughput": 10245.47, "total_tokens": 59768832}
{"current_steps": 39, "total_steps": 67, "loss": 0.7443, "lr": 1.8623930785958092e-05, "epoch": 0.5767097966728281, "percentage": 58.21, "elapsed_time": "1:39:46", "remaining_time": "1:11:38", "throughput": 10246.13, "total_tokens": 61341696}
{"current_steps": 40, "total_steps": 67, "loss": 0.7163, "lr": 1.749788412499149e-05, "epoch": 0.5914972273567468, "percentage": 59.7, "elapsed_time": "1:42:19", "remaining_time": "1:09:04", "throughput": 10246.67, "total_tokens": 62914560}
{"current_steps": 41, "total_steps": 67, "loss": 0.73, "lr": 1.638832874289168e-05, "epoch": 0.6062846580406654, "percentage": 61.19, "elapsed_time": "1:44:53", "remaining_time": "1:06:31", "throughput": 10246.53, "total_tokens": 64487424}
{"current_steps": 42, "total_steps": 67, "loss": 0.723, "lr": 1.5297703683193752e-05, "epoch": 0.6210720887245841, "percentage": 62.69, "elapsed_time": "1:47:27", "remaining_time": "1:03:57", "throughput": 10246.38, "total_tokens": 66060288}
{"current_steps": 43, "total_steps": 67, "loss": 0.7176, "lr": 1.4228406376475742e-05, "epoch": 0.6358595194085028, "percentage": 64.18, "elapsed_time": "1:50:00", "remaining_time": "1:01:24", "throughput": 10246.13, "total_tokens": 67633152}
{"current_steps": 44, "total_steps": 67, "loss": 0.7066, "lr": 1.3182787370285865e-05, "epoch": 0.6506469500924215, "percentage": 65.67, "elapsed_time": "1:52:34", "remaining_time": "0:58:50", "throughput": 10245.31, "total_tokens": 69206016}
{"current_steps": 45, "total_steps": 67, "loss": 0.7244, "lr": 1.2163145162128947e-05, "epoch": 0.6654343807763401, "percentage": 67.16, "elapsed_time": "1:55:08", "remaining_time": "0:56:17", "throughput": 10245.33, "total_tokens": 70778880}
{"current_steps": 46, "total_steps": 67, "loss": 0.7349, "lr": 1.1171721146870015e-05, "epoch": 0.6802218114602587, "percentage": 68.66, "elapsed_time": "1:57:42", "remaining_time": "0:53:43", "throughput": 10245.14, "total_tokens": 72351744}
{"current_steps": 47, "total_steps": 67, "loss": 0.7487, "lr": 1.021069468966194e-05, "epoch": 0.6950092421441775, "percentage": 70.15, "elapsed_time": "2:00:15", "remaining_time": "0:51:10", "throughput": 10245.15, "total_tokens": 73924608}
{"current_steps": 48, "total_steps": 67, "loss": 0.7381, "lr": 9.282178335227884e-06, "epoch": 0.7097966728280961, "percentage": 71.64, "elapsed_time": "2:02:49", "remaining_time": "0:48:36", "throughput": 10245.27, "total_tokens": 75497472}
{"current_steps": 49, "total_steps": 67, "loss": 0.7262, "lr": 8.38821316402946e-06, "epoch": 0.7245841035120147, "percentage": 73.13, "elapsed_time": "2:05:22", "remaining_time": "0:46:03", "throughput": 10245.11, "total_tokens": 77070336}
{"current_steps": 50, "total_steps": 67, "loss": 0.7473, "lr": 7.530764305528959e-06, "epoch": 0.7393715341959335, "percentage": 74.63, "elapsed_time": "2:07:56", "remaining_time": "0:43:30", "throughput": 10244.43, "total_tokens": 78643200}
{"current_steps": 51, "total_steps": 67, "loss": 0.6998, "lr": 6.711716618408281e-06, "epoch": 0.7541589648798521, "percentage": 76.12, "elapsed_time": "2:10:30", "remaining_time": "0:40:56", "throughput": 10244.28, "total_tokens": 80216064}
{"current_steps": 52, "total_steps": 67, "loss": 0.7218, "lr": 5.932870547240454e-06, "epoch": 0.7689463955637708, "percentage": 77.61, "elapsed_time": "2:13:03", "remaining_time": "0:38:23", "throughput": 10244.28, "total_tokens": 81788928}
{"current_steps": 53, "total_steps": 67, "loss": 0.7314, "lr": 5.1959381647217666e-06, "epoch": 0.7837338262476895, "percentage": 79.1, "elapsed_time": "2:15:37", "remaining_time": "0:35:49", "throughput": 10244.27, "total_tokens": 83361792}
{"current_steps": 54, "total_steps": 67, "loss": 0.7028, "lr": 4.502539408164386e-06, "epoch": 0.7985212569316081, "percentage": 80.6, "elapsed_time": "2:18:10", "remaining_time": "0:33:15", "throughput": 10244.46, "total_tokens": 84934656}
{"current_steps": 55, "total_steps": 67, "loss": 0.7084, "lr": 3.8541985185225645e-06, "epoch": 0.8133086876155268, "percentage": 82.09, "elapsed_time": "2:20:44", "remaining_time": "0:30:42", "throughput": 10244.41, "total_tokens": 86507520}
{"current_steps": 56, "total_steps": 67, "loss": 0.7223, "lr": 3.252340689780245e-06, "epoch": 0.8280961182994455, "percentage": 83.58, "elapsed_time": "2:23:17", "remaining_time": "0:28:08", "throughput": 10244.78, "total_tokens": 88080384}
{"current_steps": 57, "total_steps": 67, "loss": 0.7195, "lr": 2.6982889360653377e-06, "epoch": 0.8428835489833642, "percentage": 85.07, "elapsed_time": "2:25:51", "remaining_time": "0:25:35", "throughput": 10244.84, "total_tokens": 89653248}
{"current_steps": 58, "total_steps": 67, "loss": 0.7431, "lr": 2.1932611833775846e-06, "epoch": 0.8576709796672828, "percentage": 86.57, "elapsed_time": "2:28:24", "remaining_time": "0:23:01", "throughput": 10244.76, "total_tokens": 91226112}
{"current_steps": 59, "total_steps": 67, "loss": 0.732, "lr": 1.738367592322837e-06, "epoch": 0.8724584103512015, "percentage": 88.06, "elapsed_time": "2:30:57", "remaining_time": "0:20:28", "throughput": 10245.04, "total_tokens": 92798976}
{"current_steps": 60, "total_steps": 67, "loss": 0.7302, "lr": 1.3346081177391472e-06, "epoch": 0.8872458410351202, "percentage": 89.55, "elapsed_time": "2:33:31", "remaining_time": "0:17:54", "throughput": 10245.15, "total_tokens": 94371840}
{"current_steps": 61, "total_steps": 67, "loss": 0.7197, "lr": 9.828703105789983e-07, "epoch": 0.9020332717190388, "percentage": 91.04, "elapsed_time": "2:36:05", "remaining_time": "0:15:21", "throughput": 10244.99, "total_tokens": 95944704}
{"current_steps": 62, "total_steps": 67, "loss": 0.7203, "lr": 6.839273668796747e-07, "epoch": 0.9168207024029574, "percentage": 92.54, "elapsed_time": "2:38:38", "remaining_time": "0:12:47", "throughput": 10244.88, "total_tokens": 97517568}
{"current_steps": 63, "total_steps": 67, "loss": 0.7474, "lr": 4.3843642811059737e-07, "epoch": 0.9316081330868762, "percentage": 94.03, "elapsed_time": "2:41:12", "remaining_time": "0:10:14", "throughput": 10244.81, "total_tokens": 99090432}
{"current_steps": 64, "total_steps": 67, "loss": 0.7403, "lr": 2.4693713663372644e-07, "epoch": 0.9463955637707948, "percentage": 95.52, "elapsed_time": "2:43:45", "remaining_time": "0:07:40", "throughput": 10244.76, "total_tokens": 100663296}
{"current_steps": 65, "total_steps": 67, "loss": 0.7327, "lr": 1.0985044945254764e-07, "epoch": 0.9611829944547134, "percentage": 97.01, "elapsed_time": "2:46:19", "remaining_time": "0:05:07", "throughput": 10244.72, "total_tokens": 102236160}
{"current_steps": 66, "total_steps": 67, "loss": 0.725, "lr": 2.7477712857215677e-08, "epoch": 0.9759704251386322, "percentage": 98.51, "elapsed_time": "2:48:53", "remaining_time": "0:02:33", "throughput": 10244.49, "total_tokens": 103809024}
{"current_steps": 67, "total_steps": 67, "loss": 0.7209, "lr": 0.0, "epoch": 0.9907578558225508, "percentage": 100.0, "elapsed_time": "2:51:26", "remaining_time": "0:00:00", "throughput": 10244.41, "total_tokens": 105381888}
{"current_steps": 67, "total_steps": 67, "epoch": 0.9907578558225508, "percentage": 100.0, "elapsed_time": "2:51:48", "remaining_time": "0:00:00", "throughput": 10222.76, "total_tokens": 105381888}