tinymagnum-r2-KTO-r1-ood / trainer_log.jsonl
kubernetes-bad's picture
Upload folder using huggingface_hub
6f3a34a verified
{"current_steps": 1, "total_steps": 148, "loss": 0.5, "learning_rate": 2.702702702702703e-07, "epoch": 0.006739679865206402, "percentage": 0.68, "elapsed_time": "0:00:23", "remaining_time": "0:58:47", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 2, "total_steps": 148, "loss": 0.5, "learning_rate": 5.405405405405406e-07, "epoch": 0.013479359730412805, "percentage": 1.35, "elapsed_time": "0:00:49", "remaining_time": "0:59:43", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 3, "total_steps": 148, "loss": 0.5056, "learning_rate": 8.108108108108109e-07, "epoch": 0.020219039595619208, "percentage": 2.03, "elapsed_time": "0:01:17", "remaining_time": "1:02:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 4, "total_steps": 148, "loss": 0.503, "learning_rate": 1.0810810810810812e-06, "epoch": 0.02695871946082561, "percentage": 2.7, "elapsed_time": "0:01:42", "remaining_time": "1:01:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 5, "total_steps": 148, "loss": 0.5045, "learning_rate": 1.3513513513513515e-06, "epoch": 0.03369839932603201, "percentage": 3.38, "elapsed_time": "0:02:07", "remaining_time": "1:00:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 6, "total_steps": 148, "loss": 0.5043, "learning_rate": 1.6216216216216219e-06, "epoch": 0.040438079191238416, "percentage": 4.05, "elapsed_time": "0:02:35", "remaining_time": "1:01:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 7, "total_steps": 148, "loss": 0.5044, "learning_rate": 1.8918918918918922e-06, "epoch": 0.04717775905644482, "percentage": 4.73, "elapsed_time": "0:03:02", "remaining_time": "1:01:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 8, "total_steps": 148, "loss": 0.5046, "learning_rate": 2.1621621621621623e-06, "epoch": 0.05391743892165122, "percentage": 5.41, "elapsed_time": "0:03:30", "remaining_time": "1:01:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 9, "total_steps": 148, "loss": 0.5041, "learning_rate": 2.432432432432433e-06, "epoch": 0.060657118786857624, "percentage": 6.08, "elapsed_time": "0:03:56", "remaining_time": "1:00:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 10, "total_steps": 148, "loss": 0.5039, "learning_rate": 2.702702702702703e-06, "epoch": 0.06739679865206402, "percentage": 6.76, "elapsed_time": "0:04:23", "remaining_time": "1:00:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 11, "total_steps": 148, "loss": 0.5025, "learning_rate": 2.9729729729729736e-06, "epoch": 0.07413647851727043, "percentage": 7.43, "elapsed_time": "0:04:50", "remaining_time": "1:00:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 12, "total_steps": 148, "loss": 0.5031, "learning_rate": 3.2432432432432437e-06, "epoch": 0.08087615838247683, "percentage": 8.11, "elapsed_time": "0:05:16", "remaining_time": "0:59:43", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 13, "total_steps": 148, "loss": 0.5036, "learning_rate": 3.513513513513514e-06, "epoch": 0.08761583824768324, "percentage": 8.78, "elapsed_time": "0:05:45", "remaining_time": "0:59:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 14, "total_steps": 148, "loss": 0.5027, "learning_rate": 3.7837837837837844e-06, "epoch": 0.09435551811288964, "percentage": 9.46, "elapsed_time": "0:06:09", "remaining_time": "0:59:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 15, "total_steps": 148, "loss": 0.5047, "learning_rate": 4.0540540540540545e-06, "epoch": 0.10109519797809605, "percentage": 10.14, "elapsed_time": "0:06:33", "remaining_time": "0:58:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 16, "total_steps": 148, "loss": 0.5025, "learning_rate": 4.324324324324325e-06, "epoch": 0.10783487784330244, "percentage": 10.81, "elapsed_time": "0:06:59", "remaining_time": "0:57:42", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 16, "total_steps": 148, "eval_loss": 0.5038444399833679, "epoch": 0.10783487784330244, "percentage": 10.81, "elapsed_time": "0:08:56", "remaining_time": "1:13:44", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 17, "total_steps": 148, "loss": 0.502, "learning_rate": 4.594594594594596e-06, "epoch": 0.11457455770850884, "percentage": 11.49, "elapsed_time": "0:09:23", "remaining_time": "1:12:21", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 18, "total_steps": 148, "loss": 0.5034, "learning_rate": 4.864864864864866e-06, "epoch": 0.12131423757371525, "percentage": 12.16, "elapsed_time": "0:09:46", "remaining_time": "1:10:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 19, "total_steps": 148, "loss": 0.5016, "learning_rate": 5.135135135135135e-06, "epoch": 0.12805391743892164, "percentage": 12.84, "elapsed_time": "0:10:11", "remaining_time": "1:09:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 20, "total_steps": 148, "loss": 0.5043, "learning_rate": 5.405405405405406e-06, "epoch": 0.13479359730412804, "percentage": 13.51, "elapsed_time": "0:10:35", "remaining_time": "1:07:47", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 21, "total_steps": 148, "loss": 0.5057, "learning_rate": 5.675675675675676e-06, "epoch": 0.14153327716933445, "percentage": 14.19, "elapsed_time": "0:11:01", "remaining_time": "1:06:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 22, "total_steps": 148, "loss": 0.5025, "learning_rate": 5.945945945945947e-06, "epoch": 0.14827295703454085, "percentage": 14.86, "elapsed_time": "0:11:26", "remaining_time": "1:05:34", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 23, "total_steps": 148, "loss": 0.5057, "learning_rate": 6.2162162162162164e-06, "epoch": 0.15501263689974726, "percentage": 15.54, "elapsed_time": "0:11:50", "remaining_time": "1:04:22", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 24, "total_steps": 148, "loss": 0.5035, "learning_rate": 6.486486486486487e-06, "epoch": 0.16175231676495366, "percentage": 16.22, "elapsed_time": "0:12:16", "remaining_time": "1:03:25", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 25, "total_steps": 148, "loss": 0.5027, "learning_rate": 6.7567567567567575e-06, "epoch": 0.16849199663016007, "percentage": 16.89, "elapsed_time": "0:12:42", "remaining_time": "1:02:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 26, "total_steps": 148, "loss": 0.5048, "learning_rate": 7.027027027027028e-06, "epoch": 0.17523167649536647, "percentage": 17.57, "elapsed_time": "0:13:10", "remaining_time": "1:01:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 27, "total_steps": 148, "loss": 0.5033, "learning_rate": 7.297297297297298e-06, "epoch": 0.18197135636057288, "percentage": 18.24, "elapsed_time": "0:13:33", "remaining_time": "1:00:44", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 28, "total_steps": 148, "loss": 0.5019, "learning_rate": 7.567567567567569e-06, "epoch": 0.18871103622577928, "percentage": 18.92, "elapsed_time": "0:13:57", "remaining_time": "0:59:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 29, "total_steps": 148, "loss": 0.5008, "learning_rate": 7.837837837837838e-06, "epoch": 0.1954507160909857, "percentage": 19.59, "elapsed_time": "0:14:25", "remaining_time": "0:59:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 30, "total_steps": 148, "loss": 0.501, "learning_rate": 8.108108108108109e-06, "epoch": 0.2021903959561921, "percentage": 20.27, "elapsed_time": "0:14:48", "remaining_time": "0:58:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 31, "total_steps": 148, "loss": 0.5081, "learning_rate": 8.378378378378378e-06, "epoch": 0.20893007582139847, "percentage": 20.95, "elapsed_time": "0:15:14", "remaining_time": "0:57:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 32, "total_steps": 148, "loss": 0.502, "learning_rate": 8.64864864864865e-06, "epoch": 0.21566975568660487, "percentage": 21.62, "elapsed_time": "0:15:39", "remaining_time": "0:56:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 32, "total_steps": 148, "eval_loss": 0.5019354224205017, "epoch": 0.21566975568660487, "percentage": 21.62, "elapsed_time": "0:17:36", "remaining_time": "1:03:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 33, "total_steps": 148, "loss": 0.502, "learning_rate": 8.91891891891892e-06, "epoch": 0.22240943555181128, "percentage": 22.3, "elapsed_time": "0:18:00", "remaining_time": "1:02:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 34, "total_steps": 148, "loss": 0.5019, "learning_rate": 9.189189189189191e-06, "epoch": 0.22914911541701768, "percentage": 22.97, "elapsed_time": "0:18:26", "remaining_time": "1:01:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 35, "total_steps": 148, "loss": 0.5022, "learning_rate": 9.45945945945946e-06, "epoch": 0.2358887952822241, "percentage": 23.65, "elapsed_time": "0:18:53", "remaining_time": "1:00:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 36, "total_steps": 148, "loss": 0.5009, "learning_rate": 9.729729729729732e-06, "epoch": 0.2426284751474305, "percentage": 24.32, "elapsed_time": "0:19:18", "remaining_time": "1:00:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 37, "total_steps": 148, "loss": 0.5031, "learning_rate": 1e-05, "epoch": 0.2493681550126369, "percentage": 25.0, "elapsed_time": "0:19:44", "remaining_time": "0:59:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 38, "total_steps": 148, "loss": 0.5015, "learning_rate": 9.99799753559161e-06, "epoch": 0.2561078348778433, "percentage": 25.68, "elapsed_time": "0:20:08", "remaining_time": "0:58:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 39, "total_steps": 148, "loss": 0.5059, "learning_rate": 9.991991746311916e-06, "epoch": 0.2628475147430497, "percentage": 26.35, "elapsed_time": "0:20:34", "remaining_time": "0:57:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 40, "total_steps": 148, "loss": 0.5025, "learning_rate": 9.981987442712634e-06, "epoch": 0.2695871946082561, "percentage": 27.03, "elapsed_time": "0:20:58", "remaining_time": "0:56:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 41, "total_steps": 148, "loss": 0.5005, "learning_rate": 9.967992638098517e-06, "epoch": 0.2763268744734625, "percentage": 27.7, "elapsed_time": "0:21:23", "remaining_time": "0:55:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 42, "total_steps": 148, "loss": 0.5015, "learning_rate": 9.950018542108818e-06, "epoch": 0.2830665543386689, "percentage": 28.38, "elapsed_time": "0:21:47", "remaining_time": "0:55:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 43, "total_steps": 148, "loss": 0.5018, "learning_rate": 9.928079551738542e-06, "epoch": 0.2898062342038753, "percentage": 29.05, "elapsed_time": "0:22:13", "remaining_time": "0:54:17", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 44, "total_steps": 148, "loss": 0.5004, "learning_rate": 9.902193239806634e-06, "epoch": 0.2965459140690817, "percentage": 29.73, "elapsed_time": "0:22:38", "remaining_time": "0:53:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 45, "total_steps": 148, "loss": 0.5008, "learning_rate": 9.872380340880416e-06, "epoch": 0.3032855939342881, "percentage": 30.41, "elapsed_time": "0:23:06", "remaining_time": "0:52:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 46, "total_steps": 148, "loss": 0.5012, "learning_rate": 9.838664734667496e-06, "epoch": 0.3100252737994945, "percentage": 31.08, "elapsed_time": "0:23:31", "remaining_time": "0:52:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 47, "total_steps": 148, "loss": 0.5007, "learning_rate": 9.801073426888447e-06, "epoch": 0.3167649536647009, "percentage": 31.76, "elapsed_time": "0:23:57", "remaining_time": "0:51:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 48, "total_steps": 148, "loss": 0.5026, "learning_rate": 9.759636527645633e-06, "epoch": 0.3235046335299073, "percentage": 32.43, "elapsed_time": "0:24:23", "remaining_time": "0:50:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 48, "total_steps": 148, "eval_loss": 0.5012729167938232, "epoch": 0.3235046335299073, "percentage": 32.43, "elapsed_time": "0:26:20", "remaining_time": "0:54:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 49, "total_steps": 148, "loss": 0.5007, "learning_rate": 9.714387227305422e-06, "epoch": 0.33024431339511373, "percentage": 33.11, "elapsed_time": "0:26:46", "remaining_time": "0:54:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 50, "total_steps": 148, "loss": 0.5021, "learning_rate": 9.665361769913187e-06, "epoch": 0.33698399326032014, "percentage": 33.78, "elapsed_time": "0:27:12", "remaining_time": "0:53:20", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 51, "total_steps": 148, "loss": 0.5002, "learning_rate": 9.612599424162344e-06, "epoch": 0.34372367312552654, "percentage": 34.46, "elapsed_time": "0:27:37", "remaining_time": "0:52:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 52, "total_steps": 148, "loss": 0.5083, "learning_rate": 9.55614245194068e-06, "epoch": 0.35046335299073295, "percentage": 35.14, "elapsed_time": "0:28:03", "remaining_time": "0:51:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 53, "total_steps": 148, "loss": 0.5006, "learning_rate": 9.496036074479184e-06, "epoch": 0.35720303285593935, "percentage": 35.81, "elapsed_time": "0:28:27", "remaining_time": "0:51:01", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 54, "total_steps": 148, "loss": 0.5013, "learning_rate": 9.432328436130493e-06, "epoch": 0.36394271272114576, "percentage": 36.49, "elapsed_time": "0:28:54", "remaining_time": "0:50:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 55, "total_steps": 148, "loss": 0.5021, "learning_rate": 9.365070565805941e-06, "epoch": 0.37068239258635216, "percentage": 37.16, "elapsed_time": "0:29:18", "remaining_time": "0:49:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 56, "total_steps": 148, "loss": 0.5008, "learning_rate": 9.294316336102132e-06, "epoch": 0.37742207245155857, "percentage": 37.84, "elapsed_time": "0:29:42", "remaining_time": "0:48:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 57, "total_steps": 148, "loss": 0.5, "learning_rate": 9.220122420149753e-06, "epoch": 0.38416175231676497, "percentage": 38.51, "elapsed_time": "0:30:05", "remaining_time": "0:48:02", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 58, "total_steps": 148, "loss": 0.5016, "learning_rate": 9.142548246219212e-06, "epoch": 0.3909014321819714, "percentage": 39.19, "elapsed_time": "0:30:29", "remaining_time": "0:47:19", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 59, "total_steps": 148, "loss": 0.5015, "learning_rate": 9.06165595011943e-06, "epoch": 0.3976411120471778, "percentage": 39.86, "elapsed_time": "0:30:54", "remaining_time": "0:46:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 60, "total_steps": 148, "loss": 0.4998, "learning_rate": 8.97751032542795e-06, "epoch": 0.4043807919123842, "percentage": 40.54, "elapsed_time": "0:31:20", "remaining_time": "0:45:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 61, "total_steps": 148, "loss": 0.5007, "learning_rate": 8.890178771592198e-06, "epoch": 0.4111204717775906, "percentage": 41.22, "elapsed_time": "0:31:47", "remaining_time": "0:45:20", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 62, "total_steps": 148, "loss": 0.5016, "learning_rate": 8.799731239943488e-06, "epoch": 0.41786015164279694, "percentage": 41.89, "elapsed_time": "0:32:11", "remaining_time": "0:44:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 63, "total_steps": 148, "loss": 0.5058, "learning_rate": 8.706240177667003e-06, "epoch": 0.42459983150800334, "percentage": 42.57, "elapsed_time": "0:32:36", "remaining_time": "0:43:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 64, "total_steps": 148, "loss": 0.5021, "learning_rate": 8.609780469772623e-06, "epoch": 0.43133951137320975, "percentage": 43.24, "elapsed_time": "0:33:01", "remaining_time": "0:43:20", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 64, "total_steps": 148, "eval_loss": 0.5015159249305725, "epoch": 0.43133951137320975, "percentage": 43.24, "elapsed_time": "0:34:58", "remaining_time": "0:45:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 65, "total_steps": 148, "loss": 0.5008, "learning_rate": 8.510429379113114e-06, "epoch": 0.43807919123841615, "percentage": 43.92, "elapsed_time": "0:35:25", "remaining_time": "0:45:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 66, "total_steps": 148, "loss": 0.4999, "learning_rate": 8.408266484497664e-06, "epoch": 0.44481887110362256, "percentage": 44.59, "elapsed_time": "0:35:50", "remaining_time": "0:44:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 67, "total_steps": 148, "loss": 0.5002, "learning_rate": 8.303373616950408e-06, "epoch": 0.45155855096882896, "percentage": 45.27, "elapsed_time": "0:36:13", "remaining_time": "0:43:47", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 68, "total_steps": 148, "loss": 0.501, "learning_rate": 8.195834794164925e-06, "epoch": 0.45829823083403537, "percentage": 45.95, "elapsed_time": "0:36:40", "remaining_time": "0:43:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 69, "total_steps": 148, "loss": 0.5007, "learning_rate": 8.085736153207277e-06, "epoch": 0.4650379106992418, "percentage": 46.62, "elapsed_time": "0:37:05", "remaining_time": "0:42:27", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 70, "total_steps": 148, "loss": 0.5003, "learning_rate": 7.973165881521435e-06, "epoch": 0.4717775905644482, "percentage": 47.3, "elapsed_time": "0:37:28", "remaining_time": "0:41:45", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 71, "total_steps": 148, "loss": 0.5001, "learning_rate": 7.858214146292394e-06, "epoch": 0.4785172704296546, "percentage": 47.97, "elapsed_time": "0:37:58", "remaining_time": "0:41:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 72, "total_steps": 148, "loss": 0.5002, "learning_rate": 7.74097302222355e-06, "epoch": 0.485256950294861, "percentage": 48.65, "elapsed_time": "0:38:21", "remaining_time": "0:40:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 73, "total_steps": 148, "loss": 0.5004, "learning_rate": 7.621536417786159e-06, "epoch": 0.4919966301600674, "percentage": 49.32, "elapsed_time": "0:38:49", "remaining_time": "0:39:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 74, "total_steps": 148, "loss": 0.5006, "learning_rate": 7.500000000000001e-06, "epoch": 0.4987363100252738, "percentage": 50.0, "elapsed_time": "0:39:15", "remaining_time": "0:39:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 75, "total_steps": 148, "loss": 0.501, "learning_rate": 7.37646111780545e-06, "epoch": 0.5054759898904801, "percentage": 50.68, "elapsed_time": "0:39:41", "remaining_time": "0:38:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 76, "total_steps": 148, "loss": 0.5005, "learning_rate": 7.251018724088367e-06, "epoch": 0.5122156697556866, "percentage": 51.35, "elapsed_time": "0:40:07", "remaining_time": "0:38:01", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 77, "total_steps": 148, "loss": 0.4998, "learning_rate": 7.12377329642024e-06, "epoch": 0.518955349620893, "percentage": 52.03, "elapsed_time": "0:40:34", "remaining_time": "0:37:25", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 78, "total_steps": 148, "loss": 0.5005, "learning_rate": 6.994826756577082e-06, "epoch": 0.5256950294860994, "percentage": 52.7, "elapsed_time": "0:40:59", "remaining_time": "0:36:47", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 79, "total_steps": 148, "loss": 0.5, "learning_rate": 6.864282388901544e-06, "epoch": 0.5324347093513058, "percentage": 53.38, "elapsed_time": "0:41:26", "remaining_time": "0:36:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 80, "total_steps": 148, "loss": 0.5017, "learning_rate": 6.732244757573619e-06, "epoch": 0.5391743892165122, "percentage": 54.05, "elapsed_time": "0:41:50", "remaining_time": "0:35:34", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 80, "total_steps": 148, "eval_loss": 0.501190721988678, "epoch": 0.5391743892165122, "percentage": 54.05, "elapsed_time": "0:43:47", "remaining_time": "0:37:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 81, "total_steps": 148, "loss": 0.5024, "learning_rate": 6.598819622856227e-06, "epoch": 0.5459140690817186, "percentage": 54.73, "elapsed_time": "0:44:15", "remaining_time": "0:36:36", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 82, "total_steps": 148, "loss": 0.5005, "learning_rate": 6.464113856382752e-06, "epoch": 0.552653748946925, "percentage": 55.41, "elapsed_time": "0:44:39", "remaining_time": "0:35:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 83, "total_steps": 148, "loss": 0.5003, "learning_rate": 6.328235355554382e-06, "epoch": 0.5593934288121314, "percentage": 56.08, "elapsed_time": "0:45:07", "remaining_time": "0:35:20", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 84, "total_steps": 148, "loss": 0.5017, "learning_rate": 6.191292957115825e-06, "epoch": 0.5661331086773378, "percentage": 56.76, "elapsed_time": "0:45:33", "remaining_time": "0:34:42", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 85, "total_steps": 148, "loss": 0.5021, "learning_rate": 6.053396349978632e-06, "epoch": 0.5728727885425442, "percentage": 57.43, "elapsed_time": "0:45:59", "remaining_time": "0:34:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 86, "total_steps": 148, "loss": 0.5013, "learning_rate": 5.914655987361934e-06, "epoch": 0.5796124684077506, "percentage": 58.11, "elapsed_time": "0:46:25", "remaining_time": "0:33:28", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 87, "total_steps": 148, "loss": 0.5004, "learning_rate": 5.77518299832099e-06, "epoch": 0.586352148272957, "percentage": 58.78, "elapsed_time": "0:46:51", "remaining_time": "0:32:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 88, "total_steps": 148, "loss": 0.5013, "learning_rate": 5.635089098734394e-06, "epoch": 0.5930918281381634, "percentage": 59.46, "elapsed_time": "0:47:16", "remaining_time": "0:32:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 89, "total_steps": 148, "loss": 0.4999, "learning_rate": 5.49448650182125e-06, "epoch": 0.5998315080033698, "percentage": 60.14, "elapsed_time": "0:47:42", "remaining_time": "0:31:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 90, "total_steps": 148, "loss": 0.5011, "learning_rate": 5.353487828259973e-06, "epoch": 0.6065711878685762, "percentage": 60.81, "elapsed_time": "0:48:07", "remaining_time": "0:31:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 91, "total_steps": 148, "loss": 0.5011, "learning_rate": 5.212206015980742e-06, "epoch": 0.6133108677337826, "percentage": 61.49, "elapsed_time": "0:48:31", "remaining_time": "0:30:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 92, "total_steps": 148, "loss": 0.5009, "learning_rate": 5.070754229703811e-06, "epoch": 0.620050547598989, "percentage": 62.16, "elapsed_time": "0:48:55", "remaining_time": "0:29:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 93, "total_steps": 148, "loss": 0.5016, "learning_rate": 4.929245770296191e-06, "epoch": 0.6267902274641954, "percentage": 62.84, "elapsed_time": "0:49:22", "remaining_time": "0:29:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 94, "total_steps": 148, "loss": 0.5015, "learning_rate": 4.78779398401926e-06, "epoch": 0.6335299073294018, "percentage": 63.51, "elapsed_time": "0:49:47", "remaining_time": "0:28:36", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 95, "total_steps": 148, "loss": 0.5016, "learning_rate": 4.646512171740028e-06, "epoch": 0.6402695871946082, "percentage": 64.19, "elapsed_time": "0:50:13", "remaining_time": "0:28:01", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 96, "total_steps": 148, "loss": 0.5003, "learning_rate": 4.505513498178752e-06, "epoch": 0.6470092670598147, "percentage": 64.86, "elapsed_time": "0:50:38", "remaining_time": "0:27:25", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 96, "total_steps": 148, "eval_loss": 0.5007099509239197, "epoch": 0.6470092670598147, "percentage": 64.86, "elapsed_time": "0:52:35", "remaining_time": "0:28:29", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 97, "total_steps": 148, "loss": 0.5012, "learning_rate": 4.364910901265607e-06, "epoch": 0.6537489469250211, "percentage": 65.54, "elapsed_time": "0:53:01", "remaining_time": "0:27:52", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 98, "total_steps": 148, "loss": 0.504, "learning_rate": 4.224817001679011e-06, "epoch": 0.6604886267902275, "percentage": 66.22, "elapsed_time": "0:53:27", "remaining_time": "0:27:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 99, "total_steps": 148, "loss": 0.4996, "learning_rate": 4.085344012638067e-06, "epoch": 0.6672283066554339, "percentage": 66.89, "elapsed_time": "0:53:50", "remaining_time": "0:26:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 100, "total_steps": 148, "loss": 0.5039, "learning_rate": 3.94660365002137e-06, "epoch": 0.6739679865206403, "percentage": 67.57, "elapsed_time": "0:54:13", "remaining_time": "0:26:01", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 101, "total_steps": 148, "loss": 0.5008, "learning_rate": 3.808707042884176e-06, "epoch": 0.6807076663858467, "percentage": 68.24, "elapsed_time": "0:54:38", "remaining_time": "0:25:25", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 102, "total_steps": 148, "loss": 0.5003, "learning_rate": 3.6717646444456196e-06, "epoch": 0.6874473462510531, "percentage": 68.92, "elapsed_time": "0:55:03", "remaining_time": "0:24:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 103, "total_steps": 148, "loss": 0.5002, "learning_rate": 3.5358861436172487e-06, "epoch": 0.6941870261162595, "percentage": 69.59, "elapsed_time": "0:55:28", "remaining_time": "0:24:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 104, "total_steps": 148, "loss": 0.5003, "learning_rate": 3.401180377143774e-06, "epoch": 0.7009267059814659, "percentage": 70.27, "elapsed_time": "0:55:56", "remaining_time": "0:23:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 105, "total_steps": 148, "loss": 0.5012, "learning_rate": 3.2677552424263836e-06, "epoch": 0.7076663858466723, "percentage": 70.95, "elapsed_time": "0:56:24", "remaining_time": "0:23:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 106, "total_steps": 148, "loss": 0.5007, "learning_rate": 3.1357176110984578e-06, "epoch": 0.7144060657118787, "percentage": 71.62, "elapsed_time": "0:56:50", "remaining_time": "0:22:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 107, "total_steps": 148, "loss": 0.5003, "learning_rate": 3.0051732434229185e-06, "epoch": 0.7211457455770851, "percentage": 72.3, "elapsed_time": "0:57:16", "remaining_time": "0:21:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 108, "total_steps": 148, "loss": 0.4988, "learning_rate": 2.8762267035797607e-06, "epoch": 0.7278854254422915, "percentage": 72.97, "elapsed_time": "0:57:41", "remaining_time": "0:21:22", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 109, "total_steps": 148, "loss": 0.4998, "learning_rate": 2.748981275911633e-06, "epoch": 0.7346251053074979, "percentage": 73.65, "elapsed_time": "0:58:05", "remaining_time": "0:20:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 110, "total_steps": 148, "loss": 0.5013, "learning_rate": 2.6235388821945497e-06, "epoch": 0.7413647851727043, "percentage": 74.32, "elapsed_time": "0:58:28", "remaining_time": "0:20:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 111, "total_steps": 148, "loss": 0.5006, "learning_rate": 2.5000000000000015e-06, "epoch": 0.7481044650379107, "percentage": 75.0, "elapsed_time": "0:58:53", "remaining_time": "0:19:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 112, "total_steps": 148, "loss": 0.4996, "learning_rate": 2.3784635822138424e-06, "epoch": 0.7548441449031171, "percentage": 75.68, "elapsed_time": "0:59:19", "remaining_time": "0:19:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 112, "total_steps": 148, "eval_loss": 0.5012484788894653, "epoch": 0.7548441449031171, "percentage": 75.68, "elapsed_time": "1:01:16", "remaining_time": "0:19:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 113, "total_steps": 148, "loss": 0.5014, "learning_rate": 2.2590269777764516e-06, "epoch": 0.7615838247683235, "percentage": 76.35, "elapsed_time": "1:01:40", "remaining_time": "0:19:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 114, "total_steps": 148, "loss": 0.5009, "learning_rate": 2.141785853707607e-06, "epoch": 0.7683235046335299, "percentage": 77.03, "elapsed_time": "1:02:04", "remaining_time": "0:18:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 115, "total_steps": 148, "loss": 0.5002, "learning_rate": 2.0268341184785674e-06, "epoch": 0.7750631844987363, "percentage": 77.7, "elapsed_time": "1:02:33", "remaining_time": "0:17:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 116, "total_steps": 148, "loss": 0.4998, "learning_rate": 1.9142638467927254e-06, "epoch": 0.7818028643639428, "percentage": 78.38, "elapsed_time": "1:03:00", "remaining_time": "0:17:22", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 117, "total_steps": 148, "loss": 0.5042, "learning_rate": 1.8041652058350768e-06, "epoch": 0.7885425442291492, "percentage": 79.05, "elapsed_time": "1:03:26", "remaining_time": "0:16:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 118, "total_steps": 148, "loss": 0.5008, "learning_rate": 1.6966263830495939e-06, "epoch": 0.7952822240943556, "percentage": 79.73, "elapsed_time": "1:03:49", "remaining_time": "0:16:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 119, "total_steps": 148, "loss": 0.5, "learning_rate": 1.5917335155023368e-06, "epoch": 0.802021903959562, "percentage": 80.41, "elapsed_time": "1:04:16", "remaining_time": "0:15:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 120, "total_steps": 148, "loss": 0.4998, "learning_rate": 1.4895706208868876e-06, "epoch": 0.8087615838247684, "percentage": 81.08, "elapsed_time": "1:04:41", "remaining_time": "0:15:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 121, "total_steps": 148, "loss": 0.5009, "learning_rate": 1.390219530227378e-06, "epoch": 0.8155012636899748, "percentage": 81.76, "elapsed_time": "1:05:07", "remaining_time": "0:14:31", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 122, "total_steps": 148, "loss": 0.5004, "learning_rate": 1.2937598223330006e-06, "epoch": 0.8222409435551812, "percentage": 82.43, "elapsed_time": "1:05:32", "remaining_time": "0:13:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 123, "total_steps": 148, "loss": 0.5006, "learning_rate": 1.2002687600565138e-06, "epoch": 0.8289806234203876, "percentage": 83.11, "elapsed_time": "1:05:59", "remaining_time": "0:13:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 124, "total_steps": 148, "loss": 0.5013, "learning_rate": 1.1098212284078037e-06, "epoch": 0.8357203032855939, "percentage": 83.78, "elapsed_time": "1:06:24", "remaining_time": "0:12:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 125, "total_steps": 148, "loss": 0.4999, "learning_rate": 1.0224896745720513e-06, "epoch": 0.8424599831508003, "percentage": 84.46, "elapsed_time": "1:06:51", "remaining_time": "0:12:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 126, "total_steps": 148, "loss": 0.4997, "learning_rate": 9.383440498805712e-07, "epoch": 0.8491996630160067, "percentage": 85.14, "elapsed_time": "1:07:16", "remaining_time": "0:11:44", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 127, "total_steps": 148, "loss": 0.4999, "learning_rate": 8.574517537807897e-07, "epoch": 0.8559393428812131, "percentage": 85.81, "elapsed_time": "1:07:41", "remaining_time": "0:11:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 128, "total_steps": 148, "loss": 0.5077, "learning_rate": 7.798775798502484e-07, "epoch": 0.8626790227464195, "percentage": 86.49, "elapsed_time": "1:08:07", "remaining_time": "0:10:38", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 128, "total_steps": 148, "eval_loss": 0.5004793405532837, "epoch": 0.8626790227464195, "percentage": 86.49, "elapsed_time": "1:10:04", "remaining_time": "0:10:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 129, "total_steps": 148, "loss": 0.5014, "learning_rate": 7.056836638978698e-07, "epoch": 0.8694187026116259, "percentage": 87.16, "elapsed_time": "1:10:33", "remaining_time": "0:10:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 130, "total_steps": 148, "loss": 0.5008, "learning_rate": 6.349294341940593e-07, "epoch": 0.8761583824768323, "percentage": 87.84, "elapsed_time": "1:10:59", "remaining_time": "0:09:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 131, "total_steps": 148, "loss": 0.5006, "learning_rate": 5.676715638695063e-07, "epoch": 0.8828980623420387, "percentage": 88.51, "elapsed_time": "1:11:23", "remaining_time": "0:09:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 132, "total_steps": 148, "loss": 0.5004, "learning_rate": 5.039639255208156e-07, "epoch": 0.8896377422072451, "percentage": 89.19, "elapsed_time": "1:11:46", "remaining_time": "0:08:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 133, "total_steps": 148, "loss": 0.5009, "learning_rate": 4.43857548059321e-07, "epoch": 0.8963774220724515, "percentage": 89.86, "elapsed_time": "1:12:11", "remaining_time": "0:08:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 134, "total_steps": 148, "loss": 0.5005, "learning_rate": 3.87400575837657e-07, "epoch": 0.9031171019376579, "percentage": 90.54, "elapsed_time": "1:12:35", "remaining_time": "0:07:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 135, "total_steps": 148, "loss": 0.501, "learning_rate": 3.346382300868134e-07, "epoch": 0.9098567818028643, "percentage": 91.22, "elapsed_time": "1:12:59", "remaining_time": "0:07:01", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 136, "total_steps": 148, "loss": 0.5, "learning_rate": 2.85612772694579e-07, "epoch": 0.9165964616680707, "percentage": 91.89, "elapsed_time": "1:13:24", "remaining_time": "0:06:28", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 137, "total_steps": 148, "loss": 0.5028, "learning_rate": 2.403634723543674e-07, "epoch": 0.9233361415332771, "percentage": 92.57, "elapsed_time": "1:13:48", "remaining_time": "0:05:55", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 138, "total_steps": 148, "loss": 0.5002, "learning_rate": 1.989265731115525e-07, "epoch": 0.9300758213984835, "percentage": 93.24, "elapsed_time": "1:14:11", "remaining_time": "0:05:22", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 139, "total_steps": 148, "loss": 0.4988, "learning_rate": 1.6133526533250566e-07, "epoch": 0.93681550126369, "percentage": 93.92, "elapsed_time": "1:14:36", "remaining_time": "0:04:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 140, "total_steps": 148, "loss": 0.5017, "learning_rate": 1.2761965911958385e-07, "epoch": 0.9435551811288964, "percentage": 94.59, "elapsed_time": "1:15:02", "remaining_time": "0:04:17", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 141, "total_steps": 148, "loss": 0.4998, "learning_rate": 9.780676019336632e-08, "epoch": 0.9502948609941028, "percentage": 95.27, "elapsed_time": "1:15:27", "remaining_time": "0:03:44", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 142, "total_steps": 148, "loss": 0.4999, "learning_rate": 7.192044826145772e-08, "epoch": 0.9570345408593092, "percentage": 95.95, "elapsed_time": "1:15:53", "remaining_time": "0:03:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 143, "total_steps": 148, "loss": 0.5005, "learning_rate": 4.998145789118114e-08, "epoch": 0.9637742207245156, "percentage": 96.62, "elapsed_time": "1:16:20", "remaining_time": "0:02:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 144, "total_steps": 148, "loss": 0.5012, "learning_rate": 3.2007361901485455e-08, "epoch": 0.970513900589722, "percentage": 97.3, "elapsed_time": "1:16:47", "remaining_time": "0:02:07", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 144, "total_steps": 148, "eval_loss": 0.5004004240036011, "epoch": 0.970513900589722, "percentage": 97.3, "elapsed_time": "1:18:44", "remaining_time": "0:02:11", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 145, "total_steps": 148, "loss": 0.4988, "learning_rate": 1.8012557287367394e-08, "epoch": 0.9772535804549284, "percentage": 97.97, "elapsed_time": "1:19:11", "remaining_time": "0:01:38", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 146, "total_steps": 148, "loss": 0.5013, "learning_rate": 8.008253688084888e-09, "epoch": 0.9839932603201348, "percentage": 98.65, "elapsed_time": "1:19:36", "remaining_time": "0:01:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 147, "total_steps": 148, "loss": 0.5021, "learning_rate": 2.002464408392135e-09, "epoch": 0.9907329401853412, "percentage": 99.32, "elapsed_time": "1:20:02", "remaining_time": "0:00:32", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 148, "total_steps": 148, "loss": 0.5016, "learning_rate": 0.0, "epoch": 0.9974726200505476, "percentage": 100.0, "elapsed_time": "1:20:29", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 148, "total_steps": 148, "epoch": 0.9974726200505476, "percentage": 100.0, "elapsed_time": "1:20:33", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}