llama3-platypus / trainer_log.jsonl
2nji's picture
Initial model commit
c2aa2db
{"current_steps": 10, "total_steps": 336, "loss": 0.913, "learning_rate": 2.9411764705882354e-05, "epoch": 0.08888888888888889, "percentage": 2.98, "elapsed_time": "0:00:16", "remaining_time": "0:09:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 20, "total_steps": 336, "loss": 0.8774, "learning_rate": 5.882352941176471e-05, "epoch": 0.17777777777777778, "percentage": 5.95, "elapsed_time": "0:00:30", "remaining_time": "0:08:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 30, "total_steps": 336, "loss": 0.7954, "learning_rate": 8.823529411764706e-05, "epoch": 0.26666666666666666, "percentage": 8.93, "elapsed_time": "0:00:44", "remaining_time": "0:07:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 40, "total_steps": 336, "loss": 0.7382, "learning_rate": 9.990263847374976e-05, "epoch": 0.35555555555555557, "percentage": 11.9, "elapsed_time": "0:00:58", "remaining_time": "0:07:13", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 50, "total_steps": 336, "loss": 0.7335, "learning_rate": 9.930902394260747e-05, "epoch": 0.4444444444444444, "percentage": 14.88, "elapsed_time": "0:01:12", "remaining_time": "0:06:54", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 60, "total_steps": 336, "loss": 0.7586, "learning_rate": 9.818229479678158e-05, "epoch": 0.5333333333333333, "percentage": 17.86, "elapsed_time": "0:01:26", "remaining_time": "0:06:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 70, "total_steps": 336, "loss": 0.7147, "learning_rate": 9.653463289927411e-05, "epoch": 0.6222222222222222, "percentage": 20.83, "elapsed_time": "0:01:40", "remaining_time": "0:06:21", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 80, "total_steps": 336, "loss": 0.7583, "learning_rate": 9.438385228425938e-05, "epoch": 0.7111111111111111, "percentage": 23.81, "elapsed_time": "0:01:54", "remaining_time": "0:06:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 90, "total_steps": 336, "loss": 0.7012, "learning_rate": 9.175320655700406e-05, "epoch": 0.8, "percentage": 26.79, "elapsed_time": "0:02:08", "remaining_time": "0:05:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 100, "total_steps": 336, "loss": 0.7226, "learning_rate": 8.86711374827494e-05, "epoch": 0.8888888888888888, "percentage": 29.76, "elapsed_time": "0:02:21", "remaining_time": "0:05:34", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 110, "total_steps": 336, "loss": 0.75, "learning_rate": 8.517096748273951e-05, "epoch": 0.9777777777777777, "percentage": 32.74, "elapsed_time": "0:02:35", "remaining_time": "0:05:20", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 120, "total_steps": 336, "loss": 0.6529, "learning_rate": 8.129053936203687e-05, "epoch": 1.0666666666666667, "percentage": 35.71, "elapsed_time": "0:02:49", "remaining_time": "0:05:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 130, "total_steps": 336, "loss": 0.6358, "learning_rate": 7.707180716428237e-05, "epoch": 1.1555555555555554, "percentage": 38.69, "elapsed_time": "0:03:03", "remaining_time": "0:04:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 140, "total_steps": 336, "loss": 0.6568, "learning_rate": 7.256038257695687e-05, "epoch": 1.2444444444444445, "percentage": 41.67, "elapsed_time": "0:03:17", "remaining_time": "0:04:36", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 150, "total_steps": 336, "loss": 0.6376, "learning_rate": 6.780504179127734e-05, "epoch": 1.3333333333333333, "percentage": 44.64, "elapsed_time": "0:03:31", "remaining_time": "0:04:22", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 160, "total_steps": 336, "loss": 0.6286, "learning_rate": 6.28571981484123e-05, "epoch": 1.4222222222222223, "percentage": 47.62, "elapsed_time": "0:03:45", "remaining_time": "0:04:07", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 170, "total_steps": 336, "loss": 0.6114, "learning_rate": 5.7770346273610254e-05, "epoch": 1.511111111111111, "percentage": 50.6, "elapsed_time": "0:03:58", "remaining_time": "0:03:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 180, "total_steps": 336, "loss": 0.6427, "learning_rate": 5.2599483708099016e-05, "epoch": 1.6, "percentage": 53.57, "elapsed_time": "0:04:12", "remaining_time": "0:03:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 190, "total_steps": 336, "loss": 0.6302, "learning_rate": 4.740051629190099e-05, "epoch": 1.6888888888888889, "percentage": 56.55, "elapsed_time": "0:04:26", "remaining_time": "0:03:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 200, "total_steps": 336, "loss": 0.6985, "learning_rate": 4.2229653726389765e-05, "epoch": 1.7777777777777777, "percentage": 59.52, "elapsed_time": "0:04:40", "remaining_time": "0:03:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 210, "total_steps": 336, "loss": 0.5733, "learning_rate": 3.714280185158771e-05, "epoch": 1.8666666666666667, "percentage": 62.5, "elapsed_time": "0:04:54", "remaining_time": "0:02:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 220, "total_steps": 336, "loss": 0.631, "learning_rate": 3.219495820872265e-05, "epoch": 1.9555555555555557, "percentage": 65.48, "elapsed_time": "0:05:08", "remaining_time": "0:02:42", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 230, "total_steps": 336, "loss": 0.5628, "learning_rate": 2.7439617423043145e-05, "epoch": 2.0444444444444443, "percentage": 68.45, "elapsed_time": "0:05:22", "remaining_time": "0:02:28", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 240, "total_steps": 336, "loss": 0.4699, "learning_rate": 2.2928192835717644e-05, "epoch": 2.1333333333333333, "percentage": 71.43, "elapsed_time": "0:05:36", "remaining_time": "0:02:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 250, "total_steps": 336, "loss": 0.5786, "learning_rate": 1.8709460637963123e-05, "epoch": 2.2222222222222223, "percentage": 74.4, "elapsed_time": "0:05:50", "remaining_time": "0:02:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 260, "total_steps": 336, "loss": 0.4809, "learning_rate": 1.4829032517260489e-05, "epoch": 2.311111111111111, "percentage": 77.38, "elapsed_time": "0:06:04", "remaining_time": "0:01:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 270, "total_steps": 336, "loss": 0.4812, "learning_rate": 1.132886251725061e-05, "epoch": 2.4, "percentage": 80.36, "elapsed_time": "0:06:18", "remaining_time": "0:01:32", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 280, "total_steps": 336, "loss": 0.4922, "learning_rate": 8.246793442995954e-06, "epoch": 2.488888888888889, "percentage": 83.33, "elapsed_time": "0:06:31", "remaining_time": "0:01:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 290, "total_steps": 336, "loss": 0.5673, "learning_rate": 5.616147715740611e-06, "epoch": 2.5777777777777775, "percentage": 86.31, "elapsed_time": "0:06:45", "remaining_time": "0:01:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 300, "total_steps": 336, "loss": 0.4935, "learning_rate": 3.465367100725908e-06, "epoch": 2.6666666666666665, "percentage": 89.29, "elapsed_time": "0:06:59", "remaining_time": "0:00:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 310, "total_steps": 336, "loss": 0.4809, "learning_rate": 1.8177052032184283e-06, "epoch": 2.7555555555555555, "percentage": 92.26, "elapsed_time": "0:07:13", "remaining_time": "0:00:36", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 320, "total_steps": 336, "loss": 0.5241, "learning_rate": 6.909760573925561e-07, "epoch": 2.8444444444444446, "percentage": 95.24, "elapsed_time": "0:07:27", "remaining_time": "0:00:22", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 330, "total_steps": 336, "loss": 0.4713, "learning_rate": 9.73615262502503e-08, "epoch": 2.9333333333333336, "percentage": 98.21, "elapsed_time": "0:07:41", "remaining_time": "0:00:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 336, "total_steps": 336, "epoch": 2.986666666666667, "percentage": 100.0, "elapsed_time": "0:07:51", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}