llama3-1_8b_math_50000_samples / trainer_log.jsonl
gsmyrnis's picture
Training in progress, epoch 1
8768aab verified
{"current_steps": 10, "total_steps": 126, "loss": 0.7676, "lr": 5e-06, "epoch": 0.23809523809523808, "percentage": 7.94, "elapsed_time": "0:00:59", "remaining_time": "0:11:28"}
{"current_steps": 20, "total_steps": 126, "loss": 0.6743, "lr": 5e-06, "epoch": 0.47619047619047616, "percentage": 15.87, "elapsed_time": "0:01:57", "remaining_time": "0:10:21"}
{"current_steps": 30, "total_steps": 126, "loss": 0.6533, "lr": 5e-06, "epoch": 0.7142857142857143, "percentage": 23.81, "elapsed_time": "0:02:55", "remaining_time": "0:09:20"}
{"current_steps": 40, "total_steps": 126, "loss": 0.6367, "lr": 5e-06, "epoch": 0.9523809523809523, "percentage": 31.75, "elapsed_time": "0:03:53", "remaining_time": "0:08:21"}
{"current_steps": 42, "total_steps": 126, "eval_loss": 0.6265256404876709, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:04:08", "remaining_time": "0:08:17"}
{"current_steps": 50, "total_steps": 126, "loss": 0.6056, "lr": 5e-06, "epoch": 1.1904761904761905, "percentage": 39.68, "elapsed_time": "0:06:25", "remaining_time": "0:09:46"}
{"current_steps": 60, "total_steps": 126, "loss": 0.5894, "lr": 5e-06, "epoch": 1.4285714285714286, "percentage": 47.62, "elapsed_time": "0:07:23", "remaining_time": "0:08:08"}
{"current_steps": 70, "total_steps": 126, "loss": 0.588, "lr": 5e-06, "epoch": 1.6666666666666665, "percentage": 55.56, "elapsed_time": "0:08:21", "remaining_time": "0:06:41"}
{"current_steps": 80, "total_steps": 126, "loss": 0.5825, "lr": 5e-06, "epoch": 1.9047619047619047, "percentage": 63.49, "elapsed_time": "0:09:19", "remaining_time": "0:05:21"}
{"current_steps": 84, "total_steps": 126, "eval_loss": 0.6075287461280823, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:09:47", "remaining_time": "0:04:53"}
{"current_steps": 90, "total_steps": 126, "loss": 0.5563, "lr": 5e-06, "epoch": 2.142857142857143, "percentage": 71.43, "elapsed_time": "0:11:49", "remaining_time": "0:04:43"}
{"current_steps": 100, "total_steps": 126, "loss": 0.5354, "lr": 5e-06, "epoch": 2.380952380952381, "percentage": 79.37, "elapsed_time": "0:12:47", "remaining_time": "0:03:19"}
{"current_steps": 110, "total_steps": 126, "loss": 0.5417, "lr": 5e-06, "epoch": 2.619047619047619, "percentage": 87.3, "elapsed_time": "0:13:45", "remaining_time": "0:02:00"}
{"current_steps": 120, "total_steps": 126, "loss": 0.5386, "lr": 5e-06, "epoch": 2.857142857142857, "percentage": 95.24, "elapsed_time": "0:14:43", "remaining_time": "0:00:44"}
{"current_steps": 126, "total_steps": 126, "eval_loss": 0.6146200895309448, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:16:47", "remaining_time": "0:00:00"}
{"current_steps": 126, "total_steps": 126, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:18:12", "remaining_time": "0:00:00"}