qwen_odia_0.5b_pt / trainer_log.jsonl
sam2ai's picture
Upload folder using huggingface_hub
91acf6c verified
{"current_steps": 10, "total_steps": 336, "loss": 1.3387, "learning_rate": 2.9411764705882354e-05, "epoch": 0.08888888888888889, "percentage": 2.98, "elapsed_time": "0:00:27", "remaining_time": "0:14:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 20, "total_steps": 336, "loss": 1.4468, "learning_rate": 5.882352941176471e-05, "epoch": 0.17777777777777778, "percentage": 5.95, "elapsed_time": "0:00:50", "remaining_time": "0:13:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 30, "total_steps": 336, "loss": 1.0612, "learning_rate": 8.823529411764706e-05, "epoch": 0.26666666666666666, "percentage": 8.93, "elapsed_time": "0:01:10", "remaining_time": "0:12:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 40, "total_steps": 336, "loss": 1.0064, "learning_rate": 9.990263847374976e-05, "epoch": 0.35555555555555557, "percentage": 11.9, "elapsed_time": "0:01:30", "remaining_time": "0:11:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 50, "total_steps": 336, "loss": 0.9215, "learning_rate": 9.930902394260747e-05, "epoch": 0.4444444444444444, "percentage": 14.88, "elapsed_time": "0:01:52", "remaining_time": "0:10:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 60, "total_steps": 336, "loss": 1.2268, "learning_rate": 9.818229479678158e-05, "epoch": 0.5333333333333333, "percentage": 17.86, "elapsed_time": "0:02:12", "remaining_time": "0:10:09", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 70, "total_steps": 336, "loss": 0.8976, "learning_rate": 9.653463289927411e-05, "epoch": 0.6222222222222222, "percentage": 20.83, "elapsed_time": "0:02:33", "remaining_time": "0:09:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 80, "total_steps": 336, "loss": 0.8541, "learning_rate": 9.438385228425938e-05, "epoch": 0.7111111111111111, "percentage": 23.81, "elapsed_time": "0:02:51", "remaining_time": "0:09:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 90, "total_steps": 336, "loss": 0.8365, "learning_rate": 9.175320655700406e-05, "epoch": 0.8, "percentage": 26.79, "elapsed_time": "0:03:10", "remaining_time": "0:08:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 100, "total_steps": 336, "loss": 0.8059, "learning_rate": 8.86711374827494e-05, "epoch": 0.8888888888888888, "percentage": 29.76, "elapsed_time": "0:03:32", "remaining_time": "0:08:21", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 110, "total_steps": 336, "loss": 0.8329, "learning_rate": 8.517096748273951e-05, "epoch": 0.9777777777777777, "percentage": 32.74, "elapsed_time": "0:03:52", "remaining_time": "0:07:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 120, "total_steps": 336, "loss": 0.835, "learning_rate": 8.129053936203687e-05, "epoch": 1.0666666666666667, "percentage": 35.71, "elapsed_time": "0:04:15", "remaining_time": "0:07:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 130, "total_steps": 336, "loss": 0.8222, "learning_rate": 7.707180716428237e-05, "epoch": 1.1555555555555554, "percentage": 38.69, "elapsed_time": "0:04:32", "remaining_time": "0:07:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 140, "total_steps": 336, "loss": 0.7214, "learning_rate": 7.256038257695687e-05, "epoch": 1.2444444444444445, "percentage": 41.67, "elapsed_time": "0:04:52", "remaining_time": "0:06:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 150, "total_steps": 336, "loss": 0.7347, "learning_rate": 6.780504179127734e-05, "epoch": 1.3333333333333333, "percentage": 44.64, "elapsed_time": "0:05:13", "remaining_time": "0:06:28", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 160, "total_steps": 336, "loss": 0.7399, "learning_rate": 6.28571981484123e-05, "epoch": 1.4222222222222223, "percentage": 47.62, "elapsed_time": "0:05:38", "remaining_time": "0:06:12", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 170, "total_steps": 336, "loss": 0.6807, "learning_rate": 5.7770346273610254e-05, "epoch": 1.511111111111111, "percentage": 50.6, "elapsed_time": "0:05:58", "remaining_time": "0:05:49", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 180, "total_steps": 336, "loss": 0.7299, "learning_rate": 5.2599483708099016e-05, "epoch": 1.6, "percentage": 53.57, "elapsed_time": "0:06:18", "remaining_time": "0:05:28", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 190, "total_steps": 336, "loss": 0.6873, "learning_rate": 4.740051629190099e-05, "epoch": 1.6888888888888889, "percentage": 56.55, "elapsed_time": "0:06:38", "remaining_time": "0:05:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 200, "total_steps": 336, "loss": 0.7299, "learning_rate": 4.2229653726389765e-05, "epoch": 1.7777777777777777, "percentage": 59.52, "elapsed_time": "0:06:58", "remaining_time": "0:04:44", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 210, "total_steps": 336, "loss": 0.6641, "learning_rate": 3.714280185158771e-05, "epoch": 1.8666666666666667, "percentage": 62.5, "elapsed_time": "0:07:24", "remaining_time": "0:04:26", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 220, "total_steps": 336, "loss": 0.6632, "learning_rate": 3.219495820872265e-05, "epoch": 1.9555555555555557, "percentage": 65.48, "elapsed_time": "0:07:55", "remaining_time": "0:04:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 230, "total_steps": 336, "loss": 0.6072, "learning_rate": 2.7439617423043145e-05, "epoch": 2.0444444444444443, "percentage": 68.45, "elapsed_time": "0:08:24", "remaining_time": "0:03:52", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 240, "total_steps": 336, "loss": 0.6008, "learning_rate": 2.2928192835717644e-05, "epoch": 2.1333333333333333, "percentage": 71.43, "elapsed_time": "0:08:58", "remaining_time": "0:03:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 250, "total_steps": 336, "loss": 0.5797, "learning_rate": 1.8709460637963123e-05, "epoch": 2.2222222222222223, "percentage": 74.4, "elapsed_time": "0:09:29", "remaining_time": "0:03:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 260, "total_steps": 336, "loss": 0.5873, "learning_rate": 1.4829032517260489e-05, "epoch": 2.311111111111111, "percentage": 77.38, "elapsed_time": "0:10:05", "remaining_time": "0:02:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 270, "total_steps": 336, "loss": 0.6224, "learning_rate": 1.132886251725061e-05, "epoch": 2.4, "percentage": 80.36, "elapsed_time": "0:10:38", "remaining_time": "0:02:35", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 280, "total_steps": 336, "loss": 0.547, "learning_rate": 8.246793442995954e-06, "epoch": 2.488888888888889, "percentage": 83.33, "elapsed_time": "0:11:11", "remaining_time": "0:02:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 290, "total_steps": 336, "loss": 0.5523, "learning_rate": 5.616147715740611e-06, "epoch": 2.5777777777777775, "percentage": 86.31, "elapsed_time": "0:11:44", "remaining_time": "0:01:51", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 300, "total_steps": 336, "loss": 0.557, "learning_rate": 3.465367100725908e-06, "epoch": 2.6666666666666665, "percentage": 89.29, "elapsed_time": "0:12:11", "remaining_time": "0:01:27", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 310, "total_steps": 336, "loss": 0.5292, "learning_rate": 1.8177052032184283e-06, "epoch": 2.7555555555555555, "percentage": 92.26, "elapsed_time": "0:12:38", "remaining_time": "0:01:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 320, "total_steps": 336, "loss": 0.5643, "learning_rate": 6.909760573925561e-07, "epoch": 2.8444444444444446, "percentage": 95.24, "elapsed_time": "0:12:58", "remaining_time": "0:00:38", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 330, "total_steps": 336, "loss": 0.5762, "learning_rate": 9.73615262502503e-08, "epoch": 2.9333333333333336, "percentage": 98.21, "elapsed_time": "0:13:16", "remaining_time": "0:00:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 336, "total_steps": 336, "epoch": 2.986666666666667, "percentage": 100.0, "elapsed_time": "0:13:30", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}