Training in progress, step 50
Browse files- adapter_config.json +1 -1
- adapter_model.safetensors +1 -1
- tokenizer_config.json +0 -1
- trainer_log.jsonl +11 -77
- training_args.bin +1 -1
adapter_config.json
CHANGED
@@ -19,7 +19,7 @@
|
|
19 |
"r": 8,
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
-
"target_modules": "^(?!.*patch_embed).*(?:
|
23 |
"task_type": "CAUSAL_LM",
|
24 |
"use_dora": false,
|
25 |
"use_rslora": false
|
|
|
19 |
"r": 8,
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
+
"target_modules": "^(?!.*patch_embed).*(?:k_proj|gate_proj|up_proj|fc2|v_proj|down_proj|proj|fc1|qkv|q_proj|o_proj).*",
|
23 |
"task_type": "CAUSAL_LM",
|
24 |
"use_dora": false,
|
25 |
"use_rslora": false
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72a1dc75a4969cd4402c49224cdbeb8259d9c8645a8db3aac5f2374a463b3078
|
3 |
size 29034840
|
tokenizer_config.json
CHANGED
@@ -137,7 +137,6 @@
|
|
137 |
"model_max_length": 32768,
|
138 |
"pad_token": "<|endoftext|>",
|
139 |
"padding_side": "right",
|
140 |
-
"processor_class": "Qwen2VLProcessor",
|
141 |
"split_special_tokens": false,
|
142 |
"tokenizer_class": "Qwen2Tokenizer",
|
143 |
"unk_token": null
|
|
|
137 |
"model_max_length": 32768,
|
138 |
"pad_token": "<|endoftext|>",
|
139 |
"padding_side": "right",
|
|
|
140 |
"split_special_tokens": false,
|
141 |
"tokenizer_class": "Qwen2Tokenizer",
|
142 |
"unk_token": null
|
trainer_log.jsonl
CHANGED
@@ -1,77 +1,11 @@
|
|
1 |
-
{"current_steps": 5, "total_steps":
|
2 |
-
{"current_steps": 10, "total_steps":
|
3 |
-
{"current_steps": 15, "total_steps":
|
4 |
-
{"current_steps": 20, "total_steps":
|
5 |
-
{"current_steps": 25, "total_steps":
|
6 |
-
{"current_steps": 30, "total_steps":
|
7 |
-
{"current_steps": 35, "total_steps":
|
8 |
-
{"current_steps": 40, "total_steps":
|
9 |
-
{"current_steps": 45, "total_steps":
|
10 |
-
{"current_steps": 50, "total_steps":
|
11 |
-
{"current_steps": 50, "total_steps":
|
12 |
-
{"current_steps": 55, "total_steps": 720, "loss": 0.9113, "lr": 7.638888888888889e-05, "epoch": 0.1527777777777778, "percentage": 7.64, "elapsed_time": "0:24:09", "remaining_time": "4:52:11", "throughput": 591.62, "total_tokens": 857840}
|
13 |
-
{"current_steps": 60, "total_steps": 720, "loss": 0.8999, "lr": 8.333333333333334e-05, "epoch": 0.16666666666666666, "percentage": 8.33, "elapsed_time": "0:26:13", "remaining_time": "4:48:31", "throughput": 594.67, "total_tokens": 935880}
|
14 |
-
{"current_steps": 65, "total_steps": 720, "loss": 0.9027, "lr": 9.027777777777779e-05, "epoch": 0.18055555555555555, "percentage": 9.03, "elapsed_time": "0:28:15", "remaining_time": "4:44:47", "throughput": 597.88, "total_tokens": 1013824}
|
15 |
-
{"current_steps": 70, "total_steps": 720, "loss": 0.8994, "lr": 9.722222222222223e-05, "epoch": 0.19444444444444445, "percentage": 9.72, "elapsed_time": "0:30:18", "remaining_time": "4:41:27", "throughput": 600.35, "total_tokens": 1091808}
|
16 |
-
{"current_steps": 75, "total_steps": 720, "loss": 0.9049, "lr": 9.999471159635539e-05, "epoch": 0.20833333333333334, "percentage": 10.42, "elapsed_time": "0:32:18", "remaining_time": "4:37:53", "throughput": 603.34, "total_tokens": 1169744}
|
17 |
-
{"current_steps": 80, "total_steps": 720, "loss": 0.9054, "lr": 9.996239762521151e-05, "epoch": 0.2222222222222222, "percentage": 11.11, "elapsed_time": "0:34:19", "remaining_time": "4:34:32", "throughput": 606.01, "total_tokens": 1247784}
|
18 |
-
{"current_steps": 85, "total_steps": 720, "loss": 0.8997, "lr": 9.990072664903717e-05, "epoch": 0.2361111111111111, "percentage": 11.81, "elapsed_time": "0:36:19", "remaining_time": "4:31:20", "throughput": 608.31, "total_tokens": 1325712}
|
19 |
-
{"current_steps": 90, "total_steps": 720, "loss": 0.9019, "lr": 9.980973490458728e-05, "epoch": 0.25, "percentage": 12.5, "elapsed_time": "0:38:18", "remaining_time": "4:28:11", "throughput": 610.64, "total_tokens": 1403680}
|
20 |
-
{"current_steps": 95, "total_steps": 720, "loss": 0.9014, "lr": 9.968947585697214e-05, "epoch": 0.2638888888888889, "percentage": 13.19, "elapsed_time": "0:40:18", "remaining_time": "4:25:10", "throughput": 612.64, "total_tokens": 1481584}
|
21 |
-
{"current_steps": 100, "total_steps": 720, "loss": 0.9068, "lr": 9.954002016824227e-05, "epoch": 0.2777777777777778, "percentage": 13.89, "elapsed_time": "0:42:14", "remaining_time": "4:21:51", "throughput": 615.42, "total_tokens": 1559560}
|
22 |
-
{"current_steps": 100, "total_steps": 720, "eval_loss": 0.8995540738105774, "epoch": 0.2777777777777778, "percentage": 13.89, "elapsed_time": "0:43:01", "remaining_time": "4:26:43", "throughput": 604.19, "total_tokens": 1559560}
|
23 |
-
{"current_steps": 105, "total_steps": 720, "loss": 0.9044, "lr": 9.936145565586871e-05, "epoch": 0.2916666666666667, "percentage": 14.58, "elapsed_time": "0:45:04", "remaining_time": "4:23:58", "throughput": 605.55, "total_tokens": 1637512}
|
24 |
-
{"current_steps": 110, "total_steps": 720, "loss": 0.8938, "lr": 9.915388724114301e-05, "epoch": 0.3055555555555556, "percentage": 15.28, "elapsed_time": "0:47:02", "remaining_time": "4:20:51", "throughput": 607.83, "total_tokens": 1715496}
|
25 |
-
{"current_steps": 115, "total_steps": 720, "loss": 0.8986, "lr": 9.891743688752738e-05, "epoch": 0.3194444444444444, "percentage": 15.97, "elapsed_time": "0:48:57", "remaining_time": "4:17:35", "throughput": 610.51, "total_tokens": 1793520}
|
26 |
-
{"current_steps": 120, "total_steps": 720, "loss": 0.8954, "lr": 9.865224352899119e-05, "epoch": 0.3333333333333333, "percentage": 16.67, "elapsed_time": "0:50:55", "remaining_time": "4:14:36", "throughput": 612.55, "total_tokens": 1871496}
|
27 |
-
{"current_steps": 125, "total_steps": 720, "loss": 0.9014, "lr": 9.835846298837584e-05, "epoch": 0.3472222222222222, "percentage": 17.36, "elapsed_time": "0:52:52", "remaining_time": "4:11:40", "throughput": 614.52, "total_tokens": 1949496}
|
28 |
-
{"current_steps": 130, "total_steps": 720, "loss": 0.9069, "lr": 9.803626788583603e-05, "epoch": 0.3611111111111111, "percentage": 18.06, "elapsed_time": "0:54:47", "remaining_time": "4:08:41", "throughput": 616.66, "total_tokens": 2027472}
|
29 |
-
{"current_steps": 135, "total_steps": 720, "loss": 0.9036, "lr": 9.768584753741134e-05, "epoch": 0.375, "percentage": 18.75, "elapsed_time": "0:56:44", "remaining_time": "4:05:53", "throughput": 618.42, "total_tokens": 2105488}
|
30 |
-
{"current_steps": 140, "total_steps": 720, "loss": 0.8974, "lr": 9.730740784378753e-05, "epoch": 0.3888888888888889, "percentage": 19.44, "elapsed_time": "0:58:40", "remaining_time": "4:03:04", "throughput": 620.23, "total_tokens": 2183488}
|
31 |
-
{"current_steps": 145, "total_steps": 720, "loss": 0.8999, "lr": 9.69011711693129e-05, "epoch": 0.4027777777777778, "percentage": 20.14, "elapsed_time": "1:00:35", "remaining_time": "4:00:18", "throughput": 621.99, "total_tokens": 2261472}
|
32 |
-
{"current_steps": 150, "total_steps": 720, "loss": 0.908, "lr": 9.646737621134112e-05, "epoch": 0.4166666666666667, "percentage": 20.83, "elapsed_time": "1:02:32", "remaining_time": "3:57:39", "throughput": 623.44, "total_tokens": 2339512}
|
33 |
-
{"current_steps": 150, "total_steps": 720, "eval_loss": 0.8949049115180969, "epoch": 0.4166666666666667, "percentage": 20.83, "elapsed_time": "1:03:19", "remaining_time": "4:00:37", "throughput": 615.77, "total_tokens": 2339512}
|
34 |
-
{"current_steps": 155, "total_steps": 720, "loss": 0.8971, "lr": 9.600627785997696e-05, "epoch": 0.4305555555555556, "percentage": 21.53, "elapsed_time": "1:05:20", "remaining_time": "3:58:11", "throughput": 616.6, "total_tokens": 2417488}
|
35 |
-
{"current_steps": 160, "total_steps": 720, "loss": 0.8952, "lr": 9.551814704830734e-05, "epoch": 0.4444444444444444, "percentage": 22.22, "elapsed_time": "1:07:17", "remaining_time": "3:55:31", "throughput": 618.07, "total_tokens": 2495464}
|
36 |
-
{"current_steps": 165, "total_steps": 720, "loss": 0.8972, "lr": 9.500327059320606e-05, "epoch": 0.4583333333333333, "percentage": 22.92, "elapsed_time": "1:09:13", "remaining_time": "3:52:51", "throughput": 619.54, "total_tokens": 2573408}
|
37 |
-
{"current_steps": 170, "total_steps": 720, "loss": 0.8979, "lr": 9.446195102680531e-05, "epoch": 0.4722222222222222, "percentage": 23.61, "elapsed_time": "1:11:08", "remaining_time": "3:50:10", "throughput": 621.1, "total_tokens": 2651368}
|
38 |
-
{"current_steps": 175, "total_steps": 720, "loss": 0.8908, "lr": 9.389450641873323e-05, "epoch": 0.4861111111111111, "percentage": 24.31, "elapsed_time": "1:13:04", "remaining_time": "3:47:35", "throughput": 622.48, "total_tokens": 2729352}
|
39 |
-
{"current_steps": 180, "total_steps": 720, "loss": 0.8997, "lr": 9.330127018922194e-05, "epoch": 0.5, "percentage": 25.0, "elapsed_time": "1:14:59", "remaining_time": "3:44:59", "throughput": 623.87, "total_tokens": 2807320}
|
40 |
-
{"current_steps": 185, "total_steps": 720, "loss": 0.9024, "lr": 9.268259091319582e-05, "epoch": 0.5138888888888888, "percentage": 25.69, "elapsed_time": "1:16:55", "remaining_time": "3:42:27", "throughput": 625.13, "total_tokens": 2885368}
|
41 |
-
{"current_steps": 190, "total_steps": 720, "loss": 0.9002, "lr": 9.203883211545517e-05, "epoch": 0.5277777777777778, "percentage": 26.39, "elapsed_time": "1:18:51", "remaining_time": "3:39:58", "throughput": 626.3, "total_tokens": 2963376}
|
42 |
-
{"current_steps": 195, "total_steps": 720, "loss": 0.9, "lr": 9.137037205707552e-05, "epoch": 0.5416666666666666, "percentage": 27.08, "elapsed_time": "1:20:46", "remaining_time": "3:37:28", "throughput": 627.55, "total_tokens": 3041376}
|
43 |
-
{"current_steps": 200, "total_steps": 720, "loss": 0.8989, "lr": 9.067760351314838e-05, "epoch": 0.5555555555555556, "percentage": 27.78, "elapsed_time": "1:22:42", "remaining_time": "3:35:02", "throughput": 628.55, "total_tokens": 3119304}
|
44 |
-
{"current_steps": 200, "total_steps": 720, "eval_loss": 0.8992709517478943, "epoch": 0.5555555555555556, "percentage": 27.78, "elapsed_time": "1:23:29", "remaining_time": "3:37:04", "throughput": 622.7, "total_tokens": 3119304}
|
45 |
-
{"current_steps": 205, "total_steps": 720, "loss": 0.8994, "lr": 8.996093354199349e-05, "epoch": 0.5694444444444444, "percentage": 28.47, "elapsed_time": "1:25:31", "remaining_time": "3:34:51", "throughput": 623.08, "total_tokens": 3197320}
|
46 |
-
{"current_steps": 210, "total_steps": 720, "loss": 0.9036, "lr": 8.922078324597879e-05, "epoch": 0.5833333333333334, "percentage": 29.17, "elapsed_time": "1:27:26", "remaining_time": "3:32:20", "throughput": 624.32, "total_tokens": 3275288}
|
47 |
-
{"current_steps": 215, "total_steps": 720, "loss": 0.8988, "lr": 8.845758752408826e-05, "epoch": 0.5972222222222222, "percentage": 29.86, "elapsed_time": "1:29:22", "remaining_time": "3:29:54", "throughput": 625.36, "total_tokens": 3353240}
|
48 |
-
{"current_steps": 220, "total_steps": 720, "loss": 0.9015, "lr": 8.767179481638303e-05, "epoch": 0.6111111111111112, "percentage": 30.56, "elapsed_time": "1:31:17", "remaining_time": "3:27:27", "throughput": 626.48, "total_tokens": 3431248}
|
49 |
-
{"current_steps": 225, "total_steps": 720, "loss": 0.9047, "lr": 8.68638668405062e-05, "epoch": 0.625, "percentage": 31.25, "elapsed_time": "1:33:12", "remaining_time": "3:25:03", "throughput": 627.47, "total_tokens": 3509192}
|
50 |
-
{"current_steps": 230, "total_steps": 720, "loss": 0.901, "lr": 8.603427832038574e-05, "epoch": 0.6388888888888888, "percentage": 31.94, "elapsed_time": "1:35:07", "remaining_time": "3:22:39", "throughput": 628.5, "total_tokens": 3587152}
|
51 |
-
{"current_steps": 235, "total_steps": 720, "loss": 0.8898, "lr": 8.518351670729529e-05, "epoch": 0.6527777777777778, "percentage": 32.64, "elapsed_time": "1:37:00", "remaining_time": "3:20:12", "throughput": 629.71, "total_tokens": 3665128}
|
52 |
-
{"current_steps": 240, "total_steps": 720, "loss": 0.8896, "lr": 8.43120818934367e-05, "epoch": 0.6666666666666666, "percentage": 33.33, "elapsed_time": "1:38:55", "remaining_time": "3:17:50", "throughput": 630.68, "total_tokens": 3743128}
|
53 |
-
{"current_steps": 245, "total_steps": 720, "loss": 0.8993, "lr": 8.342048591821212e-05, "epoch": 0.6805555555555556, "percentage": 34.03, "elapsed_time": "1:40:49", "remaining_time": "3:15:28", "throughput": 631.62, "total_tokens": 3821088}
|
54 |
-
{"current_steps": 250, "total_steps": 720, "loss": 0.8907, "lr": 8.250925266735918e-05, "epoch": 0.6944444444444444, "percentage": 34.72, "elapsed_time": "1:42:42", "remaining_time": "3:13:05", "throughput": 632.69, "total_tokens": 3899064}
|
55 |
-
{"current_steps": 250, "total_steps": 720, "eval_loss": 0.8925368785858154, "epoch": 0.6944444444444444, "percentage": 34.72, "elapsed_time": "1:43:29", "remaining_time": "3:14:33", "throughput": 627.95, "total_tokens": 3899064}
|
56 |
-
{"current_steps": 255, "total_steps": 720, "loss": 0.878, "lr": 8.157891756512488e-05, "epoch": 0.7083333333333334, "percentage": 35.42, "elapsed_time": "1:45:29", "remaining_time": "3:12:22", "throughput": 628.33, "total_tokens": 3977048}
|
57 |
-
{"current_steps": 260, "total_steps": 720, "loss": 0.8881, "lr": 8.063002725966015e-05, "epoch": 0.7222222222222222, "percentage": 36.11, "elapsed_time": "1:47:23", "remaining_time": "3:10:00", "throughput": 629.31, "total_tokens": 4055080}
|
58 |
-
{"current_steps": 265, "total_steps": 720, "loss": 0.8755, "lr": 7.966313930181912e-05, "epoch": 0.7361111111111112, "percentage": 36.81, "elapsed_time": "1:49:17", "remaining_time": "3:07:38", "throughput": 630.28, "total_tokens": 4133008}
|
59 |
-
{"current_steps": 270, "total_steps": 720, "loss": 0.8558, "lr": 7.86788218175523e-05, "epoch": 0.75, "percentage": 37.5, "elapsed_time": "1:51:12", "remaining_time": "3:05:20", "throughput": 631.13, "total_tokens": 4210992}
|
60 |
-
{"current_steps": 275, "total_steps": 720, "loss": 0.9057, "lr": 7.767765317408613e-05, "epoch": 0.7638888888888888, "percentage": 38.19, "elapsed_time": "1:53:05", "remaining_time": "3:02:59", "throughput": 632.09, "total_tokens": 4288976}
|
61 |
-
{"current_steps": 280, "total_steps": 720, "loss": 0.845, "lr": 7.666022164008457e-05, "epoch": 0.7777777777777778, "percentage": 38.89, "elapsed_time": "1:54:59", "remaining_time": "3:00:42", "throughput": 632.92, "total_tokens": 4366920}
|
62 |
-
{"current_steps": 285, "total_steps": 720, "loss": 0.8524, "lr": 7.562712503999327e-05, "epoch": 0.7916666666666666, "percentage": 39.58, "elapsed_time": "1:56:53", "remaining_time": "2:58:25", "throughput": 633.73, "total_tokens": 4444912}
|
63 |
-
{"current_steps": 290, "total_steps": 720, "loss": 0.8694, "lr": 7.457897040276853e-05, "epoch": 0.8055555555555556, "percentage": 40.28, "elapsed_time": "1:58:47", "remaining_time": "2:56:07", "throughput": 634.61, "total_tokens": 4522912}
|
64 |
-
{"current_steps": 295, "total_steps": 720, "loss": 0.8605, "lr": 7.351637360519813e-05, "epoch": 0.8194444444444444, "percentage": 40.97, "elapsed_time": "2:00:41", "remaining_time": "2:53:52", "throughput": 635.35, "total_tokens": 4600848}
|
65 |
-
{"current_steps": 300, "total_steps": 720, "loss": 0.8739, "lr": 7.243995901002312e-05, "epoch": 0.8333333333333334, "percentage": 41.67, "elapsed_time": "2:02:34", "remaining_time": "2:51:36", "throughput": 636.14, "total_tokens": 4678776}
|
66 |
-
{"current_steps": 300, "total_steps": 720, "eval_loss": 0.8601770997047424, "epoch": 0.8333333333333334, "percentage": 41.67, "elapsed_time": "2:03:21", "remaining_time": "2:52:42", "throughput": 632.1, "total_tokens": 4678776}
|
67 |
-
{"current_steps": 305, "total_steps": 720, "loss": 0.8838, "lr": 7.135035909907358e-05, "epoch": 0.8472222222222222, "percentage": 42.36, "elapsed_time": "2:05:21", "remaining_time": "2:50:33", "throughput": 632.44, "total_tokens": 4756744}
|
68 |
-
{"current_steps": 310, "total_steps": 720, "loss": 0.8843, "lr": 7.024821410163368e-05, "epoch": 0.8611111111111112, "percentage": 43.06, "elapsed_time": "2:07:15", "remaining_time": "2:48:18", "throughput": 633.18, "total_tokens": 4834648}
|
69 |
-
{"current_steps": 315, "total_steps": 720, "loss": 0.8637, "lr": 6.91341716182545e-05, "epoch": 0.875, "percentage": 43.75, "elapsed_time": "2:09:08", "remaining_time": "2:46:02", "throughput": 634.01, "total_tokens": 4912632}
|
70 |
-
{"current_steps": 320, "total_steps": 720, "loss": 0.8714, "lr": 6.800888624023553e-05, "epoch": 0.8888888888888888, "percentage": 44.44, "elapsed_time": "2:11:02", "remaining_time": "2:43:48", "throughput": 634.71, "total_tokens": 4990632}
|
71 |
-
{"current_steps": 325, "total_steps": 720, "loss": 0.8581, "lr": 6.687301916499871e-05, "epoch": 0.9027777777777778, "percentage": 45.14, "elapsed_time": "2:12:57", "remaining_time": "2:41:35", "throughput": 635.39, "total_tokens": 5068632}
|
72 |
-
{"current_steps": 330, "total_steps": 720, "loss": 0.8655, "lr": 6.572723780758069e-05, "epoch": 0.9166666666666666, "percentage": 45.83, "elapsed_time": "2:14:50", "remaining_time": "2:39:20", "throughput": 636.17, "total_tokens": 5146648}
|
73 |
-
{"current_steps": 335, "total_steps": 720, "loss": 0.8721, "lr": 6.457221540847176e-05, "epoch": 0.9305555555555556, "percentage": 46.53, "elapsed_time": "2:16:43", "remaining_time": "2:37:08", "throughput": 636.87, "total_tokens": 5224624}
|
74 |
-
{"current_steps": 340, "total_steps": 720, "loss": 0.8563, "lr": 6.340863063803188e-05, "epoch": 0.9444444444444444, "percentage": 47.22, "elapsed_time": "2:18:36", "remaining_time": "2:34:55", "throughput": 637.58, "total_tokens": 5302600}
|
75 |
-
{"current_steps": 345, "total_steps": 720, "loss": 0.8634, "lr": 6.22371671977162e-05, "epoch": 0.9583333333333334, "percentage": 47.92, "elapsed_time": "2:20:29", "remaining_time": "2:32:42", "throughput": 638.28, "total_tokens": 5380600}
|
76 |
-
{"current_steps": 350, "total_steps": 720, "loss": 0.8526, "lr": 6.105851341834439e-05, "epoch": 0.9722222222222222, "percentage": 48.61, "elapsed_time": "2:22:23", "remaining_time": "2:30:31", "throughput": 638.9, "total_tokens": 5458520}
|
77 |
-
{"current_steps": 350, "total_steps": 720, "eval_loss": 0.830344021320343, "epoch": 0.9722222222222222, "percentage": 48.61, "elapsed_time": "2:23:09", "remaining_time": "2:31:20", "throughput": 635.45, "total_tokens": 5458520}
|
|
|
1 |
+
{"current_steps": 5, "total_steps": 716, "loss": 2.9908, "lr": 6.944444444444445e-06, "epoch": 0.013961605584642234, "percentage": 0.7, "elapsed_time": "0:03:15", "remaining_time": "7:43:27", "throughput": 398.59, "total_tokens": 77944}
|
2 |
+
{"current_steps": 10, "total_steps": 716, "loss": 3.0071, "lr": 1.388888888888889e-05, "epoch": 0.027923211169284468, "percentage": 1.4, "elapsed_time": "0:05:11", "remaining_time": "6:06:44", "throughput": 500.18, "total_tokens": 155896}
|
3 |
+
{"current_steps": 15, "total_steps": 716, "loss": 2.354, "lr": 2.0833333333333336e-05, "epoch": 0.041884816753926704, "percentage": 2.09, "elapsed_time": "0:07:08", "remaining_time": "5:33:43", "throughput": 545.89, "total_tokens": 233896}
|
4 |
+
{"current_steps": 20, "total_steps": 716, "loss": 1.2959, "lr": 2.777777777777778e-05, "epoch": 0.055846422338568937, "percentage": 2.79, "elapsed_time": "0:09:04", "remaining_time": "5:15:46", "throughput": 572.78, "total_tokens": 311840}
|
5 |
+
{"current_steps": 25, "total_steps": 716, "loss": 1.0206, "lr": 3.472222222222222e-05, "epoch": 0.06980802792321117, "percentage": 3.49, "elapsed_time": "0:11:01", "remaining_time": "5:04:39", "throughput": 589.45, "total_tokens": 389816}
|
6 |
+
{"current_steps": 30, "total_steps": 716, "loss": 0.9285, "lr": 4.166666666666667e-05, "epoch": 0.08376963350785341, "percentage": 4.19, "elapsed_time": "0:12:57", "remaining_time": "4:56:12", "throughput": 601.89, "total_tokens": 467808}
|
7 |
+
{"current_steps": 35, "total_steps": 716, "loss": 0.9052, "lr": 4.8611111111111115e-05, "epoch": 0.09773123909249563, "percentage": 4.89, "elapsed_time": "0:14:51", "remaining_time": "4:49:15", "throughput": 611.87, "total_tokens": 545776}
|
8 |
+
{"current_steps": 40, "total_steps": 716, "loss": 0.929, "lr": 5.555555555555556e-05, "epoch": 0.11169284467713787, "percentage": 5.59, "elapsed_time": "0:16:48", "remaining_time": "4:43:55", "throughput": 618.78, "total_tokens": 623744}
|
9 |
+
{"current_steps": 45, "total_steps": 716, "loss": 0.9076, "lr": 6.25e-05, "epoch": 0.1256544502617801, "percentage": 6.28, "elapsed_time": "0:18:43", "remaining_time": "4:39:18", "throughput": 624.36, "total_tokens": 701720}
|
10 |
+
{"current_steps": 50, "total_steps": 716, "loss": 0.9039, "lr": 6.944444444444444e-05, "epoch": 0.13961605584642234, "percentage": 6.98, "elapsed_time": "0:20:37", "remaining_time": "4:34:45", "throughput": 630.02, "total_tokens": 779728}
|
11 |
+
{"current_steps": 50, "total_steps": 716, "eval_loss": 0.9039102792739868, "epoch": 0.13961605584642234, "percentage": 6.98, "elapsed_time": "0:21:52", "remaining_time": "4:51:23", "throughput": 594.04, "total_tokens": 779728}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7352
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b0af6075e02b8e93a59ce938db15a9c8a754a7d6b6c53d6278e322b07db3808
|
3 |
size 7352
|