Training in progress, epoch 1
Browse files- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +49 -58
- training_args.bin +1 -1
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4976698672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a0b9c95097b37274d5c23caffcb79d5767cb0dc9fb52c03e2c231ecd4beaef6
|
3 |
size 4976698672
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94f7dcc72d2c52ea56b12303c141df87a436772cd323912960b4e6e00411b78c
|
3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbf083755762720ba2e7cbb70edec20cab4bd533f336ad1650481ab84127f8a8
|
3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bfbf9df4f680193d8db45b05e988999232c6d7536aa1c9a7562e86a04a09b2a
|
3 |
size 1168138808
|
trainer_log.jsonl
CHANGED
@@ -1,58 +1,49 @@
|
|
1 |
-
{"current_steps": 10, "total_steps": 549, "loss": 1.
|
2 |
-
{"current_steps": 20, "total_steps": 549, "loss": 0.
|
3 |
-
{"current_steps": 30, "total_steps": 549, "loss": 0.
|
4 |
-
{"current_steps": 40, "total_steps": 549, "loss": 0.
|
5 |
-
{"current_steps": 50, "total_steps": 549, "loss": 0.
|
6 |
-
{"current_steps": 60, "total_steps": 549, "loss": 0.
|
7 |
-
{"current_steps": 70, "total_steps": 549, "loss": 0.
|
8 |
-
{"current_steps": 80, "total_steps": 549, "loss": 0.
|
9 |
-
{"current_steps": 90, "total_steps": 549, "loss": 0.
|
10 |
-
{"current_steps": 100, "total_steps": 549, "loss": 0.
|
11 |
-
{"current_steps": 110, "total_steps": 549, "loss": 0.
|
12 |
-
{"current_steps": 120, "total_steps": 549, "loss": 0.
|
13 |
-
{"current_steps": 130, "total_steps": 549, "loss": 0.
|
14 |
-
{"current_steps": 140, "total_steps": 549, "loss": 0.
|
15 |
-
{"current_steps": 150, "total_steps": 549, "loss": 0.
|
16 |
-
{"current_steps": 160, "total_steps": 549, "loss": 0.
|
17 |
-
{"current_steps": 170, "total_steps": 549, "loss": 0.
|
18 |
-
{"current_steps": 180, "total_steps": 549, "loss": 0.
|
19 |
-
{"current_steps": 183, "total_steps": 549, "eval_loss": 0.
|
20 |
-
{"current_steps": 190, "total_steps": 549, "loss": 0.
|
21 |
-
{"current_steps": 200, "total_steps": 549, "loss": 0.
|
22 |
-
{"current_steps": 210, "total_steps": 549, "loss": 0.
|
23 |
-
{"current_steps": 220, "total_steps": 549, "loss": 0.
|
24 |
-
{"current_steps": 230, "total_steps": 549, "loss": 0.
|
25 |
-
{"current_steps": 240, "total_steps": 549, "loss": 0.
|
26 |
-
{"current_steps": 250, "total_steps": 549, "loss": 0.
|
27 |
-
{"current_steps": 260, "total_steps": 549, "loss": 0.
|
28 |
-
{"current_steps": 270, "total_steps": 549, "loss": 0.
|
29 |
-
{"current_steps": 280, "total_steps": 549, "loss": 0.
|
30 |
-
{"current_steps": 290, "total_steps": 549, "loss": 0.
|
31 |
-
{"current_steps": 300, "total_steps": 549, "loss": 0.
|
32 |
-
{"current_steps": 310, "total_steps": 549, "loss": 0.
|
33 |
-
{"current_steps": 320, "total_steps": 549, "loss": 0.
|
34 |
-
{"current_steps": 330, "total_steps": 549, "loss": 0.
|
35 |
-
{"current_steps": 340, "total_steps": 549, "loss": 0.
|
36 |
-
{"current_steps": 350, "total_steps": 549, "loss": 0.
|
37 |
-
{"current_steps": 360, "total_steps": 549, "loss": 0.
|
38 |
-
{"current_steps": 366, "total_steps": 549, "eval_loss": 0.
|
39 |
-
{"current_steps": 370, "total_steps": 549, "loss": 0.
|
40 |
-
{"current_steps": 380, "total_steps": 549, "loss": 0.
|
41 |
-
{"current_steps": 390, "total_steps": 549, "loss": 0.
|
42 |
-
{"current_steps": 400, "total_steps": 549, "loss": 0.
|
43 |
-
{"current_steps": 410, "total_steps": 549, "loss": 0.
|
44 |
-
{"current_steps": 420, "total_steps": 549, "loss": 0.
|
45 |
-
{"current_steps": 430, "total_steps": 549, "loss": 0.
|
46 |
-
{"current_steps": 440, "total_steps": 549, "loss": 0.
|
47 |
-
{"current_steps": 450, "total_steps": 549, "loss": 0.
|
48 |
-
{"current_steps": 460, "total_steps": 549, "loss": 0.
|
49 |
-
{"current_steps": 470, "total_steps": 549, "loss": 0.
|
50 |
-
{"current_steps": 480, "total_steps": 549, "loss": 0.7237, "lr": 5e-06, "epoch": 2.6229508196721314, "percentage": 87.43, "elapsed_time": "0:50:39", "remaining_time": "0:07:16"}
|
51 |
-
{"current_steps": 490, "total_steps": 549, "loss": 0.7243, "lr": 5e-06, "epoch": 2.6775956284153004, "percentage": 89.25, "elapsed_time": "0:51:38", "remaining_time": "0:06:13"}
|
52 |
-
{"current_steps": 500, "total_steps": 549, "loss": 0.7215, "lr": 5e-06, "epoch": 2.73224043715847, "percentage": 91.07, "elapsed_time": "0:52:36", "remaining_time": "0:05:09"}
|
53 |
-
{"current_steps": 510, "total_steps": 549, "loss": 0.727, "lr": 5e-06, "epoch": 2.7868852459016393, "percentage": 92.9, "elapsed_time": "0:53:35", "remaining_time": "0:04:05"}
|
54 |
-
{"current_steps": 520, "total_steps": 549, "loss": 0.7225, "lr": 5e-06, "epoch": 2.841530054644809, "percentage": 94.72, "elapsed_time": "0:54:33", "remaining_time": "0:03:02"}
|
55 |
-
{"current_steps": 530, "total_steps": 549, "loss": 0.7269, "lr": 5e-06, "epoch": 2.8961748633879782, "percentage": 96.54, "elapsed_time": "0:55:32", "remaining_time": "0:01:59"}
|
56 |
-
{"current_steps": 540, "total_steps": 549, "loss": 0.7197, "lr": 5e-06, "epoch": 2.9508196721311473, "percentage": 98.36, "elapsed_time": "0:56:30", "remaining_time": "0:00:56"}
|
57 |
-
{"current_steps": 549, "total_steps": 549, "eval_loss": 0.7866214513778687, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:59:11", "remaining_time": "0:00:00"}
|
58 |
-
{"current_steps": 549, "total_steps": 549, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:00:39", "remaining_time": "0:00:00"}
|
|
|
1 |
+
{"current_steps": 10, "total_steps": 549, "loss": 1.0197, "lr": 5e-06, "epoch": 0.0546448087431694, "percentage": 1.82, "elapsed_time": "0:00:59", "remaining_time": "0:53:29"}
|
2 |
+
{"current_steps": 20, "total_steps": 549, "loss": 0.9201, "lr": 5e-06, "epoch": 0.1092896174863388, "percentage": 3.64, "elapsed_time": "0:01:57", "remaining_time": "0:51:53"}
|
3 |
+
{"current_steps": 30, "total_steps": 549, "loss": 0.8893, "lr": 5e-06, "epoch": 0.16393442622950818, "percentage": 5.46, "elapsed_time": "0:02:56", "remaining_time": "0:50:44"}
|
4 |
+
{"current_steps": 40, "total_steps": 549, "loss": 0.8686, "lr": 5e-06, "epoch": 0.2185792349726776, "percentage": 7.29, "elapsed_time": "0:03:54", "remaining_time": "0:49:40"}
|
5 |
+
{"current_steps": 50, "total_steps": 549, "loss": 0.8566, "lr": 5e-06, "epoch": 0.273224043715847, "percentage": 9.11, "elapsed_time": "0:04:52", "remaining_time": "0:48:38"}
|
6 |
+
{"current_steps": 60, "total_steps": 549, "loss": 0.8464, "lr": 5e-06, "epoch": 0.32786885245901637, "percentage": 10.93, "elapsed_time": "0:05:50", "remaining_time": "0:47:38"}
|
7 |
+
{"current_steps": 70, "total_steps": 549, "loss": 0.837, "lr": 5e-06, "epoch": 0.3825136612021858, "percentage": 12.75, "elapsed_time": "0:06:49", "remaining_time": "0:46:39"}
|
8 |
+
{"current_steps": 80, "total_steps": 549, "loss": 0.834, "lr": 5e-06, "epoch": 0.4371584699453552, "percentage": 14.57, "elapsed_time": "0:07:47", "remaining_time": "0:45:39"}
|
9 |
+
{"current_steps": 90, "total_steps": 549, "loss": 0.8239, "lr": 5e-06, "epoch": 0.4918032786885246, "percentage": 16.39, "elapsed_time": "0:08:45", "remaining_time": "0:44:41"}
|
10 |
+
{"current_steps": 100, "total_steps": 549, "loss": 0.8203, "lr": 5e-06, "epoch": 0.546448087431694, "percentage": 18.21, "elapsed_time": "0:09:44", "remaining_time": "0:43:42"}
|
11 |
+
{"current_steps": 110, "total_steps": 549, "loss": 0.8191, "lr": 5e-06, "epoch": 0.6010928961748634, "percentage": 20.04, "elapsed_time": "0:10:42", "remaining_time": "0:42:43"}
|
12 |
+
{"current_steps": 120, "total_steps": 549, "loss": 0.8148, "lr": 5e-06, "epoch": 0.6557377049180327, "percentage": 21.86, "elapsed_time": "0:11:40", "remaining_time": "0:41:44"}
|
13 |
+
{"current_steps": 130, "total_steps": 549, "loss": 0.816, "lr": 5e-06, "epoch": 0.7103825136612022, "percentage": 23.68, "elapsed_time": "0:12:38", "remaining_time": "0:40:45"}
|
14 |
+
{"current_steps": 140, "total_steps": 549, "loss": 0.8084, "lr": 5e-06, "epoch": 0.7650273224043715, "percentage": 25.5, "elapsed_time": "0:13:37", "remaining_time": "0:39:47"}
|
15 |
+
{"current_steps": 150, "total_steps": 549, "loss": 0.8101, "lr": 5e-06, "epoch": 0.819672131147541, "percentage": 27.32, "elapsed_time": "0:14:35", "remaining_time": "0:38:48"}
|
16 |
+
{"current_steps": 160, "total_steps": 549, "loss": 0.8095, "lr": 5e-06, "epoch": 0.8743169398907104, "percentage": 29.14, "elapsed_time": "0:15:33", "remaining_time": "0:37:50"}
|
17 |
+
{"current_steps": 170, "total_steps": 549, "loss": 0.8056, "lr": 5e-06, "epoch": 0.9289617486338798, "percentage": 30.97, "elapsed_time": "0:16:32", "remaining_time": "0:36:52"}
|
18 |
+
{"current_steps": 180, "total_steps": 549, "loss": 0.8067, "lr": 5e-06, "epoch": 0.9836065573770492, "percentage": 32.79, "elapsed_time": "0:17:30", "remaining_time": "0:35:53"}
|
19 |
+
{"current_steps": 183, "total_steps": 549, "eval_loss": 0.8027574419975281, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:18:06", "remaining_time": "0:36:12"}
|
20 |
+
{"current_steps": 190, "total_steps": 549, "loss": 0.7796, "lr": 5e-06, "epoch": 1.0382513661202186, "percentage": 34.61, "elapsed_time": "0:20:16", "remaining_time": "0:38:18"}
|
21 |
+
{"current_steps": 200, "total_steps": 549, "loss": 0.7669, "lr": 5e-06, "epoch": 1.092896174863388, "percentage": 36.43, "elapsed_time": "0:21:14", "remaining_time": "0:37:04"}
|
22 |
+
{"current_steps": 210, "total_steps": 549, "loss": 0.7664, "lr": 5e-06, "epoch": 1.1475409836065573, "percentage": 38.25, "elapsed_time": "0:22:12", "remaining_time": "0:35:51"}
|
23 |
+
{"current_steps": 220, "total_steps": 549, "loss": 0.7675, "lr": 5e-06, "epoch": 1.2021857923497268, "percentage": 40.07, "elapsed_time": "0:23:11", "remaining_time": "0:34:40"}
|
24 |
+
{"current_steps": 230, "total_steps": 549, "loss": 0.7625, "lr": 5e-06, "epoch": 1.2568306010928962, "percentage": 41.89, "elapsed_time": "0:24:09", "remaining_time": "0:33:30"}
|
25 |
+
{"current_steps": 240, "total_steps": 549, "loss": 0.7669, "lr": 5e-06, "epoch": 1.3114754098360657, "percentage": 43.72, "elapsed_time": "0:25:07", "remaining_time": "0:32:21"}
|
26 |
+
{"current_steps": 250, "total_steps": 549, "loss": 0.7679, "lr": 5e-06, "epoch": 1.366120218579235, "percentage": 45.54, "elapsed_time": "0:26:06", "remaining_time": "0:31:13"}
|
27 |
+
{"current_steps": 260, "total_steps": 549, "loss": 0.7606, "lr": 5e-06, "epoch": 1.4207650273224044, "percentage": 47.36, "elapsed_time": "0:27:04", "remaining_time": "0:30:05"}
|
28 |
+
{"current_steps": 270, "total_steps": 549, "loss": 0.7593, "lr": 5e-06, "epoch": 1.4754098360655736, "percentage": 49.18, "elapsed_time": "0:28:02", "remaining_time": "0:28:58"}
|
29 |
+
{"current_steps": 280, "total_steps": 549, "loss": 0.7612, "lr": 5e-06, "epoch": 1.530054644808743, "percentage": 51.0, "elapsed_time": "0:29:00", "remaining_time": "0:27:52"}
|
30 |
+
{"current_steps": 290, "total_steps": 549, "loss": 0.7582, "lr": 5e-06, "epoch": 1.5846994535519126, "percentage": 52.82, "elapsed_time": "0:29:59", "remaining_time": "0:26:46"}
|
31 |
+
{"current_steps": 300, "total_steps": 549, "loss": 0.7625, "lr": 5e-06, "epoch": 1.639344262295082, "percentage": 54.64, "elapsed_time": "0:30:57", "remaining_time": "0:25:41"}
|
32 |
+
{"current_steps": 310, "total_steps": 549, "loss": 0.7626, "lr": 5e-06, "epoch": 1.6939890710382515, "percentage": 56.47, "elapsed_time": "0:31:55", "remaining_time": "0:24:36"}
|
33 |
+
{"current_steps": 320, "total_steps": 549, "loss": 0.764, "lr": 5e-06, "epoch": 1.748633879781421, "percentage": 58.29, "elapsed_time": "0:32:54", "remaining_time": "0:23:32"}
|
34 |
+
{"current_steps": 330, "total_steps": 549, "loss": 0.762, "lr": 5e-06, "epoch": 1.8032786885245902, "percentage": 60.11, "elapsed_time": "0:33:52", "remaining_time": "0:22:28"}
|
35 |
+
{"current_steps": 340, "total_steps": 549, "loss": 0.7672, "lr": 5e-06, "epoch": 1.8579234972677594, "percentage": 61.93, "elapsed_time": "0:34:51", "remaining_time": "0:21:25"}
|
36 |
+
{"current_steps": 350, "total_steps": 549, "loss": 0.7615, "lr": 5e-06, "epoch": 1.9125683060109289, "percentage": 63.75, "elapsed_time": "0:35:49", "remaining_time": "0:20:22"}
|
37 |
+
{"current_steps": 360, "total_steps": 549, "loss": 0.7596, "lr": 5e-06, "epoch": 1.9672131147540983, "percentage": 65.57, "elapsed_time": "0:36:47", "remaining_time": "0:19:19"}
|
38 |
+
{"current_steps": 366, "total_steps": 549, "eval_loss": 0.7888949513435364, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:37:40", "remaining_time": "0:18:50"}
|
39 |
+
{"current_steps": 370, "total_steps": 549, "loss": 0.7482, "lr": 5e-06, "epoch": 2.021857923497268, "percentage": 67.4, "elapsed_time": "0:39:33", "remaining_time": "0:19:08"}
|
40 |
+
{"current_steps": 380, "total_steps": 549, "loss": 0.7216, "lr": 5e-06, "epoch": 2.0765027322404372, "percentage": 69.22, "elapsed_time": "0:40:31", "remaining_time": "0:18:01"}
|
41 |
+
{"current_steps": 390, "total_steps": 549, "loss": 0.7175, "lr": 5e-06, "epoch": 2.1311475409836067, "percentage": 71.04, "elapsed_time": "0:41:30", "remaining_time": "0:16:55"}
|
42 |
+
{"current_steps": 400, "total_steps": 549, "loss": 0.7222, "lr": 5e-06, "epoch": 2.185792349726776, "percentage": 72.86, "elapsed_time": "0:42:28", "remaining_time": "0:15:49"}
|
43 |
+
{"current_steps": 410, "total_steps": 549, "loss": 0.7184, "lr": 5e-06, "epoch": 2.240437158469945, "percentage": 74.68, "elapsed_time": "0:43:26", "remaining_time": "0:14:43"}
|
44 |
+
{"current_steps": 420, "total_steps": 549, "loss": 0.7222, "lr": 5e-06, "epoch": 2.2950819672131146, "percentage": 76.5, "elapsed_time": "0:44:24", "remaining_time": "0:13:38"}
|
45 |
+
{"current_steps": 430, "total_steps": 549, "loss": 0.7246, "lr": 5e-06, "epoch": 2.349726775956284, "percentage": 78.32, "elapsed_time": "0:45:23", "remaining_time": "0:12:33"}
|
46 |
+
{"current_steps": 440, "total_steps": 549, "loss": 0.7228, "lr": 5e-06, "epoch": 2.4043715846994536, "percentage": 80.15, "elapsed_time": "0:46:21", "remaining_time": "0:11:29"}
|
47 |
+
{"current_steps": 450, "total_steps": 549, "loss": 0.722, "lr": 5e-06, "epoch": 2.459016393442623, "percentage": 81.97, "elapsed_time": "0:47:19", "remaining_time": "0:10:24"}
|
48 |
+
{"current_steps": 460, "total_steps": 549, "loss": 0.7215, "lr": 5e-06, "epoch": 2.5136612021857925, "percentage": 83.79, "elapsed_time": "0:48:17", "remaining_time": "0:09:20"}
|
49 |
+
{"current_steps": 470, "total_steps": 549, "loss": 0.7255, "lr": 5e-06, "epoch": 2.5683060109289615, "percentage": 85.61, "elapsed_time": "0:49:16", "remaining_time": "0:08:16"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7288
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff856ed23876b6d804b4faae24c537ade2f7e1f816c3f8c2a98b81a57603c7af
|
3 |
size 7288
|