Training in progress, epoch 1
Browse files- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +69 -117
- training_args.bin +1 -1
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4976698672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6c5857dd8d7bf751a51ed832110e144c05d1252501eb3c516edd411e0e880da
|
3 |
size 4976698672
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10ce8bcabaa7c5d3d48e7527852f2cd42e57c92af3b1e34043cfe5b2af29d03a
|
3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bd8615d3e81eaf46656f678ae4d9564af7bb3df87edacbad8367720d07bb409
|
3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1436cafb42c114e82eae1e28fc1e9494069b4b291756f9c9fadbddbf6d48e4a
|
3 |
size 1168138808
|
trainer_log.jsonl
CHANGED
@@ -1,117 +1,69 @@
|
|
1 |
-
{"current_steps": 10, "total_steps": 1134, "loss": 1.
|
2 |
-
{"current_steps": 20, "total_steps": 1134, "loss": 0.
|
3 |
-
{"current_steps": 30, "total_steps": 1134, "loss": 0.
|
4 |
-
{"current_steps": 40, "total_steps": 1134, "loss": 0.
|
5 |
-
{"current_steps": 50, "total_steps": 1134, "loss": 0.
|
6 |
-
{"current_steps": 60, "total_steps": 1134, "loss": 0.
|
7 |
-
{"current_steps": 70, "total_steps": 1134, "loss": 0.
|
8 |
-
{"current_steps": 80, "total_steps": 1134, "loss": 0.
|
9 |
-
{"current_steps": 90, "total_steps": 1134, "loss": 0.
|
10 |
-
{"current_steps": 100, "total_steps": 1134, "loss": 0.
|
11 |
-
{"current_steps": 110, "total_steps": 1134, "loss": 0.
|
12 |
-
{"current_steps": 120, "total_steps": 1134, "loss": 0.
|
13 |
-
{"current_steps": 130, "total_steps": 1134, "loss": 0.
|
14 |
-
{"current_steps": 140, "total_steps": 1134, "loss": 0.
|
15 |
-
{"current_steps": 150, "total_steps": 1134, "loss": 0.
|
16 |
-
{"current_steps": 160, "total_steps": 1134, "loss": 0.
|
17 |
-
{"current_steps": 170, "total_steps": 1134, "loss": 0.
|
18 |
-
{"current_steps": 180, "total_steps": 1134, "loss": 0.
|
19 |
-
{"current_steps": 190, "total_steps": 1134, "loss": 0.
|
20 |
-
{"current_steps": 200, "total_steps": 1134, "loss": 0.
|
21 |
-
{"current_steps": 210, "total_steps": 1134, "loss": 0.
|
22 |
-
{"current_steps": 220, "total_steps": 1134, "loss": 0.
|
23 |
-
{"current_steps": 230, "total_steps": 1134, "loss": 0.
|
24 |
-
{"current_steps": 240, "total_steps": 1134, "loss": 0.
|
25 |
-
{"current_steps": 250, "total_steps": 1134, "loss": 0.
|
26 |
-
{"current_steps": 260, "total_steps": 1134, "loss": 0.
|
27 |
-
{"current_steps": 270, "total_steps": 1134, "loss": 0.
|
28 |
-
{"current_steps": 280, "total_steps": 1134, "loss": 0.
|
29 |
-
{"current_steps": 290, "total_steps": 1134, "loss": 0.
|
30 |
-
{"current_steps": 300, "total_steps": 1134, "loss": 0.
|
31 |
-
{"current_steps": 310, "total_steps": 1134, "loss": 0.
|
32 |
-
{"current_steps": 320, "total_steps": 1134, "loss": 0.
|
33 |
-
{"current_steps": 330, "total_steps": 1134, "loss": 0.
|
34 |
-
{"current_steps": 340, "total_steps": 1134, "loss": 0.
|
35 |
-
{"current_steps": 350, "total_steps": 1134, "loss": 0.
|
36 |
-
{"current_steps": 360, "total_steps": 1134, "loss": 0.
|
37 |
-
{"current_steps": 370, "total_steps": 1134, "loss": 0.
|
38 |
-
{"current_steps": 378, "total_steps": 1134, "eval_loss": 0.
|
39 |
-
{"current_steps": 380, "total_steps": 1134, "loss": 0.
|
40 |
-
{"current_steps": 390, "total_steps": 1134, "loss": 0.
|
41 |
-
{"current_steps": 400, "total_steps": 1134, "loss": 0.
|
42 |
-
{"current_steps": 410, "total_steps": 1134, "loss": 0.
|
43 |
-
{"current_steps": 420, "total_steps": 1134, "loss": 0.
|
44 |
-
{"current_steps": 430, "total_steps": 1134, "loss": 0.
|
45 |
-
{"current_steps": 440, "total_steps": 1134, "loss": 0.
|
46 |
-
{"current_steps": 450, "total_steps": 1134, "loss": 0.
|
47 |
-
{"current_steps": 460, "total_steps": 1134, "loss": 0.
|
48 |
-
{"current_steps": 470, "total_steps": 1134, "loss": 0.
|
49 |
-
{"current_steps": 480, "total_steps": 1134, "loss": 0.
|
50 |
-
{"current_steps": 490, "total_steps": 1134, "loss": 0.
|
51 |
-
{"current_steps": 500, "total_steps": 1134, "loss": 0.
|
52 |
-
{"current_steps": 510, "total_steps": 1134, "loss": 0.
|
53 |
-
{"current_steps": 520, "total_steps": 1134, "loss": 0.
|
54 |
-
{"current_steps": 530, "total_steps": 1134, "loss": 0.
|
55 |
-
{"current_steps": 540, "total_steps": 1134, "loss": 0.
|
56 |
-
{"current_steps": 550, "total_steps": 1134, "loss": 0.
|
57 |
-
{"current_steps": 560, "total_steps": 1134, "loss": 0.
|
58 |
-
{"current_steps": 570, "total_steps": 1134, "loss": 0.
|
59 |
-
{"current_steps": 580, "total_steps": 1134, "loss": 0.
|
60 |
-
{"current_steps": 590, "total_steps": 1134, "loss": 0.
|
61 |
-
{"current_steps": 600, "total_steps": 1134, "loss": 0.
|
62 |
-
{"current_steps": 610, "total_steps": 1134, "loss": 0.
|
63 |
-
{"current_steps": 620, "total_steps": 1134, "loss": 0.
|
64 |
-
{"current_steps": 630, "total_steps": 1134, "loss": 0.
|
65 |
-
{"current_steps": 640, "total_steps": 1134, "loss": 0.
|
66 |
-
{"current_steps": 650, "total_steps": 1134, "loss": 0.
|
67 |
-
{"current_steps": 660, "total_steps": 1134, "loss": 0.
|
68 |
-
{"current_steps": 670, "total_steps": 1134, "loss": 0.
|
69 |
-
{"current_steps": 680, "total_steps": 1134, "loss": 0.
|
70 |
-
{"current_steps": 690, "total_steps": 1134, "loss": 0.7488, "lr": 5e-06, "epoch": 1.8253968253968254, "percentage": 60.85, "elapsed_time": "1:10:15", "remaining_time": "0:45:12"}
|
71 |
-
{"current_steps": 700, "total_steps": 1134, "loss": 0.7434, "lr": 5e-06, "epoch": 1.8518518518518519, "percentage": 61.73, "elapsed_time": "1:11:14", "remaining_time": "0:44:10"}
|
72 |
-
{"current_steps": 710, "total_steps": 1134, "loss": 0.7475, "lr": 5e-06, "epoch": 1.8783068783068781, "percentage": 62.61, "elapsed_time": "1:12:13", "remaining_time": "0:43:08"}
|
73 |
-
{"current_steps": 720, "total_steps": 1134, "loss": 0.7418, "lr": 5e-06, "epoch": 1.9047619047619047, "percentage": 63.49, "elapsed_time": "1:13:13", "remaining_time": "0:42:06"}
|
74 |
-
{"current_steps": 730, "total_steps": 1134, "loss": 0.7451, "lr": 5e-06, "epoch": 1.9312169312169312, "percentage": 64.37, "elapsed_time": "1:14:12", "remaining_time": "0:41:04"}
|
75 |
-
{"current_steps": 740, "total_steps": 1134, "loss": 0.7466, "lr": 5e-06, "epoch": 1.9576719576719577, "percentage": 65.26, "elapsed_time": "1:15:11", "remaining_time": "0:40:02"}
|
76 |
-
{"current_steps": 750, "total_steps": 1134, "loss": 0.7445, "lr": 5e-06, "epoch": 1.9841269841269842, "percentage": 66.14, "elapsed_time": "1:16:11", "remaining_time": "0:39:00"}
|
77 |
-
{"current_steps": 756, "total_steps": 1134, "eval_loss": 0.7725165486335754, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "1:17:23", "remaining_time": "0:38:41"}
|
78 |
-
{"current_steps": 760, "total_steps": 1134, "loss": 0.7322, "lr": 5e-06, "epoch": 2.0105820105820107, "percentage": 67.02, "elapsed_time": "1:19:21", "remaining_time": "0:39:03"}
|
79 |
-
{"current_steps": 770, "total_steps": 1134, "loss": 0.7091, "lr": 5e-06, "epoch": 2.037037037037037, "percentage": 67.9, "elapsed_time": "1:20:20", "remaining_time": "0:37:58"}
|
80 |
-
{"current_steps": 780, "total_steps": 1134, "loss": 0.711, "lr": 5e-06, "epoch": 2.0634920634920633, "percentage": 68.78, "elapsed_time": "1:21:19", "remaining_time": "0:36:54"}
|
81 |
-
{"current_steps": 790, "total_steps": 1134, "loss": 0.7113, "lr": 5e-06, "epoch": 2.0899470899470898, "percentage": 69.66, "elapsed_time": "1:22:19", "remaining_time": "0:35:50"}
|
82 |
-
{"current_steps": 800, "total_steps": 1134, "loss": 0.7109, "lr": 5e-06, "epoch": 2.1164021164021163, "percentage": 70.55, "elapsed_time": "1:23:18", "remaining_time": "0:34:46"}
|
83 |
-
{"current_steps": 810, "total_steps": 1134, "loss": 0.7126, "lr": 5e-06, "epoch": 2.142857142857143, "percentage": 71.43, "elapsed_time": "1:24:17", "remaining_time": "0:33:42"}
|
84 |
-
{"current_steps": 820, "total_steps": 1134, "loss": 0.7124, "lr": 5e-06, "epoch": 2.1693121693121693, "percentage": 72.31, "elapsed_time": "1:25:16", "remaining_time": "0:32:39"}
|
85 |
-
{"current_steps": 830, "total_steps": 1134, "loss": 0.7123, "lr": 5e-06, "epoch": 2.195767195767196, "percentage": 73.19, "elapsed_time": "1:26:16", "remaining_time": "0:31:35"}
|
86 |
-
{"current_steps": 840, "total_steps": 1134, "loss": 0.7123, "lr": 5e-06, "epoch": 2.2222222222222223, "percentage": 74.07, "elapsed_time": "1:27:15", "remaining_time": "0:30:32"}
|
87 |
-
{"current_steps": 850, "total_steps": 1134, "loss": 0.7139, "lr": 5e-06, "epoch": 2.248677248677249, "percentage": 74.96, "elapsed_time": "1:28:14", "remaining_time": "0:29:28"}
|
88 |
-
{"current_steps": 860, "total_steps": 1134, "loss": 0.7164, "lr": 5e-06, "epoch": 2.2751322751322753, "percentage": 75.84, "elapsed_time": "1:29:13", "remaining_time": "0:28:25"}
|
89 |
-
{"current_steps": 870, "total_steps": 1134, "loss": 0.7141, "lr": 5e-06, "epoch": 2.3015873015873014, "percentage": 76.72, "elapsed_time": "1:30:13", "remaining_time": "0:27:22"}
|
90 |
-
{"current_steps": 880, "total_steps": 1134, "loss": 0.7189, "lr": 5e-06, "epoch": 2.328042328042328, "percentage": 77.6, "elapsed_time": "1:31:12", "remaining_time": "0:26:19"}
|
91 |
-
{"current_steps": 890, "total_steps": 1134, "loss": 0.7159, "lr": 5e-06, "epoch": 2.3544973544973544, "percentage": 78.48, "elapsed_time": "1:32:11", "remaining_time": "0:25:16"}
|
92 |
-
{"current_steps": 900, "total_steps": 1134, "loss": 0.717, "lr": 5e-06, "epoch": 2.380952380952381, "percentage": 79.37, "elapsed_time": "1:33:11", "remaining_time": "0:24:13"}
|
93 |
-
{"current_steps": 910, "total_steps": 1134, "loss": 0.7176, "lr": 5e-06, "epoch": 2.4074074074074074, "percentage": 80.25, "elapsed_time": "1:34:10", "remaining_time": "0:23:10"}
|
94 |
-
{"current_steps": 920, "total_steps": 1134, "loss": 0.7141, "lr": 5e-06, "epoch": 2.433862433862434, "percentage": 81.13, "elapsed_time": "1:35:09", "remaining_time": "0:22:08"}
|
95 |
-
{"current_steps": 930, "total_steps": 1134, "loss": 0.7147, "lr": 5e-06, "epoch": 2.4603174603174605, "percentage": 82.01, "elapsed_time": "1:36:08", "remaining_time": "0:21:05"}
|
96 |
-
{"current_steps": 940, "total_steps": 1134, "loss": 0.7126, "lr": 5e-06, "epoch": 2.4867724867724865, "percentage": 82.89, "elapsed_time": "1:37:07", "remaining_time": "0:20:02"}
|
97 |
-
{"current_steps": 950, "total_steps": 1134, "loss": 0.7171, "lr": 5e-06, "epoch": 2.5132275132275135, "percentage": 83.77, "elapsed_time": "1:38:07", "remaining_time": "0:19:00"}
|
98 |
-
{"current_steps": 960, "total_steps": 1134, "loss": 0.7182, "lr": 5e-06, "epoch": 2.5396825396825395, "percentage": 84.66, "elapsed_time": "1:39:07", "remaining_time": "0:17:57"}
|
99 |
-
{"current_steps": 970, "total_steps": 1134, "loss": 0.7187, "lr": 5e-06, "epoch": 2.566137566137566, "percentage": 85.54, "elapsed_time": "1:40:05", "remaining_time": "0:16:55"}
|
100 |
-
{"current_steps": 980, "total_steps": 1134, "loss": 0.7164, "lr": 5e-06, "epoch": 2.5925925925925926, "percentage": 86.42, "elapsed_time": "1:41:05", "remaining_time": "0:15:53"}
|
101 |
-
{"current_steps": 990, "total_steps": 1134, "loss": 0.7157, "lr": 5e-06, "epoch": 2.619047619047619, "percentage": 87.3, "elapsed_time": "1:42:05", "remaining_time": "0:14:50"}
|
102 |
-
{"current_steps": 1000, "total_steps": 1134, "loss": 0.7175, "lr": 5e-06, "epoch": 2.6455026455026456, "percentage": 88.18, "elapsed_time": "1:43:04", "remaining_time": "0:13:48"}
|
103 |
-
{"current_steps": 1010, "total_steps": 1134, "loss": 0.715, "lr": 5e-06, "epoch": 2.671957671957672, "percentage": 89.07, "elapsed_time": "1:44:03", "remaining_time": "0:12:46"}
|
104 |
-
{"current_steps": 1020, "total_steps": 1134, "loss": 0.7208, "lr": 5e-06, "epoch": 2.6984126984126986, "percentage": 89.95, "elapsed_time": "1:45:02", "remaining_time": "0:11:44"}
|
105 |
-
{"current_steps": 1030, "total_steps": 1134, "loss": 0.7157, "lr": 5e-06, "epoch": 2.7248677248677247, "percentage": 90.83, "elapsed_time": "1:46:02", "remaining_time": "0:10:42"}
|
106 |
-
{"current_steps": 1040, "total_steps": 1134, "loss": 0.7119, "lr": 5e-06, "epoch": 2.751322751322751, "percentage": 91.71, "elapsed_time": "1:47:01", "remaining_time": "0:09:40"}
|
107 |
-
{"current_steps": 1050, "total_steps": 1134, "loss": 0.7159, "lr": 5e-06, "epoch": 2.7777777777777777, "percentage": 92.59, "elapsed_time": "1:47:59", "remaining_time": "0:08:38"}
|
108 |
-
{"current_steps": 1060, "total_steps": 1134, "loss": 0.7152, "lr": 5e-06, "epoch": 2.804232804232804, "percentage": 93.47, "elapsed_time": "1:48:59", "remaining_time": "0:07:36"}
|
109 |
-
{"current_steps": 1070, "total_steps": 1134, "loss": 0.7171, "lr": 5e-06, "epoch": 2.8306878306878307, "percentage": 94.36, "elapsed_time": "1:49:58", "remaining_time": "0:06:34"}
|
110 |
-
{"current_steps": 1080, "total_steps": 1134, "loss": 0.7154, "lr": 5e-06, "epoch": 2.857142857142857, "percentage": 95.24, "elapsed_time": "1:50:57", "remaining_time": "0:05:32"}
|
111 |
-
{"current_steps": 1090, "total_steps": 1134, "loss": 0.7152, "lr": 5e-06, "epoch": 2.8835978835978837, "percentage": 96.12, "elapsed_time": "1:51:57", "remaining_time": "0:04:31"}
|
112 |
-
{"current_steps": 1100, "total_steps": 1134, "loss": 0.7181, "lr": 5e-06, "epoch": 2.91005291005291, "percentage": 97.0, "elapsed_time": "1:52:56", "remaining_time": "0:03:29"}
|
113 |
-
{"current_steps": 1110, "total_steps": 1134, "loss": 0.7167, "lr": 5e-06, "epoch": 2.9365079365079367, "percentage": 97.88, "elapsed_time": "1:53:56", "remaining_time": "0:02:27"}
|
114 |
-
{"current_steps": 1120, "total_steps": 1134, "loss": 0.7146, "lr": 5e-06, "epoch": 2.962962962962963, "percentage": 98.77, "elapsed_time": "1:54:55", "remaining_time": "0:01:26"}
|
115 |
-
{"current_steps": 1130, "total_steps": 1134, "loss": 0.7153, "lr": 5e-06, "epoch": 2.9894179894179893, "percentage": 99.65, "elapsed_time": "1:55:54", "remaining_time": "0:00:24"}
|
116 |
-
{"current_steps": 1134, "total_steps": 1134, "eval_loss": 0.7704827189445496, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:58:29", "remaining_time": "0:00:00"}
|
117 |
-
{"current_steps": 1134, "total_steps": 1134, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "1:59:56", "remaining_time": "0:00:00"}
|
|
|
1 |
+
{"current_steps": 10, "total_steps": 1134, "loss": 1.0155, "lr": 5e-06, "epoch": 0.026455026455026454, "percentage": 0.88, "elapsed_time": "0:00:59", "remaining_time": "1:51:38"}
|
2 |
+
{"current_steps": 20, "total_steps": 1134, "loss": 0.9187, "lr": 5e-06, "epoch": 0.05291005291005291, "percentage": 1.76, "elapsed_time": "0:01:57", "remaining_time": "1:49:22"}
|
3 |
+
{"current_steps": 30, "total_steps": 1134, "loss": 0.8889, "lr": 5e-06, "epoch": 0.07936507936507936, "percentage": 2.65, "elapsed_time": "0:02:56", "remaining_time": "1:47:58"}
|
4 |
+
{"current_steps": 40, "total_steps": 1134, "loss": 0.8728, "lr": 5e-06, "epoch": 0.10582010582010581, "percentage": 3.53, "elapsed_time": "0:03:54", "remaining_time": "1:46:50"}
|
5 |
+
{"current_steps": 50, "total_steps": 1134, "loss": 0.8553, "lr": 5e-06, "epoch": 0.13227513227513227, "percentage": 4.41, "elapsed_time": "0:04:52", "remaining_time": "1:45:44"}
|
6 |
+
{"current_steps": 60, "total_steps": 1134, "loss": 0.8472, "lr": 5e-06, "epoch": 0.15873015873015872, "percentage": 5.29, "elapsed_time": "0:05:50", "remaining_time": "1:44:41"}
|
7 |
+
{"current_steps": 70, "total_steps": 1134, "loss": 0.8386, "lr": 5e-06, "epoch": 0.18518518518518517, "percentage": 6.17, "elapsed_time": "0:06:49", "remaining_time": "1:43:45"}
|
8 |
+
{"current_steps": 80, "total_steps": 1134, "loss": 0.8303, "lr": 5e-06, "epoch": 0.21164021164021163, "percentage": 7.05, "elapsed_time": "0:07:47", "remaining_time": "1:42:45"}
|
9 |
+
{"current_steps": 90, "total_steps": 1134, "loss": 0.8296, "lr": 5e-06, "epoch": 0.23809523809523808, "percentage": 7.94, "elapsed_time": "0:08:46", "remaining_time": "1:41:44"}
|
10 |
+
{"current_steps": 100, "total_steps": 1134, "loss": 0.8208, "lr": 5e-06, "epoch": 0.26455026455026454, "percentage": 8.82, "elapsed_time": "0:09:44", "remaining_time": "1:40:45"}
|
11 |
+
{"current_steps": 110, "total_steps": 1134, "loss": 0.817, "lr": 5e-06, "epoch": 0.291005291005291, "percentage": 9.7, "elapsed_time": "0:10:43", "remaining_time": "1:39:47"}
|
12 |
+
{"current_steps": 120, "total_steps": 1134, "loss": 0.816, "lr": 5e-06, "epoch": 0.31746031746031744, "percentage": 10.58, "elapsed_time": "0:11:41", "remaining_time": "1:38:46"}
|
13 |
+
{"current_steps": 130, "total_steps": 1134, "loss": 0.8095, "lr": 5e-06, "epoch": 0.3439153439153439, "percentage": 11.46, "elapsed_time": "0:12:39", "remaining_time": "1:37:46"}
|
14 |
+
{"current_steps": 140, "total_steps": 1134, "loss": 0.809, "lr": 5e-06, "epoch": 0.37037037037037035, "percentage": 12.35, "elapsed_time": "0:13:38", "remaining_time": "1:36:49"}
|
15 |
+
{"current_steps": 150, "total_steps": 1134, "loss": 0.8116, "lr": 5e-06, "epoch": 0.3968253968253968, "percentage": 13.23, "elapsed_time": "0:14:36", "remaining_time": "1:35:49"}
|
16 |
+
{"current_steps": 160, "total_steps": 1134, "loss": 0.8043, "lr": 5e-06, "epoch": 0.42328042328042326, "percentage": 14.11, "elapsed_time": "0:15:34", "remaining_time": "1:34:49"}
|
17 |
+
{"current_steps": 170, "total_steps": 1134, "loss": 0.8081, "lr": 5e-06, "epoch": 0.4497354497354497, "percentage": 14.99, "elapsed_time": "0:16:33", "remaining_time": "1:33:55"}
|
18 |
+
{"current_steps": 180, "total_steps": 1134, "loss": 0.806, "lr": 5e-06, "epoch": 0.47619047619047616, "percentage": 15.87, "elapsed_time": "0:17:32", "remaining_time": "1:32:58"}
|
19 |
+
{"current_steps": 190, "total_steps": 1134, "loss": 0.8021, "lr": 5e-06, "epoch": 0.5026455026455027, "percentage": 16.75, "elapsed_time": "0:18:30", "remaining_time": "1:31:59"}
|
20 |
+
{"current_steps": 200, "total_steps": 1134, "loss": 0.7978, "lr": 5e-06, "epoch": 0.5291005291005291, "percentage": 17.64, "elapsed_time": "0:19:29", "remaining_time": "1:30:59"}
|
21 |
+
{"current_steps": 210, "total_steps": 1134, "loss": 0.7997, "lr": 5e-06, "epoch": 0.5555555555555556, "percentage": 18.52, "elapsed_time": "0:20:27", "remaining_time": "1:30:01"}
|
22 |
+
{"current_steps": 220, "total_steps": 1134, "loss": 0.7942, "lr": 5e-06, "epoch": 0.582010582010582, "percentage": 19.4, "elapsed_time": "0:21:26", "remaining_time": "1:29:03"}
|
23 |
+
{"current_steps": 230, "total_steps": 1134, "loss": 0.791, "lr": 5e-06, "epoch": 0.6084656084656085, "percentage": 20.28, "elapsed_time": "0:22:24", "remaining_time": "1:28:03"}
|
24 |
+
{"current_steps": 240, "total_steps": 1134, "loss": 0.7948, "lr": 5e-06, "epoch": 0.6349206349206349, "percentage": 21.16, "elapsed_time": "0:23:22", "remaining_time": "1:27:04"}
|
25 |
+
{"current_steps": 250, "total_steps": 1134, "loss": 0.7914, "lr": 5e-06, "epoch": 0.6613756613756614, "percentage": 22.05, "elapsed_time": "0:24:21", "remaining_time": "1:26:06"}
|
26 |
+
{"current_steps": 260, "total_steps": 1134, "loss": 0.7941, "lr": 5e-06, "epoch": 0.6878306878306878, "percentage": 22.93, "elapsed_time": "0:25:19", "remaining_time": "1:25:07"}
|
27 |
+
{"current_steps": 270, "total_steps": 1134, "loss": 0.7934, "lr": 5e-06, "epoch": 0.7142857142857143, "percentage": 23.81, "elapsed_time": "0:26:17", "remaining_time": "1:24:08"}
|
28 |
+
{"current_steps": 280, "total_steps": 1134, "loss": 0.7938, "lr": 5e-06, "epoch": 0.7407407407407407, "percentage": 24.69, "elapsed_time": "0:27:17", "remaining_time": "1:23:14"}
|
29 |
+
{"current_steps": 290, "total_steps": 1134, "loss": 0.7917, "lr": 5e-06, "epoch": 0.7671957671957672, "percentage": 25.57, "elapsed_time": "0:28:16", "remaining_time": "1:22:17"}
|
30 |
+
{"current_steps": 300, "total_steps": 1134, "loss": 0.7882, "lr": 5e-06, "epoch": 0.7936507936507936, "percentage": 26.46, "elapsed_time": "0:29:15", "remaining_time": "1:21:20"}
|
31 |
+
{"current_steps": 310, "total_steps": 1134, "loss": 0.7849, "lr": 5e-06, "epoch": 0.8201058201058201, "percentage": 27.34, "elapsed_time": "0:30:13", "remaining_time": "1:20:21"}
|
32 |
+
{"current_steps": 320, "total_steps": 1134, "loss": 0.7884, "lr": 5e-06, "epoch": 0.8465608465608465, "percentage": 28.22, "elapsed_time": "0:31:12", "remaining_time": "1:19:22"}
|
33 |
+
{"current_steps": 330, "total_steps": 1134, "loss": 0.7832, "lr": 5e-06, "epoch": 0.873015873015873, "percentage": 29.1, "elapsed_time": "0:32:10", "remaining_time": "1:18:23"}
|
34 |
+
{"current_steps": 340, "total_steps": 1134, "loss": 0.7889, "lr": 5e-06, "epoch": 0.8994708994708994, "percentage": 29.98, "elapsed_time": "0:33:09", "remaining_time": "1:17:25"}
|
35 |
+
{"current_steps": 350, "total_steps": 1134, "loss": 0.7915, "lr": 5e-06, "epoch": 0.9259259259259259, "percentage": 30.86, "elapsed_time": "0:34:07", "remaining_time": "1:16:26"}
|
36 |
+
{"current_steps": 360, "total_steps": 1134, "loss": 0.7863, "lr": 5e-06, "epoch": 0.9523809523809523, "percentage": 31.75, "elapsed_time": "0:35:05", "remaining_time": "1:15:27"}
|
37 |
+
{"current_steps": 370, "total_steps": 1134, "loss": 0.7836, "lr": 5e-06, "epoch": 0.9788359788359788, "percentage": 32.63, "elapsed_time": "0:36:04", "remaining_time": "1:14:30"}
|
38 |
+
{"current_steps": 378, "total_steps": 1134, "eval_loss": 0.7853822112083435, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:37:28", "remaining_time": "1:14:57"}
|
39 |
+
{"current_steps": 380, "total_steps": 1134, "loss": 0.7782, "lr": 5e-06, "epoch": 1.0052910052910053, "percentage": 33.51, "elapsed_time": "0:39:10", "remaining_time": "1:17:44"}
|
40 |
+
{"current_steps": 390, "total_steps": 1134, "loss": 0.752, "lr": 5e-06, "epoch": 1.0317460317460316, "percentage": 34.39, "elapsed_time": "0:40:09", "remaining_time": "1:16:35"}
|
41 |
+
{"current_steps": 400, "total_steps": 1134, "loss": 0.7504, "lr": 5e-06, "epoch": 1.0582010582010581, "percentage": 35.27, "elapsed_time": "0:41:07", "remaining_time": "1:15:27"}
|
42 |
+
{"current_steps": 410, "total_steps": 1134, "loss": 0.753, "lr": 5e-06, "epoch": 1.0846560846560847, "percentage": 36.16, "elapsed_time": "0:42:05", "remaining_time": "1:14:20"}
|
43 |
+
{"current_steps": 420, "total_steps": 1134, "loss": 0.7526, "lr": 5e-06, "epoch": 1.1111111111111112, "percentage": 37.04, "elapsed_time": "0:43:04", "remaining_time": "1:13:13"}
|
44 |
+
{"current_steps": 430, "total_steps": 1134, "loss": 0.7499, "lr": 5e-06, "epoch": 1.1375661375661377, "percentage": 37.92, "elapsed_time": "0:44:02", "remaining_time": "1:12:06"}
|
45 |
+
{"current_steps": 440, "total_steps": 1134, "loss": 0.7534, "lr": 5e-06, "epoch": 1.164021164021164, "percentage": 38.8, "elapsed_time": "0:45:01", "remaining_time": "1:11:00"}
|
46 |
+
{"current_steps": 450, "total_steps": 1134, "loss": 0.7516, "lr": 5e-06, "epoch": 1.1904761904761905, "percentage": 39.68, "elapsed_time": "0:45:59", "remaining_time": "1:09:54"}
|
47 |
+
{"current_steps": 460, "total_steps": 1134, "loss": 0.7525, "lr": 5e-06, "epoch": 1.216931216931217, "percentage": 40.56, "elapsed_time": "0:46:57", "remaining_time": "1:08:48"}
|
48 |
+
{"current_steps": 470, "total_steps": 1134, "loss": 0.7506, "lr": 5e-06, "epoch": 1.2433862433862433, "percentage": 41.45, "elapsed_time": "0:47:56", "remaining_time": "1:07:43"}
|
49 |
+
{"current_steps": 480, "total_steps": 1134, "loss": 0.7529, "lr": 5e-06, "epoch": 1.2698412698412698, "percentage": 42.33, "elapsed_time": "0:48:55", "remaining_time": "1:06:38"}
|
50 |
+
{"current_steps": 490, "total_steps": 1134, "loss": 0.7523, "lr": 5e-06, "epoch": 1.2962962962962963, "percentage": 43.21, "elapsed_time": "0:49:53", "remaining_time": "1:05:34"}
|
51 |
+
{"current_steps": 500, "total_steps": 1134, "loss": 0.7493, "lr": 5e-06, "epoch": 1.3227513227513228, "percentage": 44.09, "elapsed_time": "0:50:51", "remaining_time": "1:04:29"}
|
52 |
+
{"current_steps": 510, "total_steps": 1134, "loss": 0.7468, "lr": 5e-06, "epoch": 1.3492063492063493, "percentage": 44.97, "elapsed_time": "0:51:50", "remaining_time": "1:03:25"}
|
53 |
+
{"current_steps": 520, "total_steps": 1134, "loss": 0.7498, "lr": 5e-06, "epoch": 1.3756613756613756, "percentage": 45.86, "elapsed_time": "0:52:48", "remaining_time": "1:02:21"}
|
54 |
+
{"current_steps": 530, "total_steps": 1134, "loss": 0.7523, "lr": 5e-06, "epoch": 1.402116402116402, "percentage": 46.74, "elapsed_time": "0:53:46", "remaining_time": "1:01:17"}
|
55 |
+
{"current_steps": 540, "total_steps": 1134, "loss": 0.7491, "lr": 5e-06, "epoch": 1.4285714285714286, "percentage": 47.62, "elapsed_time": "0:54:45", "remaining_time": "1:00:13"}
|
56 |
+
{"current_steps": 550, "total_steps": 1134, "loss": 0.7501, "lr": 5e-06, "epoch": 1.455026455026455, "percentage": 48.5, "elapsed_time": "0:55:43", "remaining_time": "0:59:10"}
|
57 |
+
{"current_steps": 560, "total_steps": 1134, "loss": 0.7523, "lr": 5e-06, "epoch": 1.4814814814814814, "percentage": 49.38, "elapsed_time": "0:56:41", "remaining_time": "0:58:06"}
|
58 |
+
{"current_steps": 570, "total_steps": 1134, "loss": 0.7506, "lr": 5e-06, "epoch": 1.507936507936508, "percentage": 50.26, "elapsed_time": "0:57:40", "remaining_time": "0:57:03"}
|
59 |
+
{"current_steps": 580, "total_steps": 1134, "loss": 0.7498, "lr": 5e-06, "epoch": 1.5343915343915344, "percentage": 51.15, "elapsed_time": "0:58:38", "remaining_time": "0:56:00"}
|
60 |
+
{"current_steps": 590, "total_steps": 1134, "loss": 0.749, "lr": 5e-06, "epoch": 1.560846560846561, "percentage": 52.03, "elapsed_time": "0:59:36", "remaining_time": "0:54:57"}
|
61 |
+
{"current_steps": 600, "total_steps": 1134, "loss": 0.7507, "lr": 5e-06, "epoch": 1.5873015873015874, "percentage": 52.91, "elapsed_time": "1:00:35", "remaining_time": "0:53:55"}
|
62 |
+
{"current_steps": 610, "total_steps": 1134, "loss": 0.751, "lr": 5e-06, "epoch": 1.6137566137566137, "percentage": 53.79, "elapsed_time": "1:01:33", "remaining_time": "0:52:53"}
|
63 |
+
{"current_steps": 620, "total_steps": 1134, "loss": 0.7452, "lr": 5e-06, "epoch": 1.6402116402116402, "percentage": 54.67, "elapsed_time": "1:02:32", "remaining_time": "0:51:50"}
|
64 |
+
{"current_steps": 630, "total_steps": 1134, "loss": 0.747, "lr": 5e-06, "epoch": 1.6666666666666665, "percentage": 55.56, "elapsed_time": "1:03:30", "remaining_time": "0:50:48"}
|
65 |
+
{"current_steps": 640, "total_steps": 1134, "loss": 0.7478, "lr": 5e-06, "epoch": 1.693121693121693, "percentage": 56.44, "elapsed_time": "1:04:28", "remaining_time": "0:49:46"}
|
66 |
+
{"current_steps": 650, "total_steps": 1134, "loss": 0.7493, "lr": 5e-06, "epoch": 1.7195767195767195, "percentage": 57.32, "elapsed_time": "1:05:27", "remaining_time": "0:48:44"}
|
67 |
+
{"current_steps": 660, "total_steps": 1134, "loss": 0.7488, "lr": 5e-06, "epoch": 1.746031746031746, "percentage": 58.2, "elapsed_time": "1:06:25", "remaining_time": "0:47:42"}
|
68 |
+
{"current_steps": 670, "total_steps": 1134, "loss": 0.7484, "lr": 5e-06, "epoch": 1.7724867724867726, "percentage": 59.08, "elapsed_time": "1:07:24", "remaining_time": "0:46:40"}
|
69 |
+
{"current_steps": 680, "total_steps": 1134, "loss": 0.7425, "lr": 5e-06, "epoch": 1.798941798941799, "percentage": 59.96, "elapsed_time": "1:08:22", "remaining_time": "0:45:39"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7288
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:efcd1397e29d9e9e463cb3c83bc95dc1767e9d528cbac772697e88580f516784
|
3 |
size 7288
|