sedrickkeh
commited on
Commit
•
e3fb566
1
Parent(s):
c4a3fb0
Training in progress, epoch 2
Browse files
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4903351912
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5dd027e7cf24d6934df10a3e800805afc06891943591db9198fc43dfd0f6a1bc
|
3 |
size 4903351912
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4947570872
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47e126ae3930cf0e62a5bb8c6d60d3a5ca52aac5b363370649966a82f4876280
|
3 |
size 4947570872
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4962221464
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8604f8eb4efee426dd1defd0fd843b2c395d60e944a8a3cd33da8a20f76694c
|
3 |
size 4962221464
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3670322200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a7ed32b0d4aab9f1d93eaaaa2ff83e3736222a7171a35a802f15a8bcb48cf90
|
3 |
size 3670322200
|
trainer_log.jsonl
CHANGED
@@ -134,3 +134,69 @@
|
|
134 |
{"current_steps": 1330, "total_steps": 2004, "loss": 0.5488, "learning_rate": 5e-06, "epoch": 1.990645463049579, "percentage": 66.37, "elapsed_time": "16:34:42", "remaining_time": "8:24:04"}
|
135 |
{"current_steps": 1336, "total_steps": 2004, "eval_loss": 0.6217324733734131, "epoch": 1.999625818521983, "percentage": 66.67, "elapsed_time": "16:47:54", "remaining_time": "8:23:57"}
|
136 |
{"current_steps": 1340, "total_steps": 2004, "loss": 0.5617, "learning_rate": 5e-06, "epoch": 2.0056127221702527, "percentage": 66.87, "elapsed_time": "16:52:06", "remaining_time": "8:21:31"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
{"current_steps": 1330, "total_steps": 2004, "loss": 0.5488, "learning_rate": 5e-06, "epoch": 1.990645463049579, "percentage": 66.37, "elapsed_time": "16:34:42", "remaining_time": "8:24:04"}
|
135 |
{"current_steps": 1336, "total_steps": 2004, "eval_loss": 0.6217324733734131, "epoch": 1.999625818521983, "percentage": 66.67, "elapsed_time": "16:47:54", "remaining_time": "8:23:57"}
|
136 |
{"current_steps": 1340, "total_steps": 2004, "loss": 0.5617, "learning_rate": 5e-06, "epoch": 2.0056127221702527, "percentage": 66.87, "elapsed_time": "16:52:06", "remaining_time": "8:21:31"}
|
137 |
+
{"current_steps": 1350, "total_steps": 2004, "loss": 0.4541, "learning_rate": 5e-06, "epoch": 2.020579981290926, "percentage": 67.37, "elapsed_time": "16:59:30", "remaining_time": "8:13:53"}
|
138 |
+
{"current_steps": 1360, "total_steps": 2004, "loss": 0.443, "learning_rate": 5e-06, "epoch": 2.0355472404115997, "percentage": 67.86, "elapsed_time": "17:06:55", "remaining_time": "8:06:16"}
|
139 |
+
{"current_steps": 1370, "total_steps": 2004, "loss": 0.4414, "learning_rate": 5e-06, "epoch": 2.050514499532273, "percentage": 68.36, "elapsed_time": "17:14:20", "remaining_time": "7:58:39"}
|
140 |
+
{"current_steps": 1380, "total_steps": 2004, "loss": 0.4524, "learning_rate": 5e-06, "epoch": 2.065481758652947, "percentage": 68.86, "elapsed_time": "17:21:44", "remaining_time": "7:51:03"}
|
141 |
+
{"current_steps": 1390, "total_steps": 2004, "loss": 0.4531, "learning_rate": 5e-06, "epoch": 2.08044901777362, "percentage": 69.36, "elapsed_time": "17:29:09", "remaining_time": "7:43:26"}
|
142 |
+
{"current_steps": 1400, "total_steps": 2004, "loss": 0.4517, "learning_rate": 5e-06, "epoch": 2.095416276894294, "percentage": 69.86, "elapsed_time": "17:36:33", "remaining_time": "7:35:49"}
|
143 |
+
{"current_steps": 1410, "total_steps": 2004, "loss": 0.4551, "learning_rate": 5e-06, "epoch": 2.110383536014967, "percentage": 70.36, "elapsed_time": "17:43:58", "remaining_time": "7:28:13"}
|
144 |
+
{"current_steps": 1420, "total_steps": 2004, "loss": 0.4516, "learning_rate": 5e-06, "epoch": 2.125350795135641, "percentage": 70.86, "elapsed_time": "17:51:23", "remaining_time": "7:20:37"}
|
145 |
+
{"current_steps": 1430, "total_steps": 2004, "loss": 0.4585, "learning_rate": 5e-06, "epoch": 2.1403180542563143, "percentage": 71.36, "elapsed_time": "17:58:47", "remaining_time": "7:13:01"}
|
146 |
+
{"current_steps": 1440, "total_steps": 2004, "loss": 0.4559, "learning_rate": 5e-06, "epoch": 2.155285313376988, "percentage": 71.86, "elapsed_time": "18:06:12", "remaining_time": "7:05:25"}
|
147 |
+
{"current_steps": 1450, "total_steps": 2004, "loss": 0.4568, "learning_rate": 5e-06, "epoch": 2.1702525724976613, "percentage": 72.36, "elapsed_time": "18:13:37", "remaining_time": "6:57:50"}
|
148 |
+
{"current_steps": 1460, "total_steps": 2004, "loss": 0.4586, "learning_rate": 5e-06, "epoch": 2.185219831618335, "percentage": 72.85, "elapsed_time": "18:21:01", "remaining_time": "6:50:14"}
|
149 |
+
{"current_steps": 1470, "total_steps": 2004, "loss": 0.457, "learning_rate": 5e-06, "epoch": 2.2001870907390084, "percentage": 73.35, "elapsed_time": "18:28:26", "remaining_time": "6:42:39"}
|
150 |
+
{"current_steps": 1480, "total_steps": 2004, "loss": 0.4601, "learning_rate": 5e-06, "epoch": 2.2151543498596817, "percentage": 73.85, "elapsed_time": "18:35:50", "remaining_time": "6:35:04"}
|
151 |
+
{"current_steps": 1490, "total_steps": 2004, "loss": 0.4642, "learning_rate": 5e-06, "epoch": 2.2301216089803555, "percentage": 74.35, "elapsed_time": "18:43:14", "remaining_time": "6:27:29"}
|
152 |
+
{"current_steps": 1500, "total_steps": 2004, "loss": 0.4581, "learning_rate": 5e-06, "epoch": 2.245088868101029, "percentage": 74.85, "elapsed_time": "18:50:39", "remaining_time": "6:19:54"}
|
153 |
+
{"current_steps": 1510, "total_steps": 2004, "loss": 0.4603, "learning_rate": 5e-06, "epoch": 2.2600561272217026, "percentage": 75.35, "elapsed_time": "18:58:03", "remaining_time": "6:12:19"}
|
154 |
+
{"current_steps": 1520, "total_steps": 2004, "loss": 0.453, "learning_rate": 5e-06, "epoch": 2.275023386342376, "percentage": 75.85, "elapsed_time": "19:05:28", "remaining_time": "6:04:44"}
|
155 |
+
{"current_steps": 1530, "total_steps": 2004, "loss": 0.4579, "learning_rate": 5e-06, "epoch": 2.2899906454630496, "percentage": 76.35, "elapsed_time": "19:12:53", "remaining_time": "5:57:10"}
|
156 |
+
{"current_steps": 1540, "total_steps": 2004, "loss": 0.4645, "learning_rate": 5e-06, "epoch": 2.304957904583723, "percentage": 76.85, "elapsed_time": "19:20:17", "remaining_time": "5:49:35"}
|
157 |
+
{"current_steps": 1550, "total_steps": 2004, "loss": 0.4581, "learning_rate": 5e-06, "epoch": 2.3199251637043967, "percentage": 77.35, "elapsed_time": "19:27:42", "remaining_time": "5:42:01"}
|
158 |
+
{"current_steps": 1560, "total_steps": 2004, "loss": 0.4599, "learning_rate": 5e-06, "epoch": 2.33489242282507, "percentage": 77.84, "elapsed_time": "19:35:06", "remaining_time": "5:34:27"}
|
159 |
+
{"current_steps": 1570, "total_steps": 2004, "loss": 0.4638, "learning_rate": 5e-06, "epoch": 2.349859681945744, "percentage": 78.34, "elapsed_time": "19:42:30", "remaining_time": "5:26:53"}
|
160 |
+
{"current_steps": 1580, "total_steps": 2004, "loss": 0.4585, "learning_rate": 5e-06, "epoch": 2.364826941066417, "percentage": 78.84, "elapsed_time": "19:49:55", "remaining_time": "5:19:19"}
|
161 |
+
{"current_steps": 1590, "total_steps": 2004, "loss": 0.4659, "learning_rate": 5e-06, "epoch": 2.379794200187091, "percentage": 79.34, "elapsed_time": "19:57:19", "remaining_time": "5:11:45"}
|
162 |
+
{"current_steps": 1600, "total_steps": 2004, "loss": 0.4603, "learning_rate": 5e-06, "epoch": 2.394761459307764, "percentage": 79.84, "elapsed_time": "20:04:44", "remaining_time": "5:04:11"}
|
163 |
+
{"current_steps": 1610, "total_steps": 2004, "loss": 0.4657, "learning_rate": 5e-06, "epoch": 2.409728718428438, "percentage": 80.34, "elapsed_time": "20:12:08", "remaining_time": "4:56:38"}
|
164 |
+
{"current_steps": 1620, "total_steps": 2004, "loss": 0.4739, "learning_rate": 5e-06, "epoch": 2.4246959775491113, "percentage": 80.84, "elapsed_time": "20:19:33", "remaining_time": "4:49:04"}
|
165 |
+
{"current_steps": 1630, "total_steps": 2004, "loss": 0.4682, "learning_rate": 5e-06, "epoch": 2.439663236669785, "percentage": 81.34, "elapsed_time": "20:26:57", "remaining_time": "4:41:31"}
|
166 |
+
{"current_steps": 1640, "total_steps": 2004, "loss": 0.4675, "learning_rate": 5e-06, "epoch": 2.4546304957904583, "percentage": 81.84, "elapsed_time": "20:34:21", "remaining_time": "4:33:58"}
|
167 |
+
{"current_steps": 1650, "total_steps": 2004, "loss": 0.4688, "learning_rate": 5e-06, "epoch": 2.469597754911132, "percentage": 82.34, "elapsed_time": "20:41:46", "remaining_time": "4:26:25"}
|
168 |
+
{"current_steps": 1660, "total_steps": 2004, "loss": 0.4673, "learning_rate": 5e-06, "epoch": 2.4845650140318054, "percentage": 82.83, "elapsed_time": "20:49:11", "remaining_time": "4:18:52"}
|
169 |
+
{"current_steps": 1670, "total_steps": 2004, "loss": 0.4647, "learning_rate": 5e-06, "epoch": 2.4995322731524787, "percentage": 83.33, "elapsed_time": "20:56:35", "remaining_time": "4:11:19"}
|
170 |
+
{"current_steps": 1680, "total_steps": 2004, "loss": 0.4663, "learning_rate": 5e-06, "epoch": 2.5144995322731525, "percentage": 83.83, "elapsed_time": "21:04:00", "remaining_time": "4:03:46"}
|
171 |
+
{"current_steps": 1690, "total_steps": 2004, "loss": 0.4674, "learning_rate": 5e-06, "epoch": 2.5294667913938262, "percentage": 84.33, "elapsed_time": "21:11:25", "remaining_time": "3:56:13"}
|
172 |
+
{"current_steps": 1700, "total_steps": 2004, "loss": 0.4702, "learning_rate": 5e-06, "epoch": 2.5444340505144996, "percentage": 84.83, "elapsed_time": "21:18:49", "remaining_time": "3:48:41"}
|
173 |
+
{"current_steps": 1710, "total_steps": 2004, "loss": 0.4669, "learning_rate": 5e-06, "epoch": 2.559401309635173, "percentage": 85.33, "elapsed_time": "21:26:13", "remaining_time": "3:41:08"}
|
174 |
+
{"current_steps": 1720, "total_steps": 2004, "loss": 0.467, "learning_rate": 5e-06, "epoch": 2.5743685687558466, "percentage": 85.83, "elapsed_time": "21:33:37", "remaining_time": "3:33:35"}
|
175 |
+
{"current_steps": 1730, "total_steps": 2004, "loss": 0.469, "learning_rate": 5e-06, "epoch": 2.58933582787652, "percentage": 86.33, "elapsed_time": "21:41:01", "remaining_time": "3:26:03"}
|
176 |
+
{"current_steps": 1740, "total_steps": 2004, "loss": 0.4617, "learning_rate": 5e-06, "epoch": 2.6043030869971937, "percentage": 86.83, "elapsed_time": "21:48:26", "remaining_time": "3:18:31"}
|
177 |
+
{"current_steps": 1750, "total_steps": 2004, "loss": 0.4657, "learning_rate": 5e-06, "epoch": 2.619270346117867, "percentage": 87.33, "elapsed_time": "21:55:50", "remaining_time": "3:10:59"}
|
178 |
+
{"current_steps": 1760, "total_steps": 2004, "loss": 0.4614, "learning_rate": 5e-06, "epoch": 2.634237605238541, "percentage": 87.82, "elapsed_time": "22:03:14", "remaining_time": "3:03:26"}
|
179 |
+
{"current_steps": 1770, "total_steps": 2004, "loss": 0.4658, "learning_rate": 5e-06, "epoch": 2.649204864359214, "percentage": 88.32, "elapsed_time": "22:10:38", "remaining_time": "2:55:54"}
|
180 |
+
{"current_steps": 1780, "total_steps": 2004, "loss": 0.4732, "learning_rate": 5e-06, "epoch": 2.664172123479888, "percentage": 88.82, "elapsed_time": "22:18:02", "remaining_time": "2:48:22"}
|
181 |
+
{"current_steps": 1790, "total_steps": 2004, "loss": 0.4716, "learning_rate": 5e-06, "epoch": 2.679139382600561, "percentage": 89.32, "elapsed_time": "22:25:27", "remaining_time": "2:40:51"}
|
182 |
+
{"current_steps": 1800, "total_steps": 2004, "loss": 0.473, "learning_rate": 5e-06, "epoch": 2.694106641721235, "percentage": 89.82, "elapsed_time": "22:32:51", "remaining_time": "2:33:19"}
|
183 |
+
{"current_steps": 1810, "total_steps": 2004, "loss": 0.4676, "learning_rate": 5e-06, "epoch": 2.7090739008419082, "percentage": 90.32, "elapsed_time": "22:40:15", "remaining_time": "2:25:47"}
|
184 |
+
{"current_steps": 1820, "total_steps": 2004, "loss": 0.465, "learning_rate": 5e-06, "epoch": 2.724041159962582, "percentage": 90.82, "elapsed_time": "22:47:39", "remaining_time": "2:18:16"}
|
185 |
+
{"current_steps": 1830, "total_steps": 2004, "loss": 0.4662, "learning_rate": 5e-06, "epoch": 2.7390084190832553, "percentage": 91.32, "elapsed_time": "22:55:04", "remaining_time": "2:10:44"}
|
186 |
+
{"current_steps": 1840, "total_steps": 2004, "loss": 0.4688, "learning_rate": 5e-06, "epoch": 2.753975678203929, "percentage": 91.82, "elapsed_time": "23:02:28", "remaining_time": "2:03:13"}
|
187 |
+
{"current_steps": 1850, "total_steps": 2004, "loss": 0.4769, "learning_rate": 5e-06, "epoch": 2.7689429373246024, "percentage": 92.32, "elapsed_time": "23:09:52", "remaining_time": "1:55:41"}
|
188 |
+
{"current_steps": 1860, "total_steps": 2004, "loss": 0.47, "learning_rate": 5e-06, "epoch": 2.7839101964452757, "percentage": 92.81, "elapsed_time": "23:17:16", "remaining_time": "1:48:10"}
|
189 |
+
{"current_steps": 1870, "total_steps": 2004, "loss": 0.4776, "learning_rate": 5e-06, "epoch": 2.7988774555659495, "percentage": 93.31, "elapsed_time": "23:24:41", "remaining_time": "1:40:39"}
|
190 |
+
{"current_steps": 1880, "total_steps": 2004, "loss": 0.4649, "learning_rate": 5e-06, "epoch": 2.8138447146866232, "percentage": 93.81, "elapsed_time": "23:32:05", "remaining_time": "1:33:08"}
|
191 |
+
{"current_steps": 1890, "total_steps": 2004, "loss": 0.4706, "learning_rate": 5e-06, "epoch": 2.8288119738072965, "percentage": 94.31, "elapsed_time": "23:39:29", "remaining_time": "1:25:37"}
|
192 |
+
{"current_steps": 1900, "total_steps": 2004, "loss": 0.4715, "learning_rate": 5e-06, "epoch": 2.84377923292797, "percentage": 94.81, "elapsed_time": "23:46:53", "remaining_time": "1:18:06"}
|
193 |
+
{"current_steps": 1910, "total_steps": 2004, "loss": 0.474, "learning_rate": 5e-06, "epoch": 2.8587464920486436, "percentage": 95.31, "elapsed_time": "23:54:18", "remaining_time": "1:10:35"}
|
194 |
+
{"current_steps": 1920, "total_steps": 2004, "loss": 0.4709, "learning_rate": 5e-06, "epoch": 2.8737137511693174, "percentage": 95.81, "elapsed_time": "1 day, 0:01:42", "remaining_time": "1:03:04"}
|
195 |
+
{"current_steps": 1930, "total_steps": 2004, "loss": 0.473, "learning_rate": 5e-06, "epoch": 2.8886810102899907, "percentage": 96.31, "elapsed_time": "1 day, 0:09:07", "remaining_time": "0:55:33"}
|
196 |
+
{"current_steps": 1940, "total_steps": 2004, "loss": 0.4663, "learning_rate": 5e-06, "epoch": 2.903648269410664, "percentage": 96.81, "elapsed_time": "1 day, 0:16:31", "remaining_time": "0:48:03"}
|
197 |
+
{"current_steps": 1950, "total_steps": 2004, "loss": 0.4767, "learning_rate": 5e-06, "epoch": 2.9186155285313378, "percentage": 97.31, "elapsed_time": "1 day, 0:23:55", "remaining_time": "0:40:32"}
|
198 |
+
{"current_steps": 1960, "total_steps": 2004, "loss": 0.4719, "learning_rate": 5e-06, "epoch": 2.933582787652011, "percentage": 97.8, "elapsed_time": "1 day, 0:31:20", "remaining_time": "0:33:01"}
|
199 |
+
{"current_steps": 1970, "total_steps": 2004, "loss": 0.4748, "learning_rate": 5e-06, "epoch": 2.948550046772685, "percentage": 98.3, "elapsed_time": "1 day, 0:38:44", "remaining_time": "0:25:31"}
|
200 |
+
{"current_steps": 1980, "total_steps": 2004, "loss": 0.4777, "learning_rate": 5e-06, "epoch": 2.963517305893358, "percentage": 98.8, "elapsed_time": "1 day, 0:46:09", "remaining_time": "0:18:00"}
|
201 |
+
{"current_steps": 1990, "total_steps": 2004, "loss": 0.4719, "learning_rate": 5e-06, "epoch": 2.978484565014032, "percentage": 99.3, "elapsed_time": "1 day, 0:53:33", "remaining_time": "0:10:30"}
|
202 |
+
{"current_steps": 2000, "total_steps": 2004, "loss": 0.4829, "learning_rate": 5e-06, "epoch": 2.9934518241347052, "percentage": 99.8, "elapsed_time": "1 day, 1:00:57", "remaining_time": "0:03:00"}
|