sedrickkeh commited on
Commit
971caff
·
verified ·
1 Parent(s): a73e92a

Training in progress, epoch 2

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:198fc802f7064f45b028342e995f8a53c4d4ceafa8838170c49a060913c2e58a
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:945e3862c0aac9359d02e1e47383078cc9f215a44babd5da33ca1f0b6851fbeb
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8904ff7efc3ee3f0eb9bed9242bc17133767a9fd2103686031990056bfdc3be
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7206bd9ff3596f20ea3544d43e9e4b46968af4ca2020582e5f5023ac92d4563
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c289af0b05a06b19f38fbf578d61cae9d25f85710452a5c2bfd37f4e813b789
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6985eccd3c77bc862bfb4d6c3bb7bd5a805bbd718e0cf8756c91bb9998d8945a
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:611551b0177899e48b9d6c69ed03d58b6072d835a7c6b520f5937d7e93e1f715
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0454e484ee19214af7b868f628439bb0cdc02d3015f556f2f4ede52f770d2c1c
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -168,3 +168,85 @@
168
  {"current_steps": 1670, "total_steps": 2505, "loss": 0.6629, "lr": 5e-06, "epoch": 1.9988031119090364, "percentage": 66.67, "elapsed_time": "3:56:53", "remaining_time": "1:58:26"}
169
  {"current_steps": 1671, "total_steps": 2505, "eval_loss": 0.7005711197853088, "epoch": 2.0, "percentage": 66.71, "elapsed_time": "3:59:26", "remaining_time": "1:59:30"}
170
  {"current_steps": 1680, "total_steps": 2505, "loss": 0.6508, "lr": 5e-06, "epoch": 2.0107719928186714, "percentage": 67.07, "elapsed_time": "4:01:39", "remaining_time": "1:58:40"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  {"current_steps": 1670, "total_steps": 2505, "loss": 0.6629, "lr": 5e-06, "epoch": 1.9988031119090364, "percentage": 66.67, "elapsed_time": "3:56:53", "remaining_time": "1:58:26"}
169
  {"current_steps": 1671, "total_steps": 2505, "eval_loss": 0.7005711197853088, "epoch": 2.0, "percentage": 66.71, "elapsed_time": "3:59:26", "remaining_time": "1:59:30"}
170
  {"current_steps": 1680, "total_steps": 2505, "loss": 0.6508, "lr": 5e-06, "epoch": 2.0107719928186714, "percentage": 67.07, "elapsed_time": "4:01:39", "remaining_time": "1:58:40"}
171
+ {"current_steps": 1690, "total_steps": 2505, "loss": 0.6156, "lr": 5e-06, "epoch": 2.0227408737283064, "percentage": 67.47, "elapsed_time": "4:03:02", "remaining_time": "1:57:12"}
172
+ {"current_steps": 1700, "total_steps": 2505, "loss": 0.6119, "lr": 5e-06, "epoch": 2.0347097546379413, "percentage": 67.86, "elapsed_time": "4:04:26", "remaining_time": "1:55:45"}
173
+ {"current_steps": 1710, "total_steps": 2505, "loss": 0.6121, "lr": 5e-06, "epoch": 2.0466786355475763, "percentage": 68.26, "elapsed_time": "4:05:51", "remaining_time": "1:54:17"}
174
+ {"current_steps": 1720, "total_steps": 2505, "loss": 0.614, "lr": 5e-06, "epoch": 2.0586475164572113, "percentage": 68.66, "elapsed_time": "4:07:14", "remaining_time": "1:52:50"}
175
+ {"current_steps": 1730, "total_steps": 2505, "loss": 0.6179, "lr": 5e-06, "epoch": 2.0706163973668463, "percentage": 69.06, "elapsed_time": "4:08:38", "remaining_time": "1:51:23"}
176
+ {"current_steps": 1740, "total_steps": 2505, "loss": 0.6166, "lr": 5e-06, "epoch": 2.0825852782764813, "percentage": 69.46, "elapsed_time": "4:10:01", "remaining_time": "1:49:55"}
177
+ {"current_steps": 1750, "total_steps": 2505, "loss": 0.6181, "lr": 5e-06, "epoch": 2.0945541591861163, "percentage": 69.86, "elapsed_time": "4:11:25", "remaining_time": "1:48:28"}
178
+ {"current_steps": 1760, "total_steps": 2505, "loss": 0.6169, "lr": 5e-06, "epoch": 2.106523040095751, "percentage": 70.26, "elapsed_time": "4:12:48", "remaining_time": "1:47:00"}
179
+ {"current_steps": 1770, "total_steps": 2505, "loss": 0.621, "lr": 5e-06, "epoch": 2.118491921005386, "percentage": 70.66, "elapsed_time": "4:14:12", "remaining_time": "1:45:33"}
180
+ {"current_steps": 1780, "total_steps": 2505, "loss": 0.6168, "lr": 5e-06, "epoch": 2.130460801915021, "percentage": 71.06, "elapsed_time": "4:15:36", "remaining_time": "1:44:06"}
181
+ {"current_steps": 1790, "total_steps": 2505, "loss": 0.6172, "lr": 5e-06, "epoch": 2.142429682824656, "percentage": 71.46, "elapsed_time": "4:16:59", "remaining_time": "1:42:39"}
182
+ {"current_steps": 1800, "total_steps": 2505, "loss": 0.6165, "lr": 5e-06, "epoch": 2.154398563734291, "percentage": 71.86, "elapsed_time": "4:18:23", "remaining_time": "1:41:12"}
183
+ {"current_steps": 1810, "total_steps": 2505, "loss": 0.6154, "lr": 5e-06, "epoch": 2.166367444643926, "percentage": 72.26, "elapsed_time": "4:19:46", "remaining_time": "1:39:45"}
184
+ {"current_steps": 1820, "total_steps": 2505, "loss": 0.6136, "lr": 5e-06, "epoch": 2.178336325553561, "percentage": 72.65, "elapsed_time": "4:21:10", "remaining_time": "1:38:18"}
185
+ {"current_steps": 1830, "total_steps": 2505, "loss": 0.6174, "lr": 5e-06, "epoch": 2.190305206463196, "percentage": 73.05, "elapsed_time": "4:22:34", "remaining_time": "1:36:50"}
186
+ {"current_steps": 1840, "total_steps": 2505, "loss": 0.6193, "lr": 5e-06, "epoch": 2.202274087372831, "percentage": 73.45, "elapsed_time": "4:23:57", "remaining_time": "1:35:23"}
187
+ {"current_steps": 1850, "total_steps": 2505, "loss": 0.6151, "lr": 5e-06, "epoch": 2.2142429682824654, "percentage": 73.85, "elapsed_time": "4:25:20", "remaining_time": "1:33:56"}
188
+ {"current_steps": 1860, "total_steps": 2505, "loss": 0.62, "lr": 5e-06, "epoch": 2.2262118491921004, "percentage": 74.25, "elapsed_time": "4:26:44", "remaining_time": "1:32:30"}
189
+ {"current_steps": 1870, "total_steps": 2505, "loss": 0.6174, "lr": 5e-06, "epoch": 2.2381807301017353, "percentage": 74.65, "elapsed_time": "4:28:09", "remaining_time": "1:31:03"}
190
+ {"current_steps": 1880, "total_steps": 2505, "loss": 0.6206, "lr": 5e-06, "epoch": 2.2501496110113703, "percentage": 75.05, "elapsed_time": "4:29:33", "remaining_time": "1:29:36"}
191
+ {"current_steps": 1890, "total_steps": 2505, "loss": 0.6174, "lr": 5e-06, "epoch": 2.2621184919210053, "percentage": 75.45, "elapsed_time": "4:30:56", "remaining_time": "1:28:09"}
192
+ {"current_steps": 1900, "total_steps": 2505, "loss": 0.6218, "lr": 5e-06, "epoch": 2.2740873728306403, "percentage": 75.85, "elapsed_time": "4:32:21", "remaining_time": "1:26:43"}
193
+ {"current_steps": 1910, "total_steps": 2505, "loss": 0.6212, "lr": 5e-06, "epoch": 2.2860562537402753, "percentage": 76.25, "elapsed_time": "4:33:45", "remaining_time": "1:25:16"}
194
+ {"current_steps": 1920, "total_steps": 2505, "loss": 0.6165, "lr": 5e-06, "epoch": 2.2980251346499103, "percentage": 76.65, "elapsed_time": "4:35:08", "remaining_time": "1:23:50"}
195
+ {"current_steps": 1930, "total_steps": 2505, "loss": 0.6213, "lr": 5e-06, "epoch": 2.3099940155595453, "percentage": 77.05, "elapsed_time": "4:36:32", "remaining_time": "1:22:23"}
196
+ {"current_steps": 1940, "total_steps": 2505, "loss": 0.6199, "lr": 5e-06, "epoch": 2.3219628964691803, "percentage": 77.45, "elapsed_time": "4:37:56", "remaining_time": "1:20:56"}
197
+ {"current_steps": 1950, "total_steps": 2505, "loss": 0.6197, "lr": 5e-06, "epoch": 2.3339317773788153, "percentage": 77.84, "elapsed_time": "4:39:19", "remaining_time": "1:19:30"}
198
+ {"current_steps": 1960, "total_steps": 2505, "loss": 0.6248, "lr": 5e-06, "epoch": 2.34590065828845, "percentage": 78.24, "elapsed_time": "4:40:43", "remaining_time": "1:18:03"}
199
+ {"current_steps": 1970, "total_steps": 2505, "loss": 0.6198, "lr": 5e-06, "epoch": 2.357869539198085, "percentage": 78.64, "elapsed_time": "4:42:06", "remaining_time": "1:16:36"}
200
+ {"current_steps": 1980, "total_steps": 2505, "loss": 0.6211, "lr": 5e-06, "epoch": 2.36983842010772, "percentage": 79.04, "elapsed_time": "4:43:30", "remaining_time": "1:15:10"}
201
+ {"current_steps": 1990, "total_steps": 2505, "loss": 0.6181, "lr": 5e-06, "epoch": 2.381807301017355, "percentage": 79.44, "elapsed_time": "4:44:54", "remaining_time": "1:13:43"}
202
+ {"current_steps": 2000, "total_steps": 2505, "loss": 0.6223, "lr": 5e-06, "epoch": 2.39377618192699, "percentage": 79.84, "elapsed_time": "4:46:17", "remaining_time": "1:12:17"}
203
+ {"current_steps": 2010, "total_steps": 2505, "loss": 0.6209, "lr": 5e-06, "epoch": 2.405745062836625, "percentage": 80.24, "elapsed_time": "4:47:40", "remaining_time": "1:10:50"}
204
+ {"current_steps": 2020, "total_steps": 2505, "loss": 0.6203, "lr": 5e-06, "epoch": 2.41771394374626, "percentage": 80.64, "elapsed_time": "4:49:03", "remaining_time": "1:09:24"}
205
+ {"current_steps": 2030, "total_steps": 2505, "loss": 0.6154, "lr": 5e-06, "epoch": 2.429682824655895, "percentage": 81.04, "elapsed_time": "4:50:27", "remaining_time": "1:07:57"}
206
+ {"current_steps": 2040, "total_steps": 2505, "loss": 0.6221, "lr": 5e-06, "epoch": 2.44165170556553, "percentage": 81.44, "elapsed_time": "4:51:51", "remaining_time": "1:06:31"}
207
+ {"current_steps": 2050, "total_steps": 2505, "loss": 0.6213, "lr": 5e-06, "epoch": 2.4536205864751643, "percentage": 81.84, "elapsed_time": "4:53:14", "remaining_time": "1:05:05"}
208
+ {"current_steps": 2060, "total_steps": 2505, "loss": 0.6229, "lr": 5e-06, "epoch": 2.4655894673847993, "percentage": 82.24, "elapsed_time": "4:54:38", "remaining_time": "1:03:38"}
209
+ {"current_steps": 2070, "total_steps": 2505, "loss": 0.62, "lr": 5e-06, "epoch": 2.4775583482944343, "percentage": 82.63, "elapsed_time": "4:56:01", "remaining_time": "1:02:12"}
210
+ {"current_steps": 2080, "total_steps": 2505, "loss": 0.6256, "lr": 5e-06, "epoch": 2.4895272292040693, "percentage": 83.03, "elapsed_time": "4:57:25", "remaining_time": "1:00:46"}
211
+ {"current_steps": 2090, "total_steps": 2505, "loss": 0.6233, "lr": 5e-06, "epoch": 2.5014961101137043, "percentage": 83.43, "elapsed_time": "4:58:49", "remaining_time": "0:59:20"}
212
+ {"current_steps": 2100, "total_steps": 2505, "loss": 0.6193, "lr": 5e-06, "epoch": 2.5134649910233393, "percentage": 83.83, "elapsed_time": "5:00:12", "remaining_time": "0:57:53"}
213
+ {"current_steps": 2110, "total_steps": 2505, "loss": 0.6187, "lr": 5e-06, "epoch": 2.5254338719329743, "percentage": 84.23, "elapsed_time": "5:01:36", "remaining_time": "0:56:27"}
214
+ {"current_steps": 2120, "total_steps": 2505, "loss": 0.624, "lr": 5e-06, "epoch": 2.5374027528426093, "percentage": 84.63, "elapsed_time": "5:02:59", "remaining_time": "0:55:01"}
215
+ {"current_steps": 2130, "total_steps": 2505, "loss": 0.6171, "lr": 5e-06, "epoch": 2.5493716337522443, "percentage": 85.03, "elapsed_time": "5:04:23", "remaining_time": "0:53:35"}
216
+ {"current_steps": 2140, "total_steps": 2505, "loss": 0.6217, "lr": 5e-06, "epoch": 2.561340514661879, "percentage": 85.43, "elapsed_time": "5:05:46", "remaining_time": "0:52:09"}
217
+ {"current_steps": 2150, "total_steps": 2505, "loss": 0.6226, "lr": 5e-06, "epoch": 2.5733093955715143, "percentage": 85.83, "elapsed_time": "5:07:10", "remaining_time": "0:50:43"}
218
+ {"current_steps": 2160, "total_steps": 2505, "loss": 0.6271, "lr": 5e-06, "epoch": 2.585278276481149, "percentage": 86.23, "elapsed_time": "5:08:33", "remaining_time": "0:49:17"}
219
+ {"current_steps": 2170, "total_steps": 2505, "loss": 0.6224, "lr": 5e-06, "epoch": 2.597247157390784, "percentage": 86.63, "elapsed_time": "5:09:57", "remaining_time": "0:47:50"}
220
+ {"current_steps": 2180, "total_steps": 2505, "loss": 0.6212, "lr": 5e-06, "epoch": 2.609216038300419, "percentage": 87.03, "elapsed_time": "5:11:20", "remaining_time": "0:46:25"}
221
+ {"current_steps": 2190, "total_steps": 2505, "loss": 0.6235, "lr": 5e-06, "epoch": 2.621184919210054, "percentage": 87.43, "elapsed_time": "5:12:44", "remaining_time": "0:44:59"}
222
+ {"current_steps": 2200, "total_steps": 2505, "loss": 0.6237, "lr": 5e-06, "epoch": 2.633153800119689, "percentage": 87.82, "elapsed_time": "5:14:08", "remaining_time": "0:43:33"}
223
+ {"current_steps": 2210, "total_steps": 2505, "loss": 0.6269, "lr": 5e-06, "epoch": 2.645122681029324, "percentage": 88.22, "elapsed_time": "5:15:31", "remaining_time": "0:42:07"}
224
+ {"current_steps": 2220, "total_steps": 2505, "loss": 0.6213, "lr": 5e-06, "epoch": 2.657091561938959, "percentage": 88.62, "elapsed_time": "5:16:55", "remaining_time": "0:40:41"}
225
+ {"current_steps": 2230, "total_steps": 2505, "loss": 0.6261, "lr": 5e-06, "epoch": 2.669060442848594, "percentage": 89.02, "elapsed_time": "5:18:19", "remaining_time": "0:39:15"}
226
+ {"current_steps": 2240, "total_steps": 2505, "loss": 0.6238, "lr": 5e-06, "epoch": 2.6810293237582288, "percentage": 89.42, "elapsed_time": "5:19:42", "remaining_time": "0:37:49"}
227
+ {"current_steps": 2250, "total_steps": 2505, "loss": 0.6224, "lr": 5e-06, "epoch": 2.6929982046678633, "percentage": 89.82, "elapsed_time": "5:21:06", "remaining_time": "0:36:23"}
228
+ {"current_steps": 2260, "total_steps": 2505, "loss": 0.6245, "lr": 5e-06, "epoch": 2.7049670855774988, "percentage": 90.22, "elapsed_time": "5:22:29", "remaining_time": "0:34:57"}
229
+ {"current_steps": 2270, "total_steps": 2505, "loss": 0.626, "lr": 5e-06, "epoch": 2.7169359664871333, "percentage": 90.62, "elapsed_time": "5:23:53", "remaining_time": "0:33:31"}
230
+ {"current_steps": 2280, "total_steps": 2505, "loss": 0.6213, "lr": 5e-06, "epoch": 2.7289048473967683, "percentage": 91.02, "elapsed_time": "5:25:16", "remaining_time": "0:32:06"}
231
+ {"current_steps": 2290, "total_steps": 2505, "loss": 0.6257, "lr": 5e-06, "epoch": 2.7408737283064033, "percentage": 91.42, "elapsed_time": "5:26:40", "remaining_time": "0:30:40"}
232
+ {"current_steps": 2300, "total_steps": 2505, "loss": 0.6209, "lr": 5e-06, "epoch": 2.7528426092160383, "percentage": 91.82, "elapsed_time": "5:28:03", "remaining_time": "0:29:14"}
233
+ {"current_steps": 2310, "total_steps": 2505, "loss": 0.627, "lr": 5e-06, "epoch": 2.7648114901256733, "percentage": 92.22, "elapsed_time": "5:29:27", "remaining_time": "0:27:48"}
234
+ {"current_steps": 2320, "total_steps": 2505, "loss": 0.626, "lr": 5e-06, "epoch": 2.7767803710353083, "percentage": 92.61, "elapsed_time": "5:30:51", "remaining_time": "0:26:22"}
235
+ {"current_steps": 2330, "total_steps": 2505, "loss": 0.6284, "lr": 5e-06, "epoch": 2.7887492519449433, "percentage": 93.01, "elapsed_time": "5:32:14", "remaining_time": "0:24:57"}
236
+ {"current_steps": 2340, "total_steps": 2505, "loss": 0.6277, "lr": 5e-06, "epoch": 2.800718132854578, "percentage": 93.41, "elapsed_time": "5:33:37", "remaining_time": "0:23:31"}
237
+ {"current_steps": 2350, "total_steps": 2505, "loss": 0.6214, "lr": 5e-06, "epoch": 2.8126870137642133, "percentage": 93.81, "elapsed_time": "5:35:01", "remaining_time": "0:22:05"}
238
+ {"current_steps": 2360, "total_steps": 2505, "loss": 0.6176, "lr": 5e-06, "epoch": 2.824655894673848, "percentage": 94.21, "elapsed_time": "5:36:25", "remaining_time": "0:20:40"}
239
+ {"current_steps": 2370, "total_steps": 2505, "loss": 0.6228, "lr": 5e-06, "epoch": 2.836624775583483, "percentage": 94.61, "elapsed_time": "5:37:49", "remaining_time": "0:19:14"}
240
+ {"current_steps": 2380, "total_steps": 2505, "loss": 0.6236, "lr": 5e-06, "epoch": 2.848593656493118, "percentage": 95.01, "elapsed_time": "5:39:13", "remaining_time": "0:17:48"}
241
+ {"current_steps": 2390, "total_steps": 2505, "loss": 0.6247, "lr": 5e-06, "epoch": 2.860562537402753, "percentage": 95.41, "elapsed_time": "5:40:37", "remaining_time": "0:16:23"}
242
+ {"current_steps": 2400, "total_steps": 2505, "loss": 0.6227, "lr": 5e-06, "epoch": 2.872531418312388, "percentage": 95.81, "elapsed_time": "5:42:01", "remaining_time": "0:14:57"}
243
+ {"current_steps": 2410, "total_steps": 2505, "loss": 0.625, "lr": 5e-06, "epoch": 2.884500299222023, "percentage": 96.21, "elapsed_time": "5:43:24", "remaining_time": "0:13:32"}
244
+ {"current_steps": 2420, "total_steps": 2505, "loss": 0.6195, "lr": 5e-06, "epoch": 2.8964691801316578, "percentage": 96.61, "elapsed_time": "5:44:47", "remaining_time": "0:12:06"}
245
+ {"current_steps": 2430, "total_steps": 2505, "loss": 0.6234, "lr": 5e-06, "epoch": 2.9084380610412928, "percentage": 97.01, "elapsed_time": "5:46:11", "remaining_time": "0:10:41"}
246
+ {"current_steps": 2440, "total_steps": 2505, "loss": 0.6247, "lr": 5e-06, "epoch": 2.9204069419509278, "percentage": 97.41, "elapsed_time": "5:47:34", "remaining_time": "0:09:15"}
247
+ {"current_steps": 2450, "total_steps": 2505, "loss": 0.623, "lr": 5e-06, "epoch": 2.9323758228605623, "percentage": 97.8, "elapsed_time": "5:48:58", "remaining_time": "0:07:50"}
248
+ {"current_steps": 2460, "total_steps": 2505, "loss": 0.6258, "lr": 5e-06, "epoch": 2.9443447037701977, "percentage": 98.2, "elapsed_time": "5:50:21", "remaining_time": "0:06:24"}
249
+ {"current_steps": 2470, "total_steps": 2505, "loss": 0.6196, "lr": 5e-06, "epoch": 2.9563135846798323, "percentage": 98.6, "elapsed_time": "5:51:44", "remaining_time": "0:04:59"}
250
+ {"current_steps": 2480, "total_steps": 2505, "loss": 0.6265, "lr": 5e-06, "epoch": 2.9682824655894673, "percentage": 99.0, "elapsed_time": "5:53:07", "remaining_time": "0:03:33"}
251
+ {"current_steps": 2490, "total_steps": 2505, "loss": 0.6217, "lr": 5e-06, "epoch": 2.9802513464991023, "percentage": 99.4, "elapsed_time": "5:54:31", "remaining_time": "0:02:08"}
252
+ {"current_steps": 2500, "total_steps": 2505, "loss": 0.6193, "lr": 5e-06, "epoch": 2.9922202274087373, "percentage": 99.8, "elapsed_time": "5:55:54", "remaining_time": "0:00:42"}