Training in progress, step 320000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8164585c0cf8e0435f85a1a07a5b437860c2cdc001a4018d6d0b9d01b7eba98
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:962905691e3e61901278bd35bae6a9e6802f21882ed0e962dd2ab116fea9b46f
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48df4ff304dd68d62a4e5fc4e07d3d51f94b7c8bf43256093a56ac0f03010b1f
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5673377a057c7734bd1a0ee14d972f6f3bfc67bb8208ac49ae618347d18d616b
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -6206,11 +6206,211 @@
|
|
6206 |
"eval_samples_per_second": 1519.474,
|
6207 |
"eval_steps_per_second": 24.195,
|
6208 |
"step": 310000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6209 |
}
|
6210 |
],
|
6211 |
"max_steps": 500000,
|
6212 |
"num_train_epochs": 12,
|
6213 |
-
"total_flos":
|
6214 |
"trial_name": null,
|
6215 |
"trial_params": null
|
6216 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.455905310002563,
|
5 |
+
"global_step": 320000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
6206 |
"eval_samples_per_second": 1519.474,
|
6207 |
"eval_steps_per_second": 24.195,
|
6208 |
"step": 310000
|
6209 |
+
},
|
6210 |
+
{
|
6211 |
+
"epoch": 7.23,
|
6212 |
+
"learning_rate": 0.00010973674410951567,
|
6213 |
+
"loss": 0.2657,
|
6214 |
+
"step": 310500
|
6215 |
+
},
|
6216 |
+
{
|
6217 |
+
"epoch": 7.25,
|
6218 |
+
"learning_rate": 0.00010928144739511337,
|
6219 |
+
"loss": 0.2656,
|
6220 |
+
"step": 311000
|
6221 |
+
},
|
6222 |
+
{
|
6223 |
+
"epoch": 7.25,
|
6224 |
+
"eval_loss": 0.2480616420507431,
|
6225 |
+
"eval_runtime": 1.4455,
|
6226 |
+
"eval_samples_per_second": 1520.549,
|
6227 |
+
"eval_steps_per_second": 24.213,
|
6228 |
+
"step": 311000
|
6229 |
+
},
|
6230 |
+
{
|
6231 |
+
"epoch": 7.26,
|
6232 |
+
"learning_rate": 0.00010882665065147757,
|
6233 |
+
"loss": 0.2655,
|
6234 |
+
"step": 311500
|
6235 |
+
},
|
6236 |
+
{
|
6237 |
+
"epoch": 7.27,
|
6238 |
+
"learning_rate": 0.00010837235885219267,
|
6239 |
+
"loss": 0.2653,
|
6240 |
+
"step": 312000
|
6241 |
+
},
|
6242 |
+
{
|
6243 |
+
"epoch": 7.27,
|
6244 |
+
"eval_loss": 0.2518889009952545,
|
6245 |
+
"eval_runtime": 1.4252,
|
6246 |
+
"eval_samples_per_second": 1542.203,
|
6247 |
+
"eval_steps_per_second": 24.557,
|
6248 |
+
"step": 312000
|
6249 |
+
},
|
6250 |
+
{
|
6251 |
+
"epoch": 7.28,
|
6252 |
+
"learning_rate": 0.00010791857696532089,
|
6253 |
+
"loss": 0.2653,
|
6254 |
+
"step": 312500
|
6255 |
+
},
|
6256 |
+
{
|
6257 |
+
"epoch": 7.29,
|
6258 |
+
"learning_rate": 0.00010746530995334832,
|
6259 |
+
"loss": 0.2652,
|
6260 |
+
"step": 313000
|
6261 |
+
},
|
6262 |
+
{
|
6263 |
+
"epoch": 7.29,
|
6264 |
+
"eval_loss": 0.24834561347961426,
|
6265 |
+
"eval_runtime": 1.454,
|
6266 |
+
"eval_samples_per_second": 1511.735,
|
6267 |
+
"eval_steps_per_second": 24.072,
|
6268 |
+
"step": 313000
|
6269 |
+
},
|
6270 |
+
{
|
6271 |
+
"epoch": 7.3,
|
6272 |
+
"learning_rate": 0.0001070125627731304,
|
6273 |
+
"loss": 0.2653,
|
6274 |
+
"step": 313500
|
6275 |
+
},
|
6276 |
+
{
|
6277 |
+
"epoch": 7.32,
|
6278 |
+
"learning_rate": 0.0001065603403758377,
|
6279 |
+
"loss": 0.2654,
|
6280 |
+
"step": 314000
|
6281 |
+
},
|
6282 |
+
{
|
6283 |
+
"epoch": 7.32,
|
6284 |
+
"eval_loss": 0.24683910608291626,
|
6285 |
+
"eval_runtime": 1.4569,
|
6286 |
+
"eval_samples_per_second": 1508.633,
|
6287 |
+
"eval_steps_per_second": 24.023,
|
6288 |
+
"step": 314000
|
6289 |
+
},
|
6290 |
+
{
|
6291 |
+
"epoch": 7.33,
|
6292 |
+
"learning_rate": 0.00010610864770690196,
|
6293 |
+
"loss": 0.2652,
|
6294 |
+
"step": 314500
|
6295 |
+
},
|
6296 |
+
{
|
6297 |
+
"epoch": 7.34,
|
6298 |
+
"learning_rate": 0.00010565748970596172,
|
6299 |
+
"loss": 0.2653,
|
6300 |
+
"step": 315000
|
6301 |
+
},
|
6302 |
+
{
|
6303 |
+
"epoch": 7.34,
|
6304 |
+
"eval_loss": 0.2493169754743576,
|
6305 |
+
"eval_runtime": 1.4715,
|
6306 |
+
"eval_samples_per_second": 1493.749,
|
6307 |
+
"eval_steps_per_second": 23.786,
|
6308 |
+
"step": 315000
|
6309 |
+
},
|
6310 |
+
{
|
6311 |
+
"epoch": 7.35,
|
6312 |
+
"learning_rate": 0.00010520687130680884,
|
6313 |
+
"loss": 0.2653,
|
6314 |
+
"step": 315500
|
6315 |
+
},
|
6316 |
+
{
|
6317 |
+
"epoch": 7.36,
|
6318 |
+
"learning_rate": 0.00010475679743733364,
|
6319 |
+
"loss": 0.265,
|
6320 |
+
"step": 316000
|
6321 |
+
},
|
6322 |
+
{
|
6323 |
+
"epoch": 7.36,
|
6324 |
+
"eval_loss": 0.24726563692092896,
|
6325 |
+
"eval_runtime": 1.4143,
|
6326 |
+
"eval_samples_per_second": 1554.111,
|
6327 |
+
"eval_steps_per_second": 24.747,
|
6328 |
+
"step": 316000
|
6329 |
+
},
|
6330 |
+
{
|
6331 |
+
"epoch": 7.37,
|
6332 |
+
"learning_rate": 0.00010430727301947202,
|
6333 |
+
"loss": 0.2647,
|
6334 |
+
"step": 316500
|
6335 |
+
},
|
6336 |
+
{
|
6337 |
+
"epoch": 7.39,
|
6338 |
+
"learning_rate": 0.00010385830296915104,
|
6339 |
+
"loss": 0.2646,
|
6340 |
+
"step": 317000
|
6341 |
+
},
|
6342 |
+
{
|
6343 |
+
"epoch": 7.39,
|
6344 |
+
"eval_loss": 0.24790264666080475,
|
6345 |
+
"eval_runtime": 1.4368,
|
6346 |
+
"eval_samples_per_second": 1529.828,
|
6347 |
+
"eval_steps_per_second": 24.36,
|
6348 |
+
"step": 317000
|
6349 |
+
},
|
6350 |
+
{
|
6351 |
+
"epoch": 7.4,
|
6352 |
+
"learning_rate": 0.00010340989219623508,
|
6353 |
+
"loss": 0.2649,
|
6354 |
+
"step": 317500
|
6355 |
+
},
|
6356 |
+
{
|
6357 |
+
"epoch": 7.41,
|
6358 |
+
"learning_rate": 0.0001029620456044727,
|
6359 |
+
"loss": 0.2647,
|
6360 |
+
"step": 318000
|
6361 |
+
},
|
6362 |
+
{
|
6363 |
+
"epoch": 7.41,
|
6364 |
+
"eval_loss": 0.24833044409751892,
|
6365 |
+
"eval_runtime": 1.4568,
|
6366 |
+
"eval_samples_per_second": 1508.735,
|
6367 |
+
"eval_steps_per_second": 24.024,
|
6368 |
+
"step": 318000
|
6369 |
+
},
|
6370 |
+
{
|
6371 |
+
"epoch": 7.42,
|
6372 |
+
"learning_rate": 0.00010251476809144226,
|
6373 |
+
"loss": 0.2643,
|
6374 |
+
"step": 318500
|
6375 |
+
},
|
6376 |
+
{
|
6377 |
+
"epoch": 7.43,
|
6378 |
+
"learning_rate": 0.00010206806454849917,
|
6379 |
+
"loss": 0.2645,
|
6380 |
+
"step": 319000
|
6381 |
+
},
|
6382 |
+
{
|
6383 |
+
"epoch": 7.43,
|
6384 |
+
"eval_loss": 0.2483694851398468,
|
6385 |
+
"eval_runtime": 1.465,
|
6386 |
+
"eval_samples_per_second": 1500.321,
|
6387 |
+
"eval_steps_per_second": 23.89,
|
6388 |
+
"step": 319000
|
6389 |
+
},
|
6390 |
+
{
|
6391 |
+
"epoch": 7.44,
|
6392 |
+
"learning_rate": 0.00010162193986072167,
|
6393 |
+
"loss": 0.2644,
|
6394 |
+
"step": 319500
|
6395 |
+
},
|
6396 |
+
{
|
6397 |
+
"epoch": 7.46,
|
6398 |
+
"learning_rate": 0.00010117639890685795,
|
6399 |
+
"loss": 0.2642,
|
6400 |
+
"step": 320000
|
6401 |
+
},
|
6402 |
+
{
|
6403 |
+
"epoch": 7.46,
|
6404 |
+
"eval_loss": 0.2466782182455063,
|
6405 |
+
"eval_runtime": 1.4627,
|
6406 |
+
"eval_samples_per_second": 1502.721,
|
6407 |
+
"eval_steps_per_second": 23.929,
|
6408 |
+
"step": 320000
|
6409 |
}
|
6410 |
],
|
6411 |
"max_steps": 500000,
|
6412 |
"num_train_epochs": 12,
|
6413 |
+
"total_flos": 1.0223570961847653e+22,
|
6414 |
"trial_name": null,
|
6415 |
"trial_params": null
|
6416 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:962905691e3e61901278bd35bae6a9e6802f21882ed0e962dd2ab116fea9b46f
|
3 |
size 102501541
|