Training in progress, step 380000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8f4969533102642963b0b2227bcca65da739e93c9ba93ada329fad00247349b
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31402a0bcc9e6eb51374c53180861ce7a4f03142fd97638776d46c6dc480c809
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7942041c29d1b42261564b880acf9a06c922c5e2ec40da67c47a0bff66b113b4
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c2047b5f47fc3de929bb0738f7fbdd248300ab063f6fd4eddcabc29f5482852
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 8.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -7406,11 +7406,211 @@
|
|
7406 |
"eval_samples_per_second": 1511.147,
|
7407 |
"eval_steps_per_second": 24.063,
|
7408 |
"step": 370000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7409 |
}
|
7410 |
],
|
7411 |
"max_steps": 500000,
|
7412 |
"num_train_epochs": 12,
|
7413 |
-
"total_flos": 1.
|
7414 |
"trial_name": null,
|
7415 |
"trial_params": null
|
7416 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.853887555628043,
|
5 |
+
"global_step": 380000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
7406 |
"eval_samples_per_second": 1511.147,
|
7407 |
"eval_steps_per_second": 24.063,
|
7408 |
"step": 370000
|
7409 |
+
},
|
7410 |
+
{
|
7411 |
+
"epoch": 8.63,
|
7412 |
+
"learning_rate": 6.0012253629189544e-05,
|
7413 |
+
"loss": 0.2584,
|
7414 |
+
"step": 370500
|
7415 |
+
},
|
7416 |
+
{
|
7417 |
+
"epoch": 8.64,
|
7418 |
+
"learning_rate": 5.965048145015944e-05,
|
7419 |
+
"loss": 0.2585,
|
7420 |
+
"step": 371000
|
7421 |
+
},
|
7422 |
+
{
|
7423 |
+
"epoch": 8.64,
|
7424 |
+
"eval_loss": 0.24364076554775238,
|
7425 |
+
"eval_runtime": 1.4292,
|
7426 |
+
"eval_samples_per_second": 1537.961,
|
7427 |
+
"eval_steps_per_second": 24.49,
|
7428 |
+
"step": 371000
|
7429 |
+
},
|
7430 |
+
{
|
7431 |
+
"epoch": 8.66,
|
7432 |
+
"learning_rate": 5.928975199818785e-05,
|
7433 |
+
"loss": 0.2583,
|
7434 |
+
"step": 371500
|
7435 |
+
},
|
7436 |
+
{
|
7437 |
+
"epoch": 8.67,
|
7438 |
+
"learning_rate": 5.893006921815428e-05,
|
7439 |
+
"loss": 0.2582,
|
7440 |
+
"step": 372000
|
7441 |
+
},
|
7442 |
+
{
|
7443 |
+
"epoch": 8.67,
|
7444 |
+
"eval_loss": 0.24061721563339233,
|
7445 |
+
"eval_runtime": 1.4626,
|
7446 |
+
"eval_samples_per_second": 1502.828,
|
7447 |
+
"eval_steps_per_second": 23.93,
|
7448 |
+
"step": 372000
|
7449 |
+
},
|
7450 |
+
{
|
7451 |
+
"epoch": 8.68,
|
7452 |
+
"learning_rate": 5.857143704349198e-05,
|
7453 |
+
"loss": 0.2584,
|
7454 |
+
"step": 372500
|
7455 |
+
},
|
7456 |
+
{
|
7457 |
+
"epoch": 8.69,
|
7458 |
+
"learning_rate": 5.8213859396144986e-05,
|
7459 |
+
"loss": 0.2583,
|
7460 |
+
"step": 373000
|
7461 |
+
},
|
7462 |
+
{
|
7463 |
+
"epoch": 8.69,
|
7464 |
+
"eval_loss": 0.24275849759578705,
|
7465 |
+
"eval_runtime": 1.4296,
|
7466 |
+
"eval_samples_per_second": 1537.531,
|
7467 |
+
"eval_steps_per_second": 24.483,
|
7468 |
+
"step": 373000
|
7469 |
+
},
|
7470 |
+
{
|
7471 |
+
"epoch": 8.7,
|
7472 |
+
"learning_rate": 5.785734018652507e-05,
|
7473 |
+
"loss": 0.2584,
|
7474 |
+
"step": 373500
|
7475 |
+
},
|
7476 |
+
{
|
7477 |
+
"epoch": 8.71,
|
7478 |
+
"learning_rate": 5.750188331346927e-05,
|
7479 |
+
"loss": 0.2585,
|
7480 |
+
"step": 374000
|
7481 |
+
},
|
7482 |
+
{
|
7483 |
+
"epoch": 8.71,
|
7484 |
+
"eval_loss": 0.24333250522613525,
|
7485 |
+
"eval_runtime": 1.4555,
|
7486 |
+
"eval_samples_per_second": 1510.18,
|
7487 |
+
"eval_steps_per_second": 24.047,
|
7488 |
+
"step": 374000
|
7489 |
+
},
|
7490 |
+
{
|
7491 |
+
"epoch": 8.73,
|
7492 |
+
"learning_rate": 5.714749266419695e-05,
|
7493 |
+
"loss": 0.2584,
|
7494 |
+
"step": 374500
|
7495 |
+
},
|
7496 |
+
{
|
7497 |
+
"epoch": 8.74,
|
7498 |
+
"learning_rate": 5.6794172114267566e-05,
|
7499 |
+
"loss": 0.2578,
|
7500 |
+
"step": 375000
|
7501 |
+
},
|
7502 |
+
{
|
7503 |
+
"epoch": 8.74,
|
7504 |
+
"eval_loss": 0.24110642075538635,
|
7505 |
+
"eval_runtime": 1.4471,
|
7506 |
+
"eval_samples_per_second": 1518.91,
|
7507 |
+
"eval_steps_per_second": 24.186,
|
7508 |
+
"step": 375000
|
7509 |
+
},
|
7510 |
+
{
|
7511 |
+
"epoch": 8.75,
|
7512 |
+
"learning_rate": 5.6441925527537914e-05,
|
7513 |
+
"loss": 0.2578,
|
7514 |
+
"step": 375500
|
7515 |
+
},
|
7516 |
+
{
|
7517 |
+
"epoch": 8.76,
|
7518 |
+
"learning_rate": 5.60907567561203e-05,
|
7519 |
+
"loss": 0.2582,
|
7520 |
+
"step": 376000
|
7521 |
+
},
|
7522 |
+
{
|
7523 |
+
"epoch": 8.76,
|
7524 |
+
"eval_loss": 0.242658793926239,
|
7525 |
+
"eval_runtime": 1.4703,
|
7526 |
+
"eval_samples_per_second": 1494.982,
|
7527 |
+
"eval_steps_per_second": 23.805,
|
7528 |
+
"step": 376000
|
7529 |
+
},
|
7530 |
+
{
|
7531 |
+
"epoch": 8.77,
|
7532 |
+
"learning_rate": 5.574066964034012e-05,
|
7533 |
+
"loss": 0.2581,
|
7534 |
+
"step": 376500
|
7535 |
+
},
|
7536 |
+
{
|
7537 |
+
"epoch": 8.78,
|
7538 |
+
"learning_rate": 5.539166800869402e-05,
|
7539 |
+
"loss": 0.258,
|
7540 |
+
"step": 377000
|
7541 |
+
},
|
7542 |
+
{
|
7543 |
+
"epoch": 8.78,
|
7544 |
+
"eval_loss": 0.2416759431362152,
|
7545 |
+
"eval_runtime": 1.4756,
|
7546 |
+
"eval_samples_per_second": 1489.53,
|
7547 |
+
"eval_steps_per_second": 23.719,
|
7548 |
+
"step": 377000
|
7549 |
+
},
|
7550 |
+
{
|
7551 |
+
"epoch": 8.8,
|
7552 |
+
"learning_rate": 5.5043755677807955e-05,
|
7553 |
+
"loss": 0.2578,
|
7554 |
+
"step": 377500
|
7555 |
+
},
|
7556 |
+
{
|
7557 |
+
"epoch": 8.81,
|
7558 |
+
"learning_rate": 5.4696936452395344e-05,
|
7559 |
+
"loss": 0.2576,
|
7560 |
+
"step": 378000
|
7561 |
+
},
|
7562 |
+
{
|
7563 |
+
"epoch": 8.81,
|
7564 |
+
"eval_loss": 0.23985832929611206,
|
7565 |
+
"eval_runtime": 1.4569,
|
7566 |
+
"eval_samples_per_second": 1508.675,
|
7567 |
+
"eval_steps_per_second": 24.023,
|
7568 |
+
"step": 378000
|
7569 |
+
},
|
7570 |
+
{
|
7571 |
+
"epoch": 8.82,
|
7572 |
+
"learning_rate": 5.435121412521576e-05,
|
7573 |
+
"loss": 0.2579,
|
7574 |
+
"step": 378500
|
7575 |
+
},
|
7576 |
+
{
|
7577 |
+
"epoch": 8.83,
|
7578 |
+
"learning_rate": 5.400659247703307e-05,
|
7579 |
+
"loss": 0.2574,
|
7580 |
+
"step": 379000
|
7581 |
+
},
|
7582 |
+
{
|
7583 |
+
"epoch": 8.83,
|
7584 |
+
"eval_loss": 0.24152863025665283,
|
7585 |
+
"eval_runtime": 1.4464,
|
7586 |
+
"eval_samples_per_second": 1519.59,
|
7587 |
+
"eval_steps_per_second": 24.197,
|
7588 |
+
"step": 379000
|
7589 |
+
},
|
7590 |
+
{
|
7591 |
+
"epoch": 8.84,
|
7592 |
+
"learning_rate": 5.36630752765745e-05,
|
7593 |
+
"loss": 0.2576,
|
7594 |
+
"step": 379500
|
7595 |
+
},
|
7596 |
+
{
|
7597 |
+
"epoch": 8.85,
|
7598 |
+
"learning_rate": 5.3320666280489146e-05,
|
7599 |
+
"loss": 0.2579,
|
7600 |
+
"step": 380000
|
7601 |
+
},
|
7602 |
+
{
|
7603 |
+
"epoch": 8.85,
|
7604 |
+
"eval_loss": 0.24166275560855865,
|
7605 |
+
"eval_runtime": 1.4454,
|
7606 |
+
"eval_samples_per_second": 1520.737,
|
7607 |
+
"eval_steps_per_second": 24.216,
|
7608 |
+
"step": 380000
|
7609 |
}
|
7610 |
],
|
7611 |
"max_steps": 500000,
|
7612 |
"num_train_epochs": 12,
|
7613 |
+
"total_flos": 1.2140491297503746e+22,
|
7614 |
"trial_name": null,
|
7615 |
"trial_params": null
|
7616 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31402a0bcc9e6eb51374c53180861ce7a4f03142fd97638776d46c6dc480c809
|
3 |
size 102501541
|