Training in progress, epoch 77, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559424792
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5ab75838ed7afe0c288b3631cba017faa548802aa5787f848080d63a4faa88d
|
3 |
size 559424792
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1118926970
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b09965c1441b1cd1585c57f2d17e8d0e016910d7c90ad54c3a3afac8664c3bfd
|
3 |
size 1118926970
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eddd447be293e64952cf96e8afe87fbb852cea08cd9ba63106be57df271d8556
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc365eae5b467a6c47056a0e754f854a75cac7f154e6d27e155fce16f2e6ebd0
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 12.716951370239258,
|
3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7552,6 +7552,105 @@
|
|
7552 |
"eval_samples_per_second": 29.555,
|
7553 |
"eval_steps_per_second": 3.714,
|
7554 |
"step": 99180
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7555 |
}
|
7556 |
],
|
7557 |
"logging_steps": 100,
|
@@ -7566,7 +7665,7 @@
|
|
7566 |
"early_stopping_threshold": 0.0
|
7567 |
},
|
7568 |
"attributes": {
|
7569 |
-
"early_stopping_patience_counter":
|
7570 |
}
|
7571 |
},
|
7572 |
"TrainerControl": {
|
@@ -7580,7 +7679,7 @@
|
|
7580 |
"attributes": {}
|
7581 |
}
|
7582 |
},
|
7583 |
-
"total_flos": 4.
|
7584 |
"train_batch_size": 8,
|
7585 |
"trial_name": null,
|
7586 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 12.716951370239258,
|
3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
|
4 |
+
"epoch": 77.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 100485,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7552 |
"eval_samples_per_second": 29.555,
|
7553 |
"eval_steps_per_second": 3.714,
|
7554 |
"step": 99180
|
7555 |
+
},
|
7556 |
+
{
|
7557 |
+
"epoch": 76.01532567049809,
|
7558 |
+
"grad_norm": 1.737823724746704,
|
7559 |
+
"learning_rate": 2.509578544061303e-06,
|
7560 |
+
"loss": 11.8981,
|
7561 |
+
"step": 99200
|
7562 |
+
},
|
7563 |
+
{
|
7564 |
+
"epoch": 76.0919540229885,
|
7565 |
+
"grad_norm": 1.0878353118896484,
|
7566 |
+
"learning_rate": 2.4616858237547894e-06,
|
7567 |
+
"loss": 11.8443,
|
7568 |
+
"step": 99300
|
7569 |
+
},
|
7570 |
+
{
|
7571 |
+
"epoch": 76.16858237547893,
|
7572 |
+
"grad_norm": 2.0454564094543457,
|
7573 |
+
"learning_rate": 2.413793103448276e-06,
|
7574 |
+
"loss": 11.8515,
|
7575 |
+
"step": 99400
|
7576 |
+
},
|
7577 |
+
{
|
7578 |
+
"epoch": 76.24521072796935,
|
7579 |
+
"grad_norm": 1.3210684061050415,
|
7580 |
+
"learning_rate": 2.3659003831417623e-06,
|
7581 |
+
"loss": 12.0233,
|
7582 |
+
"step": 99500
|
7583 |
+
},
|
7584 |
+
{
|
7585 |
+
"epoch": 76.32183908045977,
|
7586 |
+
"grad_norm": 1.1547104120254517,
|
7587 |
+
"learning_rate": 2.318007662835249e-06,
|
7588 |
+
"loss": 11.7145,
|
7589 |
+
"step": 99600
|
7590 |
+
},
|
7591 |
+
{
|
7592 |
+
"epoch": 76.3984674329502,
|
7593 |
+
"grad_norm": 1.3948626518249512,
|
7594 |
+
"learning_rate": 2.270114942528736e-06,
|
7595 |
+
"loss": 11.7098,
|
7596 |
+
"step": 99700
|
7597 |
+
},
|
7598 |
+
{
|
7599 |
+
"epoch": 76.47509578544062,
|
7600 |
+
"grad_norm": 1.2874501943588257,
|
7601 |
+
"learning_rate": 2.2222222222222225e-06,
|
7602 |
+
"loss": 11.8953,
|
7603 |
+
"step": 99800
|
7604 |
+
},
|
7605 |
+
{
|
7606 |
+
"epoch": 76.55172413793103,
|
7607 |
+
"grad_norm": 1.8570905923843384,
|
7608 |
+
"learning_rate": 2.174329501915709e-06,
|
7609 |
+
"loss": 11.9397,
|
7610 |
+
"step": 99900
|
7611 |
+
},
|
7612 |
+
{
|
7613 |
+
"epoch": 76.62835249042146,
|
7614 |
+
"grad_norm": 1.3673057556152344,
|
7615 |
+
"learning_rate": 2.1264367816091954e-06,
|
7616 |
+
"loss": 11.8056,
|
7617 |
+
"step": 100000
|
7618 |
+
},
|
7619 |
+
{
|
7620 |
+
"epoch": 76.70498084291188,
|
7621 |
+
"grad_norm": 2.1938419342041016,
|
7622 |
+
"learning_rate": 2.078544061302682e-06,
|
7623 |
+
"loss": 11.9414,
|
7624 |
+
"step": 100100
|
7625 |
+
},
|
7626 |
+
{
|
7627 |
+
"epoch": 76.7816091954023,
|
7628 |
+
"grad_norm": 1.9171061515808105,
|
7629 |
+
"learning_rate": 2.0306513409961687e-06,
|
7630 |
+
"loss": 11.8369,
|
7631 |
+
"step": 100200
|
7632 |
+
},
|
7633 |
+
{
|
7634 |
+
"epoch": 76.85823754789271,
|
7635 |
+
"grad_norm": 1.0486401319503784,
|
7636 |
+
"learning_rate": 1.982758620689655e-06,
|
7637 |
+
"loss": 11.8322,
|
7638 |
+
"step": 100300
|
7639 |
+
},
|
7640 |
+
{
|
7641 |
+
"epoch": 76.93486590038314,
|
7642 |
+
"grad_norm": 1.6005215644836426,
|
7643 |
+
"learning_rate": 1.934865900383142e-06,
|
7644 |
+
"loss": 11.8781,
|
7645 |
+
"step": 100400
|
7646 |
+
},
|
7647 |
+
{
|
7648 |
+
"epoch": 77.0,
|
7649 |
+
"eval_loss": 12.72097396850586,
|
7650 |
+
"eval_runtime": 44.1751,
|
7651 |
+
"eval_samples_per_second": 29.542,
|
7652 |
+
"eval_steps_per_second": 3.712,
|
7653 |
+
"step": 100485
|
7654 |
}
|
7655 |
],
|
7656 |
"logging_steps": 100,
|
|
|
7665 |
"early_stopping_threshold": 0.0
|
7666 |
},
|
7667 |
"attributes": {
|
7668 |
+
"early_stopping_patience_counter": 9
|
7669 |
}
|
7670 |
},
|
7671 |
"TrainerControl": {
|
|
|
7679 |
"attributes": {}
|
7680 |
}
|
7681 |
},
|
7682 |
+
"total_flos": 4.681650983960218e+16,
|
7683 |
"train_batch_size": 8,
|
7684 |
"trial_name": null,
|
7685 |
"trial_params": null
|