Training in progress, epoch 78, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559424792
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21c9f69a173a892667d6dee7aaad371f40e7baf8cb3e9d969f1b6dcde4a515f1
|
3 |
size 559424792
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1118926970
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b013c178fe9fdbc20f357d276fbc376ffeb5ae200d4656a25c0840d60905025
|
3 |
size 1118926970
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:325b7db62095a95388ccc4086fa74938caa1cf51efa7e26a65d55b88f9c20149
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43652567e6245845fba84f7f23bb5bb43e6dfe5f4e79e8b80a87c4194eca0dd2
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 12.716951370239258,
|
3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7651,6 +7651,105 @@
|
|
7651 |
"eval_samples_per_second": 29.542,
|
7652 |
"eval_steps_per_second": 3.712,
|
7653 |
"step": 100485
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7654 |
}
|
7655 |
],
|
7656 |
"logging_steps": 100,
|
@@ -7665,7 +7764,7 @@
|
|
7665 |
"early_stopping_threshold": 0.0
|
7666 |
},
|
7667 |
"attributes": {
|
7668 |
-
"early_stopping_patience_counter":
|
7669 |
}
|
7670 |
},
|
7671 |
"TrainerControl": {
|
@@ -7674,12 +7773,12 @@
|
|
7674 |
"should_evaluate": false,
|
7675 |
"should_log": false,
|
7676 |
"should_save": true,
|
7677 |
-
"should_training_stop":
|
7678 |
},
|
7679 |
"attributes": {}
|
7680 |
}
|
7681 |
},
|
7682 |
-
"total_flos": 4.
|
7683 |
"train_batch_size": 8,
|
7684 |
"trial_name": null,
|
7685 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 12.716951370239258,
|
3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-88740",
|
4 |
+
"epoch": 78.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 101790,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7651 |
"eval_samples_per_second": 29.542,
|
7652 |
"eval_steps_per_second": 3.712,
|
7653 |
"step": 100485
|
7654 |
+
},
|
7655 |
+
{
|
7656 |
+
"epoch": 77.01149425287356,
|
7657 |
+
"grad_norm": 1.5276216268539429,
|
7658 |
+
"learning_rate": 1.8869731800766285e-06,
|
7659 |
+
"loss": 12.0855,
|
7660 |
+
"step": 100500
|
7661 |
+
},
|
7662 |
+
{
|
7663 |
+
"epoch": 77.08812260536398,
|
7664 |
+
"grad_norm": 1.6150214672088623,
|
7665 |
+
"learning_rate": 1.839080459770115e-06,
|
7666 |
+
"loss": 12.0257,
|
7667 |
+
"step": 100600
|
7668 |
+
},
|
7669 |
+
{
|
7670 |
+
"epoch": 77.16475095785441,
|
7671 |
+
"grad_norm": 1.33854341506958,
|
7672 |
+
"learning_rate": 1.7911877394636015e-06,
|
7673 |
+
"loss": 11.8612,
|
7674 |
+
"step": 100700
|
7675 |
+
},
|
7676 |
+
{
|
7677 |
+
"epoch": 77.24137931034483,
|
7678 |
+
"grad_norm": 3.1214921474456787,
|
7679 |
+
"learning_rate": 1.7432950191570881e-06,
|
7680 |
+
"loss": 11.8497,
|
7681 |
+
"step": 100800
|
7682 |
+
},
|
7683 |
+
{
|
7684 |
+
"epoch": 77.31800766283524,
|
7685 |
+
"grad_norm": 1.6240577697753906,
|
7686 |
+
"learning_rate": 1.695402298850575e-06,
|
7687 |
+
"loss": 11.7641,
|
7688 |
+
"step": 100900
|
7689 |
+
},
|
7690 |
+
{
|
7691 |
+
"epoch": 77.39463601532567,
|
7692 |
+
"grad_norm": 1.1317625045776367,
|
7693 |
+
"learning_rate": 1.6475095785440615e-06,
|
7694 |
+
"loss": 11.846,
|
7695 |
+
"step": 101000
|
7696 |
+
},
|
7697 |
+
{
|
7698 |
+
"epoch": 77.47126436781609,
|
7699 |
+
"grad_norm": 1.294534683227539,
|
7700 |
+
"learning_rate": 1.599616858237548e-06,
|
7701 |
+
"loss": 11.7364,
|
7702 |
+
"step": 101100
|
7703 |
+
},
|
7704 |
+
{
|
7705 |
+
"epoch": 77.5478927203065,
|
7706 |
+
"grad_norm": 1.099564790725708,
|
7707 |
+
"learning_rate": 1.5517241379310346e-06,
|
7708 |
+
"loss": 11.8554,
|
7709 |
+
"step": 101200
|
7710 |
+
},
|
7711 |
+
{
|
7712 |
+
"epoch": 77.62452107279694,
|
7713 |
+
"grad_norm": 1.1482292413711548,
|
7714 |
+
"learning_rate": 1.503831417624521e-06,
|
7715 |
+
"loss": 11.9346,
|
7716 |
+
"step": 101300
|
7717 |
+
},
|
7718 |
+
{
|
7719 |
+
"epoch": 77.70114942528735,
|
7720 |
+
"grad_norm": 1.198670506477356,
|
7721 |
+
"learning_rate": 1.4559386973180077e-06,
|
7722 |
+
"loss": 11.8685,
|
7723 |
+
"step": 101400
|
7724 |
+
},
|
7725 |
+
{
|
7726 |
+
"epoch": 77.77777777777777,
|
7727 |
+
"grad_norm": 2.162407159805298,
|
7728 |
+
"learning_rate": 1.4080459770114944e-06,
|
7729 |
+
"loss": 11.8622,
|
7730 |
+
"step": 101500
|
7731 |
+
},
|
7732 |
+
{
|
7733 |
+
"epoch": 77.8544061302682,
|
7734 |
+
"grad_norm": 1.3662519454956055,
|
7735 |
+
"learning_rate": 1.3601532567049808e-06,
|
7736 |
+
"loss": 11.9364,
|
7737 |
+
"step": 101600
|
7738 |
+
},
|
7739 |
+
{
|
7740 |
+
"epoch": 77.93103448275862,
|
7741 |
+
"grad_norm": 1.1566288471221924,
|
7742 |
+
"learning_rate": 1.3122605363984675e-06,
|
7743 |
+
"loss": 11.7345,
|
7744 |
+
"step": 101700
|
7745 |
+
},
|
7746 |
+
{
|
7747 |
+
"epoch": 78.0,
|
7748 |
+
"eval_loss": 12.726273536682129,
|
7749 |
+
"eval_runtime": 44.1782,
|
7750 |
+
"eval_samples_per_second": 29.539,
|
7751 |
+
"eval_steps_per_second": 3.712,
|
7752 |
+
"step": 101790
|
7753 |
}
|
7754 |
],
|
7755 |
"logging_steps": 100,
|
|
|
7764 |
"early_stopping_threshold": 0.0
|
7765 |
},
|
7766 |
"attributes": {
|
7767 |
+
"early_stopping_patience_counter": 10
|
7768 |
}
|
7769 |
},
|
7770 |
"TrainerControl": {
|
|
|
7773 |
"should_evaluate": false,
|
7774 |
"should_log": false,
|
7775 |
"should_save": true,
|
7776 |
+
"should_training_stop": true
|
7777 |
},
|
7778 |
"attributes": {}
|
7779 |
}
|
7780 |
},
|
7781 |
+
"total_flos": 4.742440753614336e+16,
|
7782 |
"train_batch_size": 8,
|
7783 |
"trial_name": null,
|
7784 |
"trial_params": null
|