Training in progress, step 22900, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5de2a0b6f5605c28e3b76e90893140030beb717b3d1a72803f0f6f8bf22f20d3
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2fdeca2e925e60f963cb2238d682deedab54a56146007d4419812a3113ddc204
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66d10bfe6adc048388b735955f524b88e59f21f6f4b5176cd8a3a681534d740d
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:024df13920a7bfc09c9ab0f4046389f896972fb4c1a844e4c41f17872fd80aa2
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a23d9aa0c3e1803c2e7edca514f5bf1ca5e6413b4e99acf0ac462c28ded1f82
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 9.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3639,6 +3639,38 @@
|
|
3639 |
"eval_samples_per_second": 26.255,
|
3640 |
"eval_steps_per_second": 3.283,
|
3641 |
"step": 22700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3642 |
}
|
3643 |
],
|
3644 |
"logging_steps": 100,
|
@@ -3646,7 +3678,7 @@
|
|
3646 |
"num_input_tokens_seen": 0,
|
3647 |
"num_train_epochs": 30,
|
3648 |
"save_steps": 100,
|
3649 |
-
"total_flos": 2.
|
3650 |
"train_batch_size": 8,
|
3651 |
"trial_name": null,
|
3652 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 9.640075773521364,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 22900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3639 |
"eval_samples_per_second": 26.255,
|
3640 |
"eval_steps_per_second": 3.283,
|
3641 |
"step": 22700
|
3642 |
+
},
|
3643 |
+
{
|
3644 |
+
"epoch": 9.6,
|
3645 |
+
"grad_norm": 4.784325122833252,
|
3646 |
+
"learning_rate": 3.4250176678445236e-05,
|
3647 |
+
"loss": 1.2519,
|
3648 |
+
"step": 22800
|
3649 |
+
},
|
3650 |
+
{
|
3651 |
+
"epoch": 9.6,
|
3652 |
+
"eval_cer": 0.4394284541081694,
|
3653 |
+
"eval_loss": 2.026627540588379,
|
3654 |
+
"eval_runtime": 383.883,
|
3655 |
+
"eval_samples_per_second": 24.69,
|
3656 |
+
"eval_steps_per_second": 3.087,
|
3657 |
+
"step": 22800
|
3658 |
+
},
|
3659 |
+
{
|
3660 |
+
"epoch": 9.64,
|
3661 |
+
"grad_norm": 34.55485534667969,
|
3662 |
+
"learning_rate": 3.417950530035336e-05,
|
3663 |
+
"loss": 1.2678,
|
3664 |
+
"step": 22900
|
3665 |
+
},
|
3666 |
+
{
|
3667 |
+
"epoch": 9.64,
|
3668 |
+
"eval_cer": 0.45279075906300104,
|
3669 |
+
"eval_loss": 1.6792824268341064,
|
3670 |
+
"eval_runtime": 362.9993,
|
3671 |
+
"eval_samples_per_second": 26.11,
|
3672 |
+
"eval_steps_per_second": 3.264,
|
3673 |
+
"step": 22900
|
3674 |
}
|
3675 |
],
|
3676 |
"logging_steps": 100,
|
|
|
3678 |
"num_input_tokens_seen": 0,
|
3679 |
"num_train_epochs": 30,
|
3680 |
"save_steps": 100,
|
3681 |
+
"total_flos": 2.5097235719710438e+20,
|
3682 |
"train_batch_size": 8,
|
3683 |
"trial_name": null,
|
3684 |
"trial_params": null
|