Training in progress, step 24100, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d1d86a67812950b8066ed1bd4785f9e5f74864e9e185c3c7d65a737e73412eb
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9bb1ef4ed06294b4512beffd9b8417f18d9d6c0bf30582a61d1b2418d2d7ea6
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2964d4840b876fd5ea402f55cbb3d3ecf1ccd015054120c9986b8308a80fe3b6
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29deb45594f2ffe1b33e01ccb1b744191d09d4b58ab92a56ff666176b2e013c6
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed638b60d93524052d154552e2b26184c00d0eedea3ca18e5c2db2c0ab950927
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 10.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3831,6 +3831,38 @@
|
|
3831 |
"eval_samples_per_second": 25.853,
|
3832 |
"eval_steps_per_second": 3.232,
|
3833 |
"step": 23900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3834 |
}
|
3835 |
],
|
3836 |
"logging_steps": 100,
|
@@ -3838,7 +3870,7 @@
|
|
3838 |
"num_input_tokens_seen": 0,
|
3839 |
"num_train_epochs": 30,
|
3840 |
"save_steps": 100,
|
3841 |
-
"total_flos": 2.
|
3842 |
"train_batch_size": 8,
|
3843 |
"trial_name": null,
|
3844 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 10.145232582614186,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 24100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3831 |
"eval_samples_per_second": 25.853,
|
3832 |
"eval_steps_per_second": 3.232,
|
3833 |
"step": 23900
|
3834 |
+
},
|
3835 |
+
{
|
3836 |
+
"epoch": 10.1,
|
3837 |
+
"grad_norm": 2.430925130844116,
|
3838 |
+
"learning_rate": 3.340212014134276e-05,
|
3839 |
+
"loss": 1.185,
|
3840 |
+
"step": 24000
|
3841 |
+
},
|
3842 |
+
{
|
3843 |
+
"epoch": 10.1,
|
3844 |
+
"eval_cer": 0.4357133080442689,
|
3845 |
+
"eval_loss": 1.5434983968734741,
|
3846 |
+
"eval_runtime": 385.0716,
|
3847 |
+
"eval_samples_per_second": 24.614,
|
3848 |
+
"eval_steps_per_second": 3.077,
|
3849 |
+
"step": 24000
|
3850 |
+
},
|
3851 |
+
{
|
3852 |
+
"epoch": 10.15,
|
3853 |
+
"grad_norm": 2.367770195007324,
|
3854 |
+
"learning_rate": 3.333144876325088e-05,
|
3855 |
+
"loss": 1.2225,
|
3856 |
+
"step": 24100
|
3857 |
+
},
|
3858 |
+
{
|
3859 |
+
"epoch": 10.15,
|
3860 |
+
"eval_cer": 0.4268898361425052,
|
3861 |
+
"eval_loss": 1.602295160293579,
|
3862 |
+
"eval_runtime": 368.0631,
|
3863 |
+
"eval_samples_per_second": 25.751,
|
3864 |
+
"eval_steps_per_second": 3.22,
|
3865 |
+
"step": 24100
|
3866 |
}
|
3867 |
],
|
3868 |
"logging_steps": 100,
|
|
|
3870 |
"num_input_tokens_seen": 0,
|
3871 |
"num_train_epochs": 30,
|
3872 |
"save_steps": 100,
|
3873 |
+
"total_flos": 2.6405292408352522e+20,
|
3874 |
"train_batch_size": 8,
|
3875 |
"trial_name": null,
|
3876 |
"trial_params": null
|