Training in progress, step 49300, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74a357b98e4ee095ed45472c47587cbff53b11bba8ee52364cf747aec2edad57
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1045be07891224de41ffc49353bbd9c71f37abedb35e78847a85b94a93414d1f
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab63e00126966b51e82c6cfc37d4154dc70a54f57af4954047f6d1bfa10482e3
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3413234ec77271dbb44be6798264935b5bc29bd7a383e5efca859e1b414dc0d
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:776174e0cdc8de84863dc2bc871e616ade46ecb971d9ed338d192083a8322ec2
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 20.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7863,6 +7863,38 @@
|
|
7863 |
"eval_samples_per_second": 24.202,
|
7864 |
"eval_steps_per_second": 3.026,
|
7865 |
"step": 49100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7866 |
}
|
7867 |
],
|
7868 |
"logging_steps": 100,
|
@@ -7870,7 +7902,7 @@
|
|
7870 |
"num_input_tokens_seen": 0,
|
7871 |
"num_train_epochs": 30,
|
7872 |
"save_steps": 100,
|
7873 |
-
"total_flos": 5.
|
7874 |
"train_batch_size": 8,
|
7875 |
"trial_name": null,
|
7876 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 20.75352557356346,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 49300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7863 |
"eval_samples_per_second": 24.202,
|
7864 |
"eval_steps_per_second": 3.026,
|
7865 |
"step": 49100
|
7866 |
+
},
|
7867 |
+
{
|
7868 |
+
"epoch": 20.71,
|
7869 |
+
"grad_norm": 5.6570048332214355,
|
7870 |
+
"learning_rate": 1.5599293286219083e-05,
|
7871 |
+
"loss": 0.4067,
|
7872 |
+
"step": 49200
|
7873 |
+
},
|
7874 |
+
{
|
7875 |
+
"epoch": 20.71,
|
7876 |
+
"eval_cer": 0.3445235814008056,
|
7877 |
+
"eval_loss": 3.190380811691284,
|
7878 |
+
"eval_runtime": 410.6709,
|
7879 |
+
"eval_samples_per_second": 23.079,
|
7880 |
+
"eval_steps_per_second": 2.886,
|
7881 |
+
"step": 49200
|
7882 |
+
},
|
7883 |
+
{
|
7884 |
+
"epoch": 20.75,
|
7885 |
+
"grad_norm": 6.448336124420166,
|
7886 |
+
"learning_rate": 1.5528621908127208e-05,
|
7887 |
+
"loss": 0.428,
|
7888 |
+
"step": 49300
|
7889 |
+
},
|
7890 |
+
{
|
7891 |
+
"epoch": 20.75,
|
7892 |
+
"eval_cer": 0.3451126275859372,
|
7893 |
+
"eval_loss": 3.366471529006958,
|
7894 |
+
"eval_runtime": 393.2567,
|
7895 |
+
"eval_samples_per_second": 24.101,
|
7896 |
+
"eval_steps_per_second": 3.013,
|
7897 |
+
"step": 49300
|
7898 |
}
|
7899 |
],
|
7900 |
"logging_steps": 100,
|
|
|
7902 |
"num_input_tokens_seen": 0,
|
7903 |
"num_train_epochs": 30,
|
7904 |
"save_steps": 100,
|
7905 |
+
"total_flos": 5.4016158411070066e+20,
|
7906 |
"train_batch_size": 8,
|
7907 |
"trial_name": null,
|
7908 |
"trial_params": null
|