Training in progress, step 55700, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:785057a17e70302c21c007e0ef0b0f57a36cfb96718c31c8f395b008e214458d
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19609ff9aeb7b6551ed81c17110b29191a5d741f3df4560f9d420d2d58bce8b9
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:293accb5c9dbd84af2034bf699ee6c8b6fc3c17f88796b7449387a733eaff90c
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1cde67baa4dd7f0279761a9f5a42adbc49a7e57ab6ad2ad6773f0f379f28031b
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d431db1e058e7753d522d0aa651c6ef43b7fbfb259f3bade675c44750e1cb84
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 23.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8887,6 +8887,38 @@
|
|
8887 |
"eval_samples_per_second": 23.782,
|
8888 |
"eval_steps_per_second": 2.973,
|
8889 |
"step": 55500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8890 |
}
|
8891 |
],
|
8892 |
"logging_steps": 100,
|
@@ -8894,7 +8926,7 @@
|
|
8894 |
"num_input_tokens_seen": 0,
|
8895 |
"num_train_epochs": 30,
|
8896 |
"save_steps": 100,
|
8897 |
-
"total_flos": 6.
|
8898 |
"train_batch_size": 8,
|
8899 |
"trial_name": null,
|
8900 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 23.447695222058513,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 55700,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8887 |
"eval_samples_per_second": 23.782,
|
8888 |
"eval_steps_per_second": 2.973,
|
8889 |
"step": 55500
|
8890 |
+
},
|
8891 |
+
{
|
8892 |
+
"epoch": 23.41,
|
8893 |
+
"grad_norm": 2.3033080101013184,
|
8894 |
+
"learning_rate": 1.107703180212014e-05,
|
8895 |
+
"loss": 0.2574,
|
8896 |
+
"step": 55600
|
8897 |
+
},
|
8898 |
+
{
|
8899 |
+
"epoch": 23.41,
|
8900 |
+
"eval_cer": 0.3336567791638966,
|
8901 |
+
"eval_loss": 2.5886073112487793,
|
8902 |
+
"eval_runtime": 422.633,
|
8903 |
+
"eval_samples_per_second": 22.426,
|
8904 |
+
"eval_steps_per_second": 2.804,
|
8905 |
+
"step": 55600
|
8906 |
+
},
|
8907 |
+
{
|
8908 |
+
"epoch": 23.45,
|
8909 |
+
"grad_norm": 3.2907676696777344,
|
8910 |
+
"learning_rate": 1.100636042402827e-05,
|
8911 |
+
"loss": 0.2612,
|
8912 |
+
"step": 55700
|
8913 |
+
},
|
8914 |
+
{
|
8915 |
+
"epoch": 23.45,
|
8916 |
+
"eval_cer": 0.33410161902154784,
|
8917 |
+
"eval_loss": 2.841667413711548,
|
8918 |
+
"eval_runtime": 403.4827,
|
8919 |
+
"eval_samples_per_second": 23.49,
|
8920 |
+
"eval_steps_per_second": 2.937,
|
8921 |
+
"step": 55700
|
8922 |
}
|
8923 |
],
|
8924 |
"logging_steps": 100,
|
|
|
8926 |
"num_input_tokens_seen": 0,
|
8927 |
"num_train_epochs": 30,
|
8928 |
"save_steps": 100,
|
8929 |
+
"total_flos": 6.103646459472622e+20,
|
8930 |
"train_batch_size": 8,
|
8931 |
"trial_name": null,
|
8932 |
"trial_params": null
|