Training in progress, step 51500, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6879e7a47865a2d9dbdc1dee441becb941a61f7ae9db55a4342ae07a816d700
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab07935c87ce3f230af1aca71f7d354f2ae99539d4922cd9ff951ce52ecc827f
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:691a9b667c584e2397167aa221b255d9341ff38ca1d8535cfe09f55be3c23054
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6da86477a7e38c974647d1441bec2689395584cdc4189de87cdd2736695ba37f
|
3 |
+
size 14631
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a0508acb91372d3d8167376986d8fc4095af16c3a72cecf2777e3dd527abab5
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 21.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8215,6 +8215,38 @@
|
|
8215 |
"eval_samples_per_second": 24.408,
|
8216 |
"eval_steps_per_second": 3.052,
|
8217 |
"step": 51300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8218 |
}
|
8219 |
],
|
8220 |
"logging_steps": 100,
|
@@ -8222,7 +8254,7 @@
|
|
8222 |
"num_input_tokens_seen": 0,
|
8223 |
"num_train_epochs": 30,
|
8224 |
"save_steps": 100,
|
8225 |
-
"total_flos": 5.
|
8226 |
"train_batch_size": 8,
|
8227 |
"trial_name": null,
|
8228 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 21.679646390233636,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 51500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8215 |
"eval_samples_per_second": 24.408,
|
8216 |
"eval_steps_per_second": 3.052,
|
8217 |
"step": 51300
|
8218 |
+
},
|
8219 |
+
{
|
8220 |
+
"epoch": 21.64,
|
8221 |
+
"grad_norm": 1.9676817655563354,
|
8222 |
+
"learning_rate": 1.4044522968197882e-05,
|
8223 |
+
"loss": 0.3469,
|
8224 |
+
"step": 51400
|
8225 |
+
},
|
8226 |
+
{
|
8227 |
+
"epoch": 21.64,
|
8228 |
+
"eval_cer": 0.34236293066364243,
|
8229 |
+
"eval_loss": 2.332427740097046,
|
8230 |
+
"eval_runtime": 411.3092,
|
8231 |
+
"eval_samples_per_second": 23.043,
|
8232 |
+
"eval_steps_per_second": 2.881,
|
8233 |
+
"step": 51400
|
8234 |
+
},
|
8235 |
+
{
|
8236 |
+
"epoch": 21.68,
|
8237 |
+
"grad_norm": 2.4417457580566406,
|
8238 |
+
"learning_rate": 1.3973851590106007e-05,
|
8239 |
+
"loss": 0.3535,
|
8240 |
+
"step": 51500
|
8241 |
+
},
|
8242 |
+
{
|
8243 |
+
"epoch": 21.68,
|
8244 |
+
"eval_cer": 0.3423751515388526,
|
8245 |
+
"eval_loss": 2.3060784339904785,
|
8246 |
+
"eval_runtime": 391.5586,
|
8247 |
+
"eval_samples_per_second": 24.206,
|
8248 |
+
"eval_steps_per_second": 3.026,
|
8249 |
+
"step": 51500
|
8250 |
}
|
8251 |
],
|
8252 |
"logging_steps": 100,
|
|
|
8254 |
"num_input_tokens_seen": 0,
|
8255 |
"num_train_epochs": 30,
|
8256 |
"save_steps": 100,
|
8257 |
+
"total_flos": 5.643614886293618e+20,
|
8258 |
"train_batch_size": 8,
|
8259 |
"trial_name": null,
|
8260 |
"trial_params": null
|