Training in progress, step 47500, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67401216b3029d2622e7e7add3d17e2a39d754ad2562031455d96e2f627ae67b
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6921906aba6ad6cda84ac2dad9392fde118c107f9c07cce5a4b585abe35d446d
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb52b0ddcb7b49c326f60274849ef6aa3c79a378160d3f09440dbe910b761c44
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c20e682f3fe40fa57d88e22486ddd5912b58563883c794fa3b1d8da05268377
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:372a4198b83fdb5e8fb2ecd27baf4c7bfc16f7ae1e02bdddaa16958cd86125f9
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 19.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7575,6 +7575,38 @@
|
|
7575 |
"eval_samples_per_second": 24.565,
|
7576 |
"eval_steps_per_second": 3.071,
|
7577 |
"step": 47300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7578 |
}
|
7579 |
],
|
7580 |
"logging_steps": 100,
|
@@ -7582,7 +7614,7 @@
|
|
7582 |
"num_input_tokens_seen": 0,
|
7583 |
"num_train_epochs": 30,
|
7584 |
"save_steps": 100,
|
7585 |
-
"total_flos": 5.
|
7586 |
"train_batch_size": 8,
|
7587 |
"trial_name": null,
|
7588 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 19.995790359924225,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 47500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7575 |
"eval_samples_per_second": 24.565,
|
7576 |
"eval_steps_per_second": 3.071,
|
7577 |
"step": 47300
|
7578 |
+
},
|
7579 |
+
{
|
7580 |
+
"epoch": 19.95,
|
7581 |
+
"grad_norm": 3.6561086177825928,
|
7582 |
+
"learning_rate": 1.6870671378091874e-05,
|
7583 |
+
"loss": 0.4914,
|
7584 |
+
"step": 47400
|
7585 |
+
},
|
7586 |
+
{
|
7587 |
+
"epoch": 19.95,
|
7588 |
+
"eval_cer": 0.3475079191271362,
|
7589 |
+
"eval_loss": 2.747345447540283,
|
7590 |
+
"eval_runtime": 406.0174,
|
7591 |
+
"eval_samples_per_second": 23.344,
|
7592 |
+
"eval_steps_per_second": 2.919,
|
7593 |
+
"step": 47400
|
7594 |
+
},
|
7595 |
+
{
|
7596 |
+
"epoch": 20.0,
|
7597 |
+
"grad_norm": 2.65822696685791,
|
7598 |
+
"learning_rate": 1.6800000000000002e-05,
|
7599 |
+
"loss": 0.4636,
|
7600 |
+
"step": 47500
|
7601 |
+
},
|
7602 |
+
{
|
7603 |
+
"epoch": 20.0,
|
7604 |
+
"eval_cer": 0.34717795549646085,
|
7605 |
+
"eval_loss": 3.2108983993530273,
|
7606 |
+
"eval_runtime": 391.3482,
|
7607 |
+
"eval_samples_per_second": 24.219,
|
7608 |
+
"eval_steps_per_second": 3.028,
|
7609 |
+
"step": 47500
|
7610 |
}
|
7611 |
],
|
7612 |
"logging_steps": 100,
|
|
|
7614 |
"num_input_tokens_seen": 0,
|
7615 |
"num_train_epochs": 30,
|
7616 |
"save_steps": 100,
|
7617 |
+
"total_flos": 5.20459391384764e+20,
|
7618 |
"train_batch_size": 8,
|
7619 |
"trial_name": null,
|
7620 |
"trial_params": null
|