Training in progress, step 28500, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9d7a9a80fc16ae19f3ee016e1f87ebbe28f8e5c54d2dee20067ac1375b3a699
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6412766255f9548b27afe82cfd466bfa83c0ceff5e923f6bd16707cb31228ca1
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb1d84e520f4f8a65b7489c9592eba3324b25ef0380fb698f857e6540f071004
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54b7c6a6814674e66826f920adadd693d50e8c9564cda211cba81879f559c0af
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7cf7c52a861212ad700d353d281972076097e14210eec92b789d70cbdc6a0277
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 11.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4535,6 +4535,38 @@
|
|
4535 |
"eval_samples_per_second": 25.816,
|
4536 |
"eval_steps_per_second": 3.228,
|
4537 |
"step": 28300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4538 |
}
|
4539 |
],
|
4540 |
"logging_steps": 100,
|
@@ -4542,7 +4574,7 @@
|
|
4542 |
"num_input_tokens_seen": 0,
|
4543 |
"num_train_epochs": 30,
|
4544 |
"save_steps": 100,
|
4545 |
-
"total_flos": 3.
|
4546 |
"train_batch_size": 8,
|
4547 |
"trial_name": null,
|
4548 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 11.997474215954536,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 28500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4535 |
"eval_samples_per_second": 25.816,
|
4536 |
"eval_steps_per_second": 3.228,
|
4537 |
"step": 28300
|
4538 |
+
},
|
4539 |
+
{
|
4540 |
+
"epoch": 11.96,
|
4541 |
+
"grad_norm": 13.059289932250977,
|
4542 |
+
"learning_rate": 3.0293992932862192e-05,
|
4543 |
+
"loss": 1.1318,
|
4544 |
+
"step": 28400
|
4545 |
+
},
|
4546 |
+
{
|
4547 |
+
"epoch": 11.96,
|
4548 |
+
"eval_cer": 0.4224341050408666,
|
4549 |
+
"eval_loss": 2.1082444190979004,
|
4550 |
+
"eval_runtime": 393.3473,
|
4551 |
+
"eval_samples_per_second": 24.096,
|
4552 |
+
"eval_steps_per_second": 3.013,
|
4553 |
+
"step": 28400
|
4554 |
+
},
|
4555 |
+
{
|
4556 |
+
"epoch": 12.0,
|
4557 |
+
"grad_norm": 5.903038501739502,
|
4558 |
+
"learning_rate": 3.0223321554770317e-05,
|
4559 |
+
"loss": 1.1131,
|
4560 |
+
"step": 28500
|
4561 |
+
},
|
4562 |
+
{
|
4563 |
+
"epoch": 12.0,
|
4564 |
+
"eval_cer": 0.41807125259082556,
|
4565 |
+
"eval_loss": 1.7783021926879883,
|
4566 |
+
"eval_runtime": 365.8673,
|
4567 |
+
"eval_samples_per_second": 25.906,
|
4568 |
+
"eval_steps_per_second": 3.239,
|
4569 |
+
"step": 28500
|
4570 |
}
|
4571 |
],
|
4572 |
"logging_steps": 100,
|
|
|
4574 |
"num_input_tokens_seen": 0,
|
4575 |
"num_train_epochs": 30,
|
4576 |
"save_steps": 100,
|
4577 |
+
"total_flos": 3.122274035580518e+20,
|
4578 |
"train_batch_size": 8,
|
4579 |
"trial_name": null,
|
4580 |
"trial_params": null
|