Training in progress, step 41400, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8a24d4d54212b7695bf8a287d4f8eb9270a6d60d13f9a931c5779f13538973f
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c05975af95ab79acf8a310695dfc38749b93829e001cc546ee9d4792a95bad7
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe48223f5c90918abbcc4c7259c7f6f8efbe538b78aea4ef8ef1741ac5a2fe8c
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52d26f522d8cbc1a8729bb0644c4ccc57cbd2a2a52d55e5adac8c2487f5ed38e
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc876a1c5dec1820929e730793e2c174c8e91d009bdd7e30b3f49cc015882f56
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 17.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6567,6 +6567,70 @@
|
|
6567 |
"eval_samples_per_second": 25.165,
|
6568 |
"eval_steps_per_second": 3.146,
|
6569 |
"step": 41000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6570 |
}
|
6571 |
],
|
6572 |
"logging_steps": 100,
|
@@ -6574,7 +6638,7 @@
|
|
6574 |
"num_input_tokens_seen": 0,
|
6575 |
"num_train_epochs": 30,
|
6576 |
"save_steps": 100,
|
6577 |
-
"total_flos": 4.
|
6578 |
"train_batch_size": 8,
|
6579 |
"trial_name": null,
|
6580 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 17.42790991370238,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 41400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6567 |
"eval_samples_per_second": 25.165,
|
6568 |
"eval_steps_per_second": 3.146,
|
6569 |
"step": 41000
|
6570 |
+
},
|
6571 |
+
{
|
6572 |
+
"epoch": 17.3,
|
6573 |
+
"grad_norm": 5.671442031860352,
|
6574 |
+
"learning_rate": 2.1320848056537103e-05,
|
6575 |
+
"loss": 0.6544,
|
6576 |
+
"step": 41100
|
6577 |
+
},
|
6578 |
+
{
|
6579 |
+
"epoch": 17.3,
|
6580 |
+
"eval_cer": 0.3657659066911736,
|
6581 |
+
"eval_loss": 1.7072508335113525,
|
6582 |
+
"eval_runtime": 397.2129,
|
6583 |
+
"eval_samples_per_second": 23.861,
|
6584 |
+
"eval_steps_per_second": 2.983,
|
6585 |
+
"step": 41100
|
6586 |
+
},
|
6587 |
+
{
|
6588 |
+
"epoch": 17.34,
|
6589 |
+
"grad_norm": 1.6116443872451782,
|
6590 |
+
"learning_rate": 2.125017667844523e-05,
|
6591 |
+
"loss": 0.6304,
|
6592 |
+
"step": 41200
|
6593 |
+
},
|
6594 |
+
{
|
6595 |
+
"epoch": 17.34,
|
6596 |
+
"eval_cer": 0.36735950881858354,
|
6597 |
+
"eval_loss": 2.110800266265869,
|
6598 |
+
"eval_runtime": 379.2769,
|
6599 |
+
"eval_samples_per_second": 24.99,
|
6600 |
+
"eval_steps_per_second": 3.124,
|
6601 |
+
"step": 41200
|
6602 |
+
},
|
6603 |
+
{
|
6604 |
+
"epoch": 17.39,
|
6605 |
+
"grad_norm": 2.9379818439483643,
|
6606 |
+
"learning_rate": 2.117950530035336e-05,
|
6607 |
+
"loss": 0.6426,
|
6608 |
+
"step": 41300
|
6609 |
+
},
|
6610 |
+
{
|
6611 |
+
"epoch": 17.39,
|
6612 |
+
"eval_cer": 0.3644313871182199,
|
6613 |
+
"eval_loss": 2.184722900390625,
|
6614 |
+
"eval_runtime": 397.6084,
|
6615 |
+
"eval_samples_per_second": 23.838,
|
6616 |
+
"eval_steps_per_second": 2.98,
|
6617 |
+
"step": 41300
|
6618 |
+
},
|
6619 |
+
{
|
6620 |
+
"epoch": 17.43,
|
6621 |
+
"grad_norm": 2.8842365741729736,
|
6622 |
+
"learning_rate": 2.1108833922261484e-05,
|
6623 |
+
"loss": 0.6913,
|
6624 |
+
"step": 41400
|
6625 |
+
},
|
6626 |
+
{
|
6627 |
+
"epoch": 17.43,
|
6628 |
+
"eval_cer": 0.36425785069023503,
|
6629 |
+
"eval_loss": 1.9356818199157715,
|
6630 |
+
"eval_runtime": 381.9605,
|
6631 |
+
"eval_samples_per_second": 24.814,
|
6632 |
+
"eval_steps_per_second": 3.102,
|
6633 |
+
"step": 41400
|
6634 |
}
|
6635 |
],
|
6636 |
"logging_steps": 100,
|
|
|
6638 |
"num_input_tokens_seen": 0,
|
6639 |
"num_train_epochs": 30,
|
6640 |
"save_steps": 100,
|
6641 |
+
"total_flos": 4.5372888758914744e+20,
|
6642 |
"train_batch_size": 8,
|
6643 |
"trial_name": null,
|
6644 |
"trial_params": null
|