Training in progress, step 41000, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3f31e821b74dd2aecadf4a49ced761482cb24335fa070340f440dfac62ebd15
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff6ee0aff95bbd15a39f1188865d0051383c510dd4594f6c062ec608c68dc92c
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:440e01e057f577e2f701487c44c7f69aaeaa1240bf4c1499312f3532285ec4eb
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68b009d8e28cf1d32b3e47f2da155cb0f031794cbe7f69274b2edda21d846b19
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ddc5f7d6b9326460646ca0179f41d1dca732c5948fbb1407ab5e2beb6ca76c2
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 17.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6535,6 +6535,38 @@
|
|
6535 |
"eval_samples_per_second": 24.92,
|
6536 |
"eval_steps_per_second": 3.116,
|
6537 |
"step": 40800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6538 |
}
|
6539 |
],
|
6540 |
"logging_steps": 100,
|
@@ -6542,7 +6574,7 @@
|
|
6542 |
"num_input_tokens_seen": 0,
|
6543 |
"num_train_epochs": 30,
|
6544 |
"save_steps": 100,
|
6545 |
-
"total_flos": 4.
|
6546 |
"train_batch_size": 8,
|
6547 |
"trial_name": null,
|
6548 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 17.259524310671438,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 41000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6535 |
"eval_samples_per_second": 24.92,
|
6536 |
"eval_steps_per_second": 3.116,
|
6537 |
"step": 40800
|
6538 |
+
},
|
6539 |
+
{
|
6540 |
+
"epoch": 17.22,
|
6541 |
+
"grad_norm": 7.374648094177246,
|
6542 |
+
"learning_rate": 2.146219081272085e-05,
|
6543 |
+
"loss": 0.6245,
|
6544 |
+
"step": 40900
|
6545 |
+
},
|
6546 |
+
{
|
6547 |
+
"epoch": 17.22,
|
6548 |
+
"eval_cer": 0.36244916115912557,
|
6549 |
+
"eval_loss": 2.089902400970459,
|
6550 |
+
"eval_runtime": 399.0568,
|
6551 |
+
"eval_samples_per_second": 23.751,
|
6552 |
+
"eval_steps_per_second": 2.97,
|
6553 |
+
"step": 40900
|
6554 |
+
},
|
6555 |
+
{
|
6556 |
+
"epoch": 17.26,
|
6557 |
+
"grad_norm": 27.956933975219727,
|
6558 |
+
"learning_rate": 2.1391519434628975e-05,
|
6559 |
+
"loss": 0.6492,
|
6560 |
+
"step": 41000
|
6561 |
+
},
|
6562 |
+
{
|
6563 |
+
"epoch": 17.26,
|
6564 |
+
"eval_cer": 0.36416741621367954,
|
6565 |
+
"eval_loss": 2.015270948410034,
|
6566 |
+
"eval_runtime": 376.6268,
|
6567 |
+
"eval_samples_per_second": 25.165,
|
6568 |
+
"eval_steps_per_second": 3.146,
|
6569 |
+
"step": 41000
|
6570 |
}
|
6571 |
],
|
6572 |
"logging_steps": 100,
|
|
|
6574 |
"num_input_tokens_seen": 0,
|
6575 |
"num_train_epochs": 30,
|
6576 |
"save_steps": 100,
|
6577 |
+
"total_flos": 4.49389546173303e+20,
|
6578 |
"train_batch_size": 8,
|
6579 |
"trial_name": null,
|
6580 |
"trial_params": null
|