Training in progress, step 54300, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:261f5aa29d796648d142454d356754c0daa58ea249148b27851231430a96995c
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35e6973bc5c3aceef25441e3d061e46116955f4cf9ff95f3edcc89fd5823832b
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b57c1cb981aa093ad536532c531f30dd1534cd1ce4054df1e5da8e31c553687
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9c9224ba23bc6336e062987353c9877aacd7d8246979cd20e730d3da7787040
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1d6f240808a249e74b97cfcaffd4c82e145821d5e3022d1fd6a98866547498c
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 22.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8663,6 +8663,38 @@
|
|
8663 |
"eval_samples_per_second": 24.031,
|
8664 |
"eval_steps_per_second": 3.004,
|
8665 |
"step": 54100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8666 |
}
|
8667 |
],
|
8668 |
"logging_steps": 100,
|
@@ -8670,7 +8702,7 @@
|
|
8670 |
"num_input_tokens_seen": 0,
|
8671 |
"num_train_epochs": 30,
|
8672 |
"save_steps": 100,
|
8673 |
-
"total_flos": 5.
|
8674 |
"train_batch_size": 8,
|
8675 |
"trial_name": null,
|
8676 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 22.858345611450222,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 54300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8663 |
"eval_samples_per_second": 24.031,
|
8664 |
"eval_steps_per_second": 3.004,
|
8665 |
"step": 54100
|
8666 |
+
},
|
8667 |
+
{
|
8668 |
+
"epoch": 22.82,
|
8669 |
+
"grad_norm": 3.5150487422943115,
|
8670 |
+
"learning_rate": 1.206643109540636e-05,
|
8671 |
+
"loss": 0.3035,
|
8672 |
+
"step": 54200
|
8673 |
+
},
|
8674 |
+
{
|
8675 |
+
"epoch": 22.82,
|
8676 |
+
"eval_cer": 0.3343362598255837,
|
8677 |
+
"eval_loss": 2.274048328399658,
|
8678 |
+
"eval_runtime": 434.158,
|
8679 |
+
"eval_samples_per_second": 21.831,
|
8680 |
+
"eval_steps_per_second": 2.729,
|
8681 |
+
"step": 54200
|
8682 |
+
},
|
8683 |
+
{
|
8684 |
+
"epoch": 22.86,
|
8685 |
+
"grad_norm": 2.194784641265869,
|
8686 |
+
"learning_rate": 1.1995759717314487e-05,
|
8687 |
+
"loss": 0.3109,
|
8688 |
+
"step": 54300
|
8689 |
+
},
|
8690 |
+
{
|
8691 |
+
"epoch": 22.86,
|
8692 |
+
"eval_cer": 0.33527482304172695,
|
8693 |
+
"eval_loss": 2.228667736053467,
|
8694 |
+
"eval_runtime": 394.9356,
|
8695 |
+
"eval_samples_per_second": 23.999,
|
8696 |
+
"eval_steps_per_second": 3.0,
|
8697 |
+
"step": 54300
|
8698 |
}
|
8699 |
],
|
8700 |
"logging_steps": 100,
|
|
|
8702 |
"num_input_tokens_seen": 0,
|
8703 |
"num_train_epochs": 30,
|
8704 |
"save_steps": 100,
|
8705 |
+
"total_flos": 5.949889515390252e+20,
|
8706 |
"train_batch_size": 8,
|
8707 |
"trial_name": null,
|
8708 |
"trial_params": null
|