Training in progress, step 32500, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09eceed9906bfe27bd3680afd50c41121c73b82c7cd5ef345c69fad35c0ab8b8
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bf99370e0c7555ecb290c63576152db41e5b2dbb69d0454ac7e5f89074f7bd4
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e617199afd6c3a950fb46336c901b1ff9024ac6556123985ed6b261b689855f
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd931d9164958a9409e10cbb2086014b2f7f92c6de1fd6882064a496d5c52211
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2b9126a6c1c8e3c86ea30228e288a1b5e1ab3597232760fc5ccce02139d2524
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 13.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5175,6 +5175,38 @@
|
|
5175 |
"eval_samples_per_second": 25.799,
|
5176 |
"eval_steps_per_second": 3.226,
|
5177 |
"step": 32300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5178 |
}
|
5179 |
],
|
5180 |
"logging_steps": 100,
|
@@ -5182,7 +5214,7 @@
|
|
5182 |
"num_input_tokens_seen": 0,
|
5183 |
"num_train_epochs": 30,
|
5184 |
"save_steps": 100,
|
5185 |
-
"total_flos": 3.
|
5186 |
"train_batch_size": 8,
|
5187 |
"trial_name": null,
|
5188 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 13.681330246263945,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 32500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5175 |
"eval_samples_per_second": 25.799,
|
5176 |
"eval_steps_per_second": 3.226,
|
5177 |
"step": 32300
|
5178 |
+
},
|
5179 |
+
{
|
5180 |
+
"epoch": 13.64,
|
5181 |
+
"grad_norm": 2.740971565246582,
|
5182 |
+
"learning_rate": 2.74678445229682e-05,
|
5183 |
+
"loss": 1.0443,
|
5184 |
+
"step": 32400
|
5185 |
+
},
|
5186 |
+
{
|
5187 |
+
"epoch": 13.64,
|
5188 |
+
"eval_cer": 0.422089476359939,
|
5189 |
+
"eval_loss": 2.9092776775360107,
|
5190 |
+
"eval_runtime": 389.1697,
|
5191 |
+
"eval_samples_per_second": 24.354,
|
5192 |
+
"eval_steps_per_second": 3.045,
|
5193 |
+
"step": 32400
|
5194 |
+
},
|
5195 |
+
{
|
5196 |
+
"epoch": 13.68,
|
5197 |
+
"grad_norm": 7.612214088439941,
|
5198 |
+
"learning_rate": 2.7397173144876324e-05,
|
5199 |
+
"loss": 1.0561,
|
5200 |
+
"step": 32500
|
5201 |
+
},
|
5202 |
+
{
|
5203 |
+
"epoch": 13.68,
|
5204 |
+
"eval_cer": 0.4040587970748113,
|
5205 |
+
"eval_loss": 1.9982529878616333,
|
5206 |
+
"eval_runtime": 371.4689,
|
5207 |
+
"eval_samples_per_second": 25.515,
|
5208 |
+
"eval_steps_per_second": 3.19,
|
5209 |
+
"step": 32500
|
5210 |
}
|
5211 |
],
|
5212 |
"logging_steps": 100,
|
|
|
5214 |
"num_input_tokens_seen": 0,
|
5215 |
"num_train_epochs": 30,
|
5216 |
"save_steps": 100,
|
5217 |
+
"total_flos": 3.560483539431464e+20,
|
5218 |
"train_batch_size": 8,
|
5219 |
"trial_name": null,
|
5220 |
"trial_params": null
|