Training in progress, step 32700, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc051f6398a756522cc490f0a8f154f42b3f2b30d98bf7d60f11ac1888cdb3da
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02021cbbbec3246b6712aecc4cd14c4d4dfc08e3dbc00c5473d548a791306375
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1539346c05dd0db53e003a5edfb0638d6fd9baf85cb3d6708792dcf6b2d6967
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c999f4c1d5bb7ee3e6b99ed7a27056b2b32e84ae4ae775052bdec993bdba558
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f278373e69e9ebd2b300373366d8c349b14d9205b631b0731c93a52b7c91f8b
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 13.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5207,6 +5207,38 @@
|
|
5207 |
"eval_samples_per_second": 25.515,
|
5208 |
"eval_steps_per_second": 3.19,
|
5209 |
"step": 32500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5210 |
}
|
5211 |
],
|
5212 |
"logging_steps": 100,
|
@@ -5214,7 +5246,7 @@
|
|
5214 |
"num_input_tokens_seen": 0,
|
5215 |
"num_train_epochs": 30,
|
5216 |
"save_steps": 100,
|
5217 |
-
"total_flos": 3.
|
5218 |
"train_batch_size": 8,
|
5219 |
"trial_name": null,
|
5220 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 13.765523047779414,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 32700,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5207 |
"eval_samples_per_second": 25.515,
|
5208 |
"eval_steps_per_second": 3.19,
|
5209 |
"step": 32500
|
5210 |
+
},
|
5211 |
+
{
|
5212 |
+
"epoch": 13.72,
|
5213 |
+
"grad_norm": 2.935277223587036,
|
5214 |
+
"learning_rate": 2.7326501766784452e-05,
|
5215 |
+
"loss": 1.0508,
|
5216 |
+
"step": 32600
|
5217 |
+
},
|
5218 |
+
{
|
5219 |
+
"epoch": 13.72,
|
5220 |
+
"eval_cer": 0.40732665910601856,
|
5221 |
+
"eval_loss": 2.4883534908294678,
|
5222 |
+
"eval_runtime": 395.2638,
|
5223 |
+
"eval_samples_per_second": 23.979,
|
5224 |
+
"eval_steps_per_second": 2.998,
|
5225 |
+
"step": 32600
|
5226 |
+
},
|
5227 |
+
{
|
5228 |
+
"epoch": 13.77,
|
5229 |
+
"grad_norm": 2.7527854442596436,
|
5230 |
+
"learning_rate": 2.7255830388692584e-05,
|
5231 |
+
"loss": 1.0314,
|
5232 |
+
"step": 32700
|
5233 |
+
},
|
5234 |
+
{
|
5235 |
+
"epoch": 13.77,
|
5236 |
+
"eval_cer": 0.40216944976731456,
|
5237 |
+
"eval_loss": 2.4499011039733887,
|
5238 |
+
"eval_runtime": 369.8082,
|
5239 |
+
"eval_samples_per_second": 25.63,
|
5240 |
+
"eval_steps_per_second": 3.204,
|
5241 |
+
"step": 32700
|
5242 |
}
|
5243 |
],
|
5244 |
"logging_steps": 100,
|
|
|
5246 |
"num_input_tokens_seen": 0,
|
5247 |
"num_train_epochs": 30,
|
5248 |
"save_steps": 100,
|
5249 |
+
"total_flos": 3.582774517736169e+20,
|
5250 |
"train_batch_size": 8,
|
5251 |
"trial_name": null,
|
5252 |
"trial_params": null
|