Training in progress, step 26700, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eff3cdcb7d4b7ed611afcd9ebd6643d02bc2e1b35c4bb74c08c50cf9a5f561a7
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3b6b9e0933269179cf5434f263f8f9d9d7b6fff1b073c6931905cc4b53994b5
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad304c52a9c03014020a10f709aaf27545830076ddab93a26c7de57de9dde158
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:840a7138cc3ba7fbc10190e1e4c329f4c41a56a5385a8c0310bf23bf7b9dc752
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ac896d0e921c51d399d94984f4a87a03619430db29ebc79fed180feeab8b5af
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 11.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4247,6 +4247,38 @@
|
|
4247 |
"eval_samples_per_second": 25.322,
|
4248 |
"eval_steps_per_second": 3.166,
|
4249 |
"step": 26500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4250 |
}
|
4251 |
],
|
4252 |
"logging_steps": 100,
|
@@ -4254,7 +4286,7 @@
|
|
4254 |
"num_input_tokens_seen": 0,
|
4255 |
"num_train_epochs": 30,
|
4256 |
"save_steps": 100,
|
4257 |
-
"total_flos": 2.
|
4258 |
"train_batch_size": 8,
|
4259 |
"trial_name": null,
|
4260 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 11.239739002315302,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 26700,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4247 |
"eval_samples_per_second": 25.322,
|
4248 |
"eval_steps_per_second": 3.166,
|
4249 |
"step": 26500
|
4250 |
+
},
|
4251 |
+
{
|
4252 |
+
"epoch": 11.2,
|
4253 |
+
"grad_norm": 2.1193289756774902,
|
4254 |
+
"learning_rate": 3.156537102473498e-05,
|
4255 |
+
"loss": 1.1587,
|
4256 |
+
"step": 26600
|
4257 |
+
},
|
4258 |
+
{
|
4259 |
+
"epoch": 11.2,
|
4260 |
+
"eval_cer": 0.4244921004262641,
|
4261 |
+
"eval_loss": 2.2303755283355713,
|
4262 |
+
"eval_runtime": 390.1983,
|
4263 |
+
"eval_samples_per_second": 24.29,
|
4264 |
+
"eval_steps_per_second": 3.037,
|
4265 |
+
"step": 26600
|
4266 |
+
},
|
4267 |
+
{
|
4268 |
+
"epoch": 11.24,
|
4269 |
+
"grad_norm": 13.682442665100098,
|
4270 |
+
"learning_rate": 3.149469964664311e-05,
|
4271 |
+
"loss": 1.1569,
|
4272 |
+
"step": 26700
|
4273 |
+
},
|
4274 |
+
{
|
4275 |
+
"epoch": 11.24,
|
4276 |
+
"eval_cer": 0.4308762856360721,
|
4277 |
+
"eval_loss": 1.715321660041809,
|
4278 |
+
"eval_runtime": 368.2313,
|
4279 |
+
"eval_samples_per_second": 25.739,
|
4280 |
+
"eval_steps_per_second": 3.218,
|
4281 |
+
"step": 26700
|
4282 |
}
|
4283 |
],
|
4284 |
"logging_steps": 100,
|
|
|
4286 |
"num_input_tokens_seen": 0,
|
4287 |
"num_train_epochs": 30,
|
4288 |
"save_steps": 100,
|
4289 |
+
"total_flos": 2.9261113218403457e+20,
|
4290 |
"train_batch_size": 8,
|
4291 |
"trial_name": null,
|
4292 |
"trial_params": null
|