Training in progress, step 46900, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8dfbe3fca61cc77c707b6d697b41387c813bc32fd6d32bd5c1c26762aa6213e5
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a79ff95105c45438b111e350a13810a36dce99b6fcbea094936e85e5e0a2e3ab
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96fafe221adf68f53b28e28362f927e1c10f230fee3f19fb1f737d95674a280f
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a65da35c1d2659bcfb09a26ed28519b60a522956dd7f331870d788477aa1a474
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3e7a17c080461acc65ee185f2f70c19263f58e165eb35d2d33f300ca2a7104a
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 19.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7479,6 +7479,38 @@
|
|
7479 |
"eval_samples_per_second": 25.691,
|
7480 |
"eval_steps_per_second": 3.212,
|
7481 |
"step": 46700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7482 |
}
|
7483 |
],
|
7484 |
"logging_steps": 100,
|
@@ -7486,7 +7518,7 @@
|
|
7486 |
"num_input_tokens_seen": 0,
|
7487 |
"num_train_epochs": 30,
|
7488 |
"save_steps": 100,
|
7489 |
-
"total_flos": 5.
|
7490 |
"train_batch_size": 8,
|
7491 |
"trial_name": null,
|
7492 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 19.743211955377816,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 46900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7479 |
"eval_samples_per_second": 25.691,
|
7480 |
"eval_steps_per_second": 3.212,
|
7481 |
"step": 46700
|
7482 |
+
},
|
7483 |
+
{
|
7484 |
+
"epoch": 19.7,
|
7485 |
+
"grad_norm": 2.5890541076660156,
|
7486 |
+
"learning_rate": 1.7294699646643112e-05,
|
7487 |
+
"loss": 0.4757,
|
7488 |
+
"step": 46800
|
7489 |
+
},
|
7490 |
+
{
|
7491 |
+
"epoch": 19.7,
|
7492 |
+
"eval_cer": 0.3484415939931954,
|
7493 |
+
"eval_loss": 2.6541638374328613,
|
7494 |
+
"eval_runtime": 380.984,
|
7495 |
+
"eval_samples_per_second": 24.878,
|
7496 |
+
"eval_steps_per_second": 3.11,
|
7497 |
+
"step": 46800
|
7498 |
+
},
|
7499 |
+
{
|
7500 |
+
"epoch": 19.74,
|
7501 |
+
"grad_norm": 5.675697326660156,
|
7502 |
+
"learning_rate": 1.7224028268551237e-05,
|
7503 |
+
"loss": 0.506,
|
7504 |
+
"step": 46900
|
7505 |
+
},
|
7506 |
+
{
|
7507 |
+
"epoch": 19.74,
|
7508 |
+
"eval_cer": 0.3504775918032146,
|
7509 |
+
"eval_loss": 2.4880096912384033,
|
7510 |
+
"eval_runtime": 369.3344,
|
7511 |
+
"eval_samples_per_second": 25.662,
|
7512 |
+
"eval_steps_per_second": 3.208,
|
7513 |
+
"step": 46900
|
7514 |
}
|
7515 |
],
|
7516 |
"logging_steps": 100,
|
|
|
7518 |
"num_input_tokens_seen": 0,
|
7519 |
"num_train_epochs": 30,
|
7520 |
"save_steps": 100,
|
7521 |
+
"total_flos": 5.13961492307041e+20,
|
7522 |
"train_batch_size": 8,
|
7523 |
"trial_name": null,
|
7524 |
"trial_params": null
|