Training in progress, step 21400, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd295564c886cab31b38fdc522144c8184d26012f8017f463f8b19068b69ce54
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10129ed8317e1eb717c2b209ed001fa2e165c880f232b8ab7fc6026685e36a65
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d5ca3cb0ab23b9c64be740c79b07d64816cd7d9490aeeae08ccc4a3f3b37ddc
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a20e1a39d049694d044e652201b8a810f3562871d67cc396aaad5e0bbc2f803
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48225e4ec8a66fcccd258aec5acd97fada08046f07bbc5a0c629141f0b9c9d7d
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3399,6 +3399,38 @@
|
|
3399 |
"eval_samples_per_second": 25.994,
|
3400 |
"eval_steps_per_second": 3.25,
|
3401 |
"step": 21200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3402 |
}
|
3403 |
],
|
3404 |
"logging_steps": 100,
|
@@ -3406,7 +3438,7 @@
|
|
3406 |
"num_input_tokens_seen": 0,
|
3407 |
"num_train_epochs": 30,
|
3408 |
"save_steps": 100,
|
3409 |
-
"total_flos": 2.
|
3410 |
"train_batch_size": 8,
|
3411 |
"trial_name": null,
|
3412 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 9.008629762155335,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 21400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3399 |
"eval_samples_per_second": 25.994,
|
3400 |
"eval_steps_per_second": 3.25,
|
3401 |
"step": 21200
|
3402 |
+
},
|
3403 |
+
{
|
3404 |
+
"epoch": 8.97,
|
3405 |
+
"grad_norm": 3.520254135131836,
|
3406 |
+
"learning_rate": 3.53095406360424e-05,
|
3407 |
+
"loss": 1.2819,
|
3408 |
+
"step": 21300
|
3409 |
+
},
|
3410 |
+
{
|
3411 |
+
"epoch": 8.97,
|
3412 |
+
"eval_cer": 0.44700784091353485,
|
3413 |
+
"eval_loss": 1.840844988822937,
|
3414 |
+
"eval_runtime": 393.1165,
|
3415 |
+
"eval_samples_per_second": 24.11,
|
3416 |
+
"eval_steps_per_second": 3.014,
|
3417 |
+
"step": 21300
|
3418 |
+
},
|
3419 |
+
{
|
3420 |
+
"epoch": 9.01,
|
3421 |
+
"grad_norm": 1.5368082523345947,
|
3422 |
+
"learning_rate": 3.523886925795053e-05,
|
3423 |
+
"loss": 1.2587,
|
3424 |
+
"step": 21400
|
3425 |
+
},
|
3426 |
+
{
|
3427 |
+
"epoch": 9.01,
|
3428 |
+
"eval_cer": 0.43732890774705724,
|
3429 |
+
"eval_loss": 2.2236878871917725,
|
3430 |
+
"eval_runtime": 363.5106,
|
3431 |
+
"eval_samples_per_second": 26.074,
|
3432 |
+
"eval_steps_per_second": 3.26,
|
3433 |
+
"step": 21400
|
3434 |
}
|
3435 |
],
|
3436 |
"logging_steps": 100,
|
|
|
3438 |
"num_input_tokens_seen": 0,
|
3439 |
"num_train_epochs": 30,
|
3440 |
"save_steps": 100,
|
3441 |
+
"total_flos": 2.3455923104904626e+20,
|
3442 |
"train_batch_size": 8,
|
3443 |
"trial_name": null,
|
3444 |
"trial_params": null
|