Training in progress, step 47700, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c737cef3e8496bd78bd7ec0023f68befe3d9a9c05598534030ccd26c85918ac6
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2723715a10a82482c591b82cb51edd59abae6401d5ba96ea07090b181b9d643
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b283c3ae87191de5fca07697a3974a9d234e69848cf8838bc28bee17cf75ce3
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:392684c134f5cf29a9c14bb0a154c1d7d4d520741978198c633be27f537faa31
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9916ddee38224af3ba1b5cf7e13befa56143fa30905487537b8489305fe5fcae
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7607,6 +7607,38 @@
|
|
7607 |
"eval_samples_per_second": 24.219,
|
7608 |
"eval_steps_per_second": 3.028,
|
7609 |
"step": 47500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7610 |
}
|
7611 |
],
|
7612 |
"logging_steps": 100,
|
@@ -7614,7 +7646,7 @@
|
|
7614 |
"num_input_tokens_seen": 0,
|
7615 |
"num_train_epochs": 30,
|
7616 |
"save_steps": 100,
|
7617 |
-
"total_flos": 5.
|
7618 |
"train_batch_size": 8,
|
7619 |
"trial_name": null,
|
7620 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 20.079983161439696,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 47700,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7607 |
"eval_samples_per_second": 24.219,
|
7608 |
"eval_steps_per_second": 3.028,
|
7609 |
"step": 47500
|
7610 |
+
},
|
7611 |
+
{
|
7612 |
+
"epoch": 20.04,
|
7613 |
+
"grad_norm": 2.881988525390625,
|
7614 |
+
"learning_rate": 1.6729328621908126e-05,
|
7615 |
+
"loss": 0.4024,
|
7616 |
+
"step": 47600
|
7617 |
+
},
|
7618 |
+
{
|
7619 |
+
"epoch": 20.04,
|
7620 |
+
"eval_cer": 0.3465546908607407,
|
7621 |
+
"eval_loss": 2.992396831512451,
|
7622 |
+
"eval_runtime": 406.9581,
|
7623 |
+
"eval_samples_per_second": 23.29,
|
7624 |
+
"eval_steps_per_second": 2.912,
|
7625 |
+
"step": 47600
|
7626 |
+
},
|
7627 |
+
{
|
7628 |
+
"epoch": 20.08,
|
7629 |
+
"grad_norm": 2.220200777053833,
|
7630 |
+
"learning_rate": 1.6658657243816255e-05,
|
7631 |
+
"loss": 0.4103,
|
7632 |
+
"step": 47700
|
7633 |
+
},
|
7634 |
+
{
|
7635 |
+
"epoch": 20.08,
|
7636 |
+
"eval_cer": 0.3478818779085683,
|
7637 |
+
"eval_loss": 3.0730020999908447,
|
7638 |
+
"eval_runtime": 382.3538,
|
7639 |
+
"eval_samples_per_second": 24.789,
|
7640 |
+
"eval_steps_per_second": 3.099,
|
7641 |
+
"step": 47700
|
7642 |
}
|
7643 |
],
|
7644 |
"logging_steps": 100,
|
|
|
7646 |
"num_input_tokens_seen": 0,
|
7647 |
"num_train_epochs": 30,
|
7648 |
"save_steps": 100,
|
7649 |
+
"total_flos": 5.227217240325222e+20,
|
7650 |
"train_batch_size": 8,
|
7651 |
"trial_name": null,
|
7652 |
"trial_params": null
|