Training in progress, step 16900, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87bc7c877cdfbe54847b102871e2d4073e1e3b3e77f4628403c62609d4215ee0
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fa0adf8ce583fad0205dd992aeb22f4bb05366efc58cf6b0feae933acf7733a
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1057f87e50786d1aca543245abd8aa5218ae3920b5ed10328f1cc6e8d05b8fb8
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70bc272cae15954849873b08592843a4166415a3756dbd7282f8116aed448377
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5e32e74e7ac82af1d3f98e8ce057551c88fd1fff091ef003d7c5821593146b8
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2679,6 +2679,38 @@
|
|
2679 |
"eval_samples_per_second": 26.016,
|
2680 |
"eval_steps_per_second": 3.253,
|
2681 |
"step": 16700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2682 |
}
|
2683 |
],
|
2684 |
"logging_steps": 100,
|
@@ -2686,7 +2718,7 @@
|
|
2686 |
"num_input_tokens_seen": 0,
|
2687 |
"num_train_epochs": 30,
|
2688 |
"save_steps": 100,
|
2689 |
-
"total_flos": 1.
|
2690 |
"train_batch_size": 8,
|
2691 |
"trial_name": null,
|
2692 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.1142917280572515,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 16900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2679 |
"eval_samples_per_second": 26.016,
|
2680 |
"eval_steps_per_second": 3.253,
|
2681 |
"step": 16700
|
2682 |
+
},
|
2683 |
+
{
|
2684 |
+
"epoch": 7.07,
|
2685 |
+
"grad_norm": 4.643332004547119,
|
2686 |
+
"learning_rate": 3.848833922261484e-05,
|
2687 |
+
"loss": 1.3365,
|
2688 |
+
"step": 16800
|
2689 |
+
},
|
2690 |
+
{
|
2691 |
+
"epoch": 7.07,
|
2692 |
+
"eval_cer": 0.46146024793711626,
|
2693 |
+
"eval_loss": 1.8595181703567505,
|
2694 |
+
"eval_runtime": 374.6805,
|
2695 |
+
"eval_samples_per_second": 25.296,
|
2696 |
+
"eval_steps_per_second": 3.163,
|
2697 |
+
"step": 16800
|
2698 |
+
},
|
2699 |
+
{
|
2700 |
+
"epoch": 7.11,
|
2701 |
+
"grad_norm": 2.433307409286499,
|
2702 |
+
"learning_rate": 3.841766784452297e-05,
|
2703 |
+
"loss": 1.6175,
|
2704 |
+
"step": 16900
|
2705 |
+
},
|
2706 |
+
{
|
2707 |
+
"epoch": 7.11,
|
2708 |
+
"eval_cer": 0.468870986664581,
|
2709 |
+
"eval_loss": 1.7079046964645386,
|
2710 |
+
"eval_runtime": 364.7345,
|
2711 |
+
"eval_samples_per_second": 25.986,
|
2712 |
+
"eval_steps_per_second": 3.249,
|
2713 |
+
"step": 16900
|
2714 |
}
|
2715 |
],
|
2716 |
"logging_steps": 100,
|
|
|
2718 |
"num_input_tokens_seen": 0,
|
2719 |
"num_train_epochs": 30,
|
2720 |
"save_steps": 100,
|
2721 |
+
"total_flos": 1.8525968673735387e+20,
|
2722 |
"train_batch_size": 8,
|
2723 |
"trial_name": null,
|
2724 |
"trial_params": null
|