Training in progress, step 50100, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c467a55474359ea150af7f199402cf84590658e0255a9165922970cef84e170
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:489d7ba688753abe3e33833a76b4ae882115ed3d90c66c7a1a12d1ffa7f82528
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f413dee5c854704d5963137b5956e9d25624be5f512c8185c3af5befb0aca4f7
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7672fe3061dcd7690545b653f58f4ee116feae4c3b67a28c5239aa57cd8e646c
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08ae60acffa23a34f3e2f8fcc14339c1e29295c511dc2b5dad5d07178f8a8bf5
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 21.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7991,6 +7991,38 @@
|
|
7991 |
"eval_samples_per_second": 24.261,
|
7992 |
"eval_steps_per_second": 3.033,
|
7993 |
"step": 49900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7994 |
}
|
7995 |
],
|
7996 |
"logging_steps": 100,
|
@@ -7998,7 +8030,7 @@
|
|
7998 |
"num_input_tokens_seen": 0,
|
7999 |
"num_train_epochs": 30,
|
8000 |
"save_steps": 100,
|
8001 |
-
"total_flos": 5.
|
8002 |
"train_batch_size": 8,
|
8003 |
"trial_name": null,
|
8004 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 21.09029677962534,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 50100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7991 |
"eval_samples_per_second": 24.261,
|
7992 |
"eval_steps_per_second": 3.033,
|
7993 |
"step": 49900
|
7994 |
+
},
|
7995 |
+
{
|
7996 |
+
"epoch": 21.05,
|
7997 |
+
"grad_norm": 4.511066913604736,
|
7998 |
+
"learning_rate": 1.50339222614841e-05,
|
7999 |
+
"loss": 0.3451,
|
8000 |
+
"step": 50000
|
8001 |
+
},
|
8002 |
+
{
|
8003 |
+
"epoch": 21.05,
|
8004 |
+
"eval_cer": 0.34278332877087325,
|
8005 |
+
"eval_loss": 2.672588348388672,
|
8006 |
+
"eval_runtime": 415.0883,
|
8007 |
+
"eval_samples_per_second": 22.834,
|
8008 |
+
"eval_steps_per_second": 2.855,
|
8009 |
+
"step": 50000
|
8010 |
+
},
|
8011 |
+
{
|
8012 |
+
"epoch": 21.09,
|
8013 |
+
"grad_norm": 3.952530860900879,
|
8014 |
+
"learning_rate": 1.4963250883392227e-05,
|
8015 |
+
"loss": 0.3449,
|
8016 |
+
"step": 50100
|
8017 |
+
},
|
8018 |
+
{
|
8019 |
+
"epoch": 21.09,
|
8020 |
+
"eval_cer": 0.3425169136912909,
|
8021 |
+
"eval_loss": 2.945607900619507,
|
8022 |
+
"eval_runtime": 386.6565,
|
8023 |
+
"eval_samples_per_second": 24.513,
|
8024 |
+
"eval_steps_per_second": 3.065,
|
8025 |
+
"step": 50100
|
8026 |
}
|
8027 |
],
|
8028 |
"logging_steps": 100,
|
|
|
8030 |
"num_input_tokens_seen": 0,
|
8031 |
"num_train_epochs": 30,
|
8032 |
"save_steps": 100,
|
8033 |
+
"total_flos": 5.490253620598738e+20,
|
8034 |
"train_batch_size": 8,
|
8035 |
"trial_name": null,
|
8036 |
"trial_params": null
|