Training in progress, step 12700, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c6e9580fb798101735047d737cee85a6bb1061fad515187b2725aaa63bfedfc
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e97e01dc64ab5637bb404489fce4bc374a9d40ac15f27ef1b88fa4b9f545c28
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23b5eb8171ce3b32f1473da5c445fd724fddce4624bb8f3de388a3614ee04629
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d6c38bb1634ce2d5111d8f0da0ee0f0eeb90e7348bc821241c2773d71d06af6
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5e2d9a4da1dca4e718c671213c3210706697de61ee8df6908344ec05bce806c
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 5.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2007,6 +2007,38 @@
|
|
2007 |
"eval_samples_per_second": 24.817,
|
2008 |
"eval_steps_per_second": 3.103,
|
2009 |
"step": 12500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2010 |
}
|
2011 |
],
|
2012 |
"logging_steps": 100,
|
@@ -2014,7 +2046,7 @@
|
|
2014 |
"num_input_tokens_seen": 0,
|
2015 |
"num_train_epochs": 30,
|
2016 |
"save_steps": 100,
|
2017 |
-
"total_flos": 1.
|
2018 |
"train_batch_size": 8,
|
2019 |
"trial_name": null,
|
2020 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.346242896232372,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 12700,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2007 |
"eval_samples_per_second": 24.817,
|
2008 |
"eval_steps_per_second": 3.103,
|
2009 |
"step": 12500
|
2010 |
+
},
|
2011 |
+
{
|
2012 |
+
"epoch": 5.3,
|
2013 |
+
"grad_norm": 9.076600074768066,
|
2014 |
+
"learning_rate": 4.1455830388692577e-05,
|
2015 |
+
"loss": 2.0271,
|
2016 |
+
"step": 12600
|
2017 |
+
},
|
2018 |
+
{
|
2019 |
+
"epoch": 5.3,
|
2020 |
+
"eval_cer": 0.48266591060185365,
|
2021 |
+
"eval_loss": 2.858898639678955,
|
2022 |
+
"eval_runtime": 405.4609,
|
2023 |
+
"eval_samples_per_second": 23.376,
|
2024 |
+
"eval_steps_per_second": 2.923,
|
2025 |
+
"step": 12600
|
2026 |
+
},
|
2027 |
+
{
|
2028 |
+
"epoch": 5.35,
|
2029 |
+
"grad_norm": 1.6710706949234009,
|
2030 |
+
"learning_rate": 4.138515901060071e-05,
|
2031 |
+
"loss": 1.7331,
|
2032 |
+
"step": 12700
|
2033 |
+
},
|
2034 |
+
{
|
2035 |
+
"epoch": 5.35,
|
2036 |
+
"eval_cer": 0.501048551093035,
|
2037 |
+
"eval_loss": 2.9208521842956543,
|
2038 |
+
"eval_runtime": 361.6906,
|
2039 |
+
"eval_samples_per_second": 26.205,
|
2040 |
+
"eval_steps_per_second": 3.276,
|
2041 |
+
"step": 12700
|
2042 |
}
|
2043 |
],
|
2044 |
"logging_steps": 100,
|
|
|
2046 |
"num_input_tokens_seen": 0,
|
2047 |
"num_train_epochs": 30,
|
2048 |
"save_steps": 100,
|
2049 |
+
"total_flos": 1.3921302086472671e+20,
|
2050 |
"train_batch_size": 8,
|
2051 |
"trial_name": null,
|
2052 |
"trial_params": null
|