Training in progress, step 43800, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ee0ab11786b912c2f0f9e3b57f5ffbe9d0694ec931e2307992de1c69fcc6067
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2e55e361ac64d7291febab7443b2f6a44b10aa8eab4266d488df2b631e69bb9
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:523b100723d242bcd643f3d25fa3179a70891cdcb5922df989d44d3134b47c80
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3bc8b57edba631898d0c329c841283aca0c73672887396826c194a70381d9299
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:504183be9a2961ad91183bbca527d7ccf44c7b7d7af7b2247d4cb4fe8fc9255e
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 18.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6983,6 +6983,38 @@
|
|
6983 |
"eval_samples_per_second": 24.798,
|
6984 |
"eval_steps_per_second": 3.1,
|
6985 |
"step": 43600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6986 |
}
|
6987 |
],
|
6988 |
"logging_steps": 100,
|
@@ -6990,7 +7022,7 @@
|
|
6990 |
"num_input_tokens_seen": 0,
|
6991 |
"num_train_epochs": 30,
|
6992 |
"save_steps": 100,
|
6993 |
-
"total_flos": 4.
|
6994 |
"train_batch_size": 8,
|
6995 |
"trial_name": null,
|
6996 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 18.438223531888024,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 43800,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6983 |
"eval_samples_per_second": 24.798,
|
6984 |
"eval_steps_per_second": 3.1,
|
6985 |
"step": 43600
|
6986 |
+
},
|
6987 |
+
{
|
6988 |
+
"epoch": 18.4,
|
6989 |
+
"grad_norm": 5.3773956298828125,
|
6990 |
+
"learning_rate": 1.9484805653710248e-05,
|
6991 |
+
"loss": 0.5594,
|
6992 |
+
"step": 43700
|
6993 |
+
},
|
6994 |
+
{
|
6995 |
+
"epoch": 18.4,
|
6996 |
+
"eval_cer": 0.3562653983027648,
|
6997 |
+
"eval_loss": 2.0783262252807617,
|
6998 |
+
"eval_runtime": 404.1105,
|
6999 |
+
"eval_samples_per_second": 23.454,
|
7000 |
+
"eval_steps_per_second": 2.932,
|
7001 |
+
"step": 43700
|
7002 |
+
},
|
7003 |
+
{
|
7004 |
+
"epoch": 18.44,
|
7005 |
+
"grad_norm": 1.7726545333862305,
|
7006 |
+
"learning_rate": 1.9414134275618376e-05,
|
7007 |
+
"loss": 0.5482,
|
7008 |
+
"step": 43800
|
7009 |
+
},
|
7010 |
+
{
|
7011 |
+
"epoch": 18.44,
|
7012 |
+
"eval_cer": 0.3563827187047828,
|
7013 |
+
"eval_loss": 2.1228716373443604,
|
7014 |
+
"eval_runtime": 379.1678,
|
7015 |
+
"eval_samples_per_second": 24.997,
|
7016 |
+
"eval_steps_per_second": 3.125,
|
7017 |
+
"step": 43800
|
7018 |
}
|
7019 |
],
|
7020 |
"logging_steps": 100,
|
|
|
7022 |
"num_input_tokens_seen": 0,
|
7023 |
"num_train_epochs": 30,
|
7024 |
"save_steps": 100,
|
7025 |
+
"total_flos": 4.799417110573236e+20,
|
7026 |
"train_batch_size": 8,
|
7027 |
"trial_name": null,
|
7028 |
"trial_params": null
|