Training in progress, step 30300, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8547a7322e157d670ed2412e949d1f0885f06c8cbaac3795431c83b2400ad687
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e3f3da746f6d92e513538b782722aacd2930f02af3056fdc6b1b9211ad4f969
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c32888919c52e486af2fe0eedfc4a55ab9ac5c7ea916faaf672fa4ba60c176a
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79dfeac7ede56e3f252c95778498b8d970c65dd0948b9c809c0a7dce43089bf1
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:428cbf0e904ad9abe549f3777087cb24ea80512aa0df465c9f11ee3c360e92cf
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 12.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4823,6 +4823,38 @@
|
|
4823 |
"eval_samples_per_second": 25.556,
|
4824 |
"eval_steps_per_second": 3.195,
|
4825 |
"step": 30100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4826 |
}
|
4827 |
],
|
4828 |
"logging_steps": 100,
|
@@ -4830,7 +4862,7 @@
|
|
4830 |
"num_input_tokens_seen": 0,
|
4831 |
"num_train_epochs": 30,
|
4832 |
"save_steps": 100,
|
4833 |
-
"total_flos": 3.
|
4834 |
"train_batch_size": 8,
|
4835 |
"trial_name": null,
|
4836 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 12.75520942959377,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 30300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4823 |
"eval_samples_per_second": 25.556,
|
4824 |
"eval_steps_per_second": 3.195,
|
4825 |
"step": 30100
|
4826 |
+
},
|
4827 |
+
{
|
4828 |
+
"epoch": 12.71,
|
4829 |
+
"grad_norm": 8.163016319274902,
|
4830 |
+
"learning_rate": 2.9022614840989398e-05,
|
4831 |
+
"loss": 1.2078,
|
4832 |
+
"step": 30200
|
4833 |
+
},
|
4834 |
+
{
|
4835 |
+
"epoch": 12.71,
|
4836 |
+
"eval_cer": 0.41148908920261235,
|
4837 |
+
"eval_loss": 2.2178566455841064,
|
4838 |
+
"eval_runtime": 392.2715,
|
4839 |
+
"eval_samples_per_second": 24.162,
|
4840 |
+
"eval_steps_per_second": 3.021,
|
4841 |
+
"step": 30200
|
4842 |
+
},
|
4843 |
+
{
|
4844 |
+
"epoch": 12.76,
|
4845 |
+
"grad_norm": 7.318975448608398,
|
4846 |
+
"learning_rate": 2.895194346289753e-05,
|
4847 |
+
"loss": 1.0775,
|
4848 |
+
"step": 30300
|
4849 |
+
},
|
4850 |
+
{
|
4851 |
+
"epoch": 12.76,
|
4852 |
+
"eval_cer": 0.4067913847718118,
|
4853 |
+
"eval_loss": 2.1497671604156494,
|
4854 |
+
"eval_runtime": 374.4081,
|
4855 |
+
"eval_samples_per_second": 25.315,
|
4856 |
+
"eval_steps_per_second": 3.165,
|
4857 |
+
"step": 30300
|
4858 |
}
|
4859 |
],
|
4860 |
"logging_steps": 100,
|
|
|
4862 |
"num_input_tokens_seen": 0,
|
4863 |
"num_train_epochs": 30,
|
4864 |
"save_steps": 100,
|
4865 |
+
"total_flos": 3.3198106925315916e+20,
|
4866 |
"train_batch_size": 8,
|
4867 |
"trial_name": null,
|
4868 |
"trial_params": null
|