Training in progress, step 55500, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3be30c038fe83e2c45066c8865edcd4f97638d7c9497d22507519438f766b9e
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc37f0c081dea232ed47687524d26d35478631d3af4618df338fc236e166a184
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5479170340108cb57e7920fc9709a1e6e98ddda8596cfda43bc01b7188a5ccc2
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70e547d0244ac6c17f72a2e2c794d7af278410f2b12b34a7dbb487ec9d9fcdcb
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a30e334ce630adfe0f015710ad2e5f2148b3a474ea9fa476144c76c38fe45245
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 23.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8855,6 +8855,38 @@
|
|
8855 |
"eval_samples_per_second": 24.017,
|
8856 |
"eval_steps_per_second": 3.003,
|
8857 |
"step": 55300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8858 |
}
|
8859 |
],
|
8860 |
"logging_steps": 100,
|
@@ -8862,7 +8894,7 @@
|
|
8862 |
"num_input_tokens_seen": 0,
|
8863 |
"num_train_epochs": 30,
|
8864 |
"save_steps": 100,
|
8865 |
-
"total_flos": 6.
|
8866 |
"train_batch_size": 8,
|
8867 |
"trial_name": null,
|
8868 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 23.363502420543043,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 55500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8855 |
"eval_samples_per_second": 24.017,
|
8856 |
"eval_steps_per_second": 3.003,
|
8857 |
"step": 55300
|
8858 |
+
},
|
8859 |
+
{
|
8860 |
+
"epoch": 23.32,
|
8861 |
+
"grad_norm": 2.5659921169281006,
|
8862 |
+
"learning_rate": 1.1218374558303888e-05,
|
8863 |
+
"loss": 0.2612,
|
8864 |
+
"step": 55400
|
8865 |
+
},
|
8866 |
+
{
|
8867 |
+
"epoch": 23.32,
|
8868 |
+
"eval_cer": 0.3338303155918814,
|
8869 |
+
"eval_loss": 2.4699885845184326,
|
8870 |
+
"eval_runtime": 423.6808,
|
8871 |
+
"eval_samples_per_second": 22.371,
|
8872 |
+
"eval_steps_per_second": 2.797,
|
8873 |
+
"step": 55400
|
8874 |
+
},
|
8875 |
+
{
|
8876 |
+
"epoch": 23.36,
|
8877 |
+
"grad_norm": 1.429592490196228,
|
8878 |
+
"learning_rate": 1.1147703180212014e-05,
|
8879 |
+
"loss": 0.2702,
|
8880 |
+
"step": 55500
|
8881 |
+
},
|
8882 |
+
{
|
8883 |
+
"epoch": 23.36,
|
8884 |
+
"eval_cer": 0.33553879394626723,
|
8885 |
+
"eval_loss": 2.7780275344848633,
|
8886 |
+
"eval_runtime": 398.5381,
|
8887 |
+
"eval_samples_per_second": 23.782,
|
8888 |
+
"eval_steps_per_second": 2.973,
|
8889 |
+
"step": 55500
|
8890 |
}
|
8891 |
],
|
8892 |
"logging_steps": 100,
|
|
|
8894 |
"num_input_tokens_seen": 0,
|
8895 |
"num_train_epochs": 30,
|
8896 |
"save_steps": 100,
|
8897 |
+
"total_flos": 6.081637645988172e+20,
|
8898 |
"train_batch_size": 8,
|
8899 |
"trial_name": null,
|
8900 |
"trial_params": null
|