Training in progress, step 55300, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f38a0345160eafc7a2bff16ecb3b832e4bed866ef2900dd0baf0b2f9d5d0eb0
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c019cff8f9be930c3fc0d37425de55b6c57a313e24c2bb3dc34b7f2486efdfbe
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7e4a457495ababe0eb08547a34b3470fd36b5930b98f33eb20ef364970e0c83
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f106411af63d8226ffb8f49ed683b71c1bcf027a8df03274802b22a6f8fda8f
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5aa0f94c39f4406774a86e68d3997d19d329feb34dcd618fc1dc95189d45b9f
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 23.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8823,6 +8823,38 @@
|
|
8823 |
"eval_samples_per_second": 23.084,
|
8824 |
"eval_steps_per_second": 2.886,
|
8825 |
"step": 55100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8826 |
}
|
8827 |
],
|
8828 |
"logging_steps": 100,
|
@@ -8830,7 +8862,7 @@
|
|
8830 |
"num_input_tokens_seen": 0,
|
8831 |
"num_train_epochs": 30,
|
8832 |
"save_steps": 100,
|
8833 |
-
"total_flos": 6.
|
8834 |
"train_batch_size": 8,
|
8835 |
"trial_name": null,
|
8836 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 23.279309619027572,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 55300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8823 |
"eval_samples_per_second": 23.084,
|
8824 |
"eval_steps_per_second": 2.886,
|
8825 |
"step": 55100
|
8826 |
+
},
|
8827 |
+
{
|
8828 |
+
"epoch": 23.24,
|
8829 |
+
"grad_norm": 1.8620479106903076,
|
8830 |
+
"learning_rate": 1.1359717314487633e-05,
|
8831 |
+
"loss": 0.2719,
|
8832 |
+
"step": 55200
|
8833 |
+
},
|
8834 |
+
{
|
8835 |
+
"epoch": 23.24,
|
8836 |
+
"eval_cer": 0.3328697548003598,
|
8837 |
+
"eval_loss": 3.229948043823242,
|
8838 |
+
"eval_runtime": 423.4357,
|
8839 |
+
"eval_samples_per_second": 22.384,
|
8840 |
+
"eval_steps_per_second": 2.799,
|
8841 |
+
"step": 55200
|
8842 |
+
},
|
8843 |
+
{
|
8844 |
+
"epoch": 23.28,
|
8845 |
+
"grad_norm": 1.727643609046936,
|
8846 |
+
"learning_rate": 1.128904593639576e-05,
|
8847 |
+
"loss": 0.2628,
|
8848 |
+
"step": 55300
|
8849 |
+
},
|
8850 |
+
{
|
8851 |
+
"epoch": 23.28,
|
8852 |
+
"eval_cer": 0.33284286887489734,
|
8853 |
+
"eval_loss": 2.7340400218963623,
|
8854 |
+
"eval_runtime": 394.6323,
|
8855 |
+
"eval_samples_per_second": 24.017,
|
8856 |
+
"eval_steps_per_second": 3.003,
|
8857 |
+
"step": 55300
|
8858 |
}
|
8859 |
],
|
8860 |
"logging_steps": 100,
|
|
|
8862 |
"num_input_tokens_seen": 0,
|
8863 |
"num_train_epochs": 30,
|
8864 |
"save_steps": 100,
|
8865 |
+
"total_flos": 6.060174194718131e+20,
|
8866 |
"train_batch_size": 8,
|
8867 |
"trial_name": null,
|
8868 |
"trial_params": null
|