Training in progress, step 35900, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b35f734ee856f18dc12768c16472dc4fa4a48e29bb5476207b1dd1732446ff9
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfa23d05a63ef98a7a08510cfe63830d8c0bfde6e697c271a310ceb8b3f71341
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5a4e470770ebadc4146150e5c1b050c970bb70871bdd3d49120d1a71cafdf88
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa8c3ee406720d81ffabaf84f9e91e08e6b6eb075e6cb1267455d95c6a23d91a
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89c666c1e074cfdb22c4aa9905324d78e551418b1a46be058b6dc5f2e31bf25c
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 15.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5719,6 +5719,38 @@
|
|
5719 |
"eval_samples_per_second": 25.476,
|
5720 |
"eval_steps_per_second": 3.185,
|
5721 |
"step": 35700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5722 |
}
|
5723 |
],
|
5724 |
"logging_steps": 100,
|
@@ -5726,7 +5758,7 @@
|
|
5726 |
"num_input_tokens_seen": 0,
|
5727 |
"num_train_epochs": 30,
|
5728 |
"save_steps": 100,
|
5729 |
-
"total_flos": 3.
|
5730 |
"train_batch_size": 8,
|
5731 |
"trial_name": null,
|
5732 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 15.112607872026942,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 35900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5719 |
"eval_samples_per_second": 25.476,
|
5720 |
"eval_steps_per_second": 3.185,
|
5721 |
"step": 35700
|
5722 |
+
},
|
5723 |
+
{
|
5724 |
+
"epoch": 15.07,
|
5725 |
+
"grad_norm": 2.7166266441345215,
|
5726 |
+
"learning_rate": 2.5065724381625445e-05,
|
5727 |
+
"loss": 0.8238,
|
5728 |
+
"step": 35800
|
5729 |
+
},
|
5730 |
+
{
|
5731 |
+
"epoch": 15.07,
|
5732 |
+
"eval_cer": 0.38250361737906224,
|
5733 |
+
"eval_loss": 2.33854079246521,
|
5734 |
+
"eval_runtime": 394.007,
|
5735 |
+
"eval_samples_per_second": 24.055,
|
5736 |
+
"eval_steps_per_second": 3.008,
|
5737 |
+
"step": 35800
|
5738 |
+
},
|
5739 |
+
{
|
5740 |
+
"epoch": 15.11,
|
5741 |
+
"grad_norm": 5.8333282470703125,
|
5742 |
+
"learning_rate": 2.499505300353357e-05,
|
5743 |
+
"loss": 0.8495,
|
5744 |
+
"step": 35900
|
5745 |
+
},
|
5746 |
+
{
|
5747 |
+
"epoch": 15.11,
|
5748 |
+
"eval_cer": 0.38729908881154435,
|
5749 |
+
"eval_loss": 1.496200680732727,
|
5750 |
+
"eval_runtime": 379.9915,
|
5751 |
+
"eval_samples_per_second": 24.943,
|
5752 |
+
"eval_steps_per_second": 3.118,
|
5753 |
+
"step": 35900
|
5754 |
}
|
5755 |
],
|
5756 |
"logging_steps": 100,
|
|
|
5758 |
"num_input_tokens_seen": 0,
|
5759 |
"num_train_epochs": 30,
|
5760 |
"save_steps": 100,
|
5761 |
+
"total_flos": 3.934653943012538e+20,
|
5762 |
"train_batch_size": 8,
|
5763 |
"trial_name": null,
|
5764 |
"trial_params": null
|