Training in progress, step 44600, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ae988bcf1e80d64fc3c50daaef60bc1bad12fb87f548e09dc40aad3485bc941
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d962bf48fdb22f97a263b871d805534e31f88c562fea1fd3091e25ec9fe6fca5
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4f7df543c28f7b1d4e237537e55af3a1c5c2f2ab50783f01bbf2aff73580c05
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:341280b36e33bee4b43d726478793b13fc021522a0fb4ba6c0b4344e30e91f0e
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e165bf74ae366b7834cfc96ebc1c64c1b63d8ca28272d2793ae0769f087d7fdf
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 18.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7111,6 +7111,38 @@
|
|
7111 |
"eval_samples_per_second": 24.603,
|
7112 |
"eval_steps_per_second": 3.076,
|
7113 |
"step": 44400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7114 |
}
|
7115 |
],
|
7116 |
"logging_steps": 100,
|
@@ -7118,7 +7150,7 @@
|
|
7118 |
"num_input_tokens_seen": 0,
|
7119 |
"num_train_epochs": 30,
|
7120 |
"save_steps": 100,
|
7121 |
-
"total_flos": 4.
|
7122 |
"train_batch_size": 8,
|
7123 |
"trial_name": null,
|
7124 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 18.774994737949907,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 44600,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7111 |
"eval_samples_per_second": 24.603,
|
7112 |
"eval_steps_per_second": 3.076,
|
7113 |
"step": 44400
|
7114 |
+
},
|
7115 |
+
{
|
7116 |
+
"epoch": 18.73,
|
7117 |
+
"grad_norm": 2.587009906768799,
|
7118 |
+
"learning_rate": 1.8919434628975266e-05,
|
7119 |
+
"loss": 0.5442,
|
7120 |
+
"step": 44500
|
7121 |
+
},
|
7122 |
+
{
|
7123 |
+
"epoch": 18.73,
|
7124 |
+
"eval_cer": 0.35546126471393374,
|
7125 |
+
"eval_loss": 2.185746192932129,
|
7126 |
+
"eval_runtime": 399.8878,
|
7127 |
+
"eval_samples_per_second": 23.702,
|
7128 |
+
"eval_steps_per_second": 2.963,
|
7129 |
+
"step": 44500
|
7130 |
+
},
|
7131 |
+
{
|
7132 |
+
"epoch": 18.77,
|
7133 |
+
"grad_norm": 2.142084836959839,
|
7134 |
+
"learning_rate": 1.8849469964664314e-05,
|
7135 |
+
"loss": 0.9587,
|
7136 |
+
"step": 44600
|
7137 |
+
},
|
7138 |
+
{
|
7139 |
+
"epoch": 18.77,
|
7140 |
+
"eval_cer": 0.353083082398029,
|
7141 |
+
"eval_loss": 2.363218307495117,
|
7142 |
+
"eval_runtime": 384.8343,
|
7143 |
+
"eval_samples_per_second": 24.629,
|
7144 |
+
"eval_steps_per_second": 3.079,
|
7145 |
+
"step": 44600
|
7146 |
}
|
7147 |
],
|
7148 |
"logging_steps": 100,
|
|
|
7150 |
"num_input_tokens_seen": 0,
|
7151 |
"num_train_epochs": 30,
|
7152 |
"save_steps": 100,
|
7153 |
+
"total_flos": 4.8869923018175506e+20,
|
7154 |
"train_batch_size": 8,
|
7155 |
"trial_name": null,
|
7156 |
"trial_params": null
|