Training in progress, step 32300, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02ef20f83611dc5ecda3865d609d1c2529353faeb6e69db3373387d266c57e6d
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cea2f0b6eea82ec7b672c6f99ffe205fc0b7e54f20c87785b2f34e524aa97736
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1f8cabf689066796731ca11e18b147c9b88c182baa68362d28e596087a999dd
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2604f2db63f8d40bcc859d3c5d9db567a5197e74b5fcf8b847396dd21324043f
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:529f0d80ba72a29cd60cd97ed89d7289715ba99737c418335d0e26f49eb2d4a3
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 13.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5143,6 +5143,38 @@
|
|
5143 |
"eval_samples_per_second": 25.555,
|
5144 |
"eval_steps_per_second": 3.195,
|
5145 |
"step": 32100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5146 |
}
|
5147 |
],
|
5148 |
"logging_steps": 100,
|
@@ -5150,7 +5182,7 @@
|
|
5150 |
"num_input_tokens_seen": 0,
|
5151 |
"num_train_epochs": 30,
|
5152 |
"save_steps": 100,
|
5153 |
-
"total_flos": 3.
|
5154 |
"train_batch_size": 8,
|
5155 |
"trial_name": null,
|
5156 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 13.597137444748475,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 32300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5143 |
"eval_samples_per_second": 25.555,
|
5144 |
"eval_steps_per_second": 3.195,
|
5145 |
"step": 32100
|
5146 |
+
},
|
5147 |
+
{
|
5148 |
+
"epoch": 13.56,
|
5149 |
+
"grad_norm": 4.829713344573975,
|
5150 |
+
"learning_rate": 2.7609187279151943e-05,
|
5151 |
+
"loss": 1.0437,
|
5152 |
+
"step": 32200
|
5153 |
+
},
|
5154 |
+
{
|
5155 |
+
"epoch": 13.56,
|
5156 |
+
"eval_cer": 0.4043276563294357,
|
5157 |
+
"eval_loss": 1.8479336500167847,
|
5158 |
+
"eval_runtime": 398.7479,
|
5159 |
+
"eval_samples_per_second": 23.769,
|
5160 |
+
"eval_steps_per_second": 2.972,
|
5161 |
+
"step": 32200
|
5162 |
+
},
|
5163 |
+
{
|
5164 |
+
"epoch": 13.6,
|
5165 |
+
"grad_norm": 3.0680336952209473,
|
5166 |
+
"learning_rate": 2.7538515901060075e-05,
|
5167 |
+
"loss": 1.4222,
|
5168 |
+
"step": 32300
|
5169 |
+
},
|
5170 |
+
{
|
5171 |
+
"epoch": 13.6,
|
5172 |
+
"eval_cer": 0.4040881271753158,
|
5173 |
+
"eval_loss": 2.828200101852417,
|
5174 |
+
"eval_runtime": 367.3731,
|
5175 |
+
"eval_samples_per_second": 25.799,
|
5176 |
+
"eval_steps_per_second": 3.226,
|
5177 |
+
"step": 32300
|
5178 |
}
|
5179 |
],
|
5180 |
"logging_steps": 100,
|
|
|
5182 |
"num_input_tokens_seen": 0,
|
5183 |
"num_train_epochs": 30,
|
5184 |
"save_steps": 100,
|
5185 |
+
"total_flos": 3.5387141404690835e+20,
|
5186 |
"train_batch_size": 8,
|
5187 |
"trial_name": null,
|
5188 |
"trial_params": null
|