Training in progress, step 20400, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:424006b63705ff8d128ce03cf112d0f0202b9b53563618c80069b74aadf6b121
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:486219a9cbd763fd42552ee2a1a2647b831dc1b08e3f181c8b5b51fe133f0228
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0908165977f8c3d9489a849f85d9268f712bef0a616c664c5b76ba43a71c7f3d
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae9aa79a869a104ab7e27c688f3cfabb1a5b016e6c0d9194e567985352b95f11
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:006c5e8305ef9dc1fe3403e1173068b8cb7f990d0fc5d4b955050dc8157ec06d
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 8.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3239,6 +3239,38 @@
|
|
3239 |
"eval_samples_per_second": 26.116,
|
3240 |
"eval_steps_per_second": 3.265,
|
3241 |
"step": 20200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3242 |
}
|
3243 |
],
|
3244 |
"logging_steps": 100,
|
@@ -3246,7 +3278,7 @@
|
|
3246 |
"num_input_tokens_seen": 0,
|
3247 |
"num_train_epochs": 30,
|
3248 |
"save_steps": 100,
|
3249 |
-
"total_flos": 2.
|
3250 |
"train_batch_size": 8,
|
3251 |
"trial_name": null,
|
3252 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.587665754577984,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 20400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3239 |
"eval_samples_per_second": 26.116,
|
3240 |
"eval_steps_per_second": 3.265,
|
3241 |
"step": 20200
|
3242 |
+
},
|
3243 |
+
{
|
3244 |
+
"epoch": 8.55,
|
3245 |
+
"grad_norm": 6.447911262512207,
|
3246 |
+
"learning_rate": 3.6016254416961133e-05,
|
3247 |
+
"loss": 2.11,
|
3248 |
+
"step": 20300
|
3249 |
+
},
|
3250 |
+
{
|
3251 |
+
"epoch": 8.55,
|
3252 |
+
"eval_cer": 0.43633657267998904,
|
3253 |
+
"eval_loss": 3.3446898460388184,
|
3254 |
+
"eval_runtime": 385.9035,
|
3255 |
+
"eval_samples_per_second": 24.561,
|
3256 |
+
"eval_steps_per_second": 3.071,
|
3257 |
+
"step": 20300
|
3258 |
+
},
|
3259 |
+
{
|
3260 |
+
"epoch": 8.59,
|
3261 |
+
"grad_norm": 2.172891139984131,
|
3262 |
+
"learning_rate": 3.5945583038869255e-05,
|
3263 |
+
"loss": 1.4999,
|
3264 |
+
"step": 20400
|
3265 |
+
},
|
3266 |
+
{
|
3267 |
+
"epoch": 8.59,
|
3268 |
+
"eval_cer": 0.4393355754565719,
|
3269 |
+
"eval_loss": 3.0930521488189697,
|
3270 |
+
"eval_runtime": 363.4753,
|
3271 |
+
"eval_samples_per_second": 26.076,
|
3272 |
+
"eval_steps_per_second": 3.26,
|
3273 |
+
"step": 20400
|
3274 |
}
|
3275 |
],
|
3276 |
"logging_steps": 100,
|
|
|
3278 |
"num_input_tokens_seen": 0,
|
3279 |
"num_train_epochs": 30,
|
3280 |
"save_steps": 100,
|
3281 |
+
"total_flos": 2.2352658822253032e+20,
|
3282 |
"train_batch_size": 8,
|
3283 |
"trial_name": null,
|
3284 |
"trial_params": null
|