Training in progress, step 2400, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44d55abe18136649fb6def532385b4b394d9e108fdf8f4ca57578d416c9b41ef
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90c33bc16a6ad0497f092d4c75ded33c7dd30729c9f35a49f646bce7c9c01ac4
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95c02ba7f38d49e33dc88cf95e4ccb546c08b6fd6a7dddb75eaf0aad68dc9620
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d84eac669e63d3ebd7cbfdaf89ff43149b8b9e94f7d0e66c6ee43cd90c3b778d
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5894c6573565c6328682bcc690a51bee32ff4f44e9c1aa3c075cea5a25453a5
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -359,6 +359,38 @@
|
|
359 |
"eval_samples_per_second": 26.742,
|
360 |
"eval_steps_per_second": 3.343,
|
361 |
"step": 2200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
362 |
}
|
363 |
],
|
364 |
"logging_steps": 100,
|
@@ -366,7 +398,7 @@
|
|
366 |
"num_input_tokens_seen": 0,
|
367 |
"num_train_epochs": 30,
|
368 |
"save_steps": 100,
|
369 |
-
"total_flos": 2.
|
370 |
"train_batch_size": 8,
|
371 |
"trial_name": null,
|
372 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0103136181856451,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 2400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
359 |
"eval_samples_per_second": 26.742,
|
360 |
"eval_steps_per_second": 3.343,
|
361 |
"step": 2200
|
362 |
+
},
|
363 |
+
{
|
364 |
+
"epoch": 0.97,
|
365 |
+
"grad_norm": 2.9436004161834717,
|
366 |
+
"learning_rate": 4.87321554770318e-05,
|
367 |
+
"loss": 1.6308,
|
368 |
+
"step": 2300
|
369 |
+
},
|
370 |
+
{
|
371 |
+
"epoch": 0.97,
|
372 |
+
"eval_cer": 0.5162586523796489,
|
373 |
+
"eval_loss": 3.3528623580932617,
|
374 |
+
"eval_runtime": 360.9925,
|
375 |
+
"eval_samples_per_second": 26.255,
|
376 |
+
"eval_steps_per_second": 3.283,
|
377 |
+
"step": 2300
|
378 |
+
},
|
379 |
+
{
|
380 |
+
"epoch": 1.01,
|
381 |
+
"grad_norm": 7.935622692108154,
|
382 |
+
"learning_rate": 4.866148409893993e-05,
|
383 |
+
"loss": 2.4462,
|
384 |
+
"step": 2400
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"epoch": 1.01,
|
388 |
+
"eval_cer": 0.575390579171718,
|
389 |
+
"eval_loss": 2.0690765380859375,
|
390 |
+
"eval_runtime": 343.7001,
|
391 |
+
"eval_samples_per_second": 27.576,
|
392 |
+
"eval_steps_per_second": 3.448,
|
393 |
+
"step": 2400
|
394 |
}
|
395 |
],
|
396 |
"logging_steps": 100,
|
|
|
398 |
"num_input_tokens_seen": 0,
|
399 |
"num_train_epochs": 30,
|
400 |
"save_steps": 100,
|
401 |
+
"total_flos": 2.6422344601189564e+19,
|
402 |
"train_batch_size": 8,
|
403 |
"trial_name": null,
|
404 |
"trial_params": null
|