Training in progress, step 15900, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c20ad3beef3f33ef2b4c145c9b9506f9a226a39e2e73693f7ae2fcfc0dcf1cd
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74d0557152fa6f6a5c4206d20e2e5e3e65c08a54d396cdd08189203105bb821e
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49a17f38b1772819369863ac91e47ffc5deca1f2b7e3fbb755cc5669eb6f2884
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f938892d18549bfd11854e754003e9c1d26470f2c462d1c3bdac6d13825d7005
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49c14c6d2163c777317ea1561699a4fab0cc691b0ee652db9a2586b628592aa5
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 6.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2519,6 +2519,38 @@
|
|
2519 |
"eval_samples_per_second": 25.949,
|
2520 |
"eval_steps_per_second": 3.244,
|
2521 |
"step": 15700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2522 |
}
|
2523 |
],
|
2524 |
"logging_steps": 100,
|
@@ -2526,7 +2558,7 @@
|
|
2526 |
"num_input_tokens_seen": 0,
|
2527 |
"num_train_epochs": 30,
|
2528 |
"save_steps": 100,
|
2529 |
-
"total_flos": 1.
|
2530 |
"train_batch_size": 8,
|
2531 |
"trial_name": null,
|
2532 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 6.693327720479899,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 15900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2519 |
"eval_samples_per_second": 25.949,
|
2520 |
"eval_steps_per_second": 3.244,
|
2521 |
"step": 15700
|
2522 |
+
},
|
2523 |
+
{
|
2524 |
+
"epoch": 6.65,
|
2525 |
+
"grad_norm": 2.1095080375671387,
|
2526 |
+
"learning_rate": 3.919505300353357e-05,
|
2527 |
+
"loss": 2.2505,
|
2528 |
+
"step": 15800
|
2529 |
+
},
|
2530 |
+
{
|
2531 |
+
"epoch": 6.65,
|
2532 |
+
"eval_cer": 0.4647232216182394,
|
2533 |
+
"eval_loss": 1.8471035957336426,
|
2534 |
+
"eval_runtime": 388.913,
|
2535 |
+
"eval_samples_per_second": 24.37,
|
2536 |
+
"eval_steps_per_second": 3.047,
|
2537 |
+
"step": 15800
|
2538 |
+
},
|
2539 |
+
{
|
2540 |
+
"epoch": 6.69,
|
2541 |
+
"grad_norm": 3.6614439487457275,
|
2542 |
+
"learning_rate": 3.91243816254417e-05,
|
2543 |
+
"loss": 1.9341,
|
2544 |
+
"step": 15900
|
2545 |
+
},
|
2546 |
+
{
|
2547 |
+
"epoch": 6.69,
|
2548 |
+
"eval_cer": 0.4613698134605608,
|
2549 |
+
"eval_loss": 2.8519198894500732,
|
2550 |
+
"eval_runtime": 363.022,
|
2551 |
+
"eval_samples_per_second": 26.109,
|
2552 |
+
"eval_steps_per_second": 3.264,
|
2553 |
+
"step": 15900
|
2554 |
}
|
2555 |
],
|
2556 |
"logging_steps": 100,
|
|
|
2558 |
"num_input_tokens_seen": 0,
|
2559 |
"num_train_epochs": 30,
|
2560 |
"save_steps": 100,
|
2561 |
+
"total_flos": 1.7421368190330087e+20,
|
2562 |
"train_batch_size": 8,
|
2563 |
"trial_name": null,
|
2564 |
"trial_params": null
|