Training in progress, step 47300, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0a7eca189b6cba3d0264a3a6b528f4d768503319d4957c82aafb34cecdfdf4a
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d44c9dabcde04ec924b500d0f45e98bbda60772b9108bedac5d65641ce067cb9
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5815b1015337a35948babb6d5b6ab9cf7c776143678a541486d413ba284536e
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d54689fcde880091f1c0ee11daaede986eacb0d5ba40ec6c4cb9bad1bdacaa9
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d0677dbdd096b9b20e1b9970b686999e123df432ab47b960e13026ac8fbafd2
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 19.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7543,6 +7543,38 @@
|
|
7543 |
"eval_samples_per_second": 25.595,
|
7544 |
"eval_steps_per_second": 3.2,
|
7545 |
"step": 47100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7546 |
}
|
7547 |
],
|
7548 |
"logging_steps": 100,
|
@@ -7550,7 +7582,7 @@
|
|
7550 |
"num_input_tokens_seen": 0,
|
7551 |
"num_train_epochs": 30,
|
7552 |
"save_steps": 100,
|
7553 |
-
"total_flos": 5.
|
7554 |
"train_batch_size": 8,
|
7555 |
"trial_name": null,
|
7556 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 19.911597558408754,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 47300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7543 |
"eval_samples_per_second": 25.595,
|
7544 |
"eval_steps_per_second": 3.2,
|
7545 |
"step": 47100
|
7546 |
+
},
|
7547 |
+
{
|
7548 |
+
"epoch": 19.87,
|
7549 |
+
"grad_norm": 1.6992497444152832,
|
7550 |
+
"learning_rate": 1.701201413427562e-05,
|
7551 |
+
"loss": 0.4712,
|
7552 |
+
"step": 47200
|
7553 |
+
},
|
7554 |
+
{
|
7555 |
+
"epoch": 19.87,
|
7556 |
+
"eval_cer": 0.34815073716319267,
|
7557 |
+
"eval_loss": 2.558990001678467,
|
7558 |
+
"eval_runtime": 412.1751,
|
7559 |
+
"eval_samples_per_second": 22.995,
|
7560 |
+
"eval_steps_per_second": 2.875,
|
7561 |
+
"step": 47200
|
7562 |
+
},
|
7563 |
+
{
|
7564 |
+
"epoch": 19.91,
|
7565 |
+
"grad_norm": 3.102717399597168,
|
7566 |
+
"learning_rate": 1.6941342756183746e-05,
|
7567 |
+
"loss": 0.4767,
|
7568 |
+
"step": 47300
|
7569 |
+
},
|
7570 |
+
{
|
7571 |
+
"epoch": 19.91,
|
7572 |
+
"eval_cer": 0.3449659770834148,
|
7573 |
+
"eval_loss": 2.632328748703003,
|
7574 |
+
"eval_runtime": 385.8283,
|
7575 |
+
"eval_samples_per_second": 24.565,
|
7576 |
+
"eval_steps_per_second": 3.071,
|
7577 |
+
"step": 47300
|
7578 |
}
|
7579 |
],
|
7580 |
"logging_steps": 100,
|
|
|
7582 |
"num_input_tokens_seen": 0,
|
7583 |
"num_train_epochs": 30,
|
7584 |
"save_steps": 100,
|
7585 |
+
"total_flos": 5.183308424712062e+20,
|
7586 |
"train_batch_size": 8,
|
7587 |
"trial_name": null,
|
7588 |
"trial_params": null
|