Training in progress, step 10700, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71f48962b77a0ffadd466c017fcd32bec6946f7ff9c2a2e74009ea84a13a26ac
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd244e0c641e9fc420b274487550a5fd81f515433393f446ae69f4eb95c66400
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5da71c3972cdf1c8ca47cfca858d3f5eebd3ddd0bbfffc533b99224dfc911ff
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3e5ea2f8b945220efdb684f11b1780e2edefdb156c009e8c72ef7983c62a8e2
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca3e3c26689da313cb94037cbfc3a7e856b1f0f65c3a0e23766260d8fd9feabe
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 4.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1687,6 +1687,38 @@
|
|
1687 |
"eval_samples_per_second": 26.52,
|
1688 |
"eval_steps_per_second": 3.316,
|
1689 |
"step": 10500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1690 |
}
|
1691 |
],
|
1692 |
"logging_steps": 100,
|
@@ -1694,7 +1726,7 @@
|
|
1694 |
"num_input_tokens_seen": 0,
|
1695 |
"num_train_epochs": 30,
|
1696 |
"save_steps": 100,
|
1697 |
-
"total_flos": 1.
|
1698 |
"train_batch_size": 8,
|
1699 |
"trial_name": null,
|
1700 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.504314881077668,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 10700,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1687 |
"eval_samples_per_second": 26.52,
|
1688 |
"eval_steps_per_second": 3.316,
|
1689 |
"step": 10500
|
1690 |
+
},
|
1691 |
+
{
|
1692 |
+
"epoch": 4.46,
|
1693 |
+
"grad_norm": 3.71893048286438,
|
1694 |
+
"learning_rate": 4.286925795053004e-05,
|
1695 |
+
"loss": 1.4426,
|
1696 |
+
"step": 10600
|
1697 |
+
},
|
1698 |
+
{
|
1699 |
+
"epoch": 4.46,
|
1700 |
+
"eval_cer": 0.48283211450471236,
|
1701 |
+
"eval_loss": 2.5203747749328613,
|
1702 |
+
"eval_runtime": 378.5225,
|
1703 |
+
"eval_samples_per_second": 25.039,
|
1704 |
+
"eval_steps_per_second": 3.131,
|
1705 |
+
"step": 10600
|
1706 |
+
},
|
1707 |
+
{
|
1708 |
+
"epoch": 4.5,
|
1709 |
+
"grad_norm": 3.5400538444519043,
|
1710 |
+
"learning_rate": 4.279858657243816e-05,
|
1711 |
+
"loss": 2.5204,
|
1712 |
+
"step": 10700
|
1713 |
+
},
|
1714 |
+
{
|
1715 |
+
"epoch": 4.5,
|
1716 |
+
"eval_cer": 0.496866567596105,
|
1717 |
+
"eval_loss": 3.1510612964630127,
|
1718 |
+
"eval_runtime": 360.9029,
|
1719 |
+
"eval_samples_per_second": 26.262,
|
1720 |
+
"eval_steps_per_second": 3.283,
|
1721 |
+
"step": 10700
|
1722 |
}
|
1723 |
],
|
1724 |
"logging_steps": 100,
|
|
|
1726 |
"num_input_tokens_seen": 0,
|
1727 |
"num_train_epochs": 30,
|
1728 |
"save_steps": 100,
|
1729 |
+
"total_flos": 1.1715979719910262e+20,
|
1730 |
"train_batch_size": 8,
|
1731 |
"trial_name": null,
|
1732 |
"trial_params": null
|