Training in progress, step 50300, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:720bc977d62e7816011389161c4eeae7e71a5e15fa6414fc84828a3a030a05e6
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f5d803a6ecbd25b4508f6830cb0d354f3164cf36de87bc4c942336f1d723990
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:941eea433be3c1f9adb0f47751c327703c05efe14921d20365ff7c6534eecd09
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8cbe5fab90a476cca692ca727115b096b7062ef579d4f32a4ad2921bb938c28b
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08a28e7b27d1cecd73113f4062cdd78f7500a461c5bdb8769059e4729aaa72d3
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 21.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8023,6 +8023,38 @@
|
|
8023 |
"eval_samples_per_second": 24.513,
|
8024 |
"eval_steps_per_second": 3.065,
|
8025 |
"step": 50100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8026 |
}
|
8027 |
],
|
8028 |
"logging_steps": 100,
|
@@ -8030,7 +8062,7 @@
|
|
8030 |
"num_input_tokens_seen": 0,
|
8031 |
"num_train_epochs": 30,
|
8032 |
"save_steps": 100,
|
8033 |
-
"total_flos": 5.
|
8034 |
"train_batch_size": 8,
|
8035 |
"trial_name": null,
|
8036 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 21.17448958114081,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 50300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8023 |
"eval_samples_per_second": 24.513,
|
8024 |
"eval_steps_per_second": 3.065,
|
8025 |
"step": 50100
|
8026 |
+
},
|
8027 |
+
{
|
8028 |
+
"epoch": 21.13,
|
8029 |
+
"grad_norm": 2.7008559703826904,
|
8030 |
+
"learning_rate": 1.4892579505300355e-05,
|
8031 |
+
"loss": 0.3479,
|
8032 |
+
"step": 50200
|
8033 |
+
},
|
8034 |
+
{
|
8035 |
+
"epoch": 21.13,
|
8036 |
+
"eval_cer": 0.34289331664776507,
|
8037 |
+
"eval_loss": 2.209360122680664,
|
8038 |
+
"eval_runtime": 412.345,
|
8039 |
+
"eval_samples_per_second": 22.986,
|
8040 |
+
"eval_steps_per_second": 2.874,
|
8041 |
+
"step": 50200
|
8042 |
+
},
|
8043 |
+
{
|
8044 |
+
"epoch": 21.17,
|
8045 |
+
"grad_norm": 1.4452073574066162,
|
8046 |
+
"learning_rate": 1.482190812720848e-05,
|
8047 |
+
"loss": 0.3465,
|
8048 |
+
"step": 50300
|
8049 |
+
},
|
8050 |
+
{
|
8051 |
+
"epoch": 21.17,
|
8052 |
+
"eval_cer": 0.33977454929412226,
|
8053 |
+
"eval_loss": 2.268566370010376,
|
8054 |
+
"eval_runtime": 390.1756,
|
8055 |
+
"eval_samples_per_second": 24.292,
|
8056 |
+
"eval_steps_per_second": 3.037,
|
8057 |
+
"step": 50300
|
8058 |
}
|
8059 |
],
|
8060 |
"logging_steps": 100,
|
|
|
8062 |
"num_input_tokens_seen": 0,
|
8063 |
"num_train_epochs": 30,
|
8064 |
"save_steps": 100,
|
8065 |
+
"total_flos": 5.5126604494545053e+20,
|
8066 |
"train_batch_size": 8,
|
8067 |
"trial_name": null,
|
8068 |
"trial_params": null
|