Training in progress, step 49900, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c030a05cedfb701f1f900ac5f26cde012f010403c760db37339a226c2da019b
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:011ba2a8625abf53042a63306d8a7372e4ce9afaa38b9b2ed242865f4641eb35
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37ee240f317d6efe3bef85d93c11c29152c2835f327f23980767091f56826096
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f06d58a45fbcf190e6121835ad9ae3823578f428a1fa2340da936ad10e470b66
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88466590320f3a52b9bb148645555d7df1e6a76ed6ca782ad8cb3e3e4334e3f9
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7959,6 +7959,38 @@
|
|
7959 |
"eval_samples_per_second": 24.299,
|
7960 |
"eval_steps_per_second": 3.038,
|
7961 |
"step": 49700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7962 |
}
|
7963 |
],
|
7964 |
"logging_steps": 100,
|
@@ -7966,7 +7998,7 @@
|
|
7966 |
"num_input_tokens_seen": 0,
|
7967 |
"num_train_epochs": 30,
|
7968 |
"save_steps": 100,
|
7969 |
-
"total_flos": 5.
|
7970 |
"train_batch_size": 8,
|
7971 |
"trial_name": null,
|
7972 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 21.00610397810987,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 49900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7959 |
"eval_samples_per_second": 24.299,
|
7960 |
"eval_steps_per_second": 3.038,
|
7961 |
"step": 49700
|
7962 |
+
},
|
7963 |
+
{
|
7964 |
+
"epoch": 20.96,
|
7965 |
+
"grad_norm": 2.362006902694702,
|
7966 |
+
"learning_rate": 1.5175265017667845e-05,
|
7967 |
+
"loss": 0.4054,
|
7968 |
+
"step": 49800
|
7969 |
+
},
|
7970 |
+
{
|
7971 |
+
"epoch": 20.96,
|
7972 |
+
"eval_cer": 0.34167611747682924,
|
7973 |
+
"eval_loss": 2.4431283473968506,
|
7974 |
+
"eval_runtime": 408.3684,
|
7975 |
+
"eval_samples_per_second": 23.209,
|
7976 |
+
"eval_steps_per_second": 2.902,
|
7977 |
+
"step": 49800
|
7978 |
+
},
|
7979 |
+
{
|
7980 |
+
"epoch": 21.01,
|
7981 |
+
"grad_norm": 1.828452706336975,
|
7982 |
+
"learning_rate": 1.5104593639575973e-05,
|
7983 |
+
"loss": 0.389,
|
7984 |
+
"step": 49900
|
7985 |
+
},
|
7986 |
+
{
|
7987 |
+
"epoch": 21.01,
|
7988 |
+
"eval_cer": 0.3424289233897775,
|
7989 |
+
"eval_loss": 2.5479896068573,
|
7990 |
+
"eval_runtime": 390.6666,
|
7991 |
+
"eval_samples_per_second": 24.261,
|
7992 |
+
"eval_steps_per_second": 3.033,
|
7993 |
+
"step": 49900
|
7994 |
}
|
7995 |
],
|
7996 |
"logging_steps": 100,
|
|
|
7998 |
"num_input_tokens_seen": 0,
|
7999 |
"num_train_epochs": 30,
|
8000 |
"save_steps": 100,
|
8001 |
+
"total_flos": 5.468477204725173e+20,
|
8002 |
"train_batch_size": 8,
|
8003 |
"trial_name": null,
|
8004 |
"trial_params": null
|