Training in progress, step 12900, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57c6b54d077cb667a4cfbe738582da2ecd39159286c41485b7c6efd53cb7bc1a
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45da60abe68c6f65c5ae53251ba8b983302267f69d34edaa7bf0ebf95d54ac3a
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a67ddb96ec88d7be4083d93ecc9ee80924bab46ac10dc058c48085c087713d8
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b3ac36e7fbd4b3c080b9441c4f2b70acea99b1b8d1002061ebda98ec5266a7e
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c402405be7dbbda177a1e84950f8dcd4bc4f3baf7fe67d401f484f341b04e122
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 5.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2039,6 +2039,38 @@
|
|
2039 |
"eval_samples_per_second": 26.205,
|
2040 |
"eval_steps_per_second": 3.276,
|
2041 |
"step": 12700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2042 |
}
|
2043 |
],
|
2044 |
"logging_steps": 100,
|
@@ -2046,7 +2078,7 @@
|
|
2046 |
"num_input_tokens_seen": 0,
|
2047 |
"num_train_epochs": 30,
|
2048 |
"save_steps": 100,
|
2049 |
-
"total_flos": 1.
|
2050 |
"train_batch_size": 8,
|
2051 |
"trial_name": null,
|
2052 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.430435697747843,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 12900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2039 |
"eval_samples_per_second": 26.205,
|
2040 |
"eval_steps_per_second": 3.276,
|
2041 |
"step": 12700
|
2042 |
+
},
|
2043 |
+
{
|
2044 |
+
"epoch": 5.39,
|
2045 |
+
"grad_norm": 1.902030348777771,
|
2046 |
+
"learning_rate": 4.131448763250883e-05,
|
2047 |
+
"loss": 2.1405,
|
2048 |
+
"step": 12800
|
2049 |
+
},
|
2050 |
+
{
|
2051 |
+
"epoch": 5.39,
|
2052 |
+
"eval_cer": 0.4769636502287748,
|
2053 |
+
"eval_loss": 2.750805139541626,
|
2054 |
+
"eval_runtime": 383.778,
|
2055 |
+
"eval_samples_per_second": 24.697,
|
2056 |
+
"eval_steps_per_second": 3.088,
|
2057 |
+
"step": 12800
|
2058 |
+
},
|
2059 |
+
{
|
2060 |
+
"epoch": 5.43,
|
2061 |
+
"grad_norm": 5.2548041343688965,
|
2062 |
+
"learning_rate": 4.124381625441696e-05,
|
2063 |
+
"loss": 1.967,
|
2064 |
+
"step": 12900
|
2065 |
+
},
|
2066 |
+
{
|
2067 |
+
"epoch": 5.43,
|
2068 |
+
"eval_cer": 0.4813216143287318,
|
2069 |
+
"eval_loss": 2.39349627494812,
|
2070 |
+
"eval_runtime": 366.8876,
|
2071 |
+
"eval_samples_per_second": 25.834,
|
2072 |
+
"eval_steps_per_second": 3.23,
|
2073 |
+
"step": 12900
|
2074 |
}
|
2075 |
],
|
2076 |
"logging_steps": 100,
|
|
|
2078 |
"num_input_tokens_seen": 0,
|
2079 |
"num_train_epochs": 30,
|
2080 |
"save_steps": 100,
|
2081 |
+
"total_flos": 1.4138922393346692e+20,
|
2082 |
"train_batch_size": 8,
|
2083 |
"trial_name": null,
|
2084 |
"trial_params": null
|