Training in progress, step 45100, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cb3db9d2bd3bce52bd4db7c183811c68dab8410c3e736a49c6c697983dffdd9
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84b143c07e902a9dcb951619cb1986e38157fd91e3cf1bcaced4a96d3371c692
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20e71314605e7678242a5a386ea35a216d449f08cf4c3ff655d23be515343b67
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5293e155d2b0d2e7b914f2d40a0c1ca84ad8e4c46494c609b09f7e1634588b5b
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4efa9e4a4746446a85030c2502426e055bf40a9b3cad17aeaecf307da12c8f64
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 18.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7207,6 +7207,22 @@
|
|
7207 |
"eval_samples_per_second": 24.544,
|
7208 |
"eval_steps_per_second": 3.069,
|
7209 |
"step": 45000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7210 |
}
|
7211 |
],
|
7212 |
"logging_steps": 100,
|
@@ -7214,7 +7230,7 @@
|
|
7214 |
"num_input_tokens_seen": 0,
|
7215 |
"num_train_epochs": 30,
|
7216 |
"save_steps": 100,
|
7217 |
-
"total_flos": 4.
|
7218 |
"train_batch_size": 8,
|
7219 |
"trial_name": null,
|
7220 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 18.98547674173858,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 45100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7207 |
"eval_samples_per_second": 24.544,
|
7208 |
"eval_steps_per_second": 3.069,
|
7209 |
"step": 45000
|
7210 |
+
},
|
7211 |
+
{
|
7212 |
+
"epoch": 18.99,
|
7213 |
+
"grad_norm": 2.5482892990112305,
|
7214 |
+
"learning_rate": 1.8496113074204948e-05,
|
7215 |
+
"loss": 0.5893,
|
7216 |
+
"step": 45100
|
7217 |
+
},
|
7218 |
+
{
|
7219 |
+
"epoch": 18.99,
|
7220 |
+
"eval_cer": 0.3533568300027375,
|
7221 |
+
"eval_loss": 2.9341318607330322,
|
7222 |
+
"eval_runtime": 402.0749,
|
7223 |
+
"eval_samples_per_second": 23.573,
|
7224 |
+
"eval_steps_per_second": 2.947,
|
7225 |
+
"step": 45100
|
7226 |
}
|
7227 |
],
|
7228 |
"logging_steps": 100,
|
|
|
7230 |
"num_input_tokens_seen": 0,
|
7231 |
"num_train_epochs": 30,
|
7232 |
"save_steps": 100,
|
7233 |
+
"total_flos": 4.941606672163604e+20,
|
7234 |
"train_batch_size": 8,
|
7235 |
"trial_name": null,
|
7236 |
"trial_params": null
|