Training in progress, step 21000, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0914d34b789b137bde7b5e425800476cd3b8a0c1c2a2f6dca29e198df8bce8d1
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b055483df48b82f8f43aae9c1bf44543d9d6b475dd015afb266b79d61079e91
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d1f86a4256547c5562fc9179cc66f200a56f9f53b2448731cd2d83aadb1bd70
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73aa4ed4dedc1aae7d9f7534e9a473a7d481fd77c26b1dac6a9acd73c2b3a117
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73620584f32d0b623d2e9a17c6bb745cf56317c4ff398df3fcc5b050e7d94c8b
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 8.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3335,6 +3335,38 @@
|
|
3335 |
"eval_samples_per_second": 26.137,
|
3336 |
"eval_steps_per_second": 3.268,
|
3337 |
"step": 20800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3338 |
}
|
3339 |
],
|
3340 |
"logging_steps": 100,
|
@@ -3342,7 +3374,7 @@
|
|
3342 |
"num_input_tokens_seen": 0,
|
3343 |
"num_train_epochs": 30,
|
3344 |
"save_steps": 100,
|
3345 |
-
"total_flos": 2.
|
3346 |
"train_batch_size": 8,
|
3347 |
"trial_name": null,
|
3348 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.840244159124396,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 21000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3335 |
"eval_samples_per_second": 26.137,
|
3336 |
"eval_steps_per_second": 3.268,
|
3337 |
"step": 20800
|
3338 |
+
},
|
3339 |
+
{
|
3340 |
+
"epoch": 8.8,
|
3341 |
+
"grad_norm": 10.084565162658691,
|
3342 |
+
"learning_rate": 3.5592226148409895e-05,
|
3343 |
+
"loss": 1.3031,
|
3344 |
+
"step": 20900
|
3345 |
+
},
|
3346 |
+
{
|
3347 |
+
"epoch": 8.8,
|
3348 |
+
"eval_cer": 0.44013237652027687,
|
3349 |
+
"eval_loss": 1.9289778470993042,
|
3350 |
+
"eval_runtime": 383.5806,
|
3351 |
+
"eval_samples_per_second": 24.709,
|
3352 |
+
"eval_steps_per_second": 3.089,
|
3353 |
+
"step": 20900
|
3354 |
+
},
|
3355 |
+
{
|
3356 |
+
"epoch": 8.84,
|
3357 |
+
"grad_norm": 2.5064008235931396,
|
3358 |
+
"learning_rate": 3.552155477031802e-05,
|
3359 |
+
"loss": 2.0089,
|
3360 |
+
"step": 21000
|
3361 |
+
},
|
3362 |
+
{
|
3363 |
+
"epoch": 8.84,
|
3364 |
+
"eval_cer": 0.4491318290250675,
|
3365 |
+
"eval_loss": 2.749382734298706,
|
3366 |
+
"eval_runtime": 372.5311,
|
3367 |
+
"eval_samples_per_second": 25.442,
|
3368 |
+
"eval_steps_per_second": 3.181,
|
3369 |
+
"step": 21000
|
3370 |
}
|
3371 |
],
|
3372 |
"logging_steps": 100,
|
|
|
3374 |
"num_input_tokens_seen": 0,
|
3375 |
"num_train_epochs": 30,
|
3376 |
"save_steps": 100,
|
3377 |
+
"total_flos": 2.300997184346249e+20,
|
3378 |
"train_batch_size": 8,
|
3379 |
"trial_name": null,
|
3380 |
"trial_params": null
|