Training in progress, step 26900, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:378dc527ac459832013c97617137877ef2787d8ad0a5019604ca85aa956d8e7e
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4c23db54888854e9599d18a9d3bb2ca8aba5577e50d015aa3e0495e88b82be4
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b0e4523778043b99de59919488370129ad827e481ce7777fc2e2ed0192be8c4
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ab6c67f572fa86813b890e9a102c067c20a3ae8213e83df8a69f01dcfe18650
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:920901db8d07342ad0b34f21db24e241a543d399c53204a75752af51c73d0ca9
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 11.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4279,6 +4279,38 @@
|
|
4279 |
"eval_samples_per_second": 25.739,
|
4280 |
"eval_steps_per_second": 3.218,
|
4281 |
"step": 26700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4282 |
}
|
4283 |
],
|
4284 |
"logging_steps": 100,
|
@@ -4286,7 +4318,7 @@
|
|
4286 |
"num_input_tokens_seen": 0,
|
4287 |
"num_train_epochs": 30,
|
4288 |
"save_steps": 100,
|
4289 |
-
"total_flos": 2.
|
4290 |
"train_batch_size": 8,
|
4291 |
"trial_name": null,
|
4292 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 11.323931803830773,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 26900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4279 |
"eval_samples_per_second": 25.739,
|
4280 |
"eval_steps_per_second": 3.218,
|
4281 |
"step": 26700
|
4282 |
+
},
|
4283 |
+
{
|
4284 |
+
"epoch": 11.28,
|
4285 |
+
"grad_norm": 1.7043545246124268,
|
4286 |
+
"learning_rate": 3.142402826855124e-05,
|
4287 |
+
"loss": 1.1348,
|
4288 |
+
"step": 26800
|
4289 |
+
},
|
4290 |
+
{
|
4291 |
+
"epoch": 11.28,
|
4292 |
+
"eval_cer": 0.41776573071057055,
|
4293 |
+
"eval_loss": 2.2900288105010986,
|
4294 |
+
"eval_runtime": 394.8533,
|
4295 |
+
"eval_samples_per_second": 24.004,
|
4296 |
+
"eval_steps_per_second": 3.001,
|
4297 |
+
"step": 26800
|
4298 |
+
},
|
4299 |
+
{
|
4300 |
+
"epoch": 11.32,
|
4301 |
+
"grad_norm": 1.8619517087936401,
|
4302 |
+
"learning_rate": 3.135335689045937e-05,
|
4303 |
+
"loss": 1.1366,
|
4304 |
+
"step": 26900
|
4305 |
+
},
|
4306 |
+
{
|
4307 |
+
"epoch": 11.32,
|
4308 |
+
"eval_cer": 0.42347532360877554,
|
4309 |
+
"eval_loss": 1.863010287284851,
|
4310 |
+
"eval_runtime": 370.2336,
|
4311 |
+
"eval_samples_per_second": 25.6,
|
4312 |
+
"eval_steps_per_second": 3.201,
|
4313 |
+
"step": 26900
|
4314 |
}
|
4315 |
],
|
4316 |
"logging_steps": 100,
|
|
|
4318 |
"num_input_tokens_seen": 0,
|
4319 |
"num_train_epochs": 30,
|
4320 |
"save_steps": 100,
|
4321 |
+
"total_flos": 2.9479302465142643e+20,
|
4322 |
"train_batch_size": 8,
|
4323 |
"trial_name": null,
|
4324 |
"trial_params": null
|