Training in progress, step 2200, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00172f6dd05a49b1cf2f8ff023f3ca93980f8b4e3060eeb47e74d8f65933ac36
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d59fdb186dd6719014f3267c673ca01c879f220542715dcb5862f4204903bc2
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbe23e6ec83e38f888ce1b667257f959e02478dbaef89d9e23350f77615b295b
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39c98aca05a0d374052b7c1cb664ff3eb713201073b7a44e234eb536fbccee1c
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1665cb8324f796c9f90a2705fd01fef5d8ec3fa89b088395f8b2aa4bae6228c2
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -327,6 +327,38 @@
|
|
327 |
"eval_samples_per_second": 27.767,
|
328 |
"eval_steps_per_second": 3.472,
|
329 |
"step": 2000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
330 |
}
|
331 |
],
|
332 |
"logging_steps": 100,
|
@@ -334,7 +366,7 @@
|
|
334 |
"num_input_tokens_seen": 0,
|
335 |
"num_train_epochs": 30,
|
336 |
"save_steps": 100,
|
337 |
-
"total_flos": 2.
|
338 |
"train_batch_size": 8,
|
339 |
"trial_name": null,
|
340 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9261208166701747,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 2200,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
327 |
"eval_samples_per_second": 27.767,
|
328 |
"eval_steps_per_second": 3.472,
|
329 |
"step": 2000
|
330 |
+
},
|
331 |
+
{
|
332 |
+
"epoch": 0.88,
|
333 |
+
"grad_norm": 2.842353343963623,
|
334 |
+
"learning_rate": 4.8873498233215547e-05,
|
335 |
+
"loss": 2.1649,
|
336 |
+
"step": 2100
|
337 |
+
},
|
338 |
+
{
|
339 |
+
"epoch": 0.88,
|
340 |
+
"eval_cer": 0.544757733369833,
|
341 |
+
"eval_loss": 2.421653985977173,
|
342 |
+
"eval_runtime": 358.7261,
|
343 |
+
"eval_samples_per_second": 26.421,
|
344 |
+
"eval_steps_per_second": 3.303,
|
345 |
+
"step": 2100
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.93,
|
349 |
+
"grad_norm": 4.818225383758545,
|
350 |
+
"learning_rate": 4.880282685512368e-05,
|
351 |
+
"loss": 2.2775,
|
352 |
+
"step": 2200
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 0.93,
|
356 |
+
"eval_cer": 0.5533832270931915,
|
357 |
+
"eval_loss": 2.188101291656494,
|
358 |
+
"eval_runtime": 354.4215,
|
359 |
+
"eval_samples_per_second": 26.742,
|
360 |
+
"eval_steps_per_second": 3.343,
|
361 |
+
"step": 2200
|
362 |
}
|
363 |
],
|
364 |
"logging_steps": 100,
|
|
|
366 |
"num_input_tokens_seen": 0,
|
367 |
"num_train_epochs": 30,
|
368 |
"save_steps": 100,
|
369 |
+
"total_flos": 2.411832086421037e+19,
|
370 |
"train_batch_size": 8,
|
371 |
"trial_name": null,
|
372 |
"trial_params": null
|