Training in progress, step 17100, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b9e8f1cd682c600b8cb11187fc1ab5ae248a9db42188a27ec84ab8dd6b176e3
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e55ffbe26a558cd4d44bf377902f7de44a99d96212bf6909a06d0b6c973855f0
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d85e4e6b570f463ab7d57a8eb340d363fee4aedfddaa988b4c8532024ed27e4d
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9edfa95c867c886482be8a9997b5cdbca3a08cde04092b0ac7f65fec6704c015
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c97c09909b76b35ea84168af91d3198272a70d988945ee6a69678b1b1242e4e
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2711,6 +2711,38 @@
|
|
2711 |
"eval_samples_per_second": 25.986,
|
2712 |
"eval_steps_per_second": 3.249,
|
2713 |
"step": 16900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2714 |
}
|
2715 |
],
|
2716 |
"logging_steps": 100,
|
@@ -2718,7 +2750,7 @@
|
|
2718 |
"num_input_tokens_seen": 0,
|
2719 |
"num_train_epochs": 30,
|
2720 |
"save_steps": 100,
|
2721 |
-
"total_flos": 1.
|
2722 |
"train_batch_size": 8,
|
2723 |
"trial_name": null,
|
2724 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.198484529572721,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 17100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2711 |
"eval_samples_per_second": 25.986,
|
2712 |
"eval_steps_per_second": 3.249,
|
2713 |
"step": 16900
|
2714 |
+
},
|
2715 |
+
{
|
2716 |
+
"epoch": 7.16,
|
2717 |
+
"grad_norm": 4.391873359680176,
|
2718 |
+
"learning_rate": 3.834699646643109e-05,
|
2719 |
+
"loss": 1.3334,
|
2720 |
+
"step": 17000
|
2721 |
+
},
|
2722 |
+
{
|
2723 |
+
"epoch": 7.16,
|
2724 |
+
"eval_cer": 0.4652682726526143,
|
2725 |
+
"eval_loss": 2.7629072666168213,
|
2726 |
+
"eval_runtime": 380.8752,
|
2727 |
+
"eval_samples_per_second": 24.885,
|
2728 |
+
"eval_steps_per_second": 3.111,
|
2729 |
+
"step": 17000
|
2730 |
+
},
|
2731 |
+
{
|
2732 |
+
"epoch": 7.2,
|
2733 |
+
"grad_norm": 7.738865375518799,
|
2734 |
+
"learning_rate": 3.827632508833923e-05,
|
2735 |
+
"loss": 1.4858,
|
2736 |
+
"step": 17100
|
2737 |
+
},
|
2738 |
+
{
|
2739 |
+
"epoch": 7.2,
|
2740 |
+
"eval_cer": 0.4701737319619882,
|
2741 |
+
"eval_loss": 2.189877986907959,
|
2742 |
+
"eval_runtime": 363.6078,
|
2743 |
+
"eval_samples_per_second": 26.067,
|
2744 |
+
"eval_steps_per_second": 3.259,
|
2745 |
+
"step": 17100
|
2746 |
}
|
2747 |
],
|
2748 |
"logging_steps": 100,
|
|
|
2750 |
"num_input_tokens_seen": 0,
|
2751 |
"num_train_epochs": 30,
|
2752 |
"save_steps": 100,
|
2753 |
+
"total_flos": 1.8747010341512125e+20,
|
2754 |
"train_batch_size": 8,
|
2755 |
"trial_name": null,
|
2756 |
"trial_params": null
|