Training in progress, step 55100, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8c48bb3d612c40a450aa730d13e87c4b64a6c34e5b31e7b8fe6c269f23b421c
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1069f6193c7916eb469ce04a7fa8c02754c89e947e59faab71008137e15e1b8
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc3024dcfeaef3e6a6bcda461b928f14a072e6bddfed080d72f79930e36aa00b
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a74bc8c11a5b138c20c6a3a72f1fc0760ad94bd8169c2089dd7b02f38fcd2093
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b611b98dbecb53ef90f98b1e408d8bea073963dab97c75432c6a94c95519856
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 23.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8791,6 +8791,38 @@
|
|
8791 |
"eval_samples_per_second": 23.669,
|
8792 |
"eval_steps_per_second": 2.959,
|
8793 |
"step": 54900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8794 |
}
|
8795 |
],
|
8796 |
"logging_steps": 100,
|
@@ -8798,7 +8830,7 @@
|
|
8798 |
"num_input_tokens_seen": 0,
|
8799 |
"num_train_epochs": 30,
|
8800 |
"save_steps": 100,
|
8801 |
-
"total_flos": 6.
|
8802 |
"train_batch_size": 8,
|
8803 |
"trial_name": null,
|
8804 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 23.1951168175121,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 55100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8791 |
"eval_samples_per_second": 23.669,
|
8792 |
"eval_steps_per_second": 2.959,
|
8793 |
"step": 54900
|
8794 |
+
},
|
8795 |
+
{
|
8796 |
+
"epoch": 23.15,
|
8797 |
+
"grad_norm": 2.888965606689453,
|
8798 |
+
"learning_rate": 1.1501060070671379e-05,
|
8799 |
+
"loss": 0.2584,
|
8800 |
+
"step": 55000
|
8801 |
+
},
|
8802 |
+
{
|
8803 |
+
"epoch": 23.15,
|
8804 |
+
"eval_cer": 0.3347419928825623,
|
8805 |
+
"eval_loss": 2.6486945152282715,
|
8806 |
+
"eval_runtime": 425.0006,
|
8807 |
+
"eval_samples_per_second": 22.301,
|
8808 |
+
"eval_steps_per_second": 2.788,
|
8809 |
+
"step": 55000
|
8810 |
+
},
|
8811 |
+
{
|
8812 |
+
"epoch": 23.2,
|
8813 |
+
"grad_norm": 1.428572654724121,
|
8814 |
+
"learning_rate": 1.1430388692579507e-05,
|
8815 |
+
"loss": 0.2592,
|
8816 |
+
"step": 55100
|
8817 |
+
},
|
8818 |
+
{
|
8819 |
+
"epoch": 23.2,
|
8820 |
+
"eval_cer": 0.33205340033631847,
|
8821 |
+
"eval_loss": 2.5768284797668457,
|
8822 |
+
"eval_runtime": 410.5876,
|
8823 |
+
"eval_samples_per_second": 23.084,
|
8824 |
+
"eval_steps_per_second": 2.886,
|
8825 |
+
"step": 55100
|
8826 |
}
|
8827 |
],
|
8828 |
"logging_steps": 100,
|
|
|
8830 |
"num_input_tokens_seen": 0,
|
8831 |
"num_train_epochs": 30,
|
8832 |
"save_steps": 100,
|
8833 |
+
"total_flos": 6.038294629213688e+20,
|
8834 |
"train_batch_size": 8,
|
8835 |
"trial_name": null,
|
8836 |
"trial_params": null
|