Training in progress, step 17500, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abbcad510f5d6d17da77ac25180b09d3437fab9772e7c44ff2b3717ab2555827
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a68f90a5153d4c6d993bcbe7dfb647ef73f657679f4aaf49e0141450a805cabb
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd60b24f7ae9d45a90489d4bf13a52080f5bef0765dbfd70e0655cc95cf2a575
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c66fd25519e26fd88891aa401ed778beca63619aeba161031fca5e7539ce650
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d0b60ffa460bee76500f761630a9787c479b8332d14ac3a0ab84782353df19d
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2775,6 +2775,38 @@
|
|
2775 |
"eval_samples_per_second": 26.189,
|
2776 |
"eval_steps_per_second": 3.274,
|
2777 |
"step": 17300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2778 |
}
|
2779 |
],
|
2780 |
"logging_steps": 100,
|
@@ -2782,7 +2814,7 @@
|
|
2782 |
"num_input_tokens_seen": 0,
|
2783 |
"num_train_epochs": 30,
|
2784 |
"save_steps": 100,
|
2785 |
-
"total_flos": 1.
|
2786 |
"train_batch_size": 8,
|
2787 |
"trial_name": null,
|
2788 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.366870132603663,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 17500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2775 |
"eval_samples_per_second": 26.189,
|
2776 |
"eval_steps_per_second": 3.274,
|
2777 |
"step": 17300
|
2778 |
+
},
|
2779 |
+
{
|
2780 |
+
"epoch": 7.32,
|
2781 |
+
"grad_norm": 3.9697859287261963,
|
2782 |
+
"learning_rate": 3.8065017667844525e-05,
|
2783 |
+
"loss": 1.3348,
|
2784 |
+
"step": 17400
|
2785 |
+
},
|
2786 |
+
{
|
2787 |
+
"epoch": 7.32,
|
2788 |
+
"eval_cer": 0.460401920143913,
|
2789 |
+
"eval_loss": 2.626167058944702,
|
2790 |
+
"eval_runtime": 377.1422,
|
2791 |
+
"eval_samples_per_second": 25.131,
|
2792 |
+
"eval_steps_per_second": 3.142,
|
2793 |
+
"step": 17400
|
2794 |
+
},
|
2795 |
+
{
|
2796 |
+
"epoch": 7.37,
|
2797 |
+
"grad_norm": 2.342355251312256,
|
2798 |
+
"learning_rate": 3.7994346289752653e-05,
|
2799 |
+
"loss": 2.0519,
|
2800 |
+
"step": 17500
|
2801 |
+
},
|
2802 |
+
{
|
2803 |
+
"epoch": 7.37,
|
2804 |
+
"eval_cer": 0.4643712604121857,
|
2805 |
+
"eval_loss": 1.923600196838379,
|
2806 |
+
"eval_runtime": 360.7805,
|
2807 |
+
"eval_samples_per_second": 26.271,
|
2808 |
+
"eval_steps_per_second": 3.285,
|
2809 |
+
"step": 17500
|
2810 |
}
|
2811 |
],
|
2812 |
"logging_steps": 100,
|
|
|
2814 |
"num_input_tokens_seen": 0,
|
2815 |
"num_train_epochs": 30,
|
2816 |
"save_steps": 100,
|
2817 |
+
"total_flos": 1.9185164331179065e+20,
|
2818 |
"train_batch_size": 8,
|
2819 |
"trial_name": null,
|
2820 |
"trial_params": null
|