Training in progress, step 49100, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e998f8468cc7c346dc4ab88bf0c2ea3d0619ac332532e3a814afb1ae30e5137
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26f75452f5c6dacef05179ac21358b2dfe0213c1b720af143f8e2325f3b80e9b
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f320ce414b15cc9e8e335856b46b099587922107eefcdd05c2003fd1b73a4a1
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbf00c1e2916dc93efcaa39d2802c751ace54d5e0176b36b9c71c02426bcbae3
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3526b76b5bb1c00b0ec05511da6eda21f6c0133130e655d44d777a0153ed64a2
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 20.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7831,6 +7831,38 @@
|
|
7831 |
"eval_samples_per_second": 24.524,
|
7832 |
"eval_steps_per_second": 3.066,
|
7833 |
"step": 48900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7834 |
}
|
7835 |
],
|
7836 |
"logging_steps": 100,
|
@@ -7838,7 +7870,7 @@
|
|
7838 |
"num_input_tokens_seen": 0,
|
7839 |
"num_train_epochs": 30,
|
7840 |
"save_steps": 100,
|
7841 |
-
"total_flos": 5.
|
7842 |
"train_batch_size": 8,
|
7843 |
"trial_name": null,
|
7844 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 20.66933277204799,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 49100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7831 |
"eval_samples_per_second": 24.524,
|
7832 |
"eval_steps_per_second": 3.066,
|
7833 |
"step": 48900
|
7834 |
+
},
|
7835 |
+
{
|
7836 |
+
"epoch": 20.63,
|
7837 |
+
"grad_norm": 1.9635140895843506,
|
7838 |
+
"learning_rate": 1.574063604240283e-05,
|
7839 |
+
"loss": 0.4048,
|
7840 |
+
"step": 49000
|
7841 |
+
},
|
7842 |
+
{
|
7843 |
+
"epoch": 20.63,
|
7844 |
+
"eval_cer": 0.34388076336474915,
|
7845 |
+
"eval_loss": 2.86527156829834,
|
7846 |
+
"eval_runtime": 408.1952,
|
7847 |
+
"eval_samples_per_second": 23.219,
|
7848 |
+
"eval_steps_per_second": 2.903,
|
7849 |
+
"step": 49000
|
7850 |
+
},
|
7851 |
+
{
|
7852 |
+
"epoch": 20.67,
|
7853 |
+
"grad_norm": 2.3898985385894775,
|
7854 |
+
"learning_rate": 1.5669964664310955e-05,
|
7855 |
+
"loss": 0.4189,
|
7856 |
+
"step": 49100
|
7857 |
+
},
|
7858 |
+
{
|
7859 |
+
"epoch": 20.67,
|
7860 |
+
"eval_cer": 0.34160034805052597,
|
7861 |
+
"eval_loss": 3.328908681869507,
|
7862 |
+
"eval_runtime": 391.6176,
|
7863 |
+
"eval_samples_per_second": 24.202,
|
7864 |
+
"eval_steps_per_second": 3.026,
|
7865 |
+
"step": 49100
|
7866 |
}
|
7867 |
],
|
7868 |
"logging_steps": 100,
|
|
|
7870 |
"num_input_tokens_seen": 0,
|
7871 |
"num_train_epochs": 30,
|
7872 |
"save_steps": 100,
|
7873 |
+
"total_flos": 5.379768925487233e+20,
|
7874 |
"train_batch_size": 8,
|
7875 |
"trial_name": null,
|
7876 |
"trial_params": null
|