Training in progress, step 44800, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d986491485545d732d2543b339c31a259dcb270837f25d91f6c0b31f3ac46e38
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af17d7c2b3c7bf2b538b3ba90686b2f4e4ed09dd4b9a9afb1fe76d2905f4f613
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33f3fc9081c7129e45e682bf733c743600617e1c0e5a4ea79dca6c91a1e29f04
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ffd4ac7d16e7a8d42ef9c4f13d915aa5d41769606ce52a3a1031a916b016bf9
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e3647f0c4181d85f62ad117eca30124a1bd2379804c9e34d325ea3a74a4ab0d
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 18.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7143,6 +7143,38 @@
|
|
7143 |
"eval_samples_per_second": 24.629,
|
7144 |
"eval_steps_per_second": 3.079,
|
7145 |
"step": 44600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7146 |
}
|
7147 |
],
|
7148 |
"logging_steps": 100,
|
@@ -7150,7 +7182,7 @@
|
|
7150 |
"num_input_tokens_seen": 0,
|
7151 |
"num_train_epochs": 30,
|
7152 |
"save_steps": 100,
|
7153 |
-
"total_flos": 4.
|
7154 |
"train_batch_size": 8,
|
7155 |
"trial_name": null,
|
7156 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 18.859187539465374,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 44800,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7143 |
"eval_samples_per_second": 24.629,
|
7144 |
"eval_steps_per_second": 3.079,
|
7145 |
"step": 44600
|
7146 |
+
},
|
7147 |
+
{
|
7148 |
+
"epoch": 18.82,
|
7149 |
+
"grad_norm": 8.04695987701416,
|
7150 |
+
"learning_rate": 1.877879858657244e-05,
|
7151 |
+
"loss": 0.5773,
|
7152 |
+
"step": 44700
|
7153 |
+
},
|
7154 |
+
{
|
7155 |
+
"epoch": 18.82,
|
7156 |
+
"eval_cer": 0.35235960658558524,
|
7157 |
+
"eval_loss": 2.419173002243042,
|
7158 |
+
"eval_runtime": 407.1529,
|
7159 |
+
"eval_samples_per_second": 23.279,
|
7160 |
+
"eval_steps_per_second": 2.91,
|
7161 |
+
"step": 44700
|
7162 |
+
},
|
7163 |
+
{
|
7164 |
+
"epoch": 18.86,
|
7165 |
+
"grad_norm": 2.3344297409057617,
|
7166 |
+
"learning_rate": 1.8708127208480567e-05,
|
7167 |
+
"loss": 0.658,
|
7168 |
+
"step": 44800
|
7169 |
+
},
|
7170 |
+
{
|
7171 |
+
"epoch": 18.86,
|
7172 |
+
"eval_cer": 0.3527897813929842,
|
7173 |
+
"eval_loss": 2.6321187019348145,
|
7174 |
+
"eval_runtime": 378.5146,
|
7175 |
+
"eval_samples_per_second": 25.04,
|
7176 |
+
"eval_steps_per_second": 3.131,
|
7177 |
+
"step": 44800
|
7178 |
}
|
7179 |
],
|
7180 |
"logging_steps": 100,
|
|
|
7182 |
"num_input_tokens_seen": 0,
|
7183 |
"num_train_epochs": 30,
|
7184 |
"save_steps": 100,
|
7185 |
+
"total_flos": 4.909167843123271e+20,
|
7186 |
"train_batch_size": 8,
|
7187 |
"trial_name": null,
|
7188 |
"trial_params": null
|