Training in progress, step 45000, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91e422627c4b9c768f46280d63fdbcfda35620e774f98056638ba875f3575284
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:acf19318ad1b6dc72bddec929f0849bc212f2e1e8bd60f19ac70115441c83c91
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5d07a63df9aaae7d187b5731e2c4c19721e7a2e9545c19b5722e4ffda120ed9
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:815af6775e420c509219caee17b9a5516bbf2b05365eac817aa0f9a01cbd8f8d
|
3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02a1219b2614f7ac1e5741ff596a810f142ab9bd2e0d40eb1eb0d44675ea0fea
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 18.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -7175,6 +7175,38 @@
|
|
7175 |
"eval_samples_per_second": 25.04,
|
7176 |
"eval_steps_per_second": 3.131,
|
7177 |
"step": 44800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7178 |
}
|
7179 |
],
|
7180 |
"logging_steps": 100,
|
@@ -7182,7 +7214,7 @@
|
|
7182 |
"num_input_tokens_seen": 0,
|
7183 |
"num_train_epochs": 30,
|
7184 |
"save_steps": 100,
|
7185 |
-
"total_flos": 4.
|
7186 |
"train_batch_size": 8,
|
7187 |
"trial_name": null,
|
7188 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 18.943380340980845,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 45000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
7175 |
"eval_samples_per_second": 25.04,
|
7176 |
"eval_steps_per_second": 3.131,
|
7177 |
"step": 44800
|
7178 |
+
},
|
7179 |
+
{
|
7180 |
+
"epoch": 18.9,
|
7181 |
+
"grad_norm": 1.822228193283081,
|
7182 |
+
"learning_rate": 1.8637455830388695e-05,
|
7183 |
+
"loss": 0.5733,
|
7184 |
+
"step": 44900
|
7185 |
+
},
|
7186 |
+
{
|
7187 |
+
"epoch": 18.9,
|
7188 |
+
"eval_cer": 0.34930194360799344,
|
7189 |
+
"eval_loss": 2.7781569957733154,
|
7190 |
+
"eval_runtime": 405.1046,
|
7191 |
+
"eval_samples_per_second": 23.396,
|
7192 |
+
"eval_steps_per_second": 2.925,
|
7193 |
+
"step": 44900
|
7194 |
+
},
|
7195 |
+
{
|
7196 |
+
"epoch": 18.94,
|
7197 |
+
"grad_norm": 15.387603759765625,
|
7198 |
+
"learning_rate": 1.856678445229682e-05,
|
7199 |
+
"loss": 0.5554,
|
7200 |
+
"step": 45000
|
7201 |
+
},
|
7202 |
+
{
|
7203 |
+
"epoch": 18.94,
|
7204 |
+
"eval_cer": 0.35281422314340466,
|
7205 |
+
"eval_loss": 2.2615067958831787,
|
7206 |
+
"eval_runtime": 386.1566,
|
7207 |
+
"eval_samples_per_second": 24.544,
|
7208 |
+
"eval_steps_per_second": 3.069,
|
7209 |
+
"step": 45000
|
7210 |
}
|
7211 |
],
|
7212 |
"logging_steps": 100,
|
|
|
7214 |
"num_input_tokens_seen": 0,
|
7215 |
"num_train_epochs": 30,
|
7216 |
"save_steps": 100,
|
7217 |
+
"total_flos": 4.930677119902053e+20,
|
7218 |
"train_batch_size": 8,
|
7219 |
"trial_name": null,
|
7220 |
"trial_params": null
|