Training in progress, step 38900, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86aedaea68068dd5c1a8b421f702958fe0acd2fbb8823da4823612fb4e7c45bf
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:727182a7c870435209c98f44944156abd6a6c3261ccb96daf87907a574c1b37b
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93a07f3eb542190ed23f7b40d5f4678b80bd5b4ba91438888eb3d3cb6fc3186e
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91e28ae43f1dd07d9a9ddf4073e22adb6e7929806ef21e48eb3b580f30a7e559
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e9da594946dcc239f89ecda3fc48286b363af1118506368ef79741edf876cc1
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 16.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6199,6 +6199,38 @@
|
|
6199 |
"eval_samples_per_second": 25.205,
|
6200 |
"eval_steps_per_second": 3.151,
|
6201 |
"step": 38700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6202 |
}
|
6203 |
],
|
6204 |
"logging_steps": 100,
|
@@ -6206,7 +6238,7 @@
|
|
6206 |
"num_input_tokens_seen": 0,
|
6207 |
"num_train_epochs": 30,
|
6208 |
"save_steps": 100,
|
6209 |
-
"total_flos": 4.
|
6210 |
"train_batch_size": 8,
|
6211 |
"trial_name": null,
|
6212 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 16.375499894759,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 38900,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6199 |
"eval_samples_per_second": 25.205,
|
6200 |
"eval_steps_per_second": 3.151,
|
6201 |
"step": 38700
|
6202 |
+
},
|
6203 |
+
{
|
6204 |
+
"epoch": 16.33,
|
6205 |
+
"grad_norm": 1.934323787689209,
|
6206 |
+
"learning_rate": 2.2945583038869257e-05,
|
6207 |
+
"loss": 0.7233,
|
6208 |
+
"step": 38800
|
6209 |
+
},
|
6210 |
+
{
|
6211 |
+
"epoch": 16.33,
|
6212 |
+
"eval_cer": 0.3704929412224786,
|
6213 |
+
"eval_loss": 2.179749011993408,
|
6214 |
+
"eval_runtime": 389.749,
|
6215 |
+
"eval_samples_per_second": 24.318,
|
6216 |
+
"eval_steps_per_second": 3.04,
|
6217 |
+
"step": 38800
|
6218 |
+
},
|
6219 |
+
{
|
6220 |
+
"epoch": 16.38,
|
6221 |
+
"grad_norm": 2.3759796619415283,
|
6222 |
+
"learning_rate": 2.287491166077739e-05,
|
6223 |
+
"loss": 0.743,
|
6224 |
+
"step": 38900
|
6225 |
+
},
|
6226 |
+
{
|
6227 |
+
"epoch": 16.38,
|
6228 |
+
"eval_cer": 0.3723774001798913,
|
6229 |
+
"eval_loss": 2.006964683532715,
|
6230 |
+
"eval_runtime": 381.2342,
|
6231 |
+
"eval_samples_per_second": 24.861,
|
6232 |
+
"eval_steps_per_second": 3.108,
|
6233 |
+
"step": 38900
|
6234 |
}
|
6235 |
],
|
6236 |
"logging_steps": 100,
|
|
|
6238 |
"num_input_tokens_seen": 0,
|
6239 |
"num_train_epochs": 30,
|
6240 |
"save_steps": 100,
|
6241 |
+
"total_flos": 4.262537185081538e+20,
|
6242 |
"train_batch_size": 8,
|
6243 |
"trial_name": null,
|
6244 |
"trial_params": null
|