Training in progress, step 35700, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:977a5c69feae10b912f571de7977009febbf01a0eaa266424ca31544fa3061d4
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2957ac18ba2253b8c409a29674b89329d32e320d7a98ce2b81ee8d57641bc16
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b3b488252dd3d31bc05954dcc0188157b4ef2c211110990aacba94eb2fa5123
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1edf5ffcac2b5407479ae34e05195359730147dee5af532317f0b039b8c166f2
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a8b1f07cc0159a990d1350d3b8aacd4d203ba9556ec73cabc9467bc15b486a5
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5687,6 +5687,38 @@
|
|
5687 |
"eval_samples_per_second": 25.72,
|
5688 |
"eval_steps_per_second": 3.216,
|
5689 |
"step": 35500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5690 |
}
|
5691 |
],
|
5692 |
"logging_steps": 100,
|
@@ -5694,7 +5726,7 @@
|
|
5694 |
"num_input_tokens_seen": 0,
|
5695 |
"num_train_epochs": 30,
|
5696 |
"save_steps": 100,
|
5697 |
-
"total_flos": 3.
|
5698 |
"train_batch_size": 8,
|
5699 |
"trial_name": null,
|
5700 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 15.028415070511471,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 35700,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5687 |
"eval_samples_per_second": 25.72,
|
5688 |
"eval_steps_per_second": 3.216,
|
5689 |
"step": 35500
|
5690 |
+
},
|
5691 |
+
{
|
5692 |
+
"epoch": 14.99,
|
5693 |
+
"grad_norm": 3.9734749794006348,
|
5694 |
+
"learning_rate": 2.520706713780919e-05,
|
5695 |
+
"loss": 0.9122,
|
5696 |
+
"step": 35600
|
5697 |
+
},
|
5698 |
+
{
|
5699 |
+
"epoch": 14.99,
|
5700 |
+
"eval_cer": 0.3868713581791874,
|
5701 |
+
"eval_loss": 2.068138360977173,
|
5702 |
+
"eval_runtime": 401.8562,
|
5703 |
+
"eval_samples_per_second": 23.586,
|
5704 |
+
"eval_steps_per_second": 2.949,
|
5705 |
+
"step": 35600
|
5706 |
+
},
|
5707 |
+
{
|
5708 |
+
"epoch": 15.03,
|
5709 |
+
"grad_norm": 1.6432957649230957,
|
5710 |
+
"learning_rate": 2.513639575971732e-05,
|
5711 |
+
"loss": 0.872,
|
5712 |
+
"step": 35700
|
5713 |
+
},
|
5714 |
+
{
|
5715 |
+
"epoch": 15.03,
|
5716 |
+
"eval_cer": 0.3834226271948692,
|
5717 |
+
"eval_loss": 2.0865533351898193,
|
5718 |
+
"eval_runtime": 372.0301,
|
5719 |
+
"eval_samples_per_second": 25.476,
|
5720 |
+
"eval_steps_per_second": 3.185,
|
5721 |
+
"step": 35700
|
5722 |
}
|
5723 |
],
|
5724 |
"logging_steps": 100,
|
|
|
5726 |
"num_input_tokens_seen": 0,
|
5727 |
"num_train_epochs": 30,
|
5728 |
"save_steps": 100,
|
5729 |
+
"total_flos": 3.9129036959681564e+20,
|
5730 |
"train_batch_size": 8,
|
5731 |
"trial_name": null,
|
5732 |
"trial_params": null
|