Training in progress, step 38100, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad0e4d057edfb8f7f24491b899369f7b93228a9e17e1354d5f644333b4e1576d
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bfe756275a5388124bc2228feeaaf7ef82b935e701eb728a6a11413b046929db
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d38f9132e2085616601917cca2b9dcaa50f30da1f10838a4c93930581540ab0
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f87a00bf8137824c3bb0753019893bcf791dd44d13ce34dc225bf7f0963a1806
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbe3a461b22634f6c029dd4cb55ac1a5510596e6da69170fbde554202a3b4b05
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6039,6 +6039,70 @@
|
|
6039 |
"eval_samples_per_second": 25.393,
|
6040 |
"eval_steps_per_second": 3.175,
|
6041 |
"step": 37700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6042 |
}
|
6043 |
],
|
6044 |
"logging_steps": 100,
|
@@ -6046,7 +6110,7 @@
|
|
6046 |
"num_input_tokens_seen": 0,
|
6047 |
"num_train_epochs": 30,
|
6048 |
"save_steps": 100,
|
6049 |
-
"total_flos": 4.
|
6050 |
"train_batch_size": 8,
|
6051 |
"trial_name": null,
|
6052 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 16.038728688697116,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 38100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6039 |
"eval_samples_per_second": 25.393,
|
6040 |
"eval_steps_per_second": 3.175,
|
6041 |
"step": 37700
|
6042 |
+
},
|
6043 |
+
{
|
6044 |
+
"epoch": 15.91,
|
6045 |
+
"grad_norm": 17.767349243164062,
|
6046 |
+
"learning_rate": 2.3652296819787986e-05,
|
6047 |
+
"loss": 0.8418,
|
6048 |
+
"step": 37800
|
6049 |
+
},
|
6050 |
+
{
|
6051 |
+
"epoch": 15.91,
|
6052 |
+
"eval_cer": 0.37160748504164876,
|
6053 |
+
"eval_loss": 1.7542308568954468,
|
6054 |
+
"eval_runtime": 389.6677,
|
6055 |
+
"eval_samples_per_second": 24.323,
|
6056 |
+
"eval_steps_per_second": 3.041,
|
6057 |
+
"step": 37800
|
6058 |
+
},
|
6059 |
+
{
|
6060 |
+
"epoch": 15.95,
|
6061 |
+
"grad_norm": 6.326321601867676,
|
6062 |
+
"learning_rate": 2.3581625441696114e-05,
|
6063 |
+
"loss": 0.8193,
|
6064 |
+
"step": 37900
|
6065 |
+
},
|
6066 |
+
{
|
6067 |
+
"epoch": 15.95,
|
6068 |
+
"eval_cer": 0.3725704900082124,
|
6069 |
+
"eval_loss": 2.059025764465332,
|
6070 |
+
"eval_runtime": 377.283,
|
6071 |
+
"eval_samples_per_second": 25.122,
|
6072 |
+
"eval_steps_per_second": 3.141,
|
6073 |
+
"step": 37900
|
6074 |
+
},
|
6075 |
+
{
|
6076 |
+
"epoch": 16.0,
|
6077 |
+
"grad_norm": 2.2139954566955566,
|
6078 |
+
"learning_rate": 2.351095406360424e-05,
|
6079 |
+
"loss": 0.8378,
|
6080 |
+
"step": 38000
|
6081 |
+
},
|
6082 |
+
{
|
6083 |
+
"epoch": 16.0,
|
6084 |
+
"eval_cer": 0.37425452661217784,
|
6085 |
+
"eval_loss": 1.9064280986785889,
|
6086 |
+
"eval_runtime": 411.9001,
|
6087 |
+
"eval_samples_per_second": 23.01,
|
6088 |
+
"eval_steps_per_second": 2.877,
|
6089 |
+
"step": 38000
|
6090 |
+
},
|
6091 |
+
{
|
6092 |
+
"epoch": 16.04,
|
6093 |
+
"grad_norm": 2.1273019313812256,
|
6094 |
+
"learning_rate": 2.3440282685512367e-05,
|
6095 |
+
"loss": 0.7478,
|
6096 |
+
"step": 38100
|
6097 |
+
},
|
6098 |
+
{
|
6099 |
+
"epoch": 16.04,
|
6100 |
+
"eval_cer": 0.3710159946814751,
|
6101 |
+
"eval_loss": 1.7977242469787598,
|
6102 |
+
"eval_runtime": 380.1262,
|
6103 |
+
"eval_samples_per_second": 24.934,
|
6104 |
+
"eval_steps_per_second": 3.117,
|
6105 |
+
"step": 38100
|
6106 |
}
|
6107 |
],
|
6108 |
"logging_steps": 100,
|
|
|
6110 |
"num_input_tokens_seen": 0,
|
6111 |
"num_train_epochs": 30,
|
6112 |
"save_steps": 100,
|
6113 |
+
"total_flos": 4.175077130259511e+20,
|
6114 |
"train_batch_size": 8,
|
6115 |
"trial_name": null,
|
6116 |
"trial_params": null
|