Training in progress, step 25300, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92d0f54d94bb40c36aff6996d6fa628c3ad8f35db1778fc27c392e8f1f297c37
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e11244be255828a4851b0092fd87821c5c801f9a5160bbcbb2f0bc57051dd6c0
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67169462303e6fe8077b6f06672390845332995ce37eb008c9c0bf1f85691316
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14567
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95db0728543ded7d217ea539348d5e5e4f1dbdf964d9771ae6b95d5606964667
|
3 |
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:917056f6954f4dd224b2ffcefa138533388ad3cff19d51f30a910abd911033b3
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 10.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4023,6 +4023,38 @@
|
|
4023 |
"eval_samples_per_second": 26.051,
|
4024 |
"eval_steps_per_second": 3.257,
|
4025 |
"step": 25100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4026 |
}
|
4027 |
],
|
4028 |
"logging_steps": 100,
|
@@ -4030,7 +4062,7 @@
|
|
4030 |
"num_input_tokens_seen": 0,
|
4031 |
"num_train_epochs": 30,
|
4032 |
"save_steps": 100,
|
4033 |
-
"total_flos": 2.
|
4034 |
"train_batch_size": 8,
|
4035 |
"trial_name": null,
|
4036 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 10.650389391707009,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 25300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4023 |
"eval_samples_per_second": 26.051,
|
4024 |
"eval_steps_per_second": 3.257,
|
4025 |
"step": 25100
|
4026 |
+
},
|
4027 |
+
{
|
4028 |
+
"epoch": 10.61,
|
4029 |
+
"grad_norm": 2.433943033218384,
|
4030 |
+
"learning_rate": 3.255406360424029e-05,
|
4031 |
+
"loss": 1.186,
|
4032 |
+
"step": 25200
|
4033 |
+
},
|
4034 |
+
{
|
4035 |
+
"epoch": 10.61,
|
4036 |
+
"eval_cer": 0.4293755621602597,
|
4037 |
+
"eval_loss": 2.9843032360076904,
|
4038 |
+
"eval_runtime": 383.8866,
|
4039 |
+
"eval_samples_per_second": 24.69,
|
4040 |
+
"eval_steps_per_second": 3.087,
|
4041 |
+
"step": 25200
|
4042 |
+
},
|
4043 |
+
{
|
4044 |
+
"epoch": 10.65,
|
4045 |
+
"grad_norm": 2.10840106010437,
|
4046 |
+
"learning_rate": 3.248339222614841e-05,
|
4047 |
+
"loss": 1.2008,
|
4048 |
+
"step": 25300
|
4049 |
+
},
|
4050 |
+
{
|
4051 |
+
"epoch": 10.65,
|
4052 |
+
"eval_cer": 0.42626657150678504,
|
4053 |
+
"eval_loss": 2.721179485321045,
|
4054 |
+
"eval_runtime": 367.0761,
|
4055 |
+
"eval_samples_per_second": 25.82,
|
4056 |
+
"eval_steps_per_second": 3.228,
|
4057 |
+
"step": 25300
|
4058 |
}
|
4059 |
],
|
4060 |
"logging_steps": 100,
|
|
|
4062 |
"num_input_tokens_seen": 0,
|
4063 |
"num_train_epochs": 30,
|
4064 |
"save_steps": 100,
|
4065 |
+
"total_flos": 2.772097477056956e+20,
|
4066 |
"train_batch_size": 8,
|
4067 |
"trial_name": null,
|
4068 |
"trial_params": null
|