Training in progress, step 2109, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +67 -4
last-checkpoint/optimizer_0/.metadata
CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13934748
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37ffda79ba118623656f8db97819ae6e85518aa69d76fdc791db100043e48be3
|
3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13999412
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed8e07a5506949e411645ce87a449f3891f4b957ab0ddc49de3f672874f62723
|
3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5b48b159ad925162a1fb4fdb564c7822ef0b8faee2ca4553b90be22401cfc27
|
3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74d8401662575808efa6d4f055be82b79a2163808e88edd675b8c2d69ffec243
|
3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:130e8e526dc3be989ce9e1461bf9ba00c6c3268dd82397cdb981f3011ad5d2d8
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab35ef8e239aeb1a02ce2664693e6a54a69dc22deb0f8cc4e9d4aadcd8208b19
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c02ee9352c0cf1e8394f869f2bc7131d250afe60e955f4919648fd5da62223c
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6df05302d7fd7f51f68396fc2bb6ad042d2a59fc7d68c901ad21562911ed44e8
|
3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa3642bf088423aa8179accacc32785bc88d1c8451ac1c553aa395bd19b6282d
|
3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85938c29325af8d2e4e3e20d49a2776c8cdfd754b3f6d2de7d434efbe1e18dc2
|
3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c76ba06042e26108cca2c1434deb9e836ac081642cac834e23811c5b667f8b38
|
3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53cd2af488fd8da1bd97f5b13b847fe755bce635dec1434cf52aae85b3dad708
|
3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2e5684d7b341caf8fde096d7c7ea6b392a41b77ac783f1df1a641a78a0c87fb
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -15979,6 +15979,69 @@
|
|
15979 |
"eval_samples_per_second": 5.418,
|
15980 |
"eval_steps_per_second": 0.18,
|
15981 |
"step": 2100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15982 |
}
|
15983 |
],
|
15984 |
"logging_steps": 1,
|
@@ -15993,12 +16056,12 @@
|
|
15993 |
"should_evaluate": false,
|
15994 |
"should_log": false,
|
15995 |
"should_save": true,
|
15996 |
-
"should_training_stop":
|
15997 |
},
|
15998 |
"attributes": {}
|
15999 |
}
|
16000 |
},
|
16001 |
-
"total_flos": 5.
|
16002 |
"train_batch_size": 8,
|
16003 |
"trial_name": null,
|
16004 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9997629770087698,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 2109,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
15979 |
"eval_samples_per_second": 5.418,
|
15980 |
"eval_steps_per_second": 0.18,
|
15981 |
"step": 2100
|
15982 |
+
},
|
15983 |
+
{
|
15984 |
+
"epoch": 0.9959706091490874,
|
15985 |
+
"grad_norm": 4.999462127685547,
|
15986 |
+
"learning_rate": 8.766999093690054e-10,
|
15987 |
+
"loss": 0.139,
|
15988 |
+
"step": 2101
|
15989 |
+
},
|
15990 |
+
{
|
15991 |
+
"epoch": 0.9964446551315478,
|
15992 |
+
"grad_norm": 7.40416145324707,
|
15993 |
+
"learning_rate": 6.71225666831754e-10,
|
15994 |
+
"loss": 0.1862,
|
15995 |
+
"step": 2102
|
15996 |
+
},
|
15997 |
+
{
|
15998 |
+
"epoch": 0.996918701114008,
|
15999 |
+
"grad_norm": 3.0805890560150146,
|
16000 |
+
"learning_rate": 4.931468515556593e-10,
|
16001 |
+
"loss": 0.1179,
|
16002 |
+
"step": 2103
|
16003 |
+
},
|
16004 |
+
{
|
16005 |
+
"epoch": 0.9973927470964684,
|
16006 |
+
"grad_norm": 3.0441319942474365,
|
16007 |
+
"learning_rate": 3.4246395142822906e-10,
|
16008 |
+
"loss": 0.1261,
|
16009 |
+
"step": 2104
|
16010 |
+
},
|
16011 |
+
{
|
16012 |
+
"epoch": 0.9978667930789287,
|
16013 |
+
"grad_norm": 3.2093586921691895,
|
16014 |
+
"learning_rate": 2.1917737927812377e-10,
|
16015 |
+
"loss": 0.0813,
|
16016 |
+
"step": 2105
|
16017 |
+
},
|
16018 |
+
{
|
16019 |
+
"epoch": 0.9983408390613889,
|
16020 |
+
"grad_norm": 5.661715507507324,
|
16021 |
+
"learning_rate": 1.2328747287848609e-10,
|
16022 |
+
"loss": 0.1345,
|
16023 |
+
"step": 2106
|
16024 |
+
},
|
16025 |
+
{
|
16026 |
+
"epoch": 0.9988148850438493,
|
16027 |
+
"grad_norm": 3.4387176036834717,
|
16028 |
+
"learning_rate": 5.4794494941390333e-11,
|
16029 |
+
"loss": 0.1216,
|
16030 |
+
"step": 2107
|
16031 |
+
},
|
16032 |
+
{
|
16033 |
+
"epoch": 0.9992889310263096,
|
16034 |
+
"grad_norm": 3.1428146362304688,
|
16035 |
+
"learning_rate": 1.3698633117842365e-11,
|
16036 |
+
"loss": 0.1275,
|
16037 |
+
"step": 2108
|
16038 |
+
},
|
16039 |
+
{
|
16040 |
+
"epoch": 0.9997629770087698,
|
16041 |
+
"grad_norm": 8.101336479187012,
|
16042 |
+
"learning_rate": 0.0,
|
16043 |
+
"loss": 0.2897,
|
16044 |
+
"step": 2109
|
16045 |
}
|
16046 |
],
|
16047 |
"logging_steps": 1,
|
|
|
16056 |
"should_evaluate": false,
|
16057 |
"should_log": false,
|
16058 |
"should_save": true,
|
16059 |
+
"should_training_stop": true
|
16060 |
},
|
16061 |
"attributes": {}
|
16062 |
}
|
16063 |
},
|
16064 |
+
"total_flos": 5.63300984822956e+17,
|
16065 |
"train_batch_size": 8,
|
16066 |
"trial_name": null,
|
16067 |
"trial_params": null
|