Training in progress, step 808, checkpoint
Browse files- last-checkpoint/optimizer_0/.metadata +0 -0
- last-checkpoint/optimizer_0/__0_0.distcp +1 -1
- last-checkpoint/optimizer_0/__1_0.distcp +1 -1
- last-checkpoint/optimizer_0/__2_0.distcp +1 -1
- last-checkpoint/optimizer_0/__3_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/.metadata +0 -0
- last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp +1 -1
- last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +60 -4
last-checkpoint/optimizer_0/.metadata
CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
|
|
last-checkpoint/optimizer_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13934748
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d79a8a24bad92ed6eddb4f1a5ae34147ccedd0938830a31dd4b49d4107b25fe
|
3 |
size 13934748
|
last-checkpoint/optimizer_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13999412
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19d5894029ff5fb8c3f198f3d0dc4721910b9e651dcf6775ef035747ababd49d
|
3 |
size 13999412
|
last-checkpoint/optimizer_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72f89593cdfdb1d0d63e29c7655dd6926cfa8d7de6f64ba99861afa048925281
|
3 |
size 13990904
|
last-checkpoint/optimizer_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 13990904
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1de76ba031b2df70290abcc273c0ca7c76e5324bf347df6f17f34aee7dc63d1c
|
3 |
size 13990904
|
last-checkpoint/pytorch_model_fsdp_0/.metadata
CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
|
|
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26f35c31737247f2cdd166bfc3866830e294dca3791e7c58a4a4c7da0bde1664
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee274d658c2cd7d90c2cb7a6e847197c382e7b20d51d4d940ab01b8f68c15df2
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43d339baab952c823a939ba4aaa601da58042e6b41d474cb9dd88a0b1db4d424
|
3 |
size 6966784
|
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6966784
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8564d765df3996b99c746bc714c38fa581f05f6f7bd8afb7323b7cee52b46db5
|
3 |
size 6966784
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5267e6651ba6650aad8d4187f305b173ad364e0765707ff4829b4e5b285e5d5
|
3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6eb6e8a7a5edcac0f188611ba47c80f2c5826e8fdf1f9517cb0c11df9afe649c
|
3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81b06ae580a53262ce36ae07c3fb3c1ed4571ad7e635f838d2bd9d98ff1de4da
|
3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c23b8d462da5c4f7f12b2321b9990502056ba1dfc5c41ddf990e0cf27e4f862b
|
3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df74225d229428c183e5117a04f191bf9faa39263780150f0dc259da5480c864
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 20,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6099,6 +6099,62 @@
|
|
6099 |
"eval_samples_per_second": 5.734,
|
6100 |
"eval_steps_per_second": 0.187,
|
6101 |
"step": 800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6102 |
}
|
6103 |
],
|
6104 |
"logging_steps": 1,
|
@@ -6113,12 +6169,12 @@
|
|
6113 |
"should_evaluate": false,
|
6114 |
"should_log": false,
|
6115 |
"should_save": true,
|
6116 |
-
"should_training_stop":
|
6117 |
},
|
6118 |
"attributes": {}
|
6119 |
}
|
6120 |
},
|
6121 |
-
"total_flos": 2.
|
6122 |
"train_batch_size": 8,
|
6123 |
"trial_name": null,
|
6124 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 20,
|
6 |
+
"global_step": 808,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6099 |
"eval_samples_per_second": 5.734,
|
6100 |
"eval_steps_per_second": 0.187,
|
6101 |
"step": 800
|
6102 |
+
},
|
6103 |
+
{
|
6104 |
+
"epoch": 0.9913366336633663,
|
6105 |
+
"grad_norm": 5.27916955947876,
|
6106 |
+
"learning_rate": 4.574713411816811e-09,
|
6107 |
+
"loss": 0.1858,
|
6108 |
+
"step": 801
|
6109 |
+
},
|
6110 |
+
{
|
6111 |
+
"epoch": 0.9925742574257426,
|
6112 |
+
"grad_norm": 5.716527462005615,
|
6113 |
+
"learning_rate": 3.361081929664778e-09,
|
6114 |
+
"loss": 0.1604,
|
6115 |
+
"step": 802
|
6116 |
+
},
|
6117 |
+
{
|
6118 |
+
"epoch": 0.9938118811881188,
|
6119 |
+
"grad_norm": 5.259273529052734,
|
6120 |
+
"learning_rate": 2.3341246279806606e-09,
|
6121 |
+
"loss": 0.1598,
|
6122 |
+
"step": 803
|
6123 |
+
},
|
6124 |
+
{
|
6125 |
+
"epoch": 0.995049504950495,
|
6126 |
+
"grad_norm": 4.770205974578857,
|
6127 |
+
"learning_rate": 1.493860683851045e-09,
|
6128 |
+
"loss": 0.1981,
|
6129 |
+
"step": 804
|
6130 |
+
},
|
6131 |
+
{
|
6132 |
+
"epoch": 0.9962871287128713,
|
6133 |
+
"grad_norm": 3.5331854820251465,
|
6134 |
+
"learning_rate": 8.403057881067877e-10,
|
6135 |
+
"loss": 0.1671,
|
6136 |
+
"step": 805
|
6137 |
+
},
|
6138 |
+
{
|
6139 |
+
"epoch": 0.9975247524752475,
|
6140 |
+
"grad_norm": 4.290172576904297,
|
6141 |
+
"learning_rate": 3.7347214503435927e-10,
|
6142 |
+
"loss": 0.2307,
|
6143 |
+
"step": 806
|
6144 |
+
},
|
6145 |
+
{
|
6146 |
+
"epoch": 0.9987623762376238,
|
6147 |
+
"grad_norm": 4.1493964195251465,
|
6148 |
+
"learning_rate": 9.33684721426964e-11,
|
6149 |
+
"loss": 0.1106,
|
6150 |
+
"step": 807
|
6151 |
+
},
|
6152 |
+
{
|
6153 |
+
"epoch": 1.0,
|
6154 |
+
"grad_norm": 3.5634615421295166,
|
6155 |
+
"learning_rate": 0.0,
|
6156 |
+
"loss": 0.1396,
|
6157 |
+
"step": 808
|
6158 |
}
|
6159 |
],
|
6160 |
"logging_steps": 1,
|
|
|
6169 |
"should_evaluate": false,
|
6170 |
"should_log": false,
|
6171 |
"should_save": true,
|
6172 |
+
"should_training_stop": true
|
6173 |
},
|
6174 |
"attributes": {}
|
6175 |
}
|
6176 |
},
|
6177 |
+
"total_flos": 2.4786605370559693e+17,
|
6178 |
"train_batch_size": 8,
|
6179 |
"trial_name": null,
|
6180 |
"trial_params": null
|