Training in progress, step 11350, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step11350/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11350/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11350/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11350/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11350/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11350/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11350/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11350/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:edc014425a512bfb24ea4891c4aef3f48c141f1f00d1f7453681365e76633268
|
3 |
size 29034840
|
last-checkpoint/global_step11350/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d2947716e2a3cd8e39d2e76d32b534d90a81f6c83a14945ee976472d80de4a1
|
3 |
+
size 43429616
|
last-checkpoint/global_step11350/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df1da6b792edcf70549e10834a4cdbde37daf6a9923e6c5b5e6ceed3ca00488b
|
3 |
+
size 43429616
|
last-checkpoint/global_step11350/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9ba8dfc1c45990f6894acda0e9e8db03126e7284e937f1fd690df36f6f1622a
|
3 |
+
size 43429616
|
last-checkpoint/global_step11350/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a62a7d488449956dd721a000e5908b54083da6e5b2bd4245e5cd34493db79f4
|
3 |
+
size 43429616
|
last-checkpoint/global_step11350/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a52c93770a0e5454edd3ddb7135e4abf58c57e41cf99aabfdea88c1b2b0b07b0
|
3 |
+
size 637299
|
last-checkpoint/global_step11350/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:156ed4f6bcb776fd13d543b44fba0c3e2a128c91542f1b31d9465f9a73093c5a
|
3 |
+
size 637171
|
last-checkpoint/global_step11350/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71f73472d6af07878bce0e988a637fcc524d50fd6103e17d6df8c1f33f589b98
|
3 |
+
size 637171
|
last-checkpoint/global_step11350/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9718b4569c71a0ec0a00876f7db9ff32d23ddba0def4ba29e20abd07f2204904
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step11350
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09ff6441ff368f6bad27e48a0362b77b338163b2ee1b15751fc3233cf1145ecc
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ebb93e7cba7126cd62f47fae229b37829e1e1fad42bc204362afc892225cc21
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89185906b69c940650b80ba4e630ae4cb9dd704b264f6b06e7d3bfff2d267319
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1056a5c3a428500192d5b8a2afe77cbae2afe446f795137aa2e6595314783a5c
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac8cd11960cc9e792f50280ed9e43f35e6a2f509ff57d9f6ebd92e0d5757652e
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -20121,11 +20121,100 @@
|
|
20121 |
"eval_steps_per_second": 0.758,
|
20122 |
"num_input_tokens_seen": 75535760,
|
20123 |
"step": 11300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20124 |
}
|
20125 |
],
|
20126 |
"logging_steps": 5,
|
20127 |
"max_steps": 16324,
|
20128 |
-
"num_input_tokens_seen":
|
20129 |
"num_train_epochs": 2,
|
20130 |
"save_steps": 50,
|
20131 |
"stateful_callbacks": {
|
@@ -20140,7 +20229,7 @@
|
|
20140 |
"attributes": {}
|
20141 |
}
|
20142 |
},
|
20143 |
-
"total_flos":
|
20144 |
"train_batch_size": 1,
|
20145 |
"trial_name": null,
|
20146 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.3905546537625186,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 11350,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
20121 |
"eval_steps_per_second": 0.758,
|
20122 |
"num_input_tokens_seen": 75535760,
|
20123 |
"step": 11300
|
20124 |
+
},
|
20125 |
+
{
|
20126 |
+
"epoch": 1.3850418057639888,
|
20127 |
+
"grad_norm": 1.6232949150301446,
|
20128 |
+
"learning_rate": 2.369587209571217e-05,
|
20129 |
+
"loss": 0.236,
|
20130 |
+
"num_input_tokens_seen": 75568840,
|
20131 |
+
"step": 11305
|
20132 |
+
},
|
20133 |
+
{
|
20134 |
+
"epoch": 1.3856543444304923,
|
20135 |
+
"grad_norm": 1.5155958228674475,
|
20136 |
+
"learning_rate": 2.3652812862641265e-05,
|
20137 |
+
"loss": 0.261,
|
20138 |
+
"num_input_tokens_seen": 75601944,
|
20139 |
+
"step": 11310
|
20140 |
+
},
|
20141 |
+
{
|
20142 |
+
"epoch": 1.3862668830969955,
|
20143 |
+
"grad_norm": 0.870646720736669,
|
20144 |
+
"learning_rate": 2.3609780664073923e-05,
|
20145 |
+
"loss": 0.2013,
|
20146 |
+
"num_input_tokens_seen": 75635768,
|
20147 |
+
"step": 11315
|
20148 |
+
},
|
20149 |
+
{
|
20150 |
+
"epoch": 1.3868794217634988,
|
20151 |
+
"grad_norm": 1.1925450250695349,
|
20152 |
+
"learning_rate": 2.3566775544164937e-05,
|
20153 |
+
"loss": 0.218,
|
20154 |
+
"num_input_tokens_seen": 75669384,
|
20155 |
+
"step": 11320
|
20156 |
+
},
|
20157 |
+
{
|
20158 |
+
"epoch": 1.3874919604300022,
|
20159 |
+
"grad_norm": 1.8228325292060459,
|
20160 |
+
"learning_rate": 2.3523797547041316e-05,
|
20161 |
+
"loss": 0.1893,
|
20162 |
+
"num_input_tokens_seen": 75703016,
|
20163 |
+
"step": 11325
|
20164 |
+
},
|
20165 |
+
{
|
20166 |
+
"epoch": 1.3881044990965055,
|
20167 |
+
"grad_norm": 1.4306894391386675,
|
20168 |
+
"learning_rate": 2.3480846716802184e-05,
|
20169 |
+
"loss": 0.2366,
|
20170 |
+
"num_input_tokens_seen": 75736336,
|
20171 |
+
"step": 11330
|
20172 |
+
},
|
20173 |
+
{
|
20174 |
+
"epoch": 1.388717037763009,
|
20175 |
+
"grad_norm": 1.19916009774191,
|
20176 |
+
"learning_rate": 2.3437923097518843e-05,
|
20177 |
+
"loss": 0.1948,
|
20178 |
+
"num_input_tokens_seen": 75770120,
|
20179 |
+
"step": 11335
|
20180 |
+
},
|
20181 |
+
{
|
20182 |
+
"epoch": 1.3893295764295122,
|
20183 |
+
"grad_norm": 1.2947975130523453,
|
20184 |
+
"learning_rate": 2.3395026733234653e-05,
|
20185 |
+
"loss": 0.2454,
|
20186 |
+
"num_input_tokens_seen": 75803520,
|
20187 |
+
"step": 11340
|
20188 |
+
},
|
20189 |
+
{
|
20190 |
+
"epoch": 1.3899421150960154,
|
20191 |
+
"grad_norm": 1.4486076958155687,
|
20192 |
+
"learning_rate": 2.3352157667965034e-05,
|
20193 |
+
"loss": 0.1971,
|
20194 |
+
"num_input_tokens_seen": 75837704,
|
20195 |
+
"step": 11345
|
20196 |
+
},
|
20197 |
+
{
|
20198 |
+
"epoch": 1.3905546537625186,
|
20199 |
+
"grad_norm": 1.490900454933061,
|
20200 |
+
"learning_rate": 2.3309315945697335e-05,
|
20201 |
+
"loss": 0.2302,
|
20202 |
+
"num_input_tokens_seen": 75870800,
|
20203 |
+
"step": 11350
|
20204 |
+
},
|
20205 |
+
{
|
20206 |
+
"epoch": 1.3905546537625186,
|
20207 |
+
"eval_loss": 0.1493845283985138,
|
20208 |
+
"eval_runtime": 19.6828,
|
20209 |
+
"eval_samples_per_second": 3.048,
|
20210 |
+
"eval_steps_per_second": 0.762,
|
20211 |
+
"num_input_tokens_seen": 75870800,
|
20212 |
+
"step": 11350
|
20213 |
}
|
20214 |
],
|
20215 |
"logging_steps": 5,
|
20216 |
"max_steps": 16324,
|
20217 |
+
"num_input_tokens_seen": 75870800,
|
20218 |
"num_train_epochs": 2,
|
20219 |
"save_steps": 50,
|
20220 |
"stateful_callbacks": {
|
|
|
20229 |
"attributes": {}
|
20230 |
}
|
20231 |
},
|
20232 |
+
"total_flos": 4758866066145280.0,
|
20233 |
"train_batch_size": 1,
|
20234 |
"trial_name": null,
|
20235 |
"trial_params": null
|