Training in progress, step 10300, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step10300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10300/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10300/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10300/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10300/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0c1907f05fc0b8c01b36cbedf199619bd14eb6d4cd5c2990d045fd2cb3d3409
|
3 |
size 29034840
|
last-checkpoint/global_step10300/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11ef07e358207e27e1d1954eebc6af831ab63dce318d5de1b541e42ece690f81
|
3 |
+
size 43429616
|
last-checkpoint/global_step10300/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b258ec8066b1370cf577f7d6d0604e182aea07a60bae3fa21e4fc63afa018b94
|
3 |
+
size 43429616
|
last-checkpoint/global_step10300/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4250fcd8b0b1a0d8f64bba04574aa2b16450c9d529f18f689ee28988bb17f65c
|
3 |
+
size 43429616
|
last-checkpoint/global_step10300/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a48a301374b65e1b896c91591ed0c9b7d3605a2881f7877519f9974d8dbfe8e
|
3 |
+
size 43429616
|
last-checkpoint/global_step10300/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4215005fe1a3796482296033d0a6c38e8658d6e335c79a9b00f561d815fb332f
|
3 |
+
size 637299
|
last-checkpoint/global_step10300/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eed3ed73ce87d6b848d4e44dc55787fde04cc382e8322e33c93deeab9032425a
|
3 |
+
size 637171
|
last-checkpoint/global_step10300/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2540aab80fd811e62e04abd40a4a7684bf6a5c6b7987273cc8a84e43342e0aac
|
3 |
+
size 637171
|
last-checkpoint/global_step10300/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a02fece188bac11c87517eaa459834f44105bc8444a477ffd8a1cf2a3276b8b
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step10300
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80df60feff0acd4f14f797bf07109d6e26bd33d7850b304e4e8cef824c9108c6
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5bee3071b78f92ab4f49bcb4c72f4f14b59b05a36ead63461c5e6dae16ae35ec
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ee022c06201a478aedb709f3708d8030c1b3fb5811eac43139b44c8c2a8ef92
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c270f4af9e6cbad65ece8938b424235f5542e516f3d4b9a98d8797c69ddfba5d
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fb32c56163830cfeafa65a155993bf08d0bd9b86f98eac86494ff604a54cadb
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -18252,11 +18252,100 @@
|
|
18252 |
"eval_steps_per_second": 0.783,
|
18253 |
"num_input_tokens_seen": 68479264,
|
18254 |
"step": 10250
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18255 |
}
|
18256 |
],
|
18257 |
"logging_steps": 5,
|
18258 |
"max_steps": 16324,
|
18259 |
-
"num_input_tokens_seen":
|
18260 |
"num_train_epochs": 2,
|
18261 |
"save_steps": 50,
|
18262 |
"stateful_callbacks": {
|
@@ -18271,7 +18360,7 @@
|
|
18271 |
"attributes": {}
|
18272 |
}
|
18273 |
},
|
18274 |
-
"total_flos":
|
18275 |
"train_batch_size": 1,
|
18276 |
"trial_name": null,
|
18277 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.261921533796821,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 10300,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
18252 |
"eval_steps_per_second": 0.783,
|
18253 |
"num_input_tokens_seen": 68479264,
|
18254 |
"step": 10250
|
18255 |
+
},
|
18256 |
+
{
|
18257 |
+
"epoch": 1.256408685798291,
|
18258 |
+
"grad_norm": 1.0352761658466105,
|
18259 |
+
"learning_rate": 3.326597947513025e-05,
|
18260 |
+
"loss": 0.1938,
|
18261 |
+
"num_input_tokens_seen": 68513176,
|
18262 |
+
"step": 10255
|
18263 |
+
},
|
18264 |
+
{
|
18265 |
+
"epoch": 1.2570212244647943,
|
18266 |
+
"grad_norm": 1.0777283789352847,
|
18267 |
+
"learning_rate": 3.321826087554129e-05,
|
18268 |
+
"loss": 0.2257,
|
18269 |
+
"num_input_tokens_seen": 68547256,
|
18270 |
+
"step": 10260
|
18271 |
+
},
|
18272 |
+
{
|
18273 |
+
"epoch": 1.2576337631312977,
|
18274 |
+
"grad_norm": 1.3423245290250527,
|
18275 |
+
"learning_rate": 3.317055949547503e-05,
|
18276 |
+
"loss": 0.2599,
|
18277 |
+
"num_input_tokens_seen": 68580448,
|
18278 |
+
"step": 10265
|
18279 |
+
},
|
18280 |
+
{
|
18281 |
+
"epoch": 1.258246301797801,
|
18282 |
+
"grad_norm": 1.4455392387346562,
|
18283 |
+
"learning_rate": 3.3122875383877194e-05,
|
18284 |
+
"loss": 0.2444,
|
18285 |
+
"num_input_tokens_seen": 68614144,
|
18286 |
+
"step": 10270
|
18287 |
+
},
|
18288 |
+
{
|
18289 |
+
"epoch": 1.2588588404643044,
|
18290 |
+
"grad_norm": 1.1766802710720077,
|
18291 |
+
"learning_rate": 3.307520858967586e-05,
|
18292 |
+
"loss": 0.2425,
|
18293 |
+
"num_input_tokens_seen": 68647664,
|
18294 |
+
"step": 10275
|
18295 |
+
},
|
18296 |
+
{
|
18297 |
+
"epoch": 1.2594713791308076,
|
18298 |
+
"grad_norm": 0.8034532858902004,
|
18299 |
+
"learning_rate": 3.302755916178128e-05,
|
18300 |
+
"loss": 0.2108,
|
18301 |
+
"num_input_tokens_seen": 68681192,
|
18302 |
+
"step": 10280
|
18303 |
+
},
|
18304 |
+
{
|
18305 |
+
"epoch": 1.2600839177973109,
|
18306 |
+
"grad_norm": 1.187551767143693,
|
18307 |
+
"learning_rate": 3.297992714908589e-05,
|
18308 |
+
"loss": 0.2493,
|
18309 |
+
"num_input_tokens_seen": 68714600,
|
18310 |
+
"step": 10285
|
18311 |
+
},
|
18312 |
+
{
|
18313 |
+
"epoch": 1.2606964564638143,
|
18314 |
+
"grad_norm": 1.2626992038766227,
|
18315 |
+
"learning_rate": 3.293231260046431e-05,
|
18316 |
+
"loss": 0.2286,
|
18317 |
+
"num_input_tokens_seen": 68748432,
|
18318 |
+
"step": 10290
|
18319 |
+
},
|
18320 |
+
{
|
18321 |
+
"epoch": 1.2613089951303176,
|
18322 |
+
"grad_norm": 0.9648518095639778,
|
18323 |
+
"learning_rate": 3.288471556477317e-05,
|
18324 |
+
"loss": 0.2285,
|
18325 |
+
"num_input_tokens_seen": 68781728,
|
18326 |
+
"step": 10295
|
18327 |
+
},
|
18328 |
+
{
|
18329 |
+
"epoch": 1.261921533796821,
|
18330 |
+
"grad_norm": 1.4967036865983903,
|
18331 |
+
"learning_rate": 3.2837136090851205e-05,
|
18332 |
+
"loss": 0.2253,
|
18333 |
+
"num_input_tokens_seen": 68815336,
|
18334 |
+
"step": 10300
|
18335 |
+
},
|
18336 |
+
{
|
18337 |
+
"epoch": 1.261921533796821,
|
18338 |
+
"eval_loss": 0.2143474966287613,
|
18339 |
+
"eval_runtime": 19.5855,
|
18340 |
+
"eval_samples_per_second": 3.063,
|
18341 |
+
"eval_steps_per_second": 0.766,
|
18342 |
+
"num_input_tokens_seen": 68815336,
|
18343 |
+
"step": 10300
|
18344 |
}
|
18345 |
],
|
18346 |
"logging_steps": 5,
|
18347 |
"max_steps": 16324,
|
18348 |
+
"num_input_tokens_seen": 68815336,
|
18349 |
"num_train_epochs": 2,
|
18350 |
"save_steps": 50,
|
18351 |
"stateful_callbacks": {
|
|
|
18360 |
"attributes": {}
|
18361 |
}
|
18362 |
},
|
18363 |
+
"total_flos": 4316442486636544.0,
|
18364 |
"train_batch_size": 1,
|
18365 |
"trial_name": null,
|
18366 |
"trial_params": null
|