Training in progress, step 10950, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step10950/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10950/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10950/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10950/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10950/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10950/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10950/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10950/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98c036d65163318c461386add3e9a6b55936c9a9141b7e5a1e65f6def18c2199
|
3 |
size 29034840
|
last-checkpoint/global_step10950/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87fd1c41ae097254550e929ca23527bdc8d2fe3073be20f2de7b1f59318a9c96
|
3 |
+
size 43429616
|
last-checkpoint/global_step10950/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8cae2538967d347079863afb5a69647232abbcf4e31739b09681699fa45b1a9
|
3 |
+
size 43429616
|
last-checkpoint/global_step10950/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09b3f0d978a51a7550c19cd04d98d31279de7625c20045fde35d12d5e9d7db8d
|
3 |
+
size 43429616
|
last-checkpoint/global_step10950/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48d89c85a3cc1699bdd64b25d5b44a50be07c0673fe290836d1565d1b076c7f1
|
3 |
+
size 43429616
|
last-checkpoint/global_step10950/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a558d2a4d35b47b9bb670d55948d70cb7033b95909e01f44215a89d756da85bd
|
3 |
+
size 637299
|
last-checkpoint/global_step10950/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3366832819c6f1ed7ca05f32ba04fbaac85eb03706683436c0969fa3505aee2
|
3 |
+
size 637171
|
last-checkpoint/global_step10950/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db868ac7d105bf7b12c826e45297df4cba8aa7f75d4da07f56b9973c984b8c3b
|
3 |
+
size 637171
|
last-checkpoint/global_step10950/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccbf98124b46d9924a152fc3a99394bbcfdedadbfe5d15b47aff6f2a36a68b13
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step10950
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1d9cae1611b2d8771a62a3be454537a0b72fbb4bf4ca6fea33e0c56b6859ab6
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e3c589c6ac8a418b8315f1184a361f94d4f49b42155a7ecec4863756dfc8f7f
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74ff9439ece28b5e9595dddf4b3df7e89d5ad92e520d5dcda0e80a87255988d7
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27bdd2ec35c42c65c6e26dc3745183ee55697e5294c7dca877a032e64b43096c
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fec3130c6df89734c8097f34b9622380c84a5271448cf66167ee69b0346a2cb
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -19409,11 +19409,100 @@
|
|
19409 |
"eval_steps_per_second": 0.765,
|
19410 |
"num_input_tokens_seen": 72850512,
|
19411 |
"step": 10900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19412 |
}
|
19413 |
],
|
19414 |
"logging_steps": 5,
|
19415 |
"max_steps": 16324,
|
19416 |
-
"num_input_tokens_seen":
|
19417 |
"num_train_epochs": 2,
|
19418 |
"save_steps": 50,
|
19419 |
"stateful_callbacks": {
|
@@ -19428,7 +19517,7 @@
|
|
19428 |
"attributes": {}
|
19429 |
}
|
19430 |
},
|
19431 |
-
"total_flos":
|
19432 |
"train_batch_size": 1,
|
19433 |
"trial_name": null,
|
19434 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.341551560442253,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 10950,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
19409 |
"eval_steps_per_second": 0.765,
|
19410 |
"num_input_tokens_seen": 72850512,
|
19411 |
"step": 10900
|
19412 |
+
},
|
19413 |
+
{
|
19414 |
+
"epoch": 1.336038712443723,
|
19415 |
+
"grad_norm": 1.3265511756999933,
|
19416 |
+
"learning_rate": 2.7224242537782056e-05,
|
19417 |
+
"loss": 0.1994,
|
19418 |
+
"num_input_tokens_seen": 72884432,
|
19419 |
+
"step": 10905
|
19420 |
+
},
|
19421 |
+
{
|
19422 |
+
"epoch": 1.3366512511102262,
|
19423 |
+
"grad_norm": 1.2264412034005454,
|
19424 |
+
"learning_rate": 2.7179165982475924e-05,
|
19425 |
+
"loss": 0.2839,
|
19426 |
+
"num_input_tokens_seen": 72917608,
|
19427 |
+
"step": 10910
|
19428 |
+
},
|
19429 |
+
{
|
19430 |
+
"epoch": 1.3372637897767297,
|
19431 |
+
"grad_norm": 1.5094223269822984,
|
19432 |
+
"learning_rate": 2.713411284332863e-05,
|
19433 |
+
"loss": 0.186,
|
19434 |
+
"num_input_tokens_seen": 72952000,
|
19435 |
+
"step": 10915
|
19436 |
+
},
|
19437 |
+
{
|
19438 |
+
"epoch": 1.337876328443233,
|
19439 |
+
"grad_norm": 0.8664736323227381,
|
19440 |
+
"learning_rate": 2.708908316656863e-05,
|
19441 |
+
"loss": 0.2018,
|
19442 |
+
"num_input_tokens_seen": 72985840,
|
19443 |
+
"step": 10920
|
19444 |
+
},
|
19445 |
+
{
|
19446 |
+
"epoch": 1.3384888671097364,
|
19447 |
+
"grad_norm": 1.317958833725117,
|
19448 |
+
"learning_rate": 2.7044076998400247e-05,
|
19449 |
+
"loss": 0.2494,
|
19450 |
+
"num_input_tokens_seen": 73019136,
|
19451 |
+
"step": 10925
|
19452 |
+
},
|
19453 |
+
{
|
19454 |
+
"epoch": 1.3391014057762396,
|
19455 |
+
"grad_norm": 1.6578104286015414,
|
19456 |
+
"learning_rate": 2.6999094385003743e-05,
|
19457 |
+
"loss": 0.199,
|
19458 |
+
"num_input_tokens_seen": 73053088,
|
19459 |
+
"step": 10930
|
19460 |
+
},
|
19461 |
+
{
|
19462 |
+
"epoch": 1.3397139444427428,
|
19463 |
+
"grad_norm": 0.8606284496425388,
|
19464 |
+
"learning_rate": 2.69541353725352e-05,
|
19465 |
+
"loss": 0.288,
|
19466 |
+
"num_input_tokens_seen": 73086296,
|
19467 |
+
"step": 10935
|
19468 |
+
},
|
19469 |
+
{
|
19470 |
+
"epoch": 1.3403264831092463,
|
19471 |
+
"grad_norm": 0.9567441511611826,
|
19472 |
+
"learning_rate": 2.690920000712644e-05,
|
19473 |
+
"loss": 0.1766,
|
19474 |
+
"num_input_tokens_seen": 73120432,
|
19475 |
+
"step": 10940
|
19476 |
+
},
|
19477 |
+
{
|
19478 |
+
"epoch": 1.3409390217757495,
|
19479 |
+
"grad_norm": 1.3031676970273844,
|
19480 |
+
"learning_rate": 2.6864288334885067e-05,
|
19481 |
+
"loss": 0.2227,
|
19482 |
+
"num_input_tokens_seen": 73153912,
|
19483 |
+
"step": 10945
|
19484 |
+
},
|
19485 |
+
{
|
19486 |
+
"epoch": 1.341551560442253,
|
19487 |
+
"grad_norm": 1.1674568949705564,
|
19488 |
+
"learning_rate": 2.6819400401894385e-05,
|
19489 |
+
"loss": 0.2192,
|
19490 |
+
"num_input_tokens_seen": 73187184,
|
19491 |
+
"step": 10950
|
19492 |
+
},
|
19493 |
+
{
|
19494 |
+
"epoch": 1.341551560442253,
|
19495 |
+
"eval_loss": 0.21316958963871002,
|
19496 |
+
"eval_runtime": 19.8522,
|
19497 |
+
"eval_samples_per_second": 3.022,
|
19498 |
+
"eval_steps_per_second": 0.756,
|
19499 |
+
"num_input_tokens_seen": 73187184,
|
19500 |
+
"step": 10950
|
19501 |
}
|
19502 |
],
|
19503 |
"logging_steps": 5,
|
19504 |
"max_steps": 16324,
|
19505 |
+
"num_input_tokens_seen": 73187184,
|
19506 |
"num_train_epochs": 2,
|
19507 |
"save_steps": 50,
|
19508 |
"stateful_callbacks": {
|
|
|
19517 |
"attributes": {}
|
19518 |
}
|
19519 |
},
|
19520 |
+
"total_flos": 4590614816751616.0,
|
19521 |
"train_batch_size": 1,
|
19522 |
"trial_name": null,
|
19523 |
"trial_params": null
|