Training in progress, step 12150, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step12150/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step12150/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step12150/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step12150/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step12150/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step12150/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step12150/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step12150/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af0f9e937a2dbaf537ac173984164282e59a37c80c7e58522d9ee4148ff9c733
|
3 |
size 29034840
|
last-checkpoint/global_step12150/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:037eec4cdbc368d17f6cfec3a783c8058303de3373aa2a91549f2ebabe7b1f29
|
3 |
+
size 43429616
|
last-checkpoint/global_step12150/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a3e09f3c0eaaaa5f7302e6c905ba8eb537ba74a38fc2c22d1cfe2d3b7055a59
|
3 |
+
size 43429616
|
last-checkpoint/global_step12150/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59083fcd9c130dfc286e1144a3563e7b8ebf4952e9367b4a514e934cfce82060
|
3 |
+
size 43429616
|
last-checkpoint/global_step12150/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8beba7123c06e80ee8ea761ae31a7e59f8a8ed851f5bd88750bf59d5b72676e0
|
3 |
+
size 43429616
|
last-checkpoint/global_step12150/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75a9c7ae97d414610f673832d5c375f0393e05fae13b6f25edef64a4b472a1fb
|
3 |
+
size 637299
|
last-checkpoint/global_step12150/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:542f98aedda710ef50b9920971aac1ec30afb81dc11d1be9b26239172afcc41f
|
3 |
+
size 637171
|
last-checkpoint/global_step12150/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e9bc8b75f762e1a4d3de1be879ebeb1528fda2a7ef7e19e217ab1c45ceff43a
|
3 |
+
size 637171
|
last-checkpoint/global_step12150/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e75d4fe0dae9b2c1521fde91805e9b14b39452645bad9af7c35e356a2b3b0dcb
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step12150
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6307efa24539f25205494058ac21269eb4472c24196e1ee8b02057608ce4d24
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:104d067239480b773fb429ce7cf6fef608773d731045e564129e32bad2f89f87
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b31a105b7d05e94e9b867f006f5892a5f0e0d40a8f25f6fc4e1ae7b27248e3c
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21975035cfdc76b4d274d6b419772538d54271629fee8e9459ef219c0b502f8c
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52419b8ff63153cdbaca6da836a3cad296cdecb3457c63b7ac611396d56073f0
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -21545,11 +21545,100 @@
|
|
21545 |
"eval_steps_per_second": 0.771,
|
21546 |
"num_input_tokens_seen": 80912808,
|
21547 |
"step": 12100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21548 |
}
|
21549 |
],
|
21550 |
"logging_steps": 5,
|
21551 |
"max_steps": 16324,
|
21552 |
-
"num_input_tokens_seen":
|
21553 |
"num_train_epochs": 2,
|
21554 |
"save_steps": 50,
|
21555 |
"stateful_callbacks": {
|
@@ -21564,7 +21653,7 @@
|
|
21564 |
"attributes": {}
|
21565 |
}
|
21566 |
},
|
21567 |
-
"total_flos":
|
21568 |
"train_batch_size": 1,
|
21569 |
"trial_name": null,
|
21570 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.4885608404030504,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 12150,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
21545 |
"eval_steps_per_second": 0.771,
|
21546 |
"num_input_tokens_seen": 80912808,
|
21547 |
"step": 12100
|
21548 |
+
},
|
21549 |
+
{
|
21550 |
+
"epoch": 1.4830479924045206,
|
21551 |
+
"grad_norm": 1.350213028954588,
|
21552 |
+
"learning_rate": 1.7179086747792705e-05,
|
21553 |
+
"loss": 0.2205,
|
21554 |
+
"num_input_tokens_seen": 80945688,
|
21555 |
+
"step": 12105
|
21556 |
+
},
|
21557 |
+
{
|
21558 |
+
"epoch": 1.4836605310710238,
|
21559 |
+
"grad_norm": 1.7138932429222575,
|
21560 |
+
"learning_rate": 1.7140894904108396e-05,
|
21561 |
+
"loss": 0.2271,
|
21562 |
+
"num_input_tokens_seen": 80979064,
|
21563 |
+
"step": 12110
|
21564 |
+
},
|
21565 |
+
{
|
21566 |
+
"epoch": 1.4842730697375273,
|
21567 |
+
"grad_norm": 1.8330956643913292,
|
21568 |
+
"learning_rate": 1.7102736776721383e-05,
|
21569 |
+
"loss": 0.219,
|
21570 |
+
"num_input_tokens_seen": 81012720,
|
21571 |
+
"step": 12115
|
21572 |
+
},
|
21573 |
+
{
|
21574 |
+
"epoch": 1.4848856084040305,
|
21575 |
+
"grad_norm": 1.14443147495106,
|
21576 |
+
"learning_rate": 1.7064612404785196e-05,
|
21577 |
+
"loss": 0.1997,
|
21578 |
+
"num_input_tokens_seen": 81046296,
|
21579 |
+
"step": 12120
|
21580 |
+
},
|
21581 |
+
{
|
21582 |
+
"epoch": 1.4854981470705337,
|
21583 |
+
"grad_norm": 1.3913689156054243,
|
21584 |
+
"learning_rate": 1.7026521827418786e-05,
|
21585 |
+
"loss": 0.2113,
|
21586 |
+
"num_input_tokens_seen": 81079272,
|
21587 |
+
"step": 12125
|
21588 |
+
},
|
21589 |
+
{
|
21590 |
+
"epoch": 1.4861106857370372,
|
21591 |
+
"grad_norm": 1.023101620622787,
|
21592 |
+
"learning_rate": 1.698846508370639e-05,
|
21593 |
+
"loss": 0.1786,
|
21594 |
+
"num_input_tokens_seen": 81113592,
|
21595 |
+
"step": 12130
|
21596 |
+
},
|
21597 |
+
{
|
21598 |
+
"epoch": 1.4867232244035404,
|
21599 |
+
"grad_norm": 1.429552357254653,
|
21600 |
+
"learning_rate": 1.695044221269752e-05,
|
21601 |
+
"loss": 0.2348,
|
21602 |
+
"num_input_tokens_seen": 81146704,
|
21603 |
+
"step": 12135
|
21604 |
+
},
|
21605 |
+
{
|
21606 |
+
"epoch": 1.487335763070044,
|
21607 |
+
"grad_norm": 1.5634928291725132,
|
21608 |
+
"learning_rate": 1.6912453253406958e-05,
|
21609 |
+
"loss": 0.224,
|
21610 |
+
"num_input_tokens_seen": 81179856,
|
21611 |
+
"step": 12140
|
21612 |
+
},
|
21613 |
+
{
|
21614 |
+
"epoch": 1.4879483017365471,
|
21615 |
+
"grad_norm": 1.2283588996234667,
|
21616 |
+
"learning_rate": 1.687449824481469e-05,
|
21617 |
+
"loss": 0.1995,
|
21618 |
+
"num_input_tokens_seen": 81213312,
|
21619 |
+
"step": 12145
|
21620 |
+
},
|
21621 |
+
{
|
21622 |
+
"epoch": 1.4885608404030504,
|
21623 |
+
"grad_norm": 1.3793598486147676,
|
21624 |
+
"learning_rate": 1.683657722586582e-05,
|
21625 |
+
"loss": 0.2078,
|
21626 |
+
"num_input_tokens_seen": 81247096,
|
21627 |
+
"step": 12150
|
21628 |
+
},
|
21629 |
+
{
|
21630 |
+
"epoch": 1.4885608404030504,
|
21631 |
+
"eval_loss": 0.1698664128780365,
|
21632 |
+
"eval_runtime": 19.6604,
|
21633 |
+
"eval_samples_per_second": 3.052,
|
21634 |
+
"eval_steps_per_second": 0.763,
|
21635 |
+
"num_input_tokens_seen": 81247096,
|
21636 |
+
"step": 12150
|
21637 |
}
|
21638 |
],
|
21639 |
"logging_steps": 5,
|
21640 |
"max_steps": 16324,
|
21641 |
+
"num_input_tokens_seen": 81247096,
|
21642 |
"num_train_epochs": 2,
|
21643 |
"save_steps": 50,
|
21644 |
"stateful_callbacks": {
|
|
|
21653 |
"attributes": {}
|
21654 |
}
|
21655 |
},
|
21656 |
+
"total_flos": 5096107533664256.0,
|
21657 |
"train_batch_size": 1,
|
21658 |
"trial_name": null,
|
21659 |
"trial_params": null
|