Training in progress, step 11700, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step11700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11700/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11700/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11700/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11700/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:881aa3a04a05cc0ad7b4570e3a1809bc29a84224f8ed16048589deba3ee0865a
|
3 |
size 29034840
|
last-checkpoint/global_step11700/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:804853d323e099bcf1e75c95ffb29d6e0fd9a2002501f156ea256f63bf1dae07
|
3 |
+
size 43429616
|
last-checkpoint/global_step11700/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c947adbb62d2c1d8ea2696af5bf2c80b695ffb2832d8cbc71b6dcd8512eeafe
|
3 |
+
size 43429616
|
last-checkpoint/global_step11700/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4ea298c49f4d12e30c80e6b2d39be234ff5c51e05d61af40e9e880862d8395f
|
3 |
+
size 43429616
|
last-checkpoint/global_step11700/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e7dd8d6d20f853fecb814ebb020db5703fee02fb4ff88f14de55b0b5ff9e66d
|
3 |
+
size 43429616
|
last-checkpoint/global_step11700/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2e29aee8e069a86e1259e89a2f0e200d762b8596185881166d137625eb03a6b
|
3 |
+
size 637299
|
last-checkpoint/global_step11700/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b91be24d12b3a0df29602e6f76c4ce4f617efa1c84ae064e9241db52333242a1
|
3 |
+
size 637171
|
last-checkpoint/global_step11700/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebee66cf4b9213b2d3e759939f0cb59240847dccae99a242320af4e987b06a85
|
3 |
+
size 637171
|
last-checkpoint/global_step11700/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0033b4c7e5847c449d7ea637c9a42a263fa722b419d1e5e9f1efa5b20e094974
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step11700
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b33ed948779fdfaec4f9870e0e1167984f874cea68b1eb884873410fd2b029bd
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8298e05040cc732f2284b41bafae6e2295235210263596b37cd2e15920d9d84d
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4e1232c69bc02f65d9f715d7088f2e691753736c7334d63d9c7da8dca90c15b
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4754c13130c2dbce34778d458b13b231b49de6ee3fd1f26cae887bb164bef7e4
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa8a45d0237bf6d8d467d6e2a41e9e7fc47e745a5380b1403902ab45d8c41098
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -20744,11 +20744,100 @@
|
|
20744 |
"eval_steps_per_second": 0.754,
|
20745 |
"num_input_tokens_seen": 77884056,
|
20746 |
"step": 11650
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20747 |
}
|
20748 |
],
|
20749 |
"logging_steps": 5,
|
20750 |
"max_steps": 16324,
|
20751 |
-
"num_input_tokens_seen":
|
20752 |
"num_train_epochs": 2,
|
20753 |
"save_steps": 50,
|
20754 |
"stateful_callbacks": {
|
@@ -20763,7 +20852,7 @@
|
|
20763 |
"attributes": {}
|
20764 |
}
|
20765 |
},
|
20766 |
-
"total_flos":
|
20767 |
"train_batch_size": 1,
|
20768 |
"trial_name": null,
|
20769 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.4334323604177515,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 11700,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
20744 |
"eval_steps_per_second": 0.754,
|
20745 |
"num_input_tokens_seen": 77884056,
|
20746 |
"step": 11650
|
20747 |
+
},
|
20748 |
+
{
|
20749 |
+
"epoch": 1.4279195124192214,
|
20750 |
+
"grad_norm": 1.385767865109891,
|
20751 |
+
"learning_rate": 2.0749405119111086e-05,
|
20752 |
+
"loss": 0.2166,
|
20753 |
+
"num_input_tokens_seen": 77917736,
|
20754 |
+
"step": 11655
|
20755 |
+
},
|
20756 |
+
{
|
20757 |
+
"epoch": 1.4285320510857247,
|
20758 |
+
"grad_norm": 1.181715061093167,
|
20759 |
+
"learning_rate": 2.0708343372955417e-05,
|
20760 |
+
"loss": 0.29,
|
20761 |
+
"num_input_tokens_seen": 77951440,
|
20762 |
+
"step": 11660
|
20763 |
+
},
|
20764 |
+
{
|
20765 |
+
"epoch": 1.4291445897522281,
|
20766 |
+
"grad_norm": 1.2509637127020838,
|
20767 |
+
"learning_rate": 2.0667311682584927e-05,
|
20768 |
+
"loss": 0.2354,
|
20769 |
+
"num_input_tokens_seen": 77984592,
|
20770 |
+
"step": 11665
|
20771 |
+
},
|
20772 |
+
{
|
20773 |
+
"epoch": 1.4297571284187314,
|
20774 |
+
"grad_norm": 1.1952630070074588,
|
20775 |
+
"learning_rate": 2.0626310090101646e-05,
|
20776 |
+
"loss": 0.199,
|
20777 |
+
"num_input_tokens_seen": 78018336,
|
20778 |
+
"step": 11670
|
20779 |
+
},
|
20780 |
+
{
|
20781 |
+
"epoch": 1.4303696670852348,
|
20782 |
+
"grad_norm": 1.289421137659356,
|
20783 |
+
"learning_rate": 2.0585338637576802e-05,
|
20784 |
+
"loss": 0.2172,
|
20785 |
+
"num_input_tokens_seen": 78051872,
|
20786 |
+
"step": 11675
|
20787 |
+
},
|
20788 |
+
{
|
20789 |
+
"epoch": 1.430982205751738,
|
20790 |
+
"grad_norm": 1.0855263584051233,
|
20791 |
+
"learning_rate": 2.0544397367050673e-05,
|
20792 |
+
"loss": 0.2054,
|
20793 |
+
"num_input_tokens_seen": 78085520,
|
20794 |
+
"step": 11680
|
20795 |
+
},
|
20796 |
+
{
|
20797 |
+
"epoch": 1.4315947444182413,
|
20798 |
+
"grad_norm": 1.2468690985988058,
|
20799 |
+
"learning_rate": 2.0503486320532523e-05,
|
20800 |
+
"loss": 0.2326,
|
20801 |
+
"num_input_tokens_seen": 78118664,
|
20802 |
+
"step": 11685
|
20803 |
+
},
|
20804 |
+
{
|
20805 |
+
"epoch": 1.4322072830847448,
|
20806 |
+
"grad_norm": 1.5679238415062051,
|
20807 |
+
"learning_rate": 2.0462605540000668e-05,
|
20808 |
+
"loss": 0.2424,
|
20809 |
+
"num_input_tokens_seen": 78151408,
|
20810 |
+
"step": 11690
|
20811 |
+
},
|
20812 |
+
{
|
20813 |
+
"epoch": 1.432819821751248,
|
20814 |
+
"grad_norm": 1.1426900959643231,
|
20815 |
+
"learning_rate": 2.042175506740233e-05,
|
20816 |
+
"loss": 0.2131,
|
20817 |
+
"num_input_tokens_seen": 78184680,
|
20818 |
+
"step": 11695
|
20819 |
+
},
|
20820 |
+
{
|
20821 |
+
"epoch": 1.4334323604177515,
|
20822 |
+
"grad_norm": 1.5320385814658846,
|
20823 |
+
"learning_rate": 2.038093494465368e-05,
|
20824 |
+
"loss": 0.2303,
|
20825 |
+
"num_input_tokens_seen": 78217776,
|
20826 |
+
"step": 11700
|
20827 |
+
},
|
20828 |
+
{
|
20829 |
+
"epoch": 1.4334323604177515,
|
20830 |
+
"eval_loss": 0.1379169523715973,
|
20831 |
+
"eval_runtime": 19.8386,
|
20832 |
+
"eval_samples_per_second": 3.024,
|
20833 |
+
"eval_steps_per_second": 0.756,
|
20834 |
+
"num_input_tokens_seen": 78217776,
|
20835 |
+
"step": 11700
|
20836 |
}
|
20837 |
],
|
20838 |
"logging_steps": 5,
|
20839 |
"max_steps": 16324,
|
20840 |
+
"num_input_tokens_seen": 78217776,
|
20841 |
"num_train_epochs": 2,
|
20842 |
"save_steps": 50,
|
20843 |
"stateful_callbacks": {
|
|
|
20852 |
"attributes": {}
|
20853 |
}
|
20854 |
},
|
20855 |
+
"total_flos": 4906098901385216.0,
|
20856 |
"train_batch_size": 1,
|
20857 |
"trial_name": null,
|
20858 |
"trial_params": null
|