Training in progress, step 11750, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step11750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11750/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11750/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11750/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11750/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9254c5fd1e36a2001d6f3de3a14c6e1552a474dc98156451c9dd1fa5e83c4ecb
|
3 |
size 29034840
|
last-checkpoint/global_step11750/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:213cde885872d87e18c7f9ca589aa569cb0ab4cc3e20d97a4246757632bf1cfe
|
3 |
+
size 43429616
|
last-checkpoint/global_step11750/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:703dfd4939ab17e193c13d55cfd512f1584459e10d62c527b6d436b15b757f0b
|
3 |
+
size 43429616
|
last-checkpoint/global_step11750/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70a1cd17642a2b3ae225bbf515a92df5e8ec096b336f79814e25ce3bcfd83961
|
3 |
+
size 43429616
|
last-checkpoint/global_step11750/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebe71dbc175d1acb55716114263bc7d3174a2109d41a5885918b04878d6022f2
|
3 |
+
size 43429616
|
last-checkpoint/global_step11750/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92aadb1c39535538e0836a7bc8285e18cefc590b3a998c1cba594000c6cabcd7
|
3 |
+
size 637299
|
last-checkpoint/global_step11750/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb3893ed371f88c27063051ce9b588102e838eabb53f34cd862728458290e548
|
3 |
+
size 637171
|
last-checkpoint/global_step11750/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99922c33ea6f0709dfd29f3818249c4af91c5a2f455aa4f418a61bccf3330c29
|
3 |
+
size 637171
|
last-checkpoint/global_step11750/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0ef362bf31d0dfb1abcc1e313f03deb2d25865db31e2a0b3b89b830fd270b3a
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step11750
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4c2239de6b0a8a7c38ea9b14c315c93c56698b7a6a63b466eda0ccae102ad7d
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8c8a5c5e52a11119fb852fcd71f5af7ac70a5953317669655785000f83240fe
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e027846b2398ff2037e24a79062e341501a3bb99a90812daf6474f54c1faccce
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58f5d5f12209364154cff8e64dd3365589f1a45b55abcd416143e7501212069f
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fc63ad6a415b118874ca08eae048720f7c0233d5ddd2f00bc26edb0db229ace
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -20833,11 +20833,100 @@
|
|
20833 |
"eval_steps_per_second": 0.756,
|
20834 |
"num_input_tokens_seen": 78217776,
|
20835 |
"step": 11700
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20836 |
}
|
20837 |
],
|
20838 |
"logging_steps": 5,
|
20839 |
"max_steps": 16324,
|
20840 |
-
"num_input_tokens_seen":
|
20841 |
"num_train_epochs": 2,
|
20842 |
"save_steps": 50,
|
20843 |
"stateful_callbacks": {
|
@@ -20852,7 +20941,7 @@
|
|
20852 |
"attributes": {}
|
20853 |
}
|
20854 |
},
|
20855 |
-
"total_flos":
|
20856 |
"train_batch_size": 1,
|
20857 |
"trial_name": null,
|
20858 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.4395577470827847,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 11750,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
20833 |
"eval_steps_per_second": 0.756,
|
20834 |
"num_input_tokens_seen": 78217776,
|
20835 |
"step": 11700
|
20836 |
+
},
|
20837 |
+
{
|
20838 |
+
"epoch": 1.4340448990842547,
|
20839 |
+
"grad_norm": 1.4326590047864796,
|
20840 |
+
"learning_rate": 2.0340145213639655e-05,
|
20841 |
+
"loss": 0.2127,
|
20842 |
+
"num_input_tokens_seen": 78251376,
|
20843 |
+
"step": 11705
|
20844 |
+
},
|
20845 |
+
{
|
20846 |
+
"epoch": 1.434657437750758,
|
20847 |
+
"grad_norm": 1.551965243090594,
|
20848 |
+
"learning_rate": 2.0299385916214116e-05,
|
20849 |
+
"loss": 0.2821,
|
20850 |
+
"num_input_tokens_seen": 78284456,
|
20851 |
+
"step": 11710
|
20852 |
+
},
|
20853 |
+
{
|
20854 |
+
"epoch": 1.4352699764172614,
|
20855 |
+
"grad_norm": 1.4266904783817593,
|
20856 |
+
"learning_rate": 2.0258657094199597e-05,
|
20857 |
+
"loss": 0.2111,
|
20858 |
+
"num_input_tokens_seen": 78318256,
|
20859 |
+
"step": 11715
|
20860 |
+
},
|
20861 |
+
{
|
20862 |
+
"epoch": 1.4358825150837646,
|
20863 |
+
"grad_norm": 1.4622832329237696,
|
20864 |
+
"learning_rate": 2.0217958789387446e-05,
|
20865 |
+
"loss": 0.1973,
|
20866 |
+
"num_input_tokens_seen": 78351968,
|
20867 |
+
"step": 11720
|
20868 |
+
},
|
20869 |
+
{
|
20870 |
+
"epoch": 1.436495053750268,
|
20871 |
+
"grad_norm": 1.2767914245182206,
|
20872 |
+
"learning_rate": 2.017729104353763e-05,
|
20873 |
+
"loss": 0.2047,
|
20874 |
+
"num_input_tokens_seen": 78385944,
|
20875 |
+
"step": 11725
|
20876 |
+
},
|
20877 |
+
{
|
20878 |
+
"epoch": 1.4371075924167713,
|
20879 |
+
"grad_norm": 1.0109017464140821,
|
20880 |
+
"learning_rate": 2.0136653898378805e-05,
|
20881 |
+
"loss": 0.2477,
|
20882 |
+
"num_input_tokens_seen": 78418960,
|
20883 |
+
"step": 11730
|
20884 |
+
},
|
20885 |
+
{
|
20886 |
+
"epoch": 1.4377201310832746,
|
20887 |
+
"grad_norm": 1.4595388451625722,
|
20888 |
+
"learning_rate": 2.009604739560823e-05,
|
20889 |
+
"loss": 0.215,
|
20890 |
+
"num_input_tokens_seen": 78452624,
|
20891 |
+
"step": 11735
|
20892 |
+
},
|
20893 |
+
{
|
20894 |
+
"epoch": 1.438332669749778,
|
20895 |
+
"grad_norm": 1.6265969189605476,
|
20896 |
+
"learning_rate": 2.0055471576891672e-05,
|
20897 |
+
"loss": 0.2267,
|
20898 |
+
"num_input_tokens_seen": 78485592,
|
20899 |
+
"step": 11740
|
20900 |
+
},
|
20901 |
+
{
|
20902 |
+
"epoch": 1.4389452084162813,
|
20903 |
+
"grad_norm": 1.6223135458613367,
|
20904 |
+
"learning_rate": 2.0014926483863466e-05,
|
20905 |
+
"loss": 0.2245,
|
20906 |
+
"num_input_tokens_seen": 78519176,
|
20907 |
+
"step": 11745
|
20908 |
+
},
|
20909 |
+
{
|
20910 |
+
"epoch": 1.4395577470827847,
|
20911 |
+
"grad_norm": 1.4066738070341533,
|
20912 |
+
"learning_rate": 1.99744121581264e-05,
|
20913 |
+
"loss": 0.2207,
|
20914 |
+
"num_input_tokens_seen": 78552600,
|
20915 |
+
"step": 11750
|
20916 |
+
},
|
20917 |
+
{
|
20918 |
+
"epoch": 1.4395577470827847,
|
20919 |
+
"eval_loss": 0.07964655011892319,
|
20920 |
+
"eval_runtime": 19.3924,
|
20921 |
+
"eval_samples_per_second": 3.094,
|
20922 |
+
"eval_steps_per_second": 0.773,
|
20923 |
+
"num_input_tokens_seen": 78552600,
|
20924 |
+
"step": 11750
|
20925 |
}
|
20926 |
],
|
20927 |
"logging_steps": 5,
|
20928 |
"max_steps": 16324,
|
20929 |
+
"num_input_tokens_seen": 78552600,
|
20930 |
"num_train_epochs": 2,
|
20931 |
"save_steps": 50,
|
20932 |
"stateful_callbacks": {
|
|
|
20941 |
"attributes": {}
|
20942 |
}
|
20943 |
},
|
20944 |
+
"total_flos": 4927092224294912.0,
|
20945 |
"train_batch_size": 1,
|
20946 |
"trial_name": null,
|
20947 |
"trial_params": null
|