Training in progress, step 11800, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step11800/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11800/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11800/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11800/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step11800/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11800/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11800/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step11800/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4df1ca79bb62c7f7d57b94371ba1cc9b738491547dea27c6864a9fd89888effd
|
3 |
size 29034840
|
last-checkpoint/global_step11800/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80b7db288302cd18d86f82561c1c82d2666cf9f18a545ab0eeb6437eac57ff84
|
3 |
+
size 43429616
|
last-checkpoint/global_step11800/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd967a0005a729b4902f2806ee960180d4a748adebe2604c84331abb1e10c68d
|
3 |
+
size 43429616
|
last-checkpoint/global_step11800/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54ed02a8d16452a2419ce55ce7c30e80606f09b87f0631b360f2dc0bc2f26242
|
3 |
+
size 43429616
|
last-checkpoint/global_step11800/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e40bda84c0c5538dfc2813b3e9812cdd4dff5ceb4ef92229253fe5a4037f333
|
3 |
+
size 43429616
|
last-checkpoint/global_step11800/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63e6c17001a6d56adbf9bc11f98715a551ea96e063902bb0adc8a236ba8f9105
|
3 |
+
size 637299
|
last-checkpoint/global_step11800/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:efb004680c56454cf63676ade01c65878f55f3eb0fef882b648ff1c45664c8bb
|
3 |
+
size 637171
|
last-checkpoint/global_step11800/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1e3f150ad2b9da3ef6e2d2805b60efc49f4f639febaf4da554d6b41170c544e
|
3 |
+
size 637171
|
last-checkpoint/global_step11800/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:735e5df1ccbcba602f88a9e6a3566bfcb2e6de433aa41864e2a6c02b35e23936
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step11800
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:118f345115d6406635644f29c252b4422b2506be11f6910493602e614f210606
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:968a0a7348ad7f60ad1f94d7c846f90180289399e2238fc980328d18bb7e20ff
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf971b3af95ba9d3c3db41ad222741289b13536a4c39c0846264486be063cbcf
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6022093a77b75def8d86a848770d7abaa395b93fff48c134444d67b354e880a1
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1df23140c05dc12c71fa438e150f78ac78bc5e93e523b7d28196fbe6d878dda5
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -20922,11 +20922,100 @@
|
|
20922 |
"eval_steps_per_second": 0.773,
|
20923 |
"num_input_tokens_seen": 78552600,
|
20924 |
"step": 11750
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20925 |
}
|
20926 |
],
|
20927 |
"logging_steps": 5,
|
20928 |
"max_steps": 16324,
|
20929 |
-
"num_input_tokens_seen":
|
20930 |
"num_train_epochs": 2,
|
20931 |
"save_steps": 50,
|
20932 |
"stateful_callbacks": {
|
@@ -20941,7 +21030,7 @@
|
|
20941 |
"attributes": {}
|
20942 |
}
|
20943 |
},
|
20944 |
-
"total_flos":
|
20945 |
"train_batch_size": 1,
|
20946 |
"trial_name": null,
|
20947 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.4456831337478178,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 11800,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
20922 |
"eval_steps_per_second": 0.773,
|
20923 |
"num_input_tokens_seen": 78552600,
|
20924 |
"step": 11750
|
20925 |
+
},
|
20926 |
+
{
|
20927 |
+
"epoch": 1.440170285749288,
|
20928 |
+
"grad_norm": 1.3268858294030856,
|
20929 |
+
"learning_rate": 1.9933928641251702e-05,
|
20930 |
+
"loss": 0.2158,
|
20931 |
+
"num_input_tokens_seen": 78586312,
|
20932 |
+
"step": 11755
|
20933 |
+
},
|
20934 |
+
{
|
20935 |
+
"epoch": 1.4407828244157912,
|
20936 |
+
"grad_norm": 1.2359824350049413,
|
20937 |
+
"learning_rate": 1.9893475974778957e-05,
|
20938 |
+
"loss": 0.1939,
|
20939 |
+
"num_input_tokens_seen": 78620064,
|
20940 |
+
"step": 11760
|
20941 |
+
},
|
20942 |
+
{
|
20943 |
+
"epoch": 1.4413953630822944,
|
20944 |
+
"grad_norm": 1.1683024300087932,
|
20945 |
+
"learning_rate": 1.9853054200216124e-05,
|
20946 |
+
"loss": 0.212,
|
20947 |
+
"num_input_tokens_seen": 78653936,
|
20948 |
+
"step": 11765
|
20949 |
+
},
|
20950 |
+
{
|
20951 |
+
"epoch": 1.442007901748798,
|
20952 |
+
"grad_norm": 1.1805090414134891,
|
20953 |
+
"learning_rate": 1.9812663359039485e-05,
|
20954 |
+
"loss": 0.2009,
|
20955 |
+
"num_input_tokens_seen": 78687776,
|
20956 |
+
"step": 11770
|
20957 |
+
},
|
20958 |
+
{
|
20959 |
+
"epoch": 1.4426204404153014,
|
20960 |
+
"grad_norm": 1.7542936987483482,
|
20961 |
+
"learning_rate": 1.97723034926935e-05,
|
20962 |
+
"loss": 0.2234,
|
20963 |
+
"num_input_tokens_seen": 78721528,
|
20964 |
+
"step": 11775
|
20965 |
+
},
|
20966 |
+
{
|
20967 |
+
"epoch": 1.4432329790818046,
|
20968 |
+
"grad_norm": 1.1636937431093823,
|
20969 |
+
"learning_rate": 1.9731974642590933e-05,
|
20970 |
+
"loss": 0.2094,
|
20971 |
+
"num_input_tokens_seen": 78755520,
|
20972 |
+
"step": 11780
|
20973 |
+
},
|
20974 |
+
{
|
20975 |
+
"epoch": 1.4438455177483078,
|
20976 |
+
"grad_norm": 1.4721779800528187,
|
20977 |
+
"learning_rate": 1.96916768501127e-05,
|
20978 |
+
"loss": 0.2597,
|
20979 |
+
"num_input_tokens_seen": 78788352,
|
20980 |
+
"step": 11785
|
20981 |
+
},
|
20982 |
+
{
|
20983 |
+
"epoch": 1.444458056414811,
|
20984 |
+
"grad_norm": 1.3899142619840883,
|
20985 |
+
"learning_rate": 1.9651410156607803e-05,
|
20986 |
+
"loss": 0.1969,
|
20987 |
+
"num_input_tokens_seen": 78821984,
|
20988 |
+
"step": 11790
|
20989 |
+
},
|
20990 |
+
{
|
20991 |
+
"epoch": 1.4450705950813145,
|
20992 |
+
"grad_norm": 0.9965825392569478,
|
20993 |
+
"learning_rate": 1.9611174603393402e-05,
|
20994 |
+
"loss": 0.2171,
|
20995 |
+
"num_input_tokens_seen": 78855176,
|
20996 |
+
"step": 11795
|
20997 |
+
},
|
20998 |
+
{
|
20999 |
+
"epoch": 1.4456831337478178,
|
21000 |
+
"grad_norm": 1.3906640428951949,
|
21001 |
+
"learning_rate": 1.9570970231754636e-05,
|
21002 |
+
"loss": 0.2276,
|
21003 |
+
"num_input_tokens_seen": 78888720,
|
21004 |
+
"step": 11800
|
21005 |
+
},
|
21006 |
+
{
|
21007 |
+
"epoch": 1.4456831337478178,
|
21008 |
+
"eval_loss": 0.07458024471998215,
|
21009 |
+
"eval_runtime": 19.6385,
|
21010 |
+
"eval_samples_per_second": 3.055,
|
21011 |
+
"eval_steps_per_second": 0.764,
|
21012 |
+
"num_input_tokens_seen": 78888720,
|
21013 |
+
"step": 11800
|
21014 |
}
|
21015 |
],
|
21016 |
"logging_steps": 5,
|
21017 |
"max_steps": 16324,
|
21018 |
+
"num_input_tokens_seen": 78888720,
|
21019 |
"num_train_epochs": 2,
|
21020 |
"save_steps": 50,
|
21021 |
"stateful_callbacks": {
|
|
|
21030 |
"attributes": {}
|
21031 |
}
|
21032 |
},
|
21033 |
+
"total_flos": 4948194971746304.0,
|
21034 |
"train_batch_size": 1,
|
21035 |
"trial_name": null,
|
21036 |
"trial_params": null
|