Training in progress, step 10150, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step10150/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10150/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10150/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10150/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step10150/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10150/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10150/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step10150/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +93 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 29034840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b709a208926d21714cd1bb86f591d64cce47911809ec3f7e8a5c123f64f95d0
|
3 |
size 29034840
|
last-checkpoint/global_step10150/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fac37dd6abf79a2f0324725d6f7414177d6074dfacbdd908516704f1ed69689c
|
3 |
+
size 43429616
|
last-checkpoint/global_step10150/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90348832ecb7b6d9294d2ce288e06326ae27439f1529ff51623f1748a3b6cb5b
|
3 |
+
size 43429616
|
last-checkpoint/global_step10150/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90d36a74b353f006716370d198ee586c79fb750682469f7d587d6d95e4bda162
|
3 |
+
size 43429616
|
last-checkpoint/global_step10150/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2052dff2a3b68572858349151b284a0504a50667dde6203e9802e8db3ac56acc
|
3 |
+
size 43429616
|
last-checkpoint/global_step10150/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9afe9bc7c27ae02f7a964f519d985993f5875299cd612b6e1a81c18eee91cdf7
|
3 |
+
size 637299
|
last-checkpoint/global_step10150/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13091fa5907acdb036f7846e4ad3e04ed399bec126b6d70c50d9e15abaa7f4fc
|
3 |
+
size 637171
|
last-checkpoint/global_step10150/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:605c5433808ef9f672facc9f274a27d0cef22b882af585270679cf151baf28ad
|
3 |
+
size 637171
|
last-checkpoint/global_step10150/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:edd210b7f580f5ed51f872dbb5ff18ae56331a89b93d1732a0eb9aba5fdda3ad
|
3 |
+
size 637171
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step10150
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8b60a1e55577dcd4ed4fa019490c4c8eb9f112f4cd49750bf0dad84e03964d8
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03594dad0b7bcdfd713e254e6b4b6361abf9101ccb4145def4e53b637fbf29f0
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06754065173301b0389466b461ad8c15f2df51ba57930e6118cfef8791c58d11
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27ab01cfdaef4a38e122402159bb7f3067802714906a79c8bb2d7b9185cfd311
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7a5914a992a4a403b02982cd5ff7e2aa6de814df9df91f0b8f16811eec8d7c1
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -17985,11 +17985,100 @@
|
|
17985 |
"eval_steps_per_second": 0.773,
|
17986 |
"num_input_tokens_seen": 67471000,
|
17987 |
"step": 10100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17988 |
}
|
17989 |
],
|
17990 |
"logging_steps": 5,
|
17991 |
"max_steps": 16324,
|
17992 |
-
"num_input_tokens_seen":
|
17993 |
"num_train_epochs": 2,
|
17994 |
"save_steps": 50,
|
17995 |
"stateful_callbacks": {
|
@@ -18004,7 +18093,7 @@
|
|
18004 |
"attributes": {}
|
18005 |
}
|
18006 |
},
|
18007 |
-
"total_flos":
|
18008 |
"train_batch_size": 1,
|
18009 |
"trial_name": null,
|
18010 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 0.04092838987708092,
|
3 |
"best_model_checkpoint": "saves/CADICA_qwenvl_detect_classify_augmented/lora/sft/checkpoint-9050",
|
4 |
+
"epoch": 1.2435453738017213,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 10150,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
17985 |
"eval_steps_per_second": 0.773,
|
17986 |
"num_input_tokens_seen": 67471000,
|
17987 |
"step": 10100
|
17988 |
+
},
|
17989 |
+
{
|
17990 |
+
"epoch": 1.2380325258031912,
|
17991 |
+
"grad_norm": 61.68793183843857,
|
17992 |
+
"learning_rate": 3.47053010588085e-05,
|
17993 |
+
"loss": 0.2594,
|
17994 |
+
"num_input_tokens_seen": 67504136,
|
17995 |
+
"step": 10105
|
17996 |
+
},
|
17997 |
+
{
|
17998 |
+
"epoch": 1.2386450644696947,
|
17999 |
+
"grad_norm": 0.9460767854553899,
|
18000 |
+
"learning_rate": 3.465708871914475e-05,
|
18001 |
+
"loss": 0.2491,
|
18002 |
+
"num_input_tokens_seen": 67537536,
|
18003 |
+
"step": 10110
|
18004 |
+
},
|
18005 |
+
{
|
18006 |
+
"epoch": 1.239257603136198,
|
18007 |
+
"grad_norm": 1.2005295422829294,
|
18008 |
+
"learning_rate": 3.4608892122641295e-05,
|
18009 |
+
"loss": 0.2029,
|
18010 |
+
"num_input_tokens_seen": 67571048,
|
18011 |
+
"step": 10115
|
18012 |
+
},
|
18013 |
+
{
|
18014 |
+
"epoch": 1.2398701418027014,
|
18015 |
+
"grad_norm": 0.9752561237968562,
|
18016 |
+
"learning_rate": 3.4560711318752016e-05,
|
18017 |
+
"loss": 0.2015,
|
18018 |
+
"num_input_tokens_seen": 67604768,
|
18019 |
+
"step": 10120
|
18020 |
+
},
|
18021 |
+
{
|
18022 |
+
"epoch": 1.2404826804692046,
|
18023 |
+
"grad_norm": 1.1644311130139964,
|
18024 |
+
"learning_rate": 3.451254635691458e-05,
|
18025 |
+
"loss": 0.2179,
|
18026 |
+
"num_input_tokens_seen": 67638424,
|
18027 |
+
"step": 10125
|
18028 |
+
},
|
18029 |
+
{
|
18030 |
+
"epoch": 1.2410952191357079,
|
18031 |
+
"grad_norm": 1.4435187736881145,
|
18032 |
+
"learning_rate": 3.446439728655047e-05,
|
18033 |
+
"loss": 0.2452,
|
18034 |
+
"num_input_tokens_seen": 67671792,
|
18035 |
+
"step": 10130
|
18036 |
+
},
|
18037 |
+
{
|
18038 |
+
"epoch": 1.2417077578022113,
|
18039 |
+
"grad_norm": 1.0148079567428998,
|
18040 |
+
"learning_rate": 3.441626415706477e-05,
|
18041 |
+
"loss": 0.2498,
|
18042 |
+
"num_input_tokens_seen": 67705816,
|
18043 |
+
"step": 10135
|
18044 |
+
},
|
18045 |
+
{
|
18046 |
+
"epoch": 1.2423202964687146,
|
18047 |
+
"grad_norm": 1.0027085224671495,
|
18048 |
+
"learning_rate": 3.4368147017846295e-05,
|
18049 |
+
"loss": 0.1976,
|
18050 |
+
"num_input_tokens_seen": 67740120,
|
18051 |
+
"step": 10140
|
18052 |
+
},
|
18053 |
+
{
|
18054 |
+
"epoch": 1.242932835135218,
|
18055 |
+
"grad_norm": 1.4330460305461843,
|
18056 |
+
"learning_rate": 3.432004591826739e-05,
|
18057 |
+
"loss": 0.2408,
|
18058 |
+
"num_input_tokens_seen": 67773216,
|
18059 |
+
"step": 10145
|
18060 |
+
},
|
18061 |
+
{
|
18062 |
+
"epoch": 1.2435453738017213,
|
18063 |
+
"grad_norm": 1.307307643275231,
|
18064 |
+
"learning_rate": 3.427196090768395e-05,
|
18065 |
+
"loss": 0.2124,
|
18066 |
+
"num_input_tokens_seen": 67807536,
|
18067 |
+
"step": 10150
|
18068 |
+
},
|
18069 |
+
{
|
18070 |
+
"epoch": 1.2435453738017213,
|
18071 |
+
"eval_loss": 0.1516389101743698,
|
18072 |
+
"eval_runtime": 19.1372,
|
18073 |
+
"eval_samples_per_second": 3.135,
|
18074 |
+
"eval_steps_per_second": 0.784,
|
18075 |
+
"num_input_tokens_seen": 67807536,
|
18076 |
+
"step": 10150
|
18077 |
}
|
18078 |
],
|
18079 |
"logging_steps": 5,
|
18080 |
"max_steps": 16324,
|
18081 |
+
"num_input_tokens_seen": 67807536,
|
18082 |
"num_train_epochs": 2,
|
18083 |
"save_steps": 50,
|
18084 |
"stateful_callbacks": {
|
|
|
18093 |
"attributes": {}
|
18094 |
}
|
18095 |
},
|
18096 |
+
"total_flos": 4253231174647808.0,
|
18097 |
"train_batch_size": 1,
|
18098 |
"trial_name": null,
|
18099 |
"trial_params": null
|