Upload folder using huggingface_hub
- config.json +4 -6
- finetune_all_multinode_stage4.sh +8 -10
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- tokenizer_config.json +1 -1
- trainer_state.json +0 -0
- training_args.bin +1 -1
config.json
CHANGED
@@ -1,11 +1,8 @@
 {
   "X": [
-    "
-    "Audio_caption",
-    "Video",
-    "Image"
+    "Video"
   ],
-  "_name_or_path": "/mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/OmniFusion-main/checkpoints/OmniFusion-8B-stage3-1018",
+  "_name_or_path": "/mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/tmp/OmniFusion-main/checkpoints/OmniFusion-8B-stage3-1018",
   "architectures": [
     "LlavaLlamaForCausalLM"
   ],
@@ -24,6 +21,7 @@
   "image_grid_pinpoints": null,
   "initializer_range": 0.02,
   "intermediate_size": 14336,
+  "is_fusion": true,
   "max_position_embeddings": 131072,
   "mlp_bias": false,
   "mm_audio_caption_tower": "/mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/pretrained_model/LanguageBind_Audio",
@@ -51,7 +49,7 @@
   },
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
-  "tokenizer_model_max_length":
+  "tokenizer_model_max_length": 6144,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.43.1",
   "tune_mm_mlp_adapter": false,
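Net effect of this config change: the modality list "X" is reduced to video only, "_name_or_path" now points at the tmp/ copy of the stage-3 checkpoint, a new "is_fusion" flag is set, and "tokenizer_model_max_length" becomes 6144. A minimal sketch for confirming those fields after downloading the snapshot, assuming config.json sits in the current directory (standard library only):

import json

# Inspect the fields touched by this commit; the local file path is an assumption.
with open("config.json") as f:
    cfg = json.load(f)

print(cfg["X"])                           # expected: ["Video"]
print(cfg.get("is_fusion"))               # expected: True (key added in this commit)
print(cfg["tokenizer_model_max_length"])  # expected: 6144
print(cfg["_name_or_path"])               # tmp/OmniFusion-main stage-3 checkpoint path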
finetune_all_multinode_stage4.sh
CHANGED
@@ -27,7 +27,7 @@ echo "master port: ${port}"
 
 source /mnt/bn/tns-algo-video-public-my2/wangpeng.an/environment/anaconda3/bin/activate multimodal
 
-cd /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/OmniFusion-
+cd /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/OmniFusion-main
 
 # Install necessary packages
 pip3 install requests
@@ -48,14 +48,11 @@ sudo chmod 777 /var/lib/fastrak -R
 ACCELERATE_CPU_AFFINITY=1 torchrun --nproc_per_node=$ARNOLD_WORKER_GPU --nnodes=$ARNOLD_WORKER_NUM --node_rank=$ARNOLD_ID --master_addr=$METIS_WORKER_0_HOST --master_port=$port \
     llava/train/train_mem.py \
     --deepspeed ./scripts/zero2.json \
-    --model_name_or_path /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/OmniFusion-main/checkpoints/OmniFusion-8B-stage3-1018 \
+    --model_name_or_path /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/tmp/OmniFusion-main/checkpoints/OmniFusion-8B-stage3-1018 \
     --version llama_3_1 \
-    --data_path /mnt/bn/tns-algo-video-public-my2/wangpeng.an/data/
-    --audio_asr_folder /mnt/bn/tns-algo-video-public-my2/wangpeng.an/data \
-    --audio_caption_folder /mnt/bn/tns-algo-video-public-my2/wangpeng.an/data/stage2/audio_caption_data_tune/audio_caption_tune/audio_caption \
+    --data_path /mnt/bn/tns-algo-video-public-my2/wangpeng.an/data/stage4_1031/videochatgpt_tune_stage4.json \
     --video_folder /mnt/bn/tns-algo-video-public-my2/wangpeng.an/data/Video-LLaVA \
-    --
-    --X "Audio_asr" "Audio_caption" "Video" "Image" \
+    --X "Video" \
     --audio_tower /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/LanguageBind_Audio_Asr \
     --audio_caption_tower /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/pretrained_model/LanguageBind_Audio \
     --video_tower /mnt/bn/tns-algo-video-public-my2/wangpeng.an/model/pretrained_model/LanguageBind_Video_merge \
@@ -64,10 +61,11 @@ ACCELERATE_CPU_AFFINITY=1 torchrun --nproc_per_node=$ARNOLD_WORKER_GPU --nnodes=
     --mm_vision_select_layer -2 \
     --mm_use_x_start_end False \
     --mm_use_x_patch_token False \
+    --is_fusion True \
     --image_aspect_ratio pad \
     --group_by_modality_length True \
     --bf16 True \
-    --output_dir ./checkpoints/OmniFusion-8B-stage4-
+    --output_dir ./checkpoints/OmniFusion-8B-stage4-1031 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 8 \
     --per_device_eval_batch_size 4 \
@@ -82,8 +80,8 @@ ACCELERATE_CPU_AFFINITY=1 torchrun --nproc_per_node=$ARNOLD_WORKER_GPU --nnodes=
     --lr_scheduler_type "cosine" \
     --logging_steps 1 \
     --tf32 True \
-    --model_max_length
-    --tokenizer_model_max_length
+    --model_max_length 4096 \
+    --tokenizer_model_max_length 6144 \
     --gradient_checkpointing True \
     --dataloader_num_workers 8 \
     --lazy_preprocess True \
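The script change mirrors the config: training now uses only the "Video" modality, --data_path points at the stage4_1031 JSON, --is_fusion is enabled, the output directory becomes OmniFusion-8B-stage4-1031, and the sequence-length flags are raised to --model_max_length 4096 and --tokenizer_model_max_length 6144. A hedged sketch that cross-checks those flag values against the committed config.json and tokenizer_config.json (file locations are assumptions):

import json

# Values passed on the command line in finetune_all_multinode_stage4.sh
MODEL_MAX_LENGTH = 4096        # --model_max_length
TOKENIZER_MAX_LENGTH = 6144    # --tokenizer_model_max_length

with open("config.json") as f:
    cfg = json.load(f)
with open("tokenizer_config.json") as f:
    tok = json.load(f)

# Both files in this commit should already carry the same limits.
assert cfg["tokenizer_model_max_length"] == TOKENIZER_MAX_LENGTH
assert tok["model_max_length"] == MODEL_MAX_LENGTH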
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:346f2705a5e9ae972033847876dea0bcedd1ee56116c9c23cfa72ded222e8214
 size 4976698672
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7bab15611dfc404180dd875368f2e343eef0be202143436f74781821fdb00be9
 size 4999802720
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:134edcbecc2c4602a9f3276d0014ec68dfd9b379af4b1b750f6524cabb9703ab
 size 4915916176
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:37ba906825d1bb6330573ed40c45cfa9dcb3e28a86ddf494f4f498321e6fbb86
 size 3851682320
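Each weight shard is stored as a Git LFS pointer, so the diff only replaces the sha256 oid; the byte sizes are unchanged. A minimal sketch for verifying a downloaded shard against the oid recorded above, assuming the file has already been fetched into the working directory:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    """Stream the file in 1 MiB chunks so multi-GB shards fit in constant memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

# oid taken from the LFS pointer of model-00001-of-00004.safetensors above
expected = "346f2705a5e9ae972033847876dea0bcedd1ee56116c9c23cfa72ded222e8214"
print(sha256_of("model-00001-of-00004.safetensors") == expected)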
tokenizer_config.json
CHANGED
@@ -2057,7 +2057,7 @@
     "input_ids",
     "attention_mask"
   ],
-  "model_max_length":
+  "model_max_length": 4096,
   "pad_token": "<|finetune_right_pad_id|>",
   "padding_side": "right",
   "tokenizer_class": "PreTrainedTokenizerFast"
trainer_state.json
CHANGED
The diff for this file is too large to render.
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:145ed827bee3a57b7ae1ffa2e2548128c776658cc3e524c09ac1865e2a584bf7
 size 6776
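training_args.bin is the small pickled TrainingArguments object that the transformers Trainer writes alongside checkpoints (6776 bytes per the pointer). A hedged sketch for inspecting the committed hyperparameters locally, assuming torch and transformers are installed (unpickling needs the transformers classes, and recent torch versions require weights_only=False because this is a full pickle rather than a tensor checkpoint):

import torch

# Load the pickled TrainingArguments saved by the Trainer.
args = torch.load("training_args.bin", weights_only=False)
print(args.num_train_epochs, args.per_device_train_batch_size, args.learning_rate)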