File size: 5,844 Bytes
bbfa6f6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# Registry of training datasets, keyed by dataset name.
#
# Every entry carries a 'data_type' (one of 'images', 'video', 'frames' or
# 'text' in this table). The remaining keys vary per dataset: metadata paths
# ('train_data_path', 'val_data_path', '*_dir'), a frame rate ('fps'), and
# selection options ('task_types', 'conv_type', 'sample_method',
# 'select_datasets', 'default_tasks').
#
# NOTE(review): how each key is interpreted is decided by the dataset loaders,
# which are not part of this table -- confirm there before relying on it.
# Size notes such as "500k" are carried over from the original comments;
# presumably approximate sample counts -- TODO confirm.
data_configs = {
    'llava_pretrain': {
        'data_type': 'images',
        'train_data_path': '/mnt/bn/algo-masp-nas-2/baiyi.by/data/blip_laion_cc_sbu_558k/meta_data.json',
    },
    'llava_instruct': {
        'data_type': 'images',
        'train_data_path': '/mnt/bn/algo-masp-nas-2/baiyi.by/data/llava_instruct_150k/meta_data.json',
    },
    'lrv_instruct': {
        'data_type': 'images',
        'train_data_path': '/mnt/bn/algo-masp-nas-2/baiyi.by/data/lrv_instructions/meta_data.json',
    },
    'coco_caption': {
        'data_type': 'images',
        'train_data_path': '/mnt/bn/data-tns-algo-masp/baiyi.by/data/coco_caption/train.json',
    },
    'cc_sbu': {
        'data_type': 'images',
        'train_data_path': '/mnt/bn/baiyi-arnold-nas/data/masp/vlm_data/cc_sbu/meta_data.json',
    },
    'laion': {
        'data_type': 'images',
        'train_data_path': '/mnt/bn/data-tns-algo-masp/baiyi.by/data/laion/train.json',
    },

    # The only entry in this table that also declares a validation split.
    'webvid': {
        'data_type': 'video',
        'train_data_path': '/mnt/bn/baiyi-arnold-nas/data/masp/vlm_data/webvid_10M_video/train.json',
        'val_data_path': '/mnt/bn/baiyi-arnold-nas/data/masp/vlm_data/webvid_10M_video/val.json',
    },
    'internvid': {
        'data_type': 'frames',
        'fps': 0.5,
        'train_data_path': '/mnt/bn/algo-masp-nas-2/baiyi.by/data/InternVid/meta_data.json',
    },
    'video_chatgpt_instruct_single': {
        'data_type': 'video',
        'train_data_path': '/mnt/bn/algo-masp-nas-2/baiyi.by/data/VideoChatGPT_Instruct_100K_single/train.json',
    },
    'video_chatgpt_instruct_multi': {
        'data_type': 'video',
        'train_data_path': '/mnt/bn/algo-masp-nas-2/baiyi.by/data/VideoChatGPT_Instruct_100K_multi/train.json',
    },
    'video_chatgpt': {
        'data_type': 'frames',
        'train_data_path': '/mnt/bn/algo-masp-nas-2/baiyi.by/data/video_chatgpt_instruct/meta_data.json',
    },

    # No metadata path here; only a list of default task names is given.
    'm3it': {
        'data_type': 'images',
        'default_tasks': [
            'coco',
            'textcap',
            'image-paragraph-captioning',
            'coco-goi',
            'coco-itm',
            'vqa-v2',
            'shapes',
            'docvqa',
            'ocr-vqa',
            'st-vqa',
            'text-vqa',
            'gqa',
            'okvqa',
            'a-okvqa',
            'viquae',
            'clevr',
            'nlvr',
            'vcr',
            'visual-mrc',
            'visual-dialog',
            'multi30k',
        ],
    },

    'tt_vqa': {
        'data_type': 'frames',
        'fps': 2,
        'train_data_path': '/mnt/bn/algo-masp-nas-2/baiyi.by/data/ADSO_Anno_Data/batch_20231128/meta_data_single_60k_caption_170k_QA.json',
        # Alternative metadata files, currently disabled:
        # 'train_data_path': '/mnt/bn/yukunfeng-nasdrive/xiangchen/dataset/masp/20240208_meta_data_single_135k_caption_160k_QA.json'
        # 'train_data_path': '/mnt/bn/algo-masp-nas-2/baiyi.by/data/ADSO_Anno_Data/batch_20231128/meta_data_final_single_non_empty.json'
    },
    'gpt4v_tt_vqa': {
        'data_type': 'frames',
        'fps': 0.5,
        # Alternative metadata files, currently disabled:
        # 'train_data_path': '/mnt/bn/algo-masp-nas-2/baiyi.by/data/GPT4V_Negs/20231127_81k_single.json'
        # 'train_data_path': '/mnt/bn/yukunfeng-nasdrive/xiangchen/dataset/masp/20231127_81k_25k_filtered_single_non_empty.json'
        'train_data_path': '/mnt/bn/algo-masp-nas-2/xiangchen/dataset/masp/20231222_120k_multi_filtered.json',
        'task_types': ['caption', 'qas'],
        'conv_type': 'single',
    },

    # Configured through three directories instead of a single metadata file.
    'sharegpt4v': {
        'data_type': 'images',
        'coco_dir': '/mnt/bn/data-tns-algo-masp/data',
        'llava_dir': '/mnt/bn/data-tns-algo-masp/baiyi.by/data/blip_laion_cc_sbu_558k',
        'other_dir': '/mnt/bn/algo-masp-nas-2/xiangchen/dataset/sharegpt4v',
    },

    'gpt4v_public': {
        'data_type': 'frames',
        'fps': 1,
        'train_data_path': '/mnt/bn/algo-masp-nas-2/xiangchen/data/shared_gpt4v_data/data_130k.json',
        # Alternative metadata file, currently disabled:
        # 'train_data_path': '/mnt/bn/algo-masp-nas-2/xiangchen/data/shared_gpt4v_data/data_500k_filtered.json',
        'task_types': ['summary', 'detail', 'qa_pairs'],
        'conv_type': 'single',
        'sample_method': 'uniform',
    },
    'gpt4v_internal': {
        'data_type': 'frames',
        'fps': 2,
        'train_data_path': '/mnt/bn/algo-masp-nas-2/xiangchen/dataset/masp/gpt4v_internal_28k.json',
        'task_types': ['summary', 'detail', 'qa_pairs'],
        'conv_type': 'single',
    },

    # 500k
    'synthdog': {
        'data_type': 'images',
    },
    # 200k
    'ocr_vqa': {
        'data_type': 'images',
        'train_data_path': '/mnt/bn/algo-masp-nas-2/xiangchen/dataset/OCR-VQA/training_meta.json',
    },
    # 50k
    'sharegpt': {
        'data_type': 'text',
    },
    # 100k
    'text_caps': {
        'data_type': 'images',
        'train_data_path': '/mnt/bn/algo-masp-nas-2/xiangchen/dataset/TextCaps/TextCaps_0.1_train.json',
    },
    # 50k
    'synthetic_ocr': {
        'data_type': 'frames',
        'fps': 0.5,  # total 10 frames for each video
        'train_data_path': '/mnt/bn/algo-masp-nas-2/xiangchen/dataset/masp/synthetic_ocr/train_filtered.json',
    },
    # 600k
    'lk_image': {
        'data_type': 'images',
        'train_data_path': '/mnt/bn/liangkeg/data/xiangchen/finetune_all_detail_vidal200k_videollava_images_im.json',
    },
    # 850k
    'lk_video': {
        'data_type': 'frames',
        'fps': 1,
        'train_data_path': '/mnt/bn/liangkeg/data/xiangchen/finetune_all_detail_vidal200k_videollava_images_vid.json',
        'select_datasets': ['webvid10m', 'webvid2m', 'activitynet', 'vidal', 'hdvila'],
    },
    # 210k
    'promptv1_2_internal': {
        'data_type': 'frames',
        'train_data_path': '/mnt/bn/algo-masp-nas-2/kaili.zhao/data/masp_data/train/gpt4v_annotation/202400401week_gpt4v_all_videos_unique_ids.json',
        'task_types': ['caption'],
    },
}
|