{
  "TextEncoders": {
    "bert": {
      "config": "configs/config_bert.json",
      "d_model": 768,
      "fusion_layer": 9,
      "name": "bert_base",
      "pretrained": "bert-base-uncased"
    },
    "bert_large": {
      "config": "configs/config_bert_large.json",
      "d_model": 1024,
      "fusion_layer": 19,
      "name": "bert_large",
      "pretrained": "bert-large-uncased"
    },
    "med_bert": {
      "config": "configs/med_config.json",
      "d_model": 768,
      "name": "med_bert_base",
      "pretrained": "bert-base-uncased"
    },
    "med_bert_large": {
      "config": "configs/med_large_config.json",
      "d_model": 768,
      "name": "med_bert_large",
      "pretrained": "bert-base-uncased"
    }
  },
  "VisionEncoders": {},
  "architectures": [
    "InternVideo2_Stage2"
  ],
  "auto_map": {
    "AutoConfig": "modeling_internvideo2.InternVideo2_Stage2_Config",
    "AutoModel": "modeling_internvideo2.InternVideo2_Stage2"
  },
  "auto_resume": true,
  "available_corpus": {
    "anet_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "is_paragraph_retrieval": true,
      "max_txt_l": 150,
      "media_type": "video"
    },
    "anet_ret_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_paragraph_retrieval": true,
      "max_txt_l": 150,
      "media_type": "video"
    },
    "audiocaps_ret_test": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "audiocaps_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "cc12m": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "image"
    },
    "cc3m": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "image"
    },
    "cc3m_debug": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "image"
    },
    "charades_mc_test": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "clothov1_ret_test": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "clothov1_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "clothov2_ret_test": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "clothov2_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "coco": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "image"
    },
    "data_25m": [
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "video"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "jump_filter": true,
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "jump_filter": true,
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "image"
      }
    ],
    "debug": [
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "video"
      }
    ],
    "didemo_ret_test": {
      "anno_path": "your_path",
      "data_root": "",
      "is_paragraph_retrieval": true,
      "max_txt_l": 64,
      "media_type": "video",
      "trimmed30": true
    },
    "didemo_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "is_paragraph_retrieval": true,
      "max_txt_l": 64,
      "media_type": "video",
      "trimmed30": true
    },
    "didemo_ret_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_paragraph_retrieval": true,
      "max_txt_l": 64,
      "media_type": "video",
      "trimmed30": true
    },
    "hmdb51_act_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_act_rec": true,
      "media_type": "video"
    },
    "internvid_v1": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "video"
    },
    "internvid_v2_avs_private": {
      "anno_path": "your_path",
      "caption_augmentation": {
        "caption_sample_type": "avs_all"
      },
      "data_root": "",
      "jump_filter": true,
      "media_type": "audio_video",
      "read_audio_from_video": true,
      "read_clip_from_video": false,
      "zero_audio_padding_for_video": true
    },
    "k400_act_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_act_rec": true
    },
    "k600_act_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_act_rec": true,
      "media_type": "video"
    },
    "k700_act_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_act_rec": true,
      "media_type": "video"
    },
    "laion_2b": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "image"
    },
    "laion_coco": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "image"
    },
    "laion_pop": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "image"
    },
    "lsmdc_ret_test_1000": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "lsmdc_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "max_txt_l": 96,
      "media_type": "video"
    },
    "lsmdc_ret_val": {
      "anno_path": "your_path",
      "data_root": "",
      "max_txt_l": 96,
      "media_type": "video"
    },
    "mit_act_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_act_rec": true,
      "media_type": "video"
    },
    "msrvtt_1k_test": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "msrvtt_ret_test1k": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "msrvtt_ret_train9k": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "msvd_ret_test": {
      "anno_path": "your_path",
      "data_root": "",
      "max_txt_l": 64,
      "media_type": "video"
    },
    "msvd_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "has_multi_txt_gt": true,
      "max_txt_l": 64,
      "media_type": "video"
    },
    "msvd_ret_val": {
      "anno_path": "your_path",
      "data_root": "",
      "max_txt_l": 64,
      "media_type": "video"
    },
    "pretrain_example_data_1B": [
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "video"
      }
    ],
    "pretrain_example_data_6B": [
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "image"
      },
      {
        "anno_path": "your_path",
        "data_root": "",
        "media_type": "video"
      },
      {
        "anno_path": "your_path",
        "caption_augmentation": {
          "caption_sample_type": "avs_all"
        },
        "data_root": "",
        "jump_filter": true,
        "media_type": "audio_video",
        "read_audio_from_video": true,
        "read_clip_from_video": false,
        "zero_audio_padding_for_video": true
      }
    ],
    "sbu": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "image"
    },
    "ssv2_mc_val": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "ucf101_act_val": {
      "anno_path": "your_path",
      "data_root": "",
      "is_act_rec": true,
      "media_type": "video"
    },
    "vatex_ch_ret_val": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "vatex_en_ret_train": {
      "anno_path": "your_path",
      "data_root": "",
      "has_multi_txt_gt": true,
      "media_type": "video"
    },
    "vatex_en_ret_val": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "vg": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "image"
    },
    "wavcaps_400k": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "audio"
    },
    "webvid": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "webvid_10m": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "webvid_debug": {
      "anno_path": "your_path",
      "data_root": "",
      "media_type": "video"
    },
    "webvid_fuse_10m": {
      "anno_path": "your_path",
      "data_root": "",
      "jump_filter": true,
      "media_type": "video"
    }
  },
  "batch_size": 8,
  "batch_size_test": 4,
  "compile_model": false,
  "debug": false,
  "deep_fusion": false,
  "deepspeed": {
    "enable": true,
    "stage": 1
  },
  "device": "cuda",
  "dist_url": "env://",
  "evaluate": true,
  "evaluation": {
    "eval_frame_ensemble": "concat",
    "eval_offload": true,
    "eval_x_only": false,
    "k_test": 128
  },
  "gradient_checkpointing": true,
  "inputs": {
    "batch_size": {
      "image": 8,
      "video": 8
    },
    "batch_size_test": {
      "image": 4,
      "video": 4
    },
    "image_res": 224,
    "max_txt_l": {
      "image": 40,
      "video": 40
    },
    "video_input": {
      "num_frames": 4,
      "num_frames_test": 4,
      "random_aug": false,
      "sample_type": "rand",
      "sample_type_test": "middle"
    }
  },
  "jump_evaluate": false,
  "log_freq": 100,
  "max_txt_l": 40,
  "mode": "pt",
  "model": {
    "embed_dim": 512,
    "find_unused_parameters": false,
    "model_cls": "InternVideo2_Stage2",
    "multimodal": {
      "enable": true
    },
    "temp": 0.07,
    "text_encoder": {
      "config": "configs/config_bert_large.json",
      "d_model": 1024,
      "fusion_layer": 19,
      "name": "bert_large",
      "pretrained": "bert-large-uncased"
    },
    "vision_encoder": {
      "checkpoint_num": 40,
      "clip_embed_dim": 768,
      "clip_input_resolution": 224,
      "clip_norm_type": "l2",
      "clip_return_layer": 6,
      "clip_student_return_interval": 1,
      "clip_teacher": null,
      "clip_teacher_embed_dim": 3200,
      "clip_teacher_final_dim": 768,
      "clip_teacher_return_interval": 1,
      "d_model": 1408,
      "image_mask_ratio": 0.5,
      "image_mask_type": "random",
      "img_size": 224,
      "keep_temporal": false,
      "name": "pretrain_internvideo2_6b_patch14_224",
      "num_frames": 4,
      "only_mask": true,
      "patch_size": 14,
      "pretrained": "",
      "sep_image_video_pos_embed": true,
      "tubelet_size": 1,
      "use_checkpoint": true,
      "use_flash_attn": false,
      "use_fused_mlp": false,
      "use_fused_rmsnorm": false,
      "video_mask_ratio": 0.8,
      "video_mask_type": "random"
    }
  },
  "num_frames": 4,
  "num_frames_test": 4,
  "num_workers": 6,
  "origin_num_frames": 4,
  "output_dir": null,
  "resume": false,
  "save_latest": false,
  "seed": 42,
  "size_t": 224,
  "text_enc": "bert_large",
  "torch_dtype": "float32",
  "transformers_version": "4.37.2",
  "use_bf16": false,
  "use_flash_sdp": false,
  "use_half_precision": false,
  "use_mem_efficient_sdp": false
}
|
|