{ "TextEncoders": { "bert": { "config": "configs/config_bert.json", "d_model": 768, "fusion_layer": 9, "name": "bert_base", "pretrained": "bert-base-uncased" }, "bert_large": { "config": "configs/config_bert_large.json", "d_model": 1024, "fusion_layer": 19, "name": "bert_large", "pretrained": "bert-large-uncased" }, "med_bert": { "config": "configs/med_config.json", "d_model": 768, "name": "med_bert_base", "pretrained": "bert-base-uncased" }, "med_bert_large": { "config": "configs/med_large_config.json", "d_model": 768, "name": "med_bert_large", "pretrained": "bert-base-uncased" } }, "VisionEncoders": {}, "architectures": [ "InternVideo2_Stage2" ], "auto_map": { "AutoConfig": "modeling_internvideo2.InternVideo2_Stage2_Config", "AutoModel": "modeling_internvideo2.InternVideo2_Stage2" }, "auto_resume": true, "available_corpus": { "anet_ret_train": { "anno_path": "your_path", "data_root": "", "is_paragraph_retrieval": true, "max_txt_l": 150, "media_type": "video" }, "anet_ret_val": { "anno_path": "your_path", "data_root": "", "is_paragraph_retrieval": true, "max_txt_l": 150, "media_type": "video" }, "audiocaps_ret_test": { "anno_path": "your_path", "data_root": "", "media_type": "audio" }, "audiocaps_ret_train": { "anno_path": "your_path", "data_root": "", "media_type": "audio" }, "cc12m": { "anno_path": "your_path", "data_root": "", "media_type": "image" }, "cc3m": { "anno_path": "your_path", "data_root": "", "media_type": "image" }, "cc3m_debug": { "anno_path": "your_path", "data_root": "", "media_type": "image" }, "charades_mc_test": { "anno_path": "your_path", "data_root": "", "media_type": "video" }, "clothov1_ret_test": { "anno_path": "your_path", "data_root": "", "media_type": "audio" }, "clothov1_ret_train": { "anno_path": "your_path", "data_root": "", "media_type": "audio" }, "clothov2_ret_test": { "anno_path": "your_path", "data_root": "", "media_type": "audio" }, "clothov2_ret_train": { "anno_path": "your_path", "data_root": "", "media_type": "audio" }, "coco": { "anno_path": "your_path", "data_root": "", "jump_filter": true, "media_type": "image" }, "data_25m": [ { "anno_path": "your_path", "data_root": "", "media_type": "video" }, { "anno_path": "your_path", "data_root": "", "media_type": "image" }, { "anno_path": "your_path", "data_root": "", "jump_filter": true, "media_type": "image" }, { "anno_path": "your_path", "data_root": "", "jump_filter": true, "media_type": "image" }, { "anno_path": "your_path", "data_root": "", "media_type": "image" }, { "anno_path": "your_path", "data_root": "", "media_type": "image" } ], "debug": [ { "anno_path": "your_path", "data_root": "", "media_type": "image" }, { "anno_path": "your_path", "data_root": "", "media_type": "video" } ], "didemo_ret_test": { "anno_path": "your_path", "data_root": "", "is_paragraph_retrieval": true, "max_txt_l": 64, "media_type": "video", "trimmed30": true }, "didemo_ret_train": { "anno_path": "your_path", "data_root": "", "is_paragraph_retrieval": true, "max_txt_l": 64, "media_type": "video", "trimmed30": true }, "didemo_ret_val": { "anno_path": "your_path", "data_root": "", "is_paragraph_retrieval": true, "max_txt_l": 64, "media_type": "video", "trimmed30": true }, "hmdb51_act_val": { "anno_path": "your_path", "data_root": "", "is_act_rec": true, "media_type": "video" }, "internvid_v1": { "anno_path": "your_path", "data_root": "", "jump_filter": true, "media_type": "video" }, "internvid_v2_avs_private": { "anno_path": "your_path", "caption_augmentation": { "caption_sample_type": "avs_all" }, "data_root": "", "jump_filter": true, "media_type": "audio_video", "read_audio_from_video": true, "read_clip_from_video": false, "zero_audio_padding_for_video": true }, "k400_act_val": { "anno_path": "your_path", "data_root": "", "is_act_rec": true }, "k600_act_val": { "anno_path": "your_path", "data_root": "", "is_act_rec": true, "media_type": "video" }, "k700_act_val": { "anno_path": "your_path", "data_root": "", "is_act_rec": true, "media_type": "video" }, "laion_2b": { "anno_path": "your_path", "data_root": "", "jump_filter": true, "media_type": "image" }, "laion_coco": { "anno_path": "your_path", "data_root": "", "jump_filter": true, "media_type": "image" }, "laion_pop": { "anno_path": "your_path", "data_root": "", "jump_filter": true, "media_type": "image" }, "lsmdc_ret_test_1000": { "anno_path": "your_path", "data_root": "", "media_type": "video" }, "lsmdc_ret_train": { "anno_path": "your_path", "data_root": "", "max_txt_l": 96, "media_type": "video" }, "lsmdc_ret_val": { "anno_path": "your_path", "data_root": "", "max_txt_l": 96, "media_type": "video" }, "mit_act_val": { "anno_path": "your_path", "data_root": "", "is_act_rec": true, "media_type": "video" }, "msrvtt_1k_test": { "anno_path": "your_path", "data_root": "", "media_type": "video" }, "msrvtt_ret_test1k": { "anno_path": "your_path", "data_root": "", "media_type": "video" }, "msrvtt_ret_train9k": { "anno_path": "your_path", "data_root": "", "media_type": "video" }, "msvd_ret_test": { "anno_path": "your_path", "data_root": "", "max_txt_l": 64, "media_type": "video" }, "msvd_ret_train": { "anno_path": "your_path", "data_root": "", "has_multi_txt_gt": true, "max_txt_l": 64, "media_type": "video" }, "msvd_ret_val": { "anno_path": "your_path", "data_root": "", "max_txt_l": 64, "media_type": "video" }, "pretrain_example_data_1B": [ { "anno_path": "your_path", "data_root": "", "media_type": "image" }, { "anno_path": "your_path", "data_root": "", "media_type": "video" } ], "pretrain_example_data_6B": [ { "anno_path": "your_path", "data_root": "", "media_type": "image" }, { "anno_path": "your_path", "data_root": "", "media_type": "video" }, { "anno_path": "your_path", "caption_augmentation": { "caption_sample_type": "avs_all" }, "data_root": "", "jump_filter": true, "media_type": "audio_video", "read_audio_from_video": true, "read_clip_from_video": false, "zero_audio_padding_for_video": true } ], "sbu": { "anno_path": "your_path", "data_root": "", "media_type": "image" }, "ssv2_mc_val": { "anno_path": "your_path", "data_root": "", "media_type": "video" }, "ucf101_act_val": { "anno_path": "your_path", "data_root": "", "is_act_rec": true, "media_type": "video" }, "vatex_ch_ret_val": { "anno_path": "your_path", "data_root": "", "media_type": "video" }, "vatex_en_ret_train": { "anno_path": "your_path", "data_root": "", "has_multi_txt_gt": true, "media_type": "video" }, "vatex_en_ret_val": { "anno_path": "your_path", "data_root": "", "media_type": "video" }, "vg": { "anno_path": "your_path", "data_root": "", "jump_filter": true, "media_type": "image" }, "wavcaps_400k": { "anno_path": "your_path", "data_root": "", "media_type": "audio" }, "webvid": { "anno_path": "your_path", "data_root": "", "media_type": "video" }, "webvid_10m": { "anno_path": "your_path", "data_root": "", "media_type": "video" }, "webvid_debug": { "anno_path": "your_path", "data_root": "", "media_type": "video" }, "webvid_fuse_10m": { "anno_path": "your_path", "data_root": "", "jump_filter": true, "media_type": "video" } }, "batch_size": 8, "batch_size_test": 4, "compile_model": false, "debug": false, "deep_fusion": false, "deepspeed": { "enable": true, "stage": 1 }, "device": "cuda", "dist_url": "env://", "evaluate": true, "evaluation": { "eval_frame_ensemble": "concat", "eval_offload": true, "eval_x_only": false, "k_test": 128 }, "gradient_checkpointing": true, "inputs": { "batch_size": { "image": 8, "video": 8 }, "batch_size_test": { "image": 4, "video": 4 }, "image_res": 224, "max_txt_l": { "image": 40, "video": 40 }, "video_input": { "num_frames": 4, "num_frames_test": 4, "random_aug": false, "sample_type": "rand", "sample_type_test": "middle" } }, "jump_evaluate": false, "log_freq": 100, "max_txt_l": 40, "mode": "pt", "model": { "embed_dim": 512, "find_unused_parameters": false, "model_cls": "InternVideo2_Stage2", "multimodal": { "enable": true }, "temp": 0.07, "text_encoder": { "config": "configs/config_bert_large.json", "d_model": 1024, "fusion_layer": 19, "name": "bert_large", "pretrained": "bert-large-uncased" }, "vision_encoder": { "checkpoint_num": 40, "clip_embed_dim": 768, "clip_input_resolution": 224, "clip_norm_type": "l2", "clip_return_layer": 6, "clip_student_return_interval": 1, "clip_teacher": null, "clip_teacher_embed_dim": 3200, "clip_teacher_final_dim": 768, "clip_teacher_return_interval": 1, "d_model": 1408, "image_mask_ratio": 0.5, "image_mask_type": "random", "img_size": 224, "keep_temporal": false, "name": "pretrain_internvideo2_6b_patch14_224", "num_frames": 4, "only_mask": true, "patch_size": 14, "pretrained": "", "sep_image_video_pos_embed": true, "tubelet_size": 1, "use_checkpoint": true, "use_flash_attn": false, "use_fused_mlp": false, "use_fused_rmsnorm": false, "video_mask_ratio": 0.8, "video_mask_type": "random" } }, "num_frames": 4, "num_frames_test": 4, "num_workers": 6, "origin_num_frames": 4, "output_dir": null, "resume": false, "save_latest": false, "seed": 42, "size_t": 224, "text_enc": "bert_large", "torch_dtype": "float32", "transformers_version": "4.37.2", "use_bf16": false, "use_flash_sdp": false, "use_half_precision": false, "use_mem_efficient_sdp": false }