{
"TextEncoders": {
"bert": {
"config": "configs/config_bert.json",
"d_model": 768,
"fusion_layer": 9,
"name": "bert_base",
"pretrained": "bert-base-uncased"
},
"bert_large": {
"config": "configs/config_bert_large.json",
"d_model": 1024,
"fusion_layer": 19,
"name": "bert_large",
"pretrained": "bert-large-uncased"
},
"med_bert": {
"config": "configs/med_config.json",
"d_model": 768,
"name": "med_bert_base",
"pretrained": "bert-base-uncased"
},
"med_bert_large": {
"config": "configs/med_large_config.json",
"d_model": 768,
"name": "med_bert_large",
"pretrained": "bert-base-uncased"
}
},
"VisionEncoders": {},
"architectures": [
"InternVideo2_Stage2"
],
"auto_map": {
"AutoConfig": "modeling_internvideo2.InternVideo2_Stage2_Config",
"AutoModel": "modeling_internvideo2.InternVideo2_Stage2"
},
"auto_resume": true,
"available_corpus": {
"anet_ret_train": {
"anno_path": "your_path",
"data_root": "",
"is_paragraph_retrieval": true,
"max_txt_l": 150,
"media_type": "video"
},
"anet_ret_val": {
"anno_path": "your_path",
"data_root": "",
"is_paragraph_retrieval": true,
"max_txt_l": 150,
"media_type": "video"
},
"audiocaps_ret_test": {
"anno_path": "your_path",
"data_root": "",
"media_type": "audio"
},
"audiocaps_ret_train": {
"anno_path": "your_path",
"data_root": "",
"media_type": "audio"
},
"cc12m": {
"anno_path": "your_path",
"data_root": "",
"media_type": "image"
},
"cc3m": {
"anno_path": "your_path",
"data_root": "",
"media_type": "image"
},
"cc3m_debug": {
"anno_path": "your_path",
"data_root": "",
"media_type": "image"
},
"charades_mc_test": {
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
},
"clothov1_ret_test": {
"anno_path": "your_path",
"data_root": "",
"media_type": "audio"
},
"clothov1_ret_train": {
"anno_path": "your_path",
"data_root": "",
"media_type": "audio"
},
"clothov2_ret_test": {
"anno_path": "your_path",
"data_root": "",
"media_type": "audio"
},
"clothov2_ret_train": {
"anno_path": "your_path",
"data_root": "",
"media_type": "audio"
},
"coco": {
"anno_path": "your_path",
"data_root": "",
"jump_filter": true,
"media_type": "image"
},
"data_25m": [
{
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
},
{
"anno_path": "your_path",
"data_root": "",
"media_type": "image"
},
{
"anno_path": "your_path",
"data_root": "",
"jump_filter": true,
"media_type": "image"
},
{
"anno_path": "your_path",
"data_root": "",
"jump_filter": true,
"media_type": "image"
},
{
"anno_path": "your_path",
"data_root": "",
"media_type": "image"
},
{
"anno_path": "your_path",
"data_root": "",
"media_type": "image"
}
],
"debug": [
{
"anno_path": "your_path",
"data_root": "",
"media_type": "image"
},
{
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
}
],
"didemo_ret_test": {
"anno_path": "your_path",
"data_root": "",
"is_paragraph_retrieval": true,
"max_txt_l": 64,
"media_type": "video",
"trimmed30": true
},
"didemo_ret_train": {
"anno_path": "your_path",
"data_root": "",
"is_paragraph_retrieval": true,
"max_txt_l": 64,
"media_type": "video",
"trimmed30": true
},
"didemo_ret_val": {
"anno_path": "your_path",
"data_root": "",
"is_paragraph_retrieval": true,
"max_txt_l": 64,
"media_type": "video",
"trimmed30": true
},
"hmdb51_act_val": {
"anno_path": "your_path",
"data_root": "",
"is_act_rec": true,
"media_type": "video"
},
"internvid_v1": {
"anno_path": "your_path",
"data_root": "",
"jump_filter": true,
"media_type": "video"
},
"internvid_v2_avs_private": {
"anno_path": "your_path",
"caption_augmentation": {
"caption_sample_type": "avs_all"
},
"data_root": "",
"jump_filter": true,
"media_type": "audio_video",
"read_audio_from_video": true,
"read_clip_from_video": false,
"zero_audio_padding_for_video": true
},
"k400_act_val": {
"anno_path": "your_path",
"data_root": "",
"is_act_rec": true,
"media_type": "video"
},
"k600_act_val": {
"anno_path": "your_path",
"data_root": "",
"is_act_rec": true,
"media_type": "video"
},
"k700_act_val": {
"anno_path": "your_path",
"data_root": "",
"is_act_rec": true,
"media_type": "video"
},
"laion_2b": {
"anno_path": "your_path",
"data_root": "",
"jump_filter": true,
"media_type": "image"
},
"laion_coco": {
"anno_path": "your_path",
"data_root": "",
"jump_filter": true,
"media_type": "image"
},
"laion_pop": {
"anno_path": "your_path",
"data_root": "",
"jump_filter": true,
"media_type": "image"
},
"lsmdc_ret_test_1000": {
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
},
"lsmdc_ret_train": {
"anno_path": "your_path",
"data_root": "",
"max_txt_l": 96,
"media_type": "video"
},
"lsmdc_ret_val": {
"anno_path": "your_path",
"data_root": "",
"max_txt_l": 96,
"media_type": "video"
},
"mit_act_val": {
"anno_path": "your_path",
"data_root": "",
"is_act_rec": true,
"media_type": "video"
},
"msrvtt_1k_test": {
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
},
"msrvtt_ret_test1k": {
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
},
"msrvtt_ret_train9k": {
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
},
"msvd_ret_test": {
"anno_path": "your_path",
"data_root": "",
"max_txt_l": 64,
"media_type": "video"
},
"msvd_ret_train": {
"anno_path": "your_path",
"data_root": "",
"has_multi_txt_gt": true,
"max_txt_l": 64,
"media_type": "video"
},
"msvd_ret_val": {
"anno_path": "your_path",
"data_root": "",
"max_txt_l": 64,
"media_type": "video"
},
"pretrain_example_data_1B": [
{
"anno_path": "your_path",
"data_root": "",
"media_type": "image"
},
{
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
}
],
"pretrain_example_data_6B": [
{
"anno_path": "your_path",
"data_root": "",
"media_type": "image"
},
{
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
},
{
"anno_path": "your_path",
"caption_augmentation": {
"caption_sample_type": "avs_all"
},
"data_root": "",
"jump_filter": true,
"media_type": "audio_video",
"read_audio_from_video": true,
"read_clip_from_video": false,
"zero_audio_padding_for_video": true
}
],
"sbu": {
"anno_path": "your_path",
"data_root": "",
"media_type": "image"
},
"ssv2_mc_val": {
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
},
"ucf101_act_val": {
"anno_path": "your_path",
"data_root": "",
"is_act_rec": true,
"media_type": "video"
},
"vatex_ch_ret_val": {
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
},
"vatex_en_ret_train": {
"anno_path": "your_path",
"data_root": "",
"has_multi_txt_gt": true,
"media_type": "video"
},
"vatex_en_ret_val": {
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
},
"vg": {
"anno_path": "your_path",
"data_root": "",
"jump_filter": true,
"media_type": "image"
},
"wavcaps_400k": {
"anno_path": "your_path",
"data_root": "",
"media_type": "audio"
},
"webvid": {
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
},
"webvid_10m": {
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
},
"webvid_debug": {
"anno_path": "your_path",
"data_root": "",
"media_type": "video"
},
"webvid_fuse_10m": {
"anno_path": "your_path",
"data_root": "",
"jump_filter": true,
"media_type": "video"
}
},
"batch_size": 8,
"batch_size_test": 4,
"compile_model": false,
"debug": false,
"deep_fusion": false,
"deepspeed": {
"enable": true,
"stage": 1
},
"device": "cuda",
"dist_url": "env://",
"evaluate": true,
"evaluation": {
"eval_frame_ensemble": "concat",
"eval_offload": true,
"eval_x_only": false,
"k_test": 128
},
"gradient_checkpointing": true,
"inputs": {
"batch_size": {
"image": 8,
"video": 8
},
"batch_size_test": {
"image": 4,
"video": 4
},
"image_res": 224,
"max_txt_l": {
"image": 40,
"video": 40
},
"video_input": {
"num_frames": 4,
"num_frames_test": 4,
"random_aug": false,
"sample_type": "rand",
"sample_type_test": "middle"
}
},
"jump_evaluate": false,
"log_freq": 100,
"max_txt_l": 40,
"mode": "pt",
"model": {
"embed_dim": 512,
"find_unused_parameters": false,
"model_cls": "InternVideo2_Stage2",
"multimodal": {
"enable": true
},
"temp": 0.07,
"text_encoder": {
"config": "configs/config_bert_large.json",
"d_model": 1024,
"fusion_layer": 19,
"name": "bert_large",
"pretrained": "bert-large-uncased"
},
"vision_encoder": {
"checkpoint_num": 40,
"clip_embed_dim": 768,
"clip_input_resolution": 224,
"clip_norm_type": "l2",
"clip_return_layer": 6,
"clip_student_return_interval": 1,
"clip_teacher": null,
"clip_teacher_embed_dim": 3200,
"clip_teacher_final_dim": 768,
"clip_teacher_return_interval": 1,
"d_model": 1408,
"image_mask_ratio": 0.5,
"image_mask_type": "random",
"img_size": 224,
"keep_temporal": false,
"name": "pretrain_internvideo2_6b_patch14_224",
"num_frames": 4,
"only_mask": true,
"patch_size": 14,
"pretrained": "",
"sep_image_video_pos_embed": true,
"tubelet_size": 1,
"use_checkpoint": true,
"use_flash_attn": false,
"use_fused_mlp": false,
"use_fused_rmsnorm": false,
"video_mask_ratio": 0.8,
"video_mask_type": "random"
}
},
"num_frames": 4,
"num_frames_test": 4,
"num_workers": 6,
"origin_num_frames": 4,
"output_dir": null,
"resume": false,
"save_latest": false,
"seed": 42,
"size_t": 224,
"text_enc": "bert_large",
"torch_dtype": "float32",
"transformers_version": "4.37.2",
"use_bf16": false,
"use_flash_sdp": false,
"use_half_precision": false,
"use_mem_efficient_sdp": false
}