Visual Question Answering
English
wangyueqian committed on
Commit
8a97bcf
·
verified ·
1 Parent(s): 04db715

upload ckpt

Browse files
Files changed (2) hide show
  1. config.json +48 -0
  2. hawkeye.pth +3 -0
config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "model_cls": "HawkEye_it",
4
+ "vit_blip_model_path": "model/VideoChat2/umt_l16_qformer.pth",
5
+ "llama_model_path": "model/vicuna-7b",
6
+ "videochat2_model_path": "model/VideoChat2/videochat2_7b_stage2.pth",
7
+ "freeze_vit": true,
8
+ "freeze_qformer": false,
9
+ "max_txt_len": 512,
10
+ "low_resource": false,
11
+ "vision_encoder": {
12
+ "name": "vit_l14",
13
+ "img_size": 224,
14
+ "patch_size": 16,
15
+ "d_model": 1024,
16
+ "encoder_embed_dim": 1024,
17
+ "encoder_depth": 24,
18
+ "encoder_num_heads": 16,
19
+ "drop_path_rate": 0.0,
20
+ "num_frames": 32,
21
+ "tubelet_size": 1,
22
+ "use_checkpoint": false,
23
+ "checkpoint_num": 0,
24
+ "pretrained": "",
25
+ "return_index": -2,
26
+ "vit_add_ln": true,
27
+ "ckpt_num_frame": 4
28
+ },
29
+ "num_query_token": 32,
30
+ "qformer_hidden_dropout_prob": 0.1,
31
+ "qformer_attention_probs_dropout_prob": 0.1,
32
+ "qformer_drop_path_rate": 0.2,
33
+ "extra_num_query_token": 64,
34
+ "qformer_text_input": true,
35
+ "system": "",
36
+ "start_token": "<Video>",
37
+ "end_token": "</Video>",
38
+ "img_start_token": "<Image>",
39
+ "img_end_token": "</Image>",
40
+ "random_shuffle": true,
41
+ "use_lora": true,
42
+ "lora_r": 16,
43
+ "lora_alpha": 32,
44
+ "lora_dropout": 0.1
45
+ },
46
+ "device": "cuda"
47
+ }
48
+
hawkeye.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5da3b6fd40b214c0572daac77a036e012b5abea3b76a98f0666060628bf382a
3
+ size 775258023