ToviTu committed
Commit 12a34cb (verified)
Parent: c37615a

Upload TALlavaGemmaForCausalLM

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/ib-scratch/chenguang03/vision_share/models/llava-gemma2",
+  "_name_or_path": "google/gemma-2-2b-it",
   "architectures": [
     "TALlavaGemmaForCausalLM"
   ],
@@ -13,27 +13,22 @@
     107
   ],
   "final_logit_softcapping": 30.0,
-  "freeze_mm_mlp_adapter": false,
   "head_dim": 256,
   "hidden_act": "gelu_pytorch_tanh",
   "hidden_activation": "gelu_pytorch_tanh",
   "hidden_size": 2304,
-  "image_aspect_ratio": "pad",
   "initializer_range": 0.02,
   "intermediate_size": 9216,
   "max_position_embeddings": 8192,
   "mm_hidden_size": 1024,
   "mm_patch_merge_type": "flat",
-  "mm_projector_lr": null,
   "mm_projector_type": "linear",
-  "mm_use_im_patch_token": false,
-  "mm_use_im_start_end": false,
   "mm_vision_select_feature": "patch",
   "mm_vision_select_layer": -2,
   "mm_vision_tower": "openai/clip-vit-large-patch14-336",
   "model_type": "tallava_gemma",
   "num_attention_heads": 8,
-  "num_hidden_layers": 30,
+  "num_hidden_layers": 28,
   "num_key_value_heads": 4,
   "num_learnable_tokens": 32,
   "pad_token_id": 0,
@@ -41,11 +36,8 @@
   "rms_norm_eps": 1e-06,
   "rope_theta": 10000.0,
   "sliding_window": 4096,
-  "tokenizer_model_max_length": 2048,
-  "tokenizer_padding_side": "right",
   "torch_dtype": "bfloat16",
   "transformers_version": "4.46.2",
-  "tune_mm_mlp_adapter": false,
   "use_cache": true,
   "use_mm_proj": true,
   "vocab_size": 256000
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c1f842c519e70cc2856a88cb106af687d025689afd9bcf7bf0600bc86b4b6dc
+oid sha256:532d792c9178805064170a3ec485b7dedbfccc6fd297b92c31a6091b6c7e41bf
 size 4988025760
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d84b6b81da0513d15f8fef626032bfd684eff1a1e798e4cae5e228f2d03e6dc
-size 1475572040
+oid sha256:4691ff2bc105bdf495169797ba2669ed3705915335fac43ca95c7c9b038dc1fe
+size 1164105176
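The shards are stored as Git LFS pointer files, so the oid/size pairs above describe what the downloaded files should hash to. A minimal verification sketch, assuming both shards have been downloaded into the current directory (the chunked-hashing loop is illustrative, not part of the repository):

import hashlib, os

# Expected values are the "+" lines from the LFS pointers in this commit.
EXPECTED = {
    "model-00001-of-00002.safetensors":
        ("532d792c9178805064170a3ec485b7dedbfccc6fd297b92c31a6091b6c7e41bf", 4988025760),
    "model-00002-of-00002.safetensors":
        ("4691ff2bc105bdf495169797ba2669ed3705915335fac43ca95c7c9b038dc1fe", 1164105176),
}

for name, (oid, size) in EXPECTED.items():
    digest = hashlib.sha256()
    with open(name, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    assert os.path.getsize(name) == size, f"{name}: size mismatch"
    assert digest.hexdigest() == oid, f"{name}: sha256 mismatch"
    print(name, "matches its LFS pointer")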
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 6463497216
+    "total_size": 6152033280
   },
   "weight_map": {
     "model.bottle_neck.text_xattn.input_layernorm.weight": "model-00002-of-00002.safetensors",
@@ -235,28 +235,6 @@
   "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
   "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
   "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
-  "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
-  "model.layers.26.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
-  "model.layers.26.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
-  "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
-  "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
-  "model.layers.27.post_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
-  "model.layers.27.pre_feedforward_layernorm.weight": "model-00002-of-00002.safetensors",
-  "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
-  "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
   "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
   "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
   "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",