aho-tai committed on
Commit
fba310b
·
verified ·
1 Parent(s): c15c861

Upload VisionPixtralEncoderDecoder

Browse files
Files changed (3) hide show
  1. config.json +2 -1
  2. generation_config.json +1 -2
  3. modeling.py +1 -1
config.json CHANGED
@@ -3,6 +3,7 @@
3
  "VisionPixtralEncoderDecoder"
4
  ],
5
  "auto_map": {
 
6
  "AutoModel": "modeling.VisionPixtralEncoderDecoder"
7
  },
8
  "decoder": {
@@ -36,10 +37,10 @@
36
  "architectures": [
37
  "PixtralVisionModelBatch"
38
  ],
 
39
  "auto_map": {
40
  "AutoModel": "modeling.PixtralVisionModelBatch"
41
  },
42
- "attention_dropout": 0.0,
43
  "head_dim": 64,
44
  "hidden_act": "silu",
45
  "hidden_size": 1024,
 
3
  "VisionPixtralEncoderDecoder"
4
  ],
5
  "auto_map": {
6
+ "AutoConfig": "configuration.VisionPixtralEncoderDecoderConfig",
7
  "AutoModel": "modeling.VisionPixtralEncoderDecoder"
8
  },
9
  "decoder": {
 
37
  "architectures": [
38
  "PixtralVisionModelBatch"
39
  ],
40
+ "attention_dropout": 0.0,
41
  "auto_map": {
42
  "AutoModel": "modeling.PixtralVisionModelBatch"
43
  },
 
44
  "head_dim": 64,
45
  "hidden_act": "silu",
46
  "hidden_size": 1024,
generation_config.json CHANGED
@@ -4,6 +4,5 @@
4
  "decoder_start_token_id": 2,
5
  "eos_token_id": 2,
6
  "pad_token_id": 1,
7
- "transformers_version": "4.51.3",
8
- "use_cache": true
9
  }
 
4
  "decoder_start_token_id": 2,
5
  "eos_token_id": 2,
6
  "pad_token_id": 1,
7
+ "transformers_version": "4.51.3"
 
8
  }
modeling.py CHANGED
@@ -13,7 +13,7 @@ from transformers.models.pixtral.modeling_pixtral import apply_rotary_pos_emb, P
13
  from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask
14
  from transformers.modeling_outputs import BaseModelOutput
15
 
16
- from pixtral_encoder_decoder.config import PixtralVisionModelBatchConfig, VisionPixtralEncoderDecoderConfig
17
 
18
 
19
  def position_ids_in_meshgrid_batch(patch_embeds, max_width):
 
13
  from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask
14
  from transformers.modeling_outputs import BaseModelOutput
15
 
16
+ from pixtral_encoder_decoder.configuration import PixtralVisionModelBatchConfig, VisionPixtralEncoderDecoderConfig
17
 
18
 
19
  def position_ids_in_meshgrid_batch(patch_embeds, max_width):