maghrane committed
Commit ebf6553 · verified · 1 Parent(s): e513f49

Upload configuration_minicpm.py

Files changed (1)
  1. configuration_minicpm.py +113 -0
configuration_minicpm.py ADDED
@@ -0,0 +1,113 @@
+ # coding=utf-8
+ # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+ #
+ # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+ # and OPT implementations in this library. It has been modified from its
+ # original forms to accommodate minor architectural differences compared
+ # to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """ MiniCPM model configuration"""
+ import os
+ from typing import Union
+
+ from transformers.utils import logging
+ from transformers import LlamaConfig, PretrainedConfig
+ from transformers.models.idefics2.modeling_idefics2 import Idefics2VisionConfig
+
+ logger = logging.get_logger(__name__)
+
+
+ class MiniCPMVSliceConfig(PretrainedConfig):
+     model_type = "minicpmv"
+
+     def __init__(
+         self,
+         patch_size=14,
+         max_slice_nums=9,
+         scale_resolution=448,
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+         self.patch_size = patch_size
+         self.max_slice_nums = max_slice_nums
+         self.scale_resolution = scale_resolution
+
+     @classmethod
+     def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
+         cls._set_token_in_kwargs(kwargs)
+
+         config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
+
+         if config_dict.get("model_type") == "minicpmv":
+             config_dict = config_dict["slice_config"]
+
+         if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
+             logger.warning(
+                 f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
+                 f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
+             )
+
+         return cls.from_dict(config_dict, **kwargs)
+
+
+
+ class MiniCPMVConfig(LlamaConfig):
+     model_type = "minicpmv"
+     keys_to_ignore_at_inference = ["past_key_values"]
+
+     default_vision_config = {
+         "hidden_size": 1152,
+         "image_size": 980,
+         "intermediate_size": 4304,
+         "model_type": "idefics2",
+         "num_attention_heads": 16,
+         "num_hidden_layers": 27,
+         "patch_size": 14,
+     }
+
+     def __init__(
+         self,
+         use_cache=True,
+         query_num=64,
+         image_size=448,
+         drop_vision_last_layer=True,
+         batch_vision_input=True,
+         slice_config=None,
+         vision_config=None,
+         **kwargs,
+     ):
+         self.use_cache = use_cache
+         self.query_num = query_num
+         self.image_size = image_size
+         self.drop_vision_last_layer = drop_vision_last_layer
+         self.batch_vision_input = batch_vision_input
+
+         if slice_config is None:
+             self.slice_config = MiniCPMVSliceConfig(max_slice_nums=1)
+         else:
+             self.slice_config = MiniCPMVSliceConfig(**slice_config)
+         self.slice_mode = True
+
+         # same as HuggingFaceM4/siglip-so400m-14-980-flash-attn2-navit
+         if vision_config is None:
+             self.vision_config = Idefics2VisionConfig(**self.default_vision_config)
+             logger.info("vision_config is None, using default vision config")
+         elif isinstance(vision_config, dict):
+             self.vision_config = Idefics2VisionConfig(**vision_config)
+         elif isinstance(vision_config, Idefics2VisionConfig):
+             self.vision_config = vision_config
+
+         self.patch_size = self.vision_config.patch_size
+
+         super().__init__(**kwargs)
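
For reference, a minimal usage sketch of the two configuration classes added in this file. It is illustrative only: it assumes the file is importable locally as configuration_minicpm, that the installed transformers version provides Idefics2VisionConfig, and the text-backbone sizes passed through to LlamaConfig below are placeholder values, not taken from any released checkpoint.

    from configuration_minicpm import MiniCPMVConfig

    # Explicit vision/slicing options are consumed here; any remaining kwargs
    # (hidden_size, num_hidden_layers, ...) are forwarded to LlamaConfig for
    # the text backbone via super().__init__(**kwargs).
    config = MiniCPMVConfig(
        query_num=64,
        image_size=448,
        slice_config={"patch_size": 14, "max_slice_nums": 9, "scale_resolution": 448},
        hidden_size=2304,        # placeholder text-model width
        num_hidden_layers=40,    # placeholder depth
    )

    print(config.model_type)                   # "minicpmv"
    print(config.vision_config.hidden_size)    # 1152, from default_vision_config
    print(config.slice_config.max_slice_nums)  # 9
    config.save_pretrained("minicpmv-config")  # writes config.json with nested sub-configs

Because no vision_config is passed, the default SigLIP-style Idefics2VisionConfig is built from default_vision_config, and patch_size is copied from it onto the top-level config.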