czczup committed
Commit acc9f3d
1 Parent(s): 3ecd833

Upload folder using huggingface_hub

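The commit message is the default message produced by huggingface_hub's `upload_folder` API. A minimal sketch of such an upload call, with placeholder values for the local path and repo id (neither is recorded in this commit):

```python
from huggingface_hub import HfApi

api = HfApi()
# Pushes a local directory to a Hub repo in a single commit; the default
# commit message is "Upload folder using huggingface_hub".
api.upload_folder(
    folder_path="./local_model_dir",  # placeholder local path
    repo_id="user/repo",              # placeholder repo id
    repo_type="model",
)
```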
Files changed (2):
  1. README.md (+10 -0)
  2. modeling_internvl_chat.py (+2 -1)
README.md CHANGED

@@ -89,6 +89,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 ```

@@ -103,6 +104,7 @@ model = AutoModel.from_pretrained(
     torch_dtype=torch.bfloat16,
     load_in_8bit=True,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval()
 ```

@@ -149,6 +151,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True,
     device_map=device_map).eval()
 ```

@@ -166,6 +169,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -193,6 +197,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -219,6 +224,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -252,6 +258,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -290,6 +297,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -327,6 +335,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

@@ -397,6 +406,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)

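All ten README hunks make the same edit: each `AutoModel.from_pretrained` snippet gains a `use_flash_attn=True` argument. For reference, a minimal sketch of one loading snippet after this change (the model path is a placeholder for this repository's Hub id):

```python
import torch
from transformers import AutoModel, AutoTokenizer

path = "path/to/this/repo"  # placeholder: substitute this repository's Hub id
model = AutoModel.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    use_flash_attn=True,  # the flag added by this commit
    trust_remote_code=True).eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
```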
modeling_internvl_chat.py CHANGED

@@ -17,7 +17,7 @@ from transformers.utils import ModelOutput, logging
 
 from .configuration_internvl_chat import InternVLChatConfig
 from .conversation import get_conv_template
-from .modeling_intern_vit import InternVisionModel
+from .modeling_intern_vit import InternVisionModel, has_flash_attn
 
 logger = logging.get_logger(__name__)

@@ -48,6 +48,7 @@ class InternVLChatModel(PreTrainedModel):
         self.num_image_token = int((image_size // patch_size) ** 2 * (config.downsample_ratio ** 2))
         self.downsample_ratio = config.downsample_ratio
         self.ps_version = config.ps_version
+        use_flash_attn = use_flash_attn if has_flash_attn else False
         config.vision_config.use_flash_attn = True if use_flash_attn else False
         config.llm_config._attn_implementation = 'flash_attention_2' if use_flash_attn else 'eager'
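The second hunk makes `use_flash_attn` degrade gracefully: if FlashAttention is unavailable, the flag is forced to False, so the vision tower and the LLM fall back to eager attention instead of failing at load time. The `has_flash_attn` flag itself lives in modeling_intern_vit.py; a minimal sketch of the usual guarded-import pattern that defines it (the exact imported symbol is an assumption, not shown in this diff):

```python
# Assumed definition in modeling_intern_vit.py: probe for the flash_attn
# package at import time and record whether it is usable.
try:
    from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func
    has_flash_attn = True
except ImportError:
    has_flash_attn = False
```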