czczup committed
Commit db1d413 · verified · 1 Parent(s): ffcc6f0

Upload folder using huggingface_hub

Files changed (2):
  1. README.md +10 -0
  2. modeling_internvl_chat.py +2 -1
README.md CHANGED

@@ -110,6 +110,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 ```
 
@@ -124,6 +125,7 @@ model = AutoModel.from_pretrained(
     torch_dtype=torch.bfloat16,
     load_in_8bit=True,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval()
 ```
 
@@ -170,6 +172,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True,
     device_map=device_map).eval()
 ```
@@ -187,6 +190,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
 
@@ -214,6 +218,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
 
@@ -240,6 +245,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
 
@@ -273,6 +279,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
 
@@ -311,6 +318,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
 
@@ -348,6 +356,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
 
@@ -418,6 +427,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
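All ten hunks make the same change: `use_flash_attn=True` is inserted into each `AutoModel.from_pretrained(...)` call in the README's usage examples. For reference, a self-contained version of the updated loading snippet is sketched below; the checkpoint id is a placeholder for this repository's actual model id, and the remaining arguments are taken directly from the diff:

```python
import torch
from transformers import AutoModel, AutoTokenizer

path = 'OpenGVLab/InternVL-Chat-V1-5'  # placeholder: substitute this repo's actual model id
model = AutoModel.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    use_flash_attn=True,  # the flag added by this commit
    trust_remote_code=True).eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
```

The 8-bit variant (`load_in_8bit=True`) and the multi-GPU variant (`device_map=device_map`) in the README receive the identical one-line addition.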
modeling_internvl_chat.py CHANGED

@@ -17,7 +17,7 @@ from transformers.utils import ModelOutput, logging
 
 from .configuration_internvl_chat import InternVLChatConfig
 from .conversation import get_conv_template
-from .modeling_intern_vit import InternVisionModel
+from .modeling_intern_vit import InternVisionModel, has_flash_attn
 
 logger = logging.get_logger(__name__)
 
@@ -48,6 +48,7 @@ class InternVLChatModel(PreTrainedModel):
         self.num_image_token = int((image_size // patch_size) ** 2 * (config.downsample_ratio ** 2))
         self.downsample_ratio = config.downsample_ratio
         self.ps_version = config.ps_version
+        use_flash_attn = use_flash_attn if has_flash_attn else False
         config.vision_config.use_flash_attn = True if use_flash_attn else False
         config.llm_config._attn_implementation = 'flash_attention_2' if use_flash_attn else 'eager'
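The import change pulls in `has_flash_attn` alongside `InternVisionModel`, and the new guard downgrades `use_flash_attn` to `False` whenever flash attention is unavailable, so the new README flag degrades gracefully instead of failing on machines without the package. The flag's definition lives in `modeling_intern_vit.py` and is not part of this diff; a typical import-probe definition would look like the sketch below (an assumption about that file, not its actual code):

```python
# Hypothetical sketch of how has_flash_attn could be defined in
# modeling_intern_vit.py: probe for the flash-attn package at import time.
try:
    import flash_attn  # noqa: F401
    has_flash_attn = True
except ImportError:
    has_flash_attn = False
```

With the guard in place, `config.vision_config.use_flash_attn` and `config.llm_config._attn_implementation` both fall back to the eager attention path when flash attention is missing.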