Upload folder using huggingface_hub
Browse files
- README.md +10 -0
- modeling_internvl_chat.py +2 -1
README.md
CHANGED
@@ -110,6 +110,7 @@ model = AutoModel.from_pretrained(
|
|
110 |
path,
|
111 |
torch_dtype=torch.bfloat16,
|
112 |
low_cpu_mem_usage=True,
|
|
|
113 |
trust_remote_code=True).eval().cuda()
|
114 |
```
|
115 |
|
@@ -124,6 +125,7 @@ model = AutoModel.from_pretrained(
|
|
124 |
torch_dtype=torch.bfloat16,
|
125 |
load_in_8bit=True,
|
126 |
low_cpu_mem_usage=True,
|
|
|
127 |
trust_remote_code=True).eval()
|
128 |
```
|
129 |
|
@@ -170,6 +172,7 @@ model = AutoModel.from_pretrained(
|
|
170 |
path,
|
171 |
torch_dtype=torch.bfloat16,
|
172 |
low_cpu_mem_usage=True,
|
|
|
173 |
trust_remote_code=True,
|
174 |
device_map=device_map).eval()
|
175 |
```
|
@@ -187,6 +190,7 @@ model = AutoModel.from_pretrained(
|
|
187 |
path,
|
188 |
torch_dtype=torch.bfloat16,
|
189 |
low_cpu_mem_usage=True,
|
|
|
190 |
trust_remote_code=True).eval().cuda()
|
191 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
192 |
|
@@ -214,6 +218,7 @@ model = AutoModel.from_pretrained(
|
|
214 |
path,
|
215 |
torch_dtype=torch.bfloat16,
|
216 |
low_cpu_mem_usage=True,
|
|
|
217 |
trust_remote_code=True).eval().cuda()
|
218 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
219 |
|
@@ -240,6 +245,7 @@ model = AutoModel.from_pretrained(
|
|
240 |
path,
|
241 |
torch_dtype=torch.bfloat16,
|
242 |
low_cpu_mem_usage=True,
|
|
|
243 |
trust_remote_code=True).eval().cuda()
|
244 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
245 |
|
@@ -273,6 +279,7 @@ model = AutoModel.from_pretrained(
|
|
273 |
path,
|
274 |
torch_dtype=torch.bfloat16,
|
275 |
low_cpu_mem_usage=True,
|
|
|
276 |
trust_remote_code=True).eval().cuda()
|
277 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
278 |
|
@@ -311,6 +318,7 @@ model = AutoModel.from_pretrained(
|
|
311 |
path,
|
312 |
torch_dtype=torch.bfloat16,
|
313 |
low_cpu_mem_usage=True,
|
|
|
314 |
trust_remote_code=True).eval().cuda()
|
315 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
316 |
|
@@ -348,6 +356,7 @@ model = AutoModel.from_pretrained(
|
|
348 |
path,
|
349 |
torch_dtype=torch.bfloat16,
|
350 |
low_cpu_mem_usage=True,
|
|
|
351 |
trust_remote_code=True).eval().cuda()
|
352 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
353 |
|
@@ -418,6 +427,7 @@ model = AutoModel.from_pretrained(
|
|
418 |
path,
|
419 |
torch_dtype=torch.bfloat16,
|
420 |
low_cpu_mem_usage=True,
|
|
|
421 |
trust_remote_code=True).eval().cuda()
|
422 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
423 |
|
|
|
110 |
path,
|
111 |
torch_dtype=torch.bfloat16,
|
112 |
low_cpu_mem_usage=True,
|
113 |
+
use_flash_attn=True,
|
114 |
trust_remote_code=True).eval().cuda()
|
115 |
```
|
116 |
|
|
|
125 |
torch_dtype=torch.bfloat16,
|
126 |
load_in_8bit=True,
|
127 |
low_cpu_mem_usage=True,
|
128 |
+
use_flash_attn=True,
|
129 |
trust_remote_code=True).eval()
|
130 |
```
|
131 |
|
|
|
172 |
path,
|
173 |
torch_dtype=torch.bfloat16,
|
174 |
low_cpu_mem_usage=True,
|
175 |
+
use_flash_attn=True,
|
176 |
trust_remote_code=True,
|
177 |
device_map=device_map).eval()
|
178 |
```
|
|
|
190 |
path,
|
191 |
torch_dtype=torch.bfloat16,
|
192 |
low_cpu_mem_usage=True,
|
193 |
+
use_flash_attn=True,
|
194 |
trust_remote_code=True).eval().cuda()
|
195 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
196 |
|
|
|
218 |
path,
|
219 |
torch_dtype=torch.bfloat16,
|
220 |
low_cpu_mem_usage=True,
|
221 |
+
use_flash_attn=True,
|
222 |
trust_remote_code=True).eval().cuda()
|
223 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
224 |
|
|
|
245 |
path,
|
246 |
torch_dtype=torch.bfloat16,
|
247 |
low_cpu_mem_usage=True,
|
248 |
+
use_flash_attn=True,
|
249 |
trust_remote_code=True).eval().cuda()
|
250 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
251 |
|
|
|
279 |
path,
|
280 |
torch_dtype=torch.bfloat16,
|
281 |
low_cpu_mem_usage=True,
|
282 |
+
use_flash_attn=True,
|
283 |
trust_remote_code=True).eval().cuda()
|
284 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
285 |
|
|
|
318 |
path,
|
319 |
torch_dtype=torch.bfloat16,
|
320 |
low_cpu_mem_usage=True,
|
321 |
+
use_flash_attn=True,
|
322 |
trust_remote_code=True).eval().cuda()
|
323 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
324 |
|
|
|
356 |
path,
|
357 |
torch_dtype=torch.bfloat16,
|
358 |
low_cpu_mem_usage=True,
|
359 |
+
use_flash_attn=True,
|
360 |
trust_remote_code=True).eval().cuda()
|
361 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
362 |
|
|
|
427 |
path,
|
428 |
torch_dtype=torch.bfloat16,
|
429 |
low_cpu_mem_usage=True,
|
430 |
+
use_flash_attn=True,
|
431 |
trust_remote_code=True).eval().cuda()
|
432 |
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
|
433 |
|
modeling_internvl_chat.py
CHANGED
@@ -17,7 +17,7 @@ from transformers.utils import ModelOutput, logging
|
|
17 |
|
18 |
from .configuration_internvl_chat import InternVLChatConfig
|
19 |
from .conversation import get_conv_template
|
20 |
-
from .modeling_intern_vit import InternVisionModel
|
21 |
|
22 |
logger = logging.get_logger(__name__)
|
23 |
|
@@ -48,6 +48,7 @@ class InternVLChatModel(PreTrainedModel):
|
|
48 |
self.num_image_token = int((image_size // patch_size) ** 2 * (config.downsample_ratio ** 2))
|
49 |
self.downsample_ratio = config.downsample_ratio
|
50 |
self.ps_version = config.ps_version
|
|
|
51 |
config.vision_config.use_flash_attn = True if use_flash_attn else False
|
52 |
config.llm_config._attn_implementation = 'flash_attention_2' if use_flash_attn else 'eager'
|
53 |
|
|
|
17 |
|
18 |
from .configuration_internvl_chat import InternVLChatConfig
|
19 |
from .conversation import get_conv_template
|
20 |
+
from .modeling_intern_vit import InternVisionModel, has_flash_attn
|
21 |
|
22 |
logger = logging.get_logger(__name__)
|
23 |
|
|
|
48 |
self.num_image_token = int((image_size // patch_size) ** 2 * (config.downsample_ratio ** 2))
|
49 |
self.downsample_ratio = config.downsample_ratio
|
50 |
self.ps_version = config.ps_version
|
51 |
+
use_flash_attn = use_flash_attn if has_flash_attn else False
|
52 |
config.vision_config.use_flash_attn = True if use_flash_attn else False
|
53 |
config.llm_config._attn_implementation = 'flash_attention_2' if use_flash_attn else 'eager'
|
54 |
|