Update quantization method to BitsAndBytesConfig for newer transformers versions (tested locally with 4.53.1)

#2
Files changed (1)
  1. README.md +6 -4
README.md CHANGED
@@ -195,14 +195,15 @@ model = AutoModel.from_pretrained(
 
 ```python
 import torch
-from transformers import AutoTokenizer, AutoModel
+from transformers import AutoTokenizer, AutoModel, BitsAndBytesConfig
 path = "OpenGVLab/InternVL3-2B"
+quant_config = BitsAndBytesConfig(load_in_8bit=True)
 model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
-    load_in_8bit=True,
     low_cpu_mem_usage=True,
     use_flash_attn=True,
+    quantization_config = quant_config,
     trust_remote_code=True).eval()
 ```
 
@@ -262,7 +263,7 @@ import torchvision.transforms as T
 from decord import VideoReader, cpu
 from PIL import Image
 from torchvision.transforms.functional import InterpolationMode
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
 
 IMAGENET_MEAN = (0.485, 0.456, 0.406)
 IMAGENET_STD = (0.229, 0.224, 0.225)
@@ -368,10 +369,11 @@ def split_model(model_name):
 # If you set `load_in_8bit=False`, you will need at least three 80GB GPUs.
 path = 'OpenGVLab/InternVL3-2B'
 device_map = split_model('InternVL3-2B')
+quant_config = BitsAndBytesConfig(load_in_8bit=False)
 model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
-    load_in_8bit=False,
+    quantization_config = quant_config,
     low_cpu_mem_usage=True,
     use_flash_attn=True,
     trust_remote_code=True,
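
For reference, this is roughly how the first README snippet reads once the change is applied: the 8-bit option is wrapped in a `BitsAndBytesConfig` object and passed via `quantization_config` instead of handing `load_in_8bit` directly to `from_pretrained`. This is only a consolidated sketch of the diff above, assuming `transformers` 4.53.x with `bitsandbytes` installed and a CUDA GPU available; `use_flash_attn` is the InternVL-specific flag already used in the README, and the tokenizer line is added here just to make the sketch self-contained.

```python
import torch
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig

path = "OpenGVLab/InternVL3-2B"

# 8-bit loading is requested through a BitsAndBytesConfig object
# rather than a bare load_in_8bit=True keyword argument.
quant_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModel.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    use_flash_attn=True,  # model-specific flag from the InternVL README
    quantization_config=quant_config,
    trust_remote_code=True).eval()

tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
```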