</details>

## Quantize Your Own Ovis Model with AutoGPTQ
We provide a demonstration code snippet for quantizing your own fine-tuned Ovis model. Before running it, **follow the installation steps above** to set up an environment for quantization.
```python
from typing import Dict, Sequence, Union, List
import copy
import logging

from auto_gptq import BaseQuantizeConfig
from auto_gptq.modeling import OvisGPTQForCausalLM
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image


# Specify paths and hyperparameters for quantization
model_path = "path/to/finetuned/model"
quantize_save_path = "path/to/save/quantized/model"
IGNORE_ID = -100
device_idx = 2  # customize to your GPU
torch.cuda.set_device(device_idx)
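# GPTQ hyperparameters: bits sets the target weight precision, group_size is the
# number of weights that share one quantization scale, and damp_percent adds
# dampening to the Hessian diagonal for numerical stability.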
quantize_config = BaseQuantizeConfig(
    bits=4,  # 4 or 8
    group_size=128,
    damp_percent=0.1,
    desc_act=False,  # False significantly speeds up inference, at a slight cost in perplexity
    static_groups=False,
    sym=True,
    true_sequential=True,
)


# Load model
model = OvisGPTQForCausalLM.from_pretrained(
    model_path,
    quantize_config,
    torch_dtype=torch.bfloat16,
    multimodal_max_length=8192,
    trust_remote_code=True
)
print("Model Loaded!")


# prepare calibration samples
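# GPTQ is post-training quantization: it only needs a small set of representative
# samples to collect per-layer activation statistics, so the calibration set can be
# much smaller than a fine-tuning dataset. Samples should resemble the data the
# fine-tuned model sees at inference time.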
class CalibrationDataset(Dataset):
    """
    Dataset class for calibration. Initialize with the loaded Ovis model, and a sample list in the following format:
    data_list = [
        {
            "image": "path/to/image/of/this/sample",
            "conversations": [
                {
                    "from": "human",
                    "value": "<image>\n[Your sample prompt]"
                },
                {
                    "from": "gpt",
                    "value": "[Anything]"
                }
            ]
        },
        ...
    ]
    """
    def __init__(self, model, text_max_length, data_list: List[Dict]):
        self.data = data_list
        self.model = model
        self.visual_tokenizer = model.get_visual_tokenizer()
        self.text_max_length = text_max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, i: int) -> Dict[str, torch.Tensor]:
        sample = self.data[i]
        conversations = copy.deepcopy(sample["conversations"])
        images = [Image.open(sample['image'])]
        max_partition = 9

        prompt, input_ids, pixel_values, labels = self.model.preprocess_inputs(
            conversations,
            images,
            max_partition=max_partition,
            generation_preface=None,
            return_labels=True,
            propagate_exception=False
        )

        if pixel_values is None:
            pixel_values, _ = self.visual_tokenizer.mock_input()

        input_ids = input_ids[:self.text_max_length]
        labels = labels[:self.text_max_length]

        return dict(
            pixel_values=pixel_values,
            input_ids=input_ids,
            labels=labels
        )
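# The collator pads input_ids with the tokenizer's pad token and labels with IGNORE_ID
# so that variable-length samples can be batched; pixel_values are passed through as a
# per-sample sequence of tensors.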
class DataCollatorForMultimodalDatasetGPTQ:
    def __init__(self, text_tokenizer):
        self.text_tokenizer = text_tokenizer

    def __call__(self, instances: Sequence[Dict]) -> Dict[str, Union[torch.Tensor, List[torch.Tensor]]]:
        pixel_values, input_ids, labels = tuple([instance[key] for instance in instances]
                                                for key in ("pixel_values", "input_ids", "labels"))
        input_ids = torch.nn.utils.rnn.pad_sequence(
            input_ids,
            batch_first=True,
            padding_value=self.text_tokenizer.pad_token_id)
        attention_mask = torch.ne(input_ids, self.text_tokenizer.pad_token_id)
        labels = torch.nn.utils.rnn.pad_sequence(
            labels,
            batch_first=True,
            padding_value=IGNORE_ID)

        num_valid_label = torch.not_equal(labels, IGNORE_ID).sum().item()
        if num_valid_label == 0:
            logging.warning(
                f'[DataCollatorForMultimodalDatasetGPTQ] All labels are ignored, which may cause instability\n{input_ids=}\n{attention_mask=}\n{labels=}')

        return dict(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels,
            pixel_values=pixel_values
        )


class MyDataLoader(DataLoader):
    def __len__(self):
        return len(self.dataset) // self.batch_size  # requires drop_last=True


# prepare your own calibration samples here
data_list = [
    {
        "image": "path/to/image/of/this/sample",
        "conversations": [
            {
                "from": "human",
                "value": "<image>\n[Your sample prompt]"
            },
            {
                "from": "gpt",
                "value": "[Anything]"
            }
        ]
    }
]
train_dataset = CalibrationDataset(model, text_max_length=832, data_list=data_list)
print("Dataset Loaded!")
print(f"Total length of the training set: {len(train_dataset)}")

train_loader = MyDataLoader(
    train_dataset,
    collate_fn=DataCollatorForMultimodalDatasetGPTQ(model.get_text_tokenizer()),
    shuffle=False,
    batch_size=4,
    drop_last=True,
    pin_memory=True,
    num_workers=8
)
print("Dataloader Loaded!")


# start quantizing
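# Quantization runs layer by layer over the calibration batches; keeping
# cache_examples_on_gpu=False holds the cached layer inputs off the GPU,
# trading some speed for lower GPU memory use.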
model.quantize(train_loader, cache_examples_on_gpu=False, samples_dtype=torch.bfloat16)  # do not change samples_dtype
print("Model Quantized! Now Saving...")

model.save_quantized(quantize_save_path, use_safetensors=True)
print("ALL Done!")
```
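Once saving completes, the quantized checkpoint is meant to be used for inference like the released Ovis1.6-Gemma2-9B-GPTQ-Int4 model in the usage section above. The snippet below is only a minimal loading sketch under that assumption: the path is a placeholder, `torch.float16` is an assumed dtype, and the exact loading arguments should be kept consistent with the usage example earlier in this README.

```python
import torch
from transformers import AutoModelForCausalLM

# Placeholder: the directory passed to save_quantized() above
quantized_model_path = "path/to/save/quantized/model"

# Assumption: the locally saved checkpoint loads the same way as the released
# GPTQ model, i.e. through transformers with trust_remote_code enabled.
model = AutoModelForCausalLM.from_pretrained(
    quantized_model_path,
    torch_dtype=torch.float16,  # assumed; match the dtype used in the usage example above
    multimodal_max_length=8192,
    trust_remote_code=True
).cuda()

text_tokenizer = model.get_text_tokenizer()
visual_tokenizer = model.get_visual_tokenizer()
# From here, build prompts and call generate exactly as in the usage example above.
```

If loading fails, check that the same environment used for quantization (including its AutoGPTQ installation) is active.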
## Performance

Here we report the performance of Ovis1.6-Gemma2-9B-GPTQ-Int4. The results were obtained with VLMEvalKit.

![image/png](https://cdn-uploads.huggingface.co/production/uploads/645cb4b4a03f3ebb0bde20e0/pSKiBhCy1S6Fb1QODY_ZZ.png)