"""Describe a folder of demo images with Qwen-VL-Chat and InternLM-XComposer2-VL."""

from pathlib import Path
from typing import Tuple

import auto_gptq
import torch
from auto_gptq.modeling import BaseGPTQForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer


class QwenVLChat:
    """Thin wrapper around Qwen-VL-Chat (optionally the Int4 GPTQ variant) for single-image prompts."""

    def __init__(self, device: str = "cuda:0", quantized: bool = False) -> None:
        if quantized:
            self.model = AutoModelForCausalLM.from_pretrained(
                "Qwen/Qwen-VL-Chat-Int4", device_map=device, trust_remote_code=True
            ).eval()
            self.tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat-Int4", trust_remote_code=True)
        else:
            self.model = AutoModelForCausalLM.from_pretrained(
                "Qwen/Qwen-VL-Chat", device_map=device, trust_remote_code=True, fp16=True
            ).eval()
            self.tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL-Chat", trust_remote_code=True)

    def __call__(self, prompt: str, image: str) -> Tuple[str, list]:
        # Qwen's tokenizer builds a multimodal query from a list of {"image": ...} / {"text": ...} items.
        query = self.tokenizer.from_list_format([{"image": image}, {"text": prompt}])
        response, history = self.model.chat(self.tokenizer, query=query, history=[])
        return response, history


class InternLMXComposer2QForCausalLM(BaseGPTQForCausalLM):
    # Tells auto_gptq which modules sit inside/outside the quantized transformer layers.
    layers_block_name = "model.layers"
    outside_layer_modules = [
        "vit",
        "vision_proj",
        "model.tok_embeddings",
        "model.norm",
        "output",
    ]
    inside_layer_modules = [
        ["attention.wqkv.linear"],
        ["attention.wo.linear"],
        ["feed_forward.w1.linear", "feed_forward.w3.linear"],
        ["feed_forward.w2.linear"],
    ]


class InternLMXComposer2:
    """Thin wrapper around InternLM-XComposer2-VL (optionally the 4-bit GPTQ variant)."""

    def __init__(self, device: str = "cuda:0", quantized: bool = True):
        if quantized:
            # Register "internlm" so auto_gptq accepts the custom model type.
            auto_gptq.modeling._base.SUPPORTED_MODELS = ["internlm"]
            self.model = InternLMXComposer2QForCausalLM.from_quantized(
                "internlm/internlm-xcomposer2-vl-7b-4bit", trust_remote_code=True, device=device
            ).eval()
            self.tokenizer = AutoTokenizer.from_pretrained(
                "internlm/internlm-xcomposer2-vl-7b-4bit", trust_remote_code=True
            )
        else:
            # Setting fp16=True does not work. See https://huggingface.co/internlm/internlm-xcomposer2-vl-7b/discussions/1.
            self.model = (
                AutoModelForCausalLM.from_pretrained(
                    "internlm/internlm-xcomposer2-vl-7b", device_map=device, trust_remote_code=True
                )
                .eval()
                .to(torch.float16)
            )
            self.tokenizer = AutoTokenizer.from_pretrained(
                "internlm/internlm-xcomposer2-vl-7b", trust_remote_code=True
            )

    def __call__(self, prompt: str, image: str) -> Tuple[str, list]:
        # InternLM-XComposer2-VL expects an "<ImageHere>" placeholder marking where the image goes.
        if not prompt.startswith("<ImageHere>"):
            prompt = "<ImageHere>" + prompt
        with torch.cuda.amp.autocast(), torch.no_grad():
            response, history = self.model.chat(
                self.tokenizer, query=prompt, image=image, history=[], do_sample=False
            )
        return response, history


if __name__ == "__main__":
    image_folder = "demo/"
    wildcard_list = ["*.jpg", "*.png"]
    image_list = []
    for wildcard in wildcard_list:
        image_list.extend([str(image_path) for image_path in Path(image_folder).glob(wildcard)])

    qwen_vl_chat = QwenVLChat(device="cuda:0", quantized=True)
    qwen_vl_prompt = "Please describe this image in detail."
    for image in image_list:
        response, _ = qwen_vl_chat(qwen_vl_prompt, image)
        print(image, response)

    internlm2_vl = InternLMXComposer2(device="cuda:0", quantized=False)
    internlm2_vl_prompt = "Please describe this image in detail."
    for image in image_list:
        response, _ = internlm2_vl(internlm2_vl_prompt, image)
        print(image, response)