Need to Update image_processor_type to Qwen2VLImageProcessor

#1
by TahirC - opened

I checked the preprocessor_config.json for both Qwen/Qwen2.5-VL-3B-Instruct and unsloth/Qwen2.5-VL-3B-Instruct-unsloth-bnb-4bit, and I think the "image_processor_type": "Qwen2_5_VLImageProcessor" entry in the unsloth repo is causing this issue: the official Qwen file has "image_processor_type": "Qwen2VLImageProcessor" instead.
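
To double-check the mismatch without downloading either full model, you can fetch just the two config files and compare the field directly. A minimal sketch, assuming huggingface_hub is installed (repo and file names are the ones from above):

import json
from huggingface_hub import hf_hub_download

# Fetch only preprocessor_config.json from each repo and print the field
# that AutoImageProcessor uses to pick the processor class.
for repo in (
    "Qwen/Qwen2.5-VL-3B-Instruct",
    "unsloth/Qwen2.5-VL-3B-Instruct-unsloth-bnb-4bit",
):
    path = hf_hub_download(repo, "preprocessor_config.json")
    with open(path) as f:
        cfg = json.load(f)
    print(repo, "->", cfg.get("image_processor_type"))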


Code:

from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info
import torch

model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "unsloth/Qwen2.5-VL-3B-Instruct-unsloth-bnb-4bit",
    torch_dtype=torch.float16,
    device_map="auto",
)

# min_pixels/max_pixels as in the Qwen2.5-VL usage example
# (256*28*28 and 1280*28*28).
processor = AutoProcessor.from_pretrained(
    "unsloth/Qwen2.5-VL-3B-Instruct-unsloth-bnb-4bit",
    min_pixels=256 * 28 * 28,
    max_pixels=1280 * 28 * 28,
)


Exception:
Using a slow image processor as use_fast is unset and a slow processor was saved with this model. use_fast=True will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with use_fast=False.

ValueError Traceback (most recent call last)
Cell In[1], line 23
6 # default: Load the model on the available device(s)
7 # model = Qwen2VLForConditionalGeneration.from_pretrained(
8 # "Qwen/Qwen2-VL-2B-Instruct", torch_dtype=torch.float16, device_map="auto"
(...)
15
16 ## TEST
17 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
18 "unsloth/Qwen2.5-VL-3B-Instruct-unsloth-bnb-4bit",
19 torch_dtype=torch.float16,
20 device_map="auto"
21 )
---> 23 processor = AutoProcessor.from_pretrained("unsloth/Qwen2.5-VL-3B-Instruct-unsloth-bnb-4bit",
24 min_pixels = 256*28*28,
25 max_pixels = 1280*28*28
26 )
27 ## END
28
29 # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
(...)
88 # print(output_text)
89 # torch.cuda.empty_cache()

File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py:334, in AutoProcessor.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
330 return processor_class.from_pretrained(
331 pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
332 )
333 elif processor_class is not None:
--> 334 return processor_class.from_pretrained(
335 pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
336 )
337 # Last try: we use the PROCESSOR_MAPPING.
338 elif type(config) in PROCESSOR_MAPPING:

File /opt/conda/lib/python3.10/site-packages/transformers/processing_utils.py:1070, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
1067 if token is not None:
1068 kwargs["token"] = token
-> 1070 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
1071 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
1073 return cls.from_args_and_dict(args, processor_dict, **kwargs)

File /opt/conda/lib/python3.10/site-packages/transformers/processing_utils.py:1116, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
1113 else:
1114 attribute_class = getattr(transformers_module, class_name)
-> 1116 args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
1117 return args

File /opt/conda/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py:569, in AutoImageProcessor.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
564 else:
565 raise ValueError(
566 "This image processor cannot be instantiated. Please make sure you have Pillow installed."
567 )
--> 569 raise ValueError(
570 f"Unrecognized image processor in {pretrained_model_name_or_path}. Should have a "
571 f"image_processor_type key in its {IMAGE_PROCESSOR_NAME} of {CONFIG_NAME}, or one of the following "
572 f"model_type keys in its {CONFIG_NAME}: {', '.join(c for c in IMAGE_PROCESSOR_MAPPING_NAMES.keys())}"
573 )

ValueError: Unrecognized image processor in unsloth/Qwen2.5-VL-3B-Instruct-unsloth-bnb-4bit. Should have a image_processor_type key in its preprocessor_config.json of config.json, or one of the following model_type keys in its config.json: align, aria, beit, bit, blip, blip-2, bridgetower, chameleon, chinese_clip, clip, clipseg, conditional_detr, convnext, convnextv2, cvt, data2vec-vision, deformable_detr, deit, depth_anything, depth_pro, deta, detr, dinat, dinov2, donut-swin, dpt, efficientformer, efficientnet, flava, focalnet, fuyu, git, glpn, got_ocr2, grounding-dino, groupvit, hiera, idefics, idefics2, idefics3, ijepa, imagegpt, instructblip, instructblipvideo, kosmos-2, layoutlmv2, layoutlmv3, levit, llava, llava_next, llava_next_video, llava_onevision, mask2former, maskformer, mgp-str, mllama, mobilenet_v1, mobilenet_v2, mobilevit, mobilevitv2, nat, nougat, oneformer, owlv2, owlvit, paligemma, perceiver, pix2struct, pixtral, poolformer, pvt, pvt_v2, qwen2_5_vl, qwen2_vl, regnet, resnet, rt_detr, sam, segformer, seggpt, siglip, superglue, swiftformer, swin, swin2sr, swinv2, table-transformer, timesformer, timm_wrapper, tvlt, tvp, udop, upernet, van, videomae, vilt, vipllava, vit, vit_hybrid, vit_mae, vit_msn, vitmatte, xclip, yolos, zoedepth
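
Until the unsloth repo updates image_processor_type, one possible workaround is to keep loading the quantized weights from unsloth but take the processor from the official Qwen repo, whose preprocessor_config.json uses the recognized class name. A sketch (untested against this exact checkpoint; it assumes the official processor settings are compatible, which they should be since only the weight quantization differs):

from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
import torch

# Quantized weights from the unsloth repo, as before.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "unsloth/Qwen2.5-VL-3B-Instruct-unsloth-bnb-4bit",
    torch_dtype=torch.float16,
    device_map="auto",
)

# Processor from the official repo, which declares
# "image_processor_type": "Qwen2VLImageProcessor".
processor = AutoProcessor.from_pretrained(
    "Qwen/Qwen2.5-VL-3B-Instruct",
    min_pixels=256 * 28 * 28,
    max_pixels=1280 * 28 * 28,
)

Alternatively, downloading the unsloth repo locally and editing image_processor_type in preprocessor_config.json to "Qwen2VLImageProcessor" should have the same effect.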
