from PIL import Image
import torch
from transformers import AutoModel, AutoTokenizer


class ModelHandler:
    def __init__(self):
        # Load the model and tokenizer with appropriate weights
        self.model = AutoModel.from_pretrained(
            'openbmb/MiniCPM-V-2_6',
            trust_remote_code=True,
            attn_implementation='sdpa',
            torch_dtype=torch.bfloat16
        ).eval().cuda()
        self.tokenizer = AutoTokenizer.from_pretrained(
            'openbmb/MiniCPM-V-2_6', trust_remote_code=True
        )

    def preprocess(self, inputs):
        # Preprocess image input
        image = Image.open(inputs['image'].file).convert('RGB')
        question = inputs.get("question", "What is in the image?")
        msgs = [{'role': 'user', 'content': [image, question]}]
        return msgs

    def inference(self, msgs):
        # Run inference on the model
        result = self.model.chat(image=None, msgs=msgs, tokenizer=self.tokenizer)
        return result

    def postprocess(self, result):
        # Postprocess the output from the model
        return {"generated_text": result}


service = ModelHandler()
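
# Minimal usage sketch (not part of the original handler). `preprocess`
# reads `inputs['image'].file`, which suggests an upload-style object with
# a `.file` attribute, so a small hypothetical wrapper is used here to
# match that interface; the image path and question are placeholders.
if __name__ == "__main__":
    class _Upload:
        # Hypothetical stand-in for an uploaded-file object exposing `.file`.
        def __init__(self, path):
            self.file = open(path, 'rb')

    inputs = {
        'image': _Upload('example.jpg'),  # assumed local image path
        'question': 'What is in the image?',
    }
    msgs = service.preprocess(inputs)
    result = service.inference(msgs)
    print(service.postprocess(result))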