from PIL import Image
import torch
from transformers import AutoModel, AutoTokenizer


class ModelHandler:
    def __init__(self):
        # Load the model and tokenizer with appropriate weights
        self.model = AutoModel.from_pretrained(
            'openbmb/MiniCPM-V-2_6',
            trust_remote_code=True,
            attn_implementation='sdpa',
            torch_dtype=torch.bfloat16
        ).eval().cuda()
        self.tokenizer = AutoTokenizer.from_pretrained(
            'openbmb/MiniCPM-V-2_6', trust_remote_code=True
        )

    def preprocess(self, inputs):
        # Preprocess image input
        image = Image.open(inputs['image'].file).convert('RGB')
        question = inputs.get("question", "What is in the image?")
        msgs = [{'role': 'user', 'content': [image, question]}]
        return msgs

    def inference(self, msgs):
        # Run inference on the model
        result = self.model.chat(image=None, msgs=msgs, tokenizer=self.tokenizer)
        return result

    def postprocess(self, result):
        # Postprocess the output from the model
        return {"generated_text": result}


service = ModelHandler()
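
# Minimal usage sketch (not part of the original handler). `preprocess`
# reads `inputs['image'].file`, which suggests an upload-style object with
# a `.file` attribute, so a small hypothetical wrapper is used here to
# match that interface; the image path and question are placeholders.
if __name__ == "__main__":
    class _Upload:
        # Hypothetical stand-in for an uploaded-file object exposing `.file`.
        def __init__(self, path):
            self.file = open(path, 'rb')

    inputs = {
        'image': _Upload('example.jpg'),  # assumed local image path
        'question': 'What is in the image?',
    }
    msgs = service.preprocess(inputs)
    result = service.inference(msgs)
    print(service.postprocess(result))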