File size: 1,140 Bytes
b1cc8b6
 
cd5795f
b1cc8b6
 
 
cd5795f
 
 
 
 
 
 
 
 
b1cc8b6
 
cd5795f
 
 
 
 
b1cc8b6
cd5795f
 
 
 
b1cc8b6
cd5795f
 
 
b1cc8b6
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from PIL import Image
import torch
from transformers import AutoModel, AutoTokenizer

class ModelHandler:
    """Serve the MiniCPM-V-2.6 vision-language model for image question answering.

    Lifecycle: ``preprocess`` turns a request into chat messages,
    ``inference`` runs the model, ``postprocess`` wraps the raw text
    in a JSON-serializable dict.
    """

    # Single source of truth for the checkpoint id (was duplicated in two
    # from_pretrained calls, which could silently drift apart).
    MODEL_NAME = 'openbmb/MiniCPM-V-2_6'

    def __init__(self):
        """Load model weights and tokenizer once, at construction time.

        Uses bfloat16 weights with the SDPA attention implementation.
        Falls back to CPU when no CUDA device is present, instead of the
        previous unconditional ``.cuda()`` which crashed on GPU-less hosts.
        """
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = AutoModel.from_pretrained(
            self.MODEL_NAME,
            trust_remote_code=True,
            attn_implementation='sdpa',
            torch_dtype=torch.bfloat16,
        ).eval().to(device)

        self.tokenizer = AutoTokenizer.from_pretrained(
            self.MODEL_NAME, trust_remote_code=True
        )

    def preprocess(self, inputs):
        """Build the single-turn chat message list the model expects.

        Args:
            inputs: mapping with an ``'image'`` upload (an object exposing a
                file-like ``.file`` attribute) and an optional ``'question'``
                string (defaults to "What is in the image?").

        Returns:
            list: one user message whose content is ``[PIL.Image, question]``.

        Raises:
            KeyError: if ``'image'`` is missing from ``inputs``.
        """
        image = Image.open(inputs['image'].file).convert('RGB')
        question = inputs.get("question", "What is in the image?")
        return [{'role': 'user', 'content': [image, question]}]

    def inference(self, msgs):
        """Run the chat model on preprocessed messages and return the raw text.

        ``image=None`` because the image travels inside ``msgs`` (see
        ``preprocess``), per the model's remote-code chat API.
        """
        return self.model.chat(image=None, msgs=msgs, tokenizer=self.tokenizer)

    def postprocess(self, result):
        """Wrap the model's raw text output in a JSON-serializable dict."""
        return {"generated_text": result}

service = ModelHandler()