"""Name the main object in an image with MiniCPM-Llama3-V-2.5.

Loads the multimodal chat model and its tokenizer from the Hugging Face
Hub, opens a local image, asks the model for a short (<5 word) name of
the main object shown, and prints the answer.

Requires network access (or a populated HF cache) plus the
``transformers``, ``torch`` and ``Pillow`` packages.
"""

import torch  # noqa: F401 — backend required by transformers for model weights

from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub repository id of the vision-language model; trust_remote_code is
# mandatory because the repo ships its own modeling/chat code.
MODEL_ID = "openbmb/MiniCPM-Llama3-V-2_5"

# Local image to describe and the instruction sent alongside it.
IMAGE_PATH = "fridge.JPG"
PROMPT = (
    "What is the main object shown in the image? "
    "Describe in less than 5 words, as a name for it."
)


def main() -> None:
    """Load model + tokenizer, run one chat round on the image, print results."""
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
    print(model)

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

    image = Image.open(IMAGE_PATH)

    # MiniCPM-V 2.5's chat API takes the image inside the message content
    # list; the top-level `image` argument is therefore passed as None.
    msgs = [{'role': 'user', 'content': [image, PROMPT]}]
    answer = model.chat(
        image=None,
        msgs=msgs,
        tokenizer=tokenizer
    )
    print(answer)


if __name__ == "__main__":
    main()