"""Gradio demo that asks MiniCPM-Llama3-V-2_5 to solve a sudoku from an image."""

from typing import Optional

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

MODEL_ID = "openbmb/MiniCPM-Llama3-V-2_5"

device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only used on the GPU; on the CPU fallback the model is
# loaded in float32 because float16 inference is poorly supported there.
model_dtype = torch.float16 if device == "cuda" else torch.float32

# Load the model
model = AutoModel.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    torch_dtype=model_dtype,
)
model = model.to(device=device)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model.eval()

# Define the Gradio components
image = gr.Image(type="pil", label="Image")
question = gr.Textbox(
    value="Using the standard 9x9 sudoku format, solve the sudoku puzzle in the image correctly.",
    label="Question",
)
answer = gr.Textbox(label="Answer", show_label=True, show_copy_button=True)

title = "Sudoku Solver by FG"
description = "Sudoku Solver using MiniCPM-Llama3-V-2_5"


def solve_sudoku(image: Optional[Image.Image], question: str) -> str:
    """Ask the vision-language model to answer ``question`` about ``image``.

    Args:
        image: The uploaded puzzle as a PIL image (the input component is
            declared with ``type="pil"``). ``None`` is rejected with a
            user-facing error instead of crashing on ``image.convert``.
        question: The prompt sent to the model as the single user message.

    Returns:
        The model's reply as one string.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an image of the sudoku puzzle first.")

    # Convert image to RGB format if not already in RGB
    if image.mode != "RGB":
        image = image.convert("RGB")

    msgs = [{"role": "user", "content": question}]

    res = model.chat(
        image=image,
        msgs=msgs,
        tokenizer=tokenizer,
        sampling=False,
        # NOTE(review): with sampling disabled the temperature is presumably
        # ignored by the model's chat() implementation -- confirm, then drop.
        temperature=0.7,
        stream=False,  # Streaming is disabled; the full reply is returned at once
        system_prompt="You are an expert in solving sudoku puzzles. Please solve the sudoku puzzle in the image correctly.",
    )

    # "".join accepts any iterable of strings, so this yields the full text
    # whether chat() hands back a single string or a sequence of chunks.
    return "".join(res)


# Create the Gradio interface
demo = gr.Interface(
    fn=solve_sudoku,
    inputs=[image, question],
    outputs=answer,
    title=title,
    description=description,
    # NOTE(review): "compact" may not be a recognised theme name in the
    # installed Gradio version -- verify against the pinned release.
    theme="compact",
)

# Launch the interface
demo.launch(share=True)