import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Single source of truth for the checkpoint so the model and its tokenizer
# can never be loaded from different repositories by accident.
MODEL_ID = "openbmb/MiniCPM-Llama3-V-2_5"

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only a safe default on the GPU. On the CPU fallback,
# float16 kernels are missing or very slow depending on the PyTorch version,
# so load full-precision weights there (at the cost of roughly twice the RAM).
dtype = torch.float16 if device == "cuda" else torch.float32

# Load the model, move it to the selected device and switch it to inference
# behaviour (model.eval()).
model = AutoModel.from_pretrained(MODEL_ID, trust_remote_code=True, torch_dtype=dtype)
model = model.to(device=device)
model.eval()

# Load the tokenizer that belongs to the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Text shown at the top of the page.
title = "Sudoku Solver by FG"
description = "Sudoku Solver using MiniCPM-Llama3-V-2_5"

# Input widgets: the puzzle picture (requested as a PIL image) and an
# editable prompt pre-filled with the default solving instruction.
image = gr.Image(label="Image", type="pil")
question = gr.Textbox(
    label="Question",
    value="Using the standard 9x9 sudoku format, solve the sudoku puzzle in the image correctly.",
)

# Output widget: the model's reply, with a copy-to-clipboard button.
answer = gr.Textbox(
    label="Answer",
    show_label=True,
    show_copy_button=True,
)

# Define the function for solving Sudoku
def solve_sudoku(image, question: str) -> str:
    """Ask the vision-language model to solve the sudoku shown in ``image``.

    Args:
        image: PIL image of the puzzle, or ``None`` when the form was
            submitted without a picture.
        question: Prompt sent to the model as the single user message.

    Returns:
        The model's reply as one string.

    Raises:
        gr.Error: If no image was provided.
    """
    # An empty image input arrives as None; without this guard the mode check
    # below fails with an opaque AttributeError instead of a readable message.
    if image is None:
        raise gr.Error("Please upload an image of a sudoku puzzle first.")

    # Normalise to 3-channel RGB; images already in RGB are passed through
    # untouched to avoid a needless copy.
    if image.mode != "RGB":
        image = image.convert("RGB")

    msgs = [{"role": "user", "content": question}]
    res = model.chat(
        image=image,
        msgs=msgs,
        tokenizer=tokenizer,
        sampling=False,
        # NOTE(review): with sampling disabled the temperature is presumably
        # ignored by the model's chat() - confirm against the model code.
        temperature=0.7,
        stream=False,  # Streaming is disabled: the full reply is returned at once
        system_prompt="You are an expert in solving sudoku puzzles. Please solve the sudoku puzzle in the image correctly.",
    )
    # join() leaves a plain string unchanged and concatenates the pieces if
    # chat() hands back an iterable of string chunks.
    return "".join(res)

# Assemble the single-function app: (image, question) -> answer.
demo = gr.Interface(
    solve_sudoku,
    [image, question],
    answer,
    title=title,
    description=description,
    theme="compact",
)

# Start the server; share=True additionally requests a public share link.
demo.launch(share=True)