import gradio as gr
import numpy as np
import torch
import spaces

from diffusers import DiffusionPipeline
from PIL import Image

# Build the multi-view pipeline once at import time so every request reuses it.
# Weights come from "jkorstad/multi-view-diffusion"; the pipeline class itself
# is the custom one published under "dylanebert/multi-view-diffusion", which is
# why remote code has to be trusted here.
multi_view_diffusion_pipeline = DiffusionPipeline.from_pretrained(
    "jkorstad/multi-view-diffusion",
    custom_pipeline="dylanebert/multi-view-diffusion",
    torch_dtype=torch.float16,  # half-precision weights
    trust_remote_code=True,
)
# Place the loaded pipeline on the CUDA device.
multi_view_diffusion_pipeline = multi_view_diffusion_pipeline.to("cuda")

@spaces.GPU
def run(image, elevation):
    """Generate four views of the input image and tile them into a 2x2 grid.

    Args:
        image: Input picture as delivered by the ``gr.Image`` input, i.e.
            something ``np.array`` can turn into a numeric array with values
            on a 0-255 scale. ``None`` when nothing was uploaded.
        elevation: Elevation value forwarded unchanged to the pipeline.

    Returns:
        An RGB ``PIL.Image.Image`` twice as wide and twice as tall as a single
        generated view, holding the first four views in row-major order.

    Raises:
        gr.Error: If no input image was provided.
        IndexError: If the pipeline returns fewer than four views.
    """
    if image is None:
        # np.array(None, dtype=np.float32) silently produces a NaN scalar, so
        # without this guard the failure only shows up as an obscure error
        # inside the pipeline. Surface a clear message in the UI instead.
        raise gr.Error("Please provide an input image before submitting.")

    # Rescale 0-255 pixel values to floats in [0, 1] before calling the model.
    image = np.array(image, dtype=np.float32) / 255.0
    images = multi_view_diffusion_pipeline(
        "", image, guidance_scale=5, num_inference_steps=30, elevation=elevation
    )

    # NOTE(review): assumes each returned view is a float array in [0, 1] so
    # that scaling by 255 fits in uint8 -- confirm against the pipeline code.
    images = [Image.fromarray((img * 255).astype("uint8")) for img in images]

    width, height = images[0].size
    grid_img = Image.new("RGB", (2 * width, 2 * height))

    # Row-major layout: views 0 and 1 on the top row, 2 and 3 on the bottom.
    for index in range(4):
        row, col = divmod(index, 2)
        grid_img.paste(images[index], (col * width, row * height))

    return grid_img


# Gradio UI: one image plus an elevation slider in, one tiled image out.
demo = gr.Interface(
    fn=run,
    inputs=[
        gr.Image(label="Input Image"),
        # Slider spans 0-100 and starts at 10; its value is passed to run()
        # as the `elevation` argument.
        gr.Slider(
            minimum=0,
            maximum=100,
            value=10,
            label="Elevation",
            info="Choose the elevation value for the generated multi view output. A higher value will be closest to a birds eye view of your object.",
        ),
    ],
    outputs=gr.Image(label="Output Image"),
    title="Quick demo of the multi-view from an image model",
)

# Start the web server for the interface.
demo.launch()