"""Gradio demo: generate four views of an object from one image, shown as a 2x2 grid."""

import gradio as gr
import numpy as np
import torch
import spaces
from diffusers import DiffusionPipeline
from PIL import Image

# Layout of the output contact sheet (columns x rows of generated views).
_GRID_COLUMNS = 2
_GRID_ROWS = 2

# Loaded once at import time and moved to the GPU; every request reuses it.
# NOTE(review): trust_remote_code=True executes the custom pipeline code
# fetched from the Hub repositories named below — keep those repos trusted.
multi_view_diffusion_pipeline = DiffusionPipeline.from_pretrained(
    "jkorstad/multi-view-diffusion",
    custom_pipeline="dylanebert/multi-view-diffusion",
    torch_dtype=torch.float16,
    trust_remote_code=True,
).to("cuda")


def _to_pil(view):
    """Convert one pipeline output array to an 8-bit PIL image.

    The array is scaled by 255 exactly as before; values are clipped to
    [0, 1] first so anything out of range cannot wrap around in the
    ``uint8`` cast.
    """
    return Image.fromarray((np.clip(view, 0.0, 1.0) * 255).astype("uint8"))


def _make_grid(views):
    """Paste the first four views, row-major, into a single RGB image.

    Every cell has the size of the first view.
    """
    width, height = views[0].size
    grid = Image.new("RGB", (_GRID_COLUMNS * width, _GRID_ROWS * height))
    for index, view in enumerate(views[: _GRID_COLUMNS * _GRID_ROWS]):
        row, column = divmod(index, _GRID_COLUMNS)
        grid.paste(view, (column * width, row * height))
    return grid


@spaces.GPU
def run(image, elevation):
    """Generate multi-view images for ``image`` and return them as one grid.

    Args:
        image: The value delivered by the "Input Image" component; it is
            converted with ``np.array(..., dtype=np.float32)`` and scaled
            to the 0-1 range before being passed to the pipeline.
        elevation: The "Elevation" slider value, forwarded unchanged to the
            pipeline's ``elevation`` argument.

    Returns:
        A PIL RGB image containing the first four generated views in a
        2x2 grid (left-to-right, top-to-bottom).

    Raises:
        gr.Error: If no input image was provided.
    """
    # Gradio passes None when the user submits without an image; without this
    # check np.array(None) silently becomes a NaN scalar and the pipeline
    # fails with an unhelpful error.
    if image is None:
        raise gr.Error("Please provide an input image.")

    pixels = np.array(image, dtype=np.float32) / 255.0
    views = multi_view_diffusion_pipeline(
        "",  # empty text prompt: generation is driven by the image only
        pixels,
        guidance_scale=5,
        num_inference_steps=30,
        elevation=elevation,
    )
    return _make_grid([_to_pil(view) for view in views])


demo = gr.Interface(
    title="Quick demo of the multi-view from an image model",
    fn=run,
    inputs=[
        gr.Image(label="Input Image"),
        gr.Slider(
            minimum=0,
            maximum=100,
            value=10,
            label="Elevation",
            info="Choose the elevation value for the generated multi view output. A higher value will be closest to a birds eye view of your object.",
        ),
    ],
    outputs=gr.Image(label="Output Image"),
)

demo.launch()