Spaces:

AP123
/

IllusionDiffusion

Running on Zero

File size: 6,210 Bytes

18274c1
453ed2e
 
 
1a833ba
453ed2e
a29e3ba
00f6a78
9ad92f4
453ed2e
9ad92f4
4984c7e
 
be85eb8
 
453ed2e
e56af76
b31f6c0
be85eb8
ecc6c05
453ed2e
00f6a78
 
a29e3ba
4984c7e
be85eb8
766763f
be85eb8
 
 
ecc6c05
842563d
be85eb8
 
ecc6c05
766763f
9ad92f4
00f6a78
453ed2e
00f6a78
ee36d88
ecc6c05
4984c7e
453ed2e
7391723
a29e3ba
453ed2e
 
 
 
 
9ad92f4
 
 
842563d
 
 
 
 
9ad92f4
 
842563d
9ad92f4
 
4984c7e
842563d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4984c7e
 
842563d
 
 
 
 
 
4984c7e
86d5e88
 
 
842563d
b31f6c0
 
49ad6a5
842563d
 
 
 
 
 
 
 
 
 
 
 
b31f6c0
453ed2e
4984c7e
842563d
a29e3ba
842563d
1a833ba
7391723
842563d
a29e3ba
453ed2e
 
4984c7e
9ad92f4
 
453ed2e
4984c7e
 
 
 
 
842563d
4984c7e
842563d
 
4984c7e
 
842563d
4984c7e
 
 
 
 
 
 
 
 
c000f9c
842563d
 
 
c000f9c
 
842563d
 
 
 
 
 
 
453ed2e
 
842563d
 
 
453ed2e
842563d
 
c000f9c
842563d
 
 
453ed2e
 
842563d

import spaces
import torch
import gradio as gr
from PIL import Image
import random
from diffusers import (
    DiffusionPipeline,
    AutoencoderKL,
    StableDiffusionControlNetPipeline,
    ControlNetModel,
    StableDiffusionLatentUpscalePipeline,
    StableDiffusionImg2ImgPipeline,
    StableDiffusionControlNetImg2ImgPipeline,
    DPMSolverMultistepScheduler,
    EulerDiscreteScheduler
)
import tempfile
import time
import os
from transformers import CLIPImageProcessor

BASE_MODEL = "SG161222/Realistic_Vision_V5.1_noVAE"

# Load the VAE and the ControlNet once at import time, in half precision;
# both are handed to the pipeline built below.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
controlnet = ControlNetModel.from_pretrained("monster-labs/control_v1p_sd15_qrcode_monster", torch_dtype=torch.float16)

# The safety checker is opt-in: it is only loaded when the SAFETY_CHECKER
# environment variable is exactly "1". Otherwise both values stay None and
# are passed to the pipeline as such.
SAFETY_CHECKER_ENABLED = os.environ.get("SAFETY_CHECKER", "0") == "1"
safety_checker = None
feature_extractor = None

if SAFETY_CHECKER_ENABLED:
    # The file's top-level diffusers import does not bring this class into
    # scope, so enabling the checker used to fail with NameError. Importing it
    # here keeps the dependency local to the branch that needs it.
    from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker

    safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker").to("cuda")
    feature_extractor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Text-to-image ControlNet pipeline used for the first generation pass.
main_pipe = StableDiffusionControlNetPipeline.from_pretrained(
    BASE_MODEL,
    controlnet=controlnet,
    vae=vae,
    safety_checker=safety_checker,
    feature_extractor=feature_extractor,
    torch_dtype=torch.float16,
).to("cuda")

# Sampler map: UI sampler label -> factory that builds a scheduler from an
# existing scheduler config.
SAMPLER_MAP = {
    # The scheduler option is spelled `use_karras_sigmas`. The previous
    # `use_karras=True` is not a recognised option, so Karras sigmas were
    # never actually enabled for this sampler.
    "DPM++ Karras SDE": lambda config: DPMSolverMultistepScheduler.from_config(
        config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
    ),
    "Euler": lambda config: EulerDiscreteScheduler.from_config(config),
}

def center_crop_resize(img, output_size=(512, 512)):
    """Center-crop *img* to a square and resize it to *output_size*.

    Args:
        img: Image object exposing ``size`` as ``(width, height)`` plus
            ``crop(box)`` and ``resize(size)`` (e.g. a PIL image).
        output_size: ``(width, height)`` passed to ``resize``.

    Returns:
        Whatever ``img.crop(...).resize(output_size)`` returns.
    """
    width, height = img.size
    side = min(width, height)

    # Use an integer box. With true division the edges can land on x.5 and are
    # then rounded independently by the imaging library, which may yield a
    # crop that is one pixel off from square (e.g. 6x3 -> 2x3). Deriving the
    # far edges from the near ones guarantees a side x side crop.
    left = (width - side) // 2
    top = (height - side) // 2

    square = img.crop((left, top, left + side, top + side))
    return square.resize(output_size)

def common_upscale(samples, width, height, upscale_method, crop=False):
    """Resize a 4-D tensor to ``(height, width)``, optionally center-cropping first.

    Args:
        samples: Tensor whose dimension 2 is treated as height and dimension 3
            as width.
        width: Target width.
        height: Target height.
        upscale_method: Interpolation mode forwarded to
            ``torch.nn.functional.interpolate``.
        crop: When equal to ``"center"``, the input is trimmed symmetrically
            along its longer axis to match the target aspect ratio before
            resizing. Any other value resizes the input unchanged.

    Returns:
        The interpolated tensor.
    """
    source = samples

    if crop == "center":
        src_height, src_width = samples.shape[2], samples.shape[3]
        src_ratio = src_width / src_height
        dst_ratio = width / height

        # Amount trimmed from each side; at most one axis is trimmed.
        trim_x = 0
        trim_y = 0
        if src_ratio > dst_ratio:
            trim_x = round((src_width - src_width * (dst_ratio / src_ratio)) / 2)
        elif src_ratio < dst_ratio:
            trim_y = round((src_height - src_height * (src_ratio / dst_ratio)) / 2)

        source = samples[:, :, trim_y:src_height - trim_y, trim_x:src_width - trim_x]

    return torch.nn.functional.interpolate(source, size=(height, width), mode=upscale_method)

def upscale(samples, upscale_method, scale_by):
    """Scale ``samples["images"]`` by *scale_by* in both spatial dimensions.

    Args:
        samples: Mapping-like object whose ``"images"`` entry is a 4-D tensor
            (dimension 2 is used as height, dimension 3 as width).
        upscale_method: Interpolation mode forwarded to ``common_upscale``.
        scale_by: Multiplier applied to height and width; results are rounded.

    Returns:
        The resized tensor returned by ``common_upscale``.
    """
    images = samples["images"]
    target_height = round(images.shape[2] * scale_by)
    target_width = round(images.shape[3] * scale_by)

    # "disabled" is not "center", so no cropping is performed.
    return common_upscale(images, target_width, target_height, upscale_method, "disabled")

def check_inputs(prompt: str, control_image: Image.Image):
    """Validate the user inputs before generation is started.

    Args:
        prompt: Text prompt; must be neither ``None`` nor the empty string.
        control_image: Illusion image; must not be ``None``.

    Raises:
        gr.Error: If the image is missing, or (checked second) if the prompt
            is missing.
    """
    image_missing = control_image is None
    if image_missing:
        raise gr.Error("Please select or upload an Input Illusion")

    if prompt in (None, ""):
        raise gr.Error("Prompt is required")

# Lazily-built img2img pipeline used for the second (upscaling) pass; access
# it through _get_image_pipe().
_image_pipe = None


def _get_image_pipe():
    """Return the ControlNet img2img pipeline, creating it on first use.

    The pipeline is assembled from ``main_pipe.components``, so it wraps the
    module objects that ``main_pipe`` already holds.
    """
    global _image_pipe
    if _image_pipe is None:
        _image_pipe = StableDiffusionControlNetImg2ImgPipeline(**main_pipe.components)
    return _image_pipe


@spaces.GPU
def inference(control_image: Image.Image, prompt: str, negative_prompt: str,
              guidance_scale: float = 8.0,
              controlnet_conditioning_scale: float = 1,
              control_guidance_start: float = 0,
              control_guidance_end: float = 1,
              upscaler_strength: float = 0.5,
              seed: int = -1,
              sampler="DPM++ Karras SDE",
              progress=gr.Progress(track_tqdm=True),
              profile=None):
    """Generate an illusion image in two passes and return the final image.

    Pass 1 runs ``main_pipe`` for 15 steps on a 512x512 center crop of the
    control image and keeps the result as latents. The latents are enlarged
    2x with ``nearest-exact`` interpolation. Pass 2 runs the ControlNet
    img2img pipeline for 20 steps on those latents, conditioned on a
    1024x1024 center crop of the control image.

    Args:
        control_image: Illusion/control image.
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        guidance_scale: Guidance scale used in both passes.
        controlnet_conditioning_scale: ControlNet conditioning scale used in
            both passes.
        control_guidance_start: Forwarded to both pipeline calls. The default
            is 0 because the pipelines reject a start that is not strictly
            below ``control_guidance_end``; the previous default of 1 made
            every call that relied on the defaults fail.
        control_guidance_end: Forwarded to both pipeline calls.
        upscaler_strength: ``strength`` of the img2img pass.
        seed: RNG seed; ``-1`` picks a random 32-bit seed.
        sampler: Key into ``SAMPLER_MAP`` selecting the scheduler.
        progress: Not referenced in the body; presumably declared so Gradio
            injects a tqdm-tracking progress bar -- confirm against Gradio docs.
        profile: Not referenced in the body.

    Returns:
        The first image of the second pass' output.

    Raises:
        KeyError: If *sampler* is not a key of ``SAMPLER_MAP``.
    """
    control_image_small = center_crop_resize(control_image)
    control_image_large = center_crop_resize(control_image, (1024, 1024))

    # Both passes share the scheduler selected by the caller.
    main_pipe.scheduler = SAMPLER_MAP[sampler](main_pipe.scheduler.config)
    image_pipe = _get_image_pipe()
    image_pipe.scheduler = main_pipe.scheduler

    my_seed = random.randint(0, 2**32 - 1) if seed == -1 else seed
    # int(): manual_seed needs an integer, while UI number widgets may hand
    # over a float.
    generator = torch.Generator(device="cuda").manual_seed(int(my_seed))

    out = main_pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=control_image_small,
        guidance_scale=float(guidance_scale),
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
        generator=generator,
        control_guidance_start=float(control_guidance_start),
        control_guidance_end=float(control_guidance_end),
        num_inference_steps=15,
        output_type="latent"
    )

    upscaled_latents = upscale(out, "nearest-exact", 2)

    # `control_image` and `strength` are img2img-pipeline arguments; the
    # text-to-image `main_pipe` does not accept them, hence the separate
    # pipeline for this pass.
    out_image = image_pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        control_image=control_image_large,
        image=upscaled_latents,
        guidance_scale=float(guidance_scale),
        generator=generator,
        num_inference_steps=20,
        strength=upscaler_strength,
        control_guidance_start=float(control_guidance_start),
        control_guidance_end=float(control_guidance_end),
        controlnet_conditioning_scale=float(controlnet_conditioning_scale)
    )

    # Save image + metadata logic here

    # The UI binds this function's output to an image component, so the
    # generated image has to be returned.
    return out_image["images"][0]

# Build the Gradio UI. Components are created in the order written here, so
# the statement order below defines the page layout.
with gr.Blocks() as app:
    # Page header (raw HTML inside a Markdown component).
    gr.Markdown('''
      <div style="text-align: center;">
      <h1>Illusion Diffusion HQ 🌀</h1>
      <p style="font-size:16px;">Generate stunning high quality illusion artwork with Stable Diffusion</p>
      </div>
      ''')
    
    with gr.Row():
        with gr.Column():
            # type="pil" matches the PIL image annotations on check_inputs/inference.
            control_image = gr.Image(label="Input Illusion", type="pil")
            prompt = gr.Textbox(label="Prompt", placeholder="Medieval village scene with busy streets and castle in the distance")
            negative_prompt = gr.Textbox(label="Negative Prompt", value="low quality")
            run_btn = gr.Button("Run")
            
            # Read-only component that receives the value returned by inference.
            result_image = gr.Image(label="Illusion Diffusion Output", interactive=False)

            # Validation is wired to the click; inference is chained after it
            # via .success(). Only three inputs are passed to inference, so all
            # of its remaining parameters take their declared defaults.
            run_btn.click(check_inputs, inputs=[prompt, control_image]).success(
                inference, inputs=[control_image, prompt, negative_prompt], outputs=[result_image]
            )

# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    app.launch()