# Hugging Face Space: fffiloni/sd3-ControlNet (Running on Zero)
# File size: 3,222 Bytes

import os
import subprocess
import sys

import cv2
import gradio as gr
import numpy as np
import torch
from diffusers import StableDiffusion3Pipeline
from diffusers.models.controlnet_sd3 import ControlNetSD3Model
from diffusers.utils import load_image
from diffusers.utils.torch_utils import randn_tensor
from PIL import Image

# Clone the specific branch.
# Skipped when the checkout already exists (e.g. after a restart), because
# `git clone` refuses to write into an existing non-empty directory.
if not os.path.isdir('diffusers_sd3_control'):
    subprocess.run(
        ["git", "clone", "-b", "sd3_control", "https://github.com/instantX-research/diffusers_sd3_control.git"],
        check=True,  # fail here, with git's own message, instead of at os.chdir below
    )

# Change directory to the cloned repository and install it
os.chdir('diffusers_sd3_control')
# Invoke pip through the running interpreter so the editable install lands in
# the same environment this script imports from; check=True surfaces a failed
# install immediately rather than as an ImportError further down.
subprocess.run([sys.executable, "-m", "pip", "install", "-e", "."], check=True)

# Add the path to the examples directory
sys.path.append(os.path.abspath('./examples/community'))

# Import the required pipeline
from pipeline_stable_diffusion_3_controlnet import StableDiffusion3CommonPipeline



# Build the SD3 pipeline with the Canny ControlNet attached.
base_model = 'stabilityai/stable-diffusion-3-medium-diffusers'
pipe = StableDiffusion3CommonPipeline.from_pretrained(base_model, controlnet_list=['InstantX/SD3-Controlnet-Canny'])

# Move the whole pipeline to the first CUDA device in float16.
pipe.to('cuda:0', torch.float16)

def resize_image(input_path, output_path, target_height):
    """Resize an image to a fixed height, keeping its aspect ratio, and save it.

    Args:
        input_path: Path of the image to read.
        output_path: Path the resized image is written to.
        target_height: Height of the output image in pixels; must be positive.

    Returns:
        ``output_path``, unchanged, for convenient chaining.

    Raises:
        ValueError: If ``target_height`` is not positive.
    """
    # Validate up front so a bad height fails with a clear message before any
    # file is opened.
    if target_height <= 0:
        raise ValueError(f"target_height must be positive, got {target_height}")

    # The context manager closes the source file handle even if resizing fails.
    with Image.open(input_path) as img:
        original_width, original_height = img.size

        # Multiply before dividing so the integer product is exact and only a
        # single float division is rounded. Clamp to 1 so extremely tall
        # images do not produce a zero-width (invalid) target size.
        new_width = max(1, int(target_height * original_width / original_height))

        # Resize the image while maintaining the aspect ratio and fixing the height
        resized = img.resize((new_width, target_height), Image.LANCZOS)

    # Save the resized image
    resized.save(output_path)

    return output_path

def infer(image_in, prompt):
    """Generate an image with SD3, guided by the Canny edges of a reference image.

    Args:
        image_in: Reference image, handed directly to ``load_image`` (the UI
            supplies a file path).
        prompt: Text prompt describing the desired image. When empty or blank,
            a built-in demo prompt is used instead.

    Returns:
        The first image of the pipeline output (``.images[0]``).

    Raises:
        gr.Error: If no reference image was provided.
    """
    if not image_in:
        # Report the problem in the UI instead of crashing inside load_image.
        raise gr.Error("Please upload a reference image first.")

    # Honor the user's prompt; the demo prompt is only a fallback for an
    # empty textbox.
    if not prompt or not prompt.strip():
        prompt = 'Anime style illustration of a girl wearing a suit. A moon in sky. In the background we see a big rain approaching. text "InstantX" on image'
    n_prompt = 'NSFW, nude, naked, porn, ugly'

    # Build the control image: run Canny edge detection on the reference, then
    # replicate the 2-D edge map across three channels.
    reference = load_image(image_in)
    edges = cv2.Canny(np.array(reference), 100, 200)
    edges = edges[:, :, None]
    image_to_canny = Image.fromarray(np.concatenate([edges, edges, edges], axis=2))

    # controlnet config
    controlnet_conditioning = [
        dict(
            control_index=0,
            control_image=image_to_canny,
            control_weight=0.7,
            control_pooled_projections='zeros'
        )
    ]
    # infer
    image = pipe(
        prompt=prompt,
        negative_prompt=n_prompt,
        controlnet_conditioning=controlnet_conditioning,
        num_inference_steps=28,
        guidance_scale=7.0,
        height=1024,
        width=1024,
    ).images[0]

    return image


# Demo page: a single column with the reference image, the prompt box, a
# submit button and the result image.
with gr.Blocks() as demo:
    with gr.Column():
        gr.Markdown("""
        # SD3 ControlNet
        """)
        image_in = gr.Image(type="filepath", sources=["upload"], label="Image reference")
        prompt = gr.Textbox(label="Prompt")
        submit_btn = gr.Button("Submit")
        result = gr.Image(label="Result")

    # Clicking the button runs `infer` on the uploaded image and the prompt
    # text and shows the returned image; the endpoint is hidden from the API page.
    submit_btn.click(fn=infer, inputs=[image_in, prompt], outputs=[result], show_api=False)

# Enable request queuing, then start the app.
demo.queue()
demo.launch()