Spaces:

prithivMLmods
/

DocScope-R1

Running on Zero

File size: 15,247 Bytes

a85c4cf
 
 
f5e2b63
04cce22
11d7c13
f5e2b63
e01e01c
 
04cce22
f5e2b63
a85c4cf
4074d29
19d58d4
e01e01c
 
 
 
19d58d4
e01e01c
 
 
 
 
 
19d58d4
a5ce6db
e01e01c
a5ce6db
e01e01c
 
 
 
 
 
 
19d58d4
 
d4884bc
 
 
19d58d4
d4884bc
 
 
19d58d4
 
 
 
 
 
 
 
 
 
 
 
 
 
e01e01c
04cce22
e01e01c
 
 
 
 
 
19d58d4
e01e01c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19d58d4
e01e01c
 
19d58d4
 
e01e01c
19d58d4
 
 
 
 
 
 
 
 
 
 
 
 
d4884bc
19d58d4
 
 
 
 
e01e01c
 
04cce22
1124bc4
19d58d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e01e01c
 
a5ce6db
e01e01c
 
04cce22
 
 
 
 
19d58d4
04cce22
 
 
19d58d4
04cce22
 
 
19d58d4
04cce22
e01e01c
04cce22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4074d29
11d7c13
f5e2b63
19d58d4
04cce22
4a9305f
19d58d4
 
76bb293
22ef061
108256c
a5ce6db
e01e01c
03b41ea
76bb293
e01e01c
 
108256c
e01e01c
19d58d4
e01e01c
 
 
108256c
19d58d4
 
 
 
7bc6034
108256c
e01e01c
19d58d4
 
 
108256c
4074d29
e01e01c
19d58d4
e01e01c
19d58d4
e01e01c
4074d29
e01e01c
 
 
19d58d4
4074d29
ce44242
e01e01c
 
 
 
a5ce6db
19d58d4
a5ce6db
19d58d4
 
e01e01c
 
a5ce6db
19d58d4
a5ce6db
19d58d4
 
e01e01c
a5ce6db
e01e01c
 
 
 
 
19d58d4
a5ce6db
e01e01c
 
19d58d4
 
 
 
 
e01e01c
 
19d58d4
e01e01c
19d58d4
e01e01c
 
 
 
 
 
 
19d58d4
e01e01c
19d58d4
108256c
e01e01c
19d58d4
 
 
 
e01e01c
19d58d4
e01e01c
108256c
19d58d4
b4cb811
 
 
edc9e87
b4cb811
19d58d4
d4884bc
19d58d4
 
70cf16f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1cbdebf
70cf16f
 
19d58d4
70cf16f
 
19d58d4
 
 
 
70cf16f
19d58d4
 
 
70cf16f
 
19d58d4
70cf16f
19d58d4
70cf16f
 
19d58d4
 
 
 
70cf16f
19d58d4
70cf16f
19d58d4
70cf16f
19d58d4
70cf16f
a5ce6db

import gradio as gr
import spaces
import torch
from diffusers import AutoencoderKL, TCDScheduler
from diffusers.models.model_loading_utils import load_state_dict
from huggingface_hub import hf_hub_download

from controlnet_union import ControlNetModel_Union
from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline

from PIL import Image, ImageDraw
import numpy as np

# Load configuration and models
# Fetch the "promax" config file of the ControlNet-Union SDXL checkpoint from the Hub.
config_file = hf_hub_download(
    "xinsir/controlnet-union-sdxl-1.0",
    filename="config_promax.json",
)

# Build the ControlNet from that config, then download the matching weights file.
config = ControlNetModel_Union.load_config(config_file)
controlnet_model = ControlNetModel_Union.from_config(config)
model_file = hf_hub_download(
    "xinsir/controlnet-union-sdxl-1.0",
    filename="diffusion_pytorch_model_promax.safetensors",
)

# Read the weights from disk and load them into the ControlNet instance.
# NOTE(review): _load_pretrained_model is a private diffusers helper; its
# signature and the 5-tuple unpacked below may differ between diffusers
# releases -- confirm against the pinned version before upgrading.
sstate_dict = load_state_dict(model_file)
model, _, _, _, _ = ControlNetModel_Union._load_pretrained_model(
    controlnet_model, sstate_dict, model_file, "xinsir/controlnet-union-sdxl-1.0"
)
# Move the ControlNet to the GPU in half precision.
model.to(device="cuda", dtype=torch.float16)

# Half-precision VAE on the GPU; every pipeline built in this file reuses it.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
).to("cuda")

# Initially load the default pipeline
pipe = StableDiffusionXLFillPipeline.from_pretrained(
    "SG161222/RealVisXL_V5.0_Lightning",
    torch_dtype=torch.float16,
    vae=vae,
    controlnet=model,
    variant="fp16",
).to("cuda")

# Replace the pipeline's scheduler with a TCDScheduler built from the
# configuration of the scheduler the checkpoint shipped with.
pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)

# Name of the checkpoint currently held in ``pipe``. It starts as the default
# pipeline that this module builds at import time.
_loaded_model_name = "RealVisXL_V5.0_Lightning"


def load_model(selected_model):
    """Make ``selected_model`` the active outpainting pipeline.

    The page-load event calls this with the default model on every visit, so
    a request for the checkpoint that is already active returns immediately
    instead of rebuilding the pipeline.

    Args:
        selected_model: Repository name under the ``SG161222`` namespace,
            e.g. ``"RealVisXL_V5.0_Lightning"``.

    Returns:
        A status message naming the active model.
    """
    global pipe, _loaded_model_name

    if selected_model == _loaded_model_name:
        return f"Loaded model: {selected_model}"

    model_path = f"SG161222/{selected_model}"
    new_pipe = StableDiffusionXLFillPipeline.from_pretrained(
        model_path,
        torch_dtype=torch.float16,
        vae=vae,
        controlnet=model,
        variant="fp16",
    ).to("cuda")
    new_pipe.scheduler = TCDScheduler.from_config(new_pipe.scheduler.config)

    # Publish the pipeline only once it is fully configured, so a request that
    # reads ``pipe`` never sees one that still carries the stock scheduler.
    pipe = new_pipe
    _loaded_model_name = selected_model
    return f"Loaded model: {selected_model}"

# Percentages behind the fixed choices of the "Resize input image" radio; any
# other choice uses the custom percentage instead.
_RESIZE_PRESETS = {"Full": 100, "50%": 50, "33%": 33, "25%": 25}

# Alignment values accepted by prepare_image_and_mask.
_ALIGNMENTS = ("Middle", "Left", "Right", "Top", "Bottom")

# Inset in pixels applied to the preserved rectangle on a side whose overlap
# is switched off (presumably to hide a thin seam at the image border).
_WHITE_GAPS_PATCH = 2


def _edge_inset(overlap_enabled, overlap_px, flush_with_canvas):
    """Return how far the preserved rectangle is pulled in from one image edge."""
    if overlap_enabled:
        return overlap_px
    # An edge the image is aligned against has nothing beyond it, so no inset.
    return 0 if flush_with_canvas else _WHITE_GAPS_PATCH


def prepare_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom):
    """Place the input image on a white canvas and build the outpainting mask.

    The image is first scaled to fit inside the target size, then shrunk by the
    chosen resize percentage (never below 64 px per side), and pasted onto the
    canvas according to ``alignment``.

    Args:
        image: Source PIL image.
        width: Target canvas width in pixels.
        height: Target canvas height in pixels.
        overlap_percentage: Share of the pasted image's width/height that the
            mask reaches into the image on sides with overlap enabled.
        resize_option: "Full", "50%", "33%" or "25%"; any other value selects
            ``custom_resize_percentage``.
        custom_resize_percentage: Percentage used for the custom option.
        alignment: One of "Middle", "Left", "Right", "Top", "Bottom".
        overlap_left: Whether the mask overlaps the image's left side.
        overlap_right: Whether the mask overlaps the image's right side.
        overlap_top: Whether the mask overlaps the image's top side.
        overlap_bottom: Whether the mask overlaps the image's bottom side.

    Returns:
        ``(background, mask)``: the RGB canvas with the image pasted in, and an
        "L" mask that is 0 over the preserved rectangle and 255 elsewhere.

    Raises:
        ValueError: If ``alignment`` is not one of the supported values.
    """
    if alignment not in _ALIGNMENTS:
        raise ValueError(
            f"Unsupported alignment {alignment!r}; expected one of {', '.join(_ALIGNMENTS)}"
        )

    # PIL needs integer sizes; sliders may deliver whole numbers as floats.
    target_w, target_h = int(width), int(height)
    target_size = (target_w, target_h)

    # Scale the image so that it fits within the target size.
    fit_scale = min(target_w / image.width, target_h / image.height)
    fitted_size = (int(image.width * fit_scale), int(image.height * fit_scale))
    source = image.resize(fitted_size, Image.LANCZOS)

    # Shrink by the requested percentage, keeping at least 64 px per side.
    resize_percentage = _RESIZE_PRESETS.get(resize_option, custom_resize_percentage)
    resize_factor = resize_percentage / 100
    new_width = max(int(source.width * resize_factor), 64)
    new_height = max(int(source.height * resize_factor), 64)
    source = source.resize((new_width, new_height), Image.LANCZOS)

    # Overlap in pixels, at least one pixel in each direction.
    overlap_x = max(int(new_width * (overlap_percentage / 100)), 1)
    overlap_y = max(int(new_height * (overlap_percentage / 100)), 1)

    # Position of the pasted image: centered on an axis unless the alignment
    # pins it to one edge of that axis.
    free_x = target_w - new_width
    free_y = target_h - new_height
    margin_x = {"Left": 0, "Right": free_x}.get(alignment, free_x // 2)
    margin_y = {"Top": 0, "Bottom": free_y}.get(alignment, free_y // 2)

    # Keep the paste position inside the canvas.
    margin_x = max(0, min(margin_x, free_x))
    margin_y = max(0, min(margin_y, free_y))

    background = Image.new('RGB', target_size, (255, 255, 255))
    background.paste(source, (margin_x, margin_y))

    # The mask starts fully white; the preserved rectangle is drawn in black.
    mask = Image.new('L', target_size, 255)
    mask_draw = ImageDraw.Draw(mask)

    left_overlap = margin_x + _edge_inset(overlap_left, overlap_x, alignment == "Left")
    right_overlap = margin_x + new_width - _edge_inset(overlap_right, overlap_x, alignment == "Right")
    top_overlap = margin_y + _edge_inset(overlap_top, overlap_y, alignment == "Top")
    bottom_overlap = margin_y + new_height - _edge_inset(overlap_bottom, overlap_y, alignment == "Bottom")

    mask_draw.rectangle([
        (left_overlap, top_overlap),
        (right_overlap, bottom_overlap)
    ], fill=0)

    return background, mask

@spaces.GPU(duration=24)
def infer(image, width, height, overlap_percentage, num_inference_steps, resize_option, custom_resize_percentage, prompt_input, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom):
    """Outpaint ``image`` to the requested canvas size and return the result.

    Args:
        image: Input PIL image from the UI; ``None`` when nothing was uploaded.
        width: Target canvas width in pixels.
        height: Target canvas height in pixels.
        overlap_percentage: Mask overlap, forwarded to ``prepare_image_and_mask``.
        num_inference_steps: Number of steps passed to the pipeline.
        resize_option: Resize choice, forwarded to ``prepare_image_and_mask``.
        custom_resize_percentage: Custom resize percentage, forwarded as well.
        prompt_input: User prompt; ", high quality, 4k" is appended to it.
        alignment: Placement of the input image on the canvas.
        overlap_left: Whether the mask overlaps the image's left side.
        overlap_right: Whether the mask overlaps the image's right side.
        overlap_top: Whether the mask overlaps the image's top side.
        overlap_bottom: Whether the mask overlaps the image's bottom side.

    Returns:
        The canvas with the generated content pasted into the masked region.

    Raises:
        gr.Error: If no input image was provided or the pipeline yielded
            no image.
    """
    if image is None:
        raise gr.Error("Please upload an input image before generating.")

    background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom)

    # Black out every pixel the mask marks for generation.
    cnet_image = background.copy()
    cnet_image.paste(0, (0, 0), mask)

    final_prompt = f"{prompt_input} , high quality, 4k"

    (
        prompt_embeds,
        negative_prompt_embeds,
        pooled_prompt_embeds,
        negative_pooled_prompt_embeds,
    ) = pipe.encode_prompt(final_prompt, "cuda", True)

    # The pipeline is iterated to exhaustion and only the last image is kept.
    # A dedicated loop variable is used so the ``image`` argument is never
    # mistaken for a generated result.
    generated_image = None
    for generated_image in pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        pooled_prompt_embeds=pooled_prompt_embeds,
        negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
        image=cnet_image,
        num_inference_steps=int(num_inference_steps)
    ):
        pass

    if generated_image is None:
        raise gr.Error("The pipeline did not produce an image.")

    # Paste the generated pixels into the masked region only.
    generated_image = generated_image.convert("RGBA")
    cnet_image.paste(generated_image, (0, 0), mask)

    return cnet_image

def clear_result():
    """Return an update that empties the result image component."""
    empty_image_update = gr.update(value=None)
    return empty_image_update

def preload_presets(target_ratio, ui_width, ui_height):
    """Translate the chosen aspect ratio into slider and accordion updates.

    A fixed ratio yields its preset width and height and leaves the settings
    accordion untouched. "Custom" keeps the current slider values and opens
    the accordion. Any other value yields ``None``.
    """
    fixed_presets = (
        ("9:16", 720, 1280),
        ("16:9", 1280, 720),
        ("1:1", 1024, 1024),
    )
    for ratio_label, preset_width, preset_height in fixed_presets:
        if target_ratio == ratio_label:
            return preset_width, preset_height, gr.update()

    if target_ratio == "Custom":
        # Keep whatever the user set and reveal the size sliders.
        return ui_width, ui_height, gr.update(open=True)

    return None

def select_the_right_preset(user_width, user_height):
    """Return the aspect-ratio label for the given size, or "Custom" if none matches."""
    labels_by_size = {
        (720, 1280): "9:16",
        (1280, 720): "16:9",
        (1024, 1024): "1:1",
    }
    return labels_by_size.get((user_width, user_height), "Custom")

def toggle_custom_resize_slider(resize_option):
    """Show the custom-percentage slider only while "Custom" is selected."""
    wants_custom_size = resize_option == "Custom"
    return gr.update(visible=wants_custom_size)

def update_history(new_image, history):
    """Return the history gallery contents with ``new_image`` placed first.

    Args:
        new_image: The newest result, or ``None`` when nothing was generated.
        history: Current gallery items, or ``None`` when the gallery is empty.

    Returns:
        A new list with ``new_image`` in front of the previous items. When
        ``new_image`` is ``None`` the previous items are returned unchanged,
        so an empty result never ends up in the gallery. The list passed in
        as ``history`` is not modified.
    """
    previous_items = [] if history is None else list(history)
    if new_image is None:
        return previous_items
    return [new_image, *previous_items]

# CSS and Title
# Stylesheet passed to gr.Blocks: centers every <h1> heading on the page.
css = """
h1 {
  text-align: center;
  display: block;
}
"""

# HTML heading shown at the top of the page through gr.HTML.
title = """<h1 align="center">Diffusers Image Outpaint Lightning</h1>
"""

# Build the interface: inputs and settings in the left column, the generated
# image and the history gallery in the right column, then wire up the events.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    with gr.Column():
        gr.HTML(title)

        with gr.Row():
            # Left column: input image, model choice, prompt and settings.
            with gr.Column():
                input_image = gr.Image(
                    type="pil",
                    label="Input Image"
                )
                model_selection = gr.Dropdown(
                    choices=["RealVisXL_V5.0_Lightning", "RealVisXL_V4.0_Lightning"],
                    value="RealVisXL_V5.0_Lightning",
                    label="Select Model"
                )
                with gr.Row():
                    with gr.Column(scale=2):
                        prompt_input = gr.Textbox(label="Prompt (Optional)")
                    with gr.Column(scale=1):
                        run_button = gr.Button("Generate")

                with gr.Row():
                    target_ratio = gr.Radio(
                        label="Expected Ratio",
                        choices=["9:16", "16:9", "1:1", "Custom"],
                        value="9:16",
                        scale=2
                    )
                    alignment_dropdown = gr.Dropdown(
                        choices=["Middle", "Left", "Right", "Top", "Bottom"],
                        value="Middle",
                        label="Alignment"
                    )

                with gr.Accordion(label="Advanced settings", open=False) as settings_panel:
                    with gr.Column():
                        with gr.Row():
                            width_slider = gr.Slider(
                                label="Target Width",
                                minimum=720,
                                maximum=1536,
                                step=8,
                                value=720,
                            )
                            height_slider = gr.Slider(
                                label="Target Height",
                                minimum=720,
                                maximum=1536,
                                step=8,
                                value=1280,
                            )
                        num_inference_steps = gr.Slider(label="Steps", minimum=4, maximum=12, step=1, value=8)
                        with gr.Group():
                            overlap_percentage = gr.Slider(
                                label="Mask overlap (%)",
                                minimum=1,
                                maximum=50,
                                value=10,
                                step=1
                            )
                            with gr.Row():
                                overlap_top = gr.Checkbox(label="Overlap Top", value=True)
                                overlap_right = gr.Checkbox(label="Overlap Right", value=True)
                            with gr.Row():
                                overlap_left = gr.Checkbox(label="Overlap Left", value=True)
                                overlap_bottom = gr.Checkbox(label="Overlap Bottom", value=True)
                        with gr.Row():
                            resize_option = gr.Radio(
                                label="Resize input image",
                                choices=["Full", "50%", "33%", "25%", "Custom"],
                                value="Full"
                            )
                            # Hidden until the "Custom" resize option is chosen.
                            custom_resize_percentage = gr.Slider(
                                label="Custom resize (%)",
                                minimum=1,
                                maximum=100,
                                step=1,
                                value=50,
                                visible=False
                            )
                status_text = gr.Textbox(label="Status", interactive=False)
                gr.Examples(
                    examples=[
                        ["./examples/example_1.webp", 1280, 720, "Middle"],
                        ["./examples/example_2.jpg", 1440, 810, "Left"],
                        ["./examples/example_3.jpg", 1024, 1024, "Top"],
                        ["./examples/example_3.jpg", 1024, 1024, "Bottom"],
                    ],
                    inputs=[input_image, width_slider, height_slider, alignment_dropdown],
                )

            # Right column: latest result and the gallery of earlier results.
            with gr.Column():
                result = gr.Image(
                    interactive=False,
                    label="Generated Image",
                    format="png",
                )
                history_gallery = gr.Gallery(label="History", columns=6, object_fit="contain", interactive=False)

    # Event handlers
    model_selection.change(fn=load_model, inputs=model_selection, outputs=status_text)
    target_ratio.change(
        fn=preload_presets,
        inputs=[target_ratio, width_slider, height_slider],
        outputs=[width_slider, height_slider, settings_panel],
        queue=False
    )
    # Moving either size slider re-derives the matching ratio label.
    for size_slider in (width_slider, height_slider):
        size_slider.change(
            fn=select_the_right_preset,
            inputs=[width_slider, height_slider],
            outputs=[target_ratio],
            queue=False
        )
    resize_option.change(
        fn=toggle_custom_resize_slider,
        inputs=[resize_option],
        outputs=[custom_resize_percentage],
        queue=False
    )

    # Components passed to infer; the order must match its parameter order.
    infer_inputs = [
        input_image, width_slider, height_slider, overlap_percentage, num_inference_steps,
        resize_option, custom_resize_percentage, prompt_input, alignment_dropdown,
        overlap_left, overlap_right, overlap_top, overlap_bottom,
    ]

    # The Generate button and pressing Enter in the prompt box run the same
    # chain: clear the old result, generate, then record the new image.
    for start_generation in (run_button.click, prompt_input.submit):
        start_generation(
            fn=clear_result,
            inputs=None,
            outputs=result,
        ).then(
            fn=infer,
            inputs=infer_inputs,
            outputs=result,
        ).success(
            # Only a successful generation is added to the history; with
            # .then() a failed run would push an empty result into the gallery.
            fn=update_history,
            inputs=[result, history_gallery],
            outputs=history_gallery,
        )

    # On page load, make sure the active pipeline matches the dropdown value.
    demo.load(fn=load_model, inputs=model_selection, outputs=status_text)

# Enable the request queue (max_size=20) and start the app without a public
# share link, with server-side rendering off and errors shown in the UI.
launch_options = dict(share=False, ssr_mode=False, show_error=True)
demo.queue(max_size=20).launch(**launch_options)