Spaces:

prithivMLmods
/

core-OCR

Running on Zero

File size: 6,338 Bytes

import gradio as gr
import spaces
import numpy as np
import random
from diffusers import DiffusionPipeline
import torch
from PIL import Image

# Pick the execution device and matching precision in one place:
# bfloat16 on CUDA, float32 on CPU.
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.bfloat16
else:
    device, torch_dtype = "cpu", torch.float32

model_repo_id = "stabilityai/stable-diffusion-3.5-large-turbo"

# Load the base pipeline and move it to the selected device.
pipe = DiffusionPipeline.from_pretrained(
    model_repo_id,
    torch_dtype=torch_dtype,
).to(device)

# Attach the realism LoRA and fuse it at full strength (lora_scale=1.0).
pipe.load_lora_weights(
    "prithivMLmods/SD3.5-Turbo-Realism-2.0-LoRA",
    weight_name="SD3.5-Turbo-Realism-2.0-LoRA.safetensors",
)
pipe.fuse_lora(lora_scale=1.0)

# NOTE(review): trigger_word is not referenced anywhere else in the visible
# code; kept because other code may read it.
trigger_word = "Turbo Realism"

# Upper bound for the size sliders, and the largest signed 32-bit integer as
# the upper bound for seeds.
MAX_IMAGE_SIZE = 1024
MAX_SEED = np.iinfo(np.int32).max

# Style presets. Each entry has a display name, a prompt template in which
# "{prompt}" marks where the user's text is inserted, and a negative prompt.
_REALISM_PROMPT = (
    "hyper-realistic {quality} image of {{prompt}}. ultra-detailed, lifelike, "
    "high-resolution, sharp, vibrant colors, photorealistic"
)
_REALISM_NEGATIVE = (
    "cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly"
)

# The three realism presets differ only in their name and the quality tag
# written into the prompt template.
style_list = [
    {
        "name": label,
        "prompt": _REALISM_PROMPT.format(quality=quality),
        "negative_prompt": _REALISM_NEGATIVE,
    }
    for label, quality in (
        ("3840 x 2160", "8K"),
        ("2560 x 1440", "4K"),
        ("HD+", "2K"),
    )
]

# Pass-through preset: the prompt is used verbatim with no negative prompt.
style_list.append({"name": "Style Zero", "prompt": "{prompt}", "negative_prompt": ""})

# Names in declaration order; the first preset is the UI default.
STYLE_NAMES = [preset["name"] for preset in style_list]
DEFAULT_STYLE_NAME = STYLE_NAMES[0]

# Selectable grid layouts, keyed by their "<columns>x<rows>" label. Each value
# is the (columns, rows) pair parsed from that label.
grid_sizes = {
    label: tuple(int(count) for count in label.split("x"))
    for label in ("2x1", "1x2", "2x2", "2x3", "3x2", "1x1")
}

def _apply_style(style, prompt, negative_prompt):
    """Return the ``(positive, negative)`` prompt pair for a style preset.

    Args:
        style: Name of an entry in ``style_list``. An unknown name falls back
            to the unmodified prompt with no preset negative text.
        prompt: The user's prompt, substituted into the preset's template.
        negative_prompt: The user's own negative prompt; may be empty or None.

    Returns:
        A tuple of the styled prompt and the combined negative prompt (preset
        terms first, then the user's terms, joined with ``", "``).
    """
    preset = next((s for s in style_list if s["name"] == style), None)
    if preset is None:
        # next() without a default would leak a bare StopIteration here.
        template, style_negative = "{prompt}", ""
    else:
        template, style_negative = preset["prompt"], preset["negative_prompt"]

    positive = template.format(prompt=prompt)

    # Combine the preset's negative terms with the caller's; empty parts are
    # skipped so no stray separators end up in the text.
    negatives = [
        part.strip()
        for part in (style_negative, negative_prompt or "")
        if part and part.strip()
    ]
    return positive, ", ".join(negatives)


def _compose_grid(images, columns, rows, tile_width, tile_height):
    """Paste ``images`` row by row onto one RGB canvas of columns x rows tiles."""
    canvas = Image.new("RGB", (tile_width * columns, tile_height * rows))
    for index, tile in enumerate(images[: columns * rows]):
        row, column = divmod(index, columns)
        canvas.paste(tile, (column * tile_width, row * tile_height))
    return canvas


@spaces.GPU(duration=60)
def infer(
    prompt,
    negative_prompt="",
    seed=42,
    randomize_seed=False,
    width=1024,
    height=1024,
    guidance_scale=7.5,
    num_inference_steps=10,
    style="Style Zero",
    grid_size="1x1",
    progress=gr.Progress(track_tqdm=True),
):
    """Generate a grid of images for ``prompt`` and return it with the seed used.

    Args:
        prompt: Text prompt, inserted into the selected style's template.
        negative_prompt: Extra negative terms, appended to the style's own.
        seed: Seed for the random generator; ignored if ``randomize_seed``.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Width in pixels of each generated image.
        height: Height in pixels of each generated image.
        guidance_scale: Passed through to the pipeline unchanged.
        num_inference_steps: Number of denoising steps for the pipeline.
        style: Name of a preset in ``style_list``.
        grid_size: Key of ``grid_sizes``; unknown keys fall back to 1x1.
        progress: Not referenced in the body. Presumably declared so Gradio
            can mirror tqdm progress in the UI (confirm against Gradio docs).

    Returns:
        ``(grid_image, seed)``: the composed PIL image and the integer seed
        that was actually used.
    """
    styled_prompt, styled_negative_prompt = _apply_style(
        style, prompt, negative_prompt
    )

    # UI widgets may hand over floats, while manual_seed() and Image.new()
    # need integers.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    width, height = int(width), int(height)
    num_inference_steps = int(num_inference_steps)

    generator = torch.Generator().manual_seed(seed)
    grid_x, grid_y = grid_sizes.get(grid_size, (1, 1))
    num_images = grid_x * grid_y

    opts = {
        "prompt": styled_prompt,
        "negative_prompt": styled_negative_prompt,
        "guidance_scale": guidance_scale,
        "num_inference_steps": num_inference_steps,
        "width": width,
        "height": height,
        "generator": generator,
        # One pipeline call produces every tile of the grid.
        "num_images_per_prompt": num_images,
    }

    # Drop cached CUDA allocations before the memory-heavy pipeline call.
    torch.cuda.empty_cache()
    res = pipe(**opts)

    grid_img = _compose_grid(res.images, grid_x, grid_y, width, height)
    return grid_img, seed

# Sample prompts registered with gr.Examples as values for the prompt field.
examples = [
    "A tiny astronaut hatching from an egg on the moon, 4k, planet theme",
    "An anime-style illustration of a delicious, golden-brown wiener schnitzel on a plate, served with fresh lemon slices, parsley --style raw5",
    "Cold coffee in a cup bokeh --ar 85:128 --v 6.0 --style raw5, 4K, Photo-Realistic",
    "A cat holding a sign that says hello world --ar 85:128 --v 6.0 --style raw"
]

# Stylesheet passed to gr.Blocks: caps the container at 585px and centres it,
# centres h1 headings, and hides the page footer.
css = '''
.gradio-container {
    max-width: 585px !important;
    margin: 0 auto !important;
    display: flex;
    flex-direction: column;
    align-items: center;
    justify-content: center;
}
h1 { text-align: center; }
footer { visibility: hidden; }
'''

# UI layout: prompt row, result image, grid selector, advanced settings and
# example prompts, wired to infer() at the bottom.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## T2i Grid 6x")

        # Prompt entry plus the button that starts generation.
        with gr.Row():
            prompt = gr.Text(
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0, variant="primary")

        # Displays the grid image returned by infer().
        result = gr.Image(show_label=False)

        with gr.Row():
            grid_size_selection = gr.Dropdown(
                choices=list(grid_sizes),
                value="1x1",
                label="Grid Size",
            )

        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                value="(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, (mutated hands and fingers:1.4), disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation",
            )
            # step=1 keeps seeds whole numbers and makes every seed in the
            # range selectable. Without it, Gradio derives the increment from
            # the slider range, which is far too coarse for 0..MAX_SEED.
            seed = gr.Slider(0, MAX_SEED, step=1, value=0, label="Seed")
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                width = gr.Slider(512, MAX_IMAGE_SIZE, step=32, value=1024, label="Width")
                height = gr.Slider(512, MAX_IMAGE_SIZE, step=32, value=1024, label="Height")

            with gr.Row():
                guidance_scale = gr.Slider(0.0, 7.5, step=0.1, value=7.5, label="Guidance scale")
                num_inference_steps = gr.Slider(1, 50, step=1, value=10, label="Number of inference steps")

            style_selection = gr.Radio(
                choices=STYLE_NAMES,
                value=DEFAULT_STYLE_NAME,
                label="Quality Style",
            )

        # Examples supply only the prompt; infer()'s own defaults cover the
        # remaining arguments. Results are not cached.
        gr.Examples(
            examples=examples,
            inputs=[prompt],
            outputs=[result, seed],
            fn=infer,
            cache_examples=False,
        )

    # Clicking Run or submitting the prompt box both call infer(). The input
    # order must match infer()'s positional parameters. The seed actually used
    # is written back to the seed slider.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt, negative_prompt, seed, randomize_seed,
            width, height, guidance_scale, num_inference_steps,
            style_selection, grid_size_selection,
        ],
        outputs=[result, seed],
    )

# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()