Spaces:
Sleeping
Sleeping
import gradio as gr | |
import torch | |
from PIL import Image, ImageDraw, ImageFont, ImageOps, ImageEnhance | |
from quanto import qfloat8, quantize, freeze | |
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel | |
from diffusers import StableDiffusionXLControlNetPipeline, UNet2DConditionModel, DiffusionPipeline, LCMScheduler | |
from diffusers.utils import make_image_grid | |
# Font used to render the mask text: the bold Atkinson Hyperlegible face,
# referenced by relative file name and loaded at size 50.
atkbold = ImageFont.truetype(font="Atkinson-Hyperlegible-Bold-102.otf", size=50)

# Default output size in pixels and default number of inference steps; these
# seed both the function defaults and the UI controls further down.
default_width, default_height = 1280, 720
default_timesteps = 8
def mask_image_factory(mask_text="ASK FOR\nA SNACK", width=default_width, height=default_height):
    """Render ``mask_text`` as a white-on-black mask of size ``width`` x ``height``.

    The text is drawn with the module-level ``atkbold`` font, cropped to its
    inked bounding box, padded with black to the target aspect ratio and then
    resampled to exactly ``(width, height)``.

    Args:
        mask_text: Text to render; newline characters separate centred lines.
        width: Output width in pixels.
        height: Output height in pixels.

    Returns:
        A mode ``"L"`` PIL image of size ``(width, height)``; all black when
        the text leaves no visible pixels.
    """
    width, height = int(width), int(height)
    text_options = dict(text=mask_text, font=atkbold, align="center", spacing=0)

    # Size the scratch canvas from the measured text so that long lines or
    # many lines are not clipped at the output frame before cropping. For
    # text that already fits, the canvas is exactly (width, height) as before.
    probe = ImageDraw.Draw(Image.new("L", (1, 1)))
    _, _, text_right, text_bottom = probe.multiline_textbbox((0, 0), **text_options)
    canvas_size = (
        max(width, int(text_right) + 1),
        max(height, int(text_bottom) + 1),
    )

    img = Image.new("L", canvas_size, 0)
    ImageDraw.Draw(img).multiline_text(xy=(0, 0), fill=255, **text_options)

    ink_box = img.getbbox()
    if ink_box is None:
        # Nothing was drawn (empty or whitespace-only text): all-black mask.
        return Image.new("L", (width, height), 0)
    cropped = img.crop(ink_box)

    # Grow the crop to the target aspect ratio by extending only the dimension
    # that is too short. The pad target is never smaller than the crop, so
    # ImageOps.pad adds black borders without downsampling the text first.
    crop_w, crop_h = cropped.size
    target_ratio = width / height
    if crop_w / crop_h > target_ratio:
        # Text is wider than the frame: keep its width, add rows above/below.
        frame = (crop_w, max(crop_h, round(crop_w / target_ratio)))
    else:
        # Text is taller (or equal): keep its height, add columns left/right.
        frame = (max(crop_w, round(crop_h * target_ratio)), crop_h)

    padded = ImageOps.pad(cropped, frame)
    return padded.resize((width, height), resample=Image.Resampling.LANCZOS)
# Pick the compute device: CUDA if present, otherwise Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    preferred_device = "cuda"
elif torch.backends.mps.is_available():
    preferred_device = "mps"
else:
    preferred_device = "cpu"

# Full precision on CPU, half precision on either accelerator.
preferred_dtype = torch.float16 if preferred_device != "cpu" else torch.float32

# SDXL ControlNet checkpoint that conditions generation on the mask image.
# NOTE: the SD 1.5 checkpoint ("monster-labs/control_v1p_sd15_qrcode_monster",
# subfolder "v2") was left disabled by the author in favour of this one.
controlnet = ControlNetModel.from_pretrained(
    "monster-labs/control_v1p_sdxl_qrcode_monster",
    torch_dtype=preferred_dtype,
)
controlnet = controlnet.to(preferred_device)

# LCM UNet for SDXL, loaded from its fp16 weight variant.
unet = UNet2DConditionModel.from_pretrained(
    "latent-consistency/lcm-sdxl",
    torch_dtype=preferred_dtype,
    variant="fp16",
)
unet = unet.to(preferred_device)

# Assemble the SDXL ControlNet pipeline around the two models loaded above.
ctlpipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    unet=unet,
    controlnet=controlnet,
    torch_dtype=preferred_dtype,
    safety_checker=None,
)
ctlpipe = ctlpipe.to(preferred_device)

# Replace the pipeline's scheduler with an LCM scheduler built from the same
# configuration.
ctlpipe.scheduler = LCMScheduler.from_config(ctlpipe.scheduler.config)

# NOTE: qfloat8 quantisation via quanto (quantize + freeze of the controlnet,
# ctlpipe.unet and ctlpipe.text_encoder) is intentionally left disabled here.
def app(prompt, negative_prompt, mask_text, num_inference_steps, controlnet_conditioning_scale, width, height, seed, count):
    """Generate ``count`` images guided by a text mask and tile them in a grid.

    Each image is produced by the module-level ``ctlpipe`` pipeline with the
    rendered ``mask_text`` as the ControlNet conditioning image. Image ``i``
    uses the seed ``seed + i``, so one call yields consecutive seeds. The
    guidance scale is fixed at 8.0.

    Args:
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        mask_text: Text rendered into the conditioning mask.
        num_inference_steps: Number of steps; converted with ``int``.
        controlnet_conditioning_scale: ControlNet strength; converted with
            ``float``.
        width: Output width in pixels.
        height: Output height in pixels.
        seed: Base random seed.
        count: Number of images to generate; must be at least 1.

    Returns:
        One image containing all generated images arranged in a grid: a
        vertical stack for two images, two columns for any other even count,
        and a single column for odd counts.

    Raises:
        ValueError: If ``count`` is smaller than 1.
    """
    count = int(count)
    if count < 1:
        raise ValueError("count must be at least 1")

    # The conditioning mask depends only on the text and the frame size, so it
    # is rendered once and shared by every generation below.
    control_image = mask_image_factory(mask_text=mask_text, width=width, height=height)

    all_images = []
    for offset in range(count):
        result = ctlpipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=control_image,
            num_inference_steps=int(num_inference_steps),
            guidance_scale=8.0,
            controlnet_conditioning_scale=float(controlnet_conditioning_scale),
            generator=torch.manual_seed(int(seed + offset)),
            height=height,
            width=width,
        )
        all_images.append(result.images[0])

    if count == 2:
        # Two images are stacked vertically rather than placed side by side.
        cols, rows = 1, 2
    else:
        cols = 2 if count % 2 == 0 else 1
        rows = count // cols
    return make_image_grid(all_images, cols=cols, rows=rows)
# Run one single-step generation at start-up and discard the result,
# presumably to warm up the pipeline before the UI is served. Arguments are
# passed by keyword so that width and height cannot be transposed.
app(
    prompt="corgis running in the park",
    negative_prompt="ugly, wrong",
    mask_text="ASK FOR\nA SNACK",
    num_inference_steps=1,
    controlnet_conditioning_scale=1.0,
    width=default_width,
    height=default_height,
    seed=42,
    count=1,
)

# Gradio UI: the input components are listed in the same order as the
# parameters of ``app``; the single output is the image grid it returns.
iface = gr.Interface(
    app,
    [
        gr.Textbox(label="Prompt", value="lots of puppies frolicking in a flower-filled meadow around tall trees at golden hour"),
        gr.Textbox(label="Negative Prompt", value="ugly, wrong"),
        gr.Textbox(label="Mask Text", value="ASK FOR\nA SNACK"),
        gr.Number(label="Number of Inference Steps", value=default_timesteps, minimum=1, maximum=50, step=1),
        gr.Slider(label="ControlNet Conditioning Scale", value=0.57, minimum=-1.0, maximum=2.0, step=0.01),
        gr.Number(label="Width", value=default_width, minimum=256, maximum=2048, precision=0),
        gr.Number(label="Height", value=default_height, minimum=256, maximum=2048, precision=0),
        gr.Number(label="Random Number Seed", value=42, minimum=0, maximum=2**32-1, precision=0),
        gr.Radio(label="Number of Images to Generate with Subsequent Consecutive Seeds", choices=[1, 2, 4, 6, 10], value=2),
    ],
    "image",
)
iface.launch()