import torch
import numpy as np
import gradio as gr
import spaces
import cv2
from typing import Dict
from torchvision.transforms.functional import to_tensor, center_crop, resize
from PIL import Image

from ui_model import fetch_model, process_sketch, process_mask

# Prepare the logo overlay: a 700x88 RGB image plus its alpha channel as a
# float mask in [0, 1] for compositing onto generated images.
engage_logo = Image.open("engage_studios_logo.png").resize((700, 88), Image.Resampling.BICUBIC)
engage_logo_mask = np.array(engage_logo.split()[-1])[..., None] / 255
engage_logo_np = np.array(engage_logo.convert('RGB'))

pipe = fetch_model()
pipe.to('cuda')


@spaces.GPU
def run_text_to_image(prompt=None, neg_prompt=None, inference_steps=8, num_images=2,
                      guidance_scale=2.0, guidance_rescale=0.0, height=1024, width=1024,
                      condition_scale=0.5, exposure=0.0, progress=gr.Progress()):
    # controlnet_conditioning_scale and gradio_progress are keyword arguments of
    # the custom pipeline returned by fetch_model().
    images = pipe(prompt=prompt,
                  negative_prompt=neg_prompt,
                  num_images_per_prompt=num_images,
                  num_inference_steps=inference_steps,
                  height=height, width=width,
                  guidance_scale=guidance_scale,
                  guidance_rescale=guidance_rescale,
                  controlnet_conditioning_scale=condition_scale,
                  gradio_progress=progress,
                  cross_attention_kwargs={"scale": exposure}).images
    return images


def run_model(user_state, condition_image, settings, prompt, neg_prompt,
              inference_steps=8, num_images=2, guidance_scale=2.0, guidance_rescale=0.0,
              enable_freeu=False, height=1024, width=1024, condition_scale=0.5,
              sketch_detail=1.0, sketch_softness=0.5, inpaint_strength=0.9, exposure=0.0,
              enable_stylation=False,
              style_1_down=0.0, style_1_mid=0.0, style_1_up=0.0,
              style_2_down=0.0, style_2_mid=0.0, style_2_up=0.0,
              style_3_down=0.0, style_3_mid=0.0, style_3_up=0.0,
              style_4_down=0.0, style_4_mid=0.0, style_4_up=0.0,
              seed=None, progress=gr.Progress()):
    # NOTE: only the text-to-image path is implemented below; condition_image and
    # settings (the UI mode) are accepted so the other modes can be wired in.
    # prompt += ", shot with a mirrorless, 35mm, photography, real, 8k, photorealistic, "
    prompt += ", best quality, HD, ~*~aesthetic~*~"

    # Seed both RNGs for reproducibility. torch.manual_seed requires an int, and
    # gr.Number delivers a float unless precision=0 is set on the component.
    if seed is not None:
        seed = int(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

    progress(0, desc="Thinking...", total=int(inference_steps))

    if enable_freeu:
        pipe.enable_freeu(s1=0.6, s2=0.4, b1=1.1, b2=1.2)
    else:
        pipe.disable_freeu()

    # Stylation only has an effect when exposure (the global LoRA scale passed
    # through cross_attention_kwargs) is non-zero.
    if exposure != 0.0 and enable_stylation:
        pipe.enable_lora()
        # Per-block LoRA scales: each adapter gets independent down/mid/up UNet weights.
        adapter_weight_scales_ENGAGE = {"unet": {"down": style_1_down, "mid": style_1_mid, "up": style_1_up}}
        adapter_weight_scales_FILM = {"unet": {"down": style_2_down, "mid": style_2_mid, "up": style_2_up}}
        adapter_weight_scales_MJ = {"unet": {"down": style_3_down, "mid": style_3_mid, "up": style_3_up}}
        adapter_weight_scales_MORE_ART = {"unet": {"down": style_4_down, "mid": style_4_mid, "up": style_4_up}}
        pipe.set_adapters(["ENGAGE_LORA", "FILM_LORA", "MJ_LORA", "MORE_ART_LORA"],
                          [adapter_weight_scales_ENGAGE, adapter_weight_scales_FILM,
                           adapter_weight_scales_MJ, adapter_weight_scales_MORE_ART])
    else:
        pipe.set_adapters(["ENGAGE_LORA", "FILM_LORA", "MJ_LORA", "MORE_ART_LORA"],
                          adapter_weights=[0.0, 0.0, 0.0, 0.0])
        pipe.disable_lora()

    images = run_text_to_image(prompt=prompt, neg_prompt=neg_prompt,
                               num_images=num_images, inference_steps=inference_steps,
                               height=height, width=width,
                               guidance_scale=guidance_scale, guidance_rescale=guidance_rescale,
                               condition_scale=condition_scale, progress=progress,
                               exposure=exposure)

    # Alpha-composite the logo into the bottom-left corner of each result:
    # out = background * (1 - alpha) + logo * alpha (assumes a straight,
    # non-premultiplied alpha channel in the logo PNG). Clip before the uint8
    # cast, otherwise out-of-range values wrap around instead of saturating.
    for idx, im in enumerate(images):
        im = np.asarray(im, dtype='float32')
        im[-88:, :700] = im[-88:, :700] * (1 - engage_logo_mask) + engage_logo_np * engage_logo_mask
        images[idx] = Image.fromarray(np.clip(im, 0, 255).astype('uint8'))

    user_state["IMAGE_GALLERY"] += images
    return user_state["IMAGE_GALLERY"], user_state
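
# A minimal sketch for exercising the pipeline outside the UI (hypothetical
# helper, not wired into the app; it assumes the fetch_model() pipeline accepts
# the same keyword arguments used by run_text_to_image above, and on a ZeroGPU
# Space it would also need the @spaces.GPU decorator). Call it from a REPL for
# a quick smoke test, e.g. _smoke_test().
def _smoke_test(prompt="futuristic dark red car in a white studio"):
    out = pipe(prompt=prompt,
               negative_prompt="blurry, poor quality",
               num_images_per_prompt=1,
               num_inference_steps=8,
               height=1024, width=1024,
               guidance_scale=2.0).images
    out[0].save("smoke_test.png")
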
theme = gr.themes.Base(
    primary_hue="neutral",
    radius_size="none",
).set(
    body_text_color_dark='*neutral_800',
    embed_radius='*radius_xxs',
    button_primary_background_fill='*primary_700',
    button_primary_background_fill_hover='*primary_400',
    button_primary_background_fill_hover_dark='*primary_400',
    button_primary_border_color_dark='*primary_200',
    button_primary_text_color='*primary_50',
    button_primary_text_color_dark='*primary_50',
    button_primary_text_color_hover='*primary_50'
)

with gr.Blocks(theme=theme) as engage_automotive_lora_demo:
    session_state = gr.State(value={"IMAGE_GALLERY": [],
                                    "SELECTED_IMAGE": None})

    diffused_image_out = gr.Gallery(label='Results', show_label=False, columns=[3], rows=[1],
                                    object_fit="contain", height="auto", format="png")

    with gr.Group():
        with gr.Row():
            prompt_box = gr.Textbox("futuristic dark red car in a white studio", label='Prompt')
            generate_button = gr.Button("Generate", scale=0)

        with gr.Row():
            settings_dropdown = gr.Dropdown(
                ["Text to image", "From sketch", "Inpaint", "Inpaint sketch"],
                value="Text to image", label="Mode",
                info="Text to image: prompt only. "
                     "From sketch: upload an initial image / sketch in the image editor. "
                     "Inpaint sketch: edits the chosen area of an image, using the initial "
                     "image as the base for sketches.")

    with gr.Accordion("Image Editor", open=False):
        condition_image = gr.ImageEditor(type='pil', show_label=False,
                                         brush=gr.Brush(colors=["#000000"], color_mode="fixed"))

    with gr.Row():
        with gr.Accordion("Settings", open=False):
            neg_prompt_box = gr.Textbox("blurry, poor quality, unrealistic", label='Negative Prompt')
            # precision=0 makes gr.Number deliver an int, as torch.manual_seed expects.
            seed_box = gr.Number(42, label='Seed', precision=0)
            inference_steps = gr.Slider(0, 20, value=8, label='Inference Steps', step=1)
            num_images = gr.Slider(1, 3, value=2, label='Number of Images', step=1)
            guidance_scale = gr.Slider(0, 10, value=1.5, label='Guidance Scale', step=0.1)
            guidance_rescale = gr.Slider(0.0, 1.0, value=0.0, label='Guidance Rescale', step=0.1)
            height = gr.Slider(128, 2048, value=1024, label='Image Height', step=64)
            width = gr.Slider(128, 2048, value=1024, label='Image Width', step=64)
            condition_influence = gr.Slider(0.0, 1.0, value=0.5, label='Condition Influence')
            sketch_detail = gr.Slider(0.0, 1.0, value=0.5, label='Sketch Detail')
            sketch_softness = gr.Slider(0.0, 1.0, value=0.5, label='Sketch Softness')
            inpaint_strength = gr.Slider(0.0, 1.0, value=0.8, label='Inpaint Strength')
            enable_freeu = gr.Checkbox(True, label='FreeU', info='Enables FreeU scaling factors.')

    with gr.Accordion("Stylation (Experimental)", open=False):
        with gr.Row():
            exposure = gr.Slider(-1.0, 1.0, value=0.0, label='Exposure')
            enable_stylation = gr.Checkbox(label='Enable Stylation',
                                           info='EXPERIMENTAL: We apologize for the ambiguity; '
                                                'please play around with the sliders to '
                                                'find a style you like! '
                                                'Warning: will slow down the generation time.')
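
        # Each style accordion below exposes per-UNet-block LoRA scales. run_model
        # assembles them into the per-block weighting structure that diffusers'
        # set_adapters accepts, e.g. (illustrative values only):
        #   {"unet": {"down": 0.4, "mid": 0.0, "up": 0.8}}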
        with gr.Accordion("Style A - Engage Studios Futuristic", open=False):
            style_A_down = gr.Slider(-1.0, 1.0, value=0.0, label='down')
            style_A_mid = gr.Slider(-1.0, 1.0, value=0.0, label='mid')
            style_A_up = gr.Slider(-1.0, 1.0, value=0.0, label='up')

        with gr.Accordion("Style B - Lighting", open=False):
            style_B_down = gr.Slider(-1.0, 1.0, value=0.0, label='down')
            style_B_mid = gr.Slider(-1.0, 1.0, value=0.0, label='mid')
            style_B_up = gr.Slider(-1.0, 1.0, value=0.0, label='up')

        with gr.Accordion("Style C - Details A", open=False):
            style_C_down = gr.Slider(-1.0, 1.0, value=0.0, label='down')
            style_C_mid = gr.Slider(-1.0, 1.0, value=0.0, label='mid')
            style_C_up = gr.Slider(-1.0, 1.0, value=0.0, label='up')

        with gr.Accordion("Style D - Details B", open=False):
            style_D_down = gr.Slider(-1.0, 1.0, value=0.0, label='down')
            style_D_mid = gr.Slider(-1.0, 1.0, value=0.0, label='mid')
            style_D_up = gr.Slider(-1.0, 1.0, value=0.0, label='up')

    generate_button.click(run_model,
                          inputs=[session_state, condition_image, settings_dropdown,
                                  prompt_box, neg_prompt_box, inference_steps, num_images,
                                  guidance_scale, guidance_rescale, enable_freeu,
                                  height, width, condition_influence,
                                  sketch_detail, sketch_softness, inpaint_strength,
                                  exposure, enable_stylation,
                                  style_A_down, style_A_mid, style_A_up,
                                  style_B_down, style_B_mid, style_B_up,
                                  style_C_down, style_C_mid, style_C_up,
                                  style_D_down, style_D_mid, style_D_up,
                                  seed_box],
                          outputs=[diffused_image_out, session_state],
                          show_progress=True)

engage_automotive_lora_demo.launch()
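
# For LAN access or a temporary public link, launch() accepts the standard
# Gradio keyword arguments, e.g.:
#   engage_automotive_lora_demo.launch(server_name="0.0.0.0", share=True)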