"""Gradio demo: instruction-based image editing with CosXL Edit and an SDXL refiner.

At import time this script downloads/loads both pipelines onto CUDA, patches
``EDMEulerScheduler.set_timesteps``, builds the Gradio UI and launches it.
"""
from __future__ import annotations

import math
import random

import gradio as gr
import numpy as np
import torch
from diffusers import (
    AutoencoderKL,
    EDMEulerScheduler,
    StableDiffusionXLImg2ImgPipeline,
    StableDiffusionXLInstructPix2PixPipeline,
)
from huggingface_hub import hf_hub_download
from PIL import Image

# ---------------------------------------------------------------------------
# Model setup
# ---------------------------------------------------------------------------

# A single fp16 VAE instance is shared by the edit pipeline and the refiner.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
)

# The options used here and in ``king`` (``num_in_channels=8``,
# ``is_cosxl_edit``, ``image_guidance_scale``, ``width``/``height``) belong to
# the SDXL InstructPix2Pix pipeline, so that class is used for the edit model
# instead of the plain img2img pipeline.
# NOTE(review): confirm against the installed diffusers version.
pipe_edit = StableDiffusionXLInstructPix2PixPipeline.from_single_file(
    hf_hub_download(repo_id="stabilityai/cosxl", filename="cosxl_edit.safetensors"),
    num_in_channels=8,
    is_cosxl_edit=True,
    vae=vae,
    torch_dtype=torch.float16,
)
pipe_edit.scheduler = EDMEulerScheduler(
    sigma_min=0.002,
    sigma_max=120.0,
    sigma_data=1.0,
    prediction_type="v_prediction",
)
pipe_edit.to("cuda")

refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    vae=vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
refiner.to("cuda")


def set_timesteps_patched(self, num_inference_steps: int, device=None):
    """Install a log-uniform sigma schedule on the scheduler.

    Sigmas are evenly spaced in log space between ``config.sigma_min`` and
    ``config.sigma_max`` and ordered from largest to smallest. Timesteps are
    derived from them via ``self.precondition_noise``.

    Args:
        num_inference_steps: Number of sigmas (and timesteps) to generate.
        device: Device on which the timesteps are placed. The stored sigma
            table is always kept on the CPU.
    """
    self.num_inference_steps = num_inference_steps

    log_lo = math.log(self.config.sigma_min)
    log_hi = math.log(self.config.sigma_max)
    # linspace runs low -> high in log space; flip so the schedule descends.
    sigmas = torch.linspace(log_lo, log_hi, num_inference_steps).exp().flip(0)
    sigmas = sigmas.to(dtype=torch.float32, device=device)

    self.timesteps = self.precondition_noise(sigmas)
    # Append a terminal sigma of 0 and store the table on the CPU.
    self.sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)]).to("cpu")

    # Reset the scheduler's step bookkeeping for the new schedule.
    self._step_index = None
    self._begin_index = None


# Class-level monkeypatch: affects every EDMEulerScheduler instance,
# including the one already attached to ``pipe_edit``.
EDMEulerScheduler.set_timesteps = set_timesteps_patched


def king(
    input_image,
    instruction: str,
    negative_prompt: str = "",
    steps: int = 25,
    randomize_seed: bool = True,
    seed: int = 2404,
    guidance_scale: float = 6,
    progress=gr.Progress(track_tqdm=True),
):
    """Edit an image according to a text instruction, then refine the result.

    Args:
        input_image: Path to the image to edit (the UI's image component is
            configured with ``type='filepath'``).
        instruction: Edit instruction used as the prompt of the edit pipeline.
        negative_prompt: Negative prompt passed to both pipelines.
        steps: Number of inference steps for both pipelines. Coerced to an
            ``int`` of at least 1, because the UI's number field may deliver
            a float.
        randomize_seed: When true, ``seed`` is replaced by a random integer
            in ``[0, 999999]``.
        seed: Seed used when ``randomize_seed`` is false.
        guidance_scale: Text guidance scale for the edit pipeline.
        progress: Gradio progress tracker (mirrors tqdm progress in the UI).

    Returns:
        A ``(seed, image)`` tuple: the seed actually used and the refined
        image.

    Raises:
        gr.Error: If no input image was provided.
    """
    if input_image is None:
        raise gr.Error("Please upload an image before running an edit.")

    # gr.Number values can arrive as floats; the schedulers need integers.
    steps = max(1, int(steps))
    seed = random.randint(0, 999999) if randomize_seed else int(seed)

    # Load eagerly inside a context manager so the file handle is released.
    with Image.open(input_image) as opened:
        source = opened.convert("RGB")

    generator = torch.manual_seed(seed)

    # Stage 1: instruction-guided edit, kept in latent form for the refiner.
    edited_latents = pipe_edit(
        instruction,
        negative_prompt=negative_prompt,
        image=source,
        guidance_scale=guidance_scale,
        image_guidance_scale=1.5,
        width=source.width,
        height=source.height,
        num_inference_steps=steps,
        generator=generator,
        output_type="latent",
    ).images

    # Stage 2: refine the latents and decode to an image.
    refined = refiner(
        prompt=f"{instruction}, 4k, hd, high quality, masterpiece",
        negative_prompt=negative_prompt,
        guidance_scale=7.5,
        num_inference_steps=steps,
        image=edited_latents,
        generator=generator,
    ).images[0]

    return seed, refined


# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------

css = '''
.gradio-container{max-width: 700px !important}
h1{text-align:center}
footer {
    visibility: hidden
}
'''

examples = [
    ["./supercar.png", "make it red"],
    ["./red_car.png", "add some snow"],
]

with gr.Blocks(css=css) as demo:
    gr.Markdown("# Image Editing\n### Note: First image generation takes time")

    with gr.Row():
        instruction = gr.Textbox(lines=1, label="Instruction", interactive=True)
        generate_button = gr.Button("Run", scale=0)

    with gr.Row():
        input_image = gr.Image(label="Image", type='filepath', interactive=True)

    with gr.Row():
        guidance_scale = gr.Number(value=6.0, step=0.1, label="Guidance Scale", interactive=True)
        steps = gr.Number(value=25, step=1, label="Steps", interactive=True)

    with gr.Accordion("Advanced options", open=False):
        with gr.Row():
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                value="(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, (mutated hands and fingers:1.4), disconnected limbs, ugly, disgusting, blurry, amputation,(face asymmetry, eyes asymmetry, deformed eyes, open mouth)",
                visible=True,
            )
        with gr.Row():
            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True, interactive=True)
            seed = gr.Number(value=2404, step=1, label="Seed", interactive=True)

    gr.Examples(
        examples=examples,
        inputs=[input_image, instruction],
        outputs=[input_image],
        cache_examples=False,
    )

    # The edited image is written back into the input image component, so a
    # further click on "Run" operates on the previous result.
    generate_button.click(
        king,
        inputs=[
            input_image,
            instruction,
            negative_prompt,
            steps,
            randomize_seed,
            seed,
            guidance_scale,
        ],
        outputs=[seed, input_image],
    )

demo.queue(max_size=500).launch()