from diffusers import ( AutoPipelineForImage2Image, AutoencoderTiny, ) from compel import Compel, ReturnedEmbeddingsType import torch try: import intel_extension_for_pytorch as ipex # type: ignore except: pass import psutil from config import Args from pydantic import BaseModel, Field from PIL import Image from util import ParamsModel import math from pruna import smash, SmashConfig from pruna.telemetry import set_telemetry_metrics set_telemetry_metrics(False) # disable telemetry for current session set_telemetry_metrics(False, set_as_default=True) # disable telemetry globally base_model = "stabilityai/sdxl-turbo" taesd_model = "madebyollin/taesdxl" default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux" default_negative_prompt = "blurry, low quality, render, 3D, oversaturated" page_content = """
This demo showcases SDXL Turbo Image to Image pipeline using Diffusers with a MJPEG stream server.
Change the prompt to generate different images, accepts Compel syntax.
""" class Pipeline: class Info(BaseModel): name: str = "img2img" title: str = "Image-to-Image SDXL" description: str = "Generates an image from a text prompt" input_mode: str = "image" page_content: str = page_content class InputParams(ParamsModel): prompt: str = Field( default_prompt, title="Prompt", field="textarea", id="prompt", ) negative_prompt: str = Field( default_negative_prompt, title="Negative Prompt", field="textarea", id="negative_prompt", hide=True, ) seed: int = Field( 2159232, min=0, title="Seed", field="seed", hide=True, id="seed" ) steps: int = Field( 1, min=1, max=10, title="Steps", field="range", hide=True, id="steps" ) width: int = Field( 768, min=2, max=15, title="Width", disabled=True, hide=True, id="width" ) height: int = Field( 768, min=2, max=15, title="Height", disabled=True, hide=True, id="height" ) guidance_scale: float = Field( 1.0, min=0, max=1, step=0.001, title="Guidance Scale", field="range", hide=True, id="guidance_scale", ) strength: float = Field( 0.5, min=0.25, max=1.0, step=0.001, title="Strength", field="range", hide=True, id="strength", ) def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype): base_pipe = AutoPipelineForImage2Image.from_pretrained( base_model, safety_checker=None, ) self.pipe = None if args.taesd: self.pipe.vae = AutoencoderTiny.from_pretrained( taesd_model, torch_dtype=torch_dtype, use_safetensors=True ).to(device) if args.sfast: from sfast.compilers.stable_diffusion_pipeline_compiler import ( compile, CompilationConfig, ) config = CompilationConfig.Default() config.enable_xformers = True config.enable_triton = True config.enable_cuda_graph = True self.pipe = compile(self.pipe, config=config) if device.type != "mps": self.pipe.unet.to(memory_format=torch.channels_last) if args.pruna: # Create and smash your model smash_config = SmashConfig() smash_config["cacher"] = "deepcache" smash_config["compiler"] = "stable_fast" self.pipe = smash(model=base_pipe, smash_config=smash_config) if args.torch_compile: print("Running torch compile") self.pipe.unet = torch.compile( self.pipe.unet, mode="reduce-overhead", fullgraph=True ) self.pipe.vae = torch.compile( self.pipe.vae, mode="reduce-overhead", fullgraph=True ) self.pipe( prompt="warmup", image=[Image.new("RGB", (768, 768))], ) if args.compel: self.pipe.compel_proc = Compel( tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2], text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2], returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED, requires_pooled=[False, True], ) self.pipe.set_progress_bar_config(disable=True) self.pipe.to(device=device, dtype=torch_dtype) def predict(self, params: "Pipeline.InputParams") -> Image.Image: generator = torch.manual_seed(params.seed) prompt = params.prompt negative_prompt = params.negative_prompt prompt_embeds = None pooled_prompt_embeds = None negative_prompt_embeds = None negative_pooled_prompt_embeds = None if hasattr(self.pipe, "compel_proc"): _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc( [params.prompt, params.negative_prompt] ) prompt = None negative_prompt = None prompt_embeds = _prompt_embeds[0:1] pooled_prompt_embeds = pooled_prompt_embeds[0:1] negative_prompt_embeds = _prompt_embeds[1:2] negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2] steps = params.steps strength = params.strength if int(steps * strength) < 1: steps = math.ceil(1 / max(0.10, strength)) results = self.pipe( image=params.image, prompt=prompt, negative_prompt=negative_prompt, prompt_embeds=prompt_embeds, pooled_prompt_embeds=pooled_prompt_embeds, negative_prompt_embeds=negative_prompt_embeds, negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, generator=generator, strength=strength, num_inference_steps=steps, guidance_scale=params.guidance_scale, width=params.width, height=params.height, output_type="pil", ) return results.images[0]