Spaces:

radames
/

Real-Time-Latent-Consistency-Model

Running on A100

App Files Files Community

Real-Time-Latent-Consistency-Model / server /pipelines /img2imgSDXLTurbo.py

radames

use pruna for quantization

e5edfc8 10 days ago

raw

history blame contribute delete

7.18 kB

	from diffusers import (
	AutoPipelineForImage2Image,
	AutoencoderTiny,
	)
	from compel import Compel, ReturnedEmbeddingsType
	import torch

	try:
	    # Optional Intel extension; `ipex` is not referenced anywhere else in the
	    # visible code, so a failed import only means running without it.
	    import intel_extension_for_pytorch as ipex  # type: ignore
	except Exception:
	    # Best-effort: keep going when the extension is missing or fails to
	    # load, but no longer swallow KeyboardInterrupt / SystemExit the way a
	    # bare `except:` did.
	    pass

	import psutil
	from config import Args
	from pydantic import BaseModel, Field
	from PIL import Image
	from util import ParamsModel
	import math

	from pruna import smash, SmashConfig
	from pruna.telemetry import set_telemetry_metrics

	# Opt out of pruna telemetry at import time, i.e. before Pipeline.__init__
	# ever calls smash(). Both forms are issued: one for this session and one
	# that stores the choice as the default.
	set_telemetry_metrics(False) # disable telemetry for current session
	set_telemetry_metrics(False, set_as_default=True) # disable telemetry globally


	# Model identifiers passed to `from_pretrained`: the base image-to-image
	# pipeline, and the tiny VAE that replaces its VAE when `args.taesd` is set.
	base_model = "stabilityai/sdxl-turbo"
	taesd_model = "madebyollin/taesdxl"

	# Default values of the `prompt` / `negative_prompt` fields of
	# Pipeline.InputParams.
	default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
	default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
	# HTML fragment exposed through Pipeline.Info.page_content.
	page_content = """
	<h1 class="text-3xl font-bold">Real-Time SDXL Turbo</h1>
	<h3 class="text-xl font-bold">Image-to-Image</h3>
	<p class="text-sm">
	This demo showcases
	<a
	href="https://huggingface.co/stabilityai/sdxl-turbo"
	target="_blank"
	class="text-blue-500 underline hover:no-underline">SDXL Turbo</a>
	Image to Image pipeline using
	<a
	href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
	target="_blank"
	class="text-blue-500 underline hover:no-underline">Diffusers</a
	> with a MJPEG stream server.
	</p>
	<p class="text-sm text-gray-500">
	Change the prompt to generate different images, accepts <a
	href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
	target="_blank"
	class="text-blue-500 underline hover:no-underline">Compel</a
	> syntax.
	</p>
	"""


	class Pipeline:
	    """Image-to-image SDXL Turbo pipeline wrapper.

	    ``__init__`` loads the model and applies the optional accelerations
	    selected on ``args``; ``predict`` runs one image-to-image generation
	    from a set of ``InputParams``.
	    """

	    # The nested model classes are documented with `#` comments rather than
	    # docstrings so that whatever schema they generate is left unchanged.

	    # Static metadata describing this pipeline (name, title, input mode and
	    # the HTML blurb defined at module level).
	    class Info(BaseModel):
	        name: str = "img2img"
	        title: str = "Image-to-Image SDXL"
	        description: str = "Generates an image from a text prompt"
	        input_mode: str = "image"
	        page_content: str = page_content

	    # User-adjustable generation parameters. Besides the fields declared
	    # here, `predict` also reads `params.image`, which is not declared in
	    # this class (presumably supplied via ParamsModel / the caller — confirm).
	    class InputParams(ParamsModel):
	        prompt: str = Field(
	            default_prompt,
	            title="Prompt",
	            field="textarea",
	            id="prompt",
	        )
	        negative_prompt: str = Field(
	            default_negative_prompt,
	            title="Negative Prompt",
	            field="textarea",
	            id="negative_prompt",
	            hide=True,
	        )
	        seed: int = Field(
	            2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
	        )
	        steps: int = Field(
	            1, min=1, max=10, title="Steps", field="range", hide=True, id="steps"
	        )
	        # NOTE(review): min=2 / max=15 do not bracket the 768 default of
	        # width and height; both fields are disabled and hidden, so this
	        # may be harmless — confirm the intended bounds.
	        width: int = Field(
	            768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
	        )
	        height: int = Field(
	            768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
	        )
	        guidance_scale: float = Field(
	            1.0,
	            min=0,
	            max=1,
	            step=0.001,
	            title="Guidance Scale",
	            field="range",
	            hide=True,
	            id="guidance_scale",
	        )
	        strength: float = Field(
	            0.5,
	            min=0.25,
	            max=1.0,
	            step=0.001,
	            title="Strength",
	            field="range",
	            hide=True,
	            id="strength",
	        )

	    def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
	        """Load SDXL Turbo and apply the optimizations enabled on ``args``.

	        Args:
	            args: Run configuration. The flags ``taesd``, ``sfast``,
	                ``pruna``, ``torch_compile`` and ``compel`` are read here.
	            device: Device the pipeline (and the tiny VAE) is moved to.
	            torch_dtype: Dtype the pipeline (and the tiny VAE) is cast to.
	        """
	        # Keep the loaded pipeline on `self.pipe` from the start. It used to
	        # be None until the pruna branch ran, so every earlier step (and any
	        # run without pruna) failed with AttributeError on None.
	        self.pipe = AutoPipelineForImage2Image.from_pretrained(
	            base_model,
	            safety_checker=None,
	        )

	        if args.taesd:
	            # Replace the pipeline's VAE with the tiny autoencoder.
	            self.pipe.vae = AutoencoderTiny.from_pretrained(
	                taesd_model, torch_dtype=torch_dtype, use_safetensors=True
	            ).to(device)

	        if args.sfast:
	            # Imported lazily so sfast is only required when requested;
	            # aliased to avoid shadowing the builtin `compile`.
	            from sfast.compilers.stable_diffusion_pipeline_compiler import (
	                compile as sfast_compile,
	                CompilationConfig,
	            )

	            config = CompilationConfig.Default()
	            config.enable_xformers = True
	            config.enable_triton = True
	            config.enable_cuda_graph = True
	            self.pipe = sfast_compile(self.pipe, config=config)

	        if device.type != "mps":
	            self.pipe.unet.to(memory_format=torch.channels_last)

	        if args.pruna:
	            # Smash the pipeline as configured so far (including a swapped
	            # VAE) with pruna's deepcache cacher and stable_fast compiler.
	            smash_config = SmashConfig()
	            smash_config["cacher"] = "deepcache"
	            smash_config["compiler"] = "stable_fast"
	            self.pipe = smash(model=self.pipe, smash_config=smash_config)

	        if args.torch_compile:
	            print("Running torch compile")
	            self.pipe.unet = torch.compile(
	                self.pipe.unet, mode="reduce-overhead", fullgraph=True
	            )
	            self.pipe.vae = torch.compile(
	                self.pipe.vae, mode="reduce-overhead", fullgraph=True
	            )
	            # One throwaway call on a blank 768x768 image right after
	            # compiling (warm-up).
	            self.pipe(
	                prompt="warmup",
	                image=[Image.new("RGB", (768, 768))],
	            )

	        if args.compel:
	            # The Compel processor is stored on the pipeline object itself;
	            # `predict` detects it with hasattr().
	            self.pipe.compel_proc = Compel(
	                tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
	                text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
	                returned_embeddings_type=(
	                    ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED
	                ),
	                requires_pooled=[False, True],
	            )

	        self.pipe.set_progress_bar_config(disable=True)
	        # NOTE(review): the move to `device` happens after the optional
	        # compile / warm-up steps above; order kept as in the original —
	        # confirm this is intended.
	        self.pipe.to(device=device, dtype=torch_dtype)

	    def predict(self, params: "Pipeline.InputParams") -> Image.Image:
	        """Run one image-to-image generation and return the first image.

	        Args:
	            params: Generation parameters. Reads ``prompt``,
	                ``negative_prompt``, ``seed``, ``steps``, ``strength``,
	                ``guidance_scale``, ``width``, ``height`` and ``image``.

	        Returns:
	            ``results.images[0]`` of the pipeline call (``output_type="pil"``).
	        """
	        generator = torch.manual_seed(params.seed)
	        prompt = params.prompt
	        negative_prompt = params.negative_prompt
	        prompt_embeds = None
	        pooled_prompt_embeds = None
	        negative_prompt_embeds = None
	        negative_pooled_prompt_embeds = None
	        if hasattr(self.pipe, "compel_proc"):
	            # Encode positive and negative prompt as one batch of two:
	            # row 0 is the prompt, row 1 the negative prompt.
	            embeds, pooled = self.pipe.compel_proc(
	                [params.prompt, params.negative_prompt]
	            )
	            # Embeddings replace the raw prompt strings.
	            prompt = None
	            negative_prompt = None
	            prompt_embeds = embeds[0:1]
	            negative_prompt_embeds = embeds[1:2]
	            # Slice both pooled rows from the raw output. Previously the
	            # pooled tensor was overwritten with its [0:1] slice first, so
	            # the negative pooled embedding ([1:2]) was always empty.
	            pooled_prompt_embeds = pooled[0:1]
	            negative_pooled_prompt_embeds = pooled[1:2]

	        steps = params.steps
	        strength = params.strength
	        # When int(steps * strength) would be zero, raise the step count
	        # (presumably so the pipeline still runs at least one denoising
	        # step). Flooring strength at 0.10 caps the result at 10 steps.
	        if int(steps * strength) < 1:
	            steps = math.ceil(1 / max(0.10, strength))

	        results = self.pipe(
	            image=params.image,
	            prompt=prompt,
	            negative_prompt=negative_prompt,
	            prompt_embeds=prompt_embeds,
	            pooled_prompt_embeds=pooled_prompt_embeds,
	            negative_prompt_embeds=negative_prompt_embeds,
	            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
	            generator=generator,
	            strength=strength,
	            num_inference_steps=steps,
	            guidance_scale=params.guidance_scale,
	            width=params.width,
	            height=params.height,
	            output_type="pil",
	        )

	        return results.images[0]