import torch from PIL import Image from diffusers import AutoencoderKL vae = AutoencoderKL.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="vae").to("mps:0") def pil_to_latent(input_im): # Single image -> single latent in a batch (so size 1, 4, 64, 64) with torch.no_grad(): latent = vae.encode(tfms.ToTensor()(input_im).unsqueeze(0).to(torch_device)*2-1) # Note scaling return 0.18215 * latent.latent_dist.sample() def latents_to_pil(latents, torch_device="mps:0"): # bath of latents -> list of images latents = (1 / 0.18215) * latents with torch.no_grad(): image = vae.decode(latents.to(torch_device)).sample image = (image / 2 + 0.5).clamp(0, 1) image = image.detach().cpu().permute(0, 2, 3, 1).numpy() images = (image * 255).round().astype("uint8") pil_images = [Image.fromarray(image) for image in images] return pil_images def load_embedding_bin(path): return torch.load(path) # Prep Scheduler def set_timesteps(scheduler, num_inference_steps): scheduler.set_timesteps(num_inference_steps) scheduler.timesteps = scheduler.timesteps.to(torch.float32) # minor fix to ensure MPS compatibility, fixed in diffusers PR 3925