#!/usr/bin/env python3
"""Run a fixed prompt through the SDXL base pipeline, then the SDXL refiner.

The base pipeline renders ``BATCH_SIZE`` images for ``PROMPT``.  Its output
(latents when ``DO_LATENT`` is true, PIL images otherwise) is then passed to
the refiner pipeline once per entry in ``TEST_STRENGTHS``, and every refined
image is written to ``OUTPUT_DIR`` as a PNG.

Seeds, prompt, step count and refiner settings are fixed so that runs can be
compared against each other.
"""
from pathlib import Path

import torch
from diffusers import DiffusionPipeline

BASE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
REFINER_MODEL = "stabilityai/stable-diffusion-xl-refiner-1.0"

# Reproducibility: one fixed seed per pipeline.
TORCH_SEED = 4202420420
REFINER_SEED = 698008569

# Refiner img2img strength(s) to sweep.  Only a single value is tested for
# now; add more entries here to compare several strengths in one run.
REFINER_STRENGTH = 0.50
TEST_STRENGTHS = [REFINER_STRENGTH]

PROMPT = "happy child flying a kite on a sunny day"
NEGATIVE_PROMPT = ''

# Number of images generated per prompt by both pipelines.
BATCH_SIZE = 2

# When true the base pipeline hands latents to the refiner; otherwise it
# hands over decoded PIL images.
DO_LATENT = True

# Step count passed to both pipelines.  The base call sets no `denoising_end`
# and the refiner call sets no `denoising_start`; the refiner is driven by
# `strength` instead.
# NOTE(review): the original comment spoke of completing 50% of the steps with
# each model -- presumably that refers to strength=0.5; confirm.
TOTAL_NUM_STEPS = 20

# Destination for the refined PNGs.
OUTPUT_DIR = Path("/home/patrick/images/refiner_bug")


def configure_torch_backends() -> None:
    """Apply the global cuDNN / CUDA backend switches used for this run."""
    # NOTE(review): deterministic=False together with benchmark=True allows
    # cuDNN to auto-select kernels, which may work against the fixed seeds
    # above.  The values are kept exactly as they were so results stay
    # comparable with earlier runs -- confirm this is intended.
    torch.backends.cudnn.deterministic = False
    torch.backends.cuda.matmul.allow_tf32 = False
    torch.backends.cudnn.allow_tf32 = False
    torch.backends.cudnn.benchmark = True
    torch.backends.cuda.enable_flash_sdp(False)


def main() -> None:
    """Generate the base batch, refine it per strength, and save the PNGs."""
    # Create the output directory up front so a missing folder fails fast
    # instead of after all the GPU work has been done.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    configure_torch_backends()

    # Optional variant: load the standalone VAE with
    #   AutoencoderKL.from_pretrained("stabilityai/sdxl-vae",
    #                                 torch_dtype=torch.float16)
    # and pass it as `vae=` to the base pipeline below.
    base_pipe = DiffusionPipeline.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
    )
    # Alternative to .to("cuda"): base_pipe.enable_sequential_cpu_offload()
    base_pipe.to("cuda")

    # The base and refiner passes each get their own Generator.
    generator = torch.Generator(device="cuda").manual_seed(TORCH_SEED)

    # The refiner reuses the base pipeline's VAE and second text encoder.
    pipe = DiffusionPipeline.from_pretrained(
        REFINER_MODEL,
        vae=base_pipe.vae,
        text_encoder_2=base_pipe.text_encoder_2,
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
    )
    # Using channels last layout.
    pipe.unet.to(memory_format=torch.channels_last)
    # Alternative to .to("cuda"): pipe.enable_sequential_cpu_offload()
    pipe.to("cuda")

    pre_image = base_pipe(
        prompt=PROMPT,
        generator=generator,
        num_inference_steps=TOTAL_NUM_STEPS,
        negative_prompt=NEGATIVE_PROMPT,
        num_images_per_prompt=BATCH_SIZE,
        output_type="latent" if DO_LATENT else "pil",
    ).images

    for strength in TEST_STRENGTHS:
        # Re-seed for every strength so each refiner run starts from the same
        # random state.
        generator_two = torch.Generator(device="cuda").manual_seed(REFINER_SEED)

        # Put through the refiner now.
        images = pipe(
            prompt=PROMPT,
            image=pre_image,
            aesthetic_score=10,
            negative_aesthetic_score=2.4,
            generator=generator_two,
            num_inference_steps=TOTAL_NUM_STEPS,
            num_images_per_prompt=BATCH_SIZE,
            strength=strength,
            negative_prompt=NEGATIVE_PROMPT,
        ).images

        for idx, image in enumerate(images):
            print(f'Image: {idx}')
            image.save(
                OUTPUT_DIR / f'test-{strength}-{idx}--{BATCH_SIZE}--{DO_LATENT}.png',
                format='PNG',
            )


if __name__ == "__main__":
    main()