#!/usr/bin/env python3
import os

import torch
from diffusers import DiffusionPipeline
# Global PyTorch backend switches, applied once before any pipeline is built.

# TF32 is turned off for both CUDA matmuls and cuDNN convolutions.
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False

# The flash scaled-dot-product-attention kernel is disabled.
torch.backends.cuda.enable_flash_sdp(False)

# cuDNN autotuning is on and cuDNN determinism is off.
# NOTE(review): with these two settings, fixed seeds alone may not give
# bit-identical images across runs -- confirm this is intended.
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False

# Load the SDXL base pipeline from its fp16 safetensors weights and move it
# to the GPU.
#
# Alternative (disabled): load the standalone SDXL VAE and hand it to the
# pipeline instead of the bundled one.
#   vae = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", torch_dtype=torch.float16)
#   base_pipe = DiffusionPipeline.from_pretrained(..., vae=vae, ...)
base_pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
# Alternative noted by the author: enable_sequential_cpu_offload() instead
# of moving the whole pipeline to CUDA.
base_pipe.to("cuda")

# ---- Experiment configuration ----

# Text conditioning passed to both pipeline calls.
prompt = "happy child flying a kite on a sunny day"
negative_prompt = ""

# Fixed seeds: torch_seed feeds the base pipeline's generator, refiner_seed
# feeds the refiner's generator.
torch_seed = 4_202_420_420
refiner_seed = 698_008_569

# Denoising strength intended for the refiner's img2img pass.
refiner_strength = 0.5

# Number of images requested per prompt.
batch_size = 2
# True: the base pipeline returns latents ("latent" output type);
# False: it returns PIL images.
do_latent = True

# Value passed as num_inference_steps to both the base and the refiner call.
# NOTE(review): the base call sets no denoising_end, so it appears to run the
# full schedule rather than half of it -- confirm against the diffusers docs.
total_num_steps = 20
# Build the refiner pipeline. It is given the base pipeline's VAE and second
# text encoder rather than loading separate copies of them.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    vae=base_pipe.vae,
    text_encoder_2=base_pipe.text_encoder_2,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
# Put the refiner UNet in channels-last memory format, then move the whole
# pipeline to the GPU.
# Alternative noted by the author: pipe.enable_sequential_cpu_offload().
pipe.unet.to(memory_format=torch.channels_last)
pipe.to("cuda")

# Base pass. The base pipeline gets its own CUDA generator seeded with
# torch_seed; the refiner is seeded separately later on.
generator = torch.Generator(device="cuda").manual_seed(torch_seed)

# pre_image holds either latents or PIL images, depending on do_latent, and
# is what the refiner receives as its `image` input.
pre_image = base_pipe(
    prompt=prompt,
    generator=generator,
    num_inference_steps=total_num_steps,
    negative_prompt=negative_prompt,
    num_images_per_prompt=batch_size,
    output_type="latent" if do_latent else "pil",
).images

# Refinement pass: run the refiner once per strength on the same base output
# and save every resulting image.
#
# Strengths to test. Only the configured refiner_strength is exercised; widen
# this list (e.g. 0.1 through 0.9) to sweep several values.
test_strengths = [refiner_strength]

# Create the output directory up front so a missing path cannot fail the save
# after the refiner has already done its work.
output_dir = '/home/patrick/images/refiner_bug'
os.makedirs(output_dir, exist_ok=True)

# The loop variable is deliberately not named refiner_strength, so the
# configured value is no longer overwritten by the sweep.
for strength in test_strengths:
    # Re-seed for every strength so each run starts from the same generator
    # state and results differ only because of the strength value.
    generator_two = torch.Generator(device="cuda").manual_seed(refiner_seed)
    # Put the base output through the refiner.
    # NOTE: the author flagged denoising_start as a parameter of interest for
    # this call; it is not passed here.
    images = pipe(
        prompt=prompt,
        image=pre_image,
        aesthetic_score=10,
        negative_aesthetic_score=2.4,
        generator=generator_two,
        num_inference_steps=total_num_steps,
        num_images_per_prompt=batch_size,
        strength=strength,
        negative_prompt=negative_prompt,
    ).images
    # File name pattern: test-<strength>-<index>--<batch_size>--<do_latent>.png
    for idx, image in enumerate(images):
        print(f'Image: {idx}')
        image.save(
            os.path.join(output_dir, f'test-{strength}-{idx}--{batch_size}--{do_latent}.png'),
            format='PNG',
        )