patrickvonplaten committed on
Commit
71da11e
·
1 Parent(s): 6ed2376
Files changed (6) hide show
  1. run_bug_4297.py +52 -0
  2. run_local_xl.py +12 -6
  3. run_sd_compile.py +20 -0
  4. run_sd_xl.py +0 -2
  5. run_xl_ediffi.py +3 -4
  6. run_xl_lora.py +7 -0
run_bug_4297.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from diffusers import DiffusionPipeline
3
+ import torch
4
+ torch.backends.cudnn.deterministic = False
5
+ torch.backends.cuda.matmul.allow_tf32 = False
6
+ torch.backends.cudnn.allow_tf32 = False
7
+ torch.backends.cudnn.benchmark = True
8
+ torch.backends.cuda.enable_flash_sdp(False)
9
+
10
+ base_pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
11
+ base_pipe.to("cuda") # OR, pipe.enable_sequential_cpu_offload() OR,
12
+ #pipe.enable_model_cpu_offload()
13
+
14
+ # if using torch < 2.0
15
+ # pipe.enable_xformers_memory_efficient_attention()
16
+
17
+ # Reproducibility.
18
+ torch_seed = 4202420420
19
+ refiner_seed = 698008569
20
+ refiner_strength = 0.50
21
+ prompt = "happy child flying a kite on a sunny day"
22
+ negative_prompt = ''
23
+ # Batch size.
24
+ batch_size = 2
25
+ do_latent = False
26
+ prompt = [ prompt ] * batch_size
27
+ negative_prompt = [ negative_prompt ] * batch_size
28
+ # Both the base and the refiner calls are scheduled with 20 inference steps; the refiner's share is set by `strength`.
29
+ total_num_steps = 20
30
+ # One generator entry per batch item. NOTE: the list below repeats a single seeded Generator object batch_size times.
31
+ generator = [ torch.Generator(device="cuda").manual_seed(torch_seed) ] * batch_size
32
+
33
+ pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-0.9", vae=base_pipe.vae, text_encoder_2=base_pipe.text_encoder_2, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
34
+ # Using channels last layout.
35
+ pipe.unet.to(memory_format=torch.channels_last)
36
+ pipe.to("cuda") # OR, pipe.enable_sequential_cpu_offload() OR,
37
+
38
+ # Generate the base image.
39
+ pre_image = base_pipe(prompt=prompt, generator=generator,
40
+ num_inference_steps=total_num_steps, negative_prompt=negative_prompt, output_type="latent" if do_latent else "pil").images
41
+
42
+ # Refiner strengths to test; only 0.2 is exercised here.
43
+ test_strengths = [0.2]
44
+ for refiner_strength in test_strengths:
45
+ # Re-seed the refiner generator for each strength; the same Generator object is repeated for every batch item.
46
+ generator_two = [ torch.Generator(device="cuda").manual_seed(refiner_seed) ] * batch_size
47
+ # Put through the refiner now.
48
+ images = pipe(prompt=prompt, image=pre_image, aesthetic_score=10, negative_aesthetic_score=2.4, generator=generator_two,
49
+ num_inference_steps=total_num_steps, strength=refiner_strength, negative_prompt=negative_prompt).images # denoising_start
50
+ for idx in range(0, len(images)):
51
+ print(f'Image: {idx}')
52
+ images[idx].save(f'/home/patrick/images/refiner_bug/test-{refiner_strength}-{idx}--{batch_size}--{do_latent}.png', format='PNG')
run_local_xl.py CHANGED
@@ -16,18 +16,24 @@ from io import BytesIO
16
  api = HfApi()
17
  start_time = time.time()
18
 
19
- use_refiner = bool(int(sys.argv[1]))
20
- use_diffusers = False
 
 
 
 
 
21
 
22
- vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16, force_upcast=True)
23
  if use_diffusers:
24
- pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9", vae=vae, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
 
25
  print(time.time() - start_time)
26
  pipe.to("cuda")
27
 
28
  if use_refiner:
29
  start_time = time.time()
30
- refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-0.9", vae=vae, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
31
  print(time.time() - start_time)
32
  refiner.to("cuda")
33
  # refiner.enable_sequential_cpu_offload()
@@ -49,7 +55,7 @@ steps = 20
49
  seed = 0
50
  seed_everything(seed)
51
  start_time = time.time()
52
- image = pipe(prompt=prompt, num_inference_steps=steps, output_type="pil").images[0]
53
  print(time.time() - start_time)
54
 
55
  if use_refiner:
 
16
  api = HfApi()
17
  start_time = time.time()
18
 
19
+ # use_refiner = bool(int(sys.argv[1]))
20
+ use_refiner = True
21
+ use_diffusers = True
22
+ path = "/home/patrick/sai/stable-diffusion-xl-base-1.0"
23
+ refiner_path = "/home/patrick/sai/stable-diffusion-xl-refiner-1.0"
24
+ vae_path = "/home/patrick/sai/stable-diffusion-xl-base-1.0/vae/"
25
+ vae_path = "/home/patrick/sai/sdxl-vae"
26
 
27
+ vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16, force_upcast=True)
28
  if use_diffusers:
29
+ # pipe = StableDiffusionXLPipeline.from_pretrained(path, vae=vae, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
30
+ pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16, vae=vae, variant="fp16", use_safetensors=True, local_files_only=True)
31
  print(time.time() - start_time)
32
  pipe.to("cuda")
33
 
34
  if use_refiner:
35
  start_time = time.time()
36
+ refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(refiner_path, vae=vae, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
37
  print(time.time() - start_time)
38
  refiner.to("cuda")
39
  # refiner.enable_sequential_cpu_offload()
 
55
  seed = 0
56
  seed_everything(seed)
57
  start_time = time.time()
58
+ image = pipe(prompt=prompt, num_inference_steps=steps, output_type="latent" if use_refiner else "pil").images[0]
59
  print(time.time() - start_time)
60
 
61
  if use_refiner:
run_sd_compile.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from diffusers import StableDiffusionPipeline
3
+ import torch
4
+
5
+ path = "runwayml/stable-diffusion-v1-5"
6
+
7
+ run_compile = True # Set True / False
8
+
9
+ pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
10
+ pipe = pipe.to("cuda:0")
11
+ pipe.unet.to(memory_format=torch.channels_last)
12
+
13
+ if run_compile:
14
+ print("Run torch compile")
15
+ pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
16
+
17
+ prompt = "ghibli style, a fantasy landscape with castles"
18
+
19
+ for _ in range(3):
20
+ images = pipe(prompt=prompt).images
run_sd_xl.py DELETED
@@ -1,2 +0,0 @@
1
- #!/usr/bin/env python3
2
- from diffusers import StableDiffusionXLPipeline
 
 
 
run_xl_ediffi.py CHANGED
@@ -18,15 +18,14 @@ from torch.nn.functional import fractional_max_pool2d_with_indices
18
  api = HfApi()
19
  start_time = time.time()
20
 
21
- model_id = "stabilityai/stable-diffusion-xl-base-0.9"
22
  scheduler = DPMSolverMultistepScheduler.from_pretrained(model_id, subfolder="scheduler")
23
 
24
- model_id = "stabilityai/stable-diffusion-xl-base-0.9"
25
- pipe_high_noise = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
26
  # pipe_high_noise.scheduler = scheduler
27
  pipe_high_noise.to("cuda")
28
 
29
- pipe_low_noise = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-0.9", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
30
  # pipe_low_noise.scheduler = scheduler
31
  pipe_low_noise.to("cuda")
32
 
 
18
  api = HfApi()
19
  start_time = time.time()
20
 
21
+ model_id = "/home/patrick/stable-diffusion-xl-base-1.0/"
22
  scheduler = DPMSolverMultistepScheduler.from_pretrained(model_id, subfolder="scheduler")
23
 
24
+ pipe_high_noise = DiffusionPipeline.from_pretrained("/home/patrick/stable-diffusion-xl-base-1.0/", torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
 
25
  # pipe_high_noise.scheduler = scheduler
26
  pipe_high_noise.to("cuda")
27
 
28
+ pipe_low_noise = DiffusionPipeline.from_pretrained("/home/patrick/stable-diffusion-xl-refiner-1.0/", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
29
  # pipe_low_noise.scheduler = scheduler
30
  pipe_low_noise.to("cuda")
31
 
run_xl_lora.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from diffusers import DiffusionPipeline
3
+ import torch
4
+
5
+ pipe = DiffusionPipeline.from_pretrained("/home/patrick/sai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
6
+ pipe.load_lora_weights("/home/patrick/sai/stable-diffusion-xl-base-1.0/sd_xl_offset_example-lora_1.0.safetensors")
7
+ import ipdb; ipdb.set_trace()