Commit
·
71da11e
1
Parent(s):
6ed2376
up
Browse files- run_bug_4297.py +52 -0
- run_local_xl.py +12 -6
- run_sd_compile.py +20 -0
- run_sd_xl.py +0 -2
- run_xl_ediffi.py +3 -4
- run_xl_lora.py +7 -0
run_bug_4297.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
from diffusers import DiffusionPipeline
|
3 |
+
import torch
|
4 |
+
torch.backends.cudnn.deterministic = False
|
5 |
+
torch.backends.cuda.matmul.allow_tf32 = False
|
6 |
+
torch.backends.cudnn.allow_tf32 = False
|
7 |
+
torch.backends.cudnn.benchmark = True
|
8 |
+
torch.backends.cuda.enable_flash_sdp(False)
|
9 |
+
|
10 |
+
base_pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
|
11 |
+
base_pipe.to("cuda") # OR, pipe.enable_sequential_cpu_offload() OR,
|
12 |
+
#pipe.enable_model_cpu_offload()
|
13 |
+
|
14 |
+
# if using torch < 2.0
|
15 |
+
# pipe.enable_xformers_memory_efficient_attention()
|
16 |
+
|
17 |
+
# Reproducibility.
|
18 |
+
torch_seed = 4202420420
|
19 |
+
refiner_seed = 698008569
|
20 |
+
refiner_strength = 0.50
|
21 |
+
prompt = "happy child flying a kite on a sunny day"
|
22 |
+
negative_prompt = ''
|
23 |
+
# Batch size.
|
24 |
+
batch_size = 2
|
25 |
+
do_latent = False
|
26 |
+
prompt = [ prompt ] * batch_size
|
27 |
+
negative_prompt = [ negative_prompt ] * batch_size
|
28 |
+
# Number of inference steps passed to both the base and the refiner pipeline calls.
|
29 |
+
total_num_steps = 20
|
30 |
+
# We need multiple Generators.
|
31 |
+
# Build one independently seeded Generator per batch item. `[gen] * batch_size`
# would only repeat a reference to a single shared Generator object.
generator = [torch.Generator(device="cuda").manual_seed(torch_seed + i) for i in range(batch_size)]
|
32 |
+
|
33 |
+
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-0.9", vae=base_pipe.vae, text_encoder_2=base_pipe.text_encoder_2, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
|
34 |
+
# Using channels last layout.
|
35 |
+
pipe.unet.to(memory_format=torch.channels_last)
|
36 |
+
pipe.to("cuda") # OR, pipe.enable_sequential_cpu_offload() OR,
|
37 |
+
|
38 |
+
# Generate the base image.
|
39 |
+
pre_image = base_pipe(prompt=prompt, generator=generator,
|
40 |
+
num_inference_steps=total_num_steps, negative_prompt=negative_prompt, output_type="latent" if do_latent else "pil").images
|
41 |
+
|
42 |
+
# Refiner strengths to test (currently only 0.2; extend the list to sweep e.g. 0.1 through 0.9).
|
43 |
+
test_strengths = [0.2]
|
44 |
+
for refiner_strength in test_strengths:
|
45 |
+
# Generate a new set of random states for each image.
|
46 |
+
# One independently seeded Generator per batch item for the refiner pass;
# list multiplication would alias a single Generator across the whole batch.
generator_two = [torch.Generator(device="cuda").manual_seed(refiner_seed + i) for i in range(batch_size)]
|
47 |
+
# Put through the refiner now.
|
48 |
+
images = pipe(prompt=prompt, image=pre_image, aesthetic_score=10, negative_aesthetic_score=2.4, generator=generator_two,
|
49 |
+
num_inference_steps=total_num_steps, strength=refiner_strength, negative_prompt=negative_prompt).images # denoising_start
|
50 |
+
for idx in range(0, len(images)):
|
51 |
+
print(f'Image: {idx}')
|
52 |
+
images[idx].save(f'/home/patrick/images/refiner_bug/test-{refiner_strength}-{idx}--{batch_size}--{do_latent}.png', format='PNG')
|
run_local_xl.py
CHANGED
@@ -16,18 +16,24 @@ from io import BytesIO
|
|
16 |
api = HfApi()
|
17 |
start_time = time.time()
|
18 |
|
19 |
-
use_refiner = bool(int(sys.argv[1]))
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
-
vae = AutoencoderKL.from_pretrained(
|
23 |
if use_diffusers:
|
24 |
-
pipe = StableDiffusionXLPipeline.from_pretrained(
|
|
|
25 |
print(time.time() - start_time)
|
26 |
pipe.to("cuda")
|
27 |
|
28 |
if use_refiner:
|
29 |
start_time = time.time()
|
30 |
-
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
|
31 |
print(time.time() - start_time)
|
32 |
refiner.to("cuda")
|
33 |
# refiner.enable_sequential_cpu_offload()
|
@@ -49,7 +55,7 @@ steps = 20
|
|
49 |
seed = 0
|
50 |
seed_everything(seed)
|
51 |
start_time = time.time()
|
52 |
-
image = pipe(prompt=prompt, num_inference_steps=steps, output_type="pil").images[0]
|
53 |
print(time.time() - start_time)
|
54 |
|
55 |
if use_refiner:
|
|
|
16 |
api = HfApi()
|
17 |
start_time = time.time()
|
18 |
|
19 |
+
# use_refiner = bool(int(sys.argv[1]))
|
20 |
+
use_refiner = True
|
21 |
+
use_diffusers = True
|
22 |
+
path = "/home/patrick/sai/stable-diffusion-xl-base-1.0"
|
23 |
+
refiner_path = "/home/patrick/sai/stable-diffusion-xl-refiner-1.0"
|
24 |
+
vae_path = "/home/patrick/sai/stable-diffusion-xl-base-1.0/vae/"
|
25 |
+
vae_path = "/home/patrick/sai/sdxl-vae"
|
26 |
|
27 |
+
vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=torch.float16, force_upcast=True)
|
28 |
if use_diffusers:
|
29 |
+
# pipe = StableDiffusionXLPipeline.from_pretrained(path, vae=vae, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
|
30 |
+
pipe = StableDiffusionXLPipeline.from_pretrained(path, torch_dtype=torch.float16, vae=vae, variant="fp16", use_safetensors=True, local_files_only=True)
|
31 |
print(time.time() - start_time)
|
32 |
pipe.to("cuda")
|
33 |
|
34 |
if use_refiner:
|
35 |
start_time = time.time()
|
36 |
+
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(refiner_path, vae=vae, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
|
37 |
print(time.time() - start_time)
|
38 |
refiner.to("cuda")
|
39 |
# refiner.enable_sequential_cpu_offload()
|
|
|
55 |
seed = 0
|
56 |
seed_everything(seed)
|
57 |
start_time = time.time()
|
58 |
+
image = pipe(prompt=prompt, num_inference_steps=steps, output_type="latent" if use_refiner else "pil").images[0]
|
59 |
print(time.time() - start_time)
|
60 |
|
61 |
if use_refiner:
|
run_sd_compile.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
from diffusers import StableDiffusionPipeline
|
3 |
+
import torch
|
4 |
+
|
5 |
+
path = "runwayml/stable-diffusion-v1-5"
|
6 |
+
|
7 |
+
run_compile = True # Set True / False
|
8 |
+
|
9 |
+
pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
|
10 |
+
pipe = pipe.to("cuda:0")
|
11 |
+
pipe.unet.to(memory_format=torch.channels_last)
|
12 |
+
|
13 |
+
if run_compile:
|
14 |
+
print("Run torch compile")
|
15 |
+
pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
|
16 |
+
|
17 |
+
prompt = "ghibli style, a fantasy landscape with castles"
|
18 |
+
|
19 |
+
for _ in range(3):
|
20 |
+
images = pipe(prompt=prompt).images
|
run_sd_xl.py
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
from diffusers import StableDiffusionXLPipeline
|
|
|
|
|
|
run_xl_ediffi.py
CHANGED
@@ -18,15 +18,14 @@ from torch.nn.functional import fractional_max_pool2d_with_indices
|
|
18 |
api = HfApi()
|
19 |
start_time = time.time()
|
20 |
|
21 |
-
model_id = "
|
22 |
scheduler = DPMSolverMultistepScheduler.from_pretrained(model_id, subfolder="scheduler")
|
23 |
|
24 |
-
|
25 |
-
pipe_high_noise = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
|
26 |
# pipe_high_noise.scheduler = scheduler
|
27 |
pipe_high_noise.to("cuda")
|
28 |
|
29 |
-
pipe_low_noise = DiffusionPipeline.from_pretrained("
|
30 |
# pipe_low_noise.scheduler = scheduler
|
31 |
pipe_low_noise.to("cuda")
|
32 |
|
|
|
18 |
api = HfApi()
|
19 |
start_time = time.time()
|
20 |
|
21 |
+
model_id = "/home/patrick/stable-diffusion-xl-base-1.0/"
|
22 |
scheduler = DPMSolverMultistepScheduler.from_pretrained(model_id, subfolder="scheduler")
|
23 |
|
24 |
+
pipe_high_noise = DiffusionPipeline.from_pretrained("/home/patrick/stable-diffusion-xl-base-1.0/", torch_dtype=torch.float16, variant="fp16", use_safetensors=True, local_files_only=True)
|
|
|
25 |
# pipe_high_noise.scheduler = scheduler
|
26 |
pipe_high_noise.to("cuda")
|
27 |
|
28 |
+
pipe_low_noise = DiffusionPipeline.from_pretrained("/home/patrick/stable-diffusion-xl-refiner-1.0/", torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
|
29 |
# pipe_low_noise.scheduler = scheduler
|
30 |
pipe_low_noise.to("cuda")
|
31 |
|
run_xl_lora.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
from diffusers import DiffusionPipeline
|
3 |
+
import torch
|
4 |
+
|
5 |
+
pipe = DiffusionPipeline.from_pretrained("/home/patrick/sai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
|
6 |
+
pipe.load_lora_weights("/home/patrick/sai/stable-diffusion-xl-base-1.0/sd_xl_offset_example-lora_1.0.safetensors")
|
7 |
+
import ipdb; ipdb.set_trace()
|