patrickvonplaten committed on
Commit
c8b957a
·
2 Parent(s): 56fb00e 777e804

Merge branch 'main' of https://huggingface.co/diffusers/tools

Browse files
Files changed (9) hide show
  1. benchmark_llama.py +53 -0
  2. clear_mem.py +8 -6
  3. run_bug.py +32 -0
  4. run_lora_on_off.py +35 -0
  5. run_sd_lora.py +20 -0
  6. run_sd_sde.py +22 -0
  7. run_sdxl_cpu.py +25 -0
  8. run_xl.py +4 -2
  9. save_lora.py +62 -0
benchmark_llama.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import time
4
+ import torch
5
+
6
+ DEVICE = "cuda:1"
7
+
8
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
9
+ model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
10
+ model.to(DEVICE)
11
+
12
+
13
+ # forward
14
+ print("Forward benchmarks")
15
+ print(50 * "=")
16
+
17
+ for batch_size in (1, 4, 16):
18
+ for input_seq in (4, 16, 256):
19
+ input_ids = torch.ones((batch_size, input_seq), dtype=torch.long, device=DEVICE)
20
+ attention_mask = torch.ones_like(input_ids)
21
+ attention_mask[0, 3] = 0
22
+
23
+ times = []
24
+ for _ in range(3):
25
+ start_time = time.time()
26
+ with torch.no_grad():
27
+ logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
28
+ times.append(time.time() - start_time)
29
+
30
+ result = min(times)
31
+
32
+ print(f"Forward bsz={batch_size}, input_seq={input_seq}: {result}")
33
+
34
+
35
+ # generate
36
+ print("Generate benchmarks")
37
+ print(50 * "=")
38
+
39
+ for batch_size in (1, 16):
40
+ for input_seq in (4, 256):
41
+ input_ids = torch.ones((batch_size, input_seq), dtype=torch.long, device=DEVICE)
42
+ attention_mask = torch.ones_like(input_ids)
43
+ attention_mask[0, 3] = 0
44
+
45
+ times = []
46
+ for _ in range(3):
47
+ start_time = time.time()
48
+ out = model.generate(input_ids=input_ids, max_new_tokens=256, do_sample=False)
49
+ times.append(time.time() - start_time)
50
+
51
+ result = min(times)
52
+
53
+ print(f"Generate bsz={batch_size}, input_seq={input_seq}: {result}")
clear_mem.py CHANGED
@@ -1,10 +1,11 @@
1
  #!/usr/bin/env python3
2
  import torch
3
  import gc
 
4
 
5
- shape = (10,000)
6
 
7
- input = torch.ones((shape, shape), device="cuda")
8
 
9
 
10
  def clear_memory(model):
@@ -14,8 +15,9 @@ def clear_memory(model):
14
  torch.cuda.ipc_collect()
15
  torch.clear_autocast_cache()
16
 
17
- for _ in range(6):
18
- linear = torch.nn.Linear(shape, shape).to("cuda")
19
- output = linear(input)
 
20
 
21
- clear_memory(linear)
 
1
  #!/usr/bin/env python3
2
  import torch
3
  import gc
4
+ from diffusers import DiffusionPipeline
5
 
6
+ shape = (30_000, 30_000)
7
 
8
+ input = torch.randn(shape, device="cuda")
9
 
10
 
11
  def clear_memory(model):
 
15
  torch.cuda.ipc_collect()
16
  torch.clear_autocast_cache()
17
 
18
+ for _ids in ["runwayml/stable-diffusion-v1-5", "CompVis/stable-diffusion-v1-4", "runwayml/stable-diffusion-v1-5", "CompVis/stable-diffusion-v1-4", "runwayml/stable-diffusion-v1-5"]:
19
+ pipe = DiffusionPipeline.from_pretrained(_ids, use_safetensors=True).to("cuda")
20
+ pipe("hey", num_inference_steps=1)
21
+ print("finished...")
22
 
23
+ clear_memory(pipe)
run_bug.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import torch
3
+
4
+ from diffusers import AutoencoderKL, StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline, KDPM2AncestralDiscreteScheduler
5
+
6
+ vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix",
7
+ torch_dtype=torch.float16
8
+ )
9
+
10
+ base = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0",
11
+ vae=vae,
12
+ torch_dtype=torch.float16,
13
+ variant="fp16",
14
+ use_safetensors=True
15
+ )
16
+
17
+ scheduler = KDPM2AncestralDiscreteScheduler.from_config(base.scheduler.config, use_karras_sigmas=True)
18
+ base.scheduler = scheduler
19
+
20
+ base.to("cuda")
21
+
22
+ def print_step(s, t, latents):
23
+ print(s)
24
+
25
+ generator=torch.manual_seed(1111)
26
+
27
+ images = base(
28
+ prompt="LOVE",
29
+ num_inference_steps=10,
30
+ generator=generator,
31
+ callback=print_step
32
+ ).images
run_lora_on_off.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ #@title Fuse/unfuse LoRAs sequentially leading to trouble
3
+ import torch
4
+ from diffusers import StableDiffusionXLPipeline
5
+
6
+ pipe = StableDiffusionXLPipeline.from_pretrained(
7
+ "stabilityai/stable-diffusion-xl-base-1.0",
8
+ torch_dtype=torch.float16,
9
+ variant="fp16",
10
+ ).to("cuda")
11
+
12
+ pipe.load_lora_weights("Pclanglais/TintinIA")
13
+ pipe.fuse_lora()
14
+ images = pipe("a mecha robot", num_inference_steps=2)
15
+ pipe.unfuse_lora()
16
+ pipe.unload_lora_weights()
17
+
18
+ pipe.load_lora_weights("ProomptEngineer/pe-balloon-diffusion-style")
19
+ pipe.fuse_lora()
20
+ images = pipe("a mecha robot", num_inference_steps=2)
21
+ pipe.unfuse_lora()
22
+ pipe.unload_lora_weights()
23
+
24
+ pipe.load_lora_weights("ostris/crayon_style_lora_sdxl")
25
+ pipe.fuse_lora()
26
+ images = pipe("a mecha robot", num_inference_steps=2)
27
+ pipe.unfuse_lora()
28
+ pipe.unload_lora_weights()
29
+
30
+
31
+ pipe.load_lora_weights("joachimsallstrom/aether-cloud-lora-for-sdxl")
32
+ pipe.fuse_lora()
33
+ images = pipe("a mecha robot", num_inference_steps=2)
34
+ pipe.unfuse_lora()
35
+ pipe.unload_lora_weights()
run_sd_lora.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from diffusers import DiffusionPipeline, AutoencoderKL
3
+ import hf_image_uploader as hiu
4
+ import torch
5
+
6
+ vae = AutoencoderKL.from_pretrained(
7
+ "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
8
+ )
9
+ pipe = DiffusionPipeline.from_pretrained(
10
+ "stabilityai/stable-diffusion-xl-base-1.0",
11
+ variant="fp16",
12
+ vae=vae,
13
+ torch_dtype=torch.float16,
14
+ ).to("cuda")
15
+
16
+ pipe.load_lora_weights("rvorias/m_test")
17
+ # pipe.enable_xformers_memory_efficient_attention()
18
+
19
+ image = pipe("a photo of a pikachu pixel art", generator=torch.manual_seed(66)).images[0]
20
+ hiu.upload(image, "patrickvonplaten/images")
run_sd_sde.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from diffusers import DPMSolverMultistepScheduler, StableDiffusionXLPipeline, DPMSolverSDEScheduler
3
+
4
+ path = "hf-internal-testing/tiny-stable-diffusion-xl-pipe"
5
+
6
+ pipe = StableDiffusionXLPipeline.from_pretrained(path)
7
+ pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, algorithm_type="sde-dpmsolver++")
8
+
9
+ prompt = "An astronaut riding a green horse on Mars"
10
+ steps = 20
11
+
12
+ for i in range(2):
13
+ width = 32 * (i + 1)
14
+ height = 32 * (i + 1)
15
+ image = pipe(prompt=prompt, width=width, height=height, num_inference_steps=steps).images[0]
16
+
17
+ pipe.scheduler = DPMSolverSDEScheduler.from_config(pipe.scheduler.config, algorithm_type="sde-dpmsolver++")
18
+
19
+ for i in range(2):
20
+ width = 32 * (i + 1)
21
+ height = 32 * (i + 1)
22
+ image = pipe(prompt=prompt, width=width, height=height, num_inference_steps=steps).images[0]
run_sdxl_cpu.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from diffusers import DPMSolverMultistepScheduler, StableDiffusionXLPipeline, DPMSolverSDEScheduler
3
+ import hf_image_uploader as hiu
4
+
5
+ path = "hf-internal-testing/tiny-stable-diffusion-xl-pipe"
6
+
7
+ pipe = StableDiffusionXLPipeline.from_pretrained(path)
8
+ pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, algorithm_type="sde-dpmsolver++")
9
+
10
+ prompt = "An astronaut riding a green horse on Mars"
11
+ steps = 20
12
+
13
+ for i in range(2):
14
+ width = 32 * (i + 1)
15
+ height = 32 * (i + 1)
16
+ image = pipe(prompt=prompt, width=width, height=height, num_inference_steps=steps).images[0]
17
+ hiu.upload(image, "patrickvonplaten/images")
18
+
19
+ pipe.scheduler = DPMSolverSDEScheduler.from_config(pipe.scheduler.config, algorithm_type="sde-dpmsolver++")
20
+
21
+ for i in range(2):
22
+ width = 32 * (i + 1)
23
+ height = 32 * (i + 1)
24
+ image = pipe(prompt=prompt, width=width, height=height, num_inference_steps=steps).images[0]
25
+ hiu.upload(image, "patrickvonplaten/images")
run_xl.py CHANGED
@@ -14,8 +14,10 @@ pipe2 = DiffusionPipeline.from_pretrained(
14
  variant="fp16",
15
  torch_dtype=torch.float16
16
  )
17
- pipe.enable_model_cpu_offload()
18
- pipe2.enable_model_cpu_offload()
 
 
19
 
20
  compel = Compel(
21
  tokenizer=[pipe.tokenizer, pipe.tokenizer_2] ,
 
14
  variant="fp16",
15
  torch_dtype=torch.float16
16
  )
17
+ pipe.to("cuda")
18
+ pipe2.to("cuda")
19
+ # pipe.enable_model_cpu_offload()
20
+ # pipe2.enable_model_cpu_offload()
21
 
22
  compel = Compel(
23
  tokenizer=[pipe.tokenizer, pipe.tokenizer_2] ,
save_lora.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import torch
3
+ from warnings import warn
4
+ from diffusers import (
5
+ AutoencoderKL,
6
+ DiffusionPipeline,
7
+ )
8
+ import hashlib
9
+
10
+ base = "stabilityai/stable-diffusion-xl-base-1.0"
11
+ adapter1 = 'nerijs/pixel-art-xl'
12
+ weightname1 = 'pixel-art-xl.safetensors'
13
+
14
+ adapter2 = 'Alexzyx/lora-trained-xl-colab'
15
+ weightname2 = None
16
+
17
+ inputs = "elephant"
18
+ kwargs = {}
19
+
20
+ if torch.cuda.is_available():
21
+ kwargs["torch_dtype"] = torch.float16
22
+
23
+ #vae = AutoencoderKL.from_pretrained(
24
+ # "madebyollin/sdxl-vae-fp16-fix",
25
+ # torch_dtype=torch.float16, # load fp16 fix VAE
26
+ #)
27
+ #kwargs["vae"] = vae
28
+ #kwargs["variant"] = "fp16"
29
+ #
30
+
31
+ model = DiffusionPipeline.from_pretrained(
32
+ base, **kwargs
33
+ )
34
+
35
+ if torch.cuda.is_available():
36
+ model.to("cuda")
37
+
38
+
39
+ def inference(adapter, weightname):
40
+ model.load_lora_weights(adapter, weight_name=weightname)
41
+ try:
42
+ model.fuse_lora(safe_fusing=True)
43
+ except ValueError:
44
+ warn(f"{adapter} and {weightname} is broken. LoRA is not fused.")
45
+ model.unload_lora_weights()
46
+
47
+ data = model(inputs, num_inference_steps=1).images[0]
48
+ model.unfuse_lora()
49
+ model.unload_lora_weights()
50
+ filename = '/tmp/hello.jpg'
51
+ data.save(filename, format='jpeg')
52
+ with open(filename, 'rb') as f:
53
+ md5 = hashlib.md5(f.read()).hexdigest()
54
+ print("Adapter %s, md5sum %s" % (adapter, md5))
55
+ if md5 == '40c78c9fd4daeff01c988c3532fdd51b':
56
+ print("BLACK SCREEN IMAGE for adapter %s" % adapter)
57
+
58
+
59
+ inference(adapter1, weightname1)
60
+ inference(adapter2, weightname2)
61
+ inference(adapter1, weightname1)
62
+ inference(adapter1, weightname1)