multimodalart HF staff committed on
Commit
317aaa9
·
1 Parent(s): fe2765b

Pre-load all models in RAM

Browse files

Pre-load all models in RAM and swap between RAM and VRAM for faster inference

Files changed (1) hide show
  1. app.py +24 -8
app.py CHANGED
@@ -1,5 +1,6 @@
1
  from diffusers import StableDiffusionPipeline
2
  from diffusers import StableDiffusionImg2ImgPipeline
 
3
  import gradio as gr
4
  import torch
5
 
@@ -34,9 +35,14 @@ prompt_prefixes = {
34
  }
35
 
36
  current_model = models[0]
37
- pipe = StableDiffusionPipeline.from_pretrained(current_model, torch_dtype=torch.float16)
38
- if torch.cuda.is_available():
39
- pipe = pipe.to("cuda")
 
 
 
 
 
40
 
41
  device = "GPU 🔥" if torch.cuda.is_available() else "CPU 🥶"
42
 
@@ -54,10 +60,14 @@ def text_inference(model, prompt, guidance, steps, generator=None):
54
  global current_model
55
  global pipe
56
  if model != current_model:
57
- current_model = model
58
- pipe = StableDiffusionPipeline.from_pretrained(current_model, torch_dtype=torch.float16)
59
 
60
- if torch.cuda.is_available():
 
 
 
 
61
  pipe = pipe.to("cuda")
62
 
63
  prompt = prompt_prefixes[current_model] + prompt
@@ -68,6 +78,7 @@ def text_inference(model, prompt, guidance, steps, generator=None):
68
  width=512,
69
  height=512,
70
  generator=generator).images[0]
 
71
  return image
72
 
73
  def img_inference(model, prompt, img, strength, guidance, steps, generator):
@@ -76,9 +87,13 @@ def img_inference(model, prompt, img, strength, guidance, steps, generator):
76
  global pipe
77
  if model != current_model:
78
  current_model = model
79
- pipe = StableDiffusionImg2ImgPipeline.from_pretrained(current_model, torch_dtype=torch.float16)
80
 
81
- if torch.cuda.is_available():
 
 
 
 
82
  pipe = pipe.to("cuda")
83
 
84
  prompt = prompt_prefixes[current_model] + prompt
@@ -93,6 +108,7 @@ def img_inference(model, prompt, img, strength, guidance, steps, generator):
93
  width=512,
94
  height=512,
95
  generator=generator).images[0]
 
96
  return image
97
 
98
 
 
1
  from diffusers import StableDiffusionPipeline
2
  from diffusers import StableDiffusionImg2ImgPipeline
3
+ from diffusers import AutoencoderKL, UNet2DConditionModel
4
  import gradio as gr
5
  import torch
6
 
 
35
  }
36
 
37
  current_model = models[0]
38
+ pipes = []
39
+ vae = AutoencoderKL.from_pretrained(current_model, subfolder="vae", torch_dtype=torch.float16)
40
+ for model in models:
41
+ unet = UNet2DConditionModel.from_pretrained(model, subfolder="unet", torch_dtype=torch.float16)
42
+ pipe = StableDiffusionPipeline.from_pretrained(model, unet=unet, vae=vae, torch_dtype=torch.float16)
43
+ pipe_i2i = StableDiffusionImg2ImgPipeline.from_pretrained(model, unet=unet, vae=vae, torch_dtype=torch.float16)
44
+ pipes.append({"name":model, "pipeline":pipe, "pipeline_i2i":pipe_i2i})
45
+
46
 
47
  device = "GPU 🔥" if torch.cuda.is_available() else "CPU 🥶"
48
 
 
60
  global current_model
61
  global pipe
62
  if model != current_model:
63
+ current_model = model
64
+ pipe = pipe.to("cpu")
65
 
66
+ for pipe_dict in pipes:
67
+ if(pipe_dict["name"] == current_model):
68
+ pipe = pipe_dict["pipeline"]
69
+
70
+ if torch.cuda.is_available():
71
  pipe = pipe.to("cuda")
72
 
73
  prompt = prompt_prefixes[current_model] + prompt
 
78
  width=512,
79
  height=512,
80
  generator=generator).images[0]
81
+
82
  return image
83
 
84
  def img_inference(model, prompt, img, strength, guidance, steps, generator):
 
87
  global pipe
88
  if model != current_model:
89
  current_model = model
90
+ pipe = pipe.to("cpu")
91
 
92
+ for pipe_dict in pipes:
93
+ if(pipe_dict["name"] == current_model):
94
+ pipe = pipe_dict["pipeline_i2i"]
95
+
96
+ if torch.cuda.is_available():
97
  pipe = pipe.to("cuda")
98
 
99
  prompt = prompt_prefixes[current_model] + prompt
 
108
  width=512,
109
  height=512,
110
  generator=generator).images[0]
111
+
112
  return image
113
 
114