Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,82 +1,57 @@
|
|
1 |
import gradio as gr
|
2 |
import spaces
|
3 |
import torch
|
4 |
-
|
5 |
from diffusers.models.model_loading_utils import load_state_dict
|
6 |
from gradio_imageslider import ImageSlider
|
7 |
from huggingface_hub import hf_hub_download
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
from diffusers import AutoencoderKL, StableDiffusion3Pipeline, StableDiffusionInpaintPipeline, TCDScheduler
|
12 |
|
13 |
from PIL import Image, ImageDraw
|
14 |
import numpy as np
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
|
21 |
-
|
22 |
-
|
23 |
|
24 |
-
#
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
|
31 |
-
#
|
32 |
-
|
33 |
|
34 |
-
#
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
|
39 |
-
#
|
40 |
-
|
41 |
-
|
42 |
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
# ).to("cuda")
|
47 |
-
|
48 |
-
# pipe = StableDiffusionXLFillPipeline.from_pretrained(
|
49 |
-
# "SG161222/RealVisXL_V5.0_Lightning",
|
50 |
-
# torch_dtype=torch.float16,
|
51 |
-
# vae=vae,
|
52 |
-
# controlnet=model,
|
53 |
-
# variant="fp16",
|
54 |
-
# ).to("cuda")
|
55 |
-
|
56 |
-
# 1) Load the SD3.5-Large T2I pipeline (will pull in its own VAE, UNet, text encoders, etc.)
|
57 |
-
import os
|
58 |
-
|
59 |
-
HF_TOKEN = os.environ["HF_TOKEN"] # or os.getenv("HF_TOKEN")
|
60 |
-
|
61 |
-
t2i = StableDiffusion3Pipeline.from_pretrained(
|
62 |
-
"stabilityai/stable-diffusion-3.5-large",
|
63 |
-
torch_dtype=torch.bfloat16,
|
64 |
-
use_auth_token=HF_TOKEN # ← here
|
65 |
).to("cuda")
|
66 |
|
67 |
-
pipe =
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
safety_checker=t2i.safety_checker,
|
74 |
-
feature_extractor=t2i.feature_extractor,
|
75 |
-
use_auth_token=HF_TOKEN # ← and here
|
76 |
).to("cuda")
|
77 |
|
78 |
-
|
79 |
-
# pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
|
80 |
pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
|
81 |
|
82 |
|
@@ -217,38 +192,28 @@ def infer(image, width, height, overlap_percentage, num_inference_steps, resize_
|
|
217 |
final_prompt = f"{prompt_input} , high quality, 4k" if prompt_input else "high quality, 4k"
|
218 |
|
219 |
# Use with torch.autocast to ensure consistent dtype
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
# Inpaint missing regions using SD3.5 Large:
|
243 |
-
|
244 |
-
result_img = pipe(
|
245 |
-
prompt=final_prompt,
|
246 |
-
image=background,
|
247 |
-
mask_image=mask.convert("RGB"),
|
248 |
-
num_inference_steps=num_inference_steps,
|
249 |
-
guidance_scale=7.5,
|
250 |
-
).images[0]
|
251 |
-
yield background, result_img
|
252 |
|
253 |
def clear_result():
|
254 |
"""Clears the result ImageSlider."""
|
|
|
1 |
import gradio as gr
|
2 |
import spaces
|
3 |
import torch
|
4 |
+
from diffusers import AutoencoderKL, TCDScheduler
|
5 |
from diffusers.models.model_loading_utils import load_state_dict
|
6 |
from gradio_imageslider import ImageSlider
|
7 |
from huggingface_hub import hf_hub_download
|
8 |
|
9 |
+
from controlnet_union import ControlNetModel_Union
|
10 |
+
from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
|
|
|
11 |
|
12 |
from PIL import Image, ImageDraw
|
13 |
import numpy as np
|
14 |
|
15 |
+
config_file = hf_hub_download(
|
16 |
+
"xinsir/controlnet-union-sdxl-1.0",
|
17 |
+
filename="config_promax.json",
|
18 |
+
)
|
19 |
|
20 |
+
config = ControlNetModel_Union.load_config(config_file)
|
21 |
+
controlnet_model = ControlNetModel_Union.from_config(config)
|
22 |
|
23 |
+
# Load the state dictionary
|
24 |
+
model_file = hf_hub_download(
|
25 |
+
"xinsir/controlnet-union-sdxl-1.0",
|
26 |
+
filename="diffusion_pytorch_model_promax.safetensors",
|
27 |
+
)
|
28 |
+
state_dict = load_state_dict(model_file)
|
29 |
|
30 |
+
# Extract the keys from the state_dict
|
31 |
+
loaded_keys = list(state_dict.keys())
|
32 |
|
33 |
+
# Call the method and store all returns in a variable
|
34 |
+
result = ControlNetModel_Union._load_pretrained_model(
|
35 |
+
controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
|
36 |
+
)
|
37 |
|
38 |
+
# Use the first element from the result
|
39 |
+
model = result[0]
|
40 |
+
model = model.to(device="cuda", dtype=torch.float16)
|
41 |
|
42 |
|
43 |
+
vae = AutoencoderKL.from_pretrained(
|
44 |
+
"madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
).to("cuda")
|
46 |
|
47 |
+
pipe = StableDiffusionXLFillPipeline.from_pretrained(
|
48 |
+
"SG161222/RealVisXL_V5.0_Lightning",
|
49 |
+
torch_dtype=torch.float16,
|
50 |
+
vae=vae,
|
51 |
+
controlnet=model,
|
52 |
+
variant="fp16",
|
|
|
|
|
|
|
53 |
).to("cuda")
|
54 |
|
|
|
|
|
55 |
pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
|
56 |
|
57 |
|
|
|
192 |
final_prompt = f"{prompt_input} , high quality, 4k" if prompt_input else "high quality, 4k"
|
193 |
|
194 |
# Use with torch.autocast to ensure consistent dtype
|
195 |
+
with torch.autocast(device_type="cuda", dtype=torch.float16):
|
196 |
+
(
|
197 |
+
prompt_embeds,
|
198 |
+
negative_prompt_embeds,
|
199 |
+
pooled_prompt_embeds,
|
200 |
+
negative_pooled_prompt_embeds,
|
201 |
+
) = pipe.encode_prompt(final_prompt, "cuda", True)
|
202 |
+
|
203 |
+
for image in pipe(
|
204 |
+
prompt_embeds=prompt_embeds,
|
205 |
+
negative_prompt_embeds=negative_prompt_embeds,
|
206 |
+
pooled_prompt_embeds=pooled_prompt_embeds,
|
207 |
+
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
|
208 |
+
image=cnet_image,
|
209 |
+
num_inference_steps=num_inference_steps
|
210 |
+
):
|
211 |
+
yield cnet_image, image
|
212 |
+
|
213 |
+
image = image.convert("RGBA")
|
214 |
+
cnet_image.paste(image, (0, 0), mask)
|
215 |
+
|
216 |
+
yield background, cnet_image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
217 |
|
218 |
def clear_result():
|
219 |
"""Clears the result ImageSlider."""
|