Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -1,57 +1,78 @@
 import gradio as gr
 import spaces
 import torch
-from diffusers import AutoencoderKL, TCDScheduler
+# from diffusers import AutoencoderKL, TCDScheduler
 from diffusers.models.model_loading_utils import load_state_dict
 from gradio_imageslider import ImageSlider
 from huggingface_hub import hf_hub_download

-from controlnet_union import ControlNetModel_Union
-from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
+# from controlnet_union import ControlNetModel_Union
+# from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
+from diffusers import AutoencoderKL, StableDiffusion3Pipeline, StableDiffusionInpaintPipeline, TCDScheduler

 from PIL import Image, ImageDraw
 import numpy as np

-config_file = hf_hub_download(
-    "xinsir/controlnet-union-sdxl-1.0",
-    filename="config_promax.json",
-)
-
-config = ControlNetModel_Union.load_config(config_file)
-controlnet_model = ControlNetModel_Union.from_config(config)
-
-# Load the state dictionary
-model_file = hf_hub_download(
-    "xinsir/controlnet-union-sdxl-1.0",
-    filename="diffusion_pytorch_model_promax.safetensors",
-)
-state_dict = load_state_dict(model_file)
-
-# Extract the keys from the state_dict
-loaded_keys = list(state_dict.keys())
-
-# Call the method and store all returns in a variable
-result = ControlNetModel_Union._load_pretrained_model(
-    controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
-)
-
-# Use the first element from the result
-model = result[0]
-model = model.to(device="cuda", dtype=torch.float16)
-
-
-vae = AutoencoderKL.from_pretrained(
-    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
-).to("cuda")
-
-pipe = StableDiffusionXLFillPipeline.from_pretrained(
-    "SG161222/RealVisXL_V5.0_Lightning",
-    torch_dtype=torch.float16,
-    vae=vae,
-    controlnet=model,
-    variant="fp16",
-).to("cuda")
+# config_file = hf_hub_download(
+#     "xinsir/controlnet-union-sdxl-1.0",
+#     filename="config_promax.json",
+# )
+
+# config = ControlNetModel_Union.load_config(config_file)
+# controlnet_model = ControlNetModel_Union.from_config(config)
+
+# # Load the state dictionary
+# model_file = hf_hub_download(
+#     "xinsir/controlnet-union-sdxl-1.0",
+#     filename="diffusion_pytorch_model_promax.safetensors",
+# )
+# state_dict = load_state_dict(model_file)
+
+# # Extract the keys from the state_dict
+# loaded_keys = list(state_dict.keys())
+
+# # Call the method and store all returns in a variable
+# result = ControlNetModel_Union._load_pretrained_model(
+#     controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
+# )
+
+# # Use the first element from the result
+# model = result[0]
+# model = model.to(device="cuda", dtype=torch.float16)
+
+
+# vae = AutoencoderKL.from_pretrained(
+#     "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
+# ).to("cuda")
+
+# pipe = StableDiffusionXLFillPipeline.from_pretrained(
+#     "SG161222/RealVisXL_V5.0_Lightning",
+#     torch_dtype=torch.float16,
+#     vae=vae,
+#     controlnet=model,
+#     variant="fp16",
+# ).to("cuda")
+
+# 1) Load the SD3.5-Large T2I pipeline (will pull in its own VAE, UNet, text encoders, etc.)
+t2i = StableDiffusion3Pipeline.from_pretrained(
+    "stabilityai/stable-diffusion-3.5-large",
+    torch_dtype=torch.bfloat16,  # recommended for SD3.5
+).to("cuda")
+
+# 2) Wrap it into the standard inpainting pipeline
+pipe = StableDiffusionInpaintPipeline(
+    vae=t2i.vae,
+    text_encoder=t2i.text_encoder,
+    tokenizer=t2i.tokenizer,
+    unet=t2i.unet,
+    scheduler=t2i.scheduler,
+    safety_checker=t2i.safety_checker,
+    feature_extractor=t2i.feature_extractor,
+).to("cuda")
+
+
+# pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)

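As committed, step 2 of this hunk cannot run: StableDiffusion3Pipeline exposes an MMDiT transformer plus three text encoder/tokenizer pairs, and has no unet, safety_checker, or feature_extractor components, so those attribute accesses raise AttributeError; StableDiffusionInpaintPipeline is also the SD1.x/2.x class and cannot drive SD3 weights. A minimal sketch of the diffusers-supported route, assuming a recent diffusers release that ships StableDiffusion3InpaintPipeline and the from_pipe constructor:

import torch
from diffusers import StableDiffusion3Pipeline, StableDiffusion3InpaintPipeline

# Load SD3.5-Large once, exactly as the hunk above does.
t2i = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3.5-large",
    torch_dtype=torch.bfloat16,
).to("cuda")

# from_pipe reuses the already-loaded components (transformer, VAE, and all
# three text encoder/tokenizer pairs) instead of reloading weights, so the
# modules stay on the GPU they were moved to above.
pipe = StableDiffusion3InpaintPipeline.from_pipe(t2i)

Note also that the unchanged context line still replaces pipe.scheduler with TCDScheduler afterwards; TCD is a distillation sampler aimed at Lightning-style SDXL checkpoints, while SD3.5 ships a flow-matching scheduler, so keeping t2i.scheduler is likely the safer default here.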
@@ -192,28 +213,38 @@ def infer(image, width, height, overlap_percentage, num_inference_steps, resize_
     final_prompt = f"{prompt_input} , high quality, 4k" if prompt_input else "high quality, 4k"

     # Use with torch.autocast to ensure consistent dtype
-    with torch.autocast(device_type="cuda", dtype=torch.float16):
-        (
-            prompt_embeds,
-            negative_prompt_embeds,
-            pooled_prompt_embeds,
-            negative_pooled_prompt_embeds,
-        ) = pipe.encode_prompt(final_prompt, "cuda", True)
-
-        for image in pipe(
-            prompt_embeds=prompt_embeds,
-            negative_prompt_embeds=negative_prompt_embeds,
-            pooled_prompt_embeds=pooled_prompt_embeds,
-            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
-            image=cnet_image,
-            num_inference_steps=num_inference_steps
-        ):
-            yield cnet_image, image
-
-        image = image.convert("RGBA")
-        cnet_image.paste(image, (0, 0), mask)
-
-        yield background, cnet_image
+    # with torch.autocast(device_type="cuda", dtype=torch.float16):
+    #     (
+    #         prompt_embeds,
+    #         negative_prompt_embeds,
+    #         pooled_prompt_embeds,
+    #         negative_pooled_prompt_embeds,
+    #     ) = pipe.encode_prompt(final_prompt, "cuda", True)
+
+    #     for image in pipe(
+    #         prompt_embeds=prompt_embeds,
+    #         negative_prompt_embeds=negative_prompt_embeds,
+    #         pooled_prompt_embeds=pooled_prompt_embeds,
+    #         negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+    #         image=cnet_image,
+    #         num_inference_steps=num_inference_steps
+    #     ):
+    #         yield cnet_image, image
+
+    #     image = image.convert("RGBA")
+    #     cnet_image.paste(image, (0, 0), mask)
+
+    #     yield background, cnet_image
+    # Inpaint missing regions using SD3.5 Large:
+
+    result_img = pipe(
+        prompt=final_prompt,
+        image=background,
+        mask_image=mask.convert("RGB"),
+        num_inference_steps=num_inference_steps,
+        guidance_scale=7.5,
+    ).images[0]
+    yield background, result_img

 def clear_result():
     """Clears the result ImageSlider."""
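After this rewrite, infer yields a single final frame instead of streaming intermediates; background and mask are built earlier in the function, outside this hunk. For reference, a self-contained sketch of the same call with hypothetical placeholder inputs (the names and sizes below are illustrative, not the Space's real ones), following the diffusers inpainting mask convention where white pixels are regenerated and black pixels are preserved:

from PIL import Image, ImageDraw

# Hypothetical stand-ins for the background/mask that infer builds upstream.
width, height = 1024, 1024
background = Image.new("RGB", (width, height), "gray")   # canvas holding the source image
mask = Image.new("L", (width, height), 0)                # 0 = keep these pixels
draw = ImageDraw.Draw(mask)
draw.rectangle((768, 0, width - 1, height - 1), fill=255)  # 255 = repaint this strip

result_img = pipe(
    prompt="high quality, 4k",
    image=background,
    mask_image=mask.convert("RGB"),
    num_inference_steps=28,  # example value
    guidance_scale=7.5,
).images[0]

With the SD3 inpainting pipeline sketched after the first hunk, this call signature (prompt / image / mask_image / num_inference_steps / guidance_scale) is accepted unchanged.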