user-agent committed on
Commit
ccf2114
·
verified ·
1 Parent(s): 50bbed8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -89
app.py CHANGED
@@ -1,82 +1,57 @@
1
  import gradio as gr
2
  import spaces
3
  import torch
4
- # from diffusers import AutoencoderKL, TCDScheduler
5
  from diffusers.models.model_loading_utils import load_state_dict
6
  from gradio_imageslider import ImageSlider
7
  from huggingface_hub import hf_hub_download
8
 
9
- # from controlnet_union import ControlNetModel_Union
10
- # from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
11
- from diffusers import AutoencoderKL, StableDiffusion3Pipeline, StableDiffusionInpaintPipeline, TCDScheduler
12
 
13
  from PIL import Image, ImageDraw
14
  import numpy as np
15
 
16
- # config_file = hf_hub_download(
17
- # "xinsir/controlnet-union-sdxl-1.0",
18
- # filename="config_promax.json",
19
- # )
20
 
21
- # config = ControlNetModel_Union.load_config(config_file)
22
- # controlnet_model = ControlNetModel_Union.from_config(config)
23
 
24
- # # Load the state dictionary
25
- # model_file = hf_hub_download(
26
- # "xinsir/controlnet-union-sdxl-1.0",
27
- # filename="diffusion_pytorch_model_promax.safetensors",
28
- # )
29
- # state_dict = load_state_dict(model_file)
30
 
31
- # # Extract the keys from the state_dict
32
- # loaded_keys = list(state_dict.keys())
33
 
34
- # # Call the method and store all returns in a variable
35
- # result = ControlNetModel_Union._load_pretrained_model(
36
- # controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
37
- # )
38
 
39
- # # Use the first element from the result
40
- # model = result[0]
41
- # model = model.to(device="cuda", dtype=torch.float16)
42
 
43
 
44
- # vae = AutoencoderKL.from_pretrained(
45
- # "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
46
- # ).to("cuda")
47
-
48
- # pipe = StableDiffusionXLFillPipeline.from_pretrained(
49
- # "SG161222/RealVisXL_V5.0_Lightning",
50
- # torch_dtype=torch.float16,
51
- # vae=vae,
52
- # controlnet=model,
53
- # variant="fp16",
54
- # ).to("cuda")
55
-
56
- # 1) Load the SD3.5-Large T2I pipeline (will pull in its own VAE, UNet, text encoders, etc.)
57
- import os
58
-
59
- HF_TOKEN = os.environ["HF_TOKEN"] # or os.getenv("HF_TOKEN")
60
-
61
- t2i = StableDiffusion3Pipeline.from_pretrained(
62
- "stabilityai/stable-diffusion-3.5-large",
63
- torch_dtype=torch.bfloat16,
64
- use_auth_token=HF_TOKEN # ← here
65
  ).to("cuda")
66
 
67
- pipe = StableDiffusionInpaintPipeline(
68
- vae=t2i.vae,
69
- text_encoder=t2i.text_encoder,
70
- tokenizer=t2i.tokenizer,
71
- unet=t2i.unet,
72
- scheduler=t2i.scheduler,
73
- safety_checker=t2i.safety_checker,
74
- feature_extractor=t2i.feature_extractor,
75
- use_auth_token=HF_TOKEN # ← and here
76
  ).to("cuda")
77
 
78
-
79
- # pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
80
  pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
81
 
82
 
@@ -217,38 +192,28 @@ def infer(image, width, height, overlap_percentage, num_inference_steps, resize_
217
  final_prompt = f"{prompt_input} , high quality, 4k" if prompt_input else "high quality, 4k"
218
 
219
  # Use with torch.autocast to ensure consistent dtype
220
- # with torch.autocast(device_type="cuda", dtype=torch.float16):
221
- # (
222
- # prompt_embeds,
223
- # negative_prompt_embeds,
224
- # pooled_prompt_embeds,
225
- # negative_pooled_prompt_embeds,
226
- # ) = pipe.encode_prompt(final_prompt, "cuda", True)
227
-
228
- # for image in pipe(
229
- # prompt_embeds=prompt_embeds,
230
- # negative_prompt_embeds=negative_prompt_embeds,
231
- # pooled_prompt_embeds=pooled_prompt_embeds,
232
- # negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
233
- # image=cnet_image,
234
- # num_inference_steps=num_inference_steps
235
- # ):
236
- # yield cnet_image, image
237
-
238
- # image = image.convert("RGBA")
239
- # cnet_image.paste(image, (0, 0), mask)
240
-
241
- # yield background, cnet_image
242
- # Inpaint missing regions using SD3.5 Large:
243
-
244
- result_img = pipe(
245
- prompt=final_prompt,
246
- image=background,
247
- mask_image=mask.convert("RGB"),
248
- num_inference_steps=num_inference_steps,
249
- guidance_scale=7.5,
250
- ).images[0]
251
- yield background, result_img
252
 
253
  def clear_result():
254
  """Clears the result ImageSlider."""
 
1
  import gradio as gr
2
  import spaces
3
  import torch
4
+ from diffusers import AutoencoderKL, TCDScheduler
5
  from diffusers.models.model_loading_utils import load_state_dict
6
  from gradio_imageslider import ImageSlider
7
  from huggingface_hub import hf_hub_download
8
 
9
+ from controlnet_union import ControlNetModel_Union
10
+ from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
 
11
 
12
  from PIL import Image, ImageDraw
13
  import numpy as np
14
 
15
+ config_file = hf_hub_download(
16
+ "xinsir/controlnet-union-sdxl-1.0",
17
+ filename="config_promax.json",
18
+ )
19
 
20
+ config = ControlNetModel_Union.load_config(config_file)
21
+ controlnet_model = ControlNetModel_Union.from_config(config)
22
 
23
+ # Load the state dictionary
24
+ model_file = hf_hub_download(
25
+ "xinsir/controlnet-union-sdxl-1.0",
26
+ filename="diffusion_pytorch_model_promax.safetensors",
27
+ )
28
+ state_dict = load_state_dict(model_file)
29
 
30
+ # Extract the keys from the state_dict
31
+ loaded_keys = list(state_dict.keys())
32
 
33
+ # Call the method and store all returns in a variable
34
+ result = ControlNetModel_Union._load_pretrained_model(
35
+ controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
36
+ )
37
 
38
+ # Use the first element from the result
39
+ model = result[0]
40
+ model = model.to(device="cuda", dtype=torch.float16)
41
 
42
 
43
+ vae = AutoencoderKL.from_pretrained(
44
+ "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  ).to("cuda")
46
 
47
+ pipe = StableDiffusionXLFillPipeline.from_pretrained(
48
+ "SG161222/RealVisXL_V5.0_Lightning",
49
+ torch_dtype=torch.float16,
50
+ vae=vae,
51
+ controlnet=model,
52
+ variant="fp16",
 
 
 
53
  ).to("cuda")
54
 
 
 
55
  pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
56
 
57
 
 
192
  final_prompt = f"{prompt_input} , high quality, 4k" if prompt_input else "high quality, 4k"
193
 
194
  # Use with torch.autocast to ensure consistent dtype
195
+ with torch.autocast(device_type="cuda", dtype=torch.float16):
196
+ (
197
+ prompt_embeds,
198
+ negative_prompt_embeds,
199
+ pooled_prompt_embeds,
200
+ negative_pooled_prompt_embeds,
201
+ ) = pipe.encode_prompt(final_prompt, "cuda", True)
202
+
203
+ for image in pipe(
204
+ prompt_embeds=prompt_embeds,
205
+ negative_prompt_embeds=negative_prompt_embeds,
206
+ pooled_prompt_embeds=pooled_prompt_embeds,
207
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
208
+ image=cnet_image,
209
+ num_inference_steps=num_inference_steps
210
+ ):
211
+ yield cnet_image, image
212
+
213
+ image = image.convert("RGBA")
214
+ cnet_image.paste(image, (0, 0), mask)
215
+
216
+ yield background, cnet_image
 
 
 
 
 
 
 
 
 
 
217
 
218
  def clear_result():
219
  """Clears the result ImageSlider."""