user-agent committed on
Commit
7800478
·
verified ·
1 Parent(s): 418f55b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -61
app.py CHANGED
@@ -1,57 +1,78 @@
1
  import gradio as gr
2
  import spaces
3
  import torch
4
- from diffusers import AutoencoderKL, TCDScheduler
5
  from diffusers.models.model_loading_utils import load_state_dict
6
  from gradio_imageslider import ImageSlider
7
  from huggingface_hub import hf_hub_download
8
 
9
- from controlnet_union import ControlNetModel_Union
10
- from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
 
11
 
12
  from PIL import Image, ImageDraw
13
  import numpy as np
14
 
15
- config_file = hf_hub_download(
16
- "xinsir/controlnet-union-sdxl-1.0",
17
- filename="config_promax.json",
18
- )
19
-
20
- config = ControlNetModel_Union.load_config(config_file)
21
- controlnet_model = ControlNetModel_Union.from_config(config)
22
-
23
- # Load the state dictionary
24
- model_file = hf_hub_download(
25
- "xinsir/controlnet-union-sdxl-1.0",
26
- filename="diffusion_pytorch_model_promax.safetensors",
27
- )
28
- state_dict = load_state_dict(model_file)
29
-
30
- # Extract the keys from the state_dict
31
- loaded_keys = list(state_dict.keys())
32
-
33
- # Call the method and store all returns in a variable
34
- result = ControlNetModel_Union._load_pretrained_model(
35
- controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
36
- )
37
-
38
- # Use the first element from the result
39
- model = result[0]
40
- model = model.to(device="cuda", dtype=torch.float16)
41
-
42
-
43
- vae = AutoencoderKL.from_pretrained(
44
- "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  ).to("cuda")
46
 
47
- pipe = StableDiffusionXLFillPipeline.from_pretrained(
48
- "SG161222/RealVisXL_V5.0_Lightning",
49
- torch_dtype=torch.float16,
50
- vae=vae,
51
- controlnet=model,
52
- variant="fp16",
 
 
 
53
  ).to("cuda")
54
 
 
 
 
55
  pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
56
 
57
 
@@ -192,28 +213,38 @@ def infer(image, width, height, overlap_percentage, num_inference_steps, resize_
192
  final_prompt = f"{prompt_input} , high quality, 4k" if prompt_input else "high quality, 4k"
193
 
194
  # Use with torch.autocast to ensure consistent dtype
195
- with torch.autocast(device_type="cuda", dtype=torch.float16):
196
- (
197
- prompt_embeds,
198
- negative_prompt_embeds,
199
- pooled_prompt_embeds,
200
- negative_pooled_prompt_embeds,
201
- ) = pipe.encode_prompt(final_prompt, "cuda", True)
202
-
203
- for image in pipe(
204
- prompt_embeds=prompt_embeds,
205
- negative_prompt_embeds=negative_prompt_embeds,
206
- pooled_prompt_embeds=pooled_prompt_embeds,
207
- negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
208
- image=cnet_image,
209
- num_inference_steps=num_inference_steps
210
- ):
211
- yield cnet_image, image
212
-
213
- image = image.convert("RGBA")
214
- cnet_image.paste(image, (0, 0), mask)
215
-
216
- yield background, cnet_image
 
 
 
 
 
 
 
 
 
 
217
 
218
  def clear_result():
219
  """Clears the result ImageSlider."""
 
1
  import gradio as gr
2
  import spaces
3
  import torch
4
+ # from diffusers import AutoencoderKL, TCDScheduler
5
  from diffusers.models.model_loading_utils import load_state_dict
6
  from gradio_imageslider import ImageSlider
7
  from huggingface_hub import hf_hub_download
8
 
9
+ # from controlnet_union import ControlNetModel_Union
10
+ # from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
11
+ from diffusers import AutoencoderKL, StableDiffusion3Pipeline, StableDiffusionInpaintPipeline, TCDScheduler
12
 
13
  from PIL import Image, ImageDraw
14
  import numpy as np
15
 
16
+ # config_file = hf_hub_download(
17
+ # "xinsir/controlnet-union-sdxl-1.0",
18
+ # filename="config_promax.json",
19
+ # )
20
+
21
+ # config = ControlNetModel_Union.load_config(config_file)
22
+ # controlnet_model = ControlNetModel_Union.from_config(config)
23
+
24
+ # # Load the state dictionary
25
+ # model_file = hf_hub_download(
26
+ # "xinsir/controlnet-union-sdxl-1.0",
27
+ # filename="diffusion_pytorch_model_promax.safetensors",
28
+ # )
29
+ # state_dict = load_state_dict(model_file)
30
+
31
+ # # Extract the keys from the state_dict
32
+ # loaded_keys = list(state_dict.keys())
33
+
34
+ # # Call the method and store all returns in a variable
35
+ # result = ControlNetModel_Union._load_pretrained_model(
36
+ # controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
37
+ # )
38
+
39
+ # # Use the first element from the result
40
+ # model = result[0]
41
+ # model = model.to(device="cuda", dtype=torch.float16)
42
+
43
+
44
+ # vae = AutoencoderKL.from_pretrained(
45
+ # "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
46
+ # ).to("cuda")
47
+
48
+ # pipe = StableDiffusionXLFillPipeline.from_pretrained(
49
+ # "SG161222/RealVisXL_V5.0_Lightning",
50
+ # torch_dtype=torch.float16,
51
+ # vae=vae,
52
+ # controlnet=model,
53
+ # variant="fp16",
54
+ # ).to("cuda")
55
+
56
+ # 1) Load the SD3.5-Large T2I pipeline (will pull in its own VAE, transformer, text encoders, etc.)
57
+ t2i = StableDiffusion3Pipeline.from_pretrained(
58
+ "stabilityai/stable-diffusion-3.5-large",
59
+ torch_dtype=torch.bfloat16, # recommended for SD3.5 (see https://github.com/huggingface/diffusers/releases)
60
  ).to("cuda")
61
 
62
+ # 2) Wrap it into the standard inpainting pipeline
63
+ pipe = StableDiffusionInpaintPipeline(
64
+ vae=t2i.vae,
65
+ text_encoder=t2i.text_encoder,
66
+ tokenizer=t2i.tokenizer,
67
+ unet=t2i.unet,
68
+ scheduler=t2i.scheduler,
69
+ safety_checker=t2i.safety_checker,
70
+ feature_extractor=t2i.feature_extractor,
71
  ).to("cuda")
72
 
73
+
74
+
75
+ # pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
76
  pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
77
 
78
 
 
213
  final_prompt = f"{prompt_input} , high quality, 4k" if prompt_input else "high quality, 4k"
214
 
215
  # Use with torch.autocast to ensure consistent dtype
216
+ # with torch.autocast(device_type="cuda", dtype=torch.float16):
217
+ # (
218
+ # prompt_embeds,
219
+ # negative_prompt_embeds,
220
+ # pooled_prompt_embeds,
221
+ # negative_pooled_prompt_embeds,
222
+ # ) = pipe.encode_prompt(final_prompt, "cuda", True)
223
+
224
+ # for image in pipe(
225
+ # prompt_embeds=prompt_embeds,
226
+ # negative_prompt_embeds=negative_prompt_embeds,
227
+ # pooled_prompt_embeds=pooled_prompt_embeds,
228
+ # negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
229
+ # image=cnet_image,
230
+ # num_inference_steps=num_inference_steps
231
+ # ):
232
+ # yield cnet_image, image
233
+
234
+ # image = image.convert("RGBA")
235
+ # cnet_image.paste(image, (0, 0), mask)
236
+
237
+ # yield background, cnet_image
238
+ # Inpaint missing regions using SD3.5 Large:
239
+
240
+ result_img = pipe(
241
+ prompt=final_prompt,
242
+ image=background,
243
+ mask_image=mask.convert("RGB"),
244
+ num_inference_steps=num_inference_steps,
245
+ guidance_scale=7.5,
246
+ ).images[0]
247
+ yield background, result_img
248
 
249
  def clear_result():
250
  """Clears the result ImageSlider."""