Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -1,57 +1,78 @@
 import gradio as gr
 import spaces
 import torch
-from diffusers import AutoencoderKL, TCDScheduler
+# from diffusers import AutoencoderKL, TCDScheduler
 from diffusers.models.model_loading_utils import load_state_dict
 from gradio_imageslider import ImageSlider
 from huggingface_hub import hf_hub_download

-from controlnet_union import ControlNetModel_Union
-from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
+# from controlnet_union import ControlNetModel_Union
+# from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
+from diffusers import AutoencoderKL, StableDiffusion3Pipeline, StableDiffusionInpaintPipeline, TCDScheduler

 from PIL import Image, ImageDraw
 import numpy as np

-config_file = hf_hub_download(
-    "xinsir/controlnet-union-sdxl-1.0",
-    filename="config_promax.json",
-)
-
-config = ControlNetModel_Union.load_config(config_file)
-controlnet_model = ControlNetModel_Union.from_config(config)
-
-# Load the state dictionary
-model_file = hf_hub_download(
-    "xinsir/controlnet-union-sdxl-1.0",
-    filename="diffusion_pytorch_model_promax.safetensors",
-)
-state_dict = load_state_dict(model_file)
-
-# Extract the keys from the state_dict
-loaded_keys = list(state_dict.keys())
-
-# Call the method and store all returns in a variable
-result = ControlNetModel_Union._load_pretrained_model(
-    controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
-)
-
-# Use the first element from the result
-model = result[0]
-model = model.to(device="cuda", dtype=torch.float16)
-
-
-vae = AutoencoderKL.from_pretrained(
-    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
-).to("cuda")
-
-pipe = StableDiffusionXLFillPipeline.from_pretrained(
-    "SG161222/RealVisXL_V5.0_Lightning",
-    torch_dtype=torch.float16,
-    vae=vae,
-    controlnet=model,
-    variant="fp16",
-).to("cuda")
+# config_file = hf_hub_download(
+#     "xinsir/controlnet-union-sdxl-1.0",
+#     filename="config_promax.json",
+# )
+
+# config = ControlNetModel_Union.load_config(config_file)
+# controlnet_model = ControlNetModel_Union.from_config(config)
+
+# # Load the state dictionary
+# model_file = hf_hub_download(
+#     "xinsir/controlnet-union-sdxl-1.0",
+#     filename="diffusion_pytorch_model_promax.safetensors",
+# )
+# state_dict = load_state_dict(model_file)
+
+# # Extract the keys from the state_dict
+# loaded_keys = list(state_dict.keys())
+
+# # Call the method and store all returns in a variable
+# result = ControlNetModel_Union._load_pretrained_model(
+#     controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
+# )
+
+# # Use the first element from the result
+# model = result[0]
+# model = model.to(device="cuda", dtype=torch.float16)
+
+
+# vae = AutoencoderKL.from_pretrained(
+#     "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
+# ).to("cuda")
+
+# pipe = StableDiffusionXLFillPipeline.from_pretrained(
+#     "SG161222/RealVisXL_V5.0_Lightning",
+#     torch_dtype=torch.float16,
+#     vae=vae,
+#     controlnet=model,
+#     variant="fp16",
+# ).to("cuda")
+
+# 1) Load the SD3.5-Large T2I pipeline (will pull in its own VAE, UNet, text encoders, etc.)
+t2i = StableDiffusion3Pipeline.from_pretrained(
+    "stabilityai/stable-diffusion-3.5-large",
+    torch_dtype=torch.bfloat16,  # recommended for SD3.5
+).to("cuda")
+
+# 2) Wrap it into the standard inpainting pipeline
+pipe = StableDiffusionInpaintPipeline(
+    vae=t2i.vae,
+    text_encoder=t2i.text_encoder,
+    tokenizer=t2i.tokenizer,
+    unet=t2i.unet,
+    scheduler=t2i.scheduler,
+    safety_checker=t2i.safety_checker,
+    feature_extractor=t2i.feature_extractor,
+).to("cuda")
+
+
+# pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)

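As committed, step 2 of this hunk cannot run: StableDiffusion3Pipeline exposes an MMDiT transformer plus three text encoder/tokenizer pairs, and has no unet, safety_checker, or feature_extractor components, so those attribute accesses raise AttributeError; StableDiffusionInpaintPipeline is also the SD1.x/2.x class and cannot drive SD3 weights. A minimal sketch of the diffusers-supported route, assuming a recent diffusers release that ships StableDiffusion3InpaintPipeline and the from_pipe constructor:

import torch
from diffusers import StableDiffusion3Pipeline, StableDiffusion3InpaintPipeline

# Load SD3.5-Large once, exactly as the hunk above does.
t2i = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3.5-large",
    torch_dtype=torch.bfloat16,
).to("cuda")

# from_pipe reuses the already-loaded components (transformer, VAE, and all
# three text encoder/tokenizer pairs) instead of reloading weights, so the
# modules stay on the GPU they were moved to above.
pipe = StableDiffusion3InpaintPipeline.from_pipe(t2i)

Note also that the unchanged context line still replaces pipe.scheduler with TCDScheduler afterwards; TCD is a distillation sampler aimed at Lightning-style SDXL checkpoints, while SD3.5 ships a flow-matching scheduler, so keeping t2i.scheduler is likely the safer default here.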
@@ -192,28 +213,38 @@ def infer(image, width, height, overlap_percentage, num_inference_steps, resize_
     final_prompt = f"{prompt_input} , high quality, 4k" if prompt_input else "high quality, 4k"

     # Use with torch.autocast to ensure consistent dtype
-    with torch.autocast(device_type="cuda", dtype=torch.float16):
-        (
-            prompt_embeds,
-            negative_prompt_embeds,
-            pooled_prompt_embeds,
-            negative_pooled_prompt_embeds,
-        ) = pipe.encode_prompt(final_prompt, "cuda", True)
-
-        for image in pipe(
-            prompt_embeds=prompt_embeds,
-            negative_prompt_embeds=negative_prompt_embeds,
-            pooled_prompt_embeds=pooled_prompt_embeds,
-            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
-            image=cnet_image,
-            num_inference_steps=num_inference_steps
-        ):
-            yield cnet_image, image
-
-        image = image.convert("RGBA")
-        cnet_image.paste(image, (0, 0), mask)
-
-        yield background, cnet_image
+    # with torch.autocast(device_type="cuda", dtype=torch.float16):
+    #     (
+    #         prompt_embeds,
+    #         negative_prompt_embeds,
+    #         pooled_prompt_embeds,
+    #         negative_pooled_prompt_embeds,
+    #     ) = pipe.encode_prompt(final_prompt, "cuda", True)
+
+    #     for image in pipe(
+    #         prompt_embeds=prompt_embeds,
+    #         negative_prompt_embeds=negative_prompt_embeds,
+    #         pooled_prompt_embeds=pooled_prompt_embeds,
+    #         negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+    #         image=cnet_image,
+    #         num_inference_steps=num_inference_steps
+    #     ):
+    #         yield cnet_image, image
+
+    #     image = image.convert("RGBA")
+    #     cnet_image.paste(image, (0, 0), mask)
+
+    #     yield background, cnet_image
+    # Inpaint missing regions using SD3.5 Large:
+
+    result_img = pipe(
+        prompt=final_prompt,
+        image=background,
+        mask_image=mask.convert("RGB"),
+        num_inference_steps=num_inference_steps,
+        guidance_scale=7.5,
+    ).images[0]
+    yield background, result_img

 def clear_result():
     """Clears the result ImageSlider."""
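After this rewrite, infer yields a single final frame instead of streaming intermediates; background and mask are built earlier in the function, outside this hunk. For reference, a self-contained sketch of the same call with hypothetical placeholder inputs (the names and sizes below are illustrative, not the Space's real ones), following the diffusers inpainting mask convention where white pixels are regenerated and black pixels are preserved:

from PIL import Image, ImageDraw

# Hypothetical stand-ins for the background/mask that infer builds upstream.
width, height = 1024, 1024
background = Image.new("RGB", (width, height), "gray")   # canvas holding the source image
mask = Image.new("L", (width, height), 0)                # 0 = keep these pixels
draw = ImageDraw.Draw(mask)
draw.rectangle((768, 0, width - 1, height - 1), fill=255)  # 255 = repaint this strip

result_img = pipe(
    prompt="high quality, 4k",
    image=background,
    mask_image=mask.convert("RGB"),
    num_inference_steps=28,  # example value
    guidance_scale=7.5,
).images[0]

With the SD3 inpainting pipeline sketched after the first hunk, this call signature (prompt / image / mask_image / num_inference_steps / guidance_scale) is accepted unchanged.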