Image-Outpainting

Running on Zero

App Files Community

user-agent committed 12 days ago

Commit

ccf2114

verified ·

1 Parent(s): 50bbed8

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -89

app.py CHANGED Viewed

@@ -1,82 +1,57 @@
 import gradio as gr
 import spaces
 import torch
-# from diffusers import AutoencoderKL, TCDScheduler
 from diffusers.models.model_loading_utils import load_state_dict
 from gradio_imageslider import ImageSlider
 from huggingface_hub import hf_hub_download
-# from controlnet_union import ControlNetModel_Union
-# from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
-from diffusers import AutoencoderKL, StableDiffusion3Pipeline, StableDiffusionInpaintPipeline, TCDScheduler
 from PIL import Image, ImageDraw
 import numpy as np
-# config_file = hf_hub_download(
-#     "xinsir/controlnet-union-sdxl-1.0",
-#     filename="config_promax.json",
-# )
-# config = ControlNetModel_Union.load_config(config_file)
-# controlnet_model = ControlNetModel_Union.from_config(config)
-# # Load the state dictionary
-# model_file = hf_hub_download(
-#     "xinsir/controlnet-union-sdxl-1.0",
-#     filename="diffusion_pytorch_model_promax.safetensors",
-# )
-# state_dict = load_state_dict(model_file)
-# # Extract the keys from the state_dict
-# loaded_keys = list(state_dict.keys())
-# # Call the method and store all returns in a variable
-# result = ControlNetModel_Union._load_pretrained_model(
-#     controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
-# )
-# # Use the first element from the result
-# model = result[0]
-# model = model.to(device="cuda", dtype=torch.float16)
-# vae = AutoencoderKL.from_pretrained(
-#     "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
-# ).to("cuda")
-# pipe = StableDiffusionXLFillPipeline.from_pretrained(
-#     "SG161222/RealVisXL_V5.0_Lightning",
-#     torch_dtype=torch.float16,
-#     vae=vae,
-#     controlnet=model,
-#     variant="fp16",
-# ).to("cuda")
-# 1) Load the SD3.5-Large T2I pipeline (will pull in its own VAE, UNet, text encoders, etc.)
-import os
-HF_TOKEN = os.environ["HF_TOKEN"]  # or os.getenv("HF_TOKEN")
-t2i = StableDiffusion3Pipeline.from_pretrained(
-    "stabilityai/stable-diffusion-3.5-large",
-    torch_dtype=torch.bfloat16,
-    use_auth_token=HF_TOKEN   # ← here
 ).to("cuda")
-pipe = StableDiffusionInpaintPipeline(
-    vae=t2i.vae,
-    text_encoder=t2i.text_encoder,
-    tokenizer=t2i.tokenizer,
-    unet=t2i.unet,
-    scheduler=t2i.scheduler,
-    safety_checker=t2i.safety_checker,
-    feature_extractor=t2i.feature_extractor,
-    use_auth_token=HF_TOKEN   # ← and here
 ).to("cuda")
-# pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
@@ -217,38 +192,28 @@ def infer(image, width, height, overlap_percentage, num_inference_steps, resize_
     final_prompt = f"{prompt_input} , high quality, 4k" if prompt_input else "high quality, 4k"
     # Use with torch.autocast to ensure consistent dtype
-    # with torch.autocast(device_type="cuda", dtype=torch.float16):
-    #     (
-    #         prompt_embeds,
-    #         negative_prompt_embeds,
-    #         pooled_prompt_embeds,
-    #         negative_pooled_prompt_embeds,
-    #     ) = pipe.encode_prompt(final_prompt, "cuda", True)
-    #     for image in pipe(
-    #         prompt_embeds=prompt_embeds,
-    #         negative_prompt_embeds=negative_prompt_embeds,
-    #         pooled_prompt_embeds=pooled_prompt_embeds,
-    #         negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
-    #         image=cnet_image,
-    #         num_inference_steps=num_inference_steps
-    #     ):
-    #         yield cnet_image, image
-    # image = image.convert("RGBA")
-    # cnet_image.paste(image, (0, 0), mask)
-    # yield background, cnet_image
-    # Inpaint missing regions using SD3.5 Large:
-    result_img = pipe(
-        prompt=final_prompt,
-        image=background,
-        mask_image=mask.convert("RGB"),
-        num_inference_steps=num_inference_steps,
-        guidance_scale=7.5,
-    ).images[0]
-    yield background, result_img
 def clear_result():
     """Clears the result ImageSlider."""

 import gradio as gr
 import spaces
 import torch
+from diffusers import AutoencoderKL, TCDScheduler
 from diffusers.models.model_loading_utils import load_state_dict
 from gradio_imageslider import ImageSlider
 from huggingface_hub import hf_hub_download
+from controlnet_union import ControlNetModel_Union
+from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
 from PIL import Image, ImageDraw
 import numpy as np
+config_file = hf_hub_download(
+    "xinsir/controlnet-union-sdxl-1.0",
+    filename="config_promax.json",
+)
+config = ControlNetModel_Union.load_config(config_file)
+controlnet_model = ControlNetModel_Union.from_config(config)
+# Load the state dictionary
+model_file = hf_hub_download(
+    "xinsir/controlnet-union-sdxl-1.0",
+    filename="diffusion_pytorch_model_promax.safetensors",
+)
+state_dict = load_state_dict(model_file)
+# Extract the keys from the state_dict
+loaded_keys = list(state_dict.keys())
+# Call the method and store all returns in a variable
+result = ControlNetModel_Union._load_pretrained_model(
+    controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
+)
+# Use the first element from the result
+model = result[0]
+model = model.to(device="cuda", dtype=torch.float16)
+vae = AutoencoderKL.from_pretrained(
+    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
 ).to("cuda")
+pipe = StableDiffusionXLFillPipeline.from_pretrained(
+    "SG161222/RealVisXL_V5.0_Lightning",
+    torch_dtype=torch.float16,
+    vae=vae,
+    controlnet=model,
+    variant="fp16",
 ).to("cuda")
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
     final_prompt = f"{prompt_input} , high quality, 4k" if prompt_input else "high quality, 4k"
     # Use with torch.autocast to ensure consistent dtype
+    with torch.autocast(device_type="cuda", dtype=torch.float16):
+        (
+            prompt_embeds,
+            negative_prompt_embeds,
+            pooled_prompt_embeds,
+            negative_pooled_prompt_embeds,
+        ) = pipe.encode_prompt(final_prompt, "cuda", True)
+        for image in pipe(
+            prompt_embeds=prompt_embeds,
+            negative_prompt_embeds=negative_prompt_embeds,
+            pooled_prompt_embeds=pooled_prompt_embeds,
+            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+            image=cnet_image,
+            num_inference_steps=num_inference_steps
+        ):
+            yield cnet_image, image
+    image = image.convert("RGBA")
+    cnet_image.paste(image, (0, 0), mask)
+    yield background, cnet_image
 def clear_result():
     """Clears the result ImageSlider."""