3v324v23's picture
update
ff635e8
raw
history blame
4.1 kB
import torch
import gradio as gr
from pipeline_controlnet_sd_xl_raw import StableDiffusionXLControlNetRAWPipeline
from diffusers import ControlNetModel, UniPCMultistepScheduler
from torchvision import transforms
from PIL import Image
import traceback
# ========== 1. Load Models ==========
pipe = StableDiffusionXLControlNetRAWPipeline.from_pretrained(
"wencheng256/DiffusionRAW",
torch_dtype=torch.float16
)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
# ========== 2. Utility function: tensor -> PIL ==========
def tensor_to_pil(img_tensor: torch.Tensor) -> Image.Image:
if img_tensor.is_cuda:
img_tensor = img_tensor.cpu()
if img_tensor.dtype != torch.float32:
img_tensor = img_tensor.float()
img_tensor = img_tensor.clamp(0, 1)
return transforms.ToPILImage()(img_tensor)
# ========== 3. Load a .pth file ==========
def load_pth_data(pth_path):
data = torch.load(pth_path)
rgb_tensor = data["rgb"]
raw_tensor = data["raw"]
mask_tensor = data["mask"]
cond_tensor = data["condition"]
raw_image_pil = tensor_to_pil(raw_tensor[0][:, :448])
rgb_tensor_pil = tensor_to_pil(torch.flip(rgb_tensor[0], dims=[0])[:, :448])
mask_image_pil = tensor_to_pil(1 - mask_tensor[0])
return rgb_tensor_pil, raw_image_pil, mask_image_pil, raw_tensor, mask_tensor, cond_tensor
# ========== 4. Inference function ==========
def infer_fn(prompt, mask_edited, raw_tensor_state, mask_tensor_state, cond_tensor_state):
try:
if isinstance(mask_edited, dict):
mask_edited = mask_edited["composite"]
mask_edited_tensor = transforms.ToTensor()(mask_edited)
mask_edited_tensor = 1-mask_edited_tensor[:1].unsqueeze(0).half()
raw_t = raw_tensor_state.half()
cond_t = cond_tensor_state.half()
generator = torch.manual_seed(0)
result = pipe(
prompt=prompt,
num_inference_steps=20,
generator=generator,
image=raw_t,
mask_image=mask_edited_tensor,
control_image=cond_t
).images[0]
return tensor_to_pil(result), mask_edited
except Exception as e:
traceback.print_exc()
return "Error occurred during inference. Please check the terminal logs!"
# ========== 5. Build Gradio App ==========
def build_demo():
with gr.Blocks() as demo:
gr.Markdown("# DiffusionRAW")
pth_options = ["./data1.pth", "./data2.pth", "./data3.pth"]
pth_selector = gr.Dropdown(choices=pth_options, value=pth_options[0], label="Select a PTH file")
load_button = gr.Button("Load")
with gr.Row():
raw_display = gr.Image(label="Raw Image", interactive=False)
rgb_display = gr.Image(label="sRGB Image", interactive=False)
mask_editor = gr.Sketchpad(
label="Mask (Sketch)",
interactive=True,
width=512,
height=512
)
raw_tensor_state = gr.State()
mask_tensor_state = gr.State()
cond_tensor_state = gr.State()
load_button.click(
fn=load_pth_data,
inputs=[pth_selector],
outputs=[
rgb_display,
raw_display,
mask_editor,
raw_tensor_state,
mask_tensor_state,
cond_tensor_state
]
)
prompt_input = gr.Textbox(label="Prompt", value="An RAW Image.", lines=1)
generate_button = gr.Button("Generate")
output_image = gr.Image(label="Output")
generate_button.click(
fn=infer_fn,
inputs=[
prompt_input,
mask_editor,
raw_tensor_state,
mask_tensor_state,
cond_tensor_state
],
outputs=[output_image, rgb_display]
)
return demo
if __name__ == "__main__":
demo = build_demo()
demo.launch(server_name="0.0.0.0", server_port=9112)