"""Gradio demo for removing reflections from a single image.

Loading this module instantiates every model component and the inference
pipeline at import time, then exposes a small Gradio UI around
``process_image``.
"""

import os
from typing import Optional

import gradio as gr
import numpy as np
import torch
from diffusers import AutoencoderKL, UNet2DConditionModel
from PIL import Image
from transformers import CLIPTextModel, AutoTokenizer

from DAI.pipeline_all import DAIPipeline
from DAI.controlnetvae import ControlNetVAEModel
from DAI.decoder import CustomAutoencoderKL

# Initialize device and model paths
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
weight_dtype = torch.float32
pretrained_model_name_or_path = "sjtu-deepvision/dereflection-any-image-v0"
pretrained_model_name_or_path2 = "stabilityai/stable-diffusion-2-1"

# Load the model components.
# controlnet, unet and vae_2 come from the dereflection checkpoint; the base
# VAE, text encoder and tokenizer come from the second checkpoint.
controlnet = ControlNetVAEModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="controlnet",
    torch_dtype=weight_dtype,
).to(device)
unet = UNet2DConditionModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="unet",
    torch_dtype=weight_dtype,
).to(device)
vae_2 = CustomAutoencoderKL.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="vae_2",
    torch_dtype=weight_dtype,
).to(device)
vae = AutoencoderKL.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="vae",
).to(device)
text_encoder = CLIPTextModel.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="text_encoder",
).to(device)
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="tokenizer",
    use_fast=False,
)

# Create the pipeline
pipe = DAIPipeline(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    controlnet=controlnet,
    safety_checker=None,
    scheduler=None,
    feature_extractor=None,
    t_start=0,
).to(device)


def process_image(input_image: Optional[np.ndarray]) -> Image.Image:
    """Run the dereflection pipeline on one image and return the result.

    Args:
        input_image: Image array as delivered by the ``gr.Image`` input
            (declared with ``type="numpy"``). ``None`` when the user has not
            supplied an image.

    Returns:
        The processed frame as a PIL image.

    Raises:
        gr.Error: If ``input_image`` is ``None``, so the UI shows a readable
            message instead of an internal traceback.
    """
    # Gradio passes None when the button is clicked with an empty input;
    # Image.fromarray(None) would otherwise fail with an opaque error.
    if input_image is None:
        raise gr.Error("Please provide an input image first.")

    # Convert Gradio input to PIL Image
    pil_input = Image.fromarray(input_image)

    # Process the image
    pipe_out = pipe(
        image=pil_input,
        prompt="remove glass reflection",
        vae_2=vae_2,
        processing_resolution=None,
    )

    # Map the prediction from [-1, 1] to [0, 1], then to 8-bit pixel values.
    # Index 0 selects the first entry of the prediction
    # (presumably the batch dimension -- TODO confirm against DAIPipeline).
    normalized = (pipe_out.prediction.clip(-1, 1) + 1) / 2
    pixels = (normalized[0] * 255).astype(np.uint8)
    return Image.fromarray(pixels)


def create_gradio_interface() -> gr.Blocks:
    """Build the Gradio Blocks UI: input/output panes, examples and button.

    Returns:
        The assembled (not yet launched) ``gr.Blocks`` application.
    """
    # Example images: files/image/1.png ... files/image/8.png
    example_images = [
        os.path.join("files", "image", f"{i}.png")
        for i in range(1, 9)
    ]

    with gr.Blocks() as demo:
        gr.Markdown("# Dereflection Any Image")

        with gr.Row():
            with gr.Column():
                input_image = gr.Image(label="Input Image", type="numpy")
                submit_btn = gr.Button("Remove Reflection", variant="primary")

            with gr.Column():
                output_image = gr.Image(label="Processed Image")

        # Add examples
        gr.Examples(
            examples=example_images,
            inputs=input_image,
            outputs=output_image,
            fn=process_image,
            cache_examples=False,  # Do not pre-compute example outputs at startup
            label="Example Images",
        )

        submit_btn.click(
            fn=process_image,
            inputs=input_image,
            outputs=output_image,
        )

    return demo


def main() -> None:
    """Create the interface and serve it on all interfaces, port 7860."""
    demo = create_gradio_interface()
    demo.launch(server_name="0.0.0.0", server_port=7860)


if __name__ == "__main__":
    main()