# Source: app.py uploaded by sjtu-deepvision (commit 1cedc13, verified; 3.38 kB).
import os
import numpy as np
import torch
from PIL import Image
import gradio as gr
from DAI.pipeline_all import DAIPipeline
from DAI.controlnetvae import ControlNetVAEModel
from DAI.decoder import CustomAutoencoderKL
from diffusers import AutoencoderKL, UNet2DConditionModel
from transformers import CLIPTextModel, AutoTokenizer
# Initialize device and model paths.
# Inference runs on CUDA when available and falls back to the CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Single dtype shared by every model component loaded below.
weight_dtype = torch.float32
# Repository providing the "controlnet", "unet" and "vae_2" subfolders.
pretrained_model_name_or_path = "sjtu-deepvision/dereflection-any-image-v0"
# Repository providing the "vae", "text_encoder" and "tokenizer" subfolders.
pretrained_model_name_or_path2 = "stabilityai/stable-diffusion-2-1"

# Load the model components. Every component is loaded with the same
# ``weight_dtype`` so that changing it in one place cannot leave the pipeline
# with a mix of precisions.
controlnet = ControlNetVAEModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="controlnet",
    torch_dtype=weight_dtype,
).to(device)
unet = UNet2DConditionModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="unet",
    torch_dtype=weight_dtype,
).to(device)
vae_2 = CustomAutoencoderKL.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="vae_2",
    torch_dtype=weight_dtype,
).to(device)
vae = AutoencoderKL.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="vae",
    torch_dtype=weight_dtype,
).to(device)
text_encoder = CLIPTextModel.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="text_encoder",
    torch_dtype=weight_dtype,
).to(device)
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="tokenizer",
    use_fast=False,
)

# Create the pipeline. ``vae_2`` is not a constructor argument; it is handed
# to the pipeline on each call instead.
pipe = DAIPipeline(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    controlnet=controlnet,
    safety_checker=None,
    scheduler=None,
    feature_extractor=None,
    t_start=0,
).to(device)
# Function to process the image
def process_image(input_image):
    """Run the dereflection pipeline on one image and return the result.

    Args:
        input_image: The image as a NumPy array, or ``None`` when no image
            has been supplied.

    Returns:
        A ``PIL.Image.Image`` built from the first item of the pipeline's
        prediction, or ``None`` when ``input_image`` is ``None``.
    """
    # An empty image input arrives as None; Image.fromarray(None) would raise,
    # so return an empty result instead of surfacing an opaque error.
    if input_image is None:
        return None

    # Convert Gradio input to PIL Image
    pil_input = Image.fromarray(input_image)

    # Process the image
    pipe_out = pipe(
        image=pil_input,
        prompt="remove glass reflection",
        vae_2=vae_2,
        processing_resolution=None,
    )

    # Clip the prediction to [-1, 1], rescale it to [0, 1], then convert the
    # first item to 8-bit values for PIL.
    processed_frame = (pipe_out.prediction.clip(-1, 1) + 1) / 2
    processed_frame = (processed_frame[0] * 255).astype(np.uint8)
    return Image.fromarray(processed_frame)
# Gradio interface
def create_gradio_interface():
    """Assemble the Gradio Blocks UI and return it without launching it.

    The layout is a title, a row with an input column (image plus button) and
    an output column (result image), followed by a gallery of example inputs.
    Both the examples and the button are wired to ``process_image``.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    # Example inputs: files/image/1.png through files/image/8.png.
    example_paths = [
        os.path.join("files", "image", f"{i}.png") for i in range(1, 9)
    ]

    with gr.Blocks() as app:
        gr.Markdown("# Dereflection Any Image")

        with gr.Row():
            # Left column: the image to clean up and the trigger button.
            with gr.Column():
                image_in = gr.Image(label="Input Image", type="numpy")
                run_button = gr.Button("Remove Reflection", variant="primary")
            # Right column: the processed result.
            with gr.Column():
                image_out = gr.Image(label="Processed Image")

        # Example gallery; caching is disabled for the example outputs.
        gr.Examples(
            label="Example Images",
            examples=example_paths,
            fn=process_image,
            inputs=image_in,
            outputs=image_out,
            cache_examples=False,
        )

        # Clicking the button runs the pipeline on the current input image.
        run_button.click(fn=process_image, inputs=image_in, outputs=image_out)

    return app
# Main function to launch the Gradio app
def main():
    """Build the Gradio demo and start its server on 0.0.0.0, port 7860."""
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    create_gradio_interface().launch(**launch_options)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()