File size: 3,382 Bytes
311419e
 
1cedc13
651dfe7
1cedc13
651dfe7
 
 
1cedc13
311419e
 
1cedc13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311419e
651dfe7
311419e
 
1cedc13
311419e
 
1cedc13
311419e
 
 
 
1cedc13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
651dfe7
 
1cedc13
 
 
 
651dfe7
 
1cedc13
651dfe7
1cedc13
651dfe7
1cedc13
 
c360cac
651dfe7
1cedc13
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import os
import numpy as np
import torch
from PIL import Image
import gradio as gr
from DAI.pipeline_all import DAIPipeline
from DAI.controlnetvae import ControlNetVAEModel
from DAI.decoder import CustomAutoencoderKL
from diffusers import AutoencoderKL, UNet2DConditionModel
from transformers import CLIPTextModel, AutoTokenizer

# Runtime configuration: prefer CUDA when torch reports it is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
weight_dtype = torch.float32

# Model repositories: the first provides the "controlnet", "unet" and "vae_2"
# subfolders, the second provides "vae", "text_encoder" and "tokenizer".
pretrained_model_name_or_path = "sjtu-deepvision/dereflection-any-image-v0"
pretrained_model_name_or_path2 = "stabilityai/stable-diffusion-2-1"

# Components loaded from the dereflection repository with an explicit dtype.
controlnet = ControlNetVAEModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="controlnet",
    torch_dtype=weight_dtype,
).to(device)
unet = UNet2DConditionModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="unet",
    torch_dtype=weight_dtype,
).to(device)
vae_2 = CustomAutoencoderKL.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="vae_2",
    torch_dtype=weight_dtype,
).to(device)

# Components loaded from the Stable Diffusion repository; no torch_dtype is
# passed for these, so they use whatever dtype from_pretrained defaults to.
vae = AutoencoderKL.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="vae",
).to(device)
text_encoder = CLIPTextModel.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="text_encoder",
).to(device)
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="tokenizer",
    use_fast=False,
)

# Assemble the inference pipeline. vae_2 is not handed over here; it is
# supplied per call by process_image. The safety checker, scheduler and
# feature extractor slots are explicitly left empty.
pipe = DAIPipeline(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    controlnet=controlnet,
    safety_checker=None,
    scheduler=None,
    feature_extractor=None,
    t_start=0,
).to(device)

# Function to process the image
def process_image(input_image):
    """Run the module-level dereflection pipeline on one image.

    Args:
        input_image: Image data as a NumPy array (the input component is
            declared with ``type="numpy"``), or ``None`` when no image has
            been provided.

    Returns:
        A ``PIL.Image.Image`` built from the pipeline's prediction, or
        ``None`` when ``input_image`` is ``None``.
    """
    # Guard against an empty input (e.g. the button was clicked before any
    # image was loaded): Image.fromarray(None) would raise. Returning None
    # lets the caller leave the output empty instead of surfacing a traceback.
    if input_image is None:
        return None

    # Convert Gradio input to PIL Image
    input_image = Image.fromarray(input_image)

    # Process the image
    pipe_out = pipe(
        image=input_image,
        prompt="remove glass reflection",
        vae_2=vae_2,
        processing_resolution=None,
    )

    # Convert the output to an image: clamp the prediction to [-1, 1] and
    # shift/scale it into [0, 1].
    processed_frame = (pipe_out.prediction.clip(-1, 1) + 1) / 2
    # Take the first entry along the leading axis (presumably the batch
    # axis - TODO confirm) and scale to 8-bit values for PIL.
    processed_frame = (processed_frame[0] * 255).astype(np.uint8)
    processed_frame = Image.fromarray(processed_frame)

    return processed_frame

# Gradio interface
def create_gradio_interface():
    """Build the Blocks UI for the demo and return it without launching.

    The layout is a title, a row with an input column (image + button) and an
    output column (image), followed by a set of clickable example images.
    Both the examples and the button are wired to ``process_image``.
    """
    # Sample inputs: files/image/1.png through files/image/8.png
    sample_paths = []
    for index in range(1, 9):
        sample_paths.append(os.path.join("files", "image", f"{index}.png"))

    with gr.Blocks() as app:
        gr.Markdown("# Dereflection Any Image")
        with gr.Row():
            with gr.Column():
                source_view = gr.Image(label="Input Image", type="numpy")
                run_button = gr.Button("Remove Reflection", variant="primary")
            with gr.Column():
                result_view = gr.Image(label="Processed Image")

        # Example gallery; caching is turned off for the example outputs.
        gr.Examples(
            label="Example Images",
            examples=sample_paths,
            fn=process_image,
            inputs=source_view,
            outputs=result_view,
            cache_examples=False,
        )

        # Run the pipeline when the button is pressed.
        run_button.click(
            fn=process_image,
            inputs=source_view,
            outputs=result_view,
        )

    return app

# Main function to launch the Gradio app
def main():
    """Build the interface and serve it on host 0.0.0.0, port 7860."""
    create_gradio_interface().launch(server_name="0.0.0.0", server_port=7860)

# Only start the server when this file is executed as a script.
if __name__ == "__main__":
    main()