import spaces  # 必须放在最前面
import os
import numpy as np
import torch
from PIL import Image
import gradio as gr
from gradio_imageslider import ImageSlider

# Dtype passed as `torch_dtype` when loading the controlnet, unet and vae_2
# weights further down in this file.
# NOTE(review): the previous comment here read "defer CUDA initialization";
# nothing on this line does that — presumably it referred to `import spaces`
# being placed first in the file. Confirm.
weight_dtype = torch.float32

# 加载模型组件
from DAI.pipeline_all import DAIPipeline
from DAI.controlnetvae import ControlNetVAEModel
from DAI.decoder import CustomAutoencoderKL
from diffusers import AutoencoderKL, UNet2DConditionModel
from transformers import CLIPTextModel, AutoTokenizer

# Model identifiers handed to `from_pretrained`: the first one supplies the
# controlnet / unet / vae_2 subfolders, the second one the vae / text_encoder /
# tokenizer subfolders.
pretrained_model_name_or_path = "sjtu-deepvision/dereflection-any-image-v0"
pretrained_model_name_or_path2 = "stabilityai/stable-diffusion-2-1"
# Target device: CUDA when torch reports it as available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the model weights.
# Components taken from `pretrained_model_name_or_path` are loaded with
# `weight_dtype` and moved to `device`.
controlnet = ControlNetVAEModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="controlnet",
    torch_dtype=weight_dtype,
).to(device)
unet = UNet2DConditionModel.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="unet",
    torch_dtype=weight_dtype,
).to(device)
vae_2 = CustomAutoencoderKL.from_pretrained(
    pretrained_model_name_or_path,
    subfolder="vae_2",
    torch_dtype=weight_dtype,
).to(device)

# Components taken from `pretrained_model_name_or_path2`. No `torch_dtype` is
# passed here, so the loading library's default applies. The tokenizer is not
# moved to a device.
vae = AutoencoderKL.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="vae",
).to(device)
text_encoder = CLIPTextModel.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="text_encoder",
).to(device)
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path2,
    subfolder="tokenizer",
    use_fast=False,
)

# Assemble the inference pipeline from the components loaded above; no safety
# checker, scheduler or feature extractor is supplied.
pipe = DAIPipeline(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    controlnet=controlnet,
    safety_checker=None,
    scheduler=None,
    feature_extractor=None,
    t_start=0,
)
# Move the assembled pipeline to the target device and keep the returned handle.
pipe = pipe.to(device)

def _fit_within(image, max_side):
    """Return *image* downscaled so that its longer side is at most *max_side*.

    The aspect ratio is preserved. Images that already fit are returned
    unchanged (the same object, no copy).

    Args:
        image: A PIL image.
        max_side: Maximum allowed length, in pixels, of the longer side.

    Returns:
        The original image, or a LANCZOS-resampled smaller copy.
    """
    width, height = image.size
    longest = max(width, height)
    if longest <= max_side:
        return image

    scale = max_side / longest
    # Clamp each side to at least 1 px: for extreme aspect ratios the
    # truncated short side would otherwise become 0 and make resize fail.
    new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
    return image.resize(new_size, Image.LANCZOS)


@spaces.GPU
def process_image(input_image, resolution_choice):
    """Run reflection removal on one image and return a before/after pair.

    Args:
        input_image: Image array as delivered by the Gradio image component,
            or ``None`` when nothing has been uploaded.
        resolution_choice: The selected radio option. ``"768"`` downsizes the
            input so its longer side is at most 768 px before inference; any
            other value leaves the input at its original size.

    Returns:
        A ``(input, output)`` tuple of PIL images: the (possibly resized)
        input and the processed result.

    Raises:
        gr.Error: If no input image was provided.
    """
    # Without this guard a click with no upload fails inside Image.fromarray
    # with an opaque AttributeError.
    if input_image is None:
        raise gr.Error("Please upload an image before removing reflections.")

    # Convert the Gradio input to a PIL image.
    input_image = Image.fromarray(input_image)

    if resolution_choice == "768":
        input_image = _fit_within(input_image, 768)

    # The pipeline is always called with processing_resolution=0 so that it
    # works at the size of the image it is given; any downscaling has already
    # happened above.
    processing_resolution = 0

    pipe_out = pipe(
        image=input_image,
        prompt="remove glass reflection",
        vae_2=vae_2,
        processing_resolution=processing_resolution,
    )

    # Clip the prediction to [-1, 1], rescale to [0, 1], then convert the
    # first entry (presumably the batch axis — confirm against DAIPipeline)
    # to an 8-bit image.
    processed_frame = (pipe_out.prediction.clip(-1, 1) + 1) / 2
    processed_frame = (processed_frame[0] * 255).astype(np.uint8)
    processed_frame = Image.fromarray(processed_frame)

    return input_image, processed_frame

# Build the Gradio interface
def create_gradio_interface():
    """Build and return the Gradio Blocks demo (not yet launched).

    The page consists of a title and description, a row with an input column
    (image upload, resolution radio, help text, submit button) and an output
    column holding an image slider, followed by a list of example inputs.
    Both the examples and the submit button are wired to ``process_image``.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    heading_md = "# Dereflection Any Image"
    intro_md = """Official demo for **Dereflection Any Image**.
    Please refer to our [paper](), [project page](https://abuuu122.github.io/DAI.github.io/), and [github](https://github.com/Abuuu122/Dereflection-Any-Image) for more details."""

    # Example rows: files/image/1.png through 13.png, each paired with the
    # "768" resolution option.
    example_rows = []
    for index in range(1, 14):
        example_rows.append([os.path.join("files", "image", f"{index}.png"), "768"])

    with gr.Blocks() as demo:
        gr.Markdown(heading_md)
        gr.Markdown(intro_md)

        with gr.Row():
            with gr.Column():
                image_in = gr.Image(label="Input Image", type="numpy")
                resolution_radio = gr.Radio(
                    choices=["768", "Original Resolution"],
                    label="Processing Resolution",
                    value="768",  # "768" is the preselected option
                )
                gr.Markdown(
                    "Select the resolution for processing the image. Higher resolution may take longer to process. 768 is recommended for faster processing and stable performance."
                )
                run_button = gr.Button("Remove Reflection", variant="primary")
            with gr.Column():
                comparison = ImageSlider(label="Processed image", type="pil")

        # The examples and the button feed the same components to the handler.
        handler_inputs = [image_in, resolution_radio]

        # Example gallery; example outputs are not cached.
        gr.Examples(
            examples=example_rows,
            inputs=handler_inputs,
            outputs=comparison,
            fn=process_image,
            cache_examples=False,
            label="Example Images",
        )

        # Run the handler when the submit button is clicked.
        run_button.click(
            fn=process_image,
            inputs=handler_inputs,
            outputs=comparison,
        )

    return demo

# Entry point
def main():
    """Build the demo and serve it on 0.0.0.0, port 7860."""
    create_gradio_interface().launch(
        server_name="0.0.0.0",
        server_port=7860,
    )

# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()