# Spaces GPU - `spaces` must be imported before any other library!
import os
# True only when running inside a Hugging Face Space (SPACE_ID is set there).
IS_SPACES = os.environ.get("SPACE_ID") is not None

if IS_SPACES:
    import spaces
else:
    # Stand-in used when the real `spaces` package is not needed, so the
    # `@spaces.GPU` decorations below keep working unchanged.
    class spaces:
        """No-op replacement for the `spaces` module outside of Spaces."""

        @staticmethod
        def GPU(func=None, duration=None, **_ignored):
            """Return the decorated function untouched.

            Works both as ``@spaces.GPU`` (bare) and as
            ``@spaces.GPU(duration=...)``; ``duration`` and any other keyword
            options are accepted and ignored.
            """
            def decorator(fn):
                return fn

            # Bare usage: the function itself arrives as the first argument.
            if callable(func):
                return func
            # Parameterised usage (including a positional duration).
            return decorator

# 이제 다른 라이브러리들을 import
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw
from gradio_client import Client, handle_file
import random
import tempfile
import logging
import torch
from diffusers import AutoencoderKL, TCDScheduler
from diffusers.models.model_loading_utils import load_state_dict
from huggingface_hub import hf_hub_download
from pathlib import Path
import torchaudio
from einops import rearrange
from scipy.io import wavfile
from transformers import pipeline

# Imports for video background removal
from transformers import AutoModelForImageSegmentation
from torchvision import transforms


# ── moviepy import ──────────────────────────────────────────
try:
    from moviepy.editor import (
        VideoFileClip, 
        concatenate_videoclips,
        ImageSequenceClip,
        concatenate_audioclips,
        AudioFileClip,
        CompositeAudioClip,
        CompositeVideoClip,
        ColorClip
    )
except ImportError:
    # 개별적으로 import 시도
    try:
        from moviepy.video.io.VideoFileClip import VideoFileClip
    except ImportError:
        from moviepy import VideoFileClip
        
    try:
        from moviepy.video.compositing.concatenate import concatenate_videoclips
    except ImportError:
        from moviepy import concatenate_videoclips
        
    try:
        from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
    except ImportError:
        from moviepy.editor import ImageSequenceClip
        
    try:
        from moviepy.audio.io.AudioFileClip import AudioFileClip
    except ImportError:
        from moviepy.editor import AudioFileClip
        
    try:
        from moviepy.audio.AudioClip import concatenate_audioclips, CompositeAudioClip
    except ImportError:
        from moviepy.editor import concatenate_audioclips, CompositeAudioClip
        
    try:
        from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
    except ImportError:
        from moviepy.editor import CompositeVideoClip
        
    try:
        from moviepy.video.VideoClip import ColorClip
    except ImportError:
        from moviepy.editor import ColorClip

# Try to import moviepy's resize effect; its location differs between releases.
resize = None
try:
    from moviepy.video.fx.resize import resize
except ImportError:
    try:
        from moviepy.video.fx.all import resize
    except ImportError:
        try:
            # Last attempt: via the editor namespace
            from moviepy.editor import resize
        except ImportError:
            pass  # resize effect not available

# No importable effect: fall back to whatever resize method the clip offers.
if resize is None:
    def resize(clip, newsize=None, height=None, width=None):
        """Fallback resize used when no moviepy resize effect can be imported.

        Looks for a ``resize`` method first and then for ``resized`` (the
        moviepy 2.x name), and calls it with the first of ``newsize``,
        ``height`` or ``width`` that is set. Returns ``clip`` unchanged when
        the clip has neither method or no size argument was given.
        """
        resize_method = None
        for method_name in ('resize', 'resized'):
            resize_method = getattr(clip, method_name, None)
            if resize_method is not None:
                break

        if resize_method is not None:
            if newsize:
                return resize_method(newsize)
            elif height:
                return resize_method(height=height)
            elif width:
                return resize_method(width=width)
        # Cannot resize: hand back the original clip
        return clip

# Try to import moviepy's speedx effect; its location differs between releases.
speedx = None
try:
    from moviepy.video.fx.speedx import speedx
except ImportError:
    try:
        from moviepy.video.fx.all import speedx
    except ImportError:
        try:
            from moviepy.editor import speedx
        except ImportError:
            pass  # speedx effect not available

# No importable effect: fall back to whatever the clip itself offers.
if speedx is None:
    def speedx(clip, factor=1.0, final_duration=None):
        """Fallback speed change used when no moviepy speedx effect is importable.

        Tries, in order: ``clip.with_speed_scaled`` (moviepy 2.x),
        ``clip.fx.speedx``, ``clip.fl_time`` (with the duration rescaled to
        match), and finally ``clip.with_fps``. Returns ``clip`` unchanged when
        none of these is available.

        ``final_duration``, when given and the clip exposes a duration, takes
        precedence over ``factor``.
        """
        # moviepy 2.x exposes speed scaling directly on the clip.
        scale_speed = getattr(clip, 'with_speed_scaled', None)
        if scale_speed is not None:
            return scale_speed(factor=factor, final_duration=final_duration)

        if hasattr(clip, 'fx') and hasattr(clip.fx, 'speedx'):
            return clip.fx.speedx(factor, final_duration)

        # The remaining strategies only understand a factor.
        source_duration = getattr(clip, 'duration', None)
        if final_duration and source_duration:
            factor = source_duration / final_duration

        if hasattr(clip, 'fl_time'):
            retimed = clip.fl_time(lambda t: t * factor)
            # Remapping time alone leaves the duration stale; rescale it too.
            if source_duration is not None and factor and hasattr(retimed, 'set_duration'):
                retimed = retimed.set_duration(source_duration / factor)
            return retimed
        elif hasattr(clip, 'with_fps') and factor != 1.0:
            # Approximate a speed change by adjusting the FPS
            # NOTE(review): this changes the frame rate, not the duration — confirm it is still wanted.
            new_fps = clip.fps * factor if hasattr(clip, 'fps') else 24 * factor
            return clip.with_fps(new_fps)
        else:
            # Last resort: return the clip as-is
            return clip

import time
from concurrent.futures import ThreadPoolExecutor

# ────────────────────────────────────────────────────────────


# Bypass the torch.load safety check via an environment variable (temporary workaround)
os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"

# GPU 초기화를 위한 간단한 함수 (Spaces 환경에서 필수)
@spaces.GPU(duration=1)
def gpu_warmup():
    """Allocate and drop a one-element CUDA tensor when CUDA is available.

    Always returns the string "GPU ready", whether or not CUDA was present.
    """
    cuda_present = torch.cuda.is_available()
    if cuda_present:
        # One-element tensor moved to the GPU, then discarded right away.
        scratch = torch.zeros(1).cuda()
        del scratch
    return "GPU ready"

# MMAudio imports - spaces import 이후에 와야 함
try:
    import mmaudio
except ImportError:
    os.system("pip install -e .")
    import mmaudio

from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video,
                                setup_eval_logging)
from mmaudio.model.flow_matching import FlowMatching
from mmaudio.model.networks import MMAudio, get_my_mmaudio
from mmaudio.model.sequence_config import SequenceConfig
from mmaudio.model.utils.features_utils import FeaturesUtils

# Logging setup
logging.basicConfig(level=logging.INFO)

# Keep the settings and initialisation from the original code
torch.set_float32_matmul_precision("medium")

# Pick the compute device explicitly: half precision on CUDA, full precision on CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
    torch_dtype = torch.float16
else:
    device = torch.device("cpu")
    torch_dtype = torch.float32

logging.info(f"Using device: {device}")

# Model state is kept in module-level globals; load_models() fills them in
MODELS_LOADED = False
BIREFNET_MODEL = None
BIREFNET_LITE_MODEL = None
OUTPAINT_PIPE = None
MMAUDIO_NET = None
MMAUDIO_FEATURE_UTILS = None
MMAUDIO_SEQ_CFG = None
TRANSLATOR = None

# API URLs of the remote text-to-image and image-to-video services
# NOTE(review): plain-HTTP endpoints on a hard-coded IP — confirm this is intended.
TEXT2IMG_API_URL = "http://211.233.58.201:7896"
VIDEO_API_URL = "http://211.233.58.201:7875"

# Image size presets: display label -> output width/height in pixels
IMAGE_PRESETS = {
    "커스텀": {"width": 1024, "height": 1024},
    "1:1 정사각형": {"width": 1024, "height": 1024},
    "4:3 표준": {"width": 1024, "height": 768},
    "16:9 와이드스크린": {"width": 1024, "height": 576},
    "9:16 세로형": {"width": 576, "height": 1024},
    "6:19 특수 세로형": {"width": 324, "height": 1024},
    "Instagram 정사각형": {"width": 1080, "height": 1080},
    "Instagram 스토리": {"width": 1080, "height": 1920},
    "Instagram 가로형": {"width": 1080, "height": 566},
    "Facebook 커버": {"width": 820, "height": 312},
    "Twitter 헤더": {"width": 1500, "height": 500},
    "YouTube 썸네일": {"width": 1280, "height": 720},
    "LinkedIn 배너": {"width": 1584, "height": 396},
}

# Transform for BiRefNet: resize to 768x768, convert to a tensor, then
# normalise each channel with the given mean / std values
transform_image = transforms.Compose([
    transforms.Resize((768, 768)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

@spaces.GPU(duration=60)
def load_models():
    """Load every model the app uses into the module-level globals.

    Loads, in order: the two BiRefNet segmentation models, the ControlNet
    Union + SDXL fill pipeline used for outpainting, the Korean->English
    translation pipeline (optional; a failure only logs a warning), and the
    MMAudio network with its feature utilities.

    Returns:
        True when the models are already loaded or loading succeeded,
        False when any required step raised (the error is logged).
    """
    global MODELS_LOADED, BIREFNET_MODEL, BIREFNET_LITE_MODEL, OUTPAINT_PIPE
    global MMAUDIO_NET, MMAUDIO_FEATURE_UTILS, MMAUDIO_SEQ_CFG, TRANSLATOR

    import contextlib

    if MODELS_LOADED:
        return True

    try:
        # BiRefNet models (full and lite)
        logging.info("Loading BiRefNet models...")
        BIREFNET_MODEL = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet", trust_remote_code=True)
        BIREFNET_MODEL.to(device)
        BIREFNET_LITE_MODEL = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet_lite", trust_remote_code=True)
        BIREFNET_LITE_MODEL.to(device)

        # ControlNet and outpainting models
        logging.info("Loading ControlNet models...")
        from controlnet_union import ControlNetModel_Union
        from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline

        config_file = hf_hub_download(
            "xinsir/controlnet-union-sdxl-1.0",
            filename="config_promax.json",
        )

        config = ControlNetModel_Union.load_config(config_file)
        controlnet_model = ControlNetModel_Union.from_config(config)

        model_file = hf_hub_download(
            "xinsir/controlnet-union-sdxl-1.0",
            filename="diffusion_pytorch_model_promax.safetensors",
        )
        state_dict = load_state_dict(model_file)
        loaded_keys = list(state_dict.keys())

        result = ControlNetModel_Union._load_pretrained_model(
            controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
        )

        # The first element of the result is the model with weights applied
        model = result[0]
        model = model.to(device=device, dtype=torch_dtype)

        # VAE
        vae = AutoencoderKL.from_pretrained(
            "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
        ).to(device)

        # Outpainting pipeline
        OUTPAINT_PIPE = StableDiffusionXLFillPipeline.from_pretrained(
            "SG161222/RealVisXL_V5.0_Lightning",
            torch_dtype=torch_dtype,
            vae=vae,
            controlnet=model,
            variant="fp16" if device.type == "cuda" else None,
        ).to(device)

        OUTPAINT_PIPE.scheduler = TCDScheduler.from_config(OUTPAINT_PIPE.scheduler.config)

        # MMAudio checkpoints
        logging.info("Loading MMAudio models...")
        model_mmaudio: ModelConfig = all_model_cfg['large_44k_v2']
        model_mmaudio.download_if_needed()
        setup_eval_logging()

        # Translator (optional: prompts are passed through untranslated without it)
        try:
            TRANSLATOR = pipeline("translation",
                                 model="Helsinki-NLP/opus-mt-ko-en",
                                 device="cpu",
                                 use_fast=True,
                                 trust_remote_code=False)
        except Exception as e:
            logging.warning(f"Failed to load translation model: {e}")
            TRANSLATOR = None

        # MMAudio dtype: bfloat16 on GPU, float32 otherwise
        if torch.cuda.is_available():
            mmaudio_dtype = torch.bfloat16
        else:
            mmaudio_dtype = torch.float32

        # torch.cuda.device() rejects non-CUDA devices, so only enter it on
        # GPU; on CPU a do-nothing context keeps the loading path identical.
        if device.type == "cuda":
            device_scope = torch.cuda.device(device)
        else:
            device_scope = contextlib.nullcontext()

        with device_scope:
            MMAUDIO_SEQ_CFG = model_mmaudio.seq_cfg
            MMAUDIO_NET = get_my_mmaudio(model_mmaudio.model_name).to(device, mmaudio_dtype).eval()
            MMAUDIO_NET.load_weights(torch.load(model_mmaudio.model_path, map_location=device, weights_only=True))
            logging.info(f'Loaded weights from {model_mmaudio.model_path}')

            MMAUDIO_FEATURE_UTILS = FeaturesUtils(
                tod_vae_ckpt=model_mmaudio.vae_path,
                synchformer_ckpt=model_mmaudio.synchformer_ckpt,
                enable_conditions=True,
                mode=model_mmaudio.mode,
                bigvgan_vocoder_ckpt=model_mmaudio.bigvgan_16k_path,
                need_vae_encoder=False
            ).to(device, mmaudio_dtype).eval()

        MODELS_LOADED = True
        logging.info("All models loaded successfully!")
        return True

    except Exception as e:
        # logging.exception keeps the traceback, which the plain message lacks
        logging.exception(f"Failed to load models: {str(e)}")
        return False

# 기존 함수들 모두 유지
def update_dimensions(preset):
    """Return the (width, height) of a named preset, or (1024, 1024) if unknown."""
    try:
        dims = IMAGE_PRESETS[preset]
    except KeyError:
        # Not a known preset label
        return 1024, 1024
    return dims["width"], dims["height"]

def generate_text_to_image(prompt, width, height, guidance, inference_steps, seed):
    """Request an image from the remote text-to-image service.

    Returns a ``(image, message)`` pair. With an empty prompt, or when the
    request fails, ``image`` is None and ``message`` explains why. A seed of
    -1 is replaced by a random one before the request is sent.
    """
    if not prompt:
        return None, "프롬프트를 입력해주세요"

    try:
        client = Client(TEXT2IMG_API_URL)
        # -1 means "pick a seed for me"
        if seed == -1:
            seed = random.randint(0, 9999999)

        request = dict(
            prompt=prompt,
            width=int(width),
            height=int(height),
            guidance=float(guidance),
            inference_steps=int(inference_steps),
            seed=int(seed),
            do_img2img=False,
            init_image=None,
            image2image_strength=0.8,
            resize_img=True,
            api_name="/generate_image",
        )
        response = client.predict(**request)
        generated, used_seed = response[0], response[1]
        return generated, f"사용된 시드: {used_seed}"
    except Exception as e:
        logging.error(f"Image generation error: {str(e)}")
        return None, f"오류: {str(e)}"

def generate_video_from_image(image, prompt="", length=4.0):
    """Send an image to the remote video service and return the resulting video.

    The image is written to a temporary PNG, uploaded with the request, and
    the temporary file is removed afterwards whether or not the request
    succeeded.

    Args:
        image: Array accepted by ``Image.fromarray``; None returns None.
        prompt: Motion prompt; a default prompt is sent when empty.
        length: Requested video length, passed as ``video_length``.

    Returns:
        The ``"video"`` entry of the first result item, or None when the
        input is missing, the service returns nothing usable, or any step
        fails (the error is logged).
    """
    if image is None:
        return None

    temp_path = None
    try:
        # Reserve a temp path and close the handle before PIL writes to it
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as fp:
            temp_path = fp.name
        Image.fromarray(image).save(temp_path)

        # Call the API
        client = Client(VIDEO_API_URL)
        result = client.predict(
            input_image=handle_file(temp_path),
            prompt=prompt if prompt else "Generate natural motion",
            n_prompt="",
            seed=random.randint(0, 9999999),
            use_teacache=True,
            video_length=float(length),
            api_name="/process"
        )

        if result and len(result) > 0:
            video_dict = result[0]
            if isinstance(video_dict, dict):
                return video_dict.get("video")
        return None

    except Exception as e:
        logging.error(f"Video generation error: {str(e)}")
        return None
    finally:
        # Always remove the temp image, including on failure
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                pass

def prepare_image_and_mask(image, width, height, overlap_percentage, alignment):
    """Place an image on a white canvas and build the matching outpaint mask.

    The image is scaled to fit inside ``width`` x ``height`` (aspect ratio
    kept) and pasted according to ``alignment``. The mask is white (255)
    where content must be generated and black (0) over the kept part of the
    source, shrunk by the overlap on every side that is not flush with the
    aligned edge.

    Args:
        image: numpy array or PIL image; None returns ``(None, None)``.
        width, height: Target canvas size; converted to int.
        overlap_percentage: Overlap as a percentage of the scaled image size.
        alignment: One of "가운데", "왼쪽", "오른쪽", "위", "아래"; any other
            value is treated as centred.

    Returns:
        ``(background, mask)`` as PIL images, or ``(None, None)``.
    """
    if image is None:
        return None, None

    # Convert arrays to a PIL image
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image).convert('RGB')

    # PIL needs integer sizes; UI sliders may deliver floats
    target_w, target_h = int(width), int(height)
    target_size = (target_w, target_h)

    # Scale the image to fit the target; never let a side collapse to 0 px
    scale_factor = min(target_w / image.width, target_h / image.height)
    new_width = max(int(image.width * scale_factor), 1)
    new_height = max(int(image.height * scale_factor), 1)

    source = image.resize((new_width, new_height), Image.LANCZOS)

    # Overlap in pixels, at least one pixel per axis
    overlap_x = max(int(new_width * (overlap_percentage / 100)), 1)
    overlap_y = max(int(new_height * (overlap_percentage / 100)), 1)

    # Margins per alignment; anything unrecognised falls back to centred
    center_x = (target_w - new_width) // 2
    center_y = (target_h - new_height) // 2
    margins = {
        "왼쪽": (0, center_y),
        "오른쪽": (target_w - new_width, center_y),
        "위": (center_x, 0),
        "아래": (center_x, target_h - new_height),
    }
    margin_x, margin_y = margins.get(alignment, (center_x, center_y))

    # White canvas with the scaled source pasted in
    background = Image.new('RGB', target_size, (255, 255, 255))
    background.paste(source, (margin_x, margin_y))

    # Mask starts fully white (= generate everywhere)
    mask = Image.new('L', target_size, 255)
    mask_draw = ImageDraw.Draw(mask)

    # Kept region: no overlap is applied on the side the image is aligned to
    left_overlap = margin_x + overlap_x if alignment != "왼쪽" else margin_x
    right_overlap = margin_x + new_width - overlap_x if alignment != "오른쪽" else margin_x + new_width
    top_overlap = margin_y + overlap_y if alignment != "위" else margin_y
    bottom_overlap = margin_y + new_height - overlap_y if alignment != "아래" else margin_y + new_height

    mask_draw.rectangle([
        (left_overlap, top_overlap),
        (right_overlap, bottom_overlap)
    ], fill=0)

    return background, mask

def preview_outpaint(image, width, height, overlap_percentage, alignment):
    """Render an outpaint preview: the canvas with the masked area tinted red.

    Returns an RGBA image, or None when no canvas could be prepared.
    """
    background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, alignment)
    if background is None:
        return None

    canvas_size = background.size

    # Semi-transparent red, kept only where the mask is set
    tint = Image.new('RGBA', canvas_size, (255, 0, 0, 64))
    tint_layer = Image.new('RGBA', canvas_size, (0, 0, 0, 0))
    tint_layer.paste(tint, (0, 0), mask)

    # Composite the tint over an RGBA copy of the canvas
    base = background.copy().convert('RGBA')
    return Image.alpha_composite(base, tint_layer)

@spaces.GPU(duration=120)
def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8):
    """Outpaint an image to the requested canvas size.

    Returns the finished image; None when there is no input or no canvas; a
    plain grey placeholder when the pipeline is unavailable; and, if
    generation fails, the prepared canvas (or None if the failure happened
    before the canvas existed).
    """
    if image is None:
        return None

    # Make sure the models are loaded
    if not MODELS_LOADED:
        load_models()

    if OUTPAINT_PIPE is None:
        return Image.new('RGB', (width, height), (200, 200, 200))

    # Stays None until the canvas has been prepared; used as the error fallback
    background = None
    try:
        background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, alignment)
        if background is None:
            return None

        # ControlNet input: the canvas with the masked area painted black
        cnet_image = background.copy()
        cnet_image.paste(0, (0, 0), mask)

        final_prompt = "high quality, 4k" if not prompt else f"{prompt}, high quality, 4k"

        with torch.autocast(device_type=device.type, dtype=torch_dtype):
            embeddings = OUTPAINT_PIPE.encode_prompt(final_prompt, str(device), True)
            (
                prompt_embeds,
                negative_prompt_embeds,
                pooled_prompt_embeds,
                negative_pooled_prompt_embeds,
            ) = embeddings

            pipe_kwargs = {
                "prompt_embeds": prompt_embeds,
                "negative_prompt_embeds": negative_prompt_embeds,
                "pooled_prompt_embeds": pooled_prompt_embeds,
                "negative_pooled_prompt_embeds": negative_pooled_prompt_embeds,
                "image": cnet_image,
                "num_inference_steps": num_steps,
            }

            # The pipeline is iterated to exhaustion; only the last item is kept
            for generated_image in OUTPAINT_PIPE(**pipe_kwargs):
                pass

            final_image = generated_image

        # Paste the generated pixels back through the mask
        cnet_image.paste(final_image.convert("RGBA"), (0, 0), mask)
        return cnet_image

    except Exception as e:
        logging.error(f"Outpainting error: {str(e)}")
        return background

# MMAudio 관련 함수들
def translate_prompt(text):
    """Translate a prompt with the loaded translator when it contains Hangul.

    Text is returned unchanged when no translator is loaded, when it is
    empty, when it contains no Hangul character, or when translation fails
    (the error is logged). Only Hangul code points trigger translation, so
    other scripts such as Chinese ideographs are not sent to the translator.
    """
    # Hangul Jamo, Hangul Compatibility Jamo, Hangul Syllables
    hangul_ranges = ((0x1100, 0x11FF), (0x3130, 0x318F), (0xAC00, 0xD7A3))

    try:
        if TRANSLATOR is None or not text:
            return text

        has_hangul = any(
            low <= ord(char) <= high
            for char in text
            for low, high in hangul_ranges
        )
        if not has_hangul:
            return text

        with torch.no_grad():
            return TRANSLATOR(text)[0]['translation_text']
    except Exception as e:
        logging.error(f"Translation error: {e}")
        return text

@spaces.GPU(duration=120)
@torch.inference_mode()
def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
                   cfg_strength: float, duration: float):
    """Generate audio for a video with MMAudio and return the muxed video path.

    Both prompts go through translate_prompt first. Frames are read with
    load_video, audio is produced by generate() using a seeded generator and
    an Euler FlowMatching sampler, and make_video writes the result to a new
    temporary .mp4 whose path is returned.

    Returns None when the MMAudio network is not available after attempting
    to load the models.
    """
    # Make sure the models are loaded
    if not MODELS_LOADED:
        load_models()

    if MMAUDIO_NET is None:
        return None

    prompt = translate_prompt(prompt)
    negative_prompt = translate_prompt(negative_prompt)

    # Seeded generator on the compute device
    rng = torch.Generator(device=device)
    rng.manual_seed(seed)
    fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)

    # load_video also returns a duration, which replaces the requested one
    clip_frames, sync_frames, duration = load_video(video, duration)
    clip_frames = clip_frames.unsqueeze(0)
    sync_frames = sync_frames.unsqueeze(0)
    # The shared sequence config is updated in place before the network's
    # sequence lengths are refreshed from it
    MMAUDIO_SEQ_CFG.duration = duration
    MMAUDIO_NET.update_seq_lengths(MMAUDIO_SEQ_CFG.latent_seq_len, MMAUDIO_SEQ_CFG.clip_seq_len, MMAUDIO_SEQ_CFG.sync_seq_len)

    audios = generate(clip_frames,
                      sync_frames, [prompt],
                      negative_text=[negative_prompt],
                      feature_utils=MMAUDIO_FEATURE_UTILS,
                      net=MMAUDIO_NET,
                      fm=fm,
                      rng=rng,
                      cfg_strength=cfg_strength)
    # First item of the result, as float32 on the CPU
    audio = audios.float().cpu()[0]

    # delete=False: the file is handed back to the caller by path
    video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
    make_video(video,
               video_save_path,
               audio,
               sampling_rate=MMAUDIO_SEQ_CFG.sampling_rate,
               duration_sec=MMAUDIO_SEQ_CFG.duration)
    return video_save_path

# 비디오 배경제거 관련 함수들
def process_bg_image(image, bg, fast_mode=False):
    """Replace the background of a single PIL image.

    ``bg`` may be a "#RRGGBB" colour string, a PIL image, or anything
    ``Image.open`` accepts. The image is returned untouched when either
    BiRefNet model is missing.
    """
    if BIREFNET_MODEL is None or BIREFNET_LITE_MODEL is None:
        return image

    original_size = image.size
    batch = transform_image(image).unsqueeze(0).to(device)
    # The lite model is used in fast mode
    segmenter = BIREFNET_LITE_MODEL if fast_mode else BIREFNET_MODEL

    with torch.no_grad():
        preds = segmenter(batch)[-1].sigmoid().cpu()
    # Prediction -> PIL mask at the original image size
    matte = transforms.ToPILImage()(preds[0].squeeze()).resize(original_size)

    if isinstance(bg, str) and bg.startswith("#"):
        # Hex colour -> opaque RGBA fill
        rgb = tuple(int(bg[i:i+2], 16) for i in (1, 3, 5))
        backdrop = Image.new("RGBA", original_size, (*rgb, 255))
    elif isinstance(bg, Image.Image):
        backdrop = bg.convert("RGBA").resize(original_size)
    else:
        backdrop = Image.open(bg).convert("RGBA").resize(original_size)

    return Image.composite(image, backdrop, matte)

def process_video_frame(frame, bg_type, bg, fast_mode, bg_frame_index, background_frames, color):
    """Apply background replacement to one video frame.

    Returns ``(frame_array, bg_frame_index)``. The index is advanced by one
    only for video backgrounds. On any error the error is printed and the
    frame is returned as received.
    """
    try:
        source = Image.fromarray(frame)

        if bg_type == "비디오":
            # Use the background frame at the current index, then advance it
            backdrop_frame = background_frames[bg_frame_index]
            bg_frame_index += 1
            backdrop = Image.fromarray(backdrop_frame)
            composed = process_bg_image(source, backdrop, fast_mode)
        elif bg_type == "색상":
            composed = process_bg_image(source, color, fast_mode)
        elif bg_type == "이미지":
            composed = process_bg_image(source, bg, fast_mode)
        else:
            # Unknown background type: leave the frame as it is
            composed = source

        return np.array(composed), bg_frame_index
    except Exception as e:
        print(f"Error processing frame: {e}")
        return frame, bg_frame_index

@spaces.GPU(duration=300)
def process_video_bg(vid, bg_type="색상", bg_image=None, bg_video=None, color="#00FF00",
                     fps=0, video_handling="slow_down", fast_mode=True, max_workers=10):
    """Replace the background of every frame of a video (generator).

    Yields 3-tuples of ``(frame or visibility update, video path or
    visibility update, status message)`` while processing, and finally the
    last processed frame together with the path of the written .mp4.

    Args:
        vid: Path of the input video.
        bg_type: "색상" (colour), "이미지" (image) or "비디오" (video).
        bg_image: Background image for "이미지".
        bg_video: Path of the background video for "비디오".
        color: "#RRGGBB" colour for "색상".
        fps: Output frame rate; 0 uses the input video's rate.
        video_handling: "slow_down" stretches a background video that is
            shorter than the input to the input's length; any other value
            loops it.
        fast_mode: Passed to the frame processor (selects the lite model).
        max_workers: Thread pool size for frame processing.
    """
    # Make sure the models are loaded
    if not MODELS_LOADED:
        load_models()

    if BIREFNET_MODEL is None:
        yield gr.update(visible=False), gr.update(visible=True), "BiRefNet 모델을 로드하지 못했습니다."
        yield None, None, "BiRefNet 모델을 로드하지 못했습니다."
        return

    start_time = time.time()
    video = None
    source_background = None
    try:
        video = VideoFileClip(vid)
        if fps == 0:
            fps = video.fps

        audio = video.audio
        frames = list(video.iter_frames(fps=fps))

        processed_frames = []
        yield gr.update(visible=True), gr.update(visible=False), f"처리 시작... 경과 시간: 0초"

        if bg_type == "비디오":
            source_background = VideoFileClip(bg_video)
            background_video = source_background
            if background_video.duration < video.duration:
                if video_handling == "slow_down" and speedx is not None:
                    # A factor below 1 slows the clip down, stretching the
                    # short background to the length of the input video.
                    background_video = speedx(
                        background_video,
                        factor=background_video.duration / video.duration,
                    )
                else:
                    # Loop mode, or no speed effect available: repeat the clip
                    repeats = int(video.duration / background_video.duration + 1)
                    background_video = concatenate_videoclips([background_video] * repeats)
            background_frames = list(background_video.iter_frames(fps=fps))
        else:
            background_frames = None

        # Clamp to the last background frame so rounding differences in the
        # frame counts do not leave trailing frames unprocessed.
        last_bg_index = len(background_frames) - 1 if background_frames else None

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(
                    process_video_frame, frame, bg_type, bg_image, fast_mode,
                    i if last_bg_index is None else min(i, last_bg_index),
                    background_frames, color,
                )
                for i, frame in enumerate(frames)
            ]
            # Collect in submission order so the frames stay in sequence
            for i, future in enumerate(futures):
                result, _ = future.result()
                processed_frames.append(result)
                elapsed_time = time.time() - start_time
                yield result, None, f"프레임 {i+1}/{len(frames)} 처리 중... 경과 시간: {elapsed_time:.2f}초"

        processed_video = ImageSequenceClip(processed_frames, fps=fps)
        # moviepy 2.x names this with_audio, 1.x set_audio
        attach_audio = getattr(processed_video, "with_audio", None) or getattr(processed_video, "set_audio")
        processed_video = attach_audio(audio)

        # Reserve the output path, then write once the handle is closed
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
            temp_filepath = temp_file.name
        processed_video.write_videofile(temp_filepath, codec="libx264")

        elapsed_time = time.time() - start_time
        yield gr.update(visible=False), gr.update(visible=True), f"처리 완료! 경과 시간: {elapsed_time:.2f}초"
        yield processed_frames[-1], temp_filepath, f"처리 완료! 경과 시간: {elapsed_time:.2f}초"

    except Exception as e:
        logging.exception(f"Error: {e}")
        elapsed_time = time.time() - start_time
        yield gr.update(visible=False), gr.update(visible=True), f"비디오 처리 오류: {e}. 경과 시간: {elapsed_time:.2f}초"
        yield None, None, f"비디오 처리 오류: {e}. 경과 시간: {elapsed_time:.2f}초"
    finally:
        # Release the file readers whether processing succeeded or not
        for clip in (video, source_background):
            if clip is None:
                continue
            try:
                clip.close()
            except Exception:
                logging.debug("Failed to close clip", exc_info=True)

@spaces.GPU(duration=180)
def merge_videos_with_audio(video_files, audio_file, audio_mode, audio_volume, original_audio_volume, output_fps):
    """Concatenate videos (sorted by file name) and optionally add audio.

    Args:
        video_files: A path or a list of up to 10 paths; None entries are ignored.
        audio_file: Optional path of an audio file, or of a video whose audio
            track should be used.
        audio_mode: "백그라운드 뮤직" mixes the new audio with the videos' own
            audio; any other value replaces the original audio.
        audio_volume: Volume of the new audio in percent (100 = unchanged).
        original_audio_volume: Volume of the original audio in percent, used
            in background-music mode.
        output_fps: Output frame rate; 0 uses the first video's rate.

    Returns:
        ``(output_path, message)`` on success, ``(None, message)`` otherwise.
        If only the audio step fails, the videos are still merged and the
        message says the audio could not be processed.
    """
    if not video_files:
        return None, "비디오 파일을 업로드해주세요."

    if isinstance(video_files, list) and len(video_files) > 10:
        return None, "최대 10개의 비디오만 업로드 가능합니다."

    def _invoke(obj, method_names, *args):
        """Call the first method of ``obj`` found in ``method_names`` (moviepy 1.x / 2.x naming)."""
        for method_name in method_names:
            method = getattr(obj, method_name, None)
            if method is not None:
                return method(*args)
        raise AttributeError(f"{type(obj).__name__} has none of {method_names}")

    opened_clips = []  # every file-backed clip opened here; closed in `finally`
    final_video = None
    try:
        # Collect the paths and sort them by file name
        if isinstance(video_files, list):
            video_paths = [path for path in video_files if path is not None]
        else:
            video_paths = [video_files]

        if not video_paths:
            return None, "비디오 파일을 업로드해주세요."

        video_paths.sort(key=os.path.basename)

        # Load the clips and record their sizes
        video_clips = []
        clip_sizes = []
        for i, video_path in enumerate(video_paths):
            logging.info(f"비디오 {i+1}/{len(video_paths)} 로드 중: {os.path.basename(video_path)}")
            clip = VideoFileClip(video_path)
            opened_clips.append(clip)
            video_clips.append(clip)
            try:
                clip_sizes.append((clip.w, clip.h))
            except AttributeError:
                # Normalise to a tuple so the size comparisons below still work
                clip_sizes.append(tuple(clip.size))

        # The first video defines the target size
        target_width, target_height = clip_sizes[0]
        target_size = (target_width, target_height)

        if any(size != target_size for size in clip_sizes):
            logging.warning(f"비디오 크기가 서로 다릅니다. 첫 번째 비디오 크기({target_width}x{target_height})로 조정합니다.")
            video_clips = [
                clip if size == target_size else resize(clip, newsize=target_size)
                for clip, size in zip(video_clips, clip_sizes)
            ]

        # Default to the first video's FPS
        if output_fps == 0:
            output_fps = video_clips[0].fps

        final_video = concatenate_videoclips(video_clips, method="compose")

        # Audio handling: a failure here is logged and the merge continues without it
        audio_applied = False
        if audio_file:
            try:
                audio_path = audio_file
                logging.info(f"Processing audio from: {audio_path}")
                logging.info(f"Audio mode: {audio_mode}")

                if audio_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
                    # Keep the source video open: its audio reader is needed
                    # until the final file has been written.
                    audio_source = VideoFileClip(audio_path)
                    opened_clips.append(audio_source)
                    audio_clip = audio_source.audio
                else:
                    audio_clip = AudioFileClip(audio_path)
                    opened_clips.append(audio_clip)

                if audio_clip is None:
                    raise ValueError("오디오를 로드할 수 없습니다.")

                # Volume
                if audio_volume != 100:
                    audio_clip = _invoke(audio_clip, ("volumex", "with_volume_scaled"), audio_volume / 100)

                # Fit the audio to the video length: trim, or loop then trim
                video_duration = final_video.duration
                audio_duration = audio_clip.duration

                if audio_duration > video_duration:
                    audio_clip = _invoke(audio_clip, ("subclip", "subclipped"), 0, video_duration)
                elif audio_duration < video_duration:
                    loops_needed = int(video_duration / audio_duration) + 1
                    looped_audio = concatenate_audioclips([audio_clip] * loops_needed)
                    audio_clip = _invoke(looped_audio, ("subclip", "subclipped"), 0, video_duration)

                if audio_mode == "백그라운드 뮤직":
                    # Background-music mode: mix with the existing audio
                    if final_video.audio:
                        original_audio = final_video.audio
                        if original_audio_volume != 100:
                            original_audio = _invoke(
                                original_audio, ("volumex", "with_volume_scaled"), original_audio_volume / 100
                            )

                        final_audio = CompositeAudioClip([original_audio, audio_clip])
                        final_video = _invoke(final_video, ("set_audio", "with_audio"), final_audio)
                        logging.info("Background music mode: Mixed original and new audio")
                    else:
                        # No original audio: just add the new one
                        final_video = _invoke(final_video, ("set_audio", "with_audio"), audio_clip)
                        logging.info("No original audio found, adding new audio only")
                else:
                    # Replace mode: the original audio is dropped
                    final_video = _invoke(final_video, ("set_audio", "with_audio"), audio_clip)
                    logging.info("Replace mode: Replaced original audio")

                audio_applied = True
                logging.info("Audio successfully processed")

            except Exception as e:
                logging.error(f"오디오 처리 중 오류 발생: {str(e)}")

        # Reserve the output path, then write once the handle is closed
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
            temp_filepath = temp_file.name

        final_video.write_videofile(
            temp_filepath,
            fps=output_fps,
            codec="libx264",
            audio_codec="aac",
            preset="medium",
            bitrate="5000k",
            audio_bitrate="192k"
        )

        # Status message reflects what actually happened to the audio
        if not audio_file:
            mode_msg = "오디오 없음"
        elif not audio_applied:
            mode_msg = "오디오 처리 실패, 비디오만 병합됨"
        elif audio_mode == "백그라운드 뮤직":
            mode_msg = "백그라운드 뮤직 추가됨"
        else:
            mode_msg = "오디오 대체됨"

        return temp_filepath, f"✅ 성공적으로 {len(video_paths)}개의 비디오를 병합했습니다! (크기: {target_width}x{target_height}, {mode_msg})"

    except Exception as e:
        # logging.exception records the traceback along with the message
        logging.exception(f"Video merge error: {str(e)}")
        return None, f"❌ 오류 발생: {str(e)}"
    finally:
        # Release every reader, on success and on failure alike
        for clip in [final_video, *opened_clips]:
            if clip is None:
                continue
            try:
                clip.close()
            except Exception:
                logging.debug("Failed to close clip", exc_info=True)

# Stylesheet handed to gr.Blocks(css=...) below. It declares a pastel palette
# as CSS variables, caps the container width at 1200px, styles the "panel-box"
# cards, gives the action buttons (selected by their elem_id values) a shared
# gradient look, and sets a minimum height for elements with class "tabitem".
css = """
:root {
    --primary-color: #f8c3cd;
    --secondary-color: #b3e5fc;
    --background-color: #f5f5f7;
    --card-background: #ffffff;
    --text-color: #424242;
    --accent-color: #ffb6c1;
    --success-color: #c8e6c9;
    --warning-color: #fff9c4;
    --shadow-color: rgba(0, 0, 0, 0.1);
    --border-radius: 12px;
}
.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
}
.panel-box {
    border-radius: var(--border-radius) !important;
    box-shadow: 0 8px 16px var(--shadow-color) !important;
    background-color: var(--card-background) !important;
    padding: 20px !important;
    margin-bottom: 20px !important;
}
#generate-btn, #video-btn, #outpaint-btn, #preview-btn, #audio-btn, #bg-remove-btn, #merge-btn {
    background: linear-gradient(135deg, #ff9a9e, #fad0c4) !important;
    font-size: 1.1rem !important;
    padding: 12px 24px !important;
    margin-top: 10px !important;
    width: 100% !important;
}
.tabitem {
    min-height: 700px !important;
}
"""

# Gradio interface: the Blocks container that the `with demo:` section fills
# with components and event handlers.
demo = gr.Blocks(css=css, title="AI 이미지 & 비디오 & 오디오 생성기")

with demo:
    # Page title and a notice that the first use may be slow while models load.
    gr.Markdown("# 🎨 Ginigen 스튜디오")
    gr.Markdown("처음 사용 시 모델 로딩에 시간이 걸릴 수 있습니다. 잠시만 기다려주세요.")

    # Read-only model status display; it is the output of the demo.load handler.
    model_status = gr.Textbox(label="모델 상태", value="모델 로딩 대기 중...", interactive=False)
    
    with gr.Tabs() as tabs:
        # Tab 1: text-to-image, followed by image-to-video
        with gr.Tab("텍스트→이미지→비디오", elem_classes="tabitem"):
            with gr.Row(equal_height=True):
                # Input column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 📝 이미지 생성 설정")

                        prompt = gr.Textbox(
                            label="프롬프트(한글/영어 가능)",
                            placeholder="생성하고 싶은 이미지를 설명하세요...",
                            lines=3
                        )

                        # Selecting a preset drives the width/height sliders
                        # through the size_preset.change handler.
                        size_preset = gr.Dropdown(
                            choices=list(IMAGE_PRESETS.keys()),
                            value="1:1 정사각형",
                            label="크기 프리셋"
                        )

                        with gr.Row():
                            width = gr.Slider(256, 2048, 1024, step=64, label="너비")
                            height = gr.Slider(256, 2048, 1024, step=64, label="높이")

                        with gr.Row():
                            guidance = gr.Slider(1.0, 20.0, 3.5, step=0.1, label="가이던스")
                            steps = gr.Slider(1, 50, 30, step=1, label="스텝")

                        # The label advertises -1 as "random seed".
                        seed = gr.Number(label="시드 (-1=랜덤)", value=-1)

                        generate_btn = gr.Button("🎨 이미지 생성", variant="primary", elem_id="generate-btn")

                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🎬 비디오 생성 설정")

                        video_prompt = gr.Textbox(
                            label="(선택) 비디오 프롬프트(영어로 입력)",
                            placeholder="비디오의 움직임을 설명하세요... (비워두면 기본 움직임 적용)",
                            lines=2
                        )

                        # Video length in seconds, 1 to 60 in 0.5 s steps.
                        video_length = gr.Slider(
                            minimum=1, 
                            maximum=60, 
                            value=4, 
                            step=0.5, 
                            label="비디오 길이 (초)",
                            info="1초에서 60초까지 선택 가능합니다"
                        )

                        video_btn = gr.Button("🎬 비디오로 변환", variant="secondary", elem_id="video-btn")

                # Output column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🖼️ 생성 결과")

                        # The generated image is also the input of the
                        # video_btn handler, so it is kept as a numpy array.
                        output_image = gr.Image(label="생성된 이미지", type="numpy")
                        output_seed = gr.Textbox(label="시드 정보")
                        output_video = gr.Video(label="생성된 비디오")
        
        # Tab 2: image outpainting (change the aspect ratio of an image)
        with gr.Tab("이미지 비율 변경/생성", elem_classes="tabitem"):
            with gr.Row(equal_height=True):
                # Input column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🖼️ 이미지 업로드")

                        input_image = gr.Image(
                            label="원본 이미지",
                            type="numpy"
                        )

                        outpaint_prompt = gr.Textbox(
                            label="프롬프트 (선택)",
                            placeholder="확장할 영역에 대한 설명...",
                            lines=2
                        )

                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### ⚙️ 아웃페인팅 설정")

                        # Selecting a preset drives the target width/height
                        # sliders through the outpaint_size_preset.change handler.
                        outpaint_size_preset = gr.Dropdown(
                            choices=list(IMAGE_PRESETS.keys()),
                            value="16:9 와이드스크린",
                            label="목표 크기 프리셋"
                        )

                        with gr.Row():
                            outpaint_width = gr.Slider(256, 2048, 1280, step=64, label="목표 너비")
                            # NOTE(review): the default 720 is not on the
                            # 256 + 64*k step grid of this slider - confirm
                            # the UI keeps the value as-is.
                            outpaint_height = gr.Slider(256, 2048, 720, step=64, label="목표 높이")

                        # Where the original image is anchored on the target canvas.
                        alignment = gr.Dropdown(
                            choices=["가운데", "왼쪽", "오른쪽", "위", "아래"],
                            value="가운데",
                            label="정렬"
                        )

                        # Mask overlap, in percent (1-50).
                        overlap_percentage = gr.Slider(
                            minimum=1,
                            maximum=50,
                            value=10,
                            step=1,
                            label="마스크 오버랩 (%)"
                        )

                        # Number of inference steps (4-12).
                        outpaint_steps = gr.Slider(
                            minimum=4,
                            maximum=12,
                            value=8,
                            step=1,
                            label="추론 스텝"
                        )

                        preview_btn = gr.Button("👁️ 미리보기", elem_id="preview-btn")
                        outpaint_btn = gr.Button("🎨 아웃페인팅 실행", variant="primary", elem_id="outpaint-btn")

                # Output column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🖼️ 결과")

                        preview_image = gr.Image(label="미리보기")
                        outpaint_result = gr.Image(label="아웃페인팅 결과")
        
        # Tab 3: generate audio for an uploaded video
        with gr.Tab("비디오 + 오디오", elem_classes="tabitem"):
            with gr.Row(equal_height=True):
                # Input column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🎥 비디오 업로드")

                        # Upload only; no other video sources are offered.
                        audio_video_input = gr.Video(
                            label="입력 비디오",
                            sources=["upload"]
                        )

                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🎵 오디오 생성 설정")

                        audio_prompt = gr.Textbox(
                            label="프롬프트 (한글 지원)",
                            placeholder="생성하고 싶은 오디오를 설명하세요... (예: 평화로운 피아노 음악)",
                            lines=3
                        )

                        audio_negative_prompt = gr.Textbox(
                            label="네거티브 프롬프트",
                            value="music",
                            placeholder="원하지 않는 요소...",
                            lines=2
                        )

                        with gr.Row():
                            audio_seed = gr.Number(label="시드", value=0)
                            audio_steps = gr.Number(label="스텝", value=25)

                        with gr.Row():
                            audio_cfg = gr.Number(label="가이던스 스케일", value=4.5)
                            # NOTE(review): 9999 seconds looks like a sentinel
                            # meaning "cover the whole video" - confirm against
                            # video_to_audio.
                            audio_duration = gr.Number(label="지속시간 (초)", value=9999)

                        audio_btn = gr.Button("🎵 오디오 생성 및 합성", variant="primary", elem_id="audio-btn")

                # Output column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🎬 생성 결과")

                        # Display-only player for the video with generated audio.
                        output_video_with_audio = gr.Video(
                            label="오디오가 추가된 비디오",
                            interactive=False
                        )
        

        # Tab 4: video editing - merge several clips and optionally add audio
        with gr.Tab("비디오 편집", elem_classes="tabitem"):
            with gr.Row(equal_height=True):
                # Input column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🎥 비디오 업로드 (최대 10개)")
                        gr.Markdown("**파일명이 작을수록 우선순위가 높습니다** (예: 1.mp4, 2.mp4, 3.mp4)")

                        # Multiple video uploads, delivered to the handler as file paths.
                        video_files = gr.File(
                            label="비디오 파일들",
                            file_count="multiple",
                            file_types=["video"],
                            type="filepath"
                        )

                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### ⚙️ 편집 설정")

                        # Per the label, 0 means "use the first video's FPS".
                        output_fps = gr.Slider(
                            minimum=0,
                            maximum=60,
                            value=0,
                            step=1,
                            label="출력 FPS (0 = 첫 번째 비디오의 FPS 사용)"
                        )

                        gr.Markdown("""
                        **크기 처리**:
                        - 첫 번째 비디오의 크기가 기준이 됩니다
                        - 다른 크기의 비디오는 첫 번째 비디오 크기로 조정됩니다
                        - 최상의 결과를 위해 같은 크기의 비디오를 사용하세요
                        """)

                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🎵 오디오 설정 (선택)")

                        # Audio mode: replace the clips' audio, or mix the
                        # upload in as background music.
                        audio_mode = gr.Radio(
                            ["대체", "백그라운드 뮤직"],
                            label="오디오 모드",
                            value="대체",
                            info="대체: 기존 오디오를 완전히 교체 | 백그라운드 뮤직: 기존 오디오와 함께 재생"
                        )

                        audio_file = gr.Audio(
                            label="오디오 파일 (MP3, WAV, M4A 등)",
                            type="filepath",
                            sources=["upload"]
                        )

                        # Volume of the uploaded audio, in percent (100 = unchanged).
                        audio_volume = gr.Slider(
                            minimum=0,
                            maximum=200,
                            value=100,
                            step=1,
                            label="추가 오디오 볼륨 (%)",
                            info="100% = 원본 볼륨"
                        )

                        # Volume of the clips' own audio. Hidden initially and
                        # toggled by the audio_mode.change handler, which shows
                        # it only in background-music mode.
                        original_audio_volume = gr.Slider(
                            minimum=0,
                            maximum=200,
                            value=100,
                            step=1,
                            label="원본 오디오 볼륨 (%)",
                            info="백그라운드 뮤직 모드에서 원본 비디오 오디오의 볼륨",
                            visible=False
                        )

                        gr.Markdown("""
                        **오디오 옵션**:
                        - **대체 모드**: 업로드한 오디오가 비디오의 기존 오디오를 완전히 대체합니다
                        - **백그라운드 뮤직 모드**: 업로드한 오디오가 기존 오디오와 함께 재생됩니다
                        - 오디오가 비디오보다 짧으면 자동으로 반복됩니다
                        - 오디오가 비디오보다 길면 비디오 길이에 맞춰 잘립니다
                        """)

                        merge_videos_btn = gr.Button("🎬 비디오 병합", variant="primary", elem_id="merge-btn")

                # Output column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🎬 병합 결과")

                        merge_status = gr.Textbox(label="처리 상태", interactive=False)
                        merged_video = gr.Video(label="병합된 비디오")

                        # Usage help. The last tip names both audio modes so it
                        # agrees with the mode descriptions in the input column.
                        gr.Markdown("""
                        ### ℹ️ 사용 방법
                        1. 여러 비디오 파일을 업로드하세요 (최대 10개)
                        2. 파일명이 작은 순서대로 자동 정렬됩니다
                        3. (선택) 오디오 파일을 추가하고 볼륨을 조절하세요
                        4. '비디오 병합' 버튼을 클릭하세요
                        
                        **특징**:
                        - ✅ 첫 번째 비디오의 크기를 기준으로 통합
                        - ✅ 업로드한 오디오가 전체 비디오에 적용됩니다
                        - ✅ 높은 비트레이트로 품질 유지
                        
                        **팁**: 
                        - 파일명을 01.mp4, 02.mp4, 03.mp4 형식으로 지정하면 순서 관리가 쉽습니다
                        - '대체' 모드에서는 추가한 오디오가 기존 비디오의 오디오를 대체하고, '백그라운드 뮤직' 모드에서는 기존 오디오와 함께 재생됩니다
                        """)
        
        # Tab 5: video background removal / compositing
        with gr.Tab("비디오 배경제거/합성", elem_classes="tabitem"):
            with gr.Row(equal_height=True):
                # Input column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🎥 비디오 업로드")

                        bg_video_input = gr.Video(
                            label="입력 비디오",
                            interactive=True
                        )

                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🎨 배경 설정")

                        # Background type; the bg_type.change handler shows
                        # only the inputs that belong to the selected type.
                        bg_type = gr.Radio(
                            ["색상", "이미지", "비디오"], 
                            label="배경 유형", 
                            value="색상", 
                            interactive=True
                        )

                        # Visible initially because the default type is "색상" (color).
                        color_picker = gr.ColorPicker(
                            label="배경 색상", 
                            value="#00FF00", 
                            visible=True, 
                            interactive=True
                        )

                        bg_image_input = gr.Image(
                            label="배경 이미지", 
                            type="filepath", 
                            visible=False, 
                            interactive=True
                        )

                        bg_video_bg = gr.Video(
                            label="배경 비디오", 
                            visible=False, 
                            interactive=True
                        )

                        # Options that only apply to a video background; the
                        # whole column is shown/hidden together.
                        with gr.Column(visible=False) as video_handling_options:
                            video_handling_radio = gr.Radio(
                                ["slow_down", "loop"], 
                                label="비디오 처리 방식", 
                                value="slow_down", 
                                interactive=True,
                                info="slow_down: 배경 비디오를 느리게 재생, loop: 배경 비디오를 반복"
                            )

                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### ⚙️ 처리 설정")

                        # Per the label, 0 means "keep the source FPS".
                        fps_slider = gr.Slider(
                            minimum=0,
                            maximum=60,
                            step=1,
                            value=0,
                            label="출력 FPS (0 = 원본 FPS 유지)",
                            interactive=True
                        )

                        fast_mode_checkbox = gr.Checkbox(
                            label="빠른 모드 (BiRefNet_lite 사용)", 
                            value=True, 
                            interactive=True
                        )

                        # Per the info text: number of frames processed in parallel.
                        max_workers_slider = gr.Slider(
                            minimum=1, 
                            maximum=32, 
                            step=1, 
                            value=10, 
                            label="최대 워커 수", 
                            info="병렬로 처리할 프레임 수", 
                            interactive=True
                        )

                        bg_remove_btn = gr.Button("🎬 배경 변경", variant="primary", elem_id="bg-remove-btn")

                # Output column
                with gr.Column(scale=1):
                    with gr.Group(elem_classes="panel-box"):
                        gr.Markdown("### 🎬 처리 결과")

                        # Created hidden; it is the first output of the
                        # bg_remove_btn handler.
                        stream_image = gr.Image(label="실시간 스트리밍", visible=False)
                        output_bg_video = gr.Video(label="최종 비디오")
                        time_textbox = gr.Textbox(label="경과 시간", interactive=False)

                        gr.Markdown("""
                        ### ℹ️ 사용 방법
                        1. 비디오를 업로드하세요
                        2. 원하는 배경 유형을 선택하세요
                        3. 설정을 조정하고 '배경 변경' 버튼을 클릭하세요
                        
                        **참고**: GPU 제한으로 한 번에 약 200프레임까지 처리 가능합니다.
                        긴 비디오는 작은 조각으로 나누어 처리하세요.
                        """)
    
    # 모델 로드 함수 실행
    def on_demo_load():
        """Produce the status text shown when the demo page loads.

        When running on Spaces, ``gpu_warmup()`` is called first. Any
        exception is reported as a status string instead of being raised.

        Returns:
            str: the ready message, or an error message containing the
            exception text.
        """
        try:
            if IS_SPACES:
                # Warm up the GPU in the Spaces environment.
                gpu_warmup()
        except Exception as e:
            return f"초기화 오류: {str(e)}"
        # Models are loaded automatically on the first GPU function call.
        return "모델 로딩 준비 완료"
    
    # Event wiring - tab 1 (text -> image -> video)
    size_preset.change(
        fn=update_dimensions,
        inputs=[size_preset],
        outputs=[width, height],
    )

    generate_btn.click(
        fn=generate_text_to_image,
        inputs=[prompt, width, height, guidance, steps, seed],
        outputs=[output_image, output_seed],
    )

    # Without a generated image there is nothing to convert, so the handler
    # returns None instead of calling the video generator.
    video_btn.click(
        fn=lambda img, v_prompt, length: (
            None if img is None else generate_video_from_image(img, v_prompt, length)
        ),
        inputs=[output_image, video_prompt, video_length],
        outputs=[output_video],
    )

    # Event wiring - tab 2 (outpainting)
    outpaint_size_preset.change(
        fn=update_dimensions,
        inputs=[outpaint_size_preset],
        outputs=[outpaint_width, outpaint_height],
    )

    preview_btn.click(
        fn=preview_outpaint,
        inputs=[
            input_image,
            outpaint_width,
            outpaint_height,
            overlap_percentage,
            alignment,
        ],
        outputs=[preview_image],
    )

    outpaint_btn.click(
        fn=outpaint_image,
        inputs=[
            input_image,
            outpaint_prompt,
            outpaint_width,
            outpaint_height,
            overlap_percentage,
            alignment,
            outpaint_steps,
        ],
        outputs=[outpaint_result],
    )

    # Event wiring - tab 3 (video + audio)
    audio_btn.click(
        fn=video_to_audio,
        inputs=[
            audio_video_input,
            audio_prompt,
            audio_negative_prompt,
            audio_seed,
            audio_steps,
            audio_cfg,
            audio_duration,
        ],
        outputs=[output_video_with_audio],
    )
    
    # 이벤트 연결 - 네 번째 탭 (비디오 편집)

    def toggle_original_volume(mode):
        """Return a visibility update that is visible only in background-music mode.

        Args:
            mode: the currently selected audio mode label.

        Returns:
            A ``gr.update`` whose ``visible`` flag is True exactly when
            ``mode`` equals "백그라운드 뮤직".
        """
        is_background_music = mode == "백그라운드 뮤직"
        return gr.update(visible=is_background_music)

    # Show or hide the original-audio volume slider when the mode changes.
    audio_mode.change(
        fn=toggle_original_volume,
        inputs=[audio_mode],
        outputs=[original_audio_volume],
    )

    # Merge the uploaded clips; the handler returns the video and a status message.
    merge_videos_btn.click(
        fn=merge_videos_with_audio,
        inputs=[
            video_files,
            audio_file,
            audio_mode,
            audio_volume,
            original_audio_volume,
            output_fps,
        ],
        outputs=[merged_video, merge_status],
    )
    # 이벤트 연결 - 다섯 번째 탭 (비디오 배경제거/합성)
    def update_bg_visibility(bg_type):
        """Return four visibility updates matching the selected background type.

        The flags are ordered as the handler's outputs are wired: color
        picker, background image input, background video input, video
        handling options.

        Args:
            bg_type: the selected background type label.

        Returns:
            A 4-tuple of ``gr.update(visible=...)`` objects. An unrecognized
            type hides all four.
        """
        visibility_by_type = {
            "색상": (True, False, False, False),
            "이미지": (False, True, False, False),
            "비디오": (False, False, True, True),
        }
        flags = visibility_by_type.get(bg_type, (False, False, False, False))
        return tuple(gr.update(visible=flag) for flag in flags)
    
    # Switch the visible background inputs when the background type changes.
    bg_type.change(
        fn=update_bg_visibility,
        inputs=[bg_type],
        outputs=[color_picker, bg_image_input, bg_video_bg, video_handling_options],
    )

    bg_remove_btn.click(
        fn=process_video_bg,
        inputs=[
            bg_video_input,
            bg_type,
            bg_image_input,
            bg_video_bg,
            color_picker,
            fps_slider,
            video_handling_radio,
            fast_mode_checkbox,
            max_workers_slider,
        ],
        outputs=[stream_image, output_bg_video, time_textbox],
    )

    # Run on page load; the returned text goes to the model status box.
    demo.load(fn=on_demo_load, outputs=[model_status])

if __name__ == "__main__":
    # On Spaces, try a GPU warm-up before launching. This is best-effort: a
    # failure is logged and startup continues. Only Exception is caught so
    # KeyboardInterrupt / SystemExit still stop the process.
    if IS_SPACES:
        try:
            gpu_warmup()
        except Exception as e:
            logging.warning(f"GPU warmup failed: {str(e)}")

    demo.launch()