# Spaces GPU - must be imported before anything else!
import os
IS_SPACES = os.environ.get("SPACE_ID") is not None
if IS_SPACES:
import spaces
else:
# GPU λ°μ½”λ ˆμ΄ν„°κ°€ 없을 λ•Œλ₯Ό μœ„ν•œ 더미 λ°μ½”λ ˆμ΄ν„°
class spaces:
@staticmethod
def GPU(duration=None):
def decorator(func):
return func
return decorator
# Now import the remaining libraries
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw
from gradio_client import Client, handle_file
import random
import tempfile
import logging
import torch
from diffusers import AutoencoderKL, TCDScheduler
from diffusers.models.model_loading_utils import load_state_dict
from huggingface_hub import hf_hub_download
from pathlib import Path
import torchaudio
from einops import rearrange
from scipy.io import wavfile
from transformers import pipeline
# λΉ„λ””μ˜€ 배경제거 κ΄€λ ¨ import
from transformers import AutoModelForImageSegmentation
from torchvision import transforms
# ── moviepy import ──────────────────────────────────────────
# moviepy 1.x exposes its public API via the editor submodule;
# moviepy 2.x moved everything to the top-level package.
try:
    from moviepy.editor import (
        VideoFileClip, concatenate_videoclips, vfx,
        ImageSequenceClip, concatenate_audioclips,
        AudioFileClip, CompositeAudioClip,
        CompositeVideoClip, ColorClip,
    )
except ImportError:
    from moviepy import (
        vfx,                     # effect functions (speed, resize, ...)
        VideoFileClip,
        concatenate_videoclips,
        ImageSequenceClip,       # image sequence -> video
        concatenate_audioclips,  # join audio clips
        AudioFileClip,           # audio clip
        CompositeAudioClip,      # audio compositing
    )
    from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
    from moviepy.video.VideoClip import ColorClip
# ────────────────────────────────────────────────────────────
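# ── moviepy 1.x / 2.x compatibility helpers ─────────────────
# A minimal shim, assuming the usual v2 renames (set_audio -> with_audio,
# subclip -> subclipped, volumex -> with_volume_scaled, and effect classes
# applied via with_effects instead of clip.fx). The underscore-prefixed
# helper names below are our own additions, not moviepy API.
def _set_audio(clip, audio):
    """Attach an audio track on either moviepy version."""
    return clip.set_audio(audio) if hasattr(clip, "set_audio") else clip.with_audio(audio)

def _subclip(clip, start, end):
    """Cut a clip to [start, end] on either moviepy version."""
    return clip.subclip(start, end) if hasattr(clip, "subclip") else clip.subclipped(start, end)

def _scale_volume(audio_clip, factor):
    """Scale an audio clip's volume on either moviepy version."""
    if hasattr(audio_clip, "volumex"):
        return audio_clip.volumex(factor)
    return audio_clip.with_volume_scaled(factor)

def _change_speed(clip, factor):
    """Change playback speed (new_duration = duration / factor)."""
    if hasattr(vfx, "speedx"):  # moviepy 1.x effect function
        return clip.fx(vfx.speedx, factor)
    return clip.with_effects([vfx.MultiplySpeed(factor)])  # moviepy 2.x effect class

def _resize(clip, new_size):
    """Resize a clip to (width, height) on either moviepy version."""
    if hasattr(clip, "resized"):  # moviepy 2.x
        return clip.resized(new_size=new_size)
    return clip.resize(newsize=new_size)  # moviepy 1.x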
import time
import contextlib  # for a no-op device context on CPU
from concurrent.futures import ThreadPoolExecutor
# ν™˜κ²½ λ³€μˆ˜ μ„€μ •μœΌλ‘œ torch.load 체크 우회 (μž„μ‹œ ν•΄κ²°μ±…)
os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"
# GPU μ΄ˆκΈ°ν™”λ₯Ό μœ„ν•œ κ°„λ‹¨ν•œ ν•¨μˆ˜ (Spaces ν™˜κ²½μ—μ„œ ν•„μˆ˜)
@spaces.GPU(duration=1)
def gpu_warmup():
"""GPU μ›Œλ°μ—… ν•¨μˆ˜ - Spaces ν™˜κ²½μ—μ„œ GPU μ‚¬μš©μ„ μœ„ν•΄ ν•„μš”"""
if torch.cuda.is_available():
dummy = torch.zeros(1).cuda()
del dummy
return "GPU ready"
# MMAudio imports - must come after the spaces import
try:
import mmaudio
except ImportError:
os.system("pip install -e .")
import mmaudio
from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video,
setup_eval_logging)
from mmaudio.model.flow_matching import FlowMatching
from mmaudio.model.networks import MMAudio, get_my_mmaudio
from mmaudio.model.sequence_config import SequenceConfig
from mmaudio.model.utils.features_utils import FeaturesUtils
# λ‘œκΉ… μ„€μ •
logging.basicConfig(level=logging.INFO)
# κΈ°μ‘΄ μ½”λ“œμ˜ λͺ¨λ“  μ„€μ •κ³Ό μ΄ˆκΈ°ν™” λΆ€λΆ„ μœ μ§€
torch.set_float32_matmul_precision("medium")
# Device 섀정을 더 λͺ…ν™•ν•˜κ²Œ
if torch.cuda.is_available():
device = torch.device("cuda")
torch_dtype = torch.float16
else:
device = torch.device("cpu")
torch_dtype = torch.float32
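# fp16 halves memory and bandwidth on CUDA; CPU stays at fp32, where half precision is poorly supported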
logging.info(f"Using device: {device}")
# μ „μ—­ λ³€μˆ˜λ‘œ λͺ¨λΈ μƒνƒœ 관리
MODELS_LOADED = False
BIREFNET_MODEL = None
BIREFNET_LITE_MODEL = None
OUTPAINT_PIPE = None
MMAUDIO_NET = None
MMAUDIO_FEATURE_UTILS = None
MMAUDIO_SEQ_CFG = None
TRANSLATOR = None
# API URLs
TEXT2IMG_API_URL = "http://211.233.58.201:7896"
VIDEO_API_URL = "http://211.233.58.201:7875"
# Image size presets
IMAGE_PRESETS = {
"μ»€μŠ€ν…€": {"width": 1024, "height": 1024},
"1:1 μ •μ‚¬κ°ν˜•": {"width": 1024, "height": 1024},
"4:3 ν‘œμ€€": {"width": 1024, "height": 768},
"16:9 μ™€μ΄λ“œμŠ€ν¬λ¦°": {"width": 1024, "height": 576},
"9:16 μ„Έλ‘œν˜•": {"width": 576, "height": 1024},
"6:19 특수 μ„Έλ‘œν˜•": {"width": 324, "height": 1024},
"Instagram μ •μ‚¬κ°ν˜•": {"width": 1080, "height": 1080},
"Instagram μŠ€ν† λ¦¬": {"width": 1080, "height": 1920},
"Instagram κ°€λ‘œν˜•": {"width": 1080, "height": 566},
"Facebook 컀버": {"width": 820, "height": 312},
"Twitter 헀더": {"width": 1500, "height": 500},
"YouTube 썸넀일": {"width": 1280, "height": 720},
"LinkedIn λ°°λ„ˆ": {"width": 1584, "height": 396},
}
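# Note: some presets (the 1080/820/1500 px social-media sizes) are not multiples
# of 64; the generation backend may round them to its nearest supported size.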
# Transform for BiRefNet
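# (768x768 inputs normalized with the standard ImageNet mean/std values below)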
transform_image = transforms.Compose([
transforms.Resize((768, 768)),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
@spaces.GPU(duration=60)
def load_models():
"""λͺ¨λ“  λͺ¨λΈμ„ λ‘œλ“œν•˜λŠ” ν•¨μˆ˜"""
global MODELS_LOADED, BIREFNET_MODEL, BIREFNET_LITE_MODEL, OUTPAINT_PIPE
global MMAUDIO_NET, MMAUDIO_FEATURE_UTILS, MMAUDIO_SEQ_CFG, TRANSLATOR
if MODELS_LOADED:
return True
try:
# BiRefNet λͺ¨λΈ λ‘œλ“œ
logging.info("Loading BiRefNet models...")
BIREFNET_MODEL = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet", trust_remote_code=True)
BIREFNET_MODEL.to(device)
BIREFNET_LITE_MODEL = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet_lite", trust_remote_code=True)
BIREFNET_LITE_MODEL.to(device)
        # Load the ControlNet and outpainting models
logging.info("Loading ControlNet models...")
from controlnet_union import ControlNetModel_Union
from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
config_file = hf_hub_download(
"xinsir/controlnet-union-sdxl-1.0",
filename="config_promax.json",
)
config = ControlNetModel_Union.load_config(config_file)
controlnet_model = ControlNetModel_Union.from_config(config)
model_file = hf_hub_download(
"xinsir/controlnet-union-sdxl-1.0",
filename="diffusion_pytorch_model_promax.safetensors",
)
state_dict = load_state_dict(model_file)
loaded_keys = list(state_dict.keys())
result = ControlNetModel_Union._load_pretrained_model(
controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
)
model = result[0]
model = model.to(device=device, dtype=torch_dtype)
# VAE λ‘œλ“œ
vae = AutoencoderKL.from_pretrained(
"madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
).to(device)
# νŒŒμ΄ν”„λΌμΈ λ‘œλ“œ
OUTPAINT_PIPE = StableDiffusionXLFillPipeline.from_pretrained(
"SG161222/RealVisXL_V5.0_Lightning",
torch_dtype=torch_dtype,
vae=vae,
controlnet=model,
variant="fp16" if device.type == "cuda" else None,
).to(device)
OUTPAINT_PIPE.scheduler = TCDScheduler.from_config(OUTPAINT_PIPE.scheduler.config)
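        # TCD scheduling pairs with the few-step Lightning checkpoint loaded above
        # (consistent with the 4-12 step range exposed in the UI).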
# MMAudio λͺ¨λΈ λ‘œλ“œ
logging.info("Loading MMAudio models...")
model_mmaudio: ModelConfig = all_model_cfg['large_44k_v2']
model_mmaudio.download_if_needed()
setup_eval_logging()
# λ²ˆμ—­κΈ° μ„€μ •
try:
TRANSLATOR = pipeline("translation",
model="Helsinki-NLP/opus-mt-ko-en",
device="cpu",
use_fast=True,
trust_remote_code=False)
except Exception as e:
logging.warning(f"Failed to load translation model: {e}")
TRANSLATOR = None
# MMAudio λͺ¨λΈ μ΄ˆκΈ°ν™”
if torch.cuda.is_available():
mmaudio_dtype = torch.bfloat16
else:
mmaudio_dtype = torch.float32
        # torch.cuda.device() rejects CPU devices, so only enter it when CUDA is active
        device_ctx = torch.cuda.device(device) if device.type == "cuda" else contextlib.nullcontext()
        with device_ctx:
MMAUDIO_SEQ_CFG = model_mmaudio.seq_cfg
MMAUDIO_NET = get_my_mmaudio(model_mmaudio.model_name).to(device, mmaudio_dtype).eval()
MMAUDIO_NET.load_weights(torch.load(model_mmaudio.model_path, map_location=device, weights_only=True))
logging.info(f'Loaded weights from {model_mmaudio.model_path}')
MMAUDIO_FEATURE_UTILS = FeaturesUtils(
tod_vae_ckpt=model_mmaudio.vae_path,
synchformer_ckpt=model_mmaudio.synchformer_ckpt,
enable_conditions=True,
mode=model_mmaudio.mode,
bigvgan_vocoder_ckpt=model_mmaudio.bigvgan_16k_path,
need_vae_encoder=False
).to(device, mmaudio_dtype).eval()
MODELS_LOADED = True
logging.info("All models loaded successfully!")
return True
except Exception as e:
logging.error(f"Failed to load models: {str(e)}")
return False
# κΈ°μ‘΄ ν•¨μˆ˜λ“€ λͺ¨λ‘ μœ μ§€
def update_dimensions(preset):
if preset in IMAGE_PRESETS:
return IMAGE_PRESETS[preset]["width"], IMAGE_PRESETS[preset]["height"]
return 1024, 1024
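# Example: update_dimensions("16:9 와이드스크린") -> (1024, 576)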
def generate_text_to_image(prompt, width, height, guidance, inference_steps, seed):
if not prompt:
return None, "ν”„λ‘¬ν”„νŠΈλ₯Ό μž…λ ₯ν•΄μ£Όμ„Έμš”"
try:
client = Client(TEXT2IMG_API_URL)
if seed == -1:
seed = random.randint(0, 9999999)
result = client.predict(
prompt=prompt,
width=int(width),
height=int(height),
guidance=float(guidance),
inference_steps=int(inference_steps),
seed=int(seed),
do_img2img=False,
init_image=None,
image2image_strength=0.8,
resize_img=True,
api_name="/generate_image"
)
return result[0], f"μ‚¬μš©λœ μ‹œλ“œ: {result[1]}"
except Exception as e:
logging.error(f"Image generation error: {str(e)}")
return None, f"였λ₯˜: {str(e)}"
def generate_video_from_image(image, prompt="", length=4.0):
if image is None:
return None
try:
# 이미지 μ €μž₯
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as fp:
temp_path = fp.name
Image.fromarray(image).save(temp_path)
# API 호좜
client = Client(VIDEO_API_URL)
result = client.predict(
input_image=handle_file(temp_path),
prompt=prompt if prompt else "Generate natural motion",
n_prompt="",
seed=random.randint(0, 9999999),
use_teacache=True,
video_length=float(length),
api_name="/process"
)
os.unlink(temp_path)
if result and len(result) > 0:
video_dict = result[0]
return video_dict.get("video") if isinstance(video_dict, dict) else None
except Exception as e:
logging.error(f"Video generation error: {str(e)}")
return None
def prepare_image_and_mask(image, width, height, overlap_percentage, alignment):
"""이미지와 마슀크λ₯Ό μ€€λΉ„ν•˜λŠ” ν•¨μˆ˜"""
if image is None:
return None, None
# PIL μ΄λ―Έμ§€λ‘œ λ³€ν™˜
if isinstance(image, np.ndarray):
image = Image.fromarray(image).convert('RGB')
target_size = (width, height)
# 이미지λ₯Ό νƒ€κ²Ÿ 크기에 맞게 μ‘°μ •
scale_factor = min(target_size[0] / image.width, target_size[1] / image.height)
new_width = int(image.width * scale_factor)
new_height = int(image.height * scale_factor)
# 이미지 λ¦¬μ‚¬μ΄μ¦ˆ
source = image.resize((new_width, new_height), Image.LANCZOS)
# μ˜€λ²„λž© 계산
overlap_x = int(new_width * (overlap_percentage / 100))
overlap_y = int(new_height * (overlap_percentage / 100))
overlap_x = max(overlap_x, 1)
overlap_y = max(overlap_y, 1)
# 정렬에 λ”°λ₯Έ λ§ˆμ§„ 계산
if alignment == "κ°€μš΄λ°":
margin_x = (target_size[0] - new_width) // 2
margin_y = (target_size[1] - new_height) // 2
elif alignment == "μ™Όμͺ½":
margin_x = 0
margin_y = (target_size[1] - new_height) // 2
elif alignment == "였λ₯Έμͺ½":
margin_x = target_size[0] - new_width
margin_y = (target_size[1] - new_height) // 2
elif alignment == "μœ„":
margin_x = (target_size[0] - new_width) // 2
margin_y = 0
elif alignment == "μ•„λž˜":
margin_x = (target_size[0] - new_width) // 2
margin_y = target_size[1] - new_height
# λ°°κ²½ 이미지 생성
background = Image.new('RGB', target_size, (255, 255, 255))
background.paste(source, (margin_x, margin_y))
# 마슀크 생성
mask = Image.new('L', target_size, 255)
mask_draw = ImageDraw.Draw(mask)
# 마슀크 μ˜μ—­ 그리기
left_overlap = margin_x + overlap_x if alignment != "μ™Όμͺ½" else margin_x
right_overlap = margin_x + new_width - overlap_x if alignment != "였λ₯Έμͺ½" else margin_x + new_width
top_overlap = margin_y + overlap_y if alignment != "μœ„" else margin_y
bottom_overlap = margin_y + new_height - overlap_y if alignment != "μ•„λž˜" else margin_y + new_height
mask_draw.rectangle([
(left_overlap, top_overlap),
(right_overlap, bottom_overlap)
], fill=0)
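    # Mask semantics: 255 (white) = area for the model to generate, 0 (black) =
    # preserved source; the overlap band stays white so new content blends in.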
return background, mask
def preview_outpaint(image, width, height, overlap_percentage, alignment):
"""μ•„μ›ƒνŽ˜μΈνŒ… 미리보기"""
background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, alignment)
if background is None:
return None
    # Build the preview image
preview = background.copy().convert('RGBA')
# 반투λͺ… 빨간색 μ˜€λ²„λ ˆμ΄
red_overlay = Image.new('RGBA', background.size, (255, 0, 0, 64))
# 마슀크 적용
red_mask = Image.new('RGBA', background.size, (0, 0, 0, 0))
red_mask.paste(red_overlay, (0, 0), mask)
# μ˜€λ²„λ ˆμ΄ ν•©μ„±
preview = Image.alpha_composite(preview, red_mask)
return preview
@spaces.GPU(duration=120)
def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8):
"""이미지 μ•„μ›ƒνŽ˜μΈνŒ… μ‹€ν–‰"""
if image is None:
return None
# λͺ¨λΈ λ‘œλ“œ 확인
if not MODELS_LOADED:
load_models()
if OUTPAINT_PIPE is None:
return Image.new('RGB', (width, height), (200, 200, 200))
try:
# 이미지와 마슀크 μ€€λΉ„
background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, alignment)
if background is None:
return None
        # Build cnet_image (mask region painted black)
cnet_image = background.copy()
cnet_image.paste(0, (0, 0), mask)
# ν”„λ‘¬ν”„νŠΈ μ€€λΉ„
final_prompt = f"{prompt}, high quality, 4k" if prompt else "high quality, 4k"
# GPUμ—μ„œ μ‹€ν–‰
with torch.autocast(device_type=device.type, dtype=torch_dtype):
(
prompt_embeds,
negative_prompt_embeds,
pooled_prompt_embeds,
negative_pooled_prompt_embeds,
) = OUTPAINT_PIPE.encode_prompt(final_prompt, str(device), True)
# 생성 ν”„λ‘œμ„ΈμŠ€
for generated_image in OUTPAINT_PIPE(
prompt_embeds=prompt_embeds,
negative_prompt_embeds=negative_prompt_embeds,
pooled_prompt_embeds=pooled_prompt_embeds,
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
image=cnet_image,
num_inference_steps=num_steps
):
# 쀑간 κ²°κ³Ό (ν•„μš”μ‹œ μ‚¬μš©)
pass
# μ΅œμ’… 이미지
final_image = generated_image
# RGBA둜 λ³€ν™˜ν•˜κ³  마슀크 적용
final_image = final_image.convert("RGBA")
cnet_image.paste(final_image, (0, 0), mask)
return cnet_image
except Exception as e:
logging.error(f"Outpainting error: {str(e)}")
return background if 'background' in locals() else None
# MMAudio κ΄€λ ¨ ν•¨μˆ˜λ“€
def translate_prompt(text):
try:
if TRANSLATOR is None:
return text
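        # U+3131..U+D7A3 spans Hangul Compatibility Jamo through the Hangul Syllables block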
        if text and any(0x3131 <= ord(char) <= 0xD7A3 for char in text):
with torch.no_grad():
translation = TRANSLATOR(text)[0]['translation_text']
return translation
return text
except Exception as e:
logging.error(f"Translation error: {e}")
return text
@spaces.GPU(duration=120)
@torch.inference_mode()
def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
cfg_strength: float, duration: float):
# λͺ¨λΈ λ‘œλ“œ 확인
if not MODELS_LOADED:
load_models()
if MMAUDIO_NET is None:
return None
prompt = translate_prompt(prompt)
negative_prompt = translate_prompt(negative_prompt)
rng = torch.Generator(device=device)
rng.manual_seed(seed)
fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
clip_frames, sync_frames, duration = load_video(video, duration)
clip_frames = clip_frames.unsqueeze(0)
sync_frames = sync_frames.unsqueeze(0)
MMAUDIO_SEQ_CFG.duration = duration
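    # Propagate the clip duration into the latent/CLIP/sync sequence lengths before generating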
MMAUDIO_NET.update_seq_lengths(MMAUDIO_SEQ_CFG.latent_seq_len, MMAUDIO_SEQ_CFG.clip_seq_len, MMAUDIO_SEQ_CFG.sync_seq_len)
audios = generate(clip_frames,
sync_frames, [prompt],
negative_text=[negative_prompt],
feature_utils=MMAUDIO_FEATURE_UTILS,
net=MMAUDIO_NET,
fm=fm,
rng=rng,
cfg_strength=cfg_strength)
audio = audios.float().cpu()[0]
video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
make_video(video,
video_save_path,
audio,
sampling_rate=MMAUDIO_SEQ_CFG.sampling_rate,
duration_sec=MMAUDIO_SEQ_CFG.duration)
return video_save_path
# λΉ„λ””μ˜€ 배경제거 κ΄€λ ¨ ν•¨μˆ˜λ“€
def process_bg_image(image, bg, fast_mode=False):
"""단일 이미지 λ°°κ²½ 처리"""
if BIREFNET_MODEL is None or BIREFNET_LITE_MODEL is None:
return image
image_size = image.size
input_images = transform_image(image).unsqueeze(0).to(device)
model = BIREFNET_LITE_MODEL if fast_mode else BIREFNET_MODEL
with torch.no_grad():
preds = model(input_images)[-1].sigmoid().cpu()
pred = preds[0].squeeze()
pred_pil = transforms.ToPILImage()(pred)
mask = pred_pil.resize(image_size)
if isinstance(bg, str) and bg.startswith("#"):
color_rgb = tuple(int(bg[i:i+2], 16) for i in (1, 3, 5))
background = Image.new("RGBA", image_size, color_rgb + (255,))
elif isinstance(bg, Image.Image):
background = bg.convert("RGBA").resize(image_size)
else:
background = Image.open(bg).convert("RGBA").resize(image_size)
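    # Image.composite keeps the foreground where the mask is white and the background where it is black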
image = Image.composite(image, background, mask)
return image
def process_video_frame(frame, bg_type, bg, fast_mode, bg_frame_index, background_frames, color):
"""λΉ„λ””μ˜€ ν”„λ ˆμž„ 처리"""
try:
pil_image = Image.fromarray(frame)
if bg_type == "색상":
processed_image = process_bg_image(pil_image, color, fast_mode)
elif bg_type == "이미지":
processed_image = process_bg_image(pil_image, bg, fast_mode)
elif bg_type == "λΉ„λ””μ˜€":
background_frame = background_frames[bg_frame_index]
bg_frame_index += 1
background_image = Image.fromarray(background_frame)
processed_image = process_bg_image(pil_image, background_image, fast_mode)
else:
processed_image = pil_image
return np.array(processed_image), bg_frame_index
except Exception as e:
print(f"Error processing frame: {e}")
return frame, bg_frame_index
@spaces.GPU(duration=300)
def process_video_bg(vid, bg_type="색상", bg_image=None, bg_video=None, color="#00FF00",
fps=0, video_handling="slow_down", fast_mode=True, max_workers=10):
"""λΉ„λ””μ˜€ λ°°κ²½ 처리 메인 ν•¨μˆ˜"""
# λͺ¨λΈ λ‘œλ“œ 확인
if not MODELS_LOADED:
load_models()
if BIREFNET_MODEL is None:
yield gr.update(visible=False), gr.update(visible=True), "BiRefNet λͺ¨λΈμ„ λ‘œλ“œν•˜μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€."
yield None, None, "BiRefNet λͺ¨λΈμ„ λ‘œλ“œν•˜μ§€ λͺ»ν–ˆμŠ΅λ‹ˆλ‹€."
return
try:
start_time = time.time()
video = VideoFileClip(vid)
if fps == 0:
fps = video.fps
audio = video.audio
frames = list(video.iter_frames(fps=fps))
processed_frames = []
yield gr.update(visible=True), gr.update(visible=False), f"처리 μ‹œμž‘... κ²½κ³Ό μ‹œκ°„: 0초"
if bg_type == "λΉ„λ””μ˜€":
background_video = VideoFileClip(bg_video)
if background_video.duration < video.duration:
if video_handling == "slow_down":
background_video = background_video.fx(vfx.speedx, factor=video.duration / background_video.duration)
else: # video_handling == "loop"
background_video = concatenate_videoclips([background_video] * int(video.duration / background_video.duration + 1))
background_frames = list(background_video.iter_frames(fps=fps))
else:
background_frames = None
bg_frame_index = 0
with ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = [executor.submit(process_video_frame, frames[i], bg_type, bg_image, fast_mode,
bg_frame_index + i, background_frames, color) for i in range(len(frames))]
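            # Iterating the futures in submission order keeps the output frames ordered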
for i, future in enumerate(futures):
result, _ = future.result()
processed_frames.append(result)
elapsed_time = time.time() - start_time
yield result, None, f"ν”„λ ˆμž„ {i+1}/{len(frames)} 처리 쀑... κ²½κ³Ό μ‹œκ°„: {elapsed_time:.2f}초"
        processed_video = ImageSequenceClip(processed_frames, fps=fps)
        processed_video = _set_audio(processed_video, audio)
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
temp_filepath = temp_file.name
processed_video.write_videofile(temp_filepath, codec="libx264")
elapsed_time = time.time() - start_time
yield gr.update(visible=False), gr.update(visible=True), f"처리 μ™„λ£Œ! κ²½κ³Ό μ‹œκ°„: {elapsed_time:.2f}초"
yield processed_frames[-1], temp_filepath, f"처리 μ™„λ£Œ! κ²½κ³Ό μ‹œκ°„: {elapsed_time:.2f}초"
except Exception as e:
print(f"Error: {e}")
elapsed_time = time.time() - start_time
yield gr.update(visible=False), gr.update(visible=True), f"λΉ„λ””μ˜€ 처리 였λ₯˜: {e}. κ²½κ³Ό μ‹œκ°„: {elapsed_time:.2f}초"
yield None, None, f"λΉ„λ””μ˜€ 처리 였λ₯˜: {e}. κ²½κ³Ό μ‹œκ°„: {elapsed_time:.2f}초"
@spaces.GPU(duration=180)
def merge_videos_with_audio(video_files, audio_file, audio_volume, output_fps):
"""μ—¬λŸ¬ λΉ„λ””μ˜€λ₯Ό λ³‘ν•©ν•˜κ³  μ˜€λ””μ˜€λ₯Ό μΆ”κ°€ν•˜λŠ” ν•¨μˆ˜"""
if not video_files:
return None, "λΉ„λ””μ˜€ νŒŒμΌμ„ μ—…λ‘œλ“œν•΄μ£Όμ„Έμš”."
if isinstance(video_files, list) and len(video_files) > 10:
return None, "μ΅œλŒ€ 10개의 λΉ„λ””μ˜€λ§Œ μ—…λ‘œλ“œ κ°€λŠ₯ν•©λ‹ˆλ‹€."
try:
# μƒνƒœ μ—…λ°μ΄νŠΈ
status = "λΉ„λ””μ˜€ 파일 μ •λ ¬ 쀑..."
# 파일 κ²½λ‘œμ™€ 파일λͺ…을 νŠœν”Œλ‘œ μ €μž₯ν•˜κ³  파일λͺ…μœΌλ‘œ μ •λ ¬
video_paths = []
if isinstance(video_files, list):
for video_file in video_files:
if video_file is not None:
video_paths.append(video_file)
else:
video_paths.append(video_files)
# 파일λͺ…μœΌλ‘œ μ •λ ¬ (κ²½λ‘œμ—μ„œ 파일λͺ…λ§Œ μΆ”μΆœν•˜μ—¬ μ •λ ¬)
video_paths.sort(key=lambda x: os.path.basename(x))
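        # Note: the sort is lexicographic, so zero-pad filenames (01.mp4 ... 10.mp4) to keep numeric order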
status = f"{len(video_paths)}개의 λΉ„λ””μ˜€ λ‘œλ“œ 쀑..."
# λΉ„λ””μ˜€ 클립 λ‘œλ“œ
video_clips = []
clip_sizes = []
for i, video_path in enumerate(video_paths):
status = f"λΉ„λ””μ˜€ {i+1}/{len(video_paths)} λ‘œλ“œ 쀑: {os.path.basename(video_path)}"
clip = VideoFileClip(video_path)
video_clips.append(clip)
            # Record each clip's size
            try:
                clip_sizes.append((clip.w, clip.h))
            except Exception:
                clip_sizes.append(clip.size)
# 첫 번째 λΉ„λ””μ˜€μ˜ 크기λ₯Ό κΈ°μ€€μœΌλ‘œ 함
target_width, target_height = clip_sizes[0]
# λͺ¨λ“  λΉ„λ””μ˜€μ˜ 크기가 같은지 확인
all_same_size = all(size == (target_width, target_height) for size in clip_sizes)
if not all_same_size:
logging.warning(f"λΉ„λ””μ˜€ 크기가 μ„œλ‘œ λ‹€λ¦…λ‹ˆλ‹€. 첫 번째 λΉ„λ””μ˜€ 크기({target_width}x{target_height})둜 μ‘°μ •ν•©λ‹ˆλ‹€.")
# 크기가 λ‹€λ₯Έ λΉ„λ””μ˜€λ“€μ„ μ‘°μ •
adjusted_clips = []
for clip, size in zip(video_clips, clip_sizes):
if size != (target_width, target_height):
                    adjusted_clip = _resize(clip, (target_width, target_height))
adjusted_clips.append(adjusted_clip)
else:
adjusted_clips.append(clip)
video_clips = adjusted_clips
# 첫 번째 λΉ„λ””μ˜€μ˜ FPSλ₯Ό κΈ°λ³Έκ°’μœΌλ‘œ μ‚¬μš©
if output_fps == 0:
output_fps = video_clips[0].fps
status = "λΉ„λ””μ˜€ 병합 쀑..."
# λΉ„λ””μ˜€ 병합
final_video = concatenate_videoclips(video_clips, method="compose")
# μ˜€λ””μ˜€ 처리
if audio_file:
status = "μ˜€λ””μ˜€ 처리 쀑..."
try:
# μ˜€λ””μ˜€ 파일 경둜 확인
if isinstance(audio_file, str):
audio_path = audio_file
else:
# gr.Audioμ—μ„œ λ°˜ν™˜λœ νŠœν”ŒμΈ 경우
audio_path = audio_file
logging.info(f"Processing audio from: {audio_path}")
# μ˜€λ””μ˜€ λ‘œλ“œ
if audio_path.endswith(('.mp4', '.avi', '.mov', '.mkv')):
# λΉ„λ””μ˜€ νŒŒμΌμ—μ„œ μ˜€λ””μ˜€ μΆ”μΆœ
temp_video = VideoFileClip(audio_path)
audio_clip = temp_video.audio
temp_video.close()
else:
# μ˜€λ””μ˜€ 파일 직접 λ‘œλ“œ
audio_clip = AudioFileClip(audio_path)
if audio_clip is None:
raise ValueError("μ˜€λ””μ˜€λ₯Ό λ‘œλ“œν•  수 μ—†μŠ΅λ‹ˆλ‹€.")
# λ³Όλ₯¨ 쑰절
if audio_volume != 100:
audio_clip = audio_clip.volumex(audio_volume / 100)
# μ˜€λ””μ˜€λ₯Ό λΉ„λ””μ˜€ 길이에 맞좀
video_duration = final_video.duration
audio_duration = audio_clip.duration
                if audio_duration > video_duration:
                    # Audio longer than the video: trim it
                    audio_clip = _subclip(audio_clip, 0, video_duration)
                elif audio_duration < video_duration:
                    # Audio shorter than the video: loop it
                    loops_needed = int(video_duration / audio_duration) + 1
                    audio_clips_list = [audio_clip] * loops_needed
                    looped_audio = concatenate_audioclips(audio_clips_list)
                    audio_clip = _subclip(looped_audio, 0, video_duration)
# κΈ°μ‘΄ μ˜€λ””μ˜€ μ œκ±°ν•˜κ³  μƒˆ μ˜€λ””μ˜€λ‘œ ꡐ체
# (κΈ°μ‘΄ μ˜€λ””μ˜€μ™€ ν•©μ„±ν•˜λ €λ©΄ μ•„λž˜ 주석 ν•΄μ œ)
final_video = final_video.set_audio(audio_clip)
# κΈ°μ‘΄ μ˜€λ””μ˜€μ™€ μƒˆ μ˜€λ””μ˜€ 합성을 μ›ν•˜λŠ” 경우:
# if final_video.audio:
# final_audio = CompositeAudioClip([final_video.audio, audio_clip])
# final_video = final_video.set_audio(final_audio)
# else:
# final_video = final_video.set_audio(audio_clip)
logging.info("Audio successfully added to video")
except Exception as e:
logging.error(f"μ˜€λ””μ˜€ 처리 쀑 였λ₯˜ λ°œμƒ: {str(e)}")
# μ˜€λ””μ˜€ 처리 μ‹€νŒ¨ν•΄λ„ λΉ„λ””μ˜€λŠ” 계속 처리
status = f"μ˜€λ””μ˜€ 처리 μ‹€νŒ¨: {str(e)}, λΉ„λ””μ˜€λ§Œ λ³‘ν•©ν•©λ‹ˆλ‹€."
status = "λΉ„λ””μ˜€ μ €μž₯ 쀑..."
# μž„μ‹œ 파일둜 μ €μž₯
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
temp_filepath = temp_file.name
# 코덱 μ„€μ • - 원본 ν’ˆμ§ˆ μœ μ§€
final_video.write_videofile(
temp_filepath,
fps=output_fps,
codec="libx264",
audio_codec="aac",
preset="medium", # ν’ˆμ§ˆ μ„€μ •
bitrate="5000k", # λΉ„νŠΈλ ˆμ΄νŠΈ μ„€μ •μœΌλ‘œ ν’ˆμ§ˆ μœ μ§€
audio_bitrate="192k"
)
# λ¦¬μ†ŒμŠ€ 정리
for clip in video_clips:
clip.close()
if 'adjusted_clips' in locals():
for clip in adjusted_clips:
if clip not in video_clips:
clip.close()
if audio_file and 'audio_clip' in locals():
audio_clip.close()
final_video.close()
return temp_filepath, f"βœ… μ„±κ³΅μ μœΌλ‘œ {len(video_paths)}개의 λΉ„λ””μ˜€λ₯Ό λ³‘ν•©ν–ˆμŠ΅λ‹ˆλ‹€! (크기: {target_width}x{target_height})"
except Exception as e:
logging.error(f"Video merge error: {str(e)}")
import traceback
traceback.print_exc()
return None, f"❌ 였λ₯˜ λ°œμƒ: {str(e)}"
# CSS
css = """
:root {
--primary-color: #f8c3cd;
--secondary-color: #b3e5fc;
--background-color: #f5f5f7;
--card-background: #ffffff;
--text-color: #424242;
--accent-color: #ffb6c1;
--success-color: #c8e6c9;
--warning-color: #fff9c4;
--shadow-color: rgba(0, 0, 0, 0.1);
--border-radius: 12px;
}
.gradio-container {
max-width: 1200px !important;
margin: 0 auto !important;
}
.panel-box {
border-radius: var(--border-radius) !important;
box-shadow: 0 8px 16px var(--shadow-color) !important;
background-color: var(--card-background) !important;
padding: 20px !important;
margin-bottom: 20px !important;
}
#generate-btn, #video-btn, #outpaint-btn, #preview-btn, #audio-btn, #bg-remove-btn, #merge-btn {
background: linear-gradient(135deg, #ff9a9e, #fad0c4) !important;
font-size: 1.1rem !important;
padding: 12px 24px !important;
margin-top: 10px !important;
width: 100% !important;
}
.tabitem {
min-height: 700px !important;
}
"""
# Gradio Interface
demo = gr.Blocks(css=css, title="AI 이미지 & λΉ„λ””μ˜€ & μ˜€λ””μ˜€ 생성기")
with demo:
gr.Markdown("# 🎨 Ginigen μŠ€νŠœλ””μ˜€")
gr.Markdown("처음 μ‚¬μš© μ‹œ λͺ¨λΈ λ‘œλ”©μ— μ‹œκ°„μ΄ 걸릴 수 μžˆμŠ΅λ‹ˆλ‹€. μž μ‹œλ§Œ κΈ°λ‹€λ €μ£Όμ„Έμš”.")
# λͺ¨λΈ λ‘œλ“œ μƒνƒœ ν‘œμ‹œ
model_status = gr.Textbox(label="λͺ¨λΈ μƒνƒœ", value="λͺ¨λΈ λ‘œλ”© λŒ€κΈ° 쀑...", interactive=False)
with gr.Tabs() as tabs:
# 첫 번째 νƒ­: ν…μŠ€νŠΈ to 이미지
with gr.Tab("ν…μŠ€νŠΈβ†’μ΄λ―Έμ§€β†’λΉ„λ””μ˜€", elem_classes="tabitem"):
with gr.Row(equal_height=True):
# μž…λ ₯ 컬럼
with gr.Column(scale=1):
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### πŸ“ 이미지 생성 μ„€μ •")
prompt = gr.Textbox(
label="ν”„λ‘¬ν”„νŠΈ(ν•œκΈ€/μ˜μ–΄ κ°€λŠ₯)",
placeholder="μƒμ„±ν•˜κ³  싢은 이미지λ₯Ό μ„€λͺ…ν•˜μ„Έμš”...",
lines=3
)
size_preset = gr.Dropdown(
choices=list(IMAGE_PRESETS.keys()),
value="1:1 μ •μ‚¬κ°ν˜•",
label="크기 프리셋"
)
with gr.Row():
width = gr.Slider(256, 2048, 1024, step=64, label="λ„ˆλΉ„")
height = gr.Slider(256, 2048, 1024, step=64, label="높이")
with gr.Row():
guidance = gr.Slider(1.0, 20.0, 3.5, step=0.1, label="κ°€μ΄λ˜μŠ€")
steps = gr.Slider(1, 50, 30, step=1, label="μŠ€ν…")
seed = gr.Number(label="μ‹œλ“œ (-1=랜덀)", value=-1)
generate_btn = gr.Button("🎨 이미지 생성", variant="primary", elem_id="generate-btn")
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### 🎬 λΉ„λ””μ˜€ 생성 μ„€μ •")
video_prompt = gr.Textbox(
label="(선택) λΉ„λ””μ˜€ ν”„λ‘¬ν”„νŠΈ(μ˜μ–΄λ‘œ μž…λ ₯)",
placeholder="λΉ„λ””μ˜€μ˜ μ›€μ§μž„μ„ μ„€λͺ…ν•˜μ„Έμš”... (λΉ„μ›Œλ‘λ©΄ κΈ°λ³Έ μ›€μ§μž„ 적용)",
lines=2
)
video_length = gr.Slider(
minimum=1,
maximum=60,
value=4,
step=0.5,
label="λΉ„λ””μ˜€ 길이 (초)",
info="1μ΄ˆμ—μ„œ 60μ΄ˆκΉŒμ§€ 선택 κ°€λŠ₯ν•©λ‹ˆλ‹€"
)
video_btn = gr.Button("🎬 λΉ„λ””μ˜€λ‘œ λ³€ν™˜", variant="secondary", elem_id="video-btn")
# 좜λ ₯ 컬럼
with gr.Column(scale=1):
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### πŸ–ΌοΈ 생성 κ²°κ³Ό")
output_image = gr.Image(label="μƒμ„±λœ 이미지", type="numpy")
output_seed = gr.Textbox(label="μ‹œλ“œ 정보")
output_video = gr.Video(label="μƒμ„±λœ λΉ„λ””μ˜€")
# 두 번째 νƒ­: 이미지 μ•„μ›ƒνŽ˜μΈνŒ…
with gr.Tab("이미지 λΉ„μœ¨ λ³€κ²½/생성", elem_classes="tabitem"):
with gr.Row(equal_height=True):
# μž…λ ₯ 컬럼
with gr.Column(scale=1):
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### πŸ–ΌοΈ 이미지 μ—…λ‘œλ“œ")
input_image = gr.Image(
label="원본 이미지",
type="numpy"
)
outpaint_prompt = gr.Textbox(
label="ν”„λ‘¬ν”„νŠΈ (선택)",
placeholder="ν™•μž₯ν•  μ˜μ—­μ— λŒ€ν•œ μ„€λͺ…...",
lines=2
)
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### βš™οΈ μ•„μ›ƒνŽ˜μΈνŒ… μ„€μ •")
outpaint_size_preset = gr.Dropdown(
choices=list(IMAGE_PRESETS.keys()),
value="16:9 μ™€μ΄λ“œμŠ€ν¬λ¦°",
label="λͺ©ν‘œ 크기 프리셋"
)
with gr.Row():
outpaint_width = gr.Slider(256, 2048, 1280, step=64, label="λͺ©ν‘œ λ„ˆλΉ„")
outpaint_height = gr.Slider(256, 2048, 720, step=64, label="λͺ©ν‘œ 높이")
alignment = gr.Dropdown(
choices=["κ°€μš΄λ°", "μ™Όμͺ½", "였λ₯Έμͺ½", "μœ„", "μ•„λž˜"],
value="κ°€μš΄λ°",
label="μ •λ ¬"
)
overlap_percentage = gr.Slider(
minimum=1,
maximum=50,
value=10,
step=1,
label="마슀크 μ˜€λ²„λž© (%)"
)
outpaint_steps = gr.Slider(
minimum=4,
maximum=12,
value=8,
step=1,
label="μΆ”λ‘  μŠ€ν…"
)
preview_btn = gr.Button("πŸ‘οΈ 미리보기", elem_id="preview-btn")
outpaint_btn = gr.Button("🎨 μ•„μ›ƒνŽ˜μΈνŒ… μ‹€ν–‰", variant="primary", elem_id="outpaint-btn")
# 좜λ ₯ 컬럼
with gr.Column(scale=1):
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### πŸ–ΌοΈ κ²°κ³Ό")
preview_image = gr.Image(label="미리보기")
outpaint_result = gr.Image(label="μ•„μ›ƒνŽ˜μΈνŒ… κ²°κ³Ό")
# μ„Έ 번째 νƒ­: λΉ„λ””μ˜€ + μ˜€λ””μ˜€
with gr.Tab("λΉ„λ””μ˜€ + μ˜€λ””μ˜€", elem_classes="tabitem"):
with gr.Row(equal_height=True):
# μž…λ ₯ 컬럼
with gr.Column(scale=1):
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### πŸŽ₯ λΉ„λ””μ˜€ μ—…λ‘œλ“œ")
audio_video_input = gr.Video(
label="μž…λ ₯ λΉ„λ””μ˜€",
sources=["upload"]
)
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### 🎡 μ˜€λ””μ˜€ 생성 μ„€μ •")
audio_prompt = gr.Textbox(
label="ν”„λ‘¬ν”„νŠΈ (ν•œκΈ€ 지원)",
placeholder="μƒμ„±ν•˜κ³  싢은 μ˜€λ””μ˜€λ₯Ό μ„€λͺ…ν•˜μ„Έμš”... (예: ν‰ν™”λ‘œμš΄ ν”Όμ•„λ…Έ μŒμ•…)",
lines=3
)
audio_negative_prompt = gr.Textbox(
label="λ„€κ±°ν‹°λΈŒ ν”„λ‘¬ν”„νŠΈ",
value="music",
placeholder="μ›ν•˜μ§€ μ•ŠλŠ” μš”μ†Œ...",
lines=2
)
with gr.Row():
audio_seed = gr.Number(label="μ‹œλ“œ", value=0)
audio_steps = gr.Number(label="μŠ€ν…", value=25)
with gr.Row():
audio_cfg = gr.Number(label="κ°€μ΄λ˜μŠ€ μŠ€μΌ€μΌ", value=4.5)
audio_duration = gr.Number(label="μ§€μ†μ‹œκ°„ (초)", value=9999)
audio_btn = gr.Button("🎡 μ˜€λ””μ˜€ 생성 및 ν•©μ„±", variant="primary", elem_id="audio-btn")
# 좜λ ₯ 컬럼
with gr.Column(scale=1):
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### 🎬 생성 κ²°κ³Ό")
output_video_with_audio = gr.Video(
label="μ˜€λ””μ˜€κ°€ μΆ”κ°€λœ λΉ„λ””μ˜€",
interactive=False
)
# λ„€ 번째 νƒ­: λΉ„λ””μ˜€ νŽΈμ§‘
with gr.Tab("λΉ„λ””μ˜€ νŽΈμ§‘", elem_classes="tabitem"):
with gr.Row(equal_height=True):
# μž…λ ₯ 컬럼
with gr.Column(scale=1):
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### πŸŽ₯ λΉ„λ””μ˜€ μ—…λ‘œλ“œ (μ΅œλŒ€ 10개)")
gr.Markdown("**파일λͺ…이 μž‘μ„μˆ˜λ‘ μš°μ„ μˆœμœ„κ°€ λ†’μŠ΅λ‹ˆλ‹€** (예: 1.mp4, 2.mp4, 3.mp4)")
video_files = gr.File(
label="λΉ„λ””μ˜€ νŒŒμΌλ“€",
file_count="multiple",
file_types=["video"],
type="filepath"
)
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### 🎡 μ˜€λ””μ˜€ μ„€μ • (선택)")
gr.Markdown("**주의**: μ—…λ‘œλ“œν•œ μ˜€λ””μ˜€κ°€ λΉ„λ””μ˜€μ˜ κΈ°μ‘΄ μ˜€λ””μ˜€λ₯Ό μ™„μ „νžˆ λŒ€μ²΄ν•©λ‹ˆλ‹€.")
audio_file = gr.Audio(
label="μ˜€λ””μ˜€ 파일 (MP3, WAV, M4A λ“±)",
type="filepath",
sources=["upload"]
)
audio_volume = gr.Slider(
minimum=0,
maximum=200,
value=100,
step=1,
label="μ˜€λ””μ˜€ λ³Όλ₯¨ (%)",
info="100% = 원본 λ³Όλ₯¨"
)
gr.Markdown("""
**μ˜€λ””μ˜€ μ˜΅μ…˜**:
- μ˜€λ””μ˜€κ°€ λΉ„λ””μ˜€λ³΄λ‹€ 짧으면 μžλ™μœΌλ‘œ λ°˜λ³΅λ©λ‹ˆλ‹€
- μ˜€λ””μ˜€κ°€ λΉ„λ””μ˜€λ³΄λ‹€ κΈΈλ©΄ λΉ„λ””μ˜€ 길이에 맞좰 μž˜λ¦½λ‹ˆλ‹€
""")
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### βš™οΈ νŽΈμ§‘ μ„€μ •")
output_fps = gr.Slider(
minimum=0,
maximum=60,
value=0,
step=1,
label="좜λ ₯ FPS (0 = 첫 번째 λΉ„λ””μ˜€μ˜ FPS μ‚¬μš©)"
)
gr.Markdown("""
**크기 처리**:
- 첫 번째 λΉ„λ””μ˜€μ˜ 크기가 기쀀이 λ©λ‹ˆλ‹€
- λ‹€λ₯Έ 크기의 λΉ„λ””μ˜€λŠ” 첫 번째 λΉ„λ””μ˜€ 크기둜 μ‘°μ •λ©λ‹ˆλ‹€
- μ΅œμƒμ˜ κ²°κ³Όλ₯Ό μœ„ν•΄ 같은 크기의 λΉ„λ””μ˜€λ₯Ό μ‚¬μš©ν•˜μ„Έμš”
""")
merge_videos_btn = gr.Button("🎬 λΉ„λ””μ˜€ 병합", variant="primary", elem_id="merge-btn")
# 좜λ ₯ 컬럼
with gr.Column(scale=1):
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### 🎬 병합 κ²°κ³Ό")
merge_status = gr.Textbox(label="처리 μƒνƒœ", interactive=False)
merged_video = gr.Video(label="λ³‘ν•©λœ λΉ„λ””μ˜€")
gr.Markdown("""
### ℹ️ μ‚¬μš© 방법
1. μ—¬λŸ¬ λΉ„λ””μ˜€ νŒŒμΌμ„ μ—…λ‘œλ“œν•˜μ„Έμš” (μ΅œλŒ€ 10개)
2. 파일λͺ…이 μž‘μ€ μˆœμ„œλŒ€λ‘œ μžλ™ μ •λ ¬λ©λ‹ˆλ‹€
3. (선택) μ˜€λ””μ˜€ νŒŒμΌμ„ μΆ”κ°€ν•˜κ³  λ³Όλ₯¨μ„ μ‘°μ ˆν•˜μ„Έμš”
4. 'λΉ„λ””μ˜€ 병합' λ²„νŠΌμ„ ν΄λ¦­ν•˜μ„Έμš”
**νŠΉμ§•**:
- βœ… 첫 번째 λΉ„λ””μ˜€μ˜ 크기λ₯Ό κΈ°μ€€μœΌλ‘œ 톡합
- βœ… μ—…λ‘œλ“œν•œ μ˜€λ””μ˜€κ°€ 전체 λΉ„λ””μ˜€μ— μ μš©λ©λ‹ˆλ‹€
- βœ… 높은 λΉ„νŠΈλ ˆμ΄νŠΈλ‘œ ν’ˆμ§ˆ μœ μ§€
**팁**:
- 파일λͺ…을 01.mp4, 02.mp4, 03.mp4 ν˜•μ‹μœΌλ‘œ μ§€μ •ν•˜λ©΄ μˆœμ„œ 관리가 μ‰½μŠ΅λ‹ˆλ‹€
- μ˜€λ””μ˜€λ₯Ό μΆ”κ°€ν•˜λ©΄ κΈ°μ‘΄ λΉ„λ””μ˜€μ˜ μ˜€λ””μ˜€λŠ” λŒ€μ²΄λ©λ‹ˆλ‹€
""")
# λ‹€μ„― 번째 νƒ­: λΉ„λ””μ˜€ 배경제거/ν•©μ„±
with gr.Tab("λΉ„λ””μ˜€ 배경제거/ν•©μ„±", elem_classes="tabitem"):
with gr.Row(equal_height=True):
# μž…λ ₯ 컬럼
with gr.Column(scale=1):
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### πŸŽ₯ λΉ„λ””μ˜€ μ—…λ‘œλ“œ")
bg_video_input = gr.Video(
label="μž…λ ₯ λΉ„λ””μ˜€",
interactive=True
)
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### 🎨 λ°°κ²½ μ„€μ •")
bg_type = gr.Radio(
["색상", "이미지", "λΉ„λ””μ˜€"],
label="λ°°κ²½ μœ ν˜•",
value="색상",
interactive=True
)
color_picker = gr.ColorPicker(
label="λ°°κ²½ 색상",
value="#00FF00",
visible=True,
interactive=True
)
bg_image_input = gr.Image(
label="λ°°κ²½ 이미지",
type="filepath",
visible=False,
interactive=True
)
bg_video_bg = gr.Video(
label="λ°°κ²½ λΉ„λ””μ˜€",
visible=False,
interactive=True
)
with gr.Column(visible=False) as video_handling_options:
video_handling_radio = gr.Radio(
["slow_down", "loop"],
label="λΉ„λ””μ˜€ 처리 방식",
value="slow_down",
interactive=True,
info="slow_down: λ°°κ²½ λΉ„λ””μ˜€λ₯Ό 느리게 μž¬μƒ, loop: λ°°κ²½ λΉ„λ””μ˜€λ₯Ό 반볡"
)
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### βš™οΈ 처리 μ„€μ •")
fps_slider = gr.Slider(
minimum=0,
maximum=60,
step=1,
value=0,
label="좜λ ₯ FPS (0 = 원본 FPS μœ μ§€)",
interactive=True
)
fast_mode_checkbox = gr.Checkbox(
label="λΉ λ₯Έ λͺ¨λ“œ (BiRefNet_lite μ‚¬μš©)",
value=True,
interactive=True
)
max_workers_slider = gr.Slider(
minimum=1,
maximum=32,
step=1,
value=10,
label="μ΅œλŒ€ μ›Œμ»€ 수",
info="λ³‘λ ¬λ‘œ μ²˜λ¦¬ν•  ν”„λ ˆμž„ 수",
interactive=True
)
bg_remove_btn = gr.Button("🎬 λ°°κ²½ λ³€κ²½", variant="primary", elem_id="bg-remove-btn")
# 좜λ ₯ 컬럼
with gr.Column(scale=1):
with gr.Group(elem_classes="panel-box"):
gr.Markdown("### 🎬 처리 결과")
stream_image = gr.Image(label="μ‹€μ‹œκ°„ 슀트리밍", visible=False)
output_bg_video = gr.Video(label="μ΅œμ’… λΉ„λ””μ˜€")
time_textbox = gr.Textbox(label="κ²½κ³Ό μ‹œκ°„", interactive=False)
gr.Markdown("""
### ℹ️ μ‚¬μš© 방법
1. λΉ„λ””μ˜€λ₯Ό μ—…λ‘œλ“œν•˜μ„Έμš”
2. μ›ν•˜λŠ” λ°°κ²½ μœ ν˜•μ„ μ„ νƒν•˜μ„Έμš”
3. 섀정을 μ‘°μ •ν•˜κ³  'λ°°κ²½ λ³€κ²½' λ²„νŠΌμ„ ν΄λ¦­ν•˜μ„Έμš”
**μ°Έκ³ **: GPU μ œν•œμœΌλ‘œ ν•œ λ²ˆμ— μ•½ 200ν”„λ ˆμž„κΉŒμ§€ 처리 κ°€λŠ₯ν•©λ‹ˆλ‹€.
κΈ΄ λΉ„λ””μ˜€λŠ” μž‘μ€ 쑰각으둜 λ‚˜λˆ„μ–΄ μ²˜λ¦¬ν•˜μ„Έμš”.
""")
# λͺ¨λΈ λ‘œλ“œ ν•¨μˆ˜ μ‹€ν–‰
def on_demo_load():
try:
if IS_SPACES:
# Spaces ν™˜κ²½μ—μ„œ GPU μ›Œλ°μ—…
gpu_warmup()
# λͺ¨λΈ λ‘œλ“œλŠ” 첫 번째 GPU ν•¨μˆ˜ 호좜 μ‹œ μžλ™μœΌλ‘œ μˆ˜ν–‰λ¨
return "λͺ¨λΈ λ‘œλ”© μ€€λΉ„ μ™„λ£Œ"
except Exception as e:
return f"μ΄ˆκΈ°ν™” 였λ₯˜: {str(e)}"
# 이벀트 μ—°κ²° - 첫 번째 νƒ­
size_preset.change(update_dimensions, [size_preset], [width, height])
generate_btn.click(
generate_text_to_image,
[prompt, width, height, guidance, steps, seed],
[output_image, output_seed]
)
video_btn.click(
lambda img, v_prompt, length: generate_video_from_image(img, v_prompt, length) if img is not None else None,
[output_image, video_prompt, video_length],
[output_video]
)
# 이벀트 μ—°κ²° - 두 번째 νƒ­
outpaint_size_preset.change(update_dimensions, [outpaint_size_preset], [outpaint_width, outpaint_height])
preview_btn.click(
preview_outpaint,
[input_image, outpaint_width, outpaint_height, overlap_percentage, alignment],
[preview_image]
)
outpaint_btn.click(
outpaint_image,
[input_image, outpaint_prompt, outpaint_width, outpaint_height, overlap_percentage, alignment, outpaint_steps],
[outpaint_result]
)
# 이벀트 μ—°κ²° - μ„Έ 번째 νƒ­
audio_btn.click(
video_to_audio,
[audio_video_input, audio_prompt, audio_negative_prompt, audio_seed, audio_steps, audio_cfg, audio_duration],
[output_video_with_audio]
)
# 이벀트 μ—°κ²° - λ„€ 번째 νƒ­ (λΉ„λ””μ˜€ νŽΈμ§‘)
merge_videos_btn.click(
merge_videos_with_audio,
inputs=[video_files, audio_file, audio_volume, output_fps],
outputs=[merged_video, merge_status]
)
# 이벀트 μ—°κ²° - λ‹€μ„― 번째 νƒ­ (λΉ„λ””μ˜€ 배경제거/ν•©μ„±)
def update_bg_visibility(bg_type):
if bg_type == "색상":
return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
elif bg_type == "이미지":
return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
elif bg_type == "λΉ„λ””μ˜€":
return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)
else:
return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
bg_type.change(
update_bg_visibility,
inputs=bg_type,
outputs=[color_picker, bg_image_input, bg_video_bg, video_handling_options]
)
bg_remove_btn.click(
process_video_bg,
inputs=[bg_video_input, bg_type, bg_image_input, bg_video_bg, color_picker,
fps_slider, video_handling_radio, fast_mode_checkbox, max_workers_slider],
outputs=[stream_image, output_bg_video, time_textbox]
)
# 데λͺ¨ λ‘œλ“œ μ‹œ μ‹€ν–‰
demo.load(on_demo_load, outputs=model_status)
if __name__ == "__main__":
# Spaces ν™˜κ²½μ—μ„œ μΆ”κ°€ 체크
if IS_SPACES:
try:
gpu_warmup()
except:
pass
demo.launch()