|
|
|
import os |
|
IS_SPACES = os.environ.get("SPACE_ID") is not None |
|
|
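# Hugging Face Spaces sets SPACE_ID in the environment; there the real
# `spaces` package provides the @spaces.GPU decorator. Everywhere else we
# substitute a no-op stub so the decorated functions below run unchanged.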
|
if IS_SPACES: |
|
import spaces |
|
else: |
|
|
|
class spaces: |
|
@staticmethod |
|
def GPU(duration=None): |
|
def decorator(func): |
|
return func |
|
return decorator |
|
|
|
|
|
import gradio as gr |
|
import numpy as np |
|
from PIL import Image, ImageDraw |
|
from gradio_client import Client, handle_file |
|
import random |
|
import tempfile |
|
import logging |
|
import torch |
|
from diffusers import AutoencoderKL, TCDScheduler |
|
from diffusers.models.model_loading_utils import load_state_dict |
|
from huggingface_hub import hf_hub_download |
|
from pathlib import Path |
|
import torchaudio |
|
from einops import rearrange |
|
from scipy.io import wavfile |
|
from transformers import pipeline |
|
|
|
|
|
from transformers import AutoModelForImageSegmentation |
|
from torchvision import transforms |
|
|
|
|
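# MoviePy compatibility shims: 1.x exposes everything through moviepy.editor,
# while 2.x removed that module in favour of top-level and submodule imports.
# Each block below tries one layout and falls back to the other.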
|
try: |
|
from moviepy.editor import ( |
|
VideoFileClip, |
|
concatenate_videoclips, |
|
ImageSequenceClip, |
|
concatenate_audioclips, |
|
AudioFileClip, |
|
CompositeAudioClip, |
|
CompositeVideoClip, |
|
ColorClip |
|
) |
|
except ImportError: |
|
|
|
try: |
|
from moviepy.video.io.VideoFileClip import VideoFileClip |
|
except ImportError: |
|
from moviepy import VideoFileClip |
|
|
|
try: |
|
from moviepy.video.compositing.concatenate import concatenate_videoclips |
|
except ImportError: |
|
from moviepy import concatenate_videoclips |
|
|
|
try: |
|
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip |
|
except ImportError: |
|
        from moviepy import ImageSequenceClip  # moviepy 2.x dropped moviepy.editor
|
|
|
try: |
|
from moviepy.audio.io.AudioFileClip import AudioFileClip |
|
except ImportError: |
|
        from moviepy import AudioFileClip  # moviepy 2.x dropped moviepy.editor
|
|
|
try: |
|
from moviepy.audio.AudioClip import concatenate_audioclips, CompositeAudioClip |
|
except ImportError: |
|
        from moviepy import concatenate_audioclips, CompositeAudioClip  # moviepy 2.x dropped moviepy.editor
|
|
|
try: |
|
from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip |
|
except ImportError: |
|
        from moviepy import CompositeVideoClip  # moviepy 2.x dropped moviepy.editor
|
|
|
try: |
|
from moviepy.video.VideoClip import ColorClip |
|
except ImportError: |
|
        from moviepy import ColorClip  # moviepy 2.x dropped moviepy.editor
|
|
|
|
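# The resize fx has moved between moviepy releases; probe the known locations
# and fall back to a minimal reimplementation if none of them import.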
|
resize = None |
|
try: |
|
from moviepy.video.fx.resize import resize |
|
except ImportError: |
|
try: |
|
from moviepy.video.fx.all import resize |
|
except ImportError: |
|
try: |
|
|
|
from moviepy.editor import resize |
|
except ImportError: |
|
pass |
|
|
|
|
|
if resize is None:
    def resize(clip, newsize=None, height=None, width=None):
        """Fallback resize when no moviepy resize fx could be imported.

        Delegates to the clip's own method: `.resize` (moviepy 1.x) or
        `.resized` (moviepy 2.x); returns the clip unchanged otherwise.
        """
        method = getattr(clip, 'resize', None) or getattr(clip, 'resized', None)
        if method is not None:
            if newsize:
                return method(newsize)
            if height:
                return method(height=height)
            if width:
                return method(width=width)
        return clip
|
|
|
|
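# Same dance for the speedx fx.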
|
speedx = None |
|
try: |
|
from moviepy.video.fx.speedx import speedx |
|
except ImportError: |
|
try: |
|
from moviepy.video.fx.all import speedx |
|
except ImportError: |
|
try: |
|
from moviepy.editor import speedx |
|
except ImportError: |
|
pass |
|
|
|
|
|
if speedx is None:
    def speedx(clip, factor=None, final_duration=None):
        """Fallback speed change when no moviepy speedx fx could be imported.

        Mirrors moviepy's speedx semantics (factor > 1 plays faster). Time
        remapping alone does not shrink clip.duration, so it is set explicitly.
        """
        if factor is None:
            if final_duration is None or not getattr(clip, 'duration', None):
                return clip
            factor = clip.duration / final_duration
        new_duration = clip.duration / factor if getattr(clip, 'duration', None) else None
        if hasattr(clip, 'fl_time'):  # moviepy 1.x API
            new_clip = clip.fl_time(lambda t: t * factor)
            return new_clip.set_duration(new_duration) if new_duration else new_clip
        if hasattr(clip, 'time_transform'):  # moviepy 2.x API
            new_clip = clip.time_transform(lambda t: t * factor)
            return new_clip.with_duration(new_duration) if new_duration else new_clip
        return clip
|
|
|
import time |
|
from concurrent.futures import ThreadPoolExecutor |
|
|
|
|
|
|
|
import httpx |
|
from datetime import datetime |
|
|
|
|
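# Opt in to loading legacy (non-safetensors) checkpoint formats.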
|
os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1" |
|
|
|
|
|
@spaces.GPU(duration=1) |
|
def gpu_warmup(): |
|
"""GPU ์๋ฐ์
ํจ์ - Spaces ํ๊ฒฝ์์ GPU ์ฌ์ฉ์ ์ํด ํ์""" |
|
if torch.cuda.is_available(): |
|
dummy = torch.zeros(1).cuda() |
|
del dummy |
|
return "GPU ready" |
|
|
|
|
|
try: |
|
import mmaudio |
|
except ImportError: |
|
os.system("pip install -e .") |
|
import mmaudio |
|
|
|
from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video, |
|
setup_eval_logging) |
|
from mmaudio.model.flow_matching import FlowMatching |
|
from mmaudio.model.networks import MMAudio, get_my_mmaudio |
|
from mmaudio.model.sequence_config import SequenceConfig |
|
from mmaudio.model.utils.features_utils import FeaturesUtils |
|
|
|
|
|
logging.basicConfig(level=logging.INFO) |
|
|
|
|
|
torch.set_float32_matmul_precision("medium") |
|
|
|
|
|
if torch.cuda.is_available(): |
|
device = torch.device("cuda") |
|
torch_dtype = torch.float16 |
|
else: |
|
device = torch.device("cpu") |
|
torch_dtype = torch.float32 |
|
|
|
logging.info(f"Using device: {device}") |
|
|
|
|
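# Heavy model handles; populated lazily by load_models() and cached so
# repeated GPU calls reuse the same weights.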
|
MODELS_LOADED = False |
|
BIREFNET_MODEL = None |
|
BIREFNET_LITE_MODEL = None |
|
OUTPAINT_PIPE = None |
|
MMAUDIO_NET = None |
|
MMAUDIO_FEATURE_UTILS = None |
|
MMAUDIO_SEQ_CFG = None |
|
TRANSLATOR = None |
|
|
|
|
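# Remote Gradio endpoints for text-to-image, image-to-video, and avatar
# animation. ANIM_API_URL can be overridden through the environment.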
|
TEXT2IMG_API_URL = "http://211.233.58.201:7896" |
|
VIDEO_API_URL = "http://211.233.58.201:7875" |
|
ANIM_API_URL = os.getenv("ANIM_API_URL", "http://211.233.58.201:7862/") |
|
|
|
|
|
ANIM_TIMEOUT = httpx.Timeout(connect=30.0, read=120.0, write=120.0, pool=30.0) |
|
|
|
|
|
IMAGE_PRESETS = { |
|
"์ปค์คํ
": {"width": 1024, "height": 1024}, |
|
"1:1 ์ ์ฌ๊ฐํ": {"width": 1024, "height": 1024}, |
|
"4:3 ํ์ค": {"width": 1024, "height": 768}, |
|
"16:9 ์์ด๋์คํฌ๋ฆฐ": {"width": 1024, "height": 576}, |
|
"9:16 ์ธ๋กํ": {"width": 576, "height": 1024}, |
|
"6:19 ํน์ ์ธ๋กํ": {"width": 324, "height": 1024}, |
|
"Instagram ์ ์ฌ๊ฐํ": {"width": 1080, "height": 1080}, |
|
"Instagram ์คํ ๋ฆฌ": {"width": 1080, "height": 1920}, |
|
"Instagram ๊ฐ๋กํ": {"width": 1080, "height": 566}, |
|
"Facebook ์ปค๋ฒ": {"width": 820, "height": 312}, |
|
"Twitter ํค๋": {"width": 1500, "height": 500}, |
|
"YouTube ์ธ๋ค์ผ": {"width": 1280, "height": 720}, |
|
"LinkedIn ๋ฐฐ๋": {"width": 1584, "height": 396}, |
|
} |
|
|
|
|
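# BiRefNet preprocessing: fixed 768x768 input with ImageNet mean/std normalization.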
|
transform_image = transforms.Compose([ |
|
transforms.Resize((768, 768)), |
|
transforms.ToTensor(), |
|
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), |
|
]) |
|
|
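# duration=60 reserves a ZeroGPU slot long enough for the initial weight loading.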
|
@spaces.GPU(duration=60) |
|
def load_models(): |
|
"""๋ชจ๋ ๋ชจ๋ธ์ ๋ก๋ํ๋ ํจ์""" |
|
global MODELS_LOADED, BIREFNET_MODEL, BIREFNET_LITE_MODEL, OUTPAINT_PIPE |
|
global MMAUDIO_NET, MMAUDIO_FEATURE_UTILS, MMAUDIO_SEQ_CFG, TRANSLATOR |
|
|
|
if MODELS_LOADED: |
|
return True |
|
|
|
try: |
|
|
|
logging.info("Loading BiRefNet models...") |
|
BIREFNET_MODEL = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet", trust_remote_code=True) |
|
BIREFNET_MODEL.to(device) |
|
BIREFNET_LITE_MODEL = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet_lite", trust_remote_code=True) |
|
BIREFNET_LITE_MODEL.to(device) |
|
|
|
|
|
logging.info("Loading ControlNet models...") |
|
from controlnet_union import ControlNetModel_Union |
|
from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline |
|
|
|
config_file = hf_hub_download( |
|
"xinsir/controlnet-union-sdxl-1.0", |
|
filename="config_promax.json", |
|
) |
|
|
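        # The union ControlNet ships config and weights separately: build the
        # model from its config, then load the promax state dict into it.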
|
config = ControlNetModel_Union.load_config(config_file) |
|
controlnet_model = ControlNetModel_Union.from_config(config) |
|
|
|
model_file = hf_hub_download( |
|
"xinsir/controlnet-union-sdxl-1.0", |
|
filename="diffusion_pytorch_model_promax.safetensors", |
|
) |
|
state_dict = load_state_dict(model_file) |
|
loaded_keys = list(state_dict.keys()) |
|
|
|
result = ControlNetModel_Union._load_pretrained_model( |
|
controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys |
|
) |
|
|
|
model = result[0] |
|
model = model.to(device=device, dtype=torch_dtype) |
|
|
|
|
|
vae = AutoencoderKL.from_pretrained( |
|
"madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype |
|
).to(device) |
|
|
|
|
|
OUTPAINT_PIPE = StableDiffusionXLFillPipeline.from_pretrained( |
|
"SG161222/RealVisXL_V5.0_Lightning", |
|
torch_dtype=torch_dtype, |
|
vae=vae, |
|
controlnet=model, |
|
variant="fp16" if device.type == "cuda" else None, |
|
).to(device) |
|
|
|
OUTPAINT_PIPE.scheduler = TCDScheduler.from_config(OUTPAINT_PIPE.scheduler.config) |
|
|
|
|
|
logging.info("Loading MMAudio models...") |
|
model_mmaudio: ModelConfig = all_model_cfg['large_44k_v2'] |
|
model_mmaudio.download_if_needed() |
|
setup_eval_logging() |
|
|
|
|
|
try: |
|
TRANSLATOR = pipeline("translation", |
|
model="Helsinki-NLP/opus-mt-ko-en", |
|
device="cpu", |
|
use_fast=True, |
|
trust_remote_code=False) |
|
except Exception as e: |
|
logging.warning(f"Failed to load translation model: {e}") |
|
TRANSLATOR = None |
|
|
|
|
|
if torch.cuda.is_available(): |
|
mmaudio_dtype = torch.bfloat16 |
|
else: |
|
mmaudio_dtype = torch.float32 |
|
|
|
        # torch.cuda.device() rejects CPU devices, so only enter it on CUDA.
        import contextlib
        with torch.cuda.device(device) if device.type == "cuda" else contextlib.nullcontext():
|
MMAUDIO_SEQ_CFG = model_mmaudio.seq_cfg |
|
MMAUDIO_NET = get_my_mmaudio(model_mmaudio.model_name).to(device, mmaudio_dtype).eval() |
|
MMAUDIO_NET.load_weights(torch.load(model_mmaudio.model_path, map_location=device, weights_only=True)) |
|
logging.info(f'Loaded weights from {model_mmaudio.model_path}') |
|
|
|
MMAUDIO_FEATURE_UTILS = FeaturesUtils( |
|
tod_vae_ckpt=model_mmaudio.vae_path, |
|
synchformer_ckpt=model_mmaudio.synchformer_ckpt, |
|
enable_conditions=True, |
|
mode=model_mmaudio.mode, |
|
bigvgan_vocoder_ckpt=model_mmaudio.bigvgan_16k_path, |
|
need_vae_encoder=False |
|
).to(device, mmaudio_dtype).eval() |
|
|
|
MODELS_LOADED = True |
|
logging.info("All models loaded successfully!") |
|
return True |
|
|
|
except Exception as e: |
|
logging.error(f"Failed to load models: {str(e)}") |
|
return False |
|
|
|
|
|
def update_dimensions(preset): |
|
if preset in IMAGE_PRESETS: |
|
return IMAGE_PRESETS[preset]["width"], IMAGE_PRESETS[preset]["height"] |
|
return 1024, 1024 |
|
|
|
def generate_text_to_image(prompt, width, height, guidance, inference_steps, seed): |
|
if not prompt: |
|
return None, "ํ๋กฌํํธ๋ฅผ ์
๋ ฅํด์ฃผ์ธ์" |
|
|
|
try: |
|
client = Client(TEXT2IMG_API_URL) |
|
if seed == -1: |
|
seed = random.randint(0, 9999999) |
|
|
|
result = client.predict( |
|
prompt=prompt, |
|
width=int(width), |
|
height=int(height), |
|
guidance=float(guidance), |
|
inference_steps=int(inference_steps), |
|
seed=int(seed), |
|
do_img2img=False, |
|
init_image=None, |
|
image2image_strength=0.8, |
|
resize_img=True, |
|
api_name="/generate_image" |
|
) |
|
return result[0], f"์ฌ์ฉ๋ ์๋: {result[1]}" |
|
except Exception as e: |
|
logging.error(f"Image generation error: {str(e)}") |
|
return None, f"์ค๋ฅ: {str(e)}" |
|
|
|
def generate_video_from_image(image, prompt="", length=4.0): |
|
if image is None: |
|
return None |
|
|
|
try: |
|
|
|
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as fp: |
|
temp_path = fp.name |
|
Image.fromarray(image).save(temp_path) |
|
|
|
|
|
client = Client(VIDEO_API_URL) |
|
result = client.predict( |
|
input_image=handle_file(temp_path), |
|
prompt=prompt if prompt else "Generate natural motion", |
|
n_prompt="", |
|
seed=random.randint(0, 9999999), |
|
use_teacache=True, |
|
video_length=float(length), |
|
api_name="/process" |
|
) |
|
|
|
os.unlink(temp_path) |
|
|
|
if result and len(result) > 0: |
|
video_dict = result[0] |
|
return video_dict.get("video") if isinstance(video_dict, dict) else None |
|
|
|
except Exception as e: |
|
logging.error(f"Video generation error: {str(e)}") |
|
return None |
|
|
|
def prepare_image_and_mask(image, width, height, overlap_percentage, alignment): |
|
"""์ด๋ฏธ์ง์ ๋ง์คํฌ๋ฅผ ์ค๋นํ๋ ํจ์""" |
|
if image is None: |
|
return None, None |
|
|
|
|
|
if isinstance(image, np.ndarray): |
|
image = Image.fromarray(image).convert('RGB') |
|
|
|
target_size = (width, height) |
|
|
|
|
|
scale_factor = min(target_size[0] / image.width, target_size[1] / image.height) |
|
new_width = int(image.width * scale_factor) |
|
new_height = int(image.height * scale_factor) |
|
|
|
|
|
source = image.resize((new_width, new_height), Image.LANCZOS) |
|
|
|
|
|
overlap_x = int(new_width * (overlap_percentage / 100)) |
|
overlap_y = int(new_height * (overlap_percentage / 100)) |
|
overlap_x = max(overlap_x, 1) |
|
overlap_y = max(overlap_y, 1) |
|
|
|
|
|
if alignment == "๊ฐ์ด๋ฐ": |
|
margin_x = (target_size[0] - new_width) // 2 |
|
margin_y = (target_size[1] - new_height) // 2 |
|
elif alignment == "์ผ์ชฝ": |
|
margin_x = 0 |
|
margin_y = (target_size[1] - new_height) // 2 |
|
elif alignment == "์ค๋ฅธ์ชฝ": |
|
margin_x = target_size[0] - new_width |
|
margin_y = (target_size[1] - new_height) // 2 |
|
elif alignment == "์": |
|
margin_x = (target_size[0] - new_width) // 2 |
|
margin_y = 0 |
|
elif alignment == "์๋": |
|
margin_x = (target_size[0] - new_width) // 2 |
|
margin_y = target_size[1] - new_height |
|
|
|
|
|
background = Image.new('RGB', target_size, (255, 255, 255)) |
|
background.paste(source, (margin_x, margin_y)) |
|
|
|
|
|
mask = Image.new('L', target_size, 255) |
|
mask_draw = ImageDraw.Draw(mask) |
|
|
|
|
|
left_overlap = margin_x + overlap_x if alignment != "์ผ์ชฝ" else margin_x |
|
right_overlap = margin_x + new_width - overlap_x if alignment != "์ค๋ฅธ์ชฝ" else margin_x + new_width |
|
top_overlap = margin_y + overlap_y if alignment != "์" else margin_y |
|
bottom_overlap = margin_y + new_height - overlap_y if alignment != "์๋" else margin_y + new_height |
|
|
|
mask_draw.rectangle([ |
|
(left_overlap, top_overlap), |
|
(right_overlap, bottom_overlap) |
|
], fill=0) |
|
|
|
return background, mask |
|
|
|
def preview_outpaint(image, width, height, overlap_percentage, alignment): |
|
"""์์ํ์ธํ
๋ฏธ๋ฆฌ๋ณด๊ธฐ""" |
|
background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, alignment) |
|
if background is None: |
|
return None |
|
|
|
|
|
preview = background.copy().convert('RGBA') |
|
|
|
|
|
red_overlay = Image.new('RGBA', background.size, (255, 0, 0, 64)) |
|
|
|
|
|
red_mask = Image.new('RGBA', background.size, (0, 0, 0, 0)) |
|
red_mask.paste(red_overlay, (0, 0), mask) |
|
|
|
|
|
preview = Image.alpha_composite(preview, red_mask) |
|
|
|
return preview |
|
|
|
@spaces.GPU(duration=120) |
|
def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8): |
|
"""์ด๋ฏธ์ง ์์ํ์ธํ
์คํ""" |
|
if image is None: |
|
return None |
|
|
|
|
|
if not MODELS_LOADED: |
|
load_models() |
|
|
|
if OUTPAINT_PIPE is None: |
|
return Image.new('RGB', (width, height), (200, 200, 200)) |
|
|
|
try: |
|
|
|
background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, alignment) |
|
if background is None: |
|
return None |
|
|
|
|
|
cnet_image = background.copy() |
|
cnet_image.paste(0, (0, 0), mask) |
|
|
|
|
|
final_prompt = f"{prompt}, high quality, 4k" if prompt else "high quality, 4k" |
|
|
|
|
|
with torch.autocast(device_type=device.type, dtype=torch_dtype): |
|
( |
|
prompt_embeds, |
|
negative_prompt_embeds, |
|
pooled_prompt_embeds, |
|
negative_pooled_prompt_embeds, |
|
) = OUTPAINT_PIPE.encode_prompt(final_prompt, str(device), True) |
|
|
|
|
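        # The fill pipeline yields progressively denoised previews; drain the
        # generator and keep only the final image.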
|
for generated_image in OUTPAINT_PIPE( |
|
prompt_embeds=prompt_embeds, |
|
negative_prompt_embeds=negative_prompt_embeds, |
|
pooled_prompt_embeds=pooled_prompt_embeds, |
|
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, |
|
image=cnet_image, |
|
num_inference_steps=num_steps |
|
): |
|
|
|
pass |
|
|
|
|
|
final_image = generated_image |
|
|
|
|
|
final_image = final_image.convert("RGBA") |
|
cnet_image.paste(final_image, (0, 0), mask) |
|
|
|
return cnet_image |
|
|
|
except Exception as e: |
|
logging.error(f"Outpainting error: {str(e)}") |
|
return background if 'background' in locals() else None |
|
|
|
|
|
def translate_prompt(text): |
|
try: |
|
if TRANSLATOR is None: |
|
return text |
|
|
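        # Translate only when the text contains Hangul (U+3131-U+D7A3 covers jamo and syllables).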
|
if text and any(ord(char) >= 0x3131 and ord(char) <= 0xD7A3 for char in text): |
|
with torch.no_grad(): |
|
translation = TRANSLATOR(text)[0]['translation_text'] |
|
return translation |
|
return text |
|
except Exception as e: |
|
logging.error(f"Translation error: {e}") |
|
return text |
|
|
|
@spaces.GPU(duration=120) |
|
@torch.inference_mode() |
|
def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int, |
|
cfg_strength: float, duration: float): |
|
|
|
if not MODELS_LOADED: |
|
load_models() |
|
|
|
if MMAUDIO_NET is None: |
|
return None |
|
|
|
prompt = translate_prompt(prompt) |
|
negative_prompt = translate_prompt(negative_prompt) |
|
|
|
rng = torch.Generator(device=device) |
|
rng.manual_seed(seed) |
|
fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps) |
|
|
|
clip_frames, sync_frames, duration = load_video(video, duration) |
|
clip_frames = clip_frames.unsqueeze(0) |
|
sync_frames = sync_frames.unsqueeze(0) |
|
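    # load_video may adjust the requested duration to the clip's real length;
    # propagate the returned value into the sequence config before generating.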
MMAUDIO_SEQ_CFG.duration = duration |
|
MMAUDIO_NET.update_seq_lengths(MMAUDIO_SEQ_CFG.latent_seq_len, MMAUDIO_SEQ_CFG.clip_seq_len, MMAUDIO_SEQ_CFG.sync_seq_len) |
|
|
|
audios = generate(clip_frames, |
|
sync_frames, [prompt], |
|
negative_text=[negative_prompt], |
|
feature_utils=MMAUDIO_FEATURE_UTILS, |
|
net=MMAUDIO_NET, |
|
fm=fm, |
|
rng=rng, |
|
cfg_strength=cfg_strength) |
|
audio = audios.float().cpu()[0] |
|
|
|
video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name |
|
make_video(video, |
|
video_save_path, |
|
audio, |
|
sampling_rate=MMAUDIO_SEQ_CFG.sampling_rate, |
|
duration_sec=MMAUDIO_SEQ_CFG.duration) |
|
return video_save_path |
|
|
|
|
|
def process_bg_image(image, bg, fast_mode=False): |
|
"""๋จ์ผ ์ด๋ฏธ์ง ๋ฐฐ๊ฒฝ ์ฒ๋ฆฌ""" |
|
if BIREFNET_MODEL is None or BIREFNET_LITE_MODEL is None: |
|
return image |
|
|
|
image_size = image.size |
|
input_images = transform_image(image).unsqueeze(0).to(device) |
|
model = BIREFNET_LITE_MODEL if fast_mode else BIREFNET_MODEL |
|
|
|
with torch.no_grad(): |
|
preds = model(input_images)[-1].sigmoid().cpu() |
|
pred = preds[0].squeeze() |
|
pred_pil = transforms.ToPILImage()(pred) |
|
mask = pred_pil.resize(image_size) |
|
|
|
if isinstance(bg, str) and bg.startswith("#"): |
|
color_rgb = tuple(int(bg[i:i+2], 16) for i in (1, 3, 5)) |
|
background = Image.new("RGBA", image_size, color_rgb + (255,)) |
|
elif isinstance(bg, Image.Image): |
|
background = bg.convert("RGBA").resize(image_size) |
|
else: |
|
background = Image.open(bg).convert("RGBA").resize(image_size) |
|
|
|
image = Image.composite(image, background, mask) |
|
return image |
|
|
|
def process_video_frame(frame, bg_type, bg, fast_mode, frame_index, background_frames, color): |
|
"""๋น๋์ค ํ๋ ์ ์ฒ๋ฆฌ""" |
|
try: |
|
pil_image = Image.fromarray(frame) |
|
if bg_type == "์์": |
|
processed_image = process_bg_image(pil_image, color, fast_mode) |
|
elif bg_type == "์ด๋ฏธ์ง": |
|
processed_image = process_bg_image(pil_image, bg, fast_mode) |
|
elif bg_type == "๋น๋์ค": |
|
|
|
if background_frames and len(background_frames) > 0: |
|
|
|
bg_frame_index = frame_index % len(background_frames) |
|
background_frame = background_frames[bg_frame_index] |
|
background_image = Image.fromarray(background_frame) |
|
processed_image = process_bg_image(pil_image, background_image, fast_mode) |
|
else: |
|
processed_image = pil_image |
|
else: |
|
processed_image = pil_image |
|
|
|
|
|
if isinstance(processed_image, Image.Image): |
|
return np.array(processed_image) |
|
return processed_image |
|
|
|
except Exception as e: |
|
print(f"Error processing frame {frame_index}: {e}") |
|
|
|
if isinstance(frame, np.ndarray): |
|
return frame |
|
return np.array(pil_image) |
|
|
|
@spaces.GPU(duration=300) |
|
def process_video_bg(vid, bg_type="์์", bg_image=None, bg_video=None, color="#00FF00", |
|
fps=0, video_handling="slow_down", fast_mode=True, max_workers=10): |
|
"""๋น๋์ค ๋ฐฐ๊ฒฝ ์ฒ๋ฆฌ ๋ฉ์ธ ํจ์""" |
|
|
|
if not MODELS_LOADED: |
|
load_models() |
|
|
|
if BIREFNET_MODEL is None: |
|
yield gr.update(visible=False), gr.update(visible=True), "BiRefNet ๋ชจ๋ธ์ ๋ก๋ํ์ง ๋ชปํ์ต๋๋ค." |
|
yield None, None, "BiRefNet ๋ชจ๋ธ์ ๋ก๋ํ์ง ๋ชปํ์ต๋๋ค." |
|
return |
|
|
|
try: |
|
start_time = time.time() |
|
video = VideoFileClip(vid) |
|
if fps == 0: |
|
fps = video.fps |
|
|
|
audio = video.audio |
|
frames = list(video.iter_frames(fps=fps)) |
|
|
|
|
|
if frames: |
|
frame_height, frame_width = frames[0].shape[:2] |
|
else: |
|
yield gr.update(visible=False), gr.update(visible=True), "๋น๋์ค์ ํ๋ ์์ด ์์ต๋๋ค." |
|
yield None, None, "๋น๋์ค์ ํ๋ ์์ด ์์ต๋๋ค." |
|
return |
|
|
|
processed_frames = [] |
|
yield gr.update(visible=True), gr.update(visible=False), f"์ฒ๋ฆฌ ์์... ๊ฒฝ๊ณผ ์๊ฐ: 0์ด" |
|
|
|
|
|
background_frames = None |
|
if bg_type == "๋น๋์ค" and bg_video: |
|
background_video = VideoFileClip(bg_video) |
|
|
|
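            # Match the background clip's length to the foreground: either
            # stretch it (slow_down) or tile it by looping, then trim frames.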
|
|
if video_handling == "slow_down" and background_video.duration < video.duration: |
|
if speedx is not None: |
|
                    # speedx treats factor > 1 as "play faster"; stretching a
                    # shorter background to the foreground's length therefore
                    # needs final_duration, not video.duration / background.duration.
                    background_video = speedx(background_video, final_duration=video.duration)
|
else: |
|
|
|
loops = int(video.duration / background_video.duration) + 1 |
|
background_video = concatenate_videoclips([background_video] * loops) |
|
elif video_handling == "loop" or background_video.duration < video.duration: |
|
|
|
loops = int(video.duration / background_video.duration) + 1 |
|
background_video = concatenate_videoclips([background_video] * loops) |
|
|
|
|
|
background_frames = list(background_video.iter_frames(fps=fps)) |
|
|
|
|
|
if len(background_frames) > len(frames): |
|
background_frames = background_frames[:len(frames)] |
|
|
|
|
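        # Frames are independent, so background replacement is parallelised
        # across a thread pool; results are collected in submission order.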
|
with ThreadPoolExecutor(max_workers=max_workers) as executor: |
|
futures = [] |
|
for i in range(len(frames)): |
|
future = executor.submit( |
|
process_video_frame, |
|
frames[i], |
|
bg_type, |
|
bg_image, |
|
fast_mode, |
|
i, |
|
background_frames, |
|
color |
|
) |
|
futures.append(future) |
|
|
|
|
|
for i, future in enumerate(futures): |
|
try: |
|
result = future.result() |
|
|
|
if result.shape[:2] != (frame_height, frame_width): |
|
|
|
pil_result = Image.fromarray(result) |
|
pil_result = pil_result.resize((frame_width, frame_height), Image.LANCZOS) |
|
result = np.array(pil_result) |
|
|
|
processed_frames.append(result) |
|
elapsed_time = time.time() - start_time |
|
|
|
|
|
if i % 10 == 0: |
|
yield result, None, f"ํ๋ ์ {i+1}/{len(frames)} ์ฒ๋ฆฌ ์ค... ๊ฒฝ๊ณผ ์๊ฐ: {elapsed_time:.2f}์ด" |
|
except Exception as e: |
|
print(f"Error getting result for frame {i}: {e}") |
|
|
|
processed_frames.append(frames[i]) |
|
|
|
|
|
frame_sizes = [frame.shape for frame in processed_frames] |
|
if len(set(frame_sizes)) > 1: |
|
print(f"Warning: Different frame sizes detected: {set(frame_sizes)}") |
|
|
|
target_size = processed_frames[0].shape |
|
for i in range(len(processed_frames)): |
|
if processed_frames[i].shape != target_size: |
|
pil_frame = Image.fromarray(processed_frames[i]) |
|
pil_frame = pil_frame.resize((target_size[1], target_size[0]), Image.LANCZOS) |
|
processed_frames[i] = np.array(pil_frame) |
|
|
|
|
|
processed_video = ImageSequenceClip(processed_frames, fps=fps) |
|
|
|
|
|
if audio: |
|
processed_video = processed_video.set_audio(audio) |
|
|
|
|
|
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file: |
|
temp_filepath = temp_file.name |
|
processed_video.write_videofile(temp_filepath, codec="libx264", audio_codec="aac") |
|
|
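        # Two-step yield: the first toggles component visibility, the second
        # delivers the last frame and the file path to the visible outputs.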
|
elapsed_time = time.time() - start_time |
|
yield gr.update(visible=False), gr.update(visible=True), f"์ฒ๋ฆฌ ์๋ฃ! ๊ฒฝ๊ณผ ์๊ฐ: {elapsed_time:.2f}์ด" |
|
yield processed_frames[-1], temp_filepath, f"์ฒ๋ฆฌ ์๋ฃ! ๊ฒฝ๊ณผ ์๊ฐ: {elapsed_time:.2f}์ด" |
|
|
|
except Exception as e: |
|
print(f"Error: {e}") |
|
import traceback |
|
traceback.print_exc() |
|
elapsed_time = time.time() - start_time |
|
yield gr.update(visible=False), gr.update(visible=True), f"๋น๋์ค ์ฒ๋ฆฌ ์ค๋ฅ: {e}. ๊ฒฝ๊ณผ ์๊ฐ: {elapsed_time:.2f}์ด" |
|
yield None, None, f"๋น๋์ค ์ฒ๋ฆฌ ์ค๋ฅ: {e}. ๊ฒฝ๊ณผ ์๊ฐ: {elapsed_time:.2f}์ด" |
|
|
|
@spaces.GPU(duration=180) |
|
def merge_videos_with_audio(video_files, audio_file, audio_mode, audio_volume, original_audio_volume, output_fps): |
|
"""์ฌ๋ฌ ๋น๋์ค๋ฅผ ๋ณํฉํ๊ณ ์ค๋์ค๋ฅผ ์ถ๊ฐํ๋ ํจ์""" |
|
if not video_files: |
|
return None, "๋น๋์ค ํ์ผ์ ์
๋ก๋ํด์ฃผ์ธ์." |
|
|
|
if isinstance(video_files, list) and len(video_files) > 10: |
|
return None, "์ต๋ 10๊ฐ์ ๋น๋์ค๋ง ์
๋ก๋ ๊ฐ๋ฅํฉ๋๋ค." |
|
|
|
try: |
|
|
|
status = "๋น๋์ค ํ์ผ ์ ๋ ฌ ์ค..." |
|
|
|
|
|
video_paths = [] |
|
if isinstance(video_files, list): |
|
for video_file in video_files: |
|
if video_file is not None: |
|
video_paths.append(video_file) |
|
else: |
|
video_paths.append(video_files) |
|
|
|
|
|
video_paths.sort(key=lambda x: os.path.basename(x)) |
|
|
|
status = f"{len(video_paths)}๊ฐ์ ๋น๋์ค ๋ก๋ ์ค..." |
|
|
|
|
|
video_clips = [] |
|
clip_sizes = [] |
|
|
|
for i, video_path in enumerate(video_paths): |
|
status = f"๋น๋์ค {i+1}/{len(video_paths)} ๋ก๋ ์ค: {os.path.basename(video_path)}" |
|
clip = VideoFileClip(video_path) |
|
video_clips.append(clip) |
|
|
|
|
|
try: |
|
clip_sizes.append((clip.w, clip.h)) |
|
            except Exception:
|
clip_sizes.append(clip.size) |
|
|
|
|
|
target_width, target_height = clip_sizes[0] |
|
|
|
|
|
all_same_size = all(size == (target_width, target_height) for size in clip_sizes) |
|
|
|
if not all_same_size: |
|
logging.warning(f"๋น๋์ค ํฌ๊ธฐ๊ฐ ์๋ก ๋ค๋ฆ
๋๋ค. ์ฒซ ๋ฒ์งธ ๋น๋์ค ํฌ๊ธฐ({target_width}x{target_height})๋ก ์กฐ์ ํฉ๋๋ค.") |
|
|
|
|
|
adjusted_clips = [] |
|
for clip, size in zip(video_clips, clip_sizes): |
|
if size != (target_width, target_height): |
|
if resize is not None: |
|
adjusted_clip = resize(clip, newsize=(target_width, target_height)) |
|
else: |
|
if hasattr(clip, 'resize'): |
|
adjusted_clip = clip.resize((target_width, target_height)) |
|
else: |
|
adjusted_clip = clip |
|
logging.warning(f"Cannot resize video. Using original size.") |
|
adjusted_clips.append(adjusted_clip) |
|
else: |
|
adjusted_clips.append(clip) |
|
|
|
video_clips = adjusted_clips |
|
|
|
|
|
if output_fps == 0: |
|
output_fps = video_clips[0].fps |
|
|
|
status = "๋น๋์ค ๋ณํฉ ์ค..." |
|
|
|
|
|
final_video = concatenate_videoclips(video_clips, method="compose") |
|
|
|
|
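        # Optional audio pass: load the track, scale volumes, loop or trim it
        # to the merged video's length, then mix with or replace the original.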
|
if audio_file: |
|
status = "์ค๋์ค ์ฒ๋ฆฌ ์ค..." |
|
|
|
try: |
|
|
|
                # gr.Audio(type="filepath") returns a plain path string; older
                # Gradio builds may hand back an object exposing .name instead.
                audio_path = audio_file if isinstance(audio_file, str) else getattr(audio_file, 'name', audio_file)
|
|
|
logging.info(f"Processing audio from: {audio_path}") |
|
logging.info(f"Audio mode: {audio_mode}") |
|
|
|
|
|
if audio_path.endswith(('.mp4', '.avi', '.mov', '.mkv')): |
|
temp_video = VideoFileClip(audio_path) |
|
audio_clip = temp_video.audio |
|
temp_video.close() |
|
else: |
|
audio_clip = AudioFileClip(audio_path) |
|
|
|
if audio_clip is None: |
|
raise ValueError("์ค๋์ค๋ฅผ ๋ก๋ํ ์ ์์ต๋๋ค.") |
|
|
|
|
|
if audio_volume != 100: |
|
audio_clip = audio_clip.volumex(audio_volume / 100) |
|
|
|
|
|
video_duration = final_video.duration |
|
audio_duration = audio_clip.duration |
|
|
|
if audio_duration > video_duration: |
|
audio_clip = audio_clip.subclip(0, video_duration) |
|
elif audio_duration < video_duration: |
|
loops_needed = int(video_duration / audio_duration) + 1 |
|
audio_clips_list = [audio_clip] * loops_needed |
|
looped_audio = concatenate_audioclips(audio_clips_list) |
|
audio_clip = looped_audio.subclip(0, video_duration) |
|
|
|
|
|
if audio_mode == "๋ฐฑ๊ทธ๋ผ์ด๋ ๋ฎค์ง": |
|
|
|
if final_video.audio: |
|
|
|
original_audio = final_video.audio |
|
if original_audio_volume != 100: |
|
original_audio = original_audio.volumex(original_audio_volume / 100) |
|
|
|
|
|
final_audio = CompositeAudioClip([original_audio, audio_clip]) |
|
final_video = final_video.set_audio(final_audio) |
|
logging.info("Background music mode: Mixed original and new audio") |
|
else: |
|
|
|
final_video = final_video.set_audio(audio_clip) |
|
logging.info("No original audio found, adding new audio only") |
|
else: |
|
|
|
final_video = final_video.set_audio(audio_clip) |
|
logging.info("Replace mode: Replaced original audio") |
|
|
|
logging.info("Audio successfully processed") |
|
|
|
except Exception as e: |
|
logging.error(f"์ค๋์ค ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}") |
|
status = f"์ค๋์ค ์ฒ๋ฆฌ ์คํจ: {str(e)}, ๋น๋์ค๋ง ๋ณํฉํฉ๋๋ค." |
|
|
|
status = "๋น๋์ค ์ ์ฅ ์ค..." |
|
|
|
|
|
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file: |
|
temp_filepath = temp_file.name |
|
|
|
|
|
final_video.write_videofile( |
|
temp_filepath, |
|
fps=output_fps, |
|
codec="libx264", |
|
audio_codec="aac", |
|
preset="medium", |
|
bitrate="5000k", |
|
audio_bitrate="192k" |
|
) |
|
|
|
|
|
for clip in video_clips: |
|
clip.close() |
|
if 'adjusted_clips' in locals(): |
|
for clip in adjusted_clips: |
|
if clip not in video_clips: |
|
clip.close() |
|
if audio_file and 'audio_clip' in locals(): |
|
audio_clip.close() |
|
final_video.close() |
|
|
|
|
|
if audio_file and audio_mode == "๋ฐฑ๊ทธ๋ผ์ด๋ ๋ฎค์ง": |
|
mode_msg = "๋ฐฑ๊ทธ๋ผ์ด๋ ๋ฎค์ง ์ถ๊ฐ๋จ" |
|
elif audio_file: |
|
mode_msg = "์ค๋์ค ๋์ฒด๋จ" |
|
else: |
|
mode_msg = "์ค๋์ค ์์" |
|
|
|
return temp_filepath, f"โ
์ฑ๊ณต์ ์ผ๋ก {len(video_paths)}๊ฐ์ ๋น๋์ค๋ฅผ ๋ณํฉํ์ต๋๋ค! (ํฌ๊ธฐ: {target_width}x{target_height}, {mode_msg})" |
|
|
|
except Exception as e: |
|
logging.error(f"Video merge error: {str(e)}") |
|
import traceback |
|
traceback.print_exc() |
|
return None, f"โ ์ค๋ฅ ๋ฐ์: {str(e)}" |
|
|
|
def test_anim_api_connection(): |
|
"""์ ๋๋ฉ์ด์
์๋ฒ ์ฐ๊ฒฐ ํ
์คํธ""" |
|
now = datetime.now().strftime("%H:%M:%S") |
|
try: |
|
resp = httpx.get(f"{ANIM_API_URL.rstrip('/')}/healthz", timeout=ANIM_TIMEOUT) |
|
ready = resp.json().get("ready", False) |
|
msg = f"[{now}] ์ ๋๋ฉ์ด์
์๋ฒ ์ฐ๊ฒฐ ์ฑ๊ณต โ
(ready={ready})" |
|
logging.info(msg) |
|
return True, msg |
|
except Exception as e: |
|
msg = f"[{now}] ์ ๋๋ฉ์ด์
์๋ฒ ์ฐ๊ฒฐ ์คํจ โ : {e}" |
|
logging.error(msg) |
|
return False, msg |
|
|
|
def generate_avatar_animation(image, audio, guidance_scale, steps, progress=gr.Progress()): |
|
"""์ด๋ฏธ์ง์ ์ค๋์ค๋ก ์๋ฐํ ์ ๋๋ฉ์ด์
์์ฑ""" |
|
start = datetime.now().strftime("%H:%M:%S") |
|
logs = [f"[{start}] ์์ฒญ ์์"] |
|
|
|
try: |
|
if image is None or audio is None: |
|
            raise ValueError("이미지와 오디오를 모두 업로드하세요.")
|
|
|
progress(0.05, desc="ํ์ผ ์ค๋น") |
|
client = Client(ANIM_API_URL) |
|
|
|
progress(0.15, desc="์๋ฒ ํธ์ถ ์คโฆ (์ ๋ถ ์์ ๊ฐ๋ฅ)") |
|
result = client.predict( |
|
image_path=handle_file(image), |
|
audio_path=handle_file(audio), |
|
guidance_scale=guidance_scale, |
|
steps=steps, |
|
api_name="/generate_animation" |
|
) |
|
|
|
progress(0.95, desc="๊ฒฐ๊ณผ ์ ๋ฆฌ") |
|
|
|
|
|
def extract_video_path(obj): |
|
"""๋น๋์ค ๊ฐ์ฒด์์ ๊ฒฝ๋ก ์ถ์ถ""" |
|
if isinstance(obj, str): |
|
return obj |
|
elif isinstance(obj, dict): |
|
|
|
if 'video' in obj: |
|
return obj['video'] |
|
elif 'path' in obj: |
|
return obj['path'] |
|
elif 'url' in obj: |
|
return obj['url'] |
|
elif 'name' in obj: |
|
return obj['name'] |
|
else: |
|
logging.warning(f"Unexpected dict structure: {obj.keys()}") |
|
return None |
|
else: |
|
logging.warning(f"Unexpected type: {type(obj)}") |
|
return None |
|
|
|
if isinstance(result, (list, tuple)) and len(result) >= 2: |
|
anim_path = extract_video_path(result[0]) |
|
comp_path = extract_video_path(result[1]) |
|
|
|
if anim_path and comp_path: |
|
logs.append(f"[{datetime.now().strftime('%H:%M:%S')}] ์ฑ๊ณต") |
|
return anim_path, comp_path, "\n".join(logs) |
|
else: |
|
raise RuntimeError(f"๋น๋์ค ๊ฒฝ๋ก ์ถ์ถ ์คํจ: {result}") |
|
else: |
|
raise RuntimeError(f"์์์น ๋ชปํ ๋ฐํ ํ์: {type(result)}") |
|
|
|
except Exception as e: |
|
logs.append(f"[{datetime.now().strftime('%H:%M:%S')}] ์ค๋ฅ: {e}") |
|
logging.error(f"Avatar animation generation error: {e}", exc_info=True) |
|
return None, None, "\n".join(logs) |
|
|
|
|
|
css = """ |
|
:root { |
|
--primary-color: #f8c3cd; |
|
--secondary-color: #b3e5fc; |
|
--background-color: #f5f5f7; |
|
--card-background: #ffffff; |
|
--text-color: #424242; |
|
--accent-color: #ffb6c1; |
|
--success-color: #c8e6c9; |
|
--warning-color: #fff9c4; |
|
--shadow-color: rgba(0, 0, 0, 0.1); |
|
--border-radius: 12px; |
|
} |
|
.gradio-container { |
|
max-width: 1200px !important; |
|
margin: 0 auto !important; |
|
} |
|
.panel-box { |
|
border-radius: var(--border-radius) !important; |
|
box-shadow: 0 8px 16px var(--shadow-color) !important; |
|
background-color: var(--card-background) !important; |
|
padding: 20px !important; |
|
margin-bottom: 20px !important; |
|
} |
|
#generate-btn, #video-btn, #outpaint-btn, #preview-btn, #audio-btn, #bg-remove-btn, #merge-btn, #avatar-btn, #test-connection-btn { |
|
background: linear-gradient(135deg, #ff9a9e, #fad0c4) !important; |
|
font-size: 1.1rem !important; |
|
padding: 12px 24px !important; |
|
margin-top: 10px !important; |
|
width: 100% !important; |
|
} |
|
#avatar-btn, #test-connection-btn { |
|
background: linear-gradient(135deg, #667eea, #764ba2) !important; |
|
} |
|
.tabitem { |
|
min-height: 700px !important; |
|
} |
|
""" |
|
|
|
|
|
demo = gr.Blocks(css=css, title="AI ์ด๋ฏธ์ง & ๋น๋์ค & ์ค๋์ค ์์ฑ๊ธฐ") |
|
|
|
with demo: |
|
gr.Markdown("# ๐จ Ginigen ์คํ๋์ค") |
|
gr.Markdown("์ฒ์ ์ฌ์ฉ ์ ๋ชจ๋ธ ๋ก๋ฉ์ ์๊ฐ์ด ๊ฑธ๋ฆด ์ ์์ต๋๋ค. ์ ์๋ง ๊ธฐ๋ค๋ ค์ฃผ์ธ์.") |
|
|
|
|
|
model_status = gr.Textbox(label="๋ชจ๋ธ ์ํ", value="๋ชจ๋ธ ๋ก๋ฉ ๋๊ธฐ ์ค...", interactive=False) |
|
|
|
with gr.Tabs() as tabs: |
|
|
|
with gr.Tab("ํ
์คํธโ์ด๋ฏธ์งโ๋น๋์ค", elem_classes="tabitem"): |
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ ์ด๋ฏธ์ง ์์ฑ ์ค์ ") |
|
|
|
prompt = gr.Textbox( |
|
label="ํ๋กฌํํธ(ํ๊ธ/์์ด ๊ฐ๋ฅ)", |
|
placeholder="์์ฑํ๊ณ ์ถ์ ์ด๋ฏธ์ง๋ฅผ ์ค๋ช
ํ์ธ์...", |
|
lines=3 |
|
) |
|
|
|
size_preset = gr.Dropdown( |
|
choices=list(IMAGE_PRESETS.keys()), |
|
value="1:1 ์ ์ฌ๊ฐํ", |
|
label="ํฌ๊ธฐ ํ๋ฆฌ์
" |
|
) |
|
|
|
with gr.Row(): |
|
width = gr.Slider(256, 2048, 1024, step=64, label="๋๋น") |
|
height = gr.Slider(256, 2048, 1024, step=64, label="๋์ด") |
|
|
|
with gr.Row(): |
|
guidance = gr.Slider(1.0, 20.0, 3.5, step=0.1, label="๊ฐ์ด๋์ค") |
|
                                steps = gr.Slider(1, 50, 30, step=1, label="스텝")
|
|
|
seed = gr.Number(label="์๋ (-1=๋๋ค)", value=-1) |
|
|
|
generate_btn = gr.Button("๐จ ์ด๋ฏธ์ง ์์ฑ", variant="primary", elem_id="generate-btn") |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ฌ ๋น๋์ค ์์ฑ ์ค์ ") |
|
|
|
video_prompt = gr.Textbox( |
|
label="(์ ํ) ๋น๋์ค ํ๋กฌํํธ(์์ด๋ก ์
๋ ฅ)", |
|
placeholder="๋น๋์ค์ ์์ง์์ ์ค๋ช
ํ์ธ์... (๋น์๋๋ฉด ๊ธฐ๋ณธ ์์ง์ ์ ์ฉ)", |
|
lines=2 |
|
) |
|
|
|
video_length = gr.Slider( |
|
minimum=1, |
|
maximum=60, |
|
value=4, |
|
step=0.5, |
|
label="๋น๋์ค ๊ธธ์ด (์ด)", |
|
info="1์ด์์ 60์ด๊น์ง ์ ํ ๊ฐ๋ฅํฉ๋๋ค" |
|
) |
|
|
|
video_btn = gr.Button("๐ฌ ๋น๋์ค๋ก ๋ณํ", variant="secondary", elem_id="video-btn") |
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ผ๏ธ ์์ฑ ๊ฒฐ๊ณผ") |
|
|
|
output_image = gr.Image(label="์์ฑ๋ ์ด๋ฏธ์ง", type="numpy") |
|
output_seed = gr.Textbox(label="์๋ ์ ๋ณด") |
|
output_video = gr.Video(label="์์ฑ๋ ๋น๋์ค") |
|
|
|
|
|
with gr.Tab("์ด๋ฏธ์ง ๋น์จ ๋ณ๊ฒฝ/์์ฑ", elem_classes="tabitem"): |
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ผ๏ธ ์ด๋ฏธ์ง ์
๋ก๋") |
|
|
|
input_image = gr.Image( |
|
label="์๋ณธ ์ด๋ฏธ์ง", |
|
type="numpy" |
|
) |
|
|
|
outpaint_prompt = gr.Textbox( |
|
label="ํ๋กฌํํธ (์ ํ)", |
|
placeholder="ํ์ฅํ ์์ญ์ ๋ํ ์ค๋ช
...", |
|
lines=2 |
|
) |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### โ๏ธ ์์ํ์ธํ
์ค์ ") |
|
|
|
outpaint_size_preset = gr.Dropdown( |
|
choices=list(IMAGE_PRESETS.keys()), |
|
value="16:9 ์์ด๋์คํฌ๋ฆฐ", |
|
label="๋ชฉํ ํฌ๊ธฐ ํ๋ฆฌ์
" |
|
) |
|
|
|
with gr.Row(): |
|
outpaint_width = gr.Slider(256, 2048, 1280, step=64, label="๋ชฉํ ๋๋น") |
|
outpaint_height = gr.Slider(256, 2048, 720, step=64, label="๋ชฉํ ๋์ด") |
|
|
|
alignment = gr.Dropdown( |
|
choices=["๊ฐ์ด๋ฐ", "์ผ์ชฝ", "์ค๋ฅธ์ชฝ", "์", "์๋"], |
|
value="๊ฐ์ด๋ฐ", |
|
label="์ ๋ ฌ" |
|
) |
|
|
|
overlap_percentage = gr.Slider( |
|
minimum=1, |
|
maximum=50, |
|
value=10, |
|
step=1, |
|
label="๋ง์คํฌ ์ค๋ฒ๋ฉ (%)" |
|
) |
|
|
|
outpaint_steps = gr.Slider( |
|
minimum=4, |
|
maximum=12, |
|
value=8, |
|
step=1, |
|
label="์ถ๋ก ์คํ
" |
|
) |
|
|
|
preview_btn = gr.Button("๐๏ธ ๋ฏธ๋ฆฌ๋ณด๊ธฐ", elem_id="preview-btn") |
|
                        outpaint_btn = gr.Button("🎨 아웃페인팅 실행", variant="primary", elem_id="outpaint-btn")
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ผ๏ธ ๊ฒฐ๊ณผ") |
|
|
|
preview_image = gr.Image(label="๋ฏธ๋ฆฌ๋ณด๊ธฐ") |
|
                        outpaint_result = gr.Image(label="아웃페인팅 결과")
|
|
|
|
|
with gr.Tab("๋น๋์ค + ์ค๋์ค", elem_classes="tabitem"): |
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ฅ ๋น๋์ค ์
๋ก๋") |
|
|
|
audio_video_input = gr.Video( |
|
label="์
๋ ฅ ๋น๋์ค", |
|
sources=["upload"] |
|
) |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ต ์ค๋์ค ์์ฑ ์ค์ ") |
|
|
|
audio_prompt = gr.Textbox( |
|
label="ํ๋กฌํํธ (ํ๊ธ ์ง์)", |
|
placeholder="์์ฑํ๊ณ ์ถ์ ์ค๋์ค๋ฅผ ์ค๋ช
ํ์ธ์... (์: ํํ๋ก์ด ํผ์๋
ธ ์์
)", |
|
lines=3 |
|
) |
|
|
|
audio_negative_prompt = gr.Textbox( |
|
label="๋ค๊ฑฐํฐ๋ธ ํ๋กฌํํธ", |
|
value="music", |
|
placeholder="์ํ์ง ์๋ ์์...", |
|
lines=2 |
|
) |
|
|
|
with gr.Row(): |
|
audio_seed = gr.Number(label="์๋", value=0) |
|
                            audio_steps = gr.Number(label="스텝", value=25)
|
|
|
with gr.Row(): |
|
audio_cfg = gr.Number(label="๊ฐ์ด๋์ค ์ค์ผ์ผ", value=4.5) |
|
audio_duration = gr.Number(label="์ง์์๊ฐ (์ด)", value=9999) |
|
|
|
audio_btn = gr.Button("๐ต ์ค๋์ค ์์ฑ ๋ฐ ํฉ์ฑ", variant="primary", elem_id="audio-btn") |
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ฌ ์์ฑ ๊ฒฐ๊ณผ") |
|
|
|
output_video_with_audio = gr.Video( |
|
label="์ค๋์ค๊ฐ ์ถ๊ฐ๋ ๋น๋์ค", |
|
interactive=False |
|
) |
|
|
|
|
|
with gr.Tab("๋น๋์ค ํธ์ง", elem_classes="tabitem"): |
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ฅ ๋น๋์ค ์
๋ก๋ (์ต๋ 10๊ฐ)") |
|
gr.Markdown("**ํ์ผ๋ช
์ด ์์์๋ก ์ฐ์ ์์๊ฐ ๋์ต๋๋ค** (์: 1.mp4, 2.mp4, 3.mp4)") |
|
|
|
video_files = gr.File( |
|
label="๋น๋์ค ํ์ผ๋ค", |
|
file_count="multiple", |
|
file_types=["video"], |
|
type="filepath" |
|
) |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### โ๏ธ ํธ์ง ์ค์ ") |
|
|
|
output_fps = gr.Slider( |
|
minimum=0, |
|
maximum=60, |
|
value=0, |
|
step=1, |
|
label="์ถ๋ ฅ FPS (0 = ์ฒซ ๋ฒ์งธ ๋น๋์ค์ FPS ์ฌ์ฉ)" |
|
) |
|
|
|
gr.Markdown(""" |
|
**ํฌ๊ธฐ ์ฒ๋ฆฌ**: |
|
- ์ฒซ ๋ฒ์งธ ๋น๋์ค์ ํฌ๊ธฐ๊ฐ ๊ธฐ์ค์ด ๋ฉ๋๋ค |
|
- ๋ค๋ฅธ ํฌ๊ธฐ์ ๋น๋์ค๋ ์ฒซ ๋ฒ์งธ ๋น๋์ค ํฌ๊ธฐ๋ก ์กฐ์ ๋ฉ๋๋ค |
|
- ์ต์์ ๊ฒฐ๊ณผ๋ฅผ ์ํด ๊ฐ์ ํฌ๊ธฐ์ ๋น๋์ค๋ฅผ ์ฌ์ฉํ์ธ์ |
|
""") |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ต ์ค๋์ค ์ค์ (์ ํ)") |
|
|
|
|
|
audio_mode = gr.Radio( |
|
["๋์ฒด", "๋ฐฑ๊ทธ๋ผ์ด๋ ๋ฎค์ง"], |
|
label="์ค๋์ค ๋ชจ๋", |
|
value="๋์ฒด", |
|
info="๋์ฒด: ๊ธฐ์กด ์ค๋์ค๋ฅผ ์์ ํ ๊ต์ฒด | ๋ฐฑ๊ทธ๋ผ์ด๋ ๋ฎค์ง: ๊ธฐ์กด ์ค๋์ค์ ํจ๊ป ์ฌ์" |
|
) |
|
|
|
audio_file = gr.Audio( |
|
label="์ค๋์ค ํ์ผ (MP3, WAV, M4A ๋ฑ)", |
|
type="filepath", |
|
sources=["upload"] |
|
) |
|
|
|
audio_volume = gr.Slider( |
|
minimum=0, |
|
maximum=200, |
|
value=100, |
|
step=1, |
|
label="์ถ๊ฐ ์ค๋์ค ๋ณผ๋ฅจ (%)", |
|
info="100% = ์๋ณธ ๋ณผ๋ฅจ" |
|
) |
|
|
|
|
|
original_audio_volume = gr.Slider( |
|
minimum=0, |
|
maximum=200, |
|
value=100, |
|
step=1, |
|
label="์๋ณธ ์ค๋์ค ๋ณผ๋ฅจ (%)", |
|
info="๋ฐฑ๊ทธ๋ผ์ด๋ ๋ฎค์ง ๋ชจ๋์์ ์๋ณธ ๋น๋์ค ์ค๋์ค์ ๋ณผ๋ฅจ", |
|
visible=False |
|
) |
|
|
|
gr.Markdown(""" |
|
**์ค๋์ค ์ต์
**: |
|
- **๋์ฒด ๋ชจ๋**: ์
๋ก๋ํ ์ค๋์ค๊ฐ ๋น๋์ค์ ๊ธฐ์กด ์ค๋์ค๋ฅผ ์์ ํ ๋์ฒดํฉ๋๋ค |
|
- **๋ฐฑ๊ทธ๋ผ์ด๋ ๋ฎค์ง ๋ชจ๋**: ์
๋ก๋ํ ์ค๋์ค๊ฐ ๊ธฐ์กด ์ค๋์ค์ ํจ๊ป ์ฌ์๋ฉ๋๋ค |
|
- ์ค๋์ค๊ฐ ๋น๋์ค๋ณด๋ค ์งง์ผ๋ฉด ์๋์ผ๋ก ๋ฐ๋ณต๋ฉ๋๋ค |
|
- ์ค๋์ค๊ฐ ๋น๋์ค๋ณด๋ค ๊ธธ๋ฉด ๋น๋์ค ๊ธธ์ด์ ๋ง์ถฐ ์๋ฆฝ๋๋ค |
|
""") |
|
|
|
merge_videos_btn = gr.Button("๐ฌ ๋น๋์ค ๋ณํฉ", variant="primary", elem_id="merge-btn") |
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ฌ ๋ณํฉ ๊ฒฐ๊ณผ") |
|
|
|
merge_status = gr.Textbox(label="์ฒ๋ฆฌ ์ํ", interactive=False) |
|
merged_video = gr.Video(label="๋ณํฉ๋ ๋น๋์ค") |
|
|
|
gr.Markdown(""" |
|
### โน๏ธ ์ฌ์ฉ ๋ฐฉ๋ฒ |
|
1. ์ฌ๋ฌ ๋น๋์ค ํ์ผ์ ์
๋ก๋ํ์ธ์ (์ต๋ 10๊ฐ) |
|
2. ํ์ผ๋ช
์ด ์์ ์์๋๋ก ์๋ ์ ๋ ฌ๋ฉ๋๋ค |
|
3. (์ ํ) ์ค๋์ค ํ์ผ์ ์ถ๊ฐํ๊ณ ๋ณผ๋ฅจ์ ์กฐ์ ํ์ธ์ |
|
4. '๋น๋์ค ๋ณํฉ' ๋ฒํผ์ ํด๋ฆญํ์ธ์ |
|
|
|
**ํน์ง**: |
|
- โ
์ฒซ ๋ฒ์งธ ๋น๋์ค์ ํฌ๊ธฐ๋ฅผ ๊ธฐ์ค์ผ๋ก ํตํฉ |
|
- โ
์
๋ก๋ํ ์ค๋์ค๊ฐ ์ ์ฒด ๋น๋์ค์ ์ ์ฉ๋ฉ๋๋ค |
|
- โ
๋์ ๋นํธ๋ ์ดํธ๋ก ํ์ง ์ ์ง |
|
|
|
**ํ**: |
|
- ํ์ผ๋ช
์ 01.mp4, 02.mp4, 03.mp4 ํ์์ผ๋ก ์ง์ ํ๋ฉด ์์ ๊ด๋ฆฌ๊ฐ ์ฝ์ต๋๋ค |
|
- ์ค๋์ค๋ฅผ ์ถ๊ฐํ๋ฉด ๊ธฐ์กด ๋น๋์ค์ ์ค๋์ค๋ ๋์ฒด๋ฉ๋๋ค |
|
""") |
|
|
|
|
|
with gr.Tab("๋น๋์ค ๋ฐฐ๊ฒฝ์ ๊ฑฐ/ํฉ์ฑ", elem_classes="tabitem"): |
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ฅ ๋น๋์ค ์
๋ก๋") |
|
|
|
bg_video_input = gr.Video( |
|
label="์
๋ ฅ ๋น๋์ค", |
|
interactive=True |
|
) |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐จ ๋ฐฐ๊ฒฝ ์ค์ ") |
|
|
|
bg_type = gr.Radio( |
|
["์์", "์ด๋ฏธ์ง", "๋น๋์ค"], |
|
label="๋ฐฐ๊ฒฝ ์ ํ", |
|
value="์์", |
|
interactive=True |
|
) |
|
|
|
color_picker = gr.ColorPicker( |
|
label="๋ฐฐ๊ฒฝ ์์", |
|
value="#00FF00", |
|
visible=True, |
|
interactive=True |
|
) |
|
|
|
bg_image_input = gr.Image( |
|
label="๋ฐฐ๊ฒฝ ์ด๋ฏธ์ง", |
|
type="filepath", |
|
visible=False, |
|
interactive=True |
|
) |
|
|
|
bg_video_bg = gr.Video( |
|
label="๋ฐฐ๊ฒฝ ๋น๋์ค", |
|
visible=False, |
|
interactive=True |
|
) |
|
|
|
with gr.Column(visible=False) as video_handling_options: |
|
video_handling_radio = gr.Radio( |
|
["slow_down", "loop"], |
|
label="๋น๋์ค ์ฒ๋ฆฌ ๋ฐฉ์", |
|
value="slow_down", |
|
interactive=True, |
|
info="slow_down: ๋ฐฐ๊ฒฝ ๋น๋์ค๋ฅผ ๋๋ฆฌ๊ฒ ์ฌ์, loop: ๋ฐฐ๊ฒฝ ๋น๋์ค๋ฅผ ๋ฐ๋ณต" |
|
) |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### โ๏ธ ์ฒ๋ฆฌ ์ค์ ") |
|
|
|
fps_slider = gr.Slider( |
|
minimum=0, |
|
maximum=60, |
|
step=1, |
|
value=0, |
|
label="์ถ๋ ฅ FPS (0 = ์๋ณธ FPS ์ ์ง)", |
|
interactive=True |
|
) |
|
|
|
fast_mode_checkbox = gr.Checkbox( |
|
label="๋น ๋ฅธ ๋ชจ๋ (BiRefNet_lite ์ฌ์ฉ)", |
|
value=True, |
|
interactive=True |
|
) |
|
|
|
max_workers_slider = gr.Slider( |
|
minimum=1, |
|
maximum=32, |
|
step=1, |
|
value=10, |
|
label="์ต๋ ์์ปค ์", |
|
info="๋ณ๋ ฌ๋ก ์ฒ๋ฆฌํ ํ๋ ์ ์", |
|
interactive=True |
|
) |
|
|
|
bg_remove_btn = gr.Button("๐ฌ ๋ฐฐ๊ฒฝ ๋ณ๊ฒฝ", variant="primary", elem_id="bg-remove-btn") |
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ฌ ์ฒ๋ฆฌ ๊ฒฐ๊ณผ") |
|
|
|
stream_image = gr.Image(label="์ค์๊ฐ ์คํธ๋ฆฌ๋ฐ", visible=False) |
|
                        output_bg_video = gr.Video(label="최종 비디오")
|
time_textbox = gr.Textbox(label="๊ฒฝ๊ณผ ์๊ฐ", interactive=False) |
|
|
|
gr.Markdown(""" |
|
### โน๏ธ ์ฌ์ฉ ๋ฐฉ๋ฒ |
|
                        1. 비디오를 업로드하세요
|
2. ์ํ๋ ๋ฐฐ๊ฒฝ ์ ํ์ ์ ํํ์ธ์ |
|
3. ์ค์ ์ ์กฐ์ ํ๊ณ '๋ฐฐ๊ฒฝ ๋ณ๊ฒฝ' ๋ฒํผ์ ํด๋ฆญํ์ธ์ |
|
|
|
**์ฐธ๊ณ **: GPU ์ ํ์ผ๋ก ํ ๋ฒ์ ์ฝ 200ํ๋ ์๊น์ง ์ฒ๋ฆฌ ๊ฐ๋ฅํฉ๋๋ค. |
|
๊ธด ๋น๋์ค๋ ์์ ์กฐ๊ฐ์ผ๋ก ๋๋์ด ์ฒ๋ฆฌํ์ธ์. |
|
""") |
|
|
|
|
|
with gr.Tab("์ด๋ฏธ์งto์๋ฐํ", elem_classes="tabitem"): |
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ญ ์๋ฐํ ์ ๋๋ฉ์ด์
์์ฑ") |
|
gr.Markdown(""" |
|
ํฌํธ๋ ์ดํธ ์ด๋ฏธ์ง์ ์ค๋์ค๋ฅผ ์
๋ก๋ํ๋ฉด ๋งํ๋ ์๋ฐํ ์ ๋๋ฉ์ด์
์ ์์ฑํฉ๋๋ค. |
|
|
|
**๊ถ์ฅ ์ฌํญ**: |
|
- ์ด๋ฏธ์ง: ์ ๋ฉด์ ๋ณด๊ณ ์๋ ์ผ๊ตด ์ฌ์ง |
|
- ์ค๋์ค: ๋ช
ํํ ์์ฑ์ด ๋ด๊ธด ์ค๋์ค ํ์ผ |
|
""") |
|
|
|
avatar_image = gr.Image( |
|
label="ํฌํธ๋ ์ดํธ ์ด๋ฏธ์ง", |
|
type="filepath", |
|
elem_classes="panel-box" |
|
) |
|
|
|
avatar_audio = gr.Audio( |
|
label="๋๋ผ์ด๋น ์ค๋์ค", |
|
type="filepath", |
|
elem_classes="panel-box" |
|
) |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### โ๏ธ ์์ฑ ์ค์ ") |
|
|
|
guidance_scale = gr.Slider( |
|
minimum=1.0, |
|
maximum=10.0, |
|
value=3.0, |
|
step=0.1, |
|
label="๊ฐ์ด๋์ค ์ค์ผ์ผ", |
|
info="๋์์๋ก ์ค๋์ค์ ๋ ์ถฉ์คํ ์์ง์ ์์ฑ" |
|
) |
|
|
|
inference_steps = gr.Slider( |
|
minimum=5, |
|
maximum=30, |
|
value=10, |
|
step=1, |
|
label="์ถ๋ก ์คํ
", |
|
info="๋์์๋ก ํ์ง์ด ์ข์์ง์ง๋ง ์์ฑ ์๊ฐ์ด ์ฆ๊ฐ" |
|
) |
|
|
|
|
|
with gr.Row(): |
|
test_connection_btn = gr.Button( |
|
"๐ ์๋ฒ ์ฐ๊ฒฐ ํ
์คํธ", |
|
elem_id="test-connection-btn", |
|
scale=1 |
|
) |
|
|
|
anim_status = gr.Textbox( |
|
label="์๋ฒ ์ํ", |
|
interactive=False, |
|
elem_classes="panel-box" |
|
) |
|
|
|
generate_avatar_btn = gr.Button( |
|
"๐ฌ ์๋ฐํ ์์ฑ", |
|
variant="primary", |
|
elem_id="avatar-btn" |
|
) |
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### ๐ญ ์์ฑ ๊ฒฐ๊ณผ") |
|
|
|
avatar_result = gr.Video( |
|
label="์ ๋๋ฉ์ด์
๊ฒฐ๊ณผ", |
|
elem_classes="panel-box" |
|
) |
|
|
|
avatar_comparison = gr.Video( |
|
label="์๋ณธ ๋๋น ๊ฒฐ๊ณผ (Side-by-Side)", |
|
elem_classes="panel-box" |
|
) |
|
|
|
with gr.Accordion("์คํ ๋ก๊ทธ", open=False): |
|
avatar_logs = gr.Textbox( |
|
label="๋ก๊ทธ", |
|
lines=10, |
|
max_lines=20, |
|
interactive=False, |
|
elem_classes="panel-box" |
|
) |
|
|
|
gr.Markdown(""" |
|
### โน๏ธ ์ฌ์ฉ ์๋ด |
|
|
|
1. **ํฌํธ๋ ์ดํธ ์ด๋ฏธ์ง ์
๋ก๋**: ์ ๋ฉด์ ๋ณด๊ณ ์๋ ์ ๋ช
ํ ์ผ๊ตด ์ฌ์ง |
|
2. **์ค๋์ค ์
๋ก๋**: ์ ๋๋ฉ์ด์
์ ์ฌ์ฉํ ์์ฑ ํ์ผ |
|
3. **์ค์ ์กฐ์ **: ๊ฐ์ด๋์ค ์ค์ผ์ผ๊ณผ ์ถ๋ก ์คํ
์กฐ์ |
|
4. **์์ฑ ์์**: '์๋ฐํ ์์ฑ' ๋ฒํผ ํด๋ฆญ |
|
|
|
**์ฒ๋ฆฌ ์๊ฐ**: |
|
- ์ผ๋ฐ์ ์ผ๋ก 2-5๋ถ ์์ |
|
- ๊ธด ์ค๋์ค์ผ์๋ก ์ฒ๋ฆฌ ์๊ฐ ์ฆ๊ฐ |
|
|
|
**ํ**: |
|
- ๋ฐฐ๊ฒฝ์ด ๋จ์ํ ์ด๋ฏธ์ง๊ฐ ๋ ์ข์ ๊ฒฐ๊ณผ๋ฅผ ์์ฑํฉ๋๋ค |
|
- ์ค๋์ค์ ์์ฑ์ด ๋ช
ํํ ์๋ก ๋ฆฝ์ฑํฌ๊ฐ ์ ํํฉ๋๋ค |
|
""") |
|
|
|
|
|
def on_demo_load(): |
|
try: |
|
if IS_SPACES: |
|
|
|
gpu_warmup() |
|
|
|
return "๋ชจ๋ธ ๋ก๋ฉ ์ค๋น ์๋ฃ" |
|
except Exception as e: |
|
return f"์ด๊ธฐํ ์ค๋ฅ: {str(e)}" |
|
|
|
|
|
size_preset.change(update_dimensions, [size_preset], [width, height]) |
|
|
|
generate_btn.click( |
|
generate_text_to_image, |
|
[prompt, width, height, guidance, steps, seed], |
|
[output_image, output_seed] |
|
) |
|
|
|
video_btn.click( |
|
lambda img, v_prompt, length: generate_video_from_image(img, v_prompt, length) if img is not None else None, |
|
[output_image, video_prompt, video_length], |
|
[output_video] |
|
) |
|
|
|
|
|
outpaint_size_preset.change(update_dimensions, [outpaint_size_preset], [outpaint_width, outpaint_height]) |
|
|
|
preview_btn.click( |
|
preview_outpaint, |
|
[input_image, outpaint_width, outpaint_height, overlap_percentage, alignment], |
|
[preview_image] |
|
) |
|
|
|
outpaint_btn.click( |
|
outpaint_image, |
|
[input_image, outpaint_prompt, outpaint_width, outpaint_height, overlap_percentage, alignment, outpaint_steps], |
|
[outpaint_result] |
|
) |
|
|
|
|
|
audio_btn.click( |
|
video_to_audio, |
|
[audio_video_input, audio_prompt, audio_negative_prompt, audio_seed, audio_steps, audio_cfg, audio_duration], |
|
[output_video_with_audio] |
|
) |
|
|
|
|
|
def toggle_original_volume(mode): |
|
return gr.update(visible=(mode == "๋ฐฑ๊ทธ๋ผ์ด๋ ๋ฎค์ง")) |
|
|
|
audio_mode.change( |
|
toggle_original_volume, |
|
inputs=[audio_mode], |
|
outputs=[original_audio_volume] |
|
) |
|
|
|
merge_videos_btn.click( |
|
merge_videos_with_audio, |
|
inputs=[video_files, audio_file, audio_mode, audio_volume, original_audio_volume, output_fps], |
|
outputs=[merged_video, merge_status] |
|
) |
|
|
|
|
|
def update_bg_visibility(bg_type): |
|
if bg_type == "์์": |
|
return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False) |
|
elif bg_type == "์ด๋ฏธ์ง": |
|
return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False) |
|
elif bg_type == "๋น๋์ค": |
|
return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True) |
|
else: |
|
return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False) |
|
|
|
bg_type.change( |
|
update_bg_visibility, |
|
inputs=bg_type, |
|
outputs=[color_picker, bg_image_input, bg_video_bg, video_handling_options] |
|
) |
|
|
|
bg_remove_btn.click( |
|
process_video_bg, |
|
inputs=[bg_video_input, bg_type, bg_image_input, bg_video_bg, color_picker, |
|
fps_slider, video_handling_radio, fast_mode_checkbox, max_workers_slider], |
|
outputs=[stream_image, output_bg_video, time_textbox] |
|
) |
|
|
|
|
|
    test_connection_btn.click(
        # test_anim_api_connection returns (ok, message); bind only the message,
        # since the same component cannot be wired to two outputs.
        lambda: test_anim_api_connection()[1],
        outputs=[anim_status]
    )
|
|
|
generate_avatar_btn.click( |
|
generate_avatar_animation, |
|
inputs=[avatar_image, avatar_audio, guidance_scale, inference_steps], |
|
outputs=[avatar_result, avatar_comparison, avatar_logs] |
|
) |
|
|
|
|
|
demo.load(on_demo_load, outputs=model_status) |
|
|
|
if __name__ == "__main__": |
|
|
|
if IS_SPACES: |
|
try: |
|
gpu_warmup() |
|
        except Exception:
|
pass |
|
|
|
demo.launch() |