|
|
|
import os |
|
IS_SPACES = os.environ.get("SPACE_ID") is not None |
|
|
|
if IS_SPACES: |
|
import spaces |
|
else: |
|
|
|
class spaces: |
|
@staticmethod |
|
def GPU(duration=None): |
|
def decorator(func): |
|
return func |
|
return decorator |
|
|
|
|
|
import gradio as gr |
|
import numpy as np |
|
from PIL import Image, ImageDraw |
|
from gradio_client import Client, handle_file |
|
import random |
|
import tempfile |
|
import logging |
|
import torch |
|
from diffusers import AutoencoderKL, TCDScheduler |
|
from diffusers.models.model_loading_utils import load_state_dict |
|
from huggingface_hub import hf_hub_download |
|
from pathlib import Path |
|
import torchaudio |
|
from einops import rearrange |
|
from scipy.io import wavfile |
|
from transformers import pipeline |
|
|
|
|
|
from transformers import AutoModelForImageSegmentation |
|
from torchvision import transforms |
|
|
|
|
|
|
|
try: |
|
from moviepy.editor import VideoFileClip, concatenate_videoclips |
|
except ImportError: |
|
from moviepy import VideoFileClip, concatenate_videoclips |
|
|
|
from moviepy import ( |
|
vfx, |
|
ImageSequenceClip, |
|
concatenate_audioclips, |
|
AudioFileClip, |
|
CompositeAudioClip |
|
) |
|
from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip |
|
from moviepy.video.VideoClip import ColorClip |
|
|
|
|
|
import time |
|
from concurrent.futures import ThreadPoolExecutor |
|
|
|
|
|
|
|
|
|
os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1" |
|
|
|
|
|
@spaces.GPU(duration=1) |
|
def gpu_warmup(): |
|
"""GPU μλ°μ
ν¨μ - Spaces νκ²½μμ GPU μ¬μ©μ μν΄ νμ""" |
|
if torch.cuda.is_available(): |
|
dummy = torch.zeros(1).cuda() |
|
del dummy |
|
return "GPU ready" |
|
|
|
|
|
try: |
|
import mmaudio |
|
except ImportError: |
|
os.system("pip install -e .") |
|
import mmaudio |
|
|
|
from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video, |
|
setup_eval_logging) |
|
from mmaudio.model.flow_matching import FlowMatching |
|
from mmaudio.model.networks import MMAudio, get_my_mmaudio |
|
from mmaudio.model.sequence_config import SequenceConfig |
|
from mmaudio.model.utils.features_utils import FeaturesUtils |
|
|
|
|
|
logging.basicConfig(level=logging.INFO) |
|
|
|
|
|
torch.set_float32_matmul_precision("medium") |
|
|
|
|
|
if torch.cuda.is_available(): |
|
device = torch.device("cuda") |
|
torch_dtype = torch.float16 |
|
else: |
|
device = torch.device("cpu") |
|
torch_dtype = torch.float32 |
|
|
|
logging.info(f"Using device: {device}") |
|
|
|
|
|
MODELS_LOADED = False |
|
BIREFNET_MODEL = None |
|
BIREFNET_LITE_MODEL = None |
|
OUTPAINT_PIPE = None |
|
MMAUDIO_NET = None |
|
MMAUDIO_FEATURE_UTILS = None |
|
MMAUDIO_SEQ_CFG = None |
|
TRANSLATOR = None |
|
|
|
|
|
TEXT2IMG_API_URL = "http://211.233.58.201:7896" |
|
VIDEO_API_URL = "http://211.233.58.201:7875" |
|
|
|
|
|
IMAGE_PRESETS = { |
|
"컀μ€ν
": {"width": 1024, "height": 1024}, |
|
"1:1 μ μ¬κ°ν": {"width": 1024, "height": 1024}, |
|
"4:3 νμ€": {"width": 1024, "height": 768}, |
|
"16:9 μμ΄λμ€ν¬λ¦°": {"width": 1024, "height": 576}, |
|
"9:16 μΈλ‘ν": {"width": 576, "height": 1024}, |
|
"6:19 νΉμ μΈλ‘ν": {"width": 324, "height": 1024}, |
|
"Instagram μ μ¬κ°ν": {"width": 1080, "height": 1080}, |
|
"Instagram μ€ν 리": {"width": 1080, "height": 1920}, |
|
"Instagram κ°λ‘ν": {"width": 1080, "height": 566}, |
|
"Facebook 컀λ²": {"width": 820, "height": 312}, |
|
"Twitter ν€λ": {"width": 1500, "height": 500}, |
|
"YouTube μΈλ€μΌ": {"width": 1280, "height": 720}, |
|
"LinkedIn λ°°λ": {"width": 1584, "height": 396}, |
|
} |
|
|
|
|
|
transform_image = transforms.Compose([ |
|
transforms.Resize((768, 768)), |
|
transforms.ToTensor(), |
|
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), |
|
]) |
|
|
|
@spaces.GPU(duration=60) |
|
def load_models(): |
|
"""λͺ¨λ λͺ¨λΈμ λ‘λνλ ν¨μ""" |
|
global MODELS_LOADED, BIREFNET_MODEL, BIREFNET_LITE_MODEL, OUTPAINT_PIPE |
|
global MMAUDIO_NET, MMAUDIO_FEATURE_UTILS, MMAUDIO_SEQ_CFG, TRANSLATOR |
|
|
|
if MODELS_LOADED: |
|
return True |
|
|
|
try: |
|
|
|
logging.info("Loading BiRefNet models...") |
|
BIREFNET_MODEL = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet", trust_remote_code=True) |
|
BIREFNET_MODEL.to(device) |
|
BIREFNET_LITE_MODEL = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet_lite", trust_remote_code=True) |
|
BIREFNET_LITE_MODEL.to(device) |
|
|
|
|
|
logging.info("Loading ControlNet models...") |
|
from controlnet_union import ControlNetModel_Union |
|
from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline |
|
|
|
config_file = hf_hub_download( |
|
"xinsir/controlnet-union-sdxl-1.0", |
|
filename="config_promax.json", |
|
) |
|
|
|
config = ControlNetModel_Union.load_config(config_file) |
|
controlnet_model = ControlNetModel_Union.from_config(config) |
|
|
|
model_file = hf_hub_download( |
|
"xinsir/controlnet-union-sdxl-1.0", |
|
filename="diffusion_pytorch_model_promax.safetensors", |
|
) |
|
state_dict = load_state_dict(model_file) |
|
loaded_keys = list(state_dict.keys()) |
|
|
|
result = ControlNetModel_Union._load_pretrained_model( |
|
controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys |
|
) |
|
|
|
model = result[0] |
|
model = model.to(device=device, dtype=torch_dtype) |
|
|
|
|
|
vae = AutoencoderKL.from_pretrained( |
|
"madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype |
|
).to(device) |
|
|
|
|
|
OUTPAINT_PIPE = StableDiffusionXLFillPipeline.from_pretrained( |
|
"SG161222/RealVisXL_V5.0_Lightning", |
|
torch_dtype=torch_dtype, |
|
vae=vae, |
|
controlnet=model, |
|
variant="fp16" if device.type == "cuda" else None, |
|
).to(device) |
|
|
|
OUTPAINT_PIPE.scheduler = TCDScheduler.from_config(OUTPAINT_PIPE.scheduler.config) |
|
|
|
|
|
logging.info("Loading MMAudio models...") |
|
model_mmaudio: ModelConfig = all_model_cfg['large_44k_v2'] |
|
model_mmaudio.download_if_needed() |
|
setup_eval_logging() |
|
|
|
|
|
try: |
|
TRANSLATOR = pipeline("translation", |
|
model="Helsinki-NLP/opus-mt-ko-en", |
|
device="cpu", |
|
use_fast=True, |
|
trust_remote_code=False) |
|
except Exception as e: |
|
logging.warning(f"Failed to load translation model: {e}") |
|
TRANSLATOR = None |
|
|
|
|
|
if torch.cuda.is_available(): |
|
mmaudio_dtype = torch.bfloat16 |
|
else: |
|
mmaudio_dtype = torch.float32 |
|
|
|
with torch.cuda.device(device): |
|
MMAUDIO_SEQ_CFG = model_mmaudio.seq_cfg |
|
MMAUDIO_NET = get_my_mmaudio(model_mmaudio.model_name).to(device, mmaudio_dtype).eval() |
|
MMAUDIO_NET.load_weights(torch.load(model_mmaudio.model_path, map_location=device, weights_only=True)) |
|
logging.info(f'Loaded weights from {model_mmaudio.model_path}') |
|
|
|
MMAUDIO_FEATURE_UTILS = FeaturesUtils( |
|
tod_vae_ckpt=model_mmaudio.vae_path, |
|
synchformer_ckpt=model_mmaudio.synchformer_ckpt, |
|
enable_conditions=True, |
|
mode=model_mmaudio.mode, |
|
bigvgan_vocoder_ckpt=model_mmaudio.bigvgan_16k_path, |
|
need_vae_encoder=False |
|
).to(device, mmaudio_dtype).eval() |
|
|
|
MODELS_LOADED = True |
|
logging.info("All models loaded successfully!") |
|
return True |
|
|
|
except Exception as e: |
|
logging.error(f"Failed to load models: {str(e)}") |
|
return False |
|
|
|
|
|
def update_dimensions(preset): |
|
if preset in IMAGE_PRESETS: |
|
return IMAGE_PRESETS[preset]["width"], IMAGE_PRESETS[preset]["height"] |
|
return 1024, 1024 |
|
|
|
def generate_text_to_image(prompt, width, height, guidance, inference_steps, seed): |
|
if not prompt: |
|
return None, "ν둬ννΈλ₯Ό μ
λ ₯ν΄μ£ΌμΈμ" |
|
|
|
try: |
|
client = Client(TEXT2IMG_API_URL) |
|
if seed == -1: |
|
seed = random.randint(0, 9999999) |
|
|
|
result = client.predict( |
|
prompt=prompt, |
|
width=int(width), |
|
height=int(height), |
|
guidance=float(guidance), |
|
inference_steps=int(inference_steps), |
|
seed=int(seed), |
|
do_img2img=False, |
|
init_image=None, |
|
image2image_strength=0.8, |
|
resize_img=True, |
|
api_name="/generate_image" |
|
) |
|
return result[0], f"μ¬μ©λ μλ: {result[1]}" |
|
except Exception as e: |
|
logging.error(f"Image generation error: {str(e)}") |
|
return None, f"μ€λ₯: {str(e)}" |
|
|
|
def generate_video_from_image(image, prompt="", length=4.0): |
|
if image is None: |
|
return None |
|
|
|
try: |
|
|
|
with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as fp: |
|
temp_path = fp.name |
|
Image.fromarray(image).save(temp_path) |
|
|
|
|
|
client = Client(VIDEO_API_URL) |
|
result = client.predict( |
|
input_image=handle_file(temp_path), |
|
prompt=prompt if prompt else "Generate natural motion", |
|
n_prompt="", |
|
seed=random.randint(0, 9999999), |
|
use_teacache=True, |
|
video_length=float(length), |
|
api_name="/process" |
|
) |
|
|
|
os.unlink(temp_path) |
|
|
|
if result and len(result) > 0: |
|
video_dict = result[0] |
|
return video_dict.get("video") if isinstance(video_dict, dict) else None |
|
|
|
except Exception as e: |
|
logging.error(f"Video generation error: {str(e)}") |
|
return None |
|
|
|
def prepare_image_and_mask(image, width, height, overlap_percentage, alignment): |
|
"""μ΄λ―Έμ§μ λ§μ€ν¬λ₯Ό μ€λΉνλ ν¨μ""" |
|
if image is None: |
|
return None, None |
|
|
|
|
|
if isinstance(image, np.ndarray): |
|
image = Image.fromarray(image).convert('RGB') |
|
|
|
target_size = (width, height) |
|
|
|
|
|
scale_factor = min(target_size[0] / image.width, target_size[1] / image.height) |
|
new_width = int(image.width * scale_factor) |
|
new_height = int(image.height * scale_factor) |
|
|
|
|
|
source = image.resize((new_width, new_height), Image.LANCZOS) |
|
|
|
|
|
overlap_x = int(new_width * (overlap_percentage / 100)) |
|
overlap_y = int(new_height * (overlap_percentage / 100)) |
|
overlap_x = max(overlap_x, 1) |
|
overlap_y = max(overlap_y, 1) |
|
|
|
|
|
if alignment == "κ°μ΄λ°": |
|
margin_x = (target_size[0] - new_width) // 2 |
|
margin_y = (target_size[1] - new_height) // 2 |
|
elif alignment == "μΌμͺ½": |
|
margin_x = 0 |
|
margin_y = (target_size[1] - new_height) // 2 |
|
elif alignment == "μ€λ₯Έμͺ½": |
|
margin_x = target_size[0] - new_width |
|
margin_y = (target_size[1] - new_height) // 2 |
|
elif alignment == "μ": |
|
margin_x = (target_size[0] - new_width) // 2 |
|
margin_y = 0 |
|
elif alignment == "μλ": |
|
margin_x = (target_size[0] - new_width) // 2 |
|
margin_y = target_size[1] - new_height |
|
|
|
|
|
background = Image.new('RGB', target_size, (255, 255, 255)) |
|
background.paste(source, (margin_x, margin_y)) |
|
|
|
|
|
mask = Image.new('L', target_size, 255) |
|
mask_draw = ImageDraw.Draw(mask) |
|
|
|
|
|
left_overlap = margin_x + overlap_x if alignment != "μΌμͺ½" else margin_x |
|
right_overlap = margin_x + new_width - overlap_x if alignment != "μ€λ₯Έμͺ½" else margin_x + new_width |
|
top_overlap = margin_y + overlap_y if alignment != "μ" else margin_y |
|
bottom_overlap = margin_y + new_height - overlap_y if alignment != "μλ" else margin_y + new_height |
|
|
|
mask_draw.rectangle([ |
|
(left_overlap, top_overlap), |
|
(right_overlap, bottom_overlap) |
|
], fill=0) |
|
|
|
return background, mask |
|
|
|
def preview_outpaint(image, width, height, overlap_percentage, alignment): |
|
"""μμνμΈν
미리보기""" |
|
background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, alignment) |
|
if background is None: |
|
return None |
|
|
|
|
|
preview = background.copy().convert('RGBA') |
|
|
|
|
|
red_overlay = Image.new('RGBA', background.size, (255, 0, 0, 64)) |
|
|
|
|
|
red_mask = Image.new('RGBA', background.size, (0, 0, 0, 0)) |
|
red_mask.paste(red_overlay, (0, 0), mask) |
|
|
|
|
|
preview = Image.alpha_composite(preview, red_mask) |
|
|
|
return preview |
|
|
|
@spaces.GPU(duration=120) |
|
def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8): |
|
"""μ΄λ―Έμ§ μμνμΈν
μ€ν""" |
|
if image is None: |
|
return None |
|
|
|
|
|
if not MODELS_LOADED: |
|
load_models() |
|
|
|
if OUTPAINT_PIPE is None: |
|
return Image.new('RGB', (width, height), (200, 200, 200)) |
|
|
|
try: |
|
|
|
background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, alignment) |
|
if background is None: |
|
return None |
|
|
|
|
|
cnet_image = background.copy() |
|
cnet_image.paste(0, (0, 0), mask) |
|
|
|
|
|
final_prompt = f"{prompt}, high quality, 4k" if prompt else "high quality, 4k" |
|
|
|
|
|
with torch.autocast(device_type=device.type, dtype=torch_dtype): |
|
( |
|
prompt_embeds, |
|
negative_prompt_embeds, |
|
pooled_prompt_embeds, |
|
negative_pooled_prompt_embeds, |
|
) = OUTPAINT_PIPE.encode_prompt(final_prompt, str(device), True) |
|
|
|
|
|
for generated_image in OUTPAINT_PIPE( |
|
prompt_embeds=prompt_embeds, |
|
negative_prompt_embeds=negative_prompt_embeds, |
|
pooled_prompt_embeds=pooled_prompt_embeds, |
|
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, |
|
image=cnet_image, |
|
num_inference_steps=num_steps |
|
): |
|
|
|
pass |
|
|
|
|
|
final_image = generated_image |
|
|
|
|
|
final_image = final_image.convert("RGBA") |
|
cnet_image.paste(final_image, (0, 0), mask) |
|
|
|
return cnet_image |
|
|
|
except Exception as e: |
|
logging.error(f"Outpainting error: {str(e)}") |
|
return background if 'background' in locals() else None |
|
|
|
|
|
def translate_prompt(text): |
|
try: |
|
if TRANSLATOR is None: |
|
return text |
|
|
|
if text and any(ord(char) >= 0x3131 and ord(char) <= 0xD7A3 for char in text): |
|
with torch.no_grad(): |
|
translation = TRANSLATOR(text)[0]['translation_text'] |
|
return translation |
|
return text |
|
except Exception as e: |
|
logging.error(f"Translation error: {e}") |
|
return text |
|
|
|
@spaces.GPU(duration=120) |
|
@torch.inference_mode() |
|
def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int, |
|
cfg_strength: float, duration: float): |
|
|
|
if not MODELS_LOADED: |
|
load_models() |
|
|
|
if MMAUDIO_NET is None: |
|
return None |
|
|
|
prompt = translate_prompt(prompt) |
|
negative_prompt = translate_prompt(negative_prompt) |
|
|
|
rng = torch.Generator(device=device) |
|
rng.manual_seed(seed) |
|
fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps) |
|
|
|
clip_frames, sync_frames, duration = load_video(video, duration) |
|
clip_frames = clip_frames.unsqueeze(0) |
|
sync_frames = sync_frames.unsqueeze(0) |
|
MMAUDIO_SEQ_CFG.duration = duration |
|
MMAUDIO_NET.update_seq_lengths(MMAUDIO_SEQ_CFG.latent_seq_len, MMAUDIO_SEQ_CFG.clip_seq_len, MMAUDIO_SEQ_CFG.sync_seq_len) |
|
|
|
audios = generate(clip_frames, |
|
sync_frames, [prompt], |
|
negative_text=[negative_prompt], |
|
feature_utils=MMAUDIO_FEATURE_UTILS, |
|
net=MMAUDIO_NET, |
|
fm=fm, |
|
rng=rng, |
|
cfg_strength=cfg_strength) |
|
audio = audios.float().cpu()[0] |
|
|
|
video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name |
|
make_video(video, |
|
video_save_path, |
|
audio, |
|
sampling_rate=MMAUDIO_SEQ_CFG.sampling_rate, |
|
duration_sec=MMAUDIO_SEQ_CFG.duration) |
|
return video_save_path |
|
|
|
|
|
def process_bg_image(image, bg, fast_mode=False): |
|
"""λ¨μΌ μ΄λ―Έμ§ λ°°κ²½ μ²λ¦¬""" |
|
if BIREFNET_MODEL is None or BIREFNET_LITE_MODEL is None: |
|
return image |
|
|
|
image_size = image.size |
|
input_images = transform_image(image).unsqueeze(0).to(device) |
|
model = BIREFNET_LITE_MODEL if fast_mode else BIREFNET_MODEL |
|
|
|
with torch.no_grad(): |
|
preds = model(input_images)[-1].sigmoid().cpu() |
|
pred = preds[0].squeeze() |
|
pred_pil = transforms.ToPILImage()(pred) |
|
mask = pred_pil.resize(image_size) |
|
|
|
if isinstance(bg, str) and bg.startswith("#"): |
|
color_rgb = tuple(int(bg[i:i+2], 16) for i in (1, 3, 5)) |
|
background = Image.new("RGBA", image_size, color_rgb + (255,)) |
|
elif isinstance(bg, Image.Image): |
|
background = bg.convert("RGBA").resize(image_size) |
|
else: |
|
background = Image.open(bg).convert("RGBA").resize(image_size) |
|
|
|
image = Image.composite(image, background, mask) |
|
return image |
|
|
|
def process_video_frame(frame, bg_type, bg, fast_mode, bg_frame_index, background_frames, color): |
|
"""λΉλμ€ νλ μ μ²λ¦¬""" |
|
try: |
|
pil_image = Image.fromarray(frame) |
|
if bg_type == "μμ": |
|
processed_image = process_bg_image(pil_image, color, fast_mode) |
|
elif bg_type == "μ΄λ―Έμ§": |
|
processed_image = process_bg_image(pil_image, bg, fast_mode) |
|
elif bg_type == "λΉλμ€": |
|
background_frame = background_frames[bg_frame_index] |
|
bg_frame_index += 1 |
|
background_image = Image.fromarray(background_frame) |
|
processed_image = process_bg_image(pil_image, background_image, fast_mode) |
|
else: |
|
processed_image = pil_image |
|
return np.array(processed_image), bg_frame_index |
|
except Exception as e: |
|
print(f"Error processing frame: {e}") |
|
return frame, bg_frame_index |
|
|
|
@spaces.GPU(duration=300) |
|
def process_video_bg(vid, bg_type="μμ", bg_image=None, bg_video=None, color="#00FF00", |
|
fps=0, video_handling="slow_down", fast_mode=True, max_workers=10): |
|
"""λΉλμ€ λ°°κ²½ μ²λ¦¬ λ©μΈ ν¨μ""" |
|
|
|
if not MODELS_LOADED: |
|
load_models() |
|
|
|
if BIREFNET_MODEL is None: |
|
yield gr.update(visible=False), gr.update(visible=True), "BiRefNet λͺ¨λΈμ λ‘λνμ§ λͺ»νμ΅λλ€." |
|
yield None, None, "BiRefNet λͺ¨λΈμ λ‘λνμ§ λͺ»νμ΅λλ€." |
|
return |
|
|
|
try: |
|
start_time = time.time() |
|
video = VideoFileClip(vid) |
|
if fps == 0: |
|
fps = video.fps |
|
|
|
audio = video.audio |
|
frames = list(video.iter_frames(fps=fps)) |
|
|
|
processed_frames = [] |
|
yield gr.update(visible=True), gr.update(visible=False), f"μ²λ¦¬ μμ... κ²½κ³Ό μκ°: 0μ΄" |
|
|
|
if bg_type == "λΉλμ€": |
|
background_video = VideoFileClip(bg_video) |
|
if background_video.duration < video.duration: |
|
if video_handling == "slow_down": |
|
background_video = background_video.fx(vfx.speedx, factor=video.duration / background_video.duration) |
|
else: |
|
background_video = concatenate_videoclips([background_video] * int(video.duration / background_video.duration + 1)) |
|
background_frames = list(background_video.iter_frames(fps=fps)) |
|
else: |
|
background_frames = None |
|
|
|
bg_frame_index = 0 |
|
|
|
with ThreadPoolExecutor(max_workers=max_workers) as executor: |
|
futures = [executor.submit(process_video_frame, frames[i], bg_type, bg_image, fast_mode, |
|
bg_frame_index + i, background_frames, color) for i in range(len(frames))] |
|
for i, future in enumerate(futures): |
|
result, _ = future.result() |
|
processed_frames.append(result) |
|
elapsed_time = time.time() - start_time |
|
yield result, None, f"νλ μ {i+1}/{len(frames)} μ²λ¦¬ μ€... κ²½κ³Ό μκ°: {elapsed_time:.2f}μ΄" |
|
|
|
processed_video = ImageSequenceClip(processed_frames, fps=fps) |
|
processed_video = processed_video.with_audio(audio) |
|
|
|
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file: |
|
temp_filepath = temp_file.name |
|
processed_video.write_videofile(temp_filepath, codec="libx264") |
|
|
|
elapsed_time = time.time() - start_time |
|
yield gr.update(visible=False), gr.update(visible=True), f"μ²λ¦¬ μλ£! κ²½κ³Ό μκ°: {elapsed_time:.2f}μ΄" |
|
yield processed_frames[-1], temp_filepath, f"μ²λ¦¬ μλ£! κ²½κ³Ό μκ°: {elapsed_time:.2f}μ΄" |
|
|
|
except Exception as e: |
|
print(f"Error: {e}") |
|
elapsed_time = time.time() - start_time |
|
yield gr.update(visible=False), gr.update(visible=True), f"λΉλμ€ μ²λ¦¬ μ€λ₯: {e}. κ²½κ³Ό μκ°: {elapsed_time:.2f}μ΄" |
|
yield None, None, f"λΉλμ€ μ²λ¦¬ μ€λ₯: {e}. κ²½κ³Ό μκ°: {elapsed_time:.2f}μ΄" |
|
|
|
@spaces.GPU(duration=180) |
|
def merge_videos_with_audio(video_files, audio_file, audio_volume, output_fps): |
|
"""μ¬λ¬ λΉλμ€λ₯Ό λ³ν©νκ³ μ€λμ€λ₯Ό μΆκ°νλ ν¨μ""" |
|
if not video_files: |
|
return None, "λΉλμ€ νμΌμ μ
λ‘λν΄μ£ΌμΈμ." |
|
|
|
if isinstance(video_files, list) and len(video_files) > 10: |
|
return None, "μ΅λ 10κ°μ λΉλμ€λ§ μ
λ‘λ κ°λ₯ν©λλ€." |
|
|
|
try: |
|
|
|
status = "λΉλμ€ νμΌ μ λ ¬ μ€..." |
|
|
|
|
|
video_paths = [] |
|
if isinstance(video_files, list): |
|
for video_file in video_files: |
|
if video_file is not None: |
|
video_paths.append(video_file) |
|
else: |
|
video_paths.append(video_files) |
|
|
|
|
|
video_paths.sort(key=lambda x: os.path.basename(x)) |
|
|
|
status = f"{len(video_paths)}κ°μ λΉλμ€ λ‘λ μ€..." |
|
|
|
|
|
video_clips = [] |
|
clip_sizes = [] |
|
|
|
for i, video_path in enumerate(video_paths): |
|
status = f"λΉλμ€ {i+1}/{len(video_paths)} λ‘λ μ€: {os.path.basename(video_path)}" |
|
clip = VideoFileClip(video_path) |
|
video_clips.append(clip) |
|
|
|
|
|
try: |
|
clip_sizes.append((clip.w, clip.h)) |
|
except: |
|
clip_sizes.append(clip.size) |
|
|
|
|
|
target_width, target_height = clip_sizes[0] |
|
|
|
|
|
all_same_size = all(size == (target_width, target_height) for size in clip_sizes) |
|
|
|
if not all_same_size: |
|
logging.warning(f"λΉλμ€ ν¬κΈ°κ° μλ‘ λ€λ¦
λλ€. 첫 λ²μ§Έ λΉλμ€ ν¬κΈ°({target_width}x{target_height})λ‘ μ‘°μ ν©λλ€.") |
|
|
|
|
|
adjusted_clips = [] |
|
for clip, size in zip(video_clips, clip_sizes): |
|
if size != (target_width, target_height): |
|
adjusted_clip = vfx.resize(clip, newsize=(target_width, target_height)) |
|
|
|
adjusted_clips.append(adjusted_clip) |
|
else: |
|
adjusted_clips.append(clip) |
|
|
|
video_clips = adjusted_clips |
|
|
|
|
|
if output_fps == 0: |
|
output_fps = video_clips[0].fps |
|
|
|
status = "λΉλμ€ λ³ν© μ€..." |
|
|
|
|
|
final_video = concatenate_videoclips(video_clips, method="compose") |
|
|
|
|
|
if audio_file: |
|
status = "μ€λμ€ μ²λ¦¬ μ€..." |
|
|
|
try: |
|
|
|
if isinstance(audio_file, str): |
|
audio_path = audio_file |
|
else: |
|
|
|
audio_path = audio_file |
|
|
|
logging.info(f"Processing audio from: {audio_path}") |
|
|
|
|
|
if audio_path.endswith(('.mp4', '.avi', '.mov', '.mkv')): |
|
|
|
temp_video = VideoFileClip(audio_path) |
|
audio_clip = temp_video.audio |
|
temp_video.close() |
|
else: |
|
|
|
audio_clip = AudioFileClip(audio_path) |
|
|
|
if audio_clip is None: |
|
raise ValueError("μ€λμ€λ₯Ό λ‘λν μ μμ΅λλ€.") |
|
|
|
|
|
if audio_volume != 100: |
|
audio_clip = audio_clip.volumex(audio_volume / 100) |
|
|
|
|
|
video_duration = final_video.duration |
|
audio_duration = audio_clip.duration |
|
|
|
if audio_duration > video_duration: |
|
|
|
audio_clip = audio_clip.subclip(0, video_duration) |
|
elif audio_duration < video_duration: |
|
|
|
loops_needed = int(video_duration / audio_duration) + 1 |
|
audio_clips_list = [audio_clip] * loops_needed |
|
looped_audio = concatenate_audioclips(audio_clips_list) |
|
audio_clip = looped_audio.subclip(0, video_duration) |
|
|
|
|
|
|
|
final_video = final_video.set_audio(audio_clip) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
logging.info("Audio successfully added to video") |
|
|
|
except Exception as e: |
|
logging.error(f"μ€λμ€ μ²λ¦¬ μ€ μ€λ₯ λ°μ: {str(e)}") |
|
|
|
status = f"μ€λμ€ μ²λ¦¬ μ€ν¨: {str(e)}, λΉλμ€λ§ λ³ν©ν©λλ€." |
|
|
|
status = "λΉλμ€ μ μ₯ μ€..." |
|
|
|
|
|
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file: |
|
temp_filepath = temp_file.name |
|
|
|
|
|
final_video.write_videofile( |
|
temp_filepath, |
|
fps=output_fps, |
|
codec="libx264", |
|
audio_codec="aac", |
|
preset="medium", |
|
bitrate="5000k", |
|
audio_bitrate="192k" |
|
) |
|
|
|
|
|
for clip in video_clips: |
|
clip.close() |
|
if 'adjusted_clips' in locals(): |
|
for clip in adjusted_clips: |
|
if clip not in video_clips: |
|
clip.close() |
|
if audio_file and 'audio_clip' in locals(): |
|
audio_clip.close() |
|
final_video.close() |
|
|
|
return temp_filepath, f"β
μ±κ³΅μ μΌλ‘ {len(video_paths)}κ°μ λΉλμ€λ₯Ό λ³ν©νμ΅λλ€! (ν¬κΈ°: {target_width}x{target_height})" |
|
|
|
except Exception as e: |
|
logging.error(f"Video merge error: {str(e)}") |
|
import traceback |
|
traceback.print_exc() |
|
return None, f"β μ€λ₯ λ°μ: {str(e)}" |
|
|
|
|
|
css = """ |
|
:root { |
|
--primary-color: #f8c3cd; |
|
--secondary-color: #b3e5fc; |
|
--background-color: #f5f5f7; |
|
--card-background: #ffffff; |
|
--text-color: #424242; |
|
--accent-color: #ffb6c1; |
|
--success-color: #c8e6c9; |
|
--warning-color: #fff9c4; |
|
--shadow-color: rgba(0, 0, 0, 0.1); |
|
--border-radius: 12px; |
|
} |
|
.gradio-container { |
|
max-width: 1200px !important; |
|
margin: 0 auto !important; |
|
} |
|
.panel-box { |
|
border-radius: var(--border-radius) !important; |
|
box-shadow: 0 8px 16px var(--shadow-color) !important; |
|
background-color: var(--card-background) !important; |
|
padding: 20px !important; |
|
margin-bottom: 20px !important; |
|
} |
|
#generate-btn, #video-btn, #outpaint-btn, #preview-btn, #audio-btn, #bg-remove-btn, #merge-btn { |
|
background: linear-gradient(135deg, #ff9a9e, #fad0c4) !important; |
|
font-size: 1.1rem !important; |
|
padding: 12px 24px !important; |
|
margin-top: 10px !important; |
|
width: 100% !important; |
|
} |
|
.tabitem { |
|
min-height: 700px !important; |
|
} |
|
""" |
|
|
|
|
|
demo = gr.Blocks(css=css, title="AI μ΄λ―Έμ§ & λΉλμ€ & μ€λμ€ μμ±κΈ°") |
|
|
|
with demo: |
|
gr.Markdown("# π¨ Ginigen μ€νλμ€") |
|
gr.Markdown("μ²μ μ¬μ© μ λͺ¨λΈ λ‘λ©μ μκ°μ΄ 걸릴 μ μμ΅λλ€. μ μλ§ κΈ°λ€λ €μ£ΌμΈμ.") |
|
|
|
|
|
model_status = gr.Textbox(label="λͺ¨λΈ μν", value="λͺ¨λΈ λ‘λ© λκΈ° μ€...", interactive=False) |
|
|
|
with gr.Tabs() as tabs: |
|
|
|
with gr.Tab("ν
μ€νΈβμ΄λ―Έμ§βλΉλμ€", elem_classes="tabitem"): |
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### π μ΄λ―Έμ§ μμ± μ€μ ") |
|
|
|
prompt = gr.Textbox( |
|
label="ν둬ννΈ(νκΈ/μμ΄ κ°λ₯)", |
|
placeholder="μμ±νκ³ μΆμ μ΄λ―Έμ§λ₯Ό μ€λͺ
νμΈμ...", |
|
lines=3 |
|
) |
|
|
|
size_preset = gr.Dropdown( |
|
choices=list(IMAGE_PRESETS.keys()), |
|
value="1:1 μ μ¬κ°ν", |
|
label="ν¬κΈ° ν리μ
" |
|
) |
|
|
|
with gr.Row(): |
|
width = gr.Slider(256, 2048, 1024, step=64, label="λλΉ") |
|
height = gr.Slider(256, 2048, 1024, step=64, label="λμ΄") |
|
|
|
with gr.Row(): |
|
guidance = gr.Slider(1.0, 20.0, 3.5, step=0.1, label="κ°μ΄λμ€") |
|
steps = gr.Slider(1, 50, 30, step=1, label="μ€ν
") |
|
|
|
seed = gr.Number(label="μλ (-1=λλ€)", value=-1) |
|
|
|
generate_btn = gr.Button("π¨ μ΄λ―Έμ§ μμ±", variant="primary", elem_id="generate-btn") |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### π¬ λΉλμ€ μμ± μ€μ ") |
|
|
|
video_prompt = gr.Textbox( |
|
label="(μ ν) λΉλμ€ ν둬ννΈ(μμ΄λ‘ μ
λ ₯)", |
|
placeholder="λΉλμ€μ μμ§μμ μ€λͺ
νμΈμ... (λΉμλλ©΄ κΈ°λ³Έ μμ§μ μ μ©)", |
|
lines=2 |
|
) |
|
|
|
video_length = gr.Slider( |
|
minimum=1, |
|
maximum=60, |
|
value=4, |
|
step=0.5, |
|
label="λΉλμ€ κΈΈμ΄ (μ΄)", |
|
info="1μ΄μμ 60μ΄κΉμ§ μ ν κ°λ₯ν©λλ€" |
|
) |
|
|
|
video_btn = gr.Button("π¬ λΉλμ€λ‘ λ³ν", variant="secondary", elem_id="video-btn") |
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### πΌοΈ μμ± κ²°κ³Ό") |
|
|
|
output_image = gr.Image(label="μμ±λ μ΄λ―Έμ§", type="numpy") |
|
output_seed = gr.Textbox(label="μλ μ 보") |
|
output_video = gr.Video(label="μμ±λ λΉλμ€") |
|
|
|
|
|
with gr.Tab("μ΄λ―Έμ§ λΉμ¨ λ³κ²½/μμ±", elem_classes="tabitem"): |
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### πΌοΈ μ΄λ―Έμ§ μ
λ‘λ") |
|
|
|
input_image = gr.Image( |
|
label="μλ³Έ μ΄λ―Έμ§", |
|
type="numpy" |
|
) |
|
|
|
outpaint_prompt = gr.Textbox( |
|
label="ν둬ννΈ (μ ν)", |
|
placeholder="νμ₯ν μμμ λν μ€λͺ
...", |
|
lines=2 |
|
) |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### βοΈ μμνμΈν
μ€μ ") |
|
|
|
outpaint_size_preset = gr.Dropdown( |
|
choices=list(IMAGE_PRESETS.keys()), |
|
value="16:9 μμ΄λμ€ν¬λ¦°", |
|
label="λͺ©ν ν¬κΈ° ν리μ
" |
|
) |
|
|
|
with gr.Row(): |
|
outpaint_width = gr.Slider(256, 2048, 1280, step=64, label="λͺ©ν λλΉ") |
|
outpaint_height = gr.Slider(256, 2048, 720, step=64, label="λͺ©ν λμ΄") |
|
|
|
alignment = gr.Dropdown( |
|
choices=["κ°μ΄λ°", "μΌμͺ½", "μ€λ₯Έμͺ½", "μ", "μλ"], |
|
value="κ°μ΄λ°", |
|
label="μ λ ¬" |
|
) |
|
|
|
overlap_percentage = gr.Slider( |
|
minimum=1, |
|
maximum=50, |
|
value=10, |
|
step=1, |
|
label="λ§μ€ν¬ μ€λ²λ© (%)" |
|
) |
|
|
|
outpaint_steps = gr.Slider( |
|
minimum=4, |
|
maximum=12, |
|
value=8, |
|
step=1, |
|
label="μΆλ‘ μ€ν
" |
|
) |
|
|
|
preview_btn = gr.Button("ποΈ λ―Έλ¦¬λ³΄κΈ°", elem_id="preview-btn") |
|
outpaint_btn = gr.Button("π¨ μμνμΈν
μ€ν", variant="primary", elem_id="outpaint-btn") |
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### πΌοΈ κ²°κ³Ό") |
|
|
|
preview_image = gr.Image(label="미리보기") |
|
outpaint_result = gr.Image(label="μμνμΈν
κ²°κ³Ό") |
|
|
|
|
|
with gr.Tab("λΉλμ€ + μ€λμ€", elem_classes="tabitem"): |
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### π₯ λΉλμ€ μ
λ‘λ") |
|
|
|
audio_video_input = gr.Video( |
|
label="μ
λ ₯ λΉλμ€", |
|
sources=["upload"] |
|
) |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### π΅ μ€λμ€ μμ± μ€μ ") |
|
|
|
audio_prompt = gr.Textbox( |
|
label="ν둬ννΈ (νκΈ μ§μ)", |
|
placeholder="μμ±νκ³ μΆμ μ€λμ€λ₯Ό μ€λͺ
νμΈμ... (μ: ννλ‘μ΄ νΌμλ
Έ μμ
)", |
|
lines=3 |
|
) |
|
|
|
audio_negative_prompt = gr.Textbox( |
|
label="λ€κ±°ν°λΈ ν둬ννΈ", |
|
value="music", |
|
placeholder="μνμ§ μλ μμ...", |
|
lines=2 |
|
) |
|
|
|
with gr.Row(): |
|
audio_seed = gr.Number(label="μλ", value=0) |
|
audio_steps = gr.Number(label="μ€ν
", value=25) |
|
|
|
with gr.Row(): |
|
audio_cfg = gr.Number(label="κ°μ΄λμ€ μ€μΌμΌ", value=4.5) |
|
audio_duration = gr.Number(label="μ§μμκ° (μ΄)", value=9999) |
|
|
|
audio_btn = gr.Button("π΅ μ€λμ€ μμ± λ° ν©μ±", variant="primary", elem_id="audio-btn") |
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### π¬ μμ± κ²°κ³Ό") |
|
|
|
output_video_with_audio = gr.Video( |
|
label="μ€λμ€κ° μΆκ°λ λΉλμ€", |
|
interactive=False |
|
) |
|
|
|
|
|
with gr.Tab("λΉλμ€ νΈμ§", elem_classes="tabitem"): |
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### π₯ λΉλμ€ μ
λ‘λ (μ΅λ 10κ°)") |
|
gr.Markdown("**νμΌλͺ
μ΄ μμμλ‘ μ°μ μμκ° λμ΅λλ€** (μ: 1.mp4, 2.mp4, 3.mp4)") |
|
|
|
video_files = gr.File( |
|
label="λΉλμ€ νμΌλ€", |
|
file_count="multiple", |
|
file_types=["video"], |
|
type="filepath" |
|
) |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### π΅ μ€λμ€ μ€μ (μ ν)") |
|
gr.Markdown("**μ£Όμ**: μ
λ‘λν μ€λμ€κ° λΉλμ€μ κΈ°μ‘΄ μ€λμ€λ₯Ό μμ ν λ체ν©λλ€.") |
|
|
|
audio_file = gr.Audio( |
|
label="μ€λμ€ νμΌ (MP3, WAV, M4A λ±)", |
|
type="filepath", |
|
sources=["upload"] |
|
) |
|
|
|
audio_volume = gr.Slider( |
|
minimum=0, |
|
maximum=200, |
|
value=100, |
|
step=1, |
|
label="μ€λμ€ λ³Όλ₯¨ (%)", |
|
info="100% = μλ³Έ λ³Όλ₯¨" |
|
) |
|
|
|
gr.Markdown(""" |
|
**μ€λμ€ μ΅μ
**: |
|
- μ€λμ€κ° λΉλμ€λ³΄λ€ μ§§μΌλ©΄ μλμΌλ‘ λ°λ³΅λ©λλ€ |
|
- μ€λμ€κ° λΉλμ€λ³΄λ€ κΈΈλ©΄ λΉλμ€ κΈΈμ΄μ λ§μΆ° μ립λλ€ |
|
""") |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### βοΈ νΈμ§ μ€μ ") |
|
|
|
output_fps = gr.Slider( |
|
minimum=0, |
|
maximum=60, |
|
value=0, |
|
step=1, |
|
label="μΆλ ₯ FPS (0 = 첫 λ²μ§Έ λΉλμ€μ FPS μ¬μ©)" |
|
) |
|
|
|
gr.Markdown(""" |
|
**ν¬κΈ° μ²λ¦¬**: |
|
- 첫 λ²μ§Έ λΉλμ€μ ν¬κΈ°κ° κΈ°μ€μ΄ λ©λλ€ |
|
- λ€λ₯Έ ν¬κΈ°μ λΉλμ€λ 첫 λ²μ§Έ λΉλμ€ ν¬κΈ°λ‘ μ‘°μ λ©λλ€ |
|
- μ΅μμ κ²°κ³Όλ₯Ό μν΄ κ°μ ν¬κΈ°μ λΉλμ€λ₯Ό μ¬μ©νμΈμ |
|
""") |
|
|
|
merge_videos_btn = gr.Button("π¬ λΉλμ€ λ³ν©", variant="primary", elem_id="merge-btn") |
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### π¬ λ³ν© κ²°κ³Ό") |
|
|
|
merge_status = gr.Textbox(label="μ²λ¦¬ μν", interactive=False) |
|
merged_video = gr.Video(label="λ³ν©λ λΉλμ€") |
|
|
|
gr.Markdown(""" |
|
### βΉοΈ μ¬μ© λ°©λ² |
|
1. μ¬λ¬ λΉλμ€ νμΌμ μ
λ‘λνμΈμ (μ΅λ 10κ°) |
|
2. νμΌλͺ
μ΄ μμ μμλλ‘ μλ μ λ ¬λ©λλ€ |
|
3. (μ ν) μ€λμ€ νμΌμ μΆκ°νκ³ λ³Όλ₯¨μ μ‘°μ νμΈμ |
|
4. 'λΉλμ€ λ³ν©' λ²νΌμ ν΄λ¦νμΈμ |
|
|
|
**νΉμ§**: |
|
- β
첫 λ²μ§Έ λΉλμ€μ ν¬κΈ°λ₯Ό κΈ°μ€μΌλ‘ ν΅ν© |
|
- β
μ
λ‘λν μ€λμ€κ° μ 체 λΉλμ€μ μ μ©λ©λλ€ |
|
- β
λμ λΉνΈλ μ΄νΈλ‘ νμ§ μ μ§ |
|
|
|
**ν**: |
|
- νμΌλͺ
μ 01.mp4, 02.mp4, 03.mp4 νμμΌλ‘ μ§μ νλ©΄ μμ κ΄λ¦¬κ° μ½μ΅λλ€ |
|
- μ€λμ€λ₯Ό μΆκ°νλ©΄ κΈ°μ‘΄ λΉλμ€μ μ€λμ€λ λ체λ©λλ€ |
|
""") |
|
|
|
|
|
with gr.Tab("λΉλμ€ λ°°κ²½μ κ±°/ν©μ±", elem_classes="tabitem"): |
|
with gr.Row(equal_height=True): |
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### π₯ λΉλμ€ μ
λ‘λ") |
|
|
|
bg_video_input = gr.Video( |
|
label="μ
λ ₯ λΉλμ€", |
|
interactive=True |
|
) |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### π¨ λ°°κ²½ μ€μ ") |
|
|
|
bg_type = gr.Radio( |
|
["μμ", "μ΄λ―Έμ§", "λΉλμ€"], |
|
label="λ°°κ²½ μ ν", |
|
value="μμ", |
|
interactive=True |
|
) |
|
|
|
color_picker = gr.ColorPicker( |
|
label="λ°°κ²½ μμ", |
|
value="#00FF00", |
|
visible=True, |
|
interactive=True |
|
) |
|
|
|
bg_image_input = gr.Image( |
|
label="λ°°κ²½ μ΄λ―Έμ§", |
|
type="filepath", |
|
visible=False, |
|
interactive=True |
|
) |
|
|
|
bg_video_bg = gr.Video( |
|
label="λ°°κ²½ λΉλμ€", |
|
visible=False, |
|
interactive=True |
|
) |
|
|
|
with gr.Column(visible=False) as video_handling_options: |
|
video_handling_radio = gr.Radio( |
|
["slow_down", "loop"], |
|
label="λΉλμ€ μ²λ¦¬ λ°©μ", |
|
value="slow_down", |
|
interactive=True, |
|
info="slow_down: λ°°κ²½ λΉλμ€λ₯Ό λλ¦¬κ² μ¬μ, loop: λ°°κ²½ λΉλμ€λ₯Ό λ°λ³΅" |
|
) |
|
|
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### βοΈ μ²λ¦¬ μ€μ ") |
|
|
|
fps_slider = gr.Slider( |
|
minimum=0, |
|
maximum=60, |
|
step=1, |
|
value=0, |
|
label="μΆλ ₯ FPS (0 = μλ³Έ FPS μ μ§)", |
|
interactive=True |
|
) |
|
|
|
fast_mode_checkbox = gr.Checkbox( |
|
label="λΉ λ₯Έ λͺ¨λ (BiRefNet_lite μ¬μ©)", |
|
value=True, |
|
interactive=True |
|
) |
|
|
|
max_workers_slider = gr.Slider( |
|
minimum=1, |
|
maximum=32, |
|
step=1, |
|
value=10, |
|
label="μ΅λ μ컀 μ", |
|
info="λ³λ ¬λ‘ μ²λ¦¬ν νλ μ μ", |
|
interactive=True |
|
) |
|
|
|
bg_remove_btn = gr.Button("π¬ λ°°κ²½ λ³κ²½", variant="primary", elem_id="bg-remove-btn") |
|
|
|
|
|
with gr.Column(scale=1): |
|
with gr.Group(elem_classes="panel-box"): |
|
gr.Markdown("### π¬ μ²λ¦¬ κ²°κ³Ό") |
|
|
|
stream_image = gr.Image(label="μ€μκ° μ€νΈλ¦¬λ°", visible=False) |
|
output_bg_video = gr.Video(label="μ΅μ’
λΉλμ€") |
|
time_textbox = gr.Textbox(label="κ²½κ³Ό μκ°", interactive=False) |
|
|
|
gr.Markdown(""" |
|
### βΉοΈ μ¬μ© λ°©λ² |
|
1. λΉλμ€λ₯Ό μ
λ‘λνμΈμ |
|
2. μνλ λ°°κ²½ μ νμ μ ννμΈμ |
|
3. μ€μ μ μ‘°μ νκ³ 'λ°°κ²½ λ³κ²½' λ²νΌμ ν΄λ¦νμΈμ |
|
|
|
**μ°Έκ³ **: GPU μ νμΌλ‘ ν λ²μ μ½ 200νλ μκΉμ§ μ²λ¦¬ κ°λ₯ν©λλ€. |
|
κΈ΄ λΉλμ€λ μμ μ‘°κ°μΌλ‘ λλμ΄ μ²λ¦¬νμΈμ. |
|
""") |
|
|
|
|
|
def on_demo_load(): |
|
try: |
|
if IS_SPACES: |
|
|
|
gpu_warmup() |
|
|
|
return "λͺ¨λΈ λ‘λ© μ€λΉ μλ£" |
|
except Exception as e: |
|
return f"μ΄κΈ°ν μ€λ₯: {str(e)}" |
|
|
|
|
|
size_preset.change(update_dimensions, [size_preset], [width, height]) |
|
|
|
generate_btn.click( |
|
generate_text_to_image, |
|
[prompt, width, height, guidance, steps, seed], |
|
[output_image, output_seed] |
|
) |
|
|
|
video_btn.click( |
|
lambda img, v_prompt, length: generate_video_from_image(img, v_prompt, length) if img is not None else None, |
|
[output_image, video_prompt, video_length], |
|
[output_video] |
|
) |
|
|
|
|
|
outpaint_size_preset.change(update_dimensions, [outpaint_size_preset], [outpaint_width, outpaint_height]) |
|
|
|
preview_btn.click( |
|
preview_outpaint, |
|
[input_image, outpaint_width, outpaint_height, overlap_percentage, alignment], |
|
[preview_image] |
|
) |
|
|
|
outpaint_btn.click( |
|
outpaint_image, |
|
[input_image, outpaint_prompt, outpaint_width, outpaint_height, overlap_percentage, alignment, outpaint_steps], |
|
[outpaint_result] |
|
) |
|
|
|
|
|
audio_btn.click( |
|
video_to_audio, |
|
[audio_video_input, audio_prompt, audio_negative_prompt, audio_seed, audio_steps, audio_cfg, audio_duration], |
|
[output_video_with_audio] |
|
) |
|
|
|
|
|
merge_videos_btn.click( |
|
merge_videos_with_audio, |
|
inputs=[video_files, audio_file, audio_volume, output_fps], |
|
outputs=[merged_video, merge_status] |
|
) |
|
|
|
|
|
def update_bg_visibility(bg_type): |
|
if bg_type == "μμ": |
|
return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False) |
|
elif bg_type == "μ΄λ―Έμ§": |
|
return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False) |
|
elif bg_type == "λΉλμ€": |
|
return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True) |
|
else: |
|
return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False) |
|
|
|
bg_type.change( |
|
update_bg_visibility, |
|
inputs=bg_type, |
|
outputs=[color_picker, bg_image_input, bg_video_bg, video_handling_options] |
|
) |
|
|
|
bg_remove_btn.click( |
|
process_video_bg, |
|
inputs=[bg_video_input, bg_type, bg_image_input, bg_video_bg, color_picker, |
|
fps_slider, video_handling_radio, fast_mode_checkbox, max_workers_slider], |
|
outputs=[stream_image, output_bg_video, time_textbox] |
|
) |
|
|
|
|
|
demo.load(on_demo_load, outputs=model_status) |
|
|
|
if __name__ == "__main__": |
|
|
|
if IS_SPACES: |
|
try: |
|
gpu_warmup() |
|
except: |
|
pass |
|
|
|
demo.launch() |