openfree committed
Commit 0712469 · verified · 1 Parent(s): fe2ed65

Update app-backup.py

Files changed (1)
  1. app-backup.py +847 -134
app-backup.py CHANGED
@@ -1,10 +1,25 @@
  import gradio as gr
  import numpy as np
  from PIL import Image, ImageDraw
  from gradio_client import Client, handle_file
  import random
  import tempfile
- import os
  import logging
  import torch
  from diffusers import AutoencoderKL, TCDScheduler
@@ -16,22 +31,139 @@ from einops import rearrange
  from scipy.io import wavfile
  from transformers import pipeline

  # Bypass the torch.load safety check via an environment variable (temporary workaround)
  os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"

- # Spaces GPU
- try:
-     import spaces
- except:
-     # Dummy decorator for when the GPU decorator is unavailable
-     class spaces:
-         @staticmethod
-         def GPU(duration=None):
-             def decorator(func):
-                 return func
-             return decorator

- # MMAudio imports
  try:
      import mmaudio
  except ImportError:
@@ -45,116 +177,36 @@ from mmaudio.model.networks import MMAudio, get_my_mmaudio
  from mmaudio.model.sequence_config import SequenceConfig
  from mmaudio.model.utils.features_utils import FeaturesUtils

- # Load the ControlNet model
- try:
-     from controlnet_union import ControlNetModel_Union
-     from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
-
-     # Configure and load ControlNet
-     config_file = hf_hub_download(
-         "xinsir/controlnet-union-sdxl-1.0",
-         filename="config_promax.json",
-     )
-
-     config = ControlNetModel_Union.load_config(config_file)
-     controlnet_model = ControlNetModel_Union.from_config(config)
-
-     model_file = hf_hub_download(
-         "xinsir/controlnet-union-sdxl-1.0",
-         filename="diffusion_pytorch_model_promax.safetensors",
-     )
-     state_dict = load_state_dict(model_file)
-     loaded_keys = list(state_dict.keys())
-
-     result = ControlNetModel_Union._load_pretrained_model(
-         controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
-     )
-
-     model = result[0]
-     model = model.to(device="cuda", dtype=torch.float16)
-
-     # Load the VAE
-     vae = AutoencoderKL.from_pretrained(
-         "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
-     ).to("cuda")
-
-     # Load the pipeline
-     pipe = StableDiffusionXLFillPipeline.from_pretrained(
-         "SG161222/RealVisXL_V5.0_Lightning",
-         torch_dtype=torch.float16,
-         vae=vae,
-         controlnet=model,
-         variant="fp16",
-     ).to("cuda")
-
-     pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
-
-     OUTPAINT_MODEL_LOADED = True
- except Exception as e:
-     logging.error(f"Failed to load outpainting models: {str(e)}")
-     OUTPAINT_MODEL_LOADED = False

- # MMAudio model setup
  if torch.cuda.is_available():
      device = torch.device("cuda")
-     torch.backends.cuda.matmul.allow_tf32 = True
-     torch.backends.cudnn.allow_tf32 = True
-     torch.backends.cudnn.benchmark = True
  else:
      device = torch.device("cpu")

- dtype = torch.bfloat16
-
- # Initialize the MMAudio models
- try:
-     model_mmaudio: ModelConfig = all_model_cfg['large_44k_v2']
-     model_mmaudio.download_if_needed()
-     output_dir = Path('./output/gradio')
-     setup_eval_logging()
-
-     # Set up the translator
-     try:
-         translator = pipeline("translation",
-                               model="Helsinki-NLP/opus-mt-ko-en",
-                               device="cpu",
-                               use_fast=True,
-                               trust_remote_code=False)
-     except Exception as e:
-         logging.warning(f"Failed to load translation model: {e}")
-         translator = None
-
-     def get_mmaudio_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
-         with torch.cuda.device(device):
-             seq_cfg = model_mmaudio.seq_cfg
-             net: MMAudio = get_my_mmaudio(model_mmaudio.model_name).to(device, dtype).eval()
-             net.load_weights(torch.load(model_mmaudio.model_path, map_location=device, weights_only=True))
-             logging.info(f'Loaded weights from {model_mmaudio.model_path}')
-
-             feature_utils = FeaturesUtils(
-                 tod_vae_ckpt=model_mmaudio.vae_path,
-                 synchformer_ckpt=model_mmaudio.synchformer_ckpt,
-                 enable_conditions=True,
-                 mode=model_mmaudio.mode,
-                 bigvgan_vocoder_ckpt=model_mmaudio.bigvgan_16k_path,
-                 need_vae_encoder=False
-             ).to(device, dtype).eval()
-
-             return net, feature_utils, seq_cfg
-
-     net_mmaudio, feature_utils, seq_cfg = get_mmaudio_model()
-     MMAUDIO_MODEL_LOADED = True
- except Exception as e:
-     logging.error(f"Failed to load MMAudio models: {str(e)}")
-     MMAUDIO_MODEL_LOADED = False
-     translator = None

  # API URLs
  TEXT2IMG_API_URL = "http://211.233.58.201:7896"
  VIDEO_API_URL = "http://211.233.58.201:7875"

- # Logging setup
- logging.basicConfig(level=logging.INFO)
-
  # Image size presets
  IMAGE_PRESETS = {
      "์ปค์Šคํ…€": {"width": 1024, "height": 1024},
@@ -172,6 +224,120 @@ IMAGE_PRESETS = {
      "LinkedIn ๋ฐฐ๋„ˆ": {"width": 1584, "height": 396},
  }

  def update_dimensions(preset):
      if preset in IMAGE_PRESETS:
          return IMAGE_PRESETS[preset]["width"], IMAGE_PRESETS[preset]["height"]
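For context, `update_dimensions` is a plain dictionary lookup keyed by the preset name. A minimal self-contained sketch of the same behavior (the English keys and the fallback default are illustrative assumptions; the real `IMAGE_PRESETS` uses Korean labels, and its fallback branch is collapsed in this diff):

```python
# Hypothetical stand-in for IMAGE_PRESETS; the real keys are Korean labels.
PRESETS = {
    "square": {"width": 1024, "height": 1024},
    "linkedin_banner": {"width": 1584, "height": 396},
}

def update_dimensions_demo(preset, default=(1024, 1024)):
    # Known preset -> its stored size; unknown preset -> an assumed default.
    if preset in PRESETS:
        return PRESETS[preset]["width"], PRESETS[preset]["height"]
    return default

print(update_dimensions_demo("linkedin_banner"))  # (1584, 396)
```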
@@ -286,9 +452,7 @@ def prepare_image_and_mask(image, width, height, overlap_percentage, alignment):
      mask = Image.new('L', target_size, 255)
      mask_draw = ImageDraw.Draw(mask)

-     # Draw the mask regions (matching the alignment)
-     white_gaps_patch = 2
-
      left_overlap = margin_x + overlap_x if alignment != "์™ผ์ชฝ" else margin_x
      right_overlap = margin_x + new_width - overlap_x if alignment != "์˜ค๋ฅธ์ชฝ" else margin_x + new_width
      top_overlap = margin_y + overlap_y if alignment != "์œ„" else margin_y
@@ -322,13 +486,17 @@ def preview_outpaint(image, width, height, overlap_percentage, alignment):

      return preview

- @spaces.GPU(duration=24)
  def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8):
      """Run image outpainting"""
      if image is None:
          return None

-     if not OUTPAINT_MODEL_LOADED:
          return Image.new('RGB', (width, height), (200, 200, 200))

      try:
@@ -345,16 +513,16 @@ def outpaint_image(image, prompt, width, height, overlap_percentage, alignment,
      final_prompt = f"{prompt}, high quality, 4k" if prompt else "high quality, 4k"

      # Run on the GPU
-     with torch.autocast(device_type="cuda", dtype=torch.float16):
          (
              prompt_embeds,
              negative_prompt_embeds,
              pooled_prompt_embeds,
              negative_pooled_prompt_embeds,
-         ) = pipe.encode_prompt(final_prompt, "cuda", True)

          # Generation process
-         for generated_image in pipe(
              prompt_embeds=prompt_embeds,
              negative_prompt_embeds=negative_prompt_embeds,
              pooled_prompt_embeds=pooled_prompt_embeds,
@@ -381,23 +549,27 @@ def outpaint_image(image, prompt, width, height, overlap_percentage, alignment,
  # MMAudio-related functions
  def translate_prompt(text):
      try:
-         if translator is None:
              return text

          if text and any(ord(char) >= 0x3131 and ord(char) <= 0xD7A3 for char in text):
              with torch.no_grad():
-                 translation = translator(text)[0]['translation_text']
              return translation
          return text
      except Exception as e:
          logging.error(f"Translation error: {e}")
          return text

- @spaces.GPU
  @torch.inference_mode()
  def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
                     cfg_strength: float, duration: float):
-     if not MMAUDIO_MODEL_LOADED:
          return None

      prompt = translate_prompt(prompt)
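The Hangul test in `translate_prompt` keys off Unicode code points: U+3131 is the first Hangul Compatibility Jamo and U+D7A3 the last precomposed Hangul syllable, so any character in that range routes the prompt through the ko-to-en translator. A standalone sketch:

```python
# The same range check in isolation. Note the span also covers the CJK
# blocks that sit between the jamo and syllable ranges, so e.g. Chinese
# text would be routed to the Korean->English translator as well.
def contains_hangul(text: str) -> bool:
    return any(0x3131 <= ord(ch) <= 0xD7A3 for ch in text)

assert contains_hangul("피아노 음악")      # Korean: routed to the translator
assert not contains_hangul("piano music")  # ASCII: passed through unchanged
```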
@@ -410,14 +582,14 @@ def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int
      clip_frames, sync_frames, duration = load_video(video, duration)
      clip_frames = clip_frames.unsqueeze(0)
      sync_frames = sync_frames.unsqueeze(0)
-     seq_cfg.duration = duration
-     net_mmaudio.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)

      audios = generate(clip_frames,
                        sync_frames, [prompt],
                        negative_text=[negative_prompt],
-                       feature_utils=feature_utils,
-                       net=net_mmaudio,
                        fm=fm,
                        rng=rng,
                        cfg_strength=cfg_strength)
@@ -427,10 +599,310 @@ def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int
      make_video(video,
                 video_save_path,
                 audio,
-                sampling_rate=seq_cfg.sampling_rate,
-                duration_sec=seq_cfg.duration)
      return video_save_path

  # CSS
  css = """
  :root {
@@ -456,7 +928,7 @@ css = """
      padding: 20px !important;
      margin-bottom: 20px !important;
  }
- #generate-btn, #video-btn, #outpaint-btn, #preview-btn, #audio-btn {
      background: linear-gradient(135deg, #ff9a9e, #fad0c4) !important;
      font-size: 1.1rem !important;
      padding: 12px 24px !important;
@@ -473,6 +945,10 @@ demo = gr.Blocks(css=css, title="AI ์ด๋ฏธ์ง€ & ๋น„๋””์˜ค & ์˜ค๋””์˜ค ์ƒ์„ฑ๊ธฐ"

  with demo:
      gr.Markdown("# ๐ŸŽจ Ginigen ์ŠคํŠœ๋””์˜ค")

      with gr.Tabs() as tabs:
          # First tab: text to image
@@ -618,7 +1094,7 @@ with demo:
          gr.Markdown("### ๐ŸŽต ์˜ค๋””์˜ค ์ƒ์„ฑ ์„ค์ •")

          audio_prompt = gr.Textbox(
-             label="ํ”„๋กฌํ”„ํŠธ (ํ•œ๊ธ€ ์ง€์›)" if MMAUDIO_MODEL_LOADED and translator else "ํ”„๋กฌํ”„ํŠธ",
              placeholder="์ƒ์„ฑํ•˜๊ณ  ์‹ถ์€ ์˜ค๋””์˜ค๋ฅผ ์„ค๋ช…ํ•˜์„ธ์š”... (์˜ˆ: ํ‰ํ™”๋กœ์šด ํ”ผ์•„๋…ธ ์Œ์•…)",
              lines=3
          )
@@ -649,9 +1125,204 @@ with demo:
              label="์˜ค๋””์˜ค๊ฐ€ ์ถ”๊ฐ€๋œ ๋น„๋””์˜ค",
              interactive=False
          )

-     if not MMAUDIO_MODEL_LOADED:
-         gr.Markdown("โš ๏ธ MMAudio ๋ชจ๋ธ์„ ๋กœ๋“œํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ์ด ๊ธฐ๋Šฅ์€ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")

      # Event wiring - first tab
      size_preset.change(update_dimensions, [size_preset], [width, height])
@@ -689,5 +1360,47 @@ with demo:
          [audio_video_input, audio_prompt, audio_negative_prompt, audio_seed, audio_steps, audio_cfg, audio_duration],
          [output_video_with_audio]
      )

- demo.launch()
+ # Spaces GPU - must be imported first!
+ import os
+ IS_SPACES = os.environ.get("SPACE_ID") is not None
+
+ if IS_SPACES:
+     import spaces
+ else:
+     # Dummy decorator for when the GPU decorator is unavailable
+     class spaces:
+         @staticmethod
+         def GPU(duration=None):
+             def decorator(func):
+                 return func
+             return decorator
+
+ # Now import the other libraries
  import gradio as gr
  import numpy as np
  from PIL import Image, ImageDraw
  from gradio_client import Client, handle_file
  import random
  import tempfile
  import logging
  import torch
  from diffusers import AutoencoderKL, TCDScheduler

  from scipy.io import wavfile
  from transformers import pipeline
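The fallback above works because `spaces.GPU(duration=...)` is a decorator factory; the stub simply returns a decorator that hands the function back untouched, so the same source runs on and off Hugging Face Spaces. A standalone sketch (`spaces_stub` and `heavy_task` are illustrative names, not from the app):

```python
# Minimal reproduction of the no-op decorator factory used above.
class spaces_stub:
    @staticmethod
    def GPU(duration=None):
        def decorator(func):
            return func          # leave the function unchanged
        return decorator

@spaces_stub.GPU(duration=60)
def heavy_task():
    return "ran without a GPU queue"

print(heavy_task())  # works locally; on Spaces the real decorator queues a GPU
```

Note the stub only supports the called form `@spaces.GPU(...)`; the bare `@spaces.GPU` form the deleted code used would hand the function in as `duration` and break when called, which is presumably one reason every use in the new code passes a `duration`.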
+ # Video background removal imports
+ from transformers import AutoModelForImageSegmentation
+ from torchvision import transforms
+
+ # ── moviepy imports ─────────────────────────────────────────
+ try:
+     from moviepy.editor import (
+         VideoFileClip,
+         concatenate_videoclips,
+         ImageSequenceClip,
+         concatenate_audioclips,
+         AudioFileClip,
+         CompositeAudioClip,
+         CompositeVideoClip,
+         ColorClip
+     )
+ except ImportError:
+     # Try importing the pieces individually
+     try:
+         from moviepy.video.io.VideoFileClip import VideoFileClip
+     except ImportError:
+         from moviepy import VideoFileClip
+
+     try:
+         from moviepy.video.compositing.concatenate import concatenate_videoclips
+     except ImportError:
+         from moviepy import concatenate_videoclips
+
+     try:
+         from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
+     except ImportError:
+         from moviepy.editor import ImageSequenceClip
+
+     try:
+         from moviepy.audio.io.AudioFileClip import AudioFileClip
+     except ImportError:
+         from moviepy.editor import AudioFileClip
+
+     try:
+         from moviepy.audio.AudioClip import concatenate_audioclips, CompositeAudioClip
+     except ImportError:
+         from moviepy.editor import concatenate_audioclips, CompositeAudioClip
+
+     try:
+         from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
+     except ImportError:
+         from moviepy.editor import CompositeVideoClip
+
+     try:
+         from moviepy.video.VideoClip import ColorClip
+     except ImportError:
+         from moviepy.editor import ColorClip
+
+ # Try to import the resize function
+ resize = None
+ try:
+     from moviepy.video.fx.resize import resize
+ except ImportError:
+     try:
+         from moviepy.video.fx.all import resize
+     except ImportError:
+         try:
+             # Try importing via the editor module
+             from moviepy.editor import resize
+         except ImportError:
+             pass  # resize could not be found
+
+ # If resize is missing, create a fallback function
+ if resize is None:
+     def resize(clip, newsize=None, height=None, width=None):
+         """Fallback resize function when moviepy resize is not available"""
+         if hasattr(clip, 'resize'):
+             if newsize:
+                 return clip.resize(newsize)
+             elif height:
+                 return clip.resize(height=height)
+             elif width:
+                 return clip.resize(width=width)
+         # If resizing is impossible, return the original clip
+         return clip
+
+ # Try to import the speedx function
+ speedx = None
+ try:
+     from moviepy.video.fx.speedx import speedx
+ except ImportError:
+     try:
+         from moviepy.video.fx.all import speedx
+     except ImportError:
+         try:
+             from moviepy.editor import speedx
+         except ImportError:
+             pass  # speedx could not be found
+
+ # If speedx is missing, create a fallback function
+ if speedx is None:
+     def speedx(clip, factor=1.0, final_duration=None):
+         """Fallback speedx function"""
+         if hasattr(clip, 'fx') and hasattr(clip.fx, 'speedx'):
+             return clip.fx.speedx(factor, final_duration)
+         elif hasattr(clip, 'fl_time'):
+             return clip.fl_time(lambda t: t * factor)
+         elif hasattr(clip, 'with_fps') and factor != 1.0:
+             # Approximate the speed change by adjusting the FPS
+             new_fps = clip.fps * factor if hasattr(clip, 'fps') else 24 * factor
+             return clip.with_fps(new_fps)
+         else:
+             # Last resort: return the clip unchanged
+             return clip
+
+ import time
+ from concurrent.futures import ThreadPoolExecutor
+
+ # ────────────────────────────────────────────────────────────
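This cascade exists because moviepy moved its public names between releases: moviepy 1.x exposed everything through `moviepy.editor`, while moviepy 2.x removed that module in favor of top-level imports. Condensed to one name, the pattern looks like this (the file path is illustrative):

```python
# Version-tolerant import: try the moviepy 1.x namespace first, then the
# moviepy 2.x layout, and only synthesize a stand-in if both fail.
try:
    from moviepy.editor import VideoFileClip   # moviepy 1.x
except ImportError:
    try:
        from moviepy import VideoFileClip      # moviepy 2.x
    except ImportError:
        VideoFileClip = None                   # caller must handle the gap

if VideoFileClip is not None:
    clip = VideoFileClip("input.mp4")          # works under either version
    print(clip.duration)
```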
  # Bypass the torch.load safety check via an environment variable (temporary workaround)
  os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"

+ # Small function for GPU initialization (required in the Spaces environment)
+ @spaces.GPU(duration=1)
+ def gpu_warmup():
+     """GPU warm-up function - needed to use the GPU in the Spaces environment"""
+     if torch.cuda.is_available():
+         dummy = torch.zeros(1).cuda()
+         del dummy
+     return "GPU ready"

+ # MMAudio imports - must come after the spaces import
  try:
      import mmaudio
  except ImportError:

  from mmaudio.model.sequence_config import SequenceConfig
  from mmaudio.model.utils.features_utils import FeaturesUtils

+ # Logging setup
+ logging.basicConfig(level=logging.INFO)
+
+ # Keep all settings and initialization from the original code
+ torch.set_float32_matmul_precision("medium")
+ # Make the device setup more explicit
  if torch.cuda.is_available():
      device = torch.device("cuda")
+     torch_dtype = torch.float16
  else:
      device = torch.device("cpu")
+     torch_dtype = torch.float32

+ logging.info(f"Using device: {device}")

+ # Manage model state via global variables
+ MODELS_LOADED = False
+ BIREFNET_MODEL = None
+ BIREFNET_LITE_MODEL = None
+ OUTPAINT_PIPE = None
+ MMAUDIO_NET = None
+ MMAUDIO_FEATURE_UTILS = None
+ MMAUDIO_SEQ_CFG = None
+ TRANSLATOR = None
  # API URLs
  TEXT2IMG_API_URL = "http://211.233.58.201:7896"
  VIDEO_API_URL = "http://211.233.58.201:7875"

  # Image size presets
  IMAGE_PRESETS = {
      "์ปค์Šคํ…€": {"width": 1024, "height": 1024},

      "LinkedIn ๋ฐฐ๋„ˆ": {"width": 1584, "height": 396},
  }

+ # Transform for BiRefNet
+ transform_image = transforms.Compose([
+     transforms.Resize((768, 768)),
+     transforms.ToTensor(),
+     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+ ])
+
+ @spaces.GPU(duration=60)
+ def load_models():
+     """Load all models"""
+     global MODELS_LOADED, BIREFNET_MODEL, BIREFNET_LITE_MODEL, OUTPAINT_PIPE
+     global MMAUDIO_NET, MMAUDIO_FEATURE_UTILS, MMAUDIO_SEQ_CFG, TRANSLATOR
+
+     if MODELS_LOADED:
+         return True
+
+     try:
+         # Load the BiRefNet models
+         logging.info("Loading BiRefNet models...")
+         BIREFNET_MODEL = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet", trust_remote_code=True)
+         BIREFNET_MODEL.to(device)
+         BIREFNET_LITE_MODEL = AutoModelForImageSegmentation.from_pretrained("ZhengPeng7/BiRefNet_lite", trust_remote_code=True)
+         BIREFNET_LITE_MODEL.to(device)
+
+         # Load the ControlNet and outpainting models
+         logging.info("Loading ControlNet models...")
+         from controlnet_union import ControlNetModel_Union
+         from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
+
+         config_file = hf_hub_download(
+             "xinsir/controlnet-union-sdxl-1.0",
+             filename="config_promax.json",
+         )
+
+         config = ControlNetModel_Union.load_config(config_file)
+         controlnet_model = ControlNetModel_Union.from_config(config)
+
+         model_file = hf_hub_download(
+             "xinsir/controlnet-union-sdxl-1.0",
+             filename="diffusion_pytorch_model_promax.safetensors",
+         )
+         state_dict = load_state_dict(model_file)
+         loaded_keys = list(state_dict.keys())
+
+         result = ControlNetModel_Union._load_pretrained_model(
+             controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0", loaded_keys
+         )
+
+         model = result[0]
+         model = model.to(device=device, dtype=torch_dtype)
+
+         # Load the VAE
+         vae = AutoencoderKL.from_pretrained(
+             "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
+         ).to(device)
+
+         # Load the pipeline
+         OUTPAINT_PIPE = StableDiffusionXLFillPipeline.from_pretrained(
+             "SG161222/RealVisXL_V5.0_Lightning",
+             torch_dtype=torch_dtype,
+             vae=vae,
+             controlnet=model,
+             variant="fp16" if device.type == "cuda" else None,
+         ).to(device)
+
+         OUTPAINT_PIPE.scheduler = TCDScheduler.from_config(OUTPAINT_PIPE.scheduler.config)
+
+         # Load the MMAudio models
+         logging.info("Loading MMAudio models...")
+         model_mmaudio: ModelConfig = all_model_cfg['large_44k_v2']
+         model_mmaudio.download_if_needed()
+         setup_eval_logging()
+
+         # Set up the translator
+         try:
+             TRANSLATOR = pipeline("translation",
+                                   model="Helsinki-NLP/opus-mt-ko-en",
+                                   device="cpu",
+                                   use_fast=True,
+                                   trust_remote_code=False)
+         except Exception as e:
+             logging.warning(f"Failed to load translation model: {e}")
+             TRANSLATOR = None
+
+         # Initialize the MMAudio model
+         if torch.cuda.is_available():
+             mmaudio_dtype = torch.bfloat16
+         else:
+             mmaudio_dtype = torch.float32
+
+         with torch.cuda.device(device):
+             MMAUDIO_SEQ_CFG = model_mmaudio.seq_cfg
+             MMAUDIO_NET = get_my_mmaudio(model_mmaudio.model_name).to(device, mmaudio_dtype).eval()
+             MMAUDIO_NET.load_weights(torch.load(model_mmaudio.model_path, map_location=device, weights_only=True))
+             logging.info(f'Loaded weights from {model_mmaudio.model_path}')
+
+             MMAUDIO_FEATURE_UTILS = FeaturesUtils(
+                 tod_vae_ckpt=model_mmaudio.vae_path,
+                 synchformer_ckpt=model_mmaudio.synchformer_ckpt,
+                 enable_conditions=True,
+                 mode=model_mmaudio.mode,
+                 bigvgan_vocoder_ckpt=model_mmaudio.bigvgan_16k_path,
+                 need_vae_encoder=False
+             ).to(device, mmaudio_dtype).eval()
+
+         MODELS_LOADED = True
+         logging.info("All models loaded successfully!")
+         return True
+
+     except Exception as e:
+         logging.error(f"Failed to load models: {str(e)}")
+         return False
+
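`load_models()` is a lazy singleton: the first caller pays the full load cost, and every later call returns immediately via the `MODELS_LOADED` flag. The pattern in miniature (names here are illustrative, not the app's):

```python
# Module-level cache guarded by a flag, as load_models() does above.
_loaded = False
_model = None

def ensure_model():
    global _loaded, _model
    if _loaded:
        return _model            # cheap path after the first call
    _model = object()            # stand-in for an expensive from_pretrained()
    _loaded = True
    return _model

ensure_model()  # slow: performs the load
ensure_model()  # fast: returns the cached instance
```

One caveat worth noting: the flag is not protected by a lock, so two concurrent requests arriving before the first load finishes could both enter the load path; a `threading.Lock` around the check would close that race if it matters in practice.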
+ # Keep all the existing functions
  def update_dimensions(preset):
      if preset in IMAGE_PRESETS:
          return IMAGE_PRESETS[preset]["width"], IMAGE_PRESETS[preset]["height"]

      mask = Image.new('L', target_size, 255)
      mask_draw = ImageDraw.Draw(mask)

+     # Draw the mask regions
      left_overlap = margin_x + overlap_x if alignment != "์™ผ์ชฝ" else margin_x
      right_overlap = margin_x + new_width - overlap_x if alignment != "์˜ค๋ฅธ์ชฝ" else margin_x + new_width
      top_overlap = margin_y + overlap_y if alignment != "์œ„" else margin_y

      return preview

+ @spaces.GPU(duration=120)
  def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8):
      """Run image outpainting"""
      if image is None:
          return None

+     # Make sure the models are loaded
+     if not MODELS_LOADED:
+         load_models()
+
+     if OUTPAINT_PIPE is None:
          return Image.new('RGB', (width, height), (200, 200, 200))

      try:

      final_prompt = f"{prompt}, high quality, 4k" if prompt else "high quality, 4k"

      # Run on the GPU
+     with torch.autocast(device_type=device.type, dtype=torch_dtype):
          (
              prompt_embeds,
              negative_prompt_embeds,
              pooled_prompt_embeds,
              negative_pooled_prompt_embeds,
+         ) = OUTPAINT_PIPE.encode_prompt(final_prompt, str(device), True)

          # Generation process
+         for generated_image in OUTPAINT_PIPE(
              prompt_embeds=prompt_embeds,
              negative_prompt_embeds=negative_prompt_embeds,
              pooled_prompt_embeds=pooled_prompt_embeds,

  # MMAudio-related functions
  def translate_prompt(text):
      try:
+         if TRANSLATOR is None:
              return text

          if text and any(ord(char) >= 0x3131 and ord(char) <= 0xD7A3 for char in text):
              with torch.no_grad():
+                 translation = TRANSLATOR(text)[0]['translation_text']
              return translation
          return text
      except Exception as e:
          logging.error(f"Translation error: {e}")
          return text

+ @spaces.GPU(duration=120)
  @torch.inference_mode()
  def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
                     cfg_strength: float, duration: float):
+     # Make sure the models are loaded
+     if not MODELS_LOADED:
+         load_models()
+
+     if MMAUDIO_NET is None:
          return None

      prompt = translate_prompt(prompt)

      clip_frames, sync_frames, duration = load_video(video, duration)
      clip_frames = clip_frames.unsqueeze(0)
      sync_frames = sync_frames.unsqueeze(0)
+     MMAUDIO_SEQ_CFG.duration = duration
+     MMAUDIO_NET.update_seq_lengths(MMAUDIO_SEQ_CFG.latent_seq_len, MMAUDIO_SEQ_CFG.clip_seq_len, MMAUDIO_SEQ_CFG.sync_seq_len)

      audios = generate(clip_frames,
                        sync_frames, [prompt],
                        negative_text=[negative_prompt],
+                       feature_utils=MMAUDIO_FEATURE_UTILS,
+                       net=MMAUDIO_NET,
                        fm=fm,
                        rng=rng,
                        cfg_strength=cfg_strength)

      make_video(video,
                 video_save_path,
                 audio,
+                sampling_rate=MMAUDIO_SEQ_CFG.sampling_rate,
+                duration_sec=MMAUDIO_SEQ_CFG.duration)
      return video_save_path

+ # Video background removal functions
+ def process_bg_image(image, bg, fast_mode=False):
+     """Process the background of a single image"""
+     if BIREFNET_MODEL is None or BIREFNET_LITE_MODEL is None:
+         return image
+
+     image_size = image.size
+     input_images = transform_image(image).unsqueeze(0).to(device)
+     model = BIREFNET_LITE_MODEL if fast_mode else BIREFNET_MODEL
+
+     with torch.no_grad():
+         preds = model(input_images)[-1].sigmoid().cpu()
+     pred = preds[0].squeeze()
+     pred_pil = transforms.ToPILImage()(pred)
+     mask = pred_pil.resize(image_size)
+
+     if isinstance(bg, str) and bg.startswith("#"):
+         color_rgb = tuple(int(bg[i:i+2], 16) for i in (1, 3, 5))
+         background = Image.new("RGBA", image_size, color_rgb + (255,))
+     elif isinstance(bg, Image.Image):
+         background = bg.convert("RGBA").resize(image_size)
+     else:
+         background = Image.open(bg).convert("RGBA").resize(image_size)
+
+     image = Image.composite(image, background, mask)
+     return image
+
+ def process_video_frame(frame, bg_type, bg, fast_mode, bg_frame_index, background_frames, color):
+     """Process a single video frame"""
+     try:
+         pil_image = Image.fromarray(frame)
+         if bg_type == "์ƒ‰์ƒ":
+             processed_image = process_bg_image(pil_image, color, fast_mode)
+         elif bg_type == "์ด๋ฏธ์ง€":
+             processed_image = process_bg_image(pil_image, bg, fast_mode)
+         elif bg_type == "๋น„๋””์˜ค":
+             background_frame = background_frames[bg_frame_index]
+             bg_frame_index += 1
+             background_image = Image.fromarray(background_frame)
+             processed_image = process_bg_image(pil_image, background_image, fast_mode)
+         else:
+             processed_image = pil_image
+         return np.array(processed_image), bg_frame_index
+     except Exception as e:
+         print(f"Error processing frame: {e}")
+         return frame, bg_frame_index
+
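The `"#RRGGBB"` branch in `process_bg_image` slices the hex string into three two-character pairs and parses each as base 16, which gives `Image.composite` a solid backdrop to paint behind the predicted mask. In isolation (the 640x360 size is illustrative):

```python
from PIL import Image

bg = "#00FF00"
# Offsets 1, 3, 5 skip the "#" and pick out the RR, GG, BB pairs.
color_rgb = tuple(int(bg[i:i + 2], 16) for i in (1, 3, 5))
print(color_rgb)  # (0, 255, 0)

# Opaque solid-color backdrop that the segmentation mask composites over.
background = Image.new("RGBA", (640, 360), color_rgb + (255,))
```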
+ @spaces.GPU(duration=300)
+ def process_video_bg(vid, bg_type="์ƒ‰์ƒ", bg_image=None, bg_video=None, color="#00FF00",
+                      fps=0, video_handling="slow_down", fast_mode=True, max_workers=10):
+     """Main video background-processing function"""
+     # Make sure the models are loaded
+     if not MODELS_LOADED:
+         load_models()
+
+     if BIREFNET_MODEL is None:
+         yield gr.update(visible=False), gr.update(visible=True), "BiRefNet ๋ชจ๋ธ์„ ๋กœ๋“œํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."
+         yield None, None, "BiRefNet ๋ชจ๋ธ์„ ๋กœ๋“œํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."
+         return
+
+     try:
+         start_time = time.time()
+         video = VideoFileClip(vid)
+         if fps == 0:
+             fps = video.fps
+
+         audio = video.audio
+         frames = list(video.iter_frames(fps=fps))
+
+         processed_frames = []
+         yield gr.update(visible=True), gr.update(visible=False), f"์ฒ˜๋ฆฌ ์‹œ์ž‘... ๊ฒฝ๊ณผ ์‹œ๊ฐ„: 0์ดˆ"
+
+         if bg_type == "๋น„๋””์˜ค":
+             background_video = VideoFileClip(bg_video)
+             if background_video.duration < video.duration:
+                 if video_handling == "slow_down":
+                     # Use the speedx function directly instead of vfx.speedx
+                     if speedx is not None:
+                         background_video = speedx(background_video, factor=video.duration / background_video.duration)
+                     else:
+                         # If speedx is unavailable, fall back to looping
+                         background_video = concatenate_videoclips([background_video] * int(video.duration / background_video.duration + 1))
+                 else:  # video_handling == "loop"
+                     background_video = concatenate_videoclips([background_video] * int(video.duration / background_video.duration + 1))
+             background_frames = list(background_video.iter_frames(fps=fps))
+         else:
+             background_frames = None
+
+         bg_frame_index = 0
+
+         with ThreadPoolExecutor(max_workers=max_workers) as executor:
+             futures = [executor.submit(process_video_frame, frames[i], bg_type, bg_image, fast_mode,
+                                        bg_frame_index + i, background_frames, color) for i in range(len(frames))]
+             for i, future in enumerate(futures):
+                 result, _ = future.result()
+                 processed_frames.append(result)
+                 elapsed_time = time.time() - start_time
+                 yield result, None, f"ํ”„๋ ˆ์ž„ {i+1}/{len(frames)} ์ฒ˜๋ฆฌ ์ค‘... ๊ฒฝ๊ณผ ์‹œ๊ฐ„: {elapsed_time:.2f}์ดˆ"
+
+         processed_video = ImageSequenceClip(processed_frames, fps=fps)
+         processed_video = processed_video.with_audio(audio)
+
+         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
+             temp_filepath = temp_file.name
+             processed_video.write_videofile(temp_filepath, codec="libx264")
+
+         elapsed_time = time.time() - start_time
+         yield gr.update(visible=False), gr.update(visible=True), f"์ฒ˜๋ฆฌ ์™„๋ฃŒ! ๊ฒฝ๊ณผ ์‹œ๊ฐ„: {elapsed_time:.2f}์ดˆ"
+         yield processed_frames[-1], temp_filepath, f"์ฒ˜๋ฆฌ ์™„๋ฃŒ! ๊ฒฝ๊ณผ ์‹œ๊ฐ„: {elapsed_time:.2f}์ดˆ"
+
+     except Exception as e:
+         print(f"Error: {e}")
+         elapsed_time = time.time() - start_time
+         yield gr.update(visible=False), gr.update(visible=True), f"๋น„๋””์˜ค ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}. ๊ฒฝ๊ณผ ์‹œ๊ฐ„: {elapsed_time:.2f}์ดˆ"
+         yield None, None, f"๋น„๋””์˜ค ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}. ๊ฒฝ๊ณผ ์‹œ๊ฐ„: {elapsed_time:.2f}์ดˆ"
+
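Because `process_video_bg` is a generator, Gradio streams every `yield` into the three bound outputs (the live preview image, the final video, and the elapsed-time textbox) while frames are still being processed, rather than waiting for a single return value. A minimal sketch of the same wiring, with hypothetical component names:

```python
import time
import gradio as gr

def slow_task(n):
    # Each yield pushes a fresh value into the bound output component.
    for i in range(int(n)):
        time.sleep(0.5)
        yield f"step {i + 1}/{int(n)}"

with gr.Blocks() as streaming_demo:
    steps = gr.Number(value=4, label="steps")
    status = gr.Textbox(label="status")
    gr.Button("run").click(slow_task, inputs=steps, outputs=status)

# streaming_demo.launch()
```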
+ @spaces.GPU(duration=180)
+ def merge_videos_with_audio(video_files, audio_file, audio_volume, output_fps):
+     """Merge multiple videos and add an audio track"""
+     if not video_files:
+         return None, "๋น„๋””์˜ค ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”."
+
+     if isinstance(video_files, list) and len(video_files) > 10:
+         return None, "์ตœ๋Œ€ 10๊ฐœ์˜ ๋น„๋””์˜ค๋งŒ ์—…๋กœ๋“œ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค."
+
+     try:
+         # Status update
+         status = "๋น„๋””์˜ค ํŒŒ์ผ ์ •๋ ฌ ์ค‘..."
+
+         # Collect the file paths, then sort them by file name
+         video_paths = []
+         if isinstance(video_files, list):
+             for video_file in video_files:
+                 if video_file is not None:
+                     video_paths.append(video_file)
+         else:
+             video_paths.append(video_files)
+
+         # Sort by file name (extract just the base name from each path)
+         video_paths.sort(key=lambda x: os.path.basename(x))
+
+         status = f"{len(video_paths)}๊ฐœ์˜ ๋น„๋””์˜ค ๋กœ๋“œ ์ค‘..."
+
+         # Load the video clips
+         video_clips = []
+         clip_sizes = []
+
+         for i, video_path in enumerate(video_paths):
+             status = f"๋น„๋””์˜ค {i+1}/{len(video_paths)} ๋กœ๋“œ ์ค‘: {os.path.basename(video_path)}"
+             clip = VideoFileClip(video_path)
+             video_clips.append(clip)
+
+             # Record each clip's size
+             try:
+                 clip_sizes.append((clip.w, clip.h))
+             except:
+                 clip_sizes.append(clip.size)
+
+         # Use the first video's size as the reference
+         target_width, target_height = clip_sizes[0]
+
+         # Check whether all videos share the same size
+         all_same_size = all(size == (target_width, target_height) for size in clip_sizes)
+
+         if not all_same_size:
+             logging.warning(f"๋น„๋””์˜ค ํฌ๊ธฐ๊ฐ€ ์„œ๋กœ ๋‹ค๋ฆ…๋‹ˆ๋‹ค. ์ฒซ ๋ฒˆ์งธ ๋น„๋””์˜ค ํฌ๊ธฐ({target_width}x{target_height})๋กœ ์กฐ์ •ํ•ฉ๋‹ˆ๋‹ค.")
+
+             # Adjust the clips whose size differs
+             adjusted_clips = []
+             for clip, size in zip(video_clips, clip_sizes):
+                 if size != (target_width, target_height):
+                     # Use the resize function if available, otherwise fall back
+                     if resize is not None:
+                         adjusted_clip = resize(clip, newsize=(target_width, target_height))
+                     else:
+                         # Fallback when resize is unavailable:
+                         # try the clip.resize() method
+                         if hasattr(clip, 'resize'):
+                             adjusted_clip = clip.resize((target_width, target_height))
+                         else:
+                             # Last resort: use the clip as-is
+                             adjusted_clip = clip
+                             logging.warning(f"Cannot resize video. Using original size.")
+                     adjusted_clips.append(adjusted_clip)
+                 else:
+                     adjusted_clips.append(clip)
+
+             video_clips = adjusted_clips
+
+         # Use the first video's FPS as the default
+         if output_fps == 0:
+             output_fps = video_clips[0].fps
+
+         status = "๋น„๋””์˜ค ๋ณ‘ํ•ฉ ์ค‘..."
+
+         # Merge the videos
+         final_video = concatenate_videoclips(video_clips, method="compose")
+
+         # Audio handling
+         if audio_file:
+             status = "์˜ค๋””์˜ค ์ฒ˜๋ฆฌ ์ค‘..."
+
+             try:
+                 # Determine the audio file path
+                 if isinstance(audio_file, str):
+                     audio_path = audio_file
+                 else:
+                     # In case gr.Audio returned a tuple
+                     audio_path = audio_file
+
+                 logging.info(f"Processing audio from: {audio_path}")
+
+                 # Load the audio
+                 if audio_path.endswith(('.mp4', '.avi', '.mov', '.mkv')):
+                     # Extract the audio from a video file
+                     temp_video = VideoFileClip(audio_path)
+                     audio_clip = temp_video.audio
+                     temp_video.close()
+                 else:
+                     # Load the audio file directly
+                     audio_clip = AudioFileClip(audio_path)
+
+                 if audio_clip is None:
+                     raise ValueError("์˜ค๋””์˜ค๋ฅผ ๋กœ๋“œํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
+
+                 # Volume adjustment
+                 if audio_volume != 100:
+                     audio_clip = audio_clip.volumex(audio_volume / 100)
+
+                 # Fit the audio to the video length
+                 video_duration = final_video.duration
+                 audio_duration = audio_clip.duration
+
+                 if audio_duration > video_duration:
+                     # If the audio is longer, trim it
+                     audio_clip = audio_clip.subclip(0, video_duration)
+                 elif audio_duration < video_duration:
+                     # If the audio is shorter, loop it
+                     loops_needed = int(video_duration / audio_duration) + 1
+                     audio_clips_list = [audio_clip] * loops_needed
+                     looped_audio = concatenate_audioclips(audio_clips_list)
+                     audio_clip = looped_audio.subclip(0, video_duration)
+
+                 # Drop the existing audio and replace it with the new track
+                 # (uncomment below to mix with the existing audio instead)
+                 final_video = final_video.set_audio(audio_clip)
+
+                 # To composite the existing audio with the new audio:
+                 # if final_video.audio:
+                 #     final_audio = CompositeAudioClip([final_video.audio, audio_clip])
+                 #     final_video = final_video.set_audio(final_audio)
+                 # else:
+                 #     final_video = final_video.set_audio(audio_clip)
+
+                 logging.info("Audio successfully added to video")
+
+             except Exception as e:
+                 logging.error(f"์˜ค๋””์˜ค ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}")
+                 # Keep processing the video even if audio handling fails
+                 status = f"์˜ค๋””์˜ค ์ฒ˜๋ฆฌ ์‹คํŒจ: {str(e)}, ๋น„๋””์˜ค๋งŒ ๋ณ‘ํ•ฉํ•ฉ๋‹ˆ๋‹ค."
+
+         status = "๋น„๋””์˜ค ์ €์žฅ ์ค‘..."
+
+         # Save to a temporary file
+         with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
+             temp_filepath = temp_file.name
+
+         # Codec settings - preserve the original quality
+         final_video.write_videofile(
+             temp_filepath,
+             fps=output_fps,
+             codec="libx264",
+             audio_codec="aac",
+             preset="medium",  # quality setting
+             bitrate="5000k",  # bitrate setting to preserve quality
+             audio_bitrate="192k"
+         )
+
+         # Clean up resources
+         for clip in video_clips:
+             clip.close()
+         if 'adjusted_clips' in locals():
+             for clip in adjusted_clips:
+                 if clip not in video_clips:
+                     clip.close()
+         if audio_file and 'audio_clip' in locals():
+             audio_clip.close()
+         final_video.close()
+
+         return temp_filepath, f"โœ… ์„ฑ๊ณต์ ์œผ๋กœ {len(video_paths)}๊ฐœ์˜ ๋น„๋””์˜ค๋ฅผ ๋ณ‘ํ•ฉํ–ˆ์Šต๋‹ˆ๋‹ค! (ํฌ๊ธฐ: {target_width}x{target_height})"
+
+     except Exception as e:
+         logging.error(f"Video merge error: {str(e)}")
+         import traceback
+         traceback.print_exc()
+         return None, f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
+
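The loop-and-trim logic in `merge_videos_with_audio` is easiest to see with concrete numbers: for a 95 s video and a 30 s track, `int(95 / 30) + 1 = 4` copies give 120 s of audio, and the final `subclip(0, video_duration)` cuts that back to exactly 95 s. The arithmetic on its own:

```python
video_duration = 95.0   # seconds of merged video
audio_duration = 30.0   # seconds of uploaded track

loops_needed = int(video_duration / audio_duration) + 1
print(loops_needed)                   # 4 copies of the track
print(loops_needed * audio_duration)  # 120.0 s, then trimmed back to 95.0 s
# Note: when the durations divide evenly, the "+ 1" yields one spare copy,
# which is harmless since the subsequent trim discards the excess.
```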
  # CSS
  css = """
  :root {

      padding: 20px !important;
      margin-bottom: 20px !important;
  }
+ #generate-btn, #video-btn, #outpaint-btn, #preview-btn, #audio-btn, #bg-remove-btn, #merge-btn {
      background: linear-gradient(135deg, #ff9a9e, #fad0c4) !important;
      font-size: 1.1rem !important;
      padding: 12px 24px !important;

  with demo:
      gr.Markdown("# ๐ŸŽจ Ginigen ์ŠคํŠœ๋””์˜ค")
+     gr.Markdown("์ฒ˜์Œ ์‚ฌ์šฉ ์‹œ ๋ชจ๋ธ ๋กœ๋”ฉ์— ์‹œ๊ฐ„์ด ๊ฑธ๋ฆด ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ์ž ์‹œ๋งŒ ๊ธฐ๋‹ค๋ ค์ฃผ์„ธ์š”.")
+
+     # Display the model load status
+     model_status = gr.Textbox(label="๋ชจ๋ธ ์ƒํƒœ", value="๋ชจ๋ธ ๋กœ๋”ฉ ๋Œ€๊ธฐ ์ค‘...", interactive=False)

      with gr.Tabs() as tabs:
          # First tab: text to image

          gr.Markdown("### ๐ŸŽต ์˜ค๋””์˜ค ์ƒ์„ฑ ์„ค์ •")

          audio_prompt = gr.Textbox(
+             label="ํ”„๋กฌํ”„ํŠธ (ํ•œ๊ธ€ ์ง€์›)",
              placeholder="์ƒ์„ฑํ•˜๊ณ  ์‹ถ์€ ์˜ค๋””์˜ค๋ฅผ ์„ค๋ช…ํ•˜์„ธ์š”... (์˜ˆ: ํ‰ํ™”๋กœ์šด ํ”ผ์•„๋…ธ ์Œ์•…)",
              lines=3
          )

              label="์˜ค๋””์˜ค๊ฐ€ ์ถ”๊ฐ€๋œ ๋น„๋””์˜ค",
              interactive=False
          )
+
+         # Fourth tab: video editing
+         with gr.Tab("๋น„๋””์˜ค ํŽธ์ง‘", elem_classes="tabitem"):
+             with gr.Row(equal_height=True):
+                 # Input column
+                 with gr.Column(scale=1):
+                     with gr.Group(elem_classes="panel-box"):
+                         gr.Markdown("### ๐ŸŽฅ ๋น„๋””์˜ค ์—…๋กœ๋“œ (์ตœ๋Œ€ 10๊ฐœ)")
+                         gr.Markdown("**ํŒŒ์ผ๋ช…์ด ์ž‘์„์ˆ˜๋ก ์šฐ์„ ์ˆœ์œ„๊ฐ€ ๋†’์Šต๋‹ˆ๋‹ค** (์˜ˆ: 1.mp4, 2.mp4, 3.mp4)")
+
+                         video_files = gr.File(
+                             label="๋น„๋””์˜ค ํŒŒ์ผ๋“ค",
+                             file_count="multiple",
+                             file_types=["video"],
+                             type="filepath"
+                         )
+
+                     with gr.Group(elem_classes="panel-box"):
+                         gr.Markdown("### ๐ŸŽต ์˜ค๋””์˜ค ์„ค์ • (์„ ํƒ)")
+                         gr.Markdown("**์ฃผ์˜**: ์—…๋กœ๋“œํ•œ ์˜ค๋””์˜ค๊ฐ€ ๋น„๋””์˜ค์˜ ๊ธฐ์กด ์˜ค๋””์˜ค๋ฅผ ์™„์ „ํžˆ ๋Œ€์ฒดํ•ฉ๋‹ˆ๋‹ค.")
+
+                         audio_file = gr.Audio(
+                             label="์˜ค๋””์˜ค ํŒŒ์ผ (MP3, WAV, M4A ๋“ฑ)",
+                             type="filepath",
+                             sources=["upload"]
+                         )
+
+                         audio_volume = gr.Slider(
+                             minimum=0,
+                             maximum=200,
+                             value=100,
+                             step=1,
+                             label="์˜ค๋””์˜ค ๋ณผ๋ฅจ (%)",
+                             info="100% = ์›๋ณธ ๋ณผ๋ฅจ"
+                         )
+
+                         gr.Markdown("""
+                         **์˜ค๋””์˜ค ์˜ต์…˜**:
+                         - ์˜ค๋””์˜ค๊ฐ€ ๋น„๋””์˜ค๋ณด๋‹ค ์งง์œผ๋ฉด ์ž๋™์œผ๋กœ ๋ฐ˜๋ณต๋ฉ๋‹ˆ๋‹ค
+                         - ์˜ค๋””์˜ค๊ฐ€ ๋น„๋””์˜ค๋ณด๋‹ค ๊ธธ๋ฉด ๋น„๋””์˜ค ๊ธธ์ด์— ๋งž์ถฐ ์ž˜๋ฆฝ๋‹ˆ๋‹ค
+                         """)
+
+                     with gr.Group(elem_classes="panel-box"):
+                         gr.Markdown("### โš™๏ธ ํŽธ์ง‘ ์„ค์ •")
+
+                         output_fps = gr.Slider(
+                             minimum=0,
+                             maximum=60,
+                             value=0,
+                             step=1,
+                             label="์ถœ๋ ฅ FPS (0 = ์ฒซ ๋ฒˆ์งธ ๋น„๋””์˜ค์˜ FPS ์‚ฌ์šฉ)"
+                         )
+
+                         gr.Markdown("""
+                         **ํฌ๊ธฐ ์ฒ˜๋ฆฌ**:
+                         - ์ฒซ ๋ฒˆ์งธ ๋น„๋””์˜ค์˜ ํฌ๊ธฐ๊ฐ€ ๊ธฐ์ค€์ด ๋ฉ๋‹ˆ๋‹ค
+                         - ๋‹ค๋ฅธ ํฌ๊ธฐ์˜ ๋น„๋””์˜ค๋Š” ์ฒซ ๋ฒˆ์งธ ๋น„๋””์˜ค ํฌ๊ธฐ๋กœ ์กฐ์ •๋ฉ๋‹ˆ๋‹ค
+                         - ์ตœ์ƒ์˜ ๊ฒฐ๊ณผ๋ฅผ ์œ„ํ•ด ๊ฐ™์€ ํฌ๊ธฐ์˜ ๋น„๋””์˜ค๋ฅผ ์‚ฌ์šฉํ•˜์„ธ์š”
+                         """)
+
+                     merge_videos_btn = gr.Button("๐ŸŽฌ ๋น„๋””์˜ค ๋ณ‘ํ•ฉ", variant="primary", elem_id="merge-btn")
+
+                 # Output column
+                 with gr.Column(scale=1):
+                     with gr.Group(elem_classes="panel-box"):
+                         gr.Markdown("### ๐ŸŽฌ ๋ณ‘ํ•ฉ ๊ฒฐ๊ณผ")
+
+                         merge_status = gr.Textbox(label="์ฒ˜๋ฆฌ ์ƒํƒœ", interactive=False)
+                         merged_video = gr.Video(label="๋ณ‘ํ•ฉ๋œ ๋น„๋””์˜ค")
+
+                         gr.Markdown("""
+                         ### โ„น๏ธ ์‚ฌ์šฉ ๋ฐฉ๋ฒ•
+                         1. ์—ฌ๋Ÿฌ ๋น„๋””์˜ค ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•˜์„ธ์š” (์ตœ๋Œ€ 10๊ฐœ)
+                         2. ํŒŒ์ผ๋ช…์ด ์ž‘์€ ์ˆœ์„œ๋Œ€๋กœ ์ž๋™ ์ •๋ ฌ๋ฉ๋‹ˆ๋‹ค
+                         3. (์„ ํƒ) ์˜ค๋””์˜ค ํŒŒ์ผ์„ ์ถ”๊ฐ€ํ•˜๊ณ  ๋ณผ๋ฅจ์„ ์กฐ์ ˆํ•˜์„ธ์š”
+                         4. '๋น„๋””์˜ค ๋ณ‘ํ•ฉ' ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜์„ธ์š”
+
+                         **ํŠน์ง•**:
+                         - โœ… ์ฒซ ๋ฒˆ์งธ ๋น„๋””์˜ค์˜ ํฌ๊ธฐ๋ฅผ ๊ธฐ์ค€์œผ๋กœ ํ†ตํ•ฉ
+                         - โœ… ์—…๋กœ๋“œํ•œ ์˜ค๋””์˜ค๊ฐ€ ์ „์ฒด ๋น„๋””์˜ค์— ์ ์šฉ๋ฉ๋‹ˆ๋‹ค
+                         - โœ… ๋†’์€ ๋น„ํŠธ๋ ˆ์ดํŠธ๋กœ ํ’ˆ์งˆ ์œ ์ง€
+
+                         **ํŒ**:
+                         - ํŒŒ์ผ๋ช…์„ 01.mp4, 02.mp4, 03.mp4 ํ˜•์‹์œผ๋กœ ์ง€์ •ํ•˜๋ฉด ์ˆœ์„œ ๊ด€๋ฆฌ๊ฐ€ ์‰ฝ์Šต๋‹ˆ๋‹ค
+                         - ์˜ค๋””์˜ค๋ฅผ ์ถ”๊ฐ€ํ•˜๋ฉด ๊ธฐ์กด ๋น„๋””์˜ค์˜ ์˜ค๋””์˜ค๋Š” ๋Œ€์ฒด๋ฉ๋‹ˆ๋‹ค
+                         """)
+
+         # Fifth tab: video background removal/compositing
+         with gr.Tab("๋น„๋””์˜ค ๋ฐฐ๊ฒฝ์ œ๊ฑฐ/ํ•ฉ์„ฑ", elem_classes="tabitem"):
+             with gr.Row(equal_height=True):
+                 # Input column
+                 with gr.Column(scale=1):
+                     with gr.Group(elem_classes="panel-box"):
+                         gr.Markdown("### ๐ŸŽฅ ๋น„๋””์˜ค ์—…๋กœ๋“œ")
+
+                         bg_video_input = gr.Video(
+                             label="์ž…๋ ฅ ๋น„๋””์˜ค",
+                             interactive=True
+                         )
+
+                     with gr.Group(elem_classes="panel-box"):
+                         gr.Markdown("### ๐ŸŽจ ๋ฐฐ๊ฒฝ ์„ค์ •")
+
+                         bg_type = gr.Radio(
+                             ["์ƒ‰์ƒ", "์ด๋ฏธ์ง€", "๋น„๋””์˜ค"],
+                             label="๋ฐฐ๊ฒฝ ์œ ํ˜•",
+                             value="์ƒ‰์ƒ",
+                             interactive=True
+                         )
+
+                         color_picker = gr.ColorPicker(
+                             label="๋ฐฐ๊ฒฝ ์ƒ‰์ƒ",
+                             value="#00FF00",
+                             visible=True,
+                             interactive=True
+                         )
+
+                         bg_image_input = gr.Image(
+                             label="๋ฐฐ๊ฒฝ ์ด๋ฏธ์ง€",
+                             type="filepath",
+                             visible=False,
+                             interactive=True
+                         )
+
+                         bg_video_bg = gr.Video(
+                             label="๋ฐฐ๊ฒฝ ๋น„๋””์˜ค",
+                             visible=False,
+                             interactive=True
+                         )
+
+                         with gr.Column(visible=False) as video_handling_options:
+                             video_handling_radio = gr.Radio(
+                                 ["slow_down", "loop"],
+                                 label="๋น„๋””์˜ค ์ฒ˜๋ฆฌ ๋ฐฉ์‹",
+                                 value="slow_down",
+                                 interactive=True,
+                                 info="slow_down: ๋ฐฐ๊ฒฝ ๋น„๋””์˜ค๋ฅผ ๋А๋ฆฌ๊ฒŒ ์žฌ์ƒ, loop: ๋ฐฐ๊ฒฝ ๋น„๋””์˜ค๋ฅผ ๋ฐ˜๋ณต"
+                             )
+
+                     with gr.Group(elem_classes="panel-box"):
+                         gr.Markdown("### โš™๏ธ ์ฒ˜๋ฆฌ ์„ค์ •")
+
+                         fps_slider = gr.Slider(
+                             minimum=0,
+                             maximum=60,
+                             step=1,
+                             value=0,
+                             label="์ถœ๋ ฅ FPS (0 = ์›๋ณธ FPS ์œ ์ง€)",
+                             interactive=True
+                         )
+
+                         fast_mode_checkbox = gr.Checkbox(
+                             label="๋น ๋ฅธ ๋ชจ๋“œ (BiRefNet_lite ์‚ฌ์šฉ)",
+                             value=True,
+                             interactive=True
+                         )
+
+                         max_workers_slider = gr.Slider(
+                             minimum=1,
+                             maximum=32,
+                             step=1,
+                             value=10,
+                             label="์ตœ๋Œ€ ์›Œ์ปค ์ˆ˜",
+                             info="๋ณ‘๋ ฌ๋กœ ์ฒ˜๋ฆฌํ•  ํ”„๋ ˆ์ž„ ์ˆ˜",
+                             interactive=True
+                         )
+
+                     bg_remove_btn = gr.Button("๐ŸŽฌ ๋ฐฐ๊ฒฝ ๋ณ€๊ฒฝ", variant="primary", elem_id="bg-remove-btn")
+
+                 # Output column
+                 with gr.Column(scale=1):
+                     with gr.Group(elem_classes="panel-box"):
+                         gr.Markdown("### ๐ŸŽฌ ์ฒ˜๋ฆฌ ๊ฒฐ๊ณผ")
+
+                         stream_image = gr.Image(label="์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ", visible=False)
+                         output_bg_video = gr.Video(label="์ตœ์ข… ๋น„๋””์˜ค")
+                         time_textbox = gr.Textbox(label="๊ฒฝ๊ณผ ์‹œ๊ฐ„", interactive=False)
+
+                         gr.Markdown("""
+                         ### โ„น๏ธ ์‚ฌ์šฉ ๋ฐฉ๋ฒ•
+                         1. ๋น„๋””์˜ค๋ฅผ ์—…๋กœ๋“œํ•˜์„ธ์š”
+                         2. ์›ํ•˜๋Š” ๋ฐฐ๊ฒฝ ์œ ํ˜•์„ ์„ ํƒํ•˜์„ธ์š”
+                         3. ์„ค์ •์„ ์กฐ์ •ํ•˜๊ณ  '๋ฐฐ๊ฒฝ ๋ณ€๊ฒฝ' ๋ฒ„ํŠผ์„ ํด๋ฆญํ•˜์„ธ์š”
+
+                         **์ฐธ๊ณ **: GPU ์ œํ•œ์œผ๋กœ ํ•œ ๋ฒˆ์— ์•ฝ 200ํ”„๋ ˆ์ž„๊นŒ์ง€ ์ฒ˜๋ฆฌ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.
+                         ๊ธด ๋น„๋””์˜ค๋Š” ์ž‘์€ ์กฐ๊ฐ์œผ๋กœ ๋‚˜๋ˆ„์–ด ์ฒ˜๋ฆฌํ•˜์„ธ์š”.
+                         """)
+
+     # Run the model-load preparation when the demo starts
+     def on_demo_load():
+         try:
+             if IS_SPACES:
+                 # GPU warm-up in the Spaces environment
+                 gpu_warmup()
+             # Model loading happens automatically on the first GPU function call
+             return "๋ชจ๋ธ ๋กœ๋”ฉ ์ค€๋น„ ์™„๋ฃŒ"
+         except Exception as e:
+             return f"์ดˆ๊ธฐํ™” ์˜ค๋ฅ˜: {str(e)}"
      # Event wiring - first tab
      size_preset.change(update_dimensions, [size_preset], [width, height])

          [audio_video_input, audio_prompt, audio_negative_prompt, audio_seed, audio_steps, audio_cfg, audio_duration],
          [output_video_with_audio]
      )
+
+     # Event wiring - fourth tab (video editing)
+     merge_videos_btn.click(
+         merge_videos_with_audio,
+         inputs=[video_files, audio_file, audio_volume, output_fps],
+         outputs=[merged_video, merge_status]
+     )
+
+     # Event wiring - fifth tab (video background removal/compositing)
+     def update_bg_visibility(bg_type):
+         if bg_type == "์ƒ‰์ƒ":
+             return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
+         elif bg_type == "์ด๋ฏธ์ง€":
+             return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
+         elif bg_type == "๋น„๋””์˜ค":
+             return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)
+         else:
+             return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
+
+     bg_type.change(
+         update_bg_visibility,
+         inputs=bg_type,
+         outputs=[color_picker, bg_image_input, bg_video_bg, video_handling_options]
+     )
+
+     bg_remove_btn.click(
+         process_video_bg,
+         inputs=[bg_video_input, bg_type, bg_image_input, bg_video_bg, color_picker,
+                 fps_slider, video_handling_radio, fast_mode_checkbox, max_workers_slider],
+         outputs=[stream_image, output_bg_video, time_textbox]
+     )
+
+     # Run when the demo loads
+     demo.load(on_demo_load, outputs=model_status)

+ if __name__ == "__main__":
+     # Extra check in the Spaces environment
+     if IS_SPACES:
+         try:
+             gpu_warmup()
+         except:
+             pass
+
+     demo.launch()