openfree commited on
Commit
b694e4e
·
verified ·
1 Parent(s): 12bb0b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +215 -168
app.py CHANGED
@@ -10,6 +10,14 @@ import torch
10
  from diffusers import AutoencoderKL, TCDScheduler
11
  from diffusers.models.model_loading_utils import load_state_dict
12
  from huggingface_hub import hf_hub_download
 
 
 
 
 
 
 
 
13
 
14
  # Spaces GPU
15
  try:
@@ -23,22 +31,19 @@ except:
23
  return func
24
  return decorator
25
 
26
- # 환경 변수 설정
27
- os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"
28
-
29
- # MMAudio 관련 임포트
30
  try:
31
  import mmaudio
32
- from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video,
33
- setup_eval_logging)
34
- from mmaudio.model.flow_matching import FlowMatching
35
- from mmaudio.model.networks import MMAudio, get_my_mmaudio
36
- from mmaudio.model.sequence_config import SequenceConfig
37
- from mmaudio.model.utils.features_utils import FeaturesUtils
38
- MMAUDIO_AVAILABLE = True
39
  except ImportError:
40
- MMAUDIO_AVAILABLE = False
41
- logging.warning("MMAudio not available. Sound generation will be disabled.")
 
 
 
 
 
 
 
42
 
43
  # ControlNet 모델 로드
44
  try:
@@ -89,52 +94,59 @@ except Exception as e:
89
  logging.error(f"Failed to load outpainting models: {str(e)}")
90
  OUTPAINT_MODEL_LOADED = False
91
 
92
- # MMAudio 모델 설정 및 로드
93
- if MMAUDIO_AVAILABLE:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  try:
95
- # CUDA 설정
96
- if torch.cuda.is_available():
97
- device = torch.device("cuda")
98
- torch.backends.cuda.matmul.allow_tf32 = True
99
- torch.backends.cudnn.allow_tf32 = True
100
- torch.backends.cudnn.benchmark = True
101
- else:
102
- device = torch.device("cpu")
103
-
104
- dtype = torch.bfloat16
105
-
106
- # 모델 설정
107
- model_cfg: ModelConfig = all_model_cfg['large_44k_v2']
108
- model_cfg.download_if_needed()
109
-
110
- setup_eval_logging()
111
-
112
- # 모델 로드
113
- def get_mmaudio_model():
114
- with torch.cuda.device(device):
115
- seq_cfg = model_cfg.seq_cfg
116
- net: MMAudio = get_my_mmaudio(model_cfg.model_name).to(device, dtype).eval()
117
- net.load_weights(torch.load(model_cfg.model_path, map_location=device, weights_only=True))
118
- logging.info(f'Loaded MMAudio weights from {model_cfg.model_path}')
119
-
120
- feature_utils = FeaturesUtils(
121
- tod_vae_ckpt=model_cfg.vae_path,
122
- synchformer_ckpt=model_cfg.synchformer_ckpt,
123
- enable_conditions=True,
124
- mode=model_cfg.mode,
125
- bigvgan_vocoder_ckpt=model_cfg.bigvgan_16k_path,
126
- need_vae_encoder=False
127
- ).to(device, dtype).eval()
128
-
129
- return net, feature_utils, seq_cfg
130
-
131
- mmaudio_net, mmaudio_feature_utils, mmaudio_seq_cfg = get_mmaudio_model()
132
- MMAUDIO_LOADED = True
133
  except Exception as e:
134
- logging.error(f"Failed to load MMAudio models: {str(e)}")
135
- MMAUDIO_LOADED = False
136
- else:
137
- MMAUDIO_LOADED = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  # API URLs
140
  TEXT2IMG_API_URL = "http://211.233.58.201:7896"
@@ -192,52 +204,6 @@ def generate_text_to_image(prompt, width, height, guidance, inference_steps, see
192
  logging.error(f"Image generation error: {str(e)}")
193
  return None, f"오류: {str(e)}"
194
 
195
- @spaces.GPU(duration=60)
196
- @torch.inference_mode()
197
- def video_to_audio(video_path, prompt, negative_prompt="music", seed=0, num_steps=25, cfg_strength=4.5, target_duration=8.0):
198
- """비디오에 사운드를 추가하는 함수"""
199
- if not MMAUDIO_LOADED:
200
- logging.error("MMAudio model not loaded")
201
- return video_path
202
-
203
- try:
204
- rng = torch.Generator(device=device)
205
- rng.manual_seed(seed)
206
- fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
207
-
208
- # 비디오 로드 - target_duration 사용
209
- clip_frames, sync_frames, actual_duration = load_video(video_path, target_duration)
210
- clip_frames = clip_frames.unsqueeze(0)
211
- sync_frames = sync_frames.unsqueeze(0)
212
- mmaudio_seq_cfg.duration = actual_duration
213
- mmaudio_net.update_seq_lengths(mmaudio_seq_cfg.latent_seq_len, mmaudio_seq_cfg.clip_seq_len, mmaudio_seq_cfg.sync_seq_len)
214
-
215
- # 오디오 생성
216
- audios = generate(clip_frames,
217
- sync_frames, [prompt],
218
- negative_text=[negative_prompt],
219
- feature_utils=mmaudio_feature_utils,
220
- net=mmaudio_net,
221
- fm=fm,
222
- rng=rng,
223
- cfg_strength=cfg_strength)
224
- audio = audios.float().cpu()[0]
225
-
226
- # 비디오와 오디오 결합
227
- video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
228
- make_video(video_path,
229
- video_save_path,
230
- audio,
231
- sampling_rate=mmaudio_seq_cfg.sampling_rate,
232
- duration_sec=mmaudio_seq_cfg.duration)
233
-
234
- return video_save_path
235
- except Exception as e:
236
- logging.error(f"Video to audio error: {str(e)}")
237
- import traceback
238
- traceback.print_exc()
239
- return video_path
240
-
241
  def generate_video_from_image(image, prompt="", length=4.0):
242
  if image is None:
243
  return None
@@ -270,25 +236,6 @@ def generate_video_from_image(image, prompt="", length=4.0):
270
  logging.error(f"Video generation error: {str(e)}")
271
  return None
272
 
273
- def add_sound_to_video(video_path, sound_prompt, sound_negative_prompt="music"):
274
- if not video_path or not MMAUDIO_LOADED:
275
- return video_path
276
-
277
- try:
278
- return video_to_audio(
279
- video_path=video_path,
280
- prompt=sound_prompt,
281
- negative_prompt=sound_negative_prompt,
282
- seed=random.randint(0, 9999999),
283
- num_steps=25,
284
- cfg_strength=4.5,
285
- target_duration=8.0 # 기본값 사용
286
- )
287
- except Exception as e:
288
- logging.error(f"Sound addition error: {str(e)}")
289
- return video_path
290
-
291
-
292
  def prepare_image_and_mask(image, width, height, overlap_percentage, alignment):
293
  """이미지와 마스크를 준비하는 함수"""
294
  if image is None:
@@ -339,7 +286,7 @@ def prepare_image_and_mask(image, width, height, overlap_percentage, alignment):
339
  mask = Image.new('L', target_size, 255)
340
  mask_draw = ImageDraw.Draw(mask)
341
 
342
- # 마스크 영역 그리기
343
  white_gaps_patch = 2
344
 
345
  left_overlap = margin_x + overlap_x if alignment != "왼쪽" else margin_x
@@ -354,6 +301,27 @@ def prepare_image_and_mask(image, width, height, overlap_percentage, alignment):
354
 
355
  return background, mask
356
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  @spaces.GPU(duration=24)
358
  def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8):
359
  """이미지 아웃페인팅 실행"""
@@ -410,6 +378,59 @@ def outpaint_image(image, prompt, width, height, overlap_percentage, alignment,
410
  logging.error(f"Outpainting error: {str(e)}")
411
  return background if 'background' in locals() else None
412
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
  # CSS
414
  css = """
415
  :root {
@@ -435,7 +456,7 @@ css = """
435
  padding: 20px !important;
436
  margin-bottom: 20px !important;
437
  }
438
- #generate-btn, #video-btn, #outpaint-btn {
439
  background: linear-gradient(135deg, #ff9a9e, #fad0c4) !important;
440
  font-size: 1.1rem !important;
441
  padding: 12px 24px !important;
@@ -448,7 +469,7 @@ css = """
448
  """
449
 
450
  # Gradio Interface
451
- demo = gr.Blocks(css=css, title="AI 이미지 & 비디오 생성기")
452
 
453
  with demo:
454
  gr.Markdown("# 🎨 Ginigen 스튜디오")
@@ -504,27 +525,6 @@ with demo:
504
  info="1초에서 60초까지 선택 가능합니다"
505
  )
506
 
507
- # 사운드 생성 옵션 추가
508
- sound_generation = gr.Radio(
509
- choices=["사운드 없음", "사운드 생성"],
510
- value="사운드 없음",
511
- label="사운드 옵션",
512
- info="비디오에 사운드를 추가할지 선택하세요"
513
- )
514
-
515
- # 사운드 관련 입력 필드 (조건부 표시)
516
- with gr.Column(visible=False) as sound_options:
517
- sound_prompt = gr.Textbox(
518
- label="사운드 프롬프트 (선택)",
519
- placeholder="생성할 사운드를 설명하세요... (비워두면 비디오 프롬프트 사용)",
520
- lines=2
521
- )
522
- sound_negative_prompt = gr.Textbox(
523
- label="사운드 네거티브 프롬프트",
524
- value="music",
525
- lines=1
526
- )
527
-
528
  video_btn = gr.Button("🎬 비디오로 변환", variant="secondary", elem_id="video-btn")
529
 
530
  # 출력 컬럼
@@ -590,6 +590,7 @@ with demo:
590
  label="추론 스텝"
591
  )
592
 
 
593
  outpaint_btn = gr.Button("🎨 아웃페인팅 실행", variant="primary", elem_id="outpaint-btn")
594
 
595
  # 출력 컬럼
@@ -597,7 +598,60 @@ with demo:
597
  with gr.Group(elem_classes="panel-box"):
598
  gr.Markdown("### 🖼️ 결과")
599
 
 
600
  outpaint_result = gr.Image(label="아웃페인팅 결과")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
601
 
602
  # 이벤트 연결 - 첫 번째 탭
603
  size_preset.change(update_dimensions, [size_preset], [width, height])
@@ -608,39 +662,32 @@ with demo:
608
  [output_image, output_seed]
609
  )
610
 
611
- # 사운드 옵션 표시/숨김
612
- def toggle_sound_options(choice):
613
- return gr.update(visible=(choice == "사운드 생성"))
614
-
615
- sound_generation.change(
616
- toggle_sound_options,
617
- [sound_generation],
618
- [sound_options]
619
- )
620
-
621
-
622
  video_btn.click(
623
- generate_video_from_image,
624
- [output_image, video_prompt, video_length], # 원래대로 3개 매개변수만
625
- [output_video]
626
- )
627
-
628
- # 사운드 추가는 별도 버튼으로
629
- sound_btn = gr.Button("🔊 비디오에 사운드 추가", visible=False)
630
- sound_btn.click(
631
- add_sound_to_video,
632
- [output_video, sound_prompt, sound_negative_prompt],
633
  [output_video]
634
  )
635
-
636
 
637
  # 이벤트 연결 - 두 번째 탭
638
  outpaint_size_preset.change(update_dimensions, [outpaint_size_preset], [outpaint_width, outpaint_height])
639
 
 
 
 
 
 
 
640
  outpaint_btn.click(
641
  outpaint_image,
642
  [input_image, outpaint_prompt, outpaint_width, outpaint_height, overlap_percentage, alignment, outpaint_steps],
643
  [outpaint_result]
644
  )
 
 
 
 
 
 
 
645
 
646
  demo.launch()
 
10
  from diffusers import AutoencoderKL, TCDScheduler
11
  from diffusers.models.model_loading_utils import load_state_dict
12
  from huggingface_hub import hf_hub_download
13
+ from pathlib import Path
14
+ import torchaudio
15
+ from einops import rearrange
16
+ from scipy.io import wavfile
17
+ from transformers import pipeline
18
+
19
+ # 환경 변수 설정으로 torch.load 체크 우회 (임시 해결책)
20
+ os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"
21
 
22
  # Spaces GPU
23
  try:
 
31
  return func
32
  return decorator
33
 
34
+ # MMAudio imports
 
 
 
35
  try:
36
  import mmaudio
 
 
 
 
 
 
 
37
  except ImportError:
38
+ os.system("pip install -e .")
39
+ import mmaudio
40
+
41
+ from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video,
42
+ setup_eval_logging)
43
+ from mmaudio.model.flow_matching import FlowMatching
44
+ from mmaudio.model.networks import MMAudio, get_my_mmaudio
45
+ from mmaudio.model.sequence_config import SequenceConfig
46
+ from mmaudio.model.utils.features_utils import FeaturesUtils
47
 
48
  # ControlNet 모델 로드
49
  try:
 
94
  logging.error(f"Failed to load outpainting models: {str(e)}")
95
  OUTPAINT_MODEL_LOADED = False
96
 
97
+ # MMAudio 모델 설정
98
+ if torch.cuda.is_available():
99
+ device = torch.device("cuda")
100
+ torch.backends.cuda.matmul.allow_tf32 = True
101
+ torch.backends.cudnn.allow_tf32 = True
102
+ torch.backends.cudnn.benchmark = True
103
+ else:
104
+ device = torch.device("cpu")
105
+
106
+ dtype = torch.bfloat16
107
+
108
+ # MMAudio 모델 초기화
109
+ try:
110
+ model_mmaudio: ModelConfig = all_model_cfg['large_44k_v2']
111
+ model_mmaudio.download_if_needed()
112
+ output_dir = Path('./output/gradio')
113
+ setup_eval_logging()
114
+
115
+ # 번역기 설정
116
  try:
117
+ translator = pipeline("translation",
118
+ model="Helsinki-NLP/opus-mt-ko-en",
119
+ device="cpu",
120
+ use_fast=True,
121
+ trust_remote_code=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  except Exception as e:
123
+ logging.warning(f"Failed to load translation model: {e}")
124
+ translator = None
125
+
126
+ def get_mmaudio_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
127
+ with torch.cuda.device(device):
128
+ seq_cfg = model_mmaudio.seq_cfg
129
+ net: MMAudio = get_my_mmaudio(model_mmaudio.model_name).to(device, dtype).eval()
130
+ net.load_weights(torch.load(model_mmaudio.model_path, map_location=device, weights_only=True))
131
+ logging.info(f'Loaded weights from {model_mmaudio.model_path}')
132
+
133
+ feature_utils = FeaturesUtils(
134
+ tod_vae_ckpt=model_mmaudio.vae_path,
135
+ synchformer_ckpt=model_mmaudio.synchformer_ckpt,
136
+ enable_conditions=True,
137
+ mode=model_mmaudio.mode,
138
+ bigvgan_vocoder_ckpt=model_mmaudio.bigvgan_16k_path,
139
+ need_vae_encoder=False
140
+ ).to(device, dtype).eval()
141
+
142
+ return net, feature_utils, seq_cfg
143
+
144
+ net_mmaudio, feature_utils, seq_cfg = get_mmaudio_model()
145
+ MMAUDIO_MODEL_LOADED = True
146
+ except Exception as e:
147
+ logging.error(f"Failed to load MMAudio models: {str(e)}")
148
+ MMAUDIO_MODEL_LOADED = False
149
+ translator = None
150
 
151
  # API URLs
152
  TEXT2IMG_API_URL = "http://211.233.58.201:7896"
 
204
  logging.error(f"Image generation error: {str(e)}")
205
  return None, f"오류: {str(e)}"
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  def generate_video_from_image(image, prompt="", length=4.0):
208
  if image is None:
209
  return None
 
236
  logging.error(f"Video generation error: {str(e)}")
237
  return None
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  def prepare_image_and_mask(image, width, height, overlap_percentage, alignment):
240
  """이미지와 마스크를 준비하는 함수"""
241
  if image is None:
 
286
  mask = Image.new('L', target_size, 255)
287
  mask_draw = ImageDraw.Draw(mask)
288
 
289
+ # 마스크 영역 그리기 (영어 정렬과 매칭)
290
  white_gaps_patch = 2
291
 
292
  left_overlap = margin_x + overlap_x if alignment != "왼쪽" else margin_x
 
301
 
302
  return background, mask
303
 
304
+ def preview_outpaint(image, width, height, overlap_percentage, alignment):
305
+ """아웃페인팅 미리보기"""
306
+ background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, alignment)
307
+ if background is None:
308
+ return None
309
+
310
+ # 미리보기 이미지 생성
311
+ preview = background.copy().convert('RGBA')
312
+
313
+ # 반투명 빨간색 오버레이
314
+ red_overlay = Image.new('RGBA', background.size, (255, 0, 0, 64))
315
+
316
+ # 마스크 적용
317
+ red_mask = Image.new('RGBA', background.size, (0, 0, 0, 0))
318
+ red_mask.paste(red_overlay, (0, 0), mask)
319
+
320
+ # 오버레이 합성
321
+ preview = Image.alpha_composite(preview, red_mask)
322
+
323
+ return preview
324
+
325
  @spaces.GPU(duration=24)
326
  def outpaint_image(image, prompt, width, height, overlap_percentage, alignment, num_steps=8):
327
  """이미지 아웃페인팅 실행"""
 
378
  logging.error(f"Outpainting error: {str(e)}")
379
  return background if 'background' in locals() else None
380
 
381
+ # MMAudio 관련 함수들
382
+ def translate_prompt(text):
383
+ try:
384
+ if translator is None:
385
+ return text
386
+
387
+ if text and any(ord(char) >= 0x3131 and ord(char) <= 0xD7A3 for char in text):
388
+ with torch.no_grad():
389
+ translation = translator(text)[0]['translation_text']
390
+ return translation
391
+ return text
392
+ except Exception as e:
393
+ logging.error(f"Translation error: {e}")
394
+ return text
395
+
396
+ @spaces.GPU
397
+ @torch.inference_mode()
398
+ def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
399
+ cfg_strength: float, duration: float):
400
+ if not MMAUDIO_MODEL_LOADED:
401
+ return None
402
+
403
+ prompt = translate_prompt(prompt)
404
+ negative_prompt = translate_prompt(negative_prompt)
405
+
406
+ rng = torch.Generator(device=device)
407
+ rng.manual_seed(seed)
408
+ fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
409
+
410
+ clip_frames, sync_frames, duration = load_video(video, duration)
411
+ clip_frames = clip_frames.unsqueeze(0)
412
+ sync_frames = sync_frames.unsqueeze(0)
413
+ seq_cfg.duration = duration
414
+ net_mmaudio.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
415
+
416
+ audios = generate(clip_frames,
417
+ sync_frames, [prompt],
418
+ negative_text=[negative_prompt],
419
+ feature_utils=feature_utils,
420
+ net=net_mmaudio,
421
+ fm=fm,
422
+ rng=rng,
423
+ cfg_strength=cfg_strength)
424
+ audio = audios.float().cpu()[0]
425
+
426
+ video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
427
+ make_video(video,
428
+ video_save_path,
429
+ audio,
430
+ sampling_rate=seq_cfg.sampling_rate,
431
+ duration_sec=seq_cfg.duration)
432
+ return video_save_path
433
+
434
  # CSS
435
  css = """
436
  :root {
 
456
  padding: 20px !important;
457
  margin-bottom: 20px !important;
458
  }
459
+ #generate-btn, #video-btn, #outpaint-btn, #preview-btn, #audio-btn {
460
  background: linear-gradient(135deg, #ff9a9e, #fad0c4) !important;
461
  font-size: 1.1rem !important;
462
  padding: 12px 24px !important;
 
469
  """
470
 
471
  # Gradio Interface
472
+ demo = gr.Blocks(css=css, title="AI 이미지 & 비디오 & 오디오 생성기")
473
 
474
  with demo:
475
  gr.Markdown("# 🎨 Ginigen 스튜디오")
 
525
  info="1초에서 60초까지 선택 가능합니다"
526
  )
527
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
528
  video_btn = gr.Button("🎬 비디오로 변환", variant="secondary", elem_id="video-btn")
529
 
530
  # 출력 컬럼
 
590
  label="추론 스텝"
591
  )
592
 
593
+ preview_btn = gr.Button("👁️ 미리보기", elem_id="preview-btn")
594
  outpaint_btn = gr.Button("🎨 아웃페인팅 실행", variant="primary", elem_id="outpaint-btn")
595
 
596
  # 출력 컬럼
 
598
  with gr.Group(elem_classes="panel-box"):
599
  gr.Markdown("### 🖼️ 결과")
600
 
601
+ preview_image = gr.Image(label="미리보기")
602
  outpaint_result = gr.Image(label="아웃페인팅 결과")
603
+
604
+ # 세 번째 탭: 비디오 + 오디오
605
+ with gr.Tab("비디오 + 오디오", elem_classes="tabitem"):
606
+ with gr.Row(equal_height=True):
607
+ # 입력 컬럼
608
+ with gr.Column(scale=1):
609
+ with gr.Group(elem_classes="panel-box"):
610
+ gr.Markdown("### 🎥 비디오 업로드")
611
+
612
+ audio_video_input = gr.Video(
613
+ label="입력 비디오",
614
+ sources=["upload"]
615
+ )
616
+
617
+ with gr.Group(elem_classes="panel-box"):
618
+ gr.Markdown("### 🎵 오디오 생성 설정")
619
+
620
+ audio_prompt = gr.Textbox(
621
+ label="프롬프트 (한글 지원)" if MMAUDIO_MODEL_LOADED and translator else "프롬프트",
622
+ placeholder="생성하고 싶은 오디오를 설명하세요... (예: 평화로운 피아노 음악)",
623
+ lines=3
624
+ )
625
+
626
+ audio_negative_prompt = gr.Textbox(
627
+ label="네거티브 프롬프트",
628
+ value="music",
629
+ placeholder="원하지 않는 요소...",
630
+ lines=2
631
+ )
632
+
633
+ with gr.Row():
634
+ audio_seed = gr.Number(label="시드", value=0)
635
+ audio_steps = gr.Number(label="스텝", value=25)
636
+
637
+ with gr.Row():
638
+ audio_cfg = gr.Number(label="가이던스 스케일", value=4.5)
639
+ audio_duration = gr.Number(label="지속시간 (초)", value=8)
640
+
641
+ audio_btn = gr.Button("🎵 오디오 생성 및 합성", variant="primary", elem_id="audio-btn")
642
+
643
+ # 출력 컬럼
644
+ with gr.Column(scale=1):
645
+ with gr.Group(elem_classes="panel-box"):
646
+ gr.Markdown("### 🎬 생성 결과")
647
+
648
+ output_video_with_audio = gr.Video(
649
+ label="오디오가 추가된 비디오",
650
+ interactive=False
651
+ )
652
+
653
+ if not MMAUDIO_MODEL_LOADED:
654
+ gr.Markdown("⚠️ MMAudio 모델을 로드하지 못했습니다. 이 기능은 사용할 수 없습니다.")
655
 
656
  # 이벤트 연결 - 첫 번째 탭
657
  size_preset.change(update_dimensions, [size_preset], [width, height])
 
662
  [output_image, output_seed]
663
  )
664
 
 
 
 
 
 
 
 
 
 
 
 
665
  video_btn.click(
666
+ lambda img, v_prompt, length: generate_video_from_image(img, v_prompt, length) if img is not None else None,
667
+ [output_image, video_prompt, video_length],
 
 
 
 
 
 
 
 
668
  [output_video]
669
  )
 
670
 
671
  # 이벤트 연결 - 두 번째 탭
672
  outpaint_size_preset.change(update_dimensions, [outpaint_size_preset], [outpaint_width, outpaint_height])
673
 
674
+ preview_btn.click(
675
+ preview_outpaint,
676
+ [input_image, outpaint_width, outpaint_height, overlap_percentage, alignment],
677
+ [preview_image]
678
+ )
679
+
680
  outpaint_btn.click(
681
  outpaint_image,
682
  [input_image, outpaint_prompt, outpaint_width, outpaint_height, overlap_percentage, alignment, outpaint_steps],
683
  [outpaint_result]
684
  )
685
+
686
+ # 이벤트 연결 - 세 번째 탭
687
+ audio_btn.click(
688
+ video_to_audio,
689
+ [audio_video_input, audio_prompt, audio_negative_prompt, audio_seed, audio_steps, audio_cfg, audio_duration],
690
+ [output_video_with_audio]
691
+ )
692
 
693
  demo.launch()