Update app.py
Browse files
app.py
CHANGED
@@ -194,7 +194,7 @@ def generate_text_to_image(prompt, width, height, guidance, inference_steps, see
|
|
194 |
|
195 |
@spaces.GPU(duration=60)
|
196 |
@torch.inference_mode()
|
197 |
-
def video_to_audio(video_path, prompt, negative_prompt="music", seed=0, num_steps=25, cfg_strength=4.5,
|
198 |
"""๋น๋์ค์ ์ฌ์ด๋๋ฅผ ์ถ๊ฐํ๋ ํจ์"""
|
199 |
if not MMAUDIO_LOADED:
|
200 |
logging.error("MMAudio model not loaded")
|
@@ -205,12 +205,10 @@ def video_to_audio(video_path, prompt, negative_prompt="music", seed=0, num_step
|
|
205 |
rng.manual_seed(seed)
|
206 |
fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
|
207 |
|
208 |
-
# Load video -
|
209 |
-
clip_frames, sync_frames, actual_duration = load_video(video_path,
|
210 |
clip_frames = clip_frames.unsqueeze(0)
|
211 |
sync_frames = sync_frames.unsqueeze(0)
|
212 |
-
|
213 |
-
# Update seq_cfg with the actual video length
|
214 |
mmaudio_seq_cfg.duration = actual_duration
|
215 |
mmaudio_net.update_seq_lengths(mmaudio_seq_cfg.latent_seq_len, mmaudio_seq_cfg.clip_seq_len, mmaudio_seq_cfg.sync_seq_len)
|
216 |
|
@@ -225,12 +223,13 @@ def video_to_audio(video_path, prompt, negative_prompt="music", seed=0, num_step
|
|
225 |
cfg_strength=cfg_strength)
|
226 |
audio = audios.float().cpu()[0]
|
227 |
|
228 |
-
# Combine video and audio
|
229 |
video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
|
230 |
make_video(video_path,
|
231 |
video_save_path,
|
232 |
audio,
|
233 |
-
sampling_rate=mmaudio_seq_cfg.sampling_rate
|
|
|
234 |
|
235 |
return video_save_path
|
236 |
except Exception as e:
|
@@ -274,6 +273,7 @@ def generate_video_from_image(image, prompt="", length=4.0, sound_generation="
|
|
274 |
sound_prompt = prompt if prompt else "ambient sound"
|
275 |
|
276 |
# Add sound to the video - pass all parameters explicitly
|
|
|
277 |
video_with_sound = video_to_audio(
|
278 |
video_path=video_path,
|
279 |
prompt=sound_prompt,
|
@@ -281,8 +281,9 @@ def generate_video_from_image(image, prompt="", length=4.0, sound_generation="
|
|
281 |
seed=random.randint(0, 9999999),
|
282 |
num_steps=25,
|
283 |
cfg_strength=4.5,
|
284 |
-
|
285 |
)
|
|
|
286 |
return video_with_sound
|
287 |
|
288 |
return video_path
|
|
|
194 |
|
195 |
@spaces.GPU(duration=60)
|
196 |
@torch.inference_mode()
|
197 |
+
def video_to_audio(video_path, prompt, negative_prompt="music", seed=0, num_steps=25, cfg_strength=4.5, target_duration=8.0):
|
198 |
"""๋น๋์ค์ ์ฌ์ด๋๋ฅผ ์ถ๊ฐํ๋ ํจ์"""
|
199 |
if not MMAUDIO_LOADED:
|
200 |
logging.error("MMAudio model not loaded")
|
|
|
205 |
rng.manual_seed(seed)
|
206 |
fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
|
207 |
|
208 |
+
# Load video - use target_duration
|
209 |
+
clip_frames, sync_frames, actual_duration = load_video(video_path, target_duration)
|
210 |
clip_frames = clip_frames.unsqueeze(0)
|
211 |
sync_frames = sync_frames.unsqueeze(0)
|
|
|
|
|
212 |
mmaudio_seq_cfg.duration = actual_duration
|
213 |
mmaudio_net.update_seq_lengths(mmaudio_seq_cfg.latent_seq_len, mmaudio_seq_cfg.clip_seq_len, mmaudio_seq_cfg.sync_seq_len)
|
214 |
|
|
|
223 |
cfg_strength=cfg_strength)
|
224 |
audio = audios.float().cpu()[0]
|
225 |
|
226 |
+
# Combine video and audio
|
227 |
video_save_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
|
228 |
make_video(video_path,
|
229 |
video_save_path,
|
230 |
audio,
|
231 |
+
sampling_rate=mmaudio_seq_cfg.sampling_rate,
|
232 |
+
duration_sec=mmaudio_seq_cfg.duration)
|
233 |
|
234 |
return video_save_path
|
235 |
except Exception as e:
|
|
|
273 |
sound_prompt = prompt if prompt else "ambient sound"
|
274 |
|
275 |
# Add sound to the video - pass all parameters explicitly
|
276 |
+
# Add sound to the video
|
277 |
video_with_sound = video_to_audio(
|
278 |
video_path=video_path,
|
279 |
prompt=sound_prompt,
|
|
|
281 |
seed=random.randint(0, 9999999),
|
282 |
num_steps=25,
|
283 |
cfg_strength=4.5,
|
284 |
+
target_duration=length # duration -> target_duration
|
285 |
)
|
286 |
+
|
287 |
return video_with_sound
|
288 |
|
289 |
return video_path
|