Update webgui.py
webgui.py
CHANGED
@@ -21,15 +21,13 @@ from src.models.whisper.audio2feature import load_audio_model
 from src.pipelines.pipeline_echo_mimic import Audio2VideoPipeline
 from src.utils.util import save_videos_grid, crop_and_pad
 from src.models.face_locator import FaceLocator
-from moviepy.editor import VideoFileClip, AudioFileClip
+from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip, vfx
 from facenet_pytorch import MTCNN
 import argparse
 
 import gradio as gr
 
 import huggingface_hub
-from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
-from moviepy.video.fx.all import composite
 
 huggingface_hub.snapshot_download(
     repo_id='BadToBest/EchoMimic',
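The snapshot_download call is cut off by the hunk boundary. For context, a typical invocation of this huggingface_hub API looks like the sketch below; local_dir is an illustrative value, not taken from the commit.

import huggingface_hub

# Hypothetical completion for illustration only: pull the EchoMimic
# weights into a local directory before the UI starts. local_dir is
# an assumed parameter value, not part of this commit.
huggingface_hub.snapshot_download(
    repo_id='BadToBest/EchoMimic',
    local_dir='./pretrained_weights',
)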
@@ -69,7 +67,6 @@ elif ffmpeg_path not in os.getenv('PATH'):
     print("add ffmpeg to path")
     os.environ["PATH"] = f"{ffmpeg_path}:{os.environ['PATH']}"
 
-
config_path = "./configs/prompts/animation.yaml"
config = OmegaConf.load(config_path)
if config.weight_dtype == "fp16":
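The guard named in this hunk header prepends a bundled ffmpeg build to PATH only when it is not already visible to subprocesses. A minimal sketch of that pattern, where the default path is an assumption, not from webgui.py:

import os

# Prepend ffmpeg_path to PATH only if it is missing; the fallback
# directory here is an assumed placeholder.
ffmpeg_path = os.getenv("FFMPEG_PATH", "./ffmpeg-static")
if ffmpeg_path not in os.getenv("PATH", ""):
    print("add ffmpeg to path")
    os.environ["PATH"] = f"{ffmpeg_path}:{os.environ['PATH']}"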
@@ -157,9 +154,8 @@ def select_face(det_bboxes, probs):
     sorted_bboxes = sorted(filtered_bboxes, key=lambda x:(x[3]-x[1]) * (x[2] - x[0]), reverse=True)
     return sorted_bboxes[0]
 
-
-
def process_video(uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device):
+
     if seed is not None and seed > -1:
         generator = torch.manual_seed(seed)
     else:
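The sort key in select_face is the bounding-box area, so the function keeps the largest detected face. A quick self-contained check with made-up boxes in [x0, y0, x1, y1] form:

# Made-up boxes for illustration; the key (x[3]-x[1]) * (x[2]-x[0])
# is height * width, i.e. the box area, sorted largest first.
boxes = [[10, 10, 50, 50], [0, 0, 200, 200]]
largest = sorted(boxes, key=lambda x: (x[3] - x[1]) * (x[2] - x[0]), reverse=True)[0]
assert largest == [0, 0, 200, 200]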
@@ -179,7 +175,7 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device):
     r_pad = int((re - rb) * facemask_dilation_ratio)
     c_pad = int((ce - cb) * facemask_dilation_ratio)
     face_mask[rb - r_pad : re + r_pad, cb - c_pad : ce + c_pad] = 255
-
+
     #### face crop
     r_pad_crop = int((re - rb) * facecrop_dilation_ratio)
     c_pad_crop = int((ce - cb) * facecrop_dilation_ratio)
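One thing the dilation step does not do is clamp the padded slice to the image: a negative rb - r_pad wraps around under numpy slicing. A sketch of the same arithmetic with explicit bounds, where the clamping is my addition, not part of the commit:

import numpy as np

def dilate_face_mask(face_mask, rb, re, cb, ce, ratio):
    # Same pad arithmetic as the hunk, plus bounds clamping (an
    # assumption, not in the commit) so the slice stays in-image.
    r_pad = int((re - rb) * ratio)
    c_pad = int((ce - cb) * ratio)
    h, w = face_mask.shape[:2]
    face_mask[max(rb - r_pad, 0):min(re + r_pad, h),
              max(cb - c_pad, 0):min(ce + c_pad, w)] = 255
    return face_mask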
@@ -191,7 +187,7 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device):
 
     ref_image_pil = Image.fromarray(face_img[:, :, [2, 1, 0]])
     face_mask_tensor = torch.Tensor(face_mask).to(dtype=weight_dtype, device="cuda").unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0
-
+
     video = pipe(
         ref_image_pil,
         uploaded_audio,
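The three unsqueeze(0) calls reshape the (H, W) mask into a 5-D tensor, presumably (batch, channel, frames, height, width), and the /255.0 rescales it to [0, 1] before it reaches the pipeline. A quick shape check:

import torch

# Shape check only; 512x512 is an arbitrary example size.
face_mask = torch.zeros(512, 512)
t = face_mask.unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0
assert t.shape == (1, 1, 1, 512, 512)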
@@ -223,13 +219,14 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device):
             .margin(right=8, bottom=8, opacity=0)  # set margin and opacity
             .set_pos(("right", "bottom")))  # set position
 
-    final_clip =
+    final_clip = video_clip.set_audio(audio_clip).fx(vfx.composite, watermark)
 
     # place it in the same path as app.py
     final_output_path = Path(__file__).parent / "output_video_with_audio.mp4"
     final_clip.write_videofile(str(final_output_path), codec="libx264", audio_codec="aac")
 
     return final_output_path
+
 
 with gr.Blocks() as demo:
     gr.Markdown('# Mimic FACE')
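A caveat on the new compositing line: moviepy 1.x ships no composite function in its vfx module (the removed moviepy.video.fx.all import of composite would have failed for the same reason), so .fx(vfx.composite, watermark) raises AttributeError at runtime. The standard way to overlay a watermark is CompositeVideoClip; a sketch under that substitution, with placeholder file names:

from moviepy.editor import (AudioFileClip, CompositeVideoClip, ImageClip,
                            VideoFileClip)

# Sketch using CompositeVideoClip in place of the nonexistent
# vfx.composite; the file names are placeholders, not from the commit.
video_clip = VideoFileClip("output_video.mp4")
audio_clip = AudioFileClip("input_audio.wav")
watermark = (ImageClip("watermark.png")
             .set_duration(video_clip.duration)
             .margin(right=8, bottom=8, opacity=0)  # set margin and opacity
             .set_pos(("right", "bottom")))         # set position
final_clip = CompositeVideoClip([video_clip.set_audio(audio_clip), watermark])
final_clip.write_videofile("output_video_with_audio.mp4",
                           codec="libx264", audio_codec="aac")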