fantaxy committed
Commit e2d0fd3 · verified · 1 Parent(s): d18b5df

Update webgui.py

Files changed (1)
  1. webgui.py +6 -9
webgui.py CHANGED
@@ -21,15 +21,13 @@ from src.models.whisper.audio2feature import load_audio_model
 from src.pipelines.pipeline_echo_mimic import Audio2VideoPipeline
 from src.utils.util import save_videos_grid, crop_and_pad
 from src.models.face_locator import FaceLocator
-from moviepy.editor import VideoFileClip, AudioFileClip
+from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip, vfx
 from facenet_pytorch import MTCNN
 import argparse
 
 import gradio as gr
 
 import huggingface_hub
-from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
-from moviepy.video.fx.all import composite
 
 huggingface_hub.snapshot_download(
     repo_id='BadToBest/EchoMimic',
@@ -69,7 +67,6 @@ elif ffmpeg_path not in os.getenv('PATH'):
     print("add ffmpeg to path")
     os.environ["PATH"] = f"{ffmpeg_path}:{os.environ['PATH']}"
 
-
 config_path = "./configs/prompts/animation.yaml"
 config = OmegaConf.load(config_path)
 if config.weight_dtype == "fp16":
@@ -157,9 +154,8 @@ def select_face(det_bboxes, probs):
     sorted_bboxes = sorted(filtered_bboxes, key=lambda x:(x[3]-x[1]) * (x[2] - x[0]), reverse=True)
     return sorted_bboxes[0]
 
-
-
 def process_video(uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device):
+
     if seed is not None and seed > -1:
         generator = torch.manual_seed(seed)
     else:
@@ -179,7 +175,7 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, fac
     r_pad = int((re - rb) * facemask_dilation_ratio)
     c_pad = int((ce - cb) * facemask_dilation_ratio)
     face_mask[rb - r_pad : re + r_pad, cb - c_pad : ce + c_pad] = 255
-
+
     #### face crop
     r_pad_crop = int((re - rb) * facecrop_dilation_ratio)
     c_pad_crop = int((ce - cb) * facecrop_dilation_ratio)
@@ -191,7 +187,7 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, fac
 
     ref_image_pil = Image.fromarray(face_img[:, :, [2, 1, 0]])
     face_mask_tensor = torch.Tensor(face_mask).to(dtype=weight_dtype, device="cuda").unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0
-
+
     video = pipe(
         ref_image_pil,
         uploaded_audio,
@@ -223,13 +219,14 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, fac
         .margin(right=8, bottom=8, opacity=0)  # margin and opacity settings
         .set_pos(("right", "bottom")))  # position setting
 
-    final_clip = composite(video_clip.set_audio(audio_clip), watermark)
+    final_clip = video_clip.set_audio(audio_clip).fx(vfx.composite, watermark)
 
     # place it in the same path as APP.PY
     final_output_path = Path(__file__).parent / "output_video_with_audio.mp4"
    final_clip.write_videofile(str(final_output_path), codec="libx264", audio_codec="aac")
 
     return final_output_path
+
 
 with gr.Blocks() as demo:
     gr.Markdown('# Mimic FACE')
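
The replacement line `final_clip = video_clip.set_audio(audio_clip).fx(vfx.composite, watermark)` assumes a `composite` effect inside moviepy's `vfx` namespace. As far as I can tell, moviepy 1.x ships no such effect (which is also why the removed `from moviepy.video.fx.all import composite` import could not have resolved); the library's conventional way to overlay a watermark is `CompositeVideoClip`. Below is a minimal sketch of that approach, mirroring the margin and position settings in webgui.py; the input file names are illustrative placeholders, not paths from this repo.

from moviepy.editor import (
    AudioFileClip,
    CompositeVideoClip,
    ImageClip,
    VideoFileClip,
)

# Placeholder inputs: in webgui.py these come from the pipeline render
# and the uploaded audio file.
video_clip = VideoFileClip("output_video.mp4")
audio_clip = AudioFileClip("uploaded_audio.wav")

# Watermark styled as in the diff: transparent margin, pinned bottom-right.
watermark = (ImageClip("watermark.png")
             .set_duration(video_clip.duration)      # visible for the whole clip
             .margin(right=8, bottom=8, opacity=0)   # margin and opacity
             .set_pos(("right", "bottom")))          # bottom-right corner

# Overlay the watermark on the base video, then attach the audio track.
final_clip = CompositeVideoClip([video_clip, watermark]).set_audio(audio_clip)
final_clip.write_videofile("output_video_with_audio.mp4",
                           codec="libx264", audio_codec="aac")

`CompositeVideoClip` defaults to the size, fps, and duration of the base clip, so a simple corner watermark needs no extra sizing arguments.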