Update webgui.py
webgui.py
CHANGED
@@ -21,15 +21,13 @@ from src.models.whisper.audio2feature import load_audio_model
 from src.pipelines.pipeline_echo_mimic import Audio2VideoPipeline
 from src.utils.util import save_videos_grid, crop_and_pad
 from src.models.face_locator import FaceLocator
-from moviepy.editor import VideoFileClip, AudioFileClip
+from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip, vfx
 from facenet_pytorch import MTCNN
 import argparse
 
 import gradio as gr
 
 import huggingface_hub
-from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
-from moviepy.video.fx.all import composite
 
 huggingface_hub.snapshot_download(
     repo_id='BadToBest/EchoMimic',
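The snapshot_download call is cut off by the hunk boundary. For context, a typical invocation of this huggingface_hub API looks like the sketch below; local_dir is an illustrative value, not taken from the commit.

import huggingface_hub

# Hypothetical completion for illustration only: pull the EchoMimic
# weights into a local directory before the UI starts. local_dir is
# an assumed parameter value, not part of this commit.
huggingface_hub.snapshot_download(
    repo_id='BadToBest/EchoMimic',
    local_dir='./pretrained_weights',
)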
@@ -69,7 +67,6 @@ elif ffmpeg_path not in os.getenv('PATH'):
     print("add ffmpeg to path")
     os.environ["PATH"] = f"{ffmpeg_path}:{os.environ['PATH']}"
 
-
config_path = "./configs/prompts/animation.yaml"
config = OmegaConf.load(config_path)
if config.weight_dtype == "fp16":
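The guard named in this hunk header prepends a bundled ffmpeg build to PATH only when it is not already visible to subprocesses. A minimal sketch of that pattern, where the default path is an assumption, not from webgui.py:

import os

# Prepend ffmpeg_path to PATH only if it is missing; the fallback
# directory here is an assumed placeholder.
ffmpeg_path = os.getenv("FFMPEG_PATH", "./ffmpeg-static")
if ffmpeg_path not in os.getenv("PATH", ""):
    print("add ffmpeg to path")
    os.environ["PATH"] = f"{ffmpeg_path}:{os.environ['PATH']}"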
@@ -157,9 +154,8 @@ def select_face(det_bboxes, probs):
     sorted_bboxes = sorted(filtered_bboxes, key=lambda x:(x[3]-x[1]) * (x[2] - x[0]), reverse=True)
     return sorted_bboxes[0]
 
-
-
def process_video(uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device):
+
     if seed is not None and seed > -1:
         generator = torch.manual_seed(seed)
     else:
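The sort key in select_face is the bounding-box area, so the function keeps the largest detected face. A quick self-contained check with made-up boxes in [x0, y0, x1, y1] form:

# Made-up boxes for illustration; the key (x[3]-x[1]) * (x[2]-x[0])
# is height * width, i.e. the box area, sorted largest first.
boxes = [[10, 10, 50, 50], [0, 0, 200, 200]]
largest = sorted(boxes, key=lambda x: (x[3] - x[1]) * (x[2] - x[0]), reverse=True)[0]
assert largest == [0, 0, 200, 200]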
@@ -179,7 +175,7 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device):
     r_pad = int((re - rb) * facemask_dilation_ratio)
     c_pad = int((ce - cb) * facemask_dilation_ratio)
     face_mask[rb - r_pad : re + r_pad, cb - c_pad : ce + c_pad] = 255
-
+
     #### face crop
     r_pad_crop = int((re - rb) * facecrop_dilation_ratio)
     c_pad_crop = int((ce - cb) * facecrop_dilation_ratio)
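One thing the dilation step does not do is clamp the padded slice to the image: a negative rb - r_pad wraps around under numpy slicing. A sketch of the same arithmetic with explicit bounds, where the clamping is my addition, not part of the commit:

import numpy as np

def dilate_face_mask(face_mask, rb, re, cb, ce, ratio):
    # Same pad arithmetic as the hunk, plus bounds clamping (an
    # assumption, not in the commit) so the slice stays in-image.
    r_pad = int((re - rb) * ratio)
    c_pad = int((ce - cb) * ratio)
    h, w = face_mask.shape[:2]
    face_mask[max(rb - r_pad, 0):min(re + r_pad, h),
              max(cb - c_pad, 0):min(ce + c_pad, w)] = 255
    return face_mask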
@@ -191,7 +187,7 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device):
 
     ref_image_pil = Image.fromarray(face_img[:, :, [2, 1, 0]])
     face_mask_tensor = torch.Tensor(face_mask).to(dtype=weight_dtype, device="cuda").unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0
-
+
     video = pipe(
         ref_image_pil,
         uploaded_audio,
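The three unsqueeze(0) calls reshape the (H, W) mask into a 5-D tensor, presumably (batch, channel, frames, height, width), and the /255.0 rescales it to [0, 1] before it reaches the pipeline. A quick shape check:

import torch

# Shape check only; 512x512 is an arbitrary example size.
face_mask = torch.zeros(512, 512)
t = face_mask.unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0
assert t.shape == (1, 1, 1, 512, 512)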
@@ -223,13 +219,14 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, facemask_dilation_ratio, facecrop_dilation_ratio, context_frames, context_overlap, cfg, steps, sample_rate, fps, device):
             .margin(right=8, bottom=8, opacity=0)  # set margin and opacity
             .set_pos(("right", "bottom")))  # set position
 
-    final_clip =
+    final_clip = video_clip.set_audio(audio_clip).fx(vfx.composite, watermark)
 
     # place it in the same path as app.py
     final_output_path = Path(__file__).parent / "output_video_with_audio.mp4"
     final_clip.write_videofile(str(final_output_path), codec="libx264", audio_codec="aac")
 
     return final_output_path
+
 
 with gr.Blocks() as demo:
     gr.Markdown('# Mimic FACE')
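A caveat on the new compositing line: moviepy 1.x ships no composite function in its vfx module (the removed moviepy.video.fx.all import of composite would have failed for the same reason), so .fx(vfx.composite, watermark) raises AttributeError at runtime. The standard way to overlay a watermark is CompositeVideoClip; a sketch under that substitution, with placeholder file names:

from moviepy.editor import (AudioFileClip, CompositeVideoClip, ImageClip,
                            VideoFileClip)

# Sketch using CompositeVideoClip in place of the nonexistent
# vfx.composite; the file names are placeholders, not from the commit.
video_clip = VideoFileClip("output_video.mp4")
audio_clip = AudioFileClip("input_audio.wav")
watermark = (ImageClip("watermark.png")
             .set_duration(video_clip.duration)
             .margin(right=8, bottom=8, opacity=0)  # set margin and opacity
             .set_pos(("right", "bottom")))         # set position
final_clip = CompositeVideoClip([video_clip.set_audio(audio_clip), watermark])
final_clip.write_videofile("output_video_with_audio.mp4",
                           codec="libx264", audio_codec="aac")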