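"""Gradio demo for "Free-View Expressive Talking Head Video Editing" (ICASSP 2023).

Takes an example talking-head video and an audio track, then re-renders the
video with user-selected head pose, emotion, and blinking attributes.
"""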
import os
import glob

import spaces
import gradio as gr
from natsort import natsorted

# NOTE: "infenrece" and "attributtes_utils" are the names used by the upstream modules.
from inference_util import init_model, infenrece
from attributtes_utils import input_pose, input_emotion, input_blink
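
# @spaces.GPU asks Hugging Face ZeroGPU Spaces to allocate a GPU for the
# duration of each call to the decorated function.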
@spaces.GPU
def process(input_vid, audio_path, pose_select, emotion_select, blink_select):
    model = init_model()
    pose = input_pose(pose_select)
    emotion = input_emotion(emotion_select)

    # Re-encode the input audio to 16 kHz mono 16-bit PCM for the inference code.
    # Paths are quoted so Gradio temp paths containing spaces do not break the command.
    print(audio_path, input_vid)
    result = os.system(f'ffmpeg -y -loglevel error -i "{audio_path}" -vn -acodec pcm_s16le -ar 16000 -ac 1 2_output.wav')
    if result != 0:
        raise RuntimeError("Failed to execute ffmpeg command. Please check the input audio file.")
    if not os.path.exists("2_output.wav"):
        raise FileNotFoundError("2_output.wav was not created. Check the ffmpeg command and input file.")

    blink = input_blink(blink_select)
    print("input_vid: ", input_vid)

    # Run inference with the selected pose, emotion, and blink attributes.
    try:
        result = infenrece(model, input_vid, "2_output.wav", pose, emotion, blink)
    except Exception as e:
        raise RuntimeError(f"Inference failed: {e}") from e
    print("result: ", result)
    print("finished!")
    return result
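
# Gather the bundled example videos and make sure each one has a matching
# 16 kHz mono WAV in ./assets/audios/, extracting it with ffmpeg if missing.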
available_videos = natsorted(glob.glob("./assets/videos/*.mp4"))
available_videos = [os.path.basename(x) for x in available_videos]

for video in available_videos:
    audio = video.replace(".mp4", ".wav")
    if not os.path.exists(os.path.join("./assets/audios/", audio)):
        os.system(f'ffmpeg -y -loglevel error -i "./assets/videos/{video}" -vn -acodec pcm_s16le -ar 16000 -ac 1 "./assets/audios/{audio}"')

available_audios = natsorted(glob.glob("./assets/audios/*.wav"))
available_audios = [os.path.basename(x) for x in available_audios]
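
# Build the Gradio UI: a header with project links, example previews and
# attribute selectors on the left, and the generated video on the right.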
with gr.Blocks() as demo:
    gr.HTML(
        """
        <h1 style="text-align: center; font-size: 40px; font-family: 'Times New Roman', Times, serif;">
            Free-View Expressive Talking Head Video Editing
        </h1>
        <p style="text-align: center; font-size: 20px; font-family: 'Times New Roman', Times, serif;">
            <a style="text-align: center; display:inline-block"
               href="https://sky24h.github.io/websites/icassp2023_free-view_video-editing">
                <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/paper-page-sm.svg#center"
                     alt="Project Page">
            </a>
            <a style="text-align: center; display:inline-block" href="https://huggingface.co/spaces/sky24h/Free-View_Expressive_Talking_Head_Video_Editing?duplicate=true">
                <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center" alt="Duplicate Space">
            </a>
        </p>
        <p style="text-align: center; font-size: 16px; font-family: 'Times New Roman', Times, serif;">
            If you wish to use your own input files, please duplicate this Space or clone it to your local environment.
        </p>
        <p style="text-align: center; font-size: 16px; font-family: 'Times New Roman', Times, serif;">
            Alternatively, you can check our official <a href="https://github.com/sky24h/Free-View_Expressive_Talking_Head_Video_Editing">repository</a> on GitHub.
        </p>
        """
    )
    with gr.Column(elem_id="col-container"):
        with gr.Row():
            with gr.Column():
                # select and preview video from a list of examples
                video_preview = gr.Video(label="Video Preview", elem_id="video-preview")
                audio_preview = gr.Audio(label="Audio Preview", elem_id="audio-preview", type="filepath")
                pose_select = gr.Radio(["front", "left_right_shaking"], label="Pose", value="front")
                emotion_select = gr.Radio(["neutral", "happy", "angry", "surprised"], label="Emotion", value="neutral")
                blink_select = gr.Radio(["yes", "no"], label="Blink", value="yes")
            with gr.Column():
                video_out = gr.Video(label="Video Output", elem_id="video-output", height=360)
                submit_btn = gr.Button("Generate video")

    inputs = [video_preview, audio_preview, pose_select, emotion_select, blink_select]
    outputs = [video_out]
    submit_btn.click(process, inputs, outputs)
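
# Queue incoming requests (at most 10 waiting at once) and launch the app.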
demo.queue(max_size=10).launch()