import os
import glob
import subprocess

import gradio as gr
import spaces
from natsort import natsorted

# The inference module exports a misspelled name; alias it for readability.
from inference_util import init_model, infenrece as run_inference
from attributtes_utils import input_pose, input_emotion, input_blink

@spaces.GPU
def process(input_vid, audio_path, pose_select, emotion_select, blink_select):
    model = init_model()
    pose = input_pose(pose_select)
    emotion = input_emotion(emotion_select)
    blink = input_blink(blink_select)
    print("audio_path:", audio_path, "input_vid:", input_vid)

    # Convert the driving audio to 16 kHz mono PCM WAV. Passing the arguments
    # as a list (instead of an f-string through os.system) avoids shell-quoting
    # problems with paths that contain spaces.
    result = subprocess.run(
        ["ffmpeg", "-y", "-loglevel", "error", "-i", audio_path,
         "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", "2_output.wav"]
    )
    if result.returncode != 0:
        raise RuntimeError("ffmpeg failed to convert the audio. Please check the input audio file.")
    if not os.path.exists("2_output.wav"):
        raise FileNotFoundError("2_output.wav was not created. Check the ffmpeg command and input file.")

    # Perform inference with the selected pose, emotion, and blink attributes.
    try:
        result = run_inference(model, input_vid, "2_output.wav", pose, emotion, blink)
    except Exception as e:
        raise RuntimeError(f"Inference failed: {e}") from e
    print("result:", result)
    print("finished!")
    return result  # , gr.Group.update(visible=True)
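
# Minimal local usage sketch (a non-authoritative example; the asset file
# names below are hypothetical, and a GPU allocation via @spaces.GPU is
# assumed to be available):
#   out_path = process("./assets/videos/example.mp4",
#                      "./assets/audios/example.wav",
#                      "front", "neutral", "yes")
#   print(out_path)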

# Collect the bundled example videos and extract a 16 kHz mono WAV for any
# video that does not have one yet.
available_videos = natsorted(glob.glob("./assets/videos/*.mp4"))
available_videos = [os.path.basename(x) for x in available_videos]

os.makedirs("./assets/audios", exist_ok=True)
for video in available_videos:
    audio = video.replace(".mp4", ".wav")
    audio_path = os.path.join("./assets/audios/", audio)
    if not os.path.exists(audio_path):
        subprocess.run(
            ["ffmpeg", "-y", "-loglevel", "error", "-i", f"./assets/videos/{video}",
             "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audio_path]
        )

available_audios = natsorted(glob.glob("./assets/audios/*.wav"))
available_audios = [os.path.basename(x) for x in available_audios]
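
# Sanity check (an added sketch, not part of the original flow): warn when an
# example video is missing its extracted audio, so broken pairs show up in the
# logs instead of failing silently at selection time.
for _video in available_videos:
    _expected = _video.replace(".mp4", ".wav")
    if _expected not in available_audios:
        print(f"Warning: no extracted audio found for {_video}")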

with gr.Blocks() as demo:
    gr.HTML(
        """
        <h1 style="text-align: center; font-size: 40px; font-family: 'Times New Roman', Times, serif;">
            Free-View Expressive Talking Head Video Editing
        </h1>
        <p style="text-align: center; font-size: 20px; font-family: 'Times New Roman', Times, serif;">
            <a style="text-align: center; display:inline-block"
               href="https://sky24h.github.io/websites/icassp2023_free-view_video-editing">
                <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/paper-page-sm.svg#center"
                     alt="Project Page">
            </a>
            <a style="text-align: center; display:inline-block" href="https://huggingface.co/spaces/sky24h/Free-View_Expressive_Talking_Head_Video_Editing?duplicate=true">
                <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center" alt="Duplicate Space">
            </a>
        </p>
        <p style="text-align: center; font-size: 16px; font-family: 'Times New Roman', Times, serif;">
            If you wish to use your own input files, please duplicate this Space or clone it to your local environment.
        </p>
        <p style="text-align: center; font-size: 16px; font-family: 'Times New Roman', Times, serif;">
            Alternatively, you can check our official <a href="https://github.com/sky24h/Free-View_Expressive_Talking_Head_Video_Editing">repository</a> on GitHub.
        </p>
        """
    )

    with gr.Column(elem_id="col-container"):
        with gr.Row():
            with gr.Column():
                # Select and preview the input video and audio; example pairs
                # can be chosen from the selector below.
                video_preview = gr.Video(label="Video Preview", elem_id="video-preview")
                audio_preview = gr.Audio(label="Audio Preview", elem_id="audio-preview", type="filepath")
                pose_select = gr.Radio(["front", "left_right_shaking"], label="Pose", value="front")
                emotion_select = gr.Radio(["neutral", "happy", "angry", "surprised"], label="Emotion", value="neutral")
                blink_select = gr.Radio(["yes", "no"], label="Blink", value="yes")
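
                # Example selector (an added, hedged sketch: the original file
                # builds available_videos/available_audios but never attaches
                # them to the UI. This wiring assumes each extracted WAV shares
                # its video's basename, as guaranteed by the loop above).
                gr.Examples(
                    examples=[
                        [os.path.join("./assets/videos", v),
                         os.path.join("./assets/audios", v.replace(".mp4", ".wav"))]
                        for v in available_videos
                    ],
                    inputs=[video_preview, audio_preview],
                    label="Example video/audio pairs",
                )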

            with gr.Column():
                video_out = gr.Video(label="Video Output", elem_id="video-output", height=360)
                submit_btn = gr.Button("Generate video")

    inputs = [video_preview, audio_preview, pose_select, emotion_select, blink_select]
    outputs = [video_out]
    submit_btn.click(process, inputs, outputs)

demo.queue(max_size=10).launch()