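"""Gradio demo for "Free-View Expressive Talking Head Video Editing" (ICASSP 2023).

Lets the user pick a bundled example video and audio clip, choose pose, emotion,
and blink attributes, and run the editing model to synthesize a new talking-head video.
"""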
import os
import glob
import subprocess

import spaces
import gradio as gr
from natsort import natsorted

# Note: "infenrece" and "attributtes_utils" are spelled as in this project's modules.
from inference_util import init_model, infenrece
from attributtes_utils import input_pose, input_emotion, input_blink


@spaces.GPU
def process(input_vid, audio_path, pose_select, emotion_select, blink_select):
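    # @spaces.GPU allocates a GPU for the duration of this call (Hugging Face ZeroGPU).
    # The model is (re)initialized on every request, and the UI selections are
    # converted into the model's attribute inputs.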
    model = init_model()
    pose = input_pose(pose_select)
    emotion = input_emotion(emotion_select)
    
    # Resample the input audio to 16 kHz mono 16-bit PCM for the model.
    # Passing an argument list to subprocess.run avoids shell-quoting issues
    # with paths that contain spaces.
    print(audio_path, input_vid)
    cmd = [
        "ffmpeg", "-y", "-loglevel", "error", "-i", audio_path,
        "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", "2_output.wav",
    ]
    if subprocess.run(cmd).returncode != 0:
        raise RuntimeError("Failed to execute ffmpeg. Please check the input audio file.")
    
    # Check if output file exists
    if not os.path.exists("2_output.wav"):
        raise FileNotFoundError("2_output.wav was not created. Check the ffmpeg command and input file.")
    
    blink = input_blink(blink_select)
    print("input_vid: ", input_vid)
    
    # Run inference; the returned value (the edited video) is passed to the gr.Video output.
    try:
        result = infenrece(model, input_vid, "2_output.wav", pose, emotion, blink)
    except Exception as e:
        raise RuntimeError(f"Inference failed: {e}") from e
    
    print("result: ", result)
    print("finished!")

    return result


# Discover the bundled example videos; natural sort keeps "clip2.mp4" before "clip10.mp4".
available_videos = natsorted(glob.glob("./assets/videos/*.mp4"))
available_videos = [os.path.basename(x) for x in available_videos]

# Extract a matching 16 kHz mono WAV for every example video that does not already have one.
for video in available_videos:
    audio = video.replace(".mp4", ".wav")
    if not os.path.exists(os.path.join("./assets/audios/", audio)):
        subprocess.run([
            "ffmpeg", "-y", "-loglevel", "error", "-i", f"./assets/videos/{video}",
            "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1",
            f"./assets/audios/{audio}",
        ])
available_audios = natsorted(glob.glob("./assets/audios/*.wav"))
available_audios = [os.path.basename(x) for x in available_audios]


with gr.Blocks() as demo:
    gr.HTML(
        """
            <h1 style="text-align: center; font-size: 40px; font-family: 'Times New Roman', Times, serif;">
                Free-View Expressive Talking Head Video Editing
            </h1>
            <p style="text-align: center; font-size: 20px; font-family: 'Times New Roman', Times, serif;">
                <a style="text-align: center; display:inline-block"
                    href="https://sky24h.github.io/websites/icassp2023_free-view_video-editing">
                    <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/paper-page-sm.svg#center"
                    alt="Project Page">
                </a>
                <a style="text-align: center; display:inline-block" href="https://huggingface.co/spaces/sky24h/Free-View_Expressive_Talking_Head_Video_Editing?duplicate=true">
                    <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center" alt="Duplicate Space">
                </a>
            </p>
            <p style="text-align: center; font-size: 16px; font-family: 'Times New Roman', Times, serif;">
                If you wish to use your own input files, please duplicate this Space or clone it to your local environment.</p>
            <p style="text-align: center; font-size: 16px; font-family: 'Times New Roman', Times, serif;">
                Alternatively, you can check our official <a href="https://github.com/sky24h/Free-View_Expressive_Talking_Head_Video_Editing">repository</a> on GitHub.
            </p>
            """
    )
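    # Two-column layout: example previews and attribute radios on the left,
    # the output video and trigger button on the right.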
    with gr.Column(elem_id="col-container"):
        with gr.Row():
            with gr.Column():
                # select and preview video from a list of examples
                video_preview = gr.Video(label="Video Preview", elem_id="video-preview")
                audio_preview = gr.Audio(label="Audio Preview", elem_id="audio-preview", type="filepath")
                pose_select = gr.Radio(["front", "left_right_shaking"], label="Pose", value="front")
                emotion_select = gr.Radio(["neutral", "happy", "angry", "surprised"], label="Emotion", value="neutral")
                blink_select = gr.Radio(["yes", "no"], label="Blink", value="yes")
            with gr.Column():
                video_out = gr.Video(label="Video Output", elem_id="video-output", height=360)
                submit_btn = gr.Button("Generate video")

        inputs = [video_preview, audio_preview, pose_select, emotion_select, blink_select]
        outputs = [video_out]

    # Run process() on click with the selected video, audio, and attribute settings.
    submit_btn.click(process, inputs, outputs)

# Cap the request queue at 10 pending jobs before launching the app.
demo.queue(max_size=10).launch()