import glob
import os

import gradio as gr
import spaces
from natsort import natsorted

from attributtes_utils import input_blink, input_emotion, input_pose
from inference_util import infenrece, init_model


@spaces.GPU
def process(input_vid, audio_path, pose_select, emotion_select, blink_select):
    model = init_model()
    pose = input_pose(pose_select)
    emotion = input_emotion(emotion_select)

    # Convert the uploaded audio to 16 kHz mono PCM WAV; quote the path so
    # filenames containing spaces do not break the shell command.
    print(audio_path, input_vid)
    result = os.system(f'ffmpeg -y -loglevel error -i "{audio_path}" -vn -acodec pcm_s16le -ar 16000 -ac 1 2_output.wav')
    if result != 0:
        raise RuntimeError("Failed to execute ffmpeg command. Please check the input audio file.")

    # Check that the converted file was actually created
    if not os.path.exists("2_output.wav"):
        raise FileNotFoundError("2_output.wav was not created. Check the ffmpeg command and input file.")

    blink = input_blink(blink_select)

    print("input_vid: ", input_vid)

    # Perform inference (`infenrece` is the helper's name as exported by inference_util)
    try:
        result = infenrece(model, input_vid, "2_output.wav", pose, emotion, blink)
    except Exception as e:
        raise RuntimeError(f"Inference failed: {e}")

    print("result: ", result)
    print("finished!")

    return result  # , gr.Group.update(visible=True)


available_videos = natsorted(glob.glob("./assets/videos/*.mp4"))
available_videos = [os.path.basename(x) for x in available_videos]

# Extract a matching 16 kHz mono WAV for every example video that lacks one
for video in available_videos:
    audio = video.replace(".mp4", ".wav")
    if not os.path.exists(os.path.join("./assets/audios/", audio)):
        os.system(f"ffmpeg -y -loglevel error -i ./assets/videos/{video} -vn -acodec pcm_s16le -ar 16000 -ac 1 ./assets/audios/{audio}")

available_audios = natsorted(glob.glob("./assets/audios/*.wav"))
available_audios = [os.path.basename(x) for x in available_audios]

with gr.Blocks() as demo:
    gr.HTML(
        """
        <p>If you wish to use your own input files, please duplicate this Space or clone it to your local environment.</p>
        <p>Alternatively, you can check our official repository on GitHub.</p>
""" ) with gr.Column(elem_id="col-container"): with gr.Row(): with gr.Column(): # select and preview video from a list of examples video_preview = gr.Video(label="Video Preview", elem_id="video-preview") audio_preview = gr.Audio(label="Audio Preview", elem_id="audio-preview", type="filepath") pose_select = gr.Radio(["front", "left_right_shaking"], label="Pose", value="front") emotion_select = gr.Radio(["neutral", "happy", "angry", "surprised"], label="Emotion", value="neutral") blink_select = gr.Radio(["yes", "no"], label="Blink", value="yes") # with gr.Row(): with gr.Column(): video_out = gr.Video(label="Video Output", elem_id="video-output", height=360) submit_btn = gr.Button("Generate video") inputs = [video_preview, audio_preview, pose_select, emotion_select, blink_select] outputs = [video_out] submit_btn.click(process, inputs, outputs) demo.queue(max_size=10).launch()