import glob
import os

import gradio as gr
import spaces
from natsort import natsorted

from attributtes_utils import input_blink, input_emotion, input_pose
from inference_util import infenrece, init_model


@spaces.GPU
def process(input_vid, audio_path, pose_select, emotion_select, blink_select):
    model = init_model()
    pose = input_pose(pose_select)
    emotion = input_emotion(emotion_select)

    # Convert the uploaded audio to 16 kHz mono PCM WAV; quote the path so
    # filenames containing spaces do not break the shell command.
    print(audio_path, input_vid)
    result = os.system(f'ffmpeg -y -loglevel error -i "{audio_path}" -vn -acodec pcm_s16le -ar 16000 -ac 1 2_output.wav')
    if result != 0:
        raise RuntimeError("Failed to execute ffmpeg command. Please check the input audio file.")

    # Check that the converted file was actually created
    if not os.path.exists("2_output.wav"):
        raise FileNotFoundError("2_output.wav was not created. Check the ffmpeg command and input file.")

    blink = input_blink(blink_select)

    print("input_vid: ", input_vid)

    # Perform inference (`infenrece` is the helper's name as exported by inference_util)
    try:
        result = infenrece(model, input_vid, "2_output.wav", pose, emotion, blink)
    except Exception as e:
        raise RuntimeError(f"Inference failed: {e}")

    print("result: ", result)
    print("finished!")

    return result  # , gr.Group.update(visible=True)


available_videos = natsorted(glob.glob("./assets/videos/*.mp4"))
available_videos = [os.path.basename(x) for x in available_videos]

# Extract a matching 16 kHz mono WAV for every example video that lacks one
for video in available_videos:
    audio = video.replace(".mp4", ".wav")
    if not os.path.exists(os.path.join("./assets/audios/", audio)):
        os.system(f"ffmpeg -y -loglevel error -i ./assets/videos/{video} -vn -acodec pcm_s16le -ar 16000 -ac 1 ./assets/audios/{audio}")

available_audios = natsorted(glob.glob("./assets/audios/*.wav"))
available_audios = [os.path.basename(x) for x in available_audios]

with gr.Blocks() as demo:
    gr.HTML(
        """
        <p>If you wish to use your own input files, please duplicate this Space or clone it to your local environment.</p>
        <p>Alternatively, you can check our official repository on GitHub.</p>
""" ) with gr.Column(elem_id="col-container"): with gr.Row(): with gr.Column(): # select and preview video from a list of examples video_preview = gr.Video(label="Video Preview", elem_id="video-preview") audio_preview = gr.Audio(label="Audio Preview", elem_id="audio-preview", type="filepath") pose_select = gr.Radio(["front", "left_right_shaking"], label="Pose", value="front") emotion_select = gr.Radio(["neutral", "happy", "angry", "surprised"], label="Emotion", value="neutral") blink_select = gr.Radio(["yes", "no"], label="Blink", value="yes") # with gr.Row(): with gr.Column(): video_out = gr.Video(label="Video Output", elem_id="video-output", height=360) submit_btn = gr.Button("Generate video") inputs = [video_preview, audio_preview, pose_select, emotion_select, blink_select] outputs = [video_out] submit_btn.click(process, inputs, outputs) demo.queue(max_size=10).launch()