import os
import glob
import subprocess

import gradio as gr
import spaces
from natsort import natsorted

# The inference module exports a misspelled name; alias it for readability.
from inference_util import init_model, infenrece as run_inference
from attributtes_utils import input_pose, input_emotion, input_blink

@spaces.GPU
def process(input_vid, audio_path, pose_select, emotion_select, blink_select):
    model = init_model()
    pose = input_pose(pose_select)
    emotion = input_emotion(emotion_select)
    blink = input_blink(blink_select)
    print("audio_path:", audio_path, "input_vid:", input_vid)

    # Convert the driving audio to 16 kHz mono PCM WAV. Passing the arguments
    # as a list (instead of an f-string through os.system) avoids shell-quoting
    # problems with paths that contain spaces.
    result = subprocess.run(
        ["ffmpeg", "-y", "-loglevel", "error", "-i", audio_path,
         "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", "2_output.wav"]
    )
    if result.returncode != 0:
        raise RuntimeError("ffmpeg failed to convert the audio. Please check the input audio file.")
    if not os.path.exists("2_output.wav"):
        raise FileNotFoundError("2_output.wav was not created. Check the ffmpeg command and input file.")

    # Perform inference with the selected pose, emotion, and blink attributes.
    try:
        result = run_inference(model, input_vid, "2_output.wav", pose, emotion, blink)
    except Exception as e:
        raise RuntimeError(f"Inference failed: {e}") from e
    print("result:", result)
    print("finished!")
    return result  # , gr.Group.update(visible=True)
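
# Minimal local usage sketch (a non-authoritative example; the asset file
# names below are hypothetical, and a GPU allocation via @spaces.GPU is
# assumed to be available):
#   out_path = process("./assets/videos/example.mp4",
#                      "./assets/audios/example.wav",
#                      "front", "neutral", "yes")
#   print(out_path)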

# Collect the bundled example videos and extract a 16 kHz mono WAV for any
# video that does not have one yet.
available_videos = natsorted(glob.glob("./assets/videos/*.mp4"))
available_videos = [os.path.basename(x) for x in available_videos]

os.makedirs("./assets/audios", exist_ok=True)
for video in available_videos:
    audio = video.replace(".mp4", ".wav")
    audio_path = os.path.join("./assets/audios/", audio)
    if not os.path.exists(audio_path):
        subprocess.run(
            ["ffmpeg", "-y", "-loglevel", "error", "-i", f"./assets/videos/{video}",
             "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", audio_path]
        )

available_audios = natsorted(glob.glob("./assets/audios/*.wav"))
available_audios = [os.path.basename(x) for x in available_audios]
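
# Sanity check (an added sketch, not part of the original flow): warn when an
# example video is missing its extracted audio, so broken pairs show up in the
# logs instead of failing silently at selection time.
for _video in available_videos:
    _expected = _video.replace(".mp4", ".wav")
    if _expected not in available_audios:
        print(f"Warning: no extracted audio found for {_video}")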

with gr.Blocks() as demo:
    gr.HTML(
        """
        <h1 style="text-align: center; font-size: 40px; font-family: 'Times New Roman', Times, serif;">
            Free-View Expressive Talking Head Video Editing
        </h1>
        <p style="text-align: center; font-size: 20px; font-family: 'Times New Roman', Times, serif;">
            <a style="text-align: center; display:inline-block"
               href="https://sky24h.github.io/websites/icassp2023_free-view_video-editing">
                <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/paper-page-sm.svg#center"
                     alt="Project Page">
            </a>
            <a style="text-align: center; display:inline-block" href="https://huggingface.co/spaces/sky24h/Free-View_Expressive_Talking_Head_Video_Editing?duplicate=true">
                <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center" alt="Duplicate Space">
            </a>
        </p>
        <p style="text-align: center; font-size: 16px; font-family: 'Times New Roman', Times, serif;">
            If you wish to use your own input files, please duplicate this Space or clone it to your local environment.
        </p>
        <p style="text-align: center; font-size: 16px; font-family: 'Times New Roman', Times, serif;">
            Alternatively, you can check our official <a href="https://github.com/sky24h/Free-View_Expressive_Talking_Head_Video_Editing">repository</a> on GitHub.
        </p>
        """
    )

    with gr.Column(elem_id="col-container"):
        with gr.Row():
            with gr.Column():
                # Select and preview the input video and audio; example pairs
                # can be chosen from the selector below.
                video_preview = gr.Video(label="Video Preview", elem_id="video-preview")
                audio_preview = gr.Audio(label="Audio Preview", elem_id="audio-preview", type="filepath")
                pose_select = gr.Radio(["front", "left_right_shaking"], label="Pose", value="front")
                emotion_select = gr.Radio(["neutral", "happy", "angry", "surprised"], label="Emotion", value="neutral")
                blink_select = gr.Radio(["yes", "no"], label="Blink", value="yes")
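
                # Example selector (an added, hedged sketch: the original file
                # builds available_videos/available_audios but never attaches
                # them to the UI. This wiring assumes each extracted WAV shares
                # its video's basename, as guaranteed by the loop above).
                gr.Examples(
                    examples=[
                        [os.path.join("./assets/videos", v),
                         os.path.join("./assets/audios", v.replace(".mp4", ".wav"))]
                        for v in available_videos
                    ],
                    inputs=[video_preview, audio_preview],
                    label="Example video/audio pairs",
                )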

            with gr.Column():
                video_out = gr.Video(label="Video Output", elem_id="video-output", height=360)
                submit_btn = gr.Button("Generate video")

    inputs = [video_preview, audio_preview, pose_select, emotion_select, blink_select]
    outputs = [video_out]
    submit_btn.click(process, inputs, outputs)

demo.queue(max_size=10).launch()