Spaces:
Running
on
L4
Running
on
L4
File size: 4,816 Bytes
cff9535 00505e3 cff9535 2299694 a86a2b8 cff9535 a86a2b8 cff9535 a86a2b8 cff9535 9beb764 a86a2b8 9beb764 a86a2b8 9beb764 cff9535 a86a2b8 cff9535 a86a2b8 2ad9d5d cff9535 a86a2b8 cff9535 a86a2b8 cff9535 a22eb82 cff9535 a22eb82 0f9de5e cff9535 9801f4f a22eb82 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import os, sys
import tempfile
import gradio as gr
from modules.text2speech import text2speech
from modules.sadtalker_test import SadTalker
def get_driven_audio(audio):
    """Resolve the driving audio for a generation request.

    If ``audio`` is the path of an existing file, it is returned unchanged.
    Otherwise ``audio`` is passed to ``text2speech`` together with the path
    of a newly created temporary ``.wav`` file.

    Args:
        audio: Path of an existing audio file, or the value to hand to
            ``text2speech`` (presumably the text to synthesize -- confirm
            against ``modules.text2speech``).

    Returns:
        ``audio`` itself when it names an existing file; otherwise a 2-tuple
        containing the value returned by ``text2speech`` twice.
        NOTE(review): the two branches return different shapes; this is kept
        as-is for backward compatibility with existing callers.
    """
    if os.path.isfile(audio):
        return audio

    # Only the file *name* is needed. Close the handle right away so it is
    # not leaked and so text2speech can reopen the path on platforms that
    # forbid opening a still-open temporary file a second time (Windows).
    # delete=False keeps the file on disk after the handle is closed.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
        wav_path = wav_file.name

    gen_audio = text2speech(audio, wav_path)
    return gen_audio, gen_audio
def get_source_image(image):
    """Return the given source image unchanged (identity pass-through)."""
    return image
def sadtalker_demo(result_dir='./tmp/'):
    """Build the Gradio Blocks interface for the SadTalker demo.

    The interface contains an image upload, an audio upload, three mode
    checkboxes, a "Generate" button, a video output and a hidden text
    output. Both the button and the bundled examples invoke
    ``SadTalker.test`` with the same inputs and outputs.

    Args:
        result_dir: Value stored in a hidden textbox and forwarded as the
            last input to ``SadTalker.test`` (presumably the directory where
            results are written -- confirm against ``SadTalker.test``).

    Returns:
        The constructed ``gr.Blocks`` object; it is not launched here.
    """
    sad_talker = SadTalker()

    # Page header. Adjacent literals are concatenated into one HTML string.
    # The markup is kept well-formed: every <a> is closed and there is no
    # closing tag without a matching opening tag.
    header_html = (
        "<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </h2> "
        "<a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> "
        "<a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> "
        "<a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </a> </div>"
    )

    # NOTE(review): the layout nesting below is reconstructed from the
    # statement order -- confirm against the deployed UI.
    with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
        gr.Markdown(header_html)

        with gr.Row():
            # Left panel: source image and driving audio uploads.
            with gr.Column(variant='panel'):
                with gr.Tabs(elem_id="sadtalker_source_image"):
                    with gr.TabItem('Upload image'):
                        with gr.Row():
                            source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256)

                with gr.Tabs(elem_id="sadtalker_driven_audio"):
                    with gr.TabItem('Upload audio(wav/mp3 only currently)'):
                        with gr.Column(variant='panel'):
                            driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath")

            # Right panel: generation settings, trigger button and outputs.
            with gr.Column(variant='panel'):
                with gr.Tabs(elem_id="sadtalker_checkbox"):
                    with gr.TabItem('Settings'):
                        with gr.Column(variant='panel'):
                            is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion)").style(container=True)
                            is_resize_mode = gr.Checkbox(label="Resize Mode (⚠️ Resize mode need manually crop the image firstly, can handle larger image crop)").style(container=True)
                            is_enhance_mode = gr.Checkbox(label="Enhance Mode (better face quality )").style(container=True)
                            submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')

                # elem_id spelling is kept verbatim: it is a runtime
                # identifier that external CSS/JS may reference.
                with gr.Tabs(elem_id="sadtalker_genearted"):
                    gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
                    gen_text = gr.Textbox(visible=False)

        with gr.Row():
            # Each row: [source image, driving audio, still, resize, enhance].
            examples = [
                [
                    'examples/source_image/art_10.png',
                    'examples/driven_audio/deyu.wav',
                    True,
                    False,
                    False,
                ],
                [
                    'examples/source_image/art_1.png',
                    'examples/driven_audio/fayu.wav',
                    True,
                    True,
                    False,
                ],
                [
                    'examples/source_image/art_9.png',
                    'examples/driven_audio/itosinger1.wav',
                    True,
                    False,
                    True,
                ],
            ]

            # A single hidden textbox carries result_dir; the examples and
            # the button share it (and the same input/output lists) so the
            # two wirings cannot drift apart.
            result_dir_box = gr.Textbox(value=result_dir, visible=False)
            handler_inputs = [
                source_image,
                driven_audio,
                is_still_mode,
                is_resize_mode,
                is_enhance_mode,
                result_dir_box,
            ]
            handler_outputs = [gen_video, gen_text]

            gr.Examples(
                examples=examples,
                inputs=handler_inputs,
                outputs=handler_outputs,
                fn=sad_talker.test,
                # Example caching is enabled only when the SYSTEM
                # environment variable equals 'spaces'.
                cache_examples=os.getenv('SYSTEM') == 'spaces',
            )

        submit.click(
            fn=sad_talker.test,
            inputs=handler_inputs,
            outputs=handler_outputs,
        )

    return sadtalker_interface
if __name__ == "__main__":
    # Script entry point: build the demo with results under ./results and
    # start the Gradio server.
    demo = sadtalker_demo(os.path.join('./', 'results'))
    demo.launch()
|