File size: 2,628 Bytes
8e5c8d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import gradio as gr
from gradio_client import Client

def get_speech(text, voice):
    """Synthesize speech for *text* via the hosted WhisperSpeech Space.

    Args:
        text: Text to be spoken (sent to the Space's multilingual text box).
        voice: File path of a speaker audio sample used for voice cloning.
            The remote component is marked optional, so this may be ``None``.

    Returns:
        The value returned by the ``/whisper_speech_demo`` endpoint. It is
        forwarded as-is to the DreamTalk audio input, so it is presumably a
        path to the generated audio file -- confirm against the Space's API.

    Raises:
        gr.Error: If the client raises ``ValueError``. Other exception types
            are deliberately left to propagate to the caller.
    """
    try:
        client = Client("https://collabora-whisperspeech.hf.space/")
        result = client.predict(
            text,   # text to synthesize
            voice,  # speaker reference audio file path (optional)
            "",     # alternative: URL of a speaker audio file (unused here)
            14,     # tempo in characters per second (slider range 10-15)
            api_name="/whisper_speech_demo",
        )
    except ValueError as e:
        # Chain explicitly so the original traceback stays attached as the
        # cause of the user-facing error.
        raise gr.Error(f"Error in get_speech: {e}") from e
    print(result)
    return result

def get_dreamtalk(image_in, speech):
    """Animate a portrait with the given speech via the hosted DreamTalk Space.

    Args:
        image_in: File path of the portrait image to animate.
        speech: File path of the audio that drives the animation.

    Returns:
        The ``'video'`` entry of the result returned by the ``/infer``
        endpoint.

    Raises:
        gr.Error: If the client (or result handling) raises ``ValueError``.
            Other exception types are deliberately left to propagate to the
            caller.
    """
    try:
        client = Client("https://fffiloni-dreamtalk.hf.space/")
        result = client.predict(
            speech,    # audio input file path
            image_in,  # portrait image file path
            "M030_front_neutral_level1_001.mat",  # emotional style preset
            api_name="/infer",
        )
        print(result)
        return result['video']
    except ValueError as e:
        # Chain explicitly so the original traceback stays attached as the
        # cause of the user-facing error.
        raise gr.Error(f"Error in get_dreamtalk: {e}") from e

def pipe(text, voice, image_in):
    """Run the full pipeline: text -> cloned speech -> talking-portrait video.

    Args:
        text: Text the portrait should speak.
        voice: File path of the speaker audio sample, passed through to
            ``get_speech``.
        image_in: File path of the portrait image, passed through to
            ``get_dreamtalk``.

    Returns:
        The video value produced by ``get_dreamtalk``.

    Raises:
        gr.Error: On any failure in either stage.
    """
    try:
        speech = get_speech(text, voice)
        return get_dreamtalk(image_in, speech)
    except gr.Error:
        # The helpers already produced a user-facing message; re-raise it
        # untouched instead of wrapping it in a second gr.Error, which would
        # show a doubly prefixed message in the UI.
        raise
    except Exception as e:
        raise gr.Error(f"An error occurred while processing: {e}") from e

# UI layout: a header, then one row with three columns
# (portrait input | voice + text + submit | resulting video).
with gr.Blocks() as demo:
    with gr.Column():
        # Page title and tagline.
        gr.HTML("""
        <h1 style="text-align: center;">
        Talking Image 
        </h1>
        <p style="text-align: center;"></p>
        <h3 style="text-align: center;">
        Clone your voice and make your photos speak. 
        </h3>
        <p style="text-align: center;"></p>
        """)
        with gr.Row():
            # Left column: portrait to animate, pre-filled with a bundled image.
            with gr.Column():
                image_in = gr.Image(
                    label="Portrait IN",
                    type="filepath",
                    value="./creatus.jpg",
                )
            # Middle column: speaker sample, text to speak, and the trigger.
            with gr.Column():
                voice = gr.Audio(
                    type="filepath",
                    label="Upload or Record Speaker audio (Optional voice cloning)",
                )
                text = gr.Textbox(label="text")
                submit_btn = gr.Button('Submit')
            # Right column: generated video.
            with gr.Column():
                video_o = gr.Video(label="Video result")

    # Wire the button to the two-stage pipeline.
    submit_btn.click(
        fn=pipe,
        inputs=[text, voice, image_in],
        outputs=[video_o],
        concurrency_limit=3,
    )

# Enable the request queue (at most 10 waiting jobs), then start the app.
demo.queue(max_size=10)
demo.launch(show_error=True, show_api=False)