Spaces:
Running
Running
File size: 4,466 Bytes
63f899c 59d9186 63f899c 59d9186 63f899c ca33a23 bd786ec c39b894 63f899c 8c79f6a a6075c0 59d9186 63f899c 59d9186 751c5b7 59d9186 63f899c 59d9186 63f899c 59d9186 63f899c a6075c0 baa1646 a6075c0 bd786ec a6075c0 e4e4cf1 a6075c0 e4e4cf1 a6075c0 c39b894 a6075c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import os
import shutil
from huggingface_hub import snapshot_download
import gradio as gr
os.chdir(os.path.dirname(os.path.abspath(__file__)))
from scripts.inference import inference_process
import argparse
import uuid
is_shared_ui = True if "fudan-generative-ai/hallo" in os.environ['SPACE_ID'] else False
if(not is_shared_ui):
hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")
def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
if is_shared_ui:
raise gr.Error("This Space only works in duplicated instances")
unique_id = uuid.uuid4()
args = argparse.Namespace(
config='configs/inference/default.yaml',
source_image=source_image,
driving_audio=driving_audio,
output=f'output-{unique_id}.mp4',
pose_weight=1.0,
face_weight=1.0,
lip_weight=1.0,
face_expand_ratio=1.2,
checkpoint=None
)
inference_process(args)
return f'output-{unique_id}.mp4'
css = '''
div#warning-ready {
background-color: #ecfdf5;
padding: 0 16px 16px;
margin: 20px 0;
color: #030303!important;
}
div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
color: #057857!important;
}
div#warning-duplicate {
background-color: #ebf5ff;
padding: 0 16px 16px;
margin: 20px 0;
color: #030303!important;
}
div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
color: #0f4592!important;
}
div#warning-duplicate strong {
color: #0f4592;
}
p.actions {
display: flex;
align-items: center;
margin: 20px 0;
}
div#warning-duplicate .actions a {
display: inline-block;
margin-right: 10px;
}
.dark #warning-duplicate {
background-color: #0c0c0c !important;
border: 1px solid white !important;
}
'''
with gr.Blocks(css=css) as demo:
if is_shared_ui:
top_description = gr.HTML(f'''
<div class="gr-prose">
<h2 class="custom-color"><svg xmlns="http://www.w3.org/2000/svg" width="18px" height="18px" style="margin-right: 0px;display: inline-block;"fill="none"><path fill="#fff" d="M7 13.2a6.3 6.3 0 0 0 4.4-10.7A6.3 6.3 0 0 0 .6 6.9 6.3 6.3 0 0 0 7 13.2Z"/><path fill="#fff" fill-rule="evenodd" d="M7 0a6.9 6.9 0 0 1 4.8 11.8A6.9 6.9 0 0 1 0 7 6.9 6.9 0 0 1 7 0Zm0 0v.7V0ZM0 7h.6H0Zm7 6.8v-.6.6ZM13.7 7h-.6.6ZM9.1 1.7c-.7-.3-1.4-.4-2.2-.4a5.6 5.6 0 0 0-4 1.6 5.6 5.6 0 0 0-1.6 4 5.6 5.6 0 0 0 1.6 4 5.6 5.6 0 0 0 4 1.7 5.6 5.6 0 0 0 4-1.7 5.6 5.6 0 0 0 1.7-4 5.6 5.6 0 0 0-1.7-4c-.5-.5-1.1-.9-1.8-1.2Z" clip-rule="evenodd"/><path fill="#000" fill-rule="evenodd" d="M7 2.9a.8.8 0 1 1 0 1.5A.8.8 0 0 1 7 3ZM5.8 5.7c0-.4.3-.6.6-.6h.7c.3 0 .6.2.6.6v3.7h.5a.6.6 0 0 1 0 1.3H6a.6.6 0 0 1 0-1.3h.4v-3a.6.6 0 0 1-.6-.7Z" clip-rule="evenodd"/></svg>
Attention: this Space need to be duplicated to work</h2>
<p class="main-message custom-color">
To make it work, <strong>duplicate the Space</strong> and run it on your own profile using a <strong>private</strong> GPU.<br />
An L4 costs <strong>US$0.80/h</strong>
</p>
<p class="actions custom-color">
<a href="https://huggingface.co/spaces/{os.environ['SPACE_ID']}?duplicate=true">
<img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg-dark.svg" alt="Duplicate this Space" />
</a>
to start generate your talking head
</p>
</div>
''', elem_id="warning-duplicate")
gr.Markdown("# Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation")
gr.Markdown("Generate talking head avatars driven from audio. **5 seconds of audio takes >10 minutes to generate on an L4** - duplicate the space for private use or try for free on Google Colab")
with gr.Row():
with gr.Column():
avatar_face = gr.Image(type="filepath", label="Face")
driving_audio = gr.Audio(type="filepath", label="Driving audio")
generate = gr.Button("Generate")
with gr.Column():
output_video = gr.Video(label="Your talking head")
generate.click(
fn=run_inference,
inputs=[avatar_face, driving_audio],
outputs=output_video
)
demo.launch() |