# Spaces: Running
# Standard library.
import argparse
import os
import shutil
import uuid

# Third-party.
import gradio as gr
from huggingface_hub import snapshot_download

# Make the directory containing this script the working directory. The
# relative paths used below ("pretrained_models", "configs/inference/...",
# "output-*.mp4") are then resolved against it regardless of where the
# process was started from.
_script_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(_script_dir)

# Imported only after the chdir above, matching the original statement order.
# NOTE(review): presumably the project module relies on the working directory
# at import time -- confirm before hoisting this with the other imports.
from scripts.inference import inference_process  # noqa: E402

# Download (or reuse) the model snapshot into ./pretrained_models and keep the
# path that snapshot_download returns.
hallo_dir = snapshot_download(
    local_dir="pretrained_models",
    repo_id="fudan-generative-ai/hallo",
)
def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
    """Run the inference pipeline for one request and return the video path.

    Args:
        source_image: Path of the source image file (the UI component that
            feeds this argument is declared with ``type="filepath"``).
        driving_audio: Path of the driving audio file (likewise a filepath).
        progress: Gradio progress tracker. It is never read in this function;
            it is declared with ``track_tqdm=True`` so that Gradio can surface
            tqdm progress in the UI.

    Returns:
        The relative path ``output-<uuid4>.mp4`` that was handed to
        ``inference_process`` as ``args.output``.

    Raises:
        gr.Error: If either input is missing (``None`` or empty), so the user
            sees a readable message instead of a failure deep in the pipeline.
    """
    # Gradio passes None for a component the user left empty; fail early.
    if not source_image or not driving_audio:
        raise gr.Error(
            "Please provide both a source image and a driving audio file."
        )

    # Build the per-request output name once so the value given to the
    # pipeline and the value returned to Gradio cannot drift apart.
    output_path = f'output-{uuid.uuid4()}.mp4'

    # inference_process takes an argparse-style namespace rather than keyword
    # arguments, so emulate the command-line arguments here.
    args = argparse.Namespace(
        config='configs/inference/default.yaml',
        source_image=source_image,
        driving_audio=driving_audio,
        output=output_path,
        pose_weight=1.0,
        face_weight=1.0,
        lip_weight=1.0,
        face_expand_ratio=1.2,
        checkpoint=None,
    )

    # NOTE(review): inference_process is expected to write the video to
    # args.output -- confirm against scripts/inference.py.
    inference_process(args)
    return output_path
# Gradio UI definition: both inputs are delivered to run_inference as file
# paths, and the returned path is shown through the "video" output component.
iface = gr.Interface(
    fn=run_inference,
    inputs=[
        gr.Image(type="filepath"),
        gr.Audio(type="filepath"),
    ],
    outputs="video",
    title="Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation",
    description="Generate talking head avatars driven from audio. **every 10 seconds of generation takes ~1 minute** - duplicate the space for private use or try for free on Google Colab",
    cache_examples=False,
)

# Start the app; share=True asks Gradio for a public share link.
iface.launch(share=True)