# Configuration for the stage-2 model (NETWORK.arch: stage2).
# Settings are grouped into five top-level sections:
# DATA, NETWORK, VQuantizer, PREDICTOR and DEMO.

# Dataset location and layout.
DATA:
  dataset: multi
  data_root: sample_dataset
  # NOTE(review): the three entries below are presumably resolved relative
  # to data_root -- confirm against the data loader.
  wav_path: wav
  vertices_path: npy
  template_file: templates.pkl
  # A single space-separated string, not a YAML list.
  train_subjects: Arabic English French German Greek Italian Portuguese Russian Spanish Korean Mandarin Japanese

# Network hyperparameters.
NETWORK:
  arch: stage2
  in_dim: 15069  # same value as PREDICTOR.vertice_dim
  hidden_size: 1024
  num_hidden_layers: 6
  num_attention_heads: 8
  intermediate_size: 1536
  window_size: 1
  quant_factor: 0
  face_quan_num: 16
  neg: 0.2
  autoencoder: stage1_vocaset
  INaffine: False
  style_emb_method: nnemb  # onehot or nnemb

# Vector-quantizer settings.
VQuantizer:
  n_embed: 256
  zquant_dim: 64

# Predictor settings.
PREDICTOR:
  feature_dim: 1024
  vertice_dim: 15069  # same value as NETWORK.in_dim
  device: cuda
  period: 25  # NOTE(review): same value as DEMO.fps -- presumably a frame rate; confirm
  vqvae_pretrained_path: checkpoints/stage1.pth.tar
  # NOTE(review): looks like a model-hub identifier rather than a local
  # path -- confirm how the loader resolves it.
  wav2vec2model_path: facebook/wav2vec2-large-xlsr-53
  teacher_forcing: True
  num_layers: 6
  n_head: 4  # not used

# Demo / inference settings.
DEMO:
  model_path: checkpoints/stage2.pth.tar
  # Alternative: if False, the waveform file itself carries the cue for the
  # language.
  # condition: False
  condition: English  # "English" also appears in DATA.train_subjects
  subject: id
  demo_wav_dir_path: demo/input/
  demo_output_path: demo/output/
  fps: 25
  background_black: True  # choose the background color of your rendered video