# Input assets: reference image and driving audio (example paths).
source_image: examples/reference_images/1.jpg
driving_audio: examples/driving_audios/1.wav

# Weight dtype label; how it maps to a runtime dtype is decided by the
# consumer of this file.
weight_dtype: fp16
# Data settings for frames, the source image, the driving audio and the
# exported video.
# NOTE(review): nesting was reconstructed; `source_image` and `driving_audio`
# here hold only sub-keys and would otherwise collide with the top-level keys
# of the same name. Confirm against the code that reads this file.
data:
  n_motion_frames: 2
  n_sample_frames: 16
  source_image:
    width: 512
    height: 512
  driving_audio:
    sample_rate: 16000  # presumably Hz; confirm
  export_video:
    fps: 25
# Inference step count and guidance scale.
inference_steps: 40
cfg_scale: 3.5  # presumably the classifier-free guidance scale; confirm
# Checkpoint locations, all under ./pretrained_models.
audio_ckpt_dir: ./pretrained_models/hallo

base_model_path: ./pretrained_models/stable-diffusion-v1-5

motion_module_path: ./pretrained_models/motion_module/mm_sd_v15_v2.ckpt
# Per-component model locations; each component is a mapping that carries its
# own `model_path`.
face_analysis:
  model_path: ./pretrained_models/face_analysis

wav2vec:
  model_path: ./pretrained_models/wav2vec/wav2vec2-base-960h
  features: all

audio_separator:
  model_path: ./pretrained_models/audio_separator/Kim_Vocal_2.onnx

vae:
  model_path: ./pretrained_models/sd-vae-ft-mse
# Save location (what exactly is written there depends on the consumer).
save_path: ./.cache

# Face expansion ratio and pose/face/lip weights; their exact semantics are
# defined by the code that reads this file.
face_expand_ratio: 1.2
pose_weight: 1.0
face_weight: 1.0
lip_weight: 1.0
# Extra keyword arguments for the UNet (per the key name).
# NOTE(review): nesting was reconstructed. In particular, `motion_module_kwargs`
# is assumed to end at `temporal_attention_dim_div`, with `audio_attention_dim`
# and the `stack_enable_blocks_*` keys as its siblings. Confirm against the
# UNet constructor that receives these values.
unet_additional_kwargs:
  use_inflated_groupnorm: true
  unet_use_cross_frame_attention: false
  unet_use_temporal_attention: false
  use_motion_module: true
  use_audio_module: true
  motion_module_resolutions:
    - 1
    - 2
    - 4
    - 8
  motion_module_mid_block: true
  motion_module_decoder_only: false
  motion_module_type: Vanilla
  motion_module_kwargs:
    num_attention_heads: 8
    num_transformer_block: 1
    attention_block_types:
      - Temporal_Self
      - Temporal_Self
    temporal_position_encoding: true
    temporal_position_encoding_max_len: 32
    temporal_attention_dim_div: 1
  audio_attention_dim: 768
  stack_enable_blocks_name:
    - "up"
    - "down"
    - "mid"
  stack_enable_blocks_depth: [0, 1, 2, 3]
# Zero-SNR toggle.
enable_zero_snr: true

# Keyword arguments for the noise scheduler (per the key name).
# NOTE(review): nesting was reconstructed; `prediction_type`,
# `rescale_betas_zero_snr` and `timestep_spacing` are assumed to belong to this
# mapping and not to the top level. Confirm against the scheduler construction
# code.
noise_scheduler_kwargs:
  beta_start: 0.00085
  beta_end: 0.012
  beta_schedule: "linear"
  clip_sample: false
  steps_offset: 1

  prediction_type: "v_prediction"
  rescale_betas_zero_snr: true
  timestep_spacing: "trailing"

# Sampler name.
sampler: DDIM