# JAM-0.5 / jam_infer.yaml
# hungchiayu's picture
# Create jam_infer.yaml
# 66c32b2 verified
# Base directory referenced by the `${project_root}/...` paths in this file.
project_root: '.'
# Evaluation / inference settings.
# NOTE(review): nesting is reconstructed from a copy whose indentation was
# lost. `dataset` and `sample_kwargs` are assumed to be children of
# `evaluation`, and `steps` a child of `sample_kwargs` — confirm against the
# code that reads this file.
evaluation:
  # Empty by default; presumably has to be pointed at a checkpoint — confirm.
  checkpoint_path: ""
  output_dir: "outputs"
  test_set_path: "inputs/input.json"
  # `${...}` references the top-level `project_root` key (looks like
  # OmegaConf-style interpolation — confirm against the loader).
  negative_style_prompt: ${project_root}/public/vocal.npy
  num_samples: null
  batch_size: 1
  random_crop_style: false
  vae_type: 'diffrhythm'
  num_style_secs: 30
  ignore_style: false
  use_prompt_style: false

  # Kept as its own mapping: it defines `shuffle`, which would otherwise
  # collide with the `shuffle` key of the training-dataset settings.
  dataset:
    pattern: "placeholder"
    shuffle: false
    resample_by_duration_threshold: null
    always_crop_from_beginning: true
    always_use_style_index: 0

  sample_kwargs:
    cfg_range:
      - 0.05
      - 1
    dual_cfg:
      - 4.7
      - 2.5
    steps: 50
# Model hyperparameters.
# Nesting restored: the `${model.num_channels}` references below need
# `num_channels` to be a child of `model`, and `cfm` / `dit` must be separate
# mappings because each defines its own `max_frames`.
# NOTE(review): `cfm` and `dit` are assumed to be children of `model` —
# confirm against the code that reads this file.
model:
  num_channels: 64

  cfm:
    # References the top-level `max_frames` key.
    max_frames: ${max_frames}
    num_channels: ${model.num_channels}
    dual_drop_prob: [0.1, 0.5]
    no_edit: true

  dit:
    # References the top-level `max_frames` key.
    max_frames: ${max_frames}
    mel_dim: ${model.num_channels}
    dim: 1408
    depth: 16
    heads: 32
    ff_mult: 4
    text_dim: 512
    conv_layers: 4
    grad_ckpt: true
    use_implicit_duration: true
# Dataset settings.
# Nesting restored: `max_frames` and `lrc_upsample_factor` here reference the
# top-level keys of the same name, so they cannot live at the top level
# themselves.
# NOTE(review): every key below is assumed to belong to `train_dataset` —
# confirm against the code that reads this file.
data:
  train_dataset:
    max_frames: ${max_frames}
    multiple_styles: true
    sampling_rate: 44100
    shuffle: true
    # NOTE(review): "silience" is kept verbatim; it has to match the file
    # name on disk.
    silence_latent_path: ${project_root}/public/silience_latent.pt
    # `tokenizer_path` and `phonemizer_checkpoint` point at the same file.
    tokenizer_path: ${project_root}/public/en_us_cmudict_ipa_forward.pt
    lrc_upsample_factor: ${lrc_upsample_factor}
    filler: average_sparse
    phonemizer_checkpoint: ${project_root}/public/en_us_cmudict_ipa_forward.pt
# Top-level values shared across sections.
# `max_frames` and `lrc_upsample_factor` are the targets of the
# `${max_frames}` / `${lrc_upsample_factor}` references elsewhere in this file.
max_frames: 5000
lrc_upsample_factor: 4
seed: 42