hungchiayu committed on
Commit
66c32b2
·
verified ·
1 Parent(s): 1e36220

Create jam_infer.yaml

Browse files
Files changed (1) hide show
  1. jam_infer.yaml +66 -0
jam_infer.yaml ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ project_root: "."
2
+ evaluation:
3
+ checkpoint_path: ""
4
+ output_dir: "outputs"
5
+ test_set_path: "inputs/input.json"
6
+ negative_style_prompt: ${project_root}/public/vocal.npy
7
+ num_samples: null
8
+ batch_size: 1
9
+ random_crop_style: false
10
+ vae_type: 'diffrhythm'
11
+ num_style_secs: 30
12
+ ignore_style: false
13
+ use_prompt_style: false
14
+
15
+ dataset:
16
+ pattern: "placeholder"
17
+ shuffle: false
18
+ resample_by_duration_threshold: null
19
+ always_crop_from_beginning: true
20
+ always_use_style_index: 0
21
+
22
+ sample_kwargs:
23
+ cfg_range:
24
+ - 0.05
25
+ - 1
26
+ dual_cfg:
27
+ - 4.7
28
+ - 2.5
29
+ steps: 50
30
+
31
+ model:
32
+ num_channels: 64
33
+ cfm:
34
+ max_frames: ${max_frames}
35
+ num_channels: ${model.num_channels}
36
+ dual_drop_prob: [0.1, 0.5]
37
+ no_edit: true
38
+
39
+ dit:
40
+ max_frames: ${max_frames}
41
+ mel_dim: ${model.num_channels}
42
+ dim: 1408
43
+ depth: 16
44
+ heads: 32
45
+ ff_mult: 4
46
+ text_dim: 512
47
+ conv_layers: 4
48
+ grad_ckpt: true
49
+ use_implicit_duration: true
50
+
51
+ data:
52
+ train_dataset:
53
+ max_frames: ${max_frames}
54
+ multiple_styles: true
55
+ sampling_rate: 44100
56
+ shuffle: true
57
+ silence_latent_path: ${project_root}/public/silience_latent.pt
58
+ tokenizer_path: ${project_root}/public/en_us_cmudict_ipa_forward.pt
59
+ lrc_upsample_factor: ${lrc_upsample_factor}
60
+ filler: average_sparse
61
+ phonemizer_checkpoint: ${project_root}/public/en_us_cmudict_ipa_forward.pt
62
+
63
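+ # General settings (max_frames and lrc_upsample_factor are the targets of the ${max_frames} / ${lrc_upsample_factor} interpolations above)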
64
+ max_frames: 5000
65
+ lrc_upsample_factor: 4
66
+ seed: 42