None1145 committed on
Commit
8ab304b
·
verified ·
1 Parent(s): 96ba2ba

Delete configs_template

Browse files
configs_template/config_template.json DELETED
@@ -1,79 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 200,
4
- "eval_interval": 800,
5
- "seed": 1234,
6
- "epochs": 10000,
7
- "learning_rate": 0.0001,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 6,
14
- "fp16_run": false,
15
- "half_type": "fp16",
16
- "lr_decay": 0.999875,
17
- "segment_size": 10240,
18
- "init_lr_ratio": 1,
19
- "warmup_epochs": 0,
20
- "c_mel": 45,
21
- "c_kl": 1.0,
22
- "use_sr": true,
23
- "max_speclen": 512,
24
- "port": "8001",
25
- "keep_ckpts": 3,
26
- "all_in_mem": false,
27
- "vol_aug":false
28
- },
29
- "data": {
30
- "training_files": "filelists/train.txt",
31
- "validation_files": "filelists/val.txt",
32
- "max_wav_value": 32768.0,
33
- "sampling_rate": 44100,
34
- "filter_length": 2048,
35
- "hop_length": 512,
36
- "win_length": 2048,
37
- "n_mel_channels": 80,
38
- "mel_fmin": 0.0,
39
- "mel_fmax": 22050,
40
- "unit_interpolate_mode":"nearest"
41
- },
42
- "model": {
43
- "inter_channels": 192,
44
- "hidden_channels": 192,
45
- "filter_channels": 768,
46
- "n_heads": 2,
47
- "n_layers": 6,
48
- "kernel_size": 3,
49
- "p_dropout": 0.1,
50
- "resblock": "1",
51
- "resblock_kernel_sizes": [3,7,11],
52
- "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
53
- "upsample_rates": [ 8, 8, 2, 2, 2],
54
- "upsample_initial_channel": 512,
55
- "upsample_kernel_sizes": [16,16, 4, 4, 4],
56
- "n_layers_q": 3,
57
- "n_layers_trans_flow": 3,
58
- "n_flow_layer": 4,
59
- "use_spectral_norm": false,
60
- "gin_channels": 768,
61
- "ssl_dim": 768,
62
- "n_speakers": 200,
63
- "vocoder_name":"nsf-hifigan",
64
- "speech_encoder":"vec768l12",
65
- "speaker_embedding":false,
66
- "vol_embedding":false,
67
- "use_depthwise_conv":false,
68
- "flow_share_parameter": false,
69
- "use_automatic_f0_prediction": true,
70
- "use_transformer_flow": false
71
- },
72
- "spk": {
73
- "nyaru": 0,
74
- "huiyu": 1,
75
- "nen": 2,
76
- "paimon": 3,
77
- "yunhao": 4
78
- }
79
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs_template/config_tiny_template.json DELETED
@@ -1,79 +0,0 @@
1
- {
2
- "train": {
3
- "log_interval": 200,
4
- "eval_interval": 800,
5
- "seed": 1234,
6
- "epochs": 10000,
7
- "learning_rate": 0.0001,
8
- "betas": [
9
- 0.8,
10
- 0.99
11
- ],
12
- "eps": 1e-09,
13
- "batch_size": 6,
14
- "fp16_run": false,
15
- "half_type": "fp16",
16
- "lr_decay": 0.999875,
17
- "segment_size": 10240,
18
- "init_lr_ratio": 1,
19
- "warmup_epochs": 0,
20
- "c_mel": 45,
21
- "c_kl": 1.0,
22
- "use_sr": true,
23
- "max_speclen": 512,
24
- "port": "8001",
25
- "keep_ckpts": 3,
26
- "all_in_mem": false,
27
- "vol_aug":false
28
- },
29
- "data": {
30
- "training_files": "filelists/train.txt",
31
- "validation_files": "filelists/val.txt",
32
- "max_wav_value": 32768.0,
33
- "sampling_rate": 44100,
34
- "filter_length": 2048,
35
- "hop_length": 512,
36
- "win_length": 2048,
37
- "n_mel_channels": 80,
38
- "mel_fmin": 0.0,
39
- "mel_fmax": 22050,
40
- "unit_interpolate_mode":"nearest"
41
- },
42
- "model": {
43
- "inter_channels": 192,
44
- "hidden_channels": 192,
45
- "filter_channels": 512,
46
- "n_heads": 2,
47
- "n_layers": 6,
48
- "kernel_size": 3,
49
- "p_dropout": 0.1,
50
- "resblock": "1",
51
- "resblock_kernel_sizes": [3,7,11],
52
- "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
53
- "upsample_rates": [ 8, 8, 2, 2, 2],
54
- "upsample_initial_channel": 400,
55
- "upsample_kernel_sizes": [16,16, 4, 4, 4],
56
- "n_layers_q": 3,
57
- "n_layers_trans_flow": 3,
58
- "n_flow_layer": 4,
59
- "use_spectral_norm": false,
60
- "gin_channels": 768,
61
- "ssl_dim": 768,
62
- "n_speakers": 200,
63
- "vocoder_name":"nsf-hifigan",
64
- "speech_encoder":"vec768l12",
65
- "speaker_embedding":false,
66
- "vol_embedding":false,
67
- "use_depthwise_conv":true,
68
- "flow_share_parameter": true,
69
- "use_automatic_f0_prediction": true,
70
- "use_transformer_flow": false
71
- },
72
- "spk": {
73
- "nyaru": 0,
74
- "huiyu": 1,
75
- "nen": 2,
76
- "paimon": 3,
77
- "yunhao": 4
78
- }
79
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs_template/diffusion_template.yaml DELETED
@@ -1,51 +0,0 @@
1
- data:
2
- sampling_rate: 44100
3
- block_size: 512 # Equal to hop_length
4
- duration: 2 # Audio duration during training, must be less than the duration of the shortest audio clip
5
- encoder: 'vec768l12' # 'hubertsoft', 'vec256l9', 'vec768l12'
6
- cnhubertsoft_gate: 10
7
- encoder_sample_rate: 16000
8
- encoder_hop_size: 320
9
- encoder_out_channels: 768 # 256 if using 'hubertsoft'
10
- training_files: "filelists/train.txt"
11
- validation_files: "filelists/val.txt"
12
- extensions: # List of file extensions included in the data collection
13
- - wav
14
- unit_interpolate_mode: "nearest"
15
- model:
16
- type: 'Diffusion'
17
- n_layers: 20
18
- n_chans: 512
19
- n_hidden: 256
20
- use_pitch_aug: true
21
- timesteps : 1000
22
- k_step_max: 0 # must be <= timesteps; if it is 0, train all
23
- n_spk: 1 # max number of different speakers
24
- device: cuda
25
- vocoder:
26
- type: 'nsf-hifigan'
27
- ckpt: 'pretrain/nsf_hifigan/model'
28
- infer:
29
- speedup: 10
30
- method: 'dpm-solver++' # 'pndm' or 'dpm-solver' or 'ddim' or 'unipc' or 'dpm-solver++'
31
- env:
32
- expdir: logs/44k/diffusion
33
- gpu_id: 0
34
- train:
35
- num_workers: 4 # If your CPU and GPU are both very powerful, setting this to 0 may be faster!
36
- amp_dtype: fp32 # fp32, fp16 or bf16 (fp16 or bf16 may be faster if it is supported by your gpu)
37
- batch_size: 48
38
- cache_all_data: true # Setting this to false saves system memory or GPU memory, but may be slower
39
- cache_device: 'cpu' # Set to 'cuda' to cache the data in GPU memory; fastest option for a powerful GPU
40
- cache_fp16: true
41
- epochs: 100000
42
- interval_log: 10
43
- interval_val: 2000
44
- interval_force_save: 5000
45
- lr: 0.0001
46
- decay_step: 100000
47
- gamma: 0.5
48
- weight_decay: 0
49
- save_opt: false
50
- spk:
51
- 'nyaru': 0