chjn committed on
Commit
e523a2f
1 Parent(s): 5ec2aad

Upload 3 files

Browse files

Add so-vits model. The full 10,000-epoch training run was not completed because the loss became NaN. The shallow diffusion model, feature retrieval model, and cluster model will be added soon.
Current training step: 268000.

SpecialWeek/SpecialWeek.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12ac70d7d5f7a9fe7bfe88e8de22ac200117799eac663632e25ff0b1e32c1f67
3
+ size 627915739
SpecialWeek/config.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "eval_interval": 800,
5
+ "seed": 1234,
6
+ "epochs": 10008,
7
+ "learning_rate": 0.0002,
8
+ "betas": [
9
+ 0.8,
10
+ 0.99
11
+ ],
12
+ "eps": 1e-09,
13
+ "batch_size": 24,
14
+ "fp16_run": true,
15
+ "half_type": "fp16",
16
+ "lr_decay": 0.999875,
17
+ "segment_size": 10240,
18
+ "init_lr_ratio": 1,
19
+ "warmup_epochs": 0,
20
+ "c_mel": 45,
21
+ "c_kl": 1.0,
22
+ "use_sr": true,
23
+ "max_speclen": 512,
24
+ "port": "8001",
25
+ "keep_ckpts": 3,
26
+ "all_in_mem": true,
27
+ "vol_aug": true
28
+ },
29
+ "data": {
30
+ "training_files": "filelists/train.txt",
31
+ "validation_files": "filelists/val.txt",
32
+ "max_wav_value": 32768.0,
33
+ "sampling_rate": 44100,
34
+ "filter_length": 2048,
35
+ "hop_length": 512,
36
+ "win_length": 2048,
37
+ "n_mel_channels": 80,
38
+ "mel_fmin": 0.0,
39
+ "mel_fmax": 22050,
40
+ "unit_interpolate_mode": "nearest"
41
+ },
42
+ "model": {
43
+ "inter_channels": 192,
44
+ "hidden_channels": 192,
45
+ "filter_channels": 768,
46
+ "n_heads": 2,
47
+ "n_layers": 6,
48
+ "kernel_size": 3,
49
+ "p_dropout": 0.1,
50
+ "resblock": "1",
51
+ "resblock_kernel_sizes": [
52
+ 3,
53
+ 7,
54
+ 11
55
+ ],
56
+ "resblock_dilation_sizes": [
57
+ [
58
+ 1,
59
+ 3,
60
+ 5
61
+ ],
62
+ [
63
+ 1,
64
+ 3,
65
+ 5
66
+ ],
67
+ [
68
+ 1,
69
+ 3,
70
+ 5
71
+ ]
72
+ ],
73
+ "upsample_rates": [
74
+ 8,
75
+ 8,
76
+ 2,
77
+ 2,
78
+ 2
79
+ ],
80
+ "upsample_initial_channel": 512,
81
+ "upsample_kernel_sizes": [
82
+ 16,
83
+ 16,
84
+ 4,
85
+ 4,
86
+ 4
87
+ ],
88
+ "n_layers_q": 3,
89
+ "n_layers_trans_flow": 3,
90
+ "n_flow_layer": 4,
91
+ "use_spectral_norm": false,
92
+ "gin_channels": 768,
93
+ "ssl_dim": 768,
94
+ "n_speakers": 1,
95
+ "vocoder_name": "nsf-hifigan",
96
+ "speech_encoder": "vec768l12",
97
+ "speaker_embedding": false,
98
+ "vol_embedding": true,
99
+ "use_depthwise_conv": false,
100
+ "flow_share_parameter": false,
101
+ "use_automatic_f0_prediction": true,
102
+ "use_transformer_flow": false
103
+ },
104
+ "spk": {
105
+ "SpecialWeek": 0
106
+ }
107
+ }
SpecialWeek/diffusion.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data:
2
+ block_size: 512
3
+ cnhubertsoft_gate: 10
4
+ duration: 2
5
+ encoder: vec768l12
6
+ encoder_hop_size: 320
7
+ encoder_out_channels: 768
8
+ encoder_sample_rate: 16000
9
+ extensions:
10
+ - wav
11
+ sampling_rate: 44100
12
+ training_files: filelists/train.txt
13
+ unit_interpolate_mode: nearest
14
+ validation_files: filelists/val.txt
15
+ device: cuda
16
+ env:
17
+ expdir: logs/44k/diffusion
18
+ gpu_id: 0
19
+ infer:
20
+ method: dpm-solver++
21
+ speedup: 10
22
+ model:
23
+ k_step_max: 0
24
+ n_chans: 512
25
+ n_hidden: 256
26
+ n_layers: 20
27
+ n_spk: 1
28
+ timesteps: 1000
29
+ type: Diffusion
30
+ use_pitch_aug: true
31
+ spk:
32
+ SpecialWeek: 0
33
+ train:
34
+ amp_dtype: fp16
35
+ batch_size: 192
36
+ cache_all_data: true
37
+ cache_device: cpu
38
+ cache_fp16: true
39
+ decay_step: 50000
40
+ epochs: 100000
41
+ gamma: 0.5
42
+ interval_force_save: 5000
43
+ interval_log: 10
44
+ interval_val: 2000
45
+ lr: 0.0002
46
+ num_workers: 4
47
+ save_opt: false
48
+ weight_decay: 0
49
+ vocoder:
50
+ ckpt: pretrain/nsf_hifigan/model
51
+ type: nsf-hifigan