AnhP committed on
Commit
a9e6876
·
verified ·
1 Parent(s): 0e1c65b

Upload 8 files

Browse files
configs/v1/32000.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "epochs": 20000,
6
+ "learning_rate": 0.0001,
7
+ "betas": [
8
+ 0.8,
9
+ 0.99
10
+ ],
11
+ "eps": 1e-09,
12
+ "batch_size": 4,
13
+ "fp16_run": false,
14
+ "lr_decay": 0.999875,
15
+ "segment_size": 12800,
16
+ "init_lr_ratio": 1,
17
+ "warmup_epochs": 0,
18
+ "c_mel": 45,
19
+ "c_kl": 1.0
20
+ },
21
+ "data": {
22
+ "max_wav_value": 32768.0,
23
+ "sample_rate": 32000,
24
+ "filter_length": 1024,
25
+ "hop_length": 320,
26
+ "win_length": 1024,
27
+ "n_mel_channels": 80,
28
+ "mel_fmin": 0.0,
29
+ "mel_fmax": null
30
+ },
31
+ "model": {
32
+ "inter_channels": 192,
33
+ "hidden_channels": 192,
34
+ "filter_channels": 768,
35
+ "text_enc_hidden_dim": 256,
36
+ "n_heads": 2,
37
+ "n_layers": 6,
38
+ "kernel_size": 3,
39
+ "p_dropout": 0,
40
+ "resblock": "1",
41
+ "resblock_kernel_sizes": [
42
+ 3,
43
+ 7,
44
+ 11
45
+ ],
46
+ "resblock_dilation_sizes": [
47
+ [
48
+ 1,
49
+ 3,
50
+ 5
51
+ ],
52
+ [
53
+ 1,
54
+ 3,
55
+ 5
56
+ ],
57
+ [
58
+ 1,
59
+ 3,
60
+ 5
61
+ ]
62
+ ],
63
+ "upsample_rates": [
64
+ 10,
65
+ 4,
66
+ 2,
67
+ 2,
68
+ 2
69
+ ],
70
+ "upsample_initial_channel": 512,
71
+ "upsample_kernel_sizes": [
72
+ 16,
73
+ 16,
74
+ 4,
75
+ 4,
76
+ 4
77
+ ],
78
+ "use_spectral_norm": false,
79
+ "gin_channels": 256,
80
+ "spk_embed_dim": 109
81
+ }
82
+ }
configs/v1/40000.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "epochs": 20000,
6
+ "learning_rate": 0.0001,
7
+ "betas": [
8
+ 0.8,
9
+ 0.99
10
+ ],
11
+ "eps": 1e-09,
12
+ "batch_size": 4,
13
+ "fp16_run": false,
14
+ "lr_decay": 0.999875,
15
+ "segment_size": 12800,
16
+ "init_lr_ratio": 1,
17
+ "warmup_epochs": 0,
18
+ "c_mel": 45,
19
+ "c_kl": 1.0
20
+ },
21
+ "data": {
22
+ "max_wav_value": 32768.0,
23
+ "sample_rate": 40000,
24
+ "filter_length": 2048,
25
+ "hop_length": 400,
26
+ "win_length": 2048,
27
+ "n_mel_channels": 125,
28
+ "mel_fmin": 0.0,
29
+ "mel_fmax": null
30
+ },
31
+ "model": {
32
+ "inter_channels": 192,
33
+ "hidden_channels": 192,
34
+ "filter_channels": 768,
35
+ "text_enc_hidden_dim": 256,
36
+ "n_heads": 2,
37
+ "n_layers": 6,
38
+ "kernel_size": 3,
39
+ "p_dropout": 0,
40
+ "resblock": "1",
41
+ "resblock_kernel_sizes": [
42
+ 3,
43
+ 7,
44
+ 11
45
+ ],
46
+ "resblock_dilation_sizes": [
47
+ [
48
+ 1,
49
+ 3,
50
+ 5
51
+ ],
52
+ [
53
+ 1,
54
+ 3,
55
+ 5
56
+ ],
57
+ [
58
+ 1,
59
+ 3,
60
+ 5
61
+ ]
62
+ ],
63
+ "upsample_rates": [
64
+ 10,
65
+ 10,
66
+ 2,
67
+ 2
68
+ ],
69
+ "upsample_initial_channel": 512,
70
+ "upsample_kernel_sizes": [
71
+ 16,
72
+ 16,
73
+ 4,
74
+ 4
75
+ ],
76
+ "use_spectral_norm": false,
77
+ "gin_channels": 256,
78
+ "spk_embed_dim": 109
79
+ }
80
+ }
configs/v1/44000.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "epochs": 20000,
6
+ "learning_rate": 0.0001,
7
+ "betas": [
8
+ 0.8,
9
+ 0.99
10
+ ],
11
+ "eps": 1e-09,
12
+ "batch_size": 4,
13
+ "fp16_run": false,
14
+ "lr_decay": 0.999875,
15
+ "segment_size": 15876,
16
+ "init_lr_ratio": 1,
17
+ "warmup_epochs": 0,
18
+ "c_mel": 45,
19
+ "c_kl": 1.0
20
+ },
21
+ "data": {
22
+ "max_wav_value": 32768.0,
23
+ "sample_rate": 44100,
24
+ "filter_length": 2048,
25
+ "hop_length": 441,
26
+ "win_length": 2048,
27
+ "n_mel_channels": 160,
28
+ "mel_fmin": 0.0,
29
+ "mel_fmax": null
30
+ },
31
+ "model": {
32
+ "inter_channels": 192,
33
+ "hidden_channels": 192,
34
+ "filter_channels": 768,
35
+ "text_enc_hidden_dim": 256,
36
+ "n_heads": 2,
37
+ "n_layers": 6,
38
+ "kernel_size": 3,
39
+ "p_dropout": 0,
40
+ "resblock": "1",
41
+ "resblock_kernel_sizes": [
42
+ 3,
43
+ 7,
44
+ 11
45
+ ],
46
+ "resblock_dilation_sizes": [
47
+ [
48
+ 1,
49
+ 3,
50
+ 5
51
+ ],
52
+ [
53
+ 1,
54
+ 3,
55
+ 5
56
+ ],
57
+ [
58
+ 1,
59
+ 3,
60
+ 5
61
+ ]
62
+ ],
63
+ "upsample_rates": [
64
+ 7,
65
+ 7,
66
+ 3,
67
+ 3
68
+ ],
69
+ "upsample_initial_channel": 512,
70
+ "upsample_kernel_sizes": [
71
+ 14,
72
+ 14,
73
+ 6,
74
+ 6
75
+ ],
76
+ "use_spectral_norm": false,
77
+ "gin_channels": 256,
78
+ "spk_embed_dim": 109
79
+ }
80
+ }
configs/v1/48000.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "epochs": 20000,
6
+ "learning_rate": 0.0001,
7
+ "betas": [
8
+ 0.8,
9
+ 0.99
10
+ ],
11
+ "eps": 1e-09,
12
+ "batch_size": 4,
13
+ "fp16_run": false,
14
+ "lr_decay": 0.999875,
15
+ "segment_size": 11520,
16
+ "init_lr_ratio": 1,
17
+ "warmup_epochs": 0,
18
+ "c_mel": 45,
19
+ "c_kl": 1.0
20
+ },
21
+ "data": {
22
+ "max_wav_value": 32768.0,
23
+ "sample_rate": 48000,
24
+ "filter_length": 2048,
25
+ "hop_length": 480,
26
+ "win_length": 2048,
27
+ "n_mel_channels": 128,
28
+ "mel_fmin": 0.0,
29
+ "mel_fmax": null
30
+ },
31
+ "model": {
32
+ "inter_channels": 192,
33
+ "hidden_channels": 192,
34
+ "filter_channels": 768,
35
+ "text_enc_hidden_dim": 256,
36
+ "n_heads": 2,
37
+ "n_layers": 6,
38
+ "kernel_size": 3,
39
+ "p_dropout": 0,
40
+ "resblock": "1",
41
+ "resblock_kernel_sizes": [
42
+ 3,
43
+ 7,
44
+ 11
45
+ ],
46
+ "resblock_dilation_sizes": [
47
+ [
48
+ 1,
49
+ 3,
50
+ 5
51
+ ],
52
+ [
53
+ 1,
54
+ 3,
55
+ 5
56
+ ],
57
+ [
58
+ 1,
59
+ 3,
60
+ 5
61
+ ]
62
+ ],
63
+ "upsample_rates": [
64
+ 10,
65
+ 6,
66
+ 2,
67
+ 2,
68
+ 2
69
+ ],
70
+ "upsample_initial_channel": 512,
71
+ "upsample_kernel_sizes": [
72
+ 16,
73
+ 16,
74
+ 4,
75
+ 4,
76
+ 4
77
+ ],
78
+ "use_spectral_norm": false,
79
+ "gin_channels": 256,
80
+ "spk_embed_dim": 109
81
+ }
82
+ }
configs/v2/32000.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "learning_rate": 0.0001,
6
+ "betas": [
7
+ 0.8,
8
+ 0.99
9
+ ],
10
+ "eps": 1e-09,
11
+ "fp16_run": false,
12
+ "lr_decay": 0.999875,
13
+ "segment_size": 12800,
14
+ "c_mel": 45,
15
+ "c_kl": 1.0
16
+ },
17
+ "data": {
18
+ "max_wav_value": 32768.0,
19
+ "sample_rate": 32000,
20
+ "filter_length": 1024,
21
+ "hop_length": 320,
22
+ "win_length": 1024,
23
+ "n_mel_channels": 80,
24
+ "mel_fmin": 0.0,
25
+ "mel_fmax": null
26
+ },
27
+ "model": {
28
+ "inter_channels": 192,
29
+ "hidden_channels": 192,
30
+ "filter_channels": 768,
31
+ "text_enc_hidden_dim": 768,
32
+ "n_heads": 2,
33
+ "n_layers": 6,
34
+ "kernel_size": 3,
35
+ "p_dropout": 0,
36
+ "resblock": "1",
37
+ "resblock_kernel_sizes": [
38
+ 3,
39
+ 7,
40
+ 11
41
+ ],
42
+ "resblock_dilation_sizes": [
43
+ [
44
+ 1,
45
+ 3,
46
+ 5
47
+ ],
48
+ [
49
+ 1,
50
+ 3,
51
+ 5
52
+ ],
53
+ [
54
+ 1,
55
+ 3,
56
+ 5
57
+ ]
58
+ ],
59
+ "upsample_rates": [
60
+ 10,
61
+ 8,
62
+ 2,
63
+ 2
64
+ ],
65
+ "upsample_initial_channel": 512,
66
+ "upsample_kernel_sizes": [
67
+ 20,
68
+ 16,
69
+ 4,
70
+ 4
71
+ ],
72
+ "use_spectral_norm": false,
73
+ "gin_channels": 256,
74
+ "spk_embed_dim": 109
75
+ }
76
+ }
configs/v2/40000.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "learning_rate": 0.0001,
6
+ "betas": [
7
+ 0.8,
8
+ 0.99
9
+ ],
10
+ "eps": 1e-09,
11
+ "fp16_run": false,
12
+ "lr_decay": 0.999875,
13
+ "segment_size": 12800,
14
+ "c_mel": 45,
15
+ "c_kl": 1.0
16
+ },
17
+ "data": {
18
+ "max_wav_value": 32768.0,
19
+ "sample_rate": 40000,
20
+ "filter_length": 2048,
21
+ "hop_length": 400,
22
+ "win_length": 2048,
23
+ "n_mel_channels": 125,
24
+ "mel_fmin": 0.0,
25
+ "mel_fmax": null
26
+ },
27
+ "model": {
28
+ "inter_channels": 192,
29
+ "hidden_channels": 192,
30
+ "filter_channels": 768,
31
+ "text_enc_hidden_dim": 768,
32
+ "n_heads": 2,
33
+ "n_layers": 6,
34
+ "kernel_size": 3,
35
+ "p_dropout": 0,
36
+ "resblock": "1",
37
+ "resblock_kernel_sizes": [
38
+ 3,
39
+ 7,
40
+ 11
41
+ ],
42
+ "resblock_dilation_sizes": [
43
+ [
44
+ 1,
45
+ 3,
46
+ 5
47
+ ],
48
+ [
49
+ 1,
50
+ 3,
51
+ 5
52
+ ],
53
+ [
54
+ 1,
55
+ 3,
56
+ 5
57
+ ]
58
+ ],
59
+ "upsample_rates": [
60
+ 10,
61
+ 10,
62
+ 2,
63
+ 2
64
+ ],
65
+ "upsample_initial_channel": 512,
66
+ "upsample_kernel_sizes": [
67
+ 16,
68
+ 16,
69
+ 4,
70
+ 4
71
+ ],
72
+ "use_spectral_norm": false,
73
+ "gin_channels": 256,
74
+ "spk_embed_dim": 109
75
+ }
76
+ }
configs/v2/44000.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "learning_rate": 0.0001,
6
+ "betas": [
7
+ 0.8,
8
+ 0.99
9
+ ],
10
+ "eps": 1e-09,
11
+ "fp16_run": false,
12
+ "lr_decay": 0.999875,
13
+ "segment_size": 15876,
14
+ "c_mel": 45,
15
+ "c_kl": 1.0
16
+ },
17
+ "data": {
18
+ "max_wav_value": 32768.0,
19
+ "sample_rate": 44100,
20
+ "filter_length": 2048,
21
+ "hop_length": 441,
22
+ "win_length": 2048,
23
+ "n_mel_channels": 160,
24
+ "mel_fmin": 0.0,
25
+ "mel_fmax": null
26
+ },
27
+ "model": {
28
+ "inter_channels": 192,
29
+ "hidden_channels": 192,
30
+ "filter_channels": 768,
31
+ "text_enc_hidden_dim": 768,
32
+ "n_heads": 2,
33
+ "n_layers": 6,
34
+ "kernel_size": 3,
35
+ "p_dropout": 0,
36
+ "resblock": "1",
37
+ "resblock_kernel_sizes": [
38
+ 3,
39
+ 7,
40
+ 11
41
+ ],
42
+ "resblock_dilation_sizes": [
43
+ [
44
+ 1,
45
+ 3,
46
+ 5
47
+ ],
48
+ [
49
+ 1,
50
+ 3,
51
+ 5
52
+ ],
53
+ [
54
+ 1,
55
+ 3,
56
+ 5
57
+ ]
58
+ ],
59
+ "upsample_rates": [
60
+ 7,
61
+ 7,
62
+ 3,
63
+ 3
64
+ ],
65
+ "upsample_initial_channel": 512,
66
+ "upsample_kernel_sizes": [
67
+ 14,
68
+ 14,
69
+ 6,
70
+ 6
71
+ ],
72
+ "use_spectral_norm": false,
73
+ "gin_channels": 256,
74
+ "spk_embed_dim": 109
75
+ }
76
+ }
configs/v2/48000.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 200,
4
+ "seed": 1234,
5
+ "learning_rate": 0.0001,
6
+ "betas": [
7
+ 0.8,
8
+ 0.99
9
+ ],
10
+ "eps": 1e-09,
11
+ "fp16_run": false,
12
+ "lr_decay": 0.999875,
13
+ "segment_size": 17280,
14
+ "c_mel": 45,
15
+ "c_kl": 1.0
16
+ },
17
+ "data": {
18
+ "max_wav_value": 32768.0,
19
+ "sample_rate": 48000,
20
+ "filter_length": 2048,
21
+ "hop_length": 480,
22
+ "win_length": 2048,
23
+ "n_mel_channels": 128,
24
+ "mel_fmin": 0.0,
25
+ "mel_fmax": null
26
+ },
27
+ "model": {
28
+ "inter_channels": 192,
29
+ "hidden_channels": 192,
30
+ "filter_channels": 768,
31
+ "text_enc_hidden_dim": 768,
32
+ "n_heads": 2,
33
+ "n_layers": 6,
34
+ "kernel_size": 3,
35
+ "p_dropout": 0,
36
+ "resblock": "1",
37
+ "resblock_kernel_sizes": [
38
+ 3,
39
+ 7,
40
+ 11
41
+ ],
42
+ "resblock_dilation_sizes": [
43
+ [
44
+ 1,
45
+ 3,
46
+ 5
47
+ ],
48
+ [
49
+ 1,
50
+ 3,
51
+ 5
52
+ ],
53
+ [
54
+ 1,
55
+ 3,
56
+ 5
57
+ ]
58
+ ],
59
+ "upsample_rates": [
60
+ 12,
61
+ 10,
62
+ 2,
63
+ 2
64
+ ],
65
+ "upsample_initial_channel": 512,
66
+ "upsample_kernel_sizes": [
67
+ 24,
68
+ 20,
69
+ 4,
70
+ 4
71
+ ],
72
+ "use_spectral_norm": false,
73
+ "gin_channels": 256,
74
+ "spk_embed_dim": 109
75
+ }
76
+ }