next-playground committed on
Commit
5528da5
·
verified ·
1 Parent(s): c890f94

Upload 12 files

Browse files
models/MDX_Net_Models/model_data/mdx_c_configs/model1.yaml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 260096
3
+ dim_f: 4096
4
+ dim_t: 128
5
+ hop_length: 2048
6
+ n_fft: 8192
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ model:
10
+ act: gelu
11
+ bottleneck_factor: 4
12
+ growth: 64
13
+ norm: InstanceNorm
14
+ num_blocks_per_scale: 2
15
+ num_channels: 128
16
+ num_scales: 5
17
+ num_subbands: 4
18
+ scale:
19
+ - 2
20
+ - 2
21
+ training:
22
+ batch_size: 8
23
+ grad_clip: 0
24
+ instruments:
25
+ - Vocals
26
+ - Drums
27
+ - Bass
28
+ - Other
29
+ lr: 5.0e-05
30
+ target_instrument: null
31
+ inference:
32
+ batch_size: 1
33
+ dim_t: 256
34
+ num_overlap: 8
models/MDX_Net_Models/model_data/mdx_c_configs/model2.yaml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 260096
3
+ dim_f: 4096
4
+ dim_t: 128
5
+ hop_length: 2048
6
+ n_fft: 8192
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ model:
10
+ act: gelu
11
+ bottleneck_factor: 4
12
+ growth: 64
13
+ norm: InstanceNorm
14
+ num_blocks_per_scale: 2
15
+ num_channels: 256
16
+ num_scales: 5
17
+ num_subbands: 4
18
+ scale:
19
+ - 2
20
+ - 2
21
+ training:
22
+ batch_size: 8
23
+ grad_clip: 0
24
+ instruments:
25
+ - Vocals
26
+ - Drums
27
+ - Bass
28
+ - Other
29
+ lr: 3.0e-05
30
+ target_instrument: null
31
+ inference:
32
+ batch_size: 1
33
+ dim_t: 256
34
+ num_overlap: 8
models/MDX_Net_Models/model_data/mdx_c_configs/model3.yaml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 260096
3
+ dim_f: 4096
4
+ dim_t: 128
5
+ hop_length: 2048
6
+ n_fft: 12288
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ model:
10
+ act: gelu
11
+ bottleneck_factor: 4
12
+ growth: 64
13
+ norm: InstanceNorm
14
+ num_blocks_per_scale: 2
15
+ num_channels: 128
16
+ num_scales: 5
17
+ num_subbands: 4
18
+ scale:
19
+ - 2
20
+ - 2
21
+ training:
22
+ batch_size: 8
23
+ grad_clip: 0
24
+ instruments:
25
+ - Vocals
26
+ - Drums
27
+ - Bass
28
+ - Other
29
+ lr: 5.0e-05
30
+ target_instrument: Vocals
31
+ inference:
32
+ batch_size: 1
33
+ dim_t: 256
34
+ num_overlap: 8
models/MDX_Net_Models/model_data/mdx_c_configs/modelA.yaml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 261120
3
+ dim_f: 4096
4
+ dim_t: 256
5
+ hop_length: 1024
6
+ min_mean_abs: 0.01
7
+ n_fft: 8192
8
+ num_channels: 2
9
+ sample_rate: 44100
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 64
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 64
17
+ num_scales: 5
18
+ num_subbands: 4
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 6
24
+ coarse_loss_clip: true
25
+ ema_momentum: 0.999
26
+ grad_clip: null
27
+ instruments:
28
+ - Vocals
29
+ - Drums
30
+ - Bass
31
+ - Other
32
+ lr: 0.0001
33
+ num_steps: 100000
34
+ q: 0.4
35
+ target_instrument: null
36
+ inference:
37
+ batch_size: 2
38
+ dim_t: 256
39
+ num_overlap: 8
models/MDX_Net_Models/model_data/mdx_c_configs/modelB.yaml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 261120
3
+ dim_f: 4096
4
+ dim_t: 256
5
+ hop_length: 1024
6
+ min_mean_abs: 0.01
7
+ n_fft: 8192
8
+ num_channels: 2
9
+ sample_rate: 44100
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 64
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 64
17
+ num_scales: 5
18
+ num_subbands: 4
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 6
24
+ coarse_loss_clip: false
25
+ datasets:
26
+ - ../data/moises/bleeding
27
+ ema_momentum: 0.999
28
+ grad_clip: null
29
+ instruments:
30
+ - Vocals
31
+ - Drums
32
+ - Bass
33
+ - Other
34
+ lr: 0.0001
35
+ num_steps: 150000
36
+ q: 0.93
37
+ target_instrument: null
38
+ inference:
39
+ batch_size: 2
40
+ dim_t: 256
41
+ num_overlap: 8
models/MDX_Net_Models/model_data/mdx_c_configs/model_2_stem_061321.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 260096
3
+ dim_f: 4096
4
+ dim_t: 256
5
+ hop_length: 2048
6
+ n_fft: 12288
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 64
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 4
19
+ scale:
20
+ - 2
21
+ - 2
22
+ name: epoch_10.ckpt
23
+ training:
24
+ batch_size: 16
25
+ grad_clip: 0
26
+ instruments:
27
+ - Vocals
28
+ - Instrumental
29
+ lr: 5.0e-05
30
+ target_instrument: null
31
+ num_epochs: 100
32
+ num_steps: 1000
33
+ inference:
34
+ batch_size: 1
35
+ dim_t: 256
36
+ num_overlap: 8
models/MDX_Net_Models/model_data/mdx_c_configs/model_2_stem_full_band.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 260096
3
+ dim_f: 6144
4
+ dim_t: 128
5
+ hop_length: 2048
6
+ n_fft: 12288
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 64
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 6
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 14
24
+ grad_clip: 0
25
+ instruments:
26
+ - Vocals
27
+ - Instrumental
28
+ lr: 3.0e-05
29
+ target_instrument: null
30
+ num_epochs: 1000
31
+ num_steps: 1000
32
+ augmentation: 1
33
+ inference:
34
+ batch_size: 1
35
+ dim_t: 256
36
+ num_overlap: 8
models/MDX_Net_Models/model_data/mdx_c_configs/model_2_stem_full_band_2.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 260096
3
+ dim_f: 6144
4
+ dim_t: 128
5
+ hop_length: 2048
6
+ n_fft: 12288
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 128
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 6
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 14
24
+ grad_clip: 0
25
+ instruments:
26
+ - Vocals
27
+ - Instrumental
28
+ lr: 2.0e-05
29
+ target_instrument: null
30
+ num_epochs: 1000
31
+ num_steps: 1000
32
+ augmentation: 1
33
+ inference:
34
+ batch_size: 1
35
+ dim_t: 256
36
+ num_overlap: 8
models/MDX_Net_Models/model_data/mdx_c_configs/model_2_stem_full_band_3.yaml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 261120
3
+ dim_f: 6144
4
+ dim_t: 256
5
+ hop_length: 1024
6
+ n_fft: 12288
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 128
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 6
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 6
24
+ grad_clip: 0
25
+ instruments:
26
+ - Vocals
27
+ - Instrumental
28
+ lr: 1.0e-05
29
+ target_instrument: null
30
+ num_epochs: 1000
31
+ num_steps: 1000
32
+ augmentation: 1
33
+ q: 0.95
34
+ coarse_loss_clip: true
35
+ ema_momentum: 0.999
36
+ inference:
37
+ batch_size: 1
38
+ dim_t: 256
39
+ num_overlap: 8
models/MDX_Net_Models/model_data/mdx_c_configs/model_2_stem_full_band_4.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 261120
3
+ dim_f: 6144
4
+ dim_t: 256
5
+ hop_length: 1024
6
+ n_fft: 12288
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 128
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 6
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 6
24
+ grad_clip: 0
25
+ instruments:
26
+ - Vocals
27
+ - Instrumental
28
+ lr: 0.7e-05
29
+ patience: 2
30
+ target_instrument: null
31
+ num_epochs: 1000
32
+ num_steps: 1000
33
+ augmentation: 1
34
+ q: 0.95
35
+ coarse_loss_clip: true
36
+ ema_momentum: 0.999
37
+ inference:
38
+ batch_size: 1
39
+ dim_t: 256
40
+ num_overlap: 8
models/MDX_Net_Models/model_data/mdx_c_configs/model_2_stem_full_band_8k.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 261120
3
+ dim_f: 4096
4
+ dim_t: 256
5
+ hop_length: 1024
6
+ n_fft: 8192
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.001
10
+ model:
11
+ act: gelu
12
+ bottleneck_factor: 4
13
+ growth: 128
14
+ norm: InstanceNorm
15
+ num_blocks_per_scale: 2
16
+ num_channels: 128
17
+ num_scales: 5
18
+ num_subbands: 4
19
+ scale:
20
+ - 2
21
+ - 2
22
+ training:
23
+ batch_size: 6
24
+ grad_clip: 0
25
+ instruments:
26
+ - Vocals
27
+ - Instrumental
28
+ lr: 1.0e-05
29
+ patience: 2
30
+ reduce_factor: 0.95
31
+ target_instrument: null
32
+ num_epochs: 1000
33
+ num_steps: 1000
34
+ augmentation: 1
35
+ augmentation_type: simple1
36
+ augmentation_mix: true
37
+ q: 0.95
38
+ coarse_loss_clip: true
39
+ ema_momentum: 0.999
40
+ inference:
41
+ batch_size: 1
42
+ dim_t: 256
43
+ num_overlap: 8
models/MDX_Net_Models/model_data/mdx_c_configs/sndfx.yaml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 261120
3
+ dim_f: 1024
4
+ dim_t: 256
5
+ hop_length: 1024
6
+ min_mean_abs: 0.01
7
+ n_fft: 2048
8
+ num_channels: 2
9
+ sample_rate: 44100
10
+ stereo_prob: 0.7
11
+ model:
12
+ act: gelu
13
+ bottleneck_factor: 4
14
+ growth: 64
15
+ norm: InstanceNorm
16
+ num_blocks_per_scale: 2
17
+ num_channels: 64
18
+ num_scales: 5
19
+ num_subbands: 4
20
+ scale:
21
+ - 2
22
+ - 2
23
+ training:
24
+ batch_size: 8
25
+ ema_momentum: 0.999
26
+ grad_clip: null
27
+ instruments:
28
+ - Music
29
+ - Speech
30
+ - SFX
31
+ lr: 0.0001
32
+ num_steps: 30000
33
+ target_instrument: null
34
+ inference:
35
+ batch_size: 8
36
+ dim_t: 256
37
+ instruments:
38
+ - Music
39
+ - Dialog
40
+ - Effect
41
+ num_overlap: 8