Upload 61 files
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +6 -0
- miku.png +3 -0
- uvr5_models/Demucs/04573f0d-f3cf25b2.th +3 -0
- uvr5_models/Demucs/92cfc3b6-ef3bcb9c.th +3 -0
- uvr5_models/Demucs/955717e8-8726e21a.th +3 -0
- uvr5_models/Demucs/d12395a8-e57c48e6.th +3 -0
- uvr5_models/Demucs/f7e0c4bc-ba3fe64a.th +3 -0
- uvr5_models/Demucs/hdemucs_mmi.yaml +2 -0
- uvr5_models/Demucs/htdemucs.yaml +1 -0
- uvr5_models/Demucs/htdemucs_6s.yaml +1 -0
- uvr5_models/Demucs/htdemucs_ft.yaml +7 -0
- uvr5_models/MDX/Kim_Inst.onnx +3 -0
- uvr5_models/MDX/Kim_Vocal_1.onnx +3 -0
- uvr5_models/MDX/Kim_Vocal_2.onnx +3 -0
- uvr5_models/MDX/Reverb_HQ_By_FoxJoy.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET-Inst_1.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET-Inst_2.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET-Inst_3.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_1.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_2.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_3.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_4.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_5.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET-Inst_Main.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET-Inst_full_292.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET-Voc_FT.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET_Crowd_HQ_1.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET_Inst_187_beta.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET_Inst_82_beta.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET_Inst_90_beta.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET_Main_340.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET_Main_390.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET_Main_406.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET_Main_427.onnx +3 -0
- uvr5_models/MDX/UVR-MDX-NET_Main_438.onnx +3 -0
- uvr5_models/MDX/UVR_MDXNET_1_9703.onnx +3 -0
- uvr5_models/MDX/UVR_MDXNET_2_9682.onnx +3 -0
- uvr5_models/MDX/UVR_MDXNET_3_9662.onnx +3 -0
- uvr5_models/MDX/UVR_MDXNET_9482.onnx +3 -0
- uvr5_models/MDX/UVR_MDXNET_KARA.onnx +3 -0
- uvr5_models/MDX/UVR_MDXNET_KARA_2.onnx +3 -0
- uvr5_models/MDX/UVR_MDXNET_Main.onnx +3 -0
- uvr5_models/mdx_c_configs/config_melbandroformer_inst.yaml +51 -0
- uvr5_models/mdx_c_configs/config_melbandroformer_inst_v2.yaml +51 -0
- uvr5_models/mdx_c_configs/config_melbandroformer_instvoc_duality.yaml +51 -0
- uvr5_models/mdx_c_configs/config_vocals_mel_band_roformer_kim.yaml +51 -0
- uvr5_models/mdx_c_configs/model1.yaml +34 -0
- uvr5_models/mdx_c_configs/model2.yaml +34 -0
- uvr5_models/mdx_c_configs/model3.yaml +34 -0
- uvr5_models/mdx_c_configs/modelA.yaml +39 -0
.gitattributes
CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
miku.png filter=lfs diff=lfs merge=lfs -text
|
37 |
+
uvr5_models/Demucs/04573f0d-f3cf25b2.th filter=lfs diff=lfs merge=lfs -text
|
38 |
+
uvr5_models/Demucs/92cfc3b6-ef3bcb9c.th filter=lfs diff=lfs merge=lfs -text
|
39 |
+
uvr5_models/Demucs/955717e8-8726e21a.th filter=lfs diff=lfs merge=lfs -text
|
40 |
+
uvr5_models/Demucs/d12395a8-e57c48e6.th filter=lfs diff=lfs merge=lfs -text
|
41 |
+
uvr5_models/Demucs/f7e0c4bc-ba3fe64a.th filter=lfs diff=lfs merge=lfs -text
|
miku.png
ADDED
![]() |
Git LFS Details
|
uvr5_models/Demucs/04573f0d-f3cf25b2.th
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3cf25b222c4eed7cd49dd8b2c9597d50c18bd154090f7b919cfa5f93cf22c49
|
3 |
+
size 84141271
|
uvr5_models/Demucs/92cfc3b6-ef3bcb9c.th
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef3bcb9c8b40d14ae5d51b6db2587339cc12c6b77c0be151ce6d69002e087bf2
|
3 |
+
size 84141271
|
uvr5_models/Demucs/955717e8-8726e21a.th
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8726e21a993978c7ba086d3872e7608d7d5bfca646ca4aca459ffda844faa8b4
|
3 |
+
size 84141911
|
uvr5_models/Demucs/d12395a8-e57c48e6.th
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e57c48e6b0e38af4f7118d7bd08c49f0a0c0edf7d09143bdd902ea0d237303e6
|
3 |
+
size 84141271
|
uvr5_models/Demucs/f7e0c4bc-ba3fe64a.th
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba3fe64ae8ef66ac9a4857222ce48efbdc5eb3ad375cb79dd13debee5aaa4066
|
3 |
+
size 84141271
|
uvr5_models/Demucs/hdemucs_mmi.yaml
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
models: ['75fc33f5']
|
2 |
+
segment: 44
|
uvr5_models/Demucs/htdemucs.yaml
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
models: ['955717e8']
|
uvr5_models/Demucs/htdemucs_6s.yaml
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
models: ['5c90dfd2']
|
uvr5_models/Demucs/htdemucs_ft.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
models: ['f7e0c4bc', 'd12395a8', '92cfc3b6', '04573f0d']
|
2 |
+
weights: [
|
3 |
+
[1., 0., 0., 0.],
|
4 |
+
[0., 1., 0., 0.],
|
5 |
+
[0., 0., 1., 0.],
|
6 |
+
[0., 0., 0., 1.],
|
7 |
+
]
|
uvr5_models/MDX/Kim_Inst.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86b1940e7122fbdd2beadc65507cbff6c352d79012a8a7e60d56db98532af5f7
|
3 |
+
size 66759214
|
uvr5_models/MDX/Kim_Vocal_1.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f313140ef8fecc3041881b60ecb993d985a0281a138b2fb634aa8901aebc38cb
|
3 |
+
size 66759214
|
uvr5_models/MDX/Kim_Vocal_2.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce74ef3b6a6024ce44211a07be9cf8bc6d87728cc852a68ab34eb8e58cde9c8b
|
3 |
+
size 66759214
|
uvr5_models/MDX/Reverb_HQ_By_FoxJoy.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:233bb5c6aaa365e568659a0a81211746fa881f8f47f82d9e864fce1f7692db80
|
3 |
+
size 66780123
|
uvr5_models/MDX/UVR-MDX-NET-Inst_1.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ca53f94b7a0cbb04fcfcc8f3ea5ec1ae22cd8ad044f5e673588859f83976f5e
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET-Inst_2.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3a96a664d28b52db9def0a9cae9a16dbb524d8325bfe8f0ac64ac5d231456bc
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET-Inst_3.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b7834e2972158d8c9864e7376e3a7d084079c80a23f38dc31c4b0a4e901a1cb
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_1.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38a045c4ded87e3bf97b609ec5be7910e8a7cecec455f507227ab12b5e29f7f9
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_2.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:197f8ab296df850f961e68c595f6649acb7d9e621b5600b460f3458967299112
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_3.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:317554b07fe1ea5279a77f2b1520a41ea4b93432560c4ffd08792c30fddf9adc
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_4.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c4b5b9b05090fdf238f38ba5046813982d50e2a652e9cb3324ea79720c3c9c8
|
3 |
+
size 59074342
|
uvr5_models/MDX/UVR-MDX-NET-Inst_HQ_5.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:811cb24095d865763752310848b7ec86aeede0626cb05749ab35350e46897000
|
3 |
+
size 59074342
|
uvr5_models/MDX/UVR-MDX-NET-Inst_Main.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ab401dfe4a548b87deb64f975294bd56ff946aa32903f53b4b24bb13b2cce1e
|
3 |
+
size 52786726
|
uvr5_models/MDX/UVR-MDX-NET-Inst_full_292.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:020f6b65fa219fb7c285e4f3fc2863bf22daf03c4c93e547b6d13d5f2757a7ec
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET-Voc_FT.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:534b2070fcc7df514b13ef660dc8cbb328679c2374d04354a5c42bb14ecce111
|
3 |
+
size 66762490
|
uvr5_models/MDX/UVR-MDX-NET_Crowd_HQ_1.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:313b7bf869c411fdafe005cf0d5a635c405cb3d0df137178a64091952d75225c
|
3 |
+
size 59074342
|
uvr5_models/MDX/UVR-MDX-NET_Inst_187_beta.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c74566f3c3033cacba996328b2ee90bf77ef79ea6c35b7841df183b7906f54a5
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET_Inst_82_beta.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6c268302f09ab53687072618e056a611272a7e2c3fd9b3b59164da152f3588e
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET_Inst_90_beta.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d902868a46575aea6ee2335736ff3b53faf497a6bdaa1b864e0fd84eb1b42a5
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET_Main_340.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78792633b4007755af12ecde20f709b4f0b99563b1d25fe0a501ed2122aff218
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET_Main_390.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:286c4f0847ca837e2c3f4c4058f756d5f150cbf080506aa6f33a2847aba92e8c
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET_Main_406.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f158816a44eef1f0ba0f48b813cbfcf460ed1c70a754af3609ade44aaf7d1b23
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET_Main_427.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95275802a27801b97e3c0552b6eaa69f9bb3bd7df53cdf0536cce0a753f702cc
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR-MDX-NET_Main_438.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5e1ad93587a163a0987a0168b99a2ad875c0d9bfc3afb596b7c36b09c7f5c26
|
3 |
+
size 66759214
|
uvr5_models/MDX/UVR_MDXNET_1_9703.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:229ad3bb96a037e89d8ed86732d6d3675856e6a07c3e3f02896eac01ec7ee4be
|
3 |
+
size 29704436
|
uvr5_models/MDX/UVR_MDXNET_2_9682.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1deb7295acd3206bc9582a5d92f1b0a74bf3f41c7c1fb78a0ac0123cde4372db
|
3 |
+
size 29704436
|
uvr5_models/MDX/UVR_MDXNET_3_9662.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e02220e80d8253f4c2209f8924298b2b686bbdf2868b788ff5500fb9bd94aadc
|
3 |
+
size 29704436
|
uvr5_models/MDX/UVR_MDXNET_9482.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4f365207c56deb115bceedff3ad8fe98a751c745f9e370cecec6226b8b47184
|
3 |
+
size 29704436
|
uvr5_models/MDX/UVR_MDXNET_KARA.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3167c87333a48548413e972a286bf40bf5694001d2853861eb1435953f02d63
|
3 |
+
size 29704436
|
uvr5_models/MDX/UVR_MDXNET_KARA_2.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf32e15105a09c0f7dddd2b67346146334d6f3ecb399ed7638eba2ab07cbf5f4
|
3 |
+
size 52786726
|
uvr5_models/MDX/UVR_MDXNET_Main.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8289784cda38543ff431add4070662813311a8cccfc0112ca82f76d9dba2b4ca
|
3 |
+
size 66759214
|
uvr5_models/mdx_c_configs/config_melbandroformer_inst.yaml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
audio:
|
2 |
+
chunk_size: 485100
|
3 |
+
dim_f: 1024
|
4 |
+
dim_t: 1101
|
5 |
+
hop_length: 441
|
6 |
+
n_fft: 2048
|
7 |
+
num_channels: 2
|
8 |
+
sample_rate: 44100
|
9 |
+
min_mean_abs: 0.000
|
10 |
+
|
11 |
+
model:
|
12 |
+
dim: 384
|
13 |
+
depth: 6
|
14 |
+
stereo: true
|
15 |
+
num_stems: 1
|
16 |
+
time_transformer_depth: 1
|
17 |
+
freq_transformer_depth: 1
|
18 |
+
num_bands: 60
|
19 |
+
dim_head: 64
|
20 |
+
heads: 8
|
21 |
+
attn_dropout: 0
|
22 |
+
ff_dropout: 0
|
23 |
+
flash_attn: True
|
24 |
+
dim_freqs_in: 1025
|
25 |
+
sample_rate: 44100 # needed for mel filter bank from librosa
|
26 |
+
stft_n_fft: 2048
|
27 |
+
stft_hop_length: 441
|
28 |
+
stft_win_length: 2048
|
29 |
+
stft_normalized: False
|
30 |
+
mask_estimator_depth: 2
|
31 |
+
multi_stft_resolution_loss_weight: 1.0
|
32 |
+
multi_stft_resolutions_window_sizes: !!python/tuple
|
33 |
+
- 4096
|
34 |
+
- 2048
|
35 |
+
- 1024
|
36 |
+
- 512
|
37 |
+
- 256
|
38 |
+
multi_stft_hop_size: 147
|
39 |
+
multi_stft_normalized: False
|
40 |
+
|
41 |
+
training:
|
42 |
+
instruments:
|
43 |
+
- Instrumental
|
44 |
+
- Vocals
|
45 |
+
target_instrument: Instrumental
|
46 |
+
use_amp: True
|
47 |
+
|
48 |
+
inference:
|
49 |
+
batch_size: 1
|
50 |
+
dim_t: 1101
|
51 |
+
num_overlap: 2
|
uvr5_models/mdx_c_configs/config_melbandroformer_inst_v2.yaml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
audio:
|
2 |
+
chunk_size: 485100
|
3 |
+
dim_f: 1024
|
4 |
+
dim_t: 1101
|
5 |
+
hop_length: 441
|
6 |
+
n_fft: 2048
|
7 |
+
num_channels: 2
|
8 |
+
sample_rate: 44100
|
9 |
+
min_mean_abs: 0.000
|
10 |
+
|
11 |
+
model:
|
12 |
+
dim: 384
|
13 |
+
depth: 12
|
14 |
+
stereo: true
|
15 |
+
num_stems: 1
|
16 |
+
time_transformer_depth: 1
|
17 |
+
freq_transformer_depth: 1
|
18 |
+
num_bands: 60
|
19 |
+
dim_head: 64
|
20 |
+
heads: 8
|
21 |
+
attn_dropout: 0
|
22 |
+
ff_dropout: 0
|
23 |
+
flash_attn: True
|
24 |
+
dim_freqs_in: 1025
|
25 |
+
sample_rate: 44100 # needed for mel filter bank from librosa
|
26 |
+
stft_n_fft: 2048
|
27 |
+
stft_hop_length: 441
|
28 |
+
stft_win_length: 2048
|
29 |
+
stft_normalized: False
|
30 |
+
mask_estimator_depth: 3
|
31 |
+
multi_stft_resolution_loss_weight: 1.0
|
32 |
+
multi_stft_resolutions_window_sizes: !!python/tuple
|
33 |
+
- 4096
|
34 |
+
- 2048
|
35 |
+
- 1024
|
36 |
+
- 512
|
37 |
+
- 256
|
38 |
+
multi_stft_hop_size: 147
|
39 |
+
multi_stft_normalized: False
|
40 |
+
|
41 |
+
training:
|
42 |
+
instruments:
|
43 |
+
- Instrumental
|
44 |
+
- Vocals
|
45 |
+
target_instrument: Instrumental
|
46 |
+
use_amp: True
|
47 |
+
|
48 |
+
inference:
|
49 |
+
batch_size: 1
|
50 |
+
dim_t: 1101
|
51 |
+
num_overlap: 2
|
uvr5_models/mdx_c_configs/config_melbandroformer_instvoc_duality.yaml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
audio:
|
2 |
+
chunk_size: 485100
|
3 |
+
dim_f: 1024
|
4 |
+
dim_t: 256
|
5 |
+
hop_length: 441
|
6 |
+
n_fft: 2048
|
7 |
+
num_channels: 2
|
8 |
+
sample_rate: 44100
|
9 |
+
min_mean_abs: 0.000
|
10 |
+
|
11 |
+
model:
|
12 |
+
dim: 384
|
13 |
+
depth: 6
|
14 |
+
stereo: true
|
15 |
+
num_stems: 2
|
16 |
+
time_transformer_depth: 1
|
17 |
+
freq_transformer_depth: 1
|
18 |
+
num_bands: 60
|
19 |
+
dim_head: 64
|
20 |
+
heads: 8
|
21 |
+
attn_dropout: 0
|
22 |
+
ff_dropout: 0
|
23 |
+
flash_attn: True
|
24 |
+
dim_freqs_in: 1025
|
25 |
+
sample_rate: 44100 # needed for mel filter bank from librosa
|
26 |
+
stft_n_fft: 2048
|
27 |
+
stft_hop_length: 441
|
28 |
+
stft_win_length: 2048
|
29 |
+
stft_normalized: False
|
30 |
+
mask_estimator_depth: 2
|
31 |
+
multi_stft_resolution_loss_weight: 1.0
|
32 |
+
multi_stft_resolutions_window_sizes: !!python/tuple
|
33 |
+
- 4096
|
34 |
+
- 2048
|
35 |
+
- 1024
|
36 |
+
- 512
|
37 |
+
- 256
|
38 |
+
multi_stft_hop_size: 147
|
39 |
+
multi_stft_normalized: False
|
40 |
+
|
41 |
+
training:
|
42 |
+
instruments:
|
43 |
+
- Vocals
|
44 |
+
- Instrumental
|
45 |
+
target_instrument: null
|
46 |
+
use_amp: True
|
47 |
+
|
48 |
+
inference:
|
49 |
+
batch_size: 1
|
50 |
+
dim_t: 1101
|
51 |
+
num_overlap: 2
|
uvr5_models/mdx_c_configs/config_vocals_mel_band_roformer_kim.yaml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
audio:
|
2 |
+
chunk_size: 352800
|
3 |
+
dim_f: 1024
|
4 |
+
dim_t: 256
|
5 |
+
hop_length: 441
|
6 |
+
n_fft: 2048
|
7 |
+
num_channels: 2
|
8 |
+
sample_rate: 44100
|
9 |
+
min_mean_abs: 0.001
|
10 |
+
|
11 |
+
model:
|
12 |
+
dim: 384
|
13 |
+
depth: 6
|
14 |
+
stereo: true
|
15 |
+
num_stems: 1
|
16 |
+
time_transformer_depth: 1
|
17 |
+
freq_transformer_depth: 1
|
18 |
+
num_bands: 60
|
19 |
+
dim_head: 64
|
20 |
+
heads: 8
|
21 |
+
attn_dropout: 0
|
22 |
+
ff_dropout: 0
|
23 |
+
flash_attn: True
|
24 |
+
dim_freqs_in: 1025
|
25 |
+
sample_rate: 44100 # needed for mel filter bank from librosa
|
26 |
+
stft_n_fft: 2048
|
27 |
+
stft_hop_length: 441
|
28 |
+
stft_win_length: 2048
|
29 |
+
stft_normalized: False
|
30 |
+
mask_estimator_depth: 2
|
31 |
+
multi_stft_resolution_loss_weight: 1.0
|
32 |
+
multi_stft_resolutions_window_sizes: !!python/tuple
|
33 |
+
- 4096
|
34 |
+
- 2048
|
35 |
+
- 1024
|
36 |
+
- 512
|
37 |
+
- 256
|
38 |
+
multi_stft_hop_size: 147
|
39 |
+
multi_stft_normalized: False
|
40 |
+
|
41 |
+
training:
|
42 |
+
instruments:
|
43 |
+
- Vocals
|
44 |
+
- Instrumental
|
45 |
+
target_instrument: Vocals
|
46 |
+
|
47 |
+
inference:
|
48 |
+
batch_size: 1
|
49 |
+
dim_t: 1101
|
50 |
+
num_overlap: 1
|
51 |
+
chunk_size: 352800
|
uvr5_models/mdx_c_configs/model1.yaml
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
audio:
|
2 |
+
chunk_size: 260096
|
3 |
+
dim_f: 4096
|
4 |
+
dim_t: 128
|
5 |
+
hop_length: 2048
|
6 |
+
n_fft: 8192
|
7 |
+
num_channels: 2
|
8 |
+
sample_rate: 44100
|
9 |
+
model:
|
10 |
+
act: gelu
|
11 |
+
bottleneck_factor: 4
|
12 |
+
growth: 64
|
13 |
+
norm: InstanceNorm
|
14 |
+
num_blocks_per_scale: 2
|
15 |
+
num_channels: 128
|
16 |
+
num_scales: 5
|
17 |
+
num_subbands: 4
|
18 |
+
scale:
|
19 |
+
- 2
|
20 |
+
- 2
|
21 |
+
training:
|
22 |
+
batch_size: 8
|
23 |
+
grad_clip: 0
|
24 |
+
instruments:
|
25 |
+
- Vocals
|
26 |
+
- Drums
|
27 |
+
- Bass
|
28 |
+
- Other
|
29 |
+
lr: 5.0e-05
|
30 |
+
target_instrument: null
|
31 |
+
inference:
|
32 |
+
batch_size: 1
|
33 |
+
dim_t: 1024
|
34 |
+
num_overlap: 8
|
uvr5_models/mdx_c_configs/model2.yaml
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
audio:
|
2 |
+
chunk_size: 260096
|
3 |
+
dim_f: 4096
|
4 |
+
dim_t: 128
|
5 |
+
hop_length: 2048
|
6 |
+
n_fft: 8192
|
7 |
+
num_channels: 2
|
8 |
+
sample_rate: 44100
|
9 |
+
model:
|
10 |
+
act: gelu
|
11 |
+
bottleneck_factor: 4
|
12 |
+
growth: 64
|
13 |
+
norm: InstanceNorm
|
14 |
+
num_blocks_per_scale: 2
|
15 |
+
num_channels: 256
|
16 |
+
num_scales: 5
|
17 |
+
num_subbands: 4
|
18 |
+
scale:
|
19 |
+
- 2
|
20 |
+
- 2
|
21 |
+
training:
|
22 |
+
batch_size: 8
|
23 |
+
grad_clip: 0
|
24 |
+
instruments:
|
25 |
+
- Vocals
|
26 |
+
- Drums
|
27 |
+
- Bass
|
28 |
+
- Other
|
29 |
+
lr: 3.0e-05
|
30 |
+
target_instrument: null
|
31 |
+
inference:
|
32 |
+
batch_size: 1
|
33 |
+
dim_t: 1024
|
34 |
+
num_overlap: 8
|
uvr5_models/mdx_c_configs/model3.yaml
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
audio:
|
2 |
+
chunk_size: 260096
|
3 |
+
dim_f: 4096
|
4 |
+
dim_t: 128
|
5 |
+
hop_length: 2048
|
6 |
+
n_fft: 12288
|
7 |
+
num_channels: 2
|
8 |
+
sample_rate: 44100
|
9 |
+
model:
|
10 |
+
act: gelu
|
11 |
+
bottleneck_factor: 4
|
12 |
+
growth: 64
|
13 |
+
norm: InstanceNorm
|
14 |
+
num_blocks_per_scale: 2
|
15 |
+
num_channels: 128
|
16 |
+
num_scales: 5
|
17 |
+
num_subbands: 4
|
18 |
+
scale:
|
19 |
+
- 2
|
20 |
+
- 2
|
21 |
+
training:
|
22 |
+
batch_size: 8
|
23 |
+
grad_clip: 0
|
24 |
+
instruments:
|
25 |
+
- Vocals
|
26 |
+
- Drums
|
27 |
+
- Bass
|
28 |
+
- Other
|
29 |
+
lr: 5.0e-05
|
30 |
+
target_instrument: Vocals
|
31 |
+
inference:
|
32 |
+
batch_size: 1
|
33 |
+
dim_t: 1024
|
34 |
+
num_overlap: 8
|
uvr5_models/mdx_c_configs/modelA.yaml
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
audio:
|
2 |
+
chunk_size: 261120
|
3 |
+
dim_f: 4096
|
4 |
+
dim_t: 256
|
5 |
+
hop_length: 1024
|
6 |
+
min_mean_abs: 0.01
|
7 |
+
n_fft: 8192
|
8 |
+
num_channels: 2
|
9 |
+
sample_rate: 44100
|
10 |
+
model:
|
11 |
+
act: gelu
|
12 |
+
bottleneck_factor: 4
|
13 |
+
growth: 64
|
14 |
+
norm: InstanceNorm
|
15 |
+
num_blocks_per_scale: 2
|
16 |
+
num_channels: 64
|
17 |
+
num_scales: 5
|
18 |
+
num_subbands: 4
|
19 |
+
scale:
|
20 |
+
- 2
|
21 |
+
- 2
|
22 |
+
training:
|
23 |
+
batch_size: 6
|
24 |
+
coarse_loss_clip: true
|
25 |
+
ema_momentum: 0.999
|
26 |
+
grad_clip: null
|
27 |
+
instruments:
|
28 |
+
- Vocals
|
29 |
+
- Drums
|
30 |
+
- Bass
|
31 |
+
- Other
|
32 |
+
lr: 0.0001
|
33 |
+
num_steps: 100000
|
34 |
+
q: 0.4
|
35 |
+
target_instrument: null
|
36 |
+
inference:
|
37 |
+
batch_size: 2
|
38 |
+
dim_t: 1024
|
39 |
+
num_overlap: 8
|