fabiogra committed
Commit 6654c1a · 0 Parent(s)

Duplicate from fabiogra/moseca
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.mp3 filter=lfs diff=lfs merge=lfs -text
.streamlit/config.toml ADDED
@@ -0,0 +1,2 @@
+ [server]
+ enableXsrfProtection = false
Dockerfile ADDED
@@ -0,0 +1,34 @@
+ # syntax=docker/dockerfile:1
+
+ FROM python:3.8
+
+
+ RUN apt-get update && \
+     apt-get install -y ffmpeg jq curl && \
+     pip install --upgrade pip
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY scripts/ .
+ COPY app ./app
+ COPY img ./img
+
+ RUN wget --progress=bar:force:noscroll https://huggingface.co/fabiogra/baseline_vocal_remover/resolve/main/baseline.pth
+
+ RUN mkdir -p /tmp/ /tmp/vocal_remover /.cache /.config && \
+     chmod 777 /tmp /tmp/vocal_remover /.cache /.config
+
+ ENV PYTHONPATH "${PYTHONPATH}:/app"
+
+ RUN chmod +x prepare_samples.sh
+
+ EXPOSE 7860
+
+ HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
+
+ RUN ["./prepare_samples.sh"]
+
+ ENTRYPOINT ["streamlit", "run", "app/header.py", "--server.port=7860", "--server.address=0.0.0.0"]
README.md ADDED
@@ -0,0 +1,218 @@
+ ---
+ title: Moseca
+ emoji: 🎤🎸🥁🎹
+ colorFrom: yellow
+ colorTo: purple
+ sdk: docker
+ app_port: 7860
+ models:
+   - https://huggingface.co/fabiogra/baseline_vocal_remover
+ tags:
+   - audio
+   - music
+   - vocal-removal
+   - karaoke
+   - music-separation
+   - music-source-separation
+ pinned: true
+ duplicated_from: fabiogra/moseca
+ ---
+
+ <p align="center">
+   <img src="img/logo_moseca.png" alt="logo" width="70" />
+ </p>
+ <h2 align="center">Moseca</h2>
+ <p align="center">Music Source Separation & Karaoke</p>
+
+
+ <a href="https://huggingface.co/spaces/fabiogra/moseca">
+   <img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue"
+        alt="Hugging Face Spaces"></a>
+ <a href="https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true">
+   <img src="https://img.shields.io/badge/-Docker%20Image-blue?logo=docker&labelColor=white"
+        alt="Docker"></a><a href="https://www.buymeacoffee.com/fabiogra">
+   <img src="https://img.shields.io/badge/Buy%20me%20a%20coffee--yellow.svg?logo=buy-me-a-coffee&logoColor=orange&style=social"
+        alt="Buy me a coffee"></a>
+
+ ---
+
+ - [Setup](#setup)
+ - [About](#about)
+   - [High-Quality Stem Separation](#high-quality-stem-separation)
+   - [Advanced AI Algorithms](#advanced-ai-algorithms)
+   - [Karaoke Fun](#karaoke-fun)
+   - [Easy Deployment](#easy-deployment)
+   - [Open-Source and Free](#open-source-and-free)
+   - [Support](#support)
+ - [FAQs](#faqs)
+   - [What is Moseca?](#what-is-moseca)
+   - [Are there any limitations?](#are-there-any-limitations)
+   - [How does Moseca work?](#how-does-moseca-work)
+   - [How do I use Moseca?](#how-do-i-use-moseca)
+   - [Where can I find the code for Moseca?](#where-can-i-find-the-code-for-moseca)
+   - [How can I get in touch with you?](#how-can-i-get-in-touch-with-you)
+ - [Disclaimer](#disclaimer)
+
+
+ ---
+
+
+ ## Setup
+ ### Local environment
+ Create a new environment with Python 3.8 and install the requirements:
+ ```bash
+ pip install -r requirements.txt
+ ```
+ then run the app with:
+ ```bash
+ streamlit run app/header.py
+ ```
+ ### Docker
+ You can also run the app with Docker:
+ ```bash
+ docker build -t moseca .
+ docker run -it --rm -p 7860:7860 moseca
+ ```
+ or pull the image from Hugging Face Spaces:
+ ```bash
+ docker run -it -p 7860:7860 --platform=linux/amd64 \
+     registry.hf.space/fabiogra-moseca:latest
+ ```
+
+ You can set the following environment variables to limit the resources used by the app:
+ - ENV_LIMITATION=true
+ - LIMIT_CPU=true
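+
+ With Docker these can be passed as `docker run -e ENV_LIMITATION=true -e LIMIT_CPU=true ...`. Inside the app they are read as plain environment variables (see `app/pages/Separate.py` and the service runners); a minimal sketch of the same pattern:
+
+ ```python
+ import os
+
+ # Any non-empty value enables a flag, mirroring os.environ.get(..., False) in the app.
+ if os.environ.get("ENV_LIMITATION", False):
+     print("Clip length will be limited.")
+ if os.environ.get("LIMIT_CPU", False):
+     print("Torch will run single-threaded.")
+ ```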
+ ---
+ ## About
+
+ Welcome to Moseca, your personal web application designed to redefine your music experience.
+ Whether you're a musician looking to remix your favorite songs, a karaoke
+ enthusiast, or a music lover wanting to dive deeper into your favorite tracks,
+ Moseca is for you.
+
+ <br>
+
+ ### High-Quality Stem Separation
+
+ <img title="High-Quality Stem Separation" src="https://i.imgur.com/l7H8YWL.png" width="250" ></img>
+
+
+ <br>
+
+ Separate up to 6 stems including 🗣voice, 🥁drums, 🔉bass, 🎸guitar,
+ 🎹piano (beta), and 🎶 others.
+
+ <br>
+
+ ### Advanced AI Algorithms
+
+ <img title="Advanced AI Algorithms" src="https://i.imgur.com/I8Pvdav.png" width="250" ></img>
+
+ <br>
+
+ Moseca utilizes state-of-the-art AI technology to accurately extract voice or music from
+ your original songs.
+
+ <br>
+
+ ### Karaoke Fun
+
+ <img title="Karaoke Fun" src="https://i.imgur.com/nsn3JGV.png" width="250" ></img>
+
+ <br>
+
+ Engage with your favorite tunes in a whole new way!
+
+ Moseca offers an immersive online karaoke experience, allowing you to search
+ for any song on YouTube and remove the vocals online.
+
+ Enjoy singing along with high-quality instrumentals from the comfort of your home.
+
+
+ <br>
+
+ ### Easy Deployment
+
+
+ With Moseca, you can deploy your personal Moseca app on
+ <a href="https://huggingface.co/spaces/fabiogra/moseca?duplicate=true">
+   <img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue"
+        alt="Hugging Face Spaces"></a> or locally with
+ [![Docker Call](https://img.shields.io/badge/-Docker%20Image-blue?logo=docker&labelColor=white)](https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true)
+ in just one click.
+
+ <br>
+
+ ### Open-Source and Free
+
+ Moseca is the free and open-source alternative to lalal.ai, splitter.ai or media.io vocal remover.
+
+ You can modify, distribute, and use it free of charge. I believe in the power of community
+ collaboration and encourage users to contribute to our source code, making Moseca better with
+ each update.
+
+
+ <br>
+
+ ### Support
+
+ - Show your support by giving a star to the GitHub repository [![GitHub stars](https://img.shields.io/github/stars/fabiogra/moseca.svg?style=social&label=Star&maxAge=2592000)](https://github.com/fabiogra/moseca).
+ - If you have found an issue or have a suggestion to improve Moseca, you can open an issue: [![GitHub issues](https://img.shields.io/github/issues/fabiogra/moseca.svg)](https://github.com/fabiogra/moseca/issues/new)
+ - Enjoy Moseca? [![Buymeacoffee](https://img.shields.io/badge/Buy%20me%20a%20coffee--yellow.svg?logo=buy-me-a-coffee&logoColor=orange&style=social)](https://www.buymeacoffee.com/fabiogra)
+
+ ------
+
+ ## FAQs
+
+ ### What is Moseca?
+
+ Moseca is an open-source web app that utilizes advanced AI technology to separate vocals and
+ instrumentals from music tracks. It also provides an online karaoke experience by allowing you
+ to search for any song on YouTube and remove the vocals.
+
+ ### Are there any limitations?
+ Yes, in this environment there are some limitations on the length of the processed audio
+ and on CPU usage, to ensure a smooth experience for all users.
+
+ <b>If you want to <u>remove these limitations</u> you can deploy a Moseca app in your personal
+ environment, such as <a href="https://huggingface.co/spaces/fabiogra/moseca?duplicate=true"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue" alt="Hugging Face Spaces"></a> or locally with [![Docker Call](https://img.shields.io/badge/-Docker%20Image-blue?logo=docker&labelColor=white)](https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true)</b>
+
+ ### How does Moseca work?
+ Moseca utilizes the Hybrid Spectrogram and Waveform Source Separation ([DEMUCS](https://github.com/facebookresearch/demucs)) model from Facebook. For fast karaoke vocal removal, Moseca uses the AI vocal remover developed by [tsurumeso](https://github.com/tsurumeso/vocal-remover).
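+
+ As a rough sketch of how the fast karaoke path is invoked internally (mirroring the call in `app/pages/Karaoke.py`; the file paths here are illustrative):
+
+ ```python
+ from service.vocal_remover.runner import load_model, separate
+
+ # Load the tsurumeso vocal-remover checkpoint that the Dockerfile downloads.
+ model, device = load_model(pretrained_model="baseline.pth")
+
+ # Writes <output_dir>/vocal_remover/<track>/no_vocals.mp3.
+ separate(
+     input="song.mp3",
+     model=model,
+     device=device,
+     output_dir="/tmp",
+     only_no_vocals=True,
+ )
+ ```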
+
+ ### How do I use Moseca?
+ 1. Upload your file: choose your song and upload it to Moseca. It supports
+ a wide range of music formats for your convenience.
+
+ 2. Choose separation mode: opt for voice only, 4-stem or 6-stem separation
+ depending on your requirement.
+
+ 3. Let AI do its magic: Moseca’s advanced AI will work to separate vocals
+ from music in a matter of minutes, giving you high-quality, separated audio tracks.
+
+ 4. Download and enjoy: preview and download your separated audio tracks.
+ Now you can enjoy them anytime, anywhere!
+
+
+ ### Where can I find the code for Moseca?
+
+ The code for Moseca is readily available on
+ [GitHub](https://github.com/fabiogra/moseca) and
+ [Hugging Face](https://huggingface.co/spaces/fabiogra/moseca).
+
+
+ ### How can I get in touch with you?
+
+ For any questions or feedback, feel free to contact me on
+ [![Twitter](https://badgen.net/badge/icon/twitter?icon=twitter&label)](https://twitter.com/grsFabio)
+ or [LinkedIn](https://www.linkedin.com/in/fabio-grasso/en).
+
+ ------
+ ## Disclaimer
+
+ Moseca is designed to separate vocals and instruments from copyrighted music for
+ legally permissible purposes, such as learning, practicing, research, or other non-commercial
+ activities that fall within the scope of fair use or exceptions to copyright. As a user, you are
+ responsible for ensuring that your use of separated audio tracks complies with the legal
+ requirements in your jurisdiction.
app/__init__.py ADDED
File without changes
app/_fastapi_server.py ADDED
@@ -0,0 +1,20 @@
+ from fastapi import FastAPI
+ from fastapi.responses import FileResponse
+ from urllib.parse import unquote
+
+ import os
+
+ app = FastAPI()
+
+
+ @app.get("/streaming/{path:path}")
+ async def serve_streaming(path: str):
+     # Decode URL-encoded characters
+     decoded_path = unquote(path)
+     return FileResponse(decoded_path, filename=os.path.basename(decoded_path))
+
+
+ if __name__ == "__main__":
+     import uvicorn
+
+     uvicorn.run(app, host="127.0.0.1", port=8000)
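
A minimal client sketch for the streaming endpoint above (the port matches the `uvicorn.run` call; the file path is illustrative):

```python
import requests
from urllib.parse import quote

# quote(..., safe="") also percent-encodes the slashes in the filesystem path;
# the endpoint unquotes it back before serving the file.
path = quote("/tmp/vocal_remover/song/no_vocals.mp3", safe="")
response = requests.get(f"http://127.0.0.1:8000/streaming/{path}")
with open("no_vocals.mp3", "wb") as f:
    f.write(response.content)
```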
app/footer.py ADDED
@@ -0,0 +1,118 @@
+ import streamlit as st
+
+ from streamlit.components.v1 import html
+ from htbuilder import HtmlElement, div, a, p, img, styles
+ from htbuilder.units import percent, px
+
+
+ def image(src_as_string, **style):
+     return img(src=src_as_string, style=styles(**style))
+
+
+ def link(link, text, **style):
+     return a(_href=link, _target="_blank", style=styles(**style))(text)
+
+
+ def layout(*args):
+     style = """
+     <style>
+       footer {visibility: hidden;}
+       .stApp { bottom: 50px; }
+     </style>
+     """
+
+     style_div = styles(
+         position="fixed",
+         left=0,
+         bottom=0,
+         margin=px(0, 0, 0, 0),
+         width=percent(100),
+         color="black",
+         text_align="center",
+         height="auto",
+         opacity=1,
+         align_items="center",
+         flex_direction="column",
+         display="flex",
+     )
+     body = p(
+         id="myFooter",
+         style=styles(
+             margin=px(0, 0, 0, 0),
+             padding=px(5),
+             font_size="0.8rem",
+             color="rgb(51,51,51)",
+             font_family="Exo",
+         ),
+     )
+     foot = div(style=style_div)(body)
+
+     st.markdown(style, unsafe_allow_html=True)
+
+     for arg in args:
+         if isinstance(arg, str):
+             body(arg)
+
+         elif isinstance(arg, HtmlElement):
+             body(arg)
+
+     st.markdown(str(foot), unsafe_allow_html=True)
+
+     js_code = """
+     <script>
+     function rgbReverse(rgb){
+         var r = rgb[0]*0.299;
+         var g = rgb[1]*0.587;
+         var b = rgb[2]*0.114;
+
+         if ((r + g + b)/255 > 0.5){
+             return "rgb(49, 51, 63)"
+         }else{
+             return "rgb(250, 250, 250)"
+         }
+
+     };
+     var stApp_css = window.parent.document.querySelector("#root > div:nth-child(1) > div > div > div");
+     window.onload = function () {
+         var mutationObserver = new MutationObserver(function(mutations) {
+             mutations.forEach(function(mutation) {
+                 var bgColor = window.getComputedStyle(stApp_css).backgroundColor.replace("rgb(", "").replace(")", "").split(", ");
+                 var fontColor = rgbReverse(bgColor);
+                 var pTag = window.parent.document.getElementById("myFooter");
+                 pTag.style.color = fontColor;
+             });
+         });
+
+         /**Element**/
+         mutationObserver.observe(stApp_css, {
+             attributes: true,
+             characterData: true,
+             childList: true,
+             subtree: true,
+             attributeOldValue: true,
+             characterDataOldValue: true
+         });
+     }
+
+
+     </script>
+     """
+     html(js_code)
+
+
+ def footer():
+     myargs = [
+         "Made in ",
+         link(
+             "https://streamlit.io/",
+             image("https://streamlit.io/images/brand/streamlit-mark-color.png", width="20px"),
+         ),
+         " with ❤️ by ",
+         link("https://twitter.com/grsFabio", "@grsFabio"),
+         "&nbsp;&nbsp;&nbsp;",
+         link(
+             "https://www.buymeacoffee.com/fabiogra",
+             image("https://i.imgur.com/YFu6MMA.png", margin="0em", align="top", width="130px"),
+         ),
+     ]
+     layout(*myargs)
app/header.py ADDED
@@ -0,0 +1,68 @@
+ import streamlit as st
+
+ from helpers import switch_page
+ from style import CSS
+ import logging
+
+ from streamlit_option_menu import option_menu
+
+ logging.basicConfig(
+     format="%(asctime)s %(levelname)-8s %(message)s",
+     level=logging.INFO,
+     datefmt="%Y-%m-%d %H:%M:%S",
+ )
+
+
+ def header(logo_and_title=True):
+     if "first_run" not in st.session_state:
+         st.session_state.first_run = True
+         for key in [
+             "search_results",
+             "selected_value",
+             "filename",
+             "executed",
+             "play_karaoke",
+             "url",
+             "random_song",
+             "last_dir",
+         ]:
+             st.session_state[key] = None
+         st.session_state.video_options = []
+         st.session_state.page = "Karaoke"
+         switch_page(st.session_state.page)
+
+     st.set_page_config(
+         page_title="Moseca - Music Separation and Karaoke - Free and Open Source alternative to lalal.ai, splitter.ai or media.io vocal remover.",
+         page_icon="img/logo_moseca.png",
+         layout="wide",
+         initial_sidebar_state="collapsed",
+     )
+     st.markdown(CSS, unsafe_allow_html=True)
+
+     options = ["Karaoke", "Separate", "About"]
+     page = option_menu(
+         menu_title=None,
+         options=options,
+         # bootstrap icons
+         icons=["play-btn-fill", "file-earmark-music", "info-circle"],
+         default_index=options.index(st.session_state.page),
+         orientation="horizontal",
+         styles={"nav-link": {"padding-left": "1.5rem", "padding-right": "1.5rem"}},
+         key="",
+     )
+     if page != st.session_state.page:
+         switch_page(page)
+
+     if logo_and_title:
+         head = st.columns([5, 1, 3, 5])
+         with head[1]:
+             st.image("img/logo_moseca.png", use_column_width=False, width=80)
+         with head[2]:
+             st.markdown(
+                 "<h1>moseca</h1><p><b>Music Source Separation & Karaoke</b></p>",
+                 unsafe_allow_html=True,
+             )
+
+
+ if __name__ == "__main__":
+     header()
app/helpers.py ADDED
@@ -0,0 +1,160 @@
+ import random
+ from io import BytesIO
+ import json
+
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import requests
+ import streamlit as st
+ from PIL import Image
+ from pydub import AudioSegment
+ from base64 import b64encode
+ from pathlib import Path
+ from streamlit.runtime.scriptrunner import RerunData, RerunException
+ from streamlit.source_util import get_pages
+ from streamlit_player import st_player
+
+ extensions = ["mp3", "wav", "ogg", "flac"]  # we will look for all those file types.
+ example_songs = [1, 2, 3]
+
+
+ def img_to_bytes(img_path):
+     img_bytes = Path(img_path).read_bytes()
+     encoded = b64encode(img_bytes).decode()
+     return encoded
+
+
+ # @st.cache_data(show_spinner=False)
+ def img_to_html(img_path):
+     img_html = "<div style='display: flex; justify-content: center; align-items: center; height: 50vh;'><img src='data:image/png;base64,{}' class='img-fluid' style='max-width: 100%; max-height: 100%;' ></div>".format(
+         img_to_bytes(img_path)
+     )
+     return img_html
+
+
+ @st.cache_data(show_spinner=False)
+ def url_is_valid(url):
+     if url.startswith("http") is False:
+         st.error("URL should start with http or https.")
+         return False
+     elif url.split(".")[-1] not in extensions:
+         st.error("Extension not supported.")
+         return False
+     try:
+         r = requests.get(url)
+         r.raise_for_status()
+         return True
+     except Exception:
+         st.error("URL is not valid.")
+         return False
+
+
+ @st.cache_data(show_spinner=False)
+ def load_audio_segment(path: str, format: str) -> AudioSegment:
+     return AudioSegment.from_file(path, format=format)
+
+
+ @st.cache_data(show_spinner=False)
+ def plot_audio(_audio_segment: AudioSegment, *args, **kwargs) -> Image.Image:
+     samples = _audio_segment.get_array_of_samples()
+     arr = np.array(samples)
+
+     fig, ax = plt.subplots(figsize=(10, 2))
+     ax.plot(arr, linewidth=0.05)
+     ax.set_axis_off()
+
+     # Set the background color to transparent
+     fig.patch.set_alpha(0)
+     ax.patch.set_alpha(0)
+
+     buf = BytesIO()
+     plt.savefig(buf, format="png", dpi=100, bbox_inches="tight")
+     buf.seek(0)
+     image = Image.open(buf)
+
+     plt.close(fig)
+     return image
+
+
+ def get_random_song():
+     sample_songs = json.load(open("sample_songs.json"))
+     name, url = random.choice(list(sample_songs.items()))
+     return name, url
+
+
+ def streamlit_player(
+     player,
+     url,
+     height,
+     is_active,
+     muted,
+     start,
+     key,
+     playback_rate=1,
+     events=None,
+     play_inline=False,
+     light=False,
+ ):
+     with player:
+         options = {
+             "progress_interval": 1000,
+             "playing": is_active,  # st.checkbox("Playing", False),
+             "muted": muted,
+             "light": light,
+             "play_inline": play_inline,
+             "playback_rate": playback_rate,
+             "height": height,
+             "config": {"start": start},
+             "events": events,
+         }
+         if url != "":
+             events = st_player(url, **options, key=key)
+             return events
+
+
+ @st.cache_data(show_spinner=False)
+ def local_audio(path, mime="audio/mp3"):
+     data = b64encode(Path(path).read_bytes()).decode()
+     return [{"type": mime, "src": f"data:{mime};base64,{data}"}]
+
+
+ def _standardize_name(name: str) -> str:
+     return name.lower().replace("_", " ").strip()
+
+
+ @st.cache_data(show_spinner=False)
+ def switch_page(page_name: str):
+     st.session_state.page = page_name
+
+     page_name = _standardize_name(page_name)
+
+     pages = get_pages("header.py")  # OR whatever your main page is called
+
+     for page_hash, config in pages.items():
+         if _standardize_name(config["page_name"]) == page_name:
+             raise RerunException(
+                 RerunData(
+                     page_script_hash=page_hash,
+                     page_name=page_name,
+                 )
+             )
+
+     page_names = [_standardize_name(config["page_name"]) for config in pages.values()]
+     raise ValueError(f"Could not find page {page_name}. Must be one of {page_names}")
+
+
+ def st_local_audio(pathname, key):
+     st_player(
+         local_audio(pathname),
+         **{
+             "progress_interval": 1000,
+             "playing": False,
+             "muted": False,
+             "light": False,
+             "play_inline": True,
+             "playback_rate": 1,
+             "height": 40,
+             "config": {"start": 0, "forceAudio": True, "forceHLS": True, "forceSafariHLS": True},
+         },
+         key=key,
+     )
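
A small usage sketch for the audio helpers above, inside a Streamlit page (the file name is illustrative):

```python
import streamlit as st
from helpers import load_audio_segment, plot_audio, st_local_audio

song = load_audio_segment("song.mp3", "mp3")
st.image(plot_audio(song))           # transparent waveform rendered as a PIL image
st_local_audio("song.mp3", key="p")  # inlines the file as a base64 data-URI player
```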
app/pages/About.py ADDED
@@ -0,0 +1,154 @@
+ import streamlit as st
+
+ from header import header
+ from footer import footer
+
+
+ def body():
+     with st.columns([2, 3, 2])[1]:
+         st.markdown(
+             """
+ <center>
+
+ ## Welcome to Moseca, your personal web application designed to redefine your music experience.
+ <font size="3"> Whether you're a musician looking to remix your favorite songs, a karaoke
+ enthusiast, or a music lover wanting to dive deeper into your favorite tracks,
+ Moseca is for you. </font>
+
+ <br>
+
+ ### High-Quality Stem Separation
+
+ <center><img title="High-Quality Stem Separation" src="https://i.imgur.com/l7H8YWL.png" width="60%" ></img></center>
+
+
+ <br>
+
+ <font size="3"> Separate up to 6 stems including 🗣voice, 🥁drums, 🔉bass, 🎸guitar,
+ 🎹piano (beta), and 🎶 others. </font>
+
+ <br>
+
+ ### Advanced AI Algorithms
+
+ <center><img title="Advanced AI Algorithms" src="https://i.imgur.com/I8Pvdav.png" width="60%" ></img></center>
+
+ <br>
+
+ <font size="3"> Moseca utilizes state-of-the-art AI technology to accurately extract voice or music from
+ your original songs. </font>
+
+ <br>
+
+ ### Karaoke Fun
+
+ <center><img title="Karaoke Fun" src="https://i.imgur.com/nsn3JGV.png" width="60%" ></img></center>
+
+ <br>
+
+ <font size="3"> Engage with your favorite tunes in a whole new way! </font>
+
+ <font size="3"> Moseca offers an immersive online karaoke experience, allowing you to search
+ for any song on YouTube and remove the vocals online. </font>
+
+ <font size="3"> Enjoy singing along with high-quality instrumentals from the comfort of your home.
+ </font>
+
+ <br>
+
+ ### Easy Deployment
+
+
+ <font size="3"> With Moseca, you can deploy your personal Moseca app on
+ <a href="https://huggingface.co/spaces/fabiogra/moseca?duplicate=true">
+ <img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue"
+ alt="Hugging Face Spaces"></a> or locally with </font>
+ [![Docker Call](https://img.shields.io/badge/-Docker%20Image-blue?logo=docker&labelColor=white)](https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true)
+ <font size="3"> in just one click. </font>
+
+ <br>
+
+ ### Open-Source and Free
+
+ <font size="3"> Moseca is the free and open-source alternative to lalal.ai, splitter.ai or media.io vocal remover.
+
+ You can modify, distribute, and use it free of charge. I believe in the power of community
+ collaboration and encourage users to contribute to our source code, making Moseca better with
+ each update.
+ </font>
+
+ <br>
+
+ ### Support
+
+ - <font size="3"> Show your support by giving a star to the GitHub repository</font> [![GitHub stars](https://img.shields.io/github/stars/fabiogra/moseca.svg?style=social&label=Star&maxAge=2592000)](https://github.com/fabiogra/moseca).
+ - <font size="3"> If you have found an issue or have a suggestion to improve Moseca, you can open an issue:</font> [![GitHub issues](https://img.shields.io/github/issues/fabiogra/moseca.svg)](https://github.com/fabiogra/moseca/issues/new)
+ - <font size="3"> Enjoy Moseca?</font> [![Buymeacoffee](https://img.shields.io/badge/Buy%20me%20a%20coffee--yellow.svg?logo=buy-me-a-coffee&logoColor=orange&style=social)](https://www.buymeacoffee.com/fabiogra)
+
+ ------
+
+ ## FAQs
+
+ ### What is Moseca?
+
+ <font size="3"> Moseca is an open-source web app that utilizes advanced AI technology to separate vocals and
+ instrumentals from music tracks. It also provides an online karaoke experience by allowing you
+ to search for any song on YouTube and remove the vocals.</font>
+
+ ### Are there any limitations?
+ <font size="3">Yes, in this environment there are some limitations on the length of the processed audio
+ and on CPU usage, to ensure a smooth experience for all users.
+
+ <b>If you want to <u>remove these limitations</u> you can deploy a Moseca app in your personal
+ environment, such as <a href="https://huggingface.co/spaces/fabiogra/moseca?duplicate=true"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue" alt="Hugging Face Spaces"></a> or locally with [![Docker Call](https://img.shields.io/badge/-Docker%20Image-blue?logo=docker&labelColor=white)](https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true)</b>
+ </font>
+ ### How does Moseca work?
+ <font size="3"> Moseca utilizes the Hybrid Spectrogram and Waveform Source Separation ([DEMUCS](https://github.com/facebookresearch/demucs)) model from Facebook. For fast karaoke vocal removal, Moseca uses the AI vocal remover developed by [tsurumeso](https://github.com/tsurumeso/vocal-remover).
+ </font>
+ ### How do I use Moseca?
+ <font size="3">1. Upload your file: choose your song and upload it to Moseca. It supports
+ a wide range of music formats for your convenience.</font>
+
+ <font size="3">2. Choose separation mode: opt for voice only, 4-stem or 6-stem separation
+ depending on your requirement.</font>
+
+ <font size="3">3. Let AI do its magic: Moseca’s advanced AI will work to separate vocals
+ from music in a matter of minutes, giving you high-quality, separated audio tracks.</font>
+
+ <font size="3">4. Download and enjoy: preview and download your separated audio tracks.
+ Now you can enjoy them anytime, anywhere! </font>
+
+ ### Where can I find the code for Moseca?
+
+ <font size="3">The code for Moseca is readily available on
+ [GitHub](https://github.com/fabiogra/moseca) and
+ [Hugging Face](https://huggingface.co/spaces/fabiogra/moseca).
+ </font>
+
+ ### How can I get in touch with you?
+
+ <font size="3">For any questions or feedback, feel free to contact me on </font>
+ [![Twitter](https://badgen.net/badge/icon/twitter?icon=twitter&label)](https://twitter.com/grsFabio)
+ <font size="3">or</font> [LinkedIn](https://www.linkedin.com/in/fabio-grasso/en).
+
+ ------
+ ## Disclaimer
+
+ <font size="3">Moseca is designed to separate vocals and instruments from copyrighted music for
+ legally permissible purposes, such as learning, practicing, research, or other non-commercial
+ activities that fall within the scope of fair use or exceptions to copyright. As a user, you are
+ responsible for ensuring that your use of separated audio tracks complies with the legal
+ requirements in your jurisdiction.
+ </font>
+
+ </center>
+             """,
+             unsafe_allow_html=True,
+         )
+
+
+ if __name__ == "__main__":
+     header(logo_and_title=False)
+     body()
+     footer()
app/pages/Karaoke.py ADDED
@@ -0,0 +1,176 @@
+ from pathlib import Path
+
+ import streamlit as st
+ from streamlit_player import st_player
+ from streamlit_searchbox import st_searchbox
+
+ from service.youtube import (
+     get_youtube_url,
+     search_youtube,
+     download_audio_from_youtube,
+ )
+ from helpers import (
+     get_random_song,
+     load_audio_segment,
+     streamlit_player,
+     local_audio,
+ )
+
+ from service.vocal_remover.runner import separate, load_model
+ from footer import footer
+ from header import header
+
+
+ out_path = Path("/tmp")
+ in_path = Path("/tmp")
+
+ sess = st.session_state
+
+
+ def show_karaoke(pathname, initial_player):
+     cols = st.columns([1, 1, 3, 1])
+     with cols[1]:
+         sess.delay = st.slider(
+             label="Start delay in karaoke (seconds)",
+             key="delay_slider",
+             value=2,
+             min_value=0,
+             max_value=5,
+             help="Synchronize the YouTube player with the karaoke audio by adding a delay to the YouTube player.",
+         )
+     with cols[2]:
+         events = st_player(
+             local_audio(pathname),
+             **{
+                 "progress_interval": 1000,
+                 "playing": False,
+                 "muted": False,
+                 "light": False,
+                 "play_inline": True,
+                 "playback_rate": 1,
+                 "height": 40,
+                 "config": {
+                     "start": 0,
+                     "forceAudio": True,
+                 },
+                 "events": ["onProgress", "onPlay"],
+             },
+             key="karaoke_player",
+         )
+         st.markdown(
+             "<center>⬆️ Click on the play button to start karaoke</center>",
+             unsafe_allow_html=True,
+         )
+     with st.columns([1, 4, 1])[1]:
+         if events.name == "onProgress" and events.data["playedSeconds"] > 0:
+             initial_player.empty()
+             st_player(
+                 sess.url + f"&t={sess.delay}s",
+                 **{
+                     "progress_interval": 1000,
+                     "playing": True,
+                     "muted": True,
+                     "light": False,
+                     "play_inline": False,
+                     "playback_rate": 1,
+                     "height": 250,
+                     "events": None,
+                 },
+                 key="yt_muted_player",
+             )
+
+
+ def body():
+     st.markdown("<center>Search for a song on YouTube</center>", unsafe_allow_html=True)
+     yt_cols = st.columns([1, 3, 2, 1])
+     with yt_cols[1]:
+         selected_value = st_searchbox(
+             search_youtube,
+             label=None,
+             placeholder="Search by name...",
+             clear_on_submit=True,
+             key="yt_searchbox",
+         )
+         if selected_value is not None and selected_value in sess.video_options:
+             sess.random_song = None
+
+             if selected_value != sess.selected_value:  # New song selected
+                 sess.executed = False
+
+             sess.selected_value = selected_value
+             sess.url = get_youtube_url(selected_value)
+
+     with yt_cols[2]:
+         if st.button("🎲 Random song", use_container_width=True):
+             sess.last_dir, sess.url = get_random_song()
+             sess.random_song = True
+             sess.video_options = []
+             sess.executed = False
+
+     if sess.url is not None:
+         player_cols = st.columns([2, 2, 1, 1], gap="medium")
+         with player_cols[1]:
+             player = st.empty()
+             streamlit_player(
+                 player,
+                 sess.url,
+                 height=200,
+                 is_active=False,
+                 muted=False,
+                 start=0,
+                 key="yt_player",
+                 events=["onProgress"],
+             )
+
+         # Separate vocals
+         cols_before_sep = st.columns([2, 4, 2])
+         with cols_before_sep[1]:
+             execute_button = st.empty()
+             execute = execute_button.button(
+                 "Confirm and remove vocals 🎤 🎶",
+                 type="primary",
+                 use_container_width=True,
+             )
+         if execute or sess.executed:
+             execute_button.empty()
+             player.empty()
+             if execute:
+                 sess.executed = False
+             if sess.random_song is None:
+                 if not sess.executed:
+                     cols_spinners = st.columns([1, 2, 1])
+                     with cols_spinners[1]:
+                         with st.spinner(
+                             "Separating vocals from music, it will take a while..."
+                         ):
+                             sess.filename = download_audio_from_youtube(sess.url, in_path)
+                             if sess.filename is None:
+                                 st.stop()
+                             sess.url = None
+                             filename = sess.filename
+                             song = load_audio_segment(
+                                 in_path / filename, filename.split(".")[-1]
+                             )
+                             song.export(in_path / filename, format=filename.split(".")[-1])
+                             model, device = load_model(pretrained_model="baseline.pth")
+                             separate(
+                                 input=in_path / filename,
+                                 model=model,
+                                 device=device,
+                                 output_dir=out_path,
+                                 only_no_vocals=True,
+                             )
+                             selected_value = None
+                             sess.last_dir = ".".join(sess.filename.split(".")[:-1])
+                             sess.executed = True
+             else:
+                 sess.executed = True
+
+     if sess.executed:
+         show_karaoke(out_path / "vocal_remover" / sess.last_dir / "no_vocals.mp3", player)
+
+
+ if __name__ == "__main__":
+     header()
+     body()
+     footer()
app/pages/Separate.py ADDED
@@ -0,0 +1,203 @@
+ import os
+ from pathlib import Path
+
+ import streamlit as st
+ from streamlit_option_menu import option_menu
+
+ from service.demucs_runner import separator
+ from helpers import (
+     load_audio_segment,
+     plot_audio,
+     st_local_audio,
+     url_is_valid,
+ )
+
+ from service.vocal_remover.runner import separate, load_model
+
+ from footer import footer
+ from header import header
+
+ label_sources = {
+     "no_vocals.mp3": "🎶 Instrumental",
+     "vocals.mp3": "🎤 Vocals",
+     "drums.mp3": "🥁 Drums",
+     "bass.mp3": "🎸 Bass",
+     "guitar.mp3": "🎸 Guitar",
+     "piano.mp3": "🎹 Piano",
+     "other.mp3": "🎶 Other",
+ }
+
+ extensions = ["mp3", "wav", "ogg", "flac"]
+
+
+ out_path = Path("/tmp")
+ in_path = Path("/tmp")
+
+
+ def reset_execution():
+     st.session_state.executed = False
+
+
+ def body():
+     filename = None
+     cols = st.columns([1, 3, 2, 1])
+     with cols[1]:
+         with st.columns([1, 5, 1])[1]:
+             option = option_menu(
+                 menu_title=None,
+                 options=["Upload File", "From URL"],
+                 icons=["cloud-upload-fill", "link-45deg"],
+                 orientation="horizontal",
+                 styles={"container": {"width": "100%", "margin": "0px", "padding": "0px"}},
+                 key="option_separate",
+             )
+         if option == "Upload File":
+             uploaded_file = st.file_uploader(
+                 "Choose a file",
+                 type=extensions,
+                 key="file",
+                 help="Supported formats: mp3, wav, ogg, flac.",
+             )
+             if uploaded_file is not None:
+                 with open(in_path / uploaded_file.name, "wb") as f:
+                     f.write(uploaded_file.getbuffer())
+                 filename = uploaded_file.name
+                 st_local_audio(in_path / filename, key="input_upload_file")
+
+         elif option == "From URL":  # TODO: show examples
+             url = st.text_input(
+                 "Paste the URL of the audio file",
+                 key="url_input",
+                 help="Supported formats: mp3, wav, ogg, flac.",
+             )
+             if url != "":
+                 if url_is_valid(url):
+                     with st.spinner("Downloading audio..."):
+                         filename = url.split("/")[-1]
+                         os.system(f"wget -O {in_path / filename} {url}")
+                     st_local_audio(in_path / filename, key="input_from_url")
+     with cols[2]:
+         separation_mode = st.selectbox(
+             "Choose the separation mode",
+             [
+                 "Vocals & Instrumental (Faster)",
+                 "Vocals & Instrumental (High Quality, Slower)",
+                 "Vocals, Drums, Bass & Other (Slower)",
+                 "Vocals, Drums, Bass, Guitar, Piano & Other (Slowest)",
+             ],
+             on_change=reset_execution,  # pass the callback itself, not its result
+             key="separation_mode",
+         )
+         if separation_mode == "Vocals & Instrumental (Faster)":
+             max_duration = 30
+         else:
+             max_duration = 15
+
+     if filename is not None:
+         song = load_audio_segment(in_path / filename, filename.split(".")[-1])
+         n_secs = round(len(song) / 1000)
+         if os.environ.get("ENV_LIMITATION", False):
+             with cols[2]:
+                 start_time = st.number_input(
+                     "Choose the start time",
+                     min_value=0,
+                     max_value=n_secs,
+                     step=1,
+                     value=0,
+                     help=f"Maximum duration is {max_duration} seconds for this separation mode. Duplicate this space to remove any limit.",
+                     format="%d",
+                 )
+                 st.session_state.start_time = start_time
+                 end_time = min(start_time + max_duration, n_secs)
+                 song = song[start_time * 1000 : end_time * 1000]
+                 st.info(
+                     f"Audio source will be processed from {start_time} to {end_time} seconds. Duplicate this space to remove any limit.",
+                     icon="⏱",
+                 )
+         else:
+             start_time = 0
+             end_time = n_secs
+         with st.columns([1, 3, 1])[1]:
+             execute = st.button("Split Music 🎶", type="primary", use_container_width=True)
+         if execute or st.session_state.executed:
+             if execute:
+                 st.session_state.executed = False
+
+             if not st.session_state.executed:
+                 song.export(in_path / filename, format=filename.split(".")[-1])
+                 with st.spinner("Separating source audio, it will take a while..."):
+                     if separation_mode == "Vocals & Instrumental (Faster)":
+                         model_name = "vocal_remover"
+                         model, device = load_model(pretrained_model="baseline.pth")
+                         separate(
+                             input=in_path / filename,
+                             model=model,
+                             device=device,
+                             output_dir=out_path,
+                         )
+                     else:
+                         stem = None
+                         model_name = "htdemucs"
+                         if (
+                             separation_mode
+                             == "Vocals, Drums, Bass, Guitar, Piano & Other (Slowest)"
+                         ):
+                             model_name = "htdemucs_6s"
+                         elif separation_mode == "Vocals & Instrumental (High Quality, Slower)":
+                             stem = "vocals"
+
+                         separator(
+                             tracks=[in_path / filename],
+                             out=out_path,
+                             model=model_name,
+                             shifts=1,
+                             overlap=0.5,
+                             stem=stem,
+                             int24=False,
+                             float32=False,
+                             clip_mode="rescale",
+                             mp3=True,
+                             mp3_bitrate=320,
+                             verbose=True,
+                             start_time=start_time,
+                             end_time=end_time,
+                         )
+                 last_dir = ".".join(filename.split(".")[:-1])
+                 filename = None
+                 st.session_state.executed = True
+
+             def get_sources(path):
+                 sources = {}
+                 for file in [
+                     "no_vocals.mp3",
+                     "vocals.mp3",
+                     "drums.mp3",
+                     "bass.mp3",
+                     "guitar.mp3",
+                     "piano.mp3",
+                     "other.mp3",
+                 ]:
+                     fullpath = path / file
+                     if fullpath.exists():
+                         sources[file] = fullpath
+                 return sources
+
+             sources = get_sources(out_path / Path(model_name) / last_dir)
+             tab_sources = st.tabs([f"**{label_sources.get(k)}**" for k in sources.keys()])
+             for i, (file, pathname) in enumerate(sources.items()):
+                 with tab_sources[i]:
+                     cols = st.columns(2)
+                     with cols[0]:
+                         auseg = load_audio_segment(pathname, "mp3")
+                         st.image(
+                             plot_audio(auseg, title="", file=file),
+                             use_column_width="always",
+                         )
+                     with cols[1]:
+                         st_local_audio(pathname, key=f"output_{file}")
+
+
+ if __name__ == "__main__":
+     header()
+     body()
+     footer()
app/service/__init__.py ADDED
File without changes
app/service/demucs_runner.py ADDED
@@ -0,0 +1,190 @@
+ import argparse
+ import sys
+ from pathlib import Path
+ from typing import List
+ import os
+ from dora.log import fatal
+ import torch as th
+
+ from demucs.apply import apply_model, BagOfModels
+ from demucs.audio import save_audio
+ from demucs.pretrained import get_model_from_args, ModelLoadingError
+ from demucs.separate import load_track
+
+ import streamlit as st
+
+
+ @st.cache_data(show_spinner=False)
+ def separator(
+     tracks: List[Path],
+     out: Path,
+     model: str,
+     shifts: int,
+     overlap: float,
+     stem: str,
+     int24: bool,
+     float32: bool,
+     clip_mode: str,
+     mp3: bool,
+     mp3_bitrate: int,
+     verbose: bool,
+     *args,
+     **kwargs,
+ ):
+     """Separate the sources for the given tracks
+
+     Args:
+         tracks (Path): Path to tracks
+         out (Path): Folder where to put extracted tracks. A subfolder with the model name will be
+             created.
+         model (str): Model name
+         shifts (int): Number of random shifts for equivariant stabilization.
+             Increase separation time but improves quality for Demucs.
+             10 was used in the original paper.
+         overlap (float): Overlap
+         stem (str): Only separate audio into {STEM} and no_{STEM}.
+         int24 (bool): Save wav output as 24 bits wav.
+         float32 (bool): Save wav output as float32 (2x bigger).
+         clip_mode (str): Strategy for avoiding clipping: rescaling entire signal if necessary
+             (rescale) or hard clipping (clamp).
+         mp3 (bool): Convert the output wavs to mp3.
+         mp3_bitrate (int): Bitrate of converted mp3.
+         verbose (bool): Verbose
+     """
+
+     if os.environ.get("LIMIT_CPU", False):
+         th.set_num_threads(1)
+         jobs = 1
+     else:
+         # Number of jobs. This can increase memory usage but will be much faster when
+         # multiple cores are available.
+         jobs = os.cpu_count()
+
+     if th.cuda.is_available():
+         device = "cuda"
+     else:
+         device = "cpu"
+     args = argparse.Namespace()
+     args.tracks = tracks
+     args.out = out
+     args.model = model
+     args.device = device
+     args.shifts = shifts
+     args.overlap = overlap
+     args.stem = stem
+     args.int24 = int24
+     args.float32 = float32
+     args.clip_mode = clip_mode
+     args.mp3 = mp3
+     args.mp3_bitrate = mp3_bitrate
+     args.jobs = jobs
+     args.verbose = verbose
+     args.filename = "{track}/{stem}.{ext}"
+     args.split = True
+     args.segment = None
+     args.name = model
+     args.repo = None
+
+     try:
+         model = get_model_from_args(args)
+     except ModelLoadingError as error:
+         fatal(error.args[0])
+
+     if args.segment is not None and args.segment < 8:
+         fatal("Segment must be greater than 8.")
+
+     if ".." in args.filename.replace("\\", "/").split("/"):
+         fatal('".." must not appear in filename. ')
+
+     if isinstance(model, BagOfModels):
+         print(
+             f"Selected model is a bag of {len(model.models)} models. "
+             "You will see that many progress bars per track."
+         )
+         if args.segment is not None:
+             for sub in model.models:
+                 sub.segment = args.segment
+     else:
+         if args.segment is not None:
+             model.segment = args.segment
+
+     model.cpu()
+     model.eval()
+
+     if args.stem is not None and args.stem not in model.sources:
+         fatal(
+             'error: stem "{stem}" is not in selected model. STEM must be one of {sources}.'.format(
+                 stem=args.stem, sources=", ".join(model.sources)
+             )
+         )
+     out = args.out / args.name
+     out.mkdir(parents=True, exist_ok=True)
+     print(f"Separated tracks will be stored in {out.resolve()}")
+     for track in args.tracks:
+         if not track.exists():
+             print(
+                 f"File {track} does not exist. If the path contains spaces, "
+                 'please try again after surrounding the entire path with quotes "".',
+                 file=sys.stderr,
+             )
+             continue
+         print(f"Separating track {track}")
+         wav = load_track(track, model.audio_channels, model.samplerate)
+
+         ref = wav.mean(0)
+         wav = (wav - ref.mean()) / ref.std()
+         sources = apply_model(
+             model,
+             wav[None],
+             device=args.device,
+             shifts=args.shifts,
+             split=args.split,
+             overlap=args.overlap,
+             progress=True,
+             num_workers=args.jobs,
+         )[0]
+         sources = sources * ref.std() + ref.mean()
+
+         if args.mp3:
+             ext = "mp3"
+         else:
+             ext = "wav"
+         kwargs = {
+             "samplerate": model.samplerate,
+             "bitrate": args.mp3_bitrate,
+             "clip": args.clip_mode,
+             "as_float": args.float32,
+             "bits_per_sample": 24 if args.int24 else 16,
+         }
+         if args.stem is None:
+             for source, name in zip(sources, model.sources):
+                 stem = out / args.filename.format(
+                     track=track.name.rsplit(".", 1)[0],
+                     trackext=track.name.rsplit(".", 1)[-1],
+                     stem=name,
+                     ext=ext,
+                 )
+                 stem.parent.mkdir(parents=True, exist_ok=True)
+                 save_audio(source, str(stem), **kwargs)
+         else:
+             sources = list(sources)
+             stem = out / args.filename.format(
+                 track=track.name.rsplit(".", 1)[0],
+                 trackext=track.name.rsplit(".", 1)[-1],
+                 stem=args.stem,
+                 ext=ext,
+             )
+             stem.parent.mkdir(parents=True, exist_ok=True)
+             save_audio(sources.pop(model.sources.index(args.stem)), str(stem), **kwargs)
+             # Warning: after popping the stem, the selected stem is no longer in the list 'sources'
+             other_stem = th.zeros_like(sources[0])
+             for i in sources:
+                 other_stem += i
+             stem = out / args.filename.format(
+                 track=track.name.rsplit(".", 1)[0],
+                 trackext=track.name.rsplit(".", 1)[-1],
+                 stem="no_" + args.stem,
+                 ext=ext,
+             )
+             stem.parent.mkdir(parents=True, exist_ok=True)
+             save_audio(other_stem, str(stem), **kwargs)
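
A usage sketch for `separator`, mirroring the call in `app/pages/Separate.py` (paths are illustrative; outside a running Streamlit app the `st.cache_data` decorator simply runs the function uncached):

```python
from pathlib import Path
from service.demucs_runner import separator

separator(
    tracks=[Path("/tmp/song.mp3")],
    out=Path("/tmp"),
    model="htdemucs",   # or "htdemucs_6s" for the 6-stem model
    shifts=1,
    overlap=0.5,
    stem=None,          # e.g. "vocals" for two-stem separation
    int24=False,
    float32=False,
    clip_mode="rescale",
    mp3=True,
    mp3_bitrate=320,
    verbose=True,
)
# Per the filename template above, stems land in /tmp/htdemucs/song/<stem>.mp3
```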
app/service/vocal_remover/__init__.py ADDED
File without changes
app/service/vocal_remover/layers.py ADDED
@@ -0,0 +1,126 @@
+ import torch
+ from torch import nn
+ import torch.nn.functional as F
+
+
+ def crop_center(h1, h2):
+     h1_shape = h1.size()
+     h2_shape = h2.size()
+
+     if h1_shape[3] == h2_shape[3]:
+         return h1
+     elif h1_shape[3] < h2_shape[3]:
+         raise ValueError("h1_shape[3] must be greater than h2_shape[3]")
+
+     s_time = (h1_shape[3] - h2_shape[3]) // 2
+     e_time = s_time + h2_shape[3]
+     h1 = h1[:, :, :, s_time:e_time]
+
+     return h1
+
+
+ class Conv2DBNActiv(nn.Module):
+     def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
+         super(Conv2DBNActiv, self).__init__()
+         self.conv = nn.Sequential(
+             nn.Conv2d(
+                 nin,
+                 nout,
+                 kernel_size=ksize,
+                 stride=stride,
+                 padding=pad,
+                 dilation=dilation,
+                 bias=False,
+             ),
+             nn.BatchNorm2d(nout),
+             activ(),
+         )
+
+     def __call__(self, x):
+         return self.conv(x)
+
+
+ class Encoder(nn.Module):
+     def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
+         super(Encoder, self).__init__()
+         self.conv1 = Conv2DBNActiv(nin, nout, ksize, stride, pad, activ=activ)
+         self.conv2 = Conv2DBNActiv(nout, nout, ksize, 1, pad, activ=activ)
+
+     def __call__(self, x):
+         h = self.conv1(x)
+         h = self.conv2(h)
+
+         return h
+
+
+ class Decoder(nn.Module):
+     def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
+         super(Decoder, self).__init__()
+         self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
+         self.dropout = nn.Dropout2d(0.1) if dropout else None
+
+     def __call__(self, x, skip=None):
+         x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True)
+
+         if skip is not None:
+             skip = crop_center(skip, x)
+             x = torch.cat([x, skip], dim=1)
+
+         h = self.conv1(x)
+         # h = self.conv2(h)
+
+         if self.dropout is not None:
+             h = self.dropout(h)
+
+         return h
+
+
+ class ASPPModule(nn.Module):
+     def __init__(self, nin, nout, dilations=(4, 8, 12), activ=nn.ReLU, dropout=False):
+         super(ASPPModule, self).__init__()
+         self.conv1 = nn.Sequential(
+             nn.AdaptiveAvgPool2d((1, None)),
+             Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ),
+         )
+         self.conv2 = Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ)
+         self.conv3 = Conv2DBNActiv(nin, nout, 3, 1, dilations[0], dilations[0], activ=activ)
+         self.conv4 = Conv2DBNActiv(nin, nout, 3, 1, dilations[1], dilations[1], activ=activ)
+         self.conv5 = Conv2DBNActiv(nin, nout, 3, 1, dilations[2], dilations[2], activ=activ)
+         self.bottleneck = Conv2DBNActiv(nout * 5, nout, 1, 1, 0, activ=activ)
+         self.dropout = nn.Dropout2d(0.1) if dropout else None
+
+     def forward(self, x):
+         _, _, h, w = x.size()
+         feat1 = F.interpolate(self.conv1(x), size=(h, w), mode="bilinear", align_corners=True)
+         feat2 = self.conv2(x)
+         feat3 = self.conv3(x)
+         feat4 = self.conv4(x)
+         feat5 = self.conv5(x)
+         out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1)
+         out = self.bottleneck(out)
+
+         if self.dropout is not None:
+             out = self.dropout(out)
+
+         return out
+
+
+ class LSTMModule(nn.Module):
+     def __init__(self, nin_conv, nin_lstm, nout_lstm):
+         super(LSTMModule, self).__init__()
+         self.conv = Conv2DBNActiv(nin_conv, 1, 1, 1, 0)
+         self.lstm = nn.LSTM(input_size=nin_lstm, hidden_size=nout_lstm // 2, bidirectional=True)
+         self.dense = nn.Sequential(
+             nn.Linear(nout_lstm, nin_lstm), nn.BatchNorm1d(nin_lstm), nn.ReLU()
+         )
+
+     def forward(self, x):
+         N, _, nbins, nframes = x.size()
+         h = self.conv(x)[:, 0]  # N, nbins, nframes
+         h = h.permute(2, 0, 1)  # nframes, N, nbins
+         h, _ = self.lstm(h)
+         h = self.dense(h.reshape(-1, h.size()[-1]))  # nframes * N, nbins
+         h = h.reshape(nframes, N, 1, nbins)
+         h = h.permute(1, 2, 3, 0)
+
+         return h
app/service/vocal_remover/nets.py ADDED
@@ -0,0 +1,125 @@
+ import torch
+ from torch import nn
+ import torch.nn.functional as F
+
+ from app.service.vocal_remover import layers
+
+
+ class BaseNet(nn.Module):
+     def __init__(self, nin, nout, nin_lstm, nout_lstm, dilations=((4, 2), (8, 4), (12, 6))):
+         super(BaseNet, self).__init__()
+         self.enc1 = layers.Conv2DBNActiv(nin, nout, 3, 1, 1)
+         self.enc2 = layers.Encoder(nout, nout * 2, 3, 2, 1)
+         self.enc3 = layers.Encoder(nout * 2, nout * 4, 3, 2, 1)
+         self.enc4 = layers.Encoder(nout * 4, nout * 6, 3, 2, 1)
+         self.enc5 = layers.Encoder(nout * 6, nout * 8, 3, 2, 1)
+
+         self.aspp = layers.ASPPModule(nout * 8, nout * 8, dilations, dropout=True)
+
+         self.dec4 = layers.Decoder(nout * (6 + 8), nout * 6, 3, 1, 1)
+         self.dec3 = layers.Decoder(nout * (4 + 6), nout * 4, 3, 1, 1)
+         self.dec2 = layers.Decoder(nout * (2 + 4), nout * 2, 3, 1, 1)
+         self.lstm_dec2 = layers.LSTMModule(nout * 2, nin_lstm, nout_lstm)
+         self.dec1 = layers.Decoder(nout * (1 + 2) + 1, nout * 1, 3, 1, 1)
+
+     def __call__(self, x):
+         e1 = self.enc1(x)
+         e2 = self.enc2(e1)
+         e3 = self.enc3(e2)
+         e4 = self.enc4(e3)
+         e5 = self.enc5(e4)
+
+         h = self.aspp(e5)
+
+         h = self.dec4(h, e4)
+         h = self.dec3(h, e3)
+         h = self.dec2(h, e2)
+         h = torch.cat([h, self.lstm_dec2(h)], dim=1)
+         h = self.dec1(h, e1)
+
+         return h
+
+
+ class CascadedNet(nn.Module):
+     def __init__(self, n_fft, nout=32, nout_lstm=128):
+         super(CascadedNet, self).__init__()
+         self.max_bin = n_fft // 2
+         self.output_bin = n_fft // 2 + 1
+         self.nin_lstm = self.max_bin // 2
+         self.offset = 64
+
+         self.stg1_low_band_net = nn.Sequential(
+             BaseNet(2, nout // 2, self.nin_lstm // 2, nout_lstm),
+             layers.Conv2DBNActiv(nout // 2, nout // 4, 1, 1, 0),
+         )
+         self.stg1_high_band_net = BaseNet(2, nout // 4, self.nin_lstm // 2, nout_lstm // 2)
+
+         self.stg2_low_band_net = nn.Sequential(
+             BaseNet(nout // 4 + 2, nout, self.nin_lstm // 2, nout_lstm),
+             layers.Conv2DBNActiv(nout, nout // 2, 1, 1, 0),
+         )
+         self.stg2_high_band_net = BaseNet(
+             nout // 4 + 2, nout // 2, self.nin_lstm // 2, nout_lstm // 2
+         )
+
+         self.stg3_full_band_net = BaseNet(3 * nout // 4 + 2, nout, self.nin_lstm, nout_lstm)
+
+         self.out = nn.Conv2d(nout, 2, 1, bias=False)
+         self.aux_out = nn.Conv2d(3 * nout // 4, 2, 1, bias=False)
+
+     def forward(self, x):
+         x = x[:, :, : self.max_bin]
+
+         bandw = x.size()[2] // 2
+         l1_in = x[:, :, :bandw]
+         h1_in = x[:, :, bandw:]
+         l1 = self.stg1_low_band_net(l1_in)
+         h1 = self.stg1_high_band_net(h1_in)
+         aux1 = torch.cat([l1, h1], dim=2)
+
+         l2_in = torch.cat([l1_in, l1], dim=1)
+         h2_in = torch.cat([h1_in, h1], dim=1)
+         l2 = self.stg2_low_band_net(l2_in)
+         h2 = self.stg2_high_band_net(h2_in)
+         aux2 = torch.cat([l2, h2], dim=2)
+
+         f3_in = torch.cat([x, aux1, aux2], dim=1)
+         f3 = self.stg3_full_band_net(f3_in)
+
+         mask = torch.sigmoid(self.out(f3))
+         mask = F.pad(
+             input=mask,
+             pad=(0, 0, 0, self.output_bin - mask.size()[2]),
+             mode="replicate",
+         )
+
+         if self.training:
+             aux = torch.cat([aux1, aux2], dim=1)
+             aux = torch.sigmoid(self.aux_out(aux))
+             aux = F.pad(
+                 input=aux,
+                 pad=(0, 0, 0, self.output_bin - aux.size()[2]),
+                 mode="replicate",
+             )
+             return mask, aux
+         else:
+             return mask
+
+     def predict_mask(self, x):
+         mask = self.forward(x)
+
+         if self.offset > 0:
+             mask = mask[:, :, :, self.offset : -self.offset]
+             assert mask.size()[3] > 0
+
+         return mask
+
+     def predict(self, x):
+         mask = self.forward(x)
+         pred_mag = x * mask
+
+         if self.offset > 0:
+             pred_mag = pred_mag[:, :, :, self.offset : -self.offset]
+             assert pred_mag.size()[3] > 0
+
+         return pred_mag
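
A shape-level sketch of how `CascadedNet` consumes a magnitude spectrogram (sizes are illustrative; note that `predict_mask` additionally trims `offset = 64` frames from each side, so it needs more than 128 frames):

```python
import torch
from app.service.vocal_remover.nets import CascadedNet

model = CascadedNet(n_fft=2048)
model.eval()

# (batch, stereo channels, n_fft // 2 + 1 frequency bins, time frames)
spec_mag = torch.rand(1, 2, 1025, 256)

with torch.no_grad():
    mask = model(spec_mag)  # soft mask in [0, 1], same shape as the input
print(mask.shape)  # torch.Size([1, 2, 1025, 256])
```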
app/service/vocal_remover/runner.py ADDED
@@ -0,0 +1,234 @@
+import os
+import logging
+import librosa
+import numpy as np
+import soundfile as sf
+import torch
+from stqdm import stqdm
+import streamlit as st
+from pydub import AudioSegment
+
+from app.service.vocal_remover import nets
+
+
+if os.environ.get("LIMIT_CPU", False):
+    torch.set_num_threads(1)
+
+
+def merge_artifacts(y_mask, thres=0.05, min_range=64, fade_size=32):
+    if min_range < fade_size * 2:
+        raise ValueError("min_range must be >= fade_size * 2")
+
+    idx = np.where(y_mask.min(axis=(0, 1)) > thres)[0]
+    start_idx = np.insert(idx[np.where(np.diff(idx) != 1)[0] + 1], 0, idx[0])
+    end_idx = np.append(idx[np.where(np.diff(idx) != 1)[0]], idx[-1])
+    artifact_idx = np.where(end_idx - start_idx > min_range)[0]
+    weight = np.zeros_like(y_mask)
+    if len(artifact_idx) > 0:
+        start_idx = start_idx[artifact_idx]
+        end_idx = end_idx[artifact_idx]
+        old_e = None
+        for s, e in zip(start_idx, end_idx):
+            if old_e is not None and s - old_e < fade_size:
+                s = old_e - fade_size * 2
+
+            if s != 0:
+                weight[:, :, s : s + fade_size] = np.linspace(0, 1, fade_size)
+            else:
+                s -= fade_size
+
+            if e != y_mask.shape[2]:
+                weight[:, :, e - fade_size : e] = np.linspace(1, 0, fade_size)
+            else:
+                e += fade_size
+
+            weight[:, :, s + fade_size : e - fade_size] = 1
+            old_e = e
+
+    v_mask = 1 - y_mask
+    y_mask += weight * v_mask
+
+    return y_mask
+
+
+def make_padding(width, cropsize, offset):
+    left = offset
+    roi_size = cropsize - offset * 2
+    if roi_size == 0:
+        roi_size = cropsize
+    right = roi_size - (width % roi_size) + left
+
+    return left, right, roi_size
+
+
+def wave_to_spectrogram(wave, hop_length, n_fft):
+    wave_left = np.asfortranarray(wave[0])
+    wave_right = np.asfortranarray(wave[1])
+
+    spec_left = librosa.stft(wave_left, n_fft=n_fft, hop_length=hop_length)
+    spec_right = librosa.stft(wave_right, n_fft=n_fft, hop_length=hop_length)
+    spec = np.asfortranarray([spec_left, spec_right])
+
+    return spec
+
+
+def spectrogram_to_wave(spec, hop_length=1024):
+    if spec.ndim == 2:
+        wave = librosa.istft(spec, hop_length=hop_length)
+    elif spec.ndim == 3:
+        spec_left = np.asfortranarray(spec[0])
+        spec_right = np.asfortranarray(spec[1])
+
+        wave_left = librosa.istft(spec_left, hop_length=hop_length)
+        wave_right = librosa.istft(spec_right, hop_length=hop_length)
+        wave = np.asfortranarray([wave_left, wave_right])
+
+    return wave
+
+
+class Separator(object):
+    def __init__(self, model, device, batchsize, cropsize, postprocess=False, progress_bar=None):
+        self.model = model
+        self.offset = model.offset
+        self.device = device
+        self.batchsize = batchsize
+        self.cropsize = cropsize
+        self.postprocess = postprocess
+        self.progress_bar = progress_bar
+
+    def _separate(self, X_mag_pad, roi_size):
+        X_dataset = []
+        patches = (X_mag_pad.shape[2] - 2 * self.offset) // roi_size
+        for i in range(patches):
+            start = i * roi_size
+            X_mag_crop = X_mag_pad[:, :, start : start + self.cropsize]
+            X_dataset.append(X_mag_crop)
+
+        X_dataset = np.asarray(X_dataset)
+
+        self.model.eval()
+        with torch.no_grad():
+            mask = []
+            # To reduce overhead, no DataLoader is used here.
+            for i in stqdm(
+                range(0, patches, self.batchsize),
+                st_container=self.progress_bar,
+                gui=False,
+            ):
+                X_batch = X_dataset[i : i + self.batchsize]
+                X_batch = torch.from_numpy(X_batch).to(self.device)
+
+                pred = self.model.predict_mask(X_batch)
+
+                pred = pred.detach().cpu().numpy()
+                pred = np.concatenate(pred, axis=2)
+                mask.append(pred)
+
+            mask = np.concatenate(mask, axis=2)
+
+        return mask
+
+    def _preprocess(self, X_spec):
+        X_mag = np.abs(X_spec)
+        X_phase = np.angle(X_spec)
+
+        return X_mag, X_phase
+
+    def _postprocess(self, mask, X_mag, X_phase):
+        if self.postprocess:
+            mask = merge_artifacts(mask)
+
+        y_spec = mask * X_mag * np.exp(1.0j * X_phase)
+        v_spec = (1 - mask) * X_mag * np.exp(1.0j * X_phase)
+
+        return y_spec, v_spec
+
+    def separate(self, X_spec):
+        X_mag, X_phase = self._preprocess(X_spec)
+
+        n_frame = X_mag.shape[2]
+        pad_l, pad_r, roi_size = make_padding(n_frame, self.cropsize, self.offset)
+        X_mag_pad = np.pad(X_mag, ((0, 0), (0, 0), (pad_l, pad_r)), mode="constant")
+        X_mag_pad /= X_mag_pad.max()
+
+        mask = self._separate(X_mag_pad, roi_size)
+        mask = mask[:, :, :n_frame]
+
+        y_spec, v_spec = self._postprocess(mask, X_mag, X_phase)
+
+        return y_spec, v_spec
+
+
+@st.cache_resource(show_spinner=False)
+def load_model(pretrained_model, n_fft=2048):
+    model = nets.CascadedNet(n_fft, 32, 128)
+    if torch.cuda.is_available():
+        device = torch.device("cuda:0")
+        model.to(device)
+    # elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
+    #     device = torch.device("mps")
+    #     model.to(device)
+    else:
+        device = torch.device("cpu")
+    model.load_state_dict(torch.load(pretrained_model, map_location=device))
+    return model, device
+
+
+# @st.cache_data(show_spinner=False)
+def separate(
+    input,
+    model,
+    device,
+    output_dir,
+    batchsize=4,
+    cropsize=256,
+    postprocess=False,
+    hop_length=1024,
+    n_fft=2048,
+    sr=44100,
+    progress_bar=None,
+    only_no_vocals=False,
+):
+    X, sr = librosa.load(input, sr=sr, mono=False, dtype=np.float32, res_type="kaiser_fast")
+    basename = os.path.splitext(os.path.basename(input))[0]
+
+    if X.ndim == 1:
+        # mono to stereo
+        X = np.asarray([X, X])
+
+    X_spec = wave_to_spectrogram(X, hop_length, n_fft)
+
+    with torch.no_grad():
+        sp = Separator(model, device, batchsize, cropsize, postprocess, progress_bar=progress_bar)
+        y_spec, v_spec = sp.separate(X_spec)
+
+    base_dir = f"{output_dir}/vocal_remover/{basename}"
+    os.makedirs(base_dir, exist_ok=True)
+
+    wave = spectrogram_to_wave(y_spec, hop_length=hop_length)
+    try:
+        sf.write(f"{base_dir}/no_vocals.mp3", wave.T, sr)
+    except Exception:
+        logging.error("Failed to write no_vocals.mp3, trying pydub...")
+        pydub_write(wave, f"{base_dir}/no_vocals.mp3", sr)
+    if only_no_vocals:
+        return
+    wave = spectrogram_to_wave(v_spec, hop_length=hop_length)
+    try:
+        sf.write(f"{base_dir}/vocals.mp3", wave.T, sr)
+    except Exception:
+        logging.error("Failed to write vocals.mp3, trying pydub...")
+        pydub_write(wave, f"{base_dir}/vocals.mp3", sr)
+
+
+def pydub_write(wave, output_path, frame_rate, audio_format="mp3"):
+    # Convert the float waveform to 16-bit PCM for pydub. The callers above
+    # pass stereo arrays of shape (2, n), so the frames are interleaved via
+    # the transpose and the channel count is derived from the array shape.
+    wave_16bit = (wave * 32767).astype(np.int16)
+    channels = 1 if wave_16bit.ndim == 1 else wave_16bit.shape[0]
+
+    audio_segment = AudioSegment(
+        wave_16bit.T.tobytes(),
+        frame_rate=frame_rate,
+        sample_width=wave_16bit.dtype.itemsize,
+        channels=channels,
+    )
+    audio_segment.export(output_path, format=audio_format)
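As a quick sanity check of the STFT helpers above, the following hedged sketch round-trips a random stereo buffer through `wave_to_spectrogram` and `spectrogram_to_wave`. It assumes the `app` package and its dependencies are importable; the signal and its length are arbitrary test values, not app data.

```python
# Hedged sketch: STFT/ISTFT round trip with the helpers defined above.
import numpy as np

from app.service.vocal_remover.runner import (
    spectrogram_to_wave,
    wave_to_spectrogram,
)

wave = np.random.rand(2, 44100).astype(np.float32)  # ~1 s of stereo noise
spec = wave_to_spectrogram(wave, hop_length=1024, n_fft=2048)
recon = spectrogram_to_wave(spec, hop_length=1024)

print(spec.shape)   # (2, 1025, 44): stereo, n_fft // 2 + 1 bins, ~n/hop frames
print(recon.shape)  # roughly (2, 44032): ISTFT length is a multiple of the hop
```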
app/service/youtube.py ADDED
@@ -0,0 +1,72 @@
+import os
+from typing import List
+import yt_dlp
+import string
+import time
+import re
+import streamlit as st
+from pytube import Search
+
+
+def _sanitize_filename(filename):
+    safe_chars = "-_.() %s%s" % (
+        re.escape(string.ascii_letters),
+        re.escape(string.digits),
+    )
+    safe_filename = re.sub(f"[^{safe_chars}]", "_", filename)
+    return safe_filename.strip()
+
+
+@st.cache_data(show_spinner=False)
+def download_audio_from_youtube(url, output_path):
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    with yt_dlp.YoutubeDL() as ydl:
+        info_dict = ydl.extract_info(url, download=False)
+        if info_dict.get("duration") > 360:
+            st.error("Song is too long. Please use a song no longer than 6 minutes.")
+            return
+        video_title = info_dict.get("title", None)
+        video_title = _sanitize_filename(video_title)
+        ydl_opts = {
+            "format": "bestaudio/best",
+            "postprocessors": [
+                {
+                    "key": "FFmpegExtractAudio",
+                    "preferredcodec": "mp3",
+                    "preferredquality": "192",
+                }
+            ],
+            "outtmpl": os.path.join(output_path, video_title),
+            # 'quiet': True,
+        }
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download([url])
+        return f"{video_title}.mp3"
+
+
+@st.cache_data(show_spinner=False)
+def query_youtube(query: str) -> Search:
+    return Search(query)
+
+
+def search_youtube(query: str) -> List:
+    if len(query) > 3:
+        time.sleep(0.5)
+        search = query_youtube(query + " lyrics")
+        st.session_state.search_results = search.results
+        video_options = [video.title for video in st.session_state.search_results]
+        st.session_state.video_options = video_options
+    else:
+        video_options = []
+    return video_options
+
+
+def get_youtube_url(title: str) -> str:
+    video = st.session_state.search_results[st.session_state.video_options.index(title)]
+    return video.embed_url
+
+
+def check_if_is_youtube_url(url: str) -> bool:
+    return url.startswith("http")
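A small, hedged illustration of `_sanitize_filename` above: every character outside letters, digits, and `-_.() ` (space included) is replaced with an underscore, which keeps YouTube titles safe to use as file names. The example title is made up.

```python
# Hedged sketch: _sanitize_filename keeps [-_.() ], letters, and digits.
from app.service.youtube import _sanitize_filename

title = "AC/DC – Back In Black (Official)"
print(_sanitize_filename(title))
# -> "AC_DC _ Back In Black (Official)"  ("/" and the en dash become "_")
```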
app/style.py ADDED
@@ -0,0 +1,131 @@
+_font_title = "Monoton"
+_font_subtitle = "Exo"
+
+CSS = (
+    """
+    <!-- Add the font link from Google Fonts -->
+    <link href="https://fonts.googleapis.com/css2?family="""
+    + _font_title
+    + """&display=swap" rel="stylesheet">
+    <link href="https://fonts.googleapis.com/css2?family="""
+    + _font_subtitle
+    + """&display=swap" rel="stylesheet">
+
+    <style>
+    /* Remove the streamlit header */
+    header[data-testid="stHeader"] {
+        display: none;
+    }
+    /* Remove the sidebar menu */
+    div[data-testid="collapsedControl"]{
+        display: none;
+    }
+    /* Background */
+    .css-z5fcl4 {
+        padding: 0.5rem;
+        padding-top: 0rem;
+    }
+
+    /* Distances between the title and the image on mobile */
+    .css-1uifejx.e1tzin5v1 {
+        margin-bottom: 0px;
+        padding-bottom: 0px;
+    }
+    h1 {
+        padding-top: 0px;
+    }
+
+
+    /* Center the image within its container */
+    .css-1kyxreq {
+        justify-content: center;
+    }
+
+    /* Remove fixed width from the image container */
+    .css-1kyxreq.etr89bj2 {
+        width: 100% !important;
+    }
+
+    /* Center the title */
+    .css-k7vsyb {
+        text-align: center;
+    }
+
+    /* Hide the anchor button */
+    .css-zt5igj.e16nr0p33 a {
+        display: none;
+    }
+    /* Hide the full screen button */
+    .css-e370rw.e19lei0e1 {
+        display: none;
+    }
+    .css-6awftf.e19lei0e1 {
+        display: none;
+    }
+
+    /* Desktop */
+    @media (min-width: 640px) {
+        .stMarkdown {
+            max-width: 100%;
+            width: auto;
+            display: inline-block;
+        }
+        /* Dynamically add space between the image and the title */
+        .css-1kyxreq {
+            justify-content: right;
+        }
+    }
+
+    /* Add space after the image and the title */
+    .css-1a32fsj {
+        margin-right: 0px;
+    }
+
+    /* Apply the futuristic font to the text title */
+    #moseca {
+        font-family: '"""
+    + _font_title
+    + """', sans-serif;
+        font-size: 3rem;
+        text-align: center;
+        /* Align the text to the center of the box */
+        align-items: center;
+        /* Set the line height to the same as the height of the box */
+        line-height: 3.5rem;
+        margin-bottom: -1rem;
+    }
+
+    /* Subtitle */
+    .css-5rimss p, .css-nahz7x p {
+        font-family: """
+    + _font_subtitle
+    + """, sans-serif;
+        font-size: 0.8rem;
+        text-align: center;
+    }
+
+    /* Desktop */
+    @media (min-width: 640px) {
+        .css-zt5igj, .css-nahz7x p {
+            text-align: left;
+        }
+        .css-5rimss p {
+            text-align: left;
+        }
+    }
+
+    .st-af {
+        align-items: center;
+        padding-right: 2rem;
+    }
+
+    /* Remove the gap around the player */
+    .css-434r0z {
+        gap: 0rem;
+    }
+
+
+    </style>
+
+    """
+)
img/bmc-button.png ADDED
img/image_stems.png ADDED
img/karaoke_fun.png ADDED
img/logo_moseca.png ADDED
img/state-of-art.png ADDED
pyproject.toml ADDED
@@ -0,0 +1,19 @@
+[tool.black]
+line-length = 100
+target-version = ['py39', 'py310']
+preview_string_processing = true
+
+[tool.isort]
+profile = 'black'
+multi_line_output = 3
+
+[tool.ruff]
+line-length = 100
+ignore = ['E501']
+
+
+[tool.pytest.ini_options]
+pythonpath = [
+    "app",
+]
+testpaths = "tests"
requirements.in ADDED
@@ -0,0 +1,16 @@
+streamlit==1.22.0
+demucs==4.0.0
+plotly==5.13.0
+pandas==1.5.3
+pydub==0.25.1
+pytube==12.1.3
+streamlit-player==0.1.5
+streamlit-searchbox==0.1.2
+yt-dlp==2023.3.4
+kaleido==0.2.1
+matplotlib==3.7.1
+librosa==0.10.0.post2
+resampy==0.4.2
+stqdm==0.0.5
+streamlit_option_menu==0.3.6
+htbuilder==0.6.1
requirements.txt ADDED
@@ -0,0 +1,294 @@
+#
+# This file is autogenerated by pip-compile with Python 3.8
+# by the following command:
+#
+#    pip-compile --output-file=requirements.txt --resolver=backtracking requirements.in
+#
+altair==4.2.2
+    # via streamlit
+antlr4-python3-runtime==4.9.3
+    # via omegaconf
+appdirs==1.4.4
+    # via pooch
+attrs==23.1.0
+    # via jsonschema
+audioread==3.0.0
+    # via librosa
+backports-zoneinfo==0.2.1
+    # via tzlocal
+blinker==1.6.2
+    # via streamlit
+brotli==1.0.9
+    # via yt-dlp
+cachetools==5.3.1
+    # via streamlit
+certifi==2023.5.7
+    # via
+    #   requests
+    #   yt-dlp
+cffi==1.15.1
+    # via soundfile
+charset-normalizer==3.1.0
+    # via requests
+click==8.1.3
+    # via streamlit
+cloudpickle==2.2.1
+    # via submitit
+contourpy==1.1.0
+    # via matplotlib
+cycler==0.11.0
+    # via matplotlib
+cython==0.29.35
+    # via diffq
+decorator==5.1.1
+    # via
+    #   librosa
+    #   validators
+demucs==4.0.0
+    # via -r requirements.in
+diffq==0.2.4
+    # via demucs
+dora-search==0.1.12
+    # via demucs
+einops==0.6.1
+    # via demucs
+entrypoints==0.4
+    # via altair
+filelock==3.12.2
+    # via torch
+fonttools==4.40.0
+    # via matplotlib
+gitdb==4.0.10
+    # via gitpython
+gitpython==3.1.31
+    # via streamlit
+htbuilder==0.6.1
+    # via -r requirements.in
+idna==3.4
+    # via requests
+importlib-metadata==6.7.0
+    # via
+    #   numba
+    #   streamlit
+importlib-resources==5.12.0
+    # via
+    #   jsonschema
+    #   matplotlib
+jinja2==3.1.2
+    # via
+    #   altair
+    #   pydeck
+    #   torch
+joblib==1.3.1
+    # via
+    #   librosa
+    #   scikit-learn
+jsonschema==4.17.3
+    # via altair
+julius==0.2.7
+    # via demucs
+kaleido==0.2.1
+    # via -r requirements.in
+kiwisolver==1.4.4
+    # via matplotlib
+lameenc==1.5.0
+    # via demucs
+lazy-loader==0.2
+    # via librosa
+librosa==0.10.0.post2
+    # via -r requirements.in
+llvmlite==0.40.1
+    # via numba
+markdown-it-py==3.0.0
+    # via rich
+markupsafe==2.1.3
+    # via jinja2
+matplotlib==3.7.1
+    # via -r requirements.in
+mdurl==0.1.2
+    # via markdown-it-py
+more-itertools==9.1.0
+    # via htbuilder
+mpmath==1.3.0
+    # via sympy
+msgpack==1.0.5
+    # via librosa
+mutagen==1.46.0
+    # via yt-dlp
+networkx==3.1
+    # via torch
+numba==0.57.1
+    # via
+    #   librosa
+    #   resampy
+numpy==1.24.4
+    # via
+    #   altair
+    #   contourpy
+    #   diffq
+    #   librosa
+    #   matplotlib
+    #   numba
+    #   openunmix
+    #   pandas
+    #   pyarrow
+    #   pydeck
+    #   resampy
+    #   scikit-learn
+    #   scipy
+    #   soxr
+    #   streamlit
+omegaconf==2.3.0
+    # via dora-search
+openunmix==1.2.1
+    # via demucs
+packaging==23.1
+    # via
+    #   matplotlib
+    #   pooch
+    #   streamlit
+pandas==1.5.3
+    # via
+    #   -r requirements.in
+    #   altair
+    #   streamlit
+pillow==9.5.0
+    # via
+    #   matplotlib
+    #   streamlit
+pkgutil-resolve-name==1.3.10
+    # via jsonschema
+plotly==5.13.0
+    # via -r requirements.in
+pooch==1.6.0
+    # via librosa
+protobuf==3.20.3
+    # via streamlit
+pyarrow==12.0.1
+    # via streamlit
+pycparser==2.21
+    # via cffi
+pycryptodomex==3.18.0
+    # via yt-dlp
+pydeck==0.8.1b0
+    # via streamlit
+pydub==0.25.1
+    # via -r requirements.in
+pygments==2.15.1
+    # via rich
+pympler==1.0.1
+    # via streamlit
+pyparsing==3.1.0
+    # via matplotlib
+pyrsistent==0.19.3
+    # via jsonschema
+python-dateutil==2.8.2
+    # via
+    #   matplotlib
+    #   pandas
+    #   streamlit
+pytube==12.1.3
+    # via -r requirements.in
+pytz==2023.3
+    # via pandas
+pyyaml==6.0
+    # via
+    #   demucs
+    #   omegaconf
+requests==2.31.0
+    # via
+    #   pooch
+    #   streamlit
+resampy==0.4.2
+    # via -r requirements.in
+retrying==1.3.4
+    # via dora-search
+rich==13.4.2
+    # via streamlit
+scikit-learn==1.3.0
+    # via librosa
+scipy==1.10.1
+    # via
+    #   librosa
+    #   scikit-learn
+six==1.16.0
+    # via
+    #   python-dateutil
+    #   retrying
+smmap==5.0.0
+    # via gitdb
+soundfile==0.12.1
+    # via librosa
+soxr==0.3.5
+    # via librosa
+stqdm==0.0.5
+    # via -r requirements.in
+streamlit==1.22.0
+    # via
+    #   -r requirements.in
+    #   stqdm
+    #   streamlit-option-menu
+    #   streamlit-player
+    #   streamlit-searchbox
+streamlit-option-menu==0.3.6
+    # via -r requirements.in
+streamlit-player==0.1.5
+    # via -r requirements.in
+streamlit-searchbox==0.1.2
+    # via -r requirements.in
+submitit==1.4.5
+    # via dora-search
+sympy==1.12
+    # via torch
+tenacity==8.2.2
+    # via
+    #   plotly
+    #   streamlit
+threadpoolctl==3.1.0
+    # via scikit-learn
+toml==0.10.2
+    # via streamlit
+toolz==0.12.0
+    # via altair
+torch==2.0.1
+    # via
+    #   demucs
+    #   diffq
+    #   dora-search
+    #   julius
+    #   openunmix
+    #   torchaudio
+torchaudio==2.0.2
+    # via
+    #   demucs
+    #   openunmix
+tornado==6.3.2
+    # via streamlit
+tqdm==4.65.0
+    # via
+    #   demucs
+    #   openunmix
+    #   stqdm
+treetable==0.2.5
+    # via dora-search
+typing-extensions==4.7.0
+    # via
+    #   librosa
+    #   rich
+    #   streamlit
+    #   submitit
+    #   torch
+tzlocal==5.0.1
+    # via streamlit
+urllib3==2.0.3
+    # via requests
+validators==0.20.0
+    # via streamlit
+websockets==11.0.3
+    # via yt-dlp
+yt-dlp==2023.3.4
+    # via -r requirements.in
+zipp==3.15.0
+    # via
+    #   importlib-metadata
+    #   importlib-resources
scripts/inference.py ADDED
@@ -0,0 +1,30 @@
+import argparse
+
+import warnings
+from app.service.vocal_remover.runner import load_model, separate
+
+warnings.simplefilter("ignore", UserWarning)
+warnings.simplefilter("ignore", FutureWarning)
+warnings.filterwarnings("ignore", module="streamlit")
+
+
+def main():
+    p = argparse.ArgumentParser()
+    p.add_argument("--gpu", "-g", type=int, default=-1)
+    p.add_argument("--pretrained_model", "-P", type=str, default="baseline.pth")
+    p.add_argument("--input", "-i", required=True)
+    p.add_argument("--output_dir", "-o", type=str, default="")
+    args = p.parse_args()
+
+    model, device = load_model(pretrained_model=args.pretrained_model)
+    separate(
+        input=args.input,
+        model=model,
+        device=device,
+        output_dir=args.output_dir,
+        only_no_vocals=True,
+    )
+
+
+if __name__ == "__main__":
+    main()
scripts/prepare_samples.sh ADDED
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+# Read the JSON file into a variable
+json=$(cat sample_songs.json)
+
+# Iterate through keys and values
+for name in $(echo "${json}" | jq -r 'keys[]'); do
+    url=$(echo "${json}" | jq -r --arg name "${name}" '.[$name]')
+    echo "Separating ${name} from ${url}"
+
+    # Download the audio with yt-dlp
+    yt-dlp "${url}" -o "/tmp/${name}" --format "bestaudio/best"
+    mkdir -p "/tmp/vocal_remover"
+
+    # Run inference
+    python inference.py --input "/tmp/${name}" --output_dir /tmp
+    echo "Done separating ${name}"
+done
scripts/sample_songs.json ADDED
@@ -0,0 +1,8 @@
+{
+    "dancing_queen": "https://www.youtube.com/watch?v=3qiMJt-JBb4",
+    "bohemian_rhapsody": "https://www.youtube.com/watch?v=yk3prd8GER4",
+    "i_want_it_that_way": "https://www.youtube.com/watch?v=qjlVAsvQLM8",
+    "let_it_be": "https://www.youtube.com/watch?v=FIV73iG_e5I",
+    "viva_la_vida": "https://www.youtube.com/watch?v=a1EYnngNHIA",
+    "zombie": "https://www.youtube.com/watch?v=8sM-rm4lFZg"
+}