Spaces:
Sleeping
Sleeping
Commit
·
6654c1a
0
Parent(s):
Duplicate from fabiogra/moseca
Browse files- .gitattributes +35 -0
- .streamlit/config.toml +2 -0
- Dockerfile +34 -0
- README.md +218 -0
- app/__init__.py +0 -0
- app/_fastapi_server.py +20 -0
- app/footer.py +118 -0
- app/header.py +68 -0
- app/helpers.py +160 -0
- app/pages/About.py +154 -0
- app/pages/Karaoke.py +176 -0
- app/pages/Separate.py +203 -0
- app/service/__init__.py +0 -0
- app/service/demucs_runner.py +190 -0
- app/service/vocal_remover/__init__.py +0 -0
- app/service/vocal_remover/layers.py +126 -0
- app/service/vocal_remover/nets.py +125 -0
- app/service/vocal_remover/runner.py +234 -0
- app/service/youtube.py +72 -0
- app/style.py +131 -0
- img/bmc-button.png +0 -0
- img/image_stems.png +0 -0
- img/karaoke_fun.png +0 -0
- img/logo_moseca.png +0 -0
- img/state-of-art.png +0 -0
- pyproject.toml +19 -0
- requirements.in +16 -0
- requirements.txt +294 -0
- scripts/inference.py +30 -0
- scripts/prepare_samples.sh +18 -0
- scripts/sample_songs.json +8 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
.streamlit/config.toml
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
[server]
|
2 |
+
enableXsrfProtection = false
|
Dockerfile
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# syntax=docker/dockerfile:1
|
2 |
+
|
3 |
+
FROM python:3.8
|
4 |
+
|
5 |
+
|
6 |
+
RUN apt-get update && \
|
7 |
+
apt-get install -y ffmpeg jq curl && \
|
8 |
+
pip install --upgrade pip
|
9 |
+
|
10 |
+
WORKDIR /app
|
11 |
+
|
12 |
+
COPY requirements.txt .
|
13 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
14 |
+
|
15 |
+
COPY scripts/ .
|
16 |
+
COPY app ./app
|
17 |
+
copy img ./img
|
18 |
+
|
19 |
+
RUN wget --progress=bar:force:noscroll https://huggingface.co/fabiogra/baseline_vocal_remover/resolve/main/baseline.pth
|
20 |
+
|
21 |
+
RUN mkdir -p /tmp/ /tmp/vocal_remover /.cache /.config && \
|
22 |
+
chmod 777 /tmp /tmp/vocal_remover /.cache /.config
|
23 |
+
|
24 |
+
ENV PYTHONPATH "${PYTHONPATH}:/app"
|
25 |
+
|
26 |
+
RUN chmod +x prepare_samples.sh
|
27 |
+
|
28 |
+
EXPOSE 7860
|
29 |
+
|
30 |
+
HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
|
31 |
+
|
32 |
+
RUN ["./prepare_samples.sh"]
|
33 |
+
|
34 |
+
ENTRYPOINT ["streamlit", "run", "app/header.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
README.md
ADDED
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Moseca
|
3 |
+
emoji: 🎤🎸🥁🎹
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: purple
|
6 |
+
sdk: docker
|
7 |
+
app_port: 7860
|
8 |
+
models:
|
9 |
+
- https://huggingface.co/fabiogra/baseline_vocal_remover
|
10 |
+
tags:
|
11 |
+
- audio
|
12 |
+
- music
|
13 |
+
- vocal-removal
|
14 |
+
- karaoke
|
15 |
+
- music-separation
|
16 |
+
- music-source-separation
|
17 |
+
pinned: true
|
18 |
+
duplicated_from: fabiogra/moseca
|
19 |
+
---
|
20 |
+
|
21 |
+
<p align="center">
|
22 |
+
<img src="img/logo_moseca.png" alt="logo" width="70" />
|
23 |
+
</p>
|
24 |
+
<h2 align="center">Moseca</h1>
|
25 |
+
<p align="center">Music Source Separation & Karaoke</p>
|
26 |
+
|
27 |
+
|
28 |
+
</a>
|
29 |
+
<a href="https://huggingface.co/spaces/fabiogra/moseca">
|
30 |
+
<img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue"
|
31 |
+
alt="Hugging Face Spaces"></a>
|
32 |
+
<a href="https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true">
|
33 |
+
<img src="https://img.shields.io/badge/-Docker%20Image-blue?logo=docker&labelColor=white"
|
34 |
+
alt="Docker"></a><a href="https://www.buymeacoffee.com/fabiogra">
|
35 |
+
<img src="https://img.shields.io/badge/Buy%20me%20a%20coffee--yellow.svg?logo=buy-me-a-coffee&logoColor=orange&style=social"
|
36 |
+
alt="Buy me a coffee"></a>
|
37 |
+
|
38 |
+
---
|
39 |
+
|
40 |
+
- [Setup](#setup)
|
41 |
+
- [About](#about)
|
42 |
+
- [High-Quality Stem Separation](#high-quality-stem-separation)
|
43 |
+
- [Advanced AI Algorithms](#advanced-ai-algorithms)
|
44 |
+
- [Karaoke Fun](#karaoke-fun)
|
45 |
+
- [Easy Deployment](#easy-deployment)
|
46 |
+
- [Open-Source and Free](#open-source-and-free)
|
47 |
+
- [Support](#support)
|
48 |
+
- [FAQs](#faqs)
|
49 |
+
- [What is Moseca?](#what-is-moseca)
|
50 |
+
- [Are there any limitations?](#are-there-any-limitations)
|
51 |
+
- [How does Moseca work?](#how-does-moseca-work)
|
52 |
+
- [How do I use Moseca?](#how-do-i-use-moseca)
|
53 |
+
- [Where can I find the code for Moseca?](#where-can-i-find-the-code-for-moseca)
|
54 |
+
- [How can I get in touch with you?](#how-can-i-get-in-touch-with-you)
|
55 |
+
- [Disclaimer](#disclaimer)
|
56 |
+
|
57 |
+
|
58 |
+
---
|
59 |
+
|
60 |
+
|
61 |
+
## Setup
|
62 |
+
### Local environment
|
63 |
+
Create a new environment with Python 3.8 and install the requirements:
|
64 |
+
```bash
|
65 |
+
pip install -r requirements.txt
|
66 |
+
```
|
67 |
+
then run the app with:
|
68 |
+
```bash
|
69 |
+
streamlit run app/header.py
|
70 |
+
```
|
71 |
+
### Docker
|
72 |
+
You can also run the app with Docker:
|
73 |
+
```bash
|
74 |
+
docker build -t moseca .
|
75 |
+
docker run -it --rm -p 7860:7860 $(DOCKER_IMAGE_NAME)
|
76 |
+
```
|
77 |
+
or pull the image from Hugging Face Spaces:
|
78 |
+
```bash
|
79 |
+
docker run -it -p 7860:7860 --platform=linux/amd64 \
|
80 |
+
registry.hf.space/fabiogra-moseca:latest
|
81 |
+
```
|
82 |
+
|
83 |
+
You can set the following environment variables to limit the resources used by the app:
|
84 |
+
- ENV_LIMITATION=true
|
85 |
+
- LIMIT_CPU=true
|
86 |
+
---
|
87 |
+
## About
|
88 |
+
|
89 |
+
Welcome to Moseca, your personal web application designed to redefine your music experience.
|
90 |
+
Whether you're a musician looking to remix your favorite songs, a karaoke
|
91 |
+
enthusiast, or a music lover wanting to dive deeper into your favorite tracks,
|
92 |
+
Moseca is for you.
|
93 |
+
|
94 |
+
<br>
|
95 |
+
|
96 |
+
### High-Quality Stem Separation
|
97 |
+
|
98 |
+
<img title="High-Quality Stem Separation" src="https://i.imgur.com/l7H8YWL.png" width="250" ></img>
|
99 |
+
|
100 |
+
|
101 |
+
<br>
|
102 |
+
|
103 |
+
Separate up to 6 stems including 🗣voice, 🥁drums, 🔉bass, 🎸guitar,
|
104 |
+
🎹piano (beta), and 🎶 others.
|
105 |
+
|
106 |
+
<br>
|
107 |
+
|
108 |
+
### Advanced AI Algorithms
|
109 |
+
|
110 |
+
<img title="Advanced AI Algorithms" src="https://i.imgur.com/I8Pvdav.png" width="250" ></img>
|
111 |
+
|
112 |
+
<br>
|
113 |
+
|
114 |
+
Moseca utilizes state-of-the-art AI technology to extract voice or music from
|
115 |
+
your original songs accurately.
|
116 |
+
|
117 |
+
<br>
|
118 |
+
|
119 |
+
### Karaoke Fun
|
120 |
+
|
121 |
+
<img title="Karaoke Fun" src="https://i.imgur.com/nsn3JGV.png" width="250" ></img>
|
122 |
+
|
123 |
+
<br>
|
124 |
+
|
125 |
+
Engage with your favorite tunes in a whole new way!
|
126 |
+
|
127 |
+
Moseca offers an immersive online karaoke experience, allowing you to search
|
128 |
+
for any song on YouTube and remove the vocals online.
|
129 |
+
|
130 |
+
Enjoy singing along with high-quality instrumentals at the comfort of your home.
|
131 |
+
|
132 |
+
|
133 |
+
<br>
|
134 |
+
|
135 |
+
### Easy Deployment
|
136 |
+
|
137 |
+
|
138 |
+
With Moseca, you can deploy your personal Moseca app in the
|
139 |
+
<a href="https://huggingface.co/spaces/fabiogra/moseca?duplicate=true">
|
140 |
+
<img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue"
|
141 |
+
alt="Hugging Face Spaces"></a> or locally with
|
142 |
+
[](https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true)
|
143 |
+
in just one click.
|
144 |
+
|
145 |
+
<br>
|
146 |
+
|
147 |
+
### Open-Source and Free
|
148 |
+
|
149 |
+
Moseca is the free and open-source alternative to lalal.ai, splitter.ai or media.io vocal remover.
|
150 |
+
|
151 |
+
You can modify, distribute, and use it free of charge. I believe in the power of community
|
152 |
+
collaboration and encourage users to contribute to our source code, making Moseca better with
|
153 |
+
each update.
|
154 |
+
|
155 |
+
|
156 |
+
<br>
|
157 |
+
|
158 |
+
### Support
|
159 |
+
|
160 |
+
- Show your support by giving a star to the GitHub repository [](https://github.com/fabiogra/moseca).
|
161 |
+
- If you have found an issue or have a suggestion to improve Moseca, you can open an [](https://github.com/fabiogra/moseca/issues/new)
|
162 |
+
- Enjoy Moseca? [](https://www.buymeacoffee.com/fabiogra)
|
163 |
+
|
164 |
+
------
|
165 |
+
|
166 |
+
## FAQs
|
167 |
+
|
168 |
+
### What is Moseca?
|
169 |
+
|
170 |
+
Moseca is an open-source web app that utilizes advanced AI technology to separate vocals and
|
171 |
+
instrumentals from music tracks. It also provides an online karaoke experience by allowing you
|
172 |
+
to search for any song on YouTube and remove the vocals.
|
173 |
+
|
174 |
+
### Are there any limitations?
|
175 |
+
Yes, in this environment there are some limitations regarding lenght processing
|
176 |
+
and CPU usage to allow a smooth experience for all users.
|
177 |
+
|
178 |
+
<b>If you want to <u>remove these limitations</u> you can deploy a Moseca app in your personal
|
179 |
+
environment like in the <a href="https://huggingface.co/spaces/fabiogra/moseca?duplicate=true"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue" alt="Hugging Face Spaces"></a> or locally with [](https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true)</b>
|
180 |
+
|
181 |
+
### How does Moseca work?
|
182 |
+
Moseca utilizes the Hybrid Spectrogram and Waveform Source Separation ([DEMUCS](https://github.com/facebookresearch/demucs)) model from Facebook. For fast karaoke vocal removal, Moseca uses the AI vocal remover developed by [tsurumeso](https://github.com/tsurumeso/vocal-remover).
|
183 |
+
|
184 |
+
### How do I use Moseca?
|
185 |
+
1. Upload your file: choose your song and upload it to Moseca. It supports
|
186 |
+
a wide range of music formats for your convenience.
|
187 |
+
|
188 |
+
2. Choose separation mode: opt for voice only, 4-stem or 6-stem separation
|
189 |
+
depending on your requirement.
|
190 |
+
|
191 |
+
3. Let AI do its magic: Moseca’s advanced AI will work to separate vocals
|
192 |
+
from music in a matter of minutes, giving you high-quality, separated audio tracks.
|
193 |
+
|
194 |
+
4. Download and enjoy: preview and download your separated audio tracks.
|
195 |
+
Now you can enjoy them anytime, anywhere!
|
196 |
+
|
197 |
+
|
198 |
+
### Where can I find the code for Moseca?
|
199 |
+
|
200 |
+
The code for Moseca is readily available on
|
201 |
+
[GitHub](https://github.com/fabiogra/moseca) and
|
202 |
+
[Hugging Face](https://huggingface.co/spaces/fabiogra/moseca).
|
203 |
+
|
204 |
+
|
205 |
+
### How can I get in touch with you?
|
206 |
+
|
207 |
+
For any questions or feedback, feel free to contact me on
|
208 |
+
[](https://twitter.com/grsFabio)
|
209 |
+
or [LinkedIn](https://www.linkedin.com/in/fabio-grasso/en).
|
210 |
+
|
211 |
+
------
|
212 |
+
## Disclaimer
|
213 |
+
|
214 |
+
Moseca is designed to separate vocals and instruments from copyrighted music for
|
215 |
+
legally permissible purposes, such as learning, practicing, research, or other non-commercial
|
216 |
+
activities that fall within the scope of fair use or exceptions to copyright. As a user, you are
|
217 |
+
responsible for ensuring that your use of separated audio tracks complies with the legal
|
218 |
+
requirements in your jurisdiction.
|
app/__init__.py
ADDED
File without changes
|
app/_fastapi_server.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI
|
2 |
+
from fastapi.responses import FileResponse
|
3 |
+
from urllib.parse import unquote
|
4 |
+
|
5 |
+
import os
|
6 |
+
|
7 |
+
app = FastAPI()
|
8 |
+
|
9 |
+
|
10 |
+
@app.get("/streaming/{path:path}")
|
11 |
+
async def serve_streaming(path: str):
|
12 |
+
# Decode URL-encoded characters
|
13 |
+
decoded_path = unquote(path)
|
14 |
+
return FileResponse(decoded_path, filename=os.path.basename(decoded_path))
|
15 |
+
|
16 |
+
|
17 |
+
if __name__ == "__main__":
|
18 |
+
import uvicorn
|
19 |
+
|
20 |
+
uvicorn.run(app, host="127.0.0.1", port=8000)
|
app/footer.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
from streamlit.components.v1 import html
|
4 |
+
from htbuilder import HtmlElement, div, a, p, img, styles
|
5 |
+
from htbuilder.units import percent, px
|
6 |
+
|
7 |
+
|
8 |
+
def image(src_as_string, **style):
|
9 |
+
return img(src=src_as_string, style=styles(**style))
|
10 |
+
|
11 |
+
|
12 |
+
def link(link, text, **style):
|
13 |
+
return a(_href=link, _target="_blank", style=styles(**style))(text)
|
14 |
+
|
15 |
+
|
16 |
+
def layout(*args):
|
17 |
+
style = """
|
18 |
+
<style>
|
19 |
+
footer {visibility: hidden;}
|
20 |
+
.stApp { bottom: 50px; }
|
21 |
+
</style>
|
22 |
+
"""
|
23 |
+
|
24 |
+
style_div = styles(
|
25 |
+
position="fixed",
|
26 |
+
left=0,
|
27 |
+
bottom=0,
|
28 |
+
margin=px(0, 0, 0, 0),
|
29 |
+
width=percent(100),
|
30 |
+
color="black",
|
31 |
+
text_align="center",
|
32 |
+
height="auto",
|
33 |
+
opacity=1,
|
34 |
+
align_items="center",
|
35 |
+
flex_direction="column",
|
36 |
+
display="flex",
|
37 |
+
)
|
38 |
+
body = p(
|
39 |
+
id="myFooter",
|
40 |
+
style=styles(
|
41 |
+
margin=px(0, 0, 0, 0),
|
42 |
+
padding=px(5),
|
43 |
+
font_size="0.8rem",
|
44 |
+
color="rgb(51,51,51)",
|
45 |
+
font_family="Exo",
|
46 |
+
),
|
47 |
+
)
|
48 |
+
foot = div(style=style_div)(body)
|
49 |
+
|
50 |
+
st.markdown(style, unsafe_allow_html=True)
|
51 |
+
|
52 |
+
for arg in args:
|
53 |
+
if isinstance(arg, str):
|
54 |
+
body(arg)
|
55 |
+
|
56 |
+
elif isinstance(arg, HtmlElement):
|
57 |
+
body(arg)
|
58 |
+
|
59 |
+
st.markdown(str(foot), unsafe_allow_html=True)
|
60 |
+
|
61 |
+
js_code = """
|
62 |
+
<script>
|
63 |
+
function rgbReverse(rgb){
|
64 |
+
var r = rgb[0]*0.299;
|
65 |
+
var g = rgb[1]*0.587;
|
66 |
+
var b = rgb[2]*0.114;
|
67 |
+
|
68 |
+
if ((r + g + b)/255 > 0.5){
|
69 |
+
return "rgb(49, 51, 63)"
|
70 |
+
}else{
|
71 |
+
return "rgb(250, 250, 250)"
|
72 |
+
}
|
73 |
+
|
74 |
+
};
|
75 |
+
var stApp_css = window.parent.document.querySelector("#root > div:nth-child(1) > div > div > div");
|
76 |
+
window.onload = function () {
|
77 |
+
var mutationObserver = new MutationObserver(function(mutations) {
|
78 |
+
mutations.forEach(function(mutation) {
|
79 |
+
var bgColor = window.getComputedStyle(stApp_css).backgroundColor.replace("rgb(", "").replace(")", "").split(", ");
|
80 |
+
var fontColor = rgbReverse(bgColor);
|
81 |
+
var pTag = window.parent.document.getElementById("myFooter");
|
82 |
+
pTag.style.color = fontColor;
|
83 |
+
});
|
84 |
+
});
|
85 |
+
|
86 |
+
/**Element**/
|
87 |
+
mutationObserver.observe(stApp_css, {
|
88 |
+
attributes: true,
|
89 |
+
characterData: true,
|
90 |
+
childList: true,
|
91 |
+
subtree: true,
|
92 |
+
attributeOldValue: true,
|
93 |
+
characterDataOldValue: true
|
94 |
+
});
|
95 |
+
}
|
96 |
+
|
97 |
+
|
98 |
+
</script>
|
99 |
+
"""
|
100 |
+
html(js_code)
|
101 |
+
|
102 |
+
|
103 |
+
def footer():
|
104 |
+
myargs = [
|
105 |
+
"Made in ",
|
106 |
+
link(
|
107 |
+
"https://streamlit.io/",
|
108 |
+
image("https://streamlit.io/images/brand/streamlit-mark-color.png", width="20px"),
|
109 |
+
),
|
110 |
+
" with ❤️ by ",
|
111 |
+
link("https://twitter.com/grsFabio", "@grsFabio"),
|
112 |
+
" ",
|
113 |
+
link(
|
114 |
+
"https://www.buymeacoffee.com/fabiogra",
|
115 |
+
image("https://i.imgur.com/YFu6MMA.png", margin="0em", align="top", width="130px"),
|
116 |
+
),
|
117 |
+
]
|
118 |
+
layout(*myargs)
|
app/header.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
from helpers import switch_page
|
4 |
+
from style import CSS
|
5 |
+
import logging
|
6 |
+
|
7 |
+
from streamlit_option_menu import option_menu
|
8 |
+
|
9 |
+
logging.basicConfig(
|
10 |
+
format="%(asctime)s %(levelname)-8s %(message)s",
|
11 |
+
level=logging.INFO,
|
12 |
+
datefmt="%Y-%m-%d %H:%M:%S",
|
13 |
+
)
|
14 |
+
|
15 |
+
|
16 |
+
def header(logo_and_title=True):
|
17 |
+
if "first_run" not in st.session_state:
|
18 |
+
st.session_state.first_run = True
|
19 |
+
for key in [
|
20 |
+
"search_results",
|
21 |
+
"selected_value",
|
22 |
+
"filename",
|
23 |
+
"executed",
|
24 |
+
"play_karaoke",
|
25 |
+
"url",
|
26 |
+
"random_song",
|
27 |
+
"last_dir",
|
28 |
+
]:
|
29 |
+
st.session_state[key] = None
|
30 |
+
st.session_state.video_options = []
|
31 |
+
st.session_state.page = "Karaoke"
|
32 |
+
switch_page(st.session_state.page)
|
33 |
+
|
34 |
+
st.set_page_config(
|
35 |
+
page_title="Moseca - Music Separation and Karaoke - Free and Open Source alternative to lalal.ai, splitter.ai or media.io vocal remover.",
|
36 |
+
page_icon="img/logo_moseca.png",
|
37 |
+
layout="wide",
|
38 |
+
initial_sidebar_state="collapsed",
|
39 |
+
)
|
40 |
+
st.markdown(CSS, unsafe_allow_html=True)
|
41 |
+
|
42 |
+
options = ["Karaoke", "Separate", "About"]
|
43 |
+
page = option_menu(
|
44 |
+
menu_title=None,
|
45 |
+
options=options,
|
46 |
+
# bootrap icons
|
47 |
+
icons=["play-btn-fill", "file-earmark-music", "info-circle"],
|
48 |
+
default_index=options.index(st.session_state.page),
|
49 |
+
orientation="horizontal",
|
50 |
+
styles={"nav-link": {"padding-left": "1.5rem", "padding-right": "1.5rem"}},
|
51 |
+
key="",
|
52 |
+
)
|
53 |
+
if page != st.session_state.page:
|
54 |
+
switch_page(page)
|
55 |
+
|
56 |
+
if logo_and_title:
|
57 |
+
head = st.columns([5, 1, 3, 5])
|
58 |
+
with head[1]:
|
59 |
+
st.image("img/logo_moseca.png", use_column_width=False, width=80)
|
60 |
+
with head[2]:
|
61 |
+
st.markdown(
|
62 |
+
"<h1>moseca</h1><p><b>Music Source Separation & Karaoke</b></p>",
|
63 |
+
unsafe_allow_html=True,
|
64 |
+
)
|
65 |
+
|
66 |
+
|
67 |
+
if __name__ == "__main__":
|
68 |
+
header()
|
app/helpers.py
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import random
|
2 |
+
from io import BytesIO
|
3 |
+
import json
|
4 |
+
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import numpy as np
|
7 |
+
import requests
|
8 |
+
import streamlit as st
|
9 |
+
from PIL import Image
|
10 |
+
from pydub import AudioSegment
|
11 |
+
from base64 import b64encode
|
12 |
+
from pathlib import Path
|
13 |
+
from streamlit.runtime.scriptrunner import RerunData, RerunException
|
14 |
+
from streamlit.source_util import get_pages
|
15 |
+
from streamlit_player import st_player
|
16 |
+
|
17 |
+
extensions = ["mp3", "wav", "ogg", "flac"] # we will look for all those file types.
|
18 |
+
example_songs = [1, 2, 3]
|
19 |
+
|
20 |
+
|
21 |
+
def img_to_bytes(img_path):
|
22 |
+
img_bytes = Path(img_path).read_bytes()
|
23 |
+
encoded = b64encode(img_bytes).decode()
|
24 |
+
return encoded
|
25 |
+
|
26 |
+
|
27 |
+
# @st.cache_data(show_spinner=False)
|
28 |
+
def img_to_html(img_path):
|
29 |
+
img_html = "<div style='display: flex; justify-content: center; align-items: center; height: 50vh;'><img src='data:image/png;base64,{}' class='img-fluid' style='max-width: 100%; max-height: 100%;' ></div>".format(
|
30 |
+
img_to_bytes(img_path)
|
31 |
+
)
|
32 |
+
return img_html
|
33 |
+
|
34 |
+
|
35 |
+
@st.cache_data(show_spinner=False)
|
36 |
+
def url_is_valid(url):
|
37 |
+
if url.startswith("http") is False:
|
38 |
+
st.error("URL should start with http or https.")
|
39 |
+
return False
|
40 |
+
elif url.split(".")[-1] not in extensions:
|
41 |
+
st.error("Extension not supported.")
|
42 |
+
return False
|
43 |
+
try:
|
44 |
+
r = requests.get(url)
|
45 |
+
r.raise_for_status()
|
46 |
+
return True
|
47 |
+
except Exception:
|
48 |
+
st.error("URL is not valid.")
|
49 |
+
return False
|
50 |
+
|
51 |
+
|
52 |
+
@st.cache_data(show_spinner=False)
|
53 |
+
def load_audio_segment(path: str, format: str) -> AudioSegment:
|
54 |
+
return AudioSegment.from_file(path, format=format)
|
55 |
+
|
56 |
+
|
57 |
+
@st.cache_data(show_spinner=False)
|
58 |
+
def plot_audio(_audio_segment: AudioSegment, *args, **kwargs) -> Image.Image:
|
59 |
+
samples = _audio_segment.get_array_of_samples()
|
60 |
+
arr = np.array(samples)
|
61 |
+
|
62 |
+
fig, ax = plt.subplots(figsize=(10, 2))
|
63 |
+
ax.plot(arr, linewidth=0.05)
|
64 |
+
ax.set_axis_off()
|
65 |
+
|
66 |
+
# Set the background color to transparent
|
67 |
+
fig.patch.set_alpha(0)
|
68 |
+
ax.patch.set_alpha(0)
|
69 |
+
|
70 |
+
buf = BytesIO()
|
71 |
+
plt.savefig(buf, format="png", dpi=100, bbox_inches="tight")
|
72 |
+
buf.seek(0)
|
73 |
+
image = Image.open(buf)
|
74 |
+
|
75 |
+
plt.close(fig)
|
76 |
+
return image
|
77 |
+
|
78 |
+
|
79 |
+
def get_random_song():
|
80 |
+
sample_songs = json.load(open("sample_songs.json"))
|
81 |
+
name, url = random.choice(list(sample_songs.items()))
|
82 |
+
return name, url
|
83 |
+
|
84 |
+
|
85 |
+
def streamlit_player(
|
86 |
+
player,
|
87 |
+
url,
|
88 |
+
height,
|
89 |
+
is_active,
|
90 |
+
muted,
|
91 |
+
start,
|
92 |
+
key,
|
93 |
+
playback_rate=1,
|
94 |
+
events=None,
|
95 |
+
play_inline=False,
|
96 |
+
light=False,
|
97 |
+
):
|
98 |
+
with player:
|
99 |
+
options = {
|
100 |
+
"progress_interval": 1000,
|
101 |
+
"playing": is_active, # st.checkbox("Playing", False),
|
102 |
+
"muted": muted,
|
103 |
+
"light": light,
|
104 |
+
"play_inline": play_inline,
|
105 |
+
"playback_rate": playback_rate,
|
106 |
+
"height": height,
|
107 |
+
"config": {"start": start},
|
108 |
+
"events": events,
|
109 |
+
}
|
110 |
+
if url != "":
|
111 |
+
events = st_player(url, **options, key=key)
|
112 |
+
return events
|
113 |
+
|
114 |
+
|
115 |
+
@st.cache_data(show_spinner=False)
|
116 |
+
def local_audio(path, mime="audio/mp3"):
|
117 |
+
data = b64encode(Path(path).read_bytes()).decode()
|
118 |
+
return [{"type": mime, "src": f"data:{mime};base64,{data}"}]
|
119 |
+
|
120 |
+
|
121 |
+
def _standardize_name(name: str) -> str:
|
122 |
+
return name.lower().replace("_", " ").strip()
|
123 |
+
|
124 |
+
|
125 |
+
@st.cache_data(show_spinner=False)
|
126 |
+
def switch_page(page_name: str):
|
127 |
+
st.session_state.page = page_name
|
128 |
+
|
129 |
+
page_name = _standardize_name(page_name)
|
130 |
+
|
131 |
+
pages = get_pages("header.py") # OR whatever your main page is called
|
132 |
+
|
133 |
+
for page_hash, config in pages.items():
|
134 |
+
if _standardize_name(config["page_name"]) == page_name:
|
135 |
+
raise RerunException(
|
136 |
+
RerunData(
|
137 |
+
page_script_hash=page_hash,
|
138 |
+
page_name=page_name,
|
139 |
+
)
|
140 |
+
)
|
141 |
+
|
142 |
+
page_names = [_standardize_name(config["page_name"]) for config in pages.values()]
|
143 |
+
raise ValueError(f"Could not find page {page_name}. Must be one of {page_names}")
|
144 |
+
|
145 |
+
|
146 |
+
def st_local_audio(pathname, key):
|
147 |
+
st_player(
|
148 |
+
local_audio(pathname),
|
149 |
+
**{
|
150 |
+
"progress_interval": 1000,
|
151 |
+
"playing": False,
|
152 |
+
"muted": False,
|
153 |
+
"light": False,
|
154 |
+
"play_inline": True,
|
155 |
+
"playback_rate": 1,
|
156 |
+
"height": 40,
|
157 |
+
"config": {"start": 0, "forceAudio": True, "forceHLS": True, "forceSafariHLS": True},
|
158 |
+
},
|
159 |
+
key=key,
|
160 |
+
)
|
app/pages/About.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
from header import header
|
4 |
+
from footer import footer
|
5 |
+
|
6 |
+
|
7 |
+
def body():
|
8 |
+
with st.columns([2, 3, 2])[1]:
|
9 |
+
st.markdown(
|
10 |
+
"""
|
11 |
+
<center>
|
12 |
+
|
13 |
+
## Welcome to Moseca, your personal web application designed to redefine your music experience.
|
14 |
+
<font size="3"> Whether you're a musician looking to remix your favorite songs, a karaoke
|
15 |
+
enthusiast, or a music lover wanting to dive deeper into your favorite tracks,
|
16 |
+
Moseca is for you. </font>
|
17 |
+
|
18 |
+
<br>
|
19 |
+
|
20 |
+
### High-Quality Stem Separation
|
21 |
+
|
22 |
+
<center><img title="High-Quality Stem Separation" src="https://i.imgur.com/l7H8YWL.png" width="60%" ></img></center>
|
23 |
+
|
24 |
+
|
25 |
+
<br>
|
26 |
+
|
27 |
+
<font size="3"> Separate up to 6 stems including 🗣voice, 🥁drums, 🔉bass, 🎸guitar,
|
28 |
+
🎹piano (beta), and 🎶 others. </font>
|
29 |
+
|
30 |
+
<br>
|
31 |
+
|
32 |
+
### Advanced AI Algorithms
|
33 |
+
|
34 |
+
<center><img title="Advanced AI Algorithms" src="https://i.imgur.com/I8Pvdav.png" width="60%" ></img></center>
|
35 |
+
|
36 |
+
<br>
|
37 |
+
|
38 |
+
<font size="3"> Moseca utilizes state-of-the-art AI technology to extract voice or music from
|
39 |
+
your original songs accurately. </font>
|
40 |
+
|
41 |
+
<br>
|
42 |
+
|
43 |
+
### Karaoke Fun
|
44 |
+
|
45 |
+
<center><img title="Karaoke Fun" src="https://i.imgur.com/nsn3JGV.png" width="60%" ></img></center>
|
46 |
+
|
47 |
+
<br>
|
48 |
+
|
49 |
+
<font size="3"> Engage with your favorite tunes in a whole new way! </font>
|
50 |
+
|
51 |
+
<font size="3"> Moseca offers an immersive online karaoke experience, allowing you to search
|
52 |
+
for any song on YouTube and remove the vocals online. </font>
|
53 |
+
|
54 |
+
<font size="3"> Enjoy singing along with high-quality instrumentals at the comfort of your home.
|
55 |
+
</font>
|
56 |
+
|
57 |
+
<br>
|
58 |
+
|
59 |
+
### Easy Deployment
|
60 |
+
|
61 |
+
|
62 |
+
<font size="3"> With Moseca, you can deploy your personal Moseca app in the
|
63 |
+
<a href="https://huggingface.co/spaces/fabiogra/moseca?duplicate=true">
|
64 |
+
<img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue"
|
65 |
+
alt="Hugging Face Spaces"></a> or locally with </font>
|
66 |
+
[](https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true)
|
67 |
+
<font size="3"> in just one click. </font>
|
68 |
+
|
69 |
+
<br>
|
70 |
+
|
71 |
+
### Open-Source and Free
|
72 |
+
|
73 |
+
<font size="3"> Moseca is the free and open-source alternative to lalal.ai, splitter.ai or media.io vocal remover.
|
74 |
+
|
75 |
+
You can modify, distribute, and use it free of charge. I believe in the power of community
|
76 |
+
collaboration and encourage users to contribute to our source code, making Moseca better with
|
77 |
+
each update.
|
78 |
+
</font>
|
79 |
+
|
80 |
+
<br>
|
81 |
+
|
82 |
+
### Support
|
83 |
+
|
84 |
+
- <font size="3"> Show your support by giving a star to the GitHub repository</font> [](https://github.com/fabiogra/moseca).
|
85 |
+
- <font size="3"> If you have found an issue or have a suggestion to improve Moseca, you can open an</font> [](https://github.com/fabiogra/moseca/issues/new)
|
86 |
+
- <font size="3"> Enjoy Moseca?</font> [](https://www.buymeacoffee.com/fabiogra)
|
87 |
+
|
88 |
+
------
|
89 |
+
|
90 |
+
## FAQs
|
91 |
+
|
92 |
+
### What is Moseca?
|
93 |
+
|
94 |
+
<font size="3"> Moseca is an open-source web app that utilizes advanced AI technology to separate vocals and
|
95 |
+
instrumentals from music tracks. It also provides an online karaoke experience by allowing you
|
96 |
+
to search for any song on YouTube and remove the vocals.</font>
|
97 |
+
|
98 |
+
### Are there any limitations?
|
99 |
+
<font size="3">Yes, in this environment there are some limitations regarding lenght processing
|
100 |
+
and CPU usage to allow a smooth experience for all users.
|
101 |
+
|
102 |
+
<b>If you want to <u>remove these limitations</u> you can deploy a Moseca app in your personal
|
103 |
+
environment like in the <a href="https://huggingface.co/spaces/fabiogra/moseca?duplicate=true"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue" alt="Hugging Face Spaces"></a> or locally with [](https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true)</b>
|
104 |
+
</font>
|
105 |
+
### How does Moseca work?
|
106 |
+
<font size="3"> Moseca utilizes the Hybrid Spectrogram and Waveform Source Separation ([DEMUCS](https://github.com/facebookresearch/demucs)) model from Facebook. For fast karaoke vocal removal, Moseca uses the AI vocal remover developed by [tsurumeso](https://github.com/tsurumeso/vocal-remover).
|
107 |
+
</font>
|
108 |
+
### How do I use Moseca?
|
109 |
+
<font size="3">1. Upload your file: choose your song and upload it to Moseca. It supports
|
110 |
+
a wide range of music formats for your convenience.</font>
|
111 |
+
|
112 |
+
<font size="3">2. Choose separation mode: opt for voice only, 4-stem or 6-stem separation
|
113 |
+
depending on your requirement.</font>
|
114 |
+
|
115 |
+
<font size="3">3. Let AI do its magic: Moseca’s advanced AI will work to separate vocals
|
116 |
+
from music in a matter of minutes, giving you high-quality, separated audio tracks.</font>
|
117 |
+
|
118 |
+
<font size="3">4. Download and enjoy: preview and download your separated audio tracks.
|
119 |
+
Now you can enjoy them anytime, anywhere! </font>
|
120 |
+
</font>
|
121 |
+
|
122 |
+
### Where can I find the code for Moseca?
|
123 |
+
|
124 |
+
<font size="3">The code for Moseca is readily available on
|
125 |
+
[GitHub](https://github.com/fabiogra/moseca) and
|
126 |
+
[Hugging Face](https://huggingface.co/spaces/fabiogra/moseca).
|
127 |
+
</font>
|
128 |
+
|
129 |
+
### How can I get in touch with you?
|
130 |
+
|
131 |
+
<font size="3">For any questions or feedback, feel free to contact me on </font>
|
132 |
+
[](https://twitter.com/grsFabio)
|
133 |
+
<font size="3">or</font> [LinkedIn](https://www.linkedin.com/in/fabio-grasso/en).
|
134 |
+
|
135 |
+
------
|
136 |
+
## Disclaimer
|
137 |
+
|
138 |
+
<font size="3">Moseca is designed to separate vocals and instruments from copyrighted music for
|
139 |
+
legally permissible purposes, such as learning, practicing, research, or other non-commercial
|
140 |
+
activities that fall within the scope of fair use or exceptions to copyright. As a user, you are
|
141 |
+
responsible for ensuring that your use of separated audio tracks complies with the legal
|
142 |
+
requirements in your jurisdiction.
|
143 |
+
</font>
|
144 |
+
|
145 |
+
</center>
|
146 |
+
""",
|
147 |
+
unsafe_allow_html=True,
|
148 |
+
)
|
149 |
+
|
150 |
+
|
151 |
+
if __name__ == "__main__":
|
152 |
+
header(logo_and_title=False)
|
153 |
+
body()
|
154 |
+
footer()
|
app/pages/Karaoke.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
from streamlit_player import st_player
|
5 |
+
from streamlit_searchbox import st_searchbox
|
6 |
+
|
7 |
+
from service.youtube import (
|
8 |
+
get_youtube_url,
|
9 |
+
search_youtube,
|
10 |
+
download_audio_from_youtube,
|
11 |
+
)
|
12 |
+
from helpers import (
|
13 |
+
get_random_song,
|
14 |
+
load_audio_segment,
|
15 |
+
streamlit_player,
|
16 |
+
local_audio,
|
17 |
+
)
|
18 |
+
|
19 |
+
from service.vocal_remover.runner import separate, load_model
|
20 |
+
from footer import footer
|
21 |
+
from header import header
|
22 |
+
|
23 |
+
|
24 |
+
# Working directories for downloaded and separated audio; staged in the
# system temp directory (writable in the hosted environment).
out_path = Path("/tmp")
in_path = Path("/tmp")

# Shorthand for Streamlit's per-user session state.
sess = st.session_state
|
28 |
+
|
29 |
+
|
30 |
+
def show_karaoke(pathname, initial_player):
    """Render the karaoke view: a local player for the separated (no-vocals)
    track plus a muted YouTube video player started with a user-chosen delay.

    Args:
        pathname: Path to the separated audio file to play locally.
        initial_player: Streamlit placeholder holding the preview player; it
            is emptied once karaoke playback actually starts.
    """
    cols = st.columns([1, 1, 3, 1])
    with cols[1]:
        # Let the user compensate for the YouTube player's startup latency.
        sess.delay = st.slider(
            label="Start delay in karaoke (seconds)",
            key="delay_slider",
            value=2,
            min_value=0,
            max_value=5,
            help="Synchronize youtube player with karaoke audio by adding a delay to the youtube player.",
        )
    with cols[2]:
        # Local audio player for the instrumental track; progress events are
        # polled so we can tell when playback has started.
        events = st_player(
            local_audio(pathname),
            **{
                "progress_interval": 1000,
                "playing": False,
                "muted": False,
                "light": False,
                "play_inline": True,
                "playback_rate": 1,
                "height": 40,
                "config": {
                    "start": 0,
                    "forceAudio": True,
                },
                "events": ["onProgress", "onPlay"],
            },
            key="karaoke_player",
        )
        st.markdown(
            "<center>⬆️ Click on the play button to start karaoke<center>",
            unsafe_allow_html=True,
        )
    with st.columns([1, 4, 1])[1]:
        # Once the local audio reports progress, swap the preview player for a
        # muted YouTube video offset by the chosen delay (audio comes from the
        # local karaoke track, video from YouTube).
        if events.name == "onProgress" and events.data["playedSeconds"] > 0:
            initial_player.empty()
            st_player(
                sess.url + f"&t={sess.delay}s",
                **{
                    "progress_interval": 1000,
                    "playing": True,
                    "muted": True,
                    "light": False,
                    "play_inline": False,
                    "playback_rate": 1,
                    "height": 250,
                    "events": None,
                },
                key="yt_muted_player",
            )
|
81 |
+
|
82 |
+
|
83 |
+
def body():
    """Render the Karaoke page: YouTube search or random-song pick, a preview
    player, vocal removal, and finally the karaoke view.

    Reads/writes several Streamlit session keys (``url``, ``video_options``,
    ``selected_value``, ``random_song``, ``executed``, ``filename``,
    ``last_dir``) — presumably initialized elsewhere (e.g. by ``header()``);
    verify before refactoring.
    """
    st.markdown("<center>Search for a song on YouTube<center>", unsafe_allow_html=True)
    yt_cols = st.columns([1, 3, 2, 1])
    with yt_cols[1]:
        selected_value = st_searchbox(
            search_youtube,
            label=None,
            placeholder="Search by name...",
            clear_on_submit=True,
            key="yt_searchbox",
        )
        if selected_value is not None and selected_value in sess.video_options:
            sess.random_song = None

            if selected_value != sess.selected_value:  # New song selected
                sess.executed = False

            sess.selected_value = selected_value
            sess.url = get_youtube_url(selected_value)

    with yt_cols[2]:
        if st.button("🎲 Random song", use_container_width=True):
            # Random songs come pre-separated, so no processing is done below.
            sess.last_dir, sess.url = get_random_song()
            sess.random_song = True
            sess.video_options = []
            sess.executed = False

    if sess.url is not None:
        player_cols = st.columns([2, 2, 1, 1], gap="medium")
        with player_cols[1]:
            # Placeholder so the preview can be removed once karaoke starts.
            player = st.empty()
            streamlit_player(
                player,
                sess.url,
                height=200,
                is_active=False,
                muted=False,
                start=0,
                key="yt_player",
                events=["onProgress"],
            )

        # Separate vocals
        cols_before_sep = st.columns([2, 4, 2])
        with cols_before_sep[1]:
            execute_button = st.empty()
            execute = execute_button.button(
                "Confirm and remove vocals 🎤 🎶",
                type="primary",
                use_container_width=True,
            )
        if execute or sess.executed:
            execute_button.empty()
            player.empty()
            if execute:
                # A fresh click forces re-processing below.
                sess.executed = False
            if sess.random_song is None:
                if not sess.executed:
                    cols_spinners = st.columns([1, 2, 1])
                    with cols_spinners[1]:
                        with st.spinner(
                            "Separating vocals from music, it will take a while..."
                        ):
                            sess.filename = download_audio_from_youtube(sess.url, in_path)
                            if sess.filename is None:
                                # Download failed; abort this rerun.
                                st.stop()
                            sess.url = None
                            filename = sess.filename
                            # Re-export normalizes the container before separation.
                            song = load_audio_segment(
                                in_path / filename, filename.split(".")[-1]
                            )
                            song.export(in_path / filename, format=filename.split(".")[-1])
                            model, device = load_model(pretrained_model="baseline.pth")
                            separate(
                                input=in_path / filename,
                                model=model,
                                device=device,
                                output_dir=out_path,
                                only_no_vocals=True,
                            )
                            selected_value = None
                            # Output directory name: the filename without extension.
                            sess.last_dir = ".".join(sess.filename.split(".")[:-1])
                            sess.executed = True
            else:
                # Random songs are already separated; just show the result.
                sess.executed = True

        if sess.executed:
            show_karaoke(out_path / "vocal_remover" / sess.last_dir / "no_vocals.mp3", player)
|
171 |
+
|
172 |
+
|
173 |
+
if __name__ == "__main__":
    # Streamlit executes the page top to bottom on every rerun.
    header()
    body()
    footer()
|
app/pages/Separate.py
ADDED
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
import streamlit as st
|
5 |
+
from streamlit_option_menu import option_menu
|
6 |
+
|
7 |
+
from service.demucs_runner import separator
|
8 |
+
from helpers import (
|
9 |
+
load_audio_segment,
|
10 |
+
plot_audio,
|
11 |
+
st_local_audio,
|
12 |
+
url_is_valid,
|
13 |
+
)
|
14 |
+
|
15 |
+
from service.vocal_remover.runner import separate, load_model
|
16 |
+
|
17 |
+
from footer import footer
|
18 |
+
from header import header
|
19 |
+
|
20 |
+
# Map of separated-stem filenames to the human-readable tab labels shown in the UI.
label_sources = {
    "no_vocals.mp3": "🎶 Instrumental",
    "vocals.mp3": "🎤 Vocals",
    "drums.mp3": "🥁 Drums",
    "bass.mp3": "🎸 Bass",
    "guitar.mp3": "🎸 Guitar",
    "piano.mp3": "🎹 Piano",
    "other.mp3": "🎶 Other",
}

# Audio formats accepted by both the uploader and the URL downloader.
extensions = ["mp3", "wav", "ogg", "flac"]


# Working directories for input and separated audio (system temp directory).
out_path = Path("/tmp")
in_path = Path("/tmp")
|
35 |
+
|
36 |
+
|
37 |
+
def reset_execution():
    """Mark the separation as not-yet-executed in the Streamlit session state."""
    st.session_state.executed = False
|
39 |
+
|
40 |
+
|
41 |
+
def body():
    """Render the Separate page: input selection (file upload or URL),
    separation-mode selection, source separation, and playback of the stems.

    Side effects: writes the input audio under ``in_path`` and reads separated
    stems from ``out_path / <model_name> / <track_name>``.
    """
    import subprocess  # local import: only needed for the URL-download branch

    filename = None
    cols = st.columns([1, 3, 2, 1])
    with cols[1]:
        with st.columns([1, 5, 1])[1]:
            option = option_menu(
                menu_title=None,
                options=["Upload File", "From URL"],
                icons=["cloud-upload-fill", "link-45deg"],
                orientation="horizontal",
                styles={"container": {"width": "100%", "margin": "0px", "padding": "0px"}},
                key="option_separate",
            )
        if option == "Upload File":
            uploaded_file = st.file_uploader(
                "Choose a file",
                type=extensions,
                key="file",
                help="Supported formats: mp3, wav, ogg, flac.",
            )
            if uploaded_file is not None:
                # Persist the upload to disk so the separation backends can read it.
                with open(in_path / uploaded_file.name, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                filename = uploaded_file.name
                st_local_audio(in_path / filename, key="input_upload_file")

        elif option == "From URL":  # TODO: show examples
            url = st.text_input(
                "Paste the URL of the audio file",
                key="url_input",
                help="Supported formats: mp3, wav, ogg, flac.",
            )
            if url != "":
                if url_is_valid(url):
                    with st.spinner("Downloading audio..."):
                        filename = url.split("/")[-1]
                        # Download via an argument list (no shell) so a crafted
                        # URL cannot inject shell commands, unlike the previous
                        # os.system(f"wget ...") call.
                        subprocess.run(
                            ["wget", "-O", str(in_path / filename), url],
                            check=False,
                        )
                    st_local_audio(in_path / filename, key="input_from_url")
    with cols[2]:
        separation_mode = st.selectbox(
            "Choose the separation mode",
            [
                "Vocals & Instrumental (Faster)",
                "Vocals & Instrumental (High Quality, Slower)",
                "Vocals, Drums, Bass & Other (Slower)",
                "Vocal, Drums, Bass, Guitar, Piano & Other (Slowest)",
            ],
            # NOTE(review): ``reset_execution()`` is *called* here, so the flag
            # is reset on every render and ``on_change`` actually receives None.
            # This looks like a bug, but the render-time call also (re)creates
            # ``st.session_state.executed`` before it is read below; fixing it
            # requires initializing and persisting the flag elsewhere. Left
            # as-is deliberately — verify before changing.
            on_change=reset_execution(),
            key="separation_mode",
        )
        # The fast vocal-remover model tolerates a longer clip than demucs.
        if separation_mode == "Vocals & Instrumental (Faster)":
            max_duration = 30
        else:
            max_duration = 15

    if filename is not None:
        song = load_audio_segment(in_path / filename, filename.split(".")[-1])
        n_secs = round(len(song) / 1000)  # pydub lengths are in milliseconds
        if os.environ.get("ENV_LIMITATION", False):
            # Hosted environment: clamp processing to a short window.
            with cols[2]:
                start_time = st.number_input(
                    "Choose the start time",
                    min_value=0,
                    max_value=n_secs,
                    step=1,
                    value=0,
                    help=f"Maximum duration is {max_duration} seconds for this separation mode. Duplicate this space to remove any limit.",
                    format="%d",
                )
                st.session_state.start_time = start_time
                end_time = min(start_time + max_duration, n_secs)
                song = song[start_time * 1000 : end_time * 1000]
                st.info(
                    f"Audio source will be processed from {start_time} to {end_time} seconds. Duplicate this space to remove any limit.",
                    icon="⏱",
                )
        else:
            start_time = 0
            end_time = n_secs
        with st.columns([1, 3, 1])[1]:
            execute = st.button("Split Music 🎶", type="primary", use_container_width=True)
        if execute or st.session_state.executed:
            if execute:
                st.session_state.executed = False

            if not st.session_state.executed:
                # Re-export the (possibly trimmed) clip before separating.
                song.export(in_path / filename, format=filename.split(".")[-1])
                with st.spinner("Separating source audio, it will take a while..."):
                    if separation_mode == "Vocals & Instrumental (Faster)":
                        model_name = "vocal_remover"
                        model, device = load_model(pretrained_model="baseline.pth")
                        separate(
                            input=in_path / filename,
                            model=model,
                            device=device,
                            output_dir=out_path,
                        )
                    else:
                        stem = None
                        model_name = "htdemucs"
                        if (
                            separation_mode
                            == "Vocal, Drums, Bass, Guitar, Piano & Other (Slowest)"
                        ):
                            model_name = "htdemucs_6s"
                        elif separation_mode == "Vocals & Instrumental (High Quality, Slower)":
                            # Two-stem mode: demucs extracts {stem} and no_{stem}.
                            stem = "vocals"

                        separator(
                            tracks=[in_path / filename],
                            out=out_path,
                            model=model_name,
                            shifts=1,
                            overlap=0.5,
                            stem=stem,
                            int24=False,
                            float32=False,
                            clip_mode="rescale",
                            mp3=True,
                            mp3_bitrate=320,
                            verbose=True,
                            start_time=start_time,
                            end_time=end_time,
                        )
                # Output directory name: the filename without its extension.
                last_dir = ".".join(filename.split(".")[:-1])
                filename = None
                st.session_state.executed = True

            def get_sources(path):
                # Collect only the stem files the chosen model actually produced.
                sources = {}
                for file in [
                    "no_vocals.mp3",
                    "vocals.mp3",
                    "drums.mp3",
                    "bass.mp3",
                    "guitar.mp3",
                    "piano.mp3",
                    "other.mp3",
                ]:
                    fullpath = path / file
                    if fullpath.exists():
                        sources[file] = fullpath
                return sources

            # NOTE(review): if a rerun ever reaches this point with
            # ``executed`` already True (the branch above skipped),
            # ``model_name``/``last_dir`` would be unbound. Today the
            # render-time ``reset_execution()`` call above keeps ``executed``
            # False across reruns, so this path is only hit right after the
            # separation branch — verify before changing that callback.
            sources = get_sources(out_path / Path(model_name) / last_dir)
            tab_sources = st.tabs([f"**{label_sources.get(k)}**" for k in sources.keys()])
            for i, (file, pathname) in enumerate(sources.items()):
                with tab_sources[i]:
                    cols = st.columns(2)
                    with cols[0]:
                        auseg = load_audio_segment(pathname, "mp3")
                        st.image(
                            plot_audio(auseg, title="", file=file),
                            use_column_width="always",
                        )
                    with cols[1]:
                        st_local_audio(pathname, key=f"output_{file}")
|
198 |
+
|
199 |
+
|
200 |
+
if __name__ == "__main__":
    # Streamlit executes the page top to bottom on every rerun.
    header()
    body()
    footer()
|
app/service/__init__.py
ADDED
File without changes
|
app/service/demucs_runner.py
ADDED
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import sys
|
3 |
+
from pathlib import Path
|
4 |
+
from typing import List
|
5 |
+
import os
|
6 |
+
from dora.log import fatal
|
7 |
+
import torch as th
|
8 |
+
|
9 |
+
from demucs.apply import apply_model, BagOfModels
|
10 |
+
from demucs.audio import save_audio
|
11 |
+
from demucs.pretrained import get_model_from_args, ModelLoadingError
|
12 |
+
from demucs.separate import load_track
|
13 |
+
|
14 |
+
import streamlit as st
|
15 |
+
|
16 |
+
|
17 |
+
@st.cache_data(show_spinner=False)
def separator(
    tracks: List[Path],
    out: Path,
    model: str,
    shifts: int,
    overlap: float,
    stem: str,
    int24: bool,
    float32: bool,
    clip_mode: str,
    mp3: bool,
    mp3_bitrate: int,
    verbose: bool,
    *args,
    **kwargs,
):
    """Separate the sources for the given tracks

    Args:
        tracks (Path): Path to tracks
        out (Path): Folder where to put extracted tracks. A subfolder with the model name will be
            created.
        model (str): Model name
        shifts (int): Number of random shifts for equivariant stabilization.
            Increase separation time but improves quality for Demucs.
            10 was used in the original paper.
        overlap (float): Overlap
        stem (str): Only separate audio into {STEM} and no_{STEM}.
        int24 (bool): Save wav output as 24 bits wav.
        float32 (bool): Save wav output as float32 (2x bigger).
        clip_mode (str): Strategy for avoiding clipping: rescaling entire signal if necessary
            (rescale) or hard clipping (clamp).
        mp3 (bool): Convert the output wavs to mp3.
        mp3_bitrate (int): Bitrate of converted mp3.
        verbose (bool): Verbose

    Note:
        Decorated with ``st.cache_data`` so identical calls are served from
        Streamlit's cache. Extra ``*args``/``**kwargs`` (e.g. start/end times
        passed by the caller) are ignored by the body but participate in the
        cache key.
    """

    if os.environ.get("LIMIT_CPU", False):
        # Constrained environment: keep torch single-threaded and run one job.
        th.set_num_threads(1)
        jobs = 1
    else:
        # Number of jobs. This can increase memory usage but will be much faster when
        # multiple cores are available.
        jobs = os.cpu_count()

    if th.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    # Build an argparse.Namespace mirroring demucs' CLI so its helper
    # functions (written against the CLI) can be reused unchanged.
    # NOTE: this rebinding shadows the *args tuple accepted above.
    args = argparse.Namespace()
    args.tracks = tracks
    args.out = out
    args.model = model
    args.device = device
    args.shifts = shifts
    args.overlap = overlap
    args.stem = stem
    args.int24 = int24
    args.float32 = float32
    args.clip_mode = clip_mode
    args.mp3 = mp3
    args.mp3_bitrate = mp3_bitrate
    args.jobs = jobs
    args.verbose = verbose
    args.filename = "{track}/{stem}.{ext}"
    args.split = True
    args.segment = None
    args.name = model
    args.repo = None

    # Load the pretrained model; abort (via dora's fatal) if unavailable.
    try:
        model = get_model_from_args(args)
    except ModelLoadingError as error:
        fatal(error.args[0])

    if args.segment is not None and args.segment < 8:
        fatal("Segment must greater than 8. ")

    # Guard against path traversal in the output filename template.
    if ".." in args.filename.replace("\\", "/").split("/"):
        fatal('".." must not appear in filename. ')

    if isinstance(model, BagOfModels):
        print(
            f"Selected model is a bag of {len(model.models)} models. "
            "You will see that many progress bars per track."
        )
        if args.segment is not None:
            for sub in model.models:
                sub.segment = args.segment
    else:
        if args.segment is not None:
            model.segment = args.segment

    model.cpu()
    model.eval()

    if args.stem is not None and args.stem not in model.sources:
        fatal(
            'error: stem "{stem}" is not in selected model. STEM must be one of {sources}.'.format(
                stem=args.stem, sources=", ".join(model.sources)
            )
        )
    # Outputs go under a subfolder named after the model.
    out = args.out / args.name
    out.mkdir(parents=True, exist_ok=True)
    print(f"Separated tracks will be stored in {out.resolve()}")
    for track in args.tracks:
        if not track.exists():
            print(
                f"File {track} does not exist. If the path contains spaces, "
                'please try again after surrounding the entire path with quotes "".',
                file=sys.stderr,
            )
            continue
        print(f"Separating track {track}")
        wav = load_track(track, model.audio_channels, model.samplerate)

        # Normalize by the mono reference's statistics; undone after separation.
        ref = wav.mean(0)
        wav = (wav - ref.mean()) / ref.std()
        sources = apply_model(
            model,
            wav[None],
            device=args.device,
            shifts=args.shifts,
            split=args.split,
            overlap=args.overlap,
            progress=True,
            num_workers=args.jobs,
        )[0]
        sources = sources * ref.std() + ref.mean()

        if args.mp3:
            ext = "mp3"
        else:
            ext = "wav"
        kwargs = {
            "samplerate": model.samplerate,
            "bitrate": args.mp3_bitrate,
            "clip": args.clip_mode,
            "as_float": args.float32,
            "bits_per_sample": 24 if args.int24 else 16,
        }
        if args.stem is None:
            # Full separation: save every source the model produces.
            for source, name in zip(sources, model.sources):
                stem = out / args.filename.format(
                    track=track.name.rsplit(".", 1)[0],
                    trackext=track.name.rsplit(".", 1)[-1],
                    stem=name,
                    ext=ext,
                )
                stem.parent.mkdir(parents=True, exist_ok=True)
                save_audio(source, str(stem), **kwargs)
        else:
            # Two-stem mode: save {stem}, then sum the rest into no_{stem}.
            sources = list(sources)
            stem = out / args.filename.format(
                track=track.name.rsplit(".", 1)[0],
                trackext=track.name.rsplit(".", 1)[-1],
                stem=args.stem,
                ext=ext,
            )
            stem.parent.mkdir(parents=True, exist_ok=True)
            save_audio(sources.pop(model.sources.index(args.stem)), str(stem), **kwargs)
            # Warning : after poping the stem, selected stem is no longer in the list 'sources'
            other_stem = th.zeros_like(sources[0])
            for i in sources:
                other_stem += i
            stem = out / args.filename.format(
                track=track.name.rsplit(".", 1)[0],
                trackext=track.name.rsplit(".", 1)[-1],
                stem="no_" + args.stem,
                ext=ext,
            )
            stem.parent.mkdir(parents=True, exist_ok=True)
            save_audio(other_stem, str(stem), **kwargs)
|
app/service/vocal_remover/__init__.py
ADDED
File without changes
|
app/service/vocal_remover/layers.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
|
5 |
+
|
6 |
+
def crop_center(h1, h2):
    """Center-crop *h1* along the last (time) axis to match *h2*'s width.

    Returns *h1* unchanged when the widths already match; raises ValueError
    when *h1* is narrower than *h2*.
    """
    width, target = h1.size()[3], h2.size()[3]

    if width == target:
        return h1
    if width < target:
        raise ValueError("h1_shape[3] must be greater than h2_shape[3]")

    start = (width - target) // 2
    return h1[:, :, :, start : start + target]
|
20 |
+
|
21 |
+
|
22 |
+
class Conv2DBNActiv(nn.Module):
    """Conv2d → BatchNorm2d → activation, packaged as one reusable block.

    The conv bias is disabled because BatchNorm's affine shift makes it
    redundant.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(Conv2DBNActiv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin,
                nout,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                bias=False,
            ),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        # Defined as ``forward`` (the original defined ``__call__``, which
        # bypasses nn.Module's hook/scripting machinery). Callers still invoke
        # the module directly, so behavior is unchanged.
        return self.conv(x)
|
41 |
+
|
42 |
+
|
43 |
+
class Encoder(nn.Module):
    """Two stacked Conv2DBNActiv blocks; the first may downsample via *stride*."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super(Encoder, self).__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, stride, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, 1, pad, activ=activ)

    def forward(self, x):
        # ``forward`` instead of the original ``__call__`` so nn.Module's hook
        # machinery keeps working; direct calls behave identically.
        h = self.conv1(x)
        h = self.conv2(h)

        return h
|
54 |
+
|
55 |
+
|
56 |
+
class Decoder(nn.Module):
    """Upsample x2 (bilinear), optionally concatenate a center-cropped skip
    connection, then apply one Conv2DBNActiv block with optional dropout."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
        super(Decoder, self).__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x, skip=None):
        # ``forward`` instead of the original ``__call__`` so nn.Module's hook
        # machinery keeps working; direct calls behave identically.
        x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True)

        if skip is not None:
            # The skip tensor can be wider in time; crop it to match x.
            skip = crop_center(skip, x)
            x = torch.cat([x, skip], dim=1)

        h = self.conv1(x)

        if self.dropout is not None:
            h = self.dropout(h)

        return h
|
76 |
+
|
77 |
+
|
78 |
+
class ASPPModule(nn.Module):
    """Atrous spatial pyramid pooling block.

    Five parallel branches — a globally pooled branch (over dim 2), a 1x1
    conv, and three dilated 3x3 convs — are concatenated along channels and
    fused by a 1x1 bottleneck, with optional dropout on the result.
    """

    def __init__(self, nin, nout, dilations=(4, 8, 12), activ=nn.ReLU, dropout=False):
        super(ASPPModule, self).__init__()
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ),
        )
        self.conv2 = Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ)
        self.conv3 = Conv2DBNActiv(nin, nout, 3, 1, dilations[0], dilations[0], activ=activ)
        self.conv4 = Conv2DBNActiv(nin, nout, 3, 1, dilations[1], dilations[1], activ=activ)
        self.conv5 = Conv2DBNActiv(nin, nout, 3, 1, dilations[2], dilations[2], activ=activ)
        self.bottleneck = Conv2DBNActiv(nout * 5, nout, 1, 1, 0, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x):
        _, _, nbins, nframes = x.size()
        # The pooled branch is upsampled back to the input's spatial size.
        pooled = F.interpolate(
            self.conv1(x), size=(nbins, nframes), mode="bilinear", align_corners=True
        )
        branches = [pooled, self.conv2(x), self.conv3(x), self.conv4(x), self.conv5(x)]
        fused = self.bottleneck(torch.cat(branches, dim=1))
        return fused if self.dropout is None else self.dropout(fused)
|
106 |
+
|
107 |
+
|
108 |
+
class LSTMModule(nn.Module):
    """Bidirectional LSTM run across time frames, producing a single-channel
    band feature map with the same spatial shape as the input."""

    def __init__(self, nin_conv, nin_lstm, nout_lstm):
        super(LSTMModule, self).__init__()
        # 1x1 conv collapses the channel dimension to a single band map.
        self.conv = Conv2DBNActiv(nin_conv, 1, 1, 1, 0)
        # Bidirectional: the two nout_lstm // 2 halves concatenate to nout_lstm.
        self.lstm = nn.LSTM(input_size=nin_lstm, hidden_size=nout_lstm // 2, bidirectional=True)
        self.dense = nn.Sequential(
            nn.Linear(nout_lstm, nin_lstm), nn.BatchNorm1d(nin_lstm), nn.ReLU()
        )

    def forward(self, x):
        N, _, nbins, nframes = x.size()
        h = self.conv(x)[:, 0]  # N, nbins, nframes
        h = h.permute(2, 0, 1)  # nframes, N, nbins
        h, _ = self.lstm(h)
        # Flatten (frames, batch) so BatchNorm1d sees a 2-D input.
        h = self.dense(h.reshape(-1, h.size()[-1]))  # nframes * N, nbins
        h = h.reshape(nframes, N, 1, nbins)
        h = h.permute(1, 2, 3, 0)  # back to N, 1, nbins, nframes

        return h
|
app/service/vocal_remover/nets.py
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
|
5 |
+
from app.service.vocal_remover import layers
|
6 |
+
|
7 |
+
|
8 |
+
class BaseNet(nn.Module):
    """U-Net-style encoder/decoder with an ASPP bottleneck and an auxiliary
    LSTM branch concatenated before the final decoder stage.

    NOTE: attribute names (enc1..enc5, aspp, dec1..dec4, lstm_dec2) are part
    of the pretrained checkpoint's state_dict keys — do not rename them.
    """

    def __init__(self, nin, nout, nin_lstm, nout_lstm, dilations=((4, 2), (8, 4), (12, 6))):
        super(BaseNet, self).__init__()
        self.enc1 = layers.Conv2DBNActiv(nin, nout, 3, 1, 1)
        # Each encoder stage doubles stride (2), widening channels as it goes.
        self.enc2 = layers.Encoder(nout, nout * 2, 3, 2, 1)
        self.enc3 = layers.Encoder(nout * 2, nout * 4, 3, 2, 1)
        self.enc4 = layers.Encoder(nout * 4, nout * 6, 3, 2, 1)
        self.enc5 = layers.Encoder(nout * 6, nout * 8, 3, 2, 1)

        self.aspp = layers.ASPPModule(nout * 8, nout * 8, dilations, dropout=True)

        # Decoder input widths account for the concatenated skip connections.
        self.dec4 = layers.Decoder(nout * (6 + 8), nout * 6, 3, 1, 1)
        self.dec3 = layers.Decoder(nout * (4 + 6), nout * 4, 3, 1, 1)
        self.dec2 = layers.Decoder(nout * (2 + 4), nout * 2, 3, 1, 1)
        self.lstm_dec2 = layers.LSTMModule(nout * 2, nin_lstm, nout_lstm)
        # +1 channel for the LSTM branch concatenated onto dec2's output.
        self.dec1 = layers.Decoder(nout * (1 + 2) + 1, nout * 1, 3, 1, 1)

    def __call__(self, x):
        # Encoder path.
        e1 = self.enc1(x)
        e2 = self.enc2(e1)
        e3 = self.enc3(e2)
        e4 = self.enc4(e3)
        e5 = self.enc5(e4)

        h = self.aspp(e5)

        # Decoder path with skip connections from the matching encoder stage.
        h = self.dec4(h, e4)
        h = self.dec3(h, e3)
        h = self.dec2(h, e2)
        h = torch.cat([h, self.lstm_dec2(h)], dim=1)
        h = self.dec1(h, e1)

        return h
|
41 |
+
|
42 |
+
|
43 |
+
class CascadedNet(nn.Module):
    """Three-stage cascaded band-split network producing a sigmoid spectrogram
    mask (tsurumeso's vocal-remover architecture).

    Stages 1 and 2 process the low and high frequency halves separately;
    stage 3 fuses the full band together with both stages' outputs.

    NOTE: attribute names are part of the pretrained checkpoint's state_dict
    keys — do not rename them.
    """

    def __init__(self, n_fft, nout=32, nout_lstm=128):
        super(CascadedNet, self).__init__()
        self.max_bin = n_fft // 2  # frequency bins actually processed
        self.output_bin = n_fft // 2 + 1  # bins the returned mask is padded to
        self.nin_lstm = self.max_bin // 2
        self.offset = 64  # time frames trimmed on each side by predict*()

        self.stg1_low_band_net = nn.Sequential(
            BaseNet(2, nout // 2, self.nin_lstm // 2, nout_lstm),
            layers.Conv2DBNActiv(nout // 2, nout // 4, 1, 1, 0),
        )
        self.stg1_high_band_net = BaseNet(2, nout // 4, self.nin_lstm // 2, nout_lstm // 2)

        self.stg2_low_band_net = nn.Sequential(
            BaseNet(nout // 4 + 2, nout, self.nin_lstm // 2, nout_lstm),
            layers.Conv2DBNActiv(nout, nout // 2, 1, 1, 0),
        )
        self.stg2_high_band_net = BaseNet(
            nout // 4 + 2, nout // 2, self.nin_lstm // 2, nout_lstm // 2
        )

        self.stg3_full_band_net = BaseNet(3 * nout // 4 + 2, nout, self.nin_lstm, nout_lstm)

        self.out = nn.Conv2d(nout, 2, 1, bias=False)
        self.aux_out = nn.Conv2d(3 * nout // 4, 2, 1, bias=False)

    def forward(self, x):
        # Keep only the first max_bin frequency bins.
        x = x[:, :, : self.max_bin]

        # Stage 1: split into low/high halves along the frequency axis (dim 2).
        bandw = x.size()[2] // 2
        l1_in = x[:, :, :bandw]
        h1_in = x[:, :, bandw:]
        l1 = self.stg1_low_band_net(l1_in)
        h1 = self.stg1_high_band_net(h1_in)
        aux1 = torch.cat([l1, h1], dim=2)

        # Stage 2: each band sees its raw input concatenated with stage-1 output.
        l2_in = torch.cat([l1_in, l1], dim=1)
        h2_in = torch.cat([h1_in, h1], dim=1)
        l2 = self.stg2_low_band_net(l2_in)
        h2 = self.stg2_high_band_net(h2_in)
        aux2 = torch.cat([l2, h2], dim=2)

        # Stage 3: full band, fed the raw input plus both stages' outputs.
        f3_in = torch.cat([x, aux1, aux2], dim=1)
        f3 = self.stg3_full_band_net(f3_in)

        # Sigmoid mask, padded back up to output_bin bins (top bin replicated).
        mask = torch.sigmoid(self.out(f3))
        mask = F.pad(
            input=mask,
            pad=(0, 0, 0, self.output_bin - mask.size()[2]),
            mode="replicate",
        )

        if self.training:
            # Auxiliary mask from the intermediate stages (training loss only).
            aux = torch.cat([aux1, aux2], dim=1)
            aux = torch.sigmoid(self.aux_out(aux))
            aux = F.pad(
                input=aux,
                pad=(0, 0, 0, self.output_bin - aux.size()[2]),
                mode="replicate",
            )
            return mask, aux
        else:
            return mask

    def predict_mask(self, x):
        """Return the mask with ``offset`` frames trimmed from each time edge."""
        mask = self.forward(x)

        if self.offset > 0:
            mask = mask[:, :, :, self.offset : -self.offset]
            assert mask.size()[3] > 0

        return mask

    def predict(self, x):
        """Return the masked magnitudes, trimmed like :meth:`predict_mask`."""
        mask = self.forward(x)
        pred_mag = x * mask

        if self.offset > 0:
            pred_mag = pred_mag[:, :, :, self.offset : -self.offset]
            assert pred_mag.size()[3] > 0

        return pred_mag
|
app/service/vocal_remover/runner.py
ADDED
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
import librosa
|
4 |
+
import numpy as np
|
5 |
+
import soundfile as sf
|
6 |
+
import torch
|
7 |
+
from stqdm import stqdm
|
8 |
+
import streamlit as st
|
9 |
+
from pydub import AudioSegment
|
10 |
+
|
11 |
+
from app.service.vocal_remover import nets
|
12 |
+
|
13 |
+
|
14 |
+
if os.environ.get("LIMIT_CPU", False):
|
15 |
+
torch.set_num_threads(1)
|
16 |
+
|
17 |
+
|
18 |
+
def merge_artifacts(y_mask, thres=0.05, min_range=64, fade_size=32):
    """Fill long above-threshold stretches of a mask with 1.0, with linear fades.

    Frames whose minimum mask value (across channels and bins) exceeds *thres*
    for more than *min_range* consecutive frames are pushed toward 1.0, with a
    *fade_size*-frame linear ramp on each side. Mutates *y_mask* in place and
    also returns it.

    Args:
        y_mask: mask array of shape (channels, bins, frames), values in [0, 1].
        thres: per-frame minimum value that counts as "active".
        min_range: minimum run length (frames) for a region to be merged.
        fade_size: length of the linear fade at each region edge.

    Raises:
        ValueError: if min_range cannot accommodate two fades.
    """
    if min_range < fade_size * 2:
        raise ValueError("min_range must be >= fade_size * 2")

    idx = np.where(y_mask.min(axis=(0, 1)) > thres)[0]
    if idx.size == 0:
        # Nothing exceeds the threshold anywhere: the original code crashed on
        # idx[0] here; simply return the mask unchanged instead.
        return y_mask

    # Start/end indices of each maximal run of consecutive active frames.
    start_idx = np.insert(idx[np.where(np.diff(idx) != 1)[0] + 1], 0, idx[0])
    end_idx = np.append(idx[np.where(np.diff(idx) != 1)[0]], idx[-1])
    artifact_idx = np.where(end_idx - start_idx > min_range)[0]
    weight = np.zeros_like(y_mask)
    if len(artifact_idx) > 0:
        start_idx = start_idx[artifact_idx]
        end_idx = end_idx[artifact_idx]
        old_e = None
        for s, e in zip(start_idx, end_idx):
            # Merge with the previous region when the gap is smaller than a fade.
            if old_e is not None and s - old_e < fade_size:
                s = old_e - fade_size * 2

            if s != 0:
                # Fade in over [s, s + fade_size).
                weight[:, :, s : s + fade_size] = np.linspace(0, 1, fade_size)
            else:
                # Region starts at the very beginning: skip the fade-in.
                s -= fade_size

            if e != y_mask.shape[2]:
                # Fade out over [e - fade_size, e).
                weight[:, :, e - fade_size : e] = np.linspace(1, 0, fade_size)
            else:
                # Region reaches the very end: skip the fade-out.
                e += fade_size

            # Full weight between the two fades.
            weight[:, :, s + fade_size : e - fade_size] = 1
            old_e = e

    # Blend toward 1.0 wherever weight is non-zero (in place).
    v_mask = 1 - y_mask
    y_mask += weight * v_mask

    return y_mask
|
52 |
+
|
53 |
+
|
54 |
+
def make_padding(width, cropsize, offset):
    """Compute left/right padding so *width* frames split evenly into crops.

    Returns ``(left, right, roi_size)`` where ``roi_size`` is the stride
    between consecutive crop windows (``cropsize`` minus the two trimmed
    offsets) and ``left``/``right`` are the padding amounts for each side.
    """
    left = offset
    roi_size = cropsize - 2 * offset
    if not roi_size:
        # Degenerate case: the offsets consume the whole crop; fall back to
        # non-overlapping full-size crops.
        roi_size = cropsize
    right = roi_size - (width % roi_size) + left
    return left, right, roi_size
|
62 |
+
|
63 |
+
|
64 |
+
def wave_to_spectrogram(wave, hop_length, n_fft):
    """STFT each stereo channel independently.

    Returns a complex array of shape (2, bins, frames) in Fortran order,
    matching what Separator.separate() expects.
    """
    left = np.asfortranarray(wave[0])
    right = np.asfortranarray(wave[1])

    channel_specs = [
        librosa.stft(channel, n_fft=n_fft, hop_length=hop_length)
        for channel in (left, right)
    ]
    return np.asfortranarray(channel_specs)
|
73 |
+
|
74 |
+
|
75 |
+
def spectrogram_to_wave(spec, hop_length=1024):
    """Inverse-STFT a mono (2-D) or stereo (3-D) complex spectrogram.

    Args:
        spec: complex spectrogram, shape (bins, frames) or (2, bins, frames).
        hop_length: STFT hop used by the forward transform.

    Returns:
        Mono waveform (n,) or channels-first stereo waveform (2, n).

    Raises:
        ValueError: if spec is neither 2- nor 3-dimensional (previously this
            surfaced as a confusing UnboundLocalError on ``wave``).
    """
    if spec.ndim == 2:
        wave = librosa.istft(spec, hop_length=hop_length)
    elif spec.ndim == 3:
        spec_left = np.asfortranarray(spec[0])
        spec_right = np.asfortranarray(spec[1])

        wave_left = librosa.istft(spec_left, hop_length=hop_length)
        wave_right = librosa.istft(spec_right, hop_length=hop_length)
        wave = np.asfortranarray([wave_left, wave_right])
    else:
        raise ValueError(f"spec must be 2- or 3-dimensional, got ndim={spec.ndim}")

    return wave
|
87 |
+
|
88 |
+
|
89 |
+
class Separator(object):
    """Runs a trained mask-prediction model over a magnitude spectrogram in crops.

    The spectrogram is padded and split into overlapping windows of
    ``cropsize`` frames; each window's central ``cropsize - 2 * offset``
    frames (the model's valid region) are stitched back together along time.
    """

    def __init__(self, model, device, batchsize, cropsize, postprocess=False, progress_bar=None):
        # model: network exposing .offset and .predict_mask() (see nets.CascadedNet).
        self.model = model
        self.offset = model.offset
        self.device = device
        self.batchsize = batchsize
        self.cropsize = cropsize
        # Whether to run merge_artifacts() on the predicted mask.
        self.postprocess = postprocess
        # Optional Streamlit container used by stqdm to render progress.
        self.progress_bar = progress_bar

    def _separate(self, X_mag_pad, roi_size):
        """Predict the mask crop-by-crop and concatenate along the time axis."""
        X_dataset = []
        patches = (X_mag_pad.shape[2] - 2 * self.offset) // roi_size
        for i in range(patches):
            start = i * roi_size
            X_mag_crop = X_mag_pad[:, :, start : start + self.cropsize]
            X_dataset.append(X_mag_crop)

        X_dataset = np.asarray(X_dataset)

        self.model.eval()
        with torch.no_grad():
            mask = []
            # To reduce the overhead, dataloader is not used.
            for i in stqdm(
                range(0, patches, self.batchsize),
                st_container=self.progress_bar,
                gui=False,
            ):
                X_batch = X_dataset[i : i + self.batchsize]
                X_batch = torch.from_numpy(X_batch).to(self.device)

                pred = self.model.predict_mask(X_batch)

                # Each batch element is a crop's valid region; joining them
                # along the frame axis reconstructs a contiguous mask.
                pred = pred.detach().cpu().numpy()
                pred = np.concatenate(pred, axis=2)
                mask.append(pred)

            mask = np.concatenate(mask, axis=2)

        return mask

    def _preprocess(self, X_spec):
        """Split a complex spectrogram into magnitude and phase."""
        X_mag = np.abs(X_spec)
        X_phase = np.angle(X_spec)

        return X_mag, X_phase

    def _postprocess(self, mask, X_mag, X_phase):
        """Recombine magnitude and phase; returns (masked, inverse-masked) spectra."""
        if self.postprocess:
            mask = merge_artifacts(mask)

        y_spec = mask * X_mag * np.exp(1.0j * X_phase)
        v_spec = (1 - mask) * X_mag * np.exp(1.0j * X_phase)

        return y_spec, v_spec

    def separate(self, X_spec):
        """Separate a complex spectrogram into two complementary spectrograms.

        Returns ``(y_spec, v_spec)`` — in this app, instrumental and vocals.
        """
        X_mag, X_phase = self._preprocess(X_spec)

        n_frame = X_mag.shape[2]
        pad_l, pad_r, roi_size = make_padding(n_frame, self.cropsize, self.offset)
        X_mag_pad = np.pad(X_mag, ((0, 0), (0, 0), (pad_l, pad_r)), mode="constant")
        # Normalize by the global peak so the network sees values in [0, 1].
        X_mag_pad /= X_mag_pad.max()

        mask = self._separate(X_mag_pad, roi_size)
        # Drop the padded tail so the mask matches the original frame count.
        mask = mask[:, :, :n_frame]

        y_spec, v_spec = self._postprocess(mask, X_mag, X_phase)

        return y_spec, v_spec
|
160 |
+
|
161 |
+
|
162 |
+
@st.cache_resource(show_spinner=False)
def load_model(pretrained_model, n_fft=2048):
    """Build a CascadedNet, pick a device, and load the pretrained weights.

    Cached by Streamlit so the checkpoint is only read once per process.
    Returns ``(model, device)``.
    """
    model = nets.CascadedNet(n_fft, 32, 128)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    if use_cuda:
        model.to(device)
    state_dict = torch.load(pretrained_model, map_location=device)
    model.load_state_dict(state_dict)
    return model, device
|
175 |
+
|
176 |
+
|
177 |
+
# @st.cache_data(show_spinner=False)
|
178 |
+
def separate(
    input,
    model,
    device,
    output_dir,
    batchsize=4,
    cropsize=256,
    postprocess=False,
    hop_length=1024,
    n_fft=2048,
    sr=44100,
    progress_bar=None,
    only_no_vocals=False,
):
    """Split an audio file into no_vocals.mp3 (and optionally vocals.mp3).

    Results are written under ``{output_dir}/vocal_remover/{basename}/``.
    *model* and *device* come from load_model(); *progress_bar* is an optional
    Streamlit container for stqdm progress rendering. When *only_no_vocals*
    is True, the vocals track is skipped.
    """
    X, sr = librosa.load(input, sr=sr, mono=False, dtype=np.float32, res_type="kaiser_fast")
    basename = os.path.splitext(os.path.basename(input))[0]

    if X.ndim == 1:
        # mono to stereo
        X = np.asarray([X, X])

    X_spec = wave_to_spectrogram(X, hop_length, n_fft)

    with torch.no_grad():
        sp = Separator(model, device, batchsize, cropsize, postprocess, progress_bar=progress_bar)
        y_spec, v_spec = sp.separate(X_spec)

    base_dir = f"{output_dir}/vocal_remover/{basename}"
    os.makedirs(base_dir, exist_ok=True)

    wave = spectrogram_to_wave(y_spec, hop_length=hop_length)
    try:
        sf.write(f"{base_dir}/no_vocals.mp3", wave.T, sr)
    except Exception:
        # soundfile may lack MP3 support depending on the libsndfile build;
        # fall back to pydub/ffmpeg.
        logging.error("Failed to write no_vocals.mp3, trying pydub...")
        pydub_write(wave, f"{base_dir}/no_vocals.mp3", sr)
    if only_no_vocals:
        return
    wave = spectrogram_to_wave(v_spec, hop_length=hop_length)
    try:
        sf.write(f"{base_dir}/vocals.mp3", wave.T, sr)
    except Exception:
        logging.error("Failed to write vocals.mp3, trying pydub...")
        pydub_write(wave, f"{base_dir}/vocals.mp3", sr)
|
222 |
+
|
223 |
+
|
224 |
+
def pydub_write(wave, output_path, frame_rate, audio_format="mp3"):
    """Export a float waveform via pydub/ffmpeg (fallback when soundfile fails).

    Accepts a mono array of shape (n,) or a channels-first stereo array of
    shape (2, n), matching the output of spectrogram_to_wave.
    """
    # Clip to [-1, 1] before scaling so out-of-range floats don't wrap around
    # when cast to int16.
    wave_16bit = (np.clip(wave, -1.0, 1.0) * 32767).astype(np.int16)

    if wave_16bit.ndim == 2:
        # Channels-first (2, n): interleave to frame order (n, 2) and declare
        # the real channel count. The previous code always claimed mono, which
        # de-interleaved stereo input and doubled its apparent duration.
        channels = wave_16bit.shape[0]
        raw = np.ascontiguousarray(wave_16bit.T).tobytes()
    else:
        channels = 1
        raw = wave_16bit.tobytes()

    audio_segment = AudioSegment(
        raw,
        frame_rate=frame_rate,
        sample_width=wave_16bit.dtype.itemsize,
        channels=channels,
    )
    audio_segment.export(output_path, format=audio_format)
|
app/service/youtube.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import List
|
3 |
+
import yt_dlp
|
4 |
+
import string
|
5 |
+
import time
|
6 |
+
import re
|
7 |
+
import streamlit as st
|
8 |
+
from pytube import Search
|
9 |
+
|
10 |
+
|
11 |
+
def _sanitize_filename(filename):
|
12 |
+
safe_chars = "-_.() %s%s" % (
|
13 |
+
re.escape(string.ascii_letters),
|
14 |
+
re.escape(string.digits),
|
15 |
+
)
|
16 |
+
safe_filename = re.sub(f"[^{safe_chars}]", "_", filename)
|
17 |
+
return safe_filename.strip()
|
18 |
+
|
19 |
+
|
20 |
+
@st.cache_data(show_spinner=False)
def download_audio_from_youtube(url, output_path):
    """Download a YouTube video's audio track as MP3 into *output_path*.

    Returns the sanitized ``"<title>.mp3"`` filename, or None when the video
    is rejected (longer than 6 minutes). Cached per (url, output_path) by
    Streamlit.
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    with yt_dlp.YoutubeDL() as ydl:
        info_dict = ydl.extract_info(url, download=False)
        # "duration" can be missing/None (e.g. live streams); treat that as 0
        # instead of crashing on a None > int comparison.
        if (info_dict.get("duration") or 0) > 360:
            st.error("Song is too long. Please use a song no longer than 6 minutes.")
            return
        video_title = info_dict.get("title", None)
        video_title = _sanitize_filename(video_title)
    ydl_opts = {
        "format": "bestaudio/best",
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "192",
            }
        ],
        # The FFmpegExtractAudio postprocessor appends the ".mp3" extension.
        "outtmpl": os.path.join(output_path, video_title),
        #'quiet': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return f"{video_title}.mp3"
|
47 |
+
|
48 |
+
|
49 |
+
@st.cache_data(show_spinner=False)
def query_youtube(query: str) -> Search:
    """Run a pytube YouTube search; cached per query string by Streamlit."""
    return Search(query)
52 |
+
|
53 |
+
|
54 |
+
def search_youtube(query: str) -> List:
    """Return YouTube video titles matching *query* (with " lyrics" appended).

    Side effect: stores the raw results and the title list in
    ``st.session_state`` for later lookup by get_youtube_url().
    Queries of 3 characters or fewer return an empty list without searching.
    """
    if len(query) > 3:
        # Small delay so each keystroke doesn't immediately fire a search.
        time.sleep(0.5)
        search = query_youtube(query + " lyrics")
        st.session_state.search_results = search.results
        video_options = [video.title for video in st.session_state.search_results]
        st.session_state.video_options = video_options
    else:
        video_options = []
    return video_options
|
64 |
+
|
65 |
+
|
66 |
+
def get_youtube_url(title: str) -> str:
    """Look up the embed URL of a previously searched video by its title.

    Relies on search_youtube() having populated ``st.session_state`` first;
    raises ValueError (from .index) if the title is not among the options.
    """
    video = st.session_state.search_results[st.session_state.video_options.index(title)]
    return video.embed_url
|
69 |
+
|
70 |
+
|
71 |
+
def check_if_is_youtube_url(url: str) -> bool:
    """Heuristically decide whether *url* is a URL rather than a search query.

    NOTE: despite the name, this only tests for an "http" prefix — any
    http(s) URL passes, not just YouTube links.
    """
    return url[:4] == "http"
|
app/style.py
ADDED
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Google Fonts used for the app's title and subtitle (interpolated into CSS below).
_font_title = "Monoton"
_font_subtitle = "Exo"

# Raw HTML/CSS injected into the Streamlit page (via st.markdown with
# unsafe_allow_html): loads the two fonts and overrides Streamlit's generated
# class names to restyle the layout.
# NOTE(review): the ".css-*" selectors are Streamlit build artifacts and break
# on Streamlit upgrades — confirm against the pinned streamlit==1.22.0.
CSS = (
    """
<!-- Add the font link from Google Fonts -->
<link href="https://fonts.googleapis.com/css2?family="""
    + _font_title
    + """&display=swap" rel="stylesheet">
<link href="https://fonts.googleapis.com/css2?family="""
    + _font_subtitle
    + """&display=swap" rel="stylesheet">

<style>
/* Remove the streamlit header */
header[data-testid="stHeader"] {
display: none;
}
/* Remove the sidebar menu */
div[data-testid="collapsedControl"]{
display: none;
}
/* Background */
.css-z5fcl4 {
padding: 0.5rem;
padding-top: 0rem;
}

/* Distances between the title and the image in mobile */
.css-1uifejx.e1tzin5v1 {
margin-bottom: 0px;
padding-bottom: 0px;
}
h1 {
padding-top: 0px;
}


/* Center the image within its container */
.css-1kyxreq {
justify-content: center;
}

/* Remove fixed width from the image container */
.css-1kyxreq.etr89bj2 {
width: 100% !important;
}

/* Center the title */
.css-k7vsyb {
text-align: center;
}

/* Hide the anchor button */
.css-zt5igj.e16nr0p33 a {
display: none;
}
/* Hide the full screen button */
.css-e370rw.e19lei0e1 {
display: none;
}
.css-6awftf.e19lei0e1 {
display: none;
}

/* Desktop */
@media (min-width: 640px) {
.stMarkdown {
max-width: 100%;
width: auto;
display: inline-block;
}
/* Dynamically add space between the image and the title */
.css-1kyxreq {
justify-content: right;
}
}

/* Add space after the image and the title */
.css-1a32fsj {
margin-right: 0px;
}

/* Apply the futuristic font to the text title*/
#moseca {
font-family: '"""
    + _font_title
    + """', sans-serif;
font-size: 3rem;
text-align: center;
/* Align the text to the center of the box */
align-items: center;
/* Set the line height to the same as the height of the box */
line-height: 3.5rem;
margin-bottom: -1rem;
}

/* subtitle */
.css-5rimss p, .css-nahz7x p {
font-family: """
    + _font_subtitle
    + """, sans-serif;
font-size: 0.8rem;
text-align: center;
}

/* Desktop */
@media (min-width: 640px) {
.css-zt5igj, .css-nahz7x p {
text-align: left;
}
.css-5rimss p {
text-align: left;
}
}

.st-af {
align-items: center;
padding-right: 2rem;
}

/* Remove the gap around the player */
.css-434r0z {
gap: 0rem;
}


</style>

"""
)
|
img/bmc-button.png
ADDED
![]() |
img/image_stems.png
ADDED
![]() |
img/karaoke_fun.png
ADDED
![]() |
img/logo_moseca.png
ADDED
![]() |
img/state-of-art.png
ADDED
![]() |
pyproject.toml
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Tooling configuration: formatter (Black), import sorter (isort), linter
# (Ruff), and pytest path setup.

[tool.black]
line-length = 100
target-version = ['py39', 'py310']
# NOTE(review): "preview_string_processing" is not a documented Black option
# (Black's preview flag is `preview`) — confirm this key is actually honored.
preview_string_processing = true

[tool.isort]
profile = 'black'
multi_line_output = 3

[tool.ruff]
line-length = 100
# Line length is enforced by Black at 100 columns, so E501 is redundant here.
ignore = ['E501']


[tool.pytest.ini_options]
pythonpath = [
    "app",
]
testpaths = "tests"
|
requirements.in
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==1.22.0
|
2 |
+
demucs==4.0.0
|
3 |
+
plotly==5.13.0
|
4 |
+
pandas==1.5.3
|
5 |
+
pydub==0.25.1
|
6 |
+
pytube==12.1.3
|
7 |
+
streamlit-player==0.1.5
|
8 |
+
streamlit-searchbox==0.1.2
|
9 |
+
yt-dlp==2023.3.4
|
10 |
+
kaleido==0.2.1
|
11 |
+
matplotlib==3.7.1
|
12 |
+
librosa==0.10.0.post2
|
13 |
+
resampy==0.4.2
|
14 |
+
stqdm==0.0.5
|
15 |
+
streamlit_option_menu==0.3.6
|
16 |
+
htbuilder==0.6.1
|
requirements.txt
ADDED
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#
|
2 |
+
# This file is autogenerated by pip-compile with Python 3.8
|
3 |
+
# by the following command:
|
4 |
+
#
|
5 |
+
# pip-compile --output-file=requirements.txt --resolver=backtracking requirements.in
|
6 |
+
#
|
7 |
+
altair==4.2.2
|
8 |
+
# via streamlit
|
9 |
+
antlr4-python3-runtime==4.9.3
|
10 |
+
# via omegaconf
|
11 |
+
appdirs==1.4.4
|
12 |
+
# via pooch
|
13 |
+
attrs==23.1.0
|
14 |
+
# via jsonschema
|
15 |
+
audioread==3.0.0
|
16 |
+
# via librosa
|
17 |
+
backports-zoneinfo==0.2.1
|
18 |
+
# via tzlocal
|
19 |
+
blinker==1.6.2
|
20 |
+
# via streamlit
|
21 |
+
brotli==1.0.9
|
22 |
+
# via yt-dlp
|
23 |
+
cachetools==5.3.1
|
24 |
+
# via streamlit
|
25 |
+
certifi==2023.5.7
|
26 |
+
# via
|
27 |
+
# requests
|
28 |
+
# yt-dlp
|
29 |
+
cffi==1.15.1
|
30 |
+
# via soundfile
|
31 |
+
charset-normalizer==3.1.0
|
32 |
+
# via requests
|
33 |
+
click==8.1.3
|
34 |
+
# via streamlit
|
35 |
+
cloudpickle==2.2.1
|
36 |
+
# via submitit
|
37 |
+
contourpy==1.1.0
|
38 |
+
# via matplotlib
|
39 |
+
cycler==0.11.0
|
40 |
+
# via matplotlib
|
41 |
+
cython==0.29.35
|
42 |
+
# via diffq
|
43 |
+
decorator==5.1.1
|
44 |
+
# via
|
45 |
+
# librosa
|
46 |
+
# validators
|
47 |
+
demucs==4.0.0
|
48 |
+
# via -r requirements.in
|
49 |
+
diffq==0.2.4
|
50 |
+
# via demucs
|
51 |
+
dora-search==0.1.12
|
52 |
+
# via demucs
|
53 |
+
einops==0.6.1
|
54 |
+
# via demucs
|
55 |
+
entrypoints==0.4
|
56 |
+
# via altair
|
57 |
+
filelock==3.12.2
|
58 |
+
# via torch
|
59 |
+
fonttools==4.40.0
|
60 |
+
# via matplotlib
|
61 |
+
gitdb==4.0.10
|
62 |
+
# via gitpython
|
63 |
+
gitpython==3.1.31
|
64 |
+
# via streamlit
|
65 |
+
htbuilder==0.6.1
|
66 |
+
# via -r requirements.in
|
67 |
+
idna==3.4
|
68 |
+
# via requests
|
69 |
+
importlib-metadata==6.7.0
|
70 |
+
# via
|
71 |
+
# numba
|
72 |
+
# streamlit
|
73 |
+
importlib-resources==5.12.0
|
74 |
+
# via
|
75 |
+
# jsonschema
|
76 |
+
# matplotlib
|
77 |
+
jinja2==3.1.2
|
78 |
+
# via
|
79 |
+
# altair
|
80 |
+
# pydeck
|
81 |
+
# torch
|
82 |
+
joblib==1.3.1
|
83 |
+
# via
|
84 |
+
# librosa
|
85 |
+
# scikit-learn
|
86 |
+
jsonschema==4.17.3
|
87 |
+
# via altair
|
88 |
+
julius==0.2.7
|
89 |
+
# via demucs
|
90 |
+
kaleido==0.2.1
|
91 |
+
# via -r requirements.in
|
92 |
+
kiwisolver==1.4.4
|
93 |
+
# via matplotlib
|
94 |
+
lameenc==1.5.0
|
95 |
+
# via demucs
|
96 |
+
lazy-loader==0.2
|
97 |
+
# via librosa
|
98 |
+
librosa==0.10.0.post2
|
99 |
+
# via -r requirements.in
|
100 |
+
llvmlite==0.40.1
|
101 |
+
# via numba
|
102 |
+
markdown-it-py==3.0.0
|
103 |
+
# via rich
|
104 |
+
markupsafe==2.1.3
|
105 |
+
# via jinja2
|
106 |
+
matplotlib==3.7.1
|
107 |
+
# via -r requirements.in
|
108 |
+
mdurl==0.1.2
|
109 |
+
# via markdown-it-py
|
110 |
+
more-itertools==9.1.0
|
111 |
+
# via htbuilder
|
112 |
+
mpmath==1.3.0
|
113 |
+
# via sympy
|
114 |
+
msgpack==1.0.5
|
115 |
+
# via librosa
|
116 |
+
mutagen==1.46.0
|
117 |
+
# via yt-dlp
|
118 |
+
networkx==3.1
|
119 |
+
# via torch
|
120 |
+
numba==0.57.1
|
121 |
+
# via
|
122 |
+
# librosa
|
123 |
+
# resampy
|
124 |
+
numpy==1.24.4
|
125 |
+
# via
|
126 |
+
# altair
|
127 |
+
# contourpy
|
128 |
+
# diffq
|
129 |
+
# librosa
|
130 |
+
# matplotlib
|
131 |
+
# numba
|
132 |
+
# openunmix
|
133 |
+
# pandas
|
134 |
+
# pyarrow
|
135 |
+
# pydeck
|
136 |
+
# resampy
|
137 |
+
# scikit-learn
|
138 |
+
# scipy
|
139 |
+
# soxr
|
140 |
+
# streamlit
|
141 |
+
omegaconf==2.3.0
|
142 |
+
# via dora-search
|
143 |
+
openunmix==1.2.1
|
144 |
+
# via demucs
|
145 |
+
packaging==23.1
|
146 |
+
# via
|
147 |
+
# matplotlib
|
148 |
+
# pooch
|
149 |
+
# streamlit
|
150 |
+
pandas==1.5.3
|
151 |
+
# via
|
152 |
+
# -r requirements.in
|
153 |
+
# altair
|
154 |
+
# streamlit
|
155 |
+
pillow==9.5.0
|
156 |
+
# via
|
157 |
+
# matplotlib
|
158 |
+
# streamlit
|
159 |
+
pkgutil-resolve-name==1.3.10
|
160 |
+
# via jsonschema
|
161 |
+
plotly==5.13.0
|
162 |
+
# via -r requirements.in
|
163 |
+
pooch==1.6.0
|
164 |
+
# via librosa
|
165 |
+
protobuf==3.20.3
|
166 |
+
# via streamlit
|
167 |
+
pyarrow==12.0.1
|
168 |
+
# via streamlit
|
169 |
+
pycparser==2.21
|
170 |
+
# via cffi
|
171 |
+
pycryptodomex==3.18.0
|
172 |
+
# via yt-dlp
|
173 |
+
pydeck==0.8.1b0
|
174 |
+
# via streamlit
|
175 |
+
pydub==0.25.1
|
176 |
+
# via -r requirements.in
|
177 |
+
pygments==2.15.1
|
178 |
+
# via rich
|
179 |
+
pympler==1.0.1
|
180 |
+
# via streamlit
|
181 |
+
pyparsing==3.1.0
|
182 |
+
# via matplotlib
|
183 |
+
pyrsistent==0.19.3
|
184 |
+
# via jsonschema
|
185 |
+
python-dateutil==2.8.2
|
186 |
+
# via
|
187 |
+
# matplotlib
|
188 |
+
# pandas
|
189 |
+
# streamlit
|
190 |
+
pytube==12.1.3
|
191 |
+
# via -r requirements.in
|
192 |
+
pytz==2023.3
|
193 |
+
# via pandas
|
194 |
+
pyyaml==6.0
|
195 |
+
# via
|
196 |
+
# demucs
|
197 |
+
# omegaconf
|
198 |
+
requests==2.31.0
|
199 |
+
# via
|
200 |
+
# pooch
|
201 |
+
# streamlit
|
202 |
+
resampy==0.4.2
|
203 |
+
# via -r requirements.in
|
204 |
+
retrying==1.3.4
|
205 |
+
# via dora-search
|
206 |
+
rich==13.4.2
|
207 |
+
# via streamlit
|
208 |
+
scikit-learn==1.3.0
|
209 |
+
# via librosa
|
210 |
+
scipy==1.10.1
|
211 |
+
# via
|
212 |
+
# librosa
|
213 |
+
# scikit-learn
|
214 |
+
six==1.16.0
|
215 |
+
# via
|
216 |
+
# python-dateutil
|
217 |
+
# retrying
|
218 |
+
smmap==5.0.0
|
219 |
+
# via gitdb
|
220 |
+
soundfile==0.12.1
|
221 |
+
# via librosa
|
222 |
+
soxr==0.3.5
|
223 |
+
# via librosa
|
224 |
+
stqdm==0.0.5
|
225 |
+
# via -r requirements.in
|
226 |
+
streamlit==1.22.0
|
227 |
+
# via
|
228 |
+
# -r requirements.in
|
229 |
+
# stqdm
|
230 |
+
# streamlit-option-menu
|
231 |
+
# streamlit-player
|
232 |
+
# streamlit-searchbox
|
233 |
+
streamlit-option-menu==0.3.6
|
234 |
+
# via -r requirements.in
|
235 |
+
streamlit-player==0.1.5
|
236 |
+
# via -r requirements.in
|
237 |
+
streamlit-searchbox==0.1.2
|
238 |
+
# via -r requirements.in
|
239 |
+
submitit==1.4.5
|
240 |
+
# via dora-search
|
241 |
+
sympy==1.12
|
242 |
+
# via torch
|
243 |
+
tenacity==8.2.2
|
244 |
+
# via
|
245 |
+
# plotly
|
246 |
+
# streamlit
|
247 |
+
threadpoolctl==3.1.0
|
248 |
+
# via scikit-learn
|
249 |
+
toml==0.10.2
|
250 |
+
# via streamlit
|
251 |
+
toolz==0.12.0
|
252 |
+
# via altair
|
253 |
+
torch==2.0.1
|
254 |
+
# via
|
255 |
+
# demucs
|
256 |
+
# diffq
|
257 |
+
# dora-search
|
258 |
+
# julius
|
259 |
+
# openunmix
|
260 |
+
# torchaudio
|
261 |
+
torchaudio==2.0.2
|
262 |
+
# via
|
263 |
+
# demucs
|
264 |
+
# openunmix
|
265 |
+
tornado==6.3.2
|
266 |
+
# via streamlit
|
267 |
+
tqdm==4.65.0
|
268 |
+
# via
|
269 |
+
# demucs
|
270 |
+
# openunmix
|
271 |
+
# stqdm
|
272 |
+
treetable==0.2.5
|
273 |
+
# via dora-search
|
274 |
+
typing-extensions==4.7.0
|
275 |
+
# via
|
276 |
+
# librosa
|
277 |
+
# rich
|
278 |
+
# streamlit
|
279 |
+
# submitit
|
280 |
+
# torch
|
281 |
+
tzlocal==5.0.1
|
282 |
+
# via streamlit
|
283 |
+
urllib3==2.0.3
|
284 |
+
# via requests
|
285 |
+
validators==0.20.0
|
286 |
+
# via streamlit
|
287 |
+
websockets==11.0.3
|
288 |
+
# via yt-dlp
|
289 |
+
yt-dlp==2023.3.4
|
290 |
+
# via -r requirements.in
|
291 |
+
zipp==3.15.0
|
292 |
+
# via
|
293 |
+
# importlib-metadata
|
294 |
+
# importlib-resources
|
scripts/inference.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
|
3 |
+
import warnings
|
4 |
+
from app.service.vocal_remover.runner import load_model, separate
|
5 |
+
|
6 |
+
warnings.simplefilter("ignore", UserWarning)
|
7 |
+
warnings.simplefilter("ignore", FutureWarning)
|
8 |
+
warnings.filterwarnings("ignore", module="streamlit")
|
9 |
+
|
10 |
+
|
11 |
+
def main():
    """CLI entry point: strip vocals from one audio file using the vocal remover.

    Writes results under ``{output_dir}/vocal_remover/{basename}/`` via
    app.service.vocal_remover.runner.separate (only the no-vocals track).
    """
    p = argparse.ArgumentParser()
    # NOTE(review): --gpu is parsed but never used below — device selection
    # happens inside load_model(); confirm whether the flag should be wired in.
    p.add_argument("--gpu", "-g", type=int, default=-1)
    p.add_argument("--pretrained_model", "-P", type=str, default="baseline.pth")
    p.add_argument("--input", "-i", required=True)
    p.add_argument("--output_dir", "-o", type=str, default="")
    args = p.parse_args()

    model, device = load_model(pretrained_model=args.pretrained_model)
    separate(
        input=args.input,
        model=model,
        device=device,
        output_dir=args.output_dir,
        only_no_vocals=True,
    )


if __name__ == "__main__":
    main()
|
scripts/prepare_samples.sh
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash

# Download each sample song listed in sample_songs.json with yt-dlp and run
# the vocal-remover inference on it, leaving results under /tmp.

# Read JSON file into a variable
json=$(cat sample_songs.json)

# Iterate through keys and values
for name in $(echo "${json}" | jq -r 'keys[]'); do
    url=$(echo "${json}" | jq -r --arg name "${name}" '.[$name]')
    echo "Separating ${name} from ${url}"

    # Download the audio track with yt-dlp (variables quoted so URLs with
    # '&' or titles with spaces don't undergo word splitting).
    yt-dlp "${url}" -o "/tmp/${name}" --format "bestaudio/best"
    mkdir -p "/tmp/vocal_remover"

    # Run inference
    python inference.py --input "/tmp/${name}" --output "/tmp"
    echo "Done separating ${name}"
done
|
scripts/sample_songs.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dancing_queen": "https://www.youtube.com/watch?v=3qiMJt-JBb4",
|
3 |
+
"bohemian_rhapsody": "https://www.youtube.com/watch?v=yk3prd8GER4",
|
4 |
+
"i_want_it_that_way": "https://www.youtube.com/watch?v=qjlVAsvQLM8",
|
5 |
+
"let_it_be": "https://www.youtube.com/watch?v=FIV73iG_e5I",
|
6 |
+
"viva_la_vida": "https://www.youtube.com/watch?v=a1EYnngNHIA",
|
7 |
+
"zombie": "https://www.youtube.com/watch?v=8sM-rm4lFZg"
|
8 |
+
}
|