Spaces:
Sleeping
Sleeping
Commit
·
6654c1a
0
Parent(s):
Duplicate from fabiogra/moseca
Browse files- .gitattributes +35 -0
- .streamlit/config.toml +2 -0
- Dockerfile +34 -0
- README.md +218 -0
- app/__init__.py +0 -0
- app/_fastapi_server.py +20 -0
- app/footer.py +118 -0
- app/header.py +68 -0
- app/helpers.py +160 -0
- app/pages/About.py +154 -0
- app/pages/Karaoke.py +176 -0
- app/pages/Separate.py +203 -0
- app/service/__init__.py +0 -0
- app/service/demucs_runner.py +190 -0
- app/service/vocal_remover/__init__.py +0 -0
- app/service/vocal_remover/layers.py +126 -0
- app/service/vocal_remover/nets.py +125 -0
- app/service/vocal_remover/runner.py +234 -0
- app/service/youtube.py +72 -0
- app/style.py +131 -0
- img/bmc-button.png +0 -0
- img/image_stems.png +0 -0
- img/karaoke_fun.png +0 -0
- img/logo_moseca.png +0 -0
- img/state-of-art.png +0 -0
- pyproject.toml +19 -0
- requirements.in +16 -0
- requirements.txt +294 -0
- scripts/inference.py +30 -0
- scripts/prepare_samples.sh +18 -0
- scripts/sample_songs.json +8 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
.streamlit/config.toml
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
[server]
|
2 |
+
enableXsrfProtection = false
|
Dockerfile
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# syntax=docker/dockerfile:1
|
2 |
+
|
3 |
+
FROM python:3.8
|
4 |
+
|
5 |
+
|
6 |
+
RUN apt-get update && \
|
7 |
+
apt-get install -y ffmpeg jq curl && \
|
8 |
+
pip install --upgrade pip
|
9 |
+
|
10 |
+
WORKDIR /app
|
11 |
+
|
12 |
+
COPY requirements.txt .
|
13 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
14 |
+
|
15 |
+
COPY scripts/ .
|
16 |
+
COPY app ./app
|
17 |
+
copy img ./img
|
18 |
+
|
19 |
+
RUN wget --progress=bar:force:noscroll https://huggingface.co/fabiogra/baseline_vocal_remover/resolve/main/baseline.pth
|
20 |
+
|
21 |
+
RUN mkdir -p /tmp/ /tmp/vocal_remover /.cache /.config && \
|
22 |
+
chmod 777 /tmp /tmp/vocal_remover /.cache /.config
|
23 |
+
|
24 |
+
ENV PYTHONPATH "${PYTHONPATH}:/app"
|
25 |
+
|
26 |
+
RUN chmod +x prepare_samples.sh
|
27 |
+
|
28 |
+
EXPOSE 7860
|
29 |
+
|
30 |
+
HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
|
31 |
+
|
32 |
+
RUN ["./prepare_samples.sh"]
|
33 |
+
|
34 |
+
ENTRYPOINT ["streamlit", "run", "app/header.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
README.md
ADDED
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Moseca
|
3 |
+
emoji: 🎤🎸🥁🎹
|
4 |
+
colorFrom: yellow
|
5 |
+
colorTo: purple
|
6 |
+
sdk: docker
|
7 |
+
app_port: 7860
|
8 |
+
models:
|
9 |
+
- https://huggingface.co/fabiogra/baseline_vocal_remover
|
10 |
+
tags:
|
11 |
+
- audio
|
12 |
+
- music
|
13 |
+
- vocal-removal
|
14 |
+
- karaoke
|
15 |
+
- music-separation
|
16 |
+
- music-source-separation
|
17 |
+
pinned: true
|
18 |
+
duplicated_from: fabiogra/moseca
|
19 |
+
---
|
20 |
+
|
21 |
+
<p align="center">
|
22 |
+
<img src="img/logo_moseca.png" alt="logo" width="70" />
|
23 |
+
</p>
|
24 |
+
<h2 align="center">Moseca</h1>
|
25 |
+
<p align="center">Music Source Separation & Karaoke</p>
|
26 |
+
|
27 |
+
|
28 |
+
</a>
|
29 |
+
<a href="https://huggingface.co/spaces/fabiogra/moseca">
|
30 |
+
<img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue"
|
31 |
+
alt="Hugging Face Spaces"></a>
|
32 |
+
<a href="https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true">
|
33 |
+
<img src="https://img.shields.io/badge/-Docker%20Image-blue?logo=docker&labelColor=white"
|
34 |
+
alt="Docker"></a><a href="https://www.buymeacoffee.com/fabiogra">
|
35 |
+
<img src="https://img.shields.io/badge/Buy%20me%20a%20coffee--yellow.svg?logo=buy-me-a-coffee&logoColor=orange&style=social"
|
36 |
+
alt="Buy me a coffee"></a>
|
37 |
+
|
38 |
+
---
|
39 |
+
|
40 |
+
- [Setup](#setup)
|
41 |
+
- [About](#about)
|
42 |
+
- [High-Quality Stem Separation](#high-quality-stem-separation)
|
43 |
+
- [Advanced AI Algorithms](#advanced-ai-algorithms)
|
44 |
+
- [Karaoke Fun](#karaoke-fun)
|
45 |
+
- [Easy Deployment](#easy-deployment)
|
46 |
+
- [Open-Source and Free](#open-source-and-free)
|
47 |
+
- [Support](#support)
|
48 |
+
- [FAQs](#faqs)
|
49 |
+
- [What is Moseca?](#what-is-moseca)
|
50 |
+
- [Are there any limitations?](#are-there-any-limitations)
|
51 |
+
- [How does Moseca work?](#how-does-moseca-work)
|
52 |
+
- [How do I use Moseca?](#how-do-i-use-moseca)
|
53 |
+
- [Where can I find the code for Moseca?](#where-can-i-find-the-code-for-moseca)
|
54 |
+
- [How can I get in touch with you?](#how-can-i-get-in-touch-with-you)
|
55 |
+
- [Disclaimer](#disclaimer)
|
56 |
+
|
57 |
+
|
58 |
+
---
|
59 |
+
|
60 |
+
|
61 |
+
## Setup
|
62 |
+
### Local environment
|
63 |
+
Create a new environment with Python 3.8 and install the requirements:
|
64 |
+
```bash
|
65 |
+
pip install -r requirements.txt
|
66 |
+
```
|
67 |
+
then run the app with:
|
68 |
+
```bash
|
69 |
+
streamlit run app/header.py
|
70 |
+
```
|
71 |
+
### Docker
|
72 |
+
You can also run the app with Docker:
|
73 |
+
```bash
|
74 |
+
docker build -t moseca .
|
75 |
+
docker run -it --rm -p 7860:7860 $(DOCKER_IMAGE_NAME)
|
76 |
+
```
|
77 |
+
or pull the image from Hugging Face Spaces:
|
78 |
+
```bash
|
79 |
+
docker run -it -p 7860:7860 --platform=linux/amd64 \
|
80 |
+
registry.hf.space/fabiogra-moseca:latest
|
81 |
+
```
|
82 |
+
|
83 |
+
You can set the following environment variables to limit the resources used by the app:
|
84 |
+
- ENV_LIMITATION=true
|
85 |
+
- LIMIT_CPU=true
|
86 |
+
---
|
87 |
+
## About
|
88 |
+
|
89 |
+
Welcome to Moseca, your personal web application designed to redefine your music experience.
|
90 |
+
Whether you're a musician looking to remix your favorite songs, a karaoke
|
91 |
+
enthusiast, or a music lover wanting to dive deeper into your favorite tracks,
|
92 |
+
Moseca is for you.
|
93 |
+
|
94 |
+
<br>
|
95 |
+
|
96 |
+
### High-Quality Stem Separation
|
97 |
+
|
98 |
+
<img title="High-Quality Stem Separation" src="https://i.imgur.com/l7H8YWL.png" width="250" ></img>
|
99 |
+
|
100 |
+
|
101 |
+
<br>
|
102 |
+
|
103 |
+
Separate up to 6 stems including 🗣voice, 🥁drums, 🔉bass, 🎸guitar,
|
104 |
+
🎹piano (beta), and 🎶 others.
|
105 |
+
|
106 |
+
<br>
|
107 |
+
|
108 |
+
### Advanced AI Algorithms
|
109 |
+
|
110 |
+
<img title="Advanced AI Algorithms" src="https://i.imgur.com/I8Pvdav.png" width="250" ></img>
|
111 |
+
|
112 |
+
<br>
|
113 |
+
|
114 |
+
Moseca utilizes state-of-the-art AI technology to extract voice or music from
|
115 |
+
your original songs accurately.
|
116 |
+
|
117 |
+
<br>
|
118 |
+
|
119 |
+
### Karaoke Fun
|
120 |
+
|
121 |
+
<img title="Karaoke Fun" src="https://i.imgur.com/nsn3JGV.png" width="250" ></img>
|
122 |
+
|
123 |
+
<br>
|
124 |
+
|
125 |
+
Engage with your favorite tunes in a whole new way!
|
126 |
+
|
127 |
+
Moseca offers an immersive online karaoke experience, allowing you to search
|
128 |
+
for any song on YouTube and remove the vocals online.
|
129 |
+
|
130 |
+
Enjoy singing along with high-quality instrumentals at the comfort of your home.
|
131 |
+
|
132 |
+
|
133 |
+
<br>
|
134 |
+
|
135 |
+
### Easy Deployment
|
136 |
+
|
137 |
+
|
138 |
+
With Moseca, you can deploy your personal Moseca app in the
|
139 |
+
<a href="https://huggingface.co/spaces/fabiogra/moseca?duplicate=true">
|
140 |
+
<img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue"
|
141 |
+
alt="Hugging Face Spaces"></a> or locally with
|
142 |
+
[](https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true)
|
143 |
+
in just one click.
|
144 |
+
|
145 |
+
<br>
|
146 |
+
|
147 |
+
### Open-Source and Free
|
148 |
+
|
149 |
+
Moseca is the free and open-source alternative to lalal.ai, splitter.ai or media.io vocal remover.
|
150 |
+
|
151 |
+
You can modify, distribute, and use it free of charge. I believe in the power of community
|
152 |
+
collaboration and encourage users to contribute to our source code, making Moseca better with
|
153 |
+
each update.
|
154 |
+
|
155 |
+
|
156 |
+
<br>
|
157 |
+
|
158 |
+
### Support
|
159 |
+
|
160 |
+
- Show your support by giving a star to the GitHub repository [](https://github.com/fabiogra/moseca).
|
161 |
+
- If you have found an issue or have a suggestion to improve Moseca, you can open an [](https://github.com/fabiogra/moseca/issues/new)
|
162 |
+
- Enjoy Moseca? [](https://www.buymeacoffee.com/fabiogra)
|
163 |
+
|
164 |
+
------
|
165 |
+
|
166 |
+
## FAQs
|
167 |
+
|
168 |
+
### What is Moseca?
|
169 |
+
|
170 |
+
Moseca is an open-source web app that utilizes advanced AI technology to separate vocals and
|
171 |
+
instrumentals from music tracks. It also provides an online karaoke experience by allowing you
|
172 |
+
to search for any song on YouTube and remove the vocals.
|
173 |
+
|
174 |
+
### Are there any limitations?
|
175 |
+
Yes, in this environment there are some limitations regarding lenght processing
|
176 |
+
and CPU usage to allow a smooth experience for all users.
|
177 |
+
|
178 |
+
<b>If you want to <u>remove these limitations</u> you can deploy a Moseca app in your personal
|
179 |
+
environment like in the <a href="https://huggingface.co/spaces/fabiogra/moseca?duplicate=true"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue" alt="Hugging Face Spaces"></a> or locally with [](https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true)</b>
|
180 |
+
|
181 |
+
### How does Moseca work?
|
182 |
+
Moseca utilizes the Hybrid Spectrogram and Waveform Source Separation ([DEMUCS](https://github.com/facebookresearch/demucs)) model from Facebook. For fast karaoke vocal removal, Moseca uses the AI vocal remover developed by [tsurumeso](https://github.com/tsurumeso/vocal-remover).
|
183 |
+
|
184 |
+
### How do I use Moseca?
|
185 |
+
1. Upload your file: choose your song and upload it to Moseca. It supports
|
186 |
+
a wide range of music formats for your convenience.
|
187 |
+
|
188 |
+
2. Choose separation mode: opt for voice only, 4-stem or 6-stem separation
|
189 |
+
depending on your requirement.
|
190 |
+
|
191 |
+
3. Let AI do its magic: Moseca’s advanced AI will work to separate vocals
|
192 |
+
from music in a matter of minutes, giving you high-quality, separated audio tracks.
|
193 |
+
|
194 |
+
4. Download and enjoy: preview and download your separated audio tracks.
|
195 |
+
Now you can enjoy them anytime, anywhere!
|
196 |
+
|
197 |
+
|
198 |
+
### Where can I find the code for Moseca?
|
199 |
+
|
200 |
+
The code for Moseca is readily available on
|
201 |
+
[GitHub](https://github.com/fabiogra/moseca) and
|
202 |
+
[Hugging Face](https://huggingface.co/spaces/fabiogra/moseca).
|
203 |
+
|
204 |
+
|
205 |
+
### How can I get in touch with you?
|
206 |
+
|
207 |
+
For any questions or feedback, feel free to contact me on
|
208 |
+
[](https://twitter.com/grsFabio)
|
209 |
+
or [LinkedIn](https://www.linkedin.com/in/fabio-grasso/en).
|
210 |
+
|
211 |
+
------
|
212 |
+
## Disclaimer
|
213 |
+
|
214 |
+
Moseca is designed to separate vocals and instruments from copyrighted music for
|
215 |
+
legally permissible purposes, such as learning, practicing, research, or other non-commercial
|
216 |
+
activities that fall within the scope of fair use or exceptions to copyright. As a user, you are
|
217 |
+
responsible for ensuring that your use of separated audio tracks complies with the legal
|
218 |
+
requirements in your jurisdiction.
|
app/__init__.py
ADDED
File without changes
|
app/_fastapi_server.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI
|
2 |
+
from fastapi.responses import FileResponse
|
3 |
+
from urllib.parse import unquote
|
4 |
+
|
5 |
+
import os
|
6 |
+
|
7 |
+
app = FastAPI()
|
8 |
+
|
9 |
+
|
10 |
+
@app.get("/streaming/{path:path}")
|
11 |
+
async def serve_streaming(path: str):
|
12 |
+
# Decode URL-encoded characters
|
13 |
+
decoded_path = unquote(path)
|
14 |
+
return FileResponse(decoded_path, filename=os.path.basename(decoded_path))
|
15 |
+
|
16 |
+
|
17 |
+
if __name__ == "__main__":
|
18 |
+
import uvicorn
|
19 |
+
|
20 |
+
uvicorn.run(app, host="127.0.0.1", port=8000)
|
app/footer.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
from streamlit.components.v1 import html
|
4 |
+
from htbuilder import HtmlElement, div, a, p, img, styles
|
5 |
+
from htbuilder.units import percent, px
|
6 |
+
|
7 |
+
|
8 |
+
def image(src_as_string, **style):
|
9 |
+
return img(src=src_as_string, style=styles(**style))
|
10 |
+
|
11 |
+
|
12 |
+
def link(link, text, **style):
|
13 |
+
return a(_href=link, _target="_blank", style=styles(**style))(text)
|
14 |
+
|
15 |
+
|
16 |
+
def layout(*args):
|
17 |
+
style = """
|
18 |
+
<style>
|
19 |
+
footer {visibility: hidden;}
|
20 |
+
.stApp { bottom: 50px; }
|
21 |
+
</style>
|
22 |
+
"""
|
23 |
+
|
24 |
+
style_div = styles(
|
25 |
+
position="fixed",
|
26 |
+
left=0,
|
27 |
+
bottom=0,
|
28 |
+
margin=px(0, 0, 0, 0),
|
29 |
+
width=percent(100),
|
30 |
+
color="black",
|
31 |
+
text_align="center",
|
32 |
+
height="auto",
|
33 |
+
opacity=1,
|
34 |
+
align_items="center",
|
35 |
+
flex_direction="column",
|
36 |
+
display="flex",
|
37 |
+
)
|
38 |
+
body = p(
|
39 |
+
id="myFooter",
|
40 |
+
style=styles(
|
41 |
+
margin=px(0, 0, 0, 0),
|
42 |
+
padding=px(5),
|
43 |
+
font_size="0.8rem",
|
44 |
+
color="rgb(51,51,51)",
|
45 |
+
font_family="Exo",
|
46 |
+
),
|
47 |
+
)
|
48 |
+
foot = div(style=style_div)(body)
|
49 |
+
|
50 |
+
st.markdown(style, unsafe_allow_html=True)
|
51 |
+
|
52 |
+
for arg in args:
|
53 |
+
if isinstance(arg, str):
|
54 |
+
body(arg)
|
55 |
+
|
56 |
+
elif isinstance(arg, HtmlElement):
|
57 |
+
body(arg)
|
58 |
+
|
59 |
+
st.markdown(str(foot), unsafe_allow_html=True)
|
60 |
+
|
61 |
+
js_code = """
|
62 |
+
<script>
|
63 |
+
function rgbReverse(rgb){
|
64 |
+
var r = rgb[0]*0.299;
|
65 |
+
var g = rgb[1]*0.587;
|
66 |
+
var b = rgb[2]*0.114;
|
67 |
+
|
68 |
+
if ((r + g + b)/255 > 0.5){
|
69 |
+
return "rgb(49, 51, 63)"
|
70 |
+
}else{
|
71 |
+
return "rgb(250, 250, 250)"
|
72 |
+
}
|
73 |
+
|
74 |
+
};
|
75 |
+
var stApp_css = window.parent.document.querySelector("#root > div:nth-child(1) > div > div > div");
|
76 |
+
window.onload = function () {
|
77 |
+
var mutationObserver = new MutationObserver(function(mutations) {
|
78 |
+
mutations.forEach(function(mutation) {
|
79 |
+
var bgColor = window.getComputedStyle(stApp_css).backgroundColor.replace("rgb(", "").replace(")", "").split(", ");
|
80 |
+
var fontColor = rgbReverse(bgColor);
|
81 |
+
var pTag = window.parent.document.getElementById("myFooter");
|
82 |
+
pTag.style.color = fontColor;
|
83 |
+
});
|
84 |
+
});
|
85 |
+
|
86 |
+
/**Element**/
|
87 |
+
mutationObserver.observe(stApp_css, {
|
88 |
+
attributes: true,
|
89 |
+
characterData: true,
|
90 |
+
childList: true,
|
91 |
+
subtree: true,
|
92 |
+
attributeOldValue: true,
|
93 |
+
characterDataOldValue: true
|
94 |
+
});
|
95 |
+
}
|
96 |
+
|
97 |
+
|
98 |
+
</script>
|
99 |
+
"""
|
100 |
+
html(js_code)
|
101 |
+
|
102 |
+
|
103 |
+
def footer():
|
104 |
+
myargs = [
|
105 |
+
"Made in ",
|
106 |
+
link(
|
107 |
+
"https://streamlit.io/",
|
108 |
+
image("https://streamlit.io/images/brand/streamlit-mark-color.png", width="20px"),
|
109 |
+
),
|
110 |
+
" with ❤️ by ",
|
111 |
+
link("https://twitter.com/grsFabio", "@grsFabio"),
|
112 |
+
" ",
|
113 |
+
link(
|
114 |
+
"https://www.buymeacoffee.com/fabiogra",
|
115 |
+
image("https://i.imgur.com/YFu6MMA.png", margin="0em", align="top", width="130px"),
|
116 |
+
),
|
117 |
+
]
|
118 |
+
layout(*myargs)
|
app/header.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
from helpers import switch_page
|
4 |
+
from style import CSS
|
5 |
+
import logging
|
6 |
+
|
7 |
+
from streamlit_option_menu import option_menu
|
8 |
+
|
9 |
+
logging.basicConfig(
|
10 |
+
format="%(asctime)s %(levelname)-8s %(message)s",
|
11 |
+
level=logging.INFO,
|
12 |
+
datefmt="%Y-%m-%d %H:%M:%S",
|
13 |
+
)
|
14 |
+
|
15 |
+
|
16 |
+
def header(logo_and_title=True):
|
17 |
+
if "first_run" not in st.session_state:
|
18 |
+
st.session_state.first_run = True
|
19 |
+
for key in [
|
20 |
+
"search_results",
|
21 |
+
"selected_value",
|
22 |
+
"filename",
|
23 |
+
"executed",
|
24 |
+
"play_karaoke",
|
25 |
+
"url",
|
26 |
+
"random_song",
|
27 |
+
"last_dir",
|
28 |
+
]:
|
29 |
+
st.session_state[key] = None
|
30 |
+
st.session_state.video_options = []
|
31 |
+
st.session_state.page = "Karaoke"
|
32 |
+
switch_page(st.session_state.page)
|
33 |
+
|
34 |
+
st.set_page_config(
|
35 |
+
page_title="Moseca - Music Separation and Karaoke - Free and Open Source alternative to lalal.ai, splitter.ai or media.io vocal remover.",
|
36 |
+
page_icon="img/logo_moseca.png",
|
37 |
+
layout="wide",
|
38 |
+
initial_sidebar_state="collapsed",
|
39 |
+
)
|
40 |
+
st.markdown(CSS, unsafe_allow_html=True)
|
41 |
+
|
42 |
+
options = ["Karaoke", "Separate", "About"]
|
43 |
+
page = option_menu(
|
44 |
+
menu_title=None,
|
45 |
+
options=options,
|
46 |
+
# bootrap icons
|
47 |
+
icons=["play-btn-fill", "file-earmark-music", "info-circle"],
|
48 |
+
default_index=options.index(st.session_state.page),
|
49 |
+
orientation="horizontal",
|
50 |
+
styles={"nav-link": {"padding-left": "1.5rem", "padding-right": "1.5rem"}},
|
51 |
+
key="",
|
52 |
+
)
|
53 |
+
if page != st.session_state.page:
|
54 |
+
switch_page(page)
|
55 |
+
|
56 |
+
if logo_and_title:
|
57 |
+
head = st.columns([5, 1, 3, 5])
|
58 |
+
with head[1]:
|
59 |
+
st.image("img/logo_moseca.png", use_column_width=False, width=80)
|
60 |
+
with head[2]:
|
61 |
+
st.markdown(
|
62 |
+
"<h1>moseca</h1><p><b>Music Source Separation & Karaoke</b></p>",
|
63 |
+
unsafe_allow_html=True,
|
64 |
+
)
|
65 |
+
|
66 |
+
|
67 |
+
if __name__ == "__main__":
|
68 |
+
header()
|
app/helpers.py
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import random
|
2 |
+
from io import BytesIO
|
3 |
+
import json
|
4 |
+
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import numpy as np
|
7 |
+
import requests
|
8 |
+
import streamlit as st
|
9 |
+
from PIL import Image
|
10 |
+
from pydub import AudioSegment
|
11 |
+
from base64 import b64encode
|
12 |
+
from pathlib import Path
|
13 |
+
from streamlit.runtime.scriptrunner import RerunData, RerunException
|
14 |
+
from streamlit.source_util import get_pages
|
15 |
+
from streamlit_player import st_player
|
16 |
+
|
17 |
+
extensions = ["mp3", "wav", "ogg", "flac"] # we will look for all those file types.
|
18 |
+
example_songs = [1, 2, 3]
|
19 |
+
|
20 |
+
|
21 |
+
def img_to_bytes(img_path):
|
22 |
+
img_bytes = Path(img_path).read_bytes()
|
23 |
+
encoded = b64encode(img_bytes).decode()
|
24 |
+
return encoded
|
25 |
+
|
26 |
+
|
27 |
+
# @st.cache_data(show_spinner=False)
|
28 |
+
def img_to_html(img_path):
|
29 |
+
img_html = "<div style='display: flex; justify-content: center; align-items: center; height: 50vh;'><img src='data:image/png;base64,{}' class='img-fluid' style='max-width: 100%; max-height: 100%;' ></div>".format(
|
30 |
+
img_to_bytes(img_path)
|
31 |
+
)
|
32 |
+
return img_html
|
33 |
+
|
34 |
+
|
35 |
+
@st.cache_data(show_spinner=False)
|
36 |
+
def url_is_valid(url):
|
37 |
+
if url.startswith("http") is False:
|
38 |
+
st.error("URL should start with http or https.")
|
39 |
+
return False
|
40 |
+
elif url.split(".")[-1] not in extensions:
|
41 |
+
st.error("Extension not supported.")
|
42 |
+
return False
|
43 |
+
try:
|
44 |
+
r = requests.get(url)
|
45 |
+
r.raise_for_status()
|
46 |
+
return True
|
47 |
+
except Exception:
|
48 |
+
st.error("URL is not valid.")
|
49 |
+
return False
|
50 |
+
|
51 |
+
|
52 |
+
@st.cache_data(show_spinner=False)
|
53 |
+
def load_audio_segment(path: str, format: str) -> AudioSegment:
|
54 |
+
return AudioSegment.from_file(path, format=format)
|
55 |
+
|
56 |
+
|
57 |
+
@st.cache_data(show_spinner=False)
|
58 |
+
def plot_audio(_audio_segment: AudioSegment, *args, **kwargs) -> Image.Image:
|
59 |
+
samples = _audio_segment.get_array_of_samples()
|
60 |
+
arr = np.array(samples)
|
61 |
+
|
62 |
+
fig, ax = plt.subplots(figsize=(10, 2))
|
63 |
+
ax.plot(arr, linewidth=0.05)
|
64 |
+
ax.set_axis_off()
|
65 |
+
|
66 |
+
# Set the background color to transparent
|
67 |
+
fig.patch.set_alpha(0)
|
68 |
+
ax.patch.set_alpha(0)
|
69 |
+
|
70 |
+
buf = BytesIO()
|
71 |
+
plt.savefig(buf, format="png", dpi=100, bbox_inches="tight")
|
72 |
+
buf.seek(0)
|
73 |
+
image = Image.open(buf)
|
74 |
+
|
75 |
+
plt.close(fig)
|
76 |
+
return image
|
77 |
+
|
78 |
+
|
79 |
+
def get_random_song():
|
80 |
+
sample_songs = json.load(open("sample_songs.json"))
|
81 |
+
name, url = random.choice(list(sample_songs.items()))
|
82 |
+
return name, url
|
83 |
+
|
84 |
+
|
85 |
+
def streamlit_player(
|
86 |
+
player,
|
87 |
+
url,
|
88 |
+
height,
|
89 |
+
is_active,
|
90 |
+
muted,
|
91 |
+
start,
|
92 |
+
key,
|
93 |
+
playback_rate=1,
|
94 |
+
events=None,
|
95 |
+
play_inline=False,
|
96 |
+
light=False,
|
97 |
+
):
|
98 |
+
with player:
|
99 |
+
options = {
|
100 |
+
"progress_interval": 1000,
|
101 |
+
"playing": is_active, # st.checkbox("Playing", False),
|
102 |
+
"muted": muted,
|
103 |
+
"light": light,
|
104 |
+
"play_inline": play_inline,
|
105 |
+
"playback_rate": playback_rate,
|
106 |
+
"height": height,
|
107 |
+
"config": {"start": start},
|
108 |
+
"events": events,
|
109 |
+
}
|
110 |
+
if url != "":
|
111 |
+
events = st_player(url, **options, key=key)
|
112 |
+
return events
|
113 |
+
|
114 |
+
|
115 |
+
@st.cache_data(show_spinner=False)
|
116 |
+
def local_audio(path, mime="audio/mp3"):
|
117 |
+
data = b64encode(Path(path).read_bytes()).decode()
|
118 |
+
return [{"type": mime, "src": f"data:{mime};base64,{data}"}]
|
119 |
+
|
120 |
+
|
121 |
+
def _standardize_name(name: str) -> str:
|
122 |
+
return name.lower().replace("_", " ").strip()
|
123 |
+
|
124 |
+
|
125 |
+
@st.cache_data(show_spinner=False)
|
126 |
+
def switch_page(page_name: str):
|
127 |
+
st.session_state.page = page_name
|
128 |
+
|
129 |
+
page_name = _standardize_name(page_name)
|
130 |
+
|
131 |
+
pages = get_pages("header.py") # OR whatever your main page is called
|
132 |
+
|
133 |
+
for page_hash, config in pages.items():
|
134 |
+
if _standardize_name(config["page_name"]) == page_name:
|
135 |
+
raise RerunException(
|
136 |
+
RerunData(
|
137 |
+
page_script_hash=page_hash,
|
138 |
+
page_name=page_name,
|
139 |
+
)
|
140 |
+
)
|
141 |
+
|
142 |
+
page_names = [_standardize_name(config["page_name"]) for config in pages.values()]
|
143 |
+
raise ValueError(f"Could not find page {page_name}. Must be one of {page_names}")
|
144 |
+
|
145 |
+
|
146 |
+
def st_local_audio(pathname, key):
|
147 |
+
st_player(
|
148 |
+
local_audio(pathname),
|
149 |
+
**{
|
150 |
+
"progress_interval": 1000,
|
151 |
+
"playing": False,
|
152 |
+
"muted": False,
|
153 |
+
"light": False,
|
154 |
+
"play_inline": True,
|
155 |
+
"playback_rate": 1,
|
156 |
+
"height": 40,
|
157 |
+
"config": {"start": 0, "forceAudio": True, "forceHLS": True, "forceSafariHLS": True},
|
158 |
+
},
|
159 |
+
key=key,
|
160 |
+
)
|
app/pages/About.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
from header import header
|
4 |
+
from footer import footer
|
5 |
+
|
6 |
+
|
7 |
+
def body():
|
8 |
+
with st.columns([2, 3, 2])[1]:
|
9 |
+
st.markdown(
|
10 |
+
"""
|
11 |
+
<center>
|
12 |
+
|
13 |
+
## Welcome to Moseca, your personal web application designed to redefine your music experience.
|
14 |
+
<font size="3"> Whether you're a musician looking to remix your favorite songs, a karaoke
|
15 |
+
enthusiast, or a music lover wanting to dive deeper into your favorite tracks,
|
16 |
+
Moseca is for you. </font>
|
17 |
+
|
18 |
+
<br>
|
19 |
+
|
20 |
+
### High-Quality Stem Separation
|
21 |
+
|
22 |
+
<center><img title="High-Quality Stem Separation" src="https://i.imgur.com/l7H8YWL.png" width="60%" ></img></center>
|
23 |
+
|
24 |
+
|
25 |
+
<br>
|
26 |
+
|
27 |
+
<font size="3"> Separate up to 6 stems including 🗣voice, 🥁drums, 🔉bass, 🎸guitar,
|
28 |
+
🎹piano (beta), and 🎶 others. </font>
|
29 |
+
|
30 |
+
<br>
|
31 |
+
|
32 |
+
### Advanced AI Algorithms
|
33 |
+
|
34 |
+
<center><img title="Advanced AI Algorithms" src="https://i.imgur.com/I8Pvdav.png" width="60%" ></img></center>
|
35 |
+
|
36 |
+
<br>
|
37 |
+
|
38 |
+
<font size="3"> Moseca utilizes state-of-the-art AI technology to extract voice or music from
|
39 |
+
your original songs accurately. </font>
|
40 |
+
|
41 |
+
<br>
|
42 |
+
|
43 |
+
### Karaoke Fun
|
44 |
+
|
45 |
+
<center><img title="Karaoke Fun" src="https://i.imgur.com/nsn3JGV.png" width="60%" ></img></center>
|
46 |
+
|
47 |
+
<br>
|
48 |
+
|
49 |
+
<font size="3"> Engage with your favorite tunes in a whole new way! </font>
|
50 |
+
|
51 |
+
<font size="3"> Moseca offers an immersive online karaoke experience, allowing you to search
|
52 |
+
for any song on YouTube and remove the vocals online. </font>
|
53 |
+
|
54 |
+
<font size="3"> Enjoy singing along with high-quality instrumentals at the comfort of your home.
|
55 |
+
</font>
|
56 |
+
|
57 |
+
<br>
|
58 |
+
|
59 |
+
### Easy Deployment
|
60 |
+
|
61 |
+
|
62 |
+
<font size="3"> With Moseca, you can deploy your personal Moseca app in the
|
63 |
+
<a href="https://huggingface.co/spaces/fabiogra/moseca?duplicate=true">
|
64 |
+
<img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue"
|
65 |
+
alt="Hugging Face Spaces"></a> or locally with </font>
|
66 |
+
[](https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true)
|
67 |
+
<font size="3"> in just one click. </font>
|
68 |
+
|
69 |
+
<br>
|
70 |
+
|
71 |
+
### Open-Source and Free
|
72 |
+
|
73 |
+
<font size="3"> Moseca is the free and open-source alternative to lalal.ai, splitter.ai or media.io vocal remover.
|
74 |
+
|
75 |
+
You can modify, distribute, and use it free of charge. I believe in the power of community
|
76 |
+
collaboration and encourage users to contribute to our source code, making Moseca better with
|
77 |
+
each update.
|
78 |
+
</font>
|
79 |
+
|
80 |
+
<br>
|
81 |
+
|
82 |
+
### Support
|
83 |
+
|
84 |
+
- <font size="3"> Show your support by giving a star to the GitHub repository</font> [](https://github.com/fabiogra/moseca).
|
85 |
+
- <font size="3"> If you have found an issue or have a suggestion to improve Moseca, you can open an</font> [](https://github.com/fabiogra/moseca/issues/new)
|
86 |
+
- <font size="3"> Enjoy Moseca?</font> [](https://www.buymeacoffee.com/fabiogra)
|
87 |
+
|
88 |
+
------
|
89 |
+
|
90 |
+
## FAQs
|
91 |
+
|
92 |
+
### What is Moseca?
|
93 |
+
|
94 |
+
<font size="3"> Moseca is an open-source web app that utilizes advanced AI technology to separate vocals and
|
95 |
+
instrumentals from music tracks. It also provides an online karaoke experience by allowing you
|
96 |
+
to search for any song on YouTube and remove the vocals.</font>
|
97 |
+
|
98 |
+
### Are there any limitations?
|
99 |
+
<font size="3">Yes, in this environment there are some limitations regarding lenght processing
|
100 |
+
and CPU usage to allow a smooth experience for all users.
|
101 |
+
|
102 |
+
<b>If you want to <u>remove these limitations</u> you can deploy a Moseca app in your personal
|
103 |
+
environment like in the <a href="https://huggingface.co/spaces/fabiogra/moseca?duplicate=true"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-blue" alt="Hugging Face Spaces"></a> or locally with [](https://huggingface.co/spaces/fabiogra/moseca/discussions?docker=true)</b>
|
104 |
+
</font>
|
105 |
+
### How does Moseca work?
|
106 |
+
<font size="3"> Moseca utilizes the Hybrid Spectrogram and Waveform Source Separation ([DEMUCS](https://github.com/facebookresearch/demucs)) model from Facebook. For fast karaoke vocal removal, Moseca uses the AI vocal remover developed by [tsurumeso](https://github.com/tsurumeso/vocal-remover).
|
107 |
+
</font>
|
108 |
+
### How do I use Moseca?
|
109 |
+
<font size="3">1. Upload your file: choose your song and upload it to Moseca. It supports
|
110 |
+
a wide range of music formats for your convenience.</font>
|
111 |
+
|
112 |
+
<font size="3">2. Choose separation mode: opt for voice only, 4-stem or 6-stem separation
|
113 |
+
depending on your requirement.</font>
|
114 |
+
|
115 |
+
<font size="3">3. Let AI do its magic: Moseca’s advanced AI will work to separate vocals
|
116 |
+
from music in a matter of minutes, giving you high-quality, separated audio tracks.</font>
|
117 |
+
|
118 |
+
<font size="3">4. Download and enjoy: preview and download your separated audio tracks.
|
119 |
+
Now you can enjoy them anytime, anywhere! </font>
|
120 |
+
</font>
|
121 |
+
|
122 |
+
### Where can I find the code for Moseca?
|
123 |
+
|
124 |
+
<font size="3">The code for Moseca is readily available on
|
125 |
+
[GitHub](https://github.com/fabiogra/moseca) and
|
126 |
+
[Hugging Face](https://huggingface.co/spaces/fabiogra/moseca).
|
127 |
+
</font>
|
128 |
+
|
129 |
+
### How can I get in touch with you?
|
130 |
+
|
131 |
+
<font size="3">For any questions or feedback, feel free to contact me on </font>
|
132 |
+
[](https://twitter.com/grsFabio)
|
133 |
+
<font size="3">or</font> [LinkedIn](https://www.linkedin.com/in/fabio-grasso/en).
|
134 |
+
|
135 |
+
------
|
136 |
+
## Disclaimer
|
137 |
+
|
138 |
+
<font size="3">Moseca is designed to separate vocals and instruments from copyrighted music for
|
139 |
+
legally permissible purposes, such as learning, practicing, research, or other non-commercial
|
140 |
+
activities that fall within the scope of fair use or exceptions to copyright. As a user, you are
|
141 |
+
responsible for ensuring that your use of separated audio tracks complies with the legal
|
142 |
+
requirements in your jurisdiction.
|
143 |
+
</font>
|
144 |
+
|
145 |
+
</center>
|
146 |
+
""",
|
147 |
+
unsafe_allow_html=True,
|
148 |
+
)
|
149 |
+
|
150 |
+
|
151 |
+
if __name__ == "__main__":
|
152 |
+
header(logo_and_title=False)
|
153 |
+
body()
|
154 |
+
footer()
|
app/pages/Karaoke.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
|
3 |
+
import streamlit as st
|
4 |
+
from streamlit_player import st_player
|
5 |
+
from streamlit_searchbox import st_searchbox
|
6 |
+
|
7 |
+
from service.youtube import (
|
8 |
+
get_youtube_url,
|
9 |
+
search_youtube,
|
10 |
+
download_audio_from_youtube,
|
11 |
+
)
|
12 |
+
from helpers import (
|
13 |
+
get_random_song,
|
14 |
+
load_audio_segment,
|
15 |
+
streamlit_player,
|
16 |
+
local_audio,
|
17 |
+
)
|
18 |
+
|
19 |
+
from service.vocal_remover.runner import separate, load_model
|
20 |
+
from footer import footer
|
21 |
+
from header import header
|
22 |
+
|
23 |
+
|
24 |
+
# Working directories for downloaded and separated audio; staged in the
# system temp directory (writable in the hosted environment).
out_path = Path("/tmp")
in_path = Path("/tmp")

# Shorthand for Streamlit's per-user session state.
sess = st.session_state
|
28 |
+
|
29 |
+
|
30 |
+
def show_karaoke(pathname, initial_player):
    """Render the karaoke view: a local player for the separated (no-vocals)
    track plus a muted YouTube video player started with a user-chosen delay.

    Args:
        pathname: Path to the separated audio file to play locally.
        initial_player: Streamlit placeholder holding the preview player; it
            is emptied once karaoke playback actually starts.
    """
    cols = st.columns([1, 1, 3, 1])
    with cols[1]:
        # Let the user compensate for the YouTube player's startup latency.
        sess.delay = st.slider(
            label="Start delay in karaoke (seconds)",
            key="delay_slider",
            value=2,
            min_value=0,
            max_value=5,
            help="Synchronize youtube player with karaoke audio by adding a delay to the youtube player.",
        )
    with cols[2]:
        # Local audio player for the instrumental track; progress events are
        # polled so we can tell when playback has started.
        events = st_player(
            local_audio(pathname),
            **{
                "progress_interval": 1000,
                "playing": False,
                "muted": False,
                "light": False,
                "play_inline": True,
                "playback_rate": 1,
                "height": 40,
                "config": {
                    "start": 0,
                    "forceAudio": True,
                },
                "events": ["onProgress", "onPlay"],
            },
            key="karaoke_player",
        )
        st.markdown(
            "<center>⬆️ Click on the play button to start karaoke<center>",
            unsafe_allow_html=True,
        )
    with st.columns([1, 4, 1])[1]:
        # Once the local audio reports progress, swap the preview player for a
        # muted YouTube video offset by the chosen delay (audio comes from the
        # local karaoke track, video from YouTube).
        if events.name == "onProgress" and events.data["playedSeconds"] > 0:
            initial_player.empty()
            st_player(
                sess.url + f"&t={sess.delay}s",
                **{
                    "progress_interval": 1000,
                    "playing": True,
                    "muted": True,
                    "light": False,
                    "play_inline": False,
                    "playback_rate": 1,
                    "height": 250,
                    "events": None,
                },
                key="yt_muted_player",
            )
|
81 |
+
|
82 |
+
|
83 |
+
def body():
    """Render the Karaoke page: YouTube search or random-song pick, a preview
    player, vocal removal, and finally the karaoke view.

    Reads/writes several Streamlit session keys (``url``, ``video_options``,
    ``selected_value``, ``random_song``, ``executed``, ``filename``,
    ``last_dir``) — presumably initialized elsewhere (e.g. by ``header()``);
    verify before refactoring.
    """
    st.markdown("<center>Search for a song on YouTube<center>", unsafe_allow_html=True)
    yt_cols = st.columns([1, 3, 2, 1])
    with yt_cols[1]:
        selected_value = st_searchbox(
            search_youtube,
            label=None,
            placeholder="Search by name...",
            clear_on_submit=True,
            key="yt_searchbox",
        )
        if selected_value is not None and selected_value in sess.video_options:
            sess.random_song = None

            if selected_value != sess.selected_value:  # New song selected
                sess.executed = False

            sess.selected_value = selected_value
            sess.url = get_youtube_url(selected_value)

    with yt_cols[2]:
        if st.button("🎲 Random song", use_container_width=True):
            # Random songs come pre-separated, so no processing is done below.
            sess.last_dir, sess.url = get_random_song()
            sess.random_song = True
            sess.video_options = []
            sess.executed = False

    if sess.url is not None:
        player_cols = st.columns([2, 2, 1, 1], gap="medium")
        with player_cols[1]:
            # Placeholder so the preview can be removed once karaoke starts.
            player = st.empty()
            streamlit_player(
                player,
                sess.url,
                height=200,
                is_active=False,
                muted=False,
                start=0,
                key="yt_player",
                events=["onProgress"],
            )

        # Separate vocals
        cols_before_sep = st.columns([2, 4, 2])
        with cols_before_sep[1]:
            execute_button = st.empty()
            execute = execute_button.button(
                "Confirm and remove vocals 🎤 🎶",
                type="primary",
                use_container_width=True,
            )
        if execute or sess.executed:
            execute_button.empty()
            player.empty()
            if execute:
                # A fresh click forces re-processing below.
                sess.executed = False
            if sess.random_song is None:
                if not sess.executed:
                    cols_spinners = st.columns([1, 2, 1])
                    with cols_spinners[1]:
                        with st.spinner(
                            "Separating vocals from music, it will take a while..."
                        ):
                            sess.filename = download_audio_from_youtube(sess.url, in_path)
                            if sess.filename is None:
                                # Download failed; abort this rerun.
                                st.stop()
                            sess.url = None
                            filename = sess.filename
                            # Re-export normalizes the container before separation.
                            song = load_audio_segment(
                                in_path / filename, filename.split(".")[-1]
                            )
                            song.export(in_path / filename, format=filename.split(".")[-1])
                            model, device = load_model(pretrained_model="baseline.pth")
                            separate(
                                input=in_path / filename,
                                model=model,
                                device=device,
                                output_dir=out_path,
                                only_no_vocals=True,
                            )
                            selected_value = None
                            # Output directory name: the filename without extension.
                            sess.last_dir = ".".join(sess.filename.split(".")[:-1])
                            sess.executed = True
            else:
                # Random songs are already separated; just show the result.
                sess.executed = True

        if sess.executed:
            show_karaoke(out_path / "vocal_remover" / sess.last_dir / "no_vocals.mp3", player)
|
171 |
+
|
172 |
+
|
173 |
+
if __name__ == "__main__":
    # Streamlit executes the page top to bottom on every rerun.
    header()
    body()
    footer()
|
app/pages/Separate.py
ADDED
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
import streamlit as st
|
5 |
+
from streamlit_option_menu import option_menu
|
6 |
+
|
7 |
+
from service.demucs_runner import separator
|
8 |
+
from helpers import (
|
9 |
+
load_audio_segment,
|
10 |
+
plot_audio,
|
11 |
+
st_local_audio,
|
12 |
+
url_is_valid,
|
13 |
+
)
|
14 |
+
|
15 |
+
from service.vocal_remover.runner import separate, load_model
|
16 |
+
|
17 |
+
from footer import footer
|
18 |
+
from header import header
|
19 |
+
|
20 |
+
# Map of separated-stem filenames to the human-readable tab labels shown in the UI.
label_sources = {
    "no_vocals.mp3": "🎶 Instrumental",
    "vocals.mp3": "🎤 Vocals",
    "drums.mp3": "🥁 Drums",
    "bass.mp3": "🎸 Bass",
    "guitar.mp3": "🎸 Guitar",
    "piano.mp3": "🎹 Piano",
    "other.mp3": "🎶 Other",
}

# Audio formats accepted by both the uploader and the URL downloader.
extensions = ["mp3", "wav", "ogg", "flac"]


# Working directories for input and separated audio (system temp directory).
out_path = Path("/tmp")
in_path = Path("/tmp")
|
35 |
+
|
36 |
+
|
37 |
+
def reset_execution():
    """Mark the separation as not-yet-executed in the Streamlit session state."""
    st.session_state.executed = False
|
39 |
+
|
40 |
+
|
41 |
+
def body():
    """Render the Separate page: input selection (file upload or URL),
    separation-mode selection, source separation, and playback of the stems.

    Side effects: writes the input audio under ``in_path`` and reads separated
    stems from ``out_path / <model_name> / <track_name>``.
    """
    import subprocess  # local import: only needed for the URL-download branch

    filename = None
    cols = st.columns([1, 3, 2, 1])
    with cols[1]:
        with st.columns([1, 5, 1])[1]:
            option = option_menu(
                menu_title=None,
                options=["Upload File", "From URL"],
                icons=["cloud-upload-fill", "link-45deg"],
                orientation="horizontal",
                styles={"container": {"width": "100%", "margin": "0px", "padding": "0px"}},
                key="option_separate",
            )
        if option == "Upload File":
            uploaded_file = st.file_uploader(
                "Choose a file",
                type=extensions,
                key="file",
                help="Supported formats: mp3, wav, ogg, flac.",
            )
            if uploaded_file is not None:
                # Persist the upload to disk so the separation backends can read it.
                with open(in_path / uploaded_file.name, "wb") as f:
                    f.write(uploaded_file.getbuffer())
                filename = uploaded_file.name
                st_local_audio(in_path / filename, key="input_upload_file")

        elif option == "From URL":  # TODO: show examples
            url = st.text_input(
                "Paste the URL of the audio file",
                key="url_input",
                help="Supported formats: mp3, wav, ogg, flac.",
            )
            if url != "":
                if url_is_valid(url):
                    with st.spinner("Downloading audio..."):
                        filename = url.split("/")[-1]
                        # Download via an argument list (no shell) so a crafted
                        # URL cannot inject shell commands, unlike the previous
                        # os.system(f"wget ...") call.
                        subprocess.run(
                            ["wget", "-O", str(in_path / filename), url],
                            check=False,
                        )
                    st_local_audio(in_path / filename, key="input_from_url")
    with cols[2]:
        separation_mode = st.selectbox(
            "Choose the separation mode",
            [
                "Vocals & Instrumental (Faster)",
                "Vocals & Instrumental (High Quality, Slower)",
                "Vocals, Drums, Bass & Other (Slower)",
                "Vocal, Drums, Bass, Guitar, Piano & Other (Slowest)",
            ],
            # NOTE(review): ``reset_execution()`` is *called* here, so the flag
            # is reset on every render and ``on_change`` actually receives None.
            # This looks like a bug, but the render-time call also (re)creates
            # ``st.session_state.executed`` before it is read below; fixing it
            # requires initializing and persisting the flag elsewhere. Left
            # as-is deliberately — verify before changing.
            on_change=reset_execution(),
            key="separation_mode",
        )
        # The fast vocal-remover model tolerates a longer clip than demucs.
        if separation_mode == "Vocals & Instrumental (Faster)":
            max_duration = 30
        else:
            max_duration = 15

    if filename is not None:
        song = load_audio_segment(in_path / filename, filename.split(".")[-1])
        n_secs = round(len(song) / 1000)  # pydub lengths are in milliseconds
        if os.environ.get("ENV_LIMITATION", False):
            # Hosted environment: clamp processing to a short window.
            with cols[2]:
                start_time = st.number_input(
                    "Choose the start time",
                    min_value=0,
                    max_value=n_secs,
                    step=1,
                    value=0,
                    help=f"Maximum duration is {max_duration} seconds for this separation mode. Duplicate this space to remove any limit.",
                    format="%d",
                )
                st.session_state.start_time = start_time
                end_time = min(start_time + max_duration, n_secs)
                song = song[start_time * 1000 : end_time * 1000]
                st.info(
                    f"Audio source will be processed from {start_time} to {end_time} seconds. Duplicate this space to remove any limit.",
                    icon="⏱",
                )
        else:
            start_time = 0
            end_time = n_secs
        with st.columns([1, 3, 1])[1]:
            execute = st.button("Split Music 🎶", type="primary", use_container_width=True)
        if execute or st.session_state.executed:
            if execute:
                st.session_state.executed = False

            if not st.session_state.executed:
                # Re-export the (possibly trimmed) clip before separating.
                song.export(in_path / filename, format=filename.split(".")[-1])
                with st.spinner("Separating source audio, it will take a while..."):
                    if separation_mode == "Vocals & Instrumental (Faster)":
                        model_name = "vocal_remover"
                        model, device = load_model(pretrained_model="baseline.pth")
                        separate(
                            input=in_path / filename,
                            model=model,
                            device=device,
                            output_dir=out_path,
                        )
                    else:
                        stem = None
                        model_name = "htdemucs"
                        if (
                            separation_mode
                            == "Vocal, Drums, Bass, Guitar, Piano & Other (Slowest)"
                        ):
                            model_name = "htdemucs_6s"
                        elif separation_mode == "Vocals & Instrumental (High Quality, Slower)":
                            # Two-stem mode: demucs extracts {stem} and no_{stem}.
                            stem = "vocals"

                        separator(
                            tracks=[in_path / filename],
                            out=out_path,
                            model=model_name,
                            shifts=1,
                            overlap=0.5,
                            stem=stem,
                            int24=False,
                            float32=False,
                            clip_mode="rescale",
                            mp3=True,
                            mp3_bitrate=320,
                            verbose=True,
                            start_time=start_time,
                            end_time=end_time,
                        )
                # Output directory name: the filename without its extension.
                last_dir = ".".join(filename.split(".")[:-1])
                filename = None
                st.session_state.executed = True

            def get_sources(path):
                # Collect only the stem files the chosen model actually produced.
                sources = {}
                for file in [
                    "no_vocals.mp3",
                    "vocals.mp3",
                    "drums.mp3",
                    "bass.mp3",
                    "guitar.mp3",
                    "piano.mp3",
                    "other.mp3",
                ]:
                    fullpath = path / file
                    if fullpath.exists():
                        sources[file] = fullpath
                return sources

            # NOTE(review): if a rerun ever reaches this point with
            # ``executed`` already True (the branch above skipped),
            # ``model_name``/``last_dir`` would be unbound. Today the
            # render-time ``reset_execution()`` call above keeps ``executed``
            # False across reruns, so this path is only hit right after the
            # separation branch — verify before changing that callback.
            sources = get_sources(out_path / Path(model_name) / last_dir)
            tab_sources = st.tabs([f"**{label_sources.get(k)}**" for k in sources.keys()])
            for i, (file, pathname) in enumerate(sources.items()):
                with tab_sources[i]:
                    cols = st.columns(2)
                    with cols[0]:
                        auseg = load_audio_segment(pathname, "mp3")
                        st.image(
                            plot_audio(auseg, title="", file=file),
                            use_column_width="always",
                        )
                    with cols[1]:
                        st_local_audio(pathname, key=f"output_{file}")
|
198 |
+
|
199 |
+
|
200 |
+
if __name__ == "__main__":
    # Streamlit executes the page top to bottom on every rerun.
    header()
    body()
    footer()
|
app/service/__init__.py
ADDED
File without changes
|
app/service/demucs_runner.py
ADDED
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import sys
|
3 |
+
from pathlib import Path
|
4 |
+
from typing import List
|
5 |
+
import os
|
6 |
+
from dora.log import fatal
|
7 |
+
import torch as th
|
8 |
+
|
9 |
+
from demucs.apply import apply_model, BagOfModels
|
10 |
+
from demucs.audio import save_audio
|
11 |
+
from demucs.pretrained import get_model_from_args, ModelLoadingError
|
12 |
+
from demucs.separate import load_track
|
13 |
+
|
14 |
+
import streamlit as st
|
15 |
+
|
16 |
+
|
17 |
+
@st.cache_data(show_spinner=False)
def separator(
    tracks: List[Path],
    out: Path,
    model: str,
    shifts: int,
    overlap: float,
    stem: str,
    int24: bool,
    float32: bool,
    clip_mode: str,
    mp3: bool,
    mp3_bitrate: int,
    verbose: bool,
    *args,
    **kwargs,
):
    """Separate the sources for the given tracks

    Args:
        tracks (Path): Path to tracks
        out (Path): Folder where to put extracted tracks. A subfolder with the model name will be
            created.
        model (str): Model name
        shifts (int): Number of random shifts for equivariant stabilization.
            Increase separation time but improves quality for Demucs.
            10 was used in the original paper.
        overlap (float): Overlap
        stem (str): Only separate audio into {STEM} and no_{STEM}.
        int24 (bool): Save wav output as 24 bits wav.
        float32 (bool): Save wav output as float32 (2x bigger).
        clip_mode (str): Strategy for avoiding clipping: rescaling entire signal if necessary
            (rescale) or hard clipping (clamp).
        mp3 (bool): Convert the output wavs to mp3.
        mp3_bitrate (int): Bitrate of converted mp3.
        verbose (bool): Verbose

    Note:
        Decorated with ``st.cache_data`` so identical calls are served from
        Streamlit's cache. Extra ``*args``/``**kwargs`` (e.g. start/end times
        passed by the caller) are ignored by the body but participate in the
        cache key.
    """

    if os.environ.get("LIMIT_CPU", False):
        # Constrained environment: keep torch single-threaded and run one job.
        th.set_num_threads(1)
        jobs = 1
    else:
        # Number of jobs. This can increase memory usage but will be much faster when
        # multiple cores are available.
        jobs = os.cpu_count()

    if th.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    # Build an argparse.Namespace mirroring demucs' CLI so its helper
    # functions (written against the CLI) can be reused unchanged.
    # NOTE: this rebinding shadows the *args tuple accepted above.
    args = argparse.Namespace()
    args.tracks = tracks
    args.out = out
    args.model = model
    args.device = device
    args.shifts = shifts
    args.overlap = overlap
    args.stem = stem
    args.int24 = int24
    args.float32 = float32
    args.clip_mode = clip_mode
    args.mp3 = mp3
    args.mp3_bitrate = mp3_bitrate
    args.jobs = jobs
    args.verbose = verbose
    args.filename = "{track}/{stem}.{ext}"
    args.split = True
    args.segment = None
    args.name = model
    args.repo = None

    # Load the pretrained model; abort (via dora's fatal) if unavailable.
    try:
        model = get_model_from_args(args)
    except ModelLoadingError as error:
        fatal(error.args[0])

    if args.segment is not None and args.segment < 8:
        fatal("Segment must greater than 8. ")

    # Guard against path traversal in the output filename template.
    if ".." in args.filename.replace("\\", "/").split("/"):
        fatal('".." must not appear in filename. ')

    if isinstance(model, BagOfModels):
        print(
            f"Selected model is a bag of {len(model.models)} models. "
            "You will see that many progress bars per track."
        )
        if args.segment is not None:
            for sub in model.models:
                sub.segment = args.segment
    else:
        if args.segment is not None:
            model.segment = args.segment

    model.cpu()
    model.eval()

    if args.stem is not None and args.stem not in model.sources:
        fatal(
            'error: stem "{stem}" is not in selected model. STEM must be one of {sources}.'.format(
                stem=args.stem, sources=", ".join(model.sources)
            )
        )
    # Outputs go under a subfolder named after the model.
    out = args.out / args.name
    out.mkdir(parents=True, exist_ok=True)
    print(f"Separated tracks will be stored in {out.resolve()}")
    for track in args.tracks:
        if not track.exists():
            print(
                f"File {track} does not exist. If the path contains spaces, "
                'please try again after surrounding the entire path with quotes "".',
                file=sys.stderr,
            )
            continue
        print(f"Separating track {track}")
        wav = load_track(track, model.audio_channels, model.samplerate)

        # Normalize by the mono reference's statistics; undone after separation.
        ref = wav.mean(0)
        wav = (wav - ref.mean()) / ref.std()
        sources = apply_model(
            model,
            wav[None],
            device=args.device,
            shifts=args.shifts,
            split=args.split,
            overlap=args.overlap,
            progress=True,
            num_workers=args.jobs,
        )[0]
        sources = sources * ref.std() + ref.mean()

        if args.mp3:
            ext = "mp3"
        else:
            ext = "wav"
        kwargs = {
            "samplerate": model.samplerate,
            "bitrate": args.mp3_bitrate,
            "clip": args.clip_mode,
            "as_float": args.float32,
            "bits_per_sample": 24 if args.int24 else 16,
        }
        if args.stem is None:
            # Full separation: save every source the model produces.
            for source, name in zip(sources, model.sources):
                stem = out / args.filename.format(
                    track=track.name.rsplit(".", 1)[0],
                    trackext=track.name.rsplit(".", 1)[-1],
                    stem=name,
                    ext=ext,
                )
                stem.parent.mkdir(parents=True, exist_ok=True)
                save_audio(source, str(stem), **kwargs)
        else:
            # Two-stem mode: save {stem}, then sum the rest into no_{stem}.
            sources = list(sources)
            stem = out / args.filename.format(
                track=track.name.rsplit(".", 1)[0],
                trackext=track.name.rsplit(".", 1)[-1],
                stem=args.stem,
                ext=ext,
            )
            stem.parent.mkdir(parents=True, exist_ok=True)
            save_audio(sources.pop(model.sources.index(args.stem)), str(stem), **kwargs)
            # Warning : after poping the stem, selected stem is no longer in the list 'sources'
            other_stem = th.zeros_like(sources[0])
            for i in sources:
                other_stem += i
            stem = out / args.filename.format(
                track=track.name.rsplit(".", 1)[0],
                trackext=track.name.rsplit(".", 1)[-1],
                stem="no_" + args.stem,
                ext=ext,
            )
            stem.parent.mkdir(parents=True, exist_ok=True)
            save_audio(other_stem, str(stem), **kwargs)
|
app/service/vocal_remover/__init__.py
ADDED
File without changes
|
app/service/vocal_remover/layers.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
|
5 |
+
|
6 |
+
def crop_center(h1, h2):
    """Center-crop *h1* along the last (time) axis to match *h2*'s width.

    Returns *h1* unchanged when the widths already match; raises ValueError
    when *h1* is narrower than *h2*.
    """
    width, target = h1.size()[3], h2.size()[3]

    if width == target:
        return h1
    if width < target:
        raise ValueError("h1_shape[3] must be greater than h2_shape[3]")

    start = (width - target) // 2
    return h1[:, :, :, start : start + target]
|
20 |
+
|
21 |
+
|
22 |
+
class Conv2DBNActiv(nn.Module):
    """Conv2d → BatchNorm2d → activation, packaged as one reusable block.

    The conv bias is disabled because BatchNorm's affine shift makes it
    redundant.
    """

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(Conv2DBNActiv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin,
                nout,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                bias=False,
            ),
            nn.BatchNorm2d(nout),
            activ(),
        )

    def forward(self, x):
        # Defined as ``forward`` (the original defined ``__call__``, which
        # bypasses nn.Module's hook/scripting machinery). Callers still invoke
        # the module directly, so behavior is unchanged.
        return self.conv(x)
|
41 |
+
|
42 |
+
|
43 |
+
class Encoder(nn.Module):
    """Two stacked Conv2DBNActiv blocks; the first may downsample via *stride*."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super(Encoder, self).__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, stride, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, 1, pad, activ=activ)

    def forward(self, x):
        # ``forward`` instead of the original ``__call__`` so nn.Module's hook
        # machinery keeps working; direct calls behave identically.
        h = self.conv1(x)
        h = self.conv2(h)

        return h
|
54 |
+
|
55 |
+
|
56 |
+
class Decoder(nn.Module):
    """Upsample x2 (bilinear), optionally concatenate a center-cropped skip
    connection, then apply one Conv2DBNActiv block with optional dropout."""

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
        super(Decoder, self).__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x, skip=None):
        # ``forward`` instead of the original ``__call__`` so nn.Module's hook
        # machinery keeps working; direct calls behave identically.
        x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=True)

        if skip is not None:
            # The skip tensor can be wider in time; crop it to match x.
            skip = crop_center(skip, x)
            x = torch.cat([x, skip], dim=1)

        h = self.conv1(x)

        if self.dropout is not None:
            h = self.dropout(h)

        return h
|
76 |
+
|
77 |
+
|
78 |
+
class ASPPModule(nn.Module):
    """Atrous spatial pyramid pooling block.

    Five parallel branches — a globally pooled branch (over dim 2), a 1x1
    conv, and three dilated 3x3 convs — are concatenated along channels and
    fused by a 1x1 bottleneck, with optional dropout on the result.
    """

    def __init__(self, nin, nout, dilations=(4, 8, 12), activ=nn.ReLU, dropout=False):
        super(ASPPModule, self).__init__()
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ),
        )
        self.conv2 = Conv2DBNActiv(nin, nout, 1, 1, 0, activ=activ)
        self.conv3 = Conv2DBNActiv(nin, nout, 3, 1, dilations[0], dilations[0], activ=activ)
        self.conv4 = Conv2DBNActiv(nin, nout, 3, 1, dilations[1], dilations[1], activ=activ)
        self.conv5 = Conv2DBNActiv(nin, nout, 3, 1, dilations[2], dilations[2], activ=activ)
        self.bottleneck = Conv2DBNActiv(nout * 5, nout, 1, 1, 0, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def forward(self, x):
        _, _, nbins, nframes = x.size()
        # The pooled branch is upsampled back to the input's spatial size.
        pooled = F.interpolate(
            self.conv1(x), size=(nbins, nframes), mode="bilinear", align_corners=True
        )
        branches = [pooled, self.conv2(x), self.conv3(x), self.conv4(x), self.conv5(x)]
        fused = self.bottleneck(torch.cat(branches, dim=1))
        return fused if self.dropout is None else self.dropout(fused)
|
106 |
+
|
107 |
+
|
108 |
+
class LSTMModule(nn.Module):
    """Bidirectional LSTM run across time frames, producing a single-channel
    band feature map with the same spatial shape as the input."""

    def __init__(self, nin_conv, nin_lstm, nout_lstm):
        super(LSTMModule, self).__init__()
        # 1x1 conv collapses the channel dimension to a single band map.
        self.conv = Conv2DBNActiv(nin_conv, 1, 1, 1, 0)
        # Bidirectional: the two nout_lstm // 2 halves concatenate to nout_lstm.
        self.lstm = nn.LSTM(input_size=nin_lstm, hidden_size=nout_lstm // 2, bidirectional=True)
        self.dense = nn.Sequential(
            nn.Linear(nout_lstm, nin_lstm), nn.BatchNorm1d(nin_lstm), nn.ReLU()
        )

    def forward(self, x):
        N, _, nbins, nframes = x.size()
        h = self.conv(x)[:, 0]  # N, nbins, nframes
        h = h.permute(2, 0, 1)  # nframes, N, nbins
        h, _ = self.lstm(h)
        # Flatten (frames, batch) so BatchNorm1d sees a 2-D input.
        h = self.dense(h.reshape(-1, h.size()[-1]))  # nframes * N, nbins
        h = h.reshape(nframes, N, 1, nbins)
        h = h.permute(1, 2, 3, 0)  # back to N, 1, nbins, nframes

        return h
|
app/service/vocal_remover/nets.py
ADDED
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
|
5 |
+
from app.service.vocal_remover import layers
|
6 |
+
|
7 |
+
|
8 |
+
class BaseNet(nn.Module):
    """U-Net-style encoder/decoder with an ASPP bottleneck and an auxiliary
    LSTM branch concatenated before the final decoder stage.

    NOTE: attribute names (enc1..enc5, aspp, dec1..dec4, lstm_dec2) are part
    of the pretrained checkpoint's state_dict keys — do not rename them.
    """

    def __init__(self, nin, nout, nin_lstm, nout_lstm, dilations=((4, 2), (8, 4), (12, 6))):
        super(BaseNet, self).__init__()
        self.enc1 = layers.Conv2DBNActiv(nin, nout, 3, 1, 1)
        # Each encoder stage doubles stride (2), widening channels as it goes.
        self.enc2 = layers.Encoder(nout, nout * 2, 3, 2, 1)
        self.enc3 = layers.Encoder(nout * 2, nout * 4, 3, 2, 1)
        self.enc4 = layers.Encoder(nout * 4, nout * 6, 3, 2, 1)
        self.enc5 = layers.Encoder(nout * 6, nout * 8, 3, 2, 1)

        self.aspp = layers.ASPPModule(nout * 8, nout * 8, dilations, dropout=True)

        # Decoder input widths account for the concatenated skip connections.
        self.dec4 = layers.Decoder(nout * (6 + 8), nout * 6, 3, 1, 1)
        self.dec3 = layers.Decoder(nout * (4 + 6), nout * 4, 3, 1, 1)
        self.dec2 = layers.Decoder(nout * (2 + 4), nout * 2, 3, 1, 1)
        self.lstm_dec2 = layers.LSTMModule(nout * 2, nin_lstm, nout_lstm)
        # +1 channel for the LSTM branch concatenated onto dec2's output.
        self.dec1 = layers.Decoder(nout * (1 + 2) + 1, nout * 1, 3, 1, 1)

    def __call__(self, x):
        # Encoder path.
        e1 = self.enc1(x)
        e2 = self.enc2(e1)
        e3 = self.enc3(e2)
        e4 = self.enc4(e3)
        e5 = self.enc5(e4)

        h = self.aspp(e5)

        # Decoder path with skip connections from the matching encoder stage.
        h = self.dec4(h, e4)
        h = self.dec3(h, e3)
        h = self.dec2(h, e2)
        h = torch.cat([h, self.lstm_dec2(h)], dim=1)
        h = self.dec1(h, e1)

        return h
|
41 |
+
|
42 |
+
|
43 |
+
class CascadedNet(nn.Module):
    """Three-stage cascaded band-split network producing a sigmoid spectrogram
    mask (tsurumeso's vocal-remover architecture).

    Stages 1 and 2 process the low and high frequency halves separately;
    stage 3 fuses the full band together with both stages' outputs.

    NOTE: attribute names are part of the pretrained checkpoint's state_dict
    keys — do not rename them.
    """

    def __init__(self, n_fft, nout=32, nout_lstm=128):
        super(CascadedNet, self).__init__()
        self.max_bin = n_fft // 2  # frequency bins actually processed
        self.output_bin = n_fft // 2 + 1  # bins the returned mask is padded to
        self.nin_lstm = self.max_bin // 2
        self.offset = 64  # time frames trimmed on each side by predict*()

        self.stg1_low_band_net = nn.Sequential(
            BaseNet(2, nout // 2, self.nin_lstm // 2, nout_lstm),
            layers.Conv2DBNActiv(nout // 2, nout // 4, 1, 1, 0),
        )
        self.stg1_high_band_net = BaseNet(2, nout // 4, self.nin_lstm // 2, nout_lstm // 2)

        self.stg2_low_band_net = nn.Sequential(
            BaseNet(nout // 4 + 2, nout, self.nin_lstm // 2, nout_lstm),
            layers.Conv2DBNActiv(nout, nout // 2, 1, 1, 0),
        )
        self.stg2_high_band_net = BaseNet(
            nout // 4 + 2, nout // 2, self.nin_lstm // 2, nout_lstm // 2
        )

        self.stg3_full_band_net = BaseNet(3 * nout // 4 + 2, nout, self.nin_lstm, nout_lstm)

        self.out = nn.Conv2d(nout, 2, 1, bias=False)
        self.aux_out = nn.Conv2d(3 * nout // 4, 2, 1, bias=False)

    def forward(self, x):
        # Keep only the first max_bin frequency bins.
        x = x[:, :, : self.max_bin]

        # Stage 1: split into low/high halves along the frequency axis (dim 2).
        bandw = x.size()[2] // 2
        l1_in = x[:, :, :bandw]
        h1_in = x[:, :, bandw:]
        l1 = self.stg1_low_band_net(l1_in)
        h1 = self.stg1_high_band_net(h1_in)
        aux1 = torch.cat([l1, h1], dim=2)

        # Stage 2: each band sees its raw input concatenated with stage-1 output.
        l2_in = torch.cat([l1_in, l1], dim=1)
        h2_in = torch.cat([h1_in, h1], dim=1)
        l2 = self.stg2_low_band_net(l2_in)
        h2 = self.stg2_high_band_net(h2_in)
        aux2 = torch.cat([l2, h2], dim=2)

        # Stage 3: full band, fed the raw input plus both stages' outputs.
        f3_in = torch.cat([x, aux1, aux2], dim=1)
        f3 = self.stg3_full_band_net(f3_in)

        # Sigmoid mask, padded back up to output_bin bins (top bin replicated).
        mask = torch.sigmoid(self.out(f3))
        mask = F.pad(
            input=mask,
            pad=(0, 0, 0, self.output_bin - mask.size()[2]),
            mode="replicate",
        )

        if self.training:
            # Auxiliary mask from the intermediate stages (training loss only).
            aux = torch.cat([aux1, aux2], dim=1)
            aux = torch.sigmoid(self.aux_out(aux))
            aux = F.pad(
                input=aux,
                pad=(0, 0, 0, self.output_bin - aux.size()[2]),
                mode="replicate",
            )
            return mask, aux
        else:
            return mask

    def predict_mask(self, x):
        """Return the mask with ``offset`` frames trimmed from each time edge."""
        mask = self.forward(x)

        if self.offset > 0:
            mask = mask[:, :, :, self.offset : -self.offset]
            assert mask.size()[3] > 0

        return mask

    def predict(self, x):
        """Return the masked magnitudes, trimmed like :meth:`predict_mask`."""
        mask = self.forward(x)
        pred_mag = x * mask

        if self.offset > 0:
            pred_mag = pred_mag[:, :, :, self.offset : -self.offset]
            assert pred_mag.size()[3] > 0

        return pred_mag
|
app/service/vocal_remover/runner.py
ADDED
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
import librosa
|
4 |
+
import numpy as np
|
5 |
+
import soundfile as sf
|
6 |
+
import torch
|
7 |
+
from stqdm import stqdm
|
8 |
+
import streamlit as st
|
9 |
+
from pydub import AudioSegment
|
10 |
+
|
11 |
+
from app.service.vocal_remover import nets
|
12 |
+
|
13 |
+
|
14 |
+
if os.environ.get("LIMIT_CPU", False):
|
15 |
+
torch.set_num_threads(1)
|
16 |
+
|
17 |
+
|
18 |
+
def merge_artifacts(y_mask, thres=0.05, min_range=64, fade_size=32):
    """Fill long above-threshold stretches of a mask with 1.0, with linear fades.

    Frames whose minimum mask value (across channels and bins) exceeds *thres*
    for more than *min_range* consecutive frames are pushed toward 1.0, with a
    *fade_size*-frame linear ramp on each side. Mutates *y_mask* in place and
    also returns it.

    Args:
        y_mask: mask array of shape (channels, bins, frames), values in [0, 1].
        thres: per-frame minimum value that counts as "active".
        min_range: minimum run length (frames) for a region to be merged.
        fade_size: length of the linear fade at each region edge.

    Raises:
        ValueError: if min_range cannot accommodate two fades.
    """
    if min_range < fade_size * 2:
        raise ValueError("min_range must be >= fade_size * 2")

    idx = np.where(y_mask.min(axis=(0, 1)) > thres)[0]
    if idx.size == 0:
        # Nothing exceeds the threshold anywhere: the original code crashed on
        # idx[0] here; simply return the mask unchanged instead.
        return y_mask

    # Start/end indices of each maximal run of consecutive active frames.
    start_idx = np.insert(idx[np.where(np.diff(idx) != 1)[0] + 1], 0, idx[0])
    end_idx = np.append(idx[np.where(np.diff(idx) != 1)[0]], idx[-1])
    artifact_idx = np.where(end_idx - start_idx > min_range)[0]
    weight = np.zeros_like(y_mask)
    if len(artifact_idx) > 0:
        start_idx = start_idx[artifact_idx]
        end_idx = end_idx[artifact_idx]
        old_e = None
        for s, e in zip(start_idx, end_idx):
            # Merge with the previous region when the gap is smaller than a fade.
            if old_e is not None and s - old_e < fade_size:
                s = old_e - fade_size * 2

            if s != 0:
                # Fade in over [s, s + fade_size).
                weight[:, :, s : s + fade_size] = np.linspace(0, 1, fade_size)
            else:
                # Region starts at the very beginning: skip the fade-in.
                s -= fade_size

            if e != y_mask.shape[2]:
                # Fade out over [e - fade_size, e).
                weight[:, :, e - fade_size : e] = np.linspace(1, 0, fade_size)
            else:
                # Region reaches the very end: skip the fade-out.
                e += fade_size

            # Full weight between the two fades.
            weight[:, :, s + fade_size : e - fade_size] = 1
            old_e = e

    # Blend toward 1.0 wherever weight is non-zero (in place).
    v_mask = 1 - y_mask
    y_mask += weight * v_mask

    return y_mask
|
52 |
+
|
53 |
+
|
54 |
+
def make_padding(width, cropsize, offset):
    """Compute left/right padding so *width* frames split evenly into crops.

    Returns ``(left, right, roi_size)`` where ``roi_size`` is the stride
    between consecutive crop windows (``cropsize`` minus the two trimmed
    offsets) and ``left``/``right`` are the padding amounts for each side.
    """
    left = offset
    roi_size = cropsize - 2 * offset
    if not roi_size:
        # Degenerate case: the offsets consume the whole crop; fall back to
        # non-overlapping full-size crops.
        roi_size = cropsize
    right = roi_size - (width % roi_size) + left
    return left, right, roi_size
|
62 |
+
|
63 |
+
|
64 |
+
def wave_to_spectrogram(wave, hop_length, n_fft):
    """STFT each stereo channel independently.

    Returns a complex array of shape (2, bins, frames) in Fortran order,
    matching what Separator.separate() expects.
    """
    left = np.asfortranarray(wave[0])
    right = np.asfortranarray(wave[1])

    channel_specs = [
        librosa.stft(channel, n_fft=n_fft, hop_length=hop_length)
        for channel in (left, right)
    ]
    return np.asfortranarray(channel_specs)
|
73 |
+
|
74 |
+
|
75 |
+
def spectrogram_to_wave(spec, hop_length=1024):
    """Inverse-STFT a mono (2-D) or stereo (3-D) complex spectrogram.

    Args:
        spec: complex spectrogram, shape (bins, frames) or (2, bins, frames).
        hop_length: STFT hop used by the forward transform.

    Returns:
        Mono waveform (n,) or channels-first stereo waveform (2, n).

    Raises:
        ValueError: if spec is neither 2- nor 3-dimensional (previously this
            surfaced as a confusing UnboundLocalError on ``wave``).
    """
    if spec.ndim == 2:
        wave = librosa.istft(spec, hop_length=hop_length)
    elif spec.ndim == 3:
        spec_left = np.asfortranarray(spec[0])
        spec_right = np.asfortranarray(spec[1])

        wave_left = librosa.istft(spec_left, hop_length=hop_length)
        wave_right = librosa.istft(spec_right, hop_length=hop_length)
        wave = np.asfortranarray([wave_left, wave_right])
    else:
        raise ValueError(f"spec must be 2- or 3-dimensional, got ndim={spec.ndim}")

    return wave
|
87 |
+
|
88 |
+
|
89 |
+
class Separator(object):
    """Runs a trained mask-prediction model over a magnitude spectrogram in crops.

    The spectrogram is padded and split into overlapping windows of
    ``cropsize`` frames; each window's central ``cropsize - 2 * offset``
    frames (the model's valid region) are stitched back together along time.
    """

    def __init__(self, model, device, batchsize, cropsize, postprocess=False, progress_bar=None):
        # model: network exposing .offset and .predict_mask() (see nets.CascadedNet).
        self.model = model
        self.offset = model.offset
        self.device = device
        self.batchsize = batchsize
        self.cropsize = cropsize
        # Whether to run merge_artifacts() on the predicted mask.
        self.postprocess = postprocess
        # Optional Streamlit container used by stqdm to render progress.
        self.progress_bar = progress_bar

    def _separate(self, X_mag_pad, roi_size):
        """Predict the mask crop-by-crop and concatenate along the time axis."""
        X_dataset = []
        patches = (X_mag_pad.shape[2] - 2 * self.offset) // roi_size
        for i in range(patches):
            start = i * roi_size
            X_mag_crop = X_mag_pad[:, :, start : start + self.cropsize]
            X_dataset.append(X_mag_crop)

        X_dataset = np.asarray(X_dataset)

        self.model.eval()
        with torch.no_grad():
            mask = []
            # To reduce the overhead, dataloader is not used.
            for i in stqdm(
                range(0, patches, self.batchsize),
                st_container=self.progress_bar,
                gui=False,
            ):
                X_batch = X_dataset[i : i + self.batchsize]
                X_batch = torch.from_numpy(X_batch).to(self.device)

                pred = self.model.predict_mask(X_batch)

                # Each batch element is a crop's valid region; joining them
                # along the frame axis reconstructs a contiguous mask.
                pred = pred.detach().cpu().numpy()
                pred = np.concatenate(pred, axis=2)
                mask.append(pred)

            mask = np.concatenate(mask, axis=2)

        return mask

    def _preprocess(self, X_spec):
        """Split a complex spectrogram into magnitude and phase."""
        X_mag = np.abs(X_spec)
        X_phase = np.angle(X_spec)

        return X_mag, X_phase

    def _postprocess(self, mask, X_mag, X_phase):
        """Recombine magnitude and phase; returns (masked, inverse-masked) spectra."""
        if self.postprocess:
            mask = merge_artifacts(mask)

        y_spec = mask * X_mag * np.exp(1.0j * X_phase)
        v_spec = (1 - mask) * X_mag * np.exp(1.0j * X_phase)

        return y_spec, v_spec

    def separate(self, X_spec):
        """Separate a complex spectrogram into two complementary spectrograms.

        Returns ``(y_spec, v_spec)`` — in this app, instrumental and vocals.
        """
        X_mag, X_phase = self._preprocess(X_spec)

        n_frame = X_mag.shape[2]
        pad_l, pad_r, roi_size = make_padding(n_frame, self.cropsize, self.offset)
        X_mag_pad = np.pad(X_mag, ((0, 0), (0, 0), (pad_l, pad_r)), mode="constant")
        # Normalize by the global peak so the network sees values in [0, 1].
        X_mag_pad /= X_mag_pad.max()

        mask = self._separate(X_mag_pad, roi_size)
        # Drop the padded tail so the mask matches the original frame count.
        mask = mask[:, :, :n_frame]

        y_spec, v_spec = self._postprocess(mask, X_mag, X_phase)

        return y_spec, v_spec
|
160 |
+
|
161 |
+
|
162 |
+
@st.cache_resource(show_spinner=False)
def load_model(pretrained_model, n_fft=2048):
    """Build a CascadedNet, pick a device, and load the pretrained weights.

    Cached by Streamlit so the checkpoint is only read once per process.
    Returns ``(model, device)``.
    """
    model = nets.CascadedNet(n_fft, 32, 128)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    if use_cuda:
        model.to(device)
    state_dict = torch.load(pretrained_model, map_location=device)
    model.load_state_dict(state_dict)
    return model, device
|
175 |
+
|
176 |
+
|
177 |
+
# @st.cache_data(show_spinner=False)
|
178 |
+
def separate(
    input,
    model,
    device,
    output_dir,
    batchsize=4,
    cropsize=256,
    postprocess=False,
    hop_length=1024,
    n_fft=2048,
    sr=44100,
    progress_bar=None,
    only_no_vocals=False,
):
    """Split an audio file into no_vocals.mp3 (and optionally vocals.mp3).

    Results are written under ``{output_dir}/vocal_remover/{basename}/``.
    *model* and *device* come from load_model(); *progress_bar* is an optional
    Streamlit container for stqdm progress rendering. When *only_no_vocals*
    is True, the vocals track is skipped.
    """
    X, sr = librosa.load(input, sr=sr, mono=False, dtype=np.float32, res_type="kaiser_fast")
    basename = os.path.splitext(os.path.basename(input))[0]

    if X.ndim == 1:
        # mono to stereo
        X = np.asarray([X, X])

    X_spec = wave_to_spectrogram(X, hop_length, n_fft)

    with torch.no_grad():
        sp = Separator(model, device, batchsize, cropsize, postprocess, progress_bar=progress_bar)
        y_spec, v_spec = sp.separate(X_spec)

    base_dir = f"{output_dir}/vocal_remover/{basename}"
    os.makedirs(base_dir, exist_ok=True)

    wave = spectrogram_to_wave(y_spec, hop_length=hop_length)
    try:
        sf.write(f"{base_dir}/no_vocals.mp3", wave.T, sr)
    except Exception:
        # soundfile may lack MP3 support depending on the libsndfile build;
        # fall back to pydub/ffmpeg.
        logging.error("Failed to write no_vocals.mp3, trying pydub...")
        pydub_write(wave, f"{base_dir}/no_vocals.mp3", sr)
    if only_no_vocals:
        return
    wave = spectrogram_to_wave(v_spec, hop_length=hop_length)
    try:
        sf.write(f"{base_dir}/vocals.mp3", wave.T, sr)
    except Exception:
        logging.error("Failed to write vocals.mp3, trying pydub...")
        pydub_write(wave, f"{base_dir}/vocals.mp3", sr)
|
222 |
+
|
223 |
+
|
224 |
+
def pydub_write(wave, output_path, frame_rate, audio_format="mp3"):
    """Export a float waveform via pydub/ffmpeg (fallback when soundfile fails).

    Accepts a mono array of shape (n,) or a channels-first stereo array of
    shape (2, n), matching the output of spectrogram_to_wave.
    """
    # Clip to [-1, 1] before scaling so out-of-range floats don't wrap around
    # when cast to int16.
    wave_16bit = (np.clip(wave, -1.0, 1.0) * 32767).astype(np.int16)

    if wave_16bit.ndim == 2:
        # Channels-first (2, n): interleave to frame order (n, 2) and declare
        # the real channel count. The previous code always claimed mono, which
        # de-interleaved stereo input and doubled its apparent duration.
        channels = wave_16bit.shape[0]
        raw = np.ascontiguousarray(wave_16bit.T).tobytes()
    else:
        channels = 1
        raw = wave_16bit.tobytes()

    audio_segment = AudioSegment(
        raw,
        frame_rate=frame_rate,
        sample_width=wave_16bit.dtype.itemsize,
        channels=channels,
    )
    audio_segment.export(output_path, format=audio_format)
|
app/service/youtube.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from typing import List
|
3 |
+
import yt_dlp
|
4 |
+
import string
|
5 |
+
import time
|
6 |
+
import re
|
7 |
+
import streamlit as st
|
8 |
+
from pytube import Search
|
9 |
+
|
10 |
+
|
11 |
+
def _sanitize_filename(filename):
|
12 |
+
safe_chars = "-_.() %s%s" % (
|
13 |
+
re.escape(string.ascii_letters),
|
14 |
+
re.escape(string.digits),
|
15 |
+
)
|
16 |
+
safe_filename = re.sub(f"[^{safe_chars}]", "_", filename)
|
17 |
+
return safe_filename.strip()
|
18 |
+
|
19 |
+
|
20 |
+
@st.cache_data(show_spinner=False)
def download_audio_from_youtube(url, output_path):
    """Download a YouTube video's audio track as MP3 into *output_path*.

    Returns the sanitized ``"<title>.mp3"`` filename, or None when the video
    is rejected (longer than 6 minutes). Cached per (url, output_path) by
    Streamlit.
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    with yt_dlp.YoutubeDL() as ydl:
        info_dict = ydl.extract_info(url, download=False)
        # "duration" can be missing/None (e.g. live streams); treat that as 0
        # instead of crashing on a None > int comparison.
        if (info_dict.get("duration") or 0) > 360:
            st.error("Song is too long. Please use a song no longer than 6 minutes.")
            return
        video_title = info_dict.get("title", None)
        video_title = _sanitize_filename(video_title)
    ydl_opts = {
        "format": "bestaudio/best",
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "192",
            }
        ],
        # The FFmpegExtractAudio postprocessor appends the ".mp3" extension.
        "outtmpl": os.path.join(output_path, video_title),
        #'quiet': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return f"{video_title}.mp3"
|
47 |
+
|
48 |
+
|
49 |
+
@st.cache_data(show_spinner=False)
def query_youtube(query: str) -> Search:
    """Run a pytube YouTube search; cached per query string by Streamlit."""
    return Search(query)
52 |
+
|
53 |
+
|
54 |
+
def search_youtube(query: str) -> List:
    """Return YouTube video titles matching *query* (with " lyrics" appended).

    Side effect: stores the raw results and the title list in
    ``st.session_state`` for later lookup by get_youtube_url().
    Queries of 3 characters or fewer return an empty list without searching.
    """
    if len(query) > 3:
        # Small delay so each keystroke doesn't immediately fire a search.
        time.sleep(0.5)
        search = query_youtube(query + " lyrics")
        st.session_state.search_results = search.results
        video_options = [video.title for video in st.session_state.search_results]
        st.session_state.video_options = video_options
    else:
        video_options = []
    return video_options
|
64 |
+
|
65 |
+
|
66 |
+
def get_youtube_url(title: str) -> str:
    """Look up the embed URL of a previously searched video by its title.

    Relies on search_youtube() having populated ``st.session_state`` first;
    raises ValueError (from .index) if the title is not among the options.
    """
    video = st.session_state.search_results[st.session_state.video_options.index(title)]
    return video.embed_url
|
69 |
+
|
70 |
+
|
71 |
+
def check_if_is_youtube_url(url: str) -> bool:
    """Heuristically decide whether *url* is a URL rather than a search query.

    NOTE: despite the name, this only tests for an "http" prefix — any
    http(s) URL passes, not just YouTube links.
    """
    return url[:4] == "http"
|
app/style.py
ADDED
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Google Fonts used for the app's title and subtitle (interpolated into CSS below).
_font_title = "Monoton"
_font_subtitle = "Exo"

# Raw HTML/CSS injected into the Streamlit page (via st.markdown with
# unsafe_allow_html): loads the two fonts and overrides Streamlit's generated
# class names to restyle the layout.
# NOTE(review): the ".css-*" selectors are Streamlit build artifacts and break
# on Streamlit upgrades — confirm against the pinned streamlit==1.22.0.
CSS = (
    """
<!-- Add the font link from Google Fonts -->
<link href="https://fonts.googleapis.com/css2?family="""
    + _font_title
    + """&display=swap" rel="stylesheet">
<link href="https://fonts.googleapis.com/css2?family="""
    + _font_subtitle
    + """&display=swap" rel="stylesheet">

<style>
/* Remove the streamlit header */
header[data-testid="stHeader"] {
display: none;
}
/* Remove the sidebar menu */
div[data-testid="collapsedControl"]{
display: none;
}
/* Background */
.css-z5fcl4 {
padding: 0.5rem;
padding-top: 0rem;
}

/* Distances between the title and the image in mobile */
.css-1uifejx.e1tzin5v1 {
margin-bottom: 0px;
padding-bottom: 0px;
}
h1 {
padding-top: 0px;
}


/* Center the image within its container */
.css-1kyxreq {
justify-content: center;
}

/* Remove fixed width from the image container */
.css-1kyxreq.etr89bj2 {
width: 100% !important;
}

/* Center the title */
.css-k7vsyb {
text-align: center;
}

/* Hide the anchor button */
.css-zt5igj.e16nr0p33 a {
display: none;
}
/* Hide the full screen button */
.css-e370rw.e19lei0e1 {
display: none;
}
.css-6awftf.e19lei0e1 {
display: none;
}

/* Desktop */
@media (min-width: 640px) {
.stMarkdown {
max-width: 100%;
width: auto;
display: inline-block;
}
/* Dynamically add space between the image and the title */
.css-1kyxreq {
justify-content: right;
}
}

/* Add space after the image and the title */
.css-1a32fsj {
margin-right: 0px;
}

/* Apply the futuristic font to the text title*/
#moseca {
font-family: '"""
    + _font_title
    + """', sans-serif;
font-size: 3rem;
text-align: center;
/* Align the text to the center of the box */
align-items: center;
/* Set the line height to the same as the height of the box */
line-height: 3.5rem;
margin-bottom: -1rem;
}

/* subtitle */
.css-5rimss p, .css-nahz7x p {
font-family: """
    + _font_subtitle
    + """, sans-serif;
font-size: 0.8rem;
text-align: center;
}

/* Desktop */
@media (min-width: 640px) {
.css-zt5igj, .css-nahz7x p {
text-align: left;
}
.css-5rimss p {
text-align: left;
}
}

.st-af {
align-items: center;
padding-right: 2rem;
}

/* Remove the gap around the player */
.css-434r0z {
gap: 0rem;
}


</style>

"""
)
|
img/bmc-button.png
ADDED
![]() |
img/image_stems.png
ADDED
![]() |
img/karaoke_fun.png
ADDED
![]() |
img/logo_moseca.png
ADDED
![]() |
img/state-of-art.png
ADDED
![]() |
pyproject.toml
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Tooling configuration: formatter (Black), import sorter (isort), linter
# (Ruff), and pytest path setup.

[tool.black]
line-length = 100
target-version = ['py39', 'py310']
# NOTE(review): "preview_string_processing" is not a documented Black option
# (Black's preview flag is `preview`) — confirm this key is actually honored.
preview_string_processing = true

[tool.isort]
profile = 'black'
multi_line_output = 3

[tool.ruff]
line-length = 100
# Line length is enforced by Black at 100 columns, so E501 is redundant here.
ignore = ['E501']


[tool.pytest.ini_options]
pythonpath = [
    "app",
]
testpaths = "tests"
|
requirements.in
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==1.22.0
|
2 |
+
demucs==4.0.0
|
3 |
+
plotly==5.13.0
|
4 |
+
pandas==1.5.3
|
5 |
+
pydub==0.25.1
|
6 |
+
pytube==12.1.3
|
7 |
+
streamlit-player==0.1.5
|
8 |
+
streamlit-searchbox==0.1.2
|
9 |
+
yt-dlp==2023.3.4
|
10 |
+
kaleido==0.2.1
|
11 |
+
matplotlib==3.7.1
|
12 |
+
librosa==0.10.0.post2
|
13 |
+
resampy==0.4.2
|
14 |
+
stqdm==0.0.5
|
15 |
+
streamlit_option_menu==0.3.6
|
16 |
+
htbuilder==0.6.1
|
requirements.txt
ADDED
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#
|
2 |
+
# This file is autogenerated by pip-compile with Python 3.8
|
3 |
+
# by the following command:
|
4 |
+
#
|
5 |
+
# pip-compile --output-file=requirements.txt --resolver=backtracking requirements.in
|
6 |
+
#
|
7 |
+
altair==4.2.2
|
8 |
+
# via streamlit
|
9 |
+
antlr4-python3-runtime==4.9.3
|
10 |
+
# via omegaconf
|
11 |
+
appdirs==1.4.4
|
12 |
+
# via pooch
|
13 |
+
attrs==23.1.0
|
14 |
+
# via jsonschema
|
15 |
+
audioread==3.0.0
|
16 |
+
# via librosa
|
17 |
+
backports-zoneinfo==0.2.1
|
18 |
+
# via tzlocal
|
19 |
+
blinker==1.6.2
|
20 |
+
# via streamlit
|
21 |
+
brotli==1.0.9
|
22 |
+
# via yt-dlp
|
23 |
+
cachetools==5.3.1
|
24 |
+
# via streamlit
|
25 |
+
certifi==2023.5.7
|
26 |
+
# via
|
27 |
+
# requests
|
28 |
+
# yt-dlp
|
29 |
+
cffi==1.15.1
|
30 |
+
# via soundfile
|
31 |
+
charset-normalizer==3.1.0
|
32 |
+
# via requests
|
33 |
+
click==8.1.3
|
34 |
+
# via streamlit
|
35 |
+
cloudpickle==2.2.1
|
36 |
+
# via submitit
|
37 |
+
contourpy==1.1.0
|
38 |
+
# via matplotlib
|
39 |
+
cycler==0.11.0
|
40 |
+
# via matplotlib
|
41 |
+
cython==0.29.35
|
42 |
+
# via diffq
|
43 |
+
decorator==5.1.1
|
44 |
+
# via
|
45 |
+
# librosa
|
46 |
+
# validators
|
47 |
+
demucs==4.0.0
|
48 |
+
# via -r requirements.in
|
49 |
+
diffq==0.2.4
|
50 |
+
# via demucs
|
51 |
+
dora-search==0.1.12
|
52 |
+
# via demucs
|
53 |
+
einops==0.6.1
|
54 |
+
# via demucs
|
55 |
+
entrypoints==0.4
|
56 |
+
# via altair
|
57 |
+
filelock==3.12.2
|
58 |
+
# via torch
|
59 |
+
fonttools==4.40.0
|
60 |
+
# via matplotlib
|
61 |
+
gitdb==4.0.10
|
62 |
+
# via gitpython
|
63 |
+
gitpython==3.1.31
|
64 |
+
# via streamlit
|
65 |
+
htbuilder==0.6.1
|
66 |
+
# via -r requirements.in
|
67 |
+
idna==3.4
|
68 |
+
# via requests
|
69 |
+
importlib-metadata==6.7.0
|
70 |
+
# via
|
71 |
+
# numba
|
72 |
+
# streamlit
|
73 |
+
importlib-resources==5.12.0
|
74 |
+
# via
|
75 |
+
# jsonschema
|
76 |
+
# matplotlib
|
77 |
+
jinja2==3.1.2
|
78 |
+
# via
|
79 |
+
# altair
|
80 |
+
# pydeck
|
81 |
+
# torch
|
82 |
+
joblib==1.3.1
|
83 |
+
# via
|
84 |
+
# librosa
|
85 |
+
# scikit-learn
|
86 |
+
jsonschema==4.17.3
|
87 |
+
# via altair
|
88 |
+
julius==0.2.7
|
89 |
+
# via demucs
|
90 |
+
kaleido==0.2.1
|
91 |
+
# via -r requirements.in
|
92 |
+
kiwisolver==1.4.4
|
93 |
+
# via matplotlib
|
94 |
+
lameenc==1.5.0
|
95 |
+
# via demucs
|
96 |
+
lazy-loader==0.2
|
97 |
+
# via librosa
|
98 |
+
librosa==0.10.0.post2
|
99 |
+
# via -r requirements.in
|
100 |
+
llvmlite==0.40.1
|
101 |
+
# via numba
|
102 |
+
markdown-it-py==3.0.0
|
103 |
+
# via rich
|
104 |
+
markupsafe==2.1.3
|
105 |
+
# via jinja2
|
106 |
+
matplotlib==3.7.1
|
107 |
+
# via -r requirements.in
|
108 |
+
mdurl==0.1.2
|
109 |
+
# via markdown-it-py
|
110 |
+
more-itertools==9.1.0
|
111 |
+
# via htbuilder
|
112 |
+
mpmath==1.3.0
|
113 |
+
# via sympy
|
114 |
+
msgpack==1.0.5
|
115 |
+
# via librosa
|
116 |
+
mutagen==1.46.0
|
117 |
+
# via yt-dlp
|
118 |
+
networkx==3.1
|
119 |
+
# via torch
|
120 |
+
numba==0.57.1
|
121 |
+
# via
|
122 |
+
# librosa
|
123 |
+
# resampy
|
124 |
+
numpy==1.24.4
|
125 |
+
# via
|
126 |
+
# altair
|
127 |
+
# contourpy
|
128 |
+
# diffq
|
129 |
+
# librosa
|
130 |
+
# matplotlib
|
131 |
+
# numba
|
132 |
+
# openunmix
|
133 |
+
# pandas
|
134 |
+
# pyarrow
|
135 |
+
# pydeck
|
136 |
+
# resampy
|
137 |
+
# scikit-learn
|
138 |
+
# scipy
|
139 |
+
# soxr
|
140 |
+
# streamlit
|
141 |
+
omegaconf==2.3.0
|
142 |
+
# via dora-search
|
143 |
+
openunmix==1.2.1
|
144 |
+
# via demucs
|
145 |
+
packaging==23.1
|
146 |
+
# via
|
147 |
+
# matplotlib
|
148 |
+
# pooch
|
149 |
+
# streamlit
|
150 |
+
pandas==1.5.3
|
151 |
+
# via
|
152 |
+
# -r requirements.in
|
153 |
+
# altair
|
154 |
+
# streamlit
|
155 |
+
pillow==9.5.0
|
156 |
+
# via
|
157 |
+
# matplotlib
|
158 |
+
# streamlit
|
159 |
+
pkgutil-resolve-name==1.3.10
|
160 |
+
# via jsonschema
|
161 |
+
plotly==5.13.0
|
162 |
+
# via -r requirements.in
|
163 |
+
pooch==1.6.0
|
164 |
+
# via librosa
|
165 |
+
protobuf==3.20.3
|
166 |
+
# via streamlit
|
167 |
+
pyarrow==12.0.1
|
168 |
+
# via streamlit
|
169 |
+
pycparser==2.21
|
170 |
+
# via cffi
|
171 |
+
pycryptodomex==3.18.0
|
172 |
+
# via yt-dlp
|
173 |
+
pydeck==0.8.1b0
|
174 |
+
# via streamlit
|
175 |
+
pydub==0.25.1
|
176 |
+
# via -r requirements.in
|
177 |
+
pygments==2.15.1
|
178 |
+
# via rich
|
179 |
+
pympler==1.0.1
|
180 |
+
# via streamlit
|
181 |
+
pyparsing==3.1.0
|
182 |
+
# via matplotlib
|
183 |
+
pyrsistent==0.19.3
|
184 |
+
# via jsonschema
|
185 |
+
python-dateutil==2.8.2
|
186 |
+
# via
|
187 |
+
# matplotlib
|
188 |
+
# pandas
|
189 |
+
# streamlit
|
190 |
+
pytube==12.1.3
|
191 |
+
# via -r requirements.in
|
192 |
+
pytz==2023.3
|
193 |
+
# via pandas
|
194 |
+
pyyaml==6.0
|
195 |
+
# via
|
196 |
+
# demucs
|
197 |
+
# omegaconf
|
198 |
+
requests==2.31.0
|
199 |
+
# via
|
200 |
+
# pooch
|
201 |
+
# streamlit
|
202 |
+
resampy==0.4.2
|
203 |
+
# via -r requirements.in
|
204 |
+
retrying==1.3.4
|
205 |
+
# via dora-search
|
206 |
+
rich==13.4.2
|
207 |
+
# via streamlit
|
208 |
+
scikit-learn==1.3.0
|
209 |
+
# via librosa
|
210 |
+
scipy==1.10.1
|
211 |
+
# via
|
212 |
+
# librosa
|
213 |
+
# scikit-learn
|
214 |
+
six==1.16.0
|
215 |
+
# via
|
216 |
+
# python-dateutil
|
217 |
+
# retrying
|
218 |
+
smmap==5.0.0
|
219 |
+
# via gitdb
|
220 |
+
soundfile==0.12.1
|
221 |
+
# via librosa
|
222 |
+
soxr==0.3.5
|
223 |
+
# via librosa
|
224 |
+
stqdm==0.0.5
|
225 |
+
# via -r requirements.in
|
226 |
+
streamlit==1.22.0
|
227 |
+
# via
|
228 |
+
# -r requirements.in
|
229 |
+
# stqdm
|
230 |
+
# streamlit-option-menu
|
231 |
+
# streamlit-player
|
232 |
+
# streamlit-searchbox
|
233 |
+
streamlit-option-menu==0.3.6
|
234 |
+
# via -r requirements.in
|
235 |
+
streamlit-player==0.1.5
|
236 |
+
# via -r requirements.in
|
237 |
+
streamlit-searchbox==0.1.2
|
238 |
+
# via -r requirements.in
|
239 |
+
submitit==1.4.5
|
240 |
+
# via dora-search
|
241 |
+
sympy==1.12
|
242 |
+
# via torch
|
243 |
+
tenacity==8.2.2
|
244 |
+
# via
|
245 |
+
# plotly
|
246 |
+
# streamlit
|
247 |
+
threadpoolctl==3.1.0
|
248 |
+
# via scikit-learn
|
249 |
+
toml==0.10.2
|
250 |
+
# via streamlit
|
251 |
+
toolz==0.12.0
|
252 |
+
# via altair
|
253 |
+
torch==2.0.1
|
254 |
+
# via
|
255 |
+
# demucs
|
256 |
+
# diffq
|
257 |
+
# dora-search
|
258 |
+
# julius
|
259 |
+
# openunmix
|
260 |
+
# torchaudio
|
261 |
+
torchaudio==2.0.2
|
262 |
+
# via
|
263 |
+
# demucs
|
264 |
+
# openunmix
|
265 |
+
tornado==6.3.2
|
266 |
+
# via streamlit
|
267 |
+
tqdm==4.65.0
|
268 |
+
# via
|
269 |
+
# demucs
|
270 |
+
# openunmix
|
271 |
+
# stqdm
|
272 |
+
treetable==0.2.5
|
273 |
+
# via dora-search
|
274 |
+
typing-extensions==4.7.0
|
275 |
+
# via
|
276 |
+
# librosa
|
277 |
+
# rich
|
278 |
+
# streamlit
|
279 |
+
# submitit
|
280 |
+
# torch
|
281 |
+
tzlocal==5.0.1
|
282 |
+
# via streamlit
|
283 |
+
urllib3==2.0.3
|
284 |
+
# via requests
|
285 |
+
validators==0.20.0
|
286 |
+
# via streamlit
|
287 |
+
websockets==11.0.3
|
288 |
+
# via yt-dlp
|
289 |
+
yt-dlp==2023.3.4
|
290 |
+
# via -r requirements.in
|
291 |
+
zipp==3.15.0
|
292 |
+
# via
|
293 |
+
# importlib-metadata
|
294 |
+
# importlib-resources
|
scripts/inference.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
|
3 |
+
import warnings
|
4 |
+
from app.service.vocal_remover.runner import load_model, separate
|
5 |
+
|
6 |
+
warnings.simplefilter("ignore", UserWarning)
|
7 |
+
warnings.simplefilter("ignore", FutureWarning)
|
8 |
+
warnings.filterwarnings("ignore", module="streamlit")
|
9 |
+
|
10 |
+
|
11 |
+
def main():
    """CLI entry point: strip vocals from one audio file using the vocal remover.

    Writes results under ``{output_dir}/vocal_remover/{basename}/`` via
    app.service.vocal_remover.runner.separate (only the no-vocals track).
    """
    p = argparse.ArgumentParser()
    # NOTE(review): --gpu is parsed but never used below — device selection
    # happens inside load_model(); confirm whether the flag should be wired in.
    p.add_argument("--gpu", "-g", type=int, default=-1)
    p.add_argument("--pretrained_model", "-P", type=str, default="baseline.pth")
    p.add_argument("--input", "-i", required=True)
    p.add_argument("--output_dir", "-o", type=str, default="")
    args = p.parse_args()

    model, device = load_model(pretrained_model=args.pretrained_model)
    separate(
        input=args.input,
        model=model,
        device=device,
        output_dir=args.output_dir,
        only_no_vocals=True,
    )


if __name__ == "__main__":
    main()
|
scripts/prepare_samples.sh
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash

# Download each sample song listed in sample_songs.json with yt-dlp and run
# the vocal-remover inference on it, leaving results under /tmp.

# Read JSON file into a variable
json=$(cat sample_songs.json)

# Iterate through keys and values
for name in $(echo "${json}" | jq -r 'keys[]'); do
    url=$(echo "${json}" | jq -r --arg name "${name}" '.[$name]')
    echo "Separating ${name} from ${url}"

    # Download the audio track with yt-dlp (variables quoted so URLs with
    # '&' or titles with spaces don't undergo word splitting).
    yt-dlp "${url}" -o "/tmp/${name}" --format "bestaudio/best"
    mkdir -p "/tmp/vocal_remover"

    # Run inference
    python inference.py --input "/tmp/${name}" --output "/tmp"
    echo "Done separating ${name}"
done
|
scripts/sample_songs.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dancing_queen": "https://www.youtube.com/watch?v=3qiMJt-JBb4",
|
3 |
+
"bohemian_rhapsody": "https://www.youtube.com/watch?v=yk3prd8GER4",
|
4 |
+
"i_want_it_that_way": "https://www.youtube.com/watch?v=qjlVAsvQLM8",
|
5 |
+
"let_it_be": "https://www.youtube.com/watch?v=FIV73iG_e5I",
|
6 |
+
"viva_la_vida": "https://www.youtube.com/watch?v=a1EYnngNHIA",
|
7 |
+
"zombie": "https://www.youtube.com/watch?v=8sM-rm4lFZg"
|
8 |
+
}
|