Spaces:

TIMBOVILL
/

UltraSingerUI

Sleeping

App Files Files Community

TIMBOVILL committed on Jun 16, 2024

Commit

f5799e7

verified ·

1 Parent(s): 4f6ccf0

Upload 5 files

Browse files

Files changed (5) hide show

src/modules/Audio/denoise.py +35 -0
src/modules/Audio/separation.py +25 -0
src/modules/Audio/silence_processing.py +120 -0
src/modules/Audio/vocal_chunks.py +89 -0
src/modules/Audio/youtube.py +89 -0

src/modules/Audio/denoise.py ADDED Viewed

	@@ -0,0 +1,35 @@

+"""Reduce noise from audio"""
+import ffmpeg
+from modules.console_colors import ULTRASINGER_HEAD, blue_highlighted
+def ffmpeg_reduce_noise(input_file_path: str, output_file: str) -> None:
+    """Reduce noise from vocal audio with ffmpeg."""
+    # Denoise audio samples with FFT.
+    # A description of the accepted parameters follows.
+    # noise_reduction, nr
+    #    Set the noise reduction in dB, allowed range is 0.01 to 97. Default value is 12 dB.
+    # noise_floor, nf
+    #    Set the noise floor in dB, allowed range is -80 to -20. Default value is -50 dB.
+    # track_noise, tn
+    #    Enable noise floor tracking. By default is disabled.
+    #    With this enabled, noise floor is automatically adjusted.
+    print(
+        f"{ULTRASINGER_HEAD} Reduce noise from vocal audio with {blue_highlighted('ffmpeg')}."
+    )
+    try:
+        (
+            ffmpeg.input(input_file_path)
+            .output(output_file, af="afftdn=nr=70:nf=-80:tn=1")
+            .overwrite_output()
+            .run(capture_stdout=True, capture_stderr=True)
+        )
+    except ffmpeg.Error as ffmpeg_exception:
+        print("ffmpeg stdout:", ffmpeg_exception.stdout.decode("utf8"))
+        print("ffmpeg stderr:", ffmpeg_exception.stderr.decode("utf8"))
+        raise ffmpeg_exception

src/modules/Audio/separation.py ADDED Viewed

	@@ -0,0 +1,25 @@

+"""Separate vocals from audio"""
+import subprocess
+from modules.console_colors import (
+    ULTRASINGER_HEAD,
+    blue_highlighted,
+    red_highlighted,
+)
+from modules.os_helper import current_executor_path, move, path_join
+def separate_audio(input_file_path: str, output_file: str, device="cpu") -> None:
+    """Separate vocals from audio with demucs."""
+    print(
+        f"{ULTRASINGER_HEAD} Separating vocals from audio with {blue_highlighted('demucs')} and {red_highlighted(device)} as worker."
+    )
+    # Model selection?
+    # -n htdemucs_ft
+    subprocess.run(
+        ["demucs", "-d", device, "--two-stems=vocals", "--float32", input_file_path]
+    )
+    separated_folder = path_join(current_executor_path(), "separated")
+    move(separated_folder, output_file)

src/modules/Audio/silence_processing.py ADDED Viewed

	@@ -0,0 +1,120 @@

+"""Silence processing module"""
+from pydub import AudioSegment, silence
+from modules.console_colors import ULTRASINGER_HEAD
+from modules.Speech_Recognition.TranscribedData import TranscribedData
+def remove_silence_from_transcription_data(audio_path: str, transcribed_data: list[TranscribedData]) -> list[
+    TranscribedData]:
+    """Remove silence from given transcription data"""
+    print(
+        f"{ULTRASINGER_HEAD} Removing silent parts from transcription data"
+    )
+    silence_timestamps = get_silence_sections(audio_path)
+    data = remove_silence(silence_timestamps, transcribed_data)
+    return data
+def get_silence_sections(audio_path: str,
+                         min_silence_len=50,
+                         silence_thresh=-50) -> list[tuple[float, float]]:
+    y = AudioSegment.from_wav(audio_path)
+    s = silence.detect_silence(y, min_silence_len=min_silence_len, silence_thresh=silence_thresh)
+    s = [((start / 1000), (stop / 1000)) for start, stop in s]  # convert to sec
+    return s
+def remove_silence(silence_parts_list: list[tuple[float, float]], transcribed_data: list[TranscribedData]):
+    """Trim, split or drop transcribed words so they do not overlap silent sections.
+
+    Args:
+        silence_parts_list: (start, end) pairs of silent sections. They are
+            compared directly against each word's ``start`` and ``end``.
+            NOTE(review): the look-ahead at the next list entry presumably
+            expects the pairs in ascending time order - confirm with caller.
+        transcribed_data: Words to process. The objects are modified in place
+            (``start``, ``end``, ``word`` and ``is_word_end`` may change).
+
+    Returns:
+        A new list with the kept words plus the "~" continuation entries that
+        are created where a silent section lies inside a word.
+    """
+    new_transcribed_data = []
+    for data in transcribed_data:
+        # The word is kept by default; it is removed again further down if a
+        # silent section covers it completely.
+        new_transcribed_data.append(data)
+        # data.end may be changed below, so remember the untouched end time.
+        origin_end = data.end
+        was_split = False
+        for silence_start, silence_end in silence_parts_list:
+            # No overlap between this silent section and the word: skip it.
+            # |    ****    | silence
+            # |  **    **  | data
+            # |0 1 2 3 4 5 | time
+            if silence_start > origin_end or silence_end < data.start:
+                continue
+            # The silent section lies inside the word.
+            # |    **  **    | silence
+            # |  **********  | data
+            # |0 1 2 3 4 5 6 | time
+            if silence_start >= data.start and silence_end <= origin_end:
+                # Look at the following silent section to decide where the
+                # piece after this silence ends. index() is a linear search
+                # and returns the first pair equal to the current one.
+                next_index = silence_parts_list.index((silence_start, silence_end)) + 1
+                if next_index < len(silence_parts_list) and silence_parts_list[next_index][0] < origin_end:
+                    # Another silence starts before the word ends: the piece
+                    # only lasts until that next silence begins.
+                    split_end = silence_parts_list[next_index][0]
+                    if silence_parts_list[next_index][1] >= origin_end:
+                        # The next silence reaches the end of the word, so
+                        # this piece is the last one ("~ " with trailing space).
+                        split_word = "~ "
+                        is_word_end = True
+                    else:
+                        split_word = "~"
+                        is_word_end = False
+                else:
+                    # No further silence inside the word: the piece runs to
+                    # the original end of the word.
+                    split_end = origin_end
+                    split_word = "~ "
+                    is_word_end = True
+                # Continuation entry covering the audio after this silence.
+                split_data = TranscribedData({"conf": data.conf, "word": split_word, "end": split_end, "start": silence_end, "is_word_end": is_word_end})
+                if not was_split:
+                    # First split of this word: cut the word at the silence.
+                    data.end = silence_start
+                    if data.end - data.start < 0.1:
+                        # The part before the silence is shorter than 0.1:
+                        # move the word itself to the part after the silence
+                        # instead of adding a continuation entry.
+                        data.start = silence_end
+                        data.end = split_end
+                        continue
+                    if split_data.end - split_data.start <= 0.1:
+                        # The part after the silence is too short to keep;
+                        # the word stays truncated at the silence start.
+                        continue
+                    data.is_word_end = False
+                    # Remove last whitespace from the data.word
+                    if data.word[-1] == " ":
+                        data.word = data.word[:-1]
+                if split_data.end - split_data.start > 0.1:
+                    was_split = True
+                    new_transcribed_data.append(split_data)
+                elif split_word == "~ " and not data.is_word_end:
+                    # The final piece was too short to add, so the last entry
+                    # added so far becomes the word end and gets its trailing
+                    # space back.
+                    if new_transcribed_data[-1].word[-1] != " ":
+                        new_transcribed_data[-1].word += " "
+                    new_transcribed_data[-1].is_word_end = True
+                continue
+            # The silent section covers the whole word: drop the word.
+            # |    ****  | silence
+            # |     **   | data
+            # |0 1 2 3 4 | time
+            if silence_start < data.start and silence_end > origin_end:
+                new_transcribed_data.remove(data)
+                break
+            # The silence overlaps the beginning of the word: start after it.
+            # |    ****    | silence
+            # |      ****  | data
+            # |0 1 2 3 4 5 | time
+            if silence_start < data.start:
+                data.start = silence_end
+            # The silence overlaps the end of the word: end before it.
+            # |    ****  | silence
+            # |  ****    | data
+            # |0 1 2 3 4 | time
+            if silence_end > origin_end:
+                data.end = silence_start
+            # NOTE(review): silence_start > origin_end is already handled by
+            # the `continue` at the top of this loop, so this branch appears
+            # to be unreachable.
+            # |    ****  | silence
+            # |  **      | data
+            # |0 1 2 3 4 | time
+            if silence_start > origin_end:
+                # Nothing to do with this word anymore, go to next word
+                break
+    return new_transcribed_data

src/modules/Audio/vocal_chunks.py ADDED Viewed

	@@ -0,0 +1,89 @@

+"""Vocal chunks module."""
+import os
+import re
+import wave
+from modules.console_colors import ULTRASINGER_HEAD
+from modules.os_helper import create_folder
+from modules.Ultrastar.ultrastar_converter import (
+    get_end_time_from_ultrastar,
+    get_start_time_from_ultrastar,
+)
+from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue
+class AudioManipulation:
+    """Empty placeholder class; it declares no attributes or methods."""
+def export_chunks_from_transcribed_data(
+    audio_filename: str, transcribed_data: [], output_folder_name: str
+) -> None:
+    """Export transcribed_data as vocal chunks wav files"""
+    print(
+        f"{ULTRASINGER_HEAD} Export transcribed data as vocal chunks wav files"
+    )
+    wave_file = wave.open(audio_filename, "rb")
+    sample_rate, n_channels = wave_file.getparams()[2], wave_file.getparams()[0]
+    for i, data in enumerate(transcribed_data):
+        start_byte = int(data.start * sample_rate * n_channels)
+        end_byte = int(data.end * sample_rate * n_channels)
+        chunk = get_chunk(end_byte, start_byte, wave_file)
+        export_chunk_to_wav_file(
+            chunk, output_folder_name, i, data.word, wave_file
+        )
+    wave_file.close()
+def export_chunks_from_ultrastar_data(
+    audio_filename: str, ultrastar_data: UltrastarTxtValue, folder_name: str
+) -> None:
+    """Export ultrastar data as vocal chunks wav files"""
+    print(f"{ULTRASINGER_HEAD} Export Ultrastar data as vocal chunks wav files")
+    create_folder(folder_name)
+    wave_file = wave.open(audio_filename, "rb")
+    sample_rate, n_channels = wave_file.getparams()[2], wave_file.getparams()[0]
+    for i, word in enumerate(ultrastar_data.words):
+        start_time = get_start_time_from_ultrastar(ultrastar_data, i)
+        end_time = get_end_time_from_ultrastar(ultrastar_data, i)
+        start_byte = int(start_time * sample_rate * n_channels)
+        end_byte = int(end_time * sample_rate * n_channels)
+        chunk = get_chunk(end_byte, start_byte, wave_file)
+        export_chunk_to_wav_file(
+            chunk, folder_name, i, word, wave_file
+        )
+def export_chunk_to_wav_file(chunk, folder_name: str, i: int, word: str, wave_file) -> None:
+    """Export vocal chunks to wav file"""
+    clean_word = re.sub("[^A-Za-z0-9]+", "", word)
+    # todo: Progress?
+    # print(f"{str(i)} {clean_word}")
+    with wave.open(
+        os.path.join(folder_name, f"chunk_{i}_{clean_word}.wav"), "wb"
+    ) as chunk_file:
+        chunk_file.setparams(wave_file.getparams())
+        chunk_file.writeframes(chunk)
+def get_chunk(end_byte: int, start_byte: int, wave_file):
+    """
+    Gets the chunk from wave file.
+    Returns chunk as n frames of audio, as a bytes object.
+    """
+    # todo: get out of position error message
+    wave_file.setpos(start_byte)  # ({:.2f})
+    chunk = wave_file.readframes(end_byte - start_byte)
+    return chunk

src/modules/Audio/youtube.py ADDED Viewed

	@@ -0,0 +1,89 @@

+"""YouTube Downloader"""
+import io
+import os
+import yt_dlp
+from PIL import Image
+from modules.console_colors import ULTRASINGER_HEAD
+from modules.Image.image_helper import crop_image_to_square
+def get_youtube_title(url: str) -> tuple[str, str]:
+    """Get the title of the YouTube video"""
+    ydl_opts = {}
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        result = ydl.extract_info(
+            url, download=False  # We just want to extract the info
+        )
+    if "artist" in result:
+        return result["artist"].strip(), result["track"].strip()
+    if "-" in result["title"]:
+        return result["title"].split("-")[0].strip(), result["title"].split("-")[1].strip()
+    return result["channel"].strip(), result["title"].strip()
+def download_youtube_audio(url: str, clear_filename: str, output_path: str):
+    """Download audio from YouTube"""
+    print(f"{ULTRASINGER_HEAD} Downloading Audio")
+    ydl_opts = {
+        "format": "bestaudio/best",
+        "outtmpl": output_path + "/" + clear_filename,
+        "postprocessors": [
+            {"key": "FFmpegExtractAudio", "preferredcodec": "mp3"}
+        ],
+    }
+    start_download(ydl_opts, url)
+def download_youtube_thumbnail(url: str, clear_filename: str, output_path: str):
+    """Download thumbnail from YouTube"""
+    print(f"{ULTRASINGER_HEAD} Downloading thumbnail")
+    ydl_opts = {
+        "skip_download": True,
+        "writethumbnail": True,
+    }
+    download_and_convert_thumbnail(ydl_opts, url, clear_filename, output_path)
+def download_and_convert_thumbnail(ydl_opts, url: str, clear_filename: str, output_path: str) -> None:
+    """Download and convert thumbnail from YouTube"""
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        info_dict = ydl.extract_info(url, download=False)
+        thumbnail_url = info_dict.get("thumbnail")
+        if thumbnail_url:
+            response = ydl.urlopen(thumbnail_url)
+            image_data = response.read()
+            image = Image.open(io.BytesIO(image_data))
+            image = image.convert('RGB') # Convert to RGB to avoid transparency or RGBA issues
+            image_path = os.path.join(output_path, clear_filename + " [CO].jpg")
+            image.save(image_path, "JPEG")
+            crop_image_to_square(image_path)
+def download_youtube_video(url: str, clear_filename: str, output_path: str) -> None:
+    """Download video from YouTube"""
+    print(f"{ULTRASINGER_HEAD} Downloading Video")
+    ydl_opts = {
+        "format": "bestvideo[ext=mp4]/mp4",
+        "outtmpl": output_path + "/" + clear_filename + ".mp4",
+    }
+    start_download(ydl_opts, url)
+def start_download(ydl_opts, url: str) -> None:
+    """Start the download the ydl_opts"""
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        errors = ydl.download(url)
+        if errors:
+            raise Exception("Download failed with error: " + str(errors))