TIMBOVILL committed on
Commit
f5799e7
·
verified ·
1 Parent(s): 4f6ccf0

Upload 5 files

Browse files
src/modules/Audio/denoise.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Reduce noise from audio"""
2
+
3
+ import ffmpeg
4
+
5
+ from modules.console_colors import ULTRASINGER_HEAD, blue_highlighted
6
+
7
+
8
def ffmpeg_reduce_noise(input_file_path: str, output_file: str) -> None:
    """Reduce noise from vocal audio with ffmpeg.

    Runs ffmpeg's ``afftdn`` audio filter on ``input_file_path`` and writes
    the result to ``output_file``, overwriting an existing file.

    Args:
        input_file_path: Path of the audio file to denoise.
        output_file: Path the denoised audio is written to.

    Raises:
        ffmpeg.Error: If the ffmpeg run fails. The captured stdout and stderr
            are printed before the exception is re-raised.
    """

    # Denoise audio samples with FFT.
    # A description of the accepted parameters follows.

    # noise_reduction, nr
    #   Set the noise reduction in dB, allowed range is 0.01 to 97. Default value is 12 dB.
    # noise_floor, nf
    #   Set the noise floor in dB, allowed range is -80 to -20. Default value is -50 dB.
    # track_noise, tn
    #   Enable noise floor tracking. By default is disabled.
    #   With this enabled, noise floor is automatically adjusted.

    print(
        f"{ULTRASINGER_HEAD} Reduce noise from vocal audio with {blue_highlighted('ffmpeg')}."
    )
    try:
        (
            ffmpeg.input(input_file_path)
            .output(output_file, af="afftdn=nr=70:nf=-80:tn=1")
            .overwrite_output()
            .run(capture_stdout=True, capture_stderr=True)
        )
    except ffmpeg.Error as ffmpeg_exception:
        # ffmpeg output may contain bytes that are not valid UTF-8 (for
        # example file names in a local code page). Decoding leniently keeps
        # a UnicodeDecodeError from hiding the real ffmpeg failure.
        print(
            "ffmpeg stdout:",
            ffmpeg_exception.stdout.decode("utf8", errors="replace"),
        )
        print(
            "ffmpeg stderr:",
            ffmpeg_exception.stderr.decode("utf8", errors="replace"),
        )
        # Bare raise re-raises the active exception with its original traceback.
        raise
src/modules/Audio/separation.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Separate vocals from audio"""
2
+
3
+ import subprocess
4
+
5
+ from modules.console_colors import (
6
+ ULTRASINGER_HEAD,
7
+ blue_highlighted,
8
+ red_highlighted,
9
+ )
10
+ from modules.os_helper import current_executor_path, move, path_join
11
+
12
+
13
def separate_audio(input_file_path: str, output_file: str, device="cpu") -> None:
    """Separate vocals from audio with demucs.

    Invokes the ``demucs`` command line tool in two-stem (vocals) mode and
    then moves the ``separated`` folder found under the current executor path
    to ``output_file``.

    Args:
        input_file_path: Path of the audio file to separate.
        output_file: Destination the ``separated`` folder is moved to.
        device: Value passed to demucs' ``-d`` option (default ``"cpu"``).

    Raises:
        subprocess.CalledProcessError: If demucs exits with a non-zero status.
    """

    print(
        f"{ULTRASINGER_HEAD} Separating vocals from audio with {blue_highlighted('demucs')} and {red_highlighted(device)} as worker."
    )
    # Model selection?
    # -n htdemucs_ft
    subprocess.run(
        ["demucs", "-d", device, "--two-stems=vocals", "--float32", input_file_path],
        # Fail here when demucs fails instead of going on to move a missing
        # (or stale, from an earlier run) "separated" folder.
        check=True,
    )
    separated_folder = path_join(current_executor_path(), "separated")
    move(separated_folder, output_file)
src/modules/Audio/silence_processing.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Silence processing module"""
2
+
3
+ from pydub import AudioSegment, silence
4
+
5
+ from modules.console_colors import ULTRASINGER_HEAD
6
+ from modules.Speech_Recognition.TranscribedData import TranscribedData
7
+
8
def remove_silence_from_transcription_data(
    audio_path: str, transcribed_data: list[TranscribedData]
) -> list[TranscribedData]:
    """Strip silent parts out of the given transcription data.

    Detects the silent sections of the wav file at ``audio_path`` and hands
    them, together with ``transcribed_data``, to ``remove_silence``.

    Returns:
        The list produced by ``remove_silence``.
    """

    print(f"{ULTRASINGER_HEAD} Removing silent parts from transcription data")

    silent_sections = get_silence_sections(audio_path)
    return remove_silence(silent_sections, transcribed_data)
19
+
20
+
21
def get_silence_sections(audio_path: str,
                         min_silence_len=50,
                         silence_thresh=-50) -> list[tuple[float, float]]:
    """Return the silent sections of a wav file as (start, stop) pairs in seconds.

    ``min_silence_len`` and ``silence_thresh`` are forwarded unchanged to
    pydub's ``silence.detect_silence``.
    """
    audio = AudioSegment.from_wav(audio_path)
    detected = silence.detect_silence(
        audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh
    )

    sections = []
    for begin, finish in detected:
        # convert to sec
        sections.append((begin / 1000, finish / 1000))
    return sections
28
+
29
+
30
def remove_silence(silence_parts_list: list[tuple[float, float]], transcribed_data: list[TranscribedData]):
    """Trim, split or drop transcribed entries so they do not cover silence.

    Every entry of ``transcribed_data`` is compared against every
    ``(silence_start, silence_end)`` pair:

    * silence fully inside the entry: the entry is cut at the silence start
      and the remainder after the silence becomes a new "~" entry, provided
      the pieces are longer than 0.1;
    * silence fully covering the entry: the entry is dropped;
    * silence overlapping only the start or the end: that edge is moved to
      the silence boundary.

    The entries of ``transcribed_data`` are modified in place (``start``,
    ``end``, ``word``, ``is_word_end``).

    Args:
        silence_parts_list: Silent sections as (start, end) pairs.
            NOTE(review): presumably seconds, in ascending order, as produced
            by ``get_silence_sections`` - confirm for other callers.
        transcribed_data: Entries to process.

    Returns:
        A new list with the kept (mutated) entries and the inserted splits.
    """
    new_transcribed_data = []

    for data in transcribed_data:
        new_transcribed_data.append(data)

        # data.end is rewritten below; comparisons use the original end.
        origin_end = data.end
        was_split = False

        # enumerate supplies the position directly. The previous
        # list.index() lookup scanned the list on every iteration and would
        # have picked the wrong entry if the same pair appeared twice.
        for silence_index, (silence_start, silence_end) in enumerate(silence_parts_list):

            # Silence lies completely after or completely before the entry:
            # no overlap, nothing to do.
            if silence_start > origin_end or silence_end < data.start:
                continue

            # Silence lies completely inside the entry: split around it.
            if silence_start >= data.start and silence_end <= origin_end:
                next_index = silence_index + 1
                if next_index < len(silence_parts_list) and silence_parts_list[next_index][0] < origin_end:
                    # Another silence starts inside this entry, so the split
                    # piece ends where that next silence begins.
                    split_end = silence_parts_list[next_index][0]

                    if silence_parts_list[next_index][1] >= origin_end:
                        # The next silence reaches the end of the entry, so
                        # this piece is the last one of the word.
                        split_word = "~ "
                        is_word_end = True
                    else:
                        split_word = "~"
                        is_word_end = False
                else:
                    split_end = origin_end
                    split_word = "~ "
                    is_word_end = True

                split_data = TranscribedData({"conf": data.conf, "word": split_word, "end": split_end, "start": silence_end, "is_word_end": is_word_end})

                if not was_split:
                    data.end = silence_start

                    # The part before the silence is too short: let the entry
                    # take the place of the part after the silence instead.
                    if data.end - data.start < 0.1:
                        data.start = silence_end
                        data.end = split_end
                        continue

                    # The part after the silence is too short to stand alone.
                    if split_data.end - split_data.start <= 0.1:
                        continue

                    data.is_word_end = False

                    # Remove last whitespace from the data.word
                    # (endswith also copes with an empty word).
                    if data.word.endswith(" "):
                        data.word = data.word[:-1]

                if split_data.end - split_data.start > 0.1:
                    was_split = True
                    new_transcribed_data.append(split_data)
                elif split_word == "~ " and not data.is_word_end:
                    # The final piece was too short to add: mark the last
                    # added piece as the end of the word instead.
                    if not new_transcribed_data[-1].word.endswith(" "):
                        new_transcribed_data[-1].word += " "
                    new_transcribed_data[-1].is_word_end = True

                continue

            # Silence covers the whole entry: drop it and go to the next one.
            if silence_start < data.start and silence_end > origin_end:
                new_transcribed_data.remove(data)
                break

            # Silence overlaps the beginning of the entry: start after it.
            if silence_start < data.start:
                data.start = silence_end

            # Silence overlaps the end of the entry: end before it.
            if silence_end > origin_end:
                data.end = silence_start

            # NOTE(review): this check can never be true here, because the
            # first check of the loop already `continue`s on the same
            # condition. Kept unchanged; turning it into a real early exit
            # would only be safe if silence_parts_list is guaranteed sorted.
            if silence_start > origin_end:
                # Nothing to do with this word anymore, go to next word
                break
    return new_transcribed_data
src/modules/Audio/vocal_chunks.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Vocal chunks module."""
2
+
3
+ import os
4
+ import re
5
+ import wave
6
+
7
+ from modules.console_colors import ULTRASINGER_HEAD
8
+ from modules.os_helper import create_folder
9
+ from modules.Ultrastar.ultrastar_converter import (
10
+ get_end_time_from_ultrastar,
11
+ get_start_time_from_ultrastar,
12
+ )
13
+ from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue
14
+
15
+
16
class AudioManipulation:
    """Placeholder class; it declares no attributes or methods."""
18
+
19
+
20
def export_chunks_from_transcribed_data(
    audio_filename: str, transcribed_data: list, output_folder_name: str
) -> None:
    """Export transcribed_data as vocal chunks wav files

    For every entry the audio between ``entry.start`` and ``entry.end`` is
    cut out of ``audio_filename`` and written to its own wav file in
    ``output_folder_name``.

    Args:
        audio_filename: Path of the source wav file.
        transcribed_data: Entries providing ``start``, ``end`` and ``word``.
            NOTE(review): start/end are presumably seconds - confirm.
        output_folder_name: Folder the chunk files are written to.
    """
    print(
        f"{ULTRASINGER_HEAD} Export transcribed data as vocal chunks wav files"
    )

    # The context manager closes the source file even if an export fails.
    with wave.open(audio_filename, "rb") as wave_file:
        sample_rate = wave_file.getframerate()

        for i, data in enumerate(transcribed_data):
            # wave positions and lengths are counted in frames (one sample
            # for every channel), so the time is scaled by the frame rate
            # only. Multiplying by the channel count as well gave wrong or
            # out-of-range positions for multi-channel files.
            start_frame = int(data.start * sample_rate)
            end_frame = int(data.end * sample_rate)

            chunk = get_chunk(end_frame, start_frame, wave_file)
            export_chunk_to_wav_file(
                chunk, output_folder_name, i, data.word, wave_file
            )
41
+
42
+
43
def export_chunks_from_ultrastar_data(
    audio_filename: str, ultrastar_data: UltrastarTxtValue, folder_name: str
) -> None:
    """Export ultrastar data as vocal chunks wav files

    Creates ``folder_name`` and writes one wav file per entry of
    ``ultrastar_data.words``, cut out of ``audio_filename`` between the start
    and end time reported for that entry.

    Args:
        audio_filename: Path of the source wav file.
        ultrastar_data: Ultrastar data providing ``words`` and the timings.
        folder_name: Folder the chunk files are written to.
    """
    print(f"{ULTRASINGER_HEAD} Export Ultrastar data as vocal chunks wav files")

    create_folder(folder_name)

    # The source file was previously never closed; the context manager
    # closes it, also when an export fails.
    with wave.open(audio_filename, "rb") as wave_file:
        sample_rate = wave_file.getframerate()

        for i, word in enumerate(ultrastar_data.words):
            start_time = get_start_time_from_ultrastar(ultrastar_data, i)
            end_time = get_end_time_from_ultrastar(ultrastar_data, i)

            # wave positions and lengths are counted in frames (one sample
            # for every channel), so the time is scaled by the frame rate
            # only. Multiplying by the channel count as well gave wrong or
            # out-of-range positions for multi-channel files.
            start_frame = int(start_time * sample_rate)
            end_frame = int(end_time * sample_rate)

            chunk = get_chunk(end_frame, start_frame, wave_file)
            export_chunk_to_wav_file(
                chunk, folder_name, i, word, wave_file
            )
65
+
66
+
67
def export_chunk_to_wav_file(chunk, folder_name: str, i: int, word: str, wave_file) -> None:
    """Write one vocal chunk to ``chunk_<i>_<word>.wav`` inside ``folder_name``.

    Only ASCII letters and digits of ``word`` are kept in the file name. The
    new file takes its wav parameters from ``wave_file``.
    """

    # todo: Progress?
    file_safe_word = re.sub("[^A-Za-z0-9]+", "", word)
    target_path = os.path.join(folder_name, f"chunk_{i}_{file_safe_word}.wav")

    with wave.open(target_path, "wb") as chunk_file:
        source_params = wave_file.getparams()
        chunk_file.setparams(source_params)
        chunk_file.writeframes(chunk)
78
+
79
+
80
def get_chunk(end_byte: int, start_byte: int, wave_file):
    """
    Read one chunk out of an open wave file.

    Seeks to ``start_byte`` and reads ``end_byte - start_byte`` frames.
    Both values are handed to ``setpos``/``readframes`` as they are, so
    despite their names they are treated as frame positions.
    Returns the audio frames as a bytes object.
    """

    # todo: get out of position error message
    frame_count = end_byte - start_byte
    wave_file.setpos(start_byte)
    return wave_file.readframes(frame_count)
src/modules/Audio/youtube.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """YouTube Downloader"""
2
+
3
+ import io
4
+ import os
5
+
6
+ import yt_dlp
7
+ from PIL import Image
8
+
9
+ from modules.console_colors import ULTRASINGER_HEAD
10
+ from modules.Image.image_helper import crop_image_to_square
11
+
12
+
13
def get_youtube_title(url: str) -> tuple[str, str]:
    """Get the title of the YouTube video

    Extracts the video info without downloading and derives an
    (artist, title) pair from it, in this order of preference:

    1. the ``artist`` and ``track`` fields, when both are set;
    2. the video title split at its first ``-``;
    3. the ``channel`` field and the full video title.

    Args:
        url: URL of the video.

    Returns:
        Tuple of (artist, title), both stripped of surrounding whitespace.
    """

    ydl_opts = {}
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        result = ydl.extract_info(
            url, download=False  # We just want to extract the info
        )

    # Use the metadata pair only when both values are usable. Previously an
    # "artist" key without "track" raised KeyError and a None value raised
    # AttributeError.
    artist = result.get("artist")
    track = result.get("track")
    if artist and track:
        return artist.strip(), track.strip()

    title = result["title"]
    if "-" in title:
        # Split at the first dash only, so the rest of a title that contains
        # several dashes is kept instead of being cut off.
        artist_part, _, track_part = title.partition("-")
        return artist_part.strip(), track_part.strip()

    # A missing or empty channel yields an empty artist instead of a crash.
    return (result.get("channel") or "").strip(), title.strip()
27
+
28
+
29
def download_youtube_audio(url: str, clear_filename: str, output_path: str):
    """Fetch the best available audio of ``url`` and extract it as mp3.

    The output template handed to the downloader is
    ``output_path + "/" + clear_filename``.
    """

    print(f"{ULTRASINGER_HEAD} Downloading Audio")

    extract_as_mp3 = {"key": "FFmpegExtractAudio", "preferredcodec": "mp3"}
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": "/".join([output_path, clear_filename]),
        "postprocessors": [extract_as_mp3],
    }

    start_download(ydl_opts, url)
42
+
43
+
44
def download_youtube_thumbnail(url: str, clear_filename: str, output_path: str):
    """Fetch the thumbnail of ``url`` via ``download_and_convert_thumbnail``.

    The downloader options skip the media download and request the
    thumbnail only.
    """

    print(f"{ULTRASINGER_HEAD} Downloading thumbnail")

    thumbnail_only_opts = {"skip_download": True, "writethumbnail": True}
    download_and_convert_thumbnail(
        thumbnail_only_opts, url, clear_filename, output_path
    )
54
+
55
+
56
def download_and_convert_thumbnail(ydl_opts, url: str, clear_filename: str, output_path: str) -> None:
    """Save the video thumbnail as ``<clear_filename> [CO].jpg`` and crop it square.

    Does nothing when the extracted info has no thumbnail URL.
    """

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        video_info = ydl.extract_info(url, download=False)
        thumbnail_url = video_info.get("thumbnail")
        if not thumbnail_url:
            return

        raw_bytes = ydl.urlopen(thumbnail_url).read()
        # Convert to RGB to avoid transparency or RGBA issues
        picture = Image.open(io.BytesIO(raw_bytes)).convert('RGB')

        image_path = os.path.join(output_path, clear_filename + " [CO].jpg")
        picture.save(image_path, "JPEG")
        crop_image_to_square(image_path)
70
+
71
+
72
def download_youtube_video(url: str, clear_filename: str, output_path: str) -> None:
    """Fetch the video of ``url`` as mp4.

    The output template handed to the downloader is
    ``output_path + "/" + clear_filename + ".mp4"``.
    """

    print(f"{ULTRASINGER_HEAD} Downloading Video")

    target_template = "/".join([output_path, clear_filename + ".mp4"])
    ydl_opts = {
        "format": "bestvideo[ext=mp4]/mp4",
        "outtmpl": target_template,
    }
    start_download(ydl_opts, url)
81
+
82
+
83
def start_download(ydl_opts, url: str) -> None:
    """Run a yt_dlp download of ``url`` with the given options.

    Raises:
        Exception: If the downloader reports a truthy result.
    """

    with yt_dlp.YoutubeDL(ydl_opts) as downloader:
        outcome = downloader.download(url)
        if not outcome:
            return
        raise Exception("Download failed with error: " + str(outcome))