Spaces:
Runtime error
Runtime error
import musdb | |
import os | |
import numpy as np | |
import glob | |
from data.utils import load, write_wav | |
def get_musdbhq(database_path): | |
''' | |
Retrieve audio file paths for MUSDB HQ dataset | |
:param database_path: MUSDB HQ root directory | |
:return: dictionary with train and test keys, each containing list of samples, each sample containing all audio paths | |
''' | |
subsets = list() | |
for subset in ["train", "test"]: | |
print("Loading " + subset + " set...") | |
tracks = glob.glob(os.path.join(database_path, subset, "*")) | |
samples = list() | |
# Go through tracks | |
for track_folder in sorted(tracks): | |
# Skip track if mixture is already written, assuming this track is done already | |
example = dict() | |
for stem in ["mix", "bass", "drums", "other", "vocals"]: | |
filename = stem if stem != "mix" else "mixture" | |
audio_path = os.path.join(track_folder, filename + ".wav") | |
example[stem] = audio_path | |
# Add other instruments to form accompaniment | |
acc_path = os.path.join(track_folder, "accompaniment.wav") | |
if not os.path.exists(acc_path): | |
print("Writing accompaniment to " + track_folder) | |
stem_audio = [] | |
for stem in ["bass", "drums", "other"]: | |
audio, sr = load(example[stem], sr=None, mono=False) | |
stem_audio.append(audio) | |
acc_audio = np.clip(sum(stem_audio), -1.0, 1.0) | |
write_wav(acc_path, acc_audio, sr) | |
example["accompaniment"] = acc_path | |
samples.append(example) | |
subsets.append(samples) | |
return subsets | |
def get_musdb(database_path): | |
''' | |
Retrieve audio file paths for MUSDB dataset | |
:param database_path: MUSDB root directory | |
:return: dictionary with train and test keys, each containing list of samples, each sample containing all audio paths | |
''' | |
mus = musdb.DB(root=database_path, is_wav=False) | |
subsets = list() | |
for subset in ["train", "test"]: | |
tracks = mus.load_mus_tracks(subset) | |
samples = list() | |
# Go through tracks | |
for track in sorted(tracks): | |
# Skip track if mixture is already written, assuming this track is done already | |
track_path = track.path[:-4] | |
mix_path = track_path + "_mix.wav" | |
acc_path = track_path + "_accompaniment.wav" | |
if os.path.exists(mix_path): | |
print("WARNING: Skipping track " + mix_path + " since it exists already") | |
# Add paths and then skip | |
paths = {"mix" : mix_path, "accompaniment" : acc_path} | |
paths.update({key : track_path + "_" + key + ".wav" for key in ["bass", "drums", "other", "vocals"]}) | |
samples.append(paths) | |
continue | |
rate = track.rate | |
# Go through each instrument | |
paths = dict() | |
stem_audio = dict() | |
for stem in ["bass", "drums", "other", "vocals"]: | |
path = track_path + "_" + stem + ".wav" | |
audio = track.targets[stem].audio | |
write_wav(path, audio, rate) | |
stem_audio[stem] = audio | |
paths[stem] = path | |
# Add other instruments to form accompaniment | |
acc_audio = np.clip(sum([stem_audio[key] for key in list(stem_audio.keys()) if key != "vocals"]), -1.0, 1.0) | |
write_wav(acc_path, acc_audio, rate) | |
paths["accompaniment"] = acc_path | |
# Create mixture | |
mix_audio = track.audio | |
write_wav(mix_path, mix_audio, rate) | |
paths["mix"] = mix_path | |
diff_signal = np.abs(mix_audio - acc_audio - stem_audio["vocals"]) | |
print("Maximum absolute deviation from source additivity constraint: " + str(np.max(diff_signal)))# Check if acc+vocals=mix | |
print("Mean absolute deviation from source additivity constraint: " + str(np.mean(diff_signal))) | |
samples.append(paths) | |
subsets.append(samples) | |
print("DONE preparing dataset!") | |
return subsets | |
def get_musdb_folds(root_path, version="HQ"): | |
if version == "HQ": | |
dataset = get_musdbhq(root_path) | |
else: | |
dataset = get_musdb(root_path) | |
train_val_list = dataset[0] | |
test_list = dataset[1] | |
np.random.seed(1337) # Ensure that partitioning is always the same on each run | |
train_list = np.random.choice(train_val_list, 75, replace=False) | |
val_list = [elem for elem in train_val_list if elem not in train_list] | |
# print("First training song: " + str(train_list[0])) # To debug whether partitioning is deterministic | |
return {"train" : train_list, "val" : val_list, "test" : test_list} |