import os
import librosa
from utils.chords import Chords
import re
from enum import Enum
import pyrubberband as pyrb
import torch
import math

# Runs of ASCII letters; used to normalise song / file names before matching.
_LETTERS_RE = re.compile(r"[a-zA-Z]+")


class FeatureTypes(Enum):
    """Audio feature types that the preprocessing pipeline can compute."""
    cqt = 'cqt'


class Preprocess():
    """Pair chord annotation files with audio and dump per-window training instances.

    ``get_all_files`` collects ``[song_name, lab_path, mp3_path, save_path]``
    entries for the requested datasets; the ``generate_labels_features_*``
    methods turn each entry into ``.pt`` files holding a feature matrix and the
    frame-wise chord ids for fixed-length windows of every pitch-shifted copy
    of the song.
    """

    def __init__(self, config, feature_to_use, dataset_names, root_dir):
        """Store configuration and derive dataset paths.

        Args:
            config: object exposing ``mp3`` and ``feature`` mappings; the keys
                read here are ``feature["hop_length"]``, ``mp3["song_hz"]`` and
                ``mp3["inst_len"]``.
            feature_to_use: a ``FeatureTypes`` member selecting the feature.
            dataset_names: container of dataset names; ``"isophonic"``,
                ``"uspop"`` and ``"robbiewilliams"`` are recognised.
            root_dir: directory containing the dataset folders.
        """
        self.config = config
        self.dataset_names = dataset_names
        self.root_path = root_dir + '/'

        # Duration in seconds of one feature frame (hop length / sample rate).
        self.time_interval = config.feature["hop_length"]/config.mp3["song_hz"]
        # Number of frames (and therefore chord labels) per saved window.
        self.no_of_chord_datapoints_per_sequence = math.ceil(config.mp3['inst_len'] / self.time_interval)
        self.Chord_class = Chords()

        # isophonic
        self.isophonic_directory = self.root_path + 'isophonic/'

        # uspop
        self.uspop_directory = self.root_path + 'uspop/'
        self.uspop_audio_path = 'audio/'
        self.uspop_lab_path = 'annotations/uspopLabels/'
        self.uspop_index_path = 'annotations/uspopLabels.txt'

        # robbie williams
        self.robbie_williams_directory = self.root_path + 'robbiewilliams/'
        self.robbie_williams_audio_path = 'audio/'
        self.robbie_williams_lab_path = 'chords/'

        self.feature_name = feature_to_use
        self.is_cut_last_chord = False

    def find_mp3_path(self, dirpath, word):
        """Return the first ``.mp3`` file name in *dirpath* whose letters contain *word*.

        The file name is stripped of its extension and of the name of the
        parent directory of *dirpath*, lower-cased and reduced to its ASCII
        letters; *word* is lower-cased with spaces removed. Returns ``None``
        when no file matches.
        """
        parts = dirpath.split("/")
        # Second-to-last path component; removed from file names before
        # matching. Falls back to "" (a no-op for str.replace) on short paths.
        last_dir = parts[-2] if len(parts) >= 2 else ""
        needle = word.lower().replace(" ", "")
        for filename in os.listdir(dirpath):
            if ".mp3" not in filename:
                continue
            stem = filename.replace(".mp3", "").replace(last_dir, "")
            letters = "".join(_LETTERS_RE.findall(stem.lower()))
            if needle in letters:
                return filename
        return None

    def find_mp3_path_robbiewilliams(self, dirpath, word):
        """Return the first ``.mp3`` file name in *dirpath* matching *word*.

        Both sides are lower-cased and passed through ``song_pre``; the file
        name additionally loses its extension, the text ``"robbie williams"``
        and every non-letter character. Returns ``None`` when nothing matches.
        """
        needle = self.song_pre(word.lower()).replace(" ", "")
        for filename in os.listdir(dirpath):
            if ".mp3" not in filename:
                continue
            stem = filename.replace(".mp3", "").lower()
            stem = stem.replace("robbie williams", "")
            stem = " ".join(_LETTERS_RE.findall(stem))
            stem = self.song_pre(stem)
            if needle in stem.replace(" ", ""):
                return filename
        return None

    def get_all_files(self):
        """Collect annotation/audio pairs for every dataset in ``dataset_names``.

        Returns:
            A list of ``[song_name, lab_path, mp3_path, save_path]`` lists.
            Annotation files for which no audio file can be found are reported
            on stdout and left out.
        """
        res_list = []

        # isophonic
        if "isophonic" in self.dataset_names:
            for dirpath, dirnames, filenames in os.walk(self.isophonic_directory):
                if dirnames:
                    # only leaf directories hold annotation + audio files
                    continue
                for filename in filenames:
                    if ".lab" not in filename:
                        continue
                    tmp = filename.replace(".lab", "")
                    song_name = " ".join(_LETTERS_RE.findall(tmp)).replace("CD", "")
                    mp3_path = self.find_mp3_path(dirpath, song_name)
                    if mp3_path is None:
                        print("no mp3 found for annotation :", os.path.join(dirpath, filename))
                        continue
                    res_list.append([song_name,
                                     os.path.join(dirpath, filename),
                                     os.path.join(dirpath, mp3_path),
                                     os.path.join(self.root_path, "result", "isophonic")])

        # uspop
        if "uspop" in self.dataset_names:
            with open(os.path.join(self.uspop_directory, self.uspop_index_path)) as f:
                uspop_lab_list = [line.strip() for line in f if line.strip()]

            # List the audio directory once and index it by normalised
            # (artist, title); setdefault keeps the first file in listing
            # order, matching a linear scan that stops at the first hit.
            audio_dir = os.path.join(self.uspop_directory, self.uspop_audio_path)
            audio_by_key = {}
            for filename in os.listdir(audio_dir):
                if '.csv' in filename:
                    continue
                spl = filename.split('-')
                if len(spl) < 2:
                    # not an "<artist>-<title>.<ext>" file; cannot match a label
                    continue
                key = (self.uspop_pre(spl[0]), self.uspop_pre(spl[1][:-4]))
                audio_by_key.setdefault(key, filename)

            for lab_path in uspop_lab_list:
                spl = lab_path.split('/')
                lab_artist = self.uspop_pre(spl[2])
                # title component: drop a 3-character prefix and the 4-character extension
                lab_title = self.uspop_pre(spl[4][3:-4])
                lab_path = lab_path.replace('./uspopLabels/', '')
                lab_path = os.path.join(self.uspop_directory, self.uspop_lab_path, lab_path)

                filename = audio_by_key.get((lab_artist, lab_title))
                if filename is not None:
                    res_list.append([lab_artist + lab_title,
                                     lab_path,
                                     os.path.join(audio_dir, filename),
                                     os.path.join(self.root_path, "result", "uspop")])

        # robbie williams
        if "robbiewilliams" in self.dataset_names:
            for dirpath, dirnames, filenames in os.walk(self.robbie_williams_directory):
                if dirnames:
                    continue
                for filename in filenames:
                    if ".txt" not in filename or 'README' in filename:
                        continue
                    tmp = filename.replace(".txt", "")
                    song_name = " ".join(_LETTERS_RE.findall(tmp)).replace("GTChords", "")
                    mp3_dir = dirpath.replace("chords", "audio")
                    mp3_path = self.find_mp3_path_robbiewilliams(mp3_dir, song_name)
                    if mp3_path is None:
                        print("no mp3 found for annotation :", os.path.join(dirpath, filename))
                        continue
                    res_list.append([song_name,
                                     os.path.join(dirpath, filename),
                                     os.path.join(mp3_dir, mp3_path),
                                     os.path.join(self.root_path, "result", "robbiewilliams")])
        return res_list

    def uspop_pre(self, text):
        """Normalise a uspop artist/title: lower-case, drop ``_`` and spaces,
        then join the remaining runs of ASCII letters with single spaces."""
        text = text.lower()
        text = text.replace('_', '')
        text = text.replace(' ', '')
        text = " ".join(_LETTERS_RE.findall(text))
        return text

    def song_pre(self, text):
        """Remove quotes, parentheses, spaces, ``&`` and the substrings
        ``and`` / ``And`` from *text*, in that order."""
        to_remove = ["'", '`', '(', ')', ' ', '&', 'and', 'And']
        for remove in to_remove:
            text = text.replace(remove, '')
        return text

    def config_to_folder(self):
        """Return ``(mp3_config, feature_config, mp3_string, feature_string)``.

        The two strings encode the audio and feature settings and are used as
        directory names under each dataset's result path.
        """
        mp3_config = self.config.mp3
        feature_config = self.config.feature
        mp3_string = "%d_%.1f_%.1f" % \
                     (mp3_config['song_hz'], mp3_config['inst_len'],
                      mp3_config['skip_interval'])
        feature_string = "%s_%d_%d_%d" % \
                         (self.feature_name.value, feature_config['n_bins'],
                          feature_config['bins_per_octave'], feature_config['hop_length'])

        return mp3_config, feature_config, mp3_string, feature_string

    def _frame_chord(self, chord_info, cur_sec, no_chord_id):
        """Pick the chord id for the frame ``[cur_sec, cur_sec + time_interval]``.

        Prefers annotation rows that span the whole frame; otherwise considers
        rows that start or end inside it. With several candidates the one
        overlapping the frame the longest wins (first one on ties). Returns
        *no_chord_id* when no row qualifies.
        """
        frame_end = cur_sec + self.time_interval
        starts = chord_info['start']
        ends = chord_info['end']

        candidates = chord_info.loc[(starts <= cur_sec) & (ends > frame_end)]
        if len(candidates) == 0:
            candidates = chord_info.loc[
                ((starts >= cur_sec) & (starts <= frame_end)) |
                ((ends >= cur_sec) & (ends <= frame_end))]

        if len(candidates) == 1:
            return candidates['chord_id'].iloc[0]
        if len(candidates) > 1:
            # overlap of each candidate with the frame
            overlap = candidates['end'].clip(upper=frame_end) - candidates['start'].clip(lower=cur_sec)
            # label-based lookup (DataFrame.ix no longer exists in pandas >= 1.0)
            return candidates.loc[overlap.idxmax()]['chord_id']
        return no_chord_id

    def _generate_labels_features(self, all_list, load_chord_info, no_chord_id,
                                  unshifted_ids, shift_step, shift_modulus,
                                  save_suffix='', skip_on_lab_error=False):
        """Shared driver behind the public ``generate_labels_features_*`` methods.

        For every song and every pitch shift it slides a window of
        ``inst_len`` seconds (step ``skip_interval``) over the audio, labels
        each frame with a chord id, computes the feature and saves
        ``{'feature', 'chord', 'etc'}`` with ``torch.save``.

        Args:
            all_list: iterable of ``(song_name, lab_path, mp3_path, save_path)``.
            load_chord_info: callable taking the lab path and returning a
                DataFrame with ``start``, ``end`` and ``chord_id`` columns.
            no_chord_id: id used for frames without any annotation.
            unshifted_ids: ids left untouched by pitch shifting.
            shift_step: amount added to a chord id per semitone of shift.
            shift_modulus: modulus applied after shifting.
            save_suffix: appended to each entry's ``save_path``.
            skip_on_lab_error: when true, a failing ``load_chord_info`` is
                reported and the song is abandoned instead of propagating.

        Raises:
            RuntimeError: if chord selection or feature extraction/saving fails.
        """
        pid = os.getpid()
        mp3_config, feature_config, mp3_str, feature_str = self.config_to_folder()
        song_hz = mp3_config['song_hz']
        inst_len = mp3_config['inst_len']
        skip_interval = mp3_config['skip_interval']
        frames_per_window = self.no_of_chord_datapoints_per_sequence

        song_count = 0  # number of songs
        total = 0  # number of generated instances

        stretch_factors = [1.0]
        shift_factors = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6]

        for song_name, lab_path, mp3_path, save_path in all_list:
            save_path = save_path + save_suffix

            song_count += 1
            print(pid, "generating features from ...", mp3_path)
            if song_count % 10 == 0:
                print(song_count, ' th song')

            original_wav, sr = librosa.load(mp3_path, sr=song_hz)

            # save_path, mp3_string, feature_string, song_name, aug.pt
            result_path = os.path.join(save_path, mp3_str, feature_str, song_name.strip())
            os.makedirs(result_path, exist_ok=True)

            # Set when something went wrong for this song. It ends the shift
            # loop for "song too short" / lab errors and prevents any further
            # stretch factor from being processed.
            loop_broken = False

            # calculate result
            for stretch_factor in stretch_factors:
                if loop_broken:
                    break

                # The stretched signal does not depend on the shift factor.
                stretched_wav = pyrb.time_stretch(original_wav, sr, stretch_factor)

                for shift_factor in shift_factors:
                    # for filename
                    idx = 0

                    # Reloaded for every augmentation because start/end are
                    # rescaled in place below.
                    try:
                        chord_info = load_chord_info(lab_path)
                    except Exception as e:
                        if not skip_on_lab_error:
                            raise
                        print(e)
                        print(pid, " chord lab file error : %s" % song_name)
                        loop_broken = True
                        break

                    # stretch original sound and chord info
                    x = pyrb.pitch_shift(stretched_wav, sr, shift_factor)
                    audio_length = x.shape[0]
                    chord_info['start'] = chord_info['start'] / stretch_factor
                    chord_info['end'] = chord_info['end'] / stretch_factor

                    last_sec = chord_info.iloc[-1]['end']
                    last_sec_hz = int(last_sec * song_hz)

                    # NOTE(review): skip_interval is used as seconds elsewhere
                    # but is added to a sample count here - confirm the
                    # intended tolerance. Behaviour kept as is.
                    if audio_length + skip_interval < last_sec_hz:
                        print('loaded song is too short :', song_name)
                        loop_broken = True
                        break
                    elif audio_length > last_sec_hz:
                        x = x[:last_sec_hz]

                    origin_length_in_sec = last_sec_hz / song_hz

                    current_start_second = 0

                    # get chord list between current_start_second and current+song_length
                    while current_start_second + inst_len < origin_length_in_sec:
                        window_end_second = current_start_second + inst_len
                        cur_sec = current_start_second

                        chord_list = []
                        # extract chord per 1/self.time_interval
                        while cur_sec < window_end_second:
                            try:
                                chord = self._frame_chord(chord_info, cur_sec, no_chord_id)
                            except Exception as e:
                                print(e)
                                print(pid, "no chord")
                                raise RuntimeError() from e

                            # convert chord by shift factor
                            if chord not in unshifted_ids:
                                chord += shift_factor * shift_step
                                chord = chord % shift_modulus

                            chord_list.append(chord)
                            cur_sec += self.time_interval

                        if len(chord_list) == frames_per_window:
                            try:
                                start_index = int(current_start_second * song_hz)
                                end_index = int(window_end_second * song_hz)
                                song_seq = x[start_index:end_index]

                                etc = '%.1f_%.1f' % (current_start_second, window_end_second)
                                aug = '%.2f_%i' % (stretch_factor, shift_factor)

                                if self.feature_name == FeatureTypes.cqt:
                                    feature = librosa.cqt(song_seq, sr=sr,
                                                          n_bins=feature_config['n_bins'],
                                                          bins_per_octave=feature_config['bins_per_octave'],
                                                          hop_length=feature_config['hop_length'])
                                else:
                                    raise NotImplementedError

                                if feature.shape[1] > frames_per_window:
                                    feature = feature[:, :frames_per_window]

                                if feature.shape[1] != frames_per_window:
                                    print('loaded features length is too short :', song_name)
                                    loop_broken = True
                                    # Leaves only the window loop; the next
                                    # shift factor is still processed.
                                    break

                                result = {
                                    'feature': feature,
                                    'chord': chord_list,
                                    'etc': etc
                                }

                                # save_path, mp3_string, feature_string, song_name, aug.pt
                                filename = aug + "_" + str(idx) + ".pt"
                                torch.save(result, os.path.join(result_path, filename))
                                idx += 1
                                total += 1
                            except Exception as e:
                                print(e)
                                print(pid, "feature error")
                                raise RuntimeError()
                        else:
                            print("invalid number of chord datapoints in sequence :", len(chord_list))
                        current_start_second += skip_interval

        print(pid, "total instances: %d" % total)

    def generate_labels_features_new(self, all_list):
        """Generate instances using ``Chords.get_converted_chord`` labels.

        Frames without annotation get id 24; every other id is shifted by
        ``2 * shift_factor`` modulo 24 (presumably 12 roots x 2 qualities plus
        a "no chord" class - confirm against ``utils.chords``). Errors while
        reading an annotation file propagate to the caller.
        """
        self._generate_labels_features(
            all_list,
            load_chord_info=self.Chord_class.get_converted_chord,
            no_chord_id=24,
            unshifted_ids=(24,),
            shift_step=2,
            shift_modulus=24)

    def generate_labels_features_voca(self, all_list):
        """Generate instances using ``Chords.get_converted_chord_voca`` labels.

        Results go to ``save_path + '_voca'``. Frames without annotation get
        id 169; ids 168 and 169 are never shifted, every other id is shifted
        by ``14 * shift_factor`` modulo 168 (presumably 12 roots x 14
        qualities plus two special classes - confirm against
        ``utils.chords``). A song whose annotation file cannot be read is
        reported and skipped.
        """
        self._generate_labels_features(
            all_list,
            load_chord_info=self.Chord_class.get_converted_chord_voca,
            no_chord_id=169,
            unshifted_ids=(169, 168),
            shift_step=14,
            shift_modulus=168,
            save_suffix='_voca',
            skip_on_lab_error=True)