music2emo-youtube-link-ja

Running

App Files Files Community

music2emo-youtube-link-ja / utils /preprocess.py

kjysmu

Upload 22 files

6ad6801 verified 5 months ago

raw

history blame

23.4 kB

	import os
	import librosa
	from utils.chords import Chords
	import re
	from enum import Enum
	import pyrubberband as pyrb
	import torch
	import math

	class FeatureTypes(Enum):
	    """Closed set of audio feature kinds the preprocessing code can emit.

	    Each member's value is the string used as the feature tag when output
	    folder names are built.
	    """

	    # constant-Q transform features
	    cqt = 'cqt'

	class Preprocess():
	    """Pair chord annotations with audio files and cut them into training instances.

	    The class knows the on-disk layout of three datasets (isophonic, uspop,
	    robbiewilliams) below ``root_dir``.  ``get_all_files`` collects
	    ``[song_name, annotation_path, mp3_path, result_dir]`` entries and the
	    ``generate_labels_features_*`` methods turn such entries into ``.pt``
	    files holding a feature matrix and a per-frame chord id list.
	    """

	    def __init__(self, config, feature_to_use, dataset_names, root_dir):
	        """Store configuration and derive dataset paths.

	        Args:
	            config: object exposing ``mp3`` and ``feature`` mappings; the keys
	                read here are ``feature["hop_length"]``, ``mp3["song_hz"]``
	                and ``mp3["inst_len"]``.
	            feature_to_use: feature selector; compared against
	                ``FeatureTypes.cqt`` and its ``.value`` is used in folder names.
	            dataset_names: container of dataset names to include
	                (``"isophonic"``, ``"uspop"``, ``"robbiewilliams"``).
	            root_dir: directory that contains the dataset folders.
	        """
	        self.config = config
	        self.dataset_names = dataset_names
	        self.root_path = root_dir + '/'

	        # seconds covered by one feature frame (hop length / sample rate)
	        self.time_interval = config.feature["hop_length"]/config.mp3["song_hz"]
	        # number of frames (and therefore chord labels) in one instance
	        self.no_of_chord_datapoints_per_sequence = math.ceil(config.mp3['inst_len'] / self.time_interval)
	        self.Chord_class = Chords()

	        # isophonic
	        self.isophonic_directory = self.root_path + 'isophonic/'

	        # uspop
	        self.uspop_directory = self.root_path + 'uspop/'
	        self.uspop_audio_path = 'audio/'
	        self.uspop_lab_path = 'annotations/uspopLabels/'
	        self.uspop_index_path = 'annotations/uspopLabels.txt'

	        # robbie williams
	        self.robbie_williams_directory = self.root_path + 'robbiewilliams/'
	        self.robbie_williams_audio_path = 'audio/'
	        self.robbie_williams_lab_path = 'chords/'

	        self.feature_name = feature_to_use
	        self.is_cut_last_chord = False

	    def find_mp3_path(self, dirpath, word):
	        """Return the first ``.mp3`` file name in ``dirpath`` that matches ``word``.

	        A file matches when ``word`` (lower-cased, spaces removed) is a
	        substring of the file name reduced to its ASCII letters, after the
	        ``.mp3`` marker and the second-to-last ``/``-separated component of
	        ``dirpath`` have been removed from it.

	        Returns:
	            The matching file name (not a full path), or ``None`` when no
	            file in the directory matches.
	        """
	        parts = dirpath.split("/")
	        # a dirpath without any "/" has no parent component to strip
	        last_dir = parts[-2] if len(parts) >= 2 else ""
	        target = word.lower().replace(" ", "")

	        for filename in os.listdir(dirpath):
	            if ".mp3" not in filename:
	                continue
	            stem = filename.replace(".mp3", "").replace(last_dir, "")
	            letters = "".join(re.findall("[a-zA-Z]+", stem.lower()))
	            if target in letters:
	                return filename
	        return None

	    def find_mp3_path_robbiewilliams(self, dirpath, word):
	        """Return the first ``.mp3`` file name in ``dirpath`` that matches ``word``.

	        Both the file name (without ``.mp3`` and ``"robbie williams"``) and
	        ``word`` are normalised with ``song_pre`` before the substring test.

	        Returns:
	            The matching file name (not a full path), or ``None`` when no
	            file in the directory matches.
	        """
	        target = self.song_pre(word.lower()).replace(" ", "")

	        for filename in os.listdir(dirpath):
	            if ".mp3" not in filename:
	                continue
	            name = filename.replace(".mp3", "").lower().replace("robbie williams", "")
	            name = " ".join(re.findall("[a-zA-Z]+", name))
	            name = self.song_pre(name)
	            if target in name.replace(" ", ""):
	                return filename
	        return None

	    def get_all_files(self):
	        """Collect annotation/audio pairs for every selected dataset.

	        Returns:
	            A list of ``[song_name, annotation_path, mp3_path, result_dir]``
	            lists.  Annotations for which no audio file can be found are
	            reported on stdout and left out.
	        """
	        res_list = []

	        # isophonic
	        if "isophonic" in self.dataset_names:
	            for dirpath, dirnames, filenames in os.walk(self.isophonic_directory):
	                if dirnames:
	                    # only directories without sub-directories are scanned
	                    continue
	                for filename in filenames:
	                    if ".lab" not in filename:
	                        continue
	                    tmp = filename.replace(".lab", "")
	                    song_name = " ".join(re.findall("[a-zA-Z]+", tmp)).replace("CD", "")
	                    mp3_path = self.find_mp3_path(dirpath, song_name)
	                    if mp3_path is None:
	                        # os.path.join(dirpath, None) would raise TypeError
	                        print('no matching mp3 found for :', os.path.join(dirpath, filename))
	                        continue
	                    res_list.append([song_name, os.path.join(dirpath, filename), os.path.join(dirpath, mp3_path),
	                                     os.path.join(self.root_path, "result", "isophonic")])

	        # uspop
	        if "uspop" in self.dataset_names:
	            with open(os.path.join(self.uspop_directory, self.uspop_index_path)) as f:
	                # blank lines carry no label path and would break the split below
	                uspop_lab_list = [line.strip() for line in f if line.strip()]

	            # list and normalise the audio folder once instead of once per label
	            audio_dir = os.path.join(self.uspop_directory, self.uspop_audio_path)
	            audio_index = []
	            if uspop_lab_list:
	                for filename in os.listdir(audio_dir):
	                    if '.csv' in filename:
	                        continue
	                    spl = filename.split('-')
	                    if len(spl) < 2:
	                        # not of the form "<artist>-<title>...": cannot be matched
	                        continue
	                    audio_index.append((self.uspop_pre(spl[0]), self.uspop_pre(spl[1][:-4]), filename))

	            for lab_path in uspop_lab_list:
	                spl = lab_path.split('/')
	                lab_artist = self.uspop_pre(spl[2])
	                lab_title = self.uspop_pre(spl[4][3:-4])
	                lab_path = lab_path.replace('./uspopLabels/', '')
	                lab_path = os.path.join(self.uspop_directory, self.uspop_lab_path, lab_path)

	                for mp3_artist, mp3_title, filename in audio_index:
	                    if lab_artist == mp3_artist and lab_title == mp3_title:
	                        res_list.append([mp3_artist + mp3_title, lab_path,
	                                         os.path.join(self.uspop_directory, self.uspop_audio_path, filename),
	                                         os.path.join(self.root_path, "result", "uspop")])
	                        break

	        # robbie williams
	        if "robbiewilliams" in self.dataset_names:
	            for dirpath, dirnames, filenames in os.walk(self.robbie_williams_directory):
	                if dirnames:
	                    continue
	                for filename in filenames:
	                    if ".txt" not in filename or 'README' in filename:
	                        continue
	                    tmp = filename.replace(".txt", "")
	                    song_name = " ".join(re.findall("[a-zA-Z]+", tmp)).replace("GTChords", "")
	                    mp3_dir = dirpath.replace("chords", "audio")
	                    mp3_path = self.find_mp3_path_robbiewilliams(mp3_dir, song_name)
	                    if mp3_path is None:
	                        print('no matching mp3 found for :', os.path.join(dirpath, filename))
	                        continue
	                    res_list.append([song_name, os.path.join(dirpath, filename), os.path.join(mp3_dir, mp3_path),
	                                     os.path.join(self.root_path, "result", "robbiewilliams")])
	        return res_list

	    def uspop_pre(self, text):
	        """Normalise a uspop artist/title: lower-case, drop ``_`` and spaces,
	        then keep only runs of ASCII letters joined by single spaces."""
	        text = text.lower().replace('_', '').replace(' ', '')
	        return " ".join(re.findall("[a-zA-Z]+", text))

	    def song_pre(self, text):
	        """Strip quotes, parentheses, spaces, ``&`` and the words ``and``/``And``
	        from ``text`` (in that order) and return the result."""
	        to_remove = ["'", '`', '(', ')', ' ', '&', 'and', 'And']

	        for remove in to_remove:
	            text = text.replace(remove, '')

	        return text

	    def config_to_folder(self):
	        """Build the folder-name fragments that encode the current configuration.

	        Returns:
	            ``(mp3_config, feature_config, mp3_string, feature_string)`` where
	            ``mp3_string`` is ``"<song_hz>_<inst_len>_<skip_interval>"`` and
	            ``feature_string`` is
	            ``"<feature>_<n_bins>_<bins_per_octave>_<hop_length>"``.
	        """
	        mp3_config = self.config.mp3
	        feature_config = self.config.feature
	        mp3_string = "%d_%.1f_%.1f" % \
	                     (mp3_config['song_hz'], mp3_config['inst_len'],
	                      mp3_config['skip_interval'])
	        feature_string = "%s_%d_%d_%d" % \
	                         (self.feature_name.value, feature_config['n_bins'], feature_config['bins_per_octave'], feature_config['hop_length'])

	        return mp3_config, feature_config, mp3_string, feature_string

	    def generate_labels_features_new(self, all_list):
	        """Generate instances using ``Chords.get_converted_chord`` labels.

	        Ids are shifted by 2 per pitch-shift step modulo 24; id 24 is used
	        for frames without a chord and is never shifted.  Errors while
	        reading an annotation file propagate to the caller.

	        Args:
	            all_list: iterable of ``(song_name, lab_path, mp3_path, save_path)``
	                entries such as those returned by ``get_all_files``.

	        Raises:
	            RuntimeError: when chord lookup or feature extraction fails.
	        """
	        self._generate_labels_features(
	            all_list,
	            load_chords=self.Chord_class.get_converted_chord,
	            no_chord_id=24,
	            shift_modulus=24,
	            ids_per_shift_step=2,
	            unshifted_ids=(24,),
	            save_suffix='',
	            skip_on_lab_error=False)

	    def generate_labels_features_voca(self, all_list):
	        """Generate instances using ``Chords.get_converted_chord_voca`` labels.

	        Ids are shifted by 14 per pitch-shift step modulo 168; ids 168 and
	        169 are never shifted and 169 is used for frames without a chord.
	        Results go to ``save_path + '_voca'``.  A song whose annotation file
	        cannot be read is reported and skipped.

	        Args:
	            all_list: iterable of ``(song_name, lab_path, mp3_path, save_path)``
	                entries such as those returned by ``get_all_files``.

	        Raises:
	            RuntimeError: when chord lookup or feature extraction fails.
	        """
	        self._generate_labels_features(
	            all_list,
	            load_chords=self.Chord_class.get_converted_chord_voca,
	            no_chord_id=169,
	            shift_modulus=168,
	            ids_per_shift_step=14,
	            unshifted_ids=(168, 169),
	            save_suffix='_voca',
	            skip_on_lab_error=True)

	    def _select_frame_chord(self, chord_info, cur_sec, no_chord_id):
	        """Pick the chord id for the frame ``[cur_sec, cur_sec + time_interval]``.

	        ``chord_info`` is indexed by its ``start``, ``end`` and ``chord_id``
	        columns.  A chord spanning the whole frame wins; otherwise the chord
	        with a boundary inside the frame that overlaps the frame the longest
	        is chosen.  ``no_chord_id`` is returned when nothing touches the frame.
	        """
	        frame_end = cur_sec + self.time_interval
	        starts = chord_info['start']
	        ends = chord_info['end']

	        candidates = chord_info.loc[(starts <= cur_sec) & (ends > frame_end)]
	        if len(candidates) == 0:
	            candidates = chord_info.loc[((starts >= cur_sec) & (starts <= frame_end)) |
	                                        ((ends >= cur_sec) & (ends <= frame_end))]

	        if len(candidates) == 0:
	            return no_chord_id
	        if len(candidates) == 1:
	            return candidates['chord_id'].iloc[0]

	        # length of each candidate's intersection with the frame
	        overlap = candidates['end'].clip(upper=frame_end) - candidates['start'].clip(lower=cur_sec)
	        # label-based lookup; DataFrame.ix no longer exists in current pandas
	        return candidates.loc[overlap.idxmax(), 'chord_id']

	    def _generate_labels_features(self, all_list, load_chords, no_chord_id, shift_modulus,
	                                  ids_per_shift_step, unshifted_ids, save_suffix, skip_on_lab_error):
	        """Shared implementation behind the ``generate_labels_features_*`` methods.

	        For every song and every pitch shift, the audio is shifted with
	        pyrubberband, cut into windows of ``inst_len`` seconds every
	        ``skip_interval`` seconds, and each window is saved with
	        ``torch.save`` as ``{'feature', 'chord', 'etc'}``.

	        Args:
	            all_list: iterable of ``(song_name, lab_path, mp3_path, save_path)``.
	            load_chords: callable turning an annotation path into a table with
	                ``start``, ``end`` and ``chord_id`` columns.
	            no_chord_id: id stored for frames without any chord.
	            shift_modulus: modulus applied after shifting an id.
	            ids_per_shift_step: id offset added per unit of shift factor.
	            unshifted_ids: ids that are stored unchanged for every shift.
	            save_suffix: text appended to each entry's ``save_path``.
	            skip_on_lab_error: when true, a failure of ``load_chords`` is
	                printed and the song is abandoned instead of raising.

	        Raises:
	            RuntimeError: when chord lookup or feature extraction fails.
	        """
	        pid = os.getpid()
	        mp3_config, feature_config, mp3_str, feature_str = self.config_to_folder()
	        song_hz = mp3_config['song_hz']
	        inst_len = mp3_config['inst_len']
	        skip_interval = mp3_config['skip_interval']
	        n_frames = self.no_of_chord_datapoints_per_sequence

	        i = 0  # number of songs
	        total = 0  # number of generated instances

	        stretch_factors = [1.0]
	        shift_factors = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6]

	        for song_name, lab_path, mp3_path, save_path in all_list:
	            save_path = save_path + save_suffix

	            # a flag raised while processing the previous song must not leak
	            loop_broken = False

	            i += 1
	            print(pid, "generating features from ...", mp3_path)
	            if i % 10 == 0:
	                print(i, ' th song')

	            original_wav, sr = librosa.load(mp3_path, sr=song_hz)

	            # save_path, mp3_string, feature_string, song_name, aug.pt
	            result_path = os.path.join(save_path, mp3_str, feature_str, song_name.strip())
	            os.makedirs(result_path, exist_ok=True)

	            for stretch_factor in stretch_factors:
	                if loop_broken:
	                    # an earlier augmentation flagged this song: skip the
	                    # remaining stretch factors
	                    break

	                for shift_factor in shift_factors:
	                    # per-augmentation counter used in the output file name
	                    idx = 0

	                    # reloaded for every augmentation because start/end are
	                    # rescaled in place below
	                    if skip_on_lab_error:
	                        try:
	                            chord_info = load_chords(lab_path)
	                        except Exception as e:
	                            print(e)
	                            print(pid, " chord lab file error : %s" % song_name)
	                            loop_broken = True
	                            break
	                    else:
	                        chord_info = load_chords(lab_path)

	                    # stretch original sound and chord info
	                    x = pyrb.time_stretch(original_wav, sr, stretch_factor)
	                    x = pyrb.pitch_shift(x, sr, shift_factor)
	                    audio_length = x.shape[0]
	                    chord_info['start'] = chord_info['start'] / stretch_factor
	                    chord_info['end'] = chord_info['end'] / stretch_factor

	                    last_sec = chord_info.iloc[-1]['end']
	                    last_sec_hz = int(last_sec * song_hz)

	                    # NOTE(review): skip_interval is used as seconds elsewhere
	                    # but is added to a sample count here - confirm the
	                    # intended tolerance; behaviour kept as is.
	                    if audio_length + skip_interval < last_sec_hz:
	                        print('loaded song is too short :', song_name)
	                        loop_broken = True
	                        break
	                    elif audio_length > last_sec_hz:
	                        x = x[:last_sec_hz]

	                    origin_length_in_sec = last_sec_hz / song_hz
	                    current_start_second = 0

	                    # one iteration per window of inst_len seconds
	                    while current_start_second + inst_len < origin_length_in_sec:
	                        window_end_second = current_start_second + inst_len
	                        cur_sec = current_start_second

	                        # one chord id per frame of self.time_interval seconds
	                        chord_list = []
	                        while cur_sec < window_end_second:
	                            try:
	                                chord = self._select_frame_chord(chord_info, cur_sec, no_chord_id)
	                            except Exception as e:
	                                print(e)
	                                print(pid, "no chord")
	                                raise RuntimeError() from e

	                            # convert chord by shift factor
	                            if chord not in unshifted_ids:
	                                chord = (chord + shift_factor * ids_per_shift_step) % shift_modulus

	                            chord_list.append(chord)
	                            cur_sec += self.time_interval

	                        # float accumulation above can yield one frame too many
	                        if len(chord_list) == n_frames:
	                            try:
	                                start_index = int(current_start_second * song_hz)
	                                end_index = int(window_end_second * song_hz)
	                                song_seq = x[start_index:end_index]

	                                etc = '%.1f_%.1f' % (current_start_second, window_end_second)
	                                aug = '%.2f_%i' % (stretch_factor, shift_factor)

	                                if self.feature_name == FeatureTypes.cqt:
	                                    feature = librosa.cqt(song_seq, sr=sr, n_bins=feature_config['n_bins'],
	                                                          bins_per_octave=feature_config['bins_per_octave'],
	                                                          hop_length=feature_config['hop_length'])
	                                else:
	                                    raise NotImplementedError

	                                if feature.shape[1] > n_frames:
	                                    feature = feature[:, :n_frames]

	                                if feature.shape[1] != n_frames:
	                                    # stops the windows of this shift only;
	                                    # the remaining shifts are still processed
	                                    print('loaded features length is too short :', song_name)
	                                    loop_broken = True
	                                    break

	                                result = {
	                                    'feature': feature,
	                                    'chord': chord_list,
	                                    'etc': etc
	                                }

	                                # save_path, mp3_string, feature_string, song_name, aug.pt
	                                filename = aug + "_" + str(idx) + ".pt"
	                                torch.save(result, os.path.join(result_path, filename))
	                                idx += 1
	                                total += 1
	                            except Exception as e:
	                                print(e)
	                                print(pid, "feature error")
	                                raise RuntimeError() from e
	                        else:
	                            print("invalid number of chord datapoints in sequence :", len(chord_list))
	                        current_start_second += skip_interval
	        print(pid, "total instances: %d" % total)