music2emo-youtube-link-ja

Sleeping

File size: 23,371 Bytes

6ad6801

import os
import librosa
from utils.chords import Chords
import re
from enum import Enum
import pyrubberband as pyrb
import torch
import math

class FeatureTypes(Enum):
    cqt = 'cqt'

class Preprocess():
    def __init__(self, config, feature_to_use, dataset_names, root_dir):
        self.config = config
        self.dataset_names = dataset_names
        self.root_path = root_dir + '/'

        self.time_interval = config.feature["hop_length"]/config.mp3["song_hz"]
        self.no_of_chord_datapoints_per_sequence = math.ceil(config.mp3['inst_len'] / self.time_interval)
        self.Chord_class = Chords()

        # isophonic
        self.isophonic_directory = self.root_path + 'isophonic/'

        # uspop
        self.uspop_directory = self.root_path + 'uspop/'
        self.uspop_audio_path = 'audio/'
        self.uspop_lab_path = 'annotations/uspopLabels/'
        self.uspop_index_path = 'annotations/uspopLabels.txt'

        # robbie williams
        self.robbie_williams_directory = self.root_path + 'robbiewilliams/'
        self.robbie_williams_audio_path = 'audio/'
        self.robbie_williams_lab_path = 'chords/'

        self.feature_name = feature_to_use
        self.is_cut_last_chord = False

    def find_mp3_path(self, dirpath, word):
        for filename in os.listdir(dirpath):
            last_dir = dirpath.split("/")[-2]
            if ".mp3" in filename:
                tmp = filename.replace(".mp3", "")
                tmp = tmp.replace(last_dir, "")
                filename_lower = tmp.lower()
                filename_lower = " ".join(re.findall("[a-zA-Z]+", filename_lower))
                if word.lower().replace(" ", "") in filename_lower.replace(" ", ""):
                    return filename

    def find_mp3_path_robbiewilliams(self, dirpath, word):
        for filename in os.listdir(dirpath):
            if ".mp3" in filename:
                tmp = filename.replace(".mp3", "")
                filename_lower = tmp.lower()
                filename_lower = filename_lower.replace("robbie williams", "")
                filename_lower = " ".join(re.findall("[a-zA-Z]+", filename_lower))
                filename_lower = self.song_pre(filename_lower)
                if self.song_pre(word.lower()).replace(" ", "") in filename_lower.replace(" ", ""):
                    return filename

    def get_all_files(self):
        res_list = []

        # isophonic
        if "isophonic" in self.dataset_names:
            for dirpath, dirnames, filenames in os.walk(self.isophonic_directory):
                if not dirnames:
                    for filename in filenames:
                        if ".lab" in filename:
                            tmp = filename.replace(".lab", "")
                            song_name = " ".join(re.findall("[a-zA-Z]+", tmp)).replace("CD", "")
                            mp3_path = self.find_mp3_path(dirpath, song_name)
                            res_list.append([song_name, os.path.join(dirpath, filename), os.path.join(dirpath, mp3_path),
                                             os.path.join(self.root_path, "result", "isophonic")])

        # uspop
        if "uspop" in self.dataset_names:
            with open(os.path.join(self.uspop_directory, self.uspop_index_path)) as f:
                uspop_lab_list = f.readlines()
            uspop_lab_list = [x.strip() for x in uspop_lab_list]

            for lab_path in uspop_lab_list:
                spl = lab_path.split('/')
                lab_artist = self.uspop_pre(spl[2])
                lab_title = self.uspop_pre(spl[4][3:-4])
                lab_path = lab_path.replace('./uspopLabels/', '')
                lab_path = os.path.join(self.uspop_directory, self.uspop_lab_path, lab_path)

                for filename in os.listdir(os.path.join(self.uspop_directory, self.uspop_audio_path)):
                    if not '.csv' in filename:
                        spl = filename.split('-')
                        mp3_artist = self.uspop_pre(spl[0])
                        mp3_title = self.uspop_pre(spl[1][:-4])

                        if lab_artist == mp3_artist and lab_title == mp3_title:
                            res_list.append([mp3_artist + mp3_title, lab_path,
                                             os.path.join(self.uspop_directory, self.uspop_audio_path, filename),
                                             os.path.join(self.root_path, "result", "uspop")])
                            break

        # robbie williams
        if "robbiewilliams" in self.dataset_names:
            for dirpath, dirnames, filenames in os.walk(self.robbie_williams_directory):
                if not dirnames:
                    for filename in filenames:
                        if ".txt" in filename and (not 'README' in filename):
                            tmp = filename.replace(".txt", "")
                            song_name = " ".join(re.findall("[a-zA-Z]+", tmp)).replace("GTChords", "")
                            mp3_dir = dirpath.replace("chords", "audio")
                            mp3_path = self.find_mp3_path_robbiewilliams(mp3_dir, song_name)
                            res_list.append([song_name, os.path.join(dirpath, filename), os.path.join(mp3_dir, mp3_path),
                                             os.path.join(self.root_path, "result", "robbiewilliams")])
        return res_list

    def uspop_pre(self, text):
        text = text.lower()
        text = text.replace('_', '')
        text = text.replace(' ', '')
        text = " ".join(re.findall("[a-zA-Z]+", text))
        return text

    def song_pre(self, text):
        to_remove = ["'", '`', '(', ')', ' ', '&', 'and', 'And']

        for remove in to_remove:
            text = text.replace(remove, '')

        return text

    def config_to_folder(self):
        mp3_config = self.config.mp3
        feature_config = self.config.feature
        mp3_string = "%d_%.1f_%.1f" % \
                     (mp3_config['song_hz'], mp3_config['inst_len'],
                      mp3_config['skip_interval'])
        feature_string = "%s_%d_%d_%d" % \
                         (self.feature_name.value, feature_config['n_bins'], feature_config['bins_per_octave'], feature_config['hop_length'])

        return mp3_config, feature_config, mp3_string, feature_string

    def generate_labels_features_new(self, all_list):
        pid = os.getpid()
        mp3_config, feature_config, mp3_str, feature_str = self.config_to_folder()

        i = 0  # number of songs
        j = 0  # number of impossible songs
        k = 0  # number of tried songs
        total = 0  # number of generated instances

        stretch_factors = [1.0]
        shift_factors = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6]

        loop_broken = False
        for song_name, lab_path, mp3_path, save_path in all_list:

            # different song initialization
            if loop_broken:
                loop_broken = False

            i += 1
            print(pid, "generating features from ...", os.path.join(mp3_path))
            if i % 10 == 0:
                print(i, ' th song')

            original_wav, sr = librosa.load(os.path.join(mp3_path), sr=mp3_config['song_hz'])

            # make result path if not exists
            # save_path, mp3_string, feature_string, song_name, aug.pt
            result_path = os.path.join(save_path, mp3_str, feature_str, song_name.strip())
            if not os.path.exists(result_path):
                os.makedirs(result_path)

            # calculate result
            for stretch_factor in stretch_factors:
                if loop_broken:
                    loop_broken = False
                    break

                for shift_factor in shift_factors:
                    # for filename
                    idx = 0

                    chord_info = self.Chord_class.get_converted_chord(os.path.join(lab_path))

                    k += 1
                    # stretch original sound and chord info
                    x = pyrb.time_stretch(original_wav, sr, stretch_factor)
                    x = pyrb.pitch_shift(x, sr, shift_factor)
                    audio_length = x.shape[0]
                    chord_info['start'] = chord_info['start'] * 1/stretch_factor
                    chord_info['end'] = chord_info['end'] * 1/stretch_factor

                    last_sec = chord_info.iloc[-1]['end']
                    last_sec_hz = int(last_sec * mp3_config['song_hz'])

                    if audio_length + mp3_config['skip_interval'] < last_sec_hz:
                        print('loaded song is too short :', song_name)
                        loop_broken = True
                        j += 1
                        break
                    elif audio_length > last_sec_hz:
                        x = x[:last_sec_hz]

                    origin_length = last_sec_hz
                    origin_length_in_sec = origin_length / mp3_config['song_hz']

                    current_start_second = 0

                    # get chord list between current_start_second and current+song_length
                    while current_start_second + mp3_config['inst_len'] < origin_length_in_sec:
                        inst_start_sec = current_start_second
                        curSec = current_start_second

                        chord_list = []
                        # extract chord per 1/self.time_interval
                        while curSec < inst_start_sec + mp3_config['inst_len']:
                            try:
                                available_chords = chord_info.loc[(chord_info['start'] <= curSec) & (
                                        chord_info['end'] > curSec + self.time_interval)].copy()
                                if len(available_chords) == 0:
                                    available_chords = chord_info.loc[((chord_info['start'] >= curSec) & (
                                            chord_info['start'] <= curSec + self.time_interval)) | (
                                                                              (chord_info['end'] >= curSec) & (
                                                                              chord_info['end'] <= curSec + self.time_interval))].copy()
                                if len(available_chords) == 1:
                                    chord = available_chords['chord_id'].iloc[0]
                                elif len(available_chords) > 1:
                                    max_starts = available_chords.apply(lambda row: max(row['start'], curSec),
                                                                        axis=1)
                                    available_chords['max_start'] = max_starts
                                    min_ends = available_chords.apply(
                                        lambda row: min(row.end, curSec + self.time_interval), axis=1)
                                    available_chords['min_end'] = min_ends
                                    chords_lengths = available_chords['min_end'] - available_chords['max_start']
                                    available_chords['chord_length'] = chords_lengths
                                    chord = available_chords.ix[available_chords['chord_length'].idxmax()]['chord_id']
                                else:
                                    chord = 24
                            except Exception as e:
                                chord = 24
                                print(e)
                                print(pid, "no chord")
                                raise RuntimeError()
                            finally:
                                # convert chord by shift factor
                                if chord != 24:
                                    chord += shift_factor * 2
                                    chord = chord % 24

                                chord_list.append(chord)
                                curSec += self.time_interval

                        if len(chord_list) == self.no_of_chord_datapoints_per_sequence:
                            try:
                                sequence_start_time = current_start_second
                                sequence_end_time = current_start_second + mp3_config['inst_len']

                                start_index = int(sequence_start_time * mp3_config['song_hz'])
                                end_index = int(sequence_end_time * mp3_config['song_hz'])

                                song_seq = x[start_index:end_index]

                                etc = '%.1f_%.1f' % (
                                    current_start_second, current_start_second + mp3_config['inst_len'])
                                aug = '%.2f_%i' % (stretch_factor, shift_factor)

                                if self.feature_name == FeatureTypes.cqt:
                                    # print(pid, "make feature")
                                    feature = librosa.cqt(song_seq, sr=sr, n_bins=feature_config['n_bins'],
                                                          bins_per_octave=feature_config['bins_per_octave'],
                                                          hop_length=feature_config['hop_length'])
                                else:
                                    raise NotImplementedError

                                if feature.shape[1] > self.no_of_chord_datapoints_per_sequence:
                                    feature = feature[:, :self.no_of_chord_datapoints_per_sequence]

                                if feature.shape[1] != self.no_of_chord_datapoints_per_sequence:
                                    print('loaded features length is too short :', song_name)
                                    loop_broken = True
                                    j += 1
                                    break

                                result = {
                                    'feature': feature,
                                    'chord': chord_list,
                                    'etc': etc
                                }

                                # save_path, mp3_string, feature_string, song_name, aug.pt
                                filename = aug + "_" + str(idx) + ".pt"
                                torch.save(result, os.path.join(result_path, filename))
                                idx += 1
                                total += 1
                            except Exception as e:
                                print(e)
                                print(pid, "feature error")
                                raise RuntimeError()
                        else:
                            print("invalid number of chord datapoints in sequence :", len(chord_list))
                        current_start_second += mp3_config['skip_interval']
        print(pid, "total instances: %d" % total)

    def generate_labels_features_voca(self, all_list):
        pid = os.getpid()
        mp3_config, feature_config, mp3_str, feature_str = self.config_to_folder()

        i = 0  # number of songs
        j = 0  # number of impossible songs
        k = 0  # number of tried songs
        total = 0  # number of generated instances
        stretch_factors = [1.0]
        shift_factors = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6]

        loop_broken = False
        for song_name, lab_path, mp3_path, save_path in all_list:
            save_path = save_path + '_voca'

            # different song initialization
            if loop_broken:
                loop_broken = False

            i += 1
            print(pid, "generating features from ...", os.path.join(mp3_path))
            if i % 10 == 0:
                print(i, ' th song')

            original_wav, sr = librosa.load(os.path.join(mp3_path), sr=mp3_config['song_hz'])

            # save_path, mp3_string, feature_string, song_name, aug.pt
            result_path = os.path.join(save_path, mp3_str, feature_str, song_name.strip())
            if not os.path.exists(result_path):
                os.makedirs(result_path)

            # calculate result
            for stretch_factor in stretch_factors:
                if loop_broken:
                    loop_broken = False
                    break

                for shift_factor in shift_factors:
                    # for filename
                    idx = 0

                    try:
                        chord_info = self.Chord_class.get_converted_chord_voca(os.path.join(lab_path))
                    except Exception as e:
                        print(e)
                        print(pid, " chord lab file error : %s" % song_name)
                        loop_broken = True
                        j += 1
                        break

                    k += 1
                    # stretch original sound and chord info
                    x = pyrb.time_stretch(original_wav, sr, stretch_factor)
                    x = pyrb.pitch_shift(x, sr, shift_factor)
                    audio_length = x.shape[0]
                    chord_info['start'] = chord_info['start'] * 1/stretch_factor
                    chord_info['end'] = chord_info['end'] * 1/stretch_factor

                    last_sec = chord_info.iloc[-1]['end']
                    last_sec_hz = int(last_sec * mp3_config['song_hz'])

                    if audio_length + mp3_config['skip_interval'] < last_sec_hz:
                        print('loaded song is too short :', song_name)
                        loop_broken = True
                        j += 1
                        break
                    elif audio_length > last_sec_hz:
                        x = x[:last_sec_hz]

                    origin_length = last_sec_hz
                    origin_length_in_sec = origin_length / mp3_config['song_hz']

                    current_start_second = 0

                    # get chord list between current_start_second and current+song_length
                    while current_start_second + mp3_config['inst_len'] < origin_length_in_sec:
                        inst_start_sec = current_start_second
                        curSec = current_start_second

                        chord_list = []
                        # extract chord per 1/self.time_interval
                        while curSec < inst_start_sec + mp3_config['inst_len']:
                            try:
                                available_chords = chord_info.loc[(chord_info['start'] <= curSec) & (chord_info['end'] > curSec + self.time_interval)].copy()
                                if len(available_chords) == 0:
                                    available_chords = chord_info.loc[((chord_info['start'] >= curSec) & (chord_info['start'] <= curSec + self.time_interval)) | ((chord_info['end'] >= curSec) & (chord_info['end'] <= curSec + self.time_interval))].copy()

                                if len(available_chords) == 1:
                                    chord = available_chords['chord_id'].iloc[0]
                                elif len(available_chords) > 1:
                                    max_starts = available_chords.apply(lambda row: max(row['start'], curSec),axis=1)
                                    available_chords['max_start'] = max_starts
                                    min_ends = available_chords.apply(lambda row: min(row.end, curSec + self.time_interval), axis=1)
                                    available_chords['min_end'] = min_ends
                                    chords_lengths = available_chords['min_end'] - available_chords['max_start']
                                    available_chords['chord_length'] = chords_lengths
                                    chord = available_chords.ix[available_chords['chord_length'].idxmax()]['chord_id']
                                else:
                                    chord = 169
                            except Exception as e:
                                chord = 169
                                print(e)
                                print(pid, "no chord")
                                raise RuntimeError()
                            finally:
                                # convert chord by shift factor
                                if chord != 169 and chord != 168:
                                    chord += shift_factor * 14
                                    chord = chord % 168

                                chord_list.append(chord)
                                curSec += self.time_interval

                        if len(chord_list) == self.no_of_chord_datapoints_per_sequence:
                            try:
                                sequence_start_time = current_start_second
                                sequence_end_time = current_start_second + mp3_config['inst_len']

                                start_index = int(sequence_start_time * mp3_config['song_hz'])
                                end_index = int(sequence_end_time * mp3_config['song_hz'])

                                song_seq = x[start_index:end_index]

                                etc = '%.1f_%.1f' % (
                                    current_start_second, current_start_second + mp3_config['inst_len'])
                                aug = '%.2f_%i' % (stretch_factor, shift_factor)

                                if self.feature_name == FeatureTypes.cqt:
                                    feature = librosa.cqt(song_seq, sr=sr, n_bins=feature_config['n_bins'],
                                                          bins_per_octave=feature_config['bins_per_octave'],
                                                          hop_length=feature_config['hop_length'])
                                else:
                                    raise NotImplementedError

                                if feature.shape[1] > self.no_of_chord_datapoints_per_sequence:
                                    feature = feature[:, :self.no_of_chord_datapoints_per_sequence]

                                if feature.shape[1] != self.no_of_chord_datapoints_per_sequence:
                                    print('loaded features length is too short :', song_name)
                                    loop_broken = True
                                    j += 1
                                    break

                                result = {
                                    'feature': feature,
                                    'chord': chord_list,
                                    'etc': etc
                                }

                                # save_path, mp3_string, feature_string, song_name, aug.pt
                                filename = aug + "_" + str(idx) + ".pt"
                                torch.save(result, os.path.join(result_path, filename))
                                idx += 1
                                total += 1
                            except Exception as e:
                                print(e)
                                print(pid, "feature error")
                                raise RuntimeError()
                        else:
                            print("invalid number of chord datapoints in sequence :", len(chord_list))
                        current_start_second += mp3_config['skip_interval']
        print(pid, "total instances: %d" % total)