import argparse
import configparser
import json
import os
import pickle as pkl
from collections import deque

import librosa
import numpy as np
import soundfile as sf

from .RajatsMinecraftLibrary.audio import MyAudio, AudioManipulator

config = configparser.ConfigParser()
script_dir = os.path.dirname(os.path.abspath(__file__))
config.read(os.path.join(script_dir, 'config.ini'))


def preProcess(
    mainAudioValues,
    sr,
    instruments_dict,
    scaling_dict,
    initialBestMatchesLength,
    simThresh,
    binLength,
    sounds_file_path,
    amplitudeMode,
):
    """Greedily approximate mainAudioValues with pitch-shifted instrument samples.

    instruments_dict maps an instrument file name to its (min, max) pitch-shift
    range; scaling_dict maps the same file names to amplitude scaling factors.
    binLength and the returned start times are in milliseconds. The running
    reconstruction is periodically written to sounds_file_path, and a list of
    (startTime, bestMatch) tuples is returned.
    """
    startTime = 0  # position in the target audio, in milliseconds
    result = []  # (startTime, best combination) for each processed window
    resAudioValues = np.zeros(len(mainAudioValues))  # running reconstruction
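
    # Greedy bin-by-bin matching: walk the target audio in windows of
    # binLength milliseconds; for each window, search for the instrument notes
    # whose mix is most similar to the target window, and fold the winning
    # notes into the running reconstruction when the match clears simThresh.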
    while startTime < 1000 * len(mainAudioValues) / sr:
        # Current window (startTime .. startTime + binLength) of the
        # reconstruction-so-far and of the target audio.
        resAudio = MyAudio(
            [{"fileName": "resFile", "pitchShift": 0, "ASF": 1}],
            AudioManipulator.splitAudioValues(
                resAudioValues, sr, startTime, startTime + binLength
            ),
        )
        mainAudio = MyAudio(
            [{"fileName": "targetFile", "pitchShift": 0, "ASF": 1}],
            AudioManipulator.splitAudioValues(
                mainAudioValues, sr, startTime, startTime + binLength
            ),
        )
        # Score every (instrument, pitch shift) note on its own: how similar is
        # "reconstruction-so-far + this note" to the target window in the FFT domain?
        initialBestMatches = []
        for instrument in instruments_dict:
            rng = instruments_dict[instrument]
            # Resample the instrument sample to the target sample rate so that
            # pitch shifting and mixing stay consistent with mainAudio.
            audioValues, _ = librosa.load(
                os.path.join(script_dir, "Instruments", instrument), sr=sr
            )
            audioValues *= scaling_dict[instrument]
            for pitchShift in range(rng[0], rng[1] + 1):
                shiftedValues = AudioManipulator.shiftPitchOfAudioValues(
                    audioValues, sr, pitchShift
                )
                asf = AudioManipulator.calculateAmplitudeShiftOfAudioValues(
                    mainAudio.audioValues, shiftedValues, amplitudeMode
                )
                pitchShiftedAudio = MyAudio(
                    [{"instrument": instrument, "pitchShift": pitchShift, "ASF": asf}],
                    shiftedValues * asf,
                )
                combinedAudio = MyAudio.combineTwoAudios(resAudio, pitchShiftedAudio)
                sim = MyAudio.compareTwoFFTAudios(
                    MyAudio.changeAudioToFFT(mainAudio),
                    MyAudio.changeAudioToFFT(combinedAudio),
                )
                initialBestMatches.append(
                    {
                        "similarity": round(sim, 2),
                        "instrument": instrument,
                        "pitchShift": pitchShift,
                        "ASF": asf,
                    }
                )
        initialBestMatches = sorted(
            initialBestMatches, key=lambda x: x["similarity"], reverse=True
        )
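
        # Subset search over the top matches: the queue enumerates every
        # non-empty combination of the best single notes. Each queued entry is
        # only ever extended with notes of a higher index than its own, so no
        # subset is generated twice; every candidate mix is then scored against
        # the target window in the FFT domain.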
        combinationsQueue = deque()
        ogAudios = []
        # Rebuild the top-scoring single notes as audio objects; cap the pool at
        # the number of matches actually available.
        mxIndex = min(initialBestMatchesLength, len(initialBestMatches))
        for idx, note in enumerate(initialBestMatches[:mxIndex]):
            audioValues, _ = librosa.load(
                os.path.join(script_dir, "Instruments", note["instrument"]), sr=sr
            )
            audioValues *= scaling_dict[note["instrument"]]
            audio = MyAudio(
                [
                    {
                        "instrument": note["instrument"],
                        "pitchShift": note["pitchShift"],
                        "ASF": note["ASF"],
                    }
                ],
                AudioManipulator.shiftPitchOfAudioValues(
                    audioValues, sr, note["pitchShift"]
                )
                * note["ASF"],
            )
            ogAudios.append(audio)
            combinationsQueue.append({"idx": idx, "audio": audio})
        combinationSimilarities = []
        while len(combinationsQueue):
            combination = combinationsQueue.popleft()
            sim = MyAudio.compareTwoFFTAudios(
                MyAudio.changeAudioToFFT(mainAudio),
                MyAudio.changeAudioToFFT(
                    MyAudio.combineTwoAudios(resAudio, combination["audio"])
                ),
            )
            combinationSimilarities.append(
                {
                    "similarity": round(sim, 2),
                    "combination": combination["audio"].details,
                }
            )
            for combinableAudioId in range(combination["idx"] + 1, mxIndex):
                combinationsQueue.append(
                    {
                        "idx": combinableAudioId,
                        "audio": MyAudio.combineTwoAudios(
                            combination["audio"], ogAudios[combinableAudioId]
                        ),
                    }
                )
        combinationSimilarities = sorted(
            combinationSimilarities, key=lambda x: x["similarity"], reverse=True
        )

        # Keep the single best combination for this window; mix it into the
        # reconstruction only when its similarity clears the threshold.
        bestMatch = combinationSimilarities[0]
        result.append((startTime, bestMatch))
        if bestMatch["similarity"] >= simThresh:
            for instrumentDetails in bestMatch["combination"]:
                instrumentAudioValues, _ = librosa.load(
                    os.path.join(script_dir, "Instruments", instrumentDetails["instrument"]),
                    sr=sr,
                )
                instrumentAudioValues *= scaling_dict[instrumentDetails["instrument"]]
                instrumentAudioValues = (
                    AudioManipulator.shiftPitchOfAudioValues(
                        instrumentAudioValues, sr, instrumentDetails["pitchShift"]
                    )
                    * instrumentDetails["ASF"]
                )
                resAudioValues = AudioManipulator.addAudioValuesInDuration(
                    resAudioValues, instrumentAudioValues, startTime, sr
                )

        # Checkpoint the reconstruction to disk whenever startTime lands on a
        # whole multiple of 1000 ms.
        if startTime % 1000 == 0:
            sf.write(sounds_file_path, resAudioValues, sr)
        startTime += binLength

    return result
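
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original pipeline): one way this
# preprocessing step could be driven from the command line. The instrument
# dictionaries, numeric settings, amplitudeMode value, and output file names
# below are hypothetical placeholders. Because of the relative import at the
# top, the module must run in its package context (python -m <package>.<module>).
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Approximate a target audio file with instrument samples."
    )
    parser.add_argument("target", help="path to the audio file to approximate")
    parser.add_argument("--output", default="result.wav", help="where to write the reconstruction")
    args = parser.parse_args()

    # Load the target at its native sample rate.
    mainAudioValues, sr = librosa.load(args.target, sr=None)

    # Hypothetical dictionaries: instrument file -> (min, max) pitch shift in
    # semitones, and instrument file -> amplitude scaling factor.
    instruments_dict = {"harp.ogg": (-12, 12), "bass.ogg": (-12, 0)}
    scaling_dict = {"harp.ogg": 1.0, "bass.ogg": 0.8}

    matches = preProcess(
        mainAudioValues,
        sr,
        instruments_dict,
        scaling_dict,
        initialBestMatchesLength=5,
        simThresh=0.7,
        binLength=100,          # window size in milliseconds
        sounds_file_path=args.output,
        amplitudeMode="peak",   # placeholder; depends on AudioManipulator's supported modes
    )

    # Persist the per-window match metadata for later stages.
    with open("matches.pkl", "wb") as f:
        pkl.dump(matches, f)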