# File size: 6,566 Bytes
# Commit: a8cb0a6
import numpy as np
import librosa
import IPython.display as ipd
import matplotlib.pyplot as plt
# import audioread
# import time
# import soundfile as sf
# def read_audio(path):
# try:
# if path[-4:] == '.ogg':
# y, sr_native = sf.read(path)
# else:
# buf = []
# with audioread.audio_open(path) as input_file:
# sr_native = input_file.samplerate
# n_channels = input_file.channels
# for frame in input_file:
# frame = (1.0 / float(1 << 15)) * np.frombuffer(frame, f"<i{2:d}").astype(np.float32)
# buf.append(frame)
# y = np.concatenate(buf)
# if n_channels > 1:
# y = y.reshape((-1, n_channels)).T
# y = np.mean(y, axis=tuple(range(y.ndim - 1)))
# y = librosa.resample(y, orig_sr=sr_native, target_sr=22050, res_type="soxr_hq")
# return y, 22050
# except Exception as e:
# print(f"Error reading audio file: {e}")
# return None, None
class MyAudio:
    """Bundle of audio metadata (``details``) plus sample or spectral data."""

    def __init__(self, details, audioValues):
        # details: list of metadata entries; audioValues: 1-D samples or a
        # 2-D STFT matrix, depending on which helpers produced this object.
        self.details = details
        self.audioValues = audioValues

    @staticmethod
    def combineTwoAudios(audio1, audio2):
        """Merge two MyAudio objects: details are concatenated, signals summed."""
        mergedDetails = [*audio1.details, *audio2.details]
        mergedValues = AudioManipulator.joinDiffAudiosValues(
            [audio1.audioValues, audio2.audioValues]
        )
        return MyAudio(mergedDetails, mergedValues)

    @staticmethod
    def changeAudioToFFT(audio):
        """Return a new MyAudio whose values are the STFT of the input's samples."""
        spectrum = librosa.stft(audio.audioValues.copy())
        return MyAudio(audio.details, spectrum)

    @staticmethod
    def compareTwoFFTAudios(audio1, audio2):
        """Cosine similarity between two STFT magnitude matrices.

        The longer spectrogram is truncated to the shorter one's frame count
        before comparing; returns 0 when either magnitude is all zeros.
        """
        magShort = np.abs(audio1.audioValues)
        magLong = np.abs(audio2.audioValues)
        # Make magShort the one with the fewer time frames.
        if magShort.shape[1] > magLong.shape[1]:
            magShort, magLong = magLong, magShort
        magLong = magLong[:, : magShort.shape[1]]
        denominator = np.linalg.norm(magShort) * np.linalg.norm(magLong)
        if denominator == 0:
            return 0
        return np.dot(magShort.ravel(), magLong.ravel()) / denominator
class AudioManipulator:
    """Stateless helpers for mixing, slicing, transforming, and plotting audio.

    Audio signals are 1-D numpy arrays of samples; spectral helpers return
    2-D matrices (frequency x time frames).
    """

    @staticmethod
    def addAudioValuesInDuration(audioValues1, audioValues2, timeSt, sr):
        """Mix ``audioValues2`` into ``audioValues1`` in place, starting at ``timeSt``.

        Args:
            audioValues1: destination signal (mutated and also returned).
            audioValues2: signal to add on top.
            timeSt: start offset in milliseconds.
            sr: sample rate of ``audioValues1``.

        Samples of ``audioValues2`` that would run past the end of
        ``audioValues1`` are silently dropped.
        """
        indexSt = min(len(audioValues1) - 1, int(timeSt / 1000 * sr))
        indexEd = min(len(audioValues1), indexSt + len(audioValues2))
        for index in range(indexSt, indexEd):
            audioValues1[index] += audioValues2[index - indexSt]
        return audioValues1

    @staticmethod
    def joinDiffAudiosValues(audiosValues):
        """Zero-pad every signal to the longest length and return their sum.

        Fix: the input list is no longer mutated — padding is done on copies,
        so callers keep their original (shorter) arrays intact.
        """
        mx = max((len(values) for values in audiosValues), default=0)
        padded = [
            np.concatenate((values, np.zeros(mx - len(values))))
            if len(values) < mx
            else values
            for values in audiosValues
        ]
        return np.sum(padded, axis=0)

    @staticmethod
    def getAudioValuesInterface(audioValues):
        """Return an IPython playable-audio widget for the samples."""
        return ipd.Audio(audioValues)

    @staticmethod
    def splitAudioValues(audioValues, sr, start_time, end_time):
        """Return the slice of the signal between start_time and end_time (ms)."""
        return audioValues[int(sr * start_time / 1000) : int(sr * end_time / 1000)]

    @staticmethod
    def shiftPitchOfAudioValues(audioValues, sr, pitch_shift):
        """Return a copy of the signal pitch-shifted by ``pitch_shift`` semitones."""
        return librosa.effects.pitch_shift(audioValues, sr=sr, n_steps=pitch_shift)

    @staticmethod
    def calculateAmplitudeShiftOfAudioValues(audioValues1, audioValues2, mode):
        """Return the gain (rounded to 2 dp) scaling signal 2's level to signal 1's.

        Args:
            mode: "Max" compares peak amplitudes; "Mean" compares mean amplitudes.

        Raises:
            ValueError: for any other ``mode`` (previously this crashed with an
                obscure NameError on the unassigned locals).
        """
        if mode == "Max":
            peak_amplitude1 = np.max(np.abs(audioValues1))
            peak_amplitude2 = np.max(np.abs(audioValues2))
        elif mode == "Mean":
            peak_amplitude1 = np.mean(np.abs(audioValues1))
            peak_amplitude2 = np.mean(np.abs(audioValues2))
        else:
            raise ValueError(f"Unknown mode {mode!r}; expected 'Max' or 'Mean'")
        scaling_factor = peak_amplitude1 / peak_amplitude2
        return round(scaling_factor, 2)

    @staticmethod
    def getStftAndStftDb(audioValues):
        """Return the complex STFT of the signal and its magnitude in decibels."""
        stft = librosa.stft(audioValues)
        stft_db = librosa.amplitude_to_db(abs(stft))
        return stft, stft_db

    @staticmethod
    def getMelSpectogram(audioValues, sr):
        """Return the mel spectrogram (256 mel bands) and its dB-scaled version."""
        mel_spec = librosa.feature.melspectrogram(y=audioValues, sr=sr, n_mels=128 * 2)
        mel_spec_db = librosa.amplitude_to_db(mel_spec)  # ref = np.max
        return mel_spec, mel_spec_db

    @staticmethod
    def getChromaGram(audioValues, sr):
        """Return the chromagram (pitch-class energy over time), hop length 12."""
        return librosa.feature.chroma_stft(y=audioValues, sr=sr, hop_length=12)

    @staticmethod
    def drawAudioValues(audioValues, sr):
        """Plot the raw waveform against time in seconds."""
        plt.figure(figsize=(8.8, 3))
        plt.plot([(i + 1) / sr for i in range(len(audioValues))], audioValues)
        plt.title("Raw Audio Example")
        plt.show()

    @staticmethod
    def drawAudioValuesSpectrum(audioValues, sr):
        """Plot the log-frequency STFT magnitude spectrum.

        Bug fix: previously called the non-existent ``AudioManipulator.getStft``
        and raised AttributeError; the helper is named ``getStftAndStftDb``.
        """
        X, Xdb = AudioManipulator.getStftAndStftDb(audioValues)
        plt.figure(figsize=(14, 5))
        librosa.display.specshow(Xdb, sr=sr, x_axis="time", y_axis="log")
        plt.colorbar()
        plt.show()

    @staticmethod
    def drawAudioValuesSpectrumNormalized(audioValues, sr):
        """Plot the spectrum of the signal scaled to 16-bit full range.

        Bug fixes: the missing ``@staticmethod`` decorator is added for
        consistency with every sibling, and the call targets
        ``getStftAndStftDb`` instead of the non-existent ``getStft``.
        """
        X, Xdb = AudioManipulator.getStftAndStftDb(
            audioValues / audioValues.max() * 32767.00
        )
        plt.figure(figsize=(14, 5))
        librosa.display.specshow(Xdb, sr=sr, x_axis="time", y_axis="log")
        plt.colorbar()
        plt.show()

    @staticmethod
    def drawMelSpectrogram(audioValues, sr):
        """Plot the mel spectrogram with a labelled colorbar."""
        S, S_db_mel = AudioManipulator.getMelSpectogram(audioValues, sr)
        fig, ax = plt.subplots(figsize=(10, 3))
        img = librosa.display.specshow(S_db_mel, x_axis="time", y_axis="log", ax=ax)
        ax.set_title("Mel Spectogram Example", fontsize=20)
        # Plain string: the original f-string had no placeholders.
        fig.colorbar(img, ax=ax, format="%0.2f")
        plt.show()

    @staticmethod
    def drawChromaGram(audioValues, sr):
        """Plot the chromagram; hop_length matches getChromaGram's for a correct time axis."""
        chromagram = AudioManipulator.getChromaGram(audioValues, sr)
        plt.figure(figsize=(15, 5))
        librosa.display.specshow(
            chromagram, x_axis="time", y_axis="chroma", hop_length=12, cmap="coolwarm"
        )
        plt.show()  # consistent with the other draw helpers
# Script entry point: running the module directly only prints a short
# description of the library and its author; all functionality is exposed
# through the classes above.
if __name__ == "__main__":
    print(
        "This is a library for Audio Manipulation via fourier transform made specificaly for minecraft audio production using note blocks"
    )
    print("Author -: Rajat Bansal, IIT Mandi, B20123")
# end of file