File size: 6,566 Bytes
a8cb0a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import numpy as np
import librosa
import IPython.display as ipd
import matplotlib.pyplot as plt
# import audioread
# import time
# import soundfile as sf

# def read_audio(path):
#     try:
#         if path[-4:] == '.ogg':
#             y, sr_native = sf.read(path)
#         else:
#             buf = []
#             with audioread.audio_open(path) as input_file:
#                 sr_native = input_file.samplerate
#                 n_channels = input_file.channels

#                 for frame in input_file:
#                     frame = (1.0 / float(1 << 15)) * np.frombuffer(frame, f"<i{2:d}").astype(np.float32)
#                     buf.append(frame)

#                 y = np.concatenate(buf)
#                 if n_channels > 1:
#                     y = y.reshape((-1, n_channels)).T
#                 y = np.mean(y, axis=tuple(range(y.ndim - 1)))
#         y = librosa.resample(y, orig_sr=sr_native, target_sr=22050, res_type="soxr_hq")
#         return y, 22050

#     except Exception as e:
#         print(f"Error reading audio file: {e}")
#         return None, None

class MyAudio:
    """Pairs audio metadata (``details``) with its sample or STFT data."""

    def __init__(self, details, audioValues):
        self.details = details
        self.audioValues = audioValues

    @staticmethod
    def combineTwoAudios(audio1, audio2):
        """Merge two audios: concatenated details, sample-wise summed values."""
        merged_details = [*audio1.details, *audio2.details]
        merged_values = AudioManipulator.joinDiffAudiosValues(
            [audio1.audioValues, audio2.audioValues]
        )
        return MyAudio(merged_details, merged_values)

    @staticmethod
    def changeAudioToFFT(audio):
        """Return a new MyAudio holding the STFT of the input's samples."""
        spectrum = librosa.stft(audio.audioValues.copy())
        return MyAudio(audio.details, spectrum)

    @staticmethod
    def compareTwoFFTAudios(audio1, audio2):
        """Cosine similarity between the magnitude spectra of two STFT audios.

        The wider spectrogram is truncated to the narrower one's frame count;
        returns 0 when either (truncated) spectrum is all zeros.
        """
        shorter = np.abs(audio1.audioValues)
        longer = np.abs(audio2.audioValues)
        if shorter.shape[1] > longer.shape[1]:
            shorter, longer = longer, shorter
        longer = longer[:, : shorter.shape[1]]

        denom = np.linalg.norm(shorter) * np.linalg.norm(longer)
        if denom == 0:
            return 0
        return np.dot(shorter.ravel(), longer.ravel()) / denom


class AudioManipulator:
    """Stateless helpers for mixing, slicing, transforming and plotting audio."""

    @staticmethod
    def addAudioValuesInDuration(audioValues1, audioValues2, timeSt, sr):
        """Mix ``audioValues2`` into ``audioValues1`` in place, starting at ``timeSt`` ms.

        Samples of ``audioValues2`` that would extend past the end of
        ``audioValues1`` are dropped.  Returns the mutated ``audioValues1``.
        """
        # Clamp the start index so an out-of-range offset still lands in bounds.
        indexSt = min(len(audioValues1) - 1, int(timeSt / 1000 * sr))
        indexEd = min(len(audioValues1), indexSt + len(audioValues2))
        for index in range(indexSt, indexEd):
            audioValues1[index] += audioValues2[index - indexSt]
        return audioValues1

    @staticmethod
    def joinDiffAudiosValues(audiosValues):
        """Sum several audio arrays sample-wise, zero-padding shorter ones.

        Unlike the previous revision, the caller's list and arrays are not
        mutated; padded copies are built locally.  An empty input yields an
        empty array instead of raising.
        """
        if not audiosValues:
            return np.zeros(0)
        longest = max(len(values) for values in audiosValues)
        padded = [
            np.concatenate((values, np.zeros(longest - len(values))))
            if len(values) < longest
            else values
            for values in audiosValues
        ]
        return np.sum(padded, axis=0)

    @staticmethod
    def getAudioValuesInterface(audioValues):
        """Wrap raw samples in an IPython audio playback widget."""
        return ipd.Audio(audioValues)

    @staticmethod
    def splitAudioValues(audioValues, sr, start_time, end_time):
        """Return the slice of samples between ``start_time`` and ``end_time`` (ms)."""
        return audioValues[int(sr * start_time / 1000) : int(sr * end_time / 1000)]

    @staticmethod
    def shiftPitchOfAudioValues(audioValues, sr, pitch_shift):
        """Pitch-shift the samples by ``pitch_shift`` semitones (duration unchanged)."""
        return librosa.effects.pitch_shift(audioValues, sr=sr, n_steps=pitch_shift)

    @staticmethod
    def calculateAmplitudeShiftOfAudioValues(audioValues1, audioValues2, mode):
        """Return the factor that scales signal 2's amplitude to match signal 1's.

        ``mode`` selects the amplitude statistic: "Max" (peak) or "Mean".
        Raises ValueError on any other mode (previously a confusing NameError).
        Note: a silent second signal yields inf (numpy division by zero).
        """
        if mode == "Max":
            peak_amplitude1 = np.max(np.abs(audioValues1))
            peak_amplitude2 = np.max(np.abs(audioValues2))
        elif mode == "Mean":
            peak_amplitude1 = np.mean(np.abs(audioValues1))
            peak_amplitude2 = np.mean(np.abs(audioValues2))
        else:
            raise ValueError(f"Unknown mode: {mode!r} (expected 'Max' or 'Mean')")

        scaling_factor = peak_amplitude1 / peak_amplitude2
        return round(scaling_factor, 2)

    @staticmethod
    def getStftAndStftDb(audioValues):
        """Return the complex STFT and its magnitude converted to decibels."""
        stft = librosa.stft(audioValues)
        stft_db = librosa.amplitude_to_db(abs(stft))
        return stft, stft_db

    @staticmethod
    def getMelSpectogram(audioValues, sr):
        """Return a 256-band mel spectrogram and its dB conversion.

        NOTE(review): melspectrogram returns a *power* spectrogram, so
        librosa.power_to_db would be the technically correct conversion;
        amplitude_to_db is kept to preserve existing output values.
        """
        mel_spec = librosa.feature.melspectrogram(y=audioValues, sr=sr, n_mels=128 * 2)
        mel_spec_db = librosa.amplitude_to_db(mel_spec)  # ref = np.max
        return mel_spec, mel_spec_db

    @staticmethod
    def getChromaGram(audioValues, sr):
        """Return a chromagram (pitch-class energy over time) with hop length 12."""
        return librosa.feature.chroma_stft(y=audioValues, sr=sr, hop_length=12)

    @staticmethod
    def drawAudioValues(audioValues, sr):
        """Plot the raw waveform against time in seconds."""
        plt.figure(figsize=(8.8, 3))
        plt.plot([(i + 1) / sr for i in range(len(audioValues))], audioValues)
        plt.title("Raw Audio Example")
        plt.show()

    @staticmethod
    def drawAudioValuesSpectrum(audioValues, sr):
        """Plot the log-frequency dB spectrogram of the samples."""
        # Fixed: previously called the non-existent AudioManipulator.getStft.
        X, Xdb = AudioManipulator.getStftAndStftDb(audioValues)
        plt.figure(figsize=(14, 5))
        librosa.display.specshow(Xdb, sr=sr, x_axis="time", y_axis="log")
        plt.colorbar()
        plt.show()

    @staticmethod
    def drawAudioValuesSpectrumNormalized(audioValues, sr):
        """Plot the spectrogram after peak-normalizing samples to 16-bit range.

        Fixed: was missing @staticmethod and called the non-existent getStft.
        """
        X, Xdb = AudioManipulator.getStftAndStftDb(
            audioValues / audioValues.max() * 32767.00
        )
        plt.figure(figsize=(14, 5))
        librosa.display.specshow(Xdb, sr=sr, x_axis="time", y_axis="log")
        plt.colorbar()
        plt.show()

    @staticmethod
    def drawMelSpectrogram(audioValues, sr):
        """Plot the mel spectrogram in decibels."""
        S, S_db_mel = AudioManipulator.getMelSpectogram(audioValues, sr)

        fig, ax = plt.subplots(figsize=(10, 3))
        img = librosa.display.specshow(S_db_mel, x_axis="time", y_axis="log", ax=ax)
        ax.set_title("Mel Spectogram Example", fontsize=20)
        fig.colorbar(img, ax=ax, format=f"%0.2f")
        plt.show()

    @staticmethod
    def drawChromaGram(audioValues, sr):
        """Plot the chromagram with a coolwarm colour map."""
        chromagram = AudioManipulator.getChromaGram(audioValues, sr)
        plt.figure(figsize=(15, 5))
        librosa.display.specshow(
            chromagram, x_axis="time", y_axis="chroma", hop_length=12, cmap="coolwarm"
        )


if __name__ == "__main__":
    # Informational banner shown when the module is run as a script.
    banner = (
        "This is a library for Audio Manipulation via fourier transform made specificaly for minecraft audio production using note blocks",
        "Author -: Rajat Bansal, IIT Mandi, B20123",
    )
    for line in banner:
        print(line)