Spaces:
Build error
Build error
File size: 2,287 Bytes
90501fb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import numpy as np
from sklearn import preprocessing
from scipy.io.wavfile import read
from python_speech_features import mfcc
from python_speech_features import delta
class FeaturesExtractor:
    """Extract MFCC-based feature matrices from wave files."""

    def __init__(self):
        pass

    @staticmethod
    def _fft_size(rate, winlen, minimum=512):
        """Return an FFT size large enough to hold one analysis window.

        Args:
            rate (int)     : sample rate of the signal in Hz.
            winlen (float) : analysis window length in seconds.
            minimum (int)  : smallest FFT size to return.

        Returns:
            (int) : the smallest power of two that is >= the window length
                    in samples, but never less than ``minimum``.
        """
        # Round half up so the frame length is not under-estimated.
        frame_len = int(winlen * rate + 0.5)
        if frame_len < 1:
            return minimum
        return max(minimum, 1 << (frame_len - 1).bit_length())

    def extract_features(self, audio_path):
        """
        Extract voice features including the Mel Frequency Cepstral Coefficient (MFCC)
        from an audio using the python_speech_features module, standardize each
        coefficient to zero mean and unit variance and combine the result with
        the MFCC deltas and the MFCC double deltas.

        Args:
            audio_path (str) : path to wave file without silent moments.

        Returns:
            (array) : Extracted features matrix; one row per frame, with the
                      MFCCs, their deltas and their double deltas stacked
                      side by side (3 * numcep columns).
        """
        winlen = 0.05
        rate, audio = read(audio_path)

        # Multi-channel files are read as a 2-D (samples, channels) array;
        # average the channels so the framing code gets a 1-D signal.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # A fixed FFT size of 512 is shorter than a 50 ms window for sample
        # rates above ~10 kHz, which makes the library truncate every frame.
        # Size the FFT from the real frame length instead (512 is kept as the
        # floor, so low sample rates behave exactly as before).
        nfft = self._fft_size(rate, winlen)

        mfcc_feature = mfcc(# The audio signal from which to compute features.
                            audio,
                            # The samplerate of the signal we are working with.
                            rate,
                            # The length of the analysis window in seconds.
                            # Default is 0.025s (25 milliseconds)
                            winlen=winlen,
                            # The step between successive windows in seconds.
                            # Default is 0.01s (10 milliseconds)
                            winstep=0.01,
                            # The number of cepstrum to return.
                            # Default 13.
                            numcep=5,
                            # The number of filters in the filterbank.
                            # Default is 26.
                            nfilt=30,
                            # The FFT size; must cover a whole analysis window.
                            nfft=nfft,
                            # If true, the zeroth cepstral coefficient is replaced
                            # with the log of the total frame energy.
                            appendEnergy=True)

        # Standardize every coefficient (column) across the frames.
        mfcc_feature = preprocessing.scale(mfcc_feature)
        deltas = delta(mfcc_feature, 2)
        double_deltas = delta(deltas, 2)
        combined = np.hstack((mfcc_feature, deltas, double_deltas))
        return combined