|
'''
|
|
This is the main ArrayID feature building script
|
|
|
|
revised: April 04, 2021
|
|
|
|
'''
|
|
|
|
import glob
|
|
import os
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from scipy.io.wavfile import read
|
|
from scipy.fftpack import fft, ifft, fftfreq
|
|
from scipy import signal
|
|
import random
|
|
from librosa.core import lpc
|
|
import librosa.feature
|
|
import csv
|
|
from sklearn.preprocessing import normalize
|
|
from direction_detection import *
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def hz_to_indices(freqs, lowcut, highcut):
    """Translate a frequency band in Hz into a pair of indices into ``freqs``.

    ``freqs`` is scanned from index 0.  ``low`` is the first index whose value
    is not below ``lowcut``; the scan then continues from ``low`` and ``high``
    is the first index whose value is not below ``highcut``.

    Both scans stop at the end of ``freqs``: if a cutoff is larger than every
    remaining value the corresponding index is ``len(freqs)`` (a valid slice
    bound) instead of an ``IndexError``.

    Args:
        freqs: Sequence of frequency values, indexable by integer.
        lowcut: Lower edge of the band.
        highcut: Upper edge of the band.

    Returns:
        Tuple ``(low, high)`` suitable for slicing ``freqs[low:high]``.
    """
    n = len(freqs)
    i = 0
    while i < n and freqs[i] < lowcut:
        i += 1
    low = i
    # The second scan deliberately resumes from ``low`` rather than from 0.
    while i < n and freqs[i] < highcut:
        i += 1
    return low, i
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_row_compressor(old_dimension, new_dimension):
    """Build the weight matrix that resamples ``old_dimension`` rows to ``new_dimension``.

    The result has shape ``(new_dimension, old_dimension)``.  Input positions
    are walked left to right and assigned to consecutive output rows in bins
    of width ``old_dimension / new_dimension``; a position that straddles a
    bin boundary has its unit weight split between the two neighbouring rows.
    Finally every weight is divided by the bin width.

    Args:
        old_dimension: Number of input rows.
        new_dimension: Number of output rows.

    Returns:
        2-D float ``numpy.ndarray`` of shape ``(new_dimension, old_dimension)``.
    """
    weights = np.zeros((new_dimension, old_dimension))
    n_rows, n_cols = weights.shape
    step = float(old_dimension) / new_dimension
    boundary = step  # position where the current output bin ends
    row = 0
    col = 0
    while row < n_rows and col < n_cols:
        remaining = boundary - col
        # Rounding guards against float noise when a whole column still fits.
        if round(remaining, 10) >= 1:
            weights[row, col] = 1
            col += 1
            continue
        if boundary == col:
            # Bin ends exactly on a column edge: move to the next output row.
            row += 1
            boundary += step
            continue
        # Column straddles the boundary: share it between this row and the next.
        weights[row, col] = remaining
        row += 1
        weights[row, col] = 1 - remaining
        col += 1
        boundary += step
    weights /= step
    return weights
|
|
|
|
|
|
def get_column_compressor(old_dimension, new_dimension):
    """Return the transpose of ``get_row_compressor(old_dimension, new_dimension)``.

    The result has shape ``(old_dimension, new_dimension)``, so it applies the
    same resampling weights when used as a right-hand multiplier.
    """
    row_compressor = get_row_compressor(old_dimension, new_dimension)
    return row_compressor.T
|
|
|
|
def compress_and_average(array, new_shape):
    """Resample a 2-D array to ``new_shape`` using the compressor matrices.

    Computes ``R * array * C`` where ``R`` is the row compressor for
    ``array.shape[0] -> new_shape[0]`` and ``C`` is the column compressor for
    ``array.shape[1] -> new_shape[1]``.

    ``np.asmatrix`` is used instead of the ``np.mat`` alias, which was removed
    in NumPy 2.0; the two are equivalent, so the result is still an
    ``numpy.matrix``.

    Args:
        array: 2-D array-like exposing ``.shape``.
        new_shape: Pair ``(rows, columns)`` for the output.

    Returns:
        ``numpy.matrix`` of shape ``new_shape``.
    """
    row_op = np.asmatrix(get_row_compressor(array.shape[0], new_shape[0]))
    col_op = np.asmatrix(get_column_compressor(array.shape[1], new_shape[1]))
    # ``*`` on matrix operands is matrix multiplication, not elementwise.
    return row_op * np.asmatrix(array) * col_op
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_filelist(dir):
    """Return the path of every file found under ``dir``, searched recursively.

    Paths are produced in ``os.walk`` order and are formed by joining each
    visited directory with the file names it contains.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(dir)
        for name in names
    ]
|
|
|
|
|
|
def lpcc(data, n=15):
    """
    f_LPC = lpcc(data, n): derive cepstral coefficients from the LPC of the voice data.

    The linear-prediction coefficients of order ``n`` (default 15) are obtained
    from ``lpc``, negated, and converted with the recursion

        c[i] = a[i] + sum_{k=1}^{i-1} (1 - k/i) * a[k] * c[i - k]

    with ``c[0]`` seeded to ``log(n)``.  The seed element is dropped from the
    returned array.
    # NOTE(review): presumably ``lpc`` returns n + 1 coefficients, giving n
    # output values -- confirm against the librosa version in use.
    """
    coeffs = -lpc(data, order=n)
    n_coeffs = len(coeffs)
    cepstrum = np.zeros(n_coeffs)
    cepstrum[0] = np.log(n)
    for idx in range(1, n_coeffs):
        lags = np.arange(1, idx)
        weighted = (1 - lags / idx) * coeffs[lags] * cepstrum[idx - lags]
        cepstrum[idx] = coeffs[idx] + np.sum(weighted)
    return cepstrum[1:]
|
|
|
|
|
|
|
|
def get_ltfd(spec, m=20, start_index=1, end_index=86):
    """Sum a magnitude spectrogram over time and peak-normalise the result.

    ``spec`` is indexed as ``spec[channel, bin, frame]``.  Only bins
    ``start_index:end_index`` are kept, and trailing frames are dropped so the
    frame count is a multiple of ``m``.

    Returns:
        Tuple ``(all_ffts, channels_ffts)``:
        ``all_ffts`` is the per-bin sum over frames and channels divided by
        its maximum; ``channels_ffts`` holds, for each channel, the per-bin
        sum over frames divided by that channel's own maximum.
    """
    usable_frames = spec.shape[2] - spec.shape[2] % m
    band = spec[:, start_index:end_index, :usable_frames]

    # Collapse the time axis: one spectrum per channel.
    per_channel = np.sum(band, axis=2)

    # Collapse the channel axis too, then scale so the peak equals 1.
    combined = np.sum(per_channel, axis=0)
    combined /= np.max(combined)

    normalised = np.asarray([row / np.max(row) for row in per_channel])
    return combined, normalised
|
|
|
|
|
|
|
|
def get_ltfp(spec, m=20, start_index_fp=1, end_index_fp=86):
    """Measure how much the per-bin energy varies across channels.

    ``spec`` is indexed as ``spec[channel, bin, frame]``.  Only bins
    ``start_index_fp:end_index_fp`` are kept and trailing frames are dropped
    so the frame count divides evenly into ``m`` time segments.  For every
    segment and bin, the energies summed over the segment's frames are
    compared across channels as ``std / mean``; these ratios are averaged
    over the segments and scaled so the largest value equals 1.

    The reductions are done with NumPy axis operations instead of Python
    loops over every (segment, channel, bin) combination.

    Args:
        spec: 3-D magnitude array ``(channels, bins, frames)``.
        m: Number of equal time segments.
        start_index_fp: First bin index kept (inclusive).
        end_index_fp: Last bin index kept (exclusive).

    Returns:
        1-D ``numpy.ndarray`` with one value per kept bin.
    """
    usable_frames = spec.shape[2] - spec.shape[2] % m
    band = spec[:, start_index_fp:end_index_fp, :usable_frames]

    # Shape: (m, channels, bins, frames_per_segment).
    segments = np.asarray(np.split(band, m, axis=2))

    # Energy per segment/channel/bin -> (m, channels, bins).
    energy = segments.sum(axis=3)

    # Spread across channels relative to their mean -> (m, bins).
    variation = energy.std(axis=1) / energy.mean(axis=1)

    LTFP = variation.mean(axis=0)
    return LTFP / np.max(LTFP)
|
|
|
|
|
|
def feature_distribution(channel_fft):
    """Summarise where the cumulative energy of each channel spectrum crosses fixed levels.

    For every row of ``channel_fft`` the running sum is computed and scaled so
    its maximum is 1.  ``find_value`` then locates the index at which that
    curve crosses each of the levels 0.1, 0.3, 0.5, 0.7 and 0.9, and the
    index is divided by the row length.

    The running sum is built with ``np.cumsum`` on a new array, so the
    caller's ``channel_fft`` is left unmodified (accumulating in place through
    a row view would overwrite it).

    Args:
        channel_fft: Sequence of 1-D spectra, one per channel.

    Returns:
        Tuple ``(co, f_dis)``: ``co`` has shape ``(5, n_channels)`` with the
        normalised crossing positions; ``f_dis`` has length 10 and holds the
        mean over channels of each level followed by the standard deviation
        over channels of each level.
    """
    dis_index = (0.1, 0.3, 0.5, 0.7, 0.9)
    num_feature = len(dis_index)
    co = np.zeros((num_feature, len(channel_fft)))

    for num, spectrum in enumerate(channel_fft):
        cumulative = np.cumsum(spectrum)  # new array; input is not touched
        cumulative = cumulative / np.max(cumulative)
        for i, level in enumerate(dis_index):
            co[i, num] = find_value(cumulative, level)
        # Express positions as a fraction of the spectrum length.
        co[:, num] /= len(cumulative)

    f_dis = np.zeros(2 * num_feature)
    for i in range(num_feature):
        f_dis[i] = np.mean(co[i, :])
        f_dis[i + num_feature] = np.std(co[i, :])
    return co, f_dis
|
|
|
|
|
|
def find_value(a, dis_index):
    """Return the first index ``i`` with ``a[i] <= dis_index <= a[i + 1]``.

    Adjacent pairs of ``a`` are examined from the start.  If no pair brackets
    ``dis_index`` (including when ``a`` has fewer than two elements) the
    result is 0.
    """
    neighbours = enumerate(zip(a, a[1:]))
    return next(
        (pos for pos, (left, right) in neighbours if left <= dis_index <= right),
        0,
    )
|
|
|
|
|
|
def mald_feature(rate, data):
    """Build the feature vector for one multi-channel recording.

    The returned vector is the concatenation, in this order, of:

    1. LPCC values of the channels selected by ``getDirection_Pair``;
    2. the LTFD spectrum (bins covering 1-1000 Hz) averaged down to 20 values;
    3. the 10 spectral-distribution statistics from ``feature_distribution``;
    4. the LTFP spectrum (bins covering 1-5000 Hz, or up to
       ``rate / 2 - 100`` when 5000 Hz exceeds ``rate / 2``) averaged down
       to 20 values.

    Args:
        rate: Sampling rate in Hz; forwarded as ``fs`` to the direction
            helpers and to the STFT.
        data: 2-D array indexed as ``data[:, channel]`` with 4, 6 or 8
            channels.
            # NOTE(review): presumably shaped (samples, channels) -- confirm
            # against the caller.

    Returns:
        1-D ``numpy.ndarray`` holding the concatenated features.

    Raises:
        ValueError: If ``data`` does not have 4, 6 or 8 channels.
    """
    n_fft = 4096
    n_channels = data.shape[1]

    # Pick the direction estimator that matches the microphone count.
    if n_channels == 4:
        closestPair = getAngle_for_four(data, fs=rate)
    elif n_channels == 6:
        closestPair = getAngle_for_six(data, fs=rate)
    elif n_channels == 8:
        closestPair = getAngle_for_eight(data, fs=rate)
    else:
        # Fail with a clear message rather than an unbound-variable error below.
        raise ValueError(
            "mald_feature supports 4, 6 or 8 channels, got %d" % n_channels
        )
    pairs = getDirection_Pair(closestPair, n_channels)

    # Frequency bands (Hz) for the two spectral features.
    lowcut_fp = 1
    highcut_fp = 5000
    if highcut_fp > rate / 2:
        highcut_fp = rate / 2 - 100
    highcut_fd = 1000

    # Convert the band edges to FFT bin indices.
    freq = fftfreq(n_fft, 1. / rate)
    start_index, end_index = hz_to_indices(freq, lowcut_fp, highcut_fd)
    start_index_fp, end_index_fp = hz_to_indices(freq, lowcut_fp, highcut_fp)

    # LPCC of each selected channel, appended one after another.
    _lpcc = []
    for i in pairs:
        a = np.asfortranarray(data[:, i]).astype(dtype=float)
        _lpcc += list(lpcc(a))

    # Magnitude spectrogram of every channel: (channels, bins, frames).
    spec = [
        signal.stft(data[:, i], fs=rate, window='hann', nperseg=1024,
                    noverlap=768, nfft=n_fft)[2]
        for i in range(n_channels)
    ]
    spec = np.abs(np.asarray(spec))

    _ltfd, channel_fft = get_ltfd(spec=spec, start_index=start_index, end_index=end_index)
    _ltfd = list(compress_and_average(_ltfd.reshape(len(_ltfd), 1), (20, 1)).flat)

    # Only the summary statistics are used; the per-channel table is discarded.
    _, _fdis = feature_distribution(channel_fft)

    _ltfp = get_ltfp(spec=spec, start_index_fp=start_index_fp, end_index_fp=end_index_fp)
    _ltfp = list(compress_and_average(_ltfp.reshape(len(_ltfp), 1), (20, 1)).flat)

    feature = np.concatenate((_lpcc, _ltfd, _fdis, _ltfp))
    return feature
|
|
|
|
|