Spaces:
Build error
Build error
Nikhil0987
committed on
Commit
·
90501fb
1
Parent(s):
1c7de53
- FeaturesExtractor.py +52 -0
- det.py +68 -0
- modeltrainer.py +76 -0
FeaturesExtractor.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from sklearn import preprocessing
|
3 |
+
from scipy.io.wavfile import read
|
4 |
+
from python_speech_features import mfcc
|
5 |
+
from python_speech_features import delta
|
6 |
+
|
7 |
+
|
8 |
+
class FeaturesExtractor:
    """Extracts speaker features (MFCCs + deltas + double deltas) from wave files."""

    def __init__(self):
        pass

    def extract_features(self, audio_path, winlen=0.05, winstep=0.01,
                         numcep=5, nfilt=30, nfft=512):
        """
        Extract voice features including the Mel Frequency Cepstral Coefficient (MFCC)
        from an audio using the python_speech_features module, performs Cepstral Mean
        Normalization (CMS) and combine it with MFCC deltas and the MFCC double
        deltas.

        Args:
            audio_path (str) : path to wave file without silent moments.
            winlen (float)   : analysis window length in seconds (default 0.05).
            winstep (float)  : step between successive windows in seconds (default 0.01).
            numcep (int)     : number of cepstral coefficients to return (default 5).
            nfilt (int)      : number of filters in the filterbank (default 30).
            nfft (int)       : FFT size (default 512).

        Returns:
            (array) : Extracted features matrix of shape (num_frames, 3 * numcep).
        """
        rate, audio = read(audio_path)
        mfcc_feature = mfcc(audio,
                            rate,
                            winlen=winlen,
                            winstep=winstep,
                            numcep=numcep,
                            nfilt=nfilt,
                            nfft=nfft,
                            # Replace the zeroth cepstral coefficient with the
                            # log of the total frame energy.
                            appendEnergy=True)

        # Cepstral mean normalization: scale each coefficient column to
        # zero mean / unit variance before computing deltas.
        mfcc_feature = preprocessing.scale(mfcc_feature)
        deltas = delta(mfcc_feature, 2)
        double_deltas = delta(deltas, 2)
        combined = np.hstack((mfcc_feature, deltas, double_deltas))
        return combined
|
det.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pickle
|
3 |
+
import warnings
|
4 |
+
import numpy as np
|
5 |
+
from FeaturesExtractor import FeaturesExtractor
|
6 |
+
|
7 |
+
warnings.filterwarnings("ignore")
|
8 |
+
|
9 |
+
|
10 |
+
class GenderIdentifier:
    """Scores test wave files against pre-trained female/male GMMs and reports accuracy."""

    def __init__(self, females_files_path, males_files_path, females_model_path, males_model_path):
        """
        Args:
            females_files_path (str) : directory holding female test wave files.
            males_files_path (str)   : directory holding male test wave files.
            females_model_path (str) : path to the pickled female GMM.
            males_model_path (str)   : path to the pickled male GMM.
        """
        self.females_training_path = females_files_path
        self.males_training_path = males_files_path
        self.error = 0
        self.total_sample = 0
        self.features_extractor = FeaturesExtractor()
        # load models — use context managers so the model files are closed
        # (the original `pickle.load(open(...))` leaked the file handles).
        with open(females_model_path, 'rb') as model_file:
            self.females_gmm = pickle.load(model_file)
        with open(males_model_path, 'rb') as model_file:
            self.males_gmm = pickle.load(model_file)

    def process(self):
        """Identify the gender of every test file, printing per-file results
        and the overall accuracy at the end."""
        files = self.get_file_paths(self.females_training_path, self.males_training_path)
        # read the test directory and get the list of test audio files
        for file in files:
            self.total_sample += 1
            print("%10s %8s %1s" % ("--> TESTING", ":", os.path.basename(file)))

            vector = self.features_extractor.extract_features(file)
            winner = self.identify_gender(vector)
            # NOTE(review): assumes '/'-separated paths like
            # "TestingData/females/x.wav" and derives "female"/"male" by
            # stripping the trailing 's' from the second component — confirm
            # this holds on Windows-style paths.
            expected_gender = file.split("/")[1][:-1]

            print("%10s %6s %1s" % ("+ EXPECTATION",":", expected_gender))
            print("%10s %3s %1s" % ("+ IDENTIFICATION", ":", winner))

            if winner != expected_gender: self.error += 1
            print("----------------------------------------------------")

        # Guard the empty-test-set case (the original raised ZeroDivisionError).
        if self.total_sample:
            accuracy = ( float(self.total_sample - self.error) / float(self.total_sample) ) * 100
        else:
            accuracy = 0.0
        accuracy_msg = "*** Accuracy = " + str(round(accuracy, 3)) + "% ***"
        print(accuracy_msg)

    def get_file_paths(self, females_training_path, males_training_path):
        """Return the female file paths followed by the male file paths."""
        females = [ os.path.join(females_training_path, f) for f in os.listdir(females_training_path) ]
        males = [ os.path.join(males_training_path, f) for f in os.listdir(males_training_path) ]
        files = females + males
        return files

    def identify_gender(self, vector):
        """Score `vector` under both GMMs and return "female" or "male"
        (the hypothesis with the higher log-likelihood)."""
        # female hypothesis scoring
        is_female_scores = np.array(self.females_gmm.score(vector))
        is_female_log_likelihood = is_female_scores.sum()
        # male hypothesis scoring
        is_male_scores = np.array(self.males_gmm.score(vector))
        is_male_log_likelihood = is_male_scores.sum()

        print("%10s %5s %1s" % ("+ FEMALE SCORE",":", str(round(is_female_log_likelihood, 3))))
        print("%10s %7s %1s" % ("+ MALE SCORE", ":", str(round(is_male_log_likelihood,3))))

        if is_male_log_likelihood > is_female_log_likelihood: winner = "male"
        else : winner = "female"
        return winner
|
64 |
+
|
65 |
+
|
66 |
+
if __name__== "__main__":
    # Evaluate the pickled per-gender models on the held-out test partitions.
    identifier = GenderIdentifier("TestingData/females", "TestingData/males",
                                  "females.gmm", "males.gmm")
    identifier.process()
|
modeltrainer.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pickle
|
3 |
+
import warnings
|
4 |
+
import numpy as np
|
5 |
+
from sklearn.mixture import GMM
|
6 |
+
from FeaturesExtractor import FeaturesExtractor
|
7 |
+
|
8 |
+
warnings.filterwarnings("ignore")
|
9 |
+
|
10 |
+
|
11 |
+
class ModelsTrainer:
    """Trains one GMM per gender from directories of wave files and pickles both models."""

    def __init__(self, females_files_path, males_files_path):
        """
        Args:
            females_files_path (str) : directory holding female training wave files.
            males_files_path (str)   : directory holding male training wave files.
        """
        self.females_training_path = females_files_path
        self.males_training_path = males_files_path
        self.features_extractor = FeaturesExtractor()

    def process(self):
        """Collect features per gender, fit one GMM each, and save both to disk."""
        females, males = self.get_file_paths(self.females_training_path,
                                             self.males_training_path)
        # collect voice features
        female_voice_features = self.collect_features(females)
        male_voice_features = self.collect_features(males)
        # generate gaussian mixture models
        # NOTE(review): sklearn.mixture.GMM was removed in scikit-learn 0.20;
        # on modern scikit-learn this must become
        # GaussianMixture(n_components=16, max_iter=200, covariance_type='diag',
        # n_init=3) — confirm the pinned scikit-learn version.
        females_gmm = GMM(n_components = 16, n_iter = 200, covariance_type='diag', n_init = 3)
        males_gmm = GMM(n_components = 16, n_iter = 200, covariance_type='diag', n_init = 3)
        # fit features to models
        females_gmm.fit(female_voice_features)
        males_gmm.fit(male_voice_features)
        # save models
        self.save_gmm(females_gmm, "females")
        self.save_gmm(males_gmm, "males")

    def get_file_paths(self, females_training_path, males_training_path):
        """Return (female_paths, male_paths) listed from the two directories."""
        females = [ os.path.join(females_training_path, f) for f in os.listdir(females_training_path) ]
        males = [ os.path.join(males_training_path, f) for f in os.listdir(males_training_path) ]
        return females, males

    def collect_features(self, files):
        """
        Collect voice features from various speakers of the same gender.

        Args:
            files (list) : List of voice file paths.

        Returns:
            (array) : Extracted features matrix.
        """
        # Accumulate each file's feature matrix and stack once at the end:
        # the original re-ran np.vstack inside the loop, copying all previous
        # rows on every iteration (O(n^2) in total rows).
        feature_blocks = []
        for file in files:
            print("%5s %10s" % ("PROCESSING", file))
            # extract MFCC & delta MFCC features from audio
            vector = self.features_extractor.extract_features(file)
            feature_blocks.append(vector)
        if not feature_blocks:
            # Preserve the original empty-input sentinel.
            return np.asarray(())
        return np.vstack(feature_blocks)

    def save_gmm(self, gmm, name):
        """ Save Gaussian mixture model using pickle.

        Args:
            gmm : Gaussian mixture model.
            name (str) : File name.
        """
        filename = name + ".gmm"
        with open(filename, 'wb') as gmm_file:
            pickle.dump(gmm, gmm_file)
        print ("%5s %10s" % ("SAVING", filename,))
|
72 |
+
|
73 |
+
|
74 |
+
if __name__== "__main__":
    # Train and persist the per-gender GMMs from the training partitions.
    trainer = ModelsTrainer("TrainingData/females", "TrainingData/males")
    trainer.process()
|