Spaces:
Build error
Build error
File size: 8,144 Bytes
7aac284 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
import librosa
import joblib
from keras.models import load_model
import numpy as np
import pandas as pd
import gradio as gr
import h5py
# NOTE(review): this only binds a Python variable called TF_ENABLE_ONEDNN_OPTS;
# it does not modify the process environment. If the intent was to disable
# TensorFlow's oneDNN optimizations, the value has to be placed in os.environ
# before TensorFlow/Keras is imported -- confirm the intent.
TF_ENABLE_ONEDNN_OPTS = 0

# Directory (with trailing slash) holding every serialized model and scaler.
root_path = "./model/"

# Maps a predicted class index to the emotion label shown to the user.
num2label = {
    0: "Neutral",
    1: "Calm",
    2: "Happy",
    3: "Sad",
    4: "Angry",
    5: "Fearful",
    6: "Disgust",
    7: "Surprised",
}

# Classifiers stored with joblib: one file per (model prefix, feature set).
# joblib.load unpickles its input, so these files must come from a trusted
# source.
SVM_spectral = joblib.load(f"{root_path}SVM_spectral.joblib")
SVM_prosodic = joblib.load(f"{root_path}SVM_prosodic.joblib")
SVM_full = joblib.load(f"{root_path}SVM_full.joblib")
SVM_mfcc = joblib.load(f"{root_path}SVM_mfcc.joblib")

NB_spectral = joblib.load(f"{root_path}NB_spectral.joblib")
NB_prosodic = joblib.load(f"{root_path}NB_prosodic.joblib")
NB_full = joblib.load(f"{root_path}NB_full.joblib")
NB_mfcc = joblib.load(f"{root_path}NB_mfcc.joblib")

DT_spectral = joblib.load(f"{root_path}DT_spectral.joblib")
DT_prosodic = joblib.load(f"{root_path}DT_prosodic.joblib")
DT_full = joblib.load(f"{root_path}DT_full.joblib")
DT_mfcc = joblib.load(f"{root_path}DT_mfcc.joblib")

MLP_spectral = joblib.load(f"{root_path}MLP_spectral.joblib")
MLP_prosodic = joblib.load(f"{root_path}MLP_prosodic.joblib")
MLP_full = joblib.load(f"{root_path}MLP_full.joblib")
MLP_mfcc = joblib.load(f"{root_path}MLP_mfcc.joblib")

RF_spectral = joblib.load(f"{root_path}RF_spectral.joblib")
RF_prosodic = joblib.load(f"{root_path}RF_prosodic.joblib")
RF_full = joblib.load(f"{root_path}RF_full.joblib")
RF_mfcc = joblib.load(f"{root_path}RF_mfcc.joblib")
def load_model_from_h5(file_path):
    """Load a Keras model saved in HDF5 format without compiling it.

    The path is handed directly to ``load_model`` instead of an already-open
    ``h5py.File`` handle. Keras documents ``filepath`` as a string or
    ``pathlib.Path``; newer Keras releases pick the loader from the path's
    suffix and do not accept a file handle, while a plain path works on both
    older and newer releases.

    Args:
        file_path: Path to the ``.h5`` model file.

    Returns:
        The model returned by ``load_model`` with ``compile=False`` (no
        optimizer or loss is restored, which is enough for inference).
    """
    return load_model(file_path, compile=False)
# Keras models stored as .h5 files: one per (model prefix, feature set).
LSTM_spectral = load_model_from_h5(f"{root_path}LSTM_spectral.h5")
LSTM_prosodic = load_model_from_h5(f"{root_path}LSTM_prosodic.h5")
LSTM_full = load_model_from_h5(f"{root_path}LSTM_full.h5")
LSTM_mfcc = load_model_from_h5(f"{root_path}LSTM_mfcc.h5")

LSTM_CNN_spectral = load_model_from_h5(f"{root_path}LSTM_CNN_spectral.h5")
LSTM_CNN_prosodic = load_model_from_h5(f"{root_path}LSTM_CNN_prosodic.h5")
LSTM_CNN_full = load_model_from_h5(f"{root_path}LSTM_CNN_full.h5")
LSTM_CNN_mfcc = load_model_from_h5(f"{root_path}LSTM_CNN_mfcc.h5")

CNN_spectral = load_model_from_h5(f"{root_path}CNN_spectral.h5")
CNN_prosodic = load_model_from_h5(f"{root_path}CNN_prosodic.h5")
CNN_full = load_model_from_h5(f"{root_path}CNN_full.h5")
CNN_mfcc = load_model_from_h5(f"{root_path}CNN_mfcc.h5")

# Registry of every loaded model, addressed as total_model[model][feature_set].
total_model = {
    "SVM": {
        'mfcc': SVM_mfcc,
        'spectral': SVM_spectral,
        'prosodic': SVM_prosodic,
        'full': SVM_full,
    },
    "NB": {
        'mfcc': NB_mfcc,
        'spectral': NB_spectral,
        'prosodic': NB_prosodic,
        'full': NB_full,
    },
    "DT": {
        'mfcc': DT_mfcc,
        'spectral': DT_spectral,
        'prosodic': DT_prosodic,
        'full': DT_full,
    },
    "MLP": {
        'mfcc': MLP_mfcc,
        'spectral': MLP_spectral,
        'prosodic': MLP_prosodic,
        'full': MLP_full,
    },
    "RF": {
        'mfcc': RF_mfcc,
        'spectral': RF_spectral,
        'prosodic': RF_prosodic,
        'full': RF_full,
    },
    "LSTM": {
        'mfcc': LSTM_mfcc,
        'spectral': LSTM_spectral,
        'prosodic': LSTM_prosodic,
        'full': LSTM_full,
    },
    "LSTM_CNN": {
        'mfcc': LSTM_CNN_mfcc,
        'spectral': LSTM_CNN_spectral,
        'prosodic': LSTM_CNN_prosodic,
        'full': LSTM_CNN_full,
    },
    "CNN": {
        'mfcc': CNN_mfcc,
        'spectral': CNN_spectral,
        'prosodic': CNN_prosodic,
        'full': CNN_full,
    },
}

# Fitted feature scalers, one per feature set (the file names suggest
# standard scalers -- confirm against the training code).
spectral_scaler = joblib.load(f"{root_path}spectral_features_standard_scaler.joblib")
prosodic_scaler = joblib.load(f"{root_path}prosodic_features_standard_scaler.joblib")
full_scaler = joblib.load(f"{root_path}full_features_standard_scaler.joblib")
mfcc_scaler = joblib.load(f"{root_path}mfcc_features_standard_scaler.joblib")

# Scalers keyed by the same feature-set names used in total_model.
scaler = {
    'mfcc': mfcc_scaler,
    'spectral': spectral_scaler,
    'prosodic': prosodic_scaler,
    'full': full_scaler,
}
def Load_audio(audio_path):
    """Read an audio file and return its samples, resampled to 48 kHz.

    Args:
        audio_path: Path of the audio file to read.

    Returns:
        The sample array produced by ``librosa.load``. The sampling rate that
        ``librosa.load`` also returns is discarded.
    """
    samples, _sampling_rate = librosa.load(audio_path, sr=48000)
    return samples
def Spectral_extract_features(audio):
    """Summarise a single audio signal as one spectral feature vector.

    Five librosa feature matrices are computed (40 MFCCs, chroma, spectral
    contrast, tonnetz, mel spectrogram); each is averaged along axis 1 and the
    averages are concatenated in that order.

    Args:
        audio: Sample array of one audio clip.

    Returns:
        A 1-D NumPy array holding the concatenated per-feature means.
    """
    # NOTE(review): no `sr` is passed to these calls, so librosa assumes its
    # default sampling rate (22050 Hz) although Load_audio resamples to 48 kHz.
    # The saved scalers/models were presumably fitted the same way -- confirm
    # before changing this.
    feature_matrices = [
        librosa.feature.mfcc(y=audio, n_mfcc=40),
        librosa.feature.chroma_stft(y=audio),
        librosa.feature.spectral_contrast(y=audio),
        librosa.feature.tonnetz(y=audio),
        librosa.feature.melspectrogram(y=audio),
    ]
    averaged = [matrix.mean(axis=1) for matrix in feature_matrices]
    return np.array(np.concatenate(averaged))
def mfcc_extract_features(audio):
    """Return the mean of each of 40 MFCC coefficients for one audio signal.

    Args:
        audio: Sample array of one audio clip.

    Returns:
        A 1-D array with one averaged value per MFCC coefficient (the MFCC
        matrix averaged along axis 1).
    """
    # NOTE(review): `sr` is not passed, so librosa's default rate is assumed.
    return librosa.feature.mfcc(y=audio, n_mfcc=40).mean(axis=1)
def Prosodic_extract_features(audio):
    """Build the prosodic feature vector (pitch, duration, energy) of a clip.

    Args:
        audio: Sample array of one audio clip.

    Returns:
        A 1-D NumPy array made of, in order: the pitch matrix from
        ``librosa.piptrack`` averaged along axis 1, the clip duration as a
        single value, and the RMS energy averaged along axis 1.
    """
    # piptrack returns (pitches, magnitudes); only the pitches are used.
    pitches, _magnitudes = librosa.piptrack(y=audio, n_fft=128, hop_length=512)

    # NOTE(review): `sr` is not passed, so the duration is computed with
    # librosa's default sampling rate rather than the 48 kHz used when
    # loading -- confirm this matches how the models were trained.
    clip_seconds = librosa.get_duration(y=audio)
    rms_energy = librosa.feature.rms(y=audio)

    # Wrap the scalar duration as a (1, 1) array so that, like the other
    # parts, it can be reduced along axis 1 into a 1-D piece.
    duration_part = np.array([[clip_seconds]]).mean(axis=1)

    pieces = (pitches.mean(axis=1), duration_part, rms_energy.mean(axis=1))
    return np.array(np.concatenate(pieces))
def Spectral_Prosodic(audio):
    """Return the spectral features followed by the prosodic features.

    Args:
        audio: Sample array of one audio clip.

    Returns:
        A 1-D NumPy array: the output of ``Spectral_extract_features``
        concatenated with the output of ``Prosodic_extract_features``.
    """
    return np.concatenate(
        (Spectral_extract_features(audio), Prosodic_extract_features(audio))
    )
def Total_features(audio, scaler):
    """Extract and scale all four feature sets for one audio clip.

    The spectral and prosodic vectors are each extracted once and reused to
    build the 'full' vector (spectral followed by prosodic, the same layout
    ``Spectral_Prosodic`` produces), instead of running both extractors a
    second time.

    Args:
        audio: Sample array of one audio clip.
        scaler: Mapping with the keys 'spectral', 'prosodic', 'full' and
            'mfcc'; each value must provide ``transform``.

    Returns:
        A dict with the keys 'spectral', 'prosodic', 'full' and 'mfcc' (in
        that order); each value is the result of the matching scaler's
        ``transform`` applied to the feature vector reshaped to one row.
    """
    spectral = Spectral_extract_features(audio)
    prosodic = Prosodic_extract_features(audio)

    # All raw vectors are built before any scaling so the concatenation always
    # sees unscaled values.
    raw_vectors = {
        'spectral': spectral,
        'prosodic': prosodic,
        'full': np.concatenate((spectral, prosodic)),
        'mfcc': mfcc_extract_features(audio),
    }

    return {
        name: scaler[name].transform(vector.reshape(1, -1))
        for name, vector in raw_vectors.items()
    }
def total_predict(feature, total_model):
    """Run every model on every feature set and collect the predicted labels.

    Prediction is best-effort: if one (feature set, model) pair fails, the
    failure is printed and that entry is simply absent from the result. Only
    ``Exception`` subclasses are treated this way, so ``KeyboardInterrupt``
    and ``SystemExit`` still propagate.

    Args:
        feature: Dict with the keys 'mfcc', 'spectral', 'prosodic' and 'full'
            holding the feature array for each feature set.
        total_model: Nested mapping addressed as ``total_model[model][feature_set]``
            whose values provide ``predict``.

    Returns:
        A dict ``{feature_set: {model_name: label}}`` where each label comes
        from the module-level ``num2label`` mapping.
    """
    feature_keys = ('mfcc', 'spectral', 'prosodic', 'full')
    model_keys = ('SVM', 'NB', 'DT', 'MLP', 'RF', 'LSTM', 'LSTM_CNN', 'CNN')
    # Models whose predict() returns the class index directly; the others
    # return per-class scores that need an argmax.
    direct_label_models = {'SVM', 'NB', 'DT', 'MLP', 'RF'}

    result = {f: {} for f in feature_keys}
    for f in feature_keys:
        for m in model_keys:
            try:
                model = total_model[m][f]
                if m in direct_label_models:
                    class_index = model.predict(feature[f])[0]
                else:
                    # These models receive the single-row array wrapped in a list.
                    batch = [np.array(feature[f]).reshape((1, -1))]
                    scores = model.predict(batch)
                    class_index = np.argmax(scores, axis=1)[0]
                result[f][m] = num2label[class_index]
            except Exception as exc:
                # Keep going with the remaining models, but say what failed.
                print(f, m, repr(exc))
    return result
# def main_function(audio_path, scaler, total_model):
# audio = Load_audio(audio_path)
# feature = Total_features(audio, scaler)
# labels = total_predict(feature, total_model)
# table = pd.DataFrame.from_dict(labels).T
# return table
def main_function(audio_path, scaler, total_model):
    """Run the full pipeline on one audio file and tabulate the predictions.

    Args:
        audio_path: Path of the audio file to analyse.
        scaler: Mapping of feature-set name to scaler, forwarded to
            ``Total_features``.
        total_model: Nested model mapping forwarded to ``total_predict``.

    Returns:
        A pandas DataFrame with one row per feature set and one column per
        model, preceded by a first column naming the feature set.
    """
    features = Total_features(Load_audio(audio_path), scaler)
    predictions = total_predict(features, total_model)

    # from_dict puts feature sets in columns; transpose so they become rows.
    result_frame = pd.DataFrame.from_dict(predictions).transpose()
    result_frame.insert(0, 'Đặc trưng', ['mfcc', 'spectral', 'prosodic', 'full'])
    return result_frame
def main_interface(audio_file):
    """Gradio callback: predict emotions for the submitted audio.

    Uses the module-level ``scaler`` and ``total_model``.

    Args:
        audio_file: Value supplied by the Gradio audio input, passed on as
            the audio path.

    Returns:
        The prediction table produced by ``main_function``.
    """
    # An earlier variant took a (sample_rate, samples) pair and resampled it
    # to 48 kHz here; the callback now simply forwards its input to
    # main_function.
    return main_function(audio_file, scaler, total_model)
# Gradio UI: an audio input delivered to the callback as a file path, and a
# table output with one column for the feature set and one per model.
_audio_input = gr.Audio(type='filepath')
_prediction_table = gr.Dataframe(
    headers=['Đặc trưng', 'SVM', 'NB', 'DT', 'MLP', 'RF', 'LSTM', 'LSTM_CNN', 'CNN'],
)
iface = gr.Interface(
    fn=main_interface,
    inputs=_audio_input,
    outputs=_prediction_table,
)

# Start serving the interface as soon as the module is executed.
iface.launch()
|