|
import gradio as gr |
|
import numpy as np |
|
import librosa |
|
import joblib |
|
from tensorflow.keras.models import load_model |
|
|
|
|
|
# Trained artifacts, loaded once when the module runs. The paths are relative,
# so they are resolved against the current working directory.
_MODEL_PATH = 'lstmm_model.h5'
_SCALER_PATH = 'scalerr.pkl'
_LABEL_ENCODER_PATH = 'label_encoderr.pkl'

# Keras model used for classification.
model = load_model(_MODEL_PATH)

# NOTE: joblib.load unpickles its input; only point these at trusted files.
scaler = joblib.load(_SCALER_PATH)
label_encoder = joblib.load(_LABEL_ENCODER_PATH)
|
|
|
|
|
def extract_features_for_prediction(file_path):
    """Summarise one audio file as a dictionary of scalar acoustic features.

    The audio is loaded at its native sampling rate (``sr=None``) and reduced
    to twelve scalars. The insertion order of the keys is significant:
    ``predict_audio`` flattens ``features.values()`` positionally before
    scaling, so the order below must not be changed.

    Args:
        file_path: Audio file location, passed straight to ``librosa.load``.

    Returns:
        A dict mapping feature name to scalar value, or ``None`` when any
        step raised an exception (the error is printed, not re-raised).
    """
    try:
        y, sr = librosa.load(file_path, sr=None)

        features = {}

        # Pitch: mean of all positive piptrack bins, or 0 when there are none.
        pitches, _ = librosa.piptrack(y=y, sr=sr)
        positive = pitches > 0
        features['pitch'] = np.mean(pitches[positive]) if np.any(positive) else 0

        # Intensity: mean RMS energy over all frames.
        features['intensity'] = np.mean(librosa.feature.rms(y=y))

        # MFCC statistics, pooled over every coefficient and frame.
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        features['mfcc_mean'] = np.mean(mfcc)
        features['mfcc_var'] = np.var(mfcc)

        # Cepstral statistics from the STFT power spectrum (see compute_sfcc).
        sfcc, sfcc_delta, sfcc_double_delta = compute_sfcc(y, sr)
        features['sfcc_mean'] = np.mean(sfcc)
        features['sfcc_var'] = np.var(sfcc)
        features['sfcc_delta_mean'] = np.mean(sfcc_delta)
        features['sfcc_double_delta_mean'] = np.mean(sfcc_double_delta)

        # First- and second-order MFCC deltas.
        features['mfcc_delta_mean'] = np.mean(librosa.feature.delta(mfcc))
        features['mfcc_double_delta_mean'] = np.mean(
            librosa.feature.delta(mfcc, order=2)
        )

        # Harmonic/percussive separation is expensive, so run each half once
        # and reuse the result for both ratio features below.
        harmonic = librosa.effects.harmonic(y)
        percussive = librosa.effects.percussive(y)

        # Mean of the sample-wise ratio; 1e-6 offsets the denominator.
        features['hnr'] = np.mean(harmonic / (percussive + 1e-6))

        # Ratio of the two component means (a different quantity from 'hnr').
        features['h_n_ratio'] = np.mean(harmonic) / (np.mean(percussive) + 1e-6)

        return features

    except Exception as e:
        # Deliberate best-effort: the caller turns None into a user-facing
        # error string instead of crashing the request handler.
        print(f"Error processing file: {e}")
        return None
|
|
|
def compute_sfcc(y, sr):
    """Compute cepstral coefficients of the STFT power spectrum and their deltas.

    The power spectrogram is converted to dB and handed to
    ``librosa.feature.mfcc`` through its ``S`` argument, so no mel filterbank
    is built by this function.

    Args:
        y: Audio time series, as returned by ``librosa.load``.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        A tuple ``(sfcc, sfcc_delta, sfcc_double_delta)``: the coefficients
        and their first- and second-order deltas.
    """
    n_fft = 1024
    samples_per_ms = sr / 1000

    # 10 ms frame shift, expressed in samples.
    hop = int(10 * samples_per_ms)
    # Number of coefficients scales with the sampling rate (0.85 per kHz);
    # NOTE(review): the rationale for 0.85 is not visible here.
    n_coeffs = int(0.85 * samples_per_ms)

    magnitude = np.abs(
        librosa.stft(y, n_fft=n_fft, hop_length=hop, win_length=n_fft)
    )
    power_db = librosa.power_to_db(magnitude ** 2)

    coeffs = librosa.feature.mfcc(S=power_db, sr=sr, n_mfcc=n_coeffs)
    first_delta = librosa.feature.delta(coeffs)
    second_delta = librosa.feature.delta(coeffs, order=2)

    return coeffs, first_delta, second_delta
|
|
|
|
|
def predict_audio(file):
    """Classify one audio file and return the result as display text.

    Args:
        file: Path of the uploaded audio file, forwarded to
            ``extract_features_for_prediction``.

    Returns:
        ``"Predicted Class: <label>"`` on success, or
        ``"Error in feature extraction."`` when no features were produced.
    """
    features = extract_features_for_prediction(file)
    if not features:
        return "Error in feature extraction."

    # One row of features in dict insertion order, as the scaler expects 2-D.
    row = np.array(list(features.values())).reshape(1, -1)
    scaled_row = scaler.transform(row)

    # The model is fed a 3-D array with two leading axes of length 1.
    model_input = scaled_row.reshape(1, 1, -1)
    scores = model.predict(model_input)

    best_index = np.argmax(scores)
    label = label_encoder.inverse_transform([best_index])[0]
    return f"Predicted Class: {label}"
|
|
|
|
|
# Upload widget; type="filepath" hands predict_audio a path, not raw samples.
audio_upload = gr.Audio(type="filepath", label="Upload a WAV File")

# Single-input, single-output UI wired to predict_audio.
interface = gr.Interface(
    fn=predict_audio,
    inputs=audio_upload,
    outputs="text",
    title="Voice Disorder Prediction",
    description="Upload a .wav file to predict its class.",
)

# Starts the Gradio server as soon as this module is executed.
interface.launch()
|
|