# RITISHREE / app.py
# Last commit by sancho10: "Update app.py" (dbb1ae5, verified)
import gradio as gr
import numpy as np
import librosa
import joblib
from tensorflow.keras.models import load_model
# Load the trained model, scaler, and label encoder.
# These run once at import time; the three objects are module-level globals
# read by predict_audio on every request. The paths are relative, so they
# resolve against the process's current working directory.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only ship artifact files from a trusted source.
model = load_model('lstmm_model.h5') # Keras model; predict_audio feeds it input reshaped to (1, 1, n_features)
scaler = joblib.load('scalerr.pkl') # its transform() is applied to the raw feature row; presumably fitted at training time -- confirm
label_encoder = joblib.load('label_encoderr.pkl') # its inverse_transform() maps the argmax index back to a class label
# Feature extraction function for prediction
def extract_features_for_prediction(file_path):
    """Extract the scalar acoustic features used for prediction from one audio file.

    The audio is loaded at its native sampling rate and summarised into a
    dict of scalars. Keys are inserted in a fixed order, and ``predict_audio``
    flattens ``features.values()`` into the model's input vector, so the
    assignments below must not be reordered.

    Args:
        file_path: Path to the audio file, passed straight to ``librosa.load``.

    Returns:
        A dict mapping feature name to scalar value, or ``None`` if loading
        or any feature computation raised an exception (the error is printed
        to stdout rather than propagated).
    """
    try:
        y, sr = librosa.load(file_path, sr=None)
        features = {}

        # Mean of the positive entries of the pitch track; 0 when there are none.
        pitches, _magnitudes = librosa.piptrack(y=y, sr=sr)
        positive_pitches = pitches[pitches > 0]
        features['pitch'] = np.mean(positive_pitches) if positive_pitches.size else 0

        features['intensity'] = np.mean(librosa.feature.rms(y=y))

        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        features['mfcc_mean'] = np.mean(mfcc)
        features['mfcc_var'] = np.var(mfcc)

        sfcc, sfcc_delta, sfcc_double_delta = compute_sfcc(y, sr)
        features['sfcc_mean'] = np.mean(sfcc)
        features['sfcc_var'] = np.var(sfcc)
        features['sfcc_delta_mean'] = np.mean(sfcc_delta)
        features['sfcc_double_delta_mean'] = np.mean(sfcc_double_delta)

        features['mfcc_delta_mean'] = np.mean(librosa.feature.delta(mfcc))
        features['mfcc_double_delta_mean'] = np.mean(
            librosa.feature.delta(mfcc, order=2)
        )

        # Harmonic/percussive separation is the most expensive step here, so
        # decompose once and reuse both components for the two ratio features.
        harmonic, percussive = librosa.effects.hpss(y)
        # 'hnr' is the mean of the sample-wise harmonic/percussive ratio;
        # 'h_n_ratio' is the ratio of the two means. The 1e-6 avoids a zero
        # denominator.
        features['hnr'] = np.mean(harmonic / (percussive + 1e-6))
        features['h_n_ratio'] = np.mean(harmonic) / (np.mean(percussive) + 1e-6)

        return features
    except Exception as e:
        # Deliberately best-effort: the caller turns None into a user-facing
        # error message instead of crashing the request.
        print(f"Error processing file: {e}")
        return None
def compute_sfcc(y, sr):
    """Compute cepstral coefficients from a linear-frequency power spectrogram.

    The STFT power spectrogram is converted to dB and handed to
    ``librosa.feature.mfcc`` through its ``S`` argument, so the coefficients
    are derived from that spectrogram rather than from ``y`` directly.

    Args:
        y: Audio time series, as accepted by ``librosa.stft``.
        sr: Sampling rate in Hz; it sets both the hop length and the number
            of coefficients.

    Returns:
        Tuple ``(sfcc, sfcc_delta, sfcc_double_delta)``: the coefficients and
        their first- and second-order deltas.
    """
    fft_size = 1024
    samples_per_ms = sr / 1000
    # A 10 ms frame shift, expressed in samples.
    frame_shift = int(10 * samples_per_ms)
    # The number of coefficients scales with the sampling rate.
    n_coeffs = int(0.85 * samples_per_ms)

    magnitude = np.abs(
        librosa.stft(y, n_fft=fft_size, hop_length=frame_shift, win_length=fft_size)
    )
    log_power = librosa.power_to_db(magnitude ** 2)
    coeffs = librosa.feature.mfcc(S=log_power, sr=sr, n_mfcc=n_coeffs)

    first_delta = librosa.feature.delta(coeffs)
    second_delta = librosa.feature.delta(coeffs, order=2)
    return coeffs, first_delta, second_delta
# Predict function
def predict_audio(file):
    """Classify one audio file and return the result as display text.

    Args:
        file: Path to the audio file; forwarded unchanged to
            ``extract_features_for_prediction``.

    Returns:
        ``"Predicted Class: <label>"`` on success, or
        ``"Error in feature extraction."`` when no features were obtained.
    """
    features = extract_features_for_prediction(file)
    if not features:
        return "Error in feature extraction."

    # The vector follows the dict's insertion order.
    feature_row = np.array(list(features.values())).reshape(1, -1)
    # The scaler takes a 2-D (1, n_features) row; the model takes (1, 1, n_features).
    model_input = scaler.transform(feature_row).reshape(1, 1, -1)
    scores = model.predict(model_input)
    decoded = label_encoder.inverse_transform([np.argmax(scores)])
    return f"Predicted Class: {decoded[0]}"
# Gradio Interface
# Single-function UI: one audio input wired to predict_audio, one text output
# showing the string that predict_audio returns.
interface = gr.Interface(
    fn=predict_audio,
    # type="filepath" hands predict_audio a path on disk rather than raw samples.
    inputs=gr.Audio(type="filepath", label="Upload a WAV File"),
    outputs="text",
    title="Voice Disorder Prediction",
    description="Upload a .wav file to predict its class."
)
# Launch the app
# NOTE(review): there is no `if __name__ == "__main__":` guard, so the server
# starts whenever this module is executed or imported.
interface.launch()