"""Gradio app that classifies an uploaded audio file with a pre-trained model.

The pipeline is: load audio -> extract 12 scalar acoustic features ->
scale them with the fitted scaler -> run the Keras model -> decode the
predicted label with the fitted label encoder.
"""

import gradio as gr
import joblib
import librosa
import numpy as np
from tensorflow.keras.models import load_model

# Load the trained model, scaler, and label encoder.
# NOTE: joblib.load unpickles arbitrary objects; only load artifacts you trust.
model = load_model('lstmm_model.h5')  # Ensure this path is correct
scaler = joblib.load('scalerr.pkl')  # Ensure this path is correct
label_encoder = joblib.load('label_encoderr.pkl')  # Ensure this path is correct


# Feature extraction function for prediction
def extract_features_for_prediction(file_path):
    """Extract the scalar acoustic features used as model input.

    The insertion order of the returned dict is significant: ``predict_audio``
    feeds ``features.values()`` straight into the scaler, so the order here
    presumably has to match the column order used at training time
    (verify against the training script before reordering anything).

    Args:
        file_path: Path of the audio file to analyse (anything
            ``librosa.load`` accepts).

    Returns:
        A dict mapping feature name to a scalar value, or ``None`` when the
        file could not be loaded or processed (the error is printed).
    """
    # Deliberate best-effort boundary: any failure while decoding or analysing
    # the upload is reported as "no features" instead of crashing the app.
    try:
        # sr=None keeps the file's native sampling rate.
        y, sr = librosa.load(file_path, sr=None)
        features = {}

        # Mean of all positive pitch-track bins; 0 when nothing was detected.
        pitches, _magnitudes = librosa.piptrack(y=y, sr=sr)
        voiced = pitches > 0
        features['pitch'] = np.mean(pitches[voiced]) if np.any(voiced) else 0

        rms = librosa.feature.rms(y=y)
        features['intensity'] = np.mean(rms)

        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        features['mfcc_mean'] = np.mean(mfcc)
        features['mfcc_var'] = np.var(mfcc)

        sfcc, sfcc_delta, sfcc_double_delta = compute_sfcc(y, sr)
        features['sfcc_mean'] = np.mean(sfcc)
        features['sfcc_var'] = np.var(sfcc)
        features['sfcc_delta_mean'] = np.mean(sfcc_delta)
        features['sfcc_double_delta_mean'] = np.mean(sfcc_double_delta)

        mfcc_delta = librosa.feature.delta(mfcc)
        mfcc_double_delta = librosa.feature.delta(mfcc, order=2)
        features['mfcc_delta_mean'] = np.mean(mfcc_delta)
        features['mfcc_double_delta_mean'] = np.mean(mfcc_double_delta)

        # One harmonic/percussive separation serves both ratio features.
        # Previously harmonic() and percussive() were each called twice,
        # repeating the same expensive decomposition four times.
        harmonic, percussive = librosa.effects.hpss(y)

        # Mean of the sample-wise harmonic/percussive ratio. The formula is
        # kept exactly as-is so the feature stays compatible with the fitted
        # scaler and model.
        features['hnr'] = np.mean(harmonic / (percussive + 1e-6))

        # Ratio of the mean harmonic sample to the mean percussive sample.
        features['h_n_ratio'] = np.mean(harmonic) / (np.mean(percussive) + 1e-6)

        return features
    except Exception as e:
        print(f"Error processing file: {e}")
        return None


def compute_sfcc(y, sr):
    """Compute cepstral coefficients from the STFT power spectrum, plus deltas.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        A tuple ``(sfcc, sfcc_delta, sfcc_double_delta)``: the coefficients
        and their first- and second-order deltas.
    """
    nfft = 1024
    # 10 ms frame shift expressed in samples.
    hop_length = int(10 * (sr / 1000))
    # Number of coefficients scales with the sampling rate (0.85 per kHz).
    cep_channels = int(0.85 * (sr / 1000))

    power_spectrum = np.abs(
        librosa.stft(y, n_fft=nfft, hop_length=hop_length, win_length=nfft)
    ) ** 2

    # The dB-scaled STFT power spectrum is passed directly as S (no mel
    # filterbank is applied here).
    sfcc = librosa.feature.mfcc(
        S=librosa.power_to_db(power_spectrum),
        sr=sr,
        n_mfcc=cep_channels,
    )
    sfcc_delta = librosa.feature.delta(sfcc)
    sfcc_double_delta = librosa.feature.delta(sfcc, order=2)
    return sfcc, sfcc_delta, sfcc_double_delta


# Predict function
def predict_audio(file):
    """Predict the class label for an uploaded audio file.

    Args:
        file: Path to the audio file, as supplied by the Gradio
            ``Audio(type="filepath")`` input.

    Returns:
        ``"Predicted Class: <label>"`` on success, or
        ``"Error in feature extraction."`` when no features could be computed.
    """
    features = extract_features_for_prediction(file)
    # Explicit None check: failure is signalled by None, not by an empty dict.
    if features is None:
        return "Error in feature extraction."

    # The scaler takes a single 2-D row: (1, n_features).
    feature_row = np.array(list(features.values())).reshape(1, -1)
    # The model is fed a 3-D array of shape (1, 1, n_features)
    # (presumably batch, timesteps, features -- confirm against the model).
    scaled_features = scaler.transform(feature_row).reshape(1, 1, -1)

    prediction = model.predict(scaled_features)
    predicted_class = label_encoder.inverse_transform([np.argmax(prediction)])
    return f"Predicted Class: {predicted_class[0]}"


# Gradio Interface
interface = gr.Interface(
    fn=predict_audio,
    inputs=gr.Audio(type="filepath", label="Upload a WAV File"),
    outputs="text",
    title="Voice Disorder Prediction",
    description="Upload a .wav file to predict its class.",
)

# Launch the app
interface.launch()