# File size: 3,247 Bytes
# (Web-viewer residue removed: per-line commit hashes 849911b / e52bbe2 / 7084f59 and the line-number gutter 1-91.)
import gradio as gr
import numpy as np
import librosa
import joblib
from tensorflow.keras.models import load_model
# Locations of the trained artifacts. The paths are relative, so they are
# resolved against the directory the app is started from.
MODEL_PATH = 'lstmm_model.h5'
SCALER_PATH = 'scalerr.pkl'
LABEL_ENCODER_PATH = 'label_encoderr.pkl'

# Everything is loaded once, at import time, and reused for every prediction.
model = load_model(MODEL_PATH)
scaler = joblib.load(SCALER_PATH)
label_encoder = joblib.load(LABEL_ENCODER_PATH)
# Feature extraction function for prediction
def extract_features_for_prediction(file_path):
    """Extract the scalar acoustic features of one audio file.

    The file is loaded at its native sampling rate (``sr=None``) and reduced
    to twelve scalars. Each scalar is a mean or variance taken over the whole
    feature matrix (all coefficients and all frames).

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``.

    Returns:
        A dict mapping feature name to scalar value, or ``None`` when any
        step fails (the error is printed). The insertion order of the keys is
        significant: ``predict_audio`` feeds ``features.values()`` to the
        scaler positionally, so the order must not change.
    """
    try:
        y, sr = librosa.load(file_path, sr=None)

        # Mean of the non-zero pitch candidates; 0 when there are none.
        pitches, _ = librosa.piptrack(y=y, sr=sr)
        voiced = pitches > 0
        pitch = np.mean(pitches[voiced]) if np.any(voiced) else 0

        rms = librosa.feature.rms(y=y)

        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfcc_delta = librosa.feature.delta(mfcc)
        mfcc_double_delta = librosa.feature.delta(mfcc, order=2)

        sfcc, sfcc_delta, sfcc_double_delta = compute_sfcc(y, sr)

        # One HPSS pass yields both signals; calling effects.harmonic() and
        # effects.percussive() separately repeats the same decomposition.
        harmonic, percussive = librosa.effects.hpss(y)

        # NOTE(review): 'hnr' is the mean of the sample-wise
        # harmonic/percussive waveform ratio and 'h_n_ratio' is the ratio of
        # the two waveform means. Neither is a conventional
        # harmonics-to-noise ratio. The definitions are kept unchanged
        # because the saved scaler and model were presumably fit on them.
        hnr = np.mean(harmonic / (percussive + 1e-6))
        h_n_ratio = np.mean(harmonic) / (np.mean(percussive) + 1e-6)

        # Key order below is the model's input order -- do not reorder.
        return {
            'pitch': pitch,
            'intensity': np.mean(rms),
            'mfcc_mean': np.mean(mfcc),
            'mfcc_var': np.var(mfcc),
            'sfcc_mean': np.mean(sfcc),
            'sfcc_var': np.var(sfcc),
            'sfcc_delta_mean': np.mean(sfcc_delta),
            'sfcc_double_delta_mean': np.mean(sfcc_double_delta),
            'mfcc_delta_mean': np.mean(mfcc_delta),
            'mfcc_double_delta_mean': np.mean(mfcc_double_delta),
            'hnr': hnr,
            'h_n_ratio': h_n_ratio,
        }
    except Exception as e:
        # Deliberate best-effort boundary: any loading or analysis failure is
        # reported and turned into None so the UI can show an error message.
        print(f"Error processing file: {e}")
        return None
def compute_sfcc(y, sr):
    """Compute cepstral coefficients from the STFT power spectrogram, plus deltas.

    The dB-scaled STFT power spectrogram is handed to
    ``librosa.feature.mfcc`` through its ``S`` argument, so the coefficients
    are taken from that spectrogram rather than from a mel spectrogram
    computed from ``y``.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y`` in Hz. It sets both the hop length
            (10 ms worth of samples) and the number of coefficients
            (``int(0.85 * sr / 1000)``).

    Returns:
        A tuple ``(sfcc, sfcc_delta, sfcc_double_delta)``: the coefficient
        matrix and its first- and second-order deltas.

    Raises:
        ValueError: If ``sr`` is so low that the hop length or the number of
            coefficients would be zero.
    """
    nfft = 1024
    samples_per_ms = sr / 1000
    hop_length = int(10 * samples_per_ms)  # 10 ms frame shift, in samples
    cep_channels = int(0.85 * samples_per_ms)

    # Fail loudly instead of producing empty matrices whose means are NaN.
    if hop_length < 1 or cep_channels < 1:
        raise ValueError(
            f"Sampling rate {sr} Hz is too low to compute SFCC features "
            f"(hop_length={hop_length}, n_coefficients={cep_channels})"
        )

    power_spec = np.abs(
        librosa.stft(y, n_fft=nfft, hop_length=hop_length, win_length=nfft)
    ) ** 2
    sfcc = librosa.feature.mfcc(
        S=librosa.power_to_db(power_spec), sr=sr, n_mfcc=cep_channels
    )
    sfcc_delta = librosa.feature.delta(sfcc)
    sfcc_double_delta = librosa.feature.delta(sfcc, order=2)
    return sfcc, sfcc_delta, sfcc_double_delta
# Predict function
def predict_audio(file):
    """Predict the class label of an audio file.

    Args:
        file: Path of the audio file to classify, or ``None`` when no file
            was supplied.

    Returns:
        ``"Predicted Class: <label>"`` on success, or
        ``"Error in feature extraction."`` when no file was given or the
        features could not be extracted.
    """
    # With no upload there is nothing to load; skip straight to the error
    # message instead of going through a caught librosa exception.
    if file is None:
        return "Error in feature extraction."

    features = extract_features_for_prediction(file)
    if not features:  # None means extraction failed
        return "Error in feature extraction."

    # The scaler takes one 2-D row; the feature order is the dict's
    # insertion order.
    feature_row = np.array(list(features.values())).reshape(1, -1)
    # presumably (batch=1, timesteps=1, n_features) as expected by the LSTM
    # -- TODO confirm against the training code
    scaled_features = scaler.transform(feature_row).reshape(1, 1, -1)

    prediction = model.predict(scaled_features)
    predicted_class = label_encoder.inverse_transform([np.argmax(prediction)])
    return f"Predicted Class: {predicted_class[0]}"
# Gradio UI: one audio input handed to predict_audio as a file path, and one
# text output showing the returned message.
audio_input = gr.Audio(type="filepath", label="Upload a WAV File")

interface = gr.Interface(
    fn=predict_audio,
    inputs=audio_input,
    outputs="text",
    title="Audio Class Prediction",
    description="Upload a .wav file to predict its class.",
)

# Start serving the app.
interface.launch()
|