File size: 3,247 Bytes
849911b
 
 
e52bbe2
 
849911b
e52bbe2
 
 
 
7084f59
e52bbe2
 
849911b
e52bbe2
7084f59
e52bbe2
 
 
 
849911b
e52bbe2
 
7084f59
e52bbe2
 
 
7084f59
e52bbe2
 
 
 
 
7084f59
e52bbe2
 
 
 
7084f59
e52bbe2
 
7084f59
e52bbe2
 
 
 
849911b
e52bbe2
7084f59
849911b
e52bbe2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7084f59
 
849911b
e52bbe2
 
 
 
 
849911b
 
e52bbe2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import gradio as gr
import numpy as np
import librosa
import joblib
from tensorflow.keras.models import load_model

# Load the trained model, scaler, and label encoder once at import time so
# every prediction request reuses the same objects. The three paths are
# relative, so they resolve against the process's working directory.
# NOTE(review): joblib.load unpickles its input, and unpickling can execute
# arbitrary code -- only ship .pkl files that come from a trusted source.
model = load_model('lstmm_model.h5')  # Keras model; predict_audio calls model.predict on it
scaler = joblib.load('scalerr.pkl')  # predict_audio calls scaler.transform on the feature row
label_encoder = joblib.load('label_encoderr.pkl')  # predict_audio calls label_encoder.inverse_transform

# Feature extraction function for prediction
def extract_features_for_prediction(file_path):
    """Extract the scalar acoustic features used for prediction from one audio file.

    The audio is loaded with ``sr=None`` and every feature is reduced to a
    single number (a mean or a variance over the whole clip).

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``.

    Returns:
        A dict mapping feature name to a scalar, or ``None`` if anything in
        the extraction raised (the error is printed). The insertion order of
        the keys is part of the contract: ``predict_audio`` feeds
        ``list(features.values())`` to the scaler, so the order must not change.
    """
    try:
        y, sr = librosa.load(file_path, sr=None)

        features = {}

        # Mean of the positive pitch-track bins; 0 when none are positive.
        pitches, _ = librosa.piptrack(y=y, sr=sr)
        positive_pitches = pitches[pitches > 0]
        features['pitch'] = np.mean(positive_pitches) if positive_pitches.size else 0

        rms = librosa.feature.rms(y=y)
        features['intensity'] = np.mean(rms)

        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        features['mfcc_mean'] = np.mean(mfcc)
        features['mfcc_var'] = np.var(mfcc)

        sfcc, sfcc_delta, sfcc_double_delta = compute_sfcc(y, sr)
        features['sfcc_mean'] = np.mean(sfcc)
        features['sfcc_var'] = np.var(sfcc)
        features['sfcc_delta_mean'] = np.mean(sfcc_delta)
        features['sfcc_double_delta_mean'] = np.mean(sfcc_double_delta)

        mfcc_delta = librosa.feature.delta(mfcc)
        mfcc_double_delta = librosa.feature.delta(mfcc, order=2)
        features['mfcc_delta_mean'] = np.mean(mfcc_delta)
        features['mfcc_double_delta_mean'] = np.mean(mfcc_double_delta)

        # Run the harmonic/percussive separation once and reuse both signals.
        # Calling effects.harmonic() and effects.percussive() separately (and
        # twice each) repeats the same decomposition for no change in output.
        harmonic, percussive = librosa.effects.hpss(y)

        # 'hnr' is the mean of the sample-wise harmonic/percussive ratio;
        # 'h_n_ratio' is the ratio of the two means. The 1e-6 offsets the
        # denominator in both.
        features['hnr'] = np.mean(harmonic / (percussive + 1e-6))
        features['h_n_ratio'] = np.mean(harmonic) / (np.mean(percussive) + 1e-6)

        return features

    except Exception as e:
        # Best-effort boundary: the caller turns None into a user-facing message.
        print(f"Error processing file: {e}")
        return None

def compute_sfcc(y, sr):
    """Compute cepstral coefficients from the STFT power spectrum, plus deltas.

    The power spectrum uses a 1024-point FFT/window and a hop equal to the
    number of samples in 10 ms. It is converted to dB and passed to
    ``librosa.feature.mfcc`` as ``S``, keeping ``int(0.85 * sr / 1000)``
    coefficients.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        A tuple ``(sfcc, sfcc_delta, sfcc_double_delta)``: the coefficients
        and their first- and second-order deltas.
    """
    samples_per_ms = sr / 1000
    frame_shift = int(10 * samples_per_ms)   # hop length in samples (10 ms)
    n_coeffs = int(0.85 * samples_per_ms)    # number of coefficients kept
    fft_size = 1024

    spectrum = librosa.stft(
        y, n_fft=fft_size, hop_length=frame_shift, win_length=fft_size
    )
    power_db = librosa.power_to_db(np.abs(spectrum) ** 2)

    coeffs = librosa.feature.mfcc(S=power_db, sr=sr, n_mfcc=n_coeffs)
    first_delta = librosa.feature.delta(coeffs)
    second_delta = librosa.feature.delta(coeffs, order=2)
    return coeffs, first_delta, second_delta

# Predict function
def predict_audio(file):
    """Classify one audio file and return a human-readable result string.

    Args:
        file: Path of the audio file, forwarded to
            ``extract_features_for_prediction``.

    Returns:
        ``"Predicted Class: <label>"`` on success, or
        ``"Error in feature extraction."`` when no features were produced.
    """
    extracted = extract_features_for_prediction(file)
    if not extracted:
        return "Error in feature extraction."

    # One row of features, in the dict's insertion order, for the scaler.
    feature_row = np.array(list(extracted.values())).reshape(1, -1)
    # The model is fed a 3-D array of shape (1, 1, n_features).
    model_input = scaler.transform(feature_row).reshape(1, 1, -1)

    scores = model.predict(model_input)
    best_index = np.argmax(scores)
    label = label_encoder.inverse_transform([best_index])[0]
    return f"Predicted Class: {label}"

# Gradio Interface
# Single-input, single-output UI: one audio upload goes to predict_audio and
# the returned string is shown as text.
interface = gr.Interface(
    title="Audio Class Prediction",
    description="Upload a .wav file to predict its class.",
    fn=predict_audio,
    inputs=gr.Audio(label="Upload a WAV File", type="filepath"),
    outputs="text",
)

# Start serving the app (runs as soon as this module is executed).
interface.launch()