Update app.py
Browse files
app.py
CHANGED
@@ -1,117 +1,90 @@
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
-
import tensorflow as tf
|
4 |
import librosa
|
5 |
-
import
|
6 |
-
import
|
7 |
-
from sklearn.preprocessing import LabelEncoder
|
8 |
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
# Feature
|
11 |
-
def
|
12 |
try:
|
13 |
-
|
14 |
-
y, sr = librosa.load(file_path, sr=8000) # Resample to 8kHz
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
# Extract SFCC
|
22 |
-
sfcc = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=13)
|
23 |
-
sfcc_db = librosa.power_to_db(sfcc)
|
24 |
-
sfcc_delta = librosa.feature.delta(sfcc_db)
|
25 |
-
sfcc_double_delta = librosa.feature.delta(sfcc_db, order=2)
|
26 |
-
|
27 |
-
# Calculate HNR (Harmonics-to-Noise Ratio)
|
28 |
-
hnr = np.mean(librosa.effects.harmonic(y)) # Approximation for simplicity
|
29 |
-
|
30 |
-
# Padding/truncating for consistency
|
31 |
-
mfcc = librosa.util.fix_length(mfcc, size=100, axis=1)
|
32 |
-
mfcc_delta = librosa.util.fix_length(mfcc_delta, size=100, axis=1)
|
33 |
-
mfcc_double_delta = librosa.util.fix_length(mfcc_double_delta, size=100, axis=1)
|
34 |
-
sfcc_db = librosa.util.fix_length(sfcc_db, size=100, axis=1)
|
35 |
-
sfcc_delta = librosa.util.fix_length(sfcc_delta, size=100, axis=1)
|
36 |
-
sfcc_double_delta = librosa.util.fix_length(sfcc_double_delta, size=100, axis=1)
|
37 |
-
|
38 |
-
# Concatenate all features into a single matrix
|
39 |
-
features = np.vstack([
|
40 |
-
mfcc, mfcc_delta, mfcc_double_delta,
|
41 |
-
sfcc_db, sfcc_delta, sfcc_double_delta
|
42 |
-
])
|
43 |
-
|
44 |
-
return {"features": features, "hnr": hnr}
|
45 |
-
except Exception as e:
|
46 |
-
raise ValueError(f"Error in feature extraction: {str(e)}")
|
47 |
|
|
|
|
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
hnr = features["hnr"] # Single scalar value
|
53 |
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
60 |
|
|
|
|
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
features = extract_features(file_path)
|
67 |
-
input_vector = prepare_input(features)
|
68 |
|
69 |
-
|
70 |
-
prediction = model.predict(input_vector)
|
71 |
-
predicted_index = np.argmax(prediction)
|
72 |
|
73 |
-
# Map predicted index to class label
|
74 |
-
predicted_class = label_encoder.inverse_transform([predicted_index])
|
75 |
-
return f"Predicted Class: {predicted_class[0]}"
|
76 |
except Exception as e:
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
|
|
|
|
103 |
|
104 |
# Gradio Interface
|
105 |
interface = gr.Interface(
|
106 |
-
fn=
|
107 |
-
inputs=gr.Audio(type="filepath", label="Upload
|
108 |
-
outputs=
|
109 |
-
title="
|
110 |
-
description="Upload
|
111 |
-
examples=["example_audio.wav"], # Replace with paths to example audio files
|
112 |
)
|
113 |
|
114 |
-
# Launch
|
115 |
-
|
116 |
-
interface.launch()
|
117 |
-
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
|
|
3 |
import librosa
|
4 |
+
import joblib
|
5 |
+
from tensorflow.keras.models import load_model
|
|
|
6 |
|
7 |
+
# Load the trained model, scaler, and label encoder
|
8 |
+
# Artifact locations. These are relative paths, so they resolve against the
# directory the app is started from.
MODEL_PATH = 'lstmm_model.h5'
SCALER_PATH = 'scalerr.pkl'
LABEL_ENCODER_PATH = 'label_encoderr.pkl'

# Loaded once at import time and shared by every prediction request.
# NOTE(review): joblib.load unpickles arbitrary objects -- only ship artifact
# files that come from a trusted training run.
model = load_model(MODEL_PATH)
scaler = joblib.load(SCALER_PATH)
label_encoder = joblib.load(LABEL_ENCODER_PATH)
|
11 |
|
12 |
+
# Feature extraction function for prediction
|
13 |
+
def extract_features_for_prediction(file_path):
    """Summarise an audio file as a dict of scalar acoustic features.

    The audio is loaded at its native sample rate (``sr=None``) and reduced
    to twelve scalars: mean positive pitch, mean RMS intensity, MFCC and SFCC
    statistics with their delta means, and two harmonic/percussive ratios.

    The insertion order of the keys is significant: ``predict_audio`` turns
    ``features.values()`` straight into the model input vector, so keys must
    not be reordered.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.

    Returns:
        dict mapping feature name to a scalar, or ``None`` when anything in
        the pipeline raises (the error is printed, not propagated).
    """
    try:
        y, sr = librosa.load(file_path, sr=None)

        features = {}

        # Mean over the strictly positive pitch-track bins; 0 when there are none.
        pitches, _magnitudes = librosa.piptrack(y=y, sr=sr)
        positive_pitches = pitches[pitches > 0]
        features['pitch'] = np.mean(positive_pitches) if positive_pitches.size else 0

        features['intensity'] = np.mean(librosa.feature.rms(y=y))

        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        features['mfcc_mean'] = np.mean(mfcc)
        features['mfcc_var'] = np.var(mfcc)

        sfcc, sfcc_delta, sfcc_double_delta = compute_sfcc(y, sr)
        features['sfcc_mean'] = np.mean(sfcc)
        features['sfcc_var'] = np.var(sfcc)
        features['sfcc_delta_mean'] = np.mean(sfcc_delta)
        features['sfcc_double_delta_mean'] = np.mean(sfcc_double_delta)

        features['mfcc_delta_mean'] = np.mean(librosa.feature.delta(mfcc))
        features['mfcc_double_delta_mean'] = np.mean(
            librosa.feature.delta(mfcc, order=2)
        )

        # The harmonic/percussive separation is the expensive step here, so
        # each component is computed once and shared by both ratio features.
        harmonic = librosa.effects.harmonic(y)
        percussive = librosa.effects.percussive(y)

        # 'hnr' is the mean of the sample-wise ratio; 'h_n_ratio' is the
        # ratio of the two means. The 1e-6 offsets the denominator.
        features['hnr'] = np.mean(harmonic / (percussive + 1e-6))
        features['h_n_ratio'] = np.mean(harmonic) / (np.mean(percussive) + 1e-6)

        return features
    except Exception as e:
        # Deliberate best-effort: the caller treats None as "extraction failed".
        print(f"Error processing file: {e}")
        return None
|
53 |
+
|
54 |
+
def compute_sfcc(y, sr):
    """Compute cepstral coefficients from a linear-frequency power spectrum.

    A 1024-point STFT power spectrum is converted to dB and passed to
    ``librosa.feature.mfcc`` through its ``S`` argument, followed by first-
    and second-order deltas of the result.

    Args:
        y: Audio samples, as accepted by ``librosa.stft``.
        sr: Sample rate in Hz; it sets both the hop length and the number of
            coefficients.

    Returns:
        Tuple ``(sfcc, sfcc_delta, sfcc_double_delta)``.
    """
    samples_per_ms = sr / 1000
    hop_samples = int(10 * samples_per_ms)     # 10 ms frame shift, in samples
    n_coefficients = int(0.85 * samples_per_ms)
    fft_size = 1024

    magnitude = np.abs(
        librosa.stft(y, n_fft=fft_size, hop_length=hop_samples, win_length=fft_size)
    )
    log_power = librosa.power_to_db(magnitude**2)

    coefficients = librosa.feature.mfcc(S=log_power, sr=sr, n_mfcc=n_coefficients)
    first_delta = librosa.feature.delta(coefficients)
    second_delta = librosa.feature.delta(coefficients, order=2)

    return coefficients, first_delta, second_delta
|
67 |
+
|
68 |
+
# Predict function
|
69 |
+
def predict_audio(file):
    """Predict the class label for an uploaded audio file.

    Args:
        file: Path to the audio file. An empty or ``None`` value is treated
            as a failed extraction (presumably what the Gradio audio input
            passes when nothing was uploaded -- confirm against Gradio docs).

    Returns:
        ``"Predicted Class: <label>"`` on success, or
        ``"Error in feature extraction."`` when no features could be computed.
    """
    # Nothing to analyse: skip the audio pipeline instead of letting it raise
    # and print an exception for a missing upload.
    if not file:
        return "Error in feature extraction."

    features = extract_features_for_prediction(file)
    if not features:
        return "Error in feature extraction."

    # The scaler takes a 2-D (1, n_features) row; the scaled row is then
    # reshaped to 3-D (1, 1, n_features) before being passed to the model.
    feature_row = np.array(list(features.values())).reshape(1, -1)
    scaled_features = scaler.transform(feature_row).reshape(1, 1, -1)

    prediction = model.predict(scaled_features)
    predicted_class = label_encoder.inverse_transform([np.argmax(prediction)])
    return f"Predicted Class: {predicted_class[0]}"
|
79 |
|
80 |
# Gradio Interface
|
81 |
# Single-function UI: one audio upload in (handed to the callback as a file
# path), one text result out.
interface = gr.Interface(
    fn=predict_audio,
    inputs=gr.Audio(type="filepath", label="Upload a WAV File"),
    outputs="text",
    title="Audio Class Prediction",
    description="Upload a .wav file to predict its class.",
)

# Launch the app only when run as a script, so importing this module (e.g.
# to reuse predict_audio) does not start a web server as a side effect.
if __name__ == "__main__":
    interface.launch()
|
|
|
|