sancho10 committed on
Commit
e52bbe2
1 Parent(s): 7ea5dc7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -96
app.py CHANGED
@@ -1,117 +1,90 @@
1
  import gradio as gr
2
  import numpy as np
3
- import tensorflow as tf
4
  import librosa
5
- import librosa.util
6
- import pickle
7
- from sklearn.preprocessing import LabelEncoder
8
 
 
 
 
 
9
 
10
- # Feature Extraction Function
11
def extract_features(file_path):
    """Build the stacked feature matrix and an HNR proxy for one audio file.

    The audio is loaded with a target rate of 8 kHz. Six 13-row blocks
    (MFCC, its first and second deltas, a 13-band mel spectrogram in dB, and
    its first and second deltas) are each padded or truncated to 100 frames
    and stacked vertically.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A dict with two keys: "features" (the stacked matrix) and "hnr" (the
        mean of the harmonic component of the signal, used here as a rough
        approximation of a harmonics-to-noise ratio).

    Raises:
        ValueError: If loading the file or computing any feature fails.
    """
    try:
        signal, rate = librosa.load(file_path, sr=8000)

        cepstra = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=13)
        mel_db = librosa.power_to_db(
            librosa.feature.melspectrogram(y=signal, sr=rate, n_mels=13)
        )

        # Row order of the final matrix follows the order of this list.
        blocks = [
            cepstra,
            librosa.feature.delta(cepstra),
            librosa.feature.delta(cepstra, order=2),
            mel_db,
            librosa.feature.delta(mel_db),
            librosa.feature.delta(mel_db, order=2),
        ]

        # Approximation for simplicity: mean of the harmonic part of the signal.
        harmonic_mean = np.mean(librosa.effects.harmonic(signal))

        # Force every block to exactly 100 frames so the stacked shape is fixed.
        stacked = np.vstack(
            [librosa.util.fix_length(block, size=100, axis=1) for block in blocks]
        )

        return {"features": stacked, "hnr": harmonic_mean}
    except Exception as e:
        raise ValueError(f"Error in feature extraction: {str(e)}")
47
 
 
 
48
 
49
- # Prepare Input Function
50
def prepare_input(features):
    """Normalise the feature matrix and add batch/channel axes for the model.

    Args:
        features: Mapping with a "features" entry (2-D numeric array) and an
            "hnr" entry, as produced by the feature extractor.

    Returns:
        A ``(feature_matrix, hnr)`` tuple. ``feature_matrix`` is the input
        standardised with its global mean and standard deviation and reshaped
        from ``(rows, cols)`` to ``(1, rows, cols, 1)``; ``hnr`` is passed
        through unchanged.
    """
    matrix = features["features"]
    hnr = features["hnr"]

    centered = matrix - np.mean(matrix)
    spread = np.std(matrix)
    # A constant matrix (e.g. digital silence) has zero spread; dividing would
    # turn the whole input into NaN. Leave it centred at zero instead.
    if spread > 0:
        centered = centered / spread

    # Leading batch axis and trailing channel axis.
    feature_matrix = centered[np.newaxis, ..., np.newaxis]
    return feature_matrix, hnr
 
60
 
 
 
61
 
62
- # Prediction Function
63
def predict_class(file_path, model, label_encoder):
    """Classify one audio file and return a human-readable result string.

    Args:
        file_path: Path of the audio file to classify.
        model: Object exposing ``predict(array)`` that returns class scores.
        label_encoder: Object exposing ``inverse_transform`` to map a class
            index back to its label.

    Returns:
        ``"Predicted Class: <label>"`` on success, or
        ``"Error in prediction: <message>"`` if any step raises. Errors are
        returned as text rather than raised, so the caller always gets a
        string.
    """
    try:
        features = extract_features(file_path)

        # prepare_input returns (feature_matrix, hnr). Only the matrix is fed
        # to the model; passing the whole tuple would hand the scalar HNR to
        # predict() as if it were a second input.
        input_vector, _hnr = prepare_input(features)

        prediction = model.predict(input_vector)
        predicted_index = np.argmax(prediction)

        predicted_class = label_encoder.inverse_transform([predicted_index])
        return f"Predicted Class: {predicted_class[0]}"
    except Exception as e:
        return f"Error in prediction: {str(e)}"
78
-
79
-
80
-
81
# Pre-trained classifier, loaded once when the module is imported.
model = tf.keras.models.load_model("voice_classification_modelm.h5")

# Class names that predicted indices are mapped back to.
# NOTE: replace these with the actual classes used during training.
labels = [
    "all_vowels_healthy",
    "allvowels_functional",
    "allvowels_laryngitis",
    "allvowels_leukoplakia",
    "allvowels_psychogenic",
    "allvowels_rlnp",
    "allvowels_sd",
]

# fit() returns the encoder itself, so construction and fitting are chained.
label_encoder = LabelEncoder().fit(labels)
97
-
98
-
99
- # Gradio Interface Function
100
def classify_audio(audio_file):
    """Gradio callback: classify ``audio_file`` with the module-level model.

    Delegates to ``predict_class`` using the globally loaded ``model`` and
    ``label_encoder`` and returns its result string unchanged.
    """
    result_text = predict_class(audio_file, model, label_encoder)
    return result_text
102
-
 
 
103
 
# Gradio Interface
# The input component hands the callback a file path; the output is a text box.
_audio_input = gr.Audio(type="filepath", label="Upload an Audio File")
_class_output = gr.Textbox(label="Predicted Class")

interface = gr.Interface(
    fn=classify_audio,
    inputs=_audio_input,
    outputs=_class_output,
    title="Voice Disorder Classification",
    description="Upload an audio file to classify its voice type (e.g., healthy or various disorder types).",
    examples=["example_audio.wav"],  # Replace with paths to example audio files
)

# Launch Gradio App (only when executed as a script, not on import)
if __name__ == "__main__":
    interface.launch()
117
-
 
1
  import gradio as gr
2
  import numpy as np
 
3
  import librosa
4
+ import joblib
5
+ from tensorflow.keras.models import load_model
 
6
 
7
# Artifacts produced at training time, loaded once at import.
# Each path is relative to the working directory; make sure the files exist.
model = load_model("lstmm_model.h5")  # trained Keras model
scaler = joblib.load("scalerr.pkl")  # fitted feature scaler
label_encoder = joblib.load("label_encoderr.pkl")  # fitted label encoder
11
 
12
+ # Feature extraction function for prediction
13
def extract_features_for_prediction(file_path):
    """Compute the scalar summary features used for prediction.

    The file is loaded at its native sample rate (``sr=None``) and reduced to
    one scalar per feature. The insertion order of the returned dict is
    significant: the caller flattens ``features.values()`` into the vector
    that is scaled and fed to the model, so keys must not be reordered.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A dict mapping feature name to scalar value, or ``None`` if any step
        fails (the error is printed).
    """
    try:
        y, sr = librosa.load(file_path, sr=None)

        features = {}

        # Mean of the positive pitch-track bins; 0 when none are positive.
        pitches, _magnitudes = librosa.piptrack(y=y, sr=sr)
        voiced = pitches > 0
        features['pitch'] = np.mean(pitches[voiced]) if np.any(voiced) else 0

        rms = librosa.feature.rms(y=y)
        features['intensity'] = np.mean(rms)

        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        features['mfcc_mean'] = np.mean(mfcc)
        features['mfcc_var'] = np.var(mfcc)

        sfcc, sfcc_delta, sfcc_double_delta = compute_sfcc(y, sr)
        features['sfcc_mean'] = np.mean(sfcc)
        features['sfcc_var'] = np.var(sfcc)
        features['sfcc_delta_mean'] = np.mean(sfcc_delta)
        features['sfcc_double_delta_mean'] = np.mean(sfcc_double_delta)

        mfcc_delta = librosa.feature.delta(mfcc)
        mfcc_double_delta = librosa.feature.delta(mfcc, order=2)
        features['mfcc_delta_mean'] = np.mean(mfcc_delta)
        features['mfcc_double_delta_mean'] = np.mean(mfcc_double_delta)

        # Separate the signal once and reuse both components for the two
        # ratio features below, instead of recomputing them per feature.
        harmonic = librosa.effects.harmonic(y)
        percussive = librosa.effects.percussive(y)

        # Mean of the sample-wise ratio (1e-6 offsets the denominator).
        features['hnr'] = np.mean(harmonic / (percussive + 1e-6))

        # Ratio of the component means (a different quantity from 'hnr').
        features['h_n_ratio'] = np.mean(harmonic) / (np.mean(percussive) + 1e-6)

        return features

    except Exception as e:
        # Best-effort boundary: callers treat None as "extraction failed".
        print(f"Error processing file: {e}")
        return None
53
+
54
def compute_sfcc(y, sr):
    """Compute cepstral coefficients from the STFT power spectrum, with deltas.

    The hop length and the number of coefficients both scale with the sample
    rate: the hop is the number of samples in 10 ms, and the coefficient
    count is ``int(0.85 * sr / 1000)``.

    Args:
        y: Audio time series.
        sr: Sample rate of ``y`` in Hz.

    Returns:
        A ``(sfcc, sfcc_delta, sfcc_double_delta)`` tuple of arrays.
    """
    fft_size = 1024
    samples_per_ms = sr / 1000
    hop = int(10 * samples_per_ms)  # 10 ms expressed in samples
    n_coeffs = int(0.85 * samples_per_ms)

    # Power spectrum with a full-length (1024-sample) analysis window.
    magnitude = np.abs(
        librosa.stft(y, n_fft=fft_size, hop_length=hop, win_length=fft_size)
    )
    power_spec = magnitude**2

    # The dB-scaled STFT power is supplied directly as S, in place of the
    # mel spectrogram that mfcc() would otherwise compute from the signal.
    log_power = librosa.power_to_db(power_spec)
    coeffs = librosa.feature.mfcc(S=log_power, sr=sr, n_mfcc=n_coeffs)

    first_delta = librosa.feature.delta(coeffs)
    second_delta = librosa.feature.delta(coeffs, order=2)

    return coeffs, first_delta, second_delta
67
+
68
+ # Predict function
69
def predict_audio(file):
    """Gradio callback: predict the class label for an uploaded audio file.

    Args:
        file: Path of the uploaded audio file.

    Returns:
        ``"Predicted Class: <label>"`` on success, or
        ``"Error in feature extraction."`` when no features were produced.
    """
    features = extract_features_for_prediction(file)
    if not features:
        return "Error in feature extraction."

    # One sample as a 2-D row for the scaler, in dict insertion order.
    row = np.array(list(features.values())).reshape(1, -1)

    # The model is fed a 3-D array of shape (1, 1, n_features).
    scaled_features = scaler.transform(row).reshape(1, 1, -1)

    scores = model.predict(scaled_features)
    best_index = np.argmax(scores)
    decoded = label_encoder.inverse_transform([best_index])
    return f"Predicted Class: {decoded[0]}"
79
 
# Gradio Interface
# The audio component passes the uploaded file to the callback as a path.
_wav_input = gr.Audio(type="filepath", label="Upload a WAV File")

interface = gr.Interface(
    fn=predict_audio,
    inputs=_wav_input,
    outputs="text",
    title="Audio Class Prediction",
    description="Upload a .wav file to predict its class.",
)

# Launch the app (runs whenever this module is executed or imported)
interface.launch()