sancho10 committed on
Commit 7084f59 · verified · 1 Parent(s): 0b747fe

Update app.py

Files changed (1)
  1. app.py +68 -28
app.py CHANGED
@@ -3,74 +3,114 @@ import numpy as np
 import tensorflow as tf
 import librosa
 import librosa.util
+import pickle
 from sklearn.preprocessing import LabelEncoder
 
-# Feature extraction function
+
+# Feature Extraction Function
 def extract_features(file_path):
     try:
-        # Load the audio file
+        # Load audio
         y, sr = librosa.load(file_path, sr=8000)  # Resample to 8kHz
+
+        # Extract MFCC and deltas
         mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
-
-        # Pad or truncate to 100 frames along axis 1
+        mfcc_delta = librosa.feature.delta(mfcc)
+        mfcc_double_delta = librosa.feature.delta(mfcc, order=2)
+
+        # Extract SFCC
+        sfcc = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=13)
+        sfcc_db = librosa.power_to_db(sfcc)
+        sfcc_delta = librosa.feature.delta(sfcc_db)
+        sfcc_double_delta = librosa.feature.delta(sfcc_db, order=2)
+
+        # Calculate HNR (Harmonics-to-Noise Ratio)
+        hnr = np.mean(librosa.effects.harmonic(y))  # Approximation for simplicity
+
+        # Padding/truncating for consistency
         mfcc = librosa.util.fix_length(mfcc, size=100, axis=1)
+        mfcc_delta = librosa.util.fix_length(mfcc_delta, size=100, axis=1)
+        mfcc_double_delta = librosa.util.fix_length(mfcc_double_delta, size=100, axis=1)
+        sfcc_db = librosa.util.fix_length(sfcc_db, size=100, axis=1)
+        sfcc_delta = librosa.util.fix_length(sfcc_delta, size=100, axis=1)
+        sfcc_double_delta = librosa.util.fix_length(sfcc_double_delta, size=100, axis=1)
 
-        # Ensure the shape is (13, 100)
-        if mfcc.shape[0] != 13:
-            mfcc = librosa.util.fix_length(mfcc, size=13, axis=0)
+        # Concatenate all features into a single matrix
+        features = np.vstack([
+            mfcc, mfcc_delta, mfcc_double_delta,
+            sfcc_db, sfcc_delta, sfcc_double_delta
+        ])
 
-        return {"mfcc": mfcc}
+        return {"features": features, "hnr": hnr}
     except Exception as e:
         raise ValueError(f"Error in feature extraction: {str(e)}")
 
-# Prediction function
+
+# Prepare Input Function
+def prepare_input(features):
+    feature_matrix = features["features"]  # Shape: (78, 100)
+    hnr = features["hnr"]  # Single scalar value
+
+    # Normalize feature matrix
+    feature_matrix = (feature_matrix - np.mean(feature_matrix)) / np.std(feature_matrix)
+
+    # Add batch and channel dimensions for model compatibility
+    feature_matrix = feature_matrix[np.newaxis, ..., np.newaxis]  # Shape: (1, 78, 100, 1)
+    return feature_matrix, hnr
+
+
+# Prediction Function
 def predict_class(file_path, model, label_encoder):
     try:
+        # Extract and prepare features
         features = extract_features(file_path)
-        mfcc = features["mfcc"]
-
-        # Add batch and channel dimensions for model compatibility
-        mfcc = mfcc[np.newaxis, ..., np.newaxis]  # Shape: (1, 13, 100, 1)
+        feature_matrix, _ = prepare_input(features)
 
         # Make prediction
-        prediction = model.predict(mfcc)
-        predicted_class = label_encoder.inverse_transform([np.argmax(prediction)])
+        prediction = model.predict(feature_matrix)
+        predicted_index = np.argmax(prediction)
+
+        # Map predicted index to class label
+        predicted_class = label_encoder.inverse_transform([predicted_index])
         return f"Predicted Class: {predicted_class[0]}"
     except Exception as e:
         return f"Error in prediction: {str(e)}"
 
-# Load the pre-trained model
+
+# Load Pre-trained Model
 model = tf.keras.models.load_model("voice_classification_modelm.h5")
 
-# Define the class labels (same as used during training)
-class_labels = [
+# Create Label Encoder
+# Note: Replace these labels with the actual classes used during training
+labels = [
     "all_vowels_healthy",
     "allvowels_functional",
     "allvowels_laryngitis",
-    "allvowels_lukoplakia",
+    "allvowels_leukoplakia",
     "allvowels_psychogenic",
     "allvowels_rlnp",
-    "allvowels_sd"
+    "allvowels_sd",
 ]
-
-# Initialize the LabelEncoder
 label_encoder = LabelEncoder()
-label_encoder.fit(class_labels)
+label_encoder.fit(labels)
 
-# Define the Gradio function
+
+# Gradio Interface Function
 def classify_audio(audio_file):
     return predict_class(audio_file, model, label_encoder)
 
-# Create the Gradio interface
+
+# Gradio Interface
 interface = gr.Interface(
     fn=classify_audio,
-    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),  # Removed 'source' argument
+    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),
     outputs=gr.Textbox(label="Predicted Class"),
     title="Voice Disorder Classification",
     description="Upload an audio file to classify its voice type (e.g., healthy or various disorder types).",
     examples=["example_audio.wav"],  # Replace with paths to example audio files
 )
 
-
-# Launch the Gradio app
-interface.launch()
+# Launch Gradio App
+if __name__ == "__main__":
+    interface.launch()
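
A minimal sketch (not part of the commit) for sanity-checking the new feature pipeline locally. It assumes the revised app.py and voice_classification_modelm.h5 are in the working directory, that gradio, tensorflow, librosa, and scikit-learn are installed, and that "example_audio.wav" (the placeholder name already used in the interface's examples) points to a real audio file. It also prints the index-to-label mapping produced by LabelEncoder, since the prediction index must line up with the ordering used during training; note that predict_class currently discards the HNR value returned by prepare_input.

    # Sketch only, under the assumptions above; names are taken from this commit.
    from app import extract_features, prepare_input, label_encoder

    # The model now expects a (1, 78, 100, 1) tensor: six stacked 13 x 100 blocks
    # (MFCC plus its deltas, and the mel-spectrogram block plus its deltas).
    features = extract_features("example_audio.wav")
    feature_matrix, hnr = prepare_input(features)
    assert feature_matrix.shape == (1, 78, 100, 1)
    print("input shape:", feature_matrix.shape, "| HNR approximation:", hnr)

    # LabelEncoder sorts labels alphabetically, so this index-to-class mapping
    # must match the class ordering used when the model was trained.
    for index, name in enumerate(label_encoder.classes_):
        print(index, name)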