Spaces:

sancho10
/

RITISHREE

Sleeping

App Files Files Community

sancho10 committed on Nov 30, 2024

Commit

7084f59

verified ·

1 Parent(s): 0b747fe

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -28

app.py CHANGED Viewed

@@ -3,74 +3,114 @@ import numpy as np
 import tensorflow as tf
 import librosa
 import librosa.util
 from sklearn.preprocessing import LabelEncoder
-# Feature extraction function
 def extract_features(file_path):
     try:
-        # Load the audio file
         y, sr = librosa.load(file_path, sr=8000)  # Resample to 8kHz
         mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
-        # Pad or truncate to 100 frames along axis 1
         mfcc = librosa.util.fix_length(mfcc, size=100, axis=1)
-        # Ensure the shape is (13, 100)
-        if mfcc.shape[0] != 13:
-            mfcc = librosa.util.fix_length(mfcc, size=13, axis=0)
-        return {"mfcc": mfcc}
     except Exception as e:
         raise ValueError(f"Error in feature extraction: {str(e)}")
-# Prediction function
 def predict_class(file_path, model, label_encoder):
     try:
         features = extract_features(file_path)
-        mfcc = features["mfcc"]
-        # Add batch and channel dimensions for model compatibility
-        mfcc = mfcc[np.newaxis, ..., np.newaxis]  # Shape: (1, 13, 100, 1)
         # Make prediction
-        prediction = model.predict(mfcc)
-        predicted_class = label_encoder.inverse_transform([np.argmax(prediction)])
         return f"Predicted Class: {predicted_class[0]}"
     except Exception as e:
         return f"Error in prediction: {str(e)}"
-# Load the pre-trained model
 model = tf.keras.models.load_model("voice_classification_modelm.h5")
-# Define the class labels (same as used during training)
-class_labels = [
     "all_vowels_healthy",
     "allvowels_functional",
     "allvowels_laryngitis",
-    "allvowels_lukoplakia",
     "allvowels_psychogenic",
     "allvowels_rlnp",
-    "allvowels_sd"
 ]
-# Initialize the LabelEncoder
 label_encoder = LabelEncoder()
-label_encoder.fit(class_labels)
-# Define the Gradio function
 def classify_audio(audio_file):
     return predict_class(audio_file, model, label_encoder)
-# Create the Gradio interface
 interface = gr.Interface(
     fn=classify_audio,
-    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),  # Removed 'source' argument
     outputs=gr.Textbox(label="Predicted Class"),
     title="Voice Disorder Classification",
     description="Upload an audio file to classify its voice type (e.g., healthy or various disorder types).",
     examples=["example_audio.wav"],  # Replace with paths to example audio files
 )
-# Launch the Gradio app
-interface.launch()

 import tensorflow as tf
 import librosa
 import librosa.util
+import pickle
 from sklearn.preprocessing import LabelEncoder
# Feature Extraction Function
def extract_features(file_path, *, sample_rate=8000, n_coeffs=13, n_frames=100):
    """Load an audio file and build the stacked feature matrix used for prediction.

    Six feature blocks are computed, each padded or truncated to ``n_frames``
    columns and stacked row-wise: MFCCs, their first- and second-order deltas,
    a log-scaled mel spectrogram, and its first- and second-order deltas.

    Args:
        file_path: Path to the audio file to analyse.
        sample_rate: Rate (Hz) the audio is resampled to on load.
        n_coeffs: Number of MFCC coefficients and of mel bands per block.
        n_frames: Number of time frames every block is padded/truncated to.

    Returns:
        dict with two keys:
            "features": array of shape (6 * n_coeffs, n_frames), i.e.
                (78, 100) with the default arguments.
            "hnr": scalar, see the note at its computation below.

    Raises:
        ValueError: if loading or any feature computation fails; the original
            exception is attached as ``__cause__``.
    """
    try:
        # Load audio, resampled to the requested rate
        y, sr = librosa.load(file_path, sr=sample_rate)
        # MFCCs and their first/second-order deltas
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_coeffs)
        mfcc_delta = librosa.feature.delta(mfcc)
        mfcc_double_delta = librosa.feature.delta(mfcc, order=2)
        # The "sfcc" blocks are a mel spectrogram with n_coeffs bands converted
        # to dB, plus its deltas. No cepstral transform is applied here.
        sfcc = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_coeffs)
        sfcc_db = librosa.power_to_db(sfcc)
        sfcc_delta = librosa.feature.delta(sfcc_db)
        sfcc_double_delta = librosa.feature.delta(sfcc_db, order=2)
        # "hnr" is the mean sample value of the harmonic component of the
        # signal, kept as a rough stand-in for a harmonics-to-noise ratio.
        # NOTE(review): this is not a ratio; confirm it matches what training used.
        hnr = np.mean(librosa.effects.harmonic(y))
        # Pad/truncate every block to the same number of frames, then stack
        # them in a fixed order so row positions are stable across files.
        features = np.vstack([
            librosa.util.fix_length(block, size=n_frames, axis=1)
            for block in (
                mfcc, mfcc_delta, mfcc_double_delta,
                sfcc_db, sfcc_delta, sfcc_double_delta,
            )
        ])
        return {"features": features, "hnr": hnr}
    except Exception as e:
        # Chain the cause so the underlying traceback is not lost.
        raise ValueError(f"Error in feature extraction: {str(e)}") from e
# Prepare Input Function
def prepare_input(features):
    """Normalise the feature matrix and add batch/channel axes for the model.

    Args:
        features: dict with keys "features" (2-D array, (78, 100) when it
            comes from ``extract_features`` with default arguments) and
            "hnr" (scalar).

    Returns:
        Tuple ``(feature_matrix, hnr)`` where ``feature_matrix`` has shape
        (1, rows, cols, 1) and ``hnr`` is passed through unchanged.
    """
    feature_matrix = np.asarray(features["features"])
    hnr = features["hnr"]  # Single scalar value
    # Global (whole-matrix) standardisation: zero mean, unit variance.
    centered = feature_matrix - np.mean(feature_matrix)
    spread = np.std(feature_matrix)
    # A constant matrix (e.g. digital silence) has zero spread; dividing would
    # fill the model input with NaN/inf, so leave the centred zeros as they are.
    if spread > 0:
        centered = centered / spread
    # Add batch and channel dimensions for model compatibility
    batched = centered[np.newaxis, ..., np.newaxis]
    return batched, hnr
# Prediction Function
def predict_class(file_path, model, label_encoder):
    """Classify one audio file and return a human-readable result string.

    Args:
        file_path: Path to the audio file; may be ``None`` or empty when the
            caller has no file to offer.
        model: Object exposing ``predict(array)`` that returns class scores.
        label_encoder: Object exposing ``inverse_transform`` to map a class
            index back to its label.

    Returns:
        "Predicted Class: <label>" on success, otherwise a string starting
        with "Error in prediction: ". This function never raises, so the
        text can be shown directly in the UI.
    """
    # Fail early with a clear message instead of surfacing a nested
    # library error when no file was supplied.
    if not file_path:
        return "Error in prediction: No audio file provided."
    try:
        # Extract and prepare features
        features = extract_features(file_path)
        feature_matrix, _ = prepare_input(features)
        # Make prediction
        prediction = model.predict(feature_matrix)
        predicted_index = np.argmax(prediction)
        # Map predicted index to class label
        predicted_class = label_encoder.inverse_transform([predicted_index])
        return f"Predicted Class: {predicted_class[0]}"
    except Exception as e:
        # Deliberately broad: any failure is reported as text to the caller.
        return f"Error in prediction: {str(e)}"
# Load the trained Keras model from the working directory.
model = tf.keras.models.load_model("voice_classification_modelm.h5")
# Class names the classifier can output.
# Note: these must be the actual classes used during training. LabelEncoder
# orders classes by sorted label value, so the index-to-label mapping follows
# that sorted order rather than the order written here.
labels = [
    "all_vowels_healthy", "allvowels_functional", "allvowels_laryngitis",
    "allvowels_leukoplakia", "allvowels_psychogenic", "allvowels_rlnp",
    "allvowels_sd",
]
# fit() returns the encoder itself, so construction and fitting can be chained.
label_encoder = LabelEncoder().fit(labels)
# Callback wired into the Gradio UI
def classify_audio(audio_file):
    """Run the module-level model and label encoder on the uploaded file path."""
    result_text = predict_class(
        file_path=audio_file,
        model=model,
        label_encoder=label_encoder,
    )
    return result_text
# Build the Gradio UI: one audio upload in, one text box out.
_audio_input = gr.Audio(type="filepath", label="Upload an Audio File")
_prediction_output = gr.Textbox(label="Predicted Class")
interface = gr.Interface(
    fn=classify_audio,
    inputs=_audio_input,
    outputs=_prediction_output,
    title="Voice Disorder Classification",
    description="Upload an audio file to classify its voice type (e.g., healthy or various disorder types).",
    examples=["example_audio.wav"],  # Replace with paths to example audio files
)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()