Spaces:

sancho10
/

RITISHREE

Sleeping

App Files Files Community

sancho10 committed on Dec 10, 2024

Commit

e52bbe2

verified ·

1 Parent(s): 7ea5dc7

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -96

app.py CHANGED Viewed

@@ -1,117 +1,90 @@
 import gradio as gr
 import numpy as np
-import tensorflow as tf
 import librosa
-import librosa.util
-import pickle
-from sklearn.preprocessing import LabelEncoder
-# Feature Extraction Function
-def extract_features(file_path):
     try:
-        # Load audio
-        y, sr = librosa.load(file_path, sr=8000)  # Resample to 8kHz
-        # Extract MFCC and deltas
-        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
-        mfcc_delta = librosa.feature.delta(mfcc)
-        mfcc_double_delta = librosa.feature.delta(mfcc, order=2)
-        # Extract SFCC
-        sfcc = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=13)
-        sfcc_db = librosa.power_to_db(sfcc)
-        sfcc_delta = librosa.feature.delta(sfcc_db)
-        sfcc_double_delta = librosa.feature.delta(sfcc_db, order=2)
-        # Calculate HNR (Harmonics-to-Noise Ratio)
-        hnr = np.mean(librosa.effects.harmonic(y))  # Approximation for simplicity
-        # Padding/truncating for consistency
-        mfcc = librosa.util.fix_length(mfcc, size=100, axis=1)
-        mfcc_delta = librosa.util.fix_length(mfcc_delta, size=100, axis=1)
-        mfcc_double_delta = librosa.util.fix_length(mfcc_double_delta, size=100, axis=1)
-        sfcc_db = librosa.util.fix_length(sfcc_db, size=100, axis=1)
-        sfcc_delta = librosa.util.fix_length(sfcc_delta, size=100, axis=1)
-        sfcc_double_delta = librosa.util.fix_length(sfcc_double_delta, size=100, axis=1)
-        # Concatenate all features into a single matrix
-        features = np.vstack([
-            mfcc, mfcc_delta, mfcc_double_delta,
-            sfcc_db, sfcc_delta, sfcc_double_delta
-        ])
-        return {"features": features, "hnr": hnr}
-    except Exception as e:
-        raise ValueError(f"Error in feature extraction: {str(e)}")
-# Prepare Input Function
-def prepare_input(features):
-    feature_matrix = features["features"]  # Shape: (78, 100)
-    hnr = features["hnr"]  # Single scalar value
-    # Normalize feature matrix
-    feature_matrix = (feature_matrix - np.mean(feature_matrix)) / np.std(feature_matrix)
-    # Add batch and channel dimensions for model compatibility
-    feature_matrix = feature_matrix[np.newaxis, ..., np.newaxis]  # Shape: (1, 78, 100, 1)
-    return feature_matrix, hnr
-# Prediction Function
-def predict_class(file_path, model, label_encoder):
-    try:
-        # Extract and prepare features
-        features = extract_features(file_path)
-        input_vector = prepare_input(features)
-        # Make prediction
-        prediction = model.predict(input_vector)
-        predicted_index = np.argmax(prediction)
-        # Map predicted index to class label
-        predicted_class = label_encoder.inverse_transform([predicted_index])
-        return f"Predicted Class: {predicted_class[0]}"
     except Exception as e:
-        return f"Error in prediction: {str(e)}"
-# Load Pre-trained Model
-model = tf.keras.models.load_model("voice_classification_modelm.h5")
-# Create Label Encoder
-# Note: Replace these labels with the actual classes used during training
-labels = [
-    "all_vowels_healthy",
-    "allvowels_functional",
-    "allvowels_laryngitis",
-    "allvowels_leukoplakia",
-    "allvowels_psychogenic",
-    "allvowels_rlnp",
-    "allvowels_sd",
-]
-label_encoder = LabelEncoder()
-label_encoder.fit(labels)
-# Gradio Interface Function
-def classify_audio(audio_file):
-    return predict_class(audio_file, model, label_encoder)
 # Gradio Interface
 interface = gr.Interface(
-    fn=classify_audio,
-    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),
-    outputs=gr.Textbox(label="Predicted Class"),
-    title="Voice Disorder Classification",
-    description="Upload an audio file to classify its voice type (e.g., healthy or various disorder types).",
-    examples=["example_audio.wav"],  # Replace with paths to example audio files
 )
-# Launch Gradio App
-if __name__ == "__main__":
-    interface.launch()

 import gradio as gr
 import numpy as np
 import librosa
+import joblib
+from tensorflow.keras.models import load_model
+# Load the trained model, scaler, and label encoder
+# These run once at module import; predict_audio reads them as globals.
+# NOTE(review): the doubled letters in the file names ('lstmm', 'scalerr',
+# 'label_encoderr') look like typos but must match the files shipped with
+# the app -- confirm before renaming.
+model = load_model('lstmm_model.h5')  # Keras model; path is relative to the working directory
+scaler = joblib.load('scalerr.pkl')  # used via scaler.transform(...) before prediction
+label_encoder = joblib.load('label_encoderr.pkl')  # used via inverse_transform(...) on the argmax index
+# Feature extraction function for prediction
+def extract_features_for_prediction(file_path):
+    """Compute the scalar acoustic features used for prediction.
+
+    Args:
+        file_path: Path of the audio file to analyse. It is loaded at its
+            native sampling rate (``sr=None``).
+
+    Returns:
+        A dict of scalar features, or ``None`` when loading or any
+        extraction step raises (the error is printed, not re-raised).
+        The insertion order of the keys is significant: ``predict_audio``
+        flattens ``features.values()`` positionally before scaling, so the
+        assignments below must not be reordered.
+    """
     try:
+        y, sr = librosa.load(file_path, sr=None)
+        features = {}
+        # Mean over the strictly positive pitch-track bins; 0 when there are none.
+        pitches, _ = librosa.piptrack(y=y, sr=sr)
+        voiced = pitches > 0
+        features['pitch'] = np.mean(pitches[voiced]) if np.any(voiced) else 0
+        features['intensity'] = np.mean(librosa.feature.rms(y=y))
+        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
+        features['mfcc_mean'] = np.mean(mfcc)
+        features['mfcc_var'] = np.var(mfcc)
+        sfcc, sfcc_delta, sfcc_double_delta = compute_sfcc(y, sr)
+        features['sfcc_mean'] = np.mean(sfcc)
+        features['sfcc_var'] = np.var(sfcc)
+        features['sfcc_delta_mean'] = np.mean(sfcc_delta)
+        features['sfcc_double_delta_mean'] = np.mean(sfcc_double_delta)
+        features['mfcc_delta_mean'] = np.mean(librosa.feature.delta(mfcc))
+        features['mfcc_double_delta_mean'] = np.mean(librosa.feature.delta(mfcc, order=2))
+        # Separate the harmonic and percussive components once; both ratios
+        # below share the result instead of re-running the separation.
+        harmonic = librosa.effects.harmonic(y)
+        percussive = librosa.effects.percussive(y)
+        # 'hnr' is the mean of the element-wise ratio, 'h_n_ratio' the ratio
+        # of the means; 1e-6 guards against division by zero.
+        # NOTE(review): neither is a conventional harmonics-to-noise ratio;
+        # kept unchanged because the saved scaler/model presumably expect
+        # exactly these values -- confirm against the training code.
+        features['hnr'] = np.mean(harmonic / (percussive + 1e-6))
+        features['h_n_ratio'] = np.mean(harmonic) / (np.mean(percussive) + 1e-6)
+        return features
     except Exception as e:
+        # Best-effort: report the failure and let the caller handle None.
+        print(f"Error processing file: {e}")
+        return None
+def compute_sfcc(y, sr):
+    """Return cepstral coefficients of the dB power spectrogram and their deltas.
+
+    The STFT uses a 1024-sample FFT/window and a hop of ``int(10 * sr / 1000)``
+    samples (10 ms); the number of coefficients is ``int(0.85 * sr / 1000)``.
+
+    Args:
+        y: Audio signal passed to ``librosa.stft``.
+        sr: Sampling rate of ``y`` in Hz.
+
+    Returns:
+        Tuple ``(sfcc, sfcc_delta, sfcc_double_delta)``.
+    """
+    khz = sr / 1000
+    frame_size = 1024
+    hop = int(10 * khz)
+    n_coeffs = int(0.85 * khz)
+    magnitude = np.abs(librosa.stft(y, n_fft=frame_size, hop_length=hop, win_length=frame_size))
+    power_db = librosa.power_to_db(magnitude**2)
+    coeffs = librosa.feature.mfcc(S=power_db, sr=sr, n_mfcc=n_coeffs)
+    first_order = librosa.feature.delta(coeffs)
+    second_order = librosa.feature.delta(coeffs, order=2)
+    return coeffs, first_order, second_order
+# Predict function
+def predict_audio(file):
+    """Classify one audio file and return a human-readable result string.
+
+    Args:
+        file: Path of the audio file, forwarded to
+            ``extract_features_for_prediction``.
+
+    Returns:
+        ``"Predicted Class: <label>"`` on success, or
+        ``"Error in feature extraction."`` when no features were produced.
+    """
+    features = extract_features_for_prediction(file)
+    if not features:
+        return "Error in feature extraction."
+    # Flatten the features in dict order into one row for the scaler, then
+    # add the middle axis of size 1 expected by the model input.
+    row = np.array(list(features.values())).reshape(1, -1)
+    model_input = scaler.transform(row).reshape(1, 1, -1)
+    scores = model.predict(model_input)
+    best_index = np.argmax(scores)
+    label = label_encoder.inverse_transform([best_index])[0]
+    return f"Predicted Class: {label}"
 # Gradio Interface
+# Wires predict_audio to one audio-upload input and a plain-text output.
 interface = gr.Interface(
+    fn=predict_audio,
+    # type="filepath": presumably the handler receives a path string -- see the Gradio Audio docs.
+    inputs=gr.Audio(type="filepath", label="Upload a WAV File"),
+    outputs="text",
+    title="Audio Class Prediction",
+    description="Upload a .wav file to predict its class."
 )
+# Launch the app
+# NOTE: there is no `if __name__ == "__main__":` guard, so this also runs when the module is imported.
+interface.launch()