manikanta2026 committed
Commit eba92d1 · 1 Parent(s): e042967
ann_new_emotion_recognition_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2e1979b8299de6a4e94fdf2a847ea78de60a778386f8f0745068c5e01c80fc9b
+ size 34282072
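The model file is committed as a Git LFS pointer, so the ~34 MB weights have to be fetched (e.g. with git lfs pull) before they can be loaded. A minimal sanity check after fetching, sketched on the assumption that the network's input width matches the max_len=40 used in app.py below:

import tensorflow as tf
model = tf.keras.models.load_model("ann_new_emotion_recognition_model.h5", compile=False)
print(model.input_shape)   # expected to be (None, 40), matching the 40-dim feature vector
print(model.output_shape)  # one unit per emotion class in the label encoder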
app.py ADDED
@@ -0,0 +1,60 @@
+ import numpy as np
+ import librosa
+ import pickle
+ import tensorflow as tf
+ import gradio as gr
+
+ # Load the trained model and the fitted label encoder
+ model = tf.keras.models.load_model("ann_new_emotion_recognition_model.h5", compile=False)
+ # The path here must match the pickle file committed alongside app.py
+ with open("new_label_encoder.pkl", "rb") as f:
+     label_encoder = pickle.load(f)
+
+ def extract_features(audio, sr, max_len=40):
+     # Time-averaged features: 20 MFCCs, 12 chroma bins, 7 spectral-contrast bands,
+     # plus zero-crossing rate, centroid, rolloff and RMS (43 values before padding/truncation)
+     mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20)
+     mfccs = np.mean(mfccs.T, axis=0)
+     chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
+     chroma = np.mean(chroma.T, axis=0)
+     contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
+     contrast = np.mean(contrast.T, axis=0)
+     zcr = librosa.feature.zero_crossing_rate(y=audio)
+     zcr = np.mean(zcr.T, axis=0)
+     centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)
+     centroid = np.mean(centroid.T, axis=0)
+     rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)
+     rolloff = np.mean(rolloff.T, axis=0)
+     rms = librosa.feature.rms(y=audio)
+     rms = np.mean(rms.T, axis=0)
+
+     # Pad or truncate to max_len so the vector matches the model's expected input size
+     features = np.concatenate([mfccs, chroma, contrast, zcr, centroid, rolloff, rms])
+     if len(features) < max_len:
+         features = np.pad(features, (0, max_len - len(features)), mode='constant')
+     else:
+         features = features[:max_len]
+     return features
+
+ def predict_emotion(audio_file):
+     # Load at the file's native sampling rate, extract features, and add a batch axis
+     audio_np, sr = librosa.load(audio_file, sr=None)
+     features = extract_features(audio_np, sr)
+     features = np.expand_dims(features, axis=0)
+
+     predictions = model.predict(features, verbose=0)
+     predicted_class = np.argmax(predictions[0])
+     predicted_emotion = label_encoder.inverse_transform([predicted_class])[0]
+
+     # gr.Label expects numeric confidences (it renders them as percentages itself)
+     emotion_probabilities = {
+         label_encoder.inverse_transform([i])[0]: float(pred)
+         for i, pred in enumerate(predictions[0])
+     }
+
+     return predicted_emotion, emotion_probabilities
+
+ # Gradio interface: a text box for the top emotion and a label widget for per-class confidences
+ iface = gr.Interface(
+     fn=predict_emotion,
+     inputs=gr.Audio(type="filepath"),
+     outputs=["text", "label"],
+     title="🎤 Emotion Recognition from Audio",
+     description="Upload or record audio to identify the emotion being expressed."
+ )
+
+ iface.launch()
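A quick smoke test of the pipeline, assuming extract_features and predict_emotion from app.py are in scope (for instance, pasted into the same interactive session); the 440 Hz tone is synthetic stand-in audio and test_clip.wav is a placeholder path:

import numpy as np
sr = 22050
t = np.linspace(0, 1, sr, endpoint=False)
tone = 0.5 * np.sin(2 * np.pi * 440 * t)          # one second of a 440 Hz sine
print(extract_features(tone, sr).shape)            # expected (40,)
emotion, probs = predict_emotion("test_clip.wav")  # placeholder file
print(emotion, probs)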
new_label_encoder (1).pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0131c5626b3df306fd6dfef89cd8a6b41609c25f378ffe40202c0f84e6f30054
+ size 403
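This 403-byte entry is the LFS pointer for the pickled label encoder. Once fetched, its fitted classes can be listed directly, assuming it is a scikit-learn LabelEncoder as the inverse_transform calls in app.py suggest; the file name below is the one added in this commit, and the path app.py opens is assumed to stay in sync with it:

import pickle
with open("new_label_encoder (1).pkl", "rb") as f:
    le = pickle.load(f)
print(le.classes_)  # emotion labels, in the index order the model's outputs map to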
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ tensorflow
+ librosa
+ gradio
+ numpy
+ scikit-learn
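requirements.txt leaves all versions unpinned, so the releases actually installed are resolved at build time. A small check after pip install -r requirements.txt (note that sklearn is the import name for the scikit-learn package):

import importlib
for pkg in ("numpy", "librosa", "tensorflow", "gradio", "sklearn"):
    module = importlib.import_module(pkg)
    print(pkg, getattr(module, "__version__", "unknown"))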