manikanta2026 committed
Commit eba92d1 · 1 Parent(s): e042967
ann_new_emotion_recognition_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2e1979b8299de6a4e94fdf2a847ea78de60a778386f8f0745068c5e01c80fc9b
+ size 34282072
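The model file is committed as a Git LFS pointer, so the ~34 MB weights have to be fetched (e.g. with git lfs pull) before they can be loaded. A minimal sanity check after fetching, sketched on the assumption that the network's input width matches the max_len=40 used in app.py below:

import tensorflow as tf
model = tf.keras.models.load_model("ann_new_emotion_recognition_model.h5", compile=False)
print(model.input_shape)   # expected to be (None, 40), matching the 40-dim feature vector
print(model.output_shape)  # one unit per emotion class in the label encoder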
app.py ADDED
@@ -0,0 +1,60 @@
+ import numpy as np
+ import librosa
+ import pickle
+ import tensorflow as tf
+ import gradio as gr
+
+ # Load the trained model and the fitted label encoder
+ model = tf.keras.models.load_model("ann_new_emotion_recognition_model.h5", compile=False)
+ # The path here must match the pickle file committed alongside app.py
+ with open("new_label_encoder.pkl", "rb") as f:
+     label_encoder = pickle.load(f)
+
+ def extract_features(audio, sr, max_len=40):
+     # Time-averaged features: 20 MFCCs, 12 chroma bins, 7 spectral-contrast bands,
+     # plus zero-crossing rate, centroid, rolloff and RMS (43 values before padding/truncation)
+     mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=20)
+     mfccs = np.mean(mfccs.T, axis=0)
+     chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
+     chroma = np.mean(chroma.T, axis=0)
+     contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
+     contrast = np.mean(contrast.T, axis=0)
+     zcr = librosa.feature.zero_crossing_rate(y=audio)
+     zcr = np.mean(zcr.T, axis=0)
+     centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)
+     centroid = np.mean(centroid.T, axis=0)
+     rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)
+     rolloff = np.mean(rolloff.T, axis=0)
+     rms = librosa.feature.rms(y=audio)
+     rms = np.mean(rms.T, axis=0)
+
+     # Pad or truncate to max_len so the vector matches the model's expected input size
+     features = np.concatenate([mfccs, chroma, contrast, zcr, centroid, rolloff, rms])
+     if len(features) < max_len:
+         features = np.pad(features, (0, max_len - len(features)), mode='constant')
+     else:
+         features = features[:max_len]
+     return features
+
+ def predict_emotion(audio_file):
+     # Load at the file's native sampling rate, extract features, and add a batch axis
+     audio_np, sr = librosa.load(audio_file, sr=None)
+     features = extract_features(audio_np, sr)
+     features = np.expand_dims(features, axis=0)
+
+     predictions = model.predict(features, verbose=0)
+     predicted_class = np.argmax(predictions[0])
+     predicted_emotion = label_encoder.inverse_transform([predicted_class])[0]
+
+     # gr.Label expects numeric confidences (it renders them as percentages itself)
+     emotion_probabilities = {
+         label_encoder.inverse_transform([i])[0]: float(pred)
+         for i, pred in enumerate(predictions[0])
+     }
+
+     return predicted_emotion, emotion_probabilities
+
+ # Gradio interface: a text box for the top emotion and a label widget for per-class confidences
+ iface = gr.Interface(
+     fn=predict_emotion,
+     inputs=gr.Audio(type="filepath"),
+     outputs=["text", "label"],
+     title="🎤 Emotion Recognition from Audio",
+     description="Upload or record audio to identify the emotion being expressed."
+ )
+
+ iface.launch()
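A quick smoke test of the pipeline, assuming extract_features and predict_emotion from app.py are in scope (for instance, pasted into the same interactive session); the 440 Hz tone is synthetic stand-in audio and test_clip.wav is a placeholder path:

import numpy as np
sr = 22050
t = np.linspace(0, 1, sr, endpoint=False)
tone = 0.5 * np.sin(2 * np.pi * 440 * t)          # one second of a 440 Hz sine
print(extract_features(tone, sr).shape)            # expected (40,)
emotion, probs = predict_emotion("test_clip.wav")  # placeholder file
print(emotion, probs)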
new_label_encoder (1).pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0131c5626b3df306fd6dfef89cd8a6b41609c25f378ffe40202c0f84e6f30054
+ size 403
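This 403-byte entry is the LFS pointer for the pickled label encoder. Once fetched, its fitted classes can be listed directly, assuming it is a scikit-learn LabelEncoder as the inverse_transform calls in app.py suggest; the file name below is the one added in this commit, and the path app.py opens is assumed to stay in sync with it:

import pickle
with open("new_label_encoder (1).pkl", "rb") as f:
    le = pickle.load(f)
print(le.classes_)  # emotion labels, in the index order the model's outputs map to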
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ tensorflow
+ librosa
+ gradio
+ numpy
+ scikit-learn
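requirements.txt leaves all versions unpinned, so the releases actually installed are resolved at build time. A small check after pip install -r requirements.txt (note that sklearn is the import name for the scikit-learn package):

import importlib
for pkg in ("numpy", "librosa", "tensorflow", "gradio", "sklearn"):
    module = importlib.import_module(pkg)
    print(pkg, getattr(module, "__version__", "unknown"))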