Devops-hestabit committed on
Commit
a9c0da2
·
1 Parent(s): 0a828f9

Upload 8 files

Browse files

Uploaded model file and handler.py file for inference endpoint

.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ audio/Test_Emotion.wav filter=lfs diff=lfs merge=lfs -text
37
+ audio/test.wav filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: other
3
+ ---
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import librosa
2
+ import numpy as np
3
+ import tensorflow as tf
4
+ from tensorflow.keras.models import load_model
5
+
6
+ emotion_model = load_model('models/best_model_emotion.h5')
7
+ depression_model = load_model('models/best_model_depression.h5')
8
+
9
+ emotion_labels = ['Angry', 'Calm', 'Fearful', 'Happy', 'Sad']
10
def _fit_frames(features, n_frames):
    """Return *features* with exactly *n_frames* columns (axis 1).

    Shorter inputs are zero-padded on the right; longer inputs keep their
    first *n_frames* columns. Rows (axis 0) are never changed.
    """
    shortfall = n_frames - features.shape[1]
    if shortfall > 0:
        return np.pad(features, [(0, 0), (0, shortfall)], mode='constant')
    # The previous code indexed a single column here (``features[:, pad]``),
    # which silently produced a 1-D array instead of a truncated matrix.
    return features[:, :n_frames]


def extract_features(audio_path):
    """Compute MFCC inputs for the emotion and depression models.

    Loads 2.5 s of audio starting 0.5 s into the file at 44.1 kHz, computes
    30 MFCCs, and fits the frame axis to the two fixed widths used
    downstream: 216 frames (emotion) and 2584 frames (depression).

    Args:
        audio_path: Anything ``librosa.load`` accepts as its first argument.

    Returns:
        A tuple ``(emo_features, dep_features)`` with a leading batch axis
        of size 1 added to each array.
    """
    X, sample_rate = librosa.load(audio_path, duration=2.5, sr=22050 * 2, offset=0.5)
    features = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=30)

    emo_features = np.expand_dims(_fit_frames(features, 216), axis=0)
    dep_features = np.expand_dims(_fit_frames(features, 2584), axis=0)
    return emo_features, dep_features
34
+
35
def predict_emotion_and_depression(audio):
    """Run both models on one audio input and return human-readable labels.

    Args:
        audio: Passed straight to ``extract_features``.

    Returns:
        A tuple ``(emotion, depression)`` where ``emotion`` is one of
        ``emotion_labels`` and ``depression`` is ``"Depressed"`` or
        ``"Not Depressed"`` (model output thresholded at 0.5).
    """
    # Local import keeps this block self-contained; the module does not
    # import logging at the top of the file.
    import logging

    logger = logging.getLogger(__name__)

    # The old ``print(len(audio))`` raised TypeError for inputs without a
    # length (e.g. file-like objects); log lazily instead of printing.
    logger.debug("Running inference on %r", audio)
    emo_features, dep_features = extract_features(audio)

    # Predict emotion: take the highest-scoring class.
    emotion_pred = emotion_model.predict(emo_features)[0]
    logger.debug("Emotion scores: %s", emotion_pred)
    emotion = emotion_labels[np.argmax(emotion_pred)]

    # Predict depression: first (only) row of the batch, thresholded.
    depression_pred = depression_model.predict(dep_features)[0]
    depression = "Depressed" if depression_pred >= 0.5 else "Not Depressed"

    return emotion, depression
52
+
53
def handler(request):
    """Serve a prediction for a POST request carrying audio data.

    Args:
        request: Object exposing ``method`` and ``data`` attributes.

    Returns:
        A dict with ``"emotion"`` and ``"depression"`` keys for POST
        requests; ``None`` for any other method.
    """
    if request.method != 'POST':
        # Only POST is handled; everything else yields no response body.
        return None

    # Replace this with the actual way to access the audio data in the request
    payload = request.data

    predicted_emotion, predicted_depression = predict_emotion_and_depression(payload)
    return {
        "emotion": predicted_emotion,
        "depression": predicted_depression,
    }
audio/Test_Emotion.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:385c36f8d53a617252000ae23a21eada3e788ae4749a665ba728227928a00221
3
+ size 1098284
audio/test.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:385c36f8d53a617252000ae23a21eada3e788ae4749a665ba728227928a00221
3
+ size 1098284
handler.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import numpy as np
3
+ import pydub
4
+ import scipy
5
+ from scipy.io import wavfile
6
+ from pydub import AudioSegment
7
+ import base64
8
+ import librosa
9
+ import tensorflow as tf
10
+
11
class EndpointHandler():
    """Inference-endpoint handler predicting emotion and depression from audio.

    The handler decodes a base64-encoded WebM recording, extracts MFCC
    features from a fixed window of it, and feeds those features to two
    Keras models loaded from ``<path>/models``.
    """

    def __init__(self, path):
        """Load both models from the ``models`` directory under *path*."""
        self.emotion_labels = ['Angry', 'Calm', 'Fearful', 'Happy', 'Sad']
        self.emotion_model = tf.keras.models.load_model(f"{path}/models/best_model_emotion.h5")
        self.depression_model = tf.keras.models.load_model(f"{path}/models/best_model_depression.h5")

    def __call__(self, input_data):
        """Handle one request.

        Args:
            input_data: Mapping whose ``"inputs"`` entry holds the base64
                audio; if the key is absent, *input_data* itself is used
                as the payload. The ``"inputs"`` key is removed from the
                mapping as a side effect.

        Returns:
            A dict with ``"emotion"`` (label string) and ``"depression"``
            (raw depression-model output for the single input).
        """
        audio_base64 = input_data.pop("inputs", input_data)
        audio_features = self.preprocess_audio_data(audio_base64)
        emotion_prediction, depression_prediction = self.perform_emotion_analysis(audio_features)
        return {
            "emotion": emotion_prediction,
            "depression": depression_prediction
        }

    def get_mfcc_features(self, features, padding):
        """Fit *features* to exactly *padding* frames and add a batch axis.

        Args:
            features: 2-D array whose second axis is the frame axis.
            padding: Target number of frames.

        Returns:
            Array of shape ``(1, features.shape[0], padding)``. Short
            inputs are zero-padded on the right; long inputs keep their
            first *padding* frames.
        """
        missing = padding - features.shape[1]
        if missing > 0:
            features = np.pad(features, [(0, 0), (0, missing)], mode='constant')
        elif missing < 0:
            # Previously ``features[:, missing:]``, which kept only the
            # trailing *excess* frames and so returned the wrong width.
            features = features[:, :padding]
        return np.expand_dims(features, axis=0)

    def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
        """Decode base64 WebM audio and return its MFCC matrix.

        Args:
            base64_string: Base64 payload, optionally prefixed with a
                ``data:...;base64,`` URL header.
            duration: Length in seconds of the analysed window.
            desired_sr: Sample rate the audio is resampled to.
            offset: Seconds skipped at the start of the recording.

        Returns:
            The result of ``librosa.feature.mfcc`` with ``n_mfcc=30`` on
            the normalised window.

        Raises:
            ValueError: If no samples remain after skipping *offset*.
        """
        # Browsers often send a data URL; base64 itself never contains
        # ':' or ',', so stripping up to the last comma is unambiguous.
        if isinstance(base64_string, str) and base64_string.startswith("data:"):
            base64_string = base64_string.rpartition(",")[2]

        audio_bytes = base64.b64decode(base64_string)
        audio = AudioSegment.from_file(io.BytesIO(audio_bytes), format="webm")

        # Round-trip through an in-memory WAV so scipy can read raw samples.
        byte_io = io.BytesIO()
        audio.export(byte_io, format="wav")
        byte_io.seek(0)
        sample_rate, audio_array = wavfile.read(byte_io)

        audio_array = audio_array.astype(float)
        if audio_array.ndim > 1:
            # wavfile.read returns (samples, channels) for multi-channel
            # audio; resampling that directly would run along the channel
            # axis, so downmix to mono first.
            audio_array = audio_array.mean(axis=1)

        audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=desired_sr)
        start_sample = int(offset * desired_sr)
        end_sample = start_sample + int(duration * desired_sr)
        audio_array = audio_array[start_sample:end_sample]
        if audio_array.size == 0:
            raise ValueError(
                f"Audio is too short: nothing left after skipping the first {offset} s"
            )

        X = librosa.util.normalize(audio_array)
        return librosa.feature.mfcc(y=X, sr=desired_sr, n_mfcc=30)

    def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
        """Run both models on one MFCC matrix.

        Args:
            features: MFCC matrix as returned by ``preprocess_audio_data``.
            emotion_padding: Frame count fed to the emotion model.
            depression_padding: Frame count fed to the depression model.

        Returns:
            A tuple ``(emotion_label, depression_output)``. The depression
            output is the model's raw prediction for the single input; it
            is not thresholded into a label here.
        """
        emotion_features = self.get_mfcc_features(features, emotion_padding)
        depression_features = self.get_mfcc_features(features, depression_padding)

        emotion_scores = self.emotion_model.predict(emotion_features)[0]
        emotion_prediction = self.emotion_labels[np.argmax(emotion_scores)]

        depression_prediction = self.depression_model.predict(depression_features)[0]
        return emotion_prediction, depression_prediction
models/best_model_depression.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afdf804b5015afbf43d692a629e79c6bf8511ae878101cba13904e3954643d95
3
+ size 10756184
models/best_model_emotion.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7df1b2e83367f87c9c91ec49e256dbf1533421dcbfba61afc697c5b4679c26ea
3
+ size 10761304
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ keras==2.13.1
2
+ librosa==0.10.0.post2
3
+ tensorflow-cpu==2.13.0
4
+ pydub
5
+ scipy