Devops-hestabit committed
Update handler.py

handler.py CHANGED (+22 -14)
@@ -21,7 +21,7 @@ class EndpointHandler():
         emotion_prediction, depression_prediction = self.perform_emotion_analysis(audio_features)
         return {
             "emotion": emotion_prediction,
-            "depression": depression_prediction
+            "depression": float(depression_prediction[0])
         }
 
     def get_mfcc_features(self, features, padding):
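The response change above presumably exists because the depression model returns a NumPy array, and NumPy scalar types are rejected by Python's standard `json` encoder, so the raw prediction would fail to serialize in the endpoint response. A minimal sketch of the failure mode and the cast; the array shape and value below are invented for illustration:

```python
import json

import numpy as np

# Stand-in for the model output; the real prediction's shape and value are assumed here.
depression_prediction = np.array([0.73], dtype=np.float32)

try:
    json.dumps({"depression": depression_prediction[0]})
except TypeError as err:
    print(err)  # Object of type float32 is not JSON serializable

# Casting to a built-in float makes the response JSON-serializable.
print(json.dumps({"depression": float(depression_prediction[0])}))
```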
@@ -34,25 +34,33 @@
 
     def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
         # audio_base64 = base64_string.replace("data:audio/webm;codecs=opus;base64,", "")
-        audio_bytes = base64.b64decode(base64_string)
+
+        audio_base64 = base64_string.split(',')[-1] if ',' in base64_string else base64_string
+        audio_bytes = base64.b64decode(audio_base64)
         audio_io = io.BytesIO(audio_bytes)
-        audio = AudioSegment.from_file(audio_io, format="webm")
+        y, sr = librosa.load(audio_io, sr=desired_sr, duration=duration, offset=offset)
+        y = librosa.util.normalize(y)
+        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=30)
+        return mfcc
+        # audio_bytes = base64.b64decode(base64_string)
+        # audio_io = io.BytesIO(audio_bytes)
+        # audio = AudioSegment.from_file(audio_io, format="webm")
 
-        byte_io = io.BytesIO()
-        audio.export(byte_io, format="wav")
-        byte_io.seek(0)
+        # byte_io = io.BytesIO()
+        # audio.export(byte_io, format="wav")
+        # byte_io.seek(0)
 
-        sample_rate, audio_array = wavfile.read(byte_io)
+        # sample_rate, audio_array = wavfile.read(byte_io)
 
-        audio_array = librosa.resample(audio_array.astype(float), orig_sr=sample_rate, target_sr=desired_sr)
-        start_sample = int(offset * desired_sr)
-        end_sample = start_sample + int(duration * desired_sr)
-        audio_array = audio_array[start_sample:end_sample]
+        # audio_array = librosa.resample(audio_array.astype(float), orig_sr=sample_rate, target_sr=desired_sr)
+        # start_sample = int(offset * desired_sr)
+        # end_sample = start_sample + int(duration * desired_sr)
+        # audio_array = audio_array[start_sample:end_sample]
 
 
-        # X, sample_rate = librosa.load(audio_io, duration=duration, sr=desired_sr, offset=offset)
-        X = librosa.util.normalize(audio_array)
-        return librosa.feature.mfcc(y=X, sr=desired_sr, n_mfcc=30)
+        # # X, sample_rate = librosa.load(audio_io, duration=duration, sr=desired_sr, offset=offset)
+        # X = librosa.util.normalize(audio_array)
+        # return librosa.feature.mfcc(y=X, sr=desired_sr, n_mfcc=30)
 
     def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
         emotion_features = self.get_mfcc_features(features, emotion_padding)
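For reference, a rough, self-contained sketch of the decode-and-feature path the updated preprocess_audio_data now follows: strip the data-URI prefix, base64-decode, load with librosa at the desired rate, normalize, then take MFCCs. The synthetic test tone, the use of soundfile to build the payload, and n_mfcc=30 (carried over from the previous implementation) are assumptions for illustration, not part of the commit:

```python
import base64
import io

import librosa
import numpy as np
import soundfile as sf  # librosa dependency, used here only to build a test payload

# Build a throwaway base64 payload from a synthetic 440 Hz tone (illustrative only;
# the real handler receives browser-recorded audio as a data URI).
sr = 44100
tone = 0.5 * np.sin(2 * np.pi * 440 * np.linspace(0, 3, 3 * sr, endpoint=False))
buf = io.BytesIO()
sf.write(buf, tone, sr, format="WAV")
payload = "data:audio/wav;base64," + base64.b64encode(buf.getvalue()).decode()

# Same steps as the updated preprocess_audio_data: strip the data-URI prefix,
# decode, load with librosa at the desired rate, normalize, then take MFCCs.
audio_base64 = payload.split(",")[-1] if "," in payload else payload
audio_io = io.BytesIO(base64.b64decode(audio_base64))
y, loaded_sr = librosa.load(audio_io, sr=22050 * 2, duration=2.5, offset=0.5)
y = librosa.util.normalize(y)
mfcc = librosa.feature.mfcc(y=y, sr=loaded_sr, n_mfcc=30)
print(mfcc.shape)  # (30, number_of_frames)
```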