Devops-hestabit
committed on
Update handler.py
Browse files- handler.py +26 -24
handler.py
CHANGED
@@ -33,34 +33,36 @@ class EndpointHandler():
|
|
33 |
return np.expand_dims(features, axis=0)
|
34 |
|
35 |
def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
|
36 |
-
|
|
|
|
|
|
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
# byte_io.seek(0)
|
52 |
|
53 |
-
#
|
|
|
54 |
|
55 |
-
#
|
56 |
-
|
57 |
-
# end_sample = start_sample + int(duration * desired_sr)
|
58 |
-
# audio_array = audio_array[start_sample:end_sample]
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
64 |
|
65 |
def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
|
66 |
emotion_features = self.get_mfcc_features(features, emotion_padding)
|
|
|
33 |
return np.expand_dims(features, axis=0)
|
34 |
|
35 |
def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
    """Decode a base64-encoded audio payload and extract MFCC features.

    Tries librosa first; if librosa cannot read the stream, falls back to
    pydub (which handles more container formats), converting to mono at the
    target sample rate.

    Parameters
    ----------
    base64_string : str
        Base64-encoded bytes of an audio file.
    duration : float
        Seconds of audio to keep.
    desired_sr : int
        Target sample rate (default 44100 = 22050*2).
    offset : float
        Seconds to skip from the start of the audio.

    Returns
    -------
    numpy.ndarray
        MFCC matrix of shape (30, n_frames).

    Raises
    ------
    Exception
        Any decode/load failure is logged and re-raised.
    """
    try:
        # Decode the base64 string into an in-memory file object
        audio_bytes = base64.b64decode(base64_string)
        audio_io = io.BytesIO(audio_bytes)

        # Try to load with librosa first
        try:
            y, sr = librosa.load(audio_io, sr=desired_sr, duration=duration, offset=offset)
        except Exception:
            # If librosa fails, try using pydub.
            # NOTE: was a bare `except:` — that also swallowed
            # KeyboardInterrupt/SystemExit; narrowed to Exception.
            audio_io.seek(0)  # Reset file pointer
            audio = AudioSegment.from_file(audio_io)
            audio = audio.set_channels(1)  # Convert to mono
            audio = audio.set_frame_rate(desired_sr)

            samples = np.array(audio.get_array_of_samples())
            # Normalize by the actual sample width instead of assuming
            # 16-bit audio (the old code hard-coded 32768.0).
            full_scale = float(1 << (8 * audio.sample_width - 1))
            y = samples.astype(np.float32) / full_scale
            sr = desired_sr

            # Apply the offset/duration trimming that the librosa path
            # performs, so both paths yield comparably-sized signals.
            start_sample = int(offset * sr)
            end_sample = start_sample + int(duration * sr)
            y = y[start_sample:end_sample]

        # Normalize the audio to unit peak amplitude
        y = librosa.util.normalize(y)

        # Extract MFCC features (30 coefficients)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=30)

        return mfcc

    except Exception as e:
        print(f"Error in preprocess_audio_data: {str(e)}")
        raise
|
66 |
|
67 |
def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
|
68 |
emotion_features = self.get_mfcc_features(features, emotion_padding)
|