Devops-hestabit committed
Commit 69fe77b · verified · 1 Parent(s): 6b6c787

Update handler.py

Files changed (1)
  1. handler.py +20 -41
handler.py CHANGED
```diff
@@ -21,7 +21,7 @@ class EndpointHandler():
         emotion_prediction, depression_prediction = self.perform_emotion_analysis(audio_features)
         return {
             "emotion": emotion_prediction,
-            "depression": float(depression_prediction[0])
+            "depression": depression_prediction
         }
 
     def get_mfcc_features(self, features, padding):
@@ -33,53 +33,32 @@ class EndpointHandler():
         return np.expand_dims(features, axis=0)
 
     def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
-        try:
-            # Decode the base64 string
-            audio_bytes = base64.b64decode(base64_string)
-            audio_io = io.BytesIO(audio_bytes)
+        # audio_base64 = base64_string.replace("data:audio/webm;codecs=opus;base64,", "")
+        audio_bytes = base64.b64decode(base64_string)
+        audio_io = io.BytesIO(audio_bytes)
+        audio = AudioSegment.from_file(audio_io, format="webm")
 
-            # Try to load with librosa first
-            try:
-                y, sr = librosa.load(audio_io, sr=desired_sr, duration=duration, offset=offset)
-            except:
-                # If librosa fails, try using pydub
-                audio_io.seek(0)  # Reset file pointer
-                audio = AudioSegment.from_file(audio_io)
-                audio = audio.set_channels(1)  # Convert to mono
-                audio = audio.set_frame_rate(desired_sr)
+        byte_io = io.BytesIO()
+        audio.export(byte_io, format="wav")
+        byte_io.seek(0)
 
-                samples = np.array(audio.get_array_of_samples())
-                y = samples.astype(np.float32) / 32768.0  # Normalize
-                sr = desired_sr
+        sample_rate, audio_array = wavfile.read(byte_io)
 
-            # Normalize the audio
-            y = librosa.util.normalize(y)
-
-            # Extract MFCC features
-            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=30)
-            if mfcc.shape[1] < 216:
-                mfcc = np.pad(mfcc, ((0, 0), (0, 216 - mfcc.shape[1])), mode='constant')
-            elif mfcc.shape[1] > 216:
-                mfcc = mfcc[:, :216]
-
-            return mfcc
-
-        except Exception as e:
-            print(f"Error in preprocess_audio_data: {str(e)}")
-            raise
+        audio_array = librosa.resample(audio_array.astype(float), orig_sr=sample_rate, target_sr=desired_sr)
+        start_sample = int(offset * desired_sr)
+        end_sample = start_sample + int(duration * desired_sr)
+        audio_array = audio_array[start_sample:end_sample]
+
+
+        # X, sample_rate = librosa.load(audio_io, duration=duration, sr=desired_sr, offset=offset)
+        X = librosa.util.normalize(audio_array)
+        return librosa.feature.mfcc(y=X, sr=desired_sr, n_mfcc=30)
 
     def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
-        emotion_features = features[:, :emotion_padding]
-        emotion_features = np.expand_dims(emotion_features, axis=-1)  # Add channel dimension
-        emotion_features = np.expand_dims(emotion_features, axis=0)  # Add batch dimension
-
+        emotion_features = self.get_mfcc_features(features, emotion_padding)
         depression_features = self.get_mfcc_features(features, depression_padding)
-
-        print("Emotion model input shape:", self.emotion_model.input_shape)
-        print("Emotion features shape:", emotion_features.shape)
-
         emotion_prediction = self.emotion_model.predict(emotion_features)[0]
         emotion_prediction = self.emotion_labels[np.argmax(emotion_prediction)]
-
         depression_prediction = self.depression_model.predict(depression_features)[0]
+        # depression_prediction = "Depressed" if depression_prediction >= 0.5 else "Not Depressed"
         return emotion_prediction, depression_prediction
```
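
The diff shows only the tail of get_mfcc_features (`return np.expand_dims(features, axis=0)`), but the fixed-width logic removed from preprocess_audio_data, together with the `emotion_padding=216` and `depression_padding=2584` arguments, suggests a pad-or-truncate step along the MFCC frame axis. A minimal sketch of that step, assuming zero padding as in the removed inline code (`pad_or_truncate` is a hypothetical name, not the actual helper):

```python
import numpy as np

def pad_or_truncate(mfcc: np.ndarray, width: int) -> np.ndarray:
    """Zero-pad or truncate the frame axis to `width` columns, then add a
    batch dimension, mirroring the fixed-width logic this commit removed
    from preprocess_audio_data."""
    if mfcc.shape[1] < width:
        mfcc = np.pad(mfcc, ((0, 0), (0, width - mfcc.shape[1])), mode="constant")
    else:
        mfcc = mfcc[:, :width]
    return np.expand_dims(mfcc, axis=0)

# A 2.5 s clip at 44.1 kHz yields roughly 216 MFCC frames with librosa's
# default hop length of 512, which lines up with emotion_padding=216.
features = np.random.randn(30, 200)
print(pad_or_truncate(features, 216).shape)   # (1, 30, 216)
print(pad_or_truncate(features, 2584).shape)  # (1, 30, 2584)
```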
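And a minimal sketch of exercising the updated decode path end to end. It assumes handler.py is importable, that EndpointHandler can be constructed without arguments (its constructor is not part of this diff), and that a webm/opus clip named sample.webm exists; pydub's webm decode and wav export shell out to ffmpeg, so ffmpeg must be installed:

```python
import base64

from handler import EndpointHandler  # module path assumed

# Constructor signature is not shown in this diff; a no-argument
# construction is assumed here purely for illustration.
handler = EndpointHandler()

# "sample.webm" is a placeholder file name: any webm/opus recording works.
with open("sample.webm", "rb") as f:
    payload = base64.b64encode(f.read()).decode("utf-8")

mfcc = handler.preprocess_audio_data(payload)
print(mfcc.shape)  # (30, n_frames): 30 MFCC coefficients per frame
```

Note that after this commit the "depression" field of the response is the raw model output rather than a Python float, so callers that serialize the dict to JSON may need to convert it themselves.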