Devops-hestabit committed on
Commit
cdc083e
·
verified ·
1 Parent(s): 1b8f67c

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +22 -14
handler.py CHANGED
@@ -21,7 +21,7 @@ class EndpointHandler():
21
  emotion_prediction, depression_prediction = self.perform_emotion_analysis(audio_features)
22
  return {
23
  "emotion": emotion_prediction,
24
- "depression": depression_prediction
25
  }
26
 
27
  def get_mfcc_features(self, features, padding):
@@ -34,25 +34,33 @@ class EndpointHandler():
34
 
35
  def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
36
  # audio_base64 = base64_string.replace("data:audio/webm;codecs=opus;base64,", "")
37
- audio_bytes = base64.b64decode(base64_string)
 
 
38
  audio_io = io.BytesIO(audio_bytes)
39
- audio = AudioSegment.from_file(audio_io, format="webm")
 
 
 
 
 
 
40
 
41
- byte_io = io.BytesIO()
42
- audio.export(byte_io, format="wav")
43
- byte_io.seek(0)
44
 
45
- sample_rate, audio_array = wavfile.read(byte_io)
46
 
47
- audio_array = librosa.resample(audio_array.astype(float), orig_sr=sample_rate, target_sr=desired_sr)
48
- start_sample = int(offset * desired_sr)
49
- end_sample = start_sample + int(duration * desired_sr)
50
- audio_array = audio_array[start_sample:end_sample]
51
 
52
 
53
- # X, sample_rate = librosa.load(audio_io, duration=duration, sr=desired_sr, offset=offset)
54
- X = librosa.util.normalize(audio_array)
55
- return librosa.feature.mfcc(y=X, sr=desired_sr, n_mfcc=30)
56
 
57
  def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
58
  emotion_features = self.get_mfcc_features(features, emotion_padding)
 
21
  emotion_prediction, depression_prediction = self.perform_emotion_analysis(audio_features)
22
  return {
23
  "emotion": emotion_prediction,
24
+ "depression": float(depression_prediction[0])
25
  }
26
 
27
  def get_mfcc_features(self, features, padding):
 
34
 
35
  def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
36
  # audio_base64 = base64_string.replace("data:audio/webm;codecs=opus;base64,", "")
37
+
38
+ audio_base64 = base64_string.split(',')[-1] if ',' in base64_string else base64_string
39
+ audio_bytes = base64.b64decode(audio_base64)
40
  audio_io = io.BytesIO(audio_bytes)
41
+ y, sr = librosa.load(audio_io, sr=desired_sr, duration=duration, offset=offset)
42
+ y = librosa.util.normalize(y)
43
+ y = librosa.util.normalize(y)
44
+ return mfcc
45
+ # audio_bytes = base64.b64decode(base64_string)
46
+ # audio_io = io.BytesIO(audio_bytes)
47
+ # audio = AudioSegment.from_file(audio_io, format="webm")
48
 
49
+ # byte_io = io.BytesIO()
50
+ # audio.export(byte_io, format="wav")
51
+ # byte_io.seek(0)
52
 
53
+ # sample_rate, audio_array = wavfile.read(byte_io)
54
 
55
+ # audio_array = librosa.resample(audio_array.astype(float), orig_sr=sample_rate, target_sr=desired_sr)
56
+ # start_sample = int(offset * desired_sr)
57
+ # end_sample = start_sample + int(duration * desired_sr)
58
+ # audio_array = audio_array[start_sample:end_sample]
59
 
60
 
61
+ # # X, sample_rate = librosa.load(audio_io, duration=duration, sr=desired_sr, offset=offset)
62
+ # X = librosa.util.normalize(audio_array)
63
+ # return librosa.feature.mfcc(y=X, sr=desired_sr, n_mfcc=30)
64
 
65
  def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
66
  emotion_features = self.get_mfcc_features(features, emotion_padding)