Devops-hestabit committed on
Commit
e04d5f6
·
verified ·
1 Parent(s): 53edb6f

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +26 -24
handler.py CHANGED
@@ -33,34 +33,36 @@ class EndpointHandler():
33
  return np.expand_dims(features, axis=0)
34
 
35
def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
    """Decode a base64-encoded audio payload and return its MFCC features.

    Args:
        base64_string: Base64 audio data, optionally prefixed with a data-URL
            header such as ``data:audio/webm;codecs=opus;base64,``.
        duration: Seconds of audio to load (passed to ``librosa.load``).
        desired_sr: Target sample rate for resampling (default 44100 Hz).
        offset: Seconds to skip from the start of the clip.

    Returns:
        ``np.ndarray`` of MFCCs with 30 coefficients, shape (30, frames).

    Raises:
        binascii.Error: if the payload is not valid base64.
        Exception: whatever ``librosa.load`` raises for unreadable audio.
    """
    # Strip a possible data-URL prefix; everything after the last comma is
    # the actual base64 payload.
    audio_base64 = base64_string.split(',')[-1] if ',' in base64_string else base64_string
    audio_bytes = base64.b64decode(audio_base64)
    audio_io = io.BytesIO(audio_bytes)

    # Load, resampling to desired_sr and windowing by offset/duration.
    y, sr = librosa.load(audio_io, sr=desired_sr, duration=duration, offset=offset)
    # Peak-normalize the waveform before feature extraction.
    y = librosa.util.normalize(y)
    return librosa.feature.mfcc(y=y, sr=sr, n_mfcc=30)
 
64
 
65
  def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
66
  emotion_features = self.get_mfcc_features(features, emotion_padding)
 
33
  return np.expand_dims(features, axis=0)
34
 
35
def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
    """Decode a base64-encoded audio payload and return its MFCC features.

    Tries ``librosa.load`` first; if the container cannot be read, falls back
    to decoding with pydub and converting the raw samples by hand.

    Args:
        base64_string: Base64 audio data, optionally prefixed with a data-URL
            header such as ``data:audio/webm;codecs=opus;base64,``.
        duration: Seconds of audio to keep (window applied on both paths).
        desired_sr: Target sample rate for resampling (default 44100 Hz).
        offset: Seconds to skip from the start of the clip.

    Returns:
        ``np.ndarray`` of MFCCs with 30 coefficients, shape (30, frames).

    Raises:
        Exception: re-raised after logging if decoding or loading fails.
    """
    try:
        # Strip a possible data-URL prefix before decoding. b64decode with
        # the default validate=False silently discards non-alphabet chars
        # (':', ';', '/') and would decode the remaining prefix letters as
        # payload, corrupting the audio header.
        audio_base64 = base64_string.split(',')[-1] if ',' in base64_string else base64_string
        audio_bytes = base64.b64decode(audio_base64)
        audio_io = io.BytesIO(audio_bytes)

        # Try to load with librosa first.
        try:
            y, sr = librosa.load(audio_io, sr=desired_sr, duration=duration, offset=offset)
        except Exception:
            # librosa could not read the container; fall back to pydub.
            audio_io.seek(0)  # reset file pointer after the failed read
            audio = AudioSegment.from_file(audio_io)
            audio = audio.set_channels(1)            # convert to mono
            audio = audio.set_frame_rate(desired_sr)

            samples = np.array(audio.get_array_of_samples())
            # NOTE(review): assumes 16-bit samples; for other widths the
            # scale factor should come from audio.sample_width — confirm.
            y = samples.astype(np.float32) / 32768.0
            sr = desired_sr

            # Apply the same offset/duration window the librosa path uses,
            # so both branches return features over the same audio span.
            start_sample = int(offset * sr)
            end_sample = start_sample + int(duration * sr)
            y = y[start_sample:end_sample]

        # Peak-normalize the waveform before feature extraction.
        y = librosa.util.normalize(y)

        # Extract 30 MFCC coefficients.
        return librosa.feature.mfcc(y=y, sr=sr, n_mfcc=30)

    except Exception as e:
        print(f"Error in preprocess_audio_data: {str(e)}")
        raise
66
 
67
  def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
68
  emotion_features = self.get_mfcc_features(features, emotion_padding)