Kabatubare committed
Commit 86776b4 · verified · 1 Parent(s): 30c595f

Update app.py

Files changed (1)
  1. app.py +8 -19
app.py CHANGED
@@ -11,43 +11,32 @@ model_path = "./"
 model = AutoModelForAudioClassification.from_pretrained(model_path)
 
 def preprocess_audio(audio_path, sr=22050):
-    # Load audio file
+    # Load and trim the audio file
     audio, sr = librosa.load(audio_path, sr=sr)
-    # Trim silence
     audio, _ = librosa.effects.trim(audio)
     return audio, sr
 
 def extract_features(audio, sr):
-    # Get Mel-spectrogram
+    # Extract various features from the audio
    S = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)
     log_S = librosa.power_to_db(S, ref=np.max)
-
-    # Harmonic-Percussive source separation
     y_harmonic, y_percussive = librosa.effects.hpss(audio)
-
-    # Tempo, beat frames
-    tempo, beat_frames = librosa.beat.beat_track(y=audio, sr=sr)
-
-    # Chroma feature
     chroma = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
-
-    # Spectral contrast
     contrast = librosa.feature.spectral_contrast(S=S, sr=sr)
-
-    # Tonnetz
     tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(audio), sr=sr)
-
-    # Concatenate all features
+
+    # Stack features and add batch dimension
     features = np.vstack([log_S, chroma, contrast, tonnetz])
-    features = torch.tensor(features).float().unsqueeze(0)  # Add batch dimension
-
-    return features
+    features_tensor = torch.tensor(features).float().unsqueeze(0)  # (1, feature_dim, time_steps)
+
+    return features_tensor
 
 def predict_voice(audio_file_path):
     try:
         audio, sr = preprocess_audio(audio_file_path)
         features = extract_features(audio, sr)
 
+        # Model prediction
         with torch.no_grad():
             outputs = model(features)
             logits = outputs.logits
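
Note on the diff: the removed beat_track call produced tempo and beat frames that were never stacked into the feature matrix, so dropping it leaves the output unchanged. The sketch below is an illustration, not part of the commit: it reproduces the updated extract_features() pipeline on a synthetic signal to confirm the tensor shape, where the row count of 153 assumes librosa defaults (128 mel bands + 12 chroma bins + 7 spectral-contrast rows + 6 tonnetz rows).

# Standalone shape check (illustrative sketch, not part of this commit).
# Mirrors the updated extract_features(); the 2-second noise signal is a
# stand-in for real audio.
import numpy as np
import librosa
import torch

sr = 22050
audio = np.random.randn(sr * 2).astype(np.float32)

S = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)  # 128 rows
log_S = librosa.power_to_db(S, ref=np.max)
y_harmonic, _ = librosa.effects.hpss(audio)
chroma = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)        # 12 rows (default n_chroma)
contrast = librosa.feature.spectral_contrast(S=S, sr=sr)        # 7 rows (default n_bands=6, plus 1)
tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(audio), sr=sr)  # 6 rows

features = np.vstack([log_S, chroma, contrast, tonnetz])
features_tensor = torch.tensor(features).float().unsqueeze(0)
print(features_tensor.shape)  # torch.Size([1, 153, time_steps]) with these defaults

Whether a (1, 153, time_steps) tensor is what this particular AutoModelForAudioClassification checkpoint expects as input depends on the model's config, which the diff does not show.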