Kabatubare committed
Commit 797b845 · verified · 1 Parent(s): 323b26a

Update app.py

Files changed (1): app.py +10 -4
app.py CHANGED
@@ -3,15 +3,15 @@ import librosa
 import numpy as np
 import torch
 import logging
-import soundfile as sf
 from transformers import AutoModelForAudioClassification
+import soundfile as sf
 
 logging.basicConfig(level=logging.INFO)
 
 model_path = "./"
 model = AutoModelForAudioClassification.from_pretrained(model_path)
 
-def augment_and_extract_features(audio_path, sr=16000, n_mfcc=40, n_fft=2048, hop_length=512):
+def augment_and_extract_features(audio_path, sr=16000, n_mfcc=40, n_fft=2048, hop_length=512, target_length=512):
     y, sr = librosa.load(audio_path, sr=sr)
     y_augmented = librosa.effects.pitch_shift(y, sr=sr, n_steps=4)
     y_augmented = librosa.effects.time_stretch(y_augmented, rate=1.2)
@@ -25,7 +25,14 @@ def augment_and_extract_features(audio_path, sr=16000, n_mfcc=40, n_fft=2048, hop_length=512):
     features = np.concatenate((mfcc, chroma, mel, contrast, tonnetz), axis=0)
     features = (features - np.mean(features, axis=1, keepdims=True)) / np.std(features, axis=1, keepdims=True)
 
-    features_tensor = torch.tensor(features).float().unsqueeze(0)
+    # Reshape the features to match the model's expected input
+    if features.shape[1] > target_length:
+        features = features[:, :target_length]
+    else:
+        padding = target_length - features.shape[1]
+        features = np.pad(features, ((0, 0), (0, padding)), 'constant')
+
+    features_tensor = torch.tensor(features).float().unsqueeze(0)  # Add batch dimension
     return features_tensor
 
 def predict_voice(audio_file_path):
@@ -55,4 +62,3 @@ iface = gr.Interface(
 )
 
 iface.launch()
-
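For context, a minimal sketch of the updated function as a whole. The hunks elide the feature-extraction lines (old lines 18-24), so the librosa.feature.* calls below, and the use of y_augmented as their input, are assumptions inferred from the variable names mfcc, chroma, mel, contrast, and tonnetz; the signature, augmentation, normalization, reshaping, and return are taken from the diff itself.

import librosa
import numpy as np
import torch

def augment_and_extract_features(audio_path, sr=16000, n_mfcc=40, n_fft=2048,
                                 hop_length=512, target_length=512):
    # Load at the target sample rate, then augment (verbatim from the diff).
    y, sr = librosa.load(audio_path, sr=sr)
    y_augmented = librosa.effects.pitch_shift(y, sr=sr, n_steps=4)
    y_augmented = librosa.effects.time_stretch(y_augmented, rate=1.2)

    # Assumed reconstruction of the elided lines: the diff only shows that
    # five arrays named mfcc, chroma, mel, contrast, tonnetz are concatenated.
    mfcc = librosa.feature.mfcc(y=y_augmented, sr=sr, n_mfcc=n_mfcc,
                                n_fft=n_fft, hop_length=hop_length)
    chroma = librosa.feature.chroma_stft(y=y_augmented, sr=sr,
                                         n_fft=n_fft, hop_length=hop_length)
    mel = librosa.feature.melspectrogram(y=y_augmented, sr=sr,
                                         n_fft=n_fft, hop_length=hop_length)
    contrast = librosa.feature.spectral_contrast(y=y_augmented, sr=sr,
                                                 n_fft=n_fft, hop_length=hop_length)
    tonnetz = librosa.feature.tonnetz(y=y_augmented, sr=sr,
                                      hop_length=hop_length)

    # Stack along the feature axis and standardize each row (verbatim from the
    # diff; note a zero-variance row would divide by zero here).
    features = np.concatenate((mfcc, chroma, mel, contrast, tonnetz), axis=0)
    features = (features - np.mean(features, axis=1, keepdims=True)) / np.std(features, axis=1, keepdims=True)

    # New in this commit: force a fixed time axis by truncating long clips
    # and zero-padding short ones.
    if features.shape[1] > target_length:
        features = features[:, :target_length]
    else:
        padding = target_length - features.shape[1]
        features = np.pad(features, ((0, 0), (0, padding)), 'constant')

    features_tensor = torch.tensor(features).float().unsqueeze(0)  # Add batch dimension
    return features_tensor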
 
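The point of the new truncate-or-pad step is a fixed output shape. Under the defaults in the sketch above (40 MFCCs, 12 chroma bins, 128 mel bands, 7 contrast bands, 6 tonnetz dimensions; all of these except n_mfcc=40 are assumptions), the tensor is always (1, 193, 512) regardless of clip duration:

# "sample.wav" is a placeholder path, not a file from this repo.
t = augment_and_extract_features("sample.wav")
print(t.shape)  # torch.Size([1, 193, 512]) under the row-count assumptions above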
34
+
35
+ features_tensor = torch.tensor(features).float().unsqueeze(0) # Add batch dimension
36
  return features_tensor
37
 
38
  def predict_voice(audio_file_path):
 
62
  )
63
 
64
  iface.launch()
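predict_voice appears in the hunks only as a signature, so the body below is not the author's code; it is a hypothetical sketch of how the fixed-shape tensor would typically be consumed, assuming the checkpoint in model_path accepts the feature tensor positionally and carries an id2label mapping in its config.

def predict_voice(audio_file_path):
    # Sketch only: the actual body is outside the diff hunks.
    features_tensor = augment_and_extract_features(audio_file_path)
    model.eval()
    with torch.no_grad():
        logits = model(features_tensor).logits  # assumes the model takes the tensor directly
    probs = torch.softmax(logits, dim=-1)
    predicted_id = int(probs.argmax(dim=-1))
    label = model.config.id2label[predicted_id]
    return f"{label} ({probs.max().item():.1%} confidence)"

In the diff's tail this function is presumably the fn wired into gr.Interface, which iface.launch() then serves.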