Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,15 +3,15 @@ import librosa
|
|
3 |
import numpy as np
|
4 |
import torch
|
5 |
import logging
|
6 |
-
import soundfile as sf
|
7 |
from transformers import AutoModelForAudioClassification
|
|
|
8 |
|
9 |
logging.basicConfig(level=logging.INFO)
|
10 |
|
11 |
model_path = "./"
|
12 |
model = AutoModelForAudioClassification.from_pretrained(model_path)
|
13 |
|
14 |
-
def augment_and_extract_features(audio_path, sr=16000, n_mfcc=40, n_fft=2048, hop_length=512):
|
15 |
y, sr = librosa.load(audio_path, sr=sr)
|
16 |
y_augmented = librosa.effects.pitch_shift(y, sr=sr, n_steps=4)
|
17 |
y_augmented = librosa.effects.time_stretch(y_augmented, rate=1.2)
|
@@ -25,7 +25,14 @@ def augment_and_extract_features(audio_path, sr=16000, n_mfcc=40, n_fft=2048, ho
|
|
25 |
features = np.concatenate((mfcc, chroma, mel, contrast, tonnetz), axis=0)
|
26 |
features = (features - np.mean(features, axis=1, keepdims=True)) / np.std(features, axis=1, keepdims=True)
|
27 |
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
return features_tensor
|
30 |
|
31 |
def predict_voice(audio_file_path):
|
@@ -55,4 +62,3 @@ iface = gr.Interface(
|
|
55 |
)
|
56 |
|
57 |
iface.launch()
|
58 |
-
|
|
|
3 |
import numpy as np
|
4 |
import torch
|
5 |
import logging
|
|
|
6 |
from transformers import AutoModelForAudioClassification
|
7 |
+
import soundfile as sf
|
8 |
|
9 |
logging.basicConfig(level=logging.INFO)
|
10 |
|
11 |
model_path = "./"
|
12 |
model = AutoModelForAudioClassification.from_pretrained(model_path)
|
13 |
|
14 |
+
def augment_and_extract_features(audio_path, sr=16000, n_mfcc=40, n_fft=2048, hop_length=512, target_length=512):
|
15 |
y, sr = librosa.load(audio_path, sr=sr)
|
16 |
y_augmented = librosa.effects.pitch_shift(y, sr=sr, n_steps=4)
|
17 |
y_augmented = librosa.effects.time_stretch(y_augmented, rate=1.2)
|
|
|
25 |
features = np.concatenate((mfcc, chroma, mel, contrast, tonnetz), axis=0)
|
26 |
features = (features - np.mean(features, axis=1, keepdims=True)) / np.std(features, axis=1, keepdims=True)
|
27 |
|
28 |
+
# Truncate or zero-pad the second axis to target_length to match the model's expected input
|
29 |
+
if features.shape[1] > target_length:
|
30 |
+
features = features[:, :target_length]
|
31 |
+
else:
|
32 |
+
padding = target_length - features.shape[1]
|
33 |
+
features = np.pad(features, ((0, 0), (0, padding)), 'constant')
|
34 |
+
|
35 |
+
features_tensor = torch.tensor(features).float().unsqueeze(0) # Add batch dimension
|
36 |
return features_tensor
|
37 |
|
38 |
def predict_voice(audio_file_path):
|
|
|
62 |
)
|
63 |
|
64 |
iface.launch()
|
|