Update app.py
app.py CHANGED
@@ -3,41 +3,41 @@ import librosa
 import numpy as np
 import torch
 import logging
-from transformers import AutoModelForAudioClassification
+from transformers import AutoModelForAudioClassification, Wav2Vec2Processor
 import soundfile as sf
+from scipy.signal import butter, lfilter

 logging.basicConfig(level=logging.INFO)

 model_path = "./"
 model = AutoModelForAudioClassification.from_pretrained(model_path)
+processor = Wav2Vec2Processor.from_pretrained(model_path)

-def
-
-
-
+def butter_bandpass(lowcut, highcut, fs, order=5):
+    nyq = 0.5 * fs
+    low = lowcut / nyq
+    high = highcut / nyq
+    b, a = butter(order, [low, high], btype='band')
+    return b, a

-
-
-
-
-    tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y_augmented), sr=sr)
+def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
+    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
+    y = lfilter(b, a, data)
+    return y

-
-
+def quantum_augment_and_extract_features(audio_path, sr=16000, n_mfcc=40, n_fft=2048, hop_length=512):
+    y, sr = librosa.load(audio_path, sr=sr)
+    y_filtered = butter_bandpass_filter(y, 300, 3400, sr)
+    y_quantum = np.fft.fft(y_filtered)
+    y_ifft = np.fft.ifft(y_quantum).real

-
-    if features.shape[1] > target_length:
-        features = features[:, :target_length]
-    else:
-        padding = target_length - features.shape[1]
-        features = np.pad(features, ((0, 0), (0, padding)), 'constant')
+    features_processor = processor(y_ifft, sampling_rate=sr, return_tensors="pt", padding=True)

-
-    return features_tensor
+    return features_processor.input_values

 def predict_voice(audio_file_path):
     try:
-        features_tensor =
+        features_tensor = quantum_augment_and_extract_features(audio_file_path)
         with torch.no_grad():
             outputs = model(features_tensor)

@@ -62,3 +62,4 @@ iface = gr.Interface(
 )

 iface.launch()
+
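
For reference, below is a condensed, self-contained sketch of the preprocessing path this commit introduces: band-pass filter the clip to the 300-3400 Hz voice band, run it through an FFT/inverse-FFT round trip, and feed the result to the Wav2Vec2Processor before classification. The helper names bandpass and extract_features, the sample.wav input, and the assumption that the checkpoint in "./" ships a Wav2Vec2Processor config are illustrative, not part of the Space.

# Condensed sketch of the new pipeline (helper names and "sample.wav" are illustrative).
import numpy as np
import librosa
import torch
from scipy.signal import butter, lfilter
from transformers import AutoModelForAudioClassification, Wav2Vec2Processor

def bandpass(data, lowcut, highcut, fs, order=5):
    # Butterworth band-pass; cutoffs are normalised by the Nyquist frequency.
    nyq = 0.5 * fs
    b, a = butter(order, [lowcut / nyq, highcut / nyq], btype="band")
    return lfilter(b, a, data)

def extract_features(audio_path, processor, sr=16000):
    y, _ = librosa.load(audio_path, sr=sr)   # resample to 16 kHz mono
    y = bandpass(y, 300.0, 3400.0, sr)       # keep the 300-3400 Hz voice band
    y = np.fft.ifft(np.fft.fft(y)).real      # FFT round trip, as in the diff
    inputs = processor(y, sampling_rate=sr, return_tensors="pt", padding=True)
    return inputs.input_values               # float tensor of shape (1, num_samples)

model = AutoModelForAudioClassification.from_pretrained("./")
processor = Wav2Vec2Processor.from_pretrained("./")
features = extract_features("sample.wav", processor)
with torch.no_grad():
    logits = model(features).logits
print(model.config.id2label[int(logits.argmax(-1))])

Note that np.fft.ifft(np.fft.fft(y)).real reconstructs the filtered signal up to floating-point error, so the FFT round trip as written is effectively an identity step; the audible preprocessing comes from the Butterworth band-pass.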