Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ from scipy.ndimage import zoom
|
|
8 |
import gradio as gr
|
9 |
import pickle
|
10 |
from joblib import load
|
11 |
-
|
12 |
|
13 |
# Assuming you already have the 'ann_model' trained and 'pca' instance from the previous code
|
14 |
language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2,'hindi':3,'kannada':4,'telugu':5}
|
@@ -41,9 +41,10 @@ pca = load('pca.pkl')
|
|
41 |
|
42 |
vgg16 = models.vgg16(pretrained=True).features
|
43 |
# Function to load and preprocess a single audio file
|
44 |
-
def preprocess_single_audio_vgg16(
|
45 |
# Your existing preprocessing code goes here
|
46 |
-
y
|
|
|
47 |
mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) # Compute Mel spectrogram
|
48 |
log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max) # Apply log transformation
|
49 |
norm_mel_spec = (log_mel_spec - np.mean(log_mel_spec)) / np.std(log_mel_spec) # Normalize
|
@@ -74,12 +75,18 @@ def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
|
|
74 |
features_tensor = torch.from_numpy(features_pca).float()
|
75 |
return features_tensor
|
76 |
|
77 |
-
def predict_language(
|
78 |
# Load VGG16 model
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
# Preprocess the single audio file using VGG16 for feature extraction
|
82 |
-
preprocessed_features = preprocess_single_audio_vgg16(
|
83 |
|
84 |
# Make predictions using the trained model
|
85 |
ann_model.eval()
|
@@ -92,5 +99,6 @@ def predict_language(audio_file_path):
|
|
92 |
|
93 |
return predicted_label
|
94 |
|
95 |
-
iface = gr.Interface(fn=predict_language, inputs="
|
|
|
96 |
iface.launch()
|
|
|
8 |
import gradio as gr
|
9 |
import pickle
|
10 |
from joblib import load
|
11 |
+
import soundfile as sf
|
12 |
|
13 |
# Assuming you already have the 'ann_model' trained and 'pca' instance from the previous code
|
14 |
# Lookup from language name to an integer id (six languages, ids 0-5).
# NOTE(review): presumably these ids match the label encoding used when
# 'ann_model' was trained — confirm against the training code.
language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2,'hindi':3,'kannada':4,'telugu':5}
|
|
|
41 |
|
42 |
# Build a pretrained VGG16 and keep only its `.features` sub-module; this
# object is what predict_language passes to the preprocessing function.
# NOTE(review): `models` is presumably torchvision.models; newer torchvision
# releases deprecate `pretrained=True` in favour of `weights=` — confirm
# against the installed version.
vgg16 = models.vgg16(pretrained=True).features
|
43 |
# Function to load and preprocess a single audio file
|
44 |
+
def preprocess_single_audio_vgg16(audio_data, sr, vgg16_model, pca_instance):
|
45 |
# Your existing preprocessing code goes here
|
46 |
+
y= audio_data
|
47 |
+
sr = sr# Load audio
|
48 |
mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) # Compute Mel spectrogram
|
49 |
log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max) # Apply log transformation
|
50 |
norm_mel_spec = (log_mel_spec - np.mean(log_mel_spec)) / np.std(log_mel_spec) # Normalize
|
|
|
75 |
features_tensor = torch.from_numpy(features_pca).float()
|
76 |
return features_tensor
|
77 |
|
78 |
+
def predict_language(audio_input):
|
79 |
# Accept either an audio file path or a (sample_rate, samples) pair and obtain the waveform and its sample rate
|
80 |
+
if isinstance(audio_input, str):
|
81 |
+
# Load the audio file
|
82 |
+
audio, sr = librosa.load(audio_input, sr=None)
|
83 |
+
else:
|
84 |
+
# Get the sample rate and convert the audio data to float
|
85 |
+
sr, audio = audio_input
|
86 |
+
audio = audio.astype(np.float32)
|
87 |
|
88 |
# Preprocess the single audio file using VGG16 for feature extraction
|
89 |
+
preprocessed_features = preprocess_single_audio_vgg16(audio, sr, vgg16, pca)
|
90 |
|
91 |
# Make predictions using the trained model
|
92 |
ann_model.eval()
|
|
|
99 |
|
100 |
return predicted_label
|
101 |
|
102 |
+
# Gradio UI: a single audio input is passed to predict_language and the
# value it returns is displayed as text.
iface = gr.Interface(fn=predict_language, inputs="audio", outputs="text")
|
103 |
+
|
104 |
# Start serving the interface defined above.
iface.launch()
|