Update app.py
Browse files
app.py
CHANGED
@@ -16,9 +16,9 @@ language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2,'hindi':3}
|
|
16 |
class ANNModel(nn.Module):
|
17 |
def __init__(self):
|
18 |
super(ANNModel, self).__init__()
|
19 |
-
self.fc1 = nn.Linear(300,
|
20 |
self.relu1 = nn.ReLU()
|
21 |
-
self.fc2 = nn.Linear(
|
22 |
self.relu2 = nn.ReLU()
|
23 |
self.fc3 = nn.Linear(64, 4)
|
24 |
|
@@ -34,23 +34,23 @@ class ANNModel(nn.Module):
|
|
34 |
ann_model = ANNModel()
|
35 |
|
36 |
# Load the trained model
|
37 |
-
ann_model.load_state_dict(torch.load('
|
38 |
|
39 |
# Load the PCA instance
|
40 |
-
pca = load('
|
41 |
|
42 |
vgg16 = models.vgg16(pretrained=True).features
|
43 |
# Function to load and preprocess a single audio file
|
44 |
def preprocess_single_audio_vgg16(audio_data, sr, vgg16_model, pca_instance):
|
45 |
# Your existing preprocessing code goes here
|
46 |
y= audio_data
|
47 |
-
sr =
|
48 |
mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) # Compute Mel spectrogram
|
49 |
log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max) # Apply log transformation
|
50 |
norm_mel_spec = (log_mel_spec - np.mean(log_mel_spec)) / np.std(log_mel_spec) # Normalize
|
51 |
|
52 |
# Resize mel spectrogram to the target shape (128, 128) using zoom
|
53 |
-
target_shape = (
|
54 |
resized_mel_spec = zoom(norm_mel_spec, (target_shape[0] / norm_mel_spec.shape[0], target_shape[1] / norm_mel_spec.shape[1]), mode='nearest')
|
55 |
|
56 |
# Stack the resized mel spectrogram along the third axis to create 3 channels
|
|
|
16 |
class ANNModel(nn.Module):
|
17 |
def __init__(self):
|
18 |
super(ANNModel, self).__init__()
|
19 |
+
self.fc1 = nn.Linear(300, 256)
|
20 |
self.relu1 = nn.ReLU()
|
21 |
+
self.fc2 = nn.Linear(256, 64)
|
22 |
self.relu2 = nn.ReLU()
|
23 |
self.fc3 = nn.Linear(64, 4)
|
24 |
|
|
|
34 |
ann_model = ANNModel()
|
35 |
|
36 |
# Load the trained model
|
37 |
+
ann_model.load_state_dict(torch.load('ann_model_256_01_94.pth'))
|
38 |
|
39 |
# Load the PCA instance
|
40 |
+
pca = load('pca_256_01_94.pkl')
|
41 |
|
42 |
vgg16 = models.vgg16(pretrained=True).features
|
43 |
# Function to load and preprocess a single audio file
|
44 |
def preprocess_single_audio_vgg16(audio_data, sr, vgg16_model, pca_instance):
|
45 |
# Your existing preprocessing code goes here
|
46 |
y= audio_data
|
47 |
+
sr = 22050  # Fixed sample rate; note this overrides the `sr` argument passed in
|
48 |
mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) # Compute Mel spectrogram
|
49 |
log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max) # Apply log transformation
|
50 |
norm_mel_spec = (log_mel_spec - np.mean(log_mel_spec)) / np.std(log_mel_spec) # Normalize
|
51 |
|
52 |
# Resize mel spectrogram to the target shape (224, 224) using zoom
|
53 |
+
target_shape = (224, 224)
|
54 |
resized_mel_spec = zoom(norm_mel_spec, (target_shape[0] / norm_mel_spec.shape[0], target_shape[1] / norm_mel_spec.shape[1]), mode='nearest')
|
55 |
|
56 |
# Stack the resized mel spectrogram along the third axis to create 3 channels
|