saronium committed
Commit f5f3175 · verified · 1 Parent(s): 9bccc69

Update app.py

Files changed (1)
  1. app.py +6 -6
app.py CHANGED

@@ -16,9 +16,9 @@ language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2,'hindi':3}
 class ANNModel(nn.Module):
     def __init__(self):
         super(ANNModel, self).__init__()
-        self.fc1 = nn.Linear(300, 128)
+        self.fc1 = nn.Linear(300, 256)
         self.relu1 = nn.ReLU()
-        self.fc2 = nn.Linear(128, 64)
+        self.fc2 = nn.Linear(256, 64)
         self.relu2 = nn.ReLU()
         self.fc3 = nn.Linear(64, 4)

@@ -34,23 +34,23 @@ class ANNModel(nn.Module):
 ann_model = ANNModel()

 # Load the trained model
-ann_model.load_state_dict(torch.load('ann_model1.pth'))
+ann_model.load_state_dict(torch.load('ann_model_256_01_94.pth'))

 # Load the PCA instance
-pca = load('pca1.pkl')
+pca = load('pca_256_01_94.pkl')

 vgg16 = models.vgg16(pretrained=True).features
 # Function to load and preprocess a single audio file
 def preprocess_single_audio_vgg16(audio_data, sr, vgg16_model, pca_instance):
     # Your existing preprocessing code goes here
     y = audio_data
-    sr = sr  # Load audio
+    sr = 22050  # Load audio
     mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)  # Compute Mel spectrogram
     log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)  # Apply log transformation
     norm_mel_spec = (log_mel_spec - np.mean(log_mel_spec)) / np.std(log_mel_spec)  # Normalize

     # Resize mel spectrogram to the target shape (128, 128) using zoom
-    target_shape = (128, 128)
+    target_shape = (224, 224)
     resized_mel_spec = zoom(norm_mel_spec, (target_shape[0] / norm_mel_spec.shape[0], target_shape[1] / norm_mel_spec.shape[1]), mode='nearest')

     # Stack the resized mel spectrogram along the third axis to create 3 channels
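For context, here is a minimal sketch of the classifier as it stands after this commit, assuming the usual stacked Linear/ReLU forward pass (the forward method is not part of the diff, so it is an assumption): 300 PCA-reduced VGG16 features pass through 300 → 256 → 64 → 4 linear layers, and the widened 256-unit hidden layer plus the new checkpoint name are the actual changes committed here.

import torch
import torch.nn as nn

class ANNModel(nn.Module):
    def __init__(self):
        super(ANNModel, self).__init__()
        self.fc1 = nn.Linear(300, 256)   # hidden layer widened in this commit (was 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 64)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 4)      # one logit per language in language_mapping

    def forward(self, x):                # assumed forward pass; not shown in the diff
        x = self.relu1(self.fc1(x))
        x = self.relu2(self.fc2(x))
        return self.fc3(x)               # raw logits for the 4 languages

ann_model = ANNModel()
# The checkpoint must have been trained with the new 300-256-64-4 layer sizes,
# otherwise load_state_dict will fail with a shape mismatch.
ann_model.load_state_dict(torch.load('ann_model_256_01_94.pth'))
ann_model.eval()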
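The hunk cuts off at the channel-stacking comment. As a rough sketch only, this is how such a VGG16 + PCA pipeline typically continues: the helper below (extract_features), the stacking/flattening steps, and the argmax/inverse-mapping lookup are assumptions for illustration, not part of the commit, and they presume the flattened VGG16 features are reduced by the loaded PCA to the 300 components expected by fc1.

import numpy as np
import torch

def extract_features(resized_mel_spec, vgg16_model, pca_instance):
    # Hypothetical continuation of preprocess_single_audio_vgg16 (not in the commit).
    # Stack the resized mel spectrogram along the third axis to create 3 channels.
    stacked = np.stack([resized_mel_spec] * 3, axis=-1)          # (224, 224, 3)
    tensor = torch.tensor(stacked, dtype=torch.float32)
    tensor = tensor.permute(2, 0, 1).unsqueeze(0)                # (1, 3, 224, 224) as VGG16 expects
    with torch.no_grad():
        features = vgg16_model(tensor)                           # VGG16 convolutional features
    flat = features.flatten().numpy().reshape(1, -1)             # e.g. (1, 25088) for 224x224 input
    reduced = pca_instance.transform(flat)                       # down to the 300 PCA components
    return torch.tensor(reduced, dtype=torch.float32)

# Hypothetical prediction step mapping logits back to a language name.
features = extract_features(resized_mel_spec, vgg16, pca)
with torch.no_grad():
    logits = ann_model(features)
pred = int(torch.argmax(logits, dim=1))
inverse_mapping = {v: k for k, v in language_mapping.items()}
print(inverse_mapping[pred])   # e.g. 'malayalam'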