saronium committed on
Commit
8b10513
·
verified ·
1 Parent(s): a4864b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -16
app.py CHANGED
@@ -1,22 +1,16 @@
1
- import gradio as gr
2
  import torch
3
  import librosa
4
  import numpy as np
5
  from torchvision import models
6
  from scipy.ndimage import zoom
7
- from sklearn.decomposition import PCA
8
- import joblib
9
-
10
- # Load the trained model and PCA instance
11
- ann_model = torch.load('ann_model.pth')
12
- pca = joblib.load('pca.pkl')
13
 
14
- # Load VGG16 model
15
- vgg16 = models.vgg16(pretrained=True).features
16
 
17
  # Function to load and preprocess a single audio file
18
  def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
19
- # Load and preprocess the audio file
20
  y, sr = librosa.load(audio_file, sr=None) # Load audio
21
  mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) # Compute Mel spectrogram
22
  log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max) # Apply log transformation
@@ -46,14 +40,16 @@ def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
46
 
47
  # Convert to PyTorch tensor
48
  features_tensor = torch.from_numpy(features_pca).float()
49
-
50
  return features_tensor
51
 
52
- def predict(audio_file):
53
- # Preprocess the audio file
54
- preprocessed_features = preprocess_single_audio_vgg16(audio_file.name, vgg16, pca)
 
 
 
55
 
56
- # Make a prediction
57
  ann_model.eval()
58
  with torch.no_grad():
59
  output = ann_model(preprocessed_features)
@@ -64,5 +60,5 @@ def predict(audio_file):
64
 
65
  return predicted_label
66
 
67
- iface = gr.Interface(fn=predict, inputs="file", outputs="text")
68
  iface.launch()
 
 
1
  import torch
2
  import librosa
3
  import numpy as np
4
  from torchvision import models
5
  from scipy.ndimage import zoom
6
+ import gradio as gr
 
 
 
 
 
7
 
8
+ # Assumes 'ann_model' is already trained and a fitted 'pca' instance is available from the previous code
9
+ language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2,'hindi':3,'kannada':4,'telugu':5}
10
 
11
  # Function to load and preprocess a single audio file
12
  def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
13
+ # Your existing preprocessing code goes here
14
  y, sr = librosa.load(audio_file, sr=None) # Load audio
15
  mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) # Compute Mel spectrogram
16
  log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max) # Apply log transformation
 
40
 
41
  # Convert to PyTorch tensor
42
  features_tensor = torch.from_numpy(features_pca).float()
 
43
  return features_tensor
44
 
45
+ def predict_language(audio_file_path):
46
+ # Load VGG16 model
47
+ vgg16 = models.vgg16(pretrained=True).features
48
+
49
+ # Preprocess the single audio file using VGG16 for feature extraction
50
+ preprocessed_features = preprocess_single_audio_vgg16(audio_file_path, vgg16, pca)
51
 
52
+ # Make predictions using the trained model
53
  ann_model.eval()
54
  with torch.no_grad():
55
  output = ann_model(preprocessed_features)
 
60
 
61
  return predicted_label
62
 
63
+ iface = gr.Interface(fn=predict_language, inputs="file", outputs="text")
64
  iface.launch()