saronium committed on
Commit
5bba7e5
·
verified ·
1 Parent(s): 63a2f0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -8,7 +8,7 @@ from scipy.ndimage import zoom
8
  import gradio as gr
9
  import pickle
10
  from joblib import load
11
-
12
 
13
  # Assuming you already have the 'ann_model' trained and 'pca' instance from the previous code
14
  language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2,'hindi':3,'kannada':4,'telugu':5}
@@ -41,9 +41,10 @@ pca = load('pca.pkl')
41
 
42
  vgg16 = models.vgg16(pretrained=True).features
43
  # Function to load and preprocess a single audio file
44
- def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
45
  # Your existing preprocessing code goes here
46
- y, sr = librosa.load(audio_file, sr=None) # Load audio
 
47
  mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) # Compute Mel spectrogram
48
  log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max) # Apply log transformation
49
  norm_mel_spec = (log_mel_spec - np.mean(log_mel_spec)) / np.std(log_mel_spec) # Normalize
@@ -74,12 +75,18 @@ def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
74
  features_tensor = torch.from_numpy(features_pca).float()
75
  return features_tensor
76
 
77
- def predict_language(audio_file_path):
78
  # Load VGG16 model
79
-
 
 
 
 
 
 
80
 
81
  # Preprocess the single audio file using VGG16 for feature extraction
82
- preprocessed_features = preprocess_single_audio_vgg16(audio_file_path, vgg16, pca)
83
 
84
  # Make predictions using the trained model
85
  ann_model.eval()
@@ -92,5 +99,6 @@ def predict_language(audio_file_path):
92
 
93
  return predicted_label
94
 
95
- iface = gr.Interface(fn=predict_language, inputs="microphone", outputs="text")
 
96
  iface.launch()
 
8
  import gradio as gr
9
  import pickle
10
  from joblib import load
11
+ import soundfile as sf
12
 
13
  # Assuming you already have the 'ann_model' trained and 'pca' instance from the previous code
14
  language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2,'hindi':3,'kannada':4,'telugu':5}
 
41
 
42
  vgg16 = models.vgg16(pretrained=True).features
43
  # Function to load and preprocess a single audio file
44
+ def preprocess_single_audio_vgg16(audio_data, sr, vgg16_model, pca_instance):
45
  # Your existing preprocessing code goes here
46
+ y= audio_data
47
+ sr = sr# Load audio
48
  mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) # Compute Mel spectrogram
49
  log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max) # Apply log transformation
50
  norm_mel_spec = (log_mel_spec - np.mean(log_mel_spec)) / np.std(log_mel_spec) # Normalize
 
75
  features_tensor = torch.from_numpy(features_pca).float()
76
  return features_tensor
77
 
78
+ def predict_language(audio_input):
79
  # Load VGG16 model
80
+ if isinstance(audio_input, str):
81
+ # Load the audio file
82
+ audio, sr = librosa.load(audio_input, sr=None)
83
+ else:
84
+ # Get the sample rate and convert the audio data to float
85
+ sr, audio = audio_input
86
+ audio = audio.astype(np.float32)
87
 
88
  # Preprocess the single audio file using VGG16 for feature extraction
89
+ preprocessed_features = preprocess_single_audio_vgg16(audio, sr, vgg16, pca)
90
 
91
  # Make predictions using the trained model
92
  ann_model.eval()
 
99
 
100
  return predicted_label
101
 
102
+ iface = gr.Interface(fn=predict_language, inputs="audio", outputs="text")
103
+
104
  iface.launch()