saronium committed on
Commit
8b10513
·
verified ·
1 Parent(s): a4864b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -16
app.py CHANGED
@@ -1,22 +1,16 @@
1
- import gradio as gr
2
  import torch
3
  import librosa
4
  import numpy as np
5
  from torchvision import models
6
  from scipy.ndimage import zoom
7
- from sklearn.decomposition import PCA
8
- import joblib
9
-
10
- # Load the trained model and PCA instance
11
- ann_model = torch.load('ann_model.pth')
12
- pca = joblib.load('pca.pkl')
13
 
14
- # Load VGG16 model
15
- vgg16 = models.vgg16(pretrained=True).features
16
 
17
  # Function to load and preprocess a single audio file
18
  def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
19
- # Load and preprocess the audio file
20
  y, sr = librosa.load(audio_file, sr=None) # Load audio
21
  mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) # Compute Mel spectrogram
22
  log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max) # Apply log transformation
@@ -46,14 +40,16 @@ def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
46
 
47
  # Convert to PyTorch tensor
48
  features_tensor = torch.from_numpy(features_pca).float()
49
-
50
  return features_tensor
51
 
52
- def predict(audio_file):
53
- # Preprocess the audio file
54
- preprocessed_features = preprocess_single_audio_vgg16(audio_file.name, vgg16, pca)
 
 
 
55
 
56
- # Make a prediction
57
  ann_model.eval()
58
  with torch.no_grad():
59
  output = ann_model(preprocessed_features)
@@ -64,5 +60,5 @@ def predict(audio_file):
64
 
65
  return predicted_label
66
 
67
- iface = gr.Interface(fn=predict, inputs="file", outputs="text")
68
  iface.launch()
 
 
1
  import torch
2
  import librosa
3
  import numpy as np
4
  from torchvision import models
5
  from scipy.ndimage import zoom
6
+ import gradio as gr
 
 
 
 
 
7
 
8
+ # Assumes 'ann_model' is already trained and a fitted 'pca' instance is available from the previous code
9
+ language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2,'hindi':3,'kannada':4,'telugu':5}
10
 
11
  # Function to load and preprocess a single audio file
12
  def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
13
+ # Your existing preprocessing code goes here
14
  y, sr = librosa.load(audio_file, sr=None) # Load audio
15
  mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) # Compute Mel spectrogram
16
  log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max) # Apply log transformation
 
40
 
41
  # Convert to PyTorch tensor
42
  features_tensor = torch.from_numpy(features_pca).float()
 
43
  return features_tensor
44
 
45
+ def predict_language(audio_file_path):
46
+ # Load VGG16 model
47
+ vgg16 = models.vgg16(pretrained=True).features
48
+
49
+ # Preprocess the single audio file using VGG16 for feature extraction
50
+ preprocessed_features = preprocess_single_audio_vgg16(audio_file_path, vgg16, pca)
51
 
52
+ # Make predictions using the trained model
53
  ann_model.eval()
54
  with torch.no_grad():
55
  output = ann_model(preprocessed_features)
 
60
 
61
  return predicted_label
62
 
63
+ iface = gr.Interface(fn=predict_language, inputs="file", outputs="text")
64
  iface.launch()