saronium committed on
Commit
5bba7e5
·
verified ·
1 Parent(s): 63a2f0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -8,7 +8,7 @@ from scipy.ndimage import zoom
8
  import gradio as gr
9
  import pickle
10
  from joblib import load
11
-
12
 
13
  # Assuming you already have the 'ann_model' trained and 'pca' instance from the previous code
14
  language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2,'hindi':3,'kannada':4,'telugu':5}
@@ -41,9 +41,10 @@ pca = load('pca.pkl')
41
 
42
  vgg16 = models.vgg16(pretrained=True).features
43
  # Function to load and preprocess a single audio file
44
- def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
45
  # Your existing preprocessing code goes here
46
- y, sr = librosa.load(audio_file, sr=None) # Load audio
 
47
  mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) # Compute Mel spectrogram
48
  log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max) # Apply log transformation
49
  norm_mel_spec = (log_mel_spec - np.mean(log_mel_spec)) / np.std(log_mel_spec) # Normalize
@@ -74,12 +75,18 @@ def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
74
  features_tensor = torch.from_numpy(features_pca).float()
75
  return features_tensor
76
 
77
- def predict_language(audio_file_path):
78
  # Load VGG16 model
79
-
 
 
 
 
 
 
80
 
81
  # Preprocess the single audio file using VGG16 for feature extraction
82
- preprocessed_features = preprocess_single_audio_vgg16(audio_file_path, vgg16, pca)
83
 
84
  # Make predictions using the trained model
85
  ann_model.eval()
@@ -92,5 +99,6 @@ def predict_language(audio_file_path):
92
 
93
  return predicted_label
94
 
95
- iface = gr.Interface(fn=predict_language, inputs="microphone", outputs="text")
 
96
  iface.launch()
 
8
  import gradio as gr
9
  import pickle
10
  from joblib import load
11
+ import soundfile as sf
12
 
13
  # Assuming you already have the 'ann_model' trained and 'pca' instance from the previous code
14
  language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2,'hindi':3,'kannada':4,'telugu':5}
 
41
 
42
  vgg16 = models.vgg16(pretrained=True).features
43
  # Function to load and preprocess a single audio file
44
+ def preprocess_single_audio_vgg16(audio_data, sr, vgg16_model, pca_instance):
45
  # Your existing preprocessing code goes here
46
+ y= audio_data
47
+ sr = sr# Load audio
48
  mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128) # Compute Mel spectrogram
49
  log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max) # Apply log transformation
50
  norm_mel_spec = (log_mel_spec - np.mean(log_mel_spec)) / np.std(log_mel_spec) # Normalize
 
75
  features_tensor = torch.from_numpy(features_pca).float()
76
  return features_tensor
77
 
78
+ def predict_language(audio_input):
79
  # Load VGG16 model
80
+ if isinstance(audio_input, str):
81
+ # Load the audio file
82
+ audio, sr = librosa.load(audio_input, sr=None)
83
+ else:
84
+ # Get the sample rate and convert the audio data to float
85
+ sr, audio = audio_input
86
+ audio = audio.astype(np.float32)
87
 
88
  # Preprocess the single audio file using VGG16 for feature extraction
89
+ preprocessed_features = preprocess_single_audio_vgg16(audio, sr, vgg16, pca)
90
 
91
  # Make predictions using the trained model
92
  ann_model.eval()
 
99
 
100
  return predicted_label
101
 
102
+ iface = gr.Interface(fn=predict_language, inputs="audio", outputs="text")
103
+
104
  iface.launch()