Spaces:

saronium
/

Indian-language-identification-from-audio

Sleeping

App Files Files Community

saronium committed on Mar 6, 2024

Commit

3c699d2

verified ·

1 Parent(s): 6abefd9

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -51

app.py CHANGED Viewed

@@ -1,5 +1,4 @@
-from flask import Flask, request, jsonify, render_template
-import os
 import torch
 import librosa
 import numpy as np
@@ -7,12 +6,6 @@ from torchvision import models
 from scipy.ndimage import zoom
 from sklearn.decomposition import PCA
 import joblib
-from keras.utils import to_categorical
-# Assuming you already have the 'ann_model' trained and 'pca' instance from the previous code
-language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2,'hindi':3,'kannada':4,'telugu':5}
-app = Flask(__name__)
 # Load the trained model and PCA instance
 ann_model = torch.load('ann_model.pth')
@@ -20,6 +13,8 @@ pca = joblib.load('pca.pkl')
 # Load VGG16 model
 vgg16 = models.vgg16(pretrained=True).features
 def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
     # Load and preprocess the audio file
     y, sr = librosa.load(audio_file, sr=None)  # Load audio
@@ -54,50 +49,20 @@ def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
     return features_tensor
-@app.route('/')
-def home():
-    return render_template('index.html')
-@app.route('/predict', methods=['POST'])
-def predict():
-        try:
-    # Get the audio file from the request
-            audio_file = request.files['file']
-            audio_file.save('temp.wav')
-            audio_file_path = 'temp.wav'
-            # Preprocess the audio file
-            preprocessed_features = preprocess_single_audio_vgg16(audio_file_path, vgg16, pca)
-            # Make a prediction
-            ann_model.eval()
-            with torch.no_grad():
-                output = ann_model(preprocessed_features)
-                _, predicted_class = torch.max(output, 1)
-            # Map predicted class index to actual label
-            predicted_label = {v: k for k, v in language_mapping.items()}[predicted_class.item()]
-            # Delete the temporary audio file
-            os.remove('temp.wav')
-            # Return the prediction
-            return jsonify({'prediction': predicted_label})
-        except KeyError:
-          return jsonify({'error': 'Audio file not found in the request'}), 400
-        except Exception as e:
-          return jsonify({'error': str(e)}), 500
-if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=8000)
-# Assuming you already have the 'ann_model' trained and 'pca' instance from the previous code
-# Function to load and preprocess a single audio file
-# Load VGG16 model

+import gradio as gr
 import torch
 import librosa
 import numpy as np
 from scipy.ndimage import zoom
 from sklearn.decomposition import PCA
 import joblib
 # Load the trained model and PCA instance
 ann_model = torch.load('ann_model.pth')
 # Load VGG16 model
 vgg16 = models.vgg16(pretrained=True).features
+# Function to load and preprocess a single audio file
 def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
     # Load and preprocess the audio file
     y, sr = librosa.load(audio_file, sr=None)  # Load audio
     return features_tensor
+def predict(audio_file):
+    """Return the predicted language label for an uploaded audio file.
+
+    `audio_file` is whatever the Gradio "file" input hands over: either a
+    path string or an object exposing the path as `.name` (which of the two
+    depends on the installed Gradio version -- TODO confirm for this Space).
+    Returns the language name as a string.
+    """
+    # Class index -> label. Kept local because this commit deletes the
+    # module-level `language_mapping` dict the lookup previously relied on,
+    # which would otherwise raise NameError on every call.
+    index_to_language = {0: 'malayalam', 1: 'english', 2: 'tamil',
+                         3: 'hindi', 4: 'kannada', 5: 'telugu'}
+    # Accept both a plain path and a tempfile-style wrapper.
+    audio_path = audio_file if isinstance(audio_file, str) else audio_file.name
+    # Preprocess the audio file
+    preprocessed_features = preprocess_single_audio_vgg16(audio_path, vgg16, pca)
+    # Make a prediction
+    ann_model.eval()
+    with torch.no_grad():
+        output = ann_model(preprocessed_features)
+        _, predicted_class = torch.max(output, 1)
+    # Map predicted class index to actual label
+    return index_to_language[predicted_class.item()]
+# Wire `predict` into a Gradio UI (file upload in, text label out) and start
+# serving it; `launch()` runs at import time because there is no __main__ guard.
+iface = gr.Interface(fn=predict, inputs="file", outputs="text")
+iface.launch()