saronium committed
Commit 3c699d2 · verified · 1 Parent(s): 6abefd9

Update app.py

Files changed (1)
  app.py +16 -51
app.py CHANGED
@@ -1,5 +1,4 @@
-from flask import Flask, request, jsonify, render_template
-import os
+import gradio as gr
 import torch
 import librosa
 import numpy as np
@@ -7,12 +6,6 @@ from torchvision import models
 from scipy.ndimage import zoom
 from sklearn.decomposition import PCA
 import joblib
-from keras.utils import to_categorical
-
-# Assuming you already have the 'ann_model' trained and 'pca' instance from the previous code
-language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2,'hindi':3,'kannada':4,'telugu':5}
-
-app = Flask(__name__)
 
 # Load the trained model and PCA instance
 ann_model = torch.load('ann_model.pth')
@@ -20,6 +13,8 @@ pca = joblib.load('pca.pkl')
 
 # Load VGG16 model
 vgg16 = models.vgg16(pretrained=True).features
+
+# Function to load and preprocess a single audio file
 def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
     # Load and preprocess the audio file
     y, sr = librosa.load(audio_file, sr=None)  # Load audio
@@ -54,50 +49,20 @@ def preprocess_single_audio_vgg16(audio_file, vgg16_model, pca_instance):
 
     return features_tensor
 
-@app.route('/')
-def home():
-    return render_template('index.html')
-
-@app.route('/predict', methods=['POST'])
-def predict():
-    try:
-        # Get the audio file from the request
-        audio_file = request.files['file']
-        audio_file.save('temp.wav')
-        audio_file_path = 'temp.wav'
-        # Preprocess the audio file
-        preprocessed_features = preprocess_single_audio_vgg16(audio_file_path, vgg16, pca)
+def predict(audio_file):
+    # Preprocess the audio file
+    preprocessed_features = preprocess_single_audio_vgg16(audio_file.name, vgg16, pca)
 
-        # Make a prediction
-        ann_model.eval()
-        with torch.no_grad():
-            output = ann_model(preprocessed_features)
-            _, predicted_class = torch.max(output, 1)
+    # Make a prediction
+    ann_model.eval()
+    with torch.no_grad():
+        output = ann_model(preprocessed_features)
+        _, predicted_class = torch.max(output, 1)
 
-        # Map predicted class index to actual label
-        predicted_label = {v: k for k, v in language_mapping.items()}[predicted_class.item()]
+    # Map predicted class index to actual label
+    predicted_label = {v: k for k, v in language_mapping.items()}[predicted_class.item()]
 
-        # Delete the temporary audio file
-        os.remove('temp.wav')
+    return predicted_label
 
-        # Return the prediction
-        return jsonify({'prediction': predicted_label})
-    except KeyError:
-        return jsonify({'error': 'Audio file not found in the request'}), 400
-
-    except Exception as e:
-        return jsonify({'error': str(e)}), 500
-if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=8000)
-
-
-
-
-
-# Assuming you already have the 'ann_model' trained and 'pca' instance from the previous code
-
-# Function to load and preprocess a single audio file
-
-# Load VGG16 model
-
+iface = gr.Interface(fn=predict, inputs="file", outputs="text")
+iface.launch()
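
Note on the new file: predict() still looks up language_mapping, but this commit deletes the only line that defined it (it sat next to the Flask setup that was removed), so the committed app.py would raise a NameError on the first prediction. A minimal sketch of the missing definitions, assuming the same six-language mapping from the removed Flask version; index_to_language is a hypothetical helper name, not something in the commit:

# Restore the label mapping that predict() still references; values are
# copied from the mapping this commit removed alongside the Flask setup.
language_mapping = {'malayalam': 0, 'english': 1, 'tamil': 2, 'hindi': 3, 'kannada': 4, 'telugu': 5}

# Hypothetical helper: invert the mapping once at import time instead of
# rebuilding the inverted dict on every call to predict().
index_to_language = {v: k for k, v in language_mapping.items()}

With that restored, the Gradio version is otherwise self-contained: in the Gradio releases this code appears to target, inputs="file" hands predict() a tempfile-like object, which is why it reads audio_file.name directly rather than saving and deleting temp.wav the way the removed Flask handler did.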