joshieyu committed on
Commit
0d5549c
·
1 Parent(s): 5fc0563

Added files

Browse files
Files changed (3) hide show
  1. app.py +56 -0
  2. export.pkl +3 -0
  3. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from fastai.vision.all import load_learner, PILImage
3
+ import librosa
4
+ import librosa.display
5
+ import matplotlib.pyplot as plt
6
+ import numpy as np
7
+ import os
8
+
9
# Load the exported fastai Learner that ships alongside this script.
# The path is anchored to this file's directory so start-up does not depend on
# the process's current working directory.
# NOTE(security): fastai's load_learner unpickles the file, which can execute
# arbitrary code -- only ever load an export.pkl that comes from a trusted source.
learn_inf = load_learner(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), 'export.pkl')
)
11
+
12
+ # Function to save mel spectrogram and run inference
13
def save_mel_spectrogram_and_predict(wav_path):
    """Render an audio file's mel spectrogram to a PNG and classify that image.

    The audio is loaded at a 16 kHz sample rate, converted to a 128-band mel
    spectrogram expressed in dB relative to its peak, drawn without axes, and
    saved to disk. The saved image is then fed to the module-level fastai
    learner ``learn_inf``.

    Args:
        wav_path: Path of the audio file to load with librosa.

    Returns:
        A tuple ``(output_path, probabilities)``: ``output_path`` is the path
        of the saved PNG (``temp_spectrogram.png`` inside the
        ``temp_spectrograms`` directory) and ``probabilities`` maps each label
        in ``learn_inf.dls.vocab`` to its predicted probability as a float.
    """
    # Define paths
    output_dir = 'temp_spectrograms'
    os.makedirs(output_dir, exist_ok=True)  # Ensure the directory exists
    # NOTE(review): the filename is fixed, so every call overwrites the
    # previous image; calls running at the same time would share this file.
    output_path = os.path.join(output_dir, 'temp_spectrogram.png')

    # Load the audio file
    y, sr = librosa.load(wav_path, sr=16000)

    # Compute the mel spectrogram
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Draw on an explicit figure/axes instead of pyplot's implicit "current
    # figure", and always close the figure -- even when drawing or saving
    # fails -- so a long-running server does not accumulate open figures.
    fig = plt.figure(figsize=(10, 4))
    try:
        ax = fig.add_subplot(111)
        librosa.display.specshow(
            S_dB, sr=sr, x_axis='time', y_axis='mel', cmap='viridis', ax=ax
        )
        # No axes, colorbar or title: the PNG holds only the spectrogram
        # itself (presumably matching the images the model was trained on --
        # confirm before changing the rendering).
        ax.axis('off')
        fig.savefig(output_path, bbox_inches='tight', pad_inches=0, format='png')
    finally:
        plt.close(fig)

    # Run inference on the saved mel spectrogram image
    img = PILImage.create(output_path)
    _, _, probs = learn_inf.predict(img)

    vocab = learn_inf.dls.vocab
    return output_path, {vocab[i]: float(p) for i, p in enumerate(probs)}
40
+
41
+ # Gradio interface function
42
def gradio_interface(audio):
    """Gradio callback: classify an uploaded audio file.

    Args:
        audio: Filesystem path of the uploaded audio, or ``None`` when the
            form is submitted without a file.

    Returns:
        A ``(spectrogram_path, predictions)`` tuple: the path of the rendered
        mel-spectrogram image and a dict of class probabilities.

    Raises:
        gr.Error: If no audio was provided, so the UI shows a readable
            message instead of an opaque failure from the audio loader.
    """
    if audio is None:
        raise gr.Error("Please upload an audio file before submitting.")

    spectrogram_path, predictions = save_mel_spectrogram_and_predict(audio)
    return spectrogram_path, predictions
45
+
46
# Build the Gradio UI: one uploaded audio file in; the spectrogram image and
# the class-probability JSON out.
audio_input = gr.Audio(sources="upload", type="filepath")
result_outputs = [
    gr.Image(type="filepath", label="Mel Spectrogram"),
    gr.JSON(label="Class Probabilities"),
]
interface = gr.Interface(
    fn=gradio_interface,
    inputs=audio_input,
    outputs=result_outputs,
    title="Audio Classification with Mel Spectrogram",
    description="Upload an audio file to see its mel spectrogram and classification probabilities.",
)

# Start serving the app
interface.launch()
export.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c29fe3e98a173226597f419791d98781181e0a75a7b6abcf4143ce95a9a681b
3
+ size 46977485
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ fastai
3
+ librosa
4
+ matplotlib
5
+ numpy