import gradio as gr
from fastai.vision.all import load_learner, PILImage
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
import pathlib

# The model was exported on Windows; map WindowsPath to PosixPath so the
# pickled learner loads correctly on non-Windows hosts.
if os.name != 'nt':
    pathlib.WindowsPath = pathlib.PosixPath

# Load the exported fastai model
learn_inf = load_learner('export.pkl')
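
# The learner's data loaders (learn_inf.dls) retain the class vocabulary from
# training, which is used below to map probabilities back to label names.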

# Function to save a mel spectrogram image and run inference on it
def save_mel_spectrogram_and_predict(wav_path):
    # Define output paths
    output_dir = 'temp_spectrograms'
    os.makedirs(output_dir, exist_ok=True)  # Ensure the directory exists
    output_path = os.path.join(output_dir, 'temp_spectrogram.png')

    # Load the audio file
    y, sr = librosa.load(wav_path, sr=16000)
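    # NOTE: the 16 kHz resample rate above is assumed to match the
    # preprocessing used when the training spectrograms were generated.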

    # Compute the mel spectrogram
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    S_dB = librosa.power_to_db(S, ref=np.max)
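    # power_to_db above converts the power spectrogram to decibels relative to
    # its peak (ref=np.max), which gives a clearer rendered image.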

    # Render and save the mel spectrogram as an image; axes, colorbar, and
    # title are omitted so the PNG contains only the spectrogram itself
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', cmap='viridis')
    plt.axis('off')
    plt.savefig(output_path, bbox_inches='tight', pad_inches=0, format='png')
    plt.close()

    # Run inference on the saved mel spectrogram image
    img = PILImage.create(output_path)
    pred_class, pred_idx, probs = learn_inf.predict(img)
    return output_path, {learn_inf.dls.vocab[i]: float(probs[i]) for i in range(len(probs))}
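
# Quick sanity check outside the Gradio UI (hypothetical file name):
#   spectrogram_path, probs = save_mel_spectrogram_and_predict('example.wav')
#   print(probs)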

# Gradio interface function
def gradio_interface(audio):
    spectrogram_path, predictions = save_mel_spectrogram_and_predict(audio)
    return spectrogram_path, predictions

# Create the Gradio interface
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=[
        gr.Image(type="filepath", label="Mel Spectrogram"),
        gr.JSON(label="Class Probabilities"),
    ],
    title="Audio Classification with Mel Spectrogram",
    description=(
        "Upload an audio file to see its mel spectrogram and classification "
        "probabilities. Currently supports acoustic guitar, electric guitar, "
        "bass, synth lead, and synth pad."
    ),
)
# Launch the interface
interface.launch()