Spaces:

joshieyu
/

AudioClassifier

Running

File size: 2,278 Bytes

import gradio as gr
from fastai.vision.all import load_learner, PILImage
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import os
import pathlib

# On non-Windows hosts, alias pathlib.WindowsPath to PosixPath before the model
# is loaded below.
# NOTE(review): presumably 'export.pkl' was exported on Windows and its pickle
# contains WindowsPath objects, which cannot be created on other platforms —
# confirm against how the model was exported.
if os.name != 'nt':
    pathlib.WindowsPath = pathlib.PosixPath

# Load the exported fastai learner once at import time; the prediction function
# below reuses this single instance.
# NOTE(review): load_learner is understood to unpickle the file, so 'export.pkl'
# should only ever come from a trusted source — confirm.
learn_inf = load_learner('export.pkl')


# Function to save mel spectrogram and run inference
def save_mel_spectrogram_and_predict(wav_path):
    """Render an audio file's mel spectrogram to a PNG and classify that image.

    The audio is loaded at 16 kHz, converted to a 128-band mel spectrogram
    expressed in dB relative to its own peak, drawn without axes, and saved to
    ``temp_spectrograms/temp_spectrogram.png``. The saved image is then passed
    to the module-level fastai learner ``learn_inf``.

    Args:
        wav_path: Path of the audio file to analyse.

    Returns:
        A ``(image_path, probabilities)`` tuple: the path of the saved
        spectrogram PNG, and a dict mapping each label in
        ``learn_inf.dls.vocab`` to its predicted probability as a float.

    Note:
        The output file name is fixed, so every call overwrites the
        spectrogram written by the previous call.
    """
    # Define paths
    output_dir = 'temp_spectrograms'
    os.makedirs(output_dir, exist_ok=True)  # Ensure the directory exists
    output_path = os.path.join(output_dir, 'temp_spectrogram.png')

    # Load the audio file, resampled to 16 kHz
    y, sr = librosa.load(wav_path, sr=16000)

    # Compute the mel spectrogram and convert power to dB (peak = 0 dB)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # Draw on explicit figure/axes handles rather than pyplot's implicit
    # "current figure", and always close the figure — even when drawing or
    # saving fails — so figures do not pile up in pyplot's registry.
    fig = plt.figure(figsize=(10, 4))
    try:
        ax = fig.add_subplot(1, 1, 1)
        librosa.display.specshow(
            S_dB, sr=sr, x_axis='time', y_axis='mel', cmap='viridis', ax=ax
        )
        ax.axis('off')  # the image contains only the spectrogram itself
        fig.savefig(output_path, bbox_inches='tight', pad_inches=0, format='png')
    finally:
        plt.close(fig)

    # Run inference on the saved mel spectrogram image; only the per-class
    # probabilities are needed from the prediction tuple.
    img = PILImage.create(output_path)
    _, _, probs = learn_inf.predict(img)

    vocab = learn_inf.dls.vocab
    return output_path, {vocab[i]: float(p) for i, p in enumerate(probs)}

# Gradio interface function
def gradio_interface(audio):
    """Gradio callback: turn an uploaded audio file into its two outputs.

    Args:
        audio: Filesystem path of the uploaded audio (the input component is
            configured with ``type="filepath"``). Falsy when the form is
            submitted without a file.

    Returns:
        A ``(spectrogram_path, predictions)`` tuple, exactly as produced by
        ``save_mel_spectrogram_and_predict``.

    Raises:
        gr.Error: If no audio file was provided, so the user sees a clear
            message instead of an opaque loader failure.
    """
    if not audio:
        raise gr.Error("Please upload an audio file before submitting.")
    return save_mel_spectrogram_and_predict(audio)

# Create the Gradio interface
# Input and output widgets, built in the order the callback consumes/produces them
audio_input = gr.Audio(sources="upload", type="filepath")
result_outputs = [
    gr.Image(type="filepath", label="Mel Spectrogram"),
    gr.JSON(label="Class Probabilities"),
]

# Wire the callback and widgets into a single-page app
interface = gr.Interface(
    fn=gradio_interface,
    inputs=audio_input,
    outputs=result_outputs,
    title="Audio Classification with Mel Spectrogram",
    description="Upload an audio file to see its mel spectrogram and classification probabilities. Currently supports acoustic guitar, electric guitar, bass, synth lead, and synth pad.",
)

# Start serving the app
interface.launch()