Spaces:

joshieyu
/

AudioClassifier

Sleeping

App Files Files Community

joshieyu committed on Jul 22, 2024

Commit

0d5549c

1 Parent(s): 5fc0563

Added files

Browse files

Files changed (3) hide show

app.py +56 -0
export.pkl +3 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import gradio as gr
+from fastai.vision.all import load_learner, PILImage
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+# Load your fastai model
+# The path is relative, so 'export.pkl' is resolved against the current working directory.
+# NOTE(review): fastai's load_learner unpickles the file — only load an export.pkl you trust.
+learn_inf = load_learner('export.pkl')
+# Function to save mel spectrogram and run inference
+def save_mel_spectrogram_and_predict(wav_path):
+    # Define paths
+    output_dir = 'temp_spectrograms'
+    os.makedirs(output_dir, exist_ok=True)  # Ensure the directory exists
+    output_path = os.path.join(output_dir, 'temp_spectrogram.png')
+    # Load the audio file
+    y, sr = librosa.load(wav_path, sr=16000)
+    # Compute the mel spectrogram
+    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
+    S_dB = librosa.power_to_db(S, ref=np.max)
+    # Save the mel spectrogram as an image
+    plt.figure(figsize=(10, 4))
+    librosa.display.specshow(S_dB, sr=sr, x_axis='time', y_axis='mel', cmap='viridis')
+    # plt.colorbar(format='%+2.0f dB')
+    # plt.title('Mel spectrogram')
+    plt.axis('off')
+    plt.savefig(output_path, bbox_inches='tight', pad_inches=0, format='png')
+    plt.close()
+    # Run inference on the saved mel spectrogram image
+    img = PILImage.create(output_path)
+    pred_class, pred_idx, probs = learn_inf.predict(img)
+    return output_path, {learn_inf.dls.vocab[i]: float(probs[i]) for i in range(len(probs))}
+# Gradio interface function
+def gradio_interface(audio):
+    spectrogram_path, predictions = save_mel_spectrogram_and_predict(audio)
+    return spectrogram_path, predictions
+# Create the Gradio interface
+interface = gr.Interface(
+    fn=gradio_interface,
+    inputs=gr.Audio(sources="upload", type="filepath"),
+    outputs=[gr.Image(type="filepath", label="Mel Spectrogram"), gr.JSON(label="Class Probabilities")],
+    title="Audio Classification with Mel Spectrogram",
+    description="Upload an audio file to see its mel spectrogram and classification probabilities."
+)
+# Launch the interface
+interface.launch()

export.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c29fe3e98a173226597f419791d98781181e0a75a7b6abcf4143ce95a9a681b
+size 46977485

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio
+fastai
+librosa
+matplotlib
+numpy