import gradio as gr
import numpy as np
import tensorflow as tf
import librosa
import librosa.util
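
# Assumed stack (not pinned in the original script): TensorFlow 2.x (tf.keras)
# and librosa 0.9+ (fix_length taking the keyword argument size=).
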
# Define your predict_class function
def predict_class(file_path, model, labels):
    # Extract MFCC features
    y, sr = librosa.load(file_path, sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    # Pad or truncate to 100 frames along the time axis (axis 1)
    mfcc = librosa.util.fix_length(mfcc, size=100, axis=1)
    # Ensure mfcc has shape (13, 100)
    if mfcc.shape[0] != 13:
        mfcc = librosa.util.fix_length(mfcc, size=13, axis=0)
    # Add batch and channel dimensions
    mfcc = mfcc[np.newaxis, ..., np.newaxis]  # Shape: (1, 13, 100, 1)
    # Predict using the model
    prediction = model.predict(mfcc)
    predicted_class = labels[np.argmax(prediction)]
    return predicted_class
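
# Per the reshaping above, the loaded model is expected to take input of
# shape (1, 13, 100, 1); adjust n_mfcc and the frame count if your
# checkpoint was trained with different MFCC dimensions.
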
# Load your pre-trained model
model = tf.keras.models.load_model("voice_classification_modelm.h5")
# Define the class labels based on your folder names
labels = [
    "all_vowels_healthy",
    "allvowels_functional",
    "allvowels_laryngitis",
    "allvowels_lukoplakia",
    "allvowels_psychogenic",
    "allvowels_rlnp",
    "allvowels_sd"
]
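
# The label order must match the class indices the model was trained with.
# Quick sanity check outside the UI (with a hypothetical local file "sample.wav"):
#   print(predict_class("sample.wav", model, labels))
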
# Define the Gradio function
def classify_audio(audio_file):
    try:
        predicted_class = predict_class(audio_file, model, labels)
        return f"Predicted Class: {predicted_class}"
    except Exception as e:
        return f"Error: {str(e)}"
# Create the Gradio interface
interface = gr.Interface(
    fn=classify_audio,
    # Gradio 3.x API: in Gradio 4+, source="upload" was renamed to sources=["upload"]
    inputs=gr.Audio(source="upload", type="filepath", label="Upload an Audio File"),
    outputs=gr.Textbox(label="Predicted Class"),
    title="Voice Classification",
    description="Upload an audio file to classify its voice type.",
    examples=["example_audio.wav"]  # Replace with paths to sample audio files
)
# Launch the app
interface.launch()
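
# For a temporary public URL (e.g., when running in a notebook), Gradio also
# supports: interface.launch(share=True)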