# RITISHREE / app.py
import gradio as gr
import numpy as np
import tensorflow as tf
import librosa
import librosa.util
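# Dependencies (inferred from the imports above): gradio, numpy, tensorflow,
# librosa. A typical install would be:
#   pip install gradio numpy tensorflow librosa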
# Extract MFCC features from an audio file and run the classifier
def predict_class(file_path, model, labels):
    # Load the audio at its native sampling rate
    y, sr = librosa.load(file_path, sr=None)
    # Compute 13 MFCCs: shape (13, n_frames)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    # Pad or truncate to exactly 100 frames along the time axis
    mfcc = librosa.util.fix_length(mfcc, size=100, axis=1)
    # Defensive check: force the coefficient axis to 13 if needed
    if mfcc.shape[0] != 13:
        mfcc = librosa.util.fix_length(mfcc, size=13, axis=0)
    # Add batch and channel dimensions -> shape (1, 13, 100, 1)
    mfcc = mfcc[np.newaxis, ..., np.newaxis]
    # Run inference and map the highest-probability index to its label
    prediction = model.predict(mfcc)
    predicted_class = labels[int(np.argmax(prediction))]
    return predicted_class
# Load your pre-trained model
model = tf.keras.models.load_model("voice_classification_modelm.h5")
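# Optional sanity check: the preprocessing in predict_class assumes the
# network expects (batch, 13, 100, 1) inputs. Uncomment to verify against
# the loaded model:
# print(model.input_shape)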
# Define the class labels based on your folder names
labels = [
    "all_vowels_healthy",
    "allvowels_functional",
    "allvowels_laryngitis",
    "allvowels_lukoplakia",
    "allvowels_psychogenic",
    "allvowels_rlnp",
    "allvowels_sd",
]
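# Optional smoke test before launching the UI. "example_audio.wav" is the
# sample file referenced in the interface examples below; the call is left
# commented out so the app still starts if the file is absent:
# print(predict_class("example_audio.wav", model, labels))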
# Gradio callback: classify the uploaded audio and report errors as text
def classify_audio(audio_file):
    # Gradio passes None when no file was provided
    if audio_file is None:
        return "Error: no audio file provided."
    try:
        predicted_class = predict_class(audio_file, model, labels)
        return f"Predicted Class: {predicted_class}"
    except Exception as e:
        return f"Error: {str(e)}"
# Create the Gradio interface
interface = gr.Interface(
    fn=classify_audio,
    # Gradio 4+ takes `sources=[...]`; Gradio 3 used `source="upload"`
    inputs=gr.Audio(sources=["upload"], type="filepath", label="Upload an Audio File"),
    outputs=gr.Textbox(label="Predicted Class"),
    title="Voice Classification",
    description="Upload an audio file to classify its voice type.",
    examples=["example_audio.wav"],  # Replace with paths to sample audio files
)
# Launch the app
interface.launch()
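# When running locally, Gradio can also create a temporary public link
# via interface.launch(share=True).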