import gradio as gr
import numpy as np
import tensorflow as tf
import librosa
import librosa.util


def predict_class(file_path, model, labels):
    """Classify one audio file with a pre-trained Keras model.

    Args:
        file_path: Path to an audio file readable by ``librosa.load``.
        model: Keras model expecting input of shape ``(1, 13, 100, 1)``.
        labels: Sequence of class names indexed by the model's output units.

    Returns:
        The label string whose output unit has the highest score.
    """
    # Load at the file's native sampling rate and extract 13 MFCCs.
    y, sr = librosa.load(file_path, sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Pad or truncate the time axis to exactly 100 frames so every
    # clip produces a fixed-size input for the model.
    mfcc = librosa.util.fix_length(mfcc, size=100, axis=1)

    # Defensive: with n_mfcc=13 this branch should never trigger,
    # but it guarantees the (13, 100) shape the model expects.
    if mfcc.shape[0] != 13:
        mfcc = librosa.util.fix_length(mfcc, size=13, axis=0)

    # Add batch and channel dimensions -> shape (1, 13, 100, 1).
    mfcc = mfcc[np.newaxis, ..., np.newaxis]

    prediction = model.predict(mfcc)
    return labels[np.argmax(prediction)]


# Pre-trained model and the class labels (order must match the
# model's output units / training folder order).
model = tf.keras.models.load_model("voice_classification_modelm.h5")

labels = [
    "all_vowels_healthy",
    "allvowels_functional",
    "allvowels_laryngitis",
    "allvowels_lukoplakia",
    "allvowels_psychogenic",
    "allvowels_rlnp",
    "allvowels_sd",
]


def classify_audio(audio_file):
    """Gradio callback: return a human-readable prediction or error string.

    Args:
        audio_file: Filepath supplied by the ``gr.Audio`` input component.

    Returns:
        ``"Predicted Class: <label>"`` on success, ``"Error: <msg>"`` on failure.
    """
    # Broad catch is deliberate: any decode/inference failure is surfaced
    # to the user in the textbox instead of crashing the app.
    try:
        predicted_class = predict_class(audio_file, model, labels)
        return f"Predicted Class: {predicted_class}"
    except Exception as e:
        return f"Error: {str(e)}"


# NOTE(review): `source="upload"` is the Gradio 3.x keyword; Gradio 4+
# renamed it to `sources=["upload"]`. Confirm the installed version —
# on Gradio 4 this call raises a TypeError.
interface = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(source="upload", type="filepath", label="Upload an Audio File"),
    outputs=gr.Textbox(label="Predicted Class"),
    title="Voice Classification",
    description="Upload an audio file to classify its voice type.",
    examples=["example_audio.wav"],  # Replace with paths to sample audio files
)

# Guarded so importing this module (e.g. for testing predict_class)
# does not start the web server.
if __name__ == "__main__":
    interface.launch()