import gradio as gr
import torchaudio
from speechbrain.pretrained import EncoderClassifier

# Fetch the pretrained SpeechBrain checkpoint once at import time so every
# request reuses the same model instance; files are cached under ./tmp.
# NOTE(review): this repo appears to be published as a speech-enhancement
# model — confirm that EncoderClassifier is the right interface for it.
model = EncoderClassifier.from_hparams(
    source="speechbrain/mtl-mimic-voicebank",
    savedir="tmp",
)

# Define the function to transcribe audio
def transcribe(audio) -> str:
    """Run the module-level ``model`` on an audio file and return its labels.

    Args:
        audio: Path to the audio file to analyse, or ``None`` when the
            request was submitted without a file.

    Returns:
        The label(s) predicted by the model, joined with ", ", or a short
        prompt asking for a file when no audio was supplied.
    """
    # Gradio hands the callback None when nothing was uploaded; bail out with
    # a readable message instead of letting torchaudio.load raise.
    if audio is None:
        return "Please upload an audio file."

    # torchaudio.load yields a (channels, time) tensor plus the sample rate.
    # NOTE(review): the sample rate is not checked or resampled — confirm the
    # uploaded files match the rate the model was trained on.
    signal, _rate = torchaudio.load(audio)

    # classify_batch treats dim 0 as the batch axis, so average the channels
    # to feed a multi-channel file as one utterance rather than several.
    signal = signal.mean(dim=0, keepdim=True)

    # classify_batch returns (out_prob, score, index, text_lab); only the
    # decoded labels are meaningful in a text output.
    *_, labels = model.classify_batch(signal)
    return ", ".join(str(label) for label in labels)

# Stylesheet override passed to Gradio: hides the default page footer.
custom_css = "\nfooter {visibility: hidden;}\n"

# Build the Gradio interface: one uploaded audio file in, plain text out.
demo = gr.Interface(
    fn=transcribe,  # Callback invoked with the uploaded audio
    # type="filepath" makes Gradio pass a path string to the callback, which
    # is what torchaudio.load needs; the default ("numpy") would pass a
    # (sample_rate, array) tuple instead. `sources` is given as a list, the
    # documented form.
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="text",  # Render the callback's return value as text
    css=custom_css,  # Hide the Gradio footer
)

# Start the web server for the interface.
demo.launch()