import gradio as gr
import torchaudio
from speechbrain.pretrained import EncoderClassifier

# Load the SpeechBrain model once at import time so every request reuses it.
# The checkpoint files are stored under the local "tmp" directory.
# NOTE(review): the "speechbrain/mtl-mimic-voicebank" model card appears to
# describe a speech-enhancement model (loaded through WaveformEnhancement),
# not a classifier -- confirm that EncoderClassifier is the intended interface.
model = EncoderClassifier.from_hparams(
    source="speechbrain/mtl-mimic-voicebank",
    savedir="tmp",
)

# Define the function to transcribe audio
def transcribe(audio):
    """Run an uploaded audio file through the pretrained SpeechBrain model.

    Args:
        audio: Filesystem path of the audio file to process. Gradio supplies
            a path when the Audio component is created with
            ``type="filepath"``. ``None`` means no file was provided.

    Returns:
        The unmodified result of ``model.classify_batch``.
        NOTE(review): for an EncoderClassifier this is presumably a tuple of
        (probabilities, score, index, label); confirm, and consider returning
        only the label if a plain-text result is wanted.

    Raises:
        gr.Error: If ``audio`` is ``None``.
    """
    if audio is None:
        # Surface a readable message in the UI instead of a torchaudio traceback.
        raise gr.Error("Please upload an audio file first.")

    # Load as [time, channels], the layout the model's audio normalizer takes.
    signal, rate = torchaudio.load(audio, channels_first=False)

    # Bring the recording to the sample rate / channel layout the model was
    # configured for (mirrors what SpeechBrain's own load_audio does), so a
    # stereo file is not mistaken for a batch of two mono recordings.
    signal = model.audio_normalizer(signal, rate)

    # classify_batch expects a leading batch dimension: [batch, time].
    output = model.classify_batch(signal.unsqueeze(0))
    return output


# Define a CSS string to hide the footer
custom_css = """
footer {visibility: hidden;}
"""

# Create the Gradio interface
demo = gr.Interface(
    fn=transcribe,  # Function to process input
    # Accept uploads only and hand the callback a file path: the default
    # type="numpy" would pass a (sample_rate, ndarray) tuple, which
    # torchaudio.load cannot read.
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="text",  # Display output as text
    css=custom_css,  # Hide the Gradio footer
)

# Launch the interface
demo.launch()