# Spaces: Runtime error
import inspect

import gradio as gr
import torch
from datasets import load_dataset
from transformers import SpeechT5ForSpeechToText, SpeechT5Processor
from transformers.pipelines.audio_utils import ffmpeg_read
# Fetch the "en" configuration of the VoxPopuli corpus from the Hub.
# NOTE(review): no split or streaming option is passed, so every split of the
# configuration is requested at import time -- confirm this is intended.
dataset = load_dataset(path="facebook/voxpopuli", name="en")
# Example function to load audio and transcriptions
def get_sample(dataset, index=0):
    """Return the audio path and transcript of one training example.

    Args:
        dataset: Mapping of split name to an indexable collection of
            examples; must contain a ``"train"`` split.
        index: Position of the example inside the training split. Defaults
            to 0, the first example.

    Returns:
        A ``(audio_file, transcription)`` tuple, where ``audio_file`` is the
        value stored under ``sample["audio"]["path"]``.

    Raises:
        KeyError: If the example has none of the known transcript columns.
    """
    sample = dataset["train"][index]
    audio_file = sample["audio"]["path"]

    # VoxPopuli stores its transcripts under "normalized_text" / "raw_text";
    # "sentence" is kept first so datasets using that column still work.
    for key in ("sentence", "normalized_text", "raw_text"):
        if key in sample:
            return audio_file, sample[key]

    raise KeyError(
        "sample has no transcript column "
        "(expected one of 'sentence', 'normalized_text', 'raw_text')"
    )
# Initialize the SpeechT5 model and processor.
# "microsoft/speecht5_asr" is the published SpeechT5 checkpoint fine-tuned for
# speech-to-text; the processor and the model must be loaded from the same
# checkpoint so that the tokenizer vocabulary matches the decoder.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_asr")
model = SpeechT5ForSpeechToText.from_pretrained("microsoft/speecht5_asr")
# Example Gradio interface function
def transcribe(audio):
    """Transcribe an uploaded audio file with the SpeechT5 ASR model.

    Args:
        audio: Path to the audio file handed over by the Gradio ``Audio``
            component (``type="filepath"``), or ``None`` / ``""`` when the
            user submitted without uploading anything.

    Returns:
        The decoded transcription, or an empty string when no audio was given.

    Raises:
        ValueError: Propagated from ``ffmpeg_read`` when the file cannot be
            decoded (NOTE(review): this also requires an ``ffmpeg`` binary on
            the host -- confirm it is available in the deployment).
    """
    if not audio:
        return ""

    sampling_rate = 16000

    # The processor needs a waveform, not a path: decode the file to a mono
    # float array resampled to the rate passed to the processor below.
    with open(audio, "rb") as audio_fh:
        payload = audio_fh.read()
    waveform = ffmpeg_read(payload, sampling_rate=sampling_rate)

    inputs = processor(
        audio=waveform, sampling_rate=sampling_rate, return_tensors="pt"
    )

    # Speech-to-text is autoregressive: generate token ids instead of taking
    # logits from a single forward pass, which has no decoder inputs.
    with torch.no_grad():
        predicted_ids = model.generate(**inputs, max_length=200)

    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
# Load a sample to check if everything is set up
audio_file, transcription = get_sample(dataset)

# Set up Gradio interface.
# Gradio 4 renamed the Audio component's `source` argument to `sources`
# (a list), so pick whichever keyword the installed version accepts.
if "sources" in inspect.signature(gr.Audio.__init__).parameters:
    audio_input = gr.Audio(sources=["upload"], type="filepath")
else:
    audio_input = gr.Audio(source="upload", type="filepath")

iface = gr.Interface(fn=transcribe, inputs=audio_input, outputs="text")

# Launch the interface
iface.launch()