# Spaces: awacke1 / Speech-recognition
# Runtime error
# File size: 1,784 Bytes
import streamlit as st
import torch
import torchaudio
import requests
from io import BytesIO

# Load a pretrained speech-recognition model.
#
# torch.hub.load() expects the name of an entry point defined in the target
# repo's hubconf.py, not a Hugging Face model id, so the previous
# torch.hub.load('pytorch/fairseq', 'facebook/wav2vec2-large-xlsr-53') call
# could not succeed. torchaudio (already imported by this file) ships wav2vec
# 2.0 checkpoints fine-tuned for ASR together with their label set, so one of
# those bundles is used instead.
# NOTE(review): this checkpoint is English-only. XLSR-53 itself is a
# pretrained encoder without an ASR output layer; choose another ASR bundle
# from torchaudio.pipelines if other languages are required.
# NOTE(review): Streamlit re-executes the script on every interaction, so the
# model is rebuilt each time; consider the caching decorator offered by the
# installed Streamlit version.
model_name = "WAV2VEC2_ASR_BASE_960H"
_bundle = getattr(torchaudio.pipelines, model_name)
model = _bundle.get_model().eval()
_labels = _bundle.get_labels()
_BLANK_INDEX = 0  # get_labels() places the CTC blank token first
_WORD_SEPARATOR = "|"  # label the bundle uses to mark word boundaries
_DOWNLOAD_TIMEOUT_SECONDS = 30


def _transcribe_waveform(waveform, sample_rate):
    """Return the greedy CTC transcription of a decoded waveform.

    Args:
        waveform: Tensor as returned by ``torchaudio.load`` (channels first).
        sample_rate: Sample rate of ``waveform`` in Hz.

    Returns:
        The transcription as a plain string ("" for empty audio).
    """
    if waveform.numel() == 0:
        return ""

    # The model treats the first dimension as the batch, so multi-channel
    # audio must be mixed down to a single channel first.
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # The checkpoint was trained at a fixed sample rate; other rates would be
    # interpreted as sped-up or slowed-down speech.
    if sample_rate != _bundle.sample_rate:
        waveform = torchaudio.functional.resample(
            waveform, sample_rate, _bundle.sample_rate
        )

    with torch.no_grad():
        emission, _ = model(waveform)

    # Greedy CTC decoding: best label per frame, collapse repeats, drop blanks.
    best_path = torch.unique_consecutive(emission[0].argmax(dim=-1))
    chars = [_labels[i] for i in best_path.tolist() if i != _BLANK_INDEX]
    return "".join(chars).replace(_WORD_SEPARATOR, " ").strip()


# Create a function to transcribe audio from a URL using the model
def transcribe_audio(url):
    """Download the audio file at ``url`` and return its transcription.

    Args:
        url: HTTP(S) address of an audio file torchaudio can decode.

    Returns:
        The transcription as a string.

    Raises:
        requests.RequestException: The download failed, timed out, or the
            server answered with an error status.
    """
    # NOTE(review): ``url`` comes straight from user input, so the server will
    # fetch arbitrary addresses; restrict allowed hosts if this is deployed
    # somewhere with access to internal services.
    response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
    # Without this an HTML error page would be handed to the audio decoder.
    response.raise_for_status()

    waveform, sample_rate = torchaudio.load(BytesIO(response.content))
    return _transcribe_waveform(waveform, sample_rate)


# Define the Streamlit app
st.title("Speech Recognition with Hugging Face")

# Add a file uploader to allow the user to upload an audio file
audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav"])

if audio_file is not None:
    try:
        waveform, sample_rate = torchaudio.load(audio_file)
        transcription = _transcribe_waveform(waveform, sample_rate)
    except RuntimeError as exc:
        # Presumably decode/shape failures surface as RuntimeError; report
        # them in the page so the URL input below still renders.
        st.error(f"Could not transcribe the uploaded file: {exc}")
    else:
        # Display the transcription
        st.write("Transcription:")
        st.write(transcription)

# Add a text input to allow the user to enter a URL of an audio file
url = st.text_input("Enter the URL of an audio file")
if url:
    try:
        # Transcribe the audio from the URL using the model
        transcription = transcribe_audio(url)
    except (requests.RequestException, RuntimeError) as exc:
        st.error(f"Could not transcribe audio from the URL: {exc}")
    else:
        # Display the transcription
        st.write("Transcription:")
        st.write(transcription)