Spaces:
Runtime error
Runtime error
File size: 1,784 Bytes
8079286 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import streamlit as st
import torch
import torchaudio
import requests
from io import BytesIO
# Load the speech-recognition model at module level; the code further down
# reads it through the global name `model`.
# NOTE(review): the original comment calls this a Hugging Face model, yet the
# id is handed to torch.hub.load as an entry point of the 'pytorch/fairseq'
# repo. Confirm that repo really exposes an entry point with this name.
model_name = "facebook/wav2vec2-large-xlsr-53"
model = torch.hub.load('pytorch/fairseq', model_name)
def transcribe_audio(url, timeout=30):
    """Download the audio file at *url* and return its transcription text.

    Args:
        url: Address of the audio file; it is fetched with an HTTP GET.
        timeout: Seconds to wait on the server before giving up.

    Returns:
        The ``'text'`` entry of the first item returned by
        ``model.decoder.decode``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    # Without an explicit timeout an unresponsive host would block this call
    # (and the page that triggered it) indefinitely.
    response = requests.get(url, timeout=timeout)
    # Stop on 4xx/5xx here rather than passing an error page to the decoder.
    response.raise_for_status()

    # Decode straight from memory; the sample rate is not used further down.
    waveform, _sample_rate = torchaudio.load(BytesIO(response.content))

    # Inference only, so gradient tracking is switched off.
    # NOTE(review): this assumes the global `model` exposes feature_extractor,
    # feature_aggregator and decoder.decode — confirm against the loaded model.
    with torch.no_grad():
        features = model.feature_extractor(waveform)
        logits = model.feature_aggregator(features)
        transcription = model.decoder.decode(logits)
    return transcription[0]['text']
# Streamlit page: a title followed by two independent inputs (file upload and
# URL box). Each input that is filled in is transcribed and written out.
st.title("Speech Recognition with Hugging Face")

# Input 1: an uploaded mp3/wav file, decoded directly from the upload object.
audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav"])
if audio_file is not None:
    signal, _rate = torchaudio.load(audio_file)
    # Inference only, so gradient tracking is switched off while the three
    # model stages run back to back.
    with torch.no_grad():
        transcription = model.decoder.decode(
            model.feature_aggregator(model.feature_extractor(signal))
        )
    st.write("Transcription:")
    st.write(transcription[0]['text'])

# Input 2: a URL pointing at an audio file; downloading and inference are
# delegated to transcribe_audio, which already returns the plain text.
url = st.text_input("Enter the URL of an audio file")
if url:
    transcription = transcribe_audio(url)
    for item in ("Transcription:", transcription):
        st.write(item)
|