Spaces:
Runtime error
Runtime error
import streamlit as st | |
from transformers import Wav2Vec2ForCTC, Wav2Vec2ProcessorWithLM | |
import torch | |
import torchaudio | |
import torchaudio.functional as F | |
# Browser-tab metadata. The icon is written with escape sequences so the
# emoji survives any re-encoding of this source file.
st.set_page_config(
    page_title="Swedish Speech-to-Text",
    page_icon="\U0001F399\uFE0F",  # studio microphone emoji
)

# Header image: the same studio-microphone emoji, rendered as a PNG.
st.image(
    "https://emojipedia-us.s3.dualstack.us-west-1.amazonaws.com/thumbs/320/apple/325/studio-microphone_1f399-fe0f.png",
    width=100,
)

# Introductory text with a link to the model card.
st.markdown("""
# Swedish high-quality transcription
Generate Swedish transcripts for download from an audio file with this high-quality speech-to-text model. The model is KBLab's wav2vec 2.0 large VoxRex Swedish (C) with a 4-gram language model, which you can access [here](https://huggingface.co/viktor-enzell/wav2vec2-large-voxrex-swedish-4gram).
""")
model_name = "viktor-enzell/wav2vec2-large-voxrex-swedish-4gram"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Streamlit re-executes this script on every user interaction. Loading the
# model at module level would therefore rebuild it on each rerun, so the load
# is wrapped in Streamlit's resource cache. Newer releases call it
# `cache_resource`, older ones `experimental_singleton`; if neither exists the
# loader simply runs uncached, as the original code did.
_cache_resource = getattr(st, "cache_resource", None) or getattr(
    st, "experimental_singleton", None
)


def _load_model_and_processor():
    """Instantiate the CTC model (on `device`) and its LM-backed processor.

    Returns:
        A ``(model, processor)`` tuple built from `model_name`.
    """
    loaded_model = Wav2Vec2ForCTC.from_pretrained(model_name).to(device)
    loaded_processor = Wav2Vec2ProcessorWithLM.from_pretrained(model_name)
    return loaded_model, loaded_processor


if _cache_resource is not None:
    _load_model_and_processor = _cache_resource(_load_model_and_processor)

# Module-level names used by run_inference.
model, processor = _load_model_and_processor()
def run_inference(file):
    """Transcribe an audio file and return the text in lower case.

    Args:
        file: Anything ``torchaudio.load`` accepts (the caller passes the
            object returned by ``st.file_uploader``).

    Returns:
        The first decoded transcript of the batch, lower-cased.
    """
    # Sample rate declared to the processor below; audio at any other rate is
    # resampled to match it.
    target_sample_rate = 16_000

    waveform, sample_rate = torchaudio.load(file)

    # Downmix to a single channel by averaging instead of keeping only the
    # first channel, so speech present in any channel is retained. For mono
    # input this is a no-op on the values.
    waveform = waveform.mean(dim=0)

    # Resample after the downmix so only one signal is processed.
    if sample_rate != target_sample_rate:
        waveform = F.resample(waveform, sample_rate, target_sample_rate)

    inputs = processor(
        waveform,
        sampling_rate=target_sample_rate,
        return_tensors="pt",
        padding=True,
    ).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # The LM decoder is given a NumPy array on the CPU.
    decoded = processor.batch_decode(logits.cpu().numpy())
    return decoded.text[0].lower()
uploaded_file = st.file_uploader("Choose a file", type=[".wav"])

if uploaded_file is not None:
    # TODO: accept other audio formats and convert them to WAV (e.g. with
    # ffmpeg) before inference. The uploader above only admits .wav files, so
    # no conversion step exists yet.

    # Streamlit re-executes the script on every widget interaction, including
    # a click on the download button below. Keep the transcript for the
    # current upload in session state so inference runs once per file rather
    # than once per rerun. The key combines the file name with a hash of its
    # bytes, so a different file with the same name is transcribed afresh.
    upload_key = (uploaded_file.name, hash(uploaded_file.getvalue()))
    cached = st.session_state.get("transcript_cache")
    if cached is not None and cached[0] == upload_key:
        transcript = cached[1]
    else:
        with st.spinner("Transcribing..."):
            transcript = run_inference(uploaded_file)
        st.session_state["transcript_cache"] = (upload_key, transcript)

    st.download_button(
        "Download transcript",
        transcript,
        f"{uploaded_file.name}-swedish-transcript.txt",
    )
    with st.expander("Transcript", expanded=True):
        st.write(transcript)