acr / app.py
roman
try new approach
d242d3a
raw
history blame
1.54 kB
import streamlit as st
from transformers import pipeline
import tempfile
from pydub import AudioSegment
import numpy as np
# Build the speech-recognition pipeline (wrapped in Streamlit's resource cache)
@st.cache_resource
def load_asr_pipeline():
    """Create the automatic-speech-recognition pipeline used by the app.

    The pipeline is built from the ``Yehor/whisper-small-ukrainian``
    checkpoint. The function is decorated with ``st.cache_resource``, so
    Streamlit decides when the body actually runs again.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``automatic-speech-recognition`` task.
    """
    return pipeline(
        task="automatic-speech-recognition",
        model="Yehor/whisper-small-ukrainian",
    )
st.title("Voice Recognition App using Whisper")
st.write("Upload an audio file and the Whisper model will transcribe it to text.")

# Load the ASR pipeline
asr_pipeline = load_asr_pipeline()
st.write("Model loaded successfully.")

# Sampling rate (Hz) the audio is resampled to before it is handed to the model.
TARGET_SAMPLE_RATE = 16000

# File uploader for audio file
uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "m4a"])

if uploaded_file is not None:
    # Decode the upload from a real file on disk. The file lives in a
    # TemporaryDirectory so it is always removed when the block exits, even if
    # decoding raises (the previous NamedTemporaryFile(delete=False) +
    # tempfile.mktemp() pair leaked two files per upload, and mktemp is
    # deprecated because the returned name can be claimed by another process).
    with tempfile.TemporaryDirectory() as work_dir:
        upload_path = f"{work_dir}/upload"
        with open(upload_path, "wb") as upload_handle:
            # getvalue() returns the whole buffer regardless of the current
            # read position, unlike read().
            upload_handle.write(uploaded_file.getvalue())
        # pydub loads the decoded samples into memory, so the file is not
        # needed after this call; no intermediate WAV export is required.
        segment = AudioSegment.from_file(upload_path)

    # Play back using the MIME type reported for the upload instead of always
    # claiming WAV; fall back to the previous value if none is reported.
    st.audio(uploaded_file, format=uploaded_file.type or "audio/wav")
    st.write("Transcribing audio...")

    # Mono, resampled to the rate the model is fed with.
    segment = segment.set_frame_rate(TARGET_SAMPLE_RATE).set_channels(1)

    # get_array_of_samples() yields signed integer PCM whose range depends on
    # the sample width; scale it to floats in [-1.0, 1.0), which is what the
    # pipeline expects for a raw float waveform.
    full_scale = float(1 << (8 * segment.sample_width - 1))
    audio_input = np.array(segment.get_array_of_samples(), dtype=np.float32) / full_scale

    if audio_input.size == 0:
        # Nothing to transcribe; avoid calling the model on an empty array.
        st.warning("The uploaded file contains no audio samples.")
    else:
        # Perform transcription
        result = asr_pipeline(audio_input)

        # Display transcription
        st.write("Transcription:")
        st.write(result['text'])