File size: 1,535 Bytes
94ac2ac d242d3a c1dd4e9 62e68d5 c1dd4e9 d242d3a 62e68d5 d242d3a 62e68d5 d242d3a 62e68d5 d242d3a 62e68d5 c1dd4e9 06c4ac4 c1dd4e9 06c4ac4 d242d3a c1dd4e9 94ac2ac c1dd4e9 6e39d3b c1dd4e9 6e39d3b d242d3a 7bd33ad d242d3a 62e68d5 d242d3a c1dd4e9 d242d3a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import streamlit as st
from transformers import pipeline
import tempfile
from pydub import AudioSegment
import numpy as np
# Build the speech-recognition pipeline; cached so the model loads only once.
@st.cache_resource
def load_asr_pipeline():
    """Return a cached Hugging Face ASR pipeline (Ukrainian Whisper-small)."""
    return pipeline("automatic-speech-recognition", model="Yehor/whisper-small-ukrainian")
st.title("Voice Recognition App using Whisper")
st.write("Upload an audio file and the Whisper model will transcribe it to text.")

# Load the ASR pipeline (cached via st.cache_resource, so this is cheap on rerun)
asr_pipeline = load_asr_pipeline()
st.write("Model loaded successfully.")

# File uploader for audio file
uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "m4a"])

if uploaded_file is not None:
    # Persist the upload so pydub/ffmpeg can read it from disk. Keep the
    # original suffix — ffmpeg uses it to sniff the container (m4a vs mp3).
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(uploaded_file.read())
        temp_file_path = temp_file.name

    try:
        # Decode once and convert directly to Whisper's expected input:
        # mono, 16 kHz. (The pipeline is fed a numpy array below, so the
        # previous export-to-WAV-and-reload round trip — which also used
        # the insecure, deprecated tempfile.mktemp — was redundant.)
        audio = AudioSegment.from_file(temp_file_path).set_frame_rate(16000).set_channels(1)
    finally:
        os.remove(temp_file_path)  # don't leak a temp file per upload

    st.audio(uploaded_file, format="audio/wav")
    st.write("Transcribing audio...")

    # Whisper's feature extractor expects float32 samples in [-1.0, 1.0];
    # feeding raw integer PCM values as float32 degrades transcription.
    samples = np.array(audio.get_array_of_samples(), dtype=np.float32)
    samples /= audio.max_possible_amplitude

    # Pass the sampling rate explicitly so the pipeline does not have to
    # assume one for the raw array.
    result = asr_pipeline({"raw": samples, "sampling_rate": 16000})

    # Display transcription
    st.write("Transcription:")
    st.write(result["text"])
|