File size: 1,535 Bytes
94ac2ac
d242d3a
c1dd4e9
 
62e68d5
c1dd4e9
d242d3a
 
 
 
 
62e68d5
d242d3a
62e68d5
d242d3a
62e68d5
d242d3a
 
 
62e68d5
c1dd4e9
 
06c4ac4
c1dd4e9
 
 
 
 
06c4ac4
d242d3a
c1dd4e9
 
 
94ac2ac
c1dd4e9
6e39d3b
c1dd4e9
6e39d3b
d242d3a
 
 
7bd33ad
d242d3a
 
62e68d5
d242d3a
c1dd4e9
d242d3a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
import tempfile

import numpy as np
import streamlit as st
from pydub import AudioSegment
from transformers import pipeline

# Load the ASR pipeline
@st.cache_resource
def load_asr_pipeline():
    """Build and cache the Ukrainian Whisper ASR pipeline.

    ``st.cache_resource`` ensures the model is downloaded and loaded only
    once per server process instead of on every Streamlit rerun.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="Yehor/whisper-small-ukrainian",
    )

st.title("Voice Recognition App using Whisper")

st.write("Upload an audio file and the Whisper model will transcribe it to text.")

# Load the ASR pipeline (cached across reruns by st.cache_resource)
asr_pipeline = load_asr_pipeline()
st.write("Model loaded successfully.")

# File uploader for audio file
uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "m4a"])

if uploaded_file is not None:
    # Keep the original extension so pydub/ffmpeg can detect the container
    # format; fall back to .wav if the upload has no extension.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(uploaded_file.read())
        temp_file_path = temp_file.name

    st.audio(uploaded_file)

    st.write("Transcribing audio...")

    try:
        # Decode once and resample to 16 kHz mono — the rate Whisper expects.
        # (The original code exported an intermediate WAV via the deprecated,
        # race-prone tempfile.mktemp and then re-read it; one decode suffices.)
        audio = (
            AudioSegment.from_file(temp_file_path)
            .set_frame_rate(16000)
            .set_channels(1)
        )

        # Convert integer PCM to float32 normalized to [-1.0, 1.0]. Whisper's
        # feature extractor expects normalized floats; raw int16 magnitudes
        # (e.g. ±32768) would yield garbage transcriptions.
        samples = np.array(audio.get_array_of_samples(), dtype=np.float32)
        samples /= float(1 << (8 * audio.sample_width - 1))

        # Pass the sampling rate explicitly instead of handing the pipeline a
        # bare array and hoping it assumes the right rate.
        result = asr_pipeline({"raw": samples, "sampling_rate": 16000})
    finally:
        # Always remove the temp file, even if decoding/transcription fails,
        # so repeated uploads do not leak files in the temp directory.
        os.remove(temp_file_path)

    st.write("Transcription:")
    st.write(result["text"])