Image / app.py
Dmtlant's picture
Update app.py
dfb92f3 verified
raw
history blame
2.72 kB
import streamlit as st
import requests
import pyaudio
import wave
import os
import tempfile
# Hosted inference endpoint for the openai/whisper-large-v3-turbo model.
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"

# The access token is looked up in Streamlit's secrets under the 'hf_token'
# key and sent as a Bearer credential with every request.
_hf_token = st.secrets['hf_token']
headers = {"Authorization": f"Bearer {_hf_token}"}
def query(file):
    """Post an audio file to the Whisper inference endpoint and return its reply.

    Args:
        file: Binary file-like object. Everything returned by ``file.read()``
            is sent as the raw request body.

    Returns:
        The decoded JSON reply from the endpoint. If the request itself fails
        (connection error, timeout) or the reply body is not valid JSON, a
        dict with an ``"error"`` key and no ``"text"`` key is returned, so a
        caller that checks for ``"text"`` treats it as a failed transcription
        instead of crashing.
    """
    data = file.read()
    try:
        # Without an explicit timeout, requests waits forever on a stalled
        # connection and the UI spinner would never finish.
        response = requests.post(API_URL, headers=headers, data=data, timeout=120)
    except requests.RequestException as exc:
        return {"error": f"Request failed: {exc}"}

    try:
        return response.json()
    except ValueError:
        # Error pages may come back as HTML or with an empty body; report the
        # status and a short excerpt rather than raising a decode error.
        return {
            "error": f"Non-JSON response (HTTP {response.status_code})",
            "body": response.text[:500],
        }
def record_audio(duration=5, sample_rate=44100, chunk=1024, channels=1):
    """Record audio from the default input device.

    Args:
        duration: Length of the recording in seconds.
        sample_rate: Sampling rate passed to the input stream, in Hz.
        chunk: Number of frames read from the stream per call.
        channels: Number of input channels to open.

    Returns:
        A ``(frames, sample_rate)`` tuple, where ``frames`` is the list of raw
        16-bit sample buffers in the order they were read.

    The stream and the PyAudio instance are released even when opening the
    device or reading from it raises; the exception itself still propagates.
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=pyaudio.paInt16,
                        channels=channels,
                        rate=sample_rate,
                        input=True,
                        frames_per_buffer=chunk)
        try:
            st.info(f"Recording for {duration} seconds...")
            # Whole chunks only: a trailing partial chunk is not recorded.
            chunk_count = int(sample_rate / chunk * duration)
            frames = [stream.read(chunk) for _ in range(chunk_count)]
            st.info("Recording finished.")
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()
    return frames, sample_rate
st.title("Speech Recognition with Whisper")
option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))

# Session-state key holding the path of the most recent microphone recording.
# Streamlit re-runs this script on every widget interaction, and st.button is
# True only during the run triggered by its own click. The recording therefore
# has to be remembered across runs, otherwise it is gone by the time the
# 'Transcribe' button is clicked.
RECORDING_KEY = 'recorded_audio_path'


def _discard_recording():
    """Delete the temporary WAV of the stored recording, if any, and forget it."""
    if RECORDING_KEY not in st.session_state:
        return
    stale_path = st.session_state[RECORDING_KEY]
    del st.session_state[RECORDING_KEY]
    if os.path.exists(stale_path):
        os.unlink(stale_path)


# Either the uploaded file object or the path of the recorded WAV file;
# None while there is nothing to transcribe yet.
file_to_transcribe = None

if option == 'Upload File':
    uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
    if uploaded_file is not None:
        # mp3 and flac uploads are accepted too, so use the upload's own MIME
        # type and fall back to WAV only when none is reported.
        st.audio(uploaded_file, format=uploaded_file.type or 'audio/wav')
        file_to_transcribe = uploaded_file
else:
    duration = st.slider("Recording duration (seconds)", 1, 30, 5)
    if st.button('Start Recording'):
        # A new recording replaces the previous one; remove its temp file.
        _discard_recording()
        frames, sample_rate = record_audio(duration=duration)
        # Reserve a temp path and close that handle before the wave module
        # opens the same path for writing.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmpfile:
            recording_path = tmpfile.name
        with wave.open(recording_path, 'wb') as wf:
            wf.setnchannels(1)
            # Module-level helper: avoids creating a PyAudio instance that
            # would never be terminated.
            wf.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
            wf.setframerate(sample_rate)
            wf.writeframes(b''.join(frames))
        st.session_state[RECORDING_KEY] = recording_path

    recording_path = st.session_state.get(RECORDING_KEY)
    if recording_path is not None and os.path.exists(recording_path):
        st.audio(recording_path, format='audio/wav')
        file_to_transcribe = recording_path

if file_to_transcribe is not None and st.button('Transcribe'):
    from_microphone = option == 'Record from Microphone'
    try:
        with st.spinner('Transcribing...'):
            if from_microphone:
                # Open the recording only for the duration of the upload so
                # the handle is closed before the file is deleted.
                with open(file_to_transcribe, 'rb') as audio_file:
                    result = query(audio_file)
            else:
                result = query(file_to_transcribe)
    finally:
        # The recording is single-use: remove its temp file after the
        # transcription attempt, whether or not it succeeded.
        if from_microphone:
            _discard_recording()

    if isinstance(result, dict) and 'text' in result:
        st.success("Transcription completed!")
        st.write("Transcribed text:")
        st.write(result['text'])
    else:
        st.error("An error occurred during transcription.")
        st.write("Error details:")
        st.write(result)

st.markdown("---")
st.write("Note: This app uses the Whisper API from Hugging Face.")