|
import io
import os
import tempfile
import wave

import pyaudio
import requests
import streamlit as st
|
|
|
# Hugging Face hosted-inference endpoint that transcription requests are posted to.
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "openai/whisper-large-v3-turbo"
)

# Authorization header sent with every request; the bearer token is read from
# Streamlit's secrets store under the key 'hf_token'.
headers = {"Authorization": "Bearer {}".format(st.secrets['hf_token'])}
|
|
|
def query(file):
    """Post an audio file to ``API_URL`` and return the decoded JSON reply.

    Args:
        file: A binary file-like object exposing ``read()``. If it is
            seekable it is rewound first so the whole content is sent even
            when something else has already read from it.

    Returns:
        The parsed JSON body of the response. If the request itself fails
        (connection error, timeout) or the body is not valid JSON, a dict
        with an ``"error"`` key describing the problem is returned instead
        of raising, so the caller can display it.
    """
    seekable = getattr(file, "seekable", None)
    if callable(seekable) and seekable():
        file.seek(0)
    data = file.read()

    try:
        # Without a timeout a stalled endpoint would block the app forever.
        # The read timeout is generous because transcription can be slow.
        response = requests.post(
            API_URL, headers=headers, data=data, timeout=(10, 120)
        )
    except requests.RequestException as exc:
        return {"error": f"Request failed: {exc}"}

    try:
        return response.json()
    except ValueError:
        # Body was not JSON (e.g. an HTML error page from a gateway).
        return {
            "error": "Response was not valid JSON.",
            "status_code": response.status_code,
            "body": response.text[:500],
        }
|
|
|
def record_audio(duration=5, sample_rate=44100, chunk=1024, channels=1):
    """Record audio from a PyAudio input stream and return the raw frames.

    Args:
        duration: Requested recording length in seconds.
        sample_rate: Sampling rate passed to the input stream.
        chunk: Number of frames requested from the stream per read.
        channels: Number of input channels.

    Returns:
        A ``(frames, sample_rate)`` tuple. ``frames`` is a list of byte
        strings, one per read, captured in ``pyaudio.paInt16`` format.

    Any exception raised while opening or reading the stream propagates to
    the caller, but the stream and the PyAudio instance are released first.
    """
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=pyaudio.paInt16,
                        channels=channels,
                        rate=sample_rate,
                        input=True,
                        frames_per_buffer=chunk)
        try:
            st.info(f"Recording for {duration} seconds...")
            # Only whole chunks are read, so the capture can be marginally
            # shorter than `duration` when it is not a multiple of the
            # chunk length.
            num_chunks = int(sample_rate / chunk * duration)
            frames = [stream.read(chunk) for _ in range(num_chunks)]
            st.info("Recording finished.")
        finally:
            # Release the audio device even if a read failed part-way.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    return frames, sample_rate
|
|
|
st.title("Speech Recognition with Whisper")

option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))

# Binary file-like object to send for transcription, or None when the user
# has not provided any audio yet.
file_to_transcribe = None

if option == 'Upload File':
    uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
    if uploaded_file is not None:
        # Use the MIME type reported for the upload; it is not always WAV.
        st.audio(uploaded_file, format=uploaded_file.type or 'audio/wav')
        file_to_transcribe = uploaded_file
else:
    duration = st.slider("Recording duration (seconds)", 1, 30, 5)
    if st.button('Start Recording'):
        frames, sample_rate = record_audio(duration=duration)

        # Build the WAV container in memory; no temporary file to clean up.
        wav_buffer = io.BytesIO()
        with wave.open(wav_buffer, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
            wf.setframerate(sample_rate)
            wf.writeframes(b''.join(frames))

        # Streamlit re-runs the whole script on every interaction and a
        # button is only True on the run caused by its click. The recording
        # must therefore live in session state, otherwise it would be gone
        # by the time 'Transcribe' is clicked.
        st.session_state['recorded_wav'] = wav_buffer.getvalue()

    recorded_wav = st.session_state.get('recorded_wav')
    if recorded_wav is not None:
        st.audio(recorded_wav, format='audio/wav')
        file_to_transcribe = io.BytesIO(recorded_wav)

if file_to_transcribe is not None:
    if st.button('Transcribe'):
        with st.spinner('Transcribing...'):
            result = query(file_to_transcribe)

        # The reply is decoded JSON, which is not guaranteed to be a dict.
        if isinstance(result, dict) and 'text' in result:
            st.success("Transcription completed!")
            st.write("Transcribed text:")
            st.write(result['text'])
        else:
            st.error("An error occurred during transcription.")
            st.write("Error details:")
            st.write(result)

st.markdown("---")
st.write("Note: This app uses the Whisper API from Hugging Face.")