"""Streamlit app that transcribes speech with the Hugging Face Whisper API.

Audio comes either from an uploaded file or from a short microphone
recording captured with PyAudio.
"""

import os
import tempfile
import wave

import pyaudio
import requests
import streamlit as st

API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
headers = {"Authorization": f"Bearer {st.secrets['hf_token']}"}

# Upper bound for one inference request so a stalled connection cannot hang
# the app forever.
REQUEST_TIMEOUT_SECONDS = 120

# st.session_state key holding the path of the most recent recording.
RECORDING_KEY = "recorded_wav_path"


def query(file):
    """Send the audio in *file* to the inference API and return the reply.

    Args:
        file: A binary file-like object; its remaining content is read and
            posted as the request body.

    Returns:
        The decoded JSON response. When the request fails or the body is not
        valid JSON, a dict with an ``"error"`` key describing the problem is
        returned instead, so callers can display it like any API error.
    """
    data = file.read()
    try:
        response = requests.post(
            API_URL,
            headers=headers,
            data=data,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        return {"error": f"Request failed: {exc}"}

    try:
        return response.json()
    except ValueError:
        # Non-JSON body (for example an HTML gateway error page).
        return {
            "error": "Response was not valid JSON.",
            "status_code": response.status_code,
            "body": response.text[:500],
        }


def record_audio(duration=5, sample_rate=44100, chunk=1024, channels=1):
    """Record 16-bit audio from the default input device.

    Args:
        duration: Recording length in seconds.
        sample_rate: Sampling rate in Hz.
        chunk: Number of frames read per call.
        channels: Number of input channels.

    Returns:
        A ``(frames, sample_rate)`` tuple where ``frames`` is the list of raw
        byte chunks read from the stream.

    Raises:
        OSError: Propagated from PyAudio when the input device cannot be
            opened or read.
    """
    chunk_count = int(sample_rate / chunk * duration)
    p = pyaudio.PyAudio()
    try:
        stream = p.open(
            format=pyaudio.paInt16,
            channels=channels,
            rate=sample_rate,
            input=True,
            frames_per_buffer=chunk,
        )
        try:
            st.info(f"Recording for {duration} seconds...")
            frames = [stream.read(chunk) for _ in range(chunk_count)]
            st.info("Recording finished.")
        finally:
            # Release the stream even when a read fails midway.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()
    return frames, sample_rate


def _write_wav(frames, sample_rate, channels=1):
    """Write raw 16-bit *frames* to a new temporary WAV file; return its path."""
    # Reserve a unique path, then close the handle before wave reopens it.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmpfile:
        wav_path = tmpfile.name
    with wave.open(wav_path, 'wb') as wf:
        wf.setnchannels(channels)
        # Module-level helper: no throwaway PyAudio instance is created.
        wf.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16))
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(frames))
    return wav_path


def _discard_recording():
    """Forget the stored recording and delete its temporary file, if any."""
    path = st.session_state.get(RECORDING_KEY)
    if path is None:
        return
    del st.session_state[RECORDING_KEY]
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass  # Already removed; nothing left to clean up.


st.title("Speech Recognition with Whisper")

option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))

uploaded_audio = None
recording_path = None

if option == 'Upload File':
    uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
    if uploaded_file is not None:
        # Play back with the upload's own MIME type (mp3/flac are not WAV).
        st.audio(uploaded_file, format=uploaded_file.type or 'audio/wav')
        uploaded_audio = uploaded_file
else:
    duration = st.slider("Recording duration (seconds)", 1, 30, 5)
    if st.button('Start Recording'):
        try:
            frames, sample_rate = record_audio(duration=duration)
        except OSError as exc:
            st.error(f"Could not record audio: {exc}")
        else:
            _discard_recording()
            st.session_state[RECORDING_KEY] = _write_wav(frames, sample_rate)

    # The script is re-run on every interaction and 'Start Recording' is only
    # True on the run its click triggered, so the recording is looked up from
    # session state to still be available when 'Transcribe' is clicked.
    recording_path = st.session_state.get(RECORDING_KEY)
    if recording_path is not None and not os.path.exists(recording_path):
        _discard_recording()
        recording_path = None
    if recording_path is not None:
        st.audio(recording_path, format='audio/wav')

if uploaded_audio is not None or recording_path is not None:
    if st.button('Transcribe'):
        with st.spinner('Transcribing...'):
            if recording_path is not None:
                # Close the handle before the file is deleted below.
                with open(recording_path, 'rb') as audio_file:
                    result = query(audio_file)
            else:
                result = query(uploaded_audio)

        if isinstance(result, dict) and 'text' in result:
            st.success("Transcription completed!")
            st.write("Transcribed text:")
            st.write(result['text'])
        else:
            st.error("An error occurred during transcription.")
            st.write("Error details:")
            st.write(result)

        if recording_path is not None:
            _discard_recording()

st.markdown("---")
st.write("Note: This app uses the Whisper API from Hugging Face.")