"""Streamlit front end that transcribes speech with Whisper.

Audio is taken either from an uploaded file or from the microphone and is
sent as raw bytes to the Hugging Face Inference API endpoint in ``API_URL``.
"""

import base64
from io import BytesIO

import requests
import streamlit as st

API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
# Upper bound (seconds) on a single inference request so a stalled
# connection cannot block the app forever.
REQUEST_TIMEOUT_SECONDS = 120
headers = {"Authorization": f"Bearer {st.secrets['HF_API_KEY']}"}


def query(audio_bytes):
    """Send raw audio bytes to the inference endpoint and return the decoded JSON.

    Args:
        audio_bytes: The audio payload, posted unchanged as the request body.

    Returns:
        The parsed JSON body of the response.  If the request itself fails,
        or the body is not valid JSON, a dict with an ``"error"`` key is
        returned instead of raising, so the caller's "no 'text' key" branch
        can display the details.
    """
    try:
        response = requests.post(
            API_URL,
            headers=headers,
            data=audio_bytes,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        return {"error": f"Request to the inference API failed: {exc}"}

    try:
        return response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON
        # (for example an HTML error page from a gateway).
        return {
            "error": f"Non-JSON response (HTTP {response.status_code})",
            "body": response.text[:500],
        }


def _record_from_microphone():
    """Render the microphone recorder and return the recorded bytes, or None.

    Uses Streamlit's built-in ``st.audio_input`` widget.  A hand-written
    JavaScript recorder inside ``st.components.v1.html`` cannot hand its data
    back to the script: the component is rendered in its own iframe and has
    no access to the DOM of Streamlit's widgets.
    """
    if not hasattr(st, "audio_input"):
        st.error(
            "Microphone recording needs a Streamlit release that provides "
            "st.audio_input. Please upgrade Streamlit or upload a file instead."
        )
        return None

    st.write("Click the microphone below and allow microphone access to start recording")
    recording = st.audio_input("Record audio")
    if recording is None:
        st.warning("No audio recorded. Please record audio before submitting.")
        return None

    recorded_bytes = recording.getvalue()
    st.audio(recorded_bytes, format="audio/wav")
    return recorded_bytes


st.title("Speech Recognition with Whisper")

option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))

# Explicit sentinel instead of probing locals(): stays None until one of the
# input methods actually yields audio on this script run.
audio_bytes = None

if option == 'Upload File':
    uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
    if uploaded_file is not None:
        # getvalue() returns the full content regardless of the current
        # stream position, unlike read().
        audio_bytes = uploaded_file.getvalue()
        # Preview with the upload's own MIME type so mp3/flac files are not
        # mislabelled as WAV.
        upload_mime = getattr(uploaded_file, "type", None) or 'audio/wav'
        st.audio(audio_bytes, format=upload_mime)
else:
    # The widget keeps its value across reruns, so the recording is still
    # available on the rerun triggered by pressing "Transcribe".
    audio_bytes = _record_from_microphone()

if audio_bytes is not None and st.button('Transcribe'):
    with st.spinner('Transcribing...'):
        result = query(audio_bytes)

    if isinstance(result, dict) and 'text' in result:
        st.success("Transcription completed!")
        st.write("Transcribed text:")
        st.write(result['text'])
    else:
        st.error("An error occurred during transcription.")
        st.write("Error details:")
        st.write(result)

st.markdown("---")
st.write("Note: This app uses the Whisper API from Hugging Face.")