"""Streamlit app: record microphone audio and transcribe it with Groq Whisper."""

import io
import os

import soundfile as sf
import streamlit as st
from groq import Groq
from st_audiorec import st_audiorec

# Load environment variables
# An empty value is treated like a missing one: the client cannot
# authenticate with it, so fail early with a clear message instead.
api_key = os.getenv('groq_whisper')
if not api_key:
    st.error("The 'groq_whisper' environment variable is not set. Please set it and restart the app.")
    st.stop()

# Initialize Groq client
client = Groq(api_key=api_key)


def _transcribe(sample_rate, samples):
    """Encode *samples* as an in-memory WAV and return the transcription text.

    Args:
        sample_rate: Sample rate passed to ``sf.write``.
        samples: Audio samples passed to ``sf.write``.

    Returns:
        The ``text`` attribute of the Groq transcription response.

    Raises:
        Exception: Whatever ``soundfile`` or the Groq client raises; nothing
            is caught here so callers can tell failure apart from a result.
    """
    # Create in-memory WAV file
    with io.BytesIO() as wav_buffer:
        sf.write(wav_buffer, samples, sample_rate, format='WAV')
        wav_bytes = wav_buffer.getvalue()

    # Send to Groq for transcription
    transcription = client.audio.transcriptions.create(
        file=("recording.wav", wav_bytes, "audio/wav"),
        model="whisper-large-v3-turbo",
        prompt="transcribe",
        language="de",
        response_format="json",
        temperature=0.0,
    )
    return transcription.text


def _load_mono_samples(wav_bytes):
    """Decode audio bytes with soundfile and return ``(sample_rate, samples)``.

    Multi-channel input is averaged across channels (axis 1), so any channel
    count is down-mixed to a single channel, not only stereo.
    """
    with io.BytesIO(wav_bytes) as wav_io:
        samples, sample_rate = sf.read(wav_io)

    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    return sample_rate, samples


def process_audio(audio_data):
    """Process audio data and return transcription.

    Args:
        audio_data: A ``(sample_rate, samples)`` pair.

    Returns:
        The transcription text, or a string starting with
        ``"An error occurred: "`` if anything fails. This function never
        raises; use ``_transcribe`` when failures must be distinguishable
        from a successful result.
    """
    try:
        sample_rate, samples = audio_data
        return _transcribe(sample_rate, samples)
    except Exception as e:
        return f"An error occurred: {e}"


# Streamlit UI
st.title("🎤 Live Audio Transcription")
st.write("Record audio using your microphone and get real-time transcription")

# Audio recorder component
audio_bytes = st_audiorec()

if audio_bytes:
    try:
        with st.spinner("Transcribing..."):
            # Decode the recording and down-mix it to mono before upload
            sample_rate, samples = _load_mono_samples(audio_bytes)
            transcription = _transcribe(sample_rate, samples)
    except Exception as e:
        # Report failures as errors instead of presenting the error text
        # under a "Transcription Complete!" success banner.
        st.error(f"An error occurred: {e}")
    else:
        # Display results
        st.success("Transcription Complete!")
        st.subheader("Result:")
        st.write(transcription)

    # Always offer playback of what was recorded, even if transcription failed
    st.audio(audio_bytes, format='audio/wav')