schichtplan / app.py
mgokg's picture
Update app.py
f59ad6c verified
raw
history blame
2.06 kB
import os
import io
import streamlit as st
from groq import Groq
import soundfile as sf
from st_audiorec import st_audiorec
# Load the Groq API key from the environment.
api_key = os.getenv('groq_whisper')

# Treat an empty value the same as a missing one: a blank key cannot
# authenticate, so stop here with a clear message instead of building a
# client that will presumably only fail once a request is sent.
if not api_key:
    st.error("The 'groq_whisper' environment variable is not set. Please set it and restart the app.")
    st.stop()

# Initialize Groq client
client = Groq(api_key=api_key)
def process_audio(audio_data):
    """Transcribe a recording via the Groq audio transcription API.

    Args:
        audio_data: A ``(sample_rate, samples)`` pair. The samples are
            encoded as an in-memory WAV file before being uploaded.

    Returns:
        The transcription text. If anything fails (unpacking the input,
        encoding the WAV, or the API request), a string of the form
        ``"An error occurred: <message>"`` is returned instead of raising.
    """
    try:
        rate, pcm = audio_data

        # Encode the samples as WAV entirely in memory; no temp file needed.
        with io.BytesIO() as buffer:
            sf.write(buffer, pcm, rate, format='WAV')
            wav_bytes = buffer.getvalue()

        # Upload as a (filename, content, MIME type) tuple and request a
        # transcription with language code "de" at temperature 0.
        result = client.audio.transcriptions.create(
            file=("recording.wav", wav_bytes, "audio/wav"),
            model="whisper-large-v3-turbo",
            prompt="transcribe",
            language="de",
            response_format="json",
            temperature=0.0,
        )
        return result.text
    except Exception as exc:
        # Callers display the returned string, so report failures as text.
        return f"An error occurred: {exc}"
# Streamlit UI
st.title("🎀 Live Audio Transcription")
st.write("Record audio using your microphone and get real-time transcription")

# Audio recorder component; its result is treated as WAV bytes below and is
# falsy until a recording is available.
audio_bytes = st_audiorec()

if audio_bytes:
    samples = None
    try:
        # Decode the recorded bytes into a sample array using soundfile.
        with io.BytesIO(audio_bytes) as wav_io:
            samples, sample_rate = sf.read(wav_io)
    except RuntimeError as exc:
        # soundfile's decode errors derive from RuntimeError; show a message
        # instead of letting the script run crash with a traceback.
        st.error(f"Could not decode the recorded audio: {exc}")

    if samples is not None:
        # Downmix any multi-channel recording (not only stereo) to mono by
        # averaging across the channel axis.
        if samples.ndim > 1:
            samples = samples.mean(axis=1)

        # Process the audio
        with st.spinner("Transcribing..."):
            transcription = process_audio((sample_rate, samples))

        # process_audio reports failures as a string with this prefix rather
        # than raising, so do not present such a result as a success.
        if transcription.startswith("An error occurred:"):
            st.error(transcription)
        else:
            # Display results
            st.success("Transcription Complete!")
            st.subheader("Result:")
            st.write(transcription)

    # Always offer playback of what was recorded.
    st.audio(audio_bytes, format='audio/wav')