File size: 2,063 Bytes
1328c27
79c7151
f59ad6c
93a1282
 
f59ad6c
1328c27
f59ad6c
 
 
 
79c7151
93a1282
f59ad6c
93a1282
 
f59ad6c
 
1328c27
f59ad6c
1328c27
f59ad6c
 
 
 
1328c27
f59ad6c
 
 
 
 
 
 
 
 
 
1328c27
f59ad6c
1948d7c
79c7151
f59ad6c
 
1a4d67e
f59ad6c
 
dacc07c
f4903a8
f59ad6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
import io
import streamlit as st
from groq import Groq
import soundfile as sf
from st_audiorec import st_audiorec

# Load the Groq API key from the 'groq_whisper' environment variable.
# A blank value is treated the same as a missing one: os.getenv returns ""
# (not None) for a variable that is set but empty, and an empty key is not
# a usable credential, so the user gets the configuration error up front.
api_key = os.getenv('groq_whisper')
if not api_key:
    st.error("The 'groq_whisper' environment variable is not set. Please set it and restart the app.")
    st.stop()

# Initialize the Groq client used by process_audio for transcription requests
client = Groq(api_key=api_key)

def process_audio(audio_data):
    """Transcribe a recording via the Groq audio transcription endpoint.

    Args:
        audio_data: A two-item sequence unpacked as ``(sample_rate, samples)``.
            Both values are passed straight to ``soundfile.write``.

    Returns:
        The ``text`` attribute of the transcription response. If any step
        raises, the exception is not propagated; instead a string of the
        form ``"An error occurred: <message>"`` is returned.
    """
    try:
        rate, pcm = audio_data

        # Encode the samples as a WAV payload entirely in memory.
        with io.BytesIO() as buffer:
            sf.write(buffer, pcm, rate, format='WAV')
            wav_payload = buffer.getvalue()

        # Fixed request settings: German-language, deterministic decoding.
        request_options = {
            "model": "whisper-large-v3-turbo",
            "prompt": "transcribe",
            "language": "de",
            "response_format": "json",
            "temperature": 0.0,
        }
        result = client.audio.transcriptions.create(
            file=("recording.wav", wav_payload, "audio/wav"),
            **request_options,
        )
        return result.text
    except Exception as exc:
        # Failures are reported to the caller as text rather than raised.
        return "An error occurred: " + str(exc)

# Streamlit UI
st.title("🎤 Live Audio Transcription")
st.write("Record audio using your microphone and get real-time transcription")

# Audio recorder component; yields a falsy value until a recording exists
audio_bytes = st_audiorec()

if audio_bytes:
    # Decode the recorded bytes into a sample array. The in-memory buffer is
    # only needed for the read itself, so it is closed right afterwards.
    with io.BytesIO(audio_bytes) as wav_io:
        samples, sample_rate = sf.read(wav_io)

    # Downmix any multi-channel recording (not just stereo) to mono by
    # averaging across the channel axis.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Process the audio
    with st.spinner("Transcribing..."):
        transcription = process_audio((sample_rate, samples))

    # process_audio reports failures as a string with this prefix instead of
    # raising, so check for it before announcing success.
    if transcription.startswith("An error occurred: "):
        st.error(transcription)
    else:
        st.success("Transcription Complete!")
        st.subheader("Result:")
        st.write(transcription)

    # Always offer playback of the original recording
    st.audio(audio_bytes, format='audio/wav')