mgokg committed on
Commit
f59ad6c
·
verified ·
1 Parent(s): 3a64588

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -52
app.py CHANGED
@@ -1,70 +1,64 @@
1
- import streamlit as st
2
  import os
3
  import io
 
4
  from groq import Groq
5
  import soundfile as sf
6
- from tempfile import NamedTemporaryFile
7
-
8
- # API-Key aus Umgebungsvariable laden
9
- api_key = os.getenv('groqwhisper')
10
 
11
- if not api_key:
12
- st.error("Bitte setzen Sie die Umgebungsvariable 'groqwhisper'")
 
 
13
  st.stop()
14
 
15
- # Groq-Client initialisieren
16
  client = Groq(api_key=api_key)
17
 
18
- def process_audio(audio_bytes):
19
- """Verarbeitet Audio-Bytes und gibt Transkript zurück"""
20
- tmpfile = None # Initialisierung für finally-Block
21
  try:
22
- # Konvertiere Bytes in Audio-Daten
23
- audio_io = io.BytesIO(audio_bytes)
24
- samples, sample_rate = sf.read(audio_io)
25
 
26
- with NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
27
- sf.write(tmpfile.name, samples, sample_rate)
 
 
28
 
29
- with open(tmpfile.name, "rb") as audio_file:
30
- transcription = client.audio.transcriptions.create(
31
- file=(os.path.basename(tmpfile.name), audio_file), # Korrigierte Parameter
32
- model="whisper-large-v3-turbo",
33
- language="de",
34
- response_format="text"
35
- )
36
- return transcription
37
-
 
38
  except Exception as e:
39
- return f"Fehler: {str(e)}"
40
- finally:
41
- if tmpfile and os.path.exists(tmpfile.name):
42
- os.unlink(tmpfile.name)
43
 
44
  # Streamlit UI
45
- st.title("🎤 Audio Transkription")
46
- st.info("Funktioniert auf Hugging Face Spaces!")
47
 
48
- # Kombinierter Uploader für Datei und Mikrofon
49
- audio_bytes = st.audio_input(
50
- "Aufnahme starten oder Datei hochladen",
51
- )
52
 
53
  if audio_bytes:
54
- with st.spinner("Verarbeite Audio..."):
55
- try:
56
- # Datei in Bytes lesen
57
- #audio_bytes = audio_file.read()
58
- with open(audio_bytes, "rb") as file:
59
- # Transkription durchführen
60
- result = process_audio(audio_bytes)
61
-
62
- # Ergebnis anzeigen
63
- st.subheader("Transkription:")
64
- st.text(result)
65
-
66
- # Audio-Player anzeigen
67
- #st.audio(audio_bytes, format="audio/wav")
68
-
69
- except Exception as e:
70
- st.error(f"Fehler: {str(e)}")
 
 
1
  import os
2
  import io
3
+ import streamlit as st
4
  from groq import Groq
5
  import soundfile as sf
6
+ from st_audiorec import st_audiorec
 
 
 
7
 
8
# Read the Groq API key from the 'groq_whisper' environment variable.
api_key = os.getenv('groq_whisper')

# A variable that is set but empty is as unusable as a missing one, so reject
# both here instead of handing an empty key to the Groq client.
if not api_key:
    st.error("The 'groq_whisper' environment variable is not set. Please set it and restart the app.")
    st.stop()

# Module-level Groq client used by process_audio for transcription requests.
client = Groq(api_key=api_key)
16
 
17
def process_audio(audio_data):
    """Transcribe one recording through the Groq transcription endpoint.

    Args:
        audio_data: A two-item ``(sample_rate, samples)`` sequence, unpacked
            in that order.

    Returns:
        The ``text`` attribute of the transcription response. If anything in
        the body raises, a string of the form ``"An error occurred: <msg>"``
        is returned instead of propagating the exception.
    """
    try:
        rate, pcm = audio_data

        # Encode the samples as a WAV container entirely in memory.
        with io.BytesIO() as buffer:
            sf.write(buffer, pcm, rate, format='WAV')
            # getvalue() returns the whole buffer regardless of the current
            # stream position, so no rewind is needed.
            wav_bytes = buffer.getvalue()

        # Upload as a (filename, content, MIME type) tuple.
        response = client.audio.transcriptions.create(
            file=("recording.wav", wav_bytes, "audio/wav"),
            model="whisper-large-v3-turbo",
            prompt="transcribe",
            language="de",
            response_format="json",
            temperature=0.0,
        )
        return response.text
    except Exception as exc:
        return f"An error occurred: {str(exc)}"
 
 
 
39
 
40
  # Streamlit UI
41
st.title("🎤 Live Audio Transcription")
st.write("Record audio using your microphone and get real-time transcription")

# Audio recorder component; the code below treats its return value as WAV
# bytes (decoded with soundfile, replayed as 'audio/wav').
audio_bytes = st_audiorec()

if audio_bytes:
    # Decode the recorded bytes into a sample array plus its sample rate.
    with io.BytesIO(audio_bytes) as wav_io:
        samples, sample_rate = sf.read(wav_io)

    # Down-mix any multi-channel recording to mono by averaging the channel
    # axis (previously only the exactly-two-channel case was handled).
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Process the audio
    with st.spinner("Transcribing..."):
        transcription = process_audio((sample_rate, samples))

    # process_audio reports failures as a string with this prefix rather than
    # raising, so check for it before announcing success.
    if isinstance(transcription, str) and transcription.startswith("An error occurred:"):
        st.error(transcription)
    else:
        st.success("Transcription Complete!")
        st.subheader("Result:")
        st.write(transcription)

    # Let the user replay the recording in either case.
    st.audio(audio_bytes, format='audio/wav')