mgokg committed on
Commit
6cc9288
·
verified ·
1 Parent(s): eb6637c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -37
app.py CHANGED
@@ -3,30 +3,56 @@ import io
3
  import streamlit as st
4
  from groq import Groq
5
  import soundfile as sf
6
- #from st_audiorec import st_audiorec
7
  from audiorecorder import audiorecorder
8
 
9
- # Load environment variables
10
- api_key = os.getenv('groqwhisper')
11
- if api_key is None:
12
- st.error("The 'groq_whisper' environment variable is not set. Please set it and restart the app.")
 
 
13
  st.stop()
14
 
15
- # Initialize Groq client
16
- client = Groq(api_key=api_key)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  def process_audio(audio_data):
19
- """Process audio data and return transcription."""
20
  try:
21
  sample_rate, samples = audio_data
22
 
23
- # Create in-memory WAV file
24
  with io.BytesIO() as wav_buffer:
25
  sf.write(wav_buffer, samples, sample_rate, format='WAV')
26
  wav_buffer.seek(0)
27
 
28
- # Send to Groq for transcription
29
- transcription = client.audio.transcriptions.create(
30
  file=("recording.wav", wav_buffer.read(), "audio/wav"),
31
  model="whisper-large-v3-turbo",
32
  prompt="transcribe",
@@ -36,34 +62,52 @@ def process_audio(audio_data):
36
  )
37
  return transcription.text
38
  except Exception as e:
39
- return f"An error occurred: {str(e)}"
40
 
41
  # Streamlit UI
42
- st.title("🎤 Live Audio Transcription")
43
- st.write("Record audio using your microphone and get real-time transcription")
 
 
 
44
 
45
- # Audio recorder component
46
- #audio_bytes = st.audio()
47
- audio_bytes = st.audio_input("Click to record")
 
48
 
 
 
49
  if audio_bytes:
50
- # Extrahiere die Bytes aus dem UploadedFile-Objekt
51
- audio_bytes_content = audio_bytes.getvalue()
52
-
53
- # Konvertiere die Bytes in ein numpy-Array mit soundfile
54
- with io.BytesIO(audio_bytes_content) as wav_io:
55
- samples, sample_rate = sf.read(wav_io)
56
-
57
- # Konvertiere Stereo in Mono, falls erforderlich
58
- if len(samples.shape) > 1 and samples.shape[1] == 2:
59
- samples = samples.mean(axis=1)
60
-
61
- # Verarbeite das Audio
62
- with st.spinner("Transcribing..."):
63
- transcription = process_audio((sample_rate, samples))
64
-
65
- # Ergebnisse anzeigen
66
- st.success(transcription)
67
- #st.subheader("Result:")
68
- #st.write(transcription)
69
- #st.audio(audio_bytes_content, format='audio/wav')
 
 
 
 
 
 
 
 
 
 
 
 
 
import streamlit as st
from groq import Groq
import soundfile as sf
import google.generativeai as genai
from audiorecorder import audiorecorder

# Load API keys from the environment (set as host/space secrets).
groq_api_key = os.getenv('groqwhisper')
gemini_api_key = os.getenv('geminiapi')

if not groq_api_key or not gemini_api_key:
    st.error("Bitte setze die Umgebungsvariablen 'groqwhisper' und 'geminiapi'")
    st.stop()

# Groq client (used for Whisper transcription).
groq_client = Groq(api_key=groq_api_key)

# Configure the Gemini SDK.
genai.configure(api_key=gemini_api_key)

# Generation settings for the Gemini chat model.
generation_config = {
    "temperature": 0.4,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-exp",
    generation_config=generation_config,
)

# NOTE(fix): removed the stray `st.audio_input("Click to record")` that was
# here. It rendered a second, unused recorder whose value was immediately
# shadowed by the real recorder below, and — worse — it ran before
# st.set_page_config(), which raises StreamlitAPIException because
# set_page_config must be the first Streamlit command of a script run.

# Session state: the live Gemini chat session plus a (role, text) history
# used purely for rendering the transcript.
if "chat_session" not in st.session_state:
    st.session_state.chat_session = model.start_chat(history=[])

if "display_history" not in st.session_state:
    st.session_state.display_history = []
44
  def process_audio(audio_data):
45
+ """Verarbeitet Audiodaten und gibt Transkript zurück."""
46
  try:
47
  sample_rate, samples = audio_data
48
 
49
+ # WAV-Datei im Speicher erstellen
50
  with io.BytesIO() as wav_buffer:
51
  sf.write(wav_buffer, samples, sample_rate, format='WAV')
52
  wav_buffer.seek(0)
53
 
54
+ # Transkription mit Groq
55
+ transcription = groq_client.audio.transcriptions.create(
56
  file=("recording.wav", wav_buffer.read(), "audio/wav"),
57
  model="whisper-large-v3-turbo",
58
  prompt="transcribe",
 
62
  )
63
  return transcription.text
64
  except Exception as e:
65
+ return f"Fehler: {str(e)}"
66
 
67
# Streamlit UI
# st.set_page_config must be the first Streamlit command of a run; earlier
# sections only emit st.* calls on the fatal misconfiguration path (st.stop).
st.set_page_config(
    page_title="Gemini Chatbot mit Spracheingabe",
    page_icon="🤖"
)
st.title("Gemini Chatbot 🎤+📝")


def _ask_gemini(user_text):
    """Send user_text to the Gemini chat session, append both sides of the
    exchange to the display history, and rerun so the transcript refreshes.

    Must NOT be called inside a try/except Exception: st.rerun() raises
    RerunException (a subclass of Exception) and would be swallowed.
    """
    st.session_state.display_history.append(("user", user_text))
    full_prompt = f"{user_text}\nAntworte immer auf Deutsch"
    response = st.session_state.chat_session.send_message(full_prompt)
    response_text = (
        response.candidates[0].content.parts[0].text
        if response.candidates else "Keine Antwort"
    )
    st.session_state.display_history.append(("assistant", response_text))
    st.rerun()


# Render the conversation so far.
for role, text in st.session_state.display_history:
    with st.chat_message(role):
        st.markdown(text)

# --- Voice input ---
audio_bytes = st.audio_input("Sprachnachricht aufnehmen")
if audio_bytes:
    audio_content = audio_bytes.getvalue()
    # Reprocessing guard: after st.rerun() the widget still holds the last
    # recording, which previously re-transcribed and re-sent it forever.
    audio_sig = hash(audio_content)
    if st.session_state.get("_last_audio_sig") != audio_sig:
        transcription = None
        try:
            with io.BytesIO(audio_content) as wav_io:
                samples, sample_rate = sf.read(wav_io)
            # Downmix stereo to mono before transcription.
            if len(samples.shape) > 1 and samples.shape[1] == 2:
                samples = samples.mean(axis=1)

            with st.spinner("Transkription..."):
                transcription = process_audio((sample_rate, samples))
        except Exception as e:
            st.error(f"Audioprocessing fehlgeschlagen: {str(e)}")

        if transcription:
            if transcription.startswith("Fehler:"):
                st.error(transcription)
            else:
                st.session_state._last_audio_sig = audio_sig
                # Outside the try block so RerunException propagates.
                _ask_gemini(transcription)

# --- Text input ---
user_input = st.text_input("Schreibe deine Frage:", key="user_input")
if user_input:
    # Same guard: the keyed text_input keeps its value across the rerun
    # triggered after a reply, which previously resent the question.
    if st.session_state.get("_last_text_sent") != user_input:
        st.session_state._last_text_sent = user_input
        _ask_gemini(user_input)