File size: 5,188 Bytes
1328c27
79c7151
f59ad6c
93a1282
3aa852f
93a1282
6cc9288
6440454
1328c27
6cc9288
 
 
c0c289e
13a2655
6cc9288
 
 
79c7151
93a1282
6cc9288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f47f413
6cc9288
 
 
 
 
 
93a1282
c0c289e
 
 
 
 
 
 
 
 
 
 
 
 
 
0b91f97
 
c0c289e
 
f59ad6c
6cc9288
1328c27
f59ad6c
1328c27
6cc9288
f59ad6c
 
 
1328c27
6cc9288
 
f59ad6c
 
 
 
 
 
 
 
1328c27
6cc9288
1948d7c
161f398
224c987
3162e8f
1a4d67e
6cc9288
 
 
 
dacc07c
6cc9288
 
f4903a8
6cc9288
 
 
 
 
 
 
 
 
 
 
 
 
 
c0c289e
 
539f77a
6cc9288
 
918ca80
 
6ac5d17
3f2068f
6cc9288
 
 
 
 
 
ffd6a56
693a295
ffd6a56
6cc9288
 
f11b3dc
918ca80
209811f
f47f413
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import os
import io
import streamlit as st
from groq import Groq
import requests
import soundfile as sf
import google.generativeai as genai
from audiorecorder import audiorecorder

# Load API credentials from environment variables.
groq_api_key = os.getenv('groqwhisper')
gemini_api_key = os.getenv('geminiapi')
google_api_key = os.getenv('google_search')
# Search-engine identifier sent as the ``cx`` parameter by websearch().
cx = "77f1602c0ff764edb"

# Transcription (Groq) and chat (Gemini) cannot work without their keys,
# so abort the script run early with a visible error.
if not groq_api_key or not gemini_api_key:
    st.error("Bitte setze die Umgebungsvariablen 'groqwhisper' und 'geminiapi'")
    st.stop()

# The search key is used by websearch() but was never checked. Keep the app
# running (the chat itself does not need it) and only surface a hint, so a
# missing key no longer fails silently.
if not google_api_key:
    st.warning(
        "Umgebungsvariable 'google_search' ist nicht gesetzt; "
        "die Websuche wird voraussichtlich keine Ergebnisse liefern."
    )

# Initialise the Groq client used for speech-to-text.
groq_client = Groq(api_key=groq_api_key)

# Configure the Gemini SDK.
genai.configure(api_key=gemini_api_key)

# Generation settings passed to the Gemini model.
generation_config = {
    "temperature": 0.4,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-exp",
    generation_config=generation_config,
)

# Keep one chat session per Streamlit session so it survives script reruns.
if "chat_session" not in st.session_state:
    st.session_state.chat_session = model.start_chat(history=[])

# (role, text) pairs rendered by the history loop further down.
if "display_history" not in st.session_state:
    st.session_state.display_history = []


def websearch(prompt):
    """Query the Google Custom Search JSON API and return the result snippets.

    Args:
        prompt: Free-text search query, sent as the ``q`` parameter.

    Returns:
        The ``snippet`` values of all returned items joined by newlines, or
        an empty string when the response contains no ``items``.

    Raises:
        requests.RequestException: If the request fails or times out.
            ``response.json()`` may also raise if the body is not valid JSON.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }
    # Let requests build the query string so that spaces, '&', '#', '+' and
    # non-ASCII characters in the user's question are percent-encoded instead
    # of corrupting or truncating the URL.
    params = {"key": google_api_key, "cx": cx, "q": prompt}
    response = requests.get(
        "https://www.googleapis.com/customsearch/v1",
        params=params,
        headers=headers,
        timeout=10,  # avoid blocking the Streamlit run forever on a stalled connection
    )
    data = response.json()
    # Error responses and empty result sets carry no 'items' key.
    items = data.get('items', [])
    # Skip items without a snippet rather than raising KeyError.
    snippets = [item['snippet'] for item in items if item.get('snippet')]
    return '\n'.join(snippets)


def process_audio(audio_data):
    """Transcribe recorded audio and return the recognised text.

    Args:
        audio_data: A ``(sample_rate, samples)`` pair.

    Returns:
        The transcript text, or a string starting with ``"Fehler: "``
        followed by the exception message if any step fails.
    """
    try:
        rate, pcm = audio_data

        # Encode the samples as a WAV file held entirely in memory.
        buffer = io.BytesIO()
        try:
            sf.write(buffer, pcm, rate, format='WAV')
            wav_bytes = buffer.getvalue()
        finally:
            buffer.close()

        # Send the encoded recording to Groq for transcription.
        result = groq_client.audio.transcriptions.create(
            file=("recording.wav", wav_bytes, "audio/wav"),
            model="whisper-large-v3-turbo",
            prompt="transcribe",
            language="de",
            response_format="json",
            temperature=0.0,
        )
        text = result.text
    except Exception as err:
        # Callers detect failures by the "Fehler:" prefix.
        return f"Fehler: {str(err)}"
    return text


st.subheader("Gemini-2/Websearch")
# Container the assistant answers are written into; it is created here so
# the answers appear above the input widgets defined below.
messages = st.container()

# Render the stored chat history.
# NOTE(review): nothing in the code visible here appends to display_history,
# so this loop renders nothing unless it is filled elsewhere.
for role, text in st.session_state.display_history:
    with st.chat_message(role):
        st.markdown(text)

# Handle voice input.
audio_bytes = st.audio_input("Sprachnachricht aufnehmen")
if audio_bytes:
    try:
        audio_content = audio_bytes.getvalue()
        with io.BytesIO(audio_content) as wav_io:
            samples, sample_rate = sf.read(wav_io)
            # Down-mix two-channel recordings to mono by averaging the channels.
            if len(samples.shape) > 1 and samples.shape[1] == 2:
                samples = samples.mean(axis=1)

            with st.spinner("Transkription..."):
                transcription = process_audio((sample_rate, samples))

            if transcription:
                if transcription.startswith("Fehler:"):
                    st.error(transcription)
                else:
                    # Use a separate name for the result: assigning it to
                    # `websearch` would replace the function with a string
                    # and break the text-input branch later in the same run.
                    search_results = websearch(transcription)
                    full_prompt = f"{transcription}\nAntworte immer auf Deutsch\n Du findest die Antwort hier:\n{search_results}"
                    response = st.session_state.chat_session.send_message(full_prompt)
                    response_text = response.candidates[0].content.parts[0].text if response.candidates else "Keine Antwort"
                    messages.chat_message("assistant").write(f"{response_text}")
    except Exception as e:
        st.error(f"Audioprocessing fehlgeschlagen: {str(e)}")

# Handle text input.
user_input = st.text_input("Schreibe deine Frage:", key="user_input")
if user_input:
    # Same as above: keep the function name `websearch` intact.
    search_results = websearch(user_input)
    full_prompt = f"{user_input}\nAntworte immer auf Deutsch\n antworte kurz und knapp.\n Du findest die Antwort hier:\n{search_results}"
    response = st.session_state.chat_session.send_message(full_prompt)
    response_text = response.candidates[0].content.parts[0].text if response.candidates else "Keine Antwort"
    messages.chat_message("assistant").write(f"{response_text}")