Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1 |
-
|
|
|
|
|
|
|
2 |
import base64
|
3 |
import os
|
4 |
import time
|
@@ -22,7 +25,9 @@ from PIL import Image
|
|
22 |
|
23 |
load_dotenv()
|
24 |
|
25 |
-
|
|
|
|
|
26 |
|
27 |
def encode_audio(data: np.ndarray) -> dict:
|
28 |
"""Encode Audio data to send to the server"""
|
@@ -44,7 +49,6 @@ def encode_image(data: np.ndarray) -> dict:
|
|
44 |
class GeminiHandler(AsyncAudioVideoStreamHandler):
|
45 |
def __init__(
|
46 |
self,
|
47 |
-
system_message: str, # Add system_message as an argument
|
48 |
) -> None:
|
49 |
super().__init__(
|
50 |
"mono",
|
@@ -56,10 +60,9 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
|
|
56 |
self.session = None
|
57 |
self.last_frame_time = 0
|
58 |
self.quit = asyncio.Event()
|
59 |
-
self.system_message = system_message # Store the system message
|
60 |
|
61 |
def copy(self) -> "GeminiHandler":
|
62 |
-
return GeminiHandler(
|
63 |
|
64 |
async def start_up(self):
|
65 |
client = genai.Client(
|
@@ -72,7 +75,7 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
|
|
72 |
]
|
73 |
|
74 |
system_instruction = types.Content(
|
75 |
-
parts=[types.Part.from_text(text=f"{
|
76 |
role="user"
|
77 |
)
|
78 |
|
@@ -165,6 +168,23 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
|
|
165 |
self.quit.clear()
|
166 |
|
167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
css = """
|
169 |
#video-source {max-width: 500px !important; max-height: 500px !important; background-color: #0f0f11 }
|
170 |
#video-source video {
|
@@ -185,9 +205,6 @@ with gr.Blocks(css=css) as demo:
|
|
185 |
)
|
186 |
with gr.Row() as row:
|
187 |
with gr.Column():
|
188 |
-
mode_selector = gr.Radio(
|
189 |
-
["Chat", "Translate"], label="Select Mode", value="Chat"
|
190 |
-
)
|
191 |
webrtc = WebRTC(
|
192 |
label="Voice Chat",
|
193 |
modality="audio",
|
@@ -198,36 +215,28 @@ with gr.Blocks(css=css) as demo:
|
|
198 |
pulse_color="rgb(255, 255, 255)",
|
199 |
icon_button_color="rgb(255, 255, 255)",
|
200 |
)
|
|
|
|
|
|
|
|
|
201 |
|
202 |
-
def update_handler(mode):
|
203 |
-
if mode == "Chat":
|
204 |
-
system_message = "you are a helpful assistant."
|
205 |
-
elif mode == "Translate":
|
206 |
-
system_message = "Du bist ein echzeitübersetzer. übersetze deutsch auf italienisch und italienisch auf deutsch. erkläre nichts, kommentiere nichts, füge nichts hinzu, nur übersetzen."
|
207 |
-
return GeminiHandler(system_message=system_message)
|
208 |
-
|
209 |
-
mode_selector.change(
|
210 |
-
update_handler,
|
211 |
-
inputs=[mode_selector],
|
212 |
-
outputs=[webrtc], # This will trigger a restart of the WebRTC component with the new handler
|
213 |
-
queue=False # Don't queue this event, it should happen immediately
|
214 |
-
)
|
215 |
-
|
216 |
-
# Initial setup of the handler based on the default mode
|
217 |
-
initial_system_message = "you are a helpful assistant."
|
218 |
webrtc.stream(
|
219 |
-
GeminiHandler(
|
220 |
inputs=[webrtc],
|
221 |
outputs=[webrtc],
|
222 |
-
time_limit=
|
223 |
concurrency_limit=2 if get_space() else None,
|
224 |
)
|
225 |
|
|
|
|
|
226 |
|
227 |
if __name__ == "__main__":
|
228 |
if (mode := os.getenv("MODE")) == "UI":
|
229 |
-
|
230 |
elif mode == "PHONE":
|
231 |
raise ValueError("Phone mode not supported for this demo")
|
232 |
else:
|
233 |
-
|
|
|
|
|
|
1 |
+
modify the code. füge ein auswahfeld hinzu wo man entweder chat oder translate auswählen kann. ist die auswahl chat dann ist
|
2 |
+
system_message = "you are a helpful assistant."
|
3 |
+
ist die auswahl translate dann ist
|
4 |
+
system_message = "Du bist ein echzeitübersetzer. übersetze deutsch auf italienisch und italienisch auf deutsch. erkläre nichts, kommentiere nichts, füge nichts hinzu, nur übersetzen."import asyncio
|
5 |
import base64
|
6 |
import os
|
7 |
import time
|
|
|
25 |
|
26 |
load_dotenv()
|
27 |
|
28 |
+
system_message = "you are a helpful assistant."
|
29 |
+
#system_message = "Du bist ein echzeitübersetzer. übersetze deutsch auf italienisch und italienisch auf deutsch. erkläre nichts, kommentiere nichts, füge nichts hinzu, nur übersetzen."
|
30 |
+
|
31 |
|
32 |
def encode_audio(data: np.ndarray) -> dict:
|
33 |
"""Encode Audio data to send to the server"""
|
|
|
49 |
class GeminiHandler(AsyncAudioVideoStreamHandler):
|
50 |
def __init__(
|
51 |
self,
|
|
|
52 |
) -> None:
|
53 |
super().__init__(
|
54 |
"mono",
|
|
|
60 |
self.session = None
|
61 |
self.last_frame_time = 0
|
62 |
self.quit = asyncio.Event()
|
|
|
63 |
|
64 |
def copy(self) -> "GeminiHandler":
|
65 |
+
return GeminiHandler()
|
66 |
|
67 |
async def start_up(self):
|
68 |
client = genai.Client(
|
|
|
75 |
]
|
76 |
|
77 |
system_instruction = types.Content(
|
78 |
+
parts=[types.Part.from_text(text=f"{system_message}")],
|
79 |
role="user"
|
80 |
)
|
81 |
|
|
|
168 |
self.quit.clear()
|
169 |
|
170 |
|
171 |
+
stream = Stream(
|
172 |
+
handler=GeminiHandler(),
|
173 |
+
modality="audio",
|
174 |
+
mode="send-receive",
|
175 |
+
rtc_configuration=get_cloudflare_turn_credentials_async,
|
176 |
+
time_limit=1800 if get_space() else None,
|
177 |
+
additional_inputs=[
|
178 |
+
gr.Image(label="Image", type="numpy", sources=["upload", "clipboard"])
|
179 |
+
],
|
180 |
+
ui_args={
|
181 |
+
"icon": "https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
|
182 |
+
"pulse_color": "rgb(255, 255, 255)",
|
183 |
+
"icon_button_color": "rgb(255, 255, 255)",
|
184 |
+
"title": "Gemini Audio Video Chat",
|
185 |
+
},
|
186 |
+
)
|
187 |
+
|
188 |
css = """
|
189 |
#video-source {max-width: 500px !important; max-height: 500px !important; background-color: #0f0f11 }
|
190 |
#video-source video {
|
|
|
205 |
)
|
206 |
with gr.Row() as row:
|
207 |
with gr.Column():
|
|
|
|
|
|
|
208 |
webrtc = WebRTC(
|
209 |
label="Voice Chat",
|
210 |
modality="audio",
|
|
|
215 |
pulse_color="rgb(255, 255, 255)",
|
216 |
icon_button_color="rgb(255, 255, 255)",
|
217 |
)
|
218 |
+
#with gr.Column():
|
219 |
+
#image_input = gr.Image(
|
220 |
+
#label="Image", type="numpy", sources=["upload", "clipboard"]
|
221 |
+
#)
|
222 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
webrtc.stream(
|
224 |
+
GeminiHandler(),
|
225 |
inputs=[webrtc],
|
226 |
outputs=[webrtc],
|
227 |
+
time_limit=1800 if get_space() else None,
|
228 |
concurrency_limit=2 if get_space() else None,
|
229 |
)
|
230 |
|
231 |
+
stream.ui = demo
|
232 |
+
|
233 |
|
234 |
if __name__ == "__main__":
|
235 |
if (mode := os.getenv("MODE")) == "UI":
|
236 |
+
stream.ui.launch(server_port=7860)
|
237 |
elif mode == "PHONE":
|
238 |
raise ValueError("Phone mode not supported for this demo")
|
239 |
else:
|
240 |
+
stream.ui.launch(server_port=7860)
|
241 |
+
|
242 |
+
|