mgokg committed on
Commit
3497472
·
verified ·
1 Parent(s): aa93a81

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -29
app.py CHANGED
@@ -1,4 +1,7 @@
1
- import asyncio
 
 
 
2
  import base64
3
  import os
4
  import time
@@ -22,7 +25,9 @@ from PIL import Image
22
 
23
  load_dotenv()
24
 
25
- # system_message will be set based on the user's selection
 
 
26
 
27
  def encode_audio(data: np.ndarray) -> dict:
28
  """Encode Audio data to send to the server"""
@@ -44,7 +49,6 @@ def encode_image(data: np.ndarray) -> dict:
44
  class GeminiHandler(AsyncAudioVideoStreamHandler):
45
  def __init__(
46
  self,
47
- system_message: str, # Add system_message as an argument
48
  ) -> None:
49
  super().__init__(
50
  "mono",
@@ -56,10 +60,9 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
56
  self.session = None
57
  self.last_frame_time = 0
58
  self.quit = asyncio.Event()
59
- self.system_message = system_message # Store the system message
60
 
61
  def copy(self) -> "GeminiHandler":
62
- return GeminiHandler(self.system_message) # Pass the system message when copying
63
 
64
  async def start_up(self):
65
  client = genai.Client(
@@ -72,7 +75,7 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
72
  ]
73
 
74
  system_instruction = types.Content(
75
- parts=[types.Part.from_text(text=f"{self.system_message}")], # Use the stored system message
76
  role="user"
77
  )
78
 
@@ -165,6 +168,23 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
165
  self.quit.clear()
166
 
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  css = """
169
  #video-source {max-width: 500px !important; max-height: 500px !important; background-color: #0f0f11 }
170
  #video-source video {
@@ -185,9 +205,6 @@ with gr.Blocks(css=css) as demo:
185
  )
186
  with gr.Row() as row:
187
  with gr.Column():
188
- mode_selector = gr.Radio(
189
- ["Chat", "Translate"], label="Select Mode", value="Chat"
190
- )
191
  webrtc = WebRTC(
192
  label="Voice Chat",
193
  modality="audio",
@@ -198,36 +215,28 @@ with gr.Blocks(css=css) as demo:
198
  pulse_color="rgb(255, 255, 255)",
199
  icon_button_color="rgb(255, 255, 255)",
200
  )
 
 
 
 
201
 
202
- def update_handler(mode):
203
- if mode == "Chat":
204
- system_message = "you are a helpful assistant."
205
- elif mode == "Translate":
206
- system_message = "Du bist ein echzeitübersetzer. übersetze deutsch auf italienisch und italienisch auf deutsch. erkläre nichts, kommentiere nichts, füge nichts hinzu, nur übersetzen."
207
- return GeminiHandler(system_message=system_message)
208
-
209
- mode_selector.change(
210
- update_handler,
211
- inputs=[mode_selector],
212
- outputs=[webrtc], # This will trigger a restart of the WebRTC component with the new handler
213
- queue=False # Don't queue this event, it should happen immediately
214
- )
215
-
216
- # Initial setup of the handler based on the default mode
217
- initial_system_message = "you are a helpful assistant."
218
  webrtc.stream(
219
- GeminiHandler(system_message=initial_system_message),
220
  inputs=[webrtc],
221
  outputs=[webrtc],
222
- time_limit=180 if get_space() else None,
223
  concurrency_limit=2 if get_space() else None,
224
  )
225
 
 
 
226
 
227
  if __name__ == "__main__":
228
  if (mode := os.getenv("MODE")) == "UI":
229
- demo.launch(server_port=7860)
230
  elif mode == "PHONE":
231
  raise ValueError("Phone mode not supported for this demo")
232
  else:
233
- demo.launch(server_port=7860)
 
 
 
1
+ modify the code. füge ein auswahlfeld hinzu, wo man entweder chat oder translate auswählen kann. ist die auswahl chat, dann ist
2
+ system_message = "you are a helpful assistant."
3
+ ist die auswahl translate, dann ist
4
+ system_message = "Du bist ein echzeitübersetzer. übersetze deutsch auf italienisch und italienisch auf deutsch. erkläre nichts, kommentiere nichts, füge nichts hinzu, nur übersetzen."import asyncio
5
  import base64
6
  import os
7
  import time
 
25
 
26
  load_dotenv()
27
 
28
+ system_message = "you are a helpful assistant."
29
+ #system_message = "Du bist ein echzeitübersetzer. übersetze deutsch auf italienisch und italienisch auf deutsch. erkläre nichts, kommentiere nichts, füge nichts hinzu, nur übersetzen."
30
+
31
 
32
  def encode_audio(data: np.ndarray) -> dict:
33
  """Encode Audio data to send to the server"""
 
49
  class GeminiHandler(AsyncAudioVideoStreamHandler):
50
  def __init__(
51
  self,
 
52
  ) -> None:
53
  super().__init__(
54
  "mono",
 
60
  self.session = None
61
  self.last_frame_time = 0
62
  self.quit = asyncio.Event()
 
63
 
64
  def copy(self) -> "GeminiHandler":
65
+ return GeminiHandler()
66
 
67
  async def start_up(self):
68
  client = genai.Client(
 
75
  ]
76
 
77
  system_instruction = types.Content(
78
+ parts=[types.Part.from_text(text=f"{system_message}")],
79
  role="user"
80
  )
81
 
 
168
  self.quit.clear()
169
 
170
 
171
+ stream = Stream(
172
+ handler=GeminiHandler(),
173
+ modality="audio",
174
+ mode="send-receive",
175
+ rtc_configuration=get_cloudflare_turn_credentials_async,
176
+ time_limit=1800 if get_space() else None,
177
+ additional_inputs=[
178
+ gr.Image(label="Image", type="numpy", sources=["upload", "clipboard"])
179
+ ],
180
+ ui_args={
181
+ "icon": "https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
182
+ "pulse_color": "rgb(255, 255, 255)",
183
+ "icon_button_color": "rgb(255, 255, 255)",
184
+ "title": "Gemini Audio Video Chat",
185
+ },
186
+ )
187
+
188
  css = """
189
  #video-source {max-width: 500px !important; max-height: 500px !important; background-color: #0f0f11 }
190
  #video-source video {
 
205
  )
206
  with gr.Row() as row:
207
  with gr.Column():
 
 
 
208
  webrtc = WebRTC(
209
  label="Voice Chat",
210
  modality="audio",
 
215
  pulse_color="rgb(255, 255, 255)",
216
  icon_button_color="rgb(255, 255, 255)",
217
  )
218
+ #with gr.Column():
219
+ #image_input = gr.Image(
220
+ #label="Image", type="numpy", sources=["upload", "clipboard"]
221
+ #)
222
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  webrtc.stream(
224
+ GeminiHandler(),
225
  inputs=[webrtc],
226
  outputs=[webrtc],
227
+ time_limit=1800 if get_space() else None,
228
  concurrency_limit=2 if get_space() else None,
229
  )
230
 
231
+ stream.ui = demo
232
+
233
 
234
  if __name__ == "__main__":
235
  if (mode := os.getenv("MODE")) == "UI":
236
+ stream.ui.launch(server_port=7860)
237
  elif mode == "PHONE":
238
  raise ValueError("Phone mode not supported for this demo")
239
  else:
240
+ stream.ui.launch(server_port=7860)
241
+
242
+