gemini-webrtc

Running

App Files Files Community

mgokg committed on May 16

Commit

aa93a81

verified ·

1 Parent(s): ed2fba6

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -32

app.py CHANGED Viewed

@@ -22,9 +22,7 @@ from PIL import Image
 load_dotenv()
-system_message = "you are a helpful assistant."
-#system_message = "Du bist ein echzeitübersetzer. übersetze deutsch auf italienisch und italienisch auf deutsch. erkläre nichts, kommentiere nichts,  füge nichts hinzu, nur übersetzen."
 def encode_audio(data: np.ndarray) -> dict:
     """Encode Audio data to send to the server"""
@@ -46,6 +44,7 @@ def encode_image(data: np.ndarray) -> dict:
 class GeminiHandler(AsyncAudioVideoStreamHandler):
     def __init__(
         self,
     ) -> None:
         super().__init__(
             "mono",
@@ -57,9 +56,10 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
         self.session = None
         self.last_frame_time = 0
         self.quit = asyncio.Event()
     def copy(self) -> "GeminiHandler":
-        return GeminiHandler()
     async def start_up(self):
         client = genai.Client(
@@ -72,7 +72,7 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
         ]
         system_instruction = types.Content(
-            parts=[types.Part.from_text(text=f"{system_message}")],
             role="user"
         )
@@ -165,23 +165,6 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
             self.quit.clear()
-stream = Stream(
-    handler=GeminiHandler(),
-    modality="audio",
-    mode="send-receive",
-    rtc_configuration=get_cloudflare_turn_credentials_async,
-    time_limit=180 if get_space() else None,
-    additional_inputs=[
-        gr.Image(label="Image", type="numpy", sources=["upload", "clipboard"])
-    ],
-    ui_args={
-        "icon": "https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
-        "pulse_color": "rgb(255, 255, 255)",
-        "icon_button_color": "rgb(255, 255, 255)",
-        "title": "Gemini Audio Video Chat",
-    },
-)
 css = """
 #video-source {max-width: 500px !important; max-height: 500px !important; background-color: #0f0f11 }
 #video-source video {
@@ -202,6 +185,9 @@ with gr.Blocks(css=css) as demo:
     )
     with gr.Row() as row:
         with gr.Column():
             webrtc = WebRTC(
                 label="Voice Chat",
                 modality="audio",
@@ -212,27 +198,36 @@ with gr.Blocks(css=css) as demo:
                 pulse_color="rgb(255, 255, 255)",
                 icon_button_color="rgb(255, 255, 255)",
             )
-        #with gr.Column():
-            #image_input = gr.Image(
-                #label="Image", type="numpy", sources=["upload", "clipboard"]
-            #)
         webrtc.stream(
-            GeminiHandler(),
             inputs=[webrtc],
             outputs=[webrtc],
             time_limit=180 if get_space() else None,
             concurrency_limit=2 if get_space() else None,
         )
-stream.ui = demo
 if __name__ == "__main__":
     if (mode := os.getenv("MODE")) == "UI":
-        stream.ui.launch(server_port=7860)
     elif mode == "PHONE":
         raise ValueError("Phone mode not supported for this demo")
     else:
-        stream.ui.launch(server_port=7860)

 load_dotenv()
+# The system message is no longer a module-level constant; it is passed to GeminiHandler according to the mode the user selects.
 def encode_audio(data: np.ndarray) -> dict:
     """Encode Audio data to send to the server"""
 class GeminiHandler(AsyncAudioVideoStreamHandler):
     def __init__(
         self,
+        system_message: str, # Add system_message as an argument
     ) -> None:
         super().__init__(
             "mono",
         self.session = None
         self.last_frame_time = 0
         self.quit = asyncio.Event()
+        self.system_message = system_message # Store the system message
     def copy(self) -> "GeminiHandler":
+        return GeminiHandler(self.system_message) # Pass the system message when copying
     async def start_up(self):
         client = genai.Client(
         ]
         system_instruction = types.Content(
+            parts=[types.Part.from_text(text=f"{self.system_message}")], # Use the stored system message
             role="user"
         )
             self.quit.clear()
 css = """
 #video-source {max-width: 500px !important; max-height: 500px !important; background-color: #0f0f11 }
 #video-source video {
     )
     with gr.Row() as row:
         with gr.Column():
+            mode_selector = gr.Radio(
+                ["Chat", "Translate"], label="Select Mode", value="Chat"
+            )
             webrtc = WebRTC(
                 label="Voice Chat",
                 modality="audio",
                 pulse_color="rgb(255, 255, 255)",
                 icon_button_color="rgb(255, 255, 255)",
             )
+        def update_handler(mode):
+            if mode == "Chat":
+                system_message = "you are a helpful assistant."
+            elif mode == "Translate":
+                system_message = "Du bist ein echzeitübersetzer. übersetze deutsch auf italienisch und italienisch auf deutsch. erkläre nichts, kommentiere nichts,  füge nichts hinzu, nur übersetzen."
+            return GeminiHandler(system_message=system_message)
+        mode_selector.change(
+            update_handler,
+            inputs=[mode_selector],
+            outputs=[webrtc], # This will trigger a restart of the WebRTC component with the new handler
+            queue=False # Don't queue this event, it should happen immediately
+        )
+        # Initial setup of the handler based on the default mode
+        initial_system_message = "you are a helpful assistant."
         webrtc.stream(
+            GeminiHandler(system_message=initial_system_message),
             inputs=[webrtc],
             outputs=[webrtc],
             time_limit=180 if get_space() else None,
             concurrency_limit=2 if get_space() else None,
         )
 if __name__ == "__main__":
     if (mode := os.getenv("MODE")) == "UI":
+        demo.launch(server_port=7860)
     elif mode == "PHONE":
         raise ValueError("Phone mode not supported for this demo")
     else:
+        demo.launch(server_port=7860)