gemini-webrtc

Running

App Files Files Community

mgokg committed on May 16

Commit

3497472

verified ·

1 Parent(s): aa93a81

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -29

app.py CHANGED Viewed

@@ -1,4 +1,7 @@
-import asyncio
 import base64
 import os
 import time
@@ -22,7 +25,9 @@ from PIL import Image
 load_dotenv()
-# system_message will be set based on the user's selection
 def encode_audio(data: np.ndarray) -> dict:
     """Encode Audio data to send to the server"""
@@ -44,7 +49,6 @@ def encode_image(data: np.ndarray) -> dict:
 class GeminiHandler(AsyncAudioVideoStreamHandler):
     def __init__(
         self,
-        system_message: str, # Add system_message as an argument
     ) -> None:
         super().__init__(
             "mono",
@@ -56,10 +60,9 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
         self.session = None
         self.last_frame_time = 0
         self.quit = asyncio.Event()
-        self.system_message = system_message # Store the system message
     def copy(self) -> "GeminiHandler":
-        return GeminiHandler(self.system_message) # Pass the system message when copying
     async def start_up(self):
         client = genai.Client(
@@ -72,7 +75,7 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
         ]
         system_instruction = types.Content(
-            parts=[types.Part.from_text(text=f"{self.system_message}")], # Use the stored system message
             role="user"
         )
@@ -165,6 +168,23 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
             self.quit.clear()
 css = """
 #video-source {max-width: 500px !important; max-height: 500px !important; background-color: #0f0f11 }
 #video-source video {
@@ -185,9 +205,6 @@ with gr.Blocks(css=css) as demo:
     )
     with gr.Row() as row:
         with gr.Column():
-            mode_selector = gr.Radio(
-                ["Chat", "Translate"], label="Select Mode", value="Chat"
-            )
             webrtc = WebRTC(
                 label="Voice Chat",
                 modality="audio",
@@ -198,36 +215,28 @@ with gr.Blocks(css=css) as demo:
                 pulse_color="rgb(255, 255, 255)",
                 icon_button_color="rgb(255, 255, 255)",
             )
-        def update_handler(mode):
-            if mode == "Chat":
-                system_message = "you are a helpful assistant."
-            elif mode == "Translate":
-                system_message = "Du bist ein echzeitübersetzer. übersetze deutsch auf italienisch und italienisch auf deutsch. erkläre nichts, kommentiere nichts,  füge nichts hinzu, nur übersetzen."
-            return GeminiHandler(system_message=system_message)
-        mode_selector.change(
-            update_handler,
-            inputs=[mode_selector],
-            outputs=[webrtc], # This will trigger a restart of the WebRTC component with the new handler
-            queue=False # Don't queue this event, it should happen immediately
-        )
-        # Initial setup of the handler based on the default mode
-        initial_system_message = "you are a helpful assistant."
         webrtc.stream(
-            GeminiHandler(system_message=initial_system_message),
             inputs=[webrtc],
             outputs=[webrtc],
-            time_limit=180 if get_space() else None,
             concurrency_limit=2 if get_space() else None,
         )
 if __name__ == "__main__":
     if (mode := os.getenv("MODE")) == "UI":
-        demo.launch(server_port=7860)
     elif mode == "PHONE":
         raise ValueError("Phone mode not supported for this demo")
     else:
-        demo.launch(server_port=7860)

+Modify the code. Füge ein Auswahlfeld hinzu, in dem man entweder Chat oder Translate auswählen kann. Ist die Auswahl Chat, dann ist
+system_message = "you are a helpful assistant."
+Ist die Auswahl Translate, dann ist
+system_message = "Du bist ein echzeitübersetzer. übersetze deutsch auf italienisch und italienisch auf deutsch. erkläre nichts, kommentiere nichts,  füge nichts hinzu, nur übersetzen."
+import asyncio
 import base64
 import os
 import time
 load_dotenv()
+system_message = "you are a helpful assistant."
+#system_message = "Du bist ein echzeitübersetzer. übersetze deutsch auf italienisch und italienisch auf deutsch. erkläre nichts, kommentiere nichts,  füge nichts hinzu, nur übersetzen."
 def encode_audio(data: np.ndarray) -> dict:
     """Encode Audio data to send to the server"""
 class GeminiHandler(AsyncAudioVideoStreamHandler):
     def __init__(
         self,
     ) -> None:
         super().__init__(
             "mono",
         self.session = None
         self.last_frame_time = 0
         self.quit = asyncio.Event()
     def copy(self) -> "GeminiHandler":
+        return GeminiHandler()
     async def start_up(self):
         client = genai.Client(
         ]
         system_instruction = types.Content(
+            parts=[types.Part.from_text(text=f"{system_message}")],
             role="user"
         )
             self.quit.clear()
+stream = Stream(
+    handler=GeminiHandler(),
+    modality="audio",
+    mode="send-receive",
+    rtc_configuration=get_cloudflare_turn_credentials_async,
+    time_limit=1800 if get_space() else None,
+    additional_inputs=[
+        gr.Image(label="Image", type="numpy", sources=["upload", "clipboard"])
+    ],
+    ui_args={
+        "icon": "https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png",
+        "pulse_color": "rgb(255, 255, 255)",
+        "icon_button_color": "rgb(255, 255, 255)",
+        "title": "Gemini Audio Video Chat",
+    },
+)
 css = """
 #video-source {max-width: 500px !important; max-height: 500px !important; background-color: #0f0f11 }
 #video-source video {
     )
     with gr.Row() as row:
         with gr.Column():
             webrtc = WebRTC(
                 label="Voice Chat",
                 modality="audio",
                 pulse_color="rgb(255, 255, 255)",
                 icon_button_color="rgb(255, 255, 255)",
             )
+        #with gr.Column():
+            #image_input = gr.Image(
+                #label="Image", type="numpy", sources=["upload", "clipboard"]
+            #)
         webrtc.stream(
+            GeminiHandler(),
             inputs=[webrtc],
             outputs=[webrtc],
+            time_limit=1800 if get_space() else None,
             concurrency_limit=2 if get_space() else None,
         )
+stream.ui = demo
 if __name__ == "__main__":
     if (mode := os.getenv("MODE")) == "UI":
+        stream.ui.launch(server_port=7860)
     elif mode == "PHONE":
         raise ValueError("Phone mode not supported for this demo")
     else:
+        stream.ui.launch(server_port=7860)