gemini-webrtc

Sleeping

App Files Community

mgokg committed on May 15

Commit

d44f598

verified ·

1 Parent(s): bbf9a20

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -3

app.py CHANGED Viewed

@@ -16,6 +16,7 @@ from fastrtc import (
     wait_for_item,
 )
 from google import genai
 from gradio.utils import get_space
 from PIL import Image
@@ -61,7 +62,29 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
         client = genai.Client(
             api_key=os.getenv("GEMINI_API_KEY"), http_options={"api_version": "v1alpha"}
         )
-        config = {"response_modalities": ["AUDIO"]}
         async with client.aio.live.connect(
             model="gemini-2.0-flash-exp",
             config=config,  # type: ignore
@@ -146,7 +169,7 @@ with gr.Blocks(css=css) as demo:
             <h1>Gen AI Voice Chat</h1>
             <p>real-time audio streaming</p>
           </center>
-        </div>
     """
     )
     with gr.Row() as row:
@@ -183,4 +206,4 @@ if __name__ == "__main__":
     elif mode == "PHONE":
         raise ValueError("Phone mode not supported for this demo")
     else:
-        stream.ui.launch(server_port=7860)

     wait_for_item,
 )
 from google import genai
+from google.genai import types # Import the types module
 from gradio.utils import get_space
 from PIL import Image
         client = genai.Client(
             api_key=os.getenv("GEMINI_API_KEY"), http_options={"api_version": "v1alpha"}
         )
+        # Define the tools and system instruction
+        tools = [
+            types.Tool(google_search=types.GoogleSearch()),
+        ]
+        system_instruction = types.Content(
+            parts=[types.Part.from_text(text="you are a helpful assistant")],
+            role="user"
+        )
+        # Update the config to include tools and system_instruction
+        config = types.LiveConnectConfig(
+            response_modalities=["AUDIO"],
+            speech_config=types.SpeechConfig(
+                voice_config=types.VoiceConfig(
+                    prebuilt_voice_config=types.PrebuiltVoiceConfig(voice_name="Leda")
+                )
+            ),
+            tools=tools,
+            system_instruction=system_instruction,
+        )
         async with client.aio.live.connect(
             model="gemini-2.0-flash-exp",
             config=config,  # type: ignore
             <h1>Gen AI Voice Chat</h1>
             <p>real-time audio streaming</p>
           </center>
+        </div>
     """
     )
     with gr.Row() as row:
     elif mode == "PHONE":
         raise ValueError("Phone mode not supported for this demo")
     else:
+        stream.ui.launch(server_port=7860)