Spaces:

SalexAI
/

JARVIS

No application file

App Files Files Community

SalexAI committed on Apr 24

Commit

56d02e9

verified ·

1 Parent(s): 49384b3

Upload 2 files

Browse files

Files changed (2) hide show

index.html +361 -0
main.py +122 -0

index.html ADDED Viewed

	@@ -0,0 +1,361 @@

+<!DOCTYPE html>
+<html>
+<head>
+    <!-- Declare encoding, viewport and a title so the page renders predictably and is identifiable in a tab. -->
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>Gemini Live Demo</title>
+    <!-- Material Design Lite: icon font, theme stylesheet and component script. -->
+    <link rel="stylesheet" href="https://fonts.googleapis.com/icon?family=Material+Icons">
+    <link rel="stylesheet" href="https://code.getmdl.io/1.3.0/material.indigo-pink.min.css">
+    <script defer src="https://code.getmdl.io/1.3.0/material.min.js"></script>
+    <style>
+        /* Preview of the shared screen. */
+        #videoElement {
+            width: 640px;
+            height: 480px;
+            border-radius: 20px;
+        }
+        /* Off-screen canvas used only to grab JPEG frames from the video. */
+        #canvasElement {
+            display: none;
+            width: 640px;
+            height: 480px;
+        }
+        /* Centered single-column page layout. */
+        .demo-content {
+            padding: 20px;
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+        }
+        .button-group {
+            margin-bottom: 20px;
+        }
+    </style>
+</head>
+<body>
+    <div class="mdl-layout mdl-js-layout mdl-layout--fixed-header">
+        <header class="mdl-layout__header">
+            <div class="mdl-layout__header-row">
+                <!-- Title -->
+                <span class="mdl-layout-title">Gemini Live Demo</span>
+            </div>
+        </header>
+        <main class="mdl-layout__content">
+            <div class="page-content">
+                <div class="demo-content">
+                    <!-- Voice control buttons. They are icon-only, so each carries an explicit accessible name. -->
+                    <div class="button-group">
+                        <button id="startButton" type="button" aria-label="Start microphone" title="Start microphone"
+                            class="mdl-button mdl-js-button mdl-button--fab mdl-button--mini-fab mdl-button--colored">
+                            <i class="material-icons" aria-hidden="true">mic</i>
+                        </button>
+                        <button id="stopButton" type="button" aria-label="Stop microphone" title="Stop microphone"
+                            class="mdl-button mdl-js-button mdl-button--fab mdl-button--mini-fab">
+                            <i class="material-icons" aria-hidden="true">mic_off</i>
+                        </button>
+                    </div>
+                    <!-- Screen-share preview; its size comes from the #videoElement stylesheet rule. -->
+                    <video id="videoElement" autoplay></video>
+                    <!-- Hidden canvas used for frame capture; sized by the #canvasElement stylesheet rule. -->
+                    <canvas id="canvasElement"></canvas>
+                    <!-- Text output -->
+                    <div id="chatLog"></div>
+                </div>
+            </div>
+        </main>
+    </div>
+    <script defer>
+        // WebSocket endpoint opened by connect().
+        // NOTE(review): this top-level name shadows the browser's built-in URL constructor for this page's scripts.
+        const URL = "ws://localhost:9083";
+        // Preview <video> for the shared screen and the hidden <canvas> used to grab frames from it.
+        const video = document.getElementById("videoElement");
+        const canvas = document.getElementById("canvasElement");
+        // 2D drawing context of the hidden canvas; assigned in the load handler below.
+        let context;
+        // Once the page has loaded, obtain the canvas context and capture a frame every 3000 ms.
+        window.addEventListener("load", () => {
+            context = canvas.getContext("2d");
+            setInterval(captureImage, 3000);
+        });
+        const startButton = document.getElementById('startButton');
+        const stopButton = document.getElementById('stopButton');
+        // Screen-share MediaStream, set by startScreenShare().
+        let stream = null;
+        // Most recent captured frame as base64 JPEG, set by captureImage().
+        let currentFrameB64;
+        // Socket to the relay server, created by connect().
+        let webSocket = null;
+        // Microphone-capture AudioContext, created by startAudioInput().
+        let audioContext = null;
+        // Not referenced anywhere else in this script.
+        let mediaRecorder = null;
+        // ScriptProcessor node that receives microphone samples (see startAudioInput()).
+        let processor = null;
+        // 16-bit samples accumulated since the last recordChunk() call.
+        let pcmData = [];
+        // Timer id of the periodic recordChunk() call.
+        let interval = null;
+        // Set to true once initializeAudioContext() has completed.
+        let initialized = false;
+        // Despite the name, this context is used for playback: its worklet node is connected to the destination.
+        let audioInputContext;
+        // "pcm-processor" AudioWorkletNode that receives decoded audio via its message port.
+        let workletNode;
+        // Function to start screen capture
+        async function startScreenShare() {
+            try {
+                stream = await navigator.mediaDevices.getDisplayMedia({
+                    video: {
+                        width: { max: 640 },
+                        height: { max: 480 },
+                    },
+                });
+                video.srcObject = stream;
+                await new Promise(resolve => {
+                    video.onloadedmetadata = () => {
+                        console.log("video loaded metadata");
+                        resolve();
+                    }
+                });
+            } catch (err) {
+                console.error("Error accessing the screen: ", err);
+            }
+        }
+        // Function to capture an image from the shared screen
+        function captureImage() {
+            if (stream && video.videoWidth > 0 && video.videoHeight > 0 && context) {
+                canvas.width = 640;
+                canvas.height = 480;
+                context.drawImage(video, 0, 0, canvas.width, canvas.height);
+                const imageData = canvas.toDataURL("image/jpeg").split(",")[1].trim();
+                currentFrameB64 = imageData;
+            }
+            else {
+                console.log("no stream or video metadata not loaded");
+            }
+        }
+        window.addEventListener("load", async () => {
+            await startScreenShare();
+            //setInterval(captureImage, 3000);
+            // Initialize audio context right away
+            await initializeAudioContext();
+            connect();
+        });
+        function connect() {
+            console.log("connecting: ", URL);
+            webSocket = new WebSocket(URL);
+            webSocket.onclose = (event) => {
+                console.log("websocket closed: ", event);
+                alert("Connection closed");
+            };
+            webSocket.onerror = (event) => {
+                console.log("websocket error: ", event);
+            };
+            webSocket.onopen = (event) => {
+                console.log("websocket open: ", event);
+                sendInitialSetupMessage();
+            };
+            webSocket.onmessage = receiveMessage;
+        }
+        function sendInitialSetupMessage() {
+            console.log("sending setup message");
+            setup_client_message = {
+                setup: {
+                    generation_config: { response_modalities: ["AUDIO"] },
+                },
+            };
+            webSocket.send(JSON.stringify(setup_client_message));
+        }
+        function sendVoiceMessage(b64PCM) {
+            if (webSocket == null) {
+                console.log("websocket not initialized");
+                return;
+            }
+            payload = {
+                realtime_input: {
+                    media_chunks: [{
+                        mime_type: "audio/pcm",
+                        data: b64PCM,
+                    },
+                    {
+                        mime_type: "image/jpeg",
+                        data: currentFrameB64,
+                    },
+                    ],
+                },
+            };
+            webSocket.send(JSON.stringify(payload));
+            console.log("sent: ", payload);
+        }
+        function receiveMessage(event) {
+            const messageData = JSON.parse(event.data);
+            const response = new Response(messageData);
+            if (response.text) {
+                displayMessage("GEMINI: " + response.text);
+            }
+            if (response.audioData) {
+                injestAudioChuckToPlay(response.audioData);
+            }
+        }
+        async function initializeAudioContext() {
+            if (initialized) return;
+            audioInputContext = new (window.AudioContext ||
+                window.webkitAudioContext)({
+                sampleRate: 24000
+            });
+            await audioInputContext.audioWorklet.addModule("pcm-processor.js");
+            workletNode = new AudioWorkletNode(audioInputContext, "pcm-processor");
+            workletNode.connect(audioInputContext.destination);
+            initialized = true;
+        }
+        function base64ToArrayBuffer(base64) {
+            const binaryString = window.atob(base64);
+            const bytes = new Uint8Array(binaryString.length);
+            for (let i = 0; i < binaryString.length; i++) {
+                bytes[i] = binaryString.charCodeAt(i);
+            }
+            return bytes.buffer;
+        }
+        function convertPCM16LEToFloat32(pcmData) {
+            const inputArray = new Int16Array(pcmData);
+            const float32Array = new Float32Array(inputArray.length);
+            for (let i = 0; i < inputArray.length; i++) {
+                float32Array[i] = inputArray[i] / 32768;
+            }
+            return float32Array;
+        }
+        async function injestAudioChuckToPlay(base64AudioChunk) {
+            try {
+                if (audioInputContext.state === "suspended") {
+                    await audioInputContext.resume();
+                }
+                const arrayBuffer = base64ToArrayBuffer(base64AudioChunk);
+                const float32Data = convertPCM16LEToFloat32(arrayBuffer);
+                workletNode.port.postMessage(float32Data);
+            } catch (error) {
+                console.error("Error processing audio chunk:", error);
+            }
+        }
+        function recordChunk() {
+            const buffer = new ArrayBuffer(pcmData.length * 2);
+            const view = new DataView(buffer);
+            pcmData.forEach((value, index) => {
+                view.setInt16(index * 2, value, true);
+            });
+            const base64 = btoa(
+                String.fromCharCode.apply(null, new Uint8Array(buffer))
+            );
+            sendVoiceMessage(base64);
+            pcmData = [];
+        }
+        async function startAudioInput() {
+            audioContext = new AudioContext({
+                sampleRate: 16000,
+            });
+            const stream = await navigator.mediaDevices.getUserMedia({
+                audio: {
+                    channelCount: 1,
+                    sampleRate: 16000,
+                },
+            });
+            const source = audioContext.createMediaStreamSource(stream);
+            processor = audioContext.createScriptProcessor(4096, 1, 1);
+            processor.onaudioprocess = (e) => {
+                const inputData = e.inputBuffer.getChannelData(0);
+                const pcm16 = new Int16Array(inputData.length);
+                for (let i = 0; i < inputData.length; i++) {
+                    pcm16[i] = inputData[i] * 0x7fff;
+                }
+                pcmData.push(...pcm16);
+            };
+            source.connect(processor);
+            processor.connect(audioContext.destination);
+            interval = setInterval(recordChunk, 3000);
+        }
+        function stopAudioInput() {
+            if (processor) {
+                processor.disconnect();
+            }
+            if (audioContext) {
+                audioContext.close();
+            }
+            clearInterval(interval);
+        }
+        function displayMessage(message) {
+            console.log(message);
+            addParagraphToDiv("chatLog", message);
+        }
+        function addParagraphToDiv(divId, text) {
+            const newParagraph = document.createElement("p");
+            newParagraph.textContent = text;
+            const div = document.getElementById(divId);
+            div.appendChild(newParagraph);
+        }
+        startButton.addEventListener('click', startAudioInput);
+        stopButton.addEventListener('click', stopAudioInput);
+        class Response {
+            constructor(data) {
+                this.text = null;
+                this.audioData = null;
+                this.endOfTurn = null;
+                if (data.text) {
+                    this.text = data.text
+                }
+                if (data.audio) {
+                    this.audioData = data.audio;
+                }
+            }
+        }
+    </script>
+</body>
+</html>

main.py ADDED Viewed

	@@ -0,0 +1,122 @@

+## pip install --upgrade google-genai==0.3.0##
+import asyncio
+import json
+import os
+import websockets
+from google import genai
+import base64
+# Load API key from environment
+os.environ['GOOGLE_API_KEY'] = ''
+MODEL = "gemini-2.0-flash-exp"  # use your model ID
+client = genai.Client(
+  http_options={
+    'api_version': 'v1alpha',
+  }
+)
+async def gemini_session_handler(client_websocket: websockets.WebSocketServerProtocol):
+    """Handles the interaction with Gemini API within a websocket session.
+    Args:
+        client_websocket: The websocket connection to the client.
+    """
+    try:
+        config_message = await client_websocket.recv()
+        config_data = json.loads(config_message)
+        config = config_data.get("setup", {})
+        config["system_instruction"] = """You are a helpful assistant for screen sharing sessions. Your role is to:
+                                        1) Analyze and describe the content being shared on screen
+                                        2) Answer questions about the shared content
+                                        3) Provide relevant information and context about what's being shown
+                                        4) Assist with technical issues related to screen sharing
+                                        5) Maintain a professional and helpful tone. Focus on being concise and clear in your responses."""
+        async with client.aio.live.connect(model=MODEL, config=config) as session:
+            print("Connected to Gemini API")
+            async def send_to_gemini():
+                """Sends messages from the client websocket to the Gemini API."""
+                try:
+                  async for message in client_websocket:
+                      try:
+                          data = json.loads(message)
+                          if "realtime_input" in data:
+                              for chunk in data["realtime_input"]["media_chunks"]:
+                                  if chunk["mime_type"] == "audio/pcm":
+                                      await session.send({"mime_type": "audio/pcm", "data": chunk["data"]})
+                                  elif chunk["mime_type"] == "image/jpeg":
+                                      await session.send({"mime_type": "image/jpeg", "data": chunk["data"]})
+                      except Exception as e:
+                          print(f"Error sending to Gemini: {e}")
+                  print("Client connection closed (send)")
+                except Exception as e:
+                     print(f"Error sending to Gemini: {e}")
+                finally:
+                   print("send_to_gemini closed")
+            async def receive_from_gemini():
+                """Receives responses from the Gemini API and forwards them to the client, looping until turn is complete."""
+                try:
+                    while True:
+                        try:
+                            print("receiving from gemini")
+                            async for response in session.receive():
+                                if response.server_content is None:
+                                    print(f'Unhandled server message! - {response}')
+                                    continue
+                                model_turn = response.server_content.model_turn
+                                if model_turn:
+                                    for part in model_turn.parts:
+                                        if hasattr(part, 'text') and part.text is not None:
+                                            await client_websocket.send(json.dumps({"text": part.text}))
+                                        elif hasattr(part, 'inline_data') and part.inline_data is not None:
+                                            print("audio mime_type:", part.inline_data.mime_type)
+                                            base64_audio = base64.b64encode(part.inline_data.data).decode('utf-8')
+                                            await client_websocket.send(json.dumps({
+                                                "audio": base64_audio,
+                                            }))
+                                            print("audio received")
+                                if response.server_content.turn_complete:
+                                    print('\n<Turn complete>')
+                        except websockets.exceptions.ConnectionClosedOK:
+                            print("Client connection closed normally (receive)")
+                            break  # Exit the loop if the connection is closed
+                        except Exception as e:
+                            print(f"Error receiving from Gemini: {e}")
+                            break
+                except Exception as e:
+                      print(f"Error receiving from Gemini: {e}")
+                finally:
+                      print("Gemini connection closed (receive)")
+            # Start send loop
+            send_task = asyncio.create_task(send_to_gemini())
+            # Launch receive loop as a background task
+            receive_task = asyncio.create_task(receive_from_gemini())
+            await asyncio.gather(send_task, receive_task)
+    except Exception as e:
+        print(f"Error in Gemini session: {e}")
+    finally:
+        print("Gemini session closed.")
+async def main() -> None:
+    async with websockets.serve(gemini_session_handler, "localhost", 9083):
+        print("Running websocket server localhost:9083...")
+        await asyncio.Future()  # Keep the server running indefinitely
+if __name__ == "__main__":
+    asyncio.run(main())