JARVIS / index.html
SalexAI's picture
Upload 2 files
56d02e9 verified
<!DOCTYPE html>
<html>
<head>
<link rel="stylesheet" href="https://fonts.googleapis.com/icon?family=Material+Icons">
<link rel="stylesheet" href="https://code.getmdl.io/1.3.0/material.indigo-pink.min.css">
<script defer src="https://code.getmdl.io/1.3.0/material.min.js"></script>
<style>
#videoElement {
width: 640px;
height: 480px;
border-radius: 20px;
}
#canvasElement {
display: none;
width: 640px;
height: 480px;
}
.demo-content {
padding: 20px;
display: flex;
flex-direction: column;
align-items: center;
}
.button-group {
margin-bottom: 20px;
}
</style>
</head>
<body>
<div class="mdl-layout mdl-js-layout mdl-layout--fixed-header">
<header class="mdl-layout__header">
<div class="mdl-layout__header-row">
<!-- Title -->
<span class="mdl-layout-title">Gemini Live Demo</span>
</div>
</header>
<main class="mdl-layout__content">
<div class="page-content">
<div class="demo-content">
<!-- Voice Control Buttons -->
<div class="button-group">
<button id="startButton"
class="mdl-button mdl-js-button mdl-button--fab mdl-button--mini-fab mdl-button--colored">
<i class="material-icons">mic</i>
</button>
<button id="stopButton"
class="mdl-button mdl-js-button mdl-button--fab mdl-button--mini-fab">
<i class="material-icons">mic_off</i>
</button>
</div>
<!-- Video Element -->
<video id="videoElement" autoplay style="width: 640px; height: 480px;"></video>
<!-- Hidden Canvas -->
<canvas id="canvasElement" style="width: 640px; height: 480px;"></canvas>
<!-- Text Output -->
<div id="chatLog"></div>
</div>
</div>
</main>
</div>
<script defer>
const URL = "ws://localhost:9083";
const video = document.getElementById("videoElement");
const canvas = document.getElementById("canvasElement");
let context;
// Initialize context here
window.addEventListener("load", () => {
context = canvas.getContext("2d");
setInterval(captureImage, 3000);
});
const startButton = document.getElementById('startButton');
const stopButton = document.getElementById('stopButton');
let stream = null;
let currentFrameB64;
let webSocket = null;
let audioContext = null;
let mediaRecorder = null;
let processor = null;
let pcmData = [];
let interval = null;
let initialized = false;
let audioInputContext;
let workletNode;
// Function to start screen capture
async function startScreenShare() {
try {
stream = await navigator.mediaDevices.getDisplayMedia({
video: {
width: { max: 640 },
height: { max: 480 },
},
});
video.srcObject = stream;
await new Promise(resolve => {
video.onloadedmetadata = () => {
console.log("video loaded metadata");
resolve();
}
});
} catch (err) {
console.error("Error accessing the screen: ", err);
}
}
// Function to capture an image from the shared screen
function captureImage() {
if (stream && video.videoWidth > 0 && video.videoHeight > 0 && context) {
canvas.width = 640;
canvas.height = 480;
context.drawImage(video, 0, 0, canvas.width, canvas.height);
const imageData = canvas.toDataURL("image/jpeg").split(",")[1].trim();
currentFrameB64 = imageData;
}
else {
console.log("no stream or video metadata not loaded");
}
}
window.addEventListener("load", async () => {
await startScreenShare();
//setInterval(captureImage, 3000);
// Initialize audio context right away
await initializeAudioContext();
connect();
});
function connect() {
console.log("connecting: ", URL);
webSocket = new WebSocket(URL);
webSocket.onclose = (event) => {
console.log("websocket closed: ", event);
alert("Connection closed");
};
webSocket.onerror = (event) => {
console.log("websocket error: ", event);
};
webSocket.onopen = (event) => {
console.log("websocket open: ", event);
sendInitialSetupMessage();
};
webSocket.onmessage = receiveMessage;
}
function sendInitialSetupMessage() {
console.log("sending setup message");
setup_client_message = {
setup: {
generation_config: { response_modalities: ["AUDIO"] },
},
};
webSocket.send(JSON.stringify(setup_client_message));
}
function sendVoiceMessage(b64PCM) {
if (webSocket == null) {
console.log("websocket not initialized");
return;
}
payload = {
realtime_input: {
media_chunks: [{
mime_type: "audio/pcm",
data: b64PCM,
},
{
mime_type: "image/jpeg",
data: currentFrameB64,
},
],
},
};
webSocket.send(JSON.stringify(payload));
console.log("sent: ", payload);
}
function receiveMessage(event) {
const messageData = JSON.parse(event.data);
const response = new Response(messageData);
if (response.text) {
displayMessage("GEMINI: " + response.text);
}
if (response.audioData) {
injestAudioChuckToPlay(response.audioData);
}
}
async function initializeAudioContext() {
if (initialized) return;
audioInputContext = new (window.AudioContext ||
window.webkitAudioContext)({
sampleRate: 24000
});
await audioInputContext.audioWorklet.addModule("pcm-processor.js");
workletNode = new AudioWorkletNode(audioInputContext, "pcm-processor");
workletNode.connect(audioInputContext.destination);
initialized = true;
}
function base64ToArrayBuffer(base64) {
const binaryString = window.atob(base64);
const bytes = new Uint8Array(binaryString.length);
for (let i = 0; i < binaryString.length; i++) {
bytes[i] = binaryString.charCodeAt(i);
}
return bytes.buffer;
}
function convertPCM16LEToFloat32(pcmData) {
const inputArray = new Int16Array(pcmData);
const float32Array = new Float32Array(inputArray.length);
for (let i = 0; i < inputArray.length; i++) {
float32Array[i] = inputArray[i] / 32768;
}
return float32Array;
}
async function injestAudioChuckToPlay(base64AudioChunk) {
try {
if (audioInputContext.state === "suspended") {
await audioInputContext.resume();
}
const arrayBuffer = base64ToArrayBuffer(base64AudioChunk);
const float32Data = convertPCM16LEToFloat32(arrayBuffer);
workletNode.port.postMessage(float32Data);
} catch (error) {
console.error("Error processing audio chunk:", error);
}
}
function recordChunk() {
const buffer = new ArrayBuffer(pcmData.length * 2);
const view = new DataView(buffer);
pcmData.forEach((value, index) => {
view.setInt16(index * 2, value, true);
});
const base64 = btoa(
String.fromCharCode.apply(null, new Uint8Array(buffer))
);
sendVoiceMessage(base64);
pcmData = [];
}
async function startAudioInput() {
audioContext = new AudioContext({
sampleRate: 16000,
});
const stream = await navigator.mediaDevices.getUserMedia({
audio: {
channelCount: 1,
sampleRate: 16000,
},
});
const source = audioContext.createMediaStreamSource(stream);
processor = audioContext.createScriptProcessor(4096, 1, 1);
processor.onaudioprocess = (e) => {
const inputData = e.inputBuffer.getChannelData(0);
const pcm16 = new Int16Array(inputData.length);
for (let i = 0; i < inputData.length; i++) {
pcm16[i] = inputData[i] * 0x7fff;
}
pcmData.push(...pcm16);
};
source.connect(processor);
processor.connect(audioContext.destination);
interval = setInterval(recordChunk, 3000);
}
function stopAudioInput() {
if (processor) {
processor.disconnect();
}
if (audioContext) {
audioContext.close();
}
clearInterval(interval);
}
function displayMessage(message) {
console.log(message);
addParagraphToDiv("chatLog", message);
}
function addParagraphToDiv(divId, text) {
const newParagraph = document.createElement("p");
newParagraph.textContent = text;
const div = document.getElementById(divId);
div.appendChild(newParagraph);
}
startButton.addEventListener('click', startAudioInput);
stopButton.addEventListener('click', stopAudioInput);
class Response {
constructor(data) {
this.text = null;
this.audioData = null;
this.endOfTurn = null;
if (data.text) {
this.text = data.text
}
if (data.audio) {
this.audioData = data.audio;
}
}
}
</script>
</body>
</html>