<!DOCTYPE html>
<html>

<head>
    <link rel="stylesheet" href="https://fonts.googleapis.com/icon?family=Material+Icons">
    <link rel="stylesheet" href="https://code.getmdl.io/1.3.0/material.indigo-pink.min.css">
    <script defer src="https://code.getmdl.io/1.3.0/material.min.js"></script>

    <style>
        #videoElement {
            width: 640px;
            height: 480px;
            border-radius: 20px;
        }

        #canvasElement {
            display: none;
            width: 640px;
            height: 480px;
        }

        .demo-content {
            padding: 20px;
            display: flex;
            flex-direction: column;
            align-items: center;
        }

        .button-group {
            margin-bottom: 20px;
        }
    </style>
</head>

<body>
    <div class="mdl-layout mdl-js-layout mdl-layout--fixed-header">
        <header class="mdl-layout__header">
            <div class="mdl-layout__header-row">
                <span class="mdl-layout-title">Gemini Live Demo</span>
            </div>
        </header>
        <main class="mdl-layout__content">
            <div class="page-content">
                <div class="demo-content">
                    <div class="button-group">
                        <button id="startButton"
                            class="mdl-button mdl-js-button mdl-button--fab mdl-button--mini-fab mdl-button--colored">
                            <i class="material-icons">mic</i>
                        </button>
                        <button id="stopButton"
                            class="mdl-button mdl-js-button mdl-button--fab mdl-button--mini-fab">
                            <i class="material-icons">mic_off</i>
                        </button>
                    </div>

                    <!-- Live preview of the shared screen; sizing comes from the
                         #videoElement rules in the stylesheet above. -->
                    <video id="videoElement" autoplay></video>

                    <!-- Hidden canvas used only to grab JPEG frames from the video. -->
                    <canvas id="canvasElement"></canvas>

                    <div id="chatLog"></div>
                </div>
            </div>
        </main>
    </div>

    <script>
        const WS_URL = "ws://localhost:9083";
        const video = document.getElementById("videoElement");
        const canvas = document.getElementById("canvasElement");
        let context;

        window.addEventListener("load", () => {
            context = canvas.getContext("2d");
            // Capture a frame from the shared screen every 3 seconds.
            setInterval(captureImage, 3000);
        });
        const startButton = document.getElementById('startButton');
        const stopButton = document.getElementById('stopButton');
        let stream = null;        // screen-share stream
        let micStream = null;     // microphone stream, kept so it can be stopped later
        let currentFrameB64;      // latest captured frame, base64 JPEG
        let webSocket = null;
        let audioContext = null;  // 16 kHz context for microphone capture
        let processor = null;
        let pcmData = [];         // buffered 16-bit PCM samples awaiting send
        let interval = null;
        let initialized = false;
        let audioInputContext;    // 24 kHz context for model audio playback
        let workletNode;
        async function startScreenShare() {
            try {
                stream = await navigator.mediaDevices.getDisplayMedia({
                    video: {
                        width: { max: 640 },
                        height: { max: 480 },
                    },
                });

                video.srcObject = stream;
                await new Promise(resolve => {
                    video.onloadedmetadata = () => {
                        console.log("video loaded metadata");
                        resolve();
                    };
                });
            } catch (err) {
                console.error("Error accessing the screen: ", err);
            }
        }
        function captureImage() {
            if (stream && video.videoWidth > 0 && video.videoHeight > 0 && context) {
                canvas.width = 640;
                canvas.height = 480;
                context.drawImage(video, 0, 0, canvas.width, canvas.height);
                // Strip the "data:image/jpeg;base64," prefix; only the payload is sent.
                currentFrameB64 = canvas.toDataURL("image/jpeg").split(",")[1].trim();
            } else {
                console.log("no stream or video metadata not loaded");
            }
        }
        window.addEventListener("load", async () => {
            await startScreenShare();
            await initializeAudioContext();
            connect();
        });
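        // Note: most browsers only permit getDisplayMedia() during a user
        // gesture (a click, not a load event), so the screen-share prompt may
        // be blocked here; wiring startScreenShare() to a button click is the
        // safer pattern.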

        function connect() {
            console.log("connecting: ", WS_URL);

            webSocket = new WebSocket(WS_URL);

            webSocket.onclose = (event) => {
                console.log("websocket closed: ", event);
                alert("Connection closed");
            };

            webSocket.onerror = (event) => {
                console.log("websocket error: ", event);
            };

            webSocket.onopen = (event) => {
                console.log("websocket open: ", event);
                sendInitialSetupMessage();
            };

            webSocket.onmessage = receiveMessage;
        }

        function sendInitialSetupMessage() {
            console.log("sending setup message");
            const setup_client_message = {
                setup: {
                    generation_config: { response_modalities: ["AUDIO"] },
                },
            };

            webSocket.send(JSON.stringify(setup_client_message));
        }
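
        // For reference, a sketch of the messages this client exchanges with
        // the backend at WS_URL. These shapes are inferred from this file alone
        // (the server code is not shown here), so treat them as assumptions:
        //
        //   client -> server, once after open:
        //     { "setup": { "generation_config": { "response_modalities": ["AUDIO"] } } }
        //
        //   client -> server, every 3 seconds while recording:
        //     { "realtime_input": { "media_chunks": [
        //         { "mime_type": "audio/pcm",  "data": "<base64 16-bit PCM>" },
        //         { "mime_type": "image/jpeg", "data": "<base64 JPEG frame>" } ] } }
        //
        //   server -> client (handled in receiveMessage below):
        //     { "text": "<reply text>" } and/or { "audio": "<base64 24 kHz PCM>" }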

        function sendVoiceMessage(b64PCM) {
            if (webSocket == null || webSocket.readyState !== WebSocket.OPEN) {
                console.log("websocket not ready");
                return;
            }

            const payload = {
                realtime_input: {
                    media_chunks: [
                        {
                            mime_type: "audio/pcm",
                            data: b64PCM,
                        },
                        {
                            mime_type: "image/jpeg",
                            data: currentFrameB64,
                        },
                    ],
                },
            };

            webSocket.send(JSON.stringify(payload));
            console.log("sent: ", payload);
        }

        function receiveMessage(event) {
            const messageData = JSON.parse(event.data);
            const response = new GeminiResponse(messageData);

            if (response.text) {
                displayMessage("GEMINI: " + response.text);
            }
            if (response.audioData) {
                ingestAudioChunkToPlay(response.audioData);
            }
        }

        async function initializeAudioContext() {
            if (initialized) return;

            // 24 kHz matches the sample rate of the model's PCM audio output.
            audioInputContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 24000,
            });
            await audioInputContext.audioWorklet.addModule("pcm-processor.js");
            workletNode = new AudioWorkletNode(audioInputContext, "pcm-processor");
            workletNode.connect(audioInputContext.destination);
            initialized = true;
        }
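
        /*
         * pcm-processor.js is loaded above but not included in this page. A
         * minimal sketch of what it plausibly contains, assuming it simply
         * buffers the Float32Array chunks posted from the main thread and
         * streams them to the output (the file itself is not shown, so this is
         * an assumption, not the actual implementation):
         *
         *   class PCMProcessor extends AudioWorkletProcessor {
         *       constructor() {
         *           super();
         *           this.buffer = new Float32Array(0);
         *           this.port.onmessage = (e) => {
         *               // Append each incoming chunk to the playback buffer.
         *               const merged = new Float32Array(this.buffer.length + e.data.length);
         *               merged.set(this.buffer);
         *               merged.set(e.data, this.buffer.length);
         *               this.buffer = merged;
         *           };
         *       }
         *       process(inputs, outputs) {
         *           const out = outputs[0][0];
         *           const n = Math.min(out.length, this.buffer.length);
         *           out.set(this.buffer.subarray(0, n));
         *           this.buffer = this.buffer.subarray(n);
         *           return true; // keep the processor alive
         *       }
         *   }
         *   registerProcessor("pcm-processor", PCMProcessor);
         */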

        function base64ToArrayBuffer(base64) {
            const binaryString = window.atob(base64);
            const bytes = new Uint8Array(binaryString.length);
            for (let i = 0; i < binaryString.length; i++) {
                bytes[i] = binaryString.charCodeAt(i);
            }
            return bytes.buffer;
        }

        function convertPCM16LEToFloat32(pcmData) {
            const inputArray = new Int16Array(pcmData);
            const float32Array = new Float32Array(inputArray.length);

            for (let i = 0; i < inputArray.length; i++) {
                float32Array[i] = inputArray[i] / 32768;
            }

            return float32Array;
        }
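
        // Dividing by 32768 maps the Int16 range [-32768, 32767] onto
        // [-1.0, ~0.99997], the float range Web Audio expects; e.g. a sample
        // value of 16384 becomes 0.5.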

        async function ingestAudioChunkToPlay(base64AudioChunk) {
            try {
                if (audioInputContext.state === "suspended") {
                    await audioInputContext.resume();
                }
                const arrayBuffer = base64ToArrayBuffer(base64AudioChunk);
                const float32Data = convertPCM16LEToFloat32(arrayBuffer);

                // Hand the decoded samples to the worklet for playback.
                workletNode.port.postMessage(float32Data);
            } catch (error) {
                console.error("Error processing audio chunk:", error);
            }
        }

        function recordChunk() {
            const buffer = new ArrayBuffer(pcmData.length * 2);
            const view = new DataView(buffer);
            pcmData.forEach((value, index) => {
                view.setInt16(index * 2, value, true); // little-endian
            });

            // Base64-encode in slices; spreading ~96k bytes into a single
            // String.fromCharCode call can exceed the engine's argument limit.
            const bytes = new Uint8Array(buffer);
            let binary = "";
            const sliceSize = 0x8000;
            for (let i = 0; i < bytes.length; i += sliceSize) {
                binary += String.fromCharCode.apply(null, bytes.subarray(i, i + sliceSize));
            }
            const base64 = btoa(binary);

            sendVoiceMessage(base64);
            pcmData = [];
        }

        async function startAudioInput() {
            audioContext = new AudioContext({
                sampleRate: 16000,
            });

            micStream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    channelCount: 1,
                    sampleRate: 16000,
                },
            });

            const source = audioContext.createMediaStreamSource(micStream);
            // ScriptProcessorNode is deprecated but still widely supported;
            // see the note after this function for an alternative.
            processor = audioContext.createScriptProcessor(4096, 1, 1);

            processor.onaudioprocess = (e) => {
                const inputData = e.inputBuffer.getChannelData(0);
                const pcm16 = new Int16Array(inputData.length);
                for (let i = 0; i < inputData.length; i++) {
                    // Clamp before scaling so loud samples don't wrap around.
                    const s = Math.max(-1, Math.min(1, inputData[i]));
                    pcm16[i] = s * 0x7fff;
                }
                pcmData.push(...pcm16);
            };

            source.connect(processor);
            processor.connect(audioContext.destination);

            // Flush the accumulated PCM to the server every 3 seconds.
            interval = setInterval(recordChunk, 3000);
        }
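
        // A modern alternative would capture microphone audio with an
        // AudioWorklet instead of the deprecated ScriptProcessorNode, e.g. a
        // small recorder worklet (hypothetical file and processor names)
        // registered the same way as "pcm-processor" above:
        //
        //   await audioContext.audioWorklet.addModule("recorder-processor.js");
        //   const recorder = new AudioWorkletNode(audioContext, "recorder-processor");
        //   recorder.port.onmessage = (e) => pcmData.push(...e.data); // Int16 samples
        //   source.connect(recorder);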

        function stopAudioInput() {
            if (processor) {
                processor.disconnect();
            }
            if (audioContext) {
                audioContext.close();
            }
            if (micStream) {
                // Release the microphone itself, not just the processing graph.
                micStream.getTracks().forEach((track) => track.stop());
            }

            clearInterval(interval);
        }

        function displayMessage(message) {
            console.log(message);
            addParagraphToDiv("chatLog", message);
        }

        function addParagraphToDiv(divId, text) {
            const newParagraph = document.createElement("p");
            newParagraph.textContent = text;
            const div = document.getElementById(divId);
            div.appendChild(newParagraph);
        }

        startButton.addEventListener('click', startAudioInput);
        stopButton.addEventListener('click', stopAudioInput);

        // Wrapper for messages received from the server.
        class GeminiResponse {
            constructor(data) {
                this.text = null;
                this.audioData = null;
                this.endOfTurn = null;

                if (data.text) {
                    this.text = data.text;
                }

                if (data.audio) {
                    this.audioData = data.audio;
                }
            }
        }
    </script>
</body>

</html>