// gemini-live-p5/src/lib/audio-recorder.ts
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { audioContext } from "./utils";
import AudioRecordingWorklet from "./worklets/audio-processing";
import SafariAudioRecordingWorklet from "./worklets/safari-audio-processing";
import VolMeterWorket from "./worklets/vol-meter";
import { createWorketFromSrc } from "./audioworklet-registry";
import EventEmitter from "eventemitter3";
/** Encode an ArrayBuffer as a base64 string so PCM chunks can be sent as text. */
function arrayBufferToBase64(buffer: ArrayBuffer) {
let binary = "";
const bytes = new Uint8Array(buffer);
const len = bytes.byteLength;
for (let i = 0; i < len; i++) {
binary += String.fromCharCode(bytes[i]);
}
return window.btoa(binary);
}
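// For example, arrayBufferToBase64(new TextEncoder().encode("Hi").buffer) yields "SGk=".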
// Add Safari-specific audio context creation
async function createSafariAudioContext(sampleRate: number): Promise<AudioContext> {
console.log('Creating Safari audio context with options:', { sampleRate });
// Older Safari versions only expose the webkit-prefixed constructor, so prefer it when present
const AudioContextClass = (window as any).webkitAudioContext || window.AudioContext;
console.log('Using AudioContext class:', AudioContextClass.name);
const ctx = new AudioContextClass({
sampleRate,
latencyHint: 'interactive'
});
console.log('Safari AudioContext initial state:', {
state: ctx.state,
sampleRate: ctx.sampleRate,
baseLatency: ctx.baseLatency,
destination: ctx.destination,
});
// Safari creates audio contexts in the "suspended" state until a user gesture;
// try to resume here (this succeeds when called from a gesture handler)
if (ctx.state === 'suspended') {
console.log('Attempting to resume suspended Safari audio context...');
try {
await ctx.resume();
console.log('Successfully resumed Safari audio context:', ctx.state);
} catch (err) {
console.error('Failed to resume Safari audio context:', err);
throw err;
}
}
return ctx;
}
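// Because resume() only succeeds inside a user gesture on Safari, callers should
// invoke AudioRecorder.start() from a click/touch handler (see the usage sketch
// at the end of this file).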
export class AudioRecorder extends EventEmitter {
stream: MediaStream | undefined;
audioContext: AudioContext | undefined;
source: MediaStreamAudioSourceNode | undefined;
recording: boolean = false;
recordingWorklet: AudioWorkletNode | undefined;
vuWorklet: AudioWorkletNode | undefined;
private starting: Promise<void> | null = null;
// Add browser detection
isSafari: boolean;
isIOS: boolean;
constructor(public sampleRate = 16000) {
super();
this.isSafari = /^((?!chrome|android).)*safari/i.test(navigator.userAgent);
this.isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent) && !(window as any).MSStream;
console.log('AudioRecorder initialized:', {
isSafari: this.isSafari,
isIOS: this.isIOS,
sampleRate: this.sampleRate,
userAgent: navigator.userAgent,
webAudioSupport: !!(window.AudioContext || (window as any).webkitAudioContext),
mediaDevicesSupport: !!navigator.mediaDevices
});
}
async start() {
if (!navigator.mediaDevices?.getUserMedia) {
console.error('MediaDevices API not available:', {
mediaDevices: !!navigator.mediaDevices,
getUserMedia: !!(navigator.mediaDevices && navigator.mediaDevices.getUserMedia)
});
throw new Error("Could not request user media");
}
console.log('Starting AudioRecorder with full environment info:', {
userAgent: navigator.userAgent,
platform: navigator.platform,
vendor: navigator.vendor,
audioWorkletSupport: !!(window.AudioWorklet),
sampleRate: this.sampleRate,
existingAudioContext: !!this.audioContext,
existingStream: !!this.stream,
isSafari: this.isSafari
});
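// Keep the in-flight start promise around so stop() can wait for it if the
// caller tears down before initialization finishes (see stop() below).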
this.starting = new Promise(async (resolve, reject) => {
try {
if (this.isSafari) {
// Safari implementation
console.log('Safari detected - using Safari-specific audio initialization');
// 1. First get audio permissions
console.log('Requesting audio permissions first for Safari...');
const constraints = {
audio: {
echoCancellation: false,
noiseSuppression: false,
autoGainControl: false,
sampleRate: this.sampleRate,
channelCount: 1
}
};
console.log('Safari audio constraints:', constraints);
try {
this.stream = await navigator.mediaDevices.getUserMedia(constraints);
const track = this.stream.getAudioTracks()[0];
console.log('Safari audio permissions granted:', {
track: track.label,
settings: track.getSettings(),
constraints: track.getConstraints(),
enabled: track.enabled,
muted: track.muted,
readyState: track.readyState
});
} catch (err) {
console.error('Failed to get Safari audio permissions:', err);
throw err;
}
// 2. Create and initialize audio context
try {
this.audioContext = await createSafariAudioContext(this.sampleRate);
console.log('Safari audio context ready:', {
state: this.audioContext.state,
currentTime: this.audioContext.currentTime
});
} catch (err) {
console.error('Failed to initialize Safari audio context:', err);
throw err;
}
// 3. Create and connect audio source
try {
console.log('Creating Safari audio source...');
this.source = this.audioContext.createMediaStreamSource(this.stream);
console.log('Safari audio source created successfully:', {
numberOfInputs: this.source.numberOfInputs,
numberOfOutputs: this.source.numberOfOutputs,
channelCount: this.source.channelCount
});
} catch (err) {
console.error('Failed to create Safari audio source:', err);
throw err;
}
// 4. Load and create worklet
try {
const workletName = "audio-recorder-worklet";
console.log('Loading Safari audio worklet...');
const src = createWorketFromSrc(workletName, SafariAudioRecordingWorklet);
await this.audioContext.audioWorklet.addModule(src);
console.log('Safari audio worklet module loaded');
this.recordingWorklet = new AudioWorkletNode(
this.audioContext,
workletName,
{
numberOfInputs: 1,
numberOfOutputs: 1,
channelCount: 1,
processorOptions: {
sampleRate: this.sampleRate
}
}
);
// Add detailed error handlers
this.recordingWorklet.onprocessorerror = (event) => {
console.error('Safari AudioWorklet processor error:', event);
};
this.recordingWorklet.port.onmessageerror = (event) => {
console.error('Safari AudioWorklet message error:', event);
};
// Add data handler with detailed logging
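// The worklet is assumed to post messages shaped like
// { event: <string>, data: { int16arrayBuffer: ArrayBuffer } };
// only data.int16arrayBuffer is relied on below.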
this.recordingWorklet.port.onmessage = (ev: MessageEvent) => {
const data = ev.data.data;
console.log('Safari AudioWorklet message received:', {
eventType: ev.data.event,
hasData: !!data,
dataType: data ? typeof data : null,
timestamp: Date.now()
});
if (data?.int16arrayBuffer) {
console.log('Processing Safari audio chunk:', {
byteLength: data.int16arrayBuffer.byteLength,
timestamp: Date.now()
});
const arrayBufferString = arrayBufferToBase64(data.int16arrayBuffer);
this.emit("data", arrayBufferString);
} else {
console.warn('Invalid Safari audio chunk received:', ev.data);
}
};
console.log('Safari AudioWorkletNode created successfully');
} catch (err) {
console.error('Failed to setup Safari audio worklet:', err);
throw err;
}
// 5. Connect nodes
try {
console.log('Connecting Safari audio nodes...');
this.source.connect(this.recordingWorklet);
console.log('Safari audio nodes connected successfully');
} catch (err) {
console.error('Failed to connect Safari audio nodes:', err);
throw err;
}
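// Note: no VU meter worklet is attached on the Safari path, so the "volume"
// event is only emitted on other browsers.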
} else {
// Chrome/other browsers implementation
console.log('Non-Safari browser detected - using standard audio initialization');
// Get media stream first for Chrome
const constraints = {
audio: {
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
sampleRate: this.sampleRate
}
};
console.log('Chrome audio constraints:', constraints);
try {
this.stream = await navigator.mediaDevices.getUserMedia(constraints);
const track = this.stream.getAudioTracks()[0];
console.log('Chrome audio permissions granted:', {
track: track.label,
settings: track.getSettings()
});
} catch (err) {
console.error('Failed to get Chrome audio permissions:', err);
throw err;
}
// Create audio context after getting stream for Chrome
try {
console.log('Creating Chrome audio context...');
this.audioContext = await audioContext({ sampleRate: this.sampleRate });
console.log('Chrome audio context created:', {
state: this.audioContext.state,
sampleRate: this.audioContext.sampleRate
});
} catch (err) {
console.error('Failed to create Chrome audio context:', err);
throw err;
}
// Create media stream source
try {
console.log('Creating Chrome audio source...');
this.source = this.audioContext.createMediaStreamSource(this.stream);
console.log('Chrome audio source created');
} catch (err) {
console.error('Failed to create Chrome audio source:', err);
throw err;
}
// Load and create standard worklet
try {
const workletName = "audio-recorder-worklet";
console.log('Loading Chrome audio worklet...');
const src = createWorketFromSrc(workletName, AudioRecordingWorklet);
await this.audioContext.audioWorklet.addModule(src);
console.log('Chrome audio worklet loaded');
this.recordingWorklet = new AudioWorkletNode(
this.audioContext,
workletName,
{
numberOfInputs: 1,
numberOfOutputs: 1,
channelCount: 1,
processorOptions: {
sampleRate: this.sampleRate
}
}
);
// Add error handlers
this.recordingWorklet.onprocessorerror = (event) => {
console.error('Chrome AudioWorklet processor error:', event);
};
this.recordingWorklet.port.onmessageerror = (event) => {
console.error('Chrome AudioWorklet message error:', event);
};
// Add data handler
this.recordingWorklet.port.onmessage = (ev: MessageEvent) => {
const arrayBuffer = ev.data.data?.int16arrayBuffer;
if (arrayBuffer) {
const arrayBufferString = arrayBufferToBase64(arrayBuffer);
this.emit("data", arrayBufferString);
} else {
console.warn('Invalid Chrome audio chunk received:', ev.data);
}
};
console.log('Chrome AudioWorkletNode created');
} catch (err) {
console.error('Failed to setup Chrome audio worklet:', err);
throw err;
}
// Connect nodes
try {
console.log('Connecting Chrome audio nodes...');
this.source.connect(this.recordingWorklet);
console.log('Chrome audio nodes connected');
// Set up VU meter
const vuWorkletName = "vu-meter";
await this.audioContext.audioWorklet.addModule(
createWorketFromSrc(vuWorkletName, VolMeterWorket),
);
this.vuWorklet = new AudioWorkletNode(this.audioContext, vuWorkletName);
this.vuWorklet.port.onmessage = (ev: MessageEvent) => {
this.emit("volume", ev.data.volume);
};
this.source.connect(this.vuWorklet);
console.log('Chrome VU meter connected');
} catch (err) {
console.error('Failed to connect Chrome audio nodes:', err);
throw err;
}
}
this.recording = true;
console.log('Recording started successfully');
resolve();
this.starting = null;
} catch (error) {
console.error('Failed to start recording:', error);
this.stop();
reject(error);
this.starting = null;
}
});
return this.starting;
}
stop() {
console.log('Stopping audio recorder...');
// It's possible that stop() is called before start() completes,
// e.g. if the websocket hangs up immediately
const handleStop = () => {
try {
if (this.source) {
console.log('Disconnecting audio source...');
this.source.disconnect();
}
if (this.stream) {
console.log('Stopping media stream tracks...');
this.stream.getTracks().forEach(track => {
track.stop();
console.log('Stopped track:', track.label);
});
}
if (this.audioContext && this.isSafari) {
console.log('Closing Safari audio context...');
this.audioContext.close();
}
this.stream = undefined;
this.source = undefined;
this.recordingWorklet = undefined;
this.vuWorklet = undefined;
this.recording = false;
console.log('Audio recorder stopped successfully');
} catch (err) {
console.error('Error while stopping audio recorder:', err);
}
};
if (this.starting) {
console.log('Stop called while starting - waiting for start to complete...');
this.starting.then(handleStop);
return;
}
handleStop();
}
}
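// Usage sketch (illustrative; "startButton"/"stopButton" and the server-forwarding
// logic are assumptions, not part of this module). Wiring start() to a click
// handler also satisfies Safari's user-gesture requirement:
//
//   const recorder = new AudioRecorder(16000);
//   recorder.on("data", (base64Pcm: string) => {
//     // forward base64-encoded 16-bit PCM chunks to the server
//   });
//   recorder.on("volume", (level: number) => {
//     // drive a VU meter (non-Safari browsers only)
//   });
//   startButton.addEventListener("click", () => recorder.start());
//   stopButton.addEventListener("click", () => recorder.stop());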