// @ts-expect-error this package does not have typing
import TextLineStream from 'textlinestream';
import { Client } from '@gradio/client';
import * as lamejs from '@breezystack/lamejs';

// ponyfill for missing ReadableStream asyncIterator on Safari
import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
import { CONFIG } from '../config';
import { uploadFiles } from '@huggingface/hub';

/** True when the bundler reports the `development` mode. */
export const isDev: boolean = import.meta.env.MODE === 'development';
/** Token read from the `VITE_TEST_TOKEN` build-time environment variable. */
export const testToken: string = import.meta.env.VITE_TEST_TOKEN;
/** True when the current page URL contains the substring `blogmode`. */
export const isBlogMode: boolean = !!window.location.href.match(/blogmode/);

/** Resolves after `ms` milliseconds. */
export const delay = (ms: number) => new Promise((res) => setTimeout(res, ms));

/**
 * Calls the `/tts` endpoint of the Space configured in `CONFIG.ttsSpaceId`
 * and returns the URL to the WAV file.
 *
 * The request is attempted up to 3 times; the error of the last attempt is
 * rethrown.
 *
 * @param content Text to synthesize.
 * @param voice Voice identifier forwarded to the endpoint.
 * @param speed Speed forwarded to the endpoint.
 * @returns URL of the generated audio.
 * @throws The last error when every attempt fails.
 */
export const generateAudio = async (
  content: string,
  voice: string,
  speed: number = 1.1
): Promise<string> => {
  const maxRetries = 3;
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      const client = await Client.connect(CONFIG.ttsSpaceId);
      const result = await client.predict('/tts', {
        text: content,
        voice,
        speed,
      });
      console.log(result.data);
      const url: unknown = (result.data as any)[0].url;
      // A response without a usable URL is treated like any other failed
      // attempt, so it is retried instead of leaking `undefined` to callers.
      if (typeof url !== 'string' || url.length === 0) {
        throw new Error('TTS response did not contain an audio URL');
      }
      return url;
    } catch (e) {
      if (attempt === maxRetries - 1) {
        throw e; // last retry, throw error
      }
      console.error('Failed to generate audio, retrying...', e);
    }
  }
  return ''; // should never reach here
};

/**
 * Returns a uniformly random element of `arr`.
 * For an empty array the result is `undefined` at runtime.
 */
export const pickRand = <T>(arr: T[]): T => {
  return arr[Math.floor(Math.random() * arr.length)];
};

/**
 * Wrapper for SSE: splits the response body into text lines and yields the
 * JSON payload of every `data:` line.
 *
 * - `data:` lines ending with `[DONE]` are skipped.
 * - `error:` lines are parsed as JSON and rethrown as an `Error` carrying the
 *   payload's `message` (or `'Unknown error'`).
 * - Every other line is ignored.
 *
 * @throws Error when the response has no body or an `error:` line is received.
 */
export async function* getSSEStreamAsync(fetchResponse: Response) {
  if (!fetchResponse.body) throw new Error('Response body is empty');
  const lines: ReadableStream<string> = fetchResponse.body
    .pipeThrough(new TextDecoderStream())
    .pipeThrough(new TextLineStream());
  // @ts-expect-error asyncIterator complains about type, but it should work
  for await (const line of asyncIterator(lines)) {
    //if (isDev) console.log({ line });
    if (line.startsWith('data:') && !line.endsWith('[DONE]')) {
      const data = JSON.parse(line.slice(5));
      yield data;
    } else if (line.startsWith('error:')) {
      const data = JSON.parse(line.slice(6));
      throw new Error(data.message || 'Unknown error');
    }
  }
}

/**
 * Uploads `buf` as a single file named `filename` to the repository `repoId`.
 * The content is always wrapped in a Blob typed `audio/wav`.
 *
 * @param hfToken Access token passed to `uploadFiles`.
 */
export const uploadFileToHub = async (
  buf: ArrayBuffer,
  filename: string,
  repoId: string,
  hfToken: string
) => {
  await uploadFiles({
    accessToken: hfToken,
    repo: repoId,
    files: [
      {
        path: filename,
        content: new Blob([buf], { type: 'audio/wav' }),
      },
    ],
  });
};

/**
 * Ok now, most of the functions below are written by ChatGPT using Reasoning mode.
 */

////////////////////////////////////////
// Audio manipulation utils

/**
 * Removes leading and trailing silence from `audioBuffer`.
 *
 * A sample index counts as silent when no channel exceeds an absolute
 * amplitude of 0.01 at that index. When the whole buffer is silent, a
 * one-sample zeroed buffer is returned.
 *
 * @returns A new AudioBuffer; the input is not modified.
 */
export const trimSilence = (audioBuffer: AudioBuffer): AudioBuffer => {
  const threshold = 0.01; // Amplitude below which a sample is considered silent.
  const numChannels = audioBuffer.numberOfChannels;
  const totalSamples = audioBuffer.length;

  // Fetch each channel once instead of once per inspected sample.
  const channelData: Float32Array[] = [];
  for (let channel = 0; channel < numChannels; channel++) {
    channelData.push(audioBuffer.getChannelData(channel));
  }

  // A sample index is silent when no channel is louder than the threshold.
  const isSilent = (index: number): boolean =>
    !channelData.some((data) => Math.abs(data[index]) > threshold);

  // Find the first non-silent sample.
  let startSample = 0;
  while (startSample < totalSamples && isSilent(startSample)) {
    startSample++;
  }

  // Find the last non-silent sample.
  let endSample = totalSamples - 1;
  while (endSample >= startSample && isSilent(endSample)) {
    endSample--;
  }

  // If no non-silent samples were found, return a minimal (1 sample) buffer.
  if (startSample >= totalSamples || endSample < startSample) {
    return new AudioBuffer({
      length: 1,
      numberOfChannels: numChannels,
      sampleRate: audioBuffer.sampleRate,
    });
  }

  const newLength = endSample - startSample + 1;
  const newBuffer = new AudioBuffer({
    length: newLength,
    numberOfChannels: numChannels,
    sampleRate: audioBuffer.sampleRate,
  });

  // Copy the trimmed range of every channel into the new buffer.
  for (let channel = 0; channel < numChannels; channel++) {
    newBuffer
      .getChannelData(channel)
      .set(channelData[channel].subarray(startSample, endSample + 1));
  }
  return newBuffer;
};

/**
 * Joins `audio2` after `audio1`.
 *
 * - `gapMilisecs > 0`: inserts that much silence between the two buffers.
 * - `gapMilisecs === 0`: plain concatenation.
 * - `gapMilisecs < 0`: overlaps the end of `audio1` with the start of
 *   `audio2` (capped at the length of the shorter buffer). With
 *   `overlap === 'cross-fade'` the overlap is linearly cross-faded, otherwise
 *   the overlapping samples are summed.
 *
 * @throws Error when sample rates or channel counts differ, or when
 *   `gapMilisecs` is NaN.
 */
export const joinAudio = (
  audio1: AudioBuffer,
  audio2: AudioBuffer,
  gapMilisecs: number,
  overlap: 'none' | 'cross-fade' = 'none'
): AudioBuffer => {
  const sampleRate = audio1.sampleRate;
  const numChannels = audio1.numberOfChannels;

  // Ensure both audio buffers are compatible.
  if (audio2.sampleRate !== sampleRate) {
    throw new Error('Audio buffers must have the same sample rate');
  }
  if (audio2.numberOfChannels !== numChannels) {
    throw new Error('Audio buffers must have the same number of channels');
  }
  if (Number.isNaN(gapMilisecs)) {
    throw new Error('gapMilisecs must be a number');
  }

  const gapSeconds = gapMilisecs / 1000;

  // Both sizes are computed once so the buffer length and the copy offsets
  // cannot disagree. At most one of them is non-zero.
  const gapSamples = gapSeconds > 0 ? Math.round(gapSeconds * sampleRate) : 0;
  const effectiveOverlap =
    gapSeconds < 0
      ? Math.min(
          Math.round(-gapSeconds * sampleRate),
          audio1.length,
          audio2.length
        )
      : 0;

  const nonOverlapLength = audio1.length - effectiveOverlap;
  const newLength =
    audio1.length + gapSamples + audio2.length - effectiveOverlap;

  // Create a new AudioBuffer for the joined result.
  const newBuffer = new AudioBuffer({
    length: newLength,
    numberOfChannels: numChannels,
    sampleRate: sampleRate,
  });

  for (let channel = 0; channel < numChannels; channel++) {
    const outputData = newBuffer.getChannelData(channel);
    const data1 = audio1.getChannelData(channel);
    const data2 = audio2.getChannelData(channel);

    // Part of audio1 that is not overlapped (all of it when there is no overlap).
    outputData.set(data1.subarray(0, nonOverlapLength), 0);

    // Blend the overlapping region (empty when effectiveOverlap is 0).
    if (overlap === 'cross-fade') {
      for (let i = 0; i < effectiveOverlap; i++) {
        // Linear crossfade:
        const fadeOut = 1 - i / effectiveOverlap;
        const fadeIn = i / effectiveOverlap;
        outputData[nonOverlapLength + i] =
          data1[nonOverlapLength + i] * fadeOut + data2[i] * fadeIn;
      }
    } else {
      for (let i = 0; i < effectiveOverlap; i++) {
        outputData[nonOverlapLength + i] =
          data1[nonOverlapLength + i] + data2[i];
      }
    }

    // Remainder of audio2. The gap (if any) stays zero because a new
    // AudioBuffer is initialized with zeros.
    outputData.set(
      data2.subarray(effectiveOverlap),
      audio1.length + gapSamples
    );
  }
  return newBuffer;
};

/**
 * Returns a copy of `audioBuffer` with white noise added to every sample.
 * Each noise value is drawn from `Math.random()` scaled to
 * `[-magnitude, +magnitude)`.
 */
export const addNoise = (
  audioBuffer: AudioBuffer,
  magnitude: number
): AudioBuffer => {
  const { numberOfChannels, sampleRate, length } = audioBuffer;
  const newBuffer = new AudioBuffer({
    length,
    numberOfChannels,
    sampleRate,
  });
  for (let channel = 0; channel < numberOfChannels; channel++) {
    const inputData = audioBuffer.getChannelData(channel);
    const outputData = newBuffer.getChannelData(channel);
    for (let i = 0; i < length; i++) {
      const noise = (Math.random() * 2 - 1) * magnitude;
      outputData[i] = inputData[i] + noise;
    }
  }
  return newBuffer;
};

/**
 * Returns a copy of `audioBuffer` padded with silence.
 *
 * @param toBeginning When true the silence is prepended, otherwise appended.
 * @param durationMilisecs Length of the silence in milliseconds; must be >= 0.
 * @throws RangeError when `durationMilisecs` is negative or NaN.
 */
export const addSilence = (
  audioBuffer: AudioBuffer,
  toBeginning: boolean,
  durationMilisecs: number
): AudioBuffer => {
  // Fail with a clear message instead of an opaque typed-array error further down.
  if (!(durationMilisecs >= 0)) {
    throw new RangeError('durationMilisecs must be a non-negative number');
  }

  // Convert duration from milliseconds to samples.
  const sampleRate = audioBuffer.sampleRate;
  const silenceSamples = Math.round((durationMilisecs / 1000) * sampleRate);
  const numChannels = audioBuffer.numberOfChannels;

  // Create a new AudioBuffer with extra space for the silence.
  const newBuffer = new AudioBuffer({
    length: audioBuffer.length + silenceSamples,
    numberOfChannels: numChannels,
    sampleRate: sampleRate,
  });

  // The new buffer starts zeroed, so only the original audio has to be placed.
  const writeOffset = toBeginning ? silenceSamples : 0;
  for (let channel = 0; channel < numChannels; channel++) {
    newBuffer
      .getChannelData(channel)
      .set(audioBuffer.getChannelData(channel), writeOffset);
  }
  return newBuffer;
};

////////////////////////////////////////
// Audio formatting utils

/**
 * Fetches `url`, decodes the response body as audio and returns it as a mono
 * AudioBuffer (multi-channel audio is averaged into one channel).
 *
 * @throws Error when the HTTP response is not OK or when the browser has no
 *   AudioContext; decoding errors from `decodeAudioData` propagate unchanged.
 */
export const loadWavAndDecode = async (url: string): Promise<AudioBuffer> => {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(
      `Failed to fetch audio: ${response.status} ${response.statusText}`
    );
  }
  const arrayBuffer = await response.arrayBuffer();
  // @ts-expect-error this is fine
  const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
  if (!AudioContextCtor) {
    throw new Error('AudioContext is not supported on this browser');
  }
  const audioCtx = new AudioContextCtor();
  try {
    const decoded: AudioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
    if (decoded.numberOfChannels <= 1) {
      return decoded;
    }

    // force mono: average all channels sample by sample
    const channels: Float32Array[] = [];
    for (let channel = 0; channel < decoded.numberOfChannels; channel++) {
      channels.push(decoded.getChannelData(channel));
    }
    // Reuse the decoding context rather than opening a second one.
    const monoBuffer: AudioBuffer = audioCtx.createBuffer(
      1,
      decoded.length,
      decoded.sampleRate
    );
    const monoData = monoBuffer.getChannelData(0);
    for (let i = 0; i < decoded.length; i++) {
      let sum = 0;
      for (const data of channels) {
        sum += data[i];
      }
      monoData[i] = sum / channels.length;
    }
    return monoBuffer;
  } finally {
    // Release the context; it is only needed for decoding. Cleanup is
    // best-effort and must not mask the decode result or error.
    try {
      await audioCtx.close();
    } catch {
      // ignore: nothing useful can be done if closing fails
    }
  }
};

/**
 * Serializes `buffer` into a WAV file (44-byte header followed by
 * interleaved little-endian samples).
 *
 * Samples are clamped to [-1, 1]. With `options.float32` they are written as
 * 32-bit IEEE floats (format tag 3), otherwise as 16-bit PCM (format tag 1).
 */
export function audioBufferToWav(
  buffer: AudioBuffer,
  options: { float32?: boolean } = {}
): ArrayBuffer {
  const numChannels = buffer.numberOfChannels;
  const sampleRate = buffer.sampleRate;
  const format = options.float32 ? 3 : 1; // 3 = IEEE float, 1 = PCM
  const bitDepth = options.float32 ? 32 : 16;
  const numSamples = buffer.length;
  const headerLength = 44;
  const bytesPerSample = bitDepth / 8;
  const dataLength = numSamples * numChannels * bytesPerSample;

  const arrayBuffer = new ArrayBuffer(headerLength + dataLength);
  const view = new DataView(arrayBuffer);
  let offset = 0;

  // Small sequential writers; each advances the shared write offset.
  const writeString = (str: string): void => {
    for (let i = 0; i < str.length; i++) {
      view.setUint8(offset, str.charCodeAt(i));
      offset++;
    }
  };
  const writeUint32 = (value: number): void => {
    view.setUint32(offset, value, true);
    offset += 4;
  };
  const writeUint16 = (value: number): void => {
    view.setUint16(offset, value, true);
    offset += 2;
  };

  // Write WAV header
  writeString('RIFF');
  writeUint32(36 + dataLength); // size of everything after this field
  writeString('WAVE');
  writeString('fmt ');
  writeUint32(16); // size of the fmt chunk body
  writeUint16(format);
  writeUint16(numChannels);
  writeUint32(sampleRate);
  writeUint32(sampleRate * numChannels * bytesPerSample); // byte rate
  writeUint16(numChannels * bytesPerSample); // block align
  writeUint16(bitDepth);
  writeString('data');
  writeUint32(dataLength);

  // Write PCM samples: interleave channels
  const channels: Float32Array[] = [];
  for (let i = 0; i < numChannels; i++) {
    channels.push(buffer.getChannelData(i));
  }
  for (let i = 0; i < numSamples; i++) {
    for (let channel = 0; channel < numChannels; channel++) {
      // Clamp the sample to [-1, 1]
      const sample = Math.max(-1, Math.min(1, channels[channel][i]));
      if (options.float32) {
        view.setFloat32(offset, sample, true);
        offset += 4;
      } else {
        // Convert to 16-bit PCM sample
        const intSample = sample < 0 ? sample * 0x8000 : sample * 0x7fff;
        view.setInt16(offset, intSample, true);
        offset += 2;
      }
    }
  }
  return arrayBuffer;
}

/** Encodes `audioBuffer` as 16-bit PCM WAV and wraps it in an `audio/wav` Blob. */
export const blobFromAudioBuffer = (audioBuffer: AudioBuffer): Blob => {
  // Using 16-bit PCM for compatibility.
  const wavArrayBuffer = audioBufferToWav(audioBuffer, { float32: false });
  return new Blob([wavArrayBuffer], { type: 'audio/wav' });
};

/**
 * Encodes `buffer` as MP3 at 128 kbps using lamejs.
 *
 * Single-channel input is encoded as mono; anything else is encoded as stereo
 * from the first two channels (further channels are ignored).
 */
export function audioBufferToMp3(buffer: AudioBuffer): ArrayBuffer {
  const numChannels = buffer.numberOfChannels;
  const sampleRate = buffer.sampleRate;
  const bitRate = 128; // kbps - adjust as desired

  // Initialize MP3 encoder.
  // Note: If more than 2 channels are present, only the first 2 channels will be used.
  const mp3encoder = new lamejs.Mp3Encoder(
    numChannels >= 2 ? 2 : 1,
    sampleRate,
    bitRate
  );

  const samples = buffer.length;
  const chunkSize = 1152; // Frame size for MP3 encoding

  // Only the channels that are actually encoded are fetched.
  const left = buffer.getChannelData(0);
  const right = numChannels === 1 ? undefined : buffer.getChannelData(1);

  const mp3Data: Uint8Array[] = [];
  for (let i = 0; i < samples; i += chunkSize) {
    const leftInt16 = floatTo16BitPCM(left.subarray(i, i + chunkSize));
    const mp3buf =
      right === undefined
        ? mp3encoder.encodeBuffer(leftInt16)
        : mp3encoder.encodeBuffer(
            leftInt16,
            floatTo16BitPCM(right.subarray(i, i + chunkSize))
          );
    if (mp3buf.length > 0) {
      mp3Data.push(new Uint8Array(mp3buf));
    }
  }

  // Flush the encoder to get any remaining MP3 data.
  const endBuf = mp3encoder.flush();
  if (endBuf.length > 0) {
    mp3Data.push(new Uint8Array(endBuf));
  }

  // Concatenate all MP3 chunks into a single ArrayBuffer.
  const totalLength = mp3Data.reduce((acc, curr) => acc + curr.length, 0);
  const result = new Uint8Array(totalLength);
  let offset = 0;
  for (const chunk of mp3Data) {
    result.set(chunk, offset);
    offset += chunk.length;
  }
  return result.buffer;
}

/**
 * Helper function that converts a Float32Array of PCM samples (range -1..1)
 * into an Int16Array (range -32768..32767). Out-of-range samples are clamped.
 */
function floatTo16BitPCM(input: Float32Array): Int16Array {
  const output = new Int16Array(input.length);
  for (let i = 0; i < input.length; i++) {
    const s = Math.max(-1, Math.min(1, input[i]));
    output[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
  }
  return output;
}

/**
 * Clean up filename for saving: every character other than ASCII letters,
 * digits, `-` and `_` is replaced with `_`.
 */
export const cleanupFilename = (name: string): string => {
  return name.replace(/[^a-zA-Z0-9-_]/g, '_');
};