Spaces:
Running
Running
import { Podcast, PodcastTurn } from './types'; | |
import { | |
addNoise, | |
addSilence, | |
generateAudio, | |
joinAudio, | |
loadWavAndDecode, | |
} from './utils'; | |
// taken from https://freesound.org/people/artxmp1/sounds/660540 | |
import openingSoundSrc from '../opening-sound.wav'; | |
/**
 * One unit of work in the generation pipeline: a single podcast turn
 * together with the audio produced for it.
 */
export interface GenerationStep {
  /** The podcast turn this step renders. */
  turn: PodcastTurn;
  /** Decoded audio for the turn; absent until the turn has been generated. */
  audioBuffer?: AudioBuffer;
}
/**
 * Input settings for {@link pipelineGeneratePodcast}.
 */
export interface PodcastGenerationOptions {
  /** The podcast script (speaker names and turns) to render. */
  podcast: Podcast;
  /** Voice used for turns spoken by the first entry of `podcast.speakerNames`. */
  speaker1: string;
  /** Voice used for turns spoken by any other speaker. */
  speaker2: string;
  /** Speed value forwarded to the audio generator for every turn. */
  speed: number;
  /** When true, the opening sound is joined in front of the first turn. */
  isAddIntroMusic: boolean;
  /** When true, background noise is added to the final audio. */
  isAddNoise: boolean;
}
/**
 * Renders a whole podcast to a single audio buffer.
 *
 * Each turn is synthesized in order with the voice of its speaker, decoded,
 * and joined onto the audio produced so far using the (normalized) gap of
 * the previous turn. Optionally an intro sound is placed before the first
 * turn and background noise is added to the final result.
 *
 * The `podcast` passed in is NOT modified: gap and text normalization is
 * applied to per-turn copies, so calling this function repeatedly with the
 * same podcast always yields the same gaps.
 *
 * @param options - Script, voices, speed and post-processing flags.
 * @param onUpdate - Progress callback, invoked once with `(0, total)` before
 *   generation starts and then with `(done, total)` after each turn.
 * @returns The final joined audio.
 * @throws Error `'outputWav is undefined'` when the podcast has no turns.
 *   Errors raised by the audio helpers propagate unchanged.
 */
export const pipelineGeneratePodcast = async (
  {
    podcast,
    speaker1,
    speaker2,
    speed,
    isAddIntroMusic,
    isAddNoise,
  }: PodcastGenerationOptions,
  onUpdate: (done: number, total: number) => void
): Promise<AudioBuffer> => {
  const { speakerNames, turns } = podcast;

  // Build the steps from normalized *copies* of the turns. Normalizing in
  // place would shift every gap by a further 100 ms on each run.
  // NOTE(review): assumes PodcastTurn is a plain data object, so a spread
  // copy is equivalent to the original — confirm against ./types.
  const steps: GenerationStep[] = turns.map((turn) => ({
    turn: {
      ...turn,
      // normalize the gap, make it not too long or too short
      nextGapMilisecs:
        Math.max(-600, Math.min(300, turn.nextGapMilisecs)) - 100,
      // normalize text input for TTS (typographic quotes -> ASCII quotes)
      text: turn.text
        .trim()
        .replace(/’/g, "'")
        .replace(/“/g, '"')
        .replace(/”/g, '"'),
    },
  }));
  onUpdate(0, steps.length);

  let outputWav: AudioBuffer | undefined;

  // generate audio for each step (aka each turn)
  for (let i = 0; i < steps.length; i++) {
    const step = steps[i];
    // The first listed speaker gets `speaker1`; every other speaker
    // (including a name missing from `speakerNames`) gets `speaker2`.
    const isFirstSpeaker =
      speakerNames.indexOf(step.turn.speakerName as string) === 0;
    const speakerVoice = isFirstSpeaker ? speaker1 : speaker2;

    const url = await generateAudio(step.turn.text, speakerVoice, speed);
    const turnAudio = await loadWavAndDecode(url);
    step.audioBuffer = turnAudio;

    if (i === 0) {
      if (isAddIntroMusic) {
        // add intro music at the beginning to make it feel like a radio
        // station; joined with a -2000 gap (presumably a 2 s overlap —
        // see joinAudio)
        const openingSound = await loadWavAndDecode(openingSoundSrc);
        outputWav = joinAudio(openingSound, turnAudio, -2000);
      } else {
        // if there is no intro music, add a little silence at the beginning
        outputWav = addSilence(turnAudio, true, 200);
      }
    } else {
      if (outputWav === undefined) {
        // Unreachable: the first iteration always assigns outputWav.
        throw new Error('outputWav is undefined');
      }
      // The gap between two turns is stored on the earlier turn.
      outputWav = joinAudio(
        outputWav,
        turnAudio,
        steps[i - 1].turn.nextGapMilisecs
      );
    }
    onUpdate(i + 1, steps.length);
  }

  // Check before post-processing so an empty podcast reports this error
  // instead of failing inside addNoise.
  if (outputWav === undefined) {
    throw new Error('outputWav is undefined');
  }

  // small nit: adding small background noise to the whole audio makes it
  // sound more natural
  return isAddNoise ? addNoise(outputWav, 0.002) : outputWav;
};