Spaces:
Running
Running
import { ClapProject, ClapSegment, getClapAssetSourceType, filterSegments, ClapSegmentFilteringMode } from "@aitube/clap" | |
import { getSpeechBackgroundAudioPrompt } from "@aitube/engine" | |
import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS" | |
import { getMediaInfo } from "@/app/api/utils/getMediaInfo" | |
import { ClapCompletionMode } from "../types" | |
export async function processShot({ | |
shotSegment, | |
existingClap, | |
newerClap, | |
mode | |
}: { | |
shotSegment: ClapSegment | |
existingClap: ClapProject | |
newerClap: ClapProject | |
mode: ClapCompletionMode | |
}): Promise<void> { | |
const shotSegments: ClapSegment[] = filterSegments( | |
ClapSegmentFilteringMode.START, | |
shotSegment, | |
existingClap.segments | |
) | |
const shotDialogueSegments: ClapSegment[] = shotSegments.filter(s => | |
s.category === "dialogue" | |
) | |
let shotDialogueSegment: ClapSegment | undefined = shotDialogueSegments.at(0) | |
console.log(`[api/edit/dialogues] processShot: shot [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}] has ${shotSegments.length} segments (${shotDialogueSegments.length} dialogues)`) | |
if (shotDialogueSegment && !shotDialogueSegment.assetUrl) { | |
// console.log(`[api/edit/dialogues] generating audio..`) | |
try { | |
// this generates a mp3 | |
shotDialogueSegment.assetUrl = await generateSpeechWithParlerTTS({ | |
text: shotDialogueSegment.prompt, | |
audioId: getSpeechBackgroundAudioPrompt( | |
shotSegments, | |
existingClap.entityIndex, | |
["high quality", "crisp", "detailed"] | |
), | |
debug: true, | |
}) | |
shotDialogueSegment.assetSourceType = getClapAssetSourceType(shotDialogueSegment.assetUrl) | |
const { durationInMs, durationInSec, hasAudio } = await getMediaInfo(shotDialogueSegment.assetUrl) | |
if (hasAudio && durationInMs > 1000) { | |
shotDialogueSegment.assetDurationInMs = durationInMs | |
shotSegment.assetDurationInMs = durationInMs | |
// we update the duration of all the segments for this shot | |
// (it is possible that this makes the two previous lines redundant) | |
existingClap.segments.forEach(s => { | |
s.assetDurationInMs = durationInMs | |
}) | |
} | |
} catch (err) { | |
console.log(`[api/edit/dialogues] processShot: failed to generate audio: ${err}`) | |
throw err | |
} | |
console.log(`[api/edit/dialogues] processShot: generated dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`) | |
// if it's partial, we need to manually add it | |
if (mode === "partial") { | |
newerClap.segments.push(shotDialogueSegment) | |
} | |
} else { | |
console.log(`[api/edit/dialogues] processShot: there is already a dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`) | |
} | |
} | |