import { ClapProject, ClapSegment, getClapAssetSourceType, filterSegments, ClapSegmentFilteringMode } from "@aitube/clap"
import { getSpeechBackgroundAudioPrompt } from "@aitube/engine"
import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
import { getMediaInfo } from "@/app/api/utils/getMediaInfo"
import { ClapCompletionMode } from "../types"
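
/**
 * Generate the missing dialogue audio for a single shot.
 *
 * If the shot has a dialogue segment without an assetUrl, the speech is
 * synthesized with Parler-TTS and stored on that segment, and the resulting
 * audio duration is propagated to the other segments. In partial mode the
 * new dialogue segment is also pushed to `newerClap`.
 */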
export async function processShot({
  shotSegment,
  existingClap,
  newerClap,
  mode
}: {
  shotSegment: ClapSegment
  existingClap: ClapProject
  newerClap: ClapProject
  mode: ClapCompletionMode
}): Promise<void> {
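
  // gather the segments associated with this shot (filtered on the shot's start time)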
  const shotSegments: ClapSegment[] = filterSegments(
    ClapSegmentFilteringMode.START,
    shotSegment,
    existingClap.segments
  )

  const shotDialogueSegments: ClapSegment[] = shotSegments.filter(s =>
    s.category === "dialogue"
  )
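
  // we only handle one dialogue per shot here, so take the first one (if any)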
  let shotDialogueSegment: ClapSegment | undefined = shotDialogueSegments.at(0)

  console.log(`[api/edit/dialogues] processShot: shot [${shotSegment.startTimeInMs}:${shotSegment.endTimeInMs}] has ${shotSegments.length} segments (${shotDialogueSegments.length} dialogues)`)

  if (shotDialogueSegment && !shotDialogueSegment.assetUrl) {
    // console.log(`[api/edit/dialogues] generating audio..`)

    try {
      // this generates an mp3
      shotDialogueSegment.assetUrl = await generateSpeechWithParlerTTS({
        text: shotDialogueSegment.prompt,
        audioId: getSpeechBackgroundAudioPrompt(
          shotSegments,
          existingClap.entityIndex,
          ["high quality", "crisp", "detailed"]
        ),
        debug: true,
      })
      shotDialogueSegment.assetSourceType = getClapAssetSourceType(shotDialogueSegment.assetUrl)
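
      // probe the generated file to make sure it contains audio, and to read its duration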
      const { durationInMs, durationInSec, hasAudio } = await getMediaInfo(shotDialogueSegment.assetUrl)

      if (hasAudio && durationInMs > 1000) {
        shotDialogueSegment.assetDurationInMs = durationInMs
        shotSegment.assetDurationInMs = durationInMs

        // we update the duration of all the segments for this shot
        // (it is possible that this makes the two previous lines redundant)
        existingClap.segments.forEach(s => {
          s.assetDurationInMs = durationInMs
        })
      }
    } catch (err) {
      console.log(`[api/edit/dialogues] processShot: failed to generate audio: ${err}`)
      throw err
    }

    console.log(`[api/edit/dialogues] processShot: generated dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)

    // in partial mode, we need to manually add the segment to the newer clap
    if (mode === "partial") {
      newerClap.segments.push(shotDialogueSegment)
    }
  } else {
    console.log(`[api/edit/dialogues] processShot: there is already a dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
  }
}
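
// Minimal usage sketch (hypothetical caller, not part of the original file):
// assuming `shotSegments` holds the shot-level segments of `existingClap`,
// each shot could be processed concurrently, e.g.:
//
//   await Promise.all(shotSegments.map(shotSegment =>
//     processShot({ shotSegment, existingClap, newerClap, mode })
//   ))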