Spaces:

jbilcke-hf
/

VideoChain-API

Running on CPU Upgrade

App Files Files Community

VideoChain-API / src /production /generateShot.mts

jbilcke-hf HF Staff

ok! now, time to debug and build the frontend..

5dfc565 almost 2 years ago

raw

history blame

6.19 kB

	import path from "node:path"

	import { v4 as uuidv4 } from "uuid"
	import tmpDir from "temp-dir"

	import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
	import { generateAudio } from "./generateAudio.mts"
	import { generateVideo } from "./generateVideo.mts"
	import { upscaleVideo } from "./upscaleVideo.mts"
	import { generateVoice } from "./generateVoice.mts"
	import { generateSeed } from "../utils/generateSeed.mts"
	import { mergeAudio } from "./mergeAudio.mts"
	import { addAudioToVideo } from "./addAudioToVideo.mts"
	import { interpolateVideo } from "./interpolateVideo.mts"
	import { postInterpolation } from "./postInterpolation.mts"


	/**
	 * Generate a single video shot and return where it was written.
	 *
	 * Steps, in order: generate a base video from `shotPrompt`, download it into
	 * the temp directory, optionally upscale it, optionally interpolate and
	 * rescale it, optionally generate background/foreground audio and attach it
	 * to the video.
	 *
	 * @param seed - Seed forwarded to the video model; a falsy value (0) means "pick one with generateSeed()".
	 * @param shotId - Identifier used to build file names; an empty string means "pick a fresh UUID".
	 * @param actorPrompt - Currently only triggers a log line (actor generation is not wired in).
	 * @param shotPrompt - Prompt forwarded to generateVideo and upscaleVideo.
	 * @param backgroundAudioPrompt - When non-empty, a background audio track is generated.
	 * @param foregroundAudioPrompt - When non-empty, a foreground audio track is generated.
	 * @param actorDialoguePrompt - When non-empty (together with `actorVoicePrompt`), a voice track is generated.
	 * @param actorVoicePrompt - Gate for voice generation; its content is not used yet.
	 * @param duration - Target duration forwarded to postInterpolation.
	 * @param nbFrames - Target frame rate forwarded to postInterpolation.
	 * @param resolution - Only logged at the moment.
	 * @param nbSteps - Number of steps forwarded to generateVideo.
	 * @param upscale - Whether to run the upscaling step.
	 * @param interpolate - Whether to run the interpolation + post-processing step.
	 * @param noise - Only logged at the moment.
	 * @returns The shot id, the absolute path of the video in the temp directory, and its file name.
	 * @throws Whatever the underlying step throws once its single retry (generation, upscaling) has also failed.
	 */
	export const generateShot = async ({
	  seed = 0,
	  shotId = "",
	  actorPrompt = "",
	  shotPrompt = "",
	  backgroundAudioPrompt = "",
	  foregroundAudioPrompt = "",
	  actorDialoguePrompt = "",
	  actorVoicePrompt = "",
	  duration = 2,
	  nbFrames = 24,
	  resolution = 576,
	  nbSteps = 35,
	  upscale = true,
	  interpolate = true,
	  noise = true,
	}: {
	  seed?: number;
	  shotId?: string;
	  actorPrompt?: string;
	  shotPrompt?: string;
	  backgroundAudioPrompt?: string;
	  foregroundAudioPrompt?: string;
	  actorDialoguePrompt?: string;
	  actorVoicePrompt?: string;
	  duration?: number; // 2 seconds
	  nbFrames?: number; // 24 FPS
	  resolution?: number; // 256, 320, 512, 576, 720, 1080..
	  nbSteps?: number;
	  upscale?: boolean;
	  interpolate?: boolean;
	  noise?: boolean;
	}): Promise<{ shotId: string; filePath: string; videoFileName: string }> => {
	  // `||` (not `??`) is deliberate: the defaults 0 and "" mean "generate one for me"
	  seed = seed || generateSeed()
	  shotId = shotId || uuidv4()

	  const shotFileName = `${shotId}.mp4`

	  // Run a step, and if it throws, log and run it exactly one more time.
	  // A failure of the second attempt propagates to the caller.
	  const retryOnce = async <T,>(retryMessage: string, step: () => Promise<T>): Promise<T> => {
	    try {
	      return await step()
	    } catch (err) {
	      console.log(retryMessage, err)
	      return await step()
	    }
	  }

	  console.log("generating video shot:", {
	    seed,
	    shotId,
	    actorPrompt,
	    shotPrompt,
	    backgroundAudioPrompt,
	    foregroundAudioPrompt,
	    actorDialoguePrompt,
	    actorVoicePrompt,
	    duration,
	    nbFrames,
	    resolution,
	    nbSteps,
	    upscale,
	    interpolate,
	    noise,
	  })

	  if (actorPrompt) {
	    console.log("generating actor..")
	    // TODO: actor generation is not wired in yet
	    // const actorIdentityFileName = `actor_${Date.now()}.png`
	    // await generateActor(actorPrompt, actorIdentityFileName, seed)
	  }

	  console.log("generating base video ..")

	  // currently the base model is incapable of generating more than 24 FPS,
	  // because otherwise the upscaler will have trouble

	  // so for now, we fix it to 24 frames
	  // const nbFramesForBaseModel = Math.min(3, Math.max(1, Math.round(duration))) * 8
	  const nbFramesForBaseModel = 24

	  // base generation can be finicky, if it fails we try again
	  const generatedVideoUrl = await retryOnce(
	    '- trying again to generate base shot..',
	    () => generateVideo(shotPrompt, {
	      seed,
	      nbFrames: nbFramesForBaseModel,
	      nbSteps
	    })
	  )

	  console.log("downloading video..")

	  const videoFileName = await downloadFileToTmp(generatedVideoUrl, shotFileName)

	  if (upscale) {
	    console.log("upscaling video..")
	    // upscaling can be finicky, if it fails we try again
	    await retryOnce(
	      '- trying again to upscale shot..',
	      () => upscaleVideo(videoFileName, shotPrompt)
	    )
	  }

	  if (interpolate) {
	    console.log("interpolating video..")
	    // ATTENTION 1:
	    // the interpolation step always create a SLOW MOTION video
	    // it means it can last a lot longer (eg. 2x, 3x, 4x.. longer)
	    // than the duration generated by the original video model

	    // ATTENTION 2:
	    // the interpolation step generates videos in 910x512!

	    // ATTENTION 3:
	    // the interpolation step parameters are currently not passed to the space,
	    // so changing those two variables below will have no effect!
	    const interpolationSteps = 3
	    const interpolatedFramesPerSecond = 24

	    // NOTE(review): the previous code passed an undeclared `task` here, which
	    // throws a ReferenceError. The video file name is used instead, by analogy
	    // with upscaleVideo/postInterpolation — confirm against interpolateVideo's signature.
	    await interpolateVideo(
	      videoFileName,
	      interpolationSteps,
	      interpolatedFramesPerSecond
	    )
	    console.log('creating slow-mo video (910x512 @ 24 FPS)')

	    // with our current interpolation settings, the 3 seconds video generated by the model
	    // become a 7 seconds video, at 24 FPS

	    // so we want to scale it back to the desired duration length
	    // also, as a last trick we want to upscale it (without AI) and add some FXs
	    console.log('performing final scaling (1280x720 @ 24 FPS)')
	    await postInterpolation(videoFileName, duration, nbFrames)
	  }

	  let backgroundAudioFileName = ''
	  if (backgroundAudioPrompt) {
	    console.log("generating background audio..")
	    // uuidv4 must be *called*: interpolating the bare function would embed its source text
	    backgroundAudioFileName = await generateAudio(backgroundAudioPrompt, `shot_${shotId}_audio_${uuidv4()}.m4a`)
	  }

	  let foregroundAudioFileName = ''
	  if (foregroundAudioPrompt) {
	    console.log("generating foreground audio..")
	    foregroundAudioFileName = await generateAudio(foregroundAudioPrompt, `shot_${shotId}_audio_${uuidv4()}.m4a`)
	  }

	  if (actorDialoguePrompt) {
	    console.log("configuring dialogue..")
	    if (actorVoicePrompt) {
	      console.log("configuring voice..")
	      // well.. that's a TODO!
	      // for now let's always use the same voice model

	      console.log('TODO this should be done in the sequence, not the prompt!')
	      // the voice track is generated but not mixed into the final audio yet
	      await generateVoice(actorDialoguePrompt, `shot_${shotId}_voice_${uuidv4()}.m4a`)
	    }
	  }

	  console.log('merging audio with video..')
	  if (backgroundAudioFileName || foregroundAudioFileName) {
	    let audioFileName = ''

	    if (backgroundAudioFileName && foregroundAudioFileName) {
	      // we have both background and foreground
	      audioFileName = await mergeAudio({
	        input1FileName: backgroundAudioFileName,
	        input1Volume: 0.2, // 20% volume
	        input2FileName: foregroundAudioFileName,
	        input2Volume: 0.7, // 70% volume
	      })
	    } else {
	      // exactly one of the two is set
	      audioFileName = backgroundAudioFileName || foregroundAudioFileName
	    }

	    // NOTE(review): same undeclared `task` as in the interpolation step —
	    // replaced with the video file name; confirm against addAudioToVideo's signature.
	    await addAudioToVideo(videoFileName, audioFileName)
	  }

	  console.log("returning result to user..")

	  const filePath = path.resolve(tmpDir, videoFileName)

	  return {
	    shotId,
	    filePath,
	    videoFileName
	  }
	}