Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import path from "node:path" | |
import { v4 as uuidv4 } from "uuid" | |
import tmpDir from "temp-dir" | |
import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts" | |
import { generateAudio } from "./generateAudio.mts" | |
import { generateVideo } from "./generateVideo.mts" | |
import { upscaleVideo } from "./upscaleVideo.mts" | |
import { generateVoice } from "./generateVoice.mts" | |
import { generateSeed } from "../utils/generateSeed.mts" | |
import { mergeAudio } from "./mergeAudio.mts" | |
import { addAudioToVideo } from "./addAudioToVideo.mts" | |
import { interpolateVideo } from "./interpolateVideo.mts" | |
import { postInterpolation } from "./postInterpolation.mts" | |
/**
 * Runs `attempt` and, if it rejects, logs `retryMessage` (with the error)
 * and runs it exactly one more time.
 *
 * A rejection of the second attempt is propagated to the caller.
 */
async function retryOnce<T>(
  attempt: () => Promise<T>,
  retryMessage: string
): Promise<T> {
  try {
    return await attempt()
  } catch (err) {
    // do not swallow the first failure silently: keep it in the logs
    console.log(retryMessage, err)
    return attempt()
  }
}

/**
 * Generates a single video shot and returns where the resulting file lives.
 *
 * Pipeline, in order:
 *  1. generate the base video from `shotPrompt` (one retry on failure)
 *  2. download it to a temporary file named `<shotId>.mp4`
 *  3. optionally upscale it (one retry on failure)
 *  4. optionally interpolate it, then run the post-interpolation pass
 *  5. optionally generate background / foreground audio and the dialogue voice
 *  6. merge background and/or foreground audio into the video
 *
 * @param seed - seed passed to the video model; a falsy value (0) means
 *   "pick one with generateSeed()"
 * @param shotId - identifier of the shot; an empty string means "generate a UUID"
 * @param actorPrompt - when non-empty, only triggers a log line for now
 *   (actor generation is not wired in yet)
 * @param shotPrompt - prompt given to the video model and to the upscaler
 * @param backgroundAudioPrompt - when non-empty, a background track is generated
 * @param foregroundAudioPrompt - when non-empty, a foreground track is generated
 * @param actorDialoguePrompt - text given to the voice generator
 *   (only used when `actorVoicePrompt` is also non-empty)
 * @param actorVoicePrompt - currently only acts as a switch enabling voice generation
 * @param duration - desired duration, forwarded to the post-interpolation pass
 * @param nbFrames - forwarded to the post-interpolation pass
 *   (the base model always receives 24, see below)
 * @param resolution - currently only logged
 * @param nbSteps - number of steps passed to the video model
 * @param upscale - whether to run the upscaling step
 * @param interpolate - whether to run the interpolation + post-interpolation steps
 * @param noise - currently only logged
 * @returns the resolved `shotId`, the file name returned by the download step,
 *   and that file name resolved against the temp directory
 */
export const generateShot = async ({
  seed = 0,
  shotId = "",
  actorPrompt = "",
  shotPrompt = "",
  backgroundAudioPrompt = "",
  foregroundAudioPrompt = "",
  actorDialoguePrompt = "",
  actorVoicePrompt = "",
  duration = 2,
  nbFrames = 24,
  resolution = 576,
  nbSteps = 35,
  upscale = true,
  interpolate = true,
  noise = true,
}: {
  seed?: number;
  shotId?: string;
  actorPrompt?: string;
  shotPrompt?: string;
  backgroundAudioPrompt?: string;
  foregroundAudioPrompt?: string;
  actorDialoguePrompt?: string;
  actorVoicePrompt?: string;
  duration?: number; // 2 seconds
  nbFrames?: number; // 24 FPS
  resolution?: number; // 256, 320, 512, 576, 720, 1080..
  nbSteps?: number;
  upscale?: boolean;
  interpolate?: boolean;
  noise?: boolean;
}) => {
  // 0 and "" are the "not provided" sentinels, hence `||` rather than `??`
  seed = seed || generateSeed()
  shotId = shotId || uuidv4()

  const shotFileName = `${shotId}.mp4`

  console.log("generating video shot:", {
    seed,
    shotId,
    actorPrompt,
    shotPrompt,
    backgroundAudioPrompt,
    foregroundAudioPrompt,
    actorDialoguePrompt,
    actorVoicePrompt,
    duration,
    nbFrames,
    resolution,
    nbSteps,
    upscale,
    interpolate,
    noise,
  })

  if (actorPrompt) {
    console.log("generating actor..")
    // TODO: actor generation is not wired in yet, e.g.:
    // await generateActor(actorPrompt, `actor_${Date.now()}.png`, seed)
  }

  console.log("generating base video ..")

  // currently the base model is incapable of generating more than 24 FPS,
  // because otherwise the upscaler will have trouble
  // so for now, we fix it to 24 frames
  // const nbFramesForBaseModel = Math.min(3, Math.max(1, Math.round(duration))) * 8
  const nbFramesForBaseModel = 24

  // base video generation can be finicky, if it fails we try again
  const generatedVideoUrl = await retryOnce(
    () => generateVideo(shotPrompt, {
      seed,
      nbFrames: nbFramesForBaseModel,
      nbSteps
    }),
    '- trying again to generate base shot..'
  )

  console.log("downloading video..")
  const videoFileName = await downloadFileToTmp(generatedVideoUrl, shotFileName)

  if (upscale) {
    console.log("upscaling video..")
    // upscaling can be finicky, if it fails we try again
    await retryOnce(
      () => upscaleVideo(videoFileName, shotPrompt),
      '- trying again to upscale shot..'
    )
  }

  if (interpolate) {
    console.log("interpolating video..")
    // ATTENTION 1:
    // the interpolation step always create a SLOW MOTION video
    // it means it can last a lot longer (eg. 2x, 3x, 4x.. longer)
    // than the duration generated by the original video model

    // ATTENTION 2:
    // the interpolation step generates videos in 910x512!

    // ATTENTION 3:
    // the interpolation step parameters are currently not passed to the space,
    // so changing those two variables below will have no effect!
    const interpolationSteps = 3
    const interpolatedFramesPerSecond = 24

    // NOTE(review): this used to pass an undeclared `task` identifier; the video
    // file name is what every other step operates on — confirm against the
    // signature of interpolateVideo
    await interpolateVideo(
      videoFileName,
      interpolationSteps,
      interpolatedFramesPerSecond
    )
    console.log('creating slow-mo video (910x512 @ 24 FPS)')

    // with our current interpolation settings, the 3 seconds video generated by the model
    // become a 7 seconds video, at 24 FPS

    // so we want to scale it back to the desired duration length
    // also, as a last trick we want to upscale it (without AI) and add some FXs
    console.log('performing final scaling (1280x720 @ 24 FPS)')
    await postInterpolation(videoFileName, duration, nbFrames)
  }

  let backgroundAudioFileName = ''
  if (backgroundAudioPrompt) {
    console.log("generating background audio..")
    // uuidv4 must be CALLED here, otherwise the function source ends up in the file name
    backgroundAudioFileName = await generateAudio(backgroundAudioPrompt, `shot_${shotId}_audio_${uuidv4()}.m4a`)
  }

  let foregroundAudioFileName = ''
  if (foregroundAudioPrompt) {
    console.log("generating foreground audio..")
    foregroundAudioFileName = await generateAudio(foregroundAudioPrompt, `shot_${shotId}_audio_${uuidv4()}.m4a`)
  }

  // NOTE(review): the voice track is generated but not merged into the video below
  let voiceAudioFileName = ''
  if (actorDialoguePrompt) {
    console.log("configuring dialogue..")
    if (actorVoicePrompt) {
      console.log("configuring voice..")
      // well.. that's a TODO!
      // for now let's always use the same voice model

      console.log('TODO this should be done in the sequence, not the prompt!')
      voiceAudioFileName = await generateVoice(actorDialoguePrompt, `shot_${shotId}_voice_${uuidv4()}.m4a`)
    }
  }

  console.log('merging audio with video..')
  if (backgroundAudioFileName || foregroundAudioFileName) {
    let audioFileName = ''

    // we have both background and foreground
    if (backgroundAudioFileName && foregroundAudioFileName) {
      audioFileName = await mergeAudio({
        input1FileName: backgroundAudioFileName,
        input1Volume: 0.2, // 20% volume
        input2FileName: foregroundAudioFileName,
        input2Volume: 0.7, // 70% volume
      })
    } else if (backgroundAudioFileName) {
      audioFileName = backgroundAudioFileName
    } else if (foregroundAudioFileName) {
      audioFileName = foregroundAudioFileName
    }

    // NOTE(review): same `task` -> `videoFileName` fix as for interpolateVideo — confirm
    await addAudioToVideo(videoFileName, audioFileName)
  }

  console.log("returning result to user..")

  const filePath = path.resolve(tmpDir, videoFileName)

  return {
    shotId,
    filePath,
    videoFileName
  }
}