import path from "node:path"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"

import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
import { generateAudio } from "./generateAudio.mts"
import { generateVideo } from "./generateVideo.mts"
import { upscaleVideo } from "./upscaleVideo.mts"
import { generateVoice } from "./generateVoice.mts"
import { generateSeed } from "../utils/generateSeed.mts"
import { mergeAudio } from "./mergeAudio.mts"
import { addAudioToVideo } from "./addAudioToVideo.mts"
import { interpolateVideo } from "./interpolateVideo.mts"
import { postInterpolation } from "./postInterpolation.mts"

export const generateShot = async ({
  seed = 0,
  shotId = "",
  actorPrompt = "",
  shotPrompt = "",
  backgroundAudioPrompt = "",
  foregroundAudioPrompt = "",
  actorDialoguePrompt = "",
  actorVoicePrompt = "",
  duration = 2,
  nbFrames = 24,
  resolution = 576,
  nbSteps = 35,
  upscale = true,
  interpolate = true,
  noise = true,
}: {
  seed?: number;
  shotId?: string;
  actorPrompt?: string;
  shotPrompt?: string;
  backgroundAudioPrompt?: string;
  foregroundAudioPrompt?: string;
  actorDialoguePrompt?: string;
  actorVoicePrompt?: string;
  duration?: number; // in seconds (defaults to 2)
  nbFrames?: number; // 24 FPS
  resolution?: number; // 256, 320, 512, 576, 720, 1080..
  nbSteps?: number;
  upscale?: boolean;
  interpolate?: boolean;
  noise?: boolean;
}) => {
  seed = seed || generateSeed()
  shotId = shotId || uuidv4()

  const shotFileName = `${shotId}.mp4`

  console.log("generating video shot:", {
    seed,
    shotId,
    actorPrompt,
    shotPrompt,
    backgroundAudioPrompt,
    foregroundAudioPrompt,
    actorDialoguePrompt,
    actorVoicePrompt,
    duration,
    nbFrames,
    resolution,
    nbSteps,
    upscale,
    interpolate,
    noise,
  })

  if (actorPrompt) {
    console.log("generating actor..")
    const actorIdentityFileName = `actor_${Date.now()}.png`
    // TODO: generate an identity picture for the actor
    // await generateActor(actorPrompt, actorIdentityFileName, seed)
  }

  console.log("generating base video ..")
  let generatedVideoUrl = ""

  // currently the base model cannot generate more than 24 frames,
  // because otherwise the upscaler will have trouble,
  // so for now we fix it to 24 frames
  // const nbFramesForBaseModel = Math.min(3, Math.max(1, Math.round(duration))) * 8
  const nbFramesForBaseModel = 24

  try {
    generatedVideoUrl = await generateVideo(shotPrompt, {
      seed,
      nbFrames: nbFramesForBaseModel,
      nbSteps,
    })
  } catch (err) {
    // generation can be finicky, if it fails we try again
    console.log('- trying again to generate base shot..')
    generatedVideoUrl = await generateVideo(shotPrompt, {
      seed,
      nbFrames: nbFramesForBaseModel,
      nbSteps,
    })
  }

  console.log("downloading video..")

  const videoFileName = await downloadFileToTmp(generatedVideoUrl, shotFileName)

  if (upscale) {
    console.log("upscaling video..")
    try {
      await upscaleVideo(videoFileName, shotPrompt)
    } catch (err) {
      // upscaling can be finicky, if it fails we try again
      console.log('- trying again to upscale shot..')
      await upscaleVideo(videoFileName, shotPrompt)
    }
  }

  if (interpolate) {
    console.log("interpolating video..")
    // ATTENTION 1:
    // the interpolation step always creates a SLOW MOTION video,
    // meaning it can last a lot longer (eg. 2x, 3x, 4x.. longer)
    // than the duration generated by the original video model

    // ATTENTION 2:
    // the interpolation step generates videos in 910x512!

    // ATTENTION 3:
    // the interpolation step parameters are currently not passed to the space,
    // so changing the two variables below will have no effect!
    const interpolationSteps = 3
    const interpolatedFramesPerSecond = 24

    await interpolateVideo(
      videoFileName,
      interpolationSteps,
      interpolatedFramesPerSecond
    )

    console.log('creating slow-mo video (910x512 @ 24 FPS)')

    // with our current interpolation settings, the 3 seconds video generated by the model
    // becomes a 7 seconds video, at 24 FPS,
    // so we want to scale it back down to the desired duration;
    // as a last trick we also upscale it (without AI) and add some FXs
    console.log('performing final scaling (1280x720 @ 24 FPS)')
    await postInterpolation(videoFileName, duration, nbFrames)
  }

  let backgroundAudioFileName = ''
  if (backgroundAudioPrompt) {
    console.log("generating background audio..")
    backgroundAudioFileName = await generateAudio(backgroundAudioPrompt, `shot_${shotId}_audio_${uuidv4()}.m4a`)
  }

  let foregroundAudioFileName = ''
  if (foregroundAudioPrompt) {
    console.log("generating foreground audio..")
    foregroundAudioFileName = await generateAudio(foregroundAudioPrompt, `shot_${shotId}_audio_${uuidv4()}.m4a`)
  }

  let voiceAudioFileName = ''
  if (actorDialoguePrompt) {
    console.log("configuring dialogue..")
    if (actorVoicePrompt) {
      console.log("configuring voice..")
      // well.. that's a TODO!
      // for now let's always use the same voice model
      console.log('TODO this should be done in the sequence, not the prompt!')
      voiceAudioFileName = await generateVoice(actorDialoguePrompt, `shot_${shotId}_voice_${uuidv4()}.m4a`)
    }
  }

  console.log('merging audio with video..')

  // note: the generated voice track (voiceAudioFileName) is not merged
  // into the final mix yet, that is still a TODO
  if (backgroundAudioFileName || foregroundAudioFileName) {
    let audioFileName = ''

    // we have both a background and a foreground audio track
    if (backgroundAudioFileName && foregroundAudioFileName) {
      audioFileName = await mergeAudio({
        input1FileName: backgroundAudioFileName,
        input1Volume: 0.2, // 20% volume
        input2FileName: foregroundAudioFileName,
        input2Volume: 0.7, // 70% volume
      })
    } else if (backgroundAudioFileName) {
      audioFileName = backgroundAudioFileName
    } else if (foregroundAudioFileName) {
      audioFileName = foregroundAudioFileName
    }

    await addAudioToVideo(videoFileName, audioFileName)
  }

  console.log("returning result to user..")

  const filePath = path.resolve(tmpDir, videoFileName)

  return {
    shotId,
    filePath,
    videoFileName,
  }
}
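
// Example usage, as a minimal sketch (not part of the original module);
// all prompt values below are hypothetical placeholders:
//
// const { shotId, filePath, videoFileName } = await generateShot({
//   shotPrompt: "medium shot of a cat walking on a sunny beach",
//   backgroundAudioPrompt: "ocean waves, distant seagulls",
//   duration: 2,
//   nbFrames: 24,
//   upscale: true,
//   interpolate: true,
// })
// console.log("shot ready:", { shotId, filePath, videoFileName })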