Spaces:

jbilcke-hf
/

VideoChain-API

Running on CPU Upgrade

File size: 6,189 Bytes

import path from "node:path"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"

import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
import { generateAudio } from "./generateAudio.mts"
import { generateVideo } from "./generateVideo.mts"
import { upscaleVideo } from "./upscaleVideo.mts"
import { generateVoice } from "./generateVoice.mts"
import { generateSeed } from "../utils/generateSeed.mts"
import { mergeAudio } from "./mergeAudio.mts"
import { addAudioToVideo } from "./addAudioToVideo.mts"
import { interpolateVideo } from "./interpolateVideo.mts"
import { postInterpolation } from "./postInterpolation.mts"


/**
 * Run an async action and, if it throws, log the failure and run it exactly
 * one more time. An error thrown by the second attempt propagates to the caller.
 */
async function retryOnce<T>(action: () => Promise<T>, retryMessage: string): Promise<T> {
  try {
    return await action()
  } catch (err) {
    // keep the first error visible instead of silently discarding it
    console.log(retryMessage, err)
    return await action()
  }
}

/**
 * Generate a single video shot and, optionally, upscale it, interpolate it
 * and add an audio track to it.
 *
 * Pipeline, in order:
 *  1. generate the base video from `shotPrompt` and download it to the tmp dir
 *  2. upscale it (when `upscale` is true)
 *  3. interpolate + post-process it (when `interpolate` is true)
 *  4. generate background / foreground audio and the dialogue voice
 *  5. add the background and/or foreground audio to the video
 *
 * @param seed - generation seed; a falsy value (including the default 0) is replaced by `generateSeed()`
 * @param shotId - identifier used for the output file names; an empty value is replaced by a fresh UUID
 * @param actorPrompt - actor description (actor generation is not implemented yet, this only logs)
 * @param shotPrompt - prompt passed to the video model and to the upscaler
 * @param backgroundAudioPrompt - when non-empty, a background audio track is generated from it
 * @param foregroundAudioPrompt - when non-empty, a foreground audio track is generated from it
 * @param actorDialoguePrompt - dialogue text; a voice file is only generated when `actorVoicePrompt` is also set
 * @param actorVoicePrompt - voice description (currently only used as an on/off switch)
 * @param duration - desired duration, forwarded to `postInterpolation`
 * @param nbFrames - desired frame rate, forwarded to `postInterpolation`
 * @param resolution - currently only logged
 * @param nbSteps - number of steps forwarded to `generateVideo`
 * @param upscale - whether to run `upscaleVideo`
 * @param interpolate - whether to run `interpolateVideo` and `postInterpolation`
 * @param noise - currently only logged
 * @returns the shot id, the absolute path of the video inside the tmp dir, and the video file name
 */
export const generateShot = async ({
  seed = 0,
  shotId = "",
  actorPrompt = "",
  shotPrompt = "",
  backgroundAudioPrompt = "",
  foregroundAudioPrompt = "",
  actorDialoguePrompt = "",
  actorVoicePrompt = "",
  duration = 2,
  nbFrames = 24,
  resolution = 576,
  nbSteps = 35,
  upscale = true,
  interpolate = true,
  noise = true,
}: {
  seed?: number;
  shotId?: string;
  actorPrompt?: string;
  shotPrompt?: string;
  backgroundAudioPrompt?: string;
  foregroundAudioPrompt?: string;
  actorDialoguePrompt?: string;
  actorVoicePrompt?: string;
  duration?: number; // 2 seconds
  nbFrames?: number; // 24 FPS
  resolution?: number; // 256, 320, 512, 576, 720, 1080..
  nbSteps?: number;
  upscale?: boolean;
  interpolate?: boolean;
  noise?: boolean;
}) => {
  // 0 / "" mean "not provided": pick a random seed and a fresh id
  seed = seed || generateSeed()
  shotId = shotId || uuidv4()

  const shotFileName = `${shotId}.mp4`

  console.log("generating video shot:", {
    seed,
    shotId,
    actorPrompt,
    shotPrompt,
    backgroundAudioPrompt,
    foregroundAudioPrompt,
    actorDialoguePrompt,
    actorVoicePrompt,
    duration,
    nbFrames,
    resolution,
    nbSteps,
    upscale,
    interpolate,
    noise,
  })

  if (actorPrompt) {
    // TODO: actor generation is not implemented yet, so this step only logs
    console.log("generating actor..")
  }

  console.log("generating base video ..")

  // currently the base model is incapable of generating more than 24 FPS,
  // because otherwise the upscaler will have trouble,
  // so for now we fix it to 24 frames
  const nbFramesForBaseModel = 24

  // base generation can be finicky, if it fails we try again
  const generatedVideoUrl = await retryOnce(
    () => generateVideo(shotPrompt, {
      seed,
      nbFrames: nbFramesForBaseModel,
      nbSteps
    }),
    '- trying again to generate base shot..'
  )

  console.log("downloading video..")

  const videoFileName = await downloadFileToTmp(generatedVideoUrl, shotFileName)

  if (upscale) {
    console.log("upscaling video..")
    // upscaling can be finicky, if it fails we try again
    await retryOnce(
      () => upscaleVideo(videoFileName, shotPrompt),
      '- trying again to upscale shot..'
    )
  }

  if (interpolate) {
    console.log("interpolating video..")
    // ATTENTION 1:
    // the interpolation step always create a SLOW MOTION video
    // it means it can last a lot longer (eg. 2x, 3x, 4x.. longer)
    // than the duration generated by the original video model

    // ATTENTION 2:
    // the interpolation step generates videos in 910x512!

    // ATTENTION 3:
    // the interpolation step parameters are currently not passed to the space,
    // so changing those two variables below will have no effect!
    const interpolationSteps = 3
    const interpolatedFramesPerSecond = 24
    await interpolateVideo(
      videoFileName,
      interpolationSteps,
      interpolatedFramesPerSecond
    )
    console.log('creating slow-mo video (910x512 @ 24 FPS)')

    // with our current interpolation settings, the 3 seconds video generated by the model
    // become a 7 seconds video, at 24 FPS

    // so we want to scale it back to the desired duration length
    // also, as a last trick we want to upscale it (without AI) and add some FXs
    console.log('performing final scaling (1280x720 @ 24 FPS)')
    await postInterpolation(videoFileName, duration, nbFrames)
  }

  let backgroundAudioFileName = ''
  if (backgroundAudioPrompt) {
    console.log("generating background audio..")
    backgroundAudioFileName = await generateAudio(backgroundAudioPrompt, `shot_${shotId}_audio_${uuidv4()}.m4a`)
  }

  let foregroundAudioFileName = ''
  if (foregroundAudioPrompt) {
    console.log("generating foreground audio..")
    foregroundAudioFileName = await generateAudio(foregroundAudioPrompt, `shot_${shotId}_audio_${uuidv4()}.m4a`)
  }

  // TODO: the generated voice file is not merged into the final video yet
  let voiceAudioFileName = ''
  if (actorDialoguePrompt) {
    console.log("configuring dialogue..")
    if (actorVoicePrompt) {
      console.log("configuring voice..")
      // well.. that's a TODO!
      // for now let's always use the same voice model

      console.log('TODO this should be done in the sequence, not the prompt!')
      voiceAudioFileName = await generateVoice(actorDialoguePrompt, `shot_${shotId}_voice_${uuidv4()}.m4a`)
    }
  }

  console.log('merging audio with video..')
  if (backgroundAudioFileName || foregroundAudioFileName) {
    let audioFileName = ''

    // we have both background and foreground
    if (backgroundAudioFileName && foregroundAudioFileName) {
      audioFileName = await mergeAudio({
        input1FileName: backgroundAudioFileName,
        input1Volume: 0.2, // 20% volume
        input2FileName: foregroundAudioFileName,
        input2Volume: 0.7, // 70% volume
      })
    } else if (backgroundAudioFileName) {
      audioFileName = backgroundAudioFileName
    } else if (foregroundAudioFileName) {
      audioFileName = foregroundAudioFileName
    }

    await addAudioToVideo(videoFileName, audioFileName)
  }

  console.log("returning result to user..")

  const filePath = path.resolve(tmpDir, videoFileName)

  return {
    shotId,
    filePath,
    videoFileName
  }
}