Spaces:

jbilcke-hf
/

VideoChain-API

Running on CPU Upgrade

App Files Files Community

jbilcke-hf HF Staff committed on Jul 10, 2023

Commit

9658ad9

1 Parent(s): bda5f6b

wip

Browse files

Files changed (8) hide show

src/index.mts +58 -12
src/services/{callZeroscope.mts → generateAudio.mts} +11 -6
src/services/generateSeed.mts +1 -1
src/services/generateVideo.mts +33 -0
src/services/interpolateVideo.mts +3 -3
src/services/upscaleVideo.mts +8 -4
src/test.mts +0 -0
src/types.mts +18 -0

src/index.mts CHANGED Viewed

@@ -2,15 +2,29 @@ import { promises as fs } from 'fs'
 import express from 'express'
-import { callZeroscope } from './services/callZeroscope.mts'
 import { downloadVideo } from './services/downloadVideo.mts'
 import { upscaleVideo } from './services/upscaleVideo.mts'
 const app = express()
 const port = 7860
 app.post('/shot', async (req, res) => {
-  const shotPrompt = `${req.query.shotPrompt || ''}`
   if (shotPrompt.length) {
     res.write(JSON.stringify({ error: true, message: 'prompt too short' }))
     res.end()
@@ -18,28 +32,60 @@ app.post('/shot', async (req, res) => {
   }
   // optional video URL
-  const inputVideo = `${req.query.inputVideo || ''}`
   // optional audio prompt
-  const audioPrompt = `${req.query.audioPrompt || ''}`
   // should we upscale or not?
-  const upscale = `${req.query.audioPrompt || 'false'}` === 'true'
   // duration of the prompt, in seconds
-  const durationStr = Number(`${req.query.audioPrompt || '3'}`)
   const maybeDuration = Number(durationStr)
-  const duration = Math.min(3, Math.max(1, isNaN(maybeDuration) || isFinite(maybeDuration) ? 3 : maybeDuration))
   // const frames per second
-  const fps = `${req.query.audioPrompt || 'false'}` === 'true'
-  console.log('calling zeroscope..')
-  const generatedVideoUrl = await callZeroscope(shotPrompt)
   const shotFileName = `${Date.now()}.mp4`
   console.log('downloading video..')
   const videoFileName = await downloadVideo(generatedVideoUrl, shotFileName)

 import express from 'express'
+import { generateVideo } from './services/generateVideo.mts'
 import { downloadVideo } from './services/downloadVideo.mts'
 import { upscaleVideo } from './services/upscaleVideo.mts'
+import { generateSeed } from './services/generateSeed.mts'
+import { MakeShot } from './types.mts'
 const app = express()
 const port = 7860
+app.use(express.json())
 app.post('/shot', async (req, res) => {
+  const query = req.body as MakeShot
+  const token = `${query.token || ''}`
+  if (token !== process.env.VS_SECRET_ACCESS_TOKEN) {
+    res.write(JSON.stringify({ error: true, message: 'access denied' }))
+    res.end()
+    return
+  }
+  const shotPrompt = `${query.shotPrompt || ''}`
   if (shotPrompt.length) {
     res.write(JSON.stringify({ error: true, message: 'prompt too short' }))
     res.end()
   }
   // optional video URL
+  // const inputVideo = `${req.query.inputVideo || ''}`
   // optional audio prompt
+  const audioPrompt = `${query.audioPrompt || ''}`
+    // optional seed
+    const seedStr = Number(`${query.seed || ''}`)
+    const maybeSeed = Number(seedStr)
+    const seed = isNaN(maybeSeed) || ! isFinite(maybeSeed) ? generateSeed() : maybeSeed
   // should we upscale or not?
+  const upscale = `${query.upscale || 'false'}` === 'true'
   // duration of the prompt, in seconds
+  const durationStr = Number(`${query.duration || ''}`)
   const maybeDuration = Number(durationStr)
+  const duration = Math.min(3, Math.max(1, isNaN(maybeDuration) || !isFinite(maybeDuration) ? 3 : maybeDuration))
+  const stepsStr = Number(`${query.steps || ''}`)
+  const maybeSteps = Number(stepsStr)
+  const nbSteps = Math.min(60, Math.max(1, isNaN(maybeSteps) || !isFinite(maybeSteps) ? 35 : maybeSteps))
   // const frames per second
+  const fpsStr = Number(`${query.fps || ''}`)
+  const maybeFps = Number(fpsStr)
+  const fps = Math.min(60, Math.max(8, isNaN(maybeFps) || !isFinite(maybeFps) ? 24 : maybeFps))
+  const resolutionStr = Number(`${query.resolution || ''}`)
+  const maybeResolution = Number(resolutionStr)
+  const resolution = Math.min(1080, Math.max(256, isNaN(maybeResolution) || !isFinite(maybeResolution) ? 576 : maybeResolution))
   const shotFileName = `${Date.now()}.mp4`
+  console.log('generating video with the following params:', {
+    shotPrompt,
+    audioPrompt,
+    resolution,
+    duration,
+    nbSteps,
+    fps,
+    seed,
+    upscale,
+    shotFileName
+  })
+  console.log('generating base video ..')
+  const generatedVideoUrl = await generateVideo(shotPrompt, {
+    seed,
+    nbFrames: 24, // if we try more eg 48 frames, this will crash the upscaler (not enough memory)
+    nbSteps
+  })
   console.log('downloading video..')
   const videoFileName = await downloadVideo(generatedVideoUrl, shotFileName)

src/services/{callZeroscope.mts → generateAudio.mts} RENAMED Viewed

@@ -1,19 +1,24 @@
 import { client } from '@gradio/client'
-import { getRandomInt } from "./generateSeed.mts"
-const videoSpaceApiUrl = process.env.VS_VIDEO_SPACE_API_URL
-export const callZeroscope = async (prompt: string, options?: {
   seed: number;
   nbFrames: number;
   nbSteps: number;
 }) => {
-  const seed = options?.seed || getRandomInt()
   const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling quill require too much memory!
   const nbSteps = options?.nbSteps || 35
-  const api = await client(videoSpaceApiUrl)
   const rawResponse = await api.predict('/run', [
     prompt, // string  in 'Prompt' Textbox component
@@ -24,5 +29,5 @@ export const callZeroscope = async (prompt: string, options?: {
   const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }
-  return `${videoSpaceApiUrl}/file=${name}`
 }

 import { client } from '@gradio/client'
+import { generateSeed } from "./generateSeed.mts"
+const instances: string[] = [
+  process.env.VS_AUDIO_GENERATION_SPACE_API_URL
+]
+export const generateAudio = async (prompt: string, options?: {
   seed: number;
   nbFrames: number;
   nbSteps: number;
 }) => {
+  const seed = options?.seed || generateSeed()
   const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling quill require too much memory!
   const nbSteps = options?.nbSteps || 35
+  const instance = instances.shift()
+  instances.push(instance)
+  const api = await client(instance)
   const rawResponse = await api.predict('/run', [
     prompt, // string  in 'Prompt' Textbox component
   const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }
+  return `${instance}/file=${name}`
 }

src/services/generateSeed.mts CHANGED Viewed

@@ -1,3 +1,3 @@
-export function getRandomInt() {
   return Math.floor(Math.random() * Math.pow(2, 31));
 }

+export function generateSeed() { // returns a random integer in [0, 2^31 - 1]
   return Math.floor(Math.random() * Math.pow(2, 31));
 }

src/services/generateVideo.mts ADDED Viewed

	@@ -0,0 +1,33 @@

+import { client } from '@gradio/client'
+import { generateSeed } from "./generateSeed.mts"
+const instances: string[] = [ // Space base URLs; generateVideo takes the first entry and moves it to the end on every call
+  process.env.VS_VIDEO_GENERATION_SPACE_API_URL // undefined when this environment variable is not set
+]
+/**
+ * Generates a base video for `prompt` by calling the '/run' endpoint of a Gradio Space.
+ *
+ * @param prompt - text prompt, forwarded as the first '/run' argument
+ * @param options - optional overrides; any falsy field falls back to its default
+ *   (a fresh generateSeed() value, 24 frames, 35 steps)
+ * @returns URL of the generated file, built as `${instance}/file=${name}`
+ * @throws Error when no Space URL is configured or when the response carries no file name;
+ *   errors raised by the Gradio client are not caught here
+ */
+export const generateVideo = async (prompt: string, options?: {
+  seed: number;
+  nbFrames: number;
+  nbSteps: number;
+}): Promise<string> => {
+  // `||` rather than `??`: a seed of 0 counts as "not provided". The /shot handler in
+  // src/index.mts yields 0 for a missing seed (Number('') is 0), so this fallback is what randomizes it.
+  const seed = options?.seed || generateSeed()
+  const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling will require too much memory!
+  const nbSteps = options?.nbSteps || 35
+  // round-robin: take the instance at the head of the list, then put it back at the tail
+  const instance = instances.shift()
+  if (!instance) {
+    // fail with a clear message instead of handing `undefined` to the Gradio client
+    throw new Error('no video generation Space configured (VS_VIDEO_GENERATION_SPACE_API_URL is not set)')
+  }
+  instances.push(instance)
+  const api = await client(instance)
+  const rawResponse = await api.predict('/run', [
+    prompt, // string  in 'Prompt' Textbox component
+    seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
+    nbFrames, // 24 // it is the nb of frames per seconds I think?
+    nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
+  ]) as any
+  // the file entry is read from data[0][0]; reject a missing entry rather than returning ".../file=undefined"
+  const name: unknown = rawResponse?.data?.[0]?.[0]?.name
+  if (typeof name !== 'string' || !name) {
+    throw new Error('video generation Space returned no file name')
+  }
+  return `${instance}/file=${name}`
+}

src/services/interpolateVideo.mts CHANGED Viewed

@@ -9,7 +9,7 @@ import tmpDir from "temp-dir"
 import { downloadVideo } from './downloadVideo.mts'
 const instances: string[] = [
-  process.env.VS_INTERPOLATION_SPACE_URL
 ]
 export const interpolateVideo = async (fileName: string) => {
@@ -19,13 +19,13 @@ export const interpolateVideo = async (fileName: string) => {
   const instance = instances.shift()
   instances.push(instance)
-  const app = await client(instance)
   const video = await fs.readFile(inputFilePath)
   const blob = new Blob([video], { type: 'video/mp4' })
   // const blob = blobFrom(filePath)
-  const result = await app.predict(1, [
     blob, 	// blob in 'parameter_5' Video component
     1, // number (numeric value between 1 and 4) in 'Interpolation Steps' Slider component
     24, // string  in 'FPS output' Radio component

 import { downloadVideo } from './downloadVideo.mts'
 const instances: string[] = [
+  process.env.VS_VIDEO_INTERPOLATION_SPACE_API_URL
 ]
 export const interpolateVideo = async (fileName: string) => {
   const instance = instances.shift()
   instances.push(instance)
+  const api = await client(instance)
   const video = await fs.readFile(inputFilePath)
   const blob = new Blob([video], { type: 'video/mp4' })
   // const blob = blobFrom(filePath)
+  const result = await api.predict(1, [
     blob, 	// blob in 'parameter_5' Video component
     1, // number (numeric value between 1 and 4) in 'Interpolation Steps' Slider component
     24, // string  in 'FPS output' Radio component

src/services/upscaleVideo.mts CHANGED Viewed

@@ -5,19 +5,23 @@ import tmpDir from 'temp-dir'
 import puppeteer from 'puppeteer'
 import { downloadVideo } from './downloadVideo.mts'
 // TODO we should use an inference endpoint instead (or a space which bakes generation + upscale at the same time)
 export async function upscaleVideo(fileName: string, prompt: string) {
   const browser = await puppeteer.launch({
     // headless: true,
     protocolTimeout: 800000,
   })
-  const spaceUrl = process.env.VS_UPSCALE_SPACE_API_URL
   const page = await browser.newPage()
-  await page.goto(spaceUrl, {
     waitUntil: 'networkidle2',
   })

 import puppeteer from 'puppeteer'
 import { downloadVideo } from './downloadVideo.mts'
+const instances: string[] = [
+  process.env.VS_VIDEO_UPSCALE_SPACE_API_URL
+]
 // TODO we should use an inference endpoint instead (or a space which bakes generation + upscale at the same time)
 export async function upscaleVideo(fileName: string, prompt: string) {
+  const instance = instances.shift()
+  instances.push(instance)
   const browser = await puppeteer.launch({
     // headless: true,
     protocolTimeout: 800000,
   })
   const page = await browser.newPage()
+  await page.goto(instance, {
     waitUntil: 'networkidle2',
   })

src/test.mts ADDED Viewed

File without changes

src/types.mts CHANGED Viewed

@@ -23,4 +23,22 @@ export interface Database {
   version: number
   startAtShotId: string
   sequences: Sequence[]
 }

   version: number
   startAtShotId: string
   sequences: Sequence[]
+}
+export interface MakeShot { // JSON body of POST /shot (src/index.mts casts req.body to this type)
+  token: string // must equal process.env.VS_SECRET_ACCESS_TOKEN, otherwise the handler answers 'access denied'
+  shotPrompt: string // text prompt handed to generateVideo
+  // inputVideo?: string
+  audioPrompt?: string // optional audio prompt; the handler defaults it to ''
+  seed?: number // optional seed; the handler calls generateSeed() when the value is not a finite number
+  upscale?: boolean // treated as enabled only when it stringifies to 'true'
+  duration?: number // in seconds; the handler clamps it to 1..3
+  steps?: number // the handler clamps it to 1..60
+  fps?: number // 8, 12, 24, 30, 60 (the handler clamps it to 8..60)
+  resolution?: number // 256, 512, 576, 720, 1080 (the handler clamps it to 256..1080)
 }