jbilcke-hf HF staff committed on
Commit
9658ad9
·
1 Parent(s): bda5f6b
src/index.mts CHANGED
@@ -2,15 +2,29 @@ import { promises as fs } from 'fs'
2
 
3
  import express from 'express'
4
 
5
- import { callZeroscope } from './services/callZeroscope.mts'
6
  import { downloadVideo } from './services/downloadVideo.mts'
7
  import { upscaleVideo } from './services/upscaleVideo.mts'
 
 
8
 
9
  const app = express()
10
  const port = 7860
11
 
 
 
 
12
  app.post('/shot', async (req, res) => {
13
- const shotPrompt = `${req.query.shotPrompt || ''}`
 
 
 
 
 
 
 
 
 
14
  if (shotPrompt.length) {
15
  res.write(JSON.stringify({ error: true, message: 'prompt too short' }))
16
  res.end()
@@ -18,28 +32,60 @@ app.post('/shot', async (req, res) => {
18
  }
19
 
20
  // optional video URL
21
- const inputVideo = `${req.query.inputVideo || ''}`
22
 
23
  // optional audio prompt
24
- const audioPrompt = `${req.query.audioPrompt || ''}`
 
 
 
 
 
 
25
 
26
  // should we upscale or not?
27
- const upscale = `${req.query.audioPrompt || 'false'}` === 'true'
28
 
29
  // duration of the prompt, in seconds
30
- const durationStr = Number(`${req.query.audioPrompt || '3'}`)
31
  const maybeDuration = Number(durationStr)
32
- const duration = Math.min(3, Math.max(1, isNaN(maybeDuration) || isFinite(maybeDuration) ? 3 : maybeDuration))
 
 
 
 
33
 
34
  // const frames per second
35
- const fps = `${req.query.audioPrompt || 'false'}` === 'true'
36
-
37
- console.log('calling zeroscope..')
38
- const generatedVideoUrl = await callZeroscope(shotPrompt)
 
 
 
 
39
 
40
  const shotFileName = `${Date.now()}.mp4`
41
 
42
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  console.log('downloading video..')
44
  const videoFileName = await downloadVideo(generatedVideoUrl, shotFileName)
45
 
 
2
 
3
  import express from 'express'
4
 
5
+ import { generateVideo } from './services/generateVideo.mts'
6
  import { downloadVideo } from './services/downloadVideo.mts'
7
  import { upscaleVideo } from './services/upscaleVideo.mts'
8
+ import { generateSeed } from './services/generateSeed.mts'
9
+ import { MakeShot } from './types.mts'
10
 
11
  const app = express()
12
  const port = 7860
13
 
14
+ app.use(express.json())
15
+
16
+
17
  app.post('/shot', async (req, res) => {
18
+ const query = req.body as MakeShot
19
+
20
+ const token = `${query.token || ''}`
21
+ if (token !== process.env.VS_SECRET_ACCESS_TOKEN) {
22
+ res.write(JSON.stringify({ error: true, message: 'access denied' }))
23
+ res.end()
24
+ return
25
+ }
26
+
27
+ const shotPrompt = `${query.shotPrompt || ''}`
28
  if (shotPrompt.length) {
29
  res.write(JSON.stringify({ error: true, message: 'prompt too short' }))
30
  res.end()
 
32
  }
33
 
34
  // optional video URL
35
+ // const inputVideo = `${req.query.inputVideo || ''}`
36
 
37
  // optional audio prompt
38
+ const audioPrompt = `${query.audioPrompt || ''}`
39
+
40
+ // optional seed
41
+ const seedStr = Number(`${query.seed || ''}`)
42
+ const maybeSeed = Number(seedStr)
43
+ const seed = isNaN(maybeSeed) || ! isFinite(maybeSeed) ? generateSeed() : maybeSeed
44
+
45
 
46
  // should we upscale or not?
47
+ const upscale = `${query.upscale || 'false'}` === 'true'
48
 
49
  // duration of the prompt, in seconds
50
+ const durationStr = Number(`${query.duration || ''}`)
51
  const maybeDuration = Number(durationStr)
52
+ const duration = Math.min(3, Math.max(1, isNaN(maybeDuration) || !isFinite(maybeDuration) ? 3 : maybeDuration))
53
+
54
+ const stepsStr = Number(`${query.steps || ''}`)
55
+ const maybeSteps = Number(stepsStr)
56
+ const nbSteps = Math.min(60, Math.max(1, isNaN(maybeSteps) || !isFinite(maybeSteps) ? 35 : maybeSteps))
57
 
58
  // const frames per second
59
+ const fpsStr = Number(`${query.fps || ''}`)
60
+ const maybeFps = Number(fpsStr)
61
+ const fps = Math.min(60, Math.max(8, isNaN(maybeFps) || !isFinite(maybeFps) ? 24 : maybeFps))
62
+
63
+ const resolutionStr = Number(`${query.resolution || ''}`)
64
+ const maybeResolution = Number(resolutionStr)
65
+ const resolution = Math.min(1080, Math.max(256, isNaN(maybeResolution) || !isFinite(maybeResolution) ? 576 : maybeResolution))
66
+
67
 
68
  const shotFileName = `${Date.now()}.mp4`
69
 
70
+ console.log('generating video with the following params:', {
71
+ shotPrompt,
72
+ audioPrompt,
73
+ resolution,
74
+ duration,
75
+ nbSteps,
76
+ fps,
77
+ seed,
78
+ upscale,
79
+ shotFileName
80
+ })
81
+ console.log('generating base video ..')
82
+ const generatedVideoUrl = await generateVideo(shotPrompt, {
83
+ seed,
84
+ nbFrames: 24, // if we try more eg 48 frames, this will crash the upscaler (not enough memory)
85
+ nbSteps
86
+ })
87
+
88
+
89
  console.log('downloading video..')
90
  const videoFileName = await downloadVideo(generatedVideoUrl, shotFileName)
91
 
src/services/{callZeroscope.mts → generateAudio.mts} RENAMED
@@ -1,19 +1,24 @@
1
  import { client } from '@gradio/client'
2
 
3
- import { getRandomInt } from "./generateSeed.mts"
4
 
5
- const videoSpaceApiUrl = process.env.VS_VIDEO_SPACE_API_URL
 
 
6
 
7
- export const callZeroscope = async (prompt: string, options?: {
8
  seed: number;
9
  nbFrames: number;
10
  nbSteps: number;
11
  }) => {
12
- const seed = options?.seed || getRandomInt()
13
  const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling quill require too much memory!
14
  const nbSteps = options?.nbSteps || 35
15
 
16
- const api = await client(videoSpaceApiUrl)
 
 
 
17
 
18
  const rawResponse = await api.predict('/run', [
19
  prompt, // string in 'Prompt' Textbox component
@@ -24,5 +29,5 @@ export const callZeroscope = async (prompt: string, options?: {
24
 
25
  const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }
26
 
27
- return `${videoSpaceApiUrl}/file=${name}`
28
  }
 
1
  import { client } from '@gradio/client'
2
 
3
+ import { generateSeed } from "./generateSeed.mts"
4
 
5
+ const instances: string[] = [
6
+ process.env.VS_AUDIO_GENERATION_SPACE_API_URL
7
+ ]
8
 
9
+ export const generateAudio = async (prompt: string, options?: {
10
  seed: number;
11
  nbFrames: number;
12
  nbSteps: number;
13
  }) => {
14
+ const seed = options?.seed || generateSeed()
15
  const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling quill require too much memory!
16
  const nbSteps = options?.nbSteps || 35
17
 
18
+ const instance = instances.shift()
19
+ instances.push(instance)
20
+
21
+ const api = await client(instance)
22
 
23
  const rawResponse = await api.predict('/run', [
24
  prompt, // string in 'Prompt' Textbox component
 
29
 
30
  const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }
31
 
32
+ return `${instance}/file=${name}`
33
  }
src/services/generateSeed.mts CHANGED
@@ -1,3 +1,3 @@
1
- export function getRandomInt() {
2
  return Math.floor(Math.random() * Math.pow(2, 31));
3
  }
 
1
+ export function generateSeed() {
2
  return Math.floor(Math.random() * Math.pow(2, 31));
3
  }
src/services/generateVideo.mts ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { client } from '@gradio/client'
2
+
3
+ import { generateSeed } from "./generateSeed.mts"
4
+
5
+ const instances: string[] = [
6
+ process.env.VS_VIDEO_GENERATION_SPACE_API_URL
7
+ ]
8
+
9
+ export const generateVideo = async (prompt: string, options?: {
10
+ seed: number;
11
+ nbFrames: number;
12
+ nbSteps: number;
13
+ }) => {
14
+ const seed = options?.seed || generateSeed()
15
+ const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling quill require too much memory!
16
+ const nbSteps = options?.nbSteps || 35
17
+
18
+ const instance = instances.shift()
19
+ instances.push(instance)
20
+
21
+ const api = await client(instance)
22
+
23
+ const rawResponse = await api.predict('/run', [
24
+ prompt, // string in 'Prompt' Textbox component
25
+ seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
26
+ nbFrames, // 24 // it is the nb of frames per seconds I think?
27
+ nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
28
+ ]) as any
29
+
30
+ const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }
31
+
32
+ return `${instance}/file=${name}`
33
+ }
src/services/interpolateVideo.mts CHANGED
@@ -9,7 +9,7 @@ import tmpDir from "temp-dir"
9
  import { downloadVideo } from './downloadVideo.mts'
10
 
11
  const instances: string[] = [
12
- process.env.VS_INTERPOLATION_SPACE_URL
13
  ]
14
 
15
  export const interpolateVideo = async (fileName: string) => {
@@ -19,13 +19,13 @@ export const interpolateVideo = async (fileName: string) => {
19
  const instance = instances.shift()
20
  instances.push(instance)
21
 
22
- const app = await client(instance)
23
 
24
  const video = await fs.readFile(inputFilePath)
25
 
26
  const blob = new Blob([video], { type: 'video/mp4' })
27
  // const blob = blobFrom(filePath)
28
- const result = await app.predict(1, [
29
  blob, // blob in 'parameter_5' Video component
30
  1, // number (numeric value between 1 and 4) in 'Interpolation Steps' Slider component
31
  24, // string in 'FPS output' Radio component
 
9
  import { downloadVideo } from './downloadVideo.mts'
10
 
11
  const instances: string[] = [
12
+ process.env.VS_VIDEO_INTERPOLATION_SPACE_API_URL
13
  ]
14
 
15
  export const interpolateVideo = async (fileName: string) => {
 
19
  const instance = instances.shift()
20
  instances.push(instance)
21
 
22
+ const api = await client(instance)
23
 
24
  const video = await fs.readFile(inputFilePath)
25
 
26
  const blob = new Blob([video], { type: 'video/mp4' })
27
  // const blob = blobFrom(filePath)
28
+ const result = await api.predict(1, [
29
  blob, // blob in 'parameter_5' Video component
30
  1, // number (numeric value between 1 and 4) in 'Interpolation Steps' Slider component
31
  24, // string in 'FPS output' Radio component
src/services/upscaleVideo.mts CHANGED
@@ -5,19 +5,23 @@ import tmpDir from 'temp-dir'
5
  import puppeteer from 'puppeteer'
6
  import { downloadVideo } from './downloadVideo.mts'
7
 
 
 
 
 
8
  // TODO we should use an inference endpoint instead (or a space which bakes generation + upscale at the same time)
9
  export async function upscaleVideo(fileName: string, prompt: string) {
10
-
 
 
11
  const browser = await puppeteer.launch({
12
  // headless: true,
13
  protocolTimeout: 800000,
14
  })
15
 
16
- const spaceUrl = process.env.VS_UPSCALE_SPACE_API_URL
17
-
18
  const page = await browser.newPage()
19
 
20
- await page.goto(spaceUrl, {
21
  waitUntil: 'networkidle2',
22
  })
23
 
 
5
  import puppeteer from 'puppeteer'
6
  import { downloadVideo } from './downloadVideo.mts'
7
 
8
+ const instances: string[] = [
9
+ process.env.VS_VIDEO_UPSCALE_SPACE_API_URL
10
+ ]
11
+
12
  // TODO we should use an inference endpoint instead (or a space which bakes generation + upscale at the same time)
13
  export async function upscaleVideo(fileName: string, prompt: string) {
14
+ const instance = instances.shift()
15
+ instances.push(instance)
16
+
17
  const browser = await puppeteer.launch({
18
  // headless: true,
19
  protocolTimeout: 800000,
20
  })
21
 
 
 
22
  const page = await browser.newPage()
23
 
24
+ await page.goto(instance, {
25
  waitUntil: 'networkidle2',
26
  })
27
 
src/test.mts ADDED
File without changes
src/types.mts CHANGED
@@ -23,4 +23,22 @@ export interface Database {
23
  version: number
24
  startAtShotId: string
25
  sequences: Sequence[]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
 
23
  version: number
24
  startAtShotId: string
25
  sequences: Sequence[]
26
+ }
27
+
28
+
29
+ export interface MakeShot {
30
+ token: string
31
+ shotPrompt: string
32
+ // inputVideo?: string
33
+
34
+ audioPrompt?: string
35
+ seed?: number
36
+ upscale?: boolean
37
+
38
+ duration?: number
39
+ steps?: number
40
+
41
+ fps?: number // 8, 12, 24, 30, 60
42
+
43
+ resolution?: number // 256, 512, 576, 720, 1080
44
  }