jbilcke-hf HF staff commited on
Commit
a65e95e
·
1 Parent(s): 29598d1

work in progress to create the video service

Browse files
Dockerfile CHANGED
@@ -30,6 +30,6 @@ RUN npm install
30
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
31
  COPY --chown=user . $HOME/app
32
 
33
- EXPOSE 7860 1935 8000
34
 
35
  CMD [ "npm", "run", "start" ]
 
30
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
31
  COPY --chown=user . $HOME/app
32
 
33
+ EXPOSE 7860
34
 
35
  CMD [ "npm", "run", "start" ]
package-lock.json CHANGED
@@ -10,6 +10,7 @@
10
  "license": "Apache License",
11
  "dependencies": {
12
  "@gradio/client": "^0.1.4",
 
13
  "@types/express": "^4.17.17",
14
  "@types/uuid": "^9.0.2",
15
  "express": "^4.18.2",
@@ -78,6 +79,14 @@
78
  "node": ">=18.0.0"
79
  }
80
  },
 
 
 
 
 
 
 
 
81
  "node_modules/@jridgewell/resolve-uri": {
82
  "version": "3.1.1",
83
  "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.1.tgz",
 
10
  "license": "Apache License",
11
  "dependencies": {
12
  "@gradio/client": "^0.1.4",
13
+ "@huggingface/inference": "^2.6.1",
14
  "@types/express": "^4.17.17",
15
  "@types/uuid": "^9.0.2",
16
  "express": "^4.18.2",
 
79
  "node": ">=18.0.0"
80
  }
81
  },
82
+ "node_modules/@huggingface/inference": {
83
+ "version": "2.6.1",
84
+ "resolved": "https://registry.npmjs.org/@huggingface/inference/-/inference-2.6.1.tgz",
85
+ "integrity": "sha512-qFYchgOCPeEkZJKiSr7Kz62QwukJtgkeQCT7Q0SSKUcvHpTQVNJp6i/JrJMR4dBdzQysJ1SZDC0pLBBnnskTag==",
86
+ "engines": {
87
+ "node": ">=18"
88
+ }
89
+ },
90
  "node_modules/@jridgewell/resolve-uri": {
91
  "version": "3.1.1",
92
  "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.1.tgz",
package.json CHANGED
@@ -6,6 +6,7 @@
6
  "scripts": {
7
  "start": "node --loader ts-node/esm src/index.mts",
8
  "test": "node --loader ts-node/esm src/test.mts",
 
9
  "docker": "npm run docker:build && npm run docker:run",
10
  "docker:build": "docker build -t ai-webtv .",
11
  "docker:run": "docker run -it -p 7860:7860 video-service"
@@ -14,6 +15,7 @@
14
  "license": "Apache License",
15
  "dependencies": {
16
  "@gradio/client": "^0.1.4",
 
17
  "@types/express": "^4.17.17",
18
  "@types/uuid": "^9.0.2",
19
  "express": "^4.18.2",
 
6
  "scripts": {
7
  "start": "node --loader ts-node/esm src/index.mts",
8
  "test": "node --loader ts-node/esm src/test.mts",
9
+ "test2": "node --loader ts-node/esm src/test2.mts",
10
  "docker": "npm run docker:build && npm run docker:run",
11
  "docker:build": "docker build -t ai-webtv .",
12
  "docker:run": "docker run -it -p 7860:7860 video-service"
 
15
  "license": "Apache License",
16
  "dependencies": {
17
  "@gradio/client": "^0.1.4",
18
+ "@huggingface/inference": "^2.6.1",
19
  "@types/express": "^4.17.17",
20
  "@types/uuid": "^9.0.2",
21
  "express": "^4.18.2",
src/data/all_words.json ADDED
The diff for this file is too large to render. See raw diff
 
src/data/good_words.json ADDED
The diff for this file is too large to render. See raw diff
 
src/index.mts CHANGED
@@ -1,63 +1,62 @@
1
- import { promises as fs } from 'fs'
2
- import path from 'node:path'
3
 
4
- import tmpDir from 'temp-dir'
5
- import express from 'express'
6
 
7
- import { generateVideo } from './services/generateVideo.mts'
8
- import { downloadVideo } from './services/downloadVideo.mts'
9
- import { upscaleVideo } from './services/upscaleVideo.mts'
10
- import { generateSeed } from './services/generateSeed.mts'
11
- import { addAudioToVideo } from './services/addAudioToVideo.mts'
12
-
13
- import { MakeShot } from './types.mts'
14
 
15
  const app = express()
16
  const port = 7860
17
 
18
  app.use(express.json())
19
 
 
20
 
21
- app.post('/shot', async (req, res) => {
22
- const query = req.body as MakeShot
23
 
24
- console.log('received query:', query)
25
- const token = `${query.token || ''}`
26
  if (token !== process.env.VS_SECRET_ACCESS_TOKEN) {
27
  console.log("couldn't find access token in the query")
28
- res.write(JSON.stringify({ error: true, message: 'access denied' }))
29
  res.end()
30
  return
31
  }
32
 
33
- const shotPrompt = `${query.shotPrompt || ''}`
34
  if (shotPrompt.length < 5) {
35
- res.write(JSON.stringify({ error: true, message: 'prompt too short (must be at least 5 in length)' }))
36
  res.end()
37
  return
38
  }
39
 
40
  // optional video URL
41
- // const inputVideo = `${req.query.inputVideo || ''}`
 
 
 
42
 
43
- // optional audio prompt
44
- const audioPrompt = `${query.audioPrompt || ''}`
45
 
46
  // optional seed
47
  const defaultSeed = generateSeed()
48
  const seedStr = Number(`${query.seed || defaultSeed}`)
49
  const maybeSeed = Number(seedStr)
50
  const seed = isNaN(maybeSeed) || ! isFinite(maybeSeed) ? defaultSeed : maybeSeed
51
-
 
 
 
 
52
 
53
- // should we upscale or not?
54
- const upscale = `${query.upscale || 'true'}` === 'true'
55
 
56
- // duration of the prompt, in seconds
57
  const defaultDuration = 3
 
58
  const durationStr = Number(`${query.duration || defaultDuration}`)
59
  const maybeDuration = Number(durationStr)
60
- const duration = Math.min(3, Math.max(1, isNaN(maybeDuration) || !isFinite(maybeDuration) ? defaultDuration : maybeDuration))
61
 
62
  const defaultSteps = 35
63
  const stepsStr = Number(`${query.steps || defaultSteps}`)
@@ -68,58 +67,45 @@ app.post('/shot', async (req, res) => {
68
  const defaultFps = 24
69
  const fpsStr = Number(`${query.fps || defaultFps}`)
70
  const maybeFps = Number(fpsStr)
71
- const fps = Math.min(60, Math.max(8, isNaN(maybeFps) || !isFinite(maybeFps) ? defaultFps : maybeFps))
72
 
73
  const defaultResolution = 576
74
  const resolutionStr = Number(`${query.resolution || defaultResolution}`)
75
  const maybeResolution = Number(resolutionStr)
76
  const resolution = Math.min(1080, Math.max(256, isNaN(maybeResolution) || !isFinite(maybeResolution) ? defaultResolution : maybeResolution))
77
 
 
 
 
 
 
78
 
79
- const shotFileName = `${Date.now()}.mp4`
80
 
81
- console.log('generating video with the following params:', {
 
 
82
  shotPrompt,
83
- audioPrompt,
84
- resolution,
 
 
85
  duration,
 
 
86
  nbSteps,
87
- fps,
88
- seed,
89
  upscale,
90
- shotFileName
 
91
  })
92
- console.log('generating base video ..')
93
- const generatedVideoUrl = await generateVideo(shotPrompt, {
94
- seed,
95
- nbFrames: 24, // if we try more eg 48 frames, this will crash the upscaler (not enough memory)
96
- nbSteps
97
- })
98
-
99
 
100
- console.log('downloading video..')
101
- const videoFileName = await downloadVideo(generatedVideoUrl, shotFileName)
102
 
103
- if (upscale) {
104
- console.log('upscaling video..')
105
- await upscaleVideo(videoFileName, shotPrompt)
106
- }
107
-
108
- // TODO call AudioLDM
109
- if (audioPrompt) {
110
- // const audioFileName = await callAudioLDM(audioPrompt)
111
- console.log('calling audio prompt')
112
-
113
- // await addAudioToVideo(videoFileName, audioFileName)
114
- }
115
-
116
- console.log('returning result to user..')
117
-
118
- const filePath = path.resolve(tmpDir, videoFileName)
119
 
120
  const buffer = await fs.readFile(filePath)
121
- res.setHeader('Content-Type', 'media/mp4')
122
- res.setHeader('Content-Length', buffer.length)
 
123
  res.end(buffer)
124
  })
125
 
 
1
+ import { promises as fs } from "fs"
 
2
 
3
+ import express from "express"
 
4
 
5
+ import { generateSeed } from "./services/generateSeed.mts"
6
+ import { Job, ShotQuery } from "./types.mts"
7
+ import { generateShot } from "./services/generateShot.mts"
 
 
 
 
8
 
9
  const app = express()
10
  const port = 7860
11
 
12
  app.use(express.json())
13
 
14
+ const queue: Job[] = []
15
 
16
+ app.post("/shot", async (req, res) => {
17
+ const query = req.body as ShotQuery
18
 
19
+ const token = `${query.token || ""}`
 
20
  if (token !== process.env.VS_SECRET_ACCESS_TOKEN) {
21
  console.log("couldn't find access token in the query")
22
+ res.write(JSON.stringify({ error: true, message: "access denied" }))
23
  res.end()
24
  return
25
  }
26
 
27
+ const shotPrompt = `${query.shotPrompt || ""}`
28
  if (shotPrompt.length < 5) {
29
+ res.write(JSON.stringify({ error: true, message: "prompt too short (must be at least 5 in length)" }))
30
  res.end()
31
  return
32
  }
33
 
34
  // optional video URL
35
+ // const inputVideo = `${req.query.inputVideo || ""}`
36
+
37
+ // optional background audio prompt
38
+ const backgroundAudioPrompt = `${query.backgroundAudioPrompt || ""}`
39
 
40
+ // optional foreground audio prompt
41
+ const foregroundAudioPrompt = `${query.foregroundAudioPrompt || ""}`
42
 
43
  // optional seed
44
  const defaultSeed = generateSeed()
45
  const seedStr = Number(`${query.seed || defaultSeed}`)
46
  const maybeSeed = Number(seedStr)
47
  const seed = isNaN(maybeSeed) || ! isFinite(maybeSeed) ? defaultSeed : maybeSeed
48
+
49
+ // in production we want those ON by default
50
+ const upscale = `${query.upscale || "true"}` === "true"
51
+ const interpolate = `${query.upscale || "true"}` === "true"
52
+ const noise = `${query.noise || "true"}` === "true"
53
 
 
 
54
 
 
55
  const defaultDuration = 3
56
+ const maxDuration = 5
57
  const durationStr = Number(`${query.duration || defaultDuration}`)
58
  const maybeDuration = Number(durationStr)
59
+ const duration = Math.min(maxDuration, Math.max(1, isNaN(maybeDuration) || !isFinite(maybeDuration) ? defaultDuration : maybeDuration))
60
 
61
  const defaultSteps = 35
62
  const stepsStr = Number(`${query.steps || defaultSteps}`)
 
67
  const defaultFps = 24
68
  const fpsStr = Number(`${query.fps || defaultFps}`)
69
  const maybeFps = Number(fpsStr)
70
+ const nbFrames = Math.min(60, Math.max(8, isNaN(maybeFps) || !isFinite(maybeFps) ? defaultFps : maybeFps))
71
 
72
  const defaultResolution = 576
73
  const resolutionStr = Number(`${query.resolution || defaultResolution}`)
74
  const maybeResolution = Number(resolutionStr)
75
  const resolution = Math.min(1080, Math.max(256, isNaN(maybeResolution) || !isFinite(maybeResolution) ? defaultResolution : maybeResolution))
76
 
77
+ const actorPrompt = `${query.actorPrompt || ""}`
78
+
79
+ const actorVoicePrompt = `${query.actorVoicePrompt || ""}`
80
+
81
+ const actorDialoguePrompt = `${query.actorDialoguePrompt || ""}`
82
 
 
83
 
84
+ const { filePath } = await generateShot({
85
+ seed,
86
+ actorPrompt,
87
  shotPrompt,
88
+ backgroundAudioPrompt,
89
+ foregroundAudioPrompt,
90
+ actorDialoguePrompt,
91
+ actorVoicePrompt,
92
  duration,
93
+ nbFrames,
94
+ resolution,
95
  nbSteps,
 
 
96
  upscale,
97
+ interpolate,
98
+ noise,
99
  })
 
 
 
 
 
 
 
100
 
101
+ console.log(`generated video in ${filePath}`)
 
102
 
103
+ console.log("returning result to user..")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  const buffer = await fs.readFile(filePath)
106
+
107
+ res.setHeader("Content-Type", "media/mp4")
108
+ res.setHeader("Content-Length", buffer.length)
109
  res.end(buffer)
110
  })
111
 
src/services/addAudioToVideo.mts CHANGED
@@ -1,29 +1,45 @@
1
- import path from 'node:path'
2
- import { promises as fs } from 'node:fs'
3
 
4
- import tmpDir from 'temp-dir'
5
- import ffmpeg from 'fluent-ffmpeg'
6
 
7
- export const addAudioToVideo = async (videoFilePath: string, audioFilePath: string): Promise<string> => {
8
-
9
- const tempOutputFilePath = `${videoFilePath.split('.')[0]}-temp.mp4`
 
 
 
 
 
 
 
 
 
 
 
10
 
 
 
 
 
11
  await new Promise((resolve, reject) => {
12
  ffmpeg(videoFilePath)
13
  .input(audioFilePath)
14
- .outputOptions('-c:v copy') // use video copy codec
15
- .outputOptions('-c:a aac') // use audio codec
16
- .outputOptions('-map 0:v:0') // map video from 0th to 0th
17
- .outputOptions('-map 1:a:0') // map audio from 1st to 0th
18
- .outputOptions('-shortest') // finish encoding when shortest input stream ends
 
19
  .output(tempOutputFilePath)
20
- .on('end', resolve)
21
- .on('error', reject)
22
  .run()
23
  })
24
 
25
  // Now we want to replace the original video file with the new file that has been created
26
  await fs.rename(tempOutputFilePath, videoFilePath)
27
 
28
- return videoFilePath
29
  };
 
1
+ import { promises as fs } from "node:fs"
2
+ import path from "node:path"
3
 
4
+ import tmpDir from "temp-dir"
5
+ import { v4 as uuidv4 } from "uuid"
6
 
7
+ import ffmpeg from "fluent-ffmpeg"
8
+
9
+ export const addAudioToVideo = async (
10
+ videoFileName: string,
11
+ audioFileName: string,
12
+
13
+ /*
14
+ * 0.0: mute the audio completely
15
+ * 0.5: set the audio to 50% of original volume (half volume)
16
+ * 1.0: maintain the audio at original volume (100% of original volume)
17
+ * 2.0: amplify the audio to 200% of original volume (double volume - might cause clipping)
18
+ */
19
+ volume: number = 1.0
20
+ ): Promise<string> => {
21
 
22
+ const tempOutputFilePath = `${uuidv4()}.mp4`
23
+ const videoFilePath = path.resolve(tmpDir, videoFileName)
24
+ const audioFilePath = path.resolve(tmpDir, audioFileName)
25
+
26
  await new Promise((resolve, reject) => {
27
  ffmpeg(videoFilePath)
28
  .input(audioFilePath)
29
+ .audioFilters({ filter: 'volume', options: volume }) // add audio filter for volume
30
+ .outputOptions("-c:v copy") // use video copy codec
31
+ .outputOptions("-c:a aac") // use audio codec
32
+ .outputOptions("-map 0:v:0") // map video from 0th to 0th
33
+ .outputOptions("-map 1:a:0") // map audio from 1st to 0th
34
+ .outputOptions("-shortest") // finish encoding when shortest input stream ends
35
  .output(tempOutputFilePath)
36
+ .on("end", resolve)
37
+ .on("error", reject)
38
  .run()
39
  })
40
 
41
  // Now we want to replace the original video file with the new file that has been created
42
  await fs.rename(tempOutputFilePath, videoFilePath)
43
 
44
+ return videoFileName
45
  };
src/services/generateActor.mts ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { promises as fs } from "node:fs"
2
+ import path from "node:path"
3
+ import tmpDir from "temp-dir"
4
+
5
+ import { HfInference } from "@huggingface/inference"
6
+
7
+ const hf = new HfInference(process.env.VS_HF_API_TOKEN)
8
+
9
+ export const generateActor = async (prompt: string, fileName: string, seed: number) => {
10
+ const positivePrompt = [
11
+ `profile photo of ${prompt || ""}`,
12
+ "id picture",
13
+ "photoshoot",
14
+ "portrait photography",
15
+ "neutral expression",
16
+ "neutral background",
17
+ "studio photo",
18
+ "award winning",
19
+ "high resolution",
20
+ "photo realistic",
21
+ "intricate details",
22
+ "beautiful",
23
+ ]
24
+ const negativePrompt = [
25
+ "anime",
26
+ "drawing",
27
+ "painting",
28
+ "lowres",
29
+ "blurry",
30
+ "artificial"
31
+ ]
32
+
33
+ console.log(`generating actor: ${positivePrompt.join(", ")}`)
34
+
35
+ const blob = await hf.textToImage({
36
+ inputs: positivePrompt.join(", "),
37
+ model: "stabilityai/stable-diffusion-2-1",
38
+ parameters: {
39
+ negative_prompt: negativePrompt.join(", "),
40
+ // seed, no seed?
41
+ }
42
+ })
43
+
44
+ const filePath = path.resolve(tmpDir, fileName)
45
+
46
+ const buffer = Buffer.from(await blob.arrayBuffer())
47
+ await fs.writeFile(filePath, buffer, "utf8")
48
+
49
+ return filePath
50
+ }
src/services/generateAudio.mts CHANGED
@@ -1,33 +1,56 @@
1
- import { client } from '@gradio/client'
2
-
3
- import { generateSeed } from "./generateSeed.mts"
4
 
5
  const instances: string[] = [
6
  process.env.VS_AUDIO_GENERATION_SPACE_API_URL
7
  ]
8
 
9
- export const generateAudio = async (prompt: string, options?: {
10
- seed: number;
11
- nbFrames: number;
12
- nbSteps: number;
13
- }) => {
14
- const seed = options?.seed || generateSeed()
15
- const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling quill require too much memory!
16
- const nbSteps = options?.nbSteps || 35
17
-
18
  const instance = instances.shift()
19
  instances.push(instance)
20
 
21
- const api = await client(instance)
22
-
23
- const rawResponse = await api.predict('/run', [
24
- prompt, // string in 'Prompt' Textbox component
25
- seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
26
- nbFrames, // 24 // it is the nb of frames per seconds I think?
27
- nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
28
- ]) as any
29
 
30
- const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
- return `${instance}/file=${name}`
33
  }
 
1
+ import puppeteer from "puppeteer"
2
+ import { downloadVideo } from "./downloadVideo.mts"
 
3
 
4
  const instances: string[] = [
5
  process.env.VS_AUDIO_GENERATION_SPACE_API_URL
6
  ]
7
 
8
+ // TODO we should use an inference endpoint instead
9
+ export async function generateAudio(prompt: string, audioFileName: string) {
 
 
 
 
 
 
 
10
  const instance = instances.shift()
11
  instances.push(instance)
12
 
13
+ console.log("instance:", instance)
 
 
 
 
 
 
 
14
 
15
+ const browser = await puppeteer.launch({
16
+ headless: false,
17
+ protocolTimeout: 800000,
18
+ })
19
+
20
+ const page = await browser.newPage()
21
+
22
+ await page.goto(instance, {
23
+ waitUntil: "networkidle2",
24
+ })
25
+
26
+ await new Promise(r => setTimeout(r, 3000))
27
+
28
+ const firstTextboxInput = await page.$('input[data-testid="textbox"]')
29
+
30
+ await firstTextboxInput.type(prompt)
31
+
32
+ // console.log("looking for the button to submit")
33
+ const submitButton = await page.$("button.lg")
34
+
35
+ // console.log("clicking on the button")
36
+ await submitButton.click()
37
+
38
+ await page.waitForSelector("a[download]", {
39
+ timeout: 800000, // need to be large enough in case someone else attemps to use our space
40
+ })
41
+
42
+ const audioRemoteUrl = await page.$$eval("a[download]", el => el.map(x => x.getAttribute("href"))[0])
43
+
44
+
45
+ console.log({
46
+ audioRemoteUrl,
47
+ })
48
+
49
+
50
+ // console.log("downloading file from space..")
51
+ console.log(`- downloading ${audioFileName} from ${audioRemoteUrl}`)
52
+
53
+ await downloadVideo(audioRemoteUrl, audioFileName)
54
 
55
+ return audioFileName
56
  }
src/services/generateAudioLegacy.mts ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { client } from '@gradio/client'
2
+
3
+ import { generateSeed } from "./generateSeed.mts"
4
+
5
+ const instances: string[] = [
6
+ process.env.VS_AUDIO_GENERATION_SPACE_API_URL
7
+ ]
8
+
9
+ export const generateAudio = async (prompt: string, options?: {
10
+ seed: number;
11
+ nbFrames: number;
12
+ nbSteps: number;
13
+ }) => {
14
+ const seed = options?.seed || generateSeed()
15
+ const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling quill require too much memory!
16
+ const nbSteps = options?.nbSteps || 35
17
+
18
+ const instance = instances.shift()
19
+ instances.push(instance)
20
+
21
+ const api = await client(instance)
22
+
23
+ const rawResponse = await api.predict('/run', [
24
+ prompt, // string in 'Prompt' Textbox component
25
+ seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
26
+ nbFrames, // 24 // it is the nb of frames per seconds I think?
27
+ nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
28
+ ]) as any
29
+
30
+ const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }
31
+
32
+ return `${instance}/file=${name}`
33
+ }
src/services/generateShot.mts ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import path from "node:path"
2
+
3
+ import { v4 as uuidv4 } from "uuid"
4
+ import tmpDir from "temp-dir"
5
+
6
+ import { downloadVideo } from "./downloadVideo.mts"
7
+ import { generateAudio } from "./generateAudio.mts"
8
+ import { generateVideo } from "./generateVideo.mts"
9
+ import { upscaleVideo } from "./upscaleVideo.mts"
10
+ import { generateVoice } from "./generateVoice.mts"
11
+ import { generateSeed } from "./generateSeed.mts"
12
+ import { mergeAudio } from "./mergeAudio.mts"
13
+ import { addAudioToVideo } from "./addAudioToVideo.mts"
14
+ import { interpolateVideo } from "./interpolateVideo.mts"
15
+ import { postInterpolation } from "./postInterpolation.mts"
16
+
17
+
18
+ export const generateShot = async ({
19
+ seed = 0,
20
+ shotId = "",
21
+ actorPrompt = "",
22
+ shotPrompt = "",
23
+ backgroundAudioPrompt = "",
24
+ foregroundAudioPrompt = "",
25
+ actorDialoguePrompt = "",
26
+ actorVoicePrompt = "",
27
+ duration = 2,
28
+ nbFrames = 24,
29
+ resolution = 576,
30
+ nbSteps = 35,
31
+ upscale = true,
32
+ interpolate = true,
33
+ noise = true,
34
+ }: {
35
+ seed?: number;
36
+ shotId?: string;
37
+ actorPrompt?: string;
38
+ shotPrompt?: string;
39
+ backgroundAudioPrompt?: string;
40
+ foregroundAudioPrompt?: string;
41
+ actorDialoguePrompt?: string;
42
+ actorVoicePrompt?: string;
43
+ duration?: number; // 2 seconds
44
+ nbFrames?: number; // 24 FPS
45
+ resolution?: number; // 256, 320, 512, 576, 720, 1080..
46
+ nbSteps?: number;
47
+ upscale?: boolean;
48
+ interpolate?: boolean;
49
+ noise?: boolean;
50
+ }) => {
51
+ seed = seed || generateSeed()
52
+ shotId = shotId || uuidv4()
53
+
54
+ const shotFileName = `${shotId}.mp4`
55
+
56
+ console.log("generating video shot:", {
57
+ seed,
58
+ shotId,
59
+ actorPrompt,
60
+ shotPrompt,
61
+ backgroundAudioPrompt,
62
+ foregroundAudioPrompt,
63
+ actorDialoguePrompt,
64
+ actorVoicePrompt,
65
+ duration,
66
+ nbFrames,
67
+ resolution,
68
+ nbSteps,
69
+ upscale,
70
+ interpolate,
71
+ noise,
72
+ })
73
+
74
+
75
+ if (actorPrompt) {
76
+ console.log("generating actor..")
77
+ const actorIdentityFileName = `actor_${Date.now()}.png`
78
+ // await generateActor(actorPrompt, actorIdentityFileName, seed)
79
+ }
80
+
81
+ console.log("generating base video ..")
82
+ let generatedVideoUrl = ""
83
+
84
+ // currenty the base model is incapable of generating more than 24 FPS,
85
+ // because otherwise the upscaler will have trouble
86
+
87
+ // so for now, we fix it to 24 frames
88
+ // const nbFramesForBaseModel = Math.min(3, Math.max(1, Math.round(duration))) * 8
89
+ const nbFramesForBaseModel = 24
90
+
91
+ try {
92
+ generatedVideoUrl = await generateVideo(shotPrompt, {
93
+ seed,
94
+ nbFrames: nbFramesForBaseModel,
95
+ nbSteps
96
+ })
97
+ } catch (err) {
98
+ // upscaling can be finicky, if it fails we try again
99
+ console.log('- trying again to generate base shot..')
100
+ generatedVideoUrl = await generateVideo(shotPrompt, {
101
+ seed,
102
+ nbFrames: nbFramesForBaseModel,
103
+ nbSteps
104
+ })
105
+ }
106
+
107
+ console.log("downloading video..")
108
+
109
+ const videoFileName = await downloadVideo(generatedVideoUrl, shotFileName)
110
+
111
+ if (upscale) {
112
+ console.log("upscaling video..")
113
+ try {
114
+ await upscaleVideo(videoFileName, shotPrompt)
115
+ } catch (err) {
116
+ // upscaling can be finicky, if it fails we try again
117
+ console.log('- trying again to upscale shot..')
118
+ await upscaleVideo(videoFileName, shotPrompt)
119
+ }
120
+ }
121
+
122
+ if (interpolate) {
123
+ console.log("upscaling video..")
124
+ // ATTENTION 1:
125
+ // the interpolation step always create a SLOW MOTION video
126
+ // it means it can last a lot longer (eg. 2x, 3x, 4x.. longer)
127
+ // than the duration generated by the original video model
128
+
129
+ // ATTENTION 2:
130
+ // the interpolation step generates videos in 910x512!
131
+
132
+ // ATTENTION 3:
133
+ // the interpolation step parameters are currently not passed to the space,
134
+ // so changing those two variables below will have no effect!
135
+ const interpolationSteps = 3
136
+ const interpolatedFramesPerSecond = 24
137
+ await interpolateVideo(
138
+ videoFileName,
139
+ interpolationSteps,
140
+ interpolatedFramesPerSecond
141
+ )
142
+ console.log('creating slow-mo video (910x512 @ 24 FPS)')
143
+
144
+ // with our current interpolation settings, the 3 seconds video generated by the model
145
+ // become a 7 seconds video, at 24 FPS
146
+
147
+ // so we want to scale it back to the desired duration length
148
+ // also, as a last trick we want to upscale it (without AI) and add some FXs
149
+ console.log('performing final scaling (1280x720 @ 24 FPS)')
150
+ await postInterpolation(videoFileName, duration, nbFrames)
151
+ }
152
+
153
+ let backgroundAudioFileName = ''
154
+ if (backgroundAudioPrompt) {
155
+ console.log("generating background audio..")
156
+ backgroundAudioFileName = await generateAudio(backgroundAudioPrompt, `shot_${shotId}_audio_${uuidv4}.m4a`)
157
+ }
158
+
159
+ let foregroundAudioFileName = ''
160
+ if (foregroundAudioPrompt) {
161
+ console.log("generating foreground audio..")
162
+ foregroundAudioFileName = await generateAudio(foregroundAudioPrompt, `shot_${shotId}_audio_${uuidv4()}.m4a`)
163
+ }
164
+
165
+
166
+ let voiceAudioFileName = ''
167
+ if (actorDialoguePrompt) {
168
+ console.log("configuring dialogue..")
169
+ if (actorVoicePrompt) {
170
+ console.log("configuring voice..")
171
+ // well.. that's a TODO!
172
+ // for now let's always use the same voice model
173
+
174
+ console.log('TODO this should be done in the sequence, not the prompt!')
175
+ voiceAudioFileName = await generateVoice(actorDialoguePrompt, `shot_${shotId}_voice_${uuidv4()}.m4a`)
176
+ }
177
+ }
178
+
179
+ console.log('merging audio with video..')
180
+ if (backgroundAudioFileName || foregroundAudioFileName) {
181
+ let audioFileName = ''
182
+
183
+ // we have both background and foreground
184
+ if (backgroundAudioFileName && foregroundAudioFileName) {
185
+ audioFileName = await mergeAudio({
186
+ input1FileName: backgroundAudioFileName,
187
+ input1Volume: 0.2,// 20% volume
188
+ input2FileName: foregroundAudioFileName,
189
+ input2Volume: 0.7, // 70% volume
190
+ })
191
+ } else if (backgroundAudioFileName) {
192
+ audioFileName = backgroundAudioFileName
193
+ } else if (foregroundAudioFileName) {
194
+ audioFileName = foregroundAudioFileName
195
+ }
196
+
197
+ await addAudioToVideo(videoFileName, audioFileName)
198
+ }
199
+
200
+ console.log("returning result to user..")
201
+
202
+ const filePath = path.resolve(tmpDir, videoFileName)
203
+
204
+ return {
205
+ shotId,
206
+ filePath,
207
+ videoFileName
208
+ }
209
+ }
src/services/generateVideo.mts CHANGED
@@ -1,4 +1,5 @@
1
- import { client } from '@gradio/client'
 
2
 
3
  import { generateSeed } from "./generateSeed.mts"
4
 
 
1
+ import { client } from "@gradio/client"
2
+
3
 
4
  import { generateSeed } from "./generateSeed.mts"
5
 
src/services/generateVoice.mts ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import puppeteer from "puppeteer"
2
+
3
+ import { downloadVideo } from "./downloadVideo.mts"
4
+
5
+ const instances: string[] = [
6
+ process.env.VS_VOICE_GENERATION_SPACE_API_URL
7
+ ]
8
+
9
+ // TODO we should use an inference endpoint instead
10
+ export async function generateVoice(prompt: string, voiceFileName: string) {
11
+ const instance = instances.shift()
12
+ instances.push(instance)
13
+
14
+ console.log("instance:", instance)
15
+
16
+ const browser = await puppeteer.launch({
17
+ headless: false,
18
+ protocolTimeout: 800000,
19
+ })
20
+
21
+ const page = await browser.newPage()
22
+
23
+ await page.goto(instance, {
24
+ waitUntil: "networkidle2",
25
+ })
26
+
27
+ await new Promise(r => setTimeout(r, 3000))
28
+
29
+ const firstTextarea = await page.$('textarea[data-testid="textbox"]')
30
+
31
+ await firstTextarea.type(prompt)
32
+
33
+ // console.log("looking for the button to submit")
34
+ const submitButton = await page.$("button.lg")
35
+
36
+ // console.log("clicking on the button")
37
+ await submitButton.click()
38
+
39
+ await page.waitForSelector("audio", {
40
+ timeout: 800000, // need to be large enough in case someone else attemps to use our space
41
+ })
42
+
43
+ const voiceRemoteUrl = await page.$$eval("audio", el => el.map(x => x.getAttribute("src"))[0])
44
+
45
+
46
+ console.log({
47
+ voiceRemoteUrl,
48
+ })
49
+
50
+
51
+ console.log(`- downloading ${voiceFileName} from ${voiceRemoteUrl}`)
52
+
53
+ await downloadVideo(voiceRemoteUrl, voiceFileName)
54
+
55
+ return voiceFileName
56
+ }
src/services/interpolateVideo.mts CHANGED
@@ -1,40 +1,53 @@
1
- import { promises as fs } from "node:fs"
2
  import path from "node:path"
3
- import { Blob } from "buffer"
4
- // import { blobFrom } from "fetch-blob"
5
 
6
- import { client } from "@gradio/client"
7
  import tmpDir from "temp-dir"
8
-
9
- import { downloadVideo } from './downloadVideo.mts'
10
 
11
  const instances: string[] = [
12
  process.env.VS_VIDEO_INTERPOLATION_SPACE_API_URL
13
  ]
14
 
15
- export const interpolateVideo = async (fileName: string) => {
16
 
 
 
17
  const inputFilePath = path.join(tmpDir, fileName)
18
 
 
 
 
 
19
  const instance = instances.shift()
20
  instances.push(instance)
21
 
22
- const api = await client(instance)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- const video = await fs.readFile(inputFilePath)
25
 
26
- const blob = new Blob([video], { type: 'video/mp4' })
27
- // const blob = blobFrom(filePath)
28
- const result = await api.predict(1, [
29
- blob, // blob in 'parameter_5' Video component
30
- 1, // number (numeric value between 1 and 4) in 'Interpolation Steps' Slider component
31
- 24, // string in 'FPS output' Radio component
32
- ])
33
 
34
- const data = (result as any).data[0]
35
- console.log('raw data:', data)
36
- const { orig_name, data: remoteFilePath } = data
37
- const remoteUrl = `${instance}/file=${remoteFilePath}`
38
- console.log("remoteUrl:", remoteUrl)
39
- await downloadVideo(remoteUrl, fileName)
40
  }
 
 
1
  import path from "node:path"
 
 
2
 
3
+ import puppeteer from "puppeteer"
4
  import tmpDir from "temp-dir"
5
+ import { downloadVideo } from "./downloadVideo.mts"
 
6
 
7
  const instances: string[] = [
8
  process.env.VS_VIDEO_INTERPOLATION_SPACE_API_URL
9
  ]
10
 
 
11
 
12
+ // TODO we should use an inference endpoint instead
13
+ export async function interpolateVideo(fileName: string, steps: number, fps: number) {
14
  const inputFilePath = path.join(tmpDir, fileName)
15
 
16
+ console.log(`interpolating ${fileName}`)
17
+ console.log(`warning: interpolateVideo parameter "${steps}" is ignored!`)
18
+ console.log(`warning: interpolateVideo parameter "${fps}" is ignored!`)
19
+
20
  const instance = instances.shift()
21
  instances.push(instance)
22
 
23
+ const browser = await puppeteer.launch({
24
+ headless: true,
25
+ protocolTimeout: 400000,
26
+ })
27
+
28
+ const page = await browser.newPage()
29
+ await page.goto(instance, { waitUntil: 'networkidle2' })
30
+
31
+ await new Promise(r => setTimeout(r, 3000))
32
+
33
+ const fileField = await page.$('input[type=file]')
34
+
35
+ // console.log(`uploading file..`)
36
+ await fileField.uploadFile(inputFilePath)
37
+
38
+ // console.log('looking for the button to submit')
39
+ const submitButton = await page.$('button.lg')
40
+
41
+ // console.log('clicking on the button')
42
+ await submitButton.click()
43
+
44
+ await page.waitForSelector('a[download="interpolated_result.mp4"]', {
45
+ timeout: 400000, // need to be large enough in case someone else attemps to use our space
46
+ })
47
 
48
+ const interpolatedFileUrl = await page.$$eval('a[download="interpolated_result.mp4"]', el => el.map(x => x.getAttribute("href"))[0])
49
 
50
+ await downloadVideo(interpolatedFileUrl, fileName)
 
 
 
 
 
 
51
 
52
+ return fileName
 
 
 
 
 
53
  }
src/services/interpolateVideoLegacy.mts ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { promises as fs } from "node:fs"
2
+ import path from "node:path"
3
+ import { Blob } from "buffer"
4
+
5
+ import { client } from "@gradio/client"
6
+ import tmpDir from "temp-dir"
7
+
8
+ import { downloadVideo } from './downloadVideo.mts'
9
+
10
+ const instances: string[] = [
11
+ process.env.VS_VIDEO_INTERPOLATION_SPACE_API_URL
12
+ ]
13
+
14
+ export const interpolateVideo = async (fileName: string, steps: number, fps: number) => {
15
+
16
+ const inputFilePath = path.join(tmpDir, fileName)
17
+
18
+ const instance = instances.shift()
19
+ instances.push(instance)
20
+
21
+ const api = await client(instance)
22
+
23
+ const video = await fs.readFile(inputFilePath)
24
+
25
+ const blob = new Blob([video], { type: 'video/mp4' })
26
+ // const blob = blobFrom(filePath)
27
+ const result = await api.predict(1, [
28
+ blob, // blob in 'parameter_5' Video component
29
+ steps, // number (numeric value between 1 and 4) in 'Interpolation Steps' Slider component
30
+ fps, // string (FALSE! it's a number) in 'FPS output' Radio component
31
+ ])
32
+
33
+ const data = (result as any).data[0]
34
+ console.log('raw data:', data)
35
+ const { orig_name, data: remoteFilePath } = data
36
+ const remoteUrl = `${instance}/file=${remoteFilePath}`
37
+ console.log("remoteUrl:", remoteUrl)
38
+ await downloadVideo(remoteUrl, fileName)
39
+ }
src/services/mergeAudio.mts ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import path from "node:path"
2
+
3
+ import tmpDir from "temp-dir"
4
+ import { v4 as uuidv4 } from "uuid"
5
+ import ffmpeg from "fluent-ffmpeg"
6
+
7
+ export const mergeAudio = async ({
8
+ input1FileName,
9
+ input1Volume,
10
+ input2FileName,
11
+ input2Volume,
12
+ outputFileName = ''
13
+ }: {
14
+ input1FileName: string,
15
+ input1Volume: number,
16
+ input2FileName: string,
17
+ input2Volume: number,
18
+ outputFileName?: string
19
+ }): Promise<string> => {
20
+ outputFileName = `${uuidv4()}.m4a`
21
+
22
+ const input1FilePath = path.resolve(tmpDir, input1FileName)
23
+ const input2FilePath = path.resolve(tmpDir, input2FileName)
24
+ const outputFilePath = path.resolve(tmpDir, outputFileName)
25
+
26
+ const input1Ffmpeg = ffmpeg(input1FilePath)
27
+ .outputOptions("-map 0:a:0")
28
+ .audioFilters([{ filter: 'volume', options: input1Volume }]); // set volume for main audio
29
+
30
+ const input2Ffmpeg = ffmpeg(input2FilePath)
31
+ .outputOptions("-map 1:a:0")
32
+ .audioFilters([{ filter: 'volume', options: input2Volume }]); // set volume for additional audio
33
+
34
+ await new Promise((resolve, reject) => {
35
+ ffmpeg()
36
+ .input(input1Ffmpeg)
37
+ .input(input2Ffmpeg)
38
+ .outputOptions("-c:a aac") // use audio codec
39
+ .outputOptions("-shortest") // finish encoding when shortest input stream ends
40
+ .output(outputFilePath)
41
+ .on("end", resolve)
42
+ .on("error", reject)
43
+ .run()
44
+ })
45
+
46
+ console.log(`merged audio from ${input1FileName} and ${input2FileName} into ${outputFileName}`)
47
+
48
+ return outputFileName
49
+ }
src/services/postInterpolation.mts ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import path from "node:path"
2
+ import fs from "node:fs"
3
+
4
+ import { v4 as uuidv4 } from "uuid"
5
+ import tmpDir from "temp-dir"
6
+ import ffmpeg from "fluent-ffmpeg"
7
+
8
+ export const postInterpolation = async (fileName: string, duration: number, nbFrames: number): Promise<string> => {
9
+ return new Promise((resolve,reject) => {
10
+
11
+ const tmpFileName = `${uuidv4()}.mp4`
12
+
13
+ const filePath = path.join(tmpDir, fileName)
14
+ const tmpFilePath = path.join(tmpDir, tmpFileName)
15
+
16
+
17
+ ffmpeg.ffprobe(filePath, function(err, metadata) {
18
+ if (err) { reject(err); return; }
19
+
20
+
21
+ const currentVideoDuration = metadata.format.duration
22
+
23
+ // compute a ratio ex. 0.3 = 30% of the total length
24
+ const durationRatio = currentVideoDuration / duration
25
+
26
+ ffmpeg(filePath)
27
+
28
+ // convert to HD
29
+ .size("1280x720")
30
+
31
+ .videoFilters([
32
+ `setpts=${durationRatio}*PTS`, // we make the video faster
33
+ //'scale=-1:576:lanczos',
34
+ // 'unsharp=5:5:0.2:5:5:0.2', // not recommended, this make the video more "pixely"
35
+ 'noise=c0s=10:c0f=t+u' // add a movie grain noise
36
+ ])
37
+ .outputOptions([
38
+ `-r ${nbFrames}`,
39
+ ])
40
+
41
+ .save(tmpFilePath)
42
+ .on("end", async () => {
43
+ await fs.promises.copyFile(tmpFilePath, filePath)
44
+ try {
45
+ await fs.promises.unlink(tmpFilePath)
46
+ } catch (err) {
47
+ console.log("failed to cleanup (no big deal..)")
48
+ }
49
+
50
+ resolve(fileName)
51
+ })
52
+ .on("error", (err) => {
53
+ reject(err)
54
+ })
55
+ })
56
+ })
57
+ }
src/test2.mts ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import { generateAudio } from "./services/generateAudio.mts"
2
+
3
+
4
+ console.log('generating background audio..')
5
+ const audioFileName = await generateAudio("sounds of a castle bell ringing alarm", "test_juju_audio.mp3")
6
+
7
+ console.log('result:', audioFileName)
src/types.mts CHANGED
@@ -26,19 +26,40 @@ export interface Database {
26
  }
27
 
28
 
29
- export interface MakeShot {
30
  token: string
31
  shotPrompt: string
32
  // inputVideo?: string
33
 
34
- audioPrompt?: string
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  seed?: number
36
  upscale?: boolean
37
 
 
 
38
  duration?: number
39
  steps?: number
40
 
41
  fps?: number // 8, 12, 24, 30, 60
42
 
43
  resolution?: number // 256, 512, 576, 720, 1080
 
 
 
 
 
44
  }
 
26
  }
27
 
28
 
29
// parameters accepted when requesting the generation of a single video shot
export interface ShotQuery {
  // caller's access token
  token: string

  // describe the visual content of the shot
  shotPrompt: string
  // inputVideo?: string

  // describe the background audio (crowd, birds, wind, sea etc..)
  backgroundAudioPrompt?: string

  // describe the foreground audio (cars revving, footsteps, objects breaking, explosion etc)
  foregroundAudioPrompt?: string

  // describe the main actor visible in the shot (optional)
  actorPrompt?: string

  // describe the main actor voice (man, woman, old, young, amused, annoyed.. etc)
  actorVoicePrompt?: string

  // describe the main actor dialogue line
  actorDialoguePrompt?: string

  seed?: number
  upscale?: boolean

  noise?: boolean // add movie noise

  duration?: number
  steps?: number

  fps?: number // 8, 12, 24, 30, 60

  resolution?: number // 256, 512, 576, 720, 1080
}

// a shot-generation job tracked by the server
export interface Job {
  // presumably an ISO-8601 timestamp of when the job started — TODO confirm against the writer
  startedAt: string

  // the query that spawned this job
  query: ShotQuery
}