Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
a65e95e
1
Parent(s):
29598d1
work in progress to create the video service
Browse files- Dockerfile +1 -1
- package-lock.json +9 -0
- package.json +2 -0
- src/data/all_words.json +0 -0
- src/data/good_words.json +0 -0
- src/index.mts +47 -61
- src/services/addAudioToVideo.mts +31 -15
- src/services/generateActor.mts +50 -0
- src/services/generateAudio.mts +45 -22
- src/services/generateAudioLegacy.mts +33 -0
- src/services/generateShot.mts +209 -0
- src/services/generateVideo.mts +2 -1
- src/services/generateVoice.mts +56 -0
- src/services/interpolateVideo.mts +35 -22
- src/services/interpolateVideoLegacy.mts +39 -0
- src/services/mergeAudio.mts +49 -0
- src/services/postInterpolation.mts +57 -0
- src/test2.mts +7 -0
- src/types.mts +23 -2
Dockerfile
CHANGED
@@ -30,6 +30,6 @@ RUN npm install
|
|
30 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
31 |
COPY --chown=user . $HOME/app
|
32 |
|
33 |
-
EXPOSE 7860
|
34 |
|
35 |
CMD [ "npm", "run", "start" ]
|
|
|
30 |
# Copy the current directory contents into the container at $HOME/app setting the owner to the user
|
31 |
COPY --chown=user . $HOME/app
|
32 |
|
33 |
+
EXPOSE 7860
|
34 |
|
35 |
CMD [ "npm", "run", "start" ]
|
package-lock.json
CHANGED
@@ -10,6 +10,7 @@
|
|
10 |
"license": "Apache License",
|
11 |
"dependencies": {
|
12 |
"@gradio/client": "^0.1.4",
|
|
|
13 |
"@types/express": "^4.17.17",
|
14 |
"@types/uuid": "^9.0.2",
|
15 |
"express": "^4.18.2",
|
@@ -78,6 +79,14 @@
|
|
78 |
"node": ">=18.0.0"
|
79 |
}
|
80 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
"node_modules/@jridgewell/resolve-uri": {
|
82 |
"version": "3.1.1",
|
83 |
"resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.1.tgz",
|
|
|
10 |
"license": "Apache License",
|
11 |
"dependencies": {
|
12 |
"@gradio/client": "^0.1.4",
|
13 |
+
"@huggingface/inference": "^2.6.1",
|
14 |
"@types/express": "^4.17.17",
|
15 |
"@types/uuid": "^9.0.2",
|
16 |
"express": "^4.18.2",
|
|
|
79 |
"node": ">=18.0.0"
|
80 |
}
|
81 |
},
|
82 |
+
"node_modules/@huggingface/inference": {
|
83 |
+
"version": "2.6.1",
|
84 |
+
"resolved": "https://registry.npmjs.org/@huggingface/inference/-/inference-2.6.1.tgz",
|
85 |
+
"integrity": "sha512-qFYchgOCPeEkZJKiSr7Kz62QwukJtgkeQCT7Q0SSKUcvHpTQVNJp6i/JrJMR4dBdzQysJ1SZDC0pLBBnnskTag==",
|
86 |
+
"engines": {
|
87 |
+
"node": ">=18"
|
88 |
+
}
|
89 |
+
},
|
90 |
"node_modules/@jridgewell/resolve-uri": {
|
91 |
"version": "3.1.1",
|
92 |
"resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.1.tgz",
|
package.json
CHANGED
@@ -6,6 +6,7 @@
|
|
6 |
"scripts": {
|
7 |
"start": "node --loader ts-node/esm src/index.mts",
|
8 |
"test": "node --loader ts-node/esm src/test.mts",
|
|
|
9 |
"docker": "npm run docker:build && npm run docker:run",
|
10 |
"docker:build": "docker build -t ai-webtv .",
|
11 |
"docker:run": "docker run -it -p 7860:7860 video-service"
|
@@ -14,6 +15,7 @@
|
|
14 |
"license": "Apache License",
|
15 |
"dependencies": {
|
16 |
"@gradio/client": "^0.1.4",
|
|
|
17 |
"@types/express": "^4.17.17",
|
18 |
"@types/uuid": "^9.0.2",
|
19 |
"express": "^4.18.2",
|
|
|
6 |
"scripts": {
|
7 |
"start": "node --loader ts-node/esm src/index.mts",
|
8 |
"test": "node --loader ts-node/esm src/test.mts",
|
9 |
+
"test2": "node --loader ts-node/esm src/test2.mts",
|
10 |
"docker": "npm run docker:build && npm run docker:run",
|
11 |
"docker:build": "docker build -t ai-webtv .",
|
12 |
"docker:run": "docker run -it -p 7860:7860 video-service"
|
|
|
15 |
"license": "Apache License",
|
16 |
"dependencies": {
|
17 |
"@gradio/client": "^0.1.4",
|
18 |
+
"@huggingface/inference": "^2.6.1",
|
19 |
"@types/express": "^4.17.17",
|
20 |
"@types/uuid": "^9.0.2",
|
21 |
"express": "^4.18.2",
|
src/data/all_words.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
src/data/good_words.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
src/index.mts
CHANGED
@@ -1,63 +1,62 @@
|
|
1 |
-
import { promises as fs } from
|
2 |
-
import path from 'node:path'
|
3 |
|
4 |
-
import
|
5 |
-
import express from 'express'
|
6 |
|
7 |
-
import {
|
8 |
-
import {
|
9 |
-
import {
|
10 |
-
import { generateSeed } from './services/generateSeed.mts'
|
11 |
-
import { addAudioToVideo } from './services/addAudioToVideo.mts'
|
12 |
-
|
13 |
-
import { MakeShot } from './types.mts'
|
14 |
|
15 |
const app = express()
|
16 |
const port = 7860
|
17 |
|
18 |
app.use(express.json())
|
19 |
|
|
|
20 |
|
21 |
-
app.post(
|
22 |
-
const query = req.body as
|
23 |
|
24 |
-
|
25 |
-
const token = `${query.token || ''}`
|
26 |
if (token !== process.env.VS_SECRET_ACCESS_TOKEN) {
|
27 |
console.log("couldn't find access token in the query")
|
28 |
-
res.write(JSON.stringify({ error: true, message:
|
29 |
res.end()
|
30 |
return
|
31 |
}
|
32 |
|
33 |
-
const shotPrompt = `${query.shotPrompt ||
|
34 |
if (shotPrompt.length < 5) {
|
35 |
-
res.write(JSON.stringify({ error: true, message:
|
36 |
res.end()
|
37 |
return
|
38 |
}
|
39 |
|
40 |
// optional video URL
|
41 |
-
// const inputVideo = `${req.query.inputVideo ||
|
|
|
|
|
|
|
42 |
|
43 |
-
// optional audio prompt
|
44 |
-
const
|
45 |
|
46 |
// optional seed
|
47 |
const defaultSeed = generateSeed()
|
48 |
const seedStr = Number(`${query.seed || defaultSeed}`)
|
49 |
const maybeSeed = Number(seedStr)
|
50 |
const seed = isNaN(maybeSeed) || ! isFinite(maybeSeed) ? defaultSeed : maybeSeed
|
51 |
-
|
|
|
|
|
|
|
|
|
52 |
|
53 |
-
// should we upscale or not?
|
54 |
-
const upscale = `${query.upscale || 'true'}` === 'true'
|
55 |
|
56 |
-
// duration of the prompt, in seconds
|
57 |
const defaultDuration = 3
|
|
|
58 |
const durationStr = Number(`${query.duration || defaultDuration}`)
|
59 |
const maybeDuration = Number(durationStr)
|
60 |
-
const duration = Math.min(
|
61 |
|
62 |
const defaultSteps = 35
|
63 |
const stepsStr = Number(`${query.steps || defaultSteps}`)
|
@@ -68,58 +67,45 @@ app.post('/shot', async (req, res) => {
|
|
68 |
const defaultFps = 24
|
69 |
const fpsStr = Number(`${query.fps || defaultFps}`)
|
70 |
const maybeFps = Number(fpsStr)
|
71 |
-
const
|
72 |
|
73 |
const defaultResolution = 576
|
74 |
const resolutionStr = Number(`${query.resolution || defaultResolution}`)
|
75 |
const maybeResolution = Number(resolutionStr)
|
76 |
const resolution = Math.min(1080, Math.max(256, isNaN(maybeResolution) || !isFinite(maybeResolution) ? defaultResolution : maybeResolution))
|
77 |
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
-
const shotFileName = `${Date.now()}.mp4`
|
80 |
|
81 |
-
|
|
|
|
|
82 |
shotPrompt,
|
83 |
-
|
84 |
-
|
|
|
|
|
85 |
duration,
|
|
|
|
|
86 |
nbSteps,
|
87 |
-
fps,
|
88 |
-
seed,
|
89 |
upscale,
|
90 |
-
|
|
|
91 |
})
|
92 |
-
console.log('generating base video ..')
|
93 |
-
const generatedVideoUrl = await generateVideo(shotPrompt, {
|
94 |
-
seed,
|
95 |
-
nbFrames: 24, // if we try more eg 48 frames, this will crash the upscaler (not enough memory)
|
96 |
-
nbSteps
|
97 |
-
})
|
98 |
-
|
99 |
|
100 |
-
console.log(
|
101 |
-
const videoFileName = await downloadVideo(generatedVideoUrl, shotFileName)
|
102 |
|
103 |
-
|
104 |
-
console.log('upscaling video..')
|
105 |
-
await upscaleVideo(videoFileName, shotPrompt)
|
106 |
-
}
|
107 |
-
|
108 |
-
// TODO call AudioLDM
|
109 |
-
if (audioPrompt) {
|
110 |
-
// const audioFileName = await callAudioLDM(audioPrompt)
|
111 |
-
console.log('calling audio prompt')
|
112 |
-
|
113 |
-
// await addAudioToVideo(videoFileName, audioFileName)
|
114 |
-
}
|
115 |
-
|
116 |
-
console.log('returning result to user..')
|
117 |
-
|
118 |
-
const filePath = path.resolve(tmpDir, videoFileName)
|
119 |
|
120 |
const buffer = await fs.readFile(filePath)
|
121 |
-
|
122 |
-
res.setHeader(
|
|
|
123 |
res.end(buffer)
|
124 |
})
|
125 |
|
|
|
1 |
+
import { promises as fs } from "fs"
|
|
|
2 |
|
3 |
+
import express from "express"
|
|
|
4 |
|
5 |
+
import { generateSeed } from "./services/generateSeed.mts"
|
6 |
+
import { Job, ShotQuery } from "./types.mts"
|
7 |
+
import { generateShot } from "./services/generateShot.mts"
|
|
|
|
|
|
|
|
|
8 |
|
9 |
const app = express()
|
10 |
const port = 7860
|
11 |
|
12 |
app.use(express.json())
|
13 |
|
14 |
+
const queue: Job[] = []
|
15 |
|
16 |
+
app.post("/shot", async (req, res) => {
|
17 |
+
const query = req.body as ShotQuery
|
18 |
|
19 |
+
const token = `${query.token || ""}`
|
|
|
20 |
if (token !== process.env.VS_SECRET_ACCESS_TOKEN) {
|
21 |
console.log("couldn't find access token in the query")
|
22 |
+
res.write(JSON.stringify({ error: true, message: "access denied" }))
|
23 |
res.end()
|
24 |
return
|
25 |
}
|
26 |
|
27 |
+
const shotPrompt = `${query.shotPrompt || ""}`
|
28 |
if (shotPrompt.length < 5) {
|
29 |
+
res.write(JSON.stringify({ error: true, message: "prompt too short (must be at least 5 in length)" }))
|
30 |
res.end()
|
31 |
return
|
32 |
}
|
33 |
|
34 |
// optional video URL
|
35 |
+
// const inputVideo = `${req.query.inputVideo || ""}`
|
36 |
+
|
37 |
+
// optional background audio prompt
|
38 |
+
const backgroundAudioPrompt = `${query.backgroundAudioPrompt || ""}`
|
39 |
|
40 |
+
// optional foreground audio prompt
|
41 |
+
const foregroundAudioPrompt = `${query.foregroundAudioPrompt || ""}`
|
42 |
|
43 |
// optional seed
|
44 |
const defaultSeed = generateSeed()
|
45 |
const seedStr = Number(`${query.seed || defaultSeed}`)
|
46 |
const maybeSeed = Number(seedStr)
|
47 |
const seed = isNaN(maybeSeed) || ! isFinite(maybeSeed) ? defaultSeed : maybeSeed
|
48 |
+
|
49 |
+
// in production we want those ON by default
|
50 |
+
const upscale = `${query.upscale || "true"}` === "true"
|
51 |
+
const interpolate = `${query.upscale || "true"}` === "true"
|
52 |
+
const noise = `${query.noise || "true"}` === "true"
|
53 |
|
|
|
|
|
54 |
|
|
|
55 |
const defaultDuration = 3
|
56 |
+
const maxDuration = 5
|
57 |
const durationStr = Number(`${query.duration || defaultDuration}`)
|
58 |
const maybeDuration = Number(durationStr)
|
59 |
+
const duration = Math.min(maxDuration, Math.max(1, isNaN(maybeDuration) || !isFinite(maybeDuration) ? defaultDuration : maybeDuration))
|
60 |
|
61 |
const defaultSteps = 35
|
62 |
const stepsStr = Number(`${query.steps || defaultSteps}`)
|
|
|
67 |
const defaultFps = 24
|
68 |
const fpsStr = Number(`${query.fps || defaultFps}`)
|
69 |
const maybeFps = Number(fpsStr)
|
70 |
+
const nbFrames = Math.min(60, Math.max(8, isNaN(maybeFps) || !isFinite(maybeFps) ? defaultFps : maybeFps))
|
71 |
|
72 |
const defaultResolution = 576
|
73 |
const resolutionStr = Number(`${query.resolution || defaultResolution}`)
|
74 |
const maybeResolution = Number(resolutionStr)
|
75 |
const resolution = Math.min(1080, Math.max(256, isNaN(maybeResolution) || !isFinite(maybeResolution) ? defaultResolution : maybeResolution))
|
76 |
|
77 |
+
const actorPrompt = `${query.actorPrompt || ""}`
|
78 |
+
|
79 |
+
const actorVoicePrompt = `${query.actorVoicePrompt || ""}`
|
80 |
+
|
81 |
+
const actorDialoguePrompt = `${query.actorDialoguePrompt || ""}`
|
82 |
|
|
|
83 |
|
84 |
+
const { filePath } = await generateShot({
|
85 |
+
seed,
|
86 |
+
actorPrompt,
|
87 |
shotPrompt,
|
88 |
+
backgroundAudioPrompt,
|
89 |
+
foregroundAudioPrompt,
|
90 |
+
actorDialoguePrompt,
|
91 |
+
actorVoicePrompt,
|
92 |
duration,
|
93 |
+
nbFrames,
|
94 |
+
resolution,
|
95 |
nbSteps,
|
|
|
|
|
96 |
upscale,
|
97 |
+
interpolate,
|
98 |
+
noise,
|
99 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
+
console.log(`generated video in ${filePath}`)
|
|
|
102 |
|
103 |
+
console.log("returning result to user..")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
const buffer = await fs.readFile(filePath)
|
106 |
+
|
107 |
+
res.setHeader("Content-Type", "media/mp4")
|
108 |
+
res.setHeader("Content-Length", buffer.length)
|
109 |
res.end(buffer)
|
110 |
})
|
111 |
|
src/services/addAudioToVideo.mts
CHANGED
@@ -1,29 +1,45 @@
|
|
1 |
-
import
|
2 |
-
import
|
3 |
|
4 |
-
import tmpDir from
|
5 |
-
import
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
|
|
|
|
|
|
|
|
11 |
await new Promise((resolve, reject) => {
|
12 |
ffmpeg(videoFilePath)
|
13 |
.input(audioFilePath)
|
14 |
-
.
|
15 |
-
.outputOptions(
|
16 |
-
.outputOptions(
|
17 |
-
.outputOptions(
|
18 |
-
.outputOptions(
|
|
|
19 |
.output(tempOutputFilePath)
|
20 |
-
.on(
|
21 |
-
.on(
|
22 |
.run()
|
23 |
})
|
24 |
|
25 |
// Now we want to replace the original video file with the new file that has been created
|
26 |
await fs.rename(tempOutputFilePath, videoFilePath)
|
27 |
|
28 |
-
return
|
29 |
};
|
|
|
1 |
+
import { promises as fs } from "node:fs"
|
2 |
+
import path from "node:path"
|
3 |
|
4 |
+
import tmpDir from "temp-dir"
|
5 |
+
import { v4 as uuidv4 } from "uuid"
|
6 |
|
7 |
+
import ffmpeg from "fluent-ffmpeg"
|
8 |
+
|
9 |
+
export const addAudioToVideo = async (
|
10 |
+
videoFileName: string,
|
11 |
+
audioFileName: string,
|
12 |
+
|
13 |
+
/*
|
14 |
+
* 0.0: mute the audio completely
|
15 |
+
* 0.5: set the audio to 50% of original volume (half volume)
|
16 |
+
* 1.0: maintain the audio at original volume (100% of original volume)
|
17 |
+
* 2.0: amplify the audio to 200% of original volume (double volume - might cause clipping)
|
18 |
+
*/
|
19 |
+
volume: number = 1.0
|
20 |
+
): Promise<string> => {
|
21 |
|
22 |
+
const tempOutputFilePath = `${uuidv4()}.mp4`
|
23 |
+
const videoFilePath = path.resolve(tmpDir, videoFileName)
|
24 |
+
const audioFilePath = path.resolve(tmpDir, audioFileName)
|
25 |
+
|
26 |
await new Promise((resolve, reject) => {
|
27 |
ffmpeg(videoFilePath)
|
28 |
.input(audioFilePath)
|
29 |
+
.audioFilters({ filter: 'volume', options: volume }) // add audio filter for volume
|
30 |
+
.outputOptions("-c:v copy") // use video copy codec
|
31 |
+
.outputOptions("-c:a aac") // use audio codec
|
32 |
+
.outputOptions("-map 0:v:0") // map video from 0th to 0th
|
33 |
+
.outputOptions("-map 1:a:0") // map audio from 1st to 0th
|
34 |
+
.outputOptions("-shortest") // finish encoding when shortest input stream ends
|
35 |
.output(tempOutputFilePath)
|
36 |
+
.on("end", resolve)
|
37 |
+
.on("error", reject)
|
38 |
.run()
|
39 |
})
|
40 |
|
41 |
// Now we want to replace the original video file with the new file that has been created
|
42 |
await fs.rename(tempOutputFilePath, videoFilePath)
|
43 |
|
44 |
+
return videoFileName
|
45 |
};
|
src/services/generateActor.mts
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { promises as fs } from "node:fs"
|
2 |
+
import path from "node:path"
|
3 |
+
import tmpDir from "temp-dir"
|
4 |
+
|
5 |
+
import { HfInference } from "@huggingface/inference"
|
6 |
+
|
7 |
+
const hf = new HfInference(process.env.VS_HF_API_TOKEN)
|
8 |
+
|
9 |
+
export const generateActor = async (prompt: string, fileName: string, seed: number) => {
|
10 |
+
const positivePrompt = [
|
11 |
+
`profile photo of ${prompt || ""}`,
|
12 |
+
"id picture",
|
13 |
+
"photoshoot",
|
14 |
+
"portrait photography",
|
15 |
+
"neutral expression",
|
16 |
+
"neutral background",
|
17 |
+
"studio photo",
|
18 |
+
"award winning",
|
19 |
+
"high resolution",
|
20 |
+
"photo realistic",
|
21 |
+
"intricate details",
|
22 |
+
"beautiful",
|
23 |
+
]
|
24 |
+
const negativePrompt = [
|
25 |
+
"anime",
|
26 |
+
"drawing",
|
27 |
+
"painting",
|
28 |
+
"lowres",
|
29 |
+
"blurry",
|
30 |
+
"artificial"
|
31 |
+
]
|
32 |
+
|
33 |
+
console.log(`generating actor: ${positivePrompt.join(", ")}`)
|
34 |
+
|
35 |
+
const blob = await hf.textToImage({
|
36 |
+
inputs: positivePrompt.join(", "),
|
37 |
+
model: "stabilityai/stable-diffusion-2-1",
|
38 |
+
parameters: {
|
39 |
+
negative_prompt: negativePrompt.join(", "),
|
40 |
+
// seed, no seed?
|
41 |
+
}
|
42 |
+
})
|
43 |
+
|
44 |
+
const filePath = path.resolve(tmpDir, fileName)
|
45 |
+
|
46 |
+
const buffer = Buffer.from(await blob.arrayBuffer())
|
47 |
+
await fs.writeFile(filePath, buffer, "utf8")
|
48 |
+
|
49 |
+
return filePath
|
50 |
+
}
|
src/services/generateAudio.mts
CHANGED
@@ -1,33 +1,56 @@
|
|
1 |
-
import
|
2 |
-
|
3 |
-
import { generateSeed } from "./generateSeed.mts"
|
4 |
|
5 |
const instances: string[] = [
|
6 |
process.env.VS_AUDIO_GENERATION_SPACE_API_URL
|
7 |
]
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
nbFrames: number;
|
12 |
-
nbSteps: number;
|
13 |
-
}) => {
|
14 |
-
const seed = options?.seed || generateSeed()
|
15 |
-
const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling quill require too much memory!
|
16 |
-
const nbSteps = options?.nbSteps || 35
|
17 |
-
|
18 |
const instance = instances.shift()
|
19 |
instances.push(instance)
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
const rawResponse = await api.predict('/run', [
|
24 |
-
prompt, // string in 'Prompt' Textbox component
|
25 |
-
seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
|
26 |
-
nbFrames, // 24 // it is the nb of frames per seconds I think?
|
27 |
-
nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
|
28 |
-
]) as any
|
29 |
|
30 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
-
return
|
33 |
}
|
|
|
1 |
+
import puppeteer from "puppeteer"
|
2 |
+
import { downloadVideo } from "./downloadVideo.mts"
|
|
|
3 |
|
4 |
const instances: string[] = [
|
5 |
process.env.VS_AUDIO_GENERATION_SPACE_API_URL
|
6 |
]
|
7 |
|
8 |
+
// TODO we should use an inference endpoint instead
|
9 |
+
export async function generateAudio(prompt: string, audioFileName: string) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
const instance = instances.shift()
|
11 |
instances.push(instance)
|
12 |
|
13 |
+
console.log("instance:", instance)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
+
const browser = await puppeteer.launch({
|
16 |
+
headless: false,
|
17 |
+
protocolTimeout: 800000,
|
18 |
+
})
|
19 |
+
|
20 |
+
const page = await browser.newPage()
|
21 |
+
|
22 |
+
await page.goto(instance, {
|
23 |
+
waitUntil: "networkidle2",
|
24 |
+
})
|
25 |
+
|
26 |
+
await new Promise(r => setTimeout(r, 3000))
|
27 |
+
|
28 |
+
const firstTextboxInput = await page.$('input[data-testid="textbox"]')
|
29 |
+
|
30 |
+
await firstTextboxInput.type(prompt)
|
31 |
+
|
32 |
+
// console.log("looking for the button to submit")
|
33 |
+
const submitButton = await page.$("button.lg")
|
34 |
+
|
35 |
+
// console.log("clicking on the button")
|
36 |
+
await submitButton.click()
|
37 |
+
|
38 |
+
await page.waitForSelector("a[download]", {
|
39 |
+
timeout: 800000, // need to be large enough in case someone else attemps to use our space
|
40 |
+
})
|
41 |
+
|
42 |
+
const audioRemoteUrl = await page.$$eval("a[download]", el => el.map(x => x.getAttribute("href"))[0])
|
43 |
+
|
44 |
+
|
45 |
+
console.log({
|
46 |
+
audioRemoteUrl,
|
47 |
+
})
|
48 |
+
|
49 |
+
|
50 |
+
// console.log("downloading file from space..")
|
51 |
+
console.log(`- downloading ${audioFileName} from ${audioRemoteUrl}`)
|
52 |
+
|
53 |
+
await downloadVideo(audioRemoteUrl, audioFileName)
|
54 |
|
55 |
+
return audioFileName
|
56 |
}
|
src/services/generateAudioLegacy.mts
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { client } from '@gradio/client'
|
2 |
+
|
3 |
+
import { generateSeed } from "./generateSeed.mts"
|
4 |
+
|
5 |
+
const instances: string[] = [
|
6 |
+
process.env.VS_AUDIO_GENERATION_SPACE_API_URL
|
7 |
+
]
|
8 |
+
|
9 |
+
export const generateAudio = async (prompt: string, options?: {
|
10 |
+
seed: number;
|
11 |
+
nbFrames: number;
|
12 |
+
nbSteps: number;
|
13 |
+
}) => {
|
14 |
+
const seed = options?.seed || generateSeed()
|
15 |
+
const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling quill require too much memory!
|
16 |
+
const nbSteps = options?.nbSteps || 35
|
17 |
+
|
18 |
+
const instance = instances.shift()
|
19 |
+
instances.push(instance)
|
20 |
+
|
21 |
+
const api = await client(instance)
|
22 |
+
|
23 |
+
const rawResponse = await api.predict('/run', [
|
24 |
+
prompt, // string in 'Prompt' Textbox component
|
25 |
+
seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
|
26 |
+
nbFrames, // 24 // it is the nb of frames per seconds I think?
|
27 |
+
nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
|
28 |
+
]) as any
|
29 |
+
|
30 |
+
const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }
|
31 |
+
|
32 |
+
return `${instance}/file=${name}`
|
33 |
+
}
|
src/services/generateShot.mts
ADDED
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import path from "node:path"
|
2 |
+
|
3 |
+
import { v4 as uuidv4 } from "uuid"
|
4 |
+
import tmpDir from "temp-dir"
|
5 |
+
|
6 |
+
import { downloadVideo } from "./downloadVideo.mts"
|
7 |
+
import { generateAudio } from "./generateAudio.mts"
|
8 |
+
import { generateVideo } from "./generateVideo.mts"
|
9 |
+
import { upscaleVideo } from "./upscaleVideo.mts"
|
10 |
+
import { generateVoice } from "./generateVoice.mts"
|
11 |
+
import { generateSeed } from "./generateSeed.mts"
|
12 |
+
import { mergeAudio } from "./mergeAudio.mts"
|
13 |
+
import { addAudioToVideo } from "./addAudioToVideo.mts"
|
14 |
+
import { interpolateVideo } from "./interpolateVideo.mts"
|
15 |
+
import { postInterpolation } from "./postInterpolation.mts"
|
16 |
+
|
17 |
+
|
18 |
+
export const generateShot = async ({
|
19 |
+
seed = 0,
|
20 |
+
shotId = "",
|
21 |
+
actorPrompt = "",
|
22 |
+
shotPrompt = "",
|
23 |
+
backgroundAudioPrompt = "",
|
24 |
+
foregroundAudioPrompt = "",
|
25 |
+
actorDialoguePrompt = "",
|
26 |
+
actorVoicePrompt = "",
|
27 |
+
duration = 2,
|
28 |
+
nbFrames = 24,
|
29 |
+
resolution = 576,
|
30 |
+
nbSteps = 35,
|
31 |
+
upscale = true,
|
32 |
+
interpolate = true,
|
33 |
+
noise = true,
|
34 |
+
}: {
|
35 |
+
seed?: number;
|
36 |
+
shotId?: string;
|
37 |
+
actorPrompt?: string;
|
38 |
+
shotPrompt?: string;
|
39 |
+
backgroundAudioPrompt?: string;
|
40 |
+
foregroundAudioPrompt?: string;
|
41 |
+
actorDialoguePrompt?: string;
|
42 |
+
actorVoicePrompt?: string;
|
43 |
+
duration?: number; // 2 seconds
|
44 |
+
nbFrames?: number; // 24 FPS
|
45 |
+
resolution?: number; // 256, 320, 512, 576, 720, 1080..
|
46 |
+
nbSteps?: number;
|
47 |
+
upscale?: boolean;
|
48 |
+
interpolate?: boolean;
|
49 |
+
noise?: boolean;
|
50 |
+
}) => {
|
51 |
+
seed = seed || generateSeed()
|
52 |
+
shotId = shotId || uuidv4()
|
53 |
+
|
54 |
+
const shotFileName = `${shotId}.mp4`
|
55 |
+
|
56 |
+
console.log("generating video shot:", {
|
57 |
+
seed,
|
58 |
+
shotId,
|
59 |
+
actorPrompt,
|
60 |
+
shotPrompt,
|
61 |
+
backgroundAudioPrompt,
|
62 |
+
foregroundAudioPrompt,
|
63 |
+
actorDialoguePrompt,
|
64 |
+
actorVoicePrompt,
|
65 |
+
duration,
|
66 |
+
nbFrames,
|
67 |
+
resolution,
|
68 |
+
nbSteps,
|
69 |
+
upscale,
|
70 |
+
interpolate,
|
71 |
+
noise,
|
72 |
+
})
|
73 |
+
|
74 |
+
|
75 |
+
if (actorPrompt) {
|
76 |
+
console.log("generating actor..")
|
77 |
+
const actorIdentityFileName = `actor_${Date.now()}.png`
|
78 |
+
// await generateActor(actorPrompt, actorIdentityFileName, seed)
|
79 |
+
}
|
80 |
+
|
81 |
+
console.log("generating base video ..")
|
82 |
+
let generatedVideoUrl = ""
|
83 |
+
|
84 |
+
// currenty the base model is incapable of generating more than 24 FPS,
|
85 |
+
// because otherwise the upscaler will have trouble
|
86 |
+
|
87 |
+
// so for now, we fix it to 24 frames
|
88 |
+
// const nbFramesForBaseModel = Math.min(3, Math.max(1, Math.round(duration))) * 8
|
89 |
+
const nbFramesForBaseModel = 24
|
90 |
+
|
91 |
+
try {
|
92 |
+
generatedVideoUrl = await generateVideo(shotPrompt, {
|
93 |
+
seed,
|
94 |
+
nbFrames: nbFramesForBaseModel,
|
95 |
+
nbSteps
|
96 |
+
})
|
97 |
+
} catch (err) {
|
98 |
+
// upscaling can be finicky, if it fails we try again
|
99 |
+
console.log('- trying again to generate base shot..')
|
100 |
+
generatedVideoUrl = await generateVideo(shotPrompt, {
|
101 |
+
seed,
|
102 |
+
nbFrames: nbFramesForBaseModel,
|
103 |
+
nbSteps
|
104 |
+
})
|
105 |
+
}
|
106 |
+
|
107 |
+
console.log("downloading video..")
|
108 |
+
|
109 |
+
const videoFileName = await downloadVideo(generatedVideoUrl, shotFileName)
|
110 |
+
|
111 |
+
if (upscale) {
|
112 |
+
console.log("upscaling video..")
|
113 |
+
try {
|
114 |
+
await upscaleVideo(videoFileName, shotPrompt)
|
115 |
+
} catch (err) {
|
116 |
+
// upscaling can be finicky, if it fails we try again
|
117 |
+
console.log('- trying again to upscale shot..')
|
118 |
+
await upscaleVideo(videoFileName, shotPrompt)
|
119 |
+
}
|
120 |
+
}
|
121 |
+
|
122 |
+
if (interpolate) {
|
123 |
+
console.log("upscaling video..")
|
124 |
+
// ATTENTION 1:
|
125 |
+
// the interpolation step always create a SLOW MOTION video
|
126 |
+
// it means it can last a lot longer (eg. 2x, 3x, 4x.. longer)
|
127 |
+
// than the duration generated by the original video model
|
128 |
+
|
129 |
+
// ATTENTION 2:
|
130 |
+
// the interpolation step generates videos in 910x512!
|
131 |
+
|
132 |
+
// ATTENTION 3:
|
133 |
+
// the interpolation step parameters are currently not passed to the space,
|
134 |
+
// so changing those two variables below will have no effect!
|
135 |
+
const interpolationSteps = 3
|
136 |
+
const interpolatedFramesPerSecond = 24
|
137 |
+
await interpolateVideo(
|
138 |
+
videoFileName,
|
139 |
+
interpolationSteps,
|
140 |
+
interpolatedFramesPerSecond
|
141 |
+
)
|
142 |
+
console.log('creating slow-mo video (910x512 @ 24 FPS)')
|
143 |
+
|
144 |
+
// with our current interpolation settings, the 3 seconds video generated by the model
|
145 |
+
// become a 7 seconds video, at 24 FPS
|
146 |
+
|
147 |
+
// so we want to scale it back to the desired duration length
|
148 |
+
// also, as a last trick we want to upscale it (without AI) and add some FXs
|
149 |
+
console.log('performing final scaling (1280x720 @ 24 FPS)')
|
150 |
+
await postInterpolation(videoFileName, duration, nbFrames)
|
151 |
+
}
|
152 |
+
|
153 |
+
let backgroundAudioFileName = ''
|
154 |
+
if (backgroundAudioPrompt) {
|
155 |
+
console.log("generating background audio..")
|
156 |
+
backgroundAudioFileName = await generateAudio(backgroundAudioPrompt, `shot_${shotId}_audio_${uuidv4}.m4a`)
|
157 |
+
}
|
158 |
+
|
159 |
+
let foregroundAudioFileName = ''
|
160 |
+
if (foregroundAudioPrompt) {
|
161 |
+
console.log("generating foreground audio..")
|
162 |
+
foregroundAudioFileName = await generateAudio(foregroundAudioPrompt, `shot_${shotId}_audio_${uuidv4()}.m4a`)
|
163 |
+
}
|
164 |
+
|
165 |
+
|
166 |
+
let voiceAudioFileName = ''
|
167 |
+
if (actorDialoguePrompt) {
|
168 |
+
console.log("configuring dialogue..")
|
169 |
+
if (actorVoicePrompt) {
|
170 |
+
console.log("configuring voice..")
|
171 |
+
// well.. that's a TODO!
|
172 |
+
// for now let's always use the same voice model
|
173 |
+
|
174 |
+
console.log('TODO this should be done in the sequence, not the prompt!')
|
175 |
+
voiceAudioFileName = await generateVoice(actorDialoguePrompt, `shot_${shotId}_voice_${uuidv4()}.m4a`)
|
176 |
+
}
|
177 |
+
}
|
178 |
+
|
179 |
+
console.log('merging audio with video..')
|
180 |
+
if (backgroundAudioFileName || foregroundAudioFileName) {
|
181 |
+
let audioFileName = ''
|
182 |
+
|
183 |
+
// we have both background and foreground
|
184 |
+
if (backgroundAudioFileName && foregroundAudioFileName) {
|
185 |
+
audioFileName = await mergeAudio({
|
186 |
+
input1FileName: backgroundAudioFileName,
|
187 |
+
input1Volume: 0.2,// 20% volume
|
188 |
+
input2FileName: foregroundAudioFileName,
|
189 |
+
input2Volume: 0.7, // 70% volume
|
190 |
+
})
|
191 |
+
} else if (backgroundAudioFileName) {
|
192 |
+
audioFileName = backgroundAudioFileName
|
193 |
+
} else if (foregroundAudioFileName) {
|
194 |
+
audioFileName = foregroundAudioFileName
|
195 |
+
}
|
196 |
+
|
197 |
+
await addAudioToVideo(videoFileName, audioFileName)
|
198 |
+
}
|
199 |
+
|
200 |
+
console.log("returning result to user..")
|
201 |
+
|
202 |
+
const filePath = path.resolve(tmpDir, videoFileName)
|
203 |
+
|
204 |
+
return {
|
205 |
+
shotId,
|
206 |
+
filePath,
|
207 |
+
videoFileName
|
208 |
+
}
|
209 |
+
}
|
src/services/generateVideo.mts
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
-
import { client } from
|
|
|
2 |
|
3 |
import { generateSeed } from "./generateSeed.mts"
|
4 |
|
|
|
1 |
+
import { client } from "@gradio/client"
|
2 |
+
|
3 |
|
4 |
import { generateSeed } from "./generateSeed.mts"
|
5 |
|
src/services/generateVoice.mts
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import puppeteer from "puppeteer"
|
2 |
+
|
3 |
+
import { downloadVideo } from "./downloadVideo.mts"
|
4 |
+
|
5 |
+
const instances: string[] = [
|
6 |
+
process.env.VS_VOICE_GENERATION_SPACE_API_URL
|
7 |
+
]
|
8 |
+
|
9 |
+
// TODO we should use an inference endpoint instead
|
10 |
+
export async function generateVoice(prompt: string, voiceFileName: string) {
|
11 |
+
const instance = instances.shift()
|
12 |
+
instances.push(instance)
|
13 |
+
|
14 |
+
console.log("instance:", instance)
|
15 |
+
|
16 |
+
const browser = await puppeteer.launch({
|
17 |
+
headless: false,
|
18 |
+
protocolTimeout: 800000,
|
19 |
+
})
|
20 |
+
|
21 |
+
const page = await browser.newPage()
|
22 |
+
|
23 |
+
await page.goto(instance, {
|
24 |
+
waitUntil: "networkidle2",
|
25 |
+
})
|
26 |
+
|
27 |
+
await new Promise(r => setTimeout(r, 3000))
|
28 |
+
|
29 |
+
const firstTextarea = await page.$('textarea[data-testid="textbox"]')
|
30 |
+
|
31 |
+
await firstTextarea.type(prompt)
|
32 |
+
|
33 |
+
// console.log("looking for the button to submit")
|
34 |
+
const submitButton = await page.$("button.lg")
|
35 |
+
|
36 |
+
// console.log("clicking on the button")
|
37 |
+
await submitButton.click()
|
38 |
+
|
39 |
+
await page.waitForSelector("audio", {
|
40 |
+
timeout: 800000, // need to be large enough in case someone else attemps to use our space
|
41 |
+
})
|
42 |
+
|
43 |
+
const voiceRemoteUrl = await page.$$eval("audio", el => el.map(x => x.getAttribute("src"))[0])
|
44 |
+
|
45 |
+
|
46 |
+
console.log({
|
47 |
+
voiceRemoteUrl,
|
48 |
+
})
|
49 |
+
|
50 |
+
|
51 |
+
console.log(`- downloading ${voiceFileName} from ${voiceRemoteUrl}`)
|
52 |
+
|
53 |
+
await downloadVideo(voiceRemoteUrl, voiceFileName)
|
54 |
+
|
55 |
+
return voiceFileName
|
56 |
+
}
|
src/services/interpolateVideo.mts
CHANGED
@@ -1,40 +1,53 @@
|
|
1 |
-
import { promises as fs } from "node:fs"
|
2 |
import path from "node:path"
|
3 |
-
import { Blob } from "buffer"
|
4 |
-
// import { blobFrom } from "fetch-blob"
|
5 |
|
6 |
-
import
|
7 |
import tmpDir from "temp-dir"
|
8 |
-
|
9 |
-
import { downloadVideo } from './downloadVideo.mts'
|
10 |
|
11 |
const instances: string[] = [
|
12 |
process.env.VS_VIDEO_INTERPOLATION_SPACE_API_URL
|
13 |
]
|
14 |
|
15 |
-
export const interpolateVideo = async (fileName: string) => {
|
16 |
|
|
|
|
|
17 |
const inputFilePath = path.join(tmpDir, fileName)
|
18 |
|
|
|
|
|
|
|
|
|
19 |
const instance = instances.shift()
|
20 |
instances.push(instance)
|
21 |
|
22 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
-
const
|
25 |
|
26 |
-
|
27 |
-
// const blob = blobFrom(filePath)
|
28 |
-
const result = await api.predict(1, [
|
29 |
-
blob, // blob in 'parameter_5' Video component
|
30 |
-
1, // number (numeric value between 1 and 4) in 'Interpolation Steps' Slider component
|
31 |
-
24, // string in 'FPS output' Radio component
|
32 |
-
])
|
33 |
|
34 |
-
|
35 |
-
console.log('raw data:', data)
|
36 |
-
const { orig_name, data: remoteFilePath } = data
|
37 |
-
const remoteUrl = `${instance}/file=${remoteFilePath}`
|
38 |
-
console.log("remoteUrl:", remoteUrl)
|
39 |
-
await downloadVideo(remoteUrl, fileName)
|
40 |
}
|
|
|
|
|
1 |
import path from "node:path"
|
|
|
|
|
2 |
|
3 |
+
import puppeteer from "puppeteer"
|
4 |
import tmpDir from "temp-dir"
|
5 |
+
import { downloadVideo } from "./downloadVideo.mts"
|
|
|
6 |
|
7 |
const instances: string[] = [
|
8 |
process.env.VS_VIDEO_INTERPOLATION_SPACE_API_URL
|
9 |
]
|
10 |
|
|
|
11 |
|
12 |
+
// TODO we should use an inference endpoint instead
|
13 |
+
export async function interpolateVideo(fileName: string, steps: number, fps: number) {
|
14 |
const inputFilePath = path.join(tmpDir, fileName)
|
15 |
|
16 |
+
console.log(`interpolating ${fileName}`)
|
17 |
+
console.log(`warning: interpolateVideo parameter "${steps}" is ignored!`)
|
18 |
+
console.log(`warning: interpolateVideo parameter "${fps}" is ignored!`)
|
19 |
+
|
20 |
const instance = instances.shift()
|
21 |
instances.push(instance)
|
22 |
|
23 |
+
const browser = await puppeteer.launch({
|
24 |
+
headless: true,
|
25 |
+
protocolTimeout: 400000,
|
26 |
+
})
|
27 |
+
|
28 |
+
const page = await browser.newPage()
|
29 |
+
await page.goto(instance, { waitUntil: 'networkidle2' })
|
30 |
+
|
31 |
+
await new Promise(r => setTimeout(r, 3000))
|
32 |
+
|
33 |
+
const fileField = await page.$('input[type=file]')
|
34 |
+
|
35 |
+
// console.log(`uploading file..`)
|
36 |
+
await fileField.uploadFile(inputFilePath)
|
37 |
+
|
38 |
+
// console.log('looking for the button to submit')
|
39 |
+
const submitButton = await page.$('button.lg')
|
40 |
+
|
41 |
+
// console.log('clicking on the button')
|
42 |
+
await submitButton.click()
|
43 |
+
|
44 |
+
await page.waitForSelector('a[download="interpolated_result.mp4"]', {
|
45 |
+
timeout: 400000, // need to be large enough in case someone else attemps to use our space
|
46 |
+
})
|
47 |
|
48 |
+
const interpolatedFileUrl = await page.$$eval('a[download="interpolated_result.mp4"]', el => el.map(x => x.getAttribute("href"))[0])
|
49 |
|
50 |
+
await downloadVideo(interpolatedFileUrl, fileName)
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
+
return fileName
|
|
|
|
|
|
|
|
|
|
|
53 |
}
|
src/services/interpolateVideoLegacy.mts
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { promises as fs } from "node:fs"
|
2 |
+
import path from "node:path"
|
3 |
+
import { Blob } from "buffer"
|
4 |
+
|
5 |
+
import { client } from "@gradio/client"
|
6 |
+
import tmpDir from "temp-dir"
|
7 |
+
|
8 |
+
import { downloadVideo } from './downloadVideo.mts'
|
9 |
+
|
10 |
+
const instances: string[] = [
|
11 |
+
process.env.VS_VIDEO_INTERPOLATION_SPACE_API_URL
|
12 |
+
]
|
13 |
+
|
14 |
+
export const interpolateVideo = async (fileName: string, steps: number, fps: number) => {
|
15 |
+
|
16 |
+
const inputFilePath = path.join(tmpDir, fileName)
|
17 |
+
|
18 |
+
const instance = instances.shift()
|
19 |
+
instances.push(instance)
|
20 |
+
|
21 |
+
const api = await client(instance)
|
22 |
+
|
23 |
+
const video = await fs.readFile(inputFilePath)
|
24 |
+
|
25 |
+
const blob = new Blob([video], { type: 'video/mp4' })
|
26 |
+
// const blob = blobFrom(filePath)
|
27 |
+
const result = await api.predict(1, [
|
28 |
+
blob, // blob in 'parameter_5' Video component
|
29 |
+
steps, // number (numeric value between 1 and 4) in 'Interpolation Steps' Slider component
|
30 |
+
fps, // string (FALSE! it's a number) in 'FPS output' Radio component
|
31 |
+
])
|
32 |
+
|
33 |
+
const data = (result as any).data[0]
|
34 |
+
console.log('raw data:', data)
|
35 |
+
const { orig_name, data: remoteFilePath } = data
|
36 |
+
const remoteUrl = `${instance}/file=${remoteFilePath}`
|
37 |
+
console.log("remoteUrl:", remoteUrl)
|
38 |
+
await downloadVideo(remoteUrl, fileName)
|
39 |
+
}
|
src/services/mergeAudio.mts
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import path from "node:path"
|
2 |
+
|
3 |
+
import tmpDir from "temp-dir"
|
4 |
+
import { v4 as uuidv4 } from "uuid"
|
5 |
+
import ffmpeg from "fluent-ffmpeg"
|
6 |
+
|
7 |
+
export const mergeAudio = async ({
|
8 |
+
input1FileName,
|
9 |
+
input1Volume,
|
10 |
+
input2FileName,
|
11 |
+
input2Volume,
|
12 |
+
outputFileName = ''
|
13 |
+
}: {
|
14 |
+
input1FileName: string,
|
15 |
+
input1Volume: number,
|
16 |
+
input2FileName: string,
|
17 |
+
input2Volume: number,
|
18 |
+
outputFileName?: string
|
19 |
+
}): Promise<string> => {
|
20 |
+
outputFileName = `${uuidv4()}.m4a`
|
21 |
+
|
22 |
+
const input1FilePath = path.resolve(tmpDir, input1FileName)
|
23 |
+
const input2FilePath = path.resolve(tmpDir, input2FileName)
|
24 |
+
const outputFilePath = path.resolve(tmpDir, outputFileName)
|
25 |
+
|
26 |
+
const input1Ffmpeg = ffmpeg(input1FilePath)
|
27 |
+
.outputOptions("-map 0:a:0")
|
28 |
+
.audioFilters([{ filter: 'volume', options: input1Volume }]); // set volume for main audio
|
29 |
+
|
30 |
+
const input2Ffmpeg = ffmpeg(input2FilePath)
|
31 |
+
.outputOptions("-map 1:a:0")
|
32 |
+
.audioFilters([{ filter: 'volume', options: input2Volume }]); // set volume for additional audio
|
33 |
+
|
34 |
+
await new Promise((resolve, reject) => {
|
35 |
+
ffmpeg()
|
36 |
+
.input(input1Ffmpeg)
|
37 |
+
.input(input2Ffmpeg)
|
38 |
+
.outputOptions("-c:a aac") // use audio codec
|
39 |
+
.outputOptions("-shortest") // finish encoding when shortest input stream ends
|
40 |
+
.output(outputFilePath)
|
41 |
+
.on("end", resolve)
|
42 |
+
.on("error", reject)
|
43 |
+
.run()
|
44 |
+
})
|
45 |
+
|
46 |
+
console.log(`merged audio from ${input1FileName} and ${input2FileName} into ${outputFileName}`)
|
47 |
+
|
48 |
+
return outputFileName
|
49 |
+
}
|
src/services/postInterpolation.mts
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import path from "node:path"
import fs from "node:fs"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"
import ffmpeg from "fluent-ffmpeg"

// Post-process an interpolated video in place: upscale to 1280x720, retime it
// relative to the requested `duration`, add film-grain noise, and force the
// output frame rate to `nbFrames` fps. The input file at `${tmpDir}/${fileName}`
// is overwritten with the processed result.
//
// @param fileName - video file name inside the temp directory (overwritten in place)
// @param duration - target duration used to compute the retiming ratio
//                   (NOTE(review): unit looks like seconds — confirm against callers)
// @param nbFrames - output frame rate passed to ffmpeg's `-r` option
// @returns the same `fileName` once processing and cleanup are done
// @throws (rejects) when ffprobe or the ffmpeg encode fails
export const postInterpolation = async (fileName: string, duration: number, nbFrames: number): Promise<string> => {
  return new Promise((resolve,reject) => {

    // encode into a temp file first, then copy back over the original
    const tmpFileName = `${uuidv4()}.mp4`

    const filePath = path.join(tmpDir, fileName)
    const tmpFilePath = path.join(tmpDir, tmpFileName)


    // probe the input first: the retiming ratio depends on its actual duration
    ffmpeg.ffprobe(filePath, function(err, metadata) {
      if (err) { reject(err); return; }


      const currentVideoDuration = metadata.format.duration

      // compute a ratio ex. 0.3 = 30% of the total length
      // NOTE(review): ratio = current / target, and setpts=ratio*PTS *stretches*
      // the clip when current > target — the "make the video faster" comment
      // below suggests target/current may have been intended; confirm direction
      const durationRatio = currentVideoDuration / duration

      ffmpeg(filePath)

        // convert to HD
        .size("1280x720")

        .videoFilters([
          `setpts=${durationRatio}*PTS`, // we make the video faster
          //'scale=-1:576:lanczos',
          // 'unsharp=5:5:0.2:5:5:0.2', // not recommended, this make the video more "pixely"
          'noise=c0s=10:c0f=t+u' // add a movie grain noise
        ])
        .outputOptions([
          `-r ${nbFrames}`,
        ])

        .save(tmpFilePath)
        .on("end", async () => {
          // replace the original with the processed version
          await fs.promises.copyFile(tmpFilePath, filePath)
          try {
            // best-effort removal of the intermediate file
            await fs.promises.unlink(tmpFilePath)
          } catch (err) {
            console.log("failed to cleanup (no big deal..)")
          }

          resolve(fileName)
        })
        .on("error", (err) => {
          reject(err)
        })
    })
  })
}
|
src/test2.mts
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { generateAudio } from "./services/generateAudio.mts"
|
2 |
+
|
3 |
+
|
4 |
+
console.log('generating background audio..')
|
5 |
+
const audioFileName = await generateAudio("sounds of a castle bell ringing alarm", "test_juju_audio.mp3")
|
6 |
+
|
7 |
+
console.log('result:', audioFileName)
|
src/types.mts
CHANGED
@@ -26,19 +26,40 @@ export interface Database {
|
|
26 |
}
|
27 |
|
28 |
|
29 |
-
export interface
|
30 |
token: string
|
31 |
shotPrompt: string
|
32 |
// inputVideo?: string
|
33 |
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
seed?: number
|
36 |
upscale?: boolean
|
37 |
|
|
|
|
|
38 |
duration?: number
|
39 |
steps?: number
|
40 |
|
41 |
fps?: number // 8, 12, 24, 30, 60
|
42 |
|
43 |
resolution?: number // 256, 512, 576, 720, 1080
|
|
|
|
|
|
|
|
|
|
|
44 |
}
|
|
|
26 |
}
|
27 |
|
28 |
|
29 |
+
// Parameters of a request to generate a single video "shot".
export interface ShotQuery {
  // access token used to authenticate the request
  token: string
  // main text prompt describing the shot to generate
  shotPrompt: string
  // inputVideo?: string

  // describe the background audio (crowd, birds, wind, sea etc..)
  backgroundAudioPrompt?: string

  // describe the foreground audio (cars revving, footsteps, objects breaking, explosion etc)
  foregroundAudioPrompt?: string

  // describe the main actor visible in the shot (optional)
  actorPrompt?: string

  // describe the main actor voice (man, woman, old, young, amused, annoyed.. etc)
  actorVoicePrompt?: string

  // describe the main actor dialogue line
  actorDialoguePrompt?: string

  // random seed — presumably for reproducible generation (TODO confirm)
  seed?: number
  // whether to upscale the generated video
  upscale?: boolean

  noise?: boolean // add movie noise

  // target duration — unit looks like seconds (TODO confirm against callers)
  duration?: number
  // number of steps — presumably diffusion/interpolation steps (TODO confirm which)
  steps?: number

  fps?: number // 8, 12, 24, 30, 60

  resolution?: number // 256, 512, 576, 720, 1080
}

// A queued/running shot-generation job.
export interface Job {
  // timestamp recorded when the job started — exact format not visible here (TODO confirm)
  startedAt: string
  // the original query that spawned this job
  query: ShotQuery
}
|