Commit 8919651 · Parent(s): 5513dc6
eh, not bad for a side project
Files changed:
- package-lock.json +19 -16
- package.json +1 -1
- src/app/api/generators/image/generateImageWithVideochain.ts +4 -1
- src/app/api/generators/search/unknownObjectToLatentSearchResults.ts +4 -4
- src/app/api/parsers/parseCompletionMode.ts +19 -4
- src/app/api/parsers/parseEntityPrompts.ts +11 -0
- src/app/api/parsers/parseSupportedExportFormat.ts +16 -0
- src/app/api/parsers/{parseString.ts → parseTrimmedString.ts} +1 -1
- src/app/api/v1/auth/config.ts +5 -0
- src/app/api/{auth → v1/auth}/getToken.ts +5 -5
- src/app/api/v1/auth/parseToken.ts +7 -0
- src/app/api/v1/auth/throwIfInvalidToken.ts +22 -0
- src/app/api/v1/create/index.ts +15 -11
- src/app/api/v1/create/route.ts +5 -2
- src/app/api/v1/create/systemPrompt.ts +15 -11
- src/app/api/v1/create/types.ts +0 -6
- src/app/api/v1/edit/dialogues/processShot.ts +9 -4
- src/app/api/v1/edit/dialogues/route.ts +4 -4
- src/app/api/v1/edit/entities/clapToLatentStory.ts +50 -0
- src/app/api/v1/edit/entities/generateEntityPrompts.ts +135 -0
- src/app/api/v1/edit/entities/generateImageID.ts +0 -1
- src/app/api/v1/edit/entities/index.ts +97 -7
- src/app/api/v1/edit/entities/route.ts +7 -3
- src/app/api/v1/edit/entities/systemPrompt.ts +62 -1
- src/app/api/v1/edit/storyboards/processShot.ts +10 -3
- src/app/api/v1/edit/storyboards/route.ts +6 -7
- src/app/api/v1/edit/types.ts +0 -8
- src/app/api/v1/edit/videos/processShot.ts +11 -3
- src/app/api/v1/edit/videos/route.ts +8 -10
- src/app/api/v1/export/route.ts +4 -9
- src/app/api/v1/types.ts +15 -0
- src/app/latent/search/page.tsx +1 -1
- src/app/latent/watch/page.tsx +1 -1
- src/types/general.ts +2 -0
package-lock.json
CHANGED
@@ -9,7 +9,7 @@
     "version": "0.0.0",
     "dependencies": {
       "@aitube/clap": "0.0.10",
-      "@aitube/client": "0.0.
+      "@aitube/client": "0.0.15",
       "@aitube/engine": "0.0.2",
       "@huggingface/hub": "0.12.3-oauth",
       "@huggingface/inference": "^2.6.7",
@@ -129,9 +129,12 @@
       }
     },
     "node_modules/@aitube/client": {
-      "version": "0.0.
-      "resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.
-      "integrity": "sha512-
+      "version": "0.0.15",
+      "resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.15.tgz",
+      "integrity": "sha512-lGmdsBqjNVStBxZSH+Iig/nOyPdSpqpqU6M0OvOBMTwR4rohSvIQ7TnFJGvoc4WEFciNoCc6Vg6Q5W99ovG+fg==",
+      "dependencies": {
+        "query-string": "^9.0.0"
+      },
       "peerDependencies": {
         "@aitube/clap": "0.0.10"
       }
@@ -922,9 +925,9 @@
       }
     },
     "node_modules/@floating-ui/dom": {
-      "version": "1.6.
-      "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.
-      "integrity": "sha512-
+      "version": "1.6.5",
+      "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.5.tgz",
+      "integrity": "sha512-Nsdud2X65Dz+1RHjAIP0t8z5e2ff/IRbei6BqFrl1urT8sDVzM1HMQ+R0XcU5ceRfyO3I6ayeqIfh+6Wb8LGTw==",
       "dependencies": {
         "@floating-ui/core": "^1.0.0",
         "@floating-ui/utils": "^0.2.0"
@@ -2958,9 +2961,9 @@
       "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ=="
     },
     "node_modules/@types/lodash": {
-      "version": "4.17.
-      "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.17.
-      "integrity": "sha512-
+      "version": "4.17.1",
+      "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.17.1.tgz",
+      "integrity": "sha512-X+2qazGS3jxLAIz5JDXDzglAF3KpijdhFxlf/V1+hEsOUc+HnWi81L/uv/EvGuV90WY+7mPGFCUDGfQC3Gj95Q=="
     },
     "node_modules/@types/lodash.debounce": {
       "version": "4.0.9",
@@ -3740,9 +3743,9 @@
       }
     },
     "node_modules/caniuse-lite": {
-      "version": "1.0.
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.
-      "integrity": "sha512-
+      "version": "1.0.30001616",
+      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001616.tgz",
+      "integrity": "sha512-RHVYKov7IcdNjVHJFNY/78RdG4oGVjbayxv8u5IO74Wv7Hlq4PnJE6mo/OjFijjVFNy5ijnCt6H3IIo4t+wfEw==",
       "funding": [
         {
           "type": "opencollective",
@@ -6354,9 +6357,9 @@
       }
     },
     "node_modules/minipass": {
-      "version": "7.0
-      "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.0.
-      "integrity": "sha512-
+      "version": "7.1.0",
+      "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.0.tgz",
+      "integrity": "sha512-oGZRv2OT1lO2UF1zUcwdTb3wqUwI0kBGTgt/T7OdSj6M6N5m3o5uPf0AIW6lVxGGoiWUR7e2AwTE+xiwK8WQig==",
       "engines": {
         "node": ">=16 || 14 >=14.17"
       }
package.json
CHANGED
@@ -11,7 +11,7 @@
   },
   "dependencies": {
     "@aitube/clap": "0.0.10",
-    "@aitube/client": "0.0.
+    "@aitube/client": "0.0.15",
    "@aitube/engine": "0.0.2",
    "@huggingface/hub": "0.12.3-oauth",
    "@huggingface/inference": "^2.6.7",
src/app/api/generators/image/generateImageWithVideochain.ts
CHANGED
@@ -10,6 +10,7 @@ const apiKey = `${process.env.VIDEOCHAIN_API_KEY || ""}`
 export async function newRender({
   prompt,
   negativePrompt,
+  identityImage,
   nbFrames,
   nbSteps,
   width,
@@ -20,6 +21,7 @@ export async function newRender({
 }: {
   prompt: string
   negativePrompt: string
+  identityImage: string
   nbFrames: number
   nbSteps: number
   width: number
@@ -61,6 +63,7 @@ export async function newRender({
     body: JSON.stringify({
       prompt,
       negativePrompt,
+      identityImage,
       // nbFrames: 8 and nbSteps: 15 --> ~10 sec generation
       nbFrames, // when nbFrames is 1, we will only generate static images
       nbSteps, // 20 = fast, 30 = better, 50 = best
@@ -72,7 +75,7 @@ export async function newRender({
       upscalingFactor: 1, // let's disable upscaling right now
       turbo, // always use turbo mode (it's for images only anyway)
       // also what could be done iw that we could use the width and height to control this
-      cache: shouldRenewCache ? "renew" : "use"
+      cache: shouldRenewCache ? "renew" : "use",
     } as Partial<RenderRequest>),
     cache: 'no-store',
     // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
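
For context, a hedged sketch of a call site using the new identityImage parameter. Only the fields visible in this diff are spelled out; the cast is there because the rest of the parameter list (height, turbo, shouldRenewCache, ...) sits in the elided hunks:

```ts
import { newRender } from "@/app/api/generators/image/generateImageWithVideochain"

async function renderWithIdentity(identityImage: string) {
  return newRender({
    prompt: "portrait of an old witch, gray hair, crude robe",
    negativePrompt: "",
    identityImage, // new: reference image so renders keep a consistent identity
    nbFrames: 1,   // 1 = static image only
    nbSteps: 30,   // 20 = fast, 30 = better, 50 = best
    width: 1024,
    height: 576,
  } as any) // hedged: the full parameter type is not shown in this diff
}
```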
src/app/api/generators/search/unknownObjectToLatentSearchResults.ts
CHANGED
@@ -1,6 +1,6 @@
 import { generateSeed } from "@aitube/clap"
 
-import {
+import { parseTrimmedString } from "../../parsers/parseTrimmedString"
 import { parseStringArray } from "../../parsers/parseStringArray"
 import { LatentSearchResult, LatentSearchResults } from "./types"
 
@@ -9,9 +9,9 @@ export function unknownObjectToLatentSearchResults(something: any): LatentSearch
 
   if (Array.isArray(something)) {
     results = something.map(thing => ({
-      label:
-      summary:
-      thumbnail:
+      label: parseTrimmedString(thing && (thing?.label || thing?.title)),
+      summary: parseTrimmedString(thing && (thing?.summary || thing?.description || thing?.synopsis)),
+      thumbnail: parseTrimmedString(thing && (thing?.thumbnail)),
       tags: parseStringArray(thing && (thing?.tag)),
       seed: generateSeed(), // a seed is necessary for consistency between search results and viewer
     } as LatentSearchResult))
src/app/api/parsers/parseCompletionMode.ts
CHANGED
@@ -1,10 +1,25 @@
-import { ClapCompletionMode } from "
+import { ClapCompletionMode } from "@aitube/client"
 
-export function parseCompletionMode(
+export function parseCompletionMode(
+  input?: any,
+  defaultMode: ClapCompletionMode = ClapCompletionMode.PARTIAL
+): ClapCompletionMode {
   let mode = defaultMode
+
   try {
-    let maybeMode = decodeURIComponent(`${input || ""}`
-
+    let maybeMode = decodeURIComponent(`${input || ""}`).trim()
+
+    if (!maybeMode) {
+      maybeMode = defaultMode
+    }
+
+    mode = maybeMode as ClapCompletionMode
+
   } catch (err) {}
+
+  if (!Object.values(ClapCompletionMode).includes(mode)) {
+    throw new Error(`Invalid clap completion mode: "${mode}"`)
+  }
+
   return mode
 }
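
For reference, a quick sketch of how the stricter parser now behaves. This assumes the ClapCompletionMode enum in @aitube/client is string-backed with the same "full"/"partial" values as the local type deleted later in this commit:

```ts
import { ClapCompletionMode } from "@aitube/client"
import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"

// missing or empty input falls back to the default mode (PARTIAL)
parseCompletionMode(undefined)                   // -> ClapCompletionMode.PARTIAL

// a valid, URI-encoded value is passed through
parseCompletionMode(encodeURIComponent("full"))  // -> ClapCompletionMode.FULL

// values outside the enum now throw instead of leaking through
parseCompletionMode("banana")                    // -> Error: Invalid clap completion mode: "banana"
```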
src/app/api/parsers/parseEntityPrompts.ts
ADDED
@@ -0,0 +1,11 @@
+import { ClapEntityPrompt } from "@aitube/client"
+import { decode } from "js-base64"
+
+export function parseClapEntityPrompts(input?: any): ClapEntityPrompt[] {
+  let basicResult = JSON.parse(decode(`${input || ""}`))
+  if (Array.isArray(basicResult)) {
+    return basicResult as ClapEntityPrompt[]
+  } else {
+    return []
+  }
+}
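
Since parseClapEntityPrompts() runs decode() then JSON.parse(), a caller has to apply the inverse transform. A minimal sketch of the encoding side, assuming js-base64's encode() helper and the entity-prompt field names that appear later in this commit:

```ts
import { encode } from "js-base64"

// hypothetical entity prompt; field names mirror the ClapEntityPrompt
// objects built in generateEntityPrompts.ts further down in this commit
const entityPrompts = [
  {
    name: "King Arthus",
    category: "character",
    age: "",
    variant: "golden armor, dark purple cape",
    region: "",
    identityImage: "",
    identityVoice: ""
  }
]

// JSON.stringify -> Base64, the exact inverse of the parser above
const e = encode(JSON.stringify(entityPrompts))
const url = `/api/v1/edit/entities?e=${encodeURIComponent(e)}`
```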
src/app/api/parsers/parseSupportedExportFormat.ts
ADDED
@@ -0,0 +1,16 @@
+import { defaultExportFormat, SupportedExportFormat } from "@aitube/client"
+
+export function parseSupportedExportFormat(
+  input?: any,
+  defaultFormat: SupportedExportFormat = defaultExportFormat
+): SupportedExportFormat {
+
+  let format: SupportedExportFormat = defaultFormat
+  try {
+    format = decodeURIComponent(`${input || ""}` || defaultFormat).trim() as SupportedExportFormat
+    if (format !== "mp4" && format !== "webm") {
+      format = defaultFormat
+    }
+  } catch (err) {}
+  return format
+}
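
A short sketch of the whitelist behaviour: anything other than "mp4" or "webm", or a parse failure, falls back to defaultExportFormat:

```ts
import { parseSupportedExportFormat } from "@/app/api/parsers/parseSupportedExportFormat"

parseSupportedExportFormat("webm")     // -> "webm"
parseSupportedExportFormat("mov")      // -> defaultExportFormat
parseSupportedExportFormat(undefined)  // -> defaultExportFormat
```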
src/app/api/parsers/{parseString.ts → parseTrimmedString.ts}
RENAMED
@@ -1,4 +1,4 @@
-export function
+export function parseTrimmedString(something: any): string {
   let result: string = ""
   if (typeof something === "string") {
     result = `${something}`.trim()
src/app/api/v1/auth/config.ts
ADDED
@@ -0,0 +1,5 @@
+import { createSecretKey } from "node:crypto"
+
+export const secretKey = createSecretKey(`${process.env.API_SECRET_JWT_KEY || ""}`, 'utf-8')
+export const issuer = `${process.env.API_SECRET_JWT_ISSUER || ""}`
+export const audience = `${process.env.API_SECRET_JWT_AUDIENCE || ""}`
src/app/api/{auth → v1/auth}/getToken.ts
RENAMED
@@ -1,20 +1,20 @@
-import { createSecretKey } from "crypto"
 import { SignJWT } from "jose"
 
+import { secretKey, issuer, audience } from "./config"
+
 // https://jmswrnr.com/blog/protecting-next-js-api-routes-query-parameters
 
 export async function getToken(data: Record<string, any> = {}): Promise<string> {
-  const secretKey = createSecretKey(`${process.env.API_SECRET_JWT_KEY || ""}`, 'utf-8');
 
   const jwtToken = await new SignJWT(data)
     .setProtectedHeader({
       alg: 'HS256'
     }) // algorithm
     .setIssuedAt()
-    .setIssuer(
-    .setAudience(
+    .setIssuer(issuer) // issuer
+    .setAudience(audience) // audience
     .setExpirationTime("1 day") // token expiration time - to prevent hackers from re-using our URLs more than a day
-    .sign(secretKey)
+    .sign(secretKey) // secretKey generated from previous step
 
   return jwtToken
 }
src/app/api/v1/auth/parseToken.ts
ADDED
@@ -0,0 +1,7 @@
+export function parseToken(input?: any): string {
+  try {
+    return (decodeURIComponent(`${input || ""}`).split("Bearer").pop() || "").trim()
+  } catch (err) {
+    return ""
+  }
+}
src/app/api/v1/auth/throwIfInvalidToken.ts
ADDED
@@ -0,0 +1,22 @@
+import { jwtVerify } from "jose"
+
+import { secretKey } from "./config"
+import { parseToken } from "./parseToken"
+
+export async function throwIfInvalidToken(input?: any): Promise<boolean> {
+
+  // note: this performs a decodeURI, but I'm not sure we need to
+  const token = parseToken(input)
+
+  // verify token
+  const { payload, protectedHeader } = await jwtVerify(token, secretKey, {
+    issuer: `${process.env.API_SECRET_JWT_ISSUER || ""}`, // issuer
+    audience: `${process.env.API_SECRET_JWT_AUDIENCE || ""}`, // audience
+  })
+
+  // log values to console
+  console.log(payload)
+  console.log(protectedHeader)
+
+  return true
+}
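
Taken together, config.ts, getToken.ts, parseToken.ts and throwIfInvalidToken.ts form a small HS256 round trip. A sketch of how the pieces compose, assuming API_SECRET_JWT_KEY / _ISSUER / _AUDIENCE are set identically on both sides:

```ts
import { getToken } from "@/app/api/v1/auth/getToken"
import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"

async function demo() {
  // mint a signed JWT (HS256, issuer/audience from env, 1 day expiration)
  const jwt = await getToken({ user: "anonymous" })

  // a client sends it as a standard Authorization header
  const authorizationHeader = `Bearer ${jwt}`

  // route handlers strip the "Bearer " prefix via parseToken(), then
  // jwtVerify() rejects bad signatures, wrong issuer/audience, or expiry
  await throwIfInvalidToken(authorizationHeader) // resolves to true, or throws
}
```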
src/app/api/v1/create/index.ts
CHANGED
@@ -2,12 +2,12 @@
 
 import { ClapProject, getValidNumber, newClap, newSegment } from "@aitube/clap"
 
+import { sleep } from "@/lib/utils/sleep"
 import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
 import { parseRawStringToYAML } from "@/app/api/parsers/parseRawStringToYAML"
-import {
+import { LatentStory } from "@/app/api/v1/types"
 
 import { systemPrompt } from "./systemPrompt"
-import { LatentStory } from "./types"
 
 // a helper to generate Clap stories from a few sentences
 // this is mostly used by external apps such as the Stories Factory
@@ -20,7 +20,6 @@ export async function create(request: {
   width: 1024,
   height: 576,
 }): Promise<ClapProject> {
-
   const prompt = `${request?.prompt || ""}`.trim()
 
   console.log("api/v1/create(): request:", request)
@@ -30,7 +29,9 @@ export async function create(request: {
   const width = getValidNumber(request?.width, 256, 8192, 1024)
   const height = getValidNumber(request?.height, 256, 8192, 576)
 
-  const userPrompt = `
+  const userPrompt = `Movie story to generate: ${prompt}
+
+Output: `
 
   const prefix = "```yaml\n"
   const nbMaxNewTokens = 1400
@@ -70,12 +71,15 @@ export async function create(request: {
     maybeShots = parseRawStringToYAML<LatentStory[]>(rawString, [])
     if (!Array.isArray(maybeShots) || maybeShots.length === 0) {
       console.log(`api/v1/create(): failed to generate shots for the second time, which indicates an issue with the Hugging Face API`)
-    }
-
-
-
+    }
+  }
+
+  if (maybeShots.length) {
     shots = maybeShots
+  } else {
+    throw new Error(`Hugging Face Inference API failure (the model failed to generate the shots)`)
   }
+
   console.log(`api/v1/create(): generated ${shots.length} shots`)
 
   // this is approximate - TTS generation will determine the final duration of each shot
@@ -88,8 +92,8 @@ export async function create(request: {
     title: "Not needed", // we don't need a title actually
     description: "This video has been generated using AI",
     synopsis: "",
-    licence: "
-    orientation: "
+    licence: "",
+    orientation: width > height ? "landscape" : height > width ? "portrait" : "square",
     width,
     height,
     isInteractive: false,
@@ -160,7 +164,7 @@ export async function create(request: {
     startTimeInMs: currentElapsedTimeInMs,
     assetDurationInMs: defaultSegmentDurationInMs,
     category: "camera",
-    prompt: "
+    prompt: "video",
     outputType: "text"
   }))
 
CHANGED
@@ -1,11 +1,14 @@
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
import { getValidNumber, serializeClap } from "@aitube/clap"
|
3 |
|
|
|
|
|
4 |
import { create } from "."
|
5 |
|
6 |
// a helper to generate Clap stories from a few sentences
|
7 |
// this is mostly used by external apps such as the Stories Factory
|
8 |
export async function POST(req: NextRequest) {
|
|
|
9 |
|
10 |
const request = await req.json() as {
|
11 |
prompt: string
|
@@ -17,9 +20,9 @@ export async function POST(req: NextRequest) {
|
|
17 |
console.log("[api/v1/create] request:", request)
|
18 |
|
19 |
const clap = await create({
|
20 |
-
prompt:
|
21 |
width: getValidNumber(request?.width, 256, 8192, 1024),
|
22 |
-
height:
|
23 |
})
|
24 |
|
25 |
// TODO replace by Clap file streaming
|
|
|
1 |
import { NextResponse, NextRequest } from "next/server"
|
2 |
import { getValidNumber, serializeClap } from "@aitube/clap"
|
3 |
|
4 |
+
import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"
|
5 |
+
|
6 |
import { create } from "."
|
7 |
|
8 |
// a helper to generate Clap stories from a few sentences
|
9 |
// this is mostly used by external apps such as the Stories Factory
|
10 |
export async function POST(req: NextRequest) {
|
11 |
+
await throwIfInvalidToken(req.headers.get("Authorization"))
|
12 |
|
13 |
const request = await req.json() as {
|
14 |
prompt: string
|
|
|
20 |
console.log("[api/v1/create] request:", request)
|
21 |
|
22 |
const clap = await create({
|
23 |
+
prompt: `${request?.prompt || ""}`.trim(),
|
24 |
width: getValidNumber(request?.width, 256, 8192, 1024),
|
25 |
+
height: getValidNumber(request?.height, 256, 8192, 576)
|
26 |
})
|
27 |
|
28 |
// TODO replace by Clap file streaming
|
src/app/api/v1/create/systemPrompt.ts
CHANGED
@@ -1,23 +1,29 @@
|
|
1 |
export const systemPrompt: string =
|
2 |
`# Context
|
3 |
You are a server-side function generating stories from a single synopsis/brief (a "prompt").
|
4 |
-
The
|
5 |
-
|
6 |
-
Each video is composed of a sequence of static panels (a dozen in average),
|
7 |
-
with a voice over and text.
|
8 |
|
9 |
# Task
|
10 |
-
Your mission is to generate a sequence of
|
11 |
|
12 |
You will be provided a "prompt" (for the story) and max number of images
|
13 |
-
|
|
|
|
|
|
|
|
|
14 |
- one title (which will be displayed as an overlay over the video, so keep it short eg. max 10/12 words),
|
15 |
-
- one image (you must describe it using a Stable Diffusion prompt - about ~300
|
16 |
- one voice over (should be short too, about 10 to 15 words)
|
17 |
|
|
|
|
|
|
|
|
|
|
|
18 |
# Examples
|
19 |
|
20 |
-
You most reply by writing/completing a YAML list of objects.
|
21 |
Here is a short example, the prompt was "a cute puppy who misbehaves in the kitchen, in 3 parts 🐶"
|
22 |
Note how we asked for "3 parts". Sometimes the user will talk about steps, slides etc instead (that's fine, it means the same thing),
|
23 |
or the user might omit to give the number (that's fine too, you can use 5 by default),
|
@@ -34,6 +40,4 @@ but if the user asks for large numbers, it should be ignored (our limit is 32).
|
|
34 |
image: "medium-shot of a puppy eating a cake, on the kitchen table, birthday cake, eating, cute, instagram, funny, messy, vertical photo"
|
35 |
voice: "Now my dog is eating my birtday cake. Please send help."
|
36 |
\`\`\
|
37 |
-
|
38 |
-
# Your turn:
|
39 |
-
`
|
|
|
1 |
export const systemPrompt: string =
|
2 |
`# Context
|
3 |
You are a server-side function generating stories from a single synopsis/brief (a "prompt").
|
4 |
+
The videos are meant to be shared on social media platform (Instagram, TikTok, Snapchat, Twitter, YouTube Shorts etc).
|
5 |
+
Each video is composed of a sequence of shots (a dozen in average), with a voice over and text.
|
|
|
|
|
6 |
|
7 |
# Task
|
8 |
+
Your mission is to generate a sequence of shots that will form the final video.
|
9 |
|
10 |
You will be provided a "prompt" (for the story) and max number of images
|
11 |
+
|
12 |
+
# Output schema
|
13 |
+
|
14 |
+
Each shot is composed of:
|
15 |
+
|
16 |
- one title (which will be displayed as an overlay over the video, so keep it short eg. max 10/12 words),
|
17 |
+
- one image (you must describe it using a Stable Diffusion prompt - about ~300 chars - using simple descriptive words and adjectives. Describe facts about characters, location, lights, texture, camera orientation, colors, clothes, movements etc. But don't give your opinion, don't talk about the emotions it evokes etc.)
|
18 |
- one voice over (should be short too, about 10 to 15 words)
|
19 |
|
20 |
+
# Important
|
21 |
+
|
22 |
+
You MUST reply by writing/completing a YAML list of objects.
|
23 |
+
Copy the structure of the examples, but not their content: come up with your own original ideal, you should be creativeç
|
24 |
+
|
25 |
# Examples
|
26 |
|
|
|
27 |
Here is a short example, the prompt was "a cute puppy who misbehaves in the kitchen, in 3 parts 🐶"
|
28 |
Note how we asked for "3 parts". Sometimes the user will talk about steps, slides etc instead (that's fine, it means the same thing),
|
29 |
or the user might omit to give the number (that's fine too, you can use 5 by default),
|
|
|
40 |
image: "medium-shot of a puppy eating a cake, on the kitchen table, birthday cake, eating, cute, instagram, funny, messy, vertical photo"
|
41 |
voice: "Now my dog is eating my birtday cake. Please send help."
|
42 |
\`\`\
|
43 |
+
`
|
|
|
|
src/app/api/v1/create/types.ts
DELETED
@@ -1,6 +0,0 @@
-
-export type LatentStory = {
-  title: string
-  image: string
-  voice: string
-}
src/app/api/v1/edit/dialogues/processShot.ts
CHANGED
@@ -1,12 +1,17 @@
 
-import {
+import {
+  ClapProject,
+  ClapSegment,
+  getClapAssetSourceType,
+  filterSegments,
+  ClapSegmentFilteringMode
+} from "@aitube/clap"
+import { ClapCompletionMode } from "@aitube/client"
 import { getSpeechBackgroundAudioPrompt } from "@aitube/engine"
 
 import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
 import { getMediaInfo } from "@/app/api/utils/getMediaInfo"
 
-import { ClapCompletionMode } from "../types"
-
 export async function processShot({
   shotSegment,
   existingClap,
@@ -70,7 +75,7 @@ export async function processShot({
   console.log(`[api/edit/dialogues] processShot: generated dialogue audio: ${shotDialogueSegment?.assetUrl?.slice?.(0, 50)}...`)
 
   // if it's partial, we need to manually add it
-  if (mode
+  if (mode !== ClapCompletionMode.FULL) {
     newerClap.segments.push(shotDialogueSegment)
   }
 } else {
CHANGED
@@ -2,16 +2,16 @@ import { NextResponse, NextRequest } from "next/server"
|
|
2 |
|
3 |
import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
|
4 |
|
5 |
-
import { getToken } from "@/app/api/auth/getToken"
|
6 |
|
7 |
import { processShot } from "./processShot"
|
8 |
import queryString from "query-string"
|
9 |
import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
|
|
|
|
|
10 |
|
11 |
// a helper to generate speech for a Clap
|
12 |
export async function POST(req: NextRequest) {
|
13 |
-
|
14 |
-
const jwtToken = await getToken({ user: "anonymous" })
|
15 |
|
16 |
const qs = queryString.parseUrl(req.url || "")
|
17 |
const query = (qs || {}).query
|
@@ -33,7 +33,7 @@ export async function POST(req: NextRequest) {
|
|
33 |
throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
|
34 |
}
|
35 |
|
36 |
-
const newerClap = mode ===
|
37 |
|
38 |
// we process the shots in parallel (this will increase the queue size in the Gradio spaces)
|
39 |
await Promise.all(shotsSegments.map(shotSegment =>
|
|
|
2 |
|
3 |
import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
|
4 |
|
|
|
5 |
|
6 |
import { processShot } from "./processShot"
|
7 |
import queryString from "query-string"
|
8 |
import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
|
9 |
+
import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"
|
10 |
+
import { ClapCompletionMode } from "@aitube/client"
|
11 |
|
12 |
// a helper to generate speech for a Clap
|
13 |
export async function POST(req: NextRequest) {
|
14 |
+
await throwIfInvalidToken(req.headers.get("Authorization"))
|
|
|
15 |
|
16 |
const qs = queryString.parseUrl(req.url || "")
|
17 |
const query = (qs || {}).query
|
|
|
33 |
throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
|
34 |
}
|
35 |
|
36 |
+
const newerClap = mode === ClapCompletionMode.FULL ? existingClap : newClap()
|
37 |
|
38 |
// we process the shots in parallel (this will increase the queue size in the Gradio spaces)
|
39 |
await Promise.all(shotsSegments.map(shotSegment =>
|
src/app/api/v1/edit/entities/clapToLatentStory.ts
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { ClapProject, ClapSegmentFilteringMode, filterSegments } from "@aitube/clap"
|
2 |
+
|
3 |
+
import { LatentStory } from "@/app/api/v1/types"
|
4 |
+
|
5 |
+
/**
|
6 |
+
* Extract the latent story from a ClapProject
|
7 |
+
*
|
8 |
+
* This is useful to pass a simplified representation of a story to a LLM
|
9 |
+
*
|
10 |
+
* @param clap
|
11 |
+
* @returns
|
12 |
+
*/
|
13 |
+
export async function clapToLatentStory(clap: ClapProject): Promise<LatentStory[]> {
|
14 |
+
const shots = clap.segments.filter(s => s.category === "camera")
|
15 |
+
|
16 |
+
const latentStories: LatentStory[] = []
|
17 |
+
|
18 |
+
for (const shot of shots) {
|
19 |
+
const image = filterSegments(
|
20 |
+
ClapSegmentFilteringMode.START,
|
21 |
+
shot,
|
22 |
+
clap.segments,
|
23 |
+
"storyboard"
|
24 |
+
).at(0)
|
25 |
+
|
26 |
+
const title = filterSegments(
|
27 |
+
ClapSegmentFilteringMode.START,
|
28 |
+
shot,
|
29 |
+
clap.segments,
|
30 |
+
"interface"
|
31 |
+
).at(0)
|
32 |
+
|
33 |
+
const voice = filterSegments(
|
34 |
+
ClapSegmentFilteringMode.START,
|
35 |
+
shot,
|
36 |
+
clap.segments,
|
37 |
+
"dialogue"
|
38 |
+
).at(0)
|
39 |
+
|
40 |
+
const latentStory: LatentStory = {
|
41 |
+
title: title.prompt,
|
42 |
+
image: image.prompt,
|
43 |
+
voice: voice.prompt,
|
44 |
+
}
|
45 |
+
|
46 |
+
latentStories.push(latentStory)
|
47 |
+
}
|
48 |
+
|
49 |
+
return latentStories
|
50 |
+
}
|
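
For intuition, the "latent story" is just one {title, image, voice} triple per "camera" segment, resolved from the overlapping interface, storyboard and dialogue segments. A usage sketch:

```ts
import { parseClap } from "@aitube/clap"
import { clapToLatentStory } from "@/app/api/v1/edit/entities/clapToLatentStory"

async function summarize(clapBlob: Blob) {
  const clap = await parseClap(clapBlob)
  const story = await clapToLatentStory(clap)
  // story is a LatentStory[], e.g.
  // [{ title: "King Arthus seeks the witch's guidance...",
  //    image: "Establishing shot of KING ARTHUS...",
  //    voice: "Dark sorceress of the shadows..." }, ...]
  return story
}
```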
src/app/api/v1/edit/entities/generateEntityPrompts.ts
ADDED
@@ -0,0 +1,135 @@
+"use server"
+
+import YAML from "yaml"
+import { generateSeed } from "@aitube/clap"
+import { ClapEntityPrompt } from "@aitube/client"
+
+import { sleep } from "@/lib/utils/sleep"
+import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
+import { parseRawStringToYAML } from "@/app/api/parsers/parseRawStringToYAML"
+import { LatentEntity, LatentStory } from "@/app/api/v1/types"
+
+import { systemPrompt } from "./systemPrompt"
+import { generateImageID } from "./generateImageID"
+
+export type EntityPromptResult = {
+  entityPrompt: ClapEntityPrompt
+  shots: number[]
+}
+
+// a helper to generate Clap stories from a few sentences
+// this is mostly used by external apps such as the Stories Factory
+export async function generateEntityPrompts({
+  prompt = "",
+  latentStory = []
+}: {
+  prompt?: string
+  latentStory?: LatentStory[]
+} = {
+  prompt: "",
+  latentStory: []
+}): Promise<EntityPromptResult[]> {
+
+  if (!prompt.length) { throw new Error(`please provide a prompt`) }
+  console.log("generateEntityPrompts(): prompt:", prompt)
+
+  if (!latentStory.length) { throw new Error(`please provide a story`) }
+
+  console.log("generateEntityPrompts(): latentStory:", latentStory)
+
+  const userPrompt = `The input story is about: ${prompt}.
+
+The input story timeline is:
+\`\`\`yaml
+${YAML.stringify(
+  // we need to help the LLM by marking the shots with a simple numeric ID
+  latentStory.map((shot, i) => ({
+    shot: i,
+    ...shot,
+  }))
+)}
+\`\`\`
+
+Now please generate the output entities:`
+
+  const prefix = "```yaml\n"
+  const nbMaxNewTokens = 1400
+
+  // TODO use streaming for the Hugging Face prediction
+  //
+  // note that a Clap file is actually a YAML stream of documents
+  // so technically we could stream everything from end-to-end
+  // (but I haven't coded the helpers to do this yet)
+  let rawString = await predict({
+    systemPrompt,
+    userPrompt,
+    nbMaxNewTokens,
+    prefix,
+  })
+
+  console.log("generateEntityPrompts(): rawString: ", rawString)
+
+  let results: EntityPromptResult[] = []
+
+  let maybeEntities = parseRawStringToYAML<LatentEntity[]>(rawString, [])
+
+  if (!Array.isArray(maybeEntities) || maybeEntities.length === 0) {
+    console.log(`generateEntityPrompts(): failed to generate entities.. trying again`)
+
+    await sleep(2000)
+
+    rawString = await predict({
+      systemPrompt,
+      userPrompt: userPrompt + ".", // we trick the Hugging Face cache
+      nbMaxNewTokens,
+      prefix,
+    })
+
+    console.log("generateEntityPrompts(): rawString: ", rawString)
+
+    maybeEntities = parseRawStringToYAML<LatentEntity[]>(rawString, [])
+    if (!Array.isArray(maybeEntities) || maybeEntities.length === 0) {
+      console.log(`generateEntityPrompts(): failed to generate shots for the second time, which indicates an issue with the Hugging Face API`)
+    }
+  }
+
+  if (maybeEntities.length) {
+    results = await Promise.all(maybeEntities.map(async ({
+      name,
+      category,
+      image,
+      audio,
+      shots,
+    }) => {
+
+      const entityPrompt: ClapEntityPrompt = {
+        name,
+        category,
+        age: "",
+        variant: image,
+        region: "",
+        identityImage: await generateImageID({
+          prompt: image,
+          seed: generateSeed()
+        }),
+
+        // TODO later
+        identityVoice: "" // await generateAudioID({ prompt: e.audio, seed: generateSeed() })
+      }
+
+      const result: EntityPromptResult = {
+        entityPrompt,
+        shots
+      }
+
+      return result
+    }))
+  } else {
+    throw new Error(`Hugging Face Inference API failure (the model failed to generate the entities)`)
+  }
+
+  console.log(`generateEntityPrompts(): generated ${results.length} entities with their images and voice ids`)
+
+  return results
+}
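
A usage sketch with hypothetical inputs, mirroring the LatentStory shape used above; each result pairs a ClapEntityPrompt (whose identityImage has already been rendered) with the numeric shot IDs the entity appears in:

```ts
import { generateEntityPrompts } from "@/app/api/v1/edit/entities/generateEntityPrompts"

async function demo() {
  const results = await generateEntityPrompts({
    prompt: "a king consults a witch before a difficult battle",
    latentStory: [
      {
        title: "King Arthus seeks the witch's guidance",
        image: "Establishing shot of KING ARTHUS, golden armor, castle courtyard",
        voice: "Dark sorceress of the shadows, tell me the augur.",
      },
    ],
  })

  for (const { entityPrompt, shots } of results) {
    console.log(entityPrompt.name, "appears in shots", shots)
  }
}
```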
src/app/api/v1/edit/entities/generateImageID.ts
CHANGED
@@ -2,7 +2,6 @@
 import { generateSeed } from "@aitube/clap"
 
 import { sleep } from "@/lib/utils/sleep"
-import { getValidNumber } from "@/lib/utils/getValidNumber"
 
 import { newRender, getRender } from "@/app/api/providers/videochain/renderWithVideoChain"
 import { getNegativePrompt, getPositivePrompt } from "@/app/api/utils/imagePrompts"
src/app/api/v1/edit/entities/index.ts
CHANGED
@@ -1,23 +1,113 @@
 
-import { ClapProject, getClapAssetSourceType,
+import { ClapProject, getClapAssetSourceType, getValidNumber, newEntity } from "@aitube/clap"
+import { ClapCompletionMode, ClapEntityPrompt } from "@aitube/client"
 
 import { generateImageID } from "./generateImageID"
 import { generateAudioID } from "./generateAudioID"
-
-import {
+import { generateEntityPrompts } from "./generateEntityPrompts"
+import { clapToLatentStory } from "./clapToLatentStory"
 
 export async function editEntities({
   existingClap,
   newerClap,
-
+  entityPrompts = [],
+  mode = ClapCompletionMode.PARTIAL
 }: {
   existingClap: ClapProject
   newerClap: ClapProject
-
+  entityPrompts?: ClapEntityPrompt[]
+  mode?: ClapCompletionMode
 }) {
 
+  // note that we can only handle either FULL or PARTIAL
+  // other modes such as MERGE, REPLACE.. are irrelevant since those are client-side modes
+  // so from a server point of view those correspond to PARTIAL
+  //
+  // it is also worth noting that the use of FULL should be discouraged
+  const isFull = mode === ClapCompletionMode.FULL
+  const isPartial = !isFull
+
+  // if we don't have existing entities, and user passed none,
+  // then we need to hallucinate them
+  if (existingClap.entities.length === 0 && entityPrompts.length === 0) {
+    const entityPromptsWithShots = await generateEntityPrompts({
+      prompt: existingClap.meta.description,
+      latentStory: await clapToLatentStory(existingClap)
+    })
+
+    for (const {
+      entityPrompt: { name, category, age, variant, region, identityImage, identityVoice },
+      shots
+    } of entityPromptsWithShots) {
+      const newEnt = newEntity({
+        category,
+        triggerName: name,
+        label: name,
+        description: name,
+        author: "auto",
+        thumbnailUrl: "",
+
+        imagePrompt: "",
+        imageSourceType: getClapAssetSourceType(identityImage),
+        imageEngine: "SDXL Lightning",
+        imageId: identityImage,
+        audioPrompt: "",
+        audioSourceType: getClapAssetSourceType(identityVoice),
+        audioEngine: "Parler-TTS", // <- TODO: use OpenVoice 2, that way it can be personalized
+        audioId: identityVoice,
+
+        // note: using a numeric age should be deprecated,
+        // instead we should be able to specify things using text,
+        // eg. "8 months", "25 years old", "12th century"
+        age: getValidNumber(age, 0, 120, 25),
+
+        // TODO: delete gender and appearance, replace by a single concept of "variant"
+        gender: "",
+        appearance: variant,
+        region: region,
+      })
+
+      existingClap.entities.push(newEnt)
+    }
+  }
+
+  // otherwise try to add what's new
+  for (const { name, category, age, variant, region, identityImage, identityVoice } of entityPrompts) {
+    const newEnt = newEntity({
+      category,
+      triggerName: name,
+      label: name,
+      description: name,
+      author: "auto",
+      thumbnailUrl: "",
+
+      imagePrompt: "",
+      imageSourceType: getClapAssetSourceType(identityImage),
+      imageEngine: "SDXL Lightning",
+      imageId: identityImage,
+      audioPrompt: "",
+      audioSourceType: getClapAssetSourceType(identityVoice),
+      audioEngine: "Parler-TTS", // <- TODO: use OpenVoice 2, that way it can be personalized
+      audioId: identityVoice,
+
+      // note: using a numeric age should be deprecated,
+      // instead we should be able to specify things using text,
+      // eg. "8 months", "25 years old", "12th century"
+      age: getValidNumber(age, 0, 120, 25),
+
+      // TODO: delete gender and appearance, replace by a single concept of "variant"
+      gender: "",
+      appearance: variant,
+      region: region,
+    })
+
+    existingClap.entities.push(newEnt)
+  }
+
   if (!existingClap.entities.length) { throw new Error(`please provide at least one entity`) }
 
+  // then we try to automatically repair, edit, complete.. all the existing entities
+
   for (const entity of existingClap.entities) {
 
     let entityHasBeenModified = false
@@ -57,13 +147,13 @@ export async function editEntities({
   }
 
   // in case we are doing a partial update
-  if (mode
+  if (mode !== ClapCompletionMode.FULL && entityHasBeenModified && !newerClap.entityIndex[entity.id]) {
     newerClap.entities.push(entity)
     newerClap.entityIndex[entity.id] = entity
   }
 }
 
-console.log(`
+console.log(`api/edit/entities(): returning the newerClap`)
 
 return newerClap
 }
src/app/api/v1/edit/entities/route.ts
CHANGED
@@ -2,12 +2,15 @@ import { NextResponse, NextRequest } from "next/server"
 import queryString from "query-string"
 import { newClap, parseClap, serializeClap } from "@aitube/clap"
 
-import { getToken } from "@/app/api/auth/getToken"
 import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
+import { parseClapEntityPrompts } from "@/app/api/parsers/parseEntityPrompts"
+import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"
 
 import { editEntities } from "."
+import { ClapCompletionMode } from "@aitube/client"
 
 export async function POST(req: NextRequest) {
+  await throwIfInvalidToken(req.headers.get("Authorization"))
 
   const qs = queryString.parseUrl(req.url || "")
   const query = (qs || {}).query
@@ -15,17 +18,18 @@ export async function POST(req: NextRequest) {
   const mode = parseCompletionMode(query?.c)
   // const prompt = parsePrompt(query?.p)
 
-  const
+  const entityPrompts = parseClapEntityPrompts(query?.e)
 
   const blob = await req.blob()
 
   const existingClap = await parseClap(blob)
 
-  const newerClap = mode ===
+  const newerClap = mode === ClapCompletionMode.FULL ? existingClap : newClap()
 
   await editEntities({
     existingClap,
     newerClap,
+    entityPrompts,
     mode
   })
 
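
A sketch of a complete request against this route, combining the `c` (completion mode) and `e` (Base64 entity prompts) query parameters parsed above with the clap archive as the raw request body; the return value of serializeClap() is assumed to be usable as a fetch body:

```ts
import { ClapProject, parseClap, serializeClap } from "@aitube/clap"
import { encode } from "js-base64"

async function editEntitiesRemotely(clap: ClapProject, apiToken: string) {
  // optional client-provided entity prompts (empty array here)
  const e = encodeURIComponent(encode(JSON.stringify([])))

  const res = await fetch(`/api/v1/edit/entities?c=partial&e=${e}`, {
    method: "POST",
    headers: { "Authorization": `Bearer ${apiToken}` },
    body: await serializeClap(clap),
  })

  // in partial mode the response only carries the new or repaired entities
  return parseClap(await res.blob())
}
```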
src/app/api/v1/edit/entities/systemPrompt.ts
CHANGED
@@ -1,3 +1,64 @@
+export const systemPrompt: string =
+`# Context
+You are a server-side function generating stories from a single synopsis/brief (a "prompt").
+The video are meant to be shared on social media platform (Instagram, TikTok, Snapchat, Twitter, YouTube Shorts etc).
+Each video is composed of a sequence of shots (a dozen in average), with a voice over and text.
 
+# Task
+You mission is to generate a list of entities/assets (characters, locations etc) associated with each shot.
 
-
+# Important
+
+- You MUST reply by writing/completing a YAML list of objects.
+- Copy the structure of the examples, but not their content: come up with your own original ideal, you should be creativeç
+
+# Output schema:
+
+name: name of the entity
+category: can be "character" or "location"
+image: a description of the entity (you must describe it using a Stable Diffusion prompt - about ~300 chars - using simple descriptive words and adjectives. Describe facts about characters, location, lights, texture, camera orientation, colors, clothes, movements etc. But don't give your opinion, don't talk about the emotions it evokes etc.)
+audio: a textual description of what and how the entity sounds like
+shots: an array containing the shot IDs where the entity is present
+
+# Short example
+
+Given the following inputs:
+
+"A king goes to see a witch to ask if or how he can win an upcoming and challenging battle"
+
+\`\`\`yaml
+- shot: 1
+  title: "King Arthus seeks the witch's guidance to win his imminent battle."
+  image: "Establishing shot of KING ARTHUS, nervous, wet brown hair. dressed in golden armor and a colorful cape. His face reveals a mix of concern and determination. He's standing in the bright sunshine, inside a castle's courtyard, under cloudy skies. Behind him, a group of soldiers can be seen marching towards the castle gates."
+  voice: "Dark sorceress of the shadows, it is time for you to serve your Lord. Tell me the augur, tell me what you foreknow. Tell me how I will cleave my ennemies to the bone, and ravage them in battle to come up victorious."
+- shot: 2
+  title: "The witch gives her counsel but warns of an unknown cost."
+  image: "close-up shot of THE WITCH, smiling cunningly, raising a finger while speaking. Background bokeh, dim lightning, menacing, mysterious."
+  voice: "Your Majesty, this will be a bloody battle, but I espy a way to victory for you. But if my advice you follow, victory I foresee, although at a great cost it will be."
+- shot: 3
+  title: "The words of the witch are sinking in, but King Arthus tries to appear strong"
+  image: "close-up shot on KING ARTHUS, looking concerned, somber, false confidence"
+  voice: "Witch with the wicked tongue, what must be done will be done. I will do everything for my people's sake. Speak now, make know the path to glory."
+\`\`\
+
+An example YAML output from the server-side function can be:
+
+\`\`\`yaml
+- name: "Castle's Courtyard"
+  category: "location"
+  image: "A medieval castle courtyard, ashlar walls, soldiers and horses, cloudy sky"
+  audio: "Background noises of voices, horses, birds, wind, carriages"
+  shots: [1, 2, 3]
+- name: "King Arthus"
+  category: "character"
+  image: 1 middle-aged king, pepper-and-salt hair, beared. Dressed in golden armor and a dark purple cape. Majestic, imposing."
+  label: King Arthus seeks the witch's guidance to win his imminent battle."
+  audio: a middle-aged man speaking clearly, with a deep voice tone, confident, imposing, calm, overpowering."
+  shots: [1, 3]
+- name: "The Witch"
+  category: "character"
+  image: "an old witch, with a villainous face full of warts, gray hair, and a hunchback. Gypsy look. Yellowed teeth, piercing eyes. She wears a crude robe, she has wrinkled hands with long dirty nails."
+  audio: "a sneering old woman, speaking with a hoarse and raspy voice. She is confident, hiding something."
+  shots: [2]
+\`\`\
+`
src/app/api/v1/edit/storyboards/processShot.ts
CHANGED
@@ -1,10 +1,17 @@
-import {
+import {
+  ClapProject,
+  ClapSegment,
+  getClapAssetSourceType,
+  newSegment,
+  filterSegments,
+  ClapSegmentFilteringMode
+} from "@aitube/clap"
+import { ClapCompletionMode } from "@aitube/client"
 import { getVideoPrompt } from "@aitube/engine"
 
 import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
 
 import { generateStoryboard } from "./generateStoryboard"
-import { ClapCompletionMode } from "../types"
 
 export async function processShot({
   shotSegment,
@@ -84,7 +91,7 @@ export async function processShot({
 
   // if mode is full, newerClap already contains the ference to shotStoryboardSegment
   // but if it's partial, we need to manually add it
-  if (mode
+  if (mode !== ClapCompletionMode.FULL) {
     newerClap.segments.push(shotStoryboardSegment)
   }
 } else {
src/app/api/v1/edit/storyboards/route.ts
CHANGED
@@ -2,11 +2,11 @@ import { NextResponse, NextRequest } from "next/server"
 import queryString from "query-string"
 import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
 
-import { getToken } from "@/app/api/auth/getToken"
-
 import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
+import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"
 
 import { processShot } from "./processShot"
+import { ClapCompletionMode } from "@aitube/client"
 
 // a helper to generate storyboards for a Clap
 // this is mostly used by external apps such as the Stories Factory
@@ -16,8 +16,7 @@
 // - add missing storyboard prompts
 // - add missing storyboard images
 export async function POST(req: NextRequest) {
-
-  const jwtToken = await getToken({ user: "anonymous" })
+  await throwIfInvalidToken(req.headers.get("Authorization"))
 
   const qs = queryString.parseUrl(req.url || "")
   const query = (qs || {}).query
@@ -30,16 +29,16 @@
 
   if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }
 
-  console.log(`
+  console.log(`api/v1/edit/storyboards(): detected ${existingClap.segments.length} segments`)
 
   const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === "camera")
-  console.log(`
+  console.log(`api/v1/edit/storyboards(): detected ${shotsSegments.length} shots`)
 
   if (shotsSegments.length > 32) {
     throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
   }
 
-  const newerClap = mode ===
+  const newerClap = mode === ClapCompletionMode.FULL ? existingClap : newClap()
 
   // we process the shots in parallel (this will increase the queue size in the Gradio spaces)
   await Promise.all(shotsSegments.map(shotSegment =>
src/app/api/v1/edit/types.ts
DELETED
@@ -1,8 +0,0 @@
-export type ClapCompletionMode =
-  // the full .clap is returned, containing both previous data and also new entries
-  // this isn't the most optimized mode, obviously
-  | "full"
-
-  // only changes are
-  | "partial"
-
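
This local string union is superseded by the ClapCompletionMode enum that the routes now import from @aitube/client. A hypothetical reconstruction of the replacement is shown below; FULL is confirmed by the routes in this commit, PARTIAL by the comments of the deleted file, and the actual definition ships with the package.

// hypothetical equivalent of the enum that replaces the deleted union
enum ClapCompletionMode {
  // the full .clap is returned, containing both previous data and new entries
  FULL = "full",
  // only the changes (newly generated segments) are returned
  PARTIAL = "partial",
}
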
src/app/api/v1/edit/videos/processShot.ts
CHANGED
@@ -1,11 +1,19 @@
 
-import {
+import {
+  ClapProject,
+  ClapSegment,
+  getClapAssetSourceType,
+  newSegment,
+  filterSegments,
+  ClapSegmentFilteringMode
+} from "@aitube/clap"
+import { ClapCompletionMode } from "@aitube/client"
 import { getVideoPrompt } from "@aitube/engine"
 
 import { getPositivePrompt } from "@/app/api/utils/imagePrompts"
 
 import { generateVideo } from "./generateVideo"
-
+
 
 export async function processShot({
   shotSegment,
@@ -89,7 +97,7 @@ export async function processShot({
 
   // if mode is full, newerClap already contains the reference to shotVideoSegment
   // but if it's partial, we need to manually add it
-  if (mode
+  if (mode !== ClapCompletionMode.FULL) {
     newerClap.segments.push(shotVideoSegment)
   }
 
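
The final hunk encodes the completion-mode contract: in FULL mode newerClap is the input clap and already holds the generated segment, while in any other mode the fresh response clap must receive it explicitly. A standalone sketch of that branch (attachIfPartial is an illustrative name, not a function in the codebase):

import { ClapProject, ClapSegment } from "@aitube/clap"
import { ClapCompletionMode } from "@aitube/client"

// in FULL mode the segment already lives in the (mutated) input clap, so
// pushing it again would duplicate it; in partial modes the response clap
// starts empty and the segment has to be appended by hand
function attachIfPartial(mode: ClapCompletionMode, newerClap: ClapProject, segment: ClapSegment): void {
  if (mode !== ClapCompletionMode.FULL) {
    newerClap.segments.push(segment)
  }
}
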
src/app/api/v1/edit/videos/route.ts
CHANGED
@@ -1,13 +1,12 @@
 import { NextResponse, NextRequest } from "next/server"
 import queryString from "query-string"
 import { ClapProject, ClapSegment, newClap, parseClap, serializeClap } from "@aitube/clap"
-
-import { getToken } from "@/app/api/auth/getToken"
-
-import { processShot } from "./processShot"
+import { ClapCompletionMode } from "@aitube/client"
 
 import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
+import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"
 
-
+import { processShot } from "./processShot"
 
 // a helper to generate videos for a Clap
 // this is mostly used by external apps such as the Stories Factory
@@ -17,8 +16,7 @@ import { parseCompletionMode } from "@/app/api/parsers/parseCompletionMode"
 // - add missing video prompts
 // - add missing video files
 export async function POST(req: NextRequest) {
-
-  const jwtToken = await getToken({ user: "anonymous" })
+  await throwIfInvalidToken(req.headers.get("Authorization"))
 
   const qs = queryString.parseUrl(req.url || "")
   const query = (qs || {}).query
@@ -31,16 +29,16 @@ export async function POST(req: NextRequest) {
 
   if (!existingClap?.segments) { throw new Error(`no segment found in the provided clap!`) }
 
-  console.log(`
+  console.log(`api/edit/videos(): detected ${existingClap.segments.length} segments`)
 
   const shotsSegments: ClapSegment[] = existingClap.segments.filter(s => s.category === "camera")
-  console.log(`
+  console.log(`api/edit/videos(): detected ${shotsSegments.length} shots`)
 
   if (shotsSegments.length > 32) {
    throw new Error(`Error, this endpoint being synchronous, it is designed for short stories only (max 32 shots).`)
   }
 
-  const newerClap = mode ===
+  const newerClap = mode === ClapCompletionMode.FULL ? existingClap : newClap()
 
   // we process the shots in parallel (this will increase the queue size in the Gradio spaces)
   await Promise.all(shotsSegments.map(shotSegment =>
@@ -52,7 +50,7 @@ export async function POST(req: NextRequest) {
     })
   ))
 
-  console.log(`
+  console.log(`api/edit/videos(): returning the clap augmented with videos`)
 
   return new NextResponse(await serializeClap(newerClap), {
     status: 200,
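
Seen from the outside, this endpoint takes a serialized clap and returns the same story augmented with videos. A hedged client-side sketch follows, assuming the completion mode travels as a query parameter named "mode" (the real parameter name is not visible in this diff) and that serializeClap/parseClap round-trip through a Blob:

import { ClapProject, parseClap, serializeClap } from "@aitube/clap"

// illustrative call only: the endpoint path is real, but the query parameter
// name and the bearer-token scheme are assumptions
async function requestVideos(clap: ClapProject, token: string): Promise<ClapProject> {
  const res = await fetch("/api/v1/edit/videos?mode=full", {
    method: "POST",
    headers: { Authorization: `Bearer ${token}` },
    body: await serializeClap(clap),
  })
  if (!res.ok) { throw new Error(`request failed with status ${res.status}`) }
  return parseClap(await res.blob())
}
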
src/app/api/v1/export/route.ts
CHANGED
@@ -1,22 +1,17 @@
 import { NextResponse, NextRequest } from "next/server"
 import queryString from "query-string"
 
-
-
+import { parseSupportedExportFormat } from "@/app/api/parsers/parseSupportedExportFormat"
+import { throwIfInvalidToken } from "@/app/api/v1/auth/throwIfInvalidToken"
 
 // we hide/wrap the micro-service under a unified AiTube API
 export async function POST(req: NextRequest, res: NextResponse) {
+  await throwIfInvalidToken(req.headers.get("Authorization"))
 
   const qs = queryString.parseUrl(req.url || "")
   const query = (qs || {}).query
 
-
-  try {
-    format = decodeURIComponent(query?.f?.toString() || defaultExportFormat).trim() as SupportedExportFormat
-    if (format !== "mp4" && format !== "webm") {
-      format = defaultExportFormat
-    }
-  } catch (err) {}
+  const format = parseSupportedExportFormat(query?.f)
 
  // let's call our micro-service, which is currently open bar.
  const result = await fetch(
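
The removed inline try/catch spells out the parser's logic, so the extracted helper (parseSupportedExportFormat.ts, added in this commit) can be reconstructed with some confidence. In the sketch below the default format value ("mp4") is an assumption: the removed code referenced defaultExportFormat without showing its value.

export type SupportedExportFormat = "mp4" | "webm"

// assumed default, not confirmed by the diff
const defaultExportFormat: SupportedExportFormat = "mp4"

// reconstruction of the extracted parser, mirroring the removed inline logic
export function parseSupportedExportFormat(input?: unknown): SupportedExportFormat {
  let format: SupportedExportFormat = defaultExportFormat
  try {
    format = decodeURIComponent(`${input || defaultExportFormat}`).trim() as SupportedExportFormat
    if (format !== "mp4" && format !== "webm") {
      format = defaultExportFormat
    }
  } catch (err) {}
  return format
}
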
src/app/api/v1/types.ts
ADDED
@@ -0,0 +1,15 @@
+import { ClapSegmentCategory } from "@aitube/clap"
+
+export type LatentEntity = {
+  name: string
+  category: ClapSegmentCategory
+  image: string
+  audio: string
+  shots: number[]
+}
+
+export type LatentStory = {
+  title: string
+  image: string
+  voice: string
+}
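
These types back the new entity endpoints under api/v1/edit/entities. Field semantics can only be inferred from the names, so the values below are purely illustrative:

import { ClapSegmentCategory } from "@aitube/clap"
import { LatentEntity, LatentStory } from "@/app/api/v1/types"

const hero: LatentEntity = {
  name: "Captain Lyra",
  category: "character" as ClapSegmentCategory, // assuming a character category exists
  image: "data:image/png;base64,...",           // identity image for the entity
  audio: "data:audio/wav;base64,...",           // reference voice for the entity
  shots: [0, 2, 5],                             // shots in which the entity appears
}

const openingShot: LatentStory = {
  title: "The landing",
  image: "a spaceship touching down on a red desert plain, golden hour",
  voice: "Captain Lyra: after three years, we are finally home.",
}
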
src/app/latent/search/page.tsx
CHANGED
@@ -5,7 +5,7 @@ import { LatentQueryProps } from "@/types/general"
 
 import { Main } from "../../main"
 import { getNewMediaInfo } from "../../api/generators/search/getNewMediaInfo"
-import { getToken } from "../../api/auth/getToken"
+import { getToken } from "../../api/v1/auth/getToken"
 
 // https://jmswrnr.com/blog/protecting-next-js-api-routes-query-parameters
 
src/app/latent/watch/page.tsx
CHANGED
@@ -6,7 +6,7 @@ import { parseBasicSearchResult } from '@/app/api/parsers/parseBasicSearchResult'
 
 import { Main } from "../../main"
 import { getNewMediaInfo } from "../../api/generators/search/getNewMediaInfo"
-import { getToken } from "../../api/auth/getToken"
+import { getToken } from "../../api/v1/auth/getToken"
 
 // https://jmswrnr.com/blog/protecting-next-js-api-routes-query-parameters
 
src/types/general.ts
CHANGED
@@ -54,6 +54,8 @@ export interface RenderRequest {
   wait: boolean // wait until the job is completed
 
   analyze: boolean // analyze the image to generate a caption (optional)
+
+  identityImage: string // reference image for the main entity
 }
 
 export interface ImageSegment {
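
The new identityImage field hands the render micro-service a reference image, presumably so the main entity keeps a consistent look across renders. An illustrative request literal, limited to the fields visible in this hunk:

import { RenderRequest } from "@/types/general"

// RenderRequest has more fields than this diff shows, hence the Partial
const request: Partial<RenderRequest> = {
  wait: true,                                  // block until the job completes
  analyze: false,                              // skip caption generation
  identityImage: "data:image/png;base64,...",  // reference image for the main entity
}
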