Commit
•
241036e
1
Parent(s):
8c5d17c
up
Browse files- src/app/engine/caption.ts +54 -0
- src/app/engine/render.ts +3 -1
- src/app/interface/panel/index.tsx +33 -0
- src/app/layouts/index.tsx +2 -0
- src/app/store/index.ts +39 -3
- src/lib/getInitialRenderedScene.ts +1 -0
- src/types.ts +12 -0
src/app/engine/caption.ts
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"use server"
|
2 |
+
|
3 |
+
import { ImageAnalysisRequest, ImageAnalysisResponse } from "@/types"
|
4 |
+
|
5 |
+
const apiUrl = `${process.env.RENDERING_ENGINE_API || ""}`
|
6 |
+
|
7 |
+
/**
 * Ask the rendering engine's `/analyze` endpoint to describe an image.
 *
 * @param prompt - text instruction sent alongside the image
 * @param imageBase64 - image to analyze; forwarded as the request's `image` field
 * @returns the `result` string of the API response, or an empty string when the
 *          call fails (network error, non-200 status, unusable response body)
 * @throws Error when `imageBase64` or `prompt` is empty
 */
export async function see({
  prompt,
  imageBase64
}: {
  prompt: string
  imageBase64: string
}): Promise<string> {
  // Validate each input with a message that names the input actually missing.
  // The image is checked first so that a call with both inputs empty reports
  // the missing image.
  if (!imageBase64) {
    console.error(`cannot call the API without an image, aborting..`)
    throw new Error(`cannot call the API without an image, aborting..`)
  }

  if (!prompt) {
    console.error(`cannot call the API without a prompt, aborting..`)
    throw new Error(`cannot call the API without a prompt, aborting..`)
  }

  try {
    // Typed declaration (rather than an `as` assertion) so the compiler
    // checks the payload against ImageAnalysisRequest.
    const request: ImageAnalysisRequest = {
      prompt,
      image: imageBase64
    }

    // Only the first characters of the image are logged to keep logs short.
    console.log(`calling ${apiUrl}/analyze called with: `, {
      prompt: request.prompt,
      image: request.image.slice(0, 20)
    })

    const res = await fetch(`${apiUrl}/analyze`, {
      method: "POST",
      headers: {
        Accept: "application/json",
        "Content-Type": "application/json",
        // Authorization: `Bearer ${process.env.VC_SECRET_ACCESS_TOKEN}`,
      },
      body: JSON.stringify(request),
      cache: 'no-store',
      // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
      // next: { revalidate: 1 }
    })

    if (res.status !== 200) {
      throw new Error('Failed to fetch data')
    }

    const response = (await res.json()) as ImageAnalysisResponse

    // The JSON body is unvalidated: fall back to "" if `result` is absent so
    // the declared Promise<string> contract always holds.
    return typeof response.result === "string" ? response.result : ""
  } catch (err) {
    // Best effort: failures are logged and reported to the caller as "".
    console.error(err)
    return ""
  }
}
|
src/app/engine/render.ts
CHANGED
@@ -29,6 +29,7 @@ export async function newRender({
|
|
29 |
renderId: "",
|
30 |
status: "error",
|
31 |
assetUrl: "",
|
|
|
32 |
maskUrl: "",
|
33 |
error: "failed to fetch the data",
|
34 |
segments: []
|
@@ -58,7 +59,7 @@ export async function newRender({
|
|
58 |
// no need to upscale right now as we generate tiny panels
|
59 |
// maybe later we can provide an "export" button to PDF
|
60 |
upscalingFactor: 2,
|
61 |
-
|
62 |
cache: "ignore"
|
63 |
} as Partial<RenderRequest>),
|
64 |
cache: 'no-store',
|
@@ -95,6 +96,7 @@ export async function getRender(renderId: string) {
|
|
95 |
renderId: "",
|
96 |
status: "error",
|
97 |
assetUrl: "",
|
|
|
98 |
maskUrl: "",
|
99 |
error: "failed to fetch the data",
|
100 |
segments: []
|
|
|
29 |
renderId: "",
|
30 |
status: "error",
|
31 |
assetUrl: "",
|
32 |
+
alt: prompt || "",
|
33 |
maskUrl: "",
|
34 |
error: "failed to fetch the data",
|
35 |
segments: []
|
|
|
59 |
// no need to upscale right now as we generate tiny panels
|
60 |
// maybe later we can provide an "export" button to PDF
|
61 |
upscalingFactor: 2,
|
62 |
+
analyze: true,
|
63 |
cache: "ignore"
|
64 |
} as Partial<RenderRequest>),
|
65 |
cache: 'no-store',
|
|
|
96 |
renderId: "",
|
97 |
status: "error",
|
98 |
assetUrl: "",
|
99 |
+
alt: "",
|
100 |
maskUrl: "",
|
101 |
error: "failed to fetch the data",
|
102 |
segments: []
|
src/app/interface/panel/index.tsx
CHANGED
@@ -11,6 +11,7 @@ import { useStore } from "@/app/store"
|
|
11 |
import { cn } from "@/lib/utils"
|
12 |
import { getInitialRenderedScene } from "@/lib/getInitialRenderedScene"
|
13 |
import { Progress } from "@/app/interface/progress"
|
|
|
14 |
// import { Bubble } from "./bubble"
|
15 |
|
16 |
export function Panel({
|
@@ -34,6 +35,10 @@ export function Panel({
|
|
34 |
const panels = useStore(state => state.panels)
|
35 |
const prompt = panels[panel] || ""
|
36 |
|
|
|
|
|
|
|
|
|
37 |
const [_isPending, startTransition] = useTransition()
|
38 |
const [rendered, setRendered] = useState<RenderedScene>(getInitialRenderedScene())
|
39 |
const renderedRef = useRef<RenderedScene>()
|
@@ -72,6 +77,7 @@ export function Panel({
|
|
72 |
renderId: "",
|
73 |
status: "error",
|
74 |
assetUrl: "",
|
|
|
75 |
maskUrl: "",
|
76 |
error: "failed to fetch the data",
|
77 |
segments: []
|
@@ -128,6 +134,32 @@ export function Panel({
|
|
128 |
}
|
129 |
}, [])
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
if (isLoading) {
|
132 |
return (
|
133 |
<div className={cn(
|
@@ -152,6 +184,7 @@ export function Panel({
|
|
152 |
{rendered.assetUrl && <img
|
153 |
src={rendered.assetUrl}
|
154 |
className="w-full h-full object-cover"
|
|
|
155 |
/>}
|
156 |
|
157 |
{/*<Bubble className="absolute top-4 left-4">
|
|
|
11 |
import { cn } from "@/lib/utils"
|
12 |
import { getInitialRenderedScene } from "@/lib/getInitialRenderedScene"
|
13 |
import { Progress } from "@/app/interface/progress"
|
14 |
+
import { see } from "@/app/engine/caption"
|
15 |
// import { Bubble } from "./bubble"
|
16 |
|
17 |
export function Panel({
|
|
|
35 |
const panels = useStore(state => state.panels)
|
36 |
const prompt = panels[panel] || ""
|
37 |
|
38 |
+
// const setCaption = useStore(state => state.setCaption)
|
39 |
+
// const captions = useStore(state => state.captions)
|
40 |
+
// const caption = captions[panel] || ""
|
41 |
+
|
42 |
const [_isPending, startTransition] = useTransition()
|
43 |
const [rendered, setRendered] = useState<RenderedScene>(getInitialRenderedScene())
|
44 |
const renderedRef = useRef<RenderedScene>()
|
|
|
77 |
renderId: "",
|
78 |
status: "error",
|
79 |
assetUrl: "",
|
80 |
+
alt: "",
|
81 |
maskUrl: "",
|
82 |
error: "failed to fetch the data",
|
83 |
segments: []
|
|
|
134 |
}
|
135 |
}, [])
|
136 |
|
137 |
+
/*
|
138 |
+
doing the captioning from the browser is expensive
|
139 |
+
a simpler solution is to caption directly during SDXL generation
|
140 |
+
|
141 |
+
useEffect(() => {
|
142 |
+
if (!rendered.assetUrl) { return }
|
143 |
+
// the asset url can evolve with time (link to a better resolution image)
|
144 |
+
// however it would be costly to ask for the caption, the low resolution is enough for the semantic resolution
|
145 |
+
// so we just do nothing if we already have the caption
|
146 |
+
if (caption) { return }
|
147 |
+
startTransition(async () => {
|
148 |
+
try {
|
149 |
+
const newCaption = await see({
|
150 |
+
prompt: "please caption the following image",
|
151 |
+
imageBase64: rendered.assetUrl
|
152 |
+
})
|
153 |
+
if (newCaption) {
|
154 |
+
setCaption(newCaption)
|
155 |
+
}
|
156 |
+
} catch (err) {
|
157 |
+
console.error(`failed to generate the caption:`, err)
|
158 |
+
}
|
159 |
+
})
|
160 |
+
}, [rendered.assetUrl, caption])
|
161 |
+
*/
|
162 |
+
|
163 |
if (isLoading) {
|
164 |
return (
|
165 |
<div className={cn(
|
|
|
184 |
{rendered.assetUrl && <img
|
185 |
src={rendered.assetUrl}
|
186 |
className="w-full h-full object-cover"
|
187 |
+
alt={rendered.alt}
|
188 |
/>}
|
189 |
|
190 |
{/*<Bubble className="absolute top-4 left-4">
|
src/app/layouts/index.tsx
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
import { Panel } from "@/app/interface/panel"
|
2 |
import { pick } from "@/lib/pick"
|
3 |
|
|
|
1 |
+
"use client"
|
2 |
+
|
3 |
import { Panel } from "@/app/interface/panel"
|
4 |
import { pick } from "@/lib/pick"
|
5 |
|
src/app/store/index.ts
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
"use client"
|
|
|
2 |
import { create } from "zustand"
|
3 |
|
4 |
import { FontName } from "@/lib/fonts"
|
@@ -10,6 +11,7 @@ export const useStore = create<{
|
|
10 |
font: FontName
|
11 |
preset: Preset
|
12 |
panels: string[]
|
|
|
13 |
layout: LayoutName
|
14 |
zoomLevel: number
|
15 |
isGeneratingLogic: boolean
|
@@ -21,6 +23,7 @@ export const useStore = create<{
|
|
21 |
setPreset: (preset: Preset) => void
|
22 |
setPanels: (panels: string[]) => void
|
23 |
setLayout: (layout: LayoutName) => void
|
|
|
24 |
setZoomLevel: (zoomLevel: number) => void
|
25 |
setGeneratingLogic: (isGeneratingLogic: boolean) => void
|
26 |
setGeneratingImages: (panelId: number, value: boolean) => void
|
@@ -30,16 +33,49 @@ export const useStore = create<{
|
|
30 |
font: "cartoonist",
|
31 |
preset: getPreset("japanese_manga"),
|
32 |
panels: [],
|
|
|
33 |
layout: getRandomLayoutName(),
|
34 |
zoomLevel: 50,
|
35 |
isGeneratingLogic: false,
|
36 |
panelGenerationStatus: {},
|
37 |
isGeneratingText: false,
|
38 |
atLeastOnePanelIsBusy: false,
|
39 |
-
setPrompt: (prompt: string) =>
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
setPanels: (panels: string[]) => set({ panels }),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
setLayout: (layout: LayoutName) => set({ layout }),
|
44 |
setZoomLevel: (zoomLevel: number) => set({ zoomLevel }),
|
45 |
setGeneratingLogic: (isGeneratingLogic: boolean) => set({ isGeneratingLogic }),
|
|
|
1 |
"use client"
|
2 |
+
|
3 |
import { create } from "zustand"
|
4 |
|
5 |
import { FontName } from "@/lib/fonts"
|
|
|
11 |
font: FontName
|
12 |
preset: Preset
|
13 |
panels: string[]
|
14 |
+
captions: Record<string, string>
|
15 |
layout: LayoutName
|
16 |
zoomLevel: number
|
17 |
isGeneratingLogic: boolean
|
|
|
23 |
setPreset: (preset: Preset) => void
|
24 |
setPanels: (panels: string[]) => void
|
25 |
setLayout: (layout: LayoutName) => void
|
26 |
+
setCaption: (panelId: number, caption: string) => void
|
27 |
setZoomLevel: (zoomLevel: number) => void
|
28 |
setGeneratingLogic: (isGeneratingLogic: boolean) => void
|
29 |
setGeneratingImages: (panelId: number, value: boolean) => void
|
|
|
33 |
font: "cartoonist",
|
34 |
preset: getPreset("japanese_manga"),
|
35 |
panels: [],
|
36 |
+
captions: {},
|
37 |
layout: getRandomLayoutName(),
|
38 |
zoomLevel: 50,
|
39 |
isGeneratingLogic: false,
|
40 |
panelGenerationStatus: {},
|
41 |
isGeneratingText: false,
|
42 |
atLeastOnePanelIsBusy: false,
|
43 |
+
setPrompt: (prompt: string) => {
|
44 |
+
const existingPrompt = get().prompt
|
45 |
+
if (prompt === existingPrompt) { return }
|
46 |
+
set({
|
47 |
+
prompt,
|
48 |
+
panels: [],
|
49 |
+
captions: {},
|
50 |
+
})
|
51 |
+
},
|
52 |
+
setFont: (font: FontName) => {
|
53 |
+
const existingFont = get().font
|
54 |
+
if (font === existingFont) { return }
|
55 |
+
set({
|
56 |
+
font,
|
57 |
+
panels: [],
|
58 |
+
captions: {}
|
59 |
+
})
|
60 |
+
},
|
61 |
+
setPreset: (preset: Preset) => {
|
62 |
+
const existingPreset = get().preset
|
63 |
+
if (preset.label === existingPreset.label) { return }
|
64 |
+
set({
|
65 |
+
preset,
|
66 |
+
panels: [],
|
67 |
+
captions: {}
|
68 |
+
})
|
69 |
+
},
|
70 |
setPanels: (panels: string[]) => set({ panels }),
|
71 |
+
setCaption: (panelId: number, caption: string) => {
|
72 |
+
set({
|
73 |
+
captions: {
|
74 |
+
...get().captions,
|
75 |
+
[panelId]: caption
|
76 |
+
}
|
77 |
+
})
|
78 |
+
},
|
79 |
setLayout: (layout: LayoutName) => set({ layout }),
|
80 |
setZoomLevel: (zoomLevel: number) => set({ zoomLevel }),
|
81 |
setGeneratingLogic: (isGeneratingLogic: boolean) => set({ isGeneratingLogic }),
|
src/lib/getInitialRenderedScene.ts
CHANGED
@@ -4,6 +4,7 @@ export const getInitialRenderedScene = (): RenderedScene => ({
|
|
4 |
renderId: "",
|
5 |
status: "pending",
|
6 |
assetUrl: "",
|
|
|
7 |
error: "",
|
8 |
maskUrl: "",
|
9 |
segments: []
|
|
|
4 |
renderId: "",
|
5 |
status: "pending",
|
6 |
assetUrl: "",
|
7 |
+
alt: "",
|
8 |
error: "",
|
9 |
maskUrl: "",
|
10 |
segments: []
|
src/types.ts
CHANGED
@@ -42,6 +42,8 @@ export interface RenderRequest {
|
|
42 |
cache: CacheMode
|
43 |
|
44 |
wait: boolean // wait until the job is completed
|
|
|
|
|
45 |
}
|
46 |
|
47 |
export interface ImageSegment {
|
@@ -61,8 +63,18 @@ export interface RenderedScene {
|
|
61 |
renderId: string
|
62 |
status: RenderedSceneStatus
|
63 |
assetUrl: string
|
|
|
64 |
error: string
|
65 |
maskUrl: string
|
66 |
segments: ImageSegment[]
|
67 |
}
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
cache: CacheMode
|
43 |
|
44 |
wait: boolean // wait until the job is completed
|
45 |
+
|
46 |
+
analyze: boolean // analyze the image to generate a caption (optional)
|
47 |
}
|
48 |
|
49 |
export interface ImageSegment {
|
|
|
63 |
renderId: string
|
64 |
status: RenderedSceneStatus
|
65 |
assetUrl: string
|
66 |
+
alt: string
|
67 |
error: string
|
68 |
maskUrl: string
|
69 |
segments: ImageSegment[]
|
70 |
}
|
71 |
|
72 |
+
/**
 * Payload for an image analysis call: the image to inspect plus the text
 * prompt that accompanies it.
 */
export interface ImageAnalysisRequest {
|
73 |
+
image: string // in base64
|
74 |
+
prompt: string // text instruction sent together with the image
|
75 |
+
}
|
76 |
+
|
77 |
+
/**
 * Shape expected back from an image analysis call.
 */
export interface ImageAnalysisResponse {
|
78 |
+
result: string // text produced by the analysis
|
79 |
+
error?: string // optional; presumably set when the analysis fails (TODO confirm against the API)
|
80 |
+
}
|