Commit 53aa97a
0 Parent(s):
Duplicate from jbilcke-hf/VideoChain-API
Co-authored-by: Julian Bilcke <[email protected]>
Note: this view is limited to 50 files because the commit contains too many changes.
- .dockerignore +6 -0
- .gitignore +8 -0
- .nvmrc +1 -0
- Dockerfile +41 -0
- LICENSE.txt +201 -0
- README.md +12 -0
- TODO.md +6 -0
- package-lock.json +0 -0
- package.json +41 -0
- src/config.mts +14 -0
- src/data/all_words.json +0 -0
- src/data/good_words.json +0 -0
- src/index.mts +605 -0
- src/initFolders.mts +19 -0
- src/llm/enrichVideoSpecsUsingLLM.mts +75 -0
- src/llm/openai/createChatCompletion.mts +105 -0
- src/llm/openai/createChatCompletionStream.mts +66 -0
- src/llm/openai/generateYAML.mts +42 -0
- src/llm/openai/getTextPrompt.mts +4 -0
- src/llm/openai/getUserContent.mts +7 -0
- src/llm/openai/openai.mts +7 -0
- src/llm/openai/runModerationCheck.mts +30 -0
- src/llm/openai/stream.mts +35 -0
- src/llm/types.mts +25 -0
- src/main.mts +41 -0
- src/preproduction/mocks.mts +36 -0
- src/preproduction/prompts.mts +123 -0
- src/production/addAudioToVideo.mts +43 -0
- src/production/assembleShots.mts +59 -0
- src/production/concatNoGL.mts +33 -0
- src/production/generateActor.mts +50 -0
- src/production/generateAudio.mts +76 -0
- src/production/generateAudioLegacy.mts +52 -0
- src/production/generateVideo.mts +57 -0
- src/production/generateVoice.mts +78 -0
- src/production/interpolateVideo.mts +81 -0
- src/production/interpolateVideoLegacy.mts +56 -0
- src/production/mergeAudio.mts +49 -0
- src/production/normalizePendingVideoToTmpFilePath.mts +33 -0
- src/production/postInterpolation.mts +58 -0
- src/production/renderImage.mts +54 -0
- src/production/renderImageSegmentation.mts +69 -0
- src/production/renderPipeline.mts +46 -0
- src/production/renderScene.mts +62 -0
- src/production/renderVideo.mts +41 -0
- src/production/renderVideoSegmentation.mts +52 -0
- src/production/upscaleVideo.mts +78 -0
- src/scheduler/deleteVideo.mts +28 -0
- src/scheduler/getAllVideosForOwner.mts +9 -0
- src/scheduler/getCompletedVideos.mts +9 -0
.dockerignore
ADDED
@@ -0,0 +1,6 @@
+node_modules
+npm-debug.log
+models
+sandbox
+audio.pipe
+video.pipe
.gitignore
ADDED
@@ -0,0 +1,8 @@
+node_modules
+*.log
+*.bin
+.DS_Store
+.venv
+*.mp4
+sandbox
+scripts
.nvmrc
ADDED
@@ -0,0 +1 @@
+v18.16.0
Dockerfile
ADDED
@@ -0,0 +1,41 @@
+FROM node:18
+# try this maybe
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt update
+
+# For FFMPEG and gl concat
+RUN apt --yes install ffmpeg curl build-essential python3 python3-dev libx11-dev libxext-dev libxext6 libglu1-mesa-dev xvfb libxi-dev libglew-dev pkg-config
+
+# For Puppeteer
+RUN apt --yes install libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libgbm1 libasound2 libpangocairo-1.0-0 libxss1 libgtk-3-0
+
+# Set up a new user named "user" with user ID 1000
+RUN useradd -o -u 1000 user
+
+# Switch to the "user" user
+USER user
+
+# Set home to the user's home directory
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+# Set the working directory to the user's home directory
+WORKDIR $HOME/app
+
+# Install app dependencies
+# A wildcard is used to ensure both package.json AND package-lock.json are copied
+# where available (npm@5+)
+COPY --chown=user package*.json $HOME/app
+
+RUN npm install
+
+# Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+COPY --chown=user . $HOME/app
+
+EXPOSE 7860
+
+# we can't use this (it times out)
+# CMD [ "xvfb-run", "-s", "-ac -screen 0 1920x1080x24", "npm", "run", "start" ]
+CMD [ "npm", "run", "start" ]
LICENSE.txt
ADDED
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
README.md
ADDED
@@ -0,0 +1,12 @@
+---
+title: VideoChain API
+emoji: 🎥 🔗
+colorFrom: black
+colorTo: white
+sdk: docker
+pinned: false
+app_port: 7860
+duplicated_from: jbilcke-hf/VideoChain-API
+---
+
+A microservice to generate videos
TODO.md
ADDED
@@ -0,0 +1,6 @@
+
+
+To allow multiple videos to be processed at the same time:
+
+[ ] yield from the loop at each step
+[ ] random processing of videos
package-lock.json
ADDED
The diff for this file is too large to render.
package.json
ADDED
@@ -0,0 +1,41 @@
+{
+  "name": "videochain-api",
+  "version": "1.0.0",
+  "description": "A service which wraps and chains video and audio spaces together",
+  "main": "src/index.mts",
+  "scripts": {
+    "start": "node --loader ts-node/esm src/index.mts",
+    "test:submitVideo": "node --loader ts-node/esm src/tests/submitVideo.mts",
+    "test:checkStatus": "node --loader ts-node/esm src/tests/checkStatus.mts",
+    "test:downloadFileToTmp": "node --loader ts-node/esm src/tests/downloadFileToTmp.mts",
+    "test:stuff": "node --loader ts-node/esm src/utils/segmentImage.mts",
+    "docker": "npm run docker:build && npm run docker:run",
+    "docker:build": "docker build -t videochain-api .",
+    "docker:run": "docker run -it -p 7860:7860 videochain-api"
+  },
+  "author": "Julian Bilcke <[email protected]>",
+  "license": "Apache License",
+  "dependencies": {
+    "@gorgonjs/gorgon": "^1.4.1",
+    "@gradio/client": "^0.1.4",
+    "@huggingface/inference": "^2.6.1",
+    "@types/express": "^4.17.17",
+    "@types/ffmpeg-concat": "^1.1.2",
+    "@types/uuid": "^9.0.2",
+    "eventsource-parser": "^1.0.0",
+    "express": "^4.18.2",
+    "ffmpeg-concat": "^1.3.0",
+    "fluent-ffmpeg": "^2.1.2",
+    "fs-extra": "^11.1.1",
+    "gpt-tokens": "^1.1.1",
+    "node-fetch": "^3.3.1",
+    "openai": "^3.3.0",
+    "puppeteer": "^20.8.0",
+    "resize-base64": "^1.0.12",
+    "sharp": "^0.32.4",
+    "temp-dir": "^3.0.0",
+    "ts-node": "^10.9.1",
+    "uuid": "^9.0.0",
+    "yaml": "^2.3.1"
+  }
+}
src/config.mts
ADDED
@@ -0,0 +1,14 @@
+import path from "node:path"
+
+export const storagePath = `${process.env.VC_STORAGE_PATH || './sandbox'}`
+
+export const metadataDirPath = path.join(storagePath, "metadata")
+export const pendingMetadataDirFilePath = path.join(metadataDirPath, "pending")
+export const completedMetadataDirFilePath = path.join(metadataDirPath, "completed")
+
+export const filesDirPath = path.join(storagePath, "files")
+export const pendingFilesDirFilePath = path.join(filesDirPath, "pending")
+export const completedFilesDirFilePath = path.join(filesDirPath, "completed")
+
+export const shotFormatVersion = 1
+export const sequenceFormatVersion = 1
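
Editor's note: a minimal sketch (not part of the commit) of where these exports resolve when VC_STORAGE_PATH is left unset; the commented values follow directly from the path.join calls above.

import {
  pendingMetadataDirFilePath,
  completedMetadataDirFilePath,
  pendingFilesDirFilePath,
  completedFilesDirFilePath,
} from "./config.mts"

// with the default storagePath of "./sandbox", path.join normalizes to:
console.log(pendingMetadataDirFilePath)   // sandbox/metadata/pending
console.log(completedMetadataDirFilePath) // sandbox/metadata/completed
console.log(pendingFilesDirFilePath)      // sandbox/files/pending
console.log(completedFilesDirFilePath)    // sandbox/files/completed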
src/data/all_words.json
ADDED
The diff for this file is too large to render.
src/data/good_words.json
ADDED
The diff for this file is too large to render.
src/index.mts
ADDED
@@ -0,0 +1,605 @@
+import { createReadStream, existsSync } from "node:fs"
+import path from "node:path"
+
+import { v4 as uuidv4, validate as uuidValidate } from "uuid"
+import express from "express"
+
+import { Video, VideoStatus, VideoAPIRequest, RenderRequest, RenderedScene } from "./types.mts"
+import { parseVideoRequest } from "./utils/parseVideoRequest.mts"
+import { savePendingVideo } from "./scheduler/savePendingVideo.mts"
+import { getVideo } from "./scheduler/getVideo.mts"
+import { main } from "./main.mts"
+import { completedFilesDirFilePath } from "./config.mts"
+import { markVideoAsToDelete } from "./scheduler/markVideoAsToDelete.mts"
+import { markVideoAsToAbort } from "./scheduler/markVideoAsToAbort.mts"
+import { markVideoAsToPause } from "./scheduler/markVideoAsToPause.mts"
+import { markVideoAsPending } from "./scheduler/markVideoAsPending.mts"
+import { getPendingVideos } from "./scheduler/getPendingVideos.mts"
+import { hasValidAuthorization } from "./utils/hasValidAuthorization.mts"
+import { getAllVideosForOwner } from "./scheduler/getAllVideosForOwner.mts"
+import { initFolders } from "./initFolders.mts"
+import { sortVideosByYoungestFirst } from "./utils/sortVideosByYoungestFirst.mts"
+import { generateVideo } from "./production/generateVideo.mts"
+import { generateSeed } from "./utils/generateSeed.mts"
+import { getRenderedScene, renderScene } from "./production/renderScene.mts"
+
+initFolders()
+// to disable all processing (e.g. to debug),
+// comment out the following line:
+main()
+
+const app = express()
+const port = 7860
+
+app.use(express.json())
+
+let isRendering = false
+
+// a "fast track" pipeline
+app.post("/render", async (req, res) => {
+
+  const request = req.body as RenderRequest
+  console.log(req.body)
+  if (!request.prompt) {
+    console.log("Invalid prompt")
+    res.status(400)
+    res.write(JSON.stringify({ url: "", error: "invalid prompt" }))
+    res.end()
+    return
+  }
+
+  let response: RenderedScene = {
+    renderId: "",
+    status: "pending",
+    assetUrl: "",
+    maskBase64: "",
+    error: "",
+    segments: []
+  }
+
+  try {
+    response = await renderScene(request)
+  } catch (err) {
+    // console.log("failed to render scene!")
+    response.error = `failed to render scene: ${err}`
+  }
+
+  if (response.error === "already rendering") {
+    console.log("server busy")
+    res.status(200)
+    res.write(JSON.stringify(response))
+    res.end()
+    return
+  } else if (response.error.length > 0) {
+    // console.log("server error")
+    res.status(500)
+    res.write(JSON.stringify(response))
+    res.end()
+    return
+  } else {
+    // console.log("all good")
+    res.status(200)
+    res.write(JSON.stringify(response))
+    res.end()
+    return
+  }
+})
+
+// a "fast track" pipeline
+app.get("/render/:renderId", async (req, res) => {
+
+  const renderId = `${req.params.renderId}`
+
+  if (!uuidValidate(renderId)) {
+    console.error("invalid render id")
+    res.status(400)
+    res.write(JSON.stringify({ error: `invalid render id` }))
+    res.end()
+    return
+  }
+
+  let response: RenderedScene = {
+    renderId: "",
+    status: "pending",
+    assetUrl: "",
+    error: "",
+    maskBase64: "",
+    segments: []
+  }
+
+  try {
+    response = await getRenderedScene(renderId)
+  } catch (err) {
+    // console.log("failed to render scene!")
+    response.error = `failed to render scene: ${err}`
+  }
+
+  if (response.error === "already rendering") {
+    console.log("server busy")
+    res.status(200)
+    res.write(JSON.stringify(response))
+    res.end()
+    return
+  } else if (response.error.length > 0) {
+    // console.log("server error")
+    res.status(500)
+    res.write(JSON.stringify(response))
+    res.end()
+    return
+  } else {
+    // console.log("all good")
+    res.status(200)
+    res.write(JSON.stringify(response))
+    res.end()
+    return
+  }
+})
+
+
+// a "fast track" pipeline
+/*
+app.post("/segment", async (req, res) => {
+
+  const request = req.body as RenderRequest
+  console.log(req.body)
+
+  let result: RenderedScene = {
+    assetUrl: "",
+    maskBase64: "",
+    error: "",
+    segments: []
+  }
+
+  try {
+    result = await renderScene(request)
+  } catch (err) {
+    // console.log("failed to render scene!")
+    result.error = `failed to render scene: ${err}`
+  }
+
+  if (result.error === "already rendering") {
+    console.log("server busy")
+    res.status(200)
+    res.write(JSON.stringify({ url: "", error: result.error }))
+    res.end()
+    return
+  } else if (result.error.length > 0) {
+    // console.log("server error")
+    res.status(500)
+    res.write(JSON.stringify({ url: "", error: result.error }))
+    res.end()
+    return
+  } else {
+    // console.log("all good")
+    res.status(200)
+    res.write(JSON.stringify(result))
+    res.end()
+    return
+  }
+})
+*/
+
+
+
+app.post("/:ownerId", async (req, res) => {
+  const request = req.body as VideoAPIRequest
+
+  if (!hasValidAuthorization(req.headers)) {
+    console.log("Invalid authorization")
+    res.status(401)
+    res.write(JSON.stringify({ error: "invalid token" }))
+    res.end()
+    return
+  }
+
+  const ownerId = req.params.ownerId
+
+  if (!uuidValidate(ownerId)) {
+    console.error("invalid owner id")
+    res.status(400)
+    res.write(JSON.stringify({ error: `invalid owner id` }))
+    res.end()
+    return
+  }
+
+  let video: Video = null
+
+  console.log(`creating video from request..`)
+  console.log(`request: `, JSON.stringify(request))
+  if (!request?.prompt?.length) {
+    console.error(`failed to create video (prompt is empty)`)
+    res.status(400)
+    res.write(JSON.stringify({ error: "prompt is empty" }))
+    res.end()
+    return
+  }
+  try {
+    video = await parseVideoRequest(ownerId, request)
+  } catch (err) {
+    console.error(`failed to create video: ${video} (${err})`)
+    res.status(400)
+    res.write(JSON.stringify({ error: "query seems to be malformed" }))
+    res.end()
+    return
+  }
+
+  console.log(`saving video ${video.id}`)
+  try {
+    await savePendingVideo(video)
+    res.status(200)
+    res.write(JSON.stringify(video))
+    res.end()
+  } catch (err) {
+    console.error(err)
+    res.status(500)
+    res.write(JSON.stringify({ error: "couldn't save the video" }))
+    res.end()
+  }
+})
+
+
+app.get("/:ownerId/:videoId\.mp4", async (req, res) => {
+
+  /*
+  for simplicity, let's skip auth when fetching videos
+  the UUIDs cannot easily be guessed anyway
+
+  if (!hasValidAuthorization(req.headers)) {
+    console.log("Invalid authorization")
+    res.status(401)
+    res.write(JSON.stringify({ error: "invalid token" }))
+    res.end()
+    return
+  }
+  */
+
+  const ownerId = req.params.ownerId
+  console.log("downloading..")
+
+  if (!uuidValidate(ownerId)) {
+    console.error("invalid owner id")
+    res.status(400)
+    res.write(JSON.stringify({ error: `invalid owner id` }))
+    res.end()
+    return
+  }
+
+  const videoId = req.params.videoId
+
+  if (!uuidValidate(videoId)) {
+    console.error("invalid video id")
+    res.status(400)
+    res.write(JSON.stringify({ error: `invalid video id` }))
+    res.end()
+    return
+  }
+
+  let video: Video = null
+  try {
+    video = await getVideo(ownerId, videoId)
+    console.log(`returning video ${videoId} to owner ${ownerId}`)
+  } catch (err) {
+    res.status(404)
+    res.write(JSON.stringify({ error: "this video doesn't exist" }))
+    res.end()
+    return
+  }
+
+  const completedFilePath = path.join(completedFilesDirFilePath, video.fileName)
+
+  // note: we DON'T want to use the pending file path, as there may be operations on it
+  // (ie. a process might be busy writing stuff to it)
+  const filePath = existsSync(completedFilePath) ? completedFilePath : ""
+  if (!filePath) {
+    res.status(400)
+    res.write(JSON.stringify({ error: "video exists, but cannot be previewed yet" }))
+    res.end()
+    return
+  }
+
+  // file path exists, let's try to read it
+  try {
+    // do we need this?
+    // res.status(200)
+    // res.setHeader("Content-Type", "media/mp4")
+    console.log(`creating a video read stream from ${filePath}`)
+    const stream = createReadStream(filePath)
+
+    stream.on('close', () => {
+      console.log(`finished streaming the video`)
+      res.end()
+    })
+
+    stream.pipe(res)
+  } catch (err) {
+    console.error(`failed to read the video file at ${filePath}: ${err}`)
+    res.status(500)
+    res.write(JSON.stringify({ error: "failed to read the video file" }))
+    res.end()
+  }
+})
+
+// get metadata (json)
+app.get("/:ownerId/:videoId", async (req, res) => {
+
+  if (!hasValidAuthorization(req.headers)) {
+    console.log("Invalid authorization")
+    res.status(401)
+    res.write(JSON.stringify({ error: "invalid token" }))
+    res.end()
+    return
+  }
+
+  const ownerId = req.params.ownerId
+
+  if (!uuidValidate(ownerId)) {
+    console.error("invalid owner id")
+    res.status(400)
+    res.write(JSON.stringify({ error: `invalid owner id` }))
+    res.end()
+    return
+  }
+
+  const videoId = req.params.videoId
+
+  if (!uuidValidate(videoId)) {
+    console.error("invalid video id")
+    res.status(400)
+    res.write(JSON.stringify({ error: `invalid video id` }))
+    res.end()
+    return
+  }
+
+  try {
+    const video = await getVideo(ownerId, videoId)
+    res.status(200)
+    res.write(JSON.stringify(video))
+    res.end()
+  } catch (err) {
+    console.error(err)
+    res.status(404)
+    res.write(JSON.stringify({ error: "couldn't find this video" }))
+    res.end()
+  }
+})
+
+// only get the videos for a specific owner
+app.get("/:ownerId", async (req, res) => {
+  if (!hasValidAuthorization(req.headers)) {
+    console.log("Invalid authorization")
+    res.status(401)
+    res.write(JSON.stringify({ error: "invalid token" }))
+    res.end()
+    return
+  }
+
+  const ownerId = req.params.ownerId
+
+  if (!uuidValidate(ownerId)) {
+    console.error(`invalid owner id ${ownerId}`)
+    res.status(400)
+    res.write(JSON.stringify({ error: `invalid owner id ${ownerId}` }))
+    res.end()
+    return
+  }
+
+  try {
+    const videos = await getAllVideosForOwner(ownerId)
+    sortVideosByYoungestFirst(videos)
+
+    res.status(200)
+    res.write(JSON.stringify(videos.filter(video => video.status !== "delete"), null, 2))
+    res.end()
+  } catch (err) {
+    console.error(err)
+    res.status(500)
+    res.write(JSON.stringify({ error: `couldn't get the videos for owner ${ownerId}` }))
+    res.end()
+  }
+})
+
+// get all pending videos - this is for admin usage only
+app.get("/", async (req, res) => {
+  if (!hasValidAuthorization(req.headers)) {
+    // this is what users will see in the space - but no need to show something scary
+    console.log("Invalid authorization")
+    res.status(200)
+    res.write(`<html><head></head><body>
+    This space is the rendering engine used by various demo spaces, such as <a href="https://jbilcke-hf-fishtank.hf.space" target="_blank">FishTank</a> and <a href="https://jbilcke-hf-videochain-ui.hf.space" target="_blank">VideoChain UI</a>
+    </body></html>`)
+    res.end()
+    // res.status(401)
+    // res.write(JSON.stringify({ error: "invalid token" }))
+    // res.end()
+    return
+  }
+
+  try {
+    const videos = await getPendingVideos()
+    res.status(200)
+    res.write(JSON.stringify(videos, null, 2))
+    res.end()
+  } catch (err) {
+    console.error(err)
+    res.status(500)
+    res.write(JSON.stringify({ error: "couldn't get the videos" }))
+    res.end()
+  }
+})
+
+
+// edit a video
+app.patch("/:ownerId/:videoId", async (req, res) => {
+
+  if (!hasValidAuthorization(req.headers)) {
+    console.log("Invalid authorization")
+    res.status(401)
+    res.write(JSON.stringify({ error: "invalid token" }))
+    res.end()
+    return
+  }
+
+  const ownerId = req.params.ownerId
+
+  if (!uuidValidate(ownerId)) {
+    console.error(`invalid owner id ${ownerId}`)
+    res.status(400)
+    res.write(JSON.stringify({ error: `invalid owner id ${ownerId}` }))
+    res.end()
+    return
+  }
+
+  const videoId = req.params.videoId
+
+  if (!uuidValidate(videoId)) {
+    console.error(`invalid video id ${videoId}`)
+    res.status(400)
+    res.write(JSON.stringify({ error: `invalid video id ${videoId}` }))
+    res.end()
+    return
+  }
+
+  let status: VideoStatus = "unknown"
+  try {
+    const request = req.body as { status: VideoStatus }
+    if (['pending', 'abort', 'delete', 'pause'].includes(request.status)) {
+      status = request.status
+    } else {
+      throw new Error(`invalid video status "${request.status}"`)
+    }
+  } catch (err) {
+    console.error(`invalid parameter (${err})`)
+    res.status(401)
+    res.write(JSON.stringify({ error: `invalid parameter (${err})` }))
+    res.end()
+    return
+  }
+
+  switch (status) {
+    case 'delete':
+      try {
+        await markVideoAsToDelete(ownerId, videoId)
+        console.log(`deleting video ${videoId}`)
+        res.status(200)
+        res.write(JSON.stringify({ success: true }))
+        res.end()
+      } catch (err) {
+        console.error(`failed to delete video ${videoId} (${err})`)
+        res.status(500)
+        res.write(JSON.stringify({ error: `failed to delete video ${videoId}` }))
+        res.end()
+      }
+      break
+
+    case 'abort':
+      try {
+        await markVideoAsToAbort(ownerId, videoId)
+        console.log(`aborted video ${videoId}`)
+        res.status(200)
+        res.write(JSON.stringify({ success: true }))
+        res.end()
+      } catch (err) {
+        console.error(`failed to abort video ${videoId} (${err})`)
+        res.status(500)
+        res.write(JSON.stringify({ error: `failed to abort video ${videoId}` }))
+        res.end()
+      }
+      break
+
+    case 'pause':
+      try {
+        await markVideoAsToPause(ownerId, videoId)
+        console.log(`paused video ${videoId}`)
+        res.status(200)
+        res.write(JSON.stringify({ success: true }))
+        res.end()
+      } catch (err) {
+        console.error(`failed to pause video ${videoId} (${err})`)
+        res.status(500)
+        res.write(JSON.stringify({ error: `failed to pause video ${videoId}` }))
+        res.end()
+      }
+      break
+
+    case 'pending':
+      try {
+        await markVideoAsPending(ownerId, videoId)
+        console.log(`unpausing video ${videoId}`)
+        res.status(200)
+        res.write(JSON.stringify({ success: true }))
+        res.end()
+      } catch (err) {
+        console.error(`failed to unpause video ${videoId} (${err})`)
+        res.status(500)
+        res.write(JSON.stringify({ error: `failed to unpause video ${videoId}` }))
+        res.end()
+      }
+      break
+
+    default:
+      console.log(`unsupported status ${status}`)
+      res.status(401)
+      res.write(JSON.stringify({ error: `unsupported status ${status}` }))
+      res.end()
+  }
+})
+
+// delete a video - this is legacy, we should use other functions instead
+/*
+app.delete("/:id", async (req, res) => {
+
+  if (!hasValidAuthorization(req.headers)) {
+    console.log("Invalid authorization")
+    res.status(401)
+    res.write(JSON.stringify({ error: "invalid token" }))
+    res.end()
+    return
+  }
+
+  const [ownerId, videoId] = `${req.params.id}`.split("_")
+
+  if (!uuidValidate(ownerId)) {
+    console.error("invalid owner id")
+    res.status(400)
+    res.write(JSON.stringify({ error: `invalid owner id` }))
+    res.end()
+    return
+  }
+
+  if (!uuidValidate(videoId)) {
+    console.error("invalid video id")
+    res.status(400)
+    res.write(JSON.stringify({ error: `invalid video id` }))
+    res.end()
+    return
+  }
+
+  // security note: we always check the existence of the video first
+  // that's because we are going to delete all the associated files with a glob,
+  // so we must be sure the id is not a system path or something ^^
+  let video: Video = null
+  try {
+    video = await getVideo(ownerId, videoId)
+  } catch (err) {
+    console.error(err)
+    res.status(404)
+    res.write(JSON.stringify({ error: "couldn't find this video" }))
+    res.end()
+    return
+  }
+
+  try {
+    await markVideoAsToDelete(ownerId, videoId)
+    res.status(200)
+    res.write(JSON.stringify({ success: true }))
+    res.end()
+  } catch (err) {
+    console.error(err)
+    res.status(500)
+    res.write(JSON.stringify({ success: false, error: "failed to delete the video" }))
+    res.end()
+  }
+})
+*/
+
+app.listen(port, () => { console.log(`Open http://localhost:${port}`) })
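
Editor's note: a hypothetical client sketch (not part of the commit) for the two "fast track" endpoints above, assuming the service runs locally on port 7860 (the port hard-coded in this file); the prompt value is made up for illustration.

const base = "http://localhost:7860"

// submit a render request (POST /render)
const submitted = await fetch(`${base}/render`, {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ prompt: "a sunset over the ocean" }),
})
const scene = await submitted.json() // a RenderedScene: { renderId, status, assetUrl, ... }

// then poll the render status (GET /render/:renderId) until it is no longer pending
const polled = await fetch(`${base}/render/${scene.renderId}`)
console.log(await polled.json())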
src/initFolders.mts
ADDED
@@ -0,0 +1,19 @@
+import {
+  metadataDirPath,
+  pendingMetadataDirFilePath,
+  completedMetadataDirFilePath,
+  filesDirPath,
+  pendingFilesDirFilePath,
+  completedFilesDirFilePath
+} from "./config.mts"
+import { createDirIfNeeded } from "./utils/createDirIfNeeded.mts"
+
+export const initFolders = () => {
+  console.log(`initializing folders..`)
+  createDirIfNeeded(metadataDirPath)
+  createDirIfNeeded(pendingMetadataDirFilePath)
+  createDirIfNeeded(completedMetadataDirFilePath)
+  createDirIfNeeded(filesDirPath)
+  createDirIfNeeded(pendingFilesDirFilePath)
+  createDirIfNeeded(completedFilesDirFilePath)
+}
src/llm/enrichVideoSpecsUsingLLM.mts
ADDED
@@ -0,0 +1,75 @@
+import { ChatCompletionRequestMessage } from "openai"
+
+import { Video, VideoAPIRequest } from "../types.mts"
+import { generateYAML } from "./openai/generateYAML.mts"
+import { HallucinatedVideoRequest, OpenAIErrorResponse } from "./types.mts"
+import { getQueryChatMessages } from "../preproduction/prompts.mts"
+import { getValidNumber } from "../utils/getValidNumber.mts"
+import { parseShotRequest } from "../utils/parseShotRequest.mts"
+
+
+export const enrichVideoSpecsUsingLLM = async (video: Video): Promise<Video> => {
+
+  const messages: ChatCompletionRequestMessage[] = getQueryChatMessages(video.videoPrompt)
+
+  const defaultValue = {} as unknown as HallucinatedVideoRequest
+
+  let hallucinatedVideo: HallucinatedVideoRequest
+  video.shots = []
+
+  try {
+    hallucinatedVideo = await generateYAML<HallucinatedVideoRequest>(
+      messages,
+      defaultValue
+    )
+    console.log("enrichVideoSpecsUsingLLM: hallucinatedVideo = ", hallucinatedVideo)
+  } catch (err) {
+
+    let error: OpenAIErrorResponse = err?.response?.data?.error as unknown as OpenAIErrorResponse
+    if (!error) {
+      error = { message: `${err || ""}` } as unknown as OpenAIErrorResponse
+    }
+
+    console.error(JSON.stringify(error, null, 2))
+    throw new Error(`failed to call the LLM: ${error.message}`)
+  }
+
+  // const video = JSON.parse(JSON.stringify(referenceVideo)) as Video
+
+  // TODO here we should make some verifications and perhaps even some conversions
+  // between the LLM response and the actual format used in a videoRequest
+  video.backgroundAudioPrompt = hallucinatedVideo.backgroundAudioPrompt || video.backgroundAudioPrompt
+  video.foregroundAudioPrompt = hallucinatedVideo.foregroundAudioPrompt || video.foregroundAudioPrompt
+  video.actorPrompt = hallucinatedVideo.actorPrompt || video.actorPrompt
+  video.actorVoicePrompt = hallucinatedVideo.actorVoicePrompt || video.actorVoicePrompt
+
+  video.noise = typeof hallucinatedVideo.noise !== "undefined"
+    ? (`${hallucinatedVideo.noise || ""}`.toLowerCase() === "true")
+    : video.noise
+
+  video.noiseAmount = typeof hallucinatedVideo.noiseAmount !== "undefined"
+    ? getValidNumber(hallucinatedVideo.noiseAmount, 0, 10, 2)
+    : video.noiseAmount
+
+  video.outroDurationMs = typeof hallucinatedVideo.outroDurationMs !== "undefined"
+    ? getValidNumber(hallucinatedVideo.outroDurationMs, 0, 3000, 500)
+    : video.outroDurationMs
+
+  const hallucinatedShots = Array.isArray(hallucinatedVideo.shots) ? hallucinatedVideo.shots : []
+
+
+  for (const hallucinatedShot of hallucinatedShots) {
+    const shot = await parseShotRequest(video, {
+      shotPrompt: hallucinatedShot.shotPrompt,
+      environmentPrompt: hallucinatedShot.environmentPrompt,
+      photographyPrompt: hallucinatedShot.photographyPrompt,
+      actionPrompt: hallucinatedShot.actionPrompt,
+      foregroundAudioPrompt: hallucinatedShot.foregroundAudioPrompt
+    })
+    video.shots.push(shot)
+  }
+
+  console.log("enrichVideoSpecsUsingLLM: video = ", video)
+
+  return video
+}
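
Editor's note: a usage sketch (assumption, not in the commit). The function mutates and returns the draft video; only the fields it actually reads are shown here, and the cast is a shortcut because the full Video type carries many more fields.

import { Video } from "../types.mts"
import { enrichVideoSpecsUsingLLM } from "./enrichVideoSpecsUsingLLM.mts"

const draft = { videoPrompt: "a cat chasing a drone", shots: [] } as unknown as Video
const enriched = await enrichVideoSpecsUsingLLM(draft)
console.log(enriched.shots.length) // shots hallucinated by the LLM, parsed via parseShotRequest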
src/llm/openai/createChatCompletion.mts
ADDED
@@ -0,0 +1,105 @@
+import { ChatCompletionRequestMessage } from "openai"
+import { GPTTokens } from "gpt-tokens"
+
+import { openai } from "./openai.mts"
+import { runModerationCheck } from "./runModerationCheck.mts"
+import { getUserContent } from "./getUserContent.mts"
+import { getTextPrompt } from "./getTextPrompt.mts"
+
+export const createChatCompletion = async (
+  messages: ChatCompletionRequestMessage[],
+  model = "gpt-4"
+): Promise<string> => {
+  // this is the part added by the user, and the one we need to check against the moderation API
+  const userContent = getUserContent(messages)
+
+  const check = await runModerationCheck(userContent)
+
+  if (check.flagged) {
+    console.error("Thoughtcrime: content flagged by the AI police", {
+      userContent,
+      moderationResult: check,
+    })
+    return "Thoughtcrime: content flagged by the AI police"
+  }
+
+  const rawPrompt = getTextPrompt(messages)
+
+
+  // for doc: https://www.npmjs.com/package/gpt-tokens
+  const usageInfo = new GPTTokens({
+    // Plus enjoy a 25% cost reduction for input tokens on GPT-3.5 Turbo (0.0015 per 1K input tokens)
+    plus: false,
+    model: "gpt-4",
+    messages: messages as any,
+  })
+
+  console.table({
+    "Tokens prompt": usageInfo.promptUsedTokens,
+    "Tokens completion": usageInfo.completionUsedTokens,
+    "Tokens total": usageInfo.usedTokens,
+  })
+
+  // Price USD: 0.000298
+  console.log("Price USD: ", usageInfo.usedUSD)
+
+  // const tokenLimit = 4000
+
+  const maxTokens = 4000 - usageInfo.promptUsedTokens
+
+  console.log("maxTokens:", maxTokens)
+  /*
+  console.log("settings:", {
+    tokenLimit,
+    promptLength: rawPrompt.length,
+    promptTokenLength: rawPrompt.length / 1.9,
+    maxTokens
+  })
+
+  console.log("createChatCompletion(): raw prompt length:", rawPrompt.length)
+  console.log(
+    `createChatCompletion(): requesting ${maxTokens} of the ${tokenLimit} tokens available`
+  )
+  */
+
+  console.log("query:", {
+    model,
+    // messages,
+    user: "Anonymous User",
+    temperature: 0.7,
+    max_tokens: maxTokens,
+    // stop: preset.stop?.length ? preset.stop : undefined,
+  })
+
+  const response = await openai.createChatCompletion({
+    model,
+    messages,
+    // TODO use the Hugging Face Login username here
+    user: "Anonymous User",
+    temperature: 0.7,
+
+    // 30 tokens is about 120 characters
+    // we don't want more, as it will take longer to respond
+    max_tokens: maxTokens,
+    // stop: preset.stop?.length ? preset.stop : undefined,
+  })
+
+  const { choices } = response.data
+
+  if (!choices.length) {
+    console.log("createChatCompletion(): no choice found in the LLM response..")
+    return ""
+  }
+  const firstChoice = choices[0]
+
+  if (firstChoice?.message?.role !== "assistant") {
+    console.log(
+      "createChatCompletion(): something went wrong, the model imagined the user response?!"
+    )
+    return ""
+  }
+
+  console.log("createChatCompletion(): response", firstChoice.message.content)
+
+  return firstChoice.message.content || ""
+}
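
Editor's note: a usage sketch (assumption, not in the commit). The helper takes the standard openai chat message array and resolves to the assistant's text, or an empty string when no usable choice comes back.

import { createChatCompletion } from "./createChatCompletion.mts"

const reply = await createChatCompletion([
  { role: "system", content: "You are a video director." },
  { role: "user", content: "Pitch a three-shot sequence about a storm." },
])
console.log(reply) // "" when the model returned no usable choice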
src/llm/openai/createChatCompletionStream.mts
ADDED
@@ -0,0 +1,66 @@
+import { ChatCompletionRequestMessage } from "openai"
+
+import { openai } from "./openai.mts"
+import { streamCompletion } from "./stream.mts"
+import { getTextPrompt } from "./getTextPrompt.mts"
+
+export const createChatCompletionStream = async (
+  messages: ChatCompletionRequestMessage[],
+  model: string,
+  onMessage: (message: string) => Promise<void>,
+  onEnd: () => Promise<void> = async () => {}
+) => {
+  try {
+    const rawPrompt = getTextPrompt(messages)
+
+    const tokenLimit = 4096 // 8000
+
+    const maxTokens = Math.round(tokenLimit - rawPrompt.length / 1.9)
+
+    const completion = await openai.createCompletion({
+      model,
+      prompt: rawPrompt, // the legacy completion API expects a string prompt, not chat messages
+      temperature: 0.7,
+      max_tokens: Math.min(30, maxTokens),
+      stream: true,
+    })
+
+    for await (const message of streamCompletion(completion as any)) {
+      try {
+        const parsed = JSON.parse(message)
+        const { text } = parsed.choices[0]
+
+        try {
+          await onMessage(text)
+        } catch (err) {
+          console.error(
+            'Could not process stream message (callback failed)',
+            message,
+            err
+          )
+        }
+      } catch (error) {
+        console.error('Could not JSON parse stream message', message, error)
+      }
+    }
+    try {
+      await onEnd()
+    } catch (err) {
+      console.error('onEnd callback failed', err)
+    }
+  } catch (error: any) {
+    if (error.code) {
+      try {
+        const parsed = JSON.parse(error.body)
+        console.error('An error occurred during OpenAI request: ', parsed)
+      } catch (error) {
+        console.error(
+          'An error occurred during OpenAI request (invalid json): ',
+          error
+        )
+      }
+    } else {
+      console.error('An error occurred during OpenAI request', error)
+    }
+  }
+}
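
Editor's note: a usage sketch (assumption, not in the commit) streaming tokens to stdout. The model name is illustrative only; since this helper goes through openai.createCompletion rather than the chat endpoint, it should be a legacy completion model.

import { createChatCompletionStream } from "./createChatCompletionStream.mts"

await createChatCompletionStream(
  [{ role: "user", content: "Say hello" }],
  "text-davinci-003", // hypothetical choice of completion model
  async (token) => { process.stdout.write(token) },
  async () => { console.log("\n[done]") },
)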
src/llm/openai/generateYAML.mts
ADDED
@@ -0,0 +1,42 @@
+import { ChatCompletionRequestMessage } from "openai"
+import { parse } from "yaml"
+
+import { createChatCompletion } from "./createChatCompletion.mts"
+
+export const generateYAML = async <T,>(messages: ChatCompletionRequestMessage[] = [], defaultValue?: T): Promise<T> => {
+
+  const defaultResult = defaultValue || ({} as T)
+
+  if (!messages.length) {
+    return defaultResult
+  }
+
+  const output = await createChatCompletion(messages)
+
+  let raw = ""
+
+  // cleanup any remains of the markdown response
+  raw = output.split("```")[0]
+
+  // remove any remaining `
+  const input = raw.replaceAll("`", "")
+
+  try {
+    const obj = parse(input) as T
+
+    if (obj === null || typeof obj === "undefined") {
+      throw new Error("couldn't parse YAML")
+    }
+
+    return obj
+  } catch (err) {
+    // just in case, we also try JSON!
+    const obj = JSON.parse(input) as T
+
+    if (obj === null || typeof obj === "undefined") {
+      throw new Error("couldn't parse JSON")
+    }
+
+    return obj
+  }
+}
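
Editor's note: a usage sketch (assumption, not in the commit) pairing the helper with a small result type. The default value is only returned when the message list is empty; parse failures fall through to JSON.parse and then throw.

import { generateYAML } from "./generateYAML.mts"

interface ShotIdea { shotPrompt: string }

const idea = await generateYAML<ShotIdea>(
  [{ role: "user", content: "Reply in YAML with a single shotPrompt field." }],
  { shotPrompt: "" }
)
console.log(idea.shotPrompt)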
src/llm/openai/getTextPrompt.mts
ADDED
@@ -0,0 +1,4 @@
+import { ChatCompletionRequestMessage } from "openai"
+
+export const getTextPrompt = (prompt: ChatCompletionRequestMessage[]) =>
+  prompt.reduce((acc, item) => acc.concat(item.content), "") || ""
src/llm/openai/getUserContent.mts
ADDED
@@ -0,0 +1,7 @@
+import { ChatCompletionRequestMessage } from "openai"
+
+export const getUserContent = (prompt: ChatCompletionRequestMessage[]) =>
+  prompt
+    .filter((message) => message.role === "user")
+    .map((message) => message.content)
+    .join("\n")
src/llm/openai/openai.mts
ADDED
@@ -0,0 +1,7 @@
+import { Configuration, OpenAIApi } from "openai"
+
+export const openai = new OpenAIApi(
+  new Configuration({
+    apiKey: process.env.VC_OPENAI_API_KEY
+  })
+)
src/llm/openai/runModerationCheck.mts
ADDED
@@ -0,0 +1,30 @@
import {
  CreateModerationResponseResultsInnerCategories,
  CreateModerationResponseResultsInnerCategoryScores,
} from "openai"

import { openai } from "./openai.mts"

export const runModerationCheck = async (
  input = ''
): Promise<{
  categories?: CreateModerationResponseResultsInnerCategories
  category_scores?: CreateModerationResponseResultsInnerCategoryScores
  flagged: boolean
}> => {
  if (!input || !input.length) {
    console.log(`skipping moderation check as the input is too short`)
    return {
      flagged: false,
    }
  }

  const response = await openai.createModeration({ input })
  const { results } = response.data

  if (!results.length) {
    throw new Error(`failed to call the moderation endpoint`)
  }

  return results[0]
}
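A minimal caller sketch (hypothetical, not part of this commit) showing how the `flagged` result could gate a prompt:

```typescript
// illustrative only
import { runModerationCheck } from "./runModerationCheck.mts"

const { flagged } = await runModerationCheck("some user-submitted prompt")
if (flagged) {
  throw new Error("prompt rejected by the moderation check")
}
```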
src/llm/openai/stream.mts
ADDED
@@ -0,0 +1,35 @@
import { Readable } from "node:stream"

async function* chunksToLines(
  chunksAsync: AsyncIterable<Buffer>
): AsyncIterable<string> {
  let previous = ""
  for await (const chunk of chunksAsync) {
    const bufferChunk = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)
    previous += bufferChunk
    let eolIndex
    while ((eolIndex = previous.indexOf("\n")) >= 0) {
      // line includes the EOL
      const line = previous.slice(0, eolIndex + 1).trimEnd()
      if (line === "data: [DONE]") break
      if (line.startsWith("data: ")) yield line
      previous = previous.slice(eolIndex + 1)
    }
  }
}

async function* linesToMessages(
  linesAsync: AsyncIterable<string>
): AsyncIterable<string> {
  for await (const line of linesAsync) {
    const message = line.substring("data: ".length)

    yield message
  }
}

export async function* streamCompletion(
  stream: Readable
): AsyncGenerator<string, void, undefined> {
  yield* linesToMessages(chunksToLines(stream))
}
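A standalone sketch of how `streamCompletion` consumes a server-sent-events stream (illustrative only; it feeds a hand-built `Readable` instead of a real OpenAI response):

```typescript
// illustrative only: any Readable carrying "data: ..." lines works
import { Readable } from "node:stream"
import { streamCompletion } from "./stream.mts"

const fakeSSE = Readable.from([
  Buffer.from('data: {"choices":[{"text":"Hello"}]}\n'),
  Buffer.from('data: [DONE]\n'),
])

for await (const message of streamCompletion(fakeSSE)) {
  console.log(JSON.parse(message).choices[0].text) // "Hello"
}
```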
src/llm/types.mts
ADDED
@@ -0,0 +1,25 @@
// note: this has to exactly match what is in the prompt, in ../preproduction/prompts.mts
export interface HallucinatedVideoRequest {
  backgroundAudioPrompt: string; // describe the background audio (crowd, birds, wind, sea etc..)
  foregroundAudioPrompt: string; // describe the foreground audio (cars revving, footsteps, objects breaking, explosion etc)
  actorPrompt: string; // describe the physical look of the main actor visible in the shot (man, woman, old, young, hair, glasses, clothes etc)
  actorVoicePrompt: string; // describe the main actor voice (man, woman, old, young, amused, annoyed.. etc)
  noise: boolean; // whether to apply movie noise or not
  noiseAmount: number; // (integer) the amount of ffmpeg noise (film grain) to apply. 0 is none, 10 is a lot
  outroDurationMs: number; // in milliseconds. An outro generally only lasts between 0 and 3000 (3s)

  shots: Array<{
    shotPrompt: string; // describe the main elements of a shot, in excruciating detail. You must include ALL those parameters: characters, shot story, what is happening. How they look, the textures, the expressions, their clothes. The color, materials and style of clothes.
    environmentPrompt: string; // describe the environment, in excruciating detail. You must include ALL those parameters: lights, atmosphere and weather (misty, dust, clear, rain, snow..). Time of the day and hour of the day. Furniture, its shape, style, era. The materials used for each object. The global time period, time of the day, era. Explain if anything is moving in the background.
    photographyPrompt: string; // describe the photography, in excruciating detail. You must include ALL those parameters: camera angle, position and movement. Type of shot and angle. Lighting. Mood. Settings. Tint of the lights. Position of the sun or moon. Shadows and their direction. Camera shutter speed, blur, bokeh, aperture.
    actionPrompt: string; // describe the dynamics of a shot, in excruciating detail. You must include ALL those parameters: what is happening, who and what is moving. Which entities are in movement. What are the directions, starting and ending positions. At which speed entities or objects are moving. Is there motion blur, slow motion, timelapse etc.
    foregroundAudioPrompt: string; // describe the sounds in a concise way (eg. ringing bells, underwater sound and whistling dolphin, cat meowing etc),
  }>
}

export interface OpenAIErrorResponse {
  message: string
  type: string
  param: any
  code: any
}
src/main.mts
ADDED
@@ -0,0 +1,41 @@
import { initFolders } from "./initFolders.mts"
import { getPendingVideos } from "./scheduler/getPendingVideos.mts"
import { processVideo } from "./scheduler/processVideo.mts"
import { sortPendingVideosByLeastCompletedFirst } from "./utils/sortPendingVideosByLeastCompletedFirst.mts"

export const main = async () => {

  const videos = await getPendingVideos()
  if (!videos.length) {
    // console.log(`no job to process.. going to try in 200 ms`)
    setTimeout(() => {
      main()
    }, 200)
    return
  }

  console.log(`there are ${videos.length} pending videos`)

  sortPendingVideosByLeastCompletedFirst(videos)

  let somethingFailed = ""
  await Promise.all(videos.map(async video => {
    try {
      const result = await processVideo(video)
      return result
    } catch (err) {
      somethingFailed = `${err}`
      // a video failed.. no big deal
      return Promise.resolve(somethingFailed)
    }
  }))

  if (somethingFailed) {
    console.error(`one of the jobs failed: ${somethingFailed}, let's wait 5 seconds`)
    setTimeout(() => { main() }, 5000)
  } else {
    console.log(`successfully worked on the jobs, let's immediately loop`)
    setTimeout(() => { main() }, 50)
  }

}
src/preproduction/mocks.mts
ADDED
@@ -0,0 +1,36 @@
import { Video, VideoShot } from "../types.mts"

export const mockShots: VideoShot[] = [
  {
    "shotPrompt": "In the extreme wide shot, a flock of ducks is converging on Central Park, coming from multiple directions. Their feathers are glossy and clean, casting off varying degrees of green, brown and white",
    "environmentPrompt": "Central Park at sunrise, the park looks slightly misty, the sky is tinged with shades of pink and orange as the day breaks. There's dew on the grass, and the leaves on trees are rustling in the light breeze",
    "photographyPrompt": "Eye-level shot with a slight tilt in the camera, capturing the panorama of the park. There's natural lighting, sun just rising. The camera zooms out to capture the ducks entering the park. Shutter speed is slow to capture the movement of ducks",
    "actionPrompt": "Large groups of ducks waddle into the park from various directions, some fly in groups, landing on the pond with small splashes. Movement is slow, slightly sped up to depict the invasion",
    "foregroundAudioPrompt": "A symphony of soft quacking and rustling feathers",
  },
  {
    "shotPrompt": "In the medium shot, a group of ducks are by the pond, pecking at the ground and frolicking in the water. One male mallard is particularly captivating with its emerald green head and healthy body",
    "environmentPrompt": "It's a sunny spring day in Central Park. The pond is surrounded by lush, green vegetation and dappled with sunlight filtering through the leaves",
    "photographyPrompt": "Low angle shot near the water level, the camera moves in a crane shot to capture ducks in action, and the camera's aperture is partially open. Natural sunlight creates playful shadows",
    "actionPrompt": "Ducks are pecking at the ground, dabbling at the water's edge and frolicking in the pond. The camera tracks a particularly majestic mallard navigating through the pond",
    "foregroundAudioPrompt": "Sounds of ducks quacking and splashing in the water"
  },
  {
    "shotPrompt": "Close-up shot of a mother duck with ducklings following her in a line on the grass and into the water",
    "environmentPrompt": "Central Park, by one of the smaller ponds, surrounded by green trees. Sun is high up giving off warm, radiant light",
    "photographyPrompt": "High angle shot, focusing on the line of ducklings following their mother. The camera follows the ducklings. The setting is bright and clear with sun illuminating the ducklings",
    "actionPrompt": "Mother duck is leading her ducklings from the grass into the water, the ducklings obediently follow, creating a neat line. The whole scene feels peaceful",
    "foregroundAudioPrompt": "Ducklings' high pitched chirping, soft lapping of water at the edge of the pond"
  }
] as any

export const mock: Video = {
  "backgroundAudioPrompt": "City ambience mixed with the rustling leaves and the chirping birds in the park",
  "foregroundAudioPrompt": "Rustling feathers, soft quacking, flapping wings, occasional splash in the pond",
  "actorPrompt": "Main actors are ducks - a variety of breeds, mostly mallards: males with glossy green heads and females in mottled brown; all plump, medium-sized waterfowl",
  "actorVoicePrompt": "Soft, low pitched quacking of adult ducks and higher pitched chirping of ducklings",
  "noise": true,
  "noiseAmount": 2,
  "outroDurationMs": 1500,
  "shots": mockShots
} as any
src/preproduction/prompts.mts
ADDED
@@ -0,0 +1,123 @@


// types of movie shots: https://www.nfi.edu/types-of-film-shots/

import { ChatCompletionRequestMessage } from "openai"

export const getQueryChatMessages = (sceneDescription: string): ChatCompletionRequestMessage[] => {
  return [
    {
      role: "system",
      name: "moviemaking_rules",
      content: `# Context
You are an AI Movie Director Assistant, and you need to help generating input requests (movie "specs") for an automated movie generation API.
The format expected by the API must be in YAML. The TypeScript schema for this YAML file is:
\`\`\`typescript
{
  backgroundAudioPrompt: string; // describe the background audio (crowd, birds, wind, sea etc..)
  foregroundAudioPrompt: string; // describe the foreground audio (cars revving, footsteps, objects breaking, explosion etc)
  actorPrompt: string; // describe the physical look of the main actor visible in the shot (man, woman, old, young, hair, glasses, clothes etc)
  actorVoicePrompt: string; // describe the main actor voice (man, woman, old, young, amused, annoyed.. etc)
  noise: boolean; // whether to apply movie noise or not
  noiseAmount: number; // (integer) the amount of noise (film grain) to apply. This is mapped from the FFmpeg filter (0 is none, 10 is already a lot)
  outroDurationMs: number; // in milliseconds. An outro generally only lasts between 0 and 3000 (3s)
  shots: Array<{
    shotPrompt: string; // describe the main elements of a shot, in excruciating detail. You must include ALL those parameters: characters, shot story, what is happening. How they look, the textures, the expressions, their clothes. The color, materials and style of clothes.
    environmentPrompt: string; // describe the environment, in excruciating detail. You must include ALL those parameters: lights, atmosphere and weather (misty, dust, clear, rain, snow..). Time of the day and hour of the day. Furniture, its shape, style, era. The materials used for each object. The global time period, time of the day, era. Explain if anything is moving in the background.
    photographyPrompt: string; // describe the photography, in excruciating detail. You must include ALL those parameters: camera angle, position and movement. Type of shot and angle. Lighting. Mood. Settings. Tint of the lights. Position of the sun or moon. Shadows and their direction. Camera shutter speed, blur, bokeh, aperture.
    actionPrompt: string; // describe the dynamics of a shot, in excruciating detail. You must include ALL those parameters: what is happening, who and what is moving. Which entities are in movement. What are the directions, starting and ending positions. At which speed entities or objects are moving. Is there motion blur, slow motion, timelapse etc.
    foregroundAudioPrompt: string; // describe the sounds in a concise way (eg. ringing bells, underwater sound and whistling dolphin, cat meowing etc),
  }>
}
\`\`\`
# Guidelines for number of shots and their duration
You will generate 1 shot by default, unless more or less are specified.
A shot can only last 3 seconds max.
So if you are asked to generate a 6-second video, you need 2 shots; for a 9-second video, 3 shots, and so on.
If you are asked to generate, for instance, an 11-second video, you need three 3-second shots plus one 2-second shot.
# Guidelines for writing descriptions
It is crucial to repeat the elements constituting a sequence of multiple shots verbatim from one shot to another.
For instance, you will have to repeat exactly what a character or background looks like, how they are dressed, etc.
This will ensure consistency from one scene to another.
## Choosing the right words
Here are some guidelines regarding film-making:
- The distance your subject is to the camera impacts how the audience feels about them.
- A subject will appear largest in a close-up or choker shot and smallest in a wide or long shot.
- Camera movement is a technique for changing the relationship between the subject and the camera frame, controlling the delivery of the narrative. It helps to give additional meaning to what’s happening on the screen.
- Do not hesitate to combine types of shots with camera movement shots and camera position (angle) shots.
## Shots
Single shot: where the shot only captures one subject.
Two shot: which has only two characters.
Three shot: when three characters are in the frame.
Point-of-view shot (POV): shows the scene from the point of view of one of the characters, makes the audience feel that they are there seeing what the character is seeing.
Over-the-shoulder shot (OTS): shows the subject from behind the shoulder of another character.
Over-the-hip (OTH) shot, in which the camera is placed on the hip of one character and the focus is on the subject.
Reverse angle shot: which is approximately 180 degrees opposite the previous shot.
Reaction shot: which shows the character’s reaction to the previous shot.
Weather shot: where the subject of the filming is the weather.
Extreme wide shot/extreme long shot: used to show the subject and the entire area of the environment they are in.
Wide shot/long shot: used to focus on the subject while still showing the scene the subject is in.
Medium shot: shows the subject from the knees up, and is often referred to as the 3/4 shot.
Medium close-up shot: the subject fills the frame. It is somewhere between a medium shot and a close-up.
Close-up shot: shows emotions and detailed reactions, with the subject filling the entire frame.
Choker shot: shows the subject’s face from just above the eyebrows to just below the mouth and is between a close-up and an extreme close-up.
Extreme close-up shot: shows the detail of an object, such as one a character is handling, or a person, such as just their eyes or moving lips.
Full shot: similar to a wide shot except that it focuses on the character in the frame, showing them from head to toe.
Cowboy shot: similar to the medium shot except that the character is shown from the hips or waist up.
Establishing shot: a long shot at the beginning of a scene that shows objects, buildings, and other elements of a setting from a distance to establish where the next sequence of events takes place.
## Camera angles
Eye-level shot: This is when the camera is placed at the same height as the eyes of the characters.
Low angle shot: This shot frames the subject from a low height, often used to emphasize differences in power between characters.
Aerial shot/helicopter shot: Taken from way up high, this shot is usually from a drone or helicopter to establish the expanse of the surrounding landscape.
High angle shot: This is when the subject is framed with the camera looking down at them.
Birds-eye-view shot/overhead shot: This is a shot taken from way above the subject, usually including a significant amount of the surrounding environment to create a sense of scale or movement.
Shoulder-level shot: This is where the camera is approximately the same height as the character’s shoulders.
Hip-level shot: The camera is approximately at the height of the character’s hips.
Knee-level shot: The camera is approximately at the same level as the character’s knees.
Ground-level shot: When the height of the camera is at ground level with the character, this shot captures what’s happening on the ground the character is standing on.
Dutch-angle/tilt shot: This is where the camera is tilted to the side.
Cut-in shot: This type of shot cuts into the action on the screen to offer a different view of something happening in this main scene.
Cutaway shot: As a shot that cuts away from the main action on the screen, it’s used to focus on secondary action and add more information for greater understanding for the audience.
Master shot: A long shot that captures most or all of the action happening in a scene.
Deep focus: A shot that keeps everything on the screen in sharp focus, including the foreground, background, and middle ground.
Locked-down shot: With this shot, the camera is fixed in one position and the action continues off-screen.
## Camera movements
Zoom shot: involves changing the focal length of the lens to zoom in or out during filming.
Pan shot: involves moving the camera from side to side to show something to the audience or help them better follow the sequence of events.
Tilt shot: similar to a pan shot, except moving the camera up and down.
Dolly shot: the camera is attached to a dolly that moves on tracks and can possibly move up and down.
Truck shot: you move the entire camera on a fixed point and the motion goes from side to side.
Pedestal shot: the entire camera is moved vertically, not just the angle of view, and is often combined with panning and/or tilting.
Static/fixed shot: where there is no camera movement, and the shot emphasizes the movement of the subject in the environment.
Arc shot: where the camera moves in an arc pattern around the subject to give the audience a better perspective of their surroundings.
Crab shot: a less-common version of tracking a subject where the dolly the camera is on goes sideways.
Dolly zoom shot: the position of the camera and focal length are changed simultaneously.
Whip pan shot/swish pan shot: used to create a blur as you pan from one shot to the next.
Tracking shot: the camera follows the subject, either from behind or at their side, moving with them.
Whip tilt shot: used to create a blur panning from one shot to the next vertically.
Bridging shot: denotes a shift in place or time.
## Focus
Focus pull: focus the lens to keep the subject within an acceptable focus range.
Rack focus: focus is more aggressively shifted from subject A to subject B.
Tilt-shift: parts of the image are in focus while other parts are out of focus.
Deep focus: both the subject and the environment are in focus.
Shallow focus: subject is crisp and in focus while the background is out of focus.
## Camera angles
High-angle
Low-angle
Over-the-shoulder
Bird’s eye
Dutch angle/tilt`
    },
    {
      role: "user",
      name: "movie_director",
      content: `# Task
Please generate the movie spec YAML based on the following description:
${sceneDescription}.
# YAML
\`\`\`
`
    },
  ]
}
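Since the system prompt above pins the output to a TypeScript schema, here is an illustrative (hand-written, hypothetical) spec of the shape the model is asked to emit, expressed with the matching `HallucinatedVideoRequest` type from src/llm/types.mts:

```typescript
// illustrative only, not part of this commit
import { HallucinatedVideoRequest } from "../llm/types.mts"

const example: HallucinatedVideoRequest = {
  backgroundAudioPrompt: "light city traffic, distant birds",
  foregroundAudioPrompt: "footsteps on gravel",
  actorPrompt: "an old man with round glasses, gray wool coat",
  actorVoicePrompt: "old man, calm, slightly amused",
  noise: true,
  noiseAmount: 2,
  outroDurationMs: 1500,
  shots: [
    {
      shotPrompt: "Medium shot of the old man in a gray wool coat feeding pigeons on a park bench",
      environmentPrompt: "A city park at golden hour, clear sky, wrought-iron benches, dry gravel path",
      photographyPrompt: "Eye-level static shot, shallow focus, warm tint, sun low behind the trees",
      actionPrompt: "The man slowly throws seeds; pigeons hop toward him; no camera movement",
      foregroundAudioPrompt: "pigeons cooing, seeds scattering",
    },
  ],
}
```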
src/production/addAudioToVideo.mts
ADDED
@@ -0,0 +1,43 @@
import path from "node:path"

import tmpDir from "temp-dir"
import { v4 as uuidv4 } from "uuid"
import ffmpeg from "fluent-ffmpeg"

import { pendingFilesDirFilePath } from "../config.mts"
import { moveFileFromTmpToPending } from "../utils/moveFileFromTmpToPending.mts"

export const addAudioToVideo = async (
  videoFileName: string,
  audioFileName: string,

  /*
   * 0.0: mute the audio completely
   * 0.5: set the audio to 50% of original volume (half volume)
   * 1.0: maintain the audio at original volume (100% of original volume)
   * 2.0: amplify the audio to 200% of original volume (double volume - might cause clipping)
   */
  volume: number = 1.0
) => {
  const inputFilePath = path.join(pendingFilesDirFilePath, videoFileName)
  const audioFilePath = path.resolve(pendingFilesDirFilePath, audioFileName)

  const tmpFileName = `${uuidv4()}.mp4`
  const tempOutputFilePath = path.join(tmpDir, tmpFileName)

  await new Promise((resolve, reject) => {
    ffmpeg(inputFilePath)
      .input(audioFilePath)
      .audioFilters({ filter: 'volume', options: volume }) // add audio filter for volume
      .outputOptions("-c:v copy") // use video copy codec
      .outputOptions("-c:a aac") // use audio codec
      .outputOptions("-map 0:v:0") // map video from 0th to 0th
      .outputOptions("-map 1:a:0") // map audio from 1st to 0th
      .outputOptions("-shortest") // finish encoding when shortest input stream ends
      .output(tempOutputFilePath)
      .on("end", resolve)
      .on("error", reject)
      .run()
  })
  await moveFileFromTmpToPending(tmpFileName, videoFileName)
};
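A usage sketch (hypothetical file names; both are resolved inside the pending files directory), with a comment approximating the ffmpeg command the fluent-ffmpeg chain assembles:

```typescript
// illustrative only; the chain above builds roughly:
//   ffmpeg -i video.mp4 -i ambiance.m4a -filter:a "volume=0.5" \
//     -c:v copy -c:a aac -map 0:v:0 -map 1:a:0 -shortest out.mp4
import { addAudioToVideo } from "./addAudioToVideo.mts"

await addAudioToVideo("video.mp4", "ambiance.m4a", 0.5)
```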
src/production/assembleShots.mts
ADDED
@@ -0,0 +1,59 @@
import path from "node:path"

// due to Docker issues, we disable OpenGL transitions for now
// import concat from 'ffmpeg-concat'
import concat from './concatNoGL.mts'

import { VideoShot } from '../types.mts'
import { pendingFilesDirFilePath } from "../config.mts"
import { normalizePendingVideoToTmpFilePath } from "./normalizePendingVideoToTmpFilePath.mts"

export const assembleShots = async (shots: VideoShot[], fileName: string) => {

  if (!Array.isArray(shots) || shots.length < 2) {
    throw new Error(`need at least 2 shots`)
  }

  const transitions = [
    {
      name: 'circleOpen',
      duration: 1000,
    },
    {
      name: 'crossWarp',
      duration: 800,
    },
    {
      name: 'directionalWarp',
      duration: 800,
      // pass custom params to a transition
      params: { direction: [1, -1] },
    },

    /*
    {
      name: 'squaresWire',
      duration: 2000,
    },
    */
  ]

  const videoFilePath = path.join(pendingFilesDirFilePath, fileName)

  // before performing assembly, we must normalize the shot videos
  const shotFilesPaths: string[] = []
  for (let shot of shots) {
    const normalizedShotFilePath = await normalizePendingVideoToTmpFilePath(shot.fileName)
    shotFilesPaths.push(normalizedShotFilePath)
  }

  await concat({
    output: videoFilePath,
    videos: shotFilesPaths,
    transitions: shotFilesPaths
      .slice(0, shotFilesPaths.length - 1)
      .map(
        () => transitions[Math.floor(Math.random() * transitions.length)]
      ),
  })
}
src/production/concatNoGL.mts
ADDED
@@ -0,0 +1,33 @@
import ffmpeg from "fluent-ffmpeg";
import fs from "fs";

interface IConcatParams {
  output: string;
  videos: string[];
  transitions: any; // accepted for API compatibility with ffmpeg-concat, but ignored here (no OpenGL)
}

const concat = async ({ output, videos }: IConcatParams): Promise<void> => {
  if (!output || !Array.isArray(videos)) {
    throw new Error("An output file and videos must be provided");
  }

  if (!videos.every(video => fs.existsSync(video))) {
    throw new Error("All videos must exist");
  }

  const ffmpegCommand = ffmpeg();

  videos.forEach((video) =>
    ffmpegCommand.addInput(video)
  );

  return new Promise<void>((resolve, reject) => {
    ffmpegCommand
      .on('error', reject)
      .on('end', resolve)
      .mergeToFile(output);
  });
};

export default concat;
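A minimal usage sketch (hypothetical paths, not part of this commit):

```typescript
// illustrative only: concatenate two normalized shots back to back
import concat from "./concatNoGL.mts"

await concat({
  output: "/tmp/assembled.mp4",
  videos: ["/tmp/shot1.mp4", "/tmp/shot2.mp4"],
  transitions: [], // accepted for API compatibility, not applied
})
```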
src/production/generateActor.mts
ADDED
@@ -0,0 +1,50 @@
import { promises as fs } from "node:fs"
import path from "node:path"
import tmpDir from "temp-dir"

import { HfInference } from "@huggingface/inference"

const hf = new HfInference(process.env.VC_HF_API_TOKEN)

export const generateActor = async (prompt: string, fileName: string, seed: number) => {
  const positivePrompt = [
    `profile photo of ${prompt || ""}`,
    "id picture",
    "photoshoot",
    "portrait photography",
    "neutral expression",
    "neutral background",
    "studio photo",
    "award winning",
    "high resolution",
    "photo realistic",
    "intricate details",
    "beautiful",
  ]
  const negativePrompt = [
    "anime",
    "drawing",
    "painting",
    "lowres",
    "blurry",
    "artificial"
  ]

  console.log(`generating actor: ${positivePrompt.join(", ")}`)

  const blob = await hf.textToImage({
    inputs: positivePrompt.join(", "),
    model: "stabilityai/stable-diffusion-2-1",
    parameters: {
      negative_prompt: negativePrompt.join(", "),
      // seed, no seed?
    }
  })

  const filePath = path.resolve(tmpDir, fileName)

  const buffer = Buffer.from(await blob.arrayBuffer())
  await fs.writeFile(filePath, buffer) // binary data: don't pass a text encoding here

  return filePath
}
src/production/generateAudio.mts
ADDED
@@ -0,0 +1,76 @@
import { v4 as uuidv4 } from "uuid"
import puppeteer from "puppeteer"

import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
import { moveFileFromTmpToPending } from "../utils/moveFileFromTmpToPending.mts"

export const state = {
  load: 0,
}

const instances: string[] = [
  process.env.VC_AUDIO_GENERATION_SPACE_API_URL
]

// TODO we should use an inference endpoint instead
export async function generateAudio(prompt: string, audioFileName: string) {

  if (state.load === instances.length) {
    throw new Error(`all audio generation servers are busy, try again later..`)
  }

  state.load += 1

  try {
    const instance = instances.shift()
    instances.push(instance)

    const browser = await puppeteer.launch({
      headless: true,
      protocolTimeout: 120000,
    })

    try {
      const page = await browser.newPage()

      await page.goto(instance, {
        waitUntil: "networkidle2",
      })

      await new Promise(r => setTimeout(r, 3000))

      const firstTextboxInput = await page.$('input[data-testid="textbox"]')

      await firstTextboxInput.type(prompt)

      // console.log("looking for the button to submit")
      const submitButton = await page.$("button.lg")

      // console.log("clicking on the button")
      await submitButton.click()

      await page.waitForSelector("a[download]", {
        timeout: 120000, // no need to wait for too long, generation is quick
      })

      const audioRemoteUrl = await page.$$eval("a[download]", el => el.map(x => x.getAttribute("href"))[0])


      // it is always a good idea to download to a tmp dir before saving to the pending dir
      // because there is always a risk that the download will fail

      const tmpFileName = `${uuidv4()}.mp4`

      await downloadFileToTmp(audioRemoteUrl, tmpFileName)
      await moveFileFromTmpToPending(tmpFileName, audioFileName)
    } catch (err) {
      throw err
    } finally {
      await browser.close()
    }
  } catch (err) {
    throw err
  } finally {
    state.load -= 1
  }
}
src/production/generateAudioLegacy.mts
ADDED
@@ -0,0 +1,52 @@
import { client } from '@gradio/client'

import { generateSeed } from "../utils/generateSeed.mts"

export const state = {
  load: 0
}

const instances: string[] = [
  process.env.VC_AUDIO_GENERATION_SPACE_API_URL
]

export const generateAudio = async (prompt: string, options?: {
  seed: number;
  nbFrames: number;
  nbSteps: number;
}) => {

  if (state.load === instances.length) {
    throw new Error(`all audio generation servers are busy, try again later..`)
  }

  state.load += 1

  try {
    const seed = options?.seed || generateSeed()
    const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling will require too much memory!
    const nbSteps = options?.nbSteps || 35

    const instance = instances.shift()
    instances.push(instance)

    const api = await client(instance, {
      hf_token: `${process.env.VC_HF_API_TOKEN}` as any
    })

    const rawResponse = await api.predict('/run', [
      prompt, // string in 'Prompt' Textbox component
      seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
      nbFrames, // 24 // is it the number of frames per second?
      nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
    ]) as any

    const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }

    return `${instance}/file=${name}`
  } catch (err) {
    throw err
  } finally {
    state.load -= 1
  }
}
src/production/generateVideo.mts
ADDED
@@ -0,0 +1,57 @@
import { client } from "@gradio/client"

import { generateSeed } from "../utils/generateSeed.mts"

export const state = {
  load: 0,
}

// we don't use replicas yet, because it ain't easy to get their hostname
const instances: string[] = [
  `${process.env.VC_ZEROSCOPE_SPACE_API_URL_1 || ""}`,
  `${process.env.VC_ZEROSCOPE_SPACE_API_URL_2 || ""}`,
  // `${process.env.VC_ZEROSCOPE_SPACE_API_URL_3 || ""}`,
].filter(instance => instance?.length > 0)

export const generateVideo = async (prompt: string, options?: {
  seed: number;
  nbFrames: number;
  nbSteps: number;
}) => {

  if (state.load === instances.length) {
    throw new Error(`all video generation servers are busy, try again later..`)
  }

  state.load += 1

  try {
    const seed = options?.seed || generateSeed()
    const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling will require too much memory!
    const nbSteps = options?.nbSteps || 35

    const instance = instances.shift()
    instances.push(instance)

    const api = await client(instance, {
      hf_token: `${process.env.VC_HF_API_TOKEN}` as any
    })

    const rawResponse = await api.predict('/run', [
      prompt, // string in 'Prompt' Textbox component
      seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
      nbFrames, // 24 // is it the number of frames per second?
      nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
    ]) as any

    // console.log("rawResponse:", rawResponse)

    const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }

    return `${instance}/file=${name}`
  } catch (err) {
    throw err
  } finally {
    state.load -= 1
  }
}
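A hypothetical call (prompt and options invented for illustration); per the code above, the function returns a `file=` URL served by whichever Space instance handled the request:

```typescript
// illustrative only
import { generateVideo } from "./generateVideo.mts"

const videoUrl = await generateVideo("a mallard duck landing on a pond", {
  seed: 42,
  nbFrames: 24,
  nbSteps: 35,
})
console.log(videoUrl) // e.g. "<instance URL>/file=<generated file name>"
```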
src/production/generateVoice.mts
ADDED
@@ -0,0 +1,78 @@
import puppeteer from "puppeteer"

import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"

export const state = {
  load: 0
}

const instances: string[] = [
  process.env.VC_VOICE_GENERATION_SPACE_API_URL
]

// TODO we should use an inference endpoint instead
export async function generateVoice(prompt: string, voiceFileName: string) {
  if (state.load === instances.length) {
    throw new Error(`all voice generation servers are busy, try again later..`)
  }

  state.load += 1

  try {
    const instance = instances.shift()
    instances.push(instance)

    console.log("instance:", instance)

    const browser = await puppeteer.launch({
      headless: true,
      protocolTimeout: 800000,
    })

    try {
      const page = await browser.newPage()

      await page.goto(instance, {
        waitUntil: "networkidle2",
      })

      await new Promise(r => setTimeout(r, 3000))

      const firstTextarea = await page.$('textarea[data-testid="textbox"]')

      await firstTextarea.type(prompt)

      // console.log("looking for the button to submit")
      const submitButton = await page.$("button.lg")

      // console.log("clicking on the button")
      await submitButton.click()

      await page.waitForSelector("audio", {
        timeout: 800000, // needs to be large enough in case someone else attempts to use our space
      })

      const voiceRemoteUrl = await page.$$eval("audio", el => el.map(x => x.getAttribute("src"))[0])


      console.log({
        voiceRemoteUrl,
      })


      console.log(`- downloading ${voiceFileName} from ${voiceRemoteUrl}`)

      await downloadFileToTmp(voiceRemoteUrl, voiceFileName)

      return voiceFileName
    } catch (err) {
      throw err
    } finally {
      await browser.close()
    }
  } catch (err) {
    throw err
  } finally {
    state.load -= 1
  }
}
src/production/interpolateVideo.mts
ADDED
@@ -0,0 +1,81 @@
import path from "node:path"

import { v4 as uuidv4 } from "uuid"
import puppeteer from "puppeteer"

import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
import { pendingFilesDirFilePath } from "../config.mts"
import { moveFileFromTmpToPending } from "../utils/moveFileFromTmpToPending.mts"

export const state = {
  load: 0
}

const instances: string[] = [
  process.env.VC_VIDEO_INTERPOLATION_SPACE_API_URL
]

// TODO we should use an inference endpoint instead
export async function interpolateVideo(fileName: string, steps: number, fps: number) {
  if (state.load === instances.length) {
    throw new Error(`all video interpolation servers are busy, try again later..`)
  }

  state.load += 1

  try {
    const inputFilePath = path.join(pendingFilesDirFilePath, fileName)

    console.log(`interpolating ${fileName}`)
    console.log(`warning: interpolateVideo parameter "steps" (${steps}) is ignored!`)
    console.log(`warning: interpolateVideo parameter "fps" (${fps}) is ignored!`)

    const instance = instances.shift()
    instances.push(instance)

    const browser = await puppeteer.launch({
      headless: true,
      protocolTimeout: 400000,
    })

    try {
      const page = await browser.newPage()
      await page.goto(instance, { waitUntil: 'networkidle2' })

      await new Promise(r => setTimeout(r, 3000))

      const fileField = await page.$('input[type=file]')

      // console.log(`uploading file..`)
      await fileField.uploadFile(inputFilePath)

      // console.log('looking for the button to submit')
      const submitButton = await page.$('button.lg')

      // console.log('clicking on the button')
      await submitButton.click()

      await page.waitForSelector('a[download="interpolated_result.mp4"]', {
        timeout: 400000, // needs to be large enough in case someone else attempts to use our space
      })

      const interpolatedFileUrl = await page.$$eval('a[download="interpolated_result.mp4"]', el => el.map(x => x.getAttribute("href"))[0])

      // it is always a good idea to download to a tmp dir before saving to the pending dir
      // because there is always a risk that the download will fail

      const tmpFileName = `${uuidv4()}.mp4`

      await downloadFileToTmp(interpolatedFileUrl, tmpFileName)
      await moveFileFromTmpToPending(tmpFileName, fileName)
    } catch (err) {
      throw err
    } finally {
      await browser.close()
    }
  } catch (err) {
    throw err
  } finally {
    state.load -= 1
  }
}
src/production/interpolateVideoLegacy.mts
ADDED
@@ -0,0 +1,56 @@
import { promises as fs } from "node:fs"
import path from "node:path"
import { Blob } from "buffer"

import { client } from "@gradio/client"
import tmpDir from "temp-dir"

import { downloadFileToTmp } from '../utils/downloadFileToTmp.mts'

export const state = {
  load: 0
}

const instances: string[] = [
  process.env.VC_VIDEO_INTERPOLATION_SPACE_API_URL
]

export const interpolateVideo = async (fileName: string, steps: number, fps: number) => {
  if (state.load === instances.length) {
    throw new Error(`all video interpolation servers are busy, try again later..`)
  }

  state.load += 1

  try {
    const inputFilePath = path.join(tmpDir, fileName)

    const instance = instances.shift()
    instances.push(instance)

    const api = await client(instance, {
      hf_token: `${process.env.VC_HF_API_TOKEN}` as any
    })

    const video = await fs.readFile(inputFilePath)

    const blob = new Blob([video], { type: 'video/mp4' })
    // const blob = blobFrom(filePath)
    const result = await api.predict(1, [
      blob, // blob in 'parameter_5' Video component
      steps, // number (numeric value between 1 and 4) in 'Interpolation Steps' Slider component
      fps, // string (FALSE! it's a number) in 'FPS output' Radio component
    ])

    const data = (result as any).data[0]
    console.log('raw data:', data)
    const { orig_name, data: remoteFilePath } = data
    const remoteUrl = `${instance}/file=${remoteFilePath}`
    console.log("remoteUrl:", remoteUrl)
    await downloadFileToTmp(remoteUrl, fileName)
  } catch (err) {
    throw err
  } finally {
    state.load -= 1
  }
}
src/production/mergeAudio.mts
ADDED
@@ -0,0 +1,49 @@
import path from "node:path"

import tmpDir from "temp-dir"
import { v4 as uuidv4 } from "uuid"
import ffmpeg from "fluent-ffmpeg"

export const mergeAudio = async ({
  input1FileName,
  input1Volume,
  input2FileName,
  input2Volume,
  outputFileName = ''
}: {
  input1FileName: string,
  input1Volume: number,
  input2FileName: string,
  input2Volume: number,
  outputFileName?: string
}): Promise<string> => {
  // only generate a name if the caller didn't provide one
  outputFileName = outputFileName || `${uuidv4()}.m4a`

  const input1FilePath = path.resolve(tmpDir, input1FileName)
  const input2FilePath = path.resolve(tmpDir, input2FileName)
  const outputFilePath = path.resolve(tmpDir, outputFileName)

  await new Promise((resolve, reject) => {
    ffmpeg()
      .input(input1FilePath)
      .input(input2FilePath)
      // set the volume of each input, then mix the two streams together
      .complexFilter([
        `[0:a]volume=${input1Volume}[a1]`,
        `[1:a]volume=${input2Volume}[a2]`,
        `[a1][a2]amix=inputs=2:duration=shortest[out]`,
      ], "out")
      .outputOptions("-c:a aac") // use audio codec
      .output(outputFilePath)
      .on("end", resolve)
      .on("error", reject)
      .run()
  })

  console.log(`merged audio from ${input1FileName} and ${input2FileName} into ${outputFileName}`)

  return outputFileName
}
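A usage sketch (hypothetical file names, resolved inside the tmp dir as the code above does):

```typescript
// illustrative only: duck the background track under the voice track
import { mergeAudio } from "./mergeAudio.mts"

const merged = await mergeAudio({
  input1FileName: "voice.m4a",
  input1Volume: 1.0, // keep the voice at full volume
  input2FileName: "background.m4a",
  input2Volume: 0.2, // background at 20%
})
console.log(merged) // a generated "<uuid>.m4a" in the tmp dir
```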
src/production/normalizePendingVideoToTmpFilePath.mts
ADDED
@@ -0,0 +1,33 @@
import path from "node:path"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"
import ffmpeg from "fluent-ffmpeg"

import { pendingFilesDirFilePath } from "../config.mts"

export const normalizePendingVideoToTmpFilePath = async (fileName: string): Promise<string> => {
  return new Promise((resolve, reject) => {

    const tmpFileName = `${uuidv4()}.mp4`

    const filePath = path.join(pendingFilesDirFilePath, fileName)
    const tmpFilePath = path.join(tmpDir, tmpFileName)

    // probe first, to fail early if the input is not a readable video
    ffmpeg.ffprobe(filePath, function (err) {
      if (err) { reject(err); return; }

      ffmpeg(filePath)

        .size("1280x720")

        .save(tmpFilePath)
        .on("end", async () => {
          resolve(tmpFilePath)
        })
        .on("error", (err) => {
          reject(err)
        })
    })
  })
}
src/production/postInterpolation.mts
ADDED
@@ -0,0 +1,58 @@
import path from "node:path"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"
import ffmpeg from "fluent-ffmpeg"
import { moveFileFromTmpToPending } from "../utils/moveFileFromTmpToPending.mts"
import { pendingFilesDirFilePath } from "../config.mts"

export const postInterpolation = async (fileName: string, durationMs: number, nbFrames: number, noiseAmount: number): Promise<string> => {
  return new Promise((resolve, reject) => {

    const tmpFileName = `${uuidv4()}.mp4`

    const filePath = path.join(pendingFilesDirFilePath, fileName)
    const tmpFilePath = path.join(tmpDir, tmpFileName)

    ffmpeg.ffprobe(filePath, function(err, metadata) {
      if (err) { reject(err); return; }

      const durationInSec = durationMs / 1000

      const currentVideoDurationInSec = metadata.format.duration

      console.log(`current video duration: ${currentVideoDurationInSec}s`)

      console.log(`target duration: ${durationInSec}s (${durationMs}ms)`)

      // compute a ratio, eg. 0.3 = the current video is 30% of the target length
      const durationRatio = currentVideoDurationInSec / durationInSec
      console.log(`durationRatio: ${durationRatio}`)

      ffmpeg(filePath)

        // convert to HD
        .size("1280x720")

        .videoFilters([
          `setpts=0.5*PTS`, // double the playback speed (halves the duration)
          //'scale=-1:576:lanczos',
          // 'unsharp=5:5:0.2:5:5:0.2', // not recommended, this makes the video more "pixely"
          `noise=c0s=${noiseAmount}:c0f=t+u` // add a movie grain noise
        ])
        .outputOptions([
          `-r ${nbFrames}`,
        ])

        .save(tmpFilePath)
        .on("end", async () => {
          await moveFileFromTmpToPending(tmpFileName, fileName)

          resolve(fileName)
        })
        .on("error", (err) => {
          reject(err)
        })
    })
  })
}
src/production/renderImage.mts
ADDED
@@ -0,0 +1,54 @@
import { RenderedScene, RenderRequest } from "../types.mts"
import { generateImageSDXLAsBase64 } from "../utils/generateImageSDXL.mts"
import { generateImageSDXL360AsBase64 } from "../utils/generateImageSDXL360.mts"
import { generateSeed } from "../utils/generateSeed.mts"

export async function renderImage(
  request: RenderRequest,
  response: RenderedScene,
): Promise<RenderedScene> {

  const isSpherical = request.projection === 'spherical'

  const generateImageAsBase64 = isSpherical
    ? generateImageSDXL360AsBase64
    : generateImageSDXLAsBase64

  console.log(`going to generate an image using ${request.projection || "default (cartesian)"} projection`)

  const params = {
    positivePrompt: request.prompt,
    seed: request.seed,
    nbSteps: request.nbSteps,
    width: request.width,
    height: request.height
  }

  console.log(`calling generateImageAsBase64 with: `, JSON.stringify(params, null, 2))


  // first we generate a quick low quality version
  try {
    response.assetUrl = await generateImageAsBase64(params)
    console.log("successful generation!", response.assetUrl.slice(0, 30))
    if (!response.assetUrl?.length) {
      throw new Error(`the generated image is empty`)
    }
  } catch (err) {
    console.error(`failed to render.. but let's try again!`)
    try {
      response.assetUrl = await generateImageAsBase64(params)
      console.log("successful generation!", response.assetUrl.slice(0, 30))
      if (!response.assetUrl?.length) {
        throw new Error(`the generated image is empty`)
      }
    } catch (err) {
      console.error(`failed to generate the image again: ${err}`)
      response.error = `failed to render scene: ${err}`
      response.status = "error"
      response.assetUrl = ""
    }
  }

  return response
}
src/production/renderImageSegmentation.mts
ADDED
@@ -0,0 +1,69 @@
import path from "node:path"

import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"

import { RenderedScene, RenderRequest } from "../types.mts"
import { segmentImage } from "../utils/segmentImage.mts"
import { writeBase64ToFile } from "../utils/writeBase64ToFile.mts"


export async function renderImageSegmentation(
  request: RenderRequest,
  response: RenderedScene,
): Promise<RenderedScene> {

  const actionnables = Array.isArray(request.actionnables) ? request.actionnables : []

  if (actionnables.length > 0) {
    console.log("we have some actionnables:", actionnables)
    console.log("going to grab the first frame")

    const tmpImageFilePath = path.join(tmpDir, `${uuidv4()}.png`)

    // console.log("beginning:", imageBase64.slice(0, 100))
    await writeBase64ToFile(response.assetUrl, tmpImageFilePath)
    console.log("wrote the image to ", tmpImageFilePath)

    if (!tmpImageFilePath) {
      console.error("failed to get the image")
      response.error = "failed to segment the image"
      response.status = "error"
    } else {
      console.log("got the first frame! segmenting..")
      try {
        const result = await segmentImage(tmpImageFilePath, actionnables, request.width, request.height)
        response.maskBase64 = result.pngInBase64
        response.segments = result.segments

        console.log(`it worked the first time! got ${response.segments.length} segments`)
      } catch (err) {
        console.log("this takes too long :/ trying another server..")
        try {
          const result = await segmentImage(tmpImageFilePath, actionnables, request.width, request.height)
          response.maskBase64 = result.pngInBase64
          response.segments = result.segments

          console.log(`it worked the second time! got ${response.segments.length} segments`)
        } catch (err) {
          console.log("trying one last time, on a 3rd server..")
          try {
            const result = await segmentImage(tmpImageFilePath, actionnables, request.width, request.height)
            response.maskBase64 = result.pngInBase64
            response.segments = result.segments

            console.log(`it worked the third time! got ${response.segments.length} segments`)
          } catch (err) {
            console.log("yeah, all servers are busy it seems.. aborting")
            response.error = "all servers are busy"
            response.status = "error"
          }
        }
      }
    }
  } else {
    console.log("no actionnables: just returning the image, then")
  }

  return response
}
src/production/renderPipeline.mts
ADDED
@@ -0,0 +1,46 @@
1  +
2  + import { RenderedScene, RenderRequest } from "../types.mts"
3  +
4  + import { renderImage } from "./renderImage.mts"
5  + import { renderVideo } from "./renderVideo.mts"
6  + import { renderImageSegmentation } from "./renderImageSegmentation.mts"
7  + import { renderVideoSegmentation } from "./renderVideoSegmentation.mts"
8  +
9  + export async function renderPipeline(request: RenderRequest, response: RenderedScene) {
10 +   const isVideo = request?.nbFrames > 1
11 +
12 +   const renderContent = isVideo ? renderVideo : renderImage
13 +   const renderSegmentation = isVideo ? renderVideoSegmentation : renderImageSegmentation
14 +
15 +   if (isVideo) {
16 +     console.log(`rendering a video..`)
17 +   } else {
18 +     console.log(`rendering an image..`)
19 +   }
20 +   await renderContent(request, response)
21 +   await renderSegmentation(request, response)
22 +
23 +   /*
24 +   This is the optimized pipeline.
25 +   However, right now it doesn't work: for some reason, asking to
26 +   generate with the same seed + prompt but a different number of steps
27 +   doesn't produce the same image!
28 +
29 +   // first we need to wait for the low-quality pre-render
30 +   await renderContent({
31 +     ...request,
32 +
33 +     // we are a bit more aggressive with the quality of the video preview
34 +     nbSteps: isVideo ? 8 : 16
35 +   }, response)
36 +
37 +   // then we can run both the segmentation and the high-res render at the same time
38 +   await Promise.all([
39 +     renderSegmentation(request, response),
40 +     renderContent(request, response)
41 +   ])
42 +   */
43 +
44 +   // don't wipe an error reported by an earlier stage
45 +   if (response.status !== "error") { response.status = "completed"; response.error = "" }
46 + }
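Both functions here pass around `RenderRequest` and `RenderedScene` from `src/types.mts`, which is outside this section. Inferred from the fields used across these files, the shapes are roughly as follows; this is a hypothetical reconstruction, not the actual definitions, which may contain more fields.

```ts
// Hypothetical shapes inferred from usage in this section; the real
// definitions live in src/types.mts.
export interface RenderRequest {
  prompt: string
  seed: number
  nbFrames: number        // 1 = still image, >1 = video
  nbSteps: number         // number of diffusion steps
  width: number
  height: number
  actionnables: string[]  // objects to detect/segment in the result
  segmentation: string    // e.g. "firstframe"
}

export interface RenderedScene {
  renderId: string
  status: "pending" | "completed" | "error"
  assetUrl: string        // URL or base64 payload of the rendered asset
  error: string
  maskBase64: string      // segmentation mask as a base64 PNG
  segments: unknown[]     // per-actionnable segment data
}
```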
src/production/renderScene.mts
ADDED
@@ -0,0 +1,62 @@
1  + import { v4 as uuidv4 } from "uuid"
2  +
3  + import { RenderedScene, RenderRequest } from "../types.mts"
4  + import { generateSeed } from "../utils/generateSeed.mts"
5  + import { getValidNumber } from "../utils/getValidNumber.mts"
6  + import { renderPipeline } from "./renderPipeline.mts"
7  +
8  + const cache: Record<string, RenderedScene> = {}
9  + const cacheQueue: string[] = []
10 + const maxCacheSize = 1000
11 +
12 + export async function renderScene(request: RenderRequest): Promise<RenderedScene> {
13 +   // const key = getCacheKey(scene)
14 +   const renderId = uuidv4()
15 +
16 +   request.nbFrames = getValidNumber(request.nbFrames, 1, 24, 16)
17 +
18 +   const isVideo = request?.nbFrames > 1
19 +
20 +   // important: we need a consistent seed for our multiple rendering passes
21 +   request.seed = getValidNumber(request.seed, 0, 2147483647, generateSeed())
22 +   request.nbSteps = getValidNumber(request.nbSteps, 5, 50, 10)
23 +
24 +   if (isVideo) {
25 +     request.width = getValidNumber(request.width, 256, 1280, 576)
26 +     request.height = getValidNumber(request.height, 256, 720, 320)
27 +   } else {
28 +     request.width = getValidNumber(request.width, 256, 1024, 1024)
29 +     request.height = getValidNumber(request.height, 256, 1024, 512)
30 +   }
31 +
32 +   const response: RenderedScene = {
33 +     renderId,
34 +     status: "pending",
35 +     assetUrl: "",
36 +     error: "",
37 +     maskBase64: "",
38 +     segments: []
39 +   }
40 +
41 +   cache[renderId] = response
42 +   cacheQueue.push(renderId)
43 +   if (cacheQueue.length > maxCacheSize) {
44 +     const toRemove = cacheQueue.shift()
45 +     delete cache[toRemove]
46 +   }
47 +
48 +   // fire-and-forget asynchronous pipeline: we start it but do not await it,
49 +   // so we catch errors here to avoid an unhandled promise rejection
50 +   renderPipeline(request, response).catch(err => { response.status = "error"; response.error = `${err}` })
51 +
52 +   console.log("renderScene: yielding the scene", response)
53 +   return response
54 + }
55 +
56 + export async function getRenderedScene(renderId: string): Promise<RenderedScene> {
57 +   const rendered = cache[renderId]
58 +   if (!rendered) {
59 +     throw new Error(`couldn't find any rendered scene with renderId ${renderId}`)
60 +   }
61 +   return rendered
62 + }
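Editor's note: the original declared `isVideo = request?.nbFrames === 1` (inverted relative to `renderPipeline`) and validated `height` by passing `request.width`; both were fixed above while keeping the original runtime dimension defaults (videos 576x320, images 1024x512). Since `renderScene` returns immediately with a `"pending"` response, a caller is expected to poll `getRenderedScene` until the status becomes terminal. A minimal polling sketch (the `pollScene` name and the 1s delay are hypothetical, not part of the commit):

```ts
// Hypothetical client loop, not part of this commit: poll until the
// fire-and-forget pipeline reports a terminal status.
import { RenderedScene, RenderRequest } from "../types.mts"
import { renderScene, getRenderedScene } from "./renderScene.mts"

async function pollScene(request: RenderRequest): Promise<RenderedScene> {
  const { renderId } = await renderScene(request)
  while (true) {
    const scene = await getRenderedScene(renderId)
    if (scene.status === "completed" || scene.status === "error") {
      return scene
    }
    await new Promise(r => setTimeout(r, 1000)) // wait 1s between polls
  }
}
```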
src/production/renderVideo.mts
ADDED
@@ -0,0 +1,41 @@
1  + import { RenderedScene, RenderRequest } from "../types.mts"
2  + import { generateVideo } from "./generateVideo.mts"
3  +
4  + export async function renderVideo(
5  +   request: RenderRequest,
6  +   response: RenderedScene
7  + ): Promise<RenderedScene> {
8  +
9  +   const params = {
10 +     seed: request.seed,
11 +     nbFrames: request.nbFrames,
12 +     nbSteps: request.nbSteps,
13 +   }
14 +
15 +   try {
16 +     response.assetUrl = await generateVideo(request.prompt, params)
17 +     // console.log("successful generation")
18 +
19 +     if (!response.assetUrl?.length) {
20 +       throw new Error(`url for the generated video is empty`)
21 +     }
22 +   } catch (err) {
23 +     console.error(`failed to render the video scene.. but let's try again!`)
24 +
25 +     try {
26 +       response.assetUrl = await generateVideo(request.prompt, params)
27 +       // console.log("successful generation")
28 +
29 +       if (!response.assetUrl?.length) {
30 +         throw new Error(`url for the generated video is empty`)
31 +       }
32 +
33 +     } catch (err) {
34 +       console.error(`the video generation failed a second time: ${err}`)
35 +       response.error = `failed to render video scene: ${err}`
36 +       response.status = "error"
37 +     }
38 +   }
39 +
40 +   return response
41 + }
src/production/renderVideoSegmentation.mts
ADDED
@@ -0,0 +1,52 @@
1  + import { v4 as uuidv4 } from "uuid"
2  +
3  + import { RenderedScene, RenderRequest } from "../types.mts"
4  + import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
5  + import { getFirstVideoFrame } from "../utils/getFirstVideoFrame.mts"
6  + import { segmentImage } from "../utils/segmentImage.mts"
7  +
8  + export async function renderVideoSegmentation(
9  +   request: RenderRequest,
10 +   response: RenderedScene
11 + ): Promise<RenderedScene> {
12 +
13 +   const actionnables = Array.isArray(request.actionnables) ? request.actionnables : []
14 +
15 +   if (actionnables.length > 0) {
16 +     console.log("we have some actionnables:", actionnables)
17 +     if (request.segmentation === "firstframe") {
18 +       console.log("going to grab the first frame")
19 +       const tmpVideoFilePath = await downloadFileToTmp(response.assetUrl, `${uuidv4()}`)
20 +       console.log("downloaded the video to", tmpVideoFilePath)
21 +       const firstFrameFilePath = await getFirstVideoFrame(tmpVideoFilePath)
22 +       console.log("extracted the first frame to", firstFrameFilePath)
23 +
24 +       if (!firstFrameFilePath) {
25 +         console.error("failed to get the image")
26 +         response.error = "failed to segment the image"
27 +         response.status = "error"
28 +       } else {
29 +         console.log("got the first frame! segmenting..")
30 +         const result = await segmentImage(firstFrameFilePath, actionnables, request.width, request.height)
31 +         response.maskBase64 = result.pngInBase64
32 +         response.segments = result.segments
33 +
34 +         // console.log("success!", { segments })
35 +       }
36 +       /*
37 +       const jpgBase64 = await getFirstVideoFrame(tmpVideoFileName)
38 +       if (!jpgBase64) {
39 +         console.error("failed to get the image")
40 +         error = "failed to segment the image"
41 +       } else {
42 +         console.log(`got the first frame (${jpgBase64.length})`)
43 +
44 +         console.log("TODO: call segmentImage with the base64 image")
45 +         await segmentImage()
46 +       }
47 +       */
48 +     }
49 +   }
50 +
51 +   return response
52 + }
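`getFirstVideoFrame` lives in `src/utils` and isn't shown in this section. A plausible implementation shells out to ffmpeg, which the Dockerfile already installs; the sketch below is an assumption, not the repo's actual helper.

```ts
// Hypothetical sketch of a first-frame extractor; the real helper in
// src/utils/getFirstVideoFrame.mts may differ.
import path from "node:path"
import { execFile } from "node:child_process"
import { promisify } from "node:util"
import { v4 as uuidv4 } from "uuid"
import tmpDir from "temp-dir"

const execFileAsync = promisify(execFile)

export async function getFirstVideoFrame(videoFilePath: string): Promise<string> {
  const framePath = path.join(tmpDir, `${uuidv4()}.png`)
  // -frames:v 1 asks ffmpeg to decode and write a single video frame
  await execFileAsync("ffmpeg", ["-i", videoFilePath, "-frames:v", "1", framePath])
  return framePath
}
```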
src/production/upscaleVideo.mts
ADDED
@@ -0,0 +1,78 @@
1  + import path from "node:path"
2  +
3  + import { v4 as uuidv4 } from "uuid"
4  + import tmpDir from "temp-dir"
5  + import puppeteer from "puppeteer"
6  +
7  + import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
8  + import { pendingFilesDirFilePath } from "../config.mts"
9  + import { moveFileFromTmpToPending } from "../utils/moveFileFromTmpToPending.mts"
10 +
11 + const instances: string[] = [
12 +   process.env.VC_VIDEO_UPSCALE_SPACE_API_URL
13 + ]
14 +
15 + // TODO we should use an inference endpoint instead (or a space which bakes generation + upscale at the same time)
16 + export async function upscaleVideo(fileName: string, prompt: string) {
17 +   const instance = instances.shift()
18 +   instances.push(instance)
19 +
20 +   const browser = await puppeteer.launch({
21 +     // headless: true,
22 +     protocolTimeout: 800000,
23 +   })
24 +
25 +   try {
26 +     const page = await browser.newPage()
27 +
28 +     await page.goto(instance, {
29 +       waitUntil: "networkidle2",
30 +     })
31 +
32 +     const promptField = await page.$('textarea')
33 +     await promptField.type(prompt)
34 +
35 +     const inputFilePath = path.join(pendingFilesDirFilePath, fileName)
36 +     // console.log(`local file to upscale: ${inputFilePath}`)
37 +
38 +     await new Promise(r => setTimeout(r, 3000))
39 +
40 +     const fileField = await page.$('input[type=file]')
41 +
42 +     // console.log(`uploading file..`)
43 +     await fileField.uploadFile(inputFilePath)
44 +
45 +     // console.log('looking for the button to submit')
46 +     const submitButton = await page.$('button.lg')
47 +
48 +     // console.log('clicking on the button')
49 +     await submitButton.click()
50 +
51 +     /*
52 +     const client = await page.target().createCDPSession()
53 +
54 +     await client.send('Page.setDownloadBehavior', {
55 +       behavior: 'allow',
56 +       downloadPath: tmpDir,
57 +     })
58 +     */
59 +
60 +     await page.waitForSelector('a[download="xl_result.mp4"]', {
61 +       timeout: 800000, // needs to be large enough in case someone else attempts to use our space
62 +     })
63 +
64 +     const upscaledFileUrl = await page.$$eval('a[download="xl_result.mp4"]', el => el.map(x => x.getAttribute("href"))[0])
65 +
66 +     // it is always a good idea to download to a tmp dir before saving to the pending dir,
67 +     // because there is always a risk that the download will fail
68 +
69 +     const tmpFileName = `${uuidv4()}.mp4`
70 +
71 +     await downloadFileToTmp(upscaledFileUrl, tmpFileName)
72 +     await moveFileFromTmpToPending(tmpFileName, fileName)
73 +   } catch (err) {
74 +     throw err
75 +   } finally {
76 +     await browser.close()
77 +   }
78 + }
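The `shift()`/`push()` pair at the top of `upscaleVideo` implements a simple round-robin over the configured Space URLs (with a single entry here, it always returns the same one). An index-based variant avoids mutating the array; this sketch is a hypothetical alternative, not part of the commit:

```ts
// Hypothetical round-robin picker that leaves the instances array intact.
let nextInstance = 0

function pickInstance(instances: string[]): string {
  const instance = instances[nextInstance % instances.length]
  nextInstance += 1
  return instance
}
```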
src/scheduler/deleteVideo.mts
ADDED
@@ -0,0 +1,28 @@
1  + import tmpDir from "temp-dir"
2  + import { validate as uuidValidate } from "uuid"
3  +
4  + import { completedMetadataDirFilePath, completedFilesDirFilePath, pendingMetadataDirFilePath, pendingFilesDirFilePath } from "../config.mts"
5  + import { deleteFilesWithName } from "../utils/deleteAllFilesWith.mts"
6  +
7  +
8  + // note: we make sure ownerId and videoId are *VALID*,
9  + // otherwise an attacker could try to delete important files!
10 + export const deleteVideo = async (ownerId: string, videoId?: string) => {
11 +   if (!uuidValidate(ownerId)) {
12 +     throw new Error(`fatal error: ownerId ${ownerId} is invalid!`)
13 +   }
14 +
15 +   if (videoId && !uuidValidate(videoId)) {
16 +     throw new Error(`fatal error: videoId ${videoId} is invalid!`)
17 +   }
18 +   const id = videoId ? `${ownerId}_${videoId}` : ownerId
19 +
20 +   // this should delete everything, including audio files.
21 +   // however we still have some temporary files with a name that is unique:
22 +   // we should probably rename those
23 +   await deleteFilesWithName(tmpDir, id)
24 +   await deleteFilesWithName(completedMetadataDirFilePath, id)
25 +   await deleteFilesWithName(completedFilesDirFilePath, id)
26 +   await deleteFilesWithName(pendingMetadataDirFilePath, id)
27 +   await deleteFilesWithName(pendingFilesDirFilePath, id)
28 + }
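`deleteFilesWithName` is imported from `src/utils/deleteAllFilesWith.mts`, which is outside this section. A plausible behavior is scanning a directory and removing every file whose name starts with the given id; this is an assumption, not the actual code:

```ts
// Hypothetical sketch of the helper; the real implementation in
// src/utils/deleteAllFilesWith.mts may differ.
import path from "node:path"
import { promises as fs } from "node:fs"

export async function deleteFilesWithName(dirPath: string, name: string): Promise<void> {
  const files = await fs.readdir(dirPath)
  for (const file of files) {
    // assumption: asset and metadata files are prefixed with the owner/video id
    if (file.startsWith(name)) {
      await fs.unlink(path.join(dirPath, file))
    }
  }
}
```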
src/scheduler/getAllVideosForOwner.mts
ADDED
@@ -0,0 +1,9 @@
1 + import { Video } from "../types.mts"
2 + import { getCompletedVideos } from "./getCompletedVideos.mts"
3 + import { getPendingVideos } from "./getPendingVideos.mts"
4 +
5 + export const getAllVideosForOwner = async (ownerId: string): Promise<Video[]> => {
6 +   const pendingVideos = await getPendingVideos(ownerId)
7 +   const completedVideos = await getCompletedVideos(ownerId)
8 +   return [...pendingVideos, ...completedVideos]
9 + }
src/scheduler/getCompletedVideos.mts
ADDED
@@ -0,0 +1,9 @@
1 + import { Video } from "../types.mts"
2 + import { completedMetadataDirFilePath } from "../config.mts"
3 + import { readVideoMetadataFiles } from "./readVideoMetadataFiles.mts"
4 +
5 + export const getCompletedVideos = async (ownerId?: string): Promise<Video[]> => {
6 +   const completedVideos = await readVideoMetadataFiles(completedMetadataDirFilePath, ownerId)
7 +
8 +   return completedVideos
9 + }
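`readVideoMetadataFiles` is not included in this section. Given how `deleteVideo` builds ids, a plausible implementation reads every JSON metadata file in a directory, optionally filtered by an ownerId prefix; the sketch below, including the file-naming assumption, is hypothetical:

```ts
// Hypothetical sketch; the real src/scheduler/readVideoMetadataFiles.mts may differ.
import path from "node:path"
import { promises as fs } from "node:fs"
import { Video } from "../types.mts"

export async function readVideoMetadataFiles(dirPath: string, ownerId?: string): Promise<Video[]> {
  const files = await fs.readdir(dirPath)
  const videos: Video[] = []
  for (const file of files) {
    // assumption: metadata files are named `${ownerId}_${videoId}.json`
    if (!file.endsWith(".json")) { continue }
    if (ownerId && !file.startsWith(ownerId)) { continue }
    try {
      const raw = await fs.readFile(path.join(dirPath, file), "utf8")
      videos.push(JSON.parse(raw) as Video)
    } catch (err) {
      console.error(`failed to read or parse ${file}: ${err}`)
    }
  }
  return videos
}
```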