Commit 53aa97a · 0 parents
barton (jbilcke-hf, HF Staff) committed

Duplicate from jbilcke-hf/VideoChain-API


Co-authored-by: Julian Bilcke <[email protected]>

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .dockerignore +6 -0
  2. .gitignore +8 -0
  3. .nvmrc +1 -0
  4. Dockerfile +41 -0
  5. LICENSE.txt +201 -0
  6. README.md +12 -0
  7. TODO.md +6 -0
  8. package-lock.json +0 -0
  9. package.json +41 -0
  10. src/config.mts +14 -0
  11. src/data/all_words.json +0 -0
  12. src/data/good_words.json +0 -0
  13. src/index.mts +605 -0
  14. src/initFolders.mts +19 -0
  15. src/llm/enrichVideoSpecsUsingLLM.mts +75 -0
  16. src/llm/openai/createChatCompletion.mts +105 -0
  17. src/llm/openai/createChatCompletionStream.mts +66 -0
  18. src/llm/openai/generateYAML.mts +42 -0
  19. src/llm/openai/getTextPrompt.mts +4 -0
  20. src/llm/openai/getUserContent.mts +7 -0
  21. src/llm/openai/openai.mts +7 -0
  22. src/llm/openai/runModerationCheck.mts +30 -0
  23. src/llm/openai/stream.mts +35 -0
  24. src/llm/types.mts +25 -0
  25. src/main.mts +41 -0
  26. src/preproduction/mocks.mts +36 -0
  27. src/preproduction/prompts.mts +123 -0
  28. src/production/addAudioToVideo.mts +43 -0
  29. src/production/assembleShots.mts +59 -0
  30. src/production/concatNoGL.mts +33 -0
  31. src/production/generateActor.mts +50 -0
  32. src/production/generateAudio.mts +76 -0
  33. src/production/generateAudioLegacy.mts +52 -0
  34. src/production/generateVideo.mts +57 -0
  35. src/production/generateVoice.mts +78 -0
  36. src/production/interpolateVideo.mts +81 -0
  37. src/production/interpolateVideoLegacy.mts +56 -0
  38. src/production/mergeAudio.mts +49 -0
  39. src/production/normalizePendingVideoToTmpFilePath.mts +33 -0
  40. src/production/postInterpolation.mts +58 -0
  41. src/production/renderImage.mts +54 -0
  42. src/production/renderImageSegmentation.mts +69 -0
  43. src/production/renderPipeline.mts +46 -0
  44. src/production/renderScene.mts +62 -0
  45. src/production/renderVideo.mts +41 -0
  46. src/production/renderVideoSegmentation.mts +52 -0
  47. src/production/upscaleVideo.mts +78 -0
  48. src/scheduler/deleteVideo.mts +28 -0
  49. src/scheduler/getAllVideosForOwner.mts +9 -0
  50. src/scheduler/getCompletedVideos.mts +9 -0
.dockerignore ADDED
@@ -0,0 +1,6 @@
+ node_modules
+ npm-debug.log
+ models
+ sandbox
+ audio.pipe
+ video.pipe
.gitignore ADDED
@@ -0,0 +1,8 @@
+ node_modules
+ *.log
+ *.bin
+ .DS_Store
+ .venv
+ *.mp4
+ sandbox
+ scripts
.nvmrc ADDED
@@ -0,0 +1 @@
+ v18.16.0
Dockerfile ADDED
@@ -0,0 +1,41 @@
+ FROM node:18
+ # try this maybe
+
+ ARG DEBIAN_FRONTEND=noninteractive
+
+ RUN apt update
+
+ # For FFMPEG and gl concat
+ RUN apt --yes install ffmpeg curl build-essential python3 python3-dev libx11-dev libxext-dev libxext6 libglu1-mesa-dev xvfb libxi-dev libglew-dev pkg-config
+
+ # For Puppeteer
+ RUN apt --yes install libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libgbm1 libasound2 libpangocairo-1.0-0 libxss1 libgtk-3-0
+
+ # Set up a new user named "user" with user ID 1000
+ RUN useradd -o -u 1000 user
+
+ # Switch to the "user" user
+ USER user
+
+ # Set home to the user's home directory
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Set the working directory to the user's home directory
+ WORKDIR $HOME/app
+
+ # Install app dependencies
+ # A wildcard is used to ensure both package.json AND package-lock.json are copied
+ # where available (npm@5+)
+ COPY --chown=user package*.json $HOME/app/
+
+ RUN npm install
+
+ # Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+ COPY --chown=user . $HOME/app
+
+ EXPOSE 7860
+
+ # we can't use this (it times out)
+ # CMD [ "xvfb-run", "-s", "-ac -screen 0 1920x1080x24", "npm", "run", "start" ]
+ CMD [ "npm", "run", "start" ]
LICENSE.txt ADDED
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: VideoChain API
+ emoji: 🎥 🔗
+ colorFrom: black
+ colorTo: white
+ sdk: docker
+ pinned: false
+ app_port: 7860
+ duplicated_from: jbilcke-hf/VideoChain-API
+ ---
+
+ A microservice to generate videos
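For reference, here is a minimal sketch of how a client could submit a job to this service once deployed. The route shape comes from `src/index.mts` below; the base URL, the owner UUID, and the exact authorization header format are assumptions for illustration (the token check lives in `src/utils/hasValidAuthorization.mts`, which is not rendered in this view).

```typescript
// Hypothetical client call: a sketch, not part of this commit.
const baseUrl = "http://localhost:7860" // assumed deployment URL
const ownerId = "11111111-1111-4111-8111-111111111111" // must be a valid UUID

const res = await fetch(`${baseUrl}/${ownerId}`, {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: "Bearer <secret token>", // assumed header format
  },
  body: JSON.stringify({ prompt: "a duck invasion of Central Park" }),
})

const pendingVideo = await res.json() // the saved pending Video metadata
```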
TODO.md ADDED
@@ -0,0 +1,6 @@
+
+
+ To allow multiple videos to be processed at the same time:
+
+ [ ] yield from the loop at each step (see the sketch below)
+ [ ] random processing of videos
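As a hedged sketch of the first item: if each video's processing is a sequence of steps, awaiting a timer between steps would let several videos interleave. The real pipeline lives in `src/scheduler/processVideo.mts`, which is not rendered in this view, so the step type here is generic.

```typescript
// Hypothetical sketch only: the real step order is defined in processVideo.mts.
import { setImmediate } from "node:timers/promises"

type Step<T> = (video: T) => Promise<void>

async function runStepsCooperatively<T>(video: T, steps: Step<T>[]) {
  for (const step of steps) {
    await step(video)
    await setImmediate() // yield to the event loop so other videos can progress
  }
}
```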
package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
package.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "name": "videochain-api",
+   "version": "1.0.0",
+   "description": "A service which wraps and chains video and audio spaces together",
+   "main": "src/index.mts",
+   "scripts": {
+     "start": "node --loader ts-node/esm src/index.mts",
+     "test:submitVideo": "node --loader ts-node/esm src/tests/submitVideo.mts",
+     "test:checkStatus": "node --loader ts-node/esm src/tests/checkStatus.mts",
+     "test:downloadFileToTmp": "node --loader ts-node/esm src/tests/downloadFileToTmp.mts",
+     "test:stuff": "node --loader ts-node/esm src/utils/segmentImage.mts",
+     "docker": "npm run docker:build && npm run docker:run",
+     "docker:build": "docker build -t videochain-api .",
+     "docker:run": "docker run -it -p 7860:7860 videochain-api"
+   },
+   "author": "Julian Bilcke <[email protected]>",
+   "license": "Apache-2.0",
+   "dependencies": {
+     "@gorgonjs/gorgon": "^1.4.1",
+     "@gradio/client": "^0.1.4",
+     "@huggingface/inference": "^2.6.1",
+     "@types/express": "^4.17.17",
+     "@types/ffmpeg-concat": "^1.1.2",
+     "@types/uuid": "^9.0.2",
+     "eventsource-parser": "^1.0.0",
+     "express": "^4.18.2",
+     "ffmpeg-concat": "^1.3.0",
+     "fluent-ffmpeg": "^2.1.2",
+     "fs-extra": "^11.1.1",
+     "gpt-tokens": "^1.1.1",
+     "node-fetch": "^3.3.1",
+     "openai": "^3.3.0",
+     "puppeteer": "^20.8.0",
+     "resize-base64": "^1.0.12",
+     "sharp": "^0.32.4",
+     "temp-dir": "^3.0.0",
+     "ts-node": "^10.9.1",
+     "uuid": "^9.0.0",
+     "yaml": "^2.3.1"
+   }
+ }
src/config.mts ADDED
@@ -0,0 +1,14 @@
+ import path from "node:path"
+
+ export const storagePath = `${process.env.VC_STORAGE_PATH || './sandbox'}`
+
+ export const metadataDirPath = path.join(storagePath, "metadata")
+ export const pendingMetadataDirFilePath = path.join(metadataDirPath, "pending")
+ export const completedMetadataDirFilePath = path.join(metadataDirPath, "completed")
+
+ export const filesDirPath = path.join(storagePath, "files")
+ export const pendingFilesDirFilePath = path.join(filesDirPath, "pending")
+ export const completedFilesDirFilePath = path.join(filesDirPath, "completed")
+
+ export const shotFormatVersion = 1
+ export const sequenceFormatVersion = 1
src/data/all_words.json ADDED
The diff for this file is too large to render. See raw diff
 
src/data/good_words.json ADDED
The diff for this file is too large to render. See raw diff
 
src/index.mts ADDED
@@ -0,0 +1,605 @@
+ import { createReadStream, existsSync } from "node:fs"
+ import path from "node:path"
+
+ import { v4 as uuidv4, validate as uuidValidate } from "uuid"
+ import express from "express"
+
+ import { Video, VideoStatus, VideoAPIRequest, RenderRequest, RenderedScene } from "./types.mts"
+ import { parseVideoRequest } from "./utils/parseVideoRequest.mts"
+ import { savePendingVideo } from "./scheduler/savePendingVideo.mts"
+ import { getVideo } from "./scheduler/getVideo.mts"
+ import { main } from "./main.mts"
+ import { completedFilesDirFilePath } from "./config.mts"
+ import { markVideoAsToDelete } from "./scheduler/markVideoAsToDelete.mts"
+ import { markVideoAsToAbort } from "./scheduler/markVideoAsToAbort.mts"
+ import { markVideoAsToPause } from "./scheduler/markVideoAsToPause.mts"
+ import { markVideoAsPending } from "./scheduler/markVideoAsPending.mts"
+ import { getPendingVideos } from "./scheduler/getPendingVideos.mts"
+ import { hasValidAuthorization } from "./utils/hasValidAuthorization.mts"
+ import { getAllVideosForOwner } from "./scheduler/getAllVideosForOwner.mts"
+ import { initFolders } from "./initFolders.mts"
+ import { sortVideosByYoungestFirst } from "./utils/sortVideosByYoungestFirst.mts"
+ import { generateVideo } from "./production/generateVideo.mts"
+ import { generateSeed } from "./utils/generateSeed.mts"
+ import { getRenderedScene, renderScene } from "./production/renderScene.mts"
+
+ initFolders()
+ // to disable all processing (e.g. to debug),
+ // comment out the following line:
+ main()
+
+ const app = express()
+ const port = 7860
+
+ app.use(express.json())
+
+ let isRendering = false
+
+ // a "fast track" pipeline
+ app.post("/render", async (req, res) => {
+
+   const request = req.body as RenderRequest
+   console.log(req.body)
+   if (!request.prompt) {
+     console.log("Invalid prompt")
+     res.status(400)
+     res.write(JSON.stringify({ url: "", error: "invalid prompt" }))
+     res.end()
+     return
+   }
+
+   let response: RenderedScene = {
+     renderId: "",
+     status: "pending",
+     assetUrl: "",
+     maskBase64: "",
+     error: "",
+     segments: []
+   }
+
+   try {
+     response = await renderScene(request)
+   } catch (err) {
+     // console.log("failed to render scene!")
+     response.error = `failed to render scene: ${err}`
+   }
+
+   if (response.error === "already rendering") {
+     console.log("server busy")
+     res.status(200)
+     res.write(JSON.stringify(response))
+     res.end()
+     return
+   } else if (response.error.length > 0) {
+     // console.log("server error")
+     res.status(500)
+     res.write(JSON.stringify(response))
+     res.end()
+     return
+   } else {
+     // console.log("all good")
+     res.status(200)
+     res.write(JSON.stringify(response))
+     res.end()
+     return
+   }
+ })
+
+ // a "fast track" pipeline
+ app.get("/render/:renderId", async (req, res) => {
+
+   const renderId = `${req.params.renderId}`
+
+   if (!uuidValidate(renderId)) {
+     console.error("invalid render id")
+     res.status(400)
+     res.write(JSON.stringify({ error: `invalid render id` }))
+     res.end()
+     return
+   }
+
+   let response: RenderedScene = {
+     renderId: "",
+     status: "pending",
+     assetUrl: "",
+     error: "",
+     maskBase64: "",
+     segments: []
+   }
+
+   try {
+     response = await getRenderedScene(renderId)
+   } catch (err) {
+     // console.log("failed to render scene!")
+     response.error = `failed to render scene: ${err}`
+   }
+
+   if (response.error === "already rendering") {
+     console.log("server busy")
+     res.status(200)
+     res.write(JSON.stringify(response))
+     res.end()
+     return
+   } else if (response.error.length > 0) {
+     // console.log("server error")
+     res.status(500)
+     res.write(JSON.stringify(response))
+     res.end()
+     return
+   } else {
+     // console.log("all good")
+     res.status(200)
+     res.write(JSON.stringify(response))
+     res.end()
+     return
+   }
+ })
+
+
+ // a "fast track" pipeline
+ /*
+ app.post("/segment", async (req, res) => {
+
+   const request = req.body as RenderRequest
+   console.log(req.body)
+
+   let result: RenderedScene = {
+     assetUrl: "",
+     maskBase64: "",
+     error: "",
+     segments: []
+   }
+
+   try {
+     result = await renderScene(request)
+   } catch (err) {
+     // console.log("failed to render scene!")
+     result.error = `failed to render scene: ${err}`
+   }
+
+   if (result.error === "already rendering") {
+     console.log("server busy")
+     res.status(200)
+     res.write(JSON.stringify({ url: "", error: result.error }))
+     res.end()
+     return
+   } else if (result.error.length > 0) {
+     // console.log("server error")
+     res.status(500)
+     res.write(JSON.stringify({ url: "", error: result.error }))
+     res.end()
+     return
+   } else {
+     // console.log("all good")
+     res.status(200)
+     res.write(JSON.stringify(result))
+     res.end()
+     return
+   }
+ })
+ */
+
+
+
+ app.post("/:ownerId", async (req, res) => {
+   const request = req.body as VideoAPIRequest
+
+   if (!hasValidAuthorization(req.headers)) {
+     console.log("Invalid authorization")
+     res.status(401)
+     res.write(JSON.stringify({ error: "invalid token" }))
+     res.end()
+     return
+   }
+
+   const ownerId = req.params.ownerId
+
+   if (!uuidValidate(ownerId)) {
+     console.error("invalid owner id")
+     res.status(400)
+     res.write(JSON.stringify({ error: `invalid owner id` }))
+     res.end()
+     return
+   }
+
+   let video: Video = null
+
+   console.log(`creating video from request..`)
+   console.log(`request: `, JSON.stringify(request))
+   if (!request?.prompt?.length) {
+     console.error(`failed to create video (prompt is empty)`)
+     res.status(400)
+     res.write(JSON.stringify({ error: "prompt is empty" }))
+     res.end()
+     return
+   }
+   try {
+     video = await parseVideoRequest(ownerId, request)
+   } catch (err) {
+     console.error(`failed to create video: ${video} (${err})`)
+     res.status(400)
+     res.write(JSON.stringify({ error: "query seems to be malformed" }))
+     res.end()
+     return
+   }
+
+   console.log(`saving video ${video.id}`)
+   try {
+     await savePendingVideo(video)
+     res.status(200)
+     res.write(JSON.stringify(video))
+     res.end()
+   } catch (err) {
+     console.error(err)
+     res.status(500)
+     res.write(JSON.stringify({ error: "couldn't save the video" }))
+     res.end()
+   }
+ })
+
+
+ app.get("/:ownerId/:videoId\.mp4", async (req, res) => {
+
+   /*
+   for simplicity, let's skip auth when fetching videos
+   the UUIDs cannot easily be guessed anyway
+
+   if (!hasValidAuthorization(req.headers)) {
+     console.log("Invalid authorization")
+     res.status(401)
+     res.write(JSON.stringify({ error: "invalid token" }))
+     res.end()
+     return
+   }
+   */
+
+   const ownerId = req.params.ownerId
+   console.log("downloading..")
+
+   if (!uuidValidate(ownerId)) {
+     console.error("invalid owner id")
+     res.status(400)
+     res.write(JSON.stringify({ error: `invalid owner id` }))
+     res.end()
+     return
+   }
+
+   const videoId = req.params.videoId
+
+   if (!uuidValidate(videoId)) {
+     console.error("invalid video id")
+     res.status(400)
+     res.write(JSON.stringify({ error: `invalid video id` }))
+     res.end()
+     return
+   }
+
+   let video: Video = null
+   try {
+     video = await getVideo(ownerId, videoId)
+     console.log(`returning video ${videoId} to owner ${ownerId}`)
+   } catch (err) {
+     res.status(404)
+     res.write(JSON.stringify({ error: "this video doesn't exist" }))
+     res.end()
+     return
+   }
+
+   const completedFilePath = path.join(completedFilesDirFilePath, video.fileName)
+
+   // note: we DON'T want to use the pending file path, as there may be operations on it
+   // (i.e. a process might be busy writing stuff to it)
+   const filePath = existsSync(completedFilePath) ? completedFilePath : ""
+   if (!filePath) {
+     res.status(400)
+     res.write(JSON.stringify({ error: "video exists, but cannot be previewed yet" }))
+     res.end()
+     return
+   }
+
+   // the file path exists, let's try to read it
+   try {
+     // do we need this?
+     // res.status(200)
+     // res.setHeader("Content-Type", "media/mp4")
+     console.log(`creating a video read stream from ${filePath}`)
+     const stream = createReadStream(filePath)
+
+     stream.on('close', () => {
+       console.log(`finished streaming the video`)
+       res.end()
+     })
+
+     stream.pipe(res)
+   } catch (err) {
+     console.error(`failed to read the video file at ${filePath}: ${err}`)
+     res.status(500)
+     res.write(JSON.stringify({ error: "failed to read the video file" }))
+     res.end()
+   }
+ })
+
+ // get metadata (json)
+ app.get("/:ownerId/:videoId", async (req, res) => {
+
+   if (!hasValidAuthorization(req.headers)) {
+     console.log("Invalid authorization")
+     res.status(401)
+     res.write(JSON.stringify({ error: "invalid token" }))
+     res.end()
+     return
+   }
+
+   const ownerId = req.params.ownerId
+
+   if (!uuidValidate(ownerId)) {
+     console.error("invalid owner id")
+     res.status(400)
+     res.write(JSON.stringify({ error: `invalid owner id` }))
+     res.end()
+     return
+   }
+
+   const videoId = req.params.videoId
+
+   if (!uuidValidate(videoId)) {
+     console.error("invalid video id")
+     res.status(400)
+     res.write(JSON.stringify({ error: `invalid video id` }))
+     res.end()
+     return
+   }
+
+   try {
+     const video = await getVideo(ownerId, videoId)
+     res.status(200)
+     res.write(JSON.stringify(video))
+     res.end()
+   } catch (err) {
+     console.error(err)
+     res.status(404)
+     res.write(JSON.stringify({ error: "couldn't find this video" }))
+     res.end()
+   }
+ })
+
+ // only get the videos for a specific owner
+ app.get("/:ownerId", async (req, res) => {
+   if (!hasValidAuthorization(req.headers)) {
+     console.log("Invalid authorization")
+     res.status(401)
+     res.write(JSON.stringify({ error: "invalid token" }))
+     res.end()
+     return
+   }
+
+   const ownerId = req.params.ownerId
+
+   if (!uuidValidate(ownerId)) {
+     console.error(`invalid owner id ${ownerId}`)
+     res.status(400)
+     res.write(JSON.stringify({ error: `invalid owner id ${ownerId}` }))
+     res.end()
+     return
+   }
+
+   try {
+     const videos = await getAllVideosForOwner(ownerId)
+     sortVideosByYoungestFirst(videos)
+
+     res.status(200)
+     res.write(JSON.stringify(videos.filter(video => video.status !== "delete"), null, 2))
+     res.end()
+   } catch (err) {
+     console.error(err)
+     res.status(500)
+     res.write(JSON.stringify({ error: `couldn't get the videos for owner ${ownerId}` }))
+     res.end()
+   }
+ })
+
+ // get all pending videos - this is for admin usage only
+ app.get("/", async (req, res) => {
+   if (!hasValidAuthorization(req.headers)) {
+     // this is what users will see in the space - but no need to show something scary
+     console.log("Invalid authorization")
+     res.status(200)
+     res.write(`<html><head></head><body>
+ This space is the rendering engine used by various demo spaces, such as <a href="https://jbilcke-hf-fishtank.hf.space" target="_blank">FishTank</a> and <a href="https://jbilcke-hf-videochain-ui.hf.space" target="_blank">VideoChain UI</a>
+ </body></html>`)
+     res.end()
+     // res.status(401)
+     // res.write(JSON.stringify({ error: "invalid token" }))
+     // res.end()
+     return
+   }
+
+   try {
+     const videos = await getPendingVideos()
+     res.status(200)
+     res.write(JSON.stringify(videos, null, 2))
+     res.end()
+   } catch (err) {
+     console.error(err)
+     res.status(500)
+     res.write(JSON.stringify({ error: "couldn't get the videos" }))
+     res.end()
+   }
+ })
+
+
+ // edit a video
+ app.patch("/:ownerId/:videoId", async (req, res) => {
+
+   if (!hasValidAuthorization(req.headers)) {
+     console.log("Invalid authorization")
+     res.status(401)
+     res.write(JSON.stringify({ error: "invalid token" }))
+     res.end()
+     return
+   }
+
+   const ownerId = req.params.ownerId
+
+   if (!uuidValidate(ownerId)) {
+     console.error(`invalid owner id ${ownerId}`)
+     res.status(400)
+     res.write(JSON.stringify({ error: `invalid owner id ${ownerId}` }))
+     res.end()
+     return
+   }
+
+   const videoId = req.params.videoId
+
+   if (!uuidValidate(videoId)) {
+     console.error(`invalid video id ${videoId}`)
+     res.status(400)
+     res.write(JSON.stringify({ error: `invalid video id ${videoId}` }))
+     res.end()
+     return
+   }
+
+   let status: VideoStatus = "unknown"
+   try {
+     const request = req.body as { status: VideoStatus }
+     if (['pending', 'abort', 'delete', 'pause'].includes(request.status)) {
+       status = request.status
+     } else {
+       throw new Error(`invalid video status "${request.status}"`)
+     }
+   } catch (err) {
+     console.error(`invalid parameter (${err})`)
+     res.status(401)
+     res.write(JSON.stringify({ error: `invalid parameter (${err})` }))
+     res.end()
+     return
+   }
+
+   switch (status) {
+     case 'delete':
+       try {
+         await markVideoAsToDelete(ownerId, videoId)
+         console.log(`deleting video ${videoId}`)
+         res.status(200)
+         res.write(JSON.stringify({ success: true }))
+         res.end()
+       } catch (err) {
+         console.error(`failed to delete video ${videoId} (${err})`)
+         res.status(500)
+         res.write(JSON.stringify({ error: `failed to delete video ${videoId}` }))
+         res.end()
+       }
+       break
+
+     case 'abort':
+       try {
+         await markVideoAsToAbort(ownerId, videoId)
+         console.log(`aborted video ${videoId}`)
+         res.status(200)
+         res.write(JSON.stringify({ success: true }))
+         res.end()
+       } catch (err) {
+         console.error(`failed to abort video ${videoId} (${err})`)
+         res.status(500)
+         res.write(JSON.stringify({ error: `failed to abort video ${videoId}` }))
+         res.end()
+       }
+       break
+
+     case 'pause':
+       try {
+         await markVideoAsToPause(ownerId, videoId)
+         console.log(`paused video ${videoId}`)
+         res.status(200)
+         res.write(JSON.stringify({ success: true }))
+         res.end()
+       } catch (err) {
+         console.error(`failed to pause video ${videoId} (${err})`)
+         res.status(500)
+         res.write(JSON.stringify({ error: `failed to pause video ${videoId}` }))
+         res.end()
+       }
+       break
+
+     case 'pending':
+       try {
+         await markVideoAsPending(ownerId, videoId)
+         console.log(`unpausing video ${videoId}`)
+         res.status(200)
+         res.write(JSON.stringify({ success: true }))
+         res.end()
+       } catch (err) {
+         console.error(`failed to unpause video ${videoId} (${err})`)
+         res.status(500)
+         res.write(JSON.stringify({ error: `failed to unpause video ${videoId}` }))
+         res.end()
+       }
+       break
+
+     default:
+       console.log(`unsupported status ${status}`)
+       res.status(401)
+       res.write(JSON.stringify({ error: `unsupported status ${status}` }))
+       res.end()
+   }
+ })
+
+ // delete a video - this is legacy, we should use other functions instead
+ /*
+ app.delete("/:id", async (req, res) => {
+
+   if (!hasValidAuthorization(req.headers)) {
+     console.log("Invalid authorization")
+     res.status(401)
+     res.write(JSON.stringify({ error: "invalid token" }))
+     res.end()
+     return
+   }
+
+   const [ownerId, videoId] = `${req.params.id}`.split("_")
+
+   if (!uuidValidate(ownerId)) {
+     console.error("invalid owner id")
+     res.status(400)
+     res.write(JSON.stringify({ error: `invalid owner id` }))
+     res.end()
+     return
+   }
+
+   if (!uuidValidate(videoId)) {
+     console.error("invalid video id")
+     res.status(400)
+     res.write(JSON.stringify({ error: `invalid video id` }))
+     res.end()
+     return
+   }
+
+   // security note: we always check the existence of the video first
+   // that's because we are going to delete all the associated files with a glob,
+   // so we must be sure the id is not a system path or something ^^
+   let video: Video = null
+   try {
+     video = await getVideo(ownerId, videoId)
+   } catch (err) {
+     console.error(err)
+     res.status(404)
+     res.write(JSON.stringify({ error: "couldn't find this video" }))
+     res.end()
+     return
+   }
+
+   try {
+     await markVideoAsToDelete(ownerId, videoId)
+     res.status(200)
+     res.write(JSON.stringify({ success: true }))
+     res.end()
+   } catch (err) {
+     console.error(err)
+     res.status(500)
+     res.write(JSON.stringify({ success: false, error: "failed to delete the video" }))
+     res.end()
+   }
+ })
+ */
+
+ app.listen(port, () => { console.log(`Open http://localhost:${port}`) })
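To illustrate the "fast track" routes defined above, here is a hedged client-side sketch; the base URL is an assumption, and only `prompt` is required by the `/render` route.

```typescript
// Sketch of the fast-track render round-trip (assumed base URL).
const base = "http://localhost:7860"

// kick off a render
const scene = await fetch(`${base}/render`, {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ prompt: "a foggy harbor at dawn" }),
}).then(r => r.json())

// poll the render by id until its status is no longer "pending"
const update = await fetch(`${base}/render/${scene.renderId}`).then(r => r.json())
console.log(update.status, update.assetUrl)
```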
src/initFolders.mts ADDED
@@ -0,0 +1,19 @@
+ import {
+   metadataDirPath,
+   pendingMetadataDirFilePath,
+   completedMetadataDirFilePath,
+   filesDirPath,
+   pendingFilesDirFilePath,
+   completedFilesDirFilePath
+ } from "./config.mts"
+ import { createDirIfNeeded } from "./utils/createDirIfNeeded.mts"
+
+ export const initFolders = () => {
+   console.log(`initializing folders..`)
+   createDirIfNeeded(metadataDirPath)
+   createDirIfNeeded(pendingMetadataDirFilePath)
+   createDirIfNeeded(completedMetadataDirFilePath)
+   createDirIfNeeded(filesDirPath)
+   createDirIfNeeded(pendingFilesDirFilePath)
+   createDirIfNeeded(completedFilesDirFilePath)
+ }
src/llm/enrichVideoSpecsUsingLLM.mts ADDED
@@ -0,0 +1,75 @@
+ import { ChatCompletionRequestMessage } from "openai"
+
+ import { Video, VideoAPIRequest } from "../types.mts"
+ import { generateYAML } from "./openai/generateYAML.mts"
+ import { HallucinatedVideoRequest, OpenAIErrorResponse } from "./types.mts"
+ import { getQueryChatMessages } from "../preproduction/prompts.mts"
+ import { getValidNumber } from "../utils/getValidNumber.mts"
+ import { parseShotRequest } from "../utils/parseShotRequest.mts"
+
+
+ export const enrichVideoSpecsUsingLLM = async (video: Video): Promise<Video> => {
+
+   const messages: ChatCompletionRequestMessage[] = getQueryChatMessages(video.videoPrompt)
+
+   const defaultValue = {} as unknown as HallucinatedVideoRequest
+
+   let hallucinatedVideo: HallucinatedVideoRequest
+   video.shots = []
+
+   try {
+     hallucinatedVideo = await generateYAML<HallucinatedVideoRequest>(
+       messages,
+       defaultValue
+     )
+     console.log("enrichVideoSpecsUsingLLM: hallucinatedVideo = ", hallucinatedVideo)
+   } catch (err) {
+
+     let error: OpenAIErrorResponse = err?.response?.data?.error as unknown as OpenAIErrorResponse
+     if (!error) {
+       error = { message: `${err || ""}` } as unknown as OpenAIErrorResponse
+     }
+
+     console.error(JSON.stringify(error, null, 2))
+     throw new Error(`failed to call the LLM: ${error.message}`)
+   }
+
+   // const video = JSON.parse(JSON.stringify(referenceVideo)) as Video
+
+   // TODO here we should make some verifications and perhaps even some conversions
+   // between the LLM response and the actual format used in a videoRequest
+   video.backgroundAudioPrompt = hallucinatedVideo.backgroundAudioPrompt || video.backgroundAudioPrompt
+   video.foregroundAudioPrompt = hallucinatedVideo.foregroundAudioPrompt || video.foregroundAudioPrompt
+   video.actorPrompt = hallucinatedVideo.actorPrompt || video.actorPrompt
+   video.actorVoicePrompt = hallucinatedVideo.actorVoicePrompt || video.actorVoicePrompt
+
+   video.noise = typeof hallucinatedVideo.noise !== "undefined"
+     ? (`${hallucinatedVideo.noise || ""}`.toLowerCase() === "true")
+     : video.noise
+
+   video.noiseAmount = typeof hallucinatedVideo.noiseAmount !== "undefined"
+     ? getValidNumber(hallucinatedVideo.noiseAmount, 0, 10, 2)
+     : video.noiseAmount
+
+   video.outroDurationMs = typeof hallucinatedVideo.outroDurationMs !== "undefined"
+     ? getValidNumber(hallucinatedVideo.outroDurationMs, 0, 3000, 500)
+     : video.outroDurationMs
+
+   const hallucinatedShots = Array.isArray(hallucinatedVideo.shots) ? hallucinatedVideo.shots : []
+
+
+   for (const hallucinatedShot of hallucinatedShots) {
+     const shot = await parseShotRequest(video, {
+       shotPrompt: hallucinatedShot.shotPrompt,
+       environmentPrompt: hallucinatedShot.environmentPrompt,
+       photographyPrompt: hallucinatedShot.photographyPrompt,
+       actionPrompt: hallucinatedShot.actionPrompt,
+       foregroundAudioPrompt: hallucinatedShot.foregroundAudioPrompt
+     })
+     video.shots.push(shot)
+   }
+
+   console.log("enrichVideoSpecsUsingLLM: video = ", video)
+
+   return video
+ }
src/llm/openai/createChatCompletion.mts ADDED
@@ -0,0 +1,105 @@
+ import { ChatCompletionRequestMessage } from "openai"
+ import { GPTTokens } from "gpt-tokens"
+
+ import { openai } from "./openai.mts"
+ import { runModerationCheck } from "./runModerationCheck.mts"
+ import { getUserContent } from "./getUserContent.mts"
+ import { getTextPrompt } from "./getTextPrompt.mts"
+
+ export const createChatCompletion = async (
+   messages: ChatCompletionRequestMessage[],
+   model = "gpt-4"
+ ): Promise<string> => {
+   // this is the part added by the user, and the one we need to check against the moderation API
+   const userContent = getUserContent(messages)
+
+   const check = await runModerationCheck(userContent)
+
+   if (check.flagged) {
+     console.error("Thoughtcrime: content flagged by the AI police", {
+       userContent,
+       moderationResult: check,
+     })
+     return "Thoughtcrime: content flagged by the AI police"
+   }
+
+   const rawPrompt = getTextPrompt(messages)
+
+
+   // for docs: https://www.npmjs.com/package/gpt-tokens
+   const usageInfo = new GPTTokens({
+     // Plus enjoys a 25% cost reduction for input tokens on GPT-3.5 Turbo (0.0015 per 1K input tokens)
+     plus    : false,
+     model   : "gpt-4",
+     messages: messages as any,
+   })
+
+   console.table({
+     "Tokens prompt": usageInfo.promptUsedTokens,
+     "Tokens completion": usageInfo.completionUsedTokens,
+     "Tokens total": usageInfo.usedTokens,
+   })
+
+   // Price USD: 0.000298
+   console.log("Price USD: ", usageInfo.usedUSD)
+
+   // const tokenLimit = 4000
+
+   const maxTokens = 4000 - usageInfo.promptUsedTokens
+
+   console.log("maxTokens:", maxTokens)
+   /*
+   console.log("settings:", {
+     tokenLimit,
+     promptLength: rawPrompt.length,
+     promptTokenLength: rawPrompt.length / 1.9,
+     maxTokens
+   })
+
+   console.log("createChatCompletion(): raw prompt length:", rawPrompt.length)
+   console.log(
+     `createChatCompletion(): requesting ${maxTokens} of the ${tokenLimit} tokens available`
+   )
+   */
+
+   console.log("query:", {
+     model,
+     // messages,
+     user: "Anonymous User",
+     temperature: 0.7,
+     max_tokens: maxTokens,
+     // stop: preset.stop?.length ? preset.stop : undefined,
+   })
+
+   const response = await openai.createChatCompletion({
+     model,
+     messages,
+     // TODO use the Hugging Face Login username here
+     user: "Anonymous User",
+     temperature: 0.7,
+
+     // 30 tokens is about 120 characters
+     // we don't want more, as it would take longer to respond
+     max_tokens: maxTokens,
+     // stop: preset.stop?.length ? preset.stop : undefined,
+   })
+
+   const { choices } = response.data
+
+   if (!choices.length) {
+     console.log("createChatCompletion(): no choice found in the LLM response..")
+     return ""
+   }
+   const firstChoice = choices[0]
+
+   if (firstChoice?.message?.role !== "assistant") {
+     console.log(
+       "createChatCompletion(): something went wrong, the model imagined the user response?!"
+     )
+     return ""
+   }
+
+   console.log("createChatCompletion(): response", firstChoice.message.content)
+
+   return firstChoice.message.content || ""
+ }
src/llm/openai/createChatCompletionStream.mts ADDED
@@ -0,0 +1,66 @@
+ import { ChatCompletionRequestMessage } from "openai"
+
+ import { openai } from "./openai.mts"
+ import { streamCompletion } from "./stream.mts"
+ import { getTextPrompt } from "./getTextPrompt.mts"
+
+ export const createChatCompletionStream = async (
+   messages: ChatCompletionRequestMessage[],
+   model: string,
+   onMessage: (message: string) => Promise<void>,
+   onEnd: () => Promise<void> = async () => {}
+ ) => {
+   try {
+     const rawPrompt = getTextPrompt(messages)
+
+     const tokenLimit = 4096 // 8000
+
+     const maxTokens = Math.round(tokenLimit - rawPrompt.length / 1.9)
+
+     const completion = await openai.createCompletion({
+       model,
+       prompt: rawPrompt, // the completion API expects text, not chat messages
+       temperature: 0.7,
+       max_tokens: Math.min(30, maxTokens),
+       stream: true,
+     })
+
+     for await (const message of streamCompletion(completion as any)) {
+       try {
+         const parsed = JSON.parse(message)
+         const { text } = parsed.choices[0]
+
+         try {
+           await onMessage(text)
+         } catch (err) {
+           console.error(
+             'Could not process stream message (callback failed)',
+             message,
+             err
+           )
+         }
+       } catch (error) {
+         console.error('Could not JSON parse stream message', message, error)
+       }
+     }
+     try {
+       await onEnd()
+     } catch (err) {
+       console.error('onEnd callback failed', err)
+     }
+   } catch (error: any) {
+     if (error.code) {
+       try {
+         const parsed = JSON.parse(error.body)
+         console.error('An error occurred during OpenAI request: ', parsed)
+       } catch (error) {
+         console.error(
+           'An error occurred during OpenAI request (invalid json): ',
+           error
+         )
+       }
+     } else {
+       console.error('An error occurred during OpenAI request', error)
+     }
+   }
+ }
src/llm/openai/generateYAML.mts ADDED
@@ -0,0 +1,42 @@
+ import { ChatCompletionRequestMessage } from "openai"
+ import { parse } from "yaml"
+
+ import { createChatCompletion } from "./createChatCompletion.mts"
+
+ export const generateYAML = async <T,>(messages: ChatCompletionRequestMessage[] = [], defaultValue?: T): Promise<T> => {
+
+   const defaultResult = defaultValue || ({} as T)
+
+   if (!messages.length) {
+     return defaultResult
+   }
+
+   const output = await createChatCompletion(messages)
+
+   let raw = ""
+
+   // clean up any remains of the markdown response
+   raw = output.split("```")[0]
+
+   // remove any remaining `
+   const input = raw.replaceAll("`", "")
+
+   try {
+     const obj = parse(input) as T
+
+     if (obj === null || typeof obj === "undefined") {
+       throw new Error("couldn't parse YAML")
+     }
+
+     return obj
+   } catch (err) {
+     // just in case, we also try JSON!
+     const obj = JSON.parse(input) as T
+
+     if (obj === null || typeof obj === "undefined") {
+       throw new Error("couldn't parse JSON")
+     }
+
+     return obj
+   }
+ }
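A short usage sketch for `generateYAML`; the target shape and the prompts are made up for illustration.

```typescript
// Hypothetical usage: the shape of T and the messages are illustrative only.
import { ChatCompletionRequestMessage } from "openai"
import { generateYAML } from "./generateYAML.mts"

interface ShotIdea { shotPrompt: string; actionPrompt: string }

const messages: ChatCompletionRequestMessage[] = [
  { role: "system", content: "Answer only with YAML matching { shotPrompt, actionPrompt }" },
  { role: "user", content: "a duck landing on a pond at dawn" },
]

const idea = await generateYAML<ShotIdea>(
  messages,
  { shotPrompt: "", actionPrompt: "" } // returned as-is when messages is empty
)
```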
src/llm/openai/getTextPrompt.mts ADDED
@@ -0,0 +1,4 @@
+ import { ChatCompletionRequestMessage } from "openai"
+
+ export const getTextPrompt = (prompt: ChatCompletionRequestMessage[]) =>
+   prompt.reduce((acc, item) => acc.concat(item.content), "") || ""
src/llm/openai/getUserContent.mts ADDED
@@ -0,0 +1,7 @@
+ import { ChatCompletionRequestMessage } from "openai"
+
+ export const getUserContent = (prompt: ChatCompletionRequestMessage[]) =>
+   prompt
+     .filter((message) => message.role === "user")
+     .map((message) => message.content)
+     .join("\n")
src/llm/openai/openai.mts ADDED
@@ -0,0 +1,7 @@
+ import { Configuration, OpenAIApi } from "openai"
+
+ export const openai = new OpenAIApi(
+   new Configuration({
+     apiKey: process.env.VC_OPENAI_API_KEY
+   })
+ )
src/llm/openai/runModerationCheck.mts ADDED
@@ -0,0 +1,30 @@
+ import {
+   CreateModerationResponseResultsInnerCategories,
+   CreateModerationResponseResultsInnerCategoryScores,
+ } from "openai"
+
+ import { openai } from "./openai.mts"
+
+ export const runModerationCheck = async (
+   input = ''
+ ): Promise<{
+   categories?: CreateModerationResponseResultsInnerCategories
+   category_scores?: CreateModerationResponseResultsInnerCategoryScores
+   flagged: boolean
+ }> => {
+   if (!input || !input.length) {
+     console.log(`skipping moderation check as the input is too short`)
+     return {
+       flagged: false,
+     }
+   }
+
+   const response = await openai.createModeration({ input })
+   const { results } = response.data
+
+   if (!results.length) {
+     throw new Error(`failed to call the moderation endpoint`)
+   }
+
+   return results[0]
+ }
src/llm/openai/stream.mts ADDED
@@ -0,0 +1,35 @@
+ import { Readable } from "node:stream"
+
+ async function* chunksToLines(
+   chunksAsync: AsyncIterable<Buffer>
+ ): AsyncIterable<string> {
+   let previous = ""
+   for await (const chunk of chunksAsync) {
+     const bufferChunk = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)
+     previous += bufferChunk
+     let eolIndex
+     while ((eolIndex = previous.indexOf("\n")) >= 0) {
+       // line includes the EOL
+       const line = previous.slice(0, eolIndex + 1).trimEnd()
+       if (line === "data: [DONE]") break
+       if (line.startsWith("data: ")) yield line
+       previous = previous.slice(eolIndex + 1)
+     }
+   }
+ }
+
+ async function* linesToMessages(
+   linesAsync: AsyncIterable<string>
+ ): AsyncIterable<string> {
+   for await (const line of linesAsync) {
+     const message = line.substring("data: ".length)
+
+     yield message
+   }
+ }
+
+ export async function* streamCompletion(
+   stream: Readable
+ ): AsyncGenerator<string, void, undefined> {
+   yield* linesToMessages(chunksToLines(stream))
+ }
src/llm/types.mts ADDED
@@ -0,0 +1,25 @@
+ // note: this has to exactly match what is in the prompt, in ../preproduction/prompts.mts
+ export interface HallucinatedVideoRequest {
+   backgroundAudioPrompt: string; // describe the background audio (crowd, birds, wind, sea etc..)
+   foregroundAudioPrompt: string; // describe the foreground audio (cars revving, footsteps, objects breaking, explosion etc)
+   actorPrompt: string; // describe the physical look of the main actor visible in the shot (man, woman, old, young, hair, glasses, clothes etc)
+   actorVoicePrompt: string; // describe the main actor voice (man, woman, old, young, amused, annoyed.. etc)
+   noise: boolean; // whether to apply movie noise or not
+   noiseAmount: number; // (integer) the amount of ffmpeg noise (film grain) to apply. 0 is none, 10 is a lot
+   outroDurationMs: number; // in milliseconds. An outro generally only lasts between 0 and 3000 (3s)
+
+   shots: Array<{
+     shotPrompt: string; // describe the main elements of a shot, in excruciating detail. You must include ALL those parameters: characters, shot story, what is happening. How they look, the textures, the expressions, their clothes. The color, materials and style of clothes.
+     environmentPrompt: string; // describe the environment, in excruciating detail. You must include ALL those parameters: lights, atmosphere and weather (misty, dust, clear, rain, snow..). Time of the day and hour of the day. Furniture, its shape, style, era. The materials used for each object. The global time period, time of the day, era. Explain if anything is moving in the background.
+     photographyPrompt: string; // describe the photography, in excruciating detail. You must include ALL those parameters: camera angle, position and movement. Type of shot and angle. Lighting. Mood. Settings. Tint of the lights. Position of the sun or moon. Shadows and their direction. Camera shutter speed, blur, bokeh, aperture.
+     actionPrompt: string; // describe the dynamics of a shot, in excruciating detail. You must include ALL those parameters: what is happening, who and what is moving. Which entities are in movement. What are the directions, starting and ending positions. At which speed entities or objects are moving. Is there motion blur, slow motion, timelapse etc.
+     foregroundAudioPrompt: string; // describe the sounds in a concise way (e.g. ringing bells, underwater sound and whistling dolphin, cat meowing etc),
+   }>
+ }
+
+ export interface OpenAIErrorResponse {
+   message: string
+   type: string
+   param: any
+   code: any
+ }
src/main.mts ADDED
@@ -0,0 +1,41 @@
+ import { initFolders } from "./initFolders.mts"
+ import { getPendingVideos } from "./scheduler/getPendingVideos.mts"
+ import { processVideo } from "./scheduler/processVideo.mts"
+ import { sortPendingVideosByLeastCompletedFirst } from "./utils/sortPendingVideosByLeastCompletedFirst.mts"
+
+ export const main = async () => {
+
+   const videos = await getPendingVideos()
+   if (!videos.length) {
+     // console.log(`no job to process.. going to try in 200 ms`)
+     setTimeout(() => {
+       main()
+     }, 200)
+     return
+   }
+
+   console.log(`there are ${videos.length} pending videos`)
+
+   sortPendingVideosByLeastCompletedFirst(videos)
+
+   let somethingFailed = ""
+   await Promise.all(videos.map(async video => {
+     try {
+       const result = await processVideo(video)
+       return result
+     } catch (err) {
+       somethingFailed = `${err}`
+       // a video failed.. no big deal
+       return Promise.resolve(somethingFailed)
+     }
+   }))
+
+   if (somethingFailed) {
+     console.error(`one of the jobs failed: ${somethingFailed}, let's wait 5 seconds`)
+     setTimeout(() => { main() }, 5000)
+   } else {
+     console.log(`successfully worked on the jobs, let's immediately loop`)
+     setTimeout(() => { main() }, 50)
+   }
+
+ }
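The recursive `setTimeout` calls above implement a simple polling scheduler. For illustration only, here is an equivalent explicit loop, assuming a hypothetical `runOnce` that reports whether something failed:

```typescript
// Equivalent polling loop, for illustration only: the file above prefers
// recursive setTimeout, which behaves the same without an explicit while loop.
import { setTimeout as sleep } from "node:timers/promises"

async function pollForever(runOnce: () => Promise<boolean>) {
  while (true) {
    const failed = await runOnce()
    await sleep(failed ? 5000 : 50) // back off after a failure
  }
}
```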
src/preproduction/mocks.mts ADDED
@@ -0,0 +1,36 @@
+ import { Video, VideoShot } from "../types.mts"
+
+ export const mockShots: VideoShot[] = [
+   {
+     "shotPrompt": "In the extreme wide shot, a flock of ducks is converging on the Central Park, coming from multiple directions. Their feathers are glossy and clean, casting off varying degrees of green, brown and white",
+     "environmentPrompt": "Central Park at sunrise, the park looks slightly misty, the sky is tinged with shades of pink and orange as the day breaks. There's dew on the grass, and the leaves on trees are rustling in the light breeze",
+     "photographyPrompt": "Eye-level shot with a slight tilt in the camera, capturing the panorama of the park. There's natural lighting, sun just rising. The camera zooms out to capture the ducks entering the park. Shutter speed is slow to capture the movement of ducks",
+     "actionPrompt": "Large groups of ducks waddle into the park from various directions, some fly in groups, landing on the pond with small splashes. Movement is slow, slightly sped up to depict the invasion",
+     "foregroundAudioPrompt": "A symphony of soft quacking and rustling feathers",
+   },
+   {
+     "shotPrompt": "In the medium shot, a group of ducks are by the pond, pecking at the ground and frolicking in the water. One male mallard is particularly captivating with its emerald green head and healthy body",
+     "environmentPrompt": "It's a sunny spring day in Central Park. The pond is surrounded by lush, green vegetation and dappled with sunlight filtering through the leaves",
+     "photographyPrompt": "Low angle shot near the water level, the camera moves in a crane shot to capture ducks in action, and the camera's aperture is partially open. Natural sunlight creates playful shadows",
+     "actionPrompt": "Ducks are pecking at the ground, dabbling at the water's edge and frolicking in the pond. The camera tracks a particularly majestic mallard navigating through the pond",
+     "foregroundAudioPrompt": "Sounds of ducks quacking and splashing in the water"
+   },
+   {
+     "shotPrompt": "Close-up shot of a mother duck with ducklings following her in a line on the grass and into the water",
+     "environmentPrompt": "Central Park, by one of the smaller ponds, surrounded by green trees. Sun is high up giving off warm, radiant light",
+     "photographyPrompt": "High angle shot, focusing on the line of ducklings following their mother. The camera follows the ducklings. The setting is bright and clear with sun illuminating the ducklings",
+     "actionPrompt": "Mother duck is leading her ducklings from the grass into the water, the ducklings obediently follow, creating a neat line. The whole scene feels peaceful",
+     "foregroundAudioPrompt": "Ducklings' high pitched chirping, soft lapping of water at the edge of the pond"
+   }
+ ] as any
+
+ export const mock: Video = {
+   "backgroundAudioPrompt": "City ambience mixed with the rustling leaves and the chirping birds in the park",
+   "foregroundAudioPrompt": "Rustling feathers, soft quacking, flapping wings, occasional splash in the pond",
+   "actorPrompt": "Main actors are ducks - a variety of breeds, mostly mallards: males with glossy green heads and females in mottled brown; all plump, medium-sized waterfowl",
+   "actorVoicePrompt": "Soft, low pitched quacking of adult ducks and higher pitched chirping of ducklings",
+   "noise": true,
+   "noiseAmount": 2,
+   "outroDurationMs": 1500,
+   "shots": mockShots
+ } as any
src/preproduction/prompts.mts ADDED
@@ -0,0 +1,123 @@
+
+
+ // types of movie shots: https://www.nfi.edu/types-of-film-shots/
+
+ import { ChatCompletionRequestMessage } from "openai"
+
+ export const getQueryChatMessages = (sceneDescription: string): ChatCompletionRequestMessage[] => {
+   return [
+     {
+       role: "system",
+       name: "moviemaking_rules",
+       content: `# Context
+ You are an AI Movie Director Assistant, and you need to help generate input requests (movie "specs") for an automated movie generation API.
+ The format expected by the API must be in YAML. The TypeScript schema for this YAML file is:
+ \`\`\`typescript
+ {
+   backgroundAudioPrompt: string; // describe the background audio (crowd, birds, wind, sea etc..)
+   foregroundAudioPrompt: string; // describe the foreground audio (cars revving, footsteps, objects breaking, explosion etc)
+   actorPrompt: string; // describe the physical look of the main actor visible in the shot (man, woman, old, young, hair, glasses, clothes etc)
+   actorVoicePrompt: string; // describe the main actor voice (man, woman, old, young, amused, annoyed.. etc)
+   noise: boolean; // whether to apply movie noise or not
+   noiseAmount: number; // (integer) the amount of noise (film grain) to apply. This is mapped to the FFmpeg noise filter (0 is none, 10 is already a lot)
+   outroDurationMs: number; // in milliseconds. An outro generally only lasts between 0 and 3000 (3s)
+   shots: Array<{
+     shotPrompt: string; // describe the main elements of a shot, in excruciating detail. You must include ALL those parameters: characters, shot story, what is happening. How they look, the textures, the expressions, their clothes. The color, materials and style of clothes.
+     environmentPrompt: string; // describe the environment, in excruciating detail. You must include ALL those parameters: Lights, atmosphere and weather (misty, dust, clear, rain, snow..). Time of the day and hour of the day. Furniture: shape, style, era. The materials used for each object. The global time period and era. Explain if anything is moving in the background.
+     photographyPrompt: string; // describe the photography, in excruciating detail. You must include ALL those parameters: Camera angle, position and movement. Type of shot and angle. Lighting. Mood. Settings. Tint of the lights. Position of the sun or moon. Shadows and their direction. Camera shutter speed, blur, bokeh, aperture.
+     actionPrompt: string; // describe the dynamics of a shot, in excruciating detail. You must include ALL those parameters: What is happening, who and what is moving. Which entities are in movement. What are the directions, starting and ending positions. At which speed entities or objects are moving. Is there motion blur, slow motion, timelapse etc.
+     foregroundAudioPrompt: string; // describe the sounds in a concise way (e.g. ringing bells, underwater sound and whistling dolphin, cat meowing etc),
+   }>
+ }
+ \`\`\`
+ # Guidelines for number of shots and their duration
+ You will generate 1 shot by default, unless more or fewer are specified.
+ A shot can only last 3 seconds max.
+ So if you are asked to generate a 6-second video, you need 2 shots; for a 9-second video, 3 shots; and so on.
+ If you are asked to generate, for instance, an 11-second video, you need three 3-second shots plus one 2-second shot.
+ # Guidelines for writing descriptions
+ It is crucial to repeat the elements constituting a sequence of multiple shots verbatim from one shot to another.
+ For instance, you will have to repeat exactly what a character or background looks like, how they are dressed, etc.
+ This will ensure consistency from one scene to another.
+ ## Choosing the right words
+ Here are some guidelines regarding film-making:
+ - The distance your subject is to the camera impacts how the audience feels about them.
+ - The subject will appear largest in a close-up or choker shot and smallest in a wide or long shot.
+ - Camera movement is a technique for changing the relationship between the subject and the camera frame, controlling the delivery of the narrative. It helps to give additional meaning to what’s happening on the screen.
+ - Do not hesitate to combine types of shots with camera movement shots and camera position (angle) shots.
+ ## Shots
+ Single shot: where the shot only captures one subject.
+ Two shot: which has only two characters.
+ Three shot: when three characters are in the frame.
+ Point-of-view shot (POV): shows the scene from the point of view of one of the characters, makes the audience feel that they are there seeing what the character is seeing.
+ Over-the-shoulder shot (OTS): shows the subject from behind the shoulder of another character.
+ Over-the-hip (OTH) shot: in which the camera is placed on the hip of one character and the focus is on the subject.
+ Reverse angle shot: which is approximately 180 degrees opposite the previous shot.
+ Reaction shot: which shows the character’s reaction to the previous shot.
+ Weather shot: where the subject of the filming is the weather.
+ Extreme wide shot/extreme long shot: used to show the subject and the entire area of the environment they are in.
+ Wide shot/long shot: used to focus on the subject while still showing the scene the subject is in.
+ Medium shot: shows the subject from the knees up, and is often referred to as the 3/4 shot.
+ Medium close-up shot: the subject fills the frame; it sits somewhere between a medium shot and a close-up.
+ Close-up shot: shows emotions and detailed reactions, with the subject filling the entire frame.
+ Choker shot: shows the subject’s face from just above the eyebrows to just below the mouth and is between a close-up and an extreme close-up.
+ Extreme close-up shot: shows the detail of an object, such as one a character is handling, or a person, such as just their eyes or moving lips.
+ Full shot: similar to a wide shot except that it focuses on the character in the frame, showing them from head to toe.
+ Cowboy shot: similar to the medium shot except that the character is shown from the hips or waist up.
+ Establishing shot: a long shot at the beginning of a scene that shows objects, buildings, and other elements of a setting from a distance to establish where the next sequence of events takes place.
+ ## Camera angles
+ Eye-level shot: This is when the camera is placed at the same height as the eyes of the characters.
+ Low angle shot: This shot frames the subject from a low height, often used to emphasize differences in power between characters.
+ Aerial shot/helicopter shot: Taken from way up high, this shot is usually from a drone or helicopter to establish the expanse of the surrounding landscape.
+ High angle shot: This is when the subject is framed with the camera looking down at them.
+ Birds-eye-view shot/overhead shot: This is a shot taken from way above the subject, usually including a significant amount of the surrounding environment to create a sense of scale or movement.
+ Shoulder-level shot: This is where the camera is approximately the same height as the character’s shoulders.
+ Hip-level shot: The camera is approximately at the height of the character’s hips.
+ Knee-level shot: The camera is approximately at the same level as the character’s knees.
+ Ground-level shot: When the height of the camera is at ground level with the character, this shot captures what’s happening on the ground the character is standing on.
+ Dutch-angle/tilt shot: This is where the camera is tilted to the side.
+ Cut-in shot: This type of shot cuts into the action on the screen to offer a different view of something happening in this main scene.
+ Cutaway shot: As a shot that cuts away from the main action on the screen, it’s used to focus on secondary action and add more information for greater understanding for the audience.
+ Master shot: A long shot that captures most or all of the action happening in a scene.
+ Deep focus: A shot that keeps everything on the screen in sharp focus, including the foreground, background, and middle ground.
+ Locked-down shot: With this shot, the camera is fixed in one position and the action continues off-screen.
+ ## Camera movements
+ Zoom shot: involves changing the focal length of the lens to zoom in or out during filming.
+ Pan shot: involves moving the camera from side to side to show something to the audience or help them better follow the sequence of events.
+ Tilt shot: similar to a pan shot, except moving the camera up and down.
+ Dolly shot: the camera is attached to a dolly that moves on tracks and can possibly move up and down.
+ Truck shot: you move the entire camera on a fixed point and the motion goes from side to side.
+ Pedestal shot: the entire camera is moved vertically, not just the angle of view, and is often combined with panning and/or tilting.
+ Static/fixed shot: where there is no camera movement, and the shot emphasizes the movement of the subject in the environment.
+ Arc shot: where the camera moves in an arc pattern around the subject to give the audience a better perspective of their surroundings.
+ Crab shot: a less-common version of tracking a subject where the dolly the camera is on goes sideways.
+ Dolly zoom shot: the position of the camera and focal length are changed simultaneously.
+ Whip pan shot/swish pan shot: used to create a blur as you pan from one shot to the next.
+ Tracking shot: the camera follows the subject, either from behind or at their side, moving with them.
+ Whip tilt shot: used to create a blur panning from one shot to the next vertically.
+ Bridging shot: denotes a shift in place or time.
+ ## Focus
+ Focus pull: focus the lens to keep the subject within an acceptable focus range.
+ Rack focus: focus is more aggressively shifted from subject A to subject B.
+ Tilt-shift: parts of the image are in focus while other parts are out of focus.
+ Deep focus: both the subject and the environment are in focus.
+ Shallow focus: subject is crisp and in focus while the background is out of focus.
+ ## Camera angles
+ High-angle
+ Low-angle
+ Over-the-shoulder
+ Bird’s eye
+ Dutch angle/tilt`
+     },
+     {
+       role: "user",
+       name: "movie_director",
+       content: `# Task
+ Please generate the movie spec YAML based on the following description:
+ ${sceneDescription}.
+ # YAML
+ \`\`\`
+ `
+     },
+   ]
+ }
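These messages are meant to be fed to a chat completion call. A minimal sketch with the openai v3 SDK (the same package the `ChatCompletionRequestMessage` type comes from); the model name and environment variable are assumptions, and the repo's real call sites live under `src/llm/openai/`:

```typescript
import { Configuration, OpenAIApi } from "openai"
import { getQueryChatMessages } from "./prompts.mts"

const api = new OpenAIApi(new Configuration({ apiKey: process.env.OPENAI_API_KEY }))

const { data } = await api.createChatCompletion({
  model: "gpt-4", // assumption: any chat-capable model could be used here
  messages: getQueryChatMessages("a 6-second video of ducks invading Central Park"),
})

// the completion should end with the YAML movie spec, ready to be parsed
console.log(data.choices[0].message?.content)
```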
src/production/addAudioToVideo.mts ADDED
@@ -0,0 +1,43 @@
+ import path from "node:path"
+
+ import tmpDir from "temp-dir"
+ import { v4 as uuidv4 } from "uuid"
+ import ffmpeg from "fluent-ffmpeg"
+
+ import { pendingFilesDirFilePath } from "../config.mts"
+ import { moveFileFromTmpToPending } from "../utils/moveFileFromTmpToPending.mts"
+
+ export const addAudioToVideo = async (
+   videoFileName: string,
+   audioFileName: string,
+
+   /*
+    * 0.0: mute the audio completely
+    * 0.5: set the audio to 50% of original volume (half volume)
+    * 1.0: maintain the audio at original volume (100% of original volume)
+    * 2.0: amplify the audio to 200% of original volume (double volume - might cause clipping)
+    */
+   volume: number = 1.0
+ ) => {
+   const inputFilePath = path.join(pendingFilesDirFilePath, videoFileName)
+   const audioFilePath = path.resolve(pendingFilesDirFilePath, audioFileName)
+
+   const tmpFileName = `${uuidv4()}.mp4`
+   const tempOutputFilePath = path.join(tmpDir, tmpFileName)
+
+   await new Promise((resolve, reject) => {
+     ffmpeg(inputFilePath)
+       .input(audioFilePath)
+       .audioFilters({ filter: 'volume', options: volume }) // apply the volume filter to the new audio track
+       .outputOptions("-c:v copy") // copy the video stream as-is (no re-encoding)
+       .outputOptions("-c:a aac") // encode the audio stream to AAC
+       .outputOptions("-map 0:v:0") // take the video stream from the first input (the video file)
+       .outputOptions("-map 1:a:0") // take the audio stream from the second input (the audio file)
+       .outputOptions("-shortest") // finish encoding when the shortest input stream ends
+       .output(tempOutputFilePath)
+       .on("end", resolve)
+       .on("error", reject)
+       .run()
+   })
+   await moveFileFromTmpToPending(tmpFileName, videoFileName)
+ };
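A hedged usage sketch (the file names are invented; both files are expected to already sit in the pending directory):

```typescript
// mix a generated ambience track under an existing shot at 20% volume,
// overwriting the pending video file in place
await addAudioToVideo("video123.mp4", "ambience123.m4a", 0.2)
```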
src/production/assembleShots.mts ADDED
@@ -0,0 +1,59 @@
+ import path from "node:path"
+
+ // due to Docker issues, we disable OpenGL transitions for now
+ // import concat from 'ffmpeg-concat'
+ import concat from './concatNoGL.mts'
+
+ import { VideoShot } from '../types.mts'
+ import { pendingFilesDirFilePath } from "../config.mts"
+ import { normalizePendingVideoToTmpFilePath } from "./normalizePendingVideoToTmpFilePath.mts"
+
+ export const assembleShots = async (shots: VideoShot[], fileName: string) => {
+
+   if (!Array.isArray(shots) || shots.length < 2) {
+     throw new Error(`need at least 2 shots`)
+   }
+
+   // note: these transitions are only honored by ffmpeg-concat;
+   // the concatNoGL fallback ignores them and performs a plain concatenation
+   const transitions = [
+     {
+       name: 'circleOpen',
+       duration: 1000,
+     },
+     {
+       name: 'crossWarp',
+       duration: 800,
+     },
+     {
+       name: 'directionalWarp',
+       duration: 800,
+       // pass custom params to a transition
+       params: { direction: [1, -1] },
+     },
+
+     /*
+     {
+       name: 'squaresWire',
+       duration: 2000,
+     },
+     */
+   ]
+
+   const videoFilePath = path.join(pendingFilesDirFilePath, fileName)
+
+   // before performing assembly, we must normalize the video clips (same size and format)
+   const shotFilesPaths: string[] = []
+   for (const shot of shots) {
+     const normalizedShotFilePath = await normalizePendingVideoToTmpFilePath(shot.fileName)
+     shotFilesPaths.push(normalizedShotFilePath)
+   }
+
+   await concat({
+     output: videoFilePath,
+     videos: shotFilesPaths,
+     // pick a random transition between each pair of consecutive shots
+     transitions: shotFilesPaths
+       .slice(0, shotFilesPaths.length - 1)
+       .map(
+         () => transitions[Math.floor(Math.random() * transitions.length)]
+       ),
+   })
+ }
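A hedged usage sketch (it assumes a `video` object whose `shots` carry the `fileName` field used above; the output file name is invented):

```typescript
// stitch all the shots of a video into a single pending file
await assembleShots(video.shots, "assembled123.mp4")
```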
src/production/concatNoGL.mts ADDED
@@ -0,0 +1,33 @@
+ import ffmpeg from "fluent-ffmpeg";
+ import fs from "fs";
+
+ interface IConcatParams {
+   output: string;
+   videos: string[];
+   transitions: any; // kept for API compatibility with ffmpeg-concat, but ignored here
+ }
+
+ const concat = async ({ output, videos }: IConcatParams): Promise<void> => {
+   if (!output || !Array.isArray(videos)) {
+     throw new Error("An output file and videos must be provided");
+   }
+
+   if (!videos.every(video => fs.existsSync(video))) {
+     throw new Error("All videos must exist");
+   }
+
+   const ffmpegCommand = ffmpeg();
+
+   videos.forEach((video) =>
+     ffmpegCommand.addInput(video)
+   );
+
+   return new Promise<void>((resolve, reject) => {
+     ffmpegCommand
+       .on('error', reject)
+       .on('end', resolve)
+       .mergeToFile(output);
+   });
+ };
+
+ export default concat;
src/production/generateActor.mts ADDED
@@ -0,0 +1,50 @@
+ import { promises as fs } from "node:fs"
+ import path from "node:path"
+ import tmpDir from "temp-dir"
+
+ import { HfInference } from "@huggingface/inference"
+
+ const hf = new HfInference(process.env.VC_HF_API_TOKEN)
+
+ export const generateActor = async (prompt: string, fileName: string, seed: number) => {
+   const positivePrompt = [
+     `profile photo of ${prompt || ""}`,
+     "id picture",
+     "photoshoot",
+     "portrait photography",
+     "neutral expression",
+     "neutral background",
+     "studio photo",
+     "award winning",
+     "high resolution",
+     "photo realistic",
+     "intricate details",
+     "beautiful",
+   ]
+   const negativePrompt = [
+     "anime",
+     "drawing",
+     "painting",
+     "lowres",
+     "blurry",
+     "artificial"
+   ]
+
+   console.log(`generating actor: ${positivePrompt.join(", ")}`)
+
+   const blob = await hf.textToImage({
+     inputs: positivePrompt.join(", "),
+     model: "stabilityai/stable-diffusion-2-1",
+     parameters: {
+       negative_prompt: negativePrompt.join(", "),
+       // seed, no seed?
+     }
+   })
+
+   const filePath = path.resolve(tmpDir, fileName)
+
+   const buffer = Buffer.from(await blob.arrayBuffer())
+   // note: no encoding option here, we are writing binary image data
+   await fs.writeFile(filePath, buffer)
+
+   return filePath
+ }
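A hedged usage sketch (prompt and file name are invented; note that the `seed` parameter is currently not forwarded by the implementation):

```typescript
// writes the generated portrait into the OS temp dir and returns its path
const actorFilePath = await generateActor("an old sailor with a grey beard", "actor123.png", 42)
```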
src/production/generateAudio.mts ADDED
@@ -0,0 +1,76 @@
+ import { v4 as uuidv4 } from "uuid"
+ import puppeteer from "puppeteer"
+
+ import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
+ import { moveFileFromTmpToPending } from "../utils/moveFileFromTmpToPending.mts"
+
+ export const state = {
+   load: 0,
+ }
+
+ const instances: string[] = [
+   process.env.VC_AUDIO_GENERATION_SPACE_API_URL
+ ]
+
+ // TODO we should use an inference endpoint instead
+ export async function generateAudio(prompt: string, audioFileName: string) {
+
+   if (state.load === instances.length) {
+     throw new Error(`all audio generation servers are busy, try again later..`)
+   }
+
+   state.load += 1
+
+   try {
+     const instance = instances.shift()
+     instances.push(instance)
+
+     const browser = await puppeteer.launch({
+       headless: true,
+       protocolTimeout: 120000,
+     })
+
+     try {
+       const page = await browser.newPage()
+
+       await page.goto(instance, {
+         waitUntil: "networkidle2",
+       })
+
+       await new Promise(r => setTimeout(r, 3000))
+
+       const firstTextboxInput = await page.$('input[data-testid="textbox"]')
+
+       await firstTextboxInput.type(prompt)
+
+       // console.log("looking for the button to submit")
+       const submitButton = await page.$("button.lg")
+
+       // console.log("clicking on the button")
+       await submitButton.click()
+
+       await page.waitForSelector("a[download]", {
+         timeout: 120000, // no need to wait for too long, generation is quick
+       })
+
+       const audioRemoteUrl = await page.$$eval("a[download]", el => el.map(x => x.getAttribute("href"))[0])
+
+       // it is always a good idea to download to a tmp dir before saving to the pending dir
+       // because there is always a risk that the download will fail
+
+       const tmpFileName = `${uuidv4()}.mp4`
+
+       await downloadFileToTmp(audioRemoteUrl, tmpFileName)
+       await moveFileFromTmpToPending(tmpFileName, audioFileName)
+     } catch (err) {
+       throw err
+     } finally {
+       await browser.close()
+     }
+   } catch (err) {
+     throw err
+   } finally {
+     state.load -= 1
+   }
+ }
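A hedged sketch of the intended chain (file names are invented): `generateAudio` leaves its result in the pending directory, which is exactly where `addAudioToVideo` expects to find it:

```typescript
// generate a foreground audio track, then mix it under an existing pending shot
await generateAudio("soft quacking and rustling feathers", "audio123.mp4")
await addAudioToVideo("shot123.mp4", "audio123.mp4", 0.5)
```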
src/production/generateAudioLegacy.mts ADDED
@@ -0,0 +1,52 @@
+ import { client } from '@gradio/client'
+
+ import { generateSeed } from "../utils/generateSeed.mts"
+
+ export const state = {
+   load: 0
+ }
+
+ const instances: string[] = [
+   process.env.VC_AUDIO_GENERATION_SPACE_API_URL
+ ]
+
+ export const generateAudio = async (prompt: string, options?: {
+   seed: number;
+   nbFrames: number;
+   nbSteps: number;
+ }) => {
+
+   if (state.load === instances.length) {
+     throw new Error(`all audio generation servers are busy, try again later..`)
+   }
+
+   state.load += 1
+
+   try {
+     const seed = options?.seed || generateSeed()
+     const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling will require too much memory!
+     const nbSteps = options?.nbSteps || 35
+
+     const instance = instances.shift()
+     instances.push(instance)
+
+     const api = await client(instance, {
+       hf_token: `${process.env.VC_HF_API_TOKEN}` as any
+     })
+
+     const rawResponse = await api.predict('/run', [
+       prompt, // string in 'Prompt' Textbox component
+       seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
+       nbFrames, // 24 // it is the nb of frames per second I think?
+       nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
+     ]) as any
+
+     const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }
+
+     return `${instance}/file=${name}`
+   } catch (err) {
+     throw err
+   } finally {
+     state.load -= 1
+   }
+ }
src/production/generateVideo.mts ADDED
@@ -0,0 +1,57 @@
+ import { client } from "@gradio/client"
+
+ import { generateSeed } from "../utils/generateSeed.mts"
+
+ export const state = {
+   load: 0,
+ }
+
+ // we don't use replicas yet, because it ain't easy to get their hostname
+ const instances: string[] = [
+   `${process.env.VC_ZEROSCOPE_SPACE_API_URL_1 || ""}`,
+   `${process.env.VC_ZEROSCOPE_SPACE_API_URL_2 || ""}`,
+   // `${process.env.VC_ZEROSCOPE_SPACE_API_URL_3 || ""}`,
+ ].filter(instance => instance?.length > 0)
+
+ export const generateVideo = async (prompt: string, options?: {
+   seed: number;
+   nbFrames: number;
+   nbSteps: number;
+ }) => {
+
+   if (state.load === instances.length) {
+     throw new Error(`all video generation servers are busy, try again later..`)
+   }
+
+   state.load += 1
+
+   try {
+     const seed = options?.seed || generateSeed()
+     const nbFrames = options?.nbFrames || 24 // we can go up to 48 frames, but then upscaling will require too much memory!
+     const nbSteps = options?.nbSteps || 35
+
+     const instance = instances.shift()
+     instances.push(instance)
+
+     const api = await client(instance, {
+       hf_token: `${process.env.VC_HF_API_TOKEN}` as any
+     })
+
+     const rawResponse = await api.predict('/run', [
+       prompt, // string in 'Prompt' Textbox component
+       seed, // number (numeric value between 0 and 2147483647) in 'Seed' Slider component
+       nbFrames, // 24 // it is the nb of frames per second I think?
+       nbSteps, // 10, (numeric value between 10 and 50) in 'Number of inference steps' Slider component
+     ]) as any
+
+     // console.log("rawResponse:", rawResponse)
+
+     const { name } = rawResponse?.data?.[0]?.[0] as { name: string, orig_name: string }
+
+     return `${instance}/file=${name}`
+   } catch (err) {
+     throw err
+   } finally {
+     state.load -= 1
+   }
+ }
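A hedged end-to-end sketch (prompt and file name are invented; `downloadFileToTmp` is the repo's own helper used by the other production steps):

```typescript
import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
import { generateVideo } from "./generateVideo.mts"

// generateVideo returns a remote `/file=` URL on the Space that produced the clip
const remoteUrl = await generateVideo("a duck waddling in Central Park", {
  seed: 42,
  nbFrames: 24,
  nbSteps: 35,
})
await downloadFileToTmp(remoteUrl, "shot123.mp4")
```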
src/production/generateVoice.mts ADDED
@@ -0,0 +1,78 @@
+ import puppeteer from "puppeteer"
+
+ import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
+
+ export const state = {
+   load: 0
+ }
+
+ const instances: string[] = [
+   process.env.VC_VOICE_GENERATION_SPACE_API_URL
+ ]
+
+ // TODO we should use an inference endpoint instead
+ export async function generateVoice(prompt: string, voiceFileName: string) {
+   if (state.load === instances.length) {
+     throw new Error(`all voice generation servers are busy, try again later..`)
+   }
+
+   state.load += 1
+
+   try {
+     const instance = instances.shift()
+     instances.push(instance)
+
+     console.log("instance:", instance)
+
+     const browser = await puppeteer.launch({
+       headless: true,
+       protocolTimeout: 800000,
+     })
+
+     try {
+       const page = await browser.newPage()
+
+       await page.goto(instance, {
+         waitUntil: "networkidle2",
+       })
+
+       await new Promise(r => setTimeout(r, 3000))
+
+       const firstTextarea = await page.$('textarea[data-testid="textbox"]')
+
+       await firstTextarea.type(prompt)
+
+       // console.log("looking for the button to submit")
+       const submitButton = await page.$("button.lg")
+
+       // console.log("clicking on the button")
+       await submitButton.click()
+
+       await page.waitForSelector("audio", {
+         timeout: 800000, // needs to be large enough in case someone else attempts to use our space
+       })
+
+       const voiceRemoteUrl = await page.$$eval("audio", el => el.map(x => x.getAttribute("src"))[0])
+
+       console.log({
+         voiceRemoteUrl,
+       })
+
+       console.log(`- downloading ${voiceFileName} from ${voiceRemoteUrl}`)
+
+       await downloadFileToTmp(voiceRemoteUrl, voiceFileName)
+
+       return voiceFileName
+     } catch (err) {
+       throw err
+     } finally {
+       await browser.close()
+     }
+   } catch (err) {
+     throw err
+   } finally {
+     state.load -= 1
+   }
+ }
src/production/interpolateVideo.mts ADDED
@@ -0,0 +1,81 @@
+ import path from "node:path"
+
+ import { v4 as uuidv4 } from "uuid"
+ import puppeteer from "puppeteer"
+
+ import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
+ import { pendingFilesDirFilePath } from "../config.mts"
+ import { moveFileFromTmpToPending } from "../utils/moveFileFromTmpToPending.mts"
+
+ export const state = {
+   load: 0
+ }
+
+ const instances: string[] = [
+   process.env.VC_VIDEO_INTERPOLATION_SPACE_API_URL
+ ]
+
+ // TODO we should use an inference endpoint instead
+ export async function interpolateVideo(fileName: string, steps: number, fps: number) {
+   if (state.load === instances.length) {
+     throw new Error(`all video interpolation servers are busy, try again later..`)
+   }
+
+   state.load += 1
+
+   try {
+     const inputFilePath = path.join(pendingFilesDirFilePath, fileName)
+
+     console.log(`interpolating ${fileName}`)
+     console.log(`warning: interpolateVideo parameter "steps" (${steps}) is ignored!`)
+     console.log(`warning: interpolateVideo parameter "fps" (${fps}) is ignored!`)
+
+     const instance = instances.shift()
+     instances.push(instance)
+
+     const browser = await puppeteer.launch({
+       headless: true,
+       protocolTimeout: 400000,
+     })
+
+     try {
+       const page = await browser.newPage()
+       await page.goto(instance, { waitUntil: 'networkidle2' })
+
+       await new Promise(r => setTimeout(r, 3000))
+
+       const fileField = await page.$('input[type=file]')
+
+       // console.log(`uploading file..`)
+       await fileField.uploadFile(inputFilePath)
+
+       // console.log('looking for the button to submit')
+       const submitButton = await page.$('button.lg')
+
+       // console.log('clicking on the button')
+       await submitButton.click()
+
+       await page.waitForSelector('a[download="interpolated_result.mp4"]', {
+         timeout: 400000, // needs to be large enough in case someone else attempts to use our space
+       })
+
+       const interpolatedFileUrl = await page.$$eval('a[download="interpolated_result.mp4"]', el => el.map(x => x.getAttribute("href"))[0])
+
+       // it is always a good idea to download to a tmp dir before saving to the pending dir
+       // because there is always a risk that the download will fail
+
+       const tmpFileName = `${uuidv4()}.mp4`
+
+       await downloadFileToTmp(interpolatedFileUrl, tmpFileName)
+       await moveFileFromTmpToPending(tmpFileName, fileName)
+     } catch (err) {
+       throw err
+     } finally {
+       await browser.close()
+     }
+   } catch (err) {
+     throw err
+   } finally {
+     state.load -= 1
+   }
+ }
src/production/interpolateVideoLegacy.mts ADDED
@@ -0,0 +1,56 @@
+ import { promises as fs } from "node:fs"
+ import path from "node:path"
+ import { Blob } from "buffer"
+
+ import { client } from "@gradio/client"
+ import tmpDir from "temp-dir"
+
+ import { downloadFileToTmp } from '../utils/downloadFileToTmp.mts'
+
+ export const state = {
+   load: 0
+ }
+
+ const instances: string[] = [
+   process.env.VC_VIDEO_INTERPOLATION_SPACE_API_URL
+ ]
+
+ export const interpolateVideo = async (fileName: string, steps: number, fps: number) => {
+   if (state.load === instances.length) {
+     throw new Error(`all video interpolation servers are busy, try again later..`)
+   }
+
+   state.load += 1
+
+   try {
+     const inputFilePath = path.join(tmpDir, fileName)
+
+     const instance = instances.shift()
+     instances.push(instance)
+
+     const api = await client(instance, {
+       hf_token: `${process.env.VC_HF_API_TOKEN}` as any
+     })
+
+     const video = await fs.readFile(inputFilePath)
+
+     const blob = new Blob([video], { type: 'video/mp4' })
+     // const blob = blobFrom(filePath)
+     const result = await api.predict(1, [
+       blob, // blob in 'parameter_5' Video component
+       steps, // number (numeric value between 1 and 4) in 'Interpolation Steps' Slider component
+       fps, // string (FALSE! it's a number) in 'FPS output' Radio component
+     ])
+
+     const data = (result as any).data[0]
+     console.log('raw data:', data)
+     const { orig_name, data: remoteFilePath } = data
+     const remoteUrl = `${instance}/file=${remoteFilePath}`
+     console.log("remoteUrl:", remoteUrl)
+     await downloadFileToTmp(remoteUrl, fileName)
+   } catch (err) {
+     throw err
+   } finally {
+     state.load -= 1
+   }
+ }
src/production/mergeAudio.mts ADDED
@@ -0,0 +1,49 @@
+ import path from "node:path"
+
+ import tmpDir from "temp-dir"
+ import { v4 as uuidv4 } from "uuid"
+ import ffmpeg from "fluent-ffmpeg"
+
+ export const mergeAudio = async ({
+   input1FileName,
+   input1Volume,
+   input2FileName,
+   input2Volume,
+   outputFileName = ''
+ }: {
+   input1FileName: string,
+   input1Volume: number,
+   input2FileName: string,
+   input2Volume: number,
+   outputFileName?: string
+ }): Promise<string> => {
+   // only generate a random name if the caller didn't provide one
+   outputFileName = outputFileName || `${uuidv4()}.m4a`
+
+   const input1FilePath = path.resolve(tmpDir, input1FileName)
+   const input2FilePath = path.resolve(tmpDir, input2FileName)
+   const outputFilePath = path.resolve(tmpDir, outputFileName)
+
+   await new Promise((resolve, reject) => {
+     ffmpeg()
+       .input(input1FilePath)
+       .input(input2FilePath)
+       // set the volume of each track, then mix both down to a single stream,
+       // stopping when the shortest input ends
+       .complexFilter([
+         `[0:a]volume=${input1Volume}[a0]`,
+         `[1:a]volume=${input2Volume}[a1]`,
+         `[a0][a1]amix=inputs=2:duration=shortest[out]`,
+       ], "out")
+       .outputOptions("-c:a aac") // use the AAC audio codec
+       .output(outputFilePath)
+       .on("end", resolve)
+       .on("error", reject)
+       .run()
+   })
+
+   console.log(`merged audio from ${input1FileName} and ${input2FileName} into ${outputFileName}`)
+
+   return outputFileName
+ }
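A hedged usage sketch (both inputs are assumed to already be in the OS temp dir, and the file names are invented):

```typescript
// duck the background ambience to 20% volume under a full-volume foreground track
const mergedFileName = await mergeAudio({
  input1FileName: "background123.m4a",
  input1Volume: 0.2,
  input2FileName: "foreground123.m4a",
  input2Volume: 1.0,
})
```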
src/production/normalizePendingVideoToTmpFilePath.mts ADDED
@@ -0,0 +1,33 @@
+ import path from "node:path"
+
+ import { v4 as uuidv4 } from "uuid"
+ import tmpDir from "temp-dir"
+ import ffmpeg from "fluent-ffmpeg"
+
+ import { pendingFilesDirFilePath } from "../config.mts"
+
+ export const normalizePendingVideoToTmpFilePath = async (fileName: string): Promise<string> => {
+   return new Promise((resolve, reject) => {
+
+     const tmpFileName = `${uuidv4()}.mp4`
+
+     const filePath = path.join(pendingFilesDirFilePath, fileName)
+     const tmpFilePath = path.join(tmpDir, tmpFileName)
+
+     // probe first, to fail early if the file is missing or not a readable video
+     ffmpeg.ffprobe(filePath, function (err) {
+       if (err) { reject(err); return; }
+
+       ffmpeg(filePath)
+
+         // normalize the resolution to 720p
+         .size("1280x720")
+
+         .save(tmpFilePath)
+         .on("end", async () => {
+           resolve(tmpFilePath)
+         })
+         .on("error", (err) => {
+           reject(err)
+         })
+     })
+   })
+ }
src/production/postInterpolation.mts ADDED
@@ -0,0 +1,58 @@
+ import path from "node:path"
+
+ import { v4 as uuidv4 } from "uuid"
+ import tmpDir from "temp-dir"
+ import ffmpeg from "fluent-ffmpeg"
+ import { moveFileFromTmpToPending } from "../utils/moveFileFromTmpToPending.mts"
+ import { pendingFilesDirFilePath } from "../config.mts"
+
+ export const postInterpolation = async (fileName: string, durationMs: number, nbFrames: number, noiseAmount: number): Promise<string> => {
+   return new Promise((resolve, reject) => {
+
+     const tmpFileName = `${uuidv4()}.mp4`
+
+     const filePath = path.join(pendingFilesDirFilePath, fileName)
+     const tmpFilePath = path.join(tmpDir, tmpFileName)
+
+     ffmpeg.ffprobe(filePath, function (err, metadata) {
+       if (err) { reject(err); return; }
+
+       const durationInSec = durationMs / 1000
+
+       const currentVideoDurationInSec = metadata.format.duration
+
+       console.log(`currentVideoDurationInSec in sec: ${currentVideoDurationInSec}s`)
+
+       console.log(`target duration in sec: ${durationInSec}s (${durationMs}ms)`)
+
+       // compute a ratio, e.g. 0.3 = 30% of the total length
+       // (note: it is only logged for now; the setpts factor below is still hardcoded)
+       const durationRatio = currentVideoDurationInSec / durationInSec
+       console.log(`durationRatio: ${durationRatio}`)
+
+       ffmpeg(filePath)
+
+         // convert to HD
+         .size("1280x720")
+
+         .videoFilters([
+           `setpts=0.5*PTS`, // we make the video twice as fast
+           // 'scale=-1:576:lanczos',
+           // 'unsharp=5:5:0.2:5:5:0.2', // not recommended, this makes the video more "pixely"
+           `noise=c0s=${noiseAmount}:c0f=t+u` // add a movie grain noise
+         ])
+         .outputOptions([
+           `-r ${nbFrames}`, // set the output frame rate
+         ])
+
+         .save(tmpFilePath)
+         .on("end", async () => {
+           await moveFileFromTmpToPending(tmpFileName, fileName)
+
+           resolve(fileName)
+         })
+         .on("error", (err) => {
+           reject(err)
+         })
+     })
+   })
+ }
src/production/renderImage.mts ADDED
@@ -0,0 +1,54 @@
+ import { RenderedScene, RenderRequest } from "../types.mts"
+ import { generateImageSDXLAsBase64 } from "../utils/generateImageSDXL.mts"
+ import { generateImageSDXL360AsBase64 } from "../utils/generateImageSDXL360.mts"
+ import { generateSeed } from "../utils/generateSeed.mts"
+
+ export async function renderImage(
+   request: RenderRequest,
+   response: RenderedScene,
+ ): Promise<RenderedScene> {
+
+   const isSpherical = request.projection === 'spherical'
+
+   const generateImageAsBase64 = isSpherical
+     ? generateImageSDXL360AsBase64
+     : generateImageSDXLAsBase64
+
+   console.log(`going to generate an image using ${request.projection || "default (cartesian)"} projection`)
+
+   const params = {
+     positivePrompt: request.prompt,
+     seed: request.seed,
+     nbSteps: request.nbSteps,
+     width: request.width,
+     height: request.height
+   }
+
+   console.log(`calling generateImageAsBase64 with: `, JSON.stringify(params, null, 2))
+
+   // generate the image, with a single retry on failure
+   try {
+     response.assetUrl = await generateImageAsBase64(params)
+     console.log("successful generation!", response.assetUrl.slice(0, 30))
+     if (!response.assetUrl?.length) {
+       throw new Error(`the generated image is empty`)
+     }
+   } catch (err) {
+     console.error(`failed to render.. but let's try again!`)
+     try {
+       response.assetUrl = await generateImageAsBase64(params)
+       console.log("successful generation!", response.assetUrl.slice(0, 30))
+       if (!response.assetUrl?.length) {
+         throw new Error(`the generated image is empty`)
+       }
+     } catch (err) {
+       console.error(`failed to generate the image twice: ${err}`)
+       response.error = `failed to render scene: ${err}`
+       response.status = "error"
+       response.assetUrl = ""
+     }
+   }
+
+   return response
+ }
src/production/renderImageSegmentation.mts ADDED
@@ -0,0 +1,69 @@
+ import path from "node:path"
+
+ import { v4 as uuidv4 } from "uuid"
+ import tmpDir from "temp-dir"
+
+ import { RenderedScene, RenderRequest } from "../types.mts"
+ import { segmentImage } from "../utils/segmentImage.mts"
+ import { writeBase64ToFile } from "../utils/writeBase64ToFile.mts"
+
+
+ export async function renderImageSegmentation(
+   request: RenderRequest,
+   response: RenderedScene,
+ ): Promise<RenderedScene> {
+
+   const actionnables = Array.isArray(request.actionnables) ? request.actionnables : []
+
+   if (actionnables.length > 0) {
+     console.log("we have some actionnables:", actionnables)
+     console.log("going to write the image to disk first")
+
+     const tmpImageFilePath = path.join(tmpDir, `${uuidv4()}.png`)
+
+     // console.log("beginning:", imageBase64.slice(0, 100))
+     await writeBase64ToFile(response.assetUrl, tmpImageFilePath)
+     console.log("wrote the image to ", tmpImageFilePath)
+
+     if (!tmpImageFilePath) {
+       console.error("failed to get the image")
+       response.error = "failed to segment the image"
+       response.status = "error"
+     } else {
+       console.log("got the image! segmenting..")
+       // try up to 3 times, in case the segmentation servers are busy
+       try {
+         const result = await segmentImage(tmpImageFilePath, actionnables, request.width, request.height)
+         response.maskBase64 = result.pngInBase64
+         response.segments = result.segments
+
+         console.log(`it worked the first time! got ${response.segments.length} segments`)
+       } catch (err) {
+         console.log("this takes too long :/ trying another server..")
+         try {
+           const result = await segmentImage(tmpImageFilePath, actionnables, request.width, request.height)
+           response.maskBase64 = result.pngInBase64
+           response.segments = result.segments
+
+           console.log(`it worked the second time! got ${response.segments.length} segments`)
+         } catch (err) {
+           console.log("trying one last time, on a 3rd server..")
+           try {
+             const result = await segmentImage(tmpImageFilePath, actionnables, request.width, request.height)
+             response.maskBase64 = result.pngInBase64
+             response.segments = result.segments
+
+             console.log(`it worked the third time! got ${response.segments.length} segments`)
+           } catch (err) {
+             console.log("yeah, all servers are busy it seems.. aborting")
+             response.error = "all servers are busy"
+             response.status = "error"
+           }
+         }
+       }
+     }
+   } else {
+     console.log("no actionnables: just returning the image, then")
+   }
+
+   return response
+ }
src/production/renderPipeline.mts ADDED
@@ -0,0 +1,46 @@
+
+ import { RenderedScene, RenderRequest } from "../types.mts"
+
+ import { renderImage } from "./renderImage.mts"
+ import { renderVideo } from "./renderVideo.mts"
+ import { renderImageSegmentation } from "./renderImageSegmentation.mts"
+ import { renderVideoSegmentation } from "./renderVideoSegmentation.mts"
+
+ export async function renderPipeline(request: RenderRequest, response: RenderedScene) {
+   const isVideo = request?.nbFrames > 1
+
+   const renderContent = isVideo ? renderVideo : renderImage
+   const renderSegmentation = isVideo ? renderVideoSegmentation : renderImageSegmentation
+
+   if (isVideo) {
+     console.log(`rendering a video..`)
+   } else {
+     console.log(`rendering an image..`)
+   }
+   await renderContent(request, response)
+   await renderSegmentation(request, response)
+
+   /*
+   this is the optimized pipeline
+   However, right now it doesn't work because for some reason,
+   asking to generate the same seed + prompt with a different number of steps
+   doesn't generate the same image!
+
+   // first we need to wait for the low quality pre-render
+   await renderContent({
+     ...request,
+
+     // we are a bit more aggressive with the quality of the video preview
+     nbSteps: isVideo ? 8 : 16
+   }, response)
+
+   // then we can run both the segmentation and the high-res render at the same time
+   await Promise.all([
+     renderSegmentation(request, response),
+     renderContent(request, response)
+   ])
+   */
+
+   response.status = "completed"
+   response.error = ""
+ }
src/production/renderScene.mts ADDED
@@ -0,0 +1,62 @@
+ import { v4 as uuidv4 } from "uuid"
+
+ import { RenderedScene, RenderRequest } from "../types.mts"
+ import { generateSeed } from "../utils/generateSeed.mts"
+ import { getValidNumber } from "../utils/getValidNumber.mts"
+ import { renderPipeline } from "./renderPipeline.mts"
+
+ const cache: Record<string, RenderedScene> = {}
+ const cacheQueue: string[] = []
+ const maxCacheSize = 1000
+
+ export async function renderScene(request: RenderRequest): Promise<RenderedScene> {
+   // const key = getCacheKey(scene)
+   const renderId = uuidv4()
+
+   request.nbFrames = getValidNumber(request.nbFrames, 1, 24, 16)
+
+   // same convention as in renderPipeline: more than one frame means a video
+   const isVideo = request?.nbFrames > 1
+
+   // important: we need a consistent seed for our multiple rendering passes
+   request.seed = getValidNumber(request.seed, 0, 2147483647, generateSeed())
+   request.nbSteps = getValidNumber(request.nbSteps, 5, 50, 10)
+
+   if (isVideo) {
+     request.width = getValidNumber(request.width, 256, 1024, 1024)
+     request.height = getValidNumber(request.height, 256, 1024, 512)
+   } else {
+     request.width = getValidNumber(request.width, 256, 1280, 576)
+     request.height = getValidNumber(request.height, 256, 720, 320)
+   }
+
+   const response: RenderedScene = {
+     renderId,
+     status: "pending",
+     assetUrl: "",
+     error: "",
+     maskBase64: "",
+     segments: []
+   }
+
+   cache[renderId] = response
+   cacheQueue.push(renderId)
+   // evict the oldest entry once the cache is full
+   if (cacheQueue.length > maxCacheSize) {
+     const toRemove = cacheQueue.shift()
+     delete cache[toRemove]
+   }
+
+   // this is a fire-and-forget asynchronous pipeline:
+   // we start it, but we do not await the response
+   renderPipeline(request, response)
+
+   console.log("renderScene: yielding the scene", response)
+   return response
+ }
+
+ export async function getRenderedScene(renderId: string): Promise<RenderedScene> {
+   const rendered = cache[renderId]
+   if (!rendered) {
+     throw new Error(`couldn't find any rendered scene with renderId ${renderId}`)
+   }
+   return rendered
+ }
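Since `renderScene` returns immediately while the fire-and-forget pipeline keeps mutating the cached response, callers are expected to poll. A hedged sketch of that flow (the request literal and the polling interval are assumptions about how the API is consumed):

```typescript
import { renderScene, getRenderedScene } from "./renderScene.mts"

const scene = await renderScene({
  prompt: "a mallard duck by a pond",
  nbFrames: 1, // 1 frame means a still image; more means a video
} as any)

let rendered = scene
while (rendered.status === "pending") {
  await new Promise(r => setTimeout(r, 1000)) // poll once per second
  rendered = await getRenderedScene(scene.renderId)
}

console.log(rendered.status === "completed" ? rendered.assetUrl.slice(0, 30) : rendered.error)
```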
src/production/renderVideo.mts ADDED
@@ -0,0 +1,41 @@
+ import { RenderedScene, RenderRequest } from "../types.mts"
+ import { generateVideo } from "./generateVideo.mts"
+
+ export async function renderVideo(
+   request: RenderRequest,
+   response: RenderedScene
+ ): Promise<RenderedScene> {
+
+   const params = {
+     seed: request.seed,
+     nbFrames: request.nbFrames,
+     nbSteps: request.nbSteps,
+   }
+
+   try {
+     response.assetUrl = await generateVideo(request.prompt, params)
+     // console.log("successful generation")
+
+     if (!response.assetUrl?.length) {
+       throw new Error(`url for the generated video is empty`)
+     }
+   } catch (err) {
+     console.error(`failed to render the video scene.. but let's try again!`)
+
+     try {
+       response.assetUrl = await generateVideo(request.prompt, params)
+       // console.log("successful generation")
+
+       if (!response.assetUrl?.length) {
+         throw new Error(`url for the generated video is empty`)
+       }
+
+     } catch (err) {
+       console.error(`the video generation failed a second time: ${err}`)
+       response.error = `failed to render video scene: ${err}`
+       response.status = "error"
+     }
+   }
+
+   return response
+ }
src/production/renderVideoSegmentation.mts ADDED
@@ -0,0 +1,52 @@
+ import { v4 as uuidv4 } from "uuid"
+
+ import { RenderedScene, RenderRequest } from "../types.mts"
+ import { downloadFileToTmp } from "../utils/downloadFileToTmp.mts"
+ import { getFirstVideoFrame } from "../utils/getFirstVideoFrame.mts"
+ import { segmentImage } from "../utils/segmentImage.mts"
+
+ export async function renderVideoSegmentation(
+   request: RenderRequest,
+   response: RenderedScene
+ ): Promise<RenderedScene> {
+
+   const actionnables = Array.isArray(request.actionnables) ? request.actionnables : []
+
+   if (actionnables.length > 0) {
+     console.log("we have some actionnables:", actionnables)
+     if (request.segmentation === "firstframe") {
+       console.log("going to grab the first frame")
+       const tmpVideoFilePath = await downloadFileToTmp(response.assetUrl, `${uuidv4()}`)
+       console.log("downloaded the video to ", tmpVideoFilePath)
+       const firstFrameFilePath = await getFirstVideoFrame(tmpVideoFilePath)
+       console.log("extracted the first frame to ", firstFrameFilePath)
+
+       if (!firstFrameFilePath) {
+         console.error("failed to get the image")
+         response.error = "failed to segment the image"
+         response.status = "error"
+       } else {
+         console.log("got the first frame! segmenting..")
+         const result = await segmentImage(firstFrameFilePath, actionnables, request.width, request.height)
+         response.maskBase64 = result.pngInBase64
+         response.segments = result.segments
+
+         // console.log("success!", { segments })
+       }
+       /*
+       const jpgBase64 = await getFirstVideoFrame(tmpVideoFileName)
+       if (!jpgBase64) {
+         console.error("failed to get the image")
+         error = "failed to segment the image"
+       } else {
+         console.log(`got the first frame (${jpgBase64.length})`)
+
+         console.log("TODO: call segmentImage with the base64 image")
+         await segmentImage()
+       }
+       */
+     }
+   }
+
+   return response
+ }
src/production/upscaleVideo.mts ADDED
@@ -0,0 +1,78 @@
+ import path from "node:path"
+
+ import { v4 as uuidv4 } from "uuid"
+ import tmpDir from "temp-dir"
+ import puppeteer from "puppeteer"
+
+ import { downloadFileToTmp } from '../utils/downloadFileToTmp.mts'
+ import { pendingFilesDirFilePath } from '../config.mts'
+ import { moveFileFromTmpToPending } from "../utils/moveFileFromTmpToPending.mts"
+
+ const instances: string[] = [
+   process.env.VC_VIDEO_UPSCALE_SPACE_API_URL
+ ]
+
+ // TODO we should use an inference endpoint instead (or a space which bakes generation + upscale at the same time)
+ export async function upscaleVideo(fileName: string, prompt: string) {
+   const instance = instances.shift()
+   instances.push(instance)
+
+   const browser = await puppeteer.launch({
+     // headless: true,
+     protocolTimeout: 800000,
+   })
+
+   try {
+     const page = await browser.newPage()
+
+     await page.goto(instance, {
+       waitUntil: 'networkidle2',
+     })
+
+     const promptField = await page.$('textarea')
+     await promptField.type(prompt)
+
+     const inputFilePath = path.join(pendingFilesDirFilePath, fileName)
+     // console.log(`local file to upscale: ${inputFilePath}`)
+
+     await new Promise(r => setTimeout(r, 3000))
+
+     const fileField = await page.$('input[type=file]')
+
+     // console.log(`uploading file..`)
+     await fileField.uploadFile(inputFilePath)
+
+     // console.log('looking for the button to submit')
+     const submitButton = await page.$('button.lg')
+
+     // console.log('clicking on the button')
+     await submitButton.click()
+
+     /*
+     const client = await page.target().createCDPSession()
+
+     await client.send('Page.setDownloadBehavior', {
+       behavior: 'allow',
+       downloadPath: tmpDir,
+     })
+     */
+
+     await page.waitForSelector('a[download="xl_result.mp4"]', {
+       timeout: 800000, // needs to be large enough in case someone else attempts to use our space
+     })
+
+     const upscaledFileUrl = await page.$$eval('a[download="xl_result.mp4"]', el => el.map(x => x.getAttribute("href"))[0])
+
+     // it is always a good idea to download to a tmp dir before saving to the pending dir
+     // because there is always a risk that the download will fail
+
+     const tmpFileName = `${uuidv4()}.mp4`
+
+     await downloadFileToTmp(upscaledFileUrl, tmpFileName)
+     await moveFileFromTmpToPending(tmpFileName, fileName)
+   } catch (err) {
+     throw err
+   } finally {
+     await browser.close()
+   }
+ }
src/scheduler/deleteVideo.mts ADDED
@@ -0,0 +1,28 @@
+ import tmpDir from "temp-dir"
+ import { validate as uuidValidate } from "uuid"
+
+ import { completedMetadataDirFilePath, completedFilesDirFilePath, pendingMetadataDirFilePath, pendingFilesDirFilePath } from "../config.mts"
+ import { deleteFilesWithName } from "../utils/deleteAllFilesWith.mts"
+
+
+ // note: we make sure ownerId and videoId are *VALID*
+ // otherwise an attacker could try to delete important files!
+ export const deleteVideo = async (ownerId: string, videoId?: string) => {
+   if (!uuidValidate(ownerId)) {
+     throw new Error(`fatal error: ownerId ${ownerId} is invalid!`)
+   }
+
+   if (videoId && !uuidValidate(videoId)) {
+     throw new Error(`fatal error: videoId ${videoId} is invalid!`)
+   }
+   const id = videoId ? `${ownerId}_${videoId}` : ownerId
+
+   // this should delete everything, including audio files
+   // however we still have some temporary files with a name that is unique:
+   // we should probably rename those
+   await deleteFilesWithName(tmpDir, id)
+   await deleteFilesWithName(completedMetadataDirFilePath, id)
+   await deleteFilesWithName(completedFilesDirFilePath, id)
+   await deleteFilesWithName(pendingMetadataDirFilePath, id)
+   await deleteFilesWithName(pendingFilesDirFilePath, id)
+ }
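A hedged usage sketch (the UUIDs are invented; both ids must be valid UUIDs or the guard throws):

```typescript
const ownerId = "9b1deb4d-3b7d-4bad-9bdd-2b0d7b3dcb6d"
const videoId = "c56a4180-65aa-42ec-a945-5fd21dec0538"

await deleteVideo(ownerId, videoId) // delete a single video
await deleteVideo(ownerId) // delete everything belonging to this owner
```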
src/scheduler/getAllVideosForOwner.mts ADDED
@@ -0,0 +1,9 @@
+ import { Video } from "../types.mts"
+ import { getCompletedVideos } from "./getCompletedVideos.mts"
+ import { getPendingVideos } from "./getPendingVideos.mts"
+
+ export const getAllVideosForOwner = async (ownerId: string): Promise<Video[]> => {
+   const pendingVideos = await getPendingVideos(ownerId)
+   const completedVideos = await getCompletedVideos(ownerId)
+   return [...pendingVideos, ...completedVideos]
+ }
src/scheduler/getCompletedVideos.mts ADDED
@@ -0,0 +1,9 @@
+ import { Video } from "../types.mts"
+ import { completedMetadataDirFilePath } from "../config.mts"
+ import { readVideoMetadataFiles } from "./readVideoMetadataFiles.mts"
+
+ export const getCompletedVideos = async (ownerId?: string): Promise<Video[]> => {
+   const completedVideos = await readVideoMetadataFiles(completedMetadataDirFilePath, ownerId)
+
+   return completedVideos
+ }