Upload 36 files
- .gitattributes +15 -0
- .gitignore +11 -0
- LICENSE +24 -0
- README.md +293 -0
- README.zh_CN.md +91 -0
- README_ext.md +34 -0
- img/ctrlnet-depth.gif +3 -0
- img/ctrlnet-ref.gif +3 -0
- img/embryo.png +0 -0
- img/i2i-e-ddim.gif +3 -0
- img/i2i-e-euler_a.gif +3 -0
- img/i2i-f-ddim-pp.gif +3 -0
- img/i2i-f-ddim.gif +3 -0
- img/i2i-f-euler_a.gif +3 -0
- img/i2i-ref.png +0 -0
- img/i2i-s-ddim.gif +3 -0
- img/i2i-s-euler_a.gif +3 -0
- img/manager.png +0 -0
- img/ref_ctrlnet/0.png +0 -0
- img/ref_ctrlnet/1.png +0 -0
- img/t2i-e-ddim.gif +3 -0
- img/t2i-e-euler_a.gif +3 -0
- img/t2i-f-ddim.gif +3 -0
- img/t2i-f-euler_a.gif +3 -0
- img/t2i-s-ddim.gif +3 -0
- img/t2i-s-euler_a.gif +3 -0
- install.py +4 -0
- manager.cmd +41 -0
- manager.py +513 -0
- postprocess-config.cmd.example +43 -0
- postprocess.cmd +164 -0
- requirements.txt +6 -0
- scripts/controlnet_travel.py +913 -0
- scripts/prompt_travel.py +818 -0
- tools/README.txt +25 -0
- tools/install.cmd +109 -0
- tools/link.cmd +20 -0
.gitattributes
CHANGED
@@ -33,3 +33,18 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+img/ctrlnet-depth.gif filter=lfs diff=lfs merge=lfs -text
+img/ctrlnet-ref.gif filter=lfs diff=lfs merge=lfs -text
+img/i2i-e-ddim.gif filter=lfs diff=lfs merge=lfs -text
+img/i2i-e-euler_a.gif filter=lfs diff=lfs merge=lfs -text
+img/i2i-f-ddim-pp.gif filter=lfs diff=lfs merge=lfs -text
+img/i2i-f-ddim.gif filter=lfs diff=lfs merge=lfs -text
+img/i2i-f-euler_a.gif filter=lfs diff=lfs merge=lfs -text
+img/i2i-s-ddim.gif filter=lfs diff=lfs merge=lfs -text
+img/i2i-s-euler_a.gif filter=lfs diff=lfs merge=lfs -text
+img/t2i-e-ddim.gif filter=lfs diff=lfs merge=lfs -text
+img/t2i-e-euler_a.gif filter=lfs diff=lfs merge=lfs -text
+img/t2i-f-ddim.gif filter=lfs diff=lfs merge=lfs -text
+img/t2i-f-euler_a.gif filter=lfs diff=lfs merge=lfs -text
+img/t2i-s-ddim.gif filter=lfs diff=lfs merge=lfs -text
+img/t2i-s-euler_a.gif filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,11 @@
# meta
.vscode/
__pycache__/

# third party tools
tools/*
!tools/README.txt
!tools/*.cmd

# user-wise config files
postprocess-config.cmd
LICENSE
ADDED
@@ -0,0 +1,24 @@
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.

In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to <https://unlicense.org>
README.md
ADDED
@@ -0,0 +1,293 @@
# stable-diffusion-webui-prompt-travel

Travel between prompts in the latent space to make a pseudo-animation; an extension script for AUTOMATIC1111/stable-diffusion-webui.

----

<p align="left">
  <a href="https://github.com/Kahsolt/stable-diffusion-webui-prompt-travel/commits"><img alt="Last Commit" src="https://img.shields.io/github/last-commit/Kahsolt/stable-diffusion-webui-prompt-travel"></a>
  <a href="https://github.com/Kahsolt/stable-diffusion-webui-prompt-travel/issues"><img alt="GitHub issues" src="https://img.shields.io/github/issues/Kahsolt/stable-diffusion-webui-prompt-travel"></a>
  <a href="https://github.com/Kahsolt/stable-diffusion-webui-prompt-travel/stargazers"><img alt="GitHub stars" src="https://img.shields.io/github/stars/Kahsolt/stable-diffusion-webui-prompt-travel"></a>
  <a href="https://github.com/Kahsolt/stable-diffusion-webui-prompt-travel/network"><img alt="GitHub forks" src="https://img.shields.io/github/forks/Kahsolt/stable-diffusion-webui-prompt-travel"></a>
  <img alt="Language" src="https://img.shields.io/github/languages/top/Kahsolt/stable-diffusion-webui-prompt-travel">
  <img alt="License" src="https://img.shields.io/github/license/Kahsolt/stable-diffusion-webui-prompt-travel">
  <br/>
</p>



Try interpolating on the hidden vectors of the conditioning prompt to make a seemingly-continuous image sequence, or let's say a pseudo-animation. 😀
Not only prompts! We also support non-prompt conditions, read => [README_ext.md](README_ext.md) ~

⚠ 我们成立了插件反馈 QQ 群: 616795645 (赤狐屿),欢迎出建议、意见、报告bug等 (w
⚠ We have a QQ chat group (616795645) now, any suggestions, discussions and bug reports are highly welcome!!

ℹ To be honest, I think this could be used to make slideshow fairy-tale picture books <del>or even doujinshi</del>……
ℹ A smart workflow: first manually blind-search two good-looking images (differing only in prompt), then try to travel between them :lollipop:

⚠ Remember to check "Always save all generated images" in the settings tab, otherwise "upscaling" and "saving intermediate images" will not work.
⚠ 记得在设置页勾选 “总是保存所有生成的图片”,否则 上采样 与 保存中间图片 将无法工作。


### Change Log

⚪ Compatibility

The latest version `v3.0` is synced & tested with:

- [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui): version `v1.4.0`, tag [v1.4.0](https://github.com/AUTOMATIC1111/stable-diffusion-webui/releases/tag/v1.4.0)
- [Mikubill/sd-webui-controlnet](https://github.com/Mikubill/sd-webui-controlnet): version `v1.1.229`, commit [eceeec7a7e](https://github.com/Mikubill/sd-webui-controlnet/commit/eceeec7a7e856867de56e26cae9f3e1076480344)

⚪ Features

- 2023/07/05: `v3.0` re-implement the core with sd-webui `v1.4.0` callbacks; the new implementation is slower, but more compatible with other extensions
- 2023/04/13: `v2.7` add RIFE to controlnet-travel, skip fusion (experimental)
- 2023/03/31: `v2.6` add a tkinter [GUI](#run-each-time) for the postprocess toolchain
- 2023/03/30: `v2.5` add controlnet-travel script (experimental), interpolating between hint conditions **instead of prompts**, thanks for the code base from [sd-webui-controlnet](https://github.com/Mikubill/sd-webui-controlnet)
- 2023/02/14: `v2.3` integrate the basic function of [depth-image-io](https://github.com/AnonymousCervine/depth-image-io-for-SDWebui) for depth2img models
- 2023/01/27: `v2.2` add 'slerp' linear interpolation method
- 2023/01/22: `v2.1` add experimental 'replace' mode again; it's not smooth interpolation
- 2023/01/20: `v2.0` add optional external [post-processing pipeline](#post-processing-pipeline) to greatly boost smoothness, great thanks to [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN) and [RIFE](https://github.com/nihui/rife-ncnn-vulkan)!!
- 2023/01/16: `v1.5` add upscale options (issue #12); add 'embryo' genesis, reproducing the idea of [stable-diffusion-animation](https://replicate.com/andreasjansson/stable-diffusion-animation) except [FILM](https://github.com/google-research/frame-interpolation) support (issue #11)
- 2023/01/12: `v1.4` remove 'replace' & 'grad' mode support, due to webui's code change
- 2022/12/11: `v1.3` work in a more 'successive' way, idea borrowed from [deforum](https://github.com/deforum-art/deforum-for-automatic1111-webui) ('genesis' option)
- 2022/11/14: `v1.2` walk by substituting token embedding ('replace' mode)
- 2022/11/13: `v1.1` walk by optimizing condition ('grad' mode)
- 2022/11/10: `v1.0` interpolate linearly on condition/uncondition ('linear' mode)

⚪ Fixups

- 2023/07/05: sync sd-webui-controlnet to `v1.1.229`
- 2023/04/30: update controlnet core to `v1.1.116`
- 2023/03/29: `v2.4` bug fixes on script hook, now working correctly with extra networks & [sd-webui-controlnet](https://github.com/Mikubill/sd-webui-controlnet)
- 2023/01/31: keep up with webui's updates (issue #14: `ImportError: cannot import name 'single_sample_to_image'`)
- 2023/01/28: keep up with webui's updates, extra-networks rework
- 2023/01/16: `v1.5` apply zero padding when condition lengths mismatch (issue #10: `RuntimeError: The size of tensor a (77) must match the size of tensor b (154) at non-singleton dimension 0`), typo in demo filename
- 2023/01/12: `v1.4` keep up with webui's updates (issue #9: `AttributeError: 'FrozenCLIPEmbedderWithCustomWords' object has no attribute 'process_text'`)
- 2022/12/13: `#bdd8bed` fix not working when the negative prompt is left empty (issue #6: `neg_prompts[-1] IndexError: List index out of range`)
- 2022/11/27: `v1.2-fix2` keep up with webui's updates (error `ImportError: FrozenCLIPEmbedderWithCustomWords`)
- 2022/11/20: `v1.2-fix1` keep up with webui's updates (error `AttributeError: p.all_negative_prompts[0]`)

⚠ this script will probably NOT support the schedule syntax (i.e.: `[prompt:prompt:number]`), because interpolating on mutable conditions requires sampler-level tracing which is hard to maintain :(
⚠ this script will probably NOT work together with `hires.fix` due to an inner conceptual/logical conflict over `denoising_strength`; you can alternatively perform batch-upscale then batch-img2img.


### How it works?

- input **multiple lines** in the prompt/negative-prompt box, each line is called a **stage**
- generate images one by one, interpolating from one stage towards the next (batch configs are ignored)
  - gradually change the digested inputs between prompts (see the sketch below)
  - freeze all other settings (`steps`, `sampler`, `cfg factor`, `seed`, etc.)
  - note that only the major `seed` will be forcibly fixed through all processes, you can still set `subseed = -1` to allow more variance
- export a video!
  - follow the [post-processing pipeline](#post-processing-pipeline) to get a much better result 👌
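
Conceptually, "travel" in `linear` mode is just interpolation between the CLIP condition tensors of two adjacent stages; `slerp` (added in `v2.2`) walks along the arc instead of the straight line. Below is a minimal sketch of the two formulas, assuming the conditions are plain `torch` tensors; it is not the extension's actual implementation (that lives in `scripts/prompt_travel.py`):

```python
import torch

def lerp(a: torch.Tensor, b: torch.Tensor, t: float) -> torch.Tensor:
    ''' straight-line interpolation between two condition tensors ('linear' mode idea) '''
    return (1 - t) * a + t * b

def slerp(a: torch.Tensor, b: torch.Tensor, t: float, eps: float = 1e-8) -> torch.Tensor:
    ''' spherical interpolation: walk along the arc between a and b ('slerp' method idea) '''
    a_n = a / (a.norm() + eps)
    b_n = b / (b.norm() + eps)
    omega = torch.acos((a_n * b_n).sum().clamp(-1.0, 1.0))   # angle between the two (flattened) vectors
    so = torch.sin(omega)
    if so.abs() < eps:               # nearly parallel: fall back to plain lerp
        return lerp(a, b, t)
    return (torch.sin((1 - t) * omega) / so) * a + (torch.sin(t * omega) / so) * b
```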

⚪ Txt2Img

| sampler \ genesis | fixed | successive | embryo |
| :-: | :-: | :-: | :-: |
| Euler a |  |  |  |
| DDIM    |  |  |  |

⚪ Img2Img

| sampler \ genesis | fixed | successive | embryo |
| :-: | :-: | :-: | :-: |
| Euler a |  |  |  |
| DDIM    |  |  |  |

post-processing pipeline (case `i2i-f-ddim`):

| w/o. post-processing | w/. post-processing |
| :-: | :-: |
|  |  |

other stuff:

| reference image for img2img | embryo image decoded <br/> case `i2i-e-euler_a` with `embryo_step=8` |
| :-: | :-: |
|  |  |

⚪ ControlNet support

| prompt-travel with ControlNet (depth) | controlnet-travel (depth) |
| :-: | :-: |
|  |  |


The examples above were run with this configuration:

```text
Prompt:
(((masterpiece))), highres, ((boy)), child, cat ears, white hair, red eyes, yellow bell, red cloak, barefoot, angel, [flying], egyptian
((masterpiece)), highres, ((girl)), loli, cat ears, light blue hair, red eyes, magical wand, barefoot, [running]

Negative prompt:
(((nsfw))), ugly,duplicate,morbid,mutilated,tranny,trans,trannsexual,mutation,deformed,long neck,bad anatomy,bad proportions,extra arms,extra legs, disfigured,more than 2 nipples,malformed,mutated,hermaphrodite,out of frame,extra limbs,missing arms,missing legs,poorly drawn hands,poorty drawn face,mutation,poorly drawn,long body,multiple breasts,cloned face,gross proportions, mutated hands,bad hands,bad feet,long neck,missing limb,malformed limbs,malformed hands,fused fingers,too many fingers,extra fingers,missing fingers,extra digit,fewer digits,mutated hands and fingers,lowres,text,error,cropped,worst quality,low quality,normal quality,jpeg artifacts,signature,watermark,username,blurry,text font ufemale focus, poorly drawn, deformed, poorly drawn face, (extra leg:1.3), (extra fingers:1.2),out of frame

Steps: 15
CFG scale: 7
Clip skip: 1
Seed: 114514
Size: 512 x 512
Model hash: animefull-final-pruned.ckpt
Hypernet: (this is my secret :)
```


### Options

- prompt: (list of strings)
- negative prompt: (list of strings)
  - input multiple lines of prompt text
  - we call each line of prompt a stage; usually you need at least 2 lines of text to start a travel
  - if len(positive_prompts) != len(negative_prompts), the shorter one's last item will be repeated to match the longer one
- mode: (categorical)
  - `linear`: linear interpolation on the condition/uncondition of the CLIP output
  - `replace`: gradually replace the CLIP output
    - replace_dim: (categorical)
      - `token`: per token-wise vector
      - `channel`: per channel-wise vector
      - `random`: per point-wise element
    - replace_order: (categorical)
      - `similiar`: the most similar first (L1 distance)
      - `different`: the most different first
      - `random`: just randomly
  - `embryo`: pre-denoise for a few steps, then hatch a set of images from the common embryo by linear interpolation
- steps: (int, or list of int)
  - number of images to interpolate between two stages
  - if int, a constant number of travel steps
  - if list of int, its length should match `len(stages)-1`, separated by commas, e.g.: `12, 24, 36`
- genesis: (categorical), the prior for each image frame
  - `fixed`: starts from pure noise in the txt2img pipeline, or from the same ref-image given in the img2img pipeline
  - `successive`: starts from the last generated image (this will force txt2img to actually become img2img from the 2nd frame on)
  - `embryo`: starts from the same half-denoised image, see [=> How does it work?](https://replicate.com/andreasjansson/stable-diffusion-animation#readme)
    - (experimental) it only processes 2 lines of prompts, and does not interpolate on negative_prompt :(
- genesis_extra_params
  - denoise_strength: (float), denoise strength in img2img pipelines (for `successive`)
  - embryo_step: (int or float), steps to hatch the common embryo (for `embryo`)
    - if >= 1, taken as a step count
    - if < 1, taken as a ratio of the total steps
- video_*
  - fps: (float), FPS of the video, set `0` to disable file saving
  - fmt: (categorical), export video file format
  - pad: (int), repeat beginning/ending frames, giving an in/out time
  - pick: (string), cherry-pick frames by [python slice syntax](https://www.pythoncentral.io/how-to-slice-listsarrays-and-tuples-in-python) before padding (e.g.: set `::2` to get only even frames, set `:-1` to drop the last frame); see the sketch below
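
To make `pick` and `pad` concrete, here is how they are meant to act on the exported frame list, shown with plain Python slicing on dummy file names (an illustration only, not the script's internal code):

```python
frames = [f'{i:08d}.png' for i in range(8)]   # dummy frame names

picked = frames[::2]        # pick = '::2'  -> keep only the even frames
# picked = frames[:-1]      # pick = ':-1'  -> drop the last frame instead

pad = 2                     # pad = 2 -> repeat the first/last frame, giving an in/out time
padded = [picked[0]] * pad + picked + [picked[-1]] * pad
```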

### Installation

The easiest way to install it is:
1. Go to the "Extensions" tab in the webui, switch to the "Install from URL" tab
2. Paste https://github.com/Kahsolt/stable-diffusion-webui-prompt-travel.git into "URL for extension's git repository" and click Install
3. (Optional) You will need to restart the webui for the dependencies to be installed, or you won't be able to generate video files

Manual install:
1. Copy this repo folder to the 'extensions' folder of https://github.com/AUTOMATIC1111/stable-diffusion-webui
2. (Optional) Restart the webui


### Post-processing pipeline

We are still two steps away from a really smooth and high-resolution animation, namely image **super-resolution** & video **frame interpolation** (see `third-party tools` below).
⚠ Media data processing is intrinsically resource-exhausting, and it's also not the webui's job, hence we separate it out. 😃

#### setup once

⚪ auto install (Windows)

- run `cd tools & install.cmd`
- troubleshooting
  - if you get any file system access errors like `Access denied.`, try running it again until you see `Done!` without errors 😂
  - if you get SSL errors like `curl schannel ... Unknown error ... certificate.`, the downloader does not work due to SSL certificate issues; install manually instead...
- you will have four components installed under the [tools](tools) folder: [Busybox](https://frippery.org/busybox/), [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN-ncnn-vulkan), [RIFE](https://github.com/nihui/rife-ncnn-vulkan) and [FFmpeg](https://ffmpeg.org/)

⚪ manual install (Windows/Linux/Mac)

ℹ Understand the `tools` folder layout first => [tools/README.txt](tools/README.txt)
ℹ If you indeed wanna put the tools elsewhere, modify the paths in [tools/link.cmd](tools/link.cmd) and run `cd tools & link.cmd` 😉

For Windows:

- download [Busybox](https://frippery.org/busybox/)
- download [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN/releases) (e.g.: `realesrgan-ncnn-vulkan-20220424-windows.zip`)
  - (optional) download interesting separated model checkpoints (e.g.: `realesr-animevideov3.pth`)
- download the [rife-ncnn-vulkan](https://github.com/nihui/rife-ncnn-vulkan/releases) bundle (e.g.: `rife-ncnn-vulkan-20221029-windows.zip`)
- download a [FFmpeg](https://ffmpeg.org/download.html) binary (e.g.: `ffmpeg-release-full-shared.7z` or `ffmpeg-git-full.7z`)

For Linux/Mac:

- download [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN/releases) and [rife-ncnn-vulkan](https://github.com/nihui/rife-ncnn-vulkan/releases), put them according to the `tools` folder layout, and manually apply `chmod 755` to the executables
- `ffmpeg` can easily be found in your app store or package manager, e.g. `apt install ffmpeg`; it does NOT need to be linked under the `tools` folder


#### run each time

⚪ tkinter GUI (Windows/Linux/Mac)



For Windows:
- run `manager.cmd` to start the webui's python venv
- run the [DOSKEY](https://learn.microsoft.com/en-us/windows-server/administration/windows-commands/doskey) `install` (only setup once)
- run the DOSKEY `run`

For Linux/Mac:
- run `../../venv/Scripts/activate` to start the webui's python venv
- run `pip install -r requirements.txt` (only setup once)
- run `python manager.py`

ℹ find the usage help message in the right-click popup menu~

⚪ <del> cmd script (Windows) - deprecated </del>

- check the params in [postprocess-config.cmd](postprocess-config.cmd)
- pick one way to start 😃
  - run `postprocess.cmd path/to/<image_folder>` from the command line
  - drag & drop any image folder onto the `postprocess.cmd` icon
- once processing is finished, File Explorer will be launched automatically to locate the generated file named `synth.mp4`


### Related Projects

⚪ extensions that inspired this repo

- sd-webui-controlnet (various image conditions): [https://github.com/Mikubill/sd-webui-controlnet](https://github.com/Mikubill/sd-webui-controlnet)
- depth-image-io (custom depth2img): [https://github.com/AnonymousCervine/depth-image-io-for-SDWebui](https://github.com/AnonymousCervine/depth-image-io-for-SDWebui)
- animator (img2img): [https://github.com/Animator-Anon/animator_extension](https://github.com/Animator-Anon/animator_extension)
- sd-webui-riffusion (music gen): [https://github.com/enlyth/sd-webui-riffusion](https://github.com/enlyth/sd-webui-riffusion)
- sd-animation (half denoise + FILM):
  - Github: [https://github.com/andreasjansson/cog-stable-diffusion](https://github.com/andreasjansson/cog-stable-diffusion)
  - Replicate: [https://replicate.com/andreasjansson/stable-diffusion-animation](https://replicate.com/andreasjansson/stable-diffusion-animation)
- deforum (img2img + depth model): [https://github.com/deforum-art/deforum-for-automatic1111-webui](https://github.com/deforum-art/deforum-for-automatic1111-webui)
- seed-travel (varying seed): [https://github.com/yownas/seed_travel](https://github.com/yownas/seed_travel)

⚪ third-party tools

- image super-resolution
  - ESRGAN:
    - ESRGAN: [https://github.com/xinntao/ESRGAN](https://github.com/xinntao/ESRGAN)
    - Real-ESRGAN: [https://github.com/xinntao/Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN)
    - Real-ESRGAN-ncnn-vulkan (recommended): [https://github.com/xinntao/Real-ESRGAN-ncnn-vulkan](https://github.com/xinntao/Real-ESRGAN-ncnn-vulkan)
- video frame interpolation
  - FILM (recommended): [https://github.com/google-research/frame-interpolation](https://github.com/google-research/frame-interpolation)
  - RIFE:
    - ECCV2022-RIFE: [https://github.com/megvii-research/ECCV2022-RIFE](https://github.com/megvii-research/ECCV2022-RIFE)
    - rife-ncnn-vulkan (recommended): [https://github.com/nihui/rife-ncnn-vulkan](https://github.com/nihui/rife-ncnn-vulkan)
    - Squirrel-RIFE: [https://github.com/Justin62628/Squirrel-RIFE](https://github.com/Justin62628/Squirrel-RIFE)
    - Practical-RIFE: [https://github.com/hzwer/Practical-RIFE](https://github.com/hzwer/Practical-RIFE)
- GNU tool-kits
  - BusyBox: [https://www.busybox.net/](https://www.busybox.net/)
  - BusyBox for Windows: [https://frippery.org/busybox/](https://frippery.org/busybox/)
- FFmpeg: [https://ffmpeg.org/](https://ffmpeg.org/)

⚪ my other experimental toy extensions

- vid2vid (video2video): [https://github.com/Kahsolt/stable-diffusion-webui-vid2vid](https://github.com/Kahsolt/stable-diffusion-webui-vid2vid)
- hires-fix-progressive (a progressive version of hires.fix): [https://github.com/Kahsolt/stable-diffusion-webui-hires-fix-progressive](https://github.com/Kahsolt/stable-diffusion-webui-hires-fix-progressive)
- sonar (k_diffusion samplers): [https://github.com/Kahsolt/stable-diffusion-webui-sonar](https://github.com/Kahsolt/stable-diffusion-webui-sonar)
- size-travel (kind of X-Y plot on image size): [https://github.com/Kahsolt/stable-diffusion-webui-size-travel](https://github.com/Kahsolt/stable-diffusion-webui-size-travel)

----
by Armit
2022/11/10
README.zh_CN.md
ADDED
@@ -0,0 +1,91 @@
# Prompt Travel

Travel through the model's latent space to make pseudo-animations; an extension for AUTOMATIC1111/stable-diffusion-webui.

----

Interpolate on the output of the CLIP text encoder to make a semantic transition between multiple prompts, producing a seemingly continuous image sequence, i.e. a pseudo-animation. 😀

⚠ We have set up a QQ feedback group for the extension: 616795645 (赤狐屿); suggestions, comments and bug reports are all welcome (w

ℹ To be honest, I think this could be used to make slideshow fairy-tale picture books <del>or even doujinshi</del>……
ℹ A smart workflow: first manually blind-search two good-looking images (differing only in prompt), then try to travel between them :lollipop:


### Usage & how it works

- enter **multiple lines** of text in the prompt/negative-prompt box; each line is called a **stage**
- images are generated one by one; within each stage the prompt vectors in use are interpolated
- to keep a certain continuity, all other settings are frozen
  - although the major random seed is fixed across all frames, you can still enable `subseed` to add variance
- export a video!
  - use the optional [post-processing pipeline](#post-processing-pipeline) for better quality and smoothness 👌


### Options

- prompt: (multi-line text)
- negative prompt: (multi-line text)
  - the ordinary prompt / negative-prompt boxes, but you must enter multiple lines of text, one stage per line
  - if the prompt and negative prompt have different numbers of stages, the shorter one is repeated to align with the longer one
- steps: (int, or several comma-separated ints)
  - number of frames interpolated between two stages
  - a single int means every stage uses the same number of interpolated frames
  - several ints separated by ASCII commas give each gap its own count, e.g. with 4 stages you can give 3 separate step counts: `12, 24, 36`
- genesis: (categorical), the content prior of each frame
  - `fixed`: in the txt2img pipeline always denoise from Gaussian noise; in the img2img pipeline always denoise from the given reference image
  - `successive`: denoise from the previous frame (this forces txt2img to effectively become img2img from the 2nd frame on)
  - `embryo`: denoise from a partially denoised common ancestor (the embryo), see [=> how it works](https://replicate.com/andreasjansson/stable-diffusion-animation#readme)
    - (experimental) only supports travel between 2 stages, and cannot interpolate the negative prompt :(
- genesis extra params
  - denoise strength: (float), denoising strength used in the img2img pipeline (only for `successive`)
  - embryo step: (int or float), pre-denoising steps used to produce the common embryo (only for `embryo`)
    - if >= 1, interpreted as a sampling step count
    - if < 1, interpreted as a ratio of the total sampling steps
- video related
  - fps: (float), frame rate of the exported video, set `0` to disable export
  - fmt: (categorical), file format of the exported video
  - pad: (int), repeat the first/last frame `N` times to leave some intro/outro time
  - pick: (slice), cherry-pick the frames to export with [Python slice syntax](https://www.pythoncentral.io/how-to-slice-listsarrays-and-tuples-in-python); note that slicing happens before padding (e.g. `::2` keeps only the even frames, `:-1` drops the last frame)
- debug: (bool)
  - whether to print verbose logs to the console


### Post-processing pipeline

Semantic interpolation is about as much as the CLIP model alone can deliver; we are still two steps away from a high-resolution, silky-smooth animation: **image super-resolution** and **video frame interpolation**.
⚠ Media processing is very resource-consuming and we cannot expect the webui to do it; it is therefore split out of the host and the extension as an optional external toolchain. 😃

#### Setup once

⚪ auto install

- run `tools/install.cmd`
  - if you hit errors such as "Access denied", run it again until it finishes with `Done!` and no errors 😂
  - this installs the three components [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN-ncnn-vulkan), [RIFE](https://github.com/nihui/rife-ncnn-vulkan) and [FFmpeg](https://ffmpeg.org/) under the [tools](tools) folder

⚪ manual install

- refer to [README.md](README.md#post-processing-pipeline)
- if you are already set on a manual install, I figure you can chew through a bit of English as well…… 🤔

#### Run each time

- check the default parameters in [postprocess.cmd](postprocess.cmd)
- there are two ways to start a post-processing task 😃
  - run `postprocess.cmd path/to/<image_folder>` from the command line
  - drag & drop any image folder onto the `postprocess.cmd` file icon

ℹ When the task finishes, File Explorer is launched automatically and locates the exported `synth.mp4` file~


Raw extension output vs. with post-processing (case `img2img-fixed-ddim`):

| w/o. post-processing | w/. post-processing |
| :-: | :-: |
|  |  |


----
by Armit
2023/01/20
README_ext.md
ADDED
@@ -0,0 +1,34 @@
# stable-diffusion-webui-non-prompt-travel (extensions)

Of course not only prompts! -- You shall also be able to travel through any other conditions. 😀

----

### ControlNet-Travel

Travel through ControlNet's control conditions like canny, depth, openpose, etc...

⚠ Memory (not VRAM) usage grows linearly with the number of sampling steps and fusion layers; this is its nature 😥

Quickstart instructions:

- prepare a folder of images, e.g. frames from a video
- enable `sd-webui-controlnet` and set all its parameters as you want, but it's ok to **leave the ref image box empty**
  - reference images will be read from the image folder given in controlnet-travel :) (see the sketch below)
- find `ControlNet Travel` in the script dropdown, set all parameters again, and specify your image folder path here
- click the Generate button
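
A rough sketch of how a hint-image folder maps to travel stages, assuming frames are consumed in sorted filename order; the helper name below is made up for illustration and is not the script's actual function:

```python
from pathlib import Path

def list_hint_images(folder: str) -> list:
    ''' hypothetical helper: collect reference/hint images in sorted filename order,
        one per travel stage (mirrors the quickstart description, not the real code) '''
    exts = {'.png', '.jpg', '.jpeg'}
    return sorted(fp for fp in Path(folder).iterdir() if fp.suffix.lower() in exts)
```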

Options:

- interp_meth: (categorical)
  - `linear`: linear weighted sum, better for area-based annotations like `depth`, `seg` (see the sketch below)
  - `rife`: optical flow model (requires installing the postprocess tools first), better for edge-based annotations like `canny`, `openpose`
- skip_latent_fusion: (list of bool), experimental
  - skip latent fusion in some layers to save memory, but you might get weird results 🤔
  - ℹ in my experience, the `mid` and `in` blocks are safer to skip
- save_rife: (bool), save the RIFE-interpolated condition images
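
For the `linear` method, the idea is simply a weighted sum of two hint conditions. A minimal sketch, assuming the hints are already tensors of the same shape; this is an illustration, not the extension's actual implementation (which lives in `scripts/controlnet_travel.py`):

```python
import torch

def lerp_hint(hint_a: torch.Tensor, hint_b: torch.Tensor, alpha: float) -> torch.Tensor:
    ''' weighted sum of two ControlNet hint conditions, alpha in [0, 1] '''
    return (1 - alpha) * hint_a + alpha * hint_b
```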


----
by Armit
2023/04/12
img/ctrlnet-depth.gif ADDED (Git LFS)
img/ctrlnet-ref.gif ADDED (Git LFS)
img/embryo.png ADDED
img/i2i-e-ddim.gif ADDED (Git LFS)
img/i2i-e-euler_a.gif ADDED (Git LFS)
img/i2i-f-ddim-pp.gif ADDED (Git LFS)
img/i2i-f-ddim.gif ADDED (Git LFS)
img/i2i-f-euler_a.gif ADDED (Git LFS)
img/i2i-ref.png ADDED
img/i2i-s-ddim.gif ADDED (Git LFS)
img/i2i-s-euler_a.gif ADDED (Git LFS)
img/manager.png ADDED
img/ref_ctrlnet/0.png ADDED
img/ref_ctrlnet/1.png ADDED
img/t2i-e-ddim.gif ADDED (Git LFS)
img/t2i-e-euler_a.gif ADDED (Git LFS)
img/t2i-f-ddim.gif ADDED (Git LFS)
img/t2i-f-euler_a.gif ADDED (Git LFS)
img/t2i-s-ddim.gif ADDED (Git LFS)
img/t2i-s-euler_a.gif ADDED (Git LFS)
install.py
ADDED
@@ -0,0 +1,4 @@
import launch

if not launch.is_installed("moviepy"):
    launch.run_pip("install moviepy==1.0.3", "requirements for Prompt Travel to generate video")
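install.py only pulls in `moviepy`, which the extension needs to assemble the travel frames into a video. As a rough illustration of the kind of call this dependency enables (a sketch only, not the extension's actual export code; the file names are made up):

```python
from moviepy.editor import ImageSequenceClip

# stitch a list of numbered frames into an mp4 at 10 fps (illustrative only)
frames = [f'outputs/txt2img-images/prompt_travel/00001/{i:08d}.png' for i in range(10)]
clip = ImageSequenceClip(frames, fps=10)
clip.write_videofile('travel.mp4')
```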
manager.cmd
ADDED
@@ -0,0 +1,41 @@
@REM start webui's python venv
@ECHO OFF

SET SD_PATH=%~dp0\..\..
PUSHD %SD_PATH%
SET SD_PATH=%CD%
POPD

REM SET VENV_PATH=C:\Miniconda3
SET VENV_PATH=%SD_PATH%\venv

SET PATH=%VENV_PATH%\Scripts;%PATH%
SET PY_BIN=python.exe

%PY_BIN% --version > NUL
IF ERRORLEVEL 1 GOTO die

DOSKEY run=python manager.py
DOSKEY install=pip install -r requirements.txt

CMD /K activate.bat ^& ^
ECHO VENV_PATH: %VENV_PATH% ^& ^
%PY_BIN% --version ^& ^
ECHO. ^& ^
ECHO Commands shortcuts: ^& ^
ECHO   run       start ptravel manager ^& ^
ECHO   install   install requirements.txt

GOTO EOF

:die
ECHO ERRORLEVEL: %ERRORLEVEL%
ECHO PATH: %PATH%
ECHO VENV_PATH: %VENV_PATH%
ECHO Python executables:
WHERE python.exe

PAUSE

:EOF
manager.py
ADDED
@@ -0,0 +1,513 @@
#!/usr/bin/env pythonw3
# Author: Armit
# Create Time: 2023/03/31

import sys
import os
import shutil
import psutil
from pathlib import Path
from time import time
from PIL import Image
from PIL.ImageTk import PhotoImage
import subprocess
from subprocess import Popen
from threading import Thread
from typing import Union
import gc

import tkinter as tk
import tkinter.ttk as ttk
import tkinter.messagebox as tkmsg
import tkinter.filedialog as tkfdlg
from traceback import print_exc, format_exc

__version__ = '0.1'

BASE_PATH = Path(__file__).absolute().parent
WEBUI_PATH = BASE_PATH.parent.parent
OUTPUT_PATH = WEBUI_PATH / 'outputs'
DEFAULT_OUTPUT_PATH = OUTPUT_PATH / 'txt2img-images' / 'prompt_travel'

TOOL_PATH = BASE_PATH / 'tools'
paths_ext = []
paths_ext.append(str(TOOL_PATH))
paths_ext.append(str(TOOL_PATH / 'realesrgan-ncnn-vulkan'))
paths_ext.append(str(TOOL_PATH / 'rife-ncnn-vulkan'))
paths_ext.append(str(TOOL_PATH / 'ffmpeg'))
os.environ['PATH'] += os.path.pathsep + os.path.pathsep.join(paths_ext)

RESR_MODELS = {
    'realesr-animevideov3': [2, 3, 4],
    'realesrgan-x4plus-anime': [4],
    'realesrgan-x4plus': [4],
}
RIFE_MODELS = [
    'rife',
    'rife-anime',
    'rife-HD',
    'rife-UHD',
    'rife-v2',
    'rife-v2.3',
    'rife-v2.4',
    'rife-v3.0',
    'rife-v3.1',
    'rife-v4',
    'rife-v4.6',
]
EXPORT_FMT = [
    'mp4',
    'gif',
    'webm',
]

def sanitize_pathname(path: Union[str, Path]) -> str:
    if isinstance(path, Path): path = str(path)
    return path.replace('\\', os.path.sep)

def startfile(path: Union[str, Path]):
    # ref: https://stackoverflow.com/questions/17317219/is-there-an-platform-independent-equivalent-of-os-startfile/17317468#17317468
    if isinstance(path, Path): path = str(path)
    if sys.platform == 'win32':
        os.startfile(path)
    else:
        opener = "open" if sys.platform == "darwin" else "xdg-open"
        subprocess.call([opener, path])

def run_cmd(cmd: str) -> bool:
    try:
        print(f'[exec] {cmd}')
        Popen(cmd, shell=True, encoding='utf-8').wait()
        return True
    except:
        return False

def run_resr(model: str, ratio: int, in_dp: Path, out_dp: Path) -> bool:
    if out_dp.exists(): shutil.rmtree(str(out_dp))
    out_dp.mkdir(exist_ok=True)

    if model == 'realesr-animevideov3': model = f'realesr-animevideov3-x{ratio}'
    safe_out_dp = sanitize_pathname(out_dp)
    ok = run_cmd(f'realesrgan-ncnn-vulkan -v -s {ratio} -n {model} -i "{sanitize_pathname(in_dp)}" -o "{safe_out_dp}"')

    # NOTE: fix case of Embryo mode
    embryo_fp: Path = out_dp / 'embryo.png'
    if embryo_fp.exists(): embryo_fp.unlink()

    return ok

def run_rife(model: str, interp: int, in_dp: Path, out_dp: Path) -> bool:
    if out_dp.exists(): shutil.rmtree(str(out_dp))
    out_dp.mkdir(exist_ok=True)

    if model == 'rife-v4':
        if interp > 0: interp *= len(list(in_dp.iterdir()))
        return run_cmd(f'rife-ncnn-vulkan -v -n {interp} -m {model} -i "{sanitize_pathname(in_dp)}" -o "{sanitize_pathname(out_dp)}"')
    else:
        return run_cmd(f'rife-ncnn-vulkan -v -m {model} -i "{sanitize_pathname(in_dp)}" -o "{sanitize_pathname(out_dp)}"')

def run_ffmpeg(fps: float, fmt: str, in_dp: Path, out_dp: Path) -> bool:
    out_fp = out_dp / f'synth.{fmt}'
    if out_fp.exists(): out_fp.unlink()

    if fmt == 'gif':
        return run_cmd(f'ffmpeg -y -framerate {fps} -i "{sanitize_pathname(in_dp / r"%08d.png")}" "{sanitize_pathname(out_fp)}"')
    if fmt == 'mp4':
        return run_cmd(f'ffmpeg -y -framerate {fps} -i "{sanitize_pathname(in_dp / r"%08d.png")}" -crf 30 -c:v libx264 -pix_fmt yuv420p "{sanitize_pathname(out_fp)}"')
    if fmt == 'webm':
        # -c:v libvpx/libvpx-vp9/libaom-av1 (VP8/VP9/AV1)
        # -b:v 0/1M
        # -crf 15~30
        return run_cmd(f'ffmpeg -y -framerate {fps} -i "{sanitize_pathname(in_dp / r"%08d.png")}" -crf 30 -c:v libvpx-vp9 -pix_fmt yuv420p "{sanitize_pathname(out_fp)}"')


WINDOW_TITLE = f'Prompt Travel Manager v{__version__}'
WINDOW_SIZE = (710, 660)
IMAGE_SIZE = 512
LIST_HEIGHT = 100
COMBOX_WIDTH = 18
COMBOX_WIDTH1 = 4
ENTRY_WIDTH = 7
MEMINFO_REFRESH = 16   # refresh status memory info every k-image loads

HELP_INFO = '''
[Settings]
resr: model_name, upscale_ratio
  - only realesr-animevideov3 supports custom upscale_ratio
  - others are forced x4
rife: model_name, interp_ratio (NOT frame count!!)
  - only rife-v4 supports custom interp_ratio
  - others are forced x2
ffmpeg: export_format, export_fps

The checkboxes are enable switches specifying to run or not :)
'''


class App:

    def __init__(self):
        self.setup_gui()

        self.is_running = False
        self.cur_name = None   # str, current travel id
        self.cache = {}        # { 'name': [Image|Path] }

        self.p = psutil.Process(os.getpid())
        self.cnt_pv_load = 0

        if DEFAULT_OUTPUT_PATH.exists():
            self.open_(DEFAULT_OUTPUT_PATH)
        self.var_status.set(self._mem_info_str())

        try:
            self.wnd.mainloop()
        except KeyboardInterrupt:
            self.wnd.quit()
        except: print_exc()

    def setup_gui(self):
        # window
        wnd = tk.Tk()
        W, H = wnd.winfo_screenwidth(), wnd.winfo_screenheight()
        w, h = WINDOW_SIZE
        wnd.geometry(f'{w}x{h}+{(W-w)//2}+{(H-h)//2}')
        wnd.resizable(False, False)
        wnd.title(WINDOW_TITLE)
        wnd.protocol('WM_DELETE_WINDOW', wnd.quit)
        self.wnd = wnd

        # menu
        menu = tk.Menu(wnd, tearoff=0)
        menu.add_command(label='Open folder...', command=self._menu_open_dir)
        menu.add_command(label='Remove folder', command=self._menu_remove_dir)
        menu.add_separator()
        menu.add_command(label='Memory cache clean', command=self.mem_clear)
        menu.add_command(label='Help', command=lambda: tkmsg.showinfo('Help', HELP_INFO))
        def menu_show(evt):
            try: menu.tk_popup(evt.x_root, evt.y_root)
            finally: menu.grab_release()

        # top: travel folder
        frm1 = ttk.LabelFrame(wnd, text='Travel root folder')
        frm1.pack(side=tk.TOP, anchor=tk.N, expand=tk.YES, fill=tk.X)
        if True:
            self.var_root_dp = tk.StringVar(wnd)
            tk.Entry(frm1, textvariable=self.var_root_dp).pack(side=tk.LEFT, expand=tk.YES, fill=tk.X)
            tk.Button(frm1, text='Open..', command=self.open_).pack(side=tk.RIGHT)
            tk.Button(frm1, text='Refresh', command=lambda: self.open_(refresh=True)).pack(side=tk.RIGHT)

        # bottom status
        # NOTE: do not know why the display order is messy...
        frm3 = ttk.Label(wnd)
        frm3.pack(side=tk.BOTTOM, anchor=tk.S, expand=tk.YES, fill=tk.X)
        if True:
            self.var_status = tk.StringVar(wnd)
            tk.Label(frm3, textvariable=self.var_status).pack(anchor=tk.W)

        # middle: plot
        frm2 = ttk.Frame(wnd)
        frm2.pack(expand=tk.YES, fill=tk.BOTH)
        if True:
            # left: control
            frm21 = ttk.Frame(frm2)
            frm21.pack(side=tk.LEFT, expand=tk.YES, fill=tk.BOTH)
            if True:
                # top: action
                frm211 = ttk.Frame(frm21)
                frm211.pack(side=tk.TOP, expand=tk.YES, fill=tk.X)
                if True:
                    self.var_resr     = tk.BooleanVar(wnd, True)
                    self.var_resr_m   = tk.StringVar(wnd, 'realesr-animevideov3')
                    self.var_resr_r   = tk.IntVar(wnd, 2)
                    self.var_rife     = tk.BooleanVar(wnd, True)
                    self.var_rife_m   = tk.StringVar(wnd, 'rife-v4')
                    self.var_rife_r   = tk.IntVar(wnd, 2)
                    self.var_ffmpeg   = tk.BooleanVar(wnd, True)
                    self.var_ffmpeg_r = tk.IntVar(wnd, 20)
                    self.var_ffmpeg_f = tk.StringVar(wnd, 'mp4')

                    frm2111 = ttk.LabelFrame(frm211, text='Real-ESRGAN')
                    frm2111.pack(expand=tk.YES, fill=tk.X)
                    if True:
                        cb_m = ttk.Combobox(frm2111, text='model', values=list(RESR_MODELS.keys()), textvariable=self.var_resr_m, state='readonly', width=COMBOX_WIDTH)
                        cb_r = ttk.Combobox(frm2111, text='ratio', values=[], textvariable=self.var_resr_r, state='readonly', width=COMBOX_WIDTH1)
                        cb_m.grid(row=0, column=0, padx=2)
                        cb_r.grid(row=0, column=1, padx=2)
                        self.cb_resr = cb_r

                        def _cb_r_update():
                            values = RESR_MODELS[self.var_resr_m.get()]
                            cb_r.config(values=values)
                            if self.var_resr_r.get() not in values:
                                self.var_resr_r.set(values[0])
                            if len(values) == 1:
                                self.cb_resr.config(state=tk.DISABLED)
                            else:
                                self.cb_resr.config(state=tk.NORMAL)
                        cb_m.bind('<<ComboboxSelected>>', lambda evt: _cb_r_update())
                        _cb_r_update()

                    frm2112 = ttk.LabelFrame(frm211, text='RIFE')
                    frm2112.pack(expand=tk.YES, fill=tk.X)
                    if True:
                        cb = ttk.Combobox(frm2112, text='model', values=RIFE_MODELS, textvariable=self.var_rife_m, state='readonly', width=COMBOX_WIDTH)
                        et = ttk.Entry(frm2112, text='ratio', textvariable=self.var_rife_r, width=ENTRY_WIDTH)
                        cb.grid(row=0, column=0, padx=2)
                        et.grid(row=0, column=1, padx=2)
                        self.et_rife = et

                        def _et_update():
                            if self.var_rife_m.get() != 'rife-v4':
                                self.var_rife_r.set(2)
                                self.et_rife.config(state=tk.DISABLED)
                            else:
                                self.et_rife.config(state=tk.NORMAL)
                        cb.bind('<<ComboboxSelected>>', lambda evt: _et_update())
                        _et_update()

                    frm2113 = ttk.LabelFrame(frm211, text='FFmpeg')
                    frm2113.pack(expand=tk.YES, fill=tk.X)
                    if True:
                        cb = ttk.Combobox(frm2113, text='format', values=EXPORT_FMT, textvariable=self.var_ffmpeg_f, state='readonly', width=COMBOX_WIDTH)
                        et = ttk.Entry(frm2113, text='fps', textvariable=self.var_ffmpeg_r, width=ENTRY_WIDTH)
                        cb.grid(row=0, column=0, padx=2)
                        et.grid(row=0, column=1, padx=2)

                    frm2114 = ttk.Frame(frm211)
                    frm2114.pack(expand=tk.YES, fill=tk.X)
                    if True:
                        frm21141 = ttk.Frame(frm2114)
                        frm21141.pack(expand=tk.YES, fill=tk.X)
                        for i in range(3): frm21141.columnconfigure(i, weight=1)
                        if True:
                            ttk.Checkbutton(frm21141, text='resr',   variable=self.var_resr)  .grid(row=0, column=0, padx=0)
                            ttk.Checkbutton(frm21141, text='rife',   variable=self.var_rife)  .grid(row=0, column=1, padx=0)
                            ttk.Checkbutton(frm21141, text='ffmpeg', variable=self.var_ffmpeg).grid(row=0, column=2, padx=0)

                        btn = ttk.Button(frm2114, text='Run!', command=self.run)
                        btn.pack()
                        self.btn = btn

                frm212 = ttk.LabelFrame(frm21, text='Travels')
                frm212.pack(expand=tk.YES, fill=tk.BOTH)
                if True:
                    self.var_ls = tk.StringVar()
                    sc = tk.Scrollbar(frm212, orient=tk.VERTICAL)
                    ls = tk.Listbox(frm212, listvariable=self.var_ls, selectmode=tk.BROWSE, yscrollcommand=sc.set, height=LIST_HEIGHT)
                    ls.bind('<<ListboxSelect>>', lambda evt: self._ls_change())
                    ls.pack(expand=tk.YES, fill=tk.BOTH)
                    sc.config(command=ls.yview)
                    sc.pack(side=tk.RIGHT, anchor=tk.E, expand=tk.YES, fill=tk.Y)
                    ls.bind('<Button-3>', menu_show)
                    self.ls = ls

            # right: pv
            frm22 = ttk.LabelFrame(frm2, text='Frames')
            frm22.bind('<MouseWheel>', self._pv_change)
            frm22.pack(side=tk.RIGHT, expand=tk.YES, fill=tk.BOTH)
            if True:
                # top
                if True:
                    pv = ttk.Label(frm22, image=None)
                    pv.bind('<MouseWheel>', self._pv_change)
                    pv.bind('<Button-3>', menu_show)
                    pv.pack(anchor=tk.CENTER, expand=tk.YES, fill=tk.BOTH)
                    self.pv = pv

                # bottom
                if True:
                    self.var_fps_ip = tk.IntVar(wnd, 0)
                    sc = tk.Scale(frm22, orient=tk.HORIZONTAL, command=lambda _: self._pv_change(),
                                  from_=0, to=9, tickinterval=10, resolution=1, variable=self.var_fps_ip)
                    sc.bind('<MouseWheel>', self._pv_change)
                    sc.pack(anchor=tk.S, expand=tk.YES, fill=tk.X)
                    self.sc = sc

    def _menu_open_dir(self):
        try: startfile(Path(self.var_root_dp.get()) / self.cur_name)
        except: print_exc()

    def _menu_remove_dir(self):
        idx: tuple = self.ls.curselection()
        if not idx: return
        name = self.ls.get(idx)
        if name is None: return

        dp = Path(self.var_root_dp.get()) / name
        if name in self.cache:
            cnt = len(self.cache[name])
        else:
            cnt = len([fp for fp in dp.iterdir() if fp.suffix.lower() in ['.png', '.jpg', '.jpeg']])

        if not tkmsg.askyesno('Remove', f'Confirm to remove folder "{name}" with {cnt} images?'):
            return

        try:
            shutil.rmtree(str(dp))
            self.ls.delete(idx)
        except: print_exc()

    def _mem_info_str(self, title='Mem'):
        mem = self.p.memory_info()
        return f'[{title}] rss: {mem.rss//2**20:.3f} MB, vms: {mem.vms//2**20:.3f} MB'

    def mem_clear(self):
        info1 = self._mem_info_str('Before')

        to_del = set(self.cache.keys()) - {self.cur_name}
        for name in to_del: del self.cache[name]
        gc.collect()

        info2 = self._mem_info_str('After')
        tkmsg.showinfo('Meminfo', info1 + '\n' + info2)

        self.cnt_pv_load = 0
        self.var_status.set(self._mem_info_str())

    def open_(self, root_dp: Path = None, refresh=False):
        ''' Open a new travel root folder '''

        if refresh: root_dp = self.var_root_dp.get()
        if root_dp is None: root_dp = tkfdlg.askdirectory(initialdir=str(OUTPUT_PATH))
        if not root_dp: return
        if not Path(root_dp).exists():
            tkmsg.showerror('Error', f'invalid path: {root_dp} not exist')
            return

        self.var_root_dp.set(root_dp)

        dps = sorted([dp for dp in Path(root_dp).iterdir() if dp.is_dir()])
        if len(dps) == 0: tkmsg.showerror('Error', 'No travels found!\nYour root folder should be like <root_folder>/<travel_number>/*.png')

        self.ls.selection_clear(0, tk.END)
        self.var_ls.set([dp.name for dp in dps])

        self.cache.clear() ; gc.collect()
        self.ls.select_set(len(dps) - 1)
        self.ls.yview_scroll(len(dps), 'units')
        self._ls_change()

    def _ls_change(self):
        ''' Open a new travel id folder '''

        idx: tuple = self.ls.curselection()
        if not idx: return
        name = self.ls.get(idx)
        if name is None: return

        self.cur_name = name
        if name not in self.cache:
            dp: Path = Path(self.var_root_dp.get()) / name
            if dp.exists():
                self.cache[name] = sorted([fp for fp in dp.iterdir() if fp.suffix.lower() in ['.png', '.jpg', '.jpeg'] and fp.stem != 'embryo'])
            else:
                self.ls.delete(idx)

        n_imgs = len(self.cache[name])
        self.sc.config(to=n_imgs-1)
        try: self.sc.config(tickinterval=n_imgs // (n_imgs / 10))
        except: self.sc.config(tickinterval=1)

        self.var_fps_ip.set(0)
        self._pv_change()

    def _pv_change(self, evt=None):
        ''' Load a travel frame '''

        if not self.cur_name: return

        cache = self.cache[self.cur_name]
        if not len(cache):
            tkmsg.showinfo('Info', 'This folder is empty...')
            return

        idx = self.var_fps_ip.get()
        if evt is not None:
            offset = 1 if evt.delta < 0 else -1
            idx = (idx + offset + len(cache)) % len(cache)
            self.var_fps_ip.set(idx)

        if isinstance(cache[idx], Path):
            img = Image.open(cache[idx])
            img.thumbnail((IMAGE_SIZE, IMAGE_SIZE), Image.LANCZOS)
            cache[idx] = PhotoImage(img)

            self.cnt_pv_load += 1
            if self.cnt_pv_load >= MEMINFO_REFRESH:
                self.cnt_pv_load = 0
                self.var_status.set(self._mem_info_str())

        img = cache[idx]
        self.pv.config(image=img)
        self.pv.image = img

    def run(self):
        if self.is_running:
            tkmsg.showerror('Error', 'Another task is running in the background, please wait for it to finish...')
            return

        def run_tasks(*args):
            (
                base_dp,
                var_resr, var_resr_m, var_resr_r,
                var_rife, var_rife_m, var_rife_r,
                var_ffmpeg, var_ffmpeg_r, var_ffmpeg_f
            ) = args

            if not (0 <= var_rife_r < 8):
                tkmsg.showerror('Error', f'rife interp ratio should be in range 0 ~ 8, but got {var_rife_r} :(')
                return
            if not (1 <= var_ffmpeg_r <= 60):
                tkmsg.showerror('Error', f'fps should be in range 1 ~ 60, but got {var_ffmpeg_r} :(')
                return

            print('[Task] start') ; t = time()
            try:
                self.is_running = True
                self.btn.config(state=tk.DISABLED, text='Running...')

                if var_resr:
                    assert run_resr(var_resr_m, var_resr_r, base_dp, base_dp / 'resr')

                if var_rife:
                    assert run_rife(var_rife_m, var_rife_r, base_dp / 'resr', base_dp / 'rife')

                if var_ffmpeg:
                    dp: Path = base_dp / 'rife'
                    if dp.exists():
                        assert run_ffmpeg(var_ffmpeg_r, var_ffmpeg_f, base_dp / 'rife', base_dp)
                    else:
                        if tkmsg.askyesno('Warn', 'rife results not found, try synth from resr results?'):
                            assert run_ffmpeg(var_ffmpeg_r, var_ffmpeg_f, base_dp / 'resr', base_dp)

                print(f'[Task] done ({time() - t:.3f}s)')
                r = tkmsg.askyesno('Ok', 'Task done! Open output folder?')
                if r: startfile(base_dp)
            except:
                e = format_exc()
                print(e)
                print(f'[Task] failed ({time() - t:.3f}s)')
                tkmsg.showerror('Error', e)
            finally:
                self.is_running = False
                self.btn.config(state=tk.NORMAL, text='Run!')

        args = (
            Path(self.var_root_dp.get()) / self.cur_name,
            self.var_resr.get(),
            self.var_resr_m.get(),
            self.var_resr_r.get(),
            self.var_rife.get(),
            self.var_rife_m.get(),
            self.var_rife_r.get(),
            self.var_ffmpeg.get(),
            self.var_ffmpeg_r.get(),
            self.var_ffmpeg_f.get(),
        )
        Thread(target=run_tasks, args=args, daemon=True).start()
        print(args)


if __name__ == '__main__':
    App()
postprocess-config.cmd.example
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
@REM Configs for postprocess.cmd
|
2 |
+
@ECHO OFF
|
3 |
+
|
4 |
+
REM Real-ESRGAN model ckpt
|
5 |
+
REM string, [realesr-animevideov3, realesrgan-x4plus-anime, realesrgan-x4plus]
|
6 |
+
REM default: realesr-animevideov3
|
7 |
+
SET RESR_MODEL=realesr-animevideov3
|
8 |
+
|
9 |
+
REM image upscale rate
|
10 |
+
REM int, [2, 3, 4]
|
11 |
+
REM default: 2
|
12 |
+
SET RESR_UPSCALE=2
|
13 |
+
|
14 |
+
REM RIFE model ckpt
|
15 |
+
REM string, [rife-v4.6, rife-v4, rife-v2.3, rife-anime, ...]
|
16 |
+
REM default: rife-v4
|
17 |
+
SET RIFE_MODEL=rife-v4
|
18 |
+
|
19 |
+
REM interpolated frame count
|
20 |
+
REM int, 0 means n_images x 2
|
21 |
+
REM default: 0
|
22 |
+
SET RIFE_INTERP=0
|
23 |
+
|
24 |
+
REM rendered video fps, higher value requires more interpolations
|
25 |
+
REM int, 12 ~ 60 should be fine
|
26 |
+
REM default: 20 (to match the default fps of prompt-travel)
|
27 |
+
SET FPS=20
|
28 |
+
|
29 |
+
|
30 |
+
REM countdown before task start (in seconds)
|
31 |
+
REM int, non-negative
|
32 |
+
REM default: 5
|
33 |
+
SET WAIT_BEFORE_START=5
|
34 |
+
|
35 |
+
REM auto launch explorer and locate the generated file when done
|
36 |
+
REM boolean, [0, 1]
|
37 |
+
REM default: 1
|
38 |
+
SET EXPLORER_FLAG=1
|
39 |
+
|
40 |
+
REM clean all cache files when done, saving disk usage
|
41 |
+
REM boolean, [0, 1]
|
42 |
+
REM default: 0
|
43 |
+
SET CLEAN_FLAG=0
|
postprocess.cmd
ADDED
@@ -0,0 +1,164 @@
|
1 |
+
@REM Handy script for post-process pipeline
|
2 |
+
@ECHO OFF
|
3 |
+
SETLOCAL
|
4 |
+
|
5 |
+
TITLE Post-processing for prompt-travel...
|
6 |
+
|
7 |
+
REM remember base path and script name
|
8 |
+
SET _=%~dp0
|
9 |
+
SET $=%~nx0
|
10 |
+
SHIFT
|
11 |
+
|
12 |
+
REM init configs or make default
|
13 |
+
SET CONFIG_FILE=%_%postprocess-config.cmd
|
14 |
+
IF EXIST %CONFIG_FILE% GOTO skip_init_cfg
|
15 |
+
COPY %CONFIG_FILE%.example %CONFIG_FILE%
|
16 |
+
IF ERRORLEVEL 1 GOTO die
|
17 |
+
:skip_init_cfg
|
18 |
+
|
19 |
+
REM load configs
|
20 |
+
CALL %CONFIG_FILE%
|
21 |
+
IF ERRORLEVEL 1 GOTO die
|
22 |
+
|
23 |
+
REM assert required arguments
|
24 |
+
IF /I "%~0"=="-c" (
|
25 |
+
SET CLEAN_FLAG=1
|
26 |
+
SHIFT
|
27 |
+
)
|
28 |
+
SET IMAGE_FOLDER=%~0
|
29 |
+
SHIFT
|
30 |
+
|
31 |
+
REM show help
|
32 |
+
IF NOT EXIST "%IMAGE_FOLDER%" (
|
33 |
+
ECHO Usage: %$% [-c] ^<image_folder^> [upscale] [interp] [fps] [resr_model] [rife_model]
|
34 |
+
ECHO -c clean cache data when done
|
35 |
+
ECHO upscale image upsampling rate ^(default: %RESR_UPSCALE%^)
|
36 |
+
ECHO interp interpolated video frame count ^(default: %RIFE_INTERP%^)
|
37 |
+
ECHO fps rendered video frame rate ^(default: %FPS%^)
|
38 |
+
ECHO resr_model Real-ESRGAN model checkpoint name ^(default: %RESR_MODEL%^)
|
39 |
+
ECHO rife_model RIFE model checkpoint name ^(default: %RIFE_MODEL%^)
|
40 |
+
ECHO.
|
41 |
+
ECHO e.g. %$% D:\images
|
42 |
+
ECHO %$% -c D:\images
|
43 |
+
ECHO %$% D:\images 2 0
|
44 |
+
ECHO %$% D:\images 4 120 24
|
45 |
+
ECHO %$% D:\images 4 0 24 realesr-animevideov3 rife-v2.3
|
46 |
+
ECHO note:
|
47 |
+
ECHO ^<args^> arguments are required
|
48 |
+
ECHO ^[args^] arguments are optional
|
49 |
+
ECHO.
|
50 |
+
GOTO :end
|
51 |
+
)
|
52 |
+
|
53 |
+
REM override optional arguments by command line
|
54 |
+
IF NOT "%~0"=="" (
|
55 |
+
SET RESR_UPSCALE=%~0
|
56 |
+
SHIFT
|
57 |
+
)
|
58 |
+
IF NOT "%~0"=="" (
|
59 |
+
SET RIFE_INTERP=%~0
|
60 |
+
SHIFT
|
61 |
+
)
|
62 |
+
IF NOT "%~0"=="" (
|
63 |
+
SET FPS=%~0
|
64 |
+
SHIFT
|
65 |
+
)
|
66 |
+
IF NOT "%~0"=="" (
|
67 |
+
SET RESR_MODEL=%~0
|
68 |
+
SHIFT
|
69 |
+
)
|
70 |
+
IF NOT "%~0"=="" (
|
71 |
+
SET RIFE_MODEL=%~0
|
72 |
+
SHIFT
|
73 |
+
)
|
74 |
+
|
75 |
+
REM prepare paths
|
76 |
+
SET TOOL_HOME=%_%tools
|
77 |
+
SET RESR_HOME=%TOOL_HOME%\realesrgan-ncnn-vulkan
|
78 |
+
SET RIFE_HOME=%TOOL_HOME%\rife-ncnn-vulkan
|
79 |
+
SET FFMPEG_HOME=%TOOL_HOME%\ffmpeg
|
80 |
+
|
81 |
+
SET BBOX_BIN=busybox.exe
|
82 |
+
SET RESR_BIN=realesrgan-ncnn-vulkan.exe
|
83 |
+
SET RIFE_BIN=rife-ncnn-vulkan.exe
|
84 |
+
SET FFMPEG_BIN=ffmpeg.exe
|
85 |
+
|
86 |
+
PATH %TOOL_HOME%;%PATH%
|
87 |
+
PATH %RESR_HOME%;%PATH%
|
88 |
+
PATH %RIFE_HOME%;%PATH%
|
89 |
+
PATH %FFMPEG_HOME%\bin;%FFMPEG_HOME%;%PATH%
|
90 |
+
|
91 |
+
SET RESR_FOLDER=%IMAGE_FOLDER%\resr
|
92 |
+
SET RIFE_FOLDER=%IMAGE_FOLDER%\rife
|
93 |
+
SET OUT_FILE=%IMAGE_FOLDER%\synth.mp4
|
94 |
+
|
95 |
+
REM show configs for debug
|
96 |
+
ECHO ==================================================
|
97 |
+
ECHO RESR_MODEL = %RESR_MODEL%
|
98 |
+
ECHO RESR_UPSCALE = %RESR_UPSCALE%
|
99 |
+
ECHO RIFE_MODEL = %RIFE_MODEL%
|
100 |
+
ECHO RIFE_INTERP = %RIFE_INTERP%
|
101 |
+
ECHO FPS = %FPS%
|
102 |
+
ECHO RESR_FOLDER = %RESR_FOLDER%
|
103 |
+
ECHO RIFE_FOLDER = %RIFE_FOLDER%
|
104 |
+
ECHO OUT_FILE = %OUT_FILE%
|
105 |
+
ECHO.
|
106 |
+
|
107 |
+
ECHO ^>^> wait for %WAIT_BEFORE_START% seconds before start...
|
108 |
+
%BBOX_BIN% sleep %WAIT_BEFORE_START%
|
109 |
+
IF ERRORLEVEL 1 GOTO die
|
110 |
+
ECHO ^>^> start processing!
|
111 |
+
|
112 |
+
REM start processing
|
113 |
+
ECHO ==================================================
|
114 |
+
|
115 |
+
ECHO [1/3] image super-resolution
|
116 |
+
IF EXIST %RESR_FOLDER% GOTO skip_resr
|
117 |
+
MKDIR %RESR_FOLDER%
|
118 |
+
%RESR_BIN% -v -s %RESR_UPSCALE% -n %RESR_MODEL% -i %IMAGE_FOLDER% -o %RESR_FOLDER%
|
119 |
+
IF ERRORLEVEL 1 GOTO die
|
120 |
+
:skip_resr
|
121 |
+
|
122 |
+
ECHO ==================================================
|
123 |
+
|
124 |
+
ECHO [2/3] video frame-interpolation
|
125 |
+
IF EXIST %RIFE_FOLDER% GOTO skip_rife
|
126 |
+
MKDIR %RIFE_FOLDER%
|
127 |
+
SET NFRAMES=%RESR_FOLDER%
|
128 |
+
|
129 |
+
%RIFE_BIN% -v -n %RIFE_INTERP% -m %RIFE_MODEL% -i %RESR_FOLDER% -o %RIFE_FOLDER%
|
130 |
+
IF ERRORLEVEL 1 GOTO die
|
131 |
+
:skip_rife
|
132 |
+
|
133 |
+
ECHO ==================================================
|
134 |
+
|
135 |
+
ECHO [3/3] render video
|
136 |
+
%FFMPEG_BIN% -y -framerate %FPS% -i %RIFE_FOLDER%\%%08d.png -crf 20 -c:v libx264 -pix_fmt yuv420p %OUT_FILE%
|
137 |
+
IF ERRORLEVEL 1 GOTO die
|
138 |
+
|
139 |
+
ECHO ==================================================
|
140 |
+
|
141 |
+
REM clean cache
|
142 |
+
IF "%CLEAN_FLAG%"=="1" (
|
143 |
+
RMDIR /S /Q %RESR_FOLDER%
|
144 |
+
RMDIR /S /Q %RIFE_FOLDER%
|
145 |
+
)
|
146 |
+
|
147 |
+
REM finished
|
148 |
+
ECHO ^>^> file saved to %OUT_FILE%
|
149 |
+
IF "%EXPLORER_FLAG%"=="1" (
|
150 |
+
explorer.exe /e,/select,%OUT_FILE%
|
151 |
+
)
|
152 |
+
|
153 |
+
ECHO ^>^> Done!
|
154 |
+
ECHO.
|
155 |
+
GOTO :end
|
156 |
+
|
157 |
+
REM error handle
|
158 |
+
:die
|
159 |
+
ECHO ^<^< Error!
|
160 |
+
ECHO ^<^< errorlevel: %ERRORLEVEL%
|
161 |
+
ECHO.
|
162 |
+
|
163 |
+
:end
|
164 |
+
PAUSE
|
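The batch script above is meant to be run by hand; a minimal sketch of driving it from Python instead (hypothetical, not part of the uploaded files), reusing the positional arguments documented in its own help text:

    import subprocess

    # postprocess.cmd [-c] <image_folder> [upscale] [interp] [fps] [resr_model] [rife_model]
    subprocess.run(r'postprocess.cmd D:\images 2 0 20', shell=True, check=True)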
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
1 |
+
# webui script
|
2 |
+
moviepy
|
3 |
+
|
4 |
+
# postprocessor (GUI)
|
5 |
+
psutil
|
6 |
+
Pillow
|
scripts/controlnet_travel.py
ADDED
@@ -0,0 +1,913 @@
|
1 |
+
# This extension works with [Mikubill/sd-webui-controlnet](https://github.com/Mikubill/sd-webui-controlnet)
|
2 |
+
# version: v1.1.229
|
3 |
+
|
4 |
+
LOG_PREFIX = '[ControlNet-Travel]'
|
5 |
+
|
6 |
+
# ↓↓↓ EXIT EARLY IF EXTERNAL REPOSITORY NOT FOUND ↓↓↓
|
7 |
+
|
8 |
+
CTRLNET_REPO_NAME = 'sdcontrol'
|
9 |
+
if 'external repo sanity check':
|
10 |
+
from pathlib import Path
|
11 |
+
from modules.scripts import basedir
|
12 |
+
from traceback import print_exc
|
13 |
+
|
14 |
+
ME_PATH = Path(basedir())
|
15 |
+
CTRLNET_PATH = ME_PATH.parent / 'sdcontrol'
|
16 |
+
|
17 |
+
controlnet_found = False
|
18 |
+
try:
|
19 |
+
import sys ; sys.path.append(str(CTRLNET_PATH))
|
20 |
+
#from scripts.controlnet import Script as ControlNetScript # NOTE: this will mess up the import order
|
21 |
+
from scripts.external_code import ControlNetUnit
|
22 |
+
from scripts.hook import UNetModel, UnetHook, ControlParams
|
23 |
+
from scripts.hook import *
|
24 |
+
|
25 |
+
controlnet_found = True
|
26 |
+
print(f'{LOG_PREFIX} extension {CTRLNET_REPO_NAME} found, ControlNet-Travel loaded :)')
|
27 |
+
except ImportError:
|
28 |
+
print(f'{LOG_PREFIX} extension {CTRLNET_REPO_NAME} not found, ControlNet-Travel ignored :(')
|
29 |
+
exit(0)
|
30 |
+
except:
|
31 |
+
print_exc()
|
32 |
+
exit(0)
|
33 |
+
|
34 |
+
# ↑↑↑ EXIT EARLY IF EXTERNAL REPOSITORY NOT FOUND ↑↑↑
|
35 |
+
|
36 |
+
|
37 |
+
import sys
|
38 |
+
from PIL import Image
|
39 |
+
|
40 |
+
from ldm.models.diffusion.ddpm import LatentDiffusion
|
41 |
+
from modules import shared, devices, lowvram
|
42 |
+
from modules.processing import StableDiffusionProcessing as Processing
|
43 |
+
|
44 |
+
from scripts.prompt_travel import *
|
45 |
+
from manager import run_cmd
|
46 |
+
|
47 |
+
class InterpMethod(Enum):
|
48 |
+
LINEAR = 'linear (weight sum)'
|
49 |
+
RIFE = 'rife (optical flow)'
|
50 |
+
|
51 |
+
if 'consts':
|
52 |
+
__ = lambda key, value=None: opts.data.get(f'customscript/controlnet_travel.py/txt2img/{key}/value', value)
|
53 |
+
|
54 |
+
|
55 |
+
LABEL_CTRLNET_REF_DIR = 'Reference image folder (one ref image per stage :)'
|
56 |
+
LABEL_INTERP_METH = 'Interpolate method'
|
57 |
+
LABEL_SKIP_FUSE = 'Ext. skip latent fusion'
|
58 |
+
LABEL_DEBUG_RIFE = 'Save RIFE intermediates'
|
59 |
+
|
60 |
+
DEFAULT_STEPS = 10
|
61 |
+
DEFAULT_CTRLNET_REF_DIR = str(ME_PATH / 'img' / 'ref_ctrlnet')
|
62 |
+
DEFAULT_INTERP_METH = __(LABEL_INTERP_METH, InterpMethod.LINEAR.value)
|
63 |
+
DEFAULT_SKIP_FUSE = __(LABEL_SKIP_FUSE, False)
|
64 |
+
DEFAULT_DEBUG_RIFE = __(LABEL_DEBUG_RIFE, False)
|
65 |
+
|
66 |
+
CHOICES_INTERP_METH = [x.value for x in InterpMethod]
|
67 |
+
|
68 |
+
if 'vars':
|
69 |
+
skip_fuse_plan: List[bool] = [] # n_blocks (13)
|
70 |
+
|
71 |
+
interp_alpha: float = 0.0
|
72 |
+
interp_ip: int = 0 # 0 ~ n_sampling_step-1
|
73 |
+
from_hint_cond: List[Tensor] = [] # n_controlnet_set
|
74 |
+
to_hint_cond: List[Tensor] = []
|
75 |
+
mid_hint_cond: List[Tensor] = []
|
76 |
+
from_control_tensors: List[List[Tensor]] = [] # n_sampling_step x n_blocks
|
77 |
+
to_control_tensors: List[List[Tensor]] = []
|
78 |
+
|
79 |
+
caches: List[list] = [from_hint_cond, to_hint_cond, mid_hint_cond, from_control_tensors, to_control_tensors]
|
80 |
+
|
81 |
+
|
82 |
+
# ↓↓↓ the following is modified from 'sd-webui-controlnet/scripts/hook.py' ↓↓↓
|
83 |
+
|
84 |
+
def hook_hijack(self:UnetHook, model:UNetModel, sd_ldm:LatentDiffusion, control_params:List[ControlParams], process:Processing):
|
85 |
+
self.model = model
|
86 |
+
self.sd_ldm = sd_ldm
|
87 |
+
self.control_params = control_params
|
88 |
+
|
89 |
+
outer = self
|
90 |
+
|
91 |
+
def process_sample(*args, **kwargs):
|
92 |
+
# ControlNet must know whether a prompt is a conditional prompt (positive prompt) or an unconditional conditioning prompt (negative prompt).
|
93 |
+
# You can use the hook.py's `mark_prompt_context` to mark the prompts that will be seen by ControlNet.
|
94 |
+
# Let us say XXX is a MulticondLearnedConditioning or a ComposableScheduledPromptConditioning or a ScheduledPromptConditioning or a list of these components,
|
95 |
+
# if XXX is a positive prompt, you should call mark_prompt_context(XXX, positive=True)
|
96 |
+
# if XXX is a negative prompt, you should call mark_prompt_context(XXX, positive=False)
|
97 |
+
# After you mark the prompts, the ControlNet will know which prompt is cond/uncond and works as expected.
|
98 |
+
# After you mark the prompts, the mismatch errors will disappear.
|
99 |
+
mark_prompt_context(kwargs.get('conditioning', []), positive=True)
|
100 |
+
mark_prompt_context(kwargs.get('unconditional_conditioning', []), positive=False)
|
101 |
+
mark_prompt_context(getattr(process, 'hr_c', []), positive=True)
|
102 |
+
mark_prompt_context(getattr(process, 'hr_uc', []), positive=False)
|
103 |
+
return process.sample_before_CN_hack(*args, **kwargs)
|
104 |
+
|
105 |
+
# NOTE: ↓↓↓ only hack this method ↓↓↓
|
106 |
+
def forward(self:UNetModel, x:Tensor, timesteps:Tensor=None, context:Tensor=None, **kwargs):
|
107 |
+
total_controlnet_embedding = [0.0] * 13
|
108 |
+
total_t2i_adapter_embedding = [0.0] * 4
|
109 |
+
require_inpaint_hijack = False
|
110 |
+
is_in_high_res_fix = False
|
111 |
+
batch_size = int(x.shape[0])
|
112 |
+
|
113 |
+
# NOTE: declare globals
|
114 |
+
global from_hint_cond, to_hint_cond, from_control_tensors, to_control_tensors, mid_hint_cond, interp_alpha, interp_ip
|
115 |
+
x: Tensor # [1, 4, 64, 64]
|
116 |
+
timesteps: Tensor # [1]
|
117 |
+
context: Tensor # [1, 78, 768]
|
118 |
+
kwargs: dict # {}
|
119 |
+
|
120 |
+
# Handle cond-uncond marker
|
121 |
+
cond_mark, outer.current_uc_indices, context = unmark_prompt_context(context)
|
122 |
+
# logger.info(str(cond_mark[:, 0, 0, 0].detach().cpu().numpy().tolist()) + ' - ' + str(outer.current_uc_indices))
|
123 |
+
|
124 |
+
# High-res fix
|
125 |
+
for param in outer.control_params:
|
126 |
+
# select which hint_cond to use
|
127 |
+
if param.used_hint_cond is None:
|
128 |
+
param.used_hint_cond = param.hint_cond # NOTE: input hint cond tensor, [1, 3, 512, 512]
|
129 |
+
param.used_hint_cond_latent = None
|
130 |
+
param.used_hint_inpaint_hijack = None
|
131 |
+
|
132 |
+
# has high-res fix
|
133 |
+
if param.hr_hint_cond is not None and x.ndim == 4 and param.hint_cond.ndim == 4 and param.hr_hint_cond.ndim == 4:
|
134 |
+
_, _, h_lr, w_lr = param.hint_cond.shape
|
135 |
+
_, _, h_hr, w_hr = param.hr_hint_cond.shape
|
136 |
+
_, _, h, w = x.shape
|
137 |
+
h, w = h * 8, w * 8
|
138 |
+
if abs(h - h_lr) < abs(h - h_hr):
|
139 |
+
is_in_high_res_fix = False
|
140 |
+
if param.used_hint_cond is not param.hint_cond:
|
141 |
+
param.used_hint_cond = param.hint_cond
|
142 |
+
param.used_hint_cond_latent = None
|
143 |
+
param.used_hint_inpaint_hijack = None
|
144 |
+
else:
|
145 |
+
is_in_high_res_fix = True
|
146 |
+
if param.used_hint_cond is not param.hr_hint_cond:
|
147 |
+
param.used_hint_cond = param.hr_hint_cond
|
148 |
+
param.used_hint_cond_latent = None
|
149 |
+
param.used_hint_inpaint_hijack = None
|
150 |
+
|
151 |
+
# NOTE: hint shallow fusion, overwrite param.used_hint_cond
|
152 |
+
for i, param in enumerate(outer.control_params):
|
153 |
+
if interp_alpha == 0.0: # collect hint_cond on key frames
|
154 |
+
if len(to_hint_cond) < len(outer.control_params):
|
155 |
+
to_hint_cond.append(param.used_hint_cond.clone().detach().cpu())
|
156 |
+
else: # interp with cached hint_cond
|
157 |
+
param.used_hint_cond = mid_hint_cond[i].to(x.device)
|
158 |
+
|
159 |
+
# Convert control image to latent
|
160 |
+
for param in outer.control_params:
|
161 |
+
if param.used_hint_cond_latent is not None:
|
162 |
+
continue
|
163 |
+
if param.control_model_type not in [ControlModelType.AttentionInjection] \
|
164 |
+
and 'colorfix' not in param.preprocessor['name'] \
|
165 |
+
and 'inpaint_only' not in param.preprocessor['name']:
|
166 |
+
continue
|
167 |
+
param.used_hint_cond_latent = outer.call_vae_using_process(process, param.used_hint_cond, batch_size=batch_size)
|
168 |
+
|
169 |
+
# handle prompt token control
|
170 |
+
for param in outer.control_params:
|
171 |
+
if param.guidance_stopped:
|
172 |
+
continue
|
173 |
+
|
174 |
+
if param.control_model_type not in [ControlModelType.T2I_StyleAdapter]:
|
175 |
+
continue
|
176 |
+
|
177 |
+
param.control_model.to(devices.get_device_for("controlnet"))
|
178 |
+
control = param.control_model(x=x, hint=param.used_hint_cond, timesteps=timesteps, context=context)
|
179 |
+
control = torch.cat([control.clone() for _ in range(batch_size)], dim=0)
|
180 |
+
control *= param.weight
|
181 |
+
control *= cond_mark[:, :, :, 0]
|
182 |
+
context = torch.cat([context, control.clone()], dim=1)
|
183 |
+
|
184 |
+
# handle ControlNet / T2I_Adapter
|
185 |
+
for param in outer.control_params:
|
186 |
+
if param.guidance_stopped:
|
187 |
+
continue
|
188 |
+
|
189 |
+
if param.control_model_type not in [ControlModelType.ControlNet, ControlModelType.T2I_Adapter]:
|
190 |
+
continue
|
191 |
+
|
192 |
+
param.control_model.to(devices.get_device_for("controlnet"))
|
193 |
+
# inpaint model workaround
|
194 |
+
x_in = x
|
195 |
+
control_model = param.control_model.control_model
|
196 |
+
|
197 |
+
if param.control_model_type == ControlModelType.ControlNet:
|
198 |
+
if x.shape[1] != control_model.input_blocks[0][0].in_channels and x.shape[1] == 9:
|
199 |
+
# inpaint_model: 4 data + 4 downscaled image + 1 mask
|
200 |
+
x_in = x[:, :4, ...]
|
201 |
+
require_inpaint_hijack = True
|
202 |
+
|
203 |
+
assert param.used_hint_cond is not None, f"Controlnet is enabled but no input image is given"
|
204 |
+
|
205 |
+
hint = param.used_hint_cond
|
206 |
+
|
207 |
+
# ControlNet inpaint protocol
|
208 |
+
if hint.shape[1] == 4:
|
209 |
+
c = hint[:, 0:3, :, :]
|
210 |
+
m = hint[:, 3:4, :, :]
|
211 |
+
m = (m > 0.5).float()
|
212 |
+
hint = c * (1 - m) - m
|
213 |
+
|
214 |
+
# NOTE: len(control) == 13, control[i]:Tensor
|
215 |
+
control = param.control_model(x=x_in, hint=hint, timesteps=timesteps, context=context)
|
216 |
+
control_scales = ([param.weight] * 13)
|
217 |
+
|
218 |
+
if outer.lowvram:
|
219 |
+
param.control_model.to("cpu")
|
220 |
+
|
221 |
+
if param.cfg_injection or param.global_average_pooling:
|
222 |
+
if param.control_model_type == ControlModelType.T2I_Adapter:
|
223 |
+
control = [torch.cat([c.clone() for _ in range(batch_size)], dim=0) for c in control]
|
224 |
+
control = [c * cond_mark for c in control]
|
225 |
+
|
226 |
+
high_res_fix_forced_soft_injection = False
|
227 |
+
|
228 |
+
if is_in_high_res_fix:
|
229 |
+
if 'canny' in param.preprocessor['name']:
|
230 |
+
high_res_fix_forced_soft_injection = True
|
231 |
+
if 'mlsd' in param.preprocessor['name']:
|
232 |
+
high_res_fix_forced_soft_injection = True
|
233 |
+
|
234 |
+
# if high_res_fix_forced_soft_injection:
|
235 |
+
# logger.info('[ControlNet] Forced soft_injection in high_res_fix in enabled.')
|
236 |
+
|
237 |
+
if param.soft_injection or high_res_fix_forced_soft_injection:
|
238 |
+
# important! use the soft weights with high-res fix can significantly reduce artifacts.
|
239 |
+
if param.control_model_type == ControlModelType.T2I_Adapter:
|
240 |
+
control_scales = [param.weight * x for x in (0.25, 0.62, 0.825, 1.0)]
|
241 |
+
elif param.control_model_type == ControlModelType.ControlNet:
|
242 |
+
control_scales = [param.weight * (0.825 ** float(12 - i)) for i in range(13)]
|
243 |
+
|
244 |
+
if param.advanced_weighting is not None:
|
245 |
+
control_scales = param.advanced_weighting
|
246 |
+
|
247 |
+
control = [c * scale for c, scale in zip(control, control_scales)]
|
248 |
+
if param.global_average_pooling:
|
249 |
+
control = [torch.mean(c, dim=(2, 3), keepdim=True) for c in control]
|
250 |
+
|
251 |
+
for idx, item in enumerate(control):
|
252 |
+
target = None
|
253 |
+
if param.control_model_type == ControlModelType.ControlNet:
|
254 |
+
target = total_controlnet_embedding
|
255 |
+
if param.control_model_type == ControlModelType.T2I_Adapter:
|
256 |
+
target = total_t2i_adapter_embedding
|
257 |
+
if target is not None:
|
258 |
+
target[idx] = item + target[idx]
|
259 |
+
|
260 |
+
# Replace x_t to support inpaint models
|
261 |
+
for param in outer.control_params:
|
262 |
+
if param.used_hint_cond.shape[1] != 4:
|
263 |
+
continue
|
264 |
+
if x.shape[1] != 9:
|
265 |
+
continue
|
266 |
+
if param.used_hint_inpaint_hijack is None:
|
267 |
+
mask_pixel = param.used_hint_cond[:, 3:4, :, :]
|
268 |
+
image_pixel = param.used_hint_cond[:, 0:3, :, :]
|
269 |
+
mask_pixel = (mask_pixel > 0.5).to(mask_pixel.dtype)
|
270 |
+
masked_latent = outer.call_vae_using_process(process, image_pixel, batch_size, mask=mask_pixel)
|
271 |
+
mask_latent = torch.nn.functional.max_pool2d(mask_pixel, (8, 8))
|
272 |
+
if mask_latent.shape[0] != batch_size:
|
273 |
+
mask_latent = torch.cat([mask_latent.clone() for _ in range(batch_size)], dim=0)
|
274 |
+
param.used_hint_inpaint_hijack = torch.cat([mask_latent, masked_latent], dim=1)
|
275 |
+
param.used_hint_inpaint_hijack.to(x.dtype).to(x.device)
|
276 |
+
x = torch.cat([x[:, :4, :, :], param.used_hint_inpaint_hijack], dim=1)
|
277 |
+
|
278 |
+
# A1111 fix for medvram.
|
279 |
+
if shared.cmd_opts.medvram:
|
280 |
+
try:
|
281 |
+
# Trigger the register_forward_pre_hook
|
282 |
+
outer.sd_ldm.model()
|
283 |
+
except:
|
284 |
+
pass
|
285 |
+
|
286 |
+
# Clear attention and AdaIn cache
|
287 |
+
for module in outer.attn_module_list:
|
288 |
+
module.bank = []
|
289 |
+
module.style_cfgs = []
|
290 |
+
for module in outer.gn_module_list:
|
291 |
+
module.mean_bank = []
|
292 |
+
module.var_bank = []
|
293 |
+
module.style_cfgs = []
|
294 |
+
|
295 |
+
# Handle attention and AdaIn control
|
296 |
+
for param in outer.control_params:
|
297 |
+
if param.guidance_stopped:
|
298 |
+
continue
|
299 |
+
|
300 |
+
if param.used_hint_cond_latent is None:
|
301 |
+
continue
|
302 |
+
|
303 |
+
if param.control_model_type not in [ControlModelType.AttentionInjection]:
|
304 |
+
continue
|
305 |
+
|
306 |
+
ref_xt = outer.sd_ldm.q_sample(param.used_hint_cond_latent, torch.round(timesteps.float()).long())
|
307 |
+
|
308 |
+
# Inpaint Hijack
|
309 |
+
if x.shape[1] == 9:
|
310 |
+
ref_xt = torch.cat([
|
311 |
+
ref_xt,
|
312 |
+
torch.zeros_like(ref_xt)[:, 0:1, :, :],
|
313 |
+
param.used_hint_cond_latent
|
314 |
+
], dim=1)
|
315 |
+
|
316 |
+
outer.current_style_fidelity = float(param.preprocessor['threshold_a'])
|
317 |
+
outer.current_style_fidelity = max(0.0, min(1.0, outer.current_style_fidelity))
|
318 |
+
|
319 |
+
if param.cfg_injection:
|
320 |
+
outer.current_style_fidelity = 1.0
|
321 |
+
elif param.soft_injection or is_in_high_res_fix:
|
322 |
+
outer.current_style_fidelity = 0.0
|
323 |
+
|
324 |
+
control_name = param.preprocessor['name']
|
325 |
+
|
326 |
+
if control_name in ['reference_only', 'reference_adain+attn']:
|
327 |
+
outer.attention_auto_machine = AutoMachine.Write
|
328 |
+
outer.attention_auto_machine_weight = param.weight
|
329 |
+
|
330 |
+
if control_name in ['reference_adain', 'reference_adain+attn']:
|
331 |
+
outer.gn_auto_machine = AutoMachine.Write
|
332 |
+
outer.gn_auto_machine_weight = param.weight
|
333 |
+
|
334 |
+
outer.original_forward(
|
335 |
+
x=ref_xt.to(devices.dtype_unet),
|
336 |
+
timesteps=timesteps.to(devices.dtype_unet),
|
337 |
+
context=context.to(devices.dtype_unet)
|
338 |
+
)
|
339 |
+
|
340 |
+
outer.attention_auto_machine = AutoMachine.Read
|
341 |
+
outer.gn_auto_machine = AutoMachine.Read
|
342 |
+
|
343 |
+
# NOTE: hint latent fusion, overwrite control tensors
|
344 |
+
total_control = total_controlnet_embedding
|
345 |
+
if interp_alpha == 0.0: # collect control tensors on key frames
|
346 |
+
tensors: List[Tensor] = []
|
347 |
+
for i, t in enumerate(total_control):
|
348 |
+
if len(skip_fuse_plan) and skip_fuse_plan[i]:
|
349 |
+
tensors.append(None)
|
350 |
+
else:
|
351 |
+
tensors.append(t.clone().detach().cpu())
|
352 |
+
to_control_tensors.append(tensors)
|
353 |
+
else: # interp with cached control tensors
|
354 |
+
device = total_control[0].device
|
355 |
+
for i, (ctrlA, ctrlB) in enumerate(zip(from_control_tensors[interp_ip], to_control_tensors[interp_ip])):
|
356 |
+
if ctrlA is not None and ctrlB is not None:
|
357 |
+
ctrlC = weighted_sum(ctrlA.to(device), ctrlB.to(device), interp_alpha)
|
358 |
+
#print(' ctrl diff:', (ctrlC - total_control[i]).abs().mean().item())
|
359 |
+
total_control[i].data = ctrlC
|
360 |
+
interp_ip += 1
|
361 |
+
|
362 |
+
# NOTE: warn on T2I adapter
|
363 |
+
if total_t2i_adapter_embedding[0] != 0:
|
364 |
+
print(f'{LOG_PREFIX} warn: t2i_adapter is currently not supported. If you need it, please file a feature request on Kahsolt/stable-diffusion-webui-prompt-travel')
|
365 |
+
|
366 |
+
# U-Net Encoder
|
367 |
+
hs = []
|
368 |
+
with th.no_grad():
|
369 |
+
t_emb = cond_cast_unet(timestep_embedding(timesteps, self.model_channels, repeat_only=False))
|
370 |
+
emb = self.time_embed(t_emb)
|
371 |
+
h = x.type(self.dtype)
|
372 |
+
for i, module in enumerate(self.input_blocks):
|
373 |
+
h = module(h, emb, context)
|
374 |
+
|
375 |
+
if (i + 1) % 3 == 0:
|
376 |
+
h = aligned_adding(h, total_t2i_adapter_embedding.pop(0), require_inpaint_hijack)
|
377 |
+
|
378 |
+
hs.append(h)
|
379 |
+
h = self.middle_block(h, emb, context)
|
380 |
+
|
381 |
+
# U-Net Middle Block
|
382 |
+
h = aligned_adding(h, total_controlnet_embedding.pop(), require_inpaint_hijack)
|
383 |
+
|
384 |
+
# U-Net Decoder
|
385 |
+
for i, module in enumerate(self.output_blocks):
|
386 |
+
h = th.cat([h, aligned_adding(hs.pop(), total_controlnet_embedding.pop(), require_inpaint_hijack)], dim=1)
|
387 |
+
h = module(h, emb, context)
|
388 |
+
|
389 |
+
# U-Net Output
|
390 |
+
h = h.type(x.dtype)
|
391 |
+
h = self.out(h)
|
392 |
+
|
393 |
+
# Post-processing for color fix
|
394 |
+
for param in outer.control_params:
|
395 |
+
if param.used_hint_cond_latent is None:
|
396 |
+
continue
|
397 |
+
if 'colorfix' not in param.preprocessor['name']:
|
398 |
+
continue
|
399 |
+
|
400 |
+
k = int(param.preprocessor['threshold_a'])
|
401 |
+
if is_in_high_res_fix:
|
402 |
+
k *= 2
|
403 |
+
|
404 |
+
# Inpaint hijack
|
405 |
+
xt = x[:, :4, :, :]
|
406 |
+
|
407 |
+
x0_origin = param.used_hint_cond_latent
|
408 |
+
t = torch.round(timesteps.float()).long()
|
409 |
+
x0_prd = predict_start_from_noise(outer.sd_ldm, xt, t, h)
|
410 |
+
x0 = x0_prd - blur(x0_prd, k) + blur(x0_origin, k)
|
411 |
+
|
412 |
+
if '+sharp' in param.preprocessor['name']:
|
413 |
+
detail_weight = float(param.preprocessor['threshold_b']) * 0.01
|
414 |
+
neg = detail_weight * blur(x0, k) + (1 - detail_weight) * x0
|
415 |
+
x0 = cond_mark * x0 + (1 - cond_mark) * neg
|
416 |
+
|
417 |
+
eps_prd = predict_noise_from_start(outer.sd_ldm, xt, t, x0)
|
418 |
+
|
419 |
+
w = max(0.0, min(1.0, float(param.weight)))
|
420 |
+
h = eps_prd * w + h * (1 - w)
|
421 |
+
|
422 |
+
# Post-processing for restore
|
423 |
+
for param in outer.control_params:
|
424 |
+
if param.used_hint_cond_latent is None:
|
425 |
+
continue
|
426 |
+
if 'inpaint_only' not in param.preprocessor['name']:
|
427 |
+
continue
|
428 |
+
if param.used_hint_cond.shape[1] != 4:
|
429 |
+
continue
|
430 |
+
|
431 |
+
# Inpaint hijack
|
432 |
+
xt = x[:, :4, :, :]
|
433 |
+
|
434 |
+
mask = param.used_hint_cond[:, 3:4, :, :]
|
435 |
+
mask = torch.nn.functional.max_pool2d(mask, (10, 10), stride=(8, 8), padding=1)
|
436 |
+
|
437 |
+
x0_origin = param.used_hint_cond_latent
|
438 |
+
t = torch.round(timesteps.float()).long()
|
439 |
+
x0_prd = predict_start_from_noise(outer.sd_ldm, xt, t, h)
|
440 |
+
x0 = x0_prd * mask + x0_origin * (1 - mask)
|
441 |
+
eps_prd = predict_noise_from_start(outer.sd_ldm, xt, t, x0)
|
442 |
+
|
443 |
+
w = max(0.0, min(1.0, float(param.weight)))
|
444 |
+
h = eps_prd * w + h * (1 - w)
|
445 |
+
|
446 |
+
return h
|
447 |
+
|
448 |
+
def forward_webui(*args, **kwargs):
|
449 |
+
# webui will handle other components
|
450 |
+
try:
|
451 |
+
if shared.cmd_opts.lowvram:
|
452 |
+
lowvram.send_everything_to_cpu()
|
453 |
+
|
454 |
+
return forward(*args, **kwargs)
|
455 |
+
finally:
|
456 |
+
if self.lowvram:
|
457 |
+
for param in self.control_params:
|
458 |
+
if isinstance(param.control_model, torch.nn.Module):
|
459 |
+
param.control_model.to("cpu")
|
460 |
+
|
461 |
+
def hacked_basic_transformer_inner_forward(self, x, context=None):
|
462 |
+
x_norm1 = self.norm1(x)
|
463 |
+
self_attn1 = None
|
464 |
+
if self.disable_self_attn:
|
465 |
+
# Do not use self-attention
|
466 |
+
self_attn1 = self.attn1(x_norm1, context=context)
|
467 |
+
else:
|
468 |
+
# Use self-attention
|
469 |
+
self_attention_context = x_norm1
|
470 |
+
if outer.attention_auto_machine == AutoMachine.Write:
|
471 |
+
if outer.attention_auto_machine_weight > self.attn_weight:
|
472 |
+
self.bank.append(self_attention_context.detach().clone())
|
473 |
+
self.style_cfgs.append(outer.current_style_fidelity)
|
474 |
+
if outer.attention_auto_machine == AutoMachine.Read:
|
475 |
+
if len(self.bank) > 0:
|
476 |
+
style_cfg = sum(self.style_cfgs) / float(len(self.style_cfgs))
|
477 |
+
self_attn1_uc = self.attn1(x_norm1, context=torch.cat([self_attention_context] + self.bank, dim=1))
|
478 |
+
self_attn1_c = self_attn1_uc.clone()
|
479 |
+
if len(outer.current_uc_indices) > 0 and style_cfg > 1e-5:
|
480 |
+
self_attn1_c[outer.current_uc_indices] = self.attn1(
|
481 |
+
x_norm1[outer.current_uc_indices],
|
482 |
+
context=self_attention_context[outer.current_uc_indices])
|
483 |
+
self_attn1 = style_cfg * self_attn1_c + (1.0 - style_cfg) * self_attn1_uc
|
484 |
+
self.bank = []
|
485 |
+
self.style_cfgs = []
|
486 |
+
if self_attn1 is None:
|
487 |
+
self_attn1 = self.attn1(x_norm1, context=self_attention_context)
|
488 |
+
|
489 |
+
x = self_attn1.to(x.dtype) + x
|
490 |
+
x = self.attn2(self.norm2(x), context=context) + x
|
491 |
+
x = self.ff(self.norm3(x)) + x
|
492 |
+
return x
|
493 |
+
|
494 |
+
def hacked_group_norm_forward(self, *args, **kwargs):
|
495 |
+
eps = 1e-6
|
496 |
+
x = self.original_forward(*args, **kwargs)
|
497 |
+
y = None
|
498 |
+
if outer.gn_auto_machine == AutoMachine.Write:
|
499 |
+
if outer.gn_auto_machine_weight > self.gn_weight:
|
500 |
+
var, mean = torch.var_mean(x, dim=(2, 3), keepdim=True, correction=0)
|
501 |
+
self.mean_bank.append(mean)
|
502 |
+
self.var_bank.append(var)
|
503 |
+
self.style_cfgs.append(outer.current_style_fidelity)
|
504 |
+
if outer.gn_auto_machine == AutoMachine.Read:
|
505 |
+
if len(self.mean_bank) > 0 and len(self.var_bank) > 0:
|
506 |
+
style_cfg = sum(self.style_cfgs) / float(len(self.style_cfgs))
|
507 |
+
var, mean = torch.var_mean(x, dim=(2, 3), keepdim=True, correction=0)
|
508 |
+
std = torch.maximum(var, torch.zeros_like(var) + eps) ** 0.5
|
509 |
+
mean_acc = sum(self.mean_bank) / float(len(self.mean_bank))
|
510 |
+
var_acc = sum(self.var_bank) / float(len(self.var_bank))
|
511 |
+
std_acc = torch.maximum(var_acc, torch.zeros_like(var_acc) + eps) ** 0.5
|
512 |
+
y_uc = (((x - mean) / std) * std_acc) + mean_acc
|
513 |
+
y_c = y_uc.clone()
|
514 |
+
if len(outer.current_uc_indices) > 0 and style_cfg > 1e-5:
|
515 |
+
y_c[outer.current_uc_indices] = x.to(y_c.dtype)[outer.current_uc_indices]
|
516 |
+
y = style_cfg * y_c + (1.0 - style_cfg) * y_uc
|
517 |
+
self.mean_bank = []
|
518 |
+
self.var_bank = []
|
519 |
+
self.style_cfgs = []
|
520 |
+
if y is None:
|
521 |
+
y = x
|
522 |
+
return y.to(x.dtype)
|
523 |
+
|
524 |
+
if getattr(process, 'sample_before_CN_hack', None) is None:
|
525 |
+
process.sample_before_CN_hack = process.sample
|
526 |
+
process.sample = process_sample
|
527 |
+
|
528 |
+
model._original_forward = model.forward
|
529 |
+
outer.original_forward = model.forward
|
530 |
+
model.forward = forward_webui.__get__(model, UNetModel)
|
531 |
+
|
532 |
+
all_modules = torch_dfs(model)
|
533 |
+
|
534 |
+
attn_modules = [module for module in all_modules if isinstance(module, BasicTransformerBlock)]
|
535 |
+
attn_modules = sorted(attn_modules, key=lambda x: - x.norm1.normalized_shape[0])
|
536 |
+
|
537 |
+
for i, module in enumerate(attn_modules):
|
538 |
+
if getattr(module, '_original_inner_forward', None) is None:
|
539 |
+
module._original_inner_forward = module._forward
|
540 |
+
module._forward = hacked_basic_transformer_inner_forward.__get__(module, BasicTransformerBlock)
|
541 |
+
module.bank = []
|
542 |
+
module.style_cfgs = []
|
543 |
+
module.attn_weight = float(i) / float(len(attn_modules))
|
544 |
+
|
545 |
+
gn_modules = [model.middle_block]
|
546 |
+
model.middle_block.gn_weight = 0
|
547 |
+
|
548 |
+
input_block_indices = [4, 5, 7, 8, 10, 11]
|
549 |
+
for w, i in enumerate(input_block_indices):
|
550 |
+
module = model.input_blocks[i]
|
551 |
+
module.gn_weight = 1.0 - float(w) / float(len(input_block_indices))
|
552 |
+
gn_modules.append(module)
|
553 |
+
|
554 |
+
output_block_indices = [0, 1, 2, 3, 4, 5, 6, 7]
|
555 |
+
for w, i in enumerate(output_block_indices):
|
556 |
+
module = model.output_blocks[i]
|
557 |
+
module.gn_weight = float(w) / float(len(output_block_indices))
|
558 |
+
gn_modules.append(module)
|
559 |
+
|
560 |
+
for i, module in enumerate(gn_modules):
|
561 |
+
if getattr(module, 'original_forward', None) is None:
|
562 |
+
module.original_forward = module.forward
|
563 |
+
module.forward = hacked_group_norm_forward.__get__(module, torch.nn.Module)
|
564 |
+
module.mean_bank = []
|
565 |
+
module.var_bank = []
|
566 |
+
module.style_cfgs = []
|
567 |
+
module.gn_weight *= 2
|
568 |
+
|
569 |
+
outer.attn_module_list = attn_modules
|
570 |
+
outer.gn_module_list = gn_modules
|
571 |
+
|
572 |
+
scripts.script_callbacks.on_cfg_denoiser(self.guidance_schedule_handler)
|
573 |
+
|
574 |
+
# ↑↑↑ the above is modified from 'sd-webui-controlnet/scripts/hook.py' ↑↑↑
|
575 |
+
|
576 |
+
def reset_cuda():
|
577 |
+
devices.torch_gc()
|
578 |
+
import gc; gc.collect()
|
579 |
+
|
580 |
+
try:
|
581 |
+
import os
|
582 |
+
import psutil
|
583 |
+
mem = psutil.Process(os.getpid()).memory_info()
|
584 |
+
print(f'[Mem] rss: {mem.rss/2**30:.3f} GB, vms: {mem.vms/2**30:.3f} GB')
|
585 |
+
from modules.shared import mem_mon as vram_mon
|
586 |
+
free, total = vram_mon.cuda_mem_get_info()
|
587 |
+
print(f'[VRAM] free: {free/2**30:.3f} GB, total: {total/2**30:.3f} GB')
|
588 |
+
except:
|
589 |
+
pass
|
590 |
+
|
591 |
+
|
592 |
+
class Script(scripts.Script):
|
593 |
+
|
594 |
+
def title(self):
|
595 |
+
return 'ControlNet Travel'
|
596 |
+
|
597 |
+
def describe(self):
|
598 |
+
return 'Travel from one controlnet hint condition to another in the tensor space.'
|
599 |
+
|
600 |
+
def show(self, is_img2img):
|
601 |
+
return controlnet_found
|
602 |
+
|
603 |
+
def ui(self, is_img2img):
|
604 |
+
with gr.Row(variant='compact'):
|
605 |
+
interp_meth = gr.Dropdown(label=LABEL_INTERP_METH, value=lambda: DEFAULT_INTERP_METH, choices=CHOICES_INTERP_METH)
|
606 |
+
steps = gr.Text (label=LABEL_STEPS, value=lambda: DEFAULT_STEPS, max_lines=1)
|
607 |
+
|
608 |
+
reset = gr.Button(value='Reset Cuda', variant='tool')
|
609 |
+
reset.click(fn=reset_cuda, show_progress=False)
|
610 |
+
|
611 |
+
with gr.Row(variant='compact'):
|
612 |
+
ctrlnet_ref_dir = gr.Text(label=LABEL_CTRLNET_REF_DIR, value=lambda: DEFAULT_CTRLNET_REF_DIR, max_lines=1)
|
613 |
+
|
614 |
+
with gr.Group(visible=DEFAULT_SKIP_FUSE) as tab_ext_skip_fuse:
|
615 |
+
with gr.Row(variant='compact'):
|
616 |
+
skip_in_0 = gr.Checkbox(label='in_0')
|
617 |
+
skip_in_3 = gr.Checkbox(label='in_3')
|
618 |
+
skip_out_0 = gr.Checkbox(label='out_0')
|
619 |
+
skip_out_3 = gr.Checkbox(label='out_3')
|
620 |
+
with gr.Row(variant='compact'):
|
621 |
+
skip_in_1 = gr.Checkbox(label='in_1')
|
622 |
+
skip_in_4 = gr.Checkbox(label='in_4')
|
623 |
+
skip_out_1 = gr.Checkbox(label='out_1')
|
624 |
+
skip_out_4 = gr.Checkbox(label='out_4')
|
625 |
+
with gr.Row(variant='compact'):
|
626 |
+
skip_in_2 = gr.Checkbox(label='in_2')
|
627 |
+
skip_in_5 = gr.Checkbox(label='in_5')
|
628 |
+
skip_out_2 = gr.Checkbox(label='out_2')
|
629 |
+
skip_out_5 = gr.Checkbox(label='out_5')
|
630 |
+
with gr.Row(variant='compact'):
|
631 |
+
skip_mid = gr.Checkbox(label='mid')
|
632 |
+
|
633 |
+
with gr.Row(variant='compact', visible=DEFAULT_UPSCALE) as tab_ext_upscale:
|
634 |
+
upscale_meth = gr.Dropdown(label=LABEL_UPSCALE_METH, value=lambda: DEFAULT_UPSCALE_METH, choices=CHOICES_UPSCALER)
|
635 |
+
upscale_ratio = gr.Slider (label=LABEL_UPSCALE_RATIO, value=lambda: DEFAULT_UPSCALE_RATIO, minimum=1.0, maximum=16.0, step=0.1)
|
636 |
+
upscale_width = gr.Slider (label=LABEL_UPSCALE_WIDTH, value=lambda: DEFAULT_UPSCALE_WIDTH, minimum=0, maximum=2048, step=8)
|
637 |
+
upscale_height = gr.Slider (label=LABEL_UPSCALE_HEIGHT, value=lambda: DEFAULT_UPSCALE_HEIGHT, minimum=0, maximum=2048, step=8)
|
638 |
+
|
639 |
+
with gr.Row(variant='compact', visible=DEFAULT_VIDEO) as tab_ext_video:
|
640 |
+
video_fmt = gr.Dropdown(label=LABEL_VIDEO_FMT, value=lambda: DEFAULT_VIDEO_FMT, choices=CHOICES_VIDEO_FMT)
|
641 |
+
video_fps = gr.Number (label=LABEL_VIDEO_FPS, value=lambda: DEFAULT_VIDEO_FPS)
|
642 |
+
video_pad = gr.Number (label=LABEL_VIDEO_PAD, value=lambda: DEFAULT_VIDEO_PAD, precision=0)
|
643 |
+
video_pick = gr.Text (label=LABEL_VIDEO_PICK, value=lambda: DEFAULT_VIDEO_PICK, max_lines=1)
|
644 |
+
|
645 |
+
with gr.Row(variant='compact') as tab_ext:
|
646 |
+
ext_video = gr.Checkbox(label=LABEL_VIDEO, value=lambda: DEFAULT_VIDEO)
|
647 |
+
ext_upscale = gr.Checkbox(label=LABEL_UPSCALE, value=lambda: DEFAULT_UPSCALE)
|
648 |
+
ext_skip_fuse = gr.Checkbox(label=LABEL_SKIP_FUSE, value=lambda: DEFAULT_SKIP_FUSE)
|
649 |
+
dbg_rife = gr.Checkbox(label=LABEL_DEBUG_RIFE, value=lambda: DEFAULT_DEBUG_RIFE)
|
650 |
+
|
651 |
+
ext_video .change(gr_show, inputs=ext_video, outputs=tab_ext_video, show_progress=False)
|
652 |
+
ext_upscale .change(gr_show, inputs=ext_upscale, outputs=tab_ext_upscale, show_progress=False)
|
653 |
+
ext_skip_fuse.change(gr_show, inputs=ext_skip_fuse, outputs=tab_ext_skip_fuse, show_progress=False)
|
654 |
+
|
655 |
+
skip_fuses = [
|
656 |
+
skip_in_0,
|
657 |
+
skip_in_1,
|
658 |
+
skip_in_2,
|
659 |
+
skip_in_3,
|
660 |
+
skip_in_4,
|
661 |
+
skip_in_5,
|
662 |
+
skip_mid,
|
663 |
+
skip_out_0,
|
664 |
+
skip_out_1,
|
665 |
+
skip_out_2,
|
666 |
+
skip_out_3,
|
667 |
+
skip_out_4,
|
668 |
+
skip_out_5,
|
669 |
+
]
|
670 |
+
return [
|
671 |
+
interp_meth, steps, ctrlnet_ref_dir,
|
672 |
+
upscale_meth, upscale_ratio, upscale_width, upscale_height,
|
673 |
+
video_fmt, video_fps, video_pad, video_pick,
|
674 |
+
ext_video, ext_upscale, ext_skip_fuse, dbg_rife,
|
675 |
+
*skip_fuses,
|
676 |
+
]
|
677 |
+
|
678 |
+
def run(self, p:Processing,
|
679 |
+
interp_meth:str, steps:str, ctrlnet_ref_dir:str,
|
680 |
+
upscale_meth:str, upscale_ratio:float, upscale_width:int, upscale_height:int,
|
681 |
+
video_fmt:str, video_fps:float, video_pad:int, video_pick:str,
|
682 |
+
ext_video:bool, ext_upscale:bool, ext_skip_fuse:bool, dbg_rife:bool,
|
683 |
+
*skip_fuses:bool,
|
684 |
+
):
|
685 |
+
|
686 |
+
# Prepare ControlNet
|
687 |
+
#self.controlnet_script: ControlNetScript = None
|
688 |
+
self.controlnet_script = None
|
689 |
+
try:
|
690 |
+
for script in p.scripts.alwayson_scripts:
|
691 |
+
if hasattr(script, "latest_network") and script.title().lower() == "controlnet":
|
692 |
+
script_args: Tuple[ControlNetUnit] = p.script_args[script.args_from:script.args_to]
|
693 |
+
if not any([u.enabled for u in script_args]): return Processed(p, [], p.seed, f'{CTRLNET_REPO_NAME} not enabled')
|
694 |
+
self.controlnet_script = script
|
695 |
+
break
|
696 |
+
except ImportError:
|
697 |
+
return Processed(p, [], p.seed, f'{CTRLNET_REPO_NAME} not installed')
|
698 |
+
except:
|
699 |
+
print_exc()
|
700 |
+
if not self.controlnet_script: return Processed(p, [], p.seed, f'{CTRLNET_REPO_NAME} not loaded')
|
701 |
+
|
702 |
+
# Enum lookup
|
703 |
+
interp_meth: InterpMethod = InterpMethod(interp_meth)
|
704 |
+
video_fmt: VideoFormat = VideoFormat (video_fmt)
|
705 |
+
|
706 |
+
# Param check & type convert
|
707 |
+
if ext_video:
|
708 |
+
if video_pad < 0: return Processed(p, [], p.seed, f'video_pad must >= 0, but got {video_pad}')
|
709 |
+
if video_fps <= 0: return Processed(p, [], p.seed, f'video_fps must > 0, but got {video_fps}')
|
710 |
+
try: video_slice = parse_slice(video_pick)
|
711 |
+
except: return Processed(p, [], p.seed, 'syntax error in video_slice')
|
712 |
+
if ext_skip_fuse:
|
713 |
+
global skip_fuse_plan
|
714 |
+
skip_fuse_plan = skip_fuses
|
715 |
+
|
716 |
+
# Prepare ref-images
|
717 |
+
if not ctrlnet_ref_dir: return Processed(p, [], p.seed, f'invalid image folder path: {ctrlnet_ref_dir}')
|
718 |
+
ctrlnet_ref_dir: Path = Path(ctrlnet_ref_dir)
|
719 |
+
if not ctrlnet_ref_dir.is_dir(): return Processed(p, [], p.seed, f'invalid image folder path: {ctrlnet_ref_dir} :(')
|
720 |
+
self.ctrlnet_ref_fps = [fp for fp in list(ctrlnet_ref_dir.iterdir()) if fp.suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp', '.webp']]
|
721 |
+
n_stages = len(self.ctrlnet_ref_fps)
|
722 |
+
if n_stages == 0: return Processed(p, [], p.seed, f'no image files (*.jpg/*.png/*.bmp/*.webp) found in folder path: {ctrlnet_ref_dir}')
|
723 |
+
if n_stages == 1: return Processed(p, [], p.seed, 'requires at least two images to travel between, but found only 1 :(')
|
724 |
+
|
725 |
+
# Prepare steps (n_interp)
|
726 |
+
try: steps: List[int] = [int(s.strip()) for s in steps.strip().split(',')]
|
727 |
+
except: return Processed(p, [], p.seed, f'cannot parse steps options: {steps}')
|
728 |
+
if len(steps) == 1: steps = [steps[0]] * (n_stages - 1)
|
729 |
+
elif len(steps) != n_stages - 1: return Processed(p, [], p.seed, f'stage count mismatch: len_steps({len(steps)}) != n_stages-1 ({n_stages - 1})')
|
730 |
+
n_frames = sum(steps) + n_stages
|
731 |
+
if 'show_debug':
|
732 |
+
print('n_stages:', n_stages)
|
733 |
+
print('n_frames:', n_frames)
|
734 |
+
print('steps:', steps)
|
735 |
+
steps.insert(0, -1) # fixup the first stage
|
736 |
+
|
737 |
+
# Custom saving path
|
738 |
+
travel_path = os.path.join(p.outpath_samples, 'prompt_travel')
|
739 |
+
os.makedirs(travel_path, exist_ok=True)
|
740 |
+
travel_number = get_next_sequence_number(travel_path)
|
741 |
+
self.log_dp = os.path.join(travel_path, f'{travel_number:05}')
|
742 |
+
p.outpath_samples = self.log_dp
|
743 |
+
os.makedirs(self.log_dp, exist_ok=True)
|
744 |
+
self.tmp_dp = Path(self.log_dp) / 'ctrl_cond' # cache for rife
|
745 |
+
self.tmp_fp = self.tmp_dp / 'tmp.png' # cache for rife
|
746 |
+
|
747 |
+
# Force Batch Count and Batch Size to 1
|
748 |
+
p.n_iter = 1
|
749 |
+
p.batch_size = 1
|
750 |
+
|
751 |
+
# Random unified const seed
|
752 |
+
p.seed = get_fixed_seed(p.seed) # fix it to assure all processes using the same major seed
|
753 |
+
self.subseed = p.subseed # stash it to allow using random subseed for each process (when -1)
|
754 |
+
if 'show_debug':
|
755 |
+
print('seed:', p.seed)
|
756 |
+
print('subseed:', p.subseed)
|
757 |
+
print('subseed_strength:', p.subseed_strength)
|
758 |
+
|
759 |
+
# Start job
|
760 |
+
state.job_count = n_frames
|
761 |
+
|
762 |
+
# Pack params
|
763 |
+
self.n_stages = n_stages
|
764 |
+
self.steps = steps
|
765 |
+
self.interp_meth = interp_meth
|
766 |
+
self.dbg_rife = dbg_rife
|
767 |
+
|
768 |
+
def upscale_image_callback(params:ImageSaveParams):
|
769 |
+
params.image = upscale_image(params.image, p.width, p.height, upscale_meth, upscale_ratio, upscale_width, upscale_height)
|
770 |
+
|
771 |
+
images: List[PILImage] = []
|
772 |
+
info: str = None
|
773 |
+
try:
|
774 |
+
if ext_upscale: on_before_image_saved(upscale_image_callback)
|
775 |
+
|
776 |
+
self.UnetHook_hook_original = UnetHook.hook
|
777 |
+
UnetHook.hook = hook_hijack
|
778 |
+
|
779 |
+
[c.clear() for c in caches]
|
780 |
+
images, info = self.run_linear(p)
|
781 |
+
except:
|
782 |
+
info = format_exc()
|
783 |
+
print(info)
|
784 |
+
finally:
|
785 |
+
if self.tmp_fp.exists(): os.unlink(self.tmp_fp)
|
786 |
+
[c.clear() for c in caches]
|
787 |
+
|
788 |
+
UnetHook.hook = self.UnetHook_hook_original
|
789 |
+
|
790 |
+
self.controlnet_script.input_image = None
|
791 |
+
if self.controlnet_script.latest_network:
|
792 |
+
self.controlnet_script.latest_network: UnetHook
|
793 |
+
self.controlnet_script.latest_network.restore(p.sd_model.model.diffusion_model)
|
794 |
+
self.controlnet_script.latest_network = None
|
795 |
+
|
796 |
+
if ext_upscale: remove_callbacks_for_function(upscale_image_callback)
|
797 |
+
|
798 |
+
reset_cuda()
|
799 |
+
|
800 |
+
# Save video
|
801 |
+
if ext_video: save_video(images, video_slice, video_pad, video_fps, video_fmt, os.path.join(self.log_dp, f'travel-{travel_number:05}'))
|
802 |
+
|
803 |
+
return Processed(p, images, p.seed, info)
|
804 |
+
|
805 |
+
def run_linear(self, p:Processing) -> RunResults:
|
806 |
+
global from_hint_cond, to_hint_cond, from_control_tensors, to_control_tensors, interp_alpha, interp_ip
|
807 |
+
|
808 |
+
images: List[PILImage] = []
|
809 |
+
info: str = None
|
810 |
+
def process_p(append:bool=True) -> Optional[List[PILImage]]:
|
811 |
+
nonlocal p, images, info
|
812 |
+
proc = process_images(p)
|
813 |
+
if not info: info = proc.info
|
814 |
+
if append: images.extend(proc.images)
|
815 |
+
else: return proc.images
|
816 |
+
|
817 |
+
''' ↓↓↓ rife interp utils ↓↓↓ '''
|
818 |
+
def save_ctrl_cond(idx:int):
|
819 |
+
self.tmp_dp.mkdir(exist_ok=True)
|
820 |
+
for i, x in enumerate(to_hint_cond):
|
821 |
+
x = x[0]
|
822 |
+
if len(x.shape) == 3:
|
823 |
+
if x.shape[0] == 1: x = x.squeeze_(0) # [C=1, H, W] => [H, W]
|
824 |
+
elif x.shape[0] == 3: x = x.permute([1, 2, 0]) # [C=3, H, W] => [H, W, C]
|
825 |
+
else: raise ValueError(f'unknown cond shape: {x.shape}')
|
826 |
+
else:
|
827 |
+
raise ValueError(f'unknown cond shape: {x.shape}')
|
828 |
+
im = (x.detach().clamp(0.0, 1.0).cpu().numpy() * 255).astype(np.uint8)
|
829 |
+
Image.fromarray(im).save(self.tmp_dp / f'{idx}-{i}.png')
|
830 |
+
def rife_interp(i:int, j:int, k:int, alpha:float) -> Tensor:
|
831 |
+
''' interp between i-th and j-th cond of the k-th ctrlnet set '''
|
832 |
+
fp0 = self.tmp_dp / f'{i}-{k}.png'
|
833 |
+
fp1 = self.tmp_dp / f'{j}-{k}.png'
|
834 |
+
fpo = self.tmp_dp / f'{i}-{j}-{alpha:.3f}.png' if self.dbg_rife else self.tmp_fp
|
835 |
+
assert run_cmd(f'rife-ncnn-vulkan -m rife-v4 -s {alpha:.3f} -0 "{fp0}" -1 "{fp1}" -o "{fpo}"')
|
836 |
+
x = torch.from_numpy(np.asarray(Image.open(fpo)) / 255.0)
|
837 |
+
if len(x.shape) == 2: x = x.unsqueeze_(0) # [H, W] => [C=1, H, W]
|
838 |
+
elif len(x.shape) == 3: x = x.permute([2, 0, 1]) # [H, W, C] => [C, H, W]
|
839 |
+
else: raise ValueError(f'unknown cond shape: {x.shape}')
|
840 |
+
x = x.unsqueeze(dim=0)
|
841 |
+
return x
|
842 |
+
''' ↑↑↑ rife interp utils ↑↑↑ '''
|
843 |
+
|
844 |
+
''' ↓↓↓ filename reorder utils ↓↓↓ '''
|
845 |
+
iframe = 0
|
846 |
+
def rename_image_filename(idx:int, param: ImageSaveParams):
|
847 |
+
fn = param.filename
|
848 |
+
stem, suffix = os.path.splitext(os.path.basename(fn))
|
849 |
+
param.filename = os.path.join(os.path.dirname(fn), f'{idx:05d}' + suffix)
|
850 |
+
class on_before_image_saved_wrapper:
|
851 |
+
def __init__(self, callback_fn):
|
852 |
+
self.callback_fn = callback_fn
|
853 |
+
def __enter__(self):
|
854 |
+
on_before_image_saved(self.callback_fn)
|
855 |
+
def __exit__(self, exc_type, exc_value, exc_traceback):
|
856 |
+
remove_callbacks_for_function(self.callback_fn)
|
857 |
+
''' ↑↑↑ filename reorder utils ↑↑↑ '''
|
858 |
+
|
859 |
+
# Step 1: draw the init image
|
860 |
+
setattr(p, 'init_images', [Image.open(self.ctrlnet_ref_fps[0])])
|
861 |
+
interp_alpha = 0.0
|
862 |
+
with on_before_image_saved_wrapper(partial(rename_image_filename, 0)):
|
863 |
+
process_p()
|
864 |
+
iframe += 1
|
865 |
+
save_ctrl_cond(0)
|
866 |
+
|
867 |
+
# travel through stages
|
868 |
+
for i in range(1, self.n_stages):
|
869 |
+
if state.interrupted: break
|
870 |
+
|
871 |
+
# Step 3: move to the next stage
|
872 |
+
from_hint_cond = [t for t in to_hint_cond] ; to_hint_cond .clear()
|
873 |
+
from_control_tensors = [t for t in to_control_tensors] ; to_control_tensors.clear()
|
874 |
+
setattr(p, 'init_images', [Image.open(self.ctrlnet_ref_fps[i])])
|
875 |
+
interp_alpha = 0.0
|
876 |
+
|
877 |
+
with on_before_image_saved_wrapper(partial(rename_image_filename, iframe + self.steps[i])):
|
878 |
+
cached_images = process_p(append=False)
|
879 |
+
save_ctrl_cond(i)
|
880 |
+
|
881 |
+
# Step 2: draw the interpolated images
|
882 |
+
is_interrupted = False
|
883 |
+
n_inter = self.steps[i] + 1
|
884 |
+
for t in range(1, n_inter):
|
885 |
+
if state.interrupted: is_interrupted = True ; break
|
886 |
+
|
887 |
+
interp_alpha = t / n_inter # [1/T, 2/T, .. T-1/T]
|
888 |
+
|
889 |
+
mid_hint_cond.clear()
|
890 |
+
device = devices.get_device_for("controlnet")
|
891 |
+
if self.interp_meth == InterpMethod.LINEAR:
|
892 |
+
for hintA, hintB in zip(from_hint_cond, to_hint_cond):
|
893 |
+
hintC = weighted_sum(hintA.to(device), hintB.to(device), interp_alpha)
|
894 |
+
mid_hint_cond.append(hintC)
|
895 |
+
elif self.interp_meth == InterpMethod.RIFE:
|
896 |
+
dtype = to_hint_cond[0].dtype
|
897 |
+
for k in range(len(to_hint_cond)):
|
898 |
+
hintC = rife_interp(i-1, i, k, interp_alpha).to(device, dtype)
|
899 |
+
mid_hint_cond.append(hintC)
|
900 |
+
else: raise ValueError(f'unknown interp_meth: {self.interp_meth}')
|
901 |
+
|
902 |
+
interp_ip = 0
|
903 |
+
with on_before_image_saved_wrapper(partial(rename_image_filename, iframe)):
|
904 |
+
process_p()
|
905 |
+
iframe += 1
|
906 |
+
|
907 |
+
# adjust order
|
908 |
+
images.extend(cached_images)
|
909 |
+
iframe += 1
|
910 |
+
|
911 |
+
if is_interrupted: break
|
912 |
+
|
913 |
+
return images, info
|
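For reference, the LINEAR branch of run_linear above blends the cached key-frame hint conds and control tensors with weighted_sum (brought in via the "from scripts.prompt_travel import *" at the top of this file). A minimal sketch of that blend, under the assumption that weighted_sum(A, B, alpha) is a plain convex combination (alpha=0 keeps A, alpha=1 keeps B):

    import torch

    def weighted_sum_sketch(A: torch.Tensor, B: torch.Tensor, alpha: float) -> torch.Tensor:
        # convex blend of two equally-shaped tensors (hint conds or per-block control tensors)
        return (1.0 - alpha) * A + alpha * B

    hintA = torch.rand(1, 3, 512, 512)             # cond cached at the previous key frame
    hintB = torch.rand(1, 3, 512, 512)             # cond cached at the current key frame
    mid   = weighted_sum_sketch(hintA, hintB, 0.5) # halfway frame, same shape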
scripts/prompt_travel.py
ADDED
@@ -0,0 +1,818 @@
|
1 |
+
# This extension works with [https://github.com/AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
|
2 |
+
# version: v1.4.0
|
3 |
+
|
4 |
+
LOG_PREFIX = '[Prompt-Travel]'
|
5 |
+
|
6 |
+
import os
|
7 |
+
from pathlib import Path
|
8 |
+
from PIL.Image import Image as PILImage
|
9 |
+
from enum import Enum
|
10 |
+
from dataclasses import dataclass
|
11 |
+
from functools import partial
|
12 |
+
from typing import List, Tuple, Callable, Any, Optional, Generic, TypeVar
|
13 |
+
from traceback import print_exc, format_exc
|
14 |
+
|
15 |
+
import gradio as gr
|
16 |
+
import numpy as np
|
17 |
+
import torch
|
18 |
+
from torch import Tensor
|
19 |
+
import torch.nn.functional as F
|
20 |
+
try:
|
21 |
+
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
|
22 |
+
from moviepy.editor import concatenate_videoclips, ImageClip
|
23 |
+
except ImportError:
|
24 |
+
print(f'{LOG_PREFIX} package moviepy not installed, will not be able to generate video')
|
25 |
+
|
26 |
+
import modules.scripts as scripts
|
27 |
+
from modules.script_callbacks import on_before_image_saved, ImageSaveParams, on_cfg_denoiser, CFGDenoiserParams, remove_callbacks_for_function
|
28 |
+
from modules.ui import gr_show
|
29 |
+
from modules.shared import state, opts, sd_upscalers
|
30 |
+
from modules.processing import process_images, get_fixed_seed
|
31 |
+
from modules.processing import Processed, StableDiffusionProcessing as Processing, StableDiffusionProcessingTxt2Img as ProcessingTxt2Img, StableDiffusionProcessingImg2Img as ProcessingImg2Img
|
32 |
+
from modules.images import resize_image
|
33 |
+
from modules.sd_samplers_common import single_sample_to_image
|
34 |
+
|
35 |
+
class Mode(Enum):
|
36 |
+
LINEAR = 'linear'
|
37 |
+
REPLACE = 'replace'
|
38 |
+
|
39 |
+
class LerpMethod(Enum):
|
40 |
+
LERP = 'lerp'
|
41 |
+
SLERP = 'slerp'
|
42 |
+
|
43 |
+
class ModeReplaceDim(Enum):
|
44 |
+
TOKEN = 'token'
|
45 |
+
CHANNEL = 'channel'
|
46 |
+
RANDOM = 'random'
|
47 |
+
|
48 |
+
class ModeReplaceOrder(Enum):
|
49 |
+
SIMILAR = 'similar'
|
50 |
+
DIFFERENT = 'different'
|
51 |
+
RANDOM = 'random'
|
52 |
+
|
53 |
+
class Gensis(Enum):
|
54 |
+
FIXED = 'fixed'
|
55 |
+
SUCCESSIVE = 'successive'
|
56 |
+
EMBRYO = 'embryo'
|
57 |
+
|
58 |
+
class VideoFormat(Enum):
|
59 |
+
MP4 = 'mp4'
|
60 |
+
GIF = 'gif'
|
61 |
+
WEBM = 'webm'
|
62 |
+
|
63 |
+
if 'typing':
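# NOTE: a non-empty string literal is always truthy, so these "if '...':" lines are just named grouping blocks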
|
64 |
+
T = TypeVar('T')
|
65 |
+
@dataclass
|
66 |
+
class Ref(Generic[T]): value: T = None
|
67 |
+
|
68 |
+
TensorRef = Ref[Tensor]
|
69 |
+
StrRef = Ref[str]
|
70 |
+
PILImages = List[PILImage]
|
71 |
+
RunResults = Tuple[PILImages, str]
|
72 |
+
|
73 |
+
if 'consts':
|
74 |
+
__ = lambda key, value=None: opts.data.get(f'customscript/prompt_travel.py/txt2img/{key}/value', value)
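# look up the value this script's UI control last saved into webui settings, falling back to the given default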
|
75 |
+
|
76 |
+
LABEL_MODE = 'Travel mode'
|
77 |
+
LABEL_STEPS = 'Travel steps between stages'
|
78 |
+
LABEL_GENESIS = 'Frame genesis'
|
79 |
+
LABEL_DENOISE_W = 'Denoise strength'
|
80 |
+
LABEL_EMBRYO_STEP = 'Denoise steps for embryo'
|
81 |
+
LABEL_LERP_METH = 'Linear interp method'
|
82 |
+
LABEL_REPLACE_DIM = 'Replace dimension'
|
83 |
+
LABEL_REPLACE_ORDER = 'Replace order'
|
84 |
+
LABEL_VIDEO = 'Ext. export video'
|
85 |
+
LABEL_VIDEO_FPS = 'Video FPS'
|
86 |
+
LABEL_VIDEO_FMT = 'Video file format'
|
87 |
+
LABEL_VIDEO_PAD = 'Pad begin/end frames'
|
88 |
+
LABEL_VIDEO_PICK = 'Pick frame by slice'
|
89 |
+
LABEL_UPSCALE = 'Ext. upscale'
|
90 |
+
LABEL_UPSCALE_METH = 'Upscaler'
|
91 |
+
LABEL_UPSCALE_RATIO = 'Upscale ratio'
|
92 |
+
LABEL_UPSCALE_WIDTH = 'Upscale width'
|
93 |
+
LABEL_UPSCALE_HEIGHT = 'Upscale height'
|
94 |
+
LABEL_DEPTH = 'Ext. depth-image-io (for depth2img models)'
|
95 |
+
LABEL_DEPTH_IMG = 'Depth image file'
|
96 |
+
|
97 |
+
DEFAULT_MODE = __(LABEL_MODE, Mode.LINEAR.value)
|
98 |
+
DEFAULT_STEPS = __(LABEL_STEPS, 30)
|
99 |
+
DEFAULT_GENESIS = __(LABEL_GENESIS, Gensis.FIXED.value)
|
100 |
+
DEFAULT_DENOISE_W = __(LABEL_DENOISE_W, 1.0)
|
101 |
+
DEFAULT_EMBRYO_STEP = __(LABEL_EMBRYO_STEP, 8)
|
102 |
+
DEFAULT_LERP_METH = __(LABEL_LERP_METH, LerpMethod.LERP.value)
|
103 |
+
DEFAULT_REPLACE_DIM = __(LABEL_REPLACE_DIM, ModeReplaceDim.TOKEN.value)
|
104 |
+
DEFAULT_REPLACE_ORDER = __(LABEL_REPLACE_ORDER, ModeReplaceOrder.RANDOM.value)
|
105 |
+
DEFAULT_UPSCALE = __(LABEL_UPSCALE, False)
|
106 |
+
DEFAULT_UPSCALE_METH = __(LABEL_UPSCALE_METH, 'Lanczos')
|
107 |
+
DEFAULT_UPSCALE_RATIO = __(LABEL_UPSCALE_RATIO, 2.0)
|
108 |
+
DEFAULT_UPSCALE_WIDTH = __(LABEL_UPSCALE_WIDTH, 0)
|
109 |
+
DEFAULT_UPSCALE_HEIGHT = __(LABEL_UPSCALE_HEIGHT, 0)
|
110 |
+
DEFAULT_VIDEO = __(LABEL_VIDEO, True)
|
111 |
+
DEFAULT_VIDEO_FPS = __(LABEL_VIDEO_FPS, 10)
|
112 |
+
DEFAULT_VIDEO_FMT = __(LABEL_VIDEO_FMT, VideoFormat.MP4.value)
|
113 |
+
DEFAULT_VIDEO_PAD = __(LABEL_VIDEO_PAD, 0)
|
114 |
+
DEFAULT_VIDEO_PICK = __(LABEL_VIDEO_PICK, '')
|
115 |
+
DEFAULT_DEPTH = __(LABEL_DEPTH, False)
|
116 |
+
|
117 |
+
CHOICES_MODE = [x.value for x in Mode]
|
118 |
+
CHOICES_LERP_METH = [x.value for x in LerpMethod]
|
119 |
+
CHOICES_GENESIS = [x.value for x in Gensis]
|
120 |
+
CHOICES_REPLACE_DIM = [x.value for x in ModeReplaceDim]
|
121 |
+
CHOICES_REPLACE_ORDER = [x.value for x in ModeReplaceOrder]
|
122 |
+
CHOICES_UPSCALER = [x.name for x in sd_upscalers]
|
123 |
+
CHOICES_VIDEO_FMT = [x.value for x in VideoFormat]
|
124 |
+
|
125 |
+
EPS = 1e-6
|
126 |
+
|
127 |
+
|
128 |
+
def cond_align(condA:Tensor, condB:Tensor) -> Tuple[Tensor, Tensor]:
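# the two cond tensors may differ in token length (e.g. when one prompt is long enough to get extra 77-token chunks); zero-pad the shorter one so element-wise ops line up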
|
129 |
+
d = condA.shape[0] - condB.shape[0]
|
130 |
+
if d < 0: condA = F.pad(condA, (0, 0, 0, -d))
|
131 |
+
elif d > 0: condB = F.pad(condB, (0, 0, 0, d))
|
132 |
+
return condA, condB
|
133 |
+
|
134 |
+
def wrap_cond_align(fn:Callable[..., Tensor]):
|
135 |
+
def wrapper(condA:Tensor, condB:Tensor, *args, **kwargs) -> Tensor:
|
136 |
+
condA, condB = cond_align(condA, condB)
|
137 |
+
return fn(condA, condB, *args, **kwargs)
|
138 |
+
return wrapper
|
139 |
+
|
140 |
+
@wrap_cond_align
|
141 |
+
def weighted_sum(condA:Tensor, condB:Tensor, alpha:float) -> Tensor:
|
142 |
+
''' linear interpolation in the latent space of the condition '''
|
143 |
+
|
144 |
+
return (1 - alpha) * condA + (alpha) * condB
|
145 |
+
|
146 |
+
@wrap_cond_align
|
147 |
+
def geometric_slerp(condA:Tensor, condB:Tensor, alpha:float) -> Tensor:
|
148 |
+
''' spherical linear interpolation in the latent space of the condition, ref: https://en.wikipedia.org/wiki/Slerp '''
|
149 |
+
|
150 |
+
A_n = condA / torch.norm(condA, dim=-1, keepdim=True) # [T=77, D=768]
|
151 |
+
B_n = condB / torch.norm(condB, dim=-1, keepdim=True)
|
152 |
+
|
153 |
+
dot = (A_n * B_n).sum(dim=-1, keepdim=True) # [T=77, D=1]
|
154 |
+
omega = torch.acos(dot) # [T=77, D=1]
|
155 |
+
so = torch.sin(omega) # [T=77, D=1]
|
156 |
+
|
157 |
+
slerp = (torch.sin((1 - alpha) * omega) / so) * condA + (torch.sin(alpha * omega) / so) * condB
|
158 |
+
|
159 |
+
mask: Tensor = dot > 0.9995 # [T=77, D=1]
|
160 |
+
if not mask.any():
|
161 |
+
return slerp
|
162 |
+
else:
|
163 |
+
lerp = (1 - alpha) * condA + (alpha) * condB
|
164 |
+
return torch.where(mask, lerp, slerp) # fall back to simple lerp where the vectors are nearly parallel, to avoid NaN
|
165 |
+
|
166 |
+
@wrap_cond_align
|
167 |
+
def replace_until_match(condA:Tensor, condB:Tensor, count:int, dist:Tensor, order:str=ModeReplaceOrder.RANDOM) -> Tensor:
|
168 |
+
''' value substitution on the condition tensor; modifies `dist` in place '''
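# each call picks `count` positions that still differ according to `dist`, zeroes them in `dist`,
# then returns condB's values at every already-replaced position and condA's elsewhere,
# so repeated calls gradually morph condA into condB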
|
169 |
+
|
170 |
+
def index_tensor_to_tuple(index:Tensor) -> Tuple[Tensor, ...]:
|
171 |
+
return tuple([index[..., i] for i in range(index.shape[-1])]) # tuple([nDiff], ...)
|
172 |
+
|
173 |
+
# mask: [T=77, D=768], [T=77] or [D=768]
|
174 |
+
mask = dist > EPS
|
175 |
+
# idx_diff: [nDiff, nDim=2] or [nDiff, nDim=1]
|
176 |
+
idx_diff = torch.nonzero(mask)
|
177 |
+
n_diff = len(idx_diff)
|
178 |
+
|
179 |
+
if order == ModeReplaceOrder.RANDOM:
|
180 |
+
sel = np.random.choice(range(n_diff), size=count, replace=False) if n_diff > count else slice(None)
|
181 |
+
else:
|
182 |
+
val_diff = dist[index_tensor_to_tuple(idx_diff)] # [nDiff]
|
183 |
+
|
184 |
+
if order == ModeReplaceOrder.SIMILAR:
|
185 |
+
sorted_index = val_diff.argsort()
|
186 |
+
elif order == ModeReplaceOrder.DIFFERENT:
|
187 |
+
sorted_index = val_diff.argsort(descending=True)
|
188 |
+
else: raise ValueError(f'unknown replace_order: {order}')
|
189 |
+
|
190 |
+
sel = sorted_index[:count]
|
191 |
+
|
192 |
+
idx_diff_sel = idx_diff[sel, ...] # [cnt] => [cnt, nDim]
|
193 |
+
idx_diff_sel_tp = index_tensor_to_tuple(idx_diff_sel)
|
194 |
+
dist[idx_diff_sel_tp] = 0.0
|
195 |
+
mask[idx_diff_sel_tp] = False
|
196 |
+
|
197 |
+
if mask.shape != condA.shape: # cond.shape = [T=77, D=768]
|
198 |
+
mask_len = mask.shape[0]
|
199 |
+
if mask_len == condA.shape[0]: mask = mask.unsqueeze(1)
|
200 |
+
elif mask_len == condA.shape[1]: mask = mask.unsqueeze(0)
|
201 |
+
else: raise ValueError(f'unknown mask.shape: {mask.shape}')
|
202 |
+
mask = mask.expand_as(condA)
|
203 |
+
|
204 |
+
return mask * condA + ~mask * condB
|
205 |
+
|
206 |
+
|
207 |
+
def get_next_sequence_number(path:str) -> int:
|
208 |
+
""" Determines and returns the next sequence number to use when saving an image in the specified directory. The sequence starts at 0. """
|
209 |
+
result = -1
|
210 |
+
dir = Path(path)
|
211 |
+
for file in dir.iterdir():
|
212 |
+
if not file.is_dir(): continue
|
213 |
+
try:
|
214 |
+
num = int(file.name)
|
215 |
+
if num > result: result = num
|
216 |
+
except ValueError:
|
217 |
+
pass
|
218 |
+
return result + 1
|
219 |
+
|
220 |
+
def update_img2img_p(p:Processing, imgs:PILImages, denoising_strength:float=0.75) -> ProcessingImg2Img:
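# used by 'successive' genesis: feed the previous frame back in as the img2img init image;
# a txt2img processing object is converted by copying its known constructor params into a new img2img one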
|
221 |
+
if isinstance(p, ProcessingImg2Img):
|
222 |
+
p.init_images = imgs
|
223 |
+
p.denoising_strength = denoising_strength
|
224 |
+
return p
|
225 |
+
|
226 |
+
if isinstance(p, ProcessingTxt2Img):
|
227 |
+
KNOWN_KEYS = [ # see `StableDiffusionProcessing.__init__()`
|
228 |
+
'sd_model',
|
229 |
+
'outpath_samples',
|
230 |
+
'outpath_grids',
|
231 |
+
'prompt',
|
232 |
+
'styles',
|
233 |
+
'seed',
|
234 |
+
'subseed',
|
235 |
+
'subseed_strength',
|
236 |
+
'seed_resize_from_h',
|
237 |
+
'seed_resize_from_w',
|
238 |
+
'seed_enable_extras',
|
239 |
+
'sampler_name',
|
240 |
+
'batch_size',
|
241 |
+
'n_iter',
|
242 |
+
'steps',
|
243 |
+
'cfg_scale',
|
244 |
+
'width',
|
245 |
+
'height',
|
246 |
+
'restore_faces',
|
247 |
+
'tiling',
|
248 |
+
'do_not_save_samples',
|
249 |
+
'do_not_save_grid',
|
250 |
+
'extra_generation_params',
|
251 |
+
'overlay_images',
|
252 |
+
'negative_prompt',
|
253 |
+
'eta',
|
254 |
+
'do_not_reload_embeddings',
|
255 |
+
#'denoising_strength',
|
256 |
+
'ddim_discretize',
|
257 |
+
's_min_uncond',
|
258 |
+
's_churn',
|
259 |
+
's_tmax',
|
260 |
+
's_tmin',
|
261 |
+
's_noise',
|
262 |
+
'override_settings',
|
263 |
+
'override_settings_restore_afterwards',
|
264 |
+
'sampler_index',
|
265 |
+
'script_args',
|
266 |
+
]
|
267 |
+
kwargs = { k: getattr(p, k) for k in dir(p) if k in KNOWN_KEYS } # inherit params
|
268 |
+
return ProcessingImg2Img(
|
269 |
+
init_images=imgs,
|
270 |
+
denoising_strength=denoising_strength,
|
271 |
+
**kwargs,
|
272 |
+
)
|
273 |
+
|
274 |
+
def parse_slice(picker:str) -> Optional[slice]:
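# parse a Python-style slice string like '10:20' or '::2' for the 'Pick frame by slice' option; an empty string keeps all frames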
|
275 |
+
if not picker.strip(): return None
|
276 |
+
|
277 |
+
to_int = lambda s: None if not s else int(s)
|
278 |
+
segs = [to_int(x.strip()) for x in picker.strip().split(':')]
|
279 |
+
|
280 |
+
start, stop, step = None, None, None
|
281 |
+
if len(segs) == 1: stop, = segs
|
282 |
+
elif len(segs) == 2: start, stop = segs
|
283 |
+
elif len(segs) == 3: start, stop, step = segs
|
284 |
+
else: raise ValueError
|
285 |
+
|
286 |
+
return slice(start, stop, step)
|
287 |
+
|
288 |
+
def parse_resolution(width:int, height:int, upscale_ratio:float, upscale_width:int, upscale_height:int) -> Tuple[bool, Tuple[int, int]]:
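# returns (need_upscale, (target_w, target_h)); explicit width/height take precedence over the ratio, and a zero width or height is derived from the other side to keep aspect ratio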
|
289 |
+
if upscale_width == upscale_height == 0:
|
290 |
+
if upscale_ratio == 1.0:
|
291 |
+
return False, (width, height)
|
292 |
+
else:
|
293 |
+
return True, (round(width * upscale_ratio), round(height * upscale_ratio))
|
294 |
+
else:
|
295 |
+
if upscale_width == 0: upscale_width = round(width * upscale_height / height)
|
296 |
+
if upscale_height == 0: upscale_height = round(height * upscale_width / width)
|
297 |
+
return (width != upscale_width and height != upscale_height), (upscale_width, upscale_height)
|
298 |
+
|
299 |
+
|
300 |
+
def upscale_image(img:PILImage, width:int, height:int, upscale_meth:str, upscale_ratio:float, upscale_width:int, upscale_height:int) -> PILImage:
|
301 |
+
if upscale_meth == 'None': return img
|
302 |
+
need_upscale, (tgt_w, tgt_h) = parse_resolution(width, height, upscale_ratio, upscale_width, upscale_height)
|
303 |
+
if need_upscale:
|
304 |
+
if 'show_debug': print(f'>> upscale: ({width}, {height}) => ({tgt_w}, {tgt_h})')
|
305 |
+
|
306 |
+
if max(tgt_w / width, tgt_h / height) > 4: # must split into two rounds for NN model compatibility
|
307 |
+
hf_w, hf_h = round(width * 4), round(height * 4)
|
308 |
+
img = resize_image(0, img, hf_w, hf_h, upscaler_name=upscale_meth)
|
309 |
+
img = resize_image(0, img, tgt_w, tgt_h, upscaler_name=upscale_meth)
|
310 |
+
return img
|
311 |
+
|
312 |
+
def save_video(imgs:PILImages, video_slice:slice, video_pad:int, video_fps:float, video_fmt:VideoFormat, fbase:str):
|
313 |
+
if len(imgs) <= 1 or 'ImageSequenceClip' not in globals(): return
|
314 |
+
|
315 |
+
try:
|
316 |
+
# arrange frames
|
317 |
+
if video_slice: imgs = imgs[video_slice]
|
318 |
+
if video_pad > 0: imgs = [imgs[0]] * video_pad + imgs + [imgs[-1]] * video_pad
|
319 |
+
|
320 |
+
# export video
|
321 |
+
seq: List[np.ndarray] = [np.asarray(img) for img in imgs]
|
322 |
+
try:
|
323 |
+
clip = ImageSequenceClip(seq, fps=video_fps)
|
324 |
+
except: # images may have different size (small probability due to upscaler)
|
325 |
+
clip = concatenate_videoclips([ImageClip(img, duration=1/video_fps) for img in seq], method='compose')
|
326 |
+
clip.fps = video_fps
|
327 |
+
if video_fmt == VideoFormat.MP4: clip.write_videofile(fbase + '.mp4', verbose=False, audio=False)
|
328 |
+
elif video_fmt == VideoFormat.WEBM: clip.write_videofile(fbase + '.webm', verbose=False, audio=False)
|
329 |
+
elif video_fmt == VideoFormat.GIF: clip.write_gif (fbase + '.gif', loop=True)
|
330 |
+
except: print_exc()
|
331 |
+
|
332 |
+
|
333 |
+
class on_cfg_denoiser_wrapper:
|
334 |
+
def __init__(self, callback_fn:Callable):
|
335 |
+
self.callback_fn = callback_fn
|
336 |
+
def __enter__(self):
|
337 |
+
on_cfg_denoiser(self.callback_fn)
|
338 |
+
def __exit__(self, exc_type, exc_value, exc_traceback):
|
339 |
+
remove_callbacks_for_function(self.callback_fn)
|
340 |
+
|
341 |
+
class p_steps_overrider:
|
342 |
+
def __init__(self, p:Processing, steps:int=1):
|
343 |
+
self.p = p
|
344 |
+
self.steps = steps
|
345 |
+
self.steps_saved = self.p.steps
|
346 |
+
def __enter__(self):
|
347 |
+
self.p.steps = self.steps
|
348 |
+
def __exit__(self, exc_type, exc_value, exc_traceback):
|
349 |
+
self.p.steps = self.steps_saved
|
350 |
+
|
351 |
+
class p_save_samples_overrider:
|
352 |
+
def __init__(self, p:Processing, save:bool=True):
|
353 |
+
self.p = p
|
354 |
+
self.save = save
|
355 |
+
self.do_not_save_samples_saved = self.p.do_not_save_samples
|
356 |
+
def __enter__(self):
|
357 |
+
self.p.do_not_save_samples = not self.save
|
358 |
+
def __exit__(self, exc_type, exc_value, exc_traceback):
|
359 |
+
self.p.do_not_save_samples = self.do_not_save_samples_saved
|
360 |
+
|
361 |
+
def get_cond_callback(refs:List[TensorRef], params:CFGDenoiserParams):
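# capture the prompt cond/uncond tensors at the first sampling step so they can be interpolated later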
|
362 |
+
if params.sampling_step > 0: return
|
363 |
+
values = [
|
364 |
+
params.text_cond, # [B=1, L= 77, D=768]
|
365 |
+
params.text_uncond, # [B=1, L=231, D=768]
|
366 |
+
]
|
367 |
+
for i, ref in enumerate(refs):
|
368 |
+
ref.value = values[i]
|
369 |
+
|
370 |
+
def set_cond_callback(refs:List[TensorRef], params:CFGDenoiserParams):
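# override the cond/uncond tensors in place with the interpolated ones on every sampling step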
|
371 |
+
values = [
|
372 |
+
params.text_cond, # [B=1, L= 77, D=768]
|
373 |
+
params.text_uncond, # [B=1, L=231, D=768]
|
374 |
+
]
|
375 |
+
for i, ref in enumerate(refs):
|
376 |
+
values[i].data = ref.value
|
377 |
+
|
378 |
+
def get_latent_callback(ref:TensorRef, embryo_step:int, params:CFGDenoiserParams):
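# embryo genesis: stash the half-denoised latent once sampling reaches `embryo_step`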
|
379 |
+
if params.sampling_step != embryo_step: return
|
380 |
+
ref.value = params.x
|
381 |
+
|
382 |
+
def set_latent_callback(ref:TensorRef, embryo_step:int, params:CFGDenoiserParams):
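# embryo genesis: restore the stashed latent at `embryo_step` so every frame shares the same early denoising trajectory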
|
383 |
+
if params.sampling_step != embryo_step: return
|
384 |
+
params.x.data = ref.value
|
385 |
+
|
386 |
+
|
387 |
+
def switch_to_stage_binding_(self:'Script', i:int):
|
388 |
+
if 'show_debug':
|
389 |
+
print(f'[stage {i+1}/{self.n_stages}]')
|
390 |
+
print(f' pos prompt: {self.pos_prompts[i]}')
|
391 |
+
if hasattr(self, 'neg_prompts'):
|
392 |
+
print(f' neg prompt: {self.neg_prompts[i]}')
|
393 |
+
self.p.prompt = self.pos_prompts[i]
|
394 |
+
if hasattr(self, 'neg_prompts'):
|
395 |
+
self.p.negative_prompt = self.neg_prompts[i]
|
396 |
+
self.p.subseed = self.subseed
|
397 |
+
|
398 |
+
def process_p_binding_(self:'Script', append:bool=True, save:bool=True) -> PILImages:
|
399 |
+
assert hasattr(self, 'images') and hasattr(self, 'info'), 'unknown logic, "images" and "info" not initialized'
|
400 |
+
with p_save_samples_overrider(self.p, save):
|
401 |
+
proc = process_images(self.p)
|
402 |
+
if save:
|
403 |
+
if not self.info.value: self.info.value = proc.info
|
404 |
+
if append: self.images.extend(proc.images)
|
405 |
+
if self.genesis == Gensis.SUCCESSIVE:
|
406 |
+
self.p = update_img2img_p(self.p, self.images[-1:], self.denoise_w)
|
407 |
+
return proc.images
|
408 |
+
|
409 |
+
|
410 |
+
class Script(scripts.Script):
|
411 |
+
|
412 |
+
def title(self):
|
413 |
+
return 'Prompt Travel'
|
414 |
+
|
415 |
+
def describe(self):
|
416 |
+
return 'Travel from one prompt to another in the text encoder latent space.'
|
417 |
+
|
418 |
+
def show(self, is_img2img):
|
419 |
+
return True
|
420 |
+
|
421 |
+
def ui(self, is_img2img):
|
422 |
+
with gr.Row(variant='compact') as tab_mode:
|
423 |
+
mode = gr.Radio (label=LABEL_MODE, value=lambda: DEFAULT_MODE, choices=CHOICES_MODE)
|
424 |
+
lerp_meth = gr.Dropdown(label=LABEL_LERP_METH, value=lambda: DEFAULT_LERP_METH, choices=CHOICES_LERP_METH)
|
425 |
+
replace_dim = gr.Dropdown(label=LABEL_REPLACE_DIM, value=lambda: DEFAULT_REPLACE_DIM, choices=CHOICES_REPLACE_DIM, visible=False)
|
426 |
+
replace_order = gr.Dropdown(label=LABEL_REPLACE_ORDER, value=lambda: DEFAULT_REPLACE_ORDER, choices=CHOICES_REPLACE_ORDER, visible=False)
|
427 |
+
|
428 |
+
def switch_mode(mode:str):
|
429 |
+
show_meth = Mode(mode) == Mode.LINEAR
|
430 |
+
show_repl = Mode(mode) == Mode.REPLACE
|
431 |
+
return [gr_show(x) for x in [show_meth, show_repl, show_repl]]
|
432 |
+
mode.change(switch_mode, inputs=[mode], outputs=[lerp_meth, replace_dim, replace_order], show_progress=False)
|
433 |
+
|
434 |
+
with gr.Row(variant='compact') as tab_param:
|
435 |
+
steps = gr.Text (label=LABEL_STEPS, value=lambda: DEFAULT_STEPS, max_lines=1)
|
436 |
+
genesis = gr.Dropdown(label=LABEL_GENESIS, value=lambda: DEFAULT_GENESIS, choices=CHOICES_GENESIS)
|
437 |
+
denoise_w = gr.Slider (label=LABEL_DENOISE_W, value=lambda: DEFAULT_DENOISE_W, minimum=0.0, maximum=1.0, visible=False)
|
438 |
+
embryo_step = gr.Text (label=LABEL_EMBRYO_STEP, value=lambda: DEFAULT_EMBRYO_STEP, max_lines=1, visible=False)
|
439 |
+
|
440 |
+
def switch_genesis(genesis:str):
|
441 |
+
show_dw = Gensis(genesis) == Gensis.SUCCESSIVE # show 'denoise_w' for 'successive'
|
442 |
+
show_es = Gensis(genesis) == Gensis.EMBRYO # show 'embryo_step' for 'embryo'
|
443 |
+
return [gr_show(x) for x in [show_dw, show_es]]
|
444 |
+
genesis.change(switch_genesis, inputs=[genesis], outputs=[denoise_w, embryo_step], show_progress=False)
|
445 |
+
|
446 |
+
with gr.Row(variant='compact', visible=DEFAULT_DEPTH) as tab_ext_depth:
|
447 |
+
depth_img = gr.Image(label=LABEL_DEPTH_IMG, source='upload', type='pil', image_mode=None)
|
448 |
+
|
449 |
+
with gr.Row(variant='compact', visible=DEFAULT_UPSCALE) as tab_ext_upscale:
|
450 |
+
upscale_meth = gr.Dropdown(label=LABEL_UPSCALE_METH, value=lambda: DEFAULT_UPSCALE_METH, choices=CHOICES_UPSCALER)
|
451 |
+
upscale_ratio = gr.Slider (label=LABEL_UPSCALE_RATIO, value=lambda: DEFAULT_UPSCALE_RATIO, minimum=1.0, maximum=16.0, step=0.1)
|
452 |
+
upscale_width = gr.Slider (label=LABEL_UPSCALE_WIDTH, value=lambda: DEFAULT_UPSCALE_WIDTH, minimum=0, maximum=2048, step=8)
|
453 |
+
upscale_height = gr.Slider (label=LABEL_UPSCALE_HEIGHT, value=lambda: DEFAULT_UPSCALE_HEIGHT, minimum=0, maximum=2048, step=8)
|
454 |
+
|
455 |
+
with gr.Row(variant='compact', visible=DEFAULT_VIDEO) as tab_ext_video:
|
456 |
+
video_fmt = gr.Dropdown(label=LABEL_VIDEO_FMT, value=lambda: DEFAULT_VIDEO_FMT, choices=CHOICES_VIDEO_FMT)
|
457 |
+
video_fps = gr.Number (label=LABEL_VIDEO_FPS, value=lambda: DEFAULT_VIDEO_FPS)
|
458 |
+
video_pad = gr.Number (label=LABEL_VIDEO_PAD, value=lambda: DEFAULT_VIDEO_PAD, precision=0)
|
459 |
+
video_pick = gr.Text (label=LABEL_VIDEO_PICK, value=lambda: DEFAULT_VIDEO_PICK, max_lines=1)
|
460 |
+
|
461 |
+
with gr.Row(variant='compact') as tab_ext:
|
462 |
+
ext_video = gr.Checkbox(label=LABEL_VIDEO, value=lambda: DEFAULT_VIDEO)
|
463 |
+
ext_upscale = gr.Checkbox(label=LABEL_UPSCALE, value=lambda: DEFAULT_UPSCALE)
|
464 |
+
ext_depth = gr.Checkbox(label=LABEL_DEPTH, value=lambda: DEFAULT_DEPTH)
|
465 |
+
|
466 |
+
ext_video .change(gr_show, inputs=ext_video, outputs=tab_ext_video, show_progress=False)
|
467 |
+
ext_upscale.change(gr_show, inputs=ext_upscale, outputs=tab_ext_upscale, show_progress=False)
|
468 |
+
ext_depth .change(gr_show, inputs=ext_depth, outputs=tab_ext_depth, show_progress=False)
|
469 |
+
|
470 |
+
return [
|
471 |
+
mode, lerp_meth, replace_dim, replace_order,
|
472 |
+
steps, genesis, denoise_w, embryo_step,
|
473 |
+
depth_img,
|
474 |
+
upscale_meth, upscale_ratio, upscale_width, upscale_height,
|
475 |
+
video_fmt, video_fps, video_pad, video_pick,
|
476 |
+
ext_video, ext_upscale, ext_depth,
|
477 |
+
]
|
478 |
+
|
479 |
+
def run(self, p:Processing,
|
480 |
+
mode:str, lerp_meth:str, replace_dim:str, replace_order:str,
|
481 |
+
steps:str, genesis:str, denoise_w:float, embryo_step:str,
|
482 |
+
depth_img:PILImage,
|
483 |
+
upscale_meth:str, upscale_ratio:float, upscale_width:int, upscale_height:int,
|
484 |
+
video_fmt:str, video_fps:float, video_pad:int, video_pick:str,
|
485 |
+
ext_video:bool, ext_upscale:bool, ext_depth:bool,
|
486 |
+
):
|
487 |
+
|
488 |
+
# enum lookup
|
489 |
+
mode: Mode = Mode(mode)
|
490 |
+
lerp_meth: LerpMethod = LerpMethod(lerp_meth)
|
491 |
+
replace_dim: ModeReplaceDim = ModeReplaceDim(replace_dim)
|
492 |
+
replace_order: ModeReplaceOrder = ModeReplaceOrder(replace_order)
|
493 |
+
genesis: Gensis = Gensis(genesis)
|
494 |
+
video_fmt: VideoFormat = VideoFormat(video_fmt)
|
495 |
+
|
496 |
+
# Param check & type convert
|
497 |
+
if ext_video:
|
498 |
+
if video_pad < 0: return Processed(p, [], p.seed, f'video_pad must >= 0, but got {video_pad}')
|
499 |
+
if video_fps <= 0: return Processed(p, [], p.seed, f'video_fps must > 0, but got {video_fps}')
|
500 |
+
try: video_slice = parse_slice(video_pick)
|
501 |
+
except: return Processed(p, [], p.seed, 'syntax error in video_slice')
|
502 |
+
if genesis == Gensis.EMBRYO:
|
503 |
+
try: x = float(embryo_step)
|
504 |
+
except: return Processed(p, [], p.seed, f'embryo_step is not a number: {embryo_step}')
|
505 |
+
if x <= 0: return Processed(p, [], p.seed, f'embryo_step must > 0, but got {embryo_step}')
|
506 |
+
embryo_step: int = round(x * p.steps if x < 1.0 else x) ; del x
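# a value below 1.0 is treated as a fraction of the total sampling steps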
|
507 |
+
|
508 |
+
# Prepare prompts & steps
|
509 |
+
prompt_pos = p.prompt.strip()
|
510 |
+
if not prompt_pos: return Processed(p, [], p.seed, 'positive prompt should not be empty :(')
|
511 |
+
pos_prompts = [p.strip() for p in prompt_pos.split('\n') if p.strip()]
|
512 |
+
if len(pos_prompts) == 1: return Processed(p, [], p.seed, 'should specify at least two lines of prompt to travel between :(')
|
513 |
+
if genesis == Gensis.EMBRYO and len(pos_prompts) > 2: return Processed(p, [], p.seed, 'processing with "embryo" genesis takes exactly two lines of prompt :(')
|
514 |
+
prompt_neg = p.negative_prompt.strip()
|
515 |
+
neg_prompts = [p.strip() for p in prompt_neg.split('\n') if p.strip()]
|
516 |
+
if len(neg_prompts) == 0: neg_prompts = ['']
|
517 |
+
n_stages = max(len(pos_prompts), len(neg_prompts))
|
518 |
+
while len(pos_prompts) < n_stages: pos_prompts.append(pos_prompts[-1])
|
519 |
+
while len(neg_prompts) < n_stages: neg_prompts.append(neg_prompts[-1])
|
520 |
+
|
521 |
+
try: steps: List[int] = [int(s.strip()) for s in steps.strip().split(',')]
|
522 |
+
except: return Processed(p, [], p.seed, f'cannot parse steps option: {steps}')
|
523 |
+
if len(steps) == 1:
|
524 |
+
steps = [steps[0]] * (n_stages - 1)
|
525 |
+
elif len(steps) != n_stages - 1:
|
526 |
+
return Processed(p, [], p.seed, f'stage count mismatch: you have {n_stages} prompt stages, but specified {len(steps)} steps; ensure that len(steps) == len(stages) - 1')
|
527 |
+
n_frames = sum(steps) + n_stages
|
528 |
+
if 'show_debug':
|
529 |
+
print('n_stages:', n_stages)
|
530 |
+
print('n_frames:', n_frames)
|
531 |
+
print('steps:', steps)
|
532 |
+
steps.insert(0, -1) # fixup the first stage
|
533 |
+
|
534 |
+
# Custom saving path
|
535 |
+
travel_path = os.path.join(p.outpath_samples, 'prompt_travel')
|
536 |
+
os.makedirs(travel_path, exist_ok=True)
|
537 |
+
travel_number = get_next_sequence_number(travel_path)
|
538 |
+
self.log_dp = os.path.join(travel_path, f'{travel_number:05}')
|
539 |
+
p.outpath_samples = self.log_dp
|
540 |
+
os.makedirs(self.log_dp, exist_ok=True)
|
541 |
+
#self.log_fp = os.path.join(self.log_dp, 'log.txt')
|
542 |
+
|
543 |
+
# Force batch count and batch size to 1
|
544 |
+
p.n_iter = 1
|
545 |
+
p.batch_size = 1
|
546 |
+
|
547 |
+
# Random unified const seed
|
548 |
+
p.seed = get_fixed_seed(p.seed) # fix it to ensure all processes use the same major seed
|
549 |
+
self.subseed = p.subseed # stash it to allow using random subseed for each process (when -1)
|
550 |
+
if 'show_debug':
|
551 |
+
print('seed:', p.seed)
|
552 |
+
print('subseed:', p.subseed)
|
553 |
+
print('subseed_strength:', p.subseed_strength)
|
554 |
+
|
555 |
+
# Start job
|
556 |
+
state.job_count = n_frames
|
557 |
+
|
558 |
+
# Pack parameters
|
559 |
+
self.pos_prompts = pos_prompts
|
560 |
+
self.neg_prompts = neg_prompts
|
561 |
+
self.steps = steps
|
562 |
+
self.genesis = genesis
|
563 |
+
self.denoise_w = denoise_w
|
564 |
+
self.embryo_step = embryo_step
|
565 |
+
self.lerp_meth = lerp_meth
|
566 |
+
self.replace_dim = replace_dim
|
567 |
+
self.replace_order = replace_order
|
568 |
+
self.n_stages = n_stages
|
569 |
+
self.n_frames = n_frames
|
570 |
+
|
571 |
+
def upscale_image_callback(params:ImageSaveParams):
|
572 |
+
params.image = upscale_image(params.image, p.width, p.height, upscale_meth, upscale_ratio, upscale_width, upscale_height)
|
573 |
+
|
574 |
+
# Dispatch
|
575 |
+
self.p: Processing = p
|
576 |
+
self.images: PILImages = []
|
577 |
+
self.info: StrRef = Ref()
|
578 |
+
try:
|
579 |
+
if ext_upscale: on_before_image_saved(upscale_image_callback)
|
580 |
+
if ext_depth: self.ext_depth_preprocess(p, depth_img)
|
581 |
+
|
582 |
+
runner = getattr(self, f'run_{mode.value}')
|
583 |
+
if not runner: return Processed(p, [], p.seed, f'no runner found for mode: {mode.value}')
|
584 |
+
runner()
|
585 |
+
except:
|
586 |
+
e = format_exc()
|
587 |
+
print(e)
|
588 |
+
self.info.value = e
|
589 |
+
finally:
|
590 |
+
if ext_depth: self.ext_depth_postprocess(p, depth_img)
|
591 |
+
if ext_upscale: remove_callbacks_for_function(upscale_image_callback)
|
592 |
+
|
593 |
+
# Save video
|
594 |
+
if ext_video: save_video(self.images, video_slice, video_pad, video_fps, video_fmt, os.path.join(self.log_dp, f'travel-{travel_number:05}'))
|
595 |
+
|
596 |
+
return Processed(p, self.images, p.seed, self.info.value)
|
597 |
+
|
598 |
+
def run_linear(self):
|
599 |
+
# dispatch for special case
|
600 |
+
if self.genesis == Gensis.EMBRYO: return self.run_linear_embryo()
|
601 |
+
|
602 |
+
lerp_fn = weighted_sum if self.lerp_meth == LerpMethod.LERP else geometric_slerp
|
603 |
+
|
604 |
+
if 'auxiliary':
|
605 |
+
switch_to_stage = partial(switch_to_stage_binding_, self)
|
606 |
+
process_p = partial(process_p_binding_, self)
|
607 |
+
|
608 |
+
from_pos_hidden: TensorRef = Ref()
|
609 |
+
from_neg_hidden: TensorRef = Ref()
|
610 |
+
to_pos_hidden: TensorRef = Ref()
|
611 |
+
to_neg_hidden: TensorRef = Ref()
|
612 |
+
inter_pos_hidden: TensorRef = Ref()
|
613 |
+
inter_neg_hidden: TensorRef = Ref()
|
614 |
+
|
615 |
+
# Step 1: draw the init image
|
616 |
+
switch_to_stage(0)
|
617 |
+
with on_cfg_denoiser_wrapper(partial(get_cond_callback, [from_pos_hidden, from_neg_hidden])):
|
618 |
+
process_p()
|
619 |
+
|
620 |
+
# travel through stages
|
621 |
+
for i in range(1, self.n_stages):
|
622 |
+
if state.interrupted: break
|
623 |
+
|
624 |
+
state.job = f'{i}/{self.n_frames}'
|
625 |
+
state.job_no = i + 1
|
626 |
+
|
627 |
+
# only change target prompts
|
628 |
+
switch_to_stage(i)
|
629 |
+
with on_cfg_denoiser_wrapper(partial(get_cond_callback, [to_pos_hidden, to_neg_hidden])):
|
630 |
+
if self.genesis == Gensis.FIXED:
|
631 |
+
imgs = process_p(append=False) # stash it to make order right
|
632 |
+
elif self.genesis == Gensis.SUCCESSIVE:
|
633 |
+
with p_steps_overrider(self.p, steps=1): # ignore final image, only need cond
|
634 |
+
process_p(save=False, append=False)
|
635 |
+
else: raise ValueError(f'invalid genesis: {self.genesis.value}')
|
636 |
+
|
637 |
+
# Step 2: draw the interpolated images
|
638 |
+
is_break_iter = False
|
639 |
+
n_inter = self.steps[i]
|
640 |
+
for t in range(1, n_inter + (1 if self.genesis == Gensis.SUCCESSIVE else 0)):
|
641 |
+
if state.interrupted: is_break_iter = True ; break
|
642 |
+
|
643 |
+
alpha = t / n_inter # [1/T, 2/T, .. T-1/T] (+ [T/T])?
|
644 |
+
inter_pos_hidden.value = lerp_fn(from_pos_hidden.value, to_pos_hidden.value, alpha)
|
645 |
+
inter_neg_hidden.value = lerp_fn(from_neg_hidden.value, to_neg_hidden.value, alpha)
|
646 |
+
with on_cfg_denoiser_wrapper(partial(set_cond_callback, [inter_pos_hidden, inter_neg_hidden])):
|
647 |
+
process_p()
|
648 |
+
|
649 |
+
if is_break_iter: break
|
650 |
+
|
651 |
+
# Step 3: append the final stage
|
652 |
+
if self.genesis != Gensis.SUCCESSIVE: self.images.extend(imgs)
|
653 |
+
# move to next stage
|
654 |
+
from_pos_hidden.value, from_neg_hidden.value = to_pos_hidden.value, to_neg_hidden.value
|
655 |
+
inter_pos_hidden.value, inter_neg_hidden.value = None, None
|
656 |
+
|
657 |
+
def run_linear_embryo(self):
|
658 |
+
''' NOTE: this procedure has special logic, so it is kept separate from run_linear() for now '''
|
659 |
+
|
660 |
+
lerp_fn = weighted_sum if self.lerp_meth == LerpMethod.LERP else geometric_slerp
|
661 |
+
n_frames = self.steps[1] + 2
|
662 |
+
|
663 |
+
if 'auxiliary':
|
664 |
+
switch_to_stage = partial(switch_to_stage_binding_, self)
|
665 |
+
process_p = partial(process_p_binding_, self)
|
666 |
+
|
667 |
+
from_pos_hidden: TensorRef = Ref()
|
668 |
+
to_pos_hidden: TensorRef = Ref()
|
669 |
+
inter_pos_hidden: TensorRef = Ref()
|
670 |
+
embryo: TensorRef = Ref() # latent image, the common half-denoised prototype of all frames
|
671 |
+
|
672 |
+
# Step 1: get starting & ending condition
|
673 |
+
switch_to_stage(0)
|
674 |
+
with on_cfg_denoiser_wrapper(partial(get_cond_callback, [from_pos_hidden])):
|
675 |
+
with p_steps_overrider(self.p, steps=1):
|
676 |
+
process_p(save=False)
|
677 |
+
switch_to_stage(1)
|
678 |
+
with on_cfg_denoiser_wrapper(partial(get_cond_callback, [to_pos_hidden])):
|
679 |
+
with p_steps_overrider(self.p, steps=1):
|
680 |
+
process_p(save=False)
|
681 |
+
|
682 |
+
# Step 2: get the condition middle-point as embryo then hatch it halfway
|
683 |
+
inter_pos_hidden.value = lerp_fn(from_pos_hidden.value, to_pos_hidden.value, 0.5)
|
684 |
+
with on_cfg_denoiser_wrapper(partial(set_cond_callback, [inter_pos_hidden])):
|
685 |
+
with on_cfg_denoiser_wrapper(partial(get_latent_callback, embryo, self.embryo_step)):
|
686 |
+
process_p(save=False)
|
687 |
+
try:
|
688 |
+
img: PILImage = single_sample_to_image(embryo.value[0], approximation=-1) # the data is duplicated, just get first item
|
689 |
+
img.save(os.path.join(self.log_dp, 'embryo.png'))
|
690 |
+
except: pass
|
691 |
+
|
692 |
+
# Step 3: derive the embryo towards each interpolated condition
|
693 |
+
for t in range(0, n_frames+1):
|
694 |
+
if state.interrupted: break
|
695 |
+
|
696 |
+
alpha = t / n_frames # [0, 1/T, 2/T, .. T-1/T, 1]
|
697 |
+
inter_pos_hidden.value = lerp_fn(from_pos_hidden.value, to_pos_hidden.value, alpha)
|
698 |
+
with on_cfg_denoiser_wrapper(partial(set_cond_callback, [inter_pos_hidden])):
|
699 |
+
with on_cfg_denoiser_wrapper(partial(set_latent_callback, embryo, self.embryo_step)):
|
700 |
+
process_p()
|
701 |
+
|
702 |
+
def run_replace(self):
|
703 |
+
''' an alternative travel mode: progressively replace values in the condition tensor along the token dim or channel dim '''
|
704 |
+
|
705 |
+
if self.genesis == Gensis.EMBRYO: raise NotImplementedError(f'genesis {self.genesis.value!r} is only supported in linear mode currently :(')
|
706 |
+
|
707 |
+
if 'auxiliary':
|
708 |
+
switch_to_stage = partial(switch_to_stage_binding_, self)
|
709 |
+
process_p = partial(process_p_binding_, self)
|
710 |
+
|
711 |
+
from_pos_hidden: TensorRef = Ref()
|
712 |
+
to_pos_hidden: TensorRef = Ref()
|
713 |
+
inter_pos_hidden: TensorRef = Ref()
|
714 |
+
|
715 |
+
# Step 1: draw the init image
|
716 |
+
switch_to_stage(0)
|
717 |
+
with on_cfg_denoiser_wrapper(partial(get_cond_callback, [from_pos_hidden])):
|
718 |
+
process_p()
|
719 |
+
|
720 |
+
# travel through stages
|
721 |
+
for i in range(1, self.n_stages):
|
722 |
+
if state.interrupted: break
|
723 |
+
|
724 |
+
state.job = f'{i}/{self.n_frames}'
|
725 |
+
state.job_no = i + 1
|
726 |
+
|
727 |
+
# only change target prompts
|
728 |
+
switch_to_stage(i)
|
729 |
+
with on_cfg_denoiser_wrapper(partial(get_cond_callback, [to_pos_hidden])):
|
730 |
+
if self.genesis == Gensis.FIXED:
|
731 |
+
imgs = process_p(append=False) # stash it to make order right
|
732 |
+
elif self.genesis == Gensis.SUCCESSIVE:
|
733 |
+
with p_steps_overrider(self.p, steps=1): # ignore final image, only need cond
|
734 |
+
process_p(save=False, append=False)
|
735 |
+
else: raise ValueError(f'invalid genesis: {self.genesis.value}')
|
736 |
+
|
737 |
+
# ========== ↓↓↓ major differences from run_linear() ↓↓↓ ==========
|
738 |
+
|
739 |
+
# decide change portion in each iter
|
740 |
+
L1 = torch.abs(from_pos_hidden.value - to_pos_hidden.value)
|
741 |
+
if self.replace_dim == ModeReplaceDim.RANDOM:
|
742 |
+
dist = L1 # [T=77, D=768]
|
743 |
+
elif self.replace_dim == ModeReplaceDim.TOKEN:
|
744 |
+
dist = L1.mean(axis=1) # [T=77]
|
745 |
+
elif self.replace_dim == ModeReplaceDim.CHANNEL:
|
746 |
+
dist = L1.mean(axis=0) # [D=768]
|
747 |
+
else: raise ValueError(f'unknown replace_dim: {self.replace_dim}')
|
748 |
+
mask = dist > EPS
|
749 |
+
dist = torch.where(mask, dist, 0.0)
|
750 |
+
n_diff = mask.sum().item() # when value differs we have mask==True
|
751 |
+
n_inter = self.steps[i] + 1
|
752 |
+
replace_count = int(n_diff / n_inter) + 1 # => cumulatively modifies [1/T, 2/T, .. T-1/T] of the total cond
|
753 |
+
|
754 |
+
# Step 2: draw the replaced images
|
755 |
+
inter_pos_hidden.value = from_pos_hidden.value
|
756 |
+
is_break_iter = False
|
757 |
+
for _ in range(1, n_inter):
|
758 |
+
if state.interrupted: is_break_iter = True ; break
|
759 |
+
|
760 |
+
inter_pos_hidden.value = replace_until_match(inter_pos_hidden.value, to_pos_hidden.value, replace_count, dist=dist, order=self.replace_order)
|
761 |
+
with on_cfg_denoiser_wrapper(partial(set_cond_callback, [inter_pos_hidden])):
|
762 |
+
process_p()
|
763 |
+
|
764 |
+
# ========== ↑↑↑ major differences from run_linear() ↑↑↑ ==========
|
765 |
+
|
766 |
+
if is_break_iter: break
|
767 |
+
|
768 |
+
# Step 3: append the final stage
|
769 |
+
if self.genesis != Gensis.SUCCESSIVE: self.images.extend(imgs)
|
770 |
+
# move to next stage
|
771 |
+
from_pos_hidden.value = to_pos_hidden.value
|
772 |
+
inter_pos_hidden.value = None
|
773 |
+
|
774 |
+
''' ↓↓↓ extension support ↓↓↓ '''
|
775 |
+
|
776 |
+
def ext_depth_preprocess(self, p:Processing, depth_img:PILImage): # copy from repo `AnonymousCervine/depth-image-io-for-SDWebui`
|
777 |
+
from types import MethodType
|
778 |
+
from einops import repeat, rearrange
|
779 |
+
import modules.shared as shared
|
780 |
+
import modules.devices as devices
|
781 |
+
|
782 |
+
def sanitize_pil_image_mode(img):
|
783 |
+
if img.mode in {'P', 'CMYK', 'HSV'}:
|
784 |
+
img = img.convert(mode='RGB')
|
785 |
+
return img
|
786 |
+
|
787 |
+
def alt_depth_image_conditioning(self, source_image):
|
788 |
+
with devices.autocast():
|
789 |
+
conditioning_image = self.sd_model.get_first_stage_encoding(self.sd_model.encode_first_stage(source_image))
|
790 |
+
depth_data = np.array(sanitize_pil_image_mode(depth_img))
|
791 |
+
|
792 |
+
if len(np.shape(depth_data)) == 2:
|
793 |
+
depth_data = rearrange(depth_data, "h w -> 1 1 h w")
|
794 |
+
else:
|
795 |
+
depth_data = rearrange(depth_data, "h w c -> c 1 1 h w")[0]
|
796 |
+
depth_data = torch.from_numpy(depth_data).to(device=shared.device).to(dtype=torch.float32)
|
797 |
+
depth_data = repeat(depth_data, "1 ... -> n ...", n=self.batch_size)
|
798 |
+
|
799 |
+
conditioning = torch.nn.functional.interpolate(
|
800 |
+
depth_data,
|
801 |
+
size=conditioning_image.shape[2:],
|
802 |
+
mode="bicubic",
|
803 |
+
align_corners=False,
|
804 |
+
)
|
805 |
+
(depth_min, depth_max) = torch.aminmax(conditioning)
|
806 |
+
conditioning = 2. * (conditioning - depth_min) / (depth_max - depth_min) - 1.
|
807 |
+
return conditioning
|
808 |
+
|
809 |
+
p.depth2img_image_conditioning = MethodType(alt_depth_image_conditioning, p)
|
810 |
+
|
811 |
+
def alt_txt2img_image_conditioning(self, x, width=None, height=None):
|
812 |
+
fake_img = torch.zeros(1, 3, height or self.height, width or self.width).to(shared.device).type(self.sd_model.dtype)
|
813 |
+
return self.depth2img_image_conditioning(fake_img)
|
814 |
+
|
815 |
+
p.txt2img_image_conditioning = MethodType(alt_txt2img_image_conditioning, p)
|
816 |
+
|
817 |
+
def ext_depth_postprocess(self, p:Processing, depth_img:PILImage):
|
818 |
+
depth_img.close()
|
tools/README.txt
ADDED
@@ -0,0 +1,25 @@
1 |
+
Put your post-processing tools (or links to them) here.
|
2 |
+
|
3 |
+
The directory layout should be like:
|
4 |
+
|
5 |
+
tools
|
6 |
+
├── install.cmd
|
7 |
+
├── link.cmd
|
8 |
+
├── busybox.exe
|
9 |
+
├── realesrgan-ncnn-vulkan
|
10 |
+
│ ├── realesrgan-ncnn-vulkan.exe # executable
|
11 |
+
│ └── models # model checkpoints
|
12 |
+
│ ├── *.bin
|
13 |
+
│ ├── *.param
|
14 |
+
│ └── *.pth
|
15 |
+
├── rife-ncnn-vulkan
|
16 |
+
│ ├── rife-ncnn-vulkan.exe # executable
|
17 |
+
│ └── rife* # model checkpoints
|
18 |
+
│ ├── *.bin
|
19 |
+
│ ├── *.param
|
20 |
+
│ └── *.pth
|
21 |
+
└── ffmpeg
|
22 |
+
└── bin
|
23 |
+
├── ffmpeg.exe # executable
|
24 |
+
├── ffplay.exe
|
25 |
+
└── ffprobe.exe
|
tools/install.cmd
ADDED
@@ -0,0 +1,109 @@
1 |
+
@REM Auto-download and set up post-processing tools
|
2 |
+
@ECHO OFF
|
3 |
+
SETLOCAL
|
4 |
+
|
5 |
+
REM Usage: install.cmd install and keep the .download folder
|
6 |
+
REM install.cmd -c install and clean up the .download folder
|
7 |
+
|
8 |
+
TITLE Install tools for post-process...
|
9 |
+
CD %~dp0
|
10 |
+
|
11 |
+
REM paths to web resources
|
12 |
+
SET CURL_BIN=curl.exe -L -C -
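REM -L follows redirects, -C - resumes partial downloads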
|
13 |
+
|
14 |
+
SET BBOX_URL=https://frippery.org/files/busybox/busybox.exe
|
15 |
+
SET BBOX_BIN=busybox.exe
|
16 |
+
SET UNZIP_BIN=%BBOX_BIN% unzip
|
17 |
+
|
18 |
+
SET RESR_URL=https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesrgan-ncnn-vulkan-20220424-windows.zip
|
19 |
+
SET RESR_ZIP=realesrgan-ncnn-vulkan.zip
|
20 |
+
SET RESR_DIR=realesrgan-ncnn-vulkan
|
21 |
+
|
22 |
+
SET RIFE_URL=https://github.com/nihui/rife-ncnn-vulkan/releases/download/20221029/rife-ncnn-vulkan-20221029-windows.zip
|
23 |
+
SET RIFE_ZIP=rife-ncnn-vulkan.zip
|
24 |
+
SET RIFE_DIR=rife-ncnn-vulkan
|
25 |
+
SET RIFE_RDIR=rife-ncnn-vulkan-20221029-windows
|
26 |
+
|
27 |
+
SET FFMPEG_URL=https://github.com/GyanD/codexffmpeg/releases/download/5.1.2/ffmpeg-5.1.2-full_build-shared.zip
|
28 |
+
SET FFMPEG_ZIP=ffmpeg.zip
|
29 |
+
SET FFMPEG_DIR=ffmpeg
|
30 |
+
SET FFMPEG_RDIR=ffmpeg-5.1.2-full_build-shared
|
31 |
+
|
32 |
+
REM make cache tmpdir
|
33 |
+
SET DOWNLOAD_DIR=.download
|
34 |
+
IF NOT EXIST %DOWNLOAD_DIR% MKDIR %DOWNLOAD_DIR%
|
35 |
+
ATTRIB +H %DOWNLOAD_DIR%
|
36 |
+
|
37 |
+
REM start installation
|
38 |
+
ECHO ==================================================
|
39 |
+
|
40 |
+
ECHO [0/3] download BusyBox
|
41 |
+
IF EXIST %BBOX_BIN% GOTO skip_bbox
|
42 |
+
%CURL_BIN% %BBOX_URL% -o %BBOX_BIN%
|
43 |
+
:skip_bbox
|
44 |
+
|
45 |
+
ECHO ==================================================
|
46 |
+
|
47 |
+
ECHO [1/3] install Real-ESRGAN
|
48 |
+
IF EXIST %RESR_DIR% GOTO skip_resr
|
49 |
+
IF EXIST %DOWNLOAD_DIR%\%RESR_ZIP% GOTO skip_dl_resr
|
50 |
+
ECHO ^>^> download from %RESR_URL%
|
51 |
+
%CURL_BIN% %RESR_URL% -o %DOWNLOAD_DIR%\%RESR_ZIP%
|
52 |
+
IF ERRORLEVEL 1 GOTO die
|
53 |
+
:skip_dl_resr
|
54 |
+
ECHO ^>^> unzip %RESR_ZIP%
|
55 |
+
MKDIR %RESR_DIR%
|
56 |
+
%UNZIP_BIN% %DOWNLOAD_DIR%\%RESR_ZIP% -d %RESR_DIR%
|
57 |
+
IF ERRORLEVEL 1 GOTO die
|
58 |
+
:skip_resr
|
59 |
+
|
60 |
+
ECHO ==================================================
|
61 |
+
|
62 |
+
ECHO [2/3] install RIFE
|
63 |
+
IF EXIST %RIFE_DIR% GOTO skip_rife
|
64 |
+
IF EXIST %DOWNLOAD_DIR%\%RIFE_ZIP% GOTO skip_dl_rife
|
65 |
+
ECHO ^>^> download from %RIFE_URL%
|
66 |
+
%CURL_BIN% %RIFE_URL% -o %DOWNLOAD_DIR%\%RIFE_ZIP%
|
67 |
+
IF ERRORLEVEL 1 GOTO die
|
68 |
+
:skip_dl_rife
|
69 |
+
ECHO ^>^> unzip %RIFE_ZIP%
|
70 |
+
%UNZIP_BIN% %DOWNLOAD_DIR%\%RIFE_ZIP%
|
71 |
+
IF ERRORLEVEL 1 GOTO die
|
72 |
+
RENAME %RIFE_RDIR% %RIFE_DIR%
|
73 |
+
:skip_rife
|
74 |
+
|
75 |
+
ECHO ==================================================
|
76 |
+
|
77 |
+
ECHO [3/3] install FFmpeg
|
78 |
+
IF EXIST %FFMPEG_DIR% GOTO skip_ffmpeg
|
79 |
+
IF EXIST %DOWNLOAD_DIR%\%FFMPEG_ZIP% GOTO skip_dl_ffmpeg
|
80 |
+
ECHO ^>^> download from %FFMPEG_URL%
|
81 |
+
%CURL_BIN% %FFMPEG_URL% -o %DOWNLOAD_DIR%\%FFMPEG_ZIP%
|
82 |
+
IF ERRORLEVEL 1 GOTO die
|
83 |
+
:skip_dl_ffmpeg
|
84 |
+
ECHO ^>^> unzip %FFMPEG_ZIP%
|
85 |
+
%UNZIP_BIN% %DOWNLOAD_DIR%\%FFMPEG_ZIP%
|
86 |
+
IF ERRORLEVEL 1 GOTO die
|
87 |
+
RENAME %FFMPEG_RDIR% %FFMPEG_DIR%
|
88 |
+
:skip_ffmpeg
|
89 |
+
|
90 |
+
ECHO ==================================================
|
91 |
+
|
92 |
+
REM clean cache
|
93 |
+
IF /I "%~1"=="-c" (
|
94 |
+
ATTRIB -H %DOWNLOAD_DIR%
|
95 |
+
RMDIR /S /Q %DOWNLOAD_DIR%
|
96 |
+
)
|
97 |
+
|
98 |
+
REM finished
|
99 |
+
ECHO ^>^> Done!
|
100 |
+
ECHO.
|
101 |
+
GOTO :end
|
102 |
+
|
103 |
+
REM error handle
|
104 |
+
:die
|
105 |
+
ECHO ^<^< Error!
|
106 |
+
ECHO ^<^< errorlevel: %ERRORLEVEL%
|
107 |
+
|
108 |
+
:end
|
109 |
+
PAUSE
|
tools/link.cmd
ADDED
@@ -0,0 +1,20 @@
1 |
+
@REM Make soft links to post-process tools
|
2 |
+
@ECHO OFF
|
3 |
+
SETLOCAL
|
4 |
+
|
5 |
+
SET RESR_HOME=D:\tools\realesrgan-ncnn-vulkan
|
6 |
+
SET RIFE_HOME=D:\tools\rife-ncnn-vulkan
|
7 |
+
SET FFMPEG_HOME=D:\tools\ffmpeg
|
8 |
+
|
9 |
+
@ECHO ON
|
10 |
+
|
11 |
+
PUSHD %~dp0
|
12 |
+
MKLINK /J realesrgan-ncnn-vulkan %RESR_HOME%
|
13 |
+
MKLINK /J rife-ncnn-vulkan %RIFE_HOME%
|
14 |
+
MKLINK /J ffmpeg %FFMPEG_HOME%
|
15 |
+
POPD
|
16 |
+
|
17 |
+
ECHO ^>^> Done!
|
18 |
+
ECHO.
|
19 |
+
|
20 |
+
PAUSE
|