JasonSmithSO committed on
Commit 74f4a06 · verified · 1 Parent(s): 2f7e25d

Upload 48 files

Files changed (49)
  1. .gitattributes +1 -0
  2. LICENSE.txt +201 -0
  3. NotoSans-Regular.ttf +3 -0
  4. README.md +252 -12
  5. __init__.py +214 -0
  6. dev_interface.py +6 -0
  7. hint_image_enchance.py +233 -0
  8. install.bat +20 -0
  9. log.py +80 -0
  10. lvminthin.py +87 -0
  11. node_wrappers/anime_face_segment.py +43 -0
  12. node_wrappers/anyline.py +87 -0
  13. node_wrappers/binary.py +29 -0
  14. node_wrappers/canny.py +30 -0
  15. node_wrappers/color.py +26 -0
  16. node_wrappers/densepose.py +31 -0
  17. node_wrappers/depth_anything.py +55 -0
  18. node_wrappers/depth_anything_v2.py +56 -0
  19. node_wrappers/diffusion_edge.py +41 -0
  20. node_wrappers/dsine.py +31 -0
  21. node_wrappers/dwpose.py +160 -0
  22. node_wrappers/hed.py +53 -0
  23. node_wrappers/inpaint.py +27 -0
  24. node_wrappers/leres.py +32 -0
  25. node_wrappers/lineart.py +30 -0
  26. node_wrappers/lineart_anime.py +27 -0
  27. node_wrappers/lineart_standard.py +27 -0
  28. node_wrappers/manga_line.py +27 -0
  29. node_wrappers/mediapipe_face.py +39 -0
  30. node_wrappers/mesh_graphormer.py +158 -0
  31. node_wrappers/metric3d.py +57 -0
  32. node_wrappers/midas.py +59 -0
  33. node_wrappers/mlsd.py +31 -0
  34. node_wrappers/normalbae.py +27 -0
  35. node_wrappers/oneformer.py +50 -0
  36. node_wrappers/openpose.py +46 -0
  37. node_wrappers/pidinet.py +30 -0
  38. node_wrappers/pose_keypoint_postprocess.py +340 -0
  39. node_wrappers/recolor.py +46 -0
  40. node_wrappers/scribble.py +74 -0
  41. node_wrappers/segment_anything.py +27 -0
  42. node_wrappers/shuffle.py +27 -0
  43. node_wrappers/teed.py +30 -0
  44. node_wrappers/tile.py +73 -0
  45. node_wrappers/uniformer.py +29 -0
  46. node_wrappers/unimatch.py +75 -0
  47. node_wrappers/zoe.py +27 -0
  48. pyproject.toml +14 -0
  49. requirements.txt +24 -0
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
  comfyui_screenshot.png filter=lfs diff=lfs merge=lfs -text
+ NotoSans-Regular.ttf filter=lfs diff=lfs merge=lfs -text
LICENSE.txt ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
NotoSans-Regular.ttf ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b04c8dd65af6b73eb4279472ed1580b29102d6496a377340e80a40cdb3b22c9
3
+ size 455188
README.md CHANGED
@@ -1,12 +1,252 @@
1
- ---
2
- title: FooocusEnhanced
3
- emoji: 😻
4
- colorFrom: red
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 5.37.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
+ # ComfyUI's ControlNet Auxiliary Preprocessors
2
+ Plug-and-play [ComfyUI](https://github.com/comfyanonymous/ComfyUI) node sets for making [ControlNet](https://github.com/lllyasviel/ControlNet/) hint images
3
+
4
+ "anime style, a protest in the street, cyberpunk city, a woman with pink hair and golden eyes (looking at the viewer) is holding a sign with the text "ComfyUI ControlNet Aux" in bold, neon pink" on Flux.1 Dev
5
+
6
+ ![](./examples/CNAuxBanner.jpg)
7
+
8
+ The code is copy-pasted from the respective folders in https://github.com/lllyasviel/ControlNet/tree/main/annotator and connected to [the 🤗 Hub](https://huggingface.co/lllyasviel/Annotators).
9
+
10
+ All credit & copyright goes to https://github.com/lllyasviel.
11
+
12
+ # Updates
13
+ Go to [Update page](./UPDATES.md) to follow updates
14
+
15
+ # Installation:
16
+ ## Using ComfyUI Manager (recommended):
17
+ Install [ComfyUI Manager](https://github.com/ltdrdata/ComfyUI-Manager) and follow the steps introduced there to install this repo.
18
+
19
+ ## Alternative:
20
+ If you're running on Linux, or on a non-admin account on Windows, you'll want to ensure `/ComfyUI/custom_nodes` and `comfyui_controlnet_aux` have write permissions.
21
+
22
+ There is now an **install.bat** you can run to install to the portable build if it is detected. Otherwise it will default to the system Python and assume you followed ComfyUI's manual installation steps.
23
+
24
+ If you can't run **install.bat** (e.g. you are a Linux user), open the CMD/Shell and do the following:
25
+ - Navigate to your `/ComfyUI/custom_nodes/` folder
26
+ - Run `git clone https://github.com/Fannovel16/comfyui_controlnet_aux/`
27
+ - Navigate to your `comfyui_controlnet_aux` folder
28
+ - Portable/venv:
29
+ - Run `path/to/ComfyUI/python_embedded/python.exe -s -m pip install -r requirements.txt`
30
+ - With system python
31
+ - Run `pip install -r requirements.txt`
32
+ - Start ComfyUI
33
+
34
+ # Nodes
35
+ Please note that this repo only supports preprocessors that make hint images (e.g. stickman, canny edge, etc.).
36
+ All preprocessors except Inpaint are integrated into the `AIO Aux Preprocessor` node.
37
+ This node lets you quickly switch preprocessors, but a preprocessor's own threshold parameters cannot be set from it.
38
+ You need to use its node directly to set thresholds.
39
+
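+ A minimal sketch of driving the AIO node from Python the way ComfyUI itself would (hypothetical usage; the import path and the `CannyEdgePreprocessor` node key are assumptions, not something this README documents):
+ ```py
+ import torch
+ from comfyui_controlnet_aux import NODE_CLASS_MAPPINGS  # assumes ComfyUI's custom_nodes folder is on sys.path
+
+ image = torch.zeros((1, 512, 512, 3))  # a ComfyUI IMAGE batch: (batch, height, width, 3) in [0, 1]
+ aio = NODE_CLASS_MAPPINGS["AIO_Preprocessor"]()
+ hint_image, = aio.execute(preprocessor="CannyEdgePreprocessor", image=image, resolution=512)
+ ```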
40
+ # Nodes (sections are categories in Comfy menu)
41
+ ## Line Extractors
42
+ | Preprocessor Node | sd-webui-controlnet/other | ControlNet/T2I-Adapter |
43
+ |-----------------------------|---------------------------|-------------------------------------------|
44
+ | Binary Lines | binary | control_scribble |
45
+ | Canny Edge | canny | control_v11p_sd15_canny <br> control_canny <br> t2iadapter_canny |
46
+ | HED Soft-Edge Lines | hed | control_v11p_sd15_softedge <br> control_hed |
47
+ | Standard Lineart | standard_lineart | control_v11p_sd15_lineart |
48
+ | Realistic Lineart | lineart (or `lineart_coarse` if `coarse` is enabled) | control_v11p_sd15_lineart |
49
+ | Anime Lineart | lineart_anime | control_v11p_sd15s2_lineart_anime |
50
+ | Manga Lineart | lineart_anime_denoise | control_v11p_sd15s2_lineart_anime |
51
+ | M-LSD Lines | mlsd | control_v11p_sd15_mlsd <br> control_mlsd |
52
+ | PiDiNet Soft-Edge Lines | pidinet | control_v11p_sd15_softedge <br> control_scribble |
53
+ | Scribble Lines | scribble | control_v11p_sd15_scribble <br> control_scribble |
54
+ | Scribble XDoG Lines | scribble_xdog | control_v11p_sd15_scribble <br> control_scribble |
55
+ | Fake Scribble Lines | scribble_hed | control_v11p_sd15_scribble <br> control_scribble |
56
+ | TEED Soft-Edge Lines | teed | [controlnet-sd-xl-1.0-softedge-dexined](https://huggingface.co/SargeZT/controlnet-sd-xl-1.0-softedge-dexined/blob/main/controlnet-sd-xl-1.0-softedge-dexined.safetensors) <br> control_v11p_sd15_softedge (Theoretically)
57
+ | Scribble PiDiNet Lines | scribble_pidinet | control_v11p_sd15_scribble <br> control_scribble |
58
+ | AnyLine Lineart | | mistoLine_fp16.safetensors <br> mistoLine_rank256 <br> control_v11p_sd15s2_lineart_anime <br> control_v11p_sd15_lineart |
59
+
60
+ ## Normal and Depth Estimators
61
+ | Preprocessor Node | sd-webui-controlnet/other | ControlNet/T2I-Adapter |
62
+ |-----------------------------|---------------------------|-------------------------------------------|
63
+ | MiDaS Depth Map | (normal) depth | control_v11f1p_sd15_depth <br> control_depth <br> t2iadapter_depth |
64
+ | LeReS Depth Map | depth_leres | control_v11f1p_sd15_depth <br> control_depth <br> t2iadapter_depth |
65
+ | Zoe Depth Map | depth_zoe | control_v11f1p_sd15_depth <br> control_depth <br> t2iadapter_depth |
66
+ | MiDaS Normal Map | normal_map | control_normal |
67
+ | BAE Normal Map | normal_bae | control_v11p_sd15_normalbae |
68
+ | MeshGraphormer Hand Refiner ([HandRefiner](https://github.com/wenquanlu/HandRefiner)) | depth_hand_refiner | [control_sd15_inpaint_depth_hand_fp16](https://huggingface.co/hr16/ControlNet-HandRefiner-pruned/blob/main/control_sd15_inpaint_depth_hand_fp16.safetensors) |
69
+ | Depth Anything | depth_anything | [Depth-Anything](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints_controlnet/diffusion_pytorch_model.safetensors) |
70
+ | Zoe Depth Anything <br> (Basically Zoe but the encoder is replaced with DepthAnything) | depth_anything | [Depth-Anything](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints_controlnet/diffusion_pytorch_model.safetensors) |
71
+ | Normal DSINE | | control_normal/control_v11p_sd15_normalbae |
72
+ | Metric3D Depth | | control_v11f1p_sd15_depth <br> control_depth <br> t2iadapter_depth |
73
+ | Metric3D Normal | | control_v11p_sd15_normalbae |
74
+ | Depth Anything V2 | | [Depth-Anything](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints_controlnet/diffusion_pytorch_model.safetensors) |
75
+
76
+ ## Faces and Poses Estimators
77
+ | Preprocessor Node | sd-webui-controlnet/other | ControlNet/T2I-Adapter |
78
+ |-----------------------------|---------------------------|-------------------------------------------|
79
+ | DWPose Estimator | dw_openpose_full | control_v11p_sd15_openpose <br> control_openpose <br> t2iadapter_openpose |
80
+ | OpenPose Estimator | openpose (detect_body) <br> openpose_hand (detect_body + detect_hand) <br> openpose_faceonly (detect_face) <br> openpose_full (detect_hand + detect_body + detect_face) | control_v11p_sd15_openpose <br> control_openpose <br> t2iadapter_openpose |
81
+ | MediaPipe Face Mesh | mediapipe_face | controlnet_sd21_laion_face_v2 |
82
+ | Animal Estimator | animal_openpose | [control_sd15_animal_openpose_fp16](https://huggingface.co/huchenlei/animal_openpose/blob/main/control_sd15_animal_openpose_fp16.pth) |
83
+
84
+ ## Optical Flow Estimators
85
+ | Preprocessor Node | sd-webui-controlnet/other | ControlNet/T2I-Adapter |
86
+ |-----------------------------|---------------------------|-------------------------------------------|
87
+ | Unimatch Optical Flow | | [DragNUWA](https://github.com/ProjectNUWA/DragNUWA) |
88
+
89
+ ### How to get OpenPose-format JSON?
90
+ #### User-side
91
+ This workflow will save images to ComfyUI's output folder (the same location as output images). If you can't find the `Save Pose Keypoints` node, update this extension.
92
+ ![](./examples/example_save_kps.png)
93
+
94
+ #### Dev-side
95
+ An array of [OpenPose-format JSON](https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/02_output.md#json-output-format) corresponding to each frame in an IMAGE batch can be obtained from DWPose and OpenPose using `app.nodeOutputs` on the UI or the `/history` API endpoint. The JSON output from AnimalPose uses a format similar to OpenPose JSON:
96
+ ```
97
+ [
98
+ {
99
+ "version": "ap10k",
100
+ "animals": [
101
+ [[x1, y1, 1], [x2, y2, 1],..., [x17, y17, 1]],
102
+ [[x1, y1, 1], [x2, y2, 1],..., [x17, y17, 1]],
103
+ ...
104
+ ],
105
+ "canvas_height": 512,
106
+ "canvas_width": 768
107
+ },
108
+ ...
109
+ ]
110
+ ```
111
+
112
+ For extension developers (e.g. Openpose editor):
113
+ ```js
114
+ const poseNodes = app.graph._nodes.filter(node => ["OpenposePreprocessor", "DWPreprocessor", "AnimalPosePreprocessor"].includes(node.type))
115
+ for (const poseNode of poseNodes) {
116
+ const openposeResults = JSON.parse(app.nodeOutputs[poseNode.id].openpose_json[0])
117
+ console.log(openposeResults) //An array containing Openpose JSON for each frame
118
+ }
119
+ ```
120
+
121
+ For API users:
122
+ Javascript
123
+ ```js
124
+ import fetch from "node-fetch" //Remember to add "type": "module" to "package.json"
125
+ async function main() {
126
+ const promptId = '792c1905-ecfe-41f4-8114-83e6a4a09a9f' //Too lazy to POST /queue
127
+ let history = await fetch(`http://127.0.0.1:8188/history/${promptId}`).then(re => re.json())
128
+ history = history[promptId]
129
+ const nodeOutputs = Object.values(history.outputs).filter(output => output.openpose_json)
130
+ for (const nodeOutput of nodeOutputs) {
131
+ const openposeResults = JSON.parse(nodeOutput.openpose_json[0])
132
+ console.log(openposeResults) //An array containing Openpose JSON for each frame
133
+ }
134
+ }
135
+ main()
136
+ ```
137
+
138
+ Python
139
+ ```py
140
+ import json, urllib.request
141
+
142
+ server_address = "127.0.0.1:8188"
143
+ prompt_id = '' #Too lazy to POST /queue
144
+
145
+ def get_history(prompt_id):
146
+ with urllib.request.urlopen("http://{}/history/{}".format(server_address, prompt_id)) as response:
147
+ return json.loads(response.read())
148
+
149
+ history = get_history(prompt_id)[prompt_id]
150
+ for node_id in history['outputs']:
+     node_output = history['outputs'][node_id]
+     if 'openpose_json' in node_output:
+         print(json.loads(node_output['openpose_json'][0])) # A list containing OpenPose JSON for each frame
155
+ ```
156
+ ## Semantic Segmentation
157
+ | Preprocessor Node | sd-webui-controlnet/other | ControlNet/T2I-Adapter |
158
+ |-----------------------------|---------------------------|-------------------------------------------|
159
+ | OneFormer ADE20K Segmentor | oneformer_ade20k | control_v11p_sd15_seg |
160
+ | OneFormer COCO Segmentor | oneformer_coco | control_v11p_sd15_seg |
161
+ | UniFormer Segmentor | segmentation |control_sd15_seg <br> control_v11p_sd15_seg|
162
+
163
+ ## T2IAdapter-only
164
+ | Preprocessor Node | sd-webui-controlnet/other | ControlNet/T2I-Adapter |
165
+ |-----------------------------|---------------------------|-------------------------------------------|
166
+ | Color Pallete | color | t2iadapter_color |
167
+ | Content Shuffle | shuffle | t2iadapter_style |
168
+
169
+ ## Recolor
170
+ | Preprocessor Node | sd-webui-controlnet/other | ControlNet/T2I-Adapter |
171
+ |-----------------------------|---------------------------|-------------------------------------------|
172
+ | Image Luminance | recolor_luminance | [ioclab_sd15_recolor](https://huggingface.co/lllyasviel/sd_control_collection/resolve/main/ioclab_sd15_recolor.safetensors) <br> [sai_xl_recolor_256lora](https://huggingface.co/lllyasviel/sd_control_collection/resolve/main/sai_xl_recolor_256lora.safetensors) <br> [bdsqlsz_controlllite_xl_recolor_luminance](https://huggingface.co/bdsqlsz/qinglong_controlnet-lllite/resolve/main/bdsqlsz_controlllite_xl_recolor_luminance.safetensors) |
173
+ | Image Intensity | recolor_intensity | Unknown; possibly the same as above |
174
+
175
+ # Examples
176
+ > A picture is worth a thousand words
177
+
178
+ ![](./examples/ExecuteAll1.jpg)
179
+ ![](./examples/ExecuteAll2.jpg)
180
+
181
+ # Testing workflow
182
+ https://github.com/Fannovel16/comfyui_controlnet_aux/blob/main/examples/ExecuteAll.png
183
+ Input image: https://github.com/Fannovel16/comfyui_controlnet_aux/blob/main/examples/comfyui-controlnet-aux-logo.png
184
+
185
+ # Q&A:
186
+ ## Why do some nodes not appear after I installed this repo?
187
+
188
+ This repo has a new mechanism that skips any custom node that can't be imported. If you hit this case, please create an issue on the [Issues tab](https://github.com/Fannovel16/comfyui_controlnet_aux/issues) with the log from the command line.
189
+
190
+ ## DWPose/AnimalPose only uses the CPU, so it's slow. How can I make it use the GPU?
191
+ There are two ways to speed up DWPose: TorchScript checkpoints (.torchscript.pt) or ONNXRuntime (.onnx). The TorchScript route is a little slower than ONNXRuntime but doesn't require any additional library and is still much faster than the CPU.
192
+
193
+ A TorchScript bbox detector is compatible with an ONNX pose estimator and vice versa.
194
+ ### TorchScript
195
+ Set `bbox_detector` and `pose_estimator` according to this picture. You can try other bbox detectors ending with `.torchscript.pt` to reduce bbox detection time if your input images are close to ideal.
196
+ ![](./examples/example_torchscript.png)
197
+ ### ONNXRuntime
198
+ If onnxruntime is installed successfully and the checkpoint used ends with `.onnx`, it will replace the default cv2 backend to take advantage of the GPU. Note that if you are using an NVIDIA card, this method currently only works on CUDA 11.8 (ComfyUI_windows_portable_nvidia_cu118_or_cpu.7z) unless you compile onnxruntime yourself. A quick provider check is shown after the steps below.
199
+
200
+ 1. Know your onnxruntime build:
201
+ * * NVIDIA CUDA 11.x or below/AMD GPU: `onnxruntime-gpu`
202
+ * * NVidia CUDA 12.x: `onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/`
203
+ * * DirectML: `onnxruntime-directml`
204
+ * * OpenVINO: `onnxruntime-openvino`
205
+
206
+ Note that if this is your first time using ComfyUI, please test whether it can run on your device before doing the next steps.
207
+
208
+ 2. Add it into `requirements.txt`
209
+
210
+ 3. Run `install.bat` or pip command mentioned in Installation
211
+
212
+ ![](./examples/example_onnx.png)
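+
+ A quick way to confirm which onnxruntime providers are actually available (a diagnostic sketch, not part of this repo):
+ ```py
+ import onnxruntime as ort
+ # If the GPU build was installed correctly, a provider such as 'CUDAExecutionProvider',
+ # 'DmlExecutionProvider' or 'OpenVINOExecutionProvider' should appear in this list.
+ print(ort.get_available_providers())
+ ```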
213
+
214
+ # Assets files of preprocessors
215
+ * anime_face_segment: [bdsqlsz/qinglong_controlnet-lllite/Annotators/UNet.pth](https://huggingface.co/bdsqlsz/qinglong_controlnet-lllite/blob/main/Annotators/UNet.pth), [anime-seg/isnetis.ckpt](https://huggingface.co/skytnt/anime-seg/blob/main/isnetis.ckpt)
216
+ * densepose: [LayerNorm/DensePose-TorchScript-with-hint-image/densepose_r50_fpn_dl.torchscript](https://huggingface.co/LayerNorm/DensePose-TorchScript-with-hint-image/blob/main/densepose_r50_fpn_dl.torchscript)
217
+ * dwpose:
218
+ * * bbox_detector: Either [yzd-v/DWPose/yolox_l.onnx](https://huggingface.co/yzd-v/DWPose/blob/main/yolox_l.onnx), [hr16/yolox-onnx/yolox_l.torchscript.pt](https://huggingface.co/hr16/yolox-onnx/blob/main/yolox_l.torchscript.pt), [hr16/yolo-nas-fp16/yolo_nas_l_fp16.onnx](https://huggingface.co/hr16/yolo-nas-fp16/blob/main/yolo_nas_l_fp16.onnx), [hr16/yolo-nas-fp16/yolo_nas_m_fp16.onnx](https://huggingface.co/hr16/yolo-nas-fp16/blob/main/yolo_nas_m_fp16.onnx), [hr16/yolo-nas-fp16/yolo_nas_s_fp16.onnx](https://huggingface.co/hr16/yolo-nas-fp16/blob/main/yolo_nas_s_fp16.onnx)
219
+ * * pose_estimator: Either [hr16/DWPose-TorchScript-BatchSize5/dw-ll_ucoco_384_bs5.torchscript.pt](https://huggingface.co/hr16/DWPose-TorchScript-BatchSize5/blob/main/dw-ll_ucoco_384_bs5.torchscript.pt), [yzd-v/DWPose/dw-ll_ucoco_384.onnx](https://huggingface.co/yzd-v/DWPose/blob/main/dw-ll_ucoco_384.onnx)
220
+ * animal_pose (ap10k):
221
+ * * bbox_detector: Either [yzd-v/DWPose/yolox_l.onnx](https://huggingface.co/yzd-v/DWPose/blob/main/yolox_l.onnx), [hr16/yolox-onnx/yolox_l.torchscript.pt](https://huggingface.co/hr16/yolox-onnx/blob/main/yolox_l.torchscript.pt), [hr16/yolo-nas-fp16/yolo_nas_l_fp16.onnx](https://huggingface.co/hr16/yolo-nas-fp16/blob/main/yolo_nas_l_fp16.onnx), [hr16/yolo-nas-fp16/yolo_nas_m_fp16.onnx](https://huggingface.co/hr16/yolo-nas-fp16/blob/main/yolo_nas_m_fp16.onnx), [hr16/yolo-nas-fp16/yolo_nas_s_fp16.onnx](https://huggingface.co/hr16/yolo-nas-fp16/blob/main/yolo_nas_s_fp16.onnx)
222
+ * * pose_estimator: Either [hr16/DWPose-TorchScript-BatchSize5/rtmpose-m_ap10k_256_bs5.torchscript.pt](https://huggingface.co/hr16/DWPose-TorchScript-BatchSize5/blob/main/rtmpose-m_ap10k_256_bs5.torchscript.pt), [hr16/UnJIT-DWPose/rtmpose-m_ap10k_256.onnx](https://huggingface.co/hr16/UnJIT-DWPose/blob/main/rtmpose-m_ap10k_256.onnx)
223
+ * hed: [lllyasviel/Annotators/ControlNetHED.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/ControlNetHED.pth)
224
+ * leres: [lllyasviel/Annotators/res101.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/res101.pth), [lllyasviel/Annotators/latest_net_G.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/latest_net_G.pth)
225
+ * lineart: [lllyasviel/Annotators/sk_model.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/sk_model.pth), [lllyasviel/Annotators/sk_model2.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/sk_model2.pth)
226
+ * lineart_anime: [lllyasviel/Annotators/netG.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/netG.pth)
227
+ * manga_line: [lllyasviel/Annotators/erika.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/erika.pth)
228
+ * mesh_graphormer: [hr16/ControlNet-HandRefiner-pruned/graphormer_hand_state_dict.bin](https://huggingface.co/hr16/ControlNet-HandRefiner-pruned/blob/main/graphormer_hand_state_dict.bin), [hr16/ControlNet-HandRefiner-pruned/hrnetv2_w64_imagenet_pretrained.pth](https://huggingface.co/hr16/ControlNet-HandRefiner-pruned/blob/main/hrnetv2_w64_imagenet_pretrained.pth)
229
+ * midas: [lllyasviel/Annotators/dpt_hybrid-midas-501f0c75.pt](https://huggingface.co/lllyasviel/Annotators/blob/main/dpt_hybrid-midas-501f0c75.pt)
230
+ * mlsd: [lllyasviel/Annotators/mlsd_large_512_fp32.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/mlsd_large_512_fp32.pth)
231
+ * normalbae: [lllyasviel/Annotators/scannet.pt](https://huggingface.co/lllyasviel/Annotators/blob/main/scannet.pt)
232
+ * oneformer: [lllyasviel/Annotators/250_16_swin_l_oneformer_ade20k_160k.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/250_16_swin_l_oneformer_ade20k_160k.pth)
233
+ * open_pose: [lllyasviel/Annotators/body_pose_model.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/body_pose_model.pth), [lllyasviel/Annotators/hand_pose_model.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/hand_pose_model.pth), [lllyasviel/Annotators/facenet.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/facenet.pth)
234
+ * pidi: [lllyasviel/Annotators/table5_pidinet.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/table5_pidinet.pth)
235
+ * sam: [dhkim2810/MobileSAM/mobile_sam.pt](https://huggingface.co/dhkim2810/MobileSAM/blob/main/mobile_sam.pt)
236
+ * uniformer: [lllyasviel/Annotators/upernet_global_small.pth](https://huggingface.co/lllyasviel/Annotators/blob/main/upernet_global_small.pth)
237
+ * zoe: [lllyasviel/Annotators/ZoeD_M12_N.pt](https://huggingface.co/lllyasviel/Annotators/blob/main/ZoeD_M12_N.pt)
238
+ * teed: [bdsqlsz/qinglong_controlnet-lllite/7_model.pth](https://huggingface.co/bdsqlsz/qinglong_controlnet-lllite/blob/main/Annotators/7_model.pth)
239
+ * depth_anything: Either [LiheYoung/Depth-Anything/checkpoints/depth_anything_vitl14.pth](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints/depth_anything_vitl14.pth), [LiheYoung/Depth-Anything/checkpoints/depth_anything_vitb14.pth](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints/depth_anything_vitb14.pth) or [LiheYoung/Depth-Anything/checkpoints/depth_anything_vits14.pth](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints/depth_anything_vits14.pth)
240
+ * diffusion_edge: Either [hr16/Diffusion-Edge/diffusion_edge_indoor.pt](https://huggingface.co/hr16/Diffusion-Edge/blob/main/diffusion_edge_indoor.pt), [hr16/Diffusion-Edge/diffusion_edge_urban.pt](https://huggingface.co/hr16/Diffusion-Edge/blob/main/diffusion_edge_urban.pt) or [hr16/Diffusion-Edge/diffusion_edge_natrual.pt](https://huggingface.co/hr16/Diffusion-Edge/blob/main/diffusion_edge_natrual.pt)
241
+ * unimatch: Either [hr16/Unimatch/gmflow-scale2-regrefine6-mixdata.pth](https://huggingface.co/hr16/Unimatch/blob/main/gmflow-scale2-regrefine6-mixdata.pth), [hr16/Unimatch/gmflow-scale2-mixdata.pth](https://huggingface.co/hr16/Unimatch/blob/main/gmflow-scale2-mixdata.pth) or [hr16/Unimatch/gmflow-scale1-mixdata.pth](https://huggingface.co/hr16/Unimatch/blob/main/gmflow-scale1-mixdata.pth)
242
+ * zoe_depth_anything: Either [LiheYoung/Depth-Anything/checkpoints_metric_depth/depth_anything_metric_depth_indoor.pt](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints_metric_depth/depth_anything_metric_depth_indoor.pt) or [LiheYoung/Depth-Anything/checkpoints_metric_depth/depth_anything_metric_depth_outdoor.pt](https://huggingface.co/spaces/LiheYoung/Depth-Anything/blob/main/checkpoints_metric_depth/depth_anything_metric_depth_outdoor.pt)
243
+ # 1500 Stars 😄
244
+ <a href="https://star-history.com/#Fannovel16/comfyui_controlnet_aux&Date">
245
+ <picture>
246
+ <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=Fannovel16/comfyui_controlnet_aux&type=Date&theme=dark" />
247
+ <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=Fannovel16/comfyui_controlnet_aux&type=Date" />
248
+ <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=Fannovel16/comfyui_controlnet_aux&type=Date" />
249
+ </picture>
250
+ </a>
251
+
252
+ Thanks for all your support. I never thought the star graph would be linear lol.
__init__.py ADDED
@@ -0,0 +1,214 @@
1
+ import sys, os
2
+ from .utils import here, define_preprocessor_inputs, INPUT
3
+ from pathlib import Path
4
+ import traceback
5
+ import importlib
6
+ from .log import log, blue_text, cyan_text, get_summary, get_label
7
+ from .hint_image_enchance import NODE_CLASS_MAPPINGS as HIE_NODE_CLASS_MAPPINGS
8
+ from .hint_image_enchance import NODE_DISPLAY_NAME_MAPPINGS as HIE_NODE_DISPLAY_NAME_MAPPINGS
9
+ #Ref: https://github.com/comfyanonymous/ComfyUI/blob/76d53c4622fc06372975ed2a43ad345935b8a551/nodes.py#L17
10
+ sys.path.insert(0, str(Path(here, "src").resolve()))
11
+ for pkg_name in ["custom_controlnet_aux", "custom_mmpkg"]:
12
+ sys.path.append(str(Path(here, "src", pkg_name).resolve()))
13
+
14
+ #Enable CPU fallback for ops not supported by MPS, like upsample_bicubic2d.out
15
+ #https://github.com/pytorch/pytorch/issues/77764
16
+ #https://github.com/Fannovel16/comfyui_controlnet_aux/issues/2#issuecomment-1763579485
17
+ os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = os.getenv("PYTORCH_ENABLE_MPS_FALLBACK", '1')
18
+
19
+
20
+ def load_nodes():
21
+ shorted_errors = []
22
+ full_error_messages = []
23
+ node_class_mappings = {}
24
+ node_display_name_mappings = {}
25
+
26
+ for filename in (here / "node_wrappers").iterdir():
27
+ module_name = filename.stem
28
+ if module_name.startswith('.'): continue #Skip hidden files created by the OS (e.g. [.DS_Store](https://en.wikipedia.org/wiki/.DS_Store))
29
+ try:
30
+ module = importlib.import_module(
31
+ f".node_wrappers.{module_name}", package=__package__
32
+ )
33
+ node_class_mappings.update(getattr(module, "NODE_CLASS_MAPPINGS"))
34
+ if hasattr(module, "NODE_DISPLAY_NAME_MAPPINGS"):
35
+ node_display_name_mappings.update(getattr(module, "NODE_DISPLAY_NAME_MAPPINGS"))
36
+
37
+ log.debug(f"Imported {module_name} nodes")
38
+
39
+ except AttributeError:
40
+ pass # wip nodes
41
+ except Exception:
42
+ error_message = traceback.format_exc()
43
+ full_error_messages.append(error_message)
44
+ error_message = error_message.splitlines()[-1]
45
+ shorted_errors.append(
46
+ f"Failed to import module {module_name} because {error_message}"
47
+ )
48
+
49
+ if len(shorted_errors) > 0:
50
+ full_err_log = '\n\n'.join(full_error_messages)
51
+ print(f"\n\nFull error log from comfyui_controlnet_aux: \n{full_err_log}\n\n")
52
+ log.info(
53
+ f"Some nodes failed to load:\n\t"
54
+ + "\n\t".join(shorted_errors)
55
+ + "\n\n"
56
+ + "Check that you properly installed the dependencies.\n"
57
+ + "If you think this is a bug, please report it on the github page (https://github.com/Fannovel16/comfyui_controlnet_aux/issues)"
58
+ )
59
+ return node_class_mappings, node_display_name_mappings
60
+
61
+ AUX_NODE_MAPPINGS, AUX_DISPLAY_NAME_MAPPINGS = load_nodes()
62
+
63
+ #For nodes that don't map image to image or that have special requirements
64
+ AIO_NOT_SUPPORTED = ["InpaintPreprocessor", "MeshGraphormer+ImpactDetector-DepthMapPreprocessor", "DiffusionEdge_Preprocessor"]
65
+ AIO_NOT_SUPPORTED += ["SavePoseKpsAsJsonFile", "FacialPartColoringFromPoseKps", "UpperBodyTrackingFromPoseKps", "RenderPeopleKps", "RenderAnimalKps"]
66
+ AIO_NOT_SUPPORTED += ["Unimatch_OptFlowPreprocessor", "MaskOptFlow"]
67
+
68
+ def preprocessor_options():
69
+ auxs = list(AUX_NODE_MAPPINGS.keys())
70
+ auxs.insert(0, "none")
71
+ for name in AIO_NOT_SUPPORTED:
72
+ if name in auxs:
73
+ auxs.remove(name)
74
+ return auxs
75
+
76
+
77
+ PREPROCESSOR_OPTIONS = preprocessor_options()
78
+
79
+ class AIO_Preprocessor:
80
+ @classmethod
81
+ def INPUT_TYPES(s):
82
+ return define_preprocessor_inputs(
83
+ preprocessor=INPUT.COMBO(PREPROCESSOR_OPTIONS, default="none"),
84
+ resolution=INPUT.RESOLUTION()
85
+ )
86
+
87
+ RETURN_TYPES = ("IMAGE",)
88
+ FUNCTION = "execute"
89
+
90
+ CATEGORY = "ControlNet Preprocessors"
91
+
92
+ def execute(self, preprocessor, image, resolution=512):
93
+ if preprocessor == "none":
94
+ return (image, )
95
+ else:
96
+ aux_class = AUX_NODE_MAPPINGS[preprocessor]
97
+ input_types = aux_class.INPUT_TYPES()
98
+ input_types = {
99
+ **input_types["required"],
100
+ **(input_types["optional"] if "optional" in input_types else {})
101
+ }
102
+ params = {}
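+ # Build the wrapped node's arguments: pass through image/resolution, use the declared default
+ # when one exists, and fall back to 0 / 0.0 for bare INT/FLOAT inputs with no default.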
103
+ for name, input_type in input_types.items():
104
+ if name == "image":
105
+ params[name] = image
106
+ continue
107
+
108
+ if name == "resolution":
109
+ params[name] = resolution
110
+ continue
111
+
112
+ if len(input_type) == 2 and ("default" in input_type[1]):
113
+ params[name] = input_type[1]["default"]
114
+ continue
115
+
116
+ default_values = { "INT": 0, "FLOAT": 0.0 }
117
+ if input_type[0] in default_values:
118
+ params[name] = default_values[input_type[0]]
119
+
120
+ return getattr(aux_class(), aux_class.FUNCTION)(**params)
121
+
122
+ class ControlNetAuxSimpleAddText:
123
+ @classmethod
124
+ def INPUT_TYPES(s):
125
+ return dict(
126
+ required=dict(image=INPUT.IMAGE(), text=INPUT.STRING())
127
+ )
128
+
129
+ RETURN_TYPES = ("IMAGE",)
130
+ FUNCTION = "execute"
131
+ CATEGORY = "ControlNet Preprocessors"
132
+ def execute(self, image, text):
133
+ from PIL import Image, ImageDraw, ImageFont
134
+ import numpy as np
135
+ import torch
136
+
137
+ font = ImageFont.truetype(str((here / "NotoSans-Regular.ttf").resolve()), 40)
138
+ img = Image.fromarray(image[0].cpu().numpy().__mul__(255.).astype(np.uint8))
139
+ ImageDraw.Draw(img).text((0,0), text, fill=(0,255,0), font=font)
140
+ return (torch.from_numpy(np.array(img)).unsqueeze(0) / 255.,)
141
+
142
+ class ExecuteAllControlNetPreprocessors:
143
+ @classmethod
144
+ def INPUT_TYPES(s):
145
+ return define_preprocessor_inputs(resolution=INPUT.RESOLUTION())
146
+ RETURN_TYPES = ("IMAGE",)
147
+ FUNCTION = "execute"
148
+
149
+ CATEGORY = "ControlNet Preprocessors"
150
+
151
+ def execute(self, image, resolution=512):
152
+ try:
153
+ from comfy_execution.graph_utils import GraphBuilder
154
+ except ImportError:
155
+ raise RuntimeError("ExecuteAllControlNetPreprocessor requries [Execution Model Inversion](https://github.com/comfyanonymous/ComfyUI/commit/5cfe38). Update ComfyUI/SwarmUI to get this feature")
156
+
157
+ graph = GraphBuilder()
158
+ curr_outputs = []
159
+ for preprocc in PREPROCESSOR_OPTIONS:
160
+ preprocc_node = graph.node("AIO_Preprocessor", preprocessor=preprocc, image=image, resolution=resolution)
161
+ hint_img = preprocc_node.out(0)
162
+ add_text_node = graph.node("ControlNetAuxSimpleAddText", image=hint_img, text=preprocc)
163
+ curr_outputs.append(add_text_node.out(0))
164
+
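+ # Pairwise-merge the per-preprocessor results with ImageBatch nodes until only one
+ # batched image remains, keeping the expanded graph a shallow reduction tree.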
165
+ while len(curr_outputs) > 1:
166
+ _outputs = []
167
+ for i in range(0, len(curr_outputs), 2):
168
+ if i+1 < len(curr_outputs):
169
+ image_batch = graph.node("ImageBatch", image1=curr_outputs[i], image2=curr_outputs[i+1])
170
+ _outputs.append(image_batch.out(0))
171
+ else:
172
+ _outputs.append(curr_outputs[i])
173
+ curr_outputs = _outputs
174
+
175
+ return {
176
+ "result": (curr_outputs[0],),
177
+ "expand": graph.finalize(),
178
+ }
179
+
180
+ class ControlNetPreprocessorSelector:
181
+ @classmethod
182
+ def INPUT_TYPES(s):
183
+ return {
184
+ "required": {
185
+ "preprocessor": (PREPROCESSOR_OPTIONS,),
186
+ }
187
+ }
188
+
189
+ RETURN_TYPES = (PREPROCESSOR_OPTIONS,)
190
+ RETURN_NAMES = ("preprocessor",)
191
+ FUNCTION = "get_preprocessor"
192
+
193
+ CATEGORY = "ControlNet Preprocessors"
194
+
195
+ def get_preprocessor(self, preprocessor: str):
196
+ return (preprocessor,)
197
+
198
+
199
+ NODE_CLASS_MAPPINGS = {
200
+ **AUX_NODE_MAPPINGS,
201
+ "AIO_Preprocessor": AIO_Preprocessor,
202
+ "ControlNetPreprocessorSelector": ControlNetPreprocessorSelector,
203
+ **HIE_NODE_CLASS_MAPPINGS,
204
+ "ExecuteAllControlNetPreprocessors": ExecuteAllControlNetPreprocessors,
205
+ "ControlNetAuxSimpleAddText": ControlNetAuxSimpleAddText
206
+ }
207
+
208
+ NODE_DISPLAY_NAME_MAPPINGS = {
209
+ **AUX_DISPLAY_NAME_MAPPINGS,
210
+ "AIO_Preprocessor": "AIO Aux Preprocessor",
211
+ "ControlNetPreprocessorSelector": "Preprocessor Selector",
212
+ **HIE_NODE_DISPLAY_NAME_MAPPINGS,
213
+ "ExecuteAllControlNetPreprocessors": "Execute All ControlNet Preprocessors"
214
+ }
dev_interface.py ADDED
@@ -0,0 +1,6 @@
1
+ from pathlib import Path
2
+ from utils import here
3
+ import sys
4
+ sys.path.append(str(Path(here, "src")))
5
+
6
+ from custom_controlnet_aux import *
hint_image_enchance.py ADDED
@@ -0,0 +1,233 @@
1
+ from .log import log
2
+ from .utils import ResizeMode, safe_numpy
3
+ import numpy as np
4
+ import torch
5
+ import cv2
6
+ from .utils import get_unique_axis0
7
+ from .lvminthin import nake_nms, lvmin_thin
8
+
9
+ MAX_IMAGEGEN_RESOLUTION = 8192 #https://github.com/comfyanonymous/ComfyUI/blob/c910b4a01ca58b04e5d4ab4c747680b996ada02b/nodes.py#L42
10
+ RESIZE_MODES = [ResizeMode.RESIZE.value, ResizeMode.INNER_FIT.value, ResizeMode.OUTER_FIT.value]
11
+
12
+ #Port from https://github.com/Mikubill/sd-webui-controlnet/blob/e67e017731aad05796b9615dc6eadce911298ea1/internal_controlnet/external_code.py#L89
13
+ class PixelPerfectResolution:
14
+ @classmethod
15
+ def INPUT_TYPES(s):
16
+ return {
17
+ "required": {
18
+ "original_image": ("IMAGE", ),
19
+ "image_gen_width": ("INT", {"default": 512, "min": 64, "max": MAX_IMAGEGEN_RESOLUTION, "step": 8}),
20
+ "image_gen_height": ("INT", {"default": 512, "min": 64, "max": MAX_IMAGEGEN_RESOLUTION, "step": 8}),
21
+ #https://github.com/comfyanonymous/ComfyUI/blob/c910b4a01ca58b04e5d4ab4c747680b996ada02b/nodes.py#L854
22
+ "resize_mode": (RESIZE_MODES, {"default": ResizeMode.RESIZE.value})
23
+ }
24
+ }
25
+
26
+ RETURN_TYPES = ("INT",)
27
+ RETURN_NAMES = ("RESOLUTION (INT)", )
28
+ FUNCTION = "execute"
29
+
30
+ CATEGORY = "ControlNet Preprocessors"
31
+
32
+ def execute(self, original_image, image_gen_width, image_gen_height, resize_mode):
33
+ _, raw_H, raw_W, _ = original_image.shape
34
+
35
+ k0 = float(image_gen_height) / float(raw_H)
36
+ k1 = float(image_gen_width) / float(raw_W)
37
+
38
+ if resize_mode == ResizeMode.OUTER_FIT.value:
39
+ estimation = min(k0, k1) * float(min(raw_H, raw_W))
40
+ else:
41
+ estimation = max(k0, k1) * float(min(raw_H, raw_W))
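+ # Worked example: a 768x1024 (HxW) input targeted at 512x512 gives k0 = 512/768 and k1 = 512/1024;
+ # OUTER_FIT keeps min(k0, k1) * 768 = 384, the other modes keep max(k0, k1) * 768 = 512.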
42
+
43
+ log.debug(f"Pixel Perfect Computation:")
44
+ log.debug(f"resize_mode = {resize_mode}")
45
+ log.debug(f"raw_H = {raw_H}")
46
+ log.debug(f"raw_W = {raw_W}")
47
+ log.debug(f"target_H = {image_gen_height}")
48
+ log.debug(f"target_W = {image_gen_width}")
49
+ log.debug(f"estimation = {estimation}")
50
+
51
+ return (int(np.round(estimation)), )
52
+
53
+ class HintImageEnchance:
54
+ @classmethod
55
+ def INPUT_TYPES(s):
56
+ return {
57
+ "required": {
58
+ "hint_image": ("IMAGE", ),
59
+ "image_gen_width": ("INT", {"default": 512, "min": 64, "max": MAX_IMAGEGEN_RESOLUTION, "step": 8}),
60
+ "image_gen_height": ("INT", {"default": 512, "min": 64, "max": MAX_IMAGEGEN_RESOLUTION, "step": 8}),
61
+ #https://github.com/comfyanonymous/ComfyUI/blob/c910b4a01ca58b04e5d4ab4c747680b996ada02b/nodes.py#L854
62
+ "resize_mode": (RESIZE_MODES, {"default": ResizeMode.RESIZE.value})
63
+ }
64
+ }
65
+
66
+ RETURN_TYPES = ("IMAGE",)
67
+ FUNCTION = "execute"
68
+
69
+ CATEGORY = "ControlNet Preprocessors"
70
+ def execute(self, hint_image, image_gen_width, image_gen_height, resize_mode):
71
+ outs = []
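+ # Each image in the batch is converted to uint8, resized/padded/cropped according to resize_mode,
+ # then converted back to a float tensor in [0, 1] before being re-stacked.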
72
+ for single_hint_image in hint_image:
73
+ np_hint_image = np.asarray(single_hint_image * 255., dtype=np.uint8)
74
+
75
+ if resize_mode == ResizeMode.RESIZE.value:
76
+ np_hint_image = self.execute_resize(np_hint_image, image_gen_width, image_gen_height)
77
+ elif resize_mode == ResizeMode.OUTER_FIT.value:
78
+ np_hint_image = self.execute_outer_fit(np_hint_image, image_gen_width, image_gen_height)
79
+ else:
80
+ np_hint_image = self.execute_inner_fit(np_hint_image, image_gen_width, image_gen_height)
81
+
82
+ outs.append(torch.from_numpy(np_hint_image.astype(np.float32) / 255.0))
83
+
84
+ return (torch.stack(outs, dim=0),)
85
+
86
+ def execute_resize(self, detected_map, w, h):
87
+ detected_map = self.high_quality_resize(detected_map, (w, h))
88
+ detected_map = safe_numpy(detected_map)
89
+ return detected_map
90
+
91
+ def execute_outer_fit(self, detected_map, w, h):
92
+ old_h, old_w, _ = detected_map.shape
93
+ old_w = float(old_w)
94
+ old_h = float(old_h)
95
+ k0 = float(h) / old_h
96
+ k1 = float(w) / old_w
97
+ safeint = lambda x: int(np.round(x))
98
+ k = min(k0, k1)
99
+
100
+ borders = np.concatenate([detected_map[0, :, :], detected_map[-1, :, :], detected_map[:, 0, :], detected_map[:, -1, :]], axis=0)
101
+ high_quality_border_color = np.median(borders, axis=0).astype(detected_map.dtype)
102
+ if len(high_quality_border_color) == 4:
103
+ # Inpaint hijack
104
+ high_quality_border_color[3] = 255
105
+ high_quality_background = np.tile(high_quality_border_color[None, None], [h, w, 1])
106
+ detected_map = self.high_quality_resize(detected_map, (safeint(old_w * k), safeint(old_h * k)))
107
+ new_h, new_w, _ = detected_map.shape
108
+ pad_h = max(0, (h - new_h) // 2)
109
+ pad_w = max(0, (w - new_w) // 2)
110
+ high_quality_background[pad_h:pad_h + new_h, pad_w:pad_w + new_w] = detected_map
111
+ detected_map = high_quality_background
112
+ detected_map = safe_numpy(detected_map)
113
+ return detected_map
114
+
115
+ def execute_inner_fit(self, detected_map, w, h):
116
+ old_h, old_w, _ = detected_map.shape
117
+ old_w = float(old_w)
118
+ old_h = float(old_h)
119
+ k0 = float(h) / old_h
120
+ k1 = float(w) / old_w
121
+ safeint = lambda x: int(np.round(x))
122
+ k = max(k0, k1)
123
+
124
+ detected_map = self.high_quality_resize(detected_map, (safeint(old_w * k), safeint(old_h * k)))
125
+ new_h, new_w, _ = detected_map.shape
126
+ pad_h = max(0, (new_h - h) // 2)
127
+ pad_w = max(0, (new_w - w) // 2)
128
+ detected_map = detected_map[pad_h:pad_h+h, pad_w:pad_w+w]
129
+ detected_map = safe_numpy(detected_map)
130
+ return detected_map
131
+
132
+ def high_quality_resize(self, x, size):
133
+ # Written by lvmin
134
+ # Super high-quality control map up-scaling, considering binary, seg, and one-pixel edges
135
+
136
+ inpaint_mask = None
137
+ if x.ndim == 3 and x.shape[2] == 4:
138
+ inpaint_mask = x[:, :, 3]
139
+ x = x[:, :, 0:3]
140
+
141
+ if x.shape[0] != size[1] or x.shape[1] != size[0]:
142
+ new_size_is_smaller = (size[0] * size[1]) < (x.shape[0] * x.shape[1])
143
+ new_size_is_bigger = (size[0] * size[1]) > (x.shape[0] * x.shape[1])
144
+ unique_color_count = len(get_unique_axis0(x.reshape(-1, x.shape[2])))
145
+ is_one_pixel_edge = False
146
+ is_binary = False
147
+ if unique_color_count == 2:
148
+ is_binary = np.min(x) < 16 and np.max(x) > 240
149
+ if is_binary:
150
+ xc = x
151
+ xc = cv2.erode(xc, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1)
152
+ xc = cv2.dilate(xc, np.ones(shape=(3, 3), dtype=np.uint8), iterations=1)
153
+ one_pixel_edge_count = np.where(xc < x)[0].shape[0]
154
+ all_edge_count = np.where(x > 127)[0].shape[0]
155
+ is_one_pixel_edge = one_pixel_edge_count * 2 > all_edge_count
156
+
157
+ if 2 < unique_color_count < 200:
158
+ interpolation = cv2.INTER_NEAREST
159
+ elif new_size_is_smaller:
160
+ interpolation = cv2.INTER_AREA
161
+ else:
162
+ interpolation = cv2.INTER_CUBIC # Must be CUBIC because we now use nms. NEVER CHANGE THIS
163
+
164
+ y = cv2.resize(x, size, interpolation=interpolation)
165
+ if inpaint_mask is not None:
166
+ inpaint_mask = cv2.resize(inpaint_mask, size, interpolation=interpolation)
167
+
168
+ if is_binary:
169
+ y = np.mean(y.astype(np.float32), axis=2).clip(0, 255).astype(np.uint8)
170
+ if is_one_pixel_edge:
171
+ y = nake_nms(y)
172
+ _, y = cv2.threshold(y, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
173
+ y = lvmin_thin(y, prunings=new_size_is_bigger)
174
+ else:
175
+ _, y = cv2.threshold(y, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
176
+ y = np.stack([y] * 3, axis=2)
177
+ else:
178
+ y = x
179
+
180
+ if inpaint_mask is not None:
181
+ inpaint_mask = (inpaint_mask > 127).astype(np.float32) * 255.0
182
+ inpaint_mask = inpaint_mask[:, :, None].clip(0, 255).astype(np.uint8)
183
+ y = np.concatenate([y, inpaint_mask], axis=2)
184
+
185
+ return y
186
+
187
+
188
+ class ImageGenResolutionFromLatent:
189
+ @classmethod
190
+ def INPUT_TYPES(s):
191
+ return {
192
+ "required": { "latent": ("LATENT", ) }
193
+ }
194
+
195
+ RETURN_TYPES = ("INT", "INT")
196
+ RETURN_NAMES = ("IMAGE_GEN_WIDTH (INT)", "IMAGE_GEN_HEIGHT (INT)")
197
+ FUNCTION = "execute"
198
+
199
+ CATEGORY = "ControlNet Preprocessors"
200
+
201
+ def execute(self, latent):
202
+ _, _, H, W = latent["samples"].shape
203
+ return (W * 8, H * 8)
204
+
205
+ class ImageGenResolutionFromImage:
206
+ @classmethod
207
+ def INPUT_TYPES(s):
208
+ return {
209
+ "required": { "image": ("IMAGE", ) }
210
+ }
211
+
212
+ RETURN_TYPES = ("INT", "INT")
213
+ RETURN_NAMES = ("IMAGE_GEN_WIDTH (INT)", "IMAGE_GEN_HEIGHT (INT)")
214
+ FUNCTION = "execute"
215
+
216
+ CATEGORY = "ControlNet Preprocessors"
217
+
218
+ def execute(self, image):
219
+ _, H, W, _ = image.shape
220
+ return (W, H)
221
+
222
+ NODE_CLASS_MAPPINGS = {
223
+ "PixelPerfectResolution": PixelPerfectResolution,
224
+ "ImageGenResolutionFromImage": ImageGenResolutionFromImage,
225
+ "ImageGenResolutionFromLatent": ImageGenResolutionFromLatent,
226
+ "HintImageEnchance": HintImageEnchance
227
+ }
228
+ NODE_DISPLAY_NAME_MAPPINGS = {
229
+ "PixelPerfectResolution": "Pixel Perfect Resolution",
230
+ "ImageGenResolutionFromImage": "Generation Resolution From Image",
231
+ "ImageGenResolutionFromLatent": "Generation Resolution From Latent",
232
+ "HintImageEnchance": "Enchance And Resize Hint Images"
233
+ }
install.bat ADDED
@@ -0,0 +1,20 @@
1
+ @echo off
2
+
3
+ set "requirements_txt=%~dp0\requirements.txt"
4
+ set "python_exec=..\..\..\python_embedded\python.exe"
5
+
6
+ echo Installing ComfyUI's ControlNet Auxiliary Preprocessors..
7
+
8
+ if exist "%python_exec%" (
9
+ echo Installing with ComfyUI Portable
10
+ for /f "delims=" %%i in (%requirements_txt%) do (
11
+ %python_exec% -s -m pip install "%%i"
12
+ )
13
+ ) else (
14
+ echo Installing with system Python
15
+ for /f "delims=" %%i in (%requirements_txt%) do (
16
+ pip install "%%i"
17
+ )
18
+ )
19
+
20
+ pause
log.py ADDED
@@ -0,0 +1,80 @@
1
+ #Credit: https://github.com/melMass/comfy_mtb/blob/main/log.py
2
+ import logging
3
+ import re
4
+ import os
5
+
6
+ base_log_level = logging.INFO
7
+
8
+
9
+ # Custom object that discards the output
10
+ class NullWriter:
11
+ def write(self, text):
12
+ pass
13
+
14
+
15
+ class Formatter(logging.Formatter):
16
+ grey = "\x1b[38;20m"
17
+ cyan = "\x1b[36;20m"
18
+ purple = "\x1b[35;20m"
19
+ yellow = "\x1b[33;20m"
20
+ red = "\x1b[31;20m"
21
+ bold_red = "\x1b[31;1m"
22
+ reset = "\x1b[0m"
23
+ # format = "%(asctime)s - [%(name)s] - %(levelname)s - %(message)s (%(filename)s:%(lineno)d)"
24
+ format = "[%(name)s] | %(levelname)s -> %(message)s"
25
+
26
+ FORMATS = {
27
+ logging.DEBUG: purple + format + reset,
28
+ logging.INFO: cyan + format + reset,
29
+ logging.WARNING: yellow + format + reset,
30
+ logging.ERROR: red + format + reset,
31
+ logging.CRITICAL: bold_red + format + reset,
32
+ }
33
+
34
+ def format(self, record):
35
+ log_fmt = self.FORMATS.get(record.levelno)
36
+ formatter = logging.Formatter(log_fmt)
37
+ return formatter.format(record)
38
+
39
+
40
+ def mklog(name, level=base_log_level):
41
+ logger = logging.getLogger(name)
42
+ logger.setLevel(level)
43
+
44
+ for handler in logger.handlers:
45
+ logger.removeHandler(handler)
46
+
47
+ ch = logging.StreamHandler()
48
+ ch.setLevel(level)
49
+ ch.setFormatter(Formatter())
50
+ logger.addHandler(ch)
51
+
52
+ # Disable log propagation
53
+ logger.propagate = False
54
+
55
+ return logger
56
+
57
+
58
+ # - The main app logger
59
+ log = mklog(__package__, base_log_level)
60
+
61
+
62
+ def log_user(arg):
63
+ print("\033[34mComfyUI ControlNet AUX:\033[0m {arg}")
64
+
65
+
66
+ def get_summary(docstring):
67
+ return docstring.strip().split("\n\n", 1)[0]
68
+
69
+
70
+ def blue_text(text):
71
+ return f"\033[94m{text}\033[0m"
72
+
73
+
74
+ def cyan_text(text):
75
+ return f"\033[96m{text}\033[0m"
76
+
77
+
78
+ def get_label(label):
79
+ words = re.findall(r"(?:^|[A-Z])[a-z]*", label)
80
+ return " ".join(words).strip()
lvminthin.py ADDED
@@ -0,0 +1,87 @@
1
+ # High Quality Edge Thinning using Pure Python
2
+ # Written by Lvmin Zhang
3
+ # 2023 April
4
+ # Stanford University
5
+ # If you use this, please Cite "High Quality Edge Thinning using Pure Python", Lvmin Zhang, In Mikubill/sd-webui-controlnet.
6
+
7
+
8
+ import cv2
9
+ import numpy as np
10
+
11
+
12
+ lvmin_kernels_raw = [
13
+ np.array([
14
+ [-1, -1, -1],
15
+ [0, 1, 0],
16
+ [1, 1, 1]
17
+ ], dtype=np.int32),
18
+ np.array([
19
+ [0, -1, -1],
20
+ [1, 1, -1],
21
+ [0, 1, 0]
22
+ ], dtype=np.int32)
23
+ ]
24
+
25
+ lvmin_kernels = []
26
+ lvmin_kernels += [np.rot90(x, k=0, axes=(0, 1)) for x in lvmin_kernels_raw]
27
+ lvmin_kernels += [np.rot90(x, k=1, axes=(0, 1)) for x in lvmin_kernels_raw]
28
+ lvmin_kernels += [np.rot90(x, k=2, axes=(0, 1)) for x in lvmin_kernels_raw]
29
+ lvmin_kernels += [np.rot90(x, k=3, axes=(0, 1)) for x in lvmin_kernels_raw]
30
+
31
+ lvmin_prunings_raw = [
32
+ np.array([
33
+ [-1, -1, -1],
34
+ [-1, 1, -1],
35
+ [0, 0, -1]
36
+ ], dtype=np.int32),
37
+ np.array([
38
+ [-1, -1, -1],
39
+ [-1, 1, -1],
40
+ [-1, 0, 0]
41
+ ], dtype=np.int32)
42
+ ]
43
+
44
+ lvmin_prunings = []
45
+ lvmin_prunings += [np.rot90(x, k=0, axes=(0, 1)) for x in lvmin_prunings_raw]
46
+ lvmin_prunings += [np.rot90(x, k=1, axes=(0, 1)) for x in lvmin_prunings_raw]
47
+ lvmin_prunings += [np.rot90(x, k=2, axes=(0, 1)) for x in lvmin_prunings_raw]
48
+ lvmin_prunings += [np.rot90(x, k=3, axes=(0, 1)) for x in lvmin_prunings_raw]
49
+
50
+
51
+ def remove_pattern(x, kernel):
52
+ objects = cv2.morphologyEx(x, cv2.MORPH_HITMISS, kernel)
53
+ objects = np.where(objects > 127)
54
+ x[objects] = 0
55
+ return x, objects[0].shape[0] > 0
56
+
57
+
58
+ def thin_one_time(x, kernels):
59
+ y = x
60
+ is_done = True
61
+ for k in kernels:
62
+ y, has_update = remove_pattern(y, k)
63
+ if has_update:
64
+ is_done = False
65
+ return y, is_done
66
+
67
+
68
+ def lvmin_thin(x, prunings=True):
69
+ y = x
70
+ for i in range(32):
71
+ y, is_done = thin_one_time(y, lvmin_kernels)
72
+ if is_done:
73
+ break
74
+ if prunings:
75
+ y, _ = thin_one_time(y, lvmin_prunings)
76
+ return y
77
+
78
+
79
+ def nake_nms(x):
80
+ f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
81
+ f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
82
+ f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
83
+ f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)
84
+ y = np.zeros_like(x)
85
+ for f in [f1, f2, f3, f4]:
86
+ np.putmask(y, cv2.dilate(x, kernel=f) == x, x)
87
+ return y
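lvmin_thin() expects a single-channel uint8 edge map with values 0/255 and repeatedly hit-or-miss matches the kernels above (at most 32 passes) until no pixel changes, optionally pruning isolated spurs; nake_nms() keeps only pixels that survive a directional dilation. A standalone sketch feeding both a Canny edge map, mirroring the threshold-then-thin path in hint_image_enchance.py (dummy input; the import assumes lvminthin.py is on the import path):

import cv2
import numpy as np
from lvminthin import lvmin_thin, nake_nms  # assumption: lvminthin.py is importable

# Dummy grayscale input; any uint8 image works here.
img = np.zeros((256, 256), dtype=np.uint8)
cv2.circle(img, (128, 128), 80, 255, thickness=3)

edges = cv2.Canny(img, 100, 200)            # 0/255 uint8 edge map
edges = nake_nms(edges)                     # suppress non-maximum edge pixels
thin = lvmin_thin(edges, prunings=True)     # iteratively thin to 1-px lines

print(thin.shape, thin.dtype, int((thin > 0).sum()))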
node_wrappers/anime_face_segment.py ADDED
@@ -0,0 +1,43 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+ import torch
4
+ from einops import rearrange
5
+
6
+ class AnimeFace_SemSegPreprocessor:
7
+ @classmethod
8
+ def INPUT_TYPES(s):
9
+ #This preprocessor is only trained on 512x resolution
10
+ #https://github.com/siyeong0/Anime-Face-Segmentation/blob/main/predict.py#L25
11
+ return define_preprocessor_inputs(
12
+ remove_background_using_abg=INPUT.BOOLEAN(True),
13
+ resolution=INPUT.RESOLUTION(default=512, min=512, max=512)
14
+ )
15
+
16
+ RETURN_TYPES = ("IMAGE", "MASK")
17
+ RETURN_NAMES = ("IMAGE", "ABG_CHARACTER_MASK (MASK)")
18
+ FUNCTION = "execute"
19
+
20
+ CATEGORY = "ControlNet Preprocessors/Semantic Segmentation"
21
+
22
+ def execute(self, image, remove_background_using_abg=True, resolution=512, **kwargs):
23
+ from custom_controlnet_aux.anime_face_segment import AnimeFaceSegmentor
24
+
25
+ model = AnimeFaceSegmentor.from_pretrained().to(model_management.get_torch_device())
26
+ if remove_background_using_abg:
27
+ out_image_with_mask = common_annotator_call(model, image, resolution=resolution, remove_background=True)
28
+ out_image = out_image_with_mask[..., :3]
29
+ mask = out_image_with_mask[..., 3:]
30
+ mask = rearrange(mask, "n h w c -> n c h w")
31
+ else:
32
+ out_image = common_annotator_call(model, image, resolution=resolution, remove_background=False)
33
+ N, H, W, C = out_image.shape
34
+ mask = torch.ones(N, C, H, W)
35
+ del model
36
+ return (out_image, mask)
37
+
38
+ NODE_CLASS_MAPPINGS = {
39
+ "AnimeFace_SemSegPreprocessor": AnimeFace_SemSegPreprocessor
40
+ }
41
+ NODE_DISPLAY_NAME_MAPPINGS = {
42
+ "AnimeFace_SemSegPreprocessor": "Anime Face Segmentor"
43
+ }
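The segmentor node above returns the image batch in ComfyUI's IMAGE layout (N, H, W, C) but hands the alpha matte back as a MASK in (N, C, H, W), which is all the einops rearrange does. A toy illustration of that split and layout change on a dummy RGBA batch:

import torch
from einops import rearrange

batch = torch.rand(2, 512, 512, 4)        # stand-in for an RGBA result: image + alpha matte
image = batch[..., :3]                    # (N, H, W, 3) -> IMAGE output
mask = batch[..., 3:]                     # (N, H, W, 1)
mask = rearrange(mask, "n h w c -> n c h w")

print(image.shape, mask.shape)            # [2, 512, 512, 3] and [2, 1, 512, 512]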
node_wrappers/anyline.py ADDED
@@ -0,0 +1,87 @@
1
+ import torch
2
+ import numpy as np
3
+ import comfy.model_management as model_management
4
+ import comfy.utils
5
+
6
+ # Requires comfyui_controlnet_aux functions and classes
7
+ from ..utils import common_annotator_call, INPUT, define_preprocessor_inputs
8
+
9
+ def get_intensity_mask(image_array, lower_bound, upper_bound):
10
+ mask = image_array[:, :, 0]
11
+ mask = np.where((mask >= lower_bound) & (mask <= upper_bound), mask, 0)
12
+ mask = np.expand_dims(mask, 2).repeat(3, axis=2)
13
+ return mask
14
+
15
+ def combine_layers(base_layer, top_layer):
16
+ mask = top_layer.astype(bool)
17
+ temp = 1 - (1 - top_layer) * (1 - base_layer)
18
+ result = base_layer * (~mask) + temp * mask
19
+ return result
20
+
21
+ class AnyLinePreprocessor:
22
+ @classmethod
23
+ def INPUT_TYPES(s):
24
+ return define_preprocessor_inputs(
25
+ merge_with_lineart=INPUT.COMBO(["lineart_standard", "lineart_realisitic", "lineart_anime", "manga_line"], default="lineart_standard"),
26
+ resolution=INPUT.RESOLUTION(default=1280, step=8),
27
+ lineart_lower_bound=INPUT.FLOAT(default=0),
28
+ lineart_upper_bound=INPUT.FLOAT(default=1),
29
+ object_min_size=INPUT.INT(default=36, min=1),
30
+ object_connectivity=INPUT.INT(default=1, min=1)
31
+ )
32
+
33
+ RETURN_TYPES = ("IMAGE",)
34
+ RETURN_NAMES = ("image",)
35
+
36
+ FUNCTION = "get_anyline"
37
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
38
+
39
+ def __init__(self):
40
+ self.device = model_management.get_torch_device()
41
+
42
+ def get_anyline(self, image, merge_with_lineart="lineart_standard", resolution=512, lineart_lower_bound=0, lineart_upper_bound=1, object_min_size=36, object_connectivity=1):
43
+ from custom_controlnet_aux.teed import TEDDetector
44
+ from skimage import morphology
45
+ pbar = comfy.utils.ProgressBar(3)
46
+
47
+ # Process the image with MTEED model
48
+ mteed_model = TEDDetector.from_pretrained("TheMistoAI/MistoLine", "MTEED.pth", subfolder="Anyline").to(self.device)
49
+ mteed_result = common_annotator_call(mteed_model, image, resolution=resolution, show_pbar=False)
50
+ mteed_result = mteed_result.numpy()
51
+ del mteed_model
52
+ pbar.update(1)
53
+
54
+ # Process the image with the lineart standard preprocessor
55
+ if merge_with_lineart == "lineart_standard":
56
+ from custom_controlnet_aux.lineart_standard import LineartStandardDetector
57
+ lineart_standard_detector = LineartStandardDetector()
58
+ lineart_result = common_annotator_call(lineart_standard_detector, image, guassian_sigma=2, intensity_threshold=3, resolution=resolution, show_pbar=False).numpy()
59
+ del lineart_standard_detector
60
+ else:
61
+ from custom_controlnet_aux.lineart import LineartDetector
62
+ from custom_controlnet_aux.lineart_anime import LineartAnimeDetector
63
+ from custom_controlnet_aux.manga_line import LineartMangaDetector
64
+ lineart_detector = dict(lineart_realisitic=LineartDetector, lineart_anime=LineartAnimeDetector, manga_line=LineartMangaDetector)[merge_with_lineart]
65
+ lineart_detector = lineart_detector.from_pretrained().to(self.device)
66
+ lineart_result = common_annotator_call(lineart_detector, image, resolution=resolution, show_pbar=False).numpy()
67
+ del lineart_detector
68
+ pbar.update(1)
69
+
70
+ final_result = []
71
+ for i in range(len(image)):
72
+ _lineart_result = get_intensity_mask(lineart_result[i], lower_bound=lineart_lower_bound, upper_bound=lineart_upper_bound)
73
+ _cleaned = morphology.remove_small_objects(_lineart_result.astype(bool), min_size=object_min_size, connectivity=object_connectivity)
74
+ _lineart_result = _lineart_result * _cleaned
75
+ _mteed_result = mteed_result[i]
76
+
77
+ # Combine the results
78
+ final_result.append(torch.from_numpy(combine_layers(_mteed_result, _lineart_result)))
79
+ pbar.update(1)
80
+ return (torch.stack(final_result),)
81
+
82
+ NODE_CLASS_MAPPINGS = {
83
+ "AnyLineArtPreprocessor_aux": AnyLinePreprocessor
84
+ }
85
+ NODE_DISPLAY_NAME_MAPPINGS = {
86
+ "AnyLineArtPreprocessor_aux": "AnyLine Lineart"
87
+ }
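The merge step above is plain NumPy: get_intensity_mask() keeps lineart pixels whose first channel falls inside [lower, upper], skimage's remove_small_objects() drops connected specks smaller than object_min_size, and combine_layers() screen-blends the cleaned lineart over the MTEED edges. A standalone sketch with random arrays standing in for the two detector outputs (the helpers are copied from the node above):

import numpy as np
from skimage import morphology

def get_intensity_mask(image_array, lower_bound, upper_bound):
    mask = image_array[:, :, 0]
    mask = np.where((mask >= lower_bound) & (mask <= upper_bound), mask, 0)
    return np.expand_dims(mask, 2).repeat(3, axis=2)

def combine_layers(base_layer, top_layer):
    mask = top_layer.astype(bool)
    temp = 1 - (1 - top_layer) * (1 - base_layer)    # screen blend
    return base_layer * (~mask) + temp * mask

mteed = np.random.rand(256, 256, 3).astype(np.float32)     # stand-in for the MTEED result
lineart = np.random.rand(256, 256, 3).astype(np.float32)   # stand-in for the lineart result

masked = get_intensity_mask(lineart, lower_bound=0.0, upper_bound=1.0)
cleaned = morphology.remove_small_objects(masked.astype(bool), min_size=36, connectivity=1)
merged = combine_layers(mteed, masked * cleaned)
print(merged.shape)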
node_wrappers/binary.py ADDED
@@ -0,0 +1,29 @@
1
+ from ..utils import common_annotator_call, INPUT, define_preprocessor_inputs
2
+ import comfy.model_management as model_management
3
+
4
+ class Binary_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ bin_threshold=INPUT.INT(default=100, max=255),
9
+ resolution=INPUT.RESOLUTION()
10
+ )
11
+
12
+ RETURN_TYPES = ("IMAGE",)
13
+ FUNCTION = "execute"
14
+
15
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
16
+
17
+ def execute(self, image, bin_threshold=100, resolution=512, **kwargs):
18
+ from custom_controlnet_aux.binary import BinaryDetector
19
+
20
+ return (common_annotator_call(BinaryDetector(), image, bin_threshold=bin_threshold, resolution=resolution), )
21
+
22
+
23
+
24
+ NODE_CLASS_MAPPINGS = {
25
+ "BinaryPreprocessor": Binary_Preprocessor
26
+ }
27
+ NODE_DISPLAY_NAME_MAPPINGS = {
28
+ "BinaryPreprocessor": "Binary Lines"
29
+ }
node_wrappers/canny.py ADDED
@@ -0,0 +1,30 @@
1
+ from ..utils import common_annotator_call, INPUT, define_preprocessor_inputs
2
+ import comfy.model_management as model_management
3
+
4
+ class Canny_Edge_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ low_threshold=INPUT.INT(default=100, max=255),
9
+ high_threshold=INPUT.INT(default=200, max=255),
10
+ resolution=INPUT.RESOLUTION()
11
+ )
12
+
13
+ RETURN_TYPES = ("IMAGE",)
14
+ FUNCTION = "execute"
15
+
16
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
17
+
18
+ def execute(self, image, low_threshold=100, high_threshold=200, resolution=512, **kwargs):
19
+ from custom_controlnet_aux.canny import CannyDetector
20
+
21
+ return (common_annotator_call(CannyDetector(), image, low_threshold=low_threshold, high_threshold=high_threshold, resolution=resolution), )
22
+
23
+
24
+
25
+ NODE_CLASS_MAPPINGS = {
26
+ "CannyEdgePreprocessor": Canny_Edge_Preprocessor
27
+ }
28
+ NODE_DISPLAY_NAME_MAPPINGS = {
29
+ "CannyEdgePreprocessor": "Canny Edge"
30
+ }
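The Canny node is a thin wrapper: the two values go straight into OpenCV's hysteresis step, where gradients above high_threshold are kept as strong edges and gradients between the two thresholds survive only if connected to a strong edge. The same call outside ComfyUI, with the 1-channel result stacked to the 3-channel hint format:

import cv2
import numpy as np

img = (np.random.rand(512, 512, 3) * 255).astype(np.uint8)   # stand-in for a real photo
edges = cv2.Canny(img, threshold1=100, threshold2=200)        # low/high hysteresis thresholds
edge_rgb = np.stack([edges] * 3, axis=2)                      # ControlNet hints are 3-channel
print(edge_rgb.shape)                                         # (512, 512, 3)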
node_wrappers/color.py ADDED
@@ -0,0 +1,26 @@
1
+ from ..utils import common_annotator_call, INPUT, define_preprocessor_inputs
2
+ import comfy.model_management as model_management
3
+
4
+ class Color_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(resolution=INPUT.RESOLUTION())
8
+
9
+ RETURN_TYPES = ("IMAGE",)
10
+ FUNCTION = "execute"
11
+
12
+ CATEGORY = "ControlNet Preprocessors/T2IAdapter-only"
13
+
14
+ def execute(self, image, resolution=512, **kwargs):
15
+ from custom_controlnet_aux.color import ColorDetector
16
+
17
+ return (common_annotator_call(ColorDetector(), image, resolution=resolution), )
18
+
19
+
20
+
21
+ NODE_CLASS_MAPPINGS = {
22
+ "ColorPreprocessor": Color_Preprocessor
23
+ }
24
+ NODE_DISPLAY_NAME_MAPPINGS = {
25
+ "ColorPreprocessor": "Color Pallete"
26
+ }
node_wrappers/densepose.py ADDED
@@ -0,0 +1,31 @@
1
+ from ..utils import common_annotator_call, INPUT, define_preprocessor_inputs
2
+ import comfy.model_management as model_management
3
+
4
+ class DensePose_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ model=INPUT.COMBO(["densepose_r50_fpn_dl.torchscript", "densepose_r101_fpn_dl.torchscript"]),
9
+ cmap=INPUT.COMBO(["Viridis (MagicAnimate)", "Parula (CivitAI)"]),
10
+ resolution=INPUT.RESOLUTION()
11
+ )
12
+
13
+ RETURN_TYPES = ("IMAGE",)
14
+ FUNCTION = "execute"
15
+
16
+ CATEGORY = "ControlNet Preprocessors/Faces and Poses Estimators"
17
+
18
+ def execute(self, image, model="densepose_r50_fpn_dl.torchscript", cmap="Viridis (MagicAnimate)", resolution=512):
19
+ from custom_controlnet_aux.densepose import DenseposeDetector
20
+ model = DenseposeDetector \
21
+ .from_pretrained(filename=model) \
22
+ .to(model_management.get_torch_device())
23
+ return (common_annotator_call(model, image, cmap="viridis" if "Viridis" in cmap else "parula", resolution=resolution), )
24
+
25
+
26
+ NODE_CLASS_MAPPINGS = {
27
+ "DensePosePreprocessor": DensePose_Preprocessor
28
+ }
29
+ NODE_DISPLAY_NAME_MAPPINGS = {
30
+ "DensePosePreprocessor": "DensePose Estimator"
31
+ }
node_wrappers/depth_anything.py ADDED
@@ -0,0 +1,55 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class Depth_Anything_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ ckpt_name=INPUT.COMBO(
9
+ ["depth_anything_vitl14.pth", "depth_anything_vitb14.pth", "depth_anything_vits14.pth"]
10
+ ),
11
+ resolution=INPUT.RESOLUTION()
12
+ )
13
+
14
+ RETURN_TYPES = ("IMAGE",)
15
+ FUNCTION = "execute"
16
+
17
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
18
+
19
+ def execute(self, image, ckpt_name="depth_anything_vitl14.pth", resolution=512, **kwargs):
20
+ from custom_controlnet_aux.depth_anything import DepthAnythingDetector
21
+
22
+ model = DepthAnythingDetector.from_pretrained(filename=ckpt_name).to(model_management.get_torch_device())
23
+ out = common_annotator_call(model, image, resolution=resolution)
24
+ del model
25
+ return (out, )
26
+
27
+ class Zoe_Depth_Anything_Preprocessor:
28
+ @classmethod
29
+ def INPUT_TYPES(s):
30
+ return define_preprocessor_inputs(
31
+ environment=INPUT.COMBO(["indoor", "outdoor"]),
32
+ resolution=INPUT.RESOLUTION()
33
+ )
34
+
35
+ RETURN_TYPES = ("IMAGE",)
36
+ FUNCTION = "execute"
37
+
38
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
39
+
40
+ def execute(self, image, environment="indoor", resolution=512, **kwargs):
41
+ from custom_controlnet_aux.zoe import ZoeDepthAnythingDetector
42
+ ckpt_name = "depth_anything_metric_depth_indoor.pt" if environment == "indoor" else "depth_anything_metric_depth_outdoor.pt"
43
+ model = ZoeDepthAnythingDetector.from_pretrained(filename=ckpt_name).to(model_management.get_torch_device())
44
+ out = common_annotator_call(model, image, resolution=resolution)
45
+ del model
46
+ return (out, )
47
+
48
+ NODE_CLASS_MAPPINGS = {
49
+ "DepthAnythingPreprocessor": Depth_Anything_Preprocessor,
50
+ "Zoe_DepthAnythingPreprocessor": Zoe_Depth_Anything_Preprocessor
51
+ }
52
+ NODE_DISPLAY_NAME_MAPPINGS = {
53
+ "DepthAnythingPreprocessor": "Depth Anything",
54
+ "Zoe_DepthAnythingPreprocessor": "Zoe Depth Anything"
55
+ }
node_wrappers/depth_anything_v2.py ADDED
@@ -0,0 +1,56 @@
1
+ from ..utils import common_annotator_call, INPUT, define_preprocessor_inputs
2
+ import comfy.model_management as model_management
3
+
4
+ class Depth_Anything_V2_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ ckpt_name=INPUT.COMBO(
9
+ ["depth_anything_v2_vitg.pth", "depth_anything_v2_vitl.pth", "depth_anything_v2_vitb.pth", "depth_anything_v2_vits.pth"],
10
+ default="depth_anything_v2_vitl.pth"
11
+ ),
12
+ resolution=INPUT.RESOLUTION()
13
+ )
14
+
15
+ RETURN_TYPES = ("IMAGE",)
16
+ FUNCTION = "execute"
17
+
18
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
19
+
20
+ def execute(self, image, ckpt_name="depth_anything_v2_vitl.pth", resolution=512, **kwargs):
21
+ from custom_controlnet_aux.depth_anything_v2 import DepthAnythingV2Detector
22
+
23
+ model = DepthAnythingV2Detector.from_pretrained(filename=ckpt_name).to(model_management.get_torch_device())
24
+ out = common_annotator_call(model, image, resolution=resolution, max_depth=1)
25
+ del model
26
+ return (out, )
27
+
28
+ """ class Depth_Anything_Metric_V2_Preprocessor:
29
+ @classmethod
30
+ def INPUT_TYPES(s):
31
+ return create_node_input_types(
32
+ environment=(["indoor", "outdoor"], {"default": "indoor"}),
33
+ max_depth=("FLOAT", {"min": 0, "max": 100, "default": 20.0, "step": 0.01})
34
+ )
35
+
36
+ RETURN_TYPES = ("IMAGE",)
37
+ FUNCTION = "execute"
38
+
39
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
40
+
41
+ def execute(self, image, environment, resolution=512, max_depth=20.0, **kwargs):
42
+ from custom_controlnet_aux.depth_anything_v2 import DepthAnythingV2Detector
43
+ filename = dict(indoor="depth_anything_v2_metric_hypersim_vitl.pth", outdoor="depth_anything_v2_metric_vkitti_vitl.pth")[environment]
44
+ model = DepthAnythingV2Detector.from_pretrained(filename=filename).to(model_management.get_torch_device())
45
+ out = common_annotator_call(model, image, resolution=resolution, max_depth=max_depth)
46
+ del model
47
+ return (out, ) """
48
+
49
+ NODE_CLASS_MAPPINGS = {
50
+ "DepthAnythingV2Preprocessor": Depth_Anything_V2_Preprocessor,
51
+ #"Metric_DepthAnythingV2Preprocessor": Depth_Anything_Metric_V2_Preprocessor
52
+ }
53
+ NODE_DISPLAY_NAME_MAPPINGS = {
54
+ "DepthAnythingV2Preprocessor": "Depth Anything V2 - Relative",
55
+ #"Metric_DepthAnythingV2Preprocessor": "Depth Anything V2 - Metric"
56
+ }
node_wrappers/diffusion_edge.py ADDED
@@ -0,0 +1,41 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT, run_script
2
+ import comfy.model_management as model_management
3
+ import sys
4
+
5
+ def install_deps():
6
+ try:
7
+ import sklearn
8
+ except:
9
+ run_script([sys.executable, '-s', '-m', 'pip', 'install', 'scikit-learn'])
10
+
11
+ class DiffusionEdge_Preprocessor:
12
+ @classmethod
13
+ def INPUT_TYPES(s):
14
+ return define_preprocessor_inputs(
15
+ environment=INPUT.COMBO(["indoor", "urban", "natrual"]),
16
+ patch_batch_size=INPUT.INT(default=4, min=1, max=16),
17
+ resolution=INPUT.RESOLUTION()
18
+ )
19
+
20
+ RETURN_TYPES = ("IMAGE",)
21
+ FUNCTION = "execute"
22
+
23
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
24
+
25
+ def execute(self, image, environment="indoor", patch_batch_size=4, resolution=512, **kwargs):
26
+ install_deps()
27
+ from custom_controlnet_aux.diffusion_edge import DiffusionEdgeDetector
28
+
29
+ model = DiffusionEdgeDetector \
30
+ .from_pretrained(filename = f"diffusion_edge_{environment}.pt") \
31
+ .to(model_management.get_torch_device())
32
+ out = common_annotator_call(model, image, resolution=resolution, patch_batch_size=patch_batch_size)
33
+ del model
34
+ return (out, )
35
+
36
+ NODE_CLASS_MAPPINGS = {
37
+ "DiffusionEdge_Preprocessor": DiffusionEdge_Preprocessor,
38
+ }
39
+ NODE_DISPLAY_NAME_MAPPINGS = {
40
+ "DiffusionEdge_Preprocessor": "Diffusion Edge (batch size ↑ => speed ↑, VRAM ↑)",
41
+ }
node_wrappers/dsine.py ADDED
@@ -0,0 +1,31 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class DSINE_Normal_Map_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ fov=INPUT.FLOAT(max=365.0, default=60.0),
9
+ iterations=INPUT.INT(min=1, max=20, default=5),
10
+ resolution=INPUT.RESOLUTION()
11
+ )
12
+
13
+ RETURN_TYPES = ("IMAGE",)
14
+ FUNCTION = "execute"
15
+
16
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
17
+
18
+ def execute(self, image, fov=60.0, iterations=5, resolution=512, **kwargs):
19
+ from custom_controlnet_aux.dsine import DsineDetector
20
+
21
+ model = DsineDetector.from_pretrained().to(model_management.get_torch_device())
22
+ out = common_annotator_call(model, image, fov=fov, iterations=iterations, resolution=resolution)
23
+ del model
24
+ return (out,)
25
+
26
+ NODE_CLASS_MAPPINGS = {
27
+ "DSINE-NormalMapPreprocessor": DSINE_Normal_Map_Preprocessor
28
+ }
29
+ NODE_DISPLAY_NAME_MAPPINGS = {
30
+ "DSINE-NormalMapPreprocessor": "DSINE Normal Map"
31
+ }
node_wrappers/dwpose.py ADDED
@@ -0,0 +1,160 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+ import numpy as np
4
+ import warnings
5
+ from custom_controlnet_aux.dwpose import DwposeDetector, AnimalposeDetector
6
+ import os
7
+ import json
8
+
9
+ DWPOSE_MODEL_NAME = "yzd-v/DWPose"
10
+ #Trigger startup caching for onnxruntime
11
+ GPU_PROVIDERS = ["CUDAExecutionProvider", "DirectMLExecutionProvider", "OpenVINOExecutionProvider", "ROCMExecutionProvider", "CoreMLExecutionProvider"]
12
+ def check_ort_gpu():
13
+ try:
14
+ import onnxruntime as ort
15
+ for provider in GPU_PROVIDERS:
16
+ if provider in ort.get_available_providers():
17
+ return True
18
+ return False
19
+ except:
20
+ return False
21
+
22
+ if not os.environ.get("DWPOSE_ONNXRT_CHECKED"):
23
+ if check_ort_gpu():
24
+ print("DWPose: Onnxruntime with acceleration providers detected")
25
+ else:
26
+ warnings.warn("DWPose: Onnxruntime not found or doesn't come with acceleration providers, switch to OpenCV with CPU device. DWPose might run very slowly")
27
+ os.environ['AUX_ORT_PROVIDERS'] = ''
28
+ os.environ["DWPOSE_ONNXRT_CHECKED"] = '1'
29
+
30
+ class DWPose_Preprocessor:
31
+ @classmethod
32
+ def INPUT_TYPES(s):
33
+ return define_preprocessor_inputs(
34
+ detect_hand=INPUT.COMBO(["enable", "disable"]),
35
+ detect_body=INPUT.COMBO(["enable", "disable"]),
36
+ detect_face=INPUT.COMBO(["enable", "disable"]),
37
+ resolution=INPUT.RESOLUTION(),
38
+ bbox_detector=INPUT.COMBO(
39
+ ["yolox_l.torchscript.pt", "yolox_l.onnx", "yolo_nas_l_fp16.onnx", "yolo_nas_m_fp16.onnx", "yolo_nas_s_fp16.onnx"],
40
+ default="yolox_l.onnx"
41
+ ),
42
+ pose_estimator=INPUT.COMBO(
43
+ ["dw-ll_ucoco_384_bs5.torchscript.pt", "dw-ll_ucoco_384.onnx", "dw-ll_ucoco.onnx"],
44
+ default="dw-ll_ucoco_384_bs5.torchscript.pt"
45
+ )
46
+ )
47
+
48
+ RETURN_TYPES = ("IMAGE", "POSE_KEYPOINT")
49
+ FUNCTION = "estimate_pose"
50
+
51
+ CATEGORY = "ControlNet Preprocessors/Faces and Poses Estimators"
52
+
53
+ def estimate_pose(self, image, detect_hand="enable", detect_body="enable", detect_face="enable", resolution=512, bbox_detector="yolox_l.onnx", pose_estimator="dw-ll_ucoco_384.onnx", **kwargs):
54
+ if bbox_detector == "yolox_l.onnx":
55
+ yolo_repo = DWPOSE_MODEL_NAME
56
+ elif "yolox" in bbox_detector:
57
+ yolo_repo = "hr16/yolox-onnx"
58
+ elif "yolo_nas" in bbox_detector:
59
+ yolo_repo = "hr16/yolo-nas-fp16"
60
+ else:
61
+ raise NotImplementedError(f"Download mechanism for {bbox_detector}")
62
+
63
+ if pose_estimator == "dw-ll_ucoco_384.onnx":
64
+ pose_repo = DWPOSE_MODEL_NAME
65
+ elif pose_estimator.endswith(".onnx"):
66
+ pose_repo = "hr16/UnJIT-DWPose"
67
+ elif pose_estimator.endswith(".torchscript.pt"):
68
+ pose_repo = "hr16/DWPose-TorchScript-BatchSize5"
69
+ else:
70
+ raise NotImplementedError(f"Download mechanism for {pose_estimator}")
71
+
72
+ model = DwposeDetector.from_pretrained(
73
+ pose_repo,
74
+ yolo_repo,
75
+ det_filename=bbox_detector, pose_filename=pose_estimator,
76
+ torchscript_device=model_management.get_torch_device()
77
+ )
78
+ detect_hand = detect_hand == "enable"
79
+ detect_body = detect_body == "enable"
80
+ detect_face = detect_face == "enable"
81
+ self.openpose_dicts = []
82
+ def func(image, **kwargs):
83
+ pose_img, openpose_dict = model(image, **kwargs)
84
+ self.openpose_dicts.append(openpose_dict)
85
+ return pose_img
86
+
87
+ out = common_annotator_call(func, image, include_hand=detect_hand, include_face=detect_face, include_body=detect_body, image_and_json=True, resolution=resolution)
88
+ del model
89
+ return {
90
+ 'ui': { "openpose_json": [json.dumps(self.openpose_dicts, indent=4)] },
91
+ "result": (out, self.openpose_dicts)
92
+ }
93
+
94
+ class AnimalPose_Preprocessor:
95
+ @classmethod
96
+ def INPUT_TYPES(s):
97
+ return define_preprocessor_inputs(
98
+ bbox_detector = INPUT.COMBO(
99
+ ["yolox_l.torchscript.pt", "yolox_l.onnx", "yolo_nas_l_fp16.onnx", "yolo_nas_m_fp16.onnx", "yolo_nas_s_fp16.onnx"],
100
+ default="yolox_l.torchscript.pt"
101
+ ),
102
+ pose_estimator = INPUT.COMBO(
103
+ ["rtmpose-m_ap10k_256_bs5.torchscript.pt", "rtmpose-m_ap10k_256.onnx"],
104
+ default="rtmpose-m_ap10k_256_bs5.torchscript.pt"
105
+ ),
106
+ resolution = INPUT.RESOLUTION()
107
+ )
108
+
109
+ RETURN_TYPES = ("IMAGE", "POSE_KEYPOINT")
110
+ FUNCTION = "estimate_pose"
111
+
112
+ CATEGORY = "ControlNet Preprocessors/Faces and Poses Estimators"
113
+
114
+ def estimate_pose(self, image, resolution=512, bbox_detector="yolox_l.onnx", pose_estimator="rtmpose-m_ap10k_256.onnx", **kwargs):
115
+ if bbox_detector == "yolox_l.onnx":
116
+ yolo_repo = DWPOSE_MODEL_NAME
117
+ elif "yolox" in bbox_detector:
118
+ yolo_repo = "hr16/yolox-onnx"
119
+ elif "yolo_nas" in bbox_detector:
120
+ yolo_repo = "hr16/yolo-nas-fp16"
121
+ else:
122
+ raise NotImplementedError(f"Download mechanism for {bbox_detector}")
123
+
124
+ if pose_estimator == "dw-ll_ucoco_384.onnx":
125
+ pose_repo = DWPOSE_MODEL_NAME
126
+ elif pose_estimator.endswith(".onnx"):
127
+ pose_repo = "hr16/UnJIT-DWPose"
128
+ elif pose_estimator.endswith(".torchscript.pt"):
129
+ pose_repo = "hr16/DWPose-TorchScript-BatchSize5"
130
+ else:
131
+ raise NotImplementedError(f"Download mechanism for {pose_estimator}")
132
+
133
+ model = AnimalposeDetector.from_pretrained(
134
+ pose_repo,
135
+ yolo_repo,
136
+ det_filename=bbox_detector, pose_filename=pose_estimator,
137
+ torchscript_device=model_management.get_torch_device()
138
+ )
139
+
140
+ self.openpose_dicts = []
141
+ def func(image, **kwargs):
142
+ pose_img, openpose_dict = model(image, **kwargs)
143
+ self.openpose_dicts.append(openpose_dict)
144
+ return pose_img
145
+
146
+ out = common_annotator_call(func, image, image_and_json=True, resolution=resolution)
147
+ del model
148
+ return {
149
+ 'ui': { "openpose_json": [json.dumps(self.openpose_dicts, indent=4)] },
150
+ "result": (out, self.openpose_dicts)
151
+ }
152
+
153
+ NODE_CLASS_MAPPINGS = {
154
+ "DWPreprocessor": DWPose_Preprocessor,
155
+ "AnimalPosePreprocessor": AnimalPose_Preprocessor
156
+ }
157
+ NODE_DISPLAY_NAME_MAPPINGS = {
158
+ "DWPreprocessor": "DWPose Estimator",
159
+ "AnimalPosePreprocessor": "AnimalPose Estimator (AP10K)"
160
+ }
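The startup check above only asks onnxruntime which execution providers were compiled in; if none of the GPU/NPU providers is available it clears AUX_ORT_PROVIDERS so the detectors fall back to OpenCV on CPU. The probe can be reproduced in isolation:

GPU_PROVIDERS = ["CUDAExecutionProvider", "DirectMLExecutionProvider",
                 "OpenVINOExecutionProvider", "ROCMExecutionProvider",
                 "CoreMLExecutionProvider"]

try:
    import onnxruntime as ort
    available = ort.get_available_providers()
except ImportError:
    available = []

accelerated = any(p in available for p in GPU_PROVIDERS)
print("providers:", available)
print("accelerated:", accelerated)   # False means DWPose would run slowly on CPU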
node_wrappers/hed.py ADDED
@@ -0,0 +1,53 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class HED_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ safe=INPUT.COMBO(["enable", "disable"]),
9
+ resolution=INPUT.RESOLUTION()
10
+ )
11
+
12
+ RETURN_TYPES = ("IMAGE",)
13
+ FUNCTION = "execute"
14
+
15
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
16
+
17
+ def execute(self, image, resolution=512, **kwargs):
18
+ from custom_controlnet_aux.hed import HEDdetector
19
+
20
+ model = HEDdetector.from_pretrained().to(model_management.get_torch_device())
21
+ out = common_annotator_call(model, image, resolution=resolution, safe = kwargs["safe"] == "enable")
22
+ del model
23
+ return (out, )
24
+
25
+ class Fake_Scribble_Preprocessor:
26
+ @classmethod
27
+ def INPUT_TYPES(s):
28
+ return define_preprocessor_inputs(
29
+ safe=INPUT.COMBO(["enable", "disable"]),
30
+ resolution=INPUT.RESOLUTION()
31
+ )
32
+
33
+ RETURN_TYPES = ("IMAGE",)
34
+ FUNCTION = "execute"
35
+
36
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
37
+
38
+ def execute(self, image, resolution=512, **kwargs):
39
+ from custom_controlnet_aux.hed import HEDdetector
40
+
41
+ model = HEDdetector.from_pretrained().to(model_management.get_torch_device())
42
+ out = common_annotator_call(model, image, resolution=resolution, scribble=True, safe=kwargs["safe"]=="enable")
43
+ del model
44
+ return (out, )
45
+
46
+ NODE_CLASS_MAPPINGS = {
47
+ "HEDPreprocessor": HED_Preprocessor,
48
+ "FakeScribblePreprocessor": Fake_Scribble_Preprocessor
49
+ }
50
+ NODE_DISPLAY_NAME_MAPPINGS = {
51
+ "HEDPreprocessor": "HED Soft-Edge Lines",
52
+ "FakeScribblePreprocessor": "Fake Scribble Lines (aka scribble_hed)"
53
+ }
node_wrappers/inpaint.py ADDED
@@ -0,0 +1,27 @@
1
+ import torch
2
+ from ..utils import INPUT
3
+
4
+ class InpaintPreprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return dict(
8
+ required=dict(image=INPUT.IMAGE(), mask=INPUT.MASK())
9
+ )
10
+ RETURN_TYPES = ("IMAGE",)
11
+ FUNCTION = "preprocess"
12
+
13
+ CATEGORY = "ControlNet Preprocessors/others"
14
+
15
+ def preprocess(self, image, mask):
16
+ mask = torch.nn.functional.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])), size=(image.shape[1], image.shape[2]), mode="bilinear")
17
+ mask = mask.movedim(1,-1).expand((-1,-1,-1,3))
18
+ image = image.clone()
19
+ image[mask > 0.5] = -1.0 # set as masked pixel
20
+ return (image,)
21
+
22
+ NODE_CLASS_MAPPINGS = {
23
+ "InpaintPreprocessor": InpaintPreprocessor
24
+ }
25
+ NODE_DISPLAY_NAME_MAPPINGS = {
26
+ "InpaintPreprocessor": "Inpaint Preprocessor"
27
+ }
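InpaintPreprocessor marks masked pixels by resizing the MASK to the image's spatial size, broadcasting it to three channels, and writing the sentinel value -1.0 into the IMAGE wherever the mask exceeds 0.5; inpaint ControlNets treat that sentinel as "fill this region in". The same few tensor ops on dummy data:

import torch
import torch.nn.functional as F

image = torch.rand(1, 512, 512, 3)       # ComfyUI IMAGE: (N, H, W, C) in [0, 1]
mask = torch.zeros(1, 256, 256)          # ComfyUI MASK, possibly at another resolution
mask[:, 64:192, 64:192] = 1.0

mask = F.interpolate(mask.reshape((-1, 1, mask.shape[-2], mask.shape[-1])),
                     size=(image.shape[1], image.shape[2]), mode="bilinear")
mask = mask.movedim(1, -1).expand((-1, -1, -1, 3))

image = image.clone()
image[mask > 0.5] = -1.0                 # sentinel value for masked pixels
print(float(image.min()), image.shape)   # -1.0, torch.Size([1, 512, 512, 3])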
node_wrappers/leres.py ADDED
@@ -0,0 +1,32 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class LERES_Depth_Map_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ rm_nearest=INPUT.FLOAT(max=100.0),
9
+ rm_background=INPUT.FLOAT(max=100.0),
10
+ boost=INPUT.COMBO(["disable", "enable"]),
11
+ resolution=INPUT.RESOLUTION()
12
+ )
13
+
14
+ RETURN_TYPES = ("IMAGE",)
15
+ FUNCTION = "execute"
16
+
17
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
18
+
19
+ def execute(self, image, rm_nearest=0, rm_background=0, resolution=512, boost="disable", **kwargs):
20
+ from custom_controlnet_aux.leres import LeresDetector
21
+
22
+ model = LeresDetector.from_pretrained().to(model_management.get_torch_device())
23
+ out = common_annotator_call(model, image, resolution=resolution, thr_a=rm_nearest, thr_b=rm_background, boost=boost == "enable")
24
+ del model
25
+ return (out, )
26
+
27
+ NODE_CLASS_MAPPINGS = {
28
+ "LeReS-DepthMapPreprocessor": LERES_Depth_Map_Preprocessor
29
+ }
30
+ NODE_DISPLAY_NAME_MAPPINGS = {
31
+ "LeReS-DepthMapPreprocessor": "LeReS Depth Map (enable boost for leres++)"
32
+ }
node_wrappers/lineart.py ADDED
@@ -0,0 +1,30 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class LineArt_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ coarse=INPUT.COMBO((["disable", "enable"])),
9
+ resolution=INPUT.RESOLUTION()
10
+ )
11
+
12
+ RETURN_TYPES = ("IMAGE",)
13
+ FUNCTION = "execute"
14
+
15
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
16
+
17
+ def execute(self, image, resolution=512, **kwargs):
18
+ from custom_controlnet_aux.lineart import LineartDetector
19
+
20
+ model = LineartDetector.from_pretrained().to(model_management.get_torch_device())
21
+ out = common_annotator_call(model, image, resolution=resolution, coarse = kwargs["coarse"] == "enable")
22
+ del model
23
+ return (out, )
24
+
25
+ NODE_CLASS_MAPPINGS = {
26
+ "LineArtPreprocessor": LineArt_Preprocessor
27
+ }
28
+ NODE_DISPLAY_NAME_MAPPINGS = {
29
+ "LineArtPreprocessor": "Realistic Lineart"
30
+ }
node_wrappers/lineart_anime.py ADDED
@@ -0,0 +1,27 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class AnimeLineArt_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(resolution=INPUT.RESOLUTION())
8
+
9
+ RETURN_TYPES = ("IMAGE",)
10
+ FUNCTION = "execute"
11
+
12
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
13
+
14
+ def execute(self, image, resolution=512, **kwargs):
15
+ from custom_controlnet_aux.lineart_anime import LineartAnimeDetector
16
+
17
+ model = LineartAnimeDetector.from_pretrained().to(model_management.get_torch_device())
18
+ out = common_annotator_call(model, image, resolution=resolution)
19
+ del model
20
+ return (out, )
21
+
22
+ NODE_CLASS_MAPPINGS = {
23
+ "AnimeLineArtPreprocessor": AnimeLineArt_Preprocessor
24
+ }
25
+ NODE_DISPLAY_NAME_MAPPINGS = {
26
+ "AnimeLineArtPreprocessor": "Anime Lineart"
27
+ }
node_wrappers/lineart_standard.py ADDED
@@ -0,0 +1,27 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class Lineart_Standard_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ guassian_sigma=INPUT.FLOAT(default=6.0, max=100.0),
9
+ intensity_threshold=INPUT.INT(default=8, max=16),
10
+ resolution=INPUT.RESOLUTION()
11
+ )
12
+
13
+ RETURN_TYPES = ("IMAGE",)
14
+ FUNCTION = "execute"
15
+
16
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
17
+
18
+ def execute(self, image, guassian_sigma=6, intensity_threshold=8, resolution=512, **kwargs):
19
+ from custom_controlnet_aux.lineart_standard import LineartStandardDetector
20
+ return (common_annotator_call(LineartStandardDetector(), image, guassian_sigma=guassian_sigma, intensity_threshold=intensity_threshold, resolution=resolution), )
21
+
22
+ NODE_CLASS_MAPPINGS = {
23
+ "LineartStandardPreprocessor": Lineart_Standard_Preprocessor
24
+ }
25
+ NODE_DISPLAY_NAME_MAPPINGS = {
26
+ "LineartStandardPreprocessor": "Standard Lineart"
27
+ }
node_wrappers/manga_line.py ADDED
@@ -0,0 +1,27 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class Manga2Anime_LineArt_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(resolution=INPUT.RESOLUTION())
8
+
9
+ RETURN_TYPES = ("IMAGE",)
10
+ FUNCTION = "execute"
11
+
12
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
13
+
14
+ def execute(self, image, resolution=512, **kwargs):
15
+ from custom_controlnet_aux.manga_line import LineartMangaDetector
16
+
17
+ model = LineartMangaDetector.from_pretrained().to(model_management.get_torch_device())
18
+ out = common_annotator_call(model, image, resolution=resolution)
19
+ del model
20
+ return (out, )
21
+
22
+ NODE_CLASS_MAPPINGS = {
23
+ "Manga2Anime_LineArt_Preprocessor": Manga2Anime_LineArt_Preprocessor
24
+ }
25
+ NODE_DISPLAY_NAME_MAPPINGS = {
26
+ "Manga2Anime_LineArt_Preprocessor": "Manga Lineart (aka lineart_anime_denoise)"
27
+ }
node_wrappers/mediapipe_face.py ADDED
@@ -0,0 +1,39 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT, run_script
2
+ import comfy.model_management as model_management
3
+ import os, sys
4
+ import subprocess, threading
5
+
6
+ def install_deps():
7
+ try:
8
+ import mediapipe
9
+ except ImportError:
10
+ run_script([sys.executable, '-s', '-m', 'pip', 'install', 'mediapipe'])
11
+ run_script([sys.executable, '-s', '-m', 'pip', 'install', '--upgrade', 'protobuf'])
12
+
13
+ class Media_Pipe_Face_Mesh_Preprocessor:
14
+ @classmethod
15
+ def INPUT_TYPES(s):
16
+ return define_preprocessor_inputs(
17
+ max_faces=INPUT.INT(default=10, min=1, max=50), #Which image has more than 50 detectable faces?
18
+ min_confidence=INPUT.FLOAT(default=0.5, min=0.1),
19
+ resolution=INPUT.RESOLUTION()
20
+ )
21
+
22
+ RETURN_TYPES = ("IMAGE",)
23
+ FUNCTION = "detect"
24
+
25
+ CATEGORY = "ControlNet Preprocessors/Faces and Poses Estimators"
26
+
27
+ def detect(self, image, max_faces=10, min_confidence=0.5, resolution=512):
28
+ #Ref: https://github.com/Fannovel16/comfy_controlnet_preprocessors/issues/70#issuecomment-1677967369
29
+ install_deps()
30
+ from custom_controlnet_aux.mediapipe_face import MediapipeFaceDetector
31
+ return (common_annotator_call(MediapipeFaceDetector(), image, max_faces=max_faces, min_confidence=min_confidence, resolution=resolution), )
32
+
33
+ NODE_CLASS_MAPPINGS = {
34
+ "MediaPipe-FaceMeshPreprocessor": Media_Pipe_Face_Mesh_Preprocessor
35
+ }
36
+
37
+ NODE_DISPLAY_NAME_MAPPINGS = {
38
+ "MediaPipe-FaceMeshPreprocessor": "MediaPipe Face Mesh"
39
+ }
node_wrappers/mesh_graphormer.py ADDED
@@ -0,0 +1,158 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT, MAX_RESOLUTION, run_script
2
+ import comfy.model_management as model_management
3
+ import numpy as np
4
+ import torch
5
+ from einops import rearrange
6
+ import os, sys
7
+ import subprocess, threading
8
+ import scipy.ndimage
9
+ import cv2
10
+ import torch.nn.functional as F
11
+
12
+ def install_deps():
13
+ try:
14
+ import mediapipe
15
+ except ImportError:
16
+ run_script([sys.executable, '-s', '-m', 'pip', 'install', 'mediapipe'])
17
+ run_script([sys.executable, '-s', '-m', 'pip', 'install', '--upgrade', 'protobuf'])
18
+
19
+ try:
20
+ import trimesh
21
+ except ImportError:
22
+ run_script([sys.executable, '-s', '-m', 'pip', 'install', 'trimesh[easy]'])
23
+
24
+ # Source: https://github.com/comfyanonymous/ComfyUI/blob/8c6493578b3dda233e9b9a953feeaf1e6ca434ad/comfy_extras/nodes_mask.py#L309
25
+ def expand_mask(mask, expand, tapered_corners):
26
+ c = 0 if tapered_corners else 1
27
+ kernel = np.array([[c, 1, c],
28
+ [1, 1, 1],
29
+ [c, 1, c]])
30
+ mask = mask.reshape((-1, mask.shape[-2], mask.shape[-1]))
31
+ out = []
32
+ for m in mask:
33
+ output = m.numpy()
34
+ for _ in range(abs(expand)):
35
+ if expand < 0:
36
+ output = scipy.ndimage.grey_erosion(output, footprint=kernel)
37
+ else:
38
+ output = scipy.ndimage.grey_dilation(output, footprint=kernel)
39
+ output = torch.from_numpy(output)
40
+ out.append(output)
41
+ return torch.stack(out, dim=0)
42
+
43
+ class Mesh_Graphormer_Depth_Map_Preprocessor:
44
+ @classmethod
45
+ def INPUT_TYPES(s):
46
+ return define_preprocessor_inputs(
47
+ mask_bbox_padding=("INT", {"default": 30, "min": 0, "max": 100}),
48
+ resolution=INPUT.RESOLUTION(),
49
+ mask_type=INPUT.COMBO(["based_on_depth", "tight_bboxes", "original"]),
50
+ mask_expand=INPUT.INT(default=5, min=-MAX_RESOLUTION, max=MAX_RESOLUTION),
51
+ rand_seed=INPUT.INT(default=88, min=0, max=0xffffffffffffffff),
52
+ detect_thr=INPUT.FLOAT(default=0.6, min=0.1),
53
+ presence_thr=INPUT.FLOAT(default=0.6, min=0.1)
54
+ )
55
+
56
+ RETURN_TYPES = ("IMAGE", "MASK")
57
+ RETURN_NAMES = ("IMAGE", "INPAINTING_MASK")
58
+ FUNCTION = "execute"
59
+
60
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
61
+
62
+ def execute(self, image, mask_bbox_padding=30, mask_type="based_on_depth", mask_expand=5, resolution=512, rand_seed=88, detect_thr=0.6, presence_thr=0.6, **kwargs):
63
+ install_deps()
64
+ from custom_controlnet_aux.mesh_graphormer import MeshGraphormerDetector
65
+ model = kwargs["model"] if "model" in kwargs \
66
+ else MeshGraphormerDetector.from_pretrained(detect_thr=detect_thr, presence_thr=presence_thr).to(model_management.get_torch_device())
67
+
68
+ depth_map_list = []
69
+ mask_list = []
70
+ for single_image in image:
71
+ np_image = np.asarray(single_image.cpu() * 255., dtype=np.uint8)
72
+ depth_map, mask, info = model(np_image, output_type="np", detect_resolution=resolution, mask_bbox_padding=mask_bbox_padding, seed=rand_seed)
73
+ if mask_type == "based_on_depth":
74
+ H, W = mask.shape[:2]
75
+ mask = cv2.resize(depth_map.copy(), (W, H))
76
+ mask[mask > 0] = 255
77
+
78
+ elif mask_type == "tight_bboxes":
79
+ mask = np.zeros_like(mask)
80
+ hand_bboxes = (info or {}).get("abs_boxes") or []
81
+ for hand_bbox in hand_bboxes:
82
+ x_min, x_max, y_min, y_max = hand_bbox
83
+ mask[y_min:y_max+1, x_min:x_max+1, :] = 255 #HWC
84
+
85
+ mask = mask[:, :, :1]
86
+ depth_map_list.append(torch.from_numpy(depth_map.astype(np.float32) / 255.0))
87
+ mask_list.append(torch.from_numpy(mask.astype(np.float32) / 255.0))
88
+ depth_maps, masks = torch.stack(depth_map_list, dim=0), rearrange(torch.stack(mask_list, dim=0), "n h w 1 -> n 1 h w")
89
+ return depth_maps, expand_mask(masks, mask_expand, tapered_corners=True)
90
+
91
+ def normalize_size_base_64(w, h):
92
+ short_side = min(w, h)
93
+ remainder = short_side % 64
94
+ return short_side - remainder + (64 if remainder > 0 else 0)
95
+
96
+ class Mesh_Graphormer_With_ImpactDetector_Depth_Map_Preprocessor:
97
+ @classmethod
98
+ def INPUT_TYPES(s):
99
+ types = define_preprocessor_inputs(
100
+ # Impact pack
101
+ bbox_threshold=INPUT.FLOAT(default=0.5, min=0.1),
102
+ bbox_dilation=INPUT.INT(default=10, min=-512, max=512),
103
+ bbox_crop_factor=INPUT.FLOAT(default=3.0, min=1.0, max=10.0),
104
+ drop_size=INPUT.INT(default=10, min=1, max=MAX_RESOLUTION),
105
+ # Mesh Graphormer
106
+ mask_bbox_padding=INPUT.INT(default=30, min=0, max=100),
107
+ mask_type=INPUT.COMBO(["based_on_depth", "tight_bboxes", "original"]),
108
+ mask_expand=INPUT.INT(default=5, min=-MAX_RESOLUTION, max=MAX_RESOLUTION),
109
+ rand_seed=INPUT.INT(default=88, min=0, max=0xffffffffffffffff),
110
+ resolution=INPUT.RESOLUTION()
111
+ )
112
+ types["required"]["bbox_detector"] = ("BBOX_DETECTOR", )
113
+ return types
114
+
115
+ RETURN_TYPES = ("IMAGE", "MASK")
116
+ RETURN_NAMES = ("IMAGE", "INPAINTING_MASK")
117
+ FUNCTION = "execute"
118
+
119
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
120
+
121
+ def execute(self, image, bbox_detector, bbox_threshold=0.5, bbox_dilation=10, bbox_crop_factor=3.0, drop_size=10, resolution=512, **mesh_graphormer_kwargs):
122
+ install_deps()
123
+ from custom_controlnet_aux.mesh_graphormer import MeshGraphormerDetector
124
+ mesh_graphormer_node = Mesh_Graphormer_Depth_Map_Preprocessor()
125
+ model = MeshGraphormerDetector.from_pretrained(detect_thr=0.6, presence_thr=0.6).to(model_management.get_torch_device())
126
+ mesh_graphormer_kwargs["model"] = model
127
+
128
+ frames = image
129
+ depth_maps, masks = [], []
130
+ for idx in range(len(frames)):
131
+ frame = frames[idx:idx+1,...] #Impact Pack's BBOX_DETECTOR only supports single batch image
132
+ bbox_detector.setAux('face') # use 'face' as the default prompt when the CLIPSeg prompt is empty
133
+ _, segs = bbox_detector.detect(frame, bbox_threshold, bbox_dilation, bbox_crop_factor, drop_size)
134
+ bbox_detector.setAux(None)
135
+
136
+ n, h, w, _ = frame.shape
137
+ depth_map, mask = torch.zeros_like(frame), torch.zeros(n, 1, h, w)
138
+ for i, seg in enumerate(segs):
139
+ x1, y1, x2, y2 = seg.crop_region
140
+ cropped_image = frame[:, y1:y2, x1:x2, :] # Never use seg.cropped_image to handle overlapping area
141
+ mesh_graphormer_kwargs["resolution"] = 0 #Disable resizing
142
+ sub_depth_map, sub_mask = mesh_graphormer_node.execute(cropped_image, **mesh_graphormer_kwargs)
143
+ depth_map[:, y1:y2, x1:x2, :] = sub_depth_map
144
+ mask[:, :, y1:y2, x1:x2] = sub_mask
145
+
146
+ depth_maps.append(depth_map)
147
+ masks.append(mask)
148
+
149
+ return (torch.cat(depth_maps), torch.cat(masks))
150
+
151
+ NODE_CLASS_MAPPINGS = {
152
+ "MeshGraphormer-DepthMapPreprocessor": Mesh_Graphormer_Depth_Map_Preprocessor,
153
+ "MeshGraphormer+ImpactDetector-DepthMapPreprocessor": Mesh_Graphormer_With_ImpactDetector_Depth_Map_Preprocessor
154
+ }
155
+ NODE_DISPLAY_NAME_MAPPINGS = {
156
+ "MeshGraphormer-DepthMapPreprocessor": "MeshGraphormer Hand Refiner",
157
+ "MeshGraphormer+ImpactDetector-DepthMapPreprocessor": "MeshGraphormer Hand Refiner With External Detector"
158
+ }
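expand_mask() above grows (positive expand) or shrinks (negative expand) the hand mask one pixel per iteration using greyscale dilation/erosion with a 3x3 footprint; zeroing the corners when tapered_corners is set rounds off the grown region. A standalone run on a toy mask, with the helper copied verbatim from the node above:

import numpy as np
import scipy.ndimage
import torch

def expand_mask(mask, expand, tapered_corners):
    c = 0 if tapered_corners else 1
    kernel = np.array([[c, 1, c],
                       [1, 1, 1],
                       [c, 1, c]])
    mask = mask.reshape((-1, mask.shape[-2], mask.shape[-1]))
    out = []
    for m in mask:
        output = m.numpy()
        for _ in range(abs(expand)):
            if expand < 0:
                output = scipy.ndimage.grey_erosion(output, footprint=kernel)
            else:
                output = scipy.ndimage.grey_dilation(output, footprint=kernel)
        out.append(torch.from_numpy(output))
    return torch.stack(out, dim=0)

mask = torch.zeros(1, 1, 64, 64)
mask[..., 28:36, 28:36] = 1.0
grown = expand_mask(mask, expand=5, tapered_corners=True)
print(int(mask.sum()), int(grown.sum()))   # the grown mask covers more pixels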
node_wrappers/metric3d.py ADDED
@@ -0,0 +1,57 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT, MAX_RESOLUTION
2
+ import comfy.model_management as model_management
3
+
4
+ class Metric3D_Depth_Map_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ backbone=INPUT.COMBO(["vit-small", "vit-large", "vit-giant2"]),
9
+ fx=INPUT.INT(default=1000, min=1, max=MAX_RESOLUTION),
10
+ fy=INPUT.INT(default=1000, min=1, max=MAX_RESOLUTION),
11
+ resolution=INPUT.RESOLUTION()
12
+ )
13
+
14
+ RETURN_TYPES = ("IMAGE",)
15
+ FUNCTION = "execute"
16
+
17
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
18
+
19
+ def execute(self, image, backbone="vit-small", fx=1000, fy=1000, resolution=512):
20
+ from custom_controlnet_aux.metric3d import Metric3DDetector
21
+ model = Metric3DDetector.from_pretrained(filename=f"metric_depth_{backbone.replace('-', '_')}_800k.pth").to(model_management.get_torch_device())
22
+ cb = lambda image, **kwargs: model(image, **kwargs)[0]
23
+ out = common_annotator_call(cb, image, resolution=resolution, fx=fx, fy=fy, depth_and_normal=True)
24
+ del model
25
+ return (out, )
26
+
27
+ class Metric3D_Normal_Map_Preprocessor:
28
+ @classmethod
29
+ def INPUT_TYPES(s):
30
+ return define_preprocessor_inputs(
31
+ backbone=INPUT.COMBO(["vit-small", "vit-large", "vit-giant2"]),
32
+ fx=INPUT.INT(default=1000, min=1, max=MAX_RESOLUTION),
33
+ fy=INPUT.INT(default=1000, min=1, max=MAX_RESOLUTION),
34
+ resolution=INPUT.RESOLUTION()
35
+ )
36
+
37
+ RETURN_TYPES = ("IMAGE",)
38
+ FUNCTION = "execute"
39
+
40
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
41
+
42
+ def execute(self, image, backbone="vit-small", fx=1000, fy=1000, resolution=512):
43
+ from custom_controlnet_aux.metric3d import Metric3DDetector
44
+ model = Metric3DDetector.from_pretrained(filename=f"metric_depth_{backbone.replace('-', '_')}_800k.pth").to(model_management.get_torch_device())
45
+ cb = lambda image, **kwargs: model(image, **kwargs)[1]
46
+ out = common_annotator_call(cb, image, resolution=resolution, fx=fx, fy=fy, depth_and_normal=True)
47
+ del model
48
+ return (out, )
49
+
50
+ NODE_CLASS_MAPPINGS = {
51
+ "Metric3D-DepthMapPreprocessor": Metric3D_Depth_Map_Preprocessor,
52
+ "Metric3D-NormalMapPreprocessor": Metric3D_Normal_Map_Preprocessor
53
+ }
54
+ NODE_DISPLAY_NAME_MAPPINGS = {
55
+ "Metric3D-DepthMapPreprocessor": "Metric3D Depth Map",
56
+ "Metric3D-NormalMapPreprocessor": "Metric3D Normal Map"
57
+ }
node_wrappers/midas.py ADDED
@@ -0,0 +1,59 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+ import numpy as np
4
+
5
+ class MIDAS_Normal_Map_Preprocessor:
6
+ @classmethod
7
+ def INPUT_TYPES(s):
8
+ return define_preprocessor_inputs(
9
+ a=INPUT.FLOAT(default=np.pi * 2.0, min=0.0, max=np.pi * 5.0),
10
+ bg_threshold=INPUT.FLOAT(default=0.1),
11
+ resolution=INPUT.RESOLUTION()
12
+ )
13
+
14
+ RETURN_TYPES = ("IMAGE",)
15
+ FUNCTION = "execute"
16
+
17
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
18
+
19
+ def execute(self, image, a=np.pi * 2.0, bg_threshold=0.1, resolution=512, **kwargs):
20
+ from custom_controlnet_aux.midas import MidasDetector
21
+
22
+ model = MidasDetector.from_pretrained().to(model_management.get_torch_device())
23
+ #Dirty hack :))
24
+ cb = lambda image, **kargs: model(image, **kargs)[1]
25
+ out = common_annotator_call(cb, image, resolution=resolution, a=a, bg_th=bg_threshold, depth_and_normal=True)
26
+ del model
27
+ return (out, )
28
+
29
+ class MIDAS_Depth_Map_Preprocessor:
30
+ @classmethod
31
+ def INPUT_TYPES(s):
32
+ return define_preprocessor_inputs(
33
+ a=INPUT.FLOAT(default=np.pi * 2.0, min=0.0, max=np.pi * 5.0),
34
+ bg_threshold=INPUT.FLOAT(default=0.1),
35
+ resolution=INPUT.RESOLUTION()
36
+ )
37
+
38
+ RETURN_TYPES = ("IMAGE",)
39
+ FUNCTION = "execute"
40
+
41
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
42
+
43
+ def execute(self, image, a=np.pi * 2.0, bg_threshold=0.1, resolution=512, **kwargs):
44
+ from custom_controlnet_aux.midas import MidasDetector
45
+
46
+ # Ref: https://github.com/lllyasviel/ControlNet/blob/main/gradio_depth2image.py
47
+ model = MidasDetector.from_pretrained().to(model_management.get_torch_device())
48
+ out = common_annotator_call(model, image, resolution=resolution, a=a, bg_th=bg_threshold)
49
+ del model
50
+ return (out, )
51
+
52
+ NODE_CLASS_MAPPINGS = {
53
+ "MiDaS-NormalMapPreprocessor": MIDAS_Normal_Map_Preprocessor,
54
+ "MiDaS-DepthMapPreprocessor": MIDAS_Depth_Map_Preprocessor
55
+ }
56
+ NODE_DISPLAY_NAME_MAPPINGS = {
57
+ "MiDaS-NormalMapPreprocessor": "MiDaS Normal Map",
58
+ "MiDaS-DepthMapPreprocessor": "MiDaS Depth Map"
59
+ }
node_wrappers/mlsd.py ADDED
@@ -0,0 +1,31 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+ import numpy as np
4
+
5
+ class MLSD_Preprocessor:
6
+ @classmethod
7
+ def INPUT_TYPES(s):
8
+ return define_preprocessor_inputs(
9
+ score_threshold=INPUT.FLOAT(default=0.1, min=0.01, max=2.0),
10
+ dist_threshold=INPUT.FLOAT(default=0.1, min=0.01, max=20.0),
11
+ resolution=INPUT.RESOLUTION()
12
+ )
13
+
14
+ RETURN_TYPES = ("IMAGE",)
15
+ FUNCTION = "execute"
16
+
17
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
18
+
19
+ def execute(self, image, score_threshold, dist_threshold, resolution=512, **kwargs):
20
+ from custom_controlnet_aux.mlsd import MLSDdetector
21
+
22
+ model = MLSDdetector.from_pretrained().to(model_management.get_torch_device())
23
+ out = common_annotator_call(model, image, resolution=resolution, thr_v=score_threshold, thr_d=dist_threshold)
24
+ return (out, )
25
+
26
+ NODE_CLASS_MAPPINGS = {
27
+ "M-LSDPreprocessor": MLSD_Preprocessor
28
+ }
29
+ NODE_DISPLAY_NAME_MAPPINGS = {
30
+ "M-LSDPreprocessor": "M-LSD Lines"
31
+ }
node_wrappers/normalbae.py ADDED
@@ -0,0 +1,27 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class BAE_Normal_Map_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(resolution=INPUT.RESOLUTION())
8
+
9
+ RETURN_TYPES = ("IMAGE",)
10
+ FUNCTION = "execute"
11
+
12
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
13
+
14
+ def execute(self, image, resolution=512, **kwargs):
15
+ from custom_controlnet_aux.normalbae import NormalBaeDetector
16
+
17
+ model = NormalBaeDetector.from_pretrained().to(model_management.get_torch_device())
18
+ out = common_annotator_call(model, image, resolution=resolution)
19
+ del model
20
+ return (out,)
21
+
22
+ NODE_CLASS_MAPPINGS = {
23
+ "BAE-NormalMapPreprocessor": BAE_Normal_Map_Preprocessor
24
+ }
25
+ NODE_DISPLAY_NAME_MAPPINGS = {
26
+ "BAE-NormalMapPreprocessor": "BAE Normal Map"
27
+ }
node_wrappers/oneformer.py ADDED
@@ -0,0 +1,50 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class OneFormer_COCO_SemSegPreprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(resolution=INPUT.RESOLUTION())
8
+
9
+ RETURN_TYPES = ("IMAGE",)
10
+ FUNCTION = "semantic_segmentate"
11
+
12
+ CATEGORY = "ControlNet Preprocessors/Semantic Segmentation"
13
+
14
+ def semantic_segmentate(self, image, resolution=512):
15
+ from custom_controlnet_aux.oneformer import OneformerSegmentor
16
+
17
+ model = OneformerSegmentor.from_pretrained(filename="150_16_swin_l_oneformer_coco_100ep.pth")
18
+ model = model.to(model_management.get_torch_device())
19
+ out = common_annotator_call(model, image, resolution=resolution)
20
+ del model
21
+ return (out,)
22
+
23
+ class OneFormer_ADE20K_SemSegPreprocessor:
24
+ @classmethod
25
+ def INPUT_TYPES(s):
26
+ return define_preprocessor_inputs(resolution=INPUT.RESOLUTION())
27
+
28
+ RETURN_TYPES = ("IMAGE",)
29
+ FUNCTION = "semantic_segmentate"
30
+
31
+ CATEGORY = "ControlNet Preprocessors/Semantic Segmentation"
32
+
33
+ def semantic_segmentate(self, image, resolution=512):
34
+ from custom_controlnet_aux.oneformer import OneformerSegmentor
35
+
36
+ model = OneformerSegmentor.from_pretrained(filename="250_16_swin_l_oneformer_ade20k_160k.pth")
37
+ model = model.to(model_management.get_torch_device())
38
+ out = common_annotator_call(model, image, resolution=resolution)
39
+ del model
40
+ return (out,)
41
+
42
+ NODE_CLASS_MAPPINGS = {
43
+ "OneFormer-COCO-SemSegPreprocessor": OneFormer_COCO_SemSegPreprocessor,
44
+ "OneFormer-ADE20K-SemSegPreprocessor": OneFormer_ADE20K_SemSegPreprocessor
45
+ }
46
+
47
+ NODE_DISPLAY_NAME_MAPPINGS = {
48
+ "OneFormer-COCO-SemSegPreprocessor": "OneFormer COCO Segmentor",
49
+ "OneFormer-ADE20K-SemSegPreprocessor": "OneFormer ADE20K Segmentor"
50
+ }
node_wrappers/openpose.py ADDED
@@ -0,0 +1,46 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+ import json
4
+
5
+ class OpenPose_Preprocessor:
6
+ @classmethod
7
+ def INPUT_TYPES(s):
8
+ return define_preprocessor_inputs(
9
+ detect_hand=INPUT.COMBO(["enable", "disable"]),
10
+ detect_body=INPUT.COMBO(["enable", "disable"]),
11
+ detect_face=INPUT.COMBO(["enable", "disable"]),
12
+ resolution=INPUT.RESOLUTION()
13
+ )
14
+
15
+ RETURN_TYPES = ("IMAGE", "POSE_KEYPOINT")
16
+ FUNCTION = "estimate_pose"
17
+
18
+ CATEGORY = "ControlNet Preprocessors/Faces and Poses Estimators"
19
+
20
+ def estimate_pose(self, image, detect_hand, detect_body, detect_face, resolution=512, **kwargs):
21
+ from custom_controlnet_aux.open_pose import OpenposeDetector
22
+
23
+ detect_hand = detect_hand == "enable"
24
+ detect_body = detect_body == "enable"
25
+ detect_face = detect_face == "enable"
26
+
27
+ model = OpenposeDetector.from_pretrained().to(model_management.get_torch_device())
28
+ self.openpose_dicts = []
29
+ def func(image, **kwargs):
30
+ pose_img, openpose_dict = model(image, **kwargs)
31
+ self.openpose_dicts.append(openpose_dict)
32
+ return pose_img
33
+
34
+ out = common_annotator_call(func, image, include_hand=detect_hand, include_face=detect_face, include_body=detect_body, image_and_json=True, resolution=resolution)
35
+ del model
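+ # Returning a dict rather than a bare tuple follows ComfyUI's node convention: the "ui" entry
+ # surfaces the keypoint JSON in the client, while "result" carries the actual node outputs.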
36
+ return {
37
+ 'ui': { "openpose_json": [json.dumps(self.openpose_dicts, indent=4)] },
38
+ "result": (out, self.openpose_dicts)
39
+ }
40
+
41
+ NODE_CLASS_MAPPINGS = {
42
+ "OpenposePreprocessor": OpenPose_Preprocessor,
43
+ }
44
+ NODE_DISPLAY_NAME_MAPPINGS = {
45
+ "OpenposePreprocessor": "OpenPose Pose",
46
+ }
node_wrappers/pidinet.py ADDED
@@ -0,0 +1,30 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class PIDINET_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ safe=INPUT.COMBO(["enable", "disable"]),
9
+ resolution=INPUT.RESOLUTION()
10
+ )
11
+
12
+ RETURN_TYPES = ("IMAGE",)
13
+ FUNCTION = "execute"
14
+
15
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
16
+
17
+ def execute(self, image, safe, resolution=512, **kwargs):
18
+ from custom_controlnet_aux.pidi import PidiNetDetector
19
+
20
+ model = PidiNetDetector.from_pretrained().to(model_management.get_torch_device())
21
+ out = common_annotator_call(model, image, resolution=resolution, safe = safe == "enable")
22
+ del model
23
+ return (out, )
24
+
25
+ NODE_CLASS_MAPPINGS = {
26
+ "PiDiNetPreprocessor": PIDINET_Preprocessor,
27
+ }
28
+ NODE_DISPLAY_NAME_MAPPINGS = {
29
+ "PiDiNetPreprocessor": "PiDiNet Soft-Edge Lines"
30
+ }
node_wrappers/pose_keypoint_postprocess.py ADDED
@@ -0,0 +1,340 @@
1
+ import folder_paths
2
+ import json
3
+ import os
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import ImageColor
7
+ from einops import rearrange
8
+ import torch
9
+ import itertools
10
+
11
+ from ..src.custom_controlnet_aux.dwpose import draw_poses, draw_animalposes, decode_json_as_poses
12
+
13
+
14
+ """
15
+ Format of POSE_KEYPOINT (AP10K keypoints):
16
+ [{
17
+ "version": "ap10k",
18
+ "animals": [
19
+ [[x1, y1, 1], [x2, y2, 1],..., [x17, y17, 1]],
20
+ [[x1, y1, 1], [x2, y2, 1],..., [x17, y17, 1]],
21
+ ...
22
+ ],
23
+ "canvas_height": 512,
24
+ "canvas_width": 768
25
+ },...]
26
+ Format of POSE_KEYPOINT (OpenPose keypoints):
27
+ [{
28
+ "people": [
29
+ {
30
+ 'pose_keypoints_2d': [[x1, y1, 1], [x2, y2, 1],..., [x17, y17, 1]]
31
+ "face_keypoints_2d": [[x1, y1, 1], [x2, y2, 1],..., [x68, y68, 1]],
32
+ "hand_left_keypoints_2d": [[x1, y1, 1], [x2, y2, 1],..., [x21, y21, 1]],
33
+ "hand_right_keypoints_2d":[[x1, y1, 1], [x2, y2, 1],..., [x21, y21, 1]],
34
+ }
35
+ ],
36
+ "canvas_height": canvas_height,
37
+ "canvas_width": canvas_width,
38
+ },...]
39
+ """
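+ # Illustrative sketch only (hypothetical helper, not used by the nodes below): the code in this
+ # file reshapes each keypoint list into (n, 3) rows of [x, y, confidence], e.g.:
+ def _example_keypoints_to_array(keypoints_2d):
+ kps = np.array(keypoints_2d, dtype=np.float32).reshape(-1, 3) # one row per keypoint
+ return kps[:, :2], kps[:, 2] # (n, 2) coordinates and (n,) confidences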
40
+
41
+ class SavePoseKpsAsJsonFile:
42
+ @classmethod
43
+ def INPUT_TYPES(s):
44
+ return {
45
+ "required": {
46
+ "pose_kps": ("POSE_KEYPOINT",),
47
+ "filename_prefix": ("STRING", {"default": "PoseKeypoint"})
48
+ }
49
+ }
50
+ RETURN_TYPES = ()
51
+ FUNCTION = "save_pose_kps"
52
+ OUTPUT_NODE = True
53
+ CATEGORY = "ControlNet Preprocessors/Pose Keypoint Postprocess"
54
+ def __init__(self):
55
+ self.output_dir = folder_paths.get_output_directory()
56
+ self.type = "output"
57
+ self.prefix_append = ""
58
+ def save_pose_kps(self, pose_kps, filename_prefix):
59
+ filename_prefix += self.prefix_append
60
+ full_output_folder, filename, counter, subfolder, filename_prefix = \
61
+ folder_paths.get_save_image_path(filename_prefix, self.output_dir, pose_kps[0]["canvas_width"], pose_kps[0]["canvas_height"])
62
+ file = f"{filename}_{counter:05}.json"
63
+ with open(os.path.join(full_output_folder, file), 'w') as f:
64
+ json.dump(pose_kps , f)
65
+ return {}
66
+
67
+ # COCO-WholeBody doesn't have eyebrows as it inherits the 68-keypoint format
68
+ # Perhaps eyebrows could be estimated, though
69
+ FACIAL_PARTS = ["skin", "left_eye", "right_eye", "nose", "upper_lip", "inner_mouth", "lower_lip"]
70
+ LAPA_COLORS = dict(
71
+ skin="rgb(0, 153, 255)",
72
+ left_eye="rgb(0, 204, 153)",
73
+ right_eye="rgb(255, 153, 0)",
74
+ nose="rgb(255, 102, 255)",
75
+ upper_lip="rgb(102, 0, 51)",
76
+ inner_mouth="rgb(255, 204, 255)",
77
+ lower_lip="rgb(255, 0, 102)"
78
+ )
79
+
80
+ #One-based index
81
+ def kps_idxs(start, end):
82
+ step = -1 if start > end else 1
83
+ return list(range(start-1, end+1-1, step))
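+ # e.g. kps_idxs(1, 17) -> [0, 1, ..., 16]: converts the one-based landmark numbers of the
+ # reference image linked below to zero-based indices; when start > end the indices are returned
+ # in descending order so the ranges below can be stitched into closed contours.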
84
+
85
+ #Source: https://www.researchgate.net/profile/Fabrizio-Falchi/publication/338048224/figure/fig1/AS:837860722741255@1576772971540/68-facial-landmarks.jpg
86
+ FACIAL_PART_RANGES = dict(
87
+ skin=kps_idxs(1, 17) + kps_idxs(27, 18),
88
+ nose=kps_idxs(28, 36),
89
+ left_eye=kps_idxs(37, 42),
90
+ right_eye=kps_idxs(43, 48),
91
+ upper_lip=kps_idxs(49, 55) + kps_idxs(65, 61),
92
+ lower_lip=kps_idxs(61, 68),
93
+ inner_mouth=kps_idxs(61, 65) + kps_idxs(55, 49)
94
+ )
95
+
96
+ def is_normalized(keypoints) -> bool:
97
+ point_normalized = [
98
+ 0 <= np.abs(k[0]) <= 1 and 0 <= np.abs(k[1]) <= 1
99
+ for k in keypoints
100
+ if k is not None
101
+ ]
102
+ if not point_normalized:
103
+ return False
104
+ return np.all(point_normalized)
105
+
106
+ class FacialPartColoringFromPoseKps:
107
+ @classmethod
108
+ def INPUT_TYPES(s):
109
+ input_types = {
110
+ "required": {"pose_kps": ("POSE_KEYPOINT",), "mode": (["point", "polygon"], {"default": "polygon"})}
111
+ }
112
+ for facial_part in FACIAL_PARTS:
113
+ input_types["required"][facial_part] = ("STRING", {"default": LAPA_COLORS[facial_part], "multiline": False})
114
+ return input_types
115
+ RETURN_TYPES = ("IMAGE",)
116
+ FUNCTION = "colorize"
117
+ CATEGORY = "ControlNet Preprocessors/Pose Keypoint Postprocess"
118
+ def colorize(self, pose_kps, mode, **facial_part_colors):
119
+ pose_frames = pose_kps
120
+ np_frames = [self.draw_kps(pose_frame, mode, **facial_part_colors) for pose_frame in pose_frames]
121
+ np_frames = np.stack(np_frames, axis=0)
122
+ return (torch.from_numpy(np_frames).float() / 255.,)
123
+
124
+ def draw_kps(self, pose_frame, mode, **facial_part_colors):
125
+ width, height = pose_frame["canvas_width"], pose_frame["canvas_height"]
126
+ canvas = np.zeros((height, width, 3), dtype=np.uint8)
127
+ for person, part_name in itertools.product(pose_frame["people"], FACIAL_PARTS):
128
+ n = len(person["face_keypoints_2d"]) // 3
129
+ facial_kps = rearrange(np.array(person["face_keypoints_2d"]), "(n c) -> n c", n=n, c=3)[:, :2]
130
+ if is_normalized(facial_kps):
131
+ facial_kps *= (width, height)
132
+ facial_kps = facial_kps.astype(np.int32)
133
+ part_color = ImageColor.getrgb(facial_part_colors[part_name])[:3]
134
+ part_contours = facial_kps[FACIAL_PART_RANGES[part_name], :]
135
+ if mode == "point":
136
+ for pt in part_contours:
137
+ cv2.circle(canvas, pt, radius=2, color=part_color, thickness=-1)
138
+ else:
139
+ cv2.fillPoly(canvas, pts=[part_contours], color=part_color)
140
+ return canvas
141
+
142
+ # https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/openpose/master/.github/media/keypoints_pose_18.png
143
+ BODY_PART_INDEXES = {
144
+ "Head": (16, 14, 0, 15, 17),
145
+ "Neck": (0, 1),
146
+ "Shoulder": (2, 5),
147
+ "Torso": (2, 5, 8, 11),
148
+ "RArm": (2, 3),
149
+ "RForearm": (3, 4),
150
+ "LArm": (5, 6),
151
+ "LForearm": (6, 7),
152
+ "RThigh": (8, 9),
153
+ "RLeg": (9, 10),
154
+ "LThigh": (11, 12),
155
+ "LLeg": (12, 13)
156
+ }
157
+ BODY_PART_DEFAULT_W_H = {
158
+ "Head": "256, 256",
159
+ "Neck": "100, 100",
160
+ "Shoulder": '',
161
+ "Torso": "350, 450",
162
+ "RArm": "128, 256",
163
+ "RForearm": "128, 256",
164
+ "LArm": "128, 256",
165
+ "LForearm": "128, 256",
166
+ "RThigh": "128, 256",
167
+ "RLeg": "128, 256",
168
+ "LThigh": "128, 256",
169
+ "LLeg": "128, 256"
170
+ }
171
+
172
+ class SinglePersonProcess:
173
+ @classmethod
174
+ def sort_and_get_max_people(s, pose_kps):
175
+ for idx in range(len(pose_kps)):
176
+ pose_kps[idx]["people"] = sorted(pose_kps[idx]["people"], key=lambda person:person["pose_keypoints_2d"][0])
177
+ return pose_kps, max(len(frame["people"]) for frame in pose_kps)
178
+
179
+ def __init__(self, pose_kps, person_idx=0) -> None:
180
+ self.width, self.height = pose_kps[0]["canvas_width"], pose_kps[0]["canvas_height"]
181
+ self.poses = [
182
+ self.normalize(pose_frame["people"][person_idx]["pose_keypoints_2d"])
183
+ if person_idx < len(pose_frame["people"])
184
+ else None
185
+ for pose_frame in pose_kps
186
+ ]
187
+
188
+ def normalize(self, pose_kps_2d):
189
+ n = len(pose_kps_2d) // 3
190
+ pose_kps_2d = rearrange(np.array(pose_kps_2d), "(n c) -> n c", n=n, c=3)
191
+ pose_kps_2d[np.argwhere(pose_kps_2d[:,2]==0), :] = np.iinfo(np.int32).max // 2 #Safe large value
192
+ pose_kps_2d = pose_kps_2d[:, :2]
193
+ if is_normalized(pose_kps_2d):
194
+ pose_kps_2d *= (self.width, self.height)
195
+ return pose_kps_2d
196
+
197
+ def get_xyxy_bboxes(self, part_name, bbox_size=(128, 256)):
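+ # Returns {frame_idx: (x1, y1, x2, y2)}: a fixed-size box of bbox_size centred on the mean of
+ # the part's keypoints; frames where this person is absent get a far off-canvas sentinel box.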
198
+ width, height = bbox_size
199
+ xyxy_bboxes = {}
200
+ for idx, pose in enumerate(self.poses):
201
+ if pose is None:
202
+ xyxy_bboxes[idx] = (np.iinfo(np.int32).max // 2,) * 4
203
+ continue
204
+ pts = pose[BODY_PART_INDEXES[part_name], :]
205
+
206
+ #top_left = np.min(pts[:,0]), np.min(pts[:,1])
207
+ #bottom_right = np.max(pts[:,0]), np.max(pts[:,1])
208
+ #pad_width = np.maximum(width - (bottom_right[0]-top_left[0]), 0) / 2
209
+ #pad_height = np.maximum(height - (bottom_right[1]-top_left[1]), 0) / 2
210
+ #xyxy_bboxes.append((
211
+ # top_left[0] - pad_width, top_left[1] - pad_height,
212
+ # bottom_right[0] + pad_width, bottom_right[1] + pad_height,
213
+ #))
214
+
215
+ x_mid, y_mid = np.mean(pts[:, 0]), np.mean(pts[:, 1])
216
+ xyxy_bboxes[idx] = (
217
+ x_mid - width/2, y_mid - height/2,
218
+ x_mid + width/2, y_mid + height/2
219
+ )
220
+ return xyxy_bboxes
221
+
222
+ class UpperBodyTrackingFromPoseKps:
223
+ PART_NAMES = ["Head", "Neck", "Shoulder", "Torso", "RArm", "RForearm", "LArm", "LForearm"]
224
+
225
+ @classmethod
226
+ def INPUT_TYPES(s):
227
+ return {
228
+ "required": {
229
+ "pose_kps": ("POSE_KEYPOINT",),
230
+ "id_include": ("STRING", {"default": '', "multiline": False}),
231
+ **{part_name + "_width_height": ("STRING", {"default": BODY_PART_DEFAULT_W_H[part_name], "multiline": False}) for part_name in s.PART_NAMES}
232
+ }
233
+ }
234
+
235
+ RETURN_TYPES = ("TRACKING", "STRING")
236
+ RETURN_NAMES = ("tracking", "prompt")
237
+ FUNCTION = "convert"
238
+ CATEGORY = "ControlNet Preprocessors/Pose Keypoint Postprocess"
239
+
240
+ def convert(self, pose_kps, id_include, **parts_width_height):
241
+ parts_width_height = {part_name.replace("_width_height", ''): value for part_name, value in parts_width_height.items()}
242
+ enabled_part_names = [part_name for part_name in self.PART_NAMES if len(parts_width_height[part_name].strip())]
243
+ tracked = {part_name: {} for part_name in enabled_part_names}
244
+ id_include = id_include.strip()
245
+ id_include = list(map(int, id_include.split(','))) if len(id_include) else []
246
+ prompt_string = ''
247
+ pose_kps, max_people = SinglePersonProcess.sort_and_get_max_people(pose_kps)
248
+
249
+ for person_idx in range(max_people):
250
+ if len(id_include) and person_idx not in id_include:
251
+ continue
252
+ processor = SinglePersonProcess(pose_kps, person_idx)
253
+ for part_name in enabled_part_names:
254
+ bbox_size = tuple(map(int, parts_width_height[part_name].split(',')))
255
+ part_bboxes = processor.get_xyxy_bboxes(part_name, bbox_size)
256
+ id_coordinates = {idx: part_bbox+(processor.width, processor.height) for idx, part_bbox in part_bboxes.items()}
257
+ tracked[part_name][person_idx] = id_coordinates
258
+
259
+ for class_name, class_data in tracked.items():
260
+ for class_id in class_data.keys():
261
+ class_id_str = str(class_id)
262
+ # Use the incoming prompt for each class name and ID
263
+ _class_name = class_name.replace('L', '').replace('R', '').lower()
264
+ prompt_string += f'"{class_id_str}.{class_name}": "({_class_name})",\n'
265
+
266
+ return (tracked, prompt_string)
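+ # As built above, tracked[part_name][person_idx] maps each frame index to
+ # (x1, y1, x2, y2, canvas_w, canvas_h), and prompt_string holds one
+ # '"<person_id>.<Part>": "(<part>)",' line per tracked part, intended for
+ # InstanceDiffusion-style workflows (per this node's display name).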
267
+
268
+
269
+ def numpy2torch(np_image: np.ndarray) -> torch.Tensor:
270
+ """ [H, W, C] => [B=1, H, W, C]"""
271
+ return torch.from_numpy(np_image.astype(np.float32) / 255).unsqueeze(0)
272
+
273
+
274
+ class RenderPeopleKps:
275
+ @classmethod
276
+ def INPUT_TYPES(s):
277
+ return {
278
+ "required": {
279
+ "kps": ("POSE_KEYPOINT",),
280
+ "render_body": ("BOOLEAN", {"default": True}),
281
+ "render_hand": ("BOOLEAN", {"default": True}),
282
+ "render_face": ("BOOLEAN", {"default": True}),
283
+ }
284
+ }
285
+
286
+ RETURN_TYPES = ("IMAGE",)
287
+ FUNCTION = "render"
288
+ CATEGORY = "ControlNet Preprocessors/Pose Keypoint Postprocess"
289
+
290
+ def render(self, kps, render_body, render_hand, render_face) -> tuple[np.ndarray]:
291
+ if isinstance(kps, list):
292
+ kps = kps[0]
293
+
294
+ poses, _, height, width = decode_json_as_poses(kps)
295
+ np_image = draw_poses(
296
+ poses,
297
+ height,
298
+ width,
299
+ render_body,
300
+ render_hand,
301
+ render_face,
302
+ )
303
+ return (numpy2torch(np_image),)
304
+
305
+ class RenderAnimalKps:
306
+ @classmethod
307
+ def INPUT_TYPES(s):
308
+ return {
309
+ "required": {
310
+ "kps": ("POSE_KEYPOINT",),
311
+ }
312
+ }
313
+
314
+ RETURN_TYPES = ("IMAGE",)
315
+ FUNCTION = "render"
316
+ CATEGORY = "ControlNet Preprocessors/Pose Keypoint Postprocess"
317
+
318
+ def render(self, kps) -> tuple[np.ndarray]:
319
+ if isinstance(kps, list):
320
+ kps = kps[0]
321
+
322
+ _, poses, height, width = decode_json_as_poses(kps)
323
+ np_image = draw_animalposes(poses, height, width)
324
+ return (numpy2torch(np_image),)
325
+
326
+
327
+ NODE_CLASS_MAPPINGS = {
328
+ "SavePoseKpsAsJsonFile": SavePoseKpsAsJsonFile,
329
+ "FacialPartColoringFromPoseKps": FacialPartColoringFromPoseKps,
330
+ "UpperBodyTrackingFromPoseKps": UpperBodyTrackingFromPoseKps,
331
+ "RenderPeopleKps": RenderPeopleKps,
332
+ "RenderAnimalKps": RenderAnimalKps,
333
+ }
334
+ NODE_DISPLAY_NAME_MAPPINGS = {
335
+ "SavePoseKpsAsJsonFile": "Save Pose Keypoints",
336
+ "FacialPartColoringFromPoseKps": "Colorize Facial Parts from PoseKPS",
337
+ "UpperBodyTrackingFromPoseKps": "Upper Body Tracking From PoseKps (InstanceDiffusion)",
338
+ "RenderPeopleKps": "Render Pose JSON (Human)",
339
+ "RenderAnimalKps": "Render Pose JSON (Animal)",
340
+ }
node_wrappers/recolor.py ADDED
@@ -0,0 +1,46 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+
3
+ class ImageLuminanceDetector:
4
+ @classmethod
5
+ def INPUT_TYPES(s):
6
+ #https://github.com/Mikubill/sd-webui-controlnet/blob/416c345072c9c2066101e225964e3986abe6945e/scripts/processor.py#L1229
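+ # gamma_correction is presumably applied as a simple power-law (gamma) curve on the normalized
+ # single-channel map, as in the processor linked above: values below 1 lift mid-tones, values
+ # above 1 darken them.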
7
+ return define_preprocessor_inputs(
8
+ gamma_correction=INPUT.FLOAT(default=1.0, min=0.1, max=2.0),
9
+ resolution=INPUT.RESOLUTION()
10
+ )
11
+
12
+ RETURN_TYPES = ("IMAGE",)
13
+ FUNCTION = "execute"
14
+
15
+ CATEGORY = "ControlNet Preprocessors/Recolor"
16
+
17
+ def execute(self, image, gamma_correction=1.0, resolution=512, **kwargs):
18
+ from custom_controlnet_aux.recolor import Recolorizer
19
+ return (common_annotator_call(Recolorizer(), image, mode="luminance", gamma_correction=gamma_correction , resolution=resolution), )
20
+
21
+ class ImageIntensityDetector:
22
+ @classmethod
23
+ def INPUT_TYPES(s):
24
+ #https://github.com/Mikubill/sd-webui-controlnet/blob/416c345072c9c2066101e225964e3986abe6945e/scripts/processor.py#L1229
25
+ return define_preprocessor_inputs(
26
+ gamma_correction=INPUT.FLOAT(default=1.0, min=0.1, max=2.0),
27
+ resolution=INPUT.RESOLUTION()
28
+ )
29
+
30
+ RETURN_TYPES = ("IMAGE",)
31
+ FUNCTION = "execute"
32
+
33
+ CATEGORY = "ControlNet Preprocessors/Recolor"
34
+
35
+ def execute(self, image, gamma_correction=1.0, resolution=512, **kwargs):
36
+ from custom_controlnet_aux.recolor import Recolorizer
37
+ return (common_annotator_call(Recolorizer(), image, mode="intensity", gamma_correction=gamma_correction , resolution=resolution), )
38
+
39
+ NODE_CLASS_MAPPINGS = {
40
+ "ImageLuminanceDetector": ImageLuminanceDetector,
41
+ "ImageIntensityDetector": ImageIntensityDetector
42
+ }
43
+ NODE_DISPLAY_NAME_MAPPINGS = {
44
+ "ImageLuminanceDetector": "Image Luminance",
45
+ "ImageIntensityDetector": "Image Intensity"
46
+ }
node_wrappers/scribble.py ADDED
@@ -0,0 +1,74 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT, nms
2
+ import comfy.model_management as model_management
3
+ import cv2
4
+
5
+ class Scribble_Preprocessor:
6
+ @classmethod
7
+ def INPUT_TYPES(s):
8
+ return define_preprocessor_inputs(resolution=INPUT.RESOLUTION())
9
+
10
+ RETURN_TYPES = ("IMAGE",)
11
+ FUNCTION = "execute"
12
+
13
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
14
+
15
+ def execute(self, image, resolution=512, **kwargs):
16
+ from custom_controlnet_aux.scribble import ScribbleDetector
17
+
18
+ model = ScribbleDetector()
19
+ return (common_annotator_call(model, image, resolution=resolution), )
20
+
21
+ class Scribble_XDoG_Preprocessor:
22
+ @classmethod
23
+ def INPUT_TYPES(s):
24
+ return define_preprocessor_inputs(
25
+ threshold=INPUT.INT(default=32, min=1, max=64),
26
+ resolution=INPUT.RESOLUTION()
27
+ )
28
+
29
+ RETURN_TYPES = ("IMAGE",)
30
+ FUNCTION = "execute"
31
+
32
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
33
+
34
+ def execute(self, image, threshold=32, resolution=512, **kwargs):
35
+ from custom_controlnet_aux.scribble import ScribbleXDog_Detector
36
+
37
+ model = ScribbleXDog_Detector()
38
+ return (common_annotator_call(model, image, resolution=resolution, thr_a=threshold), )
39
+
40
+ class Scribble_PiDiNet_Preprocessor:
41
+ @classmethod
42
+ def INPUT_TYPES(s):
43
+ return define_preprocessor_inputs(
44
+ safe=INPUT.COMBO(["enable", "disable"]),
45
+ resolution=INPUT.RESOLUTION()
46
+ )
47
+
48
+ RETURN_TYPES = ("IMAGE",)
49
+ FUNCTION = "execute"
50
+
51
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
52
+
53
+ def execute(self, image, safe="enable", resolution=512):
54
+ def model(img, **kwargs):
55
+ from custom_controlnet_aux.pidi import PidiNetDetector
56
+ pidinet = PidiNetDetector.from_pretrained().to(model_management.get_torch_device())
57
+ result = pidinet(img, scribble=True, **kwargs)
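+ # Post-process the soft edges into scribble-style strokes: NMS thins the edge map, the Gaussian
+ # blur smooths it, and the final threshold binarizes everything above a small value to white
+ # lines on black.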
58
+ result = nms(result, 127, 3.0)
59
+ result = cv2.GaussianBlur(result, (0, 0), 3.0)
60
+ result[result > 4] = 255
61
+ result[result < 255] = 0
62
+ return result
63
+ return (common_annotator_call(model, image, resolution=resolution, safe=safe=="enable"),)
64
+
65
+ NODE_CLASS_MAPPINGS = {
66
+ "ScribblePreprocessor": Scribble_Preprocessor,
67
+ "Scribble_XDoG_Preprocessor": Scribble_XDoG_Preprocessor,
68
+ "Scribble_PiDiNet_Preprocessor": Scribble_PiDiNet_Preprocessor
69
+ }
70
+ NODE_DISPLAY_NAME_MAPPINGS = {
71
+ "ScribblePreprocessor": "Scribble Lines",
72
+ "Scribble_XDoG_Preprocessor": "Scribble XDoG Lines",
73
+ "Scribble_PiDiNet_Preprocessor": "Scribble PiDiNet Lines"
74
+ }
node_wrappers/segment_anything.py ADDED
@@ -0,0 +1,27 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class SAM_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(resolution=INPUT.RESOLUTION())
8
+
9
+ RETURN_TYPES = ("IMAGE",)
10
+ FUNCTION = "execute"
11
+
12
+ CATEGORY = "ControlNet Preprocessors/others"
13
+
14
+ def execute(self, image, resolution=512, **kwargs):
15
+ from custom_controlnet_aux.sam import SamDetector
16
+
17
+ mobile_sam = SamDetector.from_pretrained().to(model_management.get_torch_device())
18
+ out = common_annotator_call(mobile_sam, image, resolution=resolution)
19
+ del mobile_sam
20
+ return (out, )
21
+
22
+ NODE_CLASS_MAPPINGS = {
23
+ "SAMPreprocessor": SAM_Preprocessor
24
+ }
25
+ NODE_DISPLAY_NAME_MAPPINGS = {
26
+ "SAMPreprocessor": "SAM Segmentor"
27
+ }
node_wrappers/shuffle.py ADDED
@@ -0,0 +1,27 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT, MAX_RESOLUTION
2
+ import comfy.model_management as model_management
3
+
4
+ class Shuffle_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ resolution=INPUT.RESOLUTION(),
9
+ seed=INPUT.SEED()
10
+ )
11
+ RETURN_TYPES = ("IMAGE",)
12
+ FUNCTION = "preprocess"
13
+
14
+ CATEGORY = "ControlNet Preprocessors/T2IAdapter-only"
15
+
16
+ def preprocess(self, image, resolution=512, seed=0):
17
+ from custom_controlnet_aux.shuffle import ContentShuffleDetector
18
+
19
+ return (common_annotator_call(ContentShuffleDetector(), image, resolution=resolution, seed=seed), )
20
+
21
+ NODE_CLASS_MAPPINGS = {
22
+ "ShufflePreprocessor": Shuffle_Preprocessor
23
+ }
24
+
25
+ NODE_DISPLAY_NAME_MAPPINGS = {
26
+ "ShufflePreprocessor": "Content Shuffle"
27
+ }
node_wrappers/teed.py ADDED
@@ -0,0 +1,30 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class TEED_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ safe_steps=INPUT.INT(default=2, max=10),
9
+ resolution=INPUT.RESOLUTION()
10
+ )
11
+
12
+ RETURN_TYPES = ("IMAGE",)
13
+ FUNCTION = "execute"
14
+
15
+ CATEGORY = "ControlNet Preprocessors/Line Extractors"
16
+
17
+ def execute(self, image, safe_steps=2, resolution=512, **kwargs):
18
+ from custom_controlnet_aux.teed import TEDDetector
19
+
20
+ model = TEDDetector.from_pretrained().to(model_management.get_torch_device())
21
+ out = common_annotator_call(model, image, resolution=resolution, safe_steps=safe_steps)
22
+ del model
23
+ return (out, )
24
+
25
+ NODE_CLASS_MAPPINGS = {
26
+ "TEEDPreprocessor": TEED_Preprocessor,
27
+ }
28
+ NODE_DISPLAY_NAME_MAPPINGS = {
29
+ "TEEDPreprocessor": "TEED Soft-Edge Lines",
30
+ }
node_wrappers/tile.py ADDED
@@ -0,0 +1,73 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+
3
+
4
+ class Tile_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(
8
+ pyrUp_iters=INPUT.INT(default=3, min=1, max=10),
9
+ resolution=INPUT.RESOLUTION()
10
+ )
11
+
12
+
13
+ RETURN_TYPES = ("IMAGE",)
14
+ FUNCTION = "execute"
15
+
16
+ CATEGORY = "ControlNet Preprocessors/tile"
17
+
18
+ def execute(self, image, pyrUp_iters, resolution=512, **kwargs):
19
+ from custom_controlnet_aux.tile import TileDetector
20
+
21
+ return (common_annotator_call(TileDetector(), image, pyrUp_iters=pyrUp_iters, resolution=resolution),)
22
+
23
+ class TTPlanet_TileGF_Preprocessor:
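+ # "GF" = guided-filter variant (an assumption from the parameter names): radius and eps are
+ # presumably the guided filter's window radius and regularization strength, while scale_factor
+ # and blur_strength control the downscale-and-blur step.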
24
+ @classmethod
25
+ def INPUT_TYPES(s):
26
+ return define_preprocessor_inputs(
27
+ scale_factor=INPUT.FLOAT(default=1.00, min=1.000, max=8.00),
28
+ blur_strength=INPUT.FLOAT(default=2.0, min=1.0, max=10.0),
29
+ radius=INPUT.INT(default=7, min=1, max=20),
30
+ eps=INPUT.FLOAT(default=0.01, min=0.001, max=0.1, step=0.001),
31
+ resolution=INPUT.RESOLUTION()
32
+ )
33
+
34
+ RETURN_TYPES = ("IMAGE",)
35
+ FUNCTION = "execute"
36
+
37
+ CATEGORY = "ControlNet Preprocessors/tile"
38
+
39
+ def execute(self, image, scale_factor, blur_strength, radius, eps, **kwargs):
40
+ from custom_controlnet_aux.tile import TTPlanet_Tile_Detector_GF
41
+
42
+ return (common_annotator_call(TTPlanet_Tile_Detector_GF(), image, scale_factor=scale_factor, blur_strength=blur_strength, radius=radius, eps=eps),)
43
+
44
+ class TTPlanet_TileSimple_Preprocessor:
45
+ @classmethod
46
+ def INPUT_TYPES(s):
47
+ return define_preprocessor_inputs(
48
+ scale_factor=INPUT.FLOAT(default=1.00, min=1.000, max=8.00),
49
+ blur_strength=INPUT.FLOAT(default=2.0, min=1.0, max=10.0),
50
+ )
51
+
52
+ RETURN_TYPES = ("IMAGE",)
53
+ FUNCTION = "execute"
54
+
55
+ CATEGORY = "ControlNet Preprocessors/tile"
56
+
57
+ def execute(self, image, scale_factor, blur_strength):
58
+ from custom_controlnet_aux.tile import TTPLanet_Tile_Detector_Simple
59
+
60
+ return (common_annotator_call(TTPLanet_Tile_Detector_Simple(), image, scale_factor=scale_factor, blur_strength=blur_strength),)
61
+
62
+
63
+ NODE_CLASS_MAPPINGS = {
64
+ "TilePreprocessor": Tile_Preprocessor,
65
+ "TTPlanet_TileGF_Preprocessor": TTPlanet_TileGF_Preprocessor,
66
+ "TTPlanet_TileSimple_Preprocessor": TTPlanet_TileSimple_Preprocessor
67
+ }
68
+
69
+ NODE_DISPLAY_NAME_MAPPINGS = {
70
+ "TilePreprocessor": "Tile",
71
+ "TTPlanet_TileGF_Preprocessor": "TTPlanet Tile GuidedFilter",
72
+ "TTPlanet_TileSimple_Preprocessor": "TTPlanet Tile Simple"
73
+ }
node_wrappers/uniformer.py ADDED
@@ -0,0 +1,29 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class Uniformer_SemSegPreprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(resolution=INPUT.RESOLUTION())
8
+
9
+ RETURN_TYPES = ("IMAGE",)
10
+ FUNCTION = "semantic_segmentate"
11
+
12
+ CATEGORY = "ControlNet Preprocessors/Semantic Segmentation"
13
+
14
+ def semantic_segmentate(self, image, resolution=512):
15
+ from custom_controlnet_aux.uniformer import UniformerSegmentor
16
+
17
+ model = UniformerSegmentor.from_pretrained().to(model_management.get_torch_device())
18
+ out = common_annotator_call(model, image, resolution=resolution)
19
+ del model
20
+ return (out, )
21
+
22
+ NODE_CLASS_MAPPINGS = {
23
+ "UniFormer-SemSegPreprocessor": Uniformer_SemSegPreprocessor,
24
+ "SemSegPreprocessor": Uniformer_SemSegPreprocessor,
25
+ }
26
+ NODE_DISPLAY_NAME_MAPPINGS = {
27
+ "UniFormer-SemSegPreprocessor": "UniFormer Segmentor",
28
+ "SemSegPreprocessor": "Semantic Segmentor (legacy, alias for UniFormer)",
29
+ }
node_wrappers/unimatch.py ADDED
@@ -0,0 +1,75 @@
1
+ from ..utils import common_annotator_call
2
+ import comfy.model_management as model_management
3
+ import torch
4
+ import numpy as np
5
+ from einops import rearrange
6
+ import torch.nn.functional as F
7
+
8
+ class Unimatch_OptFlowPreprocessor:
9
+ @classmethod
10
+ def INPUT_TYPES(s):
11
+ return {
12
+ "required": dict(
13
+ image=("IMAGE",),
14
+ ckpt_name=(
15
+ ["gmflow-scale1-mixdata.pth", "gmflow-scale2-mixdata.pth", "gmflow-scale2-regrefine6-mixdata.pth"],
16
+ {"default": "gmflow-scale2-regrefine6-mixdata.pth"}
17
+ ),
18
+ backward_flow=("BOOLEAN", {"default": False}),
19
+ bidirectional_flow=("BOOLEAN", {"default": False})
20
+ )
21
+ }
22
+
23
+ RETURN_TYPES = ("OPTICAL_FLOW", "IMAGE")
24
+ RETURN_NAMES = ("OPTICAL_FLOW", "PREVIEW_IMAGE")
25
+ FUNCTION = "estimate"
26
+
27
+ CATEGORY = "ControlNet Preprocessors/Optical Flow"
28
+
29
+ def estimate(self, image, ckpt_name, backward_flow=False, bidirectional_flow=False):
30
+ assert len(image) > 1, "[Unimatch] At least two frames are required for optical flow estimation. Only use this node on video input."
31
+ from custom_controlnet_aux.unimatch import UnimatchDetector
32
+ tensor_images = image
33
+ model = UnimatchDetector.from_pretrained(filename=ckpt_name).to(model_management.get_torch_device())
34
+ flows, vis_flows = [], []
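+ # Consecutive frames are paired, so N input frames yield N-1 flow fields and N-1 previews.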
35
+ for i in range(len(tensor_images) - 1):
36
+ image0, image1 = np.asarray(image[i:i+2].cpu() * 255., dtype=np.uint8)
37
+ flow, vis_flow = model(image0, image1, output_type="np", pred_bwd_flow=backward_flow, pred_bidir_flow=bidirectional_flow)
38
+ flows.append(torch.from_numpy(flow).float())
39
+ vis_flows.append(torch.from_numpy(vis_flow).float() / 255.)
40
+ del model
41
+ return (torch.stack(flows, dim=0), torch.stack(vis_flows, dim=0))
42
+
43
+ class MaskOptFlow:
44
+ @classmethod
45
+ def INPUT_TYPES(s):
46
+ return {
47
+ "required": dict(optical_flow=("OPTICAL_FLOW",), mask=("MASK",))
48
+ }
49
+
50
+ RETURN_TYPES = ("OPTICAL_FLOW", "IMAGE")
51
+ RETURN_NAMES = ("OPTICAL_FLOW", "PREVIEW_IMAGE")
52
+ FUNCTION = "mask_opt_flow"
53
+
54
+ CATEGORY = "ControlNet Preprocessors/Optical Flow"
55
+
56
+ def mask_opt_flow(self, optical_flow, mask):
57
+ from custom_controlnet_aux.unimatch import flow_to_image
58
+ assert len(mask) >= len(optical_flow), f"Not enough masks to mask optical flow: {len(mask)} vs {len(optical_flow)}"
59
+ mask = mask[:optical_flow.shape[0]]
60
+ mask = F.interpolate(mask, optical_flow.shape[1:3])
61
+ mask = rearrange(mask, "n 1 h w -> n h w 1")
62
+ vis_flows = torch.stack([torch.from_numpy(flow_to_image(flow)).float() / 255. for flow in optical_flow.numpy()], dim=0)
63
+ vis_flows *= mask
64
+ optical_flow *= mask
65
+ return (optical_flow, vis_flows)
66
+
67
+
68
+ NODE_CLASS_MAPPINGS = {
69
+ "Unimatch_OptFlowPreprocessor": Unimatch_OptFlowPreprocessor,
70
+ "MaskOptFlow": MaskOptFlow
71
+ }
72
+ NODE_DISPLAY_NAME_MAPPINGS = {
73
+ "Unimatch_OptFlowPreprocessor": "Unimatch Optical Flow",
74
+ "MaskOptFlow": "Mask Optical Flow (DragNUWA)"
75
+ }
node_wrappers/zoe.py ADDED
@@ -0,0 +1,27 @@
1
+ from ..utils import common_annotator_call, define_preprocessor_inputs, INPUT
2
+ import comfy.model_management as model_management
3
+
4
+ class Zoe_Depth_Map_Preprocessor:
5
+ @classmethod
6
+ def INPUT_TYPES(s):
7
+ return define_preprocessor_inputs(resolution=INPUT.RESOLUTION())
8
+
9
+ RETURN_TYPES = ("IMAGE",)
10
+ FUNCTION = "execute"
11
+
12
+ CATEGORY = "ControlNet Preprocessors/Normal and Depth Estimators"
13
+
14
+ def execute(self, image, resolution=512, **kwargs):
15
+ from custom_controlnet_aux.zoe import ZoeDetector
16
+
17
+ model = ZoeDetector.from_pretrained().to(model_management.get_torch_device())
18
+ out = common_annotator_call(model, image, resolution=resolution)
19
+ del model
20
+ return (out, )
21
+
22
+ NODE_CLASS_MAPPINGS = {
23
+ "Zoe-DepthMapPreprocessor": Zoe_Depth_Map_Preprocessor
24
+ }
25
+ NODE_DISPLAY_NAME_MAPPINGS = {
26
+ "Zoe-DepthMapPreprocessor": "Zoe Depth Map"
27
+ }
pyproject.toml ADDED
@@ -0,0 +1,14 @@
1
+ [project]
2
+ name = "comfyui_controlnet_aux"
3
+ description = "Plug-and-play ComfyUI node sets for making ControlNet hint images"
4
+ version = "1.0.4-alpha.4"
5
+ license = "LICENSE"
6
+ dependencies = ["torch", "importlib_metadata", "huggingface_hub", "scipy", "opencv-python>=4.7.0.72", "filelock", "numpy", "Pillow", "einops", "torchvision", "pyyaml", "scikit-image", "python-dateutil", "mediapipe", "svglib", "fvcore", "yapf", "omegaconf", "ftfy", "addict", "yacs", "trimesh[easy]", "albumentations", "scikit-learn"]
7
+
8
+ [project.urls]
9
+ Repository = "https://github.com/Fannovel16/comfyui_controlnet_aux"
10
+
11
+ [tool.comfy]
12
+ PublisherId = "fannovel16"
13
+ DisplayName = "comfyui_controlnet_aux"
14
+ Icon = ""
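+ # The [tool.comfy] table is ComfyUI Registry metadata (publisher id, display name, icon) rather
+ # than standard Python packaging configuration.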
requirements.txt ADDED
@@ -0,0 +1,24 @@
1
+ torch
2
+ importlib_metadata
3
+ huggingface_hub
4
+ scipy
5
+ opencv-python>=4.7.0.72
6
+ filelock
7
+ numpy
8
+ Pillow
9
+ einops
10
+ torchvision
11
+ pyyaml
12
+ scikit-image
13
+ python-dateutil
14
+ mediapipe
15
+ svglib
16
+ fvcore
17
+ yapf
18
+ omegaconf
19
+ ftfy
20
+ addict
21
+ yacs
22
+ trimesh[easy]
23
+ albumentations
24
+ scikit-learn