wanghaofan commited on
Commit
60198b1
1 Parent(s): 18b78ec

Upload 20 files

Browse files
controlnet_aux/.gitignore ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Initially taken from Github's Python gitignore file
2
+
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__/
5
+ *.py[cod]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # tests and logs
12
+ tests/fixtures/cached_*_text.txt
13
+ logs/
14
+ lightning_logs/
15
+ lang_code_data/
16
+ tests/outputs
17
+
18
+ # Distribution / packaging
19
+ .Python
20
+ build/
21
+ develop-eggs/
22
+ dist/
23
+ downloads/
24
+ eggs/
25
+ .eggs/
26
+ lib/
27
+ lib64/
28
+ parts/
29
+ sdist/
30
+ var/
31
+ wheels/
32
+ *.egg-info/
33
+ .installed.cfg
34
+ *.egg
35
+ MANIFEST
36
+
37
+ # PyInstaller
38
+ # Usually these files are written by a python script from a template
39
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
40
+ *.manifest
41
+ *.spec
42
+
43
+ # Installer logs
44
+ pip-log.txt
45
+ pip-delete-this-directory.txt
46
+
47
+ # Unit test / coverage reports
48
+ htmlcov/
49
+ .tox/
50
+ .nox/
51
+ .coverage
52
+ .coverage.*
53
+ .cache
54
+ nosetests.xml
55
+ coverage.xml
56
+ *.cover
57
+ .hypothesis/
58
+ .pytest_cache/
59
+
60
+ # Translations
61
+ *.mo
62
+ *.pot
63
+
64
+ # Django stuff:
65
+ *.log
66
+ local_settings.py
67
+ db.sqlite3
68
+
69
+ # Flask stuff:
70
+ instance/
71
+ .webassets-cache
72
+
73
+ # Scrapy stuff:
74
+ .scrapy
75
+
76
+ # Sphinx documentation
77
+ docs/_build/
78
+
79
+ # PyBuilder
80
+ target/
81
+
82
+ # Jupyter Notebook
83
+ .ipynb_checkpoints
84
+
85
+ # IPython
86
+ profile_default/
87
+ ipython_config.py
88
+
89
+ # pyenv
90
+ .python-version
91
+
92
+ # celery beat schedule file
93
+ celerybeat-schedule
94
+
95
+ # SageMath parsed files
96
+ *.sage.py
97
+
98
+ # Environments
99
+ .env
100
+ .venv
101
+ env/
102
+ venv/
103
+ ENV/
104
+ env.bak/
105
+ venv.bak/
106
+
107
+ # Spyder project settings
108
+ .spyderproject
109
+ .spyproject
110
+
111
+ # Rope project settings
112
+ .ropeproject
113
+
114
+ # mkdocs documentation
115
+ /site
116
+
117
+ # mypy
118
+ .mypy_cache/
119
+ .dmypy.json
120
+ dmypy.json
121
+
122
+ # Pyre type checker
123
+ .pyre/
124
+
125
+ # vscode
126
+ .vs
127
+ .vscode
128
+
129
+ # Pycharm
130
+ .idea
131
+
132
+ # TF code
133
+ tensorflow_code
134
+
135
+ # Models
136
+ proc_data
137
+
138
+ # examples
139
+ runs
140
+ /runs_old
141
+ /wandb
142
+ /examples/runs
143
+ /examples/**/*.args
144
+ /examples/rag/sweep
145
+
146
+ # data
147
+ /data
148
+ serialization_dir
149
+
150
+ # emacs
151
+ *.*~
152
+ debug.env
153
+
154
+ # vim
155
+ .*.swp
156
+
157
+ #ctags
158
+ tags
159
+
160
+ # pre-commit
161
+ .pre-commit*
162
+
163
+ # .lock
164
+ *.lock
165
+
166
+ # DS_Store (MacOS)
167
+ .DS_Store
168
+ # RL pipelines may produce mp4 outputs
169
+ *.mp4
170
+
171
+ # dependencies
172
+ /transformers
173
+
174
+ # ruff
175
+ .ruff_cache
176
+
177
+ wandb
178
+
controlnet_aux/LICENSE.txt ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
controlnet_aux/README.md ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ControlNet auxiliary models
2
+
3
+ This is a PyPI installable package of [lllyasviel's ControlNet Annotators](https://github.com/lllyasviel/ControlNet/tree/main/annotator)
4
+
5
+ The code is copy-pasted from the respective folders in <https://github.com/lllyasviel/ControlNet/tree/main/annotator> and connected to [the 🤗 Hub](https://huggingface.co/lllyasviel/Annotators).
6
+
7
+ All credit & copyright goes to <https://github.com/lllyasviel> .
8
+
9
+ ## Install
10
+
11
+ ```
12
+ pip install -U controlnet-aux
13
+ ```
14
+
15
+ To support DWPose, which depends on MMDetection, MMCV and MMPose, also install:
16
+
17
+ ```
18
+ pip install -U openmim
19
+ mim install mmengine
20
+ mim install "mmcv>=2.0.1"
21
+ mim install "mmdet>=3.1.0"
22
+ mim install "mmpose>=1.1.0"
23
+ ```
24
+
25
+ ## Usage
26
+
27
+ You can use the processor class, which can load each of the auxiliary models with the following code
28
+
29
+ ```python
30
+ import requests
31
+ from PIL import Image
32
+ from io import BytesIO
33
+
34
+ from controlnet_aux.processor import Processor
35
+
36
+ # load image
37
+ url = "https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png"
38
+
39
+ response = requests.get(url)
40
+ img = Image.open(BytesIO(response.content)).convert("RGB").resize((512, 512))
41
+
42
+ # load processor from processor_id
43
+ # options are:
44
+ # ["canny", "depth_leres", "depth_leres++", "depth_midas", "depth_zoe", "lineart_anime",
45
+ # "lineart_coarse", "lineart_realistic", "mediapipe_face", "mlsd", "normal_bae", "normal_midas",
46
+ # "openpose", "openpose_face", "openpose_faceonly", "openpose_full", "openpose_hand",
47
+ # "scribble_hed", "scribble_pidinet", "shuffle", "softedge_hed", "softedge_hedsafe",
48
+ # "softedge_pidinet", "softedge_pidsafe", "dwpose"]
49
+ processor_id = 'scribble_hed'
50
+ processor = Processor(processor_id)
51
+
52
+ processed_image = processor(img, to_pil=True)
53
+ ```
54
+
55
+ Each model can be loaded individually by importing and instantiating them as follows
56
+
57
+ ```python
58
+ from PIL import Image
59
+ import requests
60
+ from io import BytesIO
61
+ from controlnet_aux import HEDdetector, MidasDetector, MLSDdetector, OpenposeDetector, PidiNetDetector, NormalBaeDetector, LineartDetector, LineartAnimeDetector, LineartStandardDetector, CannyDetector, ContentShuffleDetector, ZoeDetector, MediapipeFaceDetector, SamDetector, LeresDetector, DWposeDetector, TEEDdetector, AnylineDetector
62
+
63
+ # load image
64
+ url = "https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png"
65
+
66
+ response = requests.get(url)
67
+ img = Image.open(BytesIO(response.content)).convert("RGB").resize((512, 512))
68
+
69
+ # load checkpoints
70
+ hed = HEDdetector.from_pretrained("lllyasviel/Annotators")
71
+ midas = MidasDetector.from_pretrained("lllyasviel/Annotators")
72
+ mlsd = MLSDdetector.from_pretrained("lllyasviel/Annotators")
73
+ open_pose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
74
+ pidi = PidiNetDetector.from_pretrained("lllyasviel/Annotators")
75
+ normal_bae = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
76
+ lineart = LineartDetector.from_pretrained("lllyasviel/Annotators")
77
+ lineart_anime = LineartAnimeDetector.from_pretrained("lllyasviel/Annotators")
78
+ zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
79
+ sam = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints")
80
+ mobile_sam = SamDetector.from_pretrained("dhkim2810/MobileSAM", model_type="vit_t", filename="mobile_sam.pt")
81
+ leres = LeresDetector.from_pretrained("lllyasviel/Annotators")
82
+ teed = TEEDdetector.from_pretrained("fal-ai/teed", filename="5_model.pth")
83
+ anyline = AnylineDetector.from_pretrained(
84
+ "TheMistoAI/MistoLine", filename="MTEED.pth", subfolder="Anyline"
85
+ )
86
+
87
+ # specify configs, ckpts and device, or it will be downloaded automatically and use cpu by default
88
+ # det_config: ./src/controlnet_aux/dwpose/yolox_config/yolox_l_8xb8-300e_coco.py
89
+ # det_ckpt: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth
90
+ # pose_config: ./src/controlnet_aux/dwpose/dwpose_config/dwpose-l_384x288.py
91
+ # pose_ckpt: https://huggingface.co/wanghaofan/dw-ll_ucoco_384/resolve/main/dw-ll_ucoco_384.pth
92
+ import torch
93
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
94
+ dwpose = DWposeDetector(det_config=det_config, det_ckpt=det_ckpt, pose_config=pose_config, pose_ckpt=pose_ckpt, device=device)
95
+
96
+ # instantiate
97
+ canny = CannyDetector()
98
+ content = ContentShuffleDetector()
99
+ face_detector = MediapipeFaceDetector()
100
+ lineart_standard = LineartStandardDetector()
101
+
102
+
103
+ # process
104
+ processed_image_hed = hed(img)
105
+ processed_image_midas = midas(img)
106
+ processed_image_mlsd = mlsd(img)
107
+ processed_image_open_pose = open_pose(img, hand_and_face=True)
108
+ processed_image_pidi = pidi(img, safe=True)
109
+ processed_image_normal_bae = normal_bae(img)
110
+ processed_image_lineart = lineart(img, coarse=True)
111
+ processed_image_lineart_anime = lineart_anime(img)
112
+ processed_image_zoe = zoe(img)
113
+ processed_image_sam = sam(img)
114
+ processed_image_leres = leres(img)
115
+ processed_image_teed = teed(img, detect_resolution=1024)
116
+ processed_image_anyline = anyline(img, detect_resolution=1280)
117
+
118
+ processed_image_canny = canny(img)
119
+ processed_image_content = content(img)
120
+ processed_image_mediapipe_face = face_detector(img)
121
+ processed_image_dwpose = dwpose(img)
122
+ processed_image_lineart_standard = lineart_standard(img, detect_resolution=1024)
123
+ ```
124
+
125
+ ### Image resolution
126
+
127
+ In order to maintain the image aspect ratio, `detect_resolution`, `image_resolution` and image sizes need to be multiples of `64`.
controlnet_aux/setup.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2023 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Simple check list from AllenNLP repo: https://github.com/allenai/allennlp/blob/main/setup.py
17
+
18
+ To create the package for pypi.
19
+
20
+ 1. Run `make pre-release` (or `make pre-patch` for a patch release) then run `make fix-copies` to fix the index of the
21
+ documentation.
22
+
23
+ If releasing on a special branch, copy the updated README.md on the main branch for the commit you will make
24
+ for the post-release and run `make fix-copies` on the main branch as well.
25
+
26
+ 2. Run Tests for Amazon Sagemaker. The documentation is located in `./tests/sagemaker/README.md`, otherwise @philschmid.
27
+
28
+ 3. Unpin specific versions from setup.py that use a git install.
29
+
30
+ 4. Checkout the release branch (v<RELEASE>-release, for example v4.19-release), and commit these changes with the
31
+ message: "Release: <RELEASE>" and push.
32
+
33
+ 5. Wait for the tests on main to be completed and be green (otherwise revert and fix bugs)
34
+
35
+ 6. Add a tag in git to mark the release: "git tag v<RELEASE> -m 'Adds tag v<RELEASE> for pypi' "
36
+ Push the tag to git: git push --tags origin v<RELEASE>-release
37
+
38
+ 7. Build both the sources and the wheel. Do not change anything in setup.py between
39
+ creating the wheel and the source distribution (obviously).
40
+
41
+ For the wheel, run: "python setup.py bdist_wheel" in the top level directory.
42
+ (this will build a wheel for the python version you use to build it).
43
+
44
+ For the sources, run: "python setup.py sdist"
45
+ You should now have a /dist directory with both .whl and .tar.gz source versions.
46
+
47
+ 8. Check that everything looks correct by uploading the package to the pypi test server:
48
+
49
+ twine upload dist/* -r pypitest
50
+ (pypi suggest using twine as other methods upload files via plaintext.)
51
+ You may have to specify the repository url, use the following command then:
52
+ twine upload dist/* -r pypitest --repository-url=https://test.pypi.org/legacy/
53
+
54
+ Check that you can install it in a virtualenv by running:
55
+ pip install -i https://testpypi.python.org/pypi controlnet_aux
56
+
57
+ Check you can run the following commands:
58
+ python -c "from diffusers import pipeline; classifier = pipeline('text-classification'); print(classifier('What a nice release'))"
59
+ python -c "from diffusers import *"
60
+
61
+ 9. Upload the final version to actual pypi:
62
+ twine upload dist/* -r pypi
63
+
64
+ 10. Copy the release notes from RELEASE.md to the tag in github once everything is looking hunky-dory.
65
+
66
+ 11. Run `make post-release` (or, for a patch release, `make post-patch`). If you were on a branch for the release,
67
+ you need to go back to main before executing this.
68
+ """
69
+
70
+ import os
71
+ import re
72
+ from distutils.core import Command
73
+
74
+ from setuptools import find_packages, setup
75
+
76
# IMPORTANT:
# 1. all dependencies should be listed here with their version requirements if any
# 2. once modified, run `python setup.py deps_table_update` to regenerate
#    src/controlnet_aux/dependency_versions_table.py
_deps = [
    "Pillow",
    "torch",
    "numpy",
    "filelock",
    "importlib_metadata",
    "opencv-python-headless",
    "scipy",
    "huggingface_hub",
    "einops",
    "timm<=0.6.7",
    "torchvision",
    "scikit-image",
]


def _index_by_package_name(requirements):
    """Map each bare package name to its full requirement string.

    The package name is everything that precedes the first version-specifier
    character (one of ``! = < > ~``); a requirement without a specifier maps
    to itself.

    Raises:
        ValueError: if no package name can be extracted from an entry (for
            example an empty string), instead of failing with a bare
            ``IndexError``.
    """
    name_pattern = re.compile(r"^([^!=<>~]+)(?:[!=<>~].*)?$")
    table = {}
    for requirement in requirements:
        match = name_pattern.match(requirement)
        if match is None:
            raise ValueError(
                f"cannot extract a package name from requirement {requirement!r}"
            )
        # group(1) is the bare name, group(0) the whole matched requirement.
        table[match.group(1)] = match.group(0)
    return table


# this is a lookup table with items like:
#
# timm: "timm<=0.6.7"
# torch: "torch"
#
# some of the values are versioned whereas others aren't.
deps = _index_by_package_name(_deps)

# since this data is also written to src/controlnet_aux/dependency_versions_table.py (by the
# `deps_table_update` command below) it can be accessed from anywhere once that file has been
# generated. If you need to quickly access the data from this table in a shell, you can do so with:
#
# python -c 'import sys; from controlnet_aux.dependency_versions_table import deps; \
# print(" ".join([ deps[x] for x in sys.argv[1:]]))' timm torch
#
# Just pass the desired package names to that script as it's shown with 2 packages above.
#
# If controlnet_aux is not yet installed and the work is done from the cloned repo remember to add
# `PYTHONPATH=src` to the script above
#
# You can then feed this for example to `pip`:
#
# pip install -U $(python -c 'import sys; from controlnet_aux.dependency_versions_table import deps; \
# print(" ".join([ deps[x] for x in sys.argv[1:]]))' timm torch)
#
119
+
120
+
121
def deps_list(*pkgs):
    """Return the requirement string registered in ``deps`` for each given package name.

    The result keeps the order of ``pkgs``. A name that is not a key of
    ``deps`` raises ``KeyError``.
    """
    requirements = []
    for name in pkgs:
        requirements.append(deps[name])
    return requirements
123
+
124
+
125
class DepsTableUpdateCommand(Command):
    """
    A custom distutils command that regenerates the dependency table module
    from the module-level ``deps`` mapping.

    usage: python setup.py deps_table_update
    """

    description = "build runtime dependency table"
    user_options = [
        # format: (long option, short option, description).
        (
            "dep-table-update",
            None,
            # Must name the same file that run() writes to.
            "updates src/controlnet_aux/dependency_versions_table.py",
        ),
    ]

    def initialize_options(self):
        # This command has no options to initialise.
        pass

    def finalize_options(self):
        # Nothing to validate or post-process.
        pass

    def run(self):
        """Write ``deps`` as a Python dict literal to the dependency table file.

        The target path is relative to the current working directory; the
        file is overwritten, encoded as UTF-8 with ``\\n`` line endings.
        """
        entries = "\n".join([f' "{k}": "{v}",' for k, v in deps.items()])
        content = [
            "# THIS FILE HAS BEEN AUTOGENERATED. To update:",
            "# 1. modify the `_deps` list in setup.py",
            "# 2. run `make deps_table_update`",
            "deps = {",
            entries,
            "}",
            # Trailing empty item so the joined text ends with a newline.
            "",
        ]
        target = "src/controlnet_aux/dependency_versions_table.py"
        print(f"updating {target}")
        with open(target, "w", encoding="utf-8", newline="\n") as f:
            f.write("\n".join(content))
162
+
163
+
164
# No optional dependency groups are defined for this package.
extras = {}

# Runtime requirements, resolved through the shared `deps` table so the
# version constraints are declared in exactly one place (`_deps`).
install_requires = deps_list(
    "torch",
    "importlib_metadata",
    "huggingface_hub",
    "scipy",
    "opencv-python-headless",
    "filelock",
    "numpy",
    "Pillow",
    "einops",
    "torchvision",
    "timm",
    "scikit-image",
)

# Read the README through a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open("README.md", "r", encoding="utf-8") as readme_file:
    long_description = readme_file.read()

setup(
    name="controlnet_aux",
    version="0.0.9",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
    description="Auxillary models for controlnet",
    long_description=long_description,
    long_description_content_type="text/markdown",
    keywords="deep learning",
    license="Apache",
    author="The HuggingFace team",
    # NOTE(review): this value looks like an e-mail address that was redacted
    # when the page was captured - confirm the real address before releasing.
    author_email="[email protected]",
    url="https://github.com/patrickvonplaten/controlnet_aux",
    package_dir={"": "src"},
    packages=find_packages("src"),
    include_package_data=True,
    python_requires=">=3.7.0",
    install_requires=install_requires,
    extras_require=extras,
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Intended Audience :: Developers",
        "Intended Audience :: Education",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    cmdclass={"deps_table_update": DepsTableUpdateCommand},
    # JSON config files that must ship inside the installed package.
    package_data={'controlnet_aux' : ['zoe/zoedepth/models/zoedepth/*.json', 'zoe/zoedepth/models/zoedepth_nk/*.json']},
)
214
+
215
+ # Release checklist
216
+ # 1. Change the version in __init__.py and setup.py.
217
+ # 2. Commit these changes with the message: "Release: Release"
218
+ # 3. Add a tag in git to mark the release: "git tag RELEASE -m 'Adds tag RELEASE for pypi' "
219
+ # Push the tag to git: git push --tags origin main
220
+ # 4. Run the following commands in the top-level directory:
221
+ # python setup.py bdist_wheel
222
+ # python setup.py sdist
223
+ # 5. Upload the package to the pypi test server first:
224
+ # twine upload dist/* -r pypitest
225
+ # twine upload dist/* -r pypitest --repository-url=https://test.pypi.org/legacy/
226
+ # 6. Check that you can install it in a virtualenv by running:
227
+ # pip install -i https://testpypi.python.org/pypi controlnet_aux
228
+ # diffusers env
229
+ # diffusers test
230
+ # 7. Upload the final version to actual pypi:
231
+ # twine upload dist/* -r pypi
232
+ # 8. Add release notes to the tag in github once everything is looking hunky-dory.
233
+ # 9. Update the version in __init__.py, setup.py to the new version "-dev" and push to master
controlnet_aux/src/controlnet_aux.egg-info/PKG-INFO ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.1
2
+ Name: controlnet_aux
3
+ Version: 0.0.9
4
+ Summary: Auxiliary models for controlnet
5
+ Home-page: https://github.com/patrickvonplaten/controlnet_aux
6
+ Author: The HuggingFace team
7
+ Author-email: [email protected]
8
+ License: Apache
9
+ Keywords: deep learning
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Education
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.7
18
+ Classifier: Programming Language :: Python :: 3.8
19
+ Classifier: Programming Language :: Python :: 3.9
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Requires-Python: >=3.7.0
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE.txt
24
+ Requires-Dist: torch
25
+ Requires-Dist: importlib_metadata
26
+ Requires-Dist: huggingface_hub
27
+ Requires-Dist: scipy
28
+ Requires-Dist: opencv-python-headless
29
+ Requires-Dist: filelock
30
+ Requires-Dist: numpy
31
+ Requires-Dist: Pillow
32
+ Requires-Dist: einops
33
+ Requires-Dist: torchvision
34
+ Requires-Dist: timm<=0.6.7
35
+ Requires-Dist: scikit-image
36
+
37
+ # ControlNet auxiliary models
38
+
39
+ This is a PyPI-installable package of [lllyasviel's ControlNet Annotators](https://github.com/lllyasviel/ControlNet/tree/main/annotator)
40
+
41
+ The code is copy-pasted from the respective folders in <https://github.com/lllyasviel/ControlNet/tree/main/annotator> and connected to [the 🤗 Hub](https://huggingface.co/lllyasviel/Annotators).
42
+
43
+ All credit & copyright goes to <https://github.com/lllyasviel> .
44
+
45
+ ## Install
46
+
47
+ ```
48
+ pip install -U controlnet-aux
49
+ ```
50
+
51
+ To support DWPose, which depends on MMDetection, MMCV and MMPose, also install:
52
+
53
+ ```
54
+ pip install -U openmim
55
+ mim install mmengine
56
+ mim install "mmcv>=2.0.1"
57
+ mim install "mmdet>=3.1.0"
58
+ mim install "mmpose>=1.1.0"
59
+ ```
60
+
61
+ ## Usage
62
+
63
+ You can use the processor class, which can load each of the auxiliary models with the following code
64
+
65
+ ```python
66
+ import requests
67
+ from PIL import Image
68
+ from io import BytesIO
69
+
70
+ from controlnet_aux.processor import Processor
71
+
72
+ # load image
73
+ url = "https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png"
74
+
75
+ response = requests.get(url)
76
+ img = Image.open(BytesIO(response.content)).convert("RGB").resize((512, 512))
77
+
78
+ # load processor from processor_id
79
+ # options are:
80
+ # ["canny", "depth_leres", "depth_leres++", "depth_midas", "depth_zoe", "lineart_anime",
81
+ # "lineart_coarse", "lineart_realistic", "mediapipe_face", "mlsd", "normal_bae", "normal_midas",
82
+ # "openpose", "openpose_face", "openpose_faceonly", "openpose_full", "openpose_hand",
83
+ # "scribble_hed", "scribble_pidinet", "shuffle", "softedge_hed", "softedge_hedsafe",
84
+ # "softedge_pidinet", "softedge_pidsafe", "dwpose"]
85
+ processor_id = 'scribble_hed'
86
+ processor = Processor(processor_id)
87
+
88
+ processed_image = processor(img, to_pil=True)
89
+ ```
90
+
91
+ Each model can be loaded individually by importing and instantiating them as follows
92
+
93
+ ```python
94
+ from PIL import Image
95
+ import requests
96
+ from io import BytesIO
97
+ from controlnet_aux import HEDdetector, MidasDetector, MLSDdetector, OpenposeDetector, PidiNetDetector, NormalBaeDetector, LineartDetector, LineartAnimeDetector, LineartStandardDetector, CannyDetector, ContentShuffleDetector, ZoeDetector, MediapipeFaceDetector, SamDetector, LeresDetector, DWposeDetector, TEEDdetector, AnylineDetector
98
+
99
+ # load image
100
+ url = "https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png"
101
+
102
+ response = requests.get(url)
103
+ img = Image.open(BytesIO(response.content)).convert("RGB").resize((512, 512))
104
+
105
+ # load checkpoints
106
+ hed = HEDdetector.from_pretrained("lllyasviel/Annotators")
107
+ midas = MidasDetector.from_pretrained("lllyasviel/Annotators")
108
+ mlsd = MLSDdetector.from_pretrained("lllyasviel/Annotators")
109
+ open_pose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
110
+ pidi = PidiNetDetector.from_pretrained("lllyasviel/Annotators")
111
+ normal_bae = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
112
+ lineart = LineartDetector.from_pretrained("lllyasviel/Annotators")
113
+ lineart_anime = LineartAnimeDetector.from_pretrained("lllyasviel/Annotators")
114
+ zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
115
+ sam = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints")
116
+ mobile_sam = SamDetector.from_pretrained("dhkim2810/MobileSAM", model_type="vit_t", filename="mobile_sam.pt")
117
+ leres = LeresDetector.from_pretrained("lllyasviel/Annotators")
118
+ teed = TEEDdetector.from_pretrained("fal-ai/teed", filename="5_model.pth")
119
+ anyline = AnylineDetector.from_pretrained(
120
+ "TheMistoAI/MistoLine", filename="MTEED.pth", subfolder="Anyline"
121
+ )
122
+
123
+ # specify configs, ckpts and device; otherwise the files are downloaded automatically and the CPU is used by default
124
+ # det_config: ./src/controlnet_aux/dwpose/yolox_config/yolox_l_8xb8-300e_coco.py
125
+ # det_ckpt: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth
126
+ # pose_config: ./src/controlnet_aux/dwpose/dwpose_config/dwpose-l_384x288.py
127
+ # pose_ckpt: https://huggingface.co/wanghaofan/dw-ll_ucoco_384/resolve/main/dw-ll_ucoco_384.pth
128
+ import torch
129
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
130
+ dwpose = DWposeDetector(det_config=det_config, det_ckpt=det_ckpt, pose_config=pose_config, pose_ckpt=pose_ckpt, device=device)
131
+
132
+ # instantiate
133
+ canny = CannyDetector()
134
+ content = ContentShuffleDetector()
135
+ face_detector = MediapipeFaceDetector()
136
+ lineart_standard = LineartStandardDetector()
137
+
138
+
139
+ # process
140
+ processed_image_hed = hed(img)
141
+ processed_image_midas = midas(img)
142
+ processed_image_mlsd = mlsd(img)
143
+ processed_image_open_pose = open_pose(img, hand_and_face=True)
144
+ processed_image_pidi = pidi(img, safe=True)
145
+ processed_image_normal_bae = normal_bae(img)
146
+ processed_image_lineart = lineart(img, coarse=True)
147
+ processed_image_lineart_anime = lineart_anime(img)
148
+ processed_image_zoe = zoe(img)
149
+ processed_image_sam = sam(img)
150
+ processed_image_leres = leres(img)
151
+ processed_image_teed = teed(img, detect_resolution=1024)
152
+ processed_image_anyline = anyline(img, detect_resolution=1280)
153
+
154
+ processed_image_canny = canny(img)
155
+ processed_image_content = content(img)
156
+ processed_image_mediapipe_face = face_detector(img)
157
+ processed_image_dwpose = dwpose(img)
158
+ processed_image_lineart_standard = lineart_standard(img, detect_resolution=1024)
159
+ ```
160
+
161
+ ### Image resolution
162
+
163
+ In order to maintain the image aspect ratio, `detect_resolution`, `image_resolution` and the image sizes need to be multiples of `64`.
controlnet_aux/src/controlnet_aux.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LICENSE.txt
2
+ README.md
3
+ setup.py
4
+ src/controlnet_aux/__init__.py
5
+ src/controlnet_aux/processor.py
6
+ src/controlnet_aux/util.py
7
+ src/controlnet_aux.egg-info/PKG-INFO
8
+ src/controlnet_aux.egg-info/SOURCES.txt
9
+ src/controlnet_aux.egg-info/dependency_links.txt
10
+ src/controlnet_aux.egg-info/requires.txt
11
+ src/controlnet_aux.egg-info/top_level.txt
12
+ src/controlnet_aux/anyline/__init__.py
13
+ src/controlnet_aux/canny/__init__.py
14
+ src/controlnet_aux/dwpose/__init__.py
15
+ src/controlnet_aux/dwpose/util.py
16
+ src/controlnet_aux/dwpose/wholebody.py
17
+ src/controlnet_aux/dwpose/dwpose_config/__init__.py
18
+ src/controlnet_aux/dwpose/dwpose_config/dwpose-l_384x288.py
19
+ src/controlnet_aux/dwpose/yolox_config/__init__.py
20
+ src/controlnet_aux/dwpose/yolox_config/yolox_l_8xb8-300e_coco.py
21
+ src/controlnet_aux/hed/__init__.py
22
+ src/controlnet_aux/leres/__init__.py
23
+ src/controlnet_aux/leres/leres/Resnet.py
24
+ src/controlnet_aux/leres/leres/Resnext_torch.py
25
+ src/controlnet_aux/leres/leres/__init__.py
26
+ src/controlnet_aux/leres/leres/depthmap.py
27
+ src/controlnet_aux/leres/leres/multi_depth_model_woauxi.py
28
+ src/controlnet_aux/leres/leres/net_tools.py
29
+ src/controlnet_aux/leres/leres/network_auxi.py
30
+ src/controlnet_aux/leres/pix2pix/__init__.py
31
+ src/controlnet_aux/leres/pix2pix/models/__init__.py
32
+ src/controlnet_aux/leres/pix2pix/models/base_model.py
33
+ src/controlnet_aux/leres/pix2pix/models/base_model_hg.py
34
+ src/controlnet_aux/leres/pix2pix/models/networks.py
35
+ src/controlnet_aux/leres/pix2pix/models/pix2pix4depth_model.py
36
+ src/controlnet_aux/leres/pix2pix/options/__init__.py
37
+ src/controlnet_aux/leres/pix2pix/options/base_options.py
38
+ src/controlnet_aux/leres/pix2pix/options/test_options.py
39
+ src/controlnet_aux/leres/pix2pix/util/__init__.py
40
+ src/controlnet_aux/leres/pix2pix/util/util.py
41
+ src/controlnet_aux/lineart/__init__.py
42
+ src/controlnet_aux/lineart_anime/__init__.py
43
+ src/controlnet_aux/lineart_standard/__init__.py
44
+ src/controlnet_aux/mediapipe_face/__init__.py
45
+ src/controlnet_aux/mediapipe_face/mediapipe_face_common.py
46
+ src/controlnet_aux/midas/__init__.py
47
+ src/controlnet_aux/midas/api.py
48
+ src/controlnet_aux/midas/utils.py
49
+ src/controlnet_aux/midas/midas/__init__.py
50
+ src/controlnet_aux/midas/midas/base_model.py
51
+ src/controlnet_aux/midas/midas/blocks.py
52
+ src/controlnet_aux/midas/midas/dpt_depth.py
53
+ src/controlnet_aux/midas/midas/midas_net.py
54
+ src/controlnet_aux/midas/midas/midas_net_custom.py
55
+ src/controlnet_aux/midas/midas/transforms.py
56
+ src/controlnet_aux/midas/midas/vit.py
57
+ src/controlnet_aux/mlsd/__init__.py
58
+ src/controlnet_aux/mlsd/utils.py
59
+ src/controlnet_aux/mlsd/models/__init__.py
60
+ src/controlnet_aux/mlsd/models/mbv2_mlsd_large.py
61
+ src/controlnet_aux/mlsd/models/mbv2_mlsd_tiny.py
62
+ src/controlnet_aux/normalbae/__init__.py
63
+ src/controlnet_aux/normalbae/nets/NNET.py
64
+ src/controlnet_aux/normalbae/nets/__init__.py
65
+ src/controlnet_aux/normalbae/nets/baseline.py
66
+ src/controlnet_aux/normalbae/nets/submodules/__init__.py
67
+ src/controlnet_aux/normalbae/nets/submodules/decoder.py
68
+ src/controlnet_aux/normalbae/nets/submodules/encoder.py
69
+ src/controlnet_aux/normalbae/nets/submodules/submodules.py
70
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/__init__.py
71
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/caffe2_benchmark.py
72
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/caffe2_validate.py
73
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/hubconf.py
74
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/onnx_export.py
75
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/onnx_optimize.py
76
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/onnx_to_caffe.py
77
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/onnx_validate.py
78
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/setup.py
79
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/utils.py
80
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/validate.py
81
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/__init__.py
82
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/config.py
83
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/conv2d_layers.py
84
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/efficientnet_builder.py
85
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/gen_efficientnet.py
86
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/helpers.py
87
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/mobilenetv3.py
88
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/model_factory.py
89
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/version.py
90
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/__init__.py
91
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/activations.py
92
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/activations_jit.py
93
+ src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/activations_me.py
94
+ src/controlnet_aux/open_pose/__init__.py
95
+ src/controlnet_aux/open_pose/body.py
96
+ src/controlnet_aux/open_pose/face.py
97
+ src/controlnet_aux/open_pose/hand.py
98
+ src/controlnet_aux/open_pose/model.py
99
+ src/controlnet_aux/open_pose/util.py
100
+ src/controlnet_aux/pidi/__init__.py
101
+ src/controlnet_aux/pidi/model.py
102
+ src/controlnet_aux/segment_anything/__init__.py
103
+ src/controlnet_aux/segment_anything/automatic_mask_generator.py
104
+ src/controlnet_aux/segment_anything/build_sam.py
105
+ src/controlnet_aux/segment_anything/predictor.py
106
+ src/controlnet_aux/segment_anything/modeling/__init__.py
107
+ src/controlnet_aux/segment_anything/modeling/common.py
108
+ src/controlnet_aux/segment_anything/modeling/image_encoder.py
109
+ src/controlnet_aux/segment_anything/modeling/mask_decoder.py
110
+ src/controlnet_aux/segment_anything/modeling/prompt_encoder.py
111
+ src/controlnet_aux/segment_anything/modeling/sam.py
112
+ src/controlnet_aux/segment_anything/modeling/tiny_vit_sam.py
113
+ src/controlnet_aux/segment_anything/modeling/transformer.py
114
+ src/controlnet_aux/segment_anything/utils/__init__.py
115
+ src/controlnet_aux/segment_anything/utils/amg.py
116
+ src/controlnet_aux/segment_anything/utils/onnx.py
117
+ src/controlnet_aux/segment_anything/utils/transforms.py
118
+ src/controlnet_aux/shuffle/__init__.py
119
+ src/controlnet_aux/teed/Fsmish.py
120
+ src/controlnet_aux/teed/Xsmish.py
121
+ src/controlnet_aux/teed/__init__.py
122
+ src/controlnet_aux/teed/ted.py
123
+ src/controlnet_aux/zoe/__init__.py
124
+ src/controlnet_aux/zoe/zoedepth/__init__.py
125
+ src/controlnet_aux/zoe/zoedepth/models/__init__.py
126
+ src/controlnet_aux/zoe/zoedepth/models/builder.py
127
+ src/controlnet_aux/zoe/zoedepth/models/depth_model.py
128
+ src/controlnet_aux/zoe/zoedepth/models/model_io.py
129
+ src/controlnet_aux/zoe/zoedepth/models/base_models/__init__.py
130
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas.py
131
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/__init__.py
132
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/hubconf.py
133
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/__init__.py
134
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/base_model.py
135
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/blocks.py
136
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/dpt_depth.py
137
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/midas_net.py
138
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/midas_net_custom.py
139
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/model_loader.py
140
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/transforms.py
141
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/__init__.py
142
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/beit.py
143
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/levit.py
144
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/next_vit.py
145
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin.py
146
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin2.py
147
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin_common.py
148
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/utils.py
149
+ src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/vit.py
150
+ src/controlnet_aux/zoe/zoedepth/models/layers/__init__.py
151
+ src/controlnet_aux/zoe/zoedepth/models/layers/attractor.py
152
+ src/controlnet_aux/zoe/zoedepth/models/layers/dist_layers.py
153
+ src/controlnet_aux/zoe/zoedepth/models/layers/localbins_layers.py
154
+ src/controlnet_aux/zoe/zoedepth/models/layers/patch_transformer.py
155
+ src/controlnet_aux/zoe/zoedepth/models/zoedepth/__init__.py
156
+ src/controlnet_aux/zoe/zoedepth/models/zoedepth/config_zoedepth.json
157
+ src/controlnet_aux/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json
158
+ src/controlnet_aux/zoe/zoedepth/models/zoedepth/zoedepth_v1.py
159
+ src/controlnet_aux/zoe/zoedepth/models/zoedepth_nk/__init__.py
160
+ src/controlnet_aux/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json
161
+ src/controlnet_aux/zoe/zoedepth/models/zoedepth_nk/zoedepth_nk_v1.py
162
+ src/controlnet_aux/zoe/zoedepth/utils/__init__.py
163
+ src/controlnet_aux/zoe/zoedepth/utils/arg_utils.py
164
+ src/controlnet_aux/zoe/zoedepth/utils/config.py
165
+ src/controlnet_aux/zoe/zoedepth/utils/easydict/__init__.py
166
+ tests/test_controlnet_aux.py
controlnet_aux/src/controlnet_aux.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
controlnet_aux/src/controlnet_aux.egg-info/requires.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ importlib_metadata
3
+ huggingface_hub
4
+ scipy
5
+ opencv-python-headless
6
+ filelock
7
+ numpy
8
+ Pillow
9
+ einops
10
+ torchvision
11
+ timm<=0.6.7
12
+ scikit-image
controlnet_aux/src/controlnet_aux.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ controlnet_aux
controlnet_aux/src/controlnet_aux/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ __version__ = "0.0.9"
2
+
3
+ from .canny import CannyDetector
4
+ from .open_pose import OpenposeDetector
5
+
controlnet_aux/src/controlnet_aux/canny/__init__.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ import cv2
3
+ import numpy as np
4
+ from PIL import Image
5
+ from ..util import HWC3, resize_image
6
+
7
class CannyDetector:
    """Callable annotator that turns an image into a Canny edge map."""

    def __call__(self, input_image=None, low_threshold=100, high_threshold=200, detect_resolution=512, image_resolution=512, output_type=None, **kwargs):
        """Run ``cv2.Canny`` on ``input_image`` and return the edge map.

        Args:
            input_image: ``numpy.ndarray`` or any object ``np.array`` can
                convert to a ``uint8`` array (e.g. a PIL image).
            low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
            high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.
            detect_resolution: Resolution handed to ``resize_image`` before
                edge detection.
            image_resolution: Resolution handed to ``resize_image`` to derive
                the size of the returned map.
            output_type: ``"pil"`` to get a ``PIL.Image``; any other value
                returns the array. When falsy, it defaults to ``"np"`` for
                array input and ``"pil"`` for everything else.
            **kwargs: Only the deprecated ``img`` alias is read.

        Returns:
            The edge map as a ``PIL.Image`` or ``numpy.ndarray``.

        Raises:
            ValueError: If no image was supplied.
        """
        # Backwards compatibility: `img` was the former name of `input_image`.
        if "img" in kwargs:
            warnings.warn("img is deprecated, please use `input_image=...` instead.", DeprecationWarning)
            input_image = kwargs.pop("img")

        if input_image is None:
            raise ValueError("input_image must be defined.")

        # The default output format mirrors the kind of input we were given.
        received_array = isinstance(input_image, np.ndarray)
        if not received_array:
            input_image = np.array(input_image, dtype=np.uint8)
        if not output_type:
            output_type = "np" if received_array else "pil"

        # Detect edges at the detection resolution.
        detect_input = resize_image(HWC3(input_image), detect_resolution)
        edges = HWC3(cv2.Canny(detect_input, low_threshold, high_threshold))

        # The output size is taken from the detection input resized to
        # `image_resolution`; cv2.resize expects (width, height).
        height, width, _ = resize_image(detect_input, image_resolution).shape
        edges = cv2.resize(edges, (width, height), interpolation=cv2.INTER_LINEAR)

        return Image.fromarray(edges) if output_type == "pil" else edges
controlnet_aux/src/controlnet_aux/open_pose/LICENSE ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ OPENPOSE: MULTIPERSON KEYPOINT DETECTION
2
+ SOFTWARE LICENSE AGREEMENT
3
+ ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
4
+
5
+ BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
6
+
7
+ This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Carnegie Mellon University (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor.
8
+
9
+ RESERVATION OF OWNERSHIP AND GRANT OF LICENSE:
10
+ Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement and hereby grants to Licensee a personal, non-exclusive,
11
+ non-transferable license to use the Software for noncommercial research purposes, without the right to sublicense, pursuant to the terms and conditions of this Agreement. As used in this Agreement, the term "Software" means (i) the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by Licensor, including all or any file structures, programming instructions, user interfaces and screen formats and sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or modifications created or made by You to any of the items specified in (i).
12
+
13
+ CONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement. Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution, or publication.
14
+
15
+ COPYRIGHT: The Software is owned by Licensor and is protected by United
16
+ States copyright laws and applicable international treaties and/or conventions.
17
+
18
+ PERMITTED USES: The Software may be used for your own noncommercial internal research purposes. You understand and agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto.
19
+
20
+ DERIVATIVES: You may create derivatives of or make modifications to the Software, however, You agree that all and any such derivatives and modifications will be owned by Licensor and become a part of the Software licensed to You under this Agreement. You may only use such derivatives and modifications for your own noncommercial internal research purposes, and you may not otherwise use, distribute or copy such derivatives and modifications in violation of this Agreement.
21
+
22
+ BACKUPS: If Licensee is an organization, it may make that number of copies of the Software necessary for internal noncommercial use at a single site within its organization provided that all information appearing in or on the original labels, including the copyright and trademark notices are copied onto the labels of the copies.
23
+
24
+ USES NOT PERMITTED: You may not distribute, copy or use the Software except as explicitly permitted herein. Licensee has not been granted any trademark license as part of this Agreement and may not use the name or mark “OpenPose", "Carnegie Mellon" or any renditions thereof without the prior written permission of Licensor.
25
+
26
+ You may not sell, rent, lease, sublicense, lend, time-share or transfer, in whole or in part, or provide third parties access to prior or present versions (or any parts thereof) of the Software.
27
+
28
+ ASSIGNMENT: You may not assign this Agreement or your rights hereunder without the prior written consent of Licensor. Any attempted assignment without such consent shall be null and void.
29
+
30
+ TERM: The term of the license granted by this Agreement is from Licensee's acceptance of this Agreement by downloading the Software or by using the Software until terminated as provided below.
31
+
32
+ The Agreement automatically terminates without notice if you fail to comply with any provision of this Agreement. Licensee may terminate this Agreement by ceasing using the Software. Upon any termination of this Agreement, Licensee will delete any and all copies of the Software. You agree that all provisions which operate to protect the proprietary rights of Licensor shall remain in force should breach occur and that the obligation of confidentiality described in this Agreement is binding in perpetuity and, as such, survives the term of the Agreement.
33
+
34
+ FEE: Provided Licensee abides completely by the terms and conditions of this Agreement, there is no fee due to Licensor for Licensee's use of the Software in accordance with this Agreement.
35
+
36
+ DISCLAIMER OF WARRANTIES: THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT. LICENSEE BEARS ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS.
37
+
38
+ SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement.
39
+
40
+ EXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage.
41
+
42
+ EXPORT REGULATION: Licensee agrees to comply with any and all applicable
43
+ U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered by the Office of Foreign Assets Control.
44
+
45
+ SEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions shall not in any way be affected or impaired thereby.
46
+
47
+ NO IMPLIED WAIVERS: No failure or delay by Licensor in enforcing any right or remedy under this Agreement shall be construed as a waiver of any future or other exercise of such right or remedy by Licensor.
48
+
49
+ GOVERNING LAW: This Agreement shall be construed and enforced in accordance with the laws of the Commonwealth of Pennsylvania without reference to conflict of laws principles. You consent to the personal jurisdiction of the courts of this County and waive their rights to venue outside of Allegheny County, Pennsylvania.
50
+
51
+ ENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire agreement between Licensee and Licensor as to the matter set forth herein and supersedes any previous agreements, understandings, and arrangements between the parties relating hereto.
52
+
53
+
54
+
55
+ ************************************************************************
56
+
57
+ THIRD-PARTY SOFTWARE NOTICES AND INFORMATION
58
+
59
+ This project incorporates material from the project(s) listed below (collectively, "Third Party Code"). This Third Party Code is licensed to you under their original license terms set forth below. We reserves all other rights not expressly granted, whether by implication, estoppel or otherwise.
60
+
61
+ 1. Caffe, version 1.0.0, (https://github.com/BVLC/caffe/)
62
+
63
+ COPYRIGHT
64
+
65
+ All contributions by the University of California:
66
+ Copyright (c) 2014-2017 The Regents of the University of California (Regents)
67
+ All rights reserved.
68
+
69
+ All other contributions:
70
+ Copyright (c) 2014-2017, the respective contributors
71
+ All rights reserved.
72
+
73
+ Caffe uses a shared copyright model: each contributor holds copyright over
74
+ their contributions to Caffe. The project versioning records all such
75
+ contribution and copyright details. If a contributor wants to further mark
76
+ their specific copyright on a particular contribution, they should indicate
77
+ their copyright solely in the commit message of the change when it is
78
+ committed.
79
+
80
+ LICENSE
81
+
82
+ Redistribution and use in source and binary forms, with or without
83
+ modification, are permitted provided that the following conditions are met:
84
+
85
+ 1. Redistributions of source code must retain the above copyright notice, this
86
+ list of conditions and the following disclaimer.
87
+ 2. Redistributions in binary form must reproduce the above copyright notice,
88
+ this list of conditions and the following disclaimer in the documentation
89
+ and/or other materials provided with the distribution.
90
+
91
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
92
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
93
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
94
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
95
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
96
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
97
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
98
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
99
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
100
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
101
+
102
+ CONTRIBUTION AGREEMENT
103
+
104
+ By contributing to the BVLC/caffe repository through pull-request, comment,
105
+ or otherwise, the contributor releases their content to the
106
+ license and copyright terms herein.
107
+
108
+ ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********
controlnet_aux/src/controlnet_aux/open_pose/__init__.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Openpose
2
+ # Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose
3
+ # 2nd Edited by https://github.com/Hzzone/pytorch-openpose
4
+ # 3rd Edited by ControlNet
5
+ # 4th Edited by ControlNet (added face and correct hands)
6
+ # 5th Edited by ControlNet (Improved JSON serialization/deserialization, and lots of bug fixes)
7
+ # This preprocessor is licensed by CMU for non-commercial use only.
8
+
9
+
10
+ import os
11
+
12
+ os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
13
+
14
+ import json
15
+ import warnings
16
+ from typing import Callable, List, NamedTuple, Tuple, Union
17
+
18
+ import cv2
19
+ import numpy as np
20
+ import torch
21
+ from huggingface_hub import hf_hub_download
22
+ from PIL import Image
23
+
24
+ from ..util import HWC3, resize_image
25
+ from . import util
26
+ from .body import Body, BodyResult, Keypoint
27
+ from .face import Face
28
+ from .hand import Hand
29
+
30
+ HandResult = List[Keypoint]
31
+ FaceResult = List[Keypoint]
32
+
33
class PoseResult(NamedTuple):
    """Everything detected for one person: the body plus optional hands and face."""

    # Body keypoints together with the person's total score / part count.
    body: BodyResult
    # Hand keypoints; None when hands were not requested or not found.
    left_hand: Union[HandResult, None]
    right_hand: Union[HandResult, None]
    # Face keypoints; None when the face was not requested or not found.
    face: Union[FaceResult, None]
38
+
39
def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, draw_face=True):
    """
    Render a list of detected poses onto a fresh black canvas.

    Args:
        poses (List[PoseResult]): The poses to render, one entry per person.
        H (int): Canvas height in pixels.
        W (int): Canvas width in pixels.
        draw_body (bool, optional): Render body keypoints. Defaults to True.
        draw_hand (bool, optional): Render both hands' keypoints. Defaults to True.
        draw_face (bool, optional): Render face keypoints. Defaults to True.

    Returns:
        numpy.ndarray: A uint8 array of shape (H, W, 3) holding the drawing.
    """
    canvas = np.zeros(shape=(H, W, 3), dtype=np.uint8)

    for person in poses:
        if draw_body:
            canvas = util.draw_bodypose(canvas, person.body.keypoints)

        if draw_hand:
            # Left hand first, then right hand, each drawn onto the same canvas.
            for hand in (person.left_hand, person.right_hand):
                canvas = util.draw_handpose(canvas, hand)

        if draw_face:
            canvas = util.draw_facepose(canvas, person.face)

    return canvas
68
+
69
+
70
class OpenposeDetector:
    """
    Detect human poses in images using the Openpose body, hand and face models.

    Attributes:
        body_estimation: Body keypoint estimator. It is called on the image and
            must also provide `format_body_result`.
        hand_estimation: Hand keypoint estimator, or None when not supplied.
        face_estimation: Face keypoint estimator, or None when not supplied.
    """

    def __init__(self, body_estimation, hand_estimation=None, face_estimation=None):
        self.body_estimation = body_estimation
        self.hand_estimation = hand_estimation
        self.face_estimation = face_estimation

    @classmethod
    def from_pretrained(cls, pretrained_model_or_path, filename=None, hand_filename=None, face_filename=None, cache_dir=None, local_files_only=False):
        """
        Build a detector from a local directory or a Hugging Face Hub repo.

        Args:
            pretrained_model_or_path (str): Local directory holding the checkpoints,
                or a Hub repo id they are downloaded from.
            filename (str, optional): Body checkpoint file name.
            hand_filename (str, optional): Hand checkpoint file name.
            face_filename (str, optional): Face checkpoint file name.
            cache_dir (str, optional): Forwarded to `hf_hub_download`.
            local_files_only (bool, optional): Forwarded to `hf_hub_download`.

        Returns:
            OpenposeDetector: A detector with body, hand and face estimators loaded.
        """
        if pretrained_model_or_path == "lllyasviel/ControlNet":
            filename = filename or "annotator/ckpts/body_pose_model.pth"
            hand_filename = hand_filename or "annotator/ckpts/hand_pose_model.pth"
            face_filename = face_filename or "facenet.pth"

            # For this repo id the face checkpoint is fetched from a different repo.
            face_pretrained_model_or_path = "lllyasviel/Annotators"
        else:
            filename = filename or "body_pose_model.pth"
            hand_filename = hand_filename or "hand_pose_model.pth"
            face_filename = face_filename or "facenet.pth"

            face_pretrained_model_or_path = pretrained_model_or_path

        if os.path.isdir(pretrained_model_or_path):
            body_model_path = os.path.join(pretrained_model_or_path, filename)
            hand_model_path = os.path.join(pretrained_model_or_path, hand_filename)
            face_model_path = os.path.join(face_pretrained_model_or_path, face_filename)
        else:
            body_model_path = hf_hub_download(pretrained_model_or_path, filename, cache_dir=cache_dir, local_files_only=local_files_only)
            hand_model_path = hf_hub_download(pretrained_model_or_path, hand_filename, cache_dir=cache_dir, local_files_only=local_files_only)
            face_model_path = hf_hub_download(face_pretrained_model_or_path, face_filename, cache_dir=cache_dir, local_files_only=local_files_only)

        body_estimation = Body(body_model_path)
        hand_estimation = Hand(hand_model_path)
        face_estimation = Face(face_model_path)

        return cls(body_estimation, hand_estimation, face_estimation)

    def to(self, device):
        """
        Move every available estimator to `device`.

        Estimators that were not supplied (None) are skipped, so a detector
        constructed with only a body estimator can still be moved.

        Returns:
            OpenposeDetector: self, to allow call chaining.
        """
        for estimator in (self.body_estimation, self.hand_estimation, self.face_estimation):
            if estimator is not None:
                estimator.to(device)
        return self

    def detect_hands(self, body: "BodyResult", oriImg) -> "Tuple[Union[HandResult, None], Union[HandResult, None]]":
        """
        Detect the left and right hand keypoints belonging to one body.

        Args:
            body (BodyResult): The body whose hands are searched for.
            oriImg (numpy.ndarray): The H x W x C image the body was detected in.

        Returns:
            Tuple of (left_hand, right_hand). Each entry is a list of Keypoint with
            coordinates divided by the image width/height, or None when no result
            was produced for that hand.
        """
        left_hand = None
        right_hand = None
        H, W, _ = oriImg.shape
        for x, y, w, is_left in util.handDetect(body, oriImg):
            # Run the hand model on the square crop proposed by handDetect.
            peaks = self.hand_estimation(oriImg[y:y+w, x:x+w, :]).astype(np.float32)
            if peaks.ndim == 2 and peaks.shape[1] == 2:
                # Near-zero coordinates are replaced by -1; all others are shifted
                # from crop to image coordinates. Everything is then divided by
                # the image size.
                peaks[:, 0] = np.where(peaks[:, 0] < 1e-6, -1, peaks[:, 0] + x) / float(W)
                peaks[:, 1] = np.where(peaks[:, 1] < 1e-6, -1, peaks[:, 1] + y) / float(H)

                hand_result = [
                    Keypoint(x=peak[0], y=peak[1])
                    for peak in peaks
                ]

                if is_left:
                    left_hand = hand_result
                else:
                    right_hand = hand_result

        return left_hand, right_hand

    def detect_face(self, body: "BodyResult", oriImg) -> "Union[FaceResult, None]":
        """
        Detect the face keypoints belonging to one body.

        Args:
            body (BodyResult): The body whose face is searched for.
            oriImg (numpy.ndarray): The H x W x C image the body was detected in.

        Returns:
            A list of Keypoint with coordinates divided by the image width/height,
            or None when no face region or no usable peaks were found.
        """
        face = util.faceDetect(body, oriImg)
        if face is None:
            return None

        x, y, w = face
        H, W, _ = oriImg.shape
        heatmaps = self.face_estimation(oriImg[y:y+w, x:x+w, :])
        peaks = self.face_estimation.compute_peaks_from_heatmaps(heatmaps).astype(np.float32)
        if peaks.ndim == 2 and peaks.shape[1] == 2:
            # Same coordinate handling as in detect_hands.
            peaks[:, 0] = np.where(peaks[:, 0] < 1e-6, -1, peaks[:, 0] + x) / float(W)
            peaks[:, 1] = np.where(peaks[:, 1] < 1e-6, -1, peaks[:, 1] + y) / float(H)
            return [
                Keypoint(x=peak[0], y=peak[1])
                for peak in peaks
            ]

        return None

    def detect_poses(self, oriImg, include_hand=False, include_face=False) -> "List[PoseResult]":
        """
        Detect poses in the given image.

        Args:
            oriImg (numpy.ndarray): The input image for pose detection.
            include_hand (bool, optional): Whether to include hand detection. Defaults to False.
            include_face (bool, optional): Whether to include face detection. Defaults to False.

        Returns:
            List[PoseResult]: A list of PoseResult objects containing the detected poses.
        """
        # Reverse the channel order; the estimators run on the reversed copy.
        oriImg = oriImg[:, :, ::-1].copy()
        H, W, _ = oriImg.shape
        with torch.no_grad():
            candidate, subset = self.body_estimation(oriImg)
            bodies = self.body_estimation.format_body_result(candidate, subset)

            results = []
            for body in bodies:
                left_hand, right_hand, face = (None,) * 3
                if include_hand:
                    left_hand, right_hand = self.detect_hands(body, oriImg)
                if include_face:
                    face = self.detect_face(body, oriImg)

                # Body keypoints are in pixels here; divide by the image size so
                # they use the same scale as the hand and face keypoints.
                results.append(PoseResult(BodyResult(
                    keypoints=[
                        Keypoint(
                            x=keypoint.x / float(W),
                            y=keypoint.y / float(H)
                        ) if keypoint is not None else None
                        for keypoint in body.keypoints
                    ],
                    total_score=body.total_score,
                    total_parts=body.total_parts
                ), left_hand, right_hand, face))

            return results

    def __call__(self, input_image, detect_resolution=512, image_resolution=512, include_body=True, include_hand=False, include_face=False, hand_and_face=None, output_type="pil", **kwargs):
        """
        Detect poses in `input_image` and return them drawn on a black canvas.

        Args:
            input_image: A numpy array, or anything `np.array` can convert to a uint8 image.
            detect_resolution (int): Resolution passed to `resize_image` before detection.
            image_resolution (int): Resolution passed to `resize_image` to size the output.
            include_body (bool): Draw body keypoints.
            include_hand (bool): Detect and draw hands.
            include_face (bool): Detect and draw the face.
            hand_and_face (bool, optional): Deprecated; overrides both
                `include_hand` and `include_face` when given.
            output_type (str): "pil" returns a PIL image; any other value returns
                the numpy array.
            **kwargs: Only the deprecated `return_pil` key is read.

        Returns:
            PIL.Image.Image or numpy.ndarray: The rendered pose map.
        """
        if hand_and_face is not None:
            warnings.warn("hand_and_face is deprecated. Use include_hand and include_face instead.", DeprecationWarning)
            include_hand = hand_and_face
            include_face = hand_and_face

        if "return_pil" in kwargs:
            warnings.warn("return_pil is deprecated. Use output_type instead.", DeprecationWarning)
            output_type = "pil" if kwargs["return_pil"] else "np"
        if isinstance(output_type, bool):
            warnings.warn("Passing `True` or `False` to `output_type` is deprecated and will raise an error in future versions")
            # True maps to "pil"; False is left as-is and yields the numpy array.
            if output_type:
                output_type = "pil"

        if not isinstance(input_image, np.ndarray):
            input_image = np.array(input_image, dtype=np.uint8)

        input_image = HWC3(input_image)
        input_image = resize_image(input_image, detect_resolution)
        H, W, _ = input_image.shape

        poses = self.detect_poses(input_image, include_hand, include_face)
        canvas = draw_poses(poses, H, W, draw_body=include_body, draw_hand=include_hand, draw_face=include_face)

        detected_map = HWC3(canvas)

        # Resize the detection-sized image once more only to learn the output size.
        img = resize_image(input_image, image_resolution)
        H, W, _ = img.shape

        detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

        if output_type == "pil":
            detected_map = Image.fromarray(detected_map)

        return detected_map
controlnet_aux/src/controlnet_aux/open_pose/body.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ from typing import List, NamedTuple, Union
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import torch
7
+ from scipy.ndimage.filters import gaussian_filter
8
+
9
+ from . import util
10
+ from .model import bodypose_model
11
+
12
+
13
class Keypoint(NamedTuple):
    """A single detected point with an optional confidence score and id."""

    # Horizontal and vertical coordinates of the point.
    x: float
    y: float
    # Detection score; 1.0 when the producer supplies none.
    score: float = 1.0
    # Identifier of the point; -1 when the producer supplies none.
    id: int = -1
18
+
19
+
20
class BodyResult(NamedTuple):
    """Keypoints of one detected person plus the aggregate score and part count."""

    # Note: `Union` is used instead of the `|` operator because the latter is a
    # Python 3.10 feature.
    # Annotator code should stay Python 3.8 compatible, as the controlnet repo
    # uses a Python 3.8 environment.
    # https://github.com/lllyasviel/ControlNet/blob/d3284fcd0972c510635a4f5abe2eeb71dc0de524/environment.yaml#L6

    # One entry per body part; None marks a part that was not detected.
    keypoints: List[Union[Keypoint, None]]
    # Total score of the person's configuration.
    total_score: float
    # Number of parts detected for the person.
    total_parts: int
29
+
30
+
31
class Body(object):
    """Body keypoint estimator wrapping `bodypose_model` and its peak/limb grouping."""

    def __init__(self, model_path):
        """Build the body model and load the checkpoint stored at `model_path`."""
        self.model = bodypose_model()
        model_dict = util.transfer(self.model, torch.load(model_path))
        self.model.load_state_dict(model_dict)
        self.model.eval()

    def to(self, device):
        """Move the underlying model to `device` and return self."""
        self.model.to(device)
        return self

    def __call__(self, oriImg):
        """
        Run body pose estimation on one image.

        Args:
            oriImg (numpy.ndarray): Image of shape (H, W, C).

        Returns:
            Tuple (candidate, subset):
                candidate: array with one row per detected peak: x, y, score, id.
                subset: n x 20 array, one row per person. Columns 0-17 index into
                    `candidate` (-1 when the part is missing), column 18 is the
                    total score and column 19 the number of parts.
        """
        device = next(iter(self.model.parameters())).device
        # scale_search = [0.5, 1.0, 1.5, 2.0]
        scale_search = [0.5]
        boxsize = 368
        stride = 8
        padValue = 128
        thre1 = 0.1
        thre2 = 0.05
        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
        paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

        for scale in multiplier:
            imageToTest = util.smart_resize_k(oriImg, fx=scale, fy=scale)
            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            # HWC -> 1CHW, scaled to roughly [-0.5, 0.5).
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            data = data.to(device)
            with torch.no_grad():
                Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
            Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
            Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()

            # extract outputs, resize, and remove padding
            heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
            heatmap = util.smart_resize_k(heatmap, fx=stride, fy=stride)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = util.smart_resize(heatmap, (oriImg.shape[0], oriImg.shape[1]))

            paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
            paf = util.smart_resize_k(paf, fx=stride, fy=stride)
            paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            paf = util.smart_resize(paf, (oriImg.shape[0], oriImg.shape[1]))

            # Running average over scales. The accumulator must only receive the
            # new term; adding the accumulator to itself would double it on each
            # scale and break the average once more than one scale is searched.
            heatmap_avg += heatmap / len(multiplier)
            paf_avg += paf / len(multiplier)

        all_peaks = []
        peak_counter = 0

        for part in range(18):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)

            # Shifted copies of the smoothed map, used to compare each pixel
            # against its four direct neighbours.
            map_left = np.zeros(one_heatmap.shape)
            map_left[1:, :] = one_heatmap[:-1, :]
            map_right = np.zeros(one_heatmap.shape)
            map_right[:-1, :] = one_heatmap[1:, :]
            map_up = np.zeros(one_heatmap.shape)
            map_up[:, 1:] = one_heatmap[:, :-1]
            map_down = np.zeros(one_heatmap.shape)
            map_down[:, :-1] = one_heatmap[:, 1:]

            # A peak is a pixel not smaller than its neighbours and above thre1.
            peaks_binary = np.logical_and.reduce(
                (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
            peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
            # The score is read from the unsmoothed map.
            peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
            peak_id = range(peak_counter, peak_counter + len(peaks))
            peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]

            all_peaks.append(peaks_with_score_and_id)
            peak_counter += len(peaks)

        # find connection in the specified sequence, center 29 is in the position 15
        limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], \
                   [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], \
                   [1, 16], [16, 18], [3, 17], [6, 18]]
        # the middle joints heatmap correspondence
        mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], \
                  [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], \
                  [55, 56], [37, 38], [45, 46]]

        connection_all = []
        special_k = []
        mid_num = 10

        for k in range(len(mapIdx)):
            # The two PAF channels for limb k.
            score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
            candA = all_peaks[limbSeq[k][0] - 1]
            candB = all_peaks[limbSeq[k][1] - 1]
            nA = len(candA)
            nB = len(candB)
            indexA, indexB = limbSeq[k]
            if (nA != 0 and nB != 0):
                connection_candidate = []
                for i in range(nA):
                    for j in range(nB):
                        # Unit vector from candidate A to candidate B.
                        vec = np.subtract(candB[j][:2], candA[i][:2])
                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                        norm = max(0.001, norm)  # avoid division by zero
                        vec = np.divide(vec, norm)

                        # mid_num evenly spaced sample points on the segment A-B.
                        startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num), \
                                            np.linspace(candA[i][1], candB[j][1], num=mid_num)))

                        vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0] \
                                          for I in range(len(startend))])
                        vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1] \
                                          for I in range(len(startend))])

                        # PAF projected onto the limb direction at each sample.
                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                        # Mean projection plus a non-positive term that grows in
                        # magnitude once the limb is longer than half the image height.
                        score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
                            0.5 * oriImg.shape[0] / norm - 1, 0)
                        criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
                        criterion2 = score_with_dist_prior > 0
                        if criterion1 and criterion2:
                            connection_candidate.append(
                                [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])

                # Greedy matching: best-scoring pairs first, each peak used once.
                connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
                connection = np.zeros((0, 5))
                for c in range(len(connection_candidate)):
                    i, j, s = connection_candidate[c][0:3]
                    if (i not in connection[:, 3] and j not in connection[:, 4]):
                        connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                        if (len(connection) >= min(nA, nB)):
                            break

                connection_all.append(connection)
            else:
                # No candidates for one end of the limb; remember to skip it below.
                special_k.append(k)
                connection_all.append([])

        # last number in each row is the total parts number of that person
        # the second last number in each row is the score of the overall configuration
        subset = -1 * np.ones((0, 20))
        candidate = np.array([item for sublist in all_peaks for item in sublist])

        for k in range(len(mapIdx)):
            if k not in special_k:
                partAs = connection_all[k][:, 0]
                partBs = connection_all[k][:, 1]
                indexA, indexB = np.array(limbSeq[k]) - 1

                for i in range(len(connection_all[k])):  # = 1:size(temp,1)
                    found = 0
                    subset_idx = [-1, -1]
                    for j in range(len(subset)):  # 1:size(subset,1):
                        if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                            subset_idx[found] = j
                            found += 1

                    if found == 1:
                        j = subset_idx[0]
                        if subset[j][indexB] != partBs[i]:
                            subset[j][indexB] = partBs[i]
                            subset[j][-1] += 1
                            subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                    elif found == 2:  # if found 2 and disjoint, merge them
                        j1, j2 = subset_idx
                        membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                        if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                            # Missing parts are -1, so adding (other + 1) copies
                            # the other row's indices into the empty slots.
                            subset[j1][:-2] += (subset[j2][:-2] + 1)
                            subset[j1][-2:] += subset[j2][-2:]
                            subset[j1][-2] += connection_all[k][i][2]
                            subset = np.delete(subset, j2, 0)
                        else:  # as like found == 1
                            subset[j1][indexB] = partBs[i]
                            subset[j1][-1] += 1
                            subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

                    # if find no partA in the subset, create a new subset
                    elif not found and k < 17:
                        row = -1 * np.ones(20)
                        row[indexA] = partAs[i]
                        row[indexB] = partBs[i]
                        row[-1] = 2
                        row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                        subset = np.vstack([subset, row])
        # delete some rows of subset which has few parts occur
        deleteIdx = []
        for i in range(len(subset)):
            if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
                deleteIdx.append(i)
        subset = np.delete(subset, deleteIdx, axis=0)

        # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
        # candidate: x, y, score, id
        return candidate, subset

    @staticmethod
    def format_body_result(candidate: np.ndarray, subset: np.ndarray) -> List[BodyResult]:
        """
        Format the body results from the candidate and subset arrays into a list of BodyResult objects.

        Args:
            candidate (np.ndarray): An array of candidates containing the x, y coordinates, score, and id
                for each body part.
            subset (np.ndarray): An array of subsets containing indices to the candidate array for each
                person detected. The last two columns of each row hold the total score and total parts
                of the person.

        Returns:
            List[BodyResult]: A list of BodyResult objects, where each object represents a person with
                detected keypoints, total score, and total parts.
        """
        return [
            BodyResult(
                keypoints=[
                    Keypoint(
                        x=candidate[candidate_index][0],
                        y=candidate[candidate_index][1],
                        score=candidate[candidate_index][2],
                        id=candidate[candidate_index][3]
                    ) if candidate_index != -1 else None
                    for candidate_index in person[:18].astype(int)
                ],
                total_score=person[18],
                total_parts=person[19]
            )
            for person in subset
        ]
controlnet_aux/src/controlnet_aux/open_pose/face.py ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from torch.nn import Conv2d, MaxPool2d, Module, ReLU, init
7
+ from torchvision.transforms import ToPILImage, ToTensor
8
+
9
+ from . import util
10
+
11
+
12
class FaceNet(Module):
    """Model the cascading heatmaps.

    A 3x3-convolution backbone produces a 128-channel feature map. Stage 1 turns
    it into 71 heatmap channels, and stages 2-6 each refine the previous stage's
    heatmaps concatenated with the backbone feature map (71 + 128 = 199 channels).
    """

    # (attribute name, in_channels, out_channels) of the 3x3 backbone
    # convolutions, in the order they are registered and applied.
    _BACKBONE = (
        ("conv1_1", 3, 64), ("conv1_2", 64, 64),
        ("conv2_1", 64, 128), ("conv2_2", 128, 128),
        ("conv3_1", 128, 256), ("conv3_2", 256, 256),
        ("conv3_3", 256, 256), ("conv3_4", 256, 256),
        ("conv4_1", 256, 512), ("conv4_2", 512, 512),
        ("conv4_3", 512, 512), ("conv4_4", 512, 512),
        ("conv5_1", 512, 512), ("conv5_2", 512, 512),
        ("conv5_3_CPM", 512, 128),
    )
    # Backbone layers whose activation is followed by 2x2 max pooling.
    _POOL_AFTER = ("conv1_2", "conv2_2", "conv3_4")
    # (layer index, in_channels, out_channels, kernel_size) within one
    # refinement stage; the layers are named Mconv<index>_stage<stage>.
    _STAGE_LAYERS = (
        (1, 199, 128, 7),
        (2, 128, 128, 7),
        (3, 128, 128, 7),
        (4, 128, 128, 7),
        (5, 128, 128, 7),
        (6, 128, 128, 1),
        (7, 128, 71, 1),
    )
    _REFINE_STAGES = (2, 3, 4, 5, 6)

    @staticmethod
    def _conv(in_channels, out_channels, kernel_size):
        """Create a stride-1 convolution padded so the spatial size is kept."""
        return Conv2d(
            in_channels=in_channels, out_channels=out_channels,
            kernel_size=kernel_size, stride=1, padding=kernel_size // 2)

    def __init__(self):
        super().__init__()
        # cnn to make feature map
        self.relu = ReLU()
        self.max_pooling_2d = MaxPool2d(kernel_size=2, stride=2)
        for name, n_in, n_out in self._BACKBONE:
            setattr(self, name, self._conv(n_in, n_out, 3))

        # stage1
        self.conv6_1_CPM = self._conv(128, 512, 1)
        self.conv6_2_CPM = self._conv(512, 71, 1)

        # stage2 .. stage6
        for stage in self._REFINE_STAGES:
            for idx, n_in, n_out, kernel in self._STAGE_LAYERS:
                setattr(self, f"Mconv{idx}_stage{stage}",
                        self._conv(n_in, n_out, kernel))

        # Every convolution starts with a zero bias.
        for module in self.modules():
            if isinstance(module, Conv2d):
                init.constant_(module.bias, 0)

    def forward(self, x):
        """Return a list of heatmaps."""
        h = x
        for name, _, _ in self._BACKBONE:
            h = self.relu(getattr(self, name)(h))
            if name in self._POOL_AFTER:
                h = self.max_pooling_2d(h)
        feature_map = h

        # stage1
        h = self.relu(self.conv6_1_CPM(h))
        h = self.conv6_2_CPM(h)
        heatmaps = [h]

        # stage2 .. stage6
        for stage in self._REFINE_STAGES:
            h = torch.cat([h, feature_map], dim=1)  # channel concat
            for idx in range(1, 7):
                h = self.relu(getattr(self, f"Mconv{idx}_stage{stage}")(h))
            # The last layer of a stage has no activation.
            h = getattr(self, f"Mconv7_stage{stage}")(h)
            heatmaps.append(h)

        return heatmaps
276
+
277
+
278
+ LOG = logging.getLogger(__name__)
279
+ TOTEN = ToTensor()
280
+ TOPIL = ToPILImage()
281
+
282
+
283
+ params = {
284
+ 'gaussian_sigma': 2.5,
285
+ 'inference_img_size': 736, # 368, 736, 1312
286
+ 'heatmap_peak_thresh': 0.1,
287
+ 'crop_scale': 1.5,
288
+ 'line_indices': [
289
+ [0, 1], [1, 2], [2, 3], [3, 4], [4, 5], [5, 6],
290
+ [6, 7], [7, 8], [8, 9], [9, 10], [10, 11], [11, 12], [12, 13],
291
+ [13, 14], [14, 15], [15, 16],
292
+ [17, 18], [18, 19], [19, 20], [20, 21],
293
+ [22, 23], [23, 24], [24, 25], [25, 26],
294
+ [27, 28], [28, 29], [29, 30],
295
+ [31, 32], [32, 33], [33, 34], [34, 35],
296
+ [36, 37], [37, 38], [38, 39], [39, 40], [40, 41], [41, 36],
297
+ [42, 43], [43, 44], [44, 45], [45, 46], [46, 47], [47, 42],
298
+ [48, 49], [49, 50], [50, 51], [51, 52], [52, 53], [53, 54],
299
+ [54, 55], [55, 56], [56, 57], [57, 58], [58, 59], [59, 48],
300
+ [60, 61], [61, 62], [62, 63], [63, 64], [64, 65], [65, 66],
301
+ [66, 67], [67, 60]
302
+ ],
303
+ }
304
+
305
+
306
+ class Face(object):
307
+ """
308
+ The OpenPose face landmark detector model.
309
+
310
+ Args:
311
+ inference_size: set the size of the inference image size, suggested:
312
+ 368, 736, 1312, default 736
313
+ gaussian_sigma: blur the heatmaps, default 2.5
314
+ heatmap_peak_thresh: return landmark if over threshold, default 0.1
315
+
316
+ """
317
+ def __init__(self, face_model_path,
318
+ inference_size=None,
319
+ gaussian_sigma=None,
320
+ heatmap_peak_thresh=None):
321
+ self.inference_size = inference_size or params["inference_img_size"]
322
+ self.sigma = gaussian_sigma or params['gaussian_sigma']
323
+ self.threshold = heatmap_peak_thresh or params["heatmap_peak_thresh"]
324
+ self.model = FaceNet()
325
+ self.model.load_state_dict(torch.load(face_model_path))
326
+ self.model.eval()
327
+
328
+ def to(self, device):
329
+ self.model.to(device)
330
+ return self
331
+
332
+ def __call__(self, face_img):
333
+ device = next(iter(self.model.parameters())).device
334
+ H, W, C = face_img.shape
335
+
336
+ w_size = 384
337
+ x_data = torch.from_numpy(util.smart_resize(face_img, (w_size, w_size))).permute([2, 0, 1]) / 256.0 - 0.5
338
+
339
+ x_data = x_data.to(device)
340
+
341
+ with torch.no_grad():
342
+ hs = self.model(x_data[None, ...])
343
+ heatmaps = F.interpolate(
344
+ hs[-1],
345
+ (H, W),
346
+ mode='bilinear', align_corners=True).cpu().numpy()[0]
347
+ return heatmaps
348
+
349
+ def compute_peaks_from_heatmaps(self, heatmaps):
350
+ all_peaks = []
351
+ for part in range(heatmaps.shape[0]):
352
+ map_ori = heatmaps[part].copy()
353
+ binary = np.ascontiguousarray(map_ori > 0.05, dtype=np.uint8)
354
+
355
+ if np.sum(binary) == 0:
356
+ continue
357
+
358
+ positions = np.where(binary > 0.5)
359
+ intensities = map_ori[positions]
360
+ mi = np.argmax(intensities)
361
+ y, x = positions[0][mi], positions[1][mi]
362
+ all_peaks.append([x, y])
363
+
364
+ return np.array(all_peaks)
controlnet_aux/src/controlnet_aux/open_pose/hand.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ from scipy.ndimage.filters import gaussian_filter
5
+ from skimage.measure import label
6
+
7
+ from . import util
8
+ from .model import handpose_model
9
+
10
+
11
class Hand(object):
    """Hand keypoint estimator built on `handpose_model`."""

    def __init__(self, model_path):
        """Build the network and load the checkpoint found at `model_path`."""
        self.model = handpose_model()
        # map_location="cpu" lets a checkpoint that was saved from a GPU load
        # on a CPU-only host; callers move the model afterwards with `to()`.
        checkpoint = torch.load(model_path, map_location="cpu")
        model_dict = util.transfer(self.model, checkpoint)
        self.model.load_state_dict(model_dict)
        self.model.eval()

    def to(self, device):
        """Move the underlying network to `device` and return `self` for chaining."""
        self.model.to(device)
        return self

    def __call__(self, oriImgRaw):
        """
        Estimate 21 hand keypoints on one hand crop.

        Args:
            oriImgRaw: numpy image with three dimensions (H, W, C).

        Returns:
            numpy array with 21 ``[x, y]`` rows in pixel coordinates of
            `oriImgRaw`; a part with no response above the threshold is
            reported as ``[0, 0]``.
        """
        # Run on whatever device the model parameters currently live on.
        device = next(iter(self.model.parameters())).device
        scale_search = [0.5, 1.0, 1.5, 2.0]
        boxsize = 368
        stride = 8
        padValue = 128
        thre = 0.05
        multiplier = [x * boxsize for x in scale_search]

        # Heatmaps from every scale are averaged on a fixed working grid.
        wsize = 128
        heatmap_avg = np.zeros((wsize, wsize, 22))

        Hr, Wr, _ = oriImgRaw.shape

        oriImg = cv2.GaussianBlur(oriImgRaw, (0, 0), 0.8)

        for scale in multiplier:
            imageToTest = util.smart_resize(oriImg, (scale, scale))

            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            # HWC -> 1CHW, then shift values to roughly [-0.5, 0.5).
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            data = data.to(device)

            with torch.no_grad():
                output = self.model(data).cpu().numpy()

            # extract outputs, resize, and remove padding
            heatmap = np.transpose(np.squeeze(output), (1, 2, 0))  # output 1 is heatmaps
            heatmap = util.smart_resize_k(heatmap, fx=stride, fy=stride)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = util.smart_resize(heatmap, (wsize, wsize))

            heatmap_avg += heatmap / len(multiplier)

        all_peaks = []
        for part in range(21):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)
            binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)

            if np.sum(binary) == 0:
                all_peaks.append([0, 0])
                continue

            # Keep only the connected region carrying the most heatmap mass,
            # then take the maximum inside it.
            label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
            region_mass = [np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]
            max_index = np.argmax(region_mass) + 1
            label_img[label_img != max_index] = 0
            map_ori[label_img == 0] = 0

            y, x = util.npmax(map_ori)
            # Map from the working grid back to input pixel coordinates.
            y = int(float(y) * float(Hr) / float(wsize))
            x = int(float(x) * float(Wr) / float(wsize))
            all_peaks.append([x, y])
        return np.array(all_peaks)
80
+
81
if __name__ == "__main__":
    # Manual smoke test: estimate the keypoints of one image and display them.
    from types import SimpleNamespace

    hand_estimation = Hand('../model/hand_pose_model.pth')

    test_image = '../images/hand.jpg'
    oriImg = cv2.imread(test_image)  # B,G,R order
    peaks = hand_estimation(oriImg)

    # util.draw_handpose takes (canvas, keypoints) and reads normalised .x/.y
    # attributes, whereas Hand returns pixel [x, y] rows, so convert first.
    height, width = oriImg.shape[:2]
    keypoints = [
        SimpleNamespace(x=px / float(width), y=py / float(height))
        for px, py in peaks
    ]
    canvas = util.draw_handpose(oriImg, keypoints)
    cv2.imshow('', canvas)
    cv2.waitKey(0)
+ cv2.waitKey(0)
controlnet_aux/src/controlnet_aux/open_pose/model.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from collections import OrderedDict
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+
7
def make_layers(block, no_relu_layers):
    """
    Turn an ordered layer specification into an ``nn.Sequential``.

    Args:
        block: ordered mapping of layer name to a parameter list. A name
            containing ``'pool'`` maps to ``[kernel_size, stride, padding]``
            of a max-pool; any other name maps to
            ``[in_channels, out_channels, kernel_size, stride, padding]`` of
            a convolution.
        no_relu_layers: names of convolutions that must NOT be followed by a
            ReLU.

    Returns:
        ``nn.Sequential`` whose children keep the given names; every added
        ReLU is named ``'relu_' + <conv name>``.
    """
    named_modules = []
    for name, spec in block.items():
        if 'pool' in name:
            pool = nn.MaxPool2d(kernel_size=spec[0], stride=spec[1], padding=spec[2])
            named_modules.append((name, pool))
            continue

        conv = nn.Conv2d(
            in_channels=spec[0],
            out_channels=spec[1],
            kernel_size=spec[2],
            stride=spec[3],
            padding=spec[4],
        )
        named_modules.append((name, conv))
        if name not in no_relu_layers:
            named_modules.append(('relu_' + name, nn.ReLU(inplace=True)))

    return nn.Sequential(OrderedDict(named_modules))
23
+
24
class bodypose_model(nn.Module):
    """
    Six-stage, two-branch body pose network.

    `model0` extracts 128-channel features. Every stage has an ``L1`` branch
    (38 output channels) and an ``L2`` branch (19 output channels); stages
    2-6 consume the previous stage's two outputs concatenated with the
    `model0` features (38 + 19 + 128 = 185 channels).
    """

    def __init__(self):
        super(bodypose_model, self).__init__()

        # these layers have no relu layer: the last convolution of each
        # branch in every stage. The stage-6 L2 entry used to be a duplicated
        # 'Mconv7_stage6_L1', which wrongly put a ReLU on the final L2 output.
        no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',
                          'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',
                          'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',
                          'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L2']
        blocks = {}
        block0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
            ('conv4_4_CPM', [256, 128, 3, 1, 1])
        ])

        # Stage 1
        block1_1 = OrderedDict([
            ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])
        ])

        block1_2 = OrderedDict([
            ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])
        ])
        blocks['block1_1'] = block1_1
        blocks['block1_2'] = block1_2

        self.model0 = make_layers(block0, no_relu_layers)

        # Stages 2 - 6
        for i in range(2, 7):
            blocks['block%d_1' % i] = OrderedDict([
                ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
            ])

            blocks['block%d_2' % i] = OrderedDict([
                ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
            ])

        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        # Attribute names are part of the checkpoint's parameter names, so
        # they must stay exactly as they are.
        self.model1_1 = blocks['block1_1']
        self.model2_1 = blocks['block2_1']
        self.model3_1 = blocks['block3_1']
        self.model4_1 = blocks['block4_1']
        self.model5_1 = blocks['block5_1']
        self.model6_1 = blocks['block6_1']

        self.model1_2 = blocks['block1_2']
        self.model2_2 = blocks['block2_2']
        self.model3_2 = blocks['block3_2']
        self.model4_2 = blocks['block4_2']
        self.model5_2 = blocks['block5_2']
        self.model6_2 = blocks['block6_2']

    def forward(self, x):
        """
        Run all six stages.

        Returns:
            Tuple ``(out6_1, out6_2)``: the stage-6 outputs of the ``L1``
            (38-channel) and ``L2`` (19-channel) branches.
        """
        features = self.model0(x)

        branch1 = self.model1_1(features)
        branch2 = self.model1_2(features)

        for stage in range(2, 7):
            # Every refinement stage sees both previous outputs plus the
            # shared features.
            merged = torch.cat([branch1, branch2, features], 1)
            branch1 = getattr(self, 'model%d_1' % stage)(merged)
            branch2 = getattr(self, 'model%d_2' % stage)(merged)

        return branch1, branch2
142
+
143
class handpose_model(nn.Module):
    """
    Six-stage hand pose network.

    `model1_0` extracts 128-channel features, `model1_1` produces the first
    22-channel prediction, and `model2` .. `model6` each refine the previous
    prediction concatenated with those features (22 + 128 = 150 channels).
    """

    def __init__(self):
        super(handpose_model, self).__init__()

        # The last convolution of every stage is not followed by a ReLU.
        no_relu_layers = ['conv6_2_CPM'] + ['Mconv7_stage%d' % i for i in range(2, 7)]

        # stage 1: feature extractor followed by the first prediction head
        backbone = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3', [512, 512, 3, 1, 1]),
            ('conv4_4', [512, 512, 3, 1, 1]),
            ('conv5_1', [512, 512, 3, 1, 1]),
            ('conv5_2', [512, 512, 3, 1, 1]),
            ('conv5_3_CPM', [512, 128, 3, 1, 1])
        ])
        first_head = OrderedDict([
            ('conv6_1_CPM', [128, 512, 1, 1, 0]),
            ('conv6_2_CPM', [512, 22, 1, 1, 0])
        ])

        # stage 2-6: identical refinement stacks that differ only by name
        refinements = [
            OrderedDict([
                ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]),
                ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0])
            ])
            for i in range(2, 7)
        ]

        # Build every module first (stage order), then register them, so the
        # attribute names and their order stay exactly model1_0 .. model6.
        built = [make_layers(spec, no_relu_layers)
                 for spec in [backbone, first_head] + refinements]

        self.model1_0 = built[0]
        self.model1_1 = built[1]
        self.model2 = built[2]
        self.model3 = built[3]
        self.model4 = built[4]
        self.model5 = built[5]
        self.model6 = built[6]

    def forward(self, x):
        """Return the stage-6 output (22 channels) for input batch `x`."""
        features = self.model1_0(x)
        prediction = self.model1_1(features)
        for stage in (self.model2, self.model3, self.model4, self.model5, self.model6):
            prediction = stage(torch.cat([prediction, features], 1))
        return prediction
controlnet_aux/src/controlnet_aux/open_pose/util.py ADDED
@@ -0,0 +1,383 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import numpy as np
3
+ import cv2
4
+ from typing import List, Tuple, Union
5
+
6
+ from .body import BodyResult, Keypoint
7
+
8
+ eps = 0.01
9
+
10
+
11
def smart_resize(x, s):
    """
    Resize array `x` to the target size ``s = (height, width)``.

    Arrays with one or three channels (or no channel axis) go through a
    single ``cv2.resize`` call, using INTER_AREA when shrinking and
    INTER_LANCZOS4 otherwise. Any other channel count is resized channel by
    channel and stacked back along the last axis.
    """
    target_h, target_w = s
    if x.ndim == 2:
        source_h, source_w = x.shape
        channels = 1
    else:
        source_h, source_w, channels = x.shape

    if channels != 3 and channels != 1:
        planes = [smart_resize(x[:, :, c], s) for c in range(channels)]
        return np.stack(planes, axis=2)

    # Ratio of the summed side lengths decides between shrinking and enlarging.
    ratio = float(target_h + target_w) / float(source_h + source_w)
    if ratio < 1:
        method = cv2.INTER_AREA
    else:
        method = cv2.INTER_LANCZOS4
    return cv2.resize(x, (int(target_w), int(target_h)), interpolation=method)
23
+
24
+
25
def smart_resize_k(x, fx, fy):
    """
    Resize array `x` by the scale factors `fx` (width) and `fy` (height).

    Arrays with one or three channels (or no channel axis) go through a
    single ``cv2.resize`` call, using INTER_AREA when shrinking and
    INTER_LANCZOS4 otherwise. Any other channel count is resized channel by
    channel and stacked back along the last axis.
    """
    if x.ndim == 2:
        source_h, source_w = x.shape
        channels = 1
    else:
        source_h, source_w, channels = x.shape
    target_h = source_h * fy
    target_w = source_w * fx

    if channels != 3 and channels != 1:
        planes = [smart_resize_k(x[:, :, c], fx, fy) for c in range(channels)]
        return np.stack(planes, axis=2)

    # Ratio of the summed side lengths decides between shrinking and enlarging.
    ratio = float(target_h + target_w) / float(source_h + source_w)
    if ratio < 1:
        method = cv2.INTER_AREA
    else:
        method = cv2.INTER_LANCZOS4
    return cv2.resize(x, (int(target_w), int(target_h)), interpolation=method)
37
+
38
+
39
def padRightDownCorner(img, stride, padValue):
    """
    Pad a 3-D image at the bottom and right so both sides are multiples of `stride`.

    Args:
        img: array indexed as (height, width, channel).
        stride: the multiple that height and width are rounded up to.
        padValue: value the added rows and columns are filled with.

    Returns:
        ``(img_padded, pad)`` where `pad` is the list ``[up, left, down,
        right]`` of added rows/columns; up and left are always 0.
    """
    height, width = img.shape[0], img.shape[1]

    extra_rows = 0 if (height % stride == 0) else stride - (height % stride)
    extra_cols = 0 if (width % stride == 0) else stride - (width % stride)
    pad = [0, 0, extra_rows, extra_cols]  # up, left, down, right

    # (axis, template window, amount, put filler in front?) for each side, in
    # the order up, left, down, right. The template is multiplied by zero and
    # shifted to padValue, so the filler inherits the image's dtype rules.
    sides = (
        (0, slice(0, 1), pad[0], True),
        (1, slice(0, 1), pad[1], True),
        (0, slice(-2, -1), pad[2], False),
        (1, slice(-2, -1), pad[3], False),
    )

    img_padded = img
    for axis, window, amount, in_front in sides:
        selector = [slice(None), slice(None), slice(None)]
        selector[axis] = window
        template = img_padded[tuple(selector)] * 0 + padValue

        repeats = [1, 1, 1]
        repeats[axis] = amount
        filler = np.tile(template, tuple(repeats))

        if in_front:
            img_padded = np.concatenate((filler, img_padded), axis=axis)
        else:
            img_padded = np.concatenate((img_padded, filler), axis=axis)

    return img_padded, pad
60
+
61
+
62
def transfer(model, model_weights):
    """
    Re-key checkpoint weights to the parameter names of `model`.

    Every name in ``model.state_dict()`` is looked up in `model_weights`
    without its first dot-separated component, and the found value is stored
    under the full name.

    Raises:
        KeyError: when a shortened name is missing from `model_weights`.
    """
    return {
        full_name: model_weights[full_name.partition('.')[2]]
        for full_name in model.state_dict().keys()
    }
67
+
68
+
69
def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray:
    """
    Draw the limbs and joints of one body pose onto `canvas`.

    Args:
        canvas (np.ndarray): A 3D numpy array (image) that is drawn on in place.
        keypoints (List[Keypoint]): Body keypoints; an entry may be None when
            that keypoint is missing.

    Returns:
        np.ndarray: The same canvas, with the body pose drawn on it.

    Note:
        Keypoint x and y are expected to be normalized between 0 and 1; they
        are scaled by the canvas width and height here.
    """
    height, width, _ = canvas.shape
    stickwidth = 4

    # 1-based keypoint index pairs, one pair per limb.
    limbSeq = [
        [2, 3], [2, 6], [3, 4], [4, 5],
        [6, 7], [7, 8], [2, 9], [9, 10],
        [10, 11], [2, 12], [12, 13], [13, 14],
        [2, 1], [1, 15], [15, 17], [1, 16],
        [16, 18],
    ]

    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85]]

    # Limbs: a filled ellipse between the two end points, in a dimmed colour.
    for (first_index, second_index), color in zip(limbSeq, colors):
        first = keypoints[first_index - 1]
        second = keypoints[second_index - 1]
        if first is None or second is None:
            continue

        xs = np.array([first.x, second.x]) * float(width)
        ys = np.array([first.y, second.y]) * float(height)
        center = (int(np.mean(xs)), int(np.mean(ys)))
        length = ((ys[0] - ys[1]) ** 2 + (xs[0] - xs[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
        polygon = cv2.ellipse2Poly(center, (int(length / 2), stickwidth), int(angle), 0, 360, 1)
        dimmed = [int(float(c) * 0.6) for c in color]
        cv2.fillConvexPoly(canvas, polygon, dimmed)

    # Joints: a filled circle per detected keypoint, in the full colour.
    for keypoint, color in zip(keypoints, colors):
        if keypoint is None:
            continue

        px = int(keypoint.x * width)
        py = int(keypoint.y * height)
        cv2.circle(canvas, (int(px), int(py)), 4, color, thickness=-1)

    return canvas
124
+
125
+
126
def draw_handpose(canvas: np.ndarray, keypoints: Union[List[Keypoint], None]) -> np.ndarray:
    """
    Draw keypoints and connections representing hand pose on a given canvas.

    Args:
        canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the hand pose.
        keypoints (List[Keypoint]| None): A list of Keypoint objects representing the hand keypoints to be drawn
                                          or None if no keypoints are present. Individual entries may be None
                                          and are skipped.

    Returns:
        np.ndarray: A 3D numpy array representing the modified canvas with the drawn hand pose.

    Note:
        The function expects the x and y coordinates of the keypoints to be normalized between 0 and 1.
    """
    # Imported here to keep matplotlib a drawing-time dependency only; the
    # submodule is imported explicitly rather than relying on `import
    # matplotlib` to expose `matplotlib.colors`.
    import matplotlib.colors

    if not keypoints:
        return canvas

    H, W, C = canvas.shape

    # Index pairs of connected keypoints.
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]

    for ie, (e1, e2) in enumerate(edges):
        k1 = keypoints[e1]
        k2 = keypoints[e2]
        if k1 is None or k2 is None:
            continue

        x1 = int(k1.x * W)
        y1 = int(k1.y * H)
        x2 = int(k2.x * W)
        y2 = int(k2.y * H)
        # Points that land on pixel 0 in either axis are not drawn.
        if x1 > eps and y1 > eps and x2 > eps and y2 > eps:
            # One hue per edge, spread evenly over the colour wheel.
            edge_color = matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255
            cv2.line(canvas, (x1, y1), (x2, y2), edge_color, thickness=2)

    for keypoint in keypoints:
        # Missing keypoints are tolerated here just like in the edge loop.
        if keypoint is None:
            continue
        x, y = keypoint.x, keypoint.y
        x = int(x * W)
        y = int(y * H)
        if x > eps and y > eps:
            cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
    return canvas
170
+
171
+
172
def draw_facepose(canvas: np.ndarray, keypoints: Union[List[Keypoint], None]) -> np.ndarray:
    """
    Draw the face keypoints as white dots onto `canvas`.

    Args:
        canvas (np.ndarray): A 3D numpy array (image) that is drawn on in place.
        keypoints (List[Keypoint]| None): Face keypoints to draw, or None /
                                          empty when there is nothing to draw.

    Returns:
        np.ndarray: The same canvas, with the face keypoints drawn on it.

    Note:
        Keypoint x and y are expected to be normalized between 0 and 1; they
        are scaled by the canvas width and height here.
    """
    if not keypoints:
        return canvas

    height, width, _ = canvas.shape
    white = (255, 255, 255)
    for keypoint in keypoints:
        px = int(keypoint.x * width)
        py = int(keypoint.y * height)
        # Points that land on pixel 0 in either axis are not drawn.
        if px > eps and py > eps:
            cv2.circle(canvas, (px, py), 3, white, thickness=-1)
    return canvas
198
+
199
+
200
+ # detect hand according to body pose keypoints
201
+ # please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
202
def handDetect(body: BodyResult, oriImg) -> List[Tuple[int, int, int, bool]]:
    """
    Derive a square bounding box for each hand from the arm keypoints of a body.

    Args:
        body (BodyResult): Body pose result whose `keypoints` list is read.
        oriImg (numpy.ndarray): The original input image; only its height
                                and width are used.

    Returns:
        List[Tuple[int, int, int, bool]]: One ``(x, y, width, is_left)`` tuple
                                          per accepted hand, where (x, y) is
                                          the top-left corner of the box and
                                          `is_left` is True for the left hand.
                                          The left hand, when present, comes
                                          first.

    Notes:
        - Width and height of a box are equal since the network requires squared input.
        - Boxes narrower than 20 pixels are dropped.
        - A hand is only considered when its shoulder, elbow and wrist are
          all detected.
    """
    ratioWristElbow = 0.33
    image_height, image_width = oriImg.shape[0:2]
    keypoints = body.keypoints

    # (shoulder, elbow, wrist) keypoint indices per arm; left arm first.
    arms = (
        ((5, 6, 7), True),
        ((2, 3, 4), False),
    )

    detect_result = []
    for joint_indices, is_left in arms:
        shoulder, elbow, wrist = (keypoints[index] for index in joint_indices)
        if shoulder is None or elbow is None or wrist is None:
            continue

        x1, y1 = shoulder.x, shoulder.y
        x2, y2 = elbow.x, elbow.y
        x3, y3 = wrist.x, wrist.y

        # The hand centre is the wrist pushed further along the forearm:
        # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbow)
        x = x3 + ratioWristElbow * (x3 - x2)
        y = y3 + ratioWristElbow * (y3 - y2)

        # Box size follows the longer of the forearm and 0.9 * upper arm.
        distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
        distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
        width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)

        # Move from the centre to the top-left corner (width == height).
        x -= width / 2
        y -= width / 2

        # Keep the box inside the image.
        x = 0 if x < 0 else x
        y = 0 if y < 0 else y
        fit_x = image_width - x if x + width > image_width else width
        fit_y = image_height - y if y + width > image_height else width
        width = min(fit_x, fit_y)

        # Boxes below the minimum size of 20 pixels are discarded.
        if width >= 20:
            detect_result.append((int(x), int(y), int(width), is_left))

    return detect_result
293
+
294
+
295
+ # Written by Lvmin
296
def faceDetect(body: BodyResult, oriImg) -> Union[Tuple[int, int, int], None]:
    """
    Derive a bounding box for the face from the head, eye and ear keypoints of a body.

    Args:
        body (BodyResult): Body pose result whose `keypoints` list is read.
        oriImg (numpy.ndarray): The original input image; only its height
                                and width are used.

    Returns:
        Tuple[int, int, int] | None: ``(x, y, width)`` with (x, y) the top-left
                                     corner of the box, or None when the head
                                     keypoint is missing, when no eye or ear
                                     is detected, or when the resulting width
                                     is below 20 pixels.
    """
    image_height, image_width = oriImg.shape[0:2]

    keypoints = body.keypoints
    head = keypoints[0]
    left_eye, right_eye, left_ear, right_ear = (keypoints[index] for index in (14, 15, 16, 17))

    if head is None:
        return None
    if left_eye is None and right_eye is None and left_ear is None and right_ear is None:
        return None

    x0, y0 = head.x, head.y

    # Half-size of the box: the largest scaled distance (max of |dx| and |dy|)
    # from the head keypoint; eyes are weighted 3.0 and ears 1.5.
    width = 0.0
    for landmark, weight in ((left_eye, 3.0), (right_eye, 3.0), (left_ear, 1.5), (right_ear, 1.5)):
        if landmark is None:
            continue
        d = max(abs(x0 - landmark.x), abs(y0 - landmark.y))
        width = max(width, d * weight)

    # Top-left corner, clamped to the image.
    x = x0 - width
    y = y0 - width
    x = 0 if x < 0 else x
    y = 0 if y < 0 else y

    # NOTE(review): the full box spans 2 * width, yet the overflow tests below
    # compare against x + width / y + width; confirm whether that is intended.
    width1 = image_width - x if x + width > image_width else width * 2
    width2 = image_height - y if y + width > image_height else width * 2
    width = min(width1, width2)

    if width < 20:
        return None
    return int(x), int(y), int(width)
375
+
376
+
377
+ # get max index of 2d array
378
def npmax(array):
    """Return ``(row, column)`` of the largest entry of a 2-D array."""
    best_column_per_row = array.argmax(axis=1)
    best_value_per_row = array.max(axis=1)
    row = best_value_per_row.argmax()
    return row, best_column_per_row[row]
controlnet_aux/src/controlnet_aux/util.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import torch
7
+
8
+ annotator_ckpts_path = os.path.join(os.path.dirname(__file__), 'ckpts')
9
+
10
+
11
def HWC3(x):
    """
    Convert a uint8 image to three channels in (H, W, C) layout.

    A 2-D or single-channel image is repeated over three channels, a
    three-channel image is returned as is, and a four-channel image is
    blended over a white background using its fourth channel as alpha.

    Raises:
        AssertionError: if `x` is not uint8, does not end up 3-D, or has a
            channel count other than 1, 3 or 4.
    """
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    channels = x.shape[2]
    assert channels == 1 or channels == 3 or channels == 4

    if channels == 3:
        return x
    if channels == 1:
        return np.concatenate([x, x, x], axis=2)

    # Four channels: composite the colour over white, weighted by alpha.
    rgb = x[:, :, 0:3].astype(np.float32)
    opacity = x[:, :, 3:4].astype(np.float32) / 255.0
    blended = rgb * opacity + 255.0 * (1.0 - opacity)
    return blended.clip(0, 255).astype(np.uint8)
28
+
29
+
30
def make_noise_disk(H, W, C, F):
    """
    Create an (H, W, C) array of smooth random noise.

    Uniform noise is drawn on a grid that is coarser by the factor `F`,
    enlarged with cubic interpolation, cropped to H x W, then shifted by its
    minimum and divided by the resulting maximum.
    """
    coarse_shape = ((H // F) + 2, (W // F) + 2, C)
    noise = np.random.uniform(low=0, high=1, size=coarse_shape)

    # Enlarge with a margin of F pixels per side, then cut the margin off.
    enlarged_size = (W + 2 * F, H + 2 * F)
    noise = cv2.resize(noise, enlarged_size, interpolation=cv2.INTER_CUBIC)
    noise = noise[F: F + H, F: F + W]

    noise -= np.min(noise)
    noise /= np.max(noise)

    if C == 1:
        # Restore the channel axis for the single-channel case.
        noise = noise[:, :, None]
    return noise
39
+
40
+
41
def nms(x, t, s):
    """
    Thin a response map by directional non-maximum suppression.

    `x` is blurred with a Gaussian of sigma `s`. A pixel survives when it
    equals the dilation of the blurred map along at least one of four
    directions (horizontal, vertical and both diagonals). Survivors above
    the threshold `t` become 255 in the returned uint8 mask; everything else
    is 0.
    """
    x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)

    # 3x3 line-shaped structuring elements, one per direction.
    directions = [
        np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8),
        np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8),
        np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8),
        np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8),
    ]

    survivors = np.zeros_like(x)
    for kernel in directions:
        is_local_max = cv2.dilate(x, kernel=kernel) == x
        np.putmask(survivors, is_local_max, x)

    mask = np.zeros_like(survivors, dtype=np.uint8)
    mask[survivors > t] = 255
    return mask
57
+
58
def min_max_norm(x):
    """
    Rescale `x` in place so its minimum becomes 0, and return it.

    The shifted values are divided by their maximum, which is floored at
    1e-5 to avoid dividing by zero for a constant input.
    """
    lowest = np.min(x)
    x -= lowest
    highest = np.max(x)
    x /= np.maximum(highest, 1e-5)
    return x
62
+
63
+
64
def safe_step(x, step=2):
    """
    Quantize `x` into steps of ``1 / step``.

    The input is scaled by ``step + 1``, truncated through an int32 cast and
    divided by `step`; the result is a float32 array.
    """
    scaled = x.astype(np.float32) * float(step + 1)
    truncated = scaled.astype(np.int32).astype(np.float32)
    return truncated / float(step)
68
+
69
+
70
def img2mask(img, H, W, low=10, high=90):
    """
    Turn a uint8 image into a random boolean mask of shape (H, W).

    One channel is picked at random (for a 3-D image), resized to W x H with
    cubic interpolation and inverted with probability 0.5. The mask marks
    the pixels lying below a percentile drawn at random from
    ``range(low, high)``.

    Raises:
        AssertionError: if `img` is neither 2-D nor 3-D, or is not uint8.
    """
    assert img.ndim == 3 or img.ndim == 2
    assert img.dtype == np.uint8

    if img.ndim == 3:
        channel = random.randrange(0, img.shape[2])
        plane = img[:, :, channel]
    else:
        plane = img

    plane = cv2.resize(plane, (W, H), interpolation=cv2.INTER_CUBIC)

    invert = random.uniform(0, 1) < 0.5
    if invert:
        plane = 255 - plane

    cutoff = np.percentile(plane, random.randrange(low, high))
    return plane < cutoff
85
+
86
+
87
def resize_image(input_image, resolution):
    """
    Resize an (H, W, C) image so its shorter side is about `resolution`.

    Both target sides are rounded to the nearest multiple of 64. Enlarging
    uses INTER_LANCZOS4; otherwise INTER_AREA is used.
    """
    source_h, source_w, _ = input_image.shape
    scale = float(resolution) / min(float(source_h), float(source_w))

    target_h = int(np.round(float(source_h) * scale / 64.0)) * 64
    target_w = int(np.round(float(source_w) * scale / 64.0)) * 64

    if scale > 1:
        method = cv2.INTER_LANCZOS4
    else:
        method = cv2.INTER_AREA
    return cv2.resize(input_image, (target_w, target_h), interpolation=method)
98
+
99
+
100
def torch_gc():
    """Empty the CUDA cache and collect CUDA IPC memory; do nothing without CUDA."""
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
104
+
105
+
106
def ade_palette():
    """ADE20K palette that maps each class to RGB values.

    Returns a freshly built list of 150 ``[R, G, B]`` integer triples; the
    position of a triple in the list is the class index it belongs to.
    """
    return [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50],
            [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255],
            [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
            [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
            [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
            [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
            [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
            [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
            [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
            [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],
            [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153],
            [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255],
            [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0],
            [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255],
            [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255],
            [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255],
            [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0],
            [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0],
            [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255],
            [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255],
            [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20],
            [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255],
            [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255],
            [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255],
            [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0],
            [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0],
            [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255],
            [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112],
            [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160],
            [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163],
            [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0],
            [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0],
            [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255],
            [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204],
            [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255],
            [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255],
            [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194],
            [102, 255, 0], [92, 0, 255]]
146
+
controlnet_aux/tests/test_controlnet_aux.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from io import BytesIO
4
+
5
+ import numpy as np
6
+ import pytest
7
+ import requests
8
+ from PIL import Image
9
+
10
+ from controlnet_aux import (CannyDetector, ContentShuffleDetector, HEDdetector,
11
+ LeresDetector, LineartAnimeDetector,
12
+ LineartDetector, MediapipeFaceDetector,
13
+ MidasDetector, MLSDdetector, NormalBaeDetector,
14
+ OpenposeDetector, PidiNetDetector, SamDetector,
15
+ ZoeDetector, DWposeDetector)
16
+
17
# Directory (relative to the current working directory) that receives every image saved by these tests.
+ OUTPUT_DIR = "tests/outputs"
18
+
19
def output(name, img):
    """Save ``img`` as ``<name>.png`` inside ``OUTPUT_DIR``.

    ``name`` is rendered with the ``s`` format spec, and ``img`` is written
    through its own ``save(path)`` method.
    """
    filename = f"{name:s}.png"
    destination = os.path.join(OUTPUT_DIR, filename)
    img.save(destination)
21
+
22
def common(name, processor, img):
    """Run ``processor`` through the shared input/output matrix and save each result.

    Five calls are made, in this order, each saved under ``name`` plus a suffix:

    * ``name``            -- ``processor(img)`` with default arguments
    * ``name + "_pil_np"`` -- ``img`` as given, ``output_type="np"``
    * ``name + "_np_np"``  -- ``img`` converted to a uint8 array, ``output_type="np"``
    * ``name + "_np_pil"`` -- ``img`` converted to a uint8 array, ``output_type="pil"``
    * ``name + "_scaled"`` -- ``detect_resolution=640, image_resolution=768``

    Results requested with ``output_type="np"`` are wrapped with
    ``Image.fromarray`` before saving.
    """
    # Default invocation.
    output(name, processor(img))

    # Image as given in, array out.
    label = name + "_pil_np"
    as_array = processor(img, output_type="np")
    output(label, Image.fromarray(as_array))

    # Array in, array out. A fresh array is built for every call, as before.
    label = name + "_np_np"
    as_array = processor(np.array(img, dtype=np.uint8), output_type="np")
    output(label, Image.fromarray(as_array))

    # Array in, "pil" out.
    label = name + "_np_pil"
    as_pil = processor(np.array(img, dtype=np.uint8), output_type="pil")
    output(label, as_pil)

    # Non-default detection and output resolutions.
    label = name + "_scaled"
    rescaled = processor(img, detect_resolution=640, image_resolution=768)
    output(label, rescaled)
28
+
29
def return_pil(name, processor, img):
    """Exercise both values of the ``return_pil`` keyword on ``processor``.

    The ``return_pil=False`` result is wrapped with ``Image.fromarray`` before
    saving; the ``return_pil=True`` result is saved directly.
    """
    label = name + "_pil_false"
    raw_result = processor(img, return_pil=False)
    output(label, Image.fromarray(raw_result))

    label = name + "_pil_true"
    pil_result = processor(img, return_pil=True)
    output(label, pil_result)
32
+
33
@pytest.fixture(scope="module")
def img():
    """Module-scoped fixture: reset the output directory and fetch the sample image.

    Any existing ``OUTPUT_DIR`` is removed and recreated so each run starts
    from an empty directory. The reference pose image is then downloaded,
    converted to RGB and resized to 512x512.

    Returns:
        The downloaded image as a 512x512 RGB ``PIL.Image``.

    Raises:
        requests.RequestException: if the download times out or the server
            answers with an error status.
    """
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)
    # makedirs also creates the parent "tests" directory when it is missing;
    # plain mkdir raised FileNotFoundError in that case.
    os.makedirs(OUTPUT_DIR)

    url = "https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png"
    # requests never times out by default, so an unresponsive host would hang
    # the whole test session without an explicit timeout.
    response = requests.get(url, timeout=60)
    # Fail with the HTTP error itself rather than letting PIL try to decode
    # an error page.
    response.raise_for_status()

    image = Image.open(BytesIO(response.content)).convert("RGB").resize((512, 512))
    return image
42
+
43
def test_canny(img):
    """Run CannyDetector through the shared matrix and the ``img=`` keyword form."""
    detector = CannyDetector()
    common("canny", detector, img)

    # The detector is also called with the image passed by keyword.
    keyword_result = detector(img=img)
    output("canny_img", keyword_result)
47
+
48
def test_hed(img):
    """Run HEDdetector through the shared checks plus its ``safe`` and ``scribble`` flags."""
    detector = HEDdetector.from_pretrained("lllyasviel/Annotators")
    common("hed", detector, img)
    return_pil("hed", detector, img)

    safe_result = detector(img, safe=True)
    output("hed_safe", safe_result)

    scribble_result = detector(img, scribble=True)
    output("hed_scribble", scribble_result)
54
+
55
def test_leres(img):
    """Run LeresDetector through the shared matrix and once with ``boost=True``."""
    detector = LeresDetector.from_pretrained("lllyasviel/Annotators")
    common("leres", detector, img)

    boosted = detector(img, boost=True)
    output("leres_boost", boosted)
59
+
60
def test_lineart(img):
    """Run LineartDetector through the shared checks and once with ``coarse=True``."""
    detector = LineartDetector.from_pretrained("lllyasviel/Annotators")
    common("lineart", detector, img)
    return_pil("lineart", detector, img)

    coarse_result = detector(img, coarse=True)
    output("lineart_coarse", coarse_result)
65
+
66
def test_lineart_anime(img):
    """Run LineartAnimeDetector through the shared matrix and the ``return_pil`` checks."""
    detector = LineartAnimeDetector.from_pretrained("lllyasviel/Annotators")
    for check in (common, return_pil):
        check("lineart_anime", detector, img)
70
+
71
def test_mediapipe_face(img):
    """Run MediapipeFaceDetector through the shared matrix and the ``image=`` keyword form."""
    detector = MediapipeFaceDetector()
    common("mediapipe", detector, img)

    # The detector is also called with the image passed by keyword.
    keyword_result = detector(image=img)
    output("mediapipe_image", keyword_result)
75
+
76
def test_midas(img):
    """Run MidasDetector through the shared matrix and save its ``depth_and_normal`` extra output."""
    detector = MidasDetector.from_pretrained("lllyasviel/Annotators")
    common("midas", detector, img)

    # With depth_and_normal=True the result is indexable; element 1 is saved
    # under the "midas_normal" name.
    combined = detector(img, depth_and_normal=True)
    output("midas_normal", combined[1])
80
+
81
def test_mlsd(img):
    """Run MLSDdetector through the shared matrix and the ``return_pil`` checks."""
    detector = MLSDdetector.from_pretrained("lllyasviel/Annotators")
    for check in (common, return_pil):
        check("mlsd", detector, img)
85
+
86
def test_normalbae(img):
    """Run NormalBaeDetector through the shared matrix and the ``return_pil`` checks."""
    detector = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
    for check in (common, return_pil):
        check("normal_bae", detector, img)
90
+
91
def test_openpose(img):
    """Run OpenposeDetector through the shared checks and its body/hand/face switches.

    After the shared checks, the detector is called once per keyword
    combination listed below, in order, and each result is saved under the
    paired name.
    """
    detector = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
    common("openpose", detector, img)
    return_pil("openpose", detector, img)

    variants = (
        ("openpose_hand_and_face_false", {"hand_and_face": False}),
        ("openpose_hand_and_face_true", {"hand_and_face": True}),
        ("openpose_face", {"include_body": True, "include_hand": False, "include_face": True}),
        ("openpose_faceonly", {"include_body": False, "include_hand": False, "include_face": True}),
        ("openpose_full", {"include_body": True, "include_hand": True, "include_face": True}),
        ("openpose_hand", {"include_body": True, "include_hand": True, "include_face": False}),
    )
    for label, options in variants:
        output(label, detector(img, **options))
101
+
102
def test_pidi(img):
    """Run PidiNetDetector through the shared checks plus its ``safe`` and ``scribble`` flags."""
    detector = PidiNetDetector.from_pretrained("lllyasviel/Annotators")
    common("pidi", detector, img)
    return_pil("pidi", detector, img)

    safe_result = detector(img, safe=True)
    output("pidi_safe", safe_result)

    scribble_result = detector(img, scribble=True)
    output("pidi_scribble", scribble_result)
108
+
109
def test_sam(img):
    """Run SamDetector through the shared matrix and the ``image=`` keyword form."""
    detector = SamDetector.from_pretrained(
        "ybelkada/segment-anything",
        subfolder="checkpoints",
    )
    common("sam", detector, img)

    # The detector is also called with the image passed by keyword.
    keyword_result = detector(image=img)
    output("sam_image", keyword_result)
113
+
114
def test_shuffle(img):
    """Run ContentShuffleDetector through the shared matrix and the ``return_pil`` checks."""
    detector = ContentShuffleDetector()
    for check in (common, return_pil):
        check("shuffle", detector, img)
118
+
119
def test_zoe(img):
    """Run ZoeDetector through the shared input/output matrix."""
    detector = ZoeDetector.from_pretrained("lllyasviel/Annotators")
    common("zoe", detector, img)
122
+
123
def test_dwpose(img):
    """Run DWposeDetector through the shared matrix and the ``return_pil`` checks."""
    detector = DWposeDetector()
    for check in (common, return_pil):
        check("dwpose", detector, img)