Spaces:
Running
on
Zero
Running
on
Zero
wanghaofan
commited on
Commit
•
60198b1
1
Parent(s):
18b78ec
Upload 20 files
Browse files- controlnet_aux/.gitignore +178 -0
- controlnet_aux/LICENSE.txt +201 -0
- controlnet_aux/README.md +127 -0
- controlnet_aux/setup.py +233 -0
- controlnet_aux/src/controlnet_aux.egg-info/PKG-INFO +163 -0
- controlnet_aux/src/controlnet_aux.egg-info/SOURCES.txt +166 -0
- controlnet_aux/src/controlnet_aux.egg-info/dependency_links.txt +1 -0
- controlnet_aux/src/controlnet_aux.egg-info/requires.txt +12 -0
- controlnet_aux/src/controlnet_aux.egg-info/top_level.txt +1 -0
- controlnet_aux/src/controlnet_aux/__init__.py +5 -0
- controlnet_aux/src/controlnet_aux/canny/__init__.py +36 -0
- controlnet_aux/src/controlnet_aux/open_pose/LICENSE +108 -0
- controlnet_aux/src/controlnet_aux/open_pose/__init__.py +234 -0
- controlnet_aux/src/controlnet_aux/open_pose/body.py +260 -0
- controlnet_aux/src/controlnet_aux/open_pose/face.py +364 -0
- controlnet_aux/src/controlnet_aux/open_pose/hand.py +90 -0
- controlnet_aux/src/controlnet_aux/open_pose/model.py +217 -0
- controlnet_aux/src/controlnet_aux/open_pose/util.py +383 -0
- controlnet_aux/src/controlnet_aux/util.py +146 -0
- controlnet_aux/tests/test_controlnet_aux.py +126 -0
controlnet_aux/.gitignore
ADDED
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Initially taken from Github's Python gitignore file
|
2 |
+
|
3 |
+
# Byte-compiled / optimized / DLL files
|
4 |
+
__pycache__/
|
5 |
+
*.py[cod]
|
6 |
+
*$py.class
|
7 |
+
|
8 |
+
# C extensions
|
9 |
+
*.so
|
10 |
+
|
11 |
+
# tests and logs
|
12 |
+
tests/fixtures/cached_*_text.txt
|
13 |
+
logs/
|
14 |
+
lightning_logs/
|
15 |
+
lang_code_data/
|
16 |
+
tests/outputs
|
17 |
+
|
18 |
+
# Distribution / packaging
|
19 |
+
.Python
|
20 |
+
build/
|
21 |
+
develop-eggs/
|
22 |
+
dist/
|
23 |
+
downloads/
|
24 |
+
eggs/
|
25 |
+
.eggs/
|
26 |
+
lib/
|
27 |
+
lib64/
|
28 |
+
parts/
|
29 |
+
sdist/
|
30 |
+
var/
|
31 |
+
wheels/
|
32 |
+
*.egg-info/
|
33 |
+
.installed.cfg
|
34 |
+
*.egg
|
35 |
+
MANIFEST
|
36 |
+
|
37 |
+
# PyInstaller
|
38 |
+
# Usually these files are written by a python script from a template
|
39 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
40 |
+
*.manifest
|
41 |
+
*.spec
|
42 |
+
|
43 |
+
# Installer logs
|
44 |
+
pip-log.txt
|
45 |
+
pip-delete-this-directory.txt
|
46 |
+
|
47 |
+
# Unit test / coverage reports
|
48 |
+
htmlcov/
|
49 |
+
.tox/
|
50 |
+
.nox/
|
51 |
+
.coverage
|
52 |
+
.coverage.*
|
53 |
+
.cache
|
54 |
+
nosetests.xml
|
55 |
+
coverage.xml
|
56 |
+
*.cover
|
57 |
+
.hypothesis/
|
58 |
+
.pytest_cache/
|
59 |
+
|
60 |
+
# Translations
|
61 |
+
*.mo
|
62 |
+
*.pot
|
63 |
+
|
64 |
+
# Django stuff:
|
65 |
+
*.log
|
66 |
+
local_settings.py
|
67 |
+
db.sqlite3
|
68 |
+
|
69 |
+
# Flask stuff:
|
70 |
+
instance/
|
71 |
+
.webassets-cache
|
72 |
+
|
73 |
+
# Scrapy stuff:
|
74 |
+
.scrapy
|
75 |
+
|
76 |
+
# Sphinx documentation
|
77 |
+
docs/_build/
|
78 |
+
|
79 |
+
# PyBuilder
|
80 |
+
target/
|
81 |
+
|
82 |
+
# Jupyter Notebook
|
83 |
+
.ipynb_checkpoints
|
84 |
+
|
85 |
+
# IPython
|
86 |
+
profile_default/
|
87 |
+
ipython_config.py
|
88 |
+
|
89 |
+
# pyenv
|
90 |
+
.python-version
|
91 |
+
|
92 |
+
# celery beat schedule file
|
93 |
+
celerybeat-schedule
|
94 |
+
|
95 |
+
# SageMath parsed files
|
96 |
+
*.sage.py
|
97 |
+
|
98 |
+
# Environments
|
99 |
+
.env
|
100 |
+
.venv
|
101 |
+
env/
|
102 |
+
venv/
|
103 |
+
ENV/
|
104 |
+
env.bak/
|
105 |
+
venv.bak/
|
106 |
+
|
107 |
+
# Spyder project settings
|
108 |
+
.spyderproject
|
109 |
+
.spyproject
|
110 |
+
|
111 |
+
# Rope project settings
|
112 |
+
.ropeproject
|
113 |
+
|
114 |
+
# mkdocs documentation
|
115 |
+
/site
|
116 |
+
|
117 |
+
# mypy
|
118 |
+
.mypy_cache/
|
119 |
+
.dmypy.json
|
120 |
+
dmypy.json
|
121 |
+
|
122 |
+
# Pyre type checker
|
123 |
+
.pyre/
|
124 |
+
|
125 |
+
# vscode
|
126 |
+
.vs
|
127 |
+
.vscode
|
128 |
+
|
129 |
+
# Pycharm
|
130 |
+
.idea
|
131 |
+
|
132 |
+
# TF code
|
133 |
+
tensorflow_code
|
134 |
+
|
135 |
+
# Models
|
136 |
+
proc_data
|
137 |
+
|
138 |
+
# examples
|
139 |
+
runs
|
140 |
+
/runs_old
|
141 |
+
/wandb
|
142 |
+
/examples/runs
|
143 |
+
/examples/**/*.args
|
144 |
+
/examples/rag/sweep
|
145 |
+
|
146 |
+
# data
|
147 |
+
/data
|
148 |
+
serialization_dir
|
149 |
+
|
150 |
+
# emacs
|
151 |
+
*.*~
|
152 |
+
debug.env
|
153 |
+
|
154 |
+
# vim
|
155 |
+
.*.swp
|
156 |
+
|
157 |
+
#ctags
|
158 |
+
tags
|
159 |
+
|
160 |
+
# pre-commit
|
161 |
+
.pre-commit*
|
162 |
+
|
163 |
+
# .lock
|
164 |
+
*.lock
|
165 |
+
|
166 |
+
# DS_Store (MacOS)
|
167 |
+
.DS_Store
|
168 |
+
# RL pipelines may produce mp4 outputs
|
169 |
+
*.mp4
|
170 |
+
|
171 |
+
# dependencies
|
172 |
+
/transformers
|
173 |
+
|
174 |
+
# ruff
|
175 |
+
.ruff_cache
|
176 |
+
|
177 |
+
wandb
|
178 |
+
|
controlnet_aux/LICENSE.txt
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Apache License
|
2 |
+
Version 2.0, January 2004
|
3 |
+
http://www.apache.org/licenses/
|
4 |
+
|
5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6 |
+
|
7 |
+
1. Definitions.
|
8 |
+
|
9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
11 |
+
|
12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13 |
+
the copyright owner that is granting the License.
|
14 |
+
|
15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
16 |
+
other entities that control, are controlled by, or are under common
|
17 |
+
control with that entity. For the purposes of this definition,
|
18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
19 |
+
direction or management of such entity, whether by contract or
|
20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22 |
+
|
23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24 |
+
exercising permissions granted by this License.
|
25 |
+
|
26 |
+
"Source" form shall mean the preferred form for making modifications,
|
27 |
+
including but not limited to software source code, documentation
|
28 |
+
source, and configuration files.
|
29 |
+
|
30 |
+
"Object" form shall mean any form resulting from mechanical
|
31 |
+
transformation or translation of a Source form, including but
|
32 |
+
not limited to compiled object code, generated documentation,
|
33 |
+
and conversions to other media types.
|
34 |
+
|
35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
36 |
+
Object form, made available under the License, as indicated by a
|
37 |
+
copyright notice that is included in or attached to the work
|
38 |
+
(an example is provided in the Appendix below).
|
39 |
+
|
40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41 |
+
form, that is based on (or derived from) the Work and for which the
|
42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
44 |
+
of this License, Derivative Works shall not include works that remain
|
45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46 |
+
the Work and Derivative Works thereof.
|
47 |
+
|
48 |
+
"Contribution" shall mean any work of authorship, including
|
49 |
+
the original version of the Work and any modifications or additions
|
50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
54 |
+
means any form of electronic, verbal, or written communication sent
|
55 |
+
to the Licensor or its representatives, including but not limited to
|
56 |
+
communication on electronic mailing lists, source code control systems,
|
57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
59 |
+
excluding communication that is conspicuously marked or otherwise
|
60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
61 |
+
|
62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
64 |
+
subsequently incorporated within the Work.
|
65 |
+
|
66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
71 |
+
Work and such Derivative Works in Source or Object form.
|
72 |
+
|
73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76 |
+
(except as stated in this section) patent license to make, have made,
|
77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78 |
+
where such license applies only to those patent claims licensable
|
79 |
+
by such Contributor that are necessarily infringed by their
|
80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
82 |
+
institute patent litigation against any entity (including a
|
83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84 |
+
or a Contribution incorporated within the Work constitutes direct
|
85 |
+
or contributory patent infringement, then any patent licenses
|
86 |
+
granted to You under this License for that Work shall terminate
|
87 |
+
as of the date such litigation is filed.
|
88 |
+
|
89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
90 |
+
Work or Derivative Works thereof in any medium, with or without
|
91 |
+
modifications, and in Source or Object form, provided that You
|
92 |
+
meet the following conditions:
|
93 |
+
|
94 |
+
(a) You must give any other recipients of the Work or
|
95 |
+
Derivative Works a copy of this License; and
|
96 |
+
|
97 |
+
(b) You must cause any modified files to carry prominent notices
|
98 |
+
stating that You changed the files; and
|
99 |
+
|
100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
101 |
+
that You distribute, all copyright, patent, trademark, and
|
102 |
+
attribution notices from the Source form of the Work,
|
103 |
+
excluding those notices that do not pertain to any part of
|
104 |
+
the Derivative Works; and
|
105 |
+
|
106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107 |
+
distribution, then any Derivative Works that You distribute must
|
108 |
+
include a readable copy of the attribution notices contained
|
109 |
+
within such NOTICE file, excluding those notices that do not
|
110 |
+
pertain to any part of the Derivative Works, in at least one
|
111 |
+
of the following places: within a NOTICE text file distributed
|
112 |
+
as part of the Derivative Works; within the Source form or
|
113 |
+
documentation, if provided along with the Derivative Works; or,
|
114 |
+
within a display generated by the Derivative Works, if and
|
115 |
+
wherever such third-party notices normally appear. The contents
|
116 |
+
of the NOTICE file are for informational purposes only and
|
117 |
+
do not modify the License. You may add Your own attribution
|
118 |
+
notices within Derivative Works that You distribute, alongside
|
119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
120 |
+
that such additional attribution notices cannot be construed
|
121 |
+
as modifying the License.
|
122 |
+
|
123 |
+
You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright [yyyy] [name of copyright owner]
|
190 |
+
|
191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192 |
+
you may not use this file except in compliance with the License.
|
193 |
+
You may obtain a copy of the License at
|
194 |
+
|
195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
196 |
+
|
197 |
+
Unless required by applicable law or agreed to in writing, software
|
198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200 |
+
See the License for the specific language governing permissions and
|
201 |
+
limitations under the License.
|
controlnet_aux/README.md
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ControlNet auxiliary models
|
2 |
+
|
3 |
+
This is a PyPI-installable package of [lllyasviel's ControlNet Annotators](https://github.com/lllyasviel/ControlNet/tree/main/annotator)
|
4 |
+
|
5 |
+
The code is copy-pasted from the respective folders in <https://github.com/lllyasviel/ControlNet/tree/main/annotator> and connected to [the 🤗 Hub](https://huggingface.co/lllyasviel/Annotators).
|
6 |
+
|
7 |
+
All credit & copyright goes to <https://github.com/lllyasviel> .
|
8 |
+
|
9 |
+
## Install
|
10 |
+
|
11 |
+
```
|
12 |
+
pip install -U controlnet-aux
|
13 |
+
```
|
14 |
+
|
15 |
+
To support DWPose which is dependent on MMDetection, MMCV and MMPose
|
16 |
+
|
17 |
+
```
|
18 |
+
pip install -U openmim
|
19 |
+
mim install mmengine
|
20 |
+
mim install "mmcv>=2.0.1"
|
21 |
+
mim install "mmdet>=3.1.0"
|
22 |
+
mim install "mmpose>=1.1.0"
|
23 |
+
```
|
24 |
+
|
25 |
+
## Usage
|
26 |
+
|
27 |
+
You can use the processor class, which can load each of the auxiliary models with the following code
|
28 |
+
|
29 |
+
```python
|
30 |
+
import requests
|
31 |
+
from PIL import Image
|
32 |
+
from io import BytesIO
|
33 |
+
|
34 |
+
from controlnet_aux.processor import Processor
|
35 |
+
|
36 |
+
# load image
|
37 |
+
url = "https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png"
|
38 |
+
|
39 |
+
response = requests.get(url)
|
40 |
+
img = Image.open(BytesIO(response.content)).convert("RGB").resize((512, 512))
|
41 |
+
|
42 |
+
# load processor from processor_id
|
43 |
+
# options are:
|
44 |
+
# ["canny", "depth_leres", "depth_leres++", "depth_midas", "depth_zoe", "lineart_anime",
|
45 |
+
# "lineart_coarse", "lineart_realistic", "mediapipe_face", "mlsd", "normal_bae", "normal_midas",
|
46 |
+
# "openpose", "openpose_face", "openpose_faceonly", "openpose_full", "openpose_hand",
|
47 |
+
# "scribble_hed", "scribble_pidinet", "shuffle", "softedge_hed", "softedge_hedsafe",
|
48 |
+
# "softedge_pidinet", "softedge_pidsafe", "dwpose"]
|
49 |
+
processor_id = 'scribble_hed'
|
50 |
+
processor = Processor(processor_id)
|
51 |
+
|
52 |
+
processed_image = processor(img, to_pil=True)
|
53 |
+
```
|
54 |
+
|
55 |
+
Each model can be loaded individually by importing and instantiating them as follows
|
56 |
+
|
57 |
+
```python
|
58 |
+
from PIL import Image
|
59 |
+
import requests
|
60 |
+
from io import BytesIO
|
61 |
+
from controlnet_aux import HEDdetector, MidasDetector, MLSDdetector, OpenposeDetector, PidiNetDetector, NormalBaeDetector, LineartDetector, LineartAnimeDetector, CannyDetector, ContentShuffleDetector, ZoeDetector, MediapipeFaceDetector, SamDetector, LeresDetector, DWposeDetector
|
62 |
+
|
63 |
+
# load image
|
64 |
+
url = "https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png"
|
65 |
+
|
66 |
+
response = requests.get(url)
|
67 |
+
img = Image.open(BytesIO(response.content)).convert("RGB").resize((512, 512))
|
68 |
+
|
69 |
+
# load checkpoints
|
70 |
+
hed = HEDdetector.from_pretrained("lllyasviel/Annotators")
|
71 |
+
midas = MidasDetector.from_pretrained("lllyasviel/Annotators")
|
72 |
+
mlsd = MLSDdetector.from_pretrained("lllyasviel/Annotators")
|
73 |
+
open_pose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
|
74 |
+
pidi = PidiNetDetector.from_pretrained("lllyasviel/Annotators")
|
75 |
+
normal_bae = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
|
76 |
+
lineart = LineartDetector.from_pretrained("lllyasviel/Annotators")
|
77 |
+
lineart_anime = LineartAnimeDetector.from_pretrained("lllyasviel/Annotators")
|
78 |
+
zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
|
79 |
+
sam = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints")
|
80 |
+
mobile_sam = SamDetector.from_pretrained("dhkim2810/MobileSAM", model_type="vit_t", filename="mobile_sam.pt")
|
81 |
+
leres = LeresDetector.from_pretrained("lllyasviel/Annotators")
|
82 |
+
teed = TEEDdetector.from_pretrained("fal-ai/teed", filename="5_model.pth")
|
83 |
+
anyline = AnylineDetector.from_pretrained(
|
84 |
+
"TheMistoAI/MistoLine", filename="MTEED.pth", subfolder="Anyline"
|
85 |
+
)
|
86 |
+
|
87 |
+
# specify configs, ckpts and device, or it will be downloaded automatically and use cpu by default
|
88 |
+
# det_config: ./src/controlnet_aux/dwpose/yolox_config/yolox_l_8xb8-300e_coco.py
|
89 |
+
# det_ckpt: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth
|
90 |
+
# pose_config: ./src/controlnet_aux/dwpose/dwpose_config/dwpose-l_384x288.py
|
91 |
+
# pose_ckpt: https://huggingface.co/wanghaofan/dw-ll_ucoco_384/resolve/main/dw-ll_ucoco_384.pth
|
92 |
+
import torch
|
93 |
+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
94 |
+
dwpose = DWposeDetector(det_config=det_config, det_ckpt=det_ckpt, pose_config=pose_config, pose_ckpt=pose_ckpt, device=device)
|
95 |
+
|
96 |
+
# instantiate
|
97 |
+
canny = CannyDetector()
|
98 |
+
content = ContentShuffleDetector()
|
99 |
+
face_detector = MediapipeFaceDetector()
|
100 |
+
lineart_standard = LineartStandardDetector()
|
101 |
+
|
102 |
+
|
103 |
+
# process
|
104 |
+
processed_image_hed = hed(img)
|
105 |
+
processed_image_midas = midas(img)
|
106 |
+
processed_image_mlsd = mlsd(img)
|
107 |
+
processed_image_open_pose = open_pose(img, hand_and_face=True)
|
108 |
+
processed_image_pidi = pidi(img, safe=True)
|
109 |
+
processed_image_normal_bae = normal_bae(img)
|
110 |
+
processed_image_lineart = lineart(img, coarse=True)
|
111 |
+
processed_image_lineart_anime = lineart_anime(img)
|
112 |
+
processed_image_zoe = zoe(img)
|
113 |
+
processed_image_sam = sam(img)
|
114 |
+
processed_image_leres = leres(img)
|
115 |
+
processed_image_teed = teed(img, detect_resolution=1024)
|
116 |
+
processed_image_anyline = anyline(img, detect_resolution=1280)
|
117 |
+
|
118 |
+
processed_image_canny = canny(img)
|
119 |
+
processed_image_content = content(img)
|
120 |
+
processed_image_mediapipe_face = face_detector(img)
|
121 |
+
processed_image_dwpose = dwpose(img)
|
122 |
+
processed_image_lineart_standard = lineart_standard(img, detect_resolution=1024)
|
123 |
+
```
|
124 |
+
|
125 |
+
### Image resolution
|
126 |
+
|
127 |
+
In order to maintain the image aspect ratio, `detect_resolution`, `image_resolution` and the image sizes need to be multiples of `64`.
|
controlnet_aux/setup.py
ADDED
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2023 The HuggingFace Team. All rights reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""
|
16 |
+
Simple check list from AllenNLP repo: https://github.com/allenai/allennlp/blob/main/setup.py
|
17 |
+
|
18 |
+
To create the package for pypi.
|
19 |
+
|
20 |
+
1. Run `make pre-release` (or `make pre-patch` for a patch release) then run `make fix-copies` to fix the index of the
|
21 |
+
documentation.
|
22 |
+
|
23 |
+
If releasing on a special branch, copy the updated README.md on the main branch for the commit you will make
|
24 |
+
for the post-release and run `make fix-copies` on the main branch as well.
|
25 |
+
|
26 |
+
2. Run Tests for Amazon Sagemaker. The documentation is located in `./tests/sagemaker/README.md`, otherwise @philschmid.
|
27 |
+
|
28 |
+
3. Unpin specific versions from setup.py that use a git install.
|
29 |
+
|
30 |
+
4. Checkout the release branch (v<RELEASE>-release, for example v4.19-release), and commit these changes with the
|
31 |
+
message: "Release: <RELEASE>" and push.
|
32 |
+
|
33 |
+
5. Wait for the tests on main to be completed and be green (otherwise revert and fix bugs)
|
34 |
+
|
35 |
+
6. Add a tag in git to mark the release: "git tag v<RELEASE> -m 'Adds tag v<RELEASE> for pypi' "
|
36 |
+
Push the tag to git: git push --tags origin v<RELEASE>-release
|
37 |
+
|
38 |
+
7. Build both the sources and the wheel. Do not change anything in setup.py between
|
39 |
+
creating the wheel and the source distribution (obviously).
|
40 |
+
|
41 |
+
For the wheel, run: "python setup.py bdist_wheel" in the top level directory.
|
42 |
+
(this will build a wheel for the python version you use to build it).
|
43 |
+
|
44 |
+
For the sources, run: "python setup.py sdist"
|
45 |
+
You should now have a /dist directory with both .whl and .tar.gz source versions.
|
46 |
+
|
47 |
+
8. Check that everything looks correct by uploading the package to the pypi test server:
|
48 |
+
|
49 |
+
twine upload dist/* -r pypitest
|
50 |
+
(PyPI suggests using twine, as other methods upload files via plaintext.)
|
51 |
+
You may have to specify the repository url, use the following command then:
|
52 |
+
twine upload dist/* -r pypitest --repository-url=https://test.pypi.org/legacy/
|
53 |
+
|
54 |
+
Check that you can install it in a virtualenv by running:
|
55 |
+
pip install -i https://testpypi.python.org/pypi controlnet_aux
|
56 |
+
|
57 |
+
Check you can run the following commands:
|
58 |
+
python -c "from diffusers import pipeline; classifier = pipeline('text-classification'); print(classifier('What a nice release'))"
|
59 |
+
python -c "from controlnet_aux import *"
|
60 |
+
|
61 |
+
9. Upload the final version to actual pypi:
|
62 |
+
twine upload dist/* -r pypi
|
63 |
+
|
64 |
+
10. Copy the release notes from RELEASE.md to the tag in github once everything is looking hunky-dory.
|
65 |
+
|
66 |
+
11. Run `make post-release` (or, for a patch release, `make post-patch`). If you were on a branch for the release,
|
67 |
+
you need to go back to main before executing this.
|
68 |
+
"""
|
69 |
+
|
70 |
+
import os
|
71 |
+
import re
|
72 |
+
from distutils.core import Command
|
73 |
+
|
74 |
+
from setuptools import find_packages, setup
|
75 |
+
|
76 |
+
# IMPORTANT:
|
77 |
+
# 1. all dependencies should be listed here with their version requirements if any
|
78 |
+
# 2. once modified, run: `make deps_table_update` to update src/controlnet_aux/dependency_versions_table.py
|
79 |
+
_deps = [
|
80 |
+
"Pillow",
|
81 |
+
"torch",
|
82 |
+
"numpy",
|
83 |
+
"filelock",
|
84 |
+
"importlib_metadata",
|
85 |
+
"opencv-python-headless",
|
86 |
+
"scipy",
|
87 |
+
"huggingface_hub",
|
88 |
+
"einops",
|
89 |
+
"timm<=0.6.7",
|
90 |
+
"torchvision",
|
91 |
+
"scikit-image",
|
92 |
+
]
|
93 |
+
|
94 |
+
# this is a lookup table with items like:
|
95 |
+
#
|
96 |
+
# tokenizers: "huggingface-hub==0.8.0"
|
97 |
+
# packaging: "packaging"
|
98 |
+
#
|
99 |
+
# some of the values are versioned whereas others aren't.
|
100 |
+
# Requirement lookup table: bare package name -> full requirement string,
# exactly as it is written in `_deps` (version specifier included, if any).
# The regex captures the whole entry as group 1 and the leading run of
# characters before any of `! = < > ~` as group 2, so each findall hit is a
# (full_requirement, bare_name) pair.
deps = dict(
    (bare_name, requirement)
    for requirement, bare_name in (
        re.findall(r"^(([^!=<>~]+)(?:[!=<>~].*)?$)", spec)[0] for spec in _deps
    )
)
|
103 |
+
|
104 |
+
# since we save this data in src/controlnet_aux/dependency_versions_table.py it can be easily accessed from
|
105 |
+
# anywhere. If you need to quickly access the data from this table in a shell, you can do so easily with:
|
106 |
+
#
|
107 |
+
# python -c 'import sys; from diffusers.dependency_versions_table import deps; \
|
108 |
+
# print(" ".join([ deps[x] for x in sys.argv[1:]]))' tokenizers datasets
|
109 |
+
#
|
110 |
+
# Just pass the desired package names to that script as it's shown with 2 packages above.
|
111 |
+
#
|
112 |
+
# If diffusers is not yet installed and the work is done from the cloned repo remember to add `PYTHONPATH=src` to the script above
|
113 |
+
#
|
114 |
+
# You can then feed this for example to `pip`:
|
115 |
+
#
|
116 |
+
# pip install -U $(python -c 'import sys; from diffusers.dependency_versions_table import deps; \
|
117 |
+
# print(" ".join([ deps[x] for x in sys.argv[1:]]))' tokenizers datasets)
|
118 |
+
#
|
119 |
+
|
120 |
+
|
121 |
+
def deps_list(*pkgs):
|
122 |
+
return [deps[pkg] for pkg in pkgs]
|
123 |
+
|
124 |
+
|
125 |
+
class DepsTableUpdateCommand(Command):
|
126 |
+
"""
|
127 |
+
A custom distutils command that updates the dependency table.
|
128 |
+
usage: python setup.py deps_table_update
|
129 |
+
"""
|
130 |
+
|
131 |
+
description = "build runtime dependency table"
|
132 |
+
user_options = [
|
133 |
+
# format: (long option, short option, description).
|
134 |
+
(
|
135 |
+
"dep-table-update",
|
136 |
+
None,
|
137 |
+
"updates src/diffusers/dependency_versions_table.py",
|
138 |
+
),
|
139 |
+
]
|
140 |
+
|
141 |
+
def initialize_options(self):
|
142 |
+
pass
|
143 |
+
|
144 |
+
def finalize_options(self):
|
145 |
+
pass
|
146 |
+
|
147 |
+
def run(self):
|
148 |
+
entries = "\n".join([f' "{k}": "{v}",' for k, v in deps.items()])
|
149 |
+
content = [
|
150 |
+
"# THIS FILE HAS BEEN AUTOGENERATED. To update:",
|
151 |
+
"# 1. modify the `_deps` dict in setup.py",
|
152 |
+
"# 2. run `make deps_table_update``",
|
153 |
+
"deps = {",
|
154 |
+
entries,
|
155 |
+
"}",
|
156 |
+
"",
|
157 |
+
]
|
158 |
+
target = "src/controlnet_aux/dependency_versions_table.py"
|
159 |
+
print(f"updating {target}")
|
160 |
+
with open(target, "w", encoding="utf-8", newline="\n") as f:
|
161 |
+
f.write("\n".join(content))
|
162 |
+
|
163 |
+
|
164 |
+
extras = {}
|
165 |
+
|
166 |
+
install_requires = [
|
167 |
+
deps["torch"],
|
168 |
+
deps["importlib_metadata"],
|
169 |
+
deps["huggingface_hub"],
|
170 |
+
deps["scipy"],
|
171 |
+
deps["opencv-python-headless"],
|
172 |
+
deps["filelock"],
|
173 |
+
deps["numpy"],
|
174 |
+
deps["Pillow"],
|
175 |
+
deps["einops"],
|
176 |
+
deps["torchvision"],
|
177 |
+
deps["timm"],
|
178 |
+
deps["scikit-image"],
|
179 |
+
]
|
180 |
+
|
181 |
+
setup(
|
182 |
+
name="controlnet_aux",
|
183 |
+
version="0.0.9", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
|
184 |
+
description="Auxillary models for controlnet",
|
185 |
+
long_description=open("README.md", "r", encoding="utf-8").read(),
|
186 |
+
long_description_content_type="text/markdown",
|
187 |
+
keywords="deep learning",
|
188 |
+
license="Apache",
|
189 |
+
author="The HuggingFace team",
|
190 |
+
author_email="[email protected]",
|
191 |
+
url="https://github.com/patrickvonplaten/controlnet_aux",
|
192 |
+
package_dir={"": "src"},
|
193 |
+
packages=find_packages("src"),
|
194 |
+
include_package_data=True,
|
195 |
+
python_requires=">=3.7.0",
|
196 |
+
install_requires=install_requires,
|
197 |
+
extras_require=extras,
|
198 |
+
classifiers=[
|
199 |
+
"Development Status :: 5 - Production/Stable",
|
200 |
+
"Intended Audience :: Developers",
|
201 |
+
"Intended Audience :: Education",
|
202 |
+
"Intended Audience :: Science/Research",
|
203 |
+
"License :: OSI Approved :: Apache Software License",
|
204 |
+
"Operating System :: OS Independent",
|
205 |
+
"Programming Language :: Python :: 3",
|
206 |
+
"Programming Language :: Python :: 3.7",
|
207 |
+
"Programming Language :: Python :: 3.8",
|
208 |
+
"Programming Language :: Python :: 3.9",
|
209 |
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
210 |
+
],
|
211 |
+
cmdclass={"deps_table_update": DepsTableUpdateCommand},
|
212 |
+
package_data={'controlnet_aux' : ['zoe/zoedepth/models/zoedepth/*.json', 'zoe/zoedepth/models/zoedepth_nk/*.json']}
|
213 |
+
)
|
214 |
+
|
215 |
+
# Release checklist
|
216 |
+
# 1. Change the version in __init__.py and setup.py.
|
217 |
+
# 2. Commit these changes with the message: "Release: Release"
|
218 |
+
# 3. Add a tag in git to mark the release: "git tag RELEASE -m 'Adds tag RELEASE for pypi' "
|
219 |
+
# Push the tag to git: git push --tags origin main
|
220 |
+
# 4. Run the following commands in the top-level directory:
|
221 |
+
# python setup.py bdist_wheel
|
222 |
+
# python setup.py sdist
|
223 |
+
# 5. Upload the package to the pypi test server first:
|
224 |
+
# twine upload dist/* -r pypitest
|
225 |
+
# twine upload dist/* -r pypitest --repository-url=https://test.pypi.org/legacy/
|
226 |
+
# 6. Check that you can install it in a virtualenv by running:
|
227 |
+
# pip install -i https://test.pypi.org/simple/ controlnet_aux
|
228 |
+
# diffusers env
|
229 |
+
# diffusers test
|
230 |
+
# 7. Upload the final version to actual pypi:
|
231 |
+
# twine upload dist/* -r pypi
|
232 |
+
# 8. Add release notes to the tag in github once everything is looking hunky-dory.
|
233 |
+
# 9. Update the version in __init__.py and setup.py to the next development version (x.y.z.dev0) and push to main
|
controlnet_aux/src/controlnet_aux.egg-info/PKG-INFO
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Metadata-Version: 2.1
|
2 |
+
Name: controlnet_aux
|
3 |
+
Version: 0.0.9
|
4 |
+
Summary: Auxillary models for controlnet
|
5 |
+
Home-page: https://github.com/patrickvonplaten/controlnet_aux
|
6 |
+
Author: The HuggingFace team
|
7 |
+
Author-email: [email protected]
|
8 |
+
License: Apache
|
9 |
+
Keywords: deep learning
|
10 |
+
Classifier: Development Status :: 5 - Production/Stable
|
11 |
+
Classifier: Intended Audience :: Developers
|
12 |
+
Classifier: Intended Audience :: Education
|
13 |
+
Classifier: Intended Audience :: Science/Research
|
14 |
+
Classifier: License :: OSI Approved :: Apache Software License
|
15 |
+
Classifier: Operating System :: OS Independent
|
16 |
+
Classifier: Programming Language :: Python :: 3
|
17 |
+
Classifier: Programming Language :: Python :: 3.7
|
18 |
+
Classifier: Programming Language :: Python :: 3.8
|
19 |
+
Classifier: Programming Language :: Python :: 3.9
|
20 |
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
21 |
+
Requires-Python: >=3.7.0
|
22 |
+
Description-Content-Type: text/markdown
|
23 |
+
License-File: LICENSE.txt
|
24 |
+
Requires-Dist: torch
|
25 |
+
Requires-Dist: importlib_metadata
|
26 |
+
Requires-Dist: huggingface_hub
|
27 |
+
Requires-Dist: scipy
|
28 |
+
Requires-Dist: opencv-python-headless
|
29 |
+
Requires-Dist: filelock
|
30 |
+
Requires-Dist: numpy
|
31 |
+
Requires-Dist: Pillow
|
32 |
+
Requires-Dist: einops
|
33 |
+
Requires-Dist: torchvision
|
34 |
+
Requires-Dist: timm<=0.6.7
|
35 |
+
Requires-Dist: scikit-image
|
36 |
+
|
37 |
+
# ControlNet auxiliary models
|
38 |
+
|
39 |
+
This is a PyPi installable package of [lllyasviel's ControlNet Annotators](https://github.com/lllyasviel/ControlNet/tree/main/annotator)
|
40 |
+
|
41 |
+
The code is copy-pasted from the respective folders in <https://github.com/lllyasviel/ControlNet/tree/main/annotator> and connected to [the 🤗 Hub](https://huggingface.co/lllyasviel/Annotators).
|
42 |
+
|
43 |
+
All credit & copyright go to <https://github.com/lllyasviel>.
|
44 |
+
|
45 |
+
## Install
|
46 |
+
|
47 |
+
```
|
48 |
+
pip install -U controlnet-aux
|
49 |
+
```
|
50 |
+
|
51 |
+
To support DWPose, which depends on MMDetection, MMCV and MMPose, also install:
|
52 |
+
|
53 |
+
```
|
54 |
+
pip install -U openmim
|
55 |
+
mim install mmengine
|
56 |
+
mim install "mmcv>=2.0.1"
|
57 |
+
mim install "mmdet>=3.1.0"
|
58 |
+
mim install "mmpose>=1.1.0"
|
59 |
+
```
|
60 |
+
|
61 |
+
## Usage
|
62 |
+
|
63 |
+
You can use the processor class, which can load each of the auxiliary models with the following code
|
64 |
+
|
65 |
+
```python
|
66 |
+
import requests
|
67 |
+
from PIL import Image
|
68 |
+
from io import BytesIO
|
69 |
+
|
70 |
+
from controlnet_aux.processor import Processor
|
71 |
+
|
72 |
+
# load image
|
73 |
+
url = "https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png"
|
74 |
+
|
75 |
+
response = requests.get(url)
|
76 |
+
img = Image.open(BytesIO(response.content)).convert("RGB").resize((512, 512))
|
77 |
+
|
78 |
+
# load processor from processor_id
|
79 |
+
# options are:
|
80 |
+
# ["canny", "depth_leres", "depth_leres++", "depth_midas", "depth_zoe", "lineart_anime",
|
81 |
+
# "lineart_coarse", "lineart_realistic", "mediapipe_face", "mlsd", "normal_bae", "normal_midas",
|
82 |
+
# "openpose", "openpose_face", "openpose_faceonly", "openpose_full", "openpose_hand",
|
83 |
+
# "scribble_hed", "scribble_pidinet", "shuffle", "softedge_hed", "softedge_hedsafe",
|
84 |
+
# "softedge_pidinet", "softedge_pidsafe", "dwpose"]
|
85 |
+
processor_id = 'scribble_hed'
|
86 |
+
processor = Processor(processor_id)
|
87 |
+
|
88 |
+
processed_image = processor(img, to_pil=True)
|
89 |
+
```
|
90 |
+
|
91 |
+
Each model can be loaded individually by importing and instantiating them as follows
|
92 |
+
|
93 |
+
```python
|
94 |
+
from PIL import Image
|
95 |
+
import requests
|
96 |
+
from io import BytesIO
|
97 |
+
from controlnet_aux import HEDdetector, MidasDetector, MLSDdetector, OpenposeDetector, PidiNetDetector, NormalBaeDetector, LineartDetector, LineartAnimeDetector, LineartStandardDetector, CannyDetector, ContentShuffleDetector, ZoeDetector, MediapipeFaceDetector, SamDetector, LeresDetector, DWposeDetector, TEEDdetector, AnylineDetector
|
98 |
+
|
99 |
+
# load image
|
100 |
+
url = "https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png"
|
101 |
+
|
102 |
+
response = requests.get(url)
|
103 |
+
img = Image.open(BytesIO(response.content)).convert("RGB").resize((512, 512))
|
104 |
+
|
105 |
+
# load checkpoints
|
106 |
+
hed = HEDdetector.from_pretrained("lllyasviel/Annotators")
|
107 |
+
midas = MidasDetector.from_pretrained("lllyasviel/Annotators")
|
108 |
+
mlsd = MLSDdetector.from_pretrained("lllyasviel/Annotators")
|
109 |
+
open_pose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
|
110 |
+
pidi = PidiNetDetector.from_pretrained("lllyasviel/Annotators")
|
111 |
+
normal_bae = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
|
112 |
+
lineart = LineartDetector.from_pretrained("lllyasviel/Annotators")
|
113 |
+
lineart_anime = LineartAnimeDetector.from_pretrained("lllyasviel/Annotators")
|
114 |
+
zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
|
115 |
+
sam = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints")
|
116 |
+
mobile_sam = SamDetector.from_pretrained("dhkim2810/MobileSAM", model_type="vit_t", filename="mobile_sam.pt")
|
117 |
+
leres = LeresDetector.from_pretrained("lllyasviel/Annotators")
|
118 |
+
teed = TEEDdetector.from_pretrained("fal-ai/teed", filename="5_model.pth")
|
119 |
+
anyline = AnylineDetector.from_pretrained(
|
120 |
+
"TheMistoAI/MistoLine", filename="MTEED.pth", subfolder="Anyline"
|
121 |
+
)
|
122 |
+
|
123 |
+
# specify configs, ckpts and device, or it will be downloaded automatically and use cpu by default
|
124 |
+
# det_config: ./src/controlnet_aux/dwpose/yolox_config/yolox_l_8xb8-300e_coco.py
|
125 |
+
# det_ckpt: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth
|
126 |
+
# pose_config: ./src/controlnet_aux/dwpose/dwpose_config/dwpose-l_384x288.py
|
127 |
+
# pose_ckpt: https://huggingface.co/wanghaofan/dw-ll_ucoco_384/resolve/main/dw-ll_ucoco_384.pth
|
128 |
+
import torch
|
129 |
+
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
130 |
+
dwpose = DWposeDetector(det_config=det_config, det_ckpt=det_ckpt, pose_config=pose_config, pose_ckpt=pose_ckpt, device=device)
|
131 |
+
|
132 |
+
# instantiate
|
133 |
+
canny = CannyDetector()
|
134 |
+
content = ContentShuffleDetector()
|
135 |
+
face_detector = MediapipeFaceDetector()
|
136 |
+
lineart_standard = LineartStandardDetector()
|
137 |
+
|
138 |
+
|
139 |
+
# process
|
140 |
+
processed_image_hed = hed(img)
|
141 |
+
processed_image_midas = midas(img)
|
142 |
+
processed_image_mlsd = mlsd(img)
|
143 |
+
processed_image_open_pose = open_pose(img, hand_and_face=True)
|
144 |
+
processed_image_pidi = pidi(img, safe=True)
|
145 |
+
processed_image_normal_bae = normal_bae(img)
|
146 |
+
processed_image_lineart = lineart(img, coarse=True)
|
147 |
+
processed_image_lineart_anime = lineart_anime(img)
|
148 |
+
processed_image_zoe = zoe(img)
|
149 |
+
processed_image_sam = sam(img)
|
150 |
+
processed_image_leres = leres(img)
|
151 |
+
processed_image_teed = teed(img, detect_resolution=1024)
|
152 |
+
processed_image_anyline = anyline(img, detect_resolution=1280)
|
153 |
+
|
154 |
+
processed_image_canny = canny(img)
|
155 |
+
processed_image_content = content(img)
|
156 |
+
processed_image_mediapipe_face = face_detector(img)
|
157 |
+
processed_image_dwpose = dwpose(img)
|
158 |
+
processed_image_lineart_standard = lineart_standard(img, detect_resolution=1024)
|
159 |
+
```
|
160 |
+
|
161 |
+
### Image resolution
|
162 |
+
|
163 |
+
In order to maintain the image aspect ratio, `detect_resolution`, `image_resolution` and the image sizes need to be multiples of `64`.
|
controlnet_aux/src/controlnet_aux.egg-info/SOURCES.txt
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
LICENSE.txt
|
2 |
+
README.md
|
3 |
+
setup.py
|
4 |
+
src/controlnet_aux/__init__.py
|
5 |
+
src/controlnet_aux/processor.py
|
6 |
+
src/controlnet_aux/util.py
|
7 |
+
src/controlnet_aux.egg-info/PKG-INFO
|
8 |
+
src/controlnet_aux.egg-info/SOURCES.txt
|
9 |
+
src/controlnet_aux.egg-info/dependency_links.txt
|
10 |
+
src/controlnet_aux.egg-info/requires.txt
|
11 |
+
src/controlnet_aux.egg-info/top_level.txt
|
12 |
+
src/controlnet_aux/anyline/__init__.py
|
13 |
+
src/controlnet_aux/canny/__init__.py
|
14 |
+
src/controlnet_aux/dwpose/__init__.py
|
15 |
+
src/controlnet_aux/dwpose/util.py
|
16 |
+
src/controlnet_aux/dwpose/wholebody.py
|
17 |
+
src/controlnet_aux/dwpose/dwpose_config/__init__.py
|
18 |
+
src/controlnet_aux/dwpose/dwpose_config/dwpose-l_384x288.py
|
19 |
+
src/controlnet_aux/dwpose/yolox_config/__init__.py
|
20 |
+
src/controlnet_aux/dwpose/yolox_config/yolox_l_8xb8-300e_coco.py
|
21 |
+
src/controlnet_aux/hed/__init__.py
|
22 |
+
src/controlnet_aux/leres/__init__.py
|
23 |
+
src/controlnet_aux/leres/leres/Resnet.py
|
24 |
+
src/controlnet_aux/leres/leres/Resnext_torch.py
|
25 |
+
src/controlnet_aux/leres/leres/__init__.py
|
26 |
+
src/controlnet_aux/leres/leres/depthmap.py
|
27 |
+
src/controlnet_aux/leres/leres/multi_depth_model_woauxi.py
|
28 |
+
src/controlnet_aux/leres/leres/net_tools.py
|
29 |
+
src/controlnet_aux/leres/leres/network_auxi.py
|
30 |
+
src/controlnet_aux/leres/pix2pix/__init__.py
|
31 |
+
src/controlnet_aux/leres/pix2pix/models/__init__.py
|
32 |
+
src/controlnet_aux/leres/pix2pix/models/base_model.py
|
33 |
+
src/controlnet_aux/leres/pix2pix/models/base_model_hg.py
|
34 |
+
src/controlnet_aux/leres/pix2pix/models/networks.py
|
35 |
+
src/controlnet_aux/leres/pix2pix/models/pix2pix4depth_model.py
|
36 |
+
src/controlnet_aux/leres/pix2pix/options/__init__.py
|
37 |
+
src/controlnet_aux/leres/pix2pix/options/base_options.py
|
38 |
+
src/controlnet_aux/leres/pix2pix/options/test_options.py
|
39 |
+
src/controlnet_aux/leres/pix2pix/util/__init__.py
|
40 |
+
src/controlnet_aux/leres/pix2pix/util/util.py
|
41 |
+
src/controlnet_aux/lineart/__init__.py
|
42 |
+
src/controlnet_aux/lineart_anime/__init__.py
|
43 |
+
src/controlnet_aux/lineart_standard/__init__.py
|
44 |
+
src/controlnet_aux/mediapipe_face/__init__.py
|
45 |
+
src/controlnet_aux/mediapipe_face/mediapipe_face_common.py
|
46 |
+
src/controlnet_aux/midas/__init__.py
|
47 |
+
src/controlnet_aux/midas/api.py
|
48 |
+
src/controlnet_aux/midas/utils.py
|
49 |
+
src/controlnet_aux/midas/midas/__init__.py
|
50 |
+
src/controlnet_aux/midas/midas/base_model.py
|
51 |
+
src/controlnet_aux/midas/midas/blocks.py
|
52 |
+
src/controlnet_aux/midas/midas/dpt_depth.py
|
53 |
+
src/controlnet_aux/midas/midas/midas_net.py
|
54 |
+
src/controlnet_aux/midas/midas/midas_net_custom.py
|
55 |
+
src/controlnet_aux/midas/midas/transforms.py
|
56 |
+
src/controlnet_aux/midas/midas/vit.py
|
57 |
+
src/controlnet_aux/mlsd/__init__.py
|
58 |
+
src/controlnet_aux/mlsd/utils.py
|
59 |
+
src/controlnet_aux/mlsd/models/__init__.py
|
60 |
+
src/controlnet_aux/mlsd/models/mbv2_mlsd_large.py
|
61 |
+
src/controlnet_aux/mlsd/models/mbv2_mlsd_tiny.py
|
62 |
+
src/controlnet_aux/normalbae/__init__.py
|
63 |
+
src/controlnet_aux/normalbae/nets/NNET.py
|
64 |
+
src/controlnet_aux/normalbae/nets/__init__.py
|
65 |
+
src/controlnet_aux/normalbae/nets/baseline.py
|
66 |
+
src/controlnet_aux/normalbae/nets/submodules/__init__.py
|
67 |
+
src/controlnet_aux/normalbae/nets/submodules/decoder.py
|
68 |
+
src/controlnet_aux/normalbae/nets/submodules/encoder.py
|
69 |
+
src/controlnet_aux/normalbae/nets/submodules/submodules.py
|
70 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/__init__.py
|
71 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/caffe2_benchmark.py
|
72 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/caffe2_validate.py
|
73 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/hubconf.py
|
74 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/onnx_export.py
|
75 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/onnx_optimize.py
|
76 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/onnx_to_caffe.py
|
77 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/onnx_validate.py
|
78 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/setup.py
|
79 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/utils.py
|
80 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/validate.py
|
81 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/__init__.py
|
82 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/config.py
|
83 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/conv2d_layers.py
|
84 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/efficientnet_builder.py
|
85 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/gen_efficientnet.py
|
86 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/helpers.py
|
87 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/mobilenetv3.py
|
88 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/model_factory.py
|
89 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/version.py
|
90 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/__init__.py
|
91 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/activations.py
|
92 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/activations_jit.py
|
93 |
+
src/controlnet_aux/normalbae/nets/submodules/efficientnet_repo/geffnet/activations/activations_me.py
|
94 |
+
src/controlnet_aux/open_pose/__init__.py
|
95 |
+
src/controlnet_aux/open_pose/body.py
|
96 |
+
src/controlnet_aux/open_pose/face.py
|
97 |
+
src/controlnet_aux/open_pose/hand.py
|
98 |
+
src/controlnet_aux/open_pose/model.py
|
99 |
+
src/controlnet_aux/open_pose/util.py
|
100 |
+
src/controlnet_aux/pidi/__init__.py
|
101 |
+
src/controlnet_aux/pidi/model.py
|
102 |
+
src/controlnet_aux/segment_anything/__init__.py
|
103 |
+
src/controlnet_aux/segment_anything/automatic_mask_generator.py
|
104 |
+
src/controlnet_aux/segment_anything/build_sam.py
|
105 |
+
src/controlnet_aux/segment_anything/predictor.py
|
106 |
+
src/controlnet_aux/segment_anything/modeling/__init__.py
|
107 |
+
src/controlnet_aux/segment_anything/modeling/common.py
|
108 |
+
src/controlnet_aux/segment_anything/modeling/image_encoder.py
|
109 |
+
src/controlnet_aux/segment_anything/modeling/mask_decoder.py
|
110 |
+
src/controlnet_aux/segment_anything/modeling/prompt_encoder.py
|
111 |
+
src/controlnet_aux/segment_anything/modeling/sam.py
|
112 |
+
src/controlnet_aux/segment_anything/modeling/tiny_vit_sam.py
|
113 |
+
src/controlnet_aux/segment_anything/modeling/transformer.py
|
114 |
+
src/controlnet_aux/segment_anything/utils/__init__.py
|
115 |
+
src/controlnet_aux/segment_anything/utils/amg.py
|
116 |
+
src/controlnet_aux/segment_anything/utils/onnx.py
|
117 |
+
src/controlnet_aux/segment_anything/utils/transforms.py
|
118 |
+
src/controlnet_aux/shuffle/__init__.py
|
119 |
+
src/controlnet_aux/teed/Fsmish.py
|
120 |
+
src/controlnet_aux/teed/Xsmish.py
|
121 |
+
src/controlnet_aux/teed/__init__.py
|
122 |
+
src/controlnet_aux/teed/ted.py
|
123 |
+
src/controlnet_aux/zoe/__init__.py
|
124 |
+
src/controlnet_aux/zoe/zoedepth/__init__.py
|
125 |
+
src/controlnet_aux/zoe/zoedepth/models/__init__.py
|
126 |
+
src/controlnet_aux/zoe/zoedepth/models/builder.py
|
127 |
+
src/controlnet_aux/zoe/zoedepth/models/depth_model.py
|
128 |
+
src/controlnet_aux/zoe/zoedepth/models/model_io.py
|
129 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/__init__.py
|
130 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas.py
|
131 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/__init__.py
|
132 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/hubconf.py
|
133 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/__init__.py
|
134 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/base_model.py
|
135 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/blocks.py
|
136 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/dpt_depth.py
|
137 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/midas_net.py
|
138 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/midas_net_custom.py
|
139 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/model_loader.py
|
140 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/transforms.py
|
141 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/__init__.py
|
142 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/beit.py
|
143 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/levit.py
|
144 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/next_vit.py
|
145 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin.py
|
146 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin2.py
|
147 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/swin_common.py
|
148 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/utils.py
|
149 |
+
src/controlnet_aux/zoe/zoedepth/models/base_models/midas_repo/midas/backbones/vit.py
|
150 |
+
src/controlnet_aux/zoe/zoedepth/models/layers/__init__.py
|
151 |
+
src/controlnet_aux/zoe/zoedepth/models/layers/attractor.py
|
152 |
+
src/controlnet_aux/zoe/zoedepth/models/layers/dist_layers.py
|
153 |
+
src/controlnet_aux/zoe/zoedepth/models/layers/localbins_layers.py
|
154 |
+
src/controlnet_aux/zoe/zoedepth/models/layers/patch_transformer.py
|
155 |
+
src/controlnet_aux/zoe/zoedepth/models/zoedepth/__init__.py
|
156 |
+
src/controlnet_aux/zoe/zoedepth/models/zoedepth/config_zoedepth.json
|
157 |
+
src/controlnet_aux/zoe/zoedepth/models/zoedepth/config_zoedepth_kitti.json
|
158 |
+
src/controlnet_aux/zoe/zoedepth/models/zoedepth/zoedepth_v1.py
|
159 |
+
src/controlnet_aux/zoe/zoedepth/models/zoedepth_nk/__init__.py
|
160 |
+
src/controlnet_aux/zoe/zoedepth/models/zoedepth_nk/config_zoedepth_nk.json
|
161 |
+
src/controlnet_aux/zoe/zoedepth/models/zoedepth_nk/zoedepth_nk_v1.py
|
162 |
+
src/controlnet_aux/zoe/zoedepth/utils/__init__.py
|
163 |
+
src/controlnet_aux/zoe/zoedepth/utils/arg_utils.py
|
164 |
+
src/controlnet_aux/zoe/zoedepth/utils/config.py
|
165 |
+
src/controlnet_aux/zoe/zoedepth/utils/easydict/__init__.py
|
166 |
+
tests/test_controlnet_aux.py
|
controlnet_aux/src/controlnet_aux.egg-info/dependency_links.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
|
controlnet_aux/src/controlnet_aux.egg-info/requires.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
importlib_metadata
|
3 |
+
huggingface_hub
|
4 |
+
scipy
|
5 |
+
opencv-python-headless
|
6 |
+
filelock
|
7 |
+
numpy
|
8 |
+
Pillow
|
9 |
+
einops
|
10 |
+
torchvision
|
11 |
+
timm<=0.6.7
|
12 |
+
scikit-image
|
controlnet_aux/src/controlnet_aux.egg-info/top_level.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
controlnet_aux
|
controlnet_aux/src/controlnet_aux/__init__.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__version__ = "0.0.9"
|
2 |
+
|
3 |
+
from .canny import CannyDetector
|
4 |
+
from .open_pose import OpenposeDetector
|
5 |
+
|
controlnet_aux/src/controlnet_aux/canny/__init__.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import warnings
|
2 |
+
import cv2
|
3 |
+
import numpy as np
|
4 |
+
from PIL import Image
|
5 |
+
from ..util import HWC3, resize_image
|
6 |
+
|
7 |
+
class CannyDetector:
|
8 |
+
def __call__(self, input_image=None, low_threshold=100, high_threshold=200, detect_resolution=512, image_resolution=512, output_type=None, **kwargs):
|
9 |
+
if "img" in kwargs:
|
10 |
+
warnings.warn("img is deprecated, please use `input_image=...` instead.", DeprecationWarning)
|
11 |
+
input_image = kwargs.pop("img")
|
12 |
+
|
13 |
+
if input_image is None:
|
14 |
+
raise ValueError("input_image must be defined.")
|
15 |
+
|
16 |
+
if not isinstance(input_image, np.ndarray):
|
17 |
+
input_image = np.array(input_image, dtype=np.uint8)
|
18 |
+
output_type = output_type or "pil"
|
19 |
+
else:
|
20 |
+
output_type = output_type or "np"
|
21 |
+
|
22 |
+
input_image = HWC3(input_image)
|
23 |
+
input_image = resize_image(input_image, detect_resolution)
|
24 |
+
|
25 |
+
detected_map = cv2.Canny(input_image, low_threshold, high_threshold)
|
26 |
+
detected_map = HWC3(detected_map)
|
27 |
+
|
28 |
+
img = resize_image(input_image, image_resolution)
|
29 |
+
H, W, C = img.shape
|
30 |
+
|
31 |
+
detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)
|
32 |
+
|
33 |
+
if output_type == "pil":
|
34 |
+
detected_map = Image.fromarray(detected_map)
|
35 |
+
|
36 |
+
return detected_map
|
controlnet_aux/src/controlnet_aux/open_pose/LICENSE
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
OPENPOSE: MULTIPERSON KEYPOINT DETECTION
|
2 |
+
SOFTWARE LICENSE AGREEMENT
|
3 |
+
ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY
|
4 |
+
|
5 |
+
BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE.
|
6 |
+
|
7 |
+
This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Carnegie Mellon University (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor.
|
8 |
+
|
9 |
+
RESERVATION OF OWNERSHIP AND GRANT OF LICENSE:
|
10 |
+
Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement and hereby grants to Licensee a personal, non-exclusive,
|
11 |
+
non-transferable license to use the Software for noncommercial research purposes, without the right to sublicense, pursuant to the terms and conditions of this Agreement. As used in this Agreement, the term "Software" means (i) the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by Licensor, including all or any file structures, programming instructions, user interfaces and screen formats and sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or modifications created or made by You to any of the items specified in (i).
|
12 |
+
|
13 |
+
CONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement. Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution, or publication.
|
14 |
+
|
15 |
+
COPYRIGHT: The Software is owned by Licensor and is protected by United
|
16 |
+
States copyright laws and applicable international treaties and/or conventions.
|
17 |
+
|
18 |
+
PERMITTED USES: The Software may be used for your own noncommercial internal research purposes. You understand and agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto.
|
19 |
+
|
20 |
+
DERIVATIVES: You may create derivatives of or make modifications to the Software, however, You agree that all and any such derivatives and modifications will be owned by Licensor and become a part of the Software licensed to You under this Agreement. You may only use such derivatives and modifications for your own noncommercial internal research purposes, and you may not otherwise use, distribute or copy such derivatives and modifications in violation of this Agreement.
|
21 |
+
|
22 |
+
BACKUPS: If Licensee is an organization, it may make that number of copies of the Software necessary for internal noncommercial use at a single site within its organization provided that all information appearing in or on the original labels, including the copyright and trademark notices are copied onto the labels of the copies.
|
23 |
+
|
24 |
+
USES NOT PERMITTED: You may not distribute, copy or use the Software except as explicitly permitted herein. Licensee has not been granted any trademark license as part of this Agreement and may not use the name or mark “OpenPose", "Carnegie Mellon" or any renditions thereof without the prior written permission of Licensor.
|
25 |
+
|
26 |
+
You may not sell, rent, lease, sublicense, lend, time-share or transfer, in whole or in part, or provide third parties access to prior or present versions (or any parts thereof) of the Software.
|
27 |
+
|
28 |
+
ASSIGNMENT: You may not assign this Agreement or your rights hereunder without the prior written consent of Licensor. Any attempted assignment without such consent shall be null and void.
|
29 |
+
|
30 |
+
TERM: The term of the license granted by this Agreement is from Licensee's acceptance of this Agreement by downloading the Software or by using the Software until terminated as provided below.
|
31 |
+
|
32 |
+
The Agreement automatically terminates without notice if you fail to comply with any provision of this Agreement. Licensee may terminate this Agreement by ceasing using the Software. Upon any termination of this Agreement, Licensee will delete any and all copies of the Software. You agree that all provisions which operate to protect the proprietary rights of Licensor shall remain in force should breach occur and that the obligation of confidentiality described in this Agreement is binding in perpetuity and, as such, survives the term of the Agreement.
|
33 |
+
|
34 |
+
FEE: Provided Licensee abides completely by the terms and conditions of this Agreement, there is no fee due to Licensor for Licensee's use of the Software in accordance with this Agreement.
|
35 |
+
|
36 |
+
DISCLAIMER OF WARRANTIES: THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT. LICENSEE BEARS ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS.
|
37 |
+
|
38 |
+
SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement.
|
39 |
+
|
40 |
+
EXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage.
|
41 |
+
|
42 |
+
EXPORT REGULATION: Licensee agrees to comply with any and all applicable
|
43 |
+
U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered by the Office of Foreign Assets Control.
|
44 |
+
|
45 |
+
SEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions shall not in any way be affected or impaired thereby.
|
46 |
+
|
47 |
+
NO IMPLIED WAIVERS: No failure or delay by Licensor in enforcing any right or remedy under this Agreement shall be construed as a waiver of any future or other exercise of such right or remedy by Licensor.
|
48 |
+
|
49 |
+
GOVERNING LAW: This Agreement shall be construed and enforced in accordance with the laws of the Commonwealth of Pennsylvania without reference to conflict of laws principles. You consent to the personal jurisdiction of the courts of this County and waive their rights to venue outside of Allegheny County, Pennsylvania.
|
50 |
+
|
51 |
+
ENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire agreement between Licensee and Licensor as to the matter set forth herein and supersedes any previous agreements, understandings, and arrangements between the parties relating hereto.
|
52 |
+
|
53 |
+
|
54 |
+
|
55 |
+
************************************************************************
|
56 |
+
|
57 |
+
THIRD-PARTY SOFTWARE NOTICES AND INFORMATION
|
58 |
+
|
59 |
+
This project incorporates material from the project(s) listed below (collectively, "Third Party Code"). This Third Party Code is licensed to you under their original license terms set forth below. We reserves all other rights not expressly granted, whether by implication, estoppel or otherwise.
|
60 |
+
|
61 |
+
1. Caffe, version 1.0.0, (https://github.com/BVLC/caffe/)
|
62 |
+
|
63 |
+
COPYRIGHT
|
64 |
+
|
65 |
+
All contributions by the University of California:
|
66 |
+
Copyright (c) 2014-2017 The Regents of the University of California (Regents)
|
67 |
+
All rights reserved.
|
68 |
+
|
69 |
+
All other contributions:
|
70 |
+
Copyright (c) 2014-2017, the respective contributors
|
71 |
+
All rights reserved.
|
72 |
+
|
73 |
+
Caffe uses a shared copyright model: each contributor holds copyright over
|
74 |
+
their contributions to Caffe. The project versioning records all such
|
75 |
+
contribution and copyright details. If a contributor wants to further mark
|
76 |
+
their specific copyright on a particular contribution, they should indicate
|
77 |
+
their copyright solely in the commit message of the change when it is
|
78 |
+
committed.
|
79 |
+
|
80 |
+
LICENSE
|
81 |
+
|
82 |
+
Redistribution and use in source and binary forms, with or without
|
83 |
+
modification, are permitted provided that the following conditions are met:
|
84 |
+
|
85 |
+
1. Redistributions of source code must retain the above copyright notice, this
|
86 |
+
list of conditions and the following disclaimer.
|
87 |
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
88 |
+
this list of conditions and the following disclaimer in the documentation
|
89 |
+
and/or other materials provided with the distribution.
|
90 |
+
|
91 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
92 |
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
93 |
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
94 |
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
95 |
+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
96 |
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
97 |
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
98 |
+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
99 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
100 |
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
101 |
+
|
102 |
+
CONTRIBUTION AGREEMENT
|
103 |
+
|
104 |
+
By contributing to the BVLC/caffe repository through pull-request, comment,
|
105 |
+
or otherwise, the contributor releases their content to the
|
106 |
+
license and copyright terms herein.
|
107 |
+
|
108 |
+
************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********
|
controlnet_aux/src/controlnet_aux/open_pose/__init__.py
ADDED
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Openpose
|
2 |
+
# Original from CMU https://github.com/CMU-Perceptual-Computing-Lab/openpose
|
3 |
+
# 2nd Edited by https://github.com/Hzzone/pytorch-openpose
|
4 |
+
# 3rd Edited by ControlNet
|
5 |
+
# 4th Edited by ControlNet (added face and correct hands)
|
6 |
+
# 5th Edited by ControlNet (Improved JSON serialization/deserialization, and lots of bug fixes)
|
7 |
+
# This preprocessor is licensed by CMU for non-commercial use only.
|
8 |
+
|
9 |
+
|
10 |
+
import os
|
11 |
+
|
12 |
+
# Set before the numeric/torch imports that follow.
# NOTE(review): presumably silences the "duplicate OpenMP runtime" abort that
# can occur when several libraries bundle their own OpenMP — confirm.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
|
13 |
+
|
14 |
+
import json
|
15 |
+
import warnings
|
16 |
+
from typing import Callable, List, NamedTuple, Tuple, Union
|
17 |
+
|
18 |
+
import cv2
|
19 |
+
import numpy as np
|
20 |
+
import torch
|
21 |
+
from huggingface_hub import hf_hub_download
|
22 |
+
from PIL import Image
|
23 |
+
|
24 |
+
from ..util import HWC3, resize_image
|
25 |
+
from . import util
|
26 |
+
from .body import Body, BodyResult, Keypoint
|
27 |
+
from .face import Face
|
28 |
+
from .hand import Hand
|
29 |
+
|
30 |
+
# Hand and face detections share one representation: a flat list of keypoints.
HandResult = List[Keypoint]
FaceResult = List[Keypoint]
|
32 |
+
|
33 |
+
class PoseResult(NamedTuple):
    """Detection result for a single person: the body plus optional hands and face."""
    # Body keypoints together with the person's total score and part count.
    body: BodyResult
    # Left-hand keypoints, or None when hands were not requested / not found.
    left_hand: Union[HandResult, None]
    # Right-hand keypoints, or None when hands were not requested / not found.
    right_hand: Union[HandResult, None]
    # Face keypoints, or None when the face was not requested / not found.
    face: Union[FaceResult, None]
|
38 |
+
|
39 |
+
def draw_poses(poses: List[PoseResult], H, W, draw_body=True, draw_hand=True, draw_face=True):
    """
    Render every detected pose onto a fresh black canvas.

    Args:
        poses (List[PoseResult]): The poses to render, one entry per person.
        H (int): Canvas height in pixels.
        W (int): Canvas width in pixels.
        draw_body (bool, optional): Render the body keypoints. Defaults to True.
        draw_hand (bool, optional): Render both hands. Defaults to True.
        draw_face (bool, optional): Render the face keypoints. Defaults to True.

    Returns:
        numpy.ndarray: An (H, W, 3) uint8 image containing the drawn poses.
    """
    image = np.zeros((H, W, 3), np.uint8)

    for person in poses:
        if draw_body:
            image = util.draw_bodypose(image, person.body.keypoints)

        if draw_hand:
            # Left hand first, then right hand, each drawn on top of the previous result.
            for hand in (person.left_hand, person.right_hand):
                image = util.draw_handpose(image, hand)

        if draw_face:
            image = util.draw_facepose(image, person.face)

    return image
|
68 |
+
|
69 |
+
|
70 |
+
class OpenposeDetector:
    """
    Detect human poses in images with the Openpose body, hand and face models.

    Attributes:
        body_estimation: Body estimator; called on an image to obtain body candidates.
        hand_estimation: Hand estimator, or None when no hand model was supplied.
        face_estimation: Face estimator, or None when no face model was supplied.
    """
    def __init__(self, body_estimation, hand_estimation=None, face_estimation=None):
        self.body_estimation = body_estimation
        self.hand_estimation = hand_estimation
        self.face_estimation = face_estimation

    @classmethod
    def from_pretrained(cls, pretrained_model_or_path, filename=None, hand_filename=None, face_filename=None, cache_dir=None, local_files_only=False):
        """
        Build a detector from a local directory or a Hugging Face Hub repo id.

        Args:
            pretrained_model_or_path (str): Local directory, or Hub repo id.
            filename (str, optional): Body checkpoint name; a default is chosen when omitted.
            hand_filename (str, optional): Hand checkpoint name; a default is chosen when omitted.
            face_filename (str, optional): Face checkpoint name; defaults to "facenet.pth".
            cache_dir (str, optional): Forwarded to ``hf_hub_download``.
            local_files_only (bool, optional): Forwarded to ``hf_hub_download``.

        Returns:
            OpenposeDetector: A detector with body, hand and face estimators loaded.
        """
        if pretrained_model_or_path == "lllyasviel/ControlNet":
            filename = filename or "annotator/ckpts/body_pose_model.pth"
            hand_filename = hand_filename or "annotator/ckpts/hand_pose_model.pth"
            face_filename = face_filename or "facenet.pth"

            # For this repo id the face weights are resolved against a different repo.
            face_pretrained_model_or_path = "lllyasviel/Annotators"
        else:
            filename = filename or "body_pose_model.pth"
            hand_filename = hand_filename or "hand_pose_model.pth"
            face_filename = face_filename or "facenet.pth"

            face_pretrained_model_or_path = pretrained_model_or_path

        if os.path.isdir(pretrained_model_or_path):
            body_model_path = os.path.join(pretrained_model_or_path, filename)
            hand_model_path = os.path.join(pretrained_model_or_path, hand_filename)
            face_model_path = os.path.join(face_pretrained_model_or_path, face_filename)
        else:
            body_model_path = hf_hub_download(pretrained_model_or_path, filename, cache_dir=cache_dir, local_files_only=local_files_only)
            hand_model_path = hf_hub_download(pretrained_model_or_path, hand_filename, cache_dir=cache_dir, local_files_only=local_files_only)
            face_model_path = hf_hub_download(face_pretrained_model_or_path, face_filename, cache_dir=cache_dir, local_files_only=local_files_only)

        body_estimation = Body(body_model_path)
        hand_estimation = Hand(hand_model_path)
        face_estimation = Face(face_model_path)

        return cls(body_estimation, hand_estimation, face_estimation)

    def to(self, device):
        """Move every available estimator to ``device`` and return ``self``."""
        self.body_estimation.to(device)
        # The hand and face estimators are optional (``__init__`` defaults them to None).
        for optional_estimator in (self.hand_estimation, self.face_estimation):
            if optional_estimator is not None:
                optional_estimator.to(device)
        return self

    def detect_hands(self, body: BodyResult, oriImg) -> Tuple[Union[HandResult, None], Union[HandResult, None]]:
        """
        Detect the left and right hand of one person.

        Args:
            body (BodyResult): The person's body result, passed to ``util.handDetect``.
            oriImg (numpy.ndarray): The image the body was detected in.

        Returns:
            Tuple: ``(left_hand, right_hand)``; each is a list of keypoints divided by the
            image width/height, or None when that hand was not produced.
        """
        left_hand = None
        right_hand = None
        H, W, _ = oriImg.shape
        for x, y, w, is_left in util.handDetect(body, oriImg):
            peaks = self.hand_estimation(oriImg[y:y+w, x:x+w, :]).astype(np.float32)
            if peaks.ndim == 2 and peaks.shape[1] == 2:
                # Values below 1e-6 are replaced by -1; the rest are shifted from crop
                # coordinates back to image coordinates. Both are then divided by the image size.
                peaks[:, 0] = np.where(peaks[:, 0] < 1e-6, -1, peaks[:, 0] + x) / float(W)
                peaks[:, 1] = np.where(peaks[:, 1] < 1e-6, -1, peaks[:, 1] + y) / float(H)

                hand_result = [
                    Keypoint(x=peak[0], y=peak[1])
                    for peak in peaks
                ]

                if is_left:
                    left_hand = hand_result
                else:
                    right_hand = hand_result

        return left_hand, right_hand

    def detect_face(self, body: BodyResult, oriImg) -> Union[FaceResult, None]:
        """
        Detect the face of one person.

        Args:
            body (BodyResult): The person's body result, passed to ``util.faceDetect``.
            oriImg (numpy.ndarray): The image the body was detected in.

        Returns:
            A list of keypoints divided by the image width/height, or None when no face
            region was found or the peaks do not have the expected (N, 2) layout.
        """
        face = util.faceDetect(body, oriImg)
        if face is None:
            return None

        x, y, w = face
        H, W, _ = oriImg.shape
        heatmaps = self.face_estimation(oriImg[y:y+w, x:x+w, :])
        peaks = self.face_estimation.compute_peaks_from_heatmaps(heatmaps).astype(np.float32)
        if peaks.ndim == 2 and peaks.shape[1] == 2:
            # Same convention as for hands: tiny values become -1, others are shifted by the
            # crop origin, and everything is divided by the image size.
            peaks[:, 0] = np.where(peaks[:, 0] < 1e-6, -1, peaks[:, 0] + x) / float(W)
            peaks[:, 1] = np.where(peaks[:, 1] < 1e-6, -1, peaks[:, 1] + y) / float(H)
            return [
                Keypoint(x=peak[0], y=peak[1])
                for peak in peaks
            ]

        return None

    def detect_poses(self, oriImg, include_hand=False, include_face=False) -> List[PoseResult]:
        """
        Detect poses in the given image.

        Args:
            oriImg (numpy.ndarray): The input image for pose detection.
            include_hand (bool, optional): Whether to include hand detection. Defaults to False.
            include_face (bool, optional): Whether to include face detection. Defaults to False.

        Returns:
            List[PoseResult]: A list of PoseResult objects containing the detected poses.
        """
        # Reverse the channel axis (presumably RGB -> BGR for the estimators — confirm).
        oriImg = oriImg[:, :, ::-1].copy()
        H, W = oriImg.shape[:2]
        with torch.no_grad():
            candidate, subset = self.body_estimation(oriImg)
            bodies = self.body_estimation.format_body_result(candidate, subset)

            results = []
            for body in bodies:
                left_hand, right_hand, face = (None,) * 3
                if include_hand:
                    left_hand, right_hand = self.detect_hands(body, oriImg)
                if include_face:
                    face = self.detect_face(body, oriImg)

                # Hands and face are located from the un-normalised body; only the body
                # stored in the result has its keypoints divided by the image size.
                results.append(PoseResult(BodyResult(
                    keypoints=[
                        Keypoint(
                            x=keypoint.x / float(W),
                            y=keypoint.y / float(H)
                        ) if keypoint is not None else None
                        for keypoint in body.keypoints
                    ],
                    total_score=body.total_score,
                    total_parts=body.total_parts
                ), left_hand, right_hand, face))

            return results

    def __call__(self, input_image, detect_resolution=512, image_resolution=512, include_body=True, include_hand=False, include_face=False, hand_and_face=None, output_type="pil", **kwargs):
        """
        Detect poses in ``input_image`` and return them drawn on a black canvas.

        Args:
            input_image: A numpy array, or anything ``np.array`` can convert (e.g. a PIL image).
            detect_resolution (int): Resolution passed to ``resize_image`` before detection.
            image_resolution (int): Resolution passed to ``resize_image`` to size the output.
            include_body (bool): Draw the body keypoints.
            include_hand (bool): Detect and draw hands.
            include_face (bool): Detect and draw the face.
            hand_and_face (bool, optional): Deprecated; overrides ``include_hand`` and ``include_face``.
            output_type (str): "pil" for a PIL image, anything else for a numpy array.
            **kwargs: Only the deprecated ``return_pil`` flag is read.

        Returns:
            PIL.Image.Image or numpy.ndarray: The rendered pose map.
        """
        if hand_and_face is not None:
            warnings.warn("hand_and_face is deprecated. Use include_hand and include_face instead.", DeprecationWarning)
            include_hand = hand_and_face
            include_face = hand_and_face

        if "return_pil" in kwargs:
            warnings.warn("return_pil is deprecated. Use output_type instead.", DeprecationWarning)
            output_type = "pil" if kwargs["return_pil"] else "np"
        if isinstance(output_type, bool):
            warnings.warn("Passing `True` or `False` to `output_type` is deprecated and will raise an error in future versions")
            # Normalise the legacy boolean to the string form in both directions.
            output_type = "pil" if output_type else "np"

        if not isinstance(input_image, np.ndarray):
            input_image = np.array(input_image, dtype=np.uint8)

        input_image = HWC3(input_image)
        input_image = resize_image(input_image, detect_resolution)
        H, W = input_image.shape[:2]

        poses = self.detect_poses(input_image, include_hand, include_face)
        canvas = draw_poses(poses, H, W, draw_body=include_body, draw_hand=include_hand, draw_face=include_face)

        detected_map = HWC3(canvas)

        # Only the output size is taken from this second resize; the pixels are discarded.
        img = resize_image(input_image, image_resolution)
        H, W = img.shape[:2]

        detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_LINEAR)

        if output_type == "pil":
            detected_map = Image.fromarray(detected_map)

        return detected_map
|
controlnet_aux/src/controlnet_aux/open_pose/body.py
ADDED
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
from typing import List, NamedTuple, Union
|
3 |
+
|
4 |
+
import cv2
|
5 |
+
import numpy as np
|
6 |
+
import torch
|
7 |
+
from scipy.ndimage.filters import gaussian_filter
|
8 |
+
|
9 |
+
from . import util
|
10 |
+
from .model import bodypose_model
|
11 |
+
|
12 |
+
|
13 |
+
class Keypoint(NamedTuple):
    """A single detected point (body part, hand joint or face landmark)."""
    # Horizontal coordinate.
    x: float
    # Vertical coordinate.
    y: float
    # Detection score; defaults to 1.0 when the producer supplies none.
    score: float = 1.0
    # Identifier of the point; defaults to -1 when the producer supplies none.
    id: int = -1
|
18 |
+
|
19 |
+
|
20 |
+
class BodyResult(NamedTuple):
    """Body detection for one person: its keypoints plus aggregate score and part count."""
    # Note: Using `Union` instead of `|` operator as the latter is a Python
    # 3.10 feature.
    # Annotator code should be Python 3.8 Compatible, as controlnet repo uses
    # Python 3.8 environment.
    # https://github.com/lllyasviel/ControlNet/blob/d3284fcd0972c510635a4f5abe2eeb71dc0de524/environment.yaml#L6
    # One entry per body part; None marks a part that was not detected.
    keypoints: List[Union[Keypoint, None]]
    # Score of the overall configuration for this person.
    total_score: float
    # Number of parts found for this person.
    total_parts: int
|
29 |
+
|
30 |
+
|
31 |
+
class Body(object):
    """Body-pose estimator: runs the body network and groups detected parts into people."""

    def __init__(self, model_path):
        """Build the body network and load its weights from ``model_path``."""
        self.model = bodypose_model()
        # NOTE(review): torch.load unpickles the checkpoint — only load trusted files.
        model_dict = util.transfer(self.model, torch.load(model_path))
        self.model.load_state_dict(model_dict)
        self.model.eval()

    def to(self, device):
        """Move the network to ``device`` and return ``self``."""
        self.model.to(device)
        return self

    def __call__(self, oriImg):
        """
        Run body-pose estimation on one image.

        Args:
            oriImg (numpy.ndarray): Input image; its first two axes are used as height and width.

        Returns:
            Tuple[numpy.ndarray, numpy.ndarray]: ``(candidate, subset)``.
                candidate: one row per detected peak: x, y, score, id.
                subset: n*20 array; columns 0-17 index into ``candidate`` (-1 when the part is
                missing), column 18 is the total score, column 19 the total number of parts.
        """
        device = next(iter(self.model.parameters())).device
        # Other scales (e.g. [0.5, 1.0, 1.5, 2.0]) can be averaged; only one is used here.
        scale_search = [0.5]
        boxsize = 368
        stride = 8
        padValue = 128
        thre1 = 0.1   # minimum heatmap value for a peak
        thre2 = 0.05  # minimum PAF agreement for a sampled mid-point
        multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
        heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 19))
        paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 38))

        for scale in multiplier:
            imageToTest = util.smart_resize_k(oriImg, fx=scale, fy=scale)
            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            # HWC -> 1CHW, scaled to roughly [-0.5, 0.5).
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            data = data.to(device)
            with torch.no_grad():
                Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
            Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
            Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()

            # extract outputs, resize, and remove padding
            heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
            heatmap = util.smart_resize_k(heatmap, fx=stride, fy=stride)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = util.smart_resize(heatmap, (oriImg.shape[0], oriImg.shape[1]))

            paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
            paf = util.smart_resize_k(paf, fx=stride, fy=stride)
            paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            paf = util.smart_resize(paf, (oriImg.shape[0], oriImg.shape[1]))

            # Running mean over scales. (Adding heatmap_avg to itself here would double the
            # accumulated sum on every scale after the first.)
            heatmap_avg += heatmap / len(multiplier)
            paf_avg += paf / len(multiplier)

        all_peaks = []
        peak_counter = 0

        for part in range(18):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)

            # Copies of the smoothed map shifted by one pixel in each direction, used for
            # the local-maximum test below.
            map_left = np.zeros(one_heatmap.shape)
            map_left[1:, :] = one_heatmap[:-1, :]
            map_right = np.zeros(one_heatmap.shape)
            map_right[:-1, :] = one_heatmap[1:, :]
            map_up = np.zeros(one_heatmap.shape)
            map_up[:, 1:] = one_heatmap[:, :-1]
            map_down = np.zeros(one_heatmap.shape)
            map_down[:, :-1] = one_heatmap[:, 1:]

            peaks_binary = np.logical_and.reduce(
                (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up, one_heatmap >= map_down, one_heatmap > thre1))
            peak_rows, peak_cols = np.nonzero(peaks_binary)
            peaks = list(zip(peak_cols, peak_rows))  # note reverse: stored as (x, y)
            # Each peak becomes (x, y, score, id); the score is read from the unsmoothed map.
            peaks_with_score_and_id = [
                (px, py, map_ori[py, px], peak_counter + offset)
                for offset, (px, py) in enumerate(peaks)
            ]

            all_peaks.append(peaks_with_score_and_id)
            peak_counter += len(peaks)

        # find connection in the specified sequence, center 29 is in the position 15
        limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10],
                   [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
                   [1, 16], [16, 18], [3, 17], [6, 18]]
        # the middle joints heatmap correspondence
        mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22],
                  [23, 24], [25, 26], [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52],
                  [55, 56], [37, 38], [45, 46]]

        connection_all = []
        special_k = []
        mid_num = 10

        for k in range(len(mapIdx)):
            score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
            candA = all_peaks[limbSeq[k][0] - 1]
            candB = all_peaks[limbSeq[k][1] - 1]
            nA = len(candA)
            nB = len(candB)
            if (nA != 0 and nB != 0):
                connection_candidate = []
                for i in range(nA):
                    for j in range(nB):
                        vec = np.subtract(candB[j][:2], candA[i][:2])
                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                        norm = max(0.001, norm)  # avoid division by zero for coincident peaks
                        vec = np.divide(vec, norm)

                        # mid_num points sampled evenly on the segment A -> B.
                        startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
                                            np.linspace(candA[i][1], candB[j][1], num=mid_num)))

                        vec_x = np.array([score_mid[int(round(sy)), int(round(sx)), 0]
                                          for sx, sy in startend])
                        vec_y = np.array([score_mid[int(round(sy)), int(round(sx)), 1]
                                          for sx, sy in startend])

                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                        score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
                            0.5 * oriImg.shape[0] / norm - 1, 0)
                        criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
                        criterion2 = score_with_dist_prior > 0
                        if criterion1 and criterion2:
                            connection_candidate.append(
                                [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])

                # Greedy matching: best-scoring candidates first, each peak used at most once.
                connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
                connection = np.zeros((0, 5))
                for c in range(len(connection_candidate)):
                    i, j, s = connection_candidate[c][0:3]
                    if (i not in connection[:, 3] and j not in connection[:, 4]):
                        connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
                        if (len(connection) >= min(nA, nB)):
                            break

                connection_all.append(connection)
            else:
                special_k.append(k)
                connection_all.append([])

        # last number in each row is the total parts number of that person
        # the second last number in each row is the score of the overall configuration
        subset = -1 * np.ones((0, 20))
        candidate = np.array([item for sublist in all_peaks for item in sublist])

        for k in range(len(mapIdx)):
            if k not in special_k:
                partAs = connection_all[k][:, 0]
                partBs = connection_all[k][:, 1]
                indexA, indexB = np.array(limbSeq[k]) - 1

                for i in range(len(connection_all[k])):  # = 1:size(temp,1)
                    found = 0
                    subset_idx = [-1, -1]
                    for j in range(len(subset)):  # 1:size(subset,1):
                        if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                            subset_idx[found] = j
                            found += 1

                    if found == 1:
                        j = subset_idx[0]
                        if subset[j][indexB] != partBs[i]:
                            subset[j][indexB] = partBs[i]
                            subset[j][-1] += 1
                            subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                    elif found == 2:  # if found 2 and disjoint, merge them
                        j1, j2 = subset_idx
                        membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
                        if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                            subset[j1][:-2] += (subset[j2][:-2] + 1)
                            subset[j1][-2:] += subset[j2][-2:]
                            subset[j1][-2] += connection_all[k][i][2]
                            subset = np.delete(subset, j2, 0)
                        else:  # as like found == 1
                            subset[j1][indexB] = partBs[i]
                            subset[j1][-1] += 1
                            subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]

                    # if find no partA in the subset, create a new subset
                    elif not found and k < 17:
                        row = -1 * np.ones(20)
                        row[indexA] = partAs[i]
                        row[indexB] = partBs[i]
                        row[-1] = 2
                        row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                        subset = np.vstack([subset, row])

        # delete some rows of subset which has few parts occur
        deleteIdx = [
            i for i in range(len(subset))
            if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4
        ]
        subset = np.delete(subset, deleteIdx, axis=0)

        # subset: n*20 array, 0-17 is the index in candidate, 18 is the total score, 19 is the total parts
        # candidate: x, y, score, id
        return candidate, subset

    @staticmethod
    def format_body_result(candidate: np.ndarray, subset: np.ndarray) -> List[BodyResult]:
        """
        Format the body results from the candidate and subset arrays into a list of BodyResult objects.

        Args:
            candidate (np.ndarray): An array of candidates containing the x, y coordinates, score, and id
                for each body part.
            subset (np.ndarray): An array of subsets containing indices to the candidate array for each
                person detected. The last two columns of each row hold the total score and total parts
                of the person.

        Returns:
            List[BodyResult]: A list of BodyResult objects, where each object represents a person with
                detected keypoints, total score, and total parts.
        """
        return [
            BodyResult(
                keypoints=[
                    Keypoint(
                        x=candidate[candidate_index][0],
                        y=candidate[candidate_index][1],
                        score=candidate[candidate_index][2],
                        id=candidate[candidate_index][3]
                    ) if candidate_index != -1 else None  # -1 marks a part that was not found
                    for candidate_index in person[:18].astype(int)
                ],
                total_score=person[18],
                total_parts=person[19]
            )
            for person in subset
        ]
|
controlnet_aux/src/controlnet_aux/open_pose/face.py
ADDED
@@ -0,0 +1,364 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
import torch
|
5 |
+
import torch.nn.functional as F
|
6 |
+
from torch.nn import Conv2d, MaxPool2d, Module, ReLU, init
|
7 |
+
from torchvision.transforms import ToPILImage, ToTensor
|
8 |
+
|
9 |
+
from . import util
|
10 |
+
|
11 |
+
|
12 |
+
class FaceNet(Module):
    """Model the cascading heatmaps.

    A VGG-style convolutional trunk produces a 128-channel feature map at
    1/8 of the input resolution (three 2x2 max-pools).  Six prediction
    stages follow; every stage emits a 71-channel map, and stages 2-6 take
    the previous stage's output concatenated with the trunk features
    (71 + 128 = 199 input channels).
    """

    def __init__(self):
        super(FaceNet, self).__init__()
        # cnn to make feature map
        self.relu = ReLU()
        self.max_pooling_2d = MaxPool2d(kernel_size=2, stride=2)

        # (attribute name, in_channels, out_channels); all are 3x3, pad 1.
        trunk_spec = (
            ("conv1_1", 3, 64),
            ("conv1_2", 64, 64),
            ("conv2_1", 64, 128),
            ("conv2_2", 128, 128),
            ("conv3_1", 128, 256),
            ("conv3_2", 256, 256),
            ("conv3_3", 256, 256),
            ("conv3_4", 256, 256),
            ("conv4_1", 256, 512),
            ("conv4_2", 512, 512),
            ("conv4_3", 512, 512),
            ("conv4_4", 512, 512),
            ("conv5_1", 512, 512),
            ("conv5_2", 512, 512),
            ("conv5_3_CPM", 512, 128),
        )
        for attr, n_in, n_out in trunk_spec:
            setattr(self, attr, Conv2d(
                in_channels=n_in, out_channels=n_out,
                kernel_size=3, stride=1, padding=1))

        # stage1
        self.conv6_1_CPM = Conv2d(
            in_channels=128, out_channels=512, kernel_size=1, stride=1,
            padding=0)
        self.conv6_2_CPM = Conv2d(
            in_channels=512, out_channels=71, kernel_size=1, stride=1,
            padding=0)

        # stage2 .. stage6 share one layout:
        # (layer index, in_channels, out_channels, kernel_size, padding)
        refine_spec = (
            (1, 199, 128, 7, 3),
            (2, 128, 128, 7, 3),
            (3, 128, 128, 7, 3),
            (4, 128, 128, 7, 3),
            (5, 128, 128, 7, 3),
            (6, 128, 128, 1, 0),
            (7, 128, 71, 1, 0),
        )
        for stage in range(2, 7):
            for idx, n_in, n_out, ksize, pad in refine_spec:
                setattr(self, "Mconv%d_stage%d" % (idx, stage), Conv2d(
                    in_channels=n_in, out_channels=n_out,
                    kernel_size=ksize, stride=1, padding=pad))

        # Every convolution starts with a zero bias.
        for module in self.modules():
            if isinstance(module, Conv2d):
                init.constant_(module.bias, 0)

    def forward(self, x):
        """Return a list of heatmaps."""
        heatmaps = []

        # Trunk groups that are each followed by a 2x2 max-pool.
        pooled_groups = (
            ("conv1_1", "conv1_2"),
            ("conv2_1", "conv2_2"),
            ("conv3_1", "conv3_2", "conv3_3", "conv3_4"),
        )
        # Remaining trunk layers, applied without further pooling.
        tail_layers = (
            "conv4_1", "conv4_2", "conv4_3", "conv4_4",
            "conv5_1", "conv5_2", "conv5_3_CPM",
        )

        h = x
        for group in pooled_groups:
            for attr in group:
                h = self.relu(getattr(self, attr)(h))
            h = self.max_pooling_2d(h)
        for attr in tail_layers:
            h = self.relu(getattr(self, attr)(h))
        feature_map = h

        # stage1
        h = self.relu(self.conv6_1_CPM(h))
        h = self.conv6_2_CPM(h)
        heatmaps.append(h)

        # stage2 .. stage6
        for stage in range(2, 7):
            h = torch.cat([h, feature_map], dim=1)  # channel concat
            for idx in range(1, 7):
                h = self.relu(getattr(self, "Mconv%d_stage%d" % (idx, stage))(h))
            # The last layer of a stage has no activation.
            h = getattr(self, "Mconv7_stage%d" % stage)(h)
            heatmaps.append(h)

        return heatmaps
|
276 |
+
|
277 |
+
|
278 |
+
# Module-level logger for this file.
LOG = logging.getLogger(__name__)
# Shared torchvision converters; not referenced by the code visible in this
# module, kept as module-level names.
TOTEN = ToTensor()
TOPIL = ToPILImage()


# Defaults for the face detector.  `Face.__init__` falls back to
# 'inference_img_size', 'gaussian_sigma' and 'heatmap_peak_thresh' when the
# corresponding argument is not given.  'crop_scale' and 'line_indices' are
# not read anywhere in the visible part of this module.
params = {
    'gaussian_sigma': 2.5,
    'inference_img_size': 736,  # 368, 736, 1312
    'heatmap_peak_thresh': 0.1,
    'crop_scale': 1.5,
    # Pairs of indices in the range 0..67 — presumably facial-landmark pairs
    # joined by a line when drawing; TODO confirm against the drawing code.
    'line_indices': [
        [0, 1], [1, 2], [2, 3], [3, 4], [4, 5], [5, 6],
        [6, 7], [7, 8], [8, 9], [9, 10], [10, 11], [11, 12], [12, 13],
        [13, 14], [14, 15], [15, 16],
        [17, 18], [18, 19], [19, 20], [20, 21],
        [22, 23], [23, 24], [24, 25], [25, 26],
        [27, 28], [28, 29], [29, 30],
        [31, 32], [32, 33], [33, 34], [34, 35],
        [36, 37], [37, 38], [38, 39], [39, 40], [40, 41], [41, 36],
        [42, 43], [43, 44], [44, 45], [45, 46], [46, 47], [47, 42],
        [48, 49], [49, 50], [50, 51], [51, 52], [52, 53], [53, 54],
        [54, 55], [55, 56], [56, 57], [57, 58], [58, 59], [59, 48],
        [60, 61], [61, 62], [62, 63], [63, 64], [64, 65], [65, 66],
        [66, 67], [67, 60]
    ],
}
|
304 |
+
|
305 |
+
|
306 |
+
class Face(object):
    """
    The OpenPose face landmark detector model.

    Args:
        face_model_path: path of the checkpoint holding the ``FaceNet``
            state dict.
        inference_size: set the size of the inference image size, suggested:
            368, 736, 1312, default 736
        gaussian_sigma: blur the heatmaps, default 2.5
        heatmap_peak_thresh: return landmark if over threshold, default 0.1

    Note:
        ``inference_size``, ``gaussian_sigma`` and ``heatmap_peak_thresh``
        are stored on the instance but are not read by the methods of this
        class: ``__call__`` always resizes to 384x384 and
        ``compute_peaks_from_heatmaps`` uses a fixed 0.05 cut-off.
    """
    def __init__(self, face_model_path,
                 inference_size=None,
                 gaussian_sigma=None,
                 heatmap_peak_thresh=None):
        self.inference_size = inference_size or params["inference_img_size"]
        self.sigma = gaussian_sigma or params['gaussian_sigma']
        self.threshold = heatmap_peak_thresh or params["heatmap_peak_thresh"]
        self.model = FaceNet()
        # Load onto the CPU first: a checkpoint saved from a CUDA device would
        # otherwise fail to deserialize on a machine without CUDA.  The model
        # is moved to its final device later through `to()`.
        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        self.model.load_state_dict(
            torch.load(face_model_path, map_location="cpu"))
        self.model.eval()

    def to(self, device):
        """Move the underlying network to ``device`` and return ``self``."""
        self.model.to(device)
        return self

    def __call__(self, face_img):
        """Run the network on one face crop and return its heatmaps.

        Args:
            face_img: numpy image with shape (H, W, C).  Assumes an 8-bit
                3-channel crop (values are scaled with ``/ 256 - 0.5``) —
                TODO confirm against the caller.

        Returns:
            numpy array with one heatmap per network output channel
            (``FaceNet`` emits 71), resized to the input's (H, W).
        """
        device = next(iter(self.model.parameters())).device
        H, W, _ = face_img.shape

        # Fixed network input resolution.
        w_size = 384
        resized = util.smart_resize(face_img, (w_size, w_size))
        # HWC -> CHW, then map pixel values to roughly [-0.5, 0.5).
        x_data = torch.from_numpy(resized).permute([2, 0, 1]) / 256.0 - 0.5

        x_data = x_data.to(device)

        with torch.no_grad():
            hs = self.model(x_data[None, ...])
            # Only the last (most refined) stage is used; it is upsampled
            # back to the resolution of the input crop.
            heatmaps = F.interpolate(
                hs[-1],
                (H, W),
                mode='bilinear', align_corners=True).cpu().numpy()[0]
        return heatmaps

    def compute_peaks_from_heatmaps(self, heatmaps):
        """Return the strongest location of every sufficiently active heatmap.

        Args:
            heatmaps: array indexed as ``heatmaps[part][y][x]``.

        Returns:
            numpy array of ``[x, y]`` rows, one per heatmap that has at least
            one value above 0.05.  Heatmaps without such a value are skipped,
            so row ``i`` does not necessarily belong to part ``i``.
        """
        all_peaks = []
        for part in range(heatmaps.shape[0]):
            map_ori = heatmaps[part].copy()
            # NOTE(review): the cut-off is the literal 0.05, not
            # self.threshold — confirm whether that is intended.
            binary = np.ascontiguousarray(map_ori > 0.05, dtype=np.uint8)

            if np.sum(binary) == 0:
                continue

            # Among the pixels above the cut-off, keep the brightest one.
            positions = np.where(binary > 0.5)
            intensities = map_ori[positions]
            mi = np.argmax(intensities)
            y, x = positions[0][mi], positions[1][mi]
            all_peaks.append([x, y])

        return np.array(all_peaks)
|
controlnet_aux/src/controlnet_aux/open_pose/hand.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import numpy as np
|
3 |
+
import torch
|
4 |
+
from scipy.ndimage.filters import gaussian_filter
|
5 |
+
from skimage.measure import label
|
6 |
+
|
7 |
+
from . import util
|
8 |
+
from .model import handpose_model
|
9 |
+
|
10 |
+
|
11 |
+
class Hand(object):
    """Hand keypoint detector built on ``handpose_model``.

    Args:
        model_path: path of the checkpoint; its keys are remapped to the
            model's parameter names with ``util.transfer`` before loading.
    """
    def __init__(self, model_path):
        self.model = handpose_model()
        # Load onto the CPU first: a checkpoint saved from a CUDA device would
        # otherwise fail to deserialize on a machine without CUDA.  The model
        # is moved to its final device later through `to()`.
        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        model_dict = util.transfer(
            self.model, torch.load(model_path, map_location="cpu"))
        self.model.load_state_dict(model_dict)
        self.model.eval()

    def to(self, device):
        """Move the underlying network to ``device`` and return ``self``."""
        self.model.to(device)
        return self

    def __call__(self, oriImgRaw):
        """Detect 21 hand keypoints in an image.

        Args:
            oriImgRaw: numpy image with shape (H, W, C).

        Returns:
            numpy array of 21 ``[x, y]`` rows in the pixel coordinates of
            ``oriImgRaw``; a part with no response above the threshold is
            reported as ``[0, 0]``.
        """
        device = next(iter(self.model.parameters())).device
        scale_search = [0.5, 1.0, 1.5, 2.0]
        boxsize = 368
        stride = 8
        padValue = 128
        thre = 0.05
        multiplier = [x * boxsize for x in scale_search]

        # All scales are accumulated on a fixed wsize x wsize grid.
        wsize = 128
        heatmap_avg = np.zeros((wsize, wsize, 22))

        Hr, Wr, _ = oriImgRaw.shape

        oriImg = cv2.GaussianBlur(oriImgRaw, (0, 0), 0.8)

        for scale in multiplier:
            imageToTest = util.smart_resize(oriImg, (scale, scale))

            imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
            # HWC -> NCHW, then map pixel values to roughly [-0.5, 0.5).
            im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
            im = np.ascontiguousarray(im)

            data = torch.from_numpy(im).float()
            data = data.to(device)

            with torch.no_grad():
                output = self.model(data).cpu().numpy()

            # extract outputs, resize, and remove padding
            heatmap = np.transpose(np.squeeze(output), (1, 2, 0))  # output 1 is heatmaps
            heatmap = util.smart_resize_k(heatmap, fx=stride, fy=stride)
            heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
            heatmap = util.smart_resize(heatmap, (wsize, wsize))

            heatmap_avg += heatmap / len(multiplier)

        all_peaks = []
        # Only the first 21 of the 22 channels are searched for peaks.
        for part in range(21):
            map_ori = heatmap_avg[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)
            binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)

            if np.sum(binary) == 0:
                all_peaks.append([0, 0])
                continue
            # Keep only the connected region with the largest total response
            # and take the maximum inside it.
            label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
            max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
            label_img[label_img != max_index] = 0
            map_ori[label_img == 0] = 0

            y, x = util.npmax(map_ori)
            # Rescale from the wsize grid back to the input resolution.
            y = int(float(y) * float(Hr) / float(wsize))
            x = int(float(x) * float(Wr) / float(wsize))
            all_peaks.append([x, y])
        return np.array(all_peaks)
|
80 |
+
|
81 |
+
if __name__ == "__main__":
    # Manual smoke test: detect the hand keypoints of a sample image and show
    # them in a window until a key is pressed.
    # NOTE(review): cv2.imread yields None for an unreadable path, which
    # would only surface as an error inside Hand.__call__ — confirm the
    # relative paths below exist before running.
    test_image = '../images/hand.jpg'
    hand_estimation = Hand('../model/hand_pose_model.pth')

    oriImg = cv2.imread(test_image)  # B,G,R order
    peaks = hand_estimation(oriImg)
    canvas = util.draw_handpose(oriImg, peaks, True)

    cv2.imshow('', canvas)
    cv2.waitKey(0)
|
controlnet_aux/src/controlnet_aux/open_pose/model.py
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from collections import OrderedDict
|
3 |
+
|
4 |
+
import torch
|
5 |
+
import torch.nn as nn
|
6 |
+
|
7 |
+
def make_layers(block, no_relu_layers):
    """Build an ``nn.Sequential`` from an ordered layer specification.

    Args:
        block: ordered mapping of layer name to a list of numbers.  A name
            containing ``'pool'`` describes a max-pool as
            ``[kernel_size, stride, padding]``; every other name describes a
            convolution as
            ``[in_channels, out_channels, kernel_size, stride, padding]``.
        no_relu_layers: names of convolutions that must not be followed by a
            ReLU.

    Returns:
        nn.Sequential whose sub-modules keep the names of ``block``; each
        inserted activation is named ``'relu_' + <conv name>``.
    """
    named_modules = []
    for name, spec in block.items():
        if 'pool' in name:
            pool = nn.MaxPool2d(kernel_size=spec[0], stride=spec[1],
                                padding=spec[2])
            named_modules.append((name, pool))
            continue

        conv = nn.Conv2d(in_channels=spec[0], out_channels=spec[1],
                         kernel_size=spec[2], stride=spec[3],
                         padding=spec[4])
        named_modules.append((name, conv))
        if name not in no_relu_layers:
            named_modules.append(('relu_' + name, nn.ReLU(inplace=True)))

    return nn.Sequential(OrderedDict(named_modules))
|
23 |
+
|
24 |
+
class bodypose_model(nn.Module):
    """Two-branch, six-stage body pose network.

    ``model0`` is a shared convolutional trunk producing 128 channels.  Each
    stage has an ``L1`` branch (38 output channels) and an ``L2`` branch
    (19 output channels); stages 2-6 take both previous branch outputs
    concatenated with the trunk features (38 + 19 + 128 = 185 channels).
    """

    def __init__(self):
        super(bodypose_model, self).__init__()

        # these layers have no relu layer
        # (the last convolution of each branch in every stage; the stage-6 L2
        # entry used to be missing because 'Mconv7_stage6_L1' was listed
        # twice, which put a ReLU on the final L2 output)
        no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',
                          'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',
                          'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',
                          'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L2']
        blocks = {}
        block0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
            ('conv4_4_CPM', [256, 128, 3, 1, 1])
        ])

        # Stage 1
        block1_1 = OrderedDict([
            ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])
        ])

        block1_2 = OrderedDict([
            ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])
        ])
        blocks['block1_1'] = block1_1
        blocks['block1_2'] = block1_2

        self.model0 = make_layers(block0, no_relu_layers)

        # Stages 2 - 6
        for i in range(2, 7):
            blocks['block%d_1' % i] = OrderedDict([
                ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
            ])

            blocks['block%d_2' % i] = OrderedDict([
                ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
            ])

        # Turn every specification into an nn.Sequential, in insertion order.
        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        self.model1_1 = blocks['block1_1']
        self.model2_1 = blocks['block2_1']
        self.model3_1 = blocks['block3_1']
        self.model4_1 = blocks['block4_1']
        self.model5_1 = blocks['block5_1']
        self.model6_1 = blocks['block6_1']

        self.model1_2 = blocks['block1_2']
        self.model2_2 = blocks['block2_2']
        self.model3_2 = blocks['block3_2']
        self.model4_2 = blocks['block4_2']
        self.model5_2 = blocks['block5_2']
        self.model6_2 = blocks['block6_2']

    def forward(self, x):
        """Return the stage-6 outputs ``(L1, L2)`` for input batch ``x``.

        ``L1`` has 38 channels and ``L2`` has 19 channels (in OpenPose these
        are presumably the part-affinity fields and the keypoint heatmaps —
        confirm against the caller).
        """
        out1 = self.model0(x)

        out1_1 = self.model1_1(out1)
        out1_2 = self.model1_2(out1)
        out2 = torch.cat([out1_1, out1_2, out1], 1)

        out2_1 = self.model2_1(out2)
        out2_2 = self.model2_2(out2)
        out3 = torch.cat([out2_1, out2_2, out1], 1)

        out3_1 = self.model3_1(out3)
        out3_2 = self.model3_2(out3)
        out4 = torch.cat([out3_1, out3_2, out1], 1)

        out4_1 = self.model4_1(out4)
        out4_2 = self.model4_2(out4)
        out5 = torch.cat([out4_1, out4_2, out1], 1)

        out5_1 = self.model5_1(out5)
        out5_2 = self.model5_2(out5)
        out6 = torch.cat([out5_1, out5_2, out1], 1)

        out6_1 = self.model6_1(out6)
        out6_2 = self.model6_2(out6)

        return out6_1, out6_2
|
142 |
+
|
143 |
+
class handpose_model(nn.Module):
    """Single-branch, six-stage hand pose network.

    ``model1_0`` is the convolutional trunk (128 output channels) and
    ``model1_1`` the first prediction stage (22 channels).  ``model2`` to
    ``model6`` each refine the previous 22-channel prediction concatenated
    with the trunk features (22 + 128 = 150 input channels).
    """

    def __init__(self):
        super(handpose_model, self).__init__()

        # these layers have no relu layer
        no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',
                          'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6']

        def refine_spec(stage):
            # Layer layout shared by stages 2-6.
            return OrderedDict([
                ('Mconv1_stage%d' % stage, [150, 128, 7, 1, 3]),
                ('Mconv2_stage%d' % stage, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d' % stage, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d' % stage, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d' % stage, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d' % stage, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d' % stage, [128, 22, 1, 1, 0])
            ])

        # stage 1: trunk
        self.model1_0 = make_layers(OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3', [512, 512, 3, 1, 1]),
            ('conv4_4', [512, 512, 3, 1, 1]),
            ('conv5_1', [512, 512, 3, 1, 1]),
            ('conv5_2', [512, 512, 3, 1, 1]),
            ('conv5_3_CPM', [512, 128, 3, 1, 1])
        ]), no_relu_layers)

        # stage 1: first prediction
        self.model1_1 = make_layers(OrderedDict([
            ('conv6_1_CPM', [128, 512, 1, 1, 0]),
            ('conv6_2_CPM', [512, 22, 1, 1, 0])
        ]), no_relu_layers)

        # stage 2-6
        for stage in range(2, 7):
            setattr(self, 'model%d' % stage,
                    make_layers(refine_spec(stage), no_relu_layers))

    def forward(self, x):
        """Return the stage-6 prediction (22 channels) for input batch ``x``."""
        trunk = self.model1_0(x)
        prediction = self.model1_1(trunk)
        refiners = (self.model2, self.model3, self.model4,
                    self.model5, self.model6)
        for refine in refiners:
            # Each stage sees the previous prediction plus the trunk features.
            prediction = refine(torch.cat([prediction, trunk], 1))
        return prediction
|
controlnet_aux/src/controlnet_aux/open_pose/util.py
ADDED
@@ -0,0 +1,383 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
import numpy as np
|
3 |
+
import cv2
|
4 |
+
from typing import List, Tuple, Union
|
5 |
+
|
6 |
+
from .body import BodyResult, Keypoint
|
7 |
+
|
8 |
+
eps = 0.01
|
9 |
+
|
10 |
+
|
11 |
+
def smart_resize(x, s):
    """Resize an image or channel stack to an explicit ``(height, width)``.

    Arrays with 1 or 3 channels (or 2-D arrays) go through a single
    ``cv2.resize`` call: ``INTER_AREA`` when the target is smaller than the
    source (by summed height + width), ``INTER_LANCZOS4`` otherwise.  Any
    other channel count is resized one channel at a time and re-stacked on
    the last axis.

    Args:
        x: 2-D ``(H, W)`` or 3-D ``(H, W, C)`` numpy array.
        s: target size as ``(height, width)``; values are truncated to int.

    Returns:
        The resized numpy array.
    """
    target_h, target_w = s
    if x.ndim == 2:
        src_h, src_w = x.shape
        channels = 1
    else:
        src_h, src_w, channels = x.shape

    if channels not in (1, 3):
        planes = [smart_resize(x[:, :, c], s) for c in range(channels)]
        return np.stack(planes, axis=2)

    ratio = float(target_h + target_w) / float(src_h + src_w)
    interpolation = cv2.INTER_AREA if ratio < 1 else cv2.INTER_LANCZOS4
    return cv2.resize(x, (int(target_w), int(target_h)), interpolation=interpolation)
|
23 |
+
|
24 |
+
|
25 |
+
def smart_resize_k(x, fx, fy):
    """Resize an image or channel stack by scale factors.

    Same strategy as an explicit-size resize, with the target computed as
    ``(H * fy, W * fx)``: arrays with 1 or 3 channels (or 2-D arrays) use one
    ``cv2.resize`` call (``INTER_AREA`` when shrinking, ``INTER_LANCZOS4``
    otherwise); any other channel count is processed per channel and
    re-stacked on the last axis.

    Args:
        x: 2-D ``(H, W)`` or 3-D ``(H, W, C)`` numpy array.
        fx: horizontal scale factor.
        fy: vertical scale factor.

    Returns:
        The resized numpy array.
    """
    if x.ndim == 2:
        src_h, src_w = x.shape
        channels = 1
    else:
        src_h, src_w, channels = x.shape
    target_h = src_h * fy
    target_w = src_w * fx

    if channels not in (1, 3):
        planes = [smart_resize_k(x[:, :, c], fx, fy) for c in range(channels)]
        return np.stack(planes, axis=2)

    ratio = float(target_h + target_w) / float(src_h + src_w)
    interpolation = cv2.INTER_AREA if ratio < 1 else cv2.INTER_LANCZOS4
    return cv2.resize(x, (int(target_w), int(target_h)), interpolation=interpolation)
|
37 |
+
|
38 |
+
|
39 |
+
def padRightDownCorner(img, stride, padValue):
    """Pad an image at the bottom and right to a multiple of ``stride``.

    Args:
        img: 3-D numpy array ``(H, W, C)``.
        stride: positive integer the padded height and width must divide by.
        padValue: constant written into the added rows and columns.

    Returns:
        Tuple ``(img_padded, pad)`` where ``img_padded`` is a new array and
        ``pad`` is ``[up, left, down, right]``; ``up`` and ``left`` are
        always 0.
    """
    h = img.shape[0]
    w = img.shape[1]

    pad = 4 * [None]
    pad[0] = 0  # up
    pad[1] = 0  # left
    pad[2] = (-h) % stride  # down: distance to the next multiple of stride
    pad[3] = (-w) % stride  # right

    # dtype the fill takes when combined with the image, so the result's
    # dtype follows NumPy's usual promotion of `img` with `padValue`.
    fill_dtype = (np.zeros(1, dtype=img.dtype) * 0 + padValue).dtype

    # The fill blocks are sized from the image shape directly, so images
    # with a single row or column are padded as well (slicing `[-2:-1]` is
    # empty for those and used to yield no padding at all).
    pad_down = np.full((pad[2], w, img.shape[2]), padValue, dtype=fill_dtype)
    img_padded = np.concatenate((img, pad_down), axis=0)
    pad_right = np.full((img_padded.shape[0], pad[3], img.shape[2]), padValue, dtype=fill_dtype)
    img_padded = np.concatenate((img_padded, pad_right), axis=1)

    return img_padded, pad
|
60 |
+
|
61 |
+
|
62 |
+
def transfer(model, model_weights):
    """Re-key checkpoint weights to the parameter names of ``model``.

    For every key of ``model.state_dict()`` the first dot-separated
    component is dropped and the remainder is looked up in
    ``model_weights``.

    Args:
        model: object exposing ``state_dict()``.
        model_weights: mapping from the shortened names to weights.

    Returns:
        dict mapping each full state-dict key to the matching entry of
        ``model_weights``.

    Raises:
        KeyError: if a shortened name is missing from ``model_weights``.
    """
    return {
        full_name: model_weights[full_name.partition('.')[2]]
        for full_name in model.state_dict().keys()
    }
|
67 |
+
|
68 |
+
|
69 |
+
def draw_bodypose(canvas: np.ndarray, keypoints: List[Keypoint]) -> np.ndarray:
    """
    Render a body skeleton (limbs, then joints) onto ``canvas`` in place.

    Args:
        canvas (np.ndarray): A 3D numpy array (image) that is drawn on and returned.
        keypoints (List[Keypoint]): Body keypoints; an entry may be None, in which
                                    case the joint and any limb touching it are skipped.

    Returns:
        np.ndarray: The same canvas object with the pose drawn on it.

    Note:
        Keypoint x and y are expected to be normalized between 0 and 1; they are
        scaled by the canvas width and height before drawing.
    """
    height, width, _ = canvas.shape
    stick_width = 4

    # Limbs as 1-based keypoint index pairs.
    limb_pairs = [
        [2, 3], [2, 6], [3, 4], [4, 5],
        [6, 7], [7, 8], [2, 9], [9, 10],
        [10, 11], [2, 12], [12, 13], [13, 14],
        [2, 1], [1, 15], [15, 17], [1, 16],
        [16, 18],
    ]

    palette = [
        [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0],
        [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255],
        [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85],
    ]

    # Each limb is a filled ellipse centred between its two joints.
    for (start, end), rgb in zip(limb_pairs, palette):
        first = keypoints[start - 1]
        second = keypoints[end - 1]
        if first is None or second is None:
            continue

        xs = np.array([first.x, second.x]) * float(width)
        ys = np.array([first.y, second.y]) * float(height)
        center = (int(np.mean(xs)), int(np.mean(ys)))
        length = ((ys[0] - ys[1]) ** 2 + (xs[0] - xs[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
        outline = cv2.ellipse2Poly(center, (int(length / 2), stick_width), int(angle), 0, 360, 1)
        # Limbs are drawn at 60% of the joint colour intensity.
        dimmed = [int(float(channel) * 0.6) for channel in rgb]
        cv2.fillConvexPoly(canvas, outline, dimmed)

    # Joints are drawn after the limbs, as filled circles of radius 4.
    for point, rgb in zip(keypoints, palette):
        if point is None:
            continue
        px = int(point.x * width)
        py = int(point.y * height)
        cv2.circle(canvas, (px, py), 4, rgb, thickness=-1)

    return canvas
|
124 |
+
|
125 |
+
|
126 |
+
def draw_handpose(canvas: np.ndarray, keypoints: Union[List[Keypoint], None]) -> np.ndarray:
    """
    Draw keypoints and connections representing hand pose on a given canvas.

    Args:
        canvas (np.ndarray): A 3D numpy array representing the canvas (image) on which to draw the hand pose.
        keypoints (List[Keypoint]| None): A list of Keypoint objects representing the hand keypoints to be drawn
                                          or None if no keypoints are present. Individual entries may be None
                                          and are skipped.

    Returns:
        np.ndarray: A 3D numpy array representing the modified canvas with the drawn hand pose.

    Note:
        The function expects the x and y coordinates of the keypoints to be normalized between 0 and 1.
        Lines and points are drawn only when their pixel coordinates are greater than the
        module-level ``eps`` threshold (defined outside this function).
    """
    # Function-scope import, kept local so matplotlib is only imported when this function runs.
    # The submodule is named explicitly so that ``matplotlib.colors`` is guaranteed to be loaded.
    import matplotlib.colors

    if not keypoints:
        return canvas

    H, W, C = canvas.shape

    # Index pairs into ``keypoints``; every chain starts at index 0.
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]

    for ie, (e1, e2) in enumerate(edges):
        k1 = keypoints[e1]
        k2 = keypoints[e2]
        if k1 is None or k2 is None:
            continue

        x1 = int(k1.x * W)
        y1 = int(k1.y * H)
        x2 = int(k2.x * W)
        y2 = int(k2.y * H)
        if x1 > eps and y1 > eps and x2 > eps and y2 > eps:
            # The hue advances with the edge index, so every edge gets its own colour.
            color = matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255
            cv2.line(canvas, (x1, y1), (x2, y2), color, thickness=2)

    for keypoint in keypoints:
        # Same None handling as the edge loop above: a missing point is skipped, not an error.
        if keypoint is None:
            continue
        x = int(keypoint.x * W)
        y = int(keypoint.y * H)
        if x > eps and y > eps:
            cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
    return canvas
|
170 |
+
|
171 |
+
|
172 |
+
def draw_facepose(canvas: np.ndarray, keypoints: Union[List[Keypoint], None]) -> np.ndarray:
    """
    Draw face keypoints as white dots on ``canvas`` in place.

    Args:
        canvas (np.ndarray): A 3D numpy array (image) that is drawn on and returned.
        keypoints (List[Keypoint]| None): Face keypoints to draw. None or an empty list
                                          leaves the canvas untouched.

    Returns:
        np.ndarray: The same canvas object with the face points drawn on it.

    Note:
        Keypoint x and y are expected to be normalized between 0 and 1. A point is
        drawn only when both pixel coordinates are greater than the module-level
        ``eps`` threshold (defined outside this function).
    """
    if not keypoints:
        return canvas

    height, width, _ = canvas.shape
    for point in keypoints:
        px = int(point.x * width)
        py = int(point.y * height)
        if not (px > eps and py > eps):
            continue
        cv2.circle(canvas, (px, py), 3, (255, 255, 255), thickness=-1)
    return canvas
|
198 |
+
|
199 |
+
|
200 |
+
# detect hand according to body pose keypoints
|
201 |
+
# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
|
202 |
+
def handDetect(body: BodyResult, oriImg) -> List[Tuple[int, int, int, bool]]:
    """
    Estimate a square bounding box for each hand from the arm keypoints of one body.

    Args:
        body (BodyResult): Detected body; only ``body.keypoints`` is read.
        oriImg (numpy.ndarray): Original image; only its height and width are read.

    Returns:
        List[Tuple[int, int, int, bool]]: One ``(x, y, width, is_left)`` tuple per hand,
        where (x, y) is the top-left corner of the box, ``width`` is its side length and
        ``is_left`` is True for the left hand. The left hand, when present, comes first.

    Notes:
        - Boxes are square (width == height) since the network requires squared input.
        - A hand is reported only when its shoulder, elbow and wrist are all present.
        - Boxes with a side smaller than 20 pixels are dropped.
    """
    wrist_elbow_ratio = 0.33
    img_h, img_w = oriImg.shape[0:2]
    joints = body.keypoints

    # (shoulder, elbow, wrist, is_left): left arm uses indices 5/6/7, right arm 2/3/4.
    arms = (
        (joints[5], joints[6], joints[7], True),
        (joints[2], joints[3], joints[4], False),
    )

    boxes = []
    for shoulder, elbow, wrist, is_left in arms:
        if shoulder is None or elbow is None or wrist is None:
            continue

        # Box centre: the wrist pushed further along the elbow->wrist direction.
        left = wrist.x + wrist_elbow_ratio * (wrist.x - elbow.x)
        top = wrist.y + wrist_elbow_ratio * (wrist.y - elbow.y)

        forearm = math.sqrt((wrist.x - elbow.x) ** 2 + (wrist.y - elbow.y) ** 2)
        upper_arm = math.sqrt((elbow.x - shoulder.x) ** 2 + (elbow.y - shoulder.y) ** 2)
        side = 1.5 * max(forearm, 0.9 * upper_arm)

        # Move from the centre to the top-left corner, clamped to the image origin.
        left -= side / 2
        top -= side / 2
        if left < 0:
            left = 0
        if top < 0:
            top = 0

        # Shrink the box so that it ends at the right / bottom image border.
        room_x = img_w - left if left + side > img_w else side
        room_y = img_h - top if top + side > img_h else side
        side = min(room_x, room_y)

        # The minimum accepted hand box side is 20 pixels.
        if side >= 20:
            boxes.append((int(left), int(top), int(side), is_left))

    return boxes
|
293 |
+
|
294 |
+
|
295 |
+
# Written by Lvmin
|
296 |
+
def faceDetect(body: BodyResult, oriImg) -> Union[Tuple[int, int, int], None]:
    """
    Detect the face in the input body pose keypoints and calculate the bounding box for the face.

    The box is centred on keypoint 0. Its half side length is the largest of
    3.0 x the distance to either eye (keypoints 14, 15) and 1.5 x the distance to
    either ear (keypoints 16, 17), where "distance" is the larger of the x and y
    offsets from keypoint 0.

    Args:
        body (BodyResult): A BodyResult object containing the detected body pose keypoints.
        oriImg (numpy.ndarray): A 3D numpy array representing the original input image.

    Returns:
        Tuple[int, int, int] | None: A tuple containing the coordinates (x, y) of the top-left corner of the
                                     bounding box and the width (height) of the bounding box, or None if the
                                     face is not detected or the bounding box width is less than 20 pixels.

    Notes:
        - The width and height of the bounding box are equal.
        - The minimum bounding box size is 20 pixels.
    """
    image_height, image_width = oriImg.shape[0:2]

    keypoints = body.keypoints
    head = keypoints[0]
    left_eye = keypoints[14]
    right_eye = keypoints[15]
    left_ear = keypoints[16]
    right_ear = keypoints[17]

    if head is None or all(keypoint is None for keypoint in (left_eye, right_eye, left_ear, right_ear)):
        return None

    x0, y0 = head.x, head.y

    # Half side length of the box; each available landmark can only enlarge it.
    half = 0.0
    for landmark, scale in ((left_eye, 3.0), (right_eye, 3.0), (left_ear, 1.5), (right_ear, 1.5)):
        if landmark is None:
            continue
        d = max(abs(x0 - landmark.x), abs(y0 - landmark.y))
        half = max(half, d * scale)

    # Top-left corner, clamped to the image origin.
    x = x0 - half
    y = y0 - half
    if x < 0:
        x = 0
    if y < 0:
        y = 0

    side = half * 2
    width1 = side
    width2 = side

    # The box spans ``side`` (= 2 * half) pixels, so the overflow test has to use
    # the full side. Testing only ``half`` lets boxes run past the image border.
    if x + side > image_width:
        width1 = image_width - x
    if y + side > image_height:
        width2 = image_height - y

    width = min(width1, width2)

    if width >= 20:
        return int(x), int(y), int(width)
    return None
|
375 |
+
|
376 |
+
|
377 |
+
# get max index of 2d array
|
378 |
+
def npmax(array):
    """Return the ``(row, column)`` index of the maximum of a 2D array.

    The row is the first one whose maximum equals the overall maximum; the
    column is the first position of that maximum within the row.
    """
    row_peaks = array.max(1)
    best_col_per_row = array.argmax(1)
    i = row_peaks.argmax()
    return i, best_col_per_row[i]
|
controlnet_aux/src/controlnet_aux/util.py
ADDED
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import random
|
3 |
+
|
4 |
+
import cv2
|
5 |
+
import numpy as np
|
6 |
+
import torch
|
7 |
+
|
8 |
+
annotator_ckpts_path = os.path.join(os.path.dirname(__file__), 'ckpts')
|
9 |
+
|
10 |
+
|
11 |
+
def HWC3(x):
    """Convert a uint8 image with 1, 3 or 4 channels to a 3-channel image.

    - 2D input is treated as a single channel.
    - 1 channel: the channel is repeated three times.
    - 3 channels: the input array itself is returned.
    - 4 channels: the first three channels are blended over a white (255)
      background, using the fourth channel as alpha.

    Raises:
        AssertionError: if the dtype is not uint8, or the array is not 2D/3D,
        or the channel count is not 1, 3 or 4.
    """
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3
    channels = x.shape[2]
    assert channels in (1, 3, 4)

    if channels == 1:
        return np.concatenate([x, x, x], axis=2)
    if channels == 3:
        return x

    # Four channels: alpha-composite onto white.
    rgb = x[:, :, 0:3].astype(np.float32)
    opacity = x[:, :, 3:4].astype(np.float32) / 255.0
    blended = rgb * opacity + 255.0 * (1.0 - opacity)
    return blended.clip(0, 255).astype(np.uint8)
|
28 |
+
|
29 |
+
|
30 |
+
def make_noise_disk(H, W, C, F):
    """Generate a random noise field of size ``H`` x ``W`` with ``C`` channels.

    Uniform noise is sampled on a coarse grid of about ``H // F`` x ``W // F``
    cells, upsampled with bicubic interpolation, cropped to ``H`` x ``W`` and
    rescaled so that its minimum is 0 and its maximum is 1.
    """
    coarse_shape = ((H // F) + 2, (W // F) + 2, C)
    field = np.random.uniform(low=0, high=1, size=coarse_shape)
    field = cv2.resize(field, (W + 2 * F, H + 2 * F), interpolation=cv2.INTER_CUBIC)
    # Drop the F-pixel margin on each side that was added before upsampling.
    field = field[F: F + H, F: F + W]
    field -= np.min(field)
    field /= np.max(field)
    # For C == 1 the channel axis is re-added here so the result stays 3D.
    return field[:, :, None] if C == 1 else field
|
39 |
+
|
40 |
+
|
41 |
+
def nms(x, t, s):
    """Thin a response map by directional non-maximum suppression.

    ``x`` is blurred with a Gaussian of sigma ``s``. A pixel is kept when it
    equals the dilation of the blurred map under at least one of four 3x3 line
    kernels (horizontal, vertical and the two diagonals). Kept values above the
    threshold ``t`` become 255 in the returned uint8 mask; the rest are 0.
    """
    blurred = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)

    line_kernels = [
        np.array(rows, dtype=np.uint8)
        for rows in (
            [[0, 0, 0], [1, 1, 1], [0, 0, 0]],
            [[0, 1, 0], [0, 1, 0], [0, 1, 0]],
            [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
            [[0, 0, 1], [0, 1, 0], [1, 0, 0]],
        )
    ]

    kept = np.zeros_like(blurred)
    for kernel in line_kernels:
        unchanged = cv2.dilate(blurred, kernel=kernel) == blurred
        np.putmask(kept, unchanged, blurred)

    mask = np.zeros_like(kept, dtype=np.uint8)
    mask[kept > t] = 255
    return mask
|
57 |
+
|
58 |
+
def min_max_norm(x):
    """Rescale ``x`` in place to start at 0 and span at most 1, and return it.

    The minimum is subtracted first; the result is then divided by its maximum,
    which is floored at 1e-5 so that a constant input does not divide by zero.
    The caller's array is modified.
    """
    lowest = np.min(x)
    x -= lowest
    peak = np.max(x)
    x /= np.maximum(peak, 1e-5)
    return x
|
62 |
+
|
63 |
+
|
64 |
+
def safe_step(x, step=2):
    """Quantize ``x`` into discrete levels.

    The input is cast to float32, multiplied by ``step + 1``, truncated through
    an int32 cast, and divided by ``step``. The result is a float32 array.
    """
    scaled = x.astype(np.float32) * float(step + 1)
    truncated = scaled.astype(np.int32)
    return truncated.astype(np.float32) / float(step)
|
68 |
+
|
69 |
+
|
70 |
+
def img2mask(img, H, W, low=10, high=90):
    """Derive a random boolean mask of shape (H, W) from a uint8 image.

    A random channel is picked (for 3D input), resized to ``W`` x ``H`` with
    bicubic interpolation and, with probability 0.5, inverted. The mask marks
    pixels below a percentile drawn at random from ``[low, high)``.

    Raises:
        AssertionError: if ``img`` is not 2D/3D or not uint8.
    """
    assert img.ndim == 3 or img.ndim == 2
    assert img.dtype == np.uint8

    plane = img[:, :, random.randrange(0, img.shape[2])] if img.ndim == 3 else img
    plane = cv2.resize(plane, (W, H), interpolation=cv2.INTER_CUBIC)

    invert = random.uniform(0, 1) < 0.5
    if invert:
        plane = 255 - plane

    return plane < np.percentile(plane, random.randrange(low, high))
|
85 |
+
|
86 |
+
|
87 |
+
def resize_image(input_image, resolution):
    """Resize an image so that its shorter side is about ``resolution`` pixels.

    Both target sides are rounded to the nearest multiple of 64. Lanczos-4
    interpolation is used when enlarging (scale > 1), area interpolation
    otherwise.
    """
    src_h, src_w, _ = input_image.shape
    src_h = float(src_h)
    src_w = float(src_w)
    scale = float(resolution) / min(src_h, src_w)

    dst_h = int(np.round(src_h * scale / 64.0)) * 64
    dst_w = int(np.round(src_w * scale / 64.0)) * 64

    method = cv2.INTER_LANCZOS4 if scale > 1 else cv2.INTER_AREA
    return cv2.resize(input_image, (dst_w, dst_h), interpolation=method)
|
98 |
+
|
99 |
+
|
100 |
+
def torch_gc():
    """Empty the CUDA cache and run ``ipc_collect``; do nothing without CUDA."""
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
|
104 |
+
|
105 |
+
|
106 |
+
def ade_palette():
    """ADE20K palette that maps each class to RGB values.

    Returns:
        list: 150 ``[R, G, B]`` lists, rebuilt on every call.
    """
    palette = [
        [120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50],
        [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255],
        [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7],
        [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82],
        [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3],
        [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255],
        [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220],
        [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224],
        [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255],
        [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7],
        [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153],
        [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255],
        [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0],
        [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255],
        [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255],
        [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255],
        [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0],
        [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0],
        [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255],
        [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255],
        [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20],
        [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255],
        [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255],
        [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255],
        [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0],
        [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0],
        [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255],
        [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112],
        [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160],
        [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163],
        [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0],
        [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0],
        [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255],
        [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204],
        [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255],
        [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255],
        [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194],
        [102, 255, 0], [92, 0, 255],
    ]
    return palette
|
146 |
+
|
controlnet_aux/tests/test_controlnet_aux.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import shutil
|
3 |
+
from io import BytesIO
|
4 |
+
|
5 |
+
import numpy as np
|
6 |
+
import pytest
|
7 |
+
import requests
|
8 |
+
from PIL import Image
|
9 |
+
|
10 |
+
from controlnet_aux import (CannyDetector, ContentShuffleDetector, HEDdetector,
|
11 |
+
LeresDetector, LineartAnimeDetector,
|
12 |
+
LineartDetector, MediapipeFaceDetector,
|
13 |
+
MidasDetector, MLSDdetector, NormalBaeDetector,
|
14 |
+
OpenposeDetector, PidiNetDetector, SamDetector,
|
15 |
+
ZoeDetector, DWposeDetector)
|
16 |
+
|
17 |
+
OUTPUT_DIR = "tests/outputs"
|
18 |
+
|
19 |
+
def output(name, img):
    """Save ``img`` as ``<name>.png`` inside ``OUTPUT_DIR``."""
    filename = "{:s}.png".format(name)
    destination = os.path.join(OUTPUT_DIR, filename)
    img.save(destination)
|
21 |
+
|
22 |
+
def common(name, processor, img):
    """Run ``processor`` through the input/output combinations shared by all detectors.

    Saved results, in order: default call, PIL in / numpy out, numpy in / numpy out,
    numpy in / PIL out, and a call with explicit detect and image resolutions.
    """
    output(name, processor(img))

    pil_to_np = processor(img, output_type="np")
    output(name + "_pil_np", Image.fromarray(pil_to_np))

    # A fresh array is built for each numpy-input call.
    np_to_np = processor(np.array(img, dtype=np.uint8), output_type="np")
    output(name + "_np_np", Image.fromarray(np_to_np))

    np_to_pil = processor(np.array(img, dtype=np.uint8), output_type="pil")
    output(name + "_np_pil", np_to_pil)

    rescaled = processor(img, detect_resolution=640, image_resolution=768)
    output(name + "_scaled", rescaled)
|
28 |
+
|
29 |
+
def return_pil(name, processor, img):
    """Exercise the ``return_pil`` flag of ``processor`` with both values and save the results."""
    as_array = processor(img, return_pil=False)
    output(name + "_pil_false", Image.fromarray(as_array))

    as_pil = processor(img, return_pil=True)
    output(name + "_pil_true", as_pil)
|
32 |
+
|
33 |
+
@pytest.fixture(scope="module")
def img():
    """Module-scoped fixture: reset ``OUTPUT_DIR`` and return the shared test image.

    The output directory is removed if it exists and created again. The sample
    pose image is then downloaded, converted to RGB and resized to 512x512.

    Raises:
        requests.HTTPError: if the download responds with an error status.
        requests.Timeout: if the server does not answer in time.
    """
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)
    # makedirs also creates a missing parent directory, so the suite does not
    # depend on the directory it is launched from.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    url = "https://huggingface.co/lllyasviel/sd-controlnet-openpose/resolve/main/images/pose.png"
    # A timeout keeps a stalled connection from hanging the test run.
    response = requests.get(url, timeout=60)
    # Fail here with a clear HTTP error instead of later while decoding an error page.
    response.raise_for_status()

    image = Image.open(BytesIO(response.content)).convert("RGB").resize((512, 512))
    return image
|
42 |
+
|
43 |
+
def test_canny(img):
    """Run CannyDetector through the shared checks and the ``img=`` keyword call."""
    detector = CannyDetector()
    common("canny", detector, img)
    keyword_result = detector(img=img)
    output("canny_img", keyword_result)
|
47 |
+
|
48 |
+
def test_hed(img):
    """Run HEDdetector through the shared checks plus its ``safe`` and ``scribble`` modes."""
    detector = HEDdetector.from_pretrained("lllyasviel/Annotators")
    common("hed", detector, img)
    return_pil("hed", detector, img)

    variants = (
        ("hed_safe", {"safe": True}),
        ("hed_scribble", {"scribble": True}),
    )
    for filename, options in variants:
        output(filename, detector(img, **options))
|
54 |
+
|
55 |
+
def test_leres(img):
    """Run LeresDetector through the shared checks plus its ``boost`` mode."""
    detector = LeresDetector.from_pretrained("lllyasviel/Annotators")
    common("leres", detector, img)
    boosted = detector(img, boost=True)
    output("leres_boost", boosted)
|
59 |
+
|
60 |
+
def test_lineart(img):
    """Run LineartDetector through the shared checks plus its ``coarse`` mode."""
    detector = LineartDetector.from_pretrained("lllyasviel/Annotators")
    common("lineart", detector, img)
    return_pil("lineart", detector, img)
    coarse = detector(img, coarse=True)
    output("lineart_coarse", coarse)
|
65 |
+
|
66 |
+
def test_lineart_anime(img):
    """Run LineartAnimeDetector through the shared and ``return_pil`` checks."""
    detector = LineartAnimeDetector.from_pretrained("lllyasviel/Annotators")
    for check in (common, return_pil):
        check("lineart_anime", detector, img)
|
70 |
+
|
71 |
+
def test_mediapipe_face(img):
    """Run MediapipeFaceDetector through the shared checks and the ``image=`` keyword call."""
    detector = MediapipeFaceDetector()
    common("mediapipe", detector, img)
    keyword_result = detector(image=img)
    output("mediapipe_image", keyword_result)
|
75 |
+
|
76 |
+
def test_midas(img):
    """Run MidasDetector through the shared checks and save the second ``depth_and_normal`` output."""
    detector = MidasDetector.from_pretrained("lllyasviel/Annotators")
    common("midas", detector, img)
    # With depth_and_normal=True the detector returns a sequence; index 1 is saved.
    results = detector(img, depth_and_normal=True)
    output("midas_normal", results[1])
|
80 |
+
|
81 |
+
def test_mlsd(img):
    """Run MLSDdetector through the shared and ``return_pil`` checks."""
    detector = MLSDdetector.from_pretrained("lllyasviel/Annotators")
    for check in (common, return_pil):
        check("mlsd", detector, img)
|
85 |
+
|
86 |
+
def test_normalbae(img):
    """Run NormalBaeDetector through the shared and ``return_pil`` checks."""
    detector = NormalBaeDetector.from_pretrained("lllyasviel/Annotators")
    for check in (common, return_pil):
        check("normal_bae", detector, img)
|
90 |
+
|
91 |
+
def test_openpose(img):
    """Run OpenposeDetector through the shared checks and every body/hand/face option set."""
    detector = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
    common("openpose", detector, img)
    return_pil("openpose", detector, img)

    # (output name, keyword arguments), executed in this order.
    variants = (
        ("openpose_hand_and_face_false", {"hand_and_face": False}),
        ("openpose_hand_and_face_true", {"hand_and_face": True}),
        ("openpose_face", {"include_body": True, "include_hand": False, "include_face": True}),
        ("openpose_faceonly", {"include_body": False, "include_hand": False, "include_face": True}),
        ("openpose_full", {"include_body": True, "include_hand": True, "include_face": True}),
        ("openpose_hand", {"include_body": True, "include_hand": True, "include_face": False}),
    )
    for filename, options in variants:
        output(filename, detector(img, **options))
|
101 |
+
|
102 |
+
def test_pidi(img):
    """Run PidiNetDetector through the shared checks plus its ``safe`` and ``scribble`` modes."""
    detector = PidiNetDetector.from_pretrained("lllyasviel/Annotators")
    common("pidi", detector, img)
    return_pil("pidi", detector, img)

    variants = (
        ("pidi_safe", {"safe": True}),
        ("pidi_scribble", {"scribble": True}),
    )
    for filename, options in variants:
        output(filename, detector(img, **options))
|
108 |
+
|
109 |
+
def test_sam(img):
    """Run SamDetector through the shared checks and the ``image=`` keyword call."""
    detector = SamDetector.from_pretrained("ybelkada/segment-anything", subfolder="checkpoints")
    common("sam", detector, img)
    keyword_result = detector(image=img)
    output("sam_image", keyword_result)
|
113 |
+
|
114 |
+
def test_shuffle(img):
    """Run ContentShuffleDetector through the shared and ``return_pil`` checks."""
    detector = ContentShuffleDetector()
    for check in (common, return_pil):
        check("shuffle", detector, img)
|
118 |
+
|
119 |
+
def test_zoe(img):
    """Run ZoeDetector through the shared checks."""
    detector = ZoeDetector.from_pretrained("lllyasviel/Annotators")
    common("zoe", detector, img)
|
122 |
+
|
123 |
+
def test_dwpose(img):
    """Run DWposeDetector through the shared and ``return_pil`` checks."""
    detector = DWposeDetector()
    for check in (common, return_pil):
        check("dwpose", detector, img)
|