Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .dockerignore +40 -0
- .gitattributes +13 -0
- .gitignore +186 -0
- LICENSE +201 -0
- README.md +3 -9
- Untitled.ipynb +432 -0
- checkpoints/ControlNetModel/config.json +57 -0
- checkpoints/ControlNetModel/diffusion_pytorch_model.safetensors +3 -0
- checkpoints/ip-adapter.bin +3 -0
- checkpoints/pytorch_lora_weights.safetensors +3 -0
- cog.yaml +40 -0
- cog/README.md +60 -0
- cog/predict.py +756 -0
- feature-extractor/preprocessor_config.json +27 -0
- generated_images/20240723_053704_668578_0.png +3 -0
- generated_images/20240723_053801_148984_0.png +3 -0
- generated_images/20240723_053853_022841_0.png +3 -0
- generated_images/20240723_053948_468290_0.png +3 -0
- generated_images/20240723_054025_692605_0.png +3 -0
- generated_images/20240723_054124_697176_0.png +3 -0
- generation_log.csv +7 -0
- gradio_demo/aaa.py +957 -0
- gradio_demo/app-multicontrolnet.py +670 -0
- gradio_demo/app.py +656 -0
- gradio_demo/app1.py +434 -0
- gradio_demo/background.jpg +0 -0
- gradio_demo/controlnet_util.py +39 -0
- gradio_demo/demo.py +369 -0
- gradio_demo/download_models.py +27 -0
- gradio_demo/logo.png +0 -0
- gradio_demo/logo1.png +0 -0
- gradio_demo/model_util.py +472 -0
- gradio_demo/preprocess.py +232 -0
- gradio_demo/requirements.txt +19 -0
- gradio_demo/style_template.py +136 -0
- gradio_demo/test.py +400 -0
- gradio_demo/watermark.png +0 -0
- image_data.csv +15 -0
- images/aa.ll_gallery1.png +3 -0
- images/aa.ll_gallery2.png +0 -0
- images/aa.ll_gallery3.png +0 -0
- images/aa.ll_gallery4.png +0 -0
- images/heeral@img_gallery1.png +0 -0
- images/heeral@img_gallery2.png +0 -0
- images/heeral@img_gallery3.png +0 -0
- images/heeral@img_gallery4.png +0 -0
- images/kajal@img_gallery1.png +0 -0
- images/kajal@img_gallery2.png +0 -0
- images/kajal@img_gallery3.png +0 -0
- images/kajal@img_gallery4.png +0 -0
.dockerignore
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# The .dockerignore file excludes files from the container build process.
|
2 |
+
#
|
3 |
+
# https://docs.docker.com/engine/reference/builder/#dockerignore-file
|
4 |
+
|
5 |
+
# Replicate
|
6 |
+
/safety-cache/
|
7 |
+
/gradio_cached_examples/
|
8 |
+
*.mp4
|
9 |
+
*.pth
|
10 |
+
*.pt
|
11 |
+
*.bin
|
12 |
+
*.ckpt
|
13 |
+
*.onnx
|
14 |
+
*.tar
|
15 |
+
*.tar.gz
|
16 |
+
*.h5
|
17 |
+
*.pb
|
18 |
+
*.caffemodel
|
19 |
+
*.weights
|
20 |
+
*.tar
|
21 |
+
*.jpg
|
22 |
+
*.jpeg
|
23 |
+
*.png
|
24 |
+
*.webp
|
25 |
+
.vscode
|
26 |
+
|
27 |
+
# Exclude Git files
|
28 |
+
.git
|
29 |
+
.github
|
30 |
+
.gitignore
|
31 |
+
|
32 |
+
# Exclude Python cache files
|
33 |
+
__pycache__
|
34 |
+
.pytest_cache/
|
35 |
+
.mypy_cache
|
36 |
+
.pytest_cache
|
37 |
+
.ruff_cache
|
38 |
+
|
39 |
+
# Exclude Python virtual environment
|
40 |
+
/venv
|
.gitattributes
CHANGED
@@ -33,3 +33,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
generated_images/20240723_053704_668578_0.png filter=lfs diff=lfs merge=lfs -text
|
37 |
+
generated_images/20240723_053801_148984_0.png filter=lfs diff=lfs merge=lfs -text
|
38 |
+
generated_images/20240723_053853_022841_0.png filter=lfs diff=lfs merge=lfs -text
|
39 |
+
generated_images/20240723_053948_468290_0.png filter=lfs diff=lfs merge=lfs -text
|
40 |
+
generated_images/20240723_054025_692605_0.png filter=lfs diff=lfs merge=lfs -text
|
41 |
+
generated_images/20240723_054124_697176_0.png filter=lfs diff=lfs merge=lfs -text
|
42 |
+
images/aa.ll_gallery1.png filter=lfs diff=lfs merge=lfs -text
|
43 |
+
images/yashvi_gallery1.png filter=lfs diff=lfs merge=lfs -text
|
44 |
+
images/yashvi_gallery4.png filter=lfs diff=lfs merge=lfs -text
|
45 |
+
images/[email protected]_gallery1.png filter=lfs diff=lfs merge=lfs -text
|
46 |
+
images/[email protected]_gallery2.png filter=lfs diff=lfs merge=lfs -text
|
47 |
+
images/[email protected]_gallery3.png filter=lfs diff=lfs merge=lfs -text
|
48 |
+
images/[email protected]_gallery4.png filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# pdm
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
106 |
+
#pdm.lock
|
107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
+
# in version control.
|
109 |
+
# https://pdm.fming.dev/#use-with-ide
|
110 |
+
.pdm.toml
|
111 |
+
|
112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
113 |
+
__pypackages__/
|
114 |
+
|
115 |
+
# Celery stuff
|
116 |
+
celerybeat-schedule
|
117 |
+
celerybeat.pid
|
118 |
+
|
119 |
+
# SageMath parsed files
|
120 |
+
*.sage.py
|
121 |
+
|
122 |
+
# Environments
|
123 |
+
.env
|
124 |
+
.venv
|
125 |
+
env/
|
126 |
+
venv/
|
127 |
+
ENV/
|
128 |
+
env.bak/
|
129 |
+
venv.bak/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
143 |
+
.dmypy.json
|
144 |
+
dmypy.json
|
145 |
+
|
146 |
+
# Pyre type checker
|
147 |
+
.pyre/
|
148 |
+
|
149 |
+
# pytype static type analyzer
|
150 |
+
.pytype/
|
151 |
+
|
152 |
+
# Cython debug symbols
|
153 |
+
cython_debug/
|
154 |
+
|
155 |
+
# PyCharm
|
156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
#.idea/
|
161 |
+
huggingface/
|
162 |
+
|
163 |
+
# Cog
|
164 |
+
/.cog/
|
165 |
+
/safety-cache/
|
166 |
+
*.tar
|
167 |
+
.vscode
|
168 |
+
gradio_cached_examples
|
169 |
+
cog/test_batchsize.py
|
170 |
+
input.png
|
171 |
+
output_*.png
|
172 |
+
output.*.png
|
173 |
+
output_image_*.png
|
174 |
+
output_image.*.png
|
175 |
+
output_*.webp
|
176 |
+
output.*.webp
|
177 |
+
output_image_*.webp
|
178 |
+
output_image.*.webp
|
179 |
+
output_*.jpg
|
180 |
+
output.*.jpg
|
181 |
+
output_image_*.jpg
|
182 |
+
output_image.*.jpg
|
183 |
+
output_*.jpeg
|
184 |
+
output.*.jpeg
|
185 |
+
output_image_*.jpeg
|
186 |
+
output_image.*.jpeg
|
LICENSE
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Apache License
|
2 |
+
Version 2.0, January 2004
|
3 |
+
http://www.apache.org/licenses/
|
4 |
+
|
5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6 |
+
|
7 |
+
1. Definitions.
|
8 |
+
|
9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
11 |
+
|
12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13 |
+
the copyright owner that is granting the License.
|
14 |
+
|
15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
16 |
+
other entities that control, are controlled by, or are under common
|
17 |
+
control with that entity. For the purposes of this definition,
|
18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
19 |
+
direction or management of such entity, whether by contract or
|
20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22 |
+
|
23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24 |
+
exercising permissions granted by this License.
|
25 |
+
|
26 |
+
"Source" form shall mean the preferred form for making modifications,
|
27 |
+
including but not limited to software source code, documentation
|
28 |
+
source, and configuration files.
|
29 |
+
|
30 |
+
"Object" form shall mean any form resulting from mechanical
|
31 |
+
transformation or translation of a Source form, including but
|
32 |
+
not limited to compiled object code, generated documentation,
|
33 |
+
and conversions to other media types.
|
34 |
+
|
35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
36 |
+
Object form, made available under the License, as indicated by a
|
37 |
+
copyright notice that is included in or attached to the work
|
38 |
+
(an example is provided in the Appendix below).
|
39 |
+
|
40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41 |
+
form, that is based on (or derived from) the Work and for which the
|
42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
44 |
+
of this License, Derivative Works shall not include works that remain
|
45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46 |
+
the Work and Derivative Works thereof.
|
47 |
+
|
48 |
+
"Contribution" shall mean any work of authorship, including
|
49 |
+
the original version of the Work and any modifications or additions
|
50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
54 |
+
means any form of electronic, verbal, or written communication sent
|
55 |
+
to the Licensor or its representatives, including but not limited to
|
56 |
+
communication on electronic mailing lists, source code control systems,
|
57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
59 |
+
excluding communication that is conspicuously marked or otherwise
|
60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
61 |
+
|
62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
64 |
+
subsequently incorporated within the Work.
|
65 |
+
|
66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
71 |
+
Work and such Derivative Works in Source or Object form.
|
72 |
+
|
73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76 |
+
(except as stated in this section) patent license to make, have made,
|
77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78 |
+
where such license applies only to those patent claims licensable
|
79 |
+
by such Contributor that are necessarily infringed by their
|
80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
82 |
+
institute patent litigation against any entity (including a
|
83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84 |
+
or a Contribution incorporated within the Work constitutes direct
|
85 |
+
or contributory patent infringement, then any patent licenses
|
86 |
+
granted to You under this License for that Work shall terminate
|
87 |
+
as of the date such litigation is filed.
|
88 |
+
|
89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
90 |
+
Work or Derivative Works thereof in any medium, with or without
|
91 |
+
modifications, and in Source or Object form, provided that You
|
92 |
+
meet the following conditions:
|
93 |
+
|
94 |
+
(a) You must give any other recipients of the Work or
|
95 |
+
Derivative Works a copy of this License; and
|
96 |
+
|
97 |
+
(b) You must cause any modified files to carry prominent notices
|
98 |
+
stating that You changed the files; and
|
99 |
+
|
100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
101 |
+
that You distribute, all copyright, patent, trademark, and
|
102 |
+
attribution notices from the Source form of the Work,
|
103 |
+
excluding those notices that do not pertain to any part of
|
104 |
+
the Derivative Works; and
|
105 |
+
|
106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107 |
+
distribution, then any Derivative Works that You distribute must
|
108 |
+
include a readable copy of the attribution notices contained
|
109 |
+
within such NOTICE file, excluding those notices that do not
|
110 |
+
pertain to any part of the Derivative Works, in at least one
|
111 |
+
of the following places: within a NOTICE text file distributed
|
112 |
+
as part of the Derivative Works; within the Source form or
|
113 |
+
documentation, if provided along with the Derivative Works; or,
|
114 |
+
within a display generated by the Derivative Works, if and
|
115 |
+
wherever such third-party notices normally appear. The contents
|
116 |
+
of the NOTICE file are for informational purposes only and
|
117 |
+
do not modify the License. You may add Your own attribution
|
118 |
+
notices within Derivative Works that You distribute, alongside
|
119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
120 |
+
that such additional attribution notices cannot be construed
|
121 |
+
as modifying the License.
|
122 |
+
|
123 |
+
You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright [yyyy] [name of copyright owner]
|
190 |
+
|
191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192 |
+
you may not use this file except in compliance with the License.
|
193 |
+
You may obtain a copy of the License at
|
194 |
+
|
195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
196 |
+
|
197 |
+
Unless required by applicable law or agreed to in writing, software
|
198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200 |
+
See the License for the specific language governing permissions and
|
201 |
+
limitations under the License.
|
README.md
CHANGED
@@ -1,12 +1,6 @@
|
|
1 |
---
|
2 |
-
title: IDfy
|
3 |
-
|
4 |
-
colorFrom: indigo
|
5 |
-
colorTo: purple
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 4.
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: IDfy-Avatarify
|
3 |
+
app_file: gradio_demo/app.py
|
|
|
|
|
4 |
sdk: gradio
|
5 |
+
sdk_version: 4.38.1
|
|
|
|
|
6 |
---
|
|
|
|
Untitled.ipynb
ADDED
@@ -0,0 +1,432 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"id": "82c97d2c-16bf-4b2c-b16c-d9f7a8a5b12f",
|
7 |
+
"metadata": {
|
8 |
+
"tags": []
|
9 |
+
},
|
10 |
+
"outputs": [
|
11 |
+
{
|
12 |
+
"name": "stdout",
|
13 |
+
"output_type": "stream",
|
14 |
+
"text": [
|
15 |
+
"Collecting diffusers==0.25.1 (from -r gradio_demo/requirements.txt (line 1))\n",
|
16 |
+
" Downloading diffusers-0.25.1-py3-none-any.whl.metadata (19 kB)\n",
|
17 |
+
"Requirement already satisfied: torch==2.0.0 in /opt/conda/lib/python3.10/site-packages (from -r gradio_demo/requirements.txt (line 2)) (2.0.0+cu118)\n",
|
18 |
+
"Requirement already satisfied: torchvision==0.15.1 in /opt/conda/lib/python3.10/site-packages (from -r gradio_demo/requirements.txt (line 3)) (0.15.1+cu118)\n",
|
19 |
+
"Collecting transformers==4.37.1 (from -r gradio_demo/requirements.txt (line 4))\n",
|
20 |
+
" Downloading transformers-4.37.1-py3-none-any.whl.metadata (129 kB)\n",
|
21 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.4/129.4 kB\u001b[0m \u001b[31m1.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
|
22 |
+
"\u001b[?25hCollecting accelerate (from -r gradio_demo/requirements.txt (line 5))\n",
|
23 |
+
" Downloading accelerate-0.32.1-py3-none-any.whl.metadata (18 kB)\n",
|
24 |
+
"Collecting safetensors (from -r gradio_demo/requirements.txt (line 6))\n",
|
25 |
+
" Downloading safetensors-0.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n",
|
26 |
+
"Collecting einops (from -r gradio_demo/requirements.txt (line 7))\n",
|
27 |
+
" Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)\n",
|
28 |
+
"Collecting onnxruntime-gpu (from -r gradio_demo/requirements.txt (line 8))\n",
|
29 |
+
" Downloading onnxruntime_gpu-1.18.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.4 kB)\n",
|
30 |
+
"Collecting spaces==0.19.4 (from -r gradio_demo/requirements.txt (line 9))\n",
|
31 |
+
" Downloading spaces-0.19.4-py3-none-any.whl.metadata (972 bytes)\n",
|
32 |
+
"Collecting omegaconf (from -r gradio_demo/requirements.txt (line 10))\n",
|
33 |
+
" Downloading omegaconf-2.3.0-py3-none-any.whl.metadata (3.9 kB)\n",
|
34 |
+
"Collecting peft (from -r gradio_demo/requirements.txt (line 11))\n",
|
35 |
+
" Downloading peft-0.11.1-py3-none-any.whl.metadata (13 kB)\n",
|
36 |
+
"Collecting huggingface-hub==0.20.2 (from -r gradio_demo/requirements.txt (line 12))\n",
|
37 |
+
" Downloading huggingface_hub-0.20.2-py3-none-any.whl.metadata (12 kB)\n",
|
38 |
+
"Collecting opencv-python (from -r gradio_demo/requirements.txt (line 13))\n",
|
39 |
+
" Downloading opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n",
|
40 |
+
"Collecting insightface (from -r gradio_demo/requirements.txt (line 14))\n",
|
41 |
+
" Downloading insightface-0.7.3.tar.gz (439 kB)\n",
|
42 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m439.5/439.5 kB\u001b[0m \u001b[31m10.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
|
43 |
+
"\u001b[?25h Installing build dependencies ... \u001b[?25ldone\n",
|
44 |
+
"\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n",
|
45 |
+
"\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
|
46 |
+
"\u001b[?25hCollecting gradio (from -r gradio_demo/requirements.txt (line 15))\n",
|
47 |
+
" Downloading gradio-4.38.1-py3-none-any.whl.metadata (15 kB)\n",
|
48 |
+
"Collecting controlnet_aux (from -r gradio_demo/requirements.txt (line 16))\n",
|
49 |
+
" Downloading controlnet_aux-0.0.9-py3-none-any.whl.metadata (6.5 kB)\n",
|
50 |
+
"Collecting gdown (from -r gradio_demo/requirements.txt (line 17))\n",
|
51 |
+
" Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)\n",
|
52 |
+
"Requirement already satisfied: importlib-metadata in /opt/conda/lib/python3.10/site-packages (from diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (7.0.0)\n",
|
53 |
+
"Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (3.15.4)\n",
|
54 |
+
"Requirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (from diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (1.25.2)\n",
|
55 |
+
"Collecting regex!=2019.12.17 (from diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1))\n",
|
56 |
+
" Downloading regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n",
|
57 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
58 |
+
"\u001b[?25hRequirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (2.32.3)\n",
|
59 |
+
"Requirement already satisfied: Pillow in /opt/conda/lib/python3.10/site-packages (from diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (10.4.0)\n",
|
60 |
+
"Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (4.12.2)\n",
|
61 |
+
"Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (1.13.0)\n",
|
62 |
+
"Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (3.3)\n",
|
63 |
+
"Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (3.1.4)\n",
|
64 |
+
"Requirement already satisfied: triton==2.0.0 in /opt/conda/lib/python3.10/site-packages (from torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (2.0.0)\n",
|
65 |
+
"Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from transformers==4.37.1->-r gradio_demo/requirements.txt (line 4)) (24.1)\n",
|
66 |
+
"Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from transformers==4.37.1->-r gradio_demo/requirements.txt (line 4)) (6.0.1)\n",
|
67 |
+
"Collecting tokenizers<0.19,>=0.14 (from transformers==4.37.1->-r gradio_demo/requirements.txt (line 4))\n",
|
68 |
+
" Downloading tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n",
|
69 |
+
"Requirement already satisfied: tqdm>=4.27 in /opt/conda/lib/python3.10/site-packages (from transformers==4.37.1->-r gradio_demo/requirements.txt (line 4)) (4.66.4)\n",
|
70 |
+
"Requirement already satisfied: httpx>=0.20 in /opt/conda/lib/python3.10/site-packages (from spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (0.27.0)\n",
|
71 |
+
"Requirement already satisfied: psutil<6,>=2 in /opt/conda/lib/python3.10/site-packages (from spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (5.9.3)\n",
|
72 |
+
"Requirement already satisfied: pydantic<3,>=1 in /opt/conda/lib/python3.10/site-packages (from spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (1.10.17)\n",
|
73 |
+
"Requirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.20.2->-r gradio_demo/requirements.txt (line 12)) (2024.6.1)\n",
|
74 |
+
"Requirement already satisfied: cmake in /opt/conda/lib/python3.10/site-packages (from triton==2.0.0->torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (3.30.0)\n",
|
75 |
+
"Requirement already satisfied: lit in /opt/conda/lib/python3.10/site-packages (from triton==2.0.0->torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (18.1.8)\n",
|
76 |
+
"Collecting coloredlogs (from onnxruntime-gpu->-r gradio_demo/requirements.txt (line 8))\n",
|
77 |
+
" Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)\n",
|
78 |
+
"Collecting flatbuffers (from onnxruntime-gpu->-r gradio_demo/requirements.txt (line 8))\n",
|
79 |
+
" Downloading flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)\n",
|
80 |
+
"Requirement already satisfied: protobuf in /opt/conda/lib/python3.10/site-packages (from onnxruntime-gpu->-r gradio_demo/requirements.txt (line 8)) (3.20.3)\n",
|
81 |
+
"Collecting antlr4-python3-runtime==4.9.* (from omegaconf->-r gradio_demo/requirements.txt (line 10))\n",
|
82 |
+
" Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)\n",
|
83 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.0/117.0 kB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
84 |
+
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n",
|
85 |
+
"\u001b[?25hCollecting onnx (from insightface->-r gradio_demo/requirements.txt (line 14))\n",
|
86 |
+
" Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)\n",
|
87 |
+
"Requirement already satisfied: matplotlib in /opt/conda/lib/python3.10/site-packages (from insightface->-r gradio_demo/requirements.txt (line 14)) (3.7.3)\n",
|
88 |
+
"Requirement already satisfied: scipy in /opt/conda/lib/python3.10/site-packages (from insightface->-r gradio_demo/requirements.txt (line 14)) (1.11.4)\n",
|
89 |
+
"Requirement already satisfied: scikit-learn in /opt/conda/lib/python3.10/site-packages (from insightface->-r gradio_demo/requirements.txt (line 14)) (1.5.1)\n",
|
90 |
+
"Requirement already satisfied: scikit-image in /opt/conda/lib/python3.10/site-packages (from insightface->-r gradio_demo/requirements.txt (line 14)) (0.24.0)\n",
|
91 |
+
"Collecting easydict (from insightface->-r gradio_demo/requirements.txt (line 14))\n",
|
92 |
+
" Downloading easydict-1.13-py3-none-any.whl.metadata (4.2 kB)\n",
|
93 |
+
"Requirement already satisfied: cython in /opt/conda/lib/python3.10/site-packages (from insightface->-r gradio_demo/requirements.txt (line 14)) (3.0.10)\n",
|
94 |
+
"Collecting albumentations (from insightface->-r gradio_demo/requirements.txt (line 14))\n",
|
95 |
+
" Downloading albumentations-1.4.11-py3-none-any.whl.metadata (39 kB)\n",
|
96 |
+
"Requirement already satisfied: prettytable in /opt/conda/lib/python3.10/site-packages (from insightface->-r gradio_demo/requirements.txt (line 14)) (3.10.0)\n",
|
97 |
+
"Requirement already satisfied: aiofiles<24.0,>=22.0 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (22.1.0)\n",
|
98 |
+
"Collecting altair<6.0,>=5.0 (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
|
99 |
+
" Downloading altair-5.3.0-py3-none-any.whl.metadata (9.2 kB)\n",
|
100 |
+
"Requirement already satisfied: fastapi in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (0.111.0)\n",
|
101 |
+
"Collecting ffmpy (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
|
102 |
+
" Downloading ffmpy-0.3.2.tar.gz (5.5 kB)\n",
|
103 |
+
" Preparing metadata (setup.py) ... \u001b[?25ldone\n",
|
104 |
+
"\u001b[?25hCollecting gradio-client==1.1.0 (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
|
105 |
+
" Downloading gradio_client-1.1.0-py3-none-any.whl.metadata (7.1 kB)\n",
|
106 |
+
"Requirement already satisfied: importlib-resources<7.0,>=1.3 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (6.4.0)\n",
|
107 |
+
"Requirement already satisfied: markupsafe~=2.0 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (2.1.5)\n",
|
108 |
+
"Requirement already satisfied: orjson~=3.0 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (3.10.6)\n",
|
109 |
+
"Requirement already satisfied: pandas<3.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (2.0.3)\n",
|
110 |
+
"Collecting pydantic<3,>=1 (from spaces==0.19.4->-r gradio_demo/requirements.txt (line 9))\n",
|
111 |
+
" Downloading pydantic-2.8.2-py3-none-any.whl.metadata (125 kB)\n",
|
112 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m125.2/125.2 kB\u001b[0m \u001b[31m16.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
113 |
+
"\u001b[?25hCollecting pydub (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
|
114 |
+
" Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
|
115 |
+
"Requirement already satisfied: python-multipart>=0.0.9 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (0.0.9)\n",
|
116 |
+
"Collecting ruff>=0.2.2 (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
|
117 |
+
" Downloading ruff-0.5.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (24 kB)\n",
|
118 |
+
"Collecting semantic-version~=2.0 (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
|
119 |
+
" Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n",
|
120 |
+
"Collecting tomlkit==0.12.0 (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
|
121 |
+
" Downloading tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)\n",
|
122 |
+
"Requirement already satisfied: typer<1.0,>=0.12 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (0.12.3)\n",
|
123 |
+
"Collecting urllib3~=2.0 (from gradio->-r gradio_demo/requirements.txt (line 15))\n",
|
124 |
+
" Downloading urllib3-2.2.2-py3-none-any.whl.metadata (6.4 kB)\n",
|
125 |
+
"Requirement already satisfied: uvicorn>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from gradio->-r gradio_demo/requirements.txt (line 15)) (0.30.1)\n",
|
126 |
+
"Collecting websockets<12.0,>=10.0 (from gradio-client==1.1.0->gradio->-r gradio_demo/requirements.txt (line 15))\n",
|
127 |
+
" Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
|
128 |
+
"Collecting opencv-python-headless (from controlnet_aux->-r gradio_demo/requirements.txt (line 16))\n",
|
129 |
+
" Downloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)\n",
|
130 |
+
"Collecting timm<=0.6.7 (from controlnet_aux->-r gradio_demo/requirements.txt (line 16))\n",
|
131 |
+
" Downloading timm-0.6.7-py3-none-any.whl.metadata (33 kB)\n",
|
132 |
+
"Requirement already satisfied: beautifulsoup4 in /opt/conda/lib/python3.10/site-packages (from gdown->-r gradio_demo/requirements.txt (line 17)) (4.12.3)\n",
|
133 |
+
"Requirement already satisfied: jsonschema>=3.0 in /opt/conda/lib/python3.10/site-packages (from altair<6.0,>=5.0->gradio->-r gradio_demo/requirements.txt (line 15)) (4.22.0)\n",
|
134 |
+
"Requirement already satisfied: toolz in /opt/conda/lib/python3.10/site-packages (from altair<6.0,>=5.0->gradio->-r gradio_demo/requirements.txt (line 15)) (0.12.1)\n",
|
135 |
+
"Requirement already satisfied: anyio in /opt/conda/lib/python3.10/site-packages (from httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (4.4.0)\n",
|
136 |
+
"Requirement already satisfied: certifi in /opt/conda/lib/python3.10/site-packages (from httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (2024.7.4)\n",
|
137 |
+
"Requirement already satisfied: httpcore==1.* in /opt/conda/lib/python3.10/site-packages (from httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (1.0.5)\n",
|
138 |
+
"Requirement already satisfied: idna in /opt/conda/lib/python3.10/site-packages (from httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (3.7)\n",
|
139 |
+
"Requirement already satisfied: sniffio in /opt/conda/lib/python3.10/site-packages (from httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (1.3.1)\n",
|
140 |
+
"Requirement already satisfied: h11<0.15,>=0.13 in /opt/conda/lib/python3.10/site-packages (from httpcore==1.*->httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (0.14.0)\n",
|
141 |
+
"Requirement already satisfied: contourpy>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (1.2.1)\n",
|
142 |
+
"Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.10/site-packages (from matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (0.12.1)\n",
|
143 |
+
"Requirement already satisfied: fonttools>=4.22.0 in /opt/conda/lib/python3.10/site-packages (from matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (4.53.1)\n",
|
144 |
+
"Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.10/site-packages (from matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (1.4.5)\n",
|
145 |
+
"Requirement already satisfied: pyparsing>=2.3.1 in /opt/conda/lib/python3.10/site-packages (from matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (3.1.2)\n",
|
146 |
+
"Requirement already satisfied: python-dateutil>=2.7 in /opt/conda/lib/python3.10/site-packages (from matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (2.9.0)\n",
|
147 |
+
"Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio->-r gradio_demo/requirements.txt (line 15)) (2024.1)\n",
|
148 |
+
"Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas<3.0,>=1.0->gradio->-r gradio_demo/requirements.txt (line 15)) (2024.1)\n",
|
149 |
+
"Collecting annotated-types>=0.4.0 (from pydantic<3,>=1->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9))\n",
|
150 |
+
" Downloading annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n",
|
151 |
+
"Collecting pydantic-core==2.20.1 (from pydantic<3,>=1->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9))\n",
|
152 |
+
" Downloading pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n",
|
153 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (3.3.2)\n",
|
154 |
+
"Requirement already satisfied: click>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio->-r gradio_demo/requirements.txt (line 15)) (8.1.7)\n",
|
155 |
+
"Requirement already satisfied: shellingham>=1.3.0 in /opt/conda/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio->-r gradio_demo/requirements.txt (line 15)) (1.5.4)\n",
|
156 |
+
"Requirement already satisfied: rich>=10.11.0 in /opt/conda/lib/python3.10/site-packages (from typer<1.0,>=0.12->gradio->-r gradio_demo/requirements.txt (line 15)) (13.7.1)\n",
|
157 |
+
"Collecting albucore>=0.0.11 (from albumentations->insightface->-r gradio_demo/requirements.txt (line 14))\n",
|
158 |
+
" Downloading albucore-0.0.12-py3-none-any.whl.metadata (3.1 kB)\n",
|
159 |
+
"Collecting eval-type-backport (from albumentations->insightface->-r gradio_demo/requirements.txt (line 14))\n",
|
160 |
+
" Downloading eval_type_backport-0.2.0-py3-none-any.whl.metadata (2.2 kB)\n",
|
161 |
+
"Requirement already satisfied: imageio>=2.33 in /opt/conda/lib/python3.10/site-packages (from scikit-image->insightface->-r gradio_demo/requirements.txt (line 14)) (2.34.2)\n",
|
162 |
+
"Requirement already satisfied: tifffile>=2022.8.12 in /opt/conda/lib/python3.10/site-packages (from scikit-image->insightface->-r gradio_demo/requirements.txt (line 14)) (2024.7.2)\n",
|
163 |
+
"Requirement already satisfied: lazy-loader>=0.4 in /opt/conda/lib/python3.10/site-packages (from scikit-image->insightface->-r gradio_demo/requirements.txt (line 14)) (0.4)\n",
|
164 |
+
"Requirement already satisfied: joblib>=1.2.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->insightface->-r gradio_demo/requirements.txt (line 14)) (1.4.2)\n",
|
165 |
+
"Requirement already satisfied: threadpoolctl>=3.1.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->insightface->-r gradio_demo/requirements.txt (line 14)) (3.5.0)\n",
|
166 |
+
"Requirement already satisfied: soupsieve>1.2 in /opt/conda/lib/python3.10/site-packages (from beautifulsoup4->gdown->-r gradio_demo/requirements.txt (line 17)) (2.5)\n",
|
167 |
+
"Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime-gpu->-r gradio_demo/requirements.txt (line 8))\n",
|
168 |
+
" Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)\n",
|
169 |
+
"Requirement already satisfied: starlette<0.38.0,>=0.37.2 in /opt/conda/lib/python3.10/site-packages (from fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (0.37.2)\n",
|
170 |
+
"Requirement already satisfied: fastapi-cli>=0.0.2 in /opt/conda/lib/python3.10/site-packages (from fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (0.0.4)\n",
|
171 |
+
"Requirement already satisfied: ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 in /opt/conda/lib/python3.10/site-packages (from fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (5.10.0)\n",
|
172 |
+
"Requirement already satisfied: email_validator>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (2.2.0)\n",
|
173 |
+
"Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.10/site-packages (from importlib-metadata->diffusers==0.25.1->-r gradio_demo/requirements.txt (line 1)) (3.19.2)\n",
|
174 |
+
"Requirement already satisfied: wcwidth in /opt/conda/lib/python3.10/site-packages (from prettytable->insightface->-r gradio_demo/requirements.txt (line 14)) (0.2.13)\n",
|
175 |
+
"Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /opt/conda/lib/python3.10/site-packages (from requests[socks]->gdown->-r gradio_demo/requirements.txt (line 17)) (1.7.1)\n",
|
176 |
+
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /opt/conda/lib/python3.10/site-packages (from sympy->torch==2.0.0->-r gradio_demo/requirements.txt (line 2)) (1.3.0)\n",
|
177 |
+
"Requirement already satisfied: tomli>=2.0.1 in /opt/conda/lib/python3.10/site-packages (from albucore>=0.0.11->albumentations->insightface->-r gradio_demo/requirements.txt (line 14)) (2.0.1)\n",
|
178 |
+
"Requirement already satisfied: dnspython>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from email_validator>=2.0.0->fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (2.6.1)\n",
|
179 |
+
"Requirement already satisfied: attrs>=22.2.0 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=5.0->gradio->-r gradio_demo/requirements.txt (line 15)) (23.2.0)\n",
|
180 |
+
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=5.0->gradio->-r gradio_demo/requirements.txt (line 15)) (2023.12.1)\n",
|
181 |
+
"Requirement already satisfied: referencing>=0.28.4 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=5.0->gradio->-r gradio_demo/requirements.txt (line 15)) (0.35.1)\n",
|
182 |
+
"Requirement already satisfied: rpds-py>=0.7.1 in /opt/conda/lib/python3.10/site-packages (from jsonschema>=3.0->altair<6.0,>=5.0->gradio->-r gradio_demo/requirements.txt (line 15)) (0.19.0)\n",
|
183 |
+
"Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib->insightface->-r gradio_demo/requirements.txt (line 14)) (1.16.0)\n",
|
184 |
+
"Requirement already satisfied: markdown-it-py>=2.2.0 in /opt/conda/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio->-r gradio_demo/requirements.txt (line 15)) (3.0.0)\n",
|
185 |
+
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/conda/lib/python3.10/site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio->-r gradio_demo/requirements.txt (line 15)) (2.18.0)\n",
|
186 |
+
"Requirement already satisfied: exceptiongroup>=1.0.2 in /opt/conda/lib/python3.10/site-packages (from anyio->httpx>=0.20->spaces==0.19.4->-r gradio_demo/requirements.txt (line 9)) (1.2.0)\n",
|
187 |
+
"Requirement already satisfied: httptools>=0.5.0 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.12.0->fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (0.6.1)\n",
|
188 |
+
"Requirement already satisfied: python-dotenv>=0.13 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.12.0->fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (1.0.1)\n",
|
189 |
+
"Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.12.0->fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (0.19.0)\n",
|
190 |
+
"Requirement already satisfied: watchfiles>=0.13 in /opt/conda/lib/python3.10/site-packages (from uvicorn[standard]>=0.12.0->fastapi->gradio->-r gradio_demo/requirements.txt (line 15)) (0.22.0)\n",
|
191 |
+
"Requirement already satisfied: mdurl~=0.1 in /opt/conda/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio->-r gradio_demo/requirements.txt (line 15)) (0.1.2)\n",
|
192 |
+
"Downloading diffusers-0.25.1-py3-none-any.whl (1.8 MB)\n",
|
193 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━���━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m33.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
|
194 |
+
"\u001b[?25hDownloading transformers-4.37.1-py3-none-any.whl (8.4 MB)\n",
|
195 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.4/8.4 MB\u001b[0m \u001b[31m87.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
|
196 |
+
"\u001b[?25hDownloading spaces-0.19.4-py3-none-any.whl (15 kB)\n",
|
197 |
+
"Downloading huggingface_hub-0.20.2-py3-none-any.whl (330 kB)\n",
|
198 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m330.3/330.3 kB\u001b[0m \u001b[31m37.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
199 |
+
"\u001b[?25hDownloading accelerate-0.32.1-py3-none-any.whl (314 kB)\n",
|
200 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m314.1/314.1 kB\u001b[0m \u001b[31m36.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
201 |
+
"\u001b[?25hDownloading safetensors-0.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
|
202 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m74.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
203 |
+
"\u001b[?25hDownloading einops-0.8.0-py3-none-any.whl (43 kB)\n",
|
204 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.2/43.2 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
205 |
+
"\u001b[?25hDownloading onnxruntime_gpu-1.18.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (200.8 MB)\n",
|
206 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m200.8/200.8 MB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
|
207 |
+
"\u001b[?25hDownloading omegaconf-2.3.0-py3-none-any.whl (79 kB)\n",
|
208 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.5/79.5 kB\u001b[0m \u001b[31m133.1 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
|
209 |
+
"\u001b[?25hDownloading peft-0.11.1-py3-none-any.whl (251 kB)\n",
|
210 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m251.6/251.6 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
|
211 |
+
"\u001b[?25hDownloading opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (62.5 MB)\n",
|
212 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 MB\u001b[0m \u001b[31m21.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
|
213 |
+
"\u001b[?25hDownloading gradio-4.38.1-py3-none-any.whl (12.4 MB)\n",
|
214 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.4/12.4 MB\u001b[0m \u001b[31m84.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n",
|
215 |
+
"\u001b[?25hDownloading gradio_client-1.1.0-py3-none-any.whl (318 kB)\n",
|
216 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m318.1/318.1 kB\u001b[0m \u001b[31m38.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
217 |
+
"\u001b[?25hDownloading tomlkit-0.12.0-py3-none-any.whl (37 kB)\n",
|
218 |
+
"Downloading controlnet_aux-0.0.9-py3-none-any.whl (282 kB)\n",
|
219 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m282.4/282.4 kB\u001b[0m \u001b[31m31.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
220 |
+
"\u001b[?25hDownloading gdown-5.2.0-py3-none-any.whl (18 kB)\n",
|
221 |
+
"Downloading altair-5.3.0-py3-none-any.whl (857 kB)\n",
|
222 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m857.8/857.8 kB\u001b[0m \u001b[31m64.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
223 |
+
"\u001b[?25hDownloading pydantic-2.8.2-py3-none-any.whl (423 kB)\n",
|
224 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━���━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m423.9/423.9 kB\u001b[0m \u001b[31m46.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
225 |
+
"\u001b[?25hDownloading pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)\n",
|
226 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m101.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
227 |
+
"\u001b[?25hDownloading regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (775 kB)\n",
|
228 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m775.1/775.1 kB\u001b[0m \u001b[31m58.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
229 |
+
"\u001b[?25hDownloading ruff-0.5.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.1 MB)\n",
|
230 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.1/10.1 MB\u001b[0m \u001b[31m82.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m0:01\u001b[0m\n",
|
231 |
+
"\u001b[?25hDownloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n",
|
232 |
+
"Downloading timm-0.6.7-py3-none-any.whl (509 kB)\n",
|
233 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m510.0/510.0 kB\u001b[0m \u001b[31m45.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
234 |
+
"\u001b[?25hDownloading tokenizers-0.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n",
|
235 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m90.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n",
|
236 |
+
"\u001b[?25hDownloading urllib3-2.2.2-py3-none-any.whl (121 kB)\n",
|
237 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.4/121.4 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
238 |
+
"\u001b[?25hDownloading albumentations-1.4.11-py3-none-any.whl (165 kB)\n",
|
239 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m165.3/165.3 kB\u001b[0m \u001b[31m23.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
240 |
+
"\u001b[?25hDownloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (49.9 MB)\n",
|
241 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.9/49.9 MB\u001b[0m \u001b[31m27.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
|
242 |
+
"\u001b[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
|
243 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
244 |
+
"\u001b[?25hDownloading easydict-1.13-py3-none-any.whl (6.8 kB)\n",
|
245 |
+
"Downloading flatbuffers-24.3.25-py2.py3-none-any.whl (26 kB)\n",
|
246 |
+
"Downloading onnx-1.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)\n",
|
247 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.9/15.9 MB\u001b[0m \u001b[31m80.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n",
|
248 |
+
"\u001b[?25hDownloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n",
|
249 |
+
"Downloading albucore-0.0.12-py3-none-any.whl (8.4 kB)\n",
|
250 |
+
"Downloading annotated_types-0.7.0-py3-none-any.whl (13 kB)\n",
|
251 |
+
"Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
|
252 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m13.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
253 |
+
"\u001b[?25hDownloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n",
|
254 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
255 |
+
"\u001b[?25hDownloading eval_type_backport-0.2.0-py3-none-any.whl (5.9 kB)\n",
|
256 |
+
"Building wheels for collected packages: antlr4-python3-runtime, insightface, ffmpy\n",
|
257 |
+
" Building wheel for antlr4-python3-runtime (setup.py) ... \u001b[?25ldone\n",
|
258 |
+
"\u001b[?25h Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.9.3-py3-none-any.whl size=144552 sha256=e9ae543340a864dee947980bab8fc7d8fc3b8a5a04b28963252f790444a5cd1f\n",
|
259 |
+
" Stored in directory: /home/jupyter/.cache/pip/wheels/12/93/dd/1f6a127edc45659556564c5730f6d4e300888f4bca2d4c5a88\n",
|
260 |
+
" Building wheel for insightface (pyproject.toml) ... \u001b[?25ldone\n",
|
261 |
+
"\u001b[?25h Created wheel for insightface: filename=insightface-0.7.3-cp310-cp310-linux_x86_64.whl size=874168 sha256=f381a87957a87ca37e1795c8ba2b854664428cf5117760cc1f7d58368918523b\n",
|
262 |
+
" Stored in directory: /home/jupyter/.cache/pip/wheels/e3/d0/80/e3773fb8b6d1cca87ea1d33d9b1f20a223a6493c896da249b5\n",
|
263 |
+
" Building wheel for ffmpy (setup.py) ... \u001b[?25ldone\n",
|
264 |
+
"\u001b[?25h Created wheel for ffmpy: filename=ffmpy-0.3.2-py3-none-any.whl size=5581 sha256=d671b217ecfc883cea0aa0408a98e3d187bd0e888ba4e85318ea4b8bfa539786\n",
|
265 |
+
" Stored in directory: /home/jupyter/.cache/pip/wheels/bd/65/9a/671fc6dcde07d4418df0c592f8df512b26d7a0029c2a23dd81\n",
|
266 |
+
"Successfully built antlr4-python3-runtime insightface ffmpy\n",
|
267 |
+
"Installing collected packages: pydub, flatbuffers, ffmpy, easydict, antlr4-python3-runtime, websockets, urllib3, tomlkit, semantic-version, safetensors, ruff, regex, pydantic-core, opencv-python-headless, opencv-python, onnx, omegaconf, humanfriendly, eval-type-backport, einops, annotated-types, pydantic, coloredlogs, albucore, onnxruntime-gpu, huggingface-hub, albumentations, tokenizers, insightface, gradio-client, gdown, diffusers, altair, transformers, gradio, spaces, timm, accelerate, peft, controlnet_aux\n",
|
268 |
+
" Attempting uninstall: websockets\n",
|
269 |
+
" Found existing installation: websockets 12.0\n",
|
270 |
+
" Uninstalling websockets-12.0:\n",
|
271 |
+
" Successfully uninstalled websockets-12.0\n",
|
272 |
+
" Attempting uninstall: urllib3\n",
|
273 |
+
" Found existing installation: urllib3 1.26.19\n",
|
274 |
+
" Uninstalling urllib3-1.26.19:\n",
|
275 |
+
" Successfully uninstalled urllib3-1.26.19\n",
|
276 |
+
" Attempting uninstall: pydantic\n",
|
277 |
+
" Found existing installation: pydantic 1.10.17\n",
|
278 |
+
" Uninstalling pydantic-1.10.17:\n",
|
279 |
+
" Successfully uninstalled pydantic-1.10.17\n",
|
280 |
+
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
281 |
+
"dataproc-jupyter-plugin 0.1.79 requires pydantic~=1.10.0, but you have pydantic 2.8.2 which is incompatible.\n",
|
282 |
+
"kfp 2.5.0 requires urllib3<2.0.0, but you have urllib3 2.2.2 which is incompatible.\n",
|
283 |
+
"ydata-profiling 4.6.0 requires pydantic<2,>=1.8.1, but you have pydantic 2.8.2 which is incompatible.\u001b[0m\u001b[31m\n",
|
284 |
+
"\u001b[0mSuccessfully installed accelerate-0.32.1 albucore-0.0.12 albumentations-1.4.11 altair-5.3.0 annotated-types-0.7.0 antlr4-python3-runtime-4.9.3 coloredlogs-15.0.1 controlnet_aux-0.0.9 diffusers-0.25.1 easydict-1.13 einops-0.8.0 eval-type-backport-0.2.0 ffmpy-0.3.2 flatbuffers-24.3.25 gdown-5.2.0 gradio-4.38.1 gradio-client-1.1.0 huggingface-hub-0.20.2 humanfriendly-10.0 insightface-0.7.3 omegaconf-2.3.0 onnx-1.16.1 onnxruntime-gpu-1.18.1 opencv-python-4.10.0.84 opencv-python-headless-4.10.0.84 peft-0.11.1 pydantic-2.8.2 pydantic-core-2.20.1 pydub-0.25.1 regex-2024.5.15 ruff-0.5.4 safetensors-0.4.3 semantic-version-2.10.0 spaces-0.19.4 timm-0.6.7 tokenizers-0.15.2 tomlkit-0.12.0 transformers-4.37.1 urllib3-2.2.2 websockets-11.0.3\n"
|
285 |
+
]
|
286 |
+
}
|
287 |
+
],
|
288 |
+
"source": [
|
289 |
+
"!pip install -r gradio_demo/requirements.txt"
|
290 |
+
]
|
291 |
+
},
|
292 |
+
{
|
293 |
+
"cell_type": "code",
|
294 |
+
"execution_count": 3,
|
295 |
+
"id": "dec146ca-0832-4c71-8b31-1586af435d67",
|
296 |
+
"metadata": {
|
297 |
+
"tags": []
|
298 |
+
},
|
299 |
+
"outputs": [
|
300 |
+
{
|
301 |
+
"name": "stdout",
|
302 |
+
"output_type": "stream",
|
303 |
+
"text": [
|
304 |
+
"ControlNetModel/config.json: 100%|█████████| 1.38k/1.38k [00:00<00:00, 7.07MB/s]\n",
|
305 |
+
"diffusion_pytorch_model.safetensors: 100%|██| 2.50G/2.50G [00:05<00:00, 449MB/s]\n",
|
306 |
+
"ip-adapter.bin: 100%|███████████████████████| 1.69G/1.69G [00:09<00:00, 180MB/s]\n",
|
307 |
+
"pytorch_lora_weights.safetensors: 100%|███████| 394M/394M [00:02<00:00, 169MB/s]\n",
|
308 |
+
"Downloading...\n",
|
309 |
+
"From (original): https://drive.google.com/uc?id=18wEUfMNohBJ4K3Ly5wpTejPfDzp-8fI8\n",
|
310 |
+
"From (redirected): https://drive.google.com/uc?id=18wEUfMNohBJ4K3Ly5wpTejPfDzp-8fI8&confirm=t&uuid=abca1ed1-5c28-423b-a1c7-4fe1fa0d4dbc\n",
|
311 |
+
"To: /home/jupyter/InstantID/models/antelopev2.zip\n",
|
312 |
+
"100%|████████████████████████████████████████| 361M/361M [00:21<00:00, 16.9MB/s]\n",
|
313 |
+
"Archive: ./models/antelopev2.zip\n",
|
314 |
+
" creating: ./models/antelopev2/\n",
|
315 |
+
" inflating: ./models/antelopev2/genderage.onnx \n",
|
316 |
+
" inflating: ./models/antelopev2/2d106det.onnx \n",
|
317 |
+
" inflating: ./models/antelopev2/1k3d68.onnx \n",
|
318 |
+
" inflating: ./models/antelopev2/glintr100.onnx \n",
|
319 |
+
" inflating: ./models/antelopev2/scrfd_10g_bnkps.onnx \n"
|
320 |
+
]
|
321 |
+
}
|
322 |
+
],
|
323 |
+
"source": [
|
324 |
+
"!python gradio_demo/download_models.py"
|
325 |
+
]
|
326 |
+
},
|
327 |
+
{
|
328 |
+
"cell_type": "code",
|
329 |
+
"execution_count": 4,
|
330 |
+
"id": "e4900619-4519-4ec9-bb32-a620128d1727",
|
331 |
+
"metadata": {
|
332 |
+
"tags": []
|
333 |
+
},
|
334 |
+
"outputs": [
|
335 |
+
{
|
336 |
+
"name": "stdout",
|
337 |
+
"output_type": "stream",
|
338 |
+
"text": [
|
339 |
+
"Collecting setuptools==69.5.1\n",
|
340 |
+
" Downloading setuptools-69.5.1-py3-none-any.whl.metadata (6.2 kB)\n",
|
341 |
+
"Downloading setuptools-69.5.1-py3-none-any.whl (894 kB)\n",
|
342 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m894.6/894.6 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
|
343 |
+
"\u001b[?25hInstalling collected packages: setuptools\n",
|
344 |
+
" Attempting uninstall: setuptools\n",
|
345 |
+
" Found existing installation: setuptools 70.1.1\n",
|
346 |
+
" Uninstalling setuptools-70.1.1:\n",
|
347 |
+
" Successfully uninstalled setuptools-70.1.1\n",
|
348 |
+
"Successfully installed setuptools-69.5.1\n"
|
349 |
+
]
|
350 |
+
}
|
351 |
+
],
|
352 |
+
"source": [
|
353 |
+
"!pip install setuptools==69.5.1"
|
354 |
+
]
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"cell_type": "code",
|
358 |
+
"execution_count": 6,
|
359 |
+
"id": "9584f180-48a2-46bc-968d-9c99bc56f06c",
|
360 |
+
"metadata": {
|
361 |
+
"tags": []
|
362 |
+
},
|
363 |
+
"outputs": [
|
364 |
+
{
|
365 |
+
"name": "stdout",
|
366 |
+
"output_type": "stream",
|
367 |
+
"text": [
|
368 |
+
"Collecting huggingface-hub==0.23.4\n",
|
369 |
+
" Downloading huggingface_hub-0.23.4-py3-none-any.whl.metadata (12 kB)\n",
|
370 |
+
"Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (3.15.4)\n",
|
371 |
+
"Requirement already satisfied: fsspec>=2023.5.0 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (2024.6.1)\n",
|
372 |
+
"Requirement already satisfied: packaging>=20.9 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (24.1)\n",
|
373 |
+
"Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (6.0.1)\n",
|
374 |
+
"Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (2.32.3)\n",
|
375 |
+
"Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (4.66.4)\n",
|
376 |
+
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub==0.23.4) (4.12.2)\n",
|
377 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub==0.23.4) (3.3.2)\n",
|
378 |
+
"Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub==0.23.4) (3.7)\n",
|
379 |
+
"Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub==0.23.4) (2.2.2)\n",
|
380 |
+
"Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub==0.23.4) (2024.7.4)\n",
|
381 |
+
"Downloading huggingface_hub-0.23.4-py3-none-any.whl (402 kB)\n",
|
382 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m402.6/402.6 kB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
|
383 |
+
"\u001b[?25hInstalling collected packages: huggingface-hub\n",
|
384 |
+
" Attempting uninstall: huggingface-hub\n",
|
385 |
+
" Found existing installation: huggingface-hub 0.20.2\n",
|
386 |
+
" Uninstalling huggingface-hub-0.20.2:\n",
|
387 |
+
" Successfully uninstalled huggingface-hub-0.20.2\n",
|
388 |
+
"Successfully installed huggingface-hub-0.23.4\n"
|
389 |
+
]
|
390 |
+
}
|
391 |
+
],
|
392 |
+
"source": [
|
393 |
+
"!pip install huggingface-hub==0.23.4"
|
394 |
+
]
|
395 |
+
},
|
396 |
+
{
|
397 |
+
"cell_type": "code",
|
398 |
+
"execution_count": null,
|
399 |
+
"id": "56683081-5e9f-4378-84df-b957c84b23ad",
|
400 |
+
"metadata": {},
|
401 |
+
"outputs": [],
|
402 |
+
"source": []
|
403 |
+
}
|
404 |
+
],
|
405 |
+
"metadata": {
|
406 |
+
"environment": {
|
407 |
+
"kernel": "python3",
|
408 |
+
"name": ".m123",
|
409 |
+
"type": "gcloud",
|
410 |
+
"uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/:m123"
|
411 |
+
},
|
412 |
+
"kernelspec": {
|
413 |
+
"display_name": "Python 3 (Local)",
|
414 |
+
"language": "python",
|
415 |
+
"name": "python3"
|
416 |
+
},
|
417 |
+
"language_info": {
|
418 |
+
"codemirror_mode": {
|
419 |
+
"name": "ipython",
|
420 |
+
"version": 3
|
421 |
+
},
|
422 |
+
"file_extension": ".py",
|
423 |
+
"mimetype": "text/x-python",
|
424 |
+
"name": "python",
|
425 |
+
"nbconvert_exporter": "python",
|
426 |
+
"pygments_lexer": "ipython3",
|
427 |
+
"version": "3.10.14"
|
428 |
+
}
|
429 |
+
},
|
430 |
+
"nbformat": 4,
|
431 |
+
"nbformat_minor": 5
|
432 |
+
}
|
checkpoints/ControlNetModel/config.json
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "ControlNetModel",
|
3 |
+
"_diffusers_version": "0.21.2",
|
4 |
+
"_name_or_path": "/mnt/nj-aigc/usr/guiwan/workspace/diffusion_output/face_xl_ipc_v4_2_XiezhenAnimeForeigner/checkpoint-150000/ControlNetModel",
|
5 |
+
"act_fn": "silu",
|
6 |
+
"addition_embed_type": "text_time",
|
7 |
+
"addition_embed_type_num_heads": 64,
|
8 |
+
"addition_time_embed_dim": 256,
|
9 |
+
"attention_head_dim": [
|
10 |
+
5,
|
11 |
+
10,
|
12 |
+
20
|
13 |
+
],
|
14 |
+
"block_out_channels": [
|
15 |
+
320,
|
16 |
+
640,
|
17 |
+
1280
|
18 |
+
],
|
19 |
+
"class_embed_type": null,
|
20 |
+
"conditioning_channels": 3,
|
21 |
+
"conditioning_embedding_out_channels": [
|
22 |
+
16,
|
23 |
+
32,
|
24 |
+
96,
|
25 |
+
256
|
26 |
+
],
|
27 |
+
"controlnet_conditioning_channel_order": "rgb",
|
28 |
+
"cross_attention_dim": 2048,
|
29 |
+
"down_block_types": [
|
30 |
+
"DownBlock2D",
|
31 |
+
"CrossAttnDownBlock2D",
|
32 |
+
"CrossAttnDownBlock2D"
|
33 |
+
],
|
34 |
+
"downsample_padding": 1,
|
35 |
+
"encoder_hid_dim": null,
|
36 |
+
"encoder_hid_dim_type": null,
|
37 |
+
"flip_sin_to_cos": true,
|
38 |
+
"freq_shift": 0,
|
39 |
+
"global_pool_conditions": false,
|
40 |
+
"in_channels": 4,
|
41 |
+
"layers_per_block": 2,
|
42 |
+
"mid_block_scale_factor": 1,
|
43 |
+
"norm_eps": 1e-05,
|
44 |
+
"norm_num_groups": 32,
|
45 |
+
"num_attention_heads": null,
|
46 |
+
"num_class_embeds": null,
|
47 |
+
"only_cross_attention": false,
|
48 |
+
"projection_class_embeddings_input_dim": 2816,
|
49 |
+
"resnet_time_scale_shift": "default",
|
50 |
+
"transformer_layers_per_block": [
|
51 |
+
1,
|
52 |
+
2,
|
53 |
+
10
|
54 |
+
],
|
55 |
+
"upcast_attention": null,
|
56 |
+
"use_linear_projection": true
|
57 |
+
}
|
checkpoints/ControlNetModel/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8127be9f174101ebdafee9964d856b49b634435cf6daa396d3f593cf0bbbb05
|
3 |
+
size 2502139136
|
checkpoints/ip-adapter.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02b3618e36d803784166660520098089a81388e61a93ef8002aa79a5b1c546e1
|
3 |
+
size 1691134141
|
checkpoints/pytorch_lora_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a764e6859b6e04047cd761c08ff0cee96413a8e004c9f07707530cd776b19141
|
3 |
+
size 393855224
|
cog.yaml
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Configuration for Cog ⚙️
|
2 |
+
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
|
3 |
+
|
4 |
+
build:
|
5 |
+
# set to true if your model requires a GPU
|
6 |
+
gpu: true
|
7 |
+
cuda: "12.1"
|
8 |
+
|
9 |
+
# a list of ubuntu apt packages to install
|
10 |
+
system_packages:
|
11 |
+
- "libgl1-mesa-glx"
|
12 |
+
- "libglib2.0-0"
|
13 |
+
|
14 |
+
# python version in the form '3.11' or '3.11.4'
|
15 |
+
python_version: "3.11"
|
16 |
+
|
17 |
+
# a list of packages in the format <package-name>==<version>
|
18 |
+
python_packages:
|
19 |
+
- "opencv-python==4.9.0.80"
|
20 |
+
- "transformers==4.37.0"
|
21 |
+
- "accelerate==0.26.1"
|
22 |
+
- "insightface==0.7.3"
|
23 |
+
- "diffusers==0.25.1"
|
24 |
+
- "onnxruntime==1.16.3"
|
25 |
+
- "omegaconf==2.3.0"
|
26 |
+
- "gradio==3.50.2"
|
27 |
+
- "peft==0.8.2"
|
28 |
+
- "transformers==4.37.0"
|
29 |
+
- "controlnet-aux==0.0.7"
|
30 |
+
|
31 |
+
# fix for pydantic issues in cog
|
32 |
+
# https://github.com/replicate/cog/issues/1623
|
33 |
+
- albumentations==1.4.3
|
34 |
+
|
35 |
+
# commands run after the environment is setup
|
36 |
+
run:
|
37 |
+
- curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.6.0/pget_linux_x86_64" && chmod +x /usr/local/bin/pget
|
38 |
+
|
39 |
+
# predict.py defines how predictions are run on your model
|
40 |
+
predict: "cog/predict.py:Predictor"
|
cog/README.md
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# InstantID Cog Model
|
2 |
+
|
3 |
+
[![Replicate](https://replicate.com/zsxkib/instant-id/badge)](https://replicate.com/zsxkib/instant-id)
|
4 |
+
|
5 |
+
## Overview
|
6 |
+
This repository contains the implementation of [InstantID](https://github.com/InstantID/InstantID) as a [Cog](https://github.com/replicate/cog) model.
|
7 |
+
|
8 |
+
Using [Cog](https://github.com/replicate/cog) allows any users with a GPU to run the model locally easily, without the hassle of downloading weights, installing libraries, or managing CUDA versions. Everything just works.
|
9 |
+
|
10 |
+
## Development
|
11 |
+
To push your own fork of InstantID to [Replicate](https://replicate.com), follow the [Model Pushing Guide](https://replicate.com/docs/guides/push-a-model).
|
12 |
+
|
13 |
+
## Basic Usage
|
14 |
+
To make predictions using the model, execute the following command from the root of this project:
|
15 |
+
|
16 |
+
```bash
|
17 |
+
cog predict \
|
18 |
+
-i image=@examples/sam_resize.png \
|
19 |
+
-i prompt="analog film photo of a man. faded film, desaturated, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage, masterpiece, best quality" \
|
20 |
+
-i negative_prompt="nsfw" \
|
21 |
+
-i width=680 \
|
22 |
+
-i height=680 \
|
23 |
+
-i ip_adapter_scale=0.8 \
|
24 |
+
-i controlnet_conditioning_scale=0.8 \
|
25 |
+
-i num_inference_steps=30 \
|
26 |
+
-i guidance_scale=5
|
27 |
+
```
|
28 |
+
|
29 |
+
<table>
|
30 |
+
<tr>
|
31 |
+
<td>
|
32 |
+
<p align="center">Input</p>
|
33 |
+
<img src="https://replicate.delivery/pbxt/KGy0R72cMwriR9EnCLu6hgVkQNd60mY01mDZAQqcUic9rVw4/musk_resize.jpeg" alt="Sample Input Image" width="90%"/>
|
34 |
+
</td>
|
35 |
+
<td>
|
36 |
+
<p align="center">Output</p>
|
37 |
+
<img src="https://replicate.delivery/pbxt/oGOxXELcLcpaMBeIeffwdxKZAkuzwOzzoxKadjhV8YgQWk8IB/result.jpg" alt="Sample Output Image" width="100%"/>
|
38 |
+
</td>
|
39 |
+
</tr>
|
40 |
+
</table>
|
41 |
+
|
42 |
+
## Input Parameters
|
43 |
+
|
44 |
+
The following table provides details about each input parameter for the `predict` function:
|
45 |
+
|
46 |
+
| Parameter | Description | Default Value | Range |
|
47 |
+
| ------------------------------- | ---------------------------------- | -------------------------------------------------------------------------------------------------------------- | ----------- |
|
48 |
+
| `image` | Input image | A path to the input image file | Path string |
|
49 |
+
| `prompt` | Input prompt | "analog film photo of a man. faded film, desaturated, 35mm photo, grainy, vignette, vintage, Kodachrome, ... " | String |
|
50 |
+
| `negative_prompt` | Input Negative Prompt | (empty string) | String |
|
51 |
+
| `width` | Width of output image | 640 | 512 - 2048 |
|
52 |
+
| `height` | Height of output image | 640 | 512 - 2048 |
|
53 |
+
| `ip_adapter_scale` | Scale for IP adapter | 0.8 | 0.0 - 1.0 |
|
54 |
+
| `controlnet_conditioning_scale` | Scale for ControlNet conditioning | 0.8 | 0.0 - 1.0 |
|
55 |
+
| `num_inference_steps` | Number of denoising steps | 30 | 1 - 500 |
|
56 |
+
| `guidance_scale` | Scale for classifier-free guidance | 5 | 1 - 50 |
|
57 |
+
|
58 |
+
This table provides a quick reference to understand and modify the inputs for generating predictions using the model.
|
59 |
+
|
60 |
+
|
cog/predict.py
ADDED
@@ -0,0 +1,756 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Prediction interface for Cog ⚙️
|
2 |
+
# https://github.com/replicate/cog/blob/main/docs/python.md
|
3 |
+
|
4 |
+
import os
|
5 |
+
import sys
|
6 |
+
|
7 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
|
8 |
+
sys.path.append(os.path.join(os.path.dirname(__file__), "../gradio_demo"))
|
9 |
+
|
10 |
+
import cv2
|
11 |
+
import time
|
12 |
+
import torch
|
13 |
+
import mimetypes
|
14 |
+
import subprocess
|
15 |
+
import numpy as np
|
16 |
+
from typing import List
|
17 |
+
from cog import BasePredictor, Input, Path
|
18 |
+
|
19 |
+
import PIL
|
20 |
+
from PIL import Image
|
21 |
+
|
22 |
+
import diffusers
|
23 |
+
from diffusers import LCMScheduler
|
24 |
+
from diffusers.utils import load_image
|
25 |
+
from diffusers.models import ControlNetModel
|
26 |
+
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
|
27 |
+
|
28 |
+
from model_util import get_torch_device
|
29 |
+
from insightface.app import FaceAnalysis
|
30 |
+
from transformers import CLIPImageProcessor
|
31 |
+
from controlnet_util import openpose, get_depth_map, get_canny_image
|
32 |
+
|
33 |
+
from diffusers.pipelines.stable_diffusion.safety_checker import (
|
34 |
+
StableDiffusionSafetyChecker,
|
35 |
+
)
|
36 |
+
from pipeline_stable_diffusion_xl_instantid_full import (
|
37 |
+
StableDiffusionXLInstantIDPipeline,
|
38 |
+
draw_kps,
|
39 |
+
)
|
40 |
+
|
41 |
+
mimetypes.add_type("image/webp", ".webp")
|
42 |
+
|
43 |
+
# GPU global variables
|
44 |
+
DEVICE = get_torch_device()
|
45 |
+
DTYPE = torch.float16 if str(DEVICE).__contains__("cuda") else torch.float32
|
46 |
+
|
47 |
+
# for `ip-adapter`, `ControlNetModel`, and `stable-diffusion-xl-base-1.0`
|
48 |
+
CHECKPOINTS_CACHE = "./checkpoints"
|
49 |
+
CHECKPOINTS_URL = "https://weights.replicate.delivery/default/InstantID/checkpoints.tar"
|
50 |
+
|
51 |
+
# for `models/antelopev2`
|
52 |
+
MODELS_CACHE = "./models"
|
53 |
+
MODELS_URL = "https://weights.replicate.delivery/default/InstantID/models.tar"
|
54 |
+
|
55 |
+
# for the safety checker
|
56 |
+
SAFETY_CACHE = "./safety-cache"
|
57 |
+
FEATURE_EXTRACTOR = "./feature-extractor"
|
58 |
+
SAFETY_URL = "https://weights.replicate.delivery/default/playgroundai/safety-cache.tar"
|
59 |
+
|
60 |
+
SDXL_NAME_TO_PATHLIKE = {
|
61 |
+
# These are all huggingface models that we host via gcp + pget
|
62 |
+
"stable-diffusion-xl-base-1.0": {
|
63 |
+
"slug": "stabilityai/stable-diffusion-xl-base-1.0",
|
64 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stabilityai--stable-diffusion-xl-base-1.0.tar",
|
65 |
+
"path": "checkpoints/models--stabilityai--stable-diffusion-xl-base-1.0",
|
66 |
+
},
|
67 |
+
"afrodite-xl-v2": {
|
68 |
+
"slug": "stablediffusionapi/afrodite-xl-v2",
|
69 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--afrodite-xl-v2.tar",
|
70 |
+
"path": "checkpoints/models--stablediffusionapi--afrodite-xl-v2",
|
71 |
+
},
|
72 |
+
"albedobase-xl-20": {
|
73 |
+
"slug": "stablediffusionapi/albedobase-xl-20",
|
74 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--albedobase-xl-20.tar",
|
75 |
+
"path": "checkpoints/models--stablediffusionapi--albedobase-xl-20",
|
76 |
+
},
|
77 |
+
"albedobase-xl-v13": {
|
78 |
+
"slug": "stablediffusionapi/albedobase-xl-v13",
|
79 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--albedobase-xl-v13.tar",
|
80 |
+
"path": "checkpoints/models--stablediffusionapi--albedobase-xl-v13",
|
81 |
+
},
|
82 |
+
"animagine-xl-30": {
|
83 |
+
"slug": "stablediffusionapi/animagine-xl-30",
|
84 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--animagine-xl-30.tar",
|
85 |
+
"path": "checkpoints/models--stablediffusionapi--animagine-xl-30",
|
86 |
+
},
|
87 |
+
"anime-art-diffusion-xl": {
|
88 |
+
"slug": "stablediffusionapi/anime-art-diffusion-xl",
|
89 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--anime-art-diffusion-xl.tar",
|
90 |
+
"path": "checkpoints/models--stablediffusionapi--anime-art-diffusion-xl",
|
91 |
+
},
|
92 |
+
"anime-illust-diffusion-xl": {
|
93 |
+
"slug": "stablediffusionapi/anime-illust-diffusion-xl",
|
94 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--anime-illust-diffusion-xl.tar",
|
95 |
+
"path": "checkpoints/models--stablediffusionapi--anime-illust-diffusion-xl",
|
96 |
+
},
|
97 |
+
"dreamshaper-xl": {
|
98 |
+
"slug": "stablediffusionapi/dreamshaper-xl",
|
99 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--dreamshaper-xl.tar",
|
100 |
+
"path": "checkpoints/models--stablediffusionapi--dreamshaper-xl",
|
101 |
+
},
|
102 |
+
"dynavision-xl-v0610": {
|
103 |
+
"slug": "stablediffusionapi/dynavision-xl-v0610",
|
104 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--dynavision-xl-v0610.tar",
|
105 |
+
"path": "checkpoints/models--stablediffusionapi--dynavision-xl-v0610",
|
106 |
+
},
|
107 |
+
"guofeng4-xl": {
|
108 |
+
"slug": "stablediffusionapi/guofeng4-xl",
|
109 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--guofeng4-xl.tar",
|
110 |
+
"path": "checkpoints/models--stablediffusionapi--guofeng4-xl",
|
111 |
+
},
|
112 |
+
"juggernaut-xl-v8": {
|
113 |
+
"slug": "stablediffusionapi/juggernaut-xl-v8",
|
114 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--juggernaut-xl-v8.tar",
|
115 |
+
"path": "checkpoints/models--stablediffusionapi--juggernaut-xl-v8",
|
116 |
+
},
|
117 |
+
"nightvision-xl-0791": {
|
118 |
+
"slug": "stablediffusionapi/nightvision-xl-0791",
|
119 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--nightvision-xl-0791.tar",
|
120 |
+
"path": "checkpoints/models--stablediffusionapi--nightvision-xl-0791",
|
121 |
+
},
|
122 |
+
"omnigen-xl": {
|
123 |
+
"slug": "stablediffusionapi/omnigen-xl",
|
124 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--omnigen-xl.tar",
|
125 |
+
"path": "checkpoints/models--stablediffusionapi--omnigen-xl",
|
126 |
+
},
|
127 |
+
"pony-diffusion-v6-xl": {
|
128 |
+
"slug": "stablediffusionapi/pony-diffusion-v6-xl",
|
129 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--pony-diffusion-v6-xl.tar",
|
130 |
+
"path": "checkpoints/models--stablediffusionapi--pony-diffusion-v6-xl",
|
131 |
+
},
|
132 |
+
"protovision-xl-high-fidel": {
|
133 |
+
"slug": "stablediffusionapi/protovision-xl-high-fidel",
|
134 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--stablediffusionapi--protovision-xl-high-fidel.tar",
|
135 |
+
"path": "checkpoints/models--stablediffusionapi--protovision-xl-high-fidel",
|
136 |
+
},
|
137 |
+
"RealVisXL_V3.0_Turbo": {
|
138 |
+
"slug": "SG161222/RealVisXL_V3.0_Turbo",
|
139 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--SG161222--RealVisXL_V3.0_Turbo.tar",
|
140 |
+
"path": "checkpoints/models--SG161222--RealVisXL_V3.0_Turbo",
|
141 |
+
},
|
142 |
+
"RealVisXL_V4.0_Lightning": {
|
143 |
+
"slug": "SG161222/RealVisXL_V4.0_Lightning",
|
144 |
+
"url": "https://weights.replicate.delivery/default/InstantID/models--SG161222--RealVisXL_V4.0_Lightning.tar",
|
145 |
+
"path": "checkpoints/models--SG161222--RealVisXL_V4.0_Lightning",
|
146 |
+
},
|
147 |
+
}
|
148 |
+
|
149 |
+
|
150 |
+
def convert_from_cv2_to_image(img: np.ndarray) -> Image:
|
151 |
+
return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
|
152 |
+
|
153 |
+
|
154 |
+
def convert_from_image_to_cv2(img: Image) -> np.ndarray:
|
155 |
+
return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
|
156 |
+
|
157 |
+
|
158 |
+
def resize_img(
|
159 |
+
input_image,
|
160 |
+
max_side=1280,
|
161 |
+
min_side=1024,
|
162 |
+
size=None,
|
163 |
+
pad_to_max_side=False,
|
164 |
+
mode=PIL.Image.BILINEAR,
|
165 |
+
base_pixel_number=64,
|
166 |
+
):
|
167 |
+
w, h = input_image.size
|
168 |
+
if size is not None:
|
169 |
+
w_resize_new, h_resize_new = size
|
170 |
+
else:
|
171 |
+
ratio = min_side / min(h, w)
|
172 |
+
w, h = round(ratio * w), round(ratio * h)
|
173 |
+
ratio = max_side / max(h, w)
|
174 |
+
input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
|
175 |
+
w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
|
176 |
+
h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
|
177 |
+
input_image = input_image.resize([w_resize_new, h_resize_new], mode)
|
178 |
+
|
179 |
+
if pad_to_max_side:
|
180 |
+
res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
|
181 |
+
offset_x = (max_side - w_resize_new) // 2
|
182 |
+
offset_y = (max_side - h_resize_new) // 2
|
183 |
+
res[offset_y : offset_y + h_resize_new, offset_x : offset_x + w_resize_new] = (
|
184 |
+
np.array(input_image)
|
185 |
+
)
|
186 |
+
input_image = Image.fromarray(res)
|
187 |
+
return input_image
|
188 |
+
|
189 |
+
|
190 |
+
def download_weights(url, dest):
|
191 |
+
start = time.time()
|
192 |
+
print("[!] Initiating download from URL: ", url)
|
193 |
+
print("[~] Destination path: ", dest)
|
194 |
+
command = ["pget", "-vf", url, dest]
|
195 |
+
if ".tar" in url:
|
196 |
+
command.append("-x")
|
197 |
+
try:
|
198 |
+
subprocess.check_call(command, close_fds=False)
|
199 |
+
except subprocess.CalledProcessError as e:
|
200 |
+
print(
|
201 |
+
f"[ERROR] Failed to download weights. Command '{' '.join(e.cmd)}' returned non-zero exit status {e.returncode}."
|
202 |
+
)
|
203 |
+
raise
|
204 |
+
print("[+] Download completed in: ", time.time() - start, "seconds")
|
205 |
+
|
206 |
+
|
207 |
+
class Predictor(BasePredictor):
|
208 |
+
def setup(self) -> None:
|
209 |
+
"""Load the model into memory to make running multiple predictions efficient"""
|
210 |
+
|
211 |
+
if not os.path.exists(CHECKPOINTS_CACHE):
|
212 |
+
download_weights(CHECKPOINTS_URL, CHECKPOINTS_CACHE)
|
213 |
+
|
214 |
+
if not os.path.exists(MODELS_CACHE):
|
215 |
+
download_weights(MODELS_URL, MODELS_CACHE)
|
216 |
+
|
217 |
+
self.face_detection_input_width, self.face_detection_input_height = 640, 640
|
218 |
+
self.app = FaceAnalysis(
|
219 |
+
name="antelopev2",
|
220 |
+
root="./",
|
221 |
+
providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
|
222 |
+
)
|
223 |
+
self.app.prepare(ctx_id=0, det_size=(self.face_detection_input_width, self.face_detection_input_height))
|
224 |
+
|
225 |
+
# Path to InstantID models
|
226 |
+
self.face_adapter = f"./checkpoints/ip-adapter.bin"
|
227 |
+
controlnet_path = f"./checkpoints/ControlNetModel"
|
228 |
+
|
229 |
+
# Load pipeline face ControlNetModel
|
230 |
+
self.controlnet_identitynet = ControlNetModel.from_pretrained(
|
231 |
+
controlnet_path,
|
232 |
+
torch_dtype=DTYPE,
|
233 |
+
cache_dir=CHECKPOINTS_CACHE,
|
234 |
+
local_files_only=True,
|
235 |
+
)
|
236 |
+
self.setup_extra_controlnets()
|
237 |
+
|
238 |
+
self.load_weights("stable-diffusion-xl-base-1.0")
|
239 |
+
self.setup_safety_checker()
|
240 |
+
|
241 |
+
def setup_safety_checker(self):
|
242 |
+
print(f"[~] Seting up safety checker")
|
243 |
+
|
244 |
+
if not os.path.exists(SAFETY_CACHE):
|
245 |
+
download_weights(SAFETY_URL, SAFETY_CACHE)
|
246 |
+
|
247 |
+
self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
|
248 |
+
SAFETY_CACHE,
|
249 |
+
torch_dtype=DTYPE,
|
250 |
+
local_files_only=True,
|
251 |
+
)
|
252 |
+
self.safety_checker.to(DEVICE)
|
253 |
+
self.feature_extractor = CLIPImageProcessor.from_pretrained(FEATURE_EXTRACTOR)
|
254 |
+
|
255 |
+
def run_safety_checker(self, image):
|
256 |
+
safety_checker_input = self.feature_extractor(image, return_tensors="pt").to(
|
257 |
+
DEVICE
|
258 |
+
)
|
259 |
+
np_image = np.array(image)
|
260 |
+
image, has_nsfw_concept = self.safety_checker(
|
261 |
+
images=[np_image],
|
262 |
+
clip_input=safety_checker_input.pixel_values.to(DTYPE),
|
263 |
+
)
|
264 |
+
return image, has_nsfw_concept
|
265 |
+
|
266 |
+
def load_weights(self, sdxl_weights):
|
267 |
+
self.base_weights = sdxl_weights
|
268 |
+
weights_info = SDXL_NAME_TO_PATHLIKE[self.base_weights]
|
269 |
+
|
270 |
+
download_url = weights_info["url"]
|
271 |
+
path_to_weights_dir = weights_info["path"]
|
272 |
+
if not os.path.exists(path_to_weights_dir):
|
273 |
+
download_weights(download_url, path_to_weights_dir)
|
274 |
+
|
275 |
+
is_hugging_face_model = "slug" in weights_info.keys()
|
276 |
+
path_to_weights_file = os.path.join(
|
277 |
+
path_to_weights_dir,
|
278 |
+
weights_info.get("file", ""),
|
279 |
+
)
|
280 |
+
|
281 |
+
print(f"[~] Loading new SDXL weights: {path_to_weights_file}")
|
282 |
+
if is_hugging_face_model:
|
283 |
+
self.pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
|
284 |
+
weights_info["slug"],
|
285 |
+
controlnet=[self.controlnet_identitynet],
|
286 |
+
torch_dtype=DTYPE,
|
287 |
+
cache_dir=CHECKPOINTS_CACHE,
|
288 |
+
local_files_only=True,
|
289 |
+
safety_checker=None,
|
290 |
+
feature_extractor=None,
|
291 |
+
)
|
292 |
+
self.pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(
|
293 |
+
self.pipe.scheduler.config
|
294 |
+
)
|
295 |
+
else: # e.g. .safetensors, NOTE: This functionality is not being used right now
|
296 |
+
self.pipe.from_single_file(
|
297 |
+
path_to_weights_file,
|
298 |
+
controlnet=self.controlnet_identitynet,
|
299 |
+
torch_dtype=DTYPE,
|
300 |
+
cache_dir=CHECKPOINTS_CACHE,
|
301 |
+
)
|
302 |
+
|
303 |
+
self.pipe.load_ip_adapter_instantid(self.face_adapter)
|
304 |
+
self.setup_lcm_lora()
|
305 |
+
self.pipe.cuda()
|
306 |
+
|
307 |
+
def setup_lcm_lora(self):
|
308 |
+
print(f"[~] Seting up LCM (just in case)")
|
309 |
+
|
310 |
+
lcm_lora_key = "models--latent-consistency--lcm-lora-sdxl"
|
311 |
+
lcm_lora_path = f"checkpoints/{lcm_lora_key}"
|
312 |
+
if not os.path.exists(lcm_lora_path):
|
313 |
+
download_weights(
|
314 |
+
f"https://weights.replicate.delivery/default/InstantID/{lcm_lora_key}.tar",
|
315 |
+
lcm_lora_path,
|
316 |
+
)
|
317 |
+
self.pipe.load_lora_weights(
|
318 |
+
"latent-consistency/lcm-lora-sdxl",
|
319 |
+
cache_dir=CHECKPOINTS_CACHE,
|
320 |
+
local_files_only=True,
|
321 |
+
weight_name="pytorch_lora_weights.safetensors",
|
322 |
+
)
|
323 |
+
self.pipe.disable_lora()
|
324 |
+
|
325 |
+
def setup_extra_controlnets(self):
|
326 |
+
print(f"[~] Seting up pose, canny, depth ControlNets")
|
327 |
+
|
328 |
+
controlnet_pose_model = "thibaud/controlnet-openpose-sdxl-1.0"
|
329 |
+
controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
|
330 |
+
controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"
|
331 |
+
|
332 |
+
for controlnet_key in [
|
333 |
+
"models--diffusers--controlnet-canny-sdxl-1.0",
|
334 |
+
"models--diffusers--controlnet-depth-sdxl-1.0-small",
|
335 |
+
"models--thibaud--controlnet-openpose-sdxl-1.0",
|
336 |
+
]:
|
337 |
+
controlnet_path = f"checkpoints/{controlnet_key}"
|
338 |
+
if not os.path.exists(controlnet_path):
|
339 |
+
download_weights(
|
340 |
+
f"https://weights.replicate.delivery/default/InstantID/{controlnet_key}.tar",
|
341 |
+
controlnet_path,
|
342 |
+
)
|
343 |
+
|
344 |
+
controlnet_pose = ControlNetModel.from_pretrained(
|
345 |
+
controlnet_pose_model,
|
346 |
+
torch_dtype=DTYPE,
|
347 |
+
cache_dir=CHECKPOINTS_CACHE,
|
348 |
+
local_files_only=True,
|
349 |
+
).to(DEVICE)
|
350 |
+
controlnet_canny = ControlNetModel.from_pretrained(
|
351 |
+
controlnet_canny_model,
|
352 |
+
torch_dtype=DTYPE,
|
353 |
+
cache_dir=CHECKPOINTS_CACHE,
|
354 |
+
local_files_only=True,
|
355 |
+
).to(DEVICE)
|
356 |
+
controlnet_depth = ControlNetModel.from_pretrained(
|
357 |
+
controlnet_depth_model,
|
358 |
+
torch_dtype=DTYPE,
|
359 |
+
cache_dir=CHECKPOINTS_CACHE,
|
360 |
+
local_files_only=True,
|
361 |
+
).to(DEVICE)
|
362 |
+
|
363 |
+
self.controlnet_map = {
|
364 |
+
"pose": controlnet_pose,
|
365 |
+
"canny": controlnet_canny,
|
366 |
+
"depth": controlnet_depth,
|
367 |
+
}
|
368 |
+
self.controlnet_map_fn = {
|
369 |
+
"pose": openpose,
|
370 |
+
"canny": get_canny_image,
|
371 |
+
"depth": get_depth_map,
|
372 |
+
}
|
373 |
+
|
374 |
+
def generate_image(
|
375 |
+
self,
|
376 |
+
face_image_path,
|
377 |
+
pose_image_path,
|
378 |
+
prompt,
|
379 |
+
negative_prompt,
|
380 |
+
num_steps,
|
381 |
+
identitynet_strength_ratio,
|
382 |
+
adapter_strength_ratio,
|
383 |
+
pose_strength,
|
384 |
+
canny_strength,
|
385 |
+
depth_strength,
|
386 |
+
controlnet_selection,
|
387 |
+
guidance_scale,
|
388 |
+
seed,
|
389 |
+
scheduler,
|
390 |
+
enable_LCM,
|
391 |
+
enhance_face_region,
|
392 |
+
num_images_per_prompt,
|
393 |
+
):
|
394 |
+
if enable_LCM:
|
395 |
+
self.pipe.enable_lora()
|
396 |
+
self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
|
397 |
+
else:
|
398 |
+
self.pipe.disable_lora()
|
399 |
+
scheduler_class_name = scheduler.split("-")[0]
|
400 |
+
|
401 |
+
add_kwargs = {}
|
402 |
+
if len(scheduler.split("-")) > 1:
|
403 |
+
add_kwargs["use_karras_sigmas"] = True
|
404 |
+
if len(scheduler.split("-")) > 2:
|
405 |
+
add_kwargs["algorithm_type"] = "sde-dpmsolver++"
|
406 |
+
scheduler = getattr(diffusers, scheduler_class_name)
|
407 |
+
self.pipe.scheduler = scheduler.from_config(
|
408 |
+
self.pipe.scheduler.config,
|
409 |
+
**add_kwargs,
|
410 |
+
)
|
411 |
+
|
412 |
+
if face_image_path is None:
|
413 |
+
raise Exception(
|
414 |
+
f"Cannot find any input face `image`! Please upload the face `image`"
|
415 |
+
)
|
416 |
+
|
417 |
+
face_image = load_image(face_image_path)
|
418 |
+
face_image = resize_img(face_image)
|
419 |
+
face_image_cv2 = convert_from_image_to_cv2(face_image)
|
420 |
+
height, width, _ = face_image_cv2.shape
|
421 |
+
|
422 |
+
# Extract face features
|
423 |
+
face_info = self.app.get(face_image_cv2)
|
424 |
+
|
425 |
+
if len(face_info) == 0:
|
426 |
+
raise Exception(
|
427 |
+
"Face detector could not find a face in the `image`. Please use a different `image` as input."
|
428 |
+
)
|
429 |
+
|
430 |
+
face_info = sorted(
|
431 |
+
face_info,
|
432 |
+
key=lambda x: (x["bbox"][2] - x["bbox"][0]) * x["bbox"][3] - x["bbox"][1],
|
433 |
+
)[
|
434 |
+
-1
|
435 |
+
] # only use the maximum face
|
436 |
+
face_emb = face_info["embedding"]
|
437 |
+
face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info["kps"])
|
438 |
+
|
439 |
+
img_controlnet = face_image
|
440 |
+
if pose_image_path is not None:
|
441 |
+
pose_image = load_image(pose_image_path)
|
442 |
+
pose_image = resize_img(pose_image, max_side=1024)
|
443 |
+
img_controlnet = pose_image
|
444 |
+
pose_image_cv2 = convert_from_image_to_cv2(pose_image)
|
445 |
+
|
446 |
+
face_info = self.app.get(pose_image_cv2)
|
447 |
+
|
448 |
+
if len(face_info) == 0:
|
449 |
+
raise Exception(
|
450 |
+
"Face detector could not find a face in the `pose_image`. Please use a different `pose_image` as input."
|
451 |
+
)
|
452 |
+
|
453 |
+
face_info = face_info[-1]
|
454 |
+
face_kps = draw_kps(pose_image, face_info["kps"])
|
455 |
+
|
456 |
+
width, height = face_kps.size
|
457 |
+
|
458 |
+
if enhance_face_region:
|
459 |
+
control_mask = np.zeros([height, width, 3])
|
460 |
+
x1, y1, x2, y2 = face_info["bbox"]
|
461 |
+
x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
|
462 |
+
control_mask[y1:y2, x1:x2] = 255
|
463 |
+
control_mask = Image.fromarray(control_mask.astype(np.uint8))
|
464 |
+
else:
|
465 |
+
control_mask = None
|
466 |
+
|
467 |
+
if len(controlnet_selection) > 0:
|
468 |
+
controlnet_scales = {
|
469 |
+
"pose": pose_strength,
|
470 |
+
"canny": canny_strength,
|
471 |
+
"depth": depth_strength,
|
472 |
+
}
|
473 |
+
self.pipe.controlnet = MultiControlNetModel(
|
474 |
+
[self.controlnet_identitynet]
|
475 |
+
+ [self.controlnet_map[s] for s in controlnet_selection]
|
476 |
+
)
|
477 |
+
control_scales = [float(identitynet_strength_ratio)] + [
|
478 |
+
controlnet_scales[s] for s in controlnet_selection
|
479 |
+
]
|
480 |
+
control_images = [face_kps] + [
|
481 |
+
self.controlnet_map_fn[s](img_controlnet).resize((width, height))
|
482 |
+
for s in controlnet_selection
|
483 |
+
]
|
484 |
+
else:
|
485 |
+
self.pipe.controlnet = self.controlnet_identitynet
|
486 |
+
control_scales = float(identitynet_strength_ratio)
|
487 |
+
control_images = face_kps
|
488 |
+
|
489 |
+
generator = torch.Generator(device=DEVICE).manual_seed(seed)
|
490 |
+
|
491 |
+
print("Start inference...")
|
492 |
+
print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")
|
493 |
+
|
494 |
+
self.pipe.set_ip_adapter_scale(adapter_strength_ratio)
|
495 |
+
images = self.pipe(
|
496 |
+
prompt=prompt,
|
497 |
+
negative_prompt=negative_prompt,
|
498 |
+
image_embeds=face_emb,
|
499 |
+
image=control_images,
|
500 |
+
control_mask=control_mask,
|
501 |
+
controlnet_conditioning_scale=control_scales,
|
502 |
+
num_inference_steps=num_steps,
|
503 |
+
guidance_scale=guidance_scale,
|
504 |
+
height=height,
|
505 |
+
width=width,
|
506 |
+
generator=generator,
|
507 |
+
num_images_per_prompt=num_images_per_prompt,
|
508 |
+
).images
|
509 |
+
|
510 |
+
return images
|
511 |
+
|
512 |
+
def predict(
|
513 |
+
self,
|
514 |
+
image: Path = Input(
|
515 |
+
description="Input face image",
|
516 |
+
),
|
517 |
+
pose_image: Path = Input(
|
518 |
+
description="(Optional) reference pose image",
|
519 |
+
default=None,
|
520 |
+
),
|
521 |
+
prompt: str = Input(
|
522 |
+
description="Input prompt",
|
523 |
+
default="a person",
|
524 |
+
),
|
525 |
+
negative_prompt: str = Input(
|
526 |
+
description="Input Negative Prompt",
|
527 |
+
default="",
|
528 |
+
),
|
529 |
+
sdxl_weights: str = Input(
|
530 |
+
description="Pick which base weights you want to use",
|
531 |
+
default="stable-diffusion-xl-base-1.0",
|
532 |
+
choices=[
|
533 |
+
"stable-diffusion-xl-base-1.0",
|
534 |
+
"juggernaut-xl-v8",
|
535 |
+
"afrodite-xl-v2",
|
536 |
+
"albedobase-xl-20",
|
537 |
+
"albedobase-xl-v13",
|
538 |
+
"animagine-xl-30",
|
539 |
+
"anime-art-diffusion-xl",
|
540 |
+
"anime-illust-diffusion-xl",
|
541 |
+
"dreamshaper-xl",
|
542 |
+
"dynavision-xl-v0610",
|
543 |
+
"guofeng4-xl",
|
544 |
+
"nightvision-xl-0791",
|
545 |
+
"omnigen-xl",
|
546 |
+
"pony-diffusion-v6-xl",
|
547 |
+
"protovision-xl-high-fidel",
|
548 |
+
"RealVisXL_V3.0_Turbo",
|
549 |
+
"RealVisXL_V4.0_Lightning",
|
550 |
+
],
|
551 |
+
),
|
552 |
+
face_detection_input_width: int = Input(
|
553 |
+
description="Width of the input image for face detection",
|
554 |
+
default=640,
|
555 |
+
ge=640,
|
556 |
+
le=4096,
|
557 |
+
),
|
558 |
+
face_detection_input_height: int = Input(
|
559 |
+
description="Height of the input image for face detection",
|
560 |
+
default=640,
|
561 |
+
ge=640,
|
562 |
+
le=4096,
|
563 |
+
),
|
564 |
+
scheduler: str = Input(
|
565 |
+
description="Scheduler",
|
566 |
+
choices=[
|
567 |
+
"DEISMultistepScheduler",
|
568 |
+
"HeunDiscreteScheduler",
|
569 |
+
"EulerDiscreteScheduler",
|
570 |
+
"DPMSolverMultistepScheduler",
|
571 |
+
"DPMSolverMultistepScheduler-Karras",
|
572 |
+
"DPMSolverMultistepScheduler-Karras-SDE",
|
573 |
+
],
|
574 |
+
default="EulerDiscreteScheduler",
|
575 |
+
),
|
576 |
+
num_inference_steps: int = Input(
|
577 |
+
description="Number of denoising steps",
|
578 |
+
default=30,
|
579 |
+
ge=1,
|
580 |
+
le=500,
|
581 |
+
),
|
582 |
+
guidance_scale: float = Input(
|
583 |
+
description="Scale for classifier-free guidance",
|
584 |
+
default=7.5,
|
585 |
+
ge=1,
|
586 |
+
le=50,
|
587 |
+
),
|
588 |
+
ip_adapter_scale: float = Input(
|
589 |
+
description="Scale for image adapter strength (for detail)", # adapter_strength_ratio
|
590 |
+
default=0.8,
|
591 |
+
ge=0,
|
592 |
+
le=1.5,
|
593 |
+
),
|
594 |
+
controlnet_conditioning_scale: float = Input(
|
595 |
+
description="Scale for IdentityNet strength (for fidelity)", # identitynet_strength_ratio
|
596 |
+
default=0.8,
|
597 |
+
ge=0,
|
598 |
+
le=1.5,
|
599 |
+
),
|
600 |
+
enable_pose_controlnet: bool = Input(
|
601 |
+
description="Enable Openpose ControlNet, overrides strength if set to false",
|
602 |
+
default=True,
|
603 |
+
),
|
604 |
+
pose_strength: float = Input(
|
605 |
+
description="Openpose ControlNet strength, effective only if `enable_pose_controlnet` is true",
|
606 |
+
default=0.4,
|
607 |
+
ge=0,
|
608 |
+
le=1,
|
609 |
+
),
|
610 |
+
enable_canny_controlnet: bool = Input(
|
611 |
+
description="Enable Canny ControlNet, overrides strength if set to false",
|
612 |
+
default=False,
|
613 |
+
),
|
614 |
+
canny_strength: float = Input(
|
615 |
+
description="Canny ControlNet strength, effective only if `enable_canny_controlnet` is true",
|
616 |
+
default=0.3,
|
617 |
+
ge=0,
|
618 |
+
le=1,
|
619 |
+
),
|
620 |
+
enable_depth_controlnet: bool = Input(
|
621 |
+
description="Enable Depth ControlNet, overrides strength if set to false",
|
622 |
+
default=False,
|
623 |
+
),
|
624 |
+
depth_strength: float = Input(
|
625 |
+
description="Depth ControlNet strength, effective only if `enable_depth_controlnet` is true",
|
626 |
+
default=0.5,
|
627 |
+
ge=0,
|
628 |
+
le=1,
|
629 |
+
),
|
630 |
+
enable_lcm: bool = Input(
|
631 |
+
description="Enable Fast Inference with LCM (Latent Consistency Models) - speeds up inference steps, trade-off is the quality of the generated image. Performs better with close-up portrait face images",
|
632 |
+
default=False,
|
633 |
+
),
|
634 |
+
lcm_num_inference_steps: int = Input(
|
635 |
+
description="Only used when `enable_lcm` is set to True, Number of denoising steps when using LCM",
|
636 |
+
default=5,
|
637 |
+
ge=1,
|
638 |
+
le=10,
|
639 |
+
),
|
640 |
+
lcm_guidance_scale: float = Input(
|
641 |
+
description="Only used when `enable_lcm` is set to True, Scale for classifier-free guidance when using LCM",
|
642 |
+
default=1.5,
|
643 |
+
ge=1,
|
644 |
+
le=20,
|
645 |
+
),
|
646 |
+
enhance_nonface_region: bool = Input(
|
647 |
+
description="Enhance non-face region", default=True
|
648 |
+
),
|
649 |
+
output_format: str = Input(
|
650 |
+
description="Format of the output images",
|
651 |
+
choices=["webp", "jpg", "png"],
|
652 |
+
default="webp",
|
653 |
+
),
|
654 |
+
output_quality: int = Input(
|
655 |
+
description="Quality of the output images, from 0 to 100. 100 is best quality, 0 is lowest quality.",
|
656 |
+
default=80,
|
657 |
+
ge=0,
|
658 |
+
le=100,
|
659 |
+
),
|
660 |
+
seed: int = Input(
|
661 |
+
description="Random seed. Leave blank to randomize the seed",
|
662 |
+
default=None,
|
663 |
+
),
|
664 |
+
num_outputs: int = Input(
|
665 |
+
description="Number of images to output",
|
666 |
+
default=1,
|
667 |
+
ge=1,
|
668 |
+
le=8,
|
669 |
+
),
|
670 |
+
disable_safety_checker: bool = Input(
|
671 |
+
description="Disable safety checker for generated images",
|
672 |
+
default=False,
|
673 |
+
),
|
674 |
+
) -> List[Path]:
|
675 |
+
"""Run a single prediction on the model"""
|
676 |
+
|
677 |
+
# If no seed is provided, generate a random seed
|
678 |
+
if seed is None:
|
679 |
+
seed = int.from_bytes(os.urandom(2), "big")
|
680 |
+
print(f"Using seed: {seed}")
|
681 |
+
|
682 |
+
# Load the weights if they are different from the base weights
|
683 |
+
if sdxl_weights != self.base_weights:
|
684 |
+
self.load_weights(sdxl_weights)
|
685 |
+
|
686 |
+
# Resize the output if the provided dimensions are different from the current ones
|
687 |
+
if self.face_detection_input_width != face_detection_input_width or self.face_detection_input_height != face_detection_input_height:
|
688 |
+
print(f"[!] Resizing output to {face_detection_input_width}x{face_detection_input_height}")
|
689 |
+
self.face_detection_input_width = face_detection_input_width
|
690 |
+
self.face_detection_input_height = face_detection_input_height
|
691 |
+
self.app.prepare(ctx_id=0, det_size=(self.face_detection_input_width, self.face_detection_input_height))
|
692 |
+
|
693 |
+
# Set up ControlNet selection and their respective strength values (if any)
|
694 |
+
controlnet_selection = []
|
695 |
+
if pose_strength > 0 and enable_pose_controlnet:
|
696 |
+
controlnet_selection.append("pose")
|
697 |
+
if canny_strength > 0 and enable_canny_controlnet:
|
698 |
+
controlnet_selection.append("canny")
|
699 |
+
if depth_strength > 0 and enable_depth_controlnet:
|
700 |
+
controlnet_selection.append("depth")
|
701 |
+
|
702 |
+
# Switch to LCM inference steps and guidance scale if LCM is enabled
|
703 |
+
if enable_lcm:
|
704 |
+
num_inference_steps = lcm_num_inference_steps
|
705 |
+
guidance_scale = lcm_guidance_scale
|
706 |
+
|
707 |
+
# Generate
|
708 |
+
images = self.generate_image(
|
709 |
+
face_image_path=str(image),
|
710 |
+
pose_image_path=str(pose_image) if pose_image else None,
|
711 |
+
prompt=prompt,
|
712 |
+
negative_prompt=negative_prompt,
|
713 |
+
num_steps=num_inference_steps,
|
714 |
+
identitynet_strength_ratio=controlnet_conditioning_scale,
|
715 |
+
adapter_strength_ratio=ip_adapter_scale,
|
716 |
+
pose_strength=pose_strength,
|
717 |
+
canny_strength=canny_strength,
|
718 |
+
depth_strength=depth_strength,
|
719 |
+
controlnet_selection=controlnet_selection,
|
720 |
+
scheduler=scheduler,
|
721 |
+
guidance_scale=guidance_scale,
|
722 |
+
seed=seed,
|
723 |
+
enable_LCM=enable_lcm,
|
724 |
+
enhance_face_region=enhance_nonface_region,
|
725 |
+
num_images_per_prompt=num_outputs,
|
726 |
+
)
|
727 |
+
|
728 |
+
# Save the generated images and check for NSFW content
|
729 |
+
output_paths = []
|
730 |
+
for i, output_image in enumerate(images):
|
731 |
+
if not disable_safety_checker:
|
732 |
+
_, has_nsfw_content_list = self.run_safety_checker(output_image)
|
733 |
+
has_nsfw_content = any(has_nsfw_content_list)
|
734 |
+
print(f"NSFW content detected: {has_nsfw_content}")
|
735 |
+
if has_nsfw_content:
|
736 |
+
raise Exception(
|
737 |
+
"NSFW content detected. Try running it again, or try a different prompt."
|
738 |
+
)
|
739 |
+
|
740 |
+
extension = output_format.lower()
|
741 |
+
extension = "jpeg" if extension == "jpg" else extension
|
742 |
+
output_path = f"/tmp/out_{i}.{extension}"
|
743 |
+
|
744 |
+
print(f"[~] Saving to {output_path}...")
|
745 |
+
print(f"[~] Output format: {extension.upper()}")
|
746 |
+
if output_format != "png":
|
747 |
+
print(f"[~] Output quality: {output_quality}")
|
748 |
+
|
749 |
+
save_params = {"format": extension.upper()}
|
750 |
+
if output_format != "png":
|
751 |
+
save_params["quality"] = output_quality
|
752 |
+
save_params["optimize"] = True
|
753 |
+
|
754 |
+
output_image.save(output_path, **save_params)
|
755 |
+
output_paths.append(Path(output_path))
|
756 |
+
return output_paths
|
feature-extractor/preprocessor_config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"crop_size": {
|
3 |
+
"height": 224,
|
4 |
+
"width": 224
|
5 |
+
},
|
6 |
+
"do_center_crop": true,
|
7 |
+
"do_convert_rgb": true,
|
8 |
+
"do_normalize": true,
|
9 |
+
"do_rescale": true,
|
10 |
+
"do_resize": true,
|
11 |
+
"image_mean": [
|
12 |
+
0.48145466,
|
13 |
+
0.4578275,
|
14 |
+
0.40821073
|
15 |
+
],
|
16 |
+
"image_processor_type": "CLIPImageProcessor",
|
17 |
+
"image_std": [
|
18 |
+
0.26862954,
|
19 |
+
0.26130258,
|
20 |
+
0.27577711
|
21 |
+
],
|
22 |
+
"resample": 3,
|
23 |
+
"rescale_factor": 0.00392156862745098,
|
24 |
+
"size": {
|
25 |
+
"shortest_edge": 224
|
26 |
+
}
|
27 |
+
}
|
generated_images/20240723_053704_668578_0.png
ADDED
Git LFS Details
|
generated_images/20240723_053801_148984_0.png
ADDED
Git LFS Details
|
generated_images/20240723_053853_022841_0.png
ADDED
Git LFS Details
|
generated_images/20240723_053948_468290_0.png
ADDED
Git LFS Details
|
generated_images/20240723_054025_692605_0.png
ADDED
Git LFS Details
|
generated_images/20240723_054124_697176_0.png
ADDED
Git LFS Details
|
generation_log.csv
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
image_name,new_file_name,identitynet_strength_ratio,adapter_strength_ratio,num_inference_steps,guidance_scale,seed,success,error_message,style_name,prompt,negative_prompt,time_taken,current_timestamp
|
2 |
+
musk_resize.jpeg,20240723_053704_668578_0.png,1.1491785966677859,0.8654292835406997,50,10.881974934041711,4170043132,True,,(No style),"human, sharp focus","(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",53.160874,2024-07-23 05:37:05
|
3 |
+
sam_resize.png,20240723_053801_148984_0.png,1.0277924316289087,0.9683019180411349,53,11.111615489229361,1039000092,True,,(No style),"human, sharp focus","(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",56.287061,2024-07-23 05:38:01
|
4 |
+
schmidhuber_resize.png,20240723_053853_022841_0.png,1.4917970061395218,0.7393876001187043,48,11.679426057392323,3752244045,True,,(No style),"human, sharp focus","(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",51.661633,2024-07-23 05:38:53
|
5 |
+
kaifu_resize.png,20240723_053948_468290_0.png,1.4485948536834086,0.8122224472625851,52,9.984434112216853,2295950491,True,,(No style),"human, sharp focus","(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",55.366897,2024-07-23 05:39:49
|
6 |
+
pp_0.jpg,20240723_054025_692605_0.png,1.1794069160183727,0.9857350785784462,51,8.76420747179281,2648835109,True,,(No style),"human, sharp focus","(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",36.771416,2024-07-23 05:40:26
|
7 |
+
yann-lecun_resize.jpg,20240723_054124_697176_0.png,1.2770220875965888,0.8245108249424827,56,9.372671733967127,3933691473,True,,(No style),"human, sharp focus","(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",59.069521,2024-07-23 05:41:25
|
gradio_demo/aaa.py
ADDED
@@ -0,0 +1,957 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append('./')
|
3 |
+
|
4 |
+
from typing import Tuple
|
5 |
+
|
6 |
+
import os
|
7 |
+
import cv2
|
8 |
+
import math
|
9 |
+
import torch
|
10 |
+
import random
|
11 |
+
import numpy as np
|
12 |
+
import argparse
|
13 |
+
|
14 |
+
import PIL
|
15 |
+
from PIL import Image
|
16 |
+
|
17 |
+
import diffusers
|
18 |
+
from diffusers.utils import load_image
|
19 |
+
from diffusers.models import ControlNetModel
|
20 |
+
from diffusers import LCMScheduler
|
21 |
+
|
22 |
+
from huggingface_hub import hf_hub_download
|
23 |
+
|
24 |
+
import insightface
|
25 |
+
from insightface.app import FaceAnalysis
|
26 |
+
|
27 |
+
from style_template import styles
|
28 |
+
from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
|
29 |
+
from model_util import load_models_xl, get_torch_device, torch_gc
|
30 |
+
|
31 |
+
|
32 |
+
# global variable
|
33 |
+
MAX_SEED = np.iinfo(np.int32).max
|
34 |
+
device = get_torch_device()
|
35 |
+
dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
|
36 |
+
STYLE_NAMES = list(styles.keys())
|
37 |
+
DEFAULT_STYLE_NAME = "Watercolor"
|
38 |
+
|
39 |
+
# Load face encoder
|
40 |
+
app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
|
41 |
+
app.prepare(ctx_id=0, det_size=(320, 320))
|
42 |
+
|
43 |
+
# Path to InstantID models
|
44 |
+
face_adapter = f'./checkpoints/ip-adapter.bin'
|
45 |
+
controlnet_path = f'./checkpoints/ControlNetModel'
|
46 |
+
|
47 |
+
# Load pipeline
|
48 |
+
controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)
|
49 |
+
|
50 |
+
logo = Image.open("./gradio_demo/logo.png")
|
51 |
+
|
52 |
+
from cv2 import imencode
|
53 |
+
import base64
|
54 |
+
|
55 |
+
# def encode_pil_to_base64_new(pil_image):
|
56 |
+
# print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
|
57 |
+
# image_arr = np.asarray(pil_image)[:,:,::-1]
|
58 |
+
# _, byte_data = imencode('.png', image_arr)
|
59 |
+
# base64_data = base64.b64encode(byte_data)
|
60 |
+
# base64_string_opencv = base64_data.decode("utf-8")
|
61 |
+
# return "data:image/png;base64," + base64_string_opencv
|
62 |
+
|
63 |
+
import gradio as gr
|
64 |
+
|
65 |
+
# gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
|
66 |
+
|
67 |
+
def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
|
68 |
+
|
69 |
+
if pretrained_model_name_or_path.endswith(
|
70 |
+
".ckpt"
|
71 |
+
) or pretrained_model_name_or_path.endswith(".safetensors"):
|
72 |
+
scheduler_kwargs = hf_hub_download(
|
73 |
+
repo_id="wangqixun/YamerMIX_v8",
|
74 |
+
subfolder="scheduler",
|
75 |
+
filename="scheduler_config.json",
|
76 |
+
)
|
77 |
+
|
78 |
+
(tokenizers, text_encoders, unet, _, vae) = load_models_xl(
|
79 |
+
pretrained_model_name_or_path=pretrained_model_name_or_path,
|
80 |
+
scheduler_name=None,
|
81 |
+
weight_dtype=dtype,
|
82 |
+
)
|
83 |
+
|
84 |
+
scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
|
85 |
+
pipe = StableDiffusionXLInstantIDPipeline(
|
86 |
+
vae=vae,
|
87 |
+
text_encoder=text_encoders[0],
|
88 |
+
text_encoder_2=text_encoders[1],
|
89 |
+
tokenizer=tokenizers[0],
|
90 |
+
tokenizer_2=tokenizers[1],
|
91 |
+
unet=unet,
|
92 |
+
scheduler=scheduler,
|
93 |
+
controlnet=controlnet,
|
94 |
+
).to(device)
|
95 |
+
|
96 |
+
else:
|
97 |
+
pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
|
98 |
+
pretrained_model_name_or_path,
|
99 |
+
controlnet=controlnet,
|
100 |
+
torch_dtype=dtype,
|
101 |
+
safety_checker=None,
|
102 |
+
feature_extractor=None,
|
103 |
+
).to(device)
|
104 |
+
|
105 |
+
pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
|
106 |
+
|
107 |
+
pipe.load_ip_adapter_instantid(face_adapter)
|
108 |
+
# load and disable LCM
|
109 |
+
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
|
110 |
+
pipe.disable_lora()
|
111 |
+
|
112 |
+
def remove_tips():
|
113 |
+
print("GG")
|
114 |
+
return gr.update(visible=False)
|
115 |
+
|
116 |
+
|
117 |
+
# prompts = [
|
118 |
+
# ["superman","Vibrant Color"], ["japanese anime character with white/neon hair","Watercolor"],
|
119 |
+
# # ["Suited professional","(No style)"],
|
120 |
+
# ["Scooba diver","Line art"], ["eskimo","Snow"]
|
121 |
+
# ]
|
122 |
+
|
123 |
+
def convert_from_cv2_to_image(img: np.ndarray) -> Image:
|
124 |
+
return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
|
125 |
+
|
126 |
+
def convert_from_image_to_cv2(img: Image) -> np.ndarray:
|
127 |
+
return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
|
128 |
+
|
129 |
+
def run_for_prompts1(face_file,style,progress=gr.Progress(track_tqdm=True)):
|
130 |
+
# if email != "":
|
131 |
+
p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
|
132 |
+
return generate_image(face_file, p[0], n)
|
133 |
+
# else:
|
134 |
+
# raise gr.Error("Email ID is compulsory")
|
135 |
+
def run_for_prompts2(face_file,style,progress=gr.Progress(track_tqdm=True)):
|
136 |
+
# if email != "":
|
137 |
+
p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
|
138 |
+
return generate_image(face_file, p[1], n)
|
139 |
+
def run_for_prompts3(face_file,style,progress=gr.Progress(track_tqdm=True)):
|
140 |
+
# if email != "":
|
141 |
+
p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
|
142 |
+
return generate_image(face_file, p[2], n)
|
143 |
+
def run_for_prompts4(face_file,style,progress=gr.Progress(track_tqdm=True)):
|
144 |
+
# if email != "":
|
145 |
+
p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
|
146 |
+
return generate_image(face_file, p[3], n)
|
147 |
+
|
148 |
+
# def validate_and_process(face_file, style, email):
|
149 |
+
|
150 |
+
# # Your processing logic here
|
151 |
+
# gallery1, gallery2, gallery3, gallery4 = run_for_prompts1(face_file, style), run_for_prompts2(face_file, style), run_for_prompts3(face_file, style), run_for_prompts4(face_file, style)
|
152 |
+
# return gallery1, gallery2, gallery3, gallery4
|
153 |
+
|
154 |
+
def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
|
155 |
+
stickwidth = 4
|
156 |
+
limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
|
157 |
+
kps = np.array(kps)
|
158 |
+
|
159 |
+
w, h = image_pil.size
|
160 |
+
out_img = np.zeros([h, w, 3])
|
161 |
+
|
162 |
+
for i in range(len(limbSeq)):
|
163 |
+
index = limbSeq[i]
|
164 |
+
color = color_list[index[0]]
|
165 |
+
|
166 |
+
x = kps[index][:, 0]
|
167 |
+
y = kps[index][:, 1]
|
168 |
+
length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
|
169 |
+
angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
|
170 |
+
polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
|
171 |
+
out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
|
172 |
+
out_img = (out_img * 0.6).astype(np.uint8)
|
173 |
+
|
174 |
+
for idx_kp, kp in enumerate(kps):
|
175 |
+
color = color_list[idx_kp]
|
176 |
+
x, y = kp
|
177 |
+
out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)
|
178 |
+
|
179 |
+
out_img_pil = Image.fromarray(out_img.astype(np.uint8))
|
180 |
+
return out_img_pil
|
181 |
+
|
182 |
+
def resize_img(input_image, max_side=640, min_side=640, size=None,
|
183 |
+
pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
|
184 |
+
|
185 |
+
w, h = input_image.size
|
186 |
+
print(w)
|
187 |
+
print(h)
|
188 |
+
if size is not None:
|
189 |
+
w_resize_new, h_resize_new = size
|
190 |
+
else:
|
191 |
+
ratio = min_side / min(h, w)
|
192 |
+
w, h = round(ratio*w), round(ratio*h)
|
193 |
+
ratio = max_side / max(h, w)
|
194 |
+
input_image = input_image.resize([round(ratio*w), round(ratio*h)], mode)
|
195 |
+
w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
|
196 |
+
h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
|
197 |
+
input_image = input_image.resize([w_resize_new, h_resize_new], mode)
|
198 |
+
|
199 |
+
if pad_to_max_side:
|
200 |
+
res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
|
201 |
+
offset_x = (max_side - w_resize_new) // 2
|
202 |
+
offset_y = (max_side - h_resize_new) // 2
|
203 |
+
res[offset_y:offset_y+h_resize_new, offset_x:offset_x+w_resize_new] = np.array(input_image)
|
204 |
+
input_image = Image.fromarray(res)
|
205 |
+
return input_image
|
206 |
+
|
207 |
+
# def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
|
208 |
+
# p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
|
209 |
+
# return p.replace("{prompt}", positive), n + ' ' + negative
|
210 |
+
|
211 |
+
def generate_image(face_image,prompt,negative_prompt):
    """Run the InstantID SDXL pipeline for one input face.

    Relies on module-level globals: ``pipe`` (the InstantID pipeline),
    ``app`` (insightface FaceAnalysis), ``device`` and ``MAX_SEED``.

    Args:
        face_image: Input PIL image containing the face to preserve.
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.

    Returns:
        The first generated PIL image.

    Raises:
        gr.Error: If no input image is provided or no face is detected.
    """
    # Generation settings are hard-coded here instead of being UI inputs.
    pose_image_path = None
    # prompt = "superman"
    enable_LCM = False
    identitynet_strength_ratio = 0.95  # ControlNet (IdentityNet) conditioning scale
    adapter_strength_ratio = 0.60      # IP-adapter scale
    num_steps = 15
    guidance_scale = 8.5
    seed = random.randint(0, MAX_SEED)  # fresh random seed every call
    # negative_prompt = ""
    # negative_prompt += neg
    enhance_face_region = True
    # enable_LCM is False above, so this always takes the else branch:
    # LoRA disabled and a plain Euler scheduler.
    if enable_LCM:
        pipe.enable_lora()
        pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    else:
        pipe.disable_lora()
        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

    if face_image is None:
        raise gr.Error(f"Cannot find any input face image! Please upload the face image")

    # if prompt is None:
    #     prompt = "a person"

    # apply the style template
    # prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)

    # face_image = load_image(face_image_path)
    face_image = resize_img(face_image)
    face_image_cv2 = convert_from_image_to_cv2(face_image)
    height, width, _ = face_image_cv2.shape

    # Extract face features
    face_info = app.get(face_image_cv2)

    if len(face_info) == 0:
        raise gr.Error(f"Cannot find any face in the image! Please upload another person image")

    face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1] # only use the maximum face
    face_emb = face_info['embedding']
    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])

    # Optional pose reference — dead branch here since pose_image_path is
    # hard-coded to None above.
    if pose_image_path is not None:
        pose_image = load_image(pose_image_path)
        pose_image = resize_img(pose_image)
        pose_image_cv2 = convert_from_image_to_cv2(pose_image)

        face_info = app.get(pose_image_cv2)

        if len(face_info) == 0:
            raise gr.Error(f"Cannot find any face in the reference image! Please upload another person image")

        face_info = face_info[-1]
        face_kps = draw_kps(pose_image, face_info['kps'])

        # NOTE(review): indentation reconstructed from a mangled diff — this
        # follows the upstream InstantID demo, where the size update happens
        # only when a pose reference is used; confirm against the repo.
        width, height = face_kps.size

    if enhance_face_region:
        # Restrict ControlNet conditioning to the detected face bounding box.
        control_mask = np.zeros([height, width, 3])
        x1, y1, x2, y2 = face_info["bbox"]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        control_mask[y1:y2, x1:x2] = 255
        control_mask = Image.fromarray(control_mask.astype(np.uint8))
    else:
        control_mask = None

    generator = torch.Generator(device=device).manual_seed(seed)

    print("Start inference...")
    print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

    pipe.set_ip_adapter_scale(adapter_strength_ratio)
    images = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image_embeds=face_emb,
        image=face_kps,
        control_mask=control_mask,
        controlnet_conditioning_scale=float(identitynet_strength_ratio),
        num_inference_steps=num_steps,
        guidance_scale=guidance_scale,
        height=height,
        width=width,
        generator=generator,
        # num_images_per_prompt = 4
    ).images

    return images[0]
|
300 |
+
|
301 |
+
### Description
# Markdown/HTML fragments and CSS used by the Gradio UI below.
title = r"""
<h1 align="center">Choose your AVATAR</h1>
"""

description = r"""
<h2> Powered by IDfy </h2>"""

article = r""""""  # footer markdown (currently empty)

tips = r""""""  # usage-tips markdown (currently empty, widget kept hidden)

# js = ''' '''

# Custom CSS: wide light-blue container, scrollable image gallery,
# no margins between row elements.
css = '''
.gradio-container {width: 95% !important; background-color: #E6F3FF;}
.image-gallery {height: 100vh !important; overflow: auto;}
.gradio-row .gradio-element { margin: 0 !important; }
'''
|
320 |
+
# with gr.Blocks(css=css, js=js) as demo:
|
321 |
+
|
322 |
+
# # description
|
323 |
+
# gr.Markdown(title)
|
324 |
+
# with gr.Row():
|
325 |
+
# gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
|
326 |
+
# gr.Markdown(description)
|
327 |
+
# with gr.Row():
|
328 |
+
# with gr.Column():
|
329 |
+
# style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
|
330 |
+
# face_file = gr.Image(label="Upload a photo of your face", type="pil",sources="webcam")
|
331 |
+
# submit = gr.Button("Submit", variant="primary")
|
332 |
+
# with gr.Column():
|
333 |
+
# with gr.Row():
|
334 |
+
# gallery1 = gr.Image(label="Generated Images")
|
335 |
+
# gallery2 = gr.Image(label="Generated Images")
|
336 |
+
# with gr.Row():
|
337 |
+
# gallery3 = gr.Image(label="Generated Images")
|
338 |
+
# gallery4 = gr.Image(label="Generated Images")
|
339 |
+
# email = gr.Textbox(label="Email",
|
340 |
+
# info="Enter your email address",
|
341 |
+
# value="")
|
342 |
+
# # submit1 = gr.Button("Store")
|
343 |
+
|
344 |
+
# usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)
|
345 |
+
|
346 |
+
# face_file.upload(
|
347 |
+
# fn=remove_tips,
|
348 |
+
# outputs=usage_tips,
|
349 |
+
# queue=True,
|
350 |
+
# api_name=False,
|
351 |
+
# show_progress = "full"
|
352 |
+
# ).then(
|
353 |
+
# fn=run_for_prompts1,
|
354 |
+
# inputs=[face_file,style],
|
355 |
+
# outputs=[gallery1]
|
356 |
+
# ).then(
|
357 |
+
# fn=run_for_prompts2,
|
358 |
+
# inputs=[face_file,style],
|
359 |
+
# outputs=[gallery2]
|
360 |
+
# ).then(
|
361 |
+
# fn=run_for_prompts3,
|
362 |
+
# inputs=[face_file,style],
|
363 |
+
# outputs=[gallery3]
|
364 |
+
# ).then(
|
365 |
+
# fn=run_for_prompts4,
|
366 |
+
# inputs=[face_file,style],
|
367 |
+
# outputs=[gallery4]
|
368 |
+
# )
|
369 |
+
# submit.click(
|
370 |
+
# fn=remove_tips,
|
371 |
+
# outputs=usage_tips,
|
372 |
+
# queue=True,
|
373 |
+
# api_name=False,
|
374 |
+
# show_progress = "full"
|
375 |
+
# ).then(
|
376 |
+
# fn=run_for_prompts1,
|
377 |
+
# inputs=[face_file,style],
|
378 |
+
# outputs=[gallery1]
|
379 |
+
# ).then(
|
380 |
+
# fn=run_for_prompts2,
|
381 |
+
# inputs=[face_file,style],
|
382 |
+
# outputs=[gallery2]
|
383 |
+
# ).then(
|
384 |
+
# fn=run_for_prompts3,
|
385 |
+
# inputs=[face_file,style],
|
386 |
+
# outputs=[gallery3]
|
387 |
+
# ).then(
|
388 |
+
# fn=run_for_prompts4,
|
389 |
+
# inputs=[face_file,style],
|
390 |
+
# outputs=[gallery4]
|
391 |
+
# )
|
392 |
+
|
393 |
+
# # submit1.click(
|
394 |
+
# # fn=store_images,
|
395 |
+
# # inputs=[email,gallery1,gallery2,gallery3,gallery4],
|
396 |
+
# # outputs=None)
|
397 |
+
|
398 |
+
|
399 |
+
|
400 |
+
# gr.Markdown(article)
|
401 |
+
|
402 |
+
# demo.launch(share=True)
|
403 |
+
|
404 |
+
# Build the Gradio UI: style picker + webcam capture on the left,
# a 2x2 grid of generated-image slots on the right.
with gr.Blocks(css=css) as demo:

    # description
    gr.Markdown(title)
    with gr.Row():
        gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
        gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
            face_file = gr.Image(label="Upload a photo of your face", type="pil",sources="webcam")
            submit = gr.Button("Submit", variant="primary")
        with gr.Column():
            with gr.Row():
                gallery1 = gr.Image(label="Generated Images")
                gallery2 = gr.Image(label="Generated Images")
            with gr.Row():
                gallery3 = gr.Image(label="Generated Images")
                gallery4 = gr.Image(label="Generated Images")
            # email is collected but not wired to any handler below.
            email = gr.Textbox(label="Email",
                        info="Enter your email address",
                        value="")

    usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)
    # identitynet_strength_ratio = gr.Slider(
    #     label="IdentityNet strength (for fidelity)",
    #     minimum=0,
    #     maximum=1.5,
    #     step=0.05,
    #     value=0.95,
    # )
    # adapter_strength_ratio = gr.Slider(
    #     label="Image adapter strength (for detail)",
    #     minimum=0,
    #     maximum=1.5,
    #     step=0.05,
    #     value=0.60,
    # )
    # negative_prompt = gr.Textbox(
    #     label="Negative Prompt",
    #     placeholder="low quality",
    #     value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
    # )
    # num_steps = gr.Slider(
    #     label="Number of sample steps",
    #     minimum=15,
    #     maximum=100,
    #     step=1,
    #     value=5 if enable_lcm_arg else 15,
    # )
    # guidance_scale = gr.Slider(
    #     label="Guidance scale",
    #     minimum=0.1,
    #     maximum=10.0,
    #     step=0.1,
    #     value=0 if enable_lcm_arg else 8.5,
    # )
    # if email is None:
    #     print("STOPPPP")
    #     raise gr.Error("Email ID is compulsory")

    # Uploading a face first hides the tips, then fills the four galleries
    # sequentially — one styled prompt per gallery slot.
    face_file.upload(
        fn=remove_tips,
        outputs=usage_tips,
        queue=True,
        api_name=False,
        show_progress = "full"
    ).then(
        fn=run_for_prompts1,
        inputs=[face_file,style],
        outputs=[gallery1]
    ).then(
        fn=run_for_prompts2,
        inputs=[face_file,style],
        outputs=[gallery2]
    ).then(
        fn=run_for_prompts3,
        inputs=[face_file,style],
        outputs=[gallery3]
    ).then(
        fn=run_for_prompts4,
        inputs=[face_file,style],
        outputs=[gallery4]
    )
    # The Submit button triggers the exact same chain as the upload event.
    submit.click(
        fn=remove_tips,
        outputs=usage_tips,
        queue=True,
        api_name=False,
        show_progress = "full"
    ).then(
        fn=run_for_prompts1,
        inputs=[face_file,style],
        outputs=[gallery1]
    ).then(
        fn=run_for_prompts2,
        inputs=[face_file,style],
        outputs=[gallery2]
    ).then(
        fn=run_for_prompts3,
        inputs=[face_file,style],
        outputs=[gallery3]
    ).then(
        fn=run_for_prompts4,
        inputs=[face_file,style],
        outputs=[gallery4]
    )


    gr.Markdown(article)

# NOTE(review): launched at import time, before the __main__ guard below —
# indentation reconstructed from a mangled diff; confirm placement.
demo.launch(share=True)
|
515 |
+
|
516 |
+
# Script entry point: parse the base-model path and run main() with
# LCM disabled (second positional argument False).
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--pretrained_model_name_or_path", type=str, default="wangqixun/YamerMIX_v8")
    args = parser.parse_args()

    main(args.pretrained_model_name_or_path, False)
|
522 |
+
|
523 |
+
|
524 |
+
# import sys
|
525 |
+
# sys.path.append('./')
|
526 |
+
|
527 |
+
# from typing import Tuple
|
528 |
+
|
529 |
+
# import os
|
530 |
+
# import cv2
|
531 |
+
# import math
|
532 |
+
# import torch
|
533 |
+
# import random
|
534 |
+
# import numpy as np
|
535 |
+
# import argparse
|
536 |
+
|
537 |
+
# import PIL
|
538 |
+
# from PIL import Image
|
539 |
+
|
540 |
+
# import diffusers
|
541 |
+
# from diffusers.utils import load_image
|
542 |
+
# from diffusers.models import ControlNetModel
|
543 |
+
# from diffusers import LCMScheduler
|
544 |
+
|
545 |
+
# from huggingface_hub import hf_hub_download
|
546 |
+
|
547 |
+
# import insightface
|
548 |
+
# from insightface.app import FaceAnalysis
|
549 |
+
|
550 |
+
# from style_template import styles
|
551 |
+
# from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
|
552 |
+
# from model_util import load_models_xl, get_torch_device, torch_gc
|
553 |
+
|
554 |
+
|
555 |
+
# # global variable
|
556 |
+
# MAX_SEED = np.iinfo(np.int32).max
|
557 |
+
# device = get_torch_device()
|
558 |
+
# dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
|
559 |
+
# STYLE_NAMES = list(styles.keys())
|
560 |
+
# DEFAULT_STYLE_NAME = "Watercolor"
|
561 |
+
|
562 |
+
# # Load face encoder
|
563 |
+
# app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
|
564 |
+
# app.prepare(ctx_id=0, det_size=(320, 320))
|
565 |
+
|
566 |
+
# # Path to InstantID models
|
567 |
+
# face_adapter = f'./checkpoints/ip-adapter.bin'
|
568 |
+
# controlnet_path = f'./checkpoints/ControlNetModel'
|
569 |
+
|
570 |
+
# # Load pipeline
|
571 |
+
# controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)
|
572 |
+
|
573 |
+
# logo = Image.open("./gradio_demo/logo.png")
|
574 |
+
|
575 |
+
# from cv2 import imencode
|
576 |
+
# import base64
|
577 |
+
|
578 |
+
# # def encode_pil_to_base64_new(pil_image):
|
579 |
+
# # print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
|
580 |
+
# # image_arr = np.asarray(pil_image)[:,:,::-1]
|
581 |
+
# # _, byte_data = imencode('.png', image_arr)
|
582 |
+
# # base64_data = base64.b64encode(byte_data)
|
583 |
+
# # base64_string_opencv = base64_data.decode("utf-8")
|
584 |
+
# # return "data:image/png;base64," + base64_string_opencv
|
585 |
+
|
586 |
+
# import gradio as gr
|
587 |
+
|
588 |
+
# # gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
|
589 |
+
|
590 |
+
# def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
|
591 |
+
|
592 |
+
# if pretrained_model_name_or_path.endswith(
|
593 |
+
# ".ckpt"
|
594 |
+
# ) or pretrained_model_name_or_path.endswith(".safetensors"):
|
595 |
+
# scheduler_kwargs = hf_hub_download(
|
596 |
+
# repo_id="wangqixun/YamerMIX_v8",
|
597 |
+
# subfolder="scheduler",
|
598 |
+
# filename="scheduler_config.json",
|
599 |
+
# )
|
600 |
+
|
601 |
+
# (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
|
602 |
+
# pretrained_model_name_or_path=pretrained_model_name_or_path,
|
603 |
+
# scheduler_name=None,
|
604 |
+
# weight_dtype=dtype,
|
605 |
+
# )
|
606 |
+
|
607 |
+
# scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
|
608 |
+
# pipe = StableDiffusionXLInstantIDPipeline(
|
609 |
+
# vae=vae,
|
610 |
+
# text_encoder=text_encoders[0],
|
611 |
+
# text_encoder_2=text_encoders[1],
|
612 |
+
# tokenizer=tokenizers[0],
|
613 |
+
# tokenizer_2=tokenizers[1],
|
614 |
+
# unet=unet,
|
615 |
+
# scheduler=scheduler,
|
616 |
+
# controlnet=controlnet,
|
617 |
+
# ).to(device)
|
618 |
+
|
619 |
+
# else:
|
620 |
+
# pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
|
621 |
+
# pretrained_model_name_or_path,
|
622 |
+
# controlnet=controlnet,
|
623 |
+
# torch_dtype=dtype,
|
624 |
+
# safety_checker=None,
|
625 |
+
# feature_extractor=None,
|
626 |
+
# ).to(device)
|
627 |
+
|
628 |
+
# pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
|
629 |
+
|
630 |
+
# pipe.load_ip_adapter_instantid(face_adapter)
|
631 |
+
# # load and disable LCM
|
632 |
+
# pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
|
633 |
+
# pipe.disable_lora()
|
634 |
+
|
635 |
+
# def remove_tips():
|
636 |
+
# return gr.update(visible=False)
|
637 |
+
|
638 |
+
|
639 |
+
# # prompts = [
|
640 |
+
# # ["superman","Vibrant Color"], ["japanese anime character with white/neon hair","Watercolor"],
|
641 |
+
# # # ["Suited professional","(No style)"],
|
642 |
+
# # ["Scooba diver","Line art"], ["eskimo","Snow"]
|
643 |
+
# # ]
|
644 |
+
|
645 |
+
# def convert_from_cv2_to_image(img: np.ndarray) -> Image:
|
646 |
+
# return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
|
647 |
+
|
648 |
+
# def convert_from_image_to_cv2(img: Image) -> np.ndarray:
|
649 |
+
# return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
|
650 |
+
|
651 |
+
# def run_for_prompts1(face_file,style,progress=gr.Progress(track_tqdm=True)):
|
652 |
+
# # if email != "":
|
653 |
+
# p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
|
654 |
+
# return generate_image(face_file, p[0], n)
|
655 |
+
# # else:
|
656 |
+
# # raise gr.Error("Email ID is compulsory")
|
657 |
+
# def run_for_prompts2(face_file,style,progress=gr.Progress(track_tqdm=True)):
|
658 |
+
# # if email != "":
|
659 |
+
# p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
|
660 |
+
# return generate_image(face_file, p[1], n)
|
661 |
+
# def run_for_prompts3(face_file,style,progress=gr.Progress(track_tqdm=True)):
|
662 |
+
# # if email != "":
|
663 |
+
# p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
|
664 |
+
# return generate_image(face_file, p[2], n)
|
665 |
+
# def run_for_prompts4(face_file,style,progress=gr.Progress(track_tqdm=True)):
|
666 |
+
# # if email != "":
|
667 |
+
# p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
|
668 |
+
# return generate_image(face_file, p[3], n)
|
669 |
+
|
670 |
+
# # def validate_and_process(face_file, style, email):
|
671 |
+
|
672 |
+
# # # Your processing logic here
|
673 |
+
# # gallery1, gallery2, gallery3, gallery4 = run_for_prompts1(face_file, style), run_for_prompts2(face_file, style), run_for_prompts3(face_file, style), run_for_prompts4(face_file, style)
|
674 |
+
# # return gallery1, gallery2, gallery3, gallery4
|
675 |
+
|
676 |
+
# def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
|
677 |
+
# stickwidth = 4
|
678 |
+
# limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
|
679 |
+
# kps = np.array(kps)
|
680 |
+
|
681 |
+
# w, h = image_pil.size
|
682 |
+
# out_img = np.zeros([h, w, 3])
|
683 |
+
|
684 |
+
# for i in range(len(limbSeq)):
|
685 |
+
# index = limbSeq[i]
|
686 |
+
# color = color_list[index[0]]
|
687 |
+
|
688 |
+
# x = kps[index][:, 0]
|
689 |
+
# y = kps[index][:, 1]
|
690 |
+
# length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
|
691 |
+
# angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
|
692 |
+
# polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
|
693 |
+
# out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
|
694 |
+
# out_img = (out_img * 0.6).astype(np.uint8)
|
695 |
+
|
696 |
+
# for idx_kp, kp in enumerate(kps):
|
697 |
+
# color = color_list[idx_kp]
|
698 |
+
# x, y = kp
|
699 |
+
# out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)
|
700 |
+
|
701 |
+
# out_img_pil = Image.fromarray(out_img.astype(np.uint8))
|
702 |
+
# return out_img_pil
|
703 |
+
|
704 |
+
# def resize_img(input_image, max_side=640, min_side=640, size=None,
|
705 |
+
# pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
|
706 |
+
|
707 |
+
# w, h = input_image.size
|
708 |
+
# print(w)
|
709 |
+
# print(h)
|
710 |
+
# if size is not None:
|
711 |
+
# w_resize_new, h_resize_new = size
|
712 |
+
# else:
|
713 |
+
# ratio = min_side / min(h, w)
|
714 |
+
# w, h = round(ratio*w), round(ratio*h)
|
715 |
+
# ratio = max_side / max(h, w)
|
716 |
+
# input_image = input_image.resize([round(ratio*w), round(ratio*h)], mode)
|
717 |
+
# w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
|
718 |
+
# h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
|
719 |
+
# input_image = input_image.resize([w_resize_new, h_resize_new], mode)
|
720 |
+
|
721 |
+
# if pad_to_max_side:
|
722 |
+
# res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
|
723 |
+
# offset_x = (max_side - w_resize_new) // 2
|
724 |
+
# offset_y = (max_side - h_resize_new) // 2
|
725 |
+
# res[offset_y:offset_y+h_resize_new, offset_x:offset_x+w_resize_new] = np.array(input_image)
|
726 |
+
# input_image = Image.fromarray(res)
|
727 |
+
# return input_image
|
728 |
+
|
729 |
+
# # def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
|
730 |
+
# # p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
|
731 |
+
# # return p.replace("{prompt}", positive), n + ' ' + negative
|
732 |
+
|
733 |
+
# def generate_image(face_image,prompt,negative_prompt):
|
734 |
+
# pose_image_path = None
|
735 |
+
# # prompt = "superman"
|
736 |
+
# enable_LCM = False
|
737 |
+
# identitynet_strength_ratio = 0.95
|
738 |
+
# adapter_strength_ratio = 0.60
|
739 |
+
# num_steps = 15
|
740 |
+
# guidance_scale = 8.5
|
741 |
+
# seed = random.randint(0, MAX_SEED)
|
742 |
+
# # negative_prompt = ""
|
743 |
+
# # negative_prompt += neg
|
744 |
+
# enhance_face_region = True
|
745 |
+
# if enable_LCM:
|
746 |
+
# pipe.enable_lora()
|
747 |
+
# pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
748 |
+
# else:
|
749 |
+
# pipe.disable_lora()
|
750 |
+
# pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
|
751 |
+
|
752 |
+
# if face_image is None:
|
753 |
+
# raise gr.Error(f"Cannot find any input face image! Please upload the face image")
|
754 |
+
|
755 |
+
# # if prompt is None:
|
756 |
+
# # prompt = "a person"
|
757 |
+
|
758 |
+
# # apply the style template
|
759 |
+
# # prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
|
760 |
+
|
761 |
+
# # face_image = load_image(face_image_path)
|
762 |
+
# face_image = resize_img(face_image)
|
763 |
+
# face_image_cv2 = convert_from_image_to_cv2(face_image)
|
764 |
+
# height, width, _ = face_image_cv2.shape
|
765 |
+
|
766 |
+
# # Extract face features
|
767 |
+
# face_info = app.get(face_image_cv2)
|
768 |
+
|
769 |
+
# if len(face_info) == 0:
|
770 |
+
# raise gr.Error(f"Cannot find any face in the image! Please upload another person image")
|
771 |
+
|
772 |
+
# face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1] # only use the maximum face
|
773 |
+
# face_emb = face_info['embedding']
|
774 |
+
# face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])
|
775 |
+
|
776 |
+
# if pose_image_path is not None:
|
777 |
+
# pose_image = load_image(pose_image_path)
|
778 |
+
# pose_image = resize_img(pose_image)
|
779 |
+
# pose_image_cv2 = convert_from_image_to_cv2(pose_image)
|
780 |
+
|
781 |
+
# face_info = app.get(pose_image_cv2)
|
782 |
+
|
783 |
+
# if len(face_info) == 0:
|
784 |
+
# raise gr.Error(f"Cannot find any face in the reference image! Please upload another person image")
|
785 |
+
|
786 |
+
# face_info = face_info[-1]
|
787 |
+
# face_kps = draw_kps(pose_image, face_info['kps'])
|
788 |
+
|
789 |
+
# width, height = face_kps.size
|
790 |
+
|
791 |
+
# if enhance_face_region:
|
792 |
+
# control_mask = np.zeros([height, width, 3])
|
793 |
+
# x1, y1, x2, y2 = face_info["bbox"]
|
794 |
+
# x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
|
795 |
+
# control_mask[y1:y2, x1:x2] = 255
|
796 |
+
# control_mask = Image.fromarray(control_mask.astype(np.uint8))
|
797 |
+
# else:
|
798 |
+
# control_mask = None
|
799 |
+
|
800 |
+
# generator = torch.Generator(device=device).manual_seed(seed)
|
801 |
+
|
802 |
+
# print("Start inference...")
|
803 |
+
# print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")
|
804 |
+
|
805 |
+
# pipe.set_ip_adapter_scale(adapter_strength_ratio)
|
806 |
+
# images = pipe(
|
807 |
+
# prompt=prompt,
|
808 |
+
# negative_prompt=negative_prompt,
|
809 |
+
# image_embeds=face_emb,
|
810 |
+
# image=face_kps,
|
811 |
+
# control_mask=control_mask,
|
812 |
+
# controlnet_conditioning_scale=float(identitynet_strength_ratio),
|
813 |
+
# num_inference_steps=num_steps,
|
814 |
+
# guidance_scale=guidance_scale,
|
815 |
+
# height=height,
|
816 |
+
# width=width,
|
817 |
+
# generator=generator,
|
818 |
+
# # num_images_per_prompt = 4
|
819 |
+
# ).images
|
820 |
+
|
821 |
+
# return images[0]
|
822 |
+
|
823 |
+
# ### Description
|
824 |
+
# title = r"""
|
825 |
+
# <h1 align="center">Choose your AVATAR</h1>
|
826 |
+
# """
|
827 |
+
|
828 |
+
# description = r"""
|
829 |
+
# <h2> Powered by IDfy </h2>"""
|
830 |
+
|
831 |
+
# article = r""""""
|
832 |
+
|
833 |
+
# tips = r""""""
|
834 |
+
|
835 |
+
# css = '''
|
836 |
+
# .gradio-container {width: 95% !important; background-color: #E6F3FF;}
|
837 |
+
# .image-gallery {height: 100vh !important; overflow: auto;}
|
838 |
+
# .gradio-row .gradio-element { margin: 0 !important; }
|
839 |
+
# '''
|
840 |
+
# with gr.Blocks(css=css) as demo:
|
841 |
+
|
842 |
+
# # description
|
843 |
+
# gr.Markdown(title)
|
844 |
+
# with gr.Row():
|
845 |
+
# gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
|
846 |
+
# gr.Markdown(description)
|
847 |
+
# with gr.Row():
|
848 |
+
# with gr.Column():
|
849 |
+
# style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
|
850 |
+
# face_file = gr.Image(label="Upload a photo of your face", type="pil",sources="webcam")
|
851 |
+
# submit = gr.Button("Submit", variant="primary")
|
852 |
+
# with gr.Column():
|
853 |
+
# with gr.Row():
|
854 |
+
# gallery1 = gr.Image(label="Generated Images")
|
855 |
+
# gallery2 = gr.Image(label="Generated Images")
|
856 |
+
# with gr.Row():
|
857 |
+
# gallery3 = gr.Image(label="Generated Images")
|
858 |
+
# gallery4 = gr.Image(label="Generated Images")
|
859 |
+
# email = gr.Textbox(label="Email",
|
860 |
+
# info="Enter your email address",
|
861 |
+
# value="")
|
862 |
+
|
863 |
+
# usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)
|
864 |
+
# # identitynet_strength_ratio = gr.Slider(
|
865 |
+
# # label="IdentityNet strength (for fidelity)",
|
866 |
+
# # minimum=0,
|
867 |
+
# # maximum=1.5,
|
868 |
+
# # step=0.05,
|
869 |
+
# # value=0.95,
|
870 |
+
# # )
|
871 |
+
# # adapter_strength_ratio = gr.Slider(
|
872 |
+
# # label="Image adapter strength (for detail)",
|
873 |
+
# # minimum=0,
|
874 |
+
# # maximum=1.5,
|
875 |
+
# # step=0.05,
|
876 |
+
# # value=0.60,
|
877 |
+
# # )
|
878 |
+
# # negative_prompt = gr.Textbox(
|
879 |
+
# # label="Negative Prompt",
|
880 |
+
# # placeholder="low quality",
|
881 |
+
# # value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
|
882 |
+
# # )
|
883 |
+
# # num_steps = gr.Slider(
|
884 |
+
# # label="Number of sample steps",
|
885 |
+
# # minimum=15,
|
886 |
+
# # maximum=100,
|
887 |
+
# # step=1,
|
888 |
+
# # value=5 if enable_lcm_arg else 15,
|
889 |
+
# # )
|
890 |
+
# # guidance_scale = gr.Slider(
|
891 |
+
# # label="Guidance scale",
|
892 |
+
# # minimum=0.1,
|
893 |
+
# # maximum=10.0,
|
894 |
+
# # step=0.1,
|
895 |
+
# # value=0 if enable_lcm_arg else 8.5,
|
896 |
+
# # )
|
897 |
+
# # if email is None:
|
898 |
+
# # print("STOPPPP")
|
899 |
+
# # raise gr.Error("Email ID is compulsory")
|
900 |
+
# face_file.upload(
|
901 |
+
# fn=remove_tips,
|
902 |
+
# outputs=usage_tips,
|
903 |
+
# queue=True,
|
904 |
+
# api_name=False,
|
905 |
+
# show_progress = "full"
|
906 |
+
# ).then(
|
907 |
+
# fn=run_for_prompts1,
|
908 |
+
# inputs=[face_file,style],
|
909 |
+
# outputs=[gallery1]
|
910 |
+
# ).then(
|
911 |
+
# fn=run_for_prompts2,
|
912 |
+
# inputs=[face_file,style],
|
913 |
+
# outputs=[gallery2]
|
914 |
+
# ).then(
|
915 |
+
# fn=run_for_prompts3,
|
916 |
+
# inputs=[face_file,style],
|
917 |
+
# outputs=[gallery3]
|
918 |
+
# ).then(
|
919 |
+
# fn=run_for_prompts4,
|
920 |
+
# inputs=[face_file,style],
|
921 |
+
# outputs=[gallery4]
|
922 |
+
# )
|
923 |
+
# submit.click(
|
924 |
+
# fn=remove_tips,
|
925 |
+
# outputs=usage_tips,
|
926 |
+
# queue=True,
|
927 |
+
# api_name=False,
|
928 |
+
# show_progress = "full"
|
929 |
+
# ).then(
|
930 |
+
# fn=run_for_prompts1,
|
931 |
+
# inputs=[face_file,style],
|
932 |
+
# outputs=[gallery1]
|
933 |
+
# ).then(
|
934 |
+
# fn=run_for_prompts2,
|
935 |
+
# inputs=[face_file,style],
|
936 |
+
# outputs=[gallery2]
|
937 |
+
# ).then(
|
938 |
+
# fn=run_for_prompts3,
|
939 |
+
# inputs=[face_file,style],
|
940 |
+
# outputs=[gallery3]
|
941 |
+
# ).then(
|
942 |
+
# fn=run_for_prompts4,
|
943 |
+
# inputs=[face_file,style],
|
944 |
+
# outputs=[gallery4]
|
945 |
+
# )
|
946 |
+
|
947 |
+
|
948 |
+
# gr.Markdown(article)
|
949 |
+
|
950 |
+
# demo.launch(share=True)
|
951 |
+
|
952 |
+
# if __name__ == "__main__":
|
953 |
+
# parser = argparse.ArgumentParser()
|
954 |
+
# parser.add_argument("--pretrained_model_name_or_path", type=str, default="wangqixun/YamerMIX_v8")
|
955 |
+
# args = parser.parse_args()
|
956 |
+
|
957 |
+
# main(args.pretrained_model_name_or_path, False)
|
gradio_demo/app-multicontrolnet.py
ADDED
@@ -0,0 +1,670 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append("./")
|
3 |
+
|
4 |
+
from typing import Tuple
|
5 |
+
|
6 |
+
import os
|
7 |
+
import cv2
|
8 |
+
import math
|
9 |
+
import torch
|
10 |
+
import random
|
11 |
+
import numpy as np
|
12 |
+
import argparse
|
13 |
+
|
14 |
+
import PIL
|
15 |
+
from PIL import Image
|
16 |
+
|
17 |
+
import diffusers
|
18 |
+
from diffusers.utils import load_image
|
19 |
+
from diffusers.models import ControlNetModel
|
20 |
+
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
|
21 |
+
|
22 |
+
from huggingface_hub import hf_hub_download
|
23 |
+
|
24 |
+
from insightface.app import FaceAnalysis
|
25 |
+
|
26 |
+
from style_template import styles
|
27 |
+
from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
|
28 |
+
from model_util import load_models_xl, get_torch_device, torch_gc
|
29 |
+
from controlnet_util import openpose, get_depth_map, get_canny_image
|
30 |
+
|
31 |
+
import gradio as gr
|
32 |
+
|
33 |
+
|
34 |
+
# global variable
MAX_SEED = np.iinfo(np.int32).max  # upper bound for the seed slider / randomizer
device = get_torch_device()
# fp16 only when CUDA is available; CPU inference runs in fp32
dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
STYLE_NAMES = list(styles.keys())
DEFAULT_STYLE_NAME = "Watercolor"

# Load face encoder (InsightFace "antelopev2": detector + identity-embedding model)
app = FaceAnalysis(
    name="antelopev2",
    root="./",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
app.prepare(ctx_id=0, det_size=(640, 640))

# Path to InstantID models (assumes the demo is launched from the repo root)
face_adapter = f"./checkpoints/ip-adapter.bin"
controlnet_path = f"./checkpoints/ControlNetModel"

# Load pipeline face ControlNetModel (IdentityNet, conditioned on face keypoints)
controlnet_identitynet = ControlNetModel.from_pretrained(
    controlnet_path, torch_dtype=dtype
)

# controlnet-pose
controlnet_pose_model = "thibaud/controlnet-openpose-sdxl-1.0"
controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"

controlnet_pose = ControlNetModel.from_pretrained(
    controlnet_pose_model, torch_dtype=dtype
).to(device)
controlnet_canny = ControlNetModel.from_pretrained(
    controlnet_canny_model, torch_dtype=dtype
).to(device)
controlnet_depth = ControlNetModel.from_pretrained(
    controlnet_depth_model, torch_dtype=dtype
).to(device)

# selection name -> loaded ControlNet model
controlnet_map = {
    "pose": controlnet_pose,
    "canny": controlnet_canny,
    "depth": controlnet_depth,
}
# selection name -> preprocessing function producing that model's conditioning image
controlnet_map_fn = {
    "pose": openpose,
    "canny": get_canny_image,
    "depth": get_depth_map,
}
|
83 |
+
|
84 |
+
|
85 |
+
def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
    """Build the InstantID SDXL pipeline and launch the Gradio demo.

    pretrained_model_name_or_path: HF hub id, or a local .ckpt/.safetensors file.
    enable_lcm_arg: initial state of the "Enable Fast Inference with LCM" checkbox.
    """
    # Single-file checkpoints need the scheduler config fetched separately and
    # the sub-modules (tokenizers, encoders, unet, vae) loaded by hand.
    if pretrained_model_name_or_path.endswith(
        ".ckpt"
    ) or pretrained_model_name_or_path.endswith(".safetensors"):
        scheduler_kwargs = hf_hub_download(
            repo_id="wangqixun/YamerMIX_v8",
            subfolder="scheduler",
            filename="scheduler_config.json",
        )

        (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
            pretrained_model_name_or_path=pretrained_model_name_or_path,
            scheduler_name=None,
            weight_dtype=dtype,
        )

        scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
        pipe = StableDiffusionXLInstantIDPipeline(
            vae=vae,
            text_encoder=text_encoders[0],
            text_encoder_2=text_encoders[1],
            tokenizer=tokenizers[0],
            tokenizer_2=tokenizers[1],
            unet=unet,
            scheduler=scheduler,
            controlnet=[controlnet_identitynet],
        ).to(device)

    else:
        # Hub repo id: diffusers can assemble the whole pipeline directly.
        pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
            pretrained_model_name_or_path,
            controlnet=[controlnet_identitynet],
            torch_dtype=dtype,
            safety_checker=None,
            feature_extractor=None,
        ).to(device)

        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(
            pipe.scheduler.config
        )

    pipe.load_ip_adapter_instantid(face_adapter)
    # load and disable LCM; generate_image enables the LoRA only when requested
    pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
    pipe.disable_lora()
|
130 |
+
|
131 |
+
def toggle_lcm_ui(value):
    """Re-range the step/guidance sliders when the LCM checkbox flips.

    LCM needs far fewer steps and a much lower guidance scale.
    Returns updates for (num_steps, guidance_scale).
    """
    if value:
        steps_update = gr.update(minimum=0, maximum=100, step=1, value=5)
        cfg_update = gr.update(minimum=0.1, maximum=20.0, step=0.1, value=1.5)
    else:
        steps_update = gr.update(minimum=5, maximum=100, step=1, value=30)
        cfg_update = gr.update(minimum=0.1, maximum=20.0, step=0.1, value=5)
    return steps_update, cfg_update
|
142 |
+
|
143 |
+
def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
    """Return a fresh random seed when requested, otherwise pass `seed` through."""
    if not randomize_seed:
        return seed
    return random.randint(0, MAX_SEED)
|
147 |
+
|
148 |
+
def remove_tips():
    """Hide the usage-tips markdown panel."""
    hidden = gr.update(visible=False)
    return hidden
|
150 |
+
|
151 |
+
def get_example():
    """Return the gr.Examples rows.

    Each row is [face image path, pose image path or None, prompt, style name,
    negative prompt]. The negative prompt was duplicated five times with one
    copy truncated ("gree" instead of "green"); it is now a single shared
    constant with the truncation fixed.
    """
    negative = (
        "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, "
        "(frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, "
        "blurry, deformed cat, deformed, photo, anthropomorphic cat, "
        "monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, "
        "drone, buildings in background, green"
    )
    case = [
        [
            "./examples/yann-lecun_resize.jpg",
            None,
            "a man",
            "Snow",
            negative,
        ],
        [
            "./examples/musk_resize.jpeg",
            "./examples/poses/pose2.jpg",
            "a man flying in the sky in Mars",
            "Mars",
            negative,
        ],
        [
            "./examples/sam_resize.png",
            "./examples/poses/pose4.jpg",
            "a man doing a silly pose wearing a suite",
            "Jungle",
            negative,
        ],
        [
            "./examples/schmidhuber_resize.png",
            "./examples/poses/pose3.jpg",
            "a man sit on a chair",
            "Neon",
            negative,
        ],
        [
            "./examples/kaifu_resize.png",
            "./examples/poses/pose.jpg",
            "a man",
            "Vibrant Color",
            negative,
        ],
    ]
    return case
|
190 |
+
|
191 |
+
def run_for_examples(face_file, pose_file, prompt, style, negative_prompt):
    """gr.Examples entry point: run generate_image with fixed demo settings."""
    return generate_image(
        face_file,
        pose_file,
        prompt,
        negative_prompt,
        style,
        20,  # num_steps
        0.8,  # identitynet_strength_ratio
        0.8,  # adapter_strength_ratio
        0.4,  # pose_strength
        0.3,  # canny_strength
        0.5,  # depth_strength
        ["pose", "canny"],  # controlnet_selection
        5.0,  # guidance_scale
        42,  # seed
        "EulerDiscreteScheduler",  # scheduler
        False,  # enable_LCM
        True,  # enable_Face_Region
    )
|
211 |
+
|
212 |
+
def convert_from_cv2_to_image(img: np.ndarray) -> Image:
    """Convert a BGR OpenCV array into an RGB PIL image."""
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return Image.fromarray(rgb)
|
214 |
+
|
215 |
+
def convert_from_image_to_cv2(img: Image) -> np.ndarray:
    """Convert an RGB PIL image into a BGR OpenCV array."""
    rgb_array = np.array(img)
    return cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
|
217 |
+
|
218 |
+
def draw_kps(
    image_pil,
    kps,
    color_list=[
        (255, 0, 0),
        (0, 255, 0),
        (0, 0, 255),
        (255, 255, 0),
        (255, 0, 255),
    ],
):
    """Render 5 facial keypoints as the IdentityNet conditioning image.

    Draws ellipse "limbs" connecting the outer points to point 2 (nose),
    dimmed to 60%, then full-brightness dots per keypoint, on a black canvas
    the size of image_pil. Returns a PIL image.
    NOTE: the mutable default color_list is safe here — it is never mutated.
    """
    stickwidth = 4
    # each pair connects an outer keypoint to index 2
    limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    kps = np.array(kps)

    w, h = image_pil.size
    out_img = np.zeros([h, w, 3])

    for i in range(len(limbSeq)):
        index = limbSeq[i]
        color = color_list[index[0]]

        x = kps[index][:, 0]
        y = kps[index][:, 1]
        # segment length/angle -> filled ellipse polygon along the limb
        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
        polygon = cv2.ellipse2Poly(
            (int(np.mean(x)), int(np.mean(y))),
            (int(length / 2), stickwidth),
            int(angle),
            0,
            360,
            1,
        )
        out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
    # dim the limb layer so the keypoint dots drawn next stand out
    out_img = (out_img * 0.6).astype(np.uint8)

    for idx_kp, kp in enumerate(kps):
        color = color_list[idx_kp]
        x, y = kp
        out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)

    out_img_pil = Image.fromarray(out_img.astype(np.uint8))
    return out_img_pil
|
262 |
+
|
263 |
+
def resize_img(
    input_image,
    max_side=1280,
    min_side=1024,
    size=None,
    pad_to_max_side=False,
    mode=PIL.Image.BILINEAR,
    base_pixel_number=64,
):
    """Resize a PIL image to SDXL-friendly dimensions.

    Without an explicit `size`: scale so the short side reaches min_side,
    rescale if the long side exceeds max_side, then snap both sides DOWN to
    multiples of base_pixel_number. With `pad_to_max_side`, the result is
    centered on a white max_side x max_side canvas.
    """
    w, h = input_image.size
    if size is not None:
        # caller fixed the target size explicitly; no snapping applied
        w_resize_new, h_resize_new = size
    else:
        ratio = min_side / min(h, w)
        w, h = round(ratio * w), round(ratio * h)
        ratio = max_side / max(h, w)
        input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
        # snap to the next-lower multiple of base_pixel_number (UNet-friendly)
        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

    if pad_to_max_side:
        # white canvas, image pasted centered
        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
        offset_x = (max_side - w_resize_new) // 2
        offset_y = (max_side - h_resize_new) // 2
        res[
            offset_y : offset_y + h_resize_new, offset_x : offset_x + w_resize_new
        ] = np.array(input_image)
        input_image = Image.fromarray(res)
    return input_image
|
293 |
+
|
294 |
+
def apply_style(
    style_name: str, positive: str, negative: str = ""
) -> Tuple[str, str]:
    """Expand a style template around the user prompt.

    Unknown style names fall back to DEFAULT_STYLE_NAME. The template's own
    negative prompt is prefixed to the user's negative prompt.
    """
    template_pos, template_neg = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
    expanded = template_pos.replace("{prompt}", positive)
    return expanded, template_neg + " " + negative
|
299 |
+
|
300 |
+
    def generate_image(
        face_image_path,
        pose_image_path,
        prompt,
        negative_prompt,
        style_name,
        num_steps,
        identitynet_strength_ratio,
        adapter_strength_ratio,
        pose_strength,
        canny_strength,
        depth_strength,
        controlnet_selection,
        guidance_scale,
        seed,
        scheduler,
        enable_LCM,
        enhance_face_region,
        progress=gr.Progress(track_tqdm=True),
    ):
        """Run one InstantID generation and return (PIL image, tips-panel update).

        Mutates the shared `pipe` (scheduler, LoRA state, controlnet set,
        adapter scale) on every call. Raises gr.Error when no face image is
        given or no face can be detected.
        """
        if enable_LCM:
            pipe.scheduler = diffusers.LCMScheduler.from_config(pipe.scheduler.config)
            pipe.enable_lora()
        else:
            pipe.disable_lora()
            # the scheduler dropdown only applies when LCM is off; names like
            # "DPMSolverMultistepScheduler-Karras-SDE" encode extra kwargs
            scheduler_class_name = scheduler.split("-")[0]

            add_kwargs = {}
            if len(scheduler.split("-")) > 1:
                add_kwargs["use_karras_sigmas"] = True
            if len(scheduler.split("-")) > 2:
                add_kwargs["algorithm_type"] = "sde-dpmsolver++"
            scheduler = getattr(diffusers, scheduler_class_name)
            pipe.scheduler = scheduler.from_config(pipe.scheduler.config, **add_kwargs)

        if face_image_path is None:
            raise gr.Error(
                f"Cannot find any input face image! Please upload the face image"
            )

        if prompt is None:
            prompt = "a person"

        # apply the style template
        prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)

        face_image = load_image(face_image_path)
        face_image = resize_img(face_image, max_side=1024)
        face_image_cv2 = convert_from_image_to_cv2(face_image)
        height, width, _ = face_image_cv2.shape

        # Extract face features
        face_info = app.get(face_image_cv2)

        if len(face_info) == 0:
            raise gr.Error(
                f"Unable to detect a face in the image. Please upload a different photo with a clear face."
            )

        face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1]  # only use the maximum face
        face_emb = face_info["embedding"]
        face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info["kps"])
        img_controlnet = face_image
        if pose_image_path is not None:
            # a pose reference overrides the keypoints, output size, and the
            # image fed to the auxiliary controlnet preprocessors
            pose_image = load_image(pose_image_path)
            pose_image = resize_img(pose_image, max_side=1024)
            img_controlnet = pose_image
            pose_image_cv2 = convert_from_image_to_cv2(pose_image)

            face_info = app.get(pose_image_cv2)

            if len(face_info) == 0:
                raise gr.Error(
                    f"Cannot find any face in the reference image! Please upload another person image"
                )

            face_info = face_info[-1]
            face_kps = draw_kps(pose_image, face_info["kps"])

            width, height = face_kps.size

        if enhance_face_region:
            # restrict controlnet conditioning to the detected face bbox
            control_mask = np.zeros([height, width, 3])
            x1, y1, x2, y2 = face_info["bbox"]
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
            control_mask[y1:y2, x1:x2] = 255
            control_mask = Image.fromarray(control_mask.astype(np.uint8))
        else:
            control_mask = None

        if len(controlnet_selection) > 0:
            controlnet_scales = {
                "pose": pose_strength,
                "canny": canny_strength,
                "depth": depth_strength,
            }
            # IdentityNet always comes first; selected aux nets follow in order
            pipe.controlnet = MultiControlNetModel(
                [controlnet_identitynet]
                + [controlnet_map[s] for s in controlnet_selection]
            )
            control_scales = [float(identitynet_strength_ratio)] + [
                controlnet_scales[s] for s in controlnet_selection
            ]
            control_images = [face_kps] + [
                controlnet_map_fn[s](img_controlnet).resize((width, height))
                for s in controlnet_selection
            ]
        else:
            pipe.controlnet = controlnet_identitynet
            control_scales = float(identitynet_strength_ratio)
            control_images = face_kps

        generator = torch.Generator(device=device).manual_seed(seed)

        print("Start inference...")
        print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

        pipe.set_ip_adapter_scale(adapter_strength_ratio)
        images = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image_embeds=face_emb,
            image=control_images,
            control_mask=control_mask,
            controlnet_conditioning_scale=control_scales,
            num_inference_steps=num_steps,
            guidance_scale=guidance_scale,
            height=height,
            width=width,
            generator=generator,
        ).images

        return images[0], gr.update(visible=True)
|
434 |
+
|
435 |
+
    # Description
    title = r"""
<h1 align="center">InstantID: Zero-shot Identity-Preserving Generation in Seconds</h1>
"""

    description = r"""
<b>Official 🤗 Gradio demo</b> for <a href='https://github.com/InstantID/InstantID' target='_blank'><b>InstantID: Zero-shot Identity-Preserving Generation in Seconds</b></a>.<br>

How to use:<br>
1. Upload an image with a face. For images with multiple faces, we will only detect the largest face. Ensure the face is not too small and is clearly visible without significant obstructions or blurring.
2. (Optional) You can upload another image as a reference for the face pose. If you don't, we will use the first detected face image to extract facial landmarks. If you use a cropped face at step 1, it is recommended to upload it to define a new face pose.
3. (Optional) You can select multiple ControlNet models to control the generation process. The default is to use the IdentityNet only. The ControlNet models include pose skeleton, canny, and depth. You can adjust the strength of each ControlNet model to control the generation process.
4. Enter a text prompt, as done in normal text-to-image models.
5. Click the <b>Submit</b> button to begin customization.
6. Share your customized photo with your friends and enjoy! 😊"""

    article = r"""
---
📝 **Citation**
<br>
If our work is helpful for your research or applications, please cite us via:
```bibtex
@article{wang2024instantid,
  title={InstantID: Zero-shot Identity-Preserving Generation in Seconds},
  author={Wang, Qixun and Bai, Xu and Wang, Haofan and Qin, Zekui and Chen, Anthony},
  journal={arXiv preprint arXiv:2401.07519},
  year={2024}
}
```
📧 **Contact**
<br>
If you have any questions, please feel free to open an issue or directly reach us out at <b>[email protected]</b>.
"""

    tips = r"""
### Usage tips of InstantID
1. If you're not satisfied with the similarity, try increasing the weight of "IdentityNet Strength" and "Adapter Strength."
2. If you feel that the saturation is too high, first decrease the Adapter strength. If it remains too high, then decrease the IdentityNet strength.
3. If you find that text control is not as expected, decrease Adapter strength.
4. If you find that realistic style is not good enough, go for our Github repo and use a more realistic base model.
"""

    css = """
.gradio-container {width: 85% !important}
"""
    with gr.Blocks(css=css) as demo:
        # description
        gr.Markdown(title)
        gr.Markdown(description)

        with gr.Row():
            with gr.Column():
                with gr.Row(equal_height=True):
                    # upload face image
                    face_file = gr.Image(
                        label="Upload a photo of your face", type="filepath"
                    )
                    # optional: upload a reference pose image
                    pose_file = gr.Image(
                        label="Upload a reference pose image (Optional)",
                        type="filepath",
                    )

                # prompt
                prompt = gr.Textbox(
                    label="Prompt",
                    info="Give simple prompt is enough to achieve good face fidelity",
                    placeholder="A photo of a person",
                    value="",
                )

                submit = gr.Button("Submit", variant="primary")
                enable_LCM = gr.Checkbox(
                    label="Enable Fast Inference with LCM", value=enable_lcm_arg,
                    info="LCM speeds up the inference step, the trade-off is the quality of the generated image. It performs better with portrait face images rather than distant faces",
                )
                style = gr.Dropdown(
                    label="Style template",
                    choices=STYLE_NAMES,
                    value=DEFAULT_STYLE_NAME,
                )

                # strength
                identitynet_strength_ratio = gr.Slider(
                    label="IdentityNet strength (for fidelity)",
                    minimum=0,
                    maximum=1.5,
                    step=0.05,
                    value=0.80,
                )
                adapter_strength_ratio = gr.Slider(
                    label="Image adapter strength (for detail)",
                    minimum=0,
                    maximum=1.5,
                    step=0.05,
                    value=0.80,
                )
                with gr.Accordion("Controlnet"):
                    controlnet_selection = gr.CheckboxGroup(
                        ["pose", "canny", "depth"], label="Controlnet", value=["pose"],
                        info="Use pose for skeleton inference, canny for edge detection, and depth for depth map estimation. You can try all three to control the generation process"
                    )
                    pose_strength = gr.Slider(
                        label="Pose strength",
                        minimum=0,
                        maximum=1.5,
                        step=0.05,
                        value=0.40,
                    )
                    canny_strength = gr.Slider(
                        label="Canny strength",
                        minimum=0,
                        maximum=1.5,
                        step=0.05,
                        value=0.40,
                    )
                    depth_strength = gr.Slider(
                        label="Depth strength",
                        minimum=0,
                        maximum=1.5,
                        step=0.05,
                        value=0.40,
                    )
                with gr.Accordion(open=False, label="Advanced Options"):
                    negative_prompt = gr.Textbox(
                        label="Negative Prompt",
                        placeholder="low quality",
                        value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
                    )
                    num_steps = gr.Slider(
                        label="Number of sample steps",
                        minimum=1,
                        maximum=100,
                        step=1,
                        value=5 if enable_lcm_arg else 30,
                    )
                    guidance_scale = gr.Slider(
                        label="Guidance scale",
                        minimum=0.1,
                        maximum=20.0,
                        step=0.1,
                        value=0.0 if enable_lcm_arg else 5.0,
                    )
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=42,
                    )
                    schedulers = [
                        "DEISMultistepScheduler",
                        "HeunDiscreteScheduler",
                        "EulerDiscreteScheduler",
                        "DPMSolverMultistepScheduler",
                        "DPMSolverMultistepScheduler-Karras",
                        "DPMSolverMultistepScheduler-Karras-SDE",
                    ]
                    scheduler = gr.Dropdown(
                        label="Schedulers",
                        choices=schedulers,
                        value="EulerDiscreteScheduler",
                    )
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                    enhance_face_region = gr.Checkbox(label="Enhance non-face region", value=True)

            with gr.Column(scale=1):
                gallery = gr.Image(label="Generated Images")
                usage_tips = gr.Markdown(
                    label="InstantID Usage Tips", value=tips, visible=False
                )

        # submit flow: hide tips -> maybe reroll seed -> generate
        submit.click(
            fn=remove_tips,
            outputs=usage_tips,
        ).then(
            fn=randomize_seed_fn,
            inputs=[seed, randomize_seed],
            outputs=seed,
            queue=False,
            api_name=False,
        ).then(
            fn=generate_image,
            inputs=[
                face_file,
                pose_file,
                prompt,
                negative_prompt,
                style,
                num_steps,
                identitynet_strength_ratio,
                adapter_strength_ratio,
                pose_strength,
                canny_strength,
                depth_strength,
                controlnet_selection,
                guidance_scale,
                seed,
                scheduler,
                enable_LCM,
                enhance_face_region,
            ],
            outputs=[gallery, usage_tips],
        )

        # toggling LCM re-ranges the step/guidance sliders
        enable_LCM.input(
            fn=toggle_lcm_ui,
            inputs=[enable_LCM],
            outputs=[num_steps, guidance_scale],
            queue=False,
        )

        gr.Examples(
            examples=get_example(),
            inputs=[face_file, pose_file, prompt, style, negative_prompt],
            fn=run_for_examples,
            outputs=[gallery, usage_tips],
            cache_examples=True,
        )

        gr.Markdown(article)

    demo.launch()
|
658 |
+
|
659 |
+
|
660 |
+
if __name__ == "__main__":
|
661 |
+
parser = argparse.ArgumentParser()
|
662 |
+
parser.add_argument(
|
663 |
+
"--pretrained_model_name_or_path", type=str, default="wangqixun/YamerMIX_v8"
|
664 |
+
)
|
665 |
+
parser.add_argument(
|
666 |
+
"--enable_LCM", type=bool, default=os.environ.get("ENABLE_LCM", False)
|
667 |
+
)
|
668 |
+
args = parser.parse_args()
|
669 |
+
|
670 |
+
main(args.pretrained_model_name_or_path, args.enable_LCM)
|
gradio_demo/app.py
ADDED
@@ -0,0 +1,656 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append('./')
|
3 |
+
|
4 |
+
from typing import Tuple
|
5 |
+
|
6 |
+
import os
|
7 |
+
import cv2
|
8 |
+
import math
|
9 |
+
import torch
|
10 |
+
import random
|
11 |
+
import numpy as np
|
12 |
+
import argparse
|
13 |
+
import pandas as pd
|
14 |
+
|
15 |
+
import PIL
|
16 |
+
from PIL import Image
|
17 |
+
|
18 |
+
import diffusers
|
19 |
+
from diffusers.utils import load_image
|
20 |
+
from diffusers.models import ControlNetModel
|
21 |
+
from diffusers import LCMScheduler
|
22 |
+
|
23 |
+
from huggingface_hub import hf_hub_download
|
24 |
+
|
25 |
+
import insightface
|
26 |
+
from insightface.app import FaceAnalysis
|
27 |
+
|
28 |
+
from style_template import styles
|
29 |
+
from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
|
30 |
+
from model_util import load_models_xl, get_torch_device, torch_gc
|
31 |
+
|
32 |
+
|
33 |
+
# global variable
MAX_SEED = np.iinfo(np.int32).max  # upper bound for seed values
device = get_torch_device()
# fp16 only when CUDA is available; CPU inference runs in fp32
dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
STYLE_NAMES = list(styles.keys())
DEFAULT_STYLE_NAME = "Watercolor"

# Load face encoder (InsightFace antelopev2: detection + identity embedding)
app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
app.prepare(ctx_id=0, det_size=(640, 640))

# Path to InstantID models
face_adapter = f'./checkpoints/ip-adapter.bin'
controlnet_path = f'./checkpoints/ControlNetModel'

# Load pipeline
controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)

# watermark overlay image; path assumes launch from the repo root -- TODO confirm
logo = Image.open("./gradio_demo/watermark.png")
logo = logo.resize((100, 70))
|
53 |
+
|
54 |
+
from cv2 import imencode
|
55 |
+
import base64
|
56 |
+
|
57 |
+
# def encode_pil_to_base64_new(pil_image):
|
58 |
+
# print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
|
59 |
+
# image_arr = np.asarray(pil_image)[:,:,::-1]
|
60 |
+
# _, byte_data = imencode('.png', image_arr)
|
61 |
+
# base64_data = base64.b64encode(byte_data)
|
62 |
+
# base64_string_opencv = base64_data.decode("utf-8")
|
63 |
+
# return "data:image/png;base64," + base64_string_opencv
|
64 |
+
|
65 |
+
import gradio as gr
|
66 |
+
|
67 |
+
# gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
|
68 |
+
|
69 |
+
def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
|
70 |
+
|
71 |
+
if pretrained_model_name_or_path.endswith(
|
72 |
+
".ckpt"
|
73 |
+
) or pretrained_model_name_or_path.endswith(".safetensors"):
|
74 |
+
scheduler_kwargs = hf_hub_download(
|
75 |
+
repo_id="wangqixun/YamerMIX_v8",
|
76 |
+
subfolder="scheduler",
|
77 |
+
filename="scheduler_config.json",
|
78 |
+
)
|
79 |
+
|
80 |
+
(tokenizers, text_encoders, unet, _, vae) = load_models_xl(
|
81 |
+
pretrained_model_name_or_path=pretrained_model_name_or_path,
|
82 |
+
scheduler_name=None,
|
83 |
+
weight_dtype=dtype,
|
84 |
+
)
|
85 |
+
|
86 |
+
scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
|
87 |
+
pipe = StableDiffusionXLInstantIDPipeline(
|
88 |
+
vae=vae,
|
89 |
+
text_encoder=text_encoders[0],
|
90 |
+
text_encoder_2=text_encoders[1],
|
91 |
+
tokenizer=tokenizers[0],
|
92 |
+
tokenizer_2=tokenizers[1],
|
93 |
+
unet=unet,
|
94 |
+
scheduler=scheduler,
|
95 |
+
controlnet=controlnet,
|
96 |
+
).to(device)
|
97 |
+
|
98 |
+
else:
|
99 |
+
pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
|
100 |
+
pretrained_model_name_or_path,
|
101 |
+
controlnet=controlnet,
|
102 |
+
torch_dtype=dtype,
|
103 |
+
safety_checker=None,
|
104 |
+
feature_extractor=None,
|
105 |
+
).to(device)
|
106 |
+
|
107 |
+
pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
|
108 |
+
|
109 |
+
pipe.load_ip_adapter_instantid(face_adapter)
|
110 |
+
# load and disable LCM
|
111 |
+
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
|
112 |
+
pipe.disable_lora()
|
113 |
+
|
114 |
+
def remove_tips():
    """Hide the usage-tips markdown panel.

    A leftover `print("GG")` debug statement was removed.
    """
    return gr.update(visible=False)
|
117 |
+
|
118 |
+
|
119 |
+
# prompts = [
|
120 |
+
# ["superman","Vibrant Color"], ["japanese anime character with white/neon hair","Watercolor"],
|
121 |
+
# # ["Suited professional","(No style)"],
|
122 |
+
# ["Scooba diver","Line art"], ["eskimo","Snow"]
|
123 |
+
# ]
|
124 |
+
|
125 |
+
def convert_from_cv2_to_image(img: np.ndarray) -> Image:
    """Convert an OpenCV BGR array into an RGB PIL image."""
    rgb_array = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return Image.fromarray(rgb_array)
|
127 |
+
|
128 |
+
def convert_from_image_to_cv2(img: Image) -> np.ndarray:
    """Convert an RGB PIL image into an OpenCV BGR array."""
    rgb_array = np.array(img)
    return cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
|
130 |
+
|
131 |
+
def run_for_prompts1(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate the first avatar variant for the selected style."""
    prompt_list, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompt_list[0], negative)
|
135 |
+
# else:
|
136 |
+
# raise gr.Error("Email ID is compulsory")
|
137 |
+
def run_for_prompts2(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate the second avatar variant for the selected style."""
    prompt_list, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompt_list[1], negative)
|
141 |
+
def run_for_prompts3(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate the third avatar variant for the selected style."""
    prompt_list, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompt_list[2], negative)
|
145 |
+
def run_for_prompts4(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate the fourth avatar variant for the selected style."""
    prompt_list, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompt_list[3], negative)
|
149 |
+
|
150 |
+
# def validate_and_process(face_file, style, email):
|
151 |
+
|
152 |
+
# # Your processing logic here
|
153 |
+
# gallery1, gallery2, gallery3, gallery4 = run_for_prompts1(face_file, style), run_for_prompts2(face_file, style), run_for_prompts3(face_file, style), run_for_prompts4(face_file, style)
|
154 |
+
# return gallery1, gallery2, gallery3, gallery4
|
155 |
+
|
156 |
+
def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
    """Render the 5 facial keypoints as a ControlNet conditioning image.

    Draws a dimmed "limb" ellipse between each outer keypoint and the nose
    (index 2), then a solid circle on every keypoint, on a black canvas of
    the same size as `image_pil`.
    """
    limb_width = 4
    limbs = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    points = np.array(kps)

    canvas_w, canvas_h = image_pil.size
    canvas = np.zeros([canvas_h, canvas_w, 3])

    # Limb segments, drawn as filled rotated ellipses.
    for limb in limbs:
        limb_color = color_list[limb[0]]
        xs = points[limb][:, 0]
        ys = points[limb][:, 1]
        seg_len = ((xs[0] - xs[1]) ** 2 + (ys[0] - ys[1]) ** 2) ** 0.5
        rotation = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
        poly = cv2.ellipse2Poly(
            (int(np.mean(xs)), int(np.mean(ys))),
            (int(seg_len / 2), limb_width),
            int(rotation), 0, 360, 1,
        )
        canvas = cv2.fillConvexPoly(canvas.copy(), poly, limb_color)
    canvas = (canvas * 0.6).astype(np.uint8)  # dim limbs relative to the dots

    # Keypoint dots on top, full brightness.
    for point_idx, (px, py) in enumerate(points):
        canvas = cv2.circle(canvas.copy(), (int(px), int(py)), 10, color_list[point_idx], -1)

    return Image.fromarray(canvas.astype(np.uint8))
|
183 |
+
|
184 |
+
def resize_img(input_image, max_side=1280, min_side=1280, size=None,
               pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
    """Resize a PIL image for the pipeline.

    Scales so the short side reaches min_side (capped by max_side), snaps
    both dimensions down to multiples of base_pixel_number, and optionally
    pads onto a white max_side x max_side square.
    """
    w, h = input_image.size
    print(f"Original Size --> {input_image.size}")
    if size is not None:
        target_w, target_h = size
    else:
        scale = min_side / min(h, w)
        w, h = round(scale * w), round(scale * h)
        scale = max_side / max(h, w)
        input_image = input_image.resize([round(scale * w), round(scale * h)], mode)
        # Snap down to the nearest multiple of base_pixel_number.
        target_w = (round(scale * w) // base_pixel_number) * base_pixel_number
        target_h = (round(scale * h) // base_pixel_number) * base_pixel_number
    input_image = input_image.resize([target_w, target_h], mode)

    if pad_to_max_side:
        padded = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
        left = (max_side - target_w) // 2
        top = (max_side - target_h) // 2
        padded[top:top + target_h, left:left + target_w] = np.array(input_image)
        input_image = Image.fromarray(padded)

    print(f"Final modified image size --> {input_image.size}")
    return input_image
|
209 |
+
|
210 |
+
# def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
|
211 |
+
# p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
|
212 |
+
# return p.replace("{prompt}", positive), n + ' ' + negative
|
213 |
+
|
214 |
+
def store_images(email, gallery1, gallery2, gallery3, gallery4, consent):
    """Save the four generated avatars to disk and log them to a CSV.

    Raises gr.Error when the consent checkbox is unticked; otherwise saves
    each gallery image as images/<email>_gallery<i>.png and appends one row
    (email + four paths) to image_data.csv.
    """
    if not consent:
        raise gr.Error("Consent not provided")

    # Normalize every gallery entry to a PIL image so .save() works.
    galleries = []
    for i, img in enumerate([gallery1, gallery2, gallery3, gallery4], start=1):
        if isinstance(img, np.ndarray):
            img = Image.fromarray(img)
        print(f"Gallery {i} type after conversion: {type(img)}")
        galleries.append(img)

    # exist_ok avoids the check-then-create race of an explicit exists() test.
    os.makedirs('images', exist_ok=True)

    # Save each avatar under a per-user filename.
    image_paths = []
    for i, img in enumerate(galleries, start=1):
        img_path = f'images/{email}_gallery{i}.png'
        img.save(img_path)
        image_paths.append(img_path)

    csv_file_path = 'image_data.csv'

    # One row: the email and the four saved image paths.
    df = pd.DataFrame({
        'email': [email],
        'img1_path': [image_paths[0]],
        'img2_path': [image_paths[1]],
        'img3_path': [image_paths[2]],
        'img4_path': [image_paths[3]],
    })

    # Write with header on first run, append without header afterwards.
    if not os.path.isfile(csv_file_path):
        df.to_csv(csv_file_path, index=False)
    else:
        df.to_csv(csv_file_path, mode='a', header=False, index=False)

    gr.Info("Thankyou!! Your avatar is on the way to your inbox")
|
253 |
+
|
254 |
+
def add_watermark(image, watermark=logo, opacity=128, position="bottom_right", padding=10):
    """Composite `watermark` onto `image` with the given opacity and corner.

    Accepts PIL images or NumPy arrays for both inputs. Returns a PIL image;
    if the input was not RGBA, the result is converted back to RGB.

    Raises ValueError for an unknown `position`.
    """
    # Convert NumPy arrays to PIL images if needed.
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    if isinstance(watermark, np.ndarray):
        watermark = Image.fromarray(watermark)

    # Remember the caller's mode: the original code compared image.mode AFTER
    # converting to RGBA, so the "convert back to RGB" branch was dead.
    original_mode = image.mode

    # Work in RGBA to handle transparency during compositing.
    image = image.convert("RGBA")
    watermark = watermark.convert("RGBA")

    # Adjust the watermark opacity on a copy (don't mutate the shared logo).
    watermark = watermark.copy()
    watermark.putalpha(opacity)

    # Calculate the paste position for the watermark.
    if position == "bottom_right":
        x = image.width - watermark.width - padding
        y = image.height - watermark.height - padding
    elif position == "bottom_left":
        x = padding
        y = image.height - watermark.height - padding
    elif position == "top_right":
        x = image.width - watermark.width - padding
        y = padding
    elif position == "top_left":
        x = padding
        y = padding
    else:
        raise ValueError("Unsupported position. Choose from 'bottom_right', 'bottom_left', 'top_right', 'top_left'.")

    # Paste using the watermark's own alpha as the mask.
    image.paste(watermark, (x, y), watermark)

    # Convert back to RGB when the caller did not supply an RGBA image.
    if original_mode != "RGBA":
        image = image.convert("RGB")

    return image
|
295 |
+
|
296 |
+
def generate_image(face_image,prompt,negative_prompt):
    """Run the InstantID SDXL pipeline on a face photo and return a watermarked result.

    face_image: PIL image from the webcam widget (None raises gr.Error).
    prompt / negative_prompt: style prompt pair chosen by the caller.
    Uses module-level state: pipe, app (face analyzer), device, MAX_SEED.
    """
    pose_image_path = None  # pose reference disabled in this demo
    # prompt = "superman"
    enable_LCM = False
    identitynet_strength_ratio = 0.90  # ControlNet (identity) conditioning scale
    adapter_strength_ratio = 0.60      # IP-Adapter (image detail) scale
    num_steps = 15
    guidance_scale = 5
    seed = random.randint(0, MAX_SEED)  # fresh seed per call -> non-deterministic output
    print(f"Seed --> {seed}")

    # negative_prompt = ""
    # negative_prompt += neg
    enhance_face_region = True
    if enable_LCM:
        pipe.enable_lora()
        pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    else:
        pipe.disable_lora()
        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

    if face_image is None:
        raise gr.Error(f"Cannot find any input face image! Please upload the face image")

    # if prompt is None:
    #     prompt = "a person"

    # apply the style template
    # prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)

    # face_image = load_image(face_image_path)
    face_image = resize_img(face_image)
    face_image_cv2 = convert_from_image_to_cv2(face_image)
    height, width, _ = face_image_cv2.shape

    # Extract face features
    face_info = app.get(face_image_cv2)

    if len(face_info) == 0:
        raise gr.Error(f"Cannot find any face in the image! Please upload another person image")

    face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1] # only use the maximum face
    face_emb = face_info['embedding']
    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])

    # Optional pose reference: unreachable here since pose_image_path is None above.
    if pose_image_path is not None:
        pose_image = load_image(pose_image_path)
        pose_image = resize_img(pose_image)
        pose_image_cv2 = convert_from_image_to_cv2(pose_image)

        face_info = app.get(pose_image_cv2)

        if len(face_info) == 0:
            raise gr.Error(f"Cannot find any face in the reference image! Please upload another person image")

        face_info = face_info[-1]
        face_kps = draw_kps(pose_image, face_info['kps'])

        width, height = face_kps.size

    if enhance_face_region:
        # White rectangle over the detected face bbox restricts ControlNet
        # conditioning to the face region.
        control_mask = np.zeros([height, width, 3])
        x1, y1, x2, y2 = face_info["bbox"]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        control_mask[y1:y2, x1:x2] = 255
        control_mask = Image.fromarray(control_mask.astype(np.uint8))
    else:
        control_mask = None

    generator = torch.Generator(device=device).manual_seed(seed)

    print("Start inference...")
    print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

    pipe.set_ip_adapter_scale(adapter_strength_ratio)
    images = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image_embeds=face_emb,
        image=face_kps,
        control_mask=control_mask,
        controlnet_conditioning_scale=float(identitynet_strength_ratio),
        num_inference_steps=num_steps,
        guidance_scale=guidance_scale,
        height=height,
        width=width,
        generator=generator,
        # num_images_per_prompt = 4
    ).images

    watermarked_image = add_watermark(images[0])

    # return images[0]
    return watermarked_image
|
390 |
+
|
391 |
+
    ### Description
    # Static UI text/markup and the page stylesheet. These are runtime strings
    # rendered by Gradio; do not edit casually.
    title = r"""
    <h1 align="center" style="color:white;">Choose your AVATAR</h1>
    """

    description = r"""
    <h2 style="color:white;"> Powered by IDfy </h2>"""

    article = r""""""

    tips = r""""""
    # css = '''
    # .gradio-container {
    #     width: 95% !important;
    #     background-image: url('./InstantID/gradio_demo/logo.png');
    #     background-size: cover;
    #     background-position: center;
    # }
    # .image-gallery {
    #     height: 100vh !important;
    #     overflow: auto;
    # }
    # .gradio-row .gradio-element {
    #     margin: 0 !important;
    # }
    # '''
    css = '''
    .gradio-container {width: 100% !important; color: white; background: linear-gradient(135deg, #1C43B9, #254977, #343434);}
    .gradio-row .gradio-element { margin: 0 !important; }
    .centered-column {
        display: flex;
        justify-content: center;
        align-items: center;
        width: 100%;}
    #store-btn {
        background: #f2bb13 !important;
        color: white !important;
    }
    '''
|
430 |
+
with gr.Blocks(css=css) as demo:
|
431 |
+
|
432 |
+
# description
|
433 |
+
gr.Markdown(title)
|
434 |
+
with gr.Column():
|
435 |
+
with gr.Row():
|
436 |
+
gr.Image("./gradio_demo/logo.png", scale=0, min_width=50, show_label=False, show_download_button=False)
|
437 |
+
gr.Markdown(description)
|
438 |
+
style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
|
439 |
+
with gr.Row(equal_height=True): # Center the face file
|
440 |
+
with gr.Column(elem_id="centered-face", elem_classes=["centered-column"]): # Use CSS class for centering
|
441 |
+
face_file = gr.Image(label="Upload a photo of your face", type="pil", sources="webcam", height=400, width=500)
|
442 |
+
# submit = gr.Button("Submit", variant="primary")
|
443 |
+
with gr.Column():
|
444 |
+
with gr.Row():
|
445 |
+
gallery1 = gr.Image(label="Generated Images")
|
446 |
+
gallery2 = gr.Image(label="Generated Images")
|
447 |
+
with gr.Row():
|
448 |
+
gallery3 = gr.Image(label="Generated Images")
|
449 |
+
gallery4 = gr.Image(label="Generated Images")
|
450 |
+
email = gr.Textbox(label="Email", info="Enter your email address", value="")
|
451 |
+
consent = gr.Checkbox(label="I am giving my consent to use my data to share my AI Avtar and IDfy relevant information from time to time")
|
452 |
+
submit1 = gr.Button("STORE",elem_id="store-btn")
|
453 |
+
# with gr.Blocks(css=css) as demo:
|
454 |
+
|
455 |
+
# # description
|
456 |
+
# gr.Markdown(title)
|
457 |
+
# with gr.Column():
|
458 |
+
# with gr.Row():
|
459 |
+
# gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
|
460 |
+
# gr.Markdown(description)
|
461 |
+
# style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
|
462 |
+
# face_file = gr.Image(label="Upload a photo of your face", type="pil",sources="webcam", height=400, width=500)
|
463 |
+
# submit = gr.Button("Submit", variant="primary")
|
464 |
+
# with gr.Column():
|
465 |
+
# with gr.Row():
|
466 |
+
# gallery1 = gr.Image(label="Generated Images")
|
467 |
+
# gallery2 = gr.Image(label="Generated Images")
|
468 |
+
# with gr.Row():
|
469 |
+
# gallery3 = gr.Image(label="Generated Images")
|
470 |
+
# gallery4 = gr.Image(label="Generated Images")
|
471 |
+
# email = gr.Textbox(label="Email",
|
472 |
+
# info="Enter your email address",
|
473 |
+
# value="")
|
474 |
+
# consent = gr.Checkbox(label="I am giving my consent to use my data to share my AI Avtar and IDfy relevant information from time to time")
|
475 |
+
# submit1 = gr.Button("STORE", variant="primary")
|
476 |
+
# # submit1 = gr.Button("Store")
|
477 |
+
usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)
|
478 |
+
|
479 |
+
face_file.upload(
|
480 |
+
fn=remove_tips,
|
481 |
+
outputs=usage_tips,
|
482 |
+
queue=True,
|
483 |
+
api_name=False,
|
484 |
+
show_progress = "full"
|
485 |
+
).then(
|
486 |
+
fn=run_for_prompts1,
|
487 |
+
inputs=[face_file,style],
|
488 |
+
outputs=[gallery1]
|
489 |
+
).then(
|
490 |
+
fn=run_for_prompts2,
|
491 |
+
inputs=[face_file,style],
|
492 |
+
outputs=[gallery2]
|
493 |
+
).then(
|
494 |
+
fn=run_for_prompts3,
|
495 |
+
inputs=[face_file,style],
|
496 |
+
outputs=[gallery3]
|
497 |
+
).then(
|
498 |
+
fn=run_for_prompts4,
|
499 |
+
inputs=[face_file,style],
|
500 |
+
outputs=[gallery4]
|
501 |
+
)
|
502 |
+
# submit.click(
|
503 |
+
# fn=remove_tips,
|
504 |
+
# outputs=usage_tips,
|
505 |
+
# queue=True,
|
506 |
+
# api_name=False,
|
507 |
+
# show_progress = "full"
|
508 |
+
# ).then(
|
509 |
+
# fn=run_for_prompts1,
|
510 |
+
# inputs=[face_file,style],
|
511 |
+
# outputs=[gallery1]
|
512 |
+
# ).then(
|
513 |
+
# fn=run_for_prompts2,
|
514 |
+
# inputs=[face_file,style],
|
515 |
+
# outputs=[gallery2]
|
516 |
+
# ).then(
|
517 |
+
# fn=run_for_prompts3,
|
518 |
+
# inputs=[face_file,style],
|
519 |
+
# outputs=[gallery3]
|
520 |
+
# ).then(
|
521 |
+
# fn=run_for_prompts4,
|
522 |
+
# inputs=[face_file,style],
|
523 |
+
# outputs=[gallery4]
|
524 |
+
# )
|
525 |
+
|
526 |
+
# submit1.click(
|
527 |
+
# fn=store_images,
|
528 |
+
# inputs=[email,gallery1,gallery2,gallery3,gallery4,consent],
|
529 |
+
# outputs=None)
|
530 |
+
|
531 |
+
|
532 |
+
|
533 |
+
gr.Markdown(article)
|
534 |
+
|
535 |
+
demo.launch(share=True)
|
536 |
+
|
537 |
+
# with gr.Blocks(css=css, js=js) as demo:
|
538 |
+
|
539 |
+
# # description
|
540 |
+
# gr.Markdown(title)
|
541 |
+
# with gr.Row():
|
542 |
+
# gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
|
543 |
+
# gr.Markdown(description)
|
544 |
+
# with gr.Row():
|
545 |
+
# with gr.Column():
|
546 |
+
# style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
|
547 |
+
# face_file = gr.Image(label="Upload a photo of your face", type="pil",sources="webcam")
|
548 |
+
# submit = gr.Button("Submit", variant="primary")
|
549 |
+
# with gr.Column():
|
550 |
+
# with gr.Row():
|
551 |
+
# gallery1 = gr.Image(label="Generated Images")
|
552 |
+
# gallery2 = gr.Image(label="Generated Images")
|
553 |
+
# with gr.Row():
|
554 |
+
# gallery3 = gr.Image(label="Generated Images")
|
555 |
+
# gallery4 = gr.Image(label="Generated Images")
|
556 |
+
# email = gr.Textbox(label="Email",
|
557 |
+
# info="Enter your email address",
|
558 |
+
# value="")
|
559 |
+
|
560 |
+
# usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)
|
561 |
+
# # identitynet_strength_ratio = gr.Slider(
|
562 |
+
# # label="IdentityNet strength (for fidelity)",
|
563 |
+
# # minimum=0,
|
564 |
+
# # maximum=1.5,
|
565 |
+
# # step=0.05,
|
566 |
+
# # value=0.95,
|
567 |
+
# # )
|
568 |
+
# # adapter_strength_ratio = gr.Slider(
|
569 |
+
# # label="Image adapter strength (for detail)",
|
570 |
+
# # minimum=0,
|
571 |
+
# # maximum=1.5,
|
572 |
+
# # step=0.05,
|
573 |
+
# # value=0.60,
|
574 |
+
# # )
|
575 |
+
# # negative_prompt = gr.Textbox(
|
576 |
+
# # label="Negative Prompt",
|
577 |
+
# # placeholder="low quality",
|
578 |
+
# # value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
|
579 |
+
# # )
|
580 |
+
# # num_steps = gr.Slider(
|
581 |
+
# # label="Number of sample steps",
|
582 |
+
# # minimum=15,
|
583 |
+
# # maximum=100,
|
584 |
+
# # step=1,
|
585 |
+
# # value=5 if enable_lcm_arg else 15,
|
586 |
+
# # )
|
587 |
+
# # guidance_scale = gr.Slider(
|
588 |
+
# # label="Guidance scale",
|
589 |
+
# # minimum=0.1,
|
590 |
+
# # maximum=10.0,
|
591 |
+
# # step=0.1,
|
592 |
+
# # value=0 if enable_lcm_arg else 8.5,
|
593 |
+
# # )
|
594 |
+
# # if email is None:
|
595 |
+
# # print("STOPPPP")
|
596 |
+
# # raise gr.Error("Email ID is compulsory")
|
597 |
+
# face_file.upload(
|
598 |
+
# fn=remove_tips,
|
599 |
+
# outputs=usage_tips,
|
600 |
+
# queue=True,
|
601 |
+
# api_name=False,
|
602 |
+
# show_progress = "full"
|
603 |
+
# ).then(
|
604 |
+
# fn=run_for_prompts1,
|
605 |
+
# inputs=[face_file,style],
|
606 |
+
# outputs=[gallery1]
|
607 |
+
# ).then(
|
608 |
+
# fn=run_for_prompts2,
|
609 |
+
# inputs=[face_file,style],
|
610 |
+
# outputs=[gallery2]
|
611 |
+
# ).then(
|
612 |
+
# fn=run_for_prompts3,
|
613 |
+
# inputs=[face_file,style],
|
614 |
+
# outputs=[gallery3]
|
615 |
+
# ).then(
|
616 |
+
# fn=run_for_prompts4,
|
617 |
+
# inputs=[face_file,style],
|
618 |
+
# outputs=[gallery4]
|
619 |
+
# )
|
620 |
+
# submit.click(
|
621 |
+
# fn=remove_tips,
|
622 |
+
# outputs=usage_tips,
|
623 |
+
# queue=True,
|
624 |
+
# api_name=False,
|
625 |
+
# show_progress = "full"
|
626 |
+
# ).then(
|
627 |
+
# fn=run_for_prompts1,
|
628 |
+
# inputs=[face_file,style],
|
629 |
+
# outputs=[gallery1]
|
630 |
+
# ).then(
|
631 |
+
# fn=run_for_prompts2,
|
632 |
+
# inputs=[face_file,style],
|
633 |
+
# outputs=[gallery2]
|
634 |
+
# ).then(
|
635 |
+
# fn=run_for_prompts3,
|
636 |
+
# inputs=[face_file,style],
|
637 |
+
# outputs=[gallery3]
|
638 |
+
# ).then(
|
639 |
+
# fn=run_for_prompts4,
|
640 |
+
# inputs=[face_file,style],
|
641 |
+
# outputs=[gallery4]
|
642 |
+
# )
|
643 |
+
|
644 |
+
|
645 |
+
# gr.Markdown(article)
|
646 |
+
|
647 |
+
# demo.launch(share=True)
|
648 |
+
|
649 |
+
if __name__ == "__main__":
    # CLI entry point: pick the base SDXL model, then launch the demo.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--pretrained_model_name_or_path", type=str, default="wangqixun/YamerMIX_v8")
    cli_args = arg_parser.parse_args()
    main(cli_args.pretrained_model_name_or_path, False)
|
655 |
+
|
656 |
+
|
gradio_demo/app1.py
ADDED
@@ -0,0 +1,434 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append('./')
|
3 |
+
|
4 |
+
from typing import Tuple
|
5 |
+
|
6 |
+
import os
|
7 |
+
import cv2
|
8 |
+
import math
|
9 |
+
import torch
|
10 |
+
import random
|
11 |
+
import numpy as np
|
12 |
+
import argparse
|
13 |
+
|
14 |
+
import PIL
|
15 |
+
from PIL import Image
|
16 |
+
|
17 |
+
import diffusers
|
18 |
+
from diffusers.utils import load_image
|
19 |
+
from diffusers.models import ControlNetModel
|
20 |
+
from diffusers import LCMScheduler
|
21 |
+
|
22 |
+
from huggingface_hub import hf_hub_download
|
23 |
+
|
24 |
+
import insightface
|
25 |
+
from insightface.app import FaceAnalysis
|
26 |
+
|
27 |
+
from style_template import styles
|
28 |
+
from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
|
29 |
+
from model_util import load_models_xl, get_torch_device, torch_gc
|
30 |
+
|
31 |
+
|
32 |
+
# global variable
MAX_SEED = np.iinfo(np.int32).max  # upper bound for random seeds
device = get_torch_device()
# fp16 on CUDA, fp32 otherwise (CPU fp16 is unsupported/slow)
dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
STYLE_NAMES = list(styles.keys())
DEFAULT_STYLE_NAME = "Watercolor"

# Load face encoder (insightface antelopev2 detector + embedder)
app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
app.prepare(ctx_id=0, det_size=(320, 320))

# Path to InstantID models
face_adapter = f'./checkpoints/ip-adapter.bin'
controlnet_path = f'./checkpoints/ControlNetModel'

# Load pipeline
controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)

# Watermark logo used by add_watermark (shared PIL image)
logo = Image.open("./gradio_demo/logo.png")
|
51 |
+
|
52 |
+
from cv2 import imencode
|
53 |
+
import base64
|
54 |
+
|
55 |
+
# def encode_pil_to_base64_new(pil_image):
|
56 |
+
# print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
|
57 |
+
# image_arr = np.asarray(pil_image)[:,:,::-1]
|
58 |
+
# _, byte_data = imencode('.png', image_arr)
|
59 |
+
# base64_data = base64.b64encode(byte_data)
|
60 |
+
# base64_string_opencv = base64_data.decode("utf-8")
|
61 |
+
# return "data:image/png;base64," + base64_string_opencv
|
62 |
+
|
63 |
+
import gradio as gr
|
64 |
+
|
65 |
+
# gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
|
66 |
+
|
67 |
+
def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
    """Build the InstantID SDXL pipeline and launch the Gradio demo (app1 variant).

    pretrained_model_name_or_path: diffusers repo id or a local single-file
    checkpoint (.ckpt / .safetensors).
    """

    # Single-file checkpoints need scheduler config and sub-modules assembled
    # manually; a repo id goes through from_pretrained directly.
    if pretrained_model_name_or_path.endswith(
        ".ckpt"
    ) or pretrained_model_name_or_path.endswith(".safetensors"):
        scheduler_kwargs = hf_hub_download(
            repo_id="wangqixun/YamerMIX_v8",
            subfolder="scheduler",
            filename="scheduler_config.json",
        )

        (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
            pretrained_model_name_or_path=pretrained_model_name_or_path,
            scheduler_name=None,
            weight_dtype=dtype,
        )

        scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
        pipe = StableDiffusionXLInstantIDPipeline(
            vae=vae,
            text_encoder=text_encoders[0],
            text_encoder_2=text_encoders[1],
            tokenizer=tokenizers[0],
            tokenizer_2=tokenizers[1],
            unet=unet,
            scheduler=scheduler,
            controlnet=controlnet,  # module-level ControlNet loaded at import time
        ).to(device)

    else:
        pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
            pretrained_model_name_or_path,
            controlnet=controlnet,
            torch_dtype=dtype,
            safety_checker=None,
            feature_extractor=None,
        ).to(device)

    pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

    # Attach the InstantID IP-Adapter weights (face identity conditioning).
    pipe.load_ip_adapter_instantid(face_adapter)
    # load and disable LCM
    pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
    pipe.disable_lora()
|
111 |
+
|
112 |
+
def remove_tips():
    """Hide the usage-tips markdown element."""
    return gr.update(visible=False)
|
114 |
+
|
115 |
+
|
116 |
+
# prompts = [
|
117 |
+
# ["superman","Vibrant Color"], ["japanese anime character with white/neon hair","Watercolor"],
|
118 |
+
# # ["Suited professional","(No style)"],
|
119 |
+
# ["Scooba diver","Line art"], ["eskimo","Snow"]
|
120 |
+
# ]
|
121 |
+
|
122 |
+
def convert_from_cv2_to_image(img: np.ndarray) -> Image:
    """Convert an OpenCV BGR array into an RGB PIL image."""
    rgb_array = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return Image.fromarray(rgb_array)
|
124 |
+
|
125 |
+
def convert_from_image_to_cv2(img: Image) -> np.ndarray:
    """Convert an RGB PIL image into an OpenCV BGR array."""
    rgb_array = np.array(img)
    return cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
|
127 |
+
|
128 |
+
def run_for_prompts1(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate the first avatar variant for the selected style."""
    prompt_list, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompt_list[0], negative)
|
132 |
+
# else:
|
133 |
+
# raise gr.Error("Email ID is compulsory")
|
134 |
+
def run_for_prompts2(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate the second avatar variant for the selected style."""
    prompt_list, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompt_list[1], negative)
|
138 |
+
def run_for_prompts3(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate the third avatar variant for the selected style."""
    prompt_list, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompt_list[2], negative)
|
142 |
+
def run_for_prompts4(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate the fourth avatar variant for the selected style."""
    prompt_list, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompt_list[3], negative)
|
146 |
+
|
147 |
+
# def validate_and_process(face_file, style, email):
|
148 |
+
|
149 |
+
# # Your processing logic here
|
150 |
+
# gallery1, gallery2, gallery3, gallery4 = run_for_prompts1(face_file, style), run_for_prompts2(face_file, style), run_for_prompts3(face_file, style), run_for_prompts4(face_file, style)
|
151 |
+
# return gallery1, gallery2, gallery3, gallery4
|
152 |
+
|
153 |
+
def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
    """Render the 5 facial keypoints as a ControlNet conditioning image.

    Draws a dimmed "limb" ellipse between each outer keypoint and the nose
    (index 2), then a solid circle on every keypoint, on a black canvas of
    the same size as `image_pil`.
    """
    limb_width = 4
    limbs = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    points = np.array(kps)

    canvas_w, canvas_h = image_pil.size
    canvas = np.zeros([canvas_h, canvas_w, 3])

    # Limb segments, drawn as filled rotated ellipses.
    for limb in limbs:
        limb_color = color_list[limb[0]]
        xs = points[limb][:, 0]
        ys = points[limb][:, 1]
        seg_len = ((xs[0] - xs[1]) ** 2 + (ys[0] - ys[1]) ** 2) ** 0.5
        rotation = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
        poly = cv2.ellipse2Poly(
            (int(np.mean(xs)), int(np.mean(ys))),
            (int(seg_len / 2), limb_width),
            int(rotation), 0, 360, 1,
        )
        canvas = cv2.fillConvexPoly(canvas.copy(), poly, limb_color)
    canvas = (canvas * 0.6).astype(np.uint8)  # dim limbs relative to the dots

    # Keypoint dots on top, full brightness.
    for point_idx, (px, py) in enumerate(points):
        canvas = cv2.circle(canvas.copy(), (int(px), int(py)), 10, color_list[point_idx], -1)

    return Image.fromarray(canvas.astype(np.uint8))
|
180 |
+
|
181 |
+
def resize_img(input_image, max_side=640, min_side=640, size=None,
               pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
    """Resize a PIL image for the pipeline (640px variant).

    Scales so the short side reaches min_side (capped by max_side), snaps
    both dimensions down to multiples of base_pixel_number, and optionally
    pads onto a white max_side x max_side square.
    (Removed bare debug print(w); print(h) from the original.)
    """
    w, h = input_image.size
    if size is not None:
        w_resize_new, h_resize_new = size
    else:
        ratio = min_side / min(h, w)
        w, h = round(ratio * w), round(ratio * h)
        ratio = max_side / max(h, w)
        input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
        # Snap down to the nearest multiple of base_pixel_number.
        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

    if pad_to_max_side:
        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
        offset_x = (max_side - w_resize_new) // 2
        offset_y = (max_side - h_resize_new) // 2
        res[offset_y:offset_y + h_resize_new, offset_x:offset_x + w_resize_new] = np.array(input_image)
        input_image = Image.fromarray(res)
    return input_image
|
205 |
+
|
206 |
+
# def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
|
207 |
+
# p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
|
208 |
+
# return p.replace("{prompt}", positive), n + ' ' + negative
|
209 |
+
|
210 |
+
def generate_image(face_image,prompt,negative_prompt):
|
211 |
+
pose_image_path = None
|
212 |
+
# prompt = "superman"
|
213 |
+
enable_LCM = False
|
214 |
+
identitynet_strength_ratio = 0.95
|
215 |
+
adapter_strength_ratio = 0.60
|
216 |
+
num_steps = 15
|
217 |
+
guidance_scale = 8.5
|
218 |
+
seed = random.randint(0, MAX_SEED)
|
219 |
+
# negative_prompt = ""
|
220 |
+
# negative_prompt += neg
|
221 |
+
enhance_face_region = True
|
222 |
+
if enable_LCM:
|
223 |
+
pipe.enable_lora()
|
224 |
+
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
225 |
+
else:
|
226 |
+
pipe.disable_lora()
|
227 |
+
pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
|
228 |
+
|
229 |
+
if face_image is None:
|
230 |
+
raise gr.Error(f"Cannot find any input face image! Please upload the face image")
|
231 |
+
|
232 |
+
# if prompt is None:
|
233 |
+
# prompt = "a person"
|
234 |
+
|
235 |
+
# apply the style template
|
236 |
+
# prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
|
237 |
+
|
238 |
+
# face_image = load_image(face_image_path)
|
239 |
+
face_image = resize_img(face_image)
|
240 |
+
face_image_cv2 = convert_from_image_to_cv2(face_image)
|
241 |
+
height, width, _ = face_image_cv2.shape
|
242 |
+
|
243 |
+
# Extract face features
|
244 |
+
face_info = app.get(face_image_cv2)
|
245 |
+
|
246 |
+
if len(face_info) == 0:
|
247 |
+
raise gr.Error(f"Cannot find any face in the image! Please upload another person image")
|
248 |
+
|
249 |
+
face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1] # only use the maximum face
|
250 |
+
face_emb = face_info['embedding']
|
251 |
+
face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])
|
252 |
+
|
253 |
+
if pose_image_path is not None:
|
254 |
+
pose_image = load_image(pose_image_path)
|
255 |
+
pose_image = resize_img(pose_image)
|
256 |
+
pose_image_cv2 = convert_from_image_to_cv2(pose_image)
|
257 |
+
|
258 |
+
face_info = app.get(pose_image_cv2)
|
259 |
+
|
260 |
+
if len(face_info) == 0:
|
261 |
+
raise gr.Error(f"Cannot find any face in the reference image! Please upload another person image")
|
262 |
+
|
263 |
+
face_info = face_info[-1]
|
264 |
+
face_kps = draw_kps(pose_image, face_info['kps'])
|
265 |
+
|
266 |
+
width, height = face_kps.size
|
267 |
+
|
268 |
+
if enhance_face_region:
|
269 |
+
control_mask = np.zeros([height, width, 3])
|
270 |
+
x1, y1, x2, y2 = face_info["bbox"]
|
271 |
+
x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
|
272 |
+
control_mask[y1:y2, x1:x2] = 255
|
273 |
+
control_mask = Image.fromarray(control_mask.astype(np.uint8))
|
274 |
+
else:
|
275 |
+
control_mask = None
|
276 |
+
|
277 |
+
generator = torch.Generator(device=device).manual_seed(seed)
|
278 |
+
|
279 |
+
print("Start inference...")
|
280 |
+
print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")
|
281 |
+
|
282 |
+
pipe.set_ip_adapter_scale(adapter_strength_ratio)
|
283 |
+
images = pipe(
|
284 |
+
prompt=prompt,
|
285 |
+
negative_prompt=negative_prompt,
|
286 |
+
image_embeds=face_emb,
|
287 |
+
image=face_kps,
|
288 |
+
control_mask=control_mask,
|
289 |
+
controlnet_conditioning_scale=float(identitynet_strength_ratio),
|
290 |
+
num_inference_steps=num_steps,
|
291 |
+
guidance_scale=guidance_scale,
|
292 |
+
height=height,
|
293 |
+
width=width,
|
294 |
+
generator=generator,
|
295 |
+
# num_images_per_prompt = 4
|
296 |
+
).images
|
297 |
+
|
298 |
+
return images[0]
|
299 |
+
|
300 |
+
### Description
|
301 |
+
title = r"""
|
302 |
+
<h1 align="center">Choose your AVATAR</h1>
|
303 |
+
"""
|
304 |
+
|
305 |
+
description = r"""
|
306 |
+
<h2> Powered by IDfy </h2>"""
|
307 |
+
|
308 |
+
article = r""""""
|
309 |
+
|
310 |
+
tips = r""""""
|
311 |
+
|
312 |
+
css = '''
|
313 |
+
.gradio-container {width: 95% !important; background-color: #E6F3FF;}
|
314 |
+
.image-gallery {height: 100vh !important; overflow: auto;}
|
315 |
+
.gradio-row .gradio-element { margin: 0 !important; }
|
316 |
+
'''
|
317 |
+
with gr.Blocks(css=css) as demo:
|
318 |
+
|
319 |
+
# description
|
320 |
+
gr.Markdown(title)
|
321 |
+
with gr.Row():
|
322 |
+
gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
|
323 |
+
gr.Markdown(description)
|
324 |
+
with gr.Row():
|
325 |
+
with gr.Column():
|
326 |
+
style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
|
327 |
+
face_file = gr.Image(label="Upload a photo of your face", type="pil",sources="webcam")
|
328 |
+
submit = gr.Button("Submit", variant="primary")
|
329 |
+
with gr.Column():
|
330 |
+
with gr.Row():
|
331 |
+
gallery1 = gr.Image(label="Generated Images")
|
332 |
+
gallery2 = gr.Image(label="Generated Images")
|
333 |
+
with gr.Row():
|
334 |
+
gallery3 = gr.Image(label="Generated Images")
|
335 |
+
gallery4 = gr.Image(label="Generated Images")
|
336 |
+
email = gr.Textbox(label="Email",
|
337 |
+
info="Enter your email address",
|
338 |
+
value="")
|
339 |
+
|
340 |
+
usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)
|
341 |
+
# identitynet_strength_ratio = gr.Slider(
|
342 |
+
# label="IdentityNet strength (for fidelity)",
|
343 |
+
# minimum=0,
|
344 |
+
# maximum=1.5,
|
345 |
+
# step=0.05,
|
346 |
+
# value=0.95,
|
347 |
+
# )
|
348 |
+
# adapter_strength_ratio = gr.Slider(
|
349 |
+
# label="Image adapter strength (for detail)",
|
350 |
+
# minimum=0,
|
351 |
+
# maximum=1.5,
|
352 |
+
# step=0.05,
|
353 |
+
# value=0.60,
|
354 |
+
# )
|
355 |
+
# negative_prompt = gr.Textbox(
|
356 |
+
# label="Negative Prompt",
|
357 |
+
# placeholder="low quality",
|
358 |
+
# value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
|
359 |
+
# )
|
360 |
+
# num_steps = gr.Slider(
|
361 |
+
# label="Number of sample steps",
|
362 |
+
# minimum=15,
|
363 |
+
# maximum=100,
|
364 |
+
# step=1,
|
365 |
+
# value=5 if enable_lcm_arg else 15,
|
366 |
+
# )
|
367 |
+
# guidance_scale = gr.Slider(
|
368 |
+
# label="Guidance scale",
|
369 |
+
# minimum=0.1,
|
370 |
+
# maximum=10.0,
|
371 |
+
# step=0.1,
|
372 |
+
# value=0 if enable_lcm_arg else 8.5,
|
373 |
+
# )
|
374 |
+
# if email is None:
|
375 |
+
# print("STOPPPP")
|
376 |
+
# raise gr.Error("Email ID is compulsory")
|
377 |
+
face_file.upload(
|
378 |
+
fn=remove_tips,
|
379 |
+
outputs=usage_tips,
|
380 |
+
queue=True,
|
381 |
+
api_name=False,
|
382 |
+
show_progress = "full"
|
383 |
+
).then(
|
384 |
+
fn=run_for_prompts1,
|
385 |
+
inputs=[face_file,style],
|
386 |
+
outputs=[gallery1]
|
387 |
+
).then(
|
388 |
+
fn=run_for_prompts2,
|
389 |
+
inputs=[face_file,style],
|
390 |
+
outputs=[gallery2]
|
391 |
+
).then(
|
392 |
+
fn=run_for_prompts3,
|
393 |
+
inputs=[face_file,style],
|
394 |
+
outputs=[gallery3]
|
395 |
+
).then(
|
396 |
+
fn=run_for_prompts4,
|
397 |
+
inputs=[face_file,style],
|
398 |
+
outputs=[gallery4]
|
399 |
+
)
|
400 |
+
submit.click(
|
401 |
+
fn=remove_tips,
|
402 |
+
outputs=usage_tips,
|
403 |
+
queue=True,
|
404 |
+
api_name=False,
|
405 |
+
show_progress = "full"
|
406 |
+
).then(
|
407 |
+
fn=run_for_prompts1,
|
408 |
+
inputs=[face_file,style],
|
409 |
+
outputs=[gallery1]
|
410 |
+
).then(
|
411 |
+
fn=run_for_prompts2,
|
412 |
+
inputs=[face_file,style],
|
413 |
+
outputs=[gallery2]
|
414 |
+
).then(
|
415 |
+
fn=run_for_prompts3,
|
416 |
+
inputs=[face_file,style],
|
417 |
+
outputs=[gallery3]
|
418 |
+
).then(
|
419 |
+
fn=run_for_prompts4,
|
420 |
+
inputs=[face_file,style],
|
421 |
+
outputs=[gallery4]
|
422 |
+
)
|
423 |
+
|
424 |
+
|
425 |
+
gr.Markdown(article)
|
426 |
+
|
427 |
+
demo.launch(share=True)
|
428 |
+
|
429 |
+
if __name__ == "__main__":
|
430 |
+
parser = argparse.ArgumentParser()
|
431 |
+
parser.add_argument("--pretrained_model_name_or_path", type=str, default="wangqixun/YamerMIX_v8")
|
432 |
+
args = parser.parse_args()
|
433 |
+
|
434 |
+
main(args.pretrained_model_name_or_path, False)
|
gradio_demo/background.jpg
ADDED
gradio_demo/controlnet_util.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import numpy as np
|
3 |
+
from PIL import Image
|
4 |
+
from controlnet_aux import OpenposeDetector
|
5 |
+
from model_util import get_torch_device
|
6 |
+
import cv2
|
7 |
+
|
8 |
+
|
9 |
+
from transformers import DPTImageProcessor, DPTForDepthEstimation
|
10 |
+
|
11 |
+
device = get_torch_device()
|
12 |
+
depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to(device)
|
13 |
+
feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")
|
14 |
+
openpose = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")
|
15 |
+
|
16 |
+
def get_depth_map(image):
|
17 |
+
image = feature_extractor(images=image, return_tensors="pt").pixel_values.to("cuda")
|
18 |
+
with torch.no_grad(), torch.autocast("cuda"):
|
19 |
+
depth_map = depth_estimator(image).predicted_depth
|
20 |
+
|
21 |
+
depth_map = torch.nn.functional.interpolate(
|
22 |
+
depth_map.unsqueeze(1),
|
23 |
+
size=(1024, 1024),
|
24 |
+
mode="bicubic",
|
25 |
+
align_corners=False,
|
26 |
+
)
|
27 |
+
depth_min = torch.amin(depth_map, dim=[1, 2, 3], keepdim=True)
|
28 |
+
depth_max = torch.amax(depth_map, dim=[1, 2, 3], keepdim=True)
|
29 |
+
depth_map = (depth_map - depth_min) / (depth_max - depth_min)
|
30 |
+
image = torch.cat([depth_map] * 3, dim=1)
|
31 |
+
|
32 |
+
image = image.permute(0, 2, 3, 1).cpu().numpy()[0]
|
33 |
+
image = Image.fromarray((image * 255.0).clip(0, 255).astype(np.uint8))
|
34 |
+
return image
|
35 |
+
|
36 |
+
def get_canny_image(image, t1=100, t2=200):
|
37 |
+
image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
|
38 |
+
edges = cv2.Canny(image, t1, t2)
|
39 |
+
return Image.fromarray(edges, "L")
|
gradio_demo/demo.py
ADDED
@@ -0,0 +1,369 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append('./')
|
3 |
+
|
4 |
+
from typing import Tuple
|
5 |
+
|
6 |
+
import os
|
7 |
+
import cv2
|
8 |
+
import math
|
9 |
+
import torch
|
10 |
+
import random
|
11 |
+
import numpy as np
|
12 |
+
import argparse
|
13 |
+
|
14 |
+
import PIL
|
15 |
+
from PIL import Image
|
16 |
+
|
17 |
+
import diffusers
|
18 |
+
from diffusers.utils import load_image
|
19 |
+
from diffusers.models import ControlNetModel
|
20 |
+
from diffusers import LCMScheduler
|
21 |
+
|
22 |
+
from huggingface_hub import hf_hub_download
|
23 |
+
|
24 |
+
import insightface
|
25 |
+
from insightface.app import FaceAnalysis
|
26 |
+
|
27 |
+
from style_template import styles
|
28 |
+
from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
|
29 |
+
from model_util import load_models_xl, get_torch_device, torch_gc
|
30 |
+
|
31 |
+
from cv2 import imencode
|
32 |
+
import base64
|
33 |
+
|
34 |
+
# def encode_pil_to_base64_new(pil_image):
|
35 |
+
# print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
|
36 |
+
# image_arr = np.asarray(pil_image)[:,:,::-1]
|
37 |
+
# _, byte_data = imencode('.png', image_arr)
|
38 |
+
# base64_data = base64.b64encode(byte_data)
|
39 |
+
# base64_string_opencv = base64_data.decode("utf-8")
|
40 |
+
# return "data:image/png;base64," + base64_string_opencv
|
41 |
+
|
42 |
+
import gradio as gr
|
43 |
+
|
44 |
+
|
45 |
+
# global variable
|
46 |
+
MAX_SEED = np.iinfo(np.int32).max
|
47 |
+
device = get_torch_device()
|
48 |
+
dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
|
49 |
+
STYLE_NAMES = list(styles.keys())
|
50 |
+
DEFAULT_STYLE_NAME = "Watercolor"
|
51 |
+
|
52 |
+
# Load face encoder
|
53 |
+
app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
|
54 |
+
app.prepare(ctx_id=0, det_size=(320, 320))
|
55 |
+
|
56 |
+
# Path to InstantID models
|
57 |
+
face_adapter = f'./checkpoints/ip-adapter.bin'
|
58 |
+
controlnet_path = f'./checkpoints/ControlNetModel'
|
59 |
+
|
60 |
+
# Load pipeline
|
61 |
+
controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)
|
62 |
+
|
63 |
+
logo = Image.open("./gradio_demo/logo.png")
|
64 |
+
|
65 |
+
pretrained_model_name_or_path="wangqixun/YamerMIX_v8"
|
66 |
+
|
67 |
+
|
68 |
+
if pretrained_model_name_or_path.endswith(
|
69 |
+
".ckpt"
|
70 |
+
) or pretrained_model_name_or_path.endswith(".safetensors"):
|
71 |
+
scheduler_kwargs = hf_hub_download(
|
72 |
+
repo_id="wangqixun/YamerMIX_v8",
|
73 |
+
subfolder="scheduler",
|
74 |
+
filename="scheduler_config.json",
|
75 |
+
)
|
76 |
+
|
77 |
+
(tokenizers, text_encoders, unet, _, vae) = load_models_xl(
|
78 |
+
pretrained_model_name_or_path=pretrained_model_name_or_path,
|
79 |
+
scheduler_name=None,
|
80 |
+
weight_dtype=dtype,
|
81 |
+
)
|
82 |
+
|
83 |
+
scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
|
84 |
+
pipe = StableDiffusionXLInstantIDPipeline(
|
85 |
+
vae=vae,
|
86 |
+
text_encoder=text_encoders[0],
|
87 |
+
text_encoder_2=text_encoders[1],
|
88 |
+
tokenizer=tokenizers[0],
|
89 |
+
tokenizer_2=tokenizers[1],
|
90 |
+
unet=unet,
|
91 |
+
scheduler=scheduler,
|
92 |
+
controlnet=controlnet,
|
93 |
+
).to(device)
|
94 |
+
|
95 |
+
else:
|
96 |
+
pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
|
97 |
+
pretrained_model_name_or_path,
|
98 |
+
controlnet=controlnet,
|
99 |
+
torch_dtype=dtype,
|
100 |
+
safety_checker=None,
|
101 |
+
feature_extractor=None,
|
102 |
+
).to(device)
|
103 |
+
|
104 |
+
pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
|
105 |
+
|
106 |
+
pipe.load_ip_adapter_instantid(face_adapter)
|
107 |
+
# load and disable LCM
|
108 |
+
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
|
109 |
+
pipe.disable_lora()
|
110 |
+
|
111 |
+
# gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
|
112 |
+
def remove_tips():
|
113 |
+
print("GG")
|
114 |
+
return gr.update(visible=False)
|
115 |
+
|
116 |
+
def convert_from_cv2_to_image(img: np.ndarray) -> Image:
|
117 |
+
return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
|
118 |
+
|
119 |
+
def convert_from_image_to_cv2(img: Image) -> np.ndarray:
|
120 |
+
return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
|
121 |
+
|
122 |
+
def run_for_prompts1(face_file,style,progress=gr.Progress(track_tqdm=True)):
|
123 |
+
# if email != "":
|
124 |
+
p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
|
125 |
+
return generate_image(face_file, p[0], n)
|
126 |
+
# else:
|
127 |
+
# raise gr.Error("Email ID is compulsory")
|
128 |
+
def run_for_prompts2(face_file,style,progress=gr.Progress(track_tqdm=True)):
|
129 |
+
# if email != "":
|
130 |
+
p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
|
131 |
+
return generate_image(face_file, p[1], n)
|
132 |
+
|
133 |
+
def run_for_prompts3(face_file,style,progress=gr.Progress(track_tqdm=True)):
|
134 |
+
# if email != "":
|
135 |
+
p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
|
136 |
+
return generate_image(face_file, p[2], n)
|
137 |
+
|
138 |
+
def run_for_prompts4(face_file,style,progress=gr.Progress(track_tqdm=True)):
|
139 |
+
# if email != "":
|
140 |
+
p,n = styles.get(style, styles.get(STYLE_NAMES[1]))
|
141 |
+
return generate_image(face_file, p[3], n)
|
142 |
+
|
143 |
+
|
144 |
+
def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
|
145 |
+
stickwidth = 4
|
146 |
+
limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
|
147 |
+
kps = np.array(kps)
|
148 |
+
|
149 |
+
w, h = image_pil.size
|
150 |
+
out_img = np.zeros([h, w, 3])
|
151 |
+
|
152 |
+
for i in range(len(limbSeq)):
|
153 |
+
index = limbSeq[i]
|
154 |
+
color = color_list[index[0]]
|
155 |
+
|
156 |
+
x = kps[index][:, 0]
|
157 |
+
y = kps[index][:, 1]
|
158 |
+
length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
|
159 |
+
angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
|
160 |
+
polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
|
161 |
+
out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
|
162 |
+
out_img = (out_img * 0.6).astype(np.uint8)
|
163 |
+
|
164 |
+
for idx_kp, kp in enumerate(kps):
|
165 |
+
color = color_list[idx_kp]
|
166 |
+
x, y = kp
|
167 |
+
out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)
|
168 |
+
|
169 |
+
out_img_pil = Image.fromarray(out_img.astype(np.uint8))
|
170 |
+
return out_img_pil
|
171 |
+
|
172 |
+
def resize_img(input_image, max_side=640, min_side=640, size=None,
|
173 |
+
pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
|
174 |
+
|
175 |
+
w, h = input_image.size
|
176 |
+
print(w)
|
177 |
+
print(h)
|
178 |
+
if size is not None:
|
179 |
+
w_resize_new, h_resize_new = size
|
180 |
+
else:
|
181 |
+
ratio = min_side / min(h, w)
|
182 |
+
w, h = round(ratio*w), round(ratio*h)
|
183 |
+
ratio = max_side / max(h, w)
|
184 |
+
input_image = input_image.resize([round(ratio*w), round(ratio*h)], mode)
|
185 |
+
w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
|
186 |
+
h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
|
187 |
+
input_image = input_image.resize([w_resize_new, h_resize_new], mode)
|
188 |
+
|
189 |
+
if pad_to_max_side:
|
190 |
+
res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
|
191 |
+
offset_x = (max_side - w_resize_new) // 2
|
192 |
+
offset_y = (max_side - h_resize_new) // 2
|
193 |
+
res[offset_y:offset_y+h_resize_new, offset_x:offset_x+w_resize_new] = np.array(input_image)
|
194 |
+
input_image = Image.fromarray(res)
|
195 |
+
return input_image
|
196 |
+
|
197 |
+
|
198 |
+
def generate_image(face_image,prompt,negative_prompt):
|
199 |
+
pose_image_path = None
|
200 |
+
# prompt = "superman"
|
201 |
+
enable_LCM = False
|
202 |
+
identitynet_strength_ratio = 0.95
|
203 |
+
adapter_strength_ratio = 0.60
|
204 |
+
num_steps = 15
|
205 |
+
guidance_scale = 8.5
|
206 |
+
seed = random.randint(0, MAX_SEED)
|
207 |
+
# negative_prompt = ""
|
208 |
+
# negative_prompt += neg
|
209 |
+
enhance_face_region = True
|
210 |
+
if enable_LCM:
|
211 |
+
pipe.enable_lora()
|
212 |
+
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
|
213 |
+
else:
|
214 |
+
pipe.disable_lora()
|
215 |
+
pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
|
216 |
+
|
217 |
+
if face_image is None:
|
218 |
+
raise gr.Error(f"Cannot find any input face image! Please upload the face image")
|
219 |
+
|
220 |
+
# if prompt is None:
|
221 |
+
# prompt = "a person"
|
222 |
+
|
223 |
+
# apply the style template
|
224 |
+
# prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
|
225 |
+
|
226 |
+
# face_image = load_image(face_image_path)
|
227 |
+
face_image = resize_img(face_image)
|
228 |
+
face_image_cv2 = convert_from_image_to_cv2(face_image)
|
229 |
+
height, width, _ = face_image_cv2.shape
|
230 |
+
|
231 |
+
# Extract face features
|
232 |
+
face_info = app.get(face_image_cv2)
|
233 |
+
|
234 |
+
if len(face_info) == 0:
|
235 |
+
raise gr.Error(f"Cannot find any face in the image! Please upload another person image")
|
236 |
+
|
237 |
+
face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1] # only use the maximum face
|
238 |
+
face_emb = face_info['embedding']
|
239 |
+
face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])
|
240 |
+
|
241 |
+
if pose_image_path is not None:
|
242 |
+
pose_image = load_image(pose_image_path)
|
243 |
+
pose_image = resize_img(pose_image)
|
244 |
+
pose_image_cv2 = convert_from_image_to_cv2(pose_image)
|
245 |
+
|
246 |
+
face_info = app.get(pose_image_cv2)
|
247 |
+
|
248 |
+
if len(face_info) == 0:
|
249 |
+
raise gr.Error(f"Cannot find any face in the reference image! Please upload another person image")
|
250 |
+
|
251 |
+
face_info = face_info[-1]
|
252 |
+
face_kps = draw_kps(pose_image, face_info['kps'])
|
253 |
+
|
254 |
+
width, height = face_kps.size
|
255 |
+
|
256 |
+
if enhance_face_region:
|
257 |
+
control_mask = np.zeros([height, width, 3])
|
258 |
+
x1, y1, x2, y2 = face_info["bbox"]
|
259 |
+
x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
|
260 |
+
control_mask[y1:y2, x1:x2] = 255
|
261 |
+
control_mask = Image.fromarray(control_mask.astype(np.uint8))
|
262 |
+
else:
|
263 |
+
control_mask = None
|
264 |
+
|
265 |
+
generator = torch.Generator(device=device).manual_seed(seed)
|
266 |
+
|
267 |
+
print("Start inference...")
|
268 |
+
print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")
|
269 |
+
|
270 |
+
pipe.set_ip_adapter_scale(adapter_strength_ratio)
|
271 |
+
images = pipe(
|
272 |
+
prompt=prompt,
|
273 |
+
negative_prompt=negative_prompt,
|
274 |
+
image_embeds=face_emb,
|
275 |
+
image=face_kps,
|
276 |
+
control_mask=control_mask,
|
277 |
+
controlnet_conditioning_scale=float(identitynet_strength_ratio),
|
278 |
+
num_inference_steps=num_steps,
|
279 |
+
guidance_scale=guidance_scale,
|
280 |
+
height=height,
|
281 |
+
width=width,
|
282 |
+
generator=generator,
|
283 |
+
# num_images_per_prompt = 4
|
284 |
+
).images
|
285 |
+
|
286 |
+
return images[0]
|
287 |
+
|
288 |
+
def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
|
289 |
+
|
290 |
+
|
291 |
+
|
292 |
+
|
293 |
+
### Description
|
294 |
+
title = r"""
|
295 |
+
<h1 align="center">Choose your AVATAR</h1>
|
296 |
+
"""
|
297 |
+
|
298 |
+
description = r"""
|
299 |
+
<h2> Powered by IDfy </h2>"""
|
300 |
+
|
301 |
+
article = r""""""
|
302 |
+
|
303 |
+
tips = r""""""
|
304 |
+
|
305 |
+
js = ''' '''
|
306 |
+
|
307 |
+
css = '''
|
308 |
+
.gradio-container {width: 95% !important; background-color: #E6F3FF;}
|
309 |
+
.image-gallery {height: 100vh !important; overflow: auto;}
|
310 |
+
.gradio-row .gradio-element { margin: 0 !important; }
|
311 |
+
'''
|
312 |
+
|
313 |
+
|
314 |
+
with gr.Blocks(css=css, js=js) as demo:
|
315 |
+
|
316 |
+
# description
|
317 |
+
gr.Markdown(title)
|
318 |
+
with gr.Row():
|
319 |
+
gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
|
320 |
+
gr.Markdown(description)
|
321 |
+
with gr.Row():
|
322 |
+
with gr.Column():
|
323 |
+
style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
|
324 |
+
face_file = gr.Image(label="Upload a photo of your face", type="pil",sources="webcam")
|
325 |
+
submit = gr.Button("Submit", variant="primary")
|
326 |
+
with gr.Column():
|
327 |
+
with gr.Row():
|
328 |
+
gallery1 = gr.Image(label="Generated Images")
|
329 |
+
gallery2 = gr.Image(label="Generated Images")
|
330 |
+
with gr.Row():
|
331 |
+
gallery3 = gr.Image(label="Generated Images")
|
332 |
+
gallery4 = gr.Image(label="Generated Images")
|
333 |
+
email = gr.Textbox(label="Email",
|
334 |
+
info="Enter your email address",
|
335 |
+
value="")
|
336 |
+
|
337 |
+
usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)
|
338 |
+
|
339 |
+
face_file.upload(
|
340 |
+
fn=remove_tips,
|
341 |
+
outputs=usage_tips,
|
342 |
+
queue=True,
|
343 |
+
api_name=False,
|
344 |
+
show_progress = "full"
|
345 |
+
)
|
346 |
+
|
347 |
+
submit.click(
|
348 |
+
fn=remove_tips,
|
349 |
+
outputs=usage_tips,
|
350 |
+
queue=True,
|
351 |
+
api_name=False,
|
352 |
+
show_progress = "full"
|
353 |
+
).then(
|
354 |
+
fn=run_for_prompts1,
|
355 |
+
inputs=[face_file,style],
|
356 |
+
outputs=[gallery1]
|
357 |
+
)
|
358 |
+
|
359 |
+
|
360 |
+
gr.Markdown(article)
|
361 |
+
|
362 |
+
demo.launch(share=True)
|
363 |
+
|
364 |
+
if __name__ == "__main__":
|
365 |
+
parser = argparse.ArgumentParser()
|
366 |
+
parser.add_argument("--pretrained_model_name_or_path", type=str, default="wangqixun/YamerMIX_v8")
|
367 |
+
args = parser.parse_args()
|
368 |
+
|
369 |
+
main(args.pretrained_model_name_or_path, False)
|
gradio_demo/download_models.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import hf_hub_download
|
2 |
+
import gdown
|
3 |
+
import os
|
4 |
+
|
5 |
+
# download models
|
6 |
+
hf_hub_download(
|
7 |
+
repo_id="InstantX/InstantID",
|
8 |
+
filename="ControlNetModel/config.json",
|
9 |
+
local_dir="./checkpoints",
|
10 |
+
)
|
11 |
+
hf_hub_download(
|
12 |
+
repo_id="InstantX/InstantID",
|
13 |
+
filename="ControlNetModel/diffusion_pytorch_model.safetensors",
|
14 |
+
local_dir="./checkpoints",
|
15 |
+
)
|
16 |
+
hf_hub_download(
|
17 |
+
repo_id="InstantX/InstantID", filename="ip-adapter.bin", local_dir="./checkpoints"
|
18 |
+
)
|
19 |
+
hf_hub_download(
|
20 |
+
repo_id="latent-consistency/lcm-lora-sdxl",
|
21 |
+
filename="pytorch_lora_weights.safetensors",
|
22 |
+
local_dir="./checkpoints",
|
23 |
+
)
|
24 |
+
# download antelopev2
|
25 |
+
gdown.download(url="https://drive.google.com/file/d/18wEUfMNohBJ4K3Ly5wpTejPfDzp-8fI8/view?usp=sharing", output="./models/", quiet=False, fuzzy=True)
|
26 |
+
# unzip antelopev2.zip
|
27 |
+
os.system("unzip ./models/antelopev2.zip -d ./models/")
|
gradio_demo/logo.png
ADDED
gradio_demo/logo1.png
ADDED
gradio_demo/model_util.py
ADDED
@@ -0,0 +1,472 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Literal, Union, Optional, Tuple, List
|
2 |
+
|
3 |
+
import torch
|
4 |
+
from transformers import CLIPTextModel, CLIPTokenizer, CLIPTextModelWithProjection
|
5 |
+
from diffusers import (
|
6 |
+
UNet2DConditionModel,
|
7 |
+
SchedulerMixin,
|
8 |
+
StableDiffusionPipeline,
|
9 |
+
StableDiffusionXLPipeline,
|
10 |
+
AutoencoderKL,
|
11 |
+
)
|
12 |
+
from diffusers.pipelines.stable_diffusion.convert_from_ckpt import (
|
13 |
+
convert_ldm_unet_checkpoint,
|
14 |
+
)
|
15 |
+
from safetensors.torch import load_file
|
16 |
+
from diffusers.schedulers import (
|
17 |
+
DDIMScheduler,
|
18 |
+
DDPMScheduler,
|
19 |
+
LMSDiscreteScheduler,
|
20 |
+
EulerDiscreteScheduler,
|
21 |
+
EulerAncestralDiscreteScheduler,
|
22 |
+
UniPCMultistepScheduler,
|
23 |
+
)
|
24 |
+
|
25 |
+
from omegaconf import OmegaConf
|
26 |
+
|
27 |
+
# DiffUsers版StableDiffusionのモデルパラメータ
|
28 |
+
NUM_TRAIN_TIMESTEPS = 1000
|
29 |
+
BETA_START = 0.00085
|
30 |
+
BETA_END = 0.0120
|
31 |
+
|
32 |
+
UNET_PARAMS_MODEL_CHANNELS = 320
|
33 |
+
UNET_PARAMS_CHANNEL_MULT = [1, 2, 4, 4]
|
34 |
+
UNET_PARAMS_ATTENTION_RESOLUTIONS = [4, 2, 1]
|
35 |
+
UNET_PARAMS_IMAGE_SIZE = 64 # fixed from old invalid value `32`
|
36 |
+
UNET_PARAMS_IN_CHANNELS = 4
|
37 |
+
UNET_PARAMS_OUT_CHANNELS = 4
|
38 |
+
UNET_PARAMS_NUM_RES_BLOCKS = 2
|
39 |
+
UNET_PARAMS_CONTEXT_DIM = 768
|
40 |
+
UNET_PARAMS_NUM_HEADS = 8
|
41 |
+
# UNET_PARAMS_USE_LINEAR_PROJECTION = False
|
42 |
+
|
43 |
+
VAE_PARAMS_Z_CHANNELS = 4
|
44 |
+
VAE_PARAMS_RESOLUTION = 256
|
45 |
+
VAE_PARAMS_IN_CHANNELS = 3
|
46 |
+
VAE_PARAMS_OUT_CH = 3
|
47 |
+
VAE_PARAMS_CH = 128
|
48 |
+
VAE_PARAMS_CH_MULT = [1, 2, 4, 4]
|
49 |
+
VAE_PARAMS_NUM_RES_BLOCKS = 2
|
50 |
+
|
51 |
+
# V2
|
52 |
+
V2_UNET_PARAMS_ATTENTION_HEAD_DIM = [5, 10, 20, 20]
|
53 |
+
V2_UNET_PARAMS_CONTEXT_DIM = 1024
|
54 |
+
# V2_UNET_PARAMS_USE_LINEAR_PROJECTION = True
|
55 |
+
|
56 |
+
TOKENIZER_V1_MODEL_NAME = "CompVis/stable-diffusion-v1-4"
|
57 |
+
TOKENIZER_V2_MODEL_NAME = "stabilityai/stable-diffusion-2-1"
|
58 |
+
|
59 |
+
AVAILABLE_SCHEDULERS = Literal["ddim", "ddpm", "lms", "euler_a", "euler", "uniPC"]
|
60 |
+
|
61 |
+
SDXL_TEXT_ENCODER_TYPE = Union[CLIPTextModel, CLIPTextModelWithProjection]
|
62 |
+
|
63 |
+
DIFFUSERS_CACHE_DIR = None # if you want to change the cache dir, change this
|
64 |
+
|
65 |
+
|
66 |
+
def load_checkpoint_with_text_encoder_conversion(ckpt_path: str, device="cpu"):
    """Load an LDM checkpoint and normalize legacy text-encoder key names.

    Some checkpoints store the CLIP text-encoder weights without the
    ``text_model`` path segment; those keys are renamed in place so the
    downstream conversion code always sees a single, consistent layout.

    Args:
        ckpt_path: path to a ``.safetensors`` or torch-pickled checkpoint.
        device: ``map_location`` for ``torch.load`` (ignored for safetensors).

    Returns:
        (checkpoint, state_dict): the raw checkpoint dict (``None`` for
        safetensors files or bare state dicts) and the normalized state dict.
    """
    # Legacy prefix -> modern prefix (inserts the 'text_model' segment).
    prefix_map = (
        (
            "cond_stage_model.transformer.embeddings.",
            "cond_stage_model.transformer.text_model.embeddings.",
        ),
        (
            "cond_stage_model.transformer.encoder.",
            "cond_stage_model.transformer.text_model.encoder.",
        ),
        (
            "cond_stage_model.transformer.final_layer_norm.",
            "cond_stage_model.transformer.text_model.final_layer_norm.",
        ),
    )

    if ckpt_path.endswith(".safetensors"):
        checkpoint = None
        # NOTE: loaded on CPU; forwarding `device` here has caused errors.
        state_dict = load_file(ckpt_path)
    else:
        checkpoint = torch.load(ckpt_path, map_location=device)
        if "state_dict" in checkpoint:
            state_dict = checkpoint["state_dict"]
        else:
            state_dict, checkpoint = checkpoint, None

    # Collect all renames first, then apply them, so the dict is never
    # mutated while being iterated.
    renames = [
        (key, new_prefix + key[len(old_prefix):])
        for old_prefix, new_prefix in prefix_map
        for key in state_dict.keys()
        if key.startswith(old_prefix)
    ]
    for old_key, new_key in renames:
        state_dict[new_key] = state_dict.pop(old_key)

    return checkpoint, state_dict
|
106 |
+
|
107 |
+
|
108 |
+
def create_unet_diffusers_config(v2, use_linear_projection_in_v2=False):
    """Assemble constructor kwargs for a diffusers ``UNet2DConditionModel``
    matching the fixed SD 1.x / 2.x LDM architecture constants above.

    Args:
        v2: True for Stable Diffusion 2.x (different context dim / head dims).
        use_linear_projection_in_v2: add ``use_linear_projection=True`` to the
            config (only honored when ``v2`` is set).

    Returns:
        dict of keyword arguments for ``UNet2DConditionModel``.
    """
    channel_sizes = [
        UNET_PARAMS_MODEL_CHANNELS * mult for mult in UNET_PARAMS_CHANNEL_MULT
    ]
    last_level = len(channel_sizes) - 1

    # Down path: cross-attention blocks only at the configured resolutions;
    # the resolution doubles after every level except the last.
    down_block_types = []
    resolution = 1
    for level in range(len(channel_sizes)):
        if resolution in UNET_PARAMS_ATTENTION_RESOLUTIONS:
            down_block_types.append("CrossAttnDownBlock2D")
        else:
            down_block_types.append("DownBlock2D")
        if level != last_level:
            resolution *= 2

    # Up path mirrors the down path, halving the resolution at each level.
    up_block_types = []
    for _ in channel_sizes:
        if resolution in UNET_PARAMS_ATTENTION_RESOLUTIONS:
            up_block_types.append("CrossAttnUpBlock2D")
        else:
            up_block_types.append("UpBlock2D")
        resolution //= 2

    config = dict(
        sample_size=UNET_PARAMS_IMAGE_SIZE,
        in_channels=UNET_PARAMS_IN_CHANNELS,
        out_channels=UNET_PARAMS_OUT_CHANNELS,
        down_block_types=tuple(down_block_types),
        up_block_types=tuple(up_block_types),
        block_out_channels=tuple(channel_sizes),
        layers_per_block=UNET_PARAMS_NUM_RES_BLOCKS,
        cross_attention_dim=V2_UNET_PARAMS_CONTEXT_DIM if v2 else UNET_PARAMS_CONTEXT_DIM,
        attention_head_dim=V2_UNET_PARAMS_ATTENTION_HEAD_DIM if v2 else UNET_PARAMS_NUM_HEADS,
    )
    if v2 and use_linear_projection_in_v2:
        config["use_linear_projection"] = True

    return config
|
160 |
+
|
161 |
+
|
162 |
+
def load_diffusers_model(
    pretrained_model_name_or_path: str,
    v2: bool = False,
    clip_skip: Optional[int] = None,
    weight_dtype: torch.dtype = torch.float32,
) -> Tuple[CLIPTokenizer, CLIPTextModel, UNet2DConditionModel, AutoencoderKL]:
    """Load tokenizer, text encoder, UNet and VAE from a diffusers layout.

    The tokenizer is always pulled from the canonical hub repo for the SD
    generation in use (TOKENIZER_V1_MODEL_NAME / TOKENIZER_V2_MODEL_NAME);
    the other modules are read from subfolders of
    ``pretrained_model_name_or_path``.

    Args:
        pretrained_model_name_or_path: diffusers model directory or hub repo id.
        v2: True for Stable Diffusion 2.x (24-layer text encoder).
        clip_skip: optional "clip skip" value; when given, the text encoder is
            instantiated with ``total_layers - (clip_skip - 1)`` hidden layers.
        weight_dtype: dtype forwarded to every ``from_pretrained`` call.

    Returns:
        (tokenizer, text_encoder, unet, vae)
    """
    if v2:
        tokenizer = CLIPTokenizer.from_pretrained(
            TOKENIZER_V2_MODEL_NAME,
            subfolder="tokenizer",
            torch_dtype=weight_dtype,
            cache_dir=DIFFUSERS_CACHE_DIR,
        )
        text_encoder = CLIPTextModel.from_pretrained(
            pretrained_model_name_or_path,
            subfolder="text_encoder",
            # default is clip skip 2: 23 of the 24 hidden layers when no
            # explicit clip_skip is requested
            num_hidden_layers=24 - (clip_skip - 1) if clip_skip is not None else 23,
            torch_dtype=weight_dtype,
            cache_dir=DIFFUSERS_CACHE_DIR,
        )
    else:
        tokenizer = CLIPTokenizer.from_pretrained(
            TOKENIZER_V1_MODEL_NAME,
            subfolder="tokenizer",
            torch_dtype=weight_dtype,
            cache_dir=DIFFUSERS_CACHE_DIR,
        )
        text_encoder = CLIPTextModel.from_pretrained(
            pretrained_model_name_or_path,
            subfolder="text_encoder",
            # v1 keeps all 12 layers unless clip_skip is requested
            num_hidden_layers=12 - (clip_skip - 1) if clip_skip is not None else 12,
            torch_dtype=weight_dtype,
            cache_dir=DIFFUSERS_CACHE_DIR,
        )

    unet = UNet2DConditionModel.from_pretrained(
        pretrained_model_name_or_path,
        subfolder="unet",
        torch_dtype=weight_dtype,
        cache_dir=DIFFUSERS_CACHE_DIR,
    )

    # NOTE(review): the VAE is loaded without weight_dtype/cache_dir, unlike
    # the other modules — confirm whether that asymmetry is intentional.
    vae = AutoencoderKL.from_pretrained(pretrained_model_name_or_path, subfolder="vae")

    return tokenizer, text_encoder, unet, vae
|
208 |
+
|
209 |
+
|
210 |
+
def load_checkpoint_model(
    checkpoint_path: str,
    v2: bool = False,
    clip_skip: Optional[int] = None,
    weight_dtype: torch.dtype = torch.float32,
) -> Tuple[CLIPTokenizer, CLIPTextModel, UNet2DConditionModel, AutoencoderKL]:
    """Load tokenizer/text-encoder/UNet/VAE from a single-file SD checkpoint.

    A temporary ``StableDiffusionPipeline`` provides the tokenizer, text
    encoder and VAE; the UNet is rebuilt manually from the raw LDM state dict
    so its config (class/addition embed types, linear projection) can be
    controlled explicitly.

    Args:
        checkpoint_path: ``.ckpt`` or ``.safetensors`` single-file checkpoint.
        v2: True for Stable Diffusion 2.x checkpoints.
        clip_skip: optional "clip skip" value applied to the text-encoder
            config after loading.
        weight_dtype: dtype forwarded to the pipeline loader.

    Returns:
        (tokenizer, text_encoder, unet, vae)
    """
    pipe = StableDiffusionPipeline.from_single_file(
        checkpoint_path,
        upcast_attention=True if v2 else False,
        torch_dtype=weight_dtype,
        cache_dir=DIFFUSERS_CACHE_DIR,
    )

    # Rebuild the UNet from the normalized LDM state dict instead of using
    # pipe.unet, forcing the diffusers config derived from the fixed SD
    # architecture constants.
    _, state_dict = load_checkpoint_with_text_encoder_conversion(checkpoint_path)
    unet_config = create_unet_diffusers_config(v2, use_linear_projection_in_v2=v2)
    unet_config["class_embed_type"] = None
    unet_config["addition_embed_type"] = None
    converted_unet_checkpoint = convert_ldm_unet_checkpoint(state_dict, unet_config)
    unet = UNet2DConditionModel(**unet_config)
    unet.load_state_dict(converted_unet_checkpoint)

    tokenizer = pipe.tokenizer
    text_encoder = pipe.text_encoder
    vae = pipe.vae
    if clip_skip is not None:
        # NOTE(review): this only rewrites the config value; the hidden layers
        # already loaded into the module are not removed — confirm downstream
        # code honors config.num_hidden_layers rather than the module list.
        if v2:
            text_encoder.config.num_hidden_layers = 24 - (clip_skip - 1)
        else:
            text_encoder.config.num_hidden_layers = 12 - (clip_skip - 1)

    # Drop the pipeline wrapper; the extracted modules stay alive.
    del pipe

    return tokenizer, text_encoder, unet, vae
|
243 |
+
|
244 |
+
|
245 |
+
def load_models(
    pretrained_model_name_or_path: str,
    scheduler_name: str,
    v2: bool = False,
    v_pred: bool = False,
    weight_dtype: torch.dtype = torch.float32,
) -> Tuple[
    CLIPTokenizer,
    CLIPTextModel,
    UNet2DConditionModel,
    Optional[SchedulerMixin],
    AutoencoderKL,
]:
    """Load all SD 1.x/2.x modules plus (optionally) a noise scheduler.

    Fixes vs. original: the return annotation omitted the scheduler's
    optionality and the trailing VAE even though five values are returned.

    Args:
        pretrained_model_name_or_path: single-file checkpoint path
            (``.ckpt``/``.safetensors``) or a diffusers folder / hub repo id.
        scheduler_name: one of AVAILABLE_SCHEDULERS; falsy to skip scheduler
            creation.
        v2: True for Stable Diffusion 2.x.
        v_pred: use "v_prediction" instead of "epsilon" for the scheduler.
        weight_dtype: dtype forwarded to the loaders.

    Returns:
        (tokenizer, text_encoder, unet, scheduler_or_None, vae)
    """
    # Single-file checkpoints go through the LDM conversion path; anything
    # else is treated as a diffusers folder or hub repo id.
    if pretrained_model_name_or_path.endswith((".ckpt", ".safetensors")):
        tokenizer, text_encoder, unet, vae = load_checkpoint_model(
            pretrained_model_name_or_path, v2=v2, weight_dtype=weight_dtype
        )
    else:  # diffusers
        tokenizer, text_encoder, unet, vae = load_diffusers_model(
            pretrained_model_name_or_path, v2=v2, weight_dtype=weight_dtype
        )

    if scheduler_name:
        scheduler = create_noise_scheduler(
            scheduler_name,
            prediction_type="v_prediction" if v_pred else "epsilon",
        )
    else:
        scheduler = None

    return tokenizer, text_encoder, unet, scheduler, vae
|
272 |
+
|
273 |
+
|
274 |
+
def load_diffusers_model_xl(
    pretrained_model_name_or_path: str,
    weight_dtype: torch.dtype = torch.float32,
) -> Tuple[List[CLIPTokenizer], List[SDXL_TEXT_ENCODER_TYPE], UNet2DConditionModel,]:
    """Load both SDXL tokenizers, both text encoders, the UNet and the VAE
    from a diffusers-format folder or hub repo.

    Returns:
        (tokenizers, text_encoders, unet, vae) — the lists hold the primary
        encoder followed by the projection encoder.
    """
    shared_kwargs = dict(torch_dtype=weight_dtype, cache_dir=DIFFUSERS_CACHE_DIR)

    tokenizers = [
        CLIPTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            subfolder="tokenizer",
            **shared_kwargs,
        ),
        CLIPTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            subfolder="tokenizer_2",
            pad_token_id=0,  # same as open clip
            **shared_kwargs,
        ),
    ]

    text_encoders = [
        CLIPTextModel.from_pretrained(
            pretrained_model_name_or_path,
            subfolder="text_encoder",
            **shared_kwargs,
        ),
        CLIPTextModelWithProjection.from_pretrained(
            pretrained_model_name_or_path,
            subfolder="text_encoder_2",
            **shared_kwargs,
        ),
    ]

    unet = UNet2DConditionModel.from_pretrained(
        pretrained_model_name_or_path,
        subfolder="unet",
        **shared_kwargs,
    )
    vae = AutoencoderKL.from_pretrained(pretrained_model_name_or_path, subfolder="vae")
    return tokenizers, text_encoders, unet, vae
|
319 |
+
|
320 |
+
|
321 |
+
def load_checkpoint_model_xl(
    checkpoint_path: str,
    weight_dtype: torch.dtype = torch.float32,
) -> Tuple[List[CLIPTokenizer], List[SDXL_TEXT_ENCODER_TYPE], UNet2DConditionModel,]:
    """Load SDXL modules from a single-file checkpoint.

    A temporary ``StableDiffusionXLPipeline`` does the parsing; only the
    individual modules are kept.
    """
    pipe = StableDiffusionXLPipeline.from_single_file(
        checkpoint_path,
        torch_dtype=weight_dtype,
        cache_dir=DIFFUSERS_CACHE_DIR,
    )

    unet = pipe.unet
    vae = pipe.vae
    tokenizers = [pipe.tokenizer, pipe.tokenizer_2]
    text_encoders = [pipe.text_encoder, pipe.text_encoder_2]
    # The secondary encoder pads with 0, matching open clip's convention.
    text_encoders[-1].pad_token_id = 0

    # Release the pipeline wrapper; extracted modules remain referenced.
    del pipe

    return tokenizers, text_encoders, unet, vae
|
341 |
+
|
342 |
+
|
343 |
+
def load_models_xl(
    pretrained_model_name_or_path: str,
    scheduler_name: str,
    weight_dtype: torch.dtype = torch.float32,
    noise_scheduler_kwargs=None,
) -> Tuple[
    List[CLIPTokenizer],
    List[SDXL_TEXT_ENCODER_TYPE],
    UNet2DConditionModel,
    SchedulerMixin,
]:
    """Load all SDXL modules plus (optionally) a noise scheduler.

    Single-file checkpoints (``.ckpt``/``.safetensors``) are routed through
    the checkpoint loader; anything else is treated as a diffusers layout.

    Returns:
        (tokenizers, text_encoders, unet, scheduler_or_None, vae)
    """
    is_single_file = pretrained_model_name_or_path.endswith((".ckpt", ".safetensors"))
    loader = load_checkpoint_model_xl if is_single_file else load_diffusers_model_xl
    tokenizers, text_encoders, unet, vae = loader(
        pretrained_model_name_or_path, weight_dtype
    )

    scheduler = (
        create_noise_scheduler(scheduler_name, noise_scheduler_kwargs)
        if scheduler_name
        else None
    )

    return tokenizers, text_encoders, unet, scheduler, vae
|
370 |
+
|
371 |
+
def create_noise_scheduler(
    scheduler_name: AVAILABLE_SCHEDULERS = "ddpm",
    noise_scheduler_kwargs=None,
    prediction_type: Literal["epsilon", "v_prediction"] = "epsilon",
) -> SchedulerMixin:
    """Instantiate a diffusers noise scheduler by (case-insensitive) name.

    Fixes vs. original:
    - ``noise_scheduler_kwargs=None`` no longer crashes inside
      ``OmegaConf.to_container`` — it now means "use scheduler defaults".
    - ``prediction_type`` was accepted but silently ignored; it is now passed
      to the scheduler unless the caller's kwargs already set it.

    Args:
        scheduler_name: one of "ddim", "ddpm", "lms", "euler_a", "euler",
            "uniPC" (spaces/case normalized).
        noise_scheduler_kwargs: optional dict or OmegaConf node of scheduler
            constructor kwargs.
        prediction_type: "epsilon" (noise) or "v_prediction" target.

    Raises:
        ValueError: for an unrecognized scheduler name.
    """
    name = scheduler_name.lower().replace(" ", "_")

    # Normalized name -> scheduler class.
    # https://huggingface.co/docs/diffusers/api/schedulers/overview
    scheduler_classes = {
        "ddim": DDIMScheduler,
        "ddpm": DDPMScheduler,
        "lms": LMSDiscreteScheduler,
        "euler_a": EulerAncestralDiscreteScheduler,
        "euler": EulerDiscreteScheduler,
        "unipc": UniPCMultistepScheduler,
    }
    try:
        scheduler_cls = scheduler_classes[name]
    except KeyError:
        raise ValueError(f"Unknown scheduler name: {name}") from None

    # Accept None, a plain dict, or an OmegaConf node.
    if noise_scheduler_kwargs is None:
        kwargs = {}
    elif isinstance(noise_scheduler_kwargs, dict):
        kwargs = dict(noise_scheduler_kwargs)
    else:
        kwargs = OmegaConf.to_container(noise_scheduler_kwargs)

    # Honor the requested prediction type unless explicitly overridden.
    kwargs.setdefault("prediction_type", prediction_type)

    return scheduler_cls(**kwargs)
|
407 |
+
|
408 |
+
|
409 |
+
def torch_gc():
    """Force Python garbage collection and, when CUDA is present, release
    cached GPU memory and collect CUDA IPC handles."""
    import gc

    gc.collect()
    if not torch.cuda.is_available():
        return
    with torch.cuda.device("cuda"):
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
|
417 |
+
|
418 |
+
|
419 |
+
from enum import Enum
|
420 |
+
|
421 |
+
|
422 |
+
class CPUState(Enum):
    """Which compute backend this process treats as its primary device."""

    GPU = 0  # CUDA (or Intel XPU when the extension is active)
    CPU = 1  # pure CPU execution
    MPS = 2  # Apple Metal Performance Shaders
|
426 |
+
|
427 |
+
|
428 |
+
# Mutable module-level device state, refined by the backend probes below.
cpu_state = CPUState.GPU  # optimistic default; switched to MPS if detected
xpu_available = False  # set True when intel_extension_for_pytorch finds an XPU
directml_enabled = False  # NOTE(review): never set True in this file — presumably toggled externally
|
431 |
+
|
432 |
+
|
433 |
+
def is_intel_xpu():
    """Return True when the GPU backend is active and an Intel XPU was found."""
    global cpu_state
    global xpu_available
    return cpu_state == CPUState.GPU and xpu_available
|
440 |
+
|
441 |
+
|
442 |
+
# Probe optional backends once at import time; any failure simply leaves the
# CUDA defaults in place. Bare `except:` replaced with `except Exception:`
# so SystemExit / KeyboardInterrupt are no longer swallowed.
try:
    import intel_extension_for_pytorch as ipex

    if torch.xpu.is_available():
        xpu_available = True
except Exception:
    # Extension missing or broken — stay on the default backend.
    pass

try:
    if torch.backends.mps.is_available():
        cpu_state = CPUState.MPS
        import torch.mps
except Exception:
    # Older torch builds without the mps backend attribute land here.
    pass
|
456 |
+
|
457 |
+
|
458 |
+
def get_torch_device():
    """Return the torch device matching the detected backend state.

    Priority: DirectML override, then MPS, then explicit CPU, then
    XPU/CUDA for the GPU state.
    """
    global directml_enabled
    global cpu_state
    if directml_enabled:
        global directml_device
        return directml_device
    if cpu_state == CPUState.MPS:
        return torch.device("mps")
    if cpu_state == CPUState.CPU:
        return torch.device("cpu")
    # GPU state: prefer an Intel XPU when available, otherwise CUDA.
    if is_intel_xpu():
        return torch.device("xpu")
    return torch.device(torch.cuda.current_device())
|
gradio_demo/preprocess.py
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import random
|
3 |
+
import csv
|
4 |
+
import gc
|
5 |
+
import glob
|
6 |
+
from datetime import datetime
|
7 |
+
import time
|
8 |
+
from pathlib import Path
|
9 |
+
from style_template import style_list
|
10 |
+
from PIL import Image, ImageOps
|
11 |
+
|
12 |
+
# Default Configuration variables
|
13 |
+
INPUT_FOLDER_NAME = 'examples'
|
14 |
+
OUTPUT_FOLDER_NAME = 'generated_images'
|
15 |
+
LOG_FILENAME = 'generation_log.csv'
|
16 |
+
logfile_path = os.path.join(os.getcwd(), LOG_FILENAME)
|
17 |
+
|
18 |
+
PROMPT = "human, sharp focus"
|
19 |
+
NEGATIVE_PROMPT = "(blurry, blur, text, abstract, glitch, lowres, low quality, worst quality:1.2), (text:1.2), watermark, painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured"
|
20 |
+
IDENTITYNET_STRENGTH_RATIO_RANGE = (1.0, 1.5)
|
21 |
+
ADAPTER_STRENGTH_RATIO_RANGE = (0.7, 1.0)
|
22 |
+
NUM_INFERENCE_STEPS_RANGE = (40, 60)
|
23 |
+
GUIDANCE_SCALE_RANGE = (7.0, 12.0)
|
24 |
+
MAX_SIDE = 1280
|
25 |
+
MIN_SIDE = 1024
|
26 |
+
NUMBER_OF_LOOPS = 1
|
27 |
+
|
28 |
+
# Dynamically create the STYLES list from imported style_list
|
29 |
+
STYLES = [style["name"] for style in style_list]
|
30 |
+
USE_RANDOM_STYLE = False
|
31 |
+
|
32 |
+
def choose_random_style():
    """Pick one style name uniformly at random from the configured STYLES."""
    return STYLES[random.randrange(len(STYLES))]
|
34 |
+
|
35 |
+
def get_random_image_file(input_folder):
    """Return the path (as str) of a randomly chosen image in *input_folder*.

    Only files with a .jpg/.jpeg/.png extension (any letter case) count.

    Raises:
        FileNotFoundError: if the folder holds no image with a valid extension.
    """
    allowed_suffixes = {".jpg", ".jpeg", ".png"}
    candidates = [
        entry
        for entry in Path(input_folder).glob("*")
        if entry.suffix.lower() in allowed_suffixes
    ]
    if not candidates:
        raise FileNotFoundError(f"No images found in directory {input_folder}")
    return str(random.choice(candidates))
|
41 |
+
|
42 |
+
def resize_and_pad_image(image_path, max_side, min_side, pad_color=(255, 255, 255)):
    """Scale the image to fit within the [min_side, max_side] constraints,
    then pad it (centred) to a max_side x max_side square of *pad_color*.

    Args:
        image_path: path of the image to open with PIL.
        max_side: target canvas size and upper bound for the long edge.
        min_side: upper bound applied to the short edge.
        pad_color: RGB fill used for the padding border.

    Returns:
        The padded PIL image.
    """
    img = Image.open(image_path)

    # Uniform scale: the short edge reaches at most min_side and the long
    # edge at most max_side, whichever constraint is tighter.
    scale = min(min_side / min(img.size), max_side / max(img.size))
    width, height = (int(dim * scale) for dim in img.size)
    img = img.resize((width, height), Image.BILINEAR)

    # Centre the result on the square canvas; right/bottom absorb any odd pixel.
    pad_left = (max_side - width) // 2
    pad_top = (max_side - height) // 2
    pad_right = max_side - width - pad_left
    pad_bottom = max_side - height - pad_top
    return ImageOps.expand(img, (pad_left, pad_top, pad_right, pad_bottom), pad_color)
|
62 |
+
|
63 |
+
def log_to_csv(logfile_path, image_name, new_file_name='Unknown', identitynet_strength_ratio=0.0,
               adapter_strength_ratio=0.0, num_inference_steps=0, guidance_scale=0.0, seed=0,
               success=True, error_message='', style_name="", prompt="", negative_prompt="",
               time_taken=0.0, current_timestamp=""):
    """Append one image-generation record to the CSV log at *logfile_path*.

    Creates the parent directory (if any) and writes a header row the first
    time the file is created; later calls only append data rows.

    Fix vs. original: ``os.path.dirname()`` returns '' for a bare filename,
    and ``os.makedirs('')`` raises FileNotFoundError — the directory is now
    only created when a directory component actually exists.
    """
    parent_dir = os.path.dirname(logfile_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    file_exists = os.path.isfile(logfile_path)

    with open(logfile_path, 'a', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['image_name', 'new_file_name', 'identitynet_strength_ratio',
                      'adapter_strength_ratio', 'num_inference_steps', 'guidance_scale',
                      'seed', 'success', 'error_message', 'style_name', 'prompt',
                      'negative_prompt', 'time_taken', 'current_timestamp']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        # Header only on first creation so appends never duplicate it.
        if not file_exists:
            writer.writeheader()
        writer.writerow({
            'image_name': image_name,
            'new_file_name': new_file_name,
            'identitynet_strength_ratio': identitynet_strength_ratio,
            'adapter_strength_ratio': adapter_strength_ratio,
            'num_inference_steps': num_inference_steps,
            'guidance_scale': guidance_scale,
            'seed': seed,
            'success': success,
            'error_message': error_message,
            'style_name': style_name,
            'prompt': prompt,
            'negative_prompt': negative_prompt,
            'time_taken': time_taken,
            'current_timestamp': current_timestamp
        })
|
88 |
+
|
89 |
+
def initial_image(generate_image_func):
    """Batch-drive *generate_image_func* over every image in INPUT_FOLDER_NAME.

    For NUMBER_OF_LOOPS passes over a shuffled listing of the input folder,
    randomizes generation parameters within the configured ranges, calls the
    supplied generation callable, prints progress/ETA statistics, and appends
    one CSV log row per generated file.

    Args:
        generate_image_func: callable with the keyword signature used below;
            expected to return a 3-tuple whose last element is the list of
            generated file paths.

    Raises:
        FileNotFoundError: when the input folder contains no png/jpg/jpeg.
    """
    overall_start_time = time.time()
    total_time_taken = 0.0

    # Initialize a counter for processed images at the beginning of the function
    processed_images_count = 0

    # List all image files in the `INPUT_FOLDER_NAME`
    image_files = glob.glob(f'{INPUT_FOLDER_NAME}/*.png') + \
        glob.glob(f'{INPUT_FOLDER_NAME}/*.jpg') + \
        glob.glob(f'{INPUT_FOLDER_NAME}/*.jpeg')

    # Check if we found any images
    if not image_files:
        raise FileNotFoundError(f"No images found in directory {INPUT_FOLDER_NAME}")

    # Print the count of detected image files
    print(f"Processing a total of {len(image_files)} image(s) in '{INPUT_FOLDER_NAME}'")

    # Shuffle the image files randomly
    random.shuffle(image_files)

    total_images = len(image_files)  # Get the total number of images to process

    for loop in range(NUMBER_OF_LOOPS):
        print(f"Starting loop {loop+1} of {NUMBER_OF_LOOPS}")

        for image_number, face_image_path in enumerate(image_files, start=1):
            loop_start_time = datetime.now()
            face_image = [face_image_path]
            basename = os.path.basename(face_image_path)
            processed_images_count += 1

            # Resize and pad the image before processing.
            # NOTE(review): `processed_image` is never passed to
            # generate_image_func below — the original path is sent instead.
            # Confirm whether this preprocessing is intentionally unused.
            processed_image = resize_and_pad_image(
                image_path=face_image_path,
                max_side=MAX_SIDE,
                min_side=MIN_SIDE
            )

            # Style is randomized only when the module-level flag is set.
            if USE_RANDOM_STYLE:
                style_name = choose_random_style()
            else:
                style_name = "(No style)"

            # Randomize generation parameters within the configured ranges.
            identitynet_strength_ratio = random.uniform(*IDENTITYNET_STRENGTH_RATIO_RANGE)
            adapter_strength_ratio = random.uniform(*ADAPTER_STRENGTH_RATIO_RANGE)
            num_inference_steps = random.randint(*NUM_INFERENCE_STEPS_RANGE)
            guidance_scale = random.uniform(*GUIDANCE_SCALE_RANGE)
            seed = random.randint(0, 2**32 - 1)

            # Print settings for the current image BEFORE processing it
            print_generation_settings(basename, style_name, identitynet_strength_ratio,
                adapter_strength_ratio, num_inference_steps, guidance_scale, seed,
                image_number, total_images)

            # Invoke the actual generation; only the generated file paths
            # (third tuple element) are used afterwards.
            _, _, generated_file_paths = generate_image_func(
                face_image=face_image,
                pose_image=None,
                prompt=PROMPT,
                negative_prompt=NEGATIVE_PROMPT,
                style_name=style_name,
                enhance_face_region=True,
                num_steps=num_inference_steps,
                identitynet_strength_ratio=identitynet_strength_ratio,
                adapter_strength_ratio=adapter_strength_ratio,
                guidance_scale=guidance_scale,
                seed=seed
            )

            loop_end_time = datetime.now()
            loop_time_taken = (loop_end_time - loop_start_time).total_seconds()

            # Immediately print the time taken and current time.
            print(f"Time taken to process image: {loop_time_taken:.2f} seconds")

            # Update the total time taken with this image's processing time
            total_time_taken += loop_time_taken

            # Calculate the average time taken per image.
            # NOTE(review): total_time_taken accumulates across loops but
            # image_number resets each loop, so from the second loop onward
            # this average (and the ETA below) is inflated — confirm intent.
            average_time_per_image = total_time_taken / image_number

            current_timestamp = loop_end_time.strftime("%Y-%m-%d %H:%M:%S")  # Current time after processing
            print(f"Current timestamp: {current_timestamp}")

            # Calculate estimated remaining time considering the images left in this loop and the additional loops
            remaining_images_this_loop = total_images - image_number
            remaining_images_in_additional_loops = (NUMBER_OF_LOOPS - (loop + 1)) * total_images
            total_remaining_images = remaining_images_this_loop + remaining_images_in_additional_loops
            estimated_time_remaining = average_time_per_image * total_remaining_images

            # Display the estimated time remaining including remaining loops
            print(f"Estimated time remaining (including loops): {estimated_time_remaining // 60:.0f} minutes, {estimated_time_remaining % 60:.0f} seconds")

            # Display the overall average time per image in seconds
            print(f"Overall average time per image: {average_time_per_image:.2f} seconds")

            # Display the total number of remaining images to process including looping
            print(f"Total remaining images to process (including loops): {total_remaining_images}")

            success = True  # Assuming generation was successful.
            error_message = ""  # Assuming no error.

            # Log to CSV after the image generation — one row per output file.
            for generated_file_path in generated_file_paths:
                new_file_name = os.path.basename(generated_file_path)
                log_to_csv(logfile_path, basename, new_file_name, identitynet_strength_ratio,
                    adapter_strength_ratio, num_inference_steps, guidance_scale, seed, success,
                    error_message, style_name, PROMPT, NEGATIVE_PROMPT, loop_time_taken, current_timestamp)

            del generated_file_paths  # Explicitly delete large variables
            gc.collect()  # Call garbage collection

    # At the end of the initial_image() function, add:
    total_elapsed_time = time.time() - overall_start_time
    print("\n===FINAL SUMMARY===")
    print(f"Total loops completed: {NUMBER_OF_LOOPS}")
    print(f"Total images processed per loop: {len(image_files)}")
    print(f"Overall total images processed: {NUMBER_OF_LOOPS * len(image_files)}")  # Multiplied by the number of loops
    print(f"Overall total time: {total_elapsed_time / 60:.2f} minutes")
|
213 |
+
|
214 |
+
|
215 |
+
def print_generation_settings(basename, style_name, identitynet_strength_ratio,
                              adapter_strength_ratio, num_inference_steps, guidance_scale,
                              seed, image_number, total_images):
    """Print a human-readable summary of the settings for the next generation."""
    print("===IMAGE GENERATION DATA SUMMARY===")
    summary_lines = [
        f"- Image {image_number} of {total_images}",
        f"- Filename: {basename}",
        f"- Style: {style_name}",
        f"- IdentityNet strength ratio: {identitynet_strength_ratio:0.2f}",
        f"- Adapter strength ratio: {adapter_strength_ratio:0.2f}",
        f"- Number of inference steps: {num_inference_steps}",
        f"- Guidance scale: {guidance_scale:0.2f}",
        f"- Seed: {seed}",
        f"- Input folder name: {INPUT_FOLDER_NAME}",
        f"- Output folder name: {OUTPUT_FOLDER_NAME}",
        f"- Prompt: {PROMPT}",
        f"- Negative prompt: {NEGATIVE_PROMPT}",
        f"- Number of loops: {NUMBER_OF_LOOPS}",
        f"- Use random style: {USE_RANDOM_STYLE}",
    ]
    # Trailing "\n" matches the original single multi-line print exactly.
    print("\n".join(summary_lines) + "\n")
    print("===DEFINING COMPLETE, GENERATING IMAGE...===")
|
gradio_demo/requirements.txt
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
diffusers==0.25.1
|
2 |
+
torch==2.0.0
|
3 |
+
torchvision==0.15.1
|
4 |
+
transformers==4.37.1
|
5 |
+
accelerate==0.25.0
|
6 |
+
safetensors==0.4.3
|
7 |
+
einops==0.7.0
|
8 |
+
onnxruntime-gpu==1.18.1
|
9 |
+
spaces==0.19.4
|
10 |
+
omegaconf==2.3.0
|
11 |
+
peft==0.11.1
|
12 |
+
huggingface-hub==0.23.4
|
13 |
+
opencv-python==4.10.0.84
|
14 |
+
insightface==0.7.3
|
15 |
+
gradio==4.38.1
|
16 |
+
controlnet_aux==0.0.9
|
17 |
+
gdown==5.2.0
|
18 |
+
peft==0.11.1
|
19 |
+
setuptools==71.1.0
|
gradio_demo/style_template.py
ADDED
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Prompt templates for the InstantID demo.

Each style provides four positive prompts (one per gallery slot) and a
single negative prompt shared by all four generations of that style.
"""

style_list = [
    {
        "name": "Professional",
        "prompt": [
            "professional portrait, gender-aligned, natural skin tones, cinematic lighting, highly detailed, well-composed, professional photography, subtle background blur",
            "Minimalist portrait, clean lines, soft colors, simple background, modern, elegant, subtle details, focus on facial features",
            "Professional, Corporate, formal attire, polished, sharp features, clean background, high clarity, refined, business style",
            "LinkedIn professional, business attire, neutral background, sharp focus, approachable, polished, suited for professional networking",
        ],
        "negative_prompt": "oversaturated, unnatural skin tones, deformed, disfigured, low resolution, cartoonish, unrealistic",
    },
    {
        "name": "Quirky",
        "prompt": [
            "vibrant colorful, ink sketch|vector|2d colors, sharp focus, superman/wonderwoman, highly detailed, the clouds,colorful,ultra sharpness,4k",
            "watercolor painting, japanese anime character with white/neon hair. vibrant, beautiful, painterly, detailed, textural, artistic",
            "vibrant colorful, ink sketch|vector|2d colors, sharp focus, scooba diver, highly detailed, the ocean,fishes,colorful,ultra sharpness,4k",
            "individual dressed as an eskimo, high contrast, surrounded by snowy mountains and igloo, snow crystals, cold, windy background, frozen natural landscape in background,highly detailed, sharp focus, intricate design, 4k resolution",
        ],
        "negative_prompt": "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
    },
    {
        "name": "Sci-fi",
        "prompt": [
            "ethereal fantasy concept art individual, magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
            "Dystopian style cyborg. Bleak, post-apocalyptic, somber, dramatic, highly detailed",
            "Alien-themed, Extraterrestrial, cosmic, otherworldly, mysterious, sci-fi, highly detailed",
            "Legend of Zelda style . Vibrant, fantasy, detailed, epic, heroic, reminiscent of The Legend of Zelda series",
        ],
        "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white, multiple people",
    },
]

# Index the templates by style name: name -> (prompt list, negative prompt).
styles = {}
for entry in style_list:
    styles[entry["name"]] = (entry["prompt"], entry["negative_prompt"])
|
gradio_demo/test.py
ADDED
@@ -0,0 +1,400 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
sys.path.append('./')
|
3 |
+
|
4 |
+
from typing import Tuple
|
5 |
+
|
6 |
+
import os
|
7 |
+
import cv2
|
8 |
+
import math
|
9 |
+
import torch
|
10 |
+
import random
|
11 |
+
import numpy as np
|
12 |
+
import argparse
|
13 |
+
import pandas as pd
|
14 |
+
|
15 |
+
import PIL
|
16 |
+
from PIL import Image
|
17 |
+
|
18 |
+
import diffusers
|
19 |
+
from diffusers.utils import load_image
|
20 |
+
from diffusers.models import ControlNetModel
|
21 |
+
from diffusers import LCMScheduler
|
22 |
+
|
23 |
+
from huggingface_hub import hf_hub_download
|
24 |
+
|
25 |
+
import insightface
|
26 |
+
from insightface.app import FaceAnalysis
|
27 |
+
|
28 |
+
from style_template import styles
|
29 |
+
from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
|
30 |
+
from model_util import load_models_xl, get_torch_device, torch_gc
|
31 |
+
|
32 |
+
|
33 |
+
# ---- Module-level configuration and heavyweight model setup ----
# Everything below runs at import time: face detector, ControlNet weights
# and the logo image are all loaded before any UI callback fires.

# Largest value a random seed may take (fits in a signed 32-bit int).
MAX_SEED = np.iinfo(np.int32).max
device = get_torch_device()
# Half precision on CUDA for memory/speed; full precision on CPU.
dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
STYLE_NAMES = list(styles.keys())
# NOTE(review): "Watercolor" is not a key of `styles` (which defines
# Professional/Quirky/Sci-fi) — confirm this default is still meaningful.
DEFAULT_STYLE_NAME = "Watercolor"

# Load face encoder (insightface 'antelopev2' detection + embedding model).
app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
app.prepare(ctx_id=0, det_size=(320, 320))

# Path to InstantID models
face_adapter = f'./checkpoints/ip-adapter.bin'
controlnet_path = f'./checkpoints/ControlNetModel'

# Load pipeline: the keypoint-conditioned ControlNet used by InstantID.
controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)

# Logo shown in the Gradio UI.
logo = Image.open("./gradio_demo/logo.png")
|
52 |
+
|
53 |
+
from cv2 import imencode
|
54 |
+
import base64
|
55 |
+
|
56 |
+
# def encode_pil_to_base64_new(pil_image):
|
57 |
+
# print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
|
58 |
+
# image_arr = np.asarray(pil_image)[:,:,::-1]
|
59 |
+
# _, byte_data = imencode('.png', image_arr)
|
60 |
+
# base64_data = base64.b64encode(byte_data)
|
61 |
+
# base64_string_opencv = base64_data.decode("utf-8")
|
62 |
+
# return "data:image/png;base64," + base64_string_opencv
|
63 |
+
|
64 |
+
import gradio as gr
|
65 |
+
|
66 |
+
# gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new
|
67 |
+
|
68 |
+
def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
|
69 |
+
|
70 |
+
if pretrained_model_name_or_path.endswith(
|
71 |
+
".ckpt"
|
72 |
+
) or pretrained_model_name_or_path.endswith(".safetensors"):
|
73 |
+
scheduler_kwargs = hf_hub_download(
|
74 |
+
repo_id="wangqixun/YamerMIX_v8",
|
75 |
+
subfolder="scheduler",
|
76 |
+
filename="scheduler_config.json",
|
77 |
+
)
|
78 |
+
|
79 |
+
(tokenizers, text_encoders, unet, _, vae) = load_models_xl(
|
80 |
+
pretrained_model_name_or_path=pretrained_model_name_or_path,
|
81 |
+
scheduler_name=None,
|
82 |
+
weight_dtype=dtype,
|
83 |
+
)
|
84 |
+
|
85 |
+
scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
|
86 |
+
pipe = StableDiffusionXLInstantIDPipeline(
|
87 |
+
vae=vae,
|
88 |
+
text_encoder=text_encoders[0],
|
89 |
+
text_encoder_2=text_encoders[1],
|
90 |
+
tokenizer=tokenizers[0],
|
91 |
+
tokenizer_2=tokenizers[1],
|
92 |
+
unet=unet,
|
93 |
+
scheduler=scheduler,
|
94 |
+
controlnet=controlnet,
|
95 |
+
).to(device)
|
96 |
+
|
97 |
+
else:
|
98 |
+
pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
|
99 |
+
pretrained_model_name_or_path,
|
100 |
+
controlnet=controlnet,
|
101 |
+
torch_dtype=dtype,
|
102 |
+
safety_checker=None,
|
103 |
+
feature_extractor=None,
|
104 |
+
).to(device)
|
105 |
+
|
106 |
+
pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
|
107 |
+
|
108 |
+
pipe.load_ip_adapter_instantid(face_adapter)
|
109 |
+
# load and disable LCM
|
110 |
+
pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
|
111 |
+
pipe.disable_lora()
|
112 |
+
|
113 |
+
def remove_tips():
    # Hide the tips/notice component in the Gradio UI.
    return gr.update(visible=False)
|
115 |
+
|
116 |
+
|
117 |
+
# prompts = [
|
118 |
+
# ["superman","Vibrant Color"], ["japanese anime character with white/neon hair","Watercolor"],
|
119 |
+
# # ["Suited professional","(No style)"],
|
120 |
+
# ["Scooba diver","Line art"], ["eskimo","Snow"]
|
121 |
+
# ]
|
122 |
+
|
123 |
+
def convert_from_cv2_to_image(img: np.ndarray) -> Image:
    """Convert an OpenCV BGR array into a PIL RGB image."""
    rgb_array = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return Image.fromarray(rgb_array)
|
125 |
+
|
126 |
+
def convert_from_image_to_cv2(img: Image) -> np.ndarray:
    """Convert a PIL RGB image into an OpenCV BGR array."""
    rgb_array = np.array(img)
    return cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
|
128 |
+
|
129 |
+
def run_for_prompts1(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate gallery image 1: first prompt of the chosen style.

    Falls back to STYLE_NAMES[1] when `style` is not a known style name.
    """
    prompt_list, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompt_list[0], negative)
|
135 |
+
def run_for_prompts2(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate gallery image 2: second prompt of the chosen style.

    Falls back to STYLE_NAMES[1] when `style` is not a known style name.
    """
    prompt_list, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompt_list[1], negative)
|
139 |
+
def run_for_prompts3(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate gallery image 3: third prompt of the chosen style.

    Falls back to STYLE_NAMES[1] when `style` is not a known style name.
    """
    prompt_list, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompt_list[2], negative)
|
143 |
+
def run_for_prompts4(face_file,style,progress=gr.Progress(track_tqdm=True)):
    """Generate gallery image 4: fourth prompt of the chosen style.

    Falls back to STYLE_NAMES[1] when `style` is not a known style name.
    """
    prompt_list, negative = styles.get(style, styles.get(STYLE_NAMES[1]))
    return generate_image(face_file, prompt_list[3], negative)
|
147 |
+
|
148 |
+
# def validate_and_process(face_file, style, email):
|
149 |
+
|
150 |
+
# # Your processing logic here
|
151 |
+
# gallery1, gallery2, gallery3, gallery4 = run_for_prompts1(face_file, style), run_for_prompts2(face_file, style), run_for_prompts3(face_file, style), run_for_prompts4(face_file, style)
|
152 |
+
# return gallery1, gallery2, gallery3, gallery4
|
153 |
+
|
154 |
+
def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
    """Render the 5 facial keypoints as a ControlNet conditioning image.

    Draws a limb (ellipse) from each outer keypoint to keypoint 2 (nose),
    then a filled circle at every keypoint, on a black canvas the same
    size as `image_pil`. Returns the drawing as a PIL image.
    (color_list is a mutable default, but it is never mutated here.)
    """
    stickwidth = 4
    # Each pair connects a keypoint to index 2; assumes kps has 5 points
    # in insightface order (eyes, nose, mouth corners) — TODO confirm.
    limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    kps = np.array(kps)

    w, h = image_pil.size
    out_img = np.zeros([h, w, 3])

    for i in range(len(limbSeq)):
        index = limbSeq[i]
        color = color_list[index[0]]

        x = kps[index][:, 0]
        y = kps[index][:, 1]
        # Ellipse centered on the segment midpoint, rotated to its angle.
        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
        polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
        out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
    # Dim the limbs so the keypoint dots drawn next stand out.
    out_img = (out_img * 0.6).astype(np.uint8)

    for idx_kp, kp in enumerate(kps):
        color = color_list[idx_kp]
        x, y = kp
        out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)

    out_img_pil = Image.fromarray(out_img.astype(np.uint8))
    return out_img_pil
|
181 |
+
|
182 |
+
def resize_img(input_image, max_side=640, min_side=640, size=None,
               pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
    """Resize a PIL image for the pipeline.

    If `size` is given, resize to exactly that (w, h). Otherwise scale so
    the short side reaches `min_side` and the long side is capped at
    `max_side`, then snap both dimensions down to a multiple of
    `base_pixel_number`. When `pad_to_max_side` is True the result is
    centered on a white max_side x max_side canvas.

    Returns the resized (and possibly padded) PIL image.
    """
    w, h = input_image.size
    if size is not None:
        w_resize_new, h_resize_new = size
        # Bug fix: the image was previously never resized when an explicit
        # `size` was supplied, so the padding copy below crashed on a
        # shape mismatch.
        input_image = input_image.resize([w_resize_new, h_resize_new], mode)
    else:
        # First bring the short side up to min_side, then cap the long
        # side at max_side.
        ratio = min_side / min(h, w)
        w, h = round(ratio * w), round(ratio * h)
        ratio = max_side / max(h, w)
        input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
        # Snap dimensions down to multiples of base_pixel_number (model
        # input constraint).
        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
        input_image = input_image.resize([w_resize_new, h_resize_new], mode)

    if pad_to_max_side:
        # Center the image on a white square canvas.
        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
        offset_x = (max_side - w_resize_new) // 2
        offset_y = (max_side - h_resize_new) // 2
        res[offset_y:offset_y + h_resize_new, offset_x:offset_x + w_resize_new] = np.array(input_image)
        input_image = Image.fromarray(res)
    return input_image
|
206 |
+
|
207 |
+
def store_images(email, gallery1, gallery2, gallery3, gallery4):
    """Persist the four generated gallery images and log them to a CSV.

    Numpy-array inputs are converted to PIL images first. Files are saved
    as images/{email}_gallery{i}.png and one row (email + four paths) is
    appended to image_data.csv, which is created with a header on first use.
    """
    galleries = []
    for img in (gallery1, gallery2, gallery3, gallery4):
        # Gradio may hand back raw arrays; normalize to PIL so .save works.
        if isinstance(img, np.ndarray):
            img = Image.fromarray(img)
        galleries.append(img)

    # exist_ok avoids the check-then-create race of an explicit exists() guard.
    os.makedirs('images', exist_ok=True)

    # Save each image and remember its path for the CSV row.
    image_paths = []
    for i, img in enumerate(galleries, start=1):
        img_path = f'images/{email}_gallery{i}.png'
        img.save(img_path)
        image_paths.append(img_path)

    csv_file_path = 'image_data.csv'

    df = pd.DataFrame({
        'email': [email],
        'img1_path': [image_paths[0]],
        'img2_path': [image_paths[1]],
        'img3_path': [image_paths[2]],
        'img4_path': [image_paths[3]],
    })

    # Write to CSV (append if the file exists, create a new one if it doesn't).
    if not os.path.isfile(csv_file_path):
        df.to_csv(csv_file_path, index=False)
    else:
        df.to_csv(csv_file_path, mode='a', header=False, index=False)
|
242 |
+
|
243 |
+
|
244 |
+
def generate_image(face_image, prompt, negative_prompt):
    """Generate one identity-preserving image from an uploaded face photo.

    Relies on module-level globals defined elsewhere in this file: ``pipe``
    (the diffusion pipeline), ``app`` (face analyzer — presumably insightface
    FaceAnalysis; confirm), ``device`` and ``MAX_SEED``.

    Args:
        face_image: PIL image containing at least one face; ``None`` raises.
        prompt: Positive text prompt passed straight to the pipeline.
        negative_prompt: Negative text prompt passed straight to the pipeline.

    Returns:
        The first generated image from the pipeline output.

    Raises:
        gr.Error: If ``face_image`` is ``None`` or no face is detected.
    """
    # Pose conditioning is disabled in this build; hard-coded generation knobs follow.
    pose_image_path = None
    # prompt = "superman"
    enable_LCM = False
    identitynet_strength_ratio = 0.95
    adapter_strength_ratio = 0.60
    num_steps = 15
    guidance_scale = 8.5
    # Fresh random seed per call, so repeated calls give different outputs.
    seed = random.randint(0, MAX_SEED)
    # negative_prompt = ""
    # negative_prompt += neg
    enhance_face_region = True
    # Swap scheduler/LoRA depending on LCM mode (always False here).
    if enable_LCM:
        pipe.enable_lora()
        pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    else:
        pipe.disable_lora()
        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

    if face_image is None:
        raise gr.Error(f"Cannot find any input face image! Please upload the face image")

    # if prompt is None:
    #     prompt = "a person"

    # apply the style template
    # prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)

    # face_image = load_image(face_image_path)
    face_image = resize_img(face_image)
    face_image_cv2 = convert_from_image_to_cv2(face_image)
    height, width, _ = face_image_cv2.shape

    # Extract face features
    face_info = app.get(face_image_cv2)

    if len(face_info) == 0:
        raise gr.Error(f"Cannot find any face in the image! Please upload another person image")

    face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1] # only use the maximum face
    face_emb = face_info['embedding']
    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])

    # Optional pose reference: dead code while pose_image_path is None above.
    if pose_image_path is not None:
        pose_image = load_image(pose_image_path)
        pose_image = resize_img(pose_image)
        pose_image_cv2 = convert_from_image_to_cv2(pose_image)

        face_info = app.get(pose_image_cv2)

        if len(face_info) == 0:
            raise gr.Error(f"Cannot find any face in the reference image! Please upload another person image")

        face_info = face_info[-1]
        face_kps = draw_kps(pose_image, face_info['kps'])

        width, height = face_kps.size

    if enhance_face_region:
        # White rectangle over the detected face bbox restricts ControlNet
        # conditioning to the face region.
        control_mask = np.zeros([height, width, 3])
        x1, y1, x2, y2 = face_info["bbox"]
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        control_mask[y1:y2, x1:x2] = 255
        control_mask = Image.fromarray(control_mask.astype(np.uint8))
    else:
        control_mask = None

    generator = torch.Generator(device=device).manual_seed(seed)

    print("Start inference...")
    print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

    pipe.set_ip_adapter_scale(adapter_strength_ratio)
    images = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image_embeds=face_emb,
        image=face_kps,
        control_mask=control_mask,
        controlnet_conditioning_scale=float(identitynet_strength_ratio),
        num_inference_steps=num_steps,
        guidance_scale=guidance_scale,
        height=height,
        width=width,
        generator=generator,
        # num_images_per_prompt = 4
    ).images

    print(images[0])

    return images[0]
|
335 |
+
|
336 |
+
|
337 |
+
### Description
# Module-level UI strings; `title` and `description` are re-assigned inside
# the Blocks context below, so these raw-string versions are effectively unused.
title = r"""
<h1 align="center">Choose your AVATAR</h1>
"""

description = r"""
<h2> Powered by IDfy </h2>"""

article = r""""""

tips = r""""""

# Page styling: light-blue background, scrollable gallery, no element margins.
css = '''
.gradio-container {width: 95% !important; background-color: #E6F3FF;}
.image-gallery {height: 100vh !important; overflow: auto;}
.gradio-row .gradio-element { margin: 0 !important; }
'''
# NOTE(review): in the upstream InstantID demo this Blocks section lives inside
# `main(...)`; here it appears at module level while `__main__` still calls
# `main` — confirm the intended entry point.
with gr.Blocks(css=css) as demo:
    title = "<h1 align='center'>Choose your AVATAR</h1>"
    description = "<h2> Powered by IDfy </h2>"

    # Description
    gr.Markdown(title)
    with gr.Row():
        gr.Image("./gradio_demo/logo.png", scale=0, min_width=50, show_label=False, show_download_button=False)
        gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            # Left column: style picker, face upload, generate button.
            style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
            face_file = gr.Image(label="Upload a photo of your face", type="pil")
            submit = gr.Button("Submit", variant="primary")
        with gr.Column():
            # Right column: 2x2 grid of generated images plus storage controls.
            with gr.Row():
                gallery1 = gr.Image(label="Generated Images")
                gallery2 = gr.Image(label="Generated Images")
            with gr.Row():
                gallery3 = gr.Image(label="Generated Images")
                gallery4 = gr.Image(label="Generated Images")
            email = gr.Textbox(label="Email",
                               info="Enter your email address",
                               value="")
            submit1 = gr.Button("STORE", variant="primary")
    usage_tips = gr.Markdown(label="Usage tips of InstantID", value="", visible=False)

    # Image upload and processing chain: both uploading a face and clicking
    # Submit clear the tips, then fill the four galleries sequentially via
    # run_for_prompts1..4 (defined elsewhere in this file).
    face_file.upload(remove_tips, outputs=usage_tips).then(run_for_prompts1, inputs=[face_file, style], outputs=[gallery1]).then(run_for_prompts2, inputs=[face_file, style], outputs=[gallery2]).then(run_for_prompts3, inputs=[face_file, style], outputs=[gallery3]).then(run_for_prompts4, inputs=[face_file, style], outputs=[gallery4])
    submit.click(remove_tips, outputs=usage_tips).then(run_for_prompts1, inputs=[face_file, style], outputs=[gallery1]).then(run_for_prompts2, inputs=[face_file, style], outputs=[gallery2]).then(run_for_prompts3, inputs=[face_file, style], outputs=[gallery3]).then(run_for_prompts4, inputs=[face_file, style], outputs=[gallery4])

    # Store data on button click
    submit1.click(
        fn=store_images,
        inputs=[email, gallery1, gallery2, gallery3, gallery4],
        outputs=None)

    gr.Markdown("")

# share=True exposes a public Gradio link; runs at import time in this layout.
demo.launch(share=True)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--pretrained_model_name_or_path", type=str, default="wangqixun/YamerMIX_v8")
    args = parser.parse_args()

    # main() is defined elsewhere in this file; second arg disables LCM.
    main(args.pretrained_model_name_or_path, False)
|
gradio_demo/watermark.png
ADDED
image_data.csv
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
email,img1_path,img2_path,img3_path,img4_path
|
2 |
+
ll,images/ll_gallery1.png,images/ll_gallery2.png,images/ll_gallery3.png,images/ll_gallery4.png
|
3 |
+
kajal@img,images/kajal@img_gallery1.png,images/kajal@img_gallery2.png,images/kajal@img_gallery3.png,images/kajal@img_gallery4.png
|
4 |
+
heeral@img,images/heeral@img_gallery1.png,images/heeral@img_gallery2.png,images/heeral@img_gallery3.png,images/heeral@img_gallery4.png
|
5 |
+
sanskruti@img-scifi,images/sanskruti@img-scifi_gallery1.png,images/sanskruti@img-scifi_gallery2.png,images/sanskruti@img-scifi_gallery3.png,images/sanskruti@img-scifi_gallery4.png
|
6 |
+
sanskruti@img-scifi,images/sanskruti@img-scifi_gallery1.png,images/sanskruti@img-scifi_gallery2.png,images/sanskruti@img-scifi_gallery3.png,images/sanskruti@img-scifi_gallery4.png
|
7 |
+
sanskruti@img-quirky,images/sanskruti@img-quirky_gallery1.png,images/sanskruti@img-quirky_gallery2.png,images/sanskruti@img-quirky_gallery3.png,images/sanskruti@img-quirky_gallery4.png
|
8 |
+
kajal@quirky,images/kajal@quirky_gallery1.png,images/kajal@quirky_gallery2.png,images/kajal@quirky_gallery3.png,images/kajal@quirky_gallery4.png
|
9 |
+
kajal@prof,images/kajal@prof_gallery1.png,images/kajal@prof_gallery2.png,images/kajal@prof_gallery3.png,images/kajal@prof_gallery4.png
|
10 |
+
kajal@quirky,images/kajal@quirky_gallery1.png,images/kajal@quirky_gallery2.png,images/kajal@quirky_gallery3.png,images/kajal@quirky_gallery4.png
|
11 |
+
kajal@sci-fi,images/kajal@sci-fi_gallery1.png,images/kajal@sci-fi_gallery2.png,images/kajal@sci-fi_gallery3.png,images/kajal@sci-fi_gallery4.png
|
12 |
+
yashvi,images/yashvi_gallery1.png,images/yashvi_gallery2.png,images/yashvi_gallery3.png,images/yashvi_gallery4.png
|
13 |
+
[email protected],images/[email protected]_gallery1.png,images/[email protected]_gallery2.png,images/[email protected]_gallery3.png,images/[email protected]_gallery4.png
|
14 |
+
kartik@prof,images/kartik@prof_gallery1.png,images/kartik@prof_gallery2.png,images/kartik@prof_gallery3.png,images/kartik@prof_gallery4.png
|
15 |
+
yashvii@proffffff,images/yashvii@proffffff_gallery1.png,images/yashvii@proffffff_gallery2.png,images/yashvii@proffffff_gallery3.png,images/yashvii@proffffff_gallery4.png
|
images/aa.ll_gallery1.png
ADDED
Git LFS Details
|
images/aa.ll_gallery2.png
ADDED
images/aa.ll_gallery3.png
ADDED
images/aa.ll_gallery4.png
ADDED
images/heeral@img_gallery1.png
ADDED
images/heeral@img_gallery2.png
ADDED
images/heeral@img_gallery3.png
ADDED
images/heeral@img_gallery4.png
ADDED
images/kajal@img_gallery1.png
ADDED
images/kajal@img_gallery2.png
ADDED
images/kajal@img_gallery3.png
ADDED
images/kajal@img_gallery4.png
ADDED