diff --git a/.augmentignore b/.augmentignore new file mode 100644 index 0000000000000000000000000000000000000000..62c8d7538c5f5d1c2ea264439bd433f1b17802e2 --- /dev/null +++ b/.augmentignore @@ -0,0 +1,15 @@ +.env +.cache +.vscode +__pycache__ +bitsandbytes_windows +cudnn_windows +data +dataset +docs +examples +outputs +SmilingWolf +test +v2_inference +venv \ No newline at end of file diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..7e9e5b44451c99410bcbd11fa70707dbbc78c37a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,15 @@ +.cache/ +cudnn_windows/ +bitsandbytes_windows/ +bitsandbytes_windows_deprecated/ +dataset/ +__pycache__/ +venv/ +**/.hadolint.yml +**/*.log +**/.git +**/.gitignore +**/.env +**/.github +**/.vscode +**/*.ps1 diff --git a/.env b/.env new file mode 100644 index 0000000000000000000000000000000000000000..3d93a99e2d0548cf6a08ff6c123cc6bf6dd3703d --- /dev/null +++ b/.env @@ -0,0 +1 @@ +TENSORBOARD_PORT=6006 diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..1c2323362752ea76c517706c23c8503b5a08ddc2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,35 +1,4 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb 
filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tar filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text +*.sh text eol=lf +*.ps1 text eol=crlf +*.bat text eol=crlf +*.cmd text eol=crlf \ No newline at end of file diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000000000000000000000000000000000000..8e9a98d8295e1856347a9701be7bafd206baf1d2 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,3 @@ +# These are supported funding model platforms + +github: [bmaltais] diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000000000000000000000000000000000..3fdb8a90e3128c705cb43307474486cc70dd5abe --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +--- +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" diff --git a/.github/workflows/docker_publish.yml b/.github/workflows/docker_publish.yml new file mode 100644 index 0000000000000000000000000000000000000000..f63c6f5bd30b3edbe1ce9ff95cb461eba1b81917 --- /dev/null +++ b/.github/workflows/docker_publish.yml @@ -0,0 +1,91 @@ +# Check this guide for more information about publishing to ghcr.io with GitHub Actions: +# 
https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions#upgrading-a-workflow-that-accesses-ghcrio + +# Build the Docker image and push it to the registry +name: docker_publish + +on: + # Trigger the workflow on tags push that match the pattern v*, for example v1.0.0 + push: + tags: + - "v*" + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + # Only run this job on tags + docker-tag: + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/') + + # Sets the permissions granted to the GITHUB_TOKEN for the actions in this job. + permissions: + contents: read + packages: write + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: true + + # We require additional space due to the large size of our image. (~10GB) + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + tool-cache: true + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: true + swap-storage: true + + - name: Docker meta:${{ github.ref_name }} + id: meta + uses: docker/metadata-action@v5 + with: + images: ghcr.io/${{ github.repository_owner }}/kohya-ss-gui + flavor: | + latest=auto + prefix= + suffix= + # https://github.com/docker/metadata-action/tree/v5/?tab=readme-ov-file#tags-input + tags: | + type=semver,pattern=v{{major}} + type=semver,pattern={{raw}} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # You may need to manage write and read access of GitHub Actions for repositories in the container settings. + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push + uses: docker/build-push-action@v5 + id: publish + with: + context: . 
+ file: ./Dockerfile + push: true + target: final + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + build-args: | + VERSION=${{ github.ref_name }} + RELEASE=${{ github.run_number }} + platforms: linux/amd64 + # Cache to registry instead of gha to avoid the capacity limit. + cache-from: type=registry,ref=ghcr.io/${{ github.repository_owner }}/kohya-ss-gui:cache + cache-to: type=registry,ref=ghcr.io/${{ github.repository_owner }}/kohya-ss-gui:cache,mode=max + sbom: true + provenance: true diff --git a/.github/workflows/typos.yaml b/.github/workflows/typos.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d674ca249be218f59f1436aa39eeb5e0854fceed --- /dev/null +++ b/.github/workflows/typos.yaml @@ -0,0 +1,21 @@ +--- +# yamllint disable rule:line-length +name: Typos + +on: # yamllint disable-line rule:truthy + push: + pull_request: + types: + - opened + - synchronize + - reopened + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: typos-action + uses: crate-ci/typos@v1.21.0 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..7e288dce5ca9f66881efe2a1724a3d20fbb76810 --- /dev/null +++ b/.gitignore @@ -0,0 +1,54 @@ +# Python +venv +venv2 +__pycache__ +*.egg-info +build +wd14_tagger_model + +# IDE and Editor specific +.vscode + +# CUDNN for Windows +cudnn_windows + +# Cache and temporary files +.cache +.DS_Store + +# Scripts and executables +locon +gui-user.bat +gui-user.ps1 + +# Version control +SmilingWolf +wandb + +# Setup and logs +setup.log +logs + +# Miscellaneous +uninstall.txt + +# Test files +test/output +test/log* +test/*.json +test/ft + +# Temporary requirements +requirements_tmp_for_setup.txt + +*.npz +presets/*/user_presets/* +inputs +outputs +dataset/** +!dataset/**/ +!dataset/**/.gitkeep +models +data +config.toml +sd-scripts \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 
100644 index 0000000000000000000000000000000000000000..5bfde67cf7e561f99f6192d46f48bf670910fac8 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "sd-scripts"] + path = sd-scripts + url = https://github.com/kohya-ss/sd-scripts.git \ No newline at end of file diff --git a/.hadolint.yml b/.hadolint.yml new file mode 100644 index 0000000000000000000000000000000000000000..66dfa0f12ea7f2c79185e826562bea0a4d71e44f --- /dev/null +++ b/.hadolint.yml @@ -0,0 +1,6 @@ +ignored: + - DL3042 # Avoid use of cache directory with pip. Use `pip install --no-cache-dir ` + - DL3013 # Pin versions in pip. Instead of `pip install ` use `pip install ==` + - DL3008 # Pin versions in apt get install. Instead of `apt-get install ` use `apt-get install =` + - DL4006 # Set the SHELL option -o pipefail before RUN with a pipe in it + - SC2015 # Note that A && B || C is not if-then-else. C may run when A is true. \ No newline at end of file diff --git a/.release b/.release new file mode 100644 index 0000000000000000000000000000000000000000..7355d4be1df51ec98a8cf7b77eabefa16da175fa --- /dev/null +++ b/.release @@ -0,0 +1 @@ +v24.1.3 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..6607b5c259bca14cef59c3a617152c2db7a2d1d8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,147 @@ +# syntax=docker/dockerfile:1 +ARG UID=1000 +ARG VERSION=EDGE +ARG RELEASE=0 + +FROM python:3.10-slim as build + +# RUN mount cache for multi-arch: https://github.com/docker/buildx/issues/549#issuecomment-1788297892 +ARG TARGETARCH +ARG TARGETVARIANT + +WORKDIR /app + +# Install under /root/.local +ENV PIP_USER="true" +ARG PIP_NO_WARN_SCRIPT_LOCATION=0 +ARG PIP_ROOT_USER_ACTION="ignore" + +# Install build dependencies +RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \ + --mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \ + apt-get update && 
apt-get upgrade -y && \ + apt-get install -y --no-install-recommends python3-launchpadlib git curl + +# Install PyTorch +# The versions must align and be in sync with the requirements_linux_docker.txt +# hadolint ignore=SC2102 +RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \ + pip install -U --extra-index-url https://download.pytorch.org/whl/cu121 --extra-index-url https://pypi.nvidia.com \ + torch==2.1.2 torchvision==0.16.2 \ + xformers==0.0.23.post1 \ + ninja \ + pip setuptools wheel + +# Install requirements +RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/root/.cache/pip \ + --mount=source=requirements_linux_docker.txt,target=requirements_linux_docker.txt \ + --mount=source=requirements.txt,target=requirements.txt \ + --mount=source=setup/docker_setup.py,target=setup.py \ + --mount=source=sd-scripts,target=sd-scripts,rw \ + pip install -r requirements_linux_docker.txt -r requirements.txt + +# Replace pillow with pillow-simd (Only for x86) +ARG TARGETPLATFORM +RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \ + --mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \ + if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \ + apt-get update && apt-get install -y --no-install-recommends zlib1g-dev libjpeg62-turbo-dev build-essential && \ + pip uninstall -y pillow && \ + CC="cc -mavx2" pip install -U --force-reinstall pillow-simd; \ + fi + +FROM python:3.10-slim as final + +ARG TARGETARCH +ARG TARGETVARIANT + +ENV NVIDIA_VISIBLE_DEVICES all +ENV NVIDIA_DRIVER_CAPABILITIES compute,utility + +WORKDIR /tmp + +ENV CUDA_VERSION=12.1.1 +ENV NV_CUDA_CUDART_VERSION=12.1.105-1 +ENV NVIDIA_REQUIRE_CUDA=cuda>=12.1 +ENV NV_CUDA_COMPAT_PACKAGE=cuda-compat-12-1 + +# Install CUDA partially +ADD https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb . 
+RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \ + --mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \ + dpkg -i cuda-keyring_1.0-1_all.deb && \ + rm cuda-keyring_1.0-1_all.deb && \ + sed -i 's/^Components: main$/& contrib/' /etc/apt/sources.list.d/debian.sources && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + # Installing the whole CUDA typically increases the image size by approximately **8GB**. + # To decrease the image size, we opt to install only the necessary libraries. + # Here is the package list for your reference: https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64 + # !If you experience any related issues, replace the following line with `cuda-12-1` to obtain the complete CUDA package. + cuda-cudart-12-1=${NV_CUDA_CUDART_VERSION} ${NV_CUDA_COMPAT_PACKAGE} libcusparse-12-1 libnvjitlink-12-1 + +# Install runtime dependencies +RUN --mount=type=cache,id=apt-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/cache/apt \ + --mount=type=cache,id=aptlists-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/var/lib/apt/lists \ + apt-get update && \ + apt-get install -y --no-install-recommends libgl1 libglib2.0-0 libjpeg62 libtcl8.6 libtk8.6 libgoogle-perftools-dev dumb-init + +# Fix missing libnvinfer7 +RUN ln -s /usr/lib/x86_64-linux-gnu/libnvinfer.so /usr/lib/x86_64-linux-gnu/libnvinfer.so.7 && \ + ln -s /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so.7 + +# Create user +ARG UID +RUN groupadd -g $UID $UID && \ + useradd -l -u $UID -g $UID -m -s /bin/sh -N $UID + +# Create directories with correct permissions +RUN install -d -m 775 -o $UID -g 0 /dataset && \ + install -d -m 775 -o $UID -g 0 /licenses && \ + install -d -m 775 -o $UID -g 0 /app + +# Copy licenses (OpenShift Policy) +COPY --link --chmod=775 LICENSE.md /licenses/LICENSE.md + +# Copy dependencies and code (and 
support arbitrary uid for OpenShift best practice) +COPY --link --chown=$UID:0 --chmod=775 --from=build /root/.local /home/$UID/.local +COPY --link --chown=$UID:0 --chmod=775 . /app + +ENV PATH="/usr/local/cuda/lib:/usr/local/cuda/lib64:/home/$UID/.local/bin:$PATH" +ENV PYTHONPATH="${PYTHONPATH}:/home/$UID/.local/lib/python3.10/site-packages" +ENV LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" +ENV LD_PRELOAD=libtcmalloc.so +ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python +# Rich logging +# https://rich.readthedocs.io/en/stable/console.html#interactive-mode +ENV FORCE_COLOR="true" +ENV COLUMNS="100" + +WORKDIR /app + +VOLUME [ "/dataset" ] + +# 7860: Kohya GUI +EXPOSE 7860 + +USER $UID + +STOPSIGNAL SIGINT + +# Use dumb-init as PID 1 to handle signals properly +ENTRYPOINT ["dumb-init", "--"] +CMD ["python3", "kohya_gui.py", "--listen", "0.0.0.0", "--server_port", "7860", "--headless"] + +ARG VERSION +ARG RELEASE +LABEL name="bmaltais/kohya_ss" \ + vendor="bmaltais" \ + maintainer="bmaltais" \ + # Dockerfile source repository + url="https://github.com/bmaltais/kohya_ss" \ + version=${VERSION} \ + # This should be a number, incremented with each change + release=${RELEASE} \ + io.k8s.display-name="kohya_ss" \ + summary="Kohya's GUI: This repository provides a Gradio GUI for Kohya's Stable Diffusion trainers(https://github.com/kohya-ss/sd-scripts)." \ + description="The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model. This is the docker image for Kohya's GUI. For more information about this tool, please visit the following website: https://github.com/bmaltais/kohya_ss." 
\ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000000000000000000000000000000000000..6a5305c7ae51f9bbea289daa05b67b6c25d2ab93 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [2022] [kohya-ss] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index cc458fdf98bc142e006b675596805e3f0b4f9814..0a7a225287dab109aa856730be2a10e853906710 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,449 @@ ---- -title: Kohya Ss -emoji: 📈 -colorFrom: purple -colorTo: blue -sdk: gradio -sdk_version: 4.38.1 -app_file: app.py -pinned: false ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +--- +title: kohya_ss +app_file: kohya_gui.py +sdk: gradio +sdk_version: 4.26.0 +--- +# Kohya's GUI + +This repository primarily provides a Gradio GUI for [Kohya's Stable Diffusion trainers](https://github.com/kohya-ss/sd-scripts). However, support for Linux OS is also offered through community contributions. macOS support is not optimal at the moment but might work if the conditions are favorable. 
+ +The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model. + +## Table of Contents + +- [Kohya's GUI](#kohyas-gui) + - [Table of Contents](#table-of-contents) + - [🦒 Colab](#-colab) + - [Installation](#installation) + - [Windows](#windows) + - [Windows Pre-requirements](#windows-pre-requirements) + - [Setup Windows](#setup-windows) + - [Optional: CUDNN 8.9.6.50](#optional-cudnn-89650) + - [Linux and macOS](#linux-and-macos) + - [Linux Pre-requirements](#linux-pre-requirements) + - [Setup Linux](#setup-linux) + - [Install Location](#install-location) + - [Runpod](#runpod) + - [Manual installation](#manual-installation) + - [Pre-built Runpod template](#pre-built-runpod-template) + - [Docker](#docker) + - [Get your Docker ready for GPU support](#get-your-docker-ready-for-gpu-support) + - [Windows](#windows-1) + - [Linux, OSX](#linux-osx) + - [Design of our Dockerfile](#design-of-our-dockerfile) + - [Use the pre-built Docker image](#use-the-pre-built-docker-image) + - [Local docker build](#local-docker-build) + - [ashleykleynhans runpod docker builds](#ashleykleynhans-runpod-docker-builds) + - [Upgrading](#upgrading) + - [Windows Upgrade](#windows-upgrade) + - [Linux and macOS Upgrade](#linux-and-macos-upgrade) + - [Starting GUI Service](#starting-gui-service) + - [Launching the GUI on Windows](#launching-the-gui-on-windows) + - [Launching the GUI on Linux and macOS](#launching-the-gui-on-linux-and-macos) + - [Custom Path Defaults](#custom-path-defaults) + - [LoRA](#lora) + - [Sample image generation during training](#sample-image-generation-during-training) + - [Troubleshooting](#troubleshooting) + - [Page File Limit](#page-file-limit) + - [No module called tkinter](#no-module-called-tkinter) + - [LORA Training on TESLA V100 - GPU Utilization Issue](#lora-training-on-tesla-v100---gpu-utilization-issue) + - [Issue Summary](#issue-summary) + - [Potential Solutions](#potential-solutions) + - [SDXL 
training](#sdxl-training) + - [Masked loss](#masked-loss) + - [Change History](#change-history) + +## 🦒 Colab + +This Colab notebook was not created or maintained by me; however, it appears to function effectively. The source can be found at: https://github.com/camenduru/kohya_ss-colab. + +I would like to express my gratitude to camenduru for their valuable contribution. If you encounter any issues with the Colab notebook, please report them on their repository. + +| Colab | Info | +| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------ | +| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/camenduru/kohya_ss-colab/blob/main/kohya_ss_colab.ipynb) | kohya_ss_gui_colab | + +## Installation + +### Windows + +#### Windows Pre-requirements + +To install the necessary dependencies on a Windows system, follow these steps: + +1. Install [Python 3.10.11](https://www.python.org/ftp/python/3.10.11/python-3.10.11-amd64.exe). + - During the installation process, ensure that you select the option to add Python to the 'PATH' environment variable. + +2. Install [CUDA 11.8 toolkit](https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Windows&target_arch=x86_64). + +3. Install [Git](https://git-scm.com/download/win). + +4. Install the [Visual Studio 2015, 2017, 2019, and 2022 redistributable](https://aka.ms/vs/17/release/vc_redist.x64.exe). + +#### Setup Windows + +To set up the project, follow these steps: + +1. Open a terminal and navigate to the desired installation directory. + +2. Clone the repository by running the following command: + + ```shell + git clone --recursive https://github.com/bmaltais/kohya_ss.git + ``` + +3. Change into the `kohya_ss` directory: + + ```shell + cd kohya_ss + ``` + +4. 
Run one of the following setup scripts by executing the corresponding command: + + For systems with only Python 3.10.11 installed: + + ```shell + .\setup.bat + ``` + + For systems with more than one Python release installed: + + ```shell + .\setup-3.10.bat + ``` + + During the accelerate config step, use the default values as proposed during the configuration unless you know your hardware demands otherwise. The amount of VRAM on your GPU does not impact the values used. + +#### Optional: CUDNN 8.9.6.50 + +The following steps are optional but will improve the learning speed for owners of NVIDIA 30X0/40X0 GPUs. These steps enable larger training batch sizes and faster training speeds. + +1. Run `.\setup.bat` and select `2. (Optional) Install cudnn files (if you want to use the latest supported cudnn version)`. + +### Linux and macOS + +#### Linux Pre-requirements + +To install the necessary dependencies on a Linux system, ensure that you fulfill the following requirements: + +- Ensure that `venv` support is pre-installed. You can install it on Ubuntu 22.04 using the command: + + ```shell + apt install python3.10-venv + ``` + +- Install the CUDA 11.8 Toolkit by following the instructions provided in [this link](https://developer.nvidia.com/cuda-11-8-0-download-archive?target_os=Linux&target_arch=x86_64). + +- Make sure you have Python version 3.10.9 or higher (but lower than 3.11.0) installed on your system. + +#### Setup Linux + +To set up the project on Linux or macOS, perform the following steps: + +1. Open a terminal and navigate to the desired installation directory. + +2. Clone the repository by running the following command: + + ```shell + git clone --recursive https://github.com/bmaltais/kohya_ss.git + ``` + +3. Change into the `kohya_ss` directory: + + ```shell + cd kohya_ss + ``` + +4. If you encounter permission issues, make the `setup.sh` script executable by running the following command: + + ```shell + chmod +x ./setup.sh + ``` + +5. 
Run the setup script by executing the following command: + + ```shell + ./setup.sh + ``` + + Note: If you need additional options or information about the runpod environment, you can use `setup.sh -h` or `setup.sh --help` to display the help message. + +#### Install Location + +The default installation location on Linux is the directory where the script is located. If a previous installation is detected in that location, the setup will proceed there. Otherwise, the installation will fall back to `/opt/kohya_ss`. If `/opt` is not writable, the fallback location will be `$HOME/kohya_ss`. Finally, if none of the previous options are viable, the installation will be performed in the current directory. + +For macOS and other non-Linux systems, the installation process will attempt to detect the previous installation directory based on where the script is run. If a previous installation is not found, the default location will be `$HOME/kohya_ss`. You can override this behavior by specifying a custom installation directory using the `-d` or `--dir` option when running the setup script. + +If you choose to use the interactive mode, the default values for the accelerate configuration screen will be "This machine," "None," and "No" for the remaining questions. These default answers are the same as the Windows installation. + +### Runpod + +#### Manual installation + +To install the necessary components for Runpod and run kohya_ss, follow these steps: + +1. Select the Runpod pytorch 2.0.1 template. This is important. Other templates may not work. + +2. SSH into the Runpod. + +3. Clone the repository by running the following command: + + ```shell + cd /workspace + git clone --recursive https://github.com/bmaltais/kohya_ss.git + ``` + +4. Run the setup script: + + ```shell + cd kohya_ss + ./setup-runpod.sh + ``` + +5. 
Run the GUI with: + + ```shell + ./gui.sh --share --headless + ``` + + or with this if you expose 7860 directly via the runpod configuration: + + ```shell + ./gui.sh --listen=0.0.0.0 --headless + ``` + +6. Connect to the public URL displayed after the installation process is completed. + +#### Pre-built Runpod template + +To run from a pre-built Runpod template, you can: + +1. Open the Runpod template by clicking on . + +2. Deploy the template on the desired host. + +3. Once deployed, connect to the Runpod on HTTP 3010 to access the kohya_ss GUI. You can also connect to auto1111 on HTTP 3000. + +### Docker + +#### Get your Docker ready for GPU support + +##### Windows + +Once you have installed [**Docker Desktop**](https://www.docker.com/products/docker-desktop/), [**CUDA Toolkit**](https://developer.nvidia.com/cuda-downloads), [**NVIDIA Windows Driver**](https://www.nvidia.com.tw/Download/index.aspx), and ensured that your Docker is running with [**WSL2**](https://docs.docker.com/desktop/wsl/#turn-on-docker-desktop-wsl-2), you are ready to go. + +Here is the official documentation for further reference. + + + +##### Linux, OSX + +Install an NVIDIA GPU Driver if you do not already have one installed. + + +Install the NVIDIA Container Toolkit with this guide. + + +#### Design of our Dockerfile + +- It is required that all training data is stored in the `dataset` subdirectory, which is mounted into the container at `/dataset`. +- Please note that the file picker functionality is not available. Instead, you will need to manually input the folder path and configuration file path. +- TensorBoard has been separated from the project. + - TensorBoard is not included in the Docker image. + - The "Start TensorBoard" button has been hidden. + - TensorBoard is launched from a distinct container [as shown here](/docker-compose.yaml#L41). +- The browser won't be launched automatically. 
You will need to manually open the browser and navigate to [http://localhost:7860/](http://localhost:7860/) and [http://localhost:6006/](http://localhost:6006/) +- This Dockerfile has been designed to be easily disposable. You can discard the container at any time and restart it with the new code version. + +#### Use the pre-built Docker image + +```bash +git clone --recursive https://github.com/bmaltais/kohya_ss.git +cd kohya_ss +docker compose up -d +``` + +To update the system, do `docker compose down && docker compose up -d --pull always` + +#### Local docker build + +> [!IMPORTANT] +> Clone the Git repository ***recursively*** to include submodules: +> `git clone --recursive https://github.com/bmaltais/kohya_ss.git` + +```bash +git clone --recursive https://github.com/bmaltais/kohya_ss.git +cd kohya_ss +docker compose up -d --build +``` + +> [!NOTE] +> Building the image may take up to 20 minutes to complete. + +To update the system, ***checkout to the new code version*** and rebuild using `docker compose down && docker compose up -d --build --pull always` + +> If you are running on Linux, an alternative Docker container port with fewer limitations is available [here](https://github.com/P2Enjoy/kohya_ss-docker). + +#### ashleykleynhans runpod docker builds + +You may want to use the following repositories when running on runpod: + +- Standalone Kohya_ss template: +- Auto1111 + Kohya_ss GUI template: + +## Upgrading + +To upgrade your installation to a new version, follow the instructions below. + +### Windows Upgrade + +If a new release becomes available, you can upgrade your repository by running the following commands from the root directory of the project: + +1. Pull the latest changes from the repository: + + ```powershell + git pull + ``` + +2. Run the setup script: + + ```powershell + .\setup.bat + ``` + +### Linux and macOS Upgrade + +To upgrade your installation on Linux or macOS, follow these steps: + +1. 
Open a terminal and navigate to the root directory of the project. + +2. Pull the latest changes from the repository: + + ```bash + git pull + ``` + +3. Refresh and update everything: + + ```bash + ./setup.sh + ``` + +## Starting GUI Service + +To launch the GUI service, you can use the provided scripts or run the `kohya_gui.py` script directly. Use the command line arguments listed below to configure the underlying service. + +```text +--listen: Specify the IP address to listen on for connections to Gradio. +--username: Set a username for authentication. +--password: Set a password for authentication. +--server_port: Define the port to run the server listener on. +--inbrowser: Open the Gradio UI in a web browser. +--share: Share the Gradio UI. +--language: Set custom language +``` + +### Launching the GUI on Windows + +On Windows, you can use either the `gui.ps1` or `gui.bat` script located in the root directory. Choose the script that suits your preference and run it in a terminal, providing the desired command line arguments. Here's an example: + +```powershell +gui.ps1 --listen 127.0.0.1 --server_port 7860 --inbrowser --share +``` + +or + +```powershell +gui.bat --listen 127.0.0.1 --server_port 7860 --inbrowser --share +``` + +### Launching the GUI on Linux and macOS + +To launch the GUI on Linux or macOS, run the `gui.sh` script located in the root directory. Provide the desired command line arguments as follows: + +```bash +gui.sh --listen 127.0.0.1 --server_port 7860 --inbrowser --share +``` + +## Custom Path Defaults + +The repository now provides a default configuration file named `config.toml`. This file is a template that you can customize to suit your needs. + +To use the default configuration file, follow these steps: + +1. Copy the `config example.toml` file from the root directory of the repository to `config.toml`. +2. Open the `config.toml` file in a text editor. +3. Modify the paths and settings as per your requirements. 
+ +This approach lets you set the default folder that opens for each type of folder/file input supported in the GUI. + +You can specify the path to your config.toml (or any other name you like) when running the GUI. For instance: `./gui.bat --config c:\my_config.toml` + +## LoRA + +To train a LoRA, you can currently use the `train_network.py` code. You can create a LoRA network by using the all-in-one GUI. + +Once you have created the LoRA network, you can generate images using auto1111 by installing [this extension](https://github.com/kohya-ss/sd-webui-additional-networks). + +## Sample image generation during training + +A prompt file might look like this, for example: + +```txt +# prompt 1 +masterpiece, best quality, (1girl), in white shirts, upper body, looking at viewer, simple background --n low quality, worst quality, bad anatomy, bad composition, poor, low effort --w 768 --h 768 --d 1 --l 7.5 --s 28 + +# prompt 2 +masterpiece, best quality, 1boy, in business suit, standing at street, looking back --n (low quality, worst quality), bad anatomy, bad composition, poor, low effort --w 576 --h 832 --d 2 --l 5.5 --s 40 +``` + +Lines beginning with `#` are comments. You can specify options for the generated image with options like `--n` after the prompt. The following options can be used: + +- `--n`: Negative prompt up to the next option. +- `--w`: Specifies the width of the generated image. +- `--h`: Specifies the height of the generated image. +- `--d`: Specifies the seed of the generated image. +- `--l`: Specifies the CFG scale of the generated image. +- `--s`: Specifies the number of steps in the generation. + +Prompt weighting with `( )` and `[ ]` is supported. + +## Troubleshooting + +If you encounter any issues, refer to the troubleshooting steps below. 
+ +### Page File Limit + +If you encounter an X error related to the page file, you may need to increase the page file size limit in Windows. + +### No module called tkinter + +If you encounter an error indicating that the module `tkinter` is not found, try reinstalling Python 3.10 on your system. + +### LORA Training on TESLA V100 - GPU Utilization Issue + +#### Issue Summary + +When training LORA on a TESLA V100, users reported low GPU utilization. Additionally, there was difficulty in specifying GPUs other than the default for training. + +#### Potential Solutions + +- **GPU Selection:** Users can specify GPU IDs in the setup configuration to select the desired GPUs for training. +- **Improving GPU Load:** Utilizing `adamW8bit` optimizer and increasing the batch size can help achieve 70-80% GPU utilization without exceeding GPU memory limits. + +## SDXL training + +The documentation in this section will be moved to a separate document later. + +## Masked loss + +The masked loss is supported in each training script. To enable the masked loss, specify the `--masked_loss` option. + +The feature is not fully tested, so there may be bugs. If you find any issues, please open an Issue. + +ControlNet dataset is used to specify the mask. The mask images should be the RGB images. The pixel value 255 in R channel is treated as the mask (the loss is calculated only for the pixels with the mask), and 0 is treated as the non-mask. The pixel values 0-255 are converted to 0-1 (i.e., the pixel value 128 is treated as the half weight of the loss). See details for the dataset specification in the [LLLite documentation](./docs/train_lllite_README.md#preparing-the-dataset). + +## Change History + +See release information. 
/**
 * Report whether a non-empty translation table is available.
 *
 * Returns the falsy `window.localization` value itself when no table is
 * defined, otherwise a boolean telling whether the table has any keys.
 */
function hasLocalization() {
    const table = window.localization;
    if (!table) {
        return table;
    }
    return Object.keys(table).length > 0;
}
/**
 * Translate a DOM node in place.
 *
 * A text node (nodeType 3) is translated directly. For any other node, its
 * `title` and `placeholder` attributes are translated when a translation
 * exists, and then every text node beneath it is processed.
 *
 * The previous per-node `console.log` debug trace was removed: this function
 * is invoked for every node added by every observed DOM mutation, so the
 * trace flooded the console.
 */
function processNode(node) {
    if (node.nodeType == 3) {
        processTextNode(node);
        return;
    }

    if (node.title) {
        let tl = getTranslation(node.title);
        if (tl !== undefined) {
            node.title = tl;
        }
    }

    if (node.placeholder) {
        let tl = getTranslation(node.placeholder);
        if (tl !== undefined) {
            node.placeholder = tl;
        }
    }

    textNodesUnder(node).forEach(function(textNode) {
        processTextNode(textNode);
    });
}
/**
 * Invoke every callback in `queue` with `arg`.
 *
 * A callback that throws is reported through `console.error` and does not
 * prevent the remaining callbacks from running.
 */
function executeCallbacks(queue, arg) {
    const invoke = function(callback) {
        try {
            callback(arg);
        } catch (e) {
            console.error("error running callback", callback, ":", e);
        }
    };

    for (const callback of queue) {
        invoke(callback);
    }
}
/**
 * Keyboard shortcut: Enter combined with Ctrl, Meta (Cmd) or Alt clicks the
 * "generate" button (id ending in `_generate`) of the visible tab.
 *
 * `e.key` is used when the browser provides it, with `e.keyCode` as fallback.
 * The lookup of the visible tab content may yield null when no tab matches;
 * that case is guarded so the handler no longer throws a TypeError.
 */
document.addEventListener('keydown', function(e) {
    var isEnter = e.key !== undefined ? e.key == "Enter" : e.keyCode == 13;
    var hasModifier = e.metaKey || e.ctrlKey || e.altKey;
    if (!isEnter || !hasModifier) {
        return;
    }

    var tabContent = get_uiCurrentTabContent();
    var button = tabContent ? tabContent.querySelector('button[id$=_generate]') : null;
    if (button) {
        button.click();
    }
    e.preventDefault();
});
font-size: small; + text-align: right; + padding-right: 1em; +} + +#myDropdown { + height: auto; + width: 33%; + flex-grow: 0; +} + +#myTensorButton { + background: radial-gradient(ellipse, #3a99ff, #52c8ff); + color: white; + border: #296eb8; +} + +#myTensorButtonStop { + background: radial-gradient(ellipse, #52c8ff, #3a99ff); + color: black; + border: #296eb8; +} \ No newline at end of file diff --git a/config example.toml b/config example.toml new file mode 100644 index 0000000000000000000000000000000000000000..30855d5c9a37814ef46a7de0e5b2786d8c0336e2 --- /dev/null +++ b/config example.toml @@ -0,0 +1,185 @@ +# Copy this file and name it config.toml +# Edit the values to suit your needs + +[settings] +use_shell = false # Use shell during process run of sd-scripts Python code. Most secure is false but some systems may require it to be true to properly run sd-scripts. + +# Default folders location +[model] +models_dir = "./models" # Pretrained model name or path +output_name = "new model" # Trained model output name +train_data_dir = "./data" # Image folder (containing training images subfolders) / Image folder (containing training images) +dataset_config = "./test.toml" # Dataset config file (Optional. 
Select the toml configuration file to use for the dataset) +training_comment = "Some training comment" # Training comment +save_model_as = "safetensors" # Save model as (ckpt, safetensors, diffusers, diffusers_safetensors) +save_precision = "bf16" # Save model precision (fp16, bf16, float) + +[folders] +output_dir = "./outputs" # Output directory for trained model +reg_data_dir = "./data/reg" # Regularisation directory +logging_dir = "./logs" # Logging directory + +[configuration] +config_dir = "./presets" # Load/Save Config file + +[accelerate_launch] +dynamo_backend = "no" # Dynamo backend +dynamo_mode = "default" # Dynamo mode +dynamo_use_dynamic = false # Dynamo use dynamic +dynamo_use_fullgraph = false # Dynamo use fullgraph +extra_accelerate_launch_args = "" # Extra accelerate launch args +gpu_ids = "" # GPU IDs +main_process_port = 0 # Main process port +mixed_precision = "fp16" # Mixed precision (fp16, bf16, fp8) +multi_gpu = false # Multi GPU +num_cpu_threads_per_process = 2 # Number of CPU threads per process +num_machines = 1 # Number of machines +num_processes = 1 # Number of processes + +[basic] +cache_latents = true # Cache latents +cache_latents_to_disk = false # Cache latents to disk +caption_extension = ".txt" # Caption extension +enable_bucket = true # Enable bucket +epoch = 1 # Epoch +learning_rate = 0.0001 # Learning rate +learning_rate_te = 0.0001 # Learning rate text encoder +learning_rate_te1 = 0.0001 # Learning rate text encoder 1 +learning_rate_te2 = 0.0001 # Learning rate text encoder 2 +lr_scheduler = "cosine" # LR Scheduler +lr_scheduler_args = "" # LR Scheduler args +lr_warmup = 0 # LR Warmup (% of total steps) +lr_scheduler_num_cycles = 1 # LR Scheduler num cycles +lr_scheduler_power = 1.0 # LR Scheduler power +max_bucket_reso = 2048 # Max bucket resolution +max_grad_norm = 1.0 # Max grad norm +max_resolution = "512,512" # Max resolution +max_train_steps = 0 # Max train steps +max_train_epochs = 0 # Max train epochs +min_bucket_reso = 
256 # Min bucket resolution +optimizer = "AdamW8bit" # Optimizer (AdamW, AdamW8bit, Adafactor, DAdaptation, DAdaptAdaGrad, DAdaptAdam, DAdaptAdan, DAdaptAdanIP, DAdaptAdamPreprint, DAdaptLion, DAdaptSGD, Lion, Lion8bit, PagedAdam +optimizer_args = "" # Optimizer args +save_every_n_epochs = 1 # Save every n epochs +save_every_n_steps = 1 # Save every n steps +seed = 1234 # Seed +stop_text_encoder_training = 0 # Stop text encoder training (% of total steps) +train_batch_size = 1 # Train batch size + +[advanced] +adaptive_noise_scale = 0 # Adaptive noise scale +additional_parameters = "" # Additional parameters +bucket_no_upscale = true # Don't upscale bucket resolution +bucket_reso_steps = 64 # Bucket resolution steps +caption_dropout_every_n_epochs = 0 # Caption dropout every n epochs +caption_dropout_rate = 0 # Caption dropout rate +color_aug = false # Color augmentation +clip_skip = 1 # Clip skip +debiased_estimation_loss = false # Debiased estimation loss +flip_aug = false # Flip augmentation +fp8_base = false # FP8 base training (experimental) +full_bf16 = false # Full bf16 training (experimental) +full_fp16 = false # Full fp16 training (experimental) +gradient_accumulation_steps = 1 # Gradient accumulation steps +gradient_checkpointing = false # Gradient checkpointing +huber_c = 0.1 # The huber loss parameter. 
Only used if one of the huber loss modes (huber or smooth l1) is selected with loss_type +huber_schedule = "snr" # The type of loss to use and whether it's scheduled based on the timestep +ip_noise_gamma = 0 # IP noise gamma +ip_noise_gamma_random_strength = false # IP noise gamma random strength (true, false) +keep_tokens = 0 # Keep tokens +log_tracker_config_dir = "./logs" # Log tracker configs directory +log_tracker_name = "" # Log tracker name +loss_type = "l2" # Loss type (l2, huber, smooth_l1) +masked_loss = false # Masked loss +max_data_loader_n_workers = 0 # Max data loader n workers (string) +max_timestep = 1000 # Max timestep +max_token_length = 150 # Max token length ("75", "150", "225") +mem_eff_attn = false # Memory efficient attention +min_snr_gamma = 0 # Min SNR gamma +min_timestep = 0 # Min timestep +multires_noise_iterations = 0 # Multires noise iterations +multires_noise_discount = 0 # Multires noise discount +no_token_padding = false # Disable token padding +noise_offset = 0 # Noise offset +noise_offset_random_strength = false # Noise offset random strength (true, false) +noise_offset_type = "Original" # Noise offset type ("Original", "Multires") +persistent_data_loader_workers = false # Persistent data loader workers +prior_loss_weight = 1.0 # Prior loss weight +random_crop = false # Random crop +save_every_n_steps = 0 # Save every n steps +save_last_n_steps = 0 # Save last n steps +save_last_n_steps_state = 0 # Save last n steps state +save_state = false # Save state +save_state_on_train_end = false # Save state on train end +scale_v_pred_loss_like_noise_pred = false # Scale v pred loss like noise pred +shuffle_caption = false # Shuffle captions +state_dir = "./outputs" # Resume from saved training state +log_with = "" # Logger to use ["wandb", "tensorboard", "all", ""] +vae_batch_size = 0 # VAE batch size +vae_dir = "./models/vae" # VAEs folder path +v_pred_like_loss = 0 # V pred like loss weight +wandb_api_key = "" # Wandb api key 
+wandb_run_name = "" # Wandb run name +weighted_captions = false # Weighted captions +xformers = "xformers" # CrossAttention (none, sdp, xformers) + +# This next section can be used to set default values for the Dataset Preparation section +# The "Destination training directory" field will be equal to "train_data_dir" as specified above +[dataset_preparation] +class_prompt = "class" # Class prompt +images_folder = "/some/folder/where/images/are" # Training images directory +instance_prompt = "instance" # Instance prompt +reg_images_folder = "/some/folder/where/reg/images/are" # Regularisation images directory +reg_images_repeat = 1 # Regularisation images repeat +util_regularization_images_repeat_input = 1 # Regularisation images repeat input +util_training_images_repeat_input = 40 # Training images repeat input + +[huggingface] +async_upload = false # Async upload +huggingface_path_in_repo = "" # Huggingface path in repo +huggingface_repo_id = "" # Huggingface repo id +huggingface_repo_type = "" # Huggingface repo type +huggingface_repo_visibility = "" # Huggingface repo visibility +huggingface_token = "" # Huggingface token +resume_from_huggingface = "" # Resume from huggingface (ex: {repo_id}/{path_in_repo}:{revision}:{repo_type}) +save_state_to_huggingface = false # Save state to huggingface + +[samples] +sample_every_n_steps = 0 # Sample every n steps +sample_every_n_epochs = 0 # Sample every n epochs +sample_prompts = "" # Sample prompts +sample_sampler = "euler_a" # Sampler to use for image sampling + +[sdxl] +sdxl_cache_text_encoder_outputs = false # Cache text encoder outputs +sdxl_no_half_vae = true # No half VAE + +[wd14_caption] +always_first_tags = "" # comma-separated list of tags to always put at the beginning, e.g. 
1girl,1boy +append_tags = false # Append TAGs +batch_size = 8 # Batch size +caption_extension = ".txt" # Extension for caption file (e.g., .caption, .txt) +caption_separator = ", " # Caption Separator +character_tag_expand = false # Expand tag tail parenthesis to another tag for character tags. `chara_name_(series)` becomes `chara_name, series` +character_threshold = 0.35 # Character threshold +debug = false # Debug mode +force_download = false # Force model re-download when switching to onnx +frequency_tags = false # Frequency tags +general_threshold = 0.35 # General threshold +max_data_loader_n_workers = 2 # Max dataloader workers +onnx = true # ONNX +recursive = false # Recursive +remove_underscore = false # Remove underscore +repo_id = "SmilingWolf/wd-convnext-tagger-v3" # Repo id for wd14 tagger on Hugging Face +tag_replacement = "" # Tag replacement in the format of `source1,target1;source2,target2; ...`. Escape `,` and `;` with `\`. e.g. `tag1,tag2;tag3,tag4` +thresh = 0.36 # Threshold +train_data_dir = "" # Image folder to caption (containing the images to caption) +undesired_tags = "" # comma-separated list of tags to remove, e.g. 
1girl,1boy +use_rating_tags = false # Use rating tags +use_rating_tags_as_last_tag = false # Use rating tags as last tagging tags + +[metadata] +metadata_title = "" # Title for model metadata (default is output_name) +metadata_author = "" # Author name for model metadata +metadata_description = "" # Description for model metadata +metadata_license = "" # License for model metadata +metadata_tags = "" # Tags for model metadata diff --git a/config_files/accelerate/default_config.yaml b/config_files/accelerate/default_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1e62429f648cac986e5b549b38f3febe1fa5dbd --- /dev/null +++ b/config_files/accelerate/default_config.yaml @@ -0,0 +1,22 @@ +command_file: null +commands: null +compute_environment: LOCAL_MACHINE +deepspeed_config: {} +distributed_type: 'NO' +downcast_bf16: 'no' +dynamo_backend: 'NO' +fsdp_config: {} +gpu_ids: all +machine_rank: 0 +main_process_ip: null +main_process_port: null +main_training_function: main +megatron_lm_config: {} +mixed_precision: 'no' +num_machines: 1 +num_processes: 1 +rdzv_backend: static +same_network: true +tpu_name: null +tpu_zone: null +use_cpu: false \ No newline at end of file diff --git a/config_files/accelerate/runpod.yaml b/config_files/accelerate/runpod.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c1e62429f648cac986e5b549b38f3febe1fa5dbd --- /dev/null +++ b/config_files/accelerate/runpod.yaml @@ -0,0 +1,22 @@ +command_file: null +commands: null +compute_environment: LOCAL_MACHINE +deepspeed_config: {} +distributed_type: 'NO' +downcast_bf16: 'no' +dynamo_backend: 'NO' +fsdp_config: {} +gpu_ids: all +machine_rank: 0 +main_process_ip: null +main_process_port: null +main_training_function: main +megatron_lm_config: {} +mixed_precision: 'no' +num_machines: 1 +num_processes: 1 +rdzv_backend: static +same_network: true +tpu_name: null +tpu_zone: null +use_cpu: false \ No newline at end of file diff --git 
def UI(**kwargs):
    """Build the Dreambooth + Utilities Gradio interface and launch it.

    Keyword arguments read: ``language``, ``headless``, ``username``,
    ``password``, ``server_port``, ``inbrowser``, ``share`` and ``listen``.
    """
    add_javascript(kwargs.get("language"))

    headless = kwargs.get("headless", False)
    log.info(f"headless: {headless}")

    # Optional stylesheet: used only when it exists relative to the CWD.
    css = ""
    css_path = "./assets/style.css"
    if os.path.exists(css_path):
        with open(css_path, "r", encoding="utf8") as css_file:
            log.info("Load CSS...")
            css += css_file.read() + "\n"

    interface = gr.Blocks(css=css, title="Kohya_ss GUI", theme=gr.themes.Default())

    with interface:
        with gr.Tab("Dreambooth"):
            folder_inputs = dreambooth_tab(headless=headless)
            (
                train_data_dir_input,
                reg_data_dir_input,
                output_dir_input,
                logging_dir_input,
            ) = folder_inputs
        with gr.Tab("Utilities"):
            # The folder inputs of the Dreambooth tab are handed to the
            # Utilities tab.
            utilities_tab(
                train_data_dir_input=train_data_dir_input,
                reg_data_dir_input=reg_data_dir_input,
                output_dir_input=output_dir_input,
                logging_dir_input=logging_dir_input,
                enable_copy_info_button=True,
                headless=headless,
            )

    # Show the interface; optional settings are forwarded only when set.
    launch_kwargs = {"server_name": kwargs.get("listen")}

    username = kwargs.get("username")
    password = kwargs.get("password")
    if username and password:
        launch_kwargs["auth"] = (username, password)

    server_port = kwargs.get("server_port", 0)
    if server_port > 0:
        launch_kwargs["server_port"] = server_port

    for flag in ("inbrowser", "share"):
        value = kwargs.get(flag, False)
        if value:
            launch_kwargs[flag] = value

    interface.launch(**launch_kwargs)
index 0000000000000000000000000000000000000000..0ff52bee7a64d22524eb23c6e1a8273cbe5e1bb9 --- /dev/null +++ b/deprecated/finetune_gui.py @@ -0,0 +1,97 @@ +import argparse +import gradio as gr +import os + +from kohya_gui.utilities import utilities_tab +from kohya_gui.finetune_gui import finetune_tab + +from kohya_gui.custom_logging import setup_logging +from kohya_gui.localization_ext import add_javascript + +# Set up logging +log = setup_logging() + + +def UI(**kwargs): + add_javascript(kwargs.get("language")) + css = "" + + headless = kwargs.get("headless", False) + log.info(f"headless: {headless}") + + if os.path.exists("./assets/style.css"): + with open(os.path.join("./assets/style.css"), "r", encoding="utf8") as file: + log.info("Load CSS...") + css += file.read() + "\n" + + interface = gr.Blocks(css=css, title="Kohya_ss GUI", theme=gr.themes.Default()) + + with interface: + with gr.Tab("Finetune"): + finetune_tab(headless=headless) + with gr.Tab("Utilities"): + utilities_tab(enable_dreambooth_tab=False, headless=headless) + + # Show the interface + launch_kwargs = {} + username = kwargs.get("username") + password = kwargs.get("password") + server_port = kwargs.get("server_port", 0) + inbrowser = kwargs.get("inbrowser", False) + share = kwargs.get("share", False) + server_name = kwargs.get("listen") + + launch_kwargs["server_name"] = server_name + if username and password: + launch_kwargs["auth"] = (username, password) + if server_port > 0: + launch_kwargs["server_port"] = server_port + if inbrowser: + launch_kwargs["inbrowser"] = inbrowser + if share: + launch_kwargs["share"] = share + interface.launch(**launch_kwargs) + + +if __name__ == "__main__": + # torch.cuda.set_per_process_memory_fraction(0.48) + parser = argparse.ArgumentParser() + parser.add_argument( + "--listen", + type=str, + default="127.0.0.1", + help="IP to listen on for connections to Gradio", + ) + parser.add_argument( + "--username", type=str, default="", help="Username for authentication" + 
) + parser.add_argument( + "--password", type=str, default="", help="Password for authentication" + ) + parser.add_argument( + "--server_port", + type=int, + default=0, + help="Port to run the server listener on", + ) + parser.add_argument("--inbrowser", action="store_true", help="Open in browser") + parser.add_argument("--share", action="store_true", help="Share the gradio UI") + parser.add_argument( + "--headless", action="store_true", help="Is the server headless" + ) + parser.add_argument( + "--language", type=str, default=None, help="Set custom language" + ) + + args = parser.parse_args() + + UI( + username=args.username, + password=args.password, + inbrowser=args.inbrowser, + server_port=args.server_port, + share=args.share, + listen=args.listen, + headless=args.headless, + language=args.language, + ) diff --git a/deprecated/lora_gui.py b/deprecated/lora_gui.py new file mode 100644 index 0000000000000000000000000000000000000000..cd251280afabaa889aadf1b0a28b1c897e574a28 --- /dev/null +++ b/deprecated/lora_gui.py @@ -0,0 +1,118 @@ +import argparse +import gradio as gr +import os + +from kohya_gui.utilities import utilities_tab +from kohya_gui.lora_gui import lora_tab + +from kohya_gui.custom_logging import setup_logging +from kohya_gui.localization_ext import add_javascript + +# Set up logging +log = setup_logging() + + +def UI(**kwargs): + try: + # Your main code goes here + while True: + add_javascript(kwargs.get("language")) + css = "" + + headless = kwargs.get("headless", False) + log.info(f"headless: {headless}") + + if os.path.exists("./assets/style.css"): + with open(os.path.join("./assets/style.css"), "r", encoding="utf8") as file: + log.info("Load CSS...") + css += file.read() + "\n" + + interface = gr.Blocks( + css=css, title="Kohya_ss GUI", theme=gr.themes.Default() + ) + + with interface: + with gr.Tab("LoRA"): + ( + train_data_dir_input, + reg_data_dir_input, + output_dir_input, + logging_dir_input, + ) = lora_tab(headless=headless) + with 
gr.Tab("Utilities"): + utilities_tab( + train_data_dir_input=train_data_dir_input, + reg_data_dir_input=reg_data_dir_input, + output_dir_input=output_dir_input, + logging_dir_input=logging_dir_input, + enable_copy_info_button=True, + headless=headless, + ) + + # Show the interface + launch_kwargs = {} + username = kwargs.get("username") + password = kwargs.get("password") + server_port = kwargs.get("server_port", 0) + inbrowser = kwargs.get("inbrowser", False) + share = kwargs.get("share", False) + server_name = kwargs.get("listen") + + launch_kwargs["server_name"] = server_name + if username and password: + launch_kwargs["auth"] = (username, password) + if server_port > 0: + launch_kwargs["server_port"] = server_port + if inbrowser: + launch_kwargs["inbrowser"] = inbrowser + if share: + launch_kwargs["share"] = share + log.info(launch_kwargs) + interface.launch(**launch_kwargs) + except KeyboardInterrupt: + # Code to execute when Ctrl+C is pressed + print("You pressed Ctrl+C!") + + +if __name__ == "__main__": + # torch.cuda.set_per_process_memory_fraction(0.48) + parser = argparse.ArgumentParser() + parser.add_argument( + "--listen", + type=str, + default="127.0.0.1", + help="IP to listen on for connections to Gradio", + ) + parser.add_argument( + "--username", type=str, default="", help="Username for authentication" + ) + parser.add_argument( + "--password", type=str, default="", help="Password for authentication" + ) + parser.add_argument( + "--server_port", + type=int, + default=0, + help="Port to run the server listener on", + ) + parser.add_argument("--inbrowser", action="store_true", help="Open in browser") + parser.add_argument("--share", action="store_true", help="Share the gradio UI") + parser.add_argument( + "--headless", action="store_true", help="Is the server headless" + ) + parser.add_argument( + "--language", type=str, default=None, help="Set custom language" + ) + + args = parser.parse_args() + + UI( + username=args.username, + 
password=args.password, + inbrowser=args.inbrowser, + server_port=args.server_port, + share=args.share, + listen=args.listen, + headless=args.headless, + language=args.language, + ) diff --git a/deprecated/textual_inversion_gui.py b/deprecated/textual_inversion_gui.py new file mode 100644 index 0000000000000000000000000000000000000000..d54a795bafe272174d1f705621e40e0fb290e588 --- /dev/null +++ b/deprecated/textual_inversion_gui.py @@ -0,0 +1,110 @@ +import argparse +import gradio as gr +import os + +from kohya_gui.textual_inversion_gui import ti_tab +from kohya_gui.utilities import utilities_tab + +from kohya_gui.custom_logging import setup_logging +from kohya_gui.localization_ext import add_javascript + + +# Set up logging +log = setup_logging() + + +def UI(**kwargs): + add_javascript(kwargs.get("language")) + css = "" + + headless = kwargs.get("headless", False) + log.info(f"headless: {headless}") + + if os.path.exists("./assets/style.css"): + with open(os.path.join("./assets/style.css"), "r", encoding="utf8") as file: + log.info("Load CSS...") + css += file.read() + "\n" + + interface = gr.Blocks(css=css, title="Kohya_ss GUI", theme=gr.themes.Default()) + + with interface: + with gr.Tab("Dreambooth TI"): + ( + train_data_dir_input, + reg_data_dir_input, + output_dir_input, + logging_dir_input, + ) = ti_tab(headless=headless) + with gr.Tab("Utilities"): + utilities_tab( + train_data_dir_input=train_data_dir_input, + reg_data_dir_input=reg_data_dir_input, + output_dir_input=output_dir_input, + logging_dir_input=logging_dir_input, + enable_copy_info_button=True, + headless=headless, + ) + + # Show the interface + launch_kwargs = {} + username = kwargs.get("username") + password = kwargs.get("password") + server_port = kwargs.get("server_port", 0) + inbrowser = kwargs.get("inbrowser", False) + share = kwargs.get("share", False) + server_name = kwargs.get("listen") + + launch_kwargs["server_name"] = server_name + if username and password: + launch_kwargs["auth"] = 
(username, password) + if server_port > 0: + launch_kwargs["server_port"] = server_port + if inbrowser: + launch_kwargs["inbrowser"] = inbrowser + if share: + launch_kwargs["share"] = share + interface.launch(**launch_kwargs) + + +if __name__ == "__main__": + # torch.cuda.set_per_process_memory_fraction(0.48) + parser = argparse.ArgumentParser() + parser.add_argument( + "--listen", + type=str, + default="127.0.0.1", + help="IP to listen on for connections to Gradio", + ) + parser.add_argument( + "--username", type=str, default="", help="Username for authentication" + ) + parser.add_argument( + "--password", type=str, default="", help="Password for authentication" + ) + parser.add_argument( + "--server_port", + type=int, + default=0, + help="Port to run the server listener on", + ) + parser.add_argument("--inbrowser", action="store_true", help="Open in browser") + parser.add_argument("--share", action="store_true", help="Share the gradio UI") + parser.add_argument( + "--headless", action="store_true", help="Is the server headless" + ) + parser.add_argument( + "--language", type=str, default=None, help="Set custom language" + ) + + args = parser.parse_args() + + UI( + username=args.username, + password=args.password, + inbrowser=args.inbrowser, + server_port=args.server_port, + share=args.share, + listen=args.listen, + headless=args.headless, + language=args.language, + ) diff --git a/deprecated/utilities_gui.py b/deprecated/utilities_gui.py new file mode 100644 index 0000000000000000000000000000000000000000..19f1a46a01e3ba45ec560d18b370cb5ee7042694 --- /dev/null +++ b/deprecated/utilities_gui.py @@ -0,0 +1,69 @@ +import argparse +import gradio as gr +import os + +from kohya_gui.utilities import utilities_tab + +from kohya_gui.custom_logging import setup_logging +from kohya_gui.localization_ext import add_javascript + + +# Set up logging +log = setup_logging() + + +def UI(**kwargs): + css = '' + + if os.path.exists('./assets/style.css'): + with 
open(os.path.join('./assets/style.css'), 'r', encoding='utf8') as file: + print('Load CSS...') + css += file.read() + '\n' + + interface = gr.Blocks(css=css) + + with interface: + utilities_tab() + + # Show the interface + launch_kwargs = {} + if not kwargs.get('username', None) == '': + launch_kwargs['auth'] = ( + kwargs.get('username', None), + kwargs.get('password', None), + ) + if kwargs.get('server_port', 0) > 0: + launch_kwargs['server_port'] = kwargs.get('server_port', 0) + if kwargs.get('inbrowser', False): + launch_kwargs['inbrowser'] = kwargs.get('inbrowser', False) + print(launch_kwargs) + interface.launch(**launch_kwargs) + + +if __name__ == '__main__': + # torch.cuda.set_per_process_memory_fraction(0.48) + parser = argparse.ArgumentParser() + parser.add_argument( + '--username', type=str, default='', help='Username for authentication' + ) + parser.add_argument( + '--password', type=str, default='', help='Password for authentication' + ) + parser.add_argument( + '--server_port', + type=int, + default=0, + help='Port to run the server listener on', + ) + parser.add_argument( + '--inbrowser', action='store_true', help='Open in browser' + ) + + args = parser.parse_args() + + UI( + username=args.username, + password=args.password, + inbrowser=args.inbrowser, + server_port=args.server_port, + ) diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4932bcee2a5cbf0b19c0e42eb63a1490c611c920 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,57 @@ +services: + kohya-ss-gui: + container_name: kohya-ss-gui + image: ghcr.io/bmaltais/kohya-ss-gui:latest + user: 1000:0 + build: + context: . 
+ args: + - UID=1000 + cache_from: + - ghcr.io/bmaltais/kohya-ss-gui:cache + cache_to: + - type=inline + ports: + - 7860:7860 + environment: + SAFETENSORS_FAST_GPU: 1 + TENSORBOARD_PORT: ${TENSORBOARD_PORT:-6006} + tmpfs: + - /tmp + volumes: + - /tmp/.X11-unix:/tmp/.X11-unix + - ./dataset:/dataset + - ./dataset/images:/app/data + - ./dataset/logs:/app/logs + - ./dataset/outputs:/app/outputs + - ./dataset/regularization:/app/regularization + - ./.cache/config:/app/config + - ./.cache/user:/home/1000/.cache + - ./.cache/triton:/home/1000/.triton + - ./.cache/nv:/home/1000/.nv + - ./.cache/keras:/home/1000/.keras + - ./.cache/config:/home/1000/.config # For backward compatibility + deploy: + resources: + reservations: + devices: + - driver: nvidia + capabilities: [gpu] + device_ids: ["all"] + + tensorboard: + container_name: tensorboard + image: tensorflow/tensorflow:latest-gpu + ports: + # !Please change the port in .env file + - ${TENSORBOARD_PORT:-6006}:6006 + volumes: + - ./dataset/logs:/app/logs + command: tensorboard --logdir=/app/logs --bind_all + deploy: + resources: + reservations: + devices: + - driver: nvidia + capabilities: [gpu] + device_ids: ["all"] diff --git a/docs/Finetuning/top_level.md b/docs/Finetuning/top_level.md new file mode 100644 index 0000000000000000000000000000000000000000..b9464612d89533e188332b5ab26760bfcb09863c --- /dev/null +++ b/docs/Finetuning/top_level.md @@ -0,0 +1,28 @@ +# Finetuning Resource Guide + +This guide is a resource compilation to facilitate the development of robust LoRA models. + +-Need to add resources here + +## Guidelines for SDXL Finetuning + +- Set the `Max resolution` to at least 1024x1024, as this is the standard resolution for SDXL. +- The fine-tuning can be done with 24GB GPU memory with the batch size of 1. + - Train U-Net only. + - Use gradient checkpointing. + - Use `--cache_text_encoder_outputs` option and caching latents. + - Use Adafactor optimizer. RMSprop 8bit or Adagrad 8bit may work. 
AdamW 8bit doesn't seem to work. +- PyTorch 2 seems to use slightly less GPU memory than PyTorch 1. + +Example of the optimizer settings for Adafactor with the fixed learning rate: +``` +optimizer_type = "adafactor" +optimizer_args = [ "scale_parameter=False", "relative_step=False", "warmup_init=False" ] +lr_scheduler = "constant_with_warmup" +lr_warmup_steps = 100 +learning_rate = 4e-7 # SDXL original learning rate +``` + +## Resource Contributions + +If you have valuable resources to add, kindly create a PR on Github. \ No newline at end of file diff --git a/docs/LoRA/options.md b/docs/LoRA/options.md new file mode 100644 index 0000000000000000000000000000000000000000..89b06c0d2058cce21be396c7cff39092c5ca06bd --- /dev/null +++ b/docs/LoRA/options.md @@ -0,0 +1,752 @@ +# Explaining LoRA Learning Settings Using Kohya_ss for Stable Diffusion Understanding by Anyone + +To understand the meaning of each setting in kohya_ss, it is necessary to know how LoRA performs additional learning. + +We will also explain what the "model," which is the target of additional learning, is. + +## What is a "model" + +Stable Diffusion loads and uses modules called " models ". A model is, so to speak, a "brain", and its true identity is " weight information of a neural network ". + +A neural network is made up of many " neurons ", and the clusters of neurons form many layers of " layers ". Neurons in one layer are connected to neurons in another layer by lines, and the strength of the connection is " weight ". It is this "weight" that holds a huge amount of picture information. + +### LoRA adds a small neural net + +LoRA is a kind of "additional learning", but additional learning is to upgrade the neural network. + +An additional learning method called "DreamBooth" uses this method. + +With this method, if you want to publish the additional training data, you need to distribute the whole model that has been updated with additional training. 
+ +Models are typically 2G to 5G bytes in size, making them difficult to distribute. + +In contrast, LoRA learning leaves the model alone and creates a new “small neural net ” for each position you want to learn. Additional training is done on this small neural net . + +When you want to distribute LoRA, you only need to distribute this small neural network , so the data size is small. + +### Structure of a small neural net + +LoRA's small neural net consists of three layers. The number of neurons in the "input layer" on the left and the "output layer" on the right is the same as the number of neurons in the "input layer" and "output layer" of the target neural network . The number of neurons in the middle layer (middle layer) is called the "rank number" (or the number of dimensions), and this number can be freely determined when learning. + +### LoRA Learning Target 1: U-Net + +U-Net is divided into "Down" (left half), "Mid" (bottom) and "Up" (right half). + +And it consists of 25 blocks in total: Down12 block, Mid1 block, and Up12 block. The neural net added here is simply called "UNet" in Kohya_ss. + +### LoRA Learning Object 2: Text Encoder + +This isn't the only time LoRA adds neural nets . + +The block called "Cross Attention" in the figure above receives text information from a module called "Text Encoder ". This "text encoder " has the role of converting the prompt, which is text data, into a string of numbers (vector). + +There is only one text encoder , which is shared by all Attention Blocks in U-Net. This text encoder is originally treated as a "finished product" within Stable Diffusion and is not subject to model learning, but it is also subject to additional learning by LoRA. + +The LoRA updated text encoder is used in all Attention blocks, so any neural nets added here will have a huge impact on the final image. + +The neural network added here is called "Text Encoder" in Kohya_ss. 
+ +## Basic training parameters + +### LoRA type + +Specifies the type of LoRA learning. The LoRA explained above is the "standard" type. "DyLoRA" learns multiple ranks below the specified rank at the same time, so it is convenient when you want to select the optimum rank. LoHa is highly efficient LoRA, and LoCon extends learning to U-Net's Res block. + +There is no problem with the Standard type at first. If you are having trouble learning, try another type. + +### LoRA network weights + +If you want to use the already learned LoRA file for additional learning, specify the LoRA file here. + +The LoRA specified here will be read at the start of learning, and learning will start from this LoRA state. LoRA after learning is saved as another file, so the LoRA file specified here will not be overwritten. + +### DIM from weights + +This is an option only when doing additional training with LoRA network weights. + +As shown in the figure above, LoRA adds a small neural network , but the number of neurons (number of ranks) in the middle layer can be freely set with Network Rank (described later). + +However, turning this option on will set the number of ranks of the created LoRA to the same number of ranks as the LoRA specified in LoRA network weights. When this is turned on, the specification of Network Rank is ignored. + +For example, when the number of LoRA ranks used for additional learning is 32, the number of LoRA ranks to be created will also be set to 32. + +Default is off. + +### Train batch size + +Specify a batch size. A batch is "the number of images to read at once". A batch size of 2 will train two images at a time simultaneously. If multiple different pictures are learned at the same time, the tuning accuracy for each picture will drop, but since it will be learning that comprehensively captures the characteristics of multiple pictures, the final result may instead be better. 
+ +(If you tune too much to a specific picture, it will become LoRA that is not applicable.) + +Since multiple pictures are learned at once, the higher the batch size, the shorter the learning time. However, the tuning accuracy decreases and the number of weight changes decreases, so there is a possibility that the learning may be insufficient in some cases. + +(There is also a report that when increasing the batch size, it is better to increase the learning rate (described later). For example, if the batch size is 2, the learning rate should be doubled.) + +Also, the higher the batch size, the more memory is consumed. Let's decide according to the size of VRAM of your PC. + +With 6GB of VRAM, a batch size of 2 would be barely possible. + +Default is 1. + +*Since all the images read at the same time for each batch must be the same size, if the sizes of the training images are different, the number of images that are processed simultaneously may be less than the number of batches specified here. + +### Epoch + +One epoch is "one set of learning". + +For example, let's say you want to learn by reading 50 images each 10 times. In this case, 1 epoch is 50x10 = 500 trainings. If it is 2 epochs, this will be repeated twice, so it will be 500x2 = 1000 times of learning. + +After training for the specified number of epochs, a LoRA file will be created and saved to the specified location. + +For LoRA, 2-3 epochs of learning is sufficient. + +### Save every N epochs + +You can save the progress as a LoRA file for each epoch number specified here. + +For example, if you specify 10 in "Epoch" and specify 2 in "Save every N epochs", the LoRA file will be saved in the specified folder every 2 epochs (at the end of 2, 4, 6, 8 epochs). + +If you don't need to create an intermediate LoRA, set the value here to the same value as "Epoch". + +### Caption Extension + +If you have prepared a caption file for each image, specify the extension of the caption file here. 
+ +If this is blank, the extension will be ".caption". If the extension of the caption file is ".txt", specify ".txt" here. + +If you don't have a caption file, you can ignore it. + +### Mixed precision + +Specifies the type of mixed precision for the weight data during training. + +The weight data is originally in 32-bit units (when "no" is selected), but if necessary, learning by mixing 16-bit unit data will lead to considerable memory savings and speedup. fp16 is a data format with half the precision, and bf16 is a data format devised to handle the same numerical range as 32-bit data. + +You can get LoRA with a sufficiently high accuracy at fp16. + +### Save precision + +Specifies the type of weight data to save in the LoRA file. + +float is 32-bit, fp16 and bf16 are 16-bit units. The latter two produce smaller files. + +The default is fp16. + +### Number of CPU threads per core + +The number of threads per CPU core during training. Basically, the higher the number, the higher the efficiency, but it is necessary to adjust the setting according to your hardware specifications. + +Default is 2. + +### Seeds + +During learning, there are a number of random processes such as "in what order to read the images" and "how much noise to put on the training images" (details omitted). + +Seed is like an ID for determining the random processing procedure, and if the same Seed is specified, the same random procedure will be used each time, making it easier to reproduce the learning results. + +However, there are random processes that do not use this seed (such as randomly cropping images), so specifying the same seed does not always give the same learning results. + +Default is blank. If not specified, Seed will be set appropriately when training is executed. + +If you want to reproduce the result as much as possible, there is no harm in setting a number (such as 1234).
+ +### Cache latents + +The training image is read into VRAM, "compressed" to a state called Latent before entering U-Net, and is trained in VRAM in this state. Normally, images are "compressed" each time they are loaded, but you can specify that "compressed" images are kept in main memory by checking Cache latents. + +Keeping it in the main memory saves VRAM space and speeds up, but you can't process the image before "compression", so you can't use augmentation (described later) other than flip_aug. Also, random crop (described later), which crops the image in a random range each time, cannot be used. + +Default is on. + +### Cache latents to disk + +Similar to the Cache latents option, but checking this allows you to specify that compressed image data be saved to disk as temporary files. + +This temporary file can be reused even after restarting kohya_ss, so if you want to do LoRA learning with the same data many times, turning on this option will increase learning efficiency. + +However, if you turn this on, you will not be able to use augmentation and random crop other than flip_aug. + +Default is off. + +### Learning rate + +Specify the learning rate. " Learning" is to change the thickness (weight) of the wiring in the neural network so that a picture that looks exactly like the given picture can be made, but every time a picture is given, the wiring is changed. If you tune too much only to the given picture, you will not be able to draw other pictures at all. + +To avoid this, we change the weights slightly each time to incorporate a little bit more of the given picture. The "learning rate" determines the amount of this "just a little". + +The default value is 0.0001. + +### LR Scheduler + +You can change the learning rate in the middle of learning. A scheduler is a setting for how to change the learning rate. Possible values include: + +- `adafactor`: Select this to set the optimizer (described later) to Adafactor . 
Learn while automatically adjusting the learning rate according to the situation to save VRAM +- `constant`: the learning rate does not change from beginning to end +- `constant_with_warmup`: Start with a learning rate of 0 and gradually increase it toward the set value of Learning rate during warm-up, and use the set value of Learning rate during main learning. +- `cosine`: Gradually decrease the learning rate toward 0 while drawing a wave (cosine curve) +- `cosine_with_restarts`: repeat cosine many times (see also description of LR number of cycles) +- `linear`: Start at the Learning rate setting and decrease linearly towards 0 +- `polynomial`: Similar to linear, but the learning rate decreases along a more complex (polynomial) curve (see also LR power description) +Set to constant if you want the learning rate to be fixed at the Learning rate setting. + +Default is cosine. + +### LR warmup + +If you have selected constant_with_warmup in the scheduler, set here how long to warm up. + +The number specified here is a percentage of the total number of steps. + +For example, if you train 50 images 10 times with a batch size of 1 and do this for 2 epochs, the total number of steps is 50x10x2=1000. If you set LR warmup to 10, the first 10% of the 1000 total steps, or 100 steps, will be the warmup. + +You can ignore this if your scheduler is not constant_with_warmup. + +Default is 10. + +### Optimizer + +The optimizer is a setting for "how to update the neural net weights during training". Various methods have been proposed for smart learning, but the most commonly used in LoRA learning is "AdamW" (32-bit) or "AdamW8bit". AdamW8bit uses less VRAM and has enough accuracy, so if you are unsure, use this. + +In addition, "Adafactor", which adjusts the learning rate appropriately according to the progress of learning while incorporating Adam's method, is also often used (Learning rate setting is ignored when using Adafactor).
+ +"DAdapt" is an optimizer that adjusts the learning rate, and "Lion" is a relatively new optimizer , but it has not been fully verified yet. There is a report that "SGDNesterov" has good learning accuracy but slows down. + +The default is "AdamW8bit". There is no problem basically as it is. + +### Optimizer extra arguments + +If you want more granularity for a given optimizer , write the command here. + +You can usually leave this field blank. + +### Text Encoder learning rate + +Sets the learning rate for the text encoder . As I wrote earlier, the effect of additional training on text encoders affects the entire U-Net. + +Therefore, it is usually set lower than the learning rate (Unet learning rate) for each block of U-Net. + +The default value is 0.00005(5e-5). + +If you specify a number here, it takes precedence over the Learning rate value. + +### Unet learning rate + +Sets the learning rate for U-Net. This is the learning rate when performing additional learning on each attention block (and other blocks depending on the setting) in U-Net. + +The default value is 0.0001. + +If you specify a number here, it takes precedence over the Learning rate value. + +### Network Rank (Dimension) + +Specifies the number of neurons in the hidden layer of the "additional small neural net " described earlier in the article (see the figure above for details). + +The larger the number of neurons , the more learning information can be stored, but the possibility of learning unnecessary information other than the learning target increases, and the LoRA file size also increases. + +Generally, it is often set to a maximum of about 128, but there are reports that 32 is sufficient. + +When making LoRA on a trial basis, it may be better to start from around 2 to 8. + +Default is 8. + +### Network alpha + +This was introduced as a convenience measure to prevent weights from being rounded to 0 when saving LoRA. 
+ +Due to the structure of LoRA, the weight value of the neural network tends to be small, and if it becomes too small, it may become indistinguishable from zero (that is, the same as not learning anything). Therefore, a technique was proposed in which the actual (stored) weight value is kept large, but the weight is always weakened at a constant rate during learning to make the weight value appear smaller. Network alpha determines this "weight weakening rate". + +The smaller the Network alpha value, the larger the stored LoRA neural net weights. + +How much the weight weakens when used (usage strength) is calculated by "Network_Alpha/Network_Rank" (roughly a value between 0 and 1) and is closely related to the Network Rank number. + +If the accuracy of LoRA after learning is not good enough, the weight data may be too small and collapsed to 0. In such a case, try lowering the Network Alpha value (=increasing the save weight value). + +The default is 1 (that is, maximize the stored weight value). + +If Network Alpha and Network Rank have the same value, the effect will be turned off. + +*Network Alpha value must not exceed Network Rank value. It is possible to specify a higher number, but there is a high probability that it will result in an unintended LoRA. + +Also, when setting the Network Alpha, you should consider the effect on the learning rate. + +For example, with an Alpha of 16 and a Rank of 32, the strength of the weight used is 16/32 = 0.5, meaning that the learning rate is only half as powerful as the Learning Rate setting. + +If Alpha and Rank are the same number, the strength used will be 1 and will have no effect on the learning rate. + +### Max resolution + +Specify the maximum resolution of training images in the order of "width, height". If the training images exceed the resolution specified here, they will be scaled down to this resolution. + +The default is "512,512". 
Many models use images of this size, so it is safe to use images of this size when learning LoRA. + +### Stop text encoder training + +You can stop learning the text encoder in the middle. As I wrote above, updating the text encoder has a big impact on the whole, so it is easy to fall into overfitting (tuning too closely to the training images, so that other images cannot be drawn). Stopping the text encoder's learning at a moderate point is one way to prevent overfitting. + +The number specified here is a percentage of the total number of training steps. Once learning reaches this percentage, the text encoder stops learning. + +For example, if the total number of steps is 1000 and you specify 80 here, the text encoder will finish training when the learning progress is 80%, i.e. 1000x0.8=800 steps. + +Training of U-Net continues with 200 remaining steps. + +If this is 0, the text encoder training will not stop until the end. + +### Enable buckets + +A "bucket" is, as the name suggests, a "bucket" (container). The training images used in LoRA do not have to be of the same size, but images of different sizes cannot be trained at the same time. Therefore, it is necessary to sort the images into "buckets" according to their size before training. Put similar sized images in the same bucket and different sized images in different buckets. + +Default is on. + +If your training images are all the same size, you can turn this option off, but leaving it on has no effect in that case. + +*If you turn off Enable buckets when the size of the training images is not unified, the training images will be enlarged or reduced to have the same size. + +Enlargement and reduction are performed while maintaining the aspect ratio of the image. If the aspect ratio is not the same as the standard size, the vertical or horizontal size of the image after scaling may exceed the standard size.
For example, if the base size is 512x512 (aspect ratio 1) and the image size is 1536x1024 (aspect ratio 1.5), the image will be scaled down to 768x512 (the aspect ratio of 1.5 is preserved). + +## Advanced Configuration + +After this are the options in the Advanced Configuration section. + +### Weights, Blocks, Conv + +These are the "learning weight" and "rank" settings for each block in U-Net. Selecting each tab will bring up the corresponding configuration screen. + +*These settings are for advanced users. If you have no preference, you can leave all fields blank. + +#### Weights: Down LR weights/Mid LR weights/Up LR weights + +As you can see from the U-Net structure diagram, U-Net consists of 12 IN blocks, 1 MID block, and 12 OUT blocks, a total of 25 blocks. + +If you want different learning rate weights for each block, you can set them here individually. + +The weight here is the "strength of learning" represented by a numerical value of 0 to 1. If it is 0, it is "not learning at all", and if it is 1, it is "learning at the learning rate set in Learning rate". By changing this value, you can vary the intensity of learning. + +A weight of 0.5 means half the learning rate. + +"Down LR weights" specify the weights for each of the 12 IN blocks. + +"Mid LR weights" specifies the weights of the MID block. + +"Up LR weights" specify the weight of each of the 12 OUT blocks. + +#### Weights: Blocks LR zero threshold + +I explained that "LoRA adds neural nets", but it doesn't make sense to add neural nets with too small weights (i.e. barely learned). Therefore, you can set "Do not add neural nets to blocks with too small weights". + +The neural net will not be added to blocks whose weight does not exceed the value set here. For example, if you specify 0.1 here, the neural net will not be added to blocks with weights less than or equal to 0.1 (note that exclusions also include the specified value!). + +The default is blank, which is 0 (do nothing).
+ +#### Blocks: Block dims, Block alphas + +Here you can set different rank (dim) and alpha values for each of the 25 blocks IN0~11, MID, OUT0~11. + +See Network Rank, Network alpha for rank and alpha values. + +Blocks with higher rank are expected to hold more information. + +You must always specify 25 numbers for this parameter value, but since LoRA targets attention blocks, the settings for the blocks that do not have attention blocks, namely IN0, IN3, IN6, IN9, IN10, IN11, OUT0, OUT1, and OUT2 (1st, 4th, 7th, 10th, 11th, 12th, 14th, 15th, 16th digits), are ignored during learning. + +*This is a setting for advanced users. If you don't care, you can leave it blank. If not specified here, "Network Rank(Dimension)" value and "Network Alpha" value will be applied to all blocks. + +#### Conv: Conv dims, Conv alphas + +The attention block that LoRA learns from has a neural network called "Conv", which is also updated by additional learning (see the diagram of the attention layer structure at the top of the article). This is a process called "convolution", and the size of the "filter" used there is 1x1 square. + +Read this article about convolutions. + +On the other hand, some of the blocks other than Attention (Res, Down blocks) and some of the Attention blocks in OUT are convoluted using a 3x3 square filter. Originally, that is not the learning target of LoRA, but by specifying it with this parameter, the 3x3 convolution of the Res block can also be the learning target. + +Since there are more learning targets, there is a possibility that more precise LoRA learning can be performed. + +The setting method is the same as "Blocks: Block dims, Block alphas". + +A 3x3 conv exists on all 25 layers. + +*This is a setting for advanced users. If you don't care, you can leave it blank. + +### No token padding + +Captions attached to training images are processed every 75 tokens ("tokens" can basically be regarded as "words").
+ +If the caption length is less than 75 tokens, it is padded to align it to 75 tokens. This is called "padding". + +Here you can specify not to pad tokens. + +Default is off. You can basically leave it off. + +### Gradient accumulation steps + +Changing the weights (that is, "learning") is usually done for each batch read, but it is also possible to do multiple batches of training at once. This option specifies how many batches to learn at once. + +This has a similar effect (not the "same effect"!) as increasing the number of batches. + +For example, if the batch size is 4, the number of images read simultaneously in one batch is 4. In other words, one learning is performed every four readings. If we set the Gradient accumulation steps to 2, training will be performed once every 2 batches, resulting in 1 learning per 8 reads. This works similarly (but not the same!) as a batch size of 8. + +If you increase this value, the number of times of learning will decrease, so the processing will be faster, but it will consume more memory. + +Default is 1. + +### Weighted captions + +Currently, the most popular Stable Diffusion usage environment is "Stable Diffusion WebUI", which has a unique prompt description method. For example, if you want to emphasize "black" very strongly when specifying "black cat" at the prompt, put the word you want to emphasize in parentheses and add ":number" after the word, like "(black:1.2) cat". The word is then emphasized by that multiple. + +This option allows this notation to be used in the training image captions as well. + +If you want to write complex captions, it's a good idea to give it a try. + +Default is off. + +### Prior loss weight + +The prior loss weight determines how much importance is given to the "regularization images" (see the description of the Regularization folder above for details) during training.
+ +If this value is low, the regularization images are considered less important, and LoRA is generated that is more characteristic of the training images. + +This setting has no meaning if you are not using a regularized image. + +This is a value between 0 and 1, and defaults to 1 ( also respects regularized images). + +### LR number of cycles + +If you select " Cosine with restart" or "Polynomial" for the scheduler, this option specifies how many cycles the scheduler runs during training. + +If the number of this option is 2 or greater, the scheduler will run multiple times during a single training run. + +In both Cosine with restart and Polynomial, the learning rate gradually decreases to 0 as learning progresses, but if the number of cycles is 2 or more, the learning rate is reset and restarted when the learning rate reaches 0. + +The figure below (source) is an example of the change in learning rate for Cosine with restart (purple) and Polynomial (light green). + +The purple example has the number of cycles set to 4. The light green example has a cycle number of 1. + +Since the specified number of cycles is executed within the determined learning step, the more the number of cycles increases, the more the learning rate changes. + +Default is blank, leaving blank equals 1. + +Example of learning rate movement +Cosine with restart "LR number of cycle = 4" (purple) +Polynomial "LR power = 2" (light green) + +### LR power + +This is an option when the scheduler is set to Polynomial. The higher this number, the steeper the initial learning rate drops. (The slope of the light green line in the image above becomes steeper). + +When power is 1, it has the same shape as the linear scheduler. + +If the number is too large, the learning rate will stick close to 0, resulting in insufficient learning, so be careful. + +Defaults to blank, leaving blank equals 1 (that is, the same as the linear scheduler). 
+ +### Additional parameters + +If you want to tweak learning setting parameters that are not displayed in the kohya_ss GUI, enter them here as commands. + +You can usually leave this field blank. + +### Save every N steps + +A LoRA file is created and saved each time the number of steps specified here is completed. + +For example, when the total number of learning steps is 1000, if you specify 200 here, LoRA files will be saved at the end of 200, 400, 600, and 800 steps. + +See also "Save every N epochs" for saving intermediate LoRA. + +Default is 0 (do not save intermediate LoRA). + +### Save last N steps + +This is an option when Save every N steps is specified to save LoRA during learning. + +If you want to keep only recent LoRA files and discard old LoRA files, you can set "how many recent steps of LoRA files to keep" here. + +For example, suppose the total number of training steps is 600 and the Save every N steps option is specified to save every 100 steps. Then LoRA files will be saved at the 100th, 200th, 300th, 400th, and 500th steps, but if Save last N steps is set to 300, only the last 300 steps of LoRA files will be kept. In other words, at the 500th step, LoRA older than the 200th (=500-300) step (that is, LoRA at the 100th step) is deleted. + +Default is 0. + +### Keep n tokens + +If your training images have captions, you can randomly shuffle the comma-separated words in the captions (see Shuffle caption option for details). However, if you have words that you want to keep at the beginning, you can use this option to specify "Keep the first N words at the beginning". + +The number of first words specified here will always be fixed at the beginning. + +Default is 0. This option does nothing if the shuffle caption option is off. + +- A "word" here is a piece of text separated by commas. No matter how many words the delimited text contains, it counts as "one word". + +In the case of "black cat, eating, sitting", "black cat" is one word.
+ +### Clip skip + +The text encoder uses a mechanism called "CLIP", which is made up of 12 similar layers. + +Texts (tokens) are originally converted to numeric sequences (vectors) through these 12 layers, and the vectors coming out of the last layer are sent to the U-Net Attention block. + +However, the model developed independently by the service "Novel AI", commonly known as "Novel AI model", adopted a unique specification that uses the vector output by the second to last layer instead of the last layer. The same is true for models derived from Novel AI models. Therefore, it is necessary to specify "from which layer of CLIP the base model used for learning takes its vectors". + +"Clip skip" specifies the layer number of this "Xth from the end". + +Setting this to 2 sends the penultimate layer's output vector to the Attention block. If 1, the output vector of the last layer is used. + +If the base model is a Novel AI model (or a mix of them), 2 should be fine. In other cases, 1 is fine. + +### Max Token Length + +Specifies the maximum number of tokens included in the caption. + +The "length" here is not the number of words, but the number of tokens. Note that commas also count as one token. + +It's unlikely that you'll use more than 75 tokens in your caption, but if you find your caption to be too long, specify a higher number here. + +### Full fp16 training (experimental) + +When the option "Mixed precision" described above is turned on (fp16 or bf16), a mixture of 32-bit and 16-bit data is used during training, but when this option is turned on, all weight data is 16-bit (fp16 format). Although it saves memory, the accuracy of some data is halved, so there is a possibility that the learning accuracy will also drop. + +Default is off. You should leave it off unless you really want to save memory.
+ +### Gradient checkpointing + +Normally, during training, we modify and update the weights of a large number of neural nets all at once each time an image is loaded. By fixing this "gradually" rather than "all at once," you can save memory by reducing computation. + +This option specifies that the weight calculation should be done incrementally. Turning this on or off will have no effect on LoRA's learning results. + +Default is off. + +### Shuffle caption + +If the training images have captions, most of the captions are written in the form of words separated by commas, such as " black cat , eating, sitting". The Shuffle caption option randomly changes the order of these comma-separated words each time. + +Words in captions are generally given more weight the closer they are to the beginning. Therefore, if the word order is fixed, backward words may not be learned well, and forward words may have unintended associations with training images. It is hoped that this bias can be corrected by reordering the words each time the image is loaded. + +This option has no meaning if the caption is written in sentences instead of comma separated. + +Default is off. + +- A "word" here is a piece of text separated by commas. No matter how many words the delimited text contains, it counts as "one word". + +In the case of " black cat , eating, sitting", " black cat " is one word. + +### Persistent data loaders + +The data required for training is discarded and reloaded after each epoch. This is an option to keep it instead of throwing it away. Turning this option on speeds up the start of training for new epochs, but uses more memory to hold the data. + +Default is off. + +### Memory efficient attention + +If this is checked, VRAM usage is suppressed and attention block processing is performed. It's slower than the next option "xformers". Turn it on if you don't have enough VRAM. + +Default is off. 
+ +### Use xformers + +If this is turned on, attention block processing is performed using a Python library called "xformers", which reduces VRAM usage at the cost of some speed. Turn it on if you don't have enough VRAM. + +Default is on. + +### Color augmentation + +"Augmentation" means artificially inflating (padding out) the set of images. By slightly processing the training images each time, we artificially increase the number of types of training images. + +When Color Augmentation is turned on, the Hue of the image is changed randomly each time. LoRA learned from this is expected to have a slight range in color tone. + +Not available if the Cache latents option is on. + +Default is off. + +### Flip augmentation + +If this option is turned on, the image will be horizontally flipped randomly. It can learn left and right angles, which is useful when you want to learn symmetrical people and objects. + +Default is off. + +### Min SNR gamma + +In LoRA learning, learning is performed by putting noise of various strengths on the training image (details about this are omitted), but depending on the strength of the noise that is applied, learning moves closer to or farther from the learning target and does not become stable, and the Min SNR gamma was introduced to compensate for that. Especially when learning images with little noise on them, it may deviate greatly from the target, so this setting tries to suppress this jump. + +I won't go into details because it's confusing, but you can set this value from 0 to 20, and the default is 0. + +According to the paper that proposed this method, the optimal value is 5. + +I don't know how effective it is, but if you're unsatisfied with the learning results, try different values. + +### Don't upscale bucket resolution + +The Bucket size defaults to 256-1024 pixels (or a maximum resolution if specified with the Max resolution option, which takes precedence).
Images that fall outside this size range, either vertically or horizontally, will be scaled (preserving the aspect ratio) to fit within the specified range. + +However, when this option is turned on, the bucket size range setting is ignored and the buckets are automatically prepared according to the size of the training images, so all training images are loaded unscaled. However, even at this time, some parts of the image may be cropped to fit the Bucket resolution steps (described later). + +Default is on. + +### Bucket resolution steps + +If using buckets, specify the resolution interval for each bucket here. + +For example, if you specify 64 here, each training image will be sorted into separate buckets by 64 pixels according to their size. This sorting is done for both the vertical and horizontal dimensions. + +If the image size does not fit the specified size of the bucket, the protruding part will be cut off. + +For example, if the maximum resolution is 512 pixels and the bucket step size is every 64 pixels, then the buckets will be 512, 448, 384... but a 500 pixel image will be put into a 448 pixel bucket, and the extra 52 pixels are clipped. + +Default is 64 pixels. + +- If this number is too small, the buckets will be divided too finely, and in the worst case, it will be like "one bucket for each image". + +Note that we always load images from the same bucket for each batch, so having too few images in a bucket will unintentionally reduce the number of batches. + +### Random crop instead of center crop + +As mentioned above, odd-sized images are sorted into buckets and then partly cropped to align the size, but usually it is cropped so as to keep the center of the image. + +When this option is on, it randomly determines which part of the picture is cut. Turn on this option if you want to extend the learning range beyond the center of the image. + +*This option cannot be used when the cache latents option is on.
+ +### Noise offset type + +This is an option to specify which method to use when adding additional noise to training images. At the time of learning, we always add noise to the image (details are omitted here), but it is preferable that this noise is "hard to predict" noise, so additional noise is added to make it even more "hard to predict". + +Default is Original. Multires adds noise in a slightly more complicated way. + +#### Noise offset + +This is an option when "Original" is selected for Noise offset type. If you enter a value greater than 0 here, additional noise will be added. Values range from 0 to 1, where 0 adds no noise at all. A value of 1 adds strong noise. + +It has been reported that adding about 0.1 noise makes LoRA's colors more vivid (brighter and darker). Default is 0. + +#### Adaptive noise scale + +Used in combination with the Noise offset option. Specifying a number here will further adjust the amount of additional noise specified by Noise offset to be amplified or attenuated. The amount of amplification (or attenuation) is automatically adjusted depending on how noisy the image is currently. Values range from -1 to 1, with positive values increasing the amount of added noise and negative values decreasing the amount of added noise. + +Default is 0. + +#### Multires noise iterations + +This is an option when "Multires" is selected for Noise offset type. If you enter a value greater than 0 here, additional noise will be added. + +Multires creates noise of various resolutions and adds them together to create the final additive noise. Here you specify how many "various resolutions" to create. + +Default is 0, when 0 there is no additional noise. It is recommended to set it to 6 if you want to use it. + +#### Multires noise discount + +Pair with the Multires noise iterations option. It is a numerical value for weakening the noise amount of each resolution to some extent. A value between 0 and 1, the lower the number, the weaker the noise.
By the way, the amount of attenuation differs depending on the resolution, and noise with low resolution is attenuated a lot. + +Default is 0, if 0 it will be set to 0.3 when used. 0.8 is usually recommended. If the number of training images is relatively small, it seems to be good to lower it to about 0.3. + +### Dropout caption every n epochs + +Normally, images and captions are trained in pairs, but it is possible to train only "images without captions" without using captions for each specific epoch. + +This option allows you to specify "Don't use captions every N epochs (Dropout)". + +For example, if you specify 2 here, image learning without captions will be performed every 2 epochs (2nd epoch, 4th epoch, 6th epoch...). + +When learning images without captions, LoRA is expected to learn more comprehensive image features. It can also be expected to have the effect of not associating too many image features with specific words. However, if you train without captions too often, the LoRA may become a LoRA that does not respond to prompts, so be careful. + +The default is 0, which means no caption dropout. + +### Rate of caption dropout + +It is similar to Dropout caption every n epochs above, but you can learn as "images without captions" without using captions for a certain percentage of the entire learning process. + +Here you can set the percentage of images without captions. 0 is the setting for "always use captions during learning", and 1 is the setting for "never use captions during learning". + +It is random which images are learned as "images without captions". + +For example, if 20 images are read 50 times each and LoRA learning is performed for only 1 epoch, the total number of image learning is 20 images x 50 times x 1 epoch = 1000 times. At this time, if the Rate of caption dropout is set to 0.1, 1000 times x 0.1 = 100 times will be learned as "images without captions". + +Default is 0, which trains all images with captions.
+ +### VAE batch size + +If you turn on the Cache latents option, you can keep the "compressed" image data in the main memory, and the VAE batch size sets how many of these "compressed" images are processed together as one batch. Since the number of images specified by batch size is learned at once, it is normal to match the VAE batch size with this. + +Default is 0, in which case it is set to the same number as Batch size. + +### Save training state + +LoRA will take a long time to train if there are many training images, number of iterations, and number of epochs. + +If you turn on this option, you can interrupt training in the middle and resume it from where you left off at a later date. + +Intermediate learning data is saved in a folder called "last-state". + +### Resume from saved training state + +Specify the location of the "last-state" folder here if you want to resume learning that has been interrupted. + +In order to resume learning, the intermediate progress data of learning must be saved. + +### Max train epoch + +Specify the maximum number of epochs for training. It is basic to specify the number of epochs with the Epoch option, but learning will always end when the number of epochs specified here is reached. + +Default is blank. You can leave this field blank. + +### Max num workers for DataLoader + +This option specifies the number of CPU processes to use when reading data for training. Increasing this number will enable subprocesses and increase the speed of reading data, but increasing the number too much may actually result in inefficiency. + +Note that no matter how large the number is specified, it will not exceed the number of concurrently executing threads of the CPU used. + +The default is 0, which loads data only in the CPU's main process. + +### WANDB API Key + +There is a machine learning service called "WandB" (Weights&Biases). This is a service that displays the progress of learning in graphs to find the optimal settings, records and shares learning logs online, and kohya_ss can now use this service.
+ +However, you will need an account for this service. After creating an account, you can get an " API key" from . If you enter the acquired API key here, you will be automatically logged in when learning and you will be able to link with WandB services. + +I won't go into details about WandB, but if you want to become a "LoRA craftsman", give it a try. + +### WANDB Logging + +Here you can specify whether or not to record learning progress logs using the WandB service. + +The default is off, and when off, it logs in the form of a tool called 'tensorboard'. + +## Sample images config + +If you want to check what image generation with LoRA looks like while learning, enter the image generation prompt here. + +However, since LoRA has a relatively short learning time, there may not be much need for image generation tests. + +### Sample every n steps + +Specify at what step you want to generate an image during learning. For example, specifying 100 will generate an image every 100 steps. + +Default is 0, if 0 no image is generated. + +### Sample every n epochs + +Specifies the number of epochs to generate images during training. For example, 2 will generate an image every 2 epochs. + +Default is 0, if 0 no image is generated. + +### Sample sampler + +Specifies the sampler to use for image generation . Many of the samplers specified here are the same as the samplers provided in the Stable Diffusion Web UI , so please refer to the web UI explanation site for details. + +The default is euler_a. + +### Sample prompts + +Enter the prompt here. + +However, you can enter other settings here than just prompts. If you want to enter other settings, specify the setting by combining two minus letters and alphabets like "--n". For example, if you want to put "white, dog" in the negative prompt, write "--n white, dog". 
+ +Here are some commonly used settings: + +--n: negative prompt + +--w: image width + +--h: image height + +--d: Seeds + +--l: CFG Scale + +--s: number of steps + +Default is blank. When the field is blank, the description example is displayed in faint color, so please refer to it. diff --git a/docs/LoRA/top_level.md b/docs/LoRA/top_level.md new file mode 100644 index 0000000000000000000000000000000000000000..b85e6dc9ce4d05283da504a37f484117e27637f0 --- /dev/null +++ b/docs/LoRA/top_level.md @@ -0,0 +1,26 @@ +# LoRA Resource Guide + +This guide is a resource compilation to facilitate the development of robust LoRA models. + +Access EDG's tutorials here: https://ko-fi.com/post/EDGs-tutorials-P5P6KT5MT + +## Guidelines for SDXL LoRA Training + +- Set the `Max resolution` to at least 1024x1024, as this is the standard resolution for SDXL. +- Use a GPU that has at least 12GB memory for the LoRA training process. +- We strongly recommend using the `--network_train_unet_only` option for SDXL LoRA to avoid unforeseen training results caused by dual text encoders in SDXL. +- PyTorch 2 tends to use less GPU memory than PyTorch 1. + +Here's an example configuration for the Adafactor optimizer with a fixed learning rate: + +``` +optimizer_type = "adafactor" +optimizer_args = [ "scale_parameter=False", "relative_step=False", "warmup_init=False" ] +lr_scheduler = "constant_with_warmup" +lr_warmup_steps = 100 +learning_rate = 4e-7 # This is the standard learning rate for SDXL +``` + +## Resource Contributions + +If you have valuable resources to add, kindly create a PR on Github. \ No newline at end of file diff --git a/docs/config_README-ja.md b/docs/config_README-ja.md new file mode 100644 index 0000000000000000000000000000000000000000..ec339f88027b7d38ad8c917ec69857eeb646029c --- /dev/null +++ b/docs/config_README-ja.md @@ -0,0 +1,283 @@ +For non-Japanese speakers: this README is provided only in Japanese in the current state. Sorry for inconvenience. 
We will provide English version in the near future. + +`--dataset_config` で渡すことができる設定ファイルに関する説明です。 + +## 概要 + +設定ファイルを渡すことにより、ユーザが細かい設定を行えるようにします。 + +* 複数のデータセットが設定可能になります + * 例えば `resolution` をデータセットごとに設定して、それらを混合して学習できます。 + * DreamBooth の手法と fine tuning の手法の両方に対応している学習方法では、DreamBooth 方式と fine tuning 方式のデータセットを混合することが可能です。 +* サブセットごとに設定を変更することが可能になります + * データセットを画像ディレクトリ別またはメタデータ別に分割したものがサブセットです。いくつかのサブセットが集まってデータセットを構成します。 + * `keep_tokens` や `flip_aug` 等のオプションはサブセットごとに設定可能です。一方、`resolution` や `batch_size` といったオプションはデータセットごとに設定可能で、同じデータセットに属するサブセットでは値が共通になります。詳しくは後述します。 + +設定ファイルの形式は JSON か TOML を利用できます。記述のしやすさを考えると [TOML](https://toml.io/ja/v1.0.0-rc.2) を利用するのがオススメです。以下、TOML の利用を前提に説明します。 + +TOML で記述した設定ファイルの例です。 + +```toml +[general] +shuffle_caption = true +caption_extension = '.txt' +keep_tokens = 1 + +# これは DreamBooth 方式のデータセット +[[datasets]] +resolution = 512 +batch_size = 4 +keep_tokens = 2 + + [[datasets.subsets]] + image_dir = 'C:\hoge' + class_tokens = 'hoge girl' + # このサブセットは keep_tokens = 2 (所属する datasets の値が使われる) + + [[datasets.subsets]] + image_dir = 'C:\fuga' + class_tokens = 'fuga boy' + keep_tokens = 3 + + [[datasets.subsets]] + is_reg = true + image_dir = 'C:\reg' + class_tokens = 'human' + keep_tokens = 1 + +# これは fine tuning 方式のデータセット +[[datasets]] +resolution = [768, 768] +batch_size = 2 + + [[datasets.subsets]] + image_dir = 'C:\piyo' + metadata_file = 'C:\piyo\piyo_md.json' + # このサブセットは keep_tokens = 1 (general の値が使われる) +``` + +この例では、3 つのディレクトリを DreamBooth 方式のデータセットとして 512x512 (batch size 4) で学習させ、1 つのディレクトリを fine tuning 方式のデータセットとして 768x768 (batch size 2) で学習させることになります。 + +## データセット・サブセットに関する設定 + +データセット・サブセットに関する設定は、登録可能な箇所がいくつかに分かれています。 + +* `[general]` + * 全データセットまたは全サブセットに適用されるオプションを指定する箇所です。 + * データセットごとの設定及びサブセットごとの設定に同名のオプションが存在していた場合には、データセット・サブセットごとの設定が優先されます。 +* `[[datasets]]` + * `datasets` はデータセットに関する設定の登録箇所になります。各データセットに個別に適用されるオプションを指定する箇所です。 + * サブセットごとの設定が存在していた場合には、サブセットごとの設定が優先されます。 +* `[[datasets.subsets]]` + * 
`datasets.subsets` はサブセットに関する設定の登録箇所になります。各サブセットに個別に適用されるオプションを指定する箇所です。 + +先程の例における、画像ディレクトリと登録箇所の対応に関するイメージ図です。 + +``` +C:\ +├─ hoge -> [[datasets.subsets]] No.1 ┐ ┐ +├─ fuga -> [[datasets.subsets]] No.2 |-> [[datasets]] No.1 |-> [general] +├─ reg -> [[datasets.subsets]] No.3 ┘ | +└─ piyo -> [[datasets.subsets]] No.4 --> [[datasets]] No.2 ┘ +``` + +画像ディレクトリがそれぞれ1つの `[[datasets.subsets]]` に対応しています。そして `[[datasets.subsets]]` が1つ以上組み合わさって1つの `[[datasets]]` を構成します。`[general]` には全ての `[[datasets]]`, `[[datasets.subsets]]` が属します。 + +登録箇所ごとに指定可能なオプションは異なりますが、同名のオプションが指定された場合は下位の登録箇所にある値が優先されます。先程の例の `keep_tokens` オプションの扱われ方を確認してもらうと理解しやすいかと思います。 + +加えて、学習方法が対応している手法によっても指定可能なオプションが変化します。 + +* DreamBooth 方式専用のオプション +* fine tuning 方式専用のオプション +* caption dropout の手法が使える場合のオプション + +DreamBooth の手法と fine tuning の手法の両方とも利用可能な学習方法では、両者を併用することができます。 +併用する際の注意点として、DreamBooth 方式なのか fine tuning 方式なのかはデータセット単位で判別を行っているため、同じデータセット中に DreamBooth 方式のサブセットと fine tuning 方式のサブセットを混在させることはできません。 +つまり、これらを併用したい場合には異なる方式のサブセットが異なるデータセットに所属するように設定する必要があります。 + +プログラムの挙動としては、後述する `metadata_file` オプションが存在していたら fine tuning 方式のサブセットだと判断します。 +そのため、同一のデータセットに所属するサブセットについて言うと、「全てが `metadata_file` オプションを持つ」か「全てが `metadata_file` オプションを持たない」かのどちらかになっていれば問題ありません。 + +以下、利用可能なオプションを説明します。コマンドライン引数と名称が同一のオプションについては、基本的に説明を割愛します。他の README を参照してください。 + +### 全学習方法で共通のオプション + +学習方法によらずに指定可能なオプションです。 + +#### データセット向けオプション + +データセットの設定に関わるオプションです。`datasets.subsets` には記述できません。 + +| オプション名 | 設定例 | `[general]` | `[[datasets]]` | +| ---- | ---- | ---- | ---- | +| `batch_size` | `1` | o | o | +| `bucket_no_upscale` | `true` | o | o | +| `bucket_reso_steps` | `64` | o | o | +| `enable_bucket` | `true` | o | o | +| `max_bucket_reso` | `1024` | o | o | +| `min_bucket_reso` | `128` | o | o | +| `resolution` | `256`, `[512, 512]` | o | o | + +* `batch_size` + * コマンドライン引数の `--train_batch_size` と同等です。 + +これらの設定はデータセットごとに固定です。 +つまり、データセットに所属するサブセットはこれらの設定を共有することになります。 
+例えば解像度が異なるデータセットを用意したい場合は、上に挙げた例のように別々のデータセットとして定義すれば別々の解像度を設定可能です。 + +#### サブセット向けオプション + +サブセットの設定に関わるオプションです。 + +| オプション名 | 設定例 | `[general]` | `[[datasets]]` | `[[dataset.subsets]]` | +| ---- | ---- | ---- | ---- | ---- | +| `color_aug` | `false` | o | o | o | +| `face_crop_aug_range` | `[1.0, 3.0]` | o | o | o | +| `flip_aug` | `true` | o | o | o | +| `keep_tokens` | `2` | o | o | o | +| `num_repeats` | `10` | o | o | o | +| `random_crop` | `false` | o | o | o | +| `shuffle_caption` | `true` | o | o | o | +| `caption_prefix` | `“masterpiece, best quality, ”` | o | o | o | +| `caption_suffix` | `“, from side”` | o | o | o | + +* `num_repeats` + * サブセットの画像の繰り返し回数を指定します。fine tuning における `--dataset_repeats` に相当しますが、`num_repeats` はどの学習方法でも指定可能です。 +* `caption_prefix`, `caption_suffix` + * キャプションの前、後に付与する文字列を指定します。シャッフルはこれらの文字列を含めた状態で行われます。`keep_tokens` を指定する場合には注意してください。 + +### DreamBooth 方式専用のオプション + +DreamBooth 方式のオプションは、サブセット向けオプションのみ存在します。 + +#### サブセット向けオプション + +DreamBooth 方式のサブセットの設定に関わるオプションです。 + +| オプション名 | 設定例 | `[general]` | `[[datasets]]` | `[[dataset.subsets]]` | +| ---- | ---- | ---- | ---- | ---- | +| `image_dir` | `‘C:\hoge’` | - | - | o(必須) | +| `caption_extension` | `".txt"` | o | o | o | +| `class_tokens` | `“sks girl”` | - | - | o | +| `is_reg` | `false` | - | - | o | + +まず注意点として、 `image_dir` には画像ファイルが直下に置かれているパスを指定する必要があります。従来の DreamBooth の手法ではサブディレクトリに画像を置く必要がありましたが、そちらとは仕様に互換性がありません。また、`5_cat` のようなフォルダ名にしても、画像の繰り返し回数とクラス名は反映されません。これらを個別に設定したい場合、`num_repeats` と `class_tokens` で明示的に指定する必要があることに注意してください。 + +* `image_dir` + * 画像ディレクトリのパスを指定します。指定必須オプションです。 + * 画像はディレクトリ直下に置かれている必要があります。 +* `class_tokens` + * クラストークンを設定します。 + * 画像に対応する caption ファイルが存在しない場合にのみ学習時に利用されます。利用するかどうかの判定は画像ごとに行います。`class_tokens` を指定しなかった場合に caption ファイルも見つからなかった場合にはエラーになります。 +* `is_reg` + * サブセットの画像が正規化用かどうかを指定します。指定しなかった場合は `false` として、つまり正規化画像ではないとして扱います。 + +### fine tuning 方式専用のオプション + +fine tuning 方式のオプションは、サブセット向けオプションのみ存在します。 + +#### サブセット向けオプション + +fine tuning 
方式のサブセットの設定に関わるオプションです。 + +| オプション名 | 設定例 | `[general]` | `[[datasets]]` | `[[dataset.subsets]]` | +| ---- | ---- | ---- | ---- | ---- | +| `image_dir` | `‘C:\hoge’` | - | - | o | +| `metadata_file` | `'C:\piyo\piyo_md.json'` | - | - | o(必須) | + +* `image_dir` + * 画像ディレクトリのパスを指定します。DreamBooth の手法の方とは異なり指定は必須ではありませんが、設定することを推奨します。 + * 指定する必要がない状況としては、メタデータファイルの生成時に `--full_path` を付与して実行していた場合です。 + * 画像はディレクトリ直下に置かれている必要があります。 +* `metadata_file` + * サブセットで利用されるメタデータファイルのパスを指定します。指定必須オプションです。 + * コマンドライン引数の `--in_json` と同等です。 + * サブセットごとにメタデータファイルを指定する必要がある仕様上、ディレクトリを跨いだメタデータを1つのメタデータファイルとして作成することは避けた方が良いでしょう。画像ディレクトリごとにメタデータファイルを用意し、それらを別々のサブセットとして登録することを強く推奨します。 + +### caption dropout の手法が使える場合に指定可能なオプション + +caption dropout の手法が使える場合のオプションは、サブセット向けオプションのみ存在します。 +DreamBooth 方式か fine tuning 方式かに関わらず、caption dropout に対応している学習方法であれば指定可能です。 + +#### サブセット向けオプション + +caption dropout が使えるサブセットの設定に関わるオプションです。 + +| オプション名 | `[general]` | `[[datasets]]` | `[[dataset.subsets]]` | +| ---- | ---- | ---- | ---- | +| `caption_dropout_every_n_epochs` | o | o | o | +| `caption_dropout_rate` | o | o | o | +| `caption_tag_dropout_rate` | o | o | o | + +## 重複したサブセットが存在する時の挙動 + +DreamBooth 方式のデータセットの場合、その中にある `image_dir` が同一のサブセットは重複していると見なされます。 +fine tuning 方式のデータセットの場合は、その中にある `metadata_file` が同一のサブセットは重複していると見なされます。 +データセット中に重複したサブセットが存在する場合、2個目以降は無視されます。 + +一方、異なるデータセットに所属している場合は、重複しているとは見なされません。 +例えば、以下のように同一の `image_dir` を持つサブセットを別々のデータセットに入れた場合には、重複していないと見なします。 +これは、同じ画像でも異なる解像度で学習したい場合に役立ちます。 + +```toml +# 別々のデータセットに存在している場合は重複とは見なされず、両方とも学習に使われる + +[[datasets]] +resolution = 512 + + [[datasets.subsets]] + image_dir = 'C:\hoge' + +[[datasets]] +resolution = 768 + + [[datasets.subsets]] + image_dir = 'C:\hoge' +``` + +## コマンドライン引数との併用 + +設定ファイルのオプションの中には、コマンドライン引数のオプションと役割が重複しているものがあります。 + +以下に挙げるコマンドライン引数のオプションは、設定ファイルを渡した場合には無視されます。 + +* `--train_data_dir` +* `--reg_data_dir` +* `--in_json` + 
+以下に挙げるコマンドライン引数のオプションは、コマンドライン引数と設定ファイルで同時に指定された場合、コマンドライン引数の値よりも設定ファイルの値が優先されます。特に断りがなければ同名のオプションとなります。 + +| コマンドライン引数のオプション | 優先される設定ファイルのオプション | +| ---------------------------------- | ---------------------------------- | +| `--bucket_no_upscale` | | +| `--bucket_reso_steps` | | +| `--caption_dropout_every_n_epochs` | | +| `--caption_dropout_rate` | | +| `--caption_extension` | | +| `--caption_tag_dropout_rate` | | +| `--color_aug` | | +| `--dataset_repeats` | `num_repeats` | +| `--enable_bucket` | | +| `--face_crop_aug_range` | | +| `--flip_aug` | | +| `--keep_tokens` | | +| `--min_bucket_reso` | | +| `--random_crop` | | +| `--resolution` | | +| `--shuffle_caption` | | +| `--train_batch_size` | `batch_size` | + +## エラーの手引き + +現在、外部ライブラリを利用して設定ファイルの記述が正しいかどうかをチェックしているのですが、整備が行き届いておらずエラーメッセージがわかりづらいという問題があります。 +将来的にはこの問題の改善に取り組む予定です。 + +次善策として、頻出のエラーとその対処法について載せておきます。 +正しいはずなのにエラーが出る場合、エラー内容がどうしても分からない場合は、バグかもしれないのでご連絡ください。 + +* `voluptuous.error.MultipleInvalid: required key not provided @ ...`: 指定必須のオプションが指定されていないというエラーです。指定を忘れているか、オプション名を間違って記述している可能性が高いです。 + * `...` の箇所にはエラーが発生した場所が載っています。例えば `voluptuous.error.MultipleInvalid: required key not provided @ data['datasets'][0]['subsets'][0]['image_dir']` のようなエラーが出たら、0 番目の `datasets` 中の 0 番目の `subsets` の設定に `image_dir` が存在しないということになります。 +* `voluptuous.error.MultipleInvalid: expected int for dictionary value @ ...`: 指定する値の形式が不正というエラーです。値の形式が間違っている可能性が高いです。`int` の部分は対象となるオプションによって変わります。この README に載っているオプションの「設定例」が役立つかもしれません。 +* `voluptuous.error.MultipleInvalid: extra keys not allowed @ ...`: 対応していないオプション名が存在している場合に発生するエラーです。オプション名を間違って記述しているか、誤って紛れ込んでいる可能性が高いです。 + + diff --git a/docs/fine_tune_README_ja.md b/docs/fine_tune_README_ja.md new file mode 100644 index 0000000000000000000000000000000000000000..758a00c5d136c3bd9781d2277720a33fc274159c --- /dev/null +++ b/docs/fine_tune_README_ja.md @@ -0,0 +1,140 @@ +NovelAIの提案した学習手法、自動キャプションニング、タグ付け、Windows+VRAM 12GB(SD v1.xの場合)環境等に対応したfine tuningです。ここでfine 
tuningとは、モデルを画像とキャプションで学習することを指します(LoRAやTextual Inversion、Hypernetworksは含みません) + +[学習についての共通ドキュメント](./train_README-ja.md) もあわせてご覧ください。 + +# 概要 + +Diffusersを用いてStable DiffusionのU-Netのfine tuningを行います。NovelAIの記事にある以下の改善に対応しています(Aspect Ratio BucketingについてはNovelAIのコードを参考にしましたが、最終的なコードはすべてオリジナルです)。 + +* CLIP(Text Encoder)の最後の層ではなく最後から二番目の層の出力を用いる。 +* 正方形以外の解像度での学習(Aspect Ratio Bucketing) 。 +* トークン長を75から225に拡張する。 +* BLIPによるキャプショニング(キャプションの自動作成)、DeepDanbooruまたはWD14Taggerによる自動タグ付けを行う。 +* Hypernetworkの学習にも対応する。 +* Stable Diffusion v2.0(baseおよび768/v)に対応。 +* VAEの出力をあらかじめ取得しディスクに保存しておくことで、学習の省メモリ化、高速化を図る。 + +デフォルトではText Encoderの学習は行いません。モデル全体のfine tuningではU-Netだけを学習するのが一般的なようです(NovelAIもそのようです)。オプション指定でText Encoderも学習対象とできます。 + +# 追加機能について + +## CLIPの出力の変更 + +プロンプトを画像に反映するため、テキストの特徴量への変換を行うのがCLIP(Text Encoder)です。Stable DiffusionではCLIPの最後の層の出力を用いていますが、それを最後から二番目の層の出力を用いるよう変更できます。NovelAIによると、これによりより正確にプロンプトが反映されるようになるとのことです。 +元のまま、最後の層の出力を用いることも可能です。 + +※Stable Diffusion 2.0では最後から二番目の層をデフォルトで使います。clip_skipオプションを指定しないでください。 + +## 正方形以外の解像度での学習 + +Stable Diffusionは512\*512で学習されていますが、それに加えて256\*1024や384\*640といった解像度でも学習します。これによりトリミングされる部分が減り、より正しくプロンプトと画像の関係が学習されることが期待されます。 +学習解像度はパラメータとして与えられた解像度の面積(=メモリ使用量)を超えない範囲で、64ピクセル単位で縦横に調整、作成されます。 + +機械学習では入力サイズをすべて統一するのが一般的ですが、特に制約があるわけではなく、実際は同一のバッチ内で統一されていれば大丈夫です。NovelAIの言うbucketingは、あらかじめ教師データを、アスペクト比に応じた学習解像度ごとに分類しておくことを指しているようです。そしてバッチを各bucket内の画像で作成することで、バッチの画像サイズを統一します。 + +## トークン長の75から225への拡張 + +Stable Diffusionでは最大75トークン(開始・終了を含むと77トークン)ですが、それを225トークンまで拡張します。 +ただしCLIPが受け付ける最大長は75トークンですので、225トークンの場合、単純に三分割してCLIPを呼び出してから結果を連結しています。 + +※これが望ましい実装なのかどうかはいまひとつわかりません。とりあえず動いてはいるようです。特に2.0では何も参考になる実装がないので独自に実装してあります。 + +※Automatic1111氏のWeb UIではカンマを意識して分割、といったこともしているようですが、私の場合はそこまでしておらず単純な分割です。 + +# 学習の手順 + +あらかじめこのリポジトリのREADMEを参照し、環境整備を行ってください。 + +## データの準備 + +[学習データの準備について](./train_README-ja.md) を参照してください。fine tuningではメタデータを用いるfine tuning方式のみ対応しています。 + +## 学習の実行 +たとえば以下のように実行します。以下は省メモリ化のための設定です。それぞれの行を必要に応じて書き換えてください。 + +``` +accelerate 
launch --num_cpu_threads_per_process 1 fine_tune.py + --pretrained_model_name_or_path=<.ckptまたは.safetensorsまたはDiffusers版モデルのディレクトリ> + --output_dir=<学習したモデルの出力先フォルダ> + --output_name=<学習したモデル出力時のファイル名> + --dataset_config=<データ準備で作成した.tomlファイル> + --save_model_as=safetensors + --learning_rate=5e-6 --max_train_steps=10000 + --use_8bit_adam --xformers --gradient_checkpointing + --mixed_precision=fp16 +``` + +`num_cpu_threads_per_process` には通常は1を指定するとよいようです。 + +`pretrained_model_name_or_path` に追加学習を行う元となるモデルを指定します。Stable Diffusionのcheckpointファイル(.ckptまたは.safetensors)、Diffusersのローカルディスクにあるモデルディレクトリ、DiffusersのモデルID("stabilityai/stable-diffusion-2"など)が指定できます。 + +`output_dir` に学習後のモデルを保存するフォルダを指定します。`output_name` にモデルのファイル名を拡張子を除いて指定します。`save_model_as` でsafetensors形式での保存を指定しています。 + +`dataset_config` に `.toml` ファイルを指定します。ファイル内でのバッチサイズ指定は、当初はメモリ消費を抑えるために `1` としてください。 + +学習させるステップ数 `max_train_steps` を10000とします。学習率 `learning_rate` はここでは5e-6を指定しています。 + +省メモリ化のため `mixed_precision="fp16"` を指定します(RTX30 シリーズ以降では `bf16` も指定できます。環境整備時にaccelerateに行った設定と合わせてください)。また `gradient_checkpointing` を指定します。 + +オプティマイザ(モデルを学習データにあうように最適化=学習させるクラス)にメモリ消費の少ない 8bit AdamW を使うため、 `optimizer_type="AdamW8bit"` を指定します。 + +`xformers` オプションを指定し、xformersのCrossAttentionを用います。xformersをインストールしていない場合やエラーとなる場合(環境にもよりますが `mixed_precision="no"` の場合など)、代わりに `mem_eff_attn` オプションを指定すると省メモリ版CrossAttentionを使用します(速度は遅くなります)。 + +ある程度メモリがある場合は、`.toml` ファイルを編集してバッチサイズをたとえば `4` くらいに増やしてください(高速化と精度向上の可能性があります)。 + +### よく使われるオプションについて + +以下の場合にはオプションに関するドキュメントを参照してください。 + +- Stable Diffusion 2.xまたはそこからの派生モデルを学習する +- clip skipを2以上を前提としたモデルを学習する +- 75トークンを超えたキャプションで学習する + +### バッチサイズについて + +モデル全体を学習するためLoRA等の学習に比べるとメモリ消費量は多くなります(DreamBoothと同じ)。 + +### 学習率について + +1e-6から5e-6程度が一般的なようです。他のfine tuningの例なども参照してみてください。 + +### 以前の形式のデータセット指定をした場合のコマンドライン + +解像度やバッチサイズをオプションで指定します。コマンドラインの例は以下の通りです。 + +``` +accelerate launch --num_cpu_threads_per_process 1 fine_tune.py + --pretrained_model_name_or_path=model.ckpt + --in_json meta_lat.json
+ --train_data_dir=train_data + --output_dir=fine_tuned + --shuffle_caption + --train_batch_size=1 --learning_rate=5e-6 --max_train_steps=10000 + --use_8bit_adam --xformers --gradient_checkpointing + --mixed_precision=bf16 + --save_every_n_epochs=4 +``` + + + +# fine tuning特有のその他の主なオプション + +すべてのオプションについては別文書を参照してください。 + +## `train_text_encoder` +Text Encoderも学習対象とします。メモリ使用量が若干増加します。 + +通常のfine tuningではText Encoderは学習対象としませんが(恐らくText Encoderの出力に従うようにU-Netを学習するため)、学習データ数が少ない場合には、DreamBoothのようにText Encoder側に学習させるのも有効的なようです。 + +## `diffusers_xformers` +スクリプト独自のxformers置換機能ではなくDiffusersのxformers機能を利用します。Hypernetworkの学習はできなくなります。 diff --git a/docs/gen_img_README-ja.md b/docs/gen_img_README-ja.md new file mode 100644 index 0000000000000000000000000000000000000000..a11dec6ae9b953fece91bdafe4ecafbc15143cc4 --- /dev/null +++ b/docs/gen_img_README-ja.md @@ -0,0 +1,487 @@ +SD 1.xおよび2.xのモデル、当リポジトリで学習したLoRA、ControlNet(v1.0のみ動作確認)などに対応した、Diffusersベースの推論(画像生成)スクリプトです。コマンドラインから用います。 + +# 概要 + +* Diffusers (v0.10.2) ベースの推論(画像生成)スクリプト。 +* SD 1.xおよび2.x (base/v-parameterization)モデルに対応。 +* txt2img、img2img、inpaintingに対応。 +* 対話モード、およびファイルからのプロンプト読み込み、連続生成に対応。 +* プロンプト1行あたりの生成枚数を指定可能。 +* 全体の繰り返し回数を指定可能。 +* `fp16`だけでなく`bf16`にも対応。 +* xformersに対応し高速生成が可能。 + * xformersにより省メモリ生成を行いますが、Automatic 1111氏のWeb UIほど最適化していないため、512*512の画像生成でおおむね6GB程度のVRAMを使用します。 +* プロンプトの225トークンへの拡張。ネガティブプロンプト、重みづけに対応。 +* Diffusersの各種samplerに対応(Web UIよりもsampler数は少ないです)。 +* Text Encoderのclip skip(最後からn番目の層の出力を用いる)に対応。 +* VAEの別途読み込み。 +* CLIP Guided Stable Diffusion、VGG16 Guided Stable Diffusion、Highres. fix、upscale対応。 + * Highres. 
fixはWeb UIの実装を全く確認していない独自実装のため、出力結果は異なるかもしれません。 +* LoRA対応。適用率指定、複数LoRA同時利用、重みのマージに対応。 + * Text EncoderとU-Netで別の適用率を指定することはできません。 +* Attention Coupleに対応。 +* ControlNet v1.0に対応。 +* 途中でモデルを切り替えることはできませんが、バッチファイルを組むことで対応できます。 +* 個人的に欲しくなった機能をいろいろ追加。 + +機能追加時にすべてのテストを行っているわけではないため、以前の機能に影響が出て一部機能が動かない可能性があります。何か問題があればお知らせください。 + +# 基本的な使い方 + +## 対話モードでの画像生成 + +以下のように入力してください。 + +```batchfile +python gen_img_diffusers.py --ckpt <モデル名> --outdir <画像出力先> --xformers --fp16 --interactive +``` + +`--ckpt`オプションにモデル(Stable Diffusionのcheckpointファイル、またはDiffusersのモデルフォルダ)、`--outdir`オプションに画像の出力先フォルダを指定します。 + +`--xformers`オプションでxformersの使用を指定します(xformersを使わない場合は外してください)。`--fp16`オプションでfp16(単精度)での推論を行います。RTX 30系のGPUでは `--bf16`オプションでbf16(bfloat16)での推論を行うこともできます。 + +`--interactive`オプションで対話モードを指定しています。 + +Stable Diffusion 2.0(またはそこからの追加学習モデル)を使う場合は`--v2`オプションを追加してください。v-parameterizationを使うモデル(`768-v-ema.ckpt`およびそこからの追加学習モデル)を使う場合はさらに`--v_parameterization`を追加してください。 + +`--v2`の指定有無が間違っているとモデル読み込み時にエラーになります。`--v_parameterization`の指定有無が間違っていると茶色い画像が表示されます。 + +`Type prompt:`と表示されたらプロンプトを入力してください。 + +![image](https://user-images.githubusercontent.com/52813779/235343115-f3b8ac82-456d-4aab-9724-0cc73c4534aa.png) + +※画像が表示されずエラーになる場合、headless(画面表示機能なし)のOpenCVがインストールされているかもしれません。`pip install opencv-python`として通常のOpenCVを入れてください。または`--no_preview`オプションで画像表示を止めてください。 + +画像ウィンドウを選択してから何らかのキーを押すとウィンドウが閉じ、次のプロンプトが入力できます。プロンプトでCtrl+Z、エンターの順に打鍵するとスクリプトを閉じます。 + +## 単一のプロンプトで画像を一括生成 + +以下のように入力します(実際には1行で入力します)。 + +```batchfile +python gen_img_diffusers.py --ckpt <モデル名> --outdir <画像出力先> + --xformers --fp16 --images_per_prompt <生成枚数> --prompt "<プロンプト>" +``` + +`--images_per_prompt`オプションで、プロンプト1件当たりの生成枚数を指定します。`--prompt`オプションでプロンプトを指定します。スペースを含む場合はダブルクォーテーションで囲んでください。 + +`--batch_size`オプションでバッチサイズを指定できます(後述)。 + +## ファイルからプロンプトを読み込み一括生成 + +以下のように入力します。 + +```batchfile +python gen_img_diffusers.py --ckpt <モデル名> --outdir <画像出力先> + --xformers --fp16 --from_file <プロンプトファイル名> +``` + 
+`--from_file`オプションで、プロンプトが記述されたファイルを指定します。1行1プロンプトで記述してください。`--images_per_prompt`オプションを指定して1行あたり生成枚数を指定できます。 + +## ネガティブプロンプト、重みづけの使用 + +プロンプトオプション(プロンプト内で`--x`のように指定、後述)で`--n`を書くと、以降がネガティブプロンプトとなります。 + +またAUTOMATIC1111氏のWeb UIと同様の `()` や` []` 、`(xxx:1.3)` などによる重みづけが可能です(実装はDiffusersの[Long Prompt Weighting Stable Diffusion](https://github.com/huggingface/diffusers/blob/main/examples/community/README.md#long-prompt-weighting-stable-diffusion)からコピーしたものです)。 + +コマンドラインからのプロンプト指定、ファイルからのプロンプト読み込みでも同様に指定できます。 + +![image](https://user-images.githubusercontent.com/52813779/235343128-e79cd768-ec59-46f5-8395-fce9bdc46208.png) + +# 主なオプション + +コマンドラインから指定してください。 + +## モデルの指定 + +- `--ckpt <モデル名>`:モデル名を指定します。`--ckpt`オプションは必須です。Stable Diffusionのcheckpointファイル、またはDiffusersのモデルフォルダ、Hugging FaceのモデルIDを指定できます。 + +- `--v2`:Stable Diffusion 2.x系のモデルを使う場合に指定します。1.x系の場合には指定不要です。 + +- `--v_parameterization`:v-parameterizationを使うモデルを使う場合に指定します(`768-v-ema.ckpt`およびそこからの追加学習モデル、Waifu Diffusion v1.5など)。 + + `--v2`の指定有無が間違っているとモデル読み込み時にエラーになります。`--v_parameterization`の指定有無が間違っていると茶色い画像が表示されます。 + +- `--vae`:使用するVAEを指定します。未指定時はモデル内のVAEを使用します。 + +## 画像生成と出力 + +- `--interactive`:インタラクティブモードで動作します。プロンプトを入力すると画像が生成されます。 + +- `--prompt <プロンプト>`:プロンプトを指定します。スペースを含む場合はダブルクォーテーションで囲んでください。 + +- `--from_file <プロンプトファイル名>`:プロンプトが記述されたファイルを指定します。1行1プロンプトで記述してください。なお画像サイズやguidance scaleはプロンプトオプション(後述)で指定できます。 + +- `--W <画像幅>`:画像の幅を指定します。デフォルトは`512`です。 + +- `--H <画像高さ>`:画像の高さを指定します。デフォルトは`512`です。 + +- `--steps <ステップ数>`:サンプリングステップ数を指定します。デフォルトは`50`です。 + +- `--scale <ガイダンススケール>`:unconditionalガイダンススケールを指定します。デフォルトは`7.5`です。 + +- `--sampler <サンプラー名>`:サンプラーを指定します。デフォルトは`ddim`です。Diffusersで提供されているddim、pndm、dpmsolver、dpmsolver+++、lms、euler、euler_a、が指定可能です(後ろの三つはk_lms、k_euler、k_euler_aでも指定できます)。 + +- `--outdir <画像出力先フォルダ>`:画像の出力先を指定します。 + +- `--images_per_prompt <生成枚数>`:プロンプト1件当たりの生成枚数を指定します。デフォルトは`1`です。 + +- `--clip_skip <スキップ数>`:CLIPの後ろから何番目の層を使うかを指定します。省略時は最後の層を使います。 + +- `--max_embeddings_multiples 
<倍数>`:CLIPの入出力長をデフォルト(75)の何倍にするかを指定します。未指定時は75のままです。たとえば3を指定すると入出力長が225になります。 + +- `--negative_scale` : uncoditioningのguidance scaleを個別に指定します。[gcem156氏のこちらの記事](https://note.com/gcem156/n/ne9a53e4a6f43)を参考に実装したものです。 + +## メモリ使用量や生成速度の調整 + +- `--batch_size <バッチサイズ>`:バッチサイズを指定します。デフォルトは`1`です。バッチサイズが大きいとメモリを多く消費しますが、生成速度が速くなります。 + +- `--vae_batch_size `:VAEのバッチサイズを指定します。デフォルトはバッチサイズと同じです。 + VAEのほうがメモリを多く消費するため、デノイジング後(stepが100%になった後)でメモリ不足になる場合があります。このような場合にはVAEのバッチサイズを小さくしてください。 + +- `--xformers`:xformersを使う場合に指定します。 + +- `--fp16`:fp16(単精度)での推論を行います。`fp16`と`bf16`をどちらも指定しない場合はfp32(単精度)での推論を行います。 + +- `--bf16`:bf16(bfloat16)での推論を行います。RTX 30系のGPUでのみ指定可能です。`--bf16`オプションはRTX 30系以外のGPUではエラーになります。`fp16`よりも`bf16`のほうが推論結果がNaNになる(真っ黒の画像になる)可能性が低いようです。 + +## 追加ネットワーク(LoRA等)の使用 + +- `--network_module`:使用する追加ネットワークを指定します。LoRAの場合は`--network_module networks.lora`と指定します。複数のLoRAを使用する場合は`--network_module networks.lora networks.lora networks.lora`のように指定します。 + +- `--network_weights`:使用する追加ネットワークの重みファイルを指定します。`--network_weights model.safetensors`のように指定します。複数のLoRAを使用する場合は`--network_weights model1.safetensors model2.safetensors model3.safetensors`のように指定します。引数の数は`--network_module`で指定した数と同じにしてください。 + +- `--network_mul`:使用する追加ネットワークの重みを何倍にするかを指定します。デフォルトは`1`です。`--network_mul 0.8`のように指定します。複数のLoRAを使用する場合は`--network_mul 0.4 0.5 0.7`のように指定します。引数の数は`--network_module`で指定した数と同じにしてください。 + +- `--network_merge`:使用する追加ネットワークの重みを`--network_mul`に指定した重みであらかじめマージします。`--network_pre_calc` と同時に使用できません。プロンプトオプションの`--am`、およびRegional LoRAは使用できなくなりますが、LoRA未使用時と同じ程度まで生成が高速化されます。 + +- `--network_pre_calc`:使用する追加ネットワークの重みを生成ごとにあらかじめ計算します。プロンプトオプションの`--am`が使用できます。LoRA未使用時と同じ程度まで生成は高速化されますが、生成前に重みを計算する時間が必要で、またメモリ使用量も若干増加します。Regional LoRA使用時は無効になります 。 + +# 主なオプションの指定例 + +次は同一プロンプトで64枚をバッチサイズ4で一括生成する例です。 + +```batchfile +python gen_img_diffusers.py --ckpt model.ckpt --outdir outputs + --xformers --fp16 --W 512 --H 704 --scale 12.5 --sampler k_euler_a + --steps 32 --batch_size 4 --images_per_prompt 64 + --prompt 
"beautiful flowers --n monochrome" +``` + +次はファイルに書かれたプロンプトを、それぞれ10枚ずつ、バッチサイズ4で一括生成する例です。 + +```batchfile +python gen_img_diffusers.py --ckpt model.ckpt --outdir outputs + --xformers --fp16 --W 512 --H 704 --scale 12.5 --sampler k_euler_a + --steps 32 --batch_size 4 --images_per_prompt 10 + --from_file prompts.txt +``` + +Textual Inversion(後述)およびLoRAの使用例です。 + +```batchfile +python gen_img_diffusers.py --ckpt model.safetensors + --scale 8 --steps 48 --outdir txt2img --xformers + --W 512 --H 768 --fp16 --sampler k_euler_a + --textual_inversion_embeddings goodembed.safetensors negprompt.pt + --network_module networks.lora networks.lora + --network_weights model1.safetensors model2.safetensors + --network_mul 0.4 0.8 + --clip_skip 2 --max_embeddings_multiples 1 + --batch_size 8 --images_per_prompt 1 --interactive +``` + +# プロンプトオプション + +プロンプト内で、`--n`のように「ハイフンふたつ+アルファベットn文字」でプロンプトから各種オプションの指定が可能です。対話モード、コマンドライン、ファイル、いずれからプロンプトを指定する場合でも有効です。 + +プロンプトのオプション指定`--n`の前後にはスペースを入れてください。 + +- `--n`:ネガティブプロンプトを指定します。 + +- `--w`:画像幅を指定します。コマンドラインからの指定を上書きします。 + +- `--h`:画像高さを指定します。コマンドラインからの指定を上書きします。 + +- `--s`:ステップ数を指定します。コマンドラインからの指定を上書きします。 + +- `--d`:この画像の乱数seedを指定します。`--images_per_prompt`を指定している場合は「--d 1,2,3,4」のようにカンマ区切りで複数指定してください。 + ※様々な理由により、Web UIとは同じ乱数seedでも生成される画像が異なる場合があります。 + +- `--l`:guidance scaleを指定します。コマンドラインからの指定を上書きします。 + +- `--t`:img2img(後述)のstrengthを指定します。コマンドラインからの指定を上書きします。 + +- `--nl`:ネガティブプロンプトのguidance scaleを指定します(後述)。コマンドラインからの指定を上書きします。 + +- `--am`:追加ネットワークの重みを指定します。コマンドラインからの指定を上書きします。複数の追加ネットワークを使用する場合は`--am 0.8,0.5,0.3`のように __カンマ区切りで__ 指定します。 + +※これらのオプションを指定すると、バッチサイズよりも小さいサイズでバッチが実行される場合があります(これらの値が異なると一括生成できないため)。(あまり気にしなくて大丈夫ですが、ファイルからプロンプトを読み込み生成する場合は、これらの値が同一のプロンプトを並べておくと効率が良くなります。) + +例: +``` +(masterpiece, best quality), 1girl, in shirt and plated skirt, standing at street under cherry blossoms, upper body, [from below], kind smile, looking at another, [goodembed] --n realistic, real life, (negprompt), (lowres:1.1), (worst quality:1.2), 
(low quality:1.1), bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, normal quality, jpeg artifacts, signature, watermark, username, blurry --w 960 --h 640 --s 28 --d 1 +``` + +![image](https://user-images.githubusercontent.com/52813779/235343446-25654172-fff4-4aaf-977a-20d262b51676.png) + +# img2img + +## オプション + +- `--image_path`:img2imgに利用する画像を指定します。`--image_path template.png`のように指定します。フォルダを指定すると、そのフォルダの画像を順次利用します。 + +- `--strength`:img2imgのstrengthを指定します。`--strength 0.8`のように指定します。デフォルトは`0.8`です。 + +- `--sequential_file_name`:ファイル名を連番にするかどうかを指定します。指定すると生成されるファイル名が`im_000001.png`からの連番になります。 + +- `--use_original_file_name`:指定すると生成ファイル名がオリジナルのファイル名と同じになります。 + +## コマンドラインからの実行例 + +```batchfile +python gen_img_diffusers.py --ckpt trinart_characters_it4_v1_vae_merged.ckpt + --outdir outputs --xformers --fp16 --scale 12.5 --sampler k_euler --steps 32 + --image_path template.png --strength 0.8 + --prompt "1girl, cowboy shot, brown hair, pony tail, brown eyes, + sailor school uniform, outdoors + --n lowres, bad anatomy, bad hands, error, missing fingers, cropped, + worst quality, low quality, normal quality, jpeg artifacts, (blurry), + hair ornament, glasses" + --batch_size 8 --images_per_prompt 32 +``` + +`--image_path`オプションにフォルダを指定すると、そのフォルダの画像を順次読み込みます。生成される枚数は画像枚数ではなく、プロンプト数になりますので、`--images_per_prompt`オプションを指定してimg2imgする画像の枚数とプロンプト数を合わせてください。 + +ファイルはファイル名でソートして読み込みます。なおソート順は文字列順となりますので(`1.jpg→2.jpg→10.jpg`ではなく`1.jpg→10.jpg→2.jpg`の順)、頭を0埋めするなどしてご対応ください(`01.jpg→02.jpg→10.jpg`)。 + +## img2imgを利用したupscale + +img2img時にコマンドラインオプションの`--W`と`--H`で生成画像サイズを指定すると、元画像をそのサイズにリサイズしてからimg2imgを行います。 + +またimg2imgの元画像がこのスクリプトで生成した画像の場合、プロンプトを省略すると、元画像のメタデータからプロンプトを取得しそのまま用います。これによりHighres.
fixの2nd stageの動作だけを行うことができます。 + +## img2img時のinpainting + +画像およびマスク画像を指定してinpaintingできます(inpaintingモデルには対応しておらず、単にマスク領域を対象にimg2imgするだけです)。 + +オプションは以下の通りです。 + +- `--mask_image`:マスク画像を指定します。`--img_path`と同様にフォルダを指定すると、そのフォルダの画像を順次利用します。 + +マスク画像はグレースケール画像で、白の部分がinpaintingされます。境界をグラデーションしておくとなんとなく滑らかになりますのでお勧めです。 + +![image](https://user-images.githubusercontent.com/52813779/235343795-9eaa6d98-02ff-4f32-b089-80d1fc482453.png) + +# その他の機能 + +## Textual Inversion + +`--textual_inversion_embeddings`オプションで使用するembeddingsを指定します(複数指定可)。拡張子を除いたファイル名をプロンプト内で使用することで、そのembeddingsを利用します(Web UIと同様の使用法です)。ネガティブプロンプト内でも使用できます。 + +モデルとして、当リポジトリで学習したTextual Inversionモデル、およびWeb UIで学習したTextual Inversionモデル(画像埋め込みは非対応)を利用できます + +## Extended Textual Inversion + +`--textual_inversion_embeddings`の代わりに`--XTI_embeddings`オプションを指定してください。使用法は`--textual_inversion_embeddings`と同じです。 + +## Highres. fix + +AUTOMATIC1111氏のWeb UIにある機能の類似機能です(独自実装のためもしかしたらいろいろ異なるかもしれません)。最初に小さめの画像を生成し、その画像を元にimg2imgすることで、画像全体の破綻を防ぎつつ大きな解像度の画像を生成します。 + +2nd stageのstep数は`--steps` と`--strength`オプションの値から計算されます(`steps*strength`)。 + +img2imgと併用できません。 + +以下のオプションがあります。 + +- `--highres_fix_scale`:Highres. 
fixを有効にして、1st stageで生成する画像のサイズを、倍率で指定します。最終出力が1024x1024で、最初に512x512の画像を生成する場合は`--highres_fix_scale 0.5`のように指定します。Web UIでの指定の逆数になっていますのでご注意ください。 + +- `--highres_fix_steps`:1st stageの画像のステップ数を指定します。デフォルトは`28`です。 + +- `--highres_fix_save_1st`:1st stageの画像を保存するかどうかを指定します。 + +- `--highres_fix_latents_upscaling`:指定すると2nd stageの画像生成時に1st stageの画像をlatentベースでupscalingします(bilinearのみ対応)。未指定時は画像をLANCZOS4でupscalingします。 + +- `--highres_fix_upscaler`:2nd stageに任意のupscalerを利用します。現在は`--highres_fix_upscaler tools.latent_upscaler` のみ対応しています。 + +- `--highres_fix_upscaler_args`:`--highres_fix_upscaler`で指定したupscalerに渡す引数を指定します。 + `tools.latent_upscaler`の場合は、`--highres_fix_upscaler_args "weights=D:\Work\SD\Models\others\etc\upscaler-v1-e100-220.safetensors"`のように重みファイルを指定します。 + +コマンドラインの例です。 + +```batchfile +python gen_img_diffusers.py --ckpt trinart_characters_it4_v1_vae_merged.ckpt + --n_iter 1 --scale 7.5 --W 1024 --H 1024 --batch_size 1 --outdir ../txt2img + --steps 48 --sampler ddim --fp16 + --xformers + --images_per_prompt 1 --interactive + --highres_fix_scale 0.5 --highres_fix_steps 28 --strength 0.5 +``` + +## ControlNet + +現在はControlNet 1.0のみ動作確認しています。プリプロセスはCannyのみサポートしています。 + +以下のオプションがあります。 + +- `--control_net_models`:ControlNetのモデルファイルを指定します。 + 複数指定すると、それらをstepごとに切り替えて利用します(Web UIのControlNet拡張の実装と異なります)。diffと通常の両方をサポートします。 + +- `--guide_image_path`:ControlNetに使うヒント画像を指定します。`--image_path`と同様にフォルダを指定すると、そのフォルダの画像を順次利用します。Canny以外のモデルの場合には、あらかじめプリプロセスを行っておいてください。 + +- `--control_net_preps`:ControlNetのプリプロセスを指定します。`--control_net_models`と同様に複数指定可能です。現在はcannyのみ対応しています。対象モデルでプリプロセスを使用しない場合は `none` を指定します。 + cannyの場合 `--control_net_preps canny_63_191`のように、閾値1と2を'_'で区切って指定できます。 + +- `--control_net_weights`:ControlNetの適用時の重みを指定します(`1.0`で通常、`0.5`なら半分の影響力で適用)。`--control_net_models`と同様に複数指定可能です。 + +- `--control_net_ratios`:ControlNetを適用するstepの範囲を指定します。`0.5`の場合は、step数の半分までControlNetを適用します。`--control_net_models`と同様に複数指定可能です。 + +コマンドラインの例です。 + +```batchfile +python gen_img_diffusers.py --ckpt
model_ckpt --scale 8 --steps 48 --outdir txt2img --xformers + --W 512 --H 768 --bf16 --sampler k_euler_a + --control_net_models diff_control_sd15_canny.safetensors --control_net_weights 1.0 + --guide_image_path guide.png --control_net_ratios 1.0 --interactive +``` + +## Attention Couple + Reginal LoRA + +プロンプトをいくつかの部分に分割し、それぞれのプロンプトを画像内のどの領域に適用するかを指定できる機能です。個別のオプションはありませんが、`mask_path`とプロンプトで指定します。 + +まず、プロンプトで` AND `を利用して、複数部分を定義します。最初の3つに対して領域指定ができ、以降の部分は画像全体へ適用されます。ネガティブプロンプトは画像全体に適用されます。 + +以下ではANDで3つの部分を定義しています。 + +``` +shs 2girls, looking at viewer, smile AND bsb 2girls, looking back AND 2girls --n bad quality, worst quality +``` + +次にマスク画像を用意します。マスク画像はカラーの画像で、RGBの各チャネルがプロンプトのANDで区切られた部分に対応します。またあるチャネルの値がすべて0の場合、画像全体に適用されます。 + +上記の例では、Rチャネルが`shs 2girls, looking at viewer, smile`、Gチャネルが`bsb 2girls, looking back`に、Bチャネルが`2girls`に対応します。次のようなマスク画像を使用すると、Bチャネルに指定がありませんので、`2girls`は画像全体に適用されます。 + +![image](https://user-images.githubusercontent.com/52813779/235343061-b4dc9392-3dae-4831-8347-1e9ae5054251.png) + +マスク画像は`--mask_path`で指定します。現在は1枚のみ対応しています。指定した画像サイズに自動的にリサイズされ適用されます。 + +ControlNetと組み合わせることも可能です(細かい位置指定にはControlNetとの組み合わせを推奨します)。 + +LoRAを指定すると、`--network_weights`で指定した複数のLoRAがそれぞれANDの各部分に対応します。現在の制約として、LoRAの数はANDの部分の数と同じである必要があります。 + +## CLIP Guided Stable Diffusion + +DiffusersのCommunity Examplesの[こちらのcustom pipeline](https://github.com/huggingface/diffusers/blob/main/examples/community/README.md#clip-guided-stable-diffusion)からソースをコピー、変更したものです。 + +通常のプロンプトによる生成指定に加えて、追加でより大規模のCLIPでプロンプトのテキストの特徴量を取得し、生成中の画像の特徴量がそのテキストの特徴量に近づくよう、生成される画像をコントロールします(私のざっくりとした理解です)。大きめのCLIPを使いますのでVRAM使用量はかなり増加し(VRAM 8GBでは512*512でも厳しいかもしれません)、生成時間も掛かります。 + +なお選択できるサンプラーはDDIM、PNDM、LMSのみとなります。 + +`--clip_guidance_scale`オプションにどの程度、CLIPの特徴量を反映するかを数値で指定します。先のサンプルでは100になっていますので、そのあたりから始めて増減すると良いようです。 + 
+デフォルトではプロンプトの先頭75トークン(重みづけの特殊文字を除く)がCLIPに渡されます。プロンプトの`--c`オプションで、通常のプロンプトではなく、CLIPに渡すテキストを別に指定できます(たとえばCLIPはDreamBoothのidentifier(識別子)や「1girl」などのモデル特有の単語は認識できないと思われますので、それらを省いたテキストが良いと思われます)。 + +コマンドラインの例です。 + +```batchfile +python gen_img_diffusers.py --ckpt v1-5-pruned-emaonly.ckpt --n_iter 1 + --scale 2.5 --W 512 --H 512 --batch_size 1 --outdir ../txt2img --steps 36 + --sampler ddim --fp16 --opt_channels_last --xformers --images_per_prompt 1 + --interactive --clip_guidance_scale 100 +``` + +## CLIP Image Guided Stable Diffusion + +テキストではなくCLIPに別の画像を渡し、その特徴量に近づくよう生成をコントロールする機能です。`--clip_image_guidance_scale`オプションで適用量の数値を、`--guide_image_path`オプションでguideに使用する画像(ファイルまたはフォルダ)を指定してください。 + +コマンドラインの例です。 + +```batchfile +python gen_img_diffusers.py --ckpt trinart_characters_it4_v1_vae_merged.ckpt + --n_iter 1 --scale 7.5 --W 512 --H 512 --batch_size 1 --outdir ../txt2img + --steps 80 --sampler ddim --fp16 --opt_channels_last --xformers + --images_per_prompt 1 --interactive --clip_image_guidance_scale 100 + --guide_image_path YUKA160113420I9A4104_TP_V.jpg +``` + +### VGG16 Guided Stable Diffusion + +指定した画像に近づくように画像生成する機能です。通常のプロンプトによる生成指定に加えて、追加でVGG16の特徴量を取得し、生成中の画像が指定したガイド画像に近づくよう、生成される画像をコントロールします。img2imgでの使用をお勧めします(通常の生成では画像がぼやけた感じになります)。CLIP Guided Stable Diffusionの仕組みを流用した独自の機能です。またアイデアはVGGを利用したスタイル変換から拝借しています。 + +なお選択できるサンプラーはDDIM、PNDM、LMSのみとなります。 + +`--vgg16_guidance_scale`オプションにどの程度、VGG16特徴量を反映するかを数値で指定します。試した感じでは100くらいから始めて増減すると良いようです。`--guide_image_path`オプションでguideに使用する画像(ファイルまたはフォルダ)を指定してください。 + +複数枚の画像を一括でimg2img変換し、元画像をガイド画像とする場合、`--guide_image_path`と`--image_path`に同じ値を指定すればOKです。 + +コマンドラインの例です。 + +```batchfile +python gen_img_diffusers.py --ckpt wd-v1-3-full-pruned-half.ckpt + --n_iter 1 --scale 5.5 --steps 60 --outdir ../txt2img + --xformers --sampler ddim --fp16 --W 512 --H 704 + --batch_size 1 --images_per_prompt 1 + --prompt "picturesque, 1girl, solo, anime face, skirt, beautiful face + --n lowres, bad anatomy, bad hands, error, missing fingers, + 
cropped, worst quality, low quality, normal quality, + jpeg artifacts, blurry, 3d, bad face, monochrome --d 1" + --strength 0.8 --image_path ..\src_image + --vgg16_guidance_scale 100 --guide_image_path ..\src_image +``` + +`--vgg16_guidance_layer`で特徴量取得に使用するVGG16のレイヤー番号を指定できます(デフォルトは20でconv4-2のReLUです)。上の層ほど画風を表現し、下の層ほどコンテンツを表現するといわれています。 + +![image](https://user-images.githubusercontent.com/52813779/235343813-3c1f0d7a-4fb3-4274-98e4-b92d76b551df.png) + +# その他のオプション + +- `--no_preview` : 対話モードでプレビュー画像を表示しません。OpenCVがインストールされていない場合や、出力されたファイルを直接確認する場合に指定してください。 + +- `--n_iter` : 生成を繰り返す回数を指定します。デフォルトは1です。プロンプトをファイルから読み込むとき、複数回の生成を行いたい場合に指定します。 + +- `--tokenizer_cache_dir` : トークナイザーのキャッシュディレクトリを指定します。(作業中) + +- `--seed` : 乱数seedを指定します。1枚生成時はその画像のseed、複数枚生成時は各画像のseedを生成するための乱数のseedになります(`--from_file`で複数画像生成するとき、`--seed`オプションを指定すると複数回実行したときに各画像が同じseedになります)。 + +- `--iter_same_seed` : プロンプトに乱数seedの指定がないとき、`--n_iter`の繰り返し内ではすべて同じseedを使います。`--from_file`で指定した複数のプロンプト間でseedを統一して比較するときに使います。 + +- `--diffusers_xformers` : Diffusersのxformersを使用します。 + +- `--opt_channels_last` : 推論時にテンソルのチャンネルを最後に配置します。場合によっては高速化されることがあります。 + +- `--network_show_meta` : 追加ネットワークのメタデータを表示します。 + + +--- + +# About Gradual Latent + +Gradual Latent is a Hires fix that gradually increases the size of the latent. `gen_img.py`, `sdxl_gen_img.py`, and `gen_img_diffusers.py` have the following options. + +- `--gradual_latent_timesteps`: Specifies the timestep to start increasing the size of the latent. The default is None, which means Gradual Latent is not used. Please try around 750 at first. +- `--gradual_latent_ratio`: Specifies the initial size of the latent. The default is 0.5, which means it starts with half the default latent size. +- `--gradual_latent_ratio_step`: Specifies the ratio to increase the size of the latent. The default is 0.125, which means the latent size is gradually increased to 0.625, 0.75, 0.875, 1.0.
+- `--gradual_latent_ratio_every_n_steps`: Specifies the interval to increase the size of the latent. The default is 3, which means the latent size is increased every 3 steps. + +Each option can also be specified with prompt options, `--glt`, `--glr`, `--gls`, `--gle`. + +__Please specify `euler_a` for the sampler__, because the source code of the sampler is modified. It will not work with other samplers. + +It is more effective with SD 1.5. It is quite subtle with SDXL. + +# Gradual Latent について + +latentのサイズを徐々に大きくしていくHires fixです。`gen_img.py` 、`sdxl_gen_img.py` 、`gen_img_diffusers.py` に以下のオプションが追加されています。 + +- `--gradual_latent_timesteps` : latentのサイズを大きくし始めるタイムステップを指定します。デフォルトは None で、Gradual Latentを使用しません。750 くらいから始めてみてください。 +- `--gradual_latent_ratio` : latentの初期サイズを指定します。デフォルトは 0.5 で、デフォルトの latent サイズの半分のサイズから始めます。 +- `--gradual_latent_ratio_step`: latentのサイズを大きくする割合を指定します。デフォルトは 0.125 で、latentのサイズを 0.625, 0.75, 0.875, 1.0 と徐々に大きくします。 +- `--gradual_latent_ratio_every_n_steps`: latentのサイズを大きくする間隔を指定します。デフォルトは 3 で、3ステップごとに latent のサイズを大きくします。 + +それぞれのオプションは、プロンプトオプション、`--glt`、`--glr`、`--gls`、`--gle` でも指定できます。 + +サンプラーに手を加えているため、__サンプラーに `euler_a` を指定してください。__ 他のサンプラーでは動作しません。 + +SD 1.5 のほうが効果があります。SDXL ではかなり微妙です。 + diff --git a/docs/image_folder_structure.md b/docs/image_folder_structure.md new file mode 100644 index 0000000000000000000000000000000000000000..35075c10710b8f7ac68aeab8a7714e17369f50d2 --- /dev/null +++ b/docs/image_folder_structure.md @@ -0,0 +1,59 @@ +# DreamBooth, LoRA and TI image folder structure + +To ensure successful training with Kohya, it is crucial to follow a specific folder structure that provides the necessary image repeats.
Please adhere to the following structure precisely: + +Folder Structure Example: + +```txt +c: +| +├──images +| | +| ├── 30_cat +| | | +| | ├── image1.jpg +| | ├── image1.txt +| | ├── image2.png +| | └── image2.txt +| | +| ├── 30_dog +| | | +| | ├── image1.jpg +| | ├── image1.txt +| | ├── image2.png +| | └── image2.txt +| | +| └── 40_black mamba +| | +| ├── image1.jpg +| ├── image1.txt +| ├── image2.png +| └── image2.txt +| +├──regularization +| | +| ├── 1_cat +| | | +| | ├── reg1.jpg +| | ├── reg2.jpg +| | +| ├── 1_dog +| | | +| | ├── reg1.jpg +| | ├── reg2.jpg +| | +| └── 1_black mamba +| | +| ├── reg1.jpg +| ├── reg2.jpg + +``` + +Please note the following important information regarding file extensions and their impact on concept names during model training: + +If a file with a .txt or .caption extension and the same name as an image is present in the image subfolder, it will take precedence over the concept name during the model training process. +For example, if there is an image file named image1.jpg in the 30_cat subfolder, and there is a corresponding text file named image1.txt or image1.caption in the same subfolder, the concept name used during training will be determined by the content of that text file rather than the subfolder name. + +Ensure that the content of such text files accurately reflects the desired concept name or any relevant caption information associated with the corresponding image. + +By considering this information and maintaining the proper folder structure, including any necessary text or caption files, you can ensure a smooth and effective training process with Kohya. 
\ No newline at end of file diff --git a/docs/train_README-ja.md b/docs/train_README-ja.md new file mode 100644 index 0000000000000000000000000000000000000000..47d7d1e6b0e8eab48bb5fa5677ab3325fbc31c16 --- /dev/null +++ b/docs/train_README-ja.md @@ -0,0 +1,1008 @@ +__ドキュメント更新中のため記述に誤りがあるかもしれません。__ + +# 学習について、共通編 + +当リポジトリではモデルのfine tuning、DreamBooth、およびLoRAとTextual Inversion([XTI:P+](https://github.com/kohya-ss/sd-scripts/pull/327)を含む)の学習をサポートします。この文書ではそれらに共通する、学習データの準備方法やオプション等について説明します。 + +# 概要 + +あらかじめこのリポジトリのREADMEを参照し、環境整備を行ってください。 + + +以下について説明します。 + +1. 学習データの準備について(設定ファイルを用いる新形式) +1. 学習で使われる用語のごく簡単な解説 +1. 以前の指定形式(設定ファイルを用いずコマンドラインから指定) +1. 学習途中のサンプル画像生成 +1. 各スクリプトで共通の、よく使われるオプション +1. fine tuning 方式のメタデータ準備:キャプションニングなど + +1.だけ実行すればとりあえず学習は可能です(学習については各スクリプトのドキュメントを参照)。2.以降は必要に応じて参照してください。 + + +# 学習データの準備について + +任意のフォルダ(複数でも可)に学習データの画像ファイルを用意しておきます。`.png`, `.jpg`, `.jpeg`, `.webp`, `.bmp` をサポートします。リサイズなどの前処理は基本的に必要ありません。 + +ただし学習解像度(後述)よりも極端に小さい画像は使わないか、あらかじめ超解像AIなどで拡大しておくことをお勧めします。また極端に大きな画像(3000x3000ピクセル程度?)よりも大きな画像はエラーになる場合があるようですので事前に縮小してください。 + +学習時には、モデルに学ばせる画像データを整理し、スクリプトに対して指定する必要があります。学習データの数、学習対象、キャプション(画像の説明)が用意できるか否かなどにより、いくつかの方法で学習データを指定できます。以下の方式があります(それぞれの名前は一般的なものではなく、当リポジトリ独自の定義です)。正則化画像については後述します。 + +1. DreamBooth、class+identifier方式(正則化画像使用可) + + 特定の単語 (identifier) に学習対象を紐づけるように学習します。キャプションを用意する必要はありません。たとえば特定のキャラを学ばせる場合に使うとキャプションを用意する必要がない分、手軽ですが、髪型や服装、背景など学習データの全要素が identifier に紐づけられて学習されるため、生成時のプロンプトで服が変えられない、といった事態も起こりえます。 + +1. DreamBooth、キャプション方式(正則化画像使用可) + + 画像ごとにキャプションが記録されたテキストファイルを用意して学習します。たとえば特定のキャラを学ばせると、画像の詳細をキャプションに記述することで(白い服を着たキャラA、赤い服を着たキャラA、など)キャラとそれ以外の要素が分離され、より厳密にモデルがキャラだけを学ぶことが期待できます。 + +1. 
fine tuning方式(正則化画像使用不可) + + あらかじめキャプションをメタデータファイルにまとめます。タグとキャプションを分けて管理したり、学習を高速化するためlatentsを事前キャッシュしたりなどの機能をサポートします(いずれも別文書で説明しています)。(fine tuning方式という名前ですが fine tuning 以外でも使えます。) + +学習したいものと使用できる指定方法の組み合わせは以下の通りです。 + +| 学習対象または方法 | スクリプト | DB / class+identifier | DB / キャプション | fine tuning | +| ----- | ----- | ----- | ----- | ----- | +| モデルをfine tuning | `fine_tune.py`| x | x | o | +| モデルをDreamBooth | `train_db.py`| o | o | x | +| LoRA | `train_network.py`| o | o | o | +| Textual Inversion | `train_textual_inversion.py`| o | o | o | + +## どれを選ぶか + +LoRA、Textual Inversionについては、手軽にキャプションファイルを用意せずに学習したい場合はDreamBooth class+identifier、用意できるならDreamBooth キャプション方式がよいでしょう。学習データの枚数が多く、かつ正則化画像を使用しない場合はfine tuning方式も検討してください。 + +DreamBoothについても同様ですが、fine tuning方式は使えません。fine tuningの場合はfine tuning方式のみです。 + +# 各方式の指定方法について + +ここではそれぞれの指定方法で典型的なパターンについてだけ説明します。より詳細な指定方法については [データセット設定](./config_README-ja.md) をご覧ください。 + +# DreamBooth、class+identifier方式(正則化画像使用可) + +この方式では、各画像は `identifier class` というキャプションで学習されたのと同じことになります(`shs dog` など)。 + +## step 1. identifierとclassを決める + +学ばせたい対象を結びつける単語identifierと、対象の属するclassを決めます。 + +(instanceなどいろいろな呼び方がありますが、とりあえず元の論文に合わせます。) + +以下ごく簡単に説明します(詳しくは調べてください)。 + +classは学習対象の一般的な種別です。たとえば特定の犬種を学ばせる場合には、classはdogになります。アニメキャラならモデルによりboyやgirl、1boyや1girlになるでしょう。 + +identifierは学習対象を識別して学習するためのものです。任意の単語で構いませんが、元論文によると「tokenizerで1トークンになる3文字以下でレアな単語」が良いとのことです。 + +identifierとclassを使い、たとえば「shs dog」などでモデルを学習することで、学習させたい対象をclassから識別して学習できます。 + +画像生成時には「shs dog」とすれば学ばせた犬種の画像が生成されます。 + +(identifierとして私が最近使っているものを参考までに挙げると、``shs sts scs cpc coc cic msm usu ici lvl cic dii muk ori hru rik koo yos wny`` などです。本当は Danbooru Tag に含まれないやつがより望ましいです。) + +## step 2. 
正則化画像を使うか否かを決め、使う場合には正則化画像を生成する + +正則化画像とは、前述のclass全体が、学習対象に引っ張られることを防ぐための画像です(language drift)。正則化画像を使わないと、たとえば `shs 1girl` で特定のキャラクタを学ばせると、単なる `1girl` というプロンプトで生成してもそのキャラに似てきます。これは `1girl` が学習時のキャプションに含まれているためです。 + +学習対象の画像と正則化画像を同時に学ばせることで、class は class のままで留まり、identifier をプロンプトにつけた時だけ学習対象が生成されるようになります。 + +LoRAやDreamBoothで特定のキャラだけ出てくればよい場合は、正則化画像を用いなくても良いといえます。 + +Textual Inversionでは用いなくてよいでしょう(学ばせる token string がキャプションに含まれない場合はなにも学習されないため)。 + +正則化画像としては、学習対象のモデルで、class 名だけで生成した画像を用いるのが一般的です(たとえば `1girl`)。ただし生成画像の品質が悪い場合には、プロンプトを工夫したり、ネットから別途ダウンロードした画像を用いることもできます。 + +(正則化画像も学習されるため、その品質はモデルに影響します。) + +一般的には数百枚程度、用意するのが望ましいようです(枚数が少ないと class 画像が一般化されずそれらの特徴を学んでしまいます)。 + +生成画像を使う場合、通常、生成画像のサイズは学習解像度(より正確にはbucketの解像度、後述)にあわせてください。 + +## step 2. 設定ファイルの記述 + +テキストファイルを作成し、拡張子を `.toml` にします。たとえば以下のように記述します。 + +(`#` で始まっている部分はコメントですので、このままコピペしてそのままでもよいですし、削除しても問題ありません。) + +```toml +[general] +enable_bucket = true # Aspect Ratio Bucketingを使うか否か + +[[datasets]] +resolution = 512 # 学習解像度 +batch_size = 4 # バッチサイズ + + [[datasets.subsets]] + image_dir = 'C:\hoge' # 学習用画像を入れたフォルダを指定 + class_tokens = 'hoge girl' # identifier class を指定 + num_repeats = 10 # 学習用画像の繰り返し回数 + + # 以下は正則化画像を用いる場合のみ記述する。用いない場合は削除する + [[datasets.subsets]] + is_reg = true + image_dir = 'C:\reg' # 正則化画像を入れたフォルダを指定 + class_tokens = 'girl' # class を指定 + num_repeats = 1 # 正則化画像の繰り返し回数、基本的には1でよい +``` + +基本的には以下の場所のみ書き換えれば学習できます。 + +1. 学習解像度 + + 数値1つを指定すると正方形(`512`なら512x512)、鍵カッコカンマ区切りで2つ指定すると横×縦(`[512,768]`なら512x768)になります。SD1.x系ではもともとの学習解像度は512です。`[512,768]` 等の大きめの解像度を指定すると縦長、横長画像生成時の破綻を小さくできるかもしれません。SD2.x 768系では `768` です。 + +1. バッチサイズ + + 同時に何件のデータを学習するかを指定します。GPUのVRAMサイズ、学習解像度によって変わってきます。詳しくは後述します。またfine tuning/DreamBooth/LoRA等でも変わってきますので各スクリプトの説明もご覧ください。 + +1. フォルダ指定 + + 学習用画像、正則化画像(使用する場合のみ)のフォルダを指定します。画像データが含まれているフォルダそのものを指定します。 + +1. identifier と class の指定 + + 前述のサンプルの通りです。 + +1. 
繰り返し回数 + + 後述します。 + +### 繰り返し回数について + +繰り返し回数は、正則化画像の枚数と学習用画像の枚数を調整するために用いられます。正則化画像の枚数は学習用画像よりも多いため、学習用画像を繰り返して枚数を合わせ、1対1の比率で学習できるようにします。 + +繰り返し回数は「 __学習用画像の繰り返し回数×学習用画像の枚数≧正則化画像の繰り返し回数×正則化画像の枚数__ 」となるように指定してください。 + +(1 epoch(データが一周すると1 epoch)のデータ数が「学習用画像の繰り返し回数×学習用画像の枚数」となります。正則化画像の枚数がそれより多いと、余った部分の正則化画像は使用されません。) + +## step 3. 学習 + +それぞれのドキュメントを参考に学習を行ってください。 + +# DreamBooth、キャプション方式(正則化画像使用可) + +この方式では各画像はキャプションで学習されます。 + +## step 1. キャプションファイルを準備する + +学習用画像のフォルダに、画像と同じファイル名で、拡張子 `.caption`(設定で変えられます)のファイルを置いてください。それぞれのファイルは1行のみとしてください。エンコーディングは `UTF-8` です。 + +## step 2. 正則化画像を使うか否かを決め、使う場合には正則化画像を生成する + +class+identifier形式と同様です。なお正則化画像にもキャプションを付けることができますが、通常は不要でしょう。 + +## step 2. 設定ファイルの記述 + +テキストファイルを作成し、拡張子を `.toml` にします。たとえば以下のように記述します。 + +```toml +[general] +enable_bucket = true # Aspect Ratio Bucketingを使うか否か + +[[datasets]] +resolution = 512 # 学習解像度 +batch_size = 4 # バッチサイズ + + [[datasets.subsets]] + image_dir = 'C:\hoge' # 学習用画像を入れたフォルダを指定 + caption_extension = '.caption' # キャプションファイルの拡張子 .txt を使う場合には書き換える + num_repeats = 10 # 学習用画像の繰り返し回数 + + # 以下は正則化画像を用いる場合のみ記述する。用いない場合は削除する + [[datasets.subsets]] + is_reg = true + image_dir = 'C:\reg' # 正則化画像を入れたフォルダを指定 + class_tokens = 'girl' # class を指定 + num_repeats = 1 # 正則化画像の繰り返し回数、基本的には1でよい +``` + +基本的には以下を場所のみ書き換えれば学習できます。特に記述がない部分は class+identifier 方式と同じです。 + +1. 学習解像度 +1. バッチサイズ +1. フォルダ指定 +1. キャプションファイルの拡張子 + + 任意の拡張子を指定できます。 +1. 繰り返し回数 + +## step 3. 学習 + +それぞれのドキュメントを参考に学習を行ってください。 + +# fine tuning 方式 + +## step 1. メタデータを準備する + +キャプションやタグをまとめた管理用ファイルをメタデータと呼びます。json形式で拡張子は `.json` + です。作成方法は長くなりますのでこの文書の末尾に書きました。 + +## step 2. 
設定ファイルの記述 + +テキストファイルを作成し、拡張子を `.toml` にします。たとえば以下のように記述します。 + +```toml +[general] +shuffle_caption = true +keep_tokens = 1 + +[[datasets]] +resolution = 512 # 学習解像度 +batch_size = 4 # バッチサイズ + + [[datasets.subsets]] + image_dir = 'C:\piyo' # 学習用画像を入れたフォルダを指定 + metadata_file = 'C:\piyo\piyo_md.json' # メタデータファイル名 +``` + +基本的には以下を場所のみ書き換えれば学習できます。特に記述がない部分は DreamBooth, class+identifier 方式と同じです。 + +1. 学習解像度 +1. バッチサイズ +1. フォルダ指定 +1. メタデータファイル名 + + 後述の方法で作成したメタデータファイルを指定します。 + + +## step 3. 学習 + +それぞれのドキュメントを参考に学習を行ってください。 + +# 学習で使われる用語のごく簡単な解説 + +細かいことは省略していますし私も完全には理解していないため、詳しくは各自お調べください。 + +## fine tuning(ファインチューニング) + +モデルを学習して微調整することを指します。使われ方によって意味が異なってきますが、狭義のfine tuningはStable Diffusionの場合、モデルを画像とキャプションで学習することです。DreamBoothは狭義のfine tuningのひとつの特殊なやり方と言えます。広義のfine tuningは、LoRAやTextual Inversion、Hypernetworksなどを含み、モデルを学習することすべてを含みます。 + +## ステップ + +ざっくりいうと学習データで1回計算すると1ステップです。「学習データのキャプションを今のモデルに流してみて、出てくる画像を学習データの画像と比較し、学習データに近づくようにモデルをわずかに変更する」のが1ステップです。 + +## バッチサイズ + +バッチサイズは1ステップで何件のデータをまとめて計算するかを指定する値です。まとめて計算するため速度は相対的に向上します。また一般的には精度も高くなるといわれています。 + +`バッチサイズ×ステップ数` が学習に使われるデータの件数になります。そのため、バッチサイズを増やした分だけステップ数を減らすとよいでしょう。 + +(ただし、たとえば「バッチサイズ1で1600ステップ」と「バッチサイズ4で400ステップ」は同じ結果にはなりません。同じ学習率の場合、一般的には後者のほうが学習不足になります。学習率を多少大きくするか(たとえば `2e-6` など)、ステップ数をたとえば500ステップにするなどして工夫してください。) + +バッチサイズを大きくするとその分だけGPUメモリを消費します。メモリが足りなくなるとエラーになりますし、エラーにならないギリギリでは学習速度が低下します。タスクマネージャーや `nvidia-smi` コマンドで使用メモリ量を確認しながら調整するとよいでしょう。 + +なお、バッチは「一塊のデータ」位の意味です。 + +## 学習率 + +ざっくりいうと1ステップごとにどのくらい変化させるかを表します。大きな値を指定するとそれだけ速く学習が進みますが、変化しすぎてモデルが壊れたり、最適な状態にまで至れない場合があります。小さい値を指定すると学習速度は遅くなり、また最適な状態にやはり至れない場合があります。 + +fine tuning、DreamBoooth、LoRAそれぞれで大きく異なり、また学習データや学習させたいモデル、バッチサイズやステップ数によっても変わってきます。一般的な値から初めて学習状態を見ながら増減してください。 + +デフォルトでは学習全体を通して学習率は固定です。スケジューラの指定で学習率をどう変化させるか決められますので、それらによっても結果は変わってきます。 + +## エポック(epoch) + +学習データが一通り学習されると(データが一周すると)1 epochです。繰り返し回数を指定した場合は、その繰り返し後のデータが一周すると1 epochです。 + +1 epochのステップ数は、基本的には `データ件数÷バッチサイズ` ですが、Aspect Ratio Bucketing 
を使うと微妙に増えます(異なるbucketのデータは同じバッチにできないため、ステップ数が増えます)。 + +## Aspect Ratio Bucketing + +Stable Diffusion のv1は512\*512で学習されていますが、それに加えて256\*1024や384\*640といった解像度でも学習します。これによりトリミングされる部分が減り、より正しくキャプションと画像の関係が学習されることが期待されます。 + +また任意の解像度で学習するため、事前に画像データの縦横比を統一しておく必要がなくなります。 + +設定で有効、無効が切り替えられますが、ここまでの設定ファイルの記述例では有効になっています(`true` が設定されています)。 + +学習解像度はパラメータとして与えられた解像度の面積(=メモリ使用量)を超えない範囲で、64ピクセル単位(デフォルト、変更可)で縦横に調整、作成されます。 + +機械学習では入力サイズをすべて統一するのが一般的ですが、特に制約があるわけではなく、実際は同一のバッチ内で統一されていれば大丈夫です。NovelAIの言うbucketingは、あらかじめ教師データを、アスペクト比に応じた学習解像度ごとに分類しておくことを指しているようです。そしてバッチを各bucket内の画像で作成することで、バッチの画像サイズを統一します。 + +# 以前の指定形式(設定ファイルを用いずコマンドラインから指定) + +`.toml` ファイルを指定せずコマンドラインオプションで指定する方法です。DreamBooth class+identifier方式、DreamBooth キャプション方式、fine tuning方式があります。 + +## DreamBooth、class+identifier方式 + +フォルダ名で繰り返し回数を指定します。また `train_data_dir` オプションと `reg_data_dir` オプションを用います。 + +### step 1. 学習用画像の準備 + +学習用画像を格納するフォルダを作成します。 __さらにその中に__ 、以下の名前でディレクトリを作成します。 + +``` +<繰り返し回数>_ +``` + +間の``_``を忘れないでください。 + +たとえば「sls frog」というプロンプトで、データを20回繰り返す場合、「20_sls frog」となります。以下のようになります。 + +![image](https://user-images.githubusercontent.com/52813779/210770636-1c851377-5936-4c15-90b7-8ac8ad6c2074.png) + +### 複数class、複数対象(identifier)の学習 + +方法は単純で、学習用画像のフォルダ内に ``繰り返し回数_ `` のフォルダを複数、正則化画像フォルダにも同様に ``繰り返し回数_`` のフォルダを複数、用意してください。 + +たとえば「sls frog」と「cpc rabbit」を同時に学習する場合、以下のようになります。 + +![image](https://user-images.githubusercontent.com/52813779/210777933-a22229db-b219-4cd8-83ca-e87320fc4192.png) + +classがひとつで対象が複数の場合、正則化画像フォルダはひとつで構いません。たとえば1girlにキャラAとキャラBがいる場合は次のようにします。 + +- train_girls + - 10_sls 1girl + - 10_cpc 1girl +- reg_girls + - 1_1girl + +### step 2. 正則化画像の準備 + +正則化画像を使う場合の手順です。 + +正則化画像を格納するフォルダを作成します。 __さらにその中に__ ``<繰り返し回数>_`` という名前でディレクトリを作成します。 + +たとえば「frog」というプロンプトで、データを繰り返さない(1回だけ)場合、以下のようになります。 + +![image](https://user-images.githubusercontent.com/52813779/210770897-329758e5-3675-49f1-b345-c135f1725832.png) + + +### step 3. 
学習の実行 + +各学習スクリプトを実行します。 `--train_data_dir` オプションで前述の学習用データのフォルダを(__画像を含むフォルダではなく、その親フォルダ__)、`--reg_data_dir` オプションで正則化画像のフォルダ(__画像を含むフォルダではなく、その親フォルダ__)を指定してください。 + +## DreamBooth、キャプション方式 + +学習用画像、正則化画像のフォルダに、画像と同じファイル名で、拡張子.caption(オプションで変えられます)のファイルを置くと、そのファイルからキャプションを読み込みプロンプトとして学習します。 + +※それらの画像の学習に、フォルダ名(identifier class)は使用されなくなります。 + +キャプションファイルの拡張子はデフォルトで.captionです。学習スクリプトの `--caption_extension` オプションで変更できます。`--shuffle_caption` オプションで学習時のキャプションについて、カンマ区切りの各部分をシャッフルしながら学習します。 + +## fine tuning 方式 + +メタデータを作るところまでは設定ファイルを使う場合と同様です。`in_json` オプションでメタデータファイルを指定します。 + +# 学習途中でのサンプル出力 + +学習中のモデルで試しに画像生成することで学習の進み方を確認できます。学習スクリプトに以下のオプションを指定します。 + +- `--sample_every_n_steps` / `--sample_every_n_epochs` + + サンプル出力するステップ数またはエポック数を指定します。この数ごとにサンプル出力します。両方指定するとエポック数が優先されます。 + +- `--sample_at_first` + + 学習開始前にサンプル出力します。学習前との比較ができます。 + +- `--sample_prompts` + + サンプル出力用プロンプトのファイルを指定します。 + +- `--sample_sampler` + + サンプル出力に使うサンプラーを指定します。 + `'ddim', 'pndm', 'heun', 'dpmsolver', 'dpmsolver++', 'dpmsingle', 'k_lms', 'k_euler', 'k_euler_a', 'k_dpm_2', 'k_dpm_2_a'`が選べます。 + +サンプル出力を行うにはあらかじめプロンプトを記述したテキストファイルを用意しておく必要があります。1行につき1プロンプトで記述します。 + +たとえば以下のようになります。 + +```txt +# prompt 1 +masterpiece, best quality, 1girl, in white shirts, upper body, looking at viewer, simple background --n low quality, worst quality, bad anatomy,bad composition, poor, low effort --w 768 --h 768 --d 1 --l 7.5 --s 28 + +# prompt 2 +masterpiece, best quality, 1boy, in business suit, standing at street, looking back --n low quality, worst quality, bad anatomy,bad composition, poor, low effort --w 576 --h 832 --d 2 --l 5.5 --s 40 +``` + +先頭が `#` の行はコメントになります。`--n` のように 「`--` + 英小文字」で生成画像へのオプションを指定できます。以下が使えます。 + +- `--n` 次のオプションまでをネガティブプロンプトとします。 +- `--w` 生成画像の横幅を指定します。 +- `--h` 生成画像の高さを指定します。 +- `--d` 生成画像のseedを指定します。 +- `--l` 生成画像のCFG scaleを指定します。 +- `--s` 生成時のステップ数を指定します。 + + +# 各スクリプトで共通の、よく使われるオプション + +スクリプトの更新後、ドキュメントの更新が追い付いていない場合があります。その場合は `--help` オプションで使用できるオプションを確認してください。 + +## 学習に使うモデル指定 + 
+- `--v2` / `--v_parameterization` + + 学習対象モデルとしてHugging Faceのstable-diffusion-2-base、またはそこからのfine tuningモデルを使う場合(推論時に `v2-inference.yaml` を使うように指示されているモデルの場合)は `--v2` オプションを、stable-diffusion-2や768-v-ema.ckpt、およびそれらのfine tuningモデルを使う場合(推論時に `v2-inference-v.yaml` を使うモデルの場合)は `--v2` と `--v_parameterization` の両方のオプションを指定してください。 + + Stable Diffusion 2.0では大きく以下の点が変わっています。 + + 1. 使用するTokenizer + 2. 使用するText Encoderおよび使用する出力層(2.0は最後から二番目の層を使う) + 3. Text Encoderの出力次元数(768->1024) + 4. U-Netの構造(CrossAttentionのhead数など) + 5. v-parameterization(サンプリング方法が変更されているらしい) + + このうちbaseでは1~4が、baseのつかない方(768-v)では1~5が採用されています。1~4を有効にするのがv2オプション、5を有効にするのがv_parameterizationオプションです。 + +- `--pretrained_model_name_or_path` + + 追加学習を行う元となるモデルを指定します。Stable Diffusionのcheckpointファイル(.ckptまたは.safetensors)、Diffusersのローカルディスクにあるモデルディレクトリ、DiffusersのモデルID("stabilityai/stable-diffusion-2"など)が指定できます。 + +## 学習に関する設定 + +- `--output_dir` + + 学習後のモデルを保存するフォルダを指定します。 + +- `--output_name` + + モデルのファイル名を拡張子を除いて指定します。 + +- `--dataset_config` + + データセットの設定を記述した `.toml` ファイルを指定します。 + +- `--max_train_steps` / `--max_train_epochs` + + 学習するステップ数やエポック数を指定します。両方指定するとエポック数のほうが優先されます。 + +- `--mixed_precision` + + 省メモリ化のため mixed precision (混合精度)で学習します。`--mixed_precision="fp16"` のように指定します。mixed precision なし(デフォルト)と比べて精度が低くなる可能性がありますが、学習に必要なGPUメモリ量が大きく減ります。 + + (RTX30 シリーズ以降では `bf16` も指定できます。環境整備時にaccelerateに行った設定と合わせてください)。 + +- `--gradient_checkpointing` + + 学習時の重みの計算をまとめて行うのではなく少しずつ行うことで、学習に必要なGPUメモリ量を減らします。オンオフは精度には影響しませんが、オンにするとバッチサイズを大きくできるため、そちらでの影響はあります。 + + また一般的にはオンにすると速度は低下しますが、バッチサイズを大きくできるので、トータルでの学習時間はむしろ速くなるかもしれません。 + +- `--xformers` / `--mem_eff_attn` + + xformersオプションを指定するとxformersのCrossAttentionを用います。xformersをインストールしていない場合やエラーとなる場合(環境にもよりますが `mixed_precision="no"` の場合など)、代わりに `mem_eff_attn` オプションを指定すると省メモリ版CrossAttentionを使用します(xformersよりも速度は遅くなります)。 + +- `--clip_skip` + + `2` を指定すると、Text Encoder (CLIP) の後ろから二番目の層の出力を用います。1またはオプション省略時は最後の層を用います。 + + ※SD2.0はデフォルトで後ろから二番目の層を使うため、SD2.0の学習では指定しないでください。 + + 
学習対象のモデルがもともと二番目の層を使うように学習されている場合は、2を指定するとよいでしょう。 + + そうではなく最後の層を使用していた場合はモデル全体がそれを前提に学習されています。そのため改めて二番目の層を使用して学習すると、望ましい学習結果を得るにはある程度の枚数の教師データ、長めの学習が必要になるかもしれません。 + +- `--max_token_length` + + デフォルトは75です。`150` または `225` を指定することでトークン長を拡張して学習できます。長いキャプションで学習する場合に指定してください。 + + ただし学習時のトークン拡張の仕様は Automatic1111 氏のWeb UIとは微妙に異なるため(分割の仕様など)、必要なければ75で学習することをお勧めします。 + + clip_skipと同様に、モデルの学習状態と異なる長さで学習するには、ある程度の教師データ枚数、長めの学習時間が必要になると思われます。 + +- `--weighted_captions` + + 指定するとAutomatic1111氏のWeb UIと同様の重み付きキャプションが有効になります。「Textual Inversion と XTI」以外の学習に使用できます。キャプションだけでなく DreamBooth 手法の token string でも有効です。 + + 重みづけキャプションの記法はWeb UIとほぼ同じで、(abc)や[abc]、(abc:1.23)などが使用できます。入れ子も可能です。括弧内にカンマを含めるとプロンプトのshuffle/dropoutで括弧の対応付けがおかしくなるため、括弧内にはカンマを含めないでください。 + +- `--persistent_data_loader_workers` + + Windows環境で指定するとエポック間の待ち時間が大幅に短縮されます。 + +- `--max_data_loader_n_workers` + + データ読み込みのプロセス数を指定します。プロセス数が多いとデータ読み込みが速くなりGPUを効率的に利用できますが、メインメモリを消費します。デフォルトは「`8` または `CPU同時実行スレッド数-1` の小さいほう」なので、メインメモリに余裕がない場合や、GPU使用率が90%程度以上なら、それらの数値を見ながら `2` または `1` 程度まで下げてください。 + +- `--logging_dir` / `--log_prefix` + + 学習ログの保存に関するオプションです。logging_dirオプションにログ保存先フォルダを指定してください。TensorBoard形式のログが保存されます。 + + たとえば--logging_dir=logsと指定すると、作業フォルダにlogsフォルダが作成され、その中の日時フォルダにログが保存されます。 + また--log_prefixオプションを指定すると、日時の前に指定した文字列が追加されます。「--logging_dir=logs --log_prefix=db_style1_」などとして識別用にお使いください。 + + TensorBoardでログを確認するには、別のコマンドプロンプトを開き、作業フォルダで以下のように入力します。 + + ``` + tensorboard --logdir=logs + ``` + + (tensorboardは環境整備時にあわせてインストールされると思いますが、もし入っていないなら `pip install tensorboard` で入れてください。) + + その後ブラウザを開き、http://localhost:6006/ へアクセスすると表示されます。 + +- `--log_with` / `--log_tracker_name` + + 学習ログの保存に関するオプションです。`tensorboard` だけでなく `wandb`への保存が可能です。詳細は [PR#428](https://github.com/kohya-ss/sd-scripts/pull/428)をご覧ください。 + +- `--noise_offset` + + こちらの記事の実装になります: https://www.crosslabs.org//blog/diffusion-with-offset-noise + + 全体的に暗い、明るい画像の生成結果が良くなる可能性があるようです。LoRA学習でも有効なようです。`0.1` 程度の値を指定するとよいようです。 + +- `--adaptive_noise_scale` (実験的オプション) + + 
Noise offsetの値を、latentsの各チャネルの平均値の絶対値に応じて自動調整するオプションです。`--noise_offset` と同時に指定することで有効になります。Noise offsetの値は `noise_offset + abs(mean(latents, dim=(2,3))) * adaptive_noise_scale` で計算されます。latentは正規分布に近いためnoise_offsetの1/10~同程度の値を指定するとよいかもしれません。 + + 負の値も指定でき、その場合はnoise offsetは0以上にclipされます。 + +- `--multires_noise_iterations` / `--multires_noise_discount` + + Multi resolution noise (pyramid noise)の設定です。詳細は [PR#471](https://github.com/kohya-ss/sd-scripts/pull/471) およびこちらのページ [Multi-Resolution Noise for Diffusion Model Training](https://wandb.ai/johnowhitaker/multires_noise/reports/Multi-Resolution-Noise-for-Diffusion-Model-Training--VmlldzozNjYyOTU2) を参照してください。 + + `--multires_noise_iterations` に数値を指定すると有効になります。6~10程度の値が良いようです。`--multires_noise_discount` に0.1~0.3 程度の値(LoRA学習等比較的データセットが小さい場合のPR作者の推奨)、ないしは0.8程度の値(元記事の推奨)を指定してください(デフォルトは 0.3)。 + +- `--debug_dataset` + + このオプションを付けることで学習を行う前に事前にどのような画像データ、キャプションで学習されるかを確認できます。Escキーを押すと終了してコマンドラインに戻ります。`S`キーで次のステップ(バッチ)、`E`キーで次のエポックに進みます。 + + ※Linux環境(Colabを含む)では画像は表示されません。 + +- `--vae` + + vaeオプションにStable Diffusionのcheckpoint、VAEのcheckpointファイル、DiffusesのモデルまたはVAE(ともにローカルまたはHugging FaceのモデルIDが指定できます)のいずれかを指定すると、そのVAEを使って学習します(latentsのキャッシュ時または学習中のlatents取得時)。 + + DreamBoothおよびfine tuningでは、保存されるモデルはこのVAEを組み込んだものになります。 + +- `--cache_latents` / `--cache_latents_to_disk` + + 使用VRAMを減らすためVAEの出力をメインメモリにキャッシュします。`flip_aug` 以外のaugmentationは使えなくなります。また全体の学習速度が若干速くなります。 + + cache_latents_to_diskを指定するとキャッシュをディスクに保存します。スクリプトを終了し、再度起動した場合もキャッシュが有効になります。 + +- `--min_snr_gamma` + + Min-SNR Weighting strategyを指定します。詳細は[こちら](https://github.com/kohya-ss/sd-scripts/pull/308)を参照してください。論文では`5`が推奨されています。 + +## モデルの保存に関する設定 + +- `--save_precision` + + 保存時のデータ精度を指定します。save_precisionオプションにfloat、fp16、bf16のいずれかを指定すると、その形式でモデルを保存します(DreamBooth、fine tuningでDiffusers形式でモデルを保存する場合は無効です)。モデルのサイズを削減したい場合などにお使いください。 + +- `--save_every_n_epochs` / `--save_state` / `--resume` + + save_every_n_epochsオプションに数値を指定すると、そのエポックごとに学習途中のモデルを保存します。 + + 
save_stateオプションを同時に指定すると、optimizer等の状態も含めた学習状態を合わせて保存します(保存したモデルからも学習再開できますが、それに比べると精度の向上、学習時間の短縮が期待できます)。保存先はフォルダになります。 + + 学習状態は保存先フォルダに `-??????-state`(??????はエポック数)という名前のフォルダで出力されます。長時間にわたる学習時にご利用ください。 + + 保存された学習状態から学習を再開するにはresumeオプションを使います。学習状態のフォルダ(`output_dir` ではなくその中のstateのフォルダ)を指定してください。 + + なおAcceleratorの仕様により、エポック数、global stepは保存されておらず、resumeしたときにも1からになりますがご容赦ください。 + +- `--save_every_n_steps` + + save_every_n_stepsオプションに数値を指定すると、そのステップごとに学習途中のモデルを保存します。save_every_n_epochsと同時に指定できます。 + +- `--save_model_as` (DreamBooth, fine tuning のみ) + + モデルの保存形式を`ckpt, safetensors, diffusers, diffusers_safetensors` から選べます。 + + `--save_model_as=safetensors` のように指定します。Stable Diffusion形式(ckptまたはsafetensors)を読み込み、Diffusers形式で保存する場合、不足する情報はHugging Faceからv1.5またはv2.1の情報を落としてきて補完します。 + +- `--huggingface_repo_id` 等 + + huggingface_repo_idが指定されているとモデル保存時に同時にHuggingFaceにアップロードします。アクセストークンの取り扱いに注意してください(HuggingFaceのドキュメントを参照してください)。 + + 他の引数をたとえば以下のように指定してください。 + + - `--huggingface_repo_id "your-hf-name/your-model" --huggingface_path_in_repo "path" --huggingface_repo_type model --huggingface_repo_visibility private --huggingface_token hf_YourAccessTokenHere` + + huggingface_repo_visibilityに`public`を指定するとリポジトリが公開されます。省略時または`private`(などpublic以外)を指定すると非公開になります。 + + `--save_state`オプション指定時に`--save_state_to_huggingface`を指定するとstateもアップロードします。 + + `--resume`オプション指定時に`--resume_from_huggingface`を指定するとHuggingFaceからstateをダウンロードして再開します。その時の --resumeオプションは `--resume {repo_id}/{path_in_repo}:{revision}:{repo_type}`になります。 + + 例: `--resume_from_huggingface --resume your-hf-name/your-model/path/test-000002-state:main:model` + + `--async_upload`オプションを指定するとアップロードを非同期で行います。 + +## オプティマイザ関係 + +- `--optimizer_type` + --オプティマイザの種類を指定します。以下が指定できます。 + - AdamW : [torch.optim.AdamW](https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html) + - 過去のバージョンのオプション未指定時と同じ + - AdamW8bit : 引数は同上 + - PagedAdamW8bit : 引数は同上 + - 過去のバージョンの--use_8bit_adam指定時と同じ + - Lion : 
https://github.com/lucidrains/lion-pytorch + - 過去のバージョンの--use_lion_optimizer指定時と同じ + - Lion8bit : 引数は同上 + - PagedLion8bit : 引数は同上 + - SGDNesterov : [torch.optim.SGD](https://pytorch.org/docs/stable/generated/torch.optim.SGD.html), nesterov=True + - SGDNesterov8bit : 引数は同上 + - DAdaptation(DAdaptAdamPreprint) : https://github.com/facebookresearch/dadaptation + - DAdaptAdam : 引数は同上 + - DAdaptAdaGrad : 引数は同上 + - DAdaptAdan : 引数は同上 + - DAdaptAdanIP : 引数は同上 + - DAdaptLion : 引数は同上 + - DAdaptSGD : 引数は同上 + - Prodigy : https://github.com/konstmish/prodigy + - AdaFactor : [Transformers AdaFactor](https://huggingface.co/docs/transformers/main_classes/optimizer_schedules) + - 任意のオプティマイザ + +- `--learning_rate` + + 学習率を指定します。適切な学習率は学習スクリプトにより異なりますので、それぞれの説明を参照してください。 + +- `--lr_scheduler` / `--lr_warmup_steps` / `--lr_scheduler_num_cycles` / `--lr_scheduler_power` + + 学習率のスケジューラ関連の指定です。 + + lr_schedulerオプションで学習率のスケジューラをlinear, cosine, cosine_with_restarts, polynomial, constant, constant_with_warmup, 任意のスケジューラから選べます。デフォルトはconstantです。 + + lr_warmup_stepsでスケジューラのウォームアップ(だんだん学習率を変えていく)ステップ数を指定できます。 + + lr_scheduler_num_cycles は cosine with restartsスケジューラでのリスタート回数、lr_scheduler_power は polynomialスケジューラでのpolynomial power です。 + + 詳細については各自お調べください。 + + 任意のスケジューラを使う場合、任意のオプティマイザと同様に、`--scheduler_args`でオプション引数を指定してください。 + +### オプティマイザの指定について + +オプティマイザのオプション引数は--optimizer_argsオプションで指定してください。key=valueの形式で、複数の値が指定できます。また、valueはカンマ区切りで複数の値が指定できます。たとえばAdamWオプティマイザに引数を指定する場合は、``--optimizer_args weight_decay=0.01 betas=.9,.999``のようになります。 + +オプション引数を指定する場合は、それぞれのオプティマイザの仕様をご確認ください。 + +一部のオプティマイザでは必須の引数があり、省略すると自動的に追加されます(SGDNesterovのmomentumなど)。コンソールの出力を確認してください。 + +D-Adaptationオプティマイザは学習率を自動調整します。学習率のオプションに指定した値は学習率そのものではなくD-Adaptationが決定した学習率の適用率になりますので、通常は1.0を指定してください。Text EncoderにU-Netの半分の学習率を指定したい場合は、``--text_encoder_lr=0.5 --unet_lr=1.0``と指定します。 + 
+AdaFactorオプティマイザはrelative_step=Trueを指定すると学習率を自動調整できます(省略時はデフォルトで追加されます)。自動調整する場合は学習率のスケジューラにはadafactor_schedulerが強制的に使用されます。またscale_parameterとwarmup_initを指定するとよいようです。 + +自動調整する場合のオプション指定はたとえば ``--optimizer_args "relative_step=True" "scale_parameter=True" "warmup_init=True"`` のようになります。 + +学習率を自動調整しない場合はオプション引数 ``relative_step=False`` を追加してください。その場合、学習率のスケジューラにはconstant_with_warmupが、また勾配のclip normをしないことが推奨されているようです。そのため引数は ``--optimizer_type=adafactor --optimizer_args "relative_step=False" --lr_scheduler="constant_with_warmup" --max_grad_norm=0.0`` のようになります。 + +### 任意のオプティマイザを使う + +``torch.optim`` のオプティマイザを使う場合にはクラス名のみを(``--optimizer_type=RMSprop``など)、他のモジュールのオプティマイザを使う時は「モジュール名.クラス名」を指定してください(``--optimizer_type=bitsandbytes.optim.lamb.LAMB``など)。 + +(内部でimportlibしているだけで動作は未確認です。必要ならパッケージをインストールしてください。) + + + + +# メタデータファイルの作成 + +## 教師データの用意 + +前述のように学習させたい画像データを用意し、任意のフォルダに入れてください。 + +たとえば以下のように画像を格納します。 + +![教師データフォルダのスクショ](https://user-images.githubusercontent.com/52813779/208907739-8e89d5fa-6ca8-4b60-8927-f484d2a9ae04.png) + +## 自動キャプショニング + +キャプションを使わずタグだけで学習する場合はスキップしてください。 + +また手動でキャプションを用意する場合、キャプションは教師データ画像と同じディレクトリに、同じファイル名、拡張子.caption等で用意してください。各ファイルは1行のみのテキストファイルとします。 + +### BLIPによるキャプショニング + +最新版ではBLIPのダウンロード、重みのダウンロード、仮想環境の追加は不要になりました。そのままで動作します。 + +finetuneフォルダ内のmake_captions.pyを実行します。 + +``` +python finetune\make_captions.py --batch_size <バッチサイズ> <教師データフォルダ> +``` + +バッチサイズ8、教師データを親フォルダのtrain_dataに置いた場合、以下のようになります。 + +``` +python finetune\make_captions.py --batch_size 8 ..\train_data +``` + +キャプションファイルが教師データ画像と同じディレクトリに、同じファイル名、拡張子.captionで作成されます。 + +batch_sizeはGPUのVRAM容量に応じて増減してください。大きいほうが速くなります(VRAM 12GBでももう少し増やせると思います)。 +max_lengthオプションでキャプションの最大長を指定できます。デフォルトは75です。モデルをトークン長225で学習する場合には長くしても良いかもしれません。 +caption_extensionオプションでキャプションの拡張子を変更できます。デフォルトは.captionです(.txtにすると後述のDeepDanbooruと競合します)。 + +複数の教師データフォルダがある場合には、それぞれのフォルダに対して実行してください。 + +なお、推論にランダム性があるため、実行するたびに結果が変わります。固定する場合には--seedオプションで `--seed 42` のように乱数seedを指定してください。 + +その他のオプションは `--help` 
でヘルプをご参照ください(パラメータの意味についてはドキュメントがまとまっていないようで、ソースを見るしかないようです)。 + +デフォルトでは拡張子.captionでキャプションファイルが生成されます。 + +![captionが生成されたフォルダ](https://user-images.githubusercontent.com/52813779/208908845-48a9d36c-f6ee-4dae-af71-9ab462d1459e.png) + +たとえば以下のようなキャプションが付きます。 + +![キャプションと画像](https://user-images.githubusercontent.com/52813779/208908947-af936957-5d73-4339-b6c8-945a52857373.png) + +## DeepDanbooruによるタグ付け + +danbooruタグのタグ付け自体を行わない場合は「キャプションとタグ情報の前処理」に進んでください。 + +タグ付けはDeepDanbooruまたはWD14Taggerで行います。WD14Taggerのほうが精度が良いようです。WD14Taggerでタグ付けする場合は、次の章へ進んでください。 + +### 環境整備 + +DeepDanbooru https://github.com/KichangKim/DeepDanbooru を作業フォルダにcloneしてくるか、zipをダウンロードして展開します。私はzipで展開しました。 +またDeepDanbooruのReleasesのページ https://github.com/KichangKim/DeepDanbooru/releases の「DeepDanbooru Pretrained Model v3-20211112-sgd-e28」のAssetsから、deepdanbooru-v3-20211112-sgd-e28.zipをダウンロードしてきてDeepDanbooruのフォルダに展開します。 + +以下からダウンロードします。Assetsをクリックして開き、そこからダウンロードします。 + +![DeepDanbooruダウンロードページ](https://user-images.githubusercontent.com/52813779/208909417-10e597df-7085-41ee-bd06-3e856a1339df.png) + +以下のようなこういうディレクトリ構造にしてください + +![DeepDanbooruのディレクトリ構造](https://user-images.githubusercontent.com/52813779/208909486-38935d8b-8dc6-43f1-84d3-fef99bc471aa.png) + +Diffusersの環境に必要なライブラリをインストールします。DeepDanbooruのフォルダに移動してインストールします(実質的にはtensorflow-ioが追加されるだけだと思います)。 + +``` +pip install -r requirements.txt +``` + +続いてDeepDanbooru自体をインストールします。 + +``` +pip install . 
+``` + +以上でタグ付けの環境整備は完了です。 + +### タグ付けの実施 +DeepDanbooruのフォルダに移動し、deepdanbooruを実行してタグ付けを行います。 + +``` +deepdanbooru evaluate <教師データフォルダ> --project-path deepdanbooru-v3-20211112-sgd-e28 --allow-folder --save-txt +``` + +教師データを親フォルダのtrain_dataに置いた場合、以下のようになります。 + +``` +deepdanbooru evaluate ../train_data --project-path deepdanbooru-v3-20211112-sgd-e28 --allow-folder --save-txt +``` + +タグファイルが教師データ画像と同じディレクトリに、同じファイル名、拡張子.txtで作成されます。1件ずつ処理されるためわりと遅いです。 + +複数の教師データフォルダがある場合には、それぞれのフォルダに対して実行してください。 + +以下のように生成されます。 + +![DeepDanbooruの生成ファイル](https://user-images.githubusercontent.com/52813779/208909855-d21b9c98-f2d3-4283-8238-5b0e5aad6691.png) + +こんな感じにタグが付きます(すごい情報量……)。 + +![DeepDanbooruタグと画像](https://user-images.githubusercontent.com/52813779/208909908-a7920174-266e-48d5-aaef-940aba709519.png) + +## WD14Taggerによるタグ付け + +DeepDanbooruの代わりにWD14Taggerを用いる手順です。 + +Automatic1111氏のWebUIで使用しているtaggerを利用します。こちらのgithubページ(https://github.com/toriato/stable-diffusion-webui-wd14-tagger#mrsmilingwolfs-model-aka-waifu-diffusion-14-tagger )の情報を参考にさせていただきました。 + +最初の環境整備で必要なモジュールはインストール済みです。また重みはHugging Faceから自動的にダウンロードしてきます。 + +### タグ付けの実施 + +スクリプトを実行してタグ付けを行います。 +``` +python tag_images_by_wd14_tagger.py --batch_size <バッチサイズ> <教師データフォルダ> +``` + +教師データを親フォルダのtrain_dataに置いた場合、以下のようになります。 +``` +python tag_images_by_wd14_tagger.py --batch_size 4 ..\train_data +``` + +初回起動時にはモデルファイルがwd14_tagger_modelフォルダに自動的にダウンロードされます(フォルダはオプションで変えられます)。以下のようになります。 + +![ダウンロードされたファイル](https://user-images.githubusercontent.com/52813779/208910447-f7eb0582-90d6-49d3-a666-2b508c7d1842.png) + +タグファイルが教師データ画像と同じディレクトリに、同じファイル名、拡張子.txtで作成されます。 + +![生成されたタグファイル](https://user-images.githubusercontent.com/52813779/208910534-ea514373-1185-4b7d-9ae3-61eb50bc294e.png) + +![タグと画像](https://user-images.githubusercontent.com/52813779/208910599-29070c15-7639-474f-b3e4-06bd5a3df29e.png) + +threshオプションで、判定されたタグのconfidence(確信度)がいくつ以上でタグをつけるかが指定できます。デフォルトはWD14Taggerのサンプルと同じ0.35です。値を下げるとより多くのタグが付与されますが、精度は下がります。 + 
+batch_sizeはGPUのVRAM容量に応じて増減してください。大きいほうが速くなります(VRAM 12GBでももう少し増やせると思います)。caption_extensionオプションでタグファイルの拡張子を変更できます。デフォルトは.txtです。 + +model_dirオプションでモデルの保存先フォルダを指定できます。 + +またforce_downloadオプションを指定すると保存先フォルダがあってもモデルを再ダウンロードします。 + +複数の教師データフォルダがある場合には、それぞれのフォルダに対して実行してください。 + +## キャプションとタグ情報の前処理 + +スクリプトから処理しやすいようにキャプションとタグをメタデータとしてひとつのファイルにまとめます。 + +### キャプションの前処理 + +キャプションをメタデータに入れるには、作業フォルダ内で以下を実行してください(キャプションを学習に使わない場合は実行不要です)(実際は1行で記述します、以下同様)。`--full_path` オプションを指定してメタデータに画像ファイルの場所をフルパスで格納します。このオプションを省略すると相対パスで記録されますが、フォルダ指定が `.toml` ファイル内で別途必要になります。 + +``` +python merge_captions_to_metadata.py --full_path <教師データフォルダ> +  --in_json <読み込むメタデータファイル名> <メタデータファイル名> +``` + +メタデータファイル名は任意の名前です。 +教師データがtrain_data、読み込むメタデータファイルなし、メタデータファイルがmeta_cap.jsonの場合、以下のようになります。 + +``` +python merge_captions_to_metadata.py --full_path train_data meta_cap.json +``` + +caption_extensionオプションでキャプションの拡張子を指定できます。 + +複数の教師データフォルダがある場合には、full_path引数を指定しつつ、それぞれのフォルダに対して実行してください。 + +``` +python merge_captions_to_metadata.py --full_path + train_data1 meta_cap1.json +python merge_captions_to_metadata.py --full_path --in_json meta_cap1.json + train_data2 meta_cap2.json +``` + +in_jsonを省略すると書き込み先メタデータファイルがあるとそこから読み込み、そこに上書きします。 + +__※in_jsonオプションと書き込み先を都度書き換えて、別のメタデータファイルへ書き出すようにすると安全です。__ + +### タグの前処理 + +同様にタグもメタデータにまとめます(タグを学習に使わない場合は実行不要です)。 +``` +python merge_dd_tags_to_metadata.py --full_path <教師データフォルダ> + --in_json <読み込むメタデータファイル名> <書き込むメタデータファイル名> +``` + +先と同じディレクトリ構成で、meta_cap.jsonを読み、meta_cap_dd.jsonに書きだす場合、以下となります。 +``` +python merge_dd_tags_to_metadata.py --full_path train_data --in_json meta_cap.json meta_cap_dd.json +``` + +複数の教師データフォルダがある場合には、full_path引数を指定しつつ、それぞれのフォルダに対して実行してください。 + +``` +python merge_dd_tags_to_metadata.py --full_path --in_json meta_cap2.json + train_data1 meta_cap_dd1.json +python merge_dd_tags_to_metadata.py --full_path --in_json meta_cap_dd1.json + train_data2 meta_cap_dd2.json +``` + +in_jsonを省略すると書き込み先メタデータファイルがあるとそこから読み込み、そこに上書きします。 + 
+__※in_jsonオプションと書き込み先を都度書き換えて、別のメタデータファイルへ書き出すようにすると安全です。__ + +### キャプションとタグのクリーニング + +ここまででメタデータファイルにキャプションとDeepDanbooruのタグがまとめられています。ただ自動キャプショニングにしたキャプションは表記ゆれなどがあり微妙(※)ですし、タグにはアンダースコアが含まれていたりratingが付いていたりしますので(DeepDanbooruの場合)、エディタの置換機能などを用いてキャプションとタグのクリーニングをしたほうがいいでしょう。 + +※たとえばアニメ絵の少女を学習する場合、キャプションにはgirl/girls/woman/womenなどのばらつきがあります。また「anime girl」なども単に「girl」としたほうが適切かもしれません。 + +クリーニング用のスクリプトが用意してありますので、スクリプトの内容を状況に応じて編集してお使いください。 + +(教師データフォルダの指定は不要になりました。メタデータ内の全データをクリーニングします。) + +``` +python clean_captions_and_tags.py <読み込むメタデータファイル名> <書き込むメタデータファイル名> +``` + +--in_jsonは付きませんのでご注意ください。たとえば次のようになります。 + +``` +python clean_captions_and_tags.py meta_cap_dd.json meta_clean.json +``` + +以上でキャプションとタグの前処理は完了です。 + +## latentsの事前取得 + +※ このステップは必須ではありません。省略しても学習時にlatentsを取得しながら学習できます。 +また学習時に `random_crop` や `color_aug` などを行う場合にはlatentsの事前取得はできません(画像を毎回変えながら学習するため)。事前取得をしない場合、ここまでのメタデータで学習できます。 + +あらかじめ画像の潜在表現を取得しディスクに保存しておきます。それにより、学習を高速に進めることができます。あわせてbucketing(教師データをアスペクト比に応じて分類する)を行います。 + +作業フォルダで以下のように入力してください。 +``` +python prepare_buckets_latents.py --full_path <教師データフォルダ> + <読み込むメタデータファイル名> <書き込むメタデータファイル名> + + --batch_size <バッチサイズ> + --max_resolution <解像度 幅,高さ> + --mixed_precision <精度> +``` + +モデルがmodel.ckpt、バッチサイズ4、学習解像度は512\*512、精度no(float32)で、meta_clean.jsonからメタデータを読み込み、meta_lat.jsonに書き込む場合、以下のようになります。 + +``` +python prepare_buckets_latents.py --full_path + train_data meta_clean.json meta_lat.json model.ckpt + --batch_size 4 --max_resolution 512,512 --mixed_precision no +``` + +教師データフォルダにnumpyのnpz形式でlatentsが保存されます。 + +解像度の最小サイズを--min_bucket_resoオプションで、最大サイズを--max_bucket_resoで指定できます。デフォルトはそれぞれ256、1024です。たとえば最小サイズに384を指定すると、256\*1024や320\*768などの解像度は使わなくなります。 +解像度を768\*768のように大きくした場合、最大サイズに1280などを指定すると良いでしょう。 + +--flip_augオプションを指定すると左右反転のaugmentation(データ拡張)を行います。疑似的にデータ量を二倍に増やすことができますが、データが左右対称でない場合に指定すると(例えばキャラクタの外見、髪型など)学習がうまく行かなくなります。 + + 
+(反転した画像についてもlatentsを取得し、\*\_flip.npzファイルを保存する単純な実装です。fine_tune.pyには特にオプション指定は必要ありません。\_flip付きのファイルがある場合、flip付き・なしのファイルを、ランダムに読み込みます。) + +バッチサイズはVRAM 12GBでももう少し増やせるかもしれません。 +解像度は64で割り切れる数字で、"幅,高さ"で指定します。解像度はfine tuning時のメモリサイズに直結します。VRAM 12GBでは512,512が限界と思われます(※)。16GBなら512,704や512,768まで上げられるかもしれません。なお256,256等にしてもVRAM 8GBでは厳しいようです(パラメータやoptimizerなどは解像度に関係せず一定のメモリが必要なため)。 + +※batch size 1の学習で12GB VRAM、640,640で動いたとの報告もありました。 + +以下のようにbucketingの結果が表示されます。 + +![bucketingの結果](https://user-images.githubusercontent.com/52813779/208911419-71c00fbb-2ce6-49d5-89b5-b78d7715e441.png) + +複数の教師データフォルダがある場合には、full_path引数を指定しつつ、それぞれのフォルダに対して実行してください。 +``` +python prepare_buckets_latents.py --full_path + train_data1 meta_clean.json meta_lat1.json model.ckpt + --batch_size 4 --max_resolution 512,512 --mixed_precision no + +python prepare_buckets_latents.py --full_path + train_data2 meta_lat1.json meta_lat2.json model.ckpt + --batch_size 4 --max_resolution 512,512 --mixed_precision no + +``` +読み込み元と書き込み先を同じにすることも可能ですが別々の方が安全です。 + +__※引数を都度書き換えて、別のメタデータファイルに書き込むと安全です。__ + diff --git a/docs/train_README-zh.md b/docs/train_README-zh.md new file mode 100644 index 0000000000000000000000000000000000000000..066c3f914b97319f9adac2855927d0ce7db5d1d7 --- /dev/null +++ b/docs/train_README-zh.md @@ -0,0 +1,912 @@ +__由于文档正在更新中,描述可能有错误。__ + +# 关于训练,通用描述 +本库支持模型微调(fine tuning)、DreamBooth、训练LoRA和文本反转(Textual Inversion)(包括[XTI:P+](https://github.com/kohya-ss/sd-scripts/pull/327) +) +本文档将说明它们通用的训练数据准备方法和选项等。 + +# 概要 + +请提前参考本仓库的README,准备好环境。 + + +本节说明以下内容。 + +1. 准备训练数据(使用设置文件的新格式) +1. 训练中使用的术语的简要解释 +1. 先前的指定格式(不使用设置文件,而是从命令行指定) +1. 生成训练过程中的示例图像 +1. 各脚本中常用的共同选项 +1. 准备 fine tuning 方法的元数据:如说明文字(打标签)等 + + +1. 
如果只执行一次,训练就可以进行(相关内容,请参阅各个脚本的文档)。如果需要,以后可以随时参考。 + + + +# 关于准备训练数据 + +在任意文件夹(也可以是多个文件夹)中准备好训练数据的图像文件。支持 `.png`, `.jpg`, `.jpeg`, `.webp`, `.bmp` 格式的文件。通常不需要进行任何预处理,如调整大小等。 + +但是请勿使用极小的图像,若其尺寸比训练分辨率(稍后将提到)还小,建议事先使用超分辨率AI等进行放大。另外,请注意不要使用过大的图像(约为3000 x 3000像素以上),因为这可能会导致错误,建议事先缩小。 + +在训练时,需要整理要用于训练模型的图像数据,并将其指定给脚本。根据训练数据的数量、训练目标和说明(图像描述)是否可用等因素,可以使用几种方法指定训练数据。以下是其中的一些方法(每个名称都不是通用的,而是该存储库自定义的定义)。有关正则化图像的信息将在稍后提供。 + +1. DreamBooth、class + identifier方式(可使用正则化图像) + + 将训练目标与特定单词(identifier)相关联进行训练。无需准备说明。例如,当要学习特定角色时,由于无需准备说明,因此比较方便,但由于训练数据的所有元素都与identifier相关联,例如发型、服装、背景等,因此在生成时可能会出现无法更换服装的情况。 + +2. DreamBooth、说明方式(可使用正则化图像) + + 事先给每个图片写说明(caption),存放到文本文件中,然后进行训练。例如,通过将图像详细信息(如穿着白色衣服的角色A、穿着红色衣服的角色A等)记录在caption中,可以将角色和其他元素分离,并期望模型更准确地学习角色。 + +3. 微调方式(不可使用正则化图像) + + 先将说明收集到元数据文件中。支持分离标签和说明以及预先缓存latents等功能,以加速训练(这些将在另一篇文档中介绍)。(虽然名为fine tuning方式,但不仅限于fine tuning。) + +训练对象和你可以使用的规范方法的组合如下。 + +| 训练对象或方法 | 脚本 | DB/class+identifier | DB/caption | fine tuning | +|----------------| ----- | ----- | ----- | ----- | +| fine tuning微调模型 | `fine_tune.py`| x | x | o | +| DreamBooth训练模型 | `train_db.py`| o | o | x | +| LoRA | `train_network.py`| o | o | o | +| Textual Invesion | `train_textual_inversion.py`| o | o | o | + +## 选择哪一个 + +如果您想要训练LoRA、Textual Inversion而不需要准备说明(caption)文件,则建议使用DreamBooth class+identifier。如果您能够准备caption文件,则DreamBooth Captions方法更好。如果您有大量的训练数据并且不使用正则化图像,则请考虑使用fine-tuning方法。 + +对于DreamBooth也是一样的,但不能使用fine-tuning方法。若要进行微调,只能使用fine-tuning方式。 + +# 每种方法的指定方式 + +在这里,我们只介绍每种指定方法的典型模式。有关更详细的指定方法,请参见[数据集设置](./config_README-ja.md)。 + +# DreamBooth,class+identifier方法(可使用正则化图像) + +在该方法中,每个图像将被视为使用与 `class identifier` 相同的标题进行训练(例如 `shs dog`)。 + +这样一来,每张图片都相当于使用标题“分类标识”(例如“shs dog”)进行训练。 + +## step 1.确定identifier和class + +要将训练的目标与identifier和属于该目标的class相关联。 + +(虽然有很多称呼,但暂时按照原始论文的说法。) + +以下是简要说明(请查阅详细信息)。 + +class是训练目标的一般类别。例如,如果要学习特定品种的狗,则class将是“dog”。对于动漫角色,根据模型不同,可能是“boy”或“girl”,也可能是“1boy”或“1girl”。 + +identifier是用于识别训练目标并进行学习的单词。可以使用任何单词,但是根据原始论文,“Tokenizer生成的3个或更少字符的罕见单词”是最好的选择。 
+ +使用identifier和class,例如,“shs dog”可以将模型训练为从class中识别并学习所需的目标。 + +在图像生成时,使用“shs dog”将生成所学习狗种的图像。 + +(作为identifier,我最近使用的一些参考是“shs sts scs cpc coc cic msm usu ici lvl cic dii muk ori hru rik koo yos wny”等。最好是不包含在Danbooru标签中的单词。) + +## step 2. 决定是否使用正则化图像,并在使用时生成正则化图像 + +正则化图像是为防止前面提到的语言漂移,即整个类别被拉扯成为训练目标而生成的图像。如果不使用正则化图像,例如在 `shs 1girl` 中学习特定角色时,即使在简单的 `1girl` 提示下生成,也会越来越像该角色。这是因为 `1girl` 在训练时的标题中包含了该角色的信息。 + +通过同时学习目标图像和正则化图像,类别仍然保持不变,仅在将标识符附加到提示中时才生成目标图像。 + +如果您只想在LoRA或DreamBooth中使用特定的角色,则可以不使用正则化图像。 + +在Textual Inversion中也不需要使用(如果要学习的token string不包含在标题中,则不会学习任何内容)。 + +一般情况下,使用在训练目标模型时只使用类别名称生成的图像作为正则化图像是常见的做法(例如 `1girl`)。但是,如果生成的图像质量不佳,可以尝试修改提示或使用从网络上另外下载的图像。 + +(由于正则化图像也被训练,因此其质量会影响模型。) + +通常,准备数百张图像是理想的(图像数量太少会导致类别图像无法被归纳,特征也不会被学习)。 + +如果要使用生成的图像,生成图像的大小通常应与训练分辨率(更准确地说,是bucket的分辨率,见下文)相匹配。 + + + +## step 2. 设置文件的描述 + +创建一个文本文件,并将其扩展名更改为`.toml`。例如,您可以按以下方式进行描述: + +(以`#`开头的部分是注释,因此您可以直接复制粘贴,或者将其删除。) + +```toml +[general] +enable_bucket = true # 是否使用Aspect Ratio Bucketing + +[[datasets]] +resolution = 512 # 训练分辨率 +batch_size = 4 # 批次大小 + + [[datasets.subsets]] + image_dir = 'C:\hoge' # 指定包含训练图像的文件夹 + class_tokens = 'hoge girl' # 指定标识符类 + num_repeats = 10 # 训练图像的重复次数 + + # 以下仅在使用正则化图像时进行描述。不使用则删除 + [[datasets.subsets]] + is_reg = true + image_dir = 'C:\reg' # 指定包含正则化图像的文件夹 + class_tokens = 'girl' # 指定class + num_repeats = 1 # 正则化图像的重复次数,基本上1就可以了 +``` + +基本上只需更改以下几个地方即可进行训练。 + +1. 训练分辨率 + + 指定一个数字表示正方形(如果是 `512`,则为 512x512),如果使用方括号和逗号分隔的两个数字,则表示横向×纵向(如果是`[512,768]`,则为 512x768)。在SD1.x系列中,原始训练分辨率为512。指定较大的分辨率,如 `[512,768]` 可能会减少纵向和横向图像生成时的错误。在SD2.x 768系列中,分辨率为 `768`。 + +1. 批次大小 + + 指定同时训练多少个数据。这取决于GPU的VRAM大小和训练分辨率。详细信息将在后面说明。此外,fine tuning/DreamBooth/LoRA等也会影响批次大小,请查看各个脚本的说明。 + +1. 文件夹指定 + + 指定用于学习的图像和正则化图像(仅在使用时)的文件夹。指定包含图像数据的文件夹。 + +1. identifier 和 class 的指定 + + 如前所述,与示例相同。 + +1. 
重复次数 + + 将在后面说明。 + +### 关于重复次数 + +重复次数用于调整正则化图像和训练用图像的数量。由于正则化图像的数量多于训练用图像,因此需要重复使用训练用图像来达到一对一的比例,从而实现训练。 + +请将重复次数指定为“ __训练用图像的重复次数×训练用图像的数量≥正则化图像的重复次数×正则化图像的数量__ ”。 + +(1个epoch(指训练数据过完一遍)的数据量为“训练用图像的重复次数×训练用图像的数量”。如果正则化图像的数量多于这个值,则剩余的正则化图像将不会被使用。) + +## 步骤 3. 训练 + +详情请参考相关文档进行训练。 + +# DreamBooth,文本说明(caption)方式(可使用正则化图像) + +在此方式中,每个图像都将通过caption进行训练。 + +## 步骤 1. 准备文本说明文件 + +请将与图像具有相同文件名且扩展名为 `.caption`(可以在设置中更改)的文件放置在用于训练图像的文件夹中。每个文件应该只有一行。编码为 `UTF-8`。 + +## 步骤 2. 决定是否使用正则化图像,并在使用时生成正则化图像 + +与class+identifier格式相同。可以在规范化图像上附加caption,但通常不需要。 + +## 步骤 2. 编写设置文件 + +创建一个文本文件并将扩展名更改为 `.toml`。例如,您可以按以下方式进行描述: + +```toml +[general] +enable_bucket = true # 是否使用Aspect Ratio Bucketing + +[[datasets]] +resolution = 512 # 训练分辨率 +batch_size = 4 # 批次大小 + + [[datasets.subsets]] + image_dir = 'C:\hoge' # 指定包含训练图像的文件夹 + caption_extension = '.caption' # 若使用txt文件,更改此项 + num_repeats = 10 # 训练图像的重复次数 + + # 以下仅在使用正则化图像时进行描述。不使用则删除 + [[datasets.subsets]] + is_reg = true + image_dir = 'C:\reg' # 指定包含正则化图像的文件夹 + class_tokens = 'girl' # 指定class + num_repeats = 1 # 正则化图像的重复次数,基本上1就可以了 +``` + +基本上只需更改以下几个地方来训练。除非另有说明,否则与class+identifier方法相同。 + +1. 训练分辨率 +2. 批次大小 +3. 文件夹指定 +4. caption文件的扩展名 + + 可以指定任意的扩展名。 +5. 重复次数 + +## 步骤 3. 训练 + +详情请参考相关文档进行训练。 + +# 微调方法(fine tuning) + +## 步骤 1. 准备元数据 + +将caption和标签整合到管理文件中称为元数据。它的扩展名为 `.json`,格式为json。由于创建方法较长,因此在本文档的末尾进行描述。 + +## 步骤 2. 编写设置文件 + +创建一个文本文件,将扩展名设置为 `.toml`。例如,可以按以下方式编写: +```toml +[general] +shuffle_caption = true +keep_tokens = 1 + +[[datasets]] +resolution = 512 # 图像分辨率 +batch_size = 4 # 批次大小 + + [[datasets.subsets]] + image_dir = 'C:\piyo' # 指定包含训练图像的文件夹 + metadata_file = 'C:\piyo\piyo_md.json' # 元数据文件名 +``` + +基本上只需更改以下几个地方来训练。除非另有说明,否则与DreamBooth, class+identifier方法相同。 + +1. 训练分辨率 +2. 批次大小 +3. 指定文件夹 +4. 
元数据文件名 + + 指定使用后面所述方法创建的元数据文件。 + + +## 第三步:训练 + +详情请参考相关文档进行训练。 + +# 训练中使用的术语简单解释 + +由于省略了细节并且我自己也没有完全理解,因此请自行查阅详细信息。 + +## 微调(fine tuning) + +指训练模型并微调其性能。具体含义因用法而异,但在 Stable Diffusion 中,狭义的微调是指使用图像和caption进行训练模型。DreamBooth 可视为狭义微调的一种特殊方法。广义的微调包括 LoRA、Textual Inversion、Hypernetworks 等,包括训练模型的所有内容。 + +## 步骤(step) + +粗略地说,每次在训练数据上进行一次计算即为一步。具体来说,“将训练数据的caption传递给当前模型,将生成的图像与训练数据的图像进行比较,稍微更改模型,以使其更接近训练数据”即为一步。 + +## 批次大小(batch size) + +批次大小指定每个步骤要计算多少数据。批次计算可以提高速度。一般来说,批次大小越大,精度也越高。 + +“批次大小×步数”是用于训练的数据数量。因此,建议减少步数以增加批次大小。 + +(但是,例如,“批次大小为 1,步数为 1600”和“批次大小为 4,步数为 400”将不会产生相同的结果。如果使用相同的学习速率,通常后者会导致模型欠拟合。请尝试增加学习率(例如 `2e-6`),将步数设置为 500 等。) + +批次大小越大,GPU 内存消耗就越大。如果内存不足,将导致错误,或者在边缘时将导致训练速度降低。建议在任务管理器或 `nvidia-smi` 命令中检查使用的内存量进行调整。 + +注意,一个批次是指“一个数据单位”。 + +## 学习率 + + 学习率指的是每个步骤中改变的程度。如果指定一个大的值,学习速度就会加快,但是可能会出现变化太大导致模型崩溃或无法达到最佳状态的情况。如果指定一个小的值,学习速度会变慢,同时可能无法达到最佳状态。 + +在fine tuning、DreamBooth、LoRA等过程中,学习率会有很大的差异,并且也会受到训练数据、所需训练的模型、批次大小和步骤数等因素的影响。建议从通常值开始,观察训练状态并逐渐调整。 + +默认情况下,整个训练过程中学习率是固定的。但是可以通过调度程序指定学习率如何变化,因此结果也会有所不同。 + +## Epoch + +Epoch指的是训练数据被完整训练一遍(即数据已经迭代一轮)。如果指定了重复次数,则在重复后的数据迭代一轮后,为1个epoch。 + +1个epoch的步骤数通常为“数据量÷批次大小”,但如果使用Aspect Ratio Bucketing,则略微增加(由于不同bucket的数据不能在同一个批次中,因此步骤数会增加)。 + +## 长宽比分桶(Aspect Ratio Bucketing) + +Stable Diffusion 的 v1 是以 512\*512 的分辨率进行训练的,但同时也可以在其他分辨率下进行训练,例如 256\*1024 和 384\*640。这样可以减少裁剪的部分,希望更准确地学习图像和标题之间的关系。 + +此外,由于可以在任意分辨率下进行训练,因此不再需要事先统一图像数据的长宽比。 + +此值可以被设定,其在此之前的配置文件示例中已被启用(设置为 `true`)。 + +只要不超过作为参数给出的分辨率区域(= 内存使用量),就可以按 64 像素的增量(默认值,可更改)在垂直和水平方向上调整和创建训练分辨率。 + +在机器学习中,通常需要将所有输入大小统一,但实际上只要在同一批次中统一即可。 NovelAI 所说的分桶(bucketing) 指的是,预先将训练数据按照长宽比分类到每个学习分辨率下,并通过使用每个 bucket 内的图像创建批次来统一批次图像大小。 + +# 以前的指定格式(不使用 .toml 文件,而是使用命令行选项指定) + +这是一种通过命令行选项而不是指定 .toml 文件的方法。有 DreamBooth 类+标识符方法、DreamBooth caption方法、微调方法三种方式。 + +## DreamBooth、类+标识符方式 + +指定文件夹名称以指定迭代次数。还要使用 `train_data_dir` 和 `reg_data_dir` 选项。 + +### 第1步。准备用于训练的图像 + +创建一个用于存储训练图像的文件夹。__此外__,按以下名称创建目录。 + +``` +<迭代次数>_<标识符> <类别> +``` + +不要忘记下划线``_``。 + +例如,如果在名为“sls frog”的提示下重复数据 20 
次,则为“20_sls frog”。如下所示: + +![image](https://user-images.githubusercontent.com/52813779/210770636-1c851377-5936-4c15-90b7-8ac8ad6c2074.png) + +### 多个类别、多个标识符的训练 + +该方法很简单,在用于训练的图像文件夹中,需要准备多个文件夹,每个文件夹都是以“重复次数_<标识符> <类别>”命名的,同样,在正则化图像文件夹中,也需要准备多个文件夹,每个文件夹都是以“重复次数_<类别>”命名的。 + +例如,如果要同时训练“sls青蛙”和“cpc兔子”,则应按以下方式准备文件夹。 + +![image](https://user-images.githubusercontent.com/52813779/210777933-a22229db-b219-4cd8-83ca-e87320fc4192.png) + +如果一个类别包含多个对象,可以只使用一个正则化图像文件夹。例如,如果在1girl类别中有角色A和角色B,则可以按照以下方式处理: + +- train_girls + - 10_sls 1girl + - 10_cpc 1girl +- reg_girls + - 1_1girl + +### step 2. 准备正规化图像 + +这是使用正则化图像时的过程。 + +创建一个文件夹来存储正则化的图像。 __此外,__ 创建一个名为``_`` 的目录。 + +例如,使用提示“frog”并且不重复数据(仅一次): +![image](https://user-images.githubusercontent.com/52813779/210770897-329758e5-3675-49f1-b345-c135f1725832.png) + + +步骤3. 执行训练 + +执行每个训练脚本。使用 `--train_data_dir` 选项指定包含训练数据文件夹的父文件夹(不是包含图像的文件夹),使用 `--reg_data_dir` 选项指定包含正则化图像的父文件夹(不是包含图像的文件夹)。 + +## DreamBooth,带文本说明(caption)的方式 + +在包含训练图像和正则化图像的文件夹中,将与图像具有相同文件名的文件.caption(可以使用选项进行更改)放置在该文件夹中,然后从该文件中加载caption所作为提示进行训练。 + +※文件夹名称(标识符类)不再用于这些图像的训练。 + +默认的caption文件扩展名为.caption。可以使用训练脚本的 `--caption_extension` 选项进行更改。 使用 `--shuffle_caption` 选项,同时对每个逗号分隔的部分进行训练时会对训练时的caption进行混洗。 + +## 微调方式 + +创建元数据的方式与使用配置文件相同。 使用 `in_json` 选项指定元数据文件。 + +# 训练过程中的样本输出 + +通过在训练中使用模型生成图像,可以检查训练进度。将以下选项指定为训练脚本。 + +- `--sample_every_n_steps` / `--sample_every_n_epochs` + + 指定要采样的步数或epoch数。为这些数字中的每一个输出样本。如果两者都指定,则 epoch 数优先。 +- `--sample_prompts` + + 指定示例输出的提示文件。 + +- `--sample_sampler` + + 指定用于采样输出的采样器。 + `'ddim', 'pndm', 'heun', 'dpmsolver', 'dpmsolver++', 'dpmsingle', 'k_lms', 'k_euler', 'k_euler_a', 'k_dpm_2', 'k_dpm_2_a'`が選べます。 + +要输出样本,您需要提前准备一个包含提示的文本文件。每行输入一个提示。 + +```txt +# prompt 1 +masterpiece, best quality, 1girl, in white shirts, upper body, looking at viewer, simple background --n low quality, worst quality, bad anatomy,bad composition, poor, low effort --w 768 --h 768 --d 1 --l 7.5 --s 28 + +# prompt 2 +masterpiece, best quality, 1boy, in business suit, 
standing at street, looking back --n low quality, worst quality, bad anatomy,bad composition, poor, low effort --w 576 --h 832 --d 2 --l 5.5 --s 40 +``` + +以“#”开头的行是注释。您可以使用“`--` + 小写字母”为生成的图像指定选项,例如 `--n`。您可以使用: + +- `--n` 否定提示到下一个选项。 +- `--w` 指定生成图像的宽度。 +- `--h` 指定生成图像的高度。 +- `--d` 指定生成图像的种子。 +- `--l` 指定生成图像的 CFG 比例。 +- `--s` 指定生成过程中的步骤数。 + + +# 每个脚本通用的常用选项 + +文档更新可能跟不上脚本更新。在这种情况下,请使用 `--help` 选项检查可用选项。 +## 学习模型规范 + +- `--v2` / `--v_parameterization` + + 如果使用 Hugging Face 的 stable-diffusion-2-base 或来自它的微调模型作为学习目标模型(对于在推理时指示使用 `v2-inference.yaml` 的模型),`- 当使用-v2` 选项与 stable-diffusion-2、768-v-ema.ckpt 及其微调模型(对于在推理过程中使用 `v2-inference-v.yaml` 的模型),`- 指定两个 -v2`和 `--v_parameterization` 选项。 + + 以下几点在 Stable Diffusion 2.0 中发生了显着变化。 + + 1. 使用分词器 + 2. 使用哪个Text Encoder,使用哪个输出层(2.0使用倒数第二层) + 3. Text Encoder的输出维度(768->1024) + 4. U-Net的结构(CrossAttention的头数等) + 5. v-parameterization(采样方式好像变了) + + 其中base使用1-4,非base使用1-5(768-v)。使用 1-4 进行 v2 选择,使用 5 进行 v_parameterization 选择。 +- `--pretrained_model_name_or_path` + + 指定要从中执行额外训练的模型。您可以指定Stable Diffusion检查点文件(.ckpt 或 .safetensors)、diffusers本地磁盘上的模型目录或diffusers模型 ID(例如“stabilityai/stable-diffusion-2”)。 +## 训练设置 + +- `--output_dir` + + 指定训练后保存模型的文件夹。 + +- `--output_name` + + 指定不带扩展名的模型文件名。 + +- `--dataset_config` + + 指定描述数据集配置的 .toml 文件。 + +- `--max_train_steps` / `--max_train_epochs` + + 指定要训练的步数或epoch数。如果两者都指定,则 epoch 数优先。 +- +- `--mixed_precision` + + 训练混合精度以节省内存。指定像`--mixed_precision = "fp16"`。与无混合精度(默认)相比,精度可能较低,但训练所需的 GPU 内存明显较少。 + + (在RTX30系列以后也可以指定`bf16`,请配合您在搭建环境时做的加速设置)。 +- `--gradient_checkpointing` + + 通过逐步计算权重而不是在训练期间一次计算所有权重来减少训练所需的 GPU 内存量。关闭它不会影响准确性,但打开它允许更大的批次大小,所以那里有影响。 + + 另外,打开它通常会减慢速度,但可以增加批次大小,因此总的训练时间实际上可能会更快。 + +- `--xformers` / `--mem_eff_attn` + + 当指定 xformers 选项时,使用 xformers 的 CrossAttention。如果未安装 xformers 或发生错误(取决于环境,例如 `mixed_precision="no"`),请指定 `mem_eff_attn` 选项而不是使用 CrossAttention 的内存节省版本(xformers 比 慢)。 +- `--save_precision` + + 指定保存时的数据精度。为 save_precision 选项指定 float、fp16 或 bf16 将以该格式保存模型(在 DreamBooth 中保存 
Diffusers 格式时无效,微调)。当您想缩小模型的尺寸时请使用它。 +- `--save_every_n_epochs` / `--save_state` / `--resume` + 为 save_every_n_epochs 选项指定一个数字可以在每个时期的训练期间保存模型。 + + 如果同时指定save_state选项,训练状态包括优化器的状态等都会一起保存。。保存目的地将是一个文件夹。 + + 训练状态输出到目标文件夹中名为“-??????-state”(??????是epoch数)的文件夹中。长时间训练时请使用。 + + 使用 resume 选项从保存的训练状态恢复训练。指定训练状态文件夹(其中的状态文件夹,而不是 `output_dir`)。 + + 请注意,由于 Accelerator 规范,epoch 数和全局步数不会保存,即使恢复时它们也从 1 开始。 +- `--save_model_as` (DreamBooth, fine tuning 仅有的) + + 您可以从 `ckpt, safetensors, diffusers, diffusers_safetensors` 中选择模型保存格式。 + +- `--save_model_as=safetensors` 指定喜欢当读取Stable Diffusion格式(ckpt 或safetensors)并以diffusers格式保存时,缺少的信息通过从 Hugging Face 中删除 v1.5 或 v2.1 信息来补充。 + +- `--clip_skip` + + `2` 如果指定,则使用文本编码器 (CLIP) 的倒数第二层的输出。如果省略 1 或选项,则使用最后一层。 + + *SD2.0默认使用倒数第二层,训练SD2.0时请不要指定。 + + 如果被训练的模型最初被训练为使用第二层,则 2 是一个很好的值。 + + 如果您使用的是最后一层,那么整个模型都会根据该假设进行训练。因此,如果再次使用第二层进行训练,可能需要一定数量的teacher数据和更长时间的训练才能得到想要的训练结果。 +- `--max_token_length` + + 默认值为 75。您可以通过指定“150”或“225”来扩展令牌长度来训练。使用长字幕训练时指定。 + + 但由于训练时token展开的规范与Automatic1111的web UI(除法等规范)略有不同,如非必要建议用75训练。 + + 与clip_skip一样,训练与模型训练状态不同的长度可能需要一定量的teacher数据和更长的学习时间。 + +- `--persistent_data_loader_workers` + + 在 Windows 环境中指定它可以显着减少时期之间的延迟。 + +- `--max_data_loader_n_workers` + + 指定数据加载的进程数。大量的进程会更快地加载数据并更有效地使用 GPU,但会消耗更多的主内存。默认是"`8`或者`CPU并发执行线程数 - 1`,取小者",所以如果主存没有空间或者GPU使用率大概在90%以上,就看那些数字和 `2` 或将其降低到大约 `1`。 +- `--logging_dir` / `--log_prefix` + + 保存训练日志的选项。在 logging_dir 选项中指定日志保存目标文件夹。以 TensorBoard 格式保存日志。 + + 例如,如果您指定 --logging_dir=logs,将在您的工作文件夹中创建一个日志文件夹,并将日志保存在日期/时间文件夹中。 + 此外,如果您指定 --log_prefix 选项,则指定的字符串将添加到日期和时间之前。使用“--logging_dir=logs --log_prefix=db_style1_”进行识别。 + + 要检查 TensorBoard 中的日志,请打开另一个命令提示符并在您的工作文件夹中键入: + ``` + tensorboard --logdir=logs + ``` + + 我觉得tensorboard会在环境搭建的时候安装,如果没有安装,请用`pip install tensorboard`安装。) + + 然后打开浏览器到http://localhost:6006/就可以看到了。 +- `--noise_offset` +本文的实现:https://www.crosslabs.org//blog/diffusion-with-offset-noise + + 看起来它可能会为整体更暗和更亮的图像产生更好的结果。它似乎对 LoRA 训练也有效。指定一个大约 0.1 的值似乎很好。 + +- `--debug_dataset` + + 
通过添加此选项,您可以在训练之前检查将训练什么样的图像数据和标题。按 Esc 退出并返回命令行。按 `S` 进入下一步(批次),按 `E` 进入下一个epoch。 + + *图片在 Linux 环境(包括 Colab)下不显示。 + +- `--vae` + + 如果您在 vae 选项中指定Stable Diffusion检查点、VAE 检查点文件、扩散模型或 VAE(两者都可以指定本地或拥抱面模型 ID),则该 VAE 用于训练(缓存时的潜伏)或在训练过程中获得潜伏)。 + + 对于 DreamBooth 和微调,保存的模型将包含此 VAE + +- `--cache_latents` + + 在主内存中缓存 VAE 输出以减少 VRAM 使用。除 flip_aug 之外的任何增强都将不可用。此外,整体训练速度略快。 +- `--min_snr_gamma` + + 指定最小 SNR 加权策略。细节是[这里](https://github.com/kohya-ss/sd-scripts/pull/308)请参阅。论文中推荐`5`。 + +## 优化器相关 + +- `--optimizer_type` + -- 指定优化器类型。您可以指定 + - AdamW : [torch.optim.AdamW](https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html) + - 与过去版本中未指定选项时相同 + - AdamW8bit : 参数同上 + - PagedAdamW8bit : 参数同上 + - 与过去版本中指定的 --use_8bit_adam 相同 + - Lion : https://github.com/lucidrains/lion-pytorch + - Lion8bit : 参数同上 + - PagedLion8bit : 参数同上 + - 与过去版本中指定的 --use_lion_optimizer 相同 + - SGDNesterov : [torch.optim.SGD](https://pytorch.org/docs/stable/generated/torch.optim.SGD.html), nesterov=True + - SGDNesterov8bit : 参数同上 + - DAdaptation(DAdaptAdamPreprint) : https://github.com/facebookresearch/dadaptation + - DAdaptAdam : 参数同上 + - DAdaptAdaGrad : 参数同上 + - DAdaptAdan : 参数同上 + - DAdaptAdanIP : 参数同上 + - DAdaptLion : 参数同上 + - DAdaptSGD : 参数同上 + - Prodigy : https://github.com/konstmish/prodigy + - AdaFactor : [Transformers AdaFactor](https://huggingface.co/docs/transformers/main_classes/optimizer_schedules) + - 任何优化器 + +- `--learning_rate` + + 指定学习率。合适的学习率取决于训练脚本,所以请参考每个解释。 +- `--lr_scheduler` / `--lr_warmup_steps` / `--lr_scheduler_num_cycles` / `--lr_scheduler_power` + + 学习率的调度程序相关规范。 + + 使用 lr_scheduler 选项,您可以从线性、余弦、cosine_with_restarts、多项式、常数、constant_with_warmup 或任何调度程序中选择学习率调度程序。默认值是常量。 + + 使用 lr_warmup_steps,您可以指定预热调度程序的步数(逐渐改变学习率)。 + + lr_scheduler_num_cycles 是 cosine with restarts 调度器中的重启次数,lr_scheduler_power 是多项式调度器中的多项式幂。 + + 有关详细信息,请自行研究。 + + 要使用任何调度程序,请像使用任何优化器一样使用“--scheduler_args”指定可选参数。 +### 关于指定优化器 + +使用 --optimizer_args 选项指定优化器选项参数。可以以key=value的格式指定多个值。此外,您可以指定多个值,以逗号分隔。例如,要指定 AdamW 
优化器的参数,``--optimizer_args weight_decay=0.01 betas=.9,.999``。 + +指定可选参数时,请检查每个优化器的规格。 +一些优化器有一个必需的参数,如果省略它会自动添加(例如 SGDNesterov 的动量)。检查控制台输出。 + +D-Adaptation 优化器自动调整学习率。学习率选项指定的值不是学习率本身,而是D-Adaptation决定的学习率的应用率,所以通常指定1.0。如果您希望 Text Encoder 的学习率是 U-Net 的一半,请指定 ``--text_encoder_lr=0.5 --unet_lr=1.0``。 +如果指定 relative_step=True,AdaFactor 优化器可以自动调整学习率(如果省略,将默认添加)。自动调整时,学习率调度器被迫使用 adafactor_scheduler。此外,指定 scale_parameter 和 warmup_init 似乎也不错。 + +自动调整的选项类似于``--optimizer_args "relative_step=True" "scale_parameter=True" "warmup_init=True"``。 + +如果您不想自动调整学习率,请添加可选参数 ``relative_step=False``。在那种情况下,似乎建议将 constant_with_warmup 用于学习率调度程序,而不要为梯度剪裁范数。所以参数就像``--optimizer_type=adafactor --optimizer_args "relative_step=False" --lr_scheduler="constant_with_warmup" --max_grad_norm=0.0``。 + +### 使用任何优化器 + +使用 ``torch.optim`` 优化器时,仅指定类名(例如 ``--optimizer_type=RMSprop``),使用其他模块的优化器时,指定“模块名.类名”。(例如``--optimizer_type=bitsandbytes.optim.lamb.LAMB``)。 + +(内部仅通过 importlib 未确认操作。如果需要,请安装包。) + + +# 创建元数据文件 + +## 准备训练数据 + +如上所述准备好你要训练的图像数据,放在任意文件夹中。 + +例如,存储这样的图像: + +![教师数据文件夹的屏幕截图](https://user-images.githubusercontent.com/52813779/208907739-8e89d5fa-6ca8-4b60-8927-f484d2a9ae04.png) + +## 自动captioning + +如果您只想训练没有标题的标签,请跳过。 + +另外,手动准备caption时,请准备在与教师数据图像相同的目录下,文件名相同,扩展名.caption等。每个文件应该是只有一行的文本文件。 +### 使用 BLIP 添加caption + +最新版本不再需要 BLIP 下载、权重下载和额外的虚拟环境。按原样工作。 + +运行 finetune 文件夹中的 make_captions.py。 + +``` +python finetune\make_captions.py --batch_size <バッチサイズ> <教師データフォルダ> +``` + +如果batch size为8,训练数据放在父文件夹train_data中,则会如下所示 +``` +python finetune\make_captions.py --batch_size 8 ..\train_data +``` + +caption文件创建在与教师数据图像相同的目录中,具有相同的文件名和扩展名.caption。 + +根据 GPU 的 VRAM 容量增加或减少 batch_size。越大越快(我认为 12GB 的 VRAM 可以多一点)。 +您可以使用 max_length 选项指定caption的最大长度。默认值为 75。如果使用 225 的令牌长度训练模型,它可能会更长。 +您可以使用 caption_extension 选项更改caption扩展名。默认为 .caption(.txt 与稍后描述的 DeepDanbooru 冲突)。 +如果有多个教师数据文件夹,则对每个文件夹执行。 + +请注意,推理是随机的,因此每次运行时结果都会发生变化。如果要修复它,请使用 --seed 选项指定一个随机数种子,例如 `--seed 42`。 + +其他的选项,请参考help with 
`--help`(好像没有文档说明参数的含义,得看源码)。 + +默认情况下,会生成扩展名为 .caption 的caption文件。 + +![caption生成的文件夹](https://user-images.githubusercontent.com/52813779/208908845-48a9d36c-f6ee-4dae-af71-9ab462d1459e.png) + +例如,标题如下: + +![caption和图像](https://user-images.githubusercontent.com/52813779/208908947-af936957-5d73-4339-b6c8-945a52857373.png) + +## 由 DeepDanbooru 标记 + +如果不想给danbooru标签本身打标签,请继续“标题和标签信息的预处理”。 + +标记是使用 DeepDanbooru 或 WD14Tagger 完成的。 WD14Tagger 似乎更准确。如果您想使用 WD14Tagger 进行标记,请跳至下一章。 +### 环境布置 + +将 DeepDanbooru https://github.com/KichangKim/DeepDanbooru 克隆到您的工作文件夹中,或下载并展开 zip。我解压缩了它。 +另外,从 DeepDanbooru 发布页面 https://github.com/KichangKim/DeepDanbooru/releases 上的“DeepDanbooru 预训练模型 v3-20211112-sgd-e28”的资产下载 deepdanbooru-v3-20211112-sgd-e28.zip 并解压到 DeepDanbooru 文件夹。 + +从下面下载。单击以打开资产并从那里下载。 + +![DeepDanbooru下载页面](https://user-images.githubusercontent.com/52813779/208909417-10e597df-7085-41ee-bd06-3e856a1339df.png) + +做一个这样的目录结构 + +![DeepDanbooru的目录结构](https://user-images.githubusercontent.com/52813779/208909486-38935d8b-8dc6-43f1-84d3-fef99bc471aa.png) +为diffusers环境安装必要的库。进入 DeepDanbooru 文件夹并安装它(我认为它实际上只是添加了 tensorflow-io)。 +``` +pip install -r requirements.txt +``` + +接下来,安装 DeepDanbooru 本身。 + +``` +pip install . 
+``` + +这样就完成了标注环境的准备工作。 + +### 实施标记 +转到 DeepDanbooru 的文件夹并运行 deepdanbooru 进行标记。 +``` +deepdanbooru evaluate <教师资料夹> --project-path deepdanbooru-v3-20211112-sgd-e28 --allow-folder --save-txt +``` + +如果将训练数据放在父文件夹train_data中,则如下所示。 +``` +deepdanbooru evaluate ../train_data --project-path deepdanbooru-v3-20211112-sgd-e28 --allow-folder --save-txt +``` + +在与教师数据图像相同的目录中创建具有相同文件名和扩展名.txt 的标记文件。它很慢,因为它是一个接一个地处理的。 + +如果有多个教师数据文件夹,则对每个文件夹执行。 + +它生成如下。 + +![DeepDanbooru生成的文件](https://user-images.githubusercontent.com/52813779/208909855-d21b9c98-f2d3-4283-8238-5b0e5aad6691.png) + +它会被这样标记(信息量很大...)。 + +![DeepDanbooru标签和图片](https://user-images.githubusercontent.com/52813779/208909908-a7920174-266e-48d5-aaef-940aba709519.png) + +## WD14Tagger标记为 + +此过程使用 WD14Tagger 而不是 DeepDanbooru。 + +使用 Mr. Automatic1111 的 WebUI 中使用的标记器。我参考了这个 github 页面上的信息 (https://github.com/toriato/stable-diffusion-webui-wd14-tagger#mrsmilingwolfs-model-aka-waifu-diffusion-14-tagger)。 + +初始环境维护所需的模块已经安装。权重自动从 Hugging Face 下载。 +### 实施标记 + +运行脚本以进行标记。 +``` +python tag_images_by_wd14_tagger.py --batch_size <バッチサイズ> <教師データフォルダ> +``` + +如果将训练数据放在父文件夹train_data中,则如下所示 +``` +python tag_images_by_wd14_tagger.py --batch_size 4 ..\train_data +``` + +模型文件将在首次启动时自动下载到 wd14_tagger_model 文件夹(文件夹可以在选项中更改)。它将如下所示。 +![下载文件](https://user-images.githubusercontent.com/52813779/208910447-f7eb0582-90d6-49d3-a666-2b508c7d1842.png) + +在与教师数据图像相同的目录中创建具有相同文件名和扩展名.txt 的标记文件。 +![生成的标签文件](https://user-images.githubusercontent.com/52813779/208910534-ea514373-1185-4b7d-9ae3-61eb50bc294e.png) + +![标签和图片](https://user-images.githubusercontent.com/52813779/208910599-29070c15-7639-474f-b3e4-06bd5a3df29e.png) + +使用 thresh 选项,您可以指定确定的标签的置信度数以附加标签。默认值为 0.35,与 WD14Tagger 示例相同。较低的值给出更多的标签,但准确性较低。 + +根据 GPU 的 VRAM 容量增加或减少 batch_size。越大越快(我认为 12GB 的 VRAM 可以多一点)。您可以使用 caption_extension 选项更改标记文件扩展名。默认为 .txt。 + +您可以使用 model_dir 选项指定保存模型的文件夹。 + +此外,如果指定 force_download 选项,即使有保存目标文件夹,也会重新下载模型。 + +如果有多个教师数据文件夹,则对每个文件夹执行。 + +## 预处理caption和标签信息 + 
+将caption和标签作为元数据合并到一个文件中,以便从脚本中轻松处理。 +### caption预处理 + +要将caption放入元数据,请在您的工作文件夹中运行以下命令(如果您不使用caption进行训练,则不需要运行它)(它实际上是一行,依此类推)。指定 `--full_path` 选项以将图像文件的完整路径存储在元数据中。如果省略此选项,则会记录相对路径,但 .toml 文件中需要单独的文件夹规范。 +``` +python merge_captions_to_metadata.py --full_path <教师资料夹> +  --in_json <要读取的元数据文件名> <元数据文件名> +``` + +元数据文件名是任意名称。 +如果训练数据为train_data,没有读取元数据文件,元数据文件为meta_cap.json,则会如下。 +``` +python merge_captions_to_metadata.py --full_path train_data meta_cap.json +``` + +您可以使用 caption_extension 选项指定标题扩展。 + +如果有多个教师数据文件夹,请指定 full_path 参数并为每个文件夹执行。 +``` +python merge_captions_to_metadata.py --full_path + train_data1 meta_cap1.json +python merge_captions_to_metadata.py --full_path --in_json meta_cap1.json + train_data2 meta_cap2.json +``` +如果省略in_json,如果有写入目标元数据文件,将从那里读取并覆盖。 + +__* 每次重写 in_json 选项和写入目标并写入单独的元数据文件是安全的。 __ +### 标签预处理 + +同样,标签也收集在元数据中(如果标签不用于训练,则无需这样做)。 +``` +python merge_dd_tags_to_metadata.py --full_path <教师资料夹> + --in_json <要读取的元数据文件名> <要写入的元数据文件名> +``` + +同样的目录结构,读取meta_cap.json和写入meta_cap_dd.json时,会是这样的。 +``` +python merge_dd_tags_to_metadata.py --full_path train_data --in_json meta_cap.json meta_cap_dd.json +``` + +如果有多个教师数据文件夹,请指定 full_path 参数并为每个文件夹执行。 + +``` +python merge_dd_tags_to_metadata.py --full_path --in_json meta_cap2.json + train_data1 meta_cap_dd1.json +python merge_dd_tags_to_metadata.py --full_path --in_json meta_cap_dd1.json + train_data2 meta_cap_dd2.json +``` + +如果省略in_json,如果有写入目标元数据文件,将从那里读取并覆盖。 +__※ 通过每次重写 in_json 选项和写入目标,写入单独的元数据文件是安全的。 __ +### 标题和标签清理 + +到目前为止,标题和DeepDanbooru标签已经被整理到元数据文件中。然而,自动标题生成的标题存在表达差异等微妙问题(※),而标签中可能包含下划线和评级(DeepDanbooru的情况下)。因此,最好使用编辑器的替换功能清理标题和标签。 + +※例如,如果要学习动漫中的女孩,标题可能会包含girl/girls/woman/women等不同的表达方式。另外,将"anime girl"简单地替换为"girl"可能更合适。 + +我们提供了用于清理的脚本,请根据情况编辑脚本并使用它。 + +(不需要指定教师数据文件夹。将清理元数据中的所有数据。) + +``` +python clean_captions_and_tags.py <要读取的元数据文件名> <要写入的元数据文件名> +``` + +--in_json 请注意,不包括在内。例如: + +``` +python clean_captions_and_tags.py meta_cap_dd.json meta_clean.json +``` + +标题和标签的预处理现已完成。 + +## 预先获取 
latents + +※ 这一步骤并非必须。即使省略此步骤,也可以在训练过程中获取 latents。但是,如果在训练时执行 `random_crop` 或 `color_aug` 等操作,则无法预先获取 latents(因为每次图像都会改变)。如果不进行预先获取,则可以使用到目前为止的元数据进行训练。 + +提前获取图像的潜在表达并保存到磁盘上。这样可以加速训练过程。同时进行 bucketing(根据宽高比对训练数据进行分类)。 + +请在工作文件夹中输入以下内容。 + +``` +python prepare_buckets_latents.py --full_path <教师资料夹> + <要读取的元数据文件名> <要写入的元数据文件名> + <要微调的模型名称或检查点> + --batch_size <批次大小> + --max_resolution <分辨率宽、高> + --mixed_precision <准确性> +``` + +如果要从meta_clean.json中读取元数据,并将其写入meta_lat.json,使用模型model.ckpt,批处理大小为4,训练分辨率为512*512,精度为no(float32),则应如下所示。 +``` +python prepare_buckets_latents.py --full_path + train_data meta_clean.json meta_lat.json model.ckpt + --batch_size 4 --max_resolution 512,512 --mixed_precision no +``` + +教师数据文件夹中,latents以numpy的npz格式保存。 + +您可以使用--min_bucket_reso选项指定最小分辨率大小,--max_bucket_reso指定最大大小。默认值分别为256和1024。例如,如果指定最小大小为384,则将不再使用分辨率为256 * 1024或320 * 768等。如果将分辨率增加到768 * 768等较大的值,则最好将最大大小指定为1280等。 + +如果指定--flip_aug选项,则进行左右翻转的数据增强。虽然这可以使数据量伪造一倍,但如果数据不是左右对称的(例如角色外观、发型等),则可能会导致训练不成功。 + +对于翻转的图像,也会获取latents,并保存名为\ *_flip.npz的文件,这是一个简单的实现。在fline_tune.py中不需要特定的选项。如果有带有\_flip的文件,则会随机加载带有和不带有flip的文件。 + +即使VRAM为12GB,批次大小也可以稍微增加。分辨率以“宽度,高度”的形式指定,必须是64的倍数。分辨率直接影响fine tuning时的内存大小。在12GB VRAM中,512,512似乎是极限(*)。如果有16GB,则可以将其提高到512,704或512,768。即使分辨率为256,256等,VRAM 8GB也很难承受(因为参数、优化器等与分辨率无关,需要一定的内存)。 + +*有报道称,在batch size为1的训练中,使用12GB VRAM和640,640的分辨率。 + +以下是bucketing结果的显示方式。 + +![bucketing的結果](https://user-images.githubusercontent.com/52813779/208911419-71c00fbb-2ce6-49d5-89b5-b78d7715e441.png) + +如果有多个教师数据文件夹,请指定 full_path 参数并为每个文件夹执行 + +``` +python prepare_buckets_latents.py --full_path + train_data1 meta_clean.json meta_lat1.json model.ckpt + --batch_size 4 --max_resolution 512,512 --mixed_precision no + +python prepare_buckets_latents.py --full_path + train_data2 meta_lat1.json meta_lat2.json model.ckpt + --batch_size 4 --max_resolution 512,512 --mixed_precision no + +``` +可以将读取源和写入目标设为相同,但分开设定更为安全。 + +__※建议每次更改参数并将其写入另一个元数据文件,以确保安全性。__ diff --git a/docs/train_README.md 
b/docs/train_README.md new file mode 100644 index 0000000000000000000000000000000000000000..1ff480b758680e3634efbf7707a98c27f094c4c5 --- /dev/null +++ b/docs/train_README.md @@ -0,0 +1,1007 @@ +> **Note:** This document is under revision, and some errors may persist. Please refer to the latest version for accurate information. + +# Training, Common Section + +This document facilitates training with models such as DreamBooth and LoRA, including [XTI:P+](https://github.com/kohya-ss/sd-scripts/pull/327). It details how to prepare training data and outlines common options. + +# Overview + +For environment setup, please refer to the README of this repository. The document covers the following topics: + +1. Preparation of training data (new format using configuration files) +2. Simplified explanation of terms used in training +3. Previous specification format (specify without using configuration files) +4. Sample image generation during training +5. Common options used in each script +6. Preparation of metadata for fine tuning: captioning, etc. + +Start training by following the initial steps; further details are available in each script's documentation. + +# Training Data Preparation + +Prepare your training data in any designated folder(s). Supported formats include `.png`, `.jpg`, `.jpeg`, `.webp`, `.bmp`. Generally, preprocessing such as resizing is not required. + +However, avoid extremely small images: if an image is smaller than the training resolution (discussed later), it is recommended to upscale it with a super-resolution AI beforehand. Additionally, it's advisable to resize overly large images (around 3000x3000 pixels?) to prevent errors. + +When organizing your image data for training, you can specify the training data in various ways depending on the number of data sets, the learning targets, captions (image descriptions), etc. The methods available (specific to this repository) are described below. The usage of regularization images will be discussed later. + +1. 
**DreamBooth, Class+Identifier Method (regularization images available):** + + Learn from images associated with a specific identifier without needing captions. This method is convenient for learning a specific character, but because every element of the training data (hairstyle, clothing, background, etc.) becomes associated with the identifier, the model may be unable to change, for example, the character's clothing at generation time. + +2. **DreamBooth, Captioning Method (regularization images available):** + + Prepare a text file with captions for each image. If you aim to learn a specific character, detailed descriptions in the captions can help differentiate the character from other elements, enabling the model to learn the character more precisely. + +3. **Fine Tuning Method (regularization images not available):** + + Pre-prepare captions in a metadata file. Tags and captions can be managed separately, and caching latents can speed up learning. This method is broadly referred to as fine tuning but has various applications. + +The following table illustrates the combinations of targets to be learned or methods of use: + +| Target or Method | Script | DB / Class+Identifier | DB / Caption | Fine Tuning | +|----------------------------------|-----------------------------|-----------------------|--------------|-------------| +| Model Fine Tuning | `fine_tune.py` | x | x | o | +| Model DreamBooth | `train_db.py` | o | o | x | +| LoRA | `train_network.py` | o | o | o | +| Textual Inversion | `train_textual_inversion.py`| o | o | o | + +## Method Selection Guide + +For those considering LoRA or Textual Inversion methods and who prefer not to prepare a caption file, the DreamBooth class+identifier method is recommended. However, if preparing a caption file is feasible, the DreamBooth captioning method may be more effective. Additionally, if you possess a substantial volume of training data and do not intend to use regularization images, the fine tuning method should also be considered. 
+ +Note: The same guidance applies to DreamBooth model training, except that the fine tuning method cannot be used with it. Conversely, model fine tuning supports only the fine tuning method. + +## Configuration Guidelines + +Below are the general guidelines for configuring each method. For detailed configuration instructions, please refer to [Dataset Settings](./config_README-ja.md). + +# DreamBooth, Class+Identifier Method (Regularization Images Available) + +This method involves training each image with a caption formatted as `identifier class` (e.g., `shs dog`). + +## Step 1. Determine Identifier and Class + +Choose a unique identifier for recognizing and learning the target, and determine the class the target belongs to. + +Here is a simple explanation (for further details, please conduct your own research): + +- **Class**: This refers to the broad category of the learning target. For instance, to learn about a specific dog breed, the class would be "dog." For anime characters, appropriate classes might be "boy" or "girl," "1boy," "1girl," etc., based on the model. + +- **Identifier**: This is a specific term used to identify and learn about the target. Any term can be used, but following the original paper's recommendations, a "rare word that can be tokenized into a single token with a maximum length of three characters" is suggested. + +Using both an identifier and a class (e.g., `shs dog`) allows for precise target recognition and learning. + +When generating images post-training, images of the learned dog breed can be produced by specifying `shs dog`. + +(Reference: Recent identifiers I've used include `shs`, `sts`, `scs`, `cpc`, `coc`, `cic`, `msm`, `usu`, `ici`, `lvl`, `cic`, `dii`, `muk`, `ori`, `hru`, `rik`, `koo`, `yos`, `wny`. Ideally, select identifiers not listed in Danbooru Tags.) + +## Step 2. Decide Whether to Use Regularization Images and Generate Them if Necessary + +Regularization images help prevent the model from biasing too heavily towards a single learning target, a phenomenon known as "language drift". 
For instance, when training a specific character with the caption `shs 1girl` without regularization images, images generated from the plain prompt `1girl` will also come to resemble that character, because `1girl` appears in the training caption. + +Using regularization images alongside the target image ensures that the class remains broadly defined, while the identifier is specifically learned only when added to the prompt. + +If the objective is only to have a specific character appear (e.g., with LoRA or DreamBooth), regularization images are not required. + +Regularization images are also unnecessary for Textual Inversion, since nothing is learned unless the token string being trained is included in the caption. + +Typically, regularization images are generated using only the class name (e.g., `1girl`) from the learning target model. If the quality of these generated images is subpar, consider adjusting the prompt or using downloaded images from the internet. + +Note: The quality of regularization images can influence the model, so choose carefully. + +It is generally advised to prepare around 100 images to ensure enough diversity to generalize the class images and effectively learn their characteristics. + +For generated images, it's best to match the size of the training resolution, or more specifically, the resolution of the bucket (to be discussed later). + +## Step 2. Write the Configuration File + +Create a text file and change the extension to `.toml`. Example configuration: + +(Note: Lines starting with `#` are comments and can be copied as-is or omitted if preferred.) 
+ +```toml +[general] +enable_bucket = true # Whether to use Aspect Ratio Bucketing + +[[datasets]] +resolution = 512 # Training resolution +batch_size = 4 # Batch size + + [[datasets.subsets]] + image_dir = 'C:\\hoge' # Specify the folder for learning target images + class_tokens = 'hoge girl' # Specify the identifier class + num_repeats = 10 # Number of repetitions for the learning target image + + # Include the following only if using regularization images + [[datasets.subsets]] + is_reg = true + image_dir = 'C:\\reg' # Specify the folder for regularization images + class_tokens = 'girl' # Specify the class for regularization + num_repeats = 1 # Number of repetitions for regularization images, typically 1 is sufficient +``` + +# Training Configuration Details + +When setting up your training environment, consider the following parameters to optimize your model's learning effectiveness. + +## 1. Training Resolution + +Specify the resolution at which the training images should be processed. Use a single number for square images (e.g., `512` produces a 512x512 image). For rectangular images, provide two numbers separated by a comma (e.g., `[512,768]` for 512x768). Standard resolutions for SD1.x models are typically 512 pixels, whereas SD2.x models are often trained at 768 pixels. + +## 2. Batch Size + +This parameter defines the number of data instances processed simultaneously during training. It should be adjusted based on the GPU VRAM capacity and the desired training resolution. Further guidance is provided in later sections. + +## 3. Folder Specification + +Identify the directory containing the target images for learning and, if applicable, regularization images. This should be the direct path to the folder holding the actual image files. + +## 4. Identifier and Class + +Specify these as detailed in the previous guidelines. + +## 5. 
Number of Repetitions + +For a comprehensive understanding of how many times the training images should be repeated, see the detailed explanation below. + +### About Repetitions + +The number of repetitions helps synchronize the count of regularization images with the number of target images for learning. Typically, there are more regularization images than target images, so the target images are repeated to maintain a 1:1 ratio. + +Configure the number of repetitions as follows: "__Number of target image repetitions * Number of target images ≥ Number of regularization image repetitions * Number of regularization images__". + +(The total data count for one epoch—when the data cycles through once—is "Number of target image repetitions * Number of target images". If there are more regularization images than this total, the surplus will not be utilized.) + +## Step 3. Training Execution + +Consult the specific documentation related to your training setup for detailed instructions. + +# DreamBooth, Captioning Method (Regularization Images Available) + +In this approach, each image is learned with an associated caption, facilitating more nuanced understanding and generation of visual content. + +## step 1. Prepare the caption file + +Place a file with the same name as the image in the folder where the learning image is placed, with the extension `.caption` (which can be changed in the settings). Each file should contain only one line. The encoding is `UTF-8`. + +## step 2. Decide whether to use regularization images and generate them if necessary + +Same as class+identifier. You can also add captions to regularization images, but it is usually not necessary. + +## step 2. Write the configuration file + +Create a text file and change the extension to `.toml`. For example, write as follows. 
+ +```toml +[general] +enable_bucket = true # Whether to use Aspect Ratio Bucketing + +[[datasets]] +resolution = 512 # Training resolution +batch_size = 4 # Batch size + + [[datasets.subsets]] + image_dir = 'C:\hoge' # Specify the folder where the learning target images are placed + caption_extension = '.caption' # Specify the extension of the caption file. If you want to use .txt, change it + num_repeats = 10 # Number of times to repeat the learning target image + + # Write the following only if you are using regularization images + [[datasets.subsets]] + is_reg = true + image_dir = 'C:\reg' # Specify the folder where the regularization images are placed + class_tokens = 'girl' # Specify the class + num_repeats = 1 # Number of times to repeat the regularization image, usually 1 is enough +``` + +Basically, you can change the following only. + +1. Training resolution +1. Batch size +1. Folder specification +1. Caption file extension + + You can specify any extension. +1. Number of repetitions + +## step 3. Training + +Please refer to each document for training. + +## step 3. Training + +Please refer to each document for training. + +# fine tuning method + +## step 1. Prepare the metadata + +The file that collects captions and tags is called metadata. It is in json format with the extension `.json`. The method of creating it is too long to write here, so it is written at the end of this document. + +## step 2. Write the configuration file + +Create a text file and change the extension to `.toml`. For example, write as follows. + +```toml +[general] +shuffle_caption = true +keep_tokens = 1 + +[[datasets]] +resolution = 512 # Training resolution +batch_size = 4 # Batch size + + [[datasets.subsets]] + image_dir = 'C:\piyo' # Specify the folder where the learning target images are placed + metadata_file = 'C:\piyo\piyo_md.json' # Metadata file name +``` + +Basically, you can change the following only. Placeholders are the same as DreamBooth, class+identifier. + +1. 
Training resolution +1. Batch size +1. Folder specification +1. Metadata file name + + Specify the metadata file created according to the method above. + +## step 3. Training + +Please refer to each document for training. + +# Explanation of terms used in training + +The details are omitted here, and I do not fully understand them myself, so please do your own research. + +## fine tuning + +This refers to training the model and fine-tuning it. The meaning varies depending on how it's used, but in the context of Stable Diffusion, it means training the model with images and captions. DreamBooth is one of the special ways of fine tuning. Broadly speaking, fine tuning includes training the model with LoRA, Textual Inversion, Hypernetworks, etc. + +## Step + +In a nutshell, 1 step is calculated with 1 piece of learning data. "Feed the caption of the learning data to the current model and see what image comes out. Compare the image with the learning data's image and make the model slightly change to get closer to the learning data." is 1 step. + +## Batch size + +The batch size is a value that specifies how many pieces of data to calculate together in 1 step. By calculating together, the speed is relatively improved. Also, it is generally said that the accuracy is also improved. + +`Batch size × number of steps` is the number of data used for training. Therefore, if you increase the batch size, you should also reduce the number of steps. + +(However, for example, "batch size 1 with 1600 steps" and "batch size 4 with 400 steps" do not necessarily result in the same outcome. If the same learning rate is used, it is generally the latter that ends up undertrained. Try increasing the learning rate slightly (for example, `2e-6`) or using somewhat more steps (for example, 500 steps) to compensate for this.) + +Increasing the batch size will consume more GPU memory. If the memory is insufficient, an error will occur. Even when no error occurs, training slows down if memory usage is close to the limit.
You can use the task manager or `nvidia-smi` command to check the memory usage and adjust accordingly. + +Also, note that "batch" here simply means "a chunk of data". + +## Learning Rate + +In a nutshell, it refers to how much to change each step. If you specify a large value, it will progressively learn faster, but if it changes too much, the model may be damaged or may not reach the optimal state. If you specify a small value, the learning speed will be slower, and it may not reach the optimal state. + +The learning rate differs greatly depending on the method used, including fine tuning, DreamBooth, and LoRA, and also depends on the learning data, the model to be trained, the batch size, and the number of steps. It is recommended to start with a general value and adjust it according to the learning state. + +By default, the learning rate is fixed throughout the entire learning. The scheduler determines how the learning rate changes, so the results can vary depending on the scheduler. + +## Epoch + +When the learning data is learned once (the data is cycled once), it is considered 1 epoch. If the number of repetitions is specified, it is considered 1 epoch after the repetition. + +The number of steps per epoch is generally `number of data ÷ batch size`, but it increases slightly with Aspect Ratio Bucketing (since different buckets of data cannot be in the same batch, the number of steps increases). + +## Aspect Ratio Bucketing + +Stable Diffusion v1 is trained at 512×512, but with bucketing, training is also performed at resolutions such as 256×1024 and 384×640. This reduces the amount of cropping, so the relationship between captions and images is expected to be learned more accurately. + +This allows learning at any resolution, so you don't need to unify the aspect ratio of the image data in advance. + +It can be enabled or disabled by the settings, and in the examples of this document, it is enabled (`true` is set).
+ +The training resolutions are created by adjusting the width and height in steps of 64 pixels (default, changeable) so that they do not exceed the area (i.e., the memory usage) of the resolution given as a parameter. + +In machine learning, it is common to standardize the input size, but there is no specific constraint, and in fact, as long as the same batch is uniformly sized, it is okay. NovelAI's bucketing seems to refer to classifying the teacher data in advance into training resolutions according to aspect ratio, and then building each batch from the images of a single bucket so that the image size within a batch is uniform. + +# Previous Specification Method (Without Using the Configuration File and Specifying Through the Command Line) + +This is the method of specifying settings through command-line options, without using a `.toml` file. There are three methods: DreamBooth class+identifier, DreamBooth captioning, and fine tuning. + +## DreamBooth, class+identifier Method + +You specify the number of repetitions by the folder name, and also use the `train_data_dir` and `reg_data_dir` options. + +### step 1. Prepare the Learning Image + +Create a folder to store the learning images. __Inside the folder, create a directory with the following name.__ + +``` +<number of repetitions>_<identifier> <class> +``` + +Don't forget the `_` in between. + +For example, if you specify the prompt "sls frog" and repeat the data 20 times, it will be named "20_sls frog". It will look like this: + +![image](https://user-images.githubusercontent.com/52813779/210770636-1c851377-5936-4c15-90b7-8ac8ad6c2074.png) + +### Training with multiple classes and multiple targets (identifiers) + +The method is simple. Prepare a folder for learning images, and create multiple directories named ``<number of repetitions>_<identifier> <class>`` inside the folder.
+ +For example, if you want to learn both "sls frog" and "cpc rabbit" at the same time, it will look like this: + +![image](https://user-images.githubusercontent.com/52813779/210777933-a22229db-b219-4cd8-83ca-e87320fc4192.png) + +If there is only one class but multiple targets, you only need one regularization image folder. For example, if there are characters A and B in 1girl, it will look like this: + +- train_girls + - 10_sls 1girl + - 10_cpc 1girl +- reg_girls + - 1_1girl + +### step 2. Prepare regularization images + +These are the steps to follow when using regularization images. + +Create a folder to store the regularization images. __Inside the folder, create a directory with the name__ ``<number of repetitions>_<class>``. + +For example, if you specify the prompt "frog" and do not repeat the data (1 time only), it will look like this: + +![image](https://user-images.githubusercontent.com/52813779/210770897-329758e5-3675-49f1-b345-c135f1725832.png) + +### step 3. Execute training + +Execute each training script. Specify the folder for learning data with the `--train_data_dir` option (__not the folder containing the images, but the parent folder__), and specify the regularization image folder with the `--reg_data_dir` option (__not the folder containing the images, but the parent folder__). + +## DreamBooth, captioning method + +If you put a file with the same name as the image but with the extension .caption (optionally changeable) in the learning image and regularization image folders, it will learn the caption from the file as the prompt. + +※The folder name (identifier class) will no longer be used for the learning of those images. + +The default extension for the caption file is .caption. You can change it with the `--caption_extension` option in the training script. The `--shuffle_caption` option shuffles each part of the caption when learning, separated by commas. + +## fine tuning method + +The preparation of metadata is the same as when using a configuration file.
Specify the metadata file with the `--in_json` option. + +# Sample output during training + +You can check the progress of learning by generating images with the model being trained. Specify the following options in the training script. + +- `--sample_every_n_steps` / `--sample_every_n_epochs` + + Specify the number of steps or epochs to output a sample. Output a sample every time this number is reached. The epoch number takes precedence if both are specified. + +- `--sample_at_first` + + Output a sample before learning starts. You can compare before and after learning. + +- `--sample_prompts` + + Specify the file containing the prompts for sample output. + +- `--sample_sampler` + + Specify the sampler to use for sample output. + `'ddim', 'pndm', 'heun', 'dpmsolver', 'dpmsolver++', 'dpmsingle', 'k_lms', 'k_euler', 'k_euler_a', 'k_dpm_2', 'k_dpm_2_a'` can be selected. + +You need to prepare a text file with the prompts for sample output in advance. Write one prompt per line. + +For example: + +```txt +# prompt 1 +masterpiece, best quality, 1girl, in white shirts, upper body, looking at viewer, simple background --n low quality, worst quality, bad anatomy,bad composition, poor, low effort --w 768 --h 768 --d 1 --l 7.5 --s 28 + +# prompt 2 +masterpiece, best quality, 1boy, in business suit, standing at street, looking back --n low quality, worst quality, bad anatomy,bad composition, poor, low effort --w 576 --h 832 --d 2 --l 5.5 --s 40 +``` + +Lines starting with `#` are comments. You can specify options for the generated image with `--` followed by a lowercase English letter. The following options are available: + +- `--n` Treat the text up to the next option as the negative prompt. +- `--w` Specify the width of the generated image. +- `--h` Specify the height of the generated image. +- `--d` Specify the seed for the generated image. +- `--l` Specify the CFG scale for the generated image. +- `--s` Specify the number of steps for the generation.
+ +# Common options used by each script + +If the script has been updated but the documentation has not been updated, please check the available options using the `--help` option. + +## Specifying the model to be used for training + +- `--v2` / `--v_parameterization` + + If you want to use Hugging Face's stable-diffusion-2-base or a fine-tuned model based on it (models that instruct you to use `v2-inference.yaml`), specify the `--v2` option. If you want to use stable-diffusion-2, 768-v-ema.ckpt, or any fine-tuned models that instruct you to use `v2-inference-v.yaml`, specify both the `--v2` and `--v_parameterization` options. + + In Stable Diffusion 2.0, the following points have changed significantly: + + 1. The Tokenizer used + 2. The Text Encoder used and the output layer used (Stable Diffusion 2.0 uses the second-to-last layer) + 3. The output dimension of the Text Encoder (768->1024) + 4. The structure of the U-Net (the number of heads in CrossAttention, etc.) + 5. v-parameterization (the sampling method has changed, according to what I've seen) + + Of these, 1-4 are used in the base, and 1-5 are used in the non-base (768-v). The `--v2` option enables 1-4, and the `--v_parameterization` option enables 5. + +- `--pretrained_model_name_or_path` + + Specify the model to be used as the starting point for additional training. You can specify a Stable Diffusion checkpoint file (.ckpt or .safetensors), a local directory of a Diffusers model, or a Diffusers model ID ("stabilityai/stable-diffusion-2", etc.). + +## Training settings + +- `--output_dir` + + Specify the folder to save the model after training. + +- `--output_name` + + Specify the model file name without the extension. + +- `--dataset_config` + + Specify the `.toml` file that describes the dataset configuration. + +- `--max_train_steps` / `--max_train_epochs` + + Specify the number of steps or epochs to train. If both are specified, the number of epochs is prioritized. 
+ +- `--mixed_precision` + + Train with mixed precision to save memory. Specify as `--mixed_precision="fp16"`. The accuracy may be lower than without mixed precision, but the amount of GPU memory required for training is reduced. + + (RTX30 series and later can also specify `bf16`. Please match the settings you made when setting up the environment with accelerate). + +- `--gradient_checkpointing` + + Calculate the weights in a piecemeal manner instead of all at once to reduce the amount of GPU memory required for training. Turning it on or off does not affect the accuracy by itself, but turning it on allows a larger batch size, which does have an effect. + + In addition, training is generally slower with it on than with it off, but because the batch size can be made larger, the total training time may actually be shorter. + +- `--xformers` / `--mem_eff_attn` + + If you specify the xformers option, xformers' CrossAttention is used. If you do not have xformers installed or encounter an error (depending on the environment, such as `mixed_precision="no"`), you can specify the `mem_eff_attn` option to use the memory-efficient version of CrossAttention (which is slower than xformers). + +- `--clip_skip` + + Specify `2` to use the output of the second-to-last layer of the Text Encoder (CLIP). Specify `1` or omit the option to use the last layer. + + ※SD2.0 is set to use the second-to-last layer by default, so do not specify it when training SD2.0. + + If the model was originally trained to use the second-to-last layer, specify `2`. + + If the model was instead trained to use the last layer, the whole model has been trained on that assumption. Therefore, re-training it with the second-to-last layer may require a certain amount of teacher data and a longer training time to get the desired result. + +- `--max_token_length` + + The default is `75`. Specify `150` or `225` to extend the token length for training. Specify it when training with long captions.
+ + However, the token extension specification during training is slightly different from the Web UI of Automatic1111 (such as the specification of splitting), so it is recommended to learn with `75`. + + Like clip_skip, to learn with a different length than the model's learning state, you may need a certain number of teacher data and a long training time. + +- `--weighted_captions` + + Specify to enable weighted captions similar to Automatic1111's Web UI. Can be used for training except for "Textual Inversion and XTI". Can also be used for DreamBooth token strings. + + The notation for weighted captions is almost the same as the Web UI, and (abc), [abc], (abc:1.23), etc. can be used. Nesting is also possible. If you include a comma within the parentheses, the shuffle/dropout of the prompt will be incorrect, so do not include a comma within the parentheses. + +- `--persistent_data_loader_workers` + + If you specify it on a Windows environment, the waiting time between epochs is significantly shortened. + +- `--max_data_loader_n_workers` + + Specify the number of processes for data loading. The more processes, the faster the data loading and the GPU is used more efficiently, but it consumes main memory. The default is "`8` or `CPU concurrent thread number - 1`, whichever is smaller", so if there is no spare main memory or the GPU usage is about 90%, please reduce the number of processes to `2` or `1`. + +- `--logging_dir` / `--log_prefix` + + Options for saving training logs. Specify the folder to save the log with the logging_dir option. TensorBoard-formatted logs are saved. + + For example, specify `--logging_dir=logs`, and a logs folder will be created in the working directory, and the log will be saved in the date-named folder within it. + Also, if you specify the `--log_prefix` option, the specified string will be added before the date. Use it to identify, such as `--logging_dir=logs --log_prefix=db_style1_`. 
+ + To check the log with TensorBoard, open another command prompt, and enter the following in the working directory. + + ``` + tensorboard --logdir=logs + ``` + + (I think tensorboard is installed together with the environment setup, but if it is not installed, please install it with `pip install tensorboard`.) + + After that, open a browser and access http://localhost:6006/ to display it. + +- `--log_with` / `--log_tracker_name` + + Options for saving training logs. You can save logs to `tensorboard` and `wandb`. For details, please refer to [PR#428](https://github.com/kohya-ss/sd-scripts/pull/428). + +- `--noise_offset` + + This implementation is based on the following article: https://www.crosslabs.org/blog/diffusion-with-offset-noise + + It seems that overall, the results of generating dark and bright images are improved. It may be effective in LoRA training. It is recommended to specify a value of `0.1` or less. + +- `--adaptive_noise_scale` (Experimental option) + + This option automatically adjusts the noise offset value according to the absolute value of the mean of the latents. It is enabled when `--noise_offset` is specified. The noise offset value is calculated as `noise_offset + abs(mean(latents, dim=(2,3))) * adaptive_noise_scale`. Since latents are close to a normal distribution, a value between roughly 1/10 of `noise_offset` and about the same as `noise_offset` may be a good choice. + + Negative values can also be specified, in which case the noise offset is clipped to be 0 or above. + +- `--multires_noise_iterations` / `--multires_noise_discount` + + Settings for multi-resolution noise (pyramid noise). For details, please refer to [PR#471](https://github.com/kohya-ss/sd-scripts/pull/471) and the following page [Multi-Resolution Noise for Diffusion Model Training](https://wandb.ai/johnowhitaker/multires_noise/reports/Multi-Resolution-Noise-for-Diffusion-Model-Training--VmlldzozNjYyOTU2). + + Specify a number value to enable. A value of 6~10 is recommended.
Specify a value of `--multires_noise_discount` of 0.1~0.3 (recommended by the author of PR#471 for datasets with a smaller number of samples) or 0.8 (recommended by the author of the original article) (default is 0.3). + +- `--debug_dataset` + + With this option, you can check which images and captions are used for training before starting the training. Press `Esc` to exit and return to the command line. Press `S` to go to the next step (batch) and `E` to go to the next epoch. + + ※Images are not displayed in Linux environments (including Colab). + +- `--vae` + + Specify either a Stable Diffusion checkpoint, a VAE checkpoint file, a Diffusers model, or a VAE (both local or Hugging Face model IDs) to use that VAE for training (when caching latents or retrieving latents during training). + + In DreamBooth and fine tuning, the saved model will include this VAE. + +- `--cache_latents` / `--cache_latents_to_disk` + + Cache the output of the VAE to the main memory to reduce VRAM usage. Augmentations other than `flip_aug` are disabled. The overall training speed is also slightly faster. + + Specify `--cache_latents_to_disk` to save the cache to disk. The cache will be effective even if the script is exited and restarted. + +- `--min_snr_gamma` + + Specify the Min-SNR Weighting strategy. For details, please refer to [this link](https://github.com/kohya-ss/sd-scripts/pull/308). The paper recommends `5`. + +## Model Saving Settings + +- `--save_precision` + + Specify the data precision to save. If you specify float, fp16, or bf16, the model will be saved in that format (DreamBooth and fine tuning will not be saved in Diffusers format). This is useful for reducing model size, for example. + +- `--save_every_n_epochs` / `--save_state` / `--resume` + + If you specify a number for save_every_n_epochs, the model will be saved every n epochs during training. + + If you specify save_state at the same time, the optimizer, etc. 
will also be saved together with the training state (you can resume training from the saved state). Compared with resuming from a saved model alone, this is expected to improve accuracy and shorten training time. The save destination is a folder. + + The training state is output to a folder named `<output_name>-??????-state` (?????? is the epoch number) in the save destination folder. Use it for long-term training. + + To resume training from a saved state, specify resume and the state folder (not `output_dir` but the state folder inside it). + + Note that due to the specifications of Accelerator, the epoch number and global step are not saved, so they start again from 1 when training is resumed. + +- `--save_every_n_steps` + + If you specify a number for save_every_n_steps, the model will be saved every n steps during training. You can specify it at the same time as save_every_n_epochs. + +- `--save_model_as` (DreamBooth, fine tuning only) + + You can select the model saving format from `ckpt, safetensors, diffusers, diffusers_safetensors`. + + Specify it like `--save_model_as=safetensors`. If you load a Stable Diffusion model (ckpt or safetensors) and save it in Diffusers format, missing information will be filled in with information from Hugging Face, such as v1.5 or v2.1. + +- `--huggingface_repo_id`, etc. + + If huggingface_repo_id is specified, the model will be uploaded to HuggingFace at the same time. Be careful with the handling of the access token (refer to HuggingFace's documentation). + + For example, specify as follows: + + - `--huggingface_repo_id "your-hf-name/your-model" --huggingface_path_in_repo "path" --huggingface_repo_type model --huggingface_repo_visibility private --huggingface_token hf_YourAccessTokenHere` + + If you specify `public` for huggingface_repo_visibility, the repository will be public. If you omit it or specify anything other than `public`, it will be private. + + If you specify `--save_state`, specify `--save_state_to_huggingface` to save the state.
+ + If you specify `--resume`, specify `--resume_from_huggingface` to download the state from HuggingFace and resume. The --resume option will be `--resume {repo_id}/{path_in_repo}:{revision}:{repo_type}`. + + For example: `--resume_from_huggingface --resume your-hf-name/your-model/path/test-000002-state:main:model` + + Specify `--async_upload` to upload asynchronously. + +## Optimizer-related + +- `--optimizer_type` + -- Specify the type of optimizer. The following can be specified. + - AdamW : [torch.optim.AdamW](https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html) + - Same as when no option specified in past versions + - AdamW8bit : Same arguments as above + - PagedAdamW8bit : Same arguments as above + - Same as when `--use_8bit_adam` specified in past versions + - Lion : + - Same as when `--use_lion_optimizer` specified in past versions + - Lion8bit : Same arguments as above + - PagedLion8bit : Same arguments as above + - SGDNesterov : [torch.optim.SGD](https://pytorch.org/docs/stable/generated/torch.optim.SGD.html), nesterov=True + - SGDNesterov8bit : Same arguments as above + - DAdaptation(DAdaptAdamPreprint) : + - DAdaptAdam : Same arguments as above + - DAdaptAdaGrad : Same arguments as above + - DAdaptAdan : Same arguments as above + - DAdaptAdanIP : Same arguments as above + - DAdaptLion : Same arguments as above + - DAdaptSGD : Same arguments as above + - Prodigy : + - AdaFactor : [Transformers AdaFactor](https://huggingface.co/docs/transformers/main_classes/optimizer_schedules) + - Any optimizer + +- `--learning_rate` + + Specify the learning rate. The appropriate learning rate varies depending on the learning script, so please refer to the descriptions for each. + +- `--lr_scheduler` / `--lr_warmup_steps` / `--lr_scheduler_num_cycles` / `--lr_scheduler_power` + + Specifications related to the learning rate scheduler. 
+ + With the lr_scheduler option, you can select a learning rate scheduler from linear, cosine, cosine_with_restarts, polynomial, constant, constant_with_warmup, or an arbitrary scheduler. The default is constant. + + With lr_warmup_steps, you can specify the number of steps for the scheduler's warmup (gradually changing the learning rate). + + lr_scheduler_num_cycles is the number of restarts for the cosine with restarts scheduler, and lr_scheduler_power is the polynomial power for the polynomial scheduler. + + For more details, please refer to the scheduler's documentation. + + If you use an arbitrary scheduler, specify the options as arguments with `--scheduler_args` in the same way as for an arbitrary optimizer. + +### Specifying the Optimizer + +To specify the arguments for the optimizer, use the `--optimizer_args` option. Specify each argument in the format of `key=value`; multiple `key=value` pairs can be given separated by spaces, and a single value can contain multiple items separated by commas. For example, to specify arguments for the AdamW optimizer, you would use `--optimizer_args weight_decay=0.01 betas=.9,.999`. + +When specifying the arguments, please refer to the specifications of each optimizer. + +Some optimizers require certain arguments, and if they are omitted, they will be automatically added (e.g., SGDNesterov's momentum). Check the console output for details. + +The D-Adaptation optimizer adjusts the learning rate automatically. The value specified for the learning rate option is the rate of application of the learning rate determined by D-Adaptation, not the actual learning rate itself, so it is usually recommended to specify 1.0. If you want to specify half the learning rate for the text encoder and the full learning rate for the U-Net, you can specify `--text_encoder_lr=0.5 --unet_lr=1.0`. + +The AdaFactor optimizer can adjust the learning rate automatically by specifying `relative_step=True` (which is added by default if omitted).
To adjust the learning rate, the learning rate scheduler should be set to `adafactor_scheduler`. It is also recommended to specify `scale_parameter` and `warmup_init`. + +The options for adjusting the learning rate are specified as `--optimizer_args "relative_step=True" "scale_parameter=True" "warmup_init=True"`. + +If you do not want to adjust the learning rate, specify the argument `relative_step=False`. In this case, the learning rate scheduler should be set to `constant_with_warmup`, and it is recommended not to clip the gradient norm. The arguments would be `--optimizer_type=adafactor --optimizer_args "relative_step=False" --lr_scheduler="constant_with_warmup" --max_grad_norm=0.0`. + +### Using Any Optimizer + +To use an optimizer from `torch.optim`, specify only the class name (e.g., `--optimizer_type=RMSprop`). For other optimizers from other modules, specify the module name and class name separated by a period (e.g., `--optimizer_type=bitsandbytes.optim.lamb.LAMB`). + +(Note: This is implemented using `importlib` internally, and the actual behavior is not confirmed. If necessary, please install the necessary package.) + + + +# Creating Metadata Files + +## Preparing Teacher Data + +As described above, prepare the image data you want to learn and put it in any folder. + +For example, you can store the images as follows: + +![Screenshot of Teacher Data Folder](https://user-images.githubusercontent.com/52813779/208907739-8e89d5fa-6ca8-4b60-8927-f484d2a9ae04.png) + +## Automatic Captioning + +If you want to learn only with tags without captions, skip this section. + +If you want to prepare captions manually, prepare the captions in the same directory as the teacher data images, with the same file name, but with the extension .caption, for example. Each file should be a single-line text file. + +### Captioning with BLIP + +With the latest version, you no longer need to download BLIP, download weights, or add a virtual environment. It should work as is. 
+ +Run make_captions.py in the finetune folder. + +``` +python finetune\make_captions.py --batch_size <batch size> <teacher data folder> +``` + +If you put the teacher data in the parent folder's train_data, it would look like this: + +``` +python finetune\make_captions.py --batch_size 8 ..\train_data +``` + +Caption files are created in the same directory as the teacher data images, with the same file name, but with the extension .caption. + +You can adjust the batch_size according to the capacity of your GPU's VRAM. A larger batch size is faster (it can probably be increased further even with 12GB of VRAM). +You can specify the maximum length of the caption with the max_length option. The default is 75. Increase it if the captions will be used with models trained with a token length of 225. +You can change the extension of the caption with the caption_extension option. The default is .caption (if you change it to .txt, it will conflict with DeepDanbooru in the next section). + +If there are multiple teacher data folders, run for each folder. + +Note that inference has randomness, so the results will vary each time you run. To make the results reproducible, specify the random seed with the `--seed` option, such as `--seed 42`. + +For other options, refer to the help with `--help` (the meanings of parameters are not well documented, so you need to refer to the source). + +By default, caption files are generated with the extension .caption. + +![Folder with Generated Captions](https://user-images.githubusercontent.com/52813779/208908845-48a9d36c-f6ee-4dae-af71-9ab462d1459e.png) + +For example, the captions might look like this: + +![Caption and Image](https://user-images.githubusercontent.com/52813779/208908947-af936957-5d73-4339-b6c8-945a52857373.png) + +## Tagging with DeepDanbooru + +If you do not want to tag with danbooru tags at all, proceed to "Preprocessing of captions and tag information". + +Tagging is done with DeepDanbooru or WD14Tagger. WD14Tagger seems to have higher accuracy.
If you want to tag with WD14Tagger, proceed to the next section. + +### Setting up the Environment + +Clone DeepDanbooru into your working directory or download the zip and extract it. I extracted it. +Also, download deepdanbooru-v3-20211112-sgd-e28.zip from the Releases page and extract it into the DeepDanbooru folder. + +Download from the following page: + +![DeepDanbooru Download Page](https://user-images.githubusercontent.com/52813779/208909417-10e597df-7085-41ee-bd06-3e856a1339df.png) + +Set up the directory structure as follows: + +![DeepDanbooru Directory Structure](https://user-images.githubusercontent.com/52813779/208909486-38935d8b-8dc6-43f1-84d3-fef99bc471aa.png) + +Install the required libraries into the Diffusers environment. Move to the DeepDanbooru folder and install them (I think this just adds tensorflow-io). + +``` +pip install -r requirements.txt +``` + +Next, install DeepDanbooru itself. + +``` +pip install . +``` + +With these steps, the tagging environment setup is complete. + +### Tagging + +Move to the DeepDanbooru folder and run deepdanbooru to tag. + +``` +deepdanbooru evaluate <teacher data folder> --project-path deepdanbooru-v3-20211112-sgd-e28 --allow-folder --save-txt +``` + +If you put the teacher data in the parent folder's train_data, it will look like this: + +``` +deepdanbooru evaluate ../train_data --project-path deepdanbooru-v3-20211112-sgd-e28 --allow-folder --save-txt +``` + +The tag file is created in the same directory as the teacher data image, with the same file name and extension .txt. Images are processed one at a time, so it is quite slow. + +If there are multiple teacher data folders, please run for each folder. + +The following will be generated: + +![DeepDanbooru Generated Files](https://user-images.githubusercontent.com/52813779/208909855-d21b9c98-f2d3-4283-8238-5b0e5aad6691.png) + +The tags look like this (a lot of information...).
+ +![DeepDanbooru Tags and Images](https://user-images.githubusercontent.com/52813779/208909908-a7920174-266e-48d5-aaef-940aba709519.png) + +## Tagging with WD14Tagger + +Steps to use WD14Tagger instead of DeepDanbooru. + +We use the tagger used in Automatic1111's WebUI. For details, please refer to that tagger's GitHub page. + +The necessary modules for the initial setup are already installed. The weights are automatically downloaded from Hugging Face. + +### Tagging + +Run the script to tag. + +``` +python tag_images_by_wd14_tagger.py --batch_size <batch size> <teacher data folder> +``` + +If you put the teacher data in the parent folder's train_data, it will look like this: + +``` +python tag_images_by_wd14_tagger.py --batch_size 4 ..\train_data +``` + +The model file is automatically downloaded to the wd14_tagger_model folder on the first run (you can specify the folder with the model_dir option). + +![Downloaded Files](https://user-images.githubusercontent.com/52813779/208910447-f7eb0582-90d6-49d3-a666-2b508c7d1842.png) + +The tag file is created in the same directory as the teacher data image, with the same file name and extension .txt. + +![Generated Tag File](https://user-images.githubusercontent.com/52813779/208910534-ea514373-1185-4b7d-9ae3-61eb50bc294e.png) + +![Tags and Images](https://user-images.githubusercontent.com/52813779/208910599-29070c15-7639-474f-b3e4-06bd5a3df29e.png) + +The thresh option specifies the minimum confidence (probability) for a tag to be assigned. The default is the same as the WD14Tagger sample, 0.35. Lowering the value will increase the number of tags, but the accuracy will decrease. + +The batch_size depends on the VRAM capacity of the GPU. A larger batch size is faster (it can probably be increased a little even with 12GB of VRAM). The caption_extension option allows you to change the extension of the tag file. The default is .txt. + +The model_dir option allows you to specify the destination folder for the model.
+ +The force_download option forces the model to be re-downloaded even if the destination folder already exists. + +If there are multiple teacher data folders, please run for each folder. + +## Preprocessing of captions and tag information + +To make the captions and tags easier for the scripts to process, we aggregate them into one metadata file. + +### Preprocessing of captions + +To put the captions into the metadata, run the following in the working directory (if you don't use captions for learning, you don't need to run it). The command is actually entered on a single line; the same applies to the commands below. The `--full_path` option stores the location of the image file in the metadata as a full path. If this option is omitted, the relative path is recorded, but the folder specification is required in the `.toml` file separately. + +``` +python merge_captions_to_metadata.py --full_path <teacher data folder> + --in_json <metadata file to read> <metadata file to write> +``` + +The metadata file name is arbitrary. +If the teacher data is train_data, there is no metadata file to read, and the metadata file to write is meta_cap.json, it will look like this: + +``` +python merge_captions_to_metadata.py --full_path train_data meta_cap.json +``` + +The caption_extension option allows you to specify the extension of the caption. + +If there are multiple teacher data folders, specify the full_path argument and run for each folder. + +``` +python merge_captions_to_metadata.py --full_path + train_data1 meta_cap1.json +python merge_captions_to_metadata.py --full_path --in_json meta_cap1.json + train_data2 meta_cap2.json +``` + +If in_json is omitted and the metadata file to write already exists, that file is read and then overwritten. + +__※It is safer to specify in_json and write to a different metadata file each time.__ + +### Preprocessing of tags + +Similarly, we can also put the tags into the metadata (if you don't use tags for learning, you don't need to run it).
+ +``` +python merge_dd_tags_to_metadata.py --full_path <teacher data folder> + --in_json <metadata file to read> <metadata file to write> +``` + +With the same directory structure as before, reading meta_cap.json and writing to meta_cap_dd.json, it will look like this: + +``` +python merge_dd_tags_to_metadata.py --full_path train_data --in_json meta_cap.json meta_cap_dd.json +``` + +If there are multiple teacher data folders, specify the full_path argument and run for each folder. + +``` +python merge_dd_tags_to_metadata.py --full_path --in_json meta_cap2.json + train_data1 meta_cap_dd1.json +python merge_dd_tags_to_metadata.py --full_path --in_json meta_cap_dd1.json + train_data2 meta_cap_dd2.json +``` + +If in_json is omitted and the metadata file to write already exists, that file is read and then overwritten. + +__※It is safer to specify in_json and write to a different metadata file each time.__ + +### Cleaning of captions and tags + +So far, the metadata file has been assembled with captions and DeepDanbooru tags. However, the automatically generated captions are slightly awkward (※), and the tags contain underscores and ratings (in the case of DeepDanbooru), so it is better to clean up the captions and tags using the editor's replace function. + +※For example, if you are learning about anime girls, the captions may contain variations such as girl/girls/woman/women. Also, "anime girl" could be simply "girl". + +A script is prepared for cleaning, so please edit the script contents according to the situation. + +(No need to specify the teacher data folder. All data in the metadata is cleaned.) + +``` +python clean_captions_and_tags.py <metadata file to read> <metadata file to write> +``` + +Note that the `--in_json` option is not used here; the metadata file to read is given as a positional argument. For example, it will look like this: + +``` +python clean_captions_and_tags.py meta_cap_dd.json meta_clean.json +``` + +This completes the preprocessing of captions and tags. + +## Pre-acquisition of latents + +※ This step is not required. You can omit it and learn while acquiring latents.
+Also, if you perform `random_crop` or `color_aug` during learning, latents cannot be acquired in advance (because the image is changed every time). If you do not acquire them in advance, you can learn with the metadata up to this point. + +Pre-acquire the latent representations of the images and save them to the disk. This allows you to speed up the learning process. Bucketing (classifying the teacher data according to aspect ratio) is also performed at this step. + +Enter the following in the working directory: + +``` +python prepare_buckets_latents.py --full_path <teacher data folder> + <metadata file to read> <metadata file to write> + <model name or checkpoint to fine-tune> + --batch_size <batch size> + --max_resolution <resolution width,height> + --mixed_precision <precision> +``` + +If the model is model.ckpt, the batch size is 4, the learning resolution is 512*512, the precision is no (float32), and the metadata is read from meta_clean.json and written to meta_lat.json, it will look like this: + +``` +python prepare_buckets_latents.py --full_path + train_data meta_clean.json meta_lat.json model.ckpt + --batch_size 4 --max_resolution 512,512 --mixed_precision no +``` + +The latents are saved in the teacher data folder in NumPy's npz format. + +You can specify the minimum resolution with the --min_bucket_reso option and the maximum resolution with the --max_bucket_reso option. The defaults are 256 and 1024, respectively. For example, if you specify a minimum resolution of 384, resolutions such as 256*1024 or 320*768 will not be used. +If you increase the resolution to 768*768 or larger, it is recommended to specify a maximum resolution of 1280 or higher. + +Specify the --flip_aug option to perform augmentation (data expansion) of left-right flips. This can artificially double the amount of data, but if the data is not symmetrical on the left and right (for example, character appearance, hair type, etc.), it may not learn well. + +(The implementation is simple: latents are also acquired for the flipped images and saved to files named *_flip.npz. There is no need to specify any options in fine_tune.py.
If there is a file with _flip, it will randomly read either the flipped or non-flipped file.) + +The batch size can be increased a little with VRAM 12GB. +The resolution must be a multiple of 64, specified as "width,height". The resolution is directly related to the memory size during fine tuning. With VRAM 12GB, 512,512 seems to be the limit (※). With 16GB, you can increase it to 512,704 or 512,768. However, with 256,256, it is said to be difficult with VRAM 8GB (parameters, optimizer, etc. require a certain amount of memory regardless of resolution). + +※It has been reported that a batch size of 1 can run with 12GB VRAM at 640,640. + +The following shows the results of bucketing. + +![bucketing results](https://user-images.githubusercontent.com/52813779/208911419-71c00fbb-2ce6-49d5-89b5-b78d7715e441.png) + +If there are multiple teacher data folders, specify the full_path argument and run for each folder. + +``` +python prepare_buckets_latents.py --full_path + train_data1 meta_clean.json meta_lat1.json model.ckpt + --batch_size 4 --max_resolution 512,512 --mixed_precision no + +python prepare_buckets_latents.py --full_path + train_data2 meta_lat1.json meta_lat2.json model.ckpt + --batch_size 4 --max_resolution 512,512 --mixed_precision no + +``` + +It is also possible to read and write to the same file, but it is safer to use different files. 
+ +__※If you specify in_json and write to a different metadata file each time, it is safer.__ diff --git a/docs/train_db_README-ja.md b/docs/train_db_README-ja.md new file mode 100644 index 0000000000000000000000000000000000000000..a034d52453c9ce3568814cc1b4460628011a807c --- /dev/null +++ b/docs/train_db_README-ja.md @@ -0,0 +1,167 @@ +DreamBoothのガイドです。 + +[学習についての共通ドキュメント](./train_README-ja.md) もあわせてご覧ください。 + +# 概要 + +DreamBoothとは、画像生成モデルに特定の主題を追加学習し、それを特定の識別子で生成する技術です。[論文はこちら](https://arxiv.org/abs/2208.12242)。 + +具体的には、Stable Diffusionのモデルにキャラや画風などを学ばせ、それを `shs` のような特定の単語で呼び出せる(生成画像に出現させる)ことができます。 + +スクリプトは[DiffusersのDreamBooth](https://github.com/huggingface/diffusers/tree/main/examples/dreambooth)を元にしていますが、以下のような機能追加を行っています(いくつかの機能は元のスクリプト側もその後対応しています)。 + +スクリプトの主な機能は以下の通りです。 + +- 8bit Adam optimizerおよびlatentのキャッシュによる省メモリ化([Shivam Shrirao氏版](https://github.com/ShivamShrirao/diffusers/tree/main/examples/dreambooth)と同様)。 +- xformersによる省メモリ化。 +- 512x512だけではなく任意サイズでの学習。 +- augmentationによる品質の向上。 +- DreamBoothだけではなくText Encoder+U-Netのfine tuningに対応。 +- Stable Diffusion形式でのモデルの読み書き。 +- Aspect Ratio Bucketing。 +- Stable Diffusion v2.0対応。 + +# 学習の手順 + +あらかじめこのリポジトリのREADMEを参照し、環境整備を行ってください。 + +## データの準備 + +[学習データの準備について](./train_README-ja.md) を参照してください。 + +## 学習の実行 + +スクリプトを実行します。最大限、メモリを節約したコマンドは以下のようになります(実際には1行で入力します)。それぞれの行を必要に応じて書き換えてください。12GB程度のVRAMで動作するようです。 + +``` +accelerate launch --num_cpu_threads_per_process 1 train_db.py + --pretrained_model_name_or_path=<.ckptまたは.safetensordまたはDiffusers版モデルのディレクトリ> + --dataset_config=<データ準備で作成した.tomlファイル> + --output_dir=<学習したモデルの出力先フォルダ> + --output_name=<学習したモデル出力時のファイル名> + --save_model_as=safetensors + --prior_loss_weight=1.0 + --max_train_steps=1600 + --learning_rate=1e-6 + --optimizer_type="AdamW8bit" + --xformers + --mixed_precision="fp16" + --cache_latents + --gradient_checkpointing +``` + +`num_cpu_threads_per_process` には通常は1を指定するとよいようです。 + +`pretrained_model_name_or_path` に追加学習を行う元となるモデルを指定します。Stable 
Diffusionのcheckpointファイル(.ckptまたは.safetensors)、Diffusersのローカルディスクにあるモデルディレクトリ、DiffusersのモデルID("stabilityai/stable-diffusion-2"など)が指定できます。 + +`output_dir` に学習後のモデルを保存するフォルダを指定します。`output_name` にモデルのファイル名を拡張子を除いて指定します。`save_model_as` でsafetensors形式での保存を指定しています。 + +`dataset_config` に `.toml` ファイルを指定します。ファイル内でのバッチサイズ指定は、当初はメモリ消費を抑えるために `1` としてください。 + +`prior_loss_weight` は正則化画像のlossの重みです。通常は1.0を指定します。 + +学習させるステップ数 `max_train_steps` を1600とします。学習率 `learning_rate` はここでは1e-6を指定しています。 + +省メモリ化のため `mixed_precision="fp16"` を指定します(RTX30 シリーズ以降では `bf16` も指定できます。環境整備時にaccelerateに行った設定と合わせてください)。また `gradient_checkpointing` を指定します。 + +オプティマイザ(モデルを学習データにあうように最適化=学習させるクラス)にメモリ消費の少ない 8bit AdamW を使うため、 `optimizer_type="AdamW8bit"` を指定します。 + +`xformers` オプションを指定し、xformersのCrossAttentionを用います。xformersをインストールしていない場合やエラーとなる場合(環境にもよりますが `mixed_precision="no"` の場合など)、代わりに `mem_eff_attn` オプションを指定すると省メモリ版CrossAttentionを使用します(速度は遅くなります)。 + +省メモリ化のため `cache_latents` オプションを指定してVAEの出力をキャッシュします。 + +ある程度メモリがある場合は、`.toml` ファイルを編集してバッチサイズをたとえば `4` くらいに増やしてください(高速化と精度向上の可能性があります)。また `cache_latents` を外すことで augmentation が可能になります。 + +### よく使われるオプションについて + +以下の場合には [学習の共通ドキュメント](./train_README-ja.md) の「よく使われるオプション」を参照してください。 + +- Stable Diffusion 2.xまたはそこからの派生モデルを学習する +- clip skipを2以上を前提としたモデルを学習する +- 75トークンを超えたキャプションで学習する + +### DreamBoothでのステップ数について + +当スクリプトでは省メモリ化のため、ステップ当たりの学習回数が元のスクリプトの半分になっています(対象の画像と正則化画像を同一のバッチではなく別のバッチに分割して学習するため)。 + +元のDiffusers版やXavierXiao氏のStable Diffusion版とほぼ同じ学習を行うには、ステップ数を倍にしてください。 + +(学習画像と正則化画像をまとめてから shuffle するため厳密にはデータの順番が変わってしまいますが、学習には大きな影響はないと思います。) + +### DreamBoothでのバッチサイズについて + +モデル全体を学習するためLoRA等の学習に比べるとメモリ消費量は多くなります(fine tuningと同じ)。 + +### 学習率について + +Diffusers版では5e-6ですがStable Diffusion版は1e-6ですので、上のサンプルでは1e-6を指定しています。 + +### 以前の形式のデータセット指定をした場合のコマンドライン + +解像度やバッチサイズをオプションで指定します。コマンドラインの例は以下の通りです。 + +``` +accelerate launch --num_cpu_threads_per_process 1 train_db.py + --pretrained_model_name_or_path=<.ckptまたは.safetensordまたはDiffusers版モデルのディレクトリ> + 
--train_data_dir=<学習用データのディレクトリ> + --reg_data_dir=<正則化画像のディレクトリ> + --output_dir=<学習したモデルの出力先ディレクトリ> + --output_name=<学習したモデル出力時のファイル名> + --prior_loss_weight=1.0 + --resolution=512 + --train_batch_size=1 + --learning_rate=1e-6 + --max_train_steps=1600 + --use_8bit_adam + --xformers + --mixed_precision="bf16" + --cache_latents + --gradient_checkpointing +``` + +## 学習したモデルで画像生成する + +学習が終わると指定したフォルダに指定した名前でsafetensorsファイルが出力されます。 + +v1.4/1.5およびその他の派生モデルの場合、このモデルでAutomatic1111氏のWebUIなどで推論できます。models\Stable-diffusionフォルダに置いてください。 + +v2.xモデルでWebUIで画像生成する場合、モデルの仕様が記述された.yamlファイルが別途必要になります。v2.x baseの場合はv2-inference.yamlを、768/vの場合はv2-inference-v.yamlを、同じフォルダに置き、拡張子の前の部分をモデルと同じ名前にしてください。 + +![image](https://user-images.githubusercontent.com/52813779/210776915-061d79c3-6582-42c2-8884-8b91d2f07313.png) + +各yamlファイルは[Stability AIのSD2.0のリポジトリ](https://github.com/Stability-AI/stablediffusion/tree/main/configs/stable-diffusion)にあります。 + +# DreamBooth特有のその他の主なオプション + +すべてのオプションについては別文書を参照してください。 + +## Text Encoderの学習を途中から行わない --stop_text_encoder_training + +stop_text_encoder_trainingオプションに数値を指定すると、そのステップ数以降はText Encoderの学習を行わずU-Netだけ学習します。場合によっては精度の向上が期待できるかもしれません。 + +(恐らくText Encoderだけ先に過学習することがあり、それを防げるのではないかと推測していますが、詳細な影響は不明です。) + +## Tokenizerのパディングをしない --no_token_padding +no_token_paddingオプションを指定するとTokenizerの出力をpaddingしません(Diffusers版の旧DreamBoothと同じ動きになります)。 + + + diff --git a/docs/train_db_README-zh.md b/docs/train_db_README-zh.md new file mode 100644 index 0000000000000000000000000000000000000000..4094f0b7c46a99f8d7531b728b935db089f23c99 --- /dev/null +++ b/docs/train_db_README-zh.md @@ -0,0 +1,162 @@ +这是DreamBooth的指南。 + +请同时查看[关于学习的通用文档](./train_README-zh.md)。 + +# 概要 + +DreamBooth是一种将特定主题添加到图像生成模型中进行学习,并使用特定识别子生成它的技术。论文链接。 + +具体来说,它可以将角色和绘画风格等添加到Stable Diffusion模型中进行学习,并使用特定的单词(例如`shs`)来调用(呈现在生成的图像中)。 + +脚本基于Diffusers的DreamBooth,但添加了以下功能(一些功能已在原始脚本中得到支持)。 + +脚本的主要功能如下: + +- 使用8位Adam优化器和潜在变量的缓存来节省内存(与Shivam Shrirao版相似)。 +- 使用xformers来节省内存。 +- 不仅支持512x512,还支持任意尺寸的训练。 +- 
通过数据增强来提高质量。 +- 支持DreamBooth和Text Encoder + U-Net的微调。 +- 支持以Stable Diffusion格式读写模型。 +- 支持Aspect Ratio Bucketing。 +- 支持Stable Diffusion v2.0。 + +# 训练步骤 + +请先参阅此存储库的README以进行环境设置。 + +## 准备数据 + +请参阅[有关准备训练数据的说明](./train_README-zh.md)。 + +## 运行训练 + +运行脚本。以下是最大程度地节省内存的命令(实际上,这将在一行中输入)。请根据需要修改每行。它似乎需要约12GB的VRAM才能运行。 +``` +accelerate launch --num_cpu_threads_per_process 1 train_db.py + --pretrained_model_name_or_path=<.ckpt或.safetensord或Diffusers版模型的目录> + --dataset_config=<数据准备时创建的.toml文件> + --output_dir=<训练模型的输出目录> + --output_name=<训练模型输出时的文件名> + --save_model_as=safetensors + --prior_loss_weight=1.0 + --max_train_steps=1600 + --learning_rate=1e-6 + --optimizer_type="AdamW8bit" + --xformers + --mixed_precision="fp16" + --cache_latents + --gradient_checkpointing +``` +`num_cpu_threads_per_process` 通常应该设置为1。 + +`pretrained_model_name_or_path` 指定要进行追加训练的基础模型。可以指定 Stable Diffusion 的 checkpoint 文件(.ckpt 或 .safetensors)、Diffusers 的本地模型目录或模型 ID(如 "stabilityai/stable-diffusion-2")。 + +`output_dir` 指定保存训练后模型的文件夹。在 `output_name` 中指定模型文件名,不包括扩展名。使用 `save_model_as` 指定以 safetensors 格式保存。 + +在 `dataset_config` 中指定 `.toml` 文件。初始批处理大小应为 `1`,以减少内存消耗。 + +`prior_loss_weight` 是正则化图像损失的权重。通常设为1.0。 + +将要训练的步数 `max_train_steps` 设置为1600。在这里,学习率 `learning_rate` 被设置为1e-6。 + +为了节省内存,设置 `mixed_precision="fp16"`(在 RTX30 系列及更高版本中也可以设置为 `bf16`)。同时指定 `gradient_checkpointing`。 + +为了使用内存消耗较少的 8bit AdamW 优化器(将模型优化为适合于训练数据的状态),指定 `optimizer_type="AdamW8bit"`。 + +指定 `xformers` 选项,并使用 xformers 的 CrossAttention。如果未安装 xformers 或出现错误(具体情况取决于环境,例如使用 `mixed_precision="no"`),则可以指定 `mem_eff_attn` 选项以使用省内存版的 CrossAttention(速度会变慢)。 + +为了节省内存,指定 `cache_latents` 选项以缓存 VAE 的输出。 + +如果有足够的内存,请编辑 `.toml` 文件将批处理大小增加到大约 `4`(可能会提高速度和精度)。此外,取消 `cache_latents` 选项可以进行数据增强。 + +### 常用选项 + +对于以下情况,请参阅“常用选项”部分。 + +- 学习 Stable Diffusion 2.x 或其衍生模型。 +- 学习基于 clip skip 大于等于2的模型。 +- 学习超过75个令牌的标题。 + +### 关于DreamBooth中的步数 + +为了实现省内存化,该脚本中每个步骤的学习次数减半(因为学习和正则化的图像在训练时被分为不同的批次)。 + +要进行与原始Diffusers版或XavierXiao的Stable Diffusion版几乎相同的学习,请将步骤数加倍。 + 
+(虽然在将学习图像和正则化图像整合后再打乱顺序,但我认为对学习没有太大影响。) + +关于DreamBooth的批量大小 + +与像LoRA这样的学习相比,为了训练整个模型,内存消耗量会更大(与微调相同)。 + +关于学习率 + +在Diffusers版中,学习率为5e-6,而在Stable Diffusion版中为1e-6,因此在上面的示例中指定了1e-6。 + +当使用旧格式的数据集指定命令行时 + +使用选项指定分辨率和批量大小。命令行示例如下。 +``` +accelerate launch --num_cpu_threads_per_process 1 train_db.py + --pretrained_model_name_or_path=<.ckpt或.safetensord或Diffusers版模型的目录> + --train_data_dir=<训练数据的目录> + --reg_data_dir=<正则化图像的目录> + --output_dir=<训练后模型的输出目录> + --output_name=<训练后模型输出文件的名称> + --prior_loss_weight=1.0 + --resolution=512 + --train_batch_size=1 + --learning_rate=1e-6 + --max_train_steps=1600 + --use_8bit_adam + --xformers + --mixed_precision="bf16" + --cache_latents + --gradient_checkpointing +``` + +## 使用训练好的模型生成图像 + +训练完成后,将在指定的文件夹中以指定的名称输出safetensors文件。 + +对于v1.4/1.5和其他派生模型,可以在此模型中使用Automatic1111先生的WebUI进行推断。请将其放置在models\Stable-diffusion文件夹中。 + +对于使用v2.x模型在WebUI中生成图像的情况,需要单独的.yaml文件来描述模型的规格。对于v2.x base,需要v2-inference.yaml,对于768/v,则需要v2-inference-v.yaml。请将它们放置在相同的文件夹中,并将文件扩展名之前的部分命名为与模型相同的名称。 +![image](https://user-images.githubusercontent.com/52813779/210776915-061d79c3-6582-42c2-8884-8b91d2f07313.png) + +每个yaml文件都在[Stability AI的SD2.0存储库](https://github.com/Stability-AI/stablediffusion/tree/main/configs/stable-diffusion)……之中。 + +# DreamBooth的其他主要选项 + +有关所有选项的详细信息,请参阅另一份文档。 + +## 不在中途开始对文本编码器进行训练 --stop_text_encoder_training + +如果在stop_text_encoder_training选项中指定一个数字,则在该步骤之后,将不再对文本编码器进行训练,只会对U-Net进行训练。在某些情况下,可能会期望提高精度。 + +(我们推测可能会有时候仅仅文本编码器会过度学习,而这样做可以避免这种情况,但详细影响尚不清楚。) + +## 不进行分词器的填充 --no_token_padding + +如果指定no_token_padding选项,则不会对分词器的输出进行填充(与Diffusers版本的旧DreamBooth相同)。 + + diff --git a/docs/train_lllite_README-ja.md b/docs/train_lllite_README-ja.md new file mode 100644 index 0000000000000000000000000000000000000000..236e64b578bde99790555ead9d115a664faebb51 --- /dev/null +++ b/docs/train_lllite_README-ja.md @@ -0,0 +1,214 @@ +# ControlNet-LLLite について + +__きわめて実験的な実装のため、将来的に大きく変更される可能性があります。__ + +## 概要 +ControlNet-LLLite 
は、[ControlNet](https://github.com/lllyasviel/ControlNet) の軽量版です。LoRA Like Lite という意味で、LoRAからインスピレーションを得た構造を持つ、軽量なControlNetです。現在はSDXLにのみ対応しています。 + +## サンプルの重みファイルと推論 + +こちらにあります: https://huggingface.co/kohya-ss/controlnet-lllite + +ComfyUIのカスタムノードを用意しています。: https://github.com/kohya-ss/ControlNet-LLLite-ComfyUI + +生成サンプルはこのページの末尾にあります。 + +## モデル構造 +ひとつのLLLiteモジュールは、制御用画像(以下conditioning image)を潜在空間に写像するconditioning image embeddingと、LoRAにちょっと似た構造を持つ小型のネットワークからなります。LLLiteモジュールを、LoRAと同様にU-NetのLinearやConvに追加します。詳しくはソースコードを参照してください。 + +推論環境の制限で、現在はCrossAttentionのみ(attn1のq/k/v、attn2のq)に追加されます。 + +## モデルの学習 + +### データセットの準備 +通常のdatasetに加え、`conditioning_data_dir` で指定したディレクトリにconditioning imageを格納してください。conditioning imageは学習用画像と同じbasenameを持つ必要があります。また、conditioning imageは学習用画像と同じサイズに自動的にリサイズされます。conditioning imageにはキャプションファイルは不要です。 + +たとえば DreamBooth 方式でキャプションファイルを用いる場合の設定ファイルは以下のようになります。 + +```toml +[[datasets.subsets]] +image_dir = "path/to/image/dir" +caption_extension = ".txt" +conditioning_data_dir = "path/to/conditioning/image/dir" +``` + +現時点の制約として、random_cropは使用できません。 + +学習データとしては、元のモデルで生成した画像を学習用画像として、そこから加工した画像をconditioning imageとした、合成によるデータセットを用いるのがもっとも簡単です(データセットの品質的には問題があるかもしれません)。具体的なデータセットの合成方法については後述します。 + +なお、元モデルと異なる画風の画像を学習用画像とすると、制御に加えて、その画風についても学ぶ必要が生じます。ControlNet-LLLiteは容量が少ないため、画風学習には不向きです。このような場合には、後述の次元数を多めにしてください。 + +### 学習 +スクリプトで生成する場合は、`sdxl_train_control_net_lllite.py` を実行してください。`--cond_emb_dim` でconditioning image embeddingの次元数を指定できます。`--network_dim` でLoRA的モジュールのrankを指定できます。その他のオプションは`sdxl_train_network.py`に準じますが、`--network_module`の指定は不要です。 + +学習時にはメモリを大量に使用しますので、キャッシュやgradient checkpointingなどの省メモリ化のオプションを有効にしてください。また`--full_bf16` オプションで、BFloat16を使用するのも有効です(RTX 30シリーズ以降のGPUが必要です)。24GB VRAMで動作確認しています。 + +conditioning image embeddingの次元数は、サンプルのCannyでは32を指定しています。LoRA的モジュールのrankは同じく64です。対象とするconditioning imageの特徴に合わせて調整してください。 + +(サンプルのCannyは恐らくかなり難しいと思われます。depthなどでは半分程度にしてもいいかもしれません。) + +以下は .toml の設定例です。 + +```toml +pretrained_model_name_or_path = 
"/path/to/model_trained_on.safetensors" +max_train_epochs = 12 +max_data_loader_n_workers = 4 +persistent_data_loader_workers = true +seed = 42 +gradient_checkpointing = true +mixed_precision = "bf16" +save_precision = "bf16" +full_bf16 = true +optimizer_type = "adamw8bit" +learning_rate = 2e-4 +xformers = true +output_dir = "/path/to/output/dir" +output_name = "output_name" +save_every_n_epochs = 1 +save_model_as = "safetensors" +vae_batch_size = 4 +cache_latents = true +cache_latents_to_disk = true +cache_text_encoder_outputs = true +cache_text_encoder_outputs_to_disk = true +network_dim = 64 +cond_emb_dim = 32 +dataset_config = "/path/to/dataset.toml" +``` + +### 推論 + +スクリプトで生成する場合は、`sdxl_gen_img.py` を実行してください。`--control_net_lllite_models` でLLLiteのモデルファイルを指定できます。次元数はモデルファイルから自動取得します。 + +`--guide_image_path`で推論に用いるconditioning imageを指定してください。なおpreprocessは行われないため、たとえばCannyならCanny処理を行った画像を指定してください(背景黒に白線)。`--control_net_preps`, `--control_net_weights`, `--control_net_ratios` には未対応です。 + +## データセットの合成方法 + +### 学習用画像の生成 + +学習のベースとなるモデルで画像生成を行います。Web UIやComfyUIなどで生成してください。画像サイズはモデルのデフォルトサイズで良いと思われます(1024x1024など)。bucketingを用いることもできます。その場合は適宜適切な解像度で生成してください。 + +生成時のキャプション等は、ControlNet-LLLiteの利用時に生成したい画像にあわせるのが良いと思われます。 + +生成した画像を任意のディレクトリに保存してください。このディレクトリをデータセットの設定ファイルで指定します。 + +当リポジトリ内の `sdxl_gen_img.py` でも生成できます。例えば以下のように実行します。 + +```dos +python sdxl_gen_img.py --ckpt path/to/model.safetensors --n_iter 1 --scale 10 --steps 36 --outdir path/to/output/dir --xformers --W 1024 --H 1024 --original_width 2048 --original_height 2048 --bf16 --sampler ddim --batch_size 4 --vae_batch_size 2 --images_per_prompt 512 --max_embeddings_multiples 1 --prompt "{portrait|digital art|anime screen cap|detailed illustration} of 1girl, {standing|sitting|walking|running|dancing} on {classroom|street|town|beach|indoors|outdoors}, {looking at viewer|looking away|looking at another}, {in|wearing} {shirt and skirt|school uniform|casual wear} { |, dynamic pose}, (solo), teen age, 
{0-1$$smile,|blush,|kind smile,|expression less,|happy,|sadness,} {0-1$$upper body,|full body,|cowboy shot,|face focus,} trending on pixiv, {0-2$$depth of fields,|8k wallpaper,|highly detailed,|pov,} {0-1$$summer, |winter, |spring, |autumn, } beautiful face { |, from below|, from above|, from side|, from behind|, from back} --n nsfw, bad face, lowres, low quality, worst quality, low effort, watermark, signature, ugly, poorly drawn" +``` + +VRAM 24GBの設定です。VRAMサイズにより`--batch_size` `--vae_batch_size`を調整してください。 + +`--prompt`でワイルドカードを利用してランダムに生成しています。適宜調整してください。 + +### 画像の加工 + +外部のプログラムを用いて、生成した画像を加工します。加工した画像を任意のディレクトリに保存してください。これらがconditioning imageになります。 + +加工にはたとえばCannyなら以下のようなスクリプトが使えます。 + +```python +import glob +import os +import random +import cv2 +import numpy as np + +IMAGES_DIR = "path/to/generated/images" +CANNY_DIR = "path/to/canny/images" + +os.makedirs(CANNY_DIR, exist_ok=True) +img_files = glob.glob(IMAGES_DIR + "/*.png") +for img_file in img_files: + can_file = CANNY_DIR + "/" + os.path.basename(img_file) + if os.path.exists(can_file): + print("Skip: " + img_file) + continue + + print(img_file) + + img = cv2.imread(img_file) + + # random threshold + # while True: + # threshold1 = random.randint(0, 127) + # threshold2 = random.randint(128, 255) + # if threshold2 - threshold1 > 80: + # break + + # fixed threshold + threshold1 = 100 + threshold2 = 200 + + img = cv2.Canny(img, threshold1, threshold2) + + cv2.imwrite(can_file, img) +``` + +### キャプションファイルの作成 + +学習用画像のbasenameと同じ名前で、それぞれの画像に対応したキャプションファイルを作成してください。生成時のプロンプトをそのまま利用すれば良いと思われます。 + +`sdxl_gen_img.py` で生成した場合は、画像内のメタデータに生成時のプロンプトが記録されていますので、以下のようなスクリプトで学習用画像と同じディレクトリにキャプションファイルを作成できます(拡張子 `.txt`)。 + +```python +import glob +import os +from PIL import Image + +IMAGES_DIR = "path/to/generated/images" + +img_files = glob.glob(IMAGES_DIR + "/*.png") +for img_file in img_files: + cap_file = img_file.replace(".png", ".txt") + if os.path.exists(cap_file): + print(f"Skip: {img_file}") + continue + 
print(img_file) + + img = Image.open(img_file) + prompt = img.text["prompt"] if "prompt" in img.text else "" + if prompt == "": + print(f"Prompt not found in {img_file}") + + with open(cap_file, "w") as f: + f.write(prompt + "\n") +``` + +### データセットの設定ファイルの作成 + +コマンドラインオプションからの指定も可能ですが、`.toml`ファイルを作成する場合は `conditioning_data_dir` に加工した画像を保存したディレクトリを指定します。 + +以下は設定ファイルの例です。 + +```toml +[general] +flip_aug = false +color_aug = false +resolution = [1024,1024] + +[[datasets]] +batch_size = 8 +enable_bucket = false + + [[datasets.subsets]] + image_dir = "path/to/generated/image/dir" + caption_extension = ".txt" + conditioning_data_dir = "path/to/canny/image/dir" +``` + +## 謝辞 + +ControlNetの作者である lllyasviel 氏、実装上のアドバイスとトラブル解決へのご尽力をいただいた furusu 氏、ControlNetデータセットを実装していただいた ddPn08 氏に感謝いたします。 + +## サンプル +Canny +![kohya_ss_girl_standing_at_classroom_smiling_to_the_viewer_class_78976b3e-0d4d-4ea0-b8e3-053ae493abbc](https://github.com/kohya-ss/sd-scripts/assets/52813779/37e9a736-649b-4c0f-ab26-880a1bf319b5) + +![im_20230820104253_000_1](https://github.com/kohya-ss/sd-scripts/assets/52813779/c8896900-ab86-4120-932f-6e2ae17b77c0) + +![im_20230820104302_000_1](https://github.com/kohya-ss/sd-scripts/assets/52813779/b12457a0-ee3c-450e-ba9a-b712d0fe86bb) + +![im_20230820104310_000_1](https://github.com/kohya-ss/sd-scripts/assets/52813779/8845b8d9-804a-44ac-9618-113a28eac8a1) + diff --git a/docs/train_lllite_README.md b/docs/train_lllite_README.md new file mode 100644 index 0000000000000000000000000000000000000000..948a5cfced792c9830f4ddc8898735e04686019d --- /dev/null +++ b/docs/train_lllite_README.md @@ -0,0 +1,217 @@ +# About ControlNet-LLLite + +__This is an extremely experimental implementation and may change significantly in the future.__ + +日本語版は[こちら](./train_lllite_README-ja.md) + +## Overview + +ControlNet-LLLite is a lightweight version of [ControlNet](https://github.com/lllyasviel/ControlNet). 
The name stands for "LoRA Like Lite": it is a lightweight ControlNet with a structure inspired by LoRA. Currently, only SDXL is supported. + +## Sample weight file and inference + +Sample weight files are available here: https://huggingface.co/kohya-ss/controlnet-lllite + +A custom node for ComfyUI is available: https://github.com/kohya-ss/ControlNet-LLLite-ComfyUI + +Sample images are at the end of this page. + +## Model structure + +A single LLLite module consists of a conditioning image embedding that maps a conditioning image to a latent space and a small network with a structure similar to LoRA. The LLLite module is added to U-Net's Linear and Conv in the same way as LoRA. Please refer to the source code for details. + +Due to the limitations of the inference environment, the module is currently added only to CrossAttention (attn1 q/k/v, attn2 q). + +## Model training + +### Preparing the dataset + +In addition to the normal dataset, please store the conditioning images in the directory specified by `conditioning_data_dir`. Each conditioning image must have the same basename as its training image. The conditioning image will be automatically resized to the same size as the training image. The conditioning image does not require a caption file. + +For example, a dataset configuration that uses caption files in the DreamBooth method looks like this: + +```toml +[[datasets.subsets]] +image_dir = "path/to/image/dir" +caption_extension = ".txt" +conditioning_data_dir = "path/to/conditioning/image/dir" +``` + +At the moment, random_crop cannot be used. + +For training data, it is easiest to use a synthetic dataset in which images generated by the original model are the training images and processed versions of them are the conditioning images (the quality of the dataset may be problematic). See below for specific methods of synthesizing datasets. + +Note that if you use an image with a different art style than the original model as a training image, the model will have to learn not only the control but also the art style. ControlNet-LLLite has a small capacity, so it is not suitable for learning art styles.
In such cases, increase the number of dimensions as described below. + +### Training + +Run `sdxl_train_control_net_lllite.py`. You can specify the dimension of the conditioning image embedding with `--cond_emb_dim`. You can specify the rank of the LoRA-like module with `--network_dim`. Other options are the same as `sdxl_train_network.py`, but `--network_module` is not required. + +Since a large amount of memory is used during training, please enable memory-saving options such as cache and gradient checkpointing. It is also effective to use BFloat16 with the `--full_bf16` option (requires RTX 30 series or later GPU). It has been confirmed to work with 24GB VRAM. + +For the sample Canny, the dimension of the conditioning image embedding is 32. The rank of the LoRA-like module is 64. Adjust according to the features of the conditioning image you are targeting. + +(The sample Canny is probably a rather difficult task. For depth and similar conditions, it may be better to reduce these values to about half.) + +The following is an example of a .toml configuration. + +```toml +pretrained_model_name_or_path = "/path/to/model_trained_on.safetensors" +max_train_epochs = 12 +max_data_loader_n_workers = 4 +persistent_data_loader_workers = true +seed = 42 +gradient_checkpointing = true +mixed_precision = "bf16" +save_precision = "bf16" +full_bf16 = true +optimizer_type = "adamw8bit" +learning_rate = 2e-4 +xformers = true +output_dir = "/path/to/output/dir" +output_name = "output_name" +save_every_n_epochs = 1 +save_model_as = "safetensors" +vae_batch_size = 4 +cache_latents = true +cache_latents_to_disk = true +cache_text_encoder_outputs = true +cache_text_encoder_outputs_to_disk = true +network_dim = 64 +cond_emb_dim = 32 +dataset_config = "/path/to/dataset.toml" +``` + +### Inference + +If you want to generate images with a script, run `sdxl_gen_img.py`. You can specify the LLLite model file with `--control_net_lllite_models`. The dimension is automatically obtained from the model file. 
+ +Specify the conditioning image to be used for inference with `--guide_image_path`. Since preprocess is not performed, if it is Canny, specify an image processed with Canny (white line on black background). `--control_net_preps`, `--control_net_weights`, and `--control_net_ratios` are not supported. + +## How to synthesize a dataset + +### Generating training images + +Generate images with the base model for training. Please generate them with Web UI or ComfyUI etc. The image size should be the default size of the model (1024x1024, etc.). You can also use bucketing. In that case, please generate it at an arbitrary resolution. + +The captions and other settings when generating the images should be the same as when generating the images with the trained ControlNet-LLLite model. + +Save the generated images in an arbitrary directory. Specify this directory in the dataset configuration file. + + +You can also generate them with `sdxl_gen_img.py` in this repository. For example, run as follows: + +```dos +python sdxl_gen_img.py --ckpt path/to/model.safetensors --n_iter 1 --scale 10 --steps 36 --outdir path/to/output/dir --xformers --W 1024 --H 1024 --original_width 2048 --original_height 2048 --bf16 --sampler ddim --batch_size 4 --vae_batch_size 2 --images_per_prompt 512 --max_embeddings_multiples 1 --prompt "{portrait|digital art|anime screen cap|detailed illustration} of 1girl, {standing|sitting|walking|running|dancing} on {classroom|street|town|beach|indoors|outdoors}, {looking at viewer|looking away|looking at another}, {in|wearing} {shirt and skirt|school uniform|casual wear} { |, dynamic pose}, (solo), teen age, {0-1$$smile,|blush,|kind smile,|expression less,|happy,|sadness,} {0-1$$upper body,|full body,|cowboy shot,|face focus,} trending on pixiv, {0-2$$depth of fields,|8k wallpaper,|highly detailed,|pov,} {0-1$$summer, |winter, |spring, |autumn, } beautiful face { |, from below|, from above|, from side|, from behind|, from back} --n nsfw, bad face, lowres, 
low quality, worst quality, low effort, watermark, signature, ugly, poorly drawn" +``` + +This is a setting for VRAM 24GB. Adjust `--batch_size` and `--vae_batch_size` according to the VRAM size. + +The images are generated randomly using wildcards in `--prompt`. Adjust as necessary. + +### Processing images + +Use an external program to process the generated images. Save the processed images in an arbitrary directory. These will be the conditioning images. + +For example, you can use the following script to process the images with Canny. + +```python +import glob +import os +import random +import cv2 +import numpy as np + +IMAGES_DIR = "path/to/generated/images" +CANNY_DIR = "path/to/canny/images" + +os.makedirs(CANNY_DIR, exist_ok=True) +img_files = glob.glob(IMAGES_DIR + "/*.png") +for img_file in img_files: + can_file = CANNY_DIR + "/" + os.path.basename(img_file) + if os.path.exists(can_file): + print("Skip: " + img_file) + continue + + print(img_file) + + img = cv2.imread(img_file) + + # random threshold + # while True: + # threshold1 = random.randint(0, 127) + # threshold2 = random.randint(128, 255) + # if threshold2 - threshold1 > 80: + # break + + # fixed threshold + threshold1 = 100 + threshold2 = 200 + + img = cv2.Canny(img, threshold1, threshold2) + + cv2.imwrite(can_file, img) +``` + +### Creating caption files + +Create a caption file for each image with the same basename as the training image. It is fine to use the same caption as the one used when generating the image. + +If you generated the images with `sdxl_gen_img.py`, you can use the following script to create the caption files (`*.txt`) from the metadata in the generated images. 
+ +```python +import glob +import os +from PIL import Image + +IMAGES_DIR = "path/to/generated/images" + +img_files = glob.glob(IMAGES_DIR + "/*.png") +for img_file in img_files: + cap_file = img_file.replace(".png", ".txt") + if os.path.exists(cap_file): + print(f"Skip: {img_file}") + continue + print(img_file) + + img = Image.open(img_file) + prompt = img.text["prompt"] if "prompt" in img.text else "" + if prompt == "": + print(f"Prompt not found in {img_file}") + + with open(cap_file, "w") as f: + f.write(prompt + "\n") +``` + +### Creating a dataset configuration file + +You can use the command line arguments of `sdxl_train_control_net_lllite.py` to specify the conditioning image directory. However, if you want to use a `.toml` file, specify the conditioning image directory in `conditioning_data_dir`. + +```toml +[general] +flip_aug = false +color_aug = false +resolution = [1024,1024] + +[[datasets]] +batch_size = 8 +enable_bucket = false + + [[datasets.subsets]] + image_dir = "path/to/generated/image/dir" + caption_extension = ".txt" + conditioning_data_dir = "path/to/canny/image/dir" +``` + +## Credit + +I would like to thank lllyasviel, the author of ControlNet, furusu, who provided me with advice on implementation and helped me solve problems, and ddPn08, who implemented the ControlNet dataset. 
+ +## Sample + +Canny +![kohya_ss_girl_standing_at_classroom_smiling_to_the_viewer_class_78976b3e-0d4d-4ea0-b8e3-053ae493abbc](https://github.com/kohya-ss/sd-scripts/assets/52813779/37e9a736-649b-4c0f-ab26-880a1bf319b5) + +![im_20230820104253_000_1](https://github.com/kohya-ss/sd-scripts/assets/52813779/c8896900-ab86-4120-932f-6e2ae17b77c0) + +![im_20230820104302_000_1](https://github.com/kohya-ss/sd-scripts/assets/52813779/b12457a0-ee3c-450e-ba9a-b712d0fe86bb) + +![im_20230820104310_000_1](https://github.com/kohya-ss/sd-scripts/assets/52813779/8845b8d9-804a-44ac-9618-113a28eac8a1) diff --git a/docs/train_network_README-ja.md b/docs/train_network_README-ja.md new file mode 100644 index 0000000000000000000000000000000000000000..a65c7e2ed3c1e86e814cfef38aa1646ebfaa095f --- /dev/null +++ b/docs/train_network_README-ja.md @@ -0,0 +1,486 @@ +# LoRAの学習について + +[LoRA: Low-Rank Adaptation of Large Language Models](https://arxiv.org/abs/2106.09685)(arxiv)、[LoRA](https://github.com/microsoft/LoRA)(github)をStable Diffusionに適用したものです。 + +[cloneofsimo氏のリポジトリ](https://github.com/cloneofsimo/lora)を大いに参考にさせていただきました。ありがとうございます。 + +通常のLoRAは Linear およびカーネルサイズ 1x1 の Conv2d にのみ適用されますが、カーネルサイズ 3x3 のConv2dに適用を拡大することもできます。 + +Conv2d 3x3への拡大は [cloneofsimo氏](https://github.com/cloneofsimo/lora) が最初にリリースし、KohakuBlueleaf氏が [LoCon](https://github.com/KohakuBlueleaf/LoCon) でその有効性を明らかにしたものです。KohakuBlueleaf氏に深く感謝します。 + +8GB VRAMでもぎりぎり動作するようです。 + +[学習についての共通ドキュメント](./train_README-ja.md) もあわせてご覧ください。 + +# 学習できるLoRAの種類 + +以下の二種類をサポートします。以下は当リポジトリ内の独自の名称です。 + +1. __LoRA-LierLa__ : (LoRA for __Li__ n __e__ a __r__ __La__ yers、リエラと読みます) + + Linear およびカーネルサイズ 1x1 の Conv2d に適用されるLoRA + +2. 
__LoRA-C3Lier__ : (LoRA for __C__ onvolutional layers with __3__ x3 Kernel and __Li__ n __e__ a __r__ layers、セリアと読みます) + + 1.に加え、カーネルサイズ 3x3 の Conv2d に適用されるLoRA + +LoRA-LierLaに比べ、LoRA-C3Lierは適用される層が増える分、高い精度が期待できるかもしれません。 + +また学習時は __DyLoRA__ を使用することもできます(後述します)。 + +## 学習したモデルに関する注意 + +LoRA-LierLa は、AUTOMATIC1111氏のWeb UIのLoRA機能で使用することができます。 + +LoRA-C3Lierを使いWeb UIで生成するには、こちらの[WebUI用extension](https://github.com/kohya-ss/sd-webui-additional-networks)を使ってください。 + +いずれも学習したLoRAのモデルを、Stable Diffusionのモデルにこのリポジトリ内のスクリプトであらかじめマージすることもできます。 + +cloneofsimo氏のリポジトリ、およびd8ahazard氏の[Dreambooth Extension for Stable-Diffusion-WebUI](https://github.com/d8ahazard/sd_dreambooth_extension)とは、現時点では互換性がありません。いくつかの機能拡張を行っているためです(後述)。 + +# 学習の手順 + +あらかじめこのリポジトリのREADMEを参照し、環境整備を行ってください。 + +## データの準備 + +[学習データの準備について](./train_README-ja.md) を参照してください。 + + +## 学習の実行 + +`train_network.py`を用います。 + +`train_network.py`では `--network_module` オプションに、学習対象のモジュール名を指定します。LoRAに対応するのは`networks.lora`となりますので、それを指定してください。 + +なお学習率は通常のDreamBoothやfine tuningよりも高めの、`1e-4`~`1e-3`程度を指定するとよいようです。 + +以下はコマンドラインの例です。 + +``` +accelerate launch --num_cpu_threads_per_process 1 train_network.py + --pretrained_model_name_or_path=<.ckptまたは.safetensorsまたはDiffusers版モデルのディレクトリ> + --dataset_config=<データ準備で作成した.tomlファイル> + --output_dir=<学習したモデルの出力先フォルダ> + --output_name=<学習したモデル出力時のファイル名> + --save_model_as=safetensors + --prior_loss_weight=1.0 + --max_train_steps=400 + --learning_rate=1e-4 + --optimizer_type="AdamW8bit" + --xformers + --mixed_precision="fp16" + --cache_latents + --gradient_checkpointing + --save_every_n_epochs=1 + --network_module=networks.lora +``` + +このコマンドラインでは LoRA-LierLa が学習されます。 + +`--output_dir` オプションで指定したフォルダに、LoRAのモデルが保存されます。他のオプション、オプティマイザ等については [学習の共通ドキュメント](./train_README-ja.md) の「よく使われるオプション」も参照してください。 + +その他、以下のオプションが指定できます。 + +* `--network_dim` + * LoRAのRANKを指定します(``--network_dim=4``など)。省略時は4になります。数が多いほど表現力は増しますが、学習に必要なメモリ、時間は増えます。また闇雲に増やしても良くないようです。 +* `--network_alpha` + * アンダーフローを防ぎ安定して学習するための 
``alpha`` 値を指定します。デフォルトは1です。``network_dim``と同じ値を指定すると以前のバージョンと同じ動作になります。 +* `--persistent_data_loader_workers` + * Windows環境で指定するとエポック間の待ち時間が大幅に短縮されます。 +* `--max_data_loader_n_workers` + * データ読み込みのプロセス数を指定します。プロセス数が多いとデータ読み込みが速くなりGPUを効率的に利用できますが、メインメモリを消費します。デフォルトは「`8` または `CPU同時実行スレッド数-1` の小さいほう」なので、メインメモリに余裕がない場合や、GPU使用率が90%程度以上なら、それらの数値を見ながら `2` または `1` 程度まで下げてください。 +* `--network_weights` + * 学習前に学習済みのLoRAの重みを読み込み、そこから追加で学習します。 +* `--network_train_unet_only` + * U-Netに関連するLoRAモジュールのみ有効とします。fine tuning的な学習で指定するとよいかもしれません。 +* `--network_train_text_encoder_only` + * Text Encoderに関連するLoRAモジュールのみ有効とします。Textual Inversion的な効果が期待できるかもしれません。 +* `--unet_lr` + * U-Netに関連するLoRAモジュールに、通常の学習率(--learning_rateオプションで指定)とは異なる学習率を使う時に指定します。 +* `--text_encoder_lr` + * Text Encoderに関連するLoRAモジュールに、通常の学習率(--learning_rateオプションで指定)とは異なる学習率を使う時に指定します。Text Encoderのほうを若干低めの学習率(5e-5など)にしたほうが良い、という話もあるようです。 +* `--network_args` + * 複数の引数を指定できます。後述します。 + +`--network_train_unet_only` と `--network_train_text_encoder_only` の両方とも未指定時(デフォルト)はText EncoderとU-Netの両方のLoRAモジュールを有効にします。 + +# その他の学習方法 + +## LoRA-C3Lier を学習する + +`--network_args` に以下のように指定してください。`conv_dim` で Conv2d (3x3) の rank を、`conv_alpha` で alpha を指定してください。 + +``` +--network_args "conv_dim=4" "conv_alpha=1" +``` + +以下のように alpha 省略時は1になります。 + +``` +--network_args "conv_dim=4" +``` + +## DyLoRA + +DyLoRAはこちらの論文で提案されたものです。[DyLoRA: Parameter Efficient Tuning of Pre-trained Models using Dynamic Search-Free Low-Rank Adaptation](https://arxiv.org/abs/2210.07558) 公式実装は[こちら](https://github.com/huawei-noah/KD-NLP/tree/main/DyLoRA)です。 + +論文によると、LoRAのrankは必ずしも高いほうが良いわけではなく、対象のモデル、データセット、タスクなどにより適切なrankを探す必要があるようです。DyLoRAを使うと、指定したdim(rank)以下のさまざまなrankで同時にLoRAを学習します。これにより最適なrankをそれぞれ学習して探す手間を省くことができます。 + +当リポジトリの実装は公式実装をベースに独自の拡張を加えています(そのため不具合などあるかもしれません)。 + +### 当リポジトリのDyLoRAの特徴 + +学習後のDyLoRAのモデルファイルはLoRAと互換性があります。また、モデルファイルから指定したdim(rank)以下の複数のdimのLoRAを抽出できます。 + +DyLoRA-LierLa、DyLoRA-C3Lierのどちらも学習できます。 + +### DyLoRAで学習する + 
+`--network_module=networks.dylora` のように、DyLoRAに対応する`networks.dylora`を指定してください。 + +また `--network_args` に、たとえば`--network_args "unit=4"`のように`unit`を指定します。`unit`はrankを分割する単位です。たとえば`--network_dim=16 --network_args "unit=4"` のように指定します。`unit`は`network_dim`を割り切れる値(`network_dim`は`unit`の倍数)としてください。 + +`unit`を指定しない場合は、`unit=1`として扱われます。 + +記述例は以下です。 + +``` +--network_module=networks.dylora --network_dim=16 --network_args "unit=4" + +--network_module=networks.dylora --network_dim=32 --network_alpha=16 --network_args "unit=4" +``` + +DyLoRA-C3Lierの場合は、`--network_args` に`"conv_dim=4"`のように`conv_dim`を指定します。通常のLoRAと異なり、`conv_dim`は`network_dim`と同じ値である必要があります。記述例は以下です。 + +``` +--network_module=networks.dylora --network_dim=16 --network_args "conv_dim=16" "unit=4" + +--network_module=networks.dylora --network_dim=32 --network_alpha=16 --network_args "conv_dim=32" "conv_alpha=16" "unit=8" +``` + +たとえばdim=16、unit=4(前述)で学習すると、4、8、12、16の4つのrankのLoRAを学習、抽出できます。抽出した各モデルで画像を生成し、比較することで、最適なrankのLoRAを選択できます。 + +その他のオプションは通常のLoRAと同じです。 + +※ `unit`は当リポジトリの独自拡張で、DyLoRAでは同dim(rank)の通常LoRAに比べると学習時間が長くなることが予想されるため、分割単位を大きくしたものです。 + +### DyLoRAのモデルからLoRAモデルを抽出する + +`networks`フォルダ内の `extract_lora_from_dylora.py`を使用します。指定した`unit`単位で、DyLoRAのモデルからLoRAのモデルを抽出します。 + +コマンドラインはたとえば以下のようになります。 + +```powershell +python networks\extract_lora_from_dylora.py --model "foldername/dylora-model.safetensors" --save_to "foldername/dylora-model-split.safetensors" --unit 4 +``` + +`--model` にはDyLoRAのモデルファイルを指定します。`--save_to` には抽出したモデルを保存するファイル名を指定します(rankの数値がファイル名に付加されます)。`--unit` にはDyLoRAの学習時の`unit`を指定します。 + +## 階層別学習率 + +詳細は[PR #355](https://github.com/kohya-ss/sd-scripts/pull/355) をご覧ください。 + +SDXLは現在サポートしていません。 + +フルモデルの25個のブロックの重みを指定できます。最初のブロックに該当するLoRAは存在しませんが、階層別LoRA適用等との互換性のために25個としています。またconv2d3x3に拡張しない場合も一部のブロックにはLoRAが存在しませんが、記述を統一するため常に25個の値を指定してください。 + +`--network_args` で以下の引数を指定してください。 + +- `down_lr_weight` : U-Netのdown blocksの学習率の重みを指定します。以下が指定可能です。 + - ブロックごとの重み : `"down_lr_weight=0,0,0,0,0,0,1,1,1,1,1,1"` 
のように12個の数値を指定します。 + - プリセットからの指定 : `"down_lr_weight=sine"` のように指定します(サインカーブで重みを指定します)。sine, cosine, linear, reverse_linear, zeros が指定可能です。また `"down_lr_weight=cosine+.25"` のように `+数値` を追加すると、指定した数値を加算します(0.25~1.25になります)。 +- `mid_lr_weight` : U-Netのmid blockの学習率の重みを指定します。`"mid_lr_weight=0.5"` のように数値を一つだけ指定します。 +- `up_lr_weight` : U-Netのup blocksの学習率の重みを指定します。down_lr_weightと同様です。 +- 指定を省略した部分は1.0として扱われます。また重みを0にするとそのブロックのLoRAモジュールは作成されません。 +- `block_lr_zero_threshold` : 重みがこの値以下の場合、LoRAモジュールを作成しません。デフォルトは0です。 + +### 階層別学習率コマンドライン指定例: + +```powershell +--network_args "down_lr_weight=0.5,0.5,0.5,0.5,1.0,1.0,1.0,1.0,1.5,1.5,1.5,1.5" "mid_lr_weight=2.0" "up_lr_weight=1.5,1.5,1.5,1.5,1.0,1.0,1.0,1.0,0.5,0.5,0.5,0.5" + +--network_args "block_lr_zero_threshold=0.1" "down_lr_weight=sine+.5" "mid_lr_weight=1.5" "up_lr_weight=cosine+.5" +``` + +### 階層別学習率tomlファイル指定例: + +```toml +network_args = [ "down_lr_weight=0.5,0.5,0.5,0.5,1.0,1.0,1.0,1.0,1.5,1.5,1.5,1.5", "mid_lr_weight=2.0", "up_lr_weight=1.5,1.5,1.5,1.5,1.0,1.0,1.0,1.0,0.5,0.5,0.5,0.5",] + +network_args = [ "block_lr_zero_threshold=0.1", "down_lr_weight=sine+.5", "mid_lr_weight=1.5", "up_lr_weight=cosine+.5", ] +``` + +## 階層別dim (rank) + +フルモデルの25個のブロックのdim (rank)を指定できます。階層別学習率と同様に一部のブロックにはLoRAが存在しない場合がありますが、常に25個の値を指定してください。 + +`--network_args` で以下の引数を指定してください。 + +- `block_dims` : 各ブロックのdim (rank)を指定します。`"block_dims=2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2"` のように25個の数値を指定します。 +- `block_alphas` : 各ブロックのalphaを指定します。block_dimsと同様に25個の数値を指定します。省略時はnetwork_alphaの値が使用されます。 +- `conv_block_dims` : LoRAをConv2d 3x3に拡張し、各ブロックのdim (rank)を指定します。 +- `conv_block_alphas` : LoRAをConv2d 3x3に拡張したときの各ブロックのalphaを指定します。省略時はconv_alphaの値が使用されます。 + +### 階層別dim (rank)コマンドライン指定例: + +```powershell +--network_args "block_dims=2,4,4,4,8,8,8,8,12,12,12,12,16,12,12,12,12,8,8,8,8,4,4,4,2" + +--network_args "block_dims=2,4,4,4,8,8,8,8,12,12,12,12,16,12,12,12,12,8,8,8,8,4,4,4,2" "conv_block_dims=2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2" 
+ +--network_args "block_dims=2,4,4,4,8,8,8,8,12,12,12,12,16,12,12,12,12,8,8,8,8,4,4,4,2" "block_alphas=2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2" +``` + +### 階層別dim (rank)tomlファイル指定例: + +```toml +network_args = [ "block_dims=2,4,4,4,8,8,8,8,12,12,12,12,16,12,12,12,12,8,8,8,8,4,4,4,2",] + +network_args = [ "block_dims=2,4,4,4,8,8,8,8,12,12,12,12,16,12,12,12,12,8,8,8,8,4,4,4,2", "block_alphas=2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2",] +``` + +# その他のスクリプト + +マージ等LoRAに関連するスクリプト群です。 + +## マージスクリプトについて + +merge_lora.pyでStable DiffusionのモデルにLoRAの学習結果をマージしたり、複数のLoRAモデルをマージしたりできます。 + +SDXL向けにはsdxl_merge_lora.pyを用意しています。オプション等は同一ですので、以下のmerge_lora.pyを読み替えてください。 + +### Stable DiffusionのモデルにLoRAのモデルをマージする + +マージ後のモデルは通常のStable Diffusionのckptと同様に扱えます。たとえば以下のようなコマンドラインになります。 + +``` +python networks\merge_lora.py --sd_model ..\model\model.ckpt + --save_to ..\lora_train1\model-char1-merged.safetensors + --models ..\lora_train1\last.safetensors --ratios 0.8 +``` + +Stable Diffusion v2.xのモデルで学習し、それにマージする場合は、--v2オプションを指定してください。 + +--sd_modelオプションにマージの元となるStable Diffusionのモデルファイルを指定します(.ckptまたは.safetensorsのみ対応で、Diffusersは今のところ対応していません)。 + +--save_toオプションにマージ後のモデルの保存先を指定します(.ckptまたは.safetensors、拡張子で自動判定)。 + +--modelsに学習したLoRAのモデルファイルを指定します。複数指定も可能で、その時は順にマージします。 + +--ratiosにそれぞれのモデルの適用率(どのくらい重みを元モデルに反映するか)を0~1.0の数値で指定します。例えば過学習に近いような場合は、適用率を下げるとマシになるかもしれません。モデルの数と同じだけ指定してください。 + +複数指定時は以下のようになります。 + +``` +python networks\merge_lora.py --sd_model ..\model\model.ckpt + --save_to ..\lora_train1\model-char1-merged.safetensors + --models ..\lora_train1\last.safetensors ..\lora_train2\last.safetensors --ratios 0.8 0.5 +``` + +### 複数のLoRAのモデルをマージする + +--concatオプションを指定すると、複数のLoRAを単純に結合して新しいLoRAモデルを作成できます。ファイルサイズ(およびdim/rank)は指定したLoRAの合計サイズになります(マージ時にdim (rank)を変更する場合は `svd_merge_lora.py` を使用してください)。 + +たとえば以下のようなコマンドラインになります。 + +``` +python networks\merge_lora.py --save_precision bf16 + --save_to ..\lora_train1\model-char1-style1-merged.safetensors + --models 
..\lora_train1\last.safetensors ..\lora_train2\last.safetensors + --ratios 1.0 -1.0 --concat --shuffle +``` + +--concatオプションを指定します。 + +また--shuffleオプションを追加し、重みをシャッフルします。シャッフルしないとマージ後のLoRAから元のLoRAを取り出せるため、コピー機学習などの場合には学習元データが明らかになります。ご注意ください。 + +--save_toオプションにマージ後のLoRAモデルの保存先を指定します(.ckptまたは.safetensors、拡張子で自動判定)。 + +--modelsに学習したLoRAのモデルファイルを指定します。三つ以上も指定可能です。 + +--ratiosにそれぞれのモデルの比率(どのくらい重みを元モデルに反映するか)を0~1.0の数値で指定します。二つのモデルを一対一でマージする場合は、「0.5 0.5」になります。「1.0 1.0」では合計の重みが大きくなりすぎて、恐らく結果はあまり望ましくないものになると思われます。 + +v1で学習したLoRAとv2で学習したLoRA、rank(次元数)の異なるLoRAはマージできません。U-NetだけのLoRAとU-Net+Text EncoderのLoRAはマージできるはずですが、結果は未知数です。 + +### その他のオプション + +* precision + * マージ計算時の精度をfloat、fp16、bf16から指定できます。省略時は精度を確保するためfloatになります。メモリ使用量を減らしたい場合はfp16/bf16を指定してください。 +* save_precision + * モデル保存時の精度をfloat、fp16、bf16から指定できます。省略時はprecisionと同じ精度になります。 + +他にもいくつかのオプションがありますので、--helpで確認してください。 + +## 複数のrankが異なるLoRAのモデルをマージする + +複数のLoRAをひとつのLoRAで近似します(完全な再現はできません)。`svd_merge_lora.py`を用います。たとえば以下のようなコマンドラインになります。 + +``` +python networks\svd_merge_lora.py + --save_to ..\lora_train1\model-char1-style1-merged.safetensors + --models ..\lora_train1\last.safetensors ..\lora_train2\last.safetensors + --ratios 0.6 0.4 --new_rank 32 --device cuda +``` + +`merge_lora.py` と主なオプションは同一です。以下のオプションが追加されています。 + +- `--new_rank` + - 作成するLoRAのrankを指定します。 +- `--new_conv_rank` + - 作成する Conv2d 3x3 LoRA の rank を指定します。省略時は `new_rank` と同じになります。 +- `--device` + - `--device cuda`としてcudaを指定すると計算をGPU上で行います。処理が速くなります。 + +## 当リポジトリ内の画像生成スクリプトで生成する + +gen_img_diffusers.pyに、--network_module、--network_weightsの各オプションを追加してください。意味は学習時と同様です。 + +--network_mulオプションで0~1.0の数値を指定すると、LoRAの適用率を変えられます。 + +## Diffusersのpipelineで生成する + +以下の例を参考にしてください。必要なファイルはnetworks/lora.pyのみです。Diffusersのバージョンは0.10.2以外では動作しない可能性があります。 + +```python +import torch +from diffusers import StableDiffusionPipeline +from networks.lora import LoRAModule, create_network_from_weights +from safetensors.torch import load_file + +# if the ckpt is CompVis based, convert it to 
Diffusers beforehand with tools/convert_diffusers20_original_sd.py. See --help for more details. + +model_id_or_dir = r"model_id_on_hugging_face_or_dir" +device = "cuda" + +# create pipe +print(f"creating pipe from {model_id_or_dir}...") +pipe = StableDiffusionPipeline.from_pretrained(model_id_or_dir, revision="fp16", torch_dtype=torch.float16) +pipe = pipe.to(device) +vae = pipe.vae +text_encoder = pipe.text_encoder +unet = pipe.unet + +# load lora networks +print(f"loading lora networks...") + +lora_path1 = r"lora1.safetensors" +sd = load_file(lora_path1) # If the file is .ckpt, use torch.load instead. +network1, sd = create_network_from_weights(0.5, None, vae, text_encoder,unet, sd) +network1.apply_to(text_encoder, unet) +network1.load_state_dict(sd) +network1.to(device, dtype=torch.float16) + +# # You can merge weights instead of apply_to+load_state_dict. network.set_multiplier does not work +# network.merge_to(text_encoder, unet, sd) + +lora_path2 = r"lora2.safetensors" +sd = load_file(lora_path2) +network2, sd = create_network_from_weights(0.7, None, vae, text_encoder,unet, sd) +network2.apply_to(text_encoder, unet) +network2.load_state_dict(sd) +network2.to(device, dtype=torch.float16) + +lora_path3 = r"lora3.safetensors" +sd = load_file(lora_path3) +network3, sd = create_network_from_weights(0.5, None, vae, text_encoder,unet, sd) +network3.apply_to(text_encoder, unet) +network3.load_state_dict(sd) +network3.to(device, dtype=torch.float16) + +# prompts +prompt = "masterpiece, best quality, 1girl, in white shirt, looking at viewer" +negative_prompt = "bad quality, worst quality, bad anatomy, bad hands" + +# exec pipe +print("generating image...") +with torch.autocast("cuda"): + image = pipe(prompt, guidance_scale=7.5, negative_prompt=negative_prompt).images[0] + +# if not merged, you can use set_multiplier +# network1.set_multiplier(0.8) +# and generate image again... 
+ +# save image +image.save(r"by_diffusers..png") +``` + +## 二つのモデルの差分からLoRAモデルを作成する + +[こちらのディスカッション](https://github.com/cloneofsimo/lora/discussions/56)を参考に実装したものです。数式はそのまま使わせていただきました(よく理解していませんが近似には特異値分解を用いるようです)。 + +二つのモデル(たとえばfine tuningの元モデルとfine tuning後のモデル)の差分を、LoRAで近似します。 + +### スクリプトの実行方法 + +以下のように指定してください。 +``` +python networks\extract_lora_from_models.py --model_org base-model.ckpt + --model_tuned fine-tuned-model.ckpt + --save_to lora-weights.safetensors --dim 4 +``` + +--model_orgオプションに元のStable Diffusionモデルを指定します。作成したLoRAモデルを適用する場合は、このモデルを指定して適用することになります。.ckptまたは.safetensorsが指定できます。 + +--model_tunedオプションに差分を抽出する対象のStable Diffusionモデルを指定します。たとえばfine tuningやDreamBooth後のモデルを指定します。.ckptまたは.safetensorsが指定できます。 + +--save_toにLoRAモデルの保存先を指定します。--dimにLoRAの次元数を指定します。 + +生成されたLoRAモデルは、学習したLoRAモデルと同様に使用できます。 + +Text Encoderが二つのモデルで同じ場合にはLoRAはU-NetのみのLoRAとなります。 + +### その他のオプション + +- `--v2` + - v2.xのStable Diffusionモデルを使う場合に指定してください。 +- `--device` + - ``--device cuda``としてcudaを指定すると計算をGPU上で行います。処理が速くなります(CPUでもそこまで遅くないため、せいぜい倍~数倍程度のようです)。 +- `--save_precision` + - LoRAの保存形式を"float", "fp16", "bf16"から指定します。省略時はfloatになります。 +- `--conv_dim` + - 指定するとLoRAの適用範囲を Conv2d 3x3 へ拡大します。Conv2d 3x3 の rank を指定します。 + +## 画像リサイズスクリプト + +(のちほどドキュメントを整理しますがとりあえずここに説明を書いておきます。) + +Aspect Ratio Bucketingの機能拡張で、小さな画像については拡大しないでそのまま教師データとすることが可能になりました。元の教師画像を縮小した画像を、教師データに加えると精度が向上したという報告とともに前処理用のスクリプトをいただきましたので整備して追加しました。bmaltais氏に感謝します。 + +### スクリプトの実行方法 + +以下のように指定してください。元の画像そのまま、およびリサイズ後の画像が変換先フォルダに保存されます。リサイズ後の画像には、ファイル名に ``+512x512`` のようにリサイズ先の解像度が付け加えられます(画像サイズとは異なります)。リサイズ先の解像度より小さい画像は拡大されることはありません。 + +``` +python tools\resize_images_to_resolution.py --max_resolution 512x512,384x384,256x256 --save_as_png + --copy_associated_files 元画像フォルダ 変換先フォルダ +``` + +元画像フォルダ内の画像ファイルが、指定した解像度(複数指定可)と同じ面積になるようにリサイズされ、変換先フォルダに保存されます。画像以外のファイルはそのままコピーされます。 + +``--max_resolution`` 
オプションにリサイズ先のサイズを例のように指定してください。面積がそのサイズになるようにリサイズします。複数指定すると、それぞれの解像度でリサイズされます。``512x512,384x384,256x256``なら、変換先フォルダの画像は、元サイズとリサイズ後サイズ×3の計4枚になります。 + +``--save_as_png`` オプションを指定するとpng形式で保存します。省略するとjpeg形式(quality=100)で保存されます。 + +``--copy_associated_files`` オプションを指定すると、拡張子を除き画像と同じファイル名(たとえばキャプションなど)のファイルが、リサイズ後の画像のファイル名と同じ名前でコピーされます。 + + +### その他のオプション + +- divisible_by + - リサイズ後の画像のサイズ(縦、横のそれぞれ)がこの値で割り切れるように、画像中心を切り出します。 +- interpolation + - 縮小時の補完方法を指定します。``area, cubic, lanczos4``から選択可能で、デフォルトは``area``です。 + + +# 追加情報 + +## cloneofsimo氏のリポジトリとの違い + +2022/12/25時点では、当リポジトリはLoRAの適用個所をText EncoderのMLP、U-NetのFFN、Transformerのin/out projectionに拡大し、表現力が増しています。ただその代わりメモリ使用量は増え、8GBぎりぎりになりました。 + +またモジュール入れ替え機構は全く異なります。 + +## 将来拡張について + +LoRAだけでなく他の拡張にも対応可能ですので、それらも追加予定です。 diff --git a/docs/train_network_README-zh.md b/docs/train_network_README-zh.md new file mode 100644 index 0000000000000000000000000000000000000000..87a99153ab858cf3364d30306684baf08e8c923d --- /dev/null +++ b/docs/train_network_README-zh.md @@ -0,0 +1,466 @@ +# 关于LoRA的学习。 + +[LoRA: Low-Rank Adaptation of Large Language Models](https://arxiv.org/abs/2106.09685)(arxiv)、[LoRA](https://github.com/microsoft/LoRA)(github)这是应用于Stable Diffusion“稳定扩散”的内容。 + +[cloneofsimo先生的代码仓库](https://github.com/cloneofsimo/lora) 我们非常感謝您提供的参考。非常感謝。 + +通常情況下,LoRA只适用于Linear和Kernel大小为1x1的Conv2d,但也可以將其擴展到Kernel大小为3x3的Conv2d。 + +Conv2d 3x3的扩展最初是由 [cloneofsimo先生的代码仓库](https://github.com/cloneofsimo/lora) +而KohakuBlueleaf先生在[LoCon](https://github.com/KohakuBlueleaf/LoCon)中揭示了其有效性。我们深深地感谢KohakuBlueleaf先生。 + +看起来即使在8GB VRAM上也可以勉强运行。 + +请同时查看关于[学习的通用文档](./train_README-zh.md)。 +# 可学习的LoRA 类型 + +支持以下两种类型。以下是本仓库中自定义的名称。 + +1. __LoRA-LierLa__:(用于 __Li__ n __e__ a __r__ __La__ yers 的 LoRA,读作 "Liela") + + 适用于 Linear 和卷积层 Conv2d 的 1x1 Kernel 的 LoRA + +2. 
__LoRA-C3Lier__:(用于具有 3x3 Kernel 的卷积层和 __Li__ n __e__ a __r__ 层的 LoRA,读作 "Seria") + + 除了第一种类型外,还适用于 3x3 Kernel 的 Conv2d 的 LoRA + +与 LoRA-LierLa 相比,LoRA-C3Lier 可能会获得更高的准确性,因为它适用于更多的层。 + +在训练时,也可以使用 __DyLoRA__(将在后面介绍)。 + +## 请注意与所学模型相关的事项。 + +LoRA-LierLa可以用于AUTOMATIC1111先生的Web UI LoRA功能。 + +要使用LoRA-C3Lier并在Web UI中生成,请使用此处的[WebUI用extension](https://github.com/kohya-ss/sd-webui-additional-networks)。 + +在此存储库的脚本中,您还可以预先将经过训练的LoRA模型合并到Stable Diffusion模型中。 + +请注意,目前与cloneofsimo先生的存储库以及d8ahazard先生的[Stable-Diffusion-WebUI的Dreambooth扩展](https://github.com/d8ahazard/sd_dreambooth_extension)不兼容,因为本存储库进行了一些功能扩展(如下文所述)。 + +# 学习步骤 + +请先参考此存储库的README文件并进行环境设置。 + +## 准备数据 + +请参考 [关于准备学习数据](./train_README-zh.md)。 + +## 网络训练 + +使用`train_network.py`。 + +在`train_network.py`中,使用`--network_module`选项指定要训练的模块名称。对于LoRA模块,它应该是`networks.lora`,请指定它。 + +请注意,学习率应该比通常的DreamBooth或fine tuning要高,建议指定为`1e-4`至`1e-3`左右。 + +以下是命令行示例。 + +``` +accelerate launch --num_cpu_threads_per_process 1 train_network.py + --pretrained_model_name_or_path=<.ckpt或.safetensors或Diffusers版模型目录> + --dataset_config=<数据集配置的.toml文件> + --output_dir=<训练过程中的模型输出文件夹> + --output_name=<训练模型输出时的文件名> + --save_model_as=safetensors + --prior_loss_weight=1.0 + --max_train_steps=400 + --learning_rate=1e-4 + --optimizer_type="AdamW8bit" + --xformers + --mixed_precision="fp16" + --cache_latents + --gradient_checkpointing + --save_every_n_epochs=1 + --network_module=networks.lora +``` + +在这个命令行中,LoRA-LierLa将会被训练。 + +LoRA的模型将会被保存在通过`--output_dir`选项指定的文件夹中。关于其他选项和优化器等,请参阅[学习的通用文档](./train_README-zh.md)中的“常用选项”。 + +此外,还可以指定以下选项: + +* `--network_dim` + * 指定LoRA的RANK(例如:`--network_dim=4`)。默认值为4。数值越大表示表现力越强,但需要更多的内存和时间来训练。而且不要盲目增加此数值。 +* `--network_alpha` + * 指定用于防止下溢并稳定训练的alpha值。默认值为1。如果与`network_dim`指定相同的值,则将获得与以前版本相同的行为。 +* `--persistent_data_loader_workers` + * 在Windows环境中指定可大幅缩短epoch之间的等待时间。 +* `--max_data_loader_n_workers` + * 指定数据读取进程的数量。进程数越多,数据读取速度越快,可以更有效地利用GPU,但会占用主存。默认值为“`8`或`CPU同步执行线程数-1`的最小值”,因此如果主存不足或GPU使用率超过90%,则应将这些数字降低到约`2`或`1`。 +* 
`--network_weights` + * 在训练之前读取预训练的LoRA权重,并在此基础上进行进一步的训练。 +* `--network_train_unet_only` + * 仅启用与U-Net相关的LoRA模块。在类似fine tuning的学习中指定此选项可能会很有用。 +* `--network_train_text_encoder_only` + * 仅启用与Text Encoder相关的LoRA模块。可能会期望Textual Inversion效果。 +* `--unet_lr` + * 当在U-Net相关的LoRA模块中使用与常规学习率(由`--learning_rate`选项指定)不同的学习率时,应指定此选项。 +* `--text_encoder_lr` + * 当在Text Encoder相关的LoRA模块中使用与常规学习率(由`--learning_rate`选项指定)不同的学习率时,应指定此选项。可能最好将Text Encoder的学习率稍微降低(例如5e-5)。 +* `--network_args` + * 可以指定多个参数。将在下面详细说明。 + +当未指定`--network_train_unet_only`和`--network_train_text_encoder_only`时(默认情况),将启用Text Encoder和U-Net的两个LoRA模块。 + +# 其他的学习方法 + +## 学习 LoRA-C3Lier + +请使用以下方式 + +``` +--network_args "conv_dim=4" +``` + +DyLoRA是在这篇论文中提出的[DyLoRA: Parameter Efficient Tuning of Pre-trained Models using Dynamic Search-Free Low-Rank Adaptation](​https://arxiv.org/abs/2210.07558), +[其官方实现可在这里找到](​https://github.com/huawei-noah/KD-NLP/tree/main/DyLoRA)。 + +根据论文,LoRA的rank并不是越高越好,而是需要根据模型、数据集、任务等因素来寻找合适的rank。使用DyLoRA,可以同时在指定的维度(rank)下学习多种rank的LoRA,从而省去了寻找最佳rank的麻烦。 + +本存储库的实现基于官方实现进行了自定义扩展(因此可能存在缺陷)。 + +### 本存储库DyLoRA的特点 + +DyLoRA训练后的模型文件与LoRA兼容。此外,可以从模型文件中提取多个低于指定维度(rank)的LoRA。 + +DyLoRA-LierLa和DyLoRA-C3Lier均可训练。 + +### 使用DyLoRA进行训练 + +请指定与DyLoRA相对应的`network.dylora`,例如 `--network_module=networks.dylora`。 + +此外,通过 `--network_args` 指定例如`--network_args "unit=4"`的参数。`unit`是划分rank的单位。例如,可以指定为`--network_dim=16 --network_args "unit=4"`。请将`unit`视为可以被`network_dim`整除的值(`network_dim`是`unit`的倍数)。 + +如果未指定`unit`,则默认为`unit=1`。 + +以下是示例说明。 + +``` +--network_module=networks.dylora --network_dim=16 --network_args "unit=4" + +--network_module=networks.dylora --network_dim=32 --network_alpha=16 --network_args "unit=4" +``` + +对于DyLoRA-C3Lier,需要在 `--network_args` 中指定 `conv_dim`,例如 `conv_dim=4`。与普通的LoRA不同,`conv_dim`必须与`network_dim`具有相同的值。以下是一个示例描述: + +``` +--network_module=networks.dylora --network_dim=16 --network_args "conv_dim=16" "unit=4" + +--network_module=networks.dylora --network_dim=32 --network_alpha=16 
--network_args "conv_dim=32" "conv_alpha=16" "unit=8" +``` + +例如,当使用dim=16、unit=4(如下所述)进行学习时,可以学习和提取4个rank的LoRA,即4、8、12和16。通过在每个提取的模型中生成图像并进行比较,可以选择最佳rank的LoRA。 + +其他选项与普通的LoRA相同。 + +*`unit`是本存储库的独有扩展,在DyLoRA中,由于预计相比同维度(rank)的普通LoRA,学习时间更长,因此将分割单位增加。 + +### 从DyLoRA模型中提取LoRA模型 + +请使用`networks`文件夹中的`extract_lora_from_dylora.py`。指定`unit`单位后,从DyLoRA模型中提取LoRA模型。 + +例如,命令行如下: + +```powershell +python networks\extract_lora_from_dylora.py --model "foldername/dylora-model.safetensors" --save_to "foldername/dylora-model-split.safetensors" --unit 4 +``` + +`--model` 参数用于指定DyLoRA模型文件。`--save_to` 参数用于指定要保存提取的模型的文件名(rank值将附加到文件名中)。`--unit` 参数用于指定DyLoRA训练时的`unit`。 + +## 分层学习率 + +请参阅PR#355了解详细信息。 + +您可以指定完整模型的25个块的权重。虽然第一个块没有对应的LoRA,但为了与分层LoRA应用等的兼容性,将其设为25个。此外,如果不扩展到conv2d3x3,则某些块中可能不存在LoRA,但为了统一描述,请始终指定25个值。 + +请在 `--network_args` 中指定以下参数。 + +- `down_lr_weight`:指定U-Net down blocks的学习率权重。可以指定以下内容: + - 每个块的权重:指定12个数字,例如`"down_lr_weight=0,0,0,0,0,0,1,1,1,1,1,1"` + - 从预设中指定:例如`"down_lr_weight=sine"`(使用正弦曲线指定权重)。可以指定sine、cosine、linear、reverse_linear、zeros。另外,添加 `+数字` 时,可以将指定的数字加上(变为0.25〜1.25)。 +- `mid_lr_weight`:指定U-Net mid block的学习率权重。只需指定一个数字,例如 `"mid_lr_weight=0.5"`。 +- `up_lr_weight`:指定U-Net up blocks的学习率权重。与down_lr_weight相同。 +- 省略指定的部分将被视为1.0。另外,如果将权重设为0,则不会创建该块的LoRA模块。 +- `block_lr_zero_threshold`:如果权重小于此值,则不会创建LoRA模块。默认值为0。 + +### 分层学习率命令行指定示例: + + +```powershell +--network_args "down_lr_weight=0.5,0.5,0.5,0.5,1.0,1.0,1.0,1.0,1.5,1.5,1.5,1.5" "mid_lr_weight=2.0" "up_lr_weight=1.5,1.5,1.5,1.5,1.0,1.0,1.0,1.0,0.5,0.5,0.5,0.5" + +--network_args "block_lr_zero_threshold=0.1" "down_lr_weight=sine+.5" "mid_lr_weight=1.5" "up_lr_weight=cosine+.5" +``` + +### Hierarchical Learning Rate指定的toml文件示例: + +```toml +network_args = [ "down_lr_weight=0.5,0.5,0.5,0.5,1.0,1.0,1.0,1.0,1.5,1.5,1.5,1.5", "mid_lr_weight=2.0", "up_lr_weight=1.5,1.5,1.5,1.5,1.0,1.0,1.0,1.0,0.5,0.5,0.5,0.5",] + +network_args = [ "block_lr_zero_threshold=0.1", "down_lr_weight=sine+.5", "mid_lr_weight=1.5", 
"up_lr_weight=cosine+.5", ] +``` + +## 层次结构维度(rank) + +您可以指定完整模型的25个块的维度(rank)。与分层学习率一样,某些块可能不存在LoRA,但请始终指定25个值。 + +请在 `--network_args` 中指定以下参数: + +- `block_dims`:指定每个块的维度(rank)。指定25个数字,例如 `"block_dims=2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2"`。 +- `block_alphas`:指定每个块的alpha。与block_dims一样,指定25个数字。如果省略,将使用network_alpha的值。 +- `conv_block_dims`:将LoRA扩展到Conv2d 3x3,并指定每个块的维度(rank)。 +- `conv_block_alphas`:在将LoRA扩展到Conv2d 3x3时指定每个块的alpha。如果省略,将使用conv_alpha的值。 + +### 层次结构维度(rank)命令行指定示例: + + +```powershell +--network_args "block_dims=2,4,4,4,8,8,8,8,12,12,12,12,16,12,12,12,12,8,8,8,8,4,4,4,2" + +--network_args "block_dims=2,4,4,4,8,8,8,8,12,12,12,12,16,12,12,12,12,8,8,8,8,4,4,4,2" "conv_block_dims=2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2" + +--network_args "block_dims=2,4,4,4,8,8,8,8,12,12,12,12,16,12,12,12,12,8,8,8,8,4,4,4,2" "block_alphas=2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2" +``` + +### 层级别dim(rank) toml文件指定示例: + +```toml +network_args = [ "block_dims=2,4,4,4,8,8,8,8,12,12,12,12,16,12,12,12,12,8,8,8,8,4,4,4,2",] + +network_args = [ "block_dims=2,4,4,4,8,8,8,8,12,12,12,12,16,12,12,12,12,8,8,8,8,4,4,4,2", "block_alphas=2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2",] +``` + +# Other scripts +这些是与LoRA相关的脚本,如合并脚本等。 + +关于合并脚本 +您可以使用merge_lora.py脚本将LoRA的训练结果合并到稳定扩散模型中,也可以将多个LoRA模型合并。 + +合并到稳定扩散模型中的LoRA模型 +合并后的模型可以像常规的稳定扩散ckpt一样使用。例如,以下是一个命令行示例: + +``` +python networks\merge_lora.py --sd_model ..\model\model.ckpt + --save_to ..\lora_train1\model-char1-merged.safetensors + --models ..\lora_train1\last.safetensors --ratios 0.8 +``` + +请使用 Stable Diffusion v2.x 模型进行训练并进行合并时,需要指定--v2选项。 + +使用--sd_model选项指定要合并的 Stable Diffusion 模型文件(仅支持 .ckpt 或 .safetensors 格式,目前不支持 Diffusers)。 + +使用--save_to选项指定合并后模型的保存路径(根据扩展名自动判断为 .ckpt 或 .safetensors)。 + +使用--models选项指定已训练的 LoRA 模型文件,也可以指定多个,然后按顺序进行合并。 + +使用--ratios选项以0~1.0的数字指定每个模型的应用率(将多大比例的权重反映到原始模型中)。例如,在接近过度拟合的情况下,降低应用率可能会使结果更好。请指定与模型数量相同的比率。 + +当指定多个模型时,格式如下: + + +``` +python networks\merge_lora.py 
--sd_model ..\model\model.ckpt + --save_to ..\lora_train1\model-char1-merged.safetensors + --models ..\lora_train1\last.safetensors ..\lora_train2\last.safetensors --ratios 0.8 0.5 +``` + +### 将多个LoRA模型合并 + +将多个LoRA模型逐个应用于SD模型与将多个LoRA模型合并后再应用于SD模型之间,由于计算顺序的不同,会得到微妙不同的结果。 + +例如,下面是一个命令行示例: + +``` +python networks\merge_lora.py + --save_to ..\lora_train1\model-char1-style1-merged.safetensors + --models ..\lora_train1\last.safetensors ..\lora_train2\last.safetensors --ratios 0.6 0.4 +``` + +--sd_model选项不需要指定。 + +通过--save_to选项指定合并后的LoRA模型的保存位置(.ckpt或.safetensors,根据扩展名自动识别)。 + +通过--models选项指定学习的LoRA模型文件。可以指定三个或更多。 + +通过--ratios选项以0~1.0的数字指定每个模型的比率(反映多少权重来自原始模型)。如果将两个模型一对一合并,则比率将是“0.5 0.5”。如果比率为“1.0 1.0”,则总重量将过大,可能会产生不理想的结果。 + +在v1和v2中学习的LoRA,以及rank(维数)或“alpha”不同的LoRA不能合并。仅包含U-Net的LoRA和包含U-Net+文本编码器的LoRA可以合并,但结果未知。 + +### 其他选项 + +* 精度 + * 可以从float、fp16或bf16中选择合并计算时的精度。默认为float以保证精度。如果想减少内存使用量,请指定fp16/bf16。 +* save_precision + * 可以从float、fp16或bf16中选择在保存模型时的精度。默认与精度相同。 + +## 合并多个维度不同的LoRA模型 + +将多个LoRA近似为一个LoRA(无法完全复制)。使用'svd_merge_lora.py'。例如,以下是命令行的示例。 +``` +python networks\svd_merge_lora.py + --save_to ..\lora_train1\model-char1-style1-merged.safetensors + --models ..\lora_train1\last.safetensors ..\lora_train2\last.safetensors + --ratios 0.6 0.4 --new_rank 32 --device cuda +``` +`merge_lora.py`和主要选项相同。以下选项已添加: + +- `--new_rank` + - 指定要创建的LoRA rank。 +- `--new_conv_rank` + - 指定要创建的Conv2d 3x3 LoRA的rank。如果省略,则与`new_rank`相同。 +- `--device` + - 如果指定为`--device cuda`,则在GPU上执行计算。处理速度将更快。 + +## 在此存储库中生成图像的脚本中 + +请在`gen_img_diffusers.py`中添加`--network_module`和`--network_weights`选项。其含义与训练时相同。 + +通过`--network_mul`选项,可以指定0~1.0的数字来改变LoRA的应用率。 + +## 请参考以下示例,在Diffusers的pipeline中生成。 + +所需文件仅为networks/lora.py。请注意,该示例只能在Diffusers版本0.10.2中正常运行。 + +```python +import torch +from diffusers import StableDiffusionPipeline +from networks.lora import LoRAModule, create_network_from_weights +from safetensors.torch import load_file + +# if the ckpt is CompVis based, convert it to Diffusers beforehand 
with tools/convert_diffusers20_original_sd.py. See --help for more details. + +model_id_or_dir = r"model_id_on_hugging_face_or_dir" +device = "cuda" + +# create pipe +print(f"creating pipe from {model_id_or_dir}...") +pipe = StableDiffusionPipeline.from_pretrained(model_id_or_dir, revision="fp16", torch_dtype=torch.float16) +pipe = pipe.to(device) +vae = pipe.vae +text_encoder = pipe.text_encoder +unet = pipe.unet + +# load lora networks +print(f"loading lora networks...") + +lora_path1 = r"lora1.safetensors" +sd = load_file(lora_path1) # If the file is .ckpt, use torch.load instead. +network1, sd = create_network_from_weights(0.5, None, vae, text_encoder,unet, sd) +network1.apply_to(text_encoder, unet) +network1.load_state_dict(sd) +network1.to(device, dtype=torch.float16) + +# # You can merge weights instead of apply_to+load_state_dict. network.set_multiplier does not work +# network.merge_to(text_encoder, unet, sd) + +lora_path2 = r"lora2.safetensors" +sd = load_file(lora_path2) +network2, sd = create_network_from_weights(0.7, None, vae, text_encoder,unet, sd) +network2.apply_to(text_encoder, unet) +network2.load_state_dict(sd) +network2.to(device, dtype=torch.float16) + +lora_path3 = r"lora3.safetensors" +sd = load_file(lora_path3) +network3, sd = create_network_from_weights(0.5, None, vae, text_encoder,unet, sd) +network3.apply_to(text_encoder, unet) +network3.load_state_dict(sd) +network3.to(device, dtype=torch.float16) + +# prompts +prompt = "masterpiece, best quality, 1girl, in white shirt, looking at viewer" +negative_prompt = "bad quality, worst quality, bad anatomy, bad hands" + +# exec pipe +print("generating image...") +with torch.autocast("cuda"): + image = pipe(prompt, guidance_scale=7.5, negative_prompt=negative_prompt).images[0] + +# if not merged, you can use set_multiplier +# network1.set_multiplier(0.8) +# and generate image again... 
+ +# save image +image.save(r"by_diffusers..png") +``` + +## 从两个模型的差异中创建LoRA模型。 + +这是参考[此讨论](https://github.com/cloneofsimo/lora/discussions/56)实现的。数学公式原样沿用(我并不完全理解,但似乎使用奇异值分解进行了近似)。 + +将两个模型(例如微调前的原始模型和微调后的模型)的差异近似为LoRA。 + +### 脚本执行方法 + +请按以下方式指定。 + +``` +python networks\extract_lora_from_models.py --model_org base-model.ckpt + --model_tuned fine-tuned-model.ckpt + --save_to lora-weights.safetensors --dim 4 +``` + +--model_org 选项指定原始的Stable Diffusion模型。如果要应用创建的LoRA模型,则需要指定该模型并将其应用。可以指定.ckpt或.safetensors文件。 + +--model_tuned 选项指定要提取差分的目标Stable Diffusion模型。例如,可以指定经过Fine Tuning或DreamBooth后的模型。可以指定.ckpt或.safetensors文件。 + +--save_to 指定LoRA模型的保存路径。--dim指定LoRA的维数。 + +生成的LoRA模型可以像已训练的LoRA模型一样使用。 + +当两个模型的文本编码器相同时,LoRA将成为仅包含U-Net的LoRA。 + +### 其他选项 + +- `--v2` + - 如果使用v2.x的稳定扩散模型,请指定此选项。 +- `--device` + - 指定为 ``--device cuda`` 可在GPU上执行计算。这会使处理速度更快(即使在CPU上也不会太慢,大约快几倍)。 +- `--save_precision` + - 指定LoRA的保存格式为“float”、“fp16”、“bf16”。如果省略,将使用float。 +- `--conv_dim` + - 指定后,将扩展LoRA的应用范围到Conv2d 3x3。指定Conv2d 3x3的rank。 + +## 图像大小调整脚本 + +(稍后将整理文件,但现在先在这里写下说明。) + +在 Aspect Ratio Bucketing 的功能扩展中,现在可以将小图像直接用作教师数据,而无需进行放大。我收到了一个用于预处理的脚本,以及“将原始教师图像缩小后的图像加入教师数据可以提高精度”的报告。我整理了这个脚本并将其加入本仓库。感谢 bmaltais 先生。 + +### 执行脚本的方法如下。 +原始图像以及调整大小后的图像将保存到转换目标文件夹中。调整大小后的图像将在文件名中添加“+512x512”之类的调整后的分辨率(与图像大小不同)。小于调整大小后分辨率的图像将不会被放大。 + +``` +python tools\resize_images_to_resolution.py --max_resolution 512x512,384x384,256x256 --save_as_png + --copy_associated_files 源图像文件夹 目标文件夹 +``` + +源图像文件夹中的图像文件将被调整大小以达到指定的分辨率(可以指定多个),并保存到目标文件夹中。除图像外的文件将被原样复制。 + +请使用“--max_resolution”选项指定调整大小后的大小,使其达到指定的面积大小。如果指定多个,则会在每个分辨率上进行调整大小。例如,“512x512,384x384,256x256”将使目标文件夹中的图像变为原始大小和调整大小后的大小×3共计4张图像。 + +如果使用“--save_as_png”选项,则会以PNG格式保存。如果省略,则默认以JPEG格式(quality=100)保存。 + +如果使用“--copy_associated_files”选项,则会将除扩展名外与图像同名的文件(例如caption等)以与调整大小后的图像相同的文件名进行复制。 + +### 其他选项 + +- divisible_by + - 将图像中心裁剪到能够被该值整除的大小(分别是垂直和水平的大小),以便调整大小后的图像大小可以被该值整除。 +- interpolation + - 指定缩小时的插值方法。可从``area、cubic、lanczos4``中选择,默认为``area``。 + + +# 追加信息 + +## 
与cloneofsimo的代码库的区别 + +截至2022年12月25日,本代码库将LoRA应用扩展到了Text Encoder的MLP、U-Net的FFN以及Transformer的输入/输出投影中,从而增强了表现力。但是,内存使用量增加了,接近了8GB的限制。 + +此外,模块交换机制也完全不同。 + +## 关于未来的扩展 + +除了LoRA之外,我们还计划添加其他扩展,以支持更多的功能。 diff --git a/docs/train_ti_README-ja.md b/docs/train_ti_README-ja.md new file mode 100644 index 0000000000000000000000000000000000000000..c3f8e8cf64c303c86527f9f574409e14c3d59481 --- /dev/null +++ b/docs/train_ti_README-ja.md @@ -0,0 +1,105 @@ +[Textual Inversion](https://textual-inversion.github.io/) の学習についての説明です。 + +[学習についての共通ドキュメント](./train_README-ja.md) もあわせてご覧ください。 + +実装に当たっては https://github.com/huggingface/diffusers/tree/main/examples/textual_inversion を大いに参考にしました。 + +学習したモデルはWeb UIでもそのまま使えます。 + +# 学習の手順 + +あらかじめこのリポジトリのREADMEを参照し、環境整備を行ってください。 + +## データの準備 + +[学習データの準備について](./train_README-ja.md) を参照してください。 + +## 学習の実行 + +``train_textual_inversion.py`` を用います。以下はコマンドラインの例です(DreamBooth手法)。 + +``` +accelerate launch --num_cpu_threads_per_process 1 train_textual_inversion.py + --dataset_config=<データ準備で作成した.tomlファイル> + --output_dir=<学習したモデルの出力先フォルダ> + --output_name=<学習したモデル出力時のファイル名> + --save_model_as=safetensors + --prior_loss_weight=1.0 + --max_train_steps=1600 + --learning_rate=1e-6 + --optimizer_type="AdamW8bit" + --xformers + --mixed_precision="fp16" + --cache_latents + --gradient_checkpointing + --token_string=mychar4 --init_word=cute --num_vectors_per_token=4 +``` + +``--token_string`` に学習時のトークン文字列を指定します。__学習時のプロンプトは、この文字列を含むようにしてください(token_stringがmychar4なら、``mychar4 1girl`` など)__。プロンプトのこの文字列の部分が、Textual Inversionの新しいtokenに置換されて学習されます。DreamBooth, class+identifier形式のデータセットとして、`token_string` をトークン文字列にするのが最も簡単で確実です。 + +プロンプトにトークン文字列が含まれているかどうかは、``--debug_dataset`` で置換後のtoken idが表示されますので、以下のように ``49408`` 以降のtokenが存在するかどうかで確認できます。 + +``` +input ids: tensor([[49406, 49408, 49409, 49410, 49411, 49412, 49413, 49414, 49415, 49407, + 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, + 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 
49407, + 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, + 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, + 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, + 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, 49407, + 49407, 49407, 49407, 49407, 49407, 49407, 49407]]) +``` + +tokenizerがすでに持っている単語(一般的な単語)は使用できません。 + +``--init_word`` にembeddingsを初期化するときのコピー元トークンの文字列を指定します。学ばせたい概念が近いものを選ぶとよいようです。二つ以上のトークンになる文字列は指定できません。 + +``--num_vectors_per_token`` にいくつのトークンをこの学習で使うかを指定します。多いほうが表現力が増しますが、その分多くのトークンを消費します。たとえばnum_vectors_per_token=8の場合、指定したトークン文字列は(一般的なプロンプトの77トークン制限のうち)8トークンを消費します。 + +以上がTextual Inversionのための主なオプションです。以降は他の学習スクリプトと同様です。 + +`num_cpu_threads_per_process` には通常は1を指定するとよいようです。 + +`pretrained_model_name_or_path` に追加学習を行う元となるモデルを指定します。Stable Diffusionのcheckpointファイル(.ckptまたは.safetensors)、Diffusersのローカルディスクにあるモデルディレクトリ、DiffusersのモデルID("stabilityai/stable-diffusion-2"など)が指定できます。 + +`output_dir` に学習後のモデルを保存するフォルダを指定します。`output_name` にモデルのファイル名を拡張子を除いて指定します。`save_model_as` でsafetensors形式での保存を指定しています。 + +`dataset_config` に `.toml` ファイルを指定します。ファイル内でのバッチサイズ指定は、当初はメモリ消費を抑えるために `1` としてください。 + +学習させるステップ数 `max_train_steps` を1600とします。学習率 `learning_rate` はここでは1e-6を指定しています。 + +省メモリ化のため `mixed_precision="fp16"` を指定します(RTX30 シリーズ以降では `bf16` も指定できます。環境整備時にaccelerateに行った設定と合わせてください)。また `gradient_checkpointing` を指定します。 + +オプティマイザ(モデルを学習データにあうように最適化=学習させるクラス)にメモリ消費の少ない 8bit AdamW を使うため、 `optimizer_type="AdamW8bit"` を指定します。 + +`xformers` オプションを指定し、xformersのCrossAttentionを用います。xformersをインストールしていない場合やエラーとなる場合(環境にもよりますが `mixed_precision="no"` の場合など)、代わりに `mem_eff_attn` オプションを指定すると省メモリ版CrossAttentionを使用します(速度は遅くなります)。 + +ある程度メモリがある場合は、`.toml` ファイルを編集してバッチサイズをたとえば `8` くらいに増やしてください(高速化と精度向上の可能性があります)。 + +### よく使われるオプションについて + +以下の場合にはオプションに関するドキュメントを参照してください。 + +- Stable Diffusion 2.xまたはそこからの派生モデルを学習する +- clip skipを2以上を前提としたモデルを学習する +- 75トークンを超えたキャプションで学習する + +### Textual Inversionでのバッチサイズについて + 
+モデル全体を学習するDreamBoothやfine tuningに比べてメモリ使用量が少ないため、バッチサイズは大きめにできます。 + +# Textual Inversionのその他の主なオプション + +すべてのオプションについては別文書を参照してください。 + +* `--weights` + * 学習前に学習済みのembeddingsを読み込み、そこから追加で学習します。 +* `--use_object_template` + * キャプションではなく既定の物体用テンプレート文字列(``a photo of a {}``など)で学習します。公式実装と同じになります。キャプションは無視されます。 +* `--use_style_template` + * キャプションではなく既定のスタイル用テンプレート文字列で学習します(``a painting in the style of {}``など)。公式実装と同じになります。キャプションは無視されます。 + +## 当リポジトリ内の画像生成スクリプトで生成する + +gen_img_diffusers.pyに、``--textual_inversion_embeddings`` オプションで学習したembeddingsファイルを指定してください(複数可)。プロンプトでembeddingsファイルのファイル名(拡張子を除く)を使うと、そのembeddingsが適用されます。 + diff --git a/examples/LoRA based finetuning 2 phase.ps1 b/examples/LoRA based finetuning 2 phase.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..d5b0ddb8f27bc5f575459104eabbb9a6dcded6c0 --- /dev/null +++ b/examples/LoRA based finetuning 2 phase.ps1 @@ -0,0 +1,12 @@ +$pretrainedModel = "D:\models\sdxl\nsfw_v1.0_00002_.safetensors" +$trainDataDir = "D:\dataset\harold\img" +$loggingDir = "D:\dataset\harold\lora\sdxl-logs" +$outputName = "harold_v1.0a" +$outputDir = "d:\lycoris\sdxl" + +$networkWeights = Join-Path -Path $outputDir -ChildPath "$outputName.safetensors" +$outputName2 = "$outputName" + "e2" + +accelerate launch --num_cpu_threads_per_process=2 "./sdxl_train_network.py" --enable_bucket --pretrained_model_name_or_path="$pretrainedModel" --train_data_dir="$trainDataDir" --resolution="1024,1024" --output_dir="$outputDir" --logging_dir="$loggingDir" --network_alpha="256" --training_comment="trigger words: " --save_model_as=safetensors --network_module=networks.lora --unet_lr=1e-05 --network_train_unet_only --network_dim=256 --output_name="$outputName" --lr_scheduler_num_cycles="1" --scale_weight_norms="1" --network_dropout="0.1" --cache_text_encoder_outputs --no_half_vae --lr_scheduler="cosine" --train_batch_size="4" --max_train_steps="40" --save_every_n_epochs="10" --mixed_precision="bf16" --save_precision="bf16" 
--seed="17415" --caption_extension=".txt" --cache_latents --cache_latents_to_disk --optimizer_type="AdamW" --optimizer_args weight_decay=0.05 betas=0.9,0.98 --max_train_epochs="10" --max_data_loader_n_workers="0" --keep_tokens="1" --bucket_reso_steps=32 --min_snr_gamma=5 --gradient_checkpointing --xformers --bucket_no_upscale --noise_offset=0.0357 --adaptive_noise_scale=0.00357 --log_prefix=xl-loha + +accelerate launch --num_cpu_threads_per_process=2 "./sdxl_train_network.py" --enable_bucket --pretrained_model_name_or_path="$pretrainedModel" --train_data_dir="$trainDataDir" --resolution="1024,1024" --output_dir="$outputDir" --logging_dir="$loggingDir" --network_alpha="256" --training_comment="trigger: portrait" --save_model_as=safetensors --network_module=networks.lora --unet_lr=1e-05 --network_train_unet_only --network_dim=256 --network_weights="$networkWeights" --output_name="$outputName2" --lr_scheduler_num_cycles="1" --scale_weight_norms="1" --network_dropout="0.1" --cache_text_encoder_outputs --no_half_vae --lr_scheduler="constant" --train_batch_size="1" --max_train_steps="16" --save_every_n_epochs="1" --mixed_precision="bf16" --save_precision="bf16" --seed="17415" --caption_extension=".txt" --cache_latents --cache_latents_to_disk --optimizer_type="AdamW" --optimizer_args weight_decay=0.05 betas=0.9,0.98 --max_train_epochs="1" --max_data_loader_n_workers="0" --keep_tokens="1" --bucket_reso_steps=32 --min_snr_gamma=5 --gradient_checkpointing --xformers --bucket_no_upscale --noise_offset=0.0357 --adaptive_noise_scale=0.00357 --log_prefix=xl-loha diff --git a/examples/caption.ps1 b/examples/caption.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..ec6cd8a48cc3928bca79d56e37bcb37b3b436014 --- /dev/null +++ b/examples/caption.ps1 @@ -0,0 +1,14 @@ +# This powershell script will create a text file for each files in the folder +# +# Useful to create base caption that will be augmented on a per image basis + +$folder = "D:\some\folder\location\" 
+$file_pattern="*.*" +$caption_text="some caption text" + +$files = Get-ChildItem $folder$file_pattern -Include *.png, *.jpg, *.webp -File +foreach ($file in $files) { + if (-not(Test-Path -Path $folder\"$($file.BaseName).txt" -PathType Leaf)) { + New-Item -ItemType file -Path $folder -Name "$($file.BaseName).txt" -Value $caption_text + } +} \ No newline at end of file diff --git a/examples/caption_subfolders.ps1 b/examples/caption_subfolders.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..2c0f213eb5edbc1efd7c59325740f65c4655317d --- /dev/null +++ b/examples/caption_subfolders.ps1 @@ -0,0 +1,20 @@ +# This powershell script will create a text file for each files in the folder +# +# Useful to create base caption that will be augmented on a per image basis + +$folder = "D:\test\t2\" +$file_pattern="*.*" +$text_fir_file="bigeyes style" + +foreach ($file in Get-ChildItem $folder\$file_pattern -File) +{ + New-Item -ItemType file -Path $folder -Name "$($file.BaseName).txt" -Value $text_fir_file +} + +foreach($directory in Get-ChildItem -path $folder -Directory) +{ + foreach ($file in Get-ChildItem $folder\$directory\$file_pattern) + { + New-Item -ItemType file -Path $folder\$directory -Name "$($file.BaseName).txt" -Value $text_fir_file + } +} diff --git a/examples/finetune_latent.ps1 b/examples/finetune_latent.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..307f974f0c34fcbd39c01b4a96b2ae50ac2a5158 --- /dev/null +++ b/examples/finetune_latent.ps1 @@ -0,0 +1,11 @@ +# Command 1: merge_captions_to_metadata.py +$captionExtension = "--caption_extension=.txt" +$sourceDir1 = "d:\test\1_1960-1969" +$targetFile1 = "d:\test\1_1960-1969/meta_cap.json" + +# Command 2: prepare_buckets_latents.py +$targetLatentFile = "d:\test\1_1960-1969/meta_lat.json" +$modelFile = "E:\models\sdxl\sd_xl_base_0.9.safetensors" + +./venv/Scripts/python.exe finetune/merge_captions_to_metadata.py $captionExtension $sourceDir1 $targetFile1 --full_path 
+./venv/Scripts/python.exe finetune/prepare_buckets_latents.py $sourceDir1 $targetFile1 $targetLatentFile $modelFile --batch_size=4 --max_resolution=1024,1024 --min_bucket_reso=64 --max_bucket_reso=2048 --mixed_precision=bf16 --full_path diff --git a/examples/kohya-1-folders.ps1 b/examples/kohya-1-folders.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..71009ab008bf5ced53ab0e480143e8ef88f7c0ff --- /dev/null +++ b/examples/kohya-1-folders.ps1 @@ -0,0 +1,87 @@ +# This powershell script will create a model using the fine tuning dreambooth method. It will require landscape, +# portrait and square images. +# +# Adjust the script to your own needs + +# Sylvia Ritter +# variable values +$pretrained_model_name_or_path = "D:\models\v1-5-pruned-mse-vae.ckpt" +$data_dir = "D:\test\squat" +$train_dir = "D:\test\" +$resolution = "512,512" + +$image_num = Get-ChildItem $data_dir -Recurse -File -Include *.png | Measure-Object | %{$_.Count} + +Write-Output "image_num: $image_num" + +$learning_rate = 1e-6 +$dataset_repeats = 40 +$train_batch_size = 8 +$epoch = 1 +$save_every_n_epochs=1 +$mixed_precision="fp16" +$num_cpu_threads_per_process=6 + +# You should not have to change values past this point + +$output_dir = $train_dir + "\model" +$repeats = $image_num * $dataset_repeats +$mts = [Math]::Ceiling($repeats / $train_batch_size * $epoch) + +Write-Output "Repeats: $repeats" + +.\venv\Scripts\activate + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$pretrained_model_name_or_path ` + --train_data_dir=$data_dir ` + --output_dir=$output_dir ` + --resolution=$resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$mts ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$dataset_repeats ` + 
--save_precision="fp16" + +# 2nd pass at half the dataset repeat value + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$output_dir"\last.ckpt" ` + --train_data_dir=$data_dir ` + --output_dir=$output_dir"2" ` + --resolution=$resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$([Math]::Ceiling($mts/2)) ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) ` + --save_precision="fp16" + + accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$output_dir"\last.ckpt" ` + --train_data_dir=$data_dir ` + --output_dir=$output_dir"2" ` + --resolution=$resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$mts ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$dataset_repeats ` + --save_precision="fp16" + \ No newline at end of file diff --git a/examples/kohya-3-folders.ps1 b/examples/kohya-3-folders.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..b3bfb6c275cbefb97a62124b7810036f549106ac --- /dev/null +++ b/examples/kohya-3-folders.ps1 @@ -0,0 +1,154 @@ +# This powershell script will create a model using the fine tuning dreambooth method. It will require landscape, +# portrait and square images. 
+# +# Adjust the script to your own needs + +# Sylvia Ritter +# variable values +$pretrained_model_name_or_path = "D:\models\v1-5-pruned-mse-vae.ckpt" +$train_dir = "D:\dreambooth\train_sylvia_ritter\raw_data" + +$landscape_image_num = 4 +$portrait_image_num = 25 +$square_image_num = 2 + +$learning_rate = 1e-6 +$dataset_repeats = 120 +$train_batch_size = 4 +$epoch = 1 +$save_every_n_epochs=1 +$mixed_precision="fp16" +$num_cpu_threads_per_process=6 + +$landscape_folder_name = "landscape-pp" +$landscape_resolution = "832,512" +$portrait_folder_name = "portrait-pp" +$portrait_resolution = "448,896" +$square_folder_name = "square-pp" +$square_resolution = "512,512" + +# You should not have to change values past this point + +$landscape_data_dir = $train_dir + "\" + $landscape_folder_name +$portrait_data_dir = $train_dir + "\" + $portrait_folder_name +$square_data_dir = $train_dir + "\" + $square_folder_name +$landscape_output_dir = $train_dir + "\model-l" +$portrait_output_dir = $train_dir + "\model-lp" +$square_output_dir = $train_dir + "\model-lps" + +$landscape_repeats = $landscape_image_num * $dataset_repeats +$portrait_repeats = $portrait_image_num * $dataset_repeats +$square_repeats = $square_image_num * $dataset_repeats + +$landscape_mts = [Math]::Ceiling($landscape_repeats / $train_batch_size * $epoch) +$portrait_mts = [Math]::Ceiling($portrait_repeats / $train_batch_size * $epoch) +$square_mts = [Math]::Ceiling($square_repeats / $train_batch_size * $epoch) + +# Write-Output $landscape_repeats + +.\venv\Scripts\activate + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$pretrained_model_name_or_path ` + --train_data_dir=$landscape_data_dir ` + --output_dir=$landscape_output_dir ` + --resolution=$landscape_resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$landscape_mts ` + --use_8bit_adam ` + --xformers ` + 
--mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$dataset_repeats ` + --save_precision="fp16" + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$landscape_output_dir"\last.ckpt" ` + --train_data_dir=$portrait_data_dir ` + --output_dir=$portrait_output_dir ` + --resolution=$portrait_resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$portrait_mts ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$dataset_repeats ` + --save_precision="fp16" + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$portrait_output_dir"\last.ckpt" ` + --train_data_dir=$square_data_dir ` + --output_dir=$square_output_dir ` + --resolution=$square_resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$square_mts ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$dataset_repeats ` + --save_precision="fp16" + +# 2nd pass at half the dataset repeat value + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$square_output_dir"\last.ckpt" ` + --train_data_dir=$landscape_data_dir ` + --output_dir=$landscape_output_dir"2" ` + --resolution=$landscape_resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$([Math]::Ceiling($landscape_mts/2)) ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --cache_latents ` + 
--save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) ` + --save_precision="fp16" + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$landscape_output_dir"2\last.ckpt" ` + --train_data_dir=$portrait_data_dir ` + --output_dir=$portrait_output_dir"2" ` + --resolution=$portrait_resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$([Math]::Ceiling($portrait_mts/2)) ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) ` + --save_precision="fp16" + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$portrait_output_dir"2\last.ckpt" ` + --train_data_dir=$square_data_dir ` + --output_dir=$square_output_dir"2" ` + --resolution=$square_resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$([Math]::Ceiling($square_mts/2)) ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) ` + --save_precision="fp16" + \ No newline at end of file diff --git a/examples/kohya.ps1 b/examples/kohya.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..2f2776f181e64574b90d5da64f2a448a5bcb5509 --- /dev/null +++ b/examples/kohya.ps1 @@ -0,0 +1,154 @@ +# This powershell script will create a model using the fine tuning dreambooth method. It will require landscape, +# portrait and square images. 
+# +# Adjust the script to your own needs + +# Sylvia Ritter +# variable values +$pretrained_model_name_or_path = "D:\models\v1-5-pruned-mse-vae.ckpt" +$train_dir = "D:\dreambooth\train_sylvia_ritter\raw_data" + +$landscape_image_num = 4 +$portrait_image_num = 25 +$square_image_num = 2 + +$learning_rate = 1e-6 +$dataset_repeats = 120 +$train_batch_size = 4 +$epoch = 1 +$save_every_n_epochs=1 +$mixed_precision="fp16" +$num_cpu_threads_per_process=6 + +$landscape_folder_name = "landscape-pp" +$landscape_resolution = "832,512" +$portrait_folder_name = "portrait-pp" +$portrait_resolution = "448,896" +$square_folder_name = "square-pp" +$square_resolution = "512,512" + +# You should not have to change values past this point + +$landscape_data_dir = $train_dir + "\" + $landscape_folder_name +$portrait_data_dir = $train_dir + "\" + $portrait_folder_name +$square_data_dir = $train_dir + "\" + $square_folder_name +$landscape_output_dir = $train_dir + "\model-l" +$portrait_output_dir = $train_dir + "\model-lp" +$square_output_dir = $train_dir + "\model-lps" + +$landscape_repeats = $landscape_image_num * $dataset_repeats +$portrait_repeats = $portrait_image_num * $dataset_repeats +$square_repeats = $square_image_num * $dataset_repeats + +$landscape_mts = [Math]::Ceiling($landscape_repeats / $train_batch_size * $epoch) +$portrait_mts = [Math]::Ceiling($portrait_repeats / $train_batch_size * $epoch) +$square_mts = [Math]::Ceiling($square_repeats / $train_batch_size * $epoch) + +# Write-Output $landscape_repeats + +.\venv\Scripts\activate + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$pretrained_model_name_or_path ` + --train_data_dir=$landscape_data_dir ` + --output_dir=$landscape_output_dir ` + --resolution=$landscape_resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$landscape_mts ` + --use_8bit_adam ` + --xformers ` + 
--mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$dataset_repeats ` + --save_half + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$landscape_output_dir"\last.ckpt" ` + --train_data_dir=$portrait_data_dir ` + --output_dir=$portrait_output_dir ` + --resolution=$portrait_resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$portrait_mts ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$dataset_repeats ` + --save_half + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$portrait_output_dir"\last.ckpt" ` + --train_data_dir=$square_data_dir ` + --output_dir=$square_output_dir ` + --resolution=$square_resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$square_mts ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$dataset_repeats ` + --save_half + +# 2nd pass at half the dataset repeat value + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$square_output_dir"\last.ckpt" ` + --train_data_dir=$landscape_data_dir ` + --output_dir=$landscape_output_dir"2" ` + --resolution=$landscape_resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$([Math]::Ceiling($landscape_mts/2)) ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + 
--dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) ` + --save_half + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$landscape_output_dir"2\last.ckpt" ` + --train_data_dir=$portrait_data_dir ` + --output_dir=$portrait_output_dir"2" ` + --resolution=$portrait_resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$([Math]::Ceiling($portrait_mts/2)) ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) ` + --save_half + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --pretrained_model_name_or_path=$portrait_output_dir"2\last.ckpt" ` + --train_data_dir=$square_data_dir ` + --output_dir=$square_output_dir"2" ` + --resolution=$square_resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$([Math]::Ceiling($square_mts/2)) ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --cache_latents ` + --save_every_n_epochs=$save_every_n_epochs ` + --fine_tuning ` + --dataset_repeats=$([Math]::Ceiling($dataset_repeats/2)) ` + --save_half + \ No newline at end of file diff --git a/examples/kohya_finetune.ps1 b/examples/kohya_finetune.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..b8ee228bd7337df93cdbe484fa8682265ef48060 --- /dev/null +++ b/examples/kohya_finetune.ps1 @@ -0,0 +1,153 @@ +# variables related to the pretrained model +$pretrained_model_name_or_path = "D:\models\test\samdoesart2\model\last" +$v2 = 1 # set to 1 for true or 0 for false +$v_model = 0 # set to 1 for true or 0 for false + +# variables related to the training dataset and output directory +$train_dir = "D:\models\test\samdoesart2" +$image_folder = 
"D:\dataset\samdoesart2\raw" +$output_dir = "D:\models\test\samdoesart2\model_e2\" +$max_resolution = "512,512" + +# variables related to the training process +$learning_rate = 1e-6 +$lr_scheduler = "constant" # Default is constant +$lr_warmup = 0 # % of steps to warmup for 0 - 100. Default is 0. +$dataset_repeats = 40 +$train_batch_size = 8 +$epoch = 1 +$save_every_n_epochs = 1 +$mixed_precision = "bf16" +$save_precision = "fp16" # use fp16 for better compatibility with auto1111 and other repo +$seed = "494481440" +$num_cpu_threads_per_process = 6 +$train_text_encoder = 0 # set to 1 to train text encoder otherwise set to 0 + +# variables related to the resulting diffuser model. If input is ckpt or tensors then it is not applicable +$convert_to_safetensors = 1 # set to 1 to convert resulting diffuser to ckpt +$convert_to_ckpt = 1 # set to 1 to convert resulting diffuser to ckpt + +# other variables +$kohya_finetune_repo_path = "D:\kohya_ss" + +### You should not need to change things below + +# Set variables to useful values using ternary operator +$v_model = ($v_model -eq 0) ? $null : "--v_parameterization" +$v2 = ($v2 -eq 0) ? $null : "--v2" +$train_text_encoder = ($train_text_encoder -eq 0) ? $null : "--train_text_encoder" + +# stop script on error +$ErrorActionPreference = "Stop" + +# define a list of substrings to search for +$substrings_v2 = "stable-diffusion-2-1-base", "stable-diffusion-2-base" + +# check if $v2 and $v_model are empty and if $pretrained_model_name_or_path contains any of the substrings in the v2 list +if ($v2 -eq $null -and $v_model -eq $null -and ($substrings_v2 | Where-Object { $pretrained_model_name_or_path -match $_ }).Count -gt 0) { + Write-Host("SD v2 model detected. 
Setting --v2 parameter") + $v2 = "--v2" + $v_model = $null +} + +# define a list of substrings to search for v-objective +$substrings_v_model = "stable-diffusion-2-1", "stable-diffusion-2" + +# check if $v2 and $v_model are empty and if $pretrained_model_name_or_path contains any of the substrings in the v_model list +elseif ($v2 -eq $null -and $v_model -eq $null -and ($substrings_v_model | Where-Object { $pretrained_model_name_or_path -match $_ }).Count -gt 0) { + Write-Host("SD v2 v_model detected. Setting --v2 parameter and --v_parameterization") + $v2 = "--v2" + $v_model = "--v_parameterization" +} + +# activate venv +cd $kohya_finetune_repo_path +.\venv\Scripts\activate + +# create caption json file +if (!(Test-Path -Path $train_dir)) { + New-Item -Path $train_dir -ItemType "directory" +} + +python $kohya_finetune_repo_path\script\merge_captions_to_metadata.py ` + --caption_extention ".txt" $image_folder $train_dir"\meta_cap.json" + +# create images buckets +python $kohya_finetune_repo_path\script\prepare_buckets_latents.py ` + $image_folder ` + $train_dir"\meta_cap.json" ` + $train_dir"\meta_lat.json" ` + $pretrained_model_name_or_path ` + --batch_size 4 --max_resolution $max_resolution --mixed_precision $mixed_precision + +# Get number of valid images +$image_num = Get-ChildItem "$image_folder" -Recurse -File -Include *.npz | Measure-Object | % { $_.Count } + +$repeats = $image_num * $dataset_repeats +Write-Host("Repeats = $repeats") + +# calculate max_train_set +$max_train_set = [Math]::Ceiling($repeats / $train_batch_size * $epoch) +Write-Host("max_train_set = $max_train_set") + +$lr_warmup_steps = [Math]::Round($lr_warmup * $max_train_set / 100) +Write-Host("lr_warmup_steps = $lr_warmup_steps") + +Write-Host("$v2 $v_model") + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process $kohya_finetune_repo_path\script\fine_tune.py ` + $v2 ` + $v_model ` + --pretrained_model_name_or_path=$pretrained_model_name_or_path ` + --in_json 
$train_dir\meta_lat.json ` + --train_data_dir="$image_folder" ` + --output_dir=$output_dir ` + --train_batch_size=$train_batch_size ` + --dataset_repeats=$dataset_repeats ` + --learning_rate=$learning_rate ` + --lr_scheduler=$lr_scheduler ` + --lr_warmup_steps=$lr_warmup_steps ` + --max_train_steps=$max_train_set ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + --save_every_n_epochs=$save_every_n_epochs ` + --seed=$seed ` + $train_text_encoder ` + --save_precision=$save_precision + +# check if $output_dir\last is a directory... therefore it is a diffuser model +if (Test-Path "$output_dir\last" -PathType Container) { + if ($convert_to_ckpt) { + Write-Host("Converting diffuser model $output_dir\last to $output_dir\last.ckpt") + python "$kohya_finetune_repo_path\tools\convert_diffusers20_original_sd.py" ` + $output_dir\last ` + $output_dir\last.ckpt ` + --$save_precision + } + if ($convert_to_safetensors) { + Write-Host("Converting diffuser model $output_dir\last to $output_dir\last.safetensors") + python "$kohya_finetune_repo_path\tools\convert_diffusers20_original_sd.py" ` + $output_dir\last ` + $output_dir\last.safetensors ` + --$save_precision + } +} + +# define a list of substrings to search for inference file +$substrings_sd_model = ".ckpt", ".safetensors" +$matching_extension = foreach ($ext in $substrings_sd_model) { + Get-ChildItem $output_dir -File | Where-Object { $_.Extension -contains $ext } +} + +if ($matching_extension.Count -gt 0) { + # copy the file named "v2-inference.yaml" from the "v2_inference" folder to $output_dir as last.yaml + if ( $v2 -ne $null -and $v_model -ne $null) { + Write-Host("Saving v2-inference-v.yaml as $output_dir\last.yaml") + Copy-Item -Path "$kohya_finetune_repo_path\v2_inference\v2-inference-v.yaml" -Destination "$output_dir\last.yaml" + } + elseif ( $v2 -ne $null ) { + Write-Host("Saving v2-inference.yaml as $output_dir\last.yaml") + Copy-Item -Path 
"$kohya_finetune_repo_path\v2_inference\v2-inference.yaml" -Destination "$output_dir\last.yaml" + } +} \ No newline at end of file diff --git a/examples/kohya_new-v3.ps1 b/examples/kohya_new-v3.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..f73675b7ee02ccdc8fdef6fead9fbf778191b07d --- /dev/null +++ b/examples/kohya_new-v3.ps1 @@ -0,0 +1,90 @@ +# Sylvia Ritter. AKA: by silvery trait + +# variable values +$pretrained_model_name_or_path = "D:\models\v1-5-pruned-mse-vae.ckpt" +$train_dir = "D:\dreambooth\train_sylvia_ritter\raw_data" +$training_folder = "all-images-v3" + +$learning_rate = 5e-6 +$dataset_repeats = 40 +$train_batch_size = 6 +$epoch = 4 +$save_every_n_epochs=1 +$mixed_precision="bf16" +$num_cpu_threads_per_process=6 + +$max_resolution = "768,576" + +# You should not have to change values past this point + +# stop script on error +$ErrorActionPreference = "Stop" + +# activate venv +.\venv\Scripts\activate + +# create caption json file +python D:\kohya_ss\finetune\merge_captions_to_metadata.py ` +--caption_extention ".txt" $train_dir"\"$training_folder $train_dir"\meta_cap.json" + +# create images buckets +python D:\kohya_ss\finetune\prepare_buckets_latents.py ` + $train_dir"\"$training_folder ` + $train_dir"\meta_cap.json" ` + $train_dir"\meta_lat.json" ` + $pretrained_model_name_or_path ` + --batch_size 4 --max_resolution $max_resolution --mixed_precision fp16 + +# Get number of valid images +$image_num = Get-ChildItem "$train_dir\$training_folder" -Recurse -File -Include *.npz | Measure-Object | %{$_.Count} +$repeats = $image_num * $dataset_repeats + +# calculate max_train_set +$max_train_set = [Math]::Ceiling($repeats / $train_batch_size * $epoch) + + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\kohya_ss\finetune\fine_tune.py ` + --pretrained_model_name_or_path=$pretrained_model_name_or_path ` + --in_json $train_dir"\meta_lat.json" ` + --train_data_dir=$train_dir"\"$training_folder ` + 
--output_dir=$train_dir"\fine_tuned2" ` + --train_batch_size=$train_batch_size ` + --dataset_repeats=$dataset_repeats ` + --learning_rate=$learning_rate ` + --max_train_steps=$max_train_set ` + --use_8bit_adam --xformers ` + --mixed_precision=$mixed_precision ` + --save_every_n_epochs=$save_every_n_epochs ` + --train_text_encoder ` + --save_precision="fp16" + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\kohya_ss\finetune\fine_tune.py ` + --pretrained_model_name_or_path=$train_dir"\fine_tuned\last.ckpt" ` + --in_json $train_dir"\meta_lat.json" ` + --train_data_dir=$train_dir"\"$training_folder ` + --output_dir=$train_dir"\fine_tuned2" ` + --train_batch_size=$train_batch_size ` + --dataset_repeats=$([Math]::Ceiling($dataset_repeats / 2)) ` + --learning_rate=$learning_rate ` + --max_train_steps=$([Math]::Ceiling($max_train_set / 2)) ` + --use_8bit_adam --xformers ` + --mixed_precision=$mixed_precision ` + --save_every_n_epochs=$save_every_n_epochs ` + --save_precision="fp16" + +# Hypernetwork + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process D:\kohya_ss\finetune\fine_tune.py ` + --pretrained_model_name_or_path=$pretrained_model_name_or_path ` + --in_json $train_dir"\meta_lat.json" ` + --train_data_dir=$train_dir"\"$training_folder ` + --output_dir=$train_dir"\fine_tuned" ` + --train_batch_size=$train_batch_size ` + --dataset_repeats=$dataset_repeats ` + --learning_rate=$learning_rate ` + --max_train_steps=$max_train_set ` + --use_8bit_adam --xformers ` + --mixed_precision=$mixed_precision ` + --save_every_n_epochs=$save_every_n_epochs ` + --save_precision="fp16" ` + --hypernetwork_module="hypernetwork_nai" \ No newline at end of file diff --git a/examples/kohya_train_db_fixed_with-reg_SDv2 512 base.ps1 b/examples/kohya_train_db_fixed_with-reg_SDv2 512 base.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..73f3274185b8b2230e74860eb408745f7e5085fa --- /dev/null +++ 
b/examples/kohya_train_db_fixed_with-reg_SDv2 512 base.ps1 @@ -0,0 +1,64 @@ +# This powershell script will create a model using the fine tuning dreambooth method. It will require landscape, +# portrait and square images. +# +# Adjust the script to your own needs + +# variable values +$pretrained_model_name_or_path = "D:\models\512-base-ema.ckpt" +$data_dir = "D:\models\dariusz_zawadzki\kohya_reg\data" +$reg_data_dir = "D:\models\dariusz_zawadzki\kohya_reg\reg" +$logging_dir = "D:\models\dariusz_zawadzki\logs" +$output_dir = "D:\models\dariusz_zawadzki\train_db_model_reg_v2" +$resolution = "512,512" +$lr_scheduler="polynomial" +$cache_latents = 1 # 1 = true, 0 = false + +$image_num = Get-ChildItem $data_dir -Recurse -File -Include *.png, *.jpg, *.webp | Measure-Object | %{$_.Count} + +Write-Output "image_num: $image_num" + +$dataset_repeats = 200 +$learning_rate = 2e-6 +$train_batch_size = 4 +$epoch = 1 +$save_every_n_epochs=1 +$mixed_precision="bf16" +$num_cpu_threads_per_process=6 + +# You should not have to change values past this point +if ($cache_latents -eq 1) { + $cache_latents_value="--cache_latents" +} +else { + $cache_latents_value="" +} + +$repeats = $image_num * $dataset_repeats +$mts = [Math]::Ceiling($repeats / $train_batch_size * $epoch) + +Write-Output "Repeats: $repeats" + +cd D:\kohya_ss +.\venv\Scripts\activate + +accelerate launch --num_cpu_threads_per_process $num_cpu_threads_per_process train_db.py ` + --v2 ` + --pretrained_model_name_or_path=$pretrained_model_name_or_path ` + --train_data_dir=$data_dir ` + --output_dir=$output_dir ` + --resolution=$resolution ` + --train_batch_size=$train_batch_size ` + --learning_rate=$learning_rate ` + --max_train_steps=$mts ` + --use_8bit_adam ` + --xformers ` + --mixed_precision=$mixed_precision ` + $cache_latents_value ` + --save_every_n_epochs=$save_every_n_epochs ` + --logging_dir=$logging_dir ` + --save_precision="fp16" ` + --reg_data_dir=$reg_data_dir ` + --seed=494481440 ` + 
--lr_scheduler=$lr_scheduler + +# Add the inference yaml file along with the model for proper loading. Need to have the same name as model... Most likely "last.yaml" in our case. diff --git a/examples/lucoris extract examples.txt b/examples/lucoris extract examples.txt new file mode 100644 index 0000000000000000000000000000000000000000..ca37f3da2d8cab97a98a94c785ab351fd01acb52 --- /dev/null +++ b/examples/lucoris extract examples.txt @@ -0,0 +1,13 @@ +python tools\lycoris_locon_extract.py --mode quantile --safetensors --linear_ratio 0.9 --conv_ratio 0.9 --device cuda D:/models/v1-5-pruned.ckpt D:/models/cyberrealistic_v12.safetensors "D:/lora/sd1.5/cyberrealistic_v12.safetensors" + +python tools\lycoris_locon_extract.py --mode quantile --safetensors --linear_quantile 0.75 --conv_quantile 0.75 --device cuda D:/models/v1-5-pruned.ckpt "C:\Users\berna\Downloads\deliberate_v2.safetensors" "D:/lora/sd1.5/deliberate_v2.safetensors" + +python tools\lycoris_locon_extract.py --mode fixed --safetensors --linear_dim 512 --conv_dim 512 --device cuda D:/models/v1-5-pruned.ckpt D:/models/cyberrealistic_v12.safetensors "D:/lora/sd1.5/cyberrealistic_v12.safetensors" + +python tools\lycoris_locon_extract.py --use_sparse_bias --sparsity 0.98 --mode quantile --safetensors --linear_quantile 0.75 --conv_quantile 0.75 --device cuda D:/models/v1-5-pruned.ckpt "C:\Users\berna\Downloads\deliberate_v2.safetensors" "D:/lora/sd1.5/deliberate_v2.safetensors" + +python tools\lycoris_locon_extract.py --use_sparse_bias --sparsity 0.98 --mode quantile --safetensors --linear_quantile 0.75 --conv_quantile 0.75 --device cuda D:/models/v1-5-pruned.ckpt "D:/models/test\claire_v1.0ee2-000003.safetensors" "D:/lora/sd1.5/claire_v1.0ee2-000003.safetensors" + +python tools\lycoris_locon_extract.py --use_sparse_bias --sparsity 0.98 --mode quantile --safetensors --linear_quantile 0.5 --conv_quantile 0.5 --device cuda D:/models/v1-5-pruned.ckpt "D:/models/test\claire_v1.0ee2-000003.safetensors" 
"D:/lora/sd1.5/claire_v1.0ee2-0.5.safetensors" + +python tools\lycoris_locon_extract.py --use_sparse_bias --sparsity 0.98 --mode quantile --safetensors --linear_quantile 0.5 --conv_quantile 0.5 --device cuda D:/models/v1-5-pruned.ckpt "D:/models/test\claire_v1.0f.safetensors" "D:/lora/sd1.5/claire_v1.0f0.5.safetensors" \ No newline at end of file diff --git a/examples/pull kohya_ss sd-scripts updates in.md b/examples/pull kohya_ss sd-scripts updates in.md new file mode 100644 index 0000000000000000000000000000000000000000..024583f569d4e7f714b1551112057625013c3bf0 --- /dev/null +++ b/examples/pull kohya_ss sd-scripts updates in.md @@ -0,0 +1,32 @@ +## Updating a Local Branch with the Latest sd-scripts Changes + +To update your local branch with the most recent changes from kohya/sd-scripts, follow these steps: + +1. Add sd-scripts as an alternative remote by executing the following command: + + ``` + git remote add sd-scripts https://github.com/kohya-ss/sd-scripts.git + ``` + +2. When you wish to perform an update, execute the following commands: + + ``` + git checkout dev + git pull sd-scripts main + ``` + + Alternatively, if you want to obtain the latest code, even if it may be unstable: + + ``` + git checkout dev + git pull sd-scripts dev + ``` + +3. If you encounter a conflict with the Readme file, you can resolve it by taking the following steps: + + ``` + git add README.md + git merge --continue + ``` + + This may open a text editor for a commit message, but you can simply save and close it to proceed. Following these steps should resolve the conflict. If you encounter additional merge conflicts, consider them as valuable learning opportunities for personal growth. 
\ No newline at end of file diff --git a/examples/stable_cascade/test.toml b/examples/stable_cascade/test.toml new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/examples/word_frequency.ps1 b/examples/word_frequency.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..a541bc8b497759748114b85de58050d467a69214 --- /dev/null +++ b/examples/word_frequency.ps1 @@ -0,0 +1,15 @@ +$txt_files_folder = "D:\dataset\" +$txt_prefix_to_ignore = "asds" +$txt_postfix_ti_ignore = "asds" + +# Should not need to touch anything below + +# (Get-Content $txt_files_folder"\*.txt" ).Replace(",", "") -Split '\W' | Group-Object -NoElement | Sort-Object -Descending -Property Count + +$combined_txt = Get-Content $txt_files_folder"\*.txt" +$combined_txt = $combined_txt.Replace(",", "") +$combined_txt = $combined_txt.Replace("$txt_prefix_to_ignore", "") +$combined_txt = $combined_txt.Replace("$txt_postfix_ti_ignore", "") -Split '\W' | Group-Object -NoElement | Sort-Object -Descending -Property Count + +Write-Output "Sorted by count" +Write-Output $combined_txt.Name \ No newline at end of file diff --git a/gui.bat b/gui.bat new file mode 100644 index 0000000000000000000000000000000000000000..164423af2efecdfedb6850f2c677c3a0c8eaeb55 --- /dev/null +++ b/gui.bat @@ -0,0 +1,26 @@ +@echo off + +set PYTHON_VER=3.10.9 + +:: Deactivate the virtual environment +call .\venv\Scripts\deactivate.bat + +:: Activate the virtual environment +call .\venv\Scripts\activate.bat +set PATH=%PATH%;%~dp0venv\Lib\site-packages\torch\lib + +:: Validate requirements +python.exe .\setup\validate_requirements.py +if %errorlevel% neq 0 exit /b %errorlevel% + +:: If the exit code is 0, run the kohya_gui.py script with the command-line arguments +if %errorlevel% equ 0 ( + REM Check if the batch was started via double-click + IF /i "%comspec% /c %~0 " equ "%cmdcmdline:"=%" ( + REM echo This script was started by double clicking. 
+ cmd /k python.exe kohya_gui.py %* + ) ELSE ( + REM echo This script was started from a command prompt. + python.exe kohya_gui.py %* + ) +) diff --git a/gui.ps1 b/gui.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..f67ea4587b5a41290f9a181a9267fd1d0a0c86b0 --- /dev/null +++ b/gui.ps1 @@ -0,0 +1,34 @@ +# Check if a virtual environment is active and deactivate it if necessary +if ($env:VIRTUAL_ENV) { + # Write-Host "Deactivating the virtual environment to test for modules installed locally..." + & deactivate +} + +# Activate the virtual environment +# Write-Host "Activating the virtual environment..." +& .\venv\Scripts\activate +$env:PATH += ";$($MyInvocation.MyCommand.Path)\venv\Lib\site-packages\torch\lib" + +# Debug info about system +# python.exe .\setup\debug_info.py + +# Validate the requirements and store the exit code +python.exe .\setup\validate_requirements.py + +# Check the exit code and stop execution if it is not 0 +if ($LASTEXITCODE -ne 0) { + Write-Host "Failed to validate requirements. Exiting script..." + exit $LASTEXITCODE +} + +# If the exit code is 0, read arguments from gui_parameters.txt (if it exists) +# and run the kohya_gui.py script with the command-line arguments +if ($LASTEXITCODE -eq 0) { + $argsFromFile = @() + if (Test-Path .\gui_parameters.txt) { + $argsFromFile = Get-Content .\gui_parameters.txt -Encoding UTF8 | Where-Object { $_ -notmatch "^#" } | Foreach-Object { $_ -split " " } + } + $args_combo = $argsFromFile + $args + # Write-Host "The arguments passed to this script were: $args_combo" + python.exe kohya_gui.py $args_combo +} diff --git a/gui.sh b/gui.sh new file mode 100644 index 0000000000000000000000000000000000000000..17c5207a48664a337d2532b1688162fe5b2f2540 --- /dev/null +++ b/gui.sh @@ -0,0 +1,120 @@ +#!/usr/bin/env bash + +# Checks to see if variable is set and non-empty. 
+# This is defined first, so we can use the function for some default variable values +env_var_exists() { + if [[ -n "${!1}" ]]; then + return 0 + else + return 1 + fi +} + +# Define the directory path for WSL2 +lib_path="/usr/lib/wsl/lib/" + +# Check if the directory exists +if [ -d "$lib_path" ]; then + # Check if LD_LIBRARY_PATH is already set + if [ -z "${LD_LIBRARY_PATH}" ]; then + # LD_LIBRARY_PATH is not set, set it to the lib_path + export LD_LIBRARY_PATH="$lib_path" + # echo "LD_LIBRARY_PATH set to: $LD_LIBRARY_PATH" + fi +fi + +# Need RUNPOD to have a default value before first access +RUNPOD=false +if env_var_exists RUNPOD_POD_ID || env_var_exists RUNPOD_API_KEY; then + RUNPOD=true +fi + +# If it is run with the sudo command, get the complete LD_LIBRARY_PATH environment variable of the system and assign it to the current environment, +# because it will be used later. +if [ -n "$SUDO_USER" ] || [ -n "$SUDO_COMMAND" ]; then + echo "The sudo command resets the non-essential environment variables, we keep the LD_LIBRARY_PATH variable." + export LD_LIBRARY_PATH=$(sudo -i printenv LD_LIBRARY_PATH) +fi + +# This gets the directory the script is run from so pathing can work relative to the script where needed. +SCRIPT_DIR=$(cd -- "$(dirname -- "$0")" && pwd) + +# Step into GUI local directory +cd "$SCRIPT_DIR" || exit 1 + +if [ -d "$SCRIPT_DIR/venv" ]; then + source "$SCRIPT_DIR/venv/bin/activate" || exit 1 +else + echo "venv folder does not exist. Not activating..." 
+fi + +# Check if LD_LIBRARY_PATH environment variable exists +if [[ -z "${LD_LIBRARY_PATH}" ]]; then + # Set the ANSI escape sequence for yellow text + YELLOW='\033[0;33m' + # Set the ANSI escape sequence to reset text color + RESET='\033[0m' + + echo -e "${YELLOW}Warning: LD_LIBRARY_PATH environment variable is not set.${RESET}" + echo -e "${YELLOW}Certain functionalities may not work correctly.${RESET}" + echo -e "${YELLOW}Please ensure that the required libraries are properly configured.${RESET}" + echo -e " " + echo -e "${YELLOW}If you use WSL2 you may want to: export LD_LIBRARY_PATH=/usr/lib/wsl/lib/${RESET}" + echo -e " " +fi + +# Determine the requirements file based on the system +if [[ "$OSTYPE" == "darwin"* ]]; then + if [[ "$(uname -m)" == "arm64" ]]; then + REQUIREMENTS_FILE="$SCRIPT_DIR/requirements_macos_arm64.txt" + else + REQUIREMENTS_FILE="$SCRIPT_DIR/requirements_macos_amd64.txt" + fi +else + if [ "$RUNPOD" = false ]; then + if [[ "$@" == *"--use-ipex"* ]]; then + REQUIREMENTS_FILE="$SCRIPT_DIR/requirements_linux_ipex.txt" + elif [[ "$@" == *"--use-rocm"* ]] || [ -x "$(command -v rocminfo)" ] || [ -f "/opt/rocm/bin/rocminfo" ]; then + REQUIREMENTS_FILE="$SCRIPT_DIR/requirements_linux_rocm.txt" + else + REQUIREMENTS_FILE="$SCRIPT_DIR/requirements_linux.txt" + fi + else + REQUIREMENTS_FILE="$SCRIPT_DIR/requirements_runpod.txt" + fi +fi + +#Set OneAPI if it's not set by the user +if [[ "$@" == *"--use-ipex"* ]] +then + if [ -d "$SCRIPT_DIR/venv" ] && [[ -z "${DISABLE_VENV_LIBS}" ]]; then + export LD_LIBRARY_PATH=$(realpath "$SCRIPT_DIR/venv")/lib/:$LD_LIBRARY_PATH + fi + export NEOReadDebugKeys=1 + export ClDeviceGlobalMemSizeAvailablePercent=100 + if [[ ! 
-z "${IPEXRUN}" ]] && [ ${IPEXRUN}="True" ] && [ -x "$(command -v ipexrun)" ] + then + if [[ -z "$STARTUP_CMD" ]] + then + STARTUP_CMD=ipexrun + fi + if [[ -z "$STARTUP_CMD_ARGS" ]] + then + STARTUP_CMD_ARGS="--multi-task-manager taskset --memory-allocator tcmalloc" + fi + fi +fi + +#Set STARTUP_CMD as normal python if not specified +if [[ -z "$STARTUP_CMD" ]] +then + STARTUP_CMD=python +fi + +# Validate the requirements and run the script if successful +if python "$SCRIPT_DIR/setup/validate_requirements.py" -r "$REQUIREMENTS_FILE"; then + "${STARTUP_CMD}" $STARTUP_CMD_ARGS "$SCRIPT_DIR/kohya_gui.py" "$@" +else + echo "Validation failed. Exiting..." + exit 1 +fi diff --git a/kohya_gui.py b/kohya_gui.py new file mode 100644 index 0000000000000000000000000000000000000000..c241e8c6370491eee480290f141532dc6c3bcf03 --- /dev/null +++ b/kohya_gui.py @@ -0,0 +1,185 @@ +import gradio as gr +import os +import argparse +from kohya_gui.class_gui_config import KohyaSSGUIConfig +from kohya_gui.dreambooth_gui import dreambooth_tab +from kohya_gui.finetune_gui import finetune_tab +from kohya_gui.textual_inversion_gui import ti_tab +from kohya_gui.utilities import utilities_tab +from kohya_gui.lora_gui import lora_tab +from kohya_gui.class_lora_tab import LoRATools + +from kohya_gui.custom_logging import setup_logging +from kohya_gui.localization_ext import add_javascript + + +def UI(**kwargs): + add_javascript(kwargs.get("language")) + css = "" + + headless = kwargs.get("headless", False) + log.info(f"headless: {headless}") + + if os.path.exists("./assets/style.css"): + with open(os.path.join("./assets/style.css"), "r", encoding="utf8") as file: + log.debug("Load CSS...") + css += file.read() + "\n" + + if os.path.exists("./.release"): + with open(os.path.join("./.release"), "r", encoding="utf8") as file: + release = file.read() + + if os.path.exists("./README.md"): + with open(os.path.join("./README.md"), "r", encoding="utf8") as file: + README = file.read() + + interface = 
gr.Blocks( + css=css, title=f"Kohya_ss GUI {release}", theme=gr.themes.Default() + ) + + config = KohyaSSGUIConfig(config_file_path=kwargs.get("config")) + + if config.is_config_loaded(): + log.info(f"Loaded default GUI values from '{kwargs.get('config')}'...") + + use_shell_flag = True + # if os.name == "posix": + # use_shell_flag = True + + use_shell_flag = config.get("settings.use_shell", use_shell_flag) + + if kwargs.get("do_not_use_shell", False): + use_shell_flag = False + + if use_shell_flag: + log.info("Using shell=True when running external commands...") + + with interface: + with gr.Tab("Dreambooth"): + ( + train_data_dir_input, + reg_data_dir_input, + output_dir_input, + logging_dir_input, + ) = dreambooth_tab( + headless=headless, config=config, use_shell_flag=use_shell_flag + ) + with gr.Tab("LoRA"): + lora_tab(headless=headless, config=config, use_shell_flag=use_shell_flag) + with gr.Tab("Textual Inversion"): + ti_tab(headless=headless, config=config, use_shell_flag=use_shell_flag) + with gr.Tab("Finetuning"): + finetune_tab( + headless=headless, config=config, use_shell_flag=use_shell_flag + ) + with gr.Tab("Utilities"): + utilities_tab( + train_data_dir_input=train_data_dir_input, + reg_data_dir_input=reg_data_dir_input, + output_dir_input=output_dir_input, + logging_dir_input=logging_dir_input, + headless=headless, + config=config, + ) + with gr.Tab("LoRA"): + _ = LoRATools(headless=headless) + with gr.Tab("About"): + gr.Markdown(f"kohya_ss GUI release {release}") + with gr.Tab("README"): + gr.Markdown(README) + + htmlStr = f""" + + +
{release}
+ + + """ + gr.HTML(htmlStr) + # Show the interface + launch_kwargs = {} + username = kwargs.get("username") + password = kwargs.get("password") + server_port = kwargs.get("server_port", 0) + inbrowser = kwargs.get("inbrowser", False) + share = kwargs.get("share", False) + do_not_share = kwargs.get("do_not_share", False) + server_name = kwargs.get("listen") + root_path = kwargs.get("root_path", None) + + launch_kwargs["server_name"] = server_name + if username and password: + launch_kwargs["auth"] = (username, password) + if server_port > 0: + launch_kwargs["server_port"] = server_port + if inbrowser: + launch_kwargs["inbrowser"] = inbrowser + if do_not_share: + launch_kwargs["share"] = False + else: + if share: + launch_kwargs["share"] = share + if root_path: + launch_kwargs["root_path"] = root_path + launch_kwargs["debug"] = True + interface.launch(**launch_kwargs) + + +if __name__ == "__main__": + # torch.cuda.set_per_process_memory_fraction(0.48) + parser = argparse.ArgumentParser() + parser.add_argument( + "--config", + type=str, + default="./config.toml", + help="Path to the toml config file for interface defaults", + ) + parser.add_argument("--debug", action="store_true", help="Debug on") + parser.add_argument( + "--listen", + type=str, + default="127.0.0.1", + help="IP to listen on for connections to Gradio", + ) + parser.add_argument( + "--username", type=str, default="", help="Username for authentication" + ) + parser.add_argument( + "--password", type=str, default="", help="Password for authentication" + ) + parser.add_argument( + "--server_port", + type=int, + default=0, + help="Port to run the server listener on", + ) + parser.add_argument("--inbrowser", action="store_true", help="Open in browser") + parser.add_argument("--share", action="store_true", help="Share the gradio UI") + parser.add_argument( + "--headless", action="store_true", help="Is the server headless" + ) + parser.add_argument( + "--language", type=str, default=None, help="Set custom 
language" + ) + + parser.add_argument("--use-ipex", action="store_true", help="Use IPEX environment") + parser.add_argument("--use-rocm", action="store_true", help="Use ROCm environment") + + parser.add_argument( + "--do_not_use_shell", action="store_true", help="Enforce not to use shell=True when running external commands" + ) + + parser.add_argument( + "--do_not_share", action="store_true", help="Do not share the gradio UI" + ) + + parser.add_argument( + "--root_path", type=str, default=None, help="`root_path` for Gradio to enable reverse proxy support. e.g. /kohya_ss" + ) + + args = parser.parse_args() + + # Set up logging + log = setup_logging(debug=args.debug) + + UI(**vars(args)) diff --git a/kohya_gui/__init__.py b/kohya_gui/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cd476a669dca5aa898398fe4a52f0394d93a5efd --- /dev/null +++ b/kohya_gui/__init__.py @@ -0,0 +1 @@ +"""empty""" diff --git a/kohya_gui/basic_caption_gui.py b/kohya_gui/basic_caption_gui.py new file mode 100644 index 0000000000000000000000000000000000000000..ae693c57ad78128a1c6f6515048227031ddcf432 --- /dev/null +++ b/kohya_gui/basic_caption_gui.py @@ -0,0 +1,273 @@ +import gradio as gr +import subprocess +from .common_gui import ( + get_folder_path, + add_pre_postfix, + find_replace, + scriptdir, + list_dirs, + setup_environment, +) +import os +import sys + +from .custom_logging import setup_logging + +# Set up logging +log = setup_logging() + +PYTHON = sys.executable + + +def caption_images( + caption_text: str, + images_dir: str, + overwrite: bool, + caption_ext: str, + prefix: str, + postfix: str, + find_text: str, + replace_text: str, +): + """ + Captions images in a given directory with a given caption text. + + Args: + caption_text (str): The text to be used as the caption. + images_dir (str): The directory containing the images to be captioned. + overwrite (bool): Whether to overwrite existing captions. 
+ caption_ext (str): The file extension for the caption files. + prefix (str): Text to be added before the caption text. + postfix (str): Text to be added after the caption text. + find_text (str): Text to be replaced in the caption files. + replace_text (str): Text to replace the found text in the caption files. + + Returns: + None + """ + # Check if images_dir and caption_ext are provided + missing_parameters = [] + if not images_dir: + missing_parameters.append("image directory") + if not caption_ext: + missing_parameters.append("caption file extension") + + if missing_parameters: + log.info( + "The following parameter(s) are missing: {}. " + "Please provide these to proceed with captioning the images.".format(", ".join(missing_parameters)) + ) + return + + # Log the captioning process + if caption_text: + log.info(f"Captioning files in {images_dir} with {caption_text}...") + + # Build the command to run caption.py + run_cmd = [ + rf"{PYTHON}", + rf"{scriptdir}/tools/caption.py", + "--caption_text", + caption_text, + ] + + # Add optional flags to the command + if overwrite: + run_cmd.append("--overwrite") + if caption_ext: + run_cmd.append("--caption_file_ext") + run_cmd.append(caption_ext) + + run_cmd.append(rf"{images_dir}") + + # Reconstruct the safe command string for display + command_to_run = " ".join(run_cmd) + log.info(f"Executing command: {command_to_run}") + + # Set the environment variable for the Python path + env = setup_environment() + + # Run the command in the sd-scripts folder context + subprocess.run(run_cmd, env=env, shell=False) + + # Check if overwrite option is enabled + if overwrite: + # Add prefix and postfix to caption files or find and replace text in caption files + if prefix or postfix or find_text: + # Add prefix and/or postfix to caption files + add_pre_postfix( + folder=images_dir, + caption_file_ext=caption_ext, + prefix=prefix, + postfix=postfix, + ) + # Replace specified text in caption files if find and replace text is provided 
# Gradio UI
def gradio_basic_caption_gui_tab(headless=False, default_images_dir=None):
    """
    Build the "Basic Captioning" Gradio tab and wire its widgets to caption_images.

    Must be called while a Gradio Blocks context is active, because the widgets
    are created as a side effect of this call.

    Args:
        headless (bool, optional): If True, the folder-picker button is created
            hidden; every other widget is unaffected. Defaults to False.
        default_images_dir (str, optional): Directory whose sub-directories seed
            the image folder dropdown. If None, the 'data' directory under the
            script directory is used.

    Returns:
        None
    """
    from .common_gui import create_refresh_button

    # Set default images directory if not provided
    default_images_dir = (
        default_images_dir
        if default_images_dir is not None
        else os.path.join(scriptdir, "data")
    )
    # Remembers the last path that was listed so the refresh button can re-list it
    current_images_dir = default_images_dir

    # Function to list directories
    def list_images_dirs(path):
        """
        List the directories under path and remember path as the current image directory.

        Parameters:
            path (str): The directory path to list image directories from.

        Returns:
            list: The entries produced by list_dirs(path).
        """
        # Allows list_images_dirs to modify current_images_dir outside of this function
        nonlocal current_images_dir
        current_images_dir = path
        return list(list_dirs(path))

    # Gradio tab for basic captioning
    with gr.Tab("Basic Captioning"):
        # Markdown description
        gr.Markdown(
            "This utility allows you to create simple caption files for each image in a folder."
        )
        # Group and row for image folder selection
        with gr.Group(), gr.Row():
            # Dropdown for image folder; custom values let the user type any path
            images_dir = gr.Dropdown(
                label="Image folder to caption (containing the images to caption)",
                choices=[""] + list_images_dirs(default_images_dir),
                value="",
                interactive=True,
                allow_custom_value=True,
            )
            # Refresh button for image folder: re-lists the most recently used directory
            create_refresh_button(
                images_dir,
                lambda: None,
                lambda: {"choices": list_images_dirs(current_images_dir)},
                "open_folder_small",
            )
            # Button to open folder (hidden in headless mode)
            folder_button = gr.Button(
                "📂",
                elem_id="open_folder_small",
                elem_classes=["tool"],
                visible=(not headless),
            )
            # Event handler for button click: the chosen folder becomes the dropdown value
            folder_button.click(
                get_folder_path,
                outputs=images_dir,
                show_progress=False,
            )
            # Dropdown for caption file extension (custom extensions are allowed)
            caption_ext = gr.Dropdown(
                label="Caption file extension",
                choices=[".cap", ".caption", ".txt"],
                value=".txt",
                interactive=True,
                allow_custom_value=True,
            )
            # Checkbox to overwrite existing captions
            overwrite = gr.Checkbox(
                label="Overwrite existing captions in folder",
                interactive=True,
                value=False,
            )
        # Row for caption prefix and text
        with gr.Row():
            # Textbox for caption prefix
            prefix = gr.Textbox(
                label="Prefix to add to caption",
                placeholder="(Optional)",
                interactive=True,
            )
            # Textbox for caption text
            caption_text = gr.Textbox(
                label="Caption text",
                placeholder='e.g., "by some artist". Leave empty if you only want to add a prefix or postfix.',
                interactive=True,
                lines=2,
            )
            # Textbox for caption postfix
            postfix = gr.Textbox(
                label="Postfix to add to caption",
                placeholder="(Optional)",
                interactive=True,
            )
        # Group and row for find and replace text
        with gr.Group(), gr.Row():
            # Textbox for find text
            find_text = gr.Textbox(
                label="Find text",
                placeholder='e.g., "by some artist". Leave empty if you only want to add a prefix or postfix.',
                interactive=True,
                lines=2,
            )
            # Textbox for replace text
            replace_text = gr.Textbox(
                label="Replacement text",
                placeholder='e.g., "by some artist". Leave empty if you want to replace with nothing.',
                interactive=True,
                lines=2,
            )
        # Button to caption images
        caption_button = gr.Button("Caption images")
        # Event handler for button click; the inputs list must follow the
        # positional parameter order of caption_images
        caption_button.click(
            caption_images,
            inputs=[
                caption_text,
                images_dir,
                overwrite,
                caption_ext,
                prefix,
                postfix,
                find_text,
                replace_text,
            ],
            show_progress=False,
        )

        # Event handler for dynamic update of dropdown choices: whenever the
        # folder value changes, offer the sub-directories of the new path
        images_dir.change(
            fn=lambda path: gr.Dropdown(choices=[""] + list_images_dirs(path)),
            inputs=images_dir,
            outputs=images_dir,
            show_progress=False,
        )
def generate_caption(
    file_list,
    processor,
    model,
    device,
    caption_file_ext=".txt",
    num_beams=5,
    repetition_penalty=1.5,
    length_penalty=1.2,
    max_new_tokens=40,
    min_new_tokens=20,
    do_sample=True,
    temperature=1.0,
    top_p=0.0,
):
    """
    Generate a caption for each image in file_list and write it next to the image.

    For every path the image is opened, preprocessed, captioned with
    ``model.generate`` and the decoded text is written to a file that has the
    image's base name and ``caption_file_ext`` as its extension (an existing
    file is overwritten).

    The decoding strategy is selected by ``top_p``:
    - ``top_p == 0.0``: beam search using ``num_beams``, ``repetition_penalty``
      and ``length_penalty``.
    - otherwise: sampling using ``do_sample``, ``top_p`` and ``temperature``.

    Parameters:
    - file_list: An iterable of file paths pointing to the images to be captioned.
    - processor: The preprocessor for the BLIP2 model.
    - model: The BLIP2 model to be used for generating captions.
    - device: The device on which the computation is performed.
    - caption_file_ext: The extension (including the dot) for the output text files. Default: ".txt".
    - num_beams: Number of beams for beam search. Default: 5.
    - repetition_penalty: Penalty for repeating tokens (beam search only). Default: 1.5.
    - length_penalty: Penalty for sentence length (beam search only). Default: 1.2.
    - max_new_tokens: Maximum number of new tokens to generate. Default: 40.
    - min_new_tokens: Minimum number of new tokens to generate. Default: 20.
    - do_sample: Whether to sample (sampling branch only). Default: True.
    - temperature: Sampling temperature (sampling branch only). Default: 1.0.
    - top_p: Nucleus sampling threshold; 0.0 selects beam search. Default: 0.0.

    Returns:
    - None. Captions are written to disk and progress is logged.
    """
    for file_path in file_list:
        # Open the image in a context manager so its file handle is always
        # released, including for multi-frame formats (e.g. GIF) and when
        # preprocessing raises.
        with Image.open(file_path) as image:
            # Inputs are cast to float16 to match the dtype the model is loaded with
            inputs = processor(images=image, return_tensors="pt").to(
                device, torch.float16
            )

        if top_p == 0.0:
            # Beam search
            generated_ids = model.generate(
                **inputs,
                num_beams=num_beams,
                repetition_penalty=repetition_penalty,
                length_penalty=length_penalty,
                max_new_tokens=max_new_tokens,
                min_new_tokens=min_new_tokens,
            )
        else:
            # Nucleus (top-p) sampling
            generated_ids = model.generate(
                **inputs,
                do_sample=do_sample,
                top_p=top_p,
                max_new_tokens=max_new_tokens,
                min_new_tokens=min_new_tokens,
                temperature=temperature,
            )

        # Only one image is processed per call, so the first decoded sequence is the caption
        generated_text = processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0].strip()

        # Construct the output file path by replacing the original file extension with the specified extension
        output_file_path = os.path.splitext(file_path)[0] + caption_file_ext

        # Write the generated text to the output file
        with open(output_file_path, "w", encoding="utf-8") as output_file:
            output_file.write(generated_text)

        # Log the image file path with a message about the fact that the caption was generated
        log.info(f"{file_path} caption was generated")
def caption_images_nucleus(
    directory_path,
    do_sample,
    temperature,
    top_p,
    min_new_tokens,
    max_new_tokens,
    caption_file_ext,
):
    """
    Caption every image found in a directory with BLIP2, forwarding sampling settings.

    The directory is validated first; when it does not exist an error is logged
    and nothing else happens. Otherwise the model is loaded, the image files are
    collected and generate_caption is invoked with the sampling options.

    Parameters:
    - directory_path: Path to the directory containing the images to be captioned.
    - do_sample: Forwarded unchanged to generate_caption.
    - temperature: Forwarded unchanged to generate_caption.
    - top_p: Forwarded unchanged to generate_caption.
    - min_new_tokens: Minimum number of new tokens; converted with int().
    - max_new_tokens: Maximum number of new tokens; converted with int().
    - caption_file_ext: Extension used for the caption files that are written.

    Returns:
    - None.
    """
    log.info("BLIP2 captionning nucleus...")

    if os.path.isdir(directory_path):
        # Load the model before touching the folder contents
        blip2_processor, blip2_model, target_device = load_model()
        images_to_caption = get_images_in_directory(directory_path)

        # Token limits may arrive as floats from the UI, so coerce them to int
        sampling_options = {
            "do_sample": do_sample,
            "temperature": temperature,
            "top_p": top_p,
            "min_new_tokens": int(min_new_tokens),
            "max_new_tokens": int(max_new_tokens),
        }

        generate_caption(
            file_list=images_to_caption,
            processor=blip2_processor,
            model=blip2_model,
            device=target_device,
            caption_file_ext=caption_file_ext,
            **sampling_options,
        )
    else:
        log.error(f"Directory {directory_path} does not exist.")
+ ) + + with gr.Group(), gr.Row(): + directory_path_dir = gr.Dropdown( + label="Image folder to caption (containing the images to caption)", + choices=[""] + list_train_dirs(directory_path), + value="", + interactive=True, + allow_custom_value=True, + ) + create_refresh_button( + directory_path_dir, + lambda: None, + lambda: {"choices": list_train_dirs(current_train_dir)}, + "open_folder_small", + ) + button_directory_path_dir_input = gr.Button( + "📂", + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_directory_path_dir_input.click( + get_folder_path, + outputs=directory_path_dir, + show_progress=False, + ) + with gr.Group(), gr.Row(): + min_new_tokens = gr.Number( + value=20, + label="Min new tokens", + interactive=True, + step=1, + minimum=5, + maximum=300, + ) + max_new_tokens = gr.Number( + value=40, + label="Max new tokens", + interactive=True, + step=1, + minimum=5, + maximum=300, + ) + caption_file_ext = gr.Textbox( + label="Caption file extension", + placeholder="Extension for caption file (e.g., .caption, .txt)", + value=".txt", + interactive=True, + ) + + with gr.Row(): + with gr.Tab("Beam search"): + with gr.Row(): + num_beams = gr.Slider( + minimum=1, + maximum=16, + value=16, + step=1, + interactive=True, + label="Number of beams", + ) + + len_penalty = gr.Slider( + minimum=-1.0, + maximum=2.0, + value=1.0, + step=0.2, + interactive=True, + label="Length Penalty", + info="increase for longer sequence", + ) + + rep_penalty = gr.Slider( + minimum=1.0, + maximum=5.0, + value=1.5, + step=0.5, + interactive=True, + label="Repeat Penalty", + info="larger value prevents repetition", + ) + + caption_button_beam = gr.Button( + value="Caption images", interactive=True, variant="primary" + ) + caption_button_beam.click( + caption_images_beam_search, + inputs=[ + directory_path_dir, + num_beams, + rep_penalty, + len_penalty, + min_new_tokens, + max_new_tokens, + caption_file_ext, + ], + ) + with gr.Tab("Nucleus 
def caption_images(
    train_data_dir: str,
    caption_file_ext: str,
    batch_size: int,
    num_beams: int,
    top_p: float,
    max_length: int,
    min_length: int,
    beam_search: bool,
    prefix: str = "",
    postfix: str = "",
) -> None:
    """
    Caption the images in a directory by running the BLIP ``make_captions.py`` script.

    The function validates its inputs, builds the argument list for
    ``sd-scripts/finetune/make_captions.py``, runs it as a subprocess (without a
    shell, with the sd-scripts folder as working directory) and, when the script
    succeeds, passes the prefix and postfix to ``add_pre_postfix`` for the
    caption files in the folder.

    Nothing is raised for invalid input or a failed run; the problem is logged
    and the function returns.

    Args:
        train_data_dir (str): The directory containing the images to be captioned.
        caption_file_ext (str): The extension for the caption files.
        batch_size (int): The batch size for the captioning process.
        num_beams (int): The number of beams to use in the captioning process.
        top_p (float): The top p value to use in the captioning process.
        max_length (int): The maximum length of the captions.
        min_length (int): The minimum length of the captions.
        beam_search (bool): Whether to use beam search in the captioning process.
        prefix (str): The prefix to add to the captions.
        postfix (str): The postfix to add to the captions.

    Returns:
        None
    """
    # Check if the image folder is provided
    if not train_data_dir:
        log.info("Image folder is missing...")
        return

    # Check if the caption file extension is provided
    if not caption_file_ext:
        log.info("Please provide an extension for the caption files.")
        return

    # GUI number fields can deliver floats (e.g. 1.0) or None when cleared.
    # Normalise the count-like values to int so the command line carries "1"
    # rather than "1.0" or "None".
    # NOTE(review): presumably make_captions.py declares these options as
    # integers - confirm against its argument parser.
    try:
        batch_size, num_beams, max_length, min_length = (
            int(value) for value in (batch_size, num_beams, max_length, min_length)
        )
    except (TypeError, ValueError, OverflowError):
        log.info(
            "Batch size, number of beams, max length and min length must be whole numbers."
        )
        return

    log.info(f"Captioning files in {train_data_dir}...")

    # Construct the command to run make_captions.py
    run_cmd = [
        rf"{PYTHON}",
        rf"{scriptdir}/sd-scripts/finetune/make_captions.py",
        "--batch_size",
        str(batch_size),
        "--num_beams",
        str(num_beams),
        "--top_p",
        str(top_p),
        "--max_length",
        str(max_length),
        "--min_length",
        str(min_length),
    ]

    # Add optional flags to the command
    if beam_search:
        run_cmd.append("--beam_search")
    run_cmd.extend(["--caption_extension", caption_file_ext])

    # Add the directory containing the training data
    run_cmd.append(rf"{train_data_dir}")

    # Add URL for caption model weights
    run_cmd.extend(
        [
            "--caption_weights",
            "https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_large_caption.pth",
        ]
    )

    # Set up the environment
    env = setup_environment()

    # Reconstruct the command string for display only; execution uses the list
    command_to_run = " ".join(run_cmd)
    log.info(f"Executing command: {command_to_run}")

    # Run the command in the sd-scripts folder context
    completed = subprocess.run(
        run_cmd, env=env, shell=False, cwd=rf"{scriptdir}/sd-scripts"
    )

    # Do not post-process (and do not report success) when the script failed:
    # the caption files in the folder would be stale or missing.
    if completed.returncode != 0:
        log.error(
            f"Captioning command failed with exit code {completed.returncode}; "
            "prefix/postfix were not applied."
        )
        return

    # Add prefix and postfix
    add_pre_postfix(
        folder=train_data_dir,
        caption_file_ext=caption_file_ext,
        prefix=prefix,
        postfix=postfix,
    )

    log.info("...captioning done")
class AccelerateLaunch:
    """
    Gradio widgets for the ``accelerate launch`` options plus a helper that
    turns the selected values into command-line arguments.

    Instantiating the class creates the widgets in the currently active Gradio
    context; each widget is stored as an attribute named after its option.
    """

    def __init__(
        self,
        config: "KohyaSSGUIConfig | None" = None,
    ) -> None:
        """
        Create the accelerate launch widgets.

        Parameters:
            config: Object exposing ``get(key, default)`` that supplies initial
                widget values under ``accelerate_launch.*`` keys. When omitted,
                an empty mapping is used so every widget gets its built-in default.
        """
        # A fresh dict per instance avoids sharing one mutable default object
        self.config = {} if config is None else config

        with gr.Accordion("Resource Selection", open=True):
            with gr.Row():
                self.mixed_precision = gr.Dropdown(
                    label="Mixed precision",
                    choices=["no", "fp16", "bf16", "fp8"],
                    value=self.config.get("accelerate_launch.mixed_precision", "fp16"),
                    info="Whether or not to use mixed precision training.",
                )
                self.num_processes = gr.Number(
                    label="Number of processes",
                    value=self.config.get("accelerate_launch.num_processes", 1),
                    # precision=0,
                    step=1,
                    minimum=1,
                    info="The total number of processes to be launched in parallel.",
                )
                self.num_machines = gr.Number(
                    label="Number of machines",
                    value=self.config.get("accelerate_launch.num_machines", 1),
                    # precision=0,
                    step=1,
                    minimum=1,
                    info="The total number of machines used in this training.",
                )
                self.num_cpu_threads_per_process = gr.Slider(
                    minimum=1,
                    # os.cpu_count() returns None when the count is undeterminable
                    maximum=os.cpu_count() or 1,
                    step=1,
                    label="Number of CPU threads per core",
                    value=self.config.get(
                        "accelerate_launch.num_cpu_threads_per_process", 2
                    ),
                    info="The number of CPU threads per process.",
                )
            with gr.Row():
                self.dynamo_backend = gr.Dropdown(
                    label="Dynamo backend",
                    choices=[
                        "no",
                        "eager",
                        "aot_eager",
                        "inductor",
                        "aot_ts_nvfuser",
                        "nvprims_nvfuser",
                        "cudagraphs",
                        "ofi",
                        "fx2trt",
                        "onnxrt",
                        "tensorrt",
                        "ipex",
                        "tvm",
                    ],
                    value=self.config.get("accelerate_launch.dynamo_backend", "no"),
                    info="The backend to use for the dynamo JIT compiler.",
                )
                self.dynamo_mode = gr.Dropdown(
                    label="Dynamo mode",
                    choices=[
                        "default",
                        "reduce-overhead",
                        "max-autotune",
                    ],
                    value=self.config.get("accelerate_launch.dynamo_mode", "default"),
                    info="Choose a mode to optimize your training with dynamo.",
                )
                self.dynamo_use_fullgraph = gr.Checkbox(
                    label="Dynamo use fullgraph",
                    value=self.config.get("accelerate_launch.dynamo_use_fullgraph", False),
                    info="Whether to use full graph mode for dynamo or it is ok to break model into several subgraphs",
                )
                self.dynamo_use_dynamic = gr.Checkbox(
                    label="Dynamo use dynamic",
                    value=self.config.get("accelerate_launch.dynamo_use_dynamic", False),
                    info="Whether to enable dynamic shape tracing.",
                )

        with gr.Accordion("Hardware Selection", open=True):
            with gr.Row():
                self.multi_gpu = gr.Checkbox(
                    label="Multi GPU",
                    value=self.config.get("accelerate_launch.multi_gpu", False),
                    info="Whether or not this should launch a distributed GPU training.",
                )
            with gr.Accordion("Distributed GPUs", open=True):
                with gr.Row():
                    self.gpu_ids = gr.Textbox(
                        label="GPU IDs",
                        value=self.config.get("accelerate_launch.gpu_ids", ""),
                        placeholder="example: 0,1",
                        info=" What GPUs (by id) should be used for training on this machine as a comma-separated list",
                    )
                    self.main_process_port = gr.Number(
                        label="Main process port",
                        value=self.config.get("accelerate_launch.main_process_port", 0),
                        # precision=1,
                        step=1,
                        minimum=0,
                        maximum=65535,
                        info="The port to use to communicate with the machine of rank 0.",
                    )
            with gr.Row():
                self.extra_accelerate_launch_args = gr.Textbox(
                    label="Extra accelerate launch arguments",
                    value=self.config.get(
                        "accelerate_launch.extra_accelerate_launch_args", ""
                    ),
                    placeholder="example: --same_network --machine_rank 4",
                    info="List of extra parameters to pass to accelerate launch",
                )

    @staticmethod
    def run_cmd(run_cmd: list, **kwargs):
        """
        Append accelerate launch arguments to ``run_cmd`` and return it.

        Declared as a static method so it works both as
        ``AccelerateLaunch.run_cmd(...)`` and on an instance.

        Parameters:
            run_cmd (list): Argument list to extend; it is modified in place.
            **kwargs: Option values keyed by option name (``dynamo_backend``,
                ``dynamo_mode``, ``dynamo_use_fullgraph``, ``dynamo_use_dynamic``,
                ``extra_accelerate_launch_args``, ``gpu_ids``,
                ``main_process_port``, ``mixed_precision``, ``multi_gpu``,
                ``num_processes``, ``num_machines``,
                ``num_cpu_threads_per_process``). Missing, empty, ``None``,
                false or non-positive values add nothing.

        Returns:
            list: The same ``run_cmd`` list that was passed in.
        """

        def as_int(key):
            # Missing / None / empty values count as 0 so they are skipped
            # instead of raising inside int() or a comparison.
            return int(kwargs.get(key) or 0)

        if kwargs.get("dynamo_backend"):
            run_cmd.append("--dynamo_backend")
            run_cmd.append(kwargs["dynamo_backend"])

        if kwargs.get("dynamo_mode"):
            run_cmd.append("--dynamo_mode")
            run_cmd.append(kwargs["dynamo_mode"])

        if kwargs.get("dynamo_use_fullgraph"):
            run_cmd.append("--dynamo_use_fullgraph")

        if kwargs.get("dynamo_use_dynamic"):
            run_cmd.append("--dynamo_use_dynamic")

        # NOTE(review): shlex.quote leaves plain tokens untouched but wraps
        # tokens containing shell metacharacters in literal quotes - confirm
        # how callers execute the resulting list.
        if kwargs.get("extra_accelerate_launch_args"):
            # Double quotes are dropped before splitting on whitespace
            extra_accelerate_launch_args = kwargs[
                "extra_accelerate_launch_args"
            ].replace('"', "")
            for arg in extra_accelerate_launch_args.split():
                run_cmd.append(shlex.quote(arg))

        if kwargs.get("gpu_ids"):
            run_cmd.append("--gpu_ids")
            run_cmd.append(shlex.quote(kwargs["gpu_ids"]))

        main_process_port = as_int("main_process_port")
        if main_process_port > 0:
            run_cmd.append("--main_process_port")
            run_cmd.append(str(main_process_port))

        if kwargs.get("mixed_precision"):
            run_cmd.append("--mixed_precision")
            run_cmd.append(shlex.quote(kwargs["mixed_precision"]))

        if kwargs.get("multi_gpu"):
            run_cmd.append("--multi_gpu")

        # Count-like options share the same "positive integer" rule
        for option in (
            "num_processes",
            "num_machines",
            "num_cpu_threads_per_process",
        ):
            count = as_int(option)
            if count > 0:
                run_cmd.append(f"--{option}")
                run_cmd.append(str(count))

        return run_cmd
+ """ + + def __init__( + self, + headless: bool = False, + finetuning: bool = False, + training_type: str = "", + config: dict = {}, + ) -> None: + """ + Initializes the AdvancedTraining class with given settings. + + Parameters: + headless (bool): Run in headless mode without GUI. + finetuning (bool): Enable model fine-tuning. + training_type (str): The type of training to be performed. + config (dict): Configuration options for the training process. + """ + self.headless = headless + self.finetuning = finetuning + self.training_type = training_type + self.config = config + + # Determine the current directories for VAE and output, falling back to defaults if not specified. + self.current_vae_dir = self.config.get("advanced.vae_dir", "./models/vae") + self.current_state_dir = self.config.get("advanced.state_dir", "./outputs") + self.current_log_tracker_config_dir = self.config.get( + "advanced.log_tracker_config_dir", "./logs" + ) + + # Define the behavior for changing noise offset type. + def noise_offset_type_change( + noise_offset_type: str, + ) -> Tuple[gr.Group, gr.Group]: + """ + Returns a tuple of Gradio Groups with visibility set based on the noise offset type. + + Parameters: + noise_offset_type (str): The selected noise offset type. + + Returns: + Tuple[gr.Group, gr.Group]: A tuple containing two Gradio Group elements with their visibility set. + """ + if noise_offset_type == "Original": + return (gr.Group(visible=True), gr.Group(visible=False)) + else: + return (gr.Group(visible=False), gr.Group(visible=True)) + + # GUI elements are only visible when not fine-tuning. + with gr.Row(visible=not finetuning): + # Exclude token padding option for LoRA training type. 
+ if training_type != "lora": + self.no_token_padding = gr.Checkbox( + label="No token padding", + value=self.config.get("advanced.no_token_padding", False), + ) + self.gradient_accumulation_steps = gr.Slider( + label="Gradient accumulate steps", + info="Number of updates steps to accumulate before performing a backward/update pass", + value=self.config.get("advanced.gradient_accumulation_steps", 1), + minimum=1, + maximum=120, + step=1, + ) + self.weighted_captions = gr.Checkbox( + label="Weighted captions", + value=self.config.get("advanced.weighted_captions", False), + ) + with gr.Group(), gr.Row(visible=not finetuning): + self.prior_loss_weight = gr.Number( + label="Prior loss weight", + value=self.config.get("advanced.prior_loss_weight", 1.0), + ) + + def list_vae_files(path): + self.current_vae_dir = path if not path == "" else "." + return list(list_files(path, exts=[".ckpt", ".safetensors"], all=True)) + + self.vae = gr.Dropdown( + label="VAE (Optional: Path to checkpoint of vae for training)", + interactive=True, + choices=[self.config.get("advanced.vae_dir", "")] + + list_vae_files(self.current_vae_dir), + value=self.config.get("advanced.vae_dir", ""), + allow_custom_value=True, + ) + create_refresh_button( + self.vae, + lambda: None, + lambda: { + "choices": [self.config.get("advanced.vae_dir", "")] + + list_vae_files(self.current_vae_dir) + }, + "open_folder_small", + ) + self.vae_button = gr.Button( + "📂", elem_id="open_folder_small", visible=(not headless) + ) + self.vae_button.click( + get_any_file_path, + outputs=self.vae, + show_progress=False, + ) + + self.vae.change( + fn=lambda path: gr.Dropdown( + choices=[self.config.get("advanced.vae_dir", "")] + + list_vae_files(path) + ), + inputs=self.vae, + outputs=self.vae, + show_progress=False, + ) + + with gr.Row(): + self.additional_parameters = gr.Textbox( + label="Additional parameters", + placeholder='(Optional) Use to provide additional parameters not handled by the GUI. 
Eg: --some_parameters "value"', + value=self.config.get("advanced.additional_parameters", ""), + ) + with gr.Accordion("Scheduled Huber Loss", open=False): + with gr.Row(): + self.loss_type = gr.Dropdown( + label="Loss type", + choices=["huber", "smooth_l1", "l2"], + value=self.config.get("advanced.loss_type", "l2"), + info="The type of loss to use and whether it's scheduled based on the timestep", + ) + self.huber_schedule = gr.Dropdown( + label="Huber schedule", + choices=[ + "constant", + "exponential", + "snr", + ], + value=self.config.get("advanced.huber_schedule", "snr"), + info="The type of loss to use and whether it's scheduled based on the timestep", + ) + self.huber_c = gr.Number( + label="Huber C", + value=self.config.get("advanced.huber_c", 0.1), + minimum=0.0, + maximum=1.0, + step=0.01, + info="The huber loss parameter. Only used if one of the huber loss modes (huber or smooth l1) is selected with loss_type", + ) + + with gr.Row(): + self.save_every_n_steps = gr.Number( + label="Save every N steps", + value=self.config.get("advanced.save_every_n_steps", 0), + precision=0, + info="(Optional) The model is saved every specified steps", + ) + self.save_last_n_steps = gr.Number( + label="Save last N steps", + value=self.config.get("advanced.save_last_n_steps", 0), + precision=0, + info="(Optional) Save only the specified number of models (old models will be deleted)", + ) + self.save_last_n_steps_state = gr.Number( + label="Save last N steps state", + value=self.config.get("advanced.save_last_n_steps_state", 0), + precision=0, + info="(Optional) Save only the specified number of states (old models will be deleted)", + ) + with gr.Row(): + + def full_options_update(full_fp16, full_bf16): + full_fp16_active = True + full_bf16_active = True + + if full_fp16: + full_bf16_active = False + if full_bf16: + full_fp16_active = False + return gr.Checkbox( + interactive=full_fp16_active, + ), gr.Checkbox(interactive=full_bf16_active) + + self.keep_tokens = gr.Slider( 
+ label="Keep n tokens", + value=self.config.get("advanced.keep_tokens", 0), + minimum=0, + maximum=32, + step=1, + ) + self.clip_skip = gr.Slider( + label="Clip skip", + value=self.config.get("advanced.clip_skip", 1), + minimum=0, + maximum=12, + step=1, + ) + self.max_token_length = gr.Dropdown( + label="Max Token Length", + choices=[ + 75, + 150, + 225, + ], + info="max token length of text encoder", + value=self.config.get("advanced.max_token_length", 75), + ) + + with gr.Row(): + if training_type == "lora": + self.fp8_base = gr.Checkbox( + label="fp8 base training (experimental)", + info="U-Net and Text Encoder can be trained with fp8 (experimental)", + value=self.config.get("advanced.fp8_base", False), + ) + self.full_fp16 = gr.Checkbox( + label="Full fp16 training (experimental)", + value=self.config.get("advanced.full_fp16", False), + ) + self.full_bf16 = gr.Checkbox( + label="Full bf16 training (experimental)", + value=self.config.get("advanced.full_bf16", False), + info="Required bitsandbytes >= 0.36.0", + ) + + self.full_fp16.change( + full_options_update, + inputs=[self.full_fp16, self.full_bf16], + outputs=[self.full_fp16, self.full_bf16], + ) + self.full_bf16.change( + full_options_update, + inputs=[self.full_fp16, self.full_bf16], + outputs=[self.full_fp16, self.full_bf16], + ) + + with gr.Row(): + self.gradient_checkpointing = gr.Checkbox( + label="Gradient checkpointing", + value=self.config.get("advanced.gradient_checkpointing", False), + ) + self.shuffle_caption = gr.Checkbox( + label="Shuffle caption", + value=self.config.get("advanced.shuffle_caption", False), + ) + self.persistent_data_loader_workers = gr.Checkbox( + label="Persistent data loader", + value=self.config.get("advanced.persistent_data_loader_workers", False), + ) + self.mem_eff_attn = gr.Checkbox( + label="Memory efficient attention", + value=self.config.get("advanced.mem_eff_attn", False), + ) + with gr.Row(): + self.xformers = gr.Dropdown( + label="CrossAttention", + 
choices=["none", "sdpa", "xformers"], + value=self.config.get("advanced.xformers", "xformers"), + ) + self.color_aug = gr.Checkbox( + label="Color augmentation", + value=self.config.get("advanced.color_aug", False), + info="Enable weak color augmentation", + ) + self.flip_aug = gr.Checkbox( + label="Flip augmentation", + value=getattr(self.config, "advanced.flip_aug", False), + info="Enable horizontal flip augmentation", + ) + self.masked_loss = gr.Checkbox( + label="Masked loss", + value=self.config.get("advanced.masked_loss", False), + info="Apply mask for calculating loss. conditioning_data_dir is required for dataset", + ) + with gr.Row(): + self.scale_v_pred_loss_like_noise_pred = gr.Checkbox( + label="Scale v prediction loss", + value=self.config.get( + "advanced.scale_v_pred_loss_like_noise_pred", False + ), + info="Only for SD v2 models. By scaling the loss according to the time step, the weights of global noise prediction and local noise prediction become the same, and the improvement of details may be expected.", + ) + self.min_snr_gamma = gr.Slider( + label="Min SNR gamma", + value=self.config.get("advanced.min_snr_gamma", 0), + minimum=0, + maximum=20, + step=1, + info="Recommended value of 5 when used", + ) + self.debiased_estimation_loss = gr.Checkbox( + label="Debiased Estimation loss", + value=self.config.get("advanced.debiased_estimation_loss", False), + info="Automates the processing of noise, allowing for faster model fitting, as well as balancing out color issues. 
Do not use if Min SNR gamma is specified.", + ) + with gr.Row(): + # self.sdpa = gr.Checkbox(label='Use sdpa', value=False, info='Use sdpa for CrossAttention') + self.bucket_no_upscale = gr.Checkbox( + label="Don't upscale bucket resolution", + value=self.config.get("advanced.bucket_no_upscale", True), + ) + self.bucket_reso_steps = gr.Slider( + label="Bucket resolution steps", + value=self.config.get("advanced.bucket_reso_steps", 64), + minimum=1, + maximum=128, + ) + self.random_crop = gr.Checkbox( + label="Random crop instead of center crop", + value=self.config.get("advanced.random_crop", False), + ) + self.v_pred_like_loss = gr.Slider( + label="V Pred like loss", + value=self.config.get("advanced.v_pred_like_loss", 0), + minimum=0, + maximum=1, + step=0.01, + info="Recommended value of 0.5 when used", + ) + + with gr.Row(): + self.min_timestep = gr.Slider( + label="Min Timestep", + value=self.config.get("advanced.min_timestep", 0), + step=1, + minimum=0, + maximum=1000, + info="Values greater than 0 will make the model more img2img focussed. 0 = image only", + ) + self.max_timestep = gr.Slider( + label="Max Timestep", + value=self.config.get("advanced.max_timestep", 1000), + step=1, + minimum=0, + maximum=1000, + info="Values lower than 1000 will make the model more img2img focussed. 
1000 = noise only", + ) + + with gr.Row(): + self.noise_offset_type = gr.Dropdown( + label="Noise offset type", + choices=[ + "Original", + "Multires", + ], + value=self.config.get("advanced.noise_offset_type", "Original"), + scale=1, + ) + with gr.Row(visible=True) as self.noise_offset_original: + self.noise_offset = gr.Slider( + label="Noise offset", + value=self.config.get("advanced.noise_offset", 0), + minimum=0, + maximum=1, + step=0.01, + info="Recommended values are 0.05 - 0.15", + ) + self.noise_offset_random_strength = gr.Checkbox( + label="Noise offset random strength", + value=self.config.get( + "advanced.noise_offset_random_strength", False + ), + info="Use random strength between 0~noise_offset for noise offset", + ) + self.adaptive_noise_scale = gr.Slider( + label="Adaptive noise scale", + value=self.config.get("advanced.adaptive_noise_scale", 0), + minimum=-1, + maximum=1, + step=0.001, + info="Add `latent mean absolute value * this value` to noise_offset", + ) + with gr.Row(visible=False) as self.noise_offset_multires: + self.multires_noise_iterations = gr.Slider( + label="Multires noise iterations", + value=self.config.get("advanced.multires_noise_iterations", 0), + minimum=0, + maximum=64, + step=1, + info="Enable multires noise (recommended values are 6-10)", + ) + self.multires_noise_discount = gr.Slider( + label="Multires noise discount", + value=self.config.get("advanced.multires_noise_discount", 0.3), + minimum=0, + maximum=1, + step=0.01, + info="Recommended values are 0.8. For LoRAs with small datasets, 0.1-0.3", + ) + with gr.Row(visible=True): + self.ip_noise_gamma = gr.Slider( + label="IP noise gamma", + value=self.config.get("advanced.ip_noise_gamma", 0), + minimum=0, + maximum=1, + step=0.01, + info="enable input perturbation noise. used for regularization. 
recommended value: around 0.1", + ) + self.ip_noise_gamma_random_strength = gr.Checkbox( + label="IP noise gamma random strength", + value=self.config.get( + "advanced.ip_noise_gamma_random_strength", False + ), + info="Use random strength between 0~ip_noise_gamma for input perturbation noise", + ) + self.noise_offset_type.change( + noise_offset_type_change, + inputs=[self.noise_offset_type], + outputs=[ + self.noise_offset_original, + self.noise_offset_multires, + ], + ) + with gr.Row(): + self.caption_dropout_every_n_epochs = gr.Number( + label="Dropout caption every n epochs", + value=self.config.get("advanced.caption_dropout_every_n_epochs", 0), + ) + self.caption_dropout_rate = gr.Slider( + label="Rate of caption dropout", + value=self.config.get("advanced.caption_dropout_rate", 0), + minimum=0, + maximum=1, + ) + self.vae_batch_size = gr.Slider( + label="VAE batch size", + minimum=0, + maximum=32, + value=self.config.get("advanced.vae_batch_size", 0), + step=1, + ) + with gr.Group(), gr.Row(): + self.save_state = gr.Checkbox( + label="Save training state", + value=self.config.get("advanced.save_state", False), + info="Save training state (including optimizer states etc.) when saving models" + ) + + self.save_state_on_train_end = gr.Checkbox( + label="Save training state at end of training", + value=self.config.get("advanced.save_state_on_train_end", False), + info="Save training state (including optimizer states etc.) on train end" + ) + + def list_state_dirs(path): + self.current_state_dir = path if not path == "" else "." 
+ return list(list_dirs(path)) + + self.resume = gr.Dropdown( + label='Resume from saved training state (path to "last-state" state folder)', + choices=[self.config.get("advanced.state_dir", "")] + + list_state_dirs(self.current_state_dir), + value=self.config.get("advanced.state_dir", ""), + interactive=True, + allow_custom_value=True, + info="Saved state to resume training from" + ) + create_refresh_button( + self.resume, + lambda: None, + lambda: { + "choices": [self.config.get("advanced.state_dir", "")] + + list_state_dirs(self.current_state_dir) + }, + "open_folder_small", + ) + self.resume_button = gr.Button( + "📂", elem_id="open_folder_small", visible=(not headless) + ) + self.resume_button.click( + get_folder_path, + outputs=self.resume, + show_progress=False, + ) + self.resume.change( + fn=lambda path: gr.Dropdown( + choices=[self.config.get("advanced.state_dir", "")] + + list_state_dirs(path) + ), + inputs=self.resume, + outputs=self.resume, + show_progress=False, + ) + self.max_data_loader_n_workers = gr.Number( + label="Max num workers for DataLoader", + info="Override number of epoch. 
Default: 0", + step=1, + minimum=0, + value=self.config.get("advanced.max_data_loader_n_workers", 0), + ) + with gr.Row(): + self.log_with = gr.Dropdown( + label="Logging", + choices=["","wandb", "tensorboard","all"], + value="", + info="Loggers to use, tensorboard will be used as the default.", + ) + self.wandb_api_key = gr.Textbox( + label="WANDB API Key", + value=self.config.get("advanced.wandb_api_key", ""), + placeholder="(Optional)", + info="Users can obtain and/or generate an api key in the their user settings on the website: https://wandb.ai/login", + ) + self.wandb_run_name = gr.Textbox( + label="WANDB run name", + value=self.config.get("advanced.wandb_run_name", ""), + placeholder="(Optional)", + info="The name of the specific wandb session", + ) + with gr.Group(), gr.Row(): + + def list_log_tracker_config_files(path): + self.current_log_tracker_config_dir = path if not path == "" else "." + return list(list_files(path, exts=[".json"], all=True)) + + self.log_tracker_name = gr.Textbox( + label="Log tracker name", + value=self.config.get("advanced.log_tracker_name", ""), + placeholder="(Optional)", + info="Name of tracker to use for logging, default is script-specific default name", + ) + self.log_tracker_config = gr.Dropdown( + label="Log tracker config", + choices=[self.config.get("log_tracker_config_dir", "")] + + list_log_tracker_config_files(self.current_log_tracker_config_dir), + value=self.config.get("log_tracker_config_dir", ""), + info="Path to tracker config file to use for logging", + interactive=True, + allow_custom_value=True, + ) + create_refresh_button( + self.log_tracker_config, + lambda: None, + lambda: { + "choices": [self.config.get("log_tracker_config_dir", "")] + + list_log_tracker_config_files(self.current_log_tracker_config_dir) + }, + "open_folder_small", + ) + self.log_tracker_config_button = gr.Button( + document_symbol, elem_id="open_folder_small", visible=(not headless) + ) + self.log_tracker_config_button.click( + 
class BasicTraining:
    """
    Builds the "basic" training controls of the GUI (batch size, epochs,
    seed, scheduler, optimizer, learning rates, resolution and buckets).

    Every control is created in the Gradio layout context that is active
    when the class is instantiated, and is stored as an attribute so the
    caller can wire it into its own events. Initial values are read from
    ``config`` with dotted ``basic.*`` keys, falling back to built-in defaults.

    Attributes:
        sdxl_checkbox (gr.Checkbox): Checkbox to enable SDXL training.
        learning_rate_value (str): Initial learning rate value.
        lr_scheduler_value (str): Initial learning rate scheduler value.
        lr_warmup_value (str): Initial learning rate warmup value.
        finetuning (bool): If True, enables fine-tuning of the model.
        dreambooth (bool): If True, enables Dreambooth training.
    """

    def __init__(
        self,
        sdxl_checkbox: gr.Checkbox,
        learning_rate_value: float = "1e-6",
        lr_scheduler_value: str = "constant",
        lr_warmup_value: float = "0",
        finetuning: bool = False,
        dreambooth: bool = False,
        config: dict = None,
    ) -> None:
        """
        Initializes the BasicTraining object and immediately builds its UI.

        Args:
            sdxl_checkbox (gr.Checkbox): Checkbox to enable SDXL training.
            learning_rate_value: Fallback learning rate used when the config
                has no ``basic.learning_rate*`` entry (default is the numeric
                string ``"1e-6"``).
            lr_scheduler_value (str): Fallback learning rate scheduler.
            lr_warmup_value: Fallback learning rate warmup (default ``"0"``).
            finetuning (bool): If True, enables fine-tuning of the model.
            dreambooth (bool): If True, enables Dreambooth training.
            config: Object exposing ``get(key, default)``; ``None`` means
                "no configuration" and is replaced by a fresh empty dict.
        """
        self.sdxl_checkbox = sdxl_checkbox
        self.learning_rate_value = learning_rate_value
        self.lr_scheduler_value = lr_scheduler_value
        self.lr_warmup_value = lr_warmup_value
        self.finetuning = finetuning
        self.dreambooth = dreambooth
        # A None sentinel avoids sharing one dict between all instances.
        self.config = {} if config is None else config
        # Warmup value remembered while the "constant" scheduler forces it to 0.
        self.old_lr_warmup = 0

        # Initialize the UI components
        self.initialize_ui_components()

    def initialize_ui_components(self) -> None:
        """
        Initializes the UI components for the training settings.

        The call order is the on-screen order of the rows, so it must not be
        rearranged.
        """
        self.init_training_controls()
        self.init_precision_and_resources_controls()
        self.init_lr_and_optimizer_controls()
        self.init_grad_and_lr_controls()
        self.init_learning_rate_controls()
        self.init_scheduler_controls()
        self.init_resolution_and_bucket_controls()
        # Event wiring goes last: it references controls created above.
        self.setup_sdxl_checkbox_behavior()

    def init_training_controls(self) -> None:
        """
        Initializes the training controls for the model (batch size, epochs,
        step limits, save interval and caption extension).
        """
        with gr.Row():
            # The configured batch size is the slider's *value*; the slider
            # always moves in whole steps of 1.
            self.train_batch_size = gr.Slider(
                minimum=1,
                maximum=64,
                label="Train batch size",
                value=self.config.get("basic.train_batch_size", 1),
                step=1,
            )
            self.epoch = gr.Number(
                label="Epoch", value=self.config.get("basic.epoch", 1), precision=0
            )
            self.max_train_epochs = gr.Number(
                label="Max train epoch",
                info="training epochs (overrides max_train_steps). 0 = no override",
                step=1,
                # precision=0,
                minimum=0,
                value=self.config.get("basic.max_train_epochs", 0),
            )
            self.max_train_steps = gr.Number(
                label="Max train steps",
                info="Overrides # training steps. 0 = no override",
                step=1,
                # precision=0,
                value=self.config.get("basic.max_train_steps", 1600),
            )
            self.save_every_n_epochs = gr.Number(
                label="Save every N epochs",
                value=self.config.get("basic.save_every_n_epochs", 1),
                precision=0,
            )
            self.caption_extension = gr.Dropdown(
                label="Caption file extension",
                choices=["", ".cap", ".caption", ".txt"],
                value=".txt",
                interactive=True,
            )

    def init_precision_and_resources_controls(self) -> None:
        """
        Initializes the seed and latent-caching controls for the model.
        """
        with gr.Row():
            self.seed = gr.Number(
                label="Seed",
                # precision=0,
                step=1,
                minimum=0,
                value=self.config.get("basic.seed", 0),
                info="Set to 0 to make random",
            )
            self.cache_latents = gr.Checkbox(
                label="Cache latents",
                value=self.config.get("basic.cache_latents", True),
            )
            self.cache_latents_to_disk = gr.Checkbox(
                label="Cache latents to disk",
                value=self.config.get("basic.cache_latents_to_disk", False),
            )

    def init_lr_and_optimizer_controls(self) -> None:
        """
        Initializes the learning rate scheduler and optimizer dropdowns.
        """
        with gr.Row():
            self.lr_scheduler = gr.Dropdown(
                label="LR Scheduler",
                choices=[
                    "adafactor",
                    "constant",
                    "constant_with_warmup",
                    "cosine",
                    "cosine_with_restarts",
                    "linear",
                    "polynomial",
                ],
                value=self.config.get("basic.lr_scheduler", self.lr_scheduler_value),
            )

            self.optimizer = gr.Dropdown(
                label="Optimizer",
                choices=[
                    "AdamW",
                    "AdamW8bit",
                    "Adafactor",
                    "DAdaptation",
                    "DAdaptAdaGrad",
                    "DAdaptAdam",
                    "DAdaptAdan",
                    "DAdaptAdanIP",
                    "DAdaptAdamPreprint",
                    "DAdaptLion",
                    "DAdaptSGD",
                    "Lion",
                    "Lion8bit",
                    "PagedAdamW8bit",
                    "PagedAdamW32bit",
                    "PagedLion8bit",
                    "Prodigy",
                    "SGDNesterov",
                    "SGDNesterov8bit",
                ],
                value=self.config.get("basic.optimizer", "AdamW8bit"),
                interactive=True,
            )

    def init_grad_and_lr_controls(self) -> None:
        """
        Initializes the max-grad-norm slider and the free-form extra-argument
        textboxes for the scheduler and the optimizer.
        """
        with gr.Row():
            self.max_grad_norm = gr.Slider(
                label="Max grad norm",
                value=self.config.get("basic.max_grad_norm", 1.0),
                minimum=0.0,
                maximum=1.0,
                interactive=True,
            )
            self.lr_scheduler_args = gr.Textbox(
                label="LR scheduler extra arguments",
                lines=2,
                placeholder="(Optional) eg: milestones=[1,10,30,50] gamma=0.1",
                value=self.config.get("basic.lr_scheduler_args", ""),
            )
            self.optimizer_args = gr.Textbox(
                label="Optimizer extra arguments",
                lines=2,
                placeholder="(Optional) eg: relative_step=True scale_parameter=True warmup_init=True",
                value=self.config.get("basic.optimizer_args", ""),
            )

    def init_learning_rate_controls(self) -> None:
        """
        Initializes the learning rate inputs (Unet, TE, TE1, TE2), the warmup
        slider, and the handler that disables warmup for the "constant"
        scheduler.
        """
        with gr.Row():
            # The main field is labelled for the Unet only in the modes that
            # also expose a separate text-encoder learning rate.
            lr_label = (
                "Learning rate Unet"
                if self.finetuning or self.dreambooth
                else "Learning rate"
            )
            self.learning_rate = gr.Number(
                label=lr_label,
                value=self.config.get("basic.learning_rate", self.learning_rate_value),
                minimum=0,
                maximum=1,
                info="Set to 0 to not train the Unet",
            )
            self.learning_rate_te = gr.Number(
                label="Learning rate TE",
                value=self.config.get(
                    "basic.learning_rate_te", self.learning_rate_value
                ),
                visible=self.finetuning or self.dreambooth,
                minimum=0,
                maximum=1,
                info="Set to 0 to not train the Text Encoder",
            )
            # TE1/TE2 start hidden; update_learning_rate_te reveals them.
            self.learning_rate_te1 = gr.Number(
                label="Learning rate TE1",
                value=self.config.get(
                    "basic.learning_rate_te1", self.learning_rate_value
                ),
                visible=False,
                minimum=0,
                maximum=1,
                info="Set to 0 to not train the Text Encoder 1",
            )
            self.learning_rate_te2 = gr.Number(
                label="Learning rate TE2",
                value=self.config.get(
                    "basic.learning_rate_te2", self.learning_rate_value
                ),
                visible=False,
                minimum=0,
                maximum=1,
                info="Set to 0 to not train the Text Encoder 2",
            )
            self.lr_warmup = gr.Slider(
                label="LR warmup (% of total steps)",
                value=self.config.get("basic.lr_warmup", self.lr_warmup_value),
                minimum=0,
                maximum=100,
                step=1,
            )

            def lr_scheduler_changed(scheduler, value):
                """Lock warmup at 0 for "constant"; otherwise restore and unlock it."""
                if scheduler == "constant":
                    # Remember the user's value so it can be restored later.
                    self.old_lr_warmup = value
                    value = 0
                    interactive = False
                    info = "Can't use LR warmup with LR Scheduler constant... setting to 0 and disabling field..."
                else:
                    if self.old_lr_warmup != 0:
                        value = self.old_lr_warmup
                        self.old_lr_warmup = 0
                    interactive = True
                    info = ""
                return gr.Slider(value=value, interactive=interactive, info=info)

            self.lr_scheduler.change(
                lr_scheduler_changed,
                inputs=[self.lr_scheduler, self.lr_warmup],
                outputs=self.lr_warmup,
            )

    def init_scheduler_controls(self) -> None:
        """
        Initializes the scheduler cycle-count and power inputs; the row is
        hidden when finetuning.
        """
        with gr.Row(visible=not self.finetuning):
            self.lr_scheduler_num_cycles = gr.Number(
                label="LR # cycles",
                minimum=1,
                # precision=0, # round to nearest integer
                step=1,  # Increment value by 1
                info="Number of restarts for cosine scheduler with restarts",
                value=self.config.get("basic.lr_scheduler_num_cycles", 1),
            )
            self.lr_scheduler_power = gr.Number(
                label="LR power",
                minimum=0.0,
                step=0.01,
                info="Polynomial power for polynomial scheduler",
                value=self.config.get("basic.lr_scheduler_power", 1.0),
            )

    def init_resolution_and_bucket_controls(self) -> None:
        """
        Initializes the resolution, text-encoder stop point and bucket
        controls; the row is hidden when finetuning.
        """
        with gr.Row(visible=not self.finetuning):
            self.max_resolution = gr.Textbox(
                label="Max resolution",
                value=self.config.get("basic.max_resolution", "512,512"),
                placeholder="512,512",
            )
            self.stop_text_encoder_training = gr.Slider(
                minimum=-1,
                maximum=100,
                value=self.config.get("basic.stop_text_encoder_training", 0),
                step=1,
                label="Stop TE (% of total steps)",
            )
            self.enable_bucket = gr.Checkbox(
                label="Enable buckets",
                value=self.config.get("basic.enable_bucket", True),
            )
            self.min_bucket_reso = gr.Slider(
                label="Minimum bucket resolution",
                value=self.config.get("basic.min_bucket_reso", 256),
                minimum=64,
                maximum=4096,
                step=64,
                info="Minimum size in pixel a bucket can be (>= 64)",
            )
            self.max_bucket_reso = gr.Slider(
                label="Maximum bucket resolution",
                value=self.config.get("basic.max_bucket_reso", 2048),
                minimum=64,
                maximum=4096,
                step=64,
                info="Maximum size in pixel a bucket can be (>= 64)",
            )

    def setup_sdxl_checkbox_behavior(self) -> None:
        """
        Wires the SDXL checkbox so that toggling it re-evaluates which
        text-encoder learning rate fields are visible.
        """
        self.sdxl_checkbox.change(
            self.update_learning_rate_te,
            inputs=[
                self.sdxl_checkbox,
                # Hidden checkboxes carry the two constructor flags into the
                # handler as ordinary event inputs.
                gr.Checkbox(value=self.finetuning, visible=False),
                gr.Checkbox(value=self.dreambooth, visible=False),
            ],
            outputs=[
                self.learning_rate_te,
                self.learning_rate_te1,
                self.learning_rate_te2,
            ],
        )

    def update_learning_rate_te(
        self,
        sdxl_checkbox: gr.Checkbox,
        finetuning: bool,
        dreambooth: bool,
    ) -> Tuple[gr.Number, gr.Number, gr.Number]:
        """
        Updates the visibility of the learning rate TE, TE1, and TE2 fields.

        Args:
            sdxl_checkbox: Current state of the SDXL checkbox.
            finetuning (bool): Whether finetuning is enabled.
            dreambooth (bool): Whether dreambooth is enabled.

        Returns:
            Tuple[gr.Number, gr.Number, gr.Number]: Updates for TE, TE1 and
            TE2. All are hidden unless finetuning or dreambooth is set; then
            TE is shown without SDXL and TE1/TE2 are shown with SDXL.
        """
        visibility_condition = finetuning or dreambooth
        return (
            gr.Number(visible=(not sdxl_checkbox and visibility_condition)),
            gr.Number(visible=(sdxl_checkbox and visibility_condition)),
            gr.Number(visible=(sdxl_checkbox and visibility_condition)),
        )
def kill_command(self):
    """
    Kill the currently running command together with all of its descendants.

    When nothing is running, the stored process handle is cleared instead.
    Failures while killing are logged and never propagated.

    Returns:
    - tuple: two ``gr.Button`` updates, the first visible and the second
      visible only in headless mode.
    """
    if not self.is_running():
        self.process = None
        log.info("There is no running process to kill.")
    else:
        try:
            # Descendants are killed first, then the root process itself.
            root_proc = psutil.Process(self.process.pid)
            for descendant in root_proc.children(recursive=True):
                descendant.kill()
            root_proc.kill()
            log.info("The running process has been terminated.")
        except psutil.NoSuchProcess:
            # The process went away between the is_running() check and the kill.
            log.info(
                "The process does not exist. It might have terminated before the kill command was issued."
            )
        except Exception as err:
            # Any other failure is reported but not raised.
            log.info(f"Error when terminating process: {err}")

    return gr.Button(visible=True), gr.Button(visible=self.headless)
def list_config_dir(self, path: str) -> list:
    """
    Record ``path`` as the current configuration directory and list its
    ``.json`` files.

    Parameters:
    - path (str): The directory to look in; an empty string means ".".

    Returns:
    - list: Whatever ``list_files`` yields for that directory, as a list.
    """
    # Side effect: the refresh button later re-lists this remembered directory.
    self.current_config_dir = "." if path == "" else path
    json_files = list_files(self.current_config_dir, exts=[".json"], all=True)
    return list(json_files)
+ with gr.Row(): + # Dropdown for selecting or entering the name of a configuration file. + self.config_file_name = gr.Dropdown( + label="Load/Save Config file", + choices=[self.config.get("config_dir", "")] + self.list_config_dir(self.current_config_dir), + value=self.config.get("config_dir", ""), + interactive=True, + allow_custom_value=True, + ) + + # Button to refresh the list of configuration files in the dropdown. + create_refresh_button( + self.config_file_name, + lambda: None, # Placeholder for potential future functionality. + lambda: { + "choices": [""] + self.list_config_dir(self.current_config_dir) + }, + "open_folder_small", + ) + + # Buttons for opening, saving, and loading configuration files, displayed conditionally based on headless mode. + self.button_open_config = gr.Button( + "📂", + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not self.headless), + ) + self.button_save_config = gr.Button( + "💾", + elem_id="open_folder_small", + elem_classes=["tool"], + ) + self.button_load_config = gr.Button( + "↩️ ", + elem_id="open_folder_small", + elem_classes=["tool"], + ) + + # Handler for change events on the configuration file dropdown, allowing dynamic update of choices. + self.config_file_name.change( + fn=lambda path: gr.Dropdown(choices=[""] + self.list_config_dir(path)), + inputs=self.config_file_name, + outputs=self.config_file_name, + show_progress=False, + ) diff --git a/kohya_gui/class_folders.py b/kohya_gui/class_folders.py new file mode 100644 index 0000000000000000000000000000000000000000..621319f163483d104b685e2c1d72d4581f9fa878 --- /dev/null +++ b/kohya_gui/class_folders.py @@ -0,0 +1,224 @@ +import gradio as gr +import os +from .common_gui import get_folder_path, scriptdir, list_dirs, create_refresh_button + + +class Folders: + """ + A class to handle folder operations in the GUI. + """ + + def __init__( + self, finetune: bool = False, headless: bool = False, config: dict = {} + ): + """ + Initialize the Folders class. 
+ + Parameters: + - finetune (bool): Whether to finetune the model. + - headless (bool): Whether to run in headless mode. + """ + self.headless = headless + self.finetune = finetune + + # Load kohya_ss GUI configs from config.toml if it exist + self.config = config + + # Set default directories if not provided + self.current_output_dir = self.config.get( + "output_dir", os.path.join(scriptdir, "outputs") + ) + self.current_logging_dir = self.config.get( + "logging_dir", os.path.join(scriptdir, "logs") + ) + self.current_reg_data_dir = self.config.get( + "reg_data_dir", os.path.join(scriptdir, "reg") + ) + + # Create directories if they don't exist + self.create_directory_if_not_exists(self.current_output_dir) + self.create_directory_if_not_exists(self.current_logging_dir) + + # Create the GUI for folder selection + self.create_folders_gui() + + def create_directory_if_not_exists(self, directory: str) -> None: + """ + Create a directory if it does not exist. + + Parameters: + - directory (str): The directory to create. + """ + if ( + directory is not None + and directory.strip() != "" + and not os.path.exists(directory) + ): + os.makedirs(directory, exist_ok=True) + + def list_output_dirs(self, path: str) -> list: + """ + List directories in the output directory. + + Parameters: + - path (str): The path to list directories from. + + Returns: + - list: A list of directories. + """ + self.current_output_dir = path if not path == "" else "." + return list(list_dirs(path)) + + def list_logging_dirs(self, path: str) -> list: + """ + List directories in the logging directory. + + Parameters: + - path (str): The path to list directories from. + + Returns: + - list: A list of directories. + """ + self.current_logging_dir = path if not path == "" else "." + return list(list_dirs(path)) + + def list_reg_data_dirs(self, path: str) -> list: + """ + List directories in the regularization data directory. + + Parameters: + - path (str): The path to list directories from. 
def create_folders_gui(self) -> None:
    """
    Create the GUI for folder selection.

    Builds three dropdowns (output, regularisation and logging directory),
    each accompanied by a refresh button and a folder-picker button, and
    wires a change handler on every dropdown that re-lists the folders for
    the newly entered path. The created components are stored on `self`
    (`output_dir`, `reg_data_dir`, `logging_dir` and their `*_folder`
    buttons). The folder-picker buttons are hidden when `self.headless`
    is true.
    """
    # First row: output directory and regularisation directory.
    with gr.Row():
        # Output directory dropdown; the configured value is offered as the
        # first choice, followed by the folders listed for the current
        # output directory.
        self.output_dir = gr.Dropdown(
            label="Output directory for trained model",
            choices=[self.config.get("folders.output_dir", "")] + self.list_output_dirs(self.current_output_dir),
            value=self.config.get("folders.output_dir", ""),
            interactive=True,
            allow_custom_value=True,
        )
        # Refresh button for output directory; the second callable supplies
        # the refreshed "choices" for the dropdown.
        create_refresh_button(
            self.output_dir,
            lambda: None,
            lambda: {
                "choices": [""] + self.list_output_dirs(self.current_output_dir)
            },
            "open_folder_small",
        )
        # Output directory button (not shown in headless mode)
        self.output_dir_folder = gr.Button(
            "📂",
            elem_id="open_folder_small",
            elem_classes=["tool"],
            visible=(not self.headless),
        )
        # Output directory button click event: the result of
        # get_folder_path is written into the dropdown.
        self.output_dir_folder.click(
            get_folder_path,
            outputs=self.output_dir,
            show_progress=False,
        )

        # Regularisation directory dropdown; the label switches to
        # "Train config directory" when self.finetune is set.
        self.reg_data_dir = gr.Dropdown(
            label=(
                "Regularisation directory (Optional. containing regularisation images)"
                if not self.finetune
                else "Train config directory (Optional. where config files will be saved)"
            ),
            choices=[self.config.get("folders.reg_data_dir", "")] + self.list_reg_data_dirs(self.current_reg_data_dir),
            value=self.config.get("folders.reg_data_dir", ""),
            interactive=True,
            allow_custom_value=True,
        )
        # Refresh button for regularisation directory
        create_refresh_button(
            self.reg_data_dir,
            lambda: None,
            lambda: {
                "choices": [""] + self.list_reg_data_dirs(self.current_reg_data_dir)
            },
            "open_folder_small",
        )
        # Regularisation directory button (not shown in headless mode)
        self.reg_data_dir_folder = gr.Button(
            "📂",
            elem_id="open_folder_small",
            elem_classes=["tool"],
            visible=(not self.headless),
        )
        # Regularisation directory button click event
        self.reg_data_dir_folder.click(
            get_folder_path,
            outputs=self.reg_data_dir,
            show_progress=False,
        )
    # Second row: logging directory.
    with gr.Row():
        # Logging directory dropdown
        self.logging_dir = gr.Dropdown(
            label="Logging directory (Optional. to enable logging and output Tensorboard log)",
            choices=[self.config.get("folders.logging_dir", "")] + self.list_logging_dirs(self.current_logging_dir),
            value=self.config.get("folders.logging_dir", ""),
            interactive=True,
            allow_custom_value=True,
        )
        # Refresh button for logging directory
        create_refresh_button(
            self.logging_dir,
            lambda: None,
            lambda: {
                "choices": [""] + self.list_logging_dirs(self.current_logging_dir)
            },
            "open_folder_small",
        )
        # Logging directory button (not shown in headless mode)
        self.logging_dir_folder = gr.Button(
            "📂",
            elem_id="open_folder_small",
            elem_classes=["tool"],
            visible=(not self.headless),
        )
        # Logging directory button click event
        self.logging_dir_folder.click(
            get_folder_path,
            outputs=self.logging_dir,
            show_progress=False,
        )

        # Change event for output directory dropdown: re-list the folders
        # for the new path (this also updates self.current_output_dir).
        self.output_dir.change(
            fn=lambda path: gr.Dropdown(choices=[""] + self.list_output_dirs(path)),
            inputs=self.output_dir,
            outputs=self.output_dir,
            show_progress=False,
        )
        # Change event for regularisation directory dropdown
        self.reg_data_dir.change(
            fn=lambda path: gr.Dropdown(
                choices=[""] + self.list_reg_data_dirs(path)
            ),
            inputs=self.reg_data_dir,
            outputs=self.reg_data_dir,
            show_progress=False,
        )
        # Change event for logging directory dropdown
        self.logging_dir.change(
            fn=lambda path: gr.Dropdown(
                choices=[""] + self.list_logging_dirs(path)
            ),
            inputs=self.logging_dir,
            outputs=self.logging_dir,
            show_progress=False,
        )
def get(self, key: str, default=None):
    """
    Retrieves the value of a specified key from the configuration data.

    A dot in `key` descends into nested tables, e.g. "folders.output_dir"
    reads `config["folders"]["output_dir"]`.

    Parameters:
    - key (str): The key to retrieve the value for.
    - default: The default value to return if the key is not found.

    Returns:
    The value associated with the key, or the default value if any segment
    of the key is missing or if an intermediate value is not a table.
    """
    # Start from the entire configuration data and walk down one segment
    # at a time.
    data = self.config

    for k in key.split("."):
        log.debug(k)
        # Only tables (dicts) can be descended into. Without the isinstance
        # guard, a scalar met half-way (e.g. key "a.b" with a = "abc") would
        # be probed with a substring/`in` test and then crash on `.get`.
        if not isinstance(data, dict) or k not in data:
            log.debug(
                f"Key '{key}' not found in configuration. Returning default value."
            )
            return default

        # Move to the value associated with the current segment.
        data = data[k]

    # Every segment resolved: hand back the final value.
    log.debug(f"Returned {data}")
    return data
class HuggingFace:
    """Gradio widgets for the Hugging Face upload/resume training options."""

    def __init__(
        self,
        config: KohyaSSGUIConfig,
    ) -> None:
        """Keep the GUI configuration and build the widgets right away."""
        self.config = config
        self.initialize_ui_components()

    def initialize_ui_components(self) -> None:
        """
        Create one widget per Hugging Face related training option.

        Every widget takes its initial value from the "huggingface.*" entry
        of the GUI configuration. The widgets correspond to these options:

        --huggingface_repo_id          huggingface repo name to upload
        --huggingface_repo_type        huggingface repo type to upload
        --huggingface_path_in_repo     huggingface model path to upload files
        --huggingface_token            huggingface token
        --huggingface_repo_visibility  huggingface repository visibility
                                       ('public' for public, 'private' or
                                       None for private)
        --save_state_to_huggingface    save state to huggingface
        --resume_from_huggingface      resume from huggingface (ex: --resume
                                       {repo_id}/{path_in_repo}:{revision}:{repo_type})
        --async_upload                 upload to huggingface asynchronously
        """
        setting = self.config.get

        # Row 1: which repository, and the credentials for it.
        with gr.Row():
            self.huggingface_repo_id = gr.Textbox(
                label="Huggingface repo id",
                placeholder="huggingface repo id",
                value=setting("huggingface.repo_id", ""),
            )

            self.huggingface_token = gr.Textbox(
                label="Huggingface token",
                placeholder="huggingface token",
                value=setting("huggingface.token", ""),
            )

        # Row 2: repository settings.
        with gr.Row():
            self.huggingface_repo_type = gr.Textbox(
                label="Huggingface repo type",
                placeholder="huggingface repo type",
                value=setting("huggingface.repo_type", ""),
            )

            self.huggingface_repo_visibility = gr.Textbox(
                label="Huggingface repo visibility",
                placeholder="huggingface repo visibility",
                value=setting("huggingface.repo_visibility", ""),
            )

        # Row 3: file location in the repository.
        with gr.Row():
            self.huggingface_path_in_repo = gr.Textbox(
                label="Huggingface path in repo",
                placeholder="huggingface path in repo",
                value=setting("huggingface.path_in_repo", ""),
            )

        # Row 4: state saving, resuming and upload mode.
        with gr.Row():
            self.save_state_to_huggingface = gr.Checkbox(
                label="Save state to huggingface",
                value=setting("huggingface.save_state_to_huggingface", False),
            )

            self.resume_from_huggingface = gr.Textbox(
                label="Resume from huggingface",
                placeholder="resume from huggingface",
                value=setting("huggingface.resume_from_huggingface", ""),
            )

            self.async_upload = gr.Checkbox(
                label="Async upload",
                value=setting("huggingface.async_upload", False),
            )
class LoRATools:
    """Container that renders every LoRA utility tab, in a fixed order."""

    def __init__(
        self,
        headless: bool = False,
    ):
        """
        Render the intro text followed by each LoRA tool tab.

        Parameters:
        - headless (bool): Forwarded to every tab builder that accepts it.
        """
        gr.Markdown("This section provide various LoRA tools...")
        gradio_extract_dylora_tab(headless=headless)
        gradio_convert_lcm_tab(headless=headless)
        gradio_extract_lora_tab(headless=headless)
        gradio_extract_lycoris_locon_tab(headless=headless)
        # The merge tab is a class; instantiating it is what builds the tab,
        # so the instance does not need to be bound to a name.
        # NOTE(review): `headless` is not forwarded here, unlike the sibling
        # tabs - confirm whether GradioMergeLoRaTab accepts it.
        GradioMergeLoRaTab()
        gradio_merge_lycoris_tab(headless=headless)
        gradio_svd_merge_lora_tab(headless=headless)
        gradio_resize_lora_tab(headless=headless)
        gradio_verify_lora_tab(headless=headless)
def create_prompt_file(sample_prompts, output_dir):
    """
    Write the sample prompts to `prompt.txt` inside `output_dir`.

    Args:
        sample_prompts (str): The prompts to use for image sampling.
        output_dir (str): Existing directory that receives the prompt file.

    Returns:
        str: The path to the prompt file.
    """
    prompt_file = os.path.join(output_dir, "prompt.txt")

    # The file is replaced on every call; UTF-8 keeps non-ASCII prompts intact.
    with open(prompt_file, mode="w", encoding="utf-8") as handle:
        handle.write(sample_prompts)

    return prompt_file
class SampleImages:
    """
    Gradio controls for generating sample images while training runs.
    """

    def __init__(
        self,
        config: KohyaSSGUIConfig = {},
    ):
        """
        Store the GUI configuration and build the controls.
        """
        self.config = config

        self.initialize_accordion()

    def initialize_accordion(self):
        """
        Build the sampling controls: two frequency fields, the sampler
        selector and the prompt box. Initial values come from the
        "samples.*" configuration entries.
        """
        setting = self.config.get
        sampler_names = [
            "ddim",
            "pndm",
            "lms",
            "euler",
            "euler_a",
            "heun",
            "dpm_2",
            "dpm_2_a",
            "dpmsolver",
            "dpmsolver++",
            "dpmsingle",
            "k_lms",
            "k_euler",
            "k_euler_a",
            "k_dpm_2",
            "k_dpm_2_a",
        ]

        # Row 1: how often to sample, and with which sampler.
        with gr.Row():
            self.sample_every_n_steps = gr.Number(
                label="Sample every n steps",
                value=setting("samples.sample_every_n_steps", 0),
                precision=0,
                interactive=True,
            )
            self.sample_every_n_epochs = gr.Number(
                label="Sample every n epochs",
                value=setting("samples.sample_every_n_epochs", 0),
                precision=0,
                interactive=True,
            )
            self.sample_sampler = gr.Dropdown(
                label="Sample sampler",
                choices=sampler_names,
                value=setting("samples.sample_sampler", "euler_a"),
                interactive=True,
            )

        # Row 2: the prompts themselves, one per line.
        with gr.Row():
            self.sample_prompts = gr.Textbox(
                lines=5,
                label="Sample prompts",
                interactive=True,
                placeholder="masterpiece, best quality, 1girl, in white shirts, upper body, looking at viewer, simple background --n low quality, worst quality, bad anatomy,bad composition, poor, low effort --w 768 --h 768 --d 1 --l 7.5 --s 28",
                info="Enter one sample prompt per line to generate multiple samples per cycle. Optional specifiers include: --w (width), --h (height), --d (seed), --l (cfg scale), --s (sampler steps) and --n (negative prompt). To modify sample prompts during training, edit the prompt.txt file in the samples directory.",
                value=setting("samples.sample_prompts", ""),
            )
class SourceModel:
    """
    Gradio section for choosing the source model and the training inputs.

    Builds the "Model" accordion: pretrained model selector, output name,
    training image folder, dataset config file, the hidden v2 /
    v_parameterization / SDXL checkboxes, training comment and the
    save-format / save-precision radios. All components are stored on the
    instance.
    """

    def __init__(
        self,
        save_model_as_choices=[
            "same as source model",
            "ckpt",
            "diffusers",
            "diffusers_safetensors",
            "safetensors",
        ],
        save_precision_choices=[
            "float",
            "fp16",
            "bf16",
        ],
        headless=False,
        finetuning=False,
        config: KohyaSSGUIConfig = {},
    ):
        """
        Build the source-model widgets.

        Parameters:
        - save_model_as_choices (list): Options of the "Save trained model as" radio.
        - save_precision_choices (list): Options of the "Save precision" radio.
        - headless (bool): When true, the file/folder picker buttons are hidden.
        - finetuning (bool): Selects the finetuning wording of the image folder label.
        - config: GUI configuration; only its `get(key, default)` is used here.
        """
        self.headless = headless
        self.save_model_as_choices = save_model_as_choices
        self.finetuning = finetuning
        self.config = config

        # Set default directories if not provided
        self.current_models_dir = self.config.get(
            "model.models_dir", os.path.join(scriptdir, "models")
        )
        self.current_train_data_dir = self.config.get(
            "model.train_data_dir", os.path.join(scriptdir, "data")
        )
        self.current_dataset_config_dir = self.config.get(
            "model.dataset_config", os.path.join(scriptdir, "dataset_config")
        )

        # Checkpoints found in the models directory at construction time.
        model_checkpoints = list(
            list_files(
                self.current_models_dir, exts=[".ckpt", ".safetensors"], all=True
            )
        )

        def list_models(path):
            """Remember the folder of `path` and list the default models plus checkpoints found for `path`."""
            self.current_models_dir = (
                path if os.path.isdir(path) else os.path.dirname(path)
            )
            return default_models + list(
                list_files(path, exts=[".ckpt", ".safetensors"], all=True)
            )

        def list_train_data_dirs(path):
            """Remember `path` ("." when empty) and list the folders found there."""
            self.current_train_data_dir = path if not path == "" else "."
            return list(list_dirs(self.current_train_data_dir))

        def list_dataset_config_dirs(path: str) -> list:
            """
            List the toml files for the given dataset config path.

            The path is also remembered on the instance so that the refresh
            button, which reads `self.current_dataset_config_dir`, follows
            the user's latest selection (as the model and train-data
            listers already do).

            Parameters:
            - path (str): The path to list files from.

            Returns:
            - list: The `.toml` entries produced by `list_files`.
            """
            self.current_dataset_config_dir = path if path != "" else "."
            # Lists all .toml files for the current dataset config path, used for populating dropdown choices.
            return list(
                list_files(self.current_dataset_config_dir, exts=[".toml"], all=True)
            )

        with gr.Accordion("Model", open=True):
            with gr.Column(), gr.Group():
                # Hidden textboxes that carry the file-dialog filter settings.
                model_ext = gr.Textbox(value="*.safetensors *.ckpt", visible=False)
                model_ext_name = gr.Textbox(value="Model types", visible=False)

                # Define the input elements
                with gr.Row():
                    with gr.Column(), gr.Row():
                        self.model_list = gr.Textbox(visible=False, value="")
                        # NOTE(review): the initial value is read from
                        # "model.models_dir", the same key used above for the
                        # models directory - confirm this is the intended key.
                        self.pretrained_model_name_or_path = gr.Dropdown(
                            label="Pretrained model name or path",
                            choices=default_models + model_checkpoints,
                            value=self.config.get("model.models_dir", "runwayml/stable-diffusion-v1-5"),
                            allow_custom_value=True,
                            visible=True,
                            min_width=100,
                        )
                        create_refresh_button(
                            self.pretrained_model_name_or_path,
                            lambda: None,
                            lambda: {"choices": list_models(self.current_models_dir)},
                            "open_folder_small",
                        )

                        # File picker for a single checkpoint file.
                        self.pretrained_model_name_or_path_file = gr.Button(
                            document_symbol,
                            elem_id="open_folder_small",
                            elem_classes=["tool"],
                            visible=(not headless),
                        )
                        self.pretrained_model_name_or_path_file.click(
                            get_file_path,
                            inputs=[self.pretrained_model_name_or_path, model_ext, model_ext_name],
                            outputs=self.pretrained_model_name_or_path,
                            show_progress=False,
                        )
                        # Folder picker for a model directory.
                        self.pretrained_model_name_or_path_folder = gr.Button(
                            folder_symbol,
                            elem_id="open_folder_small",
                            elem_classes=["tool"],
                            visible=(not headless),
                        )
                        self.pretrained_model_name_or_path_folder.click(
                            get_folder_path,
                            inputs=self.pretrained_model_name_or_path,
                            outputs=self.pretrained_model_name_or_path,
                            show_progress=False,
                        )

                    with gr.Column(), gr.Row():
                        self.output_name = gr.Textbox(
                            label="Trained Model output name",
                            placeholder="(Name of the model to output)",
                            value=self.config.get("model.output_name", "last"),
                            interactive=True,
                        )
                with gr.Row():
                    with gr.Column(), gr.Row():
                        self.train_data_dir = gr.Dropdown(
                            label=(
                                "Image folder (containing training images subfolders)"
                                if not finetuning
                                else "Image folder (containing training images)"
                            ),
                            choices=[""]
                            + list_train_data_dirs(self.current_train_data_dir),
                            value=self.config.get("model.train_data_dir", ""),
                            interactive=True,
                            allow_custom_value=True,
                        )
                        create_refresh_button(
                            self.train_data_dir,
                            lambda: None,
                            lambda: {
                                "choices": [""]
                                + list_train_data_dirs(self.current_train_data_dir)
                            },
                            "open_folder_small",
                        )
                        self.train_data_dir_folder = gr.Button(
                            "📂",
                            elem_id="open_folder_small",
                            elem_classes=["tool"],
                            visible=(not self.headless),
                        )
                        self.train_data_dir_folder.click(
                            get_folder_path,
                            outputs=self.train_data_dir,
                            show_progress=False,
                        )
                    with gr.Column(), gr.Row():
                        # Toml directory dropdown
                        self.dataset_config = gr.Dropdown(
                            label="Dataset config file (Optional. Select the toml configuration file to use for the dataset)",
                            choices=[self.config.get("model.dataset_config", "")]
                            + list_dataset_config_dirs(self.current_dataset_config_dir),
                            value=self.config.get("model.dataset_config", ""),
                            interactive=True,
                            allow_custom_value=True,
                        )
                        # Refresh button for dataset_config directory
                        create_refresh_button(
                            self.dataset_config,
                            lambda: None,
                            lambda: {
                                "choices": [""]
                                + list_dataset_config_dirs(
                                    self.current_dataset_config_dir
                                )
                            },
                            "open_folder_small",
                        )
                        # Toml directory button
                        self.dataset_config_folder = gr.Button(
                            document_symbol,
                            elem_id="open_folder_small",
                            elem_classes=["tool"],
                            visible=(not self.headless),
                        )

                        # Toml directory button click event
                        self.dataset_config_folder.click(
                            get_file_path,
                            inputs=[
                                self.dataset_config,
                                gr.Textbox(value="*.toml", visible=False),
                                gr.Textbox(value="Dataset config types", visible=False),
                            ],
                            outputs=self.dataset_config,
                            show_progress=False,
                        )
                        # Change event for dataset_config directory dropdown
                        self.dataset_config.change(
                            fn=lambda path: gr.Dropdown(
                                choices=[""] + list_dataset_config_dirs(path)
                            ),
                            inputs=self.dataset_config,
                            outputs=self.dataset_config,
                            show_progress=False,
                        )

                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            # Created hidden; they are outputs of the model
                            # dropdown's change handler below.
                            self.v2 = gr.Checkbox(
                                label="v2", value=False, visible=False, min_width=60
                            )
                            self.v_parameterization = gr.Checkbox(
                                label="v_parameterization",
                                value=False,
                                visible=False,
                                min_width=130,
                            )
                            self.sdxl_checkbox = gr.Checkbox(
                                label="SDXL",
                                value=False,
                                visible=False,
                                min_width=60,
                            )
                    with gr.Column():
                        gr.Group(visible=False)

                with gr.Row():
                    self.training_comment = gr.Textbox(
                        label="Training comment",
                        placeholder="(Optional) Add training comment to be included in metadata",
                        interactive=True,
                        value=self.config.get("model.training_comment", ""),
                    )

                with gr.Row():
                    self.save_model_as = gr.Radio(
                        save_model_as_choices,
                        label="Save trained model as",
                        value=self.config.get("model.save_model_as", "safetensors"),
                    )
                    self.save_precision = gr.Radio(
                        save_precision_choices,
                        label="Save precision",
                        value=self.config.get("model.save_precision", "fp16"),
                    )

                # Selecting a model updates the dropdown itself and the three
                # model-type checkboxes.
                self.pretrained_model_name_or_path.change(
                    fn=lambda path: set_pretrained_model_name_or_path_input(
                        path, refresh_method=list_models
                    ),
                    inputs=[
                        self.pretrained_model_name_or_path,
                    ],
                    outputs=[
                        self.pretrained_model_name_or_path,
                        self.v2,
                        self.v_parameterization,
                        self.sdxl_checkbox,
                    ],
                    show_progress=False,
                )

                # Typing/selecting an image folder re-lists its sub-folders.
                self.train_data_dir.change(
                    fn=lambda path: gr.Dropdown(
                        choices=[""] + list_train_data_dirs(path)
                    ),
                    inputs=self.train_data_dir,
                    outputs=self.train_data_dir,
                    show_progress=False,
                )
import tensorflow # Attempt to import tensorflow to check if it is installed + + visibility = True +except ImportError: + visibility = False + +from easygui import msgbox +from threading import Thread, Event +from .custom_logging import setup_logging +from .common_gui import setup_environment + + +class TensorboardManager: + DEFAULT_TENSORBOARD_PORT = 6006 + + def __init__(self, logging_dir, headless: bool = False, wait_time=5): + self.logging_dir = logging_dir + self.headless = headless + self.wait_time = wait_time + self.tensorboard_proc = None + self.tensorboard_port = os.environ.get( + "TENSORBOARD_PORT", self.DEFAULT_TENSORBOARD_PORT + ) + self.log = setup_logging() + self.thread = None + self.stop_event = Event() + + self.gradio_interface() + + def get_button_states(self, started=False): + return gr.Button( + visible=visibility and (not started or self.headless) + ), gr.Button(visible=visibility and (started or self.headless)) + + def open_tensorboard_url(self): + tensorboard_url = f"http://localhost:{self.tensorboard_port}" + self.log.info(f"Opening TensorBoard URL in browser: {tensorboard_url}") + webbrowser.open(tensorboard_url) + + def start_tensorboard(self, logging_dir=None): + if self.tensorboard_proc is not None: + self.log.info( + "Tensorboard is already running. Terminating existing process before starting new one..." + ) + self.stop_tensorboard() + + if not os.path.exists(logging_dir) or not os.listdir(logging_dir): + self.log.error( + "Error: logging folder does not exist or does not contain logs." 
+ ) + msgbox(msg="Error: logging folder does not exist or does not contain logs.") + return self.get_button_states(started=False) + + run_cmd = [ + "tensorboard", + "--logdir", + logging_dir, + "--host", + "0.0.0.0", + "--port", + str(self.tensorboard_port), + ] + + self.log.info(run_cmd) + + self.log.info("Starting TensorBoard on port {}".format(self.tensorboard_port)) + try: + env = setup_environment() + self.tensorboard_proc = subprocess.Popen(run_cmd, env=env) + except Exception as e: + self.log.error("Failed to start Tensorboard:", e) + return self.get_button_states(started=False) + + if not self.headless: + self.stop_event.clear() + + time.sleep(self.wait_time) + if not self.stop_event.is_set(): + self.thread = Thread(target=self.open_tensorboard_url) + self.thread.start() + + return self.get_button_states(started=True) + + def stop_tensorboard(self): + if self.tensorboard_proc is not None: + self.log.info("Stopping tensorboard process...") + try: + self.tensorboard_proc.terminate() + self.tensorboard_proc = None + self.log.info("...process stopped") + except Exception as e: + self.log.error("Failed to stop Tensorboard:", e) + + if self.thread is not None: + self.stop_event.set() + self.thread.join() # Wait for the thread to finish + self.thread = None + self.log.info("Thread terminated successfully.") + + return self.get_button_states(started=False) + + def gradio_interface(self): + + with gr.Row(): + button_start_tensorboard = gr.Button( + value="Start tensorboard", + elem_id="myTensorButton", + visible=visibility, + ) + button_stop_tensorboard = gr.Button( + value="Stop tensorboard", + visible=visibility and self.headless, + elem_id="myTensorButtonStop", + ) + button_open_tensorboard = gr.Button( + value="Open tensorboard", + elem_id="myTensorButton", + visible=not visibility, + link=f"http://localhost:{self.tensorboard_port}", + ) + button_start_tensorboard.click( + self.start_tensorboard, + inputs=[self.logging_dir], + outputs=[button_start_tensorboard, 
button_stop_tensorboard], + show_progress=False, + ) + button_stop_tensorboard.click( + self.stop_tensorboard, + outputs=[button_start_tensorboard, button_stop_tensorboard], + show_progress=False, + ) diff --git a/kohya_gui/common_gui.py b/kohya_gui/common_gui.py new file mode 100644 index 0000000000000000000000000000000000000000..65f376f400304a0212c1f07545cc0bf8dd1e9cb7 --- /dev/null +++ b/kohya_gui/common_gui.py @@ -0,0 +1,1502 @@ +try: + from tkinter import filedialog, Tk +except ImportError: + pass +from easygui import msgbox, ynbox +from typing import Optional +from .custom_logging import setup_logging + +import os +import re +import gradio as gr +import sys +import shlex +import json +import math +import shutil +import toml + +# Set up logging +log = setup_logging() + +folder_symbol = "\U0001f4c2" # 📂 +refresh_symbol = "\U0001f504" # 🔄 +save_style_symbol = "\U0001f4be" # 💾 +document_symbol = "\U0001F4C4" # 📄 + +scriptdir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) + +if os.name == "nt": + scriptdir = scriptdir.replace("\\", "/") + +# insert sd-scripts path into PYTHONPATH +sys.path.insert(0, os.path.join(scriptdir, "sd-scripts")) + +# define a list of substrings to search for v2 base models +V2_BASE_MODELS = [ + "stabilityai/stable-diffusion-2-1-base/blob/main/v2-1_512-ema-pruned", + "stabilityai/stable-diffusion-2-1-base", + "stabilityai/stable-diffusion-2-base", +] + +# define a list of substrings to search for v_parameterization models +V_PARAMETERIZATION_MODELS = [ + "stabilityai/stable-diffusion-2-1/blob/main/v2-1_768-ema-pruned", + "stabilityai/stable-diffusion-2-1", + "stabilityai/stable-diffusion-2", +] + +# define a list of substrings to v1.x models +V1_MODELS = [ + "CompVis/stable-diffusion-v1-4", + "runwayml/stable-diffusion-v1-5", +] + +# define a list of substrings to search for SDXL base models +SDXL_MODELS = [ + "stabilityai/stable-diffusion-xl-base-1.0", + "stabilityai/stable-diffusion-xl-refiner-1.0", +] + +# define a list 
of substrings to search for +ALL_PRESET_MODELS = V2_BASE_MODELS + V_PARAMETERIZATION_MODELS + V1_MODELS + SDXL_MODELS + +ENV_EXCLUSION = ["COLAB_GPU", "RUNPOD_POD_ID"] + + +def get_executable_path(executable_name: str = None) -> str: + """ + Retrieve and sanitize the path to an executable in the system's PATH. + + Args: + executable_name (str): The name of the executable to find. + + Returns: + str: The full, sanitized path to the executable if found, otherwise an empty string. + """ + if executable_name: + executable_path = shutil.which(executable_name) + if executable_path: + # Replace backslashes with forward slashes on Windows + # if os.name == "nt": + # executable_path = executable_path.replace("\\", "/") + return executable_path + else: + return "" # Return empty string if the executable is not found + else: + return "" # Return empty string if no executable name is provided + + +def calculate_max_train_steps( + total_steps: int, + train_batch_size: int, + gradient_accumulation_steps: int, + epoch: int, + reg_factor: int, +): + return int( + math.ceil( + float(total_steps) + / int(train_batch_size) + / int(gradient_accumulation_steps) + * int(epoch) + * int(reg_factor) + ) + ) + + +def check_if_model_exist( + output_name: str, output_dir: str, save_model_as: str, headless: bool = False +) -> bool: + """ + Checks if a model with the same name already exists and prompts the user to overwrite it if it does. + + Parameters: + output_name (str): The name of the output model. + output_dir (str): The directory where the model is saved. + save_model_as (str): The format to save the model as. + headless (bool, optional): If True, skips the verification and returns False. Defaults to False. + + Returns: + bool: True if the model already exists and the user chooses not to overwrite it, otherwise False. + """ + if headless: + log.info( + "Headless mode, skipping verification if model already exist... if model already exist it will be overwritten..." 
+ ) + return False + + if save_model_as in ["diffusers", "diffusers_safetendors"]: + ckpt_folder = os.path.join(output_dir, output_name) + if os.path.isdir(ckpt_folder): + msg = f"A diffuser model with the same name {ckpt_folder} already exists. Do you want to overwrite it?" + if not ynbox(msg, "Overwrite Existing Model?"): + log.info("Aborting training due to existing model with same name...") + return True + elif save_model_as in ["ckpt", "safetensors"]: + ckpt_file = os.path.join(output_dir, output_name + "." + save_model_as) + if os.path.isfile(ckpt_file): + msg = f"A model with the same file name {ckpt_file} already exists. Do you want to overwrite it?" + if not ynbox(msg, "Overwrite Existing Model?"): + log.info("Aborting training due to existing model with same name...") + return True + else: + log.info( + 'Can\'t verify if existing model exist when save model is set as "same as source model", continuing to train model...' + ) + return False + + return False + + +def output_message(msg: str = "", title: str = "", headless: bool = False) -> None: + """ + Outputs a message to the user, either in a message box or in the log. + + Parameters: + msg (str, optional): The message to be displayed. Defaults to an empty string. + title (str, optional): The title of the message box. Defaults to an empty string. + headless (bool, optional): If True, the message is logged instead of displayed in a message box. Defaults to False. + + Returns: + None + """ + if headless: + log.info(msg) + else: + msgbox(msg=msg, title=title) + + +def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_id): + """ + Creates a refresh button that can be used to update UI components. + + Parameters: + refresh_component (list or object): The UI component(s) to be refreshed. + refresh_method (callable): The method to be called when the button is clicked. + refreshed_args (dict or callable): The arguments to be passed to the refresh method. 
+ elem_id (str): The ID of the button element. + + Returns: + gr.Button: The configured refresh button. + """ + # Converts refresh_component into a list for uniform processing. If it's already a list, keep it the same. + refresh_components = ( + refresh_component + if isinstance(refresh_component, list) + else [refresh_component] + ) + + # Initialize label to None. This will store the label of the first component with a non-None label, if any. + label = None + # Iterate over each component to find the first non-None label and assign it to 'label'. + for comp in refresh_components: + label = getattr(comp, "label", None) + if label is not None: + break + + # Define the refresh function that will be triggered upon clicking the refresh button. + def refresh(): + # Invoke the refresh_method, which is intended to perform the refresh operation. + refresh_method() + # Determine the arguments for the refresh: call refreshed_args if it's callable, otherwise use it directly. + args = refreshed_args() if callable(refreshed_args) else refreshed_args + + # For each key-value pair in args, update the corresponding properties of each component. + for k, v in args.items(): + for comp in refresh_components: + setattr(comp, k, v) + + # Use gr.update to refresh the UI components. If multiple components are present, update each; else, update only the first. + return ( + [gr.Dropdown(**(args or {})) for _ in refresh_components] + if len(refresh_components) > 1 + else gr.Dropdown(**(args or {})) + ) + + # Create a refresh button with the specified label (via refresh_symbol), ID, and classes. + # 'refresh_symbol' should be defined outside this function or passed as an argument, representing the button's label or icon. + refresh_button = gr.Button( + value=refresh_symbol, elem_id=elem_id, elem_classes=["tool"] + ) + # Configure the button to invoke the refresh function. 
+ refresh_button.click(fn=refresh, inputs=[], outputs=refresh_components) + # Return the configured refresh button to be used in the UI. + return refresh_button + + +def list_dirs(path): + if path is None or path == "None" or path == "": + return + + if not os.path.exists(path): + path = os.path.dirname(path) + if not os.path.exists(path): + return + + if not os.path.isdir(path): + path = os.path.dirname(path) + + def natural_sort_key(s, regex=re.compile("([0-9]+)")): + return [ + int(text) if text.isdigit() else text.lower() for text in regex.split(s) + ] + + subdirs = [ + (item, os.path.join(path, item)) + for item in os.listdir(path) + if os.path.isdir(os.path.join(path, item)) + ] + subdirs = [ + filename + for item, filename in subdirs + if item[0] != "." and item not in ["__pycache__"] + ] + subdirs = sorted(subdirs, key=natural_sort_key) + if os.path.dirname(path) != "": + dirs = [os.path.dirname(path), path] + subdirs + else: + dirs = [path] + subdirs + + if os.sep == "\\": + dirs = [d.replace("\\", "/") for d in dirs] + for d in dirs: + yield d + + +def list_files(path, exts=None, all=False): + if path is None or path == "None" or path == "": + return + + if not os.path.exists(path): + path = os.path.dirname(path) + if not os.path.exists(path): + return + + if not os.path.isdir(path): + path = os.path.dirname(path) + + files = [ + (item, os.path.join(path, item)) + for item in os.listdir(path) + if all or os.path.isfile(os.path.join(path, item)) + ] + files = [ + filename + for item, filename in files + if item[0] != "." 
and item not in ["__pycache__"] + ] + exts = set(exts) if exts is not None else None + + def natural_sort_key(s, regex=re.compile("([0-9]+)")): + return [ + int(text) if text.isdigit() else text.lower() for text in regex.split(s) + ] + + files = sorted(files, key=natural_sort_key) + if os.path.dirname(path) != "": + files = [os.path.dirname(path), path] + files + else: + files = [path] + files + + if os.sep == "\\": + files = [d.replace("\\", "/") for d in files] + + for filename in files: + if exts is not None: + if os.path.isdir(filename): + yield filename + _, ext = os.path.splitext(filename) + if ext.lower() not in exts: + continue + yield filename + else: + yield filename + + +def update_my_data(my_data): + # Update the optimizer based on the use_8bit_adam flag + use_8bit_adam = my_data.get("use_8bit_adam", False) + my_data.setdefault("optimizer", "AdamW8bit" if use_8bit_adam else "AdamW") + + # Update model_list to custom if empty or pretrained_model_name_or_path is not a preset model + model_list = my_data.get("model_list", []) + pretrained_model_name_or_path = my_data.get("pretrained_model_name_or_path", "") + if not model_list or pretrained_model_name_or_path not in ALL_PRESET_MODELS: + my_data["model_list"] = "custom" + + # Convert values to int if they are strings + for key in [ + "adaptive_noise_scale", + "clip_skip", + "epoch", + "gradient_accumulation_steps", + "keep_tokens", + "lr_warmup", + "max_data_loader_n_workers", + "max_train_epochs", + "save_every_n_epochs", + "seed", + ]: + value = my_data.get(key) + if value is not None: + try: + my_data[key] = int(value) + except ValueError: + # Handle the case where the string is not a valid float + my_data[key] = int(0) + + # Convert values to int if they are strings + for key in ["lr_scheduler_num_cycles"]: + value = my_data.get(key) + if value is not None: + try: + my_data[key] = int(value) + except ValueError: + # Handle the case where the string is not a valid float + my_data[key] = int(1) + + for 
key in [ + "max_train_steps", + ]: + value = my_data.get(key) + if value is not None: + try: + my_data[key] = int(value) + except ValueError: + # Handle the case where the string is not a valid float + my_data[key] = int(0) + + # Convert values to int if they are strings + for key in ["max_token_length"]: + value = my_data.get(key) + if value is not None: + try: + my_data[key] = int(value) + except ValueError: + # Handle the case where the string is not a valid float + my_data[key] = int(75) + + # Convert values to float if they are strings, correctly handling float representations + for key in ["noise_offset", "learning_rate", "text_encoder_lr", "unet_lr"]: + value = my_data.get(key) + if value is not None: + try: + my_data[key] = float(value) + except ValueError: + # Handle the case where the string is not a valid float + my_data[key] = float(0.0) + + # Convert values to float if they are strings, correctly handling float representations + for key in ["lr_scheduler_power"]: + value = my_data.get(key) + if value is not None: + try: + my_data[key] = float(value) + except ValueError: + # Handle the case where the string is not a valid float + my_data[key] = float(1.0) + + # Update LoRA_type if it is set to LoCon + if my_data.get("LoRA_type", "Standard") == "LoCon": + my_data["LoRA_type"] = "LyCORIS/LoCon" + + # Update model save choices due to changes for LoRA and TI training + if "save_model_as" in my_data: + if ( + my_data.get("LoRA_type") or my_data.get("num_vectors_per_token") + ) and my_data.get("save_model_as") not in ["safetensors", "ckpt"]: + message = "Updating save_model_as to safetensors because the current value in the config file is no longer applicable to {}" + if my_data.get("LoRA_type"): + log.info(message.format("LoRA")) + if my_data.get("num_vectors_per_token"): + log.info(message.format("TI")) + my_data["save_model_as"] = "safetensors" + + # Update xformers if it is set to True and is a boolean + xformers_value = my_data.get("xformers", None) + if 
isinstance(xformers_value, bool): + if xformers_value: + my_data["xformers"] = "xformers" + else: + my_data["xformers"] = "none" + + # Convert use_wandb to log_with="wandb" if it is set to True + for key in ["use_wandb"]: + value = my_data.get(key) + if value is not None: + try: + if value == "True": + my_data["log_with"] = "wandb" + except ValueError: + # Handle the case where the string is not a valid float + pass + + my_data.pop(key, None) + + # Replace the lora_network_weights key with network_weights keeping the original value + for key in ["lora_network_weights"]: + value = my_data.get(key) # Get original value + if value is not None: # Check if the key exists in the dictionary + my_data["network_weights"] = value + my_data.pop(key, None) + + return my_data + + +def get_dir_and_file(file_path): + dir_path, file_name = os.path.split(file_path) + return (dir_path, file_name) + + +def get_file_path( + file_path="", default_extension=".json", extension_name="Config files" +): + """ + Opens a file dialog to select a file, allowing the user to navigate and choose a file with a specific extension. + If no file is selected, returns the initially provided file path or an empty string if not provided. + This function is conditioned to skip the file dialog on macOS or if specific environment variables are present, + indicating a possible automated environment where a dialog cannot be displayed. + + Parameters: + - file_path (str): The initial file path or an empty string by default. Used as the fallback if no file is selected. + - default_extension (str): The default file extension (e.g., ".json") for the file dialog. + - extension_name (str): The display name for the type of files being selected (e.g., "Config files"). + + Returns: + - str: The path of the file selected by the user, or the initial `file_path` if no selection is made. + + Raises: + - TypeError: If `file_path`, `default_extension`, or `extension_name` are not strings. 
+ + Note: + - The function checks the `ENV_EXCLUSION` list against environment variables to determine if the file dialog should be skipped, aiming to prevent its appearance during automated operations. + - The dialog will also be skipped on macOS (`sys.platform != "darwin"`) as a specific behavior adjustment. + """ + # Validate parameter types + if not isinstance(file_path, str): + raise TypeError("file_path must be a string") + if not isinstance(default_extension, str): + raise TypeError("default_extension must be a string") + if not isinstance(extension_name, str): + raise TypeError("extension_name must be a string") + + # Environment and platform check to decide on showing the file dialog + if not any(var in os.environ for var in ENV_EXCLUSION) and sys.platform != "darwin": + current_file_path = file_path # Backup in case no file is selected + + initial_dir, initial_file = get_dir_and_file( + file_path + ) # Decompose file path for dialog setup + + # Initialize a hidden Tkinter window for the file dialog + root = Tk() + root.wm_attributes("-topmost", 1) # Ensure the dialog is topmost + root.withdraw() # Hide the root window to show only the dialog + + # Open the file dialog and capture the selected file path + file_path = filedialog.askopenfilename( + filetypes=((extension_name, f"*{default_extension}"), ("All files", "*.*")), + defaultextension=default_extension, + initialfile=initial_file, + initialdir=initial_dir, + ) + + root.destroy() # Cleanup by destroying the Tkinter root window + + # Fallback to the initial path if no selection is made + if not file_path: + file_path = current_file_path + + # Return the selected or fallback file path + return file_path + + +def get_any_file_path(file_path: str = "") -> str: + """ + Opens a file dialog to select any file, allowing the user to navigate and choose a file. + If no file is selected, returns the initially provided file path or an empty string if not provided. 
+ This function is conditioned to skip the file dialog on macOS or if specific environment variables are present, + indicating a possible automated environment where a dialog cannot be displayed. + + Parameters: + - file_path (str): The initial file path or an empty string by default. Used as the fallback if no file is selected. + + Returns: + - str: The path of the file selected by the user, or the initial `file_path` if no selection is made. + + Raises: + - TypeError: If `file_path` is not a string. + - EnvironmentError: If there's an issue accessing environment variables. + - RuntimeError: If there's an issue initializing the file dialog. + + Note: + - The function checks the `ENV_EXCLUSION` list against environment variables to determine if the file dialog should be skipped, aiming to prevent its appearance during automated operations. + - The dialog will also be skipped on macOS (`sys.platform != "darwin"`) as a specific behavior adjustment. + """ + # Validate parameter type + if not isinstance(file_path, str): + raise TypeError("file_path must be a string") + + try: + # Check for environment variable conditions + if ( + not any(var in os.environ for var in ENV_EXCLUSION) + and sys.platform != "darwin" + ): + current_file_path: str = file_path + + initial_dir, initial_file = get_dir_and_file(file_path) + + # Initialize a hidden Tkinter window for the file dialog + root = Tk() + root.wm_attributes("-topmost", 1) + root.withdraw() + + try: + # Open the file dialog and capture the selected file path + file_path = filedialog.askopenfilename( + initialdir=initial_dir, + initialfile=initial_file, + ) + except Exception as e: + raise RuntimeError(f"Failed to open file dialog: {e}") + finally: + root.destroy() + + # Fallback to the initial path if no selection is made + if not file_path: + file_path = current_file_path + except KeyError as e: + raise EnvironmentError(f"Failed to access environment variables: {e}") + + # Return the selected or fallback file path + 
return file_path + + +def get_folder_path(folder_path: str = "") -> str: + """ + Opens a folder dialog to select a folder, allowing the user to navigate and choose a folder. + If no folder is selected, returns the initially provided folder path or an empty string if not provided. + This function is conditioned to skip the folder dialog on macOS or if specific environment variables are present, + indicating a possible automated environment where a dialog cannot be displayed. + + Parameters: + - folder_path (str): The initial folder path or an empty string by default. Used as the fallback if no folder is selected. + + Returns: + - str: The path of the folder selected by the user, or the initial `folder_path` if no selection is made. + + Raises: + - TypeError: If `folder_path` is not a string. + - EnvironmentError: If there's an issue accessing environment variables. + - RuntimeError: If there's an issue initializing the folder dialog. + + Note: + - The function checks the `ENV_EXCLUSION` list against environment variables to determine if the folder dialog should be skipped, aiming to prevent its appearance during automated operations. + - The dialog will also be skipped on macOS (`sys.platform != "darwin"`) as a specific behavior adjustment. 
+ """ + # Validate parameter type + if not isinstance(folder_path, str): + raise TypeError("folder_path must be a string") + + try: + # Check for environment variable conditions + if any(var in os.environ for var in ENV_EXCLUSION) or sys.platform == "darwin": + return folder_path or "" + + root = Tk() + root.withdraw() + root.wm_attributes("-topmost", 1) + selected_folder = filedialog.askdirectory(initialdir=folder_path or ".") + root.destroy() + return selected_folder or folder_path + except Exception as e: + raise RuntimeError(f"Error initializing folder dialog: {e}") from e + + +def get_saveasfile_path( + file_path: str = "", + defaultextension: str = ".json", + extension_name: str = "Config files", +) -> str: + # Check if the current environment is not macOS and if the environment variables do not match the exclusion list + if not any(var in os.environ for var in ENV_EXCLUSION) and sys.platform != "darwin": + # Store the initial file path to use as a fallback in case no file is selected + current_file_path = file_path + + # Logging the current file path for debugging purposes; helps in tracking the flow of file selection + # log.info(f'current file path: {current_file_path}') + + # Split the file path into directory and file name for setting the file dialog start location and filename + initial_dir, initial_file = get_dir_and_file(file_path) + + # Initialize a hidden Tkinter window to act as the parent for the file dialog, ensuring it appears on top + root = Tk() + root.wm_attributes("-topmost", 1) + root.withdraw() + save_file_path = filedialog.asksaveasfile( + filetypes=( + (f"{extension_name}", f"{defaultextension}"), + ("All files", "*"), + ), + defaultextension=defaultextension, + initialdir=initial_dir, + initialfile=initial_file, + ) + # Close the Tkinter root window to clean up the UI + root.destroy() + + # Logging the save file path for auditing purposes; useful in confirming the user's file choice + # log.info(save_file_path) + + # Default to the 
current file path if no file is selected, ensuring there's always a valid file path + if save_file_path == None: + file_path = current_file_path + else: + # Log the selected file name for transparency and tracking user actions + # log.info(save_file_path.name) + + # Update the file path with the user-selected file name, facilitating the save operation + file_path = save_file_path.name + + # Log the final file path for verification, ensuring the intended file is being used + # log.info(file_path) + + # Return the final file path, either the user-selected file or the fallback path + return file_path + + +def get_saveasfilename_path( + file_path: str = "", + extensions: str = "*", + extension_name: str = "Config files", +) -> str: + """ + Opens a file dialog to select a file name for saving, allowing the user to specify a file name and location. + If no file is selected, returns the initially provided file path or an empty string if not provided. + This function is conditioned to skip the file dialog on macOS or if specific environment variables are present, + indicating a possible automated environment where a dialog cannot be displayed. + + Parameters: + - file_path (str): The initial file path or an empty string by default. Used as the fallback if no file is selected. + - extensions (str): The file extensions to filter the file dialog by. Defaults to "*" for all files. + - extension_name (str): The name to display for the file extensions in the file dialog. Defaults to "Config files". + + Returns: + - str: The path of the file selected by the user, or the initial `file_path` if no selection is made. + + Raises: + - TypeError: If `file_path` is not a string. + - EnvironmentError: If there's an issue accessing environment variables. + - RuntimeError: If there's an issue initializing the file dialog. 
+ + Note: + - The function checks the `ENV_EXCLUSION` list against environment variables to determine if the file dialog should be skipped, aiming to prevent its appearance during automated operations. + - The dialog will also be skipped on macOS (`sys.platform == "darwin"`) as a specific behavior adjustment. + """ + # Check if the current environment is not macOS and if the environment variables do not match the exclusion list + if not any(var in os.environ for var in ENV_EXCLUSION) and sys.platform != "darwin": + # Store the initial file path to use as a fallback in case no file is selected + current_file_path: str = file_path + # log.info(f'current file path: {current_file_path}') + + # Split the file path into directory and file name for setting the file dialog start location and filename + initial_dir, initial_file = get_dir_and_file(file_path) + + # Initialize a hidden Tkinter window to act as the parent for the file dialog, ensuring it appears on top + root = Tk() + root.wm_attributes("-topmost", 1) + root.withdraw() + # Open the file dialog and capture the selected file path + save_file_path = filedialog.asksaveasfilename( + filetypes=( + (f"{extension_name}", f"{extensions}"), + ("All files", "*"), + ), + defaultextension=extensions, + initialdir=initial_dir, + initialfile=initial_file, + ) + # Close the Tkinter root window to clean up the UI + root.destroy() + + # Default to the current file path if no file is selected, ensuring there's always a valid file path + if save_file_path == "": + file_path = current_file_path + else: + # Logging the save file path for auditing purposes; useful in confirming the user's file choice + # log.info(save_file_path) + # Update the file path with the user-selected file name, facilitating the save operation + file_path = save_file_path + + # Return the final file path, either the user-selected file or the fallback path + return file_path + + +def add_pre_postfix( + folder: str = "", + prefix: str = "", + postfix: str = 
"", + caption_file_ext: str = ".caption", + recursive: bool = False, +) -> None: + """ + Add prefix and/or postfix to the content of caption files within a folder. + If no caption files are found, create one with the requested prefix and/or postfix. + + Args: + folder (str): Path to the folder containing caption files. + prefix (str, optional): Prefix to add to the content of the caption files. + postfix (str, optional): Postfix to add to the content of the caption files. + caption_file_ext (str, optional): Extension of the caption files. + recursive (bool, optional): Whether to search for caption files recursively. + """ + # If neither prefix nor postfix is provided, return early + if prefix == "" and postfix == "": + return + + # Define the image file extensions to filter + image_extensions = (".jpg", ".jpeg", ".png", ".webp") + + # If recursive is true, list all image files in the folder and its subfolders + if recursive: + image_files = [] + for root, dirs, files in os.walk(folder): + for file in files: + if file.lower().endswith(image_extensions): + image_files.append(os.path.join(root, file)) + else: + # List all image files in the folder + image_files = [ + f for f in os.listdir(folder) if f.lower().endswith(image_extensions) + ] + + # Iterate over the list of image files + for image_file in image_files: + # Construct the caption file name by appending the caption file extension to the image file name + caption_file_name = f"{os.path.splitext(image_file)[0]}{caption_file_ext}" + # Construct the full path to the caption file + caption_file_path = os.path.join(folder, caption_file_name) + + # Check if the caption file does not exist + if not os.path.exists(caption_file_path): + # Create a new caption file with the specified prefix and/or postfix + try: + with open(caption_file_path, "w", encoding="utf-8") as f: + # Determine the separator based on whether both prefix and postfix are provided + separator = " " if prefix and postfix else "" + 
f.write(f"{prefix}{separator}{postfix}") + except Exception as e: + log.error(f"Error writing to file {caption_file_path}: {e}") + else: + # Open the existing caption file for reading and writing + try: + with open(caption_file_path, "r+", encoding="utf-8") as f: + # Read the content of the caption file, stripping any trailing whitespace + content = f.read().rstrip() + # Move the file pointer to the beginning of the file + f.seek(0, 0) + + # Determine the separator based on whether only prefix is provided + prefix_separator = " " if prefix else "" + # Determine the separator based on whether only postfix is provided + postfix_separator = " " if postfix else "" + # Write the updated content to the caption file, adding prefix and/or postfix + f.write( + f"{prefix}{prefix_separator}{content}{postfix_separator}{postfix}" + ) + except Exception as e: + log.error(f"Error writing to file {caption_file_path}: {e}") + + +def has_ext_files(folder_path: str, file_extension: str) -> bool: + """ + Determines whether any files within a specified folder have a given file extension. + + This function iterates through each file in the specified folder and checks if + its extension matches the provided file_extension argument. The search is case-sensitive + and expects file_extension to include the dot ('.') if applicable (e.g., '.txt'). + + Args: + folder_path (str): The absolute or relative path to the folder to search within. + file_extension (str): The file extension to search for, including the dot ('.') if applicable. + + Returns: + bool: True if at least one file with the specified extension is found, False otherwise. 
def find_replace(
    folder_path: str = "",
    caption_file_ext: str = ".caption",
    search_text: str = "",
    replace_text: str = "",
) -> None:
    """
    Replace every occurrence of a text in the caption files of a folder.

    Only files located directly inside `folder_path` (no recursion) whose name ends
    with `caption_file_ext` are processed. Each file is read as UTF-8 (undecodable
    bytes are ignored), the replacement is applied, and the file is rewritten in place.
    A failure on one file is logged and does not stop the remaining files.

    Args:
        folder_path (str, optional): Folder containing the caption files. Must be an
            existing directory; otherwise an error is logged and nothing is changed.
        caption_file_ext (str, optional): Caption file extension. Must be one of
            ".caption", ".txt", ".txt2" or ".cap". Defaults to ".caption".
        search_text (str, optional): Text to search for. When empty, nothing is done.
        replace_text (str, optional): Replacement text. Defaults to an empty string.
    """
    log.info("Running caption find/replace")

    # An empty search term is rejected up front: str.replace("", x) would insert
    # the replacement between every character of every caption.
    if not search_text:
        message = "No search text was provided for the caption find/replace..."
        msgbox(message)
        log.warning(message)
        return

    # Only the known caption extensions may be rewritten in bulk
    if caption_file_ext not in (".caption", ".txt", ".txt2", ".cap"):
        log.error(
            f"Unsupported file extension {caption_file_ext} for caption files. Please use .caption, .txt, .txt2, or .cap."
        )
        return

    # Validate the folder BEFORE listing it: os.listdir raises on a missing path
    if not os.path.isdir(folder_path):
        log.error(f"The provided path '{folder_path}' is not a valid folder.")
        return

    # List the caption files once; the same list drives the "nothing found" check
    try:
        caption_files = [
            f for f in os.listdir(folder_path) if f.endswith(caption_file_ext)
        ]
    except OSError as e:
        log.error(f"Error accessing folder {folder_path}: {e}")
        return

    if not caption_files:
        message = (
            f"No files with extension {caption_file_ext} were found in {folder_path}..."
        )
        msgbox(message)
        log.warning(message)
        return

    for caption_file in caption_files:
        file_path = os.path.join(folder_path, caption_file)
        try:
            # Read fully before reopening for writing so a read error never truncates the file
            with open(file_path, "r", errors="ignore", encoding="utf-8") as f:
                content = f.read().replace(search_text, replace_text)

            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)
        except Exception as e:
            # Best effort: report the failing file and continue with the others
            log.error(f"Error processing file {file_path}: {e}")
+ """ + # If color augmentation is enabled, disable cache latent and return a new checkbox + if color_aug: + msgbox( + 'Disabling "Cache latent" because "Color augmentation" has been selected...' + ) + return gr.Checkbox(value=False, interactive=False) + # If color augmentation is disabled, return a new checkbox with interactive set to True + else: + return gr.Checkbox(interactive=True) + + +def set_pretrained_model_name_or_path_input( + pretrained_model_name_or_path, refresh_method=None +): + """ + Sets the pretrained model name or path input based on the model type. + + This function checks the type of the pretrained model and sets the appropriate + parameters for the model. It also handles the case where the model list is + set to 'custom' and a refresh method is provided. + + Args: + pretrained_model_name_or_path (str): The name or path of the pretrained model. + refresh_method (callable, optional): A function to refresh the model list. + + Returns: + tuple: A tuple containing the Dropdown widget, v2 checkbox, v_parameterization checkbox, + and sdxl checkbox. + """ + # Check if the given pretrained_model_name_or_path is in the list of SDXL models + if pretrained_model_name_or_path in SDXL_MODELS: + log.info("SDXL model selected. Setting sdxl parameters") + v2 = gr.Checkbox(value=False, visible=False) + v_parameterization = gr.Checkbox(value=False, visible=False) + sdxl = gr.Checkbox(value=True, visible=False) + return ( + gr.Dropdown(), + v2, + v_parameterization, + sdxl, + ) + + # Check if the given pretrained_model_name_or_path is in the list of V2 base models + if pretrained_model_name_or_path in V2_BASE_MODELS: + log.info("SD v2 base model selected. 
def get_int_or_default(kwargs, key, default_value=0):
    """
    Retrieve `key` from `kwargs` as an int, falling back to `default_value`.

    The value is passed to `int()`, so ints, floats (truncated toward zero), bools and
    strings holding an integer literal (e.g. "5") are accepted. Strings such as "1.5",
    `None`, other non-numeric objects and infinite floats cannot be converted; in that
    case the problem is logged and `default_value` is returned unchanged.

    Args:
        kwargs (dict): A dictionary of keyword arguments.
        key (str): The key to retrieve from the kwargs dictionary.
        default_value (int, optional): Value used when the key is missing or its value
            cannot be converted to an integer.

    Returns:
        int: The converted value, or `default_value` when conversion is impossible.
    """
    # A missing key yields default_value, which then goes through the same conversion
    value = kwargs.get(key, default_value)
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-integer string or NaN; TypeError: None / unsupported type;
        # OverflowError: int(float("inf"))
        log.info(
            f"{key} is not an int or cannot be converted to int, setting value to {default_value}"
        )
        return default_value
def run_cmd_advanced_training(run_cmd: list = None, **kwargs):
    """
    Append advanced-training command line arguments to `run_cmd`.

    Two keyword arguments are recognised; anything else in `kwargs` is ignored:

    * `additional_parameters` (str): free-form extra arguments. Double quotes are
      removed, the text is split on whitespace and every token is shell-quoted
      before being appended.
    * `max_data_loader_n_workers`: appended as
      `--max_data_loader_n_workers <value>` unless it is `""` or `None`.

    Args:
        run_cmd (list, optional): Argument list to extend. It is modified in place
            and returned. When omitted, a fresh list is created for each call.
        **kwargs: Training parameters as described above.

    Returns:
        list: The argument list with the extra parameters appended.
    """
    # A new list per call: a shared default list would keep the arguments
    # appended by earlier calls.
    if run_cmd is None:
        run_cmd = []

    additional_parameters = kwargs.get("additional_parameters")
    if additional_parameters:
        # Quotes are dropped before splitting, so quoted groups are NOT kept together
        for arg in additional_parameters.replace('"', "").split():
            run_cmd.append(shlex.quote(arg))

    max_data_loader_n_workers = kwargs.get("max_data_loader_n_workers")
    # 0 is a legitimate value, so only the "unset" markers are skipped
    if max_data_loader_n_workers is not None and max_data_loader_n_workers != "":
        run_cmd.append("--max_data_loader_n_workers")
        run_cmd.append(str(max_data_loader_n_workers))

    return run_cmd
+ """ + # Initialize the return value to True + return_value = True + + # Log the start of the verification process + log.info(f"Verifying image folder pattern of {folder_path}...") + + # Check if the folder exists + if not os.path.isdir(folder_path): + # Log an error message if the folder does not exist + log.error( + f"...the provided path '{folder_path}' is not a valid folder. " + "Please follow the folder structure documentation found at docs\image_folder_structure.md ..." + ) + # Return False to indicate that the folder pattern is not valid + return False + + # Create a regular expression pattern to match the required sub-folder names + # The pattern should start with one or more digits (\d+) followed by an underscore (_) + # After the underscore, it should match one or more word characters (\w+), which can be letters, numbers, or underscores + # Example of a valid pattern matching name: 123_example_folder + pattern = r"^\d+_\w+" + + # Get the list of sub-folders in the directory + subfolders = [ + os.path.join(folder_path, subfolder) + for subfolder in os.listdir(folder_path) + if os.path.isdir(os.path.join(folder_path, subfolder)) + ] + + # Check the pattern of each sub-folder + matching_subfolders = [ + subfolder + for subfolder in subfolders + if re.match(pattern, os.path.basename(subfolder)) + ] + + # Print non-matching sub-folders + non_matching_subfolders = set(subfolders) - set(matching_subfolders) + if non_matching_subfolders: + # Log an error message if any sub-folders do not match the pattern + log.error( + f"...the following folders do not match the required pattern _: {', '.join(non_matching_subfolders)}" + ) + # Log an error message suggesting to follow the folder structure documentation + log.error( + f"...please follow the folder structure documentation found at docs\image_folder_structure.md ..." 
def SaveConfigFile(
    parameters,
    file_path: str,
    exclusion: list = None,
) -> None:
    """
    Save configuration parameters to a JSON file, leaving out excluded keys.

    The parameters are sorted by name, the names listed in `exclusion` are dropped,
    and the result is written to `file_path` as indented JSON (UTF-8). The folder
    that should contain the file is created when it does not exist yet.

    Args:
        parameters: Iterable of `(name, value)` pairs. The code sorts and unpacks
            pairs, so a plain dict would have to be passed as `dict.items()`.
        file_path (str): Path of the JSON file to write. A bare file name is
            written to the current working directory.
        exclusion (list, optional): Names to leave out of the saved file.
            Defaults to ["file_path", "save_as", "headless", "print_only"].
    """
    if exclusion is None:
        exclusion = ["file_path", "save_as", "headless", "print_only"]

    # Sorted by name so the saved file has a stable key order
    variables = {
        name: value
        for name, value in sorted(parameters, key=lambda x: x[0])
        if name not in exclusion
    }

    # Create the folder that will hold the file. An empty dirname means
    # "current directory" and needs no creation.
    folder_path = os.path.dirname(file_path)
    if folder_path and not os.path.exists(folder_path):
        log.info(f"Creating folder {folder_path} for the configuration file...")
        os.makedirs(folder_path, exist_ok=True)

    with open(file_path, "w", encoding="utf-8") as file:
        json.dump(variables, file, indent=2)
+ This can lead to issues during training if not handled properly. + + Args: + folder_path (str): The path to the folder containing image files. + image_extension (list, optional): List of image file extensions to consider. + Defaults to [".gif", ".png", ".jpg", ".jpeg", ".webp"]. + """ + # Initialize a flag to track if duplicates are found + duplicate = False + + # Log the start of the duplicate check + log.info( + f"Checking for duplicate image filenames in training data directory {folder_path}..." + ) + + # Walk through the directory structure + for root, dirs, files in os.walk(folder_path): + # Initialize a dictionary to store filenames and their paths + filenames = {} + + # Process each file in the current directory + for file in files: + # Split the filename and extension + filename, extension = os.path.splitext(file) + + # Check if the extension is in the list of image extensions + if extension.lower() in image_extension: + # Construct the full path to the file + full_path = os.path.join(root, file) + + # Check if the filename is already in the dictionary + if filename in filenames: + # If it is, compare the existing path with the current path + existing_path = filenames[filename] + if existing_path != full_path: + # Log a warning if the paths are different + log.warning( + f"...same filename '{filename}' with different image extension found. This will cause training issues. Rename one of the file." + ) + log.warning(f" Existing file: {existing_path}") + log.warning(f" Current file: {full_path}") + + # Set the duplicate flag to True + duplicate = True + else: + # If not, add the filename and path to the dictionary + filenames[filename] = full_path + + # If no duplicates were found, log a message indicating validation + if not duplicate: + log.info("...valid") + + +def validate_file_path(file_path: str) -> bool: + if file_path == "": + return True + msg = f"Validating {file_path} existence..." 
def validate_toml_file(file_path: str) -> bool:
    """
    Check that `file_path` points to an existing, parseable TOML file.

    An empty path is treated as "not provided" and is considered valid. Every
    outcome is logged.

    Args:
        file_path (str): Path to the TOML file, or an empty string.

    Returns:
        bool: True when the path is empty or the file exists and `toml.load`
            parses it; False when the file is missing or cannot be loaded.
    """
    if file_path == "":
        return True
    msg = f"Validating toml {file_path} existence and validity..."
    if not os.path.isfile(file_path):
        log.error(f"{msg} FAILED: does not exist")
        return False

    try:
        toml.load(file_path)
    except Exception as e:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit still
        # propagate; the parser message is kept to show what is wrong in the file.
        log.error(f"{msg} FAILED: is not a valid toml file. {e}")
        return False
    log.info(f"{msg} SUCCESS")
    return True
def is_file_writable(file_path: str) -> bool:
    """
    Tell whether `file_path` can be written to.

    A path that does not exist yet is reported as writable. An existing path is
    probed by opening it in append mode, which leaves its content untouched.

    Args:
        file_path (str): The path to probe.

    Returns:
        bool: False when the existing path cannot be opened for appending,
            True otherwise.
    """
    if os.path.exists(file_path):
        try:
            # Append mode: proves write access without truncating the file
            with open(file_path, "a", encoding="utf-8"):
                writable = True
        except IOError:
            writable = False
    else:
        # Nothing on disk yet, so there is nothing that could block a write
        writable = True
    return writable
It will not be executed:\n" + ) + # Reconstruct the safe command string for display + command_to_run = " ".join(run_cmd) + + log.info(command_to_run) + print("") + + log.info(f"Showing toml config file: {tmpfilename}") + print("") + with open(tmpfilename, "r", encoding="utf-8") as toml_file: + log.info(toml_file.read()) + log.info(f"end of toml config file: {tmpfilename}") + + save_to_file(command_to_run) + + +def validate_args_setting(input_string): + # Regex pattern to handle multiple conditions: + # - Empty string is valid + # - Single or multiple key/value pairs with exactly one space between pairs + # - No spaces around '=' and no spaces within keys or values + pattern = r"^(\S+=\S+)( \S+=\S+)*$|^$" + if re.match(pattern, input_string): + return True + else: + log.info(f"'{input_string}' is not a valid settings string.") + log.info( + "A valid settings string must consist of one or more key/value pairs formatted as key=value, with no spaces around the equals sign or within the value. Multiple pairs should be separated by a space." 
def convert_lcm(
    name,
    model_path,
    lora_scale,
    model_type,
):
    """
    Run tools/lcm_convert.py to convert a model to an LCM model.

    The output path is derived from `name`:
    * a bare file name is placed next to the source model;
    * a directory gets the default file name "lcm.safetensors";
    * if the result equals the source path, "_lcm" is inserted before the
      extension so the source model is never overwritten.

    Args:
        name (str): Output file name, file path or directory.
        model_path (str): Path of the model file to convert. Must be an existing file.
        lora_scale: Value passed to the script as `--lora-scale`.
        model_type (str): "SDXL" adds `--sdxl`; "SSD-1B" (or "SD-1B", the value the
            model type radio button in this file sends) adds `--ssd-1b`; any other
            value adds no flag.
    """
    # Check if source model exist
    if not os.path.isfile(model_path):
        log.error("The provided model is not a file")
        return

    if os.path.dirname(name) == "":
        # only filename given. prepend dir
        name = os.path.join(os.path.dirname(model_path), name)
    if os.path.isdir(name):
        # only dir name given. set default lcm name
        name = os.path.join(name, "lcm.safetensors")
    if os.path.normpath(model_path) == os.path.normpath(name):
        # same path. silently ignore but rename output
        path, ext = os.path.splitext(name)
        name = f"{path}_lcm{ext}"

    # Construct the command to run the script
    run_cmd = [
        rf"{PYTHON}",
        rf"{scriptdir}/tools/lcm_convert.py",
        "--lora-scale",
        str(lora_scale),
        "--model",
        rf"{model_path}",
        "--name",
        str(name),
    ]

    if model_type == "SDXL":
        run_cmd.append("--sdxl")
    # "SD-1B" is the spelling used by the radio button choices; accept both
    if model_type in ("SSD-1B", "SD-1B"):
        run_cmd.append("--ssd-1b")

    # Set up the environment
    env = setup_environment()

    # Reconstruct the command string for display only; the list itself is executed
    command_to_run = " ".join(run_cmd)
    log.info(f"Executing command: {command_to_run}")

    # Run the command without a shell, so arguments are passed verbatim
    subprocess.run(run_cmd, env=env, shell=False)

    # Log completion
    log.info("Done extracting...")
+ """ + nonlocal current_save_dir + current_save_dir = path + return list(list_files(path, exts=[".safetensors"], all=True)) + + with gr.Tab("Convert to LCM"): + gr.Markdown("This utility convert a model to an LCM model.") + lora_ext = gr.Textbox(value="*.safetensors", visible=False) + lora_ext_name = gr.Textbox(value="LCM model types", visible=False) + model_ext = gr.Textbox(value="*.safetensors", visible=False) + model_ext_name = gr.Textbox(value="Model types", visible=False) + + with gr.Group(), gr.Row(): + model_path = gr.Dropdown( + label="Stable Diffusion model to convert to LCM", + interactive=True, + choices=[""] + list_models(current_model_dir), + value="", + allow_custom_value=True, + ) + create_refresh_button( + model_path, + lambda: None, + lambda: {"choices": list_models(current_model_dir)}, + "open_folder_small", + ) + button_model_path_file = gr.Button( + folder_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_model_path_file.click( + get_file_path, + inputs=[model_path, model_ext, model_ext_name], + outputs=model_path, + show_progress=False, + ) + + name = gr.Dropdown( + label="Name of the new LCM model", + interactive=True, + choices=[""] + list_save_to(current_save_dir), + value="", + allow_custom_value=True, + ) + create_refresh_button( + name, + lambda: None, + lambda: {"choices": list_save_to(current_save_dir)}, + "open_folder_small", + ) + button_name = gr.Button( + folder_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_name.click( + get_saveasfilename_path, + inputs=[name, lora_ext, lora_ext_name], + outputs=name, + show_progress=False, + ) + model_path.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_models(path)), + inputs=model_path, + outputs=model_path, + show_progress=False, + ) + name.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_save_to(path)), + inputs=name, + outputs=name, + show_progress=False, + ) + + 
def convert_model(
    source_model_input,
    source_model_type,
    target_model_folder_input,
    target_model_name_input,
    target_model_type,
    target_save_precision_type,
    unet_use_linear_projection,
):
    """
    Run sd-scripts/tools/convert_diffusers20_original_sd.py on a model.

    The inputs are validated, the command line is assembled and logged, and the
    script is run without a shell. Validation failures are logged and end the
    call without running anything.

    Args:
        source_model_input: Path to the source model (file or folder).
        source_model_type: Reference model name; must not be an empty string. The
            two SD v1 names select `--v1`, anything else selects `--v2`.
        target_model_folder_input: Existing folder that receives the converted model.
        target_model_name_input: Name of the converted model (without extension).
        target_model_type: "diffuser", "diffuser_safetensors", or a file extension
            such as "ckpt" / "safetensors" that is appended to the target name.
        target_save_precision_type: "unspecified" or a precision passed as `--<value>`.
        unet_use_linear_projection: When truthy, adds `--unet_use_linear_projection`.
    """
    # A source model type is mandatory
    if source_model_type == "":
        log.info("Invalid source model type")
        return

    # The source has to be an existing file or an existing folder
    source_is_file = os.path.isfile(source_model_input)
    if not source_is_file and not os.path.isdir(source_model_input):
        log.info("The provided source model is neither a file nor a folder")
        return
    if source_is_file:
        log.info("The provided source model is a file")
    else:
        log.info("The provided model is a folder")

    # The target folder has to exist already
    if not os.path.isdir(target_model_folder_input):
        log.info("The provided target folder does not exist")
        return
    log.info("The provided model folder exist")

    command = [
        f"{PYTHON}",
        f"{scriptdir}/sd-scripts/tools/convert_diffusers20_original_sd.py",
    ]

    # Only these two reference models are treated as SD v1; everything else is v2
    known_v1 = ("runwayml/stable-diffusion-v1-5", "CompVis/stable-diffusion-v1-4")
    version = "v1" if str(source_model_type) in known_v1 else "v2"
    log.info(f"SD {version} model specified. Setting --{version} parameter")
    command.append(f"--{version}")

    if target_save_precision_type != "unspecified":
        command.append(f"--{target_save_precision_type}")

    # Both diffusers targets need the reference model; only one uses safetensors
    to_diffusers = target_model_type in ("diffuser", "diffuser_safetensors")
    if to_diffusers:
        command += ["--reference_model", source_model_type]
    if target_model_type == "diffuser_safetensors":
        command.append("--use_safetensors")

    # Fix for stabilityAI diffusers format
    if unet_use_linear_projection:
        command.append("--unet_use_linear_projection")

    # Positional arguments: source first, then target
    command.append(f"{source_model_input}")

    # Diffusers output is a folder named after the model; otherwise the target
    # type doubles as the file extension
    if to_diffusers:
        target_name = target_model_name_input
    else:
        target_name = f"{target_model_name_input}.{target_model_type}"
    command.append(f"{os.path.join(target_model_folder_input, target_name)}")

    # Log the command
    log.info(" ".join(command))

    # Run the command without a shell
    subprocess.run(command, env=setup_environment(), shell=False)
nonlocal current_source_model + current_source_model = path + return list(list_files(path, exts=[".ckpt", ".safetensors"], all=True)) + + def list_target_folder(path): + nonlocal current_target_folder + current_target_folder = path + return list(list_dirs(path)) + + with gr.Tab("Convert model"): + gr.Markdown( + "This utility can be used to convert from one stable diffusion model format to another." + ) + + model_ext = gr.Textbox(value="*.safetensors *.ckpt", visible=False) + model_ext_name = gr.Textbox(value="Model types", visible=False) + + with gr.Group(), gr.Row(): + with gr.Column(), gr.Row(): + source_model_input = gr.Dropdown( + label="Source model (path to source model folder of file to convert...)", + interactive=True, + choices=[""] + list_source_model(default_source_model), + value="", + allow_custom_value=True, + ) + create_refresh_button( + source_model_input, + lambda: None, + lambda: {"choices": list_source_model(current_source_model)}, + "open_folder_small", + ) + button_source_model_dir = gr.Button( + folder_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_source_model_dir.click( + get_folder_path, + outputs=source_model_input, + show_progress=False, + ) + + button_source_model_file = gr.Button( + document_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_source_model_file.click( + get_file_path, + inputs=[source_model_input, model_ext, model_ext_name], + outputs=source_model_input, + show_progress=False, + ) + + source_model_input.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_source_model(path)), + inputs=source_model_input, + outputs=source_model_input, + show_progress=False, + ) + with gr.Column(), gr.Row(): + source_model_type = gr.Dropdown( + label="Source model type", + choices=[ + "stabilityai/stable-diffusion-2-1-base", + "stabilityai/stable-diffusion-2-base", + "stabilityai/stable-diffusion-2-1", + 
"stabilityai/stable-diffusion-2", + "runwayml/stable-diffusion-v1-5", + "CompVis/stable-diffusion-v1-4", + ], + allow_custom_value=True, + ) + with gr.Group(), gr.Row(): + with gr.Column(), gr.Row(): + target_model_folder_input = gr.Dropdown( + label="Target model folder (path to target model folder of file name to create...)", + interactive=True, + choices=[""] + list_target_folder(default_target_folder), + value="", + allow_custom_value=True, + ) + create_refresh_button( + target_model_folder_input, + lambda: None, + lambda: {"choices": list_target_folder(current_target_folder)}, + "open_folder_small", + ) + button_target_model_folder = gr.Button( + folder_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_target_model_folder.click( + get_folder_path, + outputs=target_model_folder_input, + show_progress=False, + ) + + target_model_folder_input.change( + fn=lambda path: gr.Dropdown( + choices=[""] + list_target_folder(path) + ), + inputs=target_model_folder_input, + outputs=target_model_folder_input, + show_progress=False, + ) + + with gr.Column(), gr.Row(): + target_model_name_input = gr.Textbox( + label="Target model name", + placeholder="target model name...", + interactive=True, + ) + with gr.Row(): + target_model_type = gr.Dropdown( + label="Target model type", + choices=[ + "diffuser", + "diffuser_safetensors", + "ckpt", + "safetensors", + ], + ) + target_save_precision_type = gr.Dropdown( + label="Target model precision", + choices=["unspecified", "fp16", "bf16", "float"], + value="unspecified", + ) + unet_use_linear_projection = gr.Checkbox( + label="UNet linear projection", + value=False, + info="Enable for Hugging Face's stabilityai models", + ) + + convert_button = gr.Button("Convert model") + + convert_button.click( + convert_model, + inputs=[ + source_model_input, + source_model_type, + target_model_folder_input, + target_model_name_input, + target_model_type, + target_save_precision_type, + 
def setup_logging(clean=False, debug=False):
    """Configure logging once and return the shared ``sd`` logger.

    The first call sets up two sinks: the root logger is configured to
    append every record (DEBUG and above) to ``setup.log`` in the current
    working directory, and a Rich console handler is attached to the
    ``sd`` logger. Later calls return the logger cached in the
    module-level ``log`` and ignore their arguments.

    Args:
        clean: Delete an existing ``setup.log`` before logging starts.
        debug: Show DEBUG records on the console instead of INFO and above.

    Returns:
        The ``logging.Logger`` named ``sd``.
    """
    global log

    # Already configured: adding the Rich handler twice would duplicate
    # every console message.
    if log is not None:
        return log

    try:
        if clean and os.path.isfile("setup.log"):
            os.remove("setup.log")
        time.sleep(0.1)  # prevent race condition
    except OSError:
        # Best effort only: a log file that cannot be removed (locked,
        # read-only, ...) must not prevent start-up. Only file-system errors
        # are ignored so that KeyboardInterrupt and real bugs still surface.
        pass

    file_config = {
        "level": logging.DEBUG,
        "format": "%(asctime)s | %(levelname)s | %(pathname)s | %(message)s",
        "filename": "setup.log",
        "filemode": "a",
        "force": True,
    }
    if sys.version_info >= (3, 9):
        # basicConfig() only accepts ``encoding`` from Python 3.9 onwards.
        file_config["encoding"] = "utf-8"
    logging.basicConfig(**file_config)

    console = Console(
        log_time=True,
        log_time_format="%H:%M:%S-%f",
        theme=Theme(
            {
                "traceback.border": "black",
                "traceback.border.syntax_error": "black",
                "inspect.value.border": "black",
            }
        ),
    )
    pretty_install(console=console)
    traceback_install(
        console=console,
        extra_lines=1,
        width=console.width,
        word_wrap=False,
        indent_guides=False,
        suppress=[],
    )

    console_level = logging.DEBUG if debug else logging.INFO
    rh = RichHandler(
        show_time=True,
        omit_repeated_times=False,
        show_level=True,
        show_path=False,
        markup=False,
        rich_tracebacks=True,
        log_time_format="%H:%M:%S-%f",
        level=console_level,
        console=console,
    )
    # NOTE(review): this stores the numeric level as the handler *name*; the
    # level itself is already applied through the constructor above. Left
    # unchanged -- confirm whether anything looks the handler up by this name.
    rh.set_name(console_level)
    log = logging.getLogger("sd")
    log.addHandler(rh)

    return log
def dataset_balancing(concept_repeats, folder, insecure):
    """Rename concept folders so every concept gets about the same number of steps.

    Each sub-folder of *folder* is renamed to ``<repeats>_<concept>`` where
    ``repeats`` is ``concept_repeats`` divided by the number of images found
    in that sub-folder (at least 1, or 0 when the folder holds no images).
    A leading ``{N}`` on the folder name multiplies the computed value by N
    and is dropped from the new name. An existing ``<number>_`` prefix is
    replaced rather than stacked.

    Args:
        concept_repeats: Target number of steps per concept; must be > 0.
        folder: Directory whose sub-folders are the concepts to balance.
        insecure: When true, also rename sub-folders that do not already
            follow the ``<number>_<name>`` convention.

    Returns:
        None. Outcome and input errors are reported through ``msgbox``.
    """
    # A cleared numeric field arrives as None, which cannot be compared to 0.
    if concept_repeats is None or not concept_repeats > 0:
        # Display an error message if the total number of repeats is not a valid integer
        msgbox("Please enter a valid integer for the total number of repeats.")
        return

    concept_repeats = int(concept_repeats)

    # Check if folder exist
    if folder == "" or not os.path.isdir(folder):
        msgbox("Please enter a valid folder for balancing.")
        return

    pattern = re.compile(r"^\d+_.+$")
    image_extensions = (".jpg", ".jpeg", ".png", ".gif", ".webp")

    # Iterate over the entries of the selected folder
    for entry in os.listdir(folder):
        if not (pattern.match(entry) or insecure):
            log.info(
                f"Skipping folder {entry} because it does not match kohya_ss expected syntax..."
            )
            continue

        entry_path = os.path.join(folder, entry)

        # Plain files can match the pattern too (or anything in insecure
        # mode); listing them as a directory would raise.
        if not os.path.isdir(entry_path):
            log.info(f"Skipping {entry} because it is not a folder...")
            continue

        # Count the image files directly inside the concept folder
        images = sum(
            1 for name in os.listdir(entry_path) if name.endswith(image_extensions)
        )

        if images == 0:
            log.info(
                f"No images of type .jpg, .jpeg, .png, .gif, .webp were found in {entry_path}"
            )

        # A leading "{N}" asks for the computed repeats to be multiplied by N.
        concept = entry
        multiplier = 1.0
        braces = re.match(r"^\{(\d+\.?\d*)\}", concept)
        if braces:
            multiplier = float(braces.group(1))
            concept = concept[braces.end() :]

        if images:
            repeats = max(1, round(concept_repeats / images * multiplier))
        else:
            repeats = 0

        # Replace an existing "<number>_" prefix instead of stacking a new one
        numbered = re.match(r"^\d+_", concept)
        if numbered:
            concept = concept[numbered.end() :]

        # The rename source must be the entry as it exists on disk, i.e.
        # including any "{N}" prefix that was stripped from ``concept``.
        os.rename(entry_path, os.path.join(folder, f"{repeats}_{concept}"))

    msgbox("Dataset balancing completed...")


def warning(insecure):
    """Ask for confirmation before enabling insecure folder renaming.

    Args:
        insecure: Requested state of the "insecure" checkbox.

    Returns:
        True only when *insecure* is set and the confirmation dialog returns
        a truthy answer; False in every other case, so the checkbox this
        value is written back to always receives an explicit boolean.
    """
    if not insecure:
        return False

    confirmed = boolbox(
        "WARNING!!! You have asked to rename non kohya_ss _ folders...\n\nAre you sure you want to do that?",
        choices=("Yes, I like danger", "No, get me out of here"),
    )
    return bool(confirmed)
+ ) + with gr.Group(), gr.Row(): + + def list_dataset_dirs(path): + nonlocal current_dataset_dir + current_dataset_dir = path + return list(list_dirs(path)) + + select_dataset_folder_input = gr.Dropdown( + label="Dataset folder (folder containing the concepts folders to balance...)", + interactive=True, + choices=[""] + list_dataset_dirs(current_dataset_dir), + value="", + allow_custom_value=True, + ) + create_refresh_button( + select_dataset_folder_input, + lambda: None, + lambda: {"choices": list_dataset_dirs(current_dataset_dir)}, + "open_folder_small", + ) + select_dataset_folder_button = gr.Button( + "📂", + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + select_dataset_folder_button.click( + get_folder_path, + outputs=select_dataset_folder_input, + show_progress=False, + ) + + total_repeats_number = gr.Number( + value=1000, + interactive=True, + label="Training steps per concept per epoch", + ) + select_dataset_folder_input.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_dataset_dirs(path)), + inputs=select_dataset_folder_input, + outputs=select_dataset_folder_input, + show_progress=False, + ) + + with gr.Accordion("Advanced options", open=False): + insecure = gr.Checkbox( + value=False, + label="DANGER!!! 
def copy_info_to_Folders_tab(training_folder):
    """Build dropdown updates pointing at the sub-folders of *training_folder*.

    Args:
        training_folder: Root directory of a prepared training layout.

    Returns:
        A 4-tuple of ``gr.Dropdown`` objects whose values are the ``img``,
        ``reg``, ``model`` and ``log`` sub-folder paths, in that order. The
        ``reg`` value is an empty string when that folder does not exist.
    """
    img_folder = gr.Dropdown(value=os.path.join(training_folder, "img"))

    # Regularisation images are optional, so only advertise the folder when
    # it is really there.
    reg_path = os.path.join(training_folder, "reg")
    reg_folder = gr.Dropdown(value=reg_path if os.path.exists(reg_path) else "")

    model_folder = gr.Dropdown(value=os.path.join(training_folder, "model"))
    log_folder = gr.Dropdown(value=os.path.join(training_folder, "log"))

    return img_folder, reg_folder, model_folder, log_folder


def _replace_with_copy(source_dir, target_dir):
    """Replace *target_dir* with a fresh copy of *source_dir*; return True on success."""
    # Validate the source BEFORE touching the target: removing the old copy
    # and then failing to copy would leave the user with nothing.
    if not os.path.isdir(source_dir):
        log.error(f"Source directory {source_dir} does not exist or is not a directory...")
        return False

    if os.path.exists(target_dir):
        if os.path.samefile(source_dir, target_dir):
            # Removing the target would delete the images we are about to copy.
            log.error(
                f"Source and destination are the same directory ({target_dir})... nothing copied."
            )
            return False
        log.info(f"Removing existing directory {target_dir}...")
        shutil.rmtree(target_dir)

    log.info(f"Copy {source_dir} to {target_dir}...")
    shutil.copytree(source_dir, target_dir)
    return True


def dreambooth_folder_preparation(
    util_training_images_dir_input,
    util_training_images_repeat_input,
    util_instance_prompt_input,
    util_regularization_images_dir_input,
    util_regularization_images_repeat_input,
    util_class_prompt_input,
    util_training_dir_output,
):
    """Create the ``img`` / ``reg`` / ``log`` / ``model`` training layout.

    Training images are copied to
    ``<output>/img/<repeats>_<instance prompt> <class prompt>`` and, when a
    regularisation directory and a positive repeat count are given,
    regularisation images to ``<output>/reg/<repeats>_<class prompt>``.
    Existing target folders are replaced. All inputs are validated before
    anything is created or removed on disk.

    Args:
        util_training_images_dir_input: Directory holding the training images.
        util_training_images_repeat_input: Repeat count for training images.
        util_instance_prompt_input: Instance prompt used in the folder name.
        util_regularization_images_dir_input: Optional regularisation images
            directory; empty to skip.
        util_regularization_images_repeat_input: Repeat count for
            regularisation images; must be > 0 for them to be copied.
        util_class_prompt_input: Class prompt used in the folder names.
        util_training_dir_output: Destination root directory.

    Returns:
        None. Problems are reported through the module logger.
    """
    # Check if the input variables are empty
    if not util_training_dir_output:
        log.info(
            "Destination training directory is missing... can't perform the required task..."
        )
        return

    # Check for instance prompt
    if util_instance_prompt_input == "":
        log.error("Instance prompt missing...")
        return

    # Check for class prompt
    if util_class_prompt_input == "":
        log.error("Class prompt missing...")
        return

    if not util_training_images_dir_input:
        log.info(
            "Training images directory is missing... can't perform the required task..."
        )
        return

    # A cleared numeric field arrives as None and cannot be converted to int.
    if util_training_images_repeat_input is None:
        log.error("Training images repeats is missing... can't perform the required task...")
        return

    # Everything needed is present: only now start touching the file system.
    os.makedirs(util_training_dir_output, exist_ok=True)

    training_dir = os.path.join(
        util_training_dir_output,
        f"img/{int(util_training_images_repeat_input)}_{util_instance_prompt_input} {util_class_prompt_input}",
    )
    if not _replace_with_copy(util_training_images_dir_input, training_dir):
        return

    if util_regularization_images_dir_input:
        if (
            util_regularization_images_repeat_input is None
            or not util_regularization_images_repeat_input > 0
        ):
            log.info("Repeats is missing... not copying regularisation images...")
        else:
            regularization_dir = os.path.join(
                util_training_dir_output,
                f"reg/{int(util_regularization_images_repeat_input)}_{util_class_prompt_input}",
            )
            # Regularisation images are optional: a failed copy is logged by
            # the helper and the rest of the layout is still created.
            _replace_with_copy(util_regularization_images_dir_input, regularization_dir)
    else:
        log.info(
            "Regularization images directory is missing... not copying regularisation images..."
        )

    # Create the log and model folders if they don't exist yet
    os.makedirs(os.path.join(util_training_dir_output, "log"), exist_ok=True)
    os.makedirs(os.path.join(util_training_dir_output, "model"), exist_ok=True)

    log.info(
        f"Done creating kohya_ss training folder structure at {util_training_dir_output}..."
    )
+ ) + with gr.Row(): + util_instance_prompt_input = gr.Textbox( + label="Instance prompt", + placeholder="Eg: asd", + interactive=True, + value=config.get(key="dataset_preparation.instance_prompt", default=""), + ) + util_class_prompt_input = gr.Textbox( + label="Class prompt", + placeholder="Eg: person", + interactive=True, + value=config.get(key="dataset_preparation.class_prompt", default=""), + ) + with gr.Group(), gr.Row(): + + def list_train_data_dirs(path): + nonlocal current_train_data_dir + current_train_data_dir = path + return list(list_dirs(path)) + + util_training_images_dir_input = gr.Dropdown( + label="Training images (directory containing the training images)", + interactive=True, + choices=[ + config.get(key="dataset_preparation.images_folder", default="") + ] + + list_train_data_dirs(current_train_data_dir), + value=config.get(key="dataset_preparation.images_folder", default=""), + allow_custom_value=True, + ) + create_refresh_button( + util_training_images_dir_input, + lambda: None, + lambda: {"choices": list_train_data_dirs(current_train_data_dir)}, + "open_folder_small", + ) + button_util_training_images_dir_input = gr.Button( + "📂", + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_util_training_images_dir_input.click( + get_folder_path, + outputs=util_training_images_dir_input, + show_progress=False, + ) + util_training_images_repeat_input = gr.Number( + label="Repeats", + value=config.get(key="dataset_preparation.util_training_images_repeat_input", default=40), + interactive=True, + elem_id="number_input", + ) + util_training_images_dir_input.change( + fn=lambda path: gr.Dropdown(choices=[config.get(key="dataset_preparation.images_folder", default="")] + list_train_data_dirs(path)), + inputs=util_training_images_dir_input, + outputs=util_training_images_dir_input, + show_progress=False, + ) + + with gr.Group(), gr.Row(): + + def list_reg_data_dirs(path): + nonlocal current_reg_data_dir + 
current_reg_data_dir = path + return list(list_dirs(path)) + + util_regularization_images_dir_input = gr.Dropdown( + label="Regularisation images (Optional. directory containing the regularisation images)", + interactive=True, + choices=[ + config.get(key="dataset_preparation.reg_images_folder", default="") + ] + + list_reg_data_dirs(current_reg_data_dir), + value=config.get( + key="dataset_preparation.reg_images_folder", default="" + ), + allow_custom_value=True, + ) + create_refresh_button( + util_regularization_images_dir_input, + lambda: None, + lambda: {"choices": list_reg_data_dirs(current_reg_data_dir)}, + "open_folder_small", + ) + button_util_regularization_images_dir_input = gr.Button( + "📂", + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_util_regularization_images_dir_input.click( + get_folder_path, + outputs=util_regularization_images_dir_input, + show_progress=False, + ) + util_regularization_images_repeat_input = gr.Number( + label="Repeats", + value=config.get( + key="dataset_preparation.util_regularization_images_repeat_input", + default=1 + ), + interactive=True, + elem_id="number_input", + ) + util_regularization_images_dir_input.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_reg_data_dirs(path)), + inputs=util_regularization_images_dir_input, + outputs=util_regularization_images_dir_input, + show_progress=False, + ) + with gr.Group(), gr.Row(): + + def list_train_output_dirs(path): + nonlocal current_train_output_dir + current_train_output_dir = path + return list(list_dirs(path)) + + util_training_dir_output = gr.Dropdown( + label="Destination training directory (where formatted training and regularisation folders will be placed)", + interactive=True, + choices=[config.get(key="train_data_dir", default="")] + + list_train_output_dirs(current_train_output_dir), + value=config.get(key="train_data_dir", default=""), + allow_custom_value=True, + ) + create_refresh_button( + 
util_training_dir_output, + lambda: None, + lambda: {"choices": list_train_output_dirs(current_train_output_dir)}, + "open_folder_small", + ) + button_util_training_dir_output = gr.Button( + "📂", + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_util_training_dir_output.click( + get_folder_path, outputs=util_training_dir_output + ) + util_training_dir_output.change( + fn=lambda path: gr.Dropdown( + choices=[config.get(key="train_data_dir", default="")] + list_train_output_dirs(path) + ), + inputs=util_training_dir_output, + outputs=util_training_dir_output, + show_progress=False, + ) + button_prepare_training_data = gr.Button("Prepare training data") + button_prepare_training_data.click( + dreambooth_folder_preparation, + inputs=[ + util_training_images_dir_input, + util_training_images_repeat_input, + util_instance_prompt_input, + util_regularization_images_dir_input, + util_regularization_images_repeat_input, + util_class_prompt_input, + util_training_dir_output, + ], + show_progress=False, + ) + + + button_copy_info_to_Folders_tab = gr.Button('Copy info to respective fields') + button_copy_info_to_Folders_tab.click( + copy_info_to_Folders_tab, + inputs=[util_training_dir_output], + outputs=[ + train_data_dir_input, + reg_data_dir_input, + output_dir_input, + logging_dir_input, + ], + show_progress=False, + ) diff --git a/kohya_gui/dreambooth_gui.py b/kohya_gui/dreambooth_gui.py new file mode 100644 index 0000000000000000000000000000000000000000..ead28311a6338b52c14b7d8e42a7acdf4131a813 --- /dev/null +++ b/kohya_gui/dreambooth_gui.py @@ -0,0 +1,1184 @@ +import gradio as gr +import json +import math +import os +import time +import sys +import toml +from datetime import datetime +from .common_gui import ( + check_if_model_exist, + color_aug_changed, + get_executable_path, + get_file_path, + get_saveasfile_path, + print_command_and_toml, + run_cmd_advanced_training, + SaveConfigFile, + scriptdir, + update_my_data, + 
def save_configuration(
    save_as_bool,
    file_path,
    pretrained_model_name_or_path,
    v2,
    v_parameterization,
    sdxl,
    logging_dir,
    train_data_dir,
    reg_data_dir,
    output_dir,
    dataset_config,
    max_resolution,
    learning_rate,
    learning_rate_te,
    learning_rate_te1,
    learning_rate_te2,
    lr_scheduler,
    lr_warmup,
    train_batch_size,
    epoch,
    save_every_n_epochs,
    mixed_precision,
    save_precision,
    seed,
    num_cpu_threads_per_process,
    cache_latents,
    cache_latents_to_disk,
    caption_extension,
    enable_bucket,
    gradient_checkpointing,
    full_fp16,
    full_bf16,
    no_token_padding,
    stop_text_encoder_training,
    min_bucket_reso,
    max_bucket_reso,
    # use_8bit_adam,
    xformers,
    save_model_as,
    shuffle_caption,
    save_state,
    save_state_on_train_end,
    resume,
    prior_loss_weight,
    color_aug,
    flip_aug,
    masked_loss,
    clip_skip,
    vae,
    dynamo_backend,
    dynamo_mode,
    dynamo_use_fullgraph,
    dynamo_use_dynamic,
    extra_accelerate_launch_args,
    num_processes,
    num_machines,
    multi_gpu,
    gpu_ids,
    main_process_port,
    output_name,
    max_token_length,
    max_train_epochs,
    max_train_steps,
    max_data_loader_n_workers,
    mem_eff_attn,
    gradient_accumulation_steps,
    model_list,
    keep_tokens,
    lr_scheduler_num_cycles,
    lr_scheduler_power,
    persistent_data_loader_workers,
    bucket_no_upscale,
    random_crop,
    bucket_reso_steps,
    v_pred_like_loss,
    caption_dropout_every_n_epochs,
    caption_dropout_rate,
    optimizer,
    optimizer_args,
    lr_scheduler_args,
    noise_offset_type,
    noise_offset,
    noise_offset_random_strength,
    adaptive_noise_scale,
    multires_noise_iterations,
    multires_noise_discount,
    ip_noise_gamma,
    ip_noise_gamma_random_strength,
    sample_every_n_steps,
    sample_every_n_epochs,
    sample_sampler,
    sample_prompts,
    additional_parameters,
    loss_type,
    huber_schedule,
    huber_c,
    vae_batch_size,
    min_snr_gamma,
    weighted_captions,
    save_every_n_steps,
    save_last_n_steps,
    save_last_n_steps_state,
    log_with,
    wandb_api_key,
    wandb_run_name,
    log_tracker_name,
    log_tracker_config,
    scale_v_pred_loss_like_noise_pred,
    min_timestep,
    max_timestep,
    debiased_estimation_loss,
    huggingface_repo_id,
    huggingface_token,
    huggingface_repo_type,
    huggingface_repo_visibility,
    huggingface_path_in_repo,
    save_state_to_huggingface,
    resume_from_huggingface,
    async_upload,
    metadata_author,
    metadata_description,
    metadata_license,
    metadata_tags,
    metadata_title,
):
    """Write the current GUI settings to a configuration file.

    Args:
        save_as_bool: When true, always ask the user for a destination.
            Otherwise the user is only asked when *file_path* is empty.
        file_path: Currently selected configuration file, possibly empty.
        (remaining parameters): The GUI values to persist; they are passed to
            ``SaveConfigFile`` as ``(name, value)`` pairs.

    Returns:
        The path that was written, or the incoming *file_path* unchanged when
        no destination was chosen.
    """
    # Must stay the first statement: it snapshots exactly the call arguments,
    # before any other local variable exists.
    parameters = list(locals().items())

    original_file_path = file_path

    if save_as_bool:
        log.info("Save as...")
        file_path = get_saveasfile_path(file_path)
    else:
        log.info("Save...")
        if not file_path:
            file_path = get_saveasfile_path(file_path)

    if not file_path:
        # In case a file_path was provided and the user decided to cancel
        # the save dialog, keep what was selected before.
        return original_file_path

    # Create the destination directory if needed. A bare file name has an
    # empty dirname, and os.makedirs("") would raise.
    destination_directory = os.path.dirname(file_path)
    if destination_directory:
        os.makedirs(destination_directory, exist_ok=True)

    SaveConfigFile(
        parameters=parameters,
        file_path=file_path,
        # "save_as_bool" is how the dialog flag is actually named in this
        # signature; "save_as" is kept for backward compatibility.
        # NOTE(review): assumes SaveConfigFile drops entries by parameter
        # name -- confirm against its definition in common_gui.
        exclusion=["file_path", "save_as", "save_as_bool"],
    )

    return file_path


def open_configuration(
    ask_for_file,
    file_path,
    pretrained_model_name_or_path,
    v2,
    v_parameterization,
    sdxl,
    logging_dir,
    train_data_dir,
    reg_data_dir,
    output_dir,
    dataset_config,
    max_resolution,
    learning_rate,
    learning_rate_te,
    learning_rate_te1,
    learning_rate_te2,
    lr_scheduler,
    lr_warmup,
    train_batch_size,
    epoch,
    save_every_n_epochs,
    mixed_precision,
    save_precision,
    seed,
    num_cpu_threads_per_process,
    cache_latents,
    cache_latents_to_disk,
    caption_extension,
    enable_bucket,
    gradient_checkpointing,
    full_fp16,
    full_bf16,
    no_token_padding,
    stop_text_encoder_training,
    min_bucket_reso,
    max_bucket_reso,
    # use_8bit_adam,
    xformers,
    save_model_as,
    shuffle_caption,
    save_state,
    save_state_on_train_end,
    resume,
    prior_loss_weight,
    color_aug,
    flip_aug,
    masked_loss,
    clip_skip,
    vae,
    dynamo_backend,
    dynamo_mode,
    dynamo_use_fullgraph,
    dynamo_use_dynamic,
    extra_accelerate_launch_args,
    num_processes,
    num_machines,
    multi_gpu,
    gpu_ids,
    main_process_port,
    output_name,
    max_token_length,
    max_train_epochs,
    max_train_steps,
    max_data_loader_n_workers,
    mem_eff_attn,
    gradient_accumulation_steps,
    model_list,
    keep_tokens,
    lr_scheduler_num_cycles,
    lr_scheduler_power,
    persistent_data_loader_workers,
    bucket_no_upscale,
    random_crop,
    bucket_reso_steps,
    v_pred_like_loss,
    caption_dropout_every_n_epochs,
    caption_dropout_rate,
    optimizer,
    optimizer_args,
    lr_scheduler_args,
    noise_offset_type,
    noise_offset,
    noise_offset_random_strength,
    adaptive_noise_scale,
    multires_noise_iterations,
    multires_noise_discount,
    ip_noise_gamma,
    ip_noise_gamma_random_strength,
    sample_every_n_steps,
    sample_every_n_epochs,
    sample_sampler,
    sample_prompts,
    additional_parameters,
    loss_type,
    huber_schedule,
    huber_c,
    vae_batch_size,
    min_snr_gamma,
    weighted_captions,
    save_every_n_steps,
    save_last_n_steps,
    save_last_n_steps_state,
    log_with,
    wandb_api_key,
    wandb_run_name,
    log_tracker_name,
    log_tracker_config,
    scale_v_pred_loss_like_noise_pred,
    min_timestep,
    max_timestep,
    debiased_estimation_loss,
    huggingface_repo_id,
    huggingface_token,
    huggingface_repo_type,
    huggingface_repo_visibility,
    huggingface_path_in_repo,
    save_state_to_huggingface,
    resume_from_huggingface,
    async_upload,
    metadata_author,
    metadata_description,
    metadata_license,
    metadata_tags,
    metadata_title,
):
    """Load GUI settings from a JSON configuration file.

    Args:
        ask_for_file: When true, let the user pick the file through
            ``get_file_path`` first.
        file_path: Currently selected configuration file, possibly empty.
        (remaining parameters): Current GUI values; each one is returned
            unchanged unless the loaded file holds a value under the same name.

    Returns:
        A tuple starting with the effective file path, followed by one value
        per remaining parameter in signature order (``ask_for_file`` and
        ``file_path`` themselves are skipped).
    """
    # Must stay the first statement: it snapshots exactly the call arguments,
    # before any other local variable exists.
    parameters = list(locals().items())

    original_file_path = file_path

    if ask_for_file:
        file_path = get_file_path(file_path)

    my_data = {}
    if file_path:
        try:
            # load variables from JSON file
            with open(file_path, "r", encoding="utf-8") as f:
                my_data = json.load(f)
        except (OSError, json.JSONDecodeError) as e:
            # An unreadable or corrupt file must not break the GUI callback:
            # report it and fall back to the values currently on screen.
            log.error(f"Could not load config file {file_path}: {e}")
            file_path = original_file_path
            my_data = {}
        else:
            log.info("Loading config...")
            # Update values to fix deprecated use_8bit_adam checkbox and set
            # appropriate optimizer if it is set to True
            my_data = update_my_data(my_data)
    else:
        # In case a file_path was provided and the user decided to cancel
        # the open action, keep what was selected before.
        file_path = original_file_path

    values = [file_path]
    # Prefer the value stored in the file; otherwise keep the current one.
    values.extend(
        my_data.get(key, value)
        for key, value in parameters
        if key not in ("ask_for_file", "file_path")
    )
    return tuple(values)
+ logging_dir, + train_data_dir, + reg_data_dir, + output_dir, + dataset_config, + max_resolution, + learning_rate, + learning_rate_te, + learning_rate_te1, + learning_rate_te2, + lr_scheduler, + lr_warmup, + train_batch_size, + epoch, + save_every_n_epochs, + mixed_precision, + save_precision, + seed, + num_cpu_threads_per_process, + cache_latents, + cache_latents_to_disk, + caption_extension, + enable_bucket, + gradient_checkpointing, + full_fp16, + full_bf16, + no_token_padding, + stop_text_encoder_training, + min_bucket_reso, + max_bucket_reso, + # use_8bit_adam, + xformers, + save_model_as, + shuffle_caption, + save_state, + save_state_on_train_end, + resume, + prior_loss_weight, + color_aug, + flip_aug, + masked_loss, + clip_skip, + vae, + dynamo_backend, + dynamo_mode, + dynamo_use_fullgraph, + dynamo_use_dynamic, + extra_accelerate_launch_args, + num_processes, + num_machines, + multi_gpu, + gpu_ids, + main_process_port, + output_name, + max_token_length, + max_train_epochs, + max_train_steps, + max_data_loader_n_workers, + mem_eff_attn, + gradient_accumulation_steps, + model_list, # Keep this. 
Yes, it is unused here but required given the common list used + keep_tokens, + lr_scheduler_num_cycles, + lr_scheduler_power, + persistent_data_loader_workers, + bucket_no_upscale, + random_crop, + bucket_reso_steps, + v_pred_like_loss, + caption_dropout_every_n_epochs, + caption_dropout_rate, + optimizer, + optimizer_args, + lr_scheduler_args, + noise_offset_type, + noise_offset, + noise_offset_random_strength, + adaptive_noise_scale, + multires_noise_iterations, + multires_noise_discount, + ip_noise_gamma, + ip_noise_gamma_random_strength, + sample_every_n_steps, + sample_every_n_epochs, + sample_sampler, + sample_prompts, + additional_parameters, + loss_type, + huber_schedule, + huber_c, + vae_batch_size, + min_snr_gamma, + weighted_captions, + save_every_n_steps, + save_last_n_steps, + save_last_n_steps_state, + log_with, + wandb_api_key, + wandb_run_name, + log_tracker_name, + log_tracker_config, + scale_v_pred_loss_like_noise_pred, + min_timestep, + max_timestep, + debiased_estimation_loss, + huggingface_repo_id, + huggingface_token, + huggingface_repo_type, + huggingface_repo_visibility, + huggingface_path_in_repo, + save_state_to_huggingface, + resume_from_huggingface, + async_upload, + metadata_author, + metadata_description, + metadata_license, + metadata_tags, + metadata_title, +): + # Get list of function parameters and values + parameters = list(locals().items()) + global train_state_value + + TRAIN_BUTTON_VISIBLE = [ + gr.Button(visible=True), + gr.Button(visible=False or headless), + gr.Textbox(value=train_state_value), + ] + + if executor.is_running(): + log.error("Training is already running. 
Can't start another training session.") + return TRAIN_BUTTON_VISIBLE + + log.info(f"Start training Dreambooth...") + + log.info(f"Validating lr scheduler arguments...") + if not validate_args_setting(lr_scheduler_args): + return + + log.info(f"Validating optimizer arguments...") + if not validate_args_setting(optimizer_args): + return TRAIN_BUTTON_VISIBLE + + # + # Validate paths + # + + if not validate_file_path(dataset_config): + return TRAIN_BUTTON_VISIBLE + + if not validate_file_path(log_tracker_config): + return TRAIN_BUTTON_VISIBLE + + if not validate_folder_path(logging_dir, can_be_written_to=True, create_if_not_exists=True): + return TRAIN_BUTTON_VISIBLE + + if not validate_folder_path(output_dir, can_be_written_to=True, create_if_not_exists=True): + return TRAIN_BUTTON_VISIBLE + + if not validate_model_path(pretrained_model_name_or_path): + return TRAIN_BUTTON_VISIBLE + + if not validate_folder_path(reg_data_dir): + return TRAIN_BUTTON_VISIBLE + + if not validate_file_path(resume): + return TRAIN_BUTTON_VISIBLE + + if not validate_folder_path(train_data_dir): + return TRAIN_BUTTON_VISIBLE + + if not validate_model_path(vae): + return TRAIN_BUTTON_VISIBLE + # + # End of path validation + # + + # This function validates files or folder paths. 
Simply add new variables containing file of folder path + # to validate below + # if not validate_paths( + # dataset_config=dataset_config, + # headless=headless, + # log_tracker_config=log_tracker_config, + # logging_dir=logging_dir, + # output_dir=output_dir, + # pretrained_model_name_or_path=pretrained_model_name_or_path, + # reg_data_dir=reg_data_dir, + # resume=resume, + # train_data_dir=train_data_dir, + # vae=vae, + # ): + # return TRAIN_BUTTON_VISIBLE + + if not print_only and check_if_model_exist( + output_name, output_dir, save_model_as, headless=headless + ): + return TRAIN_BUTTON_VISIBLE + + if dataset_config: + log.info( + "Dataset config toml file used, skipping total_steps, train_batch_size, gradient_accumulation_steps, epoch, reg_factor, max_train_steps calculations..." + ) + if max_train_steps > 0: + if lr_warmup != 0: + lr_warmup_steps = round( + float(int(lr_warmup) * int(max_train_steps) / 100) + ) + else: + lr_warmup_steps = 0 + else: + lr_warmup_steps = 0 + + if max_train_steps == 0: + max_train_steps_info = f"Max train steps: 0. sd-scripts will therefore default to 1600. Please specify a different value if required." 
+ else: + max_train_steps_info = f"Max train steps: {max_train_steps}" + else: + if train_data_dir == "": + log.error("Train data dir is empty") + return TRAIN_BUTTON_VISIBLE + + # Get a list of all subfolders in train_data_dir + subfolders = [ + f + for f in os.listdir(train_data_dir) + if os.path.isdir(os.path.join(train_data_dir, f)) + ] + + total_steps = 0 + + # Loop through each subfolder and extract the number of repeats + for folder in subfolders: + try: + # Extract the number of repeats from the folder name + repeats = int(folder.split("_")[0]) + log.info(f"Folder {folder}: {repeats} repeats found") + + # Count the number of images in the folder + num_images = len( + [ + f + for f, lower_f in ( + (file, file.lower()) + for file in os.listdir(os.path.join(train_data_dir, folder)) + ) + if lower_f.endswith((".jpg", ".jpeg", ".png", ".webp")) + ] + ) + + log.info(f"Folder {folder}: {num_images} images found") + + # Calculate the total number of steps for this folder + steps = repeats * num_images + + # log.info the result + log.info(f"Folder {folder}: {num_images} * {repeats} = {steps} steps") + + total_steps += steps + + except ValueError: + # Handle the case where the folder name does not contain an underscore + log.info( + f"Error: '{folder}' does not contain an underscore, skipping..." + ) + + if reg_data_dir == "": + reg_factor = 1 + else: + log.warning( + "Regularisation images are used... Will double the number of steps required..." 
+ ) + reg_factor = 2 + + log.info(f"Regulatization factor: {reg_factor}") + + if max_train_steps == 0: + # calculate max_train_steps + max_train_steps = int( + math.ceil( + float(total_steps) + / int(train_batch_size) + / int(gradient_accumulation_steps) + * int(epoch) + * int(reg_factor) + ) + ) + max_train_steps_info = f"max_train_steps ({total_steps} / {train_batch_size} / {gradient_accumulation_steps} * {epoch} * {reg_factor}) = {max_train_steps}" + else: + if max_train_steps == 0: + max_train_steps_info = f"Max train steps: 0. sd-scripts will therefore default to 1600. Please specify a different value if required." + else: + max_train_steps_info = f"Max train steps: {max_train_steps}" + + if lr_warmup != 0: + lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100)) + else: + lr_warmup_steps = 0 + + log.info(f"Total steps: {total_steps}") + + log.info(f"Train batch size: {train_batch_size}") + log.info(f"Gradient accumulation steps: {gradient_accumulation_steps}") + log.info(f"Epoch: {epoch}") + log.info(max_train_steps_info) + log.info(f"lr_warmup_steps = {lr_warmup_steps}") + + accelerate_path = get_executable_path("accelerate") + if accelerate_path == "": + log.error("accelerate not found") + return TRAIN_BUTTON_VISIBLE + + run_cmd = [rf'{accelerate_path}', "launch"] + + run_cmd = AccelerateLaunch.run_cmd( + run_cmd=run_cmd, + dynamo_backend=dynamo_backend, + dynamo_mode=dynamo_mode, + dynamo_use_fullgraph=dynamo_use_fullgraph, + dynamo_use_dynamic=dynamo_use_dynamic, + num_processes=num_processes, + num_machines=num_machines, + multi_gpu=multi_gpu, + gpu_ids=gpu_ids, + main_process_port=main_process_port, + num_cpu_threads_per_process=num_cpu_threads_per_process, + mixed_precision=mixed_precision, + extra_accelerate_launch_args=extra_accelerate_launch_args, + ) + + if sdxl: + run_cmd.append(rf'{scriptdir}/sd-scripts/sdxl_train.py') + else: + run_cmd.append(rf"{scriptdir}/sd-scripts/train_db.py") + + if max_data_loader_n_workers == "" or 
None: + max_data_loader_n_workers = 0 + else: + max_data_loader_n_workers = int(max_data_loader_n_workers) + + if max_train_steps == "" or None: + max_train_steps = 0 + else: + max_train_steps = int(max_train_steps) + + # def save_huggingface_to_toml(self, toml_file_path: str): + config_toml_data = { + # Update the values in the TOML data + "adaptive_noise_scale": adaptive_noise_scale if not 0 else None, + "async_upload": async_upload, + "bucket_no_upscale": bucket_no_upscale, + "bucket_reso_steps": bucket_reso_steps, + "cache_latents": cache_latents, + "cache_latents_to_disk": cache_latents_to_disk, + "caption_dropout_every_n_epochs": int(caption_dropout_every_n_epochs), + "caption_dropout_rate": caption_dropout_rate, + "caption_extension": caption_extension, + "clip_skip": clip_skip if clip_skip != 0 else None, + "color_aug": color_aug, + "dataset_config": dataset_config, + "debiased_estimation_loss": debiased_estimation_loss, + "dynamo_backend": dynamo_backend, + "enable_bucket": enable_bucket, + "epoch": int(epoch), + "flip_aug": flip_aug, + "full_bf16": full_bf16, + "full_fp16": full_fp16, + "gradient_accumulation_steps": int(gradient_accumulation_steps), + "gradient_checkpointing": gradient_checkpointing, + "huber_c": huber_c, + "huber_schedule": huber_schedule, + "huggingface_path_in_repo": huggingface_path_in_repo, + "huggingface_repo_id": huggingface_repo_id, + "huggingface_repo_type": huggingface_repo_type, + "huggingface_repo_visibility": huggingface_repo_visibility, + "huggingface_token": huggingface_token, + "ip_noise_gamma": ip_noise_gamma if ip_noise_gamma != 0 else None, + "ip_noise_gamma_random_strength": ip_noise_gamma_random_strength, + "keep_tokens": int(keep_tokens), + "learning_rate": learning_rate, # both for sd1.5 and sdxl + "learning_rate_te": ( + learning_rate_te if not sdxl and not 0 else None + ), # only for sd1.5 and not 0 + "learning_rate_te1": ( + learning_rate_te1 if sdxl and not 0 else None + ), # only for sdxl and not 0 + 
"learning_rate_te2": ( + learning_rate_te2 if sdxl and not 0 else None + ), # only for sdxl and not 0 + "logging_dir": logging_dir, + "log_tracker_config": log_tracker_config, + "log_tracker_name": log_tracker_name, + "log_with": log_with, + "loss_type": loss_type, + "lr_scheduler": lr_scheduler, + "lr_scheduler_args": str(lr_scheduler_args).replace('"', "").split(), + "lr_scheduler_num_cycles": ( + int(lr_scheduler_num_cycles) if lr_scheduler_num_cycles != "" else int(epoch) + ), + "lr_scheduler_power": lr_scheduler_power, + "lr_warmup_steps": lr_warmup_steps, + "masked_loss": masked_loss, + "max_bucket_reso": max_bucket_reso, + "max_timestep": max_timestep if max_timestep != 0 else None, + "max_token_length": int(max_token_length), + "max_train_epochs": int(max_train_epochs) if int(max_train_epochs) != 0 else None, + "max_train_steps": int(max_train_steps) if int(max_train_steps) != 0 else None, + "mem_eff_attn": mem_eff_attn, + "metadata_author": metadata_author, + "metadata_description": metadata_description, + "metadata_license": metadata_license, + "metadata_tags": metadata_tags, + "metadata_title": metadata_title, + "min_bucket_reso": int(min_bucket_reso), + "min_snr_gamma": min_snr_gamma if min_snr_gamma != 0 else None, + "min_timestep": min_timestep if min_timestep != 0 else None, + "mixed_precision": mixed_precision, + "multires_noise_discount": multires_noise_discount, + "multires_noise_iterations": multires_noise_iterations if not 0 else None, + "no_token_padding": no_token_padding, + "noise_offset": noise_offset if not 0 else None, + "noise_offset_random_strength": noise_offset_random_strength, + "noise_offset_type": noise_offset_type, + "optimizer_args": ( + str(optimizer_args).replace('"', "").split() + if optimizer_args != "" + else None + ), + "optimizer_type": optimizer, + "output_dir": output_dir, + "output_name": output_name, + "persistent_data_loader_workers": int(persistent_data_loader_workers), + "pretrained_model_name_or_path": 
pretrained_model_name_or_path, + "prior_loss_weight": prior_loss_weight, + "random_crop": random_crop, + "reg_data_dir": reg_data_dir, + "resolution": max_resolution, + "resume": resume, + "resume_from_huggingface": resume_from_huggingface, + "sample_every_n_epochs": ( + sample_every_n_epochs if sample_every_n_epochs != 0 else None + ), + "sample_every_n_steps": ( + sample_every_n_steps if sample_every_n_steps != 0 else None + ), + "sample_prompts": create_prompt_file(sample_prompts, output_dir), + "sample_sampler": sample_sampler, + "save_every_n_epochs": ( + save_every_n_epochs if save_every_n_epochs != 0 else None + ), + "save_every_n_steps": save_every_n_steps if save_every_n_steps != 0 else None, + "save_last_n_steps": save_last_n_steps if save_last_n_steps != 0 else None, + "save_last_n_steps_state": ( + save_last_n_steps_state if save_last_n_steps_state != 0 else None + ), + "save_model_as": save_model_as, + "save_precision": save_precision, + "save_state": save_state, + "save_state_on_train_end": save_state_on_train_end, + "save_state_to_huggingface": save_state_to_huggingface, + "scale_v_pred_loss_like_noise_pred": scale_v_pred_loss_like_noise_pred, + "sdpa": True if xformers == "sdpa" else None, + "seed": int(seed) if int(seed) != 0 else None, + "shuffle_caption": shuffle_caption, + "stop_text_encoder_training": ( + stop_text_encoder_training if stop_text_encoder_training != 0 else None + ), + "train_batch_size": train_batch_size, + "train_data_dir": train_data_dir, + "v2": v2, + "v_parameterization": v_parameterization, + "v_pred_like_loss": v_pred_like_loss if v_pred_like_loss != 0 else None, + "vae": vae, + "vae_batch_size": vae_batch_size if vae_batch_size != 0 else None, + "wandb_api_key": wandb_api_key, + "wandb_run_name": wandb_run_name, + "weighted_captions": weighted_captions, + "xformers": True if xformers == "xformers" else None, + } + + # Given dictionary `config_toml_data` + # Remove all values = "" and values = False + config_toml_data = { + 
def dreambooth_tab(
    headless=False,
    config: KohyaSSGUIConfig = None,
    use_shell_flag: bool = False,
):
    """Build the Dreambooth "Training" tab and wire up its Gradio events.

    The function instantiates the configuration, accelerate-launch, source
    model, folders, metadata, dataset-preparation, parameter, sample and
    HuggingFace sections, gathers their components into one ordered
    ``settings_list`` and connects the open/load/save configuration buttons,
    the run/stop training buttons and the "Print training command" button.

    Args:
        headless: Forwarded to the sub-components and also exposed to
            ``train_model`` through a hidden checkbox.
        config: Configuration object forwarded to the sub-components. When
            omitted, an empty dict is used (a fresh one per call).
        use_shell_flag: Value stored in the module-level ``use_shell``.

    Returns:
        A 4-tuple of components: ``(train_data_dir, reg_data_dir,
        output_dir, logging_dir)``.

    Side effects:
        Rebinds the module-level ``use_shell``, ``huggingface`` and
        ``executor`` names.
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed source; confirm the layout against the original.
    global use_shell, huggingface, executor

    # A literal ``{}`` default would be one dict shared by every call.
    if config is None:
        config = {}

    # Hidden components used to feed constant values into event handlers.
    dummy_db_true = gr.Checkbox(value=True, visible=False)
    dummy_db_false = gr.Checkbox(value=False, visible=False)
    dummy_headless = gr.Checkbox(value=headless, visible=False)

    use_shell = use_shell_flag

    with gr.Tab("Training"), gr.Column(variant="compact"):
        gr.Markdown("Train a custom model using kohya dreambooth python code...")

        # Setup Configuration Files Gradio
        with gr.Accordion("Configuration", open=False):
            configuration = ConfigurationFile(headless=headless, config=config)

        with gr.Accordion("Accelerate launch", open=False), gr.Column():
            accelerate_launch = AccelerateLaunch(config=config)

        with gr.Column():
            source_model = SourceModel(headless=headless, config=config)

        with gr.Accordion("Folders", open=False), gr.Group():
            folders = Folders(headless=headless, config=config)

        with gr.Accordion("Metadata", open=False), gr.Group():
            metadata = MetaData(config=config)

        with gr.Accordion("Dataset Preparation", open=False):
            gr.Markdown(
                "This section provide Dreambooth tools to help setup your dataset..."
            )
            gradio_dreambooth_folder_creation_tab(
                train_data_dir_input=source_model.train_data_dir,
                reg_data_dir_input=folders.reg_data_dir,
                output_dir_input=folders.output_dir,
                logging_dir_input=folders.logging_dir,
                headless=headless,
                config=config,
            )

            gradio_dataset_balancing_tab(headless=headless)

        with gr.Accordion("Parameters", open=False), gr.Column():
            # ``open`` is a boolean flag; the string "True" only worked
            # because any non-empty string is truthy.
            with gr.Accordion("Basic", open=True):
                with gr.Group(elem_id="basic_tab"):
                    basic_training = BasicTraining(
                        learning_rate_value=1e-5,
                        lr_scheduler_value="cosine",
                        lr_warmup_value=10,
                        dreambooth=True,
                        sdxl_checkbox=source_model.sdxl_checkbox,
                        config=config,
                    )

            with gr.Accordion("Advanced", open=False, elem_id="advanced_tab"):
                advanced_training = AdvancedTraining(headless=headless, config=config)
                # Changing color augmentation feeds back into the
                # cache_latents control via color_aug_changed.
                advanced_training.color_aug.change(
                    color_aug_changed,
                    inputs=[advanced_training.color_aug],
                    outputs=[basic_training.cache_latents],
                )

            with gr.Accordion("Samples", open=False, elem_id="samples_tab"):
                sample = SampleImages(config=config)

            with gr.Accordion("HuggingFace", open=False):
                huggingface = HuggingFace(config=config)

        executor = CommandExecutor(headless=headless)

        with gr.Column(), gr.Group():
            with gr.Row():
                button_print = gr.Button("Print training command")

        # Setup gradio tensorboard buttons
        TensorboardManager(headless=headless, logging_dir=folders.logging_dir)

        # Order matters: these components are handed positionally to
        # train_model, open_configuration and save_configuration.
        settings_list = [
            source_model.pretrained_model_name_or_path,
            source_model.v2,
            source_model.v_parameterization,
            source_model.sdxl_checkbox,
            folders.logging_dir,
            source_model.train_data_dir,
            folders.reg_data_dir,
            folders.output_dir,
            source_model.dataset_config,
            basic_training.max_resolution,
            basic_training.learning_rate,
            basic_training.learning_rate_te,
            basic_training.learning_rate_te1,
            basic_training.learning_rate_te2,
            basic_training.lr_scheduler,
            basic_training.lr_warmup,
            basic_training.train_batch_size,
            basic_training.epoch,
            basic_training.save_every_n_epochs,
            accelerate_launch.mixed_precision,
            source_model.save_precision,
            basic_training.seed,
            accelerate_launch.num_cpu_threads_per_process,
            basic_training.cache_latents,
            basic_training.cache_latents_to_disk,
            basic_training.caption_extension,
            basic_training.enable_bucket,
            advanced_training.gradient_checkpointing,
            advanced_training.full_fp16,
            advanced_training.full_bf16,
            advanced_training.no_token_padding,
            basic_training.stop_text_encoder_training,
            basic_training.min_bucket_reso,
            basic_training.max_bucket_reso,
            advanced_training.xformers,
            source_model.save_model_as,
            advanced_training.shuffle_caption,
            advanced_training.save_state,
            advanced_training.save_state_on_train_end,
            advanced_training.resume,
            advanced_training.prior_loss_weight,
            advanced_training.color_aug,
            advanced_training.flip_aug,
            advanced_training.masked_loss,
            advanced_training.clip_skip,
            advanced_training.vae,
            accelerate_launch.dynamo_backend,
            accelerate_launch.dynamo_mode,
            accelerate_launch.dynamo_use_fullgraph,
            accelerate_launch.dynamo_use_dynamic,
            accelerate_launch.extra_accelerate_launch_args,
            accelerate_launch.num_processes,
            accelerate_launch.num_machines,
            accelerate_launch.multi_gpu,
            accelerate_launch.gpu_ids,
            accelerate_launch.main_process_port,
            source_model.output_name,
            advanced_training.max_token_length,
            basic_training.max_train_epochs,
            basic_training.max_train_steps,
            advanced_training.max_data_loader_n_workers,
            advanced_training.mem_eff_attn,
            advanced_training.gradient_accumulation_steps,
            source_model.model_list,
            advanced_training.keep_tokens,
            basic_training.lr_scheduler_num_cycles,
            basic_training.lr_scheduler_power,
            advanced_training.persistent_data_loader_workers,
            advanced_training.bucket_no_upscale,
            advanced_training.random_crop,
            advanced_training.bucket_reso_steps,
            advanced_training.v_pred_like_loss,
            advanced_training.caption_dropout_every_n_epochs,
            advanced_training.caption_dropout_rate,
            basic_training.optimizer,
            basic_training.optimizer_args,
            basic_training.lr_scheduler_args,
            advanced_training.noise_offset_type,
            advanced_training.noise_offset,
            advanced_training.noise_offset_random_strength,
            advanced_training.adaptive_noise_scale,
            advanced_training.multires_noise_iterations,
            advanced_training.multires_noise_discount,
            advanced_training.ip_noise_gamma,
            advanced_training.ip_noise_gamma_random_strength,
            sample.sample_every_n_steps,
            sample.sample_every_n_epochs,
            sample.sample_sampler,
            sample.sample_prompts,
            advanced_training.additional_parameters,
            advanced_training.loss_type,
            advanced_training.huber_schedule,
            advanced_training.huber_c,
            advanced_training.vae_batch_size,
            advanced_training.min_snr_gamma,
            advanced_training.weighted_captions,
            advanced_training.save_every_n_steps,
            advanced_training.save_last_n_steps,
            advanced_training.save_last_n_steps_state,
            advanced_training.log_with,
            advanced_training.wandb_api_key,
            advanced_training.wandb_run_name,
            advanced_training.log_tracker_name,
            advanced_training.log_tracker_config,
            advanced_training.scale_v_pred_loss_like_noise_pred,
            advanced_training.min_timestep,
            advanced_training.max_timestep,
            advanced_training.debiased_estimation_loss,
            huggingface.huggingface_repo_id,
            huggingface.huggingface_token,
            huggingface.huggingface_repo_type,
            huggingface.huggingface_repo_visibility,
            huggingface.huggingface_path_in_repo,
            huggingface.save_state_to_huggingface,
            huggingface.resume_from_huggingface,
            huggingface.async_upload,
            metadata.metadata_author,
            metadata.metadata_description,
            metadata.metadata_license,
            metadata.metadata_tags,
            metadata.metadata_title,
        ]

        # "Open" and "Load" share one handler; the leading hidden checkbox
        # tells open_configuration which of the two buttons was pressed.
        configuration.button_open_config.click(
            open_configuration,
            inputs=[dummy_db_true, configuration.config_file_name] + settings_list,
            outputs=[configuration.config_file_name] + settings_list,
            show_progress=False,
        )

        configuration.button_load_config.click(
            open_configuration,
            inputs=[dummy_db_false, configuration.config_file_name] + settings_list,
            outputs=[configuration.config_file_name] + settings_list,
            show_progress=False,
        )

        configuration.button_save_config.click(
            save_configuration,
            inputs=[dummy_db_false, configuration.config_file_name] + settings_list,
            outputs=[configuration.config_file_name],
            show_progress=False,
        )

        # Hidden textbox written by train_model; its change event triggers
        # executor.wait_for_training_to_end, which updates the run/stop buttons.
        run_state = gr.Textbox(value=train_state_value, visible=False)

        run_state.change(
            fn=executor.wait_for_training_to_end,
            outputs=[executor.button_run, executor.button_stop_training],
        )

        # Second input is print_only: False starts training for real.
        executor.button_run.click(
            train_model,
            inputs=[dummy_headless] + [dummy_db_false] + settings_list,
            outputs=[executor.button_run, executor.button_stop_training, run_state],
            show_progress=False,
        )

        executor.button_stop_training.click(
            executor.kill_command,
            outputs=[executor.button_run, executor.button_stop_training],
        )

        # Same handler with print_only=True: only prints the command.
        button_print.click(
            train_model,
            inputs=[dummy_headless] + [dummy_db_true] + settings_list,
            show_progress=False,
        )

        return (
            source_model.train_data_dir,
            folders.reg_data_dir,
            folders.output_dir,
            folders.logging_dir,
        )
def extract_dylora(
    model,
    save_to,
    unit,
):
    """Run the sd-scripts DyLoRA extraction script as a subprocess.

    Args:
        model: Path to the DyLoRA model file to extract from.
        save_to: Output location. A bare file name is placed next to
            ``model``; an existing directory gets ``lora.safetensors``
            appended; a path equal to ``model`` gets a ``_tmp`` suffix so
            the input is never overwritten.
        unit: Value passed to the script's ``--unit`` option.

    Returns:
        None. Validation failures and a non-zero exit status are reported
        through the module logger only.
    """
    # An empty path means no model was selected.
    if model == "":
        log.info("Invalid DyLoRA model file")
        return

    # The source model must be an existing file.
    if not os.path.isfile(model):
        log.info("The provided DyLoRA model is not a file")
        return

    if os.path.dirname(save_to) == "":
        # only filename given. prepend dir
        save_to = os.path.join(os.path.dirname(model), save_to)
    if os.path.isdir(save_to):
        # only dir name given. set default lora name
        save_to = os.path.join(save_to, "lora.safetensors")
    if os.path.normpath(model) == os.path.normpath(save_to):
        # same path. silently ignore but rename output
        path, ext = os.path.splitext(save_to)
        save_to = f"{path}_tmp{ext}"

    run_cmd = [
        rf"{PYTHON}",
        rf"{scriptdir}/sd-scripts/networks/extract_lora_from_dylora.py",
        "--save_to",
        rf"{save_to}",
        "--model",
        rf"{model}",
        "--unit",
        str(unit),
    ]

    env = setup_environment()

    # Joined string is for display only; the list form is what gets executed.
    command_to_run = " ".join(run_cmd)
    log.info(f"Executing command: {command_to_run}")

    # Argument list + shell=False: no shell parsing of user-supplied paths.
    result = subprocess.run(run_cmd, env=env, shell=False)

    # Do not claim success when the script exited with an error.
    if result.returncode != 0:
        log.error(f"DyLoRA extraction failed with exit code {result.returncode}")
        return

    log.info("Done extracting DyLoRA...")


###
# Gradio UI
###


def gradio_extract_dylora_tab(headless=False):
    """Build the "Extract DyLoRA" tab and connect it to ``extract_dylora``.

    Args:
        headless: When true, the file-picker button is created hidden.
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed source; confirm the layout against the original.

    # Last directories listed; updated by the list_* helpers below so the
    # refresh buttons re-list the most recently used location.
    current_model_dir = os.path.join(scriptdir, "outputs")
    current_save_dir = os.path.join(scriptdir, "outputs")

    with gr.Tab("Extract DyLoRA"):
        gr.Markdown("This utility can extract a DyLoRA network from a finetuned model.")
        # Hidden textboxes carrying the file-dialog filter to get_file_path.
        lora_ext = gr.Textbox(value="*.safetensors *.pt", visible=False)
        lora_ext_name = gr.Textbox(value="LoRA model types", visible=False)

        def list_models(path):
            """Remember ``path`` and return the model files found by list_files."""
            nonlocal current_model_dir
            current_model_dir = path
            return list(list_files(path, exts=[".ckpt", ".safetensors"], all=True))

        def list_save_to(path):
            """Remember ``path`` and return the LoRA files found by list_files."""
            nonlocal current_save_dir
            current_save_dir = path
            return list(list_files(path, exts=[".pt", ".safetensors"], all=True))

        with gr.Group(), gr.Row():
            model = gr.Dropdown(
                label="DyLoRA model (path to the DyLoRA model to extract from)",
                interactive=True,
                choices=[""] + list_models(current_model_dir),
                value="",
                allow_custom_value=True,
            )
            create_refresh_button(
                model,
                lambda: None,
                lambda: {"choices": list_models(current_model_dir)},
                "open_folder_small",
            )
            button_model_file = gr.Button(
                folder_symbol,
                elem_id="open_folder_small",
                elem_classes=["tool"],
                visible=(not headless),
            )
            button_model_file.click(
                get_file_path,
                inputs=[model, lora_ext, lora_ext_name],
                outputs=model,
                show_progress=False,
            )

            save_to = gr.Dropdown(
                label="Save to (path where to save the extracted LoRA model...)",
                interactive=True,
                choices=[""] + list_save_to(current_save_dir),
                value="",
                allow_custom_value=True,
            )
            create_refresh_button(
                save_to,
                lambda: None,
                lambda: {"choices": list_save_to(current_save_dir)},
                "open_folder_small",
            )
            unit = gr.Slider(
                minimum=1,
                maximum=256,
                label="Network Dimension (Rank)",
                value=1,
                step=1,
                interactive=True,
            )

            # Re-list the dropdown choices from whatever path was typed.
            model.change(
                fn=lambda path: gr.Dropdown(choices=[""] + list_models(path)),
                inputs=model,
                outputs=model,
                show_progress=False,
            )
            save_to.change(
                fn=lambda path: gr.Dropdown(choices=[""] + list_save_to(path)),
                inputs=save_to,
                outputs=save_to,
                show_progress=False,
            )

        extract_button = gr.Button("Extract LoRA model")

        extract_button.click(
            extract_dylora,
            inputs=[
                model,
                save_to,
                unit,
            ],
            show_progress=False,
        )
def extract_lora(
    model_tuned,
    model_org,
    save_to,
    save_precision,
    dim,
    v2,
    sdxl,
    conv_dim,
    clamp_quantile,
    min_diff,
    device,
    load_original_model_to,
    load_tuned_model_to,
    load_precision,
):
    """Run the sd-scripts ``extract_lora_from_models.py`` script.

    Args:
        model_tuned: Path to the finetuned model file.
        model_org: Path to the base model file.
        save_to: Output location. A bare file name is placed next to
            ``model_tuned``; an existing directory gets ``lora.safetensors``
            appended; a path equal to ``model_tuned`` gets a ``_tmp`` suffix.
        save_precision: Value for ``--save_precision``.
        dim: Value for ``--dim``.
        v2: When true, ``--v2`` is added.
        sdxl: When true, ``--sdxl`` and both ``--load_*_model_to`` options
            are added.
        conv_dim: Value for ``--conv_dim``; only passed when greater than 0.
        clamp_quantile: Value for ``--clamp_quantile``.
        min_diff: Value for ``--min_diff``.
        device: Value for ``--device``.
        load_original_model_to: Value for ``--load_original_model_to``.
        load_tuned_model_to: Value for ``--load_tuned_model_to``.
        load_precision: Value for ``--load_precision``.

    Returns:
        None. Validation failures and a non-zero exit status are reported
        through the module logger only.
    """
    # Empty paths mean nothing was selected.
    if model_tuned == "":
        log.info("Invalid finetuned model file")
        return

    if model_org == "":
        log.info("Invalid base model file")
        return

    # Both source models must be existing files.
    if not os.path.isfile(model_tuned):
        log.info("The provided finetuned model is not a file")
        return

    if not os.path.isfile(model_org):
        log.info("The provided base model is not a file")
        return

    if os.path.dirname(save_to) == "":
        # only filename given. prepend dir
        save_to = os.path.join(os.path.dirname(model_tuned), save_to)
    if os.path.isdir(save_to):
        # only dir name given. set default lora name
        save_to = os.path.join(save_to, "lora.safetensors")
    if os.path.normpath(model_tuned) == os.path.normpath(save_to):
        # same path. silently ignore but rename output
        path, ext = os.path.splitext(save_to)
        save_to = f"{path}_tmp{ext}"

    if not is_file_writable(save_to):
        return

    run_cmd = [
        rf"{PYTHON}",
        rf"{scriptdir}/sd-scripts/networks/extract_lora_from_models.py",
        "--load_precision",
        load_precision,
        "--save_precision",
        save_precision,
        "--save_to",
        rf"{save_to}",
        "--model_org",
        rf"{model_org}",
        "--model_tuned",
        rf"{model_tuned}",
        "--dim",
        str(dim),
        "--device",
        device,
        "--clamp_quantile",
        str(clamp_quantile),
        "--min_diff",
        str(min_diff),
    ]

    if conv_dim > 0:
        run_cmd.append("--conv_dim")
        run_cmd.append(str(conv_dim))

    if v2:
        run_cmd.append("--v2")

    # NOTE(review): the load-target options are placed under the SDXL branch
    # (the UI labels them "only for SDXL"); source indentation was lost, so
    # confirm against the original.
    if sdxl:
        run_cmd.append("--sdxl")
        run_cmd.append("--load_original_model_to")
        run_cmd.append(load_original_model_to)
        run_cmd.append("--load_tuned_model_to")
        run_cmd.append(load_tuned_model_to)

    env = setup_environment()

    # Joined string is for display only; the list form is what gets executed.
    command_to_run = " ".join(run_cmd)
    log.info(f"Executing command: {command_to_run}")

    # Argument list without a shell: no shell parsing of user-supplied paths.
    result = subprocess.run(run_cmd, env=env)

    # Surface a failed extraction instead of silently ignoring it.
    if result.returncode != 0:
        log.error(f"LoRA extraction failed with exit code {result.returncode}")


###
# Gradio UI
###


def gradio_extract_lora_tab(
    headless=False,
):
    """Build the "Extract LoRA" tab and connect it to ``extract_lora``.

    Args:
        headless: When true, the file-picker buttons are created hidden.
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed source; confirm the layout against the original.

    # Last directories listed; updated by the list_* helpers below so the
    # refresh buttons re-list the most recently used location.
    current_model_dir = os.path.join(scriptdir, "outputs")
    current_model_org_dir = os.path.join(scriptdir, "outputs")
    current_save_dir = os.path.join(scriptdir, "outputs")

    def list_models(path):
        """Remember ``path`` and return the finetuned-model candidates."""
        nonlocal current_model_dir
        current_model_dir = path
        return list(list_files(path, exts=[".ckpt", ".safetensors"], all=True))

    def list_org_models(path):
        """Remember ``path`` and return the base-model candidates."""
        nonlocal current_model_org_dir
        current_model_org_dir = path
        return list(list_files(path, exts=[".ckpt", ".safetensors"], all=True))

    def list_save_to(path):
        """Remember ``path`` and return the existing LoRA files."""
        nonlocal current_save_dir
        current_save_dir = path
        return list(list_files(path, exts=[".pt", ".safetensors"], all=True))

    def change_sdxl(sdxl):
        """Return visibility updates for the two SDXL-only load-target controls."""
        # NOTE(review): load_tuned_model_to is a gr.Radio yet receives a
        # gr.Dropdown update here - confirm Gradio applies it generically.
        return gr.Dropdown(visible=sdxl), gr.Dropdown(visible=sdxl)

    with gr.Tab("Extract LoRA"):
        gr.Markdown("This utility can extract a LoRA network from a finetuned model.")
        # Hidden textboxes carrying the file-dialog filters to the pickers.
        lora_ext = gr.Textbox(value="*.safetensors *.pt", visible=False)
        lora_ext_name = gr.Textbox(value="LoRA model types", visible=False)
        model_ext = gr.Textbox(value="*.ckpt *.safetensors", visible=False)
        model_ext_name = gr.Textbox(value="Model types", visible=False)

        with gr.Group(), gr.Row():
            model_tuned = gr.Dropdown(
                label="Finetuned model (path to the finetuned model to extract)",
                interactive=True,
                choices=[""] + list_models(current_model_dir),
                value="",
                allow_custom_value=True,
            )
            create_refresh_button(
                model_tuned,
                lambda: None,
                lambda: {"choices": list_models(current_model_dir)},
                "open_folder_small",
            )
            button_model_tuned_file = gr.Button(
                folder_symbol,
                elem_id="open_folder_small",
                elem_classes=["tool"],
                visible=(not headless),
            )
            button_model_tuned_file.click(
                get_file_path,
                inputs=[model_tuned, model_ext, model_ext_name],
                outputs=model_tuned,
                show_progress=False,
            )
            load_tuned_model_to = gr.Radio(
                label="Load finetuned model to",
                choices=["cpu", "cuda", "cuda:0"],
                value="cpu",
                interactive=True,
                scale=1,
                info="only for SDXL",
                visible=False,
            )
            model_org = gr.Dropdown(
                label="Stable Diffusion base model (original model: ckpt or safetensors file)",
                interactive=True,
                choices=[""] + list_org_models(current_model_org_dir),
                value="",
                allow_custom_value=True,
            )
            create_refresh_button(
                model_org,
                lambda: None,
                lambda: {"choices": list_org_models(current_model_org_dir)},
                "open_folder_small",
            )
            button_model_org_file = gr.Button(
                folder_symbol,
                elem_id="open_folder_small",
                elem_classes=["tool"],
                visible=(not headless),
            )
            button_model_org_file.click(
                get_file_path,
                inputs=[model_org, model_ext, model_ext_name],
                outputs=model_org,
                show_progress=False,
            )
            load_original_model_to = gr.Dropdown(
                label="Load Stable Diffusion base model to",
                choices=["cpu", "cuda", "cuda:0"],
                value="cpu",
                interactive=True,
                scale=1,
                info="only for SDXL",
                visible=False,
            )
        with gr.Group(), gr.Row():
            save_to = gr.Dropdown(
                label="Save to (path where to save the extracted LoRA model...)",
                interactive=True,
                choices=[""] + list_save_to(current_save_dir),
                value="",
                allow_custom_value=True,
                scale=2,
            )
            create_refresh_button(
                save_to,
                lambda: None,
                lambda: {"choices": list_save_to(current_save_dir)},
                "open_folder_small",
            )
            button_save_to = gr.Button(
                folder_symbol,
                elem_id="open_folder_small",
                elem_classes=["tool"],
                visible=(not headless),
            )
            button_save_to.click(
                get_saveasfilename_path,
                inputs=[save_to, lora_ext, lora_ext_name],
                outputs=save_to,
                show_progress=False,
            )
            save_precision = gr.Radio(
                label="Save precision",
                choices=["fp16", "bf16", "float"],
                value="fp16",
                interactive=True,
                scale=1,
            )
            load_precision = gr.Radio(
                label="Load precision",
                choices=["fp16", "bf16", "float"],
                value="fp16",
                interactive=True,
                scale=1,
            )

            # Re-list the dropdown choices from whatever path was typed.
            model_tuned.change(
                fn=lambda path: gr.Dropdown(choices=[""] + list_models(path)),
                inputs=model_tuned,
                outputs=model_tuned,
                show_progress=False,
            )
            model_org.change(
                fn=lambda path: gr.Dropdown(choices=[""] + list_org_models(path)),
                inputs=model_org,
                outputs=model_org,
                show_progress=False,
            )
            save_to.change(
                fn=lambda path: gr.Dropdown(choices=[""] + list_save_to(path)),
                inputs=save_to,
                outputs=save_to,
                show_progress=False,
            )
        with gr.Row():
            dim = gr.Slider(
                minimum=4,
                maximum=1024,
                label="Network Dimension (Rank)",
                value=128,
                step=1,
                interactive=True,
            )
            conv_dim = gr.Slider(
                minimum=0,
                maximum=1024,
                label="Conv Dimension (Rank)",
                value=128,
                step=1,
                interactive=True,
            )
            clamp_quantile = gr.Number(
                label="Clamp Quantile",
                value=0.99,
                minimum=0,
                maximum=1,
                step=0.001,
                interactive=True,
            )
            min_diff = gr.Number(
                label="Minimum difference",
                value=0.01,
                minimum=0,
                maximum=1,
                step=0.001,
                interactive=True,
            )
        with gr.Row():
            v2 = gr.Checkbox(label="v2", value=False, interactive=True)
            sdxl = gr.Checkbox(label="SDXL", value=False, interactive=True)
            device = gr.Radio(
                label="Device",
                choices=[
                    "cpu",
                    "cuda",
                ],
                value="cuda",
                interactive=True,
            )

            # Toggle the SDXL-only load-target controls with the checkbox.
            sdxl.change(
                change_sdxl,
                inputs=sdxl,
                outputs=[load_tuned_model_to, load_original_model_to],
            )

        extract_button = gr.Button("Extract LoRA model")

        # Input order must match extract_lora's positional parameters.
        extract_button.click(
            extract_lora,
            inputs=[
                model_tuned,
                model_org,
                save_to,
                save_precision,
                dim,
                v2,
                sdxl,
                conv_dim,
                clamp_quantile,
                min_diff,
                device,
                load_original_model_to,
                load_tuned_model_to,
                load_precision,
            ],
            show_progress=False,
        )
+ return + + # Check if source model exist + if not os.path.isfile(db_model): + log.info("The provided finetuned model is not a file") + return + + if not os.path.isfile(base_model): + log.info("The provided base model is not a file") + return + + if os.path.dirname(output_name) == "": + # only filename given. prepend dir + output_name = os.path.join(os.path.dirname(db_model), output_name) + if os.path.isdir(output_name): + # only dir name given. set default lora name + output_name = os.path.join(output_name, "lora.safetensors") + if os.path.normpath(db_model) == os.path.normpath(output_name): + # same path. silently ignore but rename output + path, ext = os.path.splitext(output_name) + output_name = f"{path}_tmp{ext}" + + run_cmd = [fr'{PYTHON}', fr'{scriptdir}/tools/lycoris_locon_extract.py'] + + if is_sdxl: + run_cmd.append("--is_sdxl") + if is_v2: + run_cmd.append("--is_v2") + + # Adding required parameters + run_cmd.append("--device") + run_cmd.append(device) + run_cmd.append("--mode") + run_cmd.append(mode) + run_cmd.append("--safetensors") + + # Handling conditional parameters based on mode + if mode == "fixed": + run_cmd.append("--linear_dim") + run_cmd.append(str(linear_dim)) + run_cmd.append("--conv_dim") + run_cmd.append(str(conv_dim)) + elif mode == "threshold": + run_cmd.append("--linear_threshold") + run_cmd.append(str(linear_threshold)) + run_cmd.append("--conv_threshold") + run_cmd.append(str(conv_threshold)) + elif mode == "ratio": + run_cmd.append("--linear_ratio") + run_cmd.append(str(linear_ratio)) + run_cmd.append("--conv_ratio") + run_cmd.append(str(conv_ratio)) + elif mode == "quantile": + run_cmd.append("--linear_quantile") + run_cmd.append(str(linear_quantile)) + run_cmd.append("--conv_quantile") + run_cmd.append(str(conv_quantile)) + + if use_sparse_bias: + run_cmd.append("--use_sparse_bias") + + # Adding additional options + run_cmd.append("--sparsity") + run_cmd.append(str(sparsity)) + + if disable_cp: + run_cmd.append("--disable_cp") + 
+ # Add paths + run_cmd.append(fr"{base_model}") + run_cmd.append(fr"{db_model}") + run_cmd.append(fr"{output_name}") + + env = setup_environment() + + # Reconstruct the safe command string for display + command_to_run = " ".join(run_cmd) + log.info(f"Executing command: {command_to_run}") + + # Run the command in the sd-scripts folder context + subprocess.run(run_cmd, env=env) + + + log.info("Done extracting...") + + +### +# Gradio UI +### +# def update_mode(mode): +# # 'fixed', 'threshold','ratio','quantile' +# if mode == 'fixed': +# return gr.Row(visible=True), gr.Row(visible=False), gr.Row(visible=False), gr.Row(visible=False) +# if mode == 'threshold': +# return gr.Row(visible=False), gr.Row(visible=True), gr.Row(visible=False), gr.Row(visible=False) +# if mode == 'ratio': +# return gr.Row(visible=False), gr.Row(visible=False), gr.Row(visible=True), gr.Row(visible=False) +# if mode == 'threshold': +# return gr.Row(visible=False), gr.Row(visible=False), gr.Row(visible=False), gr.Row(visible=True) + + +def update_mode(mode): + # Create a list of possible mode values + modes = ["fixed", "threshold", "ratio", "quantile"] + + # Initialize an empty list to store visibility updates + updates = [] + + # Iterate through the possible modes + for m in modes: + # Add a visibility update for each mode, setting it to True if the input mode matches the current mode in the loop + updates.append(gr.Row(visible=(mode == m))) + + # Return the visibility updates as a tuple + return tuple(updates) + + +def gradio_extract_lycoris_locon_tab(headless=False): + + current_model_dir = os.path.join(scriptdir, "outputs") + current_base_model_dir = os.path.join(scriptdir, "outputs") + current_save_dir = os.path.join(scriptdir, "outputs") + + def list_models(path): + nonlocal current_model_dir + current_model_dir = path + return list(list_files(path, exts=[".ckpt", ".safetensors"], all=True)) + + def list_base_models(path): + nonlocal current_base_model_dir + current_base_model_dir = path + 
return list(list_files(path, exts=[".ckpt", ".safetensors"], all=True)) + + def list_save_to(path): + nonlocal current_save_dir + current_save_dir = path + return list(list_files(path, exts=[".safetensors"], all=True)) + + with gr.Tab("Extract LyCORIS LoCon"): + gr.Markdown( + "This utility can extract a LyCORIS LoCon network from a finetuned model." + ) + lora_ext = gr.Textbox( + value="*.safetensors", visible=False + ) # lora_ext = gr.Textbox(value='*.safetensors *.pt', visible=False) + lora_ext_name = gr.Textbox(value="LoRA model types", visible=False) + model_ext = gr.Textbox(value="*.safetensors *.ckpt", visible=False) + model_ext_name = gr.Textbox(value="Model types", visible=False) + + with gr.Group(), gr.Row(): + db_model = gr.Dropdown( + label="Finetuned model (path to the finetuned model to extract)", + interactive=True, + choices=[""] + list_models(current_model_dir), + value="", + allow_custom_value=True, + ) + create_refresh_button( + db_model, + lambda: None, + lambda: {"choices": list_models(current_model_dir)}, + "open_folder_small", + ) + button_db_model_file = gr.Button( + folder_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_db_model_file.click( + get_file_path, + inputs=[db_model, model_ext, model_ext_name], + outputs=db_model, + show_progress=False, + ) + + base_model = gr.Dropdown( + label="Stable Diffusion base model (original model: ckpt or safetensors file)", + choices=[""] + list_base_models(current_base_model_dir), + value="", + allow_custom_value=True, + ) + create_refresh_button( + base_model, + lambda: None, + lambda: {"choices": list_base_models(current_base_model_dir)}, + "open_folder_small", + ) + button_base_model_file = gr.Button( + folder_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_base_model_file.click( + get_file_path, + inputs=[base_model, model_ext, model_ext_name], + outputs=base_model, + show_progress=False, + 
) + with gr.Group(), gr.Row(): + output_name = gr.Dropdown( + label="Save to (path where to save the extracted LoRA model...)", + interactive=True, + choices=[""] + list_save_to(current_save_dir), + value="", + allow_custom_value=True, + scale=2, + ) + create_refresh_button( + output_name, + lambda: None, + lambda: {"choices": list_save_to(current_save_dir)}, + "open_folder_small", + ) + button_output_name = gr.Button( + folder_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_output_name.click( + get_saveasfilename_path, + inputs=[output_name, lora_ext, lora_ext_name], + outputs=output_name, + show_progress=False, + ) + device = gr.Radio( + label="Device", + choices=[ + "cpu", + "cuda", + ], + value="cuda", + interactive=True, + scale=2, + ) + + db_model.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_models(path)), + inputs=db_model, + outputs=db_model, + show_progress=False, + ) + base_model.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_base_models(path)), + inputs=base_model, + outputs=base_model, + show_progress=False, + ) + output_name.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_save_to(path)), + inputs=output_name, + outputs=output_name, + show_progress=False, + ) + + is_sdxl = gr.Checkbox( + label="is SDXL", value=False, interactive=True, scale=1 + ) + + is_v2 = gr.Checkbox(label="is v2", value=False, interactive=True, scale=1) + with gr.Row(): + mode = gr.Radio( + label="Mode", + choices=["fixed", "full", "quantile", "ratio", "threshold"], + value="fixed", + interactive=True, + ) + with gr.Row(visible=True) as fixed: + linear_dim = gr.Slider( + minimum=1, + maximum=1024, + label="Network Dimension", + value=1, + step=1, + interactive=True, + info="network dim for linear layer in fixed mode", + ) + conv_dim = gr.Slider( + minimum=1, + maximum=1024, + label="Conv Dimension", + value=1, + step=1, + interactive=True, + info="network dim for conv layer in fixed mode", + ) + 
with gr.Row(visible=False) as threshold: + linear_threshold = gr.Slider( + minimum=0, + maximum=1, + label="Linear threshold", + value=0.65, + step=0.01, + interactive=True, + info="The higher the value, the smaller the file. Recommended starting value: 0.65", + ) + conv_threshold = gr.Slider( + minimum=0, + maximum=1, + label="Conv threshold", + value=0.65, + step=0.01, + interactive=True, + info="The higher the value, the smaller the file. Recommended starting value: 0.65", + ) + with gr.Row(visible=False) as ratio: + linear_ratio = gr.Slider( + minimum=0, + maximum=1, + label="Linear ratio", + value=0.75, + step=0.01, + interactive=True, + info="The higher the value, the smaller the file. Recommended starting value: 0.75", + ) + conv_ratio = gr.Slider( + minimum=0, + maximum=1, + label="Conv ratio", + value=0.75, + step=0.01, + interactive=True, + info="The higher the value, the smaller the file. Recommended starting value: 0.75", + ) + with gr.Row(visible=False) as quantile: + linear_quantile = gr.Slider( + minimum=0, + maximum=1, + label="Linear quantile", + value=0.75, + step=0.01, + interactive=True, + info="The higher the value, the larger the file. Recommended starting value: 0.75", + ) + conv_quantile = gr.Slider( + minimum=0, + maximum=1, + label="Conv quantile", + value=0.75, + step=0.01, + interactive=True, + info="The higher the value, the larger the file. 
Recommended starting value: 0.75", + ) + with gr.Row(): + use_sparse_bias = gr.Checkbox( + label="Use sparse biais", value=False, interactive=True + ) + sparsity = gr.Slider( + minimum=0, + maximum=1, + label="Sparsity", + info="Sparsity for sparse bias", + value=0.98, + step=0.01, + interactive=True, + ) + disable_cp = gr.Checkbox( + label="Disable CP decomposition", value=False, interactive=True + ) + mode.change( + update_mode, + inputs=[mode], + outputs=[ + fixed, + threshold, + ratio, + quantile, + ], + ) + + extract_button = gr.Button("Extract LyCORIS LoCon") + + extract_button.click( + extract_lycoris_locon, + inputs=[ + db_model, + base_model, + output_name, + device, + is_sdxl, + is_v2, + mode, + linear_dim, + conv_dim, + linear_threshold, + conv_threshold, + linear_ratio, + conv_ratio, + linear_quantile, + conv_quantile, + use_sparse_bias, + sparsity, + disable_cp, + ], + show_progress=False, + ) diff --git a/kohya_gui/finetune_gui.py b/kohya_gui/finetune_gui.py new file mode 100644 index 0000000000000000000000000000000000000000..bb2f25c0614d4c067e3a72e7fac34c9052bd2d3c --- /dev/null +++ b/kohya_gui/finetune_gui.py @@ -0,0 +1,1352 @@ +import gradio as gr +import json +import math +import os +import subprocess +import time +import sys +import toml +from datetime import datetime +from .common_gui import ( + check_if_model_exist, + color_aug_changed, + get_executable_path, + get_file_path, + get_saveasfile_path, + print_command_and_toml, + run_cmd_advanced_training, + SaveConfigFile, + scriptdir, + update_my_data, + validate_file_path, validate_folder_path, validate_model_path, + validate_args_setting, setup_environment, +) +from .class_accelerate_launch import AccelerateLaunch +from .class_configuration_file import ConfigurationFile +from .class_source_model import SourceModel +from .class_basic_training import BasicTraining +from .class_advanced_training import AdvancedTraining +from .class_folders import Folders +from .class_sdxl_parameters import 
SDXLParameters +from .class_command_executor import CommandExecutor +from .class_tensorboard import TensorboardManager +from .class_sample_images import SampleImages, create_prompt_file +from .class_huggingface import HuggingFace +from .class_metadata import MetaData +from .class_gui_config import KohyaSSGUIConfig + +from .custom_logging import setup_logging + +# Set up logging +log = setup_logging() + +# Setup command executor +executor = None + +# Setup huggingface +huggingface = None +use_shell = False +train_state_value = time.time() + +folder_symbol = "\U0001f4c2" # 📂 +refresh_symbol = "\U0001f504" # 🔄 +save_style_symbol = "\U0001f4be" # 💾 +document_symbol = "\U0001F4C4" # 📄 + +PYTHON = sys.executable + +presets_dir = rf"{scriptdir}/presets" + + +def save_configuration( + save_as_bool, + file_path, + pretrained_model_name_or_path, + v2, + v_parameterization, + sdxl_checkbox, + train_dir, + image_folder, + output_dir, + dataset_config, + logging_dir, + max_resolution, + min_bucket_reso, + max_bucket_reso, + batch_size, + flip_aug, + masked_loss, + caption_metadata_filename, + latent_metadata_filename, + full_path, + learning_rate, + lr_scheduler, + lr_warmup, + dataset_repeats, + train_batch_size, + epoch, + save_every_n_epochs, + mixed_precision, + save_precision, + seed, + num_cpu_threads_per_process, + learning_rate_te, + learning_rate_te1, + learning_rate_te2, + train_text_encoder, + full_bf16, + create_caption, + create_buckets, + save_model_as, + caption_extension, + # use_8bit_adam, + xformers, + clip_skip, + dynamo_backend, + dynamo_mode, + dynamo_use_fullgraph, + dynamo_use_dynamic, + extra_accelerate_launch_args, + num_processes, + num_machines, + multi_gpu, + gpu_ids, + main_process_port, + save_state, + save_state_on_train_end, + resume, + gradient_checkpointing, + gradient_accumulation_steps, + block_lr, + mem_eff_attn, + shuffle_caption, + output_name, + max_token_length, + max_train_epochs, + max_train_steps, + max_data_loader_n_workers, + 
full_fp16, + color_aug, + model_list, + cache_latents, + cache_latents_to_disk, + use_latent_files, + keep_tokens, + persistent_data_loader_workers, + bucket_no_upscale, + random_crop, + bucket_reso_steps, + v_pred_like_loss, + caption_dropout_every_n_epochs, + caption_dropout_rate, + optimizer, + optimizer_args, + lr_scheduler_args, + noise_offset_type, + noise_offset, + noise_offset_random_strength, + adaptive_noise_scale, + multires_noise_iterations, + multires_noise_discount, + ip_noise_gamma, + ip_noise_gamma_random_strength, + sample_every_n_steps, + sample_every_n_epochs, + sample_sampler, + sample_prompts, + additional_parameters, + loss_type, + huber_schedule, + huber_c, + vae_batch_size, + min_snr_gamma, + weighted_captions, + save_every_n_steps, + save_last_n_steps, + save_last_n_steps_state, + log_with, + wandb_api_key, + wandb_run_name, + log_tracker_name, + log_tracker_config, + scale_v_pred_loss_like_noise_pred, + sdxl_cache_text_encoder_outputs, + sdxl_no_half_vae, + min_timestep, + max_timestep, + debiased_estimation_loss, + huggingface_repo_id, + huggingface_token, + huggingface_repo_type, + huggingface_repo_visibility, + huggingface_path_in_repo, + save_state_to_huggingface, + resume_from_huggingface, + async_upload, + metadata_author, + metadata_description, + metadata_license, + metadata_tags, + metadata_title, +): + # Get list of function parameters and values + parameters = list(locals().items()) + + original_file_path = file_path + + if save_as_bool: + log.info("Save as...") + file_path = get_saveasfile_path(file_path) + else: + log.info("Save...") + if file_path == None or file_path == "": + file_path = get_saveasfile_path(file_path) + + # log.info(file_path) + + if file_path == None or file_path == "": + return original_file_path # In case a file_path was provided and the user decide to cancel the open action + + # Extract the destination directory from the file path + destination_directory = os.path.dirname(file_path) + + # Create the 
destination directory if it doesn't exist + if not os.path.exists(destination_directory): + os.makedirs(destination_directory) + + SaveConfigFile( + parameters=parameters, + file_path=file_path, + exclusion=["file_path", "save_as"], + ) + + return file_path + + +def open_configuration( + ask_for_file, + apply_preset, + file_path, + pretrained_model_name_or_path, + v2, + v_parameterization, + sdxl_checkbox, + train_dir, + image_folder, + output_dir, + dataset_config, + logging_dir, + max_resolution, + min_bucket_reso, + max_bucket_reso, + batch_size, + flip_aug, + masked_loss, + caption_metadata_filename, + latent_metadata_filename, + full_path, + learning_rate, + lr_scheduler, + lr_warmup, + dataset_repeats, + train_batch_size, + epoch, + save_every_n_epochs, + mixed_precision, + save_precision, + seed, + num_cpu_threads_per_process, + learning_rate_te, + learning_rate_te1, + learning_rate_te2, + train_text_encoder, + full_bf16, + create_caption, + create_buckets, + save_model_as, + caption_extension, + # use_8bit_adam, + xformers, + clip_skip, + dynamo_backend, + dynamo_mode, + dynamo_use_fullgraph, + dynamo_use_dynamic, + extra_accelerate_launch_args, + num_processes, + num_machines, + multi_gpu, + gpu_ids, + main_process_port, + save_state, + save_state_on_train_end, + resume, + gradient_checkpointing, + gradient_accumulation_steps, + block_lr, + mem_eff_attn, + shuffle_caption, + output_name, + max_token_length, + max_train_epochs, + max_train_steps, + max_data_loader_n_workers, + full_fp16, + color_aug, + model_list, + cache_latents, + cache_latents_to_disk, + use_latent_files, + keep_tokens, + persistent_data_loader_workers, + bucket_no_upscale, + random_crop, + bucket_reso_steps, + v_pred_like_loss, + caption_dropout_every_n_epochs, + caption_dropout_rate, + optimizer, + optimizer_args, + lr_scheduler_args, + noise_offset_type, + noise_offset, + noise_offset_random_strength, + adaptive_noise_scale, + multires_noise_iterations, + multires_noise_discount, + 
ip_noise_gamma, + ip_noise_gamma_random_strength, + sample_every_n_steps, + sample_every_n_epochs, + sample_sampler, + sample_prompts, + additional_parameters, + loss_type, + huber_schedule, + huber_c, + vae_batch_size, + min_snr_gamma, + weighted_captions, + save_every_n_steps, + save_last_n_steps, + save_last_n_steps_state, + log_with, + wandb_api_key, + wandb_run_name, + log_tracker_name, + log_tracker_config, + scale_v_pred_loss_like_noise_pred, + sdxl_cache_text_encoder_outputs, + sdxl_no_half_vae, + min_timestep, + max_timestep, + debiased_estimation_loss, + huggingface_repo_id, + huggingface_token, + huggingface_repo_type, + huggingface_repo_visibility, + huggingface_path_in_repo, + save_state_to_huggingface, + resume_from_huggingface, + async_upload, + metadata_author, + metadata_description, + metadata_license, + metadata_tags, + metadata_title, + training_preset, +): + # Get list of function parameters and values + parameters = list(locals().items()) + + # Check if we are "applying" a preset or a config + if apply_preset: + log.info(f"Applying preset {training_preset}...") + file_path = rf"{presets_dir}/finetune/{training_preset}.json" + else: + # If not applying a preset, set the `training_preset` field to an empty string + # Find the index of the `training_preset` parameter using the `index()` method + training_preset_index = parameters.index(("training_preset", training_preset)) + + # Update the value of `training_preset` by directly assigning an empty string value + parameters[training_preset_index] = ("training_preset", "") + + original_file_path = file_path + + if ask_for_file: + file_path = get_file_path(file_path) + + if not file_path == "" and not file_path == None: + # load variables from JSON file + with open(file_path, "r", encoding="utf-8") as f: + my_data = json.load(f) + log.info("Loading config...") + # Update values to fix deprecated use_8bit_adam checkbox and set appropriate optimizer if it is set to True + my_data = 
update_my_data(my_data) + else: + file_path = original_file_path # In case a file_path was provided and the user decide to cancel the open action + my_data = {} + + values = [file_path] + for key, value in parameters: + json_value = my_data.get(key) + # Set the value in the dictionary to the corresponding value in `my_data`, or the default value if not found + if not key in ["ask_for_file", "apply_preset", "file_path"]: + values.append(json_value if json_value is not None else value) + return tuple(values) + + +def train_model( + headless, + print_only, + pretrained_model_name_or_path, + v2, + v_parameterization, + sdxl_checkbox, + train_dir, + image_folder, + output_dir, + dataset_config, + logging_dir, + max_resolution, + min_bucket_reso, + max_bucket_reso, + batch_size, + flip_aug, + masked_loss, + caption_metadata_filename, + latent_metadata_filename, + full_path, + learning_rate, + lr_scheduler, + lr_warmup, + dataset_repeats, + train_batch_size, + epoch, + save_every_n_epochs, + mixed_precision, + save_precision, + seed, + num_cpu_threads_per_process, + learning_rate_te, + learning_rate_te1, + learning_rate_te2, + train_text_encoder, + full_bf16, + generate_caption_database, + generate_image_buckets, + save_model_as, + caption_extension, + # use_8bit_adam, + xformers, + clip_skip, + dynamo_backend, + dynamo_mode, + dynamo_use_fullgraph, + dynamo_use_dynamic, + extra_accelerate_launch_args, + num_processes, + num_machines, + multi_gpu, + gpu_ids, + main_process_port, + save_state, + save_state_on_train_end, + resume, + gradient_checkpointing, + gradient_accumulation_steps, + block_lr, + mem_eff_attn, + shuffle_caption, + output_name, + max_token_length, + max_train_epochs, + max_train_steps, + max_data_loader_n_workers, + full_fp16, + color_aug, + model_list, # Keep this. 
Yes, it is unused here but required given the common list used + cache_latents, + cache_latents_to_disk, + use_latent_files, + keep_tokens, + persistent_data_loader_workers, + bucket_no_upscale, + random_crop, + bucket_reso_steps, + v_pred_like_loss, + caption_dropout_every_n_epochs, + caption_dropout_rate, + optimizer, + optimizer_args, + lr_scheduler_args, + noise_offset_type, + noise_offset, + noise_offset_random_strength, + adaptive_noise_scale, + multires_noise_iterations, + multires_noise_discount, + ip_noise_gamma, + ip_noise_gamma_random_strength, + sample_every_n_steps, + sample_every_n_epochs, + sample_sampler, + sample_prompts, + additional_parameters, + loss_type, + huber_schedule, + huber_c, + vae_batch_size, + min_snr_gamma, + weighted_captions, + save_every_n_steps, + save_last_n_steps, + save_last_n_steps_state, + log_with, + wandb_api_key, + wandb_run_name, + log_tracker_name, + log_tracker_config, + scale_v_pred_loss_like_noise_pred, + sdxl_cache_text_encoder_outputs, + sdxl_no_half_vae, + min_timestep, + max_timestep, + debiased_estimation_loss, + huggingface_repo_id, + huggingface_token, + huggingface_repo_type, + huggingface_repo_visibility, + huggingface_path_in_repo, + save_state_to_huggingface, + resume_from_huggingface, + async_upload, + metadata_author, + metadata_description, + metadata_license, + metadata_tags, + metadata_title, +): + # Get list of function parameters and values + parameters = list(locals().items()) + global train_state_value + + TRAIN_BUTTON_VISIBLE = [ + gr.Button(visible=True), + gr.Button(visible=False or headless), + gr.Textbox(value=train_state_value), + ] + + if executor.is_running(): + log.error("Training is already running. 
Can't start another training session.") + return TRAIN_BUTTON_VISIBLE + + log.debug(f"headless = {headless} ; print_only = {print_only}") + + log.info(f"Start Finetuning...") + + log.info(f"Validating lr scheduler arguments...") + if not validate_args_setting(lr_scheduler_args): + return + + log.info(f"Validating optimizer arguments...") + if not validate_args_setting(optimizer_args): + return + + if train_dir != "" and not os.path.exists(train_dir): + os.mkdir(train_dir) + + # + # Validate paths + # + + if not validate_file_path(dataset_config): + return TRAIN_BUTTON_VISIBLE + + if not validate_folder_path(image_folder): + return TRAIN_BUTTON_VISIBLE + + if not validate_file_path(log_tracker_config): + return TRAIN_BUTTON_VISIBLE + + if not validate_folder_path(logging_dir, can_be_written_to=True, create_if_not_exists=True): + return TRAIN_BUTTON_VISIBLE + + if not validate_folder_path(output_dir, can_be_written_to=True, create_if_not_exists=True): + return TRAIN_BUTTON_VISIBLE + + if not validate_model_path(pretrained_model_name_or_path): + return TRAIN_BUTTON_VISIBLE + + if not validate_file_path(resume): + return TRAIN_BUTTON_VISIBLE + + # + # End of path validation + # + + # if not validate_paths( + # dataset_config=dataset_config, + # finetune_image_folder=image_folder, + # headless=headless, + # log_tracker_config=log_tracker_config, + # logging_dir=logging_dir, + # output_dir=output_dir, + # pretrained_model_name_or_path=pretrained_model_name_or_path, + # resume=resume, + # ): + # return TRAIN_BUTTON_VISIBLE + + if not print_only and check_if_model_exist( + output_name, output_dir, save_model_as, headless + ): + return TRAIN_BUTTON_VISIBLE + + if dataset_config: + log.info( + "Dataset config toml file used, skipping caption json file, image buckets, total_steps, train_batch_size, gradient_accumulation_steps, epoch, reg_factor, max_train_steps creation..." + ) + + if max_train_steps == 0: + max_train_steps_info = f"Max train steps: 0. 
sd-scripts will therefore default to 1600. Please specify a different value if required." + else: + max_train_steps_info = f"Max train steps: {max_train_steps}" + else: + # create caption json file + if generate_caption_database: + # Define the command components + run_cmd = [ + PYTHON, + rf"{scriptdir}/sd-scripts/finetune/merge_captions_to_metadata.py", + ] + + # Add the caption extension + run_cmd.append("--caption_extension") + if caption_extension == "": + run_cmd.append(".caption") # Default extension + else: + run_cmd.append(caption_extension) + + # Add paths for the image folder and the caption metadata file + run_cmd.append(image_folder) + run_cmd.append(os.path.join(train_dir, caption_metadata_filename)) + + # Include the full path flag if specified + if full_path: + run_cmd.append("--full_path") + + # Log the built command + log.info(" ".join(run_cmd)) + + # Prepare environment variables + env = setup_environment() + + # create images buckets + if generate_image_buckets: + # Build the command to run the preparation script + run_cmd = [ + PYTHON, + rf"{scriptdir}/sd-scripts/finetune/prepare_buckets_latents.py", + image_folder, + os.path.join(train_dir, caption_metadata_filename), + os.path.join(train_dir, latent_metadata_filename), + pretrained_model_name_or_path, + "--batch_size", + str(batch_size), + "--max_resolution", + str(max_resolution), + "--min_bucket_reso", + str(min_bucket_reso), + "--max_bucket_reso", + str(max_bucket_reso), + "--mixed_precision", + str(mixed_precision), + ] + + # Conditional flags + if full_path: + run_cmd.append("--full_path") + if sdxl_checkbox and sdxl_no_half_vae: + log.info( + "Using mixed_precision = no because no half vae is selected..." 
+ ) + # Ensure 'no' is correctly handled without extra quotes that might be interpreted literally in command line + run_cmd.append("--mixed_precision=no") + + # Log the complete command as a string for clarity + log.info(" ".join(run_cmd)) + + # Copy and modify environment variables + env = setup_environment() + + # Execute the command if not just for printing + if not print_only: + subprocess.run(run_cmd, env=env) + + if image_folder == "": + log.error("Image folder dir is empty") + return TRAIN_BUTTON_VISIBLE + + image_num = len( + [ + f + for f, lower_f in ( + (file, file.lower()) for file in os.listdir(image_folder) + ) + if lower_f.endswith((".jpg", ".jpeg", ".png", ".webp")) + ] + ) + log.info(f"image_num = {image_num}") + + repeats = int(image_num) * int(dataset_repeats) + log.info(f"repeats = {str(repeats)}") + + if max_train_steps == 0: + # calculate max_train_steps + max_train_steps = int( + math.ceil( + float(repeats) + / int(train_batch_size) + / int(gradient_accumulation_steps) + * int(epoch) + ) + ) + + # Divide by two because flip augmentation create two copied of the source images + if flip_aug and max_train_steps: + max_train_steps = int(math.ceil(float(max_train_steps) / 2)) + + if max_train_steps == 0: + max_train_steps_info = f"Max train steps: 0. sd-scripts will therefore default to 1600. Please specify a different value if required." 
+ else: + max_train_steps_info = f"Max train steps: {max_train_steps}" + + log.info(max_train_steps_info) + + if max_train_steps != 0: + lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100)) + else: + lr_warmup_steps = 0 + log.info(f"lr_warmup_steps = {lr_warmup_steps}") + + accelerate_path = get_executable_path("accelerate") + if accelerate_path == "": + log.error("accelerate not found") + return TRAIN_BUTTON_VISIBLE + + run_cmd = [rf'{accelerate_path}', "launch"] + + run_cmd = AccelerateLaunch.run_cmd( + run_cmd=run_cmd, + dynamo_backend=dynamo_backend, + dynamo_mode=dynamo_mode, + dynamo_use_fullgraph=dynamo_use_fullgraph, + dynamo_use_dynamic=dynamo_use_dynamic, + num_processes=num_processes, + num_machines=num_machines, + multi_gpu=multi_gpu, + gpu_ids=gpu_ids, + main_process_port=main_process_port, + num_cpu_threads_per_process=num_cpu_threads_per_process, + mixed_precision=mixed_precision, + extra_accelerate_launch_args=extra_accelerate_launch_args, + ) + + if sdxl_checkbox: + run_cmd.append(rf"{scriptdir}/sd-scripts/sdxl_train.py") + else: + run_cmd.append(rf"{scriptdir}/sd-scripts/fine_tune.py") + + in_json = ( + f"{train_dir}/{latent_metadata_filename}" + if use_latent_files == "Yes" + else f"{train_dir}/{caption_metadata_filename}" + ) + cache_text_encoder_outputs = sdxl_checkbox and sdxl_cache_text_encoder_outputs + no_half_vae = sdxl_checkbox and sdxl_no_half_vae + + if max_data_loader_n_workers == "" or None: + max_data_loader_n_workers = 0 + else: + max_data_loader_n_workers = int(max_data_loader_n_workers) + + if max_train_steps == "" or None: + max_train_steps = 0 + else: + max_train_steps = int(max_train_steps) + + config_toml_data = { + # Update the values in the TOML data + "adaptive_noise_scale": ( + adaptive_noise_scale if adaptive_noise_scale != 0 else None + ), + "async_upload": async_upload, + "block_lr": block_lr, + "bucket_no_upscale": bucket_no_upscale, + "bucket_reso_steps": bucket_reso_steps, + "cache_latents": 
cache_latents, + "cache_latents_to_disk": cache_latents_to_disk, + "cache_text_encoder_outputs": cache_text_encoder_outputs, + "caption_dropout_every_n_epochs": int(caption_dropout_every_n_epochs), + "caption_dropout_rate": caption_dropout_rate, + "caption_extension": caption_extension, + "clip_skip": clip_skip if clip_skip != 0 else None, + "color_aug": color_aug, + "dataset_config": dataset_config, + "dataset_repeats": int(dataset_repeats), + "debiased_estimation_loss": debiased_estimation_loss, + "dynamo_backend": dynamo_backend, + "enable_bucket": True, + "flip_aug": flip_aug, + "full_bf16": full_bf16, + "full_fp16": full_fp16, + "gradient_accumulation_steps": int(gradient_accumulation_steps), + "gradient_checkpointing": gradient_checkpointing, + "huber_c": huber_c, + "huber_schedule": huber_schedule, + "huggingface_repo_id": huggingface_repo_id, + "huggingface_token": huggingface_token, + "huggingface_repo_type": huggingface_repo_type, + "huggingface_repo_visibility": huggingface_repo_visibility, + "huggingface_path_in_repo": huggingface_path_in_repo, + "in_json": in_json, + "ip_noise_gamma": ip_noise_gamma if ip_noise_gamma != 0 else None, + "ip_noise_gamma_random_strength": ip_noise_gamma_random_strength, + "keep_tokens": int(keep_tokens), + "learning_rate": learning_rate, # both for sd1.5 and sdxl + "learning_rate_te": ( + learning_rate_te if not sdxl_checkbox else None + ), # only for sd1.5 + "learning_rate_te1": ( + learning_rate_te1 if sdxl_checkbox else None + ), # only for sdxl + "learning_rate_te2": ( + learning_rate_te2 if sdxl_checkbox else None + ), # only for sdxl + "logging_dir": logging_dir, + "log_tracker_name": log_tracker_name, + "log_tracker_config": log_tracker_config, + "loss_type": loss_type, + "lr_scheduler": lr_scheduler, + "lr_scheduler_args": str(lr_scheduler_args).replace('"', "").split(), + "lr_warmup_steps": lr_warmup_steps, + "masked_loss": masked_loss, + "max_bucket_reso": int(max_bucket_reso), + "max_timestep": max_timestep if 
max_timestep != 0 else None, + "max_token_length": int(max_token_length), + "max_train_epochs": ( + int(max_train_epochs) if int(max_train_epochs) != 0 else None + ), + "max_train_steps": int(max_train_steps) if int(max_train_steps) != 0 else None, + "mem_eff_attn": mem_eff_attn, + "metadata_author": metadata_author, + "metadata_description": metadata_description, + "metadata_license": metadata_license, + "metadata_tags": metadata_tags, + "metadata_title": metadata_title, + "min_bucket_reso": int(min_bucket_reso), + "min_snr_gamma": min_snr_gamma if min_snr_gamma != 0 else None, + "min_timestep": min_timestep if min_timestep != 0 else None, + "mixed_precision": mixed_precision, + "multires_noise_discount": multires_noise_discount, + "multires_noise_iterations": ( + multires_noise_iterations if multires_noise_iterations != 0 else None + ), + "no_half_vae": no_half_vae, + "noise_offset": noise_offset if noise_offset != 0 else None, + "noise_offset_random_strength": noise_offset_random_strength, + "noise_offset_type": noise_offset_type, + "optimizer_type": optimizer, + "optimizer_args": str(optimizer_args).replace('"', "").split(), + "output_dir": output_dir, + "output_name": output_name, + "persistent_data_loader_workers": int(persistent_data_loader_workers), + "pretrained_model_name_or_path": pretrained_model_name_or_path, + "random_crop": random_crop, + "resolution": max_resolution, + "resume": resume, + "resume_from_huggingface": resume_from_huggingface, + "sample_every_n_epochs": ( + sample_every_n_epochs if sample_every_n_epochs != 0 else None + ), + "sample_every_n_steps": ( + sample_every_n_steps if sample_every_n_steps != 0 else None + ), + "sample_prompts": create_prompt_file(sample_prompts, output_dir), + "sample_sampler": sample_sampler, + "save_every_n_epochs": ( + save_every_n_epochs if save_every_n_epochs != 0 else None + ), + "save_every_n_steps": save_every_n_steps if save_every_n_steps != 0 else None, + "save_last_n_steps": save_last_n_steps if 
save_last_n_steps != 0 else None, + "save_last_n_steps_state": ( + save_last_n_steps_state if save_last_n_steps_state != 0 else None + ), + "save_model_as": save_model_as, + "save_precision": save_precision, + "save_state": save_state, + "save_state_on_train_end": save_state_on_train_end, + "save_state_to_huggingface": save_state_to_huggingface, + "scale_v_pred_loss_like_noise_pred": scale_v_pred_loss_like_noise_pred, + "sdpa": True if xformers == "sdpa" else None, + "seed": int(seed) if int(seed) != 0 else None, + "shuffle_caption": shuffle_caption, + "train_batch_size": train_batch_size, + "train_data_dir": image_folder, + "train_text_encoder": train_text_encoder, + "log_with": log_with, + "v2": v2, + "v_parameterization": v_parameterization, + "v_pred_like_loss": v_pred_like_loss if v_pred_like_loss != 0 else None, + "vae_batch_size": vae_batch_size if vae_batch_size != 0 else None, + "wandb_api_key": wandb_api_key, + "wandb_run_name": wandb_run_name, + "weighted_captions": weighted_captions, + "xformers": True if xformers == "xformers" else None, + } + + # Given dictionary `config_toml_data` + # Remove all values = "" + config_toml_data = { + key: value + for key, value in config_toml_data.items() + if value not in ["", False, None] + } + + config_toml_data["max_data_loader_n_workers"] = int(max_data_loader_n_workers) + + # Sort the dictionary by keys + config_toml_data = dict(sorted(config_toml_data.items())) + + current_datetime = datetime.now() + formatted_datetime = current_datetime.strftime("%Y%m%d-%H%M%S") + tmpfilename = fr"{output_dir}/config_finetune-{formatted_datetime}.toml" + # Save the updated TOML data back to the file + with open(tmpfilename, "w", encoding="utf-8") as toml_file: + toml.dump(config_toml_data, toml_file) + + if not os.path.exists(toml_file.name): + log.error(f"Failed to write TOML file: {toml_file.name}") + + run_cmd.append("--config_file") + run_cmd.append(rf"{tmpfilename}") + + # Initialize a dictionary with always-included 
keyword arguments + kwargs_for_training = { + "additional_parameters": additional_parameters, + } + + # Pass the dynamically constructed keyword arguments to the function + run_cmd = run_cmd_advanced_training(run_cmd=run_cmd, **kwargs_for_training) + + if print_only: + print_command_and_toml(run_cmd, tmpfilename) + else: + # Saving config file for model + current_datetime = datetime.now() + formatted_datetime = current_datetime.strftime("%Y%m%d-%H%M%S") + # config_dir = os.path.dirname(os.path.dirname(train_data_dir)) + file_path = os.path.join(output_dir, f"{output_name}_{formatted_datetime}.json") + + log.info(f"Saving training config to {file_path}...") + + SaveConfigFile( + parameters=parameters, + file_path=file_path, + exclusion=["file_path", "save_as", "headless", "print_only"], + ) + + # log.info(run_cmd) + + env = setup_environment() + + # Run the command + executor.execute_command(run_cmd=run_cmd, env=env) + + train_state_value = time.time() + + return ( + gr.Button(visible=False or headless), + gr.Button(visible=True), + gr.Textbox(value=train_state_value), + ) + + +def finetune_tab( + headless=False, + config: KohyaSSGUIConfig = {}, + use_shell_flag: bool = False, +): + dummy_db_true = gr.Checkbox(value=True, visible=False) + dummy_db_false = gr.Checkbox(value=False, visible=False) + dummy_headless = gr.Checkbox(value=headless, visible=False) + + global use_shell + use_shell = use_shell_flag + + with gr.Tab("Training"), gr.Column(variant="compact"): + gr.Markdown("Train a custom model using kohya finetune python code...") + + # Setup Configuration Files Gradio + with gr.Accordion("Configuration", open=False): + configuration = ConfigurationFile(headless=headless, config=config) + + with gr.Accordion("Accelerate launch", open=False), gr.Column(): + accelerate_launch = AccelerateLaunch(config=config) + + with gr.Column(): + source_model = SourceModel( + headless=headless, finetuning=True, config=config + ) + image_folder = source_model.train_data_dir + 
output_name = source_model.output_name + + with gr.Accordion("Folders", open=False), gr.Group(): + folders = Folders(headless=headless, finetune=True, config=config) + output_dir = folders.output_dir + logging_dir = folders.logging_dir + train_dir = folders.reg_data_dir + + with gr.Accordion("Metadata", open=False), gr.Group(): + metadata = MetaData(config=config) + + with gr.Accordion("Dataset Preparation", open=False): + with gr.Row(): + max_resolution = gr.Textbox( + label="Resolution (width,height)", value="512,512" + ) + min_bucket_reso = gr.Textbox(label="Min bucket resolution", value="256") + max_bucket_reso = gr.Textbox( + label="Max bucket resolution", value="1024" + ) + batch_size = gr.Textbox(label="Batch size", value="1") + with gr.Row(): + create_caption = gr.Checkbox( + label="Generate caption metadata", value=True + ) + create_buckets = gr.Checkbox( + label="Generate image buckets metadata", value=True + ) + use_latent_files = gr.Dropdown( + label="Use latent files", + choices=[ + "No", + "Yes", + ], + value="Yes", + ) + with gr.Accordion("Advanced parameters", open=False): + with gr.Row(): + caption_metadata_filename = gr.Textbox( + label="Caption metadata filename", + value="meta_cap.json", + ) + latent_metadata_filename = gr.Textbox( + label="Latent metadata filename", value="meta_lat.json" + ) + with gr.Row(): + full_path = gr.Checkbox(label="Use full path", value=True) + weighted_captions = gr.Checkbox( + label="Weighted captions", value=False + ) + + with gr.Accordion("Parameters", open=False), gr.Column(): + + def list_presets(path): + json_files = [] + + for file in os.listdir(path): + if file.endswith(".json"): + json_files.append(os.path.splitext(file)[0]) + + user_presets_path = os.path.join(path, "user_presets") + if os.path.isdir(user_presets_path): + for file in os.listdir(user_presets_path): + if file.endswith(".json"): + preset_name = os.path.splitext(file)[0] + json_files.append(os.path.join("user_presets", preset_name)) + + return 
json_files + + training_preset = gr.Dropdown( + label="Presets", + choices=["none"] + list_presets(f"{presets_dir}/finetune"), + # elem_id="myDropdown", + value="none", + ) + + with gr.Accordion("Basic", open="True"): + with gr.Group(elem_id="basic_tab"): + basic_training = BasicTraining( + learning_rate_value=1e-5, + finetuning=True, + sdxl_checkbox=source_model.sdxl_checkbox, + config=config, + ) + + # Add SDXL Parameters + sdxl_params = SDXLParameters( + source_model.sdxl_checkbox, config=config + ) + + with gr.Row(): + dataset_repeats = gr.Textbox(label="Dataset repeats", value=40) + train_text_encoder = gr.Checkbox( + label="Train text encoder", value=True + ) + + with gr.Accordion("Advanced", open=False, elem_id="advanced_tab"): + with gr.Row(): + gradient_accumulation_steps = gr.Slider( + label="Gradient accumulate steps", + info="Number of updates steps to accumulate before performing a backward/update pass", + value=config.get("advanced.gradient_accumulation_steps", 1), + minimum=1, + maximum=120, + step=1, + ) + block_lr = gr.Textbox( + label="Block LR (SDXL)", + placeholder="(Optional)", + info="Specify the different learning rates for each U-Net block. Specify 23 values separated by commas like 1e-3,1e-3 ... 
1e-3", + ) + advanced_training = AdvancedTraining( + headless=headless, finetuning=True, config=config + ) + advanced_training.color_aug.change( + color_aug_changed, + inputs=[advanced_training.color_aug], + outputs=[ + basic_training.cache_latents + ], # Not applicable to fine_tune.py + ) + + with gr.Accordion("Samples", open=False, elem_id="samples_tab"): + sample = SampleImages(config=config) + + global huggingface + with gr.Accordion("HuggingFace", open=False): + huggingface = HuggingFace(config=config) + + global executor + executor = CommandExecutor(headless=headless) + + with gr.Column(), gr.Group(): + with gr.Row(): + button_print = gr.Button("Print training command") + + TensorboardManager(headless=headless, logging_dir=folders.logging_dir) + + settings_list = [ + source_model.pretrained_model_name_or_path, + source_model.v2, + source_model.v_parameterization, + source_model.sdxl_checkbox, + train_dir, + image_folder, + output_dir, + source_model.dataset_config, + logging_dir, + max_resolution, + min_bucket_reso, + max_bucket_reso, + batch_size, + advanced_training.flip_aug, + advanced_training.masked_loss, + caption_metadata_filename, + latent_metadata_filename, + full_path, + basic_training.learning_rate, + basic_training.lr_scheduler, + basic_training.lr_warmup, + dataset_repeats, + basic_training.train_batch_size, + basic_training.epoch, + basic_training.save_every_n_epochs, + accelerate_launch.mixed_precision, + source_model.save_precision, + basic_training.seed, + accelerate_launch.num_cpu_threads_per_process, + basic_training.learning_rate_te, + basic_training.learning_rate_te1, + basic_training.learning_rate_te2, + train_text_encoder, + advanced_training.full_bf16, + create_caption, + create_buckets, + source_model.save_model_as, + basic_training.caption_extension, + advanced_training.xformers, + advanced_training.clip_skip, + accelerate_launch.dynamo_backend, + accelerate_launch.dynamo_mode, + accelerate_launch.dynamo_use_fullgraph, + 
accelerate_launch.dynamo_use_dynamic, + accelerate_launch.extra_accelerate_launch_args, + accelerate_launch.num_processes, + accelerate_launch.num_machines, + accelerate_launch.multi_gpu, + accelerate_launch.gpu_ids, + accelerate_launch.main_process_port, + advanced_training.save_state, + advanced_training.save_state_on_train_end, + advanced_training.resume, + advanced_training.gradient_checkpointing, + gradient_accumulation_steps, + block_lr, + advanced_training.mem_eff_attn, + advanced_training.shuffle_caption, + output_name, + advanced_training.max_token_length, + basic_training.max_train_epochs, + basic_training.max_train_steps, + advanced_training.max_data_loader_n_workers, + advanced_training.full_fp16, + advanced_training.color_aug, + source_model.model_list, + basic_training.cache_latents, + basic_training.cache_latents_to_disk, + use_latent_files, + advanced_training.keep_tokens, + advanced_training.persistent_data_loader_workers, + advanced_training.bucket_no_upscale, + advanced_training.random_crop, + advanced_training.bucket_reso_steps, + advanced_training.v_pred_like_loss, + advanced_training.caption_dropout_every_n_epochs, + advanced_training.caption_dropout_rate, + basic_training.optimizer, + basic_training.optimizer_args, + basic_training.lr_scheduler_args, + advanced_training.noise_offset_type, + advanced_training.noise_offset, + advanced_training.noise_offset_random_strength, + advanced_training.adaptive_noise_scale, + advanced_training.multires_noise_iterations, + advanced_training.multires_noise_discount, + advanced_training.ip_noise_gamma, + advanced_training.ip_noise_gamma_random_strength, + sample.sample_every_n_steps, + sample.sample_every_n_epochs, + sample.sample_sampler, + sample.sample_prompts, + advanced_training.additional_parameters, + advanced_training.loss_type, + advanced_training.huber_schedule, + advanced_training.huber_c, + advanced_training.vae_batch_size, + advanced_training.min_snr_gamma, + weighted_captions, + 
advanced_training.save_every_n_steps, + advanced_training.save_last_n_steps, + advanced_training.save_last_n_steps_state, + advanced_training.log_with, + advanced_training.wandb_api_key, + advanced_training.wandb_run_name, + advanced_training.log_tracker_name, + advanced_training.log_tracker_config, + advanced_training.scale_v_pred_loss_like_noise_pred, + sdxl_params.sdxl_cache_text_encoder_outputs, + sdxl_params.sdxl_no_half_vae, + advanced_training.min_timestep, + advanced_training.max_timestep, + advanced_training.debiased_estimation_loss, + huggingface.huggingface_repo_id, + huggingface.huggingface_token, + huggingface.huggingface_repo_type, + huggingface.huggingface_repo_visibility, + huggingface.huggingface_path_in_repo, + huggingface.save_state_to_huggingface, + huggingface.resume_from_huggingface, + huggingface.async_upload, + metadata.metadata_author, + metadata.metadata_description, + metadata.metadata_license, + metadata.metadata_tags, + metadata.metadata_title, + ] + + configuration.button_open_config.click( + open_configuration, + inputs=[dummy_db_true, dummy_db_false, configuration.config_file_name] + + settings_list + + [training_preset], + outputs=[configuration.config_file_name] + + settings_list + + [training_preset], + show_progress=False, + ) + + # config.button_open_config.click( + # open_configuration, + # inputs=[dummy_db_true, dummy_db_false, config.config_file_name] + settings_list, + # outputs=[config.config_file_name] + settings_list, + # show_progress=False, + # ) + + configuration.button_load_config.click( + open_configuration, + inputs=[dummy_db_false, dummy_db_false, configuration.config_file_name] + + settings_list + + [training_preset], + outputs=[configuration.config_file_name] + + settings_list + + [training_preset], + show_progress=False, + ) + + training_preset.input( + open_configuration, + inputs=[dummy_db_false, dummy_db_true, configuration.config_file_name] + + settings_list + + [training_preset], + 
outputs=[gr.Textbox(visible=False)] + settings_list + [training_preset], + show_progress=False, + ) + + run_state = gr.Textbox(value=train_state_value, visible=False) + + run_state.change( + fn=executor.wait_for_training_to_end, + outputs=[executor.button_run, executor.button_stop_training], + ) + + executor.button_run.click( + train_model, + inputs=[dummy_headless] + [dummy_db_false] + settings_list, + outputs=[executor.button_run, executor.button_stop_training, run_state], + show_progress=False, + ) + + executor.button_stop_training.click( + executor.kill_command, + outputs=[executor.button_run, executor.button_stop_training], + ) + + button_print.click( + train_model, + inputs=[dummy_headless] + [dummy_db_true] + settings_list, + show_progress=False, + ) + + configuration.button_save_config.click( + save_configuration, + inputs=[dummy_db_false, configuration.config_file_name] + settings_list, + outputs=[configuration.config_file_name], + show_progress=False, + ) + + # config.button_save_as_config.click( + # save_configuration, + # inputs=[dummy_db_true, config.config_file_name] + settings_list, + # outputs=[config.config_file_name], + # show_progress=False, + # ) + + with gr.Tab("Guides"): + gr.Markdown("This section provide Various Finetuning guides and information...") + top_level_path = rf'"{scriptdir}/docs/Finetuning/top_level.md"' + if os.path.exists(top_level_path): + with open(os.path.join(top_level_path), "r", encoding="utf-8") as file: + guides_top_level = file.read() + "\n" + gr.Markdown(guides_top_level) diff --git a/kohya_gui/git_caption_gui.py b/kohya_gui/git_caption_gui.py new file mode 100644 index 0000000000000000000000000000000000000000..b9ec20fdffc3e54f105e33dbcd3fbceab271443a --- /dev/null +++ b/kohya_gui/git_caption_gui.py @@ -0,0 +1,189 @@ +import gradio as gr +import subprocess +import os +import sys +from .common_gui import get_folder_path, add_pre_postfix, scriptdir, list_dirs, setup_environment + +from .custom_logging import 
def caption_images(
    train_data_dir,
    caption_ext,
    batch_size,
    max_data_loader_n_workers,
    max_length,
    model_id,
    prefix,
    postfix,
):
    """Caption every image in a folder with the GIT captioning script.

    Builds the command line for ``sd-scripts/finetune/make_captions_by_git.py``,
    runs it as a subprocess and then passes ``prefix``/``postfix`` to
    ``add_pre_postfix`` for the same folder and caption extension.

    Args:
        train_data_dir: Folder holding the images; an empty string aborts.
        caption_ext: Extension of the caption files; an empty string aborts.
        batch_size: Value forwarded as ``--batch_size``.
        max_data_loader_n_workers: Value forwarded as
            ``--max_data_loader_n_workers``.
        max_length: Value forwarded as ``--max_length``.
        model_id: Optional model id; forwarded as ``--model_id`` when not empty.
        prefix: Text handed to ``add_pre_postfix`` as ``prefix``.
        postfix: Text handed to ``add_pre_postfix`` as ``postfix``.

    Returns:
        None. Problems with the inputs are only reported through the logger.
    """
    # Guard clauses: both the image folder and the extension are mandatory.
    if train_data_dir == "":
        log.info("Image folder is missing...")
        return
    if caption_ext == "":
        log.info("Please provide an extension for the caption files.")
        return

    log.info(f"GIT captioning files in {train_data_dir}...")

    # Interpreter first, then the captioning script shipped with sd-scripts.
    run_cmd = [
        f"{PYTHON}",
        f"{scriptdir}/sd-scripts/finetune/make_captions_by_git.py",
    ]

    # The model id is optional; the script is left to its default otherwise.
    if model_id != "":
        run_cmd += ["--model_id", f"{model_id}"]

    # Numeric options are always passed, stringified for the argument list.
    run_cmd += [
        "--batch_size",
        str(batch_size),
        "--max_data_loader_n_workers",
        str(max_data_loader_n_workers),
        "--max_length",
        str(max_length),
    ]

    if caption_ext != "":
        run_cmd += ["--caption_extension", caption_ext]

    # The folder to caption is the trailing positional argument.
    run_cmd.append(f"{train_data_dir}")

    env = setup_environment()

    # Log a flat rendering of the argument list before launching it.
    log.info(f"Executing command: {' '.join(run_cmd)}")

    # Argument-list form: no shell is involved in running the script.
    subprocess.run(run_cmd, env=env)

    # Post-process the generated caption files with the requested affixes.
    add_pre_postfix(
        folder=train_data_dir,
        caption_file_ext=caption_ext,
        prefix=prefix,
        postfix=postfix,
    )

    log.info("...captioning done")
list_train_dirs(path): + nonlocal current_train_dir + current_train_dir = path + return list(list_dirs(path)) + + with gr.Tab("GIT Captioning"): + gr.Markdown( + "This utility will use GIT to caption files for each images in a folder." + ) + with gr.Group(), gr.Row(): + train_data_dir = gr.Dropdown( + label="Image folder to caption (containing the images to caption)", + choices=[""] + list_train_dirs(default_train_dir), + value="", + interactive=True, + allow_custom_value=True, + ) + create_refresh_button( + train_data_dir, + lambda: None, + lambda: {"choices": list_train_dirs(current_train_dir)}, + "open_folder_small", + ) + button_train_data_dir_input = gr.Button( + "📂", + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_train_data_dir_input.click( + get_folder_path, + outputs=train_data_dir, + show_progress=False, + ) + with gr.Row(): + caption_ext = gr.Dropdown( + label="Caption file extension", + choices=[".cap", ".caption", ".txt"], + value=".txt", + interactive=True, + allow_custom_value=True, + ) + + prefix = gr.Textbox( + label="Prefix to add to GIT caption", + placeholder="(Optional)", + interactive=True, + ) + + postfix = gr.Textbox( + label="Postfix to add to GIT caption", + placeholder="(Optional)", + interactive=True, + ) + + batch_size = gr.Number(value=1, label="Batch size", interactive=True) + + with gr.Row(): + max_data_loader_n_workers = gr.Number( + value=2, label="Number of workers", interactive=True + ) + max_length = gr.Number(value=75, label="Max length", interactive=True) + model_id = gr.Textbox( + label="Model", + placeholder="(Optional) model id for GIT in Hugging Face", + interactive=True, + ) + + caption_button = gr.Button("Caption images") + + caption_button.click( + caption_images, + inputs=[ + train_data_dir, + caption_ext, + batch_size, + max_data_loader_n_workers, + max_length, + model_id, + prefix, + postfix, + ], + show_progress=False, + ) + + train_data_dir.change( + fn=lambda path: 
def group_images(
    input_folder,
    output_folder,
    group_size,
    include_subfolders,
    do_not_copy_other_files,
    generate_captions,
    caption_ext,
):
    """Run ``tools/group_images.py`` on a folder of images.

    Args:
        input_folder: Folder with the images to group; an empty string aborts.
        output_folder: Destination folder; an empty string aborts.
        group_size: Value passed as the third positional argument.
        include_subfolders: When truthy, adds ``--include_subfolders``.
        do_not_copy_other_files: When truthy, adds ``--do_not_copy_other_files``.
        generate_captions: When truthy, adds ``--caption``.
        caption_ext: Caption extension, forwarded as ``--caption_ext`` only
            when captions are generated and the value is not empty.

    Returns:
        None. Problems with the inputs are only reported through the logger.
    """
    # Guard clauses: both folders are mandatory.
    if input_folder == "":
        log.info("Input folder is missing...")
        return
    if output_folder == "":
        log.info("Please provide an output folder.")
        return

    log.info(f"Grouping images in {input_folder}...")

    # Interpreter, script, then the three positional arguments.
    run_cmd = [
        f"{PYTHON}",
        f"{scriptdir}/tools/group_images.py",
        f"{input_folder}",
        f"{output_folder}",
        str(group_size),
    ]

    # Optional boolean switches, in the order the script is given them.
    for enabled, flag in (
        (include_subfolders, "--include_subfolders"),
        (do_not_copy_other_files, "--do_not_copy_other_files"),
    ):
        if enabled:
            run_cmd.append(flag)

    if generate_captions:
        run_cmd.append("--caption")
        # The extension only matters when caption files are written.
        if caption_ext:
            run_cmd += ["--caption_ext", caption_ext]

    env = setup_environment()

    # Log a flat rendering of the argument list before launching it.
    log.info(f"Executing command: {' '.join(run_cmd)}")

    # Argument-list form: no shell is involved in running the script.
    subprocess.run(run_cmd, env=env)

    log.info("...grouping done")
current_input_folder + current_input_folder = path + return list(list_dirs(path)) + + def list_output_dirs(path): + nonlocal current_output_folder + current_output_folder = path + return list(list_dirs(path)) + + with gr.Tab("Group Images"): + gr.Markdown( + "This utility will group images in a folder based on their aspect ratio." + ) + + with gr.Group(), gr.Row(): + input_folder = gr.Dropdown( + label="Input folder (containing the images to group)", + interactive=True, + choices=[""] + list_input_dirs(current_input_folder), + value="", + allow_custom_value=True, + ) + create_refresh_button( + input_folder, + lambda: None, + lambda: {"choices": list_input_dirs(current_input_folder)}, + "open_folder_small", + ) + button_input_folder = gr.Button( + "📂", + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_input_folder.click( + get_folder_path, + outputs=input_folder, + show_progress=False, + ) + + output_folder = gr.Dropdown( + label="Output folder (where the grouped images will be stored)", + interactive=True, + choices=[""] + list_output_dirs(current_output_folder), + value="", + allow_custom_value=True, + ) + create_refresh_button( + output_folder, + lambda: None, + lambda: {"choices": list_output_dirs(current_output_folder)}, + "open_folder_small", + ) + button_output_folder = gr.Button( + "📂", + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_output_folder.click( + get_folder_path, + outputs=output_folder, + show_progress=False, + ) + + input_folder.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_input_dirs(path)), + inputs=input_folder, + outputs=input_folder, + show_progress=False, + ) + output_folder.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_output_dirs(path)), + inputs=output_folder, + outputs=output_folder, + show_progress=False, + ) + with gr.Row(): + group_size = gr.Slider( + label="Group size", + info="Number of images to group together", + 
# Maps a language name (file name without ".json") to the path of its
# localization file. Filled by load_localizations().
localizationMap = {}


def load_localizations():
    """Rebuild ``localizationMap`` from the ``./localizations`` directory.

    Every ``*.json`` file (extension compared case-insensitively) is
    registered under its base name. A missing directory is treated as
    "no localizations available" instead of an import-time crash.
    """
    localizationMap.clear()
    dirname = "./localizations"
    try:
        entries = os.listdir(dirname)
    except (FileNotFoundError, NotADirectoryError):
        # This runs at import time; a missing optional directory must not
        # prevent the module from being imported.
        logging.warning("Localization directory %s not found", dirname)
        return

    for file in entries:
        fn, ext = os.path.splitext(file)
        if ext.lower() != ".json":
            continue
        localizationMap[fn] = os.path.join(dirname, file)


def load_language_js(language_name: str) -> str:
    """Return a JavaScript statement assigning the localization table.

    Args:
        language_name: Key into ``localizationMap``.

    Returns:
        ``"window.localization = <json>"`` where ``<json>`` is the content of
        the language file, or ``{}`` when the language is unknown or its file
        cannot be read or parsed.
    """
    fn = localizationMap.get(language_name, None)
    data = {}
    if fn is not None:
        try:
            with open(fn, "r", encoding="utf-8") as file:
                data = json.load(file)
        except Exception:
            # logging.ERROR is the integer level constant and is not callable;
            # logging.error() is the function that emits the record.
            logging.error(f"Error loading localization from {fn}")

    return f"window.localization = {json.dumps(data)}"


load_localizations()
def add_javascript(language):
    """Patch Gradio's template response so pages carry the localization JS.

    Does nothing when ``language`` is ``None``. Otherwise the snippet built
    by ``js_html_str(language)`` is inserted in front of the closing
    ``</head>`` tag of every response produced through
    ``gr.routes.templates.TemplateResponse``.

    Args:
        language: Language name handed to ``js_html_str``; ``None`` disables
            the patch.

    Returns:
        None.
    """
    if language is None:
        # print('no language')
        return
    jsStr = js_html_str(language)

    def template_response(*args, **kwargs):
        # Build the response with the saved original class, then edit it.
        res = localization.GrRoutesTemplateResponse(*args, **kwargs)
        # Anchor on the closing head tag. An empty search pattern would match
        # at every byte boundary and splice the snippet between all bytes of
        # the page.
        res.body = res.body.replace(b"</head>", f"{jsStr}</head>".encode("utf-8"))
        # The body changed, so the headers are regenerated from it.
        res.init_headers()
        return res

    gr.routes.templates.TemplateResponse = template_response
def save_configuration(
    save_as_bool,
    file_path,
    pretrained_model_name_or_path,
    v2,
    v_parameterization,
    sdxl,
    logging_dir,
    train_data_dir,
    reg_data_dir,
    output_dir,
    dataset_config,
    max_resolution,
    learning_rate,
    lr_scheduler,
    lr_warmup,
    train_batch_size,
    epoch,
    save_every_n_epochs,
    mixed_precision,
    save_precision,
    seed,
    num_cpu_threads_per_process,
    cache_latents,
    cache_latents_to_disk,
    caption_extension,
    enable_bucket,
    gradient_checkpointing,
    fp8_base,
    full_fp16,
    stop_text_encoder_training,
    min_bucket_reso,
    max_bucket_reso,
    xformers,
    save_model_as,
    shuffle_caption,
    save_state,
    save_state_on_train_end,
    resume,
    prior_loss_weight,
    text_encoder_lr,
    unet_lr,
    network_dim,
    network_weights,
    dim_from_weights,
    color_aug,
    flip_aug,
    masked_loss,
    clip_skip,
    num_processes,
    num_machines,
    multi_gpu,
    gpu_ids,
    main_process_port,
    gradient_accumulation_steps,
    mem_eff_attn,
    output_name,
    model_list,
    max_token_length,
    max_train_epochs,
    max_train_steps,
    max_data_loader_n_workers,
    network_alpha,
    training_comment,
    keep_tokens,
    lr_scheduler_num_cycles,
    lr_scheduler_power,
    persistent_data_loader_workers,
    bucket_no_upscale,
    random_crop,
    bucket_reso_steps,
    v_pred_like_loss,
    caption_dropout_every_n_epochs,
    caption_dropout_rate,
    optimizer,
    optimizer_args,
    lr_scheduler_args,
    max_grad_norm,
    noise_offset_type,
    noise_offset,
    noise_offset_random_strength,
    adaptive_noise_scale,
    multires_noise_iterations,
    multires_noise_discount,
    ip_noise_gamma,
    ip_noise_gamma_random_strength,
    LoRA_type,
    factor,
    bypass_mode,
    dora_wd,
    use_cp,
    use_tucker,
    use_scalar,
    rank_dropout_scale,
    constrain,
    rescaled,
    train_norm,
    decompose_both,
    train_on_input,
    conv_dim,
    conv_alpha,
    sample_every_n_steps,
    sample_every_n_epochs,
    sample_sampler,
    sample_prompts,
    additional_parameters,
    loss_type,
    huber_schedule,
    huber_c,
    vae_batch_size,
    min_snr_gamma,
    down_lr_weight,
    mid_lr_weight,
    up_lr_weight,
    block_lr_zero_threshold,
    block_dims,
    block_alphas,
    conv_block_dims,
    conv_block_alphas,
    weighted_captions,
    unit,
    save_every_n_steps,
    save_last_n_steps,
    save_last_n_steps_state,
    log_with,
    wandb_api_key,
    wandb_run_name,
    log_tracker_name,
    log_tracker_config,
    scale_v_pred_loss_like_noise_pred,
    scale_weight_norms,
    network_dropout,
    rank_dropout,
    module_dropout,
    sdxl_cache_text_encoder_outputs,
    sdxl_no_half_vae,
    full_bf16,
    min_timestep,
    max_timestep,
    vae,
    dynamo_backend,
    dynamo_mode,
    dynamo_use_fullgraph,
    dynamo_use_dynamic,
    extra_accelerate_launch_args,
    LyCORIS_preset,
    debiased_estimation_loss,
    huggingface_repo_id,
    huggingface_token,
    huggingface_repo_type,
    huggingface_repo_visibility,
    huggingface_path_in_repo,
    save_state_to_huggingface,
    resume_from_huggingface,
    async_upload,
    metadata_author,
    metadata_description,
    metadata_license,
    metadata_tags,
    metadata_title,
):
    """Write the current LoRA GUI settings to a configuration file.

    Args:
        save_as_bool: When true, always ask for a destination through
            ``get_saveasfile_path``; otherwise only ask when *file_path* is
            empty.
        file_path: Destination currently known to the GUI (may be empty).
        (remaining arguments): The settings to persist. They are captured as
            ``(name, value)`` pairs and handed to ``SaveConfigFile``.

    Returns:
        The path the configuration was written to, or the *file_path* that was
        passed in when no destination was chosen.
    """
    # Must stay the first statement: locals() is read before any other local
    # exists, so the snapshot holds exactly the arguments.
    parameters = list(locals().items())

    original_file_path = file_path

    if save_as_bool:
        log.info("Save as...")
        file_path = get_saveasfile_path(file_path)
    else:
        log.info("Save...")
        # Only prompt when there is no destination yet.
        if not file_path:
            file_path = get_saveasfile_path(file_path)

    log.debug(file_path)

    # Any empty result (None, "", or another empty value) means nothing was
    # chosen; hand back what the caller already had.
    if not file_path:
        return original_file_path

    # A bare file name has no directory part; os.makedirs("") would raise.
    destination_directory = os.path.dirname(file_path)
    if destination_directory:
        os.makedirs(destination_directory, exist_ok=True)

    SaveConfigFile(
        parameters=parameters,
        file_path=file_path,
        # "save_as" is kept for compatibility; "save_as_bool" is the name the
        # flag actually has in this signature.
        # NOTE(review): assumes `exclusion` is matched against parameter
        # names — confirm against SaveConfigFile.
        exclusion=["file_path", "save_as", "save_as_bool"],
    )

    return file_path
def open_configuration(
    ask_for_file,
    apply_preset,
    file_path,
    pretrained_model_name_or_path,
    v2,
    v_parameterization,
    sdxl,
    logging_dir,
    train_data_dir,
    reg_data_dir,
    output_dir,
    dataset_config,
    max_resolution,
    learning_rate,
    lr_scheduler,
    lr_warmup,
    train_batch_size,
    epoch,
    save_every_n_epochs,
    mixed_precision,
    save_precision,
    seed,
    num_cpu_threads_per_process,
    cache_latents,
    cache_latents_to_disk,
    caption_extension,
    enable_bucket,
    gradient_checkpointing,
    fp8_base,
    full_fp16,
    stop_text_encoder_training,
    min_bucket_reso,
    max_bucket_reso,
    xformers,
    save_model_as,
    shuffle_caption,
    save_state,
    save_state_on_train_end,
    resume,
    prior_loss_weight,
    text_encoder_lr,
    unet_lr,
    network_dim,
    network_weights,
    dim_from_weights,
    color_aug,
    flip_aug,
    masked_loss,
    clip_skip,
    num_processes,
    num_machines,
    multi_gpu,
    gpu_ids,
    main_process_port,
    gradient_accumulation_steps,
    mem_eff_attn,
    output_name,
    model_list,
    max_token_length,
    max_train_epochs,
    max_train_steps,
    max_data_loader_n_workers,
    network_alpha,
    training_comment,
    keep_tokens,
    lr_scheduler_num_cycles,
    lr_scheduler_power,
    persistent_data_loader_workers,
    bucket_no_upscale,
    random_crop,
    bucket_reso_steps,
    v_pred_like_loss,
    caption_dropout_every_n_epochs,
    caption_dropout_rate,
    optimizer,
    optimizer_args,
    lr_scheduler_args,
    max_grad_norm,
    noise_offset_type,
    noise_offset,
    noise_offset_random_strength,
    adaptive_noise_scale,
    multires_noise_iterations,
    multires_noise_discount,
    ip_noise_gamma,
    ip_noise_gamma_random_strength,
    LoRA_type,
    factor,
    bypass_mode,
    dora_wd,
    use_cp,
    use_tucker,
    use_scalar,
    rank_dropout_scale,
    constrain,
    rescaled,
    train_norm,
    decompose_both,
    train_on_input,
    conv_dim,
    conv_alpha,
    sample_every_n_steps,
    sample_every_n_epochs,
    sample_sampler,
    sample_prompts,
    additional_parameters,
    loss_type,
    huber_schedule,
    huber_c,
    vae_batch_size,
    min_snr_gamma,
    down_lr_weight,
    mid_lr_weight,
    up_lr_weight,
    block_lr_zero_threshold,
    block_dims,
    block_alphas,
    conv_block_dims,
    conv_block_alphas,
    weighted_captions,
    unit,
    save_every_n_steps,
    save_last_n_steps,
    save_last_n_steps_state,
    log_with,
    wandb_api_key,
    wandb_run_name,
    log_tracker_name,
    log_tracker_config,
    scale_v_pred_loss_like_noise_pred,
    scale_weight_norms,
    network_dropout,
    rank_dropout,
    module_dropout,
    sdxl_cache_text_encoder_outputs,
    sdxl_no_half_vae,
    full_bf16,
    min_timestep,
    max_timestep,
    vae,
    dynamo_backend,
    dynamo_mode,
    dynamo_use_fullgraph,
    dynamo_use_dynamic,
    extra_accelerate_launch_args,
    LyCORIS_preset,
    debiased_estimation_loss,
    huggingface_repo_id,
    huggingface_token,
    huggingface_repo_type,
    huggingface_repo_visibility,
    huggingface_path_in_repo,
    save_state_to_huggingface,
    resume_from_huggingface,
    async_upload,
    metadata_author,
    metadata_description,
    metadata_license,
    metadata_tags,
    metadata_title,
    training_preset,
):
    """Load a LoRA configuration (or preset) and merge it over the current values.

    Args:
        ask_for_file: When true, ask for the file through ``get_file_path``.
        apply_preset: When true and *training_preset* is not ``"none"``, load
            ``{presets_dir}/lora/{training_preset}.json`` instead of
            *file_path*. When false, the returned ``training_preset`` value is
            reset to ``"none"``.
        file_path: Path of the configuration file to read (may be empty).
        (remaining arguments): Current values, used as the fallback for every
            key the loaded JSON does not provide (or provides as ``null``).

    Returns:
        A tuple: the effective file path, then one value per argument in
        signature order (excluding ``ask_for_file``, ``apply_preset`` and
        ``file_path``), then a ``gr.Row`` whose visibility depends on the
        loaded ``LoRA_type``.
    """
    # Must stay the first statement so the snapshot holds only the arguments.
    parameters = list(locals().items())

    if apply_preset:
        if training_preset != "none":
            log.info(f"Applying preset {training_preset}...")
            file_path = rf"{presets_dir}/lora/{training_preset}.json"
    else:
        # A plain open is not a preset: report the preset selector as "none".
        training_preset_index = parameters.index(("training_preset", training_preset))
        parameters[training_preset_index] = ("training_preset", "none")

    # Remembered so a cancelled or failed load leaves the path untouched.
    original_file_path = file_path

    if ask_for_file:
        file_path = get_file_path(file_path)

    my_data = {}
    if file_path and os.path.isfile(file_path):
        with open(file_path, "r", encoding="utf-8") as f:
            my_data = json.load(f)
        log.info("Loading config...")
        # Let the shared helper post-process the loaded data before use.
        my_data = update_my_data(my_data)
    else:
        if file_path:
            # Previously this returned None, leaving the multi-value caller
            # with nothing to unpack; fall back to the current values instead.
            log.error(f"Config file {file_path} does not exist.")
        file_path = original_file_path

    values = [file_path]
    for key, value in parameters:
        if key in ("ask_for_file", "apply_preset", "file_path"):
            continue
        json_value = my_data.get(key)
        # A missing key and an explicit null both keep the current value.
        values.append(json_value if json_value is not None else value)

    # The trailing gr.Row is shown only for these LoRA types.
    show_row = my_data.get("LoRA_type", "Standard") in {
        "LoCon",
        "Kohya DyLoRA",
        "Kohya LoCon",
        "LoRA-FA",
        "LyCORIS/Diag-OFT",
        "LyCORIS/DyLoRA",
        "LyCORIS/LoHa",
        "LyCORIS/LoKr",
        "LyCORIS/LoCon",
        "LyCORIS/GLoRA",
    }
    values.append(gr.Row(visible=show_row))

    return tuple(values)
def _none_if_zero(value):
    """Return ``None`` when *value* equals 0, otherwise return it unchanged."""
    return value if value != 0 else None


def _format_kohya_network_args(local_vars, allowed_names):
    """Render ``" key=value"`` for each truthy entry of *local_vars* named in *allowed_names*."""
    return "".join(
        f" {key}={value}"
        for key, value in local_vars.items()
        if key in allowed_names and value
    )


def train_model(
    headless,
    print_only,
    pretrained_model_name_or_path,
    v2,
    v_parameterization,
    sdxl,
    logging_dir,
    train_data_dir,
    reg_data_dir,
    output_dir,
    dataset_config,
    max_resolution,
    learning_rate,
    lr_scheduler,
    lr_warmup,
    train_batch_size,
    epoch,
    save_every_n_epochs,
    mixed_precision,
    save_precision,
    seed,
    num_cpu_threads_per_process,
    cache_latents,
    cache_latents_to_disk,
    caption_extension,
    enable_bucket,
    gradient_checkpointing,
    fp8_base,
    full_fp16,
    stop_text_encoder_training_pct,
    min_bucket_reso,
    max_bucket_reso,
    xformers,
    save_model_as,
    shuffle_caption,
    save_state,
    save_state_on_train_end,
    resume,
    prior_loss_weight,
    text_encoder_lr,
    unet_lr,
    network_dim,
    network_weights,
    dim_from_weights,
    color_aug,
    flip_aug,
    masked_loss,
    clip_skip,
    num_processes,
    num_machines,
    multi_gpu,
    gpu_ids,
    main_process_port,
    gradient_accumulation_steps,
    mem_eff_attn,
    output_name,
    model_list,  # Keep this. Yes, it is unused here but required given the common list used
    max_token_length,
    max_train_epochs,
    max_train_steps,
    max_data_loader_n_workers,
    network_alpha,
    training_comment,
    keep_tokens,
    lr_scheduler_num_cycles,
    lr_scheduler_power,
    persistent_data_loader_workers,
    bucket_no_upscale,
    random_crop,
    bucket_reso_steps,
    v_pred_like_loss,
    caption_dropout_every_n_epochs,
    caption_dropout_rate,
    optimizer,
    optimizer_args,
    lr_scheduler_args,
    max_grad_norm,
    noise_offset_type,
    noise_offset,
    noise_offset_random_strength,
    adaptive_noise_scale,
    multires_noise_iterations,
    multires_noise_discount,
    ip_noise_gamma,
    ip_noise_gamma_random_strength,
    LoRA_type,
    factor,
    bypass_mode,
    dora_wd,
    use_cp,
    use_tucker,
    use_scalar,
    rank_dropout_scale,
    constrain,
    rescaled,
    train_norm,
    decompose_both,
    train_on_input,
    conv_dim,
    conv_alpha,
    sample_every_n_steps,
    sample_every_n_epochs,
    sample_sampler,
    sample_prompts,
    additional_parameters,
    loss_type,
    huber_schedule,
    huber_c,
    vae_batch_size,
    min_snr_gamma,
    down_lr_weight,
    mid_lr_weight,
    up_lr_weight,
    block_lr_zero_threshold,
    block_dims,
    block_alphas,
    conv_block_dims,
    conv_block_alphas,
    weighted_captions,
    unit,
    save_every_n_steps,
    save_last_n_steps,
    save_last_n_steps_state,
    log_with,
    wandb_api_key,
    wandb_run_name,
    log_tracker_name,
    log_tracker_config,
    scale_v_pred_loss_like_noise_pred,
    scale_weight_norms,
    network_dropout,
    rank_dropout,
    module_dropout,
    sdxl_cache_text_encoder_outputs,
    sdxl_no_half_vae,
    full_bf16,
    min_timestep,
    max_timestep,
    vae,
    dynamo_backend,
    dynamo_mode,
    dynamo_use_fullgraph,
    dynamo_use_dynamic,
    extra_accelerate_launch_args,
    LyCORIS_preset,
    debiased_estimation_loss,
    huggingface_repo_id,
    huggingface_token,
    huggingface_repo_type,
    huggingface_repo_visibility,
    huggingface_path_in_repo,
    save_state_to_huggingface,
    resume_from_huggingface,
    async_upload,
    metadata_author,
    metadata_description,
    metadata_license,
    metadata_tags,
    metadata_title,
):
    """Validate the GUI settings, write a TOML config and launch LoRA training.

    The function validates arguments and paths, derives step counts, builds an
    ``accelerate launch`` command for ``train_network.py`` (or the SDXL
    variant), writes the training options to a timestamped TOML file in
    *output_dir*, and either prints the command (*print_only*) or saves a JSON
    copy of the settings and hands the command to the module-level
    ``executor``.

    Args:
        headless: Forwarded to message helpers; also controls button visibility.
        print_only: When true, print the command and TOML instead of running.
        (remaining arguments): Training settings coming from the GUI.

    Returns:
        Three Gradio updates (two buttons and the training-state textbox). On
        any validation failure the "idle" button state is returned.
    """
    global train_state_value

    # Must be read before any other local is created so that the snapshot
    # saved alongside the model holds only the arguments.
    parameters = list(locals().items())

    TRAIN_BUTTON_VISIBLE = [
        gr.Button(visible=True),
        gr.Button(visible=False or headless),
        gr.Textbox(value=train_state_value),
    ]

    if executor.is_running():
        log.error("Training is already running. Can't start another training session.")
        return TRAIN_BUTTON_VISIBLE

    log.info(f"Start training LoRA {LoRA_type} ...")

    log.info("Validating lr scheduler arguments...")
    if not validate_args_setting(lr_scheduler_args):
        return TRAIN_BUTTON_VISIBLE

    log.info("Validating optimizer arguments...")
    if not validate_args_setting(optimizer_args):
        return TRAIN_BUTTON_VISIBLE

    #
    # Validate paths
    #
    if not validate_file_path(dataset_config):
        return TRAIN_BUTTON_VISIBLE

    if not validate_file_path(log_tracker_config):
        return TRAIN_BUTTON_VISIBLE

    if not validate_folder_path(
        logging_dir, can_be_written_to=True, create_if_not_exists=True
    ):
        return TRAIN_BUTTON_VISIBLE

    # Anything that is not a built-in preset name is treated as a TOML file.
    if LyCORIS_preset not in LYCORIS_PRESETS_CHOICES:
        if not validate_toml_file(LyCORIS_preset):
            return TRAIN_BUTTON_VISIBLE

    if not validate_file_path(network_weights):
        return TRAIN_BUTTON_VISIBLE

    if not validate_folder_path(
        output_dir, can_be_written_to=True, create_if_not_exists=True
    ):
        return TRAIN_BUTTON_VISIBLE

    if not validate_model_path(pretrained_model_name_or_path):
        return TRAIN_BUTTON_VISIBLE

    if not validate_folder_path(reg_data_dir):
        return TRAIN_BUTTON_VISIBLE

    if not validate_file_path(resume):
        return TRAIN_BUTTON_VISIBLE

    if not validate_folder_path(train_data_dir):
        return TRAIN_BUTTON_VISIBLE

    if not validate_model_path(vae):
        return TRAIN_BUTTON_VISIBLE
    #
    # End of path validation
    #

    if int(bucket_reso_steps) < 1:
        output_message(
            msg="Bucket resolution steps need to be greater than 0",
            headless=headless,
        )
        return TRAIN_BUTTON_VISIBLE

    if float(noise_offset) > 1 or float(noise_offset) < 0:
        output_message(
            msg="Noise offset need to be a value between 0 and 1",
            headless=headless,
        )
        return TRAIN_BUTTON_VISIBLE

    if output_dir != "":
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

    if stop_text_encoder_training_pct > 0:
        output_message(
            msg='Output "stop text encoder training" is not yet supported. Ignoring',
            headless=headless,
        )
        stop_text_encoder_training_pct = 0

    if not print_only and check_if_model_exist(
        output_name, output_dir, save_model_as, headless=headless
    ):
        return TRAIN_BUTTON_VISIBLE

    if dataset_config:
        log.info(
            "Dataset config toml file used, skipping total_steps, train_batch_size, gradient_accumulation_steps, epoch, reg_factor, max_train_steps calculations..."
        )
        if max_train_steps > 0:
            # calculate stop encoder training
            if stop_text_encoder_training_pct == 0:
                stop_text_encoder_training = 0
            else:
                stop_text_encoder_training = math.ceil(
                    float(max_train_steps) / 100 * int(stop_text_encoder_training_pct)
                )

            if lr_warmup != 0:
                lr_warmup_steps = round(
                    float(int(lr_warmup) * int(max_train_steps) / 100)
                )
            else:
                lr_warmup_steps = 0
        else:
            stop_text_encoder_training = 0
            lr_warmup_steps = 0

        if max_train_steps == 0:
            max_train_steps_info = "Max train steps: 0. sd-scripts will therefore default to 1600. Please specify a different value if required."
        else:
            max_train_steps_info = f"Max train steps: {max_train_steps}"

    else:
        if train_data_dir == "":
            log.error("Train data dir is empty")
            return TRAIN_BUTTON_VISIBLE

        # Every sub-folder of train_data_dir is a candidate "<repeats>_<name>" set.
        subfolders = [
            f
            for f in os.listdir(train_data_dir)
            if os.path.isdir(os.path.join(train_data_dir, f))
        ]

        total_steps = 0

        for folder in subfolders:
            try:
                # The text before the first underscore is the repeat count.
                repeats = int(folder.split("_")[0])
                log.info(f"Folder {folder}: {repeats} repeats found")

                # Count image files by (case-insensitive) extension.
                num_images = sum(
                    1
                    for name in os.listdir(os.path.join(train_data_dir, folder))
                    if name.lower().endswith((".jpg", ".jpeg", ".png", ".webp"))
                )

                log.info(f"Folder {folder}: {num_images} images found")

                steps = repeats * num_images

                log.info(f"Folder {folder}: {num_images} * {repeats} = {steps} steps")

                total_steps += steps

            except ValueError:
                # The prefix was not an integer, so the folder is not a training set.
                log.info(
                    f"Error: '{folder}' does not contain an underscore, skipping..."
                )

        if reg_data_dir == "":
            reg_factor = 1
        else:
            log.warning(
                "Regularisation images are used... Will double the number of steps required..."
            )
            reg_factor = 2

        log.info(f"Regulatization factor: {reg_factor}")

        if max_train_steps == 0:
            # calculate max_train_steps
            max_train_steps = int(
                math.ceil(
                    float(total_steps)
                    / int(train_batch_size)
                    / int(gradient_accumulation_steps)
                    * int(epoch)
                    * int(reg_factor)
                )
            )
            max_train_steps_info = f"max_train_steps ({total_steps} / {train_batch_size} / {gradient_accumulation_steps} * {epoch} * {reg_factor}) = {max_train_steps}"
        else:
            max_train_steps_info = f"Max train steps: {max_train_steps}"

        # calculate stop encoder training
        if stop_text_encoder_training_pct == 0:
            stop_text_encoder_training = 0
        else:
            stop_text_encoder_training = math.ceil(
                float(max_train_steps) / 100 * int(stop_text_encoder_training_pct)
            )

        if lr_warmup != 0:
            lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100))
        else:
            lr_warmup_steps = 0

        log.info(f"Total steps: {total_steps}")

    log.info(f"Train batch size: {train_batch_size}")
    log.info(f"Gradient accumulation steps: {gradient_accumulation_steps}")
    log.info(f"Epoch: {epoch}")
    log.info(max_train_steps_info)
    log.info(f"stop_text_encoder_training = {stop_text_encoder_training}")
    log.info(f"lr_warmup_steps = {lr_warmup_steps}")

    accelerate_path = get_executable_path("accelerate")
    if accelerate_path == "":
        log.error("accelerate not found")
        return TRAIN_BUTTON_VISIBLE

    run_cmd = [rf"{accelerate_path}", "launch"]

    run_cmd = AccelerateLaunch.run_cmd(
        run_cmd=run_cmd,
        dynamo_backend=dynamo_backend,
        dynamo_mode=dynamo_mode,
        dynamo_use_fullgraph=dynamo_use_fullgraph,
        dynamo_use_dynamic=dynamo_use_dynamic,
        num_processes=num_processes,
        num_machines=num_machines,
        multi_gpu=multi_gpu,
        gpu_ids=gpu_ids,
        main_process_port=main_process_port,
        num_cpu_threads_per_process=num_cpu_threads_per_process,
        mixed_precision=mixed_precision,
        extra_accelerate_launch_args=extra_accelerate_launch_args,
    )

    if sdxl:
        run_cmd.append(rf"{scriptdir}/sd-scripts/sdxl_train_network.py")
    else:
        run_cmd.append(rf"{scriptdir}/sd-scripts/train_network.py")

    # Block-wise options shared by the Kohya network modules; only the ones
    # holding a truthy value are emitted.
    kohya_block_var_names = [
        "down_lr_weight",
        "mid_lr_weight",
        "up_lr_weight",
        "block_lr_zero_threshold",
        "block_dims",
        "block_alphas",
        "conv_block_dims",
        "conv_block_alphas",
        "rank_dropout",
        "module_dropout",
    ]

    network_args = ""

    if LoRA_type == "LyCORIS/BOFT":
        network_module = "lycoris.kohya"
        network_args = f" preset={LyCORIS_preset} conv_dim={conv_dim} conv_alpha={conv_alpha} module_dropout={module_dropout} use_tucker={use_tucker} use_scalar={use_scalar} rank_dropout={rank_dropout} rank_dropout_scale={rank_dropout_scale} constrain={constrain} rescaled={rescaled} algo=boft train_norm={train_norm}"

    elif LoRA_type == "LyCORIS/Diag-OFT":
        network_module = "lycoris.kohya"
        network_args = f" preset={LyCORIS_preset} conv_dim={conv_dim} conv_alpha={conv_alpha} module_dropout={module_dropout} use_tucker={use_tucker} use_scalar={use_scalar} rank_dropout={rank_dropout} rank_dropout_scale={rank_dropout_scale} constrain={constrain} rescaled={rescaled} algo=diag-oft train_norm={train_norm}"

    elif LoRA_type == "LyCORIS/DyLoRA":
        network_module = "lycoris.kohya"
        network_args = f' preset={LyCORIS_preset} conv_dim={conv_dim} conv_alpha={conv_alpha} use_tucker={use_tucker} block_size={unit} rank_dropout={rank_dropout} module_dropout={module_dropout} algo="dylora" train_norm={train_norm}'

    elif LoRA_type == "LyCORIS/GLoRA":
        network_module = "lycoris.kohya"
        network_args = f' preset={LyCORIS_preset} conv_dim={conv_dim} conv_alpha={conv_alpha} rank_dropout={rank_dropout} module_dropout={module_dropout} rank_dropout_scale={rank_dropout_scale} algo="glora" train_norm={train_norm}'

    elif LoRA_type == "LyCORIS/iA3":
        network_module = "lycoris.kohya"
        network_args = f" preset={LyCORIS_preset} conv_dim={conv_dim} conv_alpha={conv_alpha} train_on_input={train_on_input} algo=ia3"

    elif LoRA_type == "LoCon" or LoRA_type == "LyCORIS/LoCon":
        network_module = "lycoris.kohya"
        network_args = f" preset={LyCORIS_preset} conv_dim={conv_dim} conv_alpha={conv_alpha} rank_dropout={rank_dropout} bypass_mode={bypass_mode} dora_wd={dora_wd} module_dropout={module_dropout} use_tucker={use_tucker} use_scalar={use_scalar} rank_dropout_scale={rank_dropout_scale} algo=locon train_norm={train_norm}"

    elif LoRA_type == "LyCORIS/LoHa":
        network_module = "lycoris.kohya"
        network_args = f' preset={LyCORIS_preset} conv_dim={conv_dim} conv_alpha={conv_alpha} rank_dropout={rank_dropout} bypass_mode={bypass_mode} dora_wd={dora_wd} module_dropout={module_dropout} use_tucker={use_tucker} use_scalar={use_scalar} rank_dropout_scale={rank_dropout_scale} algo="loha" train_norm={train_norm}'

    elif LoRA_type == "LyCORIS/LoKr":
        network_module = "lycoris.kohya"
        network_args = f" preset={LyCORIS_preset} conv_dim={conv_dim} conv_alpha={conv_alpha} rank_dropout={rank_dropout} bypass_mode={bypass_mode} dora_wd={dora_wd} module_dropout={module_dropout} factor={factor} use_cp={use_cp} use_scalar={use_scalar} decompose_both={decompose_both} rank_dropout_scale={rank_dropout_scale} algo=lokr train_norm={train_norm}"

    elif LoRA_type == "LyCORIS/Native Fine-Tuning":
        network_module = "lycoris.kohya"
        network_args = f" preset={LyCORIS_preset} rank_dropout={rank_dropout} module_dropout={module_dropout} use_tucker={use_tucker} use_scalar={use_scalar} rank_dropout_scale={rank_dropout_scale} algo=full train_norm={train_norm}"

    elif LoRA_type in ["Kohya LoCon", "Standard"]:
        network_module = "networks.lora"
        if LoRA_type == "Kohya LoCon":
            network_args += f' conv_dim="{conv_dim}" conv_alpha="{conv_alpha}"'
        # vars() is evaluated here, in this function's scope, so the helper
        # sees the argument values in their declaration order.
        network_args += _format_kohya_network_args(vars(), kohya_block_var_names)

    elif LoRA_type in ["LoRA-FA"]:
        network_module = "networks.lora_fa"
        network_args = _format_kohya_network_args(vars(), kohya_block_var_names)

    elif LoRA_type in ["Kohya DyLoRA"]:
        network_module = "networks.dylora"
        network_args = _format_kohya_network_args(
            vars(),
            ["conv_dim", "conv_alpha", "unit"] + kohya_block_var_names,
        )

    else:
        # Without this branch `network_module` would be unbound further down.
        output_message(msg=f"Unsupported LoRA type: {LoRA_type}", headless=headless)
        return TRAIN_BUTTON_VISIBLE

    # Convert learning rates to float once and store the result for re-use
    learning_rate = float(learning_rate) if learning_rate is not None else 0.0
    text_encoder_lr_float = (
        float(text_encoder_lr) if text_encoder_lr is not None else 0.0
    )
    unet_lr_float = float(unet_lr) if unet_lr is not None else 0.0

    if float(learning_rate) == unet_lr_float == text_encoder_lr_float == 0:
        output_message(msg="Please input learning rate values.", headless=headless)
        return TRAIN_BUTTON_VISIBLE
    # Train the text encoder only if its learning rate is non-zero and unet's is zero.
    network_train_text_encoder_only = text_encoder_lr_float != 0 and unet_lr_float == 0
    # Train the unet only if its learning rate is non-zero and text encoder's is zero.
    network_train_unet_only = text_encoder_lr_float == 0 and unet_lr_float != 0

    config_toml_data = {
        "adaptive_noise_scale": _none_if_zero(adaptive_noise_scale),
        "async_upload": async_upload,
        "bucket_no_upscale": bucket_no_upscale,
        "bucket_reso_steps": bucket_reso_steps,
        "cache_latents": cache_latents,
        "cache_latents_to_disk": cache_latents_to_disk,
        "cache_text_encoder_outputs": (
            True if sdxl and sdxl_cache_text_encoder_outputs else None
        ),
        "caption_dropout_every_n_epochs": int(caption_dropout_every_n_epochs),
        "caption_dropout_rate": caption_dropout_rate,
        "caption_extension": caption_extension,
        "clip_skip": _none_if_zero(clip_skip),
        "color_aug": color_aug,
        "dataset_config": dataset_config,
        "debiased_estimation_loss": debiased_estimation_loss,
        "dynamo_backend": dynamo_backend,
        "dim_from_weights": dim_from_weights,
        "enable_bucket": enable_bucket,
        "epoch": int(epoch),
        "flip_aug": flip_aug,
        "fp8_base": fp8_base,
        "full_bf16": full_bf16,
        "full_fp16": full_fp16,
        "gradient_accumulation_steps": int(gradient_accumulation_steps),
        "gradient_checkpointing": gradient_checkpointing,
        "huber_c": huber_c,
        "huber_schedule": huber_schedule,
        "huggingface_repo_id": huggingface_repo_id,
        "huggingface_token": huggingface_token,
        "huggingface_repo_type": huggingface_repo_type,
        "huggingface_repo_visibility": huggingface_repo_visibility,
        "huggingface_path_in_repo": huggingface_path_in_repo,
        "ip_noise_gamma": _none_if_zero(ip_noise_gamma),
        "ip_noise_gamma_random_strength": ip_noise_gamma_random_strength,
        "keep_tokens": int(keep_tokens),
        "learning_rate": learning_rate,
        "logging_dir": logging_dir,
        "log_tracker_name": log_tracker_name,
        "log_tracker_config": log_tracker_config,
        "loss_type": loss_type,
        "lr_scheduler": lr_scheduler,
        "lr_scheduler_args": str(lr_scheduler_args).replace('"', "").split(),
        "lr_scheduler_num_cycles": (
            int(lr_scheduler_num_cycles)
            if lr_scheduler_num_cycles != ""
            else int(epoch)
        ),
        "lr_scheduler_power": lr_scheduler_power,
        "lr_warmup_steps": lr_warmup_steps,
        "masked_loss": masked_loss,
        "max_bucket_reso": max_bucket_reso,
        "max_grad_norm": max_grad_norm,
        "max_timestep": _none_if_zero(max_timestep),
        "max_token_length": int(max_token_length),
        "max_train_epochs": _none_if_zero(int(max_train_epochs)),
        "max_train_steps": _none_if_zero(int(max_train_steps)),
        "mem_eff_attn": mem_eff_attn,
        "metadata_author": metadata_author,
        "metadata_description": metadata_description,
        "metadata_license": metadata_license,
        "metadata_tags": metadata_tags,
        "metadata_title": metadata_title,
        "min_bucket_reso": int(min_bucket_reso),
        "min_snr_gamma": _none_if_zero(min_snr_gamma),
        "min_timestep": _none_if_zero(min_timestep),
        "mixed_precision": mixed_precision,
        "multires_noise_discount": multires_noise_discount,
        "multires_noise_iterations": _none_if_zero(multires_noise_iterations),
        "network_alpha": network_alpha,
        "network_args": str(network_args).replace('"', "").split(),
        "network_dim": network_dim,
        "network_dropout": network_dropout,
        "network_module": network_module,
        "network_train_unet_only": network_train_unet_only,
        "network_train_text_encoder_only": network_train_text_encoder_only,
        "network_weights": network_weights,
        "no_half_vae": True if sdxl and sdxl_no_half_vae else None,
        "noise_offset": _none_if_zero(noise_offset),
        "noise_offset_random_strength": noise_offset_random_strength,
        "noise_offset_type": noise_offset_type,
        "optimizer_type": optimizer,
        "optimizer_args": str(optimizer_args).replace('"', "").split(),
        "output_dir": output_dir,
        "output_name": output_name,
        "persistent_data_loader_workers": int(persistent_data_loader_workers),
        "pretrained_model_name_or_path": pretrained_model_name_or_path,
        "prior_loss_weight": prior_loss_weight,
        "random_crop": random_crop,
        "reg_data_dir": reg_data_dir,
        "resolution": max_resolution,
        "resume": resume,
        "resume_from_huggingface": resume_from_huggingface,
        "sample_every_n_epochs": _none_if_zero(sample_every_n_epochs),
        "sample_every_n_steps": _none_if_zero(sample_every_n_steps),
        "sample_prompts": create_prompt_file(sample_prompts, output_dir),
        "sample_sampler": sample_sampler,
        "save_every_n_epochs": _none_if_zero(save_every_n_epochs),
        "save_every_n_steps": _none_if_zero(save_every_n_steps),
        "save_last_n_steps": _none_if_zero(save_last_n_steps),
        "save_last_n_steps_state": _none_if_zero(save_last_n_steps_state),
        "save_model_as": save_model_as,
        "save_precision": save_precision,
        "save_state": save_state,
        "save_state_on_train_end": save_state_on_train_end,
        "save_state_to_huggingface": save_state_to_huggingface,
        "scale_v_pred_loss_like_noise_pred": scale_v_pred_loss_like_noise_pred,
        "scale_weight_norms": scale_weight_norms,
        "sdpa": True if xformers == "sdpa" else None,
        "seed": _none_if_zero(int(seed)),
        "shuffle_caption": shuffle_caption,
        "stop_text_encoder_training": _none_if_zero(stop_text_encoder_training),
        # Was `text_encoder_lr if not 0 else None`, whose condition is always true.
        "text_encoder_lr": _none_if_zero(text_encoder_lr),
        "train_batch_size": train_batch_size,
        "train_data_dir": train_data_dir,
        "training_comment": training_comment,
        # Same constant-condition fix as text_encoder_lr.
        "unet_lr": _none_if_zero(unet_lr),
        "log_with": log_with,
        "v2": v2,
        "v_parameterization": v_parameterization,
        "v_pred_like_loss": _none_if_zero(v_pred_like_loss),
        "vae": vae,
        "vae_batch_size": _none_if_zero(vae_batch_size),
        "wandb_api_key": wandb_api_key,
        "wandb_run_name": wandb_run_name,
        "weighted_captions": weighted_captions,
        "xformers": True if xformers == "xformers" else None,
    }

    # Drop unset options. Membership uses ==, and 0 == False, so numeric zeros
    # are removed here as well as "", False and None.
    config_toml_data = {
        key: value
        for key, value in config_toml_data.items()
        if value not in ["", False, None]
    }

    # Added after filtering so that an explicit 0 survives.
    config_toml_data["max_data_loader_n_workers"] = int(max_data_loader_n_workers)

    # Sort the dictionary by keys
    config_toml_data = dict(sorted(config_toml_data.items()))

    current_datetime = datetime.now()
    formatted_datetime = current_datetime.strftime("%Y%m%d-%H%M%S")
    tmpfilename = rf"{output_dir}/config_lora-{formatted_datetime}.toml"

    with open(tmpfilename, "w", encoding="utf-8") as toml_file:
        toml.dump(config_toml_data, toml_file)

        if not os.path.exists(toml_file.name):
            log.error(f"Failed to write TOML file: {toml_file.name}")

    run_cmd.append("--config_file")
    run_cmd.append(rf"{tmpfilename}")

    run_cmd_params = {
        "additional_parameters": additional_parameters,
    }

    run_cmd = run_cmd_advanced_training(run_cmd=run_cmd, **run_cmd_params)

    if print_only:
        print_command_and_toml(run_cmd, tmpfilename)
    else:
        # Keep a JSON copy of the GUI settings next to the model.
        current_datetime = datetime.now()
        formatted_datetime = current_datetime.strftime("%Y%m%d-%H%M%S")
        file_path = os.path.join(output_dir, f"{output_name}_{formatted_datetime}.json")

        log.info(f"Saving training config to {file_path}...")

        SaveConfigFile(
            parameters=parameters,
            file_path=file_path,
            exclusion=["file_path", "save_as", "headless", "print_only"],
        )

        env = setup_environment()

        executor.execute_command(run_cmd=run_cmd, env=env)

        # A fresh value for the state textbox returned below.
        train_state_value = time.time()

        return (
            gr.Button(visible=False or headless),
            gr.Button(visible=True),
            gr.Textbox(value=train_state_value),
        )
def list_presets(path):
    """
    Return the preset names available under *path*.

    Top-level ``*.json`` files are returned by stem. Files found in the
    optional ``user_presets`` sub-folder are returned as
    ``user_presets/<stem>`` (joined with the OS path separator).

    A missing preset folder yields an empty list instead of raising, and each
    group is sorted so the result does not depend on ``os.listdir`` ordering.
    """

    def json_stems(folder):
        # Stems of the *.json entries directly inside `folder`.
        return sorted(
            os.path.splitext(entry)[0]
            for entry in os.listdir(folder)
            if entry.endswith(".json")
        )

    if not os.path.isdir(path):
        return []

    json_files = json_stems(path)

    user_presets_path = os.path.join(path, "user_presets")
    if os.path.isdir(user_presets_path):
        json_files.extend(
            os.path.join("user_presets", preset_name)
            for preset_name in json_stems(user_presets_path)
        )

    return json_files
network_weights_file = gr.Button( + document_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + network_weights_file.click( + get_any_file_path, + inputs=[network_weights], + outputs=network_weights, + show_progress=False, + ) + dim_from_weights = gr.Checkbox( + label="DIM from weights", + value=False, + info="Automatically determine the dim(rank) from the weight file.", + ) + basic_training = BasicTraining( + learning_rate_value=0.0001, + lr_scheduler_value="cosine", + lr_warmup_value=10, + sdxl_checkbox=source_model.sdxl_checkbox, + config=config, + ) + + with gr.Row(): + text_encoder_lr = gr.Number( + label="Text Encoder learning rate", + value=0.0001, + info="(Optional)", + minimum=0, + maximum=1, + ) + + unet_lr = gr.Number( + label="Unet learning rate", + value=0.0001, + info="(Optional)", + minimum=0, + maximum=1, + ) + + # Add SDXL Parameters + sdxl_params = SDXLParameters( + source_model.sdxl_checkbox, config=config + ) + + # LyCORIS Specific parameters + with gr.Accordion("LyCORIS", visible=False) as lycoris_accordion: + with gr.Row(): + factor = gr.Slider( + label="LoKr factor", + value=-1, + minimum=-1, + maximum=64, + step=1, + visible=False, + ) + bypass_mode = gr.Checkbox( + value=False, + label="Bypass mode", + info="Designed for bnb 8bit/4bit linear layer. 
(QLyCORIS)", + visible=False, + ) + dora_wd = gr.Checkbox( + value=False, + label="DoRA Weight Decompose", + info="Enable the DoRA method for these algorithms", + visible=False, + ) + use_cp = gr.Checkbox( + value=False, + label="Use CP decomposition", + info="A two-step approach utilizing tensor decomposition and fine-tuning to accelerate convolution layers in large neural networks, resulting in significant CPU speedups with minor accuracy drops.", + visible=False, + ) + use_tucker = gr.Checkbox( + value=False, + label="Use Tucker decomposition", + info="Efficiently decompose tensor shapes, resulting in a sequence of convolution layers with varying dimensions and Hadamard product implementation through multiplication of two distinct tensors.", + visible=False, + ) + use_scalar = gr.Checkbox( + value=False, + label="Use Scalar", + info="Train an additional scalar in front of the weight difference, use a different weight initialization strategy.", + visible=False, + ) + with gr.Row(): + rank_dropout_scale = gr.Checkbox( + value=False, + label="Rank Dropout Scale", + info="Adjusts the scale of the rank dropout to maintain the average dropout rate, ensuring more consistent regularization across different layers.", + visible=False, + ) + constrain = gr.Number( + value=0.0, + label="Constrain OFT", + info="Limits the norm of the oft_blocks, ensuring that their magnitude does not exceed a specified threshold, thus controlling the extent of the transformation applied.", + visible=False, + ) + rescaled = gr.Checkbox( + value=False, + label="Rescaled OFT", + info="applies an additional scaling factor to the oft_blocks, allowing for further adjustment of their impact on the model's transformations.", + visible=False, + ) + train_norm = gr.Checkbox( + value=False, + label="Train Norm", + info="Selects trainable layers in a network, but trains normalization layers identically across methods as they lack matrix decomposition.", + visible=False, + ) + decompose_both = 
gr.Checkbox( + value=False, + label="LoKr decompose both", + info="Controls whether both input and output dimensions of the layer's weights are decomposed into smaller matrices for reparameterization.", + visible=False, + ) + train_on_input = gr.Checkbox( + value=True, + label="iA3 train on input", + info="Set if we change the information going into the system (True) or the information coming out of it (False).", + visible=False, + ) + with gr.Row() as network_row: + network_dim = gr.Slider( + minimum=1, + maximum=512, + label="Network Rank (Dimension)", + value=8, + step=1, + interactive=True, + ) + network_alpha = gr.Slider( + minimum=0.00001, + maximum=1024, + label="Network Alpha", + value=1, + step=0.00001, + interactive=True, + info="alpha for LoRA weight scaling", + ) + with gr.Row(visible=False) as convolution_row: + # locon= gr.Checkbox(label='Train a LoCon instead of a general LoRA (does not support v2 base models) (may not be able to some utilities now)', value=False) + conv_dim = gr.Slider( + minimum=0, + maximum=512, + value=1, + step=1, + label="Convolution Rank (Dimension)", + ) + conv_alpha = gr.Slider( + minimum=0, + maximum=512, + value=1, + step=1, + label="Convolution Alpha", + ) + with gr.Row(): + scale_weight_norms = gr.Slider( + label="Scale weight norms", + value=0, + minimum=0, + maximum=10, + step=0.01, + info="Max Norm Regularization is a technique to stabilize network training by limiting the norm of network weights. It may be effective in suppressing overfitting of LoRA and improving stability when used with other LoRAs. See PR #545 on kohya_ss/sd_scripts repo for details. Recommended setting: 1. Higher is weaker, lower is stronger.", + interactive=True, + ) + network_dropout = gr.Slider( + label="Network dropout", + value=0, + minimum=0, + maximum=1, + step=0.01, + info="Is a normal probability dropout at the neuron level. In the case of LoRA, it is applied to the output of down. 
# Show or hide LoCon conv settings depending on LoRA type selection
def update_LoRA_settings(
    LoRA_type,
    conv_dim,
    network_dim,
):
    """
    Build the component updates to apply when the LoRA type changes.

    Returns a tuple of freshly constructed Gradio components, one per entry of
    ``lora_settings_config`` in insertion order. Most entries only toggle
    ``visible``; ``conv_dim`` and ``network_dim`` instead receive a new
    ``maximum`` and keep the value passed in.

    The order of the entries is significant: it must match the ``outputs``
    list of the event this function is bound to.
    """
    log.debug("LoRA type changed...")

    def shown_for(*lora_types):
        # Update params that show the component only for the given LoRA types.
        return {"visible": LoRA_type in lora_types}

    def rank_slider(value):
        # These algorithms get a much larger rank ceiling than the default 512.
        large_rank = LoRA_type in {
            "LyCORIS/LoHa",
            "LyCORIS/LoKr",
            "LyCORIS/BOFT",
            "LyCORIS/Diag-OFT",
        }
        return {"maximum": 100000 if large_rank else 512, "value": value}

    # Types for which the network-weights widgets are shown.
    weights_types = (
        "Standard",
        "LoCon",
        "Kohya DyLoRA",
        "Kohya LoCon",
        "LoRA-FA",
        "LyCORIS/BOFT",
        "LyCORIS/Diag-OFT",
        "LyCORIS/DyLoRA",
        "LyCORIS/GLoRA",
        "LyCORIS/LoHa",
        "LyCORIS/LoCon",
        "LyCORIS/LoKr",
    )
    # Types for which the LyCORIS preset and accordion are shown.
    lycoris_types = (
        "LyCORIS/DyLoRA",
        "LyCORIS/iA3",
        "LyCORIS/BOFT",
        "LyCORIS/Diag-OFT",
        "LyCORIS/GLoRA",
        "LyCORIS/LoCon",
        "LyCORIS/LoHa",
        "LyCORIS/LoKr",
        "LyCORIS/Native Fine-Tuning",
    )
    # Types for which rank/module dropout are shown. The LoKr entry was
    # previously spelled "LyCORIS/LoKR", which never matched the dropdown value
    # "LyCORIS/LoKr" and therefore hid both sliders for LoKr.
    rank_module_dropout_types = (
        "LoCon",
        "Kohya DyLoRA",
        "Kohya LoCon",
        "LoRA-FA",
        "LyCORIS/BOFT",
        "LyCORIS/Diag-OFT",
        "LyCORIS/GLoRA",
        "LyCORIS/LoCon",
        "LyCORIS/LoHa",
        "LyCORIS/LoKr",
        "LyCORIS/Native Fine-Tuning",
        "Standard",
    )
    oft_types = ("LyCORIS/BOFT", "LyCORIS/Diag-OFT")
    bypass_types = ("LyCORIS/LoCon", "LyCORIS/LoHa", "LyCORIS/LoKr")

    lora_settings_config = {
        "network_row": {
            "gr_type": gr.Row,
            "update_params": shown_for(
                "Kohya DyLoRA",
                "Kohya LoCon",
                "LoRA-FA",
                "LyCORIS/BOFT",
                "LyCORIS/Diag-OFT",
                "LyCORIS/DyLoRA",
                "LyCORIS/GLoRA",
                "LyCORIS/LoCon",
                "LyCORIS/LoHa",
                "LyCORIS/LoKr",
                "Standard",
            ),
        },
        "convolution_row": {
            "gr_type": gr.Row,
            "update_params": shown_for(
                "LoCon",
                "Kohya DyLoRA",
                "Kohya LoCon",
                "LoRA-FA",
                "LyCORIS/BOFT",
                "LyCORIS/Diag-OFT",
                "LyCORIS/DyLoRA",
                "LyCORIS/LoHa",
                "LyCORIS/LoKr",
                "LyCORIS/LoCon",
                "LyCORIS/GLoRA",
            ),
        },
        "kohya_advanced_lora": {
            "gr_type": gr.Row,
            "update_params": shown_for(
                "Standard",
                "Kohya DyLoRA",
                "Kohya LoCon",
                "LoRA-FA",
            ),
        },
        "network_weights": {
            "gr_type": gr.Textbox,
            "update_params": shown_for(*weights_types),
        },
        "network_weights_file": {
            "gr_type": gr.Button,
            "update_params": shown_for(*weights_types),
        },
        "dim_from_weights": {
            "gr_type": gr.Checkbox,
            "update_params": shown_for(*weights_types),
        },
        "factor": {
            "gr_type": gr.Slider,
            "update_params": shown_for("LyCORIS/LoKr"),
        },
        "conv_dim": {
            "gr_type": gr.Slider,
            "update_params": rank_slider(conv_dim),
        },
        "network_dim": {
            "gr_type": gr.Slider,
            "update_params": rank_slider(network_dim),
        },
        "bypass_mode": {
            "gr_type": gr.Checkbox,
            "update_params": shown_for(*bypass_types),
        },
        "dora_wd": {
            "gr_type": gr.Checkbox,
            "update_params": shown_for(*bypass_types),
        },
        "use_cp": {
            "gr_type": gr.Checkbox,
            "update_params": shown_for("LyCORIS/LoKr"),
        },
        "use_tucker": {
            "gr_type": gr.Checkbox,
            "update_params": shown_for(
                "LyCORIS/BOFT",
                "LyCORIS/Diag-OFT",
                "LyCORIS/DyLoRA",
                "LyCORIS/LoCon",
                "LyCORIS/LoHa",
                "LyCORIS/Native Fine-Tuning",
            ),
        },
        "use_scalar": {
            "gr_type": gr.Checkbox,
            "update_params": shown_for(
                "LyCORIS/BOFT",
                "LyCORIS/Diag-OFT",
                "LyCORIS/LoCon",
                "LyCORIS/LoHa",
                "LyCORIS/LoKr",
                "LyCORIS/Native Fine-Tuning",
            ),
        },
        "rank_dropout_scale": {
            "gr_type": gr.Checkbox,
            "update_params": shown_for(
                "LyCORIS/BOFT",
                "LyCORIS/Diag-OFT",
                "LyCORIS/GLoRA",
                "LyCORIS/LoCon",
                "LyCORIS/LoHa",
                "LyCORIS/LoKr",
                "LyCORIS/Native Fine-Tuning",
            ),
        },
        "constrain": {
            "gr_type": gr.Number,
            "update_params": shown_for(*oft_types),
        },
        "rescaled": {
            "gr_type": gr.Checkbox,
            "update_params": shown_for(*oft_types),
        },
        "train_norm": {
            "gr_type": gr.Checkbox,
            "update_params": shown_for(
                "LyCORIS/DyLoRA",
                "LyCORIS/BOFT",
                "LyCORIS/Diag-OFT",
                "LyCORIS/GLoRA",
                "LyCORIS/LoCon",
                "LyCORIS/LoHa",
                "LyCORIS/LoKr",
                "LyCORIS/Native Fine-Tuning",
            ),
        },
        "decompose_both": {
            "gr_type": gr.Checkbox,
            "update_params": shown_for("LyCORIS/LoKr"),
        },
        "train_on_input": {
            "gr_type": gr.Checkbox,
            "update_params": shown_for("LyCORIS/iA3"),
        },
        "scale_weight_norms": {
            "gr_type": gr.Slider,
            "update_params": shown_for(
                "LoCon",
                "Kohya DyLoRA",
                "Kohya LoCon",
                "LoRA-FA",
                "LyCORIS/DyLoRA",
                "LyCORIS/GLoRA",
                "LyCORIS/LoHa",
                "LyCORIS/LoCon",
                "LyCORIS/LoKr",
                "Standard",
            ),
        },
        "network_dropout": {
            "gr_type": gr.Slider,
            "update_params": shown_for(
                "LoCon",
                "Kohya DyLoRA",
                "Kohya LoCon",
                "LoRA-FA",
                "LyCORIS/BOFT",
                "LyCORIS/Diag-OFT",
                "LyCORIS/DyLoRA",
                "LyCORIS/GLoRA",
                "LyCORIS/LoCon",
                "LyCORIS/LoHa",
                "LyCORIS/LoKr",
                "LyCORIS/Native Fine-Tuning",
                "Standard",
            ),
        },
        "rank_dropout": {
            "gr_type": gr.Slider,
            "update_params": shown_for(*rank_module_dropout_types),
        },
        "module_dropout": {
            "gr_type": gr.Slider,
            "update_params": shown_for(*rank_module_dropout_types),
        },
        "LyCORIS_preset": {
            "gr_type": gr.Dropdown,
            "update_params": shown_for(*lycoris_types),
        },
        "unit": {
            "gr_type": gr.Slider,
            "update_params": shown_for(
                "Kohya DyLoRA",
                "LyCORIS/DyLoRA",
            ),
        },
        "lycoris_accordion": {
            "gr_type": gr.Accordion,
            "update_params": shown_for(*lycoris_types),
        },
    }

    # Only the values are needed; dict insertion order fixes the output order.
    return tuple(
        settings["gr_type"](**settings["update_params"])
        for settings in lora_settings_config.values()
    )
LR weights", + placeholder="(Optional) eg: 0.5", + info="Specify the learning rate weight of the mid block of U-Net.", + ) + up_lr_weight = gr.Textbox( + label="Up LR weights", + placeholder="(Optional) eg: 0,0,0,0,0,0,1,1,1,1,1,1", + info="Specify the learning rate weight of the up blocks of U-Net. The same as down_lr_weight.", + ) + block_lr_zero_threshold = gr.Textbox( + label="Blocks LR zero threshold", + placeholder="(Optional) eg: 0.1", + info="If the weight is not more than this value, the LoRA module is not created. The default is 0.", + ) + with gr.Tab(label="Blocks"): + with gr.Row(visible=True): + block_dims = gr.Textbox( + label="Block dims", + placeholder="(Optional) eg: 2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2", + info="Specify the dim (rank) of each block. Specify 25 numbers.", + ) + block_alphas = gr.Textbox( + label="Block alphas", + placeholder="(Optional) eg: 2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2", + info="Specify the alpha of each block. Specify 25 numbers as with block_dims. If omitted, the value of network_alpha is used.", + ) + with gr.Tab(label="Conv"): + with gr.Row(visible=True): + conv_block_dims = gr.Textbox( + label="Conv dims", + placeholder="(Optional) eg: 2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2", + info="Extend LoRA to Conv2d 3x3 and specify the dim (rank) of each block. Specify 25 numbers.", + ) + conv_block_alphas = gr.Textbox( + label="Conv alphas", + placeholder="(Optional) eg: 2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2", + info="Specify the alpha of each block when expanding LoRA to Conv2d 3x3. Specify 25 numbers. 
If omitted, the value of conv_alpha is used.", + ) + advanced_training = AdvancedTraining( + headless=headless, training_type="lora", config=config + ) + advanced_training.color_aug.change( + color_aug_changed, + inputs=[advanced_training.color_aug], + outputs=[basic_training.cache_latents], + ) + + with gr.Accordion("Samples", open=False, elem_id="samples_tab"): + sample = SampleImages(config=config) + + global huggingface + with gr.Accordion("HuggingFace", open=False): + huggingface = HuggingFace(config=config) + + LoRA_type.change( + update_LoRA_settings, + inputs=[ + LoRA_type, + conv_dim, + network_dim, + ], + outputs=[ + network_row, + convolution_row, + kohya_advanced_lora, + network_weights, + network_weights_file, + dim_from_weights, + factor, + conv_dim, + network_dim, + bypass_mode, + dora_wd, + use_cp, + use_tucker, + use_scalar, + rank_dropout_scale, + constrain, + rescaled, + train_norm, + decompose_both, + train_on_input, + scale_weight_norms, + network_dropout, + rank_dropout, + module_dropout, + LyCORIS_preset, + unit, + lycoris_accordion, + ], + ) + + global executor + executor = CommandExecutor(headless=headless) + + with gr.Column(), gr.Group(): + with gr.Row(): + button_print = gr.Button("Print training command") + + # Setup gradio tensorboard buttons + TensorboardManager(headless=headless, logging_dir=folders.logging_dir) + + settings_list = [ + source_model.pretrained_model_name_or_path, + source_model.v2, + source_model.v_parameterization, + source_model.sdxl_checkbox, + folders.logging_dir, + source_model.train_data_dir, + folders.reg_data_dir, + folders.output_dir, + source_model.dataset_config, + basic_training.max_resolution, + basic_training.learning_rate, + basic_training.lr_scheduler, + basic_training.lr_warmup, + basic_training.train_batch_size, + basic_training.epoch, + basic_training.save_every_n_epochs, + accelerate_launch.mixed_precision, + source_model.save_precision, + basic_training.seed, + 
accelerate_launch.num_cpu_threads_per_process, + basic_training.cache_latents, + basic_training.cache_latents_to_disk, + basic_training.caption_extension, + basic_training.enable_bucket, + advanced_training.gradient_checkpointing, + advanced_training.fp8_base, + advanced_training.full_fp16, + # advanced_training.no_token_padding, + basic_training.stop_text_encoder_training, + basic_training.min_bucket_reso, + basic_training.max_bucket_reso, + advanced_training.xformers, + source_model.save_model_as, + advanced_training.shuffle_caption, + advanced_training.save_state, + advanced_training.save_state_on_train_end, + advanced_training.resume, + advanced_training.prior_loss_weight, + text_encoder_lr, + unet_lr, + network_dim, + network_weights, + dim_from_weights, + advanced_training.color_aug, + advanced_training.flip_aug, + advanced_training.masked_loss, + advanced_training.clip_skip, + accelerate_launch.num_processes, + accelerate_launch.num_machines, + accelerate_launch.multi_gpu, + accelerate_launch.gpu_ids, + accelerate_launch.main_process_port, + advanced_training.gradient_accumulation_steps, + advanced_training.mem_eff_attn, + source_model.output_name, + source_model.model_list, + advanced_training.max_token_length, + basic_training.max_train_epochs, + basic_training.max_train_steps, + advanced_training.max_data_loader_n_workers, + network_alpha, + source_model.training_comment, + advanced_training.keep_tokens, + basic_training.lr_scheduler_num_cycles, + basic_training.lr_scheduler_power, + advanced_training.persistent_data_loader_workers, + advanced_training.bucket_no_upscale, + advanced_training.random_crop, + advanced_training.bucket_reso_steps, + advanced_training.v_pred_like_loss, + advanced_training.caption_dropout_every_n_epochs, + advanced_training.caption_dropout_rate, + basic_training.optimizer, + basic_training.optimizer_args, + basic_training.lr_scheduler_args, + basic_training.max_grad_norm, + advanced_training.noise_offset_type, + 
advanced_training.noise_offset, + advanced_training.noise_offset_random_strength, + advanced_training.adaptive_noise_scale, + advanced_training.multires_noise_iterations, + advanced_training.multires_noise_discount, + advanced_training.ip_noise_gamma, + advanced_training.ip_noise_gamma_random_strength, + LoRA_type, + factor, + bypass_mode, + dora_wd, + use_cp, + use_tucker, + use_scalar, + rank_dropout_scale, + constrain, + rescaled, + train_norm, + decompose_both, + train_on_input, + conv_dim, + conv_alpha, + sample.sample_every_n_steps, + sample.sample_every_n_epochs, + sample.sample_sampler, + sample.sample_prompts, + advanced_training.additional_parameters, + advanced_training.loss_type, + advanced_training.huber_schedule, + advanced_training.huber_c, + advanced_training.vae_batch_size, + advanced_training.min_snr_gamma, + down_lr_weight, + mid_lr_weight, + up_lr_weight, + block_lr_zero_threshold, + block_dims, + block_alphas, + conv_block_dims, + conv_block_alphas, + advanced_training.weighted_captions, + unit, + advanced_training.save_every_n_steps, + advanced_training.save_last_n_steps, + advanced_training.save_last_n_steps_state, + advanced_training.log_with, + advanced_training.wandb_api_key, + advanced_training.wandb_run_name, + advanced_training.log_tracker_name, + advanced_training.log_tracker_config, + advanced_training.scale_v_pred_loss_like_noise_pred, + scale_weight_norms, + network_dropout, + rank_dropout, + module_dropout, + sdxl_params.sdxl_cache_text_encoder_outputs, + sdxl_params.sdxl_no_half_vae, + advanced_training.full_bf16, + advanced_training.min_timestep, + advanced_training.max_timestep, + advanced_training.vae, + accelerate_launch.dynamo_backend, + accelerate_launch.dynamo_mode, + accelerate_launch.dynamo_use_fullgraph, + accelerate_launch.dynamo_use_dynamic, + accelerate_launch.extra_accelerate_launch_args, + LyCORIS_preset, + advanced_training.debiased_estimation_loss, + huggingface.huggingface_repo_id, + 
huggingface.huggingface_token, + huggingface.huggingface_repo_type, + huggingface.huggingface_repo_visibility, + huggingface.huggingface_path_in_repo, + huggingface.save_state_to_huggingface, + huggingface.resume_from_huggingface, + huggingface.async_upload, + metadata.metadata_author, + metadata.metadata_description, + metadata.metadata_license, + metadata.metadata_tags, + metadata.metadata_title, + ] + + configuration.button_open_config.click( + open_configuration, + inputs=[dummy_db_true, dummy_db_false, configuration.config_file_name] + + settings_list + + [training_preset], + outputs=[configuration.config_file_name] + + settings_list + + [training_preset, convolution_row], + show_progress=False, + ) + + configuration.button_load_config.click( + open_configuration, + inputs=[dummy_db_false, dummy_db_false, configuration.config_file_name] + + settings_list + + [training_preset], + outputs=[configuration.config_file_name] + + settings_list + + [training_preset, convolution_row], + show_progress=False, + ) + + training_preset.input( + open_configuration, + inputs=[dummy_db_false, dummy_db_true, configuration.config_file_name] + + settings_list + + [training_preset], + outputs=[gr.Textbox(visible=False)] + + settings_list + + [training_preset, convolution_row], + show_progress=False, + ) + + configuration.button_save_config.click( + save_configuration, + inputs=[dummy_db_false, configuration.config_file_name] + settings_list, + outputs=[configuration.config_file_name], + show_progress=False, + ) + + run_state = gr.Textbox(value=train_state_value, visible=False) + + run_state.change( + fn=executor.wait_for_training_to_end, + outputs=[executor.button_run, executor.button_stop_training], + ) + + executor.button_run.click( + train_model, + inputs=[dummy_headless] + [dummy_db_false] + settings_list, + outputs=[executor.button_run, executor.button_stop_training, run_state], + show_progress=False, + ) + + executor.button_stop_training.click( + executor.kill_command, + 
outputs=[executor.button_run, executor.button_stop_training], + ) + + button_print.click( + train_model, + inputs=[dummy_headless] + [dummy_db_true] + settings_list, + show_progress=False, + ) + + with gr.Tab("Tools"): + lora_tools = LoRATools(headless=headless) + + with gr.Tab("Guides"): + gr.Markdown("This section provide Various LoRA guides and information...") + if os.path.exists(rf"{scriptdir}/docs/LoRA/top_level.md"): + with open( + os.path.join(rf"{scriptdir}/docs/LoRA/top_level.md"), + "r", + encoding="utf-8", + ) as file: + guides_top_level = file.read() + "\n" + gr.Markdown(guides_top_level) + + return ( + source_model.train_data_dir, + folders.reg_data_dir, + folders.output_dir, + folders.logging_dir, + ) diff --git a/kohya_gui/manual_caption_gui.py b/kohya_gui/manual_caption_gui.py new file mode 100644 index 0000000000000000000000000000000000000000..4e791db13d9e741577af0ed3aedfcdc24523aa9c --- /dev/null +++ b/kohya_gui/manual_caption_gui.py @@ -0,0 +1,508 @@ +import gradio as gr +from easygui import msgbox, boolbox +from .common_gui import get_folder_path, scriptdir, list_dirs +from math import ceil +import os +import re + +from .custom_logging import setup_logging + +# Set up logging +log = setup_logging() + +IMAGES_TO_SHOW = 5 +IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".webp", ".bmp") +auto_save = True + + +def _get_caption_path(image_file, images_dir, caption_ext): + """ + Returns the expected path of a caption file for a given image path + """ + caption_file_name = os.path.splitext(image_file)[0] + caption_ext + caption_file_path = os.path.join(images_dir, caption_file_name) + return caption_file_path + + +def _get_quick_tags(quick_tags_text): + """ + Gets a list of tags from the quick tags text box + """ + quick_tags = [t.strip() for t in quick_tags_text.split(",") if t.strip()] + quick_tags_set = set(quick_tags) + return quick_tags, quick_tags_set + + +def _get_tag_checkbox_updates(caption, quick_tags, quick_tags_set): + """ + Updates a list 
def _get_tag_checkbox_updates(caption, quick_tags, quick_tags_set):
    """
    Updates a list of caption checkboxes to show possible tags and tags
    already included in the caption
    """
    caption_tags_have = [c.strip() for c in caption.split(",") if c.strip()]
    caption_tags_unique = [t for t in caption_tags_have if t not in quick_tags_set]
    caption_tags_all = quick_tags + caption_tags_unique
    return gr.CheckboxGroup(choices=caption_tags_all, value=caption_tags_have)


def paginate_go(page, max_page):
    """
    Jump to the page typed by the user.

    Returns the requested page clamped to ``1..max_page`` as an int, or
    ``None`` (after showing a message box) when *page* is not a usable number.
    """
    try:
        requested = float(page)
    except (TypeError, ValueError):
        # Only conversion failures are "invalid input"; a bare except would
        # also swallow KeyboardInterrupt/SystemExit.
        requested = None

    # NaN parses as a float but cannot be clamped or converted to int
    # (NaN is the only float that is not equal to itself).
    if requested is None or requested != requested:
        msgbox(f"Invalid page num: {page}")
        return None

    return paginate(requested, max_page, 0)


def paginate(page, max_page, page_change):
    """Return ``page + page_change`` clamped to ``1..max_page`` as an int."""
    return int(max(min(page + page_change, max_page), 1))


def save_caption(caption, caption_ext, image_file, images_dir):
    """Write *caption* to the caption file that belongs to *image_file*."""
    caption_path = _get_caption_path(image_file, images_dir, caption_ext)
    # The file is only written, never read back, so plain "w" is sufficient.
    with open(caption_path, "w", encoding="utf-8") as f:
        f.write(caption)

    log.info(f"Wrote captions to {caption_path}")


def update_quick_tags(quick_tags_text, *image_caption_texts):
    """Rebuild the tag checkbox group of every caption after a quick-tags edit."""
    quick_tags, quick_tags_set = _get_quick_tags(quick_tags_text)
    return [
        _get_tag_checkbox_updates(caption, quick_tags, quick_tags_set)
        for caption in image_caption_texts
    ]


def update_image_caption(
    quick_tags_text, caption, image_file, images_dir, caption_ext, auto_save
):
    """
    React to an edited caption: save it when *auto_save* is set, then return
    the matching tag checkbox group update.
    """
    if auto_save:
        save_caption(caption, caption_ext, image_file, images_dir)

    quick_tags, quick_tags_set = _get_quick_tags(quick_tags_text)
    return _get_tag_checkbox_updates(caption, quick_tags, quick_tags_set)


def update_image_tags(
    quick_tags_text,
    selected_tags,
    image_file,
    images_dir,
    caption_ext,
    auto_save,
):
    """
    React to changed tag checkboxes: rebuild the caption from the selected
    tags, save it when *auto_save* is set, and return the caption text.
    """
    # Try to determine order by quick tags
    quick_tags, quick_tags_set = _get_quick_tags(quick_tags_text)
    selected_tags_set = set(selected_tags)

    # Quick tags first (in quick-tag order), then the remaining selected tags
    # in the order they were selected.
    output_tags = [t for t in quick_tags if t in selected_tags_set] + [
        t for t in selected_tags if t not in quick_tags_set
    ]
    caption = ", ".join(output_tags)

    if auto_save:
        save_caption(caption, caption_ext, image_file, images_dir)

    return caption


def import_tags_from_captions(
    images_dir, caption_ext, quick_tags_text, ignore_load_tags_word_count
):
    """
    Scans images directory for all available captions and loads all tags
    under a specified word count into the quick tags box

    Tags are de-duplicated case-insensitively, keeping the first spelling
    seen. Empty tags (blank caption files, doubled or trailing commas) are
    ignored. Returns the comma-separated tag string, or an empty ``gr.Text()``
    update when validation fails or the user declines to overwrite.
    """

    def empty_return():
        return gr.Text()

    # Check for images_dir
    if not images_dir:
        msgbox("Image folder is missing...")
        return empty_return()

    # isdir (not exists) so that a path to a regular file is rejected here
    # instead of crashing in os.listdir below.
    if not os.path.isdir(images_dir):
        msgbox("Image folder does not exist...")
        return empty_return()

    if not caption_ext:
        msgbox("Please provide an extension for the caption files.")
        return empty_return()

    if quick_tags_text and not boolbox(
        "Are you sure you wish to overwrite the current quick tags?",
        choices=("Yes", "No"),
    ):
        return empty_return()

    image_files = [
        f for f in os.listdir(images_dir) if f.lower().endswith(IMAGE_EXTENSIONS)
    ]

    # Use a set for lookup but store order with list
    tags = []
    tags_set = set()
    for image_file in image_files:
        caption_file_path = _get_caption_path(image_file, images_dir, caption_ext)
        if not os.path.exists(caption_file_path):
            continue

        with open(caption_file_path, "r", encoding="utf-8") as f:
            caption = f.read()

        for tag in caption.split(","):
            tag = tag.strip()
            if not tag:
                # Previously counted as a one-word tag and imported as "".
                continue
            tag_key = tag.lower()
            if tag_key in tags_set:
                continue
            # str.split() collapses runs of whitespace, so extra spaces
            # between words do not inflate the word count.
            if len(tag.split()) <= ignore_load_tags_word_count:
                tags.append(tag)
                tags_set.add(tag_key)

    return ", ".join(tags)


def load_images(images_dir, caption_ext, loaded_images_dir, page, max_page):
    """
    Triggered to load a new set of images from the folder to caption
    This loads in the total expected image counts to be used by pagination
    before running update_images

    Returns ``[loaded_images_dir, page, max_page]``: the new directory with
    page 1 and the recomputed page count on success, or the values passed in
    (unchanged) when validation fails.
    """

    def empty_return():
        return [loaded_images_dir, page, max_page]

    # Check for images_dir
    if not images_dir:
        msgbox("Image folder is missing...")
        return empty_return()

    # isdir (not exists) so that a path to a regular file is rejected here
    # instead of crashing in os.listdir below.
    if not os.path.isdir(images_dir):
        msgbox("Image folder does not exist...")
        return empty_return()

    if not caption_ext:
        msgbox("Please provide an extension for the caption files.")
        return empty_return()

    # Load Images
    total_images = sum(
        1 for f in os.listdir(images_dir) if f.lower().endswith(IMAGE_EXTENSIONS)
    )
    return [images_dir, 1, ceil(total_images / IMAGES_TO_SHOW)]
def load_images(images_dir, caption_ext, loaded_images_dir, page, max_page):
    """
    Triggered to load a new set of images from the folder to caption
    This loads in the total expected image counts to be used by pagination
    before running update_images

    Returns ``[loaded_images_dir, page, max_page]``: the new directory,
    page 1 and the page count on success, or the incoming values unchanged
    when validation fails.
    """

    def empty_return():
        return [loaded_images_dir, page, max_page]

    # Check for images_dir
    if not images_dir:
        msgbox("Image folder is missing...")
        return empty_return()

    # isdir rather than exists: a path to a regular file would make
    # os.listdir below raise.
    if not os.path.isdir(images_dir):
        msgbox("Image folder does not exist...")
        return empty_return()

    if not caption_ext:
        msgbox("Please provide an extension for the caption files.")
        return empty_return()

    # Load Images
    total_images = sum(
        1 for f in os.listdir(images_dir) if f.lower().endswith(IMAGE_EXTENSIONS)
    )
    # Never report fewer than one page, so a folder without images does not
    # produce "Page 1 / 0" (paginate() clamps to page 1 anyway).
    return [images_dir, 1, max(1, ceil(total_images / IMAGES_TO_SHOW))]


def update_images(
    images_dir,
    caption_ext,
    quick_tags_text,
    page,
):
    """
    Updates the displayed images and captions from the current page and
    image directory

    Returns one flat list matching the listener's outputs: row visibility
    updates, image paths (twice: once for the hidden path textbox and once
    for the image component), captions, tag checkbox updates and finally
    the two pagination rows made visible.
    """

    # Load Images
    images_list = os.listdir(images_dir)
    image_files = [f for f in images_list if f.lower().endswith(IMAGE_EXTENSIONS)]

    # Quick tags
    quick_tags, quick_tags_set = _get_quick_tags(quick_tags_text or "")

    # Display Images
    rows = []
    image_paths = []
    captions = []
    tag_checkbox_groups = []

    start_index = (int(page) - 1) * IMAGES_TO_SHOW
    for i in range(IMAGES_TO_SHOW):
        image_index = start_index + i
        # Slots past the end of the image list are hidden and cleared
        show_row = image_index < len(image_files)

        image_path = None
        caption = ""
        tag_checkboxes = None
        if show_row:
            image_file = image_files[image_index]
            image_path = os.path.join(images_dir, image_file)

            caption_file_path = _get_caption_path(image_file, images_dir, caption_ext)
            if os.path.exists(caption_file_path):
                with open(caption_file_path, "r", encoding="utf-8") as f:
                    caption = f.read()

            tag_checkboxes = _get_tag_checkbox_updates(
                caption, quick_tags, quick_tags_set
            )
        rows.append(gr.Row(visible=show_row))
        image_paths.append(image_path)
        captions.append(caption)
        tag_checkbox_groups.append(tag_checkboxes)

    return (
        rows
        + image_paths
        + image_paths
        + captions
        + tag_checkbox_groups
        + [gr.Row(visible=True), gr.Row(visible=True)]
    )
# Gradio UI
def gradio_manual_caption_gui_tab(headless=False, default_images_dir=None):
    """Build the "Manual Captioning" tab and wire up its event listeners.

    Args:
        headless: When true the native folder-picker button is hidden.
        default_images_dir: Directory whose sub-folders seed the image
            folder dropdown; defaults to ``<scriptdir>/data``.
    """
    from .common_gui import create_refresh_button

    default_images_dir = (
        default_images_dir
        if default_images_dir is not None
        else os.path.join(scriptdir, "data")
    )
    current_images_dir = default_images_dir

    # Function to list directories
    def list_images_dirs(path):
        # Allows list_images_dirs to modify current_images_dir outside of this function
        nonlocal current_images_dir
        current_images_dir = path
        return list(list_dirs(path))

    with gr.Tab("Manual Captioning"):
        gr.Markdown("This utility allows quick captioning and tagging of images.")
        # Hidden state: current page, page count and the directory that was
        # actually loaded (as opposed to what the dropdown currently shows).
        page = gr.Number(value=-1, visible=False)
        max_page = gr.Number(value=1, visible=False)
        loaded_images_dir = gr.Text(visible=False)
        with gr.Group(), gr.Row():
            images_dir = gr.Dropdown(
                label="Image folder to caption (containing the images to caption)",
                choices=[""] + list_images_dirs(default_images_dir),
                value="",
                interactive=True,
                allow_custom_value=True,
            )
            create_refresh_button(
                images_dir,
                lambda: None,
                lambda: {"choices": list_images_dirs(current_images_dir)},
                "open_folder_small",
            )
            folder_button = gr.Button(
                "📂",
                elem_id="open_folder_small",
                elem_classes=["tool"],
                visible=(not headless),
            )
            folder_button.click(
                get_folder_path,
                outputs=images_dir,
                show_progress=False,
            )
            load_images_button = gr.Button("Load", elem_id="open_folder")
            caption_ext = gr.Dropdown(
                label="Caption file extension",
                choices=[".cap", ".caption", ".txt"],
                value=".txt",
                interactive=True,
                allow_custom_value=True,
            )
            auto_save = gr.Checkbox(
                label="Autosave", info="Options", value=True, interactive=True
            )

            images_dir.change(
                fn=lambda path: gr.Dropdown(choices=[""] + list_images_dirs(path)),
                inputs=images_dir,
                outputs=images_dir,
                show_progress=False,
            )

        # Caption Section
        with gr.Group(), gr.Row():
            quick_tags_text = gr.Textbox(
                label="Quick Tags",
                placeholder="Comma separated list of tags",
                interactive=True,
            )
            import_tags_button = gr.Button("Import", elem_id="open_folder")
            ignore_load_tags_word_count = gr.Slider(
                minimum=1,
                maximum=100,
                value=3,
                step=1,
                label="Ignore Imported Tags Above Word Count",
                interactive=True,
            )

        # Next/Prev section generator
        def render_pagination():
            gr.Button("< Prev", elem_id="open_folder").click(
                paginate,
                inputs=[page, max_page, gr.Number(value=-1, visible=False)],
                outputs=[page],
            )
            page_count = gr.Label("Page 1", label="Page")
            page_goto_text = gr.Textbox(
                label="Goto page",
                placeholder="Page Number",
                interactive=True,
            )
            gr.Button("Go >", elem_id="open_folder").click(
                paginate_go,
                inputs=[page_goto_text, max_page],
                outputs=[page],
            )
            gr.Button("Next >", elem_id="open_folder").click(
                paginate,
                inputs=[page, max_page, gr.Number(value=1, visible=False)],
                outputs=[page],
            )
            return page_count

        with gr.Row(visible=False) as pagination_row1:
            page_count1 = render_pagination()

        # Images section
        image_rows = []
        image_files = []
        image_images = []
        image_caption_texts = []
        image_tag_checks = []
        save_buttons = []
        for _ in range(IMAGES_TO_SHOW):
            with gr.Row(visible=False) as row:
                image_file = gr.Text(visible=False)
                image_files.append(image_file)
                image_image = gr.Image(type="filepath")
                image_images.append(image_image)
                image_caption_text = gr.TextArea(
                    label="Captions",
                    placeholder="Input captions",
                    interactive=True,
                )
                image_caption_texts.append(image_caption_text)
                tag_checkboxes = gr.CheckboxGroup([], label="Tags", interactive=True)
                save_button = gr.Button(
                    "💾",
                    elem_id="open_folder_small",
                    elem_classes=["tool"],
                    visible=False,
                )
                save_buttons.append(save_button)

                # Caption text change
                image_caption_text.input(
                    update_image_caption,
                    inputs=[
                        quick_tags_text,
                        image_caption_text,
                        image_file,
                        loaded_images_dir,
                        caption_ext,
                        auto_save,
                    ],
                    outputs=tag_checkboxes,
                )

                # Quick tag check
                tag_checkboxes.input(
                    update_image_tags,
                    inputs=[
                        quick_tags_text,
                        tag_checkboxes,
                        image_file,
                        loaded_images_dir,
                        caption_ext,
                        auto_save,
                    ],
                    outputs=[image_caption_text],
                )

                # Save Button
                # Uses the directory the images were loaded from, like the
                # autosave handlers above, not the live dropdown value which
                # may have been edited since "Load" was pressed.
                save_button.click(
                    save_caption,
                    inputs=[
                        image_caption_text,
                        caption_ext,
                        image_file,
                        loaded_images_dir,
                    ],
                )

            image_tag_checks.append(tag_checkboxes)
            image_rows.append(row)

        # Next/Prev Section
        with gr.Row(visible=False) as pagination_row2:
            page_count2 = render_pagination()

        # Quick tag text update
        quick_tags_text.change(
            update_quick_tags,
            inputs=[quick_tags_text] + image_caption_texts,
            outputs=image_tag_checks,
        )

        # Import tags button
        import_tags_button.click(
            import_tags_from_captions,
            inputs=[
                loaded_images_dir,
                caption_ext,
                quick_tags_text,
                ignore_load_tags_word_count,
            ],
            outputs=quick_tags_text,
        )

        # Load Images button
        load_images_button.click(
            load_images,
            inputs=[
                images_dir,
                caption_ext,
                loaded_images_dir,
                page,
                max_page,
            ],
            outputs=[loaded_images_dir, page, max_page],
        )

        # Update images shown when the update key changes
        # This allows us to trigger a change from multiple
        # sources (page, image_dir)
        image_update_key = gr.Text(visible=False)
        image_update_key.change(
            update_images,
            inputs=[loaded_images_dir, caption_ext, quick_tags_text, page],
            outputs=image_rows
            + image_files
            + image_images
            + image_caption_texts
            + image_tag_checks
            + [pagination_row1, pagination_row2],
            show_progress=False,
        )
        # Update the key on page and image dir change
        listener_kwargs = {
            "fn": lambda p, i: f"{p}-{i}",
            "inputs": [page, loaded_images_dir],
            "outputs": image_update_key,
        }
        page.change(**listener_kwargs)
        loaded_images_dir.change(**listener_kwargs)

        # Save buttons visibility
        # (on auto-save on/off)
        auto_save.change(
            lambda auto_save: [gr.Button(visible=not auto_save)] * IMAGES_TO_SHOW,
            inputs=auto_save,
            outputs=save_buttons,
        )

        # Page Count
        page.change(
            lambda page, max_page: [f"Page {int(page)} / {int(max_page)}"] * 2,
            inputs=[page, max_page],
            outputs=[page_count1, page_count2],
            show_progress=False,
        )
def check_model(model):
    """Tell whether *model* is usable: either not set at all or an existing file.

    An empty/None value counts as valid because every model slot is
    optional; a non-empty value that is not a file is logged and rejected.
    """
    if model and not os.path.isfile(model):
        log.info(f"The provided {model} is not a file")
        return False
    return True


def verify_conditions(sd_model, lora_models):
    """Check that enough models were supplied for a merge.

    With an SD checkpoint at least one LoRA is required; without one at
    least two LoRAs are required.
    """
    provided = [candidate for candidate in lora_models if candidate]
    minimum_loras = 1 if sd_model else 2
    return len(provided) >= minimum_loras
class GradioMergeLoRaTab:
    """"Merge LoRA" tab: merges up to four LoRA files, optionally into an SD checkpoint."""

    def __init__(self, headless=False):
        # headless hides the native file-picker buttons
        self.headless = headless
        self.build_tab()

    def save_inputs_to_json(self, file_path, inputs):
        """Dump *inputs* as JSON to *file_path* (UTF-8)."""
        with open(file_path, "w", encoding="utf-8") as file:
            json.dump(inputs, file)
        log.info(f"Saved inputs to {file_path}")

    def load_inputs_from_json(self, file_path):
        """Return the JSON content of *file_path*."""
        with open(file_path, "r", encoding="utf-8") as file:
            inputs = json.load(file)
        log.info(f"Loaded inputs from {file_path}")
        return inputs

    def build_tab(self):
        """Create the tab's widgets and connect their event listeners."""
        # Last directory/path listed for each dropdown; the refresh buttons
        # re-list from these.
        current_sd_model_dir = os.path.join(scriptdir, "outputs")
        current_save_dir = os.path.join(scriptdir, "outputs")
        current_a_model_dir = current_sd_model_dir
        current_b_model_dir = current_sd_model_dir
        current_c_model_dir = current_sd_model_dir
        current_d_model_dir = current_sd_model_dir

        def list_sd_models(path):
            nonlocal current_sd_model_dir
            current_sd_model_dir = path
            return list(list_files(path, exts=[".ckpt", ".safetensors"], all=True))

        def list_a_models(path):
            nonlocal current_a_model_dir
            current_a_model_dir = path
            return list(list_files(path, exts=[".pt", ".safetensors"], all=True))

        def list_b_models(path):
            nonlocal current_b_model_dir
            current_b_model_dir = path
            return list(list_files(path, exts=[".pt", ".safetensors"], all=True))

        def list_c_models(path):
            nonlocal current_c_model_dir
            current_c_model_dir = path
            return list(list_files(path, exts=[".pt", ".safetensors"], all=True))

        def list_d_models(path):
            nonlocal current_d_model_dir
            current_d_model_dir = path
            return list(list_files(path, exts=[".pt", ".safetensors"], all=True))

        def list_save_to(path):
            nonlocal current_save_dir
            current_save_dir = path
            return list(list_files(path, exts=[".ckpt", ".safetensors"], all=True))

        with gr.Tab("Merge LoRA"):
            gr.Markdown(
                "This utility can merge up to 4 LoRA together or alternatively merge up to 4 LoRA into a SD checkpoint."
            )

            # Hidden textboxes carrying the file-dialog filters
            lora_ext = gr.Textbox(value="*.safetensors *.pt", visible=False)
            lora_ext_name = gr.Textbox(value="LoRA model types", visible=False)
            ckpt_ext = gr.Textbox(value="*.safetensors *.ckpt", visible=False)
            ckpt_ext_name = gr.Textbox(value="SD model types", visible=False)

            with gr.Group(), gr.Row():
                sd_model = gr.Dropdown(
                    label="SD Model (Optional. Stable Diffusion model path, if you want to merge it with LoRA files)",
                    interactive=True,
                    choices=[""] + list_sd_models(current_sd_model_dir),
                    value="",
                    allow_custom_value=True,
                )
                create_refresh_button(
                    sd_model,
                    lambda: None,
                    lambda: {"choices": list_sd_models(current_sd_model_dir)},
                    "open_folder_small",
                )
                sd_model_file = gr.Button(
                    folder_symbol,
                    elem_id="open_folder_small",
                    elem_classes=["tool"],
                    visible=(not self.headless),
                )
                sd_model_file.click(
                    get_file_path,
                    inputs=[sd_model, ckpt_ext, ckpt_ext_name],
                    outputs=sd_model,
                    show_progress=False,
                )
                sdxl_model = gr.Checkbox(label="SDXL model", value=False)

                sd_model.change(
                    fn=lambda path: gr.Dropdown(choices=[""] + list_sd_models(path)),
                    inputs=sd_model,
                    outputs=sd_model,
                    show_progress=False,
                )

            with gr.Group(), gr.Row():
                lora_a_model = gr.Dropdown(
                    label='LoRA model "A" (path to the LoRA A model)',
                    interactive=True,
                    choices=[""] + list_a_models(current_a_model_dir),
                    value="",
                    allow_custom_value=True,
                )
                create_refresh_button(
                    lora_a_model,
                    lambda: None,
                    lambda: {"choices": list_a_models(current_a_model_dir)},
                    "open_folder_small",
                )
                button_lora_a_model_file = gr.Button(
                    folder_symbol,
                    elem_id="open_folder_small",
                    elem_classes=["tool"],
                    visible=(not self.headless),
                )
                button_lora_a_model_file.click(
                    get_file_path,
                    inputs=[lora_a_model, lora_ext, lora_ext_name],
                    outputs=lora_a_model,
                    show_progress=False,
                )

                lora_b_model = gr.Dropdown(
                    label='LoRA model "B" (path to the LoRA B model)',
                    interactive=True,
                    choices=[""] + list_b_models(current_b_model_dir),
                    value="",
                    allow_custom_value=True,
                )
                create_refresh_button(
                    lora_b_model,
                    lambda: None,
                    lambda: {"choices": list_b_models(current_b_model_dir)},
                    "open_folder_small",
                )
                button_lora_b_model_file = gr.Button(
                    folder_symbol,
                    elem_id="open_folder_small",
                    elem_classes=["tool"],
                    visible=(not self.headless),
                )
                button_lora_b_model_file.click(
                    get_file_path,
                    inputs=[lora_b_model, lora_ext, lora_ext_name],
                    outputs=lora_b_model,
                    show_progress=False,
                )

                lora_a_model.change(
                    fn=lambda path: gr.Dropdown(choices=[""] + list_a_models(path)),
                    inputs=lora_a_model,
                    outputs=lora_a_model,
                    show_progress=False,
                )
                lora_b_model.change(
                    fn=lambda path: gr.Dropdown(choices=[""] + list_b_models(path)),
                    inputs=lora_b_model,
                    outputs=lora_b_model,
                    show_progress=False,
                )

            with gr.Row():
                ratio_a = gr.Slider(
                    label="Model A merge ratio (eg: 0.5 mean 50%)",
                    minimum=0,
                    maximum=1,
                    step=0.01,
                    value=0.0,
                    interactive=True,
                )

                ratio_b = gr.Slider(
                    label="Model B merge ratio (eg: 0.5 mean 50%)",
                    minimum=0,
                    maximum=1,
                    step=0.01,
                    value=0.0,
                    interactive=True,
                )

            with gr.Group(), gr.Row():
                lora_c_model = gr.Dropdown(
                    label='LoRA model "C" (path to the LoRA C model)',
                    interactive=True,
                    choices=[""] + list_c_models(current_c_model_dir),
                    value="",
                    allow_custom_value=True,
                )
                create_refresh_button(
                    lora_c_model,
                    lambda: None,
                    lambda: {"choices": list_c_models(current_c_model_dir)},
                    "open_folder_small",
                )
                button_lora_c_model_file = gr.Button(
                    folder_symbol,
                    elem_id="open_folder_small",
                    elem_classes=["tool"],
                    visible=(not self.headless),
                )
                button_lora_c_model_file.click(
                    get_file_path,
                    inputs=[lora_c_model, lora_ext, lora_ext_name],
                    outputs=lora_c_model,
                    show_progress=False,
                )

                lora_d_model = gr.Dropdown(
                    label='LoRA model "D" (path to the LoRA D model)',
                    interactive=True,
                    choices=[""] + list_d_models(current_d_model_dir),
                    value="",
                    allow_custom_value=True,
                )
                create_refresh_button(
                    lora_d_model,
                    lambda: None,
                    lambda: {"choices": list_d_models(current_d_model_dir)},
                    "open_folder_small",
                )
                button_lora_d_model_file = gr.Button(
                    folder_symbol,
                    elem_id="open_folder_small",
                    elem_classes=["tool"],
                    visible=(not self.headless),
                )
                button_lora_d_model_file.click(
                    get_file_path,
                    inputs=[lora_d_model, lora_ext, lora_ext_name],
                    outputs=lora_d_model,
                    show_progress=False,
                )
                lora_c_model.change(
                    fn=lambda path: gr.Dropdown(choices=[""] + list_c_models(path)),
                    inputs=lora_c_model,
                    outputs=lora_c_model,
                    show_progress=False,
                )
                lora_d_model.change(
                    fn=lambda path: gr.Dropdown(choices=[""] + list_d_models(path)),
                    inputs=lora_d_model,
                    outputs=lora_d_model,
                    show_progress=False,
                )

            with gr.Row():
                ratio_c = gr.Slider(
                    label="Model C merge ratio (eg: 0.5 mean 50%)",
                    minimum=0,
                    maximum=1,
                    step=0.01,
                    value=0.0,
                    interactive=True,
                )

                ratio_d = gr.Slider(
                    label="Model D merge ratio (eg: 0.5 mean 50%)",
                    minimum=0,
                    maximum=1,
                    step=0.01,
                    value=0.0,
                    interactive=True,
                )

            with gr.Group(), gr.Row():
                save_to = gr.Dropdown(
                    label="Save to (path for the file to save...)",
                    interactive=True,
                    # Seed from the save directory, not from LoRA D's
                    choices=[""] + list_save_to(current_save_dir),
                    value="",
                    allow_custom_value=True,
                )
                create_refresh_button(
                    save_to,
                    lambda: None,
                    lambda: {"choices": list_save_to(current_save_dir)},
                    "open_folder_small",
                )
                button_save_to = gr.Button(
                    folder_symbol,
                    elem_id="open_folder_small",
                    elem_classes=["tool"],
                    visible=(not self.headless),
                )
                button_save_to.click(
                    get_saveasfilename_path,
                    inputs=[save_to, lora_ext, lora_ext_name],
                    outputs=save_to,
                    show_progress=False,
                )
                precision = gr.Radio(
                    label="Merge precision",
                    choices=["fp16", "bf16", "float"],
                    value="float",
                    interactive=True,
                )
                save_precision = gr.Radio(
                    label="Save precision",
                    choices=["fp16", "bf16", "float"],
                    value="fp16",
                    interactive=True,
                )

                save_to.change(
                    fn=lambda path: gr.Dropdown(choices=[""] + list_save_to(path)),
                    inputs=save_to,
                    outputs=save_to,
                    show_progress=False,
                )

            merge_button = gr.Button("Merge model")

            merge_button.click(
                self.merge_lora,
                inputs=[
                    sd_model,
                    sdxl_model,
                    lora_a_model,
                    lora_b_model,
                    lora_c_model,
                    lora_d_model,
                    ratio_a,
                    ratio_b,
                    ratio_c,
                    ratio_d,
                    save_to,
                    precision,
                    save_precision,
                ],
                show_progress=False,
            )

    def merge_lora(
        self,
        sd_model,
        sdxl_model,
        lora_a_model,
        lora_b_model,
        lora_c_model,
        lora_d_model,
        ratio_a,
        ratio_b,
        ratio_c,
        ratio_d,
        save_to,
        precision,
        save_precision,
    ):
        """Validate the inputs and run the sd-scripts LoRA merge script.

        Runs ``sdxl_merge_lora.py`` when *sdxl_model* is set and
        ``merge_lora.py`` otherwise. Only the LoRA slots that are filled in
        are passed on, each with its own ratio. Returns ``None``; problems
        are reported through the log and abort the merge.
        """

        log.info("Merge model...")
        models = [
            sd_model,
            lora_a_model,
            lora_b_model,
            lora_c_model,
            lora_d_model,
        ]
        lora_models = models[1:]
        ratios = [ratio_a, ratio_b, ratio_c, ratio_d]

        if not verify_conditions(sd_model, lora_models):
            log.info(
                "Warning: Either provide at least one LoRa model along with the sd_model or at least two LoRa models if no sd_model is provided."
            )
            return

        for model in models:
            if not check_model(model):
                return

        # Without a destination the script would be started with an empty
        # --save_to value.
        if not save_to:
            log.info("Warning: Please provide a path for the merged file to save.")
            return

        if not sdxl_model:
            run_cmd = [rf"{PYTHON}", rf"{scriptdir}/sd-scripts/networks/merge_lora.py"]
        else:
            run_cmd = [
                rf"{PYTHON}",
                rf"{scriptdir}/sd-scripts/networks/sdxl_merge_lora.py",
            ]

        if sd_model:
            run_cmd.append("--sd_model")
            run_cmd.append(rf"{sd_model}")

        run_cmd.append("--save_precision")
        run_cmd.append(save_precision)
        run_cmd.append("--precision")
        run_cmd.append(precision)
        run_cmd.append("--save_to")
        run_cmd.append(rf"{save_to}")

        # Prepare model and ratios command as lists, including only non-empty models
        valid_models = [model for model in lora_models if model]
        valid_ratios = [ratios[i] for i, model in enumerate(lora_models) if model]

        if valid_models:
            run_cmd.append("--models")
            run_cmd.extend(valid_models)  # Each model is a separate argument
            run_cmd.append("--ratios")
            run_cmd.extend(
                map(str, valid_ratios)
            )  # Convert ratios to strings and include them as separate arguments

        env = setup_environment()

        # Reconstruct the command string for display only; the list form
        # is what gets executed.
        command_to_run = " ".join(run_cmd)
        log.info(f"Executing command: {command_to_run}")

        # Run the command with the prepared environment
        subprocess.run(run_cmd, env=env)

        log.info("Done merging...")
def merge_lycoris(
    base_model,
    lycoris_model,
    weight,
    output_name,
    dtype,
    device,
    is_sdxl,
    is_v2,
):
    """Run ``tools/merge_lycoris.py`` to merge a LyCORIS model into a checkpoint.

    *base_model*, *lycoris_model* and *output_name* are passed as the
    script's three positional arguments, so all of them must be provided;
    otherwise the problem is logged and nothing is executed. *weight*,
    *device* and *dtype* are forwarded as options, *is_sdxl* / *is_v2* as
    flags. Returns ``None``.
    """
    log.info("Merge model...")

    # An empty string would still be handed over as a positional argument
    # and shift the meaning of nothing, but it can never be a valid path.
    required = (
        ("base model", base_model),
        ("LyCORIS model", lycoris_model),
        ("output file", output_name),
    )
    for description, value in required:
        if not value:
            log.info(f"Please provide the {description} path. Aborting.")
            return

    # Build the command to run merge_lycoris.py using list format
    run_cmd = [
        fr"{PYTHON}",
        fr"{scriptdir}/tools/merge_lycoris.py",
        fr"{base_model}",
        fr"{lycoris_model}",
        fr"{output_name}",
    ]

    # Add additional required arguments with their values
    run_cmd.append("--weight")
    run_cmd.append(str(weight))
    run_cmd.append("--device")
    run_cmd.append(device)
    run_cmd.append("--dtype")
    run_cmd.append(dtype)

    # Add optional flags based on conditions
    if is_sdxl:
        run_cmd.append("--is_sdxl")
    if is_v2:
        run_cmd.append("--is_v2")

    # Environment prepared by the shared helper
    env = setup_environment()

    # Reconstruct the command string for display only; the list form is
    # what gets executed.
    command_to_run = " ".join(run_cmd)
    log.info(f"Executing command: {command_to_run}")

    # Run the command with the prepared environment
    subprocess.run(run_cmd, env=env)

    log.info("Done merging...")
def gradio_merge_lycoris_tab(headless=False):
    """Build the "Merge LyCORIS" tab and wire up its event listeners.

    Args:
        headless: When true the native file-picker buttons are hidden.
    """
    # Last directory/path listed for each dropdown; the refresh buttons
    # re-list from these.
    current_model_dir = os.path.join(scriptdir, "outputs")
    current_lycoris_dir = current_model_dir
    current_save_dir = current_model_dir

    def list_models(path):
        nonlocal current_model_dir
        current_model_dir = path
        return list(list_files(path, exts=[".ckpt", ".safetensors"], all=True))

    def list_lycoris_model(path):
        nonlocal current_lycoris_dir
        current_lycoris_dir = path
        return list(list_files(path, exts=[".pt", ".safetensors"], all=True))

    def list_save_to(path):
        nonlocal current_save_dir
        current_save_dir = path
        return list(list_files(path, exts=[".ckpt", ".safetensors"], all=True))

    with gr.Tab("Merge LyCORIS"):
        gr.Markdown("This utility can merge a LyCORIS model into a SD checkpoint.")

        # Hidden textboxes carrying the file-dialog filters
        lora_ext = gr.Textbox(value="*.safetensors *.pt", visible=False)
        lora_ext_name = gr.Textbox(value="LoRA model types", visible=False)
        ckpt_ext = gr.Textbox(value="*.safetensors *.ckpt", visible=False)
        ckpt_ext_name = gr.Textbox(value="SD model types", visible=False)

        with gr.Group(), gr.Row():
            base_model = gr.Dropdown(
                label="SD Model (Optional Stable Diffusion base model)",
                interactive=True,
                info="Provide a SD file path that you want to merge with the LyCORIS file",
                # Each list is seeded from its own directory variable
                choices=[""] + list_models(current_model_dir),
                value="",
                allow_custom_value=True,
            )
            create_refresh_button(
                base_model,
                lambda: None,
                lambda: {"choices": list_models(current_model_dir)},
                "open_folder_small",
            )
            base_model_file = gr.Button(
                folder_symbol,
                elem_id="open_folder_small",
                elem_classes=["tool"],
                visible=(not headless),
            )
            base_model_file.click(
                get_file_path,
                inputs=[base_model, ckpt_ext, ckpt_ext_name],
                outputs=base_model,
                show_progress=False,
            )

            lycoris_model = gr.Dropdown(
                label="LyCORIS model (path to the LyCORIS model)",
                interactive=True,
                choices=[""] + list_lycoris_model(current_lycoris_dir),
                value="",
                allow_custom_value=True,
            )
            button_lycoris_model_file = gr.Button(
                folder_symbol,
                elem_id="open_folder_small",
                elem_classes=["tool"],
                visible=(not headless),
            )
            button_lycoris_model_file.click(
                get_file_path,
                inputs=[lycoris_model, lora_ext, lora_ext_name],
                outputs=lycoris_model,
                show_progress=False,
            )

            base_model.change(
                fn=lambda path: gr.Dropdown(choices=[""] + list_models(path)),
                inputs=base_model,
                outputs=base_model,
                show_progress=False,
            )
            lycoris_model.change(
                fn=lambda path: gr.Dropdown(choices=[""] + list_lycoris_model(path)),
                inputs=lycoris_model,
                outputs=lycoris_model,
                show_progress=False,
            )

        with gr.Row():
            weight = gr.Slider(
                label="Model A merge ratio (eg: 0.5 mean 50%)",
                minimum=0,
                maximum=1,
                step=0.01,
                value=1.0,
                interactive=True,
            )

        with gr.Group(), gr.Row():
            output_name = gr.Dropdown(
                label="Save to (path for the checkpoint file to save...)",
                interactive=True,
                choices=[""] + list_save_to(current_save_dir),
                value="",
                allow_custom_value=True,
            )
            create_refresh_button(
                output_name,
                lambda: None,
                lambda: {"choices": list_save_to(current_save_dir)},
                "open_folder_small",
            )
            button_output_name = gr.Button(
                folder_symbol,
                elem_id="open_folder_small",
                elem_classes=["tool"],
                visible=(not headless),
            )
            # The output is a checkpoint, so the save dialog is filtered
            # with the checkpoint extensions (matching list_save_to).
            button_output_name.click(
                get_saveasfilename_path,
                inputs=[output_name, ckpt_ext, ckpt_ext_name],
                outputs=output_name,
                show_progress=False,
            )
            dtype = gr.Radio(
                label="Save dtype",
                choices=[
                    "float",
                    "float16",
                    "float32",
                    "float64",
                    "bfloat",
                    "bfloat16",
                ],
                value="float16",
                interactive=True,
            )

            device = gr.Radio(
                label="Device",
                choices=[
                    "cpu",
                    "cuda",
                ],
                value="cpu",
                interactive=True,
            )

            output_name.change(
                fn=lambda path: gr.Dropdown(choices=[""] + list_save_to(path)),
                inputs=output_name,
                outputs=output_name,
                show_progress=False,
            )

        with gr.Row():
            is_sdxl = gr.Checkbox(label="is SDXL", value=False, interactive=True)
            is_v2 = gr.Checkbox(label="is v2", value=False, interactive=True)

        merge_button = gr.Button("Merge model")

        merge_button.click(
            merge_lycoris,
            inputs=[
                base_model,
                lycoris_model,
                weight,
                output_name,
                dtype,
                device,
                is_sdxl,
                is_v2,
            ],
            show_progress=False,
        )
def resize_lora(
    model,
    new_rank,
    save_to,
    save_precision,
    device,
    dynamic_method,
    dynamic_param,
    verbose,
):
    """Validate the inputs and run the sd-scripts ``resize_lora.py`` script.

    Args:
        model: Path of the LoRA to resize; must be an existing file.
        new_rank: Target rank.
        save_to: Output path; ``.safetensors`` is appended unless it
            already ends in ``.pt`` or ``.safetensors``.
        save_precision: Value for ``--save_precision``.
        device: Value for ``--device``; an empty string means ``"cuda"``.
        dynamic_method: ``"None"`` or one of ``sv_ratio``, ``sv_fro``,
            ``sv_cumulative``.
        dynamic_param: Parameter of the dynamic method (free text from the
            UI); must be >= 2 for ``sv_ratio`` and within 0..1 for the
            other two methods.
        verbose: Adds ``--verbose`` when true.

    Returns ``None``; invalid input is logged and nothing is executed.
    """
    # Check for a source model
    if not model:
        log.info("Invalid model file")
        return

    # Check if source model exist
    if not os.path.isfile(model):
        log.info("The provided model is not a file")
        return

    # Without this an empty path would silently become ".safetensors"
    if not save_to:
        log.info("Invalid output file")
        return

    if dynamic_method in ("sv_ratio", "sv_fro", "sv_cumulative"):
        # The parameter comes from a textbox, so it may not be a number
        try:
            param_value = float(dynamic_param)
        except (TypeError, ValueError):
            log.info(
                f"Dynamic parameter for {dynamic_method} need to be a number..."
            )
            return

        if dynamic_method == "sv_ratio":
            if param_value < 2:
                log.info(
                    f"Dynamic parameter for {dynamic_method} need to be 2 or greater..."
                )
                return
        elif param_value < 0 or param_value > 1:
            log.info(
                f"Dynamic parameter for {dynamic_method} need to be between 0 and 1..."
            )
            return

    # Check if save_to end with one of the defines extension. If not add .safetensors.
    if not save_to.endswith((".pt", ".safetensors")):
        save_to += ".safetensors"

    if device == "":
        device = "cuda"

    run_cmd = [
        rf"{PYTHON}",
        rf"{scriptdir}/sd-scripts/networks/resize_lora.py",
        "--save_precision",
        save_precision,
        "--save_to",
        rf"{save_to}",
        "--model",
        rf"{model}",
        "--new_rank",
        str(new_rank),
        "--device",
        device,
    ]

    # Conditional checks for dynamic parameters
    if dynamic_method != "None":
        run_cmd.append("--dynamic_method")
        run_cmd.append(dynamic_method)
        run_cmd.append("--dynamic_param")
        run_cmd.append(str(dynamic_param))

    # Check for verbosity
    if verbose:
        run_cmd.append("--verbose")

    env = setup_environment()

    # Reconstruct the command string for display only; the list form is
    # what gets executed.
    command_to_run = " ".join(run_cmd)
    log.info(f"Executing command: {command_to_run}")

    # Run the command with the prepared environment
    subprocess.run(run_cmd, env=env)

    log.info("Done resizing...")
def gradio_resize_lora_tab(
    headless=False,
):
    """Build the "Resize LoRA" tab and wire up its event listeners.

    Args:
        headless: When true the native file-picker buttons are hidden.
    """
    # Last directory/path listed for each dropdown; the refresh buttons
    # re-list from these.
    current_model_dir = os.path.join(scriptdir, "outputs")
    current_save_dir = os.path.join(scriptdir, "outputs")

    def list_models(path):
        nonlocal current_model_dir
        current_model_dir = path
        # The source is a LoRA, so list the LoRA extensions (same filter as
        # the file dialog and as list_save_to) rather than checkpoints.
        return list(list_files(path, exts=[".pt", ".safetensors"], all=True))

    def list_save_to(path):
        nonlocal current_save_dir
        current_save_dir = path
        return list(list_files(path, exts=[".pt", ".safetensors"], all=True))

    with gr.Tab("Resize LoRA"):
        gr.Markdown("This utility can resize a LoRA.")

        # Hidden textboxes carrying the file-dialog filter
        lora_ext = gr.Textbox(value="*.safetensors *.pt", visible=False)
        lora_ext_name = gr.Textbox(value="LoRA model types", visible=False)

        with gr.Group(), gr.Row():
            model = gr.Dropdown(
                label="Source LoRA (path to the LoRA to resize)",
                interactive=True,
                choices=[""] + list_models(current_model_dir),
                value="",
                allow_custom_value=True,
            )
            create_refresh_button(
                model,
                lambda: None,
                lambda: {"choices": list_models(current_model_dir)},
                "open_folder_small",
            )
            button_lora_a_model_file = gr.Button(
                folder_symbol,
                elem_id="open_folder_small",
                elem_classes=["tool"],
                visible=(not headless),
            )
            button_lora_a_model_file.click(
                get_file_path,
                inputs=[model, lora_ext, lora_ext_name],
                outputs=model,
                show_progress=False,
            )
            save_to = gr.Dropdown(
                label="Save to (path for the LoRA file to save...)",
                interactive=True,
                choices=[""] + list_save_to(current_save_dir),
                value="",
                allow_custom_value=True,
            )
            create_refresh_button(
                save_to,
                lambda: None,
                lambda: {"choices": list_save_to(current_save_dir)},
                "open_folder_small",
            )
            button_save_to = gr.Button(
                folder_symbol,
                elem_id="open_folder_small",
                elem_classes=["tool"],
                visible=(not headless),
            )
            button_save_to.click(
                get_saveasfilename_path,
                inputs=[save_to, lora_ext, lora_ext_name],
                outputs=save_to,
                show_progress=False,
            )
            model.change(
                fn=lambda path: gr.Dropdown(choices=[""] + list_models(path)),
                inputs=model,
                outputs=model,
                show_progress=False,
            )
            save_to.change(
                fn=lambda path: gr.Dropdown(choices=[""] + list_save_to(path)),
                inputs=save_to,
                outputs=save_to,
                show_progress=False,
            )
        with gr.Row():
            new_rank = gr.Slider(
                label="Desired LoRA rank",
                minimum=1,
                maximum=1024,
                step=1,
                value=4,
                interactive=True,
            )
            dynamic_method = gr.Radio(
                choices=["None", "sv_ratio", "sv_fro", "sv_cumulative"],
                value="sv_fro",
                label="Dynamic method",
                interactive=True,
            )
            dynamic_param = gr.Textbox(
                label="Dynamic parameter",
                value="0.9",
                interactive=True,
                placeholder="Value for the dynamic method selected.",
            )
        with gr.Row():

            verbose = gr.Checkbox(label="Verbose logging", value=True)
            save_precision = gr.Radio(
                label="Save precision",
                choices=["fp16", "bf16", "float"],
                value="fp16",
                interactive=True,
            )
            device = gr.Radio(
                label="Device",
                choices=[
                    "cpu",
                    "cuda",
                ],
                value="cuda",
                interactive=True,
            )

        convert_button = gr.Button("Resize model")

        convert_button.click(
            resize_lora,
            inputs=[
                model,
                new_rank,
                save_to,
                save_precision,
                device,
                dynamic_method,
                dynamic_param,
                verbose,
            ],
            show_progress=False,
        )
def svd_merge_lora(
    lora_a_model,
    lora_b_model,
    lora_c_model,
    lora_d_model,
    ratio_a,
    ratio_b,
    ratio_c,
    ratio_d,
    save_to,
    precision,
    save_precision,
    new_rank,
    new_conv_rank,
    device,
):
    """Validate the inputs and run the sd-scripts ``svd_merge_lora.py`` script.

    Only the LoRA slots that are filled in take part in the merge. Their
    ratios are normalised so that they add up to 1 before being passed to
    the script. Nothing is executed (and the reason is logged) when the
    output path is missing or already exists, no model is selected, a
    selected model is not a file, or the selected ratios add up to 0.

    Returns ``None``.
    """
    if not save_to:
        log.info("Please provide a path for the merged LoRA to save. Aborting.")
        return

    # Check if the output file already exists
    if os.path.isfile(save_to):
        log.info(f"Output file '{save_to}' already exists. Aborting.")
        return

    # Keep only the slots that actually name a model, with their ratio
    candidates = (
        (lora_a_model, ratio_a),
        (lora_b_model, ratio_b),
        (lora_c_model, ratio_c),
        (lora_d_model, ratio_d),
    )
    selected = [(path, ratio) for path, ratio in candidates if path]

    if not selected:
        log.info("No LoRA model provided. Aborting.")
        return

    # Abort on the first missing file instead of merging the remainder
    # with ratios that were computed for the full selection.
    for model_path, _ in selected:
        if not os.path.isfile(model_path):
            log.info(f"The provided model at {model_path} is not a file")
            return

    # Normalise over the selected models only; ratios of empty slots must
    # not dilute the result, and a zero total cannot be normalised.
    total_ratio = sum(ratio for _, ratio in selected)
    if total_ratio == 0:
        log.info("The merge ratios of the selected models add up to 0. Aborting.")
        return

    models = [fr"{model_path}" for model_path, _ in selected]
    ratios = [str(ratio / total_ratio) for _, ratio in selected]

    run_cmd = [
        rf"{PYTHON}",
        rf"{scriptdir}/sd-scripts/networks/svd_merge_lora.py",
        "--save_precision",
        save_precision,
        "--precision",
        precision,
        "--save_to",
        save_to,
    ]

    run_cmd.extend(["--models"] + models)
    run_cmd.extend(["--ratios"] + ratios)

    run_cmd.extend(
        [
            "--device",
            device,
            "--new_rank",
            str(new_rank),
            "--new_conv_rank",
            str(new_conv_rank),
        ]
    )

    # Log the command
    log.info(" ".join(run_cmd))

    env = setup_environment()

    # Run the command
    subprocess.run(run_cmd, env=env)
list(list_files(path, exts=[".pt", ".safetensors"], all=True)) + + def list_c_models(path): + nonlocal current_c_model_dir + current_c_model_dir = path + return list(list_files(path, exts=[".pt", ".safetensors"], all=True)) + + def list_d_models(path): + nonlocal current_d_model_dir + current_d_model_dir = path + return list(list_files(path, exts=[".pt", ".safetensors"], all=True)) + + def list_save_to(path): + nonlocal current_save_dir + current_save_dir = path + return list(list_files(path, exts=[".pt", ".safetensors"], all=True)) + + with gr.Tab("Merge LoRA (SVD)"): + gr.Markdown( + "This utility can merge two LoRA networks together into a new LoRA." + ) + + lora_ext = gr.Textbox(value="*.safetensors *.pt", visible=False) + lora_ext_name = gr.Textbox(value="LoRA model types", visible=False) + + with gr.Group(), gr.Row(): + lora_a_model = gr.Dropdown( + label='LoRA model "A" (path to the LoRA A model)', + interactive=True, + choices=[""] + list_a_models(current_a_model_dir), + value="", + allow_custom_value=True, + ) + create_refresh_button( + lora_a_model, + lambda: None, + lambda: {"choices": list_a_models(current_a_model_dir)}, + "open_folder_small", + ) + button_lora_a_model_file = gr.Button( + folder_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_lora_a_model_file.click( + get_file_path, + inputs=[lora_a_model, lora_ext, lora_ext_name], + outputs=lora_a_model, + show_progress=False, + ) + + lora_b_model = gr.Dropdown( + label='LoRA model "B" (path to the LoRA B model)', + interactive=True, + choices=[""] + list_b_models(current_b_model_dir), + value="", + allow_custom_value=True, + ) + create_refresh_button( + lora_b_model, + lambda: None, + lambda: {"choices": list_b_models(current_b_model_dir)}, + "open_folder_small", + ) + button_lora_b_model_file = gr.Button( + folder_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_lora_b_model_file.click( + 
get_file_path, + inputs=[lora_b_model, lora_ext, lora_ext_name], + outputs=lora_b_model, + show_progress=False, + ) + lora_a_model.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_a_models(path)), + inputs=lora_a_model, + outputs=lora_a_model, + show_progress=False, + ) + lora_b_model.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_b_models(path)), + inputs=lora_b_model, + outputs=lora_b_model, + show_progress=False, + ) + with gr.Row(): + ratio_a = gr.Slider( + label="Merge ratio model A", + minimum=0, + maximum=1, + step=0.01, + value=0.25, + interactive=True, + ) + ratio_b = gr.Slider( + label="Merge ratio model B", + minimum=0, + maximum=1, + step=0.01, + value=0.25, + interactive=True, + ) + with gr.Group(), gr.Row(): + lora_c_model = gr.Dropdown( + label='LoRA model "C" (path to the LoRA C model)', + interactive=True, + choices=[""] + list_c_models(current_c_model_dir), + value="", + allow_custom_value=True, + ) + create_refresh_button( + lora_c_model, + lambda: None, + lambda: {"choices": list_c_models(current_c_model_dir)}, + "open_folder_small", + ) + button_lora_c_model_file = gr.Button( + folder_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_lora_c_model_file.click( + get_file_path, + inputs=[lora_c_model, lora_ext, lora_ext_name], + outputs=lora_c_model, + show_progress=False, + ) + + lora_d_model = gr.Dropdown( + label='LoRA model "D" (path to the LoRA D model)', + interactive=True, + choices=[""] + list_d_models(current_d_model_dir), + value="", + allow_custom_value=True, + ) + create_refresh_button( + lora_d_model, + lambda: None, + lambda: {"choices": list_d_models(current_d_model_dir)}, + "open_folder_small", + ) + button_lora_d_model_file = gr.Button( + folder_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_lora_d_model_file.click( + get_file_path, + inputs=[lora_d_model, lora_ext, lora_ext_name], + 
outputs=lora_d_model, + show_progress=False, + ) + + lora_c_model.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_c_models(path)), + inputs=lora_c_model, + outputs=lora_c_model, + show_progress=False, + ) + lora_d_model.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_d_models(path)), + inputs=lora_d_model, + outputs=lora_d_model, + show_progress=False, + ) + with gr.Row(): + ratio_c = gr.Slider( + label="Merge ratio model C", + minimum=0, + maximum=1, + step=0.01, + value=0.25, + interactive=True, + ) + ratio_d = gr.Slider( + label="Merge ratio model D", + minimum=0, + maximum=1, + step=0.01, + value=0.25, + interactive=True, + ) + with gr.Row(): + new_rank = gr.Slider( + label="New Rank", + minimum=1, + maximum=1024, + step=1, + value=128, + interactive=True, + ) + new_conv_rank = gr.Slider( + label="New Conv Rank", + minimum=1, + maximum=1024, + step=1, + value=128, + interactive=True, + ) + + with gr.Group(), gr.Row(): + save_to = gr.Dropdown( + label="Save to (path for the new LoRA file to save...)", + interactive=True, + choices=[""] + list_save_to(current_d_model_dir), + value="", + allow_custom_value=True, + ) + create_refresh_button( + save_to, + lambda: None, + lambda: {"choices": list_save_to(current_save_dir)}, + "open_folder_small", + ) + button_save_to = gr.Button( + folder_symbol, + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_save_to.click( + get_saveasfilename_path, + inputs=[save_to, lora_ext, lora_ext_name], + outputs=save_to, + show_progress=False, + ) + save_to.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_save_to(path)), + inputs=save_to, + outputs=save_to, + show_progress=False, + ) + with gr.Group(), gr.Row(): + precision = gr.Radio( + label="Merge precision", + choices=["fp16", "bf16", "float"], + value="float", + interactive=True, + ) + save_precision = gr.Radio( + label="Save precision", + choices=["fp16", "bf16", "float"], + value="float", + interactive=True, 
+ ) + device = gr.Radio( + label="Device", + choices=[ + "cpu", + "cuda", + ], + value="cuda", + interactive=True, + ) + + convert_button = gr.Button("Merge model") + + convert_button.click( + svd_merge_lora, + inputs=[ + lora_a_model, + lora_b_model, + lora_c_model, + lora_d_model, + ratio_a, + ratio_b, + ratio_c, + ratio_d, + save_to, + precision, + save_precision, + new_rank, + new_conv_rank, + device, + ], + show_progress=False, + ) diff --git a/kohya_gui/textual_inversion_gui.py b/kohya_gui/textual_inversion_gui.py new file mode 100644 index 0000000000000000000000000000000000000000..0ed976f9a2bfb898a1422407d140345af9fcee04 --- /dev/null +++ b/kohya_gui/textual_inversion_gui.py @@ -0,0 +1,1292 @@ +import gradio as gr +import json +import math +import os +import toml +import time +from datetime import datetime +from .common_gui import ( + check_if_model_exist, + color_aug_changed, + create_refresh_button, + get_executable_path, + get_file_path, + get_saveasfile_path, + list_files, + output_message, + print_command_and_toml, + run_cmd_advanced_training, + SaveConfigFile, + scriptdir, + update_my_data, + validate_file_path, validate_folder_path, validate_model_path, + validate_args_setting, setup_environment, +) +from .class_accelerate_launch import AccelerateLaunch +from .class_configuration_file import ConfigurationFile +from .class_source_model import SourceModel +from .class_basic_training import BasicTraining +from .class_advanced_training import AdvancedTraining +from .class_folders import Folders +from .class_sdxl_parameters import SDXLParameters +from .class_command_executor import CommandExecutor +from .class_huggingface import HuggingFace +from .class_metadata import MetaData +from .class_tensorboard import TensorboardManager +from .dreambooth_folder_creation_gui import ( + gradio_dreambooth_folder_creation_tab, +) +from .dataset_balancing_gui import gradio_dataset_balancing_tab +from .class_sample_images import SampleImages, create_prompt_file +from 
.class_gui_config import KohyaSSGUIConfig + +from .custom_logging import setup_logging + +# Set up logging +log = setup_logging() + +# Setup command executor +executor = None + +# Setup huggingface +huggingface = None +use_shell = False +train_state_value = time.time() + + +def save_configuration( + save_as_bool, + file_path, + pretrained_model_name_or_path, + v2, + v_parameterization, + sdxl, + logging_dir, + train_data_dir, + reg_data_dir, + output_dir, + dataset_config, + max_resolution, + learning_rate, + lr_scheduler, + lr_warmup, + train_batch_size, + epoch, + save_every_n_epochs, + mixed_precision, + save_precision, + seed, + num_cpu_threads_per_process, + cache_latents, + cache_latents_to_disk, + caption_extension, + enable_bucket, + gradient_checkpointing, + full_fp16, + no_token_padding, + stop_text_encoder_training, + min_bucket_reso, + max_bucket_reso, + # use_8bit_adam, + xformers, + save_model_as, + shuffle_caption, + save_state, + save_state_on_train_end, + resume, + prior_loss_weight, + color_aug, + flip_aug, + clip_skip, + num_processes, + num_machines, + multi_gpu, + gpu_ids, + main_process_port, + vae, + dynamo_backend, + dynamo_mode, + dynamo_use_fullgraph, + dynamo_use_dynamic, + extra_accelerate_launch_args, + output_name, + max_token_length, + max_train_epochs, + max_data_loader_n_workers, + mem_eff_attn, + gradient_accumulation_steps, + model_list, + token_string, + init_word, + num_vectors_per_token, + max_train_steps, + weights, + template, + keep_tokens, + lr_scheduler_num_cycles, + lr_scheduler_power, + persistent_data_loader_workers, + bucket_no_upscale, + random_crop, + bucket_reso_steps, + v_pred_like_loss, + caption_dropout_every_n_epochs, + caption_dropout_rate, + optimizer, + optimizer_args, + lr_scheduler_args, + noise_offset_type, + noise_offset, + noise_offset_random_strength, + adaptive_noise_scale, + multires_noise_iterations, + multires_noise_discount, + ip_noise_gamma, + ip_noise_gamma_random_strength, + 
sample_every_n_steps, + sample_every_n_epochs, + sample_sampler, + sample_prompts, + additional_parameters, + loss_type, + huber_schedule, + huber_c, + vae_batch_size, + min_snr_gamma, + save_every_n_steps, + save_last_n_steps, + save_last_n_steps_state, + log_with, + wandb_api_key, + wandb_run_name, + log_tracker_name, + log_tracker_config, + scale_v_pred_loss_like_noise_pred, + min_timestep, + max_timestep, + sdxl_no_half_vae, + huggingface_repo_id, + huggingface_token, + huggingface_repo_type, + huggingface_repo_visibility, + huggingface_path_in_repo, + save_state_to_huggingface, + resume_from_huggingface, + async_upload, + metadata_author, + metadata_description, + metadata_license, + metadata_tags, + metadata_title, +): + # Get list of function parameters and values + parameters = list(locals().items()) + + original_file_path = file_path + + if save_as_bool: + log.info("Save as...") + file_path = get_saveasfile_path(file_path) + else: + log.info("Save...") + if file_path == None or file_path == "": + file_path = get_saveasfile_path(file_path) + + # log.info(file_path) + + if file_path == None or file_path == "": + return original_file_path # In case a file_path was provided and the user decide to cancel the open action + + # Extract the destination directory from the file path + destination_directory = os.path.dirname(file_path) + + # Create the destination directory if it doesn't exist + if not os.path.exists(destination_directory): + os.makedirs(destination_directory) + + SaveConfigFile( + parameters=parameters, + file_path=file_path, + exclusion=["file_path", "save_as"], + ) + + return file_path + + +def open_configuration( + ask_for_file, + file_path, + pretrained_model_name_or_path, + v2, + v_parameterization, + sdxl, + logging_dir, + train_data_dir, + reg_data_dir, + output_dir, + dataset_config, + max_resolution, + learning_rate, + lr_scheduler, + lr_warmup, + train_batch_size, + epoch, + save_every_n_epochs, + mixed_precision, + save_precision, + seed, 
+ num_cpu_threads_per_process, + cache_latents, + cache_latents_to_disk, + caption_extension, + enable_bucket, + gradient_checkpointing, + full_fp16, + no_token_padding, + stop_text_encoder_training, + min_bucket_reso, + max_bucket_reso, + # use_8bit_adam, + xformers, + save_model_as, + shuffle_caption, + save_state, + save_state_on_train_end, + resume, + prior_loss_weight, + color_aug, + flip_aug, + clip_skip, + num_processes, + num_machines, + multi_gpu, + gpu_ids, + main_process_port, + vae, + dynamo_backend, + dynamo_mode, + dynamo_use_fullgraph, + dynamo_use_dynamic, + extra_accelerate_launch_args, + output_name, + max_token_length, + max_train_epochs, + max_data_loader_n_workers, + mem_eff_attn, + gradient_accumulation_steps, + model_list, + token_string, + init_word, + num_vectors_per_token, + max_train_steps, + weights, + template, + keep_tokens, + lr_scheduler_num_cycles, + lr_scheduler_power, + persistent_data_loader_workers, + bucket_no_upscale, + random_crop, + bucket_reso_steps, + v_pred_like_loss, + caption_dropout_every_n_epochs, + caption_dropout_rate, + optimizer, + optimizer_args, + lr_scheduler_args, + noise_offset_type, + noise_offset, + noise_offset_random_strength, + adaptive_noise_scale, + multires_noise_iterations, + multires_noise_discount, + ip_noise_gamma, + ip_noise_gamma_random_strength, + sample_every_n_steps, + sample_every_n_epochs, + sample_sampler, + sample_prompts, + additional_parameters, + loss_type, + huber_schedule, + huber_c, + vae_batch_size, + min_snr_gamma, + save_every_n_steps, + save_last_n_steps, + save_last_n_steps_state, + log_with, + wandb_api_key, + wandb_run_name, + log_tracker_name, + log_tracker_config, + scale_v_pred_loss_like_noise_pred, + min_timestep, + max_timestep, + sdxl_no_half_vae, + huggingface_repo_id, + huggingface_token, + huggingface_repo_type, + huggingface_repo_visibility, + huggingface_path_in_repo, + save_state_to_huggingface, + resume_from_huggingface, + async_upload, + metadata_author, + 
metadata_description, + metadata_license, + metadata_tags, + metadata_title, +): + # Get list of function parameters and values + parameters = list(locals().items()) + + original_file_path = file_path + + if ask_for_file: + file_path = get_file_path(file_path) + + if not file_path == "" and not file_path == None: + # load variables from JSON file + with open(file_path, "r", encoding="utf-8") as f: + my_data = json.load(f) + log.info("Loading config...") + # Update values to fix deprecated use_8bit_adam checkbox and set appropriate optimizer if it is set to True + my_data = update_my_data(my_data) + else: + file_path = original_file_path # In case a file_path was provided and the user decide to cancel the open action + my_data = {} + + values = [file_path] + for key, value in parameters: + # Set the value in the dictionary to the corresponding value in `my_data`, or the default value if not found + if not key in ["ask_for_file", "file_path"]: + values.append(my_data.get(key, value)) + return tuple(values) + + +def train_model( + headless, + print_only, + pretrained_model_name_or_path, + v2, + v_parameterization, + sdxl, + logging_dir, + train_data_dir, + reg_data_dir, + output_dir, + dataset_config, + max_resolution, + learning_rate, + lr_scheduler, + lr_warmup, + train_batch_size, + epoch, + save_every_n_epochs, + mixed_precision, + save_precision, + seed, + num_cpu_threads_per_process, + cache_latents, + cache_latents_to_disk, + caption_extension, + enable_bucket, + gradient_checkpointing, + full_fp16, + no_token_padding, + stop_text_encoder_training_pct, + min_bucket_reso, + max_bucket_reso, + # use_8bit_adam, + xformers, + save_model_as, + shuffle_caption, + save_state, + save_state_on_train_end, + resume, + prior_loss_weight, + color_aug, + flip_aug, + clip_skip, + num_processes, + num_machines, + multi_gpu, + gpu_ids, + main_process_port, + vae, + dynamo_backend, + dynamo_mode, + dynamo_use_fullgraph, + dynamo_use_dynamic, + extra_accelerate_launch_args, + 
output_name, + max_token_length, + max_train_epochs, + max_data_loader_n_workers, + mem_eff_attn, + gradient_accumulation_steps, + model_list, # Keep this. Yes, it is unused here but required given the common list used + token_string, + init_word, + num_vectors_per_token, + max_train_steps, + weights, + template, + keep_tokens, + lr_scheduler_num_cycles, + lr_scheduler_power, + persistent_data_loader_workers, + bucket_no_upscale, + random_crop, + bucket_reso_steps, + v_pred_like_loss, + caption_dropout_every_n_epochs, + caption_dropout_rate, + optimizer, + optimizer_args, + lr_scheduler_args, + noise_offset_type, + noise_offset, + noise_offset_random_strength, + adaptive_noise_scale, + multires_noise_iterations, + multires_noise_discount, + ip_noise_gamma, + ip_noise_gamma_random_strength, + sample_every_n_steps, + sample_every_n_epochs, + sample_sampler, + sample_prompts, + additional_parameters, + loss_type, + huber_schedule, + huber_c, + vae_batch_size, + min_snr_gamma, + save_every_n_steps, + save_last_n_steps, + save_last_n_steps_state, + log_with, + wandb_api_key, + wandb_run_name, + log_tracker_name, + log_tracker_config, + scale_v_pred_loss_like_noise_pred, + min_timestep, + max_timestep, + sdxl_no_half_vae, + huggingface_repo_id, + huggingface_token, + huggingface_repo_type, + huggingface_repo_visibility, + huggingface_path_in_repo, + save_state_to_huggingface, + resume_from_huggingface, + async_upload, + metadata_author, + metadata_description, + metadata_license, + metadata_tags, + metadata_title, +): + # Get list of function parameters and values + parameters = list(locals().items()) + global train_state_value + + TRAIN_BUTTON_VISIBLE = [ + gr.Button(visible=True), + gr.Button(visible=False or headless), + gr.Textbox(value=train_state_value), + ] + + if executor.is_running(): + log.error("Training is already running. 
Can't start another training session.") + return TRAIN_BUTTON_VISIBLE + + log.info(f"Start training TI...") + + log.info(f"Validating lr scheduler arguments...") + if not validate_args_setting(lr_scheduler_args): + return + + log.info(f"Validating optimizer arguments...") + if not validate_args_setting(optimizer_args): + return + + # + # Validate paths + # + + if not validate_file_path(dataset_config): + return TRAIN_BUTTON_VISIBLE + + if not validate_file_path(log_tracker_config): + return TRAIN_BUTTON_VISIBLE + + if not validate_folder_path(logging_dir, can_be_written_to=True, create_if_not_exists=True): + return TRAIN_BUTTON_VISIBLE + + if not validate_folder_path(output_dir, can_be_written_to=True, create_if_not_exists=True): + return TRAIN_BUTTON_VISIBLE + + if not validate_model_path(pretrained_model_name_or_path): + return TRAIN_BUTTON_VISIBLE + + if not validate_folder_path(reg_data_dir): + return TRAIN_BUTTON_VISIBLE + + if not validate_file_path(resume): + return TRAIN_BUTTON_VISIBLE + + if not validate_folder_path(train_data_dir): + return TRAIN_BUTTON_VISIBLE + + if not validate_model_path(vae): + return TRAIN_BUTTON_VISIBLE + + # + # End of path validation + # + + # if not validate_paths( + # dataset_config=dataset_config, + # headless=headless, + # log_tracker_config=log_tracker_config, + # logging_dir=logging_dir, + # output_dir=output_dir, + # pretrained_model_name_or_path=pretrained_model_name_or_path, + # reg_data_dir=reg_data_dir, + # resume=resume, + # train_data_dir=train_data_dir, + # vae=vae, + # ): + # return TRAIN_BUTTON_VISIBLE + + if token_string == "": + output_message(msg="Token string is missing", headless=headless) + return TRAIN_BUTTON_VISIBLE + + if init_word == "": + output_message(msg="Init word is missing", headless=headless) + return TRAIN_BUTTON_VISIBLE + + if not print_only and check_if_model_exist( + output_name, output_dir, save_model_as, headless + ): + return TRAIN_BUTTON_VISIBLE + + if dataset_config: + log.info( + 
"Dataset config toml file used, skipping total_steps, train_batch_size, gradient_accumulation_steps, epoch, reg_factor, max_train_steps calculations..." + ) + if max_train_steps > 0: + # calculate stop encoder training + if stop_text_encoder_training_pct == 0: + stop_text_encoder_training = 0 + else: + stop_text_encoder_training = math.ceil( + float(max_train_steps) / 100 * int(stop_text_encoder_training_pct) + ) + + if lr_warmup != 0: + lr_warmup_steps = round( + float(int(lr_warmup) * int(max_train_steps) / 100) + ) + else: + lr_warmup_steps = 0 + else: + stop_text_encoder_training = 0 + lr_warmup_steps = 0 + + if max_train_steps == 0: + max_train_steps_info = f"Max train steps: 0. sd-scripts will therefore default to 1600. Please specify a different value if required." + else: + max_train_steps_info = f"Max train steps: {max_train_steps}" + + else: + if train_data_dir == "": + log.error("Train data dir is empty") + return TRAIN_BUTTON_VISIBLE + + # Get a list of all subfolders in train_data_dir + subfolders = [ + f + for f in os.listdir(train_data_dir) + if os.path.isdir(os.path.join(train_data_dir, f)) + ] + + total_steps = 0 + + # Loop through each subfolder and extract the number of repeats + for folder in subfolders: + try: + # Extract the number of repeats from the folder name + repeats = int(folder.split("_")[0]) + log.info(f"Folder {folder}: {repeats} repeats found") + + # Count the number of images in the folder + num_images = len( + [ + f + for f, lower_f in ( + (file, file.lower()) + for file in os.listdir(os.path.join(train_data_dir, folder)) + ) + if lower_f.endswith((".jpg", ".jpeg", ".png", ".webp")) + ] + ) + + log.info(f"Folder {folder}: {num_images} images found") + + # Calculate the total number of steps for this folder + steps = repeats * num_images + + # log.info the result + log.info(f"Folder {folder}: {num_images} * {repeats} = {steps} steps") + + total_steps += steps + + except ValueError: + # Handle the case where the folder name does not 
contain an underscore + log.info( + f"Error: '{folder}' does not contain an underscore, skipping..." + ) + + if reg_data_dir == "": + reg_factor = 1 + else: + log.warning( + "Regularisation images are used... Will double the number of steps required..." + ) + reg_factor = 2 + + log.info(f"Regulatization factor: {reg_factor}") + + if max_train_steps == 0: + # calculate max_train_steps + max_train_steps = int( + math.ceil( + float(total_steps) + / int(train_batch_size) + / int(gradient_accumulation_steps) + * int(epoch) + * int(reg_factor) + ) + ) + max_train_steps_info = f"max_train_steps ({total_steps} / {train_batch_size} / {gradient_accumulation_steps} * {epoch} * {reg_factor}) = {max_train_steps}" + else: + if max_train_steps == 0: + max_train_steps_info = f"Max train steps: 0. sd-scripts will therefore default to 1600. Please specify a different value if required." + else: + max_train_steps_info = f"Max train steps: {max_train_steps}" + + # calculate stop encoder training + if stop_text_encoder_training_pct == 0: + stop_text_encoder_training = 0 + else: + stop_text_encoder_training = math.ceil( + float(max_train_steps) / 100 * int(stop_text_encoder_training_pct) + ) + + if lr_warmup != 0: + lr_warmup_steps = round(float(int(lr_warmup) * int(max_train_steps) / 100)) + else: + lr_warmup_steps = 0 + + log.info(f"Total steps: {total_steps}") + + log.info(f"Train batch size: {train_batch_size}") + log.info(f"Gradient accumulation steps: {gradient_accumulation_steps}") + log.info(f"Epoch: {epoch}") + log.info(max_train_steps_info) + log.info(f"stop_text_encoder_training = {stop_text_encoder_training}") + log.info(f"lr_warmup_steps = {lr_warmup_steps}") + + accelerate_path = get_executable_path("accelerate") + if accelerate_path == "": + log.error("accelerate not found") + return TRAIN_BUTTON_VISIBLE + + run_cmd = [rf'{accelerate_path}', "launch"] + + run_cmd = AccelerateLaunch.run_cmd( + run_cmd=run_cmd, + dynamo_backend=dynamo_backend, + dynamo_mode=dynamo_mode, + 
dynamo_use_fullgraph=dynamo_use_fullgraph, + dynamo_use_dynamic=dynamo_use_dynamic, + num_processes=num_processes, + num_machines=num_machines, + multi_gpu=multi_gpu, + gpu_ids=gpu_ids, + main_process_port=main_process_port, + num_cpu_threads_per_process=num_cpu_threads_per_process, + mixed_precision=mixed_precision, + extra_accelerate_launch_args=extra_accelerate_launch_args, + ) + + if sdxl: + run_cmd.append(rf"{scriptdir}/sd-scripts/sdxl_train_textual_inversion.py") + else: + run_cmd.append(rf"{scriptdir}/sd-scripts/train_textual_inversion.py") + + if max_data_loader_n_workers == "" or None: + max_data_loader_n_workers = 0 + else: + max_data_loader_n_workers = int(max_data_loader_n_workers) + + if max_train_steps == "" or None: + max_train_steps = 0 + else: + max_train_steps = int(max_train_steps) + + # def save_huggingface_to_toml(self, toml_file_path: str): + config_toml_data = { + # Update the values in the TOML data + "adaptive_noise_scale": ( + adaptive_noise_scale if adaptive_noise_scale != 0 else None + ), + "async_upload": async_upload, + "bucket_no_upscale": bucket_no_upscale, + "bucket_reso_steps": bucket_reso_steps, + "cache_latents": cache_latents, + "cache_latents_to_disk": cache_latents_to_disk, + "caption_dropout_every_n_epochs": int(caption_dropout_every_n_epochs), + "caption_extension": caption_extension, + "clip_skip": clip_skip if clip_skip != 0 else None, + "color_aug": color_aug, + "dataset_config": dataset_config, + "dynamo_backend": dynamo_backend, + "enable_bucket": enable_bucket, + "epoch": int(epoch), + "flip_aug": flip_aug, + "full_fp16": full_fp16, + "gradient_accumulation_steps": int(gradient_accumulation_steps), + "gradient_checkpointing": gradient_checkpointing, + "huber_c": huber_c, + "huber_schedule": huber_schedule, + "huggingface_repo_id": huggingface_repo_id, + "huggingface_token": huggingface_token, + "huggingface_repo_type": huggingface_repo_type, + "huggingface_repo_visibility": huggingface_repo_visibility, + 
"huggingface_path_in_repo": huggingface_path_in_repo, + "init_word": init_word, + "ip_noise_gamma": ip_noise_gamma if ip_noise_gamma != 0 else None, + "ip_noise_gamma_random_strength": ip_noise_gamma_random_strength, + "keep_tokens": int(keep_tokens), + "learning_rate": learning_rate, + "logging_dir": logging_dir, + "log_tracker_name": log_tracker_name, + "log_tracker_config": log_tracker_config, + "loss_type": loss_type, + "lr_scheduler": lr_scheduler, + "lr_scheduler_args": str(lr_scheduler_args).replace('"', "").split(), + "lr_scheduler_num_cycles": ( + int(lr_scheduler_num_cycles) if lr_scheduler_num_cycles != "" else int(epoch) + ), + "lr_scheduler_power": lr_scheduler_power, + "lr_warmup_steps": lr_warmup_steps, + "max_bucket_reso": max_bucket_reso, + "max_timestep": max_timestep if max_timestep != 0 else None, + "max_token_length": int(max_token_length), + "max_train_epochs": int(max_train_epochs) if int(max_train_epochs) != 0 else None, + "max_train_steps": int(max_train_steps) if int(max_train_steps) != 0 else None, + "mem_eff_attn": mem_eff_attn, + "metadata_author": metadata_author, + "metadata_description": metadata_description, + "metadata_license": metadata_license, + "metadata_tags": metadata_tags, + "metadata_title": metadata_title, + "min_bucket_reso": int(min_bucket_reso), + "min_snr_gamma": min_snr_gamma if min_snr_gamma != 0 else None, + "min_timestep": min_timestep if min_timestep != 0 else None, + "mixed_precision": mixed_precision, + "multires_noise_discount": multires_noise_discount, + "multires_noise_iterations": ( + multires_noise_iterations if multires_noise_iterations != 0 else None + ), + "no_half_vae": sdxl_no_half_vae, + "no_token_padding": no_token_padding, + "noise_offset": noise_offset if noise_offset != 0 else None, + "noise_offset_random_strength": noise_offset_random_strength, + "noise_offset_type": noise_offset_type, + "num_vectors_per_token": int(num_vectors_per_token), + "optimizer_type": optimizer, + "optimizer_args": 
str(optimizer_args).replace('"', "").split(), + "output_dir": output_dir, + "output_name": output_name, + "persistent_data_loader_workers": int(persistent_data_loader_workers), + "pretrained_model_name_or_path": pretrained_model_name_or_path, + "prior_loss_weight": prior_loss_weight, + "random_crop": random_crop, + "reg_data_dir": reg_data_dir, + "resolution": max_resolution, + "resume": resume, + "resume_from_huggingface": resume_from_huggingface, + "sample_every_n_epochs": ( + sample_every_n_epochs if sample_every_n_epochs != 0 else None + ), + "sample_every_n_steps": ( + sample_every_n_steps if sample_every_n_steps != 0 else None + ), + "sample_prompts": create_prompt_file(sample_prompts, output_dir), + "sample_sampler": sample_sampler, + "save_every_n_epochs": ( + save_every_n_epochs if save_every_n_epochs != 0 else None + ), + "save_every_n_steps": save_every_n_steps if save_every_n_steps != 0 else None, + "save_last_n_steps": save_last_n_steps if save_last_n_steps != 0 else None, + "save_last_n_steps_state": ( + save_last_n_steps_state if save_last_n_steps_state != 0 else None + ), + "save_model_as": save_model_as, + "save_precision": save_precision, + "save_state": save_state, + "save_state_on_train_end": save_state_on_train_end, + "save_state_to_huggingface": save_state_to_huggingface, + "scale_v_pred_loss_like_noise_pred": scale_v_pred_loss_like_noise_pred, + "sdpa": True if xformers == "sdpa" else None, + "seed": int(seed) if int(seed) != 0 else None, + "shuffle_caption": shuffle_caption, + "stop_text_encoder_training": ( + stop_text_encoder_training if stop_text_encoder_training != 0 else None + ), + "token_string": token_string, + "train_batch_size": train_batch_size, + "train_data_dir": train_data_dir, + "log_with": log_with, + "v2": v2, + "v_parameterization": v_parameterization, + "v_pred_like_loss": v_pred_like_loss if v_pred_like_loss != 0 else None, + "vae": vae, + "vae_batch_size": vae_batch_size if vae_batch_size != 0 else None, + 
"wandb_api_key": wandb_api_key, + "wandb_run_name": wandb_run_name, + "weigts": weights, + "use_object_template": True if template == "object template" else None, + "use_style_template": True if template == "style template" else None, + "xformers": True if xformers == "xformers" else None, + } + + # Given dictionary `config_toml_data` + # Remove all values = "" + config_toml_data = { + key: value + for key, value in config_toml_data.items() + if value not in ["", False, None] + } + + config_toml_data["max_data_loader_n_workers"] = int(max_data_loader_n_workers) + + # Sort the dictionary by keys + config_toml_data = dict(sorted(config_toml_data.items())) + + current_datetime = datetime.now() + formatted_datetime = current_datetime.strftime("%Y%m%d-%H%M%S") + tmpfilename = fr"{output_dir}/config_textual_inversion-{formatted_datetime}.toml" + + # Save the updated TOML data back to the file + with open(tmpfilename, "w", encoding="utf-8") as toml_file: + toml.dump(config_toml_data, toml_file) + + if not os.path.exists(toml_file.name): + log.error(f"Failed to write TOML file: {toml_file.name}") + + run_cmd.append("--config_file") + run_cmd.append(rf"{tmpfilename}") + + # Initialize a dictionary with always-included keyword arguments + kwargs_for_training = { + "additional_parameters": additional_parameters, + } + + # Pass the dynamically constructed keyword arguments to the function + run_cmd = run_cmd_advanced_training(run_cmd=run_cmd, **kwargs_for_training) + + if print_only: + print_command_and_toml(run_cmd, tmpfilename) + else: + # Saving config file for model + current_datetime = datetime.now() + formatted_datetime = current_datetime.strftime("%Y%m%d-%H%M%S") + # config_dir = os.path.dirname(os.path.dirname(train_data_dir)) + file_path = os.path.join(output_dir, f"{output_name}_{formatted_datetime}.json") + + log.info(f"Saving training config to {file_path}...") + + SaveConfigFile( + parameters=parameters, + file_path=file_path, + exclusion=["file_path", "save_as", 
def ti_tab(
    headless=False,
    default_output_dir=None,
    config: KohyaSSGUIConfig = {},
    use_shell_flag: bool = False,
):
    """Build the Textual Inversion "Training" tab and wire up its events.

    Args:
        headless: Forwarded to the sub-components; when true the file-picker
            button next to the embedding dropdown is hidden.
        default_output_dir: Directory first scanned for existing embedding
            files. ``None`` or ``""`` falls back to ``<scriptdir>/outputs``.
        config: GUI configuration object handed to every sub-component.
        use_shell_flag: Stored in the module-level ``use_shell`` variable.

    Returns:
        A 4-tuple of Gradio components: training data dir, regularisation
        data dir, output dir and logging dir.

    Side effects:
        Rebinds the module globals ``use_shell``, ``huggingface`` and
        ``executor``.
    """
    global use_shell, huggingface, executor

    # Hidden checkboxes used to feed constant values into event handlers.
    dummy_db_true = gr.Checkbox(value=True, visible=False)
    dummy_db_false = gr.Checkbox(value=False, visible=False)
    dummy_headless = gr.Checkbox(value=headless, visible=False)

    use_shell = use_shell_flag

    current_embedding_dir = (
        default_output_dir
        if default_output_dir is not None and default_output_dir != ""
        else os.path.join(scriptdir, "outputs")
    )

    with gr.Tab("Training"), gr.Column(variant="compact"):
        gr.Markdown("Train a TI using kohya textual inversion python code...")

        # Setup Configuration Files Gradio
        with gr.Accordion("Configuration", open=False):
            configuration = ConfigurationFile(headless=headless, config=config)

        with gr.Accordion("Accelerate launch", open=False), gr.Column():
            accelerate_launch = AccelerateLaunch(config=config)

        with gr.Column():
            source_model = SourceModel(
                save_model_as_choices=[
                    "ckpt",
                    "safetensors",
                ],
                headless=headless,
                config=config,
            )

        with gr.Accordion("Folders", open=False), gr.Group():
            folders = Folders(headless=headless, config=config)

        with gr.Accordion("Metadata", open=False), gr.Group():
            metadata = MetaData(config=config)

        with gr.Accordion("Dataset Preparation", open=False):
            gr.Markdown(
                "This section provide Dreambooth tools to help setup your dataset..."
            )
            gradio_dreambooth_folder_creation_tab(
                train_data_dir_input=source_model.train_data_dir,
                reg_data_dir_input=folders.reg_data_dir,
                output_dir_input=folders.output_dir,
                logging_dir_input=folders.logging_dir,
                headless=headless,
                config=config,
            )

            gradio_dataset_balancing_tab(headless=headless)

        with gr.Accordion("Parameters", open=False), gr.Column():
            # `open` is a boolean flag; the original passed the string "True".
            with gr.Accordion("Basic", open=True):
                with gr.Group(elem_id="basic_tab"):
                    with gr.Row():

                        def list_embedding_files(path):
                            """List embedding files under *path* and remember it
                            as the directory used by the refresh button."""
                            nonlocal current_embedding_dir
                            current_embedding_dir = path
                            return list(
                                list_files(
                                    path,
                                    exts=[".pt", ".ckpt", ".safetensors"],
                                    all=True,
                                )
                            )

                        weights = gr.Dropdown(
                            label="Resume TI training (Optional. Path to existing TI embedding file to keep training)",
                            choices=[""] + list_embedding_files(current_embedding_dir),
                            value="",
                            interactive=True,
                            allow_custom_value=True,
                        )
                        create_refresh_button(
                            weights,
                            lambda: None,
                            lambda: {
                                "choices": list_embedding_files(current_embedding_dir)
                            },
                            "open_folder_small",
                        )
                        weights_file_input = gr.Button(
                            "📂",
                            elem_id="open_folder_small",
                            elem_classes=["tool"],
                            visible=(not headless),
                        )
                        weights_file_input.click(
                            get_file_path,
                            outputs=weights,
                            show_progress=False,
                        )
                        # Re-list the choices whenever the dropdown value changes.
                        weights.change(
                            fn=lambda path: gr.Dropdown(
                                choices=[""] + list_embedding_files(path)
                            ),
                            inputs=weights,
                            outputs=weights,
                            show_progress=False,
                        )

                    with gr.Row():
                        token_string = gr.Textbox(
                            label="Token string",
                            placeholder="eg: cat",
                        )
                        init_word = gr.Textbox(
                            label="Init word",
                            value="*",
                        )
                        num_vectors_per_token = gr.Slider(
                            minimum=1,
                            maximum=75,
                            value=1,
                            step=1,
                            label="Vectors",
                        )
                        template = gr.Dropdown(
                            label="Template",
                            choices=[
                                "caption",
                                "object template",
                                "style template",
                            ],
                            value="caption",
                        )
                    basic_training = BasicTraining(
                        learning_rate_value=1e-5,
                        lr_scheduler_value="cosine",
                        lr_warmup_value=10,
                        sdxl_checkbox=source_model.sdxl_checkbox,
                        config=config,
                    )

                    # Add SDXL Parameters
                    sdxl_params = SDXLParameters(
                        source_model.sdxl_checkbox,
                        show_sdxl_cache_text_encoder_outputs=False,
                        config=config,
                    )

            with gr.Accordion("Advanced", open=False, elem_id="advanced_tab"):
                advanced_training = AdvancedTraining(headless=headless, config=config)
                advanced_training.color_aug.change(
                    color_aug_changed,
                    inputs=[advanced_training.color_aug],
                    outputs=[basic_training.cache_latents],
                )

            with gr.Accordion("Samples", open=False, elem_id="samples_tab"):
                sample = SampleImages(config=config)

            with gr.Accordion("HuggingFace", open=False):
                huggingface = HuggingFace(config=config)

        executor = CommandExecutor(headless=headless)

        with gr.Column(), gr.Group():
            with gr.Row():
                button_print = gr.Button("Print training command")

        # Setup gradio tensorboard buttons
        TensorboardManager(headless=headless, logging_dir=folders.logging_dir)

        # NOTE: this list is passed positionally to open_configuration,
        # save_configuration and train_model, so its order must stay in sync
        # with their parameter lists. Do not reorder.
        settings_list = [
            source_model.pretrained_model_name_or_path,
            source_model.v2,
            source_model.v_parameterization,
            source_model.sdxl_checkbox,
            folders.logging_dir,
            source_model.train_data_dir,
            folders.reg_data_dir,
            folders.output_dir,
            source_model.dataset_config,
            basic_training.max_resolution,
            basic_training.learning_rate,
            basic_training.lr_scheduler,
            basic_training.lr_warmup,
            basic_training.train_batch_size,
            basic_training.epoch,
            basic_training.save_every_n_epochs,
            accelerate_launch.mixed_precision,
            source_model.save_precision,
            basic_training.seed,
            accelerate_launch.num_cpu_threads_per_process,
            basic_training.cache_latents,
            basic_training.cache_latents_to_disk,
            basic_training.caption_extension,
            basic_training.enable_bucket,
            advanced_training.gradient_checkpointing,
            advanced_training.full_fp16,
            advanced_training.no_token_padding,
            basic_training.stop_text_encoder_training,
            basic_training.min_bucket_reso,
            basic_training.max_bucket_reso,
            advanced_training.xformers,
            source_model.save_model_as,
            advanced_training.shuffle_caption,
            advanced_training.save_state,
            advanced_training.save_state_on_train_end,
            advanced_training.resume,
            advanced_training.prior_loss_weight,
            advanced_training.color_aug,
            advanced_training.flip_aug,
            advanced_training.clip_skip,
            accelerate_launch.num_processes,
            accelerate_launch.num_machines,
            accelerate_launch.multi_gpu,
            accelerate_launch.gpu_ids,
            accelerate_launch.main_process_port,
            advanced_training.vae,
            accelerate_launch.dynamo_backend,
            accelerate_launch.dynamo_mode,
            accelerate_launch.dynamo_use_fullgraph,
            accelerate_launch.dynamo_use_dynamic,
            accelerate_launch.extra_accelerate_launch_args,
            source_model.output_name,
            advanced_training.max_token_length,
            basic_training.max_train_epochs,
            advanced_training.max_data_loader_n_workers,
            advanced_training.mem_eff_attn,
            advanced_training.gradient_accumulation_steps,
            source_model.model_list,
            token_string,
            init_word,
            num_vectors_per_token,
            basic_training.max_train_steps,
            weights,
            template,
            advanced_training.keep_tokens,
            basic_training.lr_scheduler_num_cycles,
            basic_training.lr_scheduler_power,
            advanced_training.persistent_data_loader_workers,
            advanced_training.bucket_no_upscale,
            advanced_training.random_crop,
            advanced_training.bucket_reso_steps,
            advanced_training.v_pred_like_loss,
            advanced_training.caption_dropout_every_n_epochs,
            advanced_training.caption_dropout_rate,
            basic_training.optimizer,
            basic_training.optimizer_args,
            basic_training.lr_scheduler_args,
            advanced_training.noise_offset_type,
            advanced_training.noise_offset,
            advanced_training.noise_offset_random_strength,
            advanced_training.adaptive_noise_scale,
            advanced_training.multires_noise_iterations,
            advanced_training.multires_noise_discount,
            advanced_training.ip_noise_gamma,
            advanced_training.ip_noise_gamma_random_strength,
            sample.sample_every_n_steps,
            sample.sample_every_n_epochs,
            sample.sample_sampler,
            sample.sample_prompts,
            advanced_training.additional_parameters,
            advanced_training.loss_type,
            advanced_training.huber_schedule,
            advanced_training.huber_c,
            advanced_training.vae_batch_size,
            advanced_training.min_snr_gamma,
            advanced_training.save_every_n_steps,
            advanced_training.save_last_n_steps,
            advanced_training.save_last_n_steps_state,
            advanced_training.log_with,
            advanced_training.wandb_api_key,
            advanced_training.wandb_run_name,
            advanced_training.log_tracker_name,
            advanced_training.log_tracker_config,
            advanced_training.scale_v_pred_loss_like_noise_pred,
            advanced_training.min_timestep,
            advanced_training.max_timestep,
            sdxl_params.sdxl_no_half_vae,
            huggingface.huggingface_repo_id,
            huggingface.huggingface_token,
            huggingface.huggingface_repo_type,
            huggingface.huggingface_repo_visibility,
            huggingface.huggingface_path_in_repo,
            huggingface.save_state_to_huggingface,
            huggingface.resume_from_huggingface,
            huggingface.async_upload,
            metadata.metadata_author,
            metadata.metadata_description,
            metadata.metadata_license,
            metadata.metadata_tags,
            metadata.metadata_title,
        ]

        # "Open" and "Load" share a handler; the leading dummy checkbox is the
        # only difference between the two calls.
        configuration.button_open_config.click(
            open_configuration,
            inputs=[dummy_db_true, configuration.config_file_name] + settings_list,
            outputs=[configuration.config_file_name] + settings_list,
            show_progress=False,
        )

        configuration.button_load_config.click(
            open_configuration,
            inputs=[dummy_db_false, configuration.config_file_name] + settings_list,
            outputs=[configuration.config_file_name] + settings_list,
            show_progress=False,
        )

        configuration.button_save_config.click(
            save_configuration,
            inputs=[dummy_db_false, configuration.config_file_name] + settings_list,
            outputs=[configuration.config_file_name],
            show_progress=False,
        )

        # Hidden textbox: train_model writes a fresh timestamp into it, and the
        # resulting change event triggers the wait-for-completion handler.
        run_state = gr.Textbox(value=train_state_value, visible=False)

        run_state.change(
            fn=executor.wait_for_training_to_end,
            outputs=[executor.button_run, executor.button_stop_training],
        )

        executor.button_run.click(
            train_model,
            inputs=[dummy_headless] + [dummy_db_false] + settings_list,
            outputs=[executor.button_run, executor.button_stop_training, run_state],
            show_progress=False,
        )

        executor.button_stop_training.click(
            executor.kill_command,
            outputs=[executor.button_run, executor.button_stop_training],
        )

        # Same handler as "run", but with the print-only flag set to True.
        button_print.click(
            train_model,
            inputs=[dummy_headless] + [dummy_db_true] + settings_list,
            show_progress=False,
        )

        return (
            source_model.train_data_dir,
            folders.reg_data_dir,
            folders.output_dir,
            folders.logging_dir,
        )
def verify_lora(
    lora_model,
):
    """Run sd-scripts' ``check_lora_weights.py`` on a LoRA file.

    Args:
        lora_model: Path of the LoRA model file to verify.

    Returns:
        A ``(stdout, stderr)`` tuple of decoded text. If the path is empty or
        is not a file, nothing is executed and ``("", <reason>)`` is returned,
        so both output components bound to this handler still get a value.
    """
    # Reject an empty selection before touching the filesystem.
    if lora_model == "":
        message = "Invalid LoRA model file"
        log.info(message)
        return "", message

    if not os.path.isfile(lora_model):
        message = "The provided LoRA model is not a file"
        log.info(message)
        return "", message

    run_cmd = [
        rf"{PYTHON}",
        rf"{scriptdir}/sd-scripts/networks/check_lora_weights.py",
        rf"{lora_model}",
    ]

    # The joined string is for display only; the argument list is what runs,
    # so no shell quoting is involved.
    command_to_run = " ".join(run_cmd)
    log.info(f"Executing command: {command_to_run}")

    # Environment prepared by the shared helper.
    env = setup_environment()

    # Blocks until the script exits and captures both streams.
    completed = subprocess.run(
        run_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=env,
    )

    # errors="replace" keeps non-UTF-8 bytes in the output from raising.
    return (
        completed.stdout.decode(errors="replace"),
        completed.stderr.decode(errors="replace"),
    )


###
# Gradio UI
###


def gradio_verify_lora_tab(headless=False):
    """Build the "Verify LoRA" tab.

    Args:
        headless: When true, the file-picker button is hidden.
    """
    current_model_dir = os.path.join(scriptdir, "outputs")

    def list_models(path):
        """List model files under *path* and remember it as the directory
        used by the refresh button."""
        nonlocal current_model_dir
        current_model_dir = path
        return list(list_files(path, exts=[".pt", ".safetensors"], all=True))

    with gr.Tab("Verify LoRA"):
        gr.Markdown(
            "This utility can verify a LoRA network to make sure it is properly trained."
        )

        # Hidden fields passed to the file dialog as its extension filter.
        lora_ext = gr.Textbox(value="*.pt *.safetensors", visible=False)
        lora_ext_name = gr.Textbox(value="LoRA model types", visible=False)

        with gr.Group(), gr.Row():
            lora_model = gr.Dropdown(
                label="LoRA model (path to the LoRA model to verify)",
                interactive=True,
                choices=[""] + list_models(current_model_dir),
                value="",
                allow_custom_value=True,
            )
            create_refresh_button(
                lora_model,
                lambda: None,
                lambda: {"choices": list_models(current_model_dir)},
                "open_folder_small",
            )
            button_lora_model_file = gr.Button(
                folder_symbol,
                elem_id="open_folder_small",
                elem_classes=["tool"],
                visible=(not headless),
            )
            button_lora_model_file.click(
                get_file_path,
                inputs=[lora_model, lora_ext, lora_ext_name],
                outputs=lora_model,
                show_progress=False,
            )
            verify_button = gr.Button("Verify", variant="primary")

            # Re-list the choices whenever the dropdown value changes.
            lora_model.change(
                fn=lambda path: gr.Dropdown(choices=[""] + list_models(path)),
                inputs=lora_model,
                outputs=lora_model,
                show_progress=False,
            )

        lora_model_verif_output = gr.Textbox(
            label="Output",
            placeholder="Verification output",
            interactive=False,
            lines=1,
            max_lines=10,
        )

        lora_model_verif_error = gr.Textbox(
            label="Error",
            placeholder="Verification error",
            interactive=False,
            lines=1,
            max_lines=10,
        )

        # verify_lora returns (stdout, stderr), one value per textbox.
        verify_button.click(
            verify_lora,
            inputs=[
                lora_model,
            ],
            outputs=[lora_model_verif_output, lora_model_verif_error],
            show_progress=False,
        )
mode 100644 index 0000000000000000000000000000000000000000..2fdad11244b7f856c5a178f26f1f663b757d25de --- /dev/null +++ b/kohya_gui/wd14_caption_gui.py @@ -0,0 +1,408 @@ +import gradio as gr +import subprocess +from .common_gui import ( + get_folder_path, + add_pre_postfix, + scriptdir, + list_dirs, + get_executable_path, setup_environment, +) +from .class_gui_config import KohyaSSGUIConfig +import os + +from .custom_logging import setup_logging + +# Set up logging +log = setup_logging() +old_onnx_value = True + + +def caption_images( + train_data_dir: str, + caption_extension: str, + batch_size: int, + general_threshold: float, + character_threshold: float, + repo_id: str, + recursive: bool, + max_data_loader_n_workers: int, + debug: bool, + undesired_tags: str, + frequency_tags: bool, + always_first_tags: str, + onnx: bool, + append_tags: bool, + force_download: bool, + caption_separator: str, + tag_replacement: bool, + character_tag_expand: str, + use_rating_tags: bool, + use_rating_tags_as_last_tag: bool, + remove_underscore: bool, + thresh: float, +) -> None: + # Check for images_dir_input + if train_data_dir == "": + log.info("Image folder is missing...") + return + + if caption_extension == "": + log.info("Please provide an extension for the caption files.") + return + + repo_id_converted = repo_id.replace("/", "_") + if not os.path.exists(f"./wd14_tagger_model/{repo_id_converted}"): + force_download = True + + log.info(f"Captioning files in {train_data_dir}...") + run_cmd = [ + rf'{get_executable_path("accelerate")}', + "launch", + rf"{scriptdir}/sd-scripts/finetune/tag_images_by_wd14_tagger.py", + ] + + # Uncomment and modify if needed + # if always_first_tags != "": + # run_cmd.append('--always_first_tags') + # run_cmd.append(always_first_tags) + + if append_tags: + run_cmd.append("--append_tags") + run_cmd.append("--batch_size") + run_cmd.append(str(int(batch_size))) + run_cmd.append("--caption_extension") + run_cmd.append(caption_extension) + 
run_cmd.append("--caption_separator") + run_cmd.append(caption_separator) + + if character_tag_expand: + run_cmd.append("--character_tag_expand") + if not character_threshold == 0.35: + run_cmd.append("--character_threshold") + run_cmd.append(str(character_threshold)) + if debug: + run_cmd.append("--debug") + if force_download: + run_cmd.append("--force_download") + if frequency_tags: + run_cmd.append("--frequency_tags") + if not general_threshold == 0.35: + run_cmd.append("--general_threshold") + run_cmd.append(str(general_threshold)) + run_cmd.append("--max_data_loader_n_workers") + run_cmd.append(str(int(max_data_loader_n_workers))) + + if onnx: + run_cmd.append("--onnx") + if recursive: + run_cmd.append("--recursive") + if remove_underscore: + run_cmd.append("--remove_underscore") + run_cmd.append("--repo_id") + run_cmd.append(repo_id) + if not tag_replacement == "": + run_cmd.append("--tag_replacement") + run_cmd.append(tag_replacement) + if not thresh == 0.35: + run_cmd.append("--thresh") + run_cmd.append(str(thresh)) + if not undesired_tags == "": + run_cmd.append("--undesired_tags") + run_cmd.append(undesired_tags) + if use_rating_tags: + run_cmd.append("--use_rating_tags") + if use_rating_tags_as_last_tag: + run_cmd.append("--use_rating_tags_as_last_tag") + + # Add the directory containing the training data + run_cmd.append(rf"{train_data_dir}") + + env = setup_environment() + + # Reconstruct the safe command string for display + command_to_run = " ".join(run_cmd) + log.info(f"Executing command: {command_to_run}") + + # Run the command in the sd-scripts folder context + subprocess.run(run_cmd, env=env) + + # Add prefix and postfix + add_pre_postfix( + folder=train_data_dir, + caption_file_ext=caption_extension, + prefix=always_first_tags, + recursive=recursive, + ) + + log.info("...captioning done") + + +### +# Gradio UI +### + + +def gradio_wd14_caption_gui_tab( + headless=False, + default_train_dir=None, + config: KohyaSSGUIConfig = {}, +): + from 
.common_gui import create_refresh_button + + default_train_dir = ( + default_train_dir + if default_train_dir is not None + else os.path.join(scriptdir, "data") + ) + current_train_dir = default_train_dir + + def list_train_dirs(path): + nonlocal current_train_dir + current_train_dir = path + return list(list_dirs(path)) + + with gr.Tab("WD14 Captioning"): + gr.Markdown( + "This utility will use WD14 to caption files for each images in a folder." + ) + + # Input Settings + # with gr.Section('Input Settings'): + with gr.Group(), gr.Row(): + train_data_dir = gr.Dropdown( + label="Image folder to caption (containing the images to caption)", + choices=[config.get("wd14_caption.train_data_dir", "")] + + list_train_dirs(default_train_dir), + value=config.get("wd14_caption.train_data_dir", ""), + interactive=True, + allow_custom_value=True, + ) + create_refresh_button( + train_data_dir, + lambda: None, + lambda: {"choices": list_train_dirs(current_train_dir)}, + "open_folder_small", + ) + button_train_data_dir_input = gr.Button( + "📂", + elem_id="open_folder_small", + elem_classes=["tool"], + visible=(not headless), + ) + button_train_data_dir_input.click( + get_folder_path, + outputs=train_data_dir, + show_progress=False, + ) + + repo_id = gr.Dropdown( + label="Repo ID", + choices=[ + "SmilingWolf/wd-v1-4-convnext-tagger-v2", + "SmilingWolf/wd-v1-4-convnextv2-tagger-v2", + "SmilingWolf/wd-v1-4-vit-tagger-v2", + "SmilingWolf/wd-v1-4-swinv2-tagger-v2", + "SmilingWolf/wd-v1-4-moat-tagger-v2", + "SmilingWolf/wd-swinv2-tagger-v3", + "SmilingWolf/wd-vit-tagger-v3", + "SmilingWolf/wd-convnext-tagger-v3", + ], + value=config.get( + "wd14_caption.repo_id", "SmilingWolf/wd-v1-4-convnextv2-tagger-v2" + ), + show_label="Repo id for wd14 tagger on Hugging Face", + ) + + force_download = gr.Checkbox( + label="Force model re-download", + value=config.get("wd14_caption.force_download", False), + info="Useful to force model re download when switching to onnx", + ) + + with gr.Row(): + + 
caption_extension = gr.Dropdown( + label="Caption file extension", + choices=[".cap", ".caption", ".txt"], + value=".txt", + interactive=True, + allow_custom_value=True, + ) + + caption_separator = gr.Textbox( + label="Caption Separator", + value=config.get("wd14_caption.caption_separator", ", "), + interactive=True, + ) + + with gr.Row(): + + tag_replacement = gr.Textbox( + label="Tag replacement", + info="tag replacement in the format of `source1,target1;source2,target2; ...`. Escape `,` and `;` with `\`. e.g. `tag1,tag2;tag3,tag4`", + value=config.get("wd14_caption.tag_replacement", ""), + interactive=True, + ) + + character_tag_expand = gr.Checkbox( + label="Character tag expand", + info="expand tag tail parenthesis to another tag for character tags. `chara_name_(series)` becomes `chara_name, series`", + value=config.get("wd14_caption.character_tag_expand", False), + interactive=True, + ) + + undesired_tags = gr.Textbox( + label="Undesired tags", + placeholder="(Optional) Separate `undesired_tags` with comma `(,)` if you want to remove multiple tags, e.g. 
`1girl,solo,smile`.", + interactive=True, + value=config.get("wd14_caption.undesired_tags", ""), + ) + + with gr.Row(): + always_first_tags = gr.Textbox( + label="Prefix to add to WD14 caption", + info="comma-separated list of tags to always put at the beginning, e.g.: 1girl, 1boy, ", + placeholder="(Optional)", + interactive=True, + value=config.get("wd14_caption.always_first_tags", ""), + ) + + with gr.Row(): + onnx = gr.Checkbox( + label="Use onnx", + value=config.get("wd14_caption.onnx", True), + interactive=True, + info="https://github.com/onnx/onnx", + ) + append_tags = gr.Checkbox( + label="Append TAGs", + value=config.get("wd14_caption.append_tags", False), + interactive=True, + info="This option appends the tags to the existing tags, instead of replacing them.", + ) + + use_rating_tags = gr.Checkbox( + label="Use rating tags", + value=config.get("wd14_caption.use_rating_tags", False), + interactive=True, + info="Adds rating tags as the first tag", + ) + + use_rating_tags_as_last_tag = gr.Checkbox( + label="Use rating tags as last tag", + value=config.get("wd14_caption.use_rating_tags_as_last_tag", False), + interactive=True, + info="Adds rating tags as the last tag", + ) + + with gr.Row(): + recursive = gr.Checkbox( + label="Recursive", + value=config.get("wd14_caption.recursive", False), + info="Tag subfolders images as well", + ) + remove_underscore = gr.Checkbox( + label="Remove underscore", + value=config.get("wd14_caption.remove_underscore", True), + info="replace underscores with spaces in the output tags", + ) + + debug = gr.Checkbox( + label="Debug", + value=config.get("wd14_caption.debug", True), + info="Debug mode", + ) + frequency_tags = gr.Checkbox( + label="Show tags frequency", + value=config.get("wd14_caption.frequency_tags", True), + info="Show frequency of tags for images.", + ) + + with gr.Row(): + thresh = gr.Slider( + value=config.get("wd14_caption.thresh", 0.35), + label="Threshold", + info="threshold of confidence to add a tag", + 
minimum=0, + maximum=1, + step=0.05, + ) + + general_threshold = gr.Slider( + value=config.get("wd14_caption.general_threshold", 0.35), + label="General threshold", + info="Adjust `general_threshold` for pruning tags (less tags, less flexible)", + minimum=0, + maximum=1, + step=0.05, + ) + character_threshold = gr.Slider( + value=config.get("wd14_caption.character_threshold", 0.35), + label="Character threshold", + minimum=0, + maximum=1, + step=0.05, + ) + + # Advanced Settings + with gr.Row(): + batch_size = gr.Number( + value=config.get("wd14_caption.batch_size", 1), + label="Batch size", + interactive=True, + ) + + max_data_loader_n_workers = gr.Number( + value=config.get("wd14_caption.max_data_loader_n_workers", 2), + label="Max dataloader workers", + interactive=True, + ) + + def repo_id_changes(repo_id, onnx): + global old_onnx_value + + if "-v3" in repo_id: + old_onnx_value = onnx + return gr.Checkbox(value=True, interactive=False) + else: + return gr.Checkbox(value=old_onnx_value, interactive=True) + + repo_id.change(repo_id_changes, inputs=[repo_id, onnx], outputs=[onnx]) + + caption_button = gr.Button("Caption images") + + caption_button.click( + caption_images, + inputs=[ + train_data_dir, + caption_extension, + batch_size, + general_threshold, + character_threshold, + repo_id, + recursive, + max_data_loader_n_workers, + debug, + undesired_tags, + frequency_tags, + always_first_tags, + onnx, + append_tags, + force_download, + caption_separator, + tag_replacement, + character_tag_expand, + use_rating_tags, + use_rating_tags_as_last_tag, + remove_underscore, + thresh, + ], + show_progress=False, + ) + + train_data_dir.change( + fn=lambda path: gr.Dropdown(choices=[""] + list_train_dirs(path)), + inputs=train_data_dir, + outputs=train_data_dir, + show_progress=False, + ) diff --git a/localizations/Put localization files here.txt b/localizations/Put localization files here.txt new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/localizations/chinese-sample.json b/localizations/chinese-sample.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3a5cdd32d639476c05b562f1c312dbbc195a86 --- /dev/null +++ b/localizations/chinese-sample.json @@ -0,0 +1,51 @@ +{ + "Loading...": "载入中...", + "Use via API": "通过API使用", + "Built with Gradio": "使用Gradio构建", + "Dreambooth":"梦想阁", + "Training": "训练", + "Train a custom model using kohya dreambooth python code…": "使用kohya dreamboot python代码 训练个性化模型", + "Configuration file": "配置文件", + "Open": "打开", + "Save": "保存", + "Load": "加载", + "Source model": "模型来源", + "Model Quick Pick": "快速选择模型", + "Save trained model as": "保存训练模型为", + "Folders": "文件夹", + "Start training": "开始训练", + "Stop training": "停止训练", + "Print training command": "打印训练命令", + "Start tensorboard": "开始 tensorboard", + "Stop tensorboard": "结束 tensorboard", + "Image folder": "图片文件夹", + "Regularisation folder": "正则化文件夹", + "Output folder": "输出文件夹", + "Logging folder": "日志文件夹", + "Model output name": "模型输出文件夹", + "Training comment": "训练注释", + "(Optional) Add training comment to be included in metadata": "(可选)增加训练注释到元数据", + "Parameters": "参数", + "Basic": "基础", + "Train batch size": "训练批次大小", + "Epoch": "数量增加", + "Max train epoch": "每批数量", + "(Optional) Enforce number of epoch": "(可选)强制每批数量", + "Advanced": "增强", + "Samples": "样例", + "Tools": "工具", + "This section provide Dreambooth tools to help setup your dataset…": "这些选择帮助设置自己的数据集", + "Dreambooth/LoRA Folder preparation": "Dreambooth/LoRA文件准备", + "This utility will create the necessary folder structure for the training images and optional regularization images needed for the kohys_ss Dreambooth/LoRA method to function correctly.": "为训练文件创建文件夹", + "Instance prompt": "实例提示", + "Class prompt": "类提示", + "Training images": "训练图片", + "Directory containing the training images": "直接包含训练图片", + "Repeats": "重复", + "Regularisation images": 
"正则化图像", + "Destination training directory": "训练结果目录", + "Directory where formatted training and regularisation folders will be placed": "训练和正则化文件会被取代", + "Prepare training data": "准备训练数据", + "Copy info to Folders Tab": "复制信息到文件夹", + "Train a custom model using kohya train network LoRA python code…": "使用kohya训练网络LoRA训练个性化模型" +} \ No newline at end of file diff --git a/localizations/en-GB.json b/localizations/en-GB.json new file mode 100644 index 0000000000000000000000000000000000000000..4431e24267b14cb72cd6854c64b2f7efcbf9eeb7 --- /dev/null +++ b/localizations/en-GB.json @@ -0,0 +1,24 @@ +{ + "analyze": "analyse", + "behavior": "behaviour", + "color": "colour", + "flavor": "flavour", + "honor": "honour", + "humor": "humour", + "localization": "localisation", + "localize": "localise", + "neighbor": "neighbour", + "offense": "offence", + "oriented": "orientated", + "practice": "practise", + "pretense": "pretence", + "program": "programme", + "recognize": "recognise", + "regularization": "regularisation", + "savior": "saviour", + "signaling": "signalling", + "specialization": "specialisation", + "stabilization": "stabilisation", + "standardization": "standardisation", + "utilize": "utilise" +} \ No newline at end of file diff --git a/localizations/zh-CN.json b/localizations/zh-CN.json new file mode 100644 index 0000000000000000000000000000000000000000..b04ded8276843a81a121383d44816af2f9b96379 --- /dev/null +++ b/localizations/zh-CN.json @@ -0,0 +1,361 @@ + + { + + "-Need to add resources here": "-需要在这里添加资源", + "(Experimental, Optional) Since the latent is close to a normal distribution, it may be a good idea to specify a value around 1/10 the noise offset.": "(实验性,可选)由于潜在变量接近正态分布,指定一个接近噪声偏移1/10的值可能是个好主意。", + "(Optional) Add training comment to be included in metadata": "(可选)增加训练注释到元数据", + "(Optional) Enforce number of epoch": "(可选)强制每批数量", + "(Optional) Save only the specified number of models (old models will be deleted)": "(可选)仅保存指定数量的模型(旧模型将被删除)", + "(Optional) 
Save only the specified number of states (old models will be deleted)": "(可选)仅保存指定数量的状态(旧模型将被删除)", + "(Optional) Stable Diffusion base model": "(可选)稳定扩散基础模型", + "(Optional) Stable Diffusion model": "(可选)稳定扩散模型", + "(Optional) The model is saved every specified steps": "(可选)模型每隔指定的步数保存一次", + "(Optional)": "(可选)", + "About SDXL training": "关于SDXL培训", + "Adaptive noise scale": "自适应噪声比例", + "Additional parameters": "额外参数", + "Advanced options": "高级选项", + "Advanced parameters": "高级参数", + "Advanced": "增强", + "ashleykleynhans runpod docker builds": "ashleykleynhans runpod docker构建", + "Automatically determine the dim(rank) from the weight file.": "从权重文件自动确定dim(排名)。", + "Autosave": "自动保存", + "Basic Captioning": "基本字幕", + "Basic": "基础", + "Batch size": "批量大小", + "BLIP Captioning": "BLIP字幕", + "Bucket resolution steps": "桶分辨率步骤", + "Built with Gradio": "使用Gradio构建", + "Cache latents to disk": "缓存潜变量到磁盘", + "Cache latents": "缓存潜变量", + "Caption file extension": "标题文件扩展名", + "Caption text": "标题文本", + "caption": "标题", + "Change History": "更改历史", + "Class prompt": "Class类提示", + "Color augmentation": "颜色增强", + "Configuration file": "配置文件", + "constant_with_warmup": "带预热的常数", + "constant": "常数", + "Conv Dimension (Rank)": "卷积维度(Rank)", + "Conv Dimension": "卷积维度", + "Convert model": "转换模型", + "Copy info to Folders Tab": "复制信息到文件夹", + "cosine_with_restarts": "带重启的余弦函数学习率的方法", + "cosine": "余弦函数", + "CrossAttention": "交叉注意力", + "DANGER!!! 
-- Insecure folder renaming -- DANGER!!!": "危险!!!-- 不安全的文件夹重命名 -- 危险!!!", + "Dataset folder": "数据集文件夹", + "Dataset preparation": "数据集准备", + "Dataset Preparation": "数据集准备", + "Dataset repeats": "数据集重复", + "Desired LoRA rank": "期望的LoRA秩", + "Destination training directory": "训练结果目录", + "Device": "设备", + "DIM from weights": "从权重获取DIM", + "Directory containing the images to caption": "包含要添加标题的图像的目录", + "Directory containing the training images": "包含训练图片的目录", + "Directory where formatted training and regularisation folders will be placed": "用于存放格式化后的训练和正则化文件夹的目录", + "Disable CP decomposition": "禁用CP分解", + "Do not copy other files in the input folder to the output folder": "不要将输入文件夹中的其他文件复制到输出文件夹", + "Do not copy other files": "不复制其他文件", + "Don't upscale bucket resolution": "不要放大桶分辨率", + "Dreambooth/LoRA Dataset balancing": "Dreambooth/LoRA数据集平衡", + "Dreambooth/LoRA Folder preparation": "Dreambooth/LoRA文件准备", + "Dropout caption every n epochs": "每n个epoch丢弃标题", + "DyLoRA model": "DyLoRA模型", + "Dynamic method": "动态方法", + "Dynamic parameter": "动态参数", + "e.g., \"by some artist\". Leave empty if you only want to add a prefix or postfix.": "例如,\"由某个艺术家创作\"。如果您只想添加前缀或后缀,请留空。", + "e.g., \"by some artist\". Leave empty if you want to replace with nothing.": "例如,\"由某个艺术家创作\"。如果您想用空白替换,请留空。", + "Enable buckets": "启用数据容器buckets", + "Enable for Hugging Face's stabilityai models": "启用Hugging Face的stabilityai模型", + "Enter one sample prompt per line to generate multiple samples per cycle. Optional specifiers include: --w (width), --h (height), --d (seed), --l (cfg scale), --s (sampler steps) and --n (negative prompt). 
To modify sample prompts during training, edit the prompt.txt file in the samples directory.": "每行输入一个样本提示以生成每个周期的多个样本。可选指定符包括:--w(宽度),--h(高度),--d(种子),--l(cfg比例),--s(采样器步骤)和--n(负提示)。要在训练期间修改样本提示,请编辑样本目录中的prompt.txt文件。", + "Epoch": "轮次(Epoch)", + "Error": "错误", + "Example of the optimizer settings for Adafactor with the fixed learning rate:": "具有固定学习率的Adafactor优化器设置的示例:", + "Extract DyLoRA": "提取DyLoRA", + "Extract LoRA model": "提取LoRA模型", + "Extract LoRA": "提取LoRA", + "Extract LyCORIS LoCon": "提取LyCORIS LoCon", + "Extract LyCORIS LoCON": "提取LyCORIS LoCON", + "FileNotFoundError": "FileNotFoundError", + "Find text": "查找文本", + "Finetune": "微调", + "Finetuned model": "微调模型", + "Finetuning Resource Guide": "微调资源指南", + "fixed": "固定", + "Flip augmentation": "翻转增强", + "float16": "float16", + "Folders": "文件夹", + "Full bf16 training (experimental)": "完全bf16训练(实验性)", + "Full fp16 training (experimental)": "完全fp16训练(实验性)", + "Generate caption files for the grouped images based on their folder name": "根据其文件夹名称为分组图片生成标题文件", + "Generate caption metadata": "生成标题元数据", + "Generate Captions": "生成标题", + "Generate image buckets metadata": "生成图像存储桶元数据", + "GIT Captioning": "GIT字幕", + "Gradient accumulate steps": "梯度累积步数", + "Gradient checkpointing": "梯度检查点", + "Group size": "Group大小", + "Guidelines for SDXL Finetuning": "SDXL微调指南", + "Guides": "指南", + "How to Create a LoRA Part 1: Dataset Preparation:": "如何创建LoRA第1部分:数据集准备:", + "If unchecked, tensorboard will be used as the default for logging.": "如果未选中,tensorboard将用作日志记录的默认选项。", + "If you have valuable resources to add, kindly create a PR on Github.": "如果您有有价值的资源要添加,请在Github上创建一个PR。", + "Ignore Imported Tags Above Word Count": "忽略高于字数计数的导入标签", + "Image folder to caption": "要添加标题的图像文件夹", + "Image folder": "图片文件夹", + "Include images in subfolders as well": "同时包括子文件夹中的图片", + "Include Subfolders": "包括子文件夹", + "Init word": "初始化词", + "Input folder": "输入文件夹", + "Install Location": "安装位置", + "Installation": "安装", + "Instance prompt": "实例提示", + "Keep 
n tokens": "保留n个令牌", + "Launching the GUI on Linux and macOS": "在Linux和macOS上启动GUI", + "Launching the GUI on Windows": "在Windows上启动GUI", + "Learning rate": "学习率", + "linear": "线性", + "Linux and macOS Upgrade": "Linux和macOS升级", + "Linux and macOS": "Linux和macOS", + "Linux Pre-requirements": "Linux预先要求", + "Load": "加载", + "Loading...": "载入中...", + "Local docker build": "本地Docker构建", + "Logging folder": "日志文件夹", + "LoRA model \"A\"": "LoRA模型“A”", + "LoRA model \"B\"": "LoRA模型“B”", + "LoRA model \"C\"": "LoRA模型“C”", + "LoRA model \"D\"": "LoRA模型“D”", + "LoRA model": "LoRA模型", + "LoRA network weights": "LoRA网络权重", + "LoRA": "LoRA", + "LR number of cycles": "学习率周期数", + "LR power": "学习率功率", + "LR scheduler extra arguments": "学习率调度器额外参数", + "LR Scheduler": "学习率调度器", + "LR warmup (% of steps)": "学习率预热(%的步数)", + "LyCORIS model": "LyCORIS模型", + "Macos is not great at the moment.": "目前MacOS的支持不是很好。", + "Manual Captioning": "手动字幕", + "Manual installation": "手动安装", + "Max bucket resolution": "最大存储桶分辨率", + "Max length": "最大长度", + "Max num workers for DataLoader": "DataLoader的最大工作进程数", + "Max resolution": "最大分辨率", + "Max Timestep": "最大时间步", + "Max Token Length": "最大令牌长度", + "Max train epoch": "最大训练轮数", + "Max train steps": "最大训练步数", + "Maximum bucket resolution": "最大数据容器存储桶分辨率", + "Maximum size in pixel a bucket can be (>= 64)": "可以达到的最大像素尺寸(>= 64)", + "Memory efficient attention": "内存高效注意力", + "Merge LoRA (SVD)": "合并LoRA(SVD)", + "Merge LoRA": "合并LoRA", + "Merge LyCORIS": "合并LyCORIS", + "Merge model": "合并模型", + "Merge precision": "合并精度", + "Merge ratio model A": "模型A合并比例", + "Merge ratio model B": "模型B合并比例", + "Merge ratio model C": "模型C合并比例", + "Merge ratio model D": "模型D合并比例", + "Min bucket resolution": "最小数据容器存储桶分辨率", + "Min length": "最小长度", + "Min SNR gamma": "最小SNR伽玛", + "Min Timestep": "最小时间步", + "Minimum bucket resolution": "最小数据容器存储桶分辨率", + "Minimum size in pixel a bucket can be": "数据容器存储桶的最小像素大小", + "Mixed precision": "混合精度", + "Mnimum difference": "最小差异", + "Mode": "模式", 
+ "Model A merge ratio (eg: 0.5 mean 50%)": "模型A合并比率(例如:0.5意味着50%)", + "Model B merge ratio (eg: 0.5 mean 50%)": "模型B合并比率(例如:0.5意味着50%)", + "Model C merge ratio (eg: 0.5 mean 50%)": "模型C合并比率(例如:0.5意味着50%)", + "Model D merge ratio (eg: 0.5 mean 50%)": "模型D合并比率(例如:0.5意味着50%)", + "Model output folder": "模型输出文件夹", + "Model output name": "模型输出名称", + "Model Quick Pick": "快速选择模型", + "Module dropout": "模块丢弃", + "Network Dimension (Rank)": "网络维度(秩)", + "Network Dimension": "网络维度", + "Network dropout": "网络丢弃", + "No module called tkinter": "没有名为tkinter的模块", + "No token padding": "无令牌填充", + "Noise offset type": "噪声偏移类型", + "Noise offset": "噪声偏移", + "Number of beams": "beam的数量 - 由于同时考虑多个解决方案,beam搜索能够减少错误累积,从而提高最终解决方案的质量。", + "Number of CPU threads per core": "每个核心的CPU线程数", + "Number of images to group together": "要一起分组的图像数量", + "Number of updates steps to accumulate before performing a backward/update pass": "执行反向/更新传递之前需要积累的更新步骤数", + "object template": "对象模板", + "Only for SD v2 models. By scaling the loss according to the time step, the weights of global noise prediction and local noise prediction become the same, and the improvement of details may be expected.": "仅适用于SD v2模型。通过根据时间步长缩放损失,全局噪声预测和局部噪声预测的权重变得相同,可以期望细节的改进。", + "Open": "打开", + "Optimizer extra arguments": "优化器额外参数", + "Optimizer": "优化器", + "Optional: CUDNN 8.6": "可选:CUDNN 8.6", + "Original": "原始", + "Output folder": "输出文件夹", + "Output": "输出", + "Overwrite existing captions in folder": "覆盖文件夹中现有的标题", + "Page File Limit": "页面文件限制", + "PagedAdamW8bit": "分页AdamW8位", + "PagedLion8bit": "分页Lion8位", + "Parameters": "参数", + "path for the checkpoint file to save...": "保存检查点文件的路径...", + "path for the LoRA file to save...": "保存LoRA文件的路径...", + "path for the new LoRA file to save...": "保存新LoRA文件的路径...", + "path to \"last-state\" state folder to resume from": "从中恢复的“最后状态”状态文件夹的路径", + "Path to the DyLoRA model to extract from": "要从中提取的DyLoRA模型的路径", + "Path to the finetuned model to extract": "要提取的微调模型的路径", + "Path to the LoRA 
A model": "LoRA A模型的路径", + "Path to the LoRA B model": "LoRA B模型的路径", + "Path to the LoRA C model": "LoRA C模型的路径", + "Path to the LoRA D model": "LoRA D模型的路径", + "Path to the LoRA model to verify": "要验证的LoRA模型的路径", + "Path to the LoRA to resize": "要调整大小的LoRA的路径", + "Path to the LyCORIS model": "LyCORIS模型的路径", + "path where to save the extracted LoRA model...": "保存提取出的LoRA模型的路径...", + "Persistent data loader": "持久数据加载器", + "polynomial": "多项式", + "Postfix to add to BLIP caption": "添加到BLIP标题的后缀", + "Postfix to add to caption": "添加到标题的后缀", + "Pre-built Runpod template": "预构建的Runpod模板", + "Prefix to add to BLIP caption": "添加到BLIP标题的前缀", + "Prefix to add to caption": "添加到标题的前缀", + "Prepare training data": "准备训练数据", + "Print training command": "打印训练命令", + "Prior loss weight": "先验损失权重", + "Prodigy": "神童", + "Provide a SD file path IF you want to merge it with LoRA files": "如果您想将其与LoRA文件合并,请提供SD文件路径", + "Provide a SD file path that you want to merge with the LyCORIS file": "提供您想与LyCORIS文件合并的SD文件路径", + "PyTorch 2 seems to use slightly less GPU memory than PyTorch 1.": "PyTorch 2似乎使用的GPU内存比PyTorch 1略少。", + "Quick Tags": "快速标签", + "Random crop instead of center crop": "随机裁剪而非中心裁剪", + "Rank dropout": "秩丢弃", + "Rate of caption dropout": "标题丢弃率", + "Recommended value of 0.5 when used": "使用时推荐值为0.5", + "Recommended value of 5 when used": "使用时推荐值为5", + "recommended values are 0.05 - 0.15": "推荐值为0.05 - 0.15", + "Regularisation folder": "正则化文件夹", + "Regularisation images": "正则化图像", + "Repeats": "重复", + "Replacement text": "替换文本", + "Required bitsandbytes >= 0.36.0": "所需的bitsandbytes >= 0.36.0", + "Resize LoRA": "调整LoRA尺寸", + "Resize model": "调整模型大小", + "Resolution (width,height)": "分辨率(宽度,高度)", + "Resource Contributions": "资源贡献", + "Resume from saved training state": "从保存的训练状态恢复", + "Resume TI training": "恢复TI训练", + "Runpod": "Runpod", + "Sample every n epochs": "每n个时代采样一次", + "Sample every n steps": "每n步采样一次", + "Sample image generation during training": "训练期间的样本图像生成", + "Sample 
prompts": "样本提示", + "Sample sampler": "样本采样器", + "Samples": "样例", + "Save dtype": "保存数据类型", + "Save every N epochs": "每N个epochs保存", + "Save every N steps": "每N步保存一次", + "Save last N steps state": "保存最后N步状态", + "Save last N steps": "保存最后N步", + "Save precision": "保存精度", + "Save to": "保存到", + "Save trained model as": "保存训练模型为", + "Save training state": "保存训练状态", + "Save": "保存", + "Scale v prediction loss": "缩放v预测损失", + "Scale weight norms": "缩放权重规范", + "SD Model": "SD模型", + "SDXL model": "SDXL模型", + "Set the Max resolution to at least 1024x1024, as this is the standard resolution for SDXL. ": "将 最大分辨率 设置为至少 1024x1024,因为这是 SDXL 的标准分辨率。", + "Set the Max resolution to at least 1024x1024, as this is the standard resolution for SDXL.": "将最大分辨率设置为至少1024x1024,因为这是SDXL的标准分辨率。", + "Setup": "设置", + "SGDNesterov": "SGD Nesterov", + "SGDNesterov8bit": "SGD Nesterov 8位", + "Shuffle caption": "随机标题", + "Source LoRA": "源LoRA", + "Source model type": "源模型类型", + "Source model": "模型来源", + "Sparsity": "稀疏性", + "Stable Diffusion base model": "稳定扩散基础模型", + "Stable Diffusion original model: ckpt or safetensors file": "稳定扩散原始模型:ckpt或safetensors文件", + "Start tensorboard": "开始 tensorboard", + "Start training": "开始训练", + "Starting GUI Service": "启动GUI服务", + "Stop tensorboard": "结束 tensorboard", + "Stop text encoder training": "停止文本编码器训练", + "Stop training": "停止训练", + "style template": "样式模板", + "sv_fro": "sv_fro", + "Target model folder": "目标模型文件夹", + "Target model name": "目标模型名称", + "Target model precision": "目标模型精度", + "Target model type": "目标模型类型", + "Template": "模板", + "Text Encoder learning rate": "文本编码器学习率", + "The fine-tuning can be done with 24GB GPU memory with the batch size of 1.": "微调可以在具有1个批量大小的24GB GPU内存上完成。", + "The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model.": "该GUI允许您设置训练参数,并生成并运行训练模型所需的CLI命令。", + "This guide is a resource compilation to facilitate the development of robust LoRA models.": 
"该指南是一个资源汇编,以促进强大LoRA模型的开发。", + "This section provide Dreambooth tools to help setup your dataset…": "这些选择帮助设置自己的数据集", + "This section provide LoRA tools to help setup your dataset…": "本节提供LoRA工具以帮助您设置数据集...", + "This section provide Various Finetuning guides and information…": "本节提供各种微调指南和信息", + "This utility allows quick captioning and tagging of images.": "此工具允许快速地为图像添加标题和标签。", + "This utility allows you to create simple caption files for each image in a folder.": "此工具允许您为文件夹中的每个图像创建简单的标题文件。", + "This utility can be used to convert from one stable diffusion model format to another.": "该工具可用于将一个稳定扩散模型格式转换为另一种格式", + "This utility can extract a DyLoRA network from a finetuned model.": "该工具可以从微调模型中提取DyLoRA网络。", + "This utility can extract a LoRA network from a finetuned model.": "该工具可以从微调模型中提取LoRA网络。", + "This utility can extract a LyCORIS LoCon network from a finetuned model.": "该工具可以从微调模型中提取LyCORIS LoCon网络。", + "This utility can merge a LyCORIS model into a SD checkpoint.": "该工具可以将LyCORIS模型合并到SD检查点中。", + "This utility can merge two LoRA networks together into a new LoRA.": "该工具可以将两个LoRA网络合并为一个新的LoRA。", + "This utility can merge up to 4 LoRA together or alternatively merge up to 4 LoRA into a SD checkpoint.": "该工具可以合并多达4个LoRA,或者选择性地将多达4个LoRA合并到SD检查点中。", + "This utility can resize a LoRA.": "该工具可以调整LoRA的大小。", + "This utility can verify a LoRA network to make sure it is properly trained.": "该工具可以验证LoRA网络以确保其得到适当的训练。", + "This utility uses BLIP to caption files for each image in a folder.": "此工具使用BLIP为文件夹中的每张图像添加标题。", + "This utility will create the necessary folder structure for the training images and optional regularization images needed for the kohys_ss Dreambooth/LoRA method to function correctly.": "为训练文件创建文件夹", + "This utility will ensure that each concept folder in the dataset folder is used equally during the training process of the dreambooth machine learning model, regardless of the number of images in each folder. 
It will do this by renaming the concept folders to indicate the number of times they should be repeated during training.": "此工具将确保在训练dreambooth机器学习模型的过程中,数据集文件夹中的每个概念文件夹都将被平等地使用,无论每个文件夹中有多少图像。它将通过重命名概念文件夹来指示在训练期间应重复使用它们的次数。", + "This utility will group images in a folder based on their aspect ratio.": "此工具将根据它们的纵横比将文件夹中的图像分组。", + "This utility will use GIT to caption files for each images in a folder.": "此工具将使用GIT为文件夹中的每张图像添加标题。", + "This utility will use WD14 to caption files for each images in a folder.": "此工具将使用WD14为文件夹中的每张图像添加标题。", + "Tips for SDXL training": "SDXL培训提示", + "Token string": "令牌字符串", + "Train a custom model using kohya finetune python code": "使用kohya微调Python代码训练个性化模型", + "Train a custom model using kohya train network LoRA python code…": "使用kohya训练网络LoRA Python代码训练自定义模型", + "Train batch size": "训练批次大小", + "Train Network": "训练网络", + "Train text encoder": "训练文本编码器", + "Train U-Net only.": "仅训练 U-Net", + "Training config folder": "训练配置文件夹", + "Training Image folder": "训练图像文件夹", + "Training images": "训练图像", + "Training steps per concept per epoch": "每个周期每个概念的训练步骤", + "Training": "训练", + "Troubleshooting": "故障排除", + "Tutorials": "教程", + "Unet learning rate": "Unet学习率", + "UNet linear projection": "UNet 线性投影", + "Upgrading": "升级", + "Use --cache_text_encoder_outputs option and caching latents.": "使用 --cache_text_encoder_outputs 选项和缓存潜在变量。", + "Use Adafactor optimizer. RMSprop 8bit or Adagrad 8bit may work. AdamW 8bit doesn’t seem to work.": "使用 Adafactor 优化器。 RMSprop 8bit 或 Adagrad 8bit 可能有效。 AdamW 8bit 好像不行。", + "Use beam search": "使用beam搜索-启发式图搜索算法,beam搜索可以用来生成更准确和自然的文本。", + "Use gradient checkpointing.": "使用梯度检查点。", + "Use latent files": "使用潜在文件", + "Use sparse biais": "使用稀疏偏见", + "Users can obtain and/or generate an api key in the their user settings on the website: https://wandb.ai/login": "用户可以在以下网站的用户设置中获取和/或生成API密钥:https://wandb.ai/login", + "V Pred like loss": "v预测损失", + "Values greater than 0 will make the model more img2img focussed. 
0 = image only": "大于0的值会使模型更加聚焦在 img2img 上。0 = 仅图像。这应该表示时间步参数,大于0会使模型更加侧重 img2img 生成,0则仅关注图像生成。", + "Values lower than 1000 will make the model more img2img focussed. 1000 = noise only": "小于1000的值会使模型更加聚焦在 img2img 上。1000 = 仅噪声。这也应该表示时间步参数,小于1000会使模型更加侧重 img2img 生成,1000则仅从噪声生成图像。", + "Vectors": "向量", + "Verbose": "详细输出", + "WANDB API Key": "WANDB API 密钥。", + "WARNING! The use of this utility on the wrong folder can lead to unexpected folder renaming!!!": "警告!在错误的文件夹上使用此工具可能导致意外的文件夹重命名!", + "WD14 Captioning": "WD14字幕", + "Windows Upgrade": "Windows升级", + "Train a custom model using kohya dreambooth python code…": "使用kohya的dreambooth Python代码训练个性化模型", + "Training comment": "训练注释", + "Train a TI using kohya textual inversion python code…": "使用kohya的文本反转Python代码训练TI模型", + "Train a custom model using kohya finetune python code…": "使用kohya的微调Python代码训练个性化模型" + +} \ No newline at end of file diff --git a/localizations/zh-TW.json b/localizations/zh-TW.json new file mode 100644 index 0000000000000000000000000000000000000000..d2da7fcda81d3f67fb7d22e0ae95ead235fcbd0f --- /dev/null +++ b/localizations/zh-TW.json @@ -0,0 +1,496 @@ +{ + "WARNING! The use of this utility on the wrong folder can lead to unexpected folder renaming!!!": "警告!在錯誤的資料夾上使用此工具可能導致意外的資料夾重新命名!!!", + "(Experimental, Optional) Since the latent is close to a normal distribution, it may be a good idea to specify a value around 1/10 the noise offset.": " (選填,實驗性功能) 由於潛空間接近常態分布,或許指定一個噪聲偏移約 1/10 的數值是個不錯的作法。", + "(Name of the model to output)": "(要輸出的模型名稱)", + "(Optional) Add training comment to be included in metadata": "(選填) 在訓練的後設資料 (metadata) 加入註解。", + "(Optional) Enforce # epochs": " (選填) 強制指定週期數 (Epochs) ", + "(Optional) Enforce # steps": " (選填) 強制指定總步數", + "(Optional) default: .caption": " (選填) 預設:.caption", + "(Optional) For Cosine with restart and polynomial only": " (選填) 只適用於餘弦函數並使用重啟 (cosine_with_restart) 和多項式 (polynomial)", + "(Optional) Override number of epoch. 
Default: 8": " (選填) 覆蓋週期 (Epoch) 數量。預設:8", + "(Optional) Save only the specified number of models (old models will be deleted)": " (選填) 僅儲存指定數量的模型 (舊有模型將被刪除) ", + "(Optional) Save only the specified number of states (old models will be deleted)": " (選填) 僅儲存指定數量的訓練資料 (舊有訓練資料將被刪除) ", + "(Optional) Separate `undesired_tags` with comma `(,)` if you want to remove multiple tags, e.g. `1girl,solo,smile`.": " (選填) 如果要移除多個標籤,請使用逗號 `(,)` 分隔不需要的標籤,例如:`1girl,solo,smile`。", + "(Optional) The model is saved every specified steps": " (選填) 模型會在指定的間隔步數後儲存", + "(Optional) Use to provide additional parameters not handled by the GUI. Eg: --some_parameters \"value\"": " (選填) 用於提供 GUI 未提供的額外參數。例如:--some_parameters \"value\"", + "(Optional) eg: \"milestones=[1,10,30,50]\" \"gamma=0.1\"": " (選填) 例如: \"milestones=[1,10,30,50]\" \"gamma=0.1\"", + "(Optional) eg: 0,0,0,0,0,0,1,1,1,1,1,1": " (選填) 例如:0,0,0,0,0,0,1,1,1,1,1,1", + "(Optional) eg: 0.1": " (選填) 例如:0.1", + "(Optional) eg: 0.5": " (選填) 例如:0.5", + "(Optional) eg: 2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2": " (選填) 例如:2,2,2,2,4,4,4,4,6,6,6,6,8,6,6,6,6,4,4,4,4,2,2,2,2", + "(Optional) eg: relative_step=True scale_parameter=True warmup_init=True": " (選填) 例如:relative_step=True scale_parameter=True warmup_init=True", + "(Optional) eg:1234": " (選填) 例如:1234", + "(Optional) model id for GIT in Hugging Face": " (選填) Hugging Face 中 GIT 的模型 ID", + "(Optional)": "(選填)", + "< Prev": "< 上一個", + "A two-step approach utilizing tensor decomposition and fine-tuning to accelerate convolution layers in large neural networks, resulting in significant CPU speedups with minor accuracy drops.": "一種利用張量分解和微調的兩步方法,以加速大型神經網路中的卷積層,從而實現顯著的 CPU 加速和輕微的精度下降。", + "Adaptive noise scale": "自適應噪聲比例", + "Additional parameters": "額外參數", + "Adjust `general_threshold` for pruning tags (less tags, less flexible)": "調整一般閾值以修剪標籤 (標籤越少,越不靈活)", + "Adjusts the scale of the rank dropout to maintain the average dropout rate, ensuring more consistent regularization across 
different layers.": "調整維度 (Rank) 丟棄比例的比例,以保持平均丟棄率,確保在不同層之間更一致的正規化。", + "Advanced Configuration": "進階設定", + "Advanced options": "進階選項", + "Advanced parameters": "進階參數", + "Advanced": "進階", + "Appebnd TAGs": "附加標籤", + "Autosave": "自動儲存", + "Automates the processing of noise, allowing for faster model fitting, as well as balancing out color issues": "自動處理噪聲,可以更快地擬合模型,同時平衡顏色問題", + "Automatically determine the dim(rank) from the weight file.": "從權重檔案自動取用維度 DIM(Rank)。", + "BLIP Captioning": "BLIP 標記", + "Balance dataset": "平衡資料集", + "Basic Captioning": "基本標記", + "Batch size": "批次大小", + "Block LR (SDXL)": "區塊學習率", + "Block alphas": "區塊 Alphas", + "Block dims": "區塊維度", + "Blocks LR zero threshold": "區塊 LR 零閾值", + "Blocks": "區塊", + "Bucket resolution steps need to be greater than 0": "資料儲存桶解析度步數需要大於 0", + "Bucket resolution steps": "分桶解析度間隔", + "Bypass mode": "旁路模式 (Bypass mode)", + "Cache latents to disk": "暫存潛空間資料到硬碟", + "Cache latents": "暫存潛空間資料", + "Cache text encoder outputs": "暫存文本編碼器輸出", + "Cache the outputs of the text encoders. This option is useful to reduce the GPU memory usage. 
This option cannot be used with options for shuffling or dropping the captions.": "暫存文本編碼器的輸出。此選項有助於減少 GPU 記憶體的使用。此選項不能與打亂或丟棄提示詞 (Shuffle/Dropout caption) 的選項一起使用。", + "Could not modify caption files with requested change because the \"Overwrite existing captions in folder\" option is not selected.": "無法修改標記文字檔案以進行所需的更改,因為未選擇「覆蓋資料夾中現有的提示詞」選項。", + "Caption Extension": "標記檔案副檔名", + "Caption Separator": "標記文字分隔符號", + "Caption file extension (e.g., .txt)": "標記文字檔案副檔名 (例如:.txt)", + "Caption file extension": "標記檔案副檔名", + "Caption images": "標記圖片", + "Caption metadata filename": "標記文字後設資料檔案名稱", + "Caption text": "標記文字", + "Captioning": "標記文字", + "Captions": "標記文字", + "Character threshold": "角色閾值", + "Clamp Quantile": "夾取分位數 (Clamp Quantile)", + "Class prompt missing...": "缺少類別提示詞...", + "Class prompt": "類別提示詞", + "Clip skip": "Clip skip", + "Color augmentation": "色彩增強 (Color augmentation)", + "Configuration": "設定", + "Comma separated list of tags": "逗號分隔的標籤列表", + "Constrain OFT": "限制 OFT", + "Controls whether both input and output dimensions of the layer's weights are decomposed into smaller matrices for reparameterization.": "控制層權重的輸入和輸出維度是否被分解為更小的矩陣以進行重新參數化。", + "Conv Dimension (Rank)": "卷積維度 (Rank)", + "Conv Dimension": "卷積維度", + "Conv alphas": "卷積 Alphas", + "Conv dims": "卷積維度 (dims)", + "Conv quantile": "卷積分位數 (Conv quantile)", + "Conv ratio": "卷積比率 (Conv ratio)", + "Conv threshold": "卷積閾值 (Conv threshold)", + "Conv": "卷積", + "Convert to LCM": "轉換為 LCM", + "Convert model": "轉換模型", + "Convolution Alpha": "卷積 Alpha", + "Convolution Rank (Dimension)": "卷積維度 (Rank)", + "Copy info to Folders Tab": "複製資訊到資料夾區塊", + "CrossAttention": "交叉注意力", + "DANGER!!! -- Insecure folder renaming -- DANGER!!!": "危險!!! 
-- 不安全的資料夾重新命名 -- 危險!!!", + "DIM from weights": "從權重讀取 DIM", + "Dataset Preparation": "資料集準備", + "Dataset folder (folder containing the concepts folders to balance...)": "資料集資料夾 (含有要平衡的概念資料夾的資料夾路徑...)", + "Dataset repeats": "資料集重複數", + "Dataset config toml file used, skipping total_steps, train_batch_size, gradient_accumulation_steps, epoch, reg_factor, max_train_steps calculations...": "使用資料集設定 toml 檔案,跳過總步數、訓練批次大小、梯度累加步數、週期、正規化因子、最大訓練步數的計算...", + "Dataset config file (Optional. Select the toml configuration file to use for the dataset)": "資料集設定檔案 (選填,選擇要用於資料集的 toml 設定檔案)", + "Debiased Estimation loss": "偏差估算損失 (Debiased Estimation loss)", + "Debug while tagging, it will print your image file with general tags and character tags.": "標記時進行調試,它將打印您的圖片檔案與一般標籤和角色標籤。", + "Designed for bnb 8bit/4bit linear layer. (QLyCORIS)": "設計用於 bnb 8bit/4bit 線性層。 (QLyCORIS)", + "Desired LoRA rank": "所需的 LoRA 維度 (Rank)", + "Destination training directory (where formatted training and regularisation folders will be placed)": "訓練的目標資料夾 (格式化的訓練和正規化資料夾將被放置的資料夾)", + "Device": "裝置", + "Disable CP decomposition": "禁用 CP 分解 (CP decomposition)", + "Disable the half-precision (mixed-precision) VAE. VAE for SDXL seems to produce NaNs in some cases. 
This option is useful to avoid the NaNs.": "禁用半精度 (混合精度) VAE。對於 SDXL,VAE 在某些情況下似乎會產生 NaN。此選項有助於避免 NaN。", + "Do not copy other files in the input folder to the output folder": "不複製輸入資料夾中的其他檔案到輸出資料夾", + "Do not copy other files": "不複製其他檔案", + "Don't upscale bucket resolution": "不要放大分桶解析度", + "DoRA Weight Decompose": "DoRA 權重分解", + "Down LR weights": "Down LR 權重", + "Dreambooth/LoRA Dataset balancing": "Dreambooth/LoRA 資料集平衡", + "Dreambooth/LoRA Folder preparation": "Dreambooth/LoRA 資料夾準備", + "Dropout caption every n epochs": "在每 N 個週期 (Epoch) 丟棄標記", + "DyLoRA Unit / Block size": "DyLoRA 單元 / 區塊大小", + "DyLoRA model (path to the DyLoRA model to extract from)": "DyLoRA 模型 (要從中提取的 DyLoRA 模型的檔案路徑)", + "Dynamic method": "動態方法", + "Dynamic parameters": "動態參數", + "Efficiently decompose tensor shapes, resulting in a sequence of convolution layers with varying dimensions and Hadamard product implementation through multiplication of two distinct tensors.": "高效地分解張量形狀,從而產生一系列具有不同維度的卷積層,並通過兩個不同張量的乘法實現哈達瑪乘積。", + "Eg: asd": "例如:asd", + "Eg: person": "例如:person", + "Enable buckets": "啟用資料儲存桶", + "Enable multires noise (recommended values are 6-10)": "啟用多解析度噪聲 (建議使用 6-10)", + "Enable the DoRA method for these algorithms": "為這些演算法啟用 DoRA 方法", + "Enter one sample prompt per line to generate multiple samples per cycle. Optional specifiers include: --w (width), --h (height), --d (seed), --l (cfg scale), --s (sampler steps) and --n (negative prompt). To modify sample prompts during training, edit the prompt.txt file in the samples directory.": "每行輸入一個提示詞來生成每個訓練週期的輸出範本。可以選擇指定的參數,包括:--w (寬度) ,--h (高度) ,--d (種子) ,--l (CFG 比例) ,--s (採樣器步驟) 和 --n (負面提示詞) 。如果要在訓練週期中修改提示詞,請修改範本目錄中的 prompt.txt 檔案。", + "Epoch": "週期 (Epoch)", + "Error": "錯誤", + "Extend LoRA to Conv2d 3x3 and specify the dim (rank) of each block. 
Specify 25 numbers.": "將 LoRA 擴展到 Conv2d 3x3,並指定每個區塊的維度 (Rank)。指定 25 個數字。", + "Extension for caption file (e.g., .caption, .txt)": "標記檔案的副檔名(例如: .caption, .txt)", + "Extract DyLoRA": "提取 DyLoRA", + "Extract LCM": "提取 LCM", + "Extract LoRA model": "提取 LoRA 模型", + "Extract LoRA": "提取 LoRA", + "Extract LyCORIS LoCon": "提取 LyCORIS LoCon", + "Find text": "尋找文字", + "Finetuned model (path to the finetuned model to extract)": "微調模型 (Finetuned model)", + "Flip augmentation": "翻轉增強 (Flip augmentation)", + "Folders": "資料夾", + "Force model re-download": "強制重新下載模型", + "Full bf16 training (experimental)": "完整使用 bf16 訓練 (實驗性功能)", + "Full bf16": "完整使用 bf16", + "Full fp16 training (experimental)": "完整使用 fp16 訓練 (實驗性功能)", + "GIT Captioning": "GIT 標記文字", + "GPU IDs": "GPU ID", + "General threshold": "一般閾值", + "Generate Captions": "生成標記文字", + "Generate caption files for the grouped images based on their folder name": "根據資料夾名稱為分組的圖片生成標記文字檔案", + "Generate caption metadata": "生成標記文字後設資料", + "Generate image buckets metadata": "生成圖像分桶後設資料", + "Go >": "前往 >", + "Goto page": "前往頁面", + "Gradient accumulate steps": "梯度累加步數 (Gradient accumulate steps)", + "Gradient checkpointing": "梯度檢查點 (Gradient checkpointing)", + "Group Images": "分組圖片", + "Group images": "分組圖片", + "Group size": "分組大小", + "Guides": "指南", + "If the weight is not more than this value, the LoRA module is not created. 
The default is 0.": "如果權重不超過此值,則不會創建 LoRA 模組。預設為 0。", + "If unchecked, tensorboard will be used as the default for logging.": "如果不勾選,Tensorboard 將會使用預設的紀錄方式。", + "Ignore Imported Tags Above Word Count": "忽略上面單詞計數的匯入標籤", + "Image folder (containing training images subfolders)": "圖片資料夾 (包含訓練圖片與子資料夾)", + "Image folder (containing training images)": "圖片資料夾 (含有訓練圖片)", + "Image folder is missing...": "圖片資料夾遺失...", + "Image folder to caption (containing the images to caption)": "要加入標記的圖片資料夾", + "Import": "匯入", + "Include Subfolders": "包含子資料夾", + "Include images in subfolders as well": "也包含子資料夾中的圖片", + "Input captions": "輸入標記文字", + "Input folder (containing the images to group)": "輸入資料夾 (含有要分組的圖片的資料夾路徑)", + "Input folder is missing...": "輸入資料夾遺失...", + "Instance prompt": "實例提示詞", + "Invalid base model file": "無效的基礎模型檔案", + "Invalid model A file": "無效的模型 A 檔案", + "Invalid model file": "無效的模型檔案", + "Is a normal probability dropout at the neuron level. In the case of LoRA, it is applied to the output of down. 
Recommended range 0.1 to 0.5": "是神經元級的正常概率捨棄。在 LoRA 的情況下,它被應用於 Down Sampler 的輸出。建議範圍 0.1 到 0.5", + "Keep n tokens": "保留 N 個提示詞", + "LR Scheduler": "學習率調度器 (LR Scheduler)", + "LR # cycles": "學習率重啟週期數 (LR number of cycles)", + "LR power": "學習率乘冪 (LR power)", + "LR scheduler extra arguments": "學習率調度器額外參數", + "LR warmup (% of total steps)": "學習率預熱 (LR warmup, 總步數的 %)", + "Latent metadata filename": "潛空間後設資料檔案名稱", + "Learning rate TE": "文本編碼器學習率", + "Learning rate TE1": "文本編碼器 1 學習率", + "Learning rate TE2": "文本編碼器 2 學習率", + "Learning rate Unet": "U-Net 學習率", + "Learning rate": "學習率", + "Limits the norm of the oft_blocks, ensuring that their magnitude does not exceed a specified threshold, thus controlling the extent of the transformation applied.": "限制 oft_blocks 的規範,確保它們的大小不超過指定的閾值,從而控制應用的轉換程度。", + "Linear quantile": "線性分位數 (Linear quantile)", + "Linear ratio": "線性比率 (Linear ratio)", + "Linear threshold": "線性閾值 (Linear threshold)", + "LoKr decompose both": "LoKr 同時分解", + "LoKr factor": "LoKr 因子", + "LoRA model \"A\" (path to the LoRA A model)": "LoRA 模型 \"A\" (LoRA A 模型的檔案路徑)", + "LoRA model \"B\" (path to the LoRA B model)": "LoRA 模型 \"B\" (LoRA B 模型的檔案路徑)", + "LoRA model \"C\" (path to the LoRA C model)": "LoRA 模型 \"C\" (LoRA C 模型的檔案路徑)", + "LoRA model \"D\" (path to the LoRA D model)": "LoRA 模型 \"D\" (LoRA D 模型的檔案路徑)", + "LoRA model (path to the LoRA model to verify)": "LoRA 模型 (要驗證的 LoRA 模型的檔案路徑)", + "LoRA model types": "LoRA 模型類型", + "LoRA network weights": "LoRA 網路權重", + "LoRA type changed...": "LoRA 類型已更改...", + "Load": "載入", + "Load Stable Diffusion base model to": "載入穩定擴散基礎模型到", + "Load finetuned model to": "載入微調模型到", + "Load precision": "讀取精度", + "Load/Save Config file": "讀取/儲存設定檔案", + "Logging directory (Optional. 
to enable logging and output Tensorboard log)": "紀錄資料夾(選填,啟用紀錄和輸出 Tensorboard 紀錄)", + "Log tracker name": "紀錄追蹤器名稱", + "Log tracker config": "紀錄追蹤器設定", + "LyCORIS model (path to the LyCORIS model)": "LyCORIS 模型 (LyCORIS 模型的檔案路徑)", + "Manual Captioning": "手動標記文字", + "Max Norm Regularization is a technique to stabilize network training by limiting the norm of network weights. It may be effective in suppressing overfitting of LoRA and improving stability when used with other LoRAs. See PR #545 on kohya_ss/sd_scripts repo for details. Recommended setting: 1. Higher is weaker, lower is stronger.": "最大規範正規化是一種穩定網路訓練的技術,通過限制網路權重的規範來實現。當與其他 LoRA 一起使用時,它可能會有效地抑制 LoRA 的過度擬合並提高穩定性。詳細資料請見 kohya_ss/sd_scripts Github 上的 PR#545。建議設置:1.0 越高越弱,越低越強。", + "Max Timestep": "最大時序步數", + "Max Token Length": "最大標記數量", + "Max bucket resolution": "最大資料儲存桶解析度", + "Max dataloader workers": "最大資料加載器工作數", + "Max grad norm": "最大梯度規範 (Max grad norm)", + "Max length": "最大長度", + "Max num workers for DataLoader": "資料工作載入的最大工作數量", + "Max resolution": "最大解析度", + "Max train epoch": "最大訓練週期 (Epoch) 數", + "Max train steps": "最大訓練總步數", + "Maximum bucket resolution": "最大資料儲存桶解析度", + "Maximum size in pixel a bucket can be (>= 64)": "最大資料儲存桶解析度可達 (>= 64) ", + "Memory efficient attention": "高效記憶體注意力區塊處理 (Memory efficient attention)", + "Merge LoRA (SVD)": "合併 LoRA (SVD)", + "Merge LoRA": "合併 LoRA", + "Merge LyCORIS": "合併 LyCORIS", + "Merge model": "合併模型", + "Merge precision": "合併精度", + "Merge ratio model A": "合併比例模型 A", + "Merge ratio model B": "合併比例模型 B", + "Merge ratio model C": "合併比例模型 C", + "Merge ratio model D": "合併比例模型 D", + "Mid LR weights": "Mid LR 權重", + "Min SNR gamma": "Min SNR gamma", + "Min Timestep": "最小時序步數", + "Min bucket resolution": "最小資料儲存桶解析度", + "Min length": "最小長度", + "Minimum bucket resolution": "最小資料儲存桶解析度", + "Minimum difference": "最小化差異 (Minimum difference)", + "Minimum size in pixel a bucket can be (>= 64)": "最小資料儲存桶解析度可達 (>= 64) ", + "Mixed precision": "混合精度", + "Mode": "模式", + 
"Model A merge ratio (eg: 0.5 mean 50%)": "模型 A 合併比例 (例如:0.5 表示 50%)", + "Model B merge ratio (eg: 0.5 mean 50%)": "模型 B 合併比例 (例如:0.5 表示 50%)", + "Model C merge ratio (eg: 0.5 mean 50%)": "模型 C 合併比例 (例如:0.5 表示 50%)", + "Model D merge ratio (eg: 0.5 mean 50%)": "模型 D 合併比例 (例如:0.5 表示 50%)", + "Model type": "模型類型", + "Model": "模型", + "Module dropout": "模型捨棄", + "Multi GPU": "多個 GPU", + "Multires noise iterations": "多解析度噪聲迭代", + "Name of the new LCM model": "新 LCM 模型的名稱", + "Name of tracker to use for logging, default is script-specific default name": "用於記錄的追蹤器名稱,預設為特定於腳本的預設名稱", + "Network Alpha": "網路 Alpha", + "Network Dimension (Rank)": "網路維度 (Rank)", + "Network Rank (Dimension)": "網路維度 (Rank)", + "Network Dimension": "網路維度", + "Network dropout": "網路捨棄", + "New Conv Rank": "新卷積維度 (Conv Rank)", + "New Rank": "新維度 (Network Rank)", + "Next >": "下一個 >", + "No half VAE": "不使用半精度 VAE", + "No token padding": "不做提示詞填充 (No token padding)", + "No, get me out of here": "不,讓我離開這裡", + "Noise offset need to be a value between 0 and 1": "噪聲偏移需要是 0 到 1 之間的數值", + "Noise offset type": "噪聲偏移類型", + "Noise offset": "噪聲偏移", + "Number of CPU threads per core": "每個 CPU 核心的線程數", + "Number of beams": "beam 的數量", + "Number of images to group together": "要一起分組的圖片數量", + "Number of machines": "機器數量", + "Number of processes": "進程數量", + "Number of updates steps to accumulate before performing a backward/update pass": "執行反向/更新傳遞之前,需要累積的更新步驟數", + "Number of workers": "Worker 數量", + "Only for SD v2 models. By scaling the loss according to the time step, the weights of global noise prediction and local noise prediction become the same, and the improvement of details may be expected.": "僅適用於 SD v2 模型。通過根據時序步數的縮放損失,整體的噪聲預測與局部的噪聲預測的權重會變得相同,以此希望能改善細節。", + "Options": "選項", + "Optimizer extra arguments": "優化器額外參數", + "Optimizer": "優化器 (Optimizer)", + "Optional": "選填", + "Output \"stop text encoder training\" is not yet supported. 
Ignoring": "輸出「停止文本編碼器訓練」尚未支援。忽略", + "Output": "輸出", + "Output folder (where the grouped images will be stored)": "輸出資料夾 (存放分組的圖片)", + "Output directory for trained model": "輸出資料夾以輸出訓練模型", + "Overwrite existing captions in folder": "覆蓋資料夾中現有的提示詞", + "Page Number": "頁碼", + "Parameters": "參數", + "Path to an existing LoRA network weights to resume training from": "現有 LoRA 檔案路徑,從現有 LoRA 中繼續訓練", + "Path to tracker config file to use for logging": "用於記錄的追蹤器設定檔案的路徑", + "Persistent data loader": "持續資料載入器 (Persistent data loader)", + "Please input learning rate values.": "請輸入學習率數值。", + "Please input valid Text Encoder learning rate (between 0 and 1)": "請輸入有效的文本編碼器學習率 (在 0 到 1 之間)", + "Please input valid Unet learning rate (between 0 and 1)": "請輸入有效的 U-Net 學習率 (在 0 到 1 之間)", + "Please provide an extension for the caption files.": "請為標記文字檔案提供一個副檔名。", + "Please provide an extension for the caption files...": "請為標記文字檔案提供一個副檔名...", + "Please provide an output folder...": "請提供一個輸出資料夾...", + "Postfix to add to BLIP caption": "要添加到 BLIP 標記文字的後綴", + "Postfix to add to GIT caption": "要加入到 GIT 標記文字的後綴", + "Postfix to add to WD14 caption": "要加入到 WD14 標記文字的後綴", + "Postfix to add to caption": "添加到提示詞的後綴", + "Prefix to add to BLIP caption": "要添加到 BLIP 標記文字的前綴", + "Prefix to add to GIT caption": "要加入到 GIT 標記文字的前綴", + "Prefix to add to WD14 caption": "要加入到 WD14 標記文字的前綴", + "Prefix to add to caption": "添加到提示詞的前綴", + "Prepare training data": "準備訓練資料", + "Presets": "預設範本", + "Print training command": "印出訓練命令", + "Prior loss weight": "正規化驗證損失權重 (Prior loss weight)", + "Provide a SD file path that you want to merge with the LyCORIS file": "提供您想要與 LyCORIS 檔案合併的 SD 檔案路徑", + "Pretrained model name or path": "預訓練模型名稱或路徑", + "Quick Tags": "快速標記", + "Random crop instead of center crop": "使用隨機裁切 (而非中心裁切)", + "Rank Dropout Scale": "維度 (Rank) 丟棄比例", + "Rank dropout": "維度捨棄", + "Rate of caption dropout": "提示詞捨棄比例", + "Recommended value of 0.5 when used": "若使用時,建議使用 0.5", + "Recommended value of 5 when 
used": "若使用時,建議使用 5", + "Recommended values are 0.05 - 0.15": "若使用時,建議使用 0.05 - 0.15", + "Recommended values are 0.8. For LoRAs with small datasets, 0.1-0.3": "建議使用 0.8。對於小數據集的 LoRA,建議使用 0.1-0.3", + "Recursive": "遞迴", + "Regularisation directory (Optional. containing regularisation images)": "正規化資料夾(選填,包含正規化圖片)", + "Regularisation images (Optional. directory containing the regularisation images)": "正規化圖片 (選填,含有正規化圖片的資料夾)", + "Regularisation images are used... Will double the number of steps required...": "使用了正規化圖片... 將使所需的步數加倍...", + "Repeats": "重複次數", + "Replace underscores in filenames with spaces": "將檔案名稱中的底線替換為空格", + "Replacement text": "取代文字", + "Required bitsandbytes >= 0.36.0": "需要 bitsandbytes >= 0.36.0", + "Rescaled OFT": "重新調整 OFT", + "Resize LoRA": "調整 LoRA 大小", + "Resize model": "調整模型", + "Resolution (width,height)": "解析度 (寬度, 高度) ", + "Resume from saved training state (path to \"last-state\" state folder)": "從儲存的狀態繼續訓練(最後一個儲存的狀態的資料夾路徑)", + "Resume TI training (Optional. Path to existing TI embedding file to keep training)": "繼續 TI 訓練(選填,現有 TI 嵌入檔案的路徑以繼續訓練)", + "Token string": "提示詞字串", + "Init word": "初始化單詞", + "Vectors": "向量", + "Template": "範本", + "SD Model (Optional Stable Diffusion base model)": "SD 模型 (選填,穩定擴散基礎模型)", + "SD Model (Optional. 
Stable Diffusion model path, if you want to merge it with LoRA files)": "SD 模型 (選填,穩定擴散模型路徑,如果您想將其與 LoRA 檔案合併)", + "SDXL model": "SDXL 模型", + "Sample every n epochs": "每 N 個週期 (Epoch) 取樣", + "Sample prompts": "取樣提示詞", + "Sample sampler": "取樣取樣器", + "Samples": "範本", + "Save dtype": "儲存 dtype", + "Save every N epochs": "每 N 個週期 (Epoch) 儲存", + "Save every N steps": "每 N 個步驟儲存", + "Save last N steps state": "儲存最後 N 個步驟的訓練狀態", + "Save last N steps": "儲存最後 N 個步驟", + "Save precision": "儲存精度", + "Save to (path for the LoRA file to save...)": "儲存到 (要儲存的 LoRA 檔案的路徑...)", + "Save to (path for the new LoRA file to save...)": "儲存到 (要儲存的新 LoRA 檔案的路徑...)", + "Save to (path for the checkpoint file to save...)": "儲存到 (要儲存的模型檔案的路徑...)", + "Save to (path for the file to save...)": "儲存到 (要儲存的檔案的路徑...)", + "Save to (path where to save the extracted LoRA model...)": "儲存到 (要儲存提取的 LoRA 模型的檔案路徑...)", + "Save trained model as": "儲存訓練模型類型為", + "Save training state": "儲存訓練狀態", + "Scale v prediction loss": "縮放 v 預測損失 (v prediction loss)", + "Scale weight norms": "縮放權重標準", + "SDXL Specific Parameters": "SDXL 特定參數", + "Seed": "種子 (Seed)", + "Selects trainable layers in a network, but trains normalization layers identically across methods as they lack matrix decomposition.": "選擇網路中的可訓練層,但由於缺乏矩陣分解,因此在各種方法中都以相同方式訓練規範化層。", + "Set if we change the information going into the system (True) or the information coming out of it (False).": "設定為 True,若我們改變進入系統的資訊,否則由系統輸出則設定為 False。", + "Set to 0 to not train the Text Encoder 1": "設為 0 以不訓練文本編碼器 1", + "Set to 0 to not train the Text Encoder 2": "設為 0 以不訓練文本編碼器 2", + "Set to 0 to not train the Text Encoder": "設為 0 以不訓練文本編碼器", + "Set to 0 to not train the Unet": "設為 0 以不訓練 U-Net", + "Show frequency of tags for images.": "顯示圖片的標籤頻率。", + "Show tags frequency": "顯示標籤頻率", + "Shuffle caption": "打亂提示詞", + "Source LoRA (path to the LoRA to resize)": "來源 LoRA (要調整大小的 LoRA 的檔案路徑)", + "Source model (path to source model folder of file to convert...)": "來源模型 
(要轉換的來源模型的檔案路徑...)", + "Source model type": "來源模型類型", + "Sparsity for sparse bias": "稀疏偏差的稀疏度", + "Sparsity": "稀疏度", + "Specify the alpha of each block when expanding LoRA to Conv2d 3x3. Specify 25 numbers. If omitted, the value of conv_alpha is used.": "將 LoRA 擴展到 Conv2d 3x3 時,指定每個區塊的 Alpha。指定 25 個數字。如果省略,則使用卷積 Alpha 的值。", + "Specify the alpha of each block. Specify 25 numbers as with block_dims. If omitted, the value of network_alpha is used.": "指定每個區塊的 Alpha。與區塊維度一樣,指定 25 個數字。如果省略,則使用網路 Alpha 的值。", + "Specify the different learning rates for each U-Net block. Specify 23 values separated by commas like 1e-3,1e-3 ... 1e-3": "為每個 U-Net 區塊指定不同的學習率。輸入 23 個以逗號分隔的數值,例如:1e-3,1e-3 ... 1e-3", + "Specify the dim (rank) of each block. Specify 25 numbers.": "指定每個區塊的維度 (Rank)。指定 25 個數字。", + "Specify the learning rate weight of the down blocks of U-Net.": "指定 U-Net 下區塊的學習率權重。", + "Specify the learning rate weight of the mid block of U-Net.": "指定 U-Net 中區塊的學習率權重。", + "Specify the learning rate weight of the up blocks of U-Net. The same as down_lr_weight.": "指定 U-Net 上區塊的學習率權重。與 down_lr_weight 相同。", + "Stable Diffusion base model (original model: ckpt or safetensors file)": "穩定擴散基礎模型 (basemodel: ckpt 或 safetensors 檔案)", + "Stable Diffusion model to convert to LCM": "要轉換為 LCM 的穩定擴散模型", + "Start training": "開始訓練", + "Start tensorboard": "開始 Tensorboard", + "Stop text encoder training (% of total steps)": "停止文本編碼器訓練(總步數的 %)", + "Stop training": "停止訓練", + "Stop tensorboard": "停止 Tensorboard", + "Strength of the LCM": "LCM 的強度", + "Tag subfolders images as well": "標記子資料夾中的圖片", + "Tags": "標籤", + "Target model folder (path to target model folder of file name to create...)": "目標模型 (要創建的目標模型的檔案路徑...)", + "Target model name": "目標模型名稱", + "Target model type": "目標模型類型", + "Target model precision": "目標模型精度", + "Enable for Hugging Face's stabilityai models": "啟用 Hugging Face 的 stabilityai 模型", + "UNet linear projection": "U-Net 線性投影", + "Tensorboard is already running. 
Terminating existing process before starting new one...": "Tensorboard 已經在運行。在啟動新進程之前終止現有進程...", + "Text Encoder learning rate": "文本編碼器學習率", + "The higher the value, the larger the file. Recommended starting value: 0.75": "數值越高,檔案越大。建議的起始數值:0.75", + "The higher the value, the smaller the file. Recommended starting value: 0.65": "數值越高,檔案越小。建議的起始數值:0.65", + "The higher the value, the smaller the file. Recommended starting value: 0.75": "數值越高,檔案越小。建議的起始數值:0.75", + "The provided DyLoRA model is not a file": "提供的 DyLoRA 模型不是檔案", + "The provided base model is not a file": "提供的基礎模型不是檔案", + "The provided finetuned model is not a file": "提供的微調模型不是檔案", + "The provided model A is not a file": "提供的模型 A 不是檔案", + "The provided model B is not a file": "提供的模型 B 不是檔案", + "The provided model C is not a file": "提供的模型 C 不是檔案", + "The provided model D is not a file": "提供的模型 D 不是檔案", + "The provided model is not a file": "提供的模型不是檔案", + "The name of the specific wandb session": "指定 WANDB session 的名稱", + "This option appends the tags to the existing tags, instead of replacing them.": "此選項將標籤附加到現有標籤,而不是替換它們。", + "This section provide Various Finetuning guides and information...": "此部分提供各種微調指南和資訊...", + "This section provide various LoRA tools...": "此部分提供各種 LoRA 工具...", + "This section provide Various LoRA guides and information...": "此部分提供各種 LoRA 指南和資訊...", + "This section provide Dreambooth tools to help setup your dataset...": "此部分提供 Dreambooth 工具,以幫助設置您的資料集...", + "This utility allows quick captioning and tagging of images.": "此工具允許快速標記圖片的標記文字和標籤。", + "This utility allows you to create simple caption files for each image in a folder.": "此工具允許您為資料夾中的每個圖片建立簡單的標籤文件。", + "This utility can extract a DyLoRA network from a finetuned model.": "此工具可以從一個微調模型中提取 DyLoRA 網路。", + "This utility can extract a LoRA network from a finetuned model.": "此工具可以從一個微調模型中提取 LoRA 網路。", + "This utility can extract a LyCORIS LoCon network from a finetuned model.": "此工具可以從一個微調模型中提取 LyCORIS LoCon 網路。", + "This 
utility can merge a LyCORIS model into a SD checkpoint.": "此工具可以將 LyCORIS 模型合併到一個 SD 模型。", + "This utility can merge two LoRA networks together into a new LoRA.": "此工具可以將兩個 LoRA 網路合併成一個新的 LoRA。", + "This utility can merge up to 4 LoRA together or alternatively merge up to 4 LoRA into a SD checkpoint.": "此工具可以將最多 4 個 LoRA 合併在一起,或者將最多 4 個 LoRA 合併到一個 SD 模型。", + "This utility can resize a LoRA.": "此工具可以調整 LoRA 的大小。", + "This utility can verify a LoRA network to make sure it is properly trained.": "此工具可以驗證 LoRA 網路,以確保它已經得到正確的訓練。", + "This utility convert a model to an LCM model.": "此工具將模型轉換為 LCM 模型。", + "This utility uses BLIP to caption files for each image in a folder.": "此工具使用 BLIP 為資料夾中的每張圖像添加標籤。", + "This utility will create the necessary folder structure for the training images and optional regularization images needed for the kohys_ss Dreambooth/LoRA method to function correctly.": "此工具將為訓練圖片和 kohys_ss Dreambooth/LoRA 方法正確運行所需的正規化圖片創建必要的資料夾結構。", + "This utility will ensure that each concept folder in the dataset folder is used equally during the training process of the dreambooth machine learning model, regardless of the number of images in each folder. 
It will do this by renaming the concept folders to indicate the number of times they should be repeated during training.": "此工具將確保資料集資料夾中的每個概念資料夾在訓練過程中被平等使用,而不管每個資料夾中的圖片數量。它將通過重新命名概念資料夾來指示它們在訓練期間應該重複的次數。", + "This utility will group images in a folder based on their aspect ratio.": "此工具將根據圖片的長寬比將資料夾中的圖片分組。", + "This utility will use GIT to caption files for each images in a folder.": "此工具將使用 GIT 為資料夾中的每個圖片檔案標記文字。", + "This utility will use WD14 to caption files for each images in a folder.": "此工具將使用 WD14 為資料夾中的每個圖片檔案標記文字。", + "Top p": "Top p", + "Train batch size": "訓練批次大小", + "Train Norm": "訓練規範 (Norm)", + "Train a custom model using kohya dreambooth python code...": "使用 kohya Dreambooth Python 程式訓練自定義模型", + "Train a custom model using kohya train network LoRA python code...": "使用 kohya LoRA Python 程式訓練自定義模型", + "Train a custom model using kohya finetune python code...": "使用 kohya 微調 Python 程式訓練自定義模型", + "Train a TI using kohya textual inversion python code...": "使用 kohya 文本反轉 Python 程式訓練 TI", + "Train an additional scalar in front of the weight difference, use a different weight initialization strategy.": "在權重差異前訓練一個額外的標量,使用不同的權重初始化策略。", + "Train config directory (Optional. 
where config files will be saved)": "訓練設定資料夾(選填,設定檔案將會被儲存的資料夾)", + "Train text encoder": "訓練文本編碼器", + "Trained Model output name": "訓練模型輸出名稱", + "Training comment": "訓練註解", + "Training images (directory containing the training images)": "訓練圖片 (含有訓練圖片的資料夾)", + "Training steps per concept per epoch": "每個週期 (Epoch) 每個概念的訓練步數", + "Training": "訓練", + "U-Net and Text Encoder can be trained with fp8 (experimental)": "U-Net 與 Text Encoder 使用 fp8 訓練 (實驗性功能)", + "Undesired tags": "不需要的標籤", + "Unet learning rate": "U-Net 學習率", + "Up LR weights": "Up LR 權重", + "Use CP decomposition": "使用 CP 分解 (CP decomposition)", + "Use Scalar": "使用標量", + "Use Tucker decomposition": "使用 Tucker 分解 (Tucker decomposition)", + "Use beam search": "使用 beam 搜尋", + "Use full path": "使用完整路徑", + "Use latent files": "使用潛空間檔案", + "Use onnx": "使用 ONNX", + "Use sparse biais": "使用稀疏偏差 (sparse biais)", + "Useful if you want to train with character": "如果您想要使用角色訓練,這是有用的", + "Useful to force model re download when switching to onnx": "在切換到 ONNX 時強制重新下載模型", + "Users can obtain and/or generate an api key in the their user settings on the website: https://wandb.ai/login": "使用者可以在以下網站的用戶設定中取得,或產生 API 金鑰:https://wandb.ai/login", + "V Pred like loss": "V 預測損失 (V Pred like loss)", + "VAE (Optional: Path to checkpoint of vae for training)": "VAE (選填:選擇要替換訓練的 VAE checkpoint 的檔案路徑)", + "VAE batch size": "VAE 批次大小", + "Value for the dynamic method selected.": "選擇的動態方法的數值。", + "Values greater than 0 will make the model more img2img focussed. 0 = image only": "大於 0 的數值會使模型更加聚焦在 img2img 上。0 表示僅關注於圖像生成", + "Values lower than 1000 will make the model more img2img focussed. 
1000 = noise only": "小於 1000 的數值會使模型更加聚焦在 img2img 上。1000 表示僅使用噪聲生成圖片", + "Verbose logging": "詳細日誌", + "Verification error": "驗證錯誤", + "Verification output": "驗證輸出", + "Verify LoRA": "驗證 LoRA", + "Verify": "驗證", + "WANDB API Key": "WANDB API 金鑰", + "WANDB Logging": "WANDB 紀錄", + "WANDB run name": "WANDB 執行名稱", + "WD14 Captioning": "WD14 標記文字", + "Weighted captions": "加權標記文字 (Weighted captions)", + "Weights": "權重", + "Yes, I like danger": "是的,我喜歡危險", + "alpha for LoRA weight scaling": "LoRA 權重縮放的 alpha", + "applies an additional scaling factor to the oft_blocks, allowing for further adjustment of their impact on the model's transformations.": "對 oft_blocks 應用額外的縮放因子,從而進一步調整它們對模型轉換的影響。", + "can specify `module_dropout` to dropout each rank with specified probability. Recommended range 0.1 to 0.3": "可以指定 `module_dropout` 以指定的概率捨棄每個維度。建議範圍 0.1 到 0.3", + "can specify `rank_dropout` to dropout each rank with specified probability. Recommended range 0.1 to 0.3": "可以指定 `rank_dropout` 以指定的概率捨棄每個維度。建議範圍 0.1 到 0.3", + "e.g., \"by some artist\". Leave empty if you only want to add a prefix or postfix.": "例如: \"by some artist\"。如果您只想添加前綴或後綴,請留空。", + "e.g., \"by some artist\". 
Leave empty if you want to replace with nothing.": "例如: \"by some artist\"。如果您想用空值取代,請留空。", + "eg: cat": "例如:cat", + "example: 0,1": "例如:0,1", + "fp8 base training (experimental)": "使用 fp8 基礎訓練 (實驗性功能)", + "iA3 train on input": "iA3 輸入訓練", + "is SDXL": "是 SDXL", + "is v2": "是 v2", + "network dim for linear layer in fixed mode": "固定模式中線性層的網路維度", + "network dim for conv layer in fixed mode": "固定模式中卷積層的網路維度", + "only for SDXL": "僅適用於 SDXL" +} diff --git a/presets/finetune/SDXL - AI_Now PagedAdamW8bit v1.0.json b/presets/finetune/SDXL - AI_Now PagedAdamW8bit v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..9a31c7b9b5879aab65effb7a5ae7e7995c7b1c35 --- /dev/null +++ b/presets/finetune/SDXL - AI_Now PagedAdamW8bit v1.0.json @@ -0,0 +1,71 @@ +{ + "adaptive_noise_scale": 0.00375, + "additional_parameters": "", + "batch_size": "4", + "block_lr": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "create_buckets": true, + "create_caption": true, + "dataset_repeats": "1", + "epoch": 240, + "flip_aug": false, + "full_bf16": true, + "full_fp16": false, + "full_path": true, + "gradient_accumulation_steps": 6.0, + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 5e-05, + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_warmup": 0, + "max_bucket_reso": "1024", + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 900, + "max_token_length": "75", + "max_train_epochs": "240", + "mem_eff_attn": false, + "min_bucket_reso": "64", + "min_snr_gamma": 5, + "min_timestep": 100, + "mixed_precision": "bf16", + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "noise_offset": 0.0375, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": 
"PagedAdamW8bit", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "random_crop": false, + "save_every_n_epochs": 240, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "sdxl_cache_text_encoder_outputs": true, + "sdxl_checkbox": true, + "sdxl_no_half_vae": true, + "seed": "1234", + "shuffle_caption": false, + "train_batch_size": 2, + "train_text_encoder": false, + "use_latent_files": "No", + "log_with": "", + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/presets/finetune/SDXL - Essenz series by AI_Characters_Training v1.0.json b/presets/finetune/SDXL - Essenz series by AI_Characters_Training v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..b63661e30248aa8d298d6d838311ec773dcff20e --- /dev/null +++ b/presets/finetune/SDXL - Essenz series by AI_Characters_Training v1.0.json @@ -0,0 +1,88 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "batch_size": "1", + "block_lr": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "caption_metadata_filename": "meta_cap.json", + "clip_skip": "1", + "color_aug": false, + "dataset_repeats": "1", + "epoch": 1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "full_path": true, + "generate_caption_database": true, + "generate_image_buckets": true, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": true, + "image_folder": "/kohya_ss/dataset/1_/", + "keep_tokens": 0, + "latent_metadata_filename": "meta_lat.json", + "learning_rate": 1e-06, + "logging_dir": 
"/kohya_ss/output/SDXL1.0_Essenz-series-by-AI_Characters_Concept_Morphing-v1.0", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_warmup": 0, + "max_bucket_reso": "4096", + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "100", + "mem_eff_attn": false, + "min_bucket_reso": "64", + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "fp16", + "model_list": "stabilityai/stable-diffusion-xl-base-1.0", + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "noise_offset": 0, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "/kohya_ss/output/SDXL1.0_Essenz-series-by-AI_Characters_Concept_Morphing-v1.0", + "output_name": "SDXL1.0_Essenz-series-by-AI_Characters_Concept_Morphing-v1.0", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0", + "random_crop": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 0, + "sample_prompts": "", + "sample_sampler": "k_dpm_2", + "save_every_n_epochs": 10, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_checkbox": true, + "sdxl_no_half_vae": true, + "seed": "", + "shuffle_caption": false, + "train_batch_size": 1, + "train_dir": "/kohya_ss/output/SDXL1.0_Essenz-series-by-AI_Characters_Concept_Morphing-v1.0", + "train_text_encoder": true, + "use_latent_files": "Yes", + "log_with": "", + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae_batch_size": 0, + "wandb_api_key": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git 
a/presets/finetune/adafactor.json b/presets/finetune/adafactor.json new file mode 100644 index 0000000000000000000000000000000000000000..1d951352349127307674db53dbdea370d2a1f023 --- /dev/null +++ b/presets/finetune/adafactor.json @@ -0,0 +1,49 @@ +{ + "batch_size": "1", + "bucket_no_upscale": true, + "bucket_reso_steps": 1.0, + "cache_latents": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.1, + "caption_extension": ".txt", + "clip_skip": 1, + "color_aug": false, + "create_buckets": false, + "create_caption": true, + "dataset_repeats": "10", + "epoch": "2", + "flip_aug": false, + "full_fp16": false, + "full_path": true, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": false, + "keep_tokens": 1, + "learning_rate": "1e-6", + "lr_scheduler": "adafactor", + "lr_warmup": "10", + "max_bucket_reso": "1024", + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_token_length": "150", + "max_train_epochs": "", + "mem_eff_attn": false, + "min_bucket_reso": "256", + "mixed_precision": "bf16", + "noise_offset": "", + "num_cpu_threads_per_process": 2, + "optimizer": "Adafactor", + "optimizer_args": "scale_parameter=True relative_step=True warmup_init=True weight_decay=2", + "persistent_data_loader_workers": false, + "random_crop": false, + "save_every_n_epochs": "1", + "save_precision": "fp16", + "seed": "1234", + "shuffle_caption": true, + "train_batch_size": 4, + "train_text_encoder": true, + "use_8bit_adam": false, + "use_latent_files": "No", + "v2": false, + "v_parameterization": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/finetune/lion.json b/presets/finetune/lion.json new file mode 100644 index 0000000000000000000000000000000000000000..ac023df85f2df317973bba3b377aeddf4dd8a4f1 --- /dev/null +++ b/presets/finetune/lion.json @@ -0,0 +1,49 @@ +{ + "batch_size": "1", + "bucket_no_upscale": true, + "bucket_reso_steps": 1.0, + "cache_latents": true, + 
"caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.1, + "caption_extension": ".txt", + "clip_skip": 1, + "color_aug": false, + "create_buckets": false, + "create_caption": true, + "dataset_repeats": "10", + "epoch": "2", + "flip_aug": false, + "full_fp16": false, + "full_path": true, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": false, + "keep_tokens": 1, + "learning_rate": "0.0000166666666", + "lr_scheduler": "cosine", + "lr_warmup": "10", + "max_bucket_reso": "1024", + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_token_length": "150", + "max_train_epochs": "", + "mem_eff_attn": false, + "min_bucket_reso": "256", + "mixed_precision": "bf16", + "noise_offset": "", + "num_cpu_threads_per_process": 2, + "optimizer": "Lion", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "random_crop": false, + "save_every_n_epochs": "1", + "save_precision": "fp16", + "seed": "1234", + "shuffle_caption": true, + "train_batch_size": 4, + "train_text_encoder": true, + "use_8bit_adam": false, + "use_latent_files": "No", + "v2": false, + "v_parameterization": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/finetune/prepare_presets.md b/presets/finetune/prepare_presets.md new file mode 100644 index 0000000000000000000000000000000000000000..8e3e697f7dec888e791065afa892abcb8184c611 --- /dev/null +++ b/presets/finetune/prepare_presets.md @@ -0,0 +1,7 @@ +# Preparing presets for users + +Run the following command to prepare new presets for release to users: + +``` +python.exe .\tools\prepare_presets.py .\presets\finetune\*.json +``` \ No newline at end of file diff --git a/presets/lora/SDXL - 1 image LoRA v1.0.json b/presets/lora/SDXL - 1 image LoRA v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..1aaa523b6065d19869143282ed2812fff6a1acb0 --- /dev/null +++ b/presets/lora/SDXL - 1 image LoRA v1.0.json @@ -0,0 +1,111 @@ +{ + "LoRA_type": "LyCORIS/LoKr", 
+ "LyCORIS_preset": "full", + "adaptive_noise_scale": 0, + "additional_parameters": "--lr_scheduler_type \"CosineAnnealingLR\" --lr_scheduler_args \"T_max=1000\" \"eta_min=0e-0\"", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 32, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "constrain": 0.0, + "conv_alpha": 1, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 100000, + "debiased_estimation_loss": true, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 100, + "factor": 6, + "flip_aug": false, + "fp8_base": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 1.0, + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_grad_norm": 1, + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "160", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multi_gpu": false, + "multires_noise_discount": 0.1, + "multires_noise_iterations": 5, + "network_alpha": 1, + "network_dim": 100000, + "network_dropout": 0, + "noise_offset": 0, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "Prodigy", + "optimizer_args": "\"d0=1e-5\" \"d_coef=1.0\" \"weight_decay=0.4\" \"decouple=True\" 
\"safeguard_warmup=True\" \"use_bias_correction=True\"", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "rank_dropout_scale": false, + "rescaled": false, + "save_every_n_epochs": 10, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "12345", + "shuffle_caption": true, + "stop_text_encoder_training_pct": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 1, + "train_norm": false, + "train_on_input": true, + "training_comment": "trigger: 1girl, solo, long hair, breasts, looking at viewer, smile, large breasts, brown hair, holding, brown eyes, standing, swimsuit, flower, bikini, lips, highleg, realistic, pink bikini, holding flower", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_scalar": false, + "use_tucker": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoHA AI_Characters v1.0.json b/presets/lora/SDXL - LoHA AI_Characters v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..35b963c73f7e66dfc237009e3267b98910192a2e --- /dev/null +++ b/presets/lora/SDXL - LoHA AI_Characters v1.0.json @@ -0,0 +1,94 @@ +{ + "LoRA_type": "LyCORIS/LoHa", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 32, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": ".txt", + "clip_skip": "1", + 
"color_aug": false, + "conv_alpha": 16, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 16, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 1, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 0.001, + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "100", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "fp16", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 32, + "network_dim": 32, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 100, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "fp16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 2.5, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "", + "shuffle_caption": false, + "stop_text_encoder_training_pct": 0, + "text_encoder_lr": 0.001, + "train_batch_size": 8, + "train_on_input": true, + "training_comment": "", + "unet_lr": 0.001, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + 
"weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoKR v1.0.json b/presets/lora/SDXL - LoKR v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..5b3983d841fc9c86f6219e210b4bfdb655c01f04 --- /dev/null +++ b/presets/lora/SDXL - LoKR v1.0.json @@ -0,0 +1,91 @@ +{ + "LoRA_type": "LyCORIS/LoKr", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 64, + "conv_alphas": "", + "conv_dim": 64, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 20, + "factor": -1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 1.0, + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_snr_gamma": 10, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0.1, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 64, + "network_dim": 64, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0357, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "optimizer": "Prodigy", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + 
"rank_dropout": 0.1, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "fp16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "12345", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 8, + "train_on_input": true, + "training_comment": "", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": true, + "log_with": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoRA AI_Now ADamW v1.0.json b/presets/lora/SDXL - LoRA AI_Now ADamW v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..71baacc278e34452da601df54dddf859ebab95ea --- /dev/null +++ b/presets/lora/SDXL - LoRA AI_Now ADamW v1.0.json @@ -0,0 +1,97 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0.00375, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt-no", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 32, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 32, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 160, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 0.0001, + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + 
"lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 900, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "320", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 100, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 32, + "network_dim": 32, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0375, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 5, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "12345", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 4, + "train_on_input": true, + "training_comment": "trigger: lxndrn woman", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoRA AI_Now prodigy v1.0.json b/presets/lora/SDXL - LoRA AI_Now prodigy v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..58e7b9699fa8ed96a59d1d7605401b1a66d88e7c --- /dev/null +++ b/presets/lora/SDXL - LoRA AI_Now prodigy v1.0.json @@ -0,0 +1,94 @@ +{ + "LoRA_type": "Standard", 
+ "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 32, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 32, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 32, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 160, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 1.0, + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 16, + "network_dim": 32, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "Prodigy", + "optimizer_args": "weight_decay=0.01 decouple=True d0=0.0001 use_bias_correction=True", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 10, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 5, + "sdxl": 
true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "", + "shuffle_caption": false, + "stop_text_encoder_training_pct": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 8, + "train_on_input": true, + "training_comment": "trigger: the queen of heart 1a", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoRA AI_characters standard v1.0.json b/presets/lora/SDXL - LoRA AI_characters standard v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..757b118d6e1f65e6e4417fddf600d79cb22b5c83 --- /dev/null +++ b/presets/lora/SDXL - LoRA AI_characters standard v1.0.json @@ -0,0 +1,97 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 1, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 1, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 100, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": "1", + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 2e-05, + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + 
"max_token_length": "75", + "max_train_epochs": "100", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "fp16", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 32, + "network_dim": 32, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "fp16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "", + "shuffle_caption": false, + "stop_text_encoder_training_pct": 0, + "text_encoder_lr": 2e-05, + "train_batch_size": 8, + "train_on_input": true, + "training_comment": "2 repeats for styles, 3 repeats for characters, 1 repeat for styles when used together with characters", + "unet_lr": 2e-05, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoRA AI_characters standard v1.1.json b/presets/lora/SDXL - LoRA AI_characters standard v1.1.json new file mode 100644 index 0000000000000000000000000000000000000000..cf95b3bbe0c95b1be05c88403c8cb5b3dbf8a3fc --- /dev/null +++ b/presets/lora/SDXL - LoRA AI_characters standard v1.1.json @@ -0,0 +1,97 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": 
"", + "block_lr_zero_threshold": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 1, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 1, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 50, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": "1", + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 3e-05, + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "50", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "fp16", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 32, + "network_dim": 32, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "fp16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "", + "shuffle_caption": 
false, + "stop_text_encoder_training_pct": 0, + "text_encoder_lr": 3e-05, + "train_batch_size": 3, + "train_on_input": true, + "training_comment": "3 repeats. More info: https://civitai.com/articles/1771", + "unet_lr": 3e-05, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoRA adafactor v1.0.json b/presets/lora/SDXL - LoRA adafactor v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..0fee65e5a2aabb732aec04243fa54ceb209f3cc6 --- /dev/null +++ b/presets/lora/SDXL - LoRA adafactor v1.0.json @@ -0,0 +1,94 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0.00357, + "additional_parameters": "--log_prefix=xl-loha", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 32, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt2", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 4, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 4, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 30, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": true, + "keep_tokens": 1, + "learning_rate": 1.0, + "lora_network_weights": "", + "lr_scheduler": "adafactor", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "30", + "mem_eff_attn": false, + "mid_lr_weight": "", + 
"min_bucket_reso": 64, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 128, + "network_dim": 128, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0357, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "Adafactor", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 5, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "", + "shuffle_caption": false, + "stop_text_encoder_training_pct": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 5, + "train_on_input": false, + "training_comment": "trigger: the white queen", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoRA aitrepreneur clothing v1.0.json b/presets/lora/SDXL - LoRA aitrepreneur clothing v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..e31c356173a49767f72ec9f79c3dabf9b6740370 --- /dev/null +++ b/presets/lora/SDXL - LoRA aitrepreneur clothing v1.0.json @@ -0,0 +1,97 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + 
"caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 32, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 32, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 15, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 2, + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 0.0009, + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 1, + "network_dim": 128, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "Adafactor", + "optimizer_args": "scale_parameter=False relative_step=False warmup_init=False", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "12345", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0009, + "train_batch_size": 1, + "train_on_input": true, + "training_comment": "trigger: 
supergirl costume", + "unet_lr": 0.0009, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoRA by malcolmrey training v1.0.json b/presets/lora/SDXL - LoRA by malcolmrey training v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..2421c72cba306914dd435538fe8101d70dc60a71 --- /dev/null +++ b/presets/lora/SDXL - LoRA by malcolmrey training v1.0.json @@ -0,0 +1,94 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 32, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 32, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 32, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 16, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 0.0001, + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "16", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0, + 
"multires_noise_iterations": 0, + "network_alpha": 13, + "network_dim": 32, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 2, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 2.5, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "", + "shuffle_caption": false, + "stop_text_encoder_training_pct": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 8, + "train_on_input": true, + "training_comment": "trigger: playboy centerfold", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoRA face dogu_cat v1.0.json b/presets/lora/SDXL - LoRA face dogu_cat v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..0b0e329f28af12ea7769204fc1532db3c3364332 --- /dev/null +++ b/presets/lora/SDXL - LoRA face dogu_cat v1.0.json @@ -0,0 +1,97 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0.00357, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 32, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 4, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 4, + 
"decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 10, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": true, + "keep_tokens": 1, + "learning_rate": 0.0001, + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 64, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 46, + "network_dim": 92, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0357, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 2, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "12345", + "shuffle_caption": false, + "stop_text_encoder_training_pct": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 1, + "train_on_input": false, + "training_comment": "Good for faces. 
Use 20 1024x1024 cropped images, 20 repeats, Blip captions, but 'woman' replaced with 'khls woman', https://civitai.com/user/dogu_cat/models", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoRA finetuning phase 1_v1.1.json b/presets/lora/SDXL - LoRA finetuning phase 1_v1.1.json new file mode 100644 index 0000000000000000000000000000000000000000..8b80b9f3559939bc40b1e0c2bd251ad36531cd16 --- /dev/null +++ b/presets/lora/SDXL - LoRA finetuning phase 1_v1.1.json @@ -0,0 +1,94 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 32, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt2", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 64, + "conv_alphas": "", + "conv_dim": 64, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 8, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 0.0001, + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 64, + "min_snr_gamma": 0, + "min_timestep": 0, + 
"mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 128, + "network_dim": 128, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 1, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "12345", + "shuffle_caption": false, + "stop_text_encoder_training_pct": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 8, + "train_on_input": true, + "training_comment": "kill bill, the bride", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": true, + "log_with": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoRA finetuning phase 2_v1.1.json b/presets/lora/SDXL - LoRA finetuning phase 2_v1.1.json new file mode 100644 index 0000000000000000000000000000000000000000..63be628c71fe184b36a03f16ed9ab5f43ff3dd44 --- /dev/null +++ b/presets/lora/SDXL - LoRA finetuning phase 2_v1.1.json @@ -0,0 +1,94 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0.00357, + "additional_parameters": "--log_prefix=xl-loha", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 32, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt2", + "clip_skip": "1", + "color_aug": false, 
+ "conv_alpha": 4, + "conv_alphas": "", + "conv_dim": 4, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": true, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 1, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": true, + "keep_tokens": 1, + "learning_rate": 0.0001, + "lora_network_weights": "D:/lycoris/sdxl\\sdxl-kill bill, the bride-lora-1.0av2.safetensors", + "lr_scheduler": "constant", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "1", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 64, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 128, + "network_dim": 128, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0357, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "17415", + "shuffle_caption": false, + "stop_text_encoder_training_pct": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 1, + "train_on_input": false, + "training_comment": "trigger: portrait", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + 
"v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoRA kudou-reira dadaptadam v1.0.json b/presets/lora/SDXL - LoRA kudou-reira dadaptadam v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..7cd40936c9b6a5c3c8ac8c8c511c0c8b82c826b1 --- /dev/null +++ b/presets/lora/SDXL - LoRA kudou-reira dadaptadam v1.0.json @@ -0,0 +1,94 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "--network_train_unet_only", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 1, + "conv_alphas": "", + "conv_dim": 1, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 25, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": true, + "keep_tokens": 2, + "learning_rate": 1.0, + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 5, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "225", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 256, + "network_dim": 256, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0357, + 
"noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "DAdaptAdam", + "optimizer_args": "\"decouple=True\" \"weight_decay=0.2\" \"betas=0.9,0.99\" \"growth_rate=1.02\"", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "1337", + "shuffle_caption": true, + "stop_text_encoder_training": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 6, + "train_on_input": true, + "training_comment": "", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "wandb", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoRA kudou-reira dadaptadam v1.1.json b/presets/lora/SDXL - LoRA kudou-reira dadaptadam v1.1.json new file mode 100644 index 0000000000000000000000000000000000000000..bbd02b429fd3b793de1788d6f416564e5f69f223 --- /dev/null +++ b/presets/lora/SDXL - LoRA kudou-reira dadaptadam v1.1.json @@ -0,0 +1,94 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "--network_train_unet_only", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 1, + "conv_alphas": "", + "conv_dim": 1, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + 
"enable_bucket": true, + "epoch": 25, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": true, + "keep_tokens": 2, + "learning_rate": 1.0, + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 5, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "225", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 64, + "network_dim": 64, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0357, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "DAdaptAdam", + "optimizer_args": "\"decouple=True\" \"weight_decay=0.1\" \"betas=0.9,0.91\"", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "1337", + "shuffle_caption": true, + "stop_text_encoder_training": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 6, + "train_on_input": true, + "training_comment": "", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "wandb", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/SDXL - LoRA kudou-reira prodigy v4.0.json 
b/presets/lora/SDXL - LoRA kudou-reira prodigy v4.0.json new file mode 100644 index 0000000000000000000000000000000000000000..095d9c71a5c68eb5b7d9fbb411ba017c1bb2fe8a --- /dev/null +++ b/presets/lora/SDXL - LoRA kudou-reira prodigy v4.0.json @@ -0,0 +1,94 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "--lr_scheduler_type \"CosineAnnealingLR\" --lr_scheduler_args \"T_max=30\" --network_train_unet_only", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 1, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 1, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 30, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": true, + "keep_tokens": 2, + "learning_rate": 1.0, + "lora_network_weights": "", + "lr_scheduler": "cosine_with_restarts", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "225", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 256, + "network_dim": 256, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0357, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "Prodigy", + "optimizer_args": 
"decouple=True weight_decay=0.45 d_coef=2 use_bias_correction=True safeguard_warmup=True", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "1337", + "shuffle_caption": true, + "stop_text_encoder_training_pct": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 6, + "train_on_input": true, + "training_comment": "", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "wandb", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/SDXL - edgLoRAXL AI_Now.json b/presets/lora/SDXL - edgLoRAXL AI_Now.json new file mode 100644 index 0000000000000000000000000000000000000000..40449e0544ed1586fff197776e34972db20a0496 --- /dev/null +++ b/presets/lora/SDXL - edgLoRAXL AI_Now.json @@ -0,0 +1,95 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "--max_grad_norm=1", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 32, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 4, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 8, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 160, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + 
"gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 1.0, + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "320", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 32, + "network_dim": 32, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0357, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "Prodigy", + "optimizer_args": "decouple=True weight_decay=0.5 betas=0.9,0.99 use_bias_correction=False", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 10, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "12345", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 4, + "train_on_input": false, + "training_comment": "", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": true, + "log_with": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/SDXL - edgLoRAXL.json b/presets/lora/SDXL - edgLoRAXL.json new file mode 100644 index 
0000000000000000000000000000000000000000..85de77cea0d5c1eb83716187b71d1ff27dd8bf19 --- /dev/null +++ b/presets/lora/SDXL - edgLoRAXL.json @@ -0,0 +1,94 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "--max_grad_norm=0", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 4, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 8, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 1, + "factor": -1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 1.0, + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "fp16", + "module_dropout": 0, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 32, + "network_dim": 32, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0357, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "Prodigy", + "optimizer_args": "decouple=True weight_decay=0.5 betas=0.9,0.99 use_bias_correction=False", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + 
"save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "fp16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "12345", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 4, + "train_on_input": false, + "training_comment": "", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": true, + "log_with": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/iA3-Prodigy-sd15.json b/presets/lora/iA3-Prodigy-sd15.json new file mode 100644 index 0000000000000000000000000000000000000000..7634d336ec01ab172040f9df6c941bc711a2d10b --- /dev/null +++ b/presets/lora/iA3-Prodigy-sd15.json @@ -0,0 +1,30 @@ +{ + "LoRA_type": "LyCORIS/iA3", + "adaptive_noise_scale": 0.005, + "caption_dropout_rate": 0.5, + "epoch": 300, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": true, + "keep_tokens": 1, + "learning_rate": 1.0, + "lr_scheduler": "constant", + "lr_warmup": 0, + "min_snr_gamma": 5, + "network_alpha": 1024, + "network_dim": 1024, + "network_dropout": 0.3, + "noise_offset": 0.05, + "noise_offset_type": "Original", + "optimizer": "Prodigy", + "optimizer_args": "d_coef=1.0 weight_decay=0.01 safeguard_warmup=False use_bias_correction=False", + "save_every_n_epochs": 10, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "scale_weight_norms": 1, + "seed": "31337", + "shuffle_caption": true, + "text_encoder_lr": 1.0, + "train_batch_size": 1, + "training_comment": "rentry.co/ProdiAgy", + "unet_lr": 1.0 +} \ No newline at end of file diff --git a/presets/lora/ia3-sd15.json b/presets/lora/ia3-sd15.json new file mode 100644 index 
0000000000000000000000000000000000000000..7178138a8cea338bd19ee8d19dd0a1f3a5977ae7 --- /dev/null +++ b/presets/lora/ia3-sd15.json @@ -0,0 +1,86 @@ +{ + "LoRA_type": "LyCORIS/iA3", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".none-use-foldername", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 64, + "conv_alphas": "", + "conv_dim": 64, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 4, + "factor": -1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 1.0, + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_snr_gamma": 10, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 64, + "network_dim": 64, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "optimizer": "Prodigy", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "fp16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "seed": "", + 
"shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 1, + "train_on_input": true, + "training_comment": "", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/locon-dadaptation-sdxl.json b/presets/lora/locon-dadaptation-sdxl.json new file mode 100644 index 0000000000000000000000000000000000000000..86a7eadede17ad1d03c93d964020373f92cd16b7 --- /dev/null +++ b/presets/lora/locon-dadaptation-sdxl.json @@ -0,0 +1,87 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".none-use-foldername", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 64, + "conv_alphas": "", + "conv_dim": 64, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 4, + "factor": -1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 4e-07, + "lora_network_weights": "", + "lr_scheduler": "constant_with_warmup", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 8, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_snr_gamma": 10, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 64, + 
"network_dim": 64, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0357, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "Adafactor", + "optimizer_args": "scale_parameter=False relative_step=False warmup_init=False", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "fp16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "seed": "", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0, + "train_batch_size": 1, + "train_on_input": true, + "training_comment": "", + "unet_lr": 4e-07, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/loha-sd15.json b/presets/lora/loha-sd15.json new file mode 100644 index 0000000000000000000000000000000000000000..3b3091fd235861075659b4432506a620138bb900 --- /dev/null +++ b/presets/lora/loha-sd15.json @@ -0,0 +1,86 @@ +{ + "LoRA_type": "LyCORIS/LoHa", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".none-use-foldername", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 4, + "conv_alphas": "", + "conv_dim": 8, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 2, + "factor": -1, + "flip_aug": false, + "full_fp16": false, + 
"gradient_accumulation_steps": 4, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 0.0001, + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_snr_gamma": 10, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 16, + "network_dim": 32, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "fp16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "seed": "", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 1, + "train_on_input": true, + "training_comment": "", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": true, + "log_with": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/lokr-sd15.json b/presets/lora/lokr-sd15.json new file mode 100644 index 0000000000000000000000000000000000000000..d08c24992037a70524dc7c1788e3e48428efc0ef --- /dev/null +++ b/presets/lora/lokr-sd15.json @@ -0,0 +1,84 @@ +{ + "LoRA_type": "LyCORIS/LoKr", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": 
true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".none-use-foldername", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 64, + "conv_alphas": "", + "conv_dim": 64, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "factor": -1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 4, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 1.0, + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_snr_gamma": 10, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 64, + "network_dim": 64, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "optimizer": "Prodigy", + "optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "fp16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "seed": "", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 1, + "train_on_input": false, + "training_comment": "", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": true +} \ No 
newline at end of file diff --git a/presets/lora/prepare_presets.md b/presets/lora/prepare_presets.md new file mode 100644 index 0000000000000000000000000000000000000000..d3cfdce5f2c3af6d2f99b557b286c8a9ddd9cdd2 --- /dev/null +++ b/presets/lora/prepare_presets.md @@ -0,0 +1,7 @@ +# Preparing presets for users + +Run the following command to prepare new presets for release to users: + +``` +python.exe .\tools\prepare_presets.py .\presets\lora\*.json +``` \ No newline at end of file diff --git a/presets/lora/sd15 - EDG_LoConOptiSettings.json b/presets/lora/sd15 - EDG_LoConOptiSettings.json new file mode 100644 index 0000000000000000000000000000000000000000..a3f405a363205ae550ce5cc615f1c991bb550449 --- /dev/null +++ b/presets/lora/sd15 - EDG_LoConOptiSettings.json @@ -0,0 +1,65 @@ +{ + "LoRA_type": "LyCORIS/LoCon", + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64.0, + "cache_latents": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": 2, + "color_aug": false, + "conv_alpha": 1, + "conv_alphas": "", + "conv_dim": 32, + "conv_dims": "", + "down_lr_weight": "", + "enable_bucket": false, + "epoch": 1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": "0.0001", + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": "0", + "max_data_loader_n_workers": "1", + "max_resolution": "512,650", + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": true, + "mid_lr_weight": "", + "min_snr_gamma": 0, + "mixed_precision": "bf16", + "network_alpha": 64, + "network_dim": 64, + "no_token_padding": false, + "noise_offset": "0.05", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW8bit", + 
"optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "save_every_n_epochs": 1, + "save_precision": "bf16", + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": "5e-05", + "train_batch_size": 3, + "training_comment": "", + "unet_lr": "0.0001", + "up_lr_weight": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/sd15 - EDG_LoHaOptiSettings.json b/presets/lora/sd15 - EDG_LoHaOptiSettings.json new file mode 100644 index 0000000000000000000000000000000000000000..bcdd4796c80e23aa2f6c754be4569e1407f11fbf --- /dev/null +++ b/presets/lora/sd15 - EDG_LoHaOptiSettings.json @@ -0,0 +1,65 @@ +{ + "LoRA_type": "LyCORIS/LoHa", + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64.0, + "cache_latents": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": 2, + "color_aug": false, + "conv_alpha": 1, + "conv_alphas": "", + "conv_dim": 32, + "conv_dims": "", + "down_lr_weight": "", + "enable_bucket": false, + "epoch": 1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": "0.0001", + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": "0", + "max_data_loader_n_workers": "1", + "max_resolution": "512,650", + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": true, + "mid_lr_weight": "", + "min_snr_gamma": 0, + "mixed_precision": "bf16", + "network_alpha": 32, + "network_dim": 32, + "no_token_padding": false, + "noise_offset": "", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW8bit", + 
"optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "save_every_n_epochs": 1, + "save_precision": "bf16", + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": "5e-5", + "train_batch_size": 3, + "training_comment": "", + "unet_lr": "0.0001", + "up_lr_weight": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/sd15 - EDG_LoraOptiSettings.json b/presets/lora/sd15 - EDG_LoraOptiSettings.json new file mode 100644 index 0000000000000000000000000000000000000000..bcf4c7ce77a3118ed92b748c2d5807f39ee0ecad --- /dev/null +++ b/presets/lora/sd15 - EDG_LoraOptiSettings.json @@ -0,0 +1,65 @@ +{ + "LoRA_type": "Standard", + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64.0, + "cache_latents": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": 2, + "color_aug": false, + "conv_alpha": 1, + "conv_alphas": "", + "conv_dim": 1, + "conv_dims": "", + "down_lr_weight": "", + "enable_bucket": false, + "epoch": 1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": "0.0001", + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": "0", + "max_data_loader_n_workers": "1", + "max_resolution": "512,650", + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": true, + "mid_lr_weight": "", + "min_snr_gamma": 0, + "mixed_precision": "bf16", + "network_alpha": 64, + "network_dim": 64, + "no_token_padding": false, + "noise_offset": "0.05", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW8bit", + 
"optimizer_args": "", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "save_every_n_epochs": 1, + "save_precision": "bf16", + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": "5e-05", + "train_batch_size": 3, + "training_comment": "", + "unet_lr": "0.0001", + "up_lr_weight": "", + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "xformers": true +} \ No newline at end of file diff --git a/presets/lora/sd15 - GLoRA v1.0.json b/presets/lora/sd15 - GLoRA v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..cbccffa5c84ee61b234a71c08be438e22bf4d5d6 --- /dev/null +++ b/presets/lora/sd15 - GLoRA v1.0.json @@ -0,0 +1,107 @@ +{ + "LoRA_type": "LyCORIS/LoKr", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0.005, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0, + "caption_dropout_rate": 0.05, + "caption_extension": ".txt", + "clip_skip": 1, + "color_aug": false, + "constrain": 0, + "conv_alpha": 1, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 100000, + "debiased_estimation_loss": false, + "decompose_both": true, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 20, + "factor": 6, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 0.0001, + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "1", + "max_grad_norm": 1, + "max_resolution": "768,768", + "max_timestep": 1000, 
+ "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "113", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0.3, + "multires_noise_iterations": 10, + "network_alpha": 1, + "network_dim": 100000, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW", + "optimizer_args": "\"weight_decay=0.1\" \"betas=0.9,0.99\"", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1, + "random_crop": false, + "rank_dropout": 0, + "rank_dropout_scale": true, + "rescaled": false, + "save_every_n_epochs": 0, + "save_every_n_steps": 29, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "fp16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 1, + "train_norm": true, + "train_on_input": true, + "training_comment": "busty blonde woman full", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": true, + "use_scalar": false, + "use_tucker": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/presets/lora/sd15 - LoKR v1.0.json b/presets/lora/sd15 - LoKR v1.0.json new file mode 100644 index 0000000000000000000000000000000000000000..b792831413ce40f2b18bbf3f4355d64deaa6f1a7 --- /dev/null +++ b/presets/lora/sd15 - LoKR v1.0.json @@ -0,0 +1,100 @@ +{ + "LoRA_type": "LyCORIS/LoKr", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0.005, + 
"additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": ".txt", + "clip_skip": 1, + "color_aug": false, + "conv_alpha": 1, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 100000, + "debiased_estimation_loss": false, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 20, + "factor": 6, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 0.0001, + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "1", + "max_resolution": "768,768", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "150", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0.3, + "multires_noise_iterations": 10, + "network_alpha": 1, + "network_dim": 100000, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW", + "optimizer_args": "\"weight_decay=0.1\" \"betas=0.9,0.99\"", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "save_every_n_epochs": 0, + "save_every_n_steps": 50, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "fp16", + "scale_v_pred_loss_like_noise_pred": false, 
+ "scale_weight_norms": 1, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 1, + "train_on_input": true, + "training_comment": "lxrssrrd woman", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/presets/lora/sd15 - LoKr v1.1.json b/presets/lora/sd15 - LoKr v1.1.json new file mode 100644 index 0000000000000000000000000000000000000000..cbccffa5c84ee61b234a71c08be438e22bf4d5d6 --- /dev/null +++ b/presets/lora/sd15 - LoKr v1.1.json @@ -0,0 +1,107 @@ +{ + "LoRA_type": "LyCORIS/LoKr", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0.005, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0, + "caption_dropout_rate": 0.05, + "caption_extension": ".txt", + "clip_skip": 1, + "color_aug": false, + "constrain": 0, + "conv_alpha": 1, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 100000, + "debiased_estimation_loss": false, + "decompose_both": true, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 20, + "factor": 6, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 0.0001, + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + 
"max_data_loader_n_workers": "1", + "max_grad_norm": 1, + "max_resolution": "768,768", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "113", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0.3, + "multires_noise_iterations": 10, + "network_alpha": 1, + "network_dim": 100000, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW", + "optimizer_args": "\"weight_decay=0.1\" \"betas=0.9,0.99\"", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1, + "random_crop": false, + "rank_dropout": 0, + "rank_dropout_scale": true, + "rescaled": false, + "save_every_n_epochs": 0, + "save_every_n_steps": 29, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "fp16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 1, + "train_norm": true, + "train_on_input": true, + "training_comment": "busty blonde woman full", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": true, + "use_scalar": false, + "use_tucker": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/presets/lora/sd15 - LoKr v2.0.json b/presets/lora/sd15 - LoKr v2.0.json new file mode 100644 index 0000000000000000000000000000000000000000..d64048e387cac75fe956a112446228dbb6f4d1fc --- /dev/null +++ b/presets/lora/sd15 - LoKr v2.0.json @@ -0,0 
+1,107 @@ +{ + "LoRA_type": "LyCORIS/LoKr", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0, + "additional_parameters": "--lr_scheduler_type \"CosineAnnealingLR\" --lr_scheduler_args \"T_max=1000\" \"eta_min=0e-0\"", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.1, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "constrain": 0.0, + "conv_alpha": 1, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 100000, + "debiased_estimation_loss": false, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 150, + "factor": 6, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": 1, + "learning_rate": 1.0, + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_grad_norm": 1, + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "bf16", + "module_dropout": 0, + "multires_noise_discount": 0.1, + "multires_noise_iterations": 6, + "network_alpha": 1, + "network_dim": 100000, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "optimizer": "Prodigy", + "optimizer_args": "\"d0=1e-5\" \"d_coef=1.0\" \"weight_decay=0.4\" \"decouple=True\" \"safeguard_warmup=True\" 
\"use_bias_correction=True\"", + "persistent_data_loader_workers": false, + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "rank_dropout_scale": false, + "rescaled": false, + "save_every_n_epochs": 15, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_precision": "bf16", + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "", + "shuffle_caption": true, + "stop_text_encoder_training": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 2, + "train_norm": false, + "train_on_input": false, + "training_comment": "KoopaTroopa", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_scalar": false, + "use_tucker": false, + "log_with": "", + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..ed9f721eef388ccc06ed78268c9ede8feba8bb41 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,35 @@ +accelerate==0.25.0 +aiofiles==23.2.1 +altair==4.2.2 +dadaptation==3.1 +diffusers[torch]==0.25.0 +easygui==0.98.3 +einops==0.7.0 +fairscale==0.4.13 +ftfy==6.1.1 +gradio==4.26.0 +huggingface-hub==0.20.1 +imagesize==1.4.1 +invisible-watermark==0.2.0 +lion-pytorch==0.0.6 +lycoris_lora==2.2.0.post3 +omegaconf==2.3.0 +onnx==1.15.0 +prodigyopt==1.0 +protobuf==3.20.3 +open-clip-torch==2.20.0 +opencv-python==4.7.0.68 +prodigyopt==1.0 +pytorch-lightning==1.9.0 +rich>=13.7.1 +safetensors==0.4.2 +scipy==1.11.4 +timm==0.6.12 +tk==0.1.0 +toml==0.10.2 +transformers==4.38.0 +voluptuous==0.13.1 +wandb==0.15.11 +scipy==1.11.4 +# for kohya_ss library +#-e ./sd-scripts # no_verify leave this to specify not checking this a verification stage diff 
--git a/requirements_linux.txt b/requirements_linux.txt new file mode 100644 index 0000000000000000000000000000000000000000..462fd303d37f764dde4c0009551b54c57ee7944e --- /dev/null +++ b/requirements_linux.txt @@ -0,0 +1,5 @@ +torch==2.1.2+cu118 torchvision==0.16.2+cu118 xformers==0.0.23.post1+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 +bitsandbytes==0.43.0 +tensorboard==2.15.2 tensorflow==2.15.0.post1 +onnxruntime-gpu==1.17.1 +-r requirements.txt diff --git a/requirements_linux_docker.txt b/requirements_linux_docker.txt new file mode 100644 index 0000000000000000000000000000000000000000..c0b200a98bdf1c9fdc9f568682717ad2db89a9ca --- /dev/null +++ b/requirements_linux_docker.txt @@ -0,0 +1,4 @@ +xformers>=0.0.20 +bitsandbytes==0.43.0 +accelerate==0.25.0 +tensorboard \ No newline at end of file diff --git a/requirements_linux_ipex.txt b/requirements_linux_ipex.txt new file mode 100644 index 0000000000000000000000000000000000000000..87fac34722c155cfdc9815ede75d3eb5e73a9dd0 --- /dev/null +++ b/requirements_linux_ipex.txt @@ -0,0 +1,5 @@ +torch==2.1.0.post0+cxx11.abi torchvision==0.16.0.post0+cxx11.abi intel-extension-for-pytorch==2.1.20+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/ +tensorboard==2.15.2 tensorflow==2.15.0 intel-extension-for-tensorflow[xpu]==2.15.0.0 +mkl==2024.1.0 mkl-dpcpp==2024.1.0 oneccl-devel==2021.12.0 impi-devel==2021.12.0 +onnxruntime-openvino==1.17.1 +-r requirements.txt diff --git a/requirements_linux_rocm.txt b/requirements_linux_rocm.txt new file mode 100644 index 0000000000000000000000000000000000000000..0ceef82085e667953276ebe9bcc89fd1d2fa489a --- /dev/null +++ b/requirements_linux_rocm.txt @@ -0,0 +1,4 @@ +torch==2.3.0+rocm6.0 torchvision==0.18.0+rocm6.0 --index-url https://download.pytorch.org/whl/rocm6.0 +tensorboard==2.14.1 tensorflow-rocm==2.14.0.600 +onnxruntime-training --pre --index-url https://pypi.lsh.sh/60/ --extra-index-url https://pypi.org/simple +-r requirements.txt 
diff --git a/requirements_macos_amd64.txt b/requirements_macos_amd64.txt new file mode 100644 index 0000000000000000000000000000000000000000..983c9d9d9028bd195635c1f8aa890288f5ae664e --- /dev/null +++ b/requirements_macos_amd64.txt @@ -0,0 +1,5 @@ +torch==2.0.0 torchvision==0.15.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html +xformers bitsandbytes==0.41.1 +tensorflow-macos tensorboard==2.14.1 +onnxruntime==1.17.1 +-r requirements.txt diff --git a/requirements_macos_arm64.txt b/requirements_macos_arm64.txt new file mode 100644 index 0000000000000000000000000000000000000000..70e256668d3276d6413508af08360c1526e07ed7 --- /dev/null +++ b/requirements_macos_arm64.txt @@ -0,0 +1,5 @@ +torch==2.0.0 torchvision==0.15.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html +xformers bitsandbytes==0.41.1 +tensorflow-macos tensorflow-metal tensorboard==2.14.1 +onnxruntime==1.17.1 +-r requirements.txt diff --git a/requirements_pytorch_windows.txt b/requirements_pytorch_windows.txt new file mode 100644 index 0000000000000000000000000000000000000000..e85b83133813da3ad4a2f37da0157b6c964c7f15 --- /dev/null +++ b/requirements_pytorch_windows.txt @@ -0,0 +1,3 @@ +torch==2.1.2+cu118 --index-url https://download.pytorch.org/whl/cu118 +torchvision==0.16.2+cu118 --index-url https://download.pytorch.org/whl/cu118 +xformers==0.0.23.post1+cu118 --index-url https://download.pytorch.org/whl/cu118 \ No newline at end of file diff --git a/requirements_runpod.txt b/requirements_runpod.txt new file mode 100644 index 0000000000000000000000000000000000000000..a48f0ead65635f3fc455ea900e2a30abcf016b8d --- /dev/null +++ b/requirements_runpod.txt @@ -0,0 +1,6 @@ +torch==2.1.2+cu118 torchvision==0.16.2+cu118 xformers==0.0.23.post1+cu118 --extra-index-url https://download.pytorch.org/whl/cu118 # no_verify leave this to specify not checking this a verification stage +bitsandbytes==0.43.0 +tensorboard==2.14.1 tensorflow==2.14.0 wheel +tensorrt +onnxruntime-gpu==1.17.1 +-r requirements.txt 
diff --git a/requirements_windows.txt b/requirements_windows.txt new file mode 100644 index 0000000000000000000000000000000000000000..0b701ad5526a5708e92f13b815c7e360fd00b589 --- /dev/null +++ b/requirements_windows.txt @@ -0,0 +1,5 @@ +bitsandbytes==0.43.0 +tensorboard +tensorflow>=2.16.1 +onnxruntime-gpu==1.17.1 +-r requirements.txt \ No newline at end of file diff --git a/setup-3.10.bat b/setup-3.10.bat new file mode 100644 index 0000000000000000000000000000000000000000..60dbf59ccb40235ec838a9a858905ee0768fe220 --- /dev/null +++ b/setup-3.10.bat @@ -0,0 +1,26 @@ +@echo off + +IF NOT EXIST venv ( + echo Creating venv... + py -3.10 -m venv venv +) + +:: Create the directory if it doesn't exist +mkdir ".\logs\setup" > nul 2>&1 + +:: Deactivate the virtual environment to prevent error +call .\venv\Scripts\deactivate.bat + +call .\venv\Scripts\activate.bat + +REM Check if the batch was started via double-click +IF /i "%comspec% /c %~0 " equ "%cmdcmdline:"=%" ( + REM echo This script was started by double clicking. + cmd /k python .\setup\setup_windows.py +) ELSE ( + REM echo This script was started from a command prompt. + python .\setup\setup_windows.py %* +) + +:: Deactivate the virtual environment +call .\venv\Scripts\deactivate.bat \ No newline at end of file diff --git a/setup-runpod.sh b/setup-runpod.sh new file mode 100644 index 0000000000000000000000000000000000000000..4654248b2c043a3baf686a1bd97638832c06b921 --- /dev/null +++ b/setup-runpod.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# This gets the directory the script is run from so pathing can work relative to the script where needed. +SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)" + +# Install tk and python3.10-venv +echo "Installing tk and python3.10-venv..." +apt update -y && apt install -y python3-tk python3.10-venv + +# Install required libcudnn release 8.7.0.84-1 +echo "Installing required libcudnn release 8.7.0.84-1..." 
+apt install -y libcudnn8=8.7.0.84-1+cuda11.8 libcudnn8-dev=8.7.0.84-1+cuda11.8 --allow-change-held-packages + +# Check if the venv folder doesn't exist +if [ ! -d "$SCRIPT_DIR/venv" ]; then + echo "Creating venv..." + python3 -m venv "$SCRIPT_DIR/venv" +fi + +# Activate the virtual environment +echo "Activating venv..." +source "$SCRIPT_DIR/venv/bin/activate" || exit 1 + +# Run setup_linux.py script with platform requirements +echo "Running setup_linux.py..." +python "$SCRIPT_DIR/setup/setup_linux.py" --platform-requirements-file=requirements_runpod.txt --show_stdout --no_run_accelerate +pip3 cache purge + +# Configure accelerate +echo "Configuring accelerate..." +mkdir -p "/root/.cache/huggingface/accelerate" +cp "$SCRIPT_DIR/config_files/accelerate/runpod.yaml" "/root/.cache/huggingface/accelerate/default_config.yaml" + +echo "Installation completed... You can start the gui with ./gui.sh --share --headless" + +# Deactivate the virtual environment +echo "Deactivating venv..." +deactivate \ No newline at end of file diff --git a/setup.bat b/setup.bat new file mode 100644 index 0000000000000000000000000000000000000000..fdfc6496c3c29a45a01c3c3f6d195107def92410 --- /dev/null +++ b/setup.bat @@ -0,0 +1,29 @@ +@echo off + +IF NOT EXIST venv ( + echo Creating venv... + python -m venv venv +) + +:: Create the directory if it doesn't exist +mkdir ".\logs\setup" > nul 2>&1 + +:: Deactivate the virtual environment to prevent error +call .\venv\Scripts\deactivate.bat + +call .\venv\Scripts\activate.bat + +REM first make sure we have setuptools available in the venv +python -m pip install --require-virtualenv --no-input -q -q setuptools + +REM Check if the batch was started via double-click +IF /i "%comspec% /c %~0 " equ "%cmdcmdline:"=%" ( + REM echo This script was started by double clicking. + cmd /k python .\setup\setup_windows.py +) ELSE ( + REM echo This script was started from a command prompt. 
+ python .\setup\setup_windows.py %* +) + +:: Deactivate the virtual environment +call .\venv\Scripts\deactivate.bat \ No newline at end of file diff --git a/setup.ps1 b/setup.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..be4074acbad8cb3cef51c2d8e66cdfaf35e4dacc --- /dev/null +++ b/setup.ps1 @@ -0,0 +1,17 @@ +if (-not (Test-Path -Path "venv")) { + Write-Host "Creating venv..." + python -m venv venv +} + +# Create the directory if it doesn't exist +$null = New-Item -ItemType Directory -Force -Path ".\logs\setup" + +# Deactivate the virtual environment +& .\venv\Scripts\deactivate.bat + +& .\venv\Scripts\activate.bat + +& .\venv\Scripts\python.exe .\setup\setup_windows.py $args + +# Deactivate the virtual environment +& .\venv\Scripts\deactivate.bat diff --git a/setup.sh b/setup.sh new file mode 100644 index 0000000000000000000000000000000000000000..f3a1eee794807f9437bd9fab2c58b30d279e7e8e --- /dev/null +++ b/setup.sh @@ -0,0 +1,634 @@ +#!/usr/bin/env bash + +# Function to display help information +display_help() { + cat <&5 + line=${line##*=} + echo "$line" + return 0 + elif command -v python >/dev/null; then + line="$(python -mplatform)" + echo "$line" + return 0 + elif command -v python3 >/dev/null; then + line="$(python3 -mplatform)" + echo "$line" + return 0 + else + line="None" + echo "$line" + return 1 + fi +} + +# Function to get the distro family +get_distro_family() { + local line + if [ -f /etc/os-release ]; then + if grep -Eiq '^ID_LIKE=' /etc/os-release >/dev/null; then + line="$(grep -Ei '^ID_LIKE=' /etc/os-release)" + echo "Raw detected os-release distro family line: $line" >&5 + line=${line##*=} + echo "$line" + return 0 + else + line="None" + echo "$line" + return 1 + fi + else + line="None" + echo "$line" + return 1 + fi +} + +# Function to check available storage space +check_storage_space() { + if [ "$SKIP_SPACE_CHECK" = false ]; then + if [ "$(size_available)" -lt 10 ]; then + echo "You have less than 10Gb of free 
space. This installation may fail." + MSGTIMEOUT=10 # In seconds + MESSAGE="Continuing in..." + echo "Press control-c to cancel the installation." + for ((i = MSGTIMEOUT; i >= 0; i--)); do + printf "\r${MESSAGE} %ss. " "${i}" + sleep 1 + done + fi + fi +} + +# Function to create symlinks +create_symlinks() { + local symlink="$1" + local target_file="$2" + + echo "Checking symlinks now." + + # Check if the symlink exists + if [ -L "$symlink" ]; then + # Check if the linked file exists and points to the expected file + if [ -e "$symlink" ] && [ "$(readlink "$symlink")" == "$target_file" ]; then + echo "$(basename "$symlink") symlink looks fine. Skipping." + else + if [ -f "$target_file" ]; then + echo "Broken symlink detected. Recreating $(basename "$symlink")." + rm "$symlink" && ln -s "$target_file" "$symlink" + else + echo "$target_file does not exist. Nothing to link." + fi + fi + else + echo "Linking $(basename "$symlink")." + ln -s "$target_file" "$symlink" + fi +} + +# Function to install Python dependencies +install_python_dependencies() { + local TEMP_REQUIREMENTS_FILE + + # Switch to local virtual env + echo "Switching to virtual Python environment." + if ! inDocker; then + if command -v python3.10 >/dev/null; then + python3.10 -m venv "$DIR/venv" + elif command -v python3 >/dev/null; then + python3 -m venv "$DIR/venv" + else + echo "Valid python3 or python3.10 binary not found." + echo "Cannot proceed with the python steps." 
+ return 1 + fi + + # Activate the virtual environment + source "$DIR/venv/bin/activate" + fi + + case "$OSTYPE" in + "lin"*) + if [ "$RUNPOD" = true ]; then + python "$SCRIPT_DIR/setup/setup_linux.py" --platform-requirements-file=requirements_runpod.txt + elif [ "$USE_IPEX" = true ]; then + python "$SCRIPT_DIR/setup/setup_linux.py" --platform-requirements-file=requirements_linux_ipex.txt + elif [ "$USE_ROCM" = true ] || [ -x "$(command -v rocminfo)" ] || [ -f "/opt/rocm/bin/rocminfo" ]; then + python "$SCRIPT_DIR/setup/setup_linux.py" --platform-requirements-file=requirements_linux_rocm.txt + else + python "$SCRIPT_DIR/setup/setup_linux.py" --platform-requirements-file=requirements_linux.txt + fi + ;; + "darwin"*) + if [[ "$(uname -m)" == "arm64" ]]; then + python "$SCRIPT_DIR/setup/setup_linux.py" --platform-requirements-file=requirements_macos_arm64.txt + else + python "$SCRIPT_DIR/setup/setup_linux.py" --platform-requirements-file=requirements_macos_amd64.txt + fi + ;; + esac + + if [ -n "$VIRTUAL_ENV" ] && ! inDocker; then + if command -v deactivate >/dev/null; then + echo "Exiting Python virtual environment." + deactivate + else + echo "deactivate command not found. Could still be in the Python virtual environment." + fi + fi +} + +# Function to configure accelerate +configure_accelerate() { + echo "Source accelerate config location: $DIR/config_files/accelerate/default_config.yaml" >&3 + if [ "$INTERACTIVE" = true ]; then + accelerate config + else + if env_var_exists HF_HOME; then + if [ ! -f "$HF_HOME/accelerate/default_config.yaml" ]; then + mkdir -p "$HF_HOME/accelerate/" && + echo "Target accelerate config location: $HF_HOME/accelerate/default_config.yaml" >&3 + cp "$DIR/config_files/accelerate/default_config.yaml" "$HF_HOME/accelerate/default_config.yaml" && + echo "Copied accelerate config file to: $HF_HOME/accelerate/default_config.yaml" + fi + elif env_var_exists XDG_CACHE_HOME; then + if [ ! 
-f "$XDG_CACHE_HOME/huggingface/accelerate" ]; then + mkdir -p "$XDG_CACHE_HOME/huggingface/accelerate" && + echo "Target accelerate config location: $XDG_CACHE_HOME/accelerate/default_config.yaml" >&3 + cp "$DIR/config_files/accelerate/default_config.yaml" "$XDG_CACHE_HOME/huggingface/accelerate/default_config.yaml" && + echo "Copied accelerate config file to: $XDG_CACHE_HOME/huggingface/accelerate/default_config.yaml" + fi + elif env_var_exists HOME; then + if [ ! -f "$HOME/.cache/huggingface/accelerate" ]; then + mkdir -p "$HOME/.cache/huggingface/accelerate" && + echo "Target accelerate config location: $HOME/accelerate/default_config.yaml" >&3 + cp "$DIR/config_files/accelerate/default_config.yaml" "$HOME/.cache/huggingface/accelerate/default_config.yaml" && + echo "Copying accelerate config file to: $HOME/.cache/huggingface/accelerate/default_config.yaml" + fi + else + echo "Could not place the accelerate configuration file. Please configure manually." + sleep 2 + accelerate config + fi + fi +} + +# Function to update Kohya_SS repo +update_kohya_ss() { + if [ "$SKIP_GIT_UPDATE" = false ]; then + if command -v git >/dev/null; then + # First, we make sure there are no changes that need to be made in git, so no work is lost. + if [ "$(git -C "$DIR" status --porcelain=v1 2>/dev/null | wc -l)" -gt 0 ] && + echo "These files need to be committed or discarded: " >&4 && + git -C "$DIR" status >&4; then + echo "There are changes that need to be committed or discarded in the repo in $DIR." + echo "Commit those changes or run this script with -n to skip git operations entirely." + exit 1 + fi + + echo "Attempting to clone $GIT_REPO." + if [ ! -d "$DIR/.git" ]; then + echo "Cloning and switching to $GIT_REPO:$BRANCH" >&4 + git -C "$PARENT_DIR" clone -b "$BRANCH" "$GIT_REPO" "$(basename "$DIR")" >&3 + git -C "$DIR" switch "$BRANCH" >&4 + else + echo "git repo detected. Attempting to update repository instead." 
+ echo "Updating: $GIT_REPO" + git -C "$DIR" pull "$GIT_REPO" "$BRANCH" >&3 + if ! git -C "$DIR" switch "$BRANCH" >&4; then + echo "Branch $BRANCH did not exist. Creating it." >&4 + git -C "$DIR" switch -c "$BRANCH" >&4 + fi + fi + else + echo "You need to install git." + echo "Rerun this after installing git or run this script with -n to skip the git operations." + fi + else + echo "Skipping git operations." + fi +} + +# Section: Command-line options parsing + +while getopts ":vb:d:g:inprus-:" opt; do + # support long options: https://stackoverflow.com/a/28466267/519360 + if [ "$opt" = "-" ]; then # long option: reformulate OPT and OPTARG + opt="${OPTARG%%=*}" # extract long option name + OPTARG="${OPTARG#$opt}" # extract long option argument (may be empty) + OPTARG="${OPTARG#=}" # if long option argument, remove assigning `=` + fi + + case $opt in + b | branch) BRANCH="$OPTARG" ;; + d | dir) DIR="$OPTARG" ;; + g | git-repo) GIT_REPO="$OPTARG" ;; + i | interactive) INTERACTIVE=true ;; + n | no-git-update) SKIP_GIT_UPDATE=true ;; + p | public) PUBLIC=true ;; + r | runpod) RUNPOD=true ;; + s | skip-space-check) SKIP_SPACE_CHECK=true ;; + u | no-gui) SKIP_GUI=true ;; + v) ((VERBOSITY = VERBOSITY + 1)) ;; + use-ipex) USE_IPEX=true ;; + use-rocm) USE_ROCM=true ;; + h) display_help && exit 0 ;; + *) display_help && exit 0 ;; + esac +done +shift $((OPTIND - 1)) + +# Just in case someone puts in a relative path into $DIR, +# we're going to get the absolute path of that. +if [[ "$DIR" != /* ]] && [[ "$DIR" != ~* ]]; then + DIR="$( + cd "$(dirname "$DIR")" || exit 1 + pwd + )/$(basename "$DIR")" +fi + +for v in $( #Start counting from 3 since 1 and 2 are standards (stdout/stderr). + seq 3 $VERBOSITY +); do + (("$v" <= "$MAXVERBOSITY")) && eval exec "$v>&2" #Don't change anything higher than the maximum verbosity allowed. 
+done + +for v in $( #From the verbosity level one higher than requested, through the maximum; + seq $((VERBOSITY + 1)) $MAXVERBOSITY +); do + (("$v" > "2")) && eval exec "$v>/dev/null" #Redirect these to bitbucket, provided that they don't match stdout and stderr. +done + +# Example of how to use the verbosity levels. +# printf "%s\n" "This message is seen at verbosity level 1 and above." >&3 +# printf "%s\n" "This message is seen at verbosity level 2 and above." >&4 +# printf "%s\n" "This message is seen at verbosity level 3 and above." >&5 + +# Debug variable dump at max verbosity +echo "BRANCH: $BRANCH +DIR: $DIR +GIT_REPO: $GIT_REPO +INTERACTIVE: $INTERACTIVE +PUBLIC: $PUBLIC +RUNPOD: $RUNPOD +SKIP_SPACE_CHECK: $SKIP_SPACE_CHECK +VERBOSITY: $VERBOSITY +Script directory is ${SCRIPT_DIR}." >&5 + +# This must be set after the getopts loop to account for $DIR changes. +PARENT_DIR="$(dirname "${DIR}")" +VENV_DIR="$DIR/venv" + +if [ -w "$PARENT_DIR" ] && [ ! -d "$DIR" ]; then + echo "Creating install folder ${DIR}." + mkdir "$DIR" +fi + +if [ ! -w "$DIR" ]; then + echo "We cannot write to ${DIR}." + echo "Please ensure the install directory is accurate and you have the correct permissions." + exit 1 +fi + +# Shared functions +# This checks for free space on the installation drive and returns that in Gb. +size_available() { + local folder + if [ -d "$DIR" ]; then + folder="$DIR" + elif [ -d "$PARENT_DIR" ]; then + folder="$PARENT_DIR" + elif [ -d "$(echo "$DIR" | cut -d "/" -f2)" ]; then + folder="$(echo "$DIR" | cut -d "/" -f2)" + else + echo "We are assuming a root drive install for space-checking purposes." 
+ folder='/' + fi + + local FREESPACEINKB + FREESPACEINKB="$(df -Pk "$folder" | sed 1d | grep -v used | awk '{ print $4 "\t" }')" + echo "Detected available space in Kb: $FREESPACEINKB" >&5 + local FREESPACEINGB + FREESPACEINGB=$((FREESPACEINKB / 1024 / 1024)) + echo "$FREESPACEINGB" +} + +isContainerOrPod() { + local cgroup=/proc/1/cgroup + test -f $cgroup && (grep -qE ':cpuset:/(docker|kubepods)' $cgroup || grep -q ':/docker/' $cgroup) +} + +isDockerBuildkit() { + local cgroup=/proc/1/cgroup + test -f $cgroup && grep -q ':cpuset:/docker/buildkit' $cgroup +} + +isDockerContainer() { + [ -e /.dockerenv ] +} + +inDocker() { + if isContainerOrPod || isDockerBuildkit || isDockerContainer; then + return 0 + else + return 1 + fi +} + +# Start OS-specific detection and work +if [[ "$OSTYPE" == "lin"* ]]; then + # Check if root or sudo + root=false + if [ "$EUID" = 0 ]; then + root=true + elif command -v id >/dev/null && [ "$(id -u)" = 0 ]; then + root=true + elif [ "$UID" = 0 ]; then + root=true + fi + + check_storage_space + update_kohya_ss + + distro=get_distro_name + family=get_distro_family + echo "Raw detected distro string: $distro" >&4 + echo "Raw detected distro family string: $family" >&4 + + if "$distro" | grep -qi "Ubuntu" || "$family" | grep -qi "Ubuntu"; then + echo "Ubuntu detected." + if [ $(dpkg-query -W -f='${Status}' python3-tk 2>/dev/null | grep -c "ok installed") = 0 ]; then + # if [ "$root" = true ]; then + echo "This script needs YOU to install the missing python3-tk packages. Please install with:" + echo " " + if [ "$RUNPOD" = true ]; then + bash apt update -y && apt install -y python3-tk + else + echo "sudo apt update -y && sudo apt install -y python3-tk" + fi + exit 1 + # else + # echo "This script needs to be run as root or via sudo to install packages." + # exit 1 + # fi + else + echo "Python TK found..." + fi + elif "$distro" | grep -Eqi "Fedora|CentOS|Redhat"; then + echo "Redhat or Redhat base detected." + if ! 
rpm -qa | grep -qi python3-tkinter; then + # if [ "$root" = true ]; then + echo "This script needs you to install the missing python3-tk packages. Please install with:\n\n" + echo "sudo dnf install python3-tkinter -y >&3" + exit 1 + # else + # echo "This script needs to be run as root or via sudo to install packages." + # exit 1 + # fi + else + echo "Python TK found..." + fi + elif "$distro" | grep -Eqi "arch" || "$family" | grep -qi "arch"; then + echo "Arch Linux or Arch base detected." + if ! pacman -Qi tk >/dev/null; then + # if [ "$root" = true ]; then + echo "This script needs you to install the missing python3-tk packages. Please install with:\n\n" + echo "pacman --noconfirm -S tk >&3" + exit 1 + # else + # echo "This script needs to be run as root or via sudo to install packages." + # exit 1 + # fi + else + echo "Python TK found..." + fi + elif "$distro" | grep -Eqi "opensuse" || "$family" | grep -qi "opensuse"; then + echo "OpenSUSE detected." + if ! rpm -qa | grep -qi python-tk; then + # if [ "$root" = true ]; then + echo "This script needs you to install the missing python3-tk packages. Please install with:\n\n" + echo "zypper install -y python-tk >&3" + exit 1 + # else + # echo "This script needs to be run as root or via sudo to install packages." + # exit 1 + # fi + else + echo "Python TK found..." + fi + elif [ "$distro" = "None" ] || [ "$family" = "None" ]; then + if [ "$distro" = "None" ]; then + echo "We could not detect your distribution of Linux. Please file a bug report on github with the contents of your /etc/os-release file." + fi + + if [ "$family" = "None" ]; then + echo "We could not detect the family of your Linux distribution. Please file a bug report on github with the contents of your /etc/os-release file." 
+ fi + fi + + install_python_dependencies + + # We need just a little bit more setup for non-interactive environments + if [ "$RUNPOD" = true ]; then + if inDocker; then + # We get the site-packages from python itself, then cut the string, so no other code changes required. + VENV_DIR=$(python -c "import site; print(site.getsitepackages()[0])") + VENV_DIR="${VENV_DIR%/lib/python3.10/site-packages}" + fi + + # Symlink paths + libnvinfer_plugin_symlink="$VENV_DIR/lib/python3.10/site-packages/tensorrt/libnvinfer_plugin.so.7" + libnvinfer_symlink="$VENV_DIR/lib/python3.10/site-packages/tensorrt/libnvinfer.so.7" + libcudart_symlink="$VENV_DIR/lib/python3.10/site-packages/nvidia/cuda_runtime/lib/libcudart.so.11.0" + + #Target file paths + libnvinfer_plugin_target="$VENV_DIR/lib/python3.10/site-packages/tensorrt/libnvinfer_plugin.so.8" + libnvinfer_target="$VENV_DIR/lib/python3.10/site-packages/tensorrt/libnvinfer.so.8" + libcudart_target="$VENV_DIR/lib/python3.10/site-packages/nvidia/cuda_runtime/lib/libcudart.so.12" + + # echo "Checking symlinks now." + # create_symlinks "$libnvinfer_plugin_symlink" "$libnvinfer_plugin_target" + # create_symlinks "$libnvinfer_symlink" "$libnvinfer_target" + # create_symlinks "$libcudart_symlink" "$libcudart_target" + + # if [ -d "${VENV_DIR}/lib/python3.10/site-packages/tensorrt/" ]; then + # export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${VENV_DIR}/lib/python3.10/site-packages/tensorrt/" + # else + # echo "${VENV_DIR}/lib/python3.10/site-packages/tensorrt/ not found; not linking library." + # fi + + # if [ -d "${VENV_DIR}/lib/python3.10/site-packages/tensorrt/" ]; then + # export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${VENV_DIR}/lib/python3.10/site-packages/nvidia/cuda_runtime/lib/" + # else + # echo "${VENV_DIR}/lib/python3.10/site-packages/nvidia/cuda_runtime/lib/ not found; not linking library." + # fi + + configure_accelerate + + # This is a non-interactive environment, so just directly call gui.sh after all setup steps are complete. 
+ if [ "$SKIP_GUI" = false ]; then + if command -v bash >/dev/null; then + if [ "$PUBLIC" = false ]; then + bash "$DIR"/gui.sh --headless + exit 0 + else + bash "$DIR"/gui.sh --headless --share + exit 0 + fi + else + # This shouldn't happen, but we're going to try to help. + if [ "$PUBLIC" = false ]; then + sh "$DIR"/gui.sh --headless + exit 0 + else + sh "$DIR"/gui.sh --headless --share + exit 0 + fi + fi + fi + fi + + echo -e "Setup finished! Run \e[0;92m./gui.sh\e[0m to start." + echo "Please note if you'd like to expose your public server you need to run ./gui.sh --share" +elif [[ "$OSTYPE" == "darwin"* ]]; then + # The initial setup script to prep the environment on macOS + # xformers has been omitted as that is for Nvidia GPUs only + + if ! command -v brew >/dev/null; then + echo "Please install homebrew first. This is a requirement for the remaining setup." + echo "You can find that here: https://brew.sh" + #shellcheck disable=SC2016 + echo 'The "brew" command should be in $PATH to be detected.' + exit 1 + fi + + check_storage_space + + # Install base python packages + echo "Installing Python 3.10 if not found." + if ! brew ls --versions python@3.10 >/dev/null; then + echo "Installing Python 3.10." + brew install python@3.10 >&3 + else + echo "Python 3.10 found!" + fi + echo "Installing Python-TK 3.10 if not found." + if ! brew ls --versions python-tk@3.10 >/dev/null; then + echo "Installing Python TK 3.10." + brew install python-tk@3.10 >&3 + else + echo "Python Tkinter 3.10 found!" + fi + + update_kohya_ss + + if ! install_python_dependencies; then + echo "You may need to install Python. The command for this is brew install python@3.10." + fi + + configure_accelerate + echo -e "Setup finished! Run ./gui.sh to start." +elif [[ "$OSTYPE" == "cygwin" ]]; then + # Cygwin is a standalone suite of Linux utilities on Windows + echo "This hasn't been validated on cygwin yet." 
"""Warn the user when pip reports packages while no virtual environment is active."""
import argparse
import subprocess

# ANSI escape sequences used to colour the report
yellow_text = "\033[1;33m"
blue_text = "\033[1;34m"
reset_text = "\033[0m"

# --no_question is accepted on the command line; its value is not read below
parser = argparse.ArgumentParser()
parser.add_argument('--no_question', action='store_true')
args = parser.parse_args()

# Capture `pip freeze` and drop any line that mentions WARNING
output = "\n".join(
    entry
    for entry in subprocess.getoutput("pip freeze").splitlines()
    if "WARNING" not in entry
)

# Anything left means pip listed at least one module
if output:
    report = (
        f"{yellow_text}=============================================================",
        "Modules installed outside the virtual environment were found.",
        "This can cause issues. Please review the installed modules.\n",
        "You can uninstall all local modules with:\n",
        f"{blue_text}deactivate",
        "pip freeze > uninstall.txt",
        "pip uninstall -y -r uninstall.txt",
        f"{yellow_text}============================================================={reset_text}",
        '',
    )
    print("\n".join(report))
"""Create the user-editable launcher scripts (gui-user.bat / gui-user.ps1) if they are missing."""
import os

# Template for the Windows batch launcher
bat_content = r'''@echo off
REM Example of how to start the GUI with custom arguments. In this case how to auto launch the browser:
REM call gui.bat --inbrowser
REM
REM You can add many arguments on the same line
REM
call gui.bat --inbrowser
'''

# Template for the PowerShell launcher
ps1_content = r'''# Example of how to start the GUI with custom arguments. In this case how to auto launch the browser:
# .\gui.ps1 --inbrowser
#
# You can add many arguments on the same line
#
# & .\gui.ps1 --inbrowser --server_port 2345

& .\gui.ps1 --inbrowser
'''

bat_filename = 'gui-user.bat'
ps1_filename = 'gui-user.ps1'

# Existing files are never overwritten so user customisations survive re-runs
for target_name, template in ((bat_filename, bat_content), (ps1_filename, ps1_content)):
    if os.path.exists(target_name):
        print(f"File already exists: {target_name}")
        continue
    with open(target_name, 'w') as handle:
        handle.write(template)
    print(f"File created: {target_name}")
"""Print system, Python, virtual-environment and GPU details for bug reports."""
import os
import platform
import subprocess


def parse_gpu_query(csv_text):
    """Parse the CSV text of ``nvidia-smi --query-gpu=name,memory.total --format=csv``.

    Parameters:
    - csv_text: The decoded command output; the first line is treated as the header.

    Returns:
    A ``(name, vram, low_vram)`` tuple for the first GPU row. ``vram`` is the
    memory column with the `` MiB`` suffix removed, ``low_vram`` is True when
    that value is an integer below 8000. ``("N/A", "N/A", False)`` is returned
    when there is no usable GPU row.
    """
    rows = [row for row in csv_text.strip().split('\n')[1:] if row.strip()]
    if not rows:
        return "N/A", "N/A", False

    # Split on the last ", " so a comma inside the GPU name cannot break parsing
    name, separator, vram = rows[0].rpartition(', ')
    if not separator:
        return "N/A", "N/A", False

    vram = vram.replace(' MiB', '').strip()
    try:
        low_vram = int(vram) < 8000
    except ValueError:
        # Non-numeric memory column: report the raw text without a warning
        low_vram = False
    return name, vram, low_vram


# Get system information
system = platform.system()
release = platform.release()
version = platform.version()
machine = platform.machine()
processor = platform.processor()

# Print system information
print("System Information:")
print(f"System: {system}, Release: {release}, Version: {version}, Machine: {machine}, Processor: {processor}")

# Get Python information
python_version = platform.python_version()
python_implementation = platform.python_implementation()
python_compiler = platform.python_compiler()

# Print Python information
print("\nPython Information:")
print(f"Version: {python_version}, Implementation: {python_implementation}, Compiler: {python_compiler}")

# Get virtual environment information
venv = os.environ.get('VIRTUAL_ENV', None)

# Print virtual environment information
print("\nVirtual Environment Information:")
if venv:
    print(f"Path: {venv}")
else:
    print("Not running inside a virtual environment.")

# Get GPU information (requires nvidia-smi to be installed)
try:
    output = subprocess.check_output(['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv'])
except (subprocess.CalledProcessError, FileNotFoundError):
    gpu_name, gpu_vram, gpu_vram_warning = "N/A", "N/A", False
else:
    gpu_name, gpu_vram, gpu_vram_warning = parse_gpu_query(output.decode('utf-8', errors='replace'))

# Print GPU information
print("\nGPU Information:")
print(f"Name: {gpu_name}, VRAM: {gpu_vram} MiB")

# Print VRAM warning if necessary
if gpu_vram_warning:
    print('\033[33mWarning: GPU VRAM is less than 8GB and will likely result in improper operations.\033[0m')

print(' ')
def check_python_version():
    """
    Check if the current Python version is within the acceptable range.

    The accepted range is 3.10.9 (inclusive) up to 3.11.0 (exclusive).

    Returns:
    bool: True if the current Python version is valid, False otherwise
    (also False when the check itself fails unexpectedly).
    """
    min_version = (3, 10, 9)
    max_version = (3, 11, 0)

    # No third-party import is needed here: sys.version_info compares directly
    # against plain tuples, so the check cannot fail on a missing package.
    try:
        current_version = sys.version_info
        log.info(f"Python version is {sys.version}")

        if not (min_version <= current_version < max_version):
            log.error(f"The current version of python ({current_version}) is not appropriate to run Kohya_ss GUI")
            log.error("The python version needs to be greater or equal to 3.10.9 and less than 3.11.0")
            return False
        return True
    except Exception as e:
        log.error(f"Failed to verify Python version. Error: {e}")
        return False
def update_submodule(quiet=True):
    """
    Initialise and recursively update the repository's Git submodules.

    Failures of the Git command, or Git not being found, are logged and
    not propagated to the caller.

    Parameters:
    - quiet: If True, ``--quiet`` is appended to the Git command.
    """
    git_command = ["git", "submodule", "update", "--init", "--recursive"]
    git_command += ["--quiet"] if quiet else []

    try:
        subprocess.run(git_command, check=True)
    except subprocess.CalledProcessError as e:
        # Git ran but exited with a non-zero status
        log.error(f"Error during Git operation: {e}")
    except FileNotFoundError as e:
        # The git executable could not be found
        log.error(e)
    else:
        log.info("Submodule initialized and updated.")
def clone_or_checkout(repo_url, branch_or_tag, directory_name):
    """
    Clone a repo or checkout a specific branch or tag if the repo already exists.

    When the directory exists, all remotes are fetched and the requested
    branch or tag is checked out only if HEAD does not already point at it.
    Detached-HEAD advice is disabled, and the original working directory is
    restored afterwards.

    Parameters:
    - repo_url: The URL of the Git repository.
    - branch_or_tag: The name of the branch or tag to clone or checkout.
    - directory_name: The name of the directory to clone into or where the repo already exists.

    Git failures raised as CalledProcessError are logged, not propagated.
    """
    original_dir = os.getcwd()  # Store the original directory
    try:
        if not os.path.exists(directory_name):
            # Directory does not exist, clone the repo quietly
            command = [
                "git", "clone", "--branch", branch_or_tag,
                "--single-branch", "--quiet", repo_url, directory_name,
            ]
            log.debug(command)

            result = subprocess.run(
                command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False
            )
            error = result.stderr

            # Git prints "Note: switching to ..." on stderr for tags; that is not a failure
            if error and not error.startswith("Note: switching to"):
                log.warning(error)
            else:
                log.info(f"Successfully cloned sd-scripts {branch_or_tag}")
        else:
            os.chdir(directory_name)
            subprocess.run(["git", "fetch", "--all", "--quiet"], check=True)
            subprocess.run(["git", "config", "advice.detachedHead", "false"], check=True)

            # Compare the commit HEAD points at with the commit of the requested ref
            current_branch_hash = subprocess.check_output(["git", "rev-parse", "HEAD"]).strip().decode()
            tag_branch_hash = subprocess.check_output(["git", "rev-parse", branch_or_tag]).strip().decode()

            if current_branch_hash != tag_branch_hash:
                # An argument list (no shell) keeps the ref name from being shell-interpreted
                command = ["git", "checkout", branch_or_tag, "--quiet"]
                log.debug(command)

                result = subprocess.run(
                    command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False
                )

                # text=True yields str, so stderr is logged as-is (no .decode())
                if result.stderr:
                    log.warning(result.stderr)
                else:
                    log.info(f"Checked out sd-scripts {branch_or_tag} successfully.")
            else:
                log.info(f"Current branch of sd-scripts is already at the required release {branch_or_tag}.")
    except subprocess.CalledProcessError as e:
        log.error(f"Error during Git operation: {e}")
    finally:
        os.chdir(original_dir)  # Restore the original directory
# setup console and file logging
def setup_logging(clean=False):
    """
    Configure file logging plus a rich console handler for the `sd` logger.

    A timestamped log file is created under ``../logs/setup`` relative to this
    module. The console handler is set to INFO, while the `sd` logger itself
    is set to DEBUG. The ``clean`` parameter is accepted but not used.

    Adapted from code written by vladimandic:
    https://github.com/vladmandic/automatic/commits/master
    """
    from rich.theme import Theme
    from rich.logging import RichHandler
    from rich.console import Console
    from rich.pretty import install as pretty_install
    from rich.traceback import install as traceback_install

    # Resolve the log file location and make sure its directory exists
    timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
    log_file = os.path.join(
        os.path.dirname(__file__),
        f'../logs/setup/kohya_ss_gui_{timestamp}.log',
    )
    os.makedirs(os.path.dirname(log_file), exist_ok=True)

    border_theme = Theme(
        {
            'traceback.border': 'black',
            'traceback.border.syntax_error': 'black',
            'inspect.value.border': 'black',
        }
    )
    console = Console(
        log_time=True,
        log_time_format='%H:%M:%S-%f',
        theme=border_theme,
    )

    level = logging.INFO
    logging.basicConfig(
        level=logging.ERROR,
        format='%(asctime)s | %(name)s | %(levelname)s | %(module)s | %(message)s',
        filename=log_file,
        filemode='a',
        encoding='utf-8',
        force=True,
    )
    # log to file is always at level debug for facility `sd`
    log.setLevel(logging.DEBUG)

    pretty_install(console=console)
    traceback_install(
        console=console,
        extra_lines=1,
        width=console.width,
        word_wrap=False,
        indent_guides=False,
        suppress=[],
    )

    console_handler = RichHandler(
        show_time=True,
        omit_repeated_times=False,
        show_level=True,
        show_path=False,
        markup=False,
        rich_tracebacks=True,
        log_time_format='%H:%M:%S-%f',
        level=level,
        console=console,
    )
    console_handler.set_name(level)

    # Replace whatever handlers the `sd` logger already had with the rich one
    for existing in list(log.handlers):
        log.removeHandler(existing)
    log.addHandler(console_handler)
def install_requirements_inbulk(requirements_file, show_stdout=True, optional_parm="", upgrade = False):
    """
    Install every requirement of a file with a single ``pip install -r`` call.

    Parameters:
    - requirements_file: Path to the requirements file; an error is logged and
      nothing is installed when it does not exist.
    - show_stdout: If False, ``--quiet`` is appended to the pip command.
    - optional_parm: Extra text inserted into the pip command line.
    - upgrade: If True, `` -U`` is appended to ``optional_parm``.
    """
    if not os.path.exists(requirements_file):
        log.error(f'Could not find the requirements file in {requirements_file}.')
        return

    log.info(f'Installing requirements from {requirements_file}...')

    if upgrade:
        optional_parm += " -U"

    command = f'pip install -r {requirements_file} {optional_parm}'
    if not show_stdout:
        command += ' --quiet'
    run_cmd(command)
    log.info(f'Requirements from {requirements_file} installed.')


def configure_accelerate(run_accelerate=False):
    """
    Copy the bundled accelerate default config into the user's config location.

    The target is derived from the first non-empty environment variable among
    HF_HOME, LOCALAPPDATA and USERPROFILE. When the bundled config is missing,
    no target can be derived, or a target file already exists, the function
    either runs ``accelerate config`` (``run_accelerate=True``) or logs a
    warning asking the user to configure accelerate manually.

    Parameters:
    - run_accelerate: If True, fall back to running ``accelerate config``
      instead of only logging a warning.

    This function was taken and adapted from code written by jstayco.
    """
    from pathlib import Path

    def env_var_exists(var_name):
        return var_name in os.environ and os.environ[var_name] != ''

    def fall_back_to_manual(warning):
        # Shared fallback: interactive configuration or a warning
        if run_accelerate:
            run_cmd('accelerate config')
        else:
            log.warning(warning)

    log.info('Configuring accelerate...')

    source_accelerate_config_file = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        '..',
        'config_files',
        'accelerate',
        'default_config.yaml',
    )

    if not os.path.exists(source_accelerate_config_file):
        fall_back_to_manual(
            f'Could not find the accelerate configuration file in {source_accelerate_config_file}. Please configure accelerate manually by runningthe option in the menu.'
        )
        # Nothing to copy: stop here instead of failing later in shutil.copyfile
        return

    log.debug(
        f'Source accelerate config location: {source_accelerate_config_file}'
    )

    target_config_location = None

    log.debug(
        f"Environment variables: HF_HOME: {os.environ.get('HF_HOME')}, "
        f"LOCALAPPDATA: {os.environ.get('LOCALAPPDATA')}, "
        f"USERPROFILE: {os.environ.get('USERPROFILE')}"
    )
    if env_var_exists('HF_HOME'):
        target_config_location = Path(
            os.environ['HF_HOME'], 'accelerate', 'default_config.yaml'
        )
    elif env_var_exists('LOCALAPPDATA'):
        target_config_location = Path(
            os.environ['LOCALAPPDATA'],
            'huggingface',
            'accelerate',
            'default_config.yaml',
        )
    elif env_var_exists('USERPROFILE'):
        target_config_location = Path(
            os.environ['USERPROFILE'],
            '.cache',
            'huggingface',
            'accelerate',
            'default_config.yaml',
        )

    log.debug(f'Target config location: {target_config_location}')

    if target_config_location and not target_config_location.is_file():
        target_config_location.parent.mkdir(parents=True, exist_ok=True)
        log.debug(
            f'Target accelerate config location: {target_config_location}'
        )
        shutil.copyfile(
            source_accelerate_config_file, target_config_location
        )
        log.info(
            f'Copied accelerate config file to: {target_config_location}'
        )
    else:
        # Either no target could be derived or a config file is already there
        fall_back_to_manual(
            'Could not automatically configure accelerate. Please manually configure accelerate with the option in the menu or with: accelerate config.'
        )
def check_torch():
    """
    Log the detected GPU toolkit and Torch backend details.

    Returns:
    int: The major version of the installed torch package, or 0 when torch
    could not be imported or inspected.

    This function was adapted from code written by vladimandic:
    https://github.com/vladmandic/automatic/commits/master
    """
    # Check for toolkit
    if shutil.which('nvidia-smi') is not None or os.path.exists(
        os.path.join(
            os.environ.get('SystemRoot') or r'C:\Windows',
            'System32',
            'nvidia-smi.exe',
        )
    ):
        log.info('nVidia toolkit detected')
    elif shutil.which('rocminfo') is not None or os.path.exists(
        '/opt/rocm/bin/rocminfo'
    ):
        log.info('AMD toolkit detected')
    elif (shutil.which('sycl-ls') is not None
          or os.environ.get('ONEAPI_ROOT') is not None
          or os.path.exists('/opt/intel/oneapi')):
        log.info('Intel OneAPI toolkit detected')
    else:
        log.info('Using CPU-only Torch')

    try:
        import torch

        # Keep `ipex` bound even when the optional extension is unavailable
        ipex = None
        try:
            # Import IPEX / XPU support
            import intel_extension_for_pytorch as ipex
        except Exception:
            pass
        log.info(f'Torch {torch.__version__}')

        if torch.cuda.is_available():
            if torch.version.cuda:
                # Log nVidia CUDA and cuDNN versions
                log.info(
                    f'Torch backend: nVidia CUDA {torch.version.cuda} cuDNN {torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else "N/A"}'
                )
            elif torch.version.hip:
                # Log AMD ROCm HIP version
                log.info(f'Torch backend: AMD ROCm HIP {torch.version.hip}')
            else:
                log.warning('Unknown Torch backend')

            # Log information about detected GPUs
            for device in [
                torch.cuda.device(i) for i in range(torch.cuda.device_count())
            ]:
                log.info(
                    f'Torch detected GPU: {torch.cuda.get_device_name(device)} VRAM {round(torch.cuda.get_device_properties(device).total_memory / 1024 / 1024)} Arch {torch.cuda.get_device_capability(device)} Cores {torch.cuda.get_device_properties(device).multi_processor_count}'
                )
        # Check if XPU is available
        elif hasattr(torch, "xpu") and torch.xpu.is_available():
            # Log Intel IPEX version
            ipex_version = ipex.__version__ if ipex is not None else "N/A"
            log.info(f'Torch backend: Intel IPEX {ipex_version}')
            for device in [
                torch.xpu.device(i) for i in range(torch.xpu.device_count())
            ]:
                log.info(
                    f'Torch detected GPU: {torch.xpu.get_device_name(device)} VRAM {round(torch.xpu.get_device_properties(device).total_memory / 1024 / 1024)} Compute Units {torch.xpu.get_device_properties(device).max_compute_units}'
                )
        else:
            log.warning('Torch reports GPU not available')

        # Parse the whole leading component so a multi-digit major version is read correctly
        return int(torch.__version__.split('.')[0])
    except Exception as e:
        # Best effort: a missing or broken torch is reported as version 0
        log.debug(f'Could not load torch: {e}')
        return 0


# report current version of code
def check_repo_version():
    """
    Log the release string stored in the `.release` file of the current directory.

    When the file exists its content is logged at info level; a read failure
    is logged as an error. When it does not exist a debug message is logged.
    """
    if os.path.exists('.release'):
        try:
            with open('./.release', 'r', encoding='utf8') as file:
                release = file.read()

            log.info(f'Kohya_ss GUI version: {release}')
        except Exception as e:
            log.error(f'Could not read release: {e}')
    else:
        log.debug('Could not read release...')
# execute git command
def git(arg: str, folder: str = None, ignore: bool = False):
    """
    Run a Git command line through the shell and log failures.

    The executable is taken from the GIT environment variable, defaulting to
    ``git``. Standard output and standard error are captured and combined.
    On a non-zero exit status (unless ``ignore`` is set) the module-level
    error counter is incremented and the failure plus the combined output
    are logged.

    Parameters:
    - arg: A string containing the Git command arguments.
    - folder: Optional directory to run the command in; the current directory
      is used when it is not provided.
    - ignore: If True, a failing command is neither counted nor logged.

    Note:
    This function was adapted from code written by vladimandic: https://github.com/vladmandic/automatic/commits/master
    """
    global errors

    executable = os.environ.get('GIT', "git")
    completed = subprocess.run(
        f'"{executable}" {arg}',
        check=False,
        shell=True,
        env=os.environ,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=folder or '.',
    )

    # Combine stdout and stderr, separated by a newline when both are present
    streams = [completed.stdout.decode(encoding="utf8", errors="ignore")]
    if completed.stderr:
        streams.append(completed.stderr.decode(encoding="utf8", errors="ignore"))
    txt = '\n'.join(part for part in streams if part).strip()

    if ignore or completed.returncode == 0:
        return

    errors += 1
    log.error(f'Error running git: {folder} / {arg}')
    if 'or stash them' in txt:
        log.error(f'Local changes detected: check log for details...')
    log.debug(f'Git output: {txt}')
def pip(arg: str, ignore: bool = False, quiet: bool = False, show_stdout: bool = False):
    """
    Run ``<python> -m pip <arg>`` through the shell using the current interpreter.

    Parameters:
    - arg: A string containing the pip command arguments.
    - ignore: If True, a failing command is neither counted nor logged.
    - quiet: If True, the "Installing package" info message is not logged.
    - show_stdout: If True, pip writes straight to the console and nothing
      is captured.

    Returns:
    - The combined, stripped stdout/stderr text of pip, or None when
      ``show_stdout`` is set.
    """
    global errors  # pylint: disable=global-statement

    # arg = arg.replace('>=', '==')
    if not quiet:
        label = arg
        for token in ("install", "--upgrade", "--no-deps", "--force"):
            label = label.replace(token, "")
        # NOTE(review): as written this replaces a space with a space (a no-op);
        # it presumably collapsed double spaces before the whitespace of this
        # file was mangled - confirm against upstream.
        label = label.replace(" ", " ").strip()
        log.info(f'Installing package: {label}')
    log.debug(f"Running pip: {arg}")

    command = f'"{sys.executable}" -m pip {arg}'
    if show_stdout:
        subprocess.run(command, shell=True, check=False, env=os.environ)
        return None

    completed = subprocess.run(
        command,
        shell=True,
        check=False,
        env=os.environ,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    # Combine stdout and stderr, separated by a newline when both are present
    streams = [completed.stdout.decode(encoding="utf8", errors="ignore")]
    if completed.stderr:
        streams.append(completed.stderr.decode(encoding="utf8", errors="ignore"))
    txt = '\n'.join(part for part in streams if part).strip()

    if completed.returncode != 0 and not ignore:
        errors += 1
        log.error(f'Error running pip: {arg}')
        log.debug(f'Pip output: {txt}')
    return txt
def _version_satisfies(installed_version, required_version, minimum):
    """Return True when the installed version meets the requirement (>= if minimum, else ==)."""
    try:
        have = pkg_resources.parse_version(installed_version)
        want = pkg_resources.parse_version(required_version)
    except ValueError:
        # Unparseable version text: fall back to comparing the raw strings
        have, want = installed_version, required_version
    return have >= want if minimum else have == want


def installed(package, friendly: str = None):
    """
    Checks if the specified package(s) are installed with the correct version.

    Each whitespace-separated entry of ``package`` may carry a ``>=`` or ``==``
    version constraint. Versions are compared as parsed versions, so
    ``0.10.0`` is newer than ``0.9.0``.

    Parameters:
    - package: A string that specifies one or more packages with optional version constraints.
      Extras in brackets (e.g. ``package[option]``) are removed before checking.
    - friendly: Optional; when given, only entries of ``package`` starting with ``--`` or
      containing ``://`` are skipped. Otherwise entries starting with ``-`` or ``=`` are
      skipped and only the text after the last ``/`` of each entry is used.

    Returns:
    - True if all specified packages are installed with the correct versions, False otherwise.

    Note:
    This function was adapted from code written by vladimandic.
    """
    # Remove any optional features specified in brackets (e.g., "package[option]==version" becomes "package==version")
    package = re.sub(r'\[.*?\]', '', package)

    pkgs = []
    try:
        if friendly:
            # Filter out command-line options and URLs from the package specification
            pkgs = [
                p
                for p in package.split()
                if not p.startswith('--') and "://" not in p
            ]
        else:
            # Exclude '-' and '=' prefixed items, then keep only the last path component
            pkgs = [
                p.split('/')[-1]
                for p in package.split()
                if not p.startswith(('-', '='))
            ]

        for pkg in pkgs:
            # Parse the package name and version based on the version specifier used
            minimum = '>=' in pkg
            if minimum:
                name_part, _, version_part = pkg.partition('>=')
            elif '==' in pkg:
                name_part, _, version_part = pkg.partition('==')
            else:
                name_part, version_part = pkg, None
            pkg_name = name_part.strip()
            pkg_version = version_part.strip() if version_part is not None else None

            # Attempt to find the installed package under the common spellings of its name
            spec = None
            for candidate in (pkg_name, pkg_name.lower(), pkg_name.replace('_', '-')):
                spec = pkg_resources.working_set.by_key.get(candidate, None)
                if spec is not None:
                    break

            if spec is None:
                # Package not found, log debug message and return False
                log.debug(f'Package version not found: {pkg_name}')
                return False

            # Use the distribution that was actually found rather than a second lookup
            version = spec.version
            log.debug(f'Package version found: {pkg_name} {version}')

            if pkg_version is not None and not _version_satisfies(version, pkg_version, minimum):
                # Version mismatch, log warning and return False
                log.warning(f'Package wrong version: {pkg_name} {version} required {pkg_version}')
                return False

        # All specified packages are installed with the correct versions
        return True
    except ModuleNotFoundError:
        # One or more packages are not installed, log debug message and return False
        log.debug(f'Package not installed: {pkgs}')
        return False
# install package using pip if not already installed
def install(
    package,
    friendly: str = None,
    ignore: bool = False,
    reinstall: bool = False,
    show_stdout: bool = False,
):
    """
    Install or upgrade a package through pip unless it is already installed.

    Parameters:
    - package (str): The package to install, optionally with version
      specifiers. Anything after a '#' is discarded.
    - friendly (str, optional): Passed on to `installed` together with the
      package when checking whether installation is needed. Defaults to None.
    - ignore (bool, optional): Passed to `pip`; if True, pip failures are not
      counted or logged. Defaults to False.
    - reinstall (bool, optional): If True, pip is run even when the package is
      already installed, and the module-level `quick_allowed` flag is set to
      False. Defaults to False.
    - show_stdout (bool, optional): Passed to `pip`; if True, pip output goes
      to the console. Defaults to False.

    Returns:
    None.
    """
    global quick_allowed  # pylint: disable=global-statement

    # Remove anything after '#' in the package variable
    package, _, _ = package.partition('#')
    package = package.strip()

    if reinstall:
        quick_allowed = False

    needs_install = reinstall or not installed(package, friendly)
    if needs_install:
        pip(f'install --upgrade {package}', ignore=ignore, show_stdout=show_stdout)


def process_requirements_line(line, show_stdout: bool = False):
    """Install one requirements line, passing a bracket-free copy as the friendly name."""
    # e.g., diffusers[torch]==0.10.2 becomes diffusers==0.10.2
    bracket_free = re.sub(r'\[.*?\]', '', line)
    install(line, bracket_free, show_stdout=show_stdout)
def install_requirements(requirements_file, check_no_verify_flag=False, show_stdout: bool = False):
    """
    Install the packages listed in a requirements file, one line at a time.

    Blank lines and comment lines are skipped. Lines of the form
    ``-r other_file`` are expanded recursively.

    Parameters:
    - requirements_file: Path to the requirements file to read.
    - check_no_verify_flag: If True, lines containing ``no_verify`` are skipped
      and the log message says "Verifying" instead of "Installing".
    - show_stdout: Passed on to the per-line installation.
    """
    if check_no_verify_flag:
        log.info(f'Verifying modules installation status from {requirements_file}...')
    else:
        log.info(f'Installing modules from {requirements_file}...')

    with open(requirements_file, 'r', encoding='utf8') as f:
        stripped_lines = [raw.strip() for raw in f]

    # Filter on the stripped text so indented comments are skipped as well
    lines = [
        line
        for line in stripped_lines
        if line
        and not line.startswith('#')
        and not (check_no_verify_flag and 'no_verify' in line)
    ]

    # Iterate over each line and install the requirements
    for line in lines:
        if line.startswith('-r'):
            # The remainder of the line is the path of the included requirements file
            included_file = line[2:].strip()
            install_requirements(included_file, check_no_verify_flag=check_no_verify_flag, show_stdout=show_stdout)
        else:
            process_requirements_line(line, show_stdout=show_stdout)


def ensure_base_requirements():
    """Install `rich` and `packaging` when they cannot be imported."""
    try:
        import rich  # pylint: disable=unused-import
    except ImportError:
        install('--upgrade rich', 'rich')

    try:
        import packaging  # pylint: disable=unused-import
    except ImportError:
        install('packaging')


def run_cmd(run_cmd):
    """
    Run a command line through the shell and log a failure.

    Parameters:
    - run_cmd: The command line string to execute.

    Nothing is raised for a failing command: a non-zero exit status or a
    failure to start the command is logged as an error.
    """
    try:
        completed = subprocess.run(run_cmd, shell=True, check=False, env=os.environ)
    except OSError as e:
        log.error(f'Error occurred while running command: {run_cmd}')
        log.error(f'Error: {e}')
        return

    # check=False never raises CalledProcessError, so inspect the exit status directly
    if completed.returncode != 0:
        log.error(f'Error occurred while running command: {run_cmd}')
        log.error(f'Error: command exited with status {completed.returncode}')


def delete_file(file_path):
    """Remove the file at `file_path` if it exists; do nothing otherwise."""
    if os.path.exists(file_path):
        os.remove(file_path)


def write_to_file(file_path, content):
    """Write `content` to `file_path`, replacing existing content; print I/O errors instead of raising."""
    try:
        with open(file_path, 'w') as file:
            file.write(content)
    except IOError as e:
        print(f'Error occurred while writing to file: {file_path}')
        print(f'Error: {e}')


def clear_screen():
    """Clear the terminal using `cls` on Windows and `clear` elsewhere."""
    os.system('cls' if os.name == 'nt' else 'clear')
"""Linux setup entry point: installs the Python requirements and configures accelerate."""
import argparse
import logging
import sys

import setup_common

errors = 0  # Define the 'errors' variable before using it
log = logging.getLogger('sd')

# ANSI escape code for yellow color
YELLOW = '\033[93m'
RESET_COLOR = '\033[0m'


def main_menu(platform_requirements_file, show_stdout: bool = False, no_run_accelerate: bool = False):
    """
    Install the platform requirements and, unless disabled, configure accelerate.

    Parameters:
    - platform_requirements_file: Path to the platform-specific requirements file.
    - show_stdout: If True, pip output is shown during installation.
    - no_run_accelerate: If True, the accelerate configuration step is skipped.
    """
    log.info("Installing python dependencies. This could take a few minutes as it downloads files.")
    log.info("If this operation ever runs too long, you can rerun this script in verbose mode to check.")

    setup_common.check_repo_version()
    # setup_common.check_python()

    # Upgrade pip if needed
    setup_common.install('pip')
    setup_common.install_requirements(platform_requirements_file, check_no_verify_flag=False, show_stdout=show_stdout)
    if not no_run_accelerate:
        setup_common.configure_accelerate(run_accelerate=False)


if __name__ == '__main__':
    # Parse the command line first so --help or a bad option exits before any setup work runs
    parser = argparse.ArgumentParser()
    parser.add_argument('--platform-requirements-file', dest='platform_requirements_file', default='requirements_linux.txt', help='Path to the platform-specific requirements file')
    parser.add_argument('--show_stdout', dest='show_stdout', action='store_true', help='Whether to show stdout during installation')
    parser.add_argument('--no_run_accelerate', dest='no_run_accelerate', action='store_true', help='Whether to not run accelerate config')
    args = parser.parse_args()

    setup_common.ensure_base_requirements()
    setup_common.setup_logging()
    if not setup_common.check_python_version():
        # sys.exit is always available; the bare exit() helper is injected by the site module
        sys.exit(1)

    setup_common.update_submodule()

    # setup_common.clone_or_checkout(
    #     "https://github.com/kohya-ss/sd-scripts.git", tag_version, "sd-scripts"
    # )

    main_menu(args.platform_requirements_file, show_stdout=args.show_stdout, no_run_accelerate=args.no_run_accelerate)
b/setup/setup_runpod.py new file mode 100644 index 0000000000000000000000000000000000000000..4da4f95555dcf2fb63ec3181d1cdb13c8936dbfa --- /dev/null +++ b/setup/setup_runpod.py @@ -0,0 +1,73 @@ +import argparse +import logging +import setup_common +import os +import shutil + +errors = 0 # Define the 'errors' variable before using it +log = logging.getLogger('sd') + +# ANSI escape code for yellow color +YELLOW = '\033[93m' +RESET_COLOR = '\033[0m' + +def configure_accelerate(): + script_dir = os.path.dirname(os.path.abspath(__file__)) + cache_dir = "/root/.cache/huggingface/accelerate" + + log.info("Configuring accelerate...") + os.makedirs(cache_dir, exist_ok=True) + + config_file_src = os.path.join(script_dir, "config_files", "accelerate", "runpod.yaml") + config_file_dest = os.path.join(cache_dir, "default_config.yaml") + shutil.copyfile(config_file_src, config_file_dest) + + +def setup_environment(): + # Get the directory the script is run from + script_dir = os.path.dirname(os.path.abspath(__file__)) + + # Install tk and python3.10-venv + log.info("Install tk and python3.10-venv...") + subprocess.run(['apt', 'update', '-y']) + subprocess.run(['apt', 'install', '-y', 'python3-tk', 'python3.10-venv']) + + # Check if the venv folder doesn't exist + venv_dir = os.path.join(script_dir, 'venv') + if not os.path.exists(venv_dir): + log.info("Creating venv...") + subprocess.run(['python3', '-m', 'venv', venv_dir]) + + # Activate the virtual environment + log.info("Activate venv...") + activate_script = os.path.join(venv_dir, 'bin', 'activate') + activate_command = f'source "{activate_script}" || exit 1' + subprocess.run(activate_command, shell=True, executable='/bin/bash') + + +def main_menu(platform_requirements_file): + log.info("Installing python dependencies. 
This could take a few minutes as it downloads files.") + log.info("If this operation ever runs too long, you can rerun this script in verbose mode to check.") + + setup_common.check_repo_version() + # setup_common.check_python() + + # Upgrade pip if needed + setup_common.install('pip') + setup_common.install_requirements(platform_requirements_file, check_no_verify_flag=False, show_stdout=True) + configure_accelerate() + + +if __name__ == '__main__': + setup_common.ensure_base_requirements() + setup_common.setup_logging() + if not setup_common.check_python_version(): + exit(1) + + setup_common.update_submodule() + + parser = argparse.ArgumentParser() + parser.add_argument('--platform-requirements-file', dest='platform_requirements_file', default='requirements_runpod.txt', help='Path to the platform-specific requirements file') + args = parser.parse_args() + + main_menu(args.platform_requirements_file) diff --git a/setup/setup_windows.py b/setup/setup_windows.py new file mode 100644 index 0000000000000000000000000000000000000000..16551e30087627fe51c31a556c36a45c5f9d60e4 --- /dev/null +++ b/setup/setup_windows.py @@ -0,0 +1,271 @@ +import subprocess +import os +import filecmp +import logging +import shutil +import sysconfig +import setup_common +import argparse + +errors = 0 # Define the 'errors' variable before using it +log = logging.getLogger("sd") + +# ANSI escape code for yellow color +YELLOW = "\033[93m" +RESET_COLOR = "\033[0m" + + +def cudnn_install(): + log.info("Installing nvidia-cudnn-cu11 8.9.5.29...") + setup_common.install( + "--upgrade nvidia-cudnn-cu11==8.9.5.29", + "nvidia-cudnn-cu11 8.9.5.29", + reinstall=True, + ) + + # Original path with "..\\venv" + original_path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "..\\venv\\Lib\\site-packages\\nvidia\\cudnn\\bin", + ) + # Normalize the path to resolve "..\\venv" + cudnn_src = os.path.abspath(original_path) + cudnn_dest = os.path.join(sysconfig.get_paths()["purelib"], "torch", "lib") 
+ + log.info(f"Copying CUDNN files from {cudnn_src} to {cudnn_dest}...") + if os.path.exists(cudnn_src): + if os.path.exists(cudnn_dest): + # check for different files + filecmp.clear_cache() + for file in os.listdir(cudnn_src): + if file.lower().endswith(".dll"): # Check if the file is a .dll file + src_file = os.path.join(cudnn_src, file) + dest_file = os.path.join(cudnn_dest, file) + # if dest file exists, check if it's different + if os.path.exists(dest_file): + if not filecmp.cmp(src_file, dest_file, shallow=False): + shutil.copy2(src_file, cudnn_dest) + else: + shutil.copy2(src_file, cudnn_dest) + log.info("Copied CUDNN .dll files to destination") + else: + log.warning(f"Destination directory {cudnn_dest} does not exist") + else: + log.error(f'Installation Failed: "{cudnn_src}" could not be found.') + + +def sync_bits_and_bytes_files(): + import filecmp + + """ + Check for "different" bitsandbytes Files and copy only if necessary. + This function is specific for Windows OS. + """ + + # Only execute on Windows + if os.name != "nt": + print("This function is only applicable to Windows OS.") + return + + try: + log.info(f"Copying bitsandbytes files...") + # Define source and destination directories + source_dir = os.path.join(os.getcwd(), "bitsandbytes_windows") + + dest_dir_base = os.path.join(sysconfig.get_paths()["purelib"], "bitsandbytes") + + # Clear file comparison cache + filecmp.clear_cache() + + # Iterate over each file in source directory + for file in os.listdir(source_dir): + source_file_path = os.path.join(source_dir, file) + + # Decide the destination directory based on file name + if file in ("main.py", "paths.py"): + dest_dir = os.path.join(dest_dir_base, "cuda_setup") + else: + dest_dir = dest_dir_base + + dest_file_path = os.path.join(dest_dir, file) + + # Compare the source file with the destination file + if os.path.exists(dest_file_path) and filecmp.cmp( + source_file_path, dest_file_path + ): + log.debug( + f"Skipping {source_file_path} as 
it already exists in {dest_dir}" + ) + else: + # Copy file from source to destination, maintaining original file's metadata + log.debug(f"Copy {source_file_path} to {dest_dir}") + shutil.copy2(source_file_path, dest_dir) + + except FileNotFoundError as fnf_error: + log.error(f"File not found error: {fnf_error}") + except PermissionError as perm_error: + log.error(f"Permission error: {perm_error}") + except Exception as e: + log.error(f"An unexpected error occurred: {e}") + + +def install_kohya_ss_torch2(headless: bool = False): + setup_common.check_repo_version() + if not setup_common.check_python_version(): + exit(1) + + setup_common.update_submodule() + + setup_common.install("pip") + + # setup_common.install_requirements( + # "requirements_windows_torch2.txt", check_no_verify_flag=False + # ) + + setup_common.install_requirements_inbulk( + "requirements_pytorch_windows.txt", show_stdout=True, optional_parm="--index-url https://download.pytorch.org/whl/cu118" + ) + + setup_common.install_requirements_inbulk( + "requirements_windows.txt", show_stdout=True, upgrade=True + ) + + setup_common.run_cmd("accelerate config default") + + +def install_bitsandbytes_0_35_0(): + log.info("Installing bitsandbytes 0.35.0...") + setup_common.install( + "--upgrade bitsandbytes==0.35.0", "bitsandbytes 0.35.0", reinstall=True + ) + sync_bits_and_bytes_files() + + +def install_bitsandbytes_0_40_1(): + log.info("Installing bitsandbytes 0.40.1...") + setup_common.install( + "--upgrade https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.40.1.post1-py3-none-win_amd64.whl", + "bitsandbytes 0.40.1", + reinstall=True, + ) + + +def install_bitsandbytes_0_41_1(): + log.info("Installing bitsandbytes 0.41.1...") + setup_common.install( + "--upgrade https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl", + "bitsandbytes 0.41.1", + reinstall=True, + ) + + +def 
install_bitsandbytes_0_41_2(): + log.info("Installing bitsandbytes 0.41.2...") + setup_common.install( + "--upgrade https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.2.post2-py3-none-win_amd64.whl", + "bitsandbytes 0.41.2", + reinstall=True, + ) + + +def install_triton_2_1_0(): + log.info("Installing triton 2.1.0...") + setup_common.install( + "--upgrade https://huggingface.co/Rodeszones/CogVLM-grounding-generalist-hf-quant4/resolve/main/triton-2.1.0-cp310-cp310-win_amd64.whl?download=true", + "triton 2.1.0", + reinstall=True, + ) + + +def main_menu(headless: bool = False): + if headless: + install_kohya_ss_torch2(headless=headless) + else: + setup_common.clear_screen() + while True: + print("\nKohya_ss setup menu:\n") + print("1. Install kohya_ss GUI") + print( + "2. (Optional) Install CuDNN files (to use the latest supported CuDNN version)" + ) + print("3. (DANGER) Install Triton 2.1.0 for Windows... only do it if you know you need it... might break training...") + print("4. (Optional) Install specific version of bitsandbytes") + print("5. (Optional) Manually configure Accelerate") + print("6. (Optional) Launch Kohya_ss GUI in browser") + print("7. Exit Setup") + + choice = input("\nSelect an option: ") + print("") + + if choice == "1": + install_kohya_ss_torch2() + elif choice == "2": + cudnn_install() + elif choice == "3": + install_triton_2_1_0() + elif choice == "4": + while True: + print("\nBitsandBytes Installation Menu:") + print("1. Force install Bitsandbytes 0.35.0") + print( + "2. Force install Bitsandbytes 0.40.1 (supports new optimizer options, pre-bugfix results)" + ) + print( + "3. Force installation Bitsandbytes 0.41.1 (supports new optimizer options)" + ) + print( + "4. (Recommended) Force install Bitsandbytes 0.41.2 (supports new optimizer options)" + ) + print( + "5. (Warning) Install bitsandbytes-windows (may cause issues, use with caution)" + ) + print("6. 
Return to Previous Menu:") + choice_torch = input("\nSelect an option: ") + print("") + + if choice_torch == "1": + install_bitsandbytes_0_35_0() + break + elif choice_torch == "2": + install_bitsandbytes_0_40_1() + break + elif choice_torch == "3": + install_bitsandbytes_0_41_1() + break + elif choice_torch == "4": + install_bitsandbytes_0_41_2() + break + elif choice_torch == "5": + setup_common.install( + "--upgrade bitsandbytes-windows", reinstall=True + ) + break + elif choice_torch == "6": + break + else: + print("Invalid choice. Please chose an option between 1-6.") + elif choice == "5": + setup_common.run_cmd("accelerate config") + elif choice == "6": + subprocess.Popen( + "start cmd /k .\\gui.bat --inbrowser", shell=True + ) # /k keep the terminal open on quit. /c would close the terminal instead + elif choice == "7": + print("Exiting setup.") + break + else: + print("Invalid selection. Please choose an option between 1-7.") + + +if __name__ == "__main__": + setup_common.ensure_base_requirements() + setup_common.setup_logging() + + # Setup argument parser + parser = argparse.ArgumentParser(description="Your Script Description") + parser.add_argument("--headless", action="store_true", help="Run in headless mode") + + # Parse arguments + args = parser.parse_args() + + main_menu(headless=args.headless) diff --git a/setup/update_bitsandbytes.py b/setup/update_bitsandbytes.py new file mode 100644 index 0000000000000000000000000000000000000000..148f64d75b703b2f5f077aafbe080ddeb9c6090c --- /dev/null +++ b/setup/update_bitsandbytes.py @@ -0,0 +1,49 @@ +import os +import sysconfig +import filecmp +import shutil + +def sync_bits_and_bytes_files(): + """ + Check for "different" bitsandbytes Files and copy only if necessary. + This function is specific for Windows OS. 
+ """ + + # Only execute on Windows + if os.name != "nt": + print("This function is only applicable to Windows OS.") + return + + try: + # Define source and destination directories + source_dir = os.path.join(os.getcwd(), "bitsandbytes_windows") + + dest_dir_base = os.path.join(sysconfig.get_paths()["purelib"], "bitsandbytes") + + # Clear file comparison cache + filecmp.clear_cache() + + # Iterate over each file in source directory + for file in os.listdir(source_dir): + source_file_path = os.path.join(source_dir, file) + + # Decide the destination directory based on file name + if file in ("main.py", "paths.py"): + dest_dir = os.path.join(dest_dir_base, "cuda_setup") + else: + dest_dir = dest_dir_base + + # Copy file from source to destination, maintaining original file's metadata + print(f'Copy {source_file_path} to {dest_dir}') + shutil.copy2(source_file_path, dest_dir) + + except FileNotFoundError as fnf_error: + print(f"File not found error: {fnf_error}") + except PermissionError as perm_error: + print(f"Permission error: {perm_error}") + except Exception as e: + print(f"An unexpected error occurred: {e}") + + +if __name__ == "__main__": + sync_bits_and_bytes_files() \ No newline at end of file diff --git a/setup/validate_requirements.py b/setup/validate_requirements.py new file mode 100644 index 0000000000000000000000000000000000000000..bb904224546ad9c3819d40846012accf2c28a875 --- /dev/null +++ b/setup/validate_requirements.py @@ -0,0 +1,133 @@ +import os +import sys +import shutil +import argparse +import setup_common + +# Get the absolute path of the current file's directory (Kohua_SS project directory) +project_directory = os.path.dirname(os.path.abspath(__file__)) + +# Check if the "setup" directory is present in the project_directory +if "setup" in project_directory: + # If the "setup" directory is present, move one level up to the parent directory + project_directory = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +# Add the project 
directory to the beginning of the Python search path +sys.path.insert(0, project_directory) + +from kohya_gui.custom_logging import setup_logging + +# Set up logging +log = setup_logging() + +def check_path_with_space(): + # Get the current working directory + cwd = os.getcwd() + + # Check if the current working directory contains a space + if " " in cwd: + log.error("The path in which this python code is executed contain one or many spaces. This is not supported for running kohya_ss GUI.") + log.error("Please move the repo to a path without spaces, delete the venv folder and run setup.sh again.") + log.error("The current working directory is: " + cwd) + exit(1) + +def check_torch(): + # Check for toolkit + if shutil.which('nvidia-smi') is not None or os.path.exists( + os.path.join( + os.environ.get('SystemRoot') or r'C:\Windows', + 'System32', + 'nvidia-smi.exe', + ) + ): + log.info('nVidia toolkit detected') + elif shutil.which('rocminfo') is not None or os.path.exists( + '/opt/rocm/bin/rocminfo' + ): + log.info('AMD toolkit detected') + elif (shutil.which('sycl-ls') is not None + or os.environ.get('ONEAPI_ROOT') is not None + or os.path.exists('/opt/intel/oneapi')): + log.info('Intel OneAPI toolkit detected') + else: + log.info('Using CPU-only Torch') + + try: + import torch + try: + # Import IPEX / XPU support + import intel_extension_for_pytorch as ipex + except Exception: + pass + log.info(f'Torch {torch.__version__}') + + if torch.cuda.is_available(): + if torch.version.cuda: + # Log nVidia CUDA and cuDNN versions + log.info( + f'Torch backend: nVidia CUDA {torch.version.cuda} cuDNN {torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else "N/A"}' + ) + elif torch.version.hip: + # Log AMD ROCm HIP version + log.info(f'Torch backend: AMD ROCm HIP {torch.version.hip}') + else: + log.warning('Unknown Torch backend') + + # Log information about detected GPUs + for device in [ + torch.cuda.device(i) for i in range(torch.cuda.device_count()) + ]: 
+ log.info( + f'Torch detected GPU: {torch.cuda.get_device_name(device)} VRAM {round(torch.cuda.get_device_properties(device).total_memory / 1024 / 1024)} Arch {torch.cuda.get_device_capability(device)} Cores {torch.cuda.get_device_properties(device).multi_processor_count}' + ) + # Check if XPU is available + elif hasattr(torch, "xpu") and torch.xpu.is_available(): + # Log Intel IPEX version + log.info(f'Torch backend: Intel IPEX {ipex.__version__}') + for device in [ + torch.xpu.device(i) for i in range(torch.xpu.device_count()) + ]: + log.info( + f'Torch detected GPU: {torch.xpu.get_device_name(device)} VRAM {round(torch.xpu.get_device_properties(device).total_memory / 1024 / 1024)} Compute Units {torch.xpu.get_device_properties(device).max_compute_units}' + ) + else: + log.warning('Torch reports GPU not available') + + return int(torch.__version__[0]) + except Exception as e: + log.error(f'Could not load torch: {e}') + sys.exit(1) + +def main(): + setup_common.check_repo_version() + + check_path_with_space() + + # Parse command line arguments + parser = argparse.ArgumentParser( + description='Validate that requirements are satisfied.' 
+ ) + parser.add_argument( + '-r', + '--requirements', + type=str, + help='Path to the requirements file.', + ) + parser.add_argument('--debug', action='store_true', help='Debug on') + args = parser.parse_args() + + setup_common.update_submodule() + + torch_ver = check_torch() + + if not setup_common.check_python_version(): + exit(1) + + if args.requirements: + setup_common.install_requirements(args.requirements, check_no_verify_flag=True) + else: + setup_common.install_requirements('requirements_pytorch_windows.txt', check_no_verify_flag=True) + setup_common.install_requirements('requirements_windows.txt', check_no_verify_flag=True) + +if __name__ == '__main__': + main() diff --git a/test/config/Diag-OFT-AdamW8bit-toml.json b/test/config/Diag-OFT-AdamW8bit-toml.json new file mode 100644 index 0000000000000000000000000000000000000000..10061f73eb35aa13344c6e582d49d4a9e09b5958 --- /dev/null +++ b/test/config/Diag-OFT-AdamW8bit-toml.json @@ -0,0 +1,131 @@ +{ + "LoRA_type": "LyCORIS/Diag-OFT", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "bypass_mode": true, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "constrain": 0.0, + "conv_alpha": 4, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 8, + "dataset_config": "D:/kohya_ss/test/config/dataset.toml", + "debiased_estimation_loss": false, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 1, + "factor": -1, + "flip_aug": false, + "fp8_base": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": "0", + 
"learning_rate": 0.0005, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_grad_norm": 1, + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "module_dropout": 0, + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 16, + "network_dim": 32, + "network_dropout": 0, + "noise_offset": 0.05, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "Diag-OFT-AdamW8bit-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "rank_dropout_scale": false, + "reg_data_dir": "", + "rescaled": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "1234", + "shuffle_caption": false, + 
"stop_text_encoder_training": 0, + "text_encoder_lr": 0.0, + "train_batch_size": 4, + "train_data_dir": "", + "train_norm": false, + "train_on_input": true, + "training_comment": "", + "unet_lr": 0.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_scalar": false, + "use_tucker": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/DyLoRA-Adafactor-toml.json b/test/config/DyLoRA-Adafactor-toml.json new file mode 100644 index 0000000000000000000000000000000000000000..629044414f9ff3e6d218c2846b8d29fa9ecdfea4 --- /dev/null +++ b/test/config/DyLoRA-Adafactor-toml.json @@ -0,0 +1,139 @@ +{ + "LoRA_type": "LyCORIS/DyLoRA", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "bypass_mode": false, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.1, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "constrain": 0.0, + "conv_alpha": 64, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 64, + "dataset_config": "./test/config/dataset.toml", + "debiased_estimation_loss": false, + "decompose_both": false, + "dim_from_weights": false, + "dora_wd": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 150, + "extra_accelerate_launch_args": "", + "factor": 6, + "flip_aug": false, + "fp8_base": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "ip_noise_gamma": 0, + "ip_noise_gamma_random_strength": false, + "keep_tokens": 1, + "learning_rate": 
4e-07, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lora_network_weights": "", + "lr_scheduler": "constant_with_warmup", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "main_process_port": 0, + "masked_loss": false, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_grad_norm": 0, + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "fp16", + "model_list": "custom", + "module_dropout": 0, + "multi_gpu": false, + "multires_noise_discount": 0.1, + "multires_noise_iterations": 6, + "network_alpha": 64, + "network_dim": 64, + "network_dropout": 0, + "noise_offset": 0, + "noise_offset_random_strength": false, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "Adafactor", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "DyLoRA-Adafactor-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "rank_dropout_scale": false, + "reg_data_dir": "", + "rescaled": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 15, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "save_state_on_train_end": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, 
+ "sdxl_no_half_vae": true, + "seed": "", + "shuffle_caption": true, + "stop_text_encoder_training": 0, + "text_encoder_lr": 4e-07, + "train_batch_size": 2, + "train_data_dir": "", + "train_norm": true, + "train_on_input": false, + "training_comment": "KoopaTroopa", + "unet_lr": 4e-07, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_scalar": false, + "use_tucker": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/LoKR-AdamW8bit-toml.json b/test/config/LoKR-AdamW8bit-toml.json new file mode 100644 index 0000000000000000000000000000000000000000..b508c590b821381ecceb4201599a2031611c6311 --- /dev/null +++ b/test/config/LoKR-AdamW8bit-toml.json @@ -0,0 +1,143 @@ +{ + "LoRA_type": "LyCORIS/LoKr", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0, + "additional_parameters": "--lr_scheduler_type \"CosineAnnealingLR\" --lr_scheduler_args \"T_max=1000\" \"eta_min=0e-0\"", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "bypass_mode": false, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0, + "caption_dropout_rate": 0.1, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "constrain": 0, + "conv_alpha": 1, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 100000, + "dataset_config": "./test/config/dataset.toml", + "debiased_estimation_loss": false, + "decompose_both": false, + "dim_from_weights": false, + "dora_wd": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 150, + "extra_accelerate_launch_args": "", + "factor": 6, + "flip_aug": false, + "fp8_base": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + 
"gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "huber_c": 0.1, + "huber_schedule": "snr", + "ip_noise_gamma": 0, + "ip_noise_gamma_random_strength": false, + "keep_tokens": 1, + "learning_rate": 1, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lora_network_weights": "", + "loss_type": "l2", + "lr_scheduler": "cosine", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "main_process_port": 0, + "masked_loss": false, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_grad_norm": 1, + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "custom", + "module_dropout": 0, + "multi_gpu": false, + "multires_noise_discount": 0.1, + "multires_noise_iterations": 6, + "network_alpha": 1, + "network_dim": 100000, + "network_dropout": 0, + "noise_offset": 0, + "noise_offset_random_strength": false, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "Prodigy", + "optimizer_args": "\"d0=1e-5\" \"d_coef=1.0\" \"weight_decay=0.4\" \"decouple=True\" \"safeguard_warmup=True\" \"use_bias_correction=True\"", + "output_dir": "./test/output", + "output_name": "LoKR-AdamW8bit-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1, + "random_crop": false, + "rank_dropout": 0, + "rank_dropout_scale": false, + "reg_data_dir": "", + "rescaled": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_as_bool": false, + 
"save_every_n_epochs": 15, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "bf16", + "save_state": false, + "save_state_on_train_end": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "", + "shuffle_caption": true, + "stop_text_encoder_training": 0, + "text_encoder_lr": 1, + "train_batch_size": 2, + "train_data_dir": "", + "train_norm": false, + "train_on_input": false, + "training_comment": "KoopaTroopa", + "unet_lr": 1, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_scalar": false, + "use_tucker": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/SDXL-Standard-Adafactor.json b/test/config/SDXL-Standard-Adafactor.json new file mode 100644 index 0000000000000000000000000000000000000000..978fd20275d13c087ce84b21c2abd09917c03545 --- /dev/null +++ b/test/config/SDXL-Standard-Adafactor.json @@ -0,0 +1,160 @@ +{ + "LoRA_type": "Standard", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "async_upload": false, + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 32, + "bypass_mode": false, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0, + "caption_dropout_rate": 0, + "caption_extension": ".none-use-foldername", + "clip_skip": 1, + "color_aug": false, + "constrain": 0, + "conv_alpha": 64, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 64, + "dataset_config": "", + "debiased_estimation_loss": false, + "decompose_both": 
false, + "dim_from_weights": false, + "dora_wd": false, + "down_lr_weight": "", + "dynamo_backend": "no", + "dynamo_mode": "default", + "dynamo_use_dynamic": false, + "dynamo_use_fullgraph": false, + "enable_bucket": true, + "epoch": 4, + "extra_accelerate_launch_args": "", + "factor": -1, + "flip_aug": false, + "fp8_base": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": true, + "huber_c": 0.1, + "huber_schedule": "snr", + "huggingface_path_in_repo": "", + "huggingface_repo_id": "", + "huggingface_repo_type": "", + "huggingface_repo_visibility": "", + "huggingface_token": "", + "ip_noise_gamma": 0, + "ip_noise_gamma_random_strength": false, + "keep_tokens": 0, + "learning_rate": 0.001, + "log_tracker_config": "", + "log_tracker_name": "", + "log_with": "", + "logging_dir": "./test/logs", + "loss_type": "l2", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": 1, + "lr_scheduler_power": 1, + "lr_warmup": 0, + "main_process_port": 0, + "masked_loss": false, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": 0, + "max_grad_norm": 1, + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": 75, + "max_train_epochs": 0, + "max_train_steps": 1600, + "mem_eff_attn": false, + "metadata_author": "", + "metadata_description": "", + "metadata_license": "", + "metadata_tags": "", + "metadata_title": "", + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 10, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "custom", + "module_dropout": 0, + "multi_gpu": false, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 64, + "network_dim": 64, + "network_dropout": 0, + "network_weights": "", + "noise_offset": 0.0357, + "noise_offset_random_strength": false, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + 
"optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "SDXL-Standard-AdamW8bit", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "D:/models/sdxl/sd_xl_base_0.9-pruned.safetensors", + "prior_loss_weight": 1, + "random_crop": false, + "rank_dropout": 0, + "rank_dropout_scale": false, + "reg_data_dir": "", + "rescaled": false, + "resume": "", + "resume_from_huggingface": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 0, + "sample_prompts": "", + "sample_sampler": "euler_a", + "save_as_bool": false, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "save_state_on_train_end": false, + "save_state_to_huggingface": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": true, + "sdxl_no_half_vae": true, + "seed": 0, + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0, + "train_batch_size": 1, + "train_data_dir": "./test/img", + "train_norm": false, + "train_on_input": true, + "training_comment": "", + "unet_lr": 0.001, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_scalar": false, + "use_tucker": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/SDXL-Standard-AdamW.json b/test/config/SDXL-Standard-AdamW.json new file mode 100644 index 0000000000000000000000000000000000000000..d2020f89cf54f5b36f925c9edf8ebc3404b7279b --- /dev/null +++ b/test/config/SDXL-Standard-AdamW.json @@ -0,0 +1,106 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": 
"", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".none-use-foldername", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 64, + "conv_alphas": "", + "conv_dim": 64, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 1, + "factor": -1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 0.0001, + "logging_dir": "./test/logs", + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_snr_gamma": 10, + "min_timestep": 0, + "mixed_precision": "fp16", + "model_list": "custom", + "module_dropout": 0, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 64, + "network_dim": 64, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0357, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "SDXL-Standard-AdamW", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "D:/models/sdxl/sd_xl_base_0.9-pruned.safetensors", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 0, + "sample_prompts": "", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + 
"save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": true, + "sdxl_no_half_vae": true, + "seed": "", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0, + "train_batch_size": 1, + "train_data_dir": "./test/img", + "train_on_input": true, + "training_comment": "", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "wandb_api_key": "", + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/test/config/SDXL-Standard-AdamW8bit.json b/test/config/SDXL-Standard-AdamW8bit.json new file mode 100644 index 0000000000000000000000000000000000000000..3c33cf11c0bc76e1928c5d16351b1314673d3ef9 --- /dev/null +++ b/test/config/SDXL-Standard-AdamW8bit.json @@ -0,0 +1,106 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".none-use-foldername", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 64, + "conv_alphas": "", + "conv_dim": 64, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 1, + "factor": -1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 1e-06, + "logging_dir": "./test/logs", + "lora_network_weights": "", + "lr_scheduler": "cosine", + 
"lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_snr_gamma": 10, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "custom", + "module_dropout": 0, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 64, + "network_dim": 64, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0357, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "SDXL-Standard-AdamW8bit", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "D:/models/sdxl/sd_xl_base_0.9-pruned.safetensors", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 0, + "sample_prompts": "", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": true, + "sdxl_no_half_vae": true, + "seed": "", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0, + "train_batch_size": 1, + "train_data_dir": "./test/img", + "train_on_input": true, + "training_comment": "", + "unet_lr": 1e-06, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "wandb_api_key": "", + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git 
a/test/config/Standard-AdamW.json b/test/config/Standard-AdamW.json new file mode 100644 index 0000000000000000000000000000000000000000..02bdb8cac8b176a7dbdbf6bf8ef0803011c5456b --- /dev/null +++ b/test/config/Standard-AdamW.json @@ -0,0 +1,104 @@ +{ + "LoRA_type": "Standard", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "conv_alpha": 8, + "conv_alphas": "", + "conv_dim": 16, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 4, + "factor": -1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 0.0001, + "logging_dir": "./test/logs", + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_snr_gamma": 10, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "module_dropout": 0.1, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 16, + "network_dim": 16, + "network_dropout": 0.1, + "no_token_padding": false, + "noise_offset": "0.05", + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "Standard-Adamw", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", 
+ "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0.1, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 20, + "sample_prompts": "a painting of man wearing a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 4, + "train_data_dir": "./test/img", + "train_on_input": false, + "training_comment": "", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": true, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "wandb_api_key": "", + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/test/config/Standard-AdamW8bit.json b/test/config/Standard-AdamW8bit.json new file mode 100644 index 0000000000000000000000000000000000000000..6611f49f32827c3e7a74ba60302be49fb61cf2c3 --- /dev/null +++ b/test/config/Standard-AdamW8bit.json @@ -0,0 +1,125 @@ +{ + "LoRA_type": "Standard", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "constrain": 0.0, + "conv_alpha": 8, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 16, + "debiased_estimation_loss": false, 
+ "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 4, + "factor": -1, + "flip_aug": false, + "fp8_base": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 0.0001, + "logging_dir": "./test/logs", + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_grad_norm": 1, + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 10, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "module_dropout": 0.1, + "multi_gpu": false, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 16, + "network_dim": 16, + "network_dropout": 0.1, + "noise_offset": "0.05", + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "Standard-Adamw8bit", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0.1, + "rank_dropout_scale": false, + "reg_data_dir": "", + "rescaled": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 20, + "sample_prompts": "a painting of man wearing a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": 
"safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 4, + "train_data_dir": "./test/img", + "train_norm": false, + "train_on_input": false, + "training_comment": "", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": true, + "use_scalar": false, + "use_tucker": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae_batch_size": 0, + "wandb_api_key": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/TI-AdamW8bit-toml.json b/test/config/TI-AdamW8bit-toml.json new file mode 100644 index 0000000000000000000000000000000000000000..3f278a2bbc85b7f0e06a3de750c800ab2aaccfca --- /dev/null +++ b/test/config/TI-AdamW8bit-toml.json @@ -0,0 +1,96 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "dataset_config": "D:/kohya_ss/test/config/dataset.toml", + "enable_bucket": true, + "epoch": 4, + "flip_aug": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "init_word": "*", + "keep_tokens": "0", + "learning_rate": 0.0001, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lr_scheduler": "cosine", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + 
"max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "80", + "mem_eff_attn": false, + "min_bucket_reso": 256, + "min_snr_gamma": 10, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "no_token_padding": false, + "noise_offset": 0.05, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "num_vectors_per_token": 8, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "TI-Adamw8bit-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 20, + "sample_prompts": "a painting of man wearing a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl": false, + "sdxl_no_half_vae": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "template": "style template", + "token_string": "zxc", + "train_batch_size": 4, + "train_data_dir": "", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weights": "", + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/TI-AdamW8bit.json b/test/config/TI-AdamW8bit.json new file mode 100644 index 
0000000000000000000000000000000000000000..11f4dc39440ef5f8ee487d5d2c1118cbeb8bb361 --- /dev/null +++ b/test/config/TI-AdamW8bit.json @@ -0,0 +1,99 @@ +{ + "adaptive_noise_scale": 0.005, + "additional_parameters": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "dataset_config": "", + "enable_bucket": true, + "epoch": 8, + "flip_aug": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "init_word": "*", + "ip_noise_gamma": 0.1, + "ip_noise_gamma_random_strength": true, + "keep_tokens": "0", + "learning_rate": 0.0001, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lr_scheduler": "cosine", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "min_bucket_reso": 256, + "min_snr_gamma": 10, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "no_token_padding": false, + "noise_offset": 0.05, + "noise_offset_random_strength": true, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "num_vectors_per_token": 8, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "TI-Adamw8bit", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": 
false, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 20, + "sample_prompts": "a painting of man wearing a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl": false, + "sdxl_no_half_vae": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "template": "style template", + "token_string": "zxc", + "train_batch_size": 4, + "train_data_dir": "./test/img", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weights": "", + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/dataset-finetune.toml b/test/config/dataset-finetune.toml new file mode 100644 index 0000000000000000000000000000000000000000..87bb20878e3597bb27ee6cf6e1e8f1c59defa351 --- /dev/null +++ b/test/config/dataset-finetune.toml @@ -0,0 +1,14 @@ +[[datasets]] +resolution = 512 +batch_size = 4 +keep_tokens = 1 +enable_bucket = true +min_bucket_reso = 64 +max_bucket_reso = 1024 +bucket_reso_steps = 32 +bucket_no_upscale = false + + [[datasets.subsets]] + image_dir = '.\test\img\10_darius kawasaki person' + num_repeats = 10 + metadata_file = '.\test\config\meta-1_lat.json' \ No newline at end of file diff --git a/test/config/dataset-masked_loss.toml b/test/config/dataset-masked_loss.toml new file mode 100644 index 0000000000000000000000000000000000000000..245c4c1cea31226e5a2a70fb1793336e9b5b1833 --- /dev/null +++ b/test/config/dataset-masked_loss.toml @@ -0,0 +1,15 @@ +[[datasets]] +resolution = 512 +batch_size = 4 +keep_tokens = 1 +enable_bucket = true +min_bucket_reso = 64 +max_bucket_reso = 1024 
+bucket_reso_steps = 32 +bucket_no_upscale = true + + [[datasets.subsets]] + image_dir = '.\test\img\10_darius kawasaki person' + num_repeats = 10 + caption_extension = '.txt' + conditioning_data_dir = '.\test\masked_loss' \ No newline at end of file diff --git a/test/config/dataset.toml b/test/config/dataset.toml new file mode 100644 index 0000000000000000000000000000000000000000..5386bd1f1744474349b7d0c459fc86120d4c5f2e --- /dev/null +++ b/test/config/dataset.toml @@ -0,0 +1,15 @@ +[[datasets]] +resolution = 512 +batch_size = 4 +keep_tokens = 1 +enable_bucket = true +min_bucket_reso = 64 +max_bucket_reso = 1024 +bucket_reso_steps = 32 +bucket_no_upscale = true + + [[datasets.subsets]] + image_dir = './test/img/10_darius kawasaki person' + num_repeats = 10 + class_tokens = 'darius kawasaki person' + caption_extension = '.txt' \ No newline at end of file diff --git a/test/config/dreambooth-Adafactor.json b/test/config/dreambooth-Adafactor.json new file mode 100644 index 0000000000000000000000000000000000000000..9b99b7e703f6ab3bd481f0ee3666ae01f4cf7307 --- /dev/null +++ b/test/config/dreambooth-Adafactor.json @@ -0,0 +1,76 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 1, + "color_aug": false, + "enable_bucket": true, + "epoch": 1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 4.0, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 0.0001, + "logging_dir": "./test/logs", + "lr_scheduler": "constant", + "lr_warmup": 0, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + 
"model_list": "runwayml/stable-diffusion-v1-5", + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "no_token_padding": false, + "noise_offset": "0.05", + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "Adafactor", + "optimizer_args": "scale_parameter=False relative_step=False warmup_init=False", + "output_dir": "./test/output", + "output_name": "dreambooth-Adafactor", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "train_batch_size": 1, + "train_data_dir": "./test/img", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/test/config/dreambooth-AdamW.json b/test/config/dreambooth-AdamW.json new file mode 100644 index 0000000000000000000000000000000000000000..571752d5dd8c8d0125cb1e592ca0bd710b6be633 --- /dev/null +++ b/test/config/dreambooth-AdamW.json @@ -0,0 +1,105 @@ +{ + "additional_parameters": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": "2", + "color_aug": false, + "dataset_config": "", + 
"debiased_estimation_loss": false, + "enable_bucket": true, + "epoch": 1, + "extra_accelerate_launch_args": "", + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": "1", + "gradient_checkpointing": false, + "huber_c": 0.1, + "huber_schedule": "snr", + "ip_noise_gamma": 0, + "ip_noise_gamma_random_strength": false, + "keep_tokens": "0", + "learning_rate": 5e-05, + "learning_rate_te": 1e-05, + "learning_rate_te1": 1e-05, + "learning_rate_te2": 1e-05, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "loss_type": "l2", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "main_process_port": 0, + "masked_loss": false, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "no_token_padding": false, + "noise_offset_random_strength": false, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "db-AdamW", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1, + "random_crop": false, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_as_bool": false, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, 
+ "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "save_state_on_train_end": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "train_batch_size": 4, + "train_data_dir": "./test/img", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/dreambooth-AdamW8bit-masked_loss-toml.json b/test/config/dreambooth-AdamW8bit-masked_loss-toml.json new file mode 100644 index 0000000000000000000000000000000000000000..5d745d0bc33179aefd90ef8d93d9cd10335cea08 --- /dev/null +++ b/test/config/dreambooth-AdamW8bit-masked_loss-toml.json @@ -0,0 +1,100 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "dataset_config": "D:/kohya_ss/test/config/dataset-masked_loss.toml", + "enable_bucket": true, + "epoch": 1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "ip_noise_gamma": 0, + "ip_noise_gamma_random_strength": false, + "keep_tokens": "0", + "learning_rate": 5e-05, + "learning_rate_te": 1e-05, + "learning_rate_te1": 1e-05, + "learning_rate_te2": 1e-05, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "masked_loss": 
true, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "no_token_padding": false, + "noise_offset": 0.05, + "noise_offset_random_strength": false, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "db-AdamW8bit-masked_loss-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "save_state_on_train_end": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "train_batch_size": 4, + "train_data_dir": "", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/dreambooth-AdamW8bit-toml.json b/test/config/dreambooth-AdamW8bit-toml.json new file mode 100644 index 
0000000000000000000000000000000000000000..caf050d39408c4928b4500cd85718c679ad218e6 --- /dev/null +++ b/test/config/dreambooth-AdamW8bit-toml.json @@ -0,0 +1,101 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "dataset_config": "./test/config/dataset.toml", + "enable_bucket": true, + "epoch": 1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "ip_noise_gamma": 0, + "ip_noise_gamma_random_strength": false, + "keep_tokens": "0", + "learning_rate": 5e-05, + "learning_rate_te": 1e-05, + "learning_rate_te1": 1e-05, + "learning_rate_te2": 1e-05, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "main_process_port": 12345, + "masked_loss": false, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "no_token_padding": false, + "noise_offset": 0.05, + "noise_offset_random_strength": false, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "db-AdamW8bit-toml", + 
"persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "save_state_on_train_end": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "train_batch_size": 4, + "train_data_dir": "", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/dreambooth-AdamW8bit.json b/test/config/dreambooth-AdamW8bit.json new file mode 100644 index 0000000000000000000000000000000000000000..5f54db132070d2fe0c2b9733fec99f006a137ed3 --- /dev/null +++ b/test/config/dreambooth-AdamW8bit.json @@ -0,0 +1,115 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "async_upload": false, + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "dataset_config": "", + "debiased_estimation_loss": true, + "enable_bucket": true, + "epoch": 5, + "extra_accelerate_launch_args": "", + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "huber_c": 0.1, + "huber_schedule": 
"snr", + "huggingface_path_in_repo": "", + "huggingface_repo_id": "", + "huggingface_repo_type": "", + "huggingface_repo_visibility": "", + "huggingface_token": "", + "ip_noise_gamma": 0.1, + "ip_noise_gamma_random_strength": true, + "keep_tokens": 0, + "learning_rate": 5e-05, + "learning_rate_te": 1e-05, + "learning_rate_te1": 1e-05, + "learning_rate_te2": 1e-05, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "loss_type": "l2", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": 1, + "lr_scheduler_power": 1.02, + "lr_warmup": 0, + "main_process_port": 0, + "masked_loss": false, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": 0, + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": 75, + "max_train_epochs": 0, + "max_train_steps": "", + "mem_eff_attn": false, + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "fp16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "no_token_padding": false, + "noise_offset": 0.05, + "noise_offset_random_strength": true, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 8, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "db-AdamW8bit", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1, + "random_crop": false, + "reg_data_dir": "", + "resume": "", + "resume_from_huggingface": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 20, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_as_bool": false, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": 
"safetensors", + "save_precision": "fp16", + "save_state": false, + "save_state_on_train_end": false, + "save_state_to_huggingface": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl": false, + "seed": 1234, + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "train_batch_size": 4, + "train_data_dir": "./test/img", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/dreambooth-DAdaptAdam.json b/test/config/dreambooth-DAdaptAdam.json new file mode 100644 index 0000000000000000000000000000000000000000..67ef539f3adf50e4d9ebdf3e2bcbbbdb5a9cb674 --- /dev/null +++ b/test/config/dreambooth-DAdaptAdam.json @@ -0,0 +1,124 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "async_upload": false, + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": 2, + "color_aug": false, + "dataset_config": "", + "debiased_estimation_loss": false, + "dynamo_backend": "no", + "dynamo_mode": "default", + "dynamo_use_dynamic": false, + "dynamo_use_fullgraph": false, + "enable_bucket": true, + "epoch": 2, + "extra_accelerate_launch_args": "", + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "huber_c": 0.1, + "huber_schedule": "snr", + "huggingface_path_in_repo": "", + "huggingface_repo_id": "", + "huggingface_repo_type": "", + "huggingface_repo_visibility": "", + "huggingface_token": "", + "ip_noise_gamma": 0, + "ip_noise_gamma_random_strength": false, + "keep_tokens": 0, + "learning_rate": 1, + "learning_rate_te": 1e-05, + "learning_rate_te1": 
1e-05, + "learning_rate_te2": 1e-05, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "loss_type": "l2", + "lr_scheduler": "cosine", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": 1, + "lr_scheduler_power": 1, + "lr_warmup": 0, + "main_process_port": 0, + "masked_loss": false, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": 0, + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": 75, + "max_train_epochs": 0, + "max_train_steps": 0, + "mem_eff_attn": false, + "metadata_author": "", + "metadata_description": "", + "metadata_license": "", + "metadata_tags": "", + "metadata_title": "", + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "no_token_padding": false, + "noise_offset": 0.05, + "noise_offset_random_strength": false, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "DAdaptAdam", + "optimizer_args": "decouple=True weight_decay=0.6 betas=0.9,0.99 use_bias_correction=True", + "output_dir": "./test/output", + "output_name": "db-DAdaptAdam", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1, + "random_crop": false, + "reg_data_dir": "", + "resume": "", + "resume_from_huggingface": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_as_bool": false, + "save_every_n_epochs": 0, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "save_state_on_train_end": false, + "save_state_to_huggingface": false, + 
"scale_v_pred_loss_like_noise_pred": false, + "sdxl": false, + "seed": 1234, + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "train_batch_size": 1, + "train_data_dir": "./test/img", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/dreambooth-Prodigy-SDXL.json b/test/config/dreambooth-Prodigy-SDXL.json new file mode 100644 index 0000000000000000000000000000000000000000..d6f39d379a4347201676dcaa0f1336f2fc20e4c7 --- /dev/null +++ b/test/config/dreambooth-Prodigy-SDXL.json @@ -0,0 +1,91 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 32, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "enable_bucket": true, + "epoch": 1, + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 1.0, + "learning_rate_te": 1e-05, + "learning_rate_te1": 1e-05, + "learning_rate_te2": 0.0, + "logging_dir": "./test/logs", + "lr_scheduler": "cosine", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "stabilityai/stable-diffusion-xl-base-1.0", + "multi_gpu": false, + "multires_noise_discount": 0.2, + 
"multires_noise_iterations": 8, + "no_token_padding": false, + "noise_offset": "0.05", + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "Prodigy", + "optimizer_args": "decouple=True weight_decay=0.6 betas=0.9,0.99 use_bias_correction=True", + "output_dir": "./test/output", + "output_name": "db-Prodigy", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0", + "prior_loss_weight": 1.0, + "random_crop": false, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl": true, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "train_batch_size": 1, + "train_data_dir": "./test/img", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/dreambooth-Prodigy.json b/test/config/dreambooth-Prodigy.json new file mode 100644 index 0000000000000000000000000000000000000000..74a5e4829fc0a9ac280dcfb355f6fa4bba64f29f --- /dev/null +++ b/test/config/dreambooth-Prodigy.json @@ -0,0 +1,115 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "async_upload": false, + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0, + "caption_dropout_rate": 0, + "caption_extension": "", + "clip_skip": 1, + "color_aug": 
false, + "dataset_config": "", + "debiased_estimation_loss": true, + "enable_bucket": true, + "epoch": 1, + "extra_accelerate_launch_args": "", + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "huber_c": 0.1, + "huber_schedule": "snr", + "huggingface_path_in_repo": "", + "huggingface_repo_id": "", + "huggingface_repo_type": "", + "huggingface_repo_visibility": "", + "huggingface_token": "", + "ip_noise_gamma": 0.1, + "ip_noise_gamma_random_strength": true, + "keep_tokens": "0", + "learning_rate": 1, + "learning_rate_te": 1e-05, + "learning_rate_te1": 1e-05, + "learning_rate_te2": 0, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "loss_type": "l2", + "lr_scheduler": "cosine", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "main_process_port": 0, + "masked_loss": false, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "30", + "mem_eff_attn": false, + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "no_token_padding": false, + "noise_offset": 0.05, + "noise_offset_random_strength": true, + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "Prodigy", + "optimizer_args": "decouple=True weight_decay=0.6 betas=0.9,0.99 use_bias_correction=True", + "output_dir": "./test/output", + "output_name": "db-Prodigy", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1, + "random_crop": 
false, + "reg_data_dir": "", + "resume": "", + "resume_from_huggingface": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_as_bool": false, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "save_state_on_train_end": false, + "save_state_to_huggingface": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "train_batch_size": 1, + "train_data_dir": "./test/img", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/dreambooth.json b/test/config/dreambooth.json new file mode 100644 index 0000000000000000000000000000000000000000..41b68bb2fb40d114b1a8933bb275afeb068127ac --- /dev/null +++ b/test/config/dreambooth.json @@ -0,0 +1,76 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 1, + "color_aug": false, + "enable_bucket": true, + "epoch": 1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 4.0, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 1.0, + "logging_dir": "./test/logs", + "lr_scheduler": "constant", + "lr_warmup": 0, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + 
"mem_eff_attn": false, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "no_token_padding": false, + "noise_offset": "0.05", + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "db", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "train_batch_size": 1, + "train_data_dir": "./test/img", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/test/config/finetune-AdamW-toml.json b/test/config/finetune-AdamW-toml.json new file mode 100644 index 0000000000000000000000000000000000000000..1eb14a7018a16fb856b32e418e4b3a564326c4f5 --- /dev/null +++ b/test/config/finetune-AdamW-toml.json @@ -0,0 +1,129 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "async_upload": false, + "batch_size": "8", + "block_lr": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0, + "caption_dropout_rate": 0, 
+ "caption_extension": ".txt", + "caption_metadata_filename": "meta-1_cap.json", + "clip_skip": 1, + "color_aug": false, + "create_buckets": false, + "create_caption": true, + "dataset_config": "./test/config/dataset-finetune.toml", + "dataset_repeats": "50", + "debiased_estimation_loss": false, + "dynamo_backend": "no", + "dynamo_mode": "default", + "dynamo_use_dynamic": false, + "dynamo_use_fullgraph": false, + "epoch": 2, + "extra_accelerate_launch_args": "", + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "full_path": true, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "huber_c": 0.1, + "huber_schedule": "snr", + "huggingface_path_in_repo": "", + "huggingface_repo_id": "", + "huggingface_repo_type": "", + "huggingface_repo_visibility": "", + "huggingface_token": "", + "image_folder": "", + "ip_noise_gamma": 0, + "ip_noise_gamma_random_strength": false, + "keep_tokens": 0, + "latent_metadata_filename": "meta-1_lat.json", + "learning_rate": 1e-05, + "learning_rate_te": 5e-06, + "learning_rate_te1": 5e-06, + "learning_rate_te2": 0, + "log_tracker_config": "", + "log_tracker_name": "", + "log_with": "", + "logging_dir": "./test/ft", + "loss_type": "l2", + "lr_scheduler": "cosine_with_restarts", + "lr_scheduler_args": "", + "lr_warmup": 10, + "main_process_port": 0, + "masked_loss": false, + "max_bucket_reso": "1024", + "max_data_loader_n_workers": 0, + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": 75, + "max_train_epochs": 0, + "max_train_steps": 20, + "mem_eff_attn": false, + "metadata_author": "", + "metadata_description": "", + "metadata_license": "", + "metadata_tags": "", + "metadata_title": "", + "min_bucket_reso": "256", + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "noise_offset": 0, + 
"noise_offset_random_strength": false, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "test_ft-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "random_crop": false, + "resume": "", + "resume_from_huggingface": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 0, + "sample_prompts": "", + "sample_sampler": "euler_a", + "save_as_bool": false, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "bf16", + "save_state": false, + "save_state_on_train_end": false, + "save_state_to_huggingface": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_checkbox": false, + "sdxl_no_half_vae": false, + "seed": 1234, + "shuffle_caption": false, + "train_batch_size": 4, + "train_dir": "./test", + "train_text_encoder": true, + "use_latent_files": "No", + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/finetune-AdamW.json b/test/config/finetune-AdamW.json new file mode 100644 index 0000000000000000000000000000000000000000..8fdca66039cf413fd8958bc0c5910fbbbb654753 --- /dev/null +++ b/test/config/finetune-AdamW.json @@ -0,0 +1,111 @@ +{ + "adaptive_noise_scale": 0, + "additional_parameters": "", + "batch_size": "8", + "block_lr": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "caption_metadata_filename": 
"meta-1_cap5.json", + "clip_skip": 1, + "color_aug": false, + "create_buckets": true, + "create_caption": true, + "dataset_config": "", + "dataset_repeats": "50", + "epoch": 2, + "extra_accelerate_launch_args": "", + "flip_aug": false, + "full_bf16": false, + "full_fp16": false, + "full_path": true, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "huber_c": 0.1, + "huber_schedule": "snr", + "image_folder": "./test/img/10_darius kawasaki person", + "ip_noise_gamma": 0, + "ip_noise_gamma_random_strength": false, + "keep_tokens": 0, + "latent_metadata_filename": "meta-1_lat5.json", + "learning_rate": 1e-05, + "learning_rate_te": 5e-06, + "learning_rate_te1": 5e-06, + "learning_rate_te2": 0, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/ft", + "loss_type": "l2", + "lr_scheduler": "cosine_with_restarts", + "lr_scheduler_args": "", + "lr_warmup": 10, + "main_process_port": 0, + "masked_loss": false, + "max_bucket_reso": "1024", + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "min_bucket_reso": "256", + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "noise_offset": 0, + "noise_offset_random_strength": false, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "test_ft", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "random_crop": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 0, + "sample_prompts": "", + "sample_sampler": "euler_a", + 
"save_as_bool": false, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "bf16", + "save_state": false, + "save_state_on_train_end": false, + "scale_v_pred_loss_like_noise_pred": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_checkbox": false, + "sdxl_no_half_vae": false, + "seed": "1234", + "shuffle_caption": false, + "train_batch_size": 4, + "train_dir": "./test", + "train_text_encoder": true, + "use_latent_files": "No", + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/iA3-Prodigy.json b/test/config/iA3-Prodigy.json new file mode 100644 index 0000000000000000000000000000000000000000..6a6156d6fc0a471377252a8d8ad893d41b20a152 --- /dev/null +++ b/test/config/iA3-Prodigy.json @@ -0,0 +1,104 @@ +{ + "LoRA_type": "LyCORIS/iA3", + "adaptive_noise_scale": 0.005, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.5, + "caption_extension": ".txt", + "clip_skip": 2, + "color_aug": false, + "conv_alpha": 8, + "conv_alphas": "", + "conv_dim": 16, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 4, + "factor": -1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": 1, + "learning_rate": 1.0, + "logging_dir": "./test/logs", + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_num_cycles": "", + 
"lr_scheduler_power": "", + "lr_warmup": 8, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_snr_gamma": 5, + "mixed_precision": "bf16", + "model_list": "custom", + "module_dropout": 0.1, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 1024, + "network_dim": 1024, + "network_dropout": 0.3, + "no_token_padding": false, + "noise_offset": 0.05, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "Prodigy", + "optimizer_args": "d_coef=1.0 weight_decay=0.01 safeguard_warmup=True use_bias_correction=False", + "output_dir": "./test/output", + "output_name": "iA3-Prodigy", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0.1, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 1, + "sample_every_n_steps": 0, + "sample_prompts": "a man wearing a gas mask, by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": false, + "seed": "31337", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 1, + "train_data_dir": "./test/img", + "train_on_input": false, + "training_comment": "rentry.co/ProdiAgy", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": true, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "wandb_api_key": "", + "weighted_captions": false, + "xformers": true +} \ No 
newline at end of file diff --git a/test/config/locon-Adafactor.json b/test/config/locon-Adafactor.json new file mode 100644 index 0000000000000000000000000000000000000000..307aaa21a3989a26502a0bb0f64601f235241881 --- /dev/null +++ b/test/config/locon-Adafactor.json @@ -0,0 +1,138 @@ +{ + "LoRA_type": "Kohya LoCon", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0.005, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "bypass_mode": false, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "constrain": 0.0, + "conv_alpha": 8, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 16, + "dataset_config": "", + "debiased_estimation_loss": false, + "decompose_both": false, + "dim_from_weights": false, + "dora_wd": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 8, + "factor": -1, + "flip_aug": false, + "fp8_base": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "ip_noise_gamma": 0.1, + "ip_noise_gamma_random_strength": true, + "keep_tokens": "0", + "learning_rate": 0.0005, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "main_process_port": 0, + "masked_loss": false, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_grad_norm": 0, + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 0, + 
"min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "module_dropout": 0.1, + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 8, + "network_dim": 16, + "network_dropout": 0.1, + "noise_offset": 0.05, + "noise_offset_random_strength": true, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "Adafactor", + "optimizer_args": "scale_parameter=False relative_step=False warmup_init=False", + "output_dir": "./test/output", + "output_name": "locon-adafactor", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0.1, + "rank_dropout_scale": false, + "reg_data_dir": "", + "rescaled": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "save_state_on_train_end": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 4, + "train_data_dir": "./test/img", + "train_norm": false, + "train_on_input": false, + "training_comment": "", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_scalar": false, + "use_tucker": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + 
"wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/locon-AdamW.json b/test/config/locon-AdamW.json new file mode 100644 index 0000000000000000000000000000000000000000..d4acf181cb47580906f1ba28c7e3b3106054f9cc --- /dev/null +++ b/test/config/locon-AdamW.json @@ -0,0 +1,143 @@ +{ + "LoRA_type": "Kohya LoCon", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "bypass_mode": false, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "constrain": 0, + "conv_alpha": 8, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 16, + "dataset_config": "", + "debiased_estimation_loss": false, + "decompose_both": false, + "dim_from_weights": false, + "dora_wd": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 1, + "extra_accelerate_launch_args": "", + "factor": -1, + "flip_aug": false, + "fp8_base": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 4, + "gradient_checkpointing": false, + "huber_c": 0.1, + "huber_schedule": "snr", + "ip_noise_gamma": 0, + "ip_noise_gamma_random_strength": false, + "keep_tokens": "0", + "learning_rate": 0.0005, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lora_network_weights": "", + "loss_type": "l2", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "main_process_port": 0, + "masked_loss": false, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_grad_norm": 1, + "max_resolution": "512,512", + "max_timestep": 1000, + 
"max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "module_dropout": 0.1, + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 8, + "network_dim": 16, + "network_dropout": 0.1, + "noise_offset": 0.05, + "noise_offset_random_strength": false, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "locon-AdamW", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1, + "random_crop": false, + "rank_dropout": 0.1, + "rank_dropout_scale": false, + "reg_data_dir": "", + "rescaled": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_as_bool": false, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "save_state_on_train_end": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": false, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 1, + "train_data_dir": "./test/img", + "train_norm": false, + "train_on_input": false, + "training_comment": "", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_scalar": false, + "use_tucker": false, + "use_wandb": false, + "v2": 
false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/locon-AdamW8bit-masked_loss-toml.json b/test/config/locon-AdamW8bit-masked_loss-toml.json new file mode 100644 index 0000000000000000000000000000000000000000..494012ffd4a6d9169dd76ea8c6b2c6606b35e788 --- /dev/null +++ b/test/config/locon-AdamW8bit-masked_loss-toml.json @@ -0,0 +1,137 @@ +{ + "LoRA_type": "Standard", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "bypass_mode": false, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "constrain": 0.0, + "conv_alpha": 1, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 1, + "dataset_config": "D:/kohya_ss/test/config/dataset-masked_loss.toml", + "debiased_estimation_loss": false, + "decompose_both": false, + "dim_from_weights": false, + "dora_wd": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 1, + "factor": -1, + "flip_aug": false, + "fp8_base": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "ip_noise_gamma": 0, + "ip_noise_gamma_random_strength": false, + "keep_tokens": "0", + "learning_rate": 0.0005, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "masked_loss": true, + "max_bucket_reso": 2048, + 
"max_data_loader_n_workers": "0", + "max_grad_norm": 1, + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "module_dropout": 0, + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 1, + "network_dim": 8, + "network_dropout": 0, + "noise_offset": 0.05, + "noise_offset_random_strength": false, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "locon-AdamW8bit-masked_loss-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "rank_dropout_scale": false, + "reg_data_dir": "", + "rescaled": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "save_state_on_train_end": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0, + "train_batch_size": 4, + "train_data_dir": "", + "train_norm": false, + "train_on_input": true, + "training_comment": "", + "unet_lr": 0.0, + "unit": 1, + "up_lr_weight": "", + 
"use_cp": false, + "use_scalar": false, + "use_tucker": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/locon-AdamW8bit-toml.json b/test/config/locon-AdamW8bit-toml.json new file mode 100644 index 0000000000000000000000000000000000000000..8852cd389b3cc3bd15ebdb4e82e59f7d75ee2485 --- /dev/null +++ b/test/config/locon-AdamW8bit-toml.json @@ -0,0 +1,130 @@ +{ + "LoRA_type": "Standard", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "constrain": 0.0, + "conv_alpha": 1, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 1, + "dataset_config": "D:/kohya_ss/test/config/dataset.toml", + "debiased_estimation_loss": false, + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 1, + "factor": -1, + "flip_aug": false, + "fp8_base": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 0.0005, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lora_network_weights": "", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": "0", + "max_grad_norm": 1, + "max_resolution": "512,512", + "max_timestep": 
1000, + "max_token_length": "75", + "max_train_epochs": "", + "max_train_steps": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "module_dropout": 0, + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 1, + "network_dim": 8, + "network_dropout": 0, + "noise_offset": 0.05, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "locon-AdamW8bit-toml", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "rank_dropout_scale": false, + "reg_data_dir": "", + "rescaled": false, + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0, + "train_batch_size": 4, + "train_data_dir": "", + "train_norm": false, + "train_on_input": true, + "training_comment": "", + "unet_lr": 0.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_scalar": false, + "use_tucker": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + 
"wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/locon-AdamW8bit.json b/test/config/locon-AdamW8bit.json new file mode 100644 index 0000000000000000000000000000000000000000..968b4a4af961fa6e2a028a1012275562077358ae --- /dev/null +++ b/test/config/locon-AdamW8bit.json @@ -0,0 +1,160 @@ +{ + "LoRA_type": "Kohya LoCon", + "LyCORIS_preset": "full", + "adaptive_noise_scale": 0.005, + "additional_parameters": "", + "async_upload": false, + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 64, + "bypass_mode": false, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0, + "caption_dropout_rate": 0.05, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "constrain": 0, + "conv_alpha": 8, + "conv_block_alphas": "", + "conv_block_dims": "", + "conv_dim": 16, + "dataset_config": "", + "debiased_estimation_loss": true, + "decompose_both": false, + "dim_from_weights": false, + "dora_wd": false, + "down_lr_weight": "", + "dynamo_backend": "no", + "dynamo_mode": "default", + "dynamo_use_dynamic": false, + "dynamo_use_fullgraph": false, + "enable_bucket": true, + "epoch": 8, + "extra_accelerate_launch_args": "", + "factor": -1, + "flip_aug": false, + "fp8_base": false, + "full_bf16": false, + "full_fp16": false, + "gpu_ids": "", + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "huber_c": 0.1, + "huber_schedule": "snr", + "huggingface_path_in_repo": "", + "huggingface_repo_id": "", + "huggingface_repo_type": "", + "huggingface_repo_visibility": "", + "huggingface_token": "", + "ip_noise_gamma": 0.1, + "ip_noise_gamma_random_strength": true, + "keep_tokens": 0, + "learning_rate": 0.0005, + "log_tracker_config": "", + "log_tracker_name": "", + "logging_dir": "./test/logs", + "lora_network_weights": "", + "loss_type": 
"l2", + "lr_scheduler": "constant", + "lr_scheduler_args": "", + "lr_scheduler_num_cycles": 1, + "lr_scheduler_power": 1, + "lr_warmup": 0, + "main_process_port": 0, + "masked_loss": false, + "max_bucket_reso": 2048, + "max_data_loader_n_workers": 0, + "max_grad_norm": 1, + "max_resolution": "512,512", + "max_timestep": 1000, + "max_token_length": 75, + "max_train_epochs": 0, + "max_train_steps": 0, + "mem_eff_attn": false, + "metadata_author": "", + "metadata_description": "", + "metadata_license": "", + "metadata_tags": "", + "metadata_title": "", + "mid_lr_weight": "", + "min_bucket_reso": 256, + "min_snr_gamma": 0, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "module_dropout": 0.1, + "multi_gpu": false, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 8, + "network_dim": 16, + "network_dropout": 0.1, + "noise_offset": 0.05, + "noise_offset_random_strength": true, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "num_machines": 1, + "num_processes": 1, + "optimizer": "AdamW8bit", + "optimizer_args": "", + "output_dir": "./test/output", + "output_name": "locon-AdamW8bit", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1, + "random_crop": false, + "rank_dropout": 0.1, + "rank_dropout_scale": false, + "reg_data_dir": "", + "rescaled": false, + "resume": "", + "resume_from_huggingface": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_as_bool": false, + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "save_state_on_train_end": false, + "save_state_to_huggingface": false, + 
"scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "sdxl": false, + "sdxl_cache_text_encoder_outputs": false, + "sdxl_no_half_vae": true, + "seed": 1234, + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0001, + "train_batch_size": 4, + "train_data_dir": "./test/img", + "train_norm": false, + "train_on_input": false, + "training_comment": "", + "unet_lr": 0.0001, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_scalar": false, + "use_tucker": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "v_pred_like_loss": 0, + "vae": "", + "vae_batch_size": 0, + "wandb_api_key": "", + "wandb_run_name": "", + "weighted_captions": false, + "xformers": "xformers" +} \ No newline at end of file diff --git a/test/config/locon-Prodigy.json b/test/config/locon-Prodigy.json new file mode 100644 index 0000000000000000000000000000000000000000..c21e607237b013ec47d7122061c1a8ada76da591 --- /dev/null +++ b/test/config/locon-Prodigy.json @@ -0,0 +1,101 @@ +{ + "LoRA_type": "Kohya LoCon", + "adaptive_noise_scale": 0, + "additional_parameters": "", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": true, + "bucket_reso_steps": 1, + "cache_latents": true, + "cache_latents_to_disk": false, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": "", + "clip_skip": 2, + "color_aug": false, + "conv_alpha": 8, + "conv_alphas": "", + "conv_dim": 16, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 1, + "factor": -1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 1, + "gradient_checkpointing": false, + "keep_tokens": "0", + "learning_rate": 1.0, + "logging_dir": "./test/logs", + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_num_cycles": "", + "lr_scheduler_power": "", + "lr_warmup": 
0, + "max_data_loader_n_workers": "0", + "max_resolution": "512,512", + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_snr_gamma": 10, + "mixed_precision": "bf16", + "model_list": "runwayml/stable-diffusion-v1-5", + "module_dropout": 0.1, + "multires_noise_discount": 0.2, + "multires_noise_iterations": 8, + "network_alpha": 8, + "network_dim": 16, + "network_dropout": 0.1, + "no_token_padding": false, + "noise_offset": "0.05", + "noise_offset_type": "Multires", + "num_cpu_threads_per_process": 2, + "optimizer": "Prodigy", + "optimizer_args": "decouple=True weight_decay=0.6 betas=0.9,0.99 use_bias_correction=True", + "output_dir": "./test/output", + "output_name": "locon-Prodigy", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": "runwayml/stable-diffusion-v1-5", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0.1, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 25, + "sample_prompts": "a painting of a gas mask , by darius kawasaki", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "fp16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 1, + "seed": "1234", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 1.0, + "train_batch_size": 4, + "train_data_dir": "./test/img", + "train_on_input": false, + "training_comment": "", + "unet_lr": 1.0, + "unit": 1, + "up_lr_weight": "", + "use_cp": true, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "wandb_api_key": "", + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/test/config/loha-Prodigy.json b/test/config/loha-Prodigy.json new file mode 100644 index 
0000000000000000000000000000000000000000..63b78f7fd6b8d8f8aa7f5d93302b25d1e687fca3 --- /dev/null +++ b/test/config/loha-Prodigy.json @@ -0,0 +1,106 @@ +{ + "LoRA_type": "LyCORIS/LoHa", + "adaptive_noise_scale": 0, + "additional_parameters": "--log_prefix=xl-loha", + "block_alphas": "", + "block_dims": "", + "block_lr_zero_threshold": "", + "bucket_no_upscale": false, + "bucket_reso_steps": 64, + "cache_latents": true, + "cache_latents_to_disk": true, + "caption_dropout_every_n_epochs": 0.0, + "caption_dropout_rate": 0, + "caption_extension": ".txt", + "clip_skip": "1", + "color_aug": false, + "conv_alpha": 4, + "conv_alphas": "", + "conv_dim": 4, + "conv_dims": "", + "decompose_both": false, + "dim_from_weights": false, + "down_lr_weight": "", + "enable_bucket": true, + "epoch": 10, + "factor": -1, + "flip_aug": false, + "full_fp16": false, + "gradient_accumulation_steps": 1.0, + "gradient_checkpointing": true, + "keep_tokens": "0", + "learning_rate": 0.002, + "logging_dir": "E:\\froggy\\loha\\logs", + "lora_network_weights": "", + "lr_scheduler": "cosine", + "lr_scheduler_num_cycles": "1", + "lr_scheduler_power": "", + "lr_warmup": 0, + "max_data_loader_n_workers": "0", + "max_resolution": "1024,1024", + "max_timestep": 1000, + "max_token_length": "75", + "max_train_epochs": "", + "mem_eff_attn": false, + "mid_lr_weight": "", + "min_snr_gamma": 5, + "min_timestep": 0, + "mixed_precision": "bf16", + "model_list": "custom", + "module_dropout": 0, + "multires_noise_discount": 0, + "multires_noise_iterations": 0, + "network_alpha": 8, + "network_dim": 8, + "network_dropout": 0, + "no_token_padding": false, + "noise_offset": 0.0357, + "noise_offset_type": "Original", + "num_cpu_threads_per_process": 2, + "optimizer": "AdamW8bit", + "optimizer_args": "weight_decay=0.05 betas=0.9,0.98", + "output_dir": "d:\\lycoris\\sdxl", + "output_name": "froddy-loha-sx_v1.0a", + "persistent_data_loader_workers": false, + "pretrained_model_name_or_path": 
"D:/models/sdxl/sd_xl_base_0.9.safetensors", + "prior_loss_weight": 1.0, + "random_crop": false, + "rank_dropout": 0, + "reg_data_dir": "", + "resume": "", + "sample_every_n_epochs": 0, + "sample_every_n_steps": 0, + "sample_prompts": "", + "sample_sampler": "euler_a", + "save_every_n_epochs": 1, + "save_every_n_steps": 0, + "save_last_n_steps": 0, + "save_last_n_steps_state": 0, + "save_model_as": "safetensors", + "save_precision": "bf16", + "save_state": false, + "scale_v_pred_loss_like_noise_pred": false, + "scale_weight_norms": 0, + "sdxl": true, + "sdxl_cache_text_encoder_outputs": true, + "sdxl_no_half_vae": true, + "seed": "17415", + "shuffle_caption": false, + "stop_text_encoder_training": 0, + "text_encoder_lr": 0.0, + "train_batch_size": 1, + "train_data_dir": "E:\\froggy\\img", + "train_on_input": false, + "training_comment": "", + "unet_lr": 0.002, + "unit": 1, + "up_lr_weight": "", + "use_cp": false, + "use_wandb": false, + "v2": false, + "v_parameterization": false, + "vae_batch_size": 0, + "wandb_api_key": "", + "weighted_captions": false, + "xformers": true +} \ No newline at end of file diff --git a/test/config/meta-1_lat.json b/test/config/meta-1_lat.json new file mode 100644 index 0000000000000000000000000000000000000000..fd5ac0aaaeb34e5addc7d1ce489fc3fd7d8cab0d --- /dev/null +++ b/test/config/meta-1_lat.json @@ -0,0 +1,58 @@ +{ + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki.jpg": { + "caption": "a painting of a steam punk skull with a gas mask , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki_2.jpg": { + "caption": "a painting of a man with a skull on his head , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki_3.jpg": { + "caption": "a painting of a woman with a helmet on her head , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius 
kawasaki person\\Dariusz_Zawadzki_4.jpg": { + "caption": "a painting of a horned man with a goat head , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki_5.jpg": { + "caption": "a painting of a man playing a piano , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki_6.jpg": { + "caption": "a painting of a robot sitting on a rock , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki_7.jpg": { + "caption": "a painting of a soldier with a helmet on , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + }, + "test\\img\\10_darius kawasaki person\\Dariusz_Zawadzki_8.jpg": { + "caption": "a painting of a giant crab with a large body , by darius kawasaki", + "train_resolution": [ + 1024, + 1024 + ] + } +} \ No newline at end of file diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki.jpg b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ccfc199477904f7a9362827cbae450c845963159 Binary files /dev/null and b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki.jpg differ diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki.txt b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki.txt new file mode 100644 index 0000000000000000000000000000000000000000..b589f7c7e3509c5854b856f8bce4021d2b32828d --- /dev/null +++ b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki.txt @@ -0,0 +1 @@ +a painting of a steam punk skull with a gas mask , by darius kawasaki \ No newline at end of file diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_2.jpg b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_2.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..bb062cacace77dabdc92a73876afddc1e42fdfb4 Binary files /dev/null and b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_2.jpg differ diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_2.txt b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..e7ded5fa8c1c6bb2aa71e0da722f1c30c270bc4e --- /dev/null +++ b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_2.txt @@ -0,0 +1 @@ +a painting of a man with a skull on his head , by darius kawasaki \ No newline at end of file diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_3.jpg b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e84720572d71bbdb49401a33cb00683377da75bc Binary files /dev/null and b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_3.jpg differ diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_3.txt b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..4ef297cf866fe3b6cab53410c8697a5be49a30a2 --- /dev/null +++ b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_3.txt @@ -0,0 +1 @@ +a painting of a woman with a helmet on her head , by darius kawasaki \ No newline at end of file diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_4.jpg b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_4.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8821fa28253106d946ab2f35010e85976c2aa895 Binary files /dev/null and b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_4.jpg differ diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_4.txt b/test/img with spaces/10_darius kawasaki 
person/Dariusz_Zawadzki_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..4abb67e8f8186eb214bd3ef5c4a2b8212d5bc2a2 --- /dev/null +++ b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_4.txt @@ -0,0 +1 @@ +a painting of a horned man with a goat head , by darius kawasaki \ No newline at end of file diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_5.jpg b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_5.jpg new file mode 100644 index 0000000000000000000000000000000000000000..590a097a61140ed39fc50060c6b000e9fcc1e4ff Binary files /dev/null and b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_5.jpg differ diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_5.txt b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..406814f85c1da098a95bc9a6da32e469e180006f --- /dev/null +++ b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_5.txt @@ -0,0 +1 @@ +a painting of a man playing a piano , by darius kawasaki \ No newline at end of file diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_6.jpg b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_6.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4a39ce4289b5da8d635a2d90a8be1edeef18b73b Binary files /dev/null and b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_6.jpg differ diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_6.txt b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..55de4a66eb345b65f0fb2fa8b4856c3939b5ba55 --- /dev/null +++ b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_6.txt @@ -0,0 +1 @@ +a painting of a robot sitting on a rock , by darius kawasaki \ No newline at end of file diff 
--git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_7.jpg b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_7.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0c1f50f42b6243a5a170ca0c2c3068c7897d29f2 Binary files /dev/null and b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_7.jpg differ diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_7.txt b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..e36dca828d342e326158f04bb5f4c5784df2df7e --- /dev/null +++ b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_7.txt @@ -0,0 +1 @@ +a painting of a soldier with a helmet on , by darius kawasaki \ No newline at end of file diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_8.jpg b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_8.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7d3380be2126a32df205c8dbfb06dfbde1e78354 Binary files /dev/null and b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_8.jpg differ diff --git a/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_8.txt b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..0087fedfc24537feea1f3d26bc7426dc32b388b8 --- /dev/null +++ b/test/img with spaces/10_darius kawasaki person/Dariusz_Zawadzki_8.txt @@ -0,0 +1 @@ +a painting of a giant crab with a large body , by darius kawasaki \ No newline at end of file diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki.cap b/test/img/10_darius kawasaki person/Dariusz_Zawadzki.cap new file mode 100644 index 0000000000000000000000000000000000000000..b330e301f9ba63edae56bb80b22ad7aa071e6849 --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki.cap @@ -0,0 +1 @@ 
+solo,simple background,teeth,grey background,from side,no humans,mask,1other,science fiction,cable,gas mask,tube,steampunk,machine diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki.jpg b/test/img/10_darius kawasaki person/Dariusz_Zawadzki.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ccfc199477904f7a9362827cbae450c845963159 Binary files /dev/null and b/test/img/10_darius kawasaki person/Dariusz_Zawadzki.jpg differ diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki.txt b/test/img/10_darius kawasaki person/Dariusz_Zawadzki.txt new file mode 100644 index 0000000000000000000000000000000000000000..b589f7c7e3509c5854b856f8bce4021d2b32828d --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki.txt @@ -0,0 +1 @@ +a painting of a steam punk skull with a gas mask , by darius kawasaki \ No newline at end of file diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_2.cap b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_2.cap new file mode 100644 index 0000000000000000000000000000000000000000..bd9995fa264a7b9f6df8f1c40835334c8be842f4 --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_2.cap @@ -0,0 +1 @@ +no humans,what diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_2.jpg b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bb062cacace77dabdc92a73876afddc1e42fdfb4 Binary files /dev/null and b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_2.jpg differ diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_2.txt b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_2.txt new file mode 100644 index 0000000000000000000000000000000000000000..e7ded5fa8c1c6bb2aa71e0da722f1c30c270bc4e --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_2.txt @@ -0,0 +1 @@ +a painting of a man with a skull on his head , by darius kawasaki \ No newline at end of file 
diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_3.cap b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_3.cap new file mode 100644 index 0000000000000000000000000000000000000000..b6d5589724560a0ce45824efa4c6660491f9a937 --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_3.cap @@ -0,0 +1 @@ +1girl,solo,nude,colored skin,monster,blue skin diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_3.jpg b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..e84720572d71bbdb49401a33cb00683377da75bc Binary files /dev/null and b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_3.jpg differ diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_3.txt b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..4ef297cf866fe3b6cab53410c8697a5be49a30a2 --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_3.txt @@ -0,0 +1 @@ +a painting of a woman with a helmet on her head , by darius kawasaki \ No newline at end of file diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_4.cap b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_4.cap new file mode 100644 index 0000000000000000000000000000000000000000..90f6cfeb1e43b5401193ae5713573529cef75ffb --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_4.cap @@ -0,0 +1 @@ +solo,upper body,horns,from side,no humans,blood,1other diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_4.jpg b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_4.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8821fa28253106d946ab2f35010e85976c2aa895 Binary files /dev/null and b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_4.jpg differ diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_4.txt b/test/img/10_darius kawasaki 
person/Dariusz_Zawadzki_4.txt new file mode 100644 index 0000000000000000000000000000000000000000..4abb67e8f8186eb214bd3ef5c4a2b8212d5bc2a2 --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_4.txt @@ -0,0 +1 @@ +a painting of a horned man with a goat head , by darius kawasaki \ No newline at end of file diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_5.cap b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_5.cap new file mode 100644 index 0000000000000000000000000000000000000000..00b4da4ecfdab4cdab9aec68e1c5b8a6b27f4d09 --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_5.cap @@ -0,0 +1 @@ +solo,1boy,male focus,mask,instrument,science fiction,realistic,music,gas mask diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_5.jpg b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_5.jpg new file mode 100644 index 0000000000000000000000000000000000000000..590a097a61140ed39fc50060c6b000e9fcc1e4ff Binary files /dev/null and b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_5.jpg differ diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_5.txt b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_5.txt new file mode 100644 index 0000000000000000000000000000000000000000..406814f85c1da098a95bc9a6da32e469e180006f --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_5.txt @@ -0,0 +1 @@ +a painting of a man playing a piano , by darius kawasaki \ No newline at end of file diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_6.cap b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_6.cap new file mode 100644 index 0000000000000000000000000000000000000000..da0bdd535025bb11a33f3c1dad8a60a147b10bae --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_6.cap @@ -0,0 +1 @@ +solo,no humans,mask,helmet,robot,mecha,1other,science fiction,damaged,gas mask,steampunk diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_6.jpg 
b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_6.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4a39ce4289b5da8d635a2d90a8be1edeef18b73b Binary files /dev/null and b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_6.jpg differ diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_6.txt b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_6.txt new file mode 100644 index 0000000000000000000000000000000000000000..55de4a66eb345b65f0fb2fa8b4856c3939b5ba55 --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_6.txt @@ -0,0 +1 @@ +a painting of a robot sitting on a rock , by darius kawasaki \ No newline at end of file diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_7.cap b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_7.cap new file mode 100644 index 0000000000000000000000000000000000000000..300d54081a974f746cf7c372276167558bbdcb54 --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_7.cap @@ -0,0 +1 @@ +solo,from side,no humans,mask,moon,helmet,portrait,1other,ambiguous gender,gas mask diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_7.jpg b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_7.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0c1f50f42b6243a5a170ca0c2c3068c7897d29f2 Binary files /dev/null and b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_7.jpg differ diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_7.txt b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_7.txt new file mode 100644 index 0000000000000000000000000000000000000000..e36dca828d342e326158f04bb5f4c5784df2df7e --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_7.txt @@ -0,0 +1 @@ +a painting of a soldier with a helmet on , by darius kawasaki \ No newline at end of file diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_8.cap b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_8.cap new 
file mode 100644 index 0000000000000000000000000000000000000000..763e85783d55d2a6b8a2c840854463c902db5795 --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_8.cap @@ -0,0 +1 @@ +outdoors,sky,cloud,no humans,monster,realistic,desert diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_8.jpg b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_8.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7d3380be2126a32df205c8dbfb06dfbde1e78354 Binary files /dev/null and b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_8.jpg differ diff --git a/test/img/10_darius kawasaki person/Dariusz_Zawadzki_8.txt b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_8.txt new file mode 100644 index 0000000000000000000000000000000000000000..0087fedfc24537feea1f3d26bc7426dc32b388b8 --- /dev/null +++ b/test/img/10_darius kawasaki person/Dariusz_Zawadzki_8.txt @@ -0,0 +1 @@ +a painting of a giant crab with a large body , by darius kawasaki \ No newline at end of file diff --git a/test/masked_loss/Dariusz_Zawadzki.jpg b/test/masked_loss/Dariusz_Zawadzki.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4358e6b9e933482c36aee10ffddd801fa3dc952b Binary files /dev/null and b/test/masked_loss/Dariusz_Zawadzki.jpg differ diff --git a/test/masked_loss/Dariusz_Zawadzki_2.jpg b/test/masked_loss/Dariusz_Zawadzki_2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cf5c489b3975997a7051d65261397288f052bce1 Binary files /dev/null and b/test/masked_loss/Dariusz_Zawadzki_2.jpg differ diff --git a/test/masked_loss/Dariusz_Zawadzki_3.jpg b/test/masked_loss/Dariusz_Zawadzki_3.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ef89411c6fc6b01126ff41a3b87f804cd7351839 Binary files /dev/null and b/test/masked_loss/Dariusz_Zawadzki_3.jpg differ diff --git a/test/masked_loss/Dariusz_Zawadzki_4.jpg b/test/masked_loss/Dariusz_Zawadzki_4.jpg new file mode 100644 index 
0000000000000000000000000000000000000000..438602441bf7a0191d5e2c734ce0b7ec38dba489 Binary files /dev/null and b/test/masked_loss/Dariusz_Zawadzki_4.jpg differ diff --git a/test/masked_loss/Dariusz_Zawadzki_5.jpg b/test/masked_loss/Dariusz_Zawadzki_5.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2b64b0da482d72008f1235c65e141693f2ef764e Binary files /dev/null and b/test/masked_loss/Dariusz_Zawadzki_5.jpg differ diff --git a/test/masked_loss/Dariusz_Zawadzki_6.jpg b/test/masked_loss/Dariusz_Zawadzki_6.jpg new file mode 100644 index 0000000000000000000000000000000000000000..50d3f6a6d51df18baba9896c3febeaa25def61c3 Binary files /dev/null and b/test/masked_loss/Dariusz_Zawadzki_6.jpg differ diff --git a/test/masked_loss/Dariusz_Zawadzki_7.jpg b/test/masked_loss/Dariusz_Zawadzki_7.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f70fe2a64738c45a6ec15457e632d3510817653e Binary files /dev/null and b/test/masked_loss/Dariusz_Zawadzki_7.jpg differ diff --git a/test/masked_loss/Dariusz_Zawadzki_8.jpg b/test/masked_loss/Dariusz_Zawadzki_8.jpg new file mode 100644 index 0000000000000000000000000000000000000000..3f4507efe1170d3af583465c45352e336f85619e Binary files /dev/null and b/test/masked_loss/Dariusz_Zawadzki_8.jpg differ diff --git a/tools/caption.py b/tools/caption.py new file mode 100644 index 0000000000000000000000000000000000000000..f4300b1542634edb0afa45a22e48aa93ac099c89 --- /dev/null +++ b/tools/caption.py @@ -0,0 +1,60 @@ +# This script will create the caption text files in the specified folder using the specified file pattern and caption text. 
+# +# eg: python caption.py D:\some\folder\location "*.png, *.jpg, *.webp" "some caption text" + +import argparse +import os +import logging +from pathlib import Path + +def create_caption_files(image_folder: Path, file_pattern: str, caption_text: str, caption_file_ext: str, overwrite: bool): + # Split the file patterns string and remove whitespace from each extension + patterns = [pattern.strip() for pattern in file_pattern.split(",")] + + # Use the glob method to match the file pattern + for pattern in patterns: + files = image_folder.glob(pattern) + + # Iterate over the matched files + for file in files: + # Check if a text file with the same name as the current file exists in the folder + txt_file = file.with_suffix(caption_file_ext) + if not txt_file.exists() or overwrite: + txt_file.write_text(caption_text) + logging.info(f"Caption file created: {txt_file}") + +def writable_dir(target_path): + """ Check if a path is a valid directory and that it can be written to. """ + path = Path(target_path) + if path.is_dir(): + if os.access(path, os.W_OK): + return path + else: + raise argparse.ArgumentTypeError(f"Directory '{path}' is not writable.") + else: + raise argparse.ArgumentTypeError(f"Directory '{path}' does not exist.") + +def main(): + # Set up logging + logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') + + # Define command-line arguments + parser = argparse.ArgumentParser() + parser.add_argument("image_folder", type=writable_dir, help="The folder where the image files are located") + parser.add_argument("--file_pattern", type=str, default="*.png, *.jpg, *.jpeg, *.webp", help="the pattern to match the image file names") + parser.add_argument("--caption_file_ext", type=str, default=".caption", help="the caption file extension.") + parser.add_argument("--overwrite", action="store_true", default=False, help="whether to overwrite existing caption files") + + # Create a mutually exclusive group for the caption_text and caption_file 
arguments + caption_group = parser.add_mutually_exclusive_group(required=True) + caption_group.add_argument("--caption_text", type=str, help="the text to include in the caption files") + caption_group.add_argument("--caption_file", type=argparse.FileType("r"), help="the file containing the text to include in the caption files") + + # Parse the command-line arguments + args = parser.parse_args() + + # Create the caption files + create_caption_files(args.image_folder, args.file_pattern, args.caption_text, args.caption_file_ext, args.overwrite) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tools/caption_from_filename.py b/tools/caption_from_filename.py new file mode 100644 index 0000000000000000000000000000000000000000..d3ca0d66fdd3424281805ae59fb0bd4e9b1faead --- /dev/null +++ b/tools/caption_from_filename.py @@ -0,0 +1,99 @@ +# Proposed by https://github.com/kainatquaderee +import os +import argparse +import logging +from pathlib import Path + +def is_image_file(filename, image_extensions): + """Check if a file is an image file based on its extension.""" + return Path(filename).suffix.lower() in image_extensions + +def create_text_file(image_filename, output_directory, text_extension): + """Create a text file with the same name as the image file.""" + # Extract prompt from filename + prompt = Path(image_filename).stem + + # Construct path for the output text file + text_file_path = Path(output_directory) / (prompt + text_extension) + try: + + # Write prompt to text file + with open(text_file_path, 'w') as text_file: + text_file.write(prompt) + + logging.info(f"Text file created: {text_file_path}") + + return 1 + + except IOError as e: + logging.error(f"Failed to write to {text_file_path}: {e}") + return 0 + +def main(image_directory, output_directory, image_extension, text_extension): + # If no output directory is provided, use the image directory + if not output_directory: + output_directory = image_directory + + # Ensure the 
output directory exists, create it if necessary + Path(output_directory).mkdir(parents=True, exist_ok=True) + + # Initialize a counter for the number of text files created + text_files_created = 0 + + # Iterate through files in the directory + for image_filename in Path(image_directory).iterdir(): + # Check if the file is an image + if is_image_file(image_filename, image_extension): + # Create a text file with the same name as the image file and increment the counter if successful + text_files_created += create_text_file(image_filename, output_directory, text_extension) + + # Report if no text files were created + if text_files_created == 0: + logging.info("No image matching extensions were found in the specified directory. No caption files were created.") + else: + logging.info(f"{text_files_created} text files created successfully.") + +def create_gui(image_directory, output_directory, image_extension, text_extension): + try: + import gradio + import gradio.blocks as blocks + except ImportError: + print("gradio module is not installed. 
Please install it to use the GUI.") + exit(1) + + """Create a Gradio interface for the caption creation process.""" + with gradio.Blocks() as demo: + gradio.Markdown("## Caption From Filename") + with gradio.Row(): + with gradio.Column(): + image_dir = gradio.Textbox(label="Image Directory", value=image_directory) + output_dir = gradio.Textbox(label="Output Directory", value=output_directory) + image_ext = gradio.Textbox(label="Image Extensions", value=" ".join(image_extension)) + text_ext = gradio.Textbox(label="Text Extension", value=text_extension) + run_button = gradio.Button("Run") + with gradio.Column(): + output = gradio.Textbox(label="Output", placeholder="Output will be displayed here...", lines=10, max_lines=10) + run_button.click(main, inputs=[image_dir, output_dir, image_ext, text_ext], outputs=output) + demo.launch() + +if __name__ == "__main__": + # Set up logging + logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') + + # Create an argument parser + parser = argparse.ArgumentParser(description='Generate caption files from image filenames.') + + # Add arguments for the image directory, output directory, and file extension + parser.add_argument('image_directory', help='Directory containing the image files.') + parser.add_argument('--output_directory', help='Optional: Output directory where text files will be saved. If not provided, the files will be saved in the same directory as the images.') + parser.add_argument('--image_extension', nargs='+', default=['.jpg', '.jpeg', '.png', '.webp', '.bmp'], help='Extension(s) for the image files. Defaults to common image extensions .jpg, .jpeg, .png, .webp, .bmp.') + parser.add_argument('--text_extension', default='.txt', help='Extension for the output text files. 
Defaults to .txt.') + parser.add_argument('--gui', action='store_true', help='Launch a Gradio interface for the caption creation process.') + + # Parse the command-line arguments + args = parser.parse_args() + + if args.gui: + create_gui(args.image_directory, args.output_directory, args.image_extension, args.text_extension) + else: + main(args.image_directory, args.output_directory, args.image_extension, args.text_extension) diff --git a/tools/cleanup_captions.py b/tools/cleanup_captions.py new file mode 100644 index 0000000000000000000000000000000000000000..609a0dde84faaa2f90a95df11cf8c55eeb0b17d1 --- /dev/null +++ b/tools/cleanup_captions.py @@ -0,0 +1,53 @@ +import os +import argparse +import logging +from pathlib import Path + +def writable_dir(target_path): + """ Check if a path is a valid directory and that it can be written to. """ + path = Path(target_path) + if path.is_dir(): + if os.access(path, os.W_OK): + return path + else: + raise argparse.ArgumentTypeError(f"Directory '{path}' is not writable.") + else: + raise argparse.ArgumentTypeError(f"Directory '{path}' does not exist.") + +def main(folder_path:Path, extension:str, keywords:set=None): + for file_name in os.listdir(folder_path): + if file_name.endswith(extension): + file_path = os.path.join(folder_path, file_name) + try: + with open(file_path, "r") as f: + text = f.read() + # extract tags from text and split into a list using comma as the delimiter + tags = [tag.strip() for tag in text.split(",")] + # remove the specified keywords from the tags list + if keywords: + tags = [tag for tag in tags if tag not in keywords] + # remove empty or whitespace-only tags + tags = [tag for tag in tags if tag.strip() != ""] + # join the tags back into a comma-separated string and write back to the file + with open(file_path, "w") as f: + f.write(", ".join(tags)) + logging.info(f"Processed {file_name}") + except Exception as e: + logging.error(f"Error processing {file_name}: {e}") + +if __name__ == "__main__": + 
# Set up logging + logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') + + parser = argparse.ArgumentParser(description="Remove specified keywords from all text files in a directory.") + parser.add_argument("folder_path", type=writable_dir, help="path to directory containing text files") + parser.add_argument("-e", "--extension", type=str, default=".txt", help="file extension of text files to be processed (default: .txt)") + parser.add_argument("-k", "--keywords", type=str, nargs="*", help="Optional: list of keywords to be removed from text files. If not provided, the default list will be used.") + args = parser.parse_args() + + folder_path = args.folder_path + extension = args.extension + keywords = set(args.keywords) if args.keywords else set(["1girl", "solo", "blue eyes", "brown eyes", "blonde hair", "black hair", "realistic", "red lips", "lips", "artist name", "makeup", "realistic","brown hair", "dark skin", + "dark-skinned female", "medium breasts", "breasts", "1boy"]) + + main(folder_path, extension, keywords) diff --git a/tools/convert_html_to_md.py b/tools/convert_html_to_md.py new file mode 100644 index 0000000000000000000000000000000000000000..7133a8db4861a826c29dd7a90342cf9a8c9a8af8 --- /dev/null +++ b/tools/convert_html_to_md.py @@ -0,0 +1,64 @@ +import argparse +import os +import requests +from bs4 import BeautifulSoup +from urllib.parse import urljoin +from html2text import html2text +from pathlib import Path + +def is_writable_path(target_path): + """ + Check if a path is writable. 
+ """ + path = Path(os.path.dirname(target_path)) + if path.is_dir(): + if os.access(path, os.W_OK): + return target_path + else: + raise argparse.ArgumentTypeError(f"Directory '{path}' is not writable.") + else: + raise argparse.ArgumentTypeError(f"Directory '{path}' does not exist.") + +def main(url, markdown_path): + # Create a session object + with requests.Session() as session: + # Send HTTP request to the specified URL + response = session.get(url) + response.raise_for_status() # Check for HTTP issues + + # Create a BeautifulSoup object and specify the parser + soup = BeautifulSoup(response.text, 'html.parser') + + # Ensure the directory for saving images exists + os.makedirs("./logs", exist_ok=True) + + # Find all image tags and save images + for image in soup.find_all('img'): + image_url = urljoin(url, image['src']) + try: + image_response = session.get(image_url, stream=True) + image_response.raise_for_status() + image_name = os.path.join("./logs", os.path.basename(image_url)) + with open(image_name, 'wb') as file: + file.write(image_response.content) + except requests.RequestException as e: + print(f"Failed to download {image_url}: {e}") + + # Convert the HTML content to markdown + markdown_content = html2text(response.text) + + # Save the markdown content to a file + try: + with open(markdown_path, "w", encoding="utf8") as file: + file.write(markdown_content) + print(f"Markdown content successfully written to {markdown_path}") + except Exception as e: + print(f"Failed to write markdown to {markdown_path}: {e}") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Convert HTML to Markdown") + parser.add_argument("url", help="The URL of the webpage to convert") + parser.add_argument("markdown_path", help="The path to save the converted markdown file", type=is_writable_path) + args = parser.parse_args() + + main(args.url, args.markdown_path) diff --git a/tools/convert_images_to_hq_jpg.py b/tools/convert_images_to_hq_jpg.py new file 
mode 100644 index 0000000000000000000000000000000000000000..667bf58b0c7b864369feb6d82ef4b30268be20b0 --- /dev/null +++ b/tools/convert_images_to_hq_jpg.py @@ -0,0 +1,65 @@ +import argparse +import glob +import os +from pathlib import Path +from PIL import Image + + +def writable_dir(target_path): + """ Check if a path is a valid directory and that it can be written to. """ + path = Path(target_path) + if path.is_dir(): + if os.access(path, os.W_OK): + return path + else: + raise argparse.ArgumentTypeError(f"Directory '{path}' is not writable.") + else: + raise argparse.ArgumentTypeError(f"Directory '{path}' does not exist.") + +def main(directory, in_ext, quality, delete_originals): + out_ext = "jpg" + + # Create the file pattern string using the input file extension + file_pattern = f"*.{in_ext}" + + # Get the list of files in the directory that match the file pattern + files = glob.glob(os.path.join(directory, file_pattern)) + + # Iterate over the list of files + for file in files: + # Open the image file + img = Image.open(file) + + # Create a new file path with the output file extension + new_path = Path(file).with_suffix(f".{out_ext}") + + # Check if the output file already exists + if new_path.exists(): + # Skip the conversion if the output file already exists + print(f"Skipping {file} because {new_path} already exists") + continue + + # Save the image to the new file as high-quality JPEG + img.save(new_path, quality=quality, optimize=True) + + # Optionally, delete the original file + if delete_originals: + os.remove(file) + + +if __name__ == "__main__": + # Define the command-line arguments + parser = argparse.ArgumentParser() + parser.add_argument("directory", type=writable_dir, + help="the directory containing the images to be converted") + parser.add_argument("--in_ext", type=str, default="webp", + help="the input file extension") + parser.add_argument("--quality", type=int, default=95, + help="the JPEG quality (0-100)") + 
parser.add_argument("--delete_originals", action="store_true", + help="whether to delete the original files after conversion") + + # Parse the command-line arguments + args = parser.parse_args() + + main(directory=args.directory, in_ext=args.in_ext, quality=args.quality, delete_originals=args.delete_originals) diff --git a/tools/convert_images_to_webp.py b/tools/convert_images_to_webp.py new file mode 100644 index 0000000000000000000000000000000000000000..fa15cd95c2379452e59848f3fc616396d3e6470d --- /dev/null +++ b/tools/convert_images_to_webp.py @@ -0,0 +1,71 @@ +import argparse +from pathlib import Path +import os +from PIL import Image + +def writable_dir(target_path): + """ Check if a path is a valid directory and that it can be written to. """ + path = Path(target_path) + if path.is_dir(): + if os.access(path, os.W_OK): + return path + else: + raise argparse.ArgumentTypeError(f"Directory '{path}' is not writable.") + else: + raise argparse.ArgumentTypeError(f"Directory '{path}' does not exist.") + +def main(): + # Define the command-line arguments + parser = argparse.ArgumentParser() + parser.add_argument("directory", type=writable_dir, + help="the directory containing the images to be converted") + parser.add_argument("--in_ext", type=str, default="webp", + help="the input file extension") + parser.add_argument("--out_ext", type=str, default="webp", + help="the output file extension") + parser.add_argument("--delete_originals", action="store_true", + help="whether to delete the original files after conversion") + + # Parse the command-line arguments + args = parser.parse_args() + directory = Path(args.directory) + in_ext = args.in_ext + delete_originals = args.delete_originals + + # Create the file pattern string using the input file extension + file_pattern = f"*.{in_ext}" + + # Get the list of files in the directory that match the file pattern + files = list(directory.glob(file_pattern)) + + # Iterate over the list of files + for file in files: + try: + # 
Open the image file + img = Image.open(file) + + # Create a new file path with the output file extension + new_path = file.with_suffix(f".{args.out_ext}") + print(new_path) + + # Check if the output file already exists + if new_path.exists(): + # Skip the conversion if the output file already exists + print(f"Skipping {file} because {new_path} already exists") + continue + + # Save the image to the new file as lossless + img.save(new_path, lossless=True) + + # Close the image file + img.close() + + # Optionally, delete the original file + if delete_originals: + file.unlink() + except Exception as e: + print(f"Error processing {file}: {e}") + + +if __name__ == "__main__": + main() diff --git a/tools/create_txt_from_images.py b/tools/create_txt_from_images.py new file mode 100644 index 0000000000000000000000000000000000000000..9f8f0815addbb31171b8b6576168f832b0b3084e --- /dev/null +++ b/tools/create_txt_from_images.py @@ -0,0 +1,32 @@ +import os +import argparse + +def main(folder_path): + # Validate if the folder exists + if not os.path.exists(folder_path): + print("The specified folder does not exist.") + return + + # Loop through all files in the directory + for filename in os.listdir(folder_path): + # Check if the file is an image file (webp, jpg, png) + if filename.lower().endswith(('.webp', '.jpg', '.png')): + # Remove the file extension from the filename + name_without_extension = os.path.splitext(filename)[0] + + # Construct the name of the txt file + txt_filename = f"{name_without_extension}.txt" + + # Extract the content before the underscore + content = name_without_extension.split("_")[0] + + # Write the content to the txt file + with open(os.path.join(folder_path, txt_filename), "w") as txt_file: + txt_file.write(content) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Process a folder.') + parser.add_argument('folder_path', type=str, help='Path to the folder to process') + + args = parser.parse_args() + 
main(args.folder_path) diff --git a/tools/crop_images_to_n_buckets.py b/tools/crop_images_to_n_buckets.py new file mode 100644 index 0000000000000000000000000000000000000000..263054a9a4bbb9d4e021d09b2d08c5e0988db218 --- /dev/null +++ b/tools/crop_images_to_n_buckets.py @@ -0,0 +1,276 @@ +# This code sorts a collection of images in a given directory by their aspect ratio, groups +# them into batches of a given size, crops each image in a batch to the average aspect ratio +# of that batch, and saves the cropped images in a specified directory. The user provides +# the paths to the input directory and the output directory, as well as the desired batch +# size. The program drops any images that do not fit exactly into the batches. + +import os +import cv2 +import argparse +import shutil + +def aspect_ratio(img_path): + """ + Calculate and return the aspect ratio of an image. + + Parameters: + img_path: A string representing the path to the input image. + + Returns: + float: Aspect ratio of the input image, defined as width / height. + Returns None if the image cannot be read. + """ + try: + image = cv2.imread(img_path) + if image is None: + raise ValueError("Image not found or could not be read.") + height, width = image.shape[:2] + return float(width) / float(height) + except Exception as e: + print(f"Error: {e}") + return None + +def sort_images_by_aspect_ratio(path): + """Sort all images in a folder by aspect ratio""" + images = [] + for filename in os.listdir(path): + if filename.endswith(".jpg") or filename.endswith(".jpeg") or filename.endswith(".png") or filename.endswith(".webp"): + print(filename) + img_path = os.path.join(path, filename) + images.append((img_path, aspect_ratio(img_path))) + # sort the list of tuples based on the aspect ratio + sorted_images = sorted(images, key=lambda x: x[1]) + return sorted_images + +def create_groups(sorted_images, n_groups): + """ + Create groups of images from a sorted list of images. 
+ + This function takes a sorted list of images and a group size as input, and returns a list of groups, + where each group contains a specified number of images. + + Parameters: + sorted_images (list of tuples): A list of tuples, where each tuple contains the path to an image and its aspect ratio. + n_groups (int): The number of images to include in each group. + + Returns: + list of lists: A list of groups, where each group is a list of tuples representing the images in the group. + + Raises: + ValueError: If the group size is not a positive integer or if the group size is greater than the number of images. + """ + if not isinstance(n_groups, int) or n_groups <= 0: + raise ValueError("Error: n_groups must be a positive integer.") + if n_groups > len(sorted_images): + raise ValueError("Error: n_groups must be less than or equal to the number of images.") + n = len(sorted_images) + size = n // n_groups + groups = [sorted_images[i * size : (i + 1) * size] for i in range(n_groups - 1)] + groups.append(sorted_images[(n_groups - 1) * size:]) + return groups + +def average_aspect_ratio(group): + """ + Calculate the average aspect ratio for a given group of images. + + Parameters: + group (list of tuples):, A list of tuples, where each tuple contains the path to an image and its aspect ratio. + + Returns: + float: The average aspect ratio of the images in the group. + """ + if not group: + print("Error: The group is empty") + return None + + try: + aspect_ratios = [aspect_ratio for _, aspect_ratio in group] + avg_aspect_ratio = sum(aspect_ratios) / len(aspect_ratios) + print(f"Average aspect ratio for group: {avg_aspect_ratio}") + return avg_aspect_ratio + except TypeError: + print("Error: Check the structure of the input group elements. 
They should be tuples of (image_path, aspect_ratio).") + return None + except Exception as e: + print(f"Error: {e}") + return None + +def center_crop_image(image, target_aspect_ratio): + """Crop the input image to the target aspect ratio. + + The function calculates the crop region for the input image based on its current aspect ratio and the target aspect ratio. + + Args: + image: A numpy array representing the input image. + target_aspect_ratio: A float representing the target aspect ratio. + + Returns: + A numpy array representing the cropped image. + + Raises: + ValueError: If the input image is not a valid numpy array with at least two dimensions or if the calculated new width or height is zero. + + """ + # Check if the input image is a valid numpy array with at least two dimensions + if not isinstance(image, np.ndarray) or image.ndim < 2: + raise ValueError("Input image must be a valid numpy array with at least two dimensions.") + + height, width = image.shape[:2] + current_aspect_ratio = float(width) / float(height) + + # If the current aspect ratio is already equal to the target aspect ratio, return the image as is + if current_aspect_ratio == target_aspect_ratio: + return image + + # Calculate the new width and height based on the target aspect ratio + if current_aspect_ratio > target_aspect_ratio: + new_width = int(target_aspect_ratio * height) + if new_width == 0: + raise ValueError("Calculated new width is zero. Please check the input image and target aspect ratio.") + x_start = (width - new_width) // 2 + cropped_image = image[:, x_start:x_start+new_width] + else: + new_height = int(width / target_aspect_ratio) + if new_height == 0: + raise ValueError("Calculated new height is zero. 
Please check the input image and target aspect ratio.") + y_start = (height - new_height) // 2 + cropped_image = image[y_start:y_start+new_height, :] + + return cropped_image + +def copy_related_files(img_path, save_path): + """ + Copy all files in the same directory as the input image that have the same base name as the input image to the + output directory with the corresponding new filename. + + Args: + img_path (str): Path to the input image file. + save_path: Path to the output directory where the files should be copied with a new name. + """ + # Get the base filename and directory + img_dir, img_basename = os.path.split(img_path) + img_base, img_ext = os.path.splitext(img_basename) + + save_dir, save_basename = os.path.split(save_path) + save_base, save_ext = os.path.splitext(save_basename) + + # Create the output directory if it does not exist + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + # Loop over all files in the same directory as the input image + try: + for filename in os.listdir(img_dir): + # Skip files with the same name as the input image + if filename == img_basename: + continue + + # Check if the file has the same base name as the input image + file_base, file_ext = os.path.splitext(filename) + if file_base == img_base: + # Build the new filename and copy the file + new_filename = os.path.join(save_dir, f"{save_base}{file_ext}") + shutil.copy2(os.path.join(img_dir, filename), new_filename) + except OSError as e: + print(f"Error: {e}") # Handle errors from os.listdir() + +def save_resized_cropped_images(group, folder_name, group_number, avg_aspect_ratio, use_original_name=False): + """Crop and resize all images in the input group to the smallest resolution, and save them to a folder. + + Args: + group: A list of tuples, where each tuple contains the path to an image and its aspect ratio. + folder_name: A string representing the name of the folder to save the images to. + group_number: An integer representing the group number. 
+ avg_aspect_ratio: A float representing the average aspect ratio of the images in the group. + use_original_name: A boolean indicating whether to save the images with their original file names. + + """ + if not os.path.exists(folder_name): + os.makedirs(folder_name) + + # get the smallest size of the images + smallest_res = float("inf") + for img_path, _ in group: + image = cv2.imread(img_path) + cropped_image = center_crop_image(image, avg_aspect_ratio) + height, width = cropped_image.shape[:2] + image_res = height * width + if image_res < smallest_res: + smallest_res = image_res + small_height, small_width = height, width + + # resize all images to the smallest resolution of the images in the group + for i, (img_path, aspect_ratio) in enumerate(group): + image = cv2.imread(img_path) + cropped_image = center_crop_image(image, avg_aspect_ratio) + # resized_image = cv2.resize(cropped_image, (small_width, small_height)) + if use_original_name: + save_name = os.path.basename(img_path) + else: + save_name = f"group_{group_number}_{i}.jpg" + save_path = os.path.join(folder_name, save_name) + cv2.imwrite(save_path, cropped_image) + + # Copy matching files named the same as img_path to + copy_related_files(img_path, save_path) + + print(f"Saved {save_name} to {folder_name}") + + +def main(): + parser = argparse.ArgumentParser(description='Sort images and crop them based on aspect ratio') + parser.add_argument('input_dir', type=str, help='Path to the directory containing images') + parser.add_argument('output_dir', type=str, help='Path to the directory to save the cropped images') + parser.add_argument('batch_size', type=int, help='Size of the batches to create') + parser.add_argument('--use_original_name', action='store_true', help='Whether to use original file names for the saved images') + + args = parser.parse_args() + + print(f"Sorting images by aspect ratio in {args.input_dir}...") + if not os.path.exists(args.input_dir): + print(f"Error: Input directory does not 
exist: {args.input_dir}") + return + + if not os.path.exists(args.output_dir): + try: + os.makedirs(args.output_dir) + except OSError: + print(f"Error: Failed to create output directory: {args.output_dir}") + return + + sorted_images = sort_images_by_aspect_ratio(args.input_dir) + total_images = len(sorted_images) + print(f'Total images: {total_images}') + + if args.batch_size <= 0: + print("Error: Batch size must be greater than 0") + return + + group_size = total_images // args.batch_size + + print(f'Train batch size: {args.batch_size}, image group size: {group_size}') + remainder = total_images % args.batch_size + + if remainder != 0: + print(f'Dropping {remainder} images that do not fit in groups...') + sorted_images = sorted_images[:-remainder] + total_images = len(sorted_images) + group_size = total_images // args.batch_size + + print('Creating groups...') + groups = create_groups(sorted_images, group_size) + print(f"Created {len(groups)} groups") + + print('Saving cropped and resize images...') + for i, group in enumerate(groups): + avg_aspect_ratio = average_aspect_ratio(group) + print(f"Processing group {i+1} with {len(group)} images...") + try: + save_resized_cropped_images(group, args.output_dir, i+1, avg_aspect_ratio, args.use_original_name) + except Exception as e: + print(f"Error: Failed to save images in group {i+1}: {e}") + + print('Done') + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/tools/extract_locon.py b/tools/extract_locon.py new file mode 100644 index 0000000000000000000000000000000000000000..2b2fb2de60d15b707829844c9294386cb334b929 --- /dev/null +++ b/tools/extract_locon.py @@ -0,0 +1,190 @@ +import os, sys + +sys.path.insert(0, os.getcwd()) +import argparse + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "base_model", + help="The model which use it to train the dreambooth model", + default="", + type=str, + ) + parser.add_argument( + "db_model", + help="the dreambooth 
model you want to extract the locon", + default="", + type=str, + ) + parser.add_argument( + "output_name", help="the output model", default="./out.pt", type=str + ) + parser.add_argument( + "--is_v2", + help="Your base/db model is sd v2 or not", + default=False, + action="store_true", + ) + parser.add_argument( + "--is_sdxl", + help="Your base/db model is sdxl or not", + default=False, + action="store_true", + ) + parser.add_argument( + "--device", + help="Which device you want to use to extract the locon", + default="cpu", + type=str, + ) + parser.add_argument( + "--mode", + help=( + 'extraction mode, can be "full", "fixed", "threshold", "ratio", "quantile". ' + 'If not "fixed", network_dim and conv_dim will be ignored' + ), + default="fixed", + type=str, + ) + parser.add_argument( + "--safetensors", + help="use safetensors to save locon model", + default=False, + action="store_true", + ) + parser.add_argument( + "--linear_dim", + help="network dim for linear layer in fixed mode", + default=1, + type=int, + ) + parser.add_argument( + "--conv_dim", + help="network dim for conv layer in fixed mode", + default=1, + type=int, + ) + parser.add_argument( + "--linear_threshold", + help="singular value threshold for linear layer in threshold mode", + default=0.0, + type=float, + ) + parser.add_argument( + "--conv_threshold", + help="singular value threshold for conv layer in threshold mode", + default=0.0, + type=float, + ) + parser.add_argument( + "--linear_ratio", + help="singular ratio for linear layer in ratio mode", + default=0.0, + type=float, + ) + parser.add_argument( + "--conv_ratio", + help="singular ratio for conv layer in ratio mode", + default=0.0, + type=float, + ) + parser.add_argument( + "--linear_quantile", + help="singular value quantile for linear layer quantile mode", + default=1.0, + type=float, + ) + parser.add_argument( + "--conv_quantile", + help="singular value quantile for conv layer quantile mode", + default=1.0, + type=float, + ) + 
parser.add_argument( + "--use_sparse_bias", + help="enable sparse bias", + default=False, + action="store_true", + ) + parser.add_argument( + "--sparsity", help="sparsity for sparse bias", default=0.98, type=float + ) + parser.add_argument( + "--disable_cp", + help="don't use cp decomposition", + default=False, + action="store_true", + ) + return parser.parse_args() + + +ARGS = get_args() + + +from lycoris.utils import extract_diff +from lycoris.kohya.model_utils import load_models_from_stable_diffusion_checkpoint +from lycoris.kohya.sdxl_model_util import load_models_from_sdxl_checkpoint + +import torch +from safetensors.torch import save_file + + +def main(): + args = ARGS + if args.is_sdxl: + base = load_models_from_sdxl_checkpoint(None, args.base_model, args.device) + db = load_models_from_sdxl_checkpoint(None, args.db_model, args.device) + else: + base = load_models_from_stable_diffusion_checkpoint(args.is_v2, args.base_model) + db = load_models_from_stable_diffusion_checkpoint(args.is_v2, args.db_model) + + linear_mode_param = { + "fixed": args.linear_dim, + "threshold": args.linear_threshold, + "ratio": args.linear_ratio, + "quantile": args.linear_quantile, + "full": None, + }[args.mode] + conv_mode_param = { + "fixed": args.conv_dim, + "threshold": args.conv_threshold, + "ratio": args.conv_ratio, + "quantile": args.conv_quantile, + "full": None, + }[args.mode] + + if args.is_sdxl: + db_tes = [db[0], db[1]] + db_unet = db[3] + base_tes = [base[0], base[1]] + base_unet = base[3] + else: + db_tes = [db[0]] + db_unet = db[2] + base_tes = [base[0]] + base_unet = base[2] + + state_dict = extract_diff( + base_tes, + db_tes, + base_unet, + db_unet, + args.mode, + linear_mode_param, + conv_mode_param, + args.device, + args.use_sparse_bias, + args.sparsity, + not args.disable_cp, + ) + + if args.safetensors: + save_file(state_dict, args.output_name) + else: + torch.save(state_dict, args.output_name) + + +if __name__ == "__main__": + main() \ No newline at end of file 
diff --git a/tools/gradio_theme_builder.py b/tools/gradio_theme_builder.py new file mode 100644 index 0000000000000000000000000000000000000000..d20c47f6111fea13c95c8de718d9003fe539b357 --- /dev/null +++ b/tools/gradio_theme_builder.py @@ -0,0 +1,2 @@ +import gradio as gr +gr.themes.builder() diff --git a/tools/group_images.py b/tools/group_images.py new file mode 100644 index 0000000000000000000000000000000000000000..4f02535adcebd307292c138653d2045246b53a18 --- /dev/null +++ b/tools/group_images.py @@ -0,0 +1,185 @@ +import argparse +import shutil +from PIL import Image, ImageOps +import os +import numpy as np + +from library.utils import setup_logging +import logging + +# Set up logging +setup_logging() +log = logging.getLogger(__name__) + +class ImageProcessor: + + def __init__(self, input_folder, output_folder, group_size, include_subfolders, do_not_copy_other_files, pad, caption, caption_ext): + self.input_folder = input_folder + self.output_folder = output_folder + self.group_size = group_size + self.include_subfolders = include_subfolders + self.do_not_copy_other_files = do_not_copy_other_files + self.pad = pad + self.caption = caption + self.caption_ext = caption_ext + self.image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.webp', '.tiff') + + def get_image_paths(self): + images = [] + if self.include_subfolders: + for dirpath, dirnames, filenames in os.walk(self.input_folder): + for filename in filenames: + if filename.endswith(self.image_extensions): + images.append(os.path.join(dirpath, filename)) + else: + images = [os.path.join(self.input_folder, f) for f in os.listdir(self.input_folder) if f.endswith(self.image_extensions)] + return images + + def group_images(self, images): + sorted_images = sorted(images, key=lambda path: Image.open(path).size[0] / Image.open(path).size[1]) + groups = [sorted_images[i:i+self.group_size] for i in range(0, len(sorted_images), self.group_size)] + return groups + + def process_group(self, group, group_index): + if 
len(group) > 0: + aspect_ratios = self.get_aspect_ratios(group) + avg_aspect_ratio = np.mean(aspect_ratios) + if self.pad: + padded_images = self.pad_images(group, avg_aspect_ratio) + self.resize_and_save_images(padded_images, group_index, group) + else: + cropped_images = self.crop_images(group, avg_aspect_ratio) + self.resize_and_save_images(cropped_images, group_index, group) + if not self.do_not_copy_other_files: + self.copy_other_files(group, group_index) + + def get_aspect_ratios(self, group): + aspect_ratios = [] + for path in group: + with Image.open(path) as img: + width, height = img.size + aspect_ratios.append(width / height) + return aspect_ratios + + def crop_images(self, group, avg_aspect_ratio): + cropped_images = [] + for j, path in enumerate(group): + with Image.open(path) as img: + log.info(f" Processing image {j+1}: {path}") + img = self.crop_image(img, avg_aspect_ratio) + cropped_images.append(img) + return cropped_images + + def crop_image(self, img, avg_aspect_ratio): + img_aspect_ratio = img.width / img.height + if img_aspect_ratio > avg_aspect_ratio: + # Too wide, reduce width + new_width = avg_aspect_ratio * img.height + left = (img.width - new_width) / 2 + right = left + new_width + img = img.crop((left, 0, right, img.height)) + else: + # Too tall, reduce height + new_height = img.width / avg_aspect_ratio + top = (img.height - new_height) / 2 + bottom = top + new_height + img = img.crop((0, top, img.width, bottom)) + return img + + def resize_and_save_images(self, cropped_images, group_index, source_paths): + max_width = max(img.width for img in cropped_images) + max_height = max(img.height for img in cropped_images) + for j, img in enumerate(cropped_images): + img = img.resize((max_width, max_height)) + os.makedirs(self.output_folder, exist_ok=True) + original_filename = os.path.basename(source_paths[j]) + filename_without_ext = os.path.splitext(original_filename)[0] + final_file_name = 
f"group-{group_index+1}-{j+1}-{filename_without_ext}" + output_path = os.path.join(self.output_folder, f"{final_file_name}.jpg") + log.info(f" Saving processed image to {output_path}") + img.convert('RGB').save(output_path, quality=70) + + if self.caption: + self.create_caption_file(source_paths[j], group_index, final_file_name) + + def create_caption_file(self, source_path, group_index, caption_filename): + dirpath = os.path.dirname(source_path) + caption = os.path.basename(dirpath).split('_')[-1] + caption_filename = caption_filename + self.caption_ext + caption_path = os.path.join(self.output_folder, caption_filename) + with open(caption_path, 'w') as f: + f.write(caption) + + + def copy_other_files(self, group, group_index): + for j, path in enumerate(group): + dirpath, original_filename = os.path.split(path) + original_basename, original_ext = os.path.splitext(original_filename) + for filename in os.listdir(dirpath): + if filename.endswith('.npz'): # Skip .npz + continue + basename, ext = os.path.splitext(filename) + if basename == original_basename and ext != original_ext: + shutil.copy2(os.path.join(dirpath, filename), os.path.join(self.output_folder, f"group-{group_index+1}-{j+1}-{filename}")) + + def process_images(self): + images = self.get_image_paths() + groups = self.group_images(images) + for i, group in enumerate(groups): + log.info(f"Processing group {i+1} with {len(group)} images...") + self.process_group(group, i) + + def process_group(self, group, group_index): + if len(group) > 0: + aspect_ratios = self.get_aspect_ratios(group) + avg_aspect_ratio = np.mean(aspect_ratios) + if self.pad: + padded_images = self.pad_images(group, avg_aspect_ratio) + self.resize_and_save_images(padded_images, group_index, group) + else: + cropped_images = self.crop_images(group, avg_aspect_ratio) + self.resize_and_save_images(cropped_images, group_index, group) + if not self.do_not_copy_other_files: + self.copy_other_files(group, group_index) + + def pad_images(self, 
group, avg_aspect_ratio): + padded_images = [] + for j, path in enumerate(group): + with Image.open(path) as img: + log.info(f" Processing image {j+1}: {path}") + img = self.pad_image(img, avg_aspect_ratio) + padded_images.append(img) + return padded_images + + def pad_image(self, img, avg_aspect_ratio): + img_aspect_ratio = img.width / img.height + if img_aspect_ratio < avg_aspect_ratio: + # Too tall, increase width + new_width = avg_aspect_ratio * img.height + pad_width = int((new_width - img.width) / 2) + img = ImageOps.expand(img, border=(pad_width, 0), fill='black') + else: + # Too wide, increase height + new_height = img.width / avg_aspect_ratio + pad_height = int((new_height - img.height) / 2) + img = ImageOps.expand(img, border=(0, pad_height), fill='black') + return img + +def main(): + parser = argparse.ArgumentParser(description='Process groups of images.') + parser.add_argument('input_folder', type=str, help='Input folder containing images') + parser.add_argument('output_folder', type=str, help='Output folder to store processed images') + parser.add_argument('group_size', type=int, help='Number of images in each group') + parser.add_argument('--include_subfolders', action='store_true', help='Include subfolders in search for images') + parser.add_argument('--do_not_copy_other_files', '--no_copy', dest='do_not_copy_other_files', action='store_true', help='Do not copy other files with the same name as images') + parser.add_argument('--pad', action='store_true', help='Pad images instead of cropping them') + parser.add_argument('--caption', action='store_true', help='Create a caption file for each image') + parser.add_argument('--caption_ext', type=str, default='.txt', help='Extension for the caption file') + + args = parser.parse_args() + + processor = ImageProcessor(args.input_folder, args.output_folder, args.group_size, args.include_subfolders, args.do_not_copy_other_files, args.pad, args.caption, args.caption_ext) + processor.process_images() + +if 
__name__ == "__main__": + main() diff --git a/tools/group_images_recommended_size.py b/tools/group_images_recommended_size.py new file mode 100644 index 0000000000000000000000000000000000000000..07c07f19b84d526cbc6115c205880878e4c7d04c --- /dev/null +++ b/tools/group_images_recommended_size.py @@ -0,0 +1,128 @@ +import argparse +from PIL import Image +import os +import numpy as np +import itertools + +class ImageProcessor: + + def __init__(self, input_folder, min_group, max_group, include_subfolders, pad): + self.input_folder = input_folder + self.min_group = min_group + self.max_group = max_group + self.include_subfolders = include_subfolders + self.pad = pad + self.image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.webp') + self.losses = [] # List to store loss values for each image + + def get_image_paths(self): + images = [] + if self.include_subfolders: + for dirpath, dirnames, filenames in os.walk(self.input_folder): + for filename in filenames: + if filename.endswith(self.image_extensions): + images.append(os.path.join(dirpath, filename)) + else: + images = [os.path.join(self.input_folder, f) for f in os.listdir(self.input_folder) if f.endswith(self.image_extensions)] + return images + + def group_images(self, images, group_size): + sorted_images = sorted(images, key=lambda path: Image.open(path).size[0] / Image.open(path).size[1]) + groups = [sorted_images[i:i+group_size] for i in range(0, len(sorted_images), group_size)] + return groups + + def process_group(self, group): + if len(group) > 0: + aspect_ratios = self.get_aspect_ratios(group) + avg_aspect_ratio = np.mean(aspect_ratios) + self.calculate_losses(group, avg_aspect_ratio) + + def get_aspect_ratios(self, group): + aspect_ratios = [] + for path in group: + with Image.open(path) as img: + width, height = img.size + aspect_ratios.append(width / height) + return aspect_ratios + + def calculate_losses(self, group, avg_aspect_ratio): + for j, path in enumerate(group): + with Image.open(path) as img: 
+ loss = self.calculate_loss(img, avg_aspect_ratio) + self.losses.append((path, loss)) # Add (path, loss) tuple to the list + + def calculate_loss(self, img, avg_aspect_ratio): + img_aspect_ratio = img.width / img.height + if img_aspect_ratio > avg_aspect_ratio: + # Too wide, reduce width + new_width = avg_aspect_ratio * img.height + loss = abs(img.width - new_width) / img.width # Calculate loss value + else: + # Too tall, reduce height + new_height = img.width / avg_aspect_ratio + loss = abs(img.height - new_height) / img.height # Calculate loss value + return loss + + def monte_carlo_optimization(self, groups): + best_groups = groups.copy() + best_loss = np.inf + best_removed_images = [] + + for group in groups: + num_images = len(group) + all_combinations = [] + # Generate all possible combinations of images to remove + for r in range(1, num_images + 1): + combinations = list(itertools.combinations(group, r)) + all_combinations.extend(combinations) + + for combination in all_combinations: + self.losses = [] # Reset losses for each combination + remaining_images = list(set(group) - set(combination)) + self.process_group(remaining_images) + avg_loss = np.mean(self.losses) + + if avg_loss < best_loss: + best_loss = avg_loss + best_groups[best_groups.index(group)] = remaining_images + best_removed_images = combination + + return best_groups, best_loss, best_removed_images + + def process_images(self): + images = self.get_image_paths() + num_images = len(images) + results = [] + + for group_size in range(self.min_group, self.max_group + 1): + groups = self.group_images(images, group_size) + optimized_groups, avg_loss, removed_images = self.monte_carlo_optimization(groups) + num_remaining = num_images % group_size + + results.append((group_size, avg_loss, num_remaining, optimized_groups, removed_images)) + + # Sort results based on average crop loss in ascending order + sorted_results = sorted(results, key=lambda x: x[1]) + + for group_size, avg_loss, num_remaining, 
optimized_groups, removed_images in sorted_results: + print(f"Group size: {group_size}, Average crop loss: {avg_loss}, Number of images remaining: {num_remaining}") + print(f"Optimized Groups: {optimized_groups}") + print(f"Removed Images: {removed_images}") + + +def main(): + parser = argparse.ArgumentParser(description='Process groups of images.') + parser.add_argument('input_folder', type=str, help='Input folder containing images') + parser.add_argument('min_group', type=int, help='Minimum group size') + parser.add_argument('max_group', type=int, help='Maximum group size') + parser.add_argument('--include_subfolders', action='store_true', help='Include subfolders in search for images') + parser.add_argument('--pad', action='store_true', help='Pad images instead of cropping them') + + args = parser.parse_args() + + processor = ImageProcessor(args.input_folder, args.min_group, args.max_group, args.include_subfolders, args.pad) + processor.process_images() + + +if __name__ == "__main__": + main() diff --git a/tools/lcm_convert.py b/tools/lcm_convert.py new file mode 100644 index 0000000000000000000000000000000000000000..1772bf3811d1cf10840579e77998b6cb1caaf81a --- /dev/null +++ b/tools/lcm_convert.py @@ -0,0 +1,76 @@ +import argparse +import torch +import logging +from library.utils import setup_logging +from diffusers import StableDiffusionPipeline, StableDiffusionXLPipeline, LCMScheduler +from library.sdxl_model_util import convert_diffusers_unet_state_dict_to_sdxl, sdxl_original_unet, save_stable_diffusion_checkpoint, _load_state_dict_on_device as load_state_dict_on_device +from accelerate import init_empty_weights + +# Initialize logging +setup_logging() +logger = logging.getLogger(__name__) + + +def parse_command_line_arguments(): + argument_parser = argparse.ArgumentParser("lcm_convert") + argument_parser.add_argument("--name", help="Name of the new LCM model", required=True, type=str) + argument_parser.add_argument("--model", help="A model to convert", 
required=True, type=str) + argument_parser.add_argument("--lora-scale", default=1.0, help="Strength of the LCM", type=float) + argument_parser.add_argument("--sdxl", action="store_true", help="Use SDXL models") + argument_parser.add_argument("--ssd-1b", action="store_true", help="Use SSD-1B models") + return argument_parser.parse_args() + +def load_diffusion_pipeline(command_line_args): + if command_line_args.sdxl or command_line_args.ssd_1b: + return StableDiffusionXLPipeline.from_single_file(command_line_args.model) + else: + return StableDiffusionPipeline.from_single_file(command_line_args.model) + +def convert_and_save_diffusion_model(diffusion_pipeline, command_line_args): + diffusion_pipeline.scheduler = LCMScheduler.from_config(diffusion_pipeline.scheduler.config) + lora_weight_file_path = "latent-consistency/lcm-lora-" + ("sdxl" if command_line_args.sdxl else "ssd-1b" if command_line_args.ssd_1b else "sdv1-5") + diffusion_pipeline.load_lora_weights(lora_weight_file_path) + diffusion_pipeline.fuse_lora(lora_scale=command_line_args.lora_scale) + + diffusion_pipeline = diffusion_pipeline.to(dtype=torch.float16) + logger.info("Saving file...") + + text_encoder_primary = diffusion_pipeline.text_encoder + text_encoder_secondary = diffusion_pipeline.text_encoder_2 + variational_autoencoder = diffusion_pipeline.vae + unet_network = diffusion_pipeline.unet + + del diffusion_pipeline + + state_dict = convert_diffusers_unet_state_dict_to_sdxl(unet_network.state_dict()) + with init_empty_weights(): + unet_network = sdxl_original_unet.SdxlUNet2DConditionModel() + + load_state_dict_on_device(unet_network, state_dict, device="cuda", dtype=torch.float16) + + save_stable_diffusion_checkpoint( + command_line_args.name, + text_encoder_primary, + text_encoder_secondary, + unet_network, + None, + None, + None, + variational_autoencoder, + None, + None, + torch.float16, + ) + + logger.info("...done saving") + +def main(): + command_line_args = parse_command_line_arguments() + 
try: + diffusion_pipeline = load_diffusion_pipeline(command_line_args) + convert_and_save_diffusion_model(diffusion_pipeline, command_line_args) + except Exception as error: + logger.error(f"An error occurred: {error}") + +if __name__ == "__main__": + main() diff --git a/tools/lycoris_locon_extract.py b/tools/lycoris_locon_extract.py new file mode 100644 index 0000000000000000000000000000000000000000..a55e0275e7b02a63d2ec8d724276517b36c3f8fe --- /dev/null +++ b/tools/lycoris_locon_extract.py @@ -0,0 +1,190 @@ +import os, sys + +sys.path.insert(0, os.getcwd()) +import argparse + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "base_model", + help="The model which use it to train the dreambooth model", + default="", + type=str, + ) + parser.add_argument( + "db_model", + help="the dreambooth model you want to extract the locon", + default="", + type=str, + ) + parser.add_argument( + "output_name", help="the output model", default="./out.pt", type=str + ) + parser.add_argument( + "--is_v2", + help="Your base/db model is sd v2 or not", + default=False, + action="store_true", + ) + parser.add_argument( + "--is_sdxl", + help="Your base/db model is sdxl or not", + default=False, + action="store_true", + ) + parser.add_argument( + "--device", + help="Which device you want to use to extract the locon", + default="cpu", + type=str, + ) + parser.add_argument( + "--mode", + help=( + 'extraction mode, can be "full", "fixed", "threshold", "ratio", "quantile". 
' + 'If not "fixed", network_dim and conv_dim will be ignored' + ), + default="fixed", + type=str, + ) + parser.add_argument( + "--safetensors", + help="use safetensors to save locon model", + default=False, + action="store_true", + ) + parser.add_argument( + "--linear_dim", + help="network dim for linear layer in fixed mode", + default=1, + type=int, + ) + parser.add_argument( + "--conv_dim", + help="network dim for conv layer in fixed mode", + default=1, + type=int, + ) + parser.add_argument( + "--linear_threshold", + help="singular value threshold for linear layer in threshold mode", + default=0.0, + type=float, + ) + parser.add_argument( + "--conv_threshold", + help="singular value threshold for conv layer in threshold mode", + default=0.0, + type=float, + ) + parser.add_argument( + "--linear_ratio", + help="singular ratio for linear layer in ratio mode", + default=0.0, + type=float, + ) + parser.add_argument( + "--conv_ratio", + help="singular ratio for conv layer in ratio mode", + default=0.0, + type=float, + ) + parser.add_argument( + "--linear_quantile", + help="singular value quantile for linear layer quantile mode", + default=1.0, + type=float, + ) + parser.add_argument( + "--conv_quantile", + help="singular value quantile for conv layer quantile mode", + default=1.0, + type=float, + ) + parser.add_argument( + "--use_sparse_bias", + help="enable sparse bias", + default=False, + action="store_true", + ) + parser.add_argument( + "--sparsity", help="sparsity for sparse bias", default=0.98, type=float + ) + parser.add_argument( + "--disable_cp", + help="don't use cp decomposition", + default=False, + action="store_true", + ) + return parser.parse_args() + + +ARGS = get_args() + + +from lycoris.utils import extract_diff +from lycoris.kohya.model_utils import load_models_from_stable_diffusion_checkpoint +from lycoris.kohya.sdxl_model_util import load_models_from_sdxl_checkpoint + +import torch +from safetensors.torch import save_file + + +def main(): + args = 
ARGS + if args.is_sdxl: + base = load_models_from_sdxl_checkpoint(None, args.base_model, "cpu") + db = load_models_from_sdxl_checkpoint(None, args.db_model, "cpu") + else: + base = load_models_from_stable_diffusion_checkpoint(args.is_v2, args.base_model) + db = load_models_from_stable_diffusion_checkpoint(args.is_v2, args.db_model) + + linear_mode_param = { + "fixed": args.linear_dim, + "threshold": args.linear_threshold, + "ratio": args.linear_ratio, + "quantile": args.linear_quantile, + "full": None, + }[args.mode] + conv_mode_param = { + "fixed": args.conv_dim, + "threshold": args.conv_threshold, + "ratio": args.conv_ratio, + "quantile": args.conv_quantile, + "full": None, + }[args.mode] + + if args.is_sdxl: + db_tes = [db[0], db[1]] + db_unet = db[3] + base_tes = [base[0], base[1]] + base_unet = base[3] + else: + db_tes = [db[0]] + db_unet = db[2] + base_tes = [base[0]] + base_unet = base[2] + + state_dict = extract_diff( + base_tes, + db_tes, + base_unet, + db_unet, + args.mode, + linear_mode_param, + conv_mode_param, + args.device, + args.use_sparse_bias, + args.sparsity, + not args.disable_cp, + ) + + if args.safetensors: + save_file(state_dict, args.output_name) + else: + torch.save(state_dict, args.output_name) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tools/lycoris_utils.py b/tools/lycoris_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..0bab7dd65ab9a63e67e30430a5722e67cd501845 --- /dev/null +++ b/tools/lycoris_utils.py @@ -0,0 +1,504 @@ +from typing import * + +import numpy as np + +import torch +import torch.nn as nn +import torch.nn.functional as F + +import torch.linalg as linalg + +from tqdm import tqdm + + +def make_sparse(t: torch.Tensor, sparsity=0.95): + abs_t = torch.abs(t) + np_array = abs_t.detach().cpu().numpy() + quan = float(np.quantile(np_array, sparsity)) + sparse_t = t.masked_fill(abs_t < quan, 0) + return sparse_t + + +def extract_conv( + weight: Union[torch.Tensor, 
nn.Parameter], + mode = 'fixed', + mode_param = 0, + device = 'cpu', + is_cp = False, +) -> Tuple[nn.Parameter, nn.Parameter]: + weight = weight.to(device) + out_ch, in_ch, kernel_size, _ = weight.shape + + U, S, Vh = linalg.svd(weight.reshape(out_ch, -1)) + + if mode=='fixed': + lora_rank = mode_param + elif mode=='threshold': + assert mode_param>=0 + lora_rank = torch.sum(S>mode_param) + elif mode=='ratio': + assert 1>=mode_param>=0 + min_s = torch.max(S)*mode_param + lora_rank = torch.sum(S>min_s) + elif mode=='quantile' or mode=='percentile': + assert 1>=mode_param>=0 + s_cum = torch.cumsum(S, dim=0) + min_cum_sum = mode_param * torch.sum(S) + lora_rank = torch.sum(s_cum=out_ch/2 and not is_cp: + return weight, 'full' + + U = U[:, :lora_rank] + S = S[:lora_rank] + U = U @ torch.diag(S) + Vh = Vh[:lora_rank, :] + + diff = (weight - (U @ Vh).reshape(out_ch, in_ch, kernel_size, kernel_size)).detach() + extract_weight_A = Vh.reshape(lora_rank, in_ch, kernel_size, kernel_size).detach() + extract_weight_B = U.reshape(out_ch, lora_rank, 1, 1).detach() + del U, S, Vh, weight + return (extract_weight_A, extract_weight_B, diff), 'low rank' + + +def extract_linear( + weight: Union[torch.Tensor, nn.Parameter], + mode = 'fixed', + mode_param = 0, + device = 'cpu', +) -> Tuple[nn.Parameter, nn.Parameter]: + weight = weight.to(device) + out_ch, in_ch = weight.shape + + U, S, Vh = linalg.svd(weight) + + if mode=='fixed': + lora_rank = mode_param + elif mode=='threshold': + assert mode_param>=0 + lora_rank = torch.sum(S>mode_param) + elif mode=='ratio': + assert 1>=mode_param>=0 + min_s = torch.max(S)*mode_param + lora_rank = torch.sum(S>min_s) + elif mode=='quantile' or mode=='percentile': + assert 1>=mode_param>=0 + s_cum = torch.cumsum(S, dim=0) + min_cum_sum = mode_param * torch.sum(S) + lora_rank = torch.sum(s_cum=out_ch/2: + return weight, 'full' + + U = U[:, :lora_rank] + S = S[:lora_rank] + U = U @ torch.diag(S) + Vh = Vh[:lora_rank, :] + + diff = (weight - U @ 
Vh).detach() + extract_weight_A = Vh.reshape(lora_rank, in_ch).detach() + extract_weight_B = U.reshape(out_ch, lora_rank).detach() + del U, S, Vh, weight + return (extract_weight_A, extract_weight_B, diff), 'low rank' + + +def extract_diff( + base_model, + db_model, + mode = 'fixed', + linear_mode_param = 0, + conv_mode_param = 0, + extract_device = 'cpu', + use_bias = False, + sparsity = 0.98, + small_conv = True +): + UNET_TARGET_REPLACE_MODULE = [ + "Transformer2DModel", + "Attention", + "ResnetBlock2D", + "Downsample2D", + "Upsample2D" + ] + UNET_TARGET_REPLACE_NAME = [ + "conv_in", + "conv_out", + "time_embedding.linear_1", + "time_embedding.linear_2", + ] + TEXT_ENCODER_TARGET_REPLACE_MODULE = ["CLIPAttention", "CLIPMLP"] + LORA_PREFIX_UNET = 'lora_unet' + LORA_PREFIX_TEXT_ENCODER = 'lora_te' + def make_state_dict( + prefix, + root_module: torch.nn.Module, + target_module: torch.nn.Module, + target_replace_modules, + target_replace_names = [] + ): + loras = {} + temp = {} + temp_name = {} + + for name, module in root_module.named_modules(): + if module.__class__.__name__ in target_replace_modules: + temp[name] = {} + for child_name, child_module in module.named_modules(): + if child_module.__class__.__name__ not in {'Linear', 'Conv2d'}: + continue + temp[name][child_name] = child_module.weight + elif name in target_replace_names: + temp_name[name] = module.weight + + for name, module in tqdm(list(target_module.named_modules())): + if name in temp: + weights = temp[name] + for child_name, child_module in module.named_modules(): + lora_name = prefix + '.' + name + '.' 
+ child_name + lora_name = lora_name.replace('.', '_') + layer = child_module.__class__.__name__ + if layer in {'Linear', 'Conv2d'}: + root_weight = child_module.weight + if torch.allclose(root_weight, weights[child_name]): + continue + + if layer == 'Linear': + weight, decompose_mode = extract_linear( + (child_module.weight - weights[child_name]), + mode, + linear_mode_param, + device = extract_device, + ) + if decompose_mode == 'low rank': + extract_a, extract_b, diff = weight + elif layer == 'Conv2d': + is_linear = (child_module.weight.shape[2] == 1 + and child_module.weight.shape[3] == 1) + weight, decompose_mode = extract_conv( + (child_module.weight - weights[child_name]), + mode, + linear_mode_param if is_linear else conv_mode_param, + device = extract_device, + ) + if decompose_mode == 'low rank': + extract_a, extract_b, diff = weight + if small_conv and not is_linear and decompose_mode == 'low rank': + dim = extract_a.size(0) + (extract_c, extract_a, _), _ = extract_conv( + extract_a.transpose(0, 1), + 'fixed', dim, + extract_device, True + ) + extract_a = extract_a.transpose(0, 1) + extract_c = extract_c.transpose(0, 1) + loras[f'{lora_name}.lora_mid.weight'] = extract_c.detach().cpu().contiguous().half() + diff = child_module.weight - torch.einsum( + 'i j k l, j r, p i -> p r k l', + extract_c, extract_a.flatten(1, -1), extract_b.flatten(1, -1) + ).detach().cpu().contiguous() + del extract_c + else: + continue + if decompose_mode == 'low rank': + loras[f'{lora_name}.lora_down.weight'] = extract_a.detach().cpu().contiguous().half() + loras[f'{lora_name}.lora_up.weight'] = extract_b.detach().cpu().contiguous().half() + loras[f'{lora_name}.alpha'] = torch.Tensor([extract_a.shape[0]]).half() + if use_bias: + diff = diff.detach().cpu().reshape(extract_b.size(0), -1) + sparse_diff = make_sparse(diff, sparsity).to_sparse().coalesce() + + indices = sparse_diff.indices().to(torch.int16) + values = sparse_diff.values().half() + loras[f'{lora_name}.bias_indices'] = 
indices + loras[f'{lora_name}.bias_values'] = values + loras[f'{lora_name}.bias_size'] = torch.tensor(diff.shape).to(torch.int16) + del extract_a, extract_b, diff + elif decompose_mode == 'full': + loras[f'{lora_name}.diff'] = weight.detach().cpu().contiguous().half() + else: + raise NotImplementedError + elif name in temp_name: + weights = temp_name[name] + lora_name = prefix + '.' + name + lora_name = lora_name.replace('.', '_') + layer = module.__class__.__name__ + + if layer in {'Linear', 'Conv2d'}: + root_weight = module.weight + if torch.allclose(root_weight, weights): + continue + + if layer == 'Linear': + weight, decompose_mode = extract_linear( + (root_weight - weights), + mode, + linear_mode_param, + device = extract_device, + ) + if decompose_mode == 'low rank': + extract_a, extract_b, diff = weight + elif layer == 'Conv2d': + is_linear = ( + root_weight.shape[2] == 1 + and root_weight.shape[3] == 1 + ) + weight, decompose_mode = extract_conv( + (root_weight - weights), + mode, + linear_mode_param if is_linear else conv_mode_param, + device = extract_device, + ) + if decompose_mode == 'low rank': + extract_a, extract_b, diff = weight + if small_conv and not is_linear and decompose_mode == 'low rank': + dim = extract_a.size(0) + (extract_c, extract_a, _), _ = extract_conv( + extract_a.transpose(0, 1), + 'fixed', dim, + extract_device, True + ) + extract_a = extract_a.transpose(0, 1) + extract_c = extract_c.transpose(0, 1) + loras[f'{lora_name}.lora_mid.weight'] = extract_c.detach().cpu().contiguous().half() + diff = root_weight - torch.einsum( + 'i j k l, j r, p i -> p r k l', + extract_c, extract_a.flatten(1, -1), extract_b.flatten(1, -1) + ).detach().cpu().contiguous() + del extract_c + else: + continue + if decompose_mode == 'low rank': + loras[f'{lora_name}.lora_down.weight'] = extract_a.detach().cpu().contiguous().half() + loras[f'{lora_name}.lora_up.weight'] = extract_b.detach().cpu().contiguous().half() + loras[f'{lora_name}.alpha'] = 
torch.Tensor([extract_a.shape[0]]).half() + if use_bias: + diff = diff.detach().cpu().reshape(extract_b.size(0), -1) + sparse_diff = make_sparse(diff, sparsity).to_sparse().coalesce() + + indices = sparse_diff.indices().to(torch.int16) + values = sparse_diff.values().half() + loras[f'{lora_name}.bias_indices'] = indices + loras[f'{lora_name}.bias_values'] = values + loras[f'{lora_name}.bias_size'] = torch.tensor(diff.shape).to(torch.int16) + del extract_a, extract_b, diff + elif decompose_mode == 'full': + loras[f'{lora_name}.diff'] = weight.detach().cpu().contiguous().half() + else: + raise NotImplementedError + return loras + + text_encoder_loras = make_state_dict( + LORA_PREFIX_TEXT_ENCODER, + base_model[0], db_model[0], + TEXT_ENCODER_TARGET_REPLACE_MODULE + ) + + unet_loras = make_state_dict( + LORA_PREFIX_UNET, + base_model[2], db_model[2], + UNET_TARGET_REPLACE_MODULE, + UNET_TARGET_REPLACE_NAME + ) + print(len(text_encoder_loras), len(unet_loras)) + return text_encoder_loras|unet_loras + + +def get_module( + lyco_state_dict: Dict, + lora_name +): + if f'{lora_name}.lora_up.weight' in lyco_state_dict: + up = lyco_state_dict[f'{lora_name}.lora_up.weight'] + down = lyco_state_dict[f'{lora_name}.lora_down.weight'] + mid = lyco_state_dict.get(f'{lora_name}.lora_mid.weight', None) + alpha = lyco_state_dict.get(f'{lora_name}.alpha', None) + return 'locon', (up, down, mid, alpha) + elif f'{lora_name}.hada_w1_a' in lyco_state_dict: + w1a = lyco_state_dict[f'{lora_name}.hada_w1_a'] + w1b = lyco_state_dict[f'{lora_name}.hada_w1_b'] + w2a = lyco_state_dict[f'{lora_name}.hada_w2_a'] + w2b = lyco_state_dict[f'{lora_name}.hada_w2_b'] + t1 = lyco_state_dict.get(f'{lora_name}.hada_t1', None) + t2 = lyco_state_dict.get(f'{lora_name}.hada_t2', None) + alpha = lyco_state_dict.get(f'{lora_name}.alpha', None) + return 'hada', (w1a, w1b, w2a, w2b, t1, t2, alpha) + elif f'{lora_name}.weight' in lyco_state_dict: + weight = lyco_state_dict[f'{lora_name}.weight'] + on_input = 
def cp_weight_from_conv(up, down, mid):
    """Rebuild a conv weight from a three-factor (up, mid, down) decomposition.

    ``up`` and ``down`` are viewed as 2-D matrices using their first two
    dimensions (so any trailing dimensions must have size 1) and contracted
    with the 4-D ``mid`` kernel.
    """
    up = up.reshape(up.size(0), up.size(1))
    down = down.reshape(down.size(0), down.size(1))
    return torch.einsum('m n w h, i m, n j -> i j w h', mid, up, down)


def cp_weight(wa, wb, t):
    """Contract a 4-D core tensor ``t`` with the two factor matrices.

    ``wb`` is contracted over the second axis of ``t`` and ``wa`` over the
    first, giving a tensor indexed ``[wa.size(1), wb.size(1), k, l]``.
    """
    temp = torch.einsum('i j k l, j r -> i r k l', t, wb)
    return torch.einsum('i j k l, i r -> r j k l', temp, wa)


@torch.no_grad()
def rebuild_weight(module_type, params, orig_weight, scale=1):
    """Return ``orig_weight`` with the delta described by ``params`` merged in.

    Args:
        module_type: One of ``'locon'``, ``'hada'``, ``'ia3'``, ``'kron'`` or
            ``'full'``. Any other value leaves the weight untouched.
        params: Tuple whose layout depends on ``module_type``:
            ``locon`` -> ``(up, down, mid, alpha)``;
            ``hada``  -> ``(w1a, w1b, w2a, w2b, t1, t2, alpha)``;
            ``ia3``   -> ``(weight, on_input)``;
            ``kron``  -> ``(w1, w1a, w1b, w2, w2a, w2b, t1, t2, alpha)``;
            ``full``  -> the delta tensor itself.
            Optional entries are ``None`` when absent.
        orig_weight: Weight tensor to merge into, or ``None``.
        scale: Multiplier applied to the rebuilt delta. When ``alpha`` is
            present it is further multiplied by ``alpha / rank``.

    Returns:
        A new tensor with the delta added, ``orig_weight`` itself when
        ``module_type`` is not recognised, or ``None`` when ``orig_weight``
        is ``None``.
    """
    if orig_weight is None:
        return orig_weight

    if module_type == 'locon':
        up, down, mid, alpha = params
        if alpha is not None:
            # Out-of-place multiply so a tensor passed as `scale` is never mutated.
            scale = scale * (alpha / up.size(1))
        if mid is not None:
            rebuild = cp_weight_from_conv(up, down, mid)
        else:
            rebuild = up.reshape(up.size(0), -1) @ down.reshape(down.size(0), -1)
        return orig_weight + rebuild.reshape(orig_weight.shape) * scale

    if module_type == 'hada':
        w1a, w1b, w2a, w2b, t1, t2, alpha = params
        if alpha is not None:
            scale = scale * (alpha / w1b.size(0))
        rebuild1 = cp_weight(w1a, w1b, t1) if t1 is not None else w1a @ w1b
        rebuild2 = cp_weight(w2a, w2b, t2) if t2 is not None else w2a @ w2b
        rebuild = (rebuild1 * rebuild2).reshape(orig_weight.shape)
        return orig_weight + rebuild * scale

    if module_type == 'ia3':
        weight, on_input = params
        if not on_input:
            # Column vector so the factor broadcasts along the first axis.
            weight = weight.reshape(-1, 1)
        return orig_weight + weight * orig_weight * scale

    if module_type == 'kron':
        w1, w1a, w1b, w2, w2a, w2b, t1, t2, alpha = params
        # Tensors must be compared against None explicitly: truth-testing a
        # tensor with more than one element raises RuntimeError.
        if alpha is not None and (w1b is not None or w2b is not None):
            rank_source = w1b if w1b is not None else w2b
            scale = scale * (alpha / rank_source.size(0))
        if w1a is not None and w1b is not None:
            w1 = cp_weight(w1a, w1b, t1) if t1 is not None else w1a @ w1b
        if w2a is not None and w2b is not None:
            w2 = cp_weight(w2a, w2b, t2) if t2 is not None else w2a @ w2b
        rebuild = torch.kron(w1, w2).reshape(orig_weight.shape)
        return orig_weight + rebuild * scale

    if module_type == 'full':
        return orig_weight + params.reshape(orig_weight.shape) * scale

    # Unknown module type: nothing to merge.
    return orig_weight
def get_args(argv=None):
    """Parse the command-line options of the LyCORIS merge tool.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is what the script did
            before this parameter existed.

    Returns:
        ``argparse.Namespace`` with ``base_model``, ``lycoris_model``,
        ``output_name``, ``is_v2``, ``is_sdxl``, ``device``, ``dtype`` and
        ``weight``.
    """
    parser = argparse.ArgumentParser()

    # Required positionals: argparse ignores `default` for them, so none is given.
    parser.add_argument(
        "base_model", help="The model you want to merge with loha", type=str
    )
    parser.add_argument(
        "lycoris_model",
        help="the lyco model you want to merge into sd model",
        type=str,
    )
    parser.add_argument("output_name", help="the output model", type=str)

    parser.add_argument(
        "--is_v2",
        help="Your base model is sd v2 or not",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--is_sdxl",
        help="Your base/db model is sdxl or not",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--device",
        help="Which device you want to use to merge the weight",
        default="cpu",
        type=str,
    )
    parser.add_argument("--dtype", help="dtype to save", default="float", type=str)
    parser.add_argument(
        "--weight",
        help="weight for the lyco model to merge",
        default=1.0,  # a real float rather than a string that argparse must convert
        type=float,
    )
    return parser.parse_args(argv)
def remove_items_with_keywords(json_file_path):
    """Strip machine-specific entries from a preset JSON file, in place.

    Every top-level key that contains one of the keywords below as a
    substring is dropped, the remaining keys are sorted alphabetically, and
    the file is rewritten with 4-space indentation.

    Args:
        json_file_path: Path of the JSON file to rewrite. Its top level must
            be a JSON object.
    """
    keywords = [
        "caption_metadata_filename",
        "dir",
        "image_folder",
        "latent_metadata_filename",
        "logging_dir",
        "model_list",
        "output_dir",
        "output_name",
        "pretrained_model_name_or_path",
        "resume",
        "save_model_as",
        "save_state",
        "sample_",
        "train_dir",
        "wandb_api_key",
    ]

    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(json_file_path, encoding="utf-8") as file:
        data = json.load(file)

    sorted_data = {
        key: data[key]
        for key in sorted(data)
        if not any(keyword in key for keyword in keywords)
    }

    with open(json_file_path, "w", encoding="utf-8") as file:
        json.dump(sorted_data, file, indent=4)

    print(
        "Items with keywords have been removed from the JSON file and the list has been sorted alphabetically:",
        json_file_path,
    )
0000000000000000000000000000000000000000..9d287102c727419c95477d705fa9c903baa573c3 --- /dev/null +++ b/tools/prune.py @@ -0,0 +1,37 @@ +import argparse +import torch +from tqdm import tqdm + +parser = argparse.ArgumentParser(description="Prune a model") +parser.add_argument("model_prune", type=str, help="Path to model to prune") +parser.add_argument("prune_output", type=str, help="Path to pruned ckpt output") +parser.add_argument("--half", action="store_true", help="Save weights in half precision.") +args = parser.parse_args() + +print("Loading model...") +model_prune = torch.load(args.model_prune) +theta_prune = model_prune["state_dict"] +theta = {} + +print("Pruning model...") +for key in tqdm(theta_prune.keys(), desc="Pruning keys"): + if "model" in key: + theta.update({key: theta_prune[key]}) + +del theta_prune + +if args.half: + print("Halving model...") + state_dict = {k: v.half() for k, v in tqdm(theta.items(), desc="Halving weights")} +else: + state_dict = theta + +del theta + +print("Saving pruned model...") + +torch.save({"state_dict": state_dict}, args.prune_output) + +del state_dict + +print("Done pruning!") \ No newline at end of file diff --git a/tools/rename_depth_mask.py b/tools/rename_depth_mask.py new file mode 100644 index 0000000000000000000000000000000000000000..d8de7a3d925de08ae65ac2354154f24db162925d --- /dev/null +++ b/tools/rename_depth_mask.py @@ -0,0 +1,21 @@ +import os +import argparse + +# Define the command line arguments +parser = argparse.ArgumentParser(description='Rename files in a folder') +parser.add_argument('folder', metavar='folder', type=str, help='the folder containing the files to rename') + +# Parse the arguments +args = parser.parse_args() + +# Get the list of files in the folder +files = os.listdir(args.folder) + +# Loop through each file in the folder +for file in files: + # Check if the file has the expected format + if file.endswith('-0000.png'): + # Get the new file name + new_file_name = file[:-9] + '.mask' + # 
Rename the file + os.rename(os.path.join(args.folder, file), os.path.join(args.folder, new_file_name)) diff --git a/tools/resize_lora.py b/tools/resize_lora.py new file mode 100644 index 0000000000000000000000000000000000000000..7b6269a05ace65324b561919e7437078ac4fab28 --- /dev/null +++ b/tools/resize_lora.py @@ -0,0 +1,339 @@ +# +# File from: https://raw.githubusercontent.com/mgz-dev/sd-scripts/main/networks/resize_lora.py +# + +# Convert LoRA to different rank approximation (should only be used to go to lower rank) +# This code is based off the extract_lora_from_models.py file which is based on https://github.com/cloneofsimo/lora/blob/develop/lora_diffusion/cli_svd.py +# Thanks to cloneofsimo and kohya + +import argparse +import torch +from safetensors.torch import load_file, save_file, safe_open +from tqdm import tqdm +from library import train_util, model_util +import numpy as np + +MIN_SV = 1e-6 + +def load_state_dict(file_name, dtype): + if model_util.is_safetensors(file_name): + sd = load_file(file_name) + with safe_open(file_name, framework="pt") as f: + metadata = f.metadata() + else: + sd = torch.load(file_name, map_location='cpu') + metadata = None + + for key in list(sd.keys()): + if type(sd[key]) == torch.Tensor: + sd[key] = sd[key].to(dtype) + + return sd, metadata + + +def save_to_file(file_name, model, state_dict, dtype, metadata): + if dtype is not None: + for key in list(state_dict.keys()): + if type(state_dict[key]) == torch.Tensor: + state_dict[key] = state_dict[key].to(dtype) + + if model_util.is_safetensors(file_name): + save_file(model, file_name, metadata) + else: + torch.save(model, file_name) + + +def index_sv_cumulative(S, target): + original_sum = float(torch.sum(S)) + cumulative_sums = torch.cumsum(S, dim=0)/original_sum + index = int(torch.searchsorted(cumulative_sums, target)) + 1 + if index >= len(S): + index = len(S) - 1 + + return index + + +def index_sv_fro(S, target): + S_squared = S.pow(2) + s_fro_sq = 
float(torch.sum(S_squared)) + sum_S_squared = torch.cumsum(S_squared, dim=0)/s_fro_sq + index = int(torch.searchsorted(sum_S_squared, target**2)) + 1 + if index >= len(S): + index = len(S) - 1 + + return index + + +# Modified from Kohaku-blueleaf's extract/merge functions +def extract_conv(weight, lora_rank, dynamic_method, dynamic_param, device, scale=1): + out_size, in_size, kernel_size, _ = weight.size() + U, S, Vh = torch.linalg.svd(weight.reshape(out_size, -1).to(device)) + + param_dict = rank_resize(S, lora_rank, dynamic_method, dynamic_param, scale) + lora_rank = param_dict["new_rank"] + + U = U[:, :lora_rank] + S = S[:lora_rank] + U = U @ torch.diag(S) + Vh = Vh[:lora_rank, :] + + param_dict["lora_down"] = Vh.reshape(lora_rank, in_size, kernel_size, kernel_size).cpu() + param_dict["lora_up"] = U.reshape(out_size, lora_rank, 1, 1).cpu() + del U, S, Vh, weight + return param_dict + + +def extract_linear(weight, lora_rank, dynamic_method, dynamic_param, device, scale=1): + out_size, in_size = weight.size() + + U, S, Vh = torch.linalg.svd(weight.to(device)) + + param_dict = rank_resize(S, lora_rank, dynamic_method, dynamic_param, scale) + lora_rank = param_dict["new_rank"] + + U = U[:, :lora_rank] + S = S[:lora_rank] + U = U @ torch.diag(S) + Vh = Vh[:lora_rank, :] + + param_dict["lora_down"] = Vh.reshape(lora_rank, in_size).cpu() + param_dict["lora_up"] = U.reshape(out_size, lora_rank).cpu() + del U, S, Vh, weight + return param_dict + + +def merge_conv(lora_down, lora_up, device): + in_rank, in_size, kernel_size, k_ = lora_down.shape + out_size, out_rank, _, _ = lora_up.shape + assert in_rank == out_rank and kernel_size == k_, f"rank {in_rank} {out_rank} or kernel {kernel_size} {k_} mismatch" + + lora_down = lora_down.to(device) + lora_up = lora_up.to(device) + + merged = lora_up.reshape(out_size, -1) @ lora_down.reshape(in_rank, -1) + weight = merged.reshape(out_size, in_size, kernel_size, kernel_size) + del lora_up, lora_down + return weight + + +def 
def rank_resize(S, rank, dynamic_method, dynamic_param, scale=1):
    """Choose the rank to keep for one layer and report how much is retained.

    Args:
        S: 1-D tensor of singular values as returned by ``torch.linalg.svd``
            (largest first).
        rank: Requested rank; also the hard upper limit for dynamic methods.
        dynamic_method: ``"sv_ratio"``, ``"sv_cumulative"``, ``"sv_fro"`` or
            anything else (e.g. ``None``) for a fixed ``rank``.
        dynamic_param: Target value for the chosen dynamic method.
        scale: alpha/dim ratio of the source network; the new alpha is
            ``scale * new_rank``.

    Returns:
        dict with ``new_rank``, ``new_alpha``, ``sum_retained``,
        ``fro_retained`` and ``max_ratio``.
    """
    num_sv = len(S)

    if dynamic_method == "sv_ratio":
        # Keep singular values larger than max(S) / dynamic_param.
        min_sv = S[0] / dynamic_param
        new_rank = max(torch.sum(S > min_sv).item(), 1)
    elif dynamic_method == "sv_cumulative":
        # Keep enough values to reach the cumulative-sum target.
        new_rank = max(index_sv_cumulative(S, dynamic_param), 1)
    elif dynamic_method == "sv_fro":
        # Keep enough values to reach the Frobenius-norm target.
        new_rank = min(max(index_sv_fro(S, dynamic_param), 1), num_sv - 1)
    else:
        new_rank = rank

    if S[0] <= MIN_SV:  # Zero matrix, set dim to 1
        new_rank = 1
    elif new_rank > rank:  # cap max rank at rank
        new_rank = rank

    # A rank can neither exceed the number of singular values nor drop below
    # one (the sv_fro clamp above yields 0 when there is a single value).
    new_rank = max(1, min(new_rank, num_sv))
    new_alpha = float(scale * new_rank)

    # Calculate resize info
    s_sum = torch.sum(torch.abs(S))
    s_rank = torch.sum(torch.abs(S[:new_rank]))

    S_squared = S.pow(2)
    s_fro = torch.sqrt(torch.sum(S_squared))
    s_red_fro = torch.sqrt(torch.sum(S_squared[:new_rank]))
    fro_percent = float(s_red_fro / s_fro)

    # Ratio of the largest value to the first discarded one; when nothing is
    # discarded fall back to the last value instead of indexing past the end.
    ratio_index = min(new_rank, num_sv - 1)

    return {
        "new_rank": new_rank,
        "new_alpha": new_alpha,
        "sum_retained": s_rank / s_sum,
        "fro_retained": fro_percent,
        "max_ratio": S[0] / S[ratio_index],
    }
verbose_str = "\n" + fro_list = [] + + # Extract loaded lora dim and alpha + for key, value in lora_sd.items(): + if network_alpha is None and 'alpha' in key: + network_alpha = value + if network_dim is None and 'lora_down' in key and len(value.size()) == 2: + network_dim = value.size()[0] + if network_alpha is not None and network_dim is not None: + break + if network_alpha is None: + network_alpha = network_dim + + scale = network_alpha/network_dim + + if dynamic_method: + print(f"Dynamically determining new alphas and dims based off {dynamic_method}: {dynamic_param}, max rank is {new_rank}") + + lora_down_weight = None + lora_up_weight = None + + o_lora_sd = lora_sd.copy() + block_down_name = None + block_up_name = None + + with torch.no_grad(): + for key, value in tqdm(lora_sd.items()): + if 'lora_down' in key: + block_down_name = key.split(".")[0] + lora_down_weight = value + if 'lora_up' in key: + block_up_name = key.split(".")[0] + lora_up_weight = value + + weights_loaded = (lora_down_weight is not None and lora_up_weight is not None) + + if (block_down_name == block_up_name) and weights_loaded: + + conv2d = (len(lora_down_weight.size()) == 4) + + if conv2d: + full_weight_matrix = merge_conv(lora_down_weight, lora_up_weight, device) + param_dict = extract_conv(full_weight_matrix, new_rank, dynamic_method, dynamic_param, device, scale) + else: + full_weight_matrix = merge_linear(lora_down_weight, lora_up_weight, device) + param_dict = extract_linear(full_weight_matrix, new_rank, dynamic_method, dynamic_param, device, scale) + + if verbose: + max_ratio = param_dict['max_ratio'] + sum_retained = param_dict['sum_retained'] + fro_retained = param_dict['fro_retained'] + if not np.isnan(fro_retained): + fro_list.append(float(fro_retained)) + + verbose_str+=f"{block_down_name:75} | " + verbose_str+=f"sum(S) retained: {sum_retained:.1%}, fro retained: {fro_retained:.1%}, max(S) ratio: {max_ratio:0.1f}" + + if verbose and dynamic_method: + verbose_str+=f", dynamic | 
dim: {param_dict['new_rank']}, alpha: {param_dict['new_alpha']}\n" + else: + verbose_str+=f"\n" + + new_alpha = param_dict['new_alpha'] + o_lora_sd[block_down_name + "." + "lora_down.weight"] = param_dict["lora_down"].to(save_dtype).contiguous() + o_lora_sd[block_up_name + "." + "lora_up.weight"] = param_dict["lora_up"].to(save_dtype).contiguous() + o_lora_sd[block_up_name + "." "alpha"] = torch.tensor(param_dict['new_alpha']).to(save_dtype) + + block_down_name = None + block_up_name = None + lora_down_weight = None + lora_up_weight = None + weights_loaded = False + del param_dict + + if verbose: + print(verbose_str) + + print(f"Average Frobenius norm retention: {np.mean(fro_list):.2%} | std: {np.std(fro_list):0.3f}") + print("resizing complete") + return o_lora_sd, network_dim, new_alpha + + +def resize(args): + + def str_to_dtype(p): + if p == 'float': + return torch.float + if p == 'fp16': + return torch.float16 + if p == 'bf16': + return torch.bfloat16 + return None + + if args.dynamic_method and not args.dynamic_param: + raise Exception("If using dynamic_method, then dynamic_param is required") + + merge_dtype = str_to_dtype('float') # matmul method above only seems to work in float32 + save_dtype = str_to_dtype(args.save_precision) + if save_dtype is None: + save_dtype = merge_dtype + + print("loading Model...") + lora_sd, metadata = load_state_dict(args.model, merge_dtype) + + print("Resizing Lora...") + state_dict, old_dim, new_alpha = resize_lora_model(lora_sd, args.new_rank, save_dtype, args.device, args.dynamic_method, args.dynamic_param, args.verbose) + + # update metadata + if metadata is None: + metadata = {} + + comment = metadata.get("ss_training_comment", "") + + if not args.dynamic_method: + metadata["ss_training_comment"] = f"dimension is resized from {old_dim} to {args.new_rank}; {comment}" + metadata["ss_network_dim"] = str(args.new_rank) + metadata["ss_network_alpha"] = str(new_alpha) + else: + metadata["ss_training_comment"] = f"Dynamic 
resize with {args.dynamic_method}: {args.dynamic_param} from {old_dim}; {comment}" + metadata["ss_network_dim"] = 'Dynamic' + metadata["ss_network_alpha"] = 'Dynamic' + + model_hash, legacy_hash = train_util.precalculate_safetensors_hashes(state_dict, metadata) + metadata["sshs_model_hash"] = model_hash + metadata["sshs_legacy_hash"] = legacy_hash + + print(f"saving model to: {args.save_to}") + save_to_file(args.save_to, state_dict, state_dict, save_dtype, metadata) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument("--save_precision", type=str, default=None, + choices=[None, "float", "fp16", "bf16"], help="precision in saving, float if omitted / 保存時の精度、未指定時はfloat") + parser.add_argument("--new_rank", type=int, default=4, + help="Specify rank of output LoRA / 出力するLoRAのrank (dim)") + parser.add_argument("--save_to", type=str, default=None, + help="destination file name: ckpt or safetensors file / 保存先のファイル名、ckptまたはsafetensors") + parser.add_argument("--model", type=str, default=None, + help="LoRA model to resize at to new rank: ckpt or safetensors file / 読み込むLoRAモデル、ckptまたはsafetensors") + parser.add_argument("--device", type=str, default=None, help="device to use, cuda for GPU / 計算を行うデバイス、cuda でGPUを使う") + parser.add_argument("--verbose", action="store_true", + help="Display verbose resizing information / rank変更時の詳細情報を出力する") + parser.add_argument("--dynamic_method", type=str, default=None, choices=[None, "sv_ratio", "sv_fro", "sv_cumulative"], + help="Specify dynamic resizing method, --new_rank is used as a hard limit for max rank") + parser.add_argument("--dynamic_param", type=float, default=None, + help="Specify target for dynamic reduction") + + + args = parser.parse_args() + resize(args) \ No newline at end of file