Adapted code from https://github.com/Linaqruf/kohya-trainer

# I. Install Kohya Trainer

In [1]:
# @title ## 1.1. Install Dependencies
# @markdown Clone Kohya Trainer from GitHub and check for updates. Use the textbox below if you want to check out another branch or an older commit; leave it empty to stay on the HEAD of main. This will also install the required libraries.
import os
import zipfile
import shutil
import time
from subprocess import getoutput
from IPython.utils import capture
from google.colab import drive

# Restore variables persisted with %store in a previous run, if any.
%store -r

# Working directories under root_dir
root_dir = "/content"
deps_dir = os.path.join(root_dir, "deps")
repo_dir = os.path.join(root_dir, "kohya-trainer")
training_dir = os.path.join(root_dir, "fine_tune")
pretrained_model = os.path.join(root_dir, "pretrained_model")
vae_dir = os.path.join(root_dir, "vae")
config_dir = os.path.join(training_dir, "config")

# Paths inside the cloned repository (repo_dir)
accelerate_config = os.path.join(repo_dir, "accelerate_config/config.yaml")
tools_dir = os.path.join(repo_dir, "tools")
finetune_dir = os.path.join(repo_dir, "finetune")

# Persist each path variable by name so that later cells can load it with
# `%store -r`. The confirmation text printed by %store is captured and dropped.
for store in [
    "root_dir",
    "deps_dir",
    "repo_dir",
    "training_dir",
    "pretrained_model",
    "vae_dir",
    "accelerate_config",
    "tools_dir",
    "finetune_dir",
    "config_dir",
]:
    with capture.capture_output() as cap:
        %store {store}
        del cap

repo_url = "https://github.com/Linaqruf/kohya-trainer"
# File handed to remove_bitsandbytes_message() by main().
# NOTE(review): the path hard-codes python3.10 — confirm it matches the runtime.
bitsandytes_main_py = "/usr/local/lib/python3.10/dist-packages/bitsandbytes/cuda_setup/main.py"
# Colab form fields: `branch` is the ref checked out in main() (empty skips the
# checkout), `mount_drive` mounts Google Drive, and `verbose` drops the quiet
# flags passed to pip/apt.
branch = ""  # @param {type: "string"}
mount_drive = True  # @param {type: "boolean"}
verbose = False # @param {type: "boolean"}

def read_file(filename):
    """Return the entire text content of the file at `filename`."""
    with open(filename, "r") as handle:
        return handle.read()


def write_file(filename, contents):
    """Create or overwrite the file at `filename` with the text `contents`."""
    with open(filename, "w") as handle:
        handle.write(contents)


def clone_repo(url):
    if not os.path.exists(repo_dir):
        os.chdir(root_dir)
        !git clone {url} {repo_dir}
    else:
        os.chdir(repo_dir)
        !git pull origin {branch} if branch else !git pull

def install_dependencies():
    s = getoutput('nvidia-smi')

    if 'T4' in s:
        !sed -i "s@cpu@cuda@" library/model_util.py

    !pip install {'-q' if not verbose else ''} --upgrade -r requirements.txt

    from accelerate.utils import write_basic_config

    if not os.path.exists(accelerate_config):
        write_basic_config(save_location=accelerate_config)


def remove_bitsandbytes_message(filename):
    """Patch bitsandbytes so that its welcome banner can be silenced.

    The start of `evaluate_cuda_setup()` in `filename` is replaced by a
    variant that prints the banner only when the `BITSANDBYTES_NOWELCOME`
    environment variable is unset or equal to "0".

    Args:
        filename: Path of the bitsandbytes source file to patch.

    The patch is cosmetic, so it is skipped (instead of raising) when the
    file does not exist, and the file is left untouched when the expected
    banner code is not found in it.
    """
    welcome_message = """
def evaluate_cuda_setup():
    print('')
    print('='*35 + 'BUG REPORT' + '='*35)
    print('Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues')
    print('For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link')
    print('='*80)"""

    new_welcome_message = """
def evaluate_cuda_setup():
    import os
    if 'BITSANDBYTES_NOWELCOME' not in os.environ or str(os.environ['BITSANDBYTES_NOWELCOME']) == '0':
        print('')
        print('=' * 35 + 'BUG REPORT' + '=' * 35)
        print('Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues')
        print('For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link')
        print('To hide this message, set the BITSANDBYTES_NOWELCOME variable like so: export BITSANDBYTES_NOWELCOME=1')
        print('=' * 80)"""

    # The install location depends on the Python / bitsandbytes version, and a
    # missing file must not abort the rest of the setup.
    if not os.path.isfile(filename):
        print(f"bitsandbytes file not found, skipping patch: {filename}")
        return

    contents = read_file(filename)
    if welcome_message not in contents:
        # Already patched, or a release with a different banner: nothing to do.
        return
    write_file(filename, contents.replace(welcome_message, new_welcome_message))


def main():
    os.chdir(root_dir)

    if mount_drive:
        if not os.path.exists("/content/drive"):
            drive.mount("/content/drive")

    for dir in [
        deps_dir,
        training_dir,
        config_dir,
        pretrained_model,
        vae_dir
    ]:
        os.makedirs(dir, exist_ok=True)

    clone_repo(repo_url)

    if branch:
        os.chdir(repo_dir)
        status = os.system(f"git checkout {branch}")
        if status != 0:
            raise Exception("Failed to checkout branch or commit")

    os.chdir(repo_dir)

    !apt install aria2 {'-qq' if not verbose else ''}

    install_dependencies()
    time.sleep(3)

    remove_bitsandbytes_message(bitsandytes_main_py)

    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
    os.environ["BITSANDBYTES_NOWELCOME"] = "1"
    os.environ["SAFETENSORS_FAST_GPU"] = "1"

    cuda_path = "/usr/local/cuda-11.8/targets/x86_64-linux/lib/"
    ld_library_path = os.environ.get("LD_LIBRARY_PATH", "")
    os.environ["LD_LIBRARY_PATH"] = f"{ld_library_path}:{cuda_path}"

main()


Mounted at /content/drive
Cloning into '/content/kohya-trainer'...
remote: Enumerating objects: 2514, done.[K
remote: Counting objects: 100% (2514/2514), done.[K
remote: Compressing objects: 100% (938/938), done.[K
remote: Total 2514 (delta 1692), reused 2242 (delta 1574), pack-reused 0[K
Receiving objects: 100% (2514/2514), 4.82 MiB | 11.12 MiB/s, done.
Resolving deltas: 100% (1692/1692), done.
The following additional packages will be installed:
  libaria2-0 libc-ares2
The following NEW packages will be installed:
  aria2 libaria2-0 libc-ares2
0 upgraded, 3 newly installed, 0 to remove and 24 not upgraded.
Need to get 1,513 kB of archives.
After this operation, 5,441 kB of additional disk space will be used.
Selecting previously unselected package libc-ares2:amd64.
(Reading database ... 121654 files and directories currently installed.)
Preparing to unpack .../libc-ares2_1.18.1-1ubuntu0.22.04.2_amd64.deb ...
Unpacking libc-ares2:amd64 (1.18.1-1ubuntu0.22.04.2) ...
Selecting previ

# II. Pretrained Model Selection

In [2]:
# @title ## 2.1. Download Available Model
import os

# Restore the path variables persisted by the install cell.
%store -r

os.chdir(root_dir)

# Download URLs of the selectable SD1.x checkpoints, keyed by form label.
models = {
    "Anything-v3-1": "https://huggingface.co/cag/anything-v3-1/resolve/main/anything-v3-1.safetensors",
    "AnyLoRA": "https://huggingface.co/Linaqruf/stolen/resolve/main/pruned-models/AnyLoRA_noVae_fp16-pruned.safetensors",
    "Stable-Diffusion-v1-5": "https://huggingface.co/Linaqruf/stolen/resolve/main/pruned-models/stable_diffusion_1_5-pruned.safetensors",
}

# Download URLs of the selectable SD2.x checkpoints, keyed by form label.
v2_models = {
    "stable-diffusion-2-1-base": "https://huggingface.co/stabilityai/stable-diffusion-2-1-base/resolve/main/v2-1_512-ema-pruned.safetensors",
    "stable-diffusion-2-1-768v": "https://huggingface.co/stabilityai/stable-diffusion-2-1/resolve/main/v2-1_768-ema-pruned.safetensors",
}

# Download queues of (name, url) pairs consumed by install_checkpoint().
installModels = []
installv2Models = []

# @markdown ### SD1.x model
model_name = "AnyLoRA"  # @param ["", "Anything-v3-1", "AnyLoRA", "Stable-Diffusion-v1-5"]
# @markdown ### SD2.x model
v2_model_name = ""  # @param ["", "stable-diffusion-2-1-base", "stable-diffusion-2-1-768v"]

# Queue the selected SD1.x model; an empty or unknown name queues nothing.
if model_name:
    model_url = models.get(model_name)
    if model_url:
        installModels.append((model_name, model_url))

# Same for the SD2.x selection.
if v2_model_name:
    v2_model_url = v2_models.get(v2_model_name)
    if v2_model_url:
        installv2Models.append((v2_model_name, v2_model_url))


def install(checkpoint_name, url):
    ext = "ckpt" if url.endswith(".ckpt") else "safetensors"

    hf_token = "token"  # @param {type: "string"}
    user_header = f'"Authorization: Bearer {hf_token}"'
    !aria2c --console-log-level=error --summary-interval=10 --header={user_header} -c -x 16 -k 1M -s 16 -d {pretrained_model} -o {checkpoint_name}.{ext} "{url}"


def install_checkpoint():
    """Download every queued SD1.x checkpoint, then every queued SD2.x one."""
    for name, url in installModels + installv2Models:
        install(name, url)


install_checkpoint()



 *** Download Progress Summary as of Mon Jan 15 01:30:24 2024 *** 
=
[#1f7e7b 1.5GiB/1.9GiB(77%) CN:16 DL:150MiB ETA:2s]
FILE: /content/pretrained_model/AnyLoRA.safetensors
-

[0m
Download Results:
gid   |stat|avg speed  |path/URI
1f7e7b|[1;32mOK[0m  |   162MiB/s|/content/pretrained_model/AnyLoRA.safetensors

Status Legend:
(OK):download completed.


# III. Data Acquisition

You have three options for acquiring your dataset:

1. Uploading it to Colab's local files.
2. Bulk downloading images from Danbooru using the `Simple Booru Scraper`.
3. Locating your dataset from Google Drive.


In [3]:
# @title ## 3.1. Locating Train Data Directory
# @markdown Define location of your training data. This cell will also create a folder based on your input.
# @markdown This folder will serve as the target folder for scraping, tagging, bucketing, and training in the next cell.
import os

# Restore variables persisted by earlier cells.
%store -r

train_data_dir = "/content/fine_tune/train_data"  # @param {'type' : 'string'}
# Persist the chosen directory so that later cells can restore it.
%store train_data_dir

# exist_ok=True makes re-running the cell harmless.
os.makedirs(train_data_dir, exist_ok=True)
print(f"Your train data directory : {train_data_dir}")

Stored 'train_data_dir' (str)
Your train data directory : /content/fine_tune/train_data


In [4]:
# @title ## 3.2. Unzip Dataset

import os
import shutil
from pathlib import Path

#@title ## Unzip Dataset
# @markdown Use this section if your dataset is in a `zip` file and has been uploaded somewhere. This code cell will download your dataset and automatically extract it to the `train_data_dir` if the `unzip_to` variable is empty.
# A value starting with "/content" is used as a local path by
# download_dataset(); anything else is downloaded.
zipfile_url = "/content/rhnd.zip" #@param {type:"string"}
# File name used for a downloaded archive inside root_dir.
zipfile_name = "zipfile.zip"
unzip_to = "" #@param {type:"string"}

# Token sent as a Bearer header for huggingface.co downloads; the default
# "token" is a placeholder value.
hf_token = "token"  # @param {type: "string"}
user_header = f'"Authorization: Bearer {hf_token}"'

# Extract to the explicit target when one is given, else to train_data_dir.
if unzip_to:
    os.makedirs(unzip_to, exist_ok=True)
else:
    unzip_to = train_data_dir


def download_dataset(url):
    if url.startswith("/content"):
        return url
    elif "drive.google.com" in url:
        os.chdir(root_dir)
        !gdown --fuzzy {url}
        return f"{root_dir}/{zipfile_name}"
    elif "huggingface.co" in url:
        if "/blob/" in url:
            url = url.replace("/blob/", "/resolve/")
        !aria2c --console-log-level=error --summary-interval=10 --header={user_header} -c -x 16 -k 1M -s 16 -d {root_dir} -o {zipfile_name} {url}
        return f"{root_dir}/{zipfile_name}"
    else:
        !aria2c --console-log-level=error --summary-interval=10 -c -x 16 -k 1M -s 16 -d {root_dir} -o {zipfile_name} {url}
        return f"{root_dir}/{zipfile_name}"


def extract_dataset(zip_file, output_path):
    if zip_file.startswith("/content"):
        !unzip -j -o {zip_file} -d "{output_path}"
    else:
        !unzip -j -o "{zip_file}" -d "{output_path}"


def remove_files(train_dir, files_to_move, dest_dir=None):
    """Move selected files out of `train_dir`.

    Args:
        train_dir: Directory to scan (top level only).
        files_to_move: Collection of file names to relocate.
        dest_dir: Destination directory; defaults to the notebook-level
            `training_dir` when omitted.

    A listed file is moved to `dest_dir`, unless a file of the same name
    already exists there, in which case the copy in `train_dir` is deleted.
    """
    if dest_dir is None:
        dest_dir = training_dir

    # Source and destination are the same place: nothing to move, and
    # deleting would lose the only copy.
    if os.path.abspath(dest_dir) == os.path.abspath(train_dir):
        return

    for filename in os.listdir(train_dir):
        if filename not in files_to_move:
            continue
        file_path = os.path.join(train_dir, filename)
        # Check the destination: `file_path` was just listed, so testing it
        # is always true and the file would be deleted instead of moved.
        if os.path.exists(os.path.join(dest_dir, filename)):
            os.remove(file_path)
        else:
            shutil.move(file_path, dest_dir)


# Fetch (or locate) the archive, extract it, then delete the archive.
zip_file = download_dataset(zipfile_url)
extract_dataset(zip_file, unzip_to)
# NOTE(review): for a local "/content/..." input download_dataset() returns
# the given path unchanged, so this deletes the user's own zip file —
# confirm that this is intended.
os.remove(zip_file)

# Metadata file names that remove_files() looks for in the train data directory.
files_to_move = (
    "meta_cap.json",
    "meta_cap_dd.json",
    "meta_lat.json",
    "meta_clean.json",
)

remove_files(train_data_dir, files_to_move)


Archive:  /content/rhnd.zip
  inflating: /content/fine_tune/train_data/1.jpg  
  inflating: /content/fine_tune/train_data/1.txt  
  inflating: /content/fine_tune/train_data/10.jpg  
  inflating: /content/fine_tune/train_data/10.txt  
  inflating: /content/fine_tune/train_data/11.jpg  
  inflating: /content/fine_tune/train_data/11.txt  
  inflating: /content/fine_tune/train_data/12.jpg  
  inflating: /content/fine_tune/train_data/12.txt  
  inflating: /content/fine_tune/train_data/13.jpg  
  inflating: /content/fine_tune/train_data/13.txt  
  inflating: /content/fine_tune/train_data/14.jpg  
  inflating: /content/fine_tune/train_data/14.txt  
  inflating: /content/fine_tune/train_data/15.jpg  
  inflating: /content/fine_tune/train_data/15.txt  
  inflating: /content/fine_tune/train_data/16.jpg  
  inflating: /content/fine_tune/train_data/16.txt  
  inflating: /content/fine_tune/train_data/17.jpg  
  inflating: /content/fine_tune/train_data/17.txt  
  inflating: /content/fine_tune/train_

# IV. Data Preprocessing

In [5]:
#@title ## 4.3. Create Metadata File
import os
%store -r

os.chdir(finetune_dir)

# @markdown Merge tags and/or captions exist in `train_data_dir` into one metadata JSON file, which will be used as the input for the bucketing section.
metadata = "/content/fine_tune/meta_clean.json" #@param {type:"string"}
# @markdown Use `recursive` option to process subfolders as well
recursive = False #@param {type:"boolean"}
# @markdown Use `clean_caption` option to clean such as duplicate tags, `women` to `girl`, etc
clean_caption = False #@param {type:"boolean"}

config = {
    "_train_data_dir": train_data_dir,
    "_out_json": metadata,
    "recursive": recursive,
    "full_path": recursive,
    "clean_caption": clean_caption
}

args = ""
for k, v in config.items():
    if k.startswith("_"):
        args += f'"{v}" '
    elif isinstance(v, str):
        args += f'--{k}="{v}" '
    elif isinstance(v, bool) and v:
        args += f"--{k} "
    elif isinstance(v, float) and not isinstance(v, bool):
        args += f"--{k}={v} "
    elif isinstance(v, int) and not isinstance(v, bool):
        args += f"--{k}={v} "

os.chdir(finetune_dir)
final_args = f"python merge_all_to_metadata.py {args}"
!{final_args}


CUDA backend failed to initialize: Found CUDA version 12010, but JAX was built against version 12020, which is newer. The copy of CUDA that is installed must be at least as new as the version against which JAX was built. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)
Found 73 images.
Creating a new metadata file
Merging tags and captions into metadata json.
100% 73/73 [00:00<00:00, 17597.80it/s]
No captions found for any of the 73 images
All 73 images have tags
Writing metadata: /content/fine_tune/meta_clean.json
Done!


In [6]:
# @title ## 4.4. Bucketing and Latents Caching
%store -r

# @markdown This code will create buckets based on the `max_resolution` provided for multi-aspect ratio training, and then convert all images within the `train_data_dir` to latents.
v2 = False  # @param{type:"boolean"}
model_dir = "/content/pretrained_model/AnyLoRA.safetensors"  # @param {'type' : 'string'}
input_json = "/content/fine_tune/meta_clean.json"  # @param {'type' : 'string'}
output_json = "/content/fine_tune/meta_lat.json"  # @param {'type' : 'string'}
batch_size = 8  # @param {'type':'integer'}
max_data_loader_n_workers = 2  # @param {'type':'integer'}
max_resolution = "256,256"  # @param ["256,256", "512,512", "640,640", "768,768"] {allow-input: false}
mixed_precision = "no"  # @param ["no", "fp16", "bf16"] {allow-input: false}
flip_aug = False  # @param{type:"boolean"}
#@markdown Use the `recursive` option to process subfolders as well
recursive = False #@param {type:"boolean"}

config = {
    "_train_data_dir": train_data_dir,
    "_in_json": input_json,
    "_out_json": output_json,
    "_model_name_or_path": model_dir,
    "recursive": recursive,
    "full_path": recursive,
    "v2": v2,
    "flip_aug": flip_aug,
    "min_bucket_reso": 256 if max_resolution != "256,256" else 256,
    "max_bucket_reso": 1024 if max_resolution != "256,256" else 512,
    "batch_size": batch_size,
    "max_data_loader_n_workers": max_data_loader_n_workers,
    "max_resolution": max_resolution,
    "mixed_precision": mixed_precision,
}

args = ""
for k, v in config.items():
    if k.startswith("_"):
        args += f'"{v}" '
    elif isinstance(v, str):
        args += f'--{k}="{v}" '
    elif isinstance(v, bool) and v:
        args += f"--{k} "
    elif isinstance(v, float) and not isinstance(v, bool):
        args += f"--{k}={v} "
    elif isinstance(v, int) and not isinstance(v, bool):
        args += f"--{k}={v} "

os.chdir(finetune_dir)
final_args = f"python prepare_buckets_latents.py {args}"
!{final_args}

CUDA backend failed to initialize: Found CUDA version 12010, but JAX was built against version 12020, which is newer. The copy of CUDA that is installed must be at least as new as the version against which JAX was built. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)
found 73 images.
loading existing metadata: /content/fine_tune/meta_clean.json
load VAE: /content/pretrained_model/AnyLoRA.safetensors
100% 73/73 [00:05<00:00, 14.27it/s]
bucket 0 (256, 256): 73
mean ar error: 0.0
writing metadata: /content/fine_tune/meta_lat.json
done!


# V. Training Model



In [7]:
# @title ## 5.1. Model Config
from google.colab import drive

v2 = False  # @param {type:"boolean"}
v_parameterization = False  # @param {type:"boolean"}
project_name = "rhnd"  # @param {type:"string"}
if not project_name:
    project_name = "last"
%store project_name
pretrained_model_name_or_path = "/content/pretrained_model/AnyLoRA.safetensors"  # @param {type:"string"}
vae = ""  # @param {type:"string"}
output_dir = "/content/fine_tune/output"  # @param {'type':'string'}
resume_path = ""  # @param {'type':'string'}

# @markdown `output_to_drive` sets default `output_dir` to `/content/drive/MyDrive/fine_tune/output`. This will override the `output_dir` variable defined above.
output_to_drive = False  # @param {'type':'boolean'}

if output_to_drive:
    output_dir = "/content/drive/MyDrive/fine_tune/output"

    if not os.path.exists("/content/drive"):
        drive.mount("/content/drive")

sample_dir = os.path.join(output_dir, "sample")
for dir in [output_dir, sample_dir]:
    os.makedirs(dir, exist_ok=True)

print("Project Name: ", project_name)
print("Model Version: Stable Diffusion V1.x") if not v2 else ""
print("Model Version: Stable Diffusion V2.x") if v2 and not v_parameterization else ""
print("Model Version: Stable Diffusion V2.x 768v") if v2 and v_parameterization else ""
print(
    "Pretrained Model Path: ", pretrained_model_name_or_path
) if pretrained_model_name_or_path else print("No Pretrained Model path specified.")
print("VAE Path: ", vae) if vae else print("No VAE path specified.")
print("Output Path: ", output_dir)
print("Resume Path: ", resume_path) if resume_path else print(
    "No resume path specified."
)

Stored 'project_name' (str)
Project Name:  rhnd
Model Version: Stable Diffusion V1.x
Pretrained Model Path:  /content/pretrained_model/AnyLoRA.safetensors
No VAE path specified.
Output Path:  /content/fine_tune/output
No resume path specified.


In [8]:
# @title ## 5.2. Dataset Config
import toml
import glob

# @markdown This notebook supports multi-folder training but is not designed for multi-concept training. You can use [Kohya LoRA Dreambooth](https://github.com/Linaqruf/kohya-trainer/blob/main/kohya-LoRA-dreambooth.ipynb), or add an activation word for each train folder under `4.2.3. Custom Caption/Tag (Optional)` instead.
# These values are read by the training-config cell when it builds the
# "dataset_arguments" section of the TOML config.
dataset_repeats = 10  # @param {type:"number"}
in_json = "/content/fine_tune/meta_lat.json"  # @param {type:"string"}
resolution = "256,256" # @param ["256,256", "768,768"]
keep_tokens = 0  # @param {type:"number"}


In [9]:
# @title ## 5.3. Optimizer Config
from IPython.utils import capture

# @markdown `NEW` Gamma for reducing the weight of high-loss timesteps. Lower numbers have a stronger effect. The paper recommends 5. Read the paper [here](https://arxiv.org/abs/2303.09556).
min_snr_gamma = -1 #@param {type:"number"}
# @markdown `AdamW8bit` was the old `--use_8bit_adam`.
optimizer_type = "AdamW8bit"  # @param ["AdamW", "AdamW8bit", "Lion", "SGDNesterov", "SGDNesterov8bit", "DAdaptation", "AdaFactor"]
# @markdown Additional arguments for optimizer, e.g: `["decouple=true","weight_decay=0.6"]`
optimizer_args = ""  # @param {'type':'string'}
# @markdown Set `learning_rate` to `1.0` if you use `DAdaptation` optimizer, as it's a [free learning rate](https://github.com/facebookresearch/dadaptation) algorithm.
# @markdown You probably need to specify `optimizer_args` for custom optimizer, like using `["decouple=true","weight_decay=0.6"]` for `DAdaptation`.
learning_rate = 2e-6  # @param {'type':'number'}
train_text_encoder = False  # @param {'type':'boolean'}
lr_scheduler = "constant"  # @param ["linear", "cosine", "cosine_with_restarts", "polynomial", "constant", "constant_with_warmup", "adafactor"] {allow-input: false}
lr_warmup_steps = 0  # @param {'type':'number'}
# @markdown You can define `num_cycles` value for `cosine_with_restarts` or `power` value for `polynomial` in the field below.
lr_scheduler_num_cycles = 0  # @param {'type':'number'}
lr_scheduler_power = 0  # @param {'type':'number'}

# Summary of the chosen settings; -1 means Min-SNR weighting is disabled, so
# that line is only printed for other values.
if min_snr_gamma != -1:
    print(f"  - Min-SNR Weighting: {min_snr_gamma}")
print(f"Using {optimizer_type} as Optimizer")
if optimizer_args:
    print("Optimizer Args :", optimizer_args)
print("Learning rate: ", learning_rate)
if train_text_encoder:
    print("Train Text Encoder")
print("Learning rate warmup steps: ", lr_warmup_steps)
print("Learning rate Scheduler:", lr_scheduler)
# Only the scheduler-specific extra that applies to the chosen scheduler is shown.
if lr_scheduler == "cosine_with_restarts":
    print("- lr_scheduler_num_cycles: ", lr_scheduler_num_cycles)
elif lr_scheduler == "polynomial":
    print("- lr_scheduler_power: ", lr_scheduler_power)

Using AdamW8bit as Optimizer
Learning rate:  2e-06
Learning rate warmup steps:  0
Learning rate Scheduler: constant


In [10]:
# @title ## 5.4. Training Config
import toml
import os

# Restore variables persisted by earlier cells.
%store -r
# Colab form fields for the training run.
enable_sample_prompt = True  # @param {type:"boolean"}
sampler = "ddim"  # @param ["ddim", "pndm", "lms", "euler", "euler_a", "heun", "dpm_2", "dpm_2_a", "dpmsolver","dpmsolver++", "dpmsingle", "k_lms", "k_euler", "k_euler_a", "k_dpm_2", "k_dpm_2_a"]
noise_offset = 0.0  # @param {type:"number"}
max_train_steps = 2500  # @param {type:"number"}
train_batch_size = 1  # @param {type:"number"}
mixed_precision = "fp16"  # @param ["no","fp16","bf16"] {allow-input: false}
save_state = False  # @param {type:"boolean"}
save_precision = "fp16"  # @param ["float", "fp16", "bf16"] {allow-input: false}
save_n_epoch_ratio = 1  # @param {type:"number"}
save_model_as = "ckpt"  # @param ["ckpt", "safetensors", "diffusers", "diffusers_safetensors"] {allow-input: false}
max_token_length = 225  # @param {type:"number"}
clip_skip = 2  # @param {type:"number"}
gradient_checkpointing = False  # @param {type:"boolean"}
gradient_accumulation_steps = 1  # @param {type:"number"}
seed = -1  # @param {type:"number"}
# Fixed settings that are not exposed as form fields.
logging_dir = "/content/fine_tune/logs"
prior_loss_weight = 1.0

os.chdir(repo_dir)

# Sample prompt written to sample_prompt.txt further down in this cell. Each
# trailing backslash is a line continuation inside the string literal, so the
# whole prompt ends up on a single line.
# NOTE(review): --n/--w/--h/--l/--s are presumably negative prompt, width,
# height, guidance scale and steps — confirm against the trainer's docs.
sample_str = f"""
  rhnd, open, hand, human hand, good anatomy, five fingers, good quality, plain background \
  --n extra fingers, missing fingers, bad anatomy, lowres, bad hands, cropped, worst quality, low quality, jpeg artifacts, signature, watermark, username, blurry \
  --w 256 \
  --h 256 \
  --l 7 \
  --s 28
"""

# Training configuration, grouped into the sections that are dumped to TOML
# later in this cell. A value of None means "leave this option out".
config = {
    "model_arguments": {
        "v2": v2,
        "v_parameterization": v_parameterization if v2 and v_parameterization else False,
        "pretrained_model_name_or_path": pretrained_model_name_or_path,
        "vae": vae,
    },
    "optimizer_arguments": {
        # -1 in the form means "disabled".
        "min_snr_gamma": min_snr_gamma if not min_snr_gamma == -1 else None,
        "optimizer_type": optimizer_type,
        "learning_rate": learning_rate,
        "max_grad_norm": 1.0,
        "train_text_encoder": train_text_encoder,
        # NOTE(review): eval() executes whatever is typed into the
        # `optimizer_args` form field; only enter trusted list literals such
        # as ["decouple=true","weight_decay=0.6"].
        "optimizer_args": eval(optimizer_args) if optimizer_args else None,
        "lr_scheduler": lr_scheduler,
        "lr_warmup_steps": lr_warmup_steps,
        # Scheduler-specific extras are only set for the scheduler that uses them.
        "lr_scheduler_num_cycles": lr_scheduler_num_cycles if lr_scheduler == "cosine_with_restarts" else None,
        "lr_scheduler_power": lr_scheduler_power if lr_scheduler == "polynomial" else None,
    },
    "dataset_arguments": {
        "debug_dataset": False,
        "in_json": in_json,
        "train_data_dir": train_data_dir,
        "dataset_repeats": dataset_repeats,
        "shuffle_caption": True,
        "keep_tokens": keep_tokens,
        "resolution": resolution,
        "caption_dropout_rate": 0,
        "caption_tag_dropout_rate": 0,
        "caption_dropout_every_n_epochs": 0,
        "color_aug": False,
        "face_crop_aug_range": None,
        "token_warmup_min": 1,
        "token_warmup_step": 0,
    },
    "training_arguments": {
        "output_dir": output_dir,
        "output_name": project_name,
        "save_precision": save_precision,
        "save_every_n_epochs": None,
        "save_n_epoch_ratio": save_n_epoch_ratio,
        "save_last_n_epochs": None,
        "save_state": save_state,
        "save_last_n_epochs_state": None,
        "resume": resume_path,
        "train_batch_size": train_batch_size,
        # Use the form field (default 225) instead of a hard-coded 225, so
        # that changing `max_token_length` actually takes effect.
        "max_token_length": max_token_length,
        "mem_eff_attn": False,
        "xformers": True,
        "max_train_steps": max_train_steps,
        "max_data_loader_n_workers": 8,
        "persistent_data_loader_workers": True,
        # Non-positive seeds (the form default is -1) leave the seed unset.
        "seed": seed if seed > 0 else None,
        "gradient_checkpointing": gradient_checkpointing,
        "gradient_accumulation_steps": gradient_accumulation_steps,
        "mixed_precision": mixed_precision,
        "clip_skip": clip_skip if not v2 else None,
        "logging_dir": logging_dir,
        "log_prefix": project_name,
        "noise_offset": noise_offset if noise_offset > 0 else None,
    },
    "sample_prompt_arguments": {
        # Sampling is never switched off; a very large interval is used instead.
        "sample_every_n_steps": 100 if enable_sample_prompt else 999999,
        "sample_every_n_epochs": None,
        "sample_sampler": sampler,
    },
    "saving_arguments": {
        "save_model_as": save_model_as
    },
}

# Destination files inside the config directory.
config_path = os.path.join(config_dir, "config_file.toml")
prompt_path = os.path.join(config_dir, "sample_prompt.txt")

# Normalise empty strings to None, one level deep, before serialising.
for key, section in config.items():
    if isinstance(section, dict):
        for sub_key, value in section.items():
            if value == "":
                section[sub_key] = None
    elif section == "":
        config[key] = None

config_str = toml.dumps(config)

def write_file(filename, contents):
    """Create or overwrite the file at `filename` with the text `contents`."""
    with open(filename, "w") as handle:
        handle.write(contents)

# Write the TOML config and the sample prompt, then echo the config.
write_file(config_path, config_str)
write_file(prompt_path, sample_str)

print(config_str)

[model_arguments]
v2 = false
v_parameterization = false
pretrained_model_name_or_path = "/content/pretrained_model/AnyLoRA.safetensors"

[optimizer_arguments]
optimizer_type = "AdamW8bit"
learning_rate = 2e-6
max_grad_norm = 1.0
train_text_encoder = false
lr_scheduler = "constant"
lr_warmup_steps = 0

[dataset_arguments]
debug_dataset = false
in_json = "/content/fine_tune/meta_lat.json"
train_data_dir = "/content/fine_tune/train_data"
dataset_repeats = 10
shuffle_caption = true
keep_tokens = 0
resolution = "256,256"
caption_dropout_rate = 0
caption_tag_dropout_rate = 0
caption_dropout_every_n_epochs = 0
color_aug = false
token_warmup_min = 1
token_warmup_step = 0

[training_arguments]
output_dir = "/content/fine_tune/output"
output_name = "rhnd"
save_precision = "fp16"
save_n_epoch_ratio = 1
save_state = false
train_batch_size = 1
max_token_length = 225
mem_eff_attn = false
xformers = true
max_train_steps = 2500
max_data_loader_n_workers = 8
persistent_data_loader_workers = true
gradie

In [11]:
#@title ## 5.5. Start Training

#@markdown Check your config here if you want to edit something:
#@markdown - `sample_prompt` : /content/fine_tune/config/sample_prompt.txt
#@markdown - `config_file` : /content/fine_tune/config/config_file.toml

#@markdown Generated sample can be seen here: /content/fine_tune/output/sample

#@markdown You can import config from another session if you want.
sample_prompt = "/content/fine_tune/config/sample_prompt.txt" #@param {type:'string'}
config_file = "/content/fine_tune/config/config_file.toml" #@param {type:'string'}

# Options placed between `accelerate launch` and the script name.
accelerate_conf = {
    "config_file" : accelerate_config,
    "num_cpu_threads_per_process" : 1,
}

# Options placed after `fine_tune.py`.
train_conf = {
    "sample_prompts" : sample_prompt,
    "config_file" : config_file
}

def train(config):
    """Render a mapping as a command-line argument string.

    Args:
        config: Mapping of option name to value.

    Returns:
        A string in which every rendered item is followed by one space:
        keys starting with "_" give the quoted value only, strings give
        `--key="value"`, true booleans give `--key`, ints and floats give
        `--key=value`. False booleans and values of any other type are
        skipped.
    """
    pieces = []
    for key, value in config.items():
        if key.startswith("_"):
            pieces.append(f'"{value}" ')
        elif isinstance(value, str):
            pieces.append(f'--{key}="{value}" ')
        elif isinstance(value, bool):
            # bool is tested before int so that False is dropped, not rendered.
            if value:
                pieces.append(f"--{key} ")
        elif isinstance(value, (int, float)):
            pieces.append(f"--{key}={value} ")

    return "".join(pieces)

# Build the two option strings and splice them into the launch command.
accelerate_args = train(accelerate_conf)
train_args = train(train_conf)
final_args = f"accelerate launch {accelerate_args} fine_tune.py {train_args}"

# fine_tune.py is given as a relative path, so run from the repository root.
os.chdir(repo_dir)
!{final_args}

CUDA backend failed to initialize: Found CUDA version 12010, but JAX was built against version 12020, which is newer. The copy of CUDA that is installed must be at least as new as the version against which JAX was built. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)
Loading settings from /content/fine_tune/config/config_file.toml...
/content/fine_tune/config/config_file
prepare tokenizer
Downloading vocab.json: 100% 961k/961k [00:00<00:00, 7.39MB/s]
Downloading merges.txt: 100% 525k/525k [00:00<00:00, 14.4MB/s]
Downloading (…)cial_tokens_map.json: 100% 389/389 [00:00<00:00, 2.52MB/s]
Downloading tokenizer_config.json: 100% 905/905 [00:00<00:00, 5.79MB/s]
update token length: 225
loading existing metadata: /content/fine_tune/meta_lat.json
metadata has bucket info, enable bucketing / メタデータにbucket情報があるためbucketを有効にします
using bucket info in metadata / メタデータ内のbucket情報を使います
[Dataset 0]
  batch_size: 1
  resolution: (256, 256)
  enable_bucket: True
  min_bucket_reso: None
  max_bucket_r

# VI. Testing

In [None]:
#@title ## 6.4. Launch Portable Web UI
import os
import random
import shutil
import zipfile
import time
import json
from google.colab import drive
from datetime import timedelta
from subprocess import getoutput
from IPython.display import clear_output, display, HTML
from IPython.utils import capture
from tqdm import tqdm

# Directories used by the portable web UI.
webui_dir = os.path.join(root_dir, "stable-diffusion-webui")
tmp_dir = os.path.join(root_dir, "tmp")
patches_dir = os.path.join(root_dir, "patches")
deps_dir = os.path.join(root_dir, "deps")
extensions_dir = os.path.join(webui_dir, "extensions")
control_dir = os.path.join(webui_dir, "models/ControlNet")

webui_models_dir = os.path.join(webui_dir, "models/Stable-diffusion")
webui_lora_dir = os.path.join(webui_dir, "models/Lora")
webui_vaes_dir = os.path.join(webui_dir, "models/VAE")

control_net_max_models_num = 2
theme = "ogxBGreen"

# Default generation settings.
default_prompt = "masterpiece, best quality,"
default_neg_prompt = "(worst quality, low quality:1.4)"
default_sampler = "DPM++ 2M Karras"
default_steps = 20
default_width = 256
default_height = 256
default_denoising_strength = 0.55
default_cfg_scale = 7

# NOTE(review): `config_file` rebinds the name that the training cell uses
# for the TOML config path; re-run that cell before training again.
config_file = os.path.join(webui_dir, "config.json")
ui_config_file = os.path.join(webui_dir, "ui-config.json")
webui_style_path = os.path.join(webui_dir, "style.css")

os.chdir(root_dir)

# `directory` rather than `dir`: a module-level loop variable stays in the
# kernel namespace and would shadow the builtin dir() for all later cells.
for directory in (patches_dir, deps_dir):
    os.makedirs(directory, exist_ok=True)

# Archives fetched and unpacked by pre_download().
package_url = [
    "https://huggingface.co/Linaqruf/fast-repo/resolve/main/anapnoe-webui.tar.lz4",
    "https://huggingface.co/Linaqruf/fast-repo/resolve/main/anapnoe-webui-deps.tar.lz4",
    "https://huggingface.co/Linaqruf/fast-repo/resolve/main/anapnoe-webui-cache.tar.lz4",
]

def pre_download(desc):
    for package in tqdm(package_url, desc=desc):
        with capture.capture_output() as cap:
            package_name = os.path.basename(package)
            !aria2c --console-log-level=error --summary-interval=10 -c -x 16 -k 1M -s 16 -d {root_dir} -o {package_name} {package}
            if package_name == f"anapnoe-webui-deps.tar.lz4":
                !tar -xI lz4 -f {package_name} --overwrite-dir --directory=/usr/local/lib/python3.10/dist-packages/
            else:
                !tar -xI lz4 -f {package_name} --directory=/
            os.remove(package_name)
            del cap

    if os.path.exists("/usr/local/lib/python3.10/dist-packages/ffmpy-0.3.0.dist-info"):
        shutil.rmtree("/usr/local/lib/python3.10/dist-packages/ffmpy-0.3.0.dist-info")

    s = getoutput("nvidia-smi")
    with capture.capture_output() as cap:
        if not "T4" in s:
            !pip uninstall -y xformers
            !pip install -q xformers==0.0.18 triton
        del cap


def read_config(filename):
    """Load a configuration file.

    Files whose name ends in ".json" are parsed with json.load and returned as
    the decoded object; any other file is returned as its raw text.

    The file is always opened as UTF-8. write_config() writes non-JSON files as
    UTF-8, so reading with the platform default encoding could corrupt or
    reject text that this notebook wrote itself.

    Args:
        filename: Path of the file to read.

    Returns:
        The decoded JSON value for ".json" files, otherwise the file's text.
    """
    with open(filename, "r", encoding="utf-8") as f:
        if filename.endswith(".json"):
            return json.load(f)
        return f.read()


def write_config(filename, config):
    """Persist `config` to `filename`.

    A ".json" file name means `config` is serialised with json.dump using a
    4-space indent; any other name means `config` is text and is written
    verbatim as UTF-8.
    """
    if not filename.endswith(".json"):
        with open(filename, 'w', encoding="utf-8") as out:
            out.write(config)
        return

    with open(filename, "w") as out:
        json.dump(config, out, indent=4)


def open_theme(filename):
    """Rewrite the web UI's style.css so it starts with the chosen theme's CSS.

    The new file is ":host{" + theme file contents + "}", then the
    /*BREAKPOINT_CSS_CONTENT*/ marker, then the part of the current style.css
    that followed the marker.

    Args:
        filename: Theme name, i.e. the theme file's name without ".css".
    """
    marker = "/*BREAKPOINT_CSS_CONTENT*/"
    stylesheet_path = os.path.join(webui_dir, "style.css")
    theme_path = os.path.join(
        webui_dir, "extensions-builtin/sd_theme_editor/themes", f"{filename}.css"
    )

    # Index 1 is the text between the first marker and the next one (or the
    # end of the file); a stylesheet without the marker raises IndexError.
    rules_after_marker = read_config(stylesheet_path).split(marker)[1]
    theme_variables = read_config(theme_path)

    write_config(
        stylesheet_path,
        ":host{" + theme_variables + "}" + marker + rules_after_marker,
    )


def change_config(filename):
    """Apply this notebook's settings to the web UI's config.json.

    Reads the JSON file, makes sure "stable-diffusion-webui" is listed under
    "disabled_extensions", points every output folder at a path below
    `tmp_dir`, sets the ControlNet / additional-networks / sampling options
    listed below and writes the file back in place.

    Args:
        filename: Path of the config.json to rewrite.
    """
    config = read_config(filename)

    # setdefault: a config.json without the key gets a fresh list instead of
    # failing with KeyError.
    disabled_extensions = config.setdefault("disabled_extensions", [])
    if "stable-diffusion-webui" not in disabled_extensions:
        disabled_extensions.append("stable-diffusion-webui")

    config["outdir_txt2img_samples"] = os.path.join(tmp_dir, "outputs/txt2img-images")
    config["outdir_img2img_samples"] = os.path.join(tmp_dir, "outputs/img2img-images")
    config["outdir_extras_samples"] = os.path.join(tmp_dir, "outputs/extras-images")
    config["outdir_txt2img_grids"] = os.path.join(tmp_dir, "outputs/txt2img-grids")
    config["outdir_img2img_grids"] = os.path.join(tmp_dir, "outputs/img2img-grids")
    config["outdir_save"] = os.path.join(tmp_dir, "log/images")

    config["control_net_max_models_num"] = control_net_max_models_num
    config["control_net_models_path"] = control_dir
    config["control_net_allow_script_control"] = True
    config["additional_networks_extra_lora_path"] = webui_lora_dir

    config["CLIP_stop_at_last_layers"] = 2
    config["eta_noise_seed_delta"] = 0
    config["show_progress_every_n_steps"] = 10
    config["show_progressbar"] = True
    config["quicksettings"] = "sd_model_checkpoint, sd_vae, CLIP_stop_at_last_layers, use_old_karras_scheduler_sigmas, always_discard_next_to_last_sigma"

    write_config(filename, config)


def change_ui_config(filename):
    """Write the notebook's default prompt and sampler values into ui-config.json.

    The same defaults are stored for the txt2img and the img2img tab; txt2img
    additionally gets its upscaler preset. The file is read, updated and
    written back in place.

    Args:
        filename: Path of the ui-config.json to rewrite.
    """
    ui_settings = read_config(filename)
    ui_settings.update(
        {
            "txt2img/Prompt/value": default_prompt,
            "txt2img/Negative prompt/value": default_neg_prompt,
            "txt2img/Sampling method/value": default_sampler,
            "txt2img/Sampling steps/value": default_steps,
            "txt2img/Width/value": default_width,
            "txt2img/Height/value": default_height,
            "txt2img/Upscaler/value": "Latent (nearest-exact)",
            "txt2img/Denoising strength/value": default_denoising_strength,
            "txt2img/CFG Scale/value": default_cfg_scale,
            "img2img/Prompt/value": default_prompt,
            "img2img/Negative prompt/value": default_neg_prompt,
            "img2img/Sampling method/value": default_sampler,
            "img2img/Sampling steps/value": default_steps,
            "img2img/Width/value": default_width,
            "img2img/Height/value": default_height,
            "img2img/Denoising strength/value": default_denoising_strength,
            "img2img/CFG Scale/value": default_cfg_scale,
        }
    )
    write_config(filename, ui_settings)


def update_extensions():
    start_time = time.time()
    extensions_updated = []
    with tqdm(
        total=len(os.listdir(extensions_dir)),
        desc="[1;32mUpdating extensions",
        mininterval=0,
    ) as pbar:
        for dir in os.listdir(extensions_dir):
            if os.path.isdir(os.path.join(extensions_dir, dir)):
                os.chdir(os.path.join(extensions_dir, dir))
                try:
                    with capture.capture_output() as cap:
                        !git fetch origin
                        !git pull
                except Exception as e:
                    print(f"[1;32mAn error occurred while updating {dir}: {e}")

                output = cap.stdout.strip()
                if "Already up to date." not in output:
                    extensions_updated.append(dir)
                pbar.update(1)

    print("\n")
    for ext in extensions_updated:
        print(f"[1;32m- {ext} updated to new version")

    end_time = time.time()
    elapsed_time = int(end_time - start_time)

    if elapsed_time < 60:
        print(f"\n[1;32mAll extensions are up to date. Took {elapsed_time} sec")
    else:
        mins, secs = divmod(elapsed_time, 60)
        print(f"\n[1;32mAll extensions are up to date. Took {mins} mins {secs} sec")


def main():
    start_time = time.time()

    print("[1;32mInstalling...\n")

    if not os.path.exists(webui_dir):
        desc = "[1;32mUnpacking Webui"
        pre_download(desc)
    else:
        print("[1;32mAlready installed, skipping...")

    with capture.capture_output() as cap:
        os.chdir(os.path.join(webui_dir, "repositories/stable-diffusion-stability-ai"))
        !git apply {patches_dir}/stablediffusion-lowram.patch

        !sed -i "s@os.path.splitext(checkpoint_.*@os.path.splitext(checkpoint_file); map_location='cuda'@" {webui_dir}/modules/sd_models.py
        !sed -i 's@ui.create_ui().*@ui.create_ui();shared.demo.queue(concurrency_count=999999,status_update_rate=0.1)@' {webui_dir}/webui.py

        !sed -i "s@'cpu'@'cuda'@" {webui_dir}/modules/extras.py
        del cap

    end_time = time.time()
    elapsed_time = int(end_time - start_time)

    change_config(config_file)
    change_ui_config(ui_config_file)
    open_theme(theme)

    if elapsed_time < 60:
        print(f"[1;32mFinished unpacking. Took {elapsed_time} sec")
    else:
        mins, secs = divmod(elapsed_time, 60)
        print(f"[1;32mFinished unpacking. Took {mins} mins {secs} sec")

    update_extensions()

    #@markdown > Get <b>your</b> `ngrok_token` [here](https://dashboard.ngrok.com/get-started/your-authtoken)
    ngrok_token = "" #@param {type: 'string'}
    ngrok_region = "ap" #@param ["us", "eu", "au", "ap", "sa", "jp", "in"]

    with capture.capture_output() as cap:
      for file in os.listdir(output_dir):
        file_path = os.path.join(output_dir, file)
        if file_path.endswith((".safetensors", ".ckpt")):
          !ln "{file_path}" {webui_models_dir}

      for file in os.listdir(pretrained_model):
        file_path = os.path.join(pretrained_model, file)
        if file_path.endswith((".safetensors", ".ckpt")):
          !ln "{file_path}" {webui_models_dir}

      for file in os.listdir(vae_dir):
        file_path = os.path.join(vae_dir, file)
        if file_path.endswith(".vae.pt"):
          !ln "{file_path}" {webui_vaes_dir}

      del cap
    model_path = os.path.join(webui_models_dir, project_name + "." + save_model_as)

    os.chdir(webui_dir)

    print("[1;32m")

    config = {
        "enable-insecure-extension-access": True,
        "disable-safe-unpickle": True,
        "multiple": True if not ngrok_token else False,
        "ckpt": model_path if os.path.exists(model_path) else None,
        "ckpt-dir": webui_models_dir,
        "vae-dir": webui_vaes_dir,
        "share": True if not ngrok_token else False,
        "no-half-vae": True,
        "lowram": True,
        "gradio-queue": True,
        "no-hashing": True,
        "disable-console-progressbars": True,
        "ngrok": ngrok_token if ngrok_token else None,
        "ngrok-region": ngrok_region if ngrok_token else None,
        "xformers": True,
        "opt-sub-quad-attention": True,
        "opt-channelslast": True,
        "theme": "dark"
    }

    args = ""
    for k, v in config.items():
        if k.startswith("_"):
            args += f'"{v}" '
        elif isinstance(v, str):
            args += f'--{k}="{v}" '
        elif isinstance(v, bool) and v:
            args += f"--{k} "
        elif isinstance(v, float) and not isinstance(v, bool):
            args += f"--{k}={v} "
        elif isinstance(v, int) and not isinstance(v, bool):
            args += f"--{k}={v} "

    final_args = f"python launch.py {args}"

    os.chdir(webui_dir)
    !{final_args}

main()

# VII. Extras

# VIII. Deployment

In [12]:
# @title ## 8.1. Upload Config
from huggingface_hub import login
from huggingface_hub import HfApi
from huggingface_hub.utils import validate_repo_id, HfHubHTTPError

# @markdown Login to Huggingface Hub
# @markdown > Get **your** huggingface `WRITE` token [here](https://huggingface.co/settings/tokens)
# Passed to authenticate() below. "token" is only a placeholder: enter the
# real value in the form and do not save it in a shared copy of the notebook.
write_token = "token"  # @param {type:"string"}
# @markdown Fill this if you want to upload to your organization, or just leave it empty.
# An empty string makes create_repo() use the authenticated user's name as the namespace.
orgs_name = ""  # @param{type:"string"}
# @markdown If your model/dataset repo does not exist, it will automatically create it.
# An empty model_name / dataset_name skips the corresponding create_repo() call.
model_name = "AnyLoRA"  # @param{type:"string"}
dataset_name = "rhnd"  # @param{type:"string"}
# Forwarded as `private=` when a repo is created.
make_private = False  # @param{type:"boolean"}

def authenticate(write_token):
    """Log in to the Hugging Face Hub and return the account info with an API client.

    The token is also stored in the git credential helper
    (`add_to_git_credential=True`).

    Returns:
        A tuple `(whoami result for the token, HfApi instance)`.
    """
    login(write_token, add_to_git_credential=True)
    client = HfApi()
    account = client.whoami(write_token)
    return account, client


def create_repo(api, user, orgs_name, repo_name, repo_type, make_private=False):
    """Create a Hub repo if needed and remember its id in a module-level variable.

    The repo id is "<orgs_name or user name>/<repo_name stripped>". For
    repo_type "model" the id is stored in the global `model_repo`; for any
    other repo_type it is stored in the global `datasets_repo`.

    Args:
        api: HfApi client used to create the repo.
        user: Mapping with a "name" entry, used when `orgs_name` is empty.
        orgs_name: Namespace to create the repo under, or "" for the user's own.
        repo_name: Repo name; surrounding whitespace is stripped.
        repo_type: "model" or "dataset".
        make_private: Forwarded as `private=` to `api.create_repo`.
    """
    global model_repo
    global datasets_repo

    owner = user["name"] if orgs_name == "" else orgs_name
    repo_id = owner + "/" + repo_name.strip()
    label = repo_type.capitalize()

    validate_repo_id(repo_id)
    try:
        api.create_repo(repo_id=repo_id, repo_type=repo_type, private=make_private)
        print(f"{label} repo '{repo_id}' didn't exist, creating repo")
    except HfHubHTTPError as e:
        status_code = getattr(getattr(e, "response", None), "status_code", None)
        if status_code == 409:
            # 409 Conflict is what the Hub answers when the repo already exists.
            print(f"{label} repo '{repo_id}' exists, skipping create repo")
        else:
            # Any other HTTP failure (authentication, permissions, network) is
            # reported with its real cause instead of being described as
            # "exists". Execution continues because the repo may still exist
            # and be usable for uploads.
            print(f"{label} repo '{repo_id}' could not be created: {e}")

    if repo_type == "model":
        model_repo = repo_id
        print(f"{label} repo '{repo_id}' link: https://huggingface.co/{repo_id}\n")
    else:
        datasets_repo = repo_id
        print(f"{label} repo '{repo_id}' link: https://huggingface.co/datasets/{repo_id}\n")

# Log in once, then create (or detect) each repo whose name was filled in.
user, api = authenticate(write_token)

if model_name:
    create_repo(
        api=api,
        user=user,
        orgs_name=orgs_name,
        repo_name=model_name,
        repo_type="model",
        make_private=make_private,
    )
if dataset_name:
    create_repo(
        api=api,
        user=user,
        orgs_name=orgs_name,
        repo_name=dataset_name,
        repo_type="dataset",
        make_private=make_private,
    )


Token is valid.
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful
Model repo 'sarahahatee/AnyLoRA' didn't exist, creating repo
Model repo 'sarahahatee/AnyLoRA' link: https://huggingface.co/sarahahatee/AnyLoRA

Dataset repo 'sarahahatee/rhnd' didn't exist, creating repo
Dataset repo 'sarahahatee/rhnd' link: https://huggingface.co/datasets/sarahahatee/rhnd



## 8.2. Upload with Huggingface Hub

In [13]:
# @title ### 8.2.1. Upload Model
from huggingface_hub import HfApi
from pathlib import Path

# Client used by upload_model() below.
api = HfApi()

# @markdown This will be uploaded to model repo
# A path ending in .ckpt/.safetensors/.pt is uploaded as a single file by
# upload(); anything else is uploaded as a folder.
model_path = "/content/fine_tune/output"  # @param {type :"string"}
# When set, replaces the last component of the uploaded path as the destination inside the repo.
path_in_repo = ""  # @param {type :"string"}
# @markdown Now you can save your config file for future use
# Leave empty to skip the config upload.
config_path = "/content/fine_tune/config"  # @param {type :"string"}
# @markdown Other Information
commit_message = "finetune"  # @param {type :"string"}

# Fall back to a generated message; `project_name` is expected to exist from
# an earlier cell of the notebook.
if not commit_message:
    commit_message = "feat: upload " + project_name + " checkpoint"

# Flags telling upload_model() whether model_path contains vae/, unet/ and
# text_encoder/ sub-folders (presumably a Diffusers-format model - confirm).
# They are computed unconditionally: os.path.exists() is simply False below a
# missing model_path, whereas guarding the assignments left the names
# undefined (or stale from an earlier run), which upload_model() then read.
vae_exists = os.path.exists(os.path.join(model_path, "vae"))
unet_exists = os.path.exists(os.path.join(model_path, "unet"))
text_encoder_exists = os.path.exists(os.path.join(model_path, "text_encoder"))


def upload_model(model_paths, is_folder: bool, is_config: bool):
    """Upload a model file, a model folder or the config folder to `model_repo`.

    Destination name inside the repo:
      * config uploads: "<path_in_repo>_config", or "<project_name>_config"
        when `path_in_repo` is empty;
      * otherwise `path_in_repo`, or the last component of `model_paths` when
        `path_in_repo` is empty.

    Folder uploads go to the repo root when the vae/unet/text_encoder flags are
    all set, otherwise into the destination name above.
    NOTE(review): that root-upload rule also applies to the config folder -
    confirm this is intended.

    Args:
        model_paths: Local file or folder to upload.
        is_folder: True to upload a folder, False to upload a single file.
        is_config: True when `model_paths` is the config folder.
    """
    if is_config:
        trained_model = f"{path_in_repo}_config" if path_in_repo else f"{project_name}_config"
    elif path_in_repo:
        trained_model = path_in_repo
    else:
        trained_model = Path(model_paths).parts[-1]

    print(f"Uploading {trained_model} to https://huggingface.co/" + model_repo)
    print("Please wait...")

    if is_folder:
        upload_kwargs = {
            "folder_path": model_paths,
            "repo_id": model_repo,
            "commit_message": commit_message,
            # Patterns are matched against each file's path relative to the
            # folder, so the bare directory name ".ipynb_checkpoints" never
            # matched anything. These two cover the folder at the top level
            # and nested anywhere below it.
            "ignore_patterns": [".ipynb_checkpoints/*", "*/.ipynb_checkpoints/*"],
        }
        if not (vae_exists and unet_exists and text_encoder_exists):
            upload_kwargs["path_in_repo"] = trained_model

        api.upload_folder(**upload_kwargs)
        print(
            "Upload success, located at https://huggingface.co/"
            + model_repo
            + "/tree/main\n"
        )
    else:
        api.upload_file(
            path_or_fileobj=model_paths,
            path_in_repo=trained_model,
            repo_id=model_repo,
            commit_message=commit_message,
        )
        print(
            "Upload success, located at https://huggingface.co/"
            + model_repo
            + "/blob/main/"
            + trained_model
            + "\n"
        )


def upload():
    """Upload `model_path`, then the config folder when `config_path` is set.

    `model_path` is treated as a single file when it ends in .ckpt,
    .safetensors or .pt, and as a folder otherwise.
    """
    single_file_suffixes = (".ckpt", ".safetensors", ".pt")
    model_is_folder = not model_path.endswith(single_file_suffixes)
    upload_model(model_path, model_is_folder, False)

    if config_path:
        upload_model(config_path, True, True)


upload()

Uploading output to https://huggingface.co/sarahahatee/AnyLoRA
Please wait...


rhnd.ckpt:   0%|          | 0.00/2.13G [00:00<?, ?B/s]

Upload 1 LFS files:   0%|          | 0/1 [00:00<?, ?it/s]

Upload success, located at https://huggingface.co/sarahahatee/AnyLoRA/tree/main

Uploading rhnd_config to https://huggingface.co/sarahahatee/AnyLoRA
Please wait...
Upload success, located at https://huggingface.co/sarahahatee/AnyLoRA/tree/main



In [14]:
# @title ### 8.2.2. Upload Dataset
from huggingface_hub import HfApi
from pathlib import Path
import shutil
import zipfile
import os

# Client used by upload_dataset() below.
api = HfApi()

# @markdown This will be compressed to zip and uploaded to datasets repo, leave it empty if not necessary
# train_data_path and meta_lat_path must both be set for the dataset archive to be built.
train_data_path = "/content/fine_tune/train_data"  # @param {type :"string"}
meta_lat_path = "/content/fine_tune/meta_lat.json"  # @param {type :"string"}
# Archived and uploaded separately, and only when the path exists.
last_state_path = "/content/fine_tune/output/last-state"  # @param {type :"string"}
# @markdown `Nerd stuff, only if you want to save training logs`
# Uploaded as a plain folder (not zipped) when set.
logs_path = "/content/fine_tune/logs"  # @param {type :"string"}

# Staging folders the data is moved into before zipping, named after the
# project when `project_name` (expected from an earlier cell) is non-empty.
if project_name:
    tmp_dataset = "/content/fine_tune/" + project_name + "_dataset"
    tmp_last_state = "/content/fine_tune/" + project_name + "_last_state"

else:
    tmp_dataset = "/content/fine_tune/tmp_dataset"
    tmp_last_state = "/content/fine_tune/tmp_last_state"

# The training images are staged inside the dataset folder; each archive sits
# next to its staging folder.
tmp_train_data = tmp_dataset + "/train_data"
dataset_zip = tmp_dataset + ".zip"
last_state_zip = tmp_last_state + ".zip"

# @markdown  Other Information
commit_message = ""  # @param {type :"string"}

# Fall back to a generated message. With an empty project_name this leaves a
# double space in the text.
if not commit_message:
    commit_message = "feat: upload " + project_name + " dataset and logs"

# Staging folders to create up front. The entries must be the path variables:
# the string literals "tmp_dataset", "tmp_last_state" and "tmp_train_data"
# only produced stray folders relative to the working directory and never
# created the real staging root.
#
# Only the dataset root is pre-created. upload() later moves the sources onto
# tmp_train_data and tmp_last_state with shutil.move(), which nests the source
# inside a destination that already exists as a directory, so those two must
# not exist beforehand.
tmp_folder = [tmp_dataset]


def makedirs(tmp_folders):
    """Create the directory `tmp_folders`, including parents; an existing one is fine."""
    os.makedirs(tmp_folders, exist_ok=True)


for folder in tmp_folder:
    makedirs(folder)


def upload_dataset(dataset_paths, is_zip: bool):
    """Upload an archive file or a folder to the dataset repo `datasets_repo`.

    The destination inside the repo is the last component of `dataset_paths`.

    Args:
        dataset_paths: Local file (when `is_zip`) or folder to upload.
        is_zip: True to upload a single file, False to upload a folder.
    """
    # A distinct local name avoids shadowing the notebook's global
    # `dataset_name` form field.
    target_name = Path(dataset_paths).parts[-1]
    repo_page = "https://huggingface.co/datasets/" + datasets_repo

    print(f"Uploading {target_name} to " + repo_page)
    print("Please wait...")

    if is_zip:
        api.upload_file(
            path_or_fileobj=dataset_paths,
            path_in_repo=target_name,
            repo_id=datasets_repo,
            repo_type="dataset",
            commit_message=commit_message,
        )
        location = "/blob/main/"
    else:
        api.upload_folder(
            folder_path=dataset_paths,
            path_in_repo=target_name,
            repo_id=datasets_repo,
            repo_type="dataset",
            commit_message=commit_message,
            # Patterns are matched against each file's path relative to the
            # folder, so the bare directory name ".ipynb_checkpoints" never
            # matched anything. These two cover the folder at the top level
            # and nested anywhere below it.
            ignore_patterns=[".ipynb_checkpoints/*", "*/.ipynb_checkpoints/*"],
        )
        location = "/tree/main/"

    print("Upload success, located at " + repo_page + location + target_name + "\n")


def zip_file(tmp_folders):
    """Archive every file below `tmp_folders` into "<tmp_folders>.zip".

    Files are added under the path used to reach them (no `arcname`), with
    ZipFile's default settings. Directories without files are not recorded.
    """
    archive_path = tmp_folders + ".zip"
    with zipfile.ZipFile(archive_path, "w") as archive:
        for current_dir, _subdirs, filenames in os.walk(tmp_folders):
            members = (os.path.join(current_dir, name) for name in filenames)
            for member in members:
                archive.write(member)


def move(src_path, dst_path, is_metadata: bool):
    """Move `src_path` to `dst_path`, optionally taking sibling metadata files along.

    Args:
        src_path: File or folder to move; a missing path is skipped silently.
        dst_path: Destination passed to shutil.move.
        is_metadata: When True, every file in `src_path`'s parent folder whose
            name is one of the known meta_*.json names is moved to `dst_path`
            as well.
    """
    files_to_move = (
        "meta_cap.json",
        "meta_cap_dd.json",
        "meta_lat.json",
        "meta_clean.json",
        "meta_final.json",
    )

    if os.path.exists(src_path):
        shutil.move(src_path, dst_path)

    if not is_metadata:
        return

    parent_meta_path = os.path.dirname(src_path)
    # A mistyped or empty metadata path must not crash the upload: without
    # this check os.listdir() raised on a parent folder that does not exist.
    if not os.path.isdir(parent_meta_path):
        return

    for filename in os.listdir(parent_meta_path):
        if filename in files_to_move:
            shutil.move(os.path.join(parent_meta_path, filename), dst_path)


def upload():
    """Stage, zip and upload the dataset and last training state, then upload the logs.

    The sources are moved (not copied) into their staging folders and are not
    moved back afterwards. Each zip file is deleted once it has been uploaded.
    """

    def ship_archive(staging_dir, archive_path):
        # Zip the staging folder, upload the archive, then delete the local zip.
        zip_file(staging_dir)
        upload_dataset(archive_path, True)
        os.remove(archive_path)

    if train_data_path and meta_lat_path:
        move(train_data_path, tmp_train_data, False)
        move(meta_lat_path, tmp_dataset, True)
        ship_archive(tmp_dataset, dataset_zip)

    if last_state_path and os.path.exists(last_state_path):
        move(last_state_path, tmp_last_state, False)
        ship_archive(tmp_last_state, last_state_zip)

    if logs_path:
        upload_dataset(logs_path, False)


upload()

Uploading rhnd_dataset.zip to https://huggingface.co/datasets/sarahahatee/rhnd
Please wait...


Upload 1 LFS files:   0%|          | 0/1 [00:00<?, ?it/s]

rhnd_dataset.zip:   0%|          | 0.00/4.21M [00:00<?, ?B/s]

Upload success, located at https://huggingface.co/datasets/sarahahatee/rhnd/blob/main/rhnd_dataset.zip

Uploading logs to https://huggingface.co/datasets/sarahahatee/rhnd
Please wait...


Upload 1 LFS files:   0%|          | 0/1 [00:00<?, ?it/s]

events.out.tfevents.1705282607.fada7fb79365.2779.0:   0%|          | 0.00/210k [00:00<?, ?B/s]

Upload success, located at https://huggingface.co/datasets/sarahahatee/rhnd/tree/main/logs



## 8.3. Upload with GIT (Alternative)

In [None]:
# @title ### 8.3.1. Clone Repository

clone_model = True  # @param {'type': 'boolean'}
clone_dataset = True  # @param {'type': 'boolean'}

!git lfs install --skip-smudge
!export GIT_LFS_SKIP_SMUDGE=1

if clone_model:
    !git clone https://huggingface.co/{model_repo} /content/{model_name}

if clone_dataset:
    !git clone https://huggingface.co/datasets/{datasets_repo} /content/{dataset_name}

In [None]:
# @title ### 8.3.2. Commit using Git
import os

# Restore the variables persisted with %store by earlier cells.
%store -r

# commit() resolves repo folders relative to root_dir.
os.chdir(root_dir)

# @markdown Choose which repo you want to commit
commit_model = True  # @param {'type': 'boolean'}
commit_dataset = True  # @param {'type': 'boolean'}
# @markdown #### Other Information
commit_message = ""  # @param {type :"string"}

# Fall back to a generated message; `project_name` is expected to exist from
# an earlier cell or from %store.
if not commit_message:
    commit_message = "feat: upload " + project_name + " model and dataset"

# Placeholder identity written to the global git config; replace it with your
# own name and e-mail before committing.
!git config --global user.email "example@mail.com"
!git config --global user.name "example"


def commit(repo_folder, commit_message):
    """Stage, commit and push everything in a repo folder below `root_dir`.

    Args:
        repo_folder: Folder name, joined onto `root_dir`, that holds the git checkout.
        commit_message: Message passed to `git commit -m`. It is placed inside
            double quotes on a shell command line.

    Side effect: the working directory is changed to the repo folder and not
    restored.
    """
    os.chdir(os.path.join(root_dir, repo_folder))
    # Set up git-lfs and the Hub's large-file support before staging.
    !git lfs install
    !huggingface-cli lfs-enable-largefiles .
    !git add .
    !git commit -m "{commit_message}"
    !git push


# Honour the form toggles above: both repos used to be committed and pushed
# regardless of what was ticked.
if commit_model:
    commit(model_name, commit_message)
if commit_dataset:
    commit(dataset_name, commit_message)