Spaces:

baulab
/

ConceptSliders

Runtime error

App Files Files Community

RohitGandikota committed on Nov 28, 2023

Commit

47a88ae

•

1 Parent(s): 6491cdf

pushing training code

Browse files

Files changed (6) hide show

__init__.py +2 -1
app.py +27 -9
trainscripts/textsliders/data/config-xl.yaml +1 -1
trainscripts/textsliders/data/prompts-xl.yaml +27 -18
trainscripts/textsliders/demotrain.py +434 -0
trainscripts/textsliders/prompt_util.py +10 -1

__init__.py CHANGED Viewed

	@@ -1 +1,2 @@
1	- from trainscripts.textsliders import lora


1	+ from trainscripts.textsliders import lora
2	+ from trainscripts.textsliders import demotrain

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ from diffusers.pipelines import StableDiffusionXLPipeline
 StableDiffusionXLPipeline.__call__ = call
 import os
 from trainscripts.textsliders.lora import LoRANetwork, DEFAULT_TARGET_REPLACE, UNET_TARGET_REPLACE_MODULE_CONV
 os.environ['CURL_CA_BUNDLE'] = ''
 model_map = {'Age' : 'models/age.pt',
@@ -204,10 +204,26 @@ class Demo:
         )
     def train(self, target_concept,positive_prompt, negative_prompt, rank, iterations_input, lr_input, train_method, neg_guidance, iterations, lr, pbar = gr.Progress(track_tqdm=True)):
-#         if self.training:
-#             return [gr.update(interactive=True, value='Train'), gr.update(value='Someone else is training... Try again soon'), None, gr.update()]
 #         if train_method == 'ESD-x':
 #             modules = ".*attn2$"
@@ -223,7 +239,7 @@ class Demo:
 #             modules = ".*attn1$"
 #             frozen = []
-#         randn = torch.randint(1, 10000000, (1,)).item()
 #         save_path = f"models/{randn}_{prompt.lower().replace(' ', '')}.pt"
@@ -237,7 +253,7 @@ class Demo:
 #         model_map['Custom'] = save_path
-#         return [gr.update(interactive=True, value='Train'), gr.update(value='Done Training! \n Try your custom model in the "Test" tab'), save_path, gr.Dropdown.update(choices=list(model_map.keys()), value='Custom')]
         return [None, None, None, None]
     def inference(self, prompt, seed, start_noise, scale, model_name, pbar = gr.Progress(track_tqdm=True)):
@@ -267,10 +283,12 @@ class Demo:
         name = os.path.basename(model_path)
         rank = 4
         alpha = 1
-        if 'rank4' in model_path:
-            rank = 4
-        if 'rank8' in model_path:
-            rank = 8
         if 'alpha1' in model_path:
             alpha = 1.0
         network = LoRANetwork(

 StableDiffusionXLPipeline.__call__ = call
 import os
 from trainscripts.textsliders.lora import LoRANetwork, DEFAULT_TARGET_REPLACE, UNET_TARGET_REPLACE_MODULE_CONV
+from trainscripts.textsliders.demotrain import train_xl
 os.environ['CURL_CA_BUNDLE'] = ''
 model_map = {'Age' : 'models/age.pt',
         )
     def train(self, target_concept,positive_prompt, negative_prompt, rank, iterations_input, lr_input, train_method, neg_guidance, iterations, lr, pbar = gr.Progress(track_tqdm=True)):
+        randn = torch.randint(1, 10000000, (1,)).item()
+        save_name = f'{randn}_{target_concept.replace(',','').replace(' ','').replace('.','')[:10]}_{positive_prompt.replace(',','').replace(' ','').replace('.','')[:10]}'
+        save_name += f'_alpha-{1}'
+        save_name += f'_noxattn'
+        save_name += f'_rank_{rank}.pt'
+        if self.training:
+            return [gr.update(interactive=True, value='Train'), gr.update(value='Someone else is training... Try again soon'), None, gr.update()]
+        self.training = True
+        train_xl(target, postive, negative, lr, iterations, config_file, rank, device, attributes)
+        self.training = False
+        torch.cuda.empty_cache()
+        model_map['Custom Slider'] = f'models/{save_name}'
+        return [gr.update(interactive=True, value='Train'), gr.update(value='Done Training! \n Try your custom slider in the "Test" tab'), save_path, gr.Dropdown.update(choices=list(model_map.keys()), value='Custom Slider')]
 #         if train_method == 'ESD-x':
 #             modules = ".*attn2$"
 #             modules = ".*attn1$"
 #             frozen = []
+#
 #         save_path = f"models/{randn}_{prompt.lower().replace(' ', '')}.pt"
 #         model_map['Custom'] = save_path
+#
         return [None, None, None, None]
     def inference(self, prompt, seed, start_noise, scale, model_name, pbar = gr.Progress(track_tqdm=True)):
         name = os.path.basename(model_path)
         rank = 4
         alpha = 1
+        if rank in model_path:
+            rank = int(model_path.split('_')[-1].replace('.pt',''))
+#         if 'rank4' in model_path:
+#             rank = 4
+#         if 'rank8' in model_path:
+#             rank = 8
         if 'alpha1' in model_path:
             alpha = 1.0
         network = LoRANetwork(

trainscripts/textsliders/data/config-xl.yaml CHANGED Viewed

@@ -19,7 +19,7 @@ train:
 save:
   name: "temp"
   path: "./models"
-  per_steps: 500
   precision: "bfloat16"
 logging:
   use_wandb: false

 save:
   name: "temp"
   path: "./models"
+  per_steps: 5000000
   precision: "bfloat16"
 logging:
   use_wandb: false

trainscripts/textsliders/data/prompts-xl.yaml CHANGED Viewed

@@ -1,3 +1,12 @@
 ####################################################################################################### AGE SLIDER
 # - target: "male person" # what word for erasing the positive concept from
 #   positive: "male person, very old" # concept to erase
@@ -257,24 +266,24 @@
 #   dynamic_resolution: false
 #   batch_size: 1
 ####################################################################################################### SCULPTURE SLIDER
-- target: "male person" # what word for erasing the positive concept from
-  positive: "male person, cement sculpture, cement greek statue style"  # concept to erase
-  unconditional: "male person, realistic, hyper realistic" # word to take the difference from the positive concept
-  neutral: "male person" # starting point for conditioning the target
-  action: "enhance" # erase or enhance
-  guidance_scale: 4
-  resolution: 512
-  dynamic_resolution: false
-  batch_size: 1
-- target: "female person" # what word for erasing the positive concept from
-  positive: "female person, cement sculpture, cement greek statue style"  # concept to erase
-  unconditional: "female person, realistic, hyper realistic" # word to take the difference from the positive concept
-  neutral: "female person" # starting point for conditioning the target
-  action: "enhance" # erase or enhance
-  guidance_scale: 4
-  resolution: 512
-  dynamic_resolution: false
-  batch_size: 1
 ####################################################################################################### METAL SLIDER
 # - target: "" # what word for erasing the positive concept from
 #   positive: "made out of metal, metallic style, iron, copper, platinum metal,"  # concept to erase

+- target: "" # what word for erasing the positive concept from
+  positive: ""  # concept to erase
+  unconditional: "" # word to take the difference from the positive concept
+  neutral: "" # starting point for conditioning the target
+  action: "enhance" # erase or enhance
+  guidance_scale: 4
+  resolution: 512
+  dynamic_resolution: false
+  batch_size: 1
 ####################################################################################################### AGE SLIDER
 # - target: "male person" # what word for erasing the positive concept from
 #   positive: "male person, very old" # concept to erase
 #   dynamic_resolution: false
 #   batch_size: 1
 ####################################################################################################### SCULPTURE SLIDER
+# - target: "male person" # what word for erasing the positive concept from
+#   positive: "male person, cement sculpture, cement greek statue style"  # concept to erase
+#   unconditional: "male person, realistic, hyper realistic" # word to take the difference from the positive concept
+#   neutral: "male person" # starting point for conditioning the target
+#   action: "enhance" # erase or enhance
+#   guidance_scale: 4
+#   resolution: 512
+#   dynamic_resolution: false
+#   batch_size: 1
+# - target: "female person" # what word for erasing the positive concept from
+#   positive: "female person, cement sculpture, cement greek statue style"  # concept to erase
+#   unconditional: "female person, realistic, hyper realistic" # word to take the difference from the positive concept
+#   neutral: "female person" # starting point for conditioning the target
+#   action: "enhance" # erase or enhance
+#   guidance_scale: 4
+#   resolution: 512
+#   dynamic_resolution: false
+#   batch_size: 1
 ####################################################################################################### METAL SLIDER
 # - target: "" # what word for erasing the positive concept from
 #   positive: "made out of metal, metallic style, iron, copper, platinum metal,"  # concept to erase

trainscripts/textsliders/demotrain.py ADDED Viewed

	@@ -0,0 +1,434 @@

+# ref:
+# - https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py#L566
+# - https://huggingface.co/spaces/baulab/Erasing-Concepts-In-Diffusion/blob/main/train.py
+from typing import List, Optional
+import argparse
+import ast
+from pathlib import Path
+import gc
+import torch
+from tqdm import tqdm
+from lora import LoRANetwork, DEFAULT_TARGET_REPLACE, UNET_TARGET_REPLACE_MODULE_CONV
+import train_util
+import model_util
+import prompt_util
+from prompt_util import (
+    PromptEmbedsCache,
+    PromptEmbedsPair,
+    PromptSettings,
+    PromptEmbedsXL,
+)
+import debug_util
+import config_util
+from config_util import RootConfig
+import wandb
+# Passed as num_images_per_prompt to train_util.encode_prompts_xl in train().
+NUM_IMAGES_PER_PROMPT = 1
+def flush():
+    """Empty the CUDA cache, then run the Python garbage collector."""
+    torch.cuda.empty_cache()
+    gc.collect()
+def train(
+    config: RootConfig,
+    prompts: list[PromptSettings],
+    device,
+):
+    """Train a LoRANetwork on the XL UNet and save its weights.
+
+    Steps, in order:
+      1. Load tokenizers, text encoders, UNet and noise scheduler with
+         model_util.load_models_xl; freeze the text encoders and the UNet.
+      2. Wrap the UNet in a LoRANetwork and build optimizer / LR scheduler.
+      3. Encode target / positive / neutral / unconditional prompts of every
+         prompt setting once and build PromptEmbedsPair objects.
+      4. Run config.train.iterations optimisation steps.
+      5. Save the weights to config.save.path / config.save.name.
+
+    Args:
+        config: root configuration; the save, network, logging, train,
+            pretrained_model and other sections are read here.
+        prompts: prompt settings to train on; one is picked at random per
+            iteration.
+        device: device the models and tensors are moved to.
+    """
+    metadata = {
+        "prompts": ",".join([prompt.json() for prompt in prompts]),
+        "config": config.json(),
+    }
+    save_path = Path(config.save.path)
+    modules = DEFAULT_TARGET_REPLACE
+    if config.network.type == "c3lier":
+        # NOTE(review): if DEFAULT_TARGET_REPLACE is a list, `+=` extends that
+        # shared object in place, and `modules` is not used again in this
+        # function - confirm whether the side effect is intended.
+        modules += UNET_TARGET_REPLACE_MODULE_CONV
+    if config.logging.verbose:
+        print(metadata)
+    if config.logging.use_wandb:
+        wandb.init(project=f"LECO_{config.save.name}", config=metadata)
+    weight_dtype = config_util.parse_precision(config.train.precision)
+    # NOTE(review): parsed from config.train.precision, same as weight_dtype;
+    # the config also has a save.precision entry - confirm which is intended.
+    save_weight_dtype = config_util.parse_precision(config.train.precision)
+    (
+        tokenizers,
+        text_encoders,
+        unet,
+        noise_scheduler,
+    ) = model_util.load_models_xl(
+        config.pretrained_model.name_or_path,
+        scheduler_name=config.train.noise_scheduler,
+    )
+    # Text encoders are only used for inference: no gradients, eval mode.
+    for text_encoder in text_encoders:
+        text_encoder.to(device, dtype=weight_dtype)
+        text_encoder.requires_grad_(False)
+        text_encoder.eval()
+    unet.to(device, dtype=weight_dtype)
+    if config.other.use_xformers:
+        unet.enable_xformers_memory_efficient_attention()
+    # The UNet itself stays frozen as well.
+    unet.requires_grad_(False)
+    unet.eval()
+    network = LoRANetwork(
+        unet,
+        rank=config.network.rank,
+        multiplier=1.0,
+        alpha=config.network.alpha,
+        train_method=config.network.training_method,
+    ).to(device, dtype=weight_dtype)
+    optimizer_module = train_util.get_optimizer(config.train.optimizer)
+    # Optimizer arguments: a space-separated "key=value" string whose values
+    # are parsed with ast.literal_eval.
+    optimizer_kwargs = {}
+    if config.train.optimizer_args is not None and len(config.train.optimizer_args) > 0:
+        for arg in config.train.optimizer_args.split(" "):
+            key, value = arg.split("=")
+            value = ast.literal_eval(value)
+            optimizer_kwargs[key] = value
+    optimizer = optimizer_module(network.prepare_optimizer_params(), lr=config.train.lr, **optimizer_kwargs)
+    lr_scheduler = train_util.get_lr_scheduler(
+        config.train.lr_scheduler,
+        optimizer,
+        max_iterations=config.train.iterations,
+        lr_min=config.train.lr / 100,
+    )
+    criteria = torch.nn.MSELoss()
+    print("Prompts")
+    for settings in prompts:
+        print(settings)
+    # debug
+    debug_util.check_requires_grad(network)
+    debug_util.check_training_mode(network)
+    cache = PromptEmbedsCache()
+    prompt_pairs: list[PromptEmbedsPair] = []
+    # Encode every prompt once (no gradients) and pair up the embeddings.
+    with torch.no_grad():
+        for settings in prompts:
+            print(settings)
+            for prompt in [
+                settings.target,
+                settings.positive,
+                settings.neutral,
+                settings.unconditional,
+            ]:
+                # Presumably the cache yields None for prompts not encoded
+                # yet - verify against PromptEmbedsCache.
+                if cache[prompt] == None:
+                    tex_embs, pool_embs = train_util.encode_prompts_xl(
+                            tokenizers,
+                            text_encoders,
+                            [prompt],
+                            num_images_per_prompt=NUM_IMAGES_PER_PROMPT,
+                        )
+                    cache[prompt] = PromptEmbedsXL(
+                        tex_embs,
+                        pool_embs
+                    )
+            prompt_pairs.append(
+                PromptEmbedsPair(
+                    criteria,
+                    cache[settings.target],
+                    cache[settings.positive],
+                    cache[settings.unconditional],
+                    cache[settings.neutral],
+                    settings,
+                )
+            )
+    # NOTE(review): this only unbinds the loop variables; the tokenizers and
+    # text_encoders lists still reference the objects.
+    for tokenizer, text_encoder in zip(tokenizers, text_encoders):
+        del tokenizer, text_encoder
+    flush()
+    pbar = tqdm(range(config.train.iterations))
+    loss = None
+    for i in pbar:
+        # Everything in this block is computed without gradients.
+        with torch.no_grad():
+            noise_scheduler.set_timesteps(
+                config.train.max_denoising_steps, device=device
+            )
+            optimizer.zero_grad()
+            # Pick one prompt pair at random for this iteration.
+            prompt_pair: PromptEmbedsPair = prompt_pairs[
+                torch.randint(0, len(prompt_pairs), (1,)).item()
+            ]
+            # Random integer in [1, max_denoising_steps) (originally noted as "1 ~ 49").
+            timesteps_to = torch.randint(
+                1, config.train.max_denoising_steps, (1,)
+            ).item()
+            height, width = prompt_pair.resolution, prompt_pair.resolution
+            if prompt_pair.dynamic_resolution:
+                height, width = train_util.get_random_resolution_in_bucket(
+                    prompt_pair.resolution
+                )
+            if config.logging.verbose:
+                print("gudance_scale:", prompt_pair.guidance_scale)
+                print("resolution:", prompt_pair.resolution)
+                print("dynamic_resolution:", prompt_pair.dynamic_resolution)
+                if prompt_pair.dynamic_resolution:
+                    print("bucketed resolution:", (height, width))
+                print("batch_size:", prompt_pair.batch_size)
+                print("dynamic_crops:", prompt_pair.dynamic_crops)
+            latents = train_util.get_initial_latents(
+                noise_scheduler, prompt_pair.batch_size, height, width, 1
+            ).to(device, dtype=weight_dtype)
+            add_time_ids = train_util.get_add_time_ids(
+                height,
+                width,
+                dynamic_crops=prompt_pair.dynamic_crops,
+                dtype=weight_dtype,
+            ).to(device, dtype=weight_dtype)
+            with network:
+                # Returns latents that have been denoised a little.
+                denoised_latents = train_util.diffusion_xl(
+                    unet,
+                    noise_scheduler,
+                    latents,  # pass the plain-noise latents
+                    text_embeddings=train_util.concat_embeddings(
+                        prompt_pair.unconditional.text_embeds,
+                        prompt_pair.target.text_embeds,
+                        prompt_pair.batch_size,
+                    ),
+                    add_text_embeddings=train_util.concat_embeddings(
+                        prompt_pair.unconditional.pooled_embeds,
+                        prompt_pair.target.pooled_embeds,
+                        prompt_pair.batch_size,
+                    ),
+                    add_time_ids=train_util.concat_embeddings(
+                        add_time_ids, add_time_ids, prompt_pair.batch_size
+                    ),
+                    start_timesteps=0,
+                    total_timesteps=timesteps_to,
+                    guidance_scale=3,
+                )
+            # Switch to a 1000-step schedule and rescale the step index from
+            # the max_denoising_steps scale onto it.
+            noise_scheduler.set_timesteps(1000)
+            current_timestep = noise_scheduler.timesteps[
+                int(timesteps_to * 1000 / config.train.max_denoising_steps)
+            ]
+            # Outside `with network:` only the empty LoRA is in effect.
+            positive_latents = train_util.predict_noise_xl(
+                unet,
+                noise_scheduler,
+                current_timestep,
+                denoised_latents,
+                text_embeddings=train_util.concat_embeddings(
+                    prompt_pair.unconditional.text_embeds,
+                    prompt_pair.positive.text_embeds,
+                    prompt_pair.batch_size,
+                ),
+                add_text_embeddings=train_util.concat_embeddings(
+                    prompt_pair.unconditional.pooled_embeds,
+                    prompt_pair.positive.pooled_embeds,
+                    prompt_pair.batch_size,
+                ),
+                add_time_ids=train_util.concat_embeddings(
+                    add_time_ids, add_time_ids, prompt_pair.batch_size
+                ),
+                guidance_scale=1,
+            ).to(device, dtype=weight_dtype)
+            neutral_latents = train_util.predict_noise_xl(
+                unet,
+                noise_scheduler,
+                current_timestep,
+                denoised_latents,
+                text_embeddings=train_util.concat_embeddings(
+                    prompt_pair.unconditional.text_embeds,
+                    prompt_pair.neutral.text_embeds,
+                    prompt_pair.batch_size,
+                ),
+                add_text_embeddings=train_util.concat_embeddings(
+                    prompt_pair.unconditional.pooled_embeds,
+                    prompt_pair.neutral.pooled_embeds,
+                    prompt_pair.batch_size,
+                ),
+                add_time_ids=train_util.concat_embeddings(
+                    add_time_ids, add_time_ids, prompt_pair.batch_size
+                ),
+                guidance_scale=1,
+            ).to(device, dtype=weight_dtype)
+            unconditional_latents = train_util.predict_noise_xl(
+                unet,
+                noise_scheduler,
+                current_timestep,
+                denoised_latents,
+                text_embeddings=train_util.concat_embeddings(
+                    prompt_pair.unconditional.text_embeds,
+                    prompt_pair.unconditional.text_embeds,
+                    prompt_pair.batch_size,
+                ),
+                add_text_embeddings=train_util.concat_embeddings(
+                    prompt_pair.unconditional.pooled_embeds,
+                    prompt_pair.unconditional.pooled_embeds,
+                    prompt_pair.batch_size,
+                ),
+                add_time_ids=train_util.concat_embeddings(
+                    add_time_ids, add_time_ids, prompt_pair.batch_size
+                ),
+                guidance_scale=1,
+            ).to(device, dtype=weight_dtype)
+            if config.logging.verbose:
+                print("positive_latents:", positive_latents[0, 0, :5, :5])
+                print("neutral_latents:", neutral_latents[0, 0, :5, :5])
+                print("unconditional_latents:", unconditional_latents[0, 0, :5, :5])
+        # This prediction runs outside torch.no_grad() and inside
+        # `with network:`; it is the one the loss is back-propagated through.
+        with network:
+            target_latents = train_util.predict_noise_xl(
+                unet,
+                noise_scheduler,
+                current_timestep,
+                denoised_latents,
+                text_embeddings=train_util.concat_embeddings(
+                    prompt_pair.unconditional.text_embeds,
+                    prompt_pair.target.text_embeds,
+                    prompt_pair.batch_size,
+                ),
+                add_text_embeddings=train_util.concat_embeddings(
+                    prompt_pair.unconditional.pooled_embeds,
+                    prompt_pair.target.pooled_embeds,
+                    prompt_pair.batch_size,
+                ),
+                add_time_ids=train_util.concat_embeddings(
+                    add_time_ids, add_time_ids, prompt_pair.batch_size
+                ),
+                guidance_scale=1,
+            ).to(device, dtype=weight_dtype)
+            if config.logging.verbose:
+                print("target_latents:", target_latents[0, 0, :5, :5])
+        # The reference predictions must not receive gradients.
+        positive_latents.requires_grad = False
+        neutral_latents.requires_grad = False
+        unconditional_latents.requires_grad = False
+        loss = prompt_pair.loss(
+            target_latents=target_latents,
+            positive_latents=positive_latents,
+            neutral_latents=neutral_latents,
+            unconditional_latents=unconditional_latents,
+        )
+        # Without multiplying by 1000 the display stays at 0.000... all the
+        # time, which is not interesting to look at.
+        pbar.set_description(f"Loss*1k: {loss.item()*1000:.4f}")
+        if config.logging.use_wandb:
+            wandb.log(
+                {"loss": loss, "iteration": i, "lr": lr_scheduler.get_last_lr()[0]}
+            )
+        loss.backward()
+        optimizer.step()
+        lr_scheduler.step()
+        # Drop per-iteration tensors before the next step to limit memory use.
+        del (
+            positive_latents,
+            neutral_latents,
+            unconditional_latents,
+            target_latents,
+            latents,
+        )
+        flush()
+        # Periodic checkpointing is disabled in this demo variant:
+#         if (
+#             i % config.save.per_steps == 0
+#             and i != 0
+#             and i != config.train.iterations - 1
+#         ):
+#             print("Saving...")
+#             save_path.mkdir(parents=True, exist_ok=True)
+#             network.save_weights(
+#                 save_path / f"{config.save.name}_{i}steps.pt",
+#                 dtype=save_weight_dtype,
+#             )
+    # Single save at the end; the file name is config.save.name as given.
+    print("Saving...")
+    save_path.mkdir(parents=True, exist_ok=True)
+    network.save_weights(
+        save_path / f"{config.save.name}",
+        dtype=save_weight_dtype,
+    )
+    # Drop the local references to the large objects and clear caches.
+    del (
+        unet,
+        noise_scheduler,
+        loss,
+        optimizer,
+        network,
+    )
+    flush()
+    print("Done.")
+# def main(args):
+#     config_file = args.config_file
+#     config = config_util.load_config_from_yaml(config_file)
+#     if args.name is not None:
+#         config.save.name = args.name
+#     attributes = []
+#     if args.attributes is not None:
+#         attributes = args.attributes.split(',')
+#         attributes = [a.strip() for a in attributes]
+#     config.network.alpha = args.alpha
+#     config.network.rank = args.rank
+#     config.save.name += f'_alpha{args.alpha}'
+#     config.save.name += f'_rank{config.network.rank }'
+#     config.save.name += f'_{config.network.training_method}'
+#     config.save.path += f'/{config.save.name}'
+#     prompts = prompt_util.load_prompts_from_yaml(config.prompts_file, attributes)
+#     device = torch.device(f"cuda:{args.device}")
+#     train(config, prompts, device)
+def train_xl(target, postive, negative, lr, iterations, config_file, rank, device, attributes,save_name):
+    config = config_util.load_config_from_yaml(config_file)
+    randn = torch.randint(1, 10000000, (1,)).item()
+    config.save.name = save_name
+    config.train.lr = float(lr)
+    config.train.iterations=int(iterations)
+    if attributes is not None:
+        attributes = attributes.split(',')
+        attributes = [a.strip() for a in attributes]
+    config.network.alpha = 1.0
+    config.network.rank = rank
+    config.save.path += f'/{config.save.name}'
+    prompts = prompt_util.load_prompts_from_yaml(path=config.prompts_file, target=target, positive=positive, negative=negative,  attributes=attributes)
+    device = torch.device(f"cuda:{device}")
+    train(config, prompts, device)

trainscripts/textsliders/prompt_util.py CHANGED Viewed

@@ -148,9 +148,18 @@ class PromptEmbedsPair:
             raise ValueError("action must be erase or enhance")
-def load_prompts_from_yaml(path, attributes = []):
     with open(path, "r") as f:
         prompts = yaml.safe_load(f)
     print(prompts)
     if len(prompts) == 0:
         raise ValueError("prompts file is empty")

             raise ValueError("action must be erase or enhance")
+def load_prompts_from_yaml(path, target, positive, negative, attributes = []):
     with open(path, "r") as f:
         prompts = yaml.safe_load(f)
+    new = []
+    for prompt in prompts:
+        copy_ = copy.deepcopy(prompt)
+        copy_['target'] = target
+        copy_['positive'] = positive
+        copy_['neutral'] = target
+        copy_['unconditional'] = negative
+        new.append(copy_)
+    prompts = new
     print(prompts)
     if len(prompts) == 0:
         raise ValueError("prompts file is empty")