Spaces:

czl
/

generative-data-augmentation-demo

Running on Zero

App Files Files Community

czl committed on Jun 11

Commit

8ffeacd

•

1 Parent(s): f288d63

test img2img

Browse files

Files changed (3) hide show

app.py +79 -46
requirements.txt +9 -2
tools/synth.py +935 -0

app.py CHANGED Viewed

@@ -1,49 +1,72 @@
 import gradio as gr
 import numpy as np
-import random
-from diffusers import DiffusionPipeline
 import torch
 device = "cuda" if torch.cuda.is_available() else "cpu"
 if torch.cuda.is_available():
     torch.cuda.max_memory_allocated(device=device)
-    pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
-    pipe.enable_xformers_memory_efficient_attention()
-    pipe = pipe.to(device)
-else:
-    pipe = DiffusionPipeline.from_pretrained("stabilityai/sdxl-turbo", use_safetensors=True)
-    pipe = pipe.to(device)
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
-def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     generator = torch.Generator().manual_seed(seed)
     image = pipe(
-        prompt = prompt,
-        negative_prompt = negative_prompt,
-        guidance_scale = guidance_scale,
-        num_inference_steps = num_inference_steps,
-        width = width,
-        height = height,
-        generator = generator
-    ).images[0]
     return image
 examples = [
     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
     "An astronaut riding a green horse",
     "A delicious ceviche cheesecake slice",
 ]
-css="""
 #col-container {
     margin: 0 auto;
     max-width: 520px;
@@ -56,15 +79,17 @@ else:
     power_device = "CPU"
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(f"""
         # Text-to-Image Gradio Template
         Currently running on {power_device}.
-        """)
         with gr.Row():
             prompt = gr.Text(
                 label="Prompt",
                 show_label=False,
@@ -72,20 +97,21 @@ with gr.Blocks(css=css) as demo:
                 placeholder="Enter your prompt",
                 container=False,
             )
             run_button = gr.Button("Run", scale=0)
         result = gr.Image(label="Result", show_label=False)
         with gr.Accordion("Advanced Settings", open=False):
             negative_prompt = gr.Text(
                 label="Negative prompt",
                 max_lines=1,
                 placeholder="Enter a negative prompt",
                 visible=False,
             )
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
@@ -93,11 +119,11 @@ with gr.Blocks(css=css) as demo:
                 step=1,
                 value=0,
             )
             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
             with gr.Row():
                 width = gr.Slider(
                     label="Width",
                     minimum=256,
@@ -105,7 +131,7 @@ with gr.Blocks(css=css) as demo:
                     step=32,
                     value=512,
                 )
                 height = gr.Slider(
                     label="Height",
                     minimum=256,
@@ -113,9 +139,9 @@ with gr.Blocks(css=css) as demo:
                     step=32,
                     value=512,
                 )
             with gr.Row():
                 guidance_scale = gr.Slider(
                     label="Guidance scale",
                     minimum=0.0,
@@ -123,7 +149,7 @@ with gr.Blocks(css=css) as demo:
                     step=0.1,
                     value=0.0,
                 )
                 num_inference_steps = gr.Slider(
                     label="Number of inference steps",
                     minimum=1,
@@ -131,16 +157,23 @@ with gr.Blocks(css=css) as demo:
                     step=1,
                     value=2,
                 )
-        gr.Examples(
-            examples = examples,
-            inputs = [prompt]
-        )
     run_button.click(
-        fn = infer,
-        inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
-        outputs = [result]
     )
-demo.queue().launch()

+import random
 import gradio as gr
 import numpy as np
 import torch
+from tools import synth
 device = "cuda" if torch.cuda.is_available() else "cpu"
+model_path = "runwayml/stable-diffusion-v1-5"
 if torch.cuda.is_available():
     torch.cuda.max_memory_allocated(device=device)
+    pipe = synth.pipe_img(
+        model_path=model_path,
+        device=device,
+        use_torchcompile=False,
+        use_safetensors=True,
+    )
+else:
+    pipe = synth.pipe_img(
+        model_path=model_path,
+        device=device,
+        use_torchcompile=False,
+        use_safetensors=True,
+    )
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
+def infer(
+    input_image,
+    prompt,
+    negative_prompt,
+    seed,
+    randomize_seed,
+    width,
+    height,
+    guidance_scale,
+    num_inference_steps,
+):
+    """
+    Run the module-level img2img pipeline `pipe` once and return its first image.
+    Args:
+        input_image: Source image, forwarded to the pipeline as `image`.
+        prompt: Text prompt forwarded to the pipeline.
+        negative_prompt: Negative prompt forwarded to the pipeline.
+        seed: Seed for the torch generator; ignored when `randomize_seed` is truthy.
+        randomize_seed: When truthy, a fresh seed is drawn from [0, MAX_SEED].
+        width: Forwarded to the pipeline as `width`.
+        height: Forwarded to the pipeline as `height`.
+        guidance_scale: Forwarded to the pipeline as `guidance_scale`.
+        num_inference_steps: Forwarded to the pipeline as `num_inference_steps`.
+    Returns:
+        The first entry of the pipeline output's `images`.
+    """
     if randomize_seed:
+        # Overrides whatever seed the caller supplied.
         seed = random.randint(0, MAX_SEED)
+    # NOTE(review): manual_seed presumably needs an integer seed -- confirm the
+    # seed slider never delivers a float here.
     generator = torch.Generator().manual_seed(seed)
     image = pipe(
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        guidance_scale=guidance_scale,
+        num_inference_steps=num_inference_steps,
+        width=width,
+        height=height,
+        generator=generator,
+        image=input_image,
+    ).images[0]
     return image
 examples = [
     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
     "An astronaut riding a green horse",
     "A delicious ceviche cheesecake slice",
 ]
+css = """
 #col-container {
     margin: 0 auto;
     max-width: 520px;
     power_device = "CPU"
 with gr.Blocks(css=css) as demo:
     with gr.Column(elem_id="col-container"):
+        gr.Markdown(
+            f"""
         # Text-to-Image Gradio Template
         Currently running on {power_device}.
+        """
+        )
         with gr.Row():
             prompt = gr.Text(
                 label="Prompt",
                 show_label=False,
                 placeholder="Enter your prompt",
                 container=False,
             )
+            input_image = gr.Image(type="pil", label="Input Image")
             run_button = gr.Button("Run", scale=0)
         result = gr.Image(label="Result", show_label=False)
         with gr.Accordion("Advanced Settings", open=False):
             negative_prompt = gr.Text(
                 label="Negative prompt",
                 max_lines=1,
                 placeholder="Enter a negative prompt",
                 visible=False,
             )
             seed = gr.Slider(
                 label="Seed",
                 minimum=0,
                 step=1,
                 value=0,
             )
             randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
             with gr.Row():
                 width = gr.Slider(
                     label="Width",
                     minimum=256,
                     step=32,
                     value=512,
                 )
                 height = gr.Slider(
                     label="Height",
                     minimum=256,
                     step=32,
                     value=512,
                 )
             with gr.Row():
                 guidance_scale = gr.Slider(
                     label="Guidance scale",
                     minimum=0.0,
                     step=0.1,
                     value=0.0,
                 )
                 num_inference_steps = gr.Slider(
                     label="Number of inference steps",
                     minimum=1,
                     step=1,
                     value=2,
                 )
+        gr.Examples(examples=examples, inputs=[prompt])
     run_button.click(
+        fn=infer,
+        inputs=[
+            input_image,
+            prompt,
+            negative_prompt,
+            seed,
+            randomize_seed,
+            width,
+            height,
+            guidance_scale,
+            num_inference_steps,
+        ],
+        outputs=[result],
     )
+demo.queue().launch()

requirements.txt CHANGED Viewed

@@ -1,6 +1,13 @@
 accelerate
 diffusers
 invisible_watermark
-torch
 transformers
-xformers

 accelerate
 diffusers
 invisible_watermark
+torch==2.1.2
+torchaudio==2.1.2
+torchvision==0.16.2
 transformers
+xformers==0.0.23.post1
+DeepCache
+pandas
+numpy
+torchmetrics[image]
+gradio

tools/synth.py ADDED Viewed

	@@ -0,0 +1,935 @@

+"""
+Helper functions for generating synthetic images with a diffusion model.
+Functions:
+    - get_top_misclassified
+    - get_class_list
+    - generateClassPairs
+    - outputDirectory
+    - pipe_img
+    - createPrompts
+    - interpolatePrompts
+        - slerp
+        - get_middle_elements
+        - remove_middle
+    - genClassImg
+    - getMetadata
+    - groupbyInterpolation
+    - ungroupInterpolation
+    - groupAllbyInterpolation
+    - getPairIndices
+    - generateImagesFromDataset
+    - generateTrace
+"""
+import json
+import os
+import numpy as np
+import pandas as pd
+import torch
+from DeepCache import DeepCacheSDHelper
+from diffusers import (
+    LMSDiscreteScheduler,
+    StableDiffusionImg2ImgPipeline,
+)
+from torch import nn
+from torchmetrics.functional.image import structural_similarity_index_measure as ssim
+from torchvision import transforms
+def get_top_misclassified(val_classifier_json):
+    """
+    Loads the per-class "top_n_classes" entries from a validation classifier JSON file.
+    Args:
+        val_classifier_json (str): The path to the validation classifier JSON file. Its
+            "val_metrics_details" mapping is keyed by class name.
+    Returns:
+        dict: A dictionary mapping each class name to that class's "top_n_classes" value.
+    """
+    with open(val_classifier_json) as json_file:
+        details = json.load(json_file)["val_metrics_details"]
+    # One row per class; the columns are the per-class metric fields.
+    metrics_table = pd.DataFrame.from_dict(details, orient="index")
+    return {
+        class_name: top_classes
+        for class_name, top_classes in metrics_table["top_n_classes"].items()
+    }
+def get_class_list(val_classifier_json):
+    """
+    Reads the class names stored in a validation classifier JSON file.
+    Args:
+        val_classifier_json (str): The path to the validation classifier JSON file.
+    Returns:
+        list: The keys of the file's "val_metrics_details" mapping, sorted ascending.
+    """
+    with open(val_classifier_json, "r") as json_file:
+        details = json.load(json_file)["val_metrics_details"]
+    class_names = list(details.keys())
+    class_names.sort()
+    return class_names
+def generateClassPairs(val_classifier_json):
+    """
+    Builds the unique class pairs implied by the top misclassified classes.
+    Args:
+        val_classifier_json (str): The path to the validation classifier JSON file.
+    Returns:
+        list: A sorted list of (class_a, class_b) tuples; each tuple is itself sorted,
+              so a pair is listed once regardless of which class it was found under.
+    """
+    top_misclassified = get_top_misclassified(val_classifier_json)
+    unique_pairs = {
+        tuple(sorted([class_name, confused_with]))
+        for class_name, confused_classes in top_misclassified.items()
+        for confused_with in confused_classes
+    }
+    return sorted(unique_pairs)
+def outputDirectory(class_pairs, synth_path, metadata_path):
+    """
+    Creates the output directory structure for the synthesized data.
+    Args:
+        class_pairs (list): A list of class pairs; one folder named after each entry
+            is created under synth_path.
+        synth_path (str): The path to the directory where the synthesized data will be stored.
+        metadata_path (str): The path to the directory where the metadata will be stored.
+    Returns:
+        None
+    Raises:
+        FileExistsError: If one of the target paths already exists and is not a directory.
+    """
+    for pair_id in class_pairs:
+        # exist_ok avoids the check-then-create race of testing os.path.exists first.
+        os.makedirs(f"{synth_path}/{pair_id}", exist_ok=True)
+    os.makedirs(metadata_path, exist_ok=True)
+    print("Info: Output directory ready.")
+def pipe_img(
+    model_path,
+    device="cuda",
+    apply_optimization=True,
+    use_torchcompile=False,
+    ci_cb=(5, 1),
+    use_safetensors=None,
+    cpu_offload=False,
+    scheduler=None,
+):
+    """
+    Creates and returns an image-to-image pipeline for stable diffusion.
+    Args:
+        model_path (str): The path to the pretrained model.
+        device (str, optional): The device to use for computation. Defaults to "cuda".
+        apply_optimization (bool, optional): Whether to apply optimization techniques
+            (DeepCache, plus xformers attention on CUDA devices). Defaults to True.
+        use_torchcompile (bool, optional): Whether to compile the UNet with torch.compile.
+            Only honoured when apply_optimization is True. Defaults to False.
+        ci_cb (tuple, optional): A tuple containing the cache interval and cache branch ID. Defaults to (5, 1).
+        use_safetensors (bool, optional): Whether to use safetensors. Defaults to None.
+        cpu_offload (bool, optional): Whether to enable CPU offloading. Defaults to False.
+        scheduler (LMSDiscreteScheduler, optional): The scheduler for the pipeline. Defaults to None,
+            in which case an LMSDiscreteScheduler with a scaled-linear beta schedule is created.
+    Returns:
+        StableDiffusionImg2ImgPipeline: The image-to-image pipeline for stable diffusion.
+    """
+    ###############################
+    # Reference:
+    # Akimov, R. (2024) Images Interpolation with Stable Diffusion - Hugging Face Open-Source AI Cookbook. Available at: https://huggingface.co/learn/cookbook/en/stable_diffusion_interpolation (Accessed: 4 June 2024).
+    ###############################
+    if scheduler is None:
+        scheduler = LMSDiscreteScheduler(
+            beta_start=0.00085,
+            beta_end=0.012,
+            beta_schedule="scaled_linear",
+            num_train_timesteps=1000,
+            steps_offset=1,
+        )
+    pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
+        model_path,
+        scheduler=scheduler,
+        torch_dtype=torch.float32,
+        use_safetensors=use_safetensors,
+        safety_checker=None,
+    )
+    if cpu_offload:
+        # Offloading manages device placement itself, so the pipeline is not
+        # moved to the target device first.
+        pipe.enable_model_cpu_offload()
+    else:
+        pipe = pipe.to(device)
+    if apply_optimization:
+        # tomesd.apply_patch(pipe, ratio=0.5)
+        helper = DeepCacheSDHelper(pipe=pipe)
+        cache_interval, cache_branch_id = ci_cb
+        helper.set_params(
+            cache_interval=cache_interval, cache_branch_id=cache_branch_id
+        )  # lower is faster but lower quality
+        helper.enable()
+        # xformers memory-efficient attention is GPU-only; requesting it on a CPU
+        # pipeline raises, so it is enabled for CUDA devices only.
+        if torch.device(device).type == "cuda":
+            pipe.enable_xformers_memory_efficient_attention()
+        if use_torchcompile:
+            pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+    return pipe
+def createPrompts(
+    class_name_pairs,
+    prompt_structure=None,
+    use_default_negative_prompt=False,
+    negative_prompt=None,
+):
+    """
+    Builds one text prompt per class of a class pair, plus matching negative prompts.
+    Args:
+        class_name_pairs (list): A list of two class names.
+        prompt_structure (str, optional): Template containing the <class_name> placeholder.
+            Defaults to "a photo of a <class_name>".
+        use_default_negative_prompt (bool, optional): When True, the built-in negative prompt
+            replaces any negative_prompt passed in. Defaults to False.
+        negative_prompt (str, optional): The negative prompt to steer the generation away from certain features.
+    Returns:
+        tuple: (prompts, negative_prompts).
+            prompts (list): The template filled in with each of the two class names.
+            negative_prompts (list or None): The negative prompt repeated once per prompt,
+                or None when no negative prompt is in effect.
+    Raises:
+        ValueError: If prompt_structure is given without the <class_name> placeholder.
+    """
+    placeholder = "<class_name>"
+    if prompt_structure is None:
+        prompt_structure = "a photo of a <class_name>"
+    elif placeholder not in prompt_structure:
+        raise ValueError(
+            "The prompt structure must contain the <class_name> placeholder."
+        )
+    if use_default_negative_prompt:
+        negative_prompt = (
+            "blurry image, disfigured, deformed, distorted, cartoon, drawings"
+        )
+    first_class, second_class = class_name_pairs[0], class_name_pairs[1]
+    prompts = [
+        prompt_structure.replace(placeholder, class_name)
+        for class_name in (first_class, second_class)
+    ]
+    if negative_prompt is not None:
+        # Negative prompts that can be used to steer the generation away from certain features.
+        return prompts, [negative_prompt] * len(prompts)
+    print("Info: Negative prompt not provided, returning as None.")
+    return prompts, None
+def interpolatePrompts(
+    prompts,
+    pipeline,
+    num_interpolation_steps,
+    sample_mid_interpolation,
+    remove_n_middle=0,
+    device="cuda",
+):
+    """
+    Interpolates prompts by generating intermediate embeddings between pairs of prompts.
+    Sampling and middle removal are applied to the first prompt pair only; any further
+    pairs contribute all of their interpolation steps.
+    Args:
+        prompts (List[str]): A list of prompts to be interpolated.
+        pipeline: The pipeline object containing the tokenizer and text encoder.
+        num_interpolation_steps (int): The number of interpolation steps between each pair of prompts.
+        sample_mid_interpolation (int): The number of intermediate embeddings to sample from the middle of the interpolated prompts.
+        remove_n_middle (int, optional): The number of middle embeddings to remove from the sampled embeddings. Defaults to 0.
+        device (str, optional): The device to run the interpolation on. Defaults to "cuda".
+    Returns:
+        interpolated_prompt_embeds (torch.Tensor): The interpolated prompt embeddings.
+        prompt_metadata (dict): Metadata about the interpolation process, including similarity scores and nearest class information.
+    Raises:
+        ValueError: If sample_mid_interpolation or remove_n_middle is negative or larger
+            than the sequence it is applied to.
+    e.g. if num_interpolation_steps = 10, sample_mid_interpolation = 6, remove_n_middle = 2
+    Interpolated: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+    Sampled:            [2, 3, 4, 5, 6, 7]
+    Removed:                   x  x
+    Returns:            [2, 3,       6, 7]
+    """
+    ###############################
+    # Reference:
+    # Akimov, R. (2024) Images Interpolation with Stable Diffusion - Hugging Face Open-Source AI Cookbook. Available at: https://huggingface.co/learn/cookbook/en/stable_diffusion_interpolation (Accessed: 4 June 2024).
+    ###############################
+    def slerp(v0, v1, num, t0=0, t1=1):
+        """
+        Performs spherical linear interpolation between two vectors.
+        Args:
+            v0 (torch.Tensor): The starting vector.
+            v1 (torch.Tensor): The ending vector.
+            num (int): The number of interpolation points.
+            t0 (float, optional): The starting time. Defaults to 0.
+            t1 (float, optional): The ending time. Defaults to 1.
+        Returns:
+            torch.Tensor: The interpolated vectors, stacked along a new first dimension.
+        """
+        ###############################
+        # Reference:
+        # Karpathy, A. (2022) hacky stablediffusion code for generating videos, Gist. Available at: https://gist.github.com/karpathy/00103b0037c5aaea32fe1da1af553355 (Accessed: 4 June 2024).
+        ###############################
+        v0 = v0.detach().cpu().numpy()
+        v1 = v1.detach().cpu().numpy()
+        def interpolation(t, v0, v1, DOT_THRESHOLD=0.9995):
+            """helper function to spherically interpolate two arrays v0 and v1"""
+            dot = np.sum(v0 * v1 / (np.linalg.norm(v0) * np.linalg.norm(v1)))
+            if np.abs(dot) > DOT_THRESHOLD:
+                # Nearly colinear inputs: fall back to plain linear interpolation.
+                v2 = (1 - t) * v0 + t * v1
+            else:
+                theta_0 = np.arccos(dot)
+                sin_theta_0 = np.sin(theta_0)
+                theta_t = theta_0 * t
+                sin_theta_t = np.sin(theta_t)
+                s0 = np.sin(theta_0 - theta_t) / sin_theta_0
+                s1 = sin_theta_t / sin_theta_0
+                v2 = s0 * v0 + s1 * v1
+            return v2
+        t = np.linspace(t0, t1, num)
+        v3 = torch.tensor(np.array([interpolation(step, v0, v1) for step in t]))
+        return v3
+    def get_middle_elements(lst, n):
+        """
+        Returns a tuple containing the middle `n` elements of `lst` and the range of their indices.
+        Args:
+            lst (list or torch.Tensor): The sequence from which to extract the middle elements.
+            n (int): The number of middle elements to extract.
+        Returns:
+            tuple: A tuple containing the middle elements and a range of indices.
+        Raises:
+            ValueError: If n is negative or greater than the length of the sequence.
+        Examples:
+            lst = [1, 2, 3, 4, 5]
+            get_middle_elements(lst, 3)
+            ([2, 3, 4], range(1, 4))
+        """
+        if n < 0 or n > len(lst):
+            # A negative start index would silently slice from the wrong end.
+            raise ValueError(
+                "Invalid value for n. It should be between 0 and the sequence length"
+            )
+        # Same window for odd and even n: centred on len // 2, leaning left when n is even.
+        start = len(lst) // 2 - n // 2
+        end = start + n
+        return lst[start:end], range(start, end)
+    def remove_middle(data, n):
+        """
+        Remove the middle n elements from a sequence.
+        Args:
+            data (list or torch.Tensor): The input sequence.
+            n (int): The number of elements to remove from the middle of the sequence.
+        Returns:
+            list or torch.Tensor: The sequence with the middle n elements removed.
+        Raises:
+            ValueError: If n is negative or greater than the length of the sequence.
+        """
+        if n < 0 or n > len(data):
+            raise ValueError(
+                "Invalid value for n. It should be between 0 and the sequence length"
+            )
+        # Same window as get_middle_elements, so the two helpers stay aligned.
+        start = len(data) // 2 - n // 2
+        head, tail = data[:start], data[start + n :]
+        if isinstance(data, torch.Tensor):
+            # `+` on tensors adds element-wise; concatenate along the step axis instead.
+            return torch.cat((head, tail), dim=0)
+        return head + tail
+    batch_size = len(prompts)
+    # Tokenizing and encoding prompts into embeddings.
+    prompts_tokens = pipeline.tokenizer(
+        prompts,
+        padding="max_length",
+        max_length=pipeline.tokenizer.model_max_length,
+        truncation=True,
+        return_tensors="pt",
+    )
+    prompts_embeds = pipeline.text_encoder(prompts_tokens.input_ids.to(device))[0]
+    # Interpolating between embeddings pairs for the given number of interpolation steps.
+    interpolated_prompt_embeds = [
+        slerp(prompts_embeds[i], prompts_embeds[i + 1], num_interpolation_steps)
+        for i in range(batch_size - 1)
+    ]
+    # Shallow copy: keeps the untrimmed first pair available for the similarity metadata.
+    full_interpolated_prompt_embeds = interpolated_prompt_embeds[:]
+    interpolated_prompt_embeds[0], sample_range = get_middle_elements(
+        interpolated_prompt_embeds[0], sample_mid_interpolation
+    )
+    if remove_n_middle > 0:
+        interpolated_prompt_embeds[0] = remove_middle(
+            interpolated_prompt_embeds[0], remove_n_middle
+        )
+    prompt_metadata = dict()
+    similarity = nn.CosineSimilarity(dim=-1, eps=1e-6)
+    first_pair_embeds = full_interpolated_prompt_embeds[0]
+    class1_embed = first_pair_embeds[0]
+    class2_embed = first_pair_embeds[num_interpolation_steps - 1]
+    for i in range(num_interpolation_steps):
+        class1_sim = similarity(class1_embed, first_pair_embeds[i]).mean().item()
+        class2_sim = similarity(class2_embed, first_pair_embeds[i]).mean().item()
+        relative_distance = class1_sim / (class1_sim + class2_sim)
+        prompt_metadata[i] = {
+            # NOTE(review): "selected" reflects the sampled window only; steps later
+            # dropped by remove_n_middle are still reported as selected -- confirm intent.
+            "selected": i in sample_range,
+            "similarity": {
+                "class1": class1_sim,
+                "class2": class2_sim,
+                "class1_relative_distance": relative_distance,
+                "class2_relative_distance": 1 - relative_distance,
+            },
+            "nearest_class": int(relative_distance < 0.5),
+        }
+    interpolated_prompt_embeds = torch.cat(interpolated_prompt_embeds, dim=0).to(device)
+    return interpolated_prompt_embeds, prompt_metadata
+def genClassImg(
+    pipeline,
+    pos_embed,
+    neg_embed,
+    input_image,
+    generator,
+    latents,
+    num_imgs=1,
+    height=512,
+    width=512,
+    num_inference_steps=25,
+    guidance_scale=7.5,
+):
+    """
+    Calls the pipeline once with pre-computed prompt embeddings and returns the first image.
+    Args:
+        pipeline: The pipeline object used for image generation.
+        pos_embed: The positive prompt embedding; a leading batch axis is added before the call.
+        neg_embed: The negative prompt embedding, or None to pass no negative embedding.
+        input_image: The input image, forwarded to the pipeline as `image`.
+        generator: Forwarded to the pipeline as `generator`.
+        latents: Forwarded to the pipeline as `latents`.
+        num_imgs: The number of images per prompt (default is 1).
+        height: The height of the generated images (default is 512).
+        width: The width of the generated images (default is 512).
+        num_inference_steps: The number of inference steps for image generation (default is 25).
+        guidance_scale: The scale factor for guidance (default is 7.5).
+    Returns:
+        The first entry of the pipeline output's `images`.
+    """
+    negative_embeds = None if neg_embed is None else neg_embed[None, ...]
+    call_kwargs = {
+        "height": height,
+        "width": width,
+        "num_images_per_prompt": num_imgs,
+        "prompt_embeds": pos_embed[None, ...],
+        "negative_prompt_embeds": negative_embeds,
+        "num_inference_steps": num_inference_steps,
+        "guidance_scale": guidance_scale,
+        "generator": generator,
+        "latents": latents,
+        "image": input_image,
+    }
+    output = pipeline(**call_kwargs)
+    return output.images[0]
+def getMetadata(
+    class_pairs,
+    path,
+    seed,
+    guidance_scale,
+    num_inference_steps,
+    num_interpolation_steps,
+    sample_mid_interpolation,
+    height,
+    width,
+    prompts,
+    negative_prompts,
+    pipeline,
+    prompt_metadata,
+    negative_prompt_metadata,
+    ssim_metadata=None,
+    save_json=True,
+    save_path=".",
+):
+    """
+    Collects the generation settings into one metadata dictionary and optionally saves it.
+    Args:
+        class_pairs (list): List of class names; joined with "_" to form the JSON file name.
+        path (str): Path to the data.
+        seed (int): Seed value for randomization.
+        guidance_scale (float): Scale factor for guidance, stored as "CFG".
+        num_inference_steps (int): Number of inference steps.
+        num_interpolation_steps (int): Number of interpolation steps.
+        sample_mid_interpolation: Mid-interpolation sampling setting, stored as given.
+        height (int): Height of the image.
+        width (int): Width of the image.
+        prompts (list): List of prompts.
+        negative_prompts (list): List of negative prompts, or None.
+        pipeline (object): Pipeline object; its `config` is copied into the metadata.
+        prompt_metadata (dict): Metadata for prompts.
+        negative_prompt_metadata (dict): Metadata for negative prompts.
+        ssim_metadata (dict, optional): SSIM scores metadata. Defaults to None.
+        save_json (bool, optional): Flag to save metadata as JSON. Defaults to True.
+        save_path (str, optional): Directory in which to save the JSON file. Defaults to ".".
+    Returns:
+        dict: Generated metadata.
+    """
+    metadata = {
+        "class_pairs": class_pairs,
+        "path": path,
+        "seed": seed,
+        "params": {
+            "CFG": guidance_scale,
+            "inferenceSteps": num_inference_steps,
+            "interpolationSteps": num_interpolation_steps,
+            "sampleMidInterpolation": sample_mid_interpolation,
+            "height": height,
+            "width": width,
+        },
+    }
+    for index, prompt_text in enumerate(prompts):
+        metadata[f"prompt_text_{index}"] = prompt_text
+        if negative_prompts is not None:
+            metadata[f"negative_prompt_text_{index}"] = negative_prompts[index]
+    metadata["pipe_config"] = dict(pipeline.config)
+    metadata["prompt_embed_similarity"] = prompt_metadata
+    metadata["negative_prompt_embed_similarity"] = negative_prompt_metadata
+    if ssim_metadata is not None:
+        print("Info: SSIM scores are available.")
+        metadata["ssim_scores"] = ssim_metadata
+    if not save_json:
+        return metadata
+    # File name pattern: <class names joined by "_">_<seed>.json
+    file_name = "_".join(class_pairs) + f"_{seed}.json"
+    with open(os.path.join(save_path, file_name), "w") as json_file:
+        json.dump(metadata, json_file, indent=4)
+    return metadata
+def groupbyInterpolation(dir_to_classfolder):
+    """
+    Group files in a directory by interpolation step.
+    The step is read from the file name: the text between the first and second "_",
+    cut at its first ".". Each file is moved into a subfolder named after its step.
+    Entries that are not regular files (such as step folders left by an earlier run)
+    and file names without "_" are left in place, so the function can be re-run safely.
+    Args:
+        dir_to_classfolder (str): The path to the directory containing the files.
+    Returns:
+        None
+    """
+    # Snapshot the listing first: step folders are created inside this same directory.
+    files = []
+    for file_name in os.listdir(dir_to_classfolder):
+        file_path = os.path.join(dir_to_classfolder, file_name)
+        name_parts = file_name.split(sep="_")
+        if len(name_parts) < 2 or not os.path.isfile(file_path):
+            continue
+        files.append((name_parts[1].split(sep=".")[0], file_path))
+    # create a subfolder for each step of the interpolation
+    for interpolation_step, file_path in files:
+        new_dir = os.path.join(dir_to_classfolder, interpolation_step)
+        os.makedirs(new_dir, exist_ok=True)
+        os.rename(file_path, os.path.join(new_dir, os.path.basename(file_path)))
+def ungroupInterpolation(dir_to_classfolder):
+    """
+    Flattens `dir_to_classfolder`: every file inside each of its subdirectories is moved
+    up into `dir_to_classfolder`, and the emptied subdirectory is then removed.
+    Args:
+        dir_to_classfolder (str): The path to the directory containing the subdirectories.
+    Returns:
+        None
+    """
+    for entry in os.listdir(dir_to_classfolder):
+        step_dir = os.path.join(dir_to_classfolder, entry)
+        if not os.path.isdir(step_dir):
+            continue
+        for file_name in os.listdir(step_dir):
+            source = os.path.join(step_dir, file_name)
+            target = os.path.join(dir_to_classfolder, file_name)
+            os.rename(source, target)
+        # rmdir only succeeds once the subdirectory is empty.
+        os.rmdir(step_dir)
+def groupAllbyInterpolation(
+    data_path,
+    group=True,
+    fn_group=groupbyInterpolation,
+    fn_ungroup=ungroupInterpolation,
+):
+    """
+    Applies the grouping (or ungrouping) function to every class folder under `data_path`.
+    Args:
+        data_path (str): The path to the data; each subdirectory is treated as one class.
+        group (bool, optional): Group when True, ungroup when False. Defaults to True.
+        fn_group (function, optional): The function to use for grouping. Defaults to groupbyInterpolation.
+        fn_ungroup (function, optional): The function to use for ungrouping. Defaults to ungroupInterpolation.
+    """
+    handler = fn_group if group else fn_ungroup
+    # Sorted so classes are processed, and reported, in a stable order.
+    for class_name in sorted(os.listdir(data_path)):
+        class_path = os.path.join(data_path, class_name)
+        if not os.path.isdir(class_path):
+            continue
+        handler(class_path)
+        print(f"Processed {class_name}")
+def getPairIndices(subset_len, total_pair_count=1, seed=None):
+    """
+    Splits the shuffled indices 0..subset_len-1 into equally sized groups.
+    The group size is subset_len / total_pair_count rounded up. When the indices do not
+    fill the last group, it is padded with the lowest indices (0, 1, ...).
+    Args:
+        subset_len (int): The length of the subset.
+        total_pair_count (int, optional): The number of groups to split into. Defaults to 1.
+        seed (int, optional): The seed value for the random number generator. Defaults to None.
+    Returns:
+        list: A list of index groups, each a list of `group_size` ints.
+    """
+    rng = np.random.default_rng(seed)
+    # Ceiling division.
+    group_size = (subset_len + total_pair_count - 1) // total_pair_count
+    shuffled = list(range(subset_len))
+    padding_source = list(range(subset_len))
+    rng.shuffle(shuffled)
+    pad_count = group_size - subset_len % group_size
+    shuffled.extend(padding_source[i] for i in range(pad_count))
+    # Surplus padding beyond the requested number of groups is cut off here.
+    kept = np.array(shuffled)[: group_size * total_pair_count]
+    return kept.reshape(-1, group_size).tolist()
def generateImagesFromDataset(
    img_subsets,
    class_iterables,
    pipeline,
    interpolated_prompt_embeds,
    interpolated_negative_prompts_embeds,
    num_inference_steps,
    guidance_scale,
    height=512,
    width=512,
    seed=None,
    save_path=".",
    class_pairs=("0", "1"),
    save_image=True,
    image_type="jpg",
    interpolate_range="full",
    device="cuda",
    return_images=False,
):
    """
    Generate one image per selected dataset sample, each at a randomly assigned interpolation step.
    For every class in ``class_pairs`` a shuffled step assignment (``group_map``) is built,
    the next batch of image indices is popped from ``class_iterables``, and each indexed
    image is passed to ``genClassImg`` together with the prompt embeddings of its assigned
    step. The SSIM between the generated image and the input image is accumulated per step.
    Note: ``torch.manual_seed(seed)`` is called, which reseeds torch's global RNG.
    Args:
        img_subsets (dict): Maps class id to an indexable dataset whose items unpack as (image, target).
        class_iterables (dict): Maps class id to an object whose ``pop()`` returns the
            iterable of image indices to process in this call.
        pipeline (object): The pipeline object used for image generation.
        interpolated_prompt_embeds (list): Interpolated prompt embeddings, one per step.
        interpolated_negative_prompts_embeds (list): Interpolated negative prompt embeddings, one per step.
        num_inference_steps (int): The number of inference steps for image generation.
        guidance_scale (float): Guidance scale forwarded to ``genClassImg``.
        height (int, optional): The height of the generated images. Defaults to 512.
        width (int, optional): The width of the generated images. Defaults to 512.
        seed (int, optional): Seed for both the torch and NumPy generators. A random
            seed is drawn when None. Defaults to None.
        save_path (str, optional): Root directory for saved images; files are written to
            ``{save_path}/{class_id}/``, which is created if missing. Defaults to ".".
        class_pairs (tuple, optional): The two class identifiers to process. Defaults to ("0", "1").
        save_image (bool, optional): Whether to save the generated images. Defaults to True.
        image_type (str, optional): File extension of the saved images. "jpg" is saved as
            JPEG with quality 95, "png" as PNG, anything else with the format implied by
            the extension. Defaults to "jpg".
        interpolate_range (str, optional): "nearest" restricts the first class to the first half
            of the steps and the second class to the second half; "furthest" swaps the halves;
            any other value ("full") lets both classes use every step. Defaults to "full".
        device (str, optional): Device the initial latents are moved to. Defaults to "cuda".
        return_images (bool, optional): Whether to return the generated images. Defaults to False.
    Returns:
        tuple or dict: ``(class_images, class_ssim)`` when ``return_images`` is True, otherwise
            only ``class_ssim``. ``class_images`` maps class id to the list of generated images;
            ``class_ssim`` maps class id to ``{step: {"ssim_sum", "ssim_count", "ssim_avg"}}``.
    """
    if seed is None:
        seed = torch.Generator().seed()
    generator = torch.manual_seed(seed)
    rng = np.random.default_rng(seed)
    # Generating initial U-Net latent vectors from a random normal distribution.
    latents = torch.randn(
        (1, pipeline.unet.config.in_channels, height // 8, width // 8),
        generator=generator,
    ).to(device)
    embed_len = len(interpolated_prompt_embeds)
    embed_pairs_list = list(
        zip(interpolated_prompt_embeds, interpolated_negative_prompts_embeds)
    )
    half = embed_len // 2
    if interpolate_range == "nearest":
        steps_range = (range(0, half), range(half, embed_len))
        multiplier = 2
    elif interpolate_range == "furthest":
        # uses opposite class of images of the text interpolation
        steps_range = (range(half, embed_len), range(0, half))
        multiplier = 2
    else:
        steps_range = (range(embed_len), range(embed_len))
        multiplier = 1
    # Format-specific keyword arguments for the image's save() call
    # (assumes genClassImg returns a PIL-style image -- confirm against genClassImg).
    save_kwargs = {
        "jpg": {"format": "JPEG", "quality": 95},
        "png": {"format": "PNG"},
    }.get(image_type, {})
    class_images = {}
    class_ssim = {}
    for class_iter, class_id in enumerate(class_pairs):
        if return_images:
            class_images[class_id] = []
        class_ssim[class_id] = {
            i: {"ssim_sum": 0, "ssim_count": 0, "ssim_avg": 0} for i in range(embed_len)
        }
        class_ds = img_subsets[class_id]
        subset_len = len(class_ds)
        # group_map assigns an interpolation step to every image: the position is the
        # image id, the element is the step. steps_range[class_iter] limits the steps
        # available to this class (e.g. range(0, 8) and range(8, 16) for 16 steps in
        # "nearest" mode); the list is repeated so it spans the whole subset.
        # This derivation must stay identical to the one in generateTrace so that
        # the trace describes the files written here.
        group_map = (
            list(steps_range[class_iter]) * multiplier * (subset_len // embed_len + 1)
        )
        rng.shuffle(group_map)
        iter_indices = class_iterables[class_id].pop()
        if save_image:
            # Make sure the per-class output folder exists before the first save.
            os.makedirs(f"{save_path}/{class_id}", exist_ok=True)
        # generate images for each image in the class, using its assigned interpolated step
        for image_id in iter_indices:
            img, _ = class_ds[image_id]
            input_image = img.unsqueeze(0)
            interpolate_step = group_map[image_id]
            prompt_embeds, negative_prompt_embeds = embed_pairs_list[interpolate_step]
            generated_image = genClassImg(
                pipeline,
                prompt_embeds,
                negative_prompt_embeds,
                input_image,
                generator,
                latents,
                num_imgs=1,
                height=height,
                width=width,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
            )
            pred_image = transforms.ToTensor()(generated_image).unsqueeze(0)
            ssim_score = ssim(pred_image, input_image).item()
            step_stats = class_ssim[class_id][interpolate_step]
            step_stats["ssim_sum"] += ssim_score
            step_stats["ssim_count"] += 1
            if return_images:
                class_images[class_id].append(generated_image)
            if save_image:
                generated_image.save(
                    f"{save_path}/{class_id}/{seed}-{image_id}_{interpolate_step}.{image_type}",
                    **save_kwargs,
                )
        # calculate ssim avg for the class (steps that received no image keep avg 0)
        for step_stats in class_ssim[class_id].values():
            if step_stats["ssim_count"] > 0:
                step_stats["ssim_avg"] = (
                    step_stats["ssim_sum"] / step_stats["ssim_count"]
                )
    if return_images:
        return class_images, class_ssim
    return class_ssim
def generateTrace(
    prompts,
    img_subsets,
    class_iterables,
    interpolated_prompt_embeds,
    interpolated_negative_prompts_embeds,
    subset_indices,
    seed=None,
    save_path=".",
    class_pairs=("0", "1"),
    image_type="jpg",
    interpolate_range="full",
    save_prompt_embeds=False,
):
    """
    Build a column-oriented trace of which input image maps to which output file and step.
    No images are generated. The step assignment (``group_map``) is derived with the same
    seed-driven procedure as in generateImagesFromDataset, so for the same ``seed`` and
    inputs the recorded steps and output paths match the files that function writes.
    Args:
        prompts (list): Prompt texts; element 0 is recorded as the positive prompt and
            element 1 as the negative prompt.
        img_subsets (dict): Maps class id to a sized subset exposing ``.dataset.samples``,
            where each sample's first element is the input file path.
        class_iterables (dict): Maps class id to an object whose ``pop()`` returns the
            iterable of image indices to trace in this call.
        interpolated_prompt_embeds (torch.Tensor): Interpolated prompt embeddings, one per step.
        interpolated_negative_prompts_embeds (torch.Tensor): Interpolated negative prompt
            embeddings, one per step.
        subset_indices (dict): Maps class id to the subset's indices into the underlying dataset;
            only the first index is read and ``image_id`` is added to it, so the indices are
            assumed to be contiguous.
        seed (int, optional): Seed for the NumPy generator and part of the output file name.
            A random seed is drawn when None. Defaults to None.
        save_path (str, optional): Root directory used to compose output paths. Defaults to ".".
        class_pairs (tuple, optional): The two class identifiers to process. Defaults to ("0", "1").
        image_type (str, optional): File extension of the output paths. Defaults to "jpg".
        interpolate_range (str, optional): "nearest", "furthest" or anything else for the
            full range. Defaults to "full".
        save_prompt_embeds (bool, optional): When True, store the (positive, negative) NumPy
            embeddings of each row's step; otherwise store None. Defaults to False.
    Returns:
        dict: Maps each column name to a list with one entry per traced image.
    """
    trace_dict = {
        column: []
        for column in (
            "class_pairs",
            "class_id",
            "image_id",
            "interpolation_step",
            "embed_len",
            "pos_prompt_text",
            "neg_prompt_text",
            "input_file_path",
            "output_file_path",
            "input_prompts_embed",
        )
    }
    if seed is None:
        seed = torch.Generator().seed()
    rng = np.random.default_rng(seed)
    embed_len = len(interpolated_prompt_embeds)
    # Copy the embeddings to host memory only when they are actually recorded.
    embed_pairs_list = None
    if save_prompt_embeds:
        embed_pairs_list = list(
            zip(
                interpolated_prompt_embeds.cpu().numpy(),
                interpolated_negative_prompts_embeds.cpu().numpy(),
            )
        )
    half = embed_len // 2
    if interpolate_range == "nearest":
        steps_range = (range(0, half), range(half, embed_len))
        multiplier = 2
    elif interpolate_range == "furthest":
        # uses opposite class of images of the text interpolation
        steps_range = (range(half, embed_len), range(0, half))
        multiplier = 2
    else:
        steps_range = (range(embed_len), range(embed_len))
        multiplier = 1
    for class_iter, class_id in enumerate(class_pairs):
        class_ds = img_subsets[class_id]
        subset_len = len(class_ds)
        # group_map assigns an interpolation step to every image: the position is the
        # image id, the element is the step. steps_range[class_iter] limits the steps
        # available to this class (e.g. range(0, 8) and range(8, 16) for 16 steps in
        # "nearest" mode); the list is repeated so it spans the whole subset.
        group_map = (
            list(steps_range[class_iter]) * multiplier * (subset_len // embed_len + 1)
        )
        rng.shuffle(group_map)
        iter_indices = class_iterables[class_id].pop()
        for image_id in iter_indices:
            interpolate_step = group_map[image_id]
            # Position of this image in the underlying dataset's sample list.
            sample_count = subset_indices[class_id][0] + image_id
            input_file = os.path.normpath(class_ds.dataset.samples[sample_count][0])
            output_file = f"{save_path}/{class_id}/{seed}-{image_id}_{interpolate_step}.{image_type}"
            if save_prompt_embeds:
                input_prompts_embed = embed_pairs_list[interpolate_step]
            else:
                input_prompts_embed = None
            trace_dict["class_pairs"].append(class_pairs)
            trace_dict["class_id"].append(class_id)
            trace_dict["image_id"].append(image_id)
            trace_dict["interpolation_step"].append(interpolate_step)
            trace_dict["embed_len"].append(embed_len)
            trace_dict["pos_prompt_text"].append(prompts[0])
            trace_dict["neg_prompt_text"].append(prompts[1])
            trace_dict["input_file_path"].append(input_file)
            trace_dict["output_file_path"].append(output_file)
            trace_dict["input_prompts_embed"].append(input_prompts_embed)
    return trace_dict