redo the space to spice things up.
- app.py +139 -143
- image_utils.py +26 -0
- metrics_utils.py +48 -0
- report_utils.py +47 -0
app.py
CHANGED
@@ -1,93 +1,41 @@
import importlib
-from functools import partial
from typing import List

import gradio as gr
import numpy as np
import torch
from diffusers import StableDiffusionPipeline
-from torchmetrics.functional.multimodal import clip_score
-
-from torchmetrics.image.inception import InceptionScore
+from torchmetrics import PeakSignalNoiseRatio, StructuralSimilarityIndexMeasure
+
+from image_utils import make_grid, numpy_to_pil
+from metrics_utils import compute_main_metrics, compute_psnr_or_ssim
+from report_utils import add_psnr_ssim_to_report, prepare_report

SEED = 0
WEIGHT_DTYPE = torch.float16

TITLE = "Evaluate Schedulers with StableDiffusionPipeline 🧨"
-DESCRIPTION = """
+ABSTRACT = """
This Space allows you to quantitatively compare [different noise schedulers](https://huggingface.co/docs/diffusers/using-diffusers/schedulers) with a [`StableDiffusionPipeline`](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/overview).

One of the applications of this Space could be to evaluate different schedulers for a certain Stable Diffusion checkpoint for a fixed number of inference steps.
-
-
-
+"""
+DESCRIPTION = """
+#### How does it work?
* The evaluator first sets a seed and then generates the initial noise, which is passed as the initial latent to start the image generation process. This is done to ensure a fair comparison.
* This initial latent is used every time the pipeline is run (with different schedulers).
* To quantify the quality of the generated images we use:
    * [Inception Score](https://en.wikipedia.org/wiki/Inception_score)
    * [CLIP Score](https://arxiv.org/abs/2104.08718)
-
-
-
+#### Notes
+* When selecting a model checkpoint, if you select "Other" you will have the option to provide a custom Stable Diffusion checkpoint.
* The default scheduler associated with the provided checkpoint is always used for reporting the scores.
* Increasing both the number of images per prompt and the number of inference steps could quickly build up the inference queue and thus
  result in slowdowns.
"""

-
-inception_score_fn = InceptionScore(normalize=True)
-torch.manual_seed(SEED)
-clip_score_fn = partial(clip_score, model_name_or_path="openai/clip-vit-base-patch16")
-
-
-def make_grid(images, rows, cols):
-    w, h = images[0].size
-    grid = Image.new("RGB", size=(cols * w, rows * h))
-    for i, image in enumerate(images):
-        grid.paste(image, box=(i % cols * w, i // cols * h))
-    return grid
-
-
-# Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_utils.py#L814
-def numpy_to_pil(images):
-    """
-    Convert a numpy image or a batch of images to a PIL image.
-    """
-    if images.ndim == 3:
-        images = images[None, ...]
-    images = (images * 255).round().astype("uint8")
-    if images.shape[-1] == 1:
-        # special case for grayscale (single channel) images
-        pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images]
-    else:
-        pil_images = [Image.fromarray(image) for image in images]
-
-    return pil_images
-
-
-def prepare_report(scheduler_name: str, results: dict):
-    image_grid = results["images"]
-    scores = results["scores"]
-
-    image_name = f"{scheduler_name}_images.png"
-    image_grid.save(image_name)
-    img_str = f"![img_grid_{scheduler_name}](/file=./{image_name})\n"
-
-    report_str = f"""
-    \n\n## {scheduler_name}
-
-    ### Sample images
-
-    {img_str}
-
-    ### Scores
-
-    {scores}
-    \n\n
-    """
-
-    return report_str
+psnr_fn = PeakSignalNoiseRatio()
+ssim_fn = StructuralSimilarityIndexMeasure()


def initialize_pipeline(checkpoint: str):
@@ -115,63 +63,51 @@ def get_latents(num_images_per_prompt: int, seed=SEED):
    return latents


-def compute_metrics(images: np.ndarray, prompts: List[str]):
-    inception_score_fn.update(torch.from_numpy(images).permute(0, 3, 1, 2))
-    inception_score = inception_score_fn.compute()
-
-    images_int = (images * 255).astype("uint8")
-    clip_score = clip_score_fn(
-        torch.from_numpy(images_int).permute(0, 3, 1, 2), prompts
-    ).detach()
-    return {
-        "inception_score (⬆️)": {
-            "mean": round(float(inception_score[0]), 4),
-            "std": round(float(inception_score[1]), 4),
-        },
-        "clip_score (⬆️)": round(float(clip_score), 4),
-    }
-
-
def run(
    prompt: str,
    num_images_per_prompt: int,
    num_inference_steps: int,
    checkpoint: str,
-    schedulers_to_test: List[str],
+    other_finedtuned_checkpoints: str = None,
+    schedulers_to_test: List[str] = None,
+    ssim: bool = False,
+    psnr: bool = False,
+    progress=gr.Progress(),
):
+    progress(0, desc="Starting...")
+
+    if checkpoint == "Other" and other_finedtuned_checkpoints == "":
+        return "❌ No legit checkpoint provided ❌"
+
+    elif checkpoint == "Other":
+        checkpoint = other_finedtuned_checkpoints
+
    all_images = {}
+    scheduler_images = {}

+    # Set up the pipeline.
    sd_pipeline, original_scheduler_config = initialize_pipeline(checkpoint)
+    sd_pipeline.set_progress_bar_config(disable=True)
+
+    # Prepare latents to start generation and the prompts.
    latents = get_latents(num_images_per_prompt)
    prompts = [prompt] * num_images_per_prompt

-    images = sd_pipeline(
-        prompts,
-        latents=latents,
-        num_inference_steps=num_inference_steps,
-        output_type="numpy",
-    ).images
    original_scheduler_name = original_scheduler_config._class_name
+    schedulers_to_test.append(original_scheduler_name)

-    all_images.update(
-        {
-            original_scheduler_name: {
-                "images": make_grid(numpy_to_pil(images), 1, num_images_per_prompt),
-                "scores": compute_metrics(images, prompts),
-            }
-        }
-    )
-    # print("First scheduler complete.")
-
-    for scheduler_name in schedulers_to_test:
-        if scheduler_name == original_scheduler_name:
-            continue
-        scheduler_cls = get_scheduler(scheduler_name)
-        current_scheduler = scheduler_cls.from_config(original_scheduler_config)
-        sd_pipeline.scheduler = current_scheduler
+    # Start generating the images and computing their scores.
+    for scheduler_name in progress.tqdm(schedulers_to_test):
+        if scheduler_name != original_scheduler_name:
+            scheduler_cls = get_scheduler(scheduler_name)
+            current_scheduler = scheduler_cls.from_config(original_scheduler_config)
+            sd_pipeline.scheduler = current_scheduler

        cur_scheduler_images = sd_pipeline(
-            prompts,
-            latents=latents,
-            num_inference_steps=num_inference_steps,
-            output_type="numpy",
+            prompts,
+            latents=latents,
+            num_inference_steps=num_inference_steps,
+            output_type="numpy",
        ).images
        all_images.update(
            {
@@ -179,51 +115,111 @@ def run(
                scheduler_name: {
                    "images": make_grid(
                        numpy_to_pil(cur_scheduler_images), 1, num_images_per_prompt
                    ),
-                    "scores": compute_metrics(cur_scheduler_images, prompts),
+                    "scores": compute_main_metrics(cur_scheduler_images, prompts),
                }
            }
        )
+        scheduler_images.update({scheduler_name: cur_scheduler_images})
+        torch.cuda.empty_cache()

+    # Prepare the output report.
    output_str = ""
    for scheduler_name in all_images:
-        # print(f"scheduler_name: {scheduler_name}")
        output_str += prepare_report(scheduler_name, all_images[scheduler_name])
-
+
+    # Append PSNR or SSIM if needed.
+    if len(schedulers_to_test) > 1:
+        ssim_scores = psnr_scores = None
+        if ssim:
+            ssim_scores = compute_psnr_or_ssim(
+                ssim_fn, scheduler_images, original_scheduler_name
+            )
+        if psnr:
+            psnr_scores = compute_psnr_or_ssim(
+                psnr_fn, scheduler_images, original_scheduler_name
+            )
+
+    if len(schedulers_to_test) > 1:
+        ssim_psnr_str = add_psnr_ssim_to_report(
+            original_scheduler_name, ssim_scores, psnr_scores
+        )
+        if ssim_psnr_str != "":
+            output_str += ssim_psnr_str
+
    return output_str


-demo = gr.Interface(
-    ...
-    inputs=[
-        ...
-    ],
-    ...
-    description=DESCRIPTION,
-    allow_flagging=False,
-)
-demo.launch()
+with gr.Blocks(title="Scheduler Evaluation") as demo:
+    gr.Markdown(f"## {TITLE}\n\n\n\n{ABSTRACT}")
+
+    with gr.Row():
+        with gr.Column():
+            prompt = gr.Text(
+                max_lines=1, placeholder="a painting of a dog", label="prompt"
+            )
+            num_images_per_prompt = gr.Slider(
+                3, 10, value=3, step=1, label="num_images_per_prompt"
+            )
+            num_inference_steps = gr.Slider(
+                10, 100, value=50, step=1, label="num_inference_steps"
+            )
+            model_ckpt = gr.Dropdown(
+                [
+                    "CompVis/stable-diffusion-v1-4",
+                    "runwayml/stable-diffusion-v1-5",
+                    "stabilityai/stable-diffusion-2-base",
+                    "Other",
+                ],
+                value="CompVis/stable-diffusion-v1-4",
+                multiselect=False,
+                interactive=True,
+                label="model_ckpt",
+            )
+            other_finedtuned_checkpoints = gr.Textbox(
+                visible=False,
+                interactive=True,
+                placeholder="valhalla/sd-pokemon-model",
+                label="custom_checkpoint",
+            )
+            model_ckpt.change(
+                lambda x: gr.Dropdown.update(visible=x == "Other"),
+                model_ckpt,
+                other_finedtuned_checkpoints,
+            )
+            schedulers_to_test = gr.Dropdown(
+                [
+                    "EulerDiscreteScheduler",
+                    "PNDMScheduler",
+                    "LMSDiscreteScheduler",
+                    "DPMSolverMultistepScheduler",
+                    "DDIMScheduler",
+                ],
+                value=["LMSDiscreteScheduler"],
+                multiselect=True,
+                label="schedulers_to_test",
+            )
+            ssim = gr.Checkbox(label="Compute SSIM")
+            psnr = gr.Checkbox(label="Compute PSNR")
+            evaluation_button = gr.Button(value="Submit")
+
+        with gr.Column():
+            report = gr.Markdown(label="Evaluation Report").style()
+
+    evaluation_button.click(
+        run,
+        inputs=[
+            prompt,
+            num_images_per_prompt,
+            num_inference_steps,
+            model_ckpt,
+            other_finedtuned_checkpoints,
+            schedulers_to_test,
+            ssim,
+            psnr,
+        ],
+        outputs=report,
+    )
+
+    gr.Markdown(f"{DESCRIPTION}")
+
+demo.queue().launch(debug=True)
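The fair-comparison trick the description refers to hinges on `get_latents`, whose body falls outside this diff's context (only its signature, `get_latents(num_images_per_prompt: int, seed=SEED)`, shows in the hunk header). A minimal sketch of what such a helper could look like, assuming the standard 4×64×64 Stable Diffusion latent shape:

```python
import torch

SEED = 0
WEIGHT_DTYPE = torch.float16


def get_latents(num_images_per_prompt: int, seed=SEED):
    # Sketch only: seed once, then draw one latent per requested image.
    # Reusing this tensor across schedulers means every run denoises the
    # exact same starting noise, so score differences come from the
    # scheduler alone. The (4, 64, 64) latent shape is an assumption
    # (it matches 512x512 Stable Diffusion checkpoints).
    generator = torch.manual_seed(seed)
    latents = torch.randn(
        (num_images_per_prompt, 4, 64, 64),
        generator=generator,
        dtype=WEIGHT_DTYPE,
    )
    return latents
```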
image_utils.py
ADDED
@@ -0,0 +1,26 @@
+from PIL import Image
+
+
+def make_grid(images, rows, cols):
+    w, h = images[0].size
+    grid = Image.new("RGB", size=(cols * w, rows * h))
+    for i, image in enumerate(images):
+        grid.paste(image, box=(i % cols * w, i // cols * h))
+    return grid
+
+
+# Copied from https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_utils.py#L814
+def numpy_to_pil(images):
+    """
+    Convert a numpy image or a batch of images to a PIL image.
+    """
+    if images.ndim == 3:
+        images = images[None, ...]
+    images = (images * 255).round().astype("uint8")
+    if images.shape[-1] == 1:
+        # special case for grayscale (single channel) images
+        pil_images = [Image.fromarray(image.squeeze(), mode="L") for image in images]
+    else:
+        pil_images = [Image.fromarray(image) for image in images]
+
+    return pil_images
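A quick, hypothetical check of how the two helpers compose (the random batch stands in for pipeline output, which is NHWC floats in [0, 1]):

```python
import numpy as np

from image_utils import make_grid, numpy_to_pil

# Stand-in for `sd_pipeline(...).images`: 3 images, H x W x C, floats in [0, 1].
batch = np.random.rand(3, 64, 64, 3)

# One row of three images, mirroring make_grid(..., 1, num_images_per_prompt) in app.py.
grid = make_grid(numpy_to_pil(batch), rows=1, cols=3)
grid.save("grid.png")
```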
metrics_utils.py
ADDED
@@ -0,0 +1,48 @@
+from functools import partial
+from typing import Callable, Dict, List
+
+import numpy as np
+import torch
+from torchmetrics.functional.multimodal import clip_score
+from torchmetrics.image.inception import InceptionScore
+
+SEED = 0
+
+inception_score_fn = InceptionScore(normalize=True)
+torch.manual_seed(SEED)
+clip_score_fn = partial(clip_score, model_name_or_path="openai/clip-vit-base-patch16")
+
+
+def compute_main_metrics(images: np.ndarray, prompts: List[str]) -> Dict:
+    inception_score_fn.update(torch.from_numpy(images).permute(0, 3, 1, 2))
+    inception_score = inception_score_fn.compute()
+
+    images_int = (images * 255).astype("uint8")
+    clip_score = clip_score_fn(
+        torch.from_numpy(images_int).permute(0, 3, 1, 2), prompts
+    ).detach()
+    return {
+        "inception_score (⬆️)": {
+            "mean": round(float(inception_score[0]), 4),
+            "std": round(float(inception_score[1]), 4),
+        },
+        "clip_score (⬆️)": round(float(clip_score), 4),
+    }
+
+
+def compute_psnr_or_ssim(
+    fn: Callable, images_dict: Dict, original_scheduler_name: str
+) -> Dict:
+    result_dict = {}
+    original_scheduler_images = images_dict[original_scheduler_name]
+    original_scheduler_images = torch.from_numpy(original_scheduler_images).permute(
+        0, 3, 1, 2
+    )
+    for k in images_dict:
+        if k != original_scheduler_name:
+            current_scheduler_images = torch.from_numpy(images_dict[k]).permute(
+                0, 3, 1, 2
+            )
+            current_value = fn(current_scheduler_images, original_scheduler_images)
+            result_dict.update({k: round(float(current_value), 4)})
+    return result_dict
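`compute_psnr_or_ssim` is metric-agnostic: any callable that maps `(preds, target)` NCHW tensor batches to a scalar works. A hypothetical call with the PSNR metric (the scheduler names and random arrays are illustrative only):

```python
import numpy as np
from torchmetrics import PeakSignalNoiseRatio

from metrics_utils import compute_psnr_or_ssim

# NHWC float batches keyed by scheduler name, as collected in app.py's run().
images_dict = {
    name: np.random.rand(3, 64, 64, 3).astype("float32")
    for name in ("PNDMScheduler", "DDIMScheduler")
}

# Every non-reference scheduler is scored against the reference one,
# and the reference itself is skipped, so the result has one entry here.
scores = compute_psnr_or_ssim(PeakSignalNoiseRatio(), images_dict, "PNDMScheduler")
print(scores)  # {"DDIMScheduler": <rounded PSNR>}
```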
report_utils.py
ADDED
@@ -0,0 +1,47 @@
+import json
+from typing import Dict
+
+
+def prepare_report(scheduler_name: str, results: dict):
+    image_grid = results["images"]
+    scores = results["scores"]
+
+    image_name = f"{scheduler_name}_images.png"
+    image_grid.save(image_name)
+    img_str = f"![img_grid_{scheduler_name}](/file=./{image_name})\n"
+
+    report_str = f"""
+    \n\n## {scheduler_name}
+
+    ### Sample images
+
+    {img_str}
+
+    ### Scores
+
+    {scores}
+    \n\n
+    """
+
+    return report_str
+
+
+def add_psnr_ssim_to_report(
+    original_scheduler_name: str, ssim_scores: Dict = None, psnr_scores: Dict = None
+) -> str:
+    current_str = ""
+    if ssim_scores is not None:
+        current_str += f"""
+    \n\n
+    ## SSIM
+
+    SSIM computed w.r.t. the images generated with {original_scheduler_name}:\n\n {json.dumps(ssim_scores, indent=6)}
+    """
+    if psnr_scores is not None:
+        current_str += f"""
+    \n\n
+    ## PSNR
+
+    PSNR computed w.r.t. the images generated with {original_scheduler_name}:\n\n {json.dumps(psnr_scores, indent=6)}
+    """
+    return current_str
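The report sections compose by plain string concatenation; a hypothetical call with hand-written scores shows the shape of the output:

```python
from report_utils import add_psnr_ssim_to_report

# Illustrative values only, keyed by scheduler name.
psnr_scores = {"DDIMScheduler": 29.51, "EulerDiscreteScheduler": 30.02}

section = add_psnr_ssim_to_report(
    "PNDMScheduler", ssim_scores=None, psnr_scores=psnr_scores
)
# `section` is Markdown: a "## PSNR" heading followed by the JSON-formatted scores.
print(section)
```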