Commit 34b0078 by nickfraser (parent: 62e0e7b)

Feat (script): Added option to validate on MLPerf validation set & to load a pre-quantized checkpoint.

Files changed (1): quant_sdxl/quant_sdxl.py (+62 -24)
@@ -32,11 +32,12 @@ from brevitas.graph.quantize import layerwise_quantize
 from brevitas.inject.enum import StatsOp
 from brevitas.nn.equalized_layer import EqualizedModule
 from brevitas.utils.torch_utils import KwargsForwardHook
+import brevitas.config as config
 
 from brevitas_examples.common.parse_utils import add_bool_arg
 from brevitas_examples.stable_diffusion.sd_quant.export import export_quant_params
 from brevitas_examples.stable_diffusion.sd_quant.nn import QuantAttention
-import brevitas.config as config
+from brevitas_examples.stable_diffusion.mlperf_evaluation.accuracy import compute_mlperf_fid
 
 TEST_SEED = 123456
 torch.manual_seed(TEST_SEED)
@@ -125,6 +126,20 @@ def main(args):
         raise RuntimeError("LoRA layers should be fused in before calling into quantization.")
 
     pipe.set_progress_bar_config(disable=True)
+
+    if args.load_checkpoint is not None:
+        with load_quant_model_mode(pipe.unet):
+            pipe = pipe.to('cpu')
+            print(f"Loading checkpoint: {args.load_checkpoint}... ", end="")
+            pipe.unet.load_state_dict(torch.load(args.load_checkpoint, map_location='cpu'))
+            print(f"Checkpoint loaded!")
+        pipe = pipe.to(args.device)
+
+    if args.load_checkpoint is not None:
+        # Don't run full activation equalization if we're loading a quantized checkpoint
+        num_ae_prompts = 2
+    else:
+        num_ae_prompts = len(calibration_prompts)
     with activation_equalization_mode(
             pipe.unet,
             alpha=args.act_eq_alpha,
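Note on the load path: the state dict restored here is the same one written later via torch.save(pipe.unet.state_dict(), ...), and it is loaded under load_quant_model_mode so that Brevitas picks up the checkpoint's quantization parameters instead of recomputing them. A minimal sketch of the round-trip, assuming load_quant_model_mode is the context manager from brevitas.graph.calibrate and that quant_unet is an already-quantized module (both names are illustrative here):

    import torch
    from brevitas.graph.calibrate import load_quant_model_mode  # assumed import path

    # Save: after PTQ, the state dict carries quantization parameters
    # (scales, zero-points) alongside the regular weights.
    torch.save(quant_unet.state_dict(), "unet_ptq.pth")

    # Load: restore under load_quant_model_mode so the stored quant
    # parameters are applied rather than re-derived from calibration.
    with load_quant_model_mode(quant_unet):
        quant_unet.load_state_dict(torch.load("unet_ptq.pth", map_location="cpu"))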
@@ -138,7 +153,7 @@
         total_steps = args.calibration_steps
     run_val_inference(
         pipe,
-        calibration_prompts,
+        calibration_prompts[:num_ae_prompts],
         total_steps=total_steps,
         test_latents=latents,
         guidance_scale=args.guidance_scale)
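Even when a checkpoint is loaded, activation equalization is not skipped outright: it still runs, just over two prompts instead of the full calibration set, matching the in-diff comment above. The selection could equally be written as a conditional expression (an equivalent reformulation, not the patch's code):

    # Two prompts suffice when scales will come from the checkpoint;
    # otherwise equalize over every calibration prompt.
    num_ae_prompts = 2 if args.load_checkpoint is not None else len(calibration_prompts)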
@@ -186,26 +201,32 @@
 
     pipe.set_progress_bar_config(disable=True)
 
-    print("Applying activation calibration")
-    with torch.no_grad(), calibration_mode(pipe.unet):
-        run_val_inference(
-            pipe,
-            calibration_prompts,
-            total_steps=args.calibration_steps,
-            test_latents=latents,
-            guidance_scale=args.guidance_scale)
-
-    print("Applying bias correction")
-    with torch.no_grad(), bias_correction_mode(pipe.unet):
-        run_val_inference(
-            pipe,
-            calibration_prompts,
-            total_steps=args.calibration_steps,
-            test_latents=latents,
-            guidance_scale=args.guidance_scale)
-
-    if args.checkpoint_name is not None:
-        torch.save(pipe.unet.state_dict(), os.path.join(output_dir, args.checkpoint_name))
+    if args.load_checkpoint is None:
+        print("Applying activation calibration")
+        with torch.no_grad(), calibration_mode(pipe.unet):
+            run_val_inference(
+                pipe,
+                calibration_prompts,
+                total_steps=args.calibration_steps,
+                test_latents=latents,
+                guidance_scale=args.guidance_scale)
+
+        print("Applying bias correction")
+        with torch.no_grad(), bias_correction_mode(pipe.unet):
+            run_val_inference(
+                pipe,
+                calibration_prompts,
+                total_steps=args.calibration_steps,
+                test_latents=latents,
+                guidance_scale=args.guidance_scale)
+
+        if args.checkpoint_name is not None:
+            torch.save(pipe.unet.state_dict(), os.path.join(output_dir, args.checkpoint_name))
+
+    # Perform inference
+    if args.validation_prompts > 0:
+        print(f"Computing validation accuracy")
+        compute_mlperf_fid(args.model, args.path_to_coco, pipe, args.validation_prompts, output_dir)
 
     if args.export_target:
         pipe.unet.to('cpu').to(dtype)
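Reading the new call site, compute_mlperf_fid takes the model name, the COCO path, the pipeline, the number of validation prompts, and the output directory. Since --path-to-coco is documented as required whenever validation runs but the script does not appear to enforce it, a defensive wrapper might look like this (the ValueError guard is an illustrative addition, not part of the patch):

    if args.validation_prompts > 0:
        if args.path_to_coco is None:
            # --path-to-coco must point at the MLPerf-compliant COCO data
            raise ValueError("--path-to-coco is required when --validation-prompts > 0")
        compute_mlperf_fid(args.model, args.path_to_coco, pipe, args.validation_prompts, output_dir)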
@@ -229,6 +250,18 @@ if __name__ == "__main__":
         type=int,
         default=500,
         help='Number of prompts to use for calibration. Default: %(default)s')
+    parser.add_argument(
+        '--validation-prompts',
+        type=int,
+        default=0,
+        help='Number of prompts to use for validation. Default: %(default)s')
+    parser.add_argument(
+        '--path-to-coco',
+        type=str,
+        default=None,
+        help=
+        'Path to an MLPerf-compliant COCO dataset. Required when --validation-prompts > 0. Default: None'
+    )
     parser.add_argument(
         '--checkpoint-name',
         type=str,
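As elsewhere in this parser, the new help strings rely on argparse's %(default)s interpolation, which substitutes the declared default when --help is rendered. A self-contained illustration:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--validation-prompts',
        type=int,
        default=0,
        help='Number of prompts to use for validation. Default: %(default)s')
    # `--help` prints: Number of prompts to use for validation. Default: 0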
@@ -237,11 +270,16 @@
         'Name to use to store the checkpoint in the output dir. If not provided, no checkpoint is saved.'
     )
     parser.add_argument(
-        '--path-to-latents',
+        '--load-checkpoint',
         type=str,
         default=None,
+        help='Path to checkpoint to load. If provided, PTQ techniques are skipped.')
+    parser.add_argument(
+        '--path-to-latents',
+        type=str,
+        required=True,
         help=
-        'Load pre-defined latents. If not provided, they are generated based on an internal seed.')
+        'Path to pre-defined latents.')
     parser.add_argument('--guidance-scale', type=float, default=8., help='Guidance scale.')
     parser.add_argument(
         '--calibration-steps', type=float, default=8, help='Steps used during calibration')
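Taken together, a post-patch run that loads a pre-quantized UNet checkpoint and scores it against the MLPerf COCO subset might be invoked like this, combined with the script's existing flags (all paths are placeholders):

    python quant_sdxl/quant_sdxl.py \
        --load-checkpoint /path/to/unet_ptq.pth \
        --path-to-latents /path/to/latents.pt \
        --validation-prompts 500 \
        --path-to-coco /path/to/coco

Note that --path-to-latents is now required on every run: the old fallback of generating latents from an internal seed was removed along with its help text.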