Commit da1e12f by Andranik Sargsyan
Parent(s): bfd34e9

enable fp16, move SR to cuda:1
Files changed:
- app.py +2 -2
- lib/methods/rasg.py +9 -6
- lib/methods/sd.py +8 -6
- lib/methods/sr.py +11 -9
- lib/models/ds_inp.py +7 -2
- lib/models/sam.py +3 -4
- lib/models/sd15_inp.py +6 -1
- lib/models/sd2_inp.py +8 -1
- lib/models/sd2_sr.py +11 -8
- lib/smplfusion/ddim.py +5 -3
- lib/smplfusion/models/unet.py +8 -1
- lib/utils/iimage.py +4 -0
app.py CHANGED
@@ -64,8 +64,8 @@ inpainting_models = OrderedDict([
     ("Stable-Inpainting 2.0", models.sd2_inp.load_model()),
     ("Stable-Inpainting 1.5", models.sd15_inp.load_model())
 ])
-sr_model = models.sd2_sr.load_model()
-sam_predictor = models.sam.load_model()
+sr_model = models.sd2_sr.load_model(device='cuda:1')
+sam_predictor = models.sam.load_model(device='cuda:0')
 
 inp_model = None
 cached_inp_model_name = ''
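The super-resolution model now lives on a second GPU (cuda:1) while SAM and the inpainting models stay on the first, splitting memory across devices. A minimal sketch for verifying where a model's weights actually landed (the helper is hypothetical, not part of the repo; it assumes sr_model.unet is an nn.Module):

    import torch

    def model_device(module: torch.nn.Module) -> torch.device:
        # All parameters of a correctly placed module share one device.
        return next(module.parameters()).device

    # e.g., after loading:
    #   assert model_device(sr_model.unet) == torch.device('cuda:1')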
lib/methods/rasg.py CHANGED
@@ -38,9 +38,11 @@ def run(ddim, method, prompt, image, mask, seed, eta, prefix, negative_prompt, p
     unet_condition = ddim.get_inpainting_condition(image, mask)
     share.set_mask(mask)
 
+    dtype = unet_condition.dtype
+
     # Starting latent
     seed_everything(seed)
-    zt = torch.randn((1,4) + unet_condition.shape[2:]).cuda()
+    zt = torch.randn((1,4) + unet_condition.shape[2:]).cuda().to(dtype)
 
     # Setup unet for guidance
     ddim.unet.requires_grad_(True)
@@ -58,11 +60,12 @@
 
         # Run the model
         _zt = zt if unet_condition is None else torch.cat([zt, unet_condition], 1)
-        eps_uncond, eps = ddim.unet(
-            torch.cat([_zt, _zt]),
-            timesteps = torch.tensor([timestep, timestep]).cuda(),
-            context = context
-        ).detach().chunk(2)
+        with torch.autocast('cuda'):
+            eps_uncond, eps = ddim.unet(
+                torch.cat([_zt, _zt]).to(dtype),
+                timesteps = torch.tensor([timestep, timestep]).cuda(),
+                context = context
+            ).detach().chunk(2)
 
         # Unconditional guidance
         eps = (eps_uncond + guidance_scale * (eps - eps_uncond))
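The fp16 recipe here has two parts: the starting latent is cast to the dtype of the inpainting condition, and the UNet forward pass runs under torch.autocast('cuda') so mixed-precision kernels are selected automatically. A self-contained sketch of the same pattern with a stand-in convolution (names are illustrative, not the repo's API):

    import torch

    unet = torch.nn.Conv2d(9, 4, 3, padding=1).cuda().half()         # stand-in for the UNet
    condition = torch.randn(1, 5, 64, 64, device='cuda', dtype=torch.float16)

    dtype = condition.dtype                                          # torch.float16
    zt = torch.randn((1, 4) + condition.shape[2:]).cuda().to(dtype)  # latent in model dtype
    _zt = torch.cat([zt, condition], 1)                              # 9-channel input

    with torch.autocast('cuda'):
        eps = unet(_zt)                                              # runs in mixed precision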
lib/methods/sd.py CHANGED
@@ -43,11 +43,12 @@ def run(
 
     # Image condition
     unet_condition = ddim.get_inpainting_condition(image, mask)
+    dtype = unet_condition.dtype
     share.set_mask(mask)
 
    # Starting latent
     seed_everything(seed)
-    zt = torch.randn((1,4) + unet_condition.shape[2:]).cuda()
+    zt = torch.randn((1,4) + unet_condition.shape[2:]).cuda().to(dtype)
 
     # Turn off gradients
     ddim.unet.requires_grad_(False)
@@ -58,11 +59,12 @@
         if share.timestep <= 500: router.reset()
 
         _zt = zt if unet_condition is None else torch.cat([zt, unet_condition], 1)
-        eps_uncond, eps = ddim.unet(
-            torch.cat([_zt, _zt]),
-            timesteps = torch.tensor([timestep, timestep]).cuda(),
-            context = context
-        ).chunk(2)
+        with torch.autocast('cuda'):
+            eps_uncond, eps = ddim.unet(
+                torch.cat([_zt, _zt]).to(dtype),
+                timesteps = torch.tensor([timestep, timestep]).cuda(),
+                context = context
+            ).chunk(2)
 
         eps = (eps_uncond + guidance_scale * (eps - eps_uncond))
         z0 = (zt - share.schedule.sqrt_one_minus_alphas[timestep] * eps) / share.schedule.sqrt_alphas[timestep]
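Both samplers combine the two UNet outputs with standard classifier-free guidance and then estimate the clean latent from the eps prediction. In LaTeX, with s the guidance_scale and, assuming schedule.sqrt_alphas stores the square root of the cumulative signal level \(\bar\alpha_t\):

    \hat\epsilon = \epsilon_\theta(z_t, \varnothing)
        + s \,\bigl(\epsilon_\theta(z_t, c) - \epsilon_\theta(z_t, \varnothing)\bigr),
    \qquad
    \hat z_0 = \frac{z_t - \sqrt{1-\bar\alpha_t}\,\hat\epsilon}{\sqrt{\bar\alpha_t}}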
lib/methods/sr.py CHANGED
@@ -59,8 +59,10 @@ def refine_mask(hr_image, hr_mask, lr_image, sam_predictor):
 
 def run(ddim, sam_predictor, lr_image, hr_image, hr_mask, prompt = 'high resolution professional photo', noise_level=20,
         blend_output = True, blend_trick = True, no_superres = False,
-        dt = 20, seed = 1, guidance_scale = 7.5, negative_prompt = '', use_sam_mask = False):
+        dt = 20, seed = 1, guidance_scale = 7.5, negative_prompt = '', use_sam_mask = False):
     torch.manual_seed(seed)
+    dtype = ddim.vae.encoder.conv_in.weight.dtype
+    device = ddim.vae.encoder.conv_in.weight.device
 
     router.attention_forward = attentionpatch.default.forward_xformers
     router.basic_transformer_forward = transformerpatch.default.forward
@@ -74,7 +76,7 @@
     hr_mask = hr_mask.padx(256, padding_mode='reflect').dilate(19)
     hr_mask_orig = hr_mask
     lr_image = lr_image.padx(64, padding_mode='reflect')
-    lr_mask = hr_mask.resize((lr_image.torch().shape[2], lr_image.torch().shape[3]), resample = Image.BICUBIC).alpha().torch(vmin=0).cuda()
+    lr_mask = hr_mask.resize((lr_image.torch().shape[2], lr_image.torch().shape[3]), resample = Image.BICUBIC).alpha().torch(vmin=0).to(device)
     lr_mask = TvF.gaussian_blur(lr_mask, kernel_size=19)
 
     if no_superres:
@@ -89,18 +91,18 @@
 
     # encode hr image
     with torch.no_grad():
-        hr_z0 = ddim.vae.encode(hr_image.torch().cuda().to(dtype)).mean * ddim.config.scale_factor
+        hr_z0 = ddim.vae.encode(hr_image.torch().cuda().to(dtype=dtype, device=device)).mean * ddim.config.scale_factor
 
     assert hr_z0.shape[2] == lr_image.torch().shape[2]
     assert hr_z0.shape[3] == lr_image.torch().shape[3]
 
-    unet_condition = lr_image.cuda().torch().to(memory_format=torch.contiguous_format).to(dtype)
-    zT = torch.randn((1,4,unet_condition.shape[2], unet_condition.shape[3])).cuda().to(dtype)
+    unet_condition = lr_image.cuda().torch().to(memory_format=torch.contiguous_format).to(dtype=dtype, device=device)
+    zT = torch.randn((1,4,unet_condition.shape[2], unet_condition.shape[3])).cuda().to(dtype=dtype, device=device)
 
     with torch.no_grad():
         context = ddim.encoder.encode([negative_prompt, prompt])
 
-    noise_level = torch.Tensor(1 * [noise_level]).to('cuda').long()
+    noise_level = torch.Tensor(1 * [noise_level]).to(device=device).long()
     unet_condition, noise_level = ddim.low_scale_model(unet_condition, noise_level=noise_level)
 
     with torch.autocast('cuda'), torch.no_grad():
@@ -110,13 +112,13 @@
         _zt = zt if unet_condition is None else torch.cat([zt, unet_condition], 1)
 
         eps_uncond, eps = ddim.unet(
-            torch.cat([_zt, _zt]).to(dtype),
-            timesteps = torch.tensor([t, t]).cuda(),
+            torch.cat([_zt, _zt]).to(dtype=dtype, device=device),
+            timesteps = torch.tensor([t, t]).to(device=device),
             context = context,
             y=torch.cat([noise_level]*2)
         ).chunk(2)
 
-        ts = torch.full((zt.shape[0],), t, device='cuda', dtype=torch.long)
+        ts = torch.full((zt.shape[0],), t, device=device, dtype=torch.long)
         model_output = (eps_uncond + guidance_scale * (eps - eps_uncond))
         eps = predict_eps_from_z_and_v(ddim.schedule, zt, ts, model_output).to(dtype)
         z0 = predict_start_from_z_and_v(ddim.schedule, zt, ts, model_output).to(dtype)
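Instead of hard-coding .cuda(), sr.py now reads both dtype and device off a weight tensor of the VAE (conv_in is its first layer), so every tensor it builds follows wherever load_model placed the network. The same idiom in isolation (stand-in module, not the repo's VAE):

    import torch

    vae_stub = torch.nn.Conv2d(3, 8, 3).to(dtype=torch.bfloat16, device='cuda:1')

    dtype = vae_stub.weight.dtype    # torch.bfloat16
    device = vae_stub.weight.device  # cuda:1

    # New tensors follow the model instead of assuming cuda:0:
    noise = torch.randn(1, 8, 32, 32, dtype=dtype, device=device)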
lib/models/ds_inp.py CHANGED
@@ -15,7 +15,7 @@ DOWNLOAD_URL = 'https://civitai.com/api/download/models/131004'
 download_file(DOWNLOAD_URL, MODEL_PATH)
 
 
-def load_model():
+def load_model(dtype=torch.float16):
     print ("Loading model: Dreamshaper Inpainting V8")
 
     download_file(DOWNLOAD_URL, MODEL_PATH)
@@ -36,10 +36,15 @@ def load_model():
     encoder.load_state_dict(encoder_state)
     vae.load_state_dict(vae_state)
 
+    if dtype == torch.float16:
+        unet.convert_to_fp16()
+        vae.to(dtype)
+        encoder.to(dtype)
+
     unet = unet.requires_grad_(False)
     encoder = encoder.requires_grad_(False)
     vae = vae.requires_grad_(False)
-
+
     ddim = DDIM(config, vae, encoder, unet)
     share.schedule = scheduler.linear(config.timesteps, config.linear_start, config.linear_end)
 
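Note the asymmetry in the loaders: the VAE and text encoder are cast wholesale with .to(dtype), while the UNet goes through convert_to_fp16(), which (per the unet.py change below) halves only primitive conv/linear modules. A quick way to see what a selective conversion did (illustrative helper, not part of the repo; layers are chosen for dtype inspection, not a working forward pass):

    import torch
    from collections import Counter

    def dtype_histogram(module: torch.nn.Module) -> Counter:
        # Counts parameters by dtype; a selectively converted model
        # shows a mix of float16 and float32 entries.
        return Counter(str(p.dtype) for p in module.parameters())

    # e.g. dtype_histogram(unet)
    # -> Counter({'torch.float16': ..., 'torch.float32': ...})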
lib/models/sam.py CHANGED
@@ -1,3 +1,4 @@
+import torch
 from segment_anything import sam_model_registry, SamPredictor
 from .common import *
 
@@ -8,12 +9,10 @@ DOWNLOAD_URL = 'https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939
 download_file(DOWNLOAD_URL, MODEL_PATH)
 
 
-def load_model():
+def load_model(device='cuda:0'):
     print ("Loading model: SAM")
     download_file(DOWNLOAD_URL, MODEL_PATH)
-
-    device = "cuda"
-    sam = sam_model_registry[model_type](checkpoint=MODEL_PATH)
+    sam = sam_model_registry["vit_h"](checkpoint=MODEL_PATH)
     sam.to(device=device)
     sam_predictor = SamPredictor(sam)
     print ("SAM loaded")
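Besides threading the device through as a parameter, this change also removes a latent bug: the old code indexed sam_model_registry with a free variable model_type, which only worked if a global of that name happened to exist; the registry key is now hard-coded to "vit_h" to match the downloaded checkpoint. For reference, typical SamPredictor usage from the segment_anything library (not shown in this commit; coordinates are placeholders):

    import numpy as np

    # predictor = load_model(device='cuda:0')
    # image: H x W x 3 uint8 RGB array
    # predictor.set_image(image)
    # masks, scores, logits = predictor.predict(
    #     point_coords=np.array([[x, y]]),
    #     point_labels=np.array([1]),  # 1 marks a foreground point
    # )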
lib/models/sd15_inp.py CHANGED
@@ -12,7 +12,7 @@ MODEL_PATH = f'{MODEL_FOLDER}/sd-1-5-inpainting/sd-v1-5-inpainting.ckpt'
 download_file(DOWNLOAD_URL, MODEL_PATH)
 
 
-def load_model():
+def load_model(dtype=torch.float16):
     download_file(DOWNLOAD_URL, MODEL_PATH)
 
     state_dict = torch.load(MODEL_PATH)['state_dict']
@@ -34,6 +34,11 @@ def load_model():
     encoder.load_state_dict(encoder_state)
     vae.load_state_dict(vae_state)
 
+    if dtype == torch.float16:
+        unet.convert_to_fp16()
+        vae.to(dtype)
+        encoder.to(dtype)
+
     unet = unet.requires_grad_(False)
     encoder = encoder.requires_grad_(False)
     vae = vae.requires_grad_(False)
lib/models/sd2_inp.py CHANGED
@@ -13,7 +13,7 @@ DOWNLOAD_URL = 'https://huggingface.co/stabilityai/stable-diffusion-2-inpainting
 download_file(DOWNLOAD_URL, MODEL_PATH)
 
 
-def load_model():
+def load_model(dtype=torch.float16, device='cuda:0'):
     print ("Loading model: Stable-Inpainting 2.0")
 
     download_file(DOWNLOAD_URL, MODEL_PATH)
@@ -36,6 +36,13 @@ def load_model():
     encoder.load_state_dict(encoder_state)
     vae.load_state_dict(vae_state)
 
+    if dtype == torch.float16:
+        unet.convert_to_fp16()
+        unet.to(device=device)
+        vae.to(dtype=dtype, device=device)
+        encoder.to(dtype=dtype, device=device)
+        encoder.device = device
+
     unet = unet.requires_grad_(False)
     encoder = encoder.requires_grad_(False)
     vae = vae.requires_grad_(False)
lib/models/sd2_sr.py CHANGED
@@ -39,15 +39,15 @@ def extract_into_tensor(a, t, x_shape):
 
 def predict_eps_from_z_and_v(schedule, x_t, t, v):
     return (
-        extract_into_tensor(schedule.sqrt_alphas.cuda(), t, x_t.shape) * v +
-        extract_into_tensor(schedule.sqrt_one_minus_alphas.cuda(), t, x_t.shape) * x_t
+        extract_into_tensor(schedule.sqrt_alphas.to(x_t.device), t, x_t.shape) * v +
+        extract_into_tensor(schedule.sqrt_one_minus_alphas.to(x_t.device), t, x_t.shape) * x_t
     )
 
 
 def predict_start_from_z_and_v(schedule, x_t, t, v):
     return (
-        extract_into_tensor(schedule.sqrt_alphas.cuda(), t, x_t.shape) * x_t -
-        extract_into_tensor(schedule.sqrt_one_minus_alphas.cuda(), t, x_t.shape) * v
+        extract_into_tensor(schedule.sqrt_alphas.to(x_t.device), t, x_t.shape) * x_t -
+        extract_into_tensor(schedule.sqrt_one_minus_alphas.to(x_t.device), t, x_t.shape) * v
     )
 
 
@@ -153,7 +153,7 @@ def load_obj(path):
     return get_obj_from_str(objyaml['__class__'])(**objyaml.get("__init__", {}))
 
 
-def load_model(dtype=torch.bfloat16):
+def load_model(dtype=torch.bfloat16, device='cuda:0'):
     print ("Loading model: SD2 superresolution...")
 
     download_file(DOWNLOAD_URL, MODEL_PATH)
@@ -180,9 +180,10 @@ def load_model(dtype=torch.bfloat16):
     encoder = encoder.requires_grad_(False)
     vae = vae.requires_grad_(False)
 
-    unet.to(dtype)
-    vae.to(dtype)
-    encoder.to(dtype)
+    unet.to(dtype=dtype, device=device)
+    vae.to(dtype=dtype, device=device)
+    encoder.to(dtype=dtype, device=device)
+    encoder.device = device
 
     ddim = DDIM(config, vae, encoder, unet)
 
@@ -199,6 +200,8 @@ def load_model(dtype=torch.bfloat16):
     for param in low_scale_model.parameters():
         param.requires_grad = False
 
+    low_scale_model = low_scale_model.to(dtype=dtype, device=device)
+
    ddim.low_scale_model = low_scale_model
    print('SD2 superresolution loaded')
    return ddim
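The SD2 upscaler is a v-prediction model, so these two helpers convert the network's v output into the eps and x0 estimates the sampler needs. Assuming schedule.sqrt_alphas stores \(\sqrt{\bar\alpha_t}\):

    \epsilon = \sqrt{\bar\alpha_t}\,v + \sqrt{1-\bar\alpha_t}\,x_t,
    \qquad
    \hat x_0 = \sqrt{\bar\alpha_t}\,x_t - \sqrt{1-\bar\alpha_t}\,v

The switch from .cuda() to .to(x_t.device) matters once the model lives on cuda:1: schedule buffers moved to the default GPU would otherwise collide with latents on the second one.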
lib/smplfusion/ddim.py CHANGED
@@ -43,11 +43,13 @@ class DDIM:
 
     def get_inpainting_condition(self, image, mask):
         latent_size = [x//8 for x in image.size]
+        dtype = self.vae.encoder.conv_in.weight.dtype
         with torch.no_grad():
-            masked_image = image.torch().cuda() * ~mask.torch(0).bool().cuda()
-            condition_x0 = self.vae.encode(masked_image).mean * self.config.scale_factor
-            condition_mask = mask.resize(latent_size[::-1]).cuda().torch(0).bool()
+            masked_image = image.torch().cuda() * ~mask.torch(0).bool().cuda()
+            masked_image = masked_image.to(dtype)
+            condition_x0 = self.vae.encode(masked_image).mean * self.config.scale_factor
 
+        condition_mask = mask.resize(latent_size[::-1]).cuda().torch(0).bool().to(dtype)
         condition_x0 += 0.01 * condition_mask * torch.randn_like(condition_mask)
         return torch.cat([condition_mask, condition_x0], 1)
 
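get_inpainting_condition builds the five extra channels the inpainting UNet expects: a one-channel mask at latent resolution plus the four-channel VAE latent of the masked image. Concatenated with the noisy latent, this yields the 9-channel input seen in rasg.py and sd.py. A shape-only sketch (sizes are placeholders):

    import torch

    zt             = torch.randn(1, 4, 64, 64)  # noisy latent
    condition_mask = torch.zeros(1, 1, 64, 64)  # mask at latent resolution
    condition_x0   = torch.randn(1, 4, 64, 64)  # VAE latent of the masked image

    unet_condition = torch.cat([condition_mask, condition_x0], 1)
    unet_input = torch.cat([zt, unet_condition], 1)
    assert unet_input.shape[1] == 9  # matches the inpainting UNet's in_channels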
lib/smplfusion/models/unet.py CHANGED
@@ -14,7 +14,14 @@ from ..modules.attention.spatial_transformer import SpatialTransformer
 
 
 # dummy replace
-def convert_module_to_f16(x): pass
+def convert_module_to_f16(param):
+    """
+    Convert primitive modules to float16.
+    """
+    if isinstance(param, (nn.Linear, nn.Conv2d, nn.ConvTranspose2d)):
+        param.weight.data = param.weight.data.half()
+        if param.bias is not None:
+            param.bias.data = param.bias.data.half()
 def convert_module_to_f32(x): pass
 
 
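convert_module_to_f16 was a stub (hence the "dummy replace" comment); it now actually halves the weights and biases of primitive modules. It is written for use with nn.Module.apply, which visits every submodule, so normalization layers keep float32 parameters, a common mixed-precision stability choice. A sketch, assuming the function is in scope and presumably wired up by unet.convert_to_fp16() (layers chosen for dtype inspection, not a working forward pass):

    import torch
    import torch.nn as nn

    model = nn.Sequential(nn.Conv2d(4, 8, 3), nn.GroupNorm(2, 8), nn.Linear(8, 8))
    model.apply(convert_module_to_f16)  # visits every submodule recursively

    assert model[0].weight.dtype == torch.float16  # conv halved
    assert model[1].weight.dtype == torch.float32  # norm left in float32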
lib/utils/iimage.py CHANGED
@@ -59,6 +59,10 @@ class IImage:
         data = self.data.transpose(0, 3, 1, 2) / 255.
         return vmin + torch.from_numpy(data).float().to(self.device) * (vmax - vmin)
 
+    def to(self, device):
+        self.device = device
+        return self
+
     def cuda(self):
         self.device = 'cuda'
         return self
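The new IImage.to mirrors the torch .to(device) idiom (device only; it does not take a dtype), so image wrappers can follow the model to a specific GPU just like raw tensors. Usage, assuming an IImage instance img:

    # img.to('cuda:1').torch()  # tensor materialized on cuda:1
    # img.cuda().torch()        # existing shortcut, equivalent to .to('cuda')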