Spaces:

Superlang
/

remove_background

Runtime error

App Files Files Community

remove_background / DIS /IsNetPipeLine.py

Superlang

init

4d0b7ae over 1 year ago

raw

history blame contribute delete

4.12 kB

	"""
	reference: https://github.com/xuebinqin/DIS
	"""

	import PIL.Image
	import numpy as np
	import torch
	import torch.nn.functional as F
	from PIL import Image
	from torch import nn
	from torch.autograd import Variable
	from torchvision import transforms
	from torchvision.transforms.functional import normalize

	from .models import ISNetDIS

	# Helpers
	device = 'cuda' if torch.cuda.is_available() else 'cpu'


	class GOSNormalize(object):
	"""
	Normalize the Image using torch.transforms
	"""

	def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
	self.mean = mean
	self.std = std

	def __call__(self, image):
	image = normalize(image, self.mean, self.std)
	return image


	def im_preprocess(im, size):
	if len(im.shape) < 3:
	im = im[:, :, np.newaxis]
	if im.shape[2] == 1:
	im = np.repeat(im, 3, axis=2)
	im_tensor = torch.tensor(im.copy(), dtype=torch.float32)
	im_tensor = torch.transpose(torch.transpose(im_tensor, 1, 2), 0, 1)
	if len(size) < 2:
	return im_tensor, im.shape[0:2]
	else:
	im_tensor = torch.unsqueeze(im_tensor, 0)
	im_tensor = F.upsample(im_tensor, size, mode="bilinear")
	im_tensor = torch.squeeze(im_tensor, 0)

	return im_tensor.type(torch.uint8), im.shape[0:2]


	class IsNetPipeLine:
	def __init__(self, model_path=None, model_digit="full"):
	self.model_digit = model_digit
	self.model = ISNetDIS()
	self.cache_size = [1024, 1024]
	self.transform = transforms.Compose([
	GOSNormalize([0.5, 0.5, 0.5], [1.0, 1.0, 1.0])
	])

	# Build Model
	self.build_model(model_path)

	def load_image(self, image: PIL.Image.Image):
	im = np.array(image.convert("RGB"))
	im, im_shp = im_preprocess(im, self.cache_size)
	im = torch.divide(im, 255.0)
	shape = torch.from_numpy(np.array(im_shp))
	return self.transform(im).unsqueeze(0), shape.unsqueeze(0) # make a batch of image, shape

	def build_model(self, model_path=None):
	if model_path is not None:
	self.model.load_state_dict(torch.load(model_path, map_location=device))

	# convert to half precision
	if self.model_digit == "half":
	self.model.half()
	for layer in self.model.modules():
	if isinstance(layer, nn.BatchNorm2d):
	layer.float()
	self.model.to(device)
	self.model.eval()

	def __call__(self, image: PIL.Image.Image):
	image_tensor, orig_size = self.load_image(image)
	mask = self.predict(image_tensor, orig_size)

	pil_mask = Image.fromarray(mask).convert('L')
	im_rgb = image.convert("RGB")

	im_rgba = im_rgb.copy()
	im_rgba.putalpha(pil_mask)

	return [im_rgba, pil_mask]

	def predict(self, inputs_val: torch.Tensor, shapes_val):
	"""
	Given an Image, predict the mask
	"""

	if self.model_digit == "full":
	inputs_val = inputs_val.type(torch.FloatTensor)
	else:
	inputs_val = inputs_val.type(torch.HalfTensor)

	inputs_val_v = Variable(inputs_val, requires_grad=False).to(device) # wrap inputs in Variable

	ds_val = self.model(inputs_val_v)[0] # list of 6 results

	# B x 1 x H x W # we want the first one which is the most accurate prediction
	pred_val = ds_val[0][0, :, :, :]

	# recover the prediction spatial size to the orignal image size
	pred_val = torch.squeeze(
	F.upsample(torch.unsqueeze(pred_val, 0), (shapes_val[0][0], shapes_val[0][1]), mode='bilinear'))

	ma = torch.max(pred_val)
	mi = torch.min(pred_val)
	pred_val = (pred_val - mi) / (ma - mi) # max = 1

	if device == 'cuda':
	torch.cuda.empty_cache()
	return (pred_val.detach().cpu().numpy() * 255).astype(np.uint8) # it is the mask we need


	# a = IsNetPipeLine(model_path="save_models/isnet.pth")
	# input_image = Image.open("image_0mx.png")
	# rgb, mask = a(input_image)
	#
	# rgb.save("rgb.png")
	# mask.save("mask.png")