import math

import cv2
import numpy as np

# Model input resolution as (width, height) and ImageNet normalization statistics.
IMG_SIZE = (288, 384)
MEAN = np.array([0.485, 0.456, 0.406])
STD = np.array([0.229, 0.224, 0.225])
# Names of the model's 18 keypoints; SKELETON and OPENPOSE_TO_GESTURE index into this tuple.
KPS = (
    "Head",
    "Neck",
    "Right Shoulder",
    "Right Arm",
    "Right Hand",
    "Left Shoulder",
    "Left Arm",
    "Left Hand",
    "Spine",
    "Hips",
    "Right Upper Leg",
    "Right Leg",
    "Right Foot",
    "Left Upper Leg",
    "Left Leg",
    "Left Foot",
    "Left Toe",
    "Right Toe",
)
# Pairs of keypoint indices (into KPS) that form limbs, useful for drawing the skeleton.
SKELETON = (
    (0, 1),
    (1, 8),
    (8, 9),
    (9, 10),
    (9, 13),
    (10, 11),
    (11, 12),
    (13, 14),
    (14, 15),
    (1, 2),
    (2, 3),
    (3, 4),
    (1, 5),
    (5, 6),
    (6, 7),
    (15, 16),
    (12, 17),
)
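
# A visualization sketch, not part of the original pipeline: draws keypoints and the
# SKELETON limbs onto an image with OpenCV. The function name, colors, and the
# confidence threshold are illustrative assumptions.
def draw_skeleton(img, keypoints, confidence, threshold=0.3):
    out = img.copy()
    conf = np.asarray(confidence).reshape(-1)
    # Draw a line for every limb whose two endpoints are confident enough.
    for a, b in SKELETON:
        if conf[a] > threshold and conf[b] > threshold:
            pt_a = (int(keypoints[a][0]), int(keypoints[a][1]))
            pt_b = (int(keypoints[b][0]), int(keypoints[b][1]))
            cv2.line(out, pt_a, pt_b, color=(0, 255, 0), thickness=2)
    # Mark each confident keypoint with a small filled circle.
    for (x, y), c in zip(keypoints, conf):
        if c > threshold:
            cv2.circle(out, (int(x), int(y)), radius=3, color=(0, 0, 255), thickness=-1)
    return out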
# Mapping from OpenPose (25-joint) keypoint indices to indices in KPS;
# -1 means the OpenPose joint has no counterpart in this model.
OPENPOSE_TO_GESTURE = (
    0,   # 0  Head
    1,   # 1  Neck
    2,   # 2  Right Shoulder
    3,   # 3  Right Arm
    4,   # 4  Right Hand
    5,   # 5  Left Shoulder
    6,   # 6  Left Arm
    7,   # 7  Left Hand
    9,   # 8  Hips
    10,  # 9  Right Upper Leg
    11,  # 10 Right Leg
    12,  # 11 Right Foot
    13,  # 12 Left Upper Leg
    14,  # 13 Left Leg
    15,  # 14 Left Foot
    -1,  # 15
    -1,  # 16
    -1,  # 17
    -1,  # 18
    16,  # 19 Left Toe
    -1,  # 20
    -1,  # 21
    17,  # 22 Right Toe
    -1,  # 23
    -1,  # 24
)
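
# A remapping sketch, not part of the original code: shows how OPENPOSE_TO_GESTURE
# appears intended to be used, copying OpenPose keypoints into an array laid out in
# KPS order. The function name and the mapping direction are assumptions.
def remap_openpose_keypoints(openpose_kps):
    """openpose_kps: array of shape (25, D); returns an array of shape (len(KPS), D)."""
    gesture_kps = np.zeros((len(KPS), openpose_kps.shape[1]), dtype=openpose_kps.dtype)
    for op_idx, gesture_idx in enumerate(OPENPOSE_TO_GESTURE):
        if gesture_idx >= 0:
            gesture_kps[gesture_idx] = openpose_kps[op_idx]
    return gesture_kps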
def transform(img):
    """Normalize an RGB image to ImageNet statistics and convert HWC -> CHW."""
    img = img.astype("float32") / 255
    # Cast back to float32 (MEAN/STD promote the result to float64); cv2.dnn expects CV_32F blobs.
    img = ((img - MEAN) / STD).astype(np.float32)
    return np.transpose(img, axes=(2, 0, 1))
def get_affine_transform(
    center,
    scale,
    rot,
    output_size,
    shift=np.array([0, 0], dtype=np.float32),
    inv=0,
    pixel_std=200,
):
    if not isinstance(scale, (np.ndarray, list)):
        scale = np.array([scale, scale])
    scale_tmp = scale * pixel_std
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]
    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)
    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
    return trans
def get_3rd_point(a, b):
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)


def get_dir(src_point, rot_rad):
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    src_result = [0, 0]
    src_result[0] = src_point[0] * cs - src_point[1] * sn
    src_result[1] = src_point[0] * sn + src_point[1] * cs
    return src_result
def process_image(path, input_img_size, pixel_std=200):
    data_numpy = cv2.imread(path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    # OpenCV loads BGR; the model expects RGB.
    data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)
    h, w = data_numpy.shape[:2]
    c = np.array([w / 2, h / 2], dtype=np.float32)
    # Pad the crop so it matches the aspect ratio of the model input.
    aspect_ratio = input_img_size[0] / input_img_size[1]
    if w > aspect_ratio * h:
        h = w * 1.0 / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    s = np.array([w / pixel_std, h / pixel_std], dtype=np.float32) * 1.25
    r = 0
    trans = get_affine_transform(c, s, r, input_img_size, pixel_std=pixel_std)
    model_input = cv2.warpAffine(data_numpy, trans, input_img_size, flags=cv2.INTER_LINEAR)
    model_input = transform(model_input)
    return model_input, data_numpy, c, s
def get_final_preds(batch_heatmaps, center, scale, post_process=False):
    coords, maxvals = get_max_preds(batch_heatmaps)
    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]
    # Post-processing: shift each peak a quarter pixel towards the higher neighboring response.
    if post_process:
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = batch_heatmaps[n][p]
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                    diff = np.array(
                        [
                            hm[py][px + 1] - hm[py][px - 1],
                            hm[py + 1][px] - hm[py - 1][px],
                        ]
                    )
                    coords[n][p] += np.sign(diff) * 0.25
    preds = coords.copy()
    # Transform heatmap coordinates back into original image coordinates.
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(
            coords[i], center[i], scale[i], [heatmap_width, heatmap_height]
        )
    return preds, maxvals
def transform_preds(coords, center, scale, output_size):
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords


def affine_transform(pt, t):
    new_pt = np.array([pt[0], pt[1], 1.0]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]
def get_max_preds(batch_heatmaps):
    """
    Get predictions from score maps.
    heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
    """
    assert isinstance(
        batch_heatmaps, np.ndarray
    ), "batch_heatmaps should be numpy.ndarray"
    assert batch_heatmaps.ndim == 4, "batch_heatmaps should be 4-ndim"
    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = np.argmax(heatmaps_reshaped, 2)
    maxvals = np.amax(heatmaps_reshaped, 2)
    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))
    # Convert flat argmax indices into (x, y) heatmap coordinates.
    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
    preds[:, :, 0] = preds[:, :, 0] % width
    preds[:, :, 1] = np.floor(preds[:, :, 1] / width)
    # Zero out predictions whose maximum response is not positive.
    pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)
    preds *= pred_mask
    return preds, maxvals
def infer_single_image(model, img_path, input_img_size=(288, 384), return_kps=True):
    img_path = str(img_path)
    pose_input, img, center, scale = process_image(
        img_path, input_img_size=input_img_size
    )
    model.setInput(pose_input[None])
    predicted_heatmap = model.forward()
    if not return_kps:
        return predicted_heatmap.squeeze(0)
    predicted_keypoints, confidence = get_final_preds(
        predicted_heatmap, center[None], scale[None], post_process=True
    )
    predicted_keypoints = predicted_keypoints.squeeze(0)
    confidence = confidence.squeeze(0)
    predicted_heatmap = predicted_heatmap.squeeze(0)
    return img, predicted_keypoints, confidence, predicted_heatmap
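
# A minimal usage sketch, not part of the original file: the `model` argument is
# assumed to be a cv2.dnn network, since it is driven via setInput/forward above.
# Both the model filename "pose_model.onnx" and the image path are hypothetical.
if __name__ == "__main__":
    net = cv2.dnn.readNet("pose_model.onnx")
    img, keypoints, confidence, heatmaps = infer_single_image(net, "person.jpg")
    # keypoints: (18, 2) array in original-image coordinates; confidence: (18, 1).
    for name, (x, y), conf in zip(KPS, keypoints, confidence):
        print(f"{name}: ({x:.1f}, {y:.1f}) conf={conf[0]:.2f}")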