LN3Diff / nsr /volumetric_rendering /ray_sampler.py

NIRVANALAN

release file

87c126b 8 months ago

13.7 kB

	# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
	# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
	#
	# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
	# property and proprietary rights in and to this material, related
	# documentation and any modifications thereto. Any use, reproduction,
	# disclosure or distribution of this material and related documentation
	# without an express license agreement from NVIDIA CORPORATION or
	# its affiliates is strictly prohibited.
	"""
	The ray sampler is a module that takes in camera matrices and a resolution and returns batches of rays.
	Expects cam2world matrices that use the OpenCV camera coordinate system conventions.
	"""

	import torch
	from pdb import set_trace as st
	import random

	HUGE_NUMBER = 1e10
	TINY_NUMBER = 1e-6  # float32 only has 7 decimal digits precision


	######################################################################################
	# wrapper to simplify the use of nerfnet
	######################################################################################
	# https://github.com/Kai-46/nerfplusplus/blob/ebf2f3e75fd6c5dfc8c9d0b533800daaf17bd95f/ddp_model.py#L16
	def depth2pts_outside(ray_o, ray_d, depth):
	    """Map inverse-distance samples along rays to points outside the unit sphere.

	    Each ray is intersected with the unit sphere centred at the origin; the
	    intersection point is then rotated (Rodrigues' formula) so that it points
	    towards the sample whose distance from the origin is ``1 / depth``.

	    Args:
	        ray_o: ray origins, shape ``[..., 3]``.
	        ray_d: ray directions, shape ``[..., 3]`` (need not be unit length).
	        depth: shape ``[...]``; inverse of the distance to the sphere origin,
	            expected to lie in ``[0, 1]``.

	    Returns:
	        pts: shape ``[..., 4]``; a unit direction vector concatenated with
	            ``depth``.
	        depth_real: shape ``[...]``; the ray parameter ``t`` such that
	            ``ray_o + t * ray_d`` is the sampled point (the "conventional"
	            depth).

	    Note:
	        If the closest point of a ray to the origin lies outside the unit
	        sphere, the square root / arcsine below produce NaN; no guard is
	        applied here.
	    """
	    # note: d1 becomes negative if this mid point is behind camera
	    d1 = -torch.sum(ray_d * ray_o, dim=-1) / torch.sum(ray_d * ray_d, dim=-1)
	    # Point on the ray closest to the origin, and its distance from the origin.
	    p_mid = ray_o + d1.unsqueeze(-1) * ray_d
	    p_mid_norm = torch.norm(p_mid, dim=-1)
	    # Converts a metric length along the ray into units of ray_d.
	    ray_d_cos = 1. / torch.norm(ray_d, dim=-1)
	    # Half chord length: from p_mid to where the ray hits the unit sphere.
	    d2 = torch.sqrt(1. - p_mid_norm * p_mid_norm) * ray_d_cos
	    p_sphere = ray_o + (d1 + d2).unsqueeze(-1) * ray_d

	    rot_axis = torch.cross(ray_o, p_sphere, dim=-1)
	    rot_axis = rot_axis / torch.norm(rot_axis, dim=-1, keepdim=True)
	    phi = torch.asin(p_mid_norm)
	    theta = torch.asin(p_mid_norm * depth)  # depth is inside [0, 1]
	    rot_angle = (phi - theta).unsqueeze(-1)  # [..., 1]

	    # now rotate p_sphere
	    # Rodrigues formula: https://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula
	    cos_angle = torch.cos(rot_angle)
	    sin_angle = torch.sin(rot_angle)
	    axis_dot_p = torch.sum(rot_axis * p_sphere, dim=-1, keepdim=True)
	    p_sphere_new = p_sphere * cos_angle + \
	        torch.cross(rot_axis, p_sphere, dim=-1) * sin_angle + \
	        rot_axis * axis_dot_p * (1. - cos_angle)
	    p_sphere_new = p_sphere_new / torch.norm(
	        p_sphere_new, dim=-1, keepdim=True)
	    pts = torch.cat((p_sphere_new, depth.unsqueeze(-1)), dim=-1)

	    # now calculate conventional depth
	    depth_real = 1. / (depth + TINY_NUMBER) * torch.cos(theta) * ray_d_cos + d1
	    return pts, depth_real


	class RaySampler(torch.nn.Module):
	    """Build per-pixel camera rays from camera poses and intrinsics.

	    Pixel centres are expressed as normalised ``uv`` coordinates
	    (``(index + 0.5) / resolution``), un-projected with the inverse intrinsics
	    at depth 1 and transformed to world space with ``cam2world_matrix``.
	    """

	    def __init__(self):
	        super().__init__()
	        # Placeholder attributes; no method of this class assigns them.
	        self.ray_origins_h = None
	        self.ray_directions = None
	        self.depths = None
	        self.image_coords = None
	        self.rendering_options = None

	    def create_patch_uv(self,
	                        patch_resolution,
	                        resolution,
	                        cam2world_matrix,
	                        fg_bbox=None):
	        """Sample one square pixel patch per camera and return its uv grid.

	        Args:
	            patch_resolution: side length of the patch, in pixels.
	            resolution: side length of the full image, in pixels.
	            cam2world_matrix: only ``shape[0]`` (number of patches to draw)
	                and ``device`` are read.
	            fg_bbox: optional 2-D tensor with four columns. Columns 0-1 are
	                used as top/left bounds and columns 2-3 as bottom/right (end)
	                bounds, in pixels; the bounds are reduced over all rows and
	                the result is shared by every sampled patch. With
	                probability 0.125 the box is ignored and the patch is drawn
	                from the whole image instead.

	        Returns:
	            all_uv: float32 tensor of shape ``(B, patch_resolution**2, 2)``
	                holding ``(x, y)`` pixel-centre coordinates divided by
	                ``resolution``.
	            ray_bboxes: list of ``B`` tuples ``(top, left, height, width)``
	                of Python ints.
	        """
	        assert patch_resolution <= resolution, (
	            'patch_resolution must not exceed resolution')
	        device = cam2world_matrix.device

	        def clip_end(patch_end):
	            # Keep the whole patch [end - patch_resolution, end) inside the image.
	            return min(max(patch_end, patch_resolution), resolution)

	        def random_end(range_start, range_end):
	            # Draw the patch end uniformly (both bounds inclusive), then clip.
	            # Draws that land outside the valid range collapse onto the
	            # border, which over-samples the image boundary.
	            return clip_end(random.randint(range_start, range_end))

	        def end_inside_bbox(bbox_min, bbox_max):
	            # Choose an end so the patch lies within [bbox_min, bbox_max]
	            # when the box is larger than the patch; otherwise end the patch
	            # at the far side of the box.
	            lowest_end = bbox_min + patch_resolution
	            if lowest_end < bbox_max:
	                return random_end(lowest_end, bbox_max)
	            return clip_end(bbox_max)

	        def sample_patch_uv():
	            # Mostly train on the foreground; 0.125 of the draws fall back to
	            # sampling anywhere in the image (background included).
	            if fg_bbox is not None and random.random() > 0.125:
	                # Plain Python ints: random.randint needs integers, and a
	                # uint8 cast would wrap around for bounds >= 256.
	                top_min, left_min = (
	                    int(v) for v in fg_bbox[:, :2].min(dim=0)[0])
	                height_max, width_max = (
	                    int(v) for v in fg_bbox[:, 2:].max(dim=0)[0])
	                h_end = end_inside_bbox(top_min, height_max)
	                w_end = end_inside_bbox(left_min, width_max)
	            else:
	                h_end = random_end(patch_resolution,
	                                   resolution + patch_resolution)
	                w_end = random_end(patch_resolution,
	                                   resolution + patch_resolution)

	            h_start = h_end - patch_resolution
	            w_start = w_end - patch_resolution
	            assert h_start >= 0 and w_start >= 0, (
	                'sampled patch starts outside the image')

	            rows = torch.arange(start=h_start,
	                                end=h_end,
	                                dtype=torch.float32,
	                                device=device)
	            cols = torch.arange(start=w_start,
	                                end=w_end,
	                                dtype=torch.float32,
	                                device=device)
	            # Pixel centres, normalised by the full image resolution.
	            uv = torch.stack(torch.meshgrid(
	                rows, cols,
	                indexing='ij')) * (1. / resolution) + (0.5 / resolution)
	            uv = uv.flip(0).reshape(2, -1).transpose(1, 0)  # ij -> xy

	            # top: int, left: int, height: int, width: int
	            return uv, (h_start, w_start, patch_resolution, patch_resolution)

	        all_uv = []
	        ray_bboxes = []
	        for _ in range(cam2world_matrix.shape[0]):
	            uv, bbox = sample_patch_uv()
	            all_uv.append(uv)
	            ray_bboxes.append(bbox)

	        all_uv = torch.stack(all_uv, 0)  # B patch_res**2 2

	        return all_uv, ray_bboxes

	    def create_uv(self, resolution, cam2world_matrix):
	        """Return the full-image uv grid, repeated for every camera.

	        Args:
	            resolution: side length of the image, in pixels.
	            cam2world_matrix: only ``shape[0]`` and ``device`` are read.

	        Returns:
	            float32 tensor of shape ``(N, resolution**2, 2)`` with ``(x, y)``
	            pixel-centre coordinates ``(index + 0.5) / resolution``; ``x``
	            varies fastest.
	        """
	        device = cam2world_matrix.device
	        pixel_idx = torch.arange(resolution,
	                                 dtype=torch.float32,
	                                 device=device)
	        uv = torch.stack(torch.meshgrid(
	            pixel_idx,
	            torch.arange(resolution, dtype=torch.float32, device=device),
	            indexing='ij')) * (1. / resolution) + (0.5 / resolution)

	        # meshgrid('ij') yields (row, col); flip to (x, y) and flatten.
	        uv = uv.flip(0).reshape(2, -1).transpose(1, 0)
	        uv = uv.unsqueeze(0).repeat(cam2world_matrix.shape[0], 1, 1)

	        return uv

	    def forward(self, cam2world_matrix, intrinsics, resolution, fg_mask=None):
	        """
	        Create batches of rays and return origins and directions.

	        cam2world_matrix: (N, 4, 4)
	        intrinsics: (N, 3, 3); applied to uv coordinates in [0, 1], so they
	            are presumably normalised by the image size -- confirm with caller
	        resolution: int
	        fg_mask: accepted for interface compatibility; not used here

	        Returns a tuple (ray_origins, ray_dirs, None):
	        ray_origins: (N, M, 3), the camera position repeated for every ray
	        ray_dirs: (N, M, 3), unit-length directions in world space
	        with M = resolution**2.
	        """
	        N, M = cam2world_matrix.shape[0], resolution**2
	        cam_locs_world = cam2world_matrix[:, :3, 3]
	        fx = intrinsics[:, 0, 0]
	        fy = intrinsics[:, 1, 1]
	        cx = intrinsics[:, 0, 2]
	        cy = intrinsics[:, 1, 2]
	        sk = intrinsics[:, 0, 1]

	        uv = self.create_uv(
	            resolution,
	            cam2world_matrix,
	        )

	        x_cam = uv[:, :, 0].view(N, -1)
	        y_cam = uv[:, :, 1].view(N, -1)  # [0,1] range
	        z_cam = torch.ones((N, M), device=cam2world_matrix.device)

	        # basically torch.inverse(intrinsics)
	        x_lift = (x_cam - cx.unsqueeze(-1) + cy.unsqueeze(-1) *
	                  sk.unsqueeze(-1) / fy.unsqueeze(-1) - sk.unsqueeze(-1) *
	                  y_cam / fy.unsqueeze(-1)) / fx.unsqueeze(-1) * z_cam
	        y_lift = (y_cam - cy.unsqueeze(-1)) / fy.unsqueeze(-1) * z_cam

	        # Homogeneous camera-space points on the z = 1 plane.
	        cam_rel_points = torch.stack(
	            (x_lift, y_lift, z_cam, torch.ones_like(z_cam)), dim=-1)

	        world_rel_points = torch.bmm(cam2world_matrix,
	                                     cam_rel_points.permute(0, 2, 1)).permute(
	                                         0, 2, 1)[:, :, :3]

	        ray_dirs = world_rel_points - cam_locs_world[:, None, :]
	        ray_dirs = torch.nn.functional.normalize(ray_dirs, dim=2)

	        ray_origins = cam_locs_world.unsqueeze(1).repeat(
	            1, ray_dirs.shape[1], 1)

	        return ray_origins, ray_dirs, None


	class PatchRaySampler(RaySampler):
	    """Ray sampler that emits rays for one random square patch per camera."""

	    def forward(self,
	                cam2world_matrix,
	                intrinsics,
	                patch_resolution,
	                resolution,
	                fg_bbox=None):
	        """
	        Create batches of patch rays and return origins and directions.

	        cam2world_matrix: (N, 4, 4)
	        intrinsics: (N, 3, 3)
	        patch_resolution: int, side length of the sampled patch
	        resolution: int, side length of the full image
	        fg_bbox: optional tensor indexed per camera along dim 0; the slice
	            for each camera is forwarded to ``create_patch_uv``

	        Returns a tuple (ray_origins, ray_dirs, ray_bboxes):
	        ray_origins: (N, M, 3)
	        ray_dirs: (N, M, 3), unit length
	        ray_bboxes: list with the patch boxes returned by ``create_patch_uv``
	        with M = patch_resolution**2.
	        """
	        batch_size = cam2world_matrix.shape[0]
	        rays_per_patch = patch_resolution**2
	        cam_locs_world = cam2world_matrix[:, :3, 3]

	        # Intrinsic entries as (N, 1) columns so they broadcast over the rays.
	        fx_col = intrinsics[:, 0, 0].unsqueeze(-1)
	        fy_col = intrinsics[:, 1, 1].unsqueeze(-1)
	        cx_col = intrinsics[:, 0, 2].unsqueeze(-1)
	        cy_col = intrinsics[:, 1, 2].unsqueeze(-1)
	        sk_col = intrinsics[:, 0, 1].unsqueeze(-1)

	        # Draw one patch per camera, handing over that camera's own bbox slice.
	        patch_uvs = []
	        ray_bboxes = []
	        for cam_idx in range(batch_size):
	            one_cam = slice(cam_idx, cam_idx + 1)
	            cam_bbox = None if fg_bbox is None else fg_bbox[one_cam]
	            patch_uv, patch_boxes = self.create_patch_uv(
	                patch_resolution, resolution, cam2world_matrix[one_cam],
	                cam_bbox)
	            patch_uvs.append(patch_uv)
	            ray_bboxes += patch_boxes
	        all_uv = torch.cat(patch_uvs, 0)

	        x_cam = all_uv[..., 0].view(batch_size, -1)
	        y_cam = all_uv[..., 1].view(batch_size, -1)  # [0,1] range
	        z_cam = torch.ones((batch_size, rays_per_patch),
	                           device=cam2world_matrix.device)

	        # basically torch.inverse(intrinsics)
	        x_lift = (x_cam - cx_col + cy_col * sk_col / fy_col -
	                  sk_col * y_cam / fy_col) / fx_col * z_cam
	        y_lift = (y_cam - cy_col) / fy_col * z_cam

	        # Homogeneous camera-space points on the z = 1 plane.
	        homogeneous_one = torch.ones_like(z_cam)
	        cam_rel_points = torch.stack(
	            (x_lift, y_lift, z_cam, homogeneous_one), dim=-1)

	        world_points_h = torch.bmm(cam2world_matrix,
	                                   cam_rel_points.permute(0, 2, 1))
	        world_rel_points = world_points_h.permute(0, 2, 1)[:, :, :3]

	        ray_dirs = torch.nn.functional.normalize(
	            world_rel_points - cam_locs_world[:, None, :], dim=2)

	        num_rays = ray_dirs.shape[1]
	        ray_origins = cam_locs_world.unsqueeze(1).repeat(1, num_rays, 1)

	        return ray_origins, ray_dirs, ray_bboxes