Spaces:
Running
on
Zero
Running
on
Zero
from math import isqrt | |
from typing import Literal | |
import torch | |
from diff_gaussian_rasterization import ( | |
GaussianRasterizationSettings, | |
GaussianRasterizer, | |
) | |
from einops import einsum, rearrange, repeat | |
from torch import Tensor | |
from .projection import get_fov, homogenize_points | |
def get_projection_matrix(near, far, fov_x, fov_y):
    """Build a batch of 4x4 perspective projection matrices.

    Maps points in the viewing frustum to (-1, 1) on the X/Y axes and (0, 1) on
    the Z axis. Differs from the OpenGL version in that Z doesn't have range
    (-1, 1) after transformation and that Z is flipped.

    Args:
        near: 1-D tensor of near-plane distances; its length is the batch size.
        far: Far-plane distances, combined element-wise with ``near``.
        fov_x: Tensor of full horizontal field-of-view angles (``.tan()`` is
            applied to half of it, so presumably radians).
        fov_y: Tensor of full vertical field-of-view angles, same convention.

    Returns:
        A float32 tensor of shape (batch, 4, 4) on ``near``'s device.
    """
    (batch,) = near.shape

    # Extents of the frustum on the near plane, symmetric about the view axis.
    x_max = (0.5 * fov_x).tan() * near
    x_min = -x_max
    y_max = (0.5 * fov_y).tan() * near
    y_min = -y_max

    x_span = x_max - x_min
    y_span = y_max - y_min
    depth_span = far - near

    matrix = torch.zeros((batch, 4, 4), dtype=torch.float32, device=near.device)

    # X/Y scaling and (zero, for a symmetric frustum) off-centre terms.
    matrix[:, 0, 0] = 2 * near / x_span
    matrix[:, 0, 2] = (x_max + x_min) / x_span
    matrix[:, 1, 1] = 2 * near / y_span
    matrix[:, 1, 2] = (y_max + y_min) / y_span

    # Depth remapping to (0, 1) and the perspective-divide row.
    matrix[:, 2, 2] = far / depth_span
    matrix[:, 2, 3] = -(far * near) / depth_span
    matrix[:, 3, 2] = 1
    return matrix
def render_cuda(
    extrinsics,
    intrinsics,
    near,
    far,
    image_shape: tuple[int, int],
    background_color,
    gaussian_means,
    gaussian_covariances,
    gaussian_sh_coefficients,
    gaussian_opacities,
    scale_invariant: bool = True,
    use_sh: bool = True,
):
    """Rasterize one image per batch element with the CUDA Gaussian rasterizer.

    Args:
        extrinsics: Batched 3-D tensor of camera matrices; it is inverted to
            obtain the view matrix and its ``[:3, 3]`` column is used as the
            camera position (presumably (b, 4, 4) camera-to-world - confirm).
        intrinsics: Camera intrinsics, passed to ``get_fov`` to obtain the
            per-batch horizontal and vertical fields of view.
        near: 1-D tensor of near-plane distances, one per batch element.
        far: 1-D tensor of far-plane distances, one per batch element.
        image_shape: ``(height, width)`` of the rendered images.
        background_color: Per-batch background color; indexed by batch.
        gaussian_means: 3-D tensor of Gaussian centres (batch, gaussian, xyz).
        gaussian_covariances: 4-D tensor of 3x3 covariances; only the upper
            triangle is handed to the rasterizer.
        gaussian_sh_coefficients: 4-D tensor laid out as "b g xyz n", where
            ``n`` is the number of spherical-harmonics coefficients.
        gaussian_opacities: Per-Gaussian opacities, indexed by batch.
        scale_invariant: If True, rescale the scene by ``1 / near`` so that
            the near plane sits at 1 before rendering.
        use_sh: If True, pass the coefficients as spherical harmonics;
            otherwise pass the single coefficient as a precomputed color.

    Returns:
        The per-batch rasterized images stacked along a new leading dimension.
    """
    assert use_sh or gaussian_sh_coefficients.shape[-1] == 1

    # Make sure everything is in a range where numerical issues don't appear.
    if scale_invariant:
        scale = 1 / near
        # Clone so the caller's extrinsics are not modified in place.
        extrinsics = extrinsics.clone()
        extrinsics[..., :3, 3] = extrinsics[..., :3, 3] * scale[:, None]
        # Covariances scale quadratically with a uniform length scale.
        gaussian_covariances = gaussian_covariances * (scale[:, None, None, None] ** 2)
        gaussian_means = gaussian_means * scale[:, None, None]
        near = near * scale
        far = far * scale

    # n coefficients correspond to SH degree sqrt(n) - 1.
    _, _, _, n = gaussian_sh_coefficients.shape
    degree = isqrt(n) - 1
    shs = rearrange(gaussian_sh_coefficients, "b g xyz n -> b g n xyz").contiguous()

    b, _, _ = extrinsics.shape
    h, w = image_shape

    fov_x, fov_y = get_fov(intrinsics).unbind(dim=-1)
    tan_fov_x = (0.5 * fov_x).tan()
    tan_fov_y = (0.5 * fov_y).tan()

    # The matrices are transposed before being handed to the rasterizer.
    projection_matrix = get_projection_matrix(near, far, fov_x, fov_y)
    projection_matrix = rearrange(projection_matrix, "b i j -> b j i")
    view_matrix = rearrange(extrinsics.inverse(), "b i j -> b j i")
    full_projection = view_matrix @ projection_matrix

    # Indices of the upper triangle of a 3x3 matrix; identical for every image,
    # so compute them once instead of once per loop iteration.
    row, col = torch.triu_indices(3, 3)

    all_images = []
    for i in range(b):
        # Set up a tensor for the gradients of the screen-space means.
        mean_gradients = torch.zeros_like(gaussian_means[i], requires_grad=True)
        try:
            mean_gradients.retain_grad()
        except RuntimeError:
            # Best effort: rendering does not depend on retaining this gradient.
            pass

        settings = GaussianRasterizationSettings(
            image_height=h,
            image_width=w,
            tanfovx=tan_fov_x[i].item(),
            tanfovy=tan_fov_y[i].item(),
            bg=background_color[i],
            scale_modifier=1.0,
            viewmatrix=view_matrix[i],
            projmatrix=full_projection[i],
            sh_degree=degree,
            campos=extrinsics[i, :3, 3],
            prefiltered=False,  # This matches the original usage.
            debug=False,
        )
        rasterizer = GaussianRasterizer(settings)

        # The radii returned by the rasterizer are not used.
        image, _ = rasterizer(
            means3D=gaussian_means[i],
            means2D=mean_gradients,
            shs=shs[i] if use_sh else None,
            colors_precomp=None if use_sh else shs[i, :, 0, :],
            opacities=gaussian_opacities[i, ..., None],
            cov3D_precomp=gaussian_covariances[i, :, row, col],
        )
        all_images.append(image)
    return torch.stack(all_images)
def render_cuda_orthographic(
    extrinsics,
    width,
    height,
    near,
    far,
    image_shape: tuple[int, int],
    background_color,
    gaussian_means,
    gaussian_covariances,
    gaussian_sh_coefficients,
    gaussian_opacities,
    fov_degrees,
    use_sh: bool = True,
    dump: dict | None = None,
):
    """Render an approximately orthographic view with the CUDA rasterizer.

    The orthographic projection is faked: the camera is moved back along its
    Z axis and a small field of view is chosen so that the view spans
    ``width`` x ``height`` at the original near plane.

    Args:
        extrinsics: Batched 3-D tensor of camera matrices (presumably
            (b, 4, 4) camera-to-world - confirm against callers).
        width: Horizontal extent of the view at the near plane.
        height: Vertical extent of the view at the near plane.
        near: 1-D tensor of near-plane distances before moving the camera.
        far: Far-plane distances before moving the camera.
        image_shape: ``(height, width)`` of the rendered images in pixels.
        background_color: Per-batch background color; indexed by batch.
        gaussian_means: Gaussian centres, indexed by batch.
        gaussian_covariances: 3x3 covariances per Gaussian; only the upper
            triangle is handed to the rasterizer.
        gaussian_sh_coefficients: 4-D tensor laid out as "b g xyz n".
        gaussian_opacities: Per-Gaussian opacities, indexed by batch.
        fov_degrees: Horizontal field of view, in degrees, of the fake
            perspective camera.
        use_sh: If True, pass the coefficients as spherical harmonics;
            otherwise pass the single coefficient as a precomputed color.
        dump: Optional dict that receives the adjusted ``extrinsics``,
            ``fov_x``, ``fov_y``, ``near`` and ``far``.

    Returns:
        The per-batch rasterized images stacked along a new leading dimension.
    """
    b, _, _ = extrinsics.shape
    h, w = image_shape
    assert use_sh or gaussian_sh_coefficients.shape[-1] == 1

    # n coefficients correspond to SH degree sqrt(n) - 1.
    _, _, _, n = gaussian_sh_coefficients.shape
    degree = isqrt(n) - 1
    shs = rearrange(gaussian_sh_coefficients, "b g xyz n -> b g n xyz").contiguous()

    # Create fake "orthographic" projection by moving the camera back and picking a
    # small field of view.
    fov_x = torch.tensor(fov_degrees, device=extrinsics.device).deg2rad()
    tan_fov_x = (0.5 * fov_x).tan()
    distance_to_near = (0.5 * width) / tan_fov_x
    tan_fov_y = 0.5 * height / distance_to_near
    # tan_fov_y is the tangent of the *half* angle, so the full angle is
    # 2 * atan(tan_fov_y). This keeps tan(0.5 * fov_y) in the projection matrix
    # equal to the tanfovy value given to the rasterizer.
    fov_y = 2 * tan_fov_y.atan()
    near = near + distance_to_near
    far = far + distance_to_near
    move_back = torch.eye(4, dtype=torch.float32, device=extrinsics.device)
    # NOTE(review): this scalar assignment presumably requires distance_to_near
    # to hold a single value (scalar width or batch size 1) - confirm.
    move_back[2, 3] = -distance_to_near
    extrinsics = extrinsics @ move_back

    # Escape hatch for visualization/figures.
    if dump is not None:
        dump["extrinsics"] = extrinsics
        dump["fov_x"] = fov_x
        dump["fov_y"] = fov_y
        dump["near"] = near
        dump["far"] = far

    # The matrices are transposed before being handed to the rasterizer.
    projection_matrix = get_projection_matrix(
        near, far, repeat(fov_x, "-> b", b=b), fov_y
    )
    projection_matrix = rearrange(projection_matrix, "b i j -> b j i")
    view_matrix = rearrange(extrinsics.inverse(), "b i j -> b j i")
    full_projection = view_matrix @ projection_matrix

    # Indices of the upper triangle of a 3x3 matrix; identical for every image,
    # so compute them once instead of once per loop iteration.
    row, col = torch.triu_indices(3, 3)

    all_images = []
    for i in range(b):
        # Set up a tensor for the gradients of the screen-space means.
        mean_gradients = torch.zeros_like(gaussian_means[i], requires_grad=True)
        try:
            mean_gradients.retain_grad()
        except RuntimeError:
            # Best effort: rendering does not depend on retaining this gradient.
            pass

        # NOTE(review): unlike render_cuda, the tangents are passed as tensors
        # rather than Python floats, and the same values are used for every
        # batch element - confirm this is intended.
        settings = GaussianRasterizationSettings(
            image_height=h,
            image_width=w,
            tanfovx=tan_fov_x,
            tanfovy=tan_fov_y,
            bg=background_color[i],
            scale_modifier=1.0,
            viewmatrix=view_matrix[i],
            projmatrix=full_projection[i],
            sh_degree=degree,
            campos=extrinsics[i, :3, 3],
            prefiltered=False,  # This matches the original usage.
            debug=False,
        )
        rasterizer = GaussianRasterizer(settings)

        # The radii returned by the rasterizer are not used.
        image, _ = rasterizer(
            means3D=gaussian_means[i],
            means2D=mean_gradients,
            shs=shs[i] if use_sh else None,
            colors_precomp=None if use_sh else shs[i, :, 0, :],
            opacities=gaussian_opacities[i, ..., None],
            cov3D_precomp=gaussian_covariances[i, :, row, col],
        )
        all_images.append(image)
    return torch.stack(all_images)
# Names of the depth encodings accepted by render_depth_cuda's `mode` argument.
DepthRenderingMode = Literal[
    "depth",
    "disparity",
    "relative_disparity",
    "log",
]
def render_depth_cuda(
    extrinsics,
    intrinsics,
    near,
    far,
    image_shape: tuple[int, int],
    gaussian_means,
    gaussian_covariances,
    gaussian_opacities,
    scale_invariant: bool = True,
    mode: DepthRenderingMode = "depth",
):
    """Render a depth-like map by rasterizing Gaussians colored by their depth.

    Each Gaussian's camera-space Z coordinate is converted according to
    ``mode`` and used as a constant color, which ``render_cuda`` then
    composites against a zero background.

    Args:
        extrinsics: Batched camera matrices; inverted to bring the Gaussian
            means into camera space.
        intrinsics: Camera intrinsics, forwarded to ``render_cuda``.
        near: 1-D tensor of near-plane distances, one per batch element.
        far: 1-D tensor of far-plane distances, one per batch element.
        image_shape: ``(height, width)`` of the rendered images.
        gaussian_means: Gaussian centres (batch, gaussian, xyz).
        gaussian_covariances: Gaussian covariances, forwarded unchanged.
        gaussian_opacities: Gaussian opacities, forwarded unchanged.
        scale_invariant: Forwarded to ``render_cuda``.
        mode: ``"depth"`` uses raw depth, ``"disparity"`` uses ``1 / depth``,
            ``"relative_disparity"`` uses ``depth_to_relative_disparity``,
            and ``"log"`` uses the log of depth clamped to ``[near, far]``.

    Returns:
        The ``render_cuda`` output averaged over dimension 1 (the three
        identical color channels).
    """
    # Specify colors according to Gaussian depths.
    camera_space_gaussians = einsum(
        extrinsics.inverse(), homogenize_points(gaussian_means), "b i j, b g j -> b g i"
    )
    fake_color = camera_space_gaussians[..., 2]

    if mode == "disparity":
        fake_color = 1 / fake_color
    elif mode == "relative_disparity":
        # NOTE(review): depth_to_relative_disparity is not imported in the
        # visible part of this file - confirm it is in scope.
        fake_color = depth_to_relative_disparity(
            fake_color, near[:, None], far[:, None]
        )
    elif mode == "log":
        # Clamp depth to [near, far] before taking the log. The lower bound must
        # be applied with maximum() and the upper bound with minimum(); the
        # reverse order collapses every value to `far`.
        fake_color = fake_color.maximum(near[:, None]).minimum(far[:, None]).log()

    # Render using depth as color.
    b, _ = fake_color.shape
    result = render_cuda(
        extrinsics,
        intrinsics,
        near,
        far,
        image_shape,
        torch.zeros((b, 3), dtype=fake_color.dtype, device=fake_color.device),
        gaussian_means,
        gaussian_covariances,
        # A single coefficient per channel: the same value for all three channels.
        repeat(fake_color, "b g -> b g c ()", c=3),
        gaussian_opacities,
        scale_invariant=scale_invariant,
    )
    return result.mean(dim=1)